Source code for mercurial.utils.overfitting_controls

"""Overfitting countermeasures: regularization, early stopping, pruning."""

from copy import deepcopy
from typing import Tuple

import numpy as np


class L2Regularizer:
    """L2 (ridge) regularization for model parameters.

    The penalty is ``lambda_reg * sum(params ** 2)`` and its gradient with
    respect to the parameters is ``2 * lambda_reg * params``.
    """

    def __init__(self, lambda_reg: float = 0.01):
        """Store the regularization strength ``lambda_reg``."""
        self.lambda_reg = lambda_reg

    def penalty(self, params: np.ndarray) -> float:
        """Return λ * ||θ||₂² as a builtin float.

        Args:
            params: Parameter values; any array-like (ndarray, list, tuple)
                is accepted and coerced with ``np.asarray``.

        Returns:
            The scalar penalty ``lambda_reg * sum(params ** 2)``.
        """
        values = np.asarray(params)
        # float() makes the return value match the annotation instead of
        # leaking a NumPy scalar to callers.
        return float(self.lambda_reg * np.sum(np.square(values)))

    def gradient(self, params: np.ndarray) -> np.ndarray:
        """Return 2λ * θ, the gradient of the penalty.

        Args:
            params: Parameter values; any array-like is accepted.

        Returns:
            An array with the same shape as ``params``.
        """
        # Coerce first so plain Python sequences do not raise on `float * list`.
        return 2 * self.lambda_reg * np.asarray(params)
class EarlyStopping:
    """Track the best validation loss and signal when to halt training.

    A step counts as an improvement only when the loss drops below the best
    loss seen so far by more than ``min_delta``. After ``patience``
    consecutive non-improving steps, ``step`` returns False.
    """

    def __init__(self, patience: int = 5, min_delta: float = 1e-4):
        """Configure the patience window and the minimum required improvement."""
        self.patience = patience
        self.min_delta = min_delta
        self.best_loss = np.inf
        self.counter = 0
        self.best_params = None

    def step(self, loss: float, params: np.ndarray) -> bool:
        """Record one validation result.

        Args:
            loss: Validation loss for the current step.
            params: Current parameters; a deep copy is kept on improvement.

        Returns:
            True if training should continue, False if it should stop.
        """
        improved = loss < self.best_loss - self.min_delta
        if not improved:
            # No meaningful progress: use up one unit of patience.
            self.counter += 1
            return self.counter < self.patience

        # New best: snapshot the parameters and restart the patience window.
        self.best_loss = loss
        self.best_params = deepcopy(params)
        self.counter = 0
        return True

    def reset(self):
        """Forget the best loss, the stored parameters and the stall counter."""
        self.best_loss = np.inf
        self.counter = 0
        self.best_params = None
def prune_parameters(params: np.ndarray, threshold: float = 0.01) -> np.ndarray:
    """Return a copy of ``params`` with small-magnitude entries zeroed.

    Entries whose absolute value is strictly below ``threshold`` become 0.0;
    the input array is left untouched.
    """
    below_threshold = np.abs(params) < threshold
    result = params.copy()
    result[below_threshold] = 0.0
    return result
def sparsity_ratio(params: np.ndarray) -> float:
    """Return the fraction of parameters that are (numerically) zero.

    An entry counts as zero when its absolute value is below 1e-12.

    Args:
        params: Parameter values of any shape; array-likes are coerced with
            ``np.asarray``.

    Returns:
        A float in [0, 1]. An empty input yields 0.0.
    """
    values = np.asarray(params)
    # Divide by the total element count, not len(), which is only the length
    # of the first axis and gives a wrong ratio for multi-dimensional input.
    total = values.size
    if total == 0:
        # Nothing to measure; avoid a 0/0 division that would produce NaN.
        return 0.0
    zero_count = np.count_nonzero(np.abs(values) < 1e-12)
    return float(zero_count / total)
class LinearModel:
    """Simple linear model with L2 regularization and early stopping.

    Predictions are ``X @ weights + bias``. The loss is the mean squared
    error plus the L2 penalty on the weights (the bias is not penalized).
    """

    def __init__(self, input_dim: int, lambda_reg: float = 0.01):
        """Initialize small random weights, a zero bias and the helpers.

        Args:
            input_dim: Number of input features (length of the weight vector).
            lambda_reg: Strength passed to the L2 regularizer.
        """
        self.weights = np.random.randn(input_dim) * 0.01
        self.bias = 0.0
        self.regularizer = L2Regularizer(lambda_reg)
        self.early_stopping = EarlyStopping(patience=5)

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Return the linear prediction ``X @ weights + bias``."""
        return X @ self.weights + self.bias

    def loss(self, X: np.ndarray, y: np.ndarray) -> float:
        """Return mean squared error on ``(X, y)`` plus the weight penalty."""
        pred = self.predict(X)
        mse = np.mean((pred - y) ** 2)
        reg = self.regularizer.penalty(self.weights)
        return mse + reg

    def gradient(self, X: np.ndarray, y: np.ndarray) -> Tuple[np.ndarray, float]:
        """Return the loss gradient with respect to the weights and the bias.

        Returns:
            ``(grad_w, grad_b)``; ``grad_w`` includes the regularizer's
            gradient, ``grad_b`` does not because the bias is not penalized.
        """
        pred = self.predict(X)
        error = pred - y
        scale = 2 / len(y)
        grad_w = scale * (X.T @ error) + self.regularizer.gradient(self.weights)
        grad_b = scale * np.sum(error)
        return grad_w, grad_b

    def train(
        self,
        X_train: np.ndarray,
        y_train: np.ndarray,
        X_val: np.ndarray,
        y_val: np.ndarray,
        epochs: int = 100,
        lr: float = 0.01,
    ) -> dict:
        """Run full-batch gradient descent with early stopping.

        Each epoch takes one gradient step on the training data, records the
        training and validation losses, and asks the early-stopping tracker
        whether to continue. When it signals a stop, the weights and bias
        from the best validation epoch are restored.

        Args:
            X_train: Training inputs.
            y_train: Training targets.
            X_val: Validation inputs.
            y_val: Validation targets.
            epochs: Maximum number of epochs.
            lr: Learning rate for the gradient step.

        Returns:
            A dict with ``"train_loss"`` and ``"val_loss"`` lists holding one
            entry per completed epoch.
        """
        history = {"train_loss": [], "val_loss": []}
        self.early_stopping.reset()
        # The tracker only snapshots the weights, so the matching bias is
        # remembered here to keep the restored model consistent.
        best_bias = self.bias

        for epoch in range(epochs):
            # Forward + backward
            grad_w, grad_b = self.gradient(X_train, y_train)
            self.weights -= lr * grad_w
            self.bias -= lr * grad_b

            # Compute losses
            train_loss = self.loss(X_train, y_train)
            val_loss = self.loss(X_val, y_val)
            history["train_loss"].append(train_loss)
            history["val_loss"].append(val_loss)

            # Early stopping check
            keep_going = self.early_stopping.step(val_loss, self.weights)
            if self.early_stopping.counter == 0:
                # The counter is reset to zero only on an improving step, so
                # this bias belongs to the weights that were just snapshotted.
                best_bias = self.bias

            if not keep_going:
                print(f"Early stopping at epoch {epoch+1}")
                best_weights = self.early_stopping.best_params
                # A snapshot may be missing if no epoch ever improved; in that
                # case keep the current parameters rather than assigning None.
                if best_weights is not None:
                    # Copy so later in-place updates of self.weights cannot
                    # corrupt the tracker's stored snapshot.
                    self.weights = np.array(best_weights, copy=True)
                    self.bias = best_bias
                break

        return history