8000 Add scaling to huber loss function · scikit-learn/scikit-learn@8eee69e · GitHub
[go: up one dir, main page]

Skip to content

Commit 8eee69e

Browse files
committed
Add scaling to huber loss function
1 parent 096ecef commit 8eee69e

File tree

1 file changed

+34
-17
lines changed

1 file changed

+34
-17
lines changed

sklearn/linear_model/huber.py

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,79 @@
1+
from math import exp
2+
13
import numpy as np
24

35
from scipy import optimize
46

57
from sklearn.base import BaseEstimator, RegressorMixin
68
from sklearn.linear_model.base import center_data, LinearModel
79
from sklearn.preprocessing import StandardScaler
8-
from sklearn.utils import check_X_y
10+
from sklearn.utils import check_X_y, check_array
911

1012
def _huber_loss_and_gradient(w, X, y, epsilon, alpha):
13+
"""
14+
Calculate the robust huber loss as described in
15+
"A robust hybrid of lasso and ridge regression".
16+
17+
"""
18+
sigma = w[-1]
19+
w = w[:-1]
1120

21+
# Calculate the values where |(y - X'w) / exp(sigma)| > epsilon
22+
# The values above this threshold are outliers.
1223
linear_loss = y - np.dot(X, w)
1324
abs_linear_loss = np.abs(linear_loss)
14-
outliers_true = abs_linear_loss > epsilon
25+
outliers_true = abs_linear_loss * exp(-sigma) > epsilon
1526

1627
# Calculate the linear loss due to the outliers.
28+
# This is equal to (2 * M * |(y - X'w) / exp(sigma)| - M**2) * exp(sigma), where M is epsilon
1729
n_outliers = np.count_nonzero(outliers_true)
1830
outliers = abs_linear_loss[outliers_true]
19-
outlier_loss = epsilon * np.sum(outliers) - n_outliers * 0.5 * epsilon**2
31+
outlier_loss = 2 * epsilon * np.sum(outliers) - exp(sigma) * n_outliers * epsilon**2
2032

2133
# Calculate the quadratic loss due to the non-outliers.-
34+
# This is equal to |(y - X'w)**2 / exp(2*sigma)|*exp(sigma)
2235
non_outliers = linear_loss[~outliers_true]
23-
loss = 0.5 * np.dot(non_outliers, non_outliers) + outlier_loss
36+
loss = exp(-sigma) * np.dot(non_outliers, non_outliers) + outlier_loss
2437

2538
# Calculate the gradient
26-
grad = np.dot(non_outliers, -X[~outliers_true, :])
27-
outliers_true_pos = np.logical_and(linear_loss >= 0, outliers_true)
28-
outliers_true_neg = np.logical_and(linear_loss < 0, outliers_true)
29-
grad -= epsilon * X[outliers_true_pos, :].sum(axis=0)
30-
grad += epsilon * X[outliers_true_neg, :].sum(axis=0)
31-
grad += alpha * 2 * w
32-
return loss + alpha * np.dot(w, w), grad
39+
# grad = np.dot(non_outliers, -X[~outliers_true, :])
40+
# outliers_true_pos = np.logical_and(linear_loss >= 0, outliers_true)
41+
# outliers_true_neg = np.logical_and(linear_loss < 0, outliers_true)
42+
# grad -= epsilon * X[outliers_true_pos, :].sum(axis=0)
43+
# grad += epsilon * X[outliers_true_neg, :].sum(axis=0)
44+
# grad += alpha * 2 * w
45+
return X.shape[0] * exp(sigma) + loss + alpha * np.dot(w, w)#, grad
3346

3447

3548
class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator):
36-
def __init__(self, epsilon=0.1, n_iter=100, alpha=0.0001,
37-
warm_start=False, copy=True):
49+
def __init__(self, epsilon=1.35, n_iter=100, alpha=0.0001,
50+
warm_start=False, copy=True, fit_intercept=True):
3851
self.epsilon = epsilon
3952
self.n_iter = n_iter
4053
self.alpha = alpha
4154
self.warm_start = warm_start
4255
self.copy = copy
56+
self.fit_intercept = fit_intercept
4357

4458
def fit(self, X, y):
45-
X, y = check_X_y(X, y, copy=self.copy)
59+
X = check_array(X, copy=self.copy)
60+
y = check_array(y, copy=self.copy)
4661

4762
coef = getattr(self, 'coef_', None)
4863
if not self.warm_start or (self.warm_start and coef is None):
49-
self.coef_ = np.zeros(X.shape[1])
64+
self.coef_ = np.zeros(X.shape[1] + 1)
5065

5166
try:
5267
self.coef_, f, self.dict_ = optimize.fmin_l_bfgs_b(
5368
_huber_loss_and_gradient, self.coef_, approx_grad=True,
54-
args=(X, y, self.epsilon, self.alpha), maxiter=self.n_iter)
69+
args=(X, y, self.epsilon, self.alpha), maxiter=self.n_iter, pgtol=1e-3)
5570
except TypeError:
5671
self.coef_, f, self.dict_ = optimize.fmin_l_bfgs_b(
5772
_huber_loss_and_gradient, self.coef_,
5873
args=(X, y, self.epsilon, self.alpha))
5974

60-
self.intercept_ = 0.0
75+
self.scale_ = self.coef_[-1]
76+
self.coef_ = self.coef_[:-1]
77+
6178
self.loss_ = f
6279
return self

0 commit comments

Comments
 (0)
0