+from math import exp
+
 import numpy as np

 from scipy import optimize

 from sklearn.base import BaseEstimator, RegressorMixin
 from sklearn.linear_model.base import center_data, LinearModel
 from sklearn.preprocessing import StandardScaler
-from sklearn.utils import check_X_y
+from sklearn.utils import check_X_y, check_array

 def _huber_loss_and_gradient(w, X, y, epsilon, alpha):
+    """
+    Calculate the robust Huber loss as described in
+    "A robust hybrid of lasso and ridge regression".
+
+    """
+    sigma = w[-1]
+    w = w[:-1]
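+    # exp(sigma) acts as the scale of the residuals: optimizing over
+    # sigma = log(scale) keeps the estimated scale positive.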

+    # Calculate the values where |y - X'w| / exp(sigma) > epsilon.
+    # The values above this threshold are outliers.
     linear_loss = y - np.dot(X, w)
     abs_linear_loss = np.abs(linear_loss)
-    outliers_true = abs_linear_loss > epsilon
+    outliers_true = abs_linear_loss * exp(-sigma) > epsilon

     # Calculate the linear loss due to the outliers.
+    # This is equal to (2 * M * |y - X'w| / exp(sigma) - M**2) * exp(sigma),
+    # where M is the threshold epsilon.
     n_outliers = np.count_nonzero(outliers_true)
     outliers = abs_linear_loss[outliers_true]
-    outlier_loss = epsilon * np.sum(outliers) - n_outliers * 0.5 * epsilon**2
+    outlier_loss = (2 * epsilon * np.sum(outliers) -
+                    exp(sigma) * n_outliers * epsilon**2)

     # Calculate the quadratic loss due to the non-outliers.
+    # This is equal to ((y - X'w)**2 / exp(2 * sigma)) * exp(sigma).
     non_outliers = linear_loss[~outliers_true]
-    loss = 0.5 * np.dot(non_outliers, non_outliers) + outlier_loss
+    loss = exp(-sigma) * np.dot(non_outliers, non_outliers) + outlier_loss

     # Calculate the gradient.
-    grad = np.dot(non_outliers, -X[~outliers_true, :])
-    outliers_true_pos = np.logical_and(linear_loss >= 0, outliers_true)
-    outliers_true_neg = np.logical_and(linear_loss < 0, outliers_true)
-    grad -= epsilon * X[outliers_true_pos, :].sum(axis=0)
-    grad += epsilon * X[outliers_true_neg, :].sum(axis=0)
-    grad += alpha * 2 * w
-    return loss + alpha * np.dot(w, w), grad
+    # grad = np.dot(non_outliers, -X[~outliers_true, :])
+    # outliers_true_pos = np.logical_and(linear_loss >= 0, outliers_true)
+    # outliers_true_neg = np.logical_and(linear_loss < 0, outliers_true)
+    # grad -= epsilon * X[outliers_true_pos, :].sum(axis=0)
+    # grad += epsilon * X[outliers_true_neg, :].sum(axis=0)
+    # grad += alpha * 2 * w
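+    # The X.shape[0] * exp(sigma) term below penalizes large scale estimates;
+    # the analytic gradient above is disabled while approx_grad=True is used.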
+    return X.shape[0] * exp(sigma) + loss + alpha * np.dot(w, w)  # , grad


 class HuberRegressor(LinearModel, RegressorMixin, BaseEstimator):
-    def __init__(self, epsilon=0.1, n_iter=100, alpha=0.0001,
-                 warm_start=False, copy=True):
+    def __init__(self, epsilon=1.35, n_iter=100, alpha=0.0001,
+                 warm_start=False, copy=True, fit_intercept=True):
         self.epsilon = epsilon
         self.n_iter = n_iter
         self.alpha = alpha
         self.warm_start = warm_start
         self.copy = copy
+        self.fit_intercept = fit_intercept

     def fit(self, X, y):
-        X, y = check_X_y(X, y, copy=self.copy)
+        X = check_array(X, copy=self.copy)
+        y = check_array(y, copy=self.copy, ensure_2d=False)

         coef = getattr(self, 'coef_', None)
         if not self.warm_start or (self.warm_start and coef is None):
-            self.coef_ = np.zeros(X.shape[1])
+            self.coef_ = np.zeros(X.shape[1] + 1)
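+            # the extra entry stores sigma, the log of the scale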

         try:
             self.coef_, f, self.dict_ = optimize.fmin_l_bfgs_b(
                 _huber_loss_and_gradient, self.coef_, approx_grad=True,
-                args=(X, y, self.epsilon, self.alpha), maxiter=self.n_iter)
+                args=(X, y, self.epsilon, self.alpha), maxiter=self.n_iter,
+                pgtol=1e-3)
         except TypeError:
             self.coef_, f, self.dict_ = optimize.fmin_l_bfgs_b(
                 _huber_loss_and_gradient, self.coef_,
                 args=(X, y, self.epsilon, self.alpha))

-        self.intercept_ = 0.0
+        self.scale_ = exp(self.coef_[-1])
+        self.coef_ = self.coef_[:-1]
+
         self.loss_ = f
         return self
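
For review, a minimal standalone sketch (not part of the patch) that restates the objective per sample with np.where instead of the outlier/inlier split; huber_objective and the toy data are illustrative names introduced here. Evaluating it and _huber_loss_and_gradient on the same inputs should give identical values.

import numpy as np

def huber_objective(w_ext, X, y, epsilon, alpha):
    # Same objective as the patch, written per-sample; w_ext = [w, sigma].
    sigma, w = w_ext[-1], w_ext[:-1]
    r = y - X.dot(w)
    scaled = np.abs(r) * np.exp(-sigma)  # |r| / exp(sigma)
    per_sample = np.where(
        scaled > epsilon,                                         # outliers: linear part
        2 * epsilon * np.abs(r) - np.exp(sigma) * epsilon ** 2,
        np.exp(-sigma) * r ** 2)                                  # inliers: quadratic part
    return X.shape[0] * np.exp(sigma) + per_sample.sum() + alpha * np.dot(w, w)

rng = np.random.RandomState(0)
X = rng.randn(50, 3)
y = X.dot(np.array([1.0, -2.0, 0.5])) + 0.1 * rng.randn(50)
w_ext = np.append(rng.randn(3), 0.3)  # last entry is sigma
print(huber_objective(w_ext, X, y, epsilon=1.35, alpha=0.0001))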
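A quick usage sketch, assuming the patched file is importable as huber.py (a hypothetical module name). The new default epsilon=1.35 is the classical Huber constant, giving roughly 95% efficiency under Gaussian noise. Only coef_ and scale_ are inspected here, since intercept handling is still in progress in this patch.

import numpy as np
from huber import HuberRegressor  # hypothetical module name for this patch

rng = np.random.RandomState(42)
X = rng.randn(100, 2)
y = X.dot(np.array([0.5, -1.0])) + 0.05 * rng.randn(100)
y[::10] += 10.0  # gross outliers that plain least squares would chase

reg = HuberRegressor(epsilon=1.35, alpha=0.0001, n_iter=100)
reg.fit(X, y)
print(reg.coef_)   # should remain close to [0.5, -1.0] despite the outliers
print(reg.scale_)  # jointly estimated residual scale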