r2k0/scikit-learn · Commit c5ce49b
fix: Huber loss function in gradient_boosting fails if negative_gradient is not called before __call__; now computes on-demand.
1 parent 6d09f05 · commit c5ce49b

2 files changed, +12 -8 lines changed

sklearn/ensemble/gradient_boosting.py

Lines changed: 3 additions & 0 deletions
@@ -245,6 +245,7 @@ class HuberLossFunction(RegressionLossFunction):
     def __init__(self, n_classes, alpha=0.9):
         super(HuberLossFunction, self).__init__(n_classes)
         self.alpha = alpha
+        self.gamma = None
 
     def init_estimator(self):
         return QuantileEstimator(alpha=0.5)
@@ -253,6 +254,8 @@ def __call__(self, y, pred):
         pred = pred.ravel()
         diff = y - pred
         gamma = self.gamma
+        if gamma is None:
+            gamma = stats.scoreatpercentile(np.abs(diff), self.alpha * 100)
         gamma_mask = np.abs(diff) <= gamma
         sq_loss = np.sum(0.5 * diff[gamma_mask] ** 2.0)
         lin_loss = np.sum(gamma * (np.abs(diff[~gamma_mask]) - gamma / 2.0))
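
For context, the change makes the loss callable on its own: if negative_gradient has not yet populated self.gamma, __call__ now derives gamma from the alpha-quantile of the absolute residuals. A minimal standalone sketch of that logic (an illustrative function, not the library class itself; the unnormalized sum is returned for brevity):

import numpy as np
from scipy import stats

def huber_loss(y, pred, alpha=0.9, gamma=None):
    # Mirrors the fixed HuberLossFunction.__call__: when gamma was never
    # set by negative_gradient, fall back to the alpha-quantile of |y - pred|.
    diff = y - pred.ravel()
    if gamma is None:
        gamma = stats.scoreatpercentile(np.abs(diff), alpha * 100)
    gamma_mask = np.abs(diff) <= gamma
    sq_loss = np.sum(0.5 * diff[gamma_mask] ** 2.0)
    lin_loss = np.sum(gamma * (np.abs(diff[~gamma_mask]) - gamma / 2.0))
    return sq_loss + lin_loss

rng = np.random.RandomState(0)
y, pred = rng.randn(100), rng.randn(100)
print(huber_loss(y, pred))  # works without any prior negative_gradient call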

sklearn/ensemble/tests/test_gradient_boosting.py

Lines changed: 9 additions & 8 deletions
@@ -153,14 +153,15 @@ def test_boston():
     """Check consistency on dataset boston house prices with least squares
     and least absolute deviation. """
     for loss in ("ls", "lad", "huber"):
-        clf = GradientBoostingRegressor(n_estimators=100, loss=loss,
-                                        max_depth=4,
-                                        min_samples_split=1, random_state=1)
-        assert_raises(ValueError, clf.predict, boston.data)
-        clf.fit(boston.data, boston.target)
-        y_pred = clf.predict(boston.data)
-        mse = mean_squared_error(boston.target, y_pred)
-        assert mse < 6.0, "Failed with loss %s and mse = %.4f" % (loss, mse)
+        for subsample in (1.0, 0.5):
+            clf = GradientBoostingRegressor(n_estimators=100, loss=loss,
+                                            max_depth=4, subsample=subsample,
+                                            min_samples_split=1, random_state=1)
+            assert_raises(ValueError, clf.predict, boston.data)
+            clf.fit(boston.data, boston.target)
+            y_pred = clf.predict(boston.data)
+            mse = mean_squared_error(boston.target, y_pred)
+            assert mse < 6.0, "Failed with loss %s and mse = %.4f" % (loss, mse)
 
 
 def test_iris():
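
The widened test now covers each loss with and without row subsampling. A quick way to run the same Huber/subsample combination outside the test suite, sketched with a synthetic dataset in place of the Boston housing data (which newer scikit-learn releases no longer bundle); sizes and the noise level here are illustrative:

import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error

# Synthetic stand-in for the Boston data used by test_boston().
X, y = make_regression(n_samples=500, n_features=13, noise=10.0, random_state=1)

for subsample in (1.0, 0.5):
    # Fit the Huber-loss booster with and without subsampling.
    clf = GradientBoostingRegressor(n_estimators=100, loss="huber",
                                    max_depth=4, subsample=subsample,
                                    random_state=1)
    clf.fit(X, y)
    mse = mean_squared_error(y, clf.predict(X))
    print("subsample=%.1f  mse=%.4f" % (subsample, mse))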
