diff --git a/sklearn/_loss/__init__.py b/sklearn/_loss/__init__.py
index 63ae3038df8ae..78b1eb8543c8d 100644
--- a/sklearn/_loss/__init__.py
+++ b/sklearn/_loss/__init__.py
@@ -7,6 +7,7 @@
     HalfSquaredError,
     AbsoluteError,
     PinballLoss,
+    HuberLoss,
     HalfPoissonLoss,
     HalfGammaLoss,
     HalfTweedieLoss,
@@ -20,6 +21,7 @@
     "HalfSquaredError",
     "AbsoluteError",
     "PinballLoss",
+    "HuberLoss",
     "HalfPoissonLoss",
     "HalfGammaLoss",
     "HalfTweedieLoss",
diff --git a/sklearn/_loss/_loss.pxd b/sklearn/_loss/_loss.pxd
index 3aad078c0f3a1..19ee21b2a40fa 100644
--- a/sklearn/_loss/_loss.pxd
+++ b/sklearn/_loss/_loss.pxd
@@ -44,6 +44,13 @@ cdef class CyPinballLoss(CyLossFunction):
     cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
 
 
+cdef class CyHuberLoss(CyLossFunction):
+    cdef public double delta  # public makes it accessible from Python
+    cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
+    cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
+    cdef double_pair cy_grad_hess(self, double y_true, double raw_prediction) noexcept nogil
+
+
 cdef class CyHalfPoissonLoss(CyLossFunction):
     cdef double cy_loss(self, double y_true, double raw_prediction) noexcept nogil
     cdef double cy_gradient(self, double y_true, double raw_prediction) noexcept nogil
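For reference while reading the Cython implementation that follows, this is the piecewise definition the new CyHuberLoss declarations correspond to, as a minimal NumPy sketch. It is not part of the patch, and the name huber_loss_reference is invented for illustration:

    import numpy as np

    def huber_loss_reference(y_true, raw_prediction, delta):
        # Per-sample Huber loss: quadratic for small errors, linear for large ones.
        abserr = np.abs(y_true - raw_prediction)
        return np.where(
            abserr <= delta,
            0.5 * abserr**2,                 # |error| <= delta: half squared error
            delta * (abserr - 0.5 * delta),  # |error| > delta: linear, slope delta
        )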
diff --git a/sklearn/_loss/_loss.pyx.tp b/sklearn/_loss/_loss.pyx.tp
index ae4fee45540db..c4c92cba1fc86 100644
--- a/sklearn/_loss/_loss.pyx.tp
+++ b/sklearn/_loss/_loss.pyx.tp
@@ -46,6 +46,18 @@ doc_PinballLoss = (
     """
 )
 
+doc_HuberLoss = (
+    """Huber Loss with identity link.
+
+    Domain:
+    y_true and y_pred all real numbers
+    delta in positive real numbers
+
+    Link:
+    y_pred = raw_prediction
+    """
+)
+
 doc_HalfPoissonLoss = (
     """Half Poisson deviance loss with log-link.
 
@@ -164,6 +176,9 @@ class_list = [
     ("CyPinballLoss", doc_PinballLoss, "quantile",
      "closs_pinball_loss", None,
      "cgradient_pinball_loss", "cgrad_hess_pinball_loss"),
+    ("CyHuberLoss", doc_HuberLoss, "delta",
+     "closs_huber_loss", None,
+     "cgradient_huber_loss", "cgrad_hess_huber_loss"),
     ("CyHalfPoissonLoss", doc_HalfPoissonLoss, None,
      "closs_half_poisson", "closs_grad_half_poisson",
      "cgradient_half_poisson", "cgrad_hess_half_poisson"),
@@ -359,6 +374,47 @@ cdef inline double_pair cgrad_hess_pinball_loss(
     return gh
 
 
+# Huber Loss
+cdef inline double closs_huber_loss(
+    double y_true,
+    double raw_prediction,
+    double delta,
+) noexcept nogil:
+    cdef double abserr = fabs(y_true - raw_prediction)
+    if abserr <= delta:
+        return 0.5 * abserr**2
+    else:
+        return delta * (abserr - 0.5 * delta)
+
+
+cdef inline double cgradient_huber_loss(
+    double y_true,
+    double raw_prediction,
+    double delta,
+) noexcept nogil:
+    cdef double res = raw_prediction - y_true
+    if fabs(res) <= delta:
+        return res
+    else:
+        return delta if res >= 0 else -delta
+
+
+cdef inline double_pair cgrad_hess_huber_loss(
+    double y_true,
+    double raw_prediction,
+    double delta,
+) noexcept nogil:
+    cdef double_pair gh
+    gh.val2 = raw_prediction - y_true  # used as temporary
+    if fabs(gh.val2) <= delta:
+        gh.val1 = gh.val2  # gradient
+        gh.val2 = 1  # hessian
+    else:
+        gh.val1 = delta if gh.val2 >= 0 else -delta  # gradient
+        gh.val2 = 0  # hessian
+    return gh
+
+
 # Half Poisson Deviance with Log-Link, dropping constant terms
 cdef inline double closs_half_poisson(
     double y_true,
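The gradient and Hessian above are piecewise: the second derivative is 1 in the quadratic regime and 0 in the linear regime, which is why the Python wrapper in loss.py below sets self.approx_hessian = True and self.constant_hessian = False. A NumPy sketch of the same formulas (huber_grad_hess_reference is an illustrative name, not part of the patch):

    import numpy as np

    def huber_grad_hess_reference(y_true, raw_prediction, delta):
        # Derivative of the Huber loss w.r.t. raw_prediction, plus its
        # piecewise-constant second derivative.
        res = raw_prediction - y_true
        inside = np.abs(res) <= delta
        gradient = np.where(inside, res, np.sign(res) * delta)  # clipped at +/- delta
        hessian = np.where(inside, 1.0, 0.0)  # 0 beyond delta, hence approx_hessian
        return gradient, hessian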
diff --git a/sklearn/_loss/loss.py b/sklearn/_loss/loss.py
index 1a79abd901376..f9d9b406e27d8 100644
--- a/sklearn/_loss/loss.py
+++ b/sklearn/_loss/loss.py
@@ -22,6 +22,7 @@
     CyHalfSquaredError,
     CyAbsoluteError,
     CyPinballLoss,
+    CyHuberLoss,
     CyHalfPoissonLoss,
     CyHalfGammaLoss,
     CyHalfTweedieLoss,
@@ -583,7 +584,7 @@ class PinballLoss(BaseLoss):
     Additional Attributes
     ---------------------
     quantile : float
-        The quantile to be estimated. Must be in range (0, 1).
+        The quantile level of the quantile to be estimated. Must be in range (0, 1).
     """
 
     differentiable = False
@@ -619,6 +620,79 @@ def fit_intercept_only(self, y_true, sample_weight=None):
         )
 
 
+class HuberLoss(BaseLoss):
+    """Huber loss, for regression.
+
+    Domain:
+    y_true and y_pred all real numbers
+    quantile in (0, 1)
+
+    Link:
+    y_pred = raw_prediction
+
+    For a given sample x_i, the Huber loss is defined as::
+
+        loss(x_i) = 1/2 * abserr**2            if abserr <= delta
+                    delta * (abserr - delta/2) if abserr > delta
+
+        abserr = |y_true_i - raw_prediction_i|
+        delta = quantile(abserr, self.quantile)
+
+    Note: HuberLoss(quantile=1) equals HalfSquaredError and HuberLoss(quantile=0)
+    equals delta * (AbsoluteError() - delta/2).
+
+    Additional Attributes
+    ---------------------
+    quantile : float
+        The quantile level which defines the breaking point `delta` to distinguish
+        between absolute error and squared error. Must be in range (0, 1).
+
+    Reference
+    ---------
+    .. [1] Friedman, J.H. (2001). :doi:`Greedy function approximation: A gradient
+       boosting machine <10.1214/aos/1013203451>`.
+       Annals of Statistics, 29, 1189-1232.
+    """
+
+    differentiable = False
+    need_update_leaves_values = True
+
+    def __init__(self, sample_weight=None, quantile=0.9, delta=0.5):
+        check_scalar(
+            quantile,
+            "quantile",
+            target_type=numbers.Real,
+            min_val=0,
+            max_val=1,
+            include_boundaries="neither",
+        )
+        self.quantile = quantile  # This is better stored outside of Cython.
+        super().__init__(
+            closs=CyHuberLoss(delta=float(delta)),
+            link=IdentityLink(),
+        )
+        self.approx_hessian = True
+        self.constant_hessian = False
+
+    def fit_intercept_only(self, y_true, sample_weight=None):
+        """Compute raw_prediction of an intercept-only model.
+
+        This is the weighted median of the target, i.e. over the samples
+        axis=0.
+        """
+        # See formula before algo 4 in Friedman (2001), but we apply it to y_true,
+        # not to the residual y_true - raw_prediction. An estimator like
+        # HistGradientBoostingRegressor might then call it on the residual, e.g.
+        # fit_intercept_only(y_true - raw_prediction).
+        if sample_weight is None:
+            median = np.percentile(y_true, 50, axis=0)
+        else:
+            median = _weighted_percentile(y_true, sample_weight, 50)
+        diff = y_true - median
+        term = np.sign(diff) * np.minimum(self.closs.delta, np.abs(diff))
+        return median + np.average(term, weights=sample_weight)
+
+
 class HalfPoissonLoss(BaseLoss):
     """Half Poisson deviance loss with log-link, for regression.
 
@@ -998,6 +1072,7 @@ def gradient_proba(
     "squared_error": HalfSquaredError,
     "absolute_error": AbsoluteError,
     "pinball_loss": PinballLoss,
+    "huber_loss": HuberLoss,
    "poisson_loss": HalfPoissonLoss,
    "gamma_loss": HalfGammaLoss,
    "tweedie_loss": HalfTweedieLoss,
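HuberLoss.fit_intercept_only follows the intercept formula stated before Algorithm 4 in Friedman (2001): a (weighted) median plus one correction step whose per-sample contributions are clipped at delta. A self-contained sketch of the unweighted case (huber_intercept_reference is an illustrative name, not part of the patch):

    import numpy as np

    def huber_intercept_reference(y_true, delta):
        median = np.percentile(y_true, 50)  # robust starting point
        diff = y_true - median
        # One correction step with per-sample terms clipped at +/- delta.
        term = np.sign(diff) * np.minimum(delta, np.abs(diff))
        return median + np.mean(term)

    y = np.array([0.0, 1.0, 2.0, 10.0])
    print(huber_intercept_reference(y, delta=3.0))  # 1.875: median 1.5, nudged toward the outlier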
diff --git a/sklearn/_loss/tests/test_loss.py b/sklearn/_loss/tests/test_loss.py
index 4261b8366f64d..867a66b7422dc 100644
--- a/sklearn/_loss/tests/test_loss.py
+++ b/sklearn/_loss/tests/test_loss.py
@@ -24,6 +24,7 @@
     HalfSquaredError,
     HalfTweedieLoss,
     HalfTweedieLossIdentity,
+    HuberLoss,
     PinballLoss,
 )
 from sklearn.utils import assert_all_finite
@@ -36,6 +37,7 @@
 # HalfTweedieLoss(power=1.5) is already there as default
 LOSS_INSTANCES += [
     PinballLoss(quantile=0.25),
+    HuberLoss(quantile=0.75),
     HalfTweedieLoss(power=-1.5),
     HalfTweedieLoss(power=0),
     HalfTweedieLoss(power=1),
@@ -52,9 +54,11 @@ def loss_instance_name(param):
     if isinstance(param, BaseLoss):
         loss = param
         name = loss.__class__.__name__
-        if hasattr(loss, "quantile"):
+        if isinstance(loss, PinballLoss):
             name += f"(quantile={loss.closs.quantile})"
-        elif hasattr(loss, "power"):
+        elif isinstance(loss, HuberLoss):
+            name += f"(quantile={loss.quantile})"
+        elif hasattr(loss, "closs") and hasattr(loss.closs, "power"):
             name += f"(power={loss.closs.power})"
         return name
     else:
@@ -153,6 +157,7 @@ def test_loss_boundary(loss):
     (HalfSquaredError(), [-100, 0, 0.1, 100], [-np.inf, np.inf]),
     (AbsoluteError(), [-100, 0, 0.1, 100], [-np.inf, np.inf]),
     (PinballLoss(), [-100, 0, 0.1, 100], [-np.inf, np.inf]),
+    (HuberLoss(), [-100, 0, 0.1, 100], [-np.inf, np.inf]),
     (HalfPoissonLoss(), [0.1, 100], [-np.inf, -3, -0.1, np.inf]),
     (HalfGammaLoss(), [0.1, 100], [-np.inf, -3, -0.1, 0, np.inf]),
     (HalfTweedieLoss(power=-3), [0.1, 100], [-np.inf, np.inf]),
@@ -173,6 +178,7 @@ def test_loss_boundary(loss):
 Y_TRUE_PARAMS = [  # type: ignore
     # (loss, [y success], [y fail])
     (HalfPoissonLoss(), [0], []),
+    (HuberLoss(), [0], []),
     (HalfTweedieLoss(power=-3), [-100, -0.1, 0], []),
     (HalfTweedieLoss(power=0), [-100, 0], []),
     (HalfTweedieLoss(power=1.5), [0], []),
@@ -226,6 +232,8 @@ def test_loss_boundary_y_pred(loss, y_pred_success, y_pred_fail):
     (PinballLoss(quantile=0.5), 1.0, 5.0, 2),
     (PinballLoss(quantile=0.25), 1.0, 5.0, 4 * (1 - 0.25)),
     (PinballLoss(quantile=0.25), 5.0, 1.0, 4 * 0.25),
+    (HuberLoss(quantile=0.5, delta=3), 1.0, 5.0, 3 * (4 - 3 / 2)),
+    (HuberLoss(quantile=0.5, delta=3), 1.0, 3.0, 0.5 * 2**2),
     (HalfPoissonLoss(), 2.0, np.log(4), 4 - 2 * np.log(4)),
     (HalfGammaLoss(), 2.0, np.log(4), np.log(4) + 2 / 4),
     (HalfTweedieLoss(power=3), 2.0, np.log(4), -1 / 4 + 1 / 4**2),
@@ -1090,6 +1098,19 @@ def test_init_gradient_and_hessian_raises(loss, params, err_msg):
             "quantile == 0, must be > 0.",
         ),
         (PinballLoss, {"quantile": 1.1}, ValueError, "quantile == 1.1, must be < 1."),
+        (
+            HuberLoss,
+            {"quantile": None},
+            TypeError,
+            "quantile must be an instance of float, not NoneType.",
+        ),
+        (
+            HuberLoss,
+            {"quantile": 0},
+            ValueError,
+            "quantile == 0, must be > 0.",
+        ),
+        (HuberLoss, {"quantile": 1.1}, ValueError, "quantile == 1.1, must be < 1."),
     ],
 )
 def test_loss_init_parameter_validation(loss, params, err_type, err_msg):
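The two HuberLoss rows in the loss-value table above can be reproduced directly. Note that sklearn._loss is private API, so the snippet below is only a sanity check under that caveat:

    import numpy as np
    from sklearn._loss import HuberLoss

    loss = HuberLoss(quantile=0.5, delta=3)
    y_true = np.array([1.0, 1.0])
    raw_prediction = np.array([5.0, 3.0])
    # abserr = 4 > delta:  3 * (4 - 3 / 2) = 7.5  (linear branch)
    # abserr = 2 <= delta: 0.5 * 2**2      = 2.0  (quadratic branch)
    print(loss.loss(y_true=y_true, raw_prediction=raw_prediction))  # [7.5 2. ]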