diff --git a/sklearn/linear_model/_sag_fast.pyx.tp b/sklearn/linear_model/_sag_fast.pyx.tp
index 97bf3020d6602..9bfeed559bc13 100644
--- a/sklearn/linear_model/_sag_fast.pyx.tp
+++ b/sklearn/linear_model/_sag_fast.pyx.tp
@@ -85,7 +85,7 @@ cdef {{c_type}} _logsumexp{{name_suffix}}({{c_type}}* arr, int n_classes) noexce
 {{for name_suffix, c_type, np_type in dtypes}}
 
 cdef class MultinomialLogLoss{{name_suffix}}:
-    cdef {{c_type}} _loss(self, {{c_type}}* prediction, {{c_type}} y, int n_classes,
+    cdef {{c_type}} _loss(self, {{c_type}} y, {{c_type}}* prediction, int n_classes,
                           {{c_type}} sample_weight) noexcept nogil:
         r"""Multinomial Logistic regression loss.
 
@@ -100,12 +100,12 @@ cdef class MultinomialLogLoss{{name_suffix}}:
 
         Parameters
        ----------
-        prediction : pointer to a np.ndarray[{{c_type}}] of shape (n_classes,)
-            Prediction of the multinomial classifier, for current sample.
-
         y : {{c_type}}, between 0 and n_classes - 1
             Indice of the correct class for current sample (i.e. label encoded).
 
+        prediction : pointer to a np.ndarray[{{c_type}}] of shape (n_classes,)
+            Prediction of the multinomial classifier, for current sample.
+
         n_classes : integer
             Total number of classes.
 
@@ -129,7 +129,7 @@ cdef class MultinomialLogLoss{{name_suffix}}:
         loss = (logsumexp_prediction - prediction[int(y)]) * sample_weight
         return loss
 
-    cdef void dloss(self, {{c_type}}* prediction, {{c_type}} y, int n_classes,
+    cdef void dloss(self, {{c_type}} y, {{c_type}}* prediction, int n_classes,
                     {{c_type}} sample_weight, {{c_type}}* gradient_ptr) noexcept nogil:
         r"""Multinomial Logistic regression gradient of the loss.
 
@@ -414,9 +414,9 @@ def sag{{name_suffix}}(
 
                 # compute the gradient for this sample, given the prediction
                 if multinomial:
-                    multiloss.dloss(&prediction[0], y, n_classes, sample_weight, &gradient[0])
+                    multiloss.dloss(y, &prediction[0], n_classes, sample_weight, &gradient[0])
                 else:
-                    gradient[0] = loss.dloss(prediction[0], y) * sample_weight
+                    gradient[0] = loss.dloss(y, prediction[0]) * sample_weight
 
                 # L2 regularization by simply rescaling the weights
                 wscale *= wscale_update
@@ -835,10 +835,10 @@ def _multinomial_grad_loss_all_samples(
             )
 
             # compute the gradient for this sample, given the prediction
-            multiloss.dloss(&prediction[0], y, n_classes, sample_weight, &gradient[0])
+            multiloss.dloss(y, &prediction[0], n_classes, sample_weight, &gradient[0])
 
             # compute the loss for this sample, given the prediction
-            sum_loss += multiloss._loss(&prediction[0], y, n_classes, sample_weight)
+            sum_loss += multiloss._loss(y, &prediction[0], n_classes, sample_weight)
 
             # update the sum of the gradient
             for j in range(xnnz):
diff --git a/sklearn/linear_model/_sgd_fast.pxd b/sklearn/linear_model/_sgd_fast.pxd
index 7ae704eee18db..da7f155c6fa6e 100644
--- a/sklearn/linear_model/_sgd_fast.pxd
+++ b/sklearn/linear_model/_sgd_fast.pxd
@@ -2,25 +2,25 @@
 """Helper to load LossFunction from sgd_fast.pyx to sag_fast.pyx"""
 
 cdef class LossFunction:
-    cdef double loss(self, double p, double y) noexcept nogil
-    cdef double dloss(self, double p, double y) noexcept nogil
+    cdef double loss(self, double y, double p) noexcept nogil
+    cdef double dloss(self, double y, double p) noexcept nogil
 
 
 cdef class Regression(LossFunction):
-    cdef double loss(self, double p, double y) noexcept nogil
-    cdef double dloss(self, double p, double y) noexcept nogil
+    cdef double loss(self, double y, double p) noexcept nogil
+    cdef double dloss(self, double y, double p) noexcept nogil
 
 
 cdef class Classification(LossFunction):
-    cdef double loss(self, double p, double y) noexcept nogil
-    cdef double dloss(self, double p, double y) noexcept nogil
+    cdef double loss(self, double y, double p) noexcept nogil
+    cdef double dloss(self, double y, double p) noexcept nogil
 
 
 cdef class Log(Classification):
-    cdef double loss(self, double p, double y) noexcept nogil
-    cdef double dloss(self, double p, double y) noexcept nogil
+    cdef double loss(self, double y, double p) noexcept nogil
+    cdef double dloss(self, double y, double p) noexcept nogil
 
 
 cdef class SquaredLoss(Regression):
-    cdef double loss(self, double p, double y) noexcept nogil
-    cdef double dloss(self, double p, double y) noexcept nogil
+    cdef double loss(self, double y, double p) noexcept nogil
+    cdef double dloss(self, double y, double p) noexcept nogil
diff --git a/sklearn/linear_model/_sgd_fast.pyx.tp b/sklearn/linear_model/_sgd_fast.pyx.tp
index bcd2bd7e5576e..b92d983a1b4b8 100644
--- a/sklearn/linear_model/_sgd_fast.pyx.tp
+++ b/sklearn/linear_model/_sgd_fast.pyx.tp
@@ -77,15 +77,15 @@ cdef extern from *:
 cdef class LossFunction:
     """Base class for convex loss functions"""
 
-    cdef double loss(self, double p, double y) noexcept nogil:
+    cdef double loss(self, double y, double p) noexcept nogil:
         """Evaluate the loss function.
 
         Parameters
         ----------
-        p : double
-            The prediction, `p = w^T x + intercept`.
         y : double
             The true value (aka target).
+        p : double
+            The prediction, `p = w^T x + intercept`.
 
         Returns
         -------
@@ -111,7 +111,7 @@ cdef class LossFunction:
         double
             The derivative of the loss function with regards to `p`.
         """
-        return self.dloss(p, y)
+        return self.dloss(y, p)
 
     def py_loss(self, double p, double y):
         """Python version of `loss` for testing.
@@ -130,18 +130,18 @@ cdef class LossFunction:
         double
             The loss evaluated at `p` and `y`.
         """
-        return self.loss(p, y)
+        return self.loss(y, p)
 
-    cdef double dloss(self, double p, double y) noexcept nogil:
+    cdef double dloss(self, double y, double p) noexcept nogil:
         """Evaluate the derivative of the loss function with respect to
         the prediction `p`.
 
         Parameters
         ----------
-        p : double
-            The prediction, `p = w^T x`.
         y : double
             The true value (aka target).
+        p : double
+            The prediction, `p = w^T x`.
 
         Returns
         -------
@@ -154,20 +154,20 @@ cdef class LossFunction:
 cdef class Regression(LossFunction):
     """Base class for loss functions for regression"""
 
-    cdef double loss(self, double p, double y) noexcept nogil:
+    cdef double loss(self, double y, double p) noexcept nogil:
         return 0.
 
-    cdef double dloss(self, double p, double y) noexcept nogil:
+    cdef double dloss(self, double y, double p) noexcept nogil:
         return 0.
 
 
 cdef class Classification(LossFunction):
     """Base class for loss functions for classification"""
 
-    cdef double loss(self, double p, double y) noexcept nogil:
+    cdef double loss(self, double y, double p) noexcept nogil:
         return 0.
 
-    cdef double dloss(self, double p, double y) noexcept nogil:
+    cdef double dloss(self, double y, double p) noexcept nogil:
         return 0.
 
 
@@ -179,7 +179,7 @@ cdef class ModifiedHuber(Classification):
     See T. Zhang 'Solving Large Scale Linear Prediction Problems Using
     Stochastic Gradient Descent', ICML'04.
     """
-    cdef double loss(self, double p, double y) noexcept nogil:
+    cdef double loss(self, double y, double p) noexcept nogil:
         cdef double z = p * y
         if z >= 1.0:
             return 0.0
@@ -188,7 +188,7 @@ cdef class ModifiedHuber(Classification):
         else:
             return -4.0 * z
 
-    cdef double dloss(self, double p, double y) noexcept nogil:
+    cdef double dloss(self, double y, double p) noexcept nogil:
         cdef double z = p * y
         if z >= 1.0:
             return 0.0
@@ -217,13 +217,13 @@ cdef class Hinge(Classification):
     def __init__(self, double threshold=1.0):
         self.threshold = threshold
 
-    cdef double loss(self, double p, double y) noexcept nogil:
+    cdef double loss(self, double y, double p) noexcept nogil:
         cdef double z = p * y
         if z <= self.threshold:
             return self.threshold - z
         return 0.0
 
-    cdef double dloss(self, double p, double y) noexcept nogil:
+    cdef double dloss(self, double y, double p) noexcept nogil:
         cdef double z = p * y
         if z <= self.threshold:
             return -y
@@ -249,13 +249,13 @@ cdef class SquaredHinge(Classification):
     def __init__(self, double threshold=1.0):
         self.threshold = threshold
 
-    cdef double loss(self, double p, double y) noexcept nogil:
+    cdef double loss(self, double y, double p) noexcept nogil:
         cdef double z = self.threshold - p * y
         if z > 0:
             return z * z
         return 0.0
 
-    cdef double dloss(self, double p, double y) noexcept nogil:
+    cdef double dloss(self, double y, double p) noexcept nogil:
         cdef double z = self.threshold - p * y
         if z > 0:
             return -2 * y * z
@@ -268,7 +268,7 @@ cdef class SquaredHinge(Classification):
 cdef class Log(Classification):
     """Logistic regression loss for binary classification with y in {-1, 1}"""
 
-    cdef double loss(self, double p, double y) noexcept nogil:
+    cdef double loss(self, double y, double p) noexcept nogil:
         cdef double z = p * y
         # approximately equal and saves the computation of the log
         if z > 18:
@@ -277,7 +277,7 @@ cdef class Log(Classification):
             return -z
         return log(1.0 + exp(-z))
 
-    cdef double dloss(self, double p, double y) noexcept nogil:
+    cdef double dloss(self, double y, double p) noexcept nogil:
         cdef double z = p * y
         # approximately equal and saves the computation of the log
         if z > 18.0:
@@ -292,10 +292,10 @@ cdef class Log(Classification):
 cdef class SquaredLoss(Regression):
     """Squared loss traditional used in linear regression."""
 
-    cdef double loss(self, double p, double y) noexcept nogil:
+    cdef double loss(self, double y, double p) noexcept nogil:
         return 0.5 * (p - y) * (p - y)
 
-    cdef double dloss(self, double p, double y) noexcept nogil:
+    cdef double dloss(self, double y, double p) noexcept nogil:
         return p - y
 
     def __reduce__(self):
@@ -316,7 +316,7 @@ cdef class Huber(Regression):
    def __init__(self, double c):
         self.c = c
 
-    cdef double loss(self, double p, double y) noexcept nogil:
+    cdef double loss(self, double y, double p) noexcept nogil:
         cdef double r = p - y
         cdef double abs_r = fabs(r)
         if abs_r <= self.c:
@@ -324,7 +324,7 @@ cdef class Huber(Regression):
             return 0.5 * r * r
         else:
             return self.c * abs_r - (0.5 * self.c * self.c)
 
-    cdef double dloss(self, double p, double y) noexcept nogil:
+    cdef double dloss(self, double y, double p) noexcept nogil:
         cdef double r = p - y
         cdef double abs_r = fabs(r)
         if abs_r <= self.c:
@@ -349,11 +349,11 @@ cdef class EpsilonInsensitive(Regression):
     def __init__(self, double epsilon):
         self.epsilon = epsilon
 
-    cdef double loss(self, double p, double y) noexcept nogil:
+    cdef double loss(self, double y, double p) noexcept nogil:
         cdef double ret = fabs(y - p) - self.epsilon
         return ret if ret > 0 else 0
 
-    cdef double dloss(self, double p, double y) noexcept nogil:
+    cdef double dloss(self, double y, double p) noexcept nogil:
         if y - p > self.epsilon:
             return -1
         elif p - y > self.epsilon:
@@ -376,11 +376,11 @@ cdef class SquaredEpsilonInsensitive(Regression):
     def __init__(self, double epsilon):
         self.epsilon = epsilon
 
-    cdef double loss(self, double p, double y) noexcept nogil:
+    cdef double loss(self, double y, double p) noexcept nogil:
         cdef double ret = fabs(y - p) - self.epsilon
         return ret * ret if ret > 0 else 0
 
-    cdef double dloss(self, double p, double y) noexcept nogil:
+    cdef double dloss(self, double y, double p) noexcept nogil:
         cdef double z
         z = y - p
         if z > self.epsilon:
@@ -569,7 +569,7 @@ def _plain_sgd{{name_suffix}}(
     if learning_rate == OPTIMAL:
         typw = np.sqrt(1.0 / np.sqrt(alpha))
         # computing eta0, the initial learning rate
-        initial_eta0 = typw / max(1.0, loss.dloss(-typw, 1.0))
+        initial_eta0 = typw / max(1.0, loss.dloss(1.0, -typw))
         # initialize t such that eta at first sample equals eta0
         optimal_init = 1.0 / (initial_eta0 * alpha)
 
@@ -598,7 +598,7 @@ def _plain_sgd{{name_suffix}}(
                     eta = eta0 / pow(t, power_t)
 
                 if verbose or not early_stopping:
-                    sumloss += loss.loss(p, y)
+                    sumloss += loss.loss(y, p)
 
                 if y > 0.0:
                     class_weight = weight_pos
@@ -609,12 +609,12 @@ def _plain_sgd{{name_suffix}}(
                     update = sqnorm(x_data_ptr, x_ind_ptr, xnnz)
                     if update == 0:
                         continue
-                    update = min(C, loss.loss(p, y) / update)
+                    update = min(C, loss.loss(y, p) / update)
                 elif learning_rate == PA2:
                     update = sqnorm(x_data_ptr, x_ind_ptr, xnnz)
-                    update = loss.loss(p, y) / (update + 0.5 / C)
+                    update = loss.loss(y, p) / (update + 0.5 / C)
                 else:
-                    dloss = loss.dloss(p, y)
+                    dloss = loss.dloss(y, p)
                     # clip dloss with large values to avoid numerical
                     # instabilities
                     if dloss < -MAX_DLOSS:
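Taken together, these hunks are a pure argument-order swap: every `loss`/`dloss` method and every call site now passes the target `y` first and the prediction `p` second, which is the `(y_true, y_pred)` ordering scikit-learn uses elsewhere (for example in its metrics). As a rough plain-Python illustration only (this is a sketch, not the Cython code touched above; the branches not visible in the hunks are filled in from the scikit-learn source), the `Log` loss and a call site read like this under the new convention:

import math


class Log:
    # Plain-Python mirror of the Cython Log loss, using the new (y, p) order.

    def loss(self, y, p):
        # Binary logistic loss for y in {-1, 1}; p is the raw prediction w^T x.
        z = p * y
        # approximately equal and saves the computation of the log
        if z > 18:
            return math.exp(-z)
        if z < -18:
            return -z
        return math.log(1.0 + math.exp(-z))

    def dloss(self, y, p):
        # Derivative of the loss with respect to the prediction p.
        z = p * y
        # approximately equal and saves the computation of the log
        if z > 18.0:
            return math.exp(-z) * -y
        if z < -18.0:
            return -y
        return -y / (math.exp(z) + 1.0)


# Call sites now read target-first, prediction-second, e.g. inside the SGD loop:
loss = Log()
y, p = 1.0, 0.3
sumloss = loss.loss(y, p)     # loss contribution of this sample
gradient = loss.dloss(y, p)   # derivative w.r.t. the prediction p
print(sumloss, gradient)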