@@ -10,6 +10,14 @@ cimport numpy as np
from sklearn.utils.extmath import row_norms
from cython cimport floating

+ import sys
+ from time import time
+
+ from libc.math cimport exp, log, sqrt, pow, fabs
+ cimport numpy as np
+ from numpy.math cimport INFINITY
+
+
# Modified from sklearn.cluster._k_means_fast.pyx
np.import_array()
@@ -73,3 +81,255 @@ cpdef np.ndarray[floating] _kmeans_loss(np.ndarray[floating, ndim=2, mode='c'] X
        j = labels[i]
        inertias[i] = _euclidean_dense_dense(&X[i, 0], &centers[j, 0], n_features)
    return inertias
+
+
+ # Regression and Classification losses, from scikit-learn.
+
+
+ # ----------------------------------------
+ # Extension Types for Loss Functions
+ # ----------------------------------------
+
+ cdef class LossFunction:
+     """Base class for convex loss functions"""
+
+     cdef double loss(self, double p, double y) nogil:
+         """Evaluate the loss function.
+
+         Parameters
+         ----------
+         p : double
+             The prediction, p = w^T x
+         y : double
+             The true value (aka target)
+
+         Returns
+         -------
+         double
+             The loss evaluated at `p` and `y`.
+         """
+         return 0.
+
+     def py_dloss(self, double p, double y):
+         """Python version of `dloss` for testing.
+
+         Pytest needs a python function and can't use cdef functions.
+         """
+         return self.dloss(p, y)
+
+     def py_loss(self, double p, double y):
+         """Python version of `loss` for testing.
+
+         Pytest needs a python function and can't use cdef functions.
+         """
+         return self.loss(p, y)
+
+     cdef double dloss(self, double p, double y) nogil:
+         """Evaluate the derivative of the loss function with respect to
+         the prediction `p`.
+
+         Parameters
+         ----------
+         p : double
+             The prediction, p = w^T x
+         y : double
+             The true value (aka target)
+
+         Returns
+         -------
+         double
+             The derivative of the loss function with regards to `p`.
+         """
+         return 0.
+
+
+ cdef class Regression(LossFunction):
+     """Base class for loss functions for regression"""
+
+     cdef double loss(self, double p, double y) nogil:
+         return 0.
+
+     cdef double dloss(self, double p, double y) nogil:
+         return 0.
+
+
+ cdef class Classification(LossFunction):
+     """Base class for loss functions for classification"""
+
+     cdef double loss(self, double p, double y) nogil:
+         return 0.
+
+     cdef double dloss(self, double p, double y) nogil:
+         return 0.
+
+
+ cdef class ModifiedHuber(Classification):
+     """Modified Huber loss for binary classification with y in {-1, 1}
+
+     This is equivalent to quadratically smoothed SVM with gamma = 2.
+
+     See T. Zhang 'Solving Large Scale Linear Prediction Problems Using
+     Stochastic Gradient Descent', ICML'04.
+     """
+     cdef double loss(self, double p, double y) nogil:
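+         # Piecewise in the margin z = p * y: zero beyond the margin (z >= 1),
+         # quadratic inside it (-1 <= z < 1), and linear (-4z) for z < -1.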
+         cdef double z = p * y
+         if z >= 1.0:
+             return 0.0
+         elif z >= -1.0:
+             return (1.0 - z) * (1.0 - z)
+         else:
+             return -4.0 * z
+
+     cdef double dloss(self, double p, double y) nogil:
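+         # Derivative of the loss above with respect to p, region by region.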
+         cdef double z = p * y
+         if z >= 1.0:
+             return 0.0
+         elif z >= -1.0:
+             return 2.0 * (1.0 - z) * -y
+         else:
+             return -4.0 * y
+
+     def __reduce__(self):
+         return ModifiedHuber, ()
+
+
+ cdef class Hinge(Classification):
+     """Hinge loss for binary classification tasks with y in {-1,1}
+
+     Parameters
+     ----------
+     threshold : float > 0.0
+         Margin threshold. When threshold=1.0, one gets the loss used by SVM.
+         When threshold=0.0, one gets the loss used by the Perceptron.
+     """
+
+     cdef double threshold
+
+     def __init__(self, double threshold=1.0):
+         self.threshold = threshold
+
+     cdef double loss(self, double p, double y) nogil:
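+         # Hinge loss: max(0, threshold - z) with z = p * y.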
+         cdef double z = p * y
+         if z <= self.threshold:
+             return self.threshold - z
+         return 0.0
+
+     cdef double dloss(self, double p, double y) nogil:
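+         # Subgradient: -y inside the margin (z <= threshold), 0 otherwise.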
+         cdef double z = p * y
+         if z <= self.threshold:
+             return -y
+         return 0.0
+
+     def __reduce__(self):
+         return Hinge, (self.threshold,)
+
+
+ cdef class SquaredHinge(Classification):
+     """Squared Hinge loss for binary classification tasks with y in {-1,1}
+
+     Parameters
+     ----------
+     threshold : float > 0.0
+         Margin threshold. When threshold=1.0, one gets the loss used by
+         (quadratically penalized) SVM.
+     """
+
+     cdef double threshold
+
+     def __init__(self, double threshold=1.0):
+         self.threshold = threshold
+
+     cdef double loss(self, double p, double y) nogil:
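+         # Squared hinge: max(0, threshold - p * y)^2.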
+         cdef double z = self.threshold - p * y
+         if z > 0:
+             return z * z
+         return 0.0
+
+     cdef double dloss(self, double p, double y) nogil:
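+         # Derivative w.r.t. p: -2 * y * (threshold - p * y) inside the margin,
+         # 0 otherwise.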
256
+ cdef double z = self .threshold - p * y
257
+ if z > 0 :
258
+ return - 2 * y * z
259
+ return 0.0
260
+
261
+ def __reduce__ (self ):
262
+ return SquaredHinge, (self .threshold,)
263
+
264
+
265
+ cdef class Log(Classification):
266
+ """ Logistic regression loss for binary classification with y in {-1, 1}"""
267
+
268
+ cdef double loss(self , double p, double y) nogil:
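+         # Logistic loss: log(1 + exp(-z)) with z = p * y, using numerically
+         # safe approximations for large |z|.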
+         cdef double z = p * y
+         # approximately equal and saves the computation of the log
+         if z > 18:
+             return exp(-z)
+         if z < -18:
+             return -z
+         return log(1.0 + exp(-z))
+
+     cdef double dloss(self, double p, double y) nogil:
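+         # Derivative: -y * sigmoid(-z) = -y / (1 + exp(z)), with the same
+         # large-|z| shortcuts as in `loss`.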
+         cdef double z = p * y
+         # approximately equal and saves the computation of the log
+         if z > 18.0:
+             return exp(-z) * -y
+         if z < -18.0:
+             return -y
+         return -y / (exp(z) + 1.0)
+
+     def __reduce__(self):
+         return Log, ()
+
+
+ cdef class SquaredLoss(Regression):
+     """Squared loss traditionally used in linear regression."""
+     cdef double loss(self, double p, double y) nogil:
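+         # The 0.5 factor makes the derivative below simply the residual p - y.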
+         return 0.5 * (p - y) * (p - y)
+
+     cdef double dloss(self, double p, double y) nogil:
+         return p - y
+
+     def __reduce__(self):
+         return SquaredLoss, ()
+
+
+ cdef class Huber(Regression):
+     """Huber regression loss
+
+     Variant of the SquaredLoss that is robust to outliers (quadratic near zero,
+     linear for large errors).
+
+     https://en.wikipedia.org/wiki/Huber_Loss_Function
+     """
+
+     cdef double c
+
+     def __init__(self, double c):
+         self.c = c
+
+     cdef double loss(self, double p, double y) nogil:
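+         # Quadratic for |p - y| <= c, linear beyond; the -0.5 * c^2 offset keeps
+         # the two pieces continuous at |p - y| = c.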
+         cdef double r = p - y
+         cdef double abs_r = fabs(r)
+         if abs_r <= self.c:
+             return 0.5 * r * r
+         else:
+             return self.c * abs_r - (0.5 * self.c * self.c)
+
+     cdef double dloss(self, double p, double y) nogil:
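+         # Gradient is the residual inside the quadratic region and is clipped
+         # to +/- c outside it.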
+         cdef double r = p - y
+         cdef double abs_r = fabs(r)
+         if abs_r <= self.c:
+             return r
+         elif r > 0.0:
+             return self.c
+         else:
+             return -self.c
+
+     def __reduce__(self):
+         return Huber, (self.c,)