FIX fixes memory leak seen in PyPy in Cython losses (#27670)

Commit a5fed0d

glemaitre and betatim authored
Co-authored-by: Tim Head <betatim@gmail.com>
1 parent 25b71d2 commit a5fed0d

4 files changed, +48 -77 lines changed

doc/whats_new/v1.4.rst

Lines changed: 3 additions & 0 deletions
@@ -35,6 +35,9 @@ random sampling procedures.
     solvers (when fit on the same data again). The amount of change depends on the
     specified `tol`, for small values you will get more precise results.
 
+- |Fix| fixes a memory leak seen in PyPy for estimators using the Cython loss functions.
+  :pr:`27670` by :user:`Guillaume Lemaitre <glemaitre>`.
+
 Changes impacting all modules
 -----------------------------
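
Taken together, the three code diffs below implement one change of contract: the Cython loss routines no longer build NumPy arrays from their output memoryviews with `np.asarray` and return them; they only fill caller-provided buffers, and the Python wrappers in `sklearn/_loss/loss.py` return those buffers directly. A minimal sketch of the resulting out-parameter pattern in plain Python (the kernel, names, and loss formula here are illustrative stand-ins, not the actual scikit-learn code):

import numpy as np

def _loss_kernel(y_true, raw_prediction, loss_out):
    # Stand-in for a Cython kernel: fill the caller-provided buffer in place
    # and return nothing (hypothetical half squared error, for illustration).
    diff = y_true - raw_prediction
    np.multiply(0.5 * diff, diff, out=loss_out)

def loss(y_true, raw_prediction, loss_out=None):
    # Python-level wrapper: allocate the buffer if needed, call the kernel
    # for its side effect, and return the ndarray the wrapper already owns.
    if loss_out is None:
        loss_out = np.empty_like(raw_prediction)
    _loss_kernel(y_true, raw_prediction, loss_out)
    return loss_out

y_true = np.array([1.0, 2.0, 3.0])
raw_prediction = np.array([1.5, 2.0, 2.0])
print(loss(y_true, raw_prediction))  # [0.125 0.    0.5  ]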

sklearn/_loss/_loss.pyx.tp

Lines changed: 11 additions & 46 deletions
@@ -870,7 +870,9 @@ cdef class CyLossFunction:
         floating_out[::1] loss_out,          # OUT
         int n_threads=1
     ):
-        """Compute the pointwise loss value for each input.
+        """Compute the point-wise loss value for each input.
+
+        The point-wise loss is written to `loss_out` and no array is returned.
 
         Parameters
         ----------
@@ -884,11 +886,6 @@
             A location into which the result is stored.
         n_threads : int
             Number of threads used by OpenMP (if any).
-
-        Returns
-        -------
-        loss : array of shape (n_samples,)
-            Element-wise loss function.
         """
         pass
 
@@ -902,6 +899,8 @@
     ):
         """Compute gradient of loss w.r.t raw_prediction for each input.
 
+        The gradient is written to `gradient_out` and no array is returned.
+
         Parameters
         ----------
         y_true : array of shape (n_samples,)
@@ -914,11 +913,6 @@
             A location into which the result is stored.
         n_threads : int
             Number of threads used by OpenMP (if any).
-
-        Returns
-        -------
-        gradient : array of shape (n_samples,)
-            Element-wise gradients.
         """
         pass
 
@@ -933,6 +927,9 @@
    ):
        """Compute loss and gradient of loss w.r.t raw_prediction.
 
+        The loss and gradient are written to `loss_out` and `gradient_out` and no arrays
+        are returned.
+
        Parameters
        ----------
        y_true : array of shape (n_samples,)
@@ -947,18 +944,9 @@
             A location into which the gradient is stored.
         n_threads : int
             Number of threads used by OpenMP (if any).
-
-        Returns
-        -------
-        loss : array of shape (n_samples,)
-            Element-wise loss function.
-
-        gradient : array of shape (n_samples,)
-            Element-wise gradients.
         """
         self.loss(y_true, raw_prediction, sample_weight, loss_out, n_threads)
         self.gradient(y_true, raw_prediction, sample_weight, gradient_out, n_threads)
-        return np.asarray(loss_out), np.asarray(gradient_out)
 
     def gradient_hessian(
         self,
@@ -971,6 +959,9 @@
     ):
         """Compute gradient and hessian of loss w.r.t raw_prediction.
 
+        The gradient and hessian are written to `gradient_out` and `hessian_out` and no
+        arrays are returned.
+
         Parameters
         ----------
         y_true : array of shape (n_samples,)
@@ -985,14 +976,6 @@
             A location into which the hessian is stored.
         n_threads : int
             Number of threads used by OpenMP (if any).
-
-        Returns
-        -------
-        gradient : array of shape (n_samples,)
-            Element-wise gradients.
-
-        hessian : array of shape (n_samples,)
-            Element-wise hessians.
         """
         pass
 
@@ -1045,8 +1028,6 @@ cdef class {{name}}(CyLossFunction):
         ):
             loss_out[i] = sample_weight[i] * {{closs}}(y_true[i], raw_prediction[i]{{with_param}})
 
-        return np.asarray(loss_out)
-
     {{if closs_grad is not None}}
     def loss_gradient(
         self,
@@ -1077,7 +1058,6 @@ cdef class {{name}}(CyLossFunction):
             loss_out[i] = sample_weight[i] * dbl2.val1
             gradient_out[i] = sample_weight[i] * dbl2.val2
 
-        return np.asarray(loss_out), np.asarray(gradient_out)
     {{endif}}
 
     def gradient(
@@ -1103,8 +1083,6 @@ cdef class {{name}}(CyLossFunction):
         ):
             gradient_out[i] = sample_weight[i] * {{cgrad}}(y_true[i], raw_prediction[i]{{with_param}})
 
-        return np.asarray(gradient_out)
-
     def gradient_hessian(
         self,
         const floating_in[::1] y_true,       # IN
@@ -1134,8 +1112,6 @@ cdef class {{name}}(CyLossFunction):
             gradient_out[i] = sample_weight[i] * dbl2.val1
             hessian_out[i] = sample_weight[i] * dbl2.val2
 
-        return np.asarray(gradient_out), np.asarray(hessian_out)
-
 {{endfor}}
 
 
@@ -1216,8 +1192,6 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):
 
         free(p)
 
-        return np.asarray(loss_out)
-
     def loss_gradient(
         self,
         const floating_in[::1] y_true,       # IN
@@ -1278,8 +1252,6 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):
 
         free(p)
 
-        return np.asarray(loss_out), np.asarray(gradient_out)
-
    def gradient(
        self,
        const floating_in[::1] y_true,       # IN
@@ -1327,8 +1299,6 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):
 
         free(p)
 
-        return np.asarray(gradient_out)
-
     def gradient_hessian(
         self,
         const floating_in[::1] y_true,       # IN
@@ -1381,9 +1351,6 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):
 
         free(p)
 
-        return np.asarray(gradient_out), np.asarray(hessian_out)
-
-
     # This method simplifies the implementation of hessp in linear models,
     # i.e. the matrix-vector product of the full hessian, not only of the
     # diagonal (in the classes) approximation as implemented above.
@@ -1434,5 +1401,3 @@ cdef class CyHalfMultinomialLoss(CyLossFunction):
             gradient_out[i, k] = (proba_out[i, k] - (y_true[i] == k)) * sample_weight[i]
 
         free(p)
-
-        return np.asarray(gradient_out), np.asarray(proba_out)
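
Every line deleted above is a `return np.asarray(...)` on a typed memoryview, or docstring text describing that return value. `np.asarray` over a memoryview is zero-copy, so the returned array keeps the exporting object alive through its `base` attribute; CPython releases that chain promptly via reference counting, while under PyPy this pattern was observed to leak (see #27670). A small pure-Python illustration of the zero-copy chain, offered only as an analogy (Cython typed memoryviews are a different type, but export buffers the same way):

import numpy as np

buf = np.zeros(4)
view = memoryview(buf)    # rough analogue of a Cython typed memoryview
arr = np.asarray(view)    # zero-copy: no data is duplicated

arr[0] = 42.0
print(buf[0])             # 42.0 -- both names address the same memory
print(type(arr.base))     # <class 'memoryview'>: arr keeps its exporter alive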

sklearn/_loss/loss.py

Lines changed: 10 additions & 5 deletions
@@ -189,13 +189,14 @@ def loss(
         if raw_prediction.ndim == 2 and raw_prediction.shape[1] == 1:
             raw_prediction = raw_prediction.squeeze(1)
 
-        return self.closs.loss(
+        self.closs.loss(
             y_true=y_true,
             raw_prediction=raw_prediction,
             sample_weight=sample_weight,
             loss_out=loss_out,
             n_threads=n_threads,
         )
+        return loss_out
 
     def loss_gradient(
         self,
@@ -250,14 +251,15 @@ def loss_gradient(
         if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
             gradient_out = gradient_out.squeeze(1)
 
-        return self.closs.loss_gradient(
+        self.closs.loss_gradient(
             y_true=y_true,
             raw_prediction=raw_prediction,
             sample_weight=sample_weight,
             loss_out=loss_out,
             gradient_out=gradient_out,
             n_threads=n_threads,
         )
+        return loss_out, gradient_out
 
     def gradient(
         self,
@@ -299,13 +301,14 @@ def gradient(
         if gradient_out.ndim == 2 and gradient_out.shape[1] == 1:
             gradient_out = gradient_out.squeeze(1)
 
-        return self.closs.gradient(
+        self.closs.gradient(
             y_true=y_true,
             raw_prediction=raw_prediction,
             sample_weight=sample_weight,
             gradient_out=gradient_out,
             n_threads=n_threads,
         )
+        return gradient_out
 
     def gradient_hessian(
         self,
@@ -363,14 +366,15 @@ def gradient_hessian(
         if hessian_out.ndim == 2 and hessian_out.shape[1] == 1:
             hessian_out = hessian_out.squeeze(1)
 
-        return self.closs.gradient_hessian(
+        self.closs.gradient_hessian(
             y_true=y_true,
             raw_prediction=raw_prediction,
             sample_weight=sample_weight,
             gradient_out=gradient_out,
             hessian_out=hessian_out,
             n_threads=n_threads,
         )
+        return gradient_out, hessian_out
 
     def __call__(self, y_true, raw_prediction, sample_weight=None, n_threads=1):
         """Compute the weighted average loss.
@@ -1075,14 +1079,15 @@ def gradient_proba(
         elif proba_out is None:
             proba_out = np.empty_like(gradient_out)
 
-        return self.closs.gradient_proba(
+        self.closs.gradient_proba(
             y_true=y_true,
             raw_prediction=raw_prediction,
             sample_weight=sample_weight,
             gradient_out=gradient_out,
             proba_out=proba_out,
             n_threads=n_threads,
         )
+        return gradient_out, proba_out
 
 
 class ExponentialLoss(BaseLoss):
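
For callers, the visible contract of the Python wrappers is unchanged: each method still returns the filled array(s), except the returned objects are now exactly the buffers managed at the Python level. A quick usage sketch against the private `sklearn._loss` module (internal API, shown only to illustrate the return contract and subject to change):

import numpy as np
from sklearn._loss.loss import HalfSquaredError  # private, unstable import path

loss = HalfSquaredError()
y_true = np.array([1.0, 2.0, 3.0])
raw_prediction = np.array([1.5, 2.0, 2.0])
loss_out = np.empty_like(raw_prediction)

result = loss.loss(y_true=y_true, raw_prediction=raw_prediction, loss_out=loss_out)
# After this commit the wrapper hands back the very buffer it was given;
# no np.asarray call happens at the Cython level anymore.
print(result is loss_out)  # True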

sklearn/_loss/tests/test_loss.py

Lines changed: 24 additions & 26 deletions
@@ -383,34 +383,32 @@ def test_loss_same_as_C_functions(loss, sample_weight):
     out_g2 = np.empty_like(raw_prediction)
     out_h1 = np.empty_like(raw_prediction)
     out_h2 = np.empty_like(raw_prediction)
-    assert_allclose(
-        loss.loss(
-            y_true=y_true,
-            raw_prediction=raw_prediction,
-            sample_weight=sample_weight,
-            loss_out=out_l1,
-        ),
-        loss.closs.loss(
-            y_true=y_true,
-            raw_prediction=raw_prediction,
-            sample_weight=sample_weight,
-            loss_out=out_l2,
-        ),
+    loss.loss(
+        y_true=y_true,
+        raw_prediction=raw_prediction,
+        sample_weight=sample_weight,
+        loss_out=out_l1,
     )
-    assert_allclose(
-        loss.gradient(
-            y_true=y_true,
-            raw_prediction=raw_prediction,
-            sample_weight=sample_weight,
-            gradient_out=out_g1,
-        ),
-        loss.closs.gradient(
-            y_true=y_true,
-            raw_prediction=raw_prediction,
-            sample_weight=sample_weight,
-            gradient_out=out_g2,
-        ),
+    loss.closs.loss(
+        y_true=y_true,
+        raw_prediction=raw_prediction,
+        sample_weight=sample_weight,
+        loss_out=out_l2,
+    ),
+    assert_allclose(out_l1, out_l2)
+    loss.gradient(
+        y_true=y_true,
+        raw_prediction=raw_prediction,
+        sample_weight=sample_weight,
+        gradient_out=out_g1,
+    )
+    loss.closs.gradient(
+        y_true=y_true,
+        raw_prediction=raw_prediction,
+        sample_weight=sample_weight,
+        gradient_out=out_g2,
     )
+    assert_allclose(out_g1, out_g2)
     loss.closs.loss_gradient(
         y_true=y_true,
         raw_prediction=raw_prediction,
