@@ -59,6 +59,12 @@ from ..utils.seq_dataset cimport SequentialDataset32, SequentialDataset64
59
59
60
60
from libc.stdio cimport printf
61
61
62
cdef void raise_infinite_error(int n_iter) except *:
    """Raise a ValueError reporting a floating-point under-/overflow.

    Parameters
    ----------
    n_iter : int
        Epoch counter of the solver; the message reports epoch
        number ``n_iter + 1``.

    Raises
    ------
    ValueError
        Always.

    Notes
    -----
    This function needs the GIL; callers running in a ``nogil`` section
    must wrap the call in ``with gil:``.

    The ``except *`` clause is what allows the exception to reach the
    caller: a ``cdef void`` function declared without an exception clause
    cannot report Python exceptions (Cython < 3 prints them as "ignored"
    and carries on).
    """
    raise ValueError("Floating-point under-/overflow occurred at "
                     "epoch #%d. Lowering the step_size or "
                     "scaling the input data with StandardScaler "
                     "or MinMaxScaler might help." % (n_iter + 1))
67
+
62
68
63
69
64
70
{{for name, c_type, np_type in get_dispatch(dtypes)}}
@@ -343,9 +349,6 @@ def sag{{name}}(SequentialDataset{{name}} dataset,
343
349
# the scalar used for multiplying z
344
350
cdef {{c_type}} wscale = 1.0
345
351
346
- # return value (-1 if an error occurred, 0 otherwise)
347
- cdef int status = 0
348
-
349
352
# the cumulative sums for each iteration for the sparse implementation
350
353
cumulative_sums[0] = 0.0
351
354
@@ -399,19 +402,16 @@ def sag{{name}}(SequentialDataset{{name}} dataset,
399
402
400
403
# make the weight updates
401
404
if sample_itr > 0:
402
- status = lagged_update{{name}}(weights, wscale, xnnz,
403
- n_samples, n_classes,
404
- sample_itr,
405
- cumulative_sums,
406
- cumulative_sums_prox,
407
- feature_hist,
408
- prox,
409
- sum_gradient,
410
- x_ind_ptr,
411
- False,
412
- n_iter)
413
- if status == -1:
414
- break
405
+ lagged_update{{name}}(weights, wscale, xnnz,
406
+ n_samples, n_classes, sample_itr,
407
+ cumulative_sums,
408
+ cumulative_sums_prox,
409
+ feature_hist,
410
+ prox,
411
+ sum_gradient,
412
+ x_ind_ptr,
413
+ False,
414
+ n_iter)
415
415
416
416
# find the current prediction
417
417
predict_sample{{name}}(x_data_ptr, x_ind_ptr, xnnz, weights, wscale,
@@ -460,12 +460,8 @@ def sag{{name}}(SequentialDataset{{name}} dataset,
460
460
461
461
# check to see that the intercept is not inf or NaN
462
462
if not skl_isfinite{{name}}(intercept[class_ind]):
463
- status = -1
464
- break
465
- # Break from the n_samples outer loop if an error happened
466
- # in the fit_intercept n_classes inner loop
467
- if status == -1:
468
- break
463
+ with gil:
464
+ raise_infinite_error(n_iter)
469
465
470
466
# update the gradient memory for this sample
471
467
for class_ind in range(n_classes):
@@ -488,32 +484,21 @@ def sag{{name}}(SequentialDataset{{name}} dataset,
488
484
if verbose:
489
485
with gil:
490
486
print("rescaling...")
491
- status = scale_weights{{name}}(
492
- weights, & wscale, n_features, n_samples, n_classes,
487
+ wscale = scale_weights{{name}}(
488
+ weights, wscale, n_features, n_samples, n_classes,
493
489
sample_itr, cumulative_sums,
494
490
cumulative_sums_prox,
495
491
feature_hist,
496
492
prox, sum_gradient, n_iter)
497
- if status == -1:
498
- break
499
-
500
- # Break from the n_iter outer loop if an error happened in the
501
- # n_samples inner loop
502
- if status == -1:
503
- break
504
493
505
494
# we scale the weights every n_samples iterations and reset the
506
495
# just-in-time update system for numerical stability.
507
- status = scale_weights{{name}}(weights, &wscale, n_features,
508
- n_samples,
509
- n_classes, n_samples - 1,
510
- cumulative_sums,
511
- cumulative_sums_prox,
512
- feature_hist,
513
- prox, sum_gradient, n_iter)
514
-
515
- if status == -1:
516
- break
496
+ wscale = scale_weights{{name}}(weights, wscale, n_features, n_samples,
497
+ n_classes, n_samples - 1, cumulative_sums,
498
+ cumulative_sums_prox,
499
+ feature_hist,
500
+ prox, sum_gradient, n_iter)
501
+
517
502
# check if the stopping criterion is reached
518
503
max_change = 0.0
519
504
max_weight = 0.0
@@ -535,13 +520,6 @@ def sag{{name}}(SequentialDataset{{name}} dataset,
535
520
printf('Epoch %d, change: %.8f\n', n_iter + 1,
536
521
max_change / max_weight)
537
522
n_iter += 1
538
- # We do the error treatment here based on error code in status to avoid
539
- # re-acquiring the GIL within the cython code, which slows the computation
540
- # when the sag/saga solver is used concurrently in multiple Python threads.
541
- if status == -1:
542
- raise ValueError(("Floating-point under-/overflow occurred at epoch"
543
- " #%d. Scaling input data with StandardScaler or"
544
- " MinMaxScaler might help.") % n_iter)
545
523
546
524
if verbose and n_iter >= max_iter:
547
525
end_time = time(NULL)
@@ -555,15 +533,14 @@ def sag{{name}}(SequentialDataset{{name}} dataset,
555
533
556
534
{{for name, c_type, np_type in get_dispatch(dtypes)}}
557
535
558
- cdef int scale_weights{{name}}({{c_type}}* weights, {{c_type}}* wscale,
559
- int n_features,
560
- int n_samples, int n_classes, int sample_itr,
561
- {{c_type}}* cumulative_sums,
562
- {{c_type}}* cumulative_sums_prox,
563
- int* feature_hist,
564
- bint prox,
565
- {{c_type}}* sum_gradient,
566
- int n_iter) nogil:
536
+ cdef {{c_type}} scale_weights{{name}}({{c_type}}* weights, {{c_type}} wscale, int n_features,
537
+ int n_samples, int n_classes, int sample_itr,
538
+ {{c_type}}* cumulative_sums,
539
+ {{c_type}}* cumulative_sums_prox,
540
+ int* feature_hist,
541
+ bint prox,
542
+ {{c_type}}* sum_gradient,
543
+ int n_iter) nogil:
567
544
"""Scale the weights with wscale for numerical stability.
568
545
569
546
wscale = (1 - step_size * alpha) ** (n_iter * n_samples + sample_itr)
@@ -573,37 +550,34 @@ cdef int scale_weights{{name}}({{c_type}}* weights, {{c_type}}* wscale,
573
550
This also limits the size of `cumulative_sums`.
574
551
"""
575
552
576
- cdef int status
577
- status = lagged_update{{name}}(weights, wscale[0], n_features,
578
- n_samples, n_classes, sample_itr + 1,
579
- cumulative_sums,
580
- cumulative_sums_prox,
581
- feature_hist,
582
- prox,
583
- sum_gradient,
584
- NULL,
585
- True,
586
- n_iter)
587
- # if lagged update succeeded, reset wscale to 1.0
588
- if status == 0:
589
- wscale[0] = 1.0
590
- return status
553
+ lagged_update{{name}}(weights, wscale, n_features,
554
+ n_samples, n_classes, sample_itr + 1,
555
+ cumulative_sums,
556
+ cumulative_sums_prox,
557
+ feature_hist,
558
+ prox,
559
+ sum_gradient,
560
+ NULL,
561
+ True,
562
+ n_iter)
563
+ # reset wscale to 1.0
564
+ return 1.0
591
565
592
566
{{endfor}}
593
567
594
568
595
569
{{for name, c_type, np_type in get_dispatch(dtypes)}}
596
570
597
- cdef int lagged_update{{name}}({{c_type}}* weights, {{c_type}} wscale, int xnnz,
598
- int n_samples, int n_classes, int sample_itr,
599
- {{c_type}}* cumulative_sums,
600
- {{c_type}}* cumulative_sums_prox,
601
- int* feature_hist,
602
- bint prox,
603
- {{c_type}}* sum_gradient,
604
- int* x_ind_ptr,
605
- bint reset,
606
- int n_iter) nogil:
571
+ cdef void lagged_update{{name}}({{c_type}}* weights, {{c_type}} wscale, int xnnz,
572
+ int n_samples, int n_classes, int sample_itr,
573
+ {{c_type}}* cumulative_sums,
574
+ {{c_type}}* cumulative_sums_prox,
575
+ int* feature_hist,
576
+ bint prox,
577
+ {{c_type}}* sum_gradient,
578
+ int* x_ind_ptr,
579
+ bint reset,
580
+ int n_iter) nogil:
607
581
"""Hard perform the JIT updates for non-zero features of present sample.
608
582
The pending updates are kept in memory using cumulative_sums,
609
583
cumulative_sums_prox, wscale and feature_hist. See original SAGA paper
@@ -631,9 +605,8 @@ cdef int lagged_update{{name}}({{c_type}}* weights, {{c_type}} wscale, int xnnz,
631
605
if reset:
632
606
weights[idx] *= wscale
633
607
if not skl_isfinite{{name}}(weights[idx]):
634
- # returning here does not require the gil as the return
635
- # type is a C integer
636
- return -1
608
+ with gil:
609
+ raise_infinite_error(n_iter)
637
610
else:
638
611
for class_ind in range(n_classes):
639
612
idx = f_idx + class_ind
@@ -667,7 +640,8 @@ cdef int lagged_update{{name}}({{c_type}}* weights, {{c_type}} wscale, int xnnz,
667
640
weights[idx] *= wscale
668
641
# check to see that the weight is not inf or NaN
669
642
if not skl_isfinite{{name}}(weights[idx]):
670
- return -1
643
+ with gil:
644
+ raise_infinite_error(n_iter)
671
645
if reset:
672
646
feature_hist[feature_ind] = sample_itr % n_samples
673
647
else:
@@ -678,8 +652,6 @@ cdef int lagged_update{{name}}({{c_type}}* weights, {{c_type}} wscale, int xnnz,
678
652
if prox:
679
653
cumulative_sums_prox[sample_itr - 1] = 0.0
680
654
681
- return 0
682
-
683
655
{{endfor}}
684
656
685
657
0 commit comments