@@ -593,11 +593,11 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
593
593
sample_weight = np .ones (X .shape [0 ])
594
594
595
595
# If class_weights is a dict (provided by the user), the weights
596
- # are assigned to the original labels. If it is "auto ", then
596
+ # are assigned to the original labels. If it is "balanced", then
597
597
# the class_weights are assigned after masking the labels with an OvR.
598
598
le = LabelEncoder ()
599
599
600
- if isinstance (class_weight , dict ):
600
+ if isinstance (class_weight , dict ) or multi_class == 'multinomial' :
601
601
if solver == "liblinear" :
602
602
if classes .size == 2 :
603
603
# Reconstruct the weights with keys 1 and -1
@@ -609,7 +609,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
609
609
"solver cannot handle multiclass with "
610
610
"class_weight of type dict. Use the lbfgs, "
611
611
"newton-cg or sag solvers or set "
612
- "class_weight='auto '" )
612
+ "class_weight='balanced '" )
613
613
else :
614
614
class_weight_ = compute_class_weight (class_weight , classes , y )
615
615
sample_weight *= class_weight_ [le .fit_transform (y )]
@@ -622,20 +622,21 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
622
622
mask = (y == pos_class )
623
623
y_bin = np .ones (y .shape , dtype = np .float64 )
624
624
y_bin [~ mask ] = - 1.
625
+ # y_bin (+1 / -1 labels) is what compute_class_weight receives below
626
+
627
+ # 'auto' is deprecated and will be removed in 0.19
628
+ if class_weight in ("auto" , "balanced" ):
629
+ class_weight_ = compute_class_weight (class_weight , mask_classes ,
630
+ y_bin )
631
+ sample_weight *= class_weight_ [le .fit_transform (y_bin )]
625
632
626
633
else :
627
634
lbin = LabelBinarizer ()
628
- Y_bin = lbin .fit_transform (y )
629
- if Y_bin .shape [1 ] == 1 :
630
- Y_bin = np .hstack ([1 - Y_bin , Y_bin ])
631
- w0 = np .zeros ((Y_bin .shape [1 ], n_features + int (fit_intercept )),
635
+ Y_binarized = lbin .fit_transform (y )
636
+ if Y_binarized .shape [1 ] == 1 :
637
+ Y_binarized = np .hstack ([1 - Y_binarized , Y_binarized ])
638
+ w0 = np .zeros ((Y_binarized .shape [1 ], n_features + int (fit_intercept )),
632
639
order = 'F' )
633
- mask_classes = classes
634
-
635
- if class_weight == "auto" :
636
- class_weight_ = compute_class_weight (class_weight , mask_classes ,
637
- y_bin )
638
- sample_weight *= class_weight_ [le .fit_transform (y_bin )]
639
640
640
641
if coef is not None :
641
642
# it must work whether or not the bias term is given
@@ -664,7 +665,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
664
665
if multi_class == 'multinomial' :
665
666
# fmin_l_bfgs_b and newton-cg accept only ravelled parameters.
666
667
w0 = w0 .ravel ()
667
- target = Y_bin
668
+ target = Y_binarized
668
669
if solver == 'lbfgs' :
669
670
func = lambda x , * args : _multinomial_loss_grad (x , * args )[0 :2 ]
670
671
elif solver == 'newton-cg' :
@@ -1534,9 +1535,18 @@ def fit(self, X, y, sample_weight=None):
1534
1535
if self .class_weight and not (isinstance (self .class_weight , dict ) or
1535
1536
self .class_weight in
1536
1537
['balanced' , 'auto' ]):
1538
+ # 'auto' is deprecated and will be removed in 0.19
1537
1539
raise ValueError ("class_weight provided should be a "
1538
1540
"dict or 'balanced'" )
1539
1541
1542
+ # compute the class weights for the entire dataset y
1543
+ if self .class_weight in ("auto" , "balanced" ):
1544
+ classes = np .unique (y )
1545
+ class_weight = compute_class_weight (self .class_weight , classes , y )
1546
+ class_weight = dict (zip (classes , class_weight ))
1547
+ else :
1548
+ class_weight = self .class_weight
1549
+
1540
1550
path_func = delayed (_log_reg_scoring_path )
1541
1551
1542
1552
# The SAG solver releases the GIL so it's more efficient to use
@@ -1548,7 +1558,7 @@ def fit(self, X, y, sample_weight=None):
1548
1558
fit_intercept = self .fit_intercept , penalty = self .penalty ,
1549
1559
dual = self .dual , solver = self .solver , tol = self .tol ,
1550
1560
max_iter = self .max_iter , verbose = self .verbose ,
1551
- class_weight = self . class_weight , scoring = self .scoring ,
1561
+ class_weight = class_weight , scoring = self .scoring ,
1552
1562
multi_class = self .multi_class ,
1553
1563
intercept_scaling = self .intercept_scaling ,
1554
1564
random_state = self .random_state ,
@@ -1620,7 +1630,7 @@ def fit(self, X, y, sample_weight=None):
1620
1630
fit_intercept = self .fit_intercept , coef = coef_init ,
1621
1631
max_iter = self .max_iter , tol = self .tol ,
1622
1632
penalty = self .penalty , copy = False ,
1623
- class_weight = self . class_weight ,
1633
+ class_weight = class_weight ,
1624
1634
multi_class = self .multi_class ,
1625
1635
verbose = max (0 , self .verbose - 1 ),
1626
1636
random_state = self .random_state ,
0 commit comments