@@ -593,11 +593,11 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
         sample_weight = np.ones(X.shape[0])
 
     # If class_weights is a dict (provided by the user), the weights
-    # are assigned to the original labels. If it is "auto", then
+    # are assigned to the original labels. If it is "balanced", then
     # the class_weights are assigned after masking the labels with a OvR.
     le = LabelEncoder()
 
-    if isinstance(class_weight, dict):
+    if isinstance(class_weight, dict) or multi_class == 'multinomial':
         if solver == "liblinear":
             if classes.size == 2:
                 # Reconstruct the weights with keys 1 and -1
@@ -609,7 +609,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
                                  "solver cannot handle multiclass with "
                                  "class_weight of type dict. Use the lbfgs, "
                                  "newton-cg or sag solvers or set "
-                                 "class_weight='auto'")
+                                 "class_weight='balanced'")
         else:
             class_weight_ = compute_class_weight(class_weight, classes, y)
             sample_weight *= class_weight_[le.fit_transform(y)]
@@ -622,20 +622,20 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
         mask = (y == pos_class)
         y_bin = np.ones(y.shape, dtype=np.float64)
         y_bin[~mask] = -1.
+        # for compute_class_weight
+
+        if class_weight in ("auto", "balanced"):
+            class_weight_ = compute_class_weight(class_weight, mask_classes,
+                                                 y_bin)
+            sample_weight *= class_weight_[le.fit_transform(y_bin)]
 
     else:
         lbin = LabelBinarizer()
-        Y_bin = lbin.fit_transform(y)
-        if Y_bin.shape[1] == 1:
-            Y_bin = np.hstack([1 - Y_bin, Y_bin])
-        w0 = np.zeros((Y_bin.shape[1], n_features + int(fit_intercept)),
+        Y_binarized = lbin.fit_transform(y)
+        if Y_binarized.shape[1] == 1:
+            Y_binarized = np.hstack([1 - Y_binarized, Y_binarized])
+        w0 = np.zeros((Y_binarized.shape[1], n_features + int(fit_intercept)),
                       order='F')
-        mask_classes = classes
-
-        if class_weight == "auto":
-            class_weight_ = compute_class_weight(class_weight, mask_classes,
-                                                 y_bin)
-            sample_weight *= class_weight_[le.fit_transform(y_bin)]
 
     if coef is not None:
         # it must work both giving the bias term and not
@@ -664,7 +664,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
     if multi_class == 'multinomial':
         # fmin_l_bfgs_b and newton-cg accepts only ravelled parameters.
         w0 = w0.ravel()
-        target = Y_bin
+        target = Y_binarized
         if solver == 'lbfgs':
             func = lambda x, *args: _multinomial_loss_grad(x, *args)[0:2]
         elif solver == 'newton-cg':
@@ -1537,6 +1537,15 @@ def fit(self, X, y, sample_weight=None):
             raise ValueError("class_weight provided should be a "
                              "dict or 'balanced'")
 
+        # compute the class weights for the entire dataset y
+        if self.class_weight in ("auto", "balanced"):
+            classes = np.unique(y)
+            class_weight = compute_class_weight(self.class_weight, classes, y)
+            class_weight = dict(zip(classes, class_weight))
+        else:
+            class_weight = self.class_weight
+
+
         path_func = delayed(_log_reg_scoring_path)
 
         # The SAG solver releases the GIL so it's more efficient to use
@@ -1548,7 +1557,7 @@ def fit(self, X, y, sample_weight=None):
                 fit_intercept=self.fit_intercept, penalty=self.penalty,
                 dual=self.dual, solver=self.solver, tol=self.tol,
                 max_iter=self.max_iter, verbose=self.verbose,
-                class_weight=self.class_weight, scoring=self.scoring,
+                class_weight=class_weight, scoring=self.scoring,
                 multi_class=self.multi_class,
                 intercept_scaling=self.intercept_scaling,
                 random_state=self.random_state,
@@ -1620,7 +1629,7 @@ def fit(self, X, y, sample_weight=None):
                 fit_intercept=self.fit_intercept, coef=coef_init,
                 max_iter=self.max_iter, tol=self.tol,
                 penalty=self.penalty, copy=False,
-                class_weight=self.class_weight,
+                class_weight=class_weight,
                 multi_class=self.multi_class,
                 verbose=max(0, self.verbose - 1),
                 random_state=self.random_state,
0 commit comments