@@ -594,11 +594,11 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
594
594
sample_weight = np .ones (X .shape [0 ])
595
595
596
596
# If class_weights is a dict (provided by the user), the weights
597
- # are assigned to the original labels. If it is "auto", then
597
+ # are assigned to the original labels. If it is "balanced", then
598
598
# the class_weights are assigned after masking the labels with a OvR.
599
599
le = LabelEncoder ()
600
600
601
- if isinstance (class_weight , dict ):
601
+ if isinstance (class_weight , dict ) or multi_class == 'multinomial' :
602
602
if solver == "liblinear" :
603
603
if classes .size == 2 :
604
604
# Reconstruct the weights with keys 1 and -1
@@ -610,7 +610,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
610
610
"solver cannot handle multiclass with "
611
611
"class_weight of type dict. Use the lbfgs, "
612
612
"newton-cg or sag solvers or set "
613
- "class_weight='auto '" )
613
+ "class_weight='balanced '" )
614
614
else :
615
615
class_weight_ = compute_class_weight (class_weight , classes , y )
616
616
sample_weight *= class_weight_ [le .fit_transform (y )]
@@ -623,20 +623,20 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
623
623
mask = (y == pos_class )
624
624
y_bin = np .ones (y .shape , dtype = np .float64 )
625
625
y_bin [~ mask ] = - 1.
626
+ # for compute_class_weight
627
+
628
+ if class_weight in ("auto" , "balanced" ):
629
+ class_weight_ = compute_class_weight (class_weight , mask_classes ,
630
+ y_bin )
631
+ sample_weight *= class_weight_ [le .fit_transform (y_bin )]
626
632
627
633
else :
628
634
lbin = LabelBinarizer ()
629
- Y_bin = lbin .fit_transform (y )
630
- if Y_bin .shape [1 ] == 1 :
631
- Y_bin = np .hstack ([1 - Y_bin , Y_bin ])
632
- w0 = np .zeros ((Y_bin .shape [1 ], n_features + int (fit_intercept )),
635
+ Y_binarized = lbin .fit_transform (y )
636
+ if Y_binarized .shape [1 ] == 1 :
637
+ Y_binarized = np .hstack ([1 - Y_binarized , Y_binarized ])
638
+ w0 = np .zeros ((Y_binarized .shape [1 ], n_features + int (fit_intercept )),
633
639
order = 'F' )
634
- mask_classes = classes
635
-
636
- if class_weight == "auto" :
637
- class_weight_ = compute_class_weight (class_weight , mask_classes ,
638
- y_bin )
639
- sample_weight *= class_weight_ [le .fit_transform (y_bin )]
640
640
641
641
if coef is not None :
642
642
# it must work both giving the bias term and not
@@ -665,7 +665,7 @@ def logistic_regression_path(X, y, pos_class=None, Cs=10, fit_intercept=True,
665
665
if multi_class == 'multinomial' :
666
666
# fmin_l_bfgs_b and newton-cg accepts only ravelled parameters.
667
667
w0 = w0 .ravel ()
668
- target = Y_bin
668
+ target = Y_binarized
669
669
if solver == 'lbfgs' :
670
670
func = lambda x , * args : _multinomial_loss_grad (x , * args )[0 :2 ]
671
671
elif solver == 'newton-cg' :
@@ -1538,6 +1538,15 @@ def fit(self, X, y, sample_weight=None):
1538
1538
raise ValueError ("class_weight provided should be a "
1539
1539
"dict or 'balanced'" )
1540
1540
1541
+ # compute the class weights for the entire dataset y
1542
+ if self .class_weight in ("auto" , "balanced" ):
1543
+ classes = np .unique (y )
1544
+ class_weight = compute_class_weight (self .class_weight , classes , y )
1545
+ class_weight = dict (zip (classes , class_weight ))
1546
+ else :
1547
+ class_weight = self .class_weight
1548
+
1549
+
1541
1550
path_func = delayed (_log_reg_scoring_path )
1542
1551
1543
1552
# The SAG solver releases the GIL so it's more efficient to use
@@ -1549,7 +1558,7 @@ def fit(self, X, y, sample_weight=None):
1549
1558
fit_intercept = self .fit_intercept , penalty = self .penalty ,
1550
1559
dual = self .dual , solver = self .solver , tol = self .tol ,
1551
1560
max_iter = self .max_iter , verbose = self .verbose ,
1552
- class_weight = self . class_weight , scoring = self .scoring ,
1561
+ class_weight = class_weight , scoring = self .scoring ,
1553
1562
multi_class = self .multi_class ,
1554
1563
intercept_scaling = self .intercept_scaling ,
1555
1564
random_state = self .random_state ,
@@ -1621,7 +1630,7 @@ def fit(self, X, y, sample_weight=None):
1621
1630
fit_intercept = self .fit_intercept , coef = coef_init ,
1622
1631
max_iter = self .max_iter , tol = self .tol ,
1623
1632
penalty = self .penalty , copy = False ,
1624
- class_weight = self . class_weight ,
1633
+ class_weight = class_weight ,
1625
1634
multi_class = self .multi_class ,
1626
1635
verbose = max (0 , self .verbose - 1 ),
1627
1636
random_state = self .random_state ,
0 commit comments