@@ -87,24 +87,27 @@ def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees,
87
87
sample_counts = np .bincount (indices , minlength = n_samples )
88
88
curr_sample_weight *= sample_counts
89
89
90
- if class_weight == 'bootstrap' :
90
+ if class_weight == 'subsample' :
91
+
91
92
expanded_class_weight = [curr_sample_weight ]
93
+
92
94
for k in range (y .shape [1 ]):
93
95
y_full = y [:, k ]
94
96
classes_full = np .unique (y_full )
95
- y_boot = y_full [indices ]
97
+ y_boot = y [indices , k ]
96
98
classes_boot = np .unique (y_boot )
97
- # Get class weights for the bootstrap sample
98
- weight_k = compute_class_weight ( 'auto' , classes_boot , y_boot )
99
- # Expand class weights to cover all classes in original y
100
- # (in case some were missing from the bootstrap sample)
101
- weight_k = np .array ([ weight_k [ np . where ( classes_boot == c )][ 0 ]
102
- if c in classes_boot
103
- else 0.
104
- for c in classes_full ])
99
+
100
+ # Get class weights for the bootstrap sample, covering all
101
+ # classes in case some were missing from the bootstrap sample
102
+ weight_k = np . choose (
103
+ np .searchsorted ( classes_boot , classes_full ),
104
+ compute_class_weight ( 'auto' , classes_boot , y_boot ),
105
+ mode = 'clip' )
106
+
105
107
# Expand weights over the original y for this output
106
108
weight_k = weight_k [np .searchsorted (classes_full , y_full )]
107
109
expanded_class_weight .append (weight_k )
110
+
108
111
# Multiply all weights by sample & bootstrap weights
109
112
curr_sample_weight = np .prod (expanded_class_weight ,
110
113
axis = 0 ,
@@ -243,7 +246,7 @@ def fit(self, X, y, sample_weight=None):
243
246
244
247
if expanded_class_weight is not None :
245
248
if sample_weight is not None :
246
- sample_weight = np . copy ( sample_weight ) * expanded_class_weight
249
+ sample_weight = sample_weight * expanded_class_weight
247
250
else :
248
251
sample_weight = expanded_class_weight
249
252
@@ -428,14 +431,14 @@ def _validate_y_class_weight(self, y):
428
431
self .n_classes_ .append (classes_k .shape [0 ])
429
432
430
433
if self .class_weight is not None :
431
- valid_presets = [ 'auto' , 'bootstrap' ]
434
+ valid_presets = ( 'auto' , 'subsample' )
432
435
if isinstance (self .class_weight , six .string_types ):
433
436
if self .class_weight not in valid_presets :
434
437
raise ValueError ('Valid presets for class_weight include '
435
-                                     '"auto" and "bootstrap". Given "%s".'
438
+                                     '"auto" and "subsample". Given "%s".'
436
439
% self .class_weight )
437
440
if self .warm_start :
438
-                warn('class_weight presets "auto" or "bootstrap" are '
441
+                warn('class_weight presets "auto" or "subsample" are '
439
442
'not recommended for warm_start if the fitted data '
440
443
'differs from the full dataset. In order to use '
441
444
'"auto" weights, use compute_class_weight("auto", '
@@ -453,7 +456,7 @@ def _validate_y_class_weight(self, y):
453
456
"in class_weight should match number of "
454
457
"outputs." )
455
458
456
-        if self.class_weight != 'bootstrap' or not self.bootstrap:
459
+        if self.class_weight != 'subsample' or not self.bootstrap:
457
460
expanded_class_weight = []
458
461
for k in range (self .n_outputs_ ):
459
462
if self .class_weight in valid_presets :
@@ -797,7 +800,7 @@ class RandomForestClassifier(ForestClassifier):
797
800
and add more estimators to the ensemble, otherwise, just fit a whole
798
801
new forest.
799
802
800
-    class_weight : dict, list of dicts, "auto", "bootstrap" or None, optional
803
+    class_weight : dict, list of dicts, "auto", "subsample" or None, optional
801
804
802
805
Weights associated with classes in the form ``{class_label: weight}``.
803
806
If not given, all classes are supposed to have weight one. For
@@ -807,7 +810,7 @@ class RandomForestClassifier(ForestClassifier):
807
810
The "auto" mode uses the values of y to automatically adjust
808
811
weights inversely proportional to class frequencies in the input data.
809
812
810
-        The "bootstrap" mode is the same as "auto" except that weights are
813
+        The "subsample" mode is the same as "auto" except that weights are
811
814
computed based on the bootstrap sample for every tree grown.
812
815
813
816
For multi-output, the weights of each column of y will be multiplied.
@@ -1127,7 +1130,7 @@ class ExtraTreesClassifier(ForestClassifier):
1127
1130
and add more estimators to the ensemble, otherwise, just fit a whole
1128
1131
new forest.
1129
1132
1130
-    class_weight : dict, list of dicts, "auto", "bootstrap" or None, optional
1133
+    class_weight : dict, list of dicts, "auto", "subsample" or None, optional
1131
1134
1132
1135
Weights associated with classes in the form ``{class_label: weight}``.
1133
1136
If not given, all classes are supposed to have weight one. For
@@ -1137,7 +1140,7 @@ class ExtraTreesClassifier(ForestClassifier):
1137
1140
The "auto" mode uses the values of y to automatically adjust
1138
1141
weights inversely proportional to class frequencies in the input data.
1139
1142
1140
-        The "bootstrap" mode is the same as "auto" except that weights are
1143
+        The "subsample" mode is the same as "auto" except that weights are
1141
1144
computed based on the bootstrap sample for every tree grown.
1142
1145
1143
1146
For multi-output, the weights of each column of y will be multiplied.
0 commit comments