@@ -41,7 +41,9 @@ class calls the ``fit`` method of each sub-estimator on random samples
41
41
42
42
from __future__ import division
43
43
44
+ import warnings
44
45
from warnings import warn
46
+
45
47
from abc import ABCMeta , abstractmethod
46
48
47
49
import numpy as np
@@ -89,6 +91,10 @@ def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees,
89
91
curr_sample_weight *= sample_counts
90
92
91
93
if class_weight == 'subsample' :
94
+ with warnings .catch_warnings ():
95
+ warnings .simplefilter ('ignore' , DeprecationWarning )
96
+ curr_sample_weight *= compute_sample_weight ('auto' , y , indices )
97
+ elif class_weight == 'balanced_subsample' :
92
98
curr_sample_weight *= compute_sample_weight ('balanced' , y , indices )
93
99
94
100
tree .fit (X , y , sample_weight = curr_sample_weight , check_input = False )
@@ -414,30 +420,40 @@ def _validate_y_class_weight(self, y):
414
420
self .n_classes_ .append (classes_k .shape [0 ])
415
421
416
422
if self .class_weight is not None :
417
- valid_presets = ('auto' , 'balanced' , 'subsample' , 'auto' )
423
+ valid_presets = ('auto' , 'balanced' , 'balanced_subsample' , ' subsample' , 'auto' )
418
424
if isinstance (self .class_weight , six .string_types ):
419
425
if self .class_weight not in valid_presets :
420
426
raise ValueError ('Valid presets for class_weight include '
421
- '"balanced" and "subsample ". Given "%s".'
427
+ '"balanced" and "balanced_subsample ". Given "%s".'
422
428
% self .class_weight )
429
+ if self .class_weight == "subsample" :
430
+ warn ("class_weight='subsample' is deprecated and will be removed in 0.18."
431
+ " It was replaced by class_weight='balanced_subsample' "
432
+ "using the balanced strategy." , DeprecationWarning )
423
433
if self .warm_start :
424
- warn ('class_weight presets "balanced" or "subsample " are '
434
+ warn ('class_weight presets "balanced" or "balanced_subsample " are '
425
435
'not recommended for warm_start if the fitted data '
426
436
'differs from the full dataset. In order to use '
427
- '"auto " weights, use compute_class_weight("balanced", '
437
+ '"balanced " weights, use compute_class_weight("balanced", '
428
438
'classes, y). In place of y you can use a large '
429
439
'enough sample of the full training set target to '
430
440
'properly estimate the class frequency '
431
441
'distributions. Pass the resulting weights as the '
432
442
'class_weight parameter.' )
433
443
434
- if self .class_weight != 'subsample' or not self .bootstrap :
444
+ if (self .class_weight not in ['subsample' , 'balanced_subsample' ] or
445
+ not self .bootstrap ):
435
446
if self .class_weight == 'subsample' :
436
- class_weight = 'balanced'
447
+ class_weight = 'auto'
448
+ elif self .class_weight == "balanced_subsample" :
449
+ class_weight = "balanced"
437
450
else :
438
451
class_weight = self .class_weight
439
- expanded_class_weight = compute_sample_weight (class_weight ,
440
- y_original )
452
+ with warnings .catch_warnings ():
453
+ if class_weight == "auto" :
454
+ warnings .simplefilter ('ignore' , DeprecationWarning )
455
+ expanded_class_weight = compute_sample_weight (class_weight ,
456
+ y_original )
441
457
442
458
return y , expanded_class_weight
443
459
@@ -758,7 +774,7 @@ class RandomForestClassifier(ForestClassifier):
758
774
and add more estimators to the ensemble, otherwise, just fit a whole
759
775
new forest.
760
776
761
- class_weight : dict, list of dicts, "balanced", "subsample " or None, optional
777
+ class_weight : dict, list of dicts, "balanced", "balanced_subsample" or None, optional
762
778
763
779
Weights associated with classes in the form ``{class_label: weight}``.
764
780
If not given, all classes are supposed to have weight one. For
@@ -769,7 +785,7 @@ class RandomForestClassifier(ForestClassifier):
769
785
weights inversely proportional to class frequencies in the input data
770
786
as ``n_samples / (n_classes * np.bincount(y))``
771
787
772
- The "subsample " mode is the same as "balanced" except that weights are
788
+ The "balanced_subsample" mode is the same as "balanced" except that weights are
773
789
computed based on the bootstrap sample for every tree grown.
774
790
775
791
For multi-output, the weights of each column of y will be multiplied.
@@ -1101,7 +1117,7 @@ class ExtraTreesClassifier(ForestClassifier):
1101
1117
and add more estimators to the ensemble, otherwise, just fit a whole
1102
1118
new forest.
1103
1119
1104
- class_weight : dict, list of dicts, "balanced", "subsample " or None, optional
1120
+ class_weight : dict, list of dicts, "balanced", "balanced_subsample" or None, optional
1105
1121
1106
1122
Weights associated with classes in the form ``{class_label: weight}``.
1107
1123
If not given, all classes are supposed to have weight one. For
@@ -1112,7 +1128,7 @@ class ExtraTreesClassifier(ForestClassifier):
1112
1128
weights inversely proportional to class frequencies in the input data
1113
1129
as ``n_samples / (n_classes * np.bincount(y))``
1114
1130
1115
- The "subsample " mode is the same as "balanced" except that weights are
1131
+ The "balanced_subsample" mode is the same as "balanced" except that weights are
1116
1132
computed based on the bootstrap sample for every tree grown.
1117
1133
1118
1134
For multi-output, the weights of each column of y will be multiplied.
0 commit comments