Merge pull request #4190 from trevorstephens/refactor_cw · scikit-learn/scikit-learn@420daac

Commit 420daac

Merge pull request #4190 from trevorstephens/refactor_cw
MRG+2: Refactor - Farm out class_weight calcs to .utils
2 parents 1d08f08 + f2431db commit 420daac

File tree

6 files changed, +223 -89 lines

sklearn/ensemble/forest.py

Lines changed: 8 additions & 50 deletions
@@ -56,7 +56,7 @@ class calls the ``fit`` method of each sub-estimator on random samples
 from ..tree import (DecisionTreeClassifier, DecisionTreeRegressor,
                     ExtraTreeClassifier, ExtraTreeRegressor)
 from ..tree._tree import DTYPE, DOUBLE
-from ..utils import check_random_state, check_array, compute_class_weight
+from ..utils import check_random_state, check_array, compute_sample_weight
 from ..utils.validation import DataConversionWarning, check_is_fitted
 from .base import BaseEnsemble, _partition_estimators
 from ..utils.fixes import bincount

@@ -89,30 +89,7 @@ def _parallel_build_trees(tree, forest, X, y, sample_weight, tree_idx, n_trees,
         curr_sample_weight *= sample_counts

         if class_weight == 'subsample':
-
-            expanded_class_weight = [curr_sample_weight]
-
-            for k in range(y.shape[1]):
-                y_full = y[:, k]
-                classes_full = np.unique(y_full)
-                y_boot = y[indices, k]
-                classes_boot = np.unique(y_boot)
-
-                # Get class weights for the bootstrap sample, covering all
-                # classes in case some were missing from the bootstrap sample
-                weight_k = np.choose(
-                    np.searchsorted(classes_boot, classes_full),
-                    compute_class_weight('auto', classes_boot, y_boot),
-                    mode='clip')
-
-                # Expand weights over the original y for this output
-                weight_k = weight_k[np.searchsorted(classes_full, y_full)]
-                expanded_class_weight.append(weight_k)
-
-            # Multiply all weights by sample & bootstrap weights
-            curr_sample_weight = np.prod(expanded_class_weight,
-                                         axis=0,
-                                         dtype=np.float64)
+            curr_sample_weight *= compute_sample_weight('auto', y, indices)

         tree.fit(X, y, sample_weight=curr_sample_weight, check_input=False)

@@ -449,33 +426,14 @@ def _validate_y_class_weight(self, y):
                          'properly estimate the class frequency '
                          'distributions. Pass the resulting weights as the '
                          'class_weight parameter.')
-            elif self.n_outputs_ > 1:
-                if not hasattr(self.class_weight, "__iter__"):
-                    raise ValueError("For multi-output, class_weight should "
-                                     "be a list of dicts, or a valid string.")
-                elif len(self.class_weight) != self.n_outputs_:
-                    raise ValueError("For multi-output, number of elements "
-                                     "in class_weight should match number of "
-                                     "outputs.")

             if self.class_weight != 'subsample' or not self.bootstrap:
-                expanded_class_weight = []
-                for k in range(self.n_outputs_):
-                    if self.class_weight in valid_presets:
-                        class_weight_k = 'auto'
-                    elif self.n_outputs_ == 1:
-                        class_weight_k = self.class_weight
-                    else:
-                        class_weight_k = self.class_weight[k]
-                    weight_k = compute_class_weight(class_weight_k,
-                                                    self.classes_[k],
-                                                    y_original[:, k])
-                    weight_k = weight_k[np.searchsorted(self.classes_[k],
-                                                        y_original[:, k])]
-                    expanded_class_weight.append(weight_k)
-                expanded_class_weight = np.prod(expanded_class_weight,
-                                                axis=0,
-                                                dtype=np.float64)
+                if self.class_weight == 'subsample':
+                    class_weight = 'auto'
+                else:
+                    class_weight = self.class_weight
+                expanded_class_weight = compute_sample_weight(class_weight,
+                                                              y_original)

         return y, expanded_class_weight

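For context, the one-liner above replaces the inlined bootstrap logic with the new helper. A minimal sketch (not part of the commit; toy labels made up for illustration) of what it computes: with the 'auto' preset of this era and a bootstrap indices array, weights are estimated from the drawn sample, expanded back over the original y, and classes absent from the draw get weight zero.

    import numpy as np
    from sklearn.utils import compute_sample_weight

    # Toy single-output target and a bootstrap draw with repeated indices.
    y = np.array([0, 0, 0, 1, 1, 2])
    indices = np.array([0, 1, 1, 3, 4, 4])  # class 2 is never drawn

    # Weights come from the bootstrap sample ('auto' is the only preset
    # allowed with indices) and are expanded over the full y; samples of
    # the undrawn class 2 receive weight 0.
    curr_sample_weight = np.ones(len(y))
    curr_sample_weight *= compute_sample_weight('auto', y, indices)
    print(curr_sample_weight)
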
sklearn/linear_model/ridge.py

Lines changed: 4 additions & 7 deletions
@@ -21,7 +21,7 @@
 from ..base import RegressorMixin
 from ..utils.extmath import safe_sparse_dot
 from ..utils import check_X_y
-from ..utils import compute_class_weight
+from ..utils import compute_sample_weight, compute_class_weight
 from ..utils import column_or_1d
 from ..preprocessing import LabelBinarizer
 from ..grid_search import GridSearchCV

@@ -597,10 +597,8 @@ def fit(self, X, y):
         y = column_or_1d(y, warn=True)

         if self.class_weight:
-            cw = compute_class_weight(self.class_weight,
-                                      self.classes_, y)
             # get the class weight corresponding to each sample
-            sample_weight = cw[np.searchsorted(self.classes_, y)]
+            sample_weight = compute_sample_weight(self.class_weight, y)
         else:
             sample_weight = None

@@ -1074,10 +1072,9 @@ def fit(self, X, y, sample_weight=None):
         Y = self._label_binarizer.fit_transform(y)
         if not self._label_binarizer.y_type_.startswith('multilabel'):
             y = column_or_1d(y, warn=True)
-            cw = compute_class_weight(self.class_weight,
-                                      self.classes_, Y)
             # modify the sample weights with the corresponding class weight
-            sample_weight *= cw[np.searchsorted(self.classes_, y)]
+            sample_weight = (sample_weight *
+                             compute_sample_weight(self.class_weight, y))
         _BaseRidgeCV.fit(self, X, Y, sample_weight=sample_weight)
         return self

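Both ridge fixes collapse the former compute_class_weight-plus-searchsorted pattern into a single call. A quick equivalence sketch (illustrative values, using the positional compute_class_weight signature as it stood at this commit):

    import numpy as np
    from sklearn.utils import compute_class_weight, compute_sample_weight

    y = np.array([0, 1, 1, 1])
    classes = np.unique(y)
    class_weight = {0: 2.0, 1: 0.5}

    # Old pattern (removed above): expand per-class weights per sample.
    cw = compute_class_weight(class_weight, classes, y)
    old = cw[np.searchsorted(classes, y)]

    # New pattern: one call performs both steps.
    new = compute_sample_weight(class_weight, y)

    assert np.array_equal(old, new)  # both give [2.0, 0.5, 0.5, 0.5]
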
sklearn/tree/tree.py

Lines changed: 3 additions & 30 deletions
@@ -25,7 +25,7 @@
 from ..base import BaseEstimator, ClassifierMixin, RegressorMixin
 from ..externals import six
 from ..feature_selection.from_model import _LearntSelectorMixin
-from ..utils import check_array, check_random_state, compute_class_weight
+from ..utils import check_array, check_random_state, compute_sample_weight
 from ..utils.validation import NotFittedError, check_is_fitted


@@ -172,35 +172,8 @@ def fit(self, X, y, sample_weight=None, check_input=True):
                 self.n_classes_.append(classes_k.shape[0])

             if self.class_weight is not None:
-                if isinstance(self.class_weight, six.string_types):
-                    if self.class_weight != "auto":
-                        raise ValueError('The only supported preset for '
-                                         'class_weight is "auto". Given "%s".'
-                                         % self.class_weight)
-                elif self.n_outputs_ > 1:
-                    if not hasattr(self.class_weight, "__iter__"):
-                        raise ValueError('For multi-output, class_weight '
-                                         'should be a list of dicts, or '
-                                         '"auto".')
-                    elif len(self.class_weight) != self.n_outputs_:
-                        raise ValueError("For multi-output, number of "
-                                         "elements in class_weight should "
-                                         "match number of outputs.")
-                expanded_class_weight = []
-                for k in range(self.n_outputs_):
-                    if self.n_outputs_ == 1 or self.class_weight == 'auto':
-                        class_weight_k = self.class_weight
-                    else:
-                        class_weight_k = self.class_weight[k]
-                    weight_k = compute_class_weight(class_weight_k,
-                                                    self.classes_[k],
-                                                    y_original[:, k])
-                    weight_k = weight_k[np.searchsorted(self.classes_[k],
-                                                        y_original[:, k])]
-                    expanded_class_weight.append(weight_k)
-                expanded_class_weight = np.prod(expanded_class_weight,
-                                                axis=0,
-                                                dtype=np.float64)
+                expanded_class_weight = compute_sample_weight(
+                    self.class_weight, y_original)

         else:
             self.classes_ = [None] * self.n_outputs_

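The tree change also hands the multi-output case to the helper, where per-column weights are multiplied (see the docstring in sklearn/utils/class_weight.py below). A made-up two-output illustration:

    import numpy as np
    from sklearn.utils import compute_sample_weight

    # Two-output target; class_weight is a list of dicts, one per column.
    y = np.array([[0, 0],
                  [1, 0],
                  [1, 1]])
    w = compute_sample_weight([{0: 1.0, 1: 2.0}, {0: 3.0, 1: 0.5}], y)
    print(w)  # column weights multiply: [1*3, 2*3, 2*0.5] = [3., 6., 1.]
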
sklearn/utils/__init__.py

Lines changed: 2 additions & 2 deletions
@@ -13,15 +13,15 @@
                          check_random_state, column_or_1d, check_array,
                          check_consistent_length, check_X_y, indexable,
                          check_symmetric)
-from .class_weight import compute_class_weight
+from .class_weight import compute_class_weight, compute_sample_weight
 from ..externals.joblib import cpu_count


 __all__ = ["murmurhash3_32", "as_float_array",
            "assert_all_finite", "check_array",
            "warn_if_not_float",
            "check_random_state",
-           "compute_class_weight",
+           "compute_class_weight", "compute_sample_weight",
            "column_or_1d", "safe_indexing",
            "check_consistent_length", "check_X_y", 'indexable']

sklearn/utils/class_weight.py

Lines changed: 103 additions & 0 deletions
@@ -3,6 +3,8 @@
 # License: BSD 3 clause

 import numpy as np
+from ..externals import six
+from ..utils.fixes import in1d

 from .fixes import bincount

@@ -61,3 +63,104 @@ def compute_class_weight(class_weight, classes, y):
             weight[i] = class_weight[c]

     return weight
+
+
+def compute_sample_weight(class_weight, y, indices=None):
+    """Estimate sample weights by class for unbalanced datasets.
+
+    Parameters
+    ----------
+    class_weight : dict, list of dicts, "auto", or None, optional
+        Weights associated with classes in the form ``{class_label: weight}``.
+        If not given, all classes are supposed to have weight one. For
+        multi-output problems, a list of dicts can be provided in the same
+        order as the columns of y.
+
+        The "auto" mode uses the values of y to automatically adjust
+        weights inversely proportional to class frequencies in the input data.
+
+        For multi-output, the weights of each column of y will be multiplied.
+
+    y : array-like, shape = [n_samples] or [n_samples, n_outputs]
+        Array of original class labels per sample.
+
+    indices : array-like, shape (n_subsample,), or None
+        Array of indices to be used in a subsample. Can be of length less than
+        n_samples in the case of a subsample, or equal to n_samples in the
+        case of a bootstrap subsample with repeated indices. If None, the
+        sample weight will be calculated over the full sample. Only "auto" is
+        supported for class_weight if this is provided.
+
+    Returns
+    -------
+    sample_weight_vect : ndarray, shape (n_samples,)
+        Array with sample weights as applied to the original y
+    """
+
+    y = np.atleast_1d(y)
+    if y.ndim == 1:
+        y = np.reshape(y, (-1, 1))
+    n_outputs = y.shape[1]
+
+    if isinstance(class_weight, six.string_types):
+        if class_weight != 'auto':
+            raise ValueError('The only valid preset for class_weight is '
+                             '"auto". Given "%s".' % class_weight)
+    elif (indices is not None and
+          not isinstance(class_weight, six.string_types)):
+        raise ValueError('The only valid class_weight for subsampling is '
+                         '"auto". Given "%s".' % class_weight)
+    elif n_outputs > 1:
+        if (not hasattr(class_weight, "__iter__") or
+                isinstance(class_weight, dict)):
+            raise ValueError("For multi-output, class_weight should be a "
+                             "list of dicts, or a valid string.")
+        if len(class_weight) != n_outputs:
+            raise ValueError("For multi-output, number of elements in "
+                             "class_weight should match number of outputs.")
+
+    expanded_class_weight = []
+    for k in range(n_outputs):
+
+        y_full = y[:, k]
+        classes_full = np.unique(y_full)
+        classes_missing = None
+
+        if class_weight == 'auto' or n_outputs == 1:
+            class_weight_k = class_weight
+        else:
+            class_weight_k = class_weight[k]
+
+        if indices is not None:
+            # Get class weights for the subsample, covering all classes in
+            # case some labels that were present in the original data are
+            # missing from the sample.
+            y_subsample = y[indices, k]
+            classes_subsample = np.unique(y_subsample)
+
+            weight_k = np.choose(np.searchsorted(classes_subsample,
+                                                 classes_full),
+                                 compute_class_weight(class_weight_k,
+                                                      classes_subsample,
+                                                      y_subsample),
+                                 mode='clip')
+
+            classes_missing = set(classes_full) - set(classes_subsample)
+        else:
+            weight_k = compute_class_weight(class_weight_k,
+                                            classes_full,
+                                            y_full)
+
+        weight_k = weight_k[np.searchsorted(classes_full, y_full)]
+
+        if classes_missing:
+            # Make missing classes' weight zero
+            weight_k[in1d(y_full, list(classes_missing))] = 0.
+
+        expanded_class_weight.append(weight_k)
+
+    expanded_class_weight = np.prod(expanded_class_weight,
+                                    axis=0,
+                                    dtype=np.float64)
+
+    return expanded_class_weight

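Finally, a short usage sketch of the new helper (illustrative values; behaviour as documented in the docstring above):

    import numpy as np
    from sklearn.utils import compute_sample_weight

    y = np.array([1, 1, 1, 2])

    # 'auto' adjusts weights inversely proportional to class frequency,
    # so the rare class 2 is up-weighted relative to class 1.
    print(compute_sample_weight('auto', y))

    # None yields uniform weights of one.
    print(compute_sample_weight(None, y))

    # With indices, only the 'auto' preset is valid; anything else hits
    # the ValueError raised in the validation block above.
    try:
        compute_sample_weight({1: 2.0, 2: 1.0}, y, indices=[0, 1])
    except ValueError as exc:
        print(exc)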