Merge pull request #1657 from glouppe/feature-importances · seckcoder/scikit-learn@a54809e · GitHub

Commit a54809e

Merge pull request scikit-learn#1657 from glouppe/feature-importances

[MRG] Remove compute_importances

2 parents 1adfed9 + 7130c99 · commit a54809e

File tree

11 files changed: +1395 −1552 lines changed

doc/whats_new.rst

Lines changed: 12 additions & 5 deletions
@@ -16,12 +16,19 @@ Changelog
    scoring function such as area under the ROC curve and f-beta scores.
    See :ref:`score_func_objects` for details. By `Andreas Müller`_.
    Passing a function from :mod:`sklearn.metrics` as ``score_func`` is
-   deprecated.
+   deprecated.
+
+ - Added :class:`ensemble.AdaBoostClassifier` and
+   :class:`ensemble.AdaBoostRegressor`, by `Noel Dawe`_ and
+   `Gilles Louppe`_. See the :ref:`AdaBoost <adaboost>` section of the user
+   guide for details and examples.
+
+ - Feature importances in :class:`tree.DecisionTreeClassifier`,
+   :class:`tree.DecisionTreeRegressor` and all derived ensemble estimators
+   are now computed on the fly when accessing the ``feature_importances_``
+   attribute. Setting ``compute_importances=True`` is no longer required.
+   By `Gilles Louppe`_.

- - Added :class:`ensemble.AdaBoostClassifier` and
-   :class:`ensemble.AdaBoostRegressor`, by `Noel Dawe`_ and `Gilles Louppe`_.
-   See the :ref:`AdaBoost <adaboost>` section of the user guide for
-   details and examples.

 .. _changes_0_13:
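For reference, a minimal usage sketch of the behaviour this changelog entry describes: importances are available directly after fit, with no compute_importances flag. The dataset and settings below are illustrative, not from the commit.

    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier

    X, y = make_classification(n_samples=200, n_features=10, random_state=0)

    clf = RandomForestClassifier(n_estimators=10)
    clf.fit(X, y)
    print(clf.feature_importances_)  # computed lazily on attribute access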

sklearn/ensemble/forest.py

Lines changed: 23 additions & 24 deletions
@@ -75,7 +75,6 @@ def _parallel_build_trees(n_trees, forest, X, y, sample_weight,
         seed = random_state.randint(MAX_INT)

         tree = forest._make_estimator(append=False)
-        tree.set_params(compute_importances=forest.compute_importances)
         tree.set_params(random_state=check_random_state(seed))

         if forest.bootstrap:

@@ -230,6 +229,13 @@ def __init__(self,
             estimator_params=estimator_params)

         self.bootstrap = bootstrap
+
+        if compute_importances:
+            warn("Setting compute_importances=True is no longer "
+                 "required. Variable importances are now computed on the fly "
+                 "when accessing the feature_importances_ attribute. This "
+                 "parameter will be removed in 0.15.", DeprecationWarning)
+
         self.compute_importances = compute_importances
         self.oob_score = oob_score
         self.n_jobs = n_jobs

@@ -239,7 +245,6 @@ def __init__(self,
         self.n_outputs_ = None
         self.classes_ = None
         self.n_classes_ = None
-        self.feature_importances_ = None

         self.verbose = verbose

@@ -453,14 +458,24 @@ def fit(self, X, y, sample_weight=None):
             self.oob_score_ /= self.n_outputs_

-        # Sum the importances
-        if self.compute_importances:
-            self.feature_importances_ = \
-                sum(tree.feature_importances_ for tree in self.estimators_) \
-                / self.n_estimators
-
         return self

+    @property
+    def feature_importances_(self):
+        """Return the feature importances (the higher, the more important the
+           feature).
+
+        Returns
+        -------
+        feature_importances_ : array, shape = [n_features]
+        """
+        if self.estimators_ is None or len(self.estimators_) == 0:
+            raise ValueError("Estimator not fitted, "
+                             "call `fit` before `feature_importances_`.")
+
+        return sum(tree.feature_importances_
+                   for tree in self.estimators_) / self.n_estimators
+

 class ForestClassifier(BaseForest, ClassifierMixin):
     """Base class for forest of trees-based classifiers.

@@ -731,10 +746,6 @@ class RandomForestClassifier(ForestClassifier):
     bootstrap : boolean, optional (default=True)
         Whether bootstrap samples are used when building trees.

-    compute_importances : boolean, optional (default=True)
-        Whether feature importances are computed and stored into the
-        ``feature_importances_`` attribute when calling fit.
-
     oob_score : bool
         Whether to use out-of-bag samples to estimate
         the generalization error.

@@ -877,10 +888,6 @@ class RandomForestRegressor(ForestRegressor):
     bootstrap : boolean, optional (default=True)
         Whether bootstrap samples are used when building trees.

-    compute_importances : boolean, optional (default=True)
-        Whether feature importances are computed and stored into the
-        ``feature_importances_`` attribute when calling fit.
-
     oob_score : bool
         whether to use out-of-bag samples to estimate
         the generalization error.

@@ -1015,10 +1022,6 @@ class ExtraTreesClassifier(ForestClassifier):
     bootstrap : boolean, optional (default=False)
         Whether bootstrap samples are used when building trees.

-    compute_importances : boolean, optional (default=True)
-        Whether feature importances are computed and stored into the
-        ``feature_importances_`` attribute when calling fit.
-
     oob_score : bool
         Whether to use out-of-bag samples to estimate
         the generalization error.

@@ -1166,10 +1169,6 @@ class ExtraTreesRegressor(ForestRegressor):
         Whether bootstrap samples are used when building trees.
         Note: this parameter is tree-specific.

-    compute_importances : boolean, optional (default=True)
-        Whether feature importances are computed and stored into the
-        ``feature_importances_`` attribute when calling fit.
-
     oob_score : bool
         Whether to use out-of-bag samples to estimate
         the generalization error.
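The core of this change is the lazy-property pattern above: the aggregate is derived from the fitted sub-estimators on attribute access rather than stored during fit. A self-contained sketch of that pattern under made-up names (TinyEnsemble and its inputs are illustrative, not part of scikit-learn):

    import numpy as np

    class TinyEnsemble(object):
        def __init__(self, n_estimators=3):
            self.n_estimators = n_estimators
            self.estimators_ = None

        def fit(self, importances_per_tree):
            # Stand-in for real tree fitting: each "estimator" just records
            # a per-feature importance vector.
            self.estimators_ = [np.asarray(imp) for imp in importances_per_tree]
            return self

        @property
        def feature_importances_(self):
            # Same guard and aggregation shape as BaseForest: refuse before
            # fit, otherwise average the per-tree vectors on the fly.
            if not self.estimators_:
                raise ValueError("Estimator not fitted, "
                                 "call `fit` before `feature_importances_`.")
            return sum(self.estimators_) / self.n_estimators

    ens = TinyEnsemble().fit([[0.5, 0.5], [0.7, 0.3], [0.6, 0.4]])
    print(ens.feature_importances_)  # [0.6 0.4]

Nothing is cached, so repeated access recomputes the sum; the trade-off the PR makes is a small recomputation cost in exchange for never paying for importances you do not ask for.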

sklearn/ensemble/gradient_boosting.py

Lines changed: 10 additions & 4 deletions
@@ -463,7 +463,6 @@ def _fit_stage(self, i, X, X_argsorted, y, y_pred, sample_mask,
             min_samples_leaf=self.min_samples_leaf,
             min_density=self.min_density,
             max_features=self.max_features,
-            compute_importances=False,
             random_state=random_state)

         tree.fit(X, residual, sample_mask, X_argsorted, check_input=False)

@@ -681,14 +680,21 @@ def staged_decision_function(self, X):

     @property
     def feature_importances_(self):
+        """Return the feature importances (the higher, the more important the
+           feature).
+
+        Returns
+        -------
+        feature_importances_ : array, shape = [n_features]
+        """
         if self.estimators_ is None or len(self.estimators_) == 0:
             raise ValueError("Estimator not fitted, "
                              "call `fit` before `feature_importances_`.")
+
         total_sum = np.zeros((self.n_features, ), dtype=np.float64)
         for stage in self.estimators_:
-            stage_sum = sum(
-                tree.tree_.compute_feature_importances(method='gini')
-                for tree in stage) / len(stage)
+            stage_sum = sum(tree.feature_importances_
+                            for tree in stage) / len(stage)
             total_sum += stage_sum

         importances = total_sum / len(self.estimators_)
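The rewritten property averages importances in two steps: within each stage, then across stages. A small NumPy sketch of that arithmetic, with invented numbers laid out like estimators_ (stages × trees):

    import numpy as np

    stages = [
        [np.array([0.8, 0.2]), np.array([0.6, 0.4])],  # stage 1: two trees
        [np.array([0.5, 0.5]), np.array([0.7, 0.3])],  # stage 2: two trees
    ]

    total_sum = np.zeros(2, dtype=np.float64)
    for stage in stages:
        # Mean over the trees of one boosting stage.
        total_sum += sum(tree for tree in stage) / len(stage)

    importances = total_sum / len(stages)  # mean over stages
    print(importances)  # [0.65 0.35]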

sklearn/ensemble/tests/test_forest.py

Lines changed: 1 addition & 5 deletions
@@ -183,7 +183,7 @@ def test_importances():
                                 shuffle=False,
                                 random_state=0)

-    clf = RandomForestClassifier(n_estimators=10, compute_importances=True)
+    clf = RandomForestClassifier(n_estimators=10)
     clf.fit(X, y)
     importances = clf.feature_importances_
     n_important = sum(importances > 0.1)

@@ -194,10 +194,6 @@ def test_importances():
     X_new = clf.transform(X, threshold="mean")
     assert_less(0 < X_new.shape[1], X.shape[1])

-    clf = RandomForestClassifier(n_estimators=10)
-    clf.fit(X, y)
-    assert_true(clf.feature_importances_ is None)
-

 def test_oob_score_classification():
     """Check that oob prediction is a good estimation of the generalization

sklearn/ensemble/tests/test_weight_boosting.py

Lines changed: 1 addition & 5 deletions
@@ -164,7 +164,7 @@ def test_importances():
                                random_state=1)

     for alg in ['SAMME', 'SAMME.R']:
-        clf = AdaBoostClassifier(algorithm=alg, compute_importances=True)
+        clf = AdaBoostClassifier(algorithm=alg)

         clf.fit(X, y)
         importances = clf.feature_importances_

@@ -173,10 +173,6 @@ def test_importances():
         assert_equal((importances[:3, np.newaxis] >= importances[3:]).all(),
                      True)

-    clf = AdaBoostClassifier()
-    clf.fit(X, y)
-    assert_true(clf.feature_importances_ is None)
-

 def test_error():
     """Test that it gives proper exception on deficient input."""

sklearn/ensemble/weight_boosting.py

Lines changed: 30 additions & 42 deletions
@@ -50,8 +50,7 @@ def __init__(self,
                  base_estimator,
                  n_estimators=50,
                  estimator_params=tuple(),
-                 learning_rate=1.,
-                 compute_importances=False):
+                 learning_rate=1.):

         super(BaseWeightBoosting, self).__init__(
             base_estimator=base_estimator,

@@ -61,8 +60,6 @@ def __init__(self,
         self.estimator_weights_ = None
         self.estimator_errors_ = None
         self.learning_rate = learning_rate
-        self.compute_importances = compute_importances
-        self.feature_importances_ = None

     def fit(self, X, y, sample_weight=None):
         """Build a boosted classifier/regressor from the training set (X, y).

@@ -89,9 +86,6 @@ def fit(self, X, y, sample_weight=None):
         if self.learning_rate <= 0:
             raise ValueError("learning_rate must be greater than zero")

-        if self.compute_importances:
-            self.base_estimator.set_params(compute_importances=True)
-
         # Check data
         X, y = check_arrays(X, y, sparse_format="dense")

@@ -142,21 +136,6 @@ def fit(self, X, y, sample_weight=None):
             # Normalize
             sample_weight /= sample_weight_sum

-        # Sum the importances
-        try:
-            if self.compute_importances:
-                norm = self.estimator_weights_.sum()
-                self.feature_importances_ = (
-                    sum(weight * clf.feature_importances_ for weight, clf
-                        in zip(self.estimator_weights_, self.estimators_))
-                    / norm)
-
-        except AttributeError:
-            raise AttributeError(
-                "Unable to compute feature importances "
-                "since base_estimator does not have a "
-                "feature_importances_ attribute")
-
         return self

     @abstractmethod

@@ -213,18 +192,41 @@ def staged_score(self, X, y):
         Returns
         -------
         z : float
-
         """
         for y_pred in self.staged_predict(X):
             if isinstance(self, ClassifierMixin):
                 yield accuracy_score(y, y_pred)
             else:
                 yield r2_score(y, y_pred)

+    @property
+    def feature_importances_(self):
+        """Return the feature importances (the higher, the more important the
+           feature).
+
+        Returns
+        -------
+        feature_importances_ : array, shape = [n_features]
+        """
+        if self.estimators_ is None or len(self.estimators_) == 0:
+            raise ValueError("Estimator not fitted, "
+                             "call `fit` before `feature_importances_`.")
+
+        try:
+            norm = self.estimator_weights_.sum()
+            return (sum(weight * clf.feature_importances_ for weight, clf
+                        in zip(self.estimator_weights_, self.estimators_))
+                    / norm)
+
+        except AttributeError:
+            raise AttributeError(
+                "Unable to compute feature importances "
+                "since base_estimator does not have a "
+                "feature_importances_ attribute")
+

 def _samme_proba(estimator, n_classes, X):
-    """
-    Calculate algorithm 4, step 2, equation c) of Zhu et al [1]
+    """Calculate algorithm 4, step 2, equation c) of Zhu et al [1].

     References
     ----------

@@ -277,10 +279,6 @@ class AdaBoostClassifier(BaseWeightBoosting, ClassifierMixin):
         The SAMME.R algorithm typically converges faster than SAMME,
         achieving a lower test error with fewer boosting iterations.

-    compute_importances : boolean, optional (default=False)
-        Whether feature importances are computed and stored in the
-        ``feature_importances_`` attribute when calling fit.
-
     Attributes
     ----------
     `estimators_` : list of classifiers

@@ -301,7 +299,6 @@ class AdaBoostClassifier(BaseWeightBoosting, ClassifierMixin):

     `feature_importances_` : array of shape = [n_features]
         The feature importances if supported by the ``base_estimator``.
-        Only computed if ``compute_importances=True``.

     See also
     --------

@@ -319,14 +316,12 @@ def __init__(self,
                  base_estimator=DecisionTreeClassifier(max_depth=1),
                  n_estimators=50,
                  learning_rate=1.,
-                 algorithm='SAMME.R',
-                 compute_importances=False):
+                 algorithm='SAMME.R'):

         super(AdaBoostClassifier, self).__init__(
             base_estimator=base_estimator,
             n_estimators=n_estimators,
-            learning_rate=learning_rate,
-            compute_importances=compute_importances)
+            learning_rate=learning_rate)

         self.algorithm = algorithm

@@ -801,10 +796,6 @@ class AdaBoostRegressor(BaseWeightBoosting, RegressorMixin):
         The loss function to use when updating the weights after each
         boosting iteration.

-    compute_importances : boolean, optional (default=False)
-        Whether feature importances are computed and stored in the
-        ``feature_importances_`` attribute when calling fit.
-
     random_state : int, RandomState instance or None, optional (default=None)
         If int, random_state is the seed used by the random number generator;
         If RandomState instance, random_state is the random number generator;

@@ -824,7 +815,6 @@ class AdaBoostRegressor(BaseWeightBoosting, RegressorMixin):

     `feature_importances_` : array of shape = [n_features]
         The feature importances if supported by the ``base_estimator``.
-        Only computed if ``compute_importances=True``.

     See also
     --------

@@ -843,14 +833,12 @@ def __init__(self,
                  n_estimators=50,
                  learning_rate=1.,
                  loss='linear',
-                 compute_importances=False,
                  random_state=None):

         super(AdaBoostRegressor, self).__init__(
             base_estimator=base_estimator,
             n_estimators=n_estimators,
-            learning_rate=learning_rate,
-            compute_importances=compute_importances)
+            learning_rate=learning_rate)

         self.loss = loss
         self.random_state = random_state
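Unlike the forests, the new AdaBoost property computes a weighted average: per-estimator importances weighted by estimator_weights_ and normalized by their sum. A sketch of that arithmetic with invented numbers:

    import numpy as np

    estimator_weights = np.array([1.0, 0.5, 0.25])
    per_estimator = [np.array([0.9, 0.1]),
                     np.array([0.4, 0.6]),
                     np.array([0.5, 0.5])]

    norm = estimator_weights.sum()
    importances = sum(w * imp for w, imp in zip(estimator_weights,
                                                per_estimator)) / norm
    print(importances)  # [0.7 0.3] -- still sums to 1 when each input does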
