Merge pull request #1 from scikit-learn/master · scikit-learn/scikit-learn@7be35c8

Commit 7be35c8

Merge pull request #1 from scikit-learn/master
Pulling from upstream
2 parents 624c9f4 + cb219b4 commit 7be35c8

13 files changed, 199 additions and 59 deletions

Makefile

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ cython:
 ctags:
 	# make tags for symbol based navigation in emacs and vim
 	# Install with: sudo apt-get install exuberant-ctags
-	$(CTAGS) -R *
+	$(CTAGS) -R sklearn

 doc: inplace
 	$(MAKE) -C doc html

doc/modules/model_evaluation.rst

Lines changed: 2 additions & 0 deletions
@@ -1270,6 +1270,8 @@ implements three such simple strategies for classification:
 - ``stratified`` generates random predictions by respecting the training
   set class distribution.
 - ``most_frequent`` always predicts the most frequent label in the training set.
+- ``prior`` always predicts the class that maximizes the class prior
+  (like ``most_frequent``) and ``predict_proba`` returns the class prior.
 - ``uniform`` generates predictions uniformly at random.
 - ``constant`` always predicts a constant label that is provided by the user.
   A major motivation of this method is F1-scoring, when the positive class

doc/whats_new.rst

Lines changed: 35 additions & 0 deletions
@@ -19,6 +19,9 @@ Enhancements
 - :class:`naive_bayes.GaussianNB` now supports fitting with ``sample_weights``.
   By `Jan Hendrik Metzen`_.

+- :class:`dummy.DummyClassifier` now supports a prior fitting strategy.
+  By `Arnaud Joly`_.
+
 Bug fixes
 .........

@@ -29,6 +32,38 @@ API changes summary
   for retrieving the leaf indices samples are predicted as. By
   `Daniel Galvez`_ and `Gilles Louppe`_.

+.. _changes_0_1_16:
+
+0.16.1
+=======
+
+Changelog
+---------
+
+Bug fixes
+.........
+
+- Allow input data larger than ``block_size`` in
+  :class:`covariance.LedoitWolf` by `Andreas Müller`_.
+
+- Fix a bug in :class:`isotonic.IsotonicRegression` deduplication that
+  caused unstable result in :class:`calibration.CalibratedClassifierCV` by
+  `Jan Hendrik Metzen`_.
+
+- Fix sorting of labels in :func:`preprocessing.label_binarize` by Michael Heilman.
+
+- Fix several stability and convergence issues in
+  :class:`cross_decomposition.CCA` and
+  :class:`cross_decomposition.PLSCanonical` by `Andreas Müller`_.
+
+- Fix a bug in :class:`cluster.KMeans` when ``precompute_distances=False``
+  on fortran-ordered data.
+
+- Fix a speed regression in :class:`ensemble.RandomForestClassifier`'s ``predict``
+  and ``predict_proba`` by `Andreas Müller`_.
+
+- Fix a regression where ``utils.shuffle`` converted lists and dataframes to arrays, by `Olivier Grisel`_.
+
 .. _changes_0_16:

 0.16

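One of the 0.16.1 fixes listed above is easy to sanity-check from user code: after the ``utils.shuffle`` regression fix, shuffling a plain Python list should give back a list rather than a NumPy array. A minimal sketch of that check (the expected output reflects the changelog entry above, not output captured from this exact revision):

from sklearn.utils import shuffle

letters = ["a", "b", "c", "d"]
shuffled = shuffle(letters, random_state=0)
print(type(shuffled))  # expected: <class 'list'>, not numpy.ndarray
print(shuffled)        # the same four items, in shuffled order
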
sklearn/dummy.py

Lines changed: 8 additions & 5 deletions
@@ -33,6 +33,8 @@ class DummyClassifier(BaseEstimator, ClassifierMixin):
           set's class distribution.
         * "most_frequent": always predicts the most frequent label in the
           training set.
+        * "prior": always predicts the class that maximizes the class prior
+          (like "most_frequent") and ``predict_proba`` returns the class prior.
         * "uniform": generates predictions uniformly at random.
         * "constant": always predicts a constant label that is provided by
           the user. This is useful for metrics that evaluate a non-majority
@@ -95,7 +97,7 @@ def fit(self, X, y, sample_weight=None):
             Returns self.
         """
         if self.strategy not in ("most_frequent", "stratified", "uniform",
-                                 "constant"):
+                                 "constant", "prior"):
             raise ValueError("Unknown strategy type.")

         if self.strategy == "uniform" and sp.issparse(y):
@@ -147,8 +149,7 @@ def fit(self, X, y, sample_weight=None):
         return self

     def predict(self, X):
-        """
-        Perform classification on test vectors X.
+        """Perform classification on test vectors X.

         Parameters
         ----------
@@ -188,7 +189,7 @@ def predict(self, X):

         if self.sparse_output_:
             class_prob = None
-            if self.strategy == "most_frequent":
+            if self.strategy in ("most_frequent", "prior"):
                 classes_ = [np.array([cp.argmax()]) for cp in class_prior_]

             elif self.strategy == "stratified":
@@ -204,7 +205,7 @@ def predict(self, X):
             y = random_choice_csc(n_samples, classes_, class_prob,
                                   self.random_state)
         else:
-            if self.strategy == "most_frequent":
+            if self.strategy in ("most_frequent", "prior"):
                 y = np.tile([classes_[k][class_prior_[k].argmax()] for
                              k in range(self.n_outputs_)], [n_samples, 1])

@@ -268,6 +269,8 @@ def predict_proba(self, X):
                 ind = np.ones(n_samples, dtype=int) * class_prior_[k].argmax()
                 out = np.zeros((n_samples, n_classes_[k]), dtype=np.float64)
                 out[:, ind] = 1.0
+            elif self.strategy == "prior":
+                out = np.ones((n_samples, 1)) * class_prior_[k]

             elif self.strategy == "stratified":
                 out = rs.multinomial(1, class_prior_[k], size=n_samples)

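Taken together, the changes above mean the new "prior" strategy predicts the same majority class as "most_frequent", but ``predict_proba`` reports the empirical class prior instead of a one-hot vector. A minimal sketch of that difference (the printed values follow the logic of the diff above and the training class fractions, not output captured from this revision):

import numpy as np
from sklearn.dummy import DummyClassifier

X = np.zeros((4, 1))        # features are ignored by DummyClassifier
y = np.array([1, 2, 1, 1])  # class 1 makes up 3/4 of the training labels

prior = DummyClassifier(strategy="prior").fit(X, y)
freq = DummyClassifier(strategy="most_frequent").fit(X, y)

print(prior.predict(X))        # [1 1 1 1] -- same prediction as most_frequent
print(prior.predict_proba(X))  # rows of [0.75, 0.25] -- the class prior
print(freq.predict_proba(X))   # rows of [1.0, 0.0]   -- one-hot majority class
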
sklearn/ensemble/forest.py

Lines changed: 9 additions & 4 deletions
@@ -365,7 +365,8 @@ def _set_oob_score(self, X, y):
             mask = np.ones(n_samples, dtype=np.bool)
             mask[estimator.indices_] = False
             mask_indices = sample_indices[mask]
-            p_estimator = estimator.predict_proba(X[mask_indices, :])
+            p_estimator = estimator.predict_proba(X[mask_indices, :],
+                                                  check_input=False)

             if self.n_outputs_ == 1:
                 p_estimator = [p_estimator]
@@ -508,7 +509,7 @@ class in a leaf.
         # Parallel loop
         all_proba = Parallel(n_jobs=n_jobs, verbose=self.verbose,
                              backend="threading")(
-            delayed(_parallel_helper)(e, 'predict_proba', X)
+            delayed(_parallel_helper)(e, 'predict_proba', X, check_input=False)
             for e in self.estimators_)

         # Reduce
@@ -614,6 +615,10 @@ def predict(self, X):

         # Check data
         X = check_array(X, dtype=DTYPE, accept_sparse="csr")
+        if issparse(X) and (X.indices.dtype != np.intc or
+                            X.indptr.dtype != np.intc):
+            raise ValueError("No support for np.int64 index based "
+                             "sparse matrices")

         # Assign chunk of trees to jobs
         n_jobs, n_trees, starts = _partition_estimators(self.n_estimators,
@@ -622,7 +627,7 @@ def predict(self, X):
         # Parallel loop
         all_y_hat = Parallel(n_jobs=n_jobs, verbose=self.verbose,
                              backend="threading")(
-            delayed(_parallel_helper)(e, 'predict', X)
+            delayed(_parallel_helper)(e, 'predict', X, check_input=False)
             for e in self.estimators_)

         # Reduce
@@ -642,7 +647,7 @@ def _set_oob_score(self, X, y):
             mask = np.ones(n_samples, dtype=np.bool)
             mask[estimator.indices_] = False
             mask_indices = sample_indices[mask]
-            p_estimator = estimator.predict(X[mask_indices, :])
+            p_estimator = estimator.predict(X[mask_indices, :], check_input=False)

             if self.n_outputs_ == 1:
                 p_estimator = p_estimator[:, np.newaxis]

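These changes address the speed regression noted in the changelog: input validation happens once at the forest level, and each fitted tree is told to skip it via ``check_input=False``. A rough sketch of the pattern (``forest_predict_proba`` and its arguments are illustrative stand-ins, not the actual forest internals, and the sketch assumes single-output classifiers):

import numpy as np
from sklearn.utils import check_array

def forest_predict_proba(estimators, X):
    # Validate the input a single time at the ensemble level ...
    X = check_array(X, accept_sparse="csr")
    # ... then let every fitted tree reuse the already-validated array;
    # check_input=False skips repeated dtype/layout conversions per estimator.
    all_proba = [est.predict_proba(X, check_input=False) for est in estimators]
    return np.mean(all_proba, axis=0)
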
sklearn/neighbors/base.py

Lines changed: 2 additions & 1 deletion
@@ -335,7 +335,8 @@ class from an array representing our data set and ask who's
         train_size = self._fit_X.shape[0]
         if n_neighbors > train_size:
             raise ValueError(
-                "Expected n_neighbors <= %d. Got %d" %
+                "Expected n_neighbors <= n_samples, "
+                " but n_samples = %d, n_neighbors = %d" %
                 (train_size, n_neighbors)
             )
         n_samples, _ = X.shape

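A quick way to see the clearer error message in action (a sketch; the printed wording should match the new string in the diff above):

from sklearn.neighbors import NearestNeighbors

nn = NearestNeighbors(n_neighbors=5).fit([[0.0], [1.0], [2.0]])  # only 3 samples
try:
    nn.kneighbors([[0.5]])
except ValueError as exc:
    print(exc)  # Expected n_neighbors <= n_samples,  but n_samples = 3, n_neighbors = 5
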
sklearn/pipeline.py

Lines changed: 20 additions & 0 deletions
@@ -183,6 +183,26 @@ def predict(self, X):
             Xt = transform.transform(Xt)
         return self.steps[-1][-1].predict(Xt)

+    @if_delegate_has_method(delegate='_final_estimator')
+    def fit_predict(self, X, y=None, **fit_params):
+        """Applies fit_predict of last step in pipeline after transforms.
+
+        Applies fit_transforms of a pipeline to the data, followed by the
+        fit_predict method of the final estimator in the pipeline. Valid
+        only if the final estimator implements fit_predict.
+
+        Parameters
+        ----------
+        X : iterable
+            Training data. Must fulfill input requirements of first step of
+            the pipeline.
+        y : iterable, default=None
+            Training targets. Must fulfill label requirements for all steps
+            of the pipeline.
+        """
+        Xt, fit_params = self._pre_transform(X, y, **fit_params)
+        return self.steps[-1][-1].fit_predict(Xt, y, **fit_params)
+
     @if_delegate_has_method(delegate='_final_estimator')
     def predict_proba(self, X):
         """Applies transforms to the data, and the predict_proba method of the

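The test added further below exercises the same thing; as a plain usage sketch, the new method fit-transforms the intermediate steps and then calls ``fit_predict`` on the final estimator, which allows one-call clustering pipelines (StandardScaler and KMeans are just one example of a transformer plus a final estimator that implements ``fit_predict``):

from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

X = load_iris().data
pipe = Pipeline([("scaler", StandardScaler()),
                 ("kmeans", KMeans(random_state=0))])
labels = pipe.fit_predict(X)  # scale, then cluster, in a single call
print(labels[:10])
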
sklearn/tests/test_dummy.py

Lines changed: 38 additions & 30 deletions
@@ -1,5 +1,5 @@
 from __future__ import division
-import warnings
+
 import numpy as np
 import scipy.sparse as sp

@@ -11,17 +11,17 @@
 from sklearn.utils.testing import assert_raises
 from sklearn.utils.testing import assert_true
 from sklearn.utils.testing import assert_warns_message
+from sklearn.utils.testing import ignore_warnings
 from sklearn.utils.stats import _weighted_percentile

 from sklearn.dummy import DummyClassifier, DummyRegressor


+@ignore_warnings
 def _check_predict_proba(clf, X, y):
     proba = clf.predict_proba(X)
-    with warnings.catch_warnings():
-        warnings.simplefilter("ignore")
-        # We know that we can have division by zero
-        log_proba = clf.predict_log_proba(X)
+    # We know that we can have division by zero
+    log_proba = clf.predict_log_proba(X)

     y = np.atleast_1d(y)
     if y.ndim == 1:
@@ -38,10 +38,8 @@ def _check_predict_proba(clf, X, y):
         assert_equal(proba[k].shape[0], n_samples)
         assert_equal(proba[k].shape[1], len(np.unique(y[:, k])))
         assert_array_equal(proba[k].sum(axis=1), np.ones(len(X)))
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore")
-            # We know that we can have division by zero
-            assert_array_equal(np.log(proba[k]), log_proba[k])
+        # We know that we can have division by zero
+        assert_array_equal(np.log(proba[k]), log_proba[k])


 def _check_behavior_2d(clf):
@@ -85,17 +83,25 @@ def _check_equality_regressor(statistic, y_learn, y_pred_learn,
                               y_pred_test)


-def test_most_frequent_strategy():
+def test_most_frequent_and_prior_strategy():
     X = [[0], [0], [0], [0]]  # ignored
     y = [1, 2, 1, 1]

-    clf = DummyClassifier(strategy="most_frequent", random_state=0)
-    clf.fit(X, y)
-    assert_array_equal(clf.predict(X), np.ones(len(X)))
-    _check_predict_proba(clf, X, y)
+    for strategy in ("most_frequent", "prior"):
+        clf = DummyClassifier(strategy=strategy, random_state=0)
+        clf.fit(X, y)
+        assert_array_equal(clf.predict(X), np.ones(len(X)))
+        _check_predict_proba(clf, X, y)
+
+        if strategy == "prior":
+            assert_array_equal(clf.predict_proba(X[0]),
+                               clf.class_prior_.reshape((1, -1)))
+        else:
+            assert_array_equal(clf.predict_proba(X[0]),
+                               clf.class_prior_.reshape((1, -1)) > 0.5)


-def test_most_frequent_strategy_multioutput():
+def test_most_frequent_and_prior_strategy_multioutput():
     X = [[0], [0], [0], [0]]  # ignored
     y = np.array([[1, 0],
                   [2, 0],
@@ -104,13 +110,14 @@ def test_most_frequent_strategy_multioutput():

     n_samples = len(X)

-    clf = DummyClassifier(strategy="most_frequent", random_state=0)
-    clf.fit(X, y)
-    assert_array_equal(clf.predict(X),
-                       np.hstack([np.ones((n_samples, 1)),
-                                  np.zeros((n_samples, 1))]))
-    _check_predict_proba(clf, X, y)
-    _check_behavior_2d(clf)
+    for strategy in ("prior", "most_frequent"):
+        clf = DummyClassifier(strategy=strategy, random_state=0)
+        clf.fit(X, y)
+        assert_array_equal(clf.predict(X),
+                           np.hstack([np.ones((n_samples, 1)),
+                                      np.zeros((n_samples, 1))]))
+        _check_predict_proba(clf, X, y)
+        _check_behavior_2d(clf)


 def test_stratified_strategy():
@@ -555,7 +562,7 @@ def test_stratified_strategy_sparse_target():
         assert_almost_equal(p[4], 1. / 5, decimal=1)


-def test_most_frequent_strategy_sparse_target():
+def test_most_frequent_and_prior_strategy_sparse_target():
     X = [[0]] * 5  # ignored
     y = sp.csc_matrix(np.array([[1, 0],
                                 [1, 3],
@@ -564,13 +571,14 @@ def test_most_frequent_strategy_sparse_target():
                                 [1, 0]]))

     n_samples = len(X)
-    clf = DummyClassifier(strategy="most_frequent", random_state=0)
-    clf.fit(X, y)
-
-    y_pred = clf.predict(X)
-    assert_true(sp.issparse(y_pred))
-    assert_array_equal(y_pred.toarray(), np.hstack([np.ones((n_samples, 1)),
-                                                    np.zeros((n_samples, 1))]))
+    y_expected = np.hstack([np.ones((n_samples, 1)), np.zeros((n_samples, 1))])
+    for strategy in ("most_frequent", "prior"):
+        clf = DummyClassifier(strategy=strategy, random_state=0)
+        clf.fit(X, y)
+
+        y_pred = clf.predict(X)
+        assert_true(sp.issparse(y_pred))
+        assert_array_equal(y_pred.toarray(), y_expected)


 def test_dummy_regressor_sample_weight(n_samples=10):

sklearn/tests/test_pipeline.py

Lines changed: 32 additions & 1 deletion
@@ -5,7 +5,7 @@
 from scipy import sparse

 from sklearn.externals.six.moves import zip
-from sklearn.utils.testing import assert_raises
+from sklearn.utils.testing import assert_raises, assert_raises_regex
 from sklearn.utils.testing import assert_equal
 from sklearn.utils.testing import assert_false
 from sklearn.utils.testing import assert_true
@@ -17,6 +17,7 @@
 from sklearn.svm import SVC
 from sklearn.linear_model import LogisticRegression
 from sklearn.linear_model import LinearRegression
+from sklearn.cluster import KMeans
 from sklearn.feature_selection import SelectKBest, f_classif
 from sklearn.decomposition import PCA, RandomizedPCA, TruncatedSVD
 from sklearn.datasets import load_iris
@@ -202,6 +203,36 @@ def test_pipeline_methods_preprocessing_svm():
     pipe.score(X, y)


+def test_fit_predict_on_pipeline():
+    # test that the fit_predict method is implemented on a pipeline
+    # test that the fit_predict on pipeline yields same results as applying
+    # transform and clustering steps separately
+    iris = load_iris()
+    scaler = StandardScaler()
+    km = KMeans(random_state=0)
+
+    # first compute the transform and clustering step separately
+    scaled = scaler.fit_transform(iris.data)
+    separate_pred = km.fit_predict(scaled)
+
+    # use a pipeline to do the transform and clustering in one step
+    pipe = Pipeline([('scaler', scaler), ('Kmeans', km)])
+    pipeline_pred = pipe.fit_predict(iris.data)
+
+    assert_array_almost_equal(pipeline_pred, separate_pred)
+
+
+def test_fit_predict_on_pipeline_without_fit_predict():
+    # tests that a pipeline does not have fit_predict method when final
+    # step of pipeline does not have fit_predict defined
+    scaler = StandardScaler()
+    pca = PCA()
+    pipe = Pipeline([('scaler', scaler), ('pca', pca)])
+    assert_raises_regex(AttributeError,
+                        "'PCA' object has no attribute 'fit_predict'",
+                        getattr, pipe, 'fit_predict')
+
+
 def test_feature_union():
     # basic sanity check for feature union
     iris = load_iris()
