From 7ef1deb3053dc394337a4c8914171b4105519eea Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 17:10:51 -0500 Subject: [PATCH 01/20] some bug fixes. --- doc/modules/model_evaluation.rst | 2 +- doc/whats_new.rst | 36 ++++++++++++++- sklearn/covariance/outlier_detection.py | 35 +++++++++++++-- sklearn/decomposition/dict_learning.py | 9 ++-- sklearn/decomposition/truncated_svd.py | 8 +--- sklearn/dummy.py | 12 +++-- sklearn/ensemble/base.py | 6 ++- sklearn/ensemble/gradient_boosting.py | 10 ++--- sklearn/feature_extraction/tests/test_text.py | 12 ++--- sklearn/feature_extraction/text.py | 32 +++++++------- sklearn/feature_selection/from_model.py | 11 +++-- sklearn/feature_selection/rfe.py | 5 ++- .../tests/test_from_model.py | 8 +++- sklearn/multiclass.py | 20 ++++++--- sklearn/multioutput.py | 13 ++++-- sklearn/naive_bayes.py | 6 +-- sklearn/neighbors/approximate.py | 2 +- sklearn/tests/test_multiclass.py | 44 +++++++++++++++++-- sklearn/tests/test_multioutput.py | 3 ++ sklearn/utils/multiclass.py | 5 ++- 20 files changed, 205 insertions(+), 74 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index db7b59d6c1d3a..beaeabafee752 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -173,7 +173,7 @@ Here is an example of building custom scorers, and of using the >>> # and predictions defined below. >>> loss = make_scorer(my_custom_loss_func, greater_is_better=False) >>> score = make_scorer(my_custom_loss_func, greater_is_better=True) - >>> ground_truth = [[1, 1]] + >>> ground_truth = [[1], [1]] >>> predictions = [0, 1] >>> from sklearn.dummy import DummyClassifier >>> clf = DummyClassifier(strategy='most_frequent', random_state=0) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 7d2fa8a562887..0f0fa26918445 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -177,11 +177,12 @@ Bug fixes - Fixed a bug where :class:`sklearn.linear_model.LassoLars` does not give the same result as the LassoLars implementation available in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez ` + in R (lars library). :issue:`7849` by `Jair Montoya Martinez`_ + - Some ``fetch_`` functions in `sklearn.datasets` were ignoring the ``download_if_missing`` keyword. This was fixed in :issue:`7944` by :user:`Ralf Gommers `. - - Fix a bug regarding fitting :class:`sklearn.cluster.KMeans` with a sparse array X and initial centroids, where X's means were unnecessarily being subtracted from the centroids. :issue:`7872` by `Josh Karnofsky `_. @@ -207,6 +208,13 @@ Bug fixes :class:`sklearn.ensemble.GradientBoostingRegressor` ignored the ``min_impurity_split`` parameter. :issue:`8006` by :user:`Sebastian Pölsterl `. + - Fixes to the input validation in :class:`sklearn.covariance.EllipticEnvelope` by + `Andreas Müller`_. + + - Fix shape output shape of :class:`sklearn.decomposition.DictionaryLearning` transform + for one-dimensional data by `Andreas Müller`_. + + - Several fixes to input validation in :class:`multiclass.OutputCodeClassifier` by `Andreas Müller`_ - Fix a bug where :class:`sklearn.ensemble.gradient_boosting.QuantileLossFunction` computed @@ -267,6 +275,32 @@ API changes summary :func:`sklearn.model_selection.cross_val_predict`. :issue:`2879` by :user:`Stephen Hoover `. + + - Gradient boosting base models are not longer estimators. By `Andreas Müller`_. 
+ + - `feature_extraction.text.TfidfTransformer` now supports numpy arrays as inputs, and produces numpy + arrays for list inputs and numpy array inputs. By `Andreas `Müller_. + + - `feature_selection.SelectFromModel` now validates the ``threshold`` + parameter and sets the ``threshold_`` attribute during the call to + ``fit``, and no longer during the call to ``transform```, by `Andreas Müller`_. + + - `features_selection.SelectFromModel` now has a ``partial_fit`` method only if the underlying + estimator does. By `Andreas Müller`_. + + - All checks in ``utils.estimator_checks``, in particular :func:`utils.estimator_checks.check_estimator` now + accept estimator instances. All checks apart from ``check_estimator`` do not accept estimator classes any more. + By `Andreas Müller`_. + + - The ``include_others`` and ``dont_test`` parameters of :func:`utils.testing.all_estimators` are deprecated + and are assumed ``True``, by `Andreas Müller`_. + + + - :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` now do input validation on ``X`` and check + whether ``X`` and ``y`` are of the same length, by `Andreas Müller`_. + + - :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method only if the underlying estimator does. + By `Andreas Müller`_. .. _changes_0_18_1: Version 0.18.1 diff --git a/sklearn/covariance/outlier_detection.py b/sklearn/covariance/outlier_detection.py index 1cafe885fdd47..3349f71af42d2 100644 --- a/sklearn/covariance/outlier_detection.py +++ b/sklearn/covariance/outlier_detection.py @@ -15,8 +15,8 @@ import numpy as np import scipy as sp from . import MinCovDet -from ..base import ClassifierMixin -from ..utils.validation import check_is_fitted +from ..utils.validation import check_is_fitted, check_array +from ..metrics import accuracy_score class OutlierDetectionMixin(object): @@ -63,11 +63,11 @@ def decision_function(self, X, raw_values=False): """ check_is_fitted(self, 'threshold_') + X = check_array(X) mahal_dist = self.mahalanobis(X) if raw_values: decision = mahal_dist else: - check_is_fitted(self, 'threshold_') transformed_mahal_dist = mahal_dist ** 0.33 decision = self.threshold_ ** 0.33 - transformed_mahal_dist @@ -91,6 +91,7 @@ def predict(self, X): """ check_is_fitted(self, 'threshold_') + X = check_array(X) is_inlier = -np.ones(X.shape[0], dtype=int) if self.contamination is not None: values = self.decision_function(X, raw_values=True) @@ -101,7 +102,7 @@ def predict(self, X): return is_inlier -class EllipticEnvelope(ClassifierMixin, OutlierDetectionMixin, MinCovDet): +class EllipticEnvelope(OutlierDetectionMixin, MinCovDet): """An object for detecting outliers in a Gaussian distributed dataset. Read more in the :ref:`User Guide `. @@ -176,3 +177,29 @@ def fit(self, X, y=None): self.threshold_ = sp.stats.scoreatpercentile( self.dist_, 100. * (1. - self.contamination)) return self + + def score(self, X, y, sample_weight=None): + """Returns the mean accuracy on the given test data and labels. + + In multi-label classification, this is the subset accuracy + which is a harsh metric since you require for each sample that + each label set be correctly predicted. + + Parameters + ---------- + X : array-like, shape = (n_samples, n_features) + Test samples. + + y : array-like, shape = (n_samples) or (n_samples, n_outputs) + True labels for X. + + sample_weight : array-like, shape = [n_samples], optional + Sample weights. + + Returns + ------- + score : float + Mean accuracy of self.predict(X) wrt. y. 
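
        An illustrative doctest (editor's addition, not part of the diff;
        it assumes only the public API shown in this hunk):

        >>> import numpy as np
        >>> from sklearn.covariance import EllipticEnvelope
        >>> X = np.random.RandomState(0).randn(100, 2)
        >>> clf = EllipticEnvelope(contamination=0.1).fit(X)
        >>> y_pred = clf.predict(X)    # +1 for inliers, -1 for outliers
        >>> clf.score(X, y_pred)       # accuracy_score against predict
        1.0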
+ + """ + return accuracy_score(y, self.predict(X), sample_weight=sample_weight) diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index baf79544dd172..a81a16be3f718 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -282,9 +282,9 @@ def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars', check_input=False, verbose=verbose) # This ensure that dimensionality of code is always 2, - # consistant with the case n_jobs > 1 + # consistent with the case n_jobs > 1 if code.ndim == 1: - code = code[np.newaxis, :] + code = code[:, np.newaxis] return code # Enter parallel code block @@ -722,8 +722,8 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100, sys.stdout.flush() elif verbose: if verbose > 10 or ii % ceil(100. / verbose) == 0: - print ("Iteration % 3i (elapsed time: % 3is, % 4.1fmn)" - % (ii, dt, dt / 60)) + print("Iteration % 3i (elapsed time: % 3is, % 4.1fmn)" + % (ii, dt, dt / 60)) this_code = sparse_encode(this_X, dictionary.T, algorithm=method, alpha=alpha, n_jobs=n_jobs).T @@ -811,7 +811,6 @@ def transform(self, X, y=None): """ check_is_fitted(self, 'components_') - # XXX : kwargs is not documented X = check_array(X) n_samples, n_features = X.shape diff --git a/sklearn/decomposition/truncated_svd.py b/sklearn/decomposition/truncated_svd.py index 5d029d1205bd0..4be64c3ac64d5 100644 --- a/sklearn/decomposition/truncated_svd.py +++ b/sklearn/decomposition/truncated_svd.py @@ -15,7 +15,7 @@ from ..utils.arpack import svds from ..base import BaseEstimator, TransformerMixin -from ..utils import check_array, as_float_array, check_random_state +from ..utils import check_array, check_random_state from ..utils.extmath import randomized_svd, safe_sparse_dot, svd_flip from ..utils.sparsefuncs import mean_variance_axis @@ -155,13 +155,9 @@ def fit_transform(self, X, y=None): X_new : array, shape (n_samples, n_components) Reduced version of X. This will always be a dense array. 
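
        A small sketch (editor's addition; it assumes the format conversion
        is handled by ``check_array(accept_sparse=['csr', 'csc'])`` as in
        the hunk below):

        >>> import numpy as np
        >>> from scipy import sparse
        >>> from sklearn.decomposition import TruncatedSVD
        >>> X = sparse.lil_matrix(np.random.RandomState(0).rand(6, 4))
        >>> X_reduced = TruncatedSVD(n_components=2).fit_transform(X)
        >>> X_reduced.shape    # dense output, even for sparse input
        (6, 2)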
""" - X = as_float_array(X, copy=False) + X = check_array(X, accept_sparse=['csr', 'csc']) random_state = check_random_state(self.random_state) - # If sparse and not csr or csc, convert to csr - if sp.issparse(X) and X.getformat() not in ["csr", "csc"]: - X = X.tocsr() - if self.algorithm == "arpack": U, Sigma, VT = svds(X, k=self.n_components, tol=self.tol) # svds doesn't abide by scipy.linalg.svd/randomized_svd diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 84d42e7177a0a..0f01d18cb2b9c 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -10,7 +10,7 @@ from .base import BaseEstimator, ClassifierMixin, RegressorMixin from .utils import check_random_state -from .utils.validation import check_array +from .utils.validation import check_array, check_X_y from .utils.validation import check_consistent_length from .utils.validation import check_is_fitted from .utils.random import random_choice_csc @@ -117,6 +117,9 @@ def fit(self, X, y, sample_weight=None): self.sparse_output_ = sp.issparse(y) + X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) + check_consistent_length(X, y) + if not self.sparse_output_: y = np.atleast_1d(y) @@ -181,7 +184,7 @@ def predict(self, X): classes_ = self.classes_ class_prior_ = self.class_prior_ constant = self.constant - if self.n_outputs_ == 1: + if self.n_outputs_ == 1 and not self.output_2d_: # Get same type even for self.n_outputs_ == 1 n_classes_ = [n_classes_] classes_ = [classes_] @@ -190,7 +193,7 @@ def predict(self, X): # Compute probability only once if self.strategy == "stratified": proba = self.predict_proba(X) - if self.n_outputs_ == 1: + if self.n_outputs_ == 1 and not self.output_2d_: proba = [proba] if self.sparse_output_: @@ -395,7 +398,8 @@ def fit(self, X, y, sample_weight=None): "'mean', 'median', 'quantile' or 'constant'" % self.strategy) - y = check_array(y, ensure_2d=False) + X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], + multi_output=True) if len(y) == 0: raise ValueError("y must not be empty.") diff --git a/sklearn/ensemble/base.py b/sklearn/ensemble/base.py index 165124d62428a..5bf3d72dcdd38 100644 --- a/sklearn/ensemble/base.py +++ b/sklearn/ensemble/base.py @@ -12,6 +12,8 @@ from ..base import BaseEstimator from ..base import MetaEstimatorMixin from ..utils import _get_n_jobs, check_random_state +from ..externals import six +from abc import ABCMeta, abstractmethod MAX_RAND_SEED = np.iinfo(np.int32).max @@ -52,7 +54,8 @@ def _set_random_states(estimator, random_state=None): estimator.set_params(**to_set) -class BaseEnsemble(BaseEstimator, MetaEstimatorMixin): +class BaseEnsemble(six.with_metaclass(ABCMeta, BaseEstimator, + MetaEstimatorMixin)): """Base class for all ensemble classes. Warning: This class should not be used directly. Use derived classes @@ -79,6 +82,7 @@ class BaseEnsemble(BaseEstimator, MetaEstimatorMixin): The collection of fitted base estimators. 
""" + @abstractmethod def __init__(self, base_estimator, n_estimators=10, estimator_params=tuple()): # Set parameters diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 26797ca25cb1f..ec6d5d74b9452 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -64,7 +64,7 @@ from ..exceptions import NotFittedError -class QuantileEstimator(BaseEstimator): +class QuantileEstimator(object): """An estimator predicting the alpha-quantile of the training targets.""" def __init__(self, alpha=0.9): if not 0 < alpha < 1.0: @@ -86,7 +86,7 @@ def predict(self, X): return y -class MeanEstimator(BaseEstimator): +class MeanEstimator(object): """An estimator predicting the mean of the training targets.""" def fit(self, X, y, sample_weight=None): if sample_weight is None: @@ -102,7 +102,7 @@ def predict(self, X): return y -class LogOddsEstimator(BaseEstimator): +class LogOddsEstimator(object): """An estimator predicting the log odds ratio.""" scale = 1.0 @@ -132,7 +132,7 @@ class ScaledLogOddsEstimator(LogOddsEstimator): scale = 0.5 -class PriorProbabilityEstimator(BaseEstimator): +class PriorProbabilityEstimator(object): """An estimator predicting the probability of each class in the training data. """ @@ -150,7 +150,7 @@ def predict(self, X): return y -class ZeroEstimator(BaseEstimator): +class ZeroEstimator(object): """An estimator that simply predicts zero. """ def fit(self, X, y, sample_weight=None): diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 88382f7d13c0b..ab8d9d39aadc2 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -35,6 +35,7 @@ from functools import partial import pickle from io import StringIO +from scipy import sparse JUNK_FOOD_DOCS = ( @@ -309,7 +310,7 @@ def test_tf_idf_smoothing(): [1, 1, 0], [1, 0, 0]] tr = TfidfTransformer(smooth_idf=True, norm='l2') - tfidf = tr.fit_transform(X).toarray() + tfidf = tr.fit_transform(X) assert_true((tfidf >= 0).all()) # check normalization @@ -320,7 +321,7 @@ def test_tf_idf_smoothing(): [1, 1, 0], [1, 0, 0]] tr = TfidfTransformer(smooth_idf=True, norm='l2') - tfidf = tr.fit_transform(X).toarray() + tfidf = tr.fit_transform(X) assert_true((tfidf >= 0).all()) @@ -329,7 +330,7 @@ def test_tfidf_no_smoothing(): [1, 1, 0], [1, 0, 0]] tr = TfidfTransformer(smooth_idf=False, norm='l2') - tfidf = tr.fit_transform(X).toarray() + tfidf = tr.fit_transform(X) assert_true((tfidf >= 0).all()) # check normalization @@ -340,6 +341,7 @@ def test_tfidf_no_smoothing(): X = [[1, 1, 0], [1, 1, 0], [1, 0, 0]] + X = sparse.csr_matrix(X) tr = TfidfTransformer(smooth_idf=False, norm='l2') clean_warning_registry() @@ -357,7 +359,7 @@ def test_tfidf_no_smoothing(): def test_sublinear_tf(): X = [[1], [2], [3]] tr = TfidfTransformer(sublinear_tf=True, use_idf=False, norm=None) - tfidf = tr.fit_transform(X).toarray() + tfidf = tr.fit_transform(X) assert_equal(tfidf[0], 1) assert_greater(tfidf[1], tfidf[0]) assert_greater(tfidf[2], tfidf[1]) @@ -420,7 +422,7 @@ def test_vectorizer(): # test tf alone t2 = TfidfTransformer(norm='l1', use_idf=False) tf = t2.fit(counts_train).transform(counts_train).toarray() - assert_equal(t2.idf_, None) + assert_false(hasattr(t2, "idf_")) # test idf transform with unlearned idf vector t3 = TfidfTransformer(use_idf=True) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index f5b548a5278cd..72676b69a36fc 
100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -29,9 +29,8 @@ from ..preprocessing import normalize from .hashing import FeatureHasher from .stop_words import ENGLISH_STOP_WORDS -from ..utils import deprecated from ..utils.fixes import frombuffer_empty, bincount -from ..utils.validation import check_is_fitted +from ..utils.validation import check_is_fitted, check_array __all__ = ['CountVectorizer', 'ENGLISH_STOP_WORDS', @@ -1023,7 +1022,8 @@ def fit(self, X, y=None): a matrix of term/token counts """ if not sp.issparse(X): - X = sp.csc_matrix(X) + X = sp.csc_matrix(X, dtype=np.float64) + X = check_array(X, accept_sparse=["csc", "csr"]) if self.use_idf: n_samples, n_features = X.shape df = _document_frequency(X) @@ -1035,7 +1035,7 @@ def fit(self, X, y=None): # log+1 instead of log makes sure terms with zero idf don't get # suppressed entirely. idf = np.log(float(n_samples) / df) + 1.0 - self._idf_diag = sp.spdiags(idf, diags=0, m=n_features, + self._idf_diag = sp.spdiags(idf, diags=0, m=n_features, n=n_features, format='csr') return self @@ -1056,18 +1056,19 @@ def transform(self, X, copy=True): ------- vectors : sparse matrix, [n_samples, n_features] """ - if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float): - # preserve float family dtype - X = sp.csr_matrix(X, copy=copy) - else: - # convert counts or binary occurrences to floats - X = sp.csr_matrix(X, dtype=np.float64, copy=copy) + X = check_array(X, accept_sparse=["csr"], copy=copy, + dtype=[np.float64, np.float32]) n_samples, n_features = X.shape if self.sublinear_tf: - np.log(X.data, X.data) - X.data += 1 + if sp.issparse(X): + np.log(X.data, X.data) + X.data += 1 + else: + mask = X != 0 + X[mask] = np.log(X[mask]) + X[mask] += 1 if self.use_idf: check_is_fitted(self, '_idf_diag', 'idf vector is not fitted') @@ -1087,10 +1088,9 @@ def transform(self, X, copy=True): @property def idf_(self): - if hasattr(self, "_idf_diag"): - return np.ravel(self._idf_diag.sum(axis=0)) - else: - return None + # if _idf_diag is not set, this will raise an attribute error, + # which means hasatt(self, "idf_") is False + return np.ravel(self._idf_diag.sum(axis=0)) class TfidfVectorizer(CountVectorizer): diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py index 7fe4456ccd390..e27c0bd267bf9 100644 --- a/sklearn/feature_selection/from_model.py +++ b/sklearn/feature_selection/from_model.py @@ -4,11 +4,12 @@ import numpy as np from .base import SelectorMixin -from ..base import BaseEstimator, clone +from ..base import BaseEstimator, clone, MetaEstimatorMixin from ..externals import six from ..exceptions import NotFittedError from ..utils.fixes import norm +from ..utils.metaestimators import if_delegate_has_method def _get_feature_importances(estimator, norm_order=1): @@ -76,7 +77,7 @@ def _calculate_threshold(estimator, importances, threshold): return threshold -class SelectFromModel(BaseEstimator, SelectorMixin): +class SelectFromModel(BaseEstimator, SelectorMixin, MetaEstimatorMixin): """Meta-transformer for selecting features based on importance weights. .. versionadded:: 0.17 @@ -121,7 +122,6 @@ class SelectFromModel(BaseEstimator, SelectorMixin): threshold_ : float The threshold value used for feature selection. 
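
    A minimal usage sketch (editor's addition; it assumes an estimator
    exposing ``coef_``, here ``LogisticRegression``):

    >>> import numpy as np
    >>> from sklearn.feature_selection import SelectFromModel
    >>> from sklearn.linear_model import LogisticRegression
    >>> X = np.random.RandomState(0).randn(20, 3)
    >>> y = np.array([0, 1] * 10)
    >>> selector = SelectFromModel(LogisticRegression(), threshold="mean")
    >>> selector = selector.fit(X, y)
    >>> hasattr(selector, "threshold_")  # available right after ``fit``
    True
    >>> X_reduced = selector.transform(X)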
""" - def __init__(self, estimator, threshold=None, prefit=False, norm_order=1): self.estimator = estimator self.threshold = threshold @@ -138,6 +138,7 @@ def _get_support_mask(self): raise ValueError( 'Either fit the model before transform or set "prefit=True"' ' while passing the fitted estimator to the constructor.') + # XXX duplicate computation if we called fit before scores = _get_feature_importances(estimator, self.norm_order) self.threshold_ = _calculate_threshold(estimator, scores, self.threshold) @@ -167,8 +168,12 @@ def fit(self, X, y=None, **fit_params): "Since 'prefit=True', call transform directly") self.estimator_ = clone(self.estimator) self.estimator_.fit(X, y, **fit_params) + scores = _get_feature_importances(self.estimator_, self.norm_order) + self.threshold_ = _calculate_threshold(self.estimator, scores, + self.threshold) return self + @if_delegate_has_method('estimator') def partial_fit(self, X, y=None, **fit_params): """Fit the SelectFromModel meta-transformer only once. diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index d92e341676371..31ff0057d8d8e 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -30,6 +30,7 @@ def _rfe_single_fit(rfe, estimator, X, y, train, test, scorer): X_train, y_train, lambda estimator, features: _score(estimator, X_test[:, features], y_test, scorer)).scores_ + class RFE(BaseEstimator, MetaEstimatorMixin, SelectorMixin): """Feature ranking with recursive feature elimination. @@ -293,8 +294,8 @@ class RFECV(RFE, MetaEstimatorMixin): - An iterable yielding train/test splits. For integer/None inputs, if ``y`` is binary or multiclass, - :class:`sklearn.model_selection.StratifiedKFold` is used. If the - estimator is a classifier or if ``y`` is neither binary nor multiclass, + :class:`sklearn.model_selection.StratifiedKFold` is used. If the + estimator is a classifier or if ``y`` is neither binary nor multiclass, :class:`sklearn.model_selection.KFold` is used. 
Refer :ref:`User Guide ` for the various diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py index 6efb6f405bb1c..9beeef78a17be 100644 --- a/sklearn/feature_selection/tests/test_from_model.py +++ b/sklearn/feature_selection/tests/test_from_model.py @@ -1,6 +1,7 @@ import numpy as np from sklearn.utils.testing import assert_true +from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_less from sklearn.utils.testing import assert_greater @@ -27,8 +28,7 @@ def test_invalid_input(): clf = SGDClassifier(alpha=0.1, n_iter=10, shuffle=True, random_state=None) for threshold in ["gobbledigook", ".5 * gobbledigook"]: model = SelectFromModel(clf, threshold=threshold) - model.fit(data, y) - assert_raises(ValueError, model.transform, data) + assert_raises(ValueError, model.fit, data, y) def test_input_estimator_unchanged(): @@ -120,6 +120,10 @@ def test_partial_fit(): transformer.fit(np.vstack((data, data)), np.concatenate((y, y))) assert_array_equal(X_transform, transformer.transform(data)) + # check that if est doesn't have partial_fit, neither does SelectFromModel + transformer = SelectFromModel(estimator=RandomForestClassifier()) + assert_false(hasattr(transformer, "partial_fit")) + def test_calling_fit_reinitializes(): est = LinearSVC(random_state=0) diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 3de5ee319c718..63645a96cf37e 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -46,7 +46,7 @@ from .utils import check_random_state from .utils.validation import _num_samples from .utils.validation import check_is_fitted -from .utils.validation import check_X_y +from .utils.validation import check_X_y, check_array from .utils.multiclass import (_check_partial_fit_first_call, check_classification_targets, _ovr_decision_function) @@ -176,7 +176,6 @@ class OneVsRestClassifier(BaseEstimator, ClassifierMixin, MetaEstimatorMixin): multilabel_ : boolean Whether a OneVsRestClassifier is a multilabel classifier. 
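
    Whether ``partial_fit`` is exposed now follows the underlying estimator;
    an editor's illustration of the ``if_delegate_has_method`` behaviour
    introduced below:

    >>> from sklearn.multiclass import OneVsRestClassifier
    >>> from sklearn.naive_bayes import MultinomialNB
    >>> from sklearn.svm import SVC
    >>> hasattr(OneVsRestClassifier(MultinomialNB()), 'partial_fit')
    True
    >>> hasattr(OneVsRestClassifier(SVC()), 'partial_fit')
    False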
""" - def __init__(self, estimator, n_jobs=1): self.estimator = estimator self.n_jobs = n_jobs @@ -217,6 +216,7 @@ def fit(self, X, y): return self + @if_delegate_has_method('estimator') def partial_fit(self, X, y, classes=None): """Partially fit underlying estimators @@ -488,8 +488,12 @@ def fit(self, X, y): self """ X, y = check_X_y(X, y, accept_sparse=['csr', 'csc']) + check_classification_targets(y) self.classes_ = np.unique(y) + if len(self.classes_) == 1: + raise ValueError("OneVsOneClassifier can not be fit when only one" + " class is present.") n_classes = self.classes_.shape[0] estimators_indices = list(zip(*(Parallel(n_jobs=self.n_jobs)( delayed(_fit_ovo_binary) @@ -498,13 +502,14 @@ def fit(self, X, y): self.estimators_ = estimators_indices[0] try: - self.pairwise_indices_ = estimators_indices[1] \ - if self._pairwise else None + self.pairwise_indices_ = ( + estimators_indices[1] if self._pairwise else None) except AttributeError: self.pairwise_indices_ = None return self + @if_delegate_has_method(delegate='estimator') def partial_fit(self, X, y, classes=None): """Partially fit underlying estimators @@ -544,8 +549,8 @@ def partial_fit(self, X, y, classes=None): n_jobs=self.n_jobs)( delayed(_partial_fit_ovo_binary)( estimator, X, y, self.classes_[i], self.classes_[j]) - for estimator, (i, j) in izip( - self.estimators_, (combinations))) + for estimator, (i, j) in izip(self.estimators_, + (combinations))) self.pairwise_indices_ = None @@ -701,12 +706,14 @@ def fit(self, X, y): ------- self """ + X, y = check_X_y(X, y) if self.code_size <= 0: raise ValueError("code_size should be greater than 0, got {1}" "".format(self.code_size)) _check_estimator(self.estimator) random_state = check_random_state(self.random_state) + check_classification_targets(y) self.classes_ = np.unique(y) n_classes = self.classes_.shape[0] @@ -747,6 +754,7 @@ def predict(self, X): Predicted multi-class targets. 
""" check_is_fitted(self, 'estimators_') + X = check_array(X) Y = np.array([_predict_binary(e, X) for e in self.estimators_]).T pred = euclidean_distances(Y, self.code_book_).argmin(axis=1) return self.classes_[pred] diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 826ece6d50d98..f608936e952ab 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -16,13 +16,14 @@ import numpy as np -from abc import ABCMeta -from .base import BaseEstimator, clone +from abc import ABCMeta, abstractmethod +from .base import BaseEstimator, clone, MetaEstimatorMixin from .base import RegressorMixin, ClassifierMixin from .utils import check_array, check_X_y from .utils.fixes import parallel_helper from .utils.validation import check_is_fitted, has_fit_parameter from .utils.metaestimators import if_delegate_has_method +from .utils.multiclass import check_classification_targets from .externals.joblib import Parallel, delayed from .externals import six @@ -57,8 +58,9 @@ def _partial_fit_estimator(estimator, X, y, classes=None, sample_weight=None, return estimator -class MultiOutputEstimator(six.with_metaclass(ABCMeta, BaseEstimator)): - +class MultiOutputEstimator(six.with_metaclass(ABCMeta, BaseEstimator, + MetaEstimatorMixin)): + @abstractmethod def __init__(self, estimator, n_jobs=1): self.estimator = estimator self.n_jobs = n_jobs @@ -149,6 +151,9 @@ def fit(self, X, y, sample_weight=None): multi_output=True, accept_sparse=True) + if isinstance(self, ClassifierMixin): + check_classification_targets(y) + if y.ndim == 1: raise ValueError("y must have at least two dimensions for " "multi-output regression but has only one.") diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 843bf9ce126cc..d370eda994047 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -483,13 +483,13 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): y : array-like, shape = [n_samples] Target values. - classes : array-like, shape = [n_classes], optional (default=None) + classes : array-like, shape = [n_classes], (default=None) List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls. - sample_weight : array-like, shape = [n_samples], optional (default=None) + sample_weight : array-like, shape = [n_samples], (default=None) Weights applied to individual samples (1. for unweighted). Returns @@ -554,7 +554,7 @@ def fit(self, X, y, sample_weight=None): y : array-like, shape = [n_samples] Target values. - sample_weight : array-like, shape = [n_samples], optional (default=None) + sample_weight : array-like, shape = [n_samples], (default=None) Weights applied to individual samples (1. for unweighted). 
Returns diff --git a/sklearn/neighbors/approximate.py b/sklearn/neighbors/approximate.py index c6f602979ea1b..3a4e1c3868c95 100644 --- a/sklearn/neighbors/approximate.py +++ b/sklearn/neighbors/approximate.py @@ -93,7 +93,7 @@ class GaussianRandomProjectionHash(ProjectionToHashMixin, GaussianRandomProjection): """Use GaussianRandomProjection to produce a cosine LSH fingerprint""" def __init__(self, - n_components=8, + n_components=32, random_state=None): super(GaussianRandomProjectionHash, self).__init__( n_components=n_components, diff --git a/sklearn/tests/test_multiclass.py b/sklearn/tests/test_multiclass.py index b62e78e87c223..20ec4b132fc7f 100644 --- a/sklearn/tests/test_multiclass.py +++ b/sklearn/tests/test_multiclass.py @@ -13,7 +13,8 @@ from sklearn.multiclass import OneVsRestClassifier from sklearn.multiclass import OneVsOneClassifier from sklearn.multiclass import OutputCodeClassifier -from sklearn.utils.multiclass import check_classification_targets, type_of_target +from sklearn.utils.multiclass import (check_classification_targets, + type_of_target) from sklearn.utils import shuffle from sklearn.metrics import precision_score @@ -104,6 +105,10 @@ def test_ovr_partial_fit(): pred1 = ovr1.fit(X, y).predict(X) assert_equal(np.mean(pred == y), np.mean(pred1 == y)) + # test partial_fit only exists if estimator has it: + ovr = OneVsRestClassifier(SVC()) + assert_false(hasattr(ovr, "partial_fit")) + def test_ovr_partial_fit_exceptions(): ovr = OneVsRestClassifier(MultinomialNB()) @@ -428,7 +433,8 @@ def test_ovr_pipeline(): def test_ovr_coef_(): - for base_classifier in [SVC(kernel='linear', random_state=0), LinearSVC(random_state=0)]: + for base_classifier in [SVC(kernel='linear', random_state=0), + LinearSVC(random_state=0)]: # SVC has sparse coef with sparse input data ovr = OneVsRestClassifier(base_classifier) @@ -439,7 +445,8 @@ def test_ovr_coef_(): assert_equal(shape[0], n_classes) assert_equal(shape[1], iris.data.shape[1]) # don't densify sparse coefficients - assert_equal(sp.issparse(ovr.estimators_[0].coef_), sp.issparse(ovr.coef_)) + assert_equal(sp.issparse(ovr.estimators_[0].coef_), + sp.issparse(ovr.coef_)) def test_ovr_coef_exceptions(): @@ -508,6 +515,10 @@ def test_ovo_partial_fit_predict(): assert_equal(len(ovo1.estimators_), len(np.unique(iris.target))) assert_greater(np.mean(iris.target == pred1), 0.65) + # test partial_fit only exists if estimator has it: + ovr = OneVsOneClassifier(SVC()) + assert_false(hasattr(ovr, "partial_fit")) + def test_ovo_decision_function(): n_samples = iris.data.shape[0] @@ -606,6 +617,24 @@ def test_ovo_string_y(): assert_array_equal(y, ovo.predict(X)) +def test_ovo_one_class(): + # Test error for OvO with one class + X = np.eye(4) + y = np.array(['a'] * 4) + + ovo = OneVsOneClassifier(LinearSVC()) + assert_raise_message(ValueError, "when only one class", ovo.fit, X, y) + + +def test_ovo_float_y(): + # Test that the OvO errors on float targets + X = iris.data + y = iris.data[:, 0] + + ovo = OneVsOneClassifier(LinearSVC()) + assert_raise_message(ValueError, "Unknown label type", ovo.fit, X, y) + + def test_ecoc_exceptions(): ecoc = OutputCodeClassifier(LinearSVC(random_state=0)) assert_raises(ValueError, ecoc.predict, []) @@ -634,6 +663,15 @@ def test_ecoc_gridsearch(): assert_true(best_C in Cs) +def test_ecoc_float_y(): + # Test that the OCC errors on float targets + X = iris.data + y = iris.data[:, 0] + + ovo = OutputCodeClassifier(LinearSVC()) + assert_raise_message(ValueError, "Unknown label type", ovo.fit, X, y) + + def 
test_pairwise_indices(): clf_precomputed = svm.SVC(kernel='precomputed') X, y = iris.data, iris.target diff --git a/sklearn/tests/test_multioutput.py b/sklearn/tests/test_multioutput.py index 163363155ca3d..a4217bea63a7c 100644 --- a/sklearn/tests/test_multioutput.py +++ b/sklearn/tests/test_multioutput.py @@ -6,6 +6,7 @@ from sklearn.utils.testing import assert_raises from sklearn.utils.testing import assert_false from sklearn.utils.testing import assert_raises_regex +from sklearn.utils.testing import assert_raise_message from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_equal from sklearn.utils.testing import assert_not_equal @@ -336,3 +337,5 @@ def test_multi_output_exceptions(): y_new = np.column_stack((y1, y2)) moc.fit(X, y) assert_raises(ValueError, moc.score, X, y_new) + # ValueError when y is continuous + assert_raise_message(ValueError, "Unknown label type", moc.fit, X, X[:, 1]) diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py index 2a2cfe1c30fbf..2d3c80510db0d 100644 --- a/sklearn/utils/multiclass.py +++ b/sklearn/utils/multiclass.py @@ -23,6 +23,7 @@ from ..utils.fixes import bincount from ..utils.fixes import array_equal + def _unique_multiclass(y): if hasattr(y, '__array__'): return np.unique(np.asarray(y)) @@ -155,6 +156,7 @@ def is_multilabel(y): return len(labels) < 3 and (y.dtype.kind in 'biu' or # bool, int, uint _is_integral_float(labels)) + def check_classification_targets(y): """Ensure that target y is of a non-regression type. @@ -168,11 +170,10 @@ def check_classification_targets(y): """ y_type = type_of_target(y) if y_type not in ['binary', 'multiclass', 'multiclass-multioutput', - 'multilabel-indicator', 'multilabel-sequences']: + 'multilabel-indicator', 'multilabel-sequences']: raise ValueError("Unknown label type: %r" % y_type) - def type_of_target(y): """Determine the type of data indicated by target `y` From c99b9ecf988198c1a4a1928ade926a35a5000e1d Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 17:16:39 -0500 Subject: [PATCH 02/20] minor fixes to whatsnew --- doc/whats_new.rst | 49 ++++++++++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 0f0fa26918445..9b0630711289a 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -183,9 +183,10 @@ Bug fixes ``download_if_missing`` keyword. This was fixed in :issue:`7944` by :user:`Ralf Gommers `. - - Fix a bug regarding fitting :class:`sklearn.cluster.KMeans` with a - sparse array X and initial centroids, where X's means were unnecessarily - being subtracted from the centroids. :issue:`7872` by `Josh Karnofsky `_. + - Fix a bug regarding fitting :class:`sklearn.cluster.KMeans` with a sparse + array X and initial centroids, where X's means were unnecessarily being + subtracted from the centroids. :issue:`7872` by `Josh Karnofsky + `_. - Fix estimators to accept a ``sample_weight`` parameter of type ``pandas.Series`` in their ``fit`` function. :issue:`7825` by @@ -208,13 +209,16 @@ Bug fixes :class:`sklearn.ensemble.GradientBoostingRegressor` ignored the ``min_impurity_split`` parameter. :issue:`8006` by :user:`Sebastian Pölsterl `. - - Fixes to the input validation in :class:`sklearn.covariance.EllipticEnvelope` by - `Andreas Müller`_. - - Fix shape output shape of :class:`sklearn.decomposition.DictionaryLearning` transform - for one-dimensional data by `Andreas Müller`_. 
+ - Fixes to the input validation in + :class:`sklearn.covariance.EllipticEnvelope` by `Andreas Müller`_. + + - Fix shape output shape of + :class:`sklearn.decomposition.DictionaryLearning` transform for + one-dimensional data by `Andreas Müller`_. - - Several fixes to input validation in :class:`multiclass.OutputCodeClassifier` by `Andreas Müller`_ + - Several fixes to input validation in + :class:`multiclass.OutputCodeClassifier` by `Andreas Müller`_ - Fix a bug where :class:`sklearn.ensemble.gradient_boosting.QuantileLossFunction` computed @@ -278,29 +282,26 @@ API changes summary - Gradient boosting base models are not longer estimators. By `Andreas Müller`_. - - `feature_extraction.text.TfidfTransformer` now supports numpy arrays as inputs, and produces numpy - arrays for list inputs and numpy array inputs. By `Andreas `Müller_. + - :class:`feature_extraction.text.TfidfTransformer` now supports numpy + arrays as inputs, and produces numpy arrays for list inputs and numpy + array inputs. By `Andreas `Müller_. - - `feature_selection.SelectFromModel` now validates the ``threshold`` + - :class:`feature_selection.SelectFromModel` now validates the ``threshold`` parameter and sets the ``threshold_`` attribute during the call to - ``fit``, and no longer during the call to ``transform```, by `Andreas Müller`_. - - - `features_selection.SelectFromModel` now has a ``partial_fit`` method only if the underlying - estimator does. By `Andreas Müller`_. + ``fit``, and no longer during the call to ``transform```, by `Andreas + Müller`_. - - All checks in ``utils.estimator_checks``, in particular :func:`utils.estimator_checks.check_estimator` now - accept estimator instances. All checks apart from ``check_estimator`` do not accept estimator classes any more. - By `Andreas Müller`_. + - :class:`features_selection.SelectFromModel` now has a ``partial_fit`` + method only if the underlying estimator does. By `Andreas Müller`_. - - The ``include_others`` and ``dont_test`` parameters of :func:`utils.testing.all_estimators` are deprecated - and are assumed ``True``, by `Andreas Müller`_. + - :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` now do + input validation on ``X`` and check whether ``X`` and ``y`` are of the + same length, by `Andreas Müller`_. + - :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method + only if the underlying estimator does. By `Andreas Müller`_. - - :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` now do input validation on ``X`` and check - whether ``X`` and ``y`` are of the same length, by `Andreas Müller`_. - - :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method only if the underlying estimator does. - By `Andreas Müller`_. .. _changes_0_18_1: Version 0.18.1 From 534b0c5a16ca30f2c9338eaa8144724b4231d1c7 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 17:17:51 -0500 Subject: [PATCH 03/20] typo in whatsnew --- doc/whats_new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 9b0630711289a..c31569d8f7a60 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -284,7 +284,7 @@ API changes summary - :class:`feature_extraction.text.TfidfTransformer` now supports numpy arrays as inputs, and produces numpy arrays for list inputs and numpy - array inputs. By `Andreas `Müller_. + array inputs. By `Andreas Müller_`. 
- :class:`feature_selection.SelectFromModel` now validates the ``threshold`` parameter and sets the ``threshold_`` attribute during the call to From c7cd00dd54e385c72aab467a493949ea0437633b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 19 Dec 2016 17:41:54 -0500 Subject: [PATCH 04/20] add test for n_components = 1 transform in dict learning --- sklearn/decomposition/tests/test_dict_learning.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py index b7ed5c4703492..9df3528d33443 100644 --- a/sklearn/decomposition/tests/test_dict_learning.py +++ b/sklearn/decomposition/tests/test_dict_learning.py @@ -28,7 +28,12 @@ def test_dict_learning_shapes(): n_components = 5 dico = DictionaryLearning(n_components, random_state=0).fit(X) - assert_true(dico.components_.shape == (n_components, n_features)) + assert_equal(dico.components_.shape, (n_components, n_features)) + + n_components = 1 + dico = DictionaryLearning(n_components, random_state=0).fit(X) + assert_equal(dico.components_.shape, (n_components, n_features)) + assert_equal(dico.transform(X).shape, (X.shape[0], n_components)) def test_dict_learning_overcomplete(): From 91559ce20738cb26419ee9727fd3fc6dbdfc1e0b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 20 Dec 2016 11:45:39 -0500 Subject: [PATCH 05/20] feature extraction doc fix --- doc/modules/feature_extraction.rst | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst index 4995177705c1d..009b58dcfdfa9 100644 --- a/doc/modules/feature_extraction.rst +++ b/doc/modules/feature_extraction.rst @@ -450,7 +450,7 @@ Let's take an example with the following counts. The first term is present 100% of the time hence not very interesting. The two other features only in less than 50% of the time hence probably more representative of the content of the documents:: - + >>> counts = [[3, 0, 1], ... [2, 0, 0], ... [3, 0, 0], @@ -460,10 +460,6 @@ content of the documents:: ... >>> tfidf = transformer.fit_transform(counts) >>> tfidf # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS - <6x3 sparse matrix of type '<... 'numpy.float64'>' - with 9 stored elements in Compressed Sparse ... format> - - >>> tfidf.toarray() # doctest: +ELLIPSIS array([[ 0.81940995, 0. , 0.57320793], [ 1. , 0. , 0. ], [ 1. , 0. , 0. ], From 6ee218db1a3571711f1835b1258c874665cb58e4 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Sat, 25 Feb 2017 13:39:40 -0500 Subject: [PATCH 06/20] fix broken test --- doc/modules/feature_extraction.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst index 009b58dcfdfa9..b7a7755ebcfb6 100644 --- a/doc/modules/feature_extraction.rst +++ b/doc/modules/feature_extraction.rst @@ -519,7 +519,7 @@ And the L2-normalized tf-idf changes to = [0.8515, 0, 0.5243]`:: >>> transformer = TfidfTransformer() - >>> transformer.fit_transform(counts).toarray() + >>> transformer.fit_transform(counts) array([[ 0.85151335, 0. , 0.52433293], [ 1. , 0. , 0. ], [ 1. , 0. , 0. 
], From 27775e9aa363dc21872b687c17ffae1ad301750c Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 15 May 2017 17:42:28 -0400 Subject: [PATCH 07/20] revert aggressive input validation changes --- doc/whats_new.rst | 4 ---- sklearn/dummy.py | 7 +++---- sklearn/feature_extraction/text.py | 30 +++++++++++++++--------------- sklearn/naive_bayes.py | 4 ++-- 4 files changed, 20 insertions(+), 25 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index c31569d8f7a60..e181640e25724 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -282,10 +282,6 @@ API changes summary - Gradient boosting base models are not longer estimators. By `Andreas Müller`_. - - :class:`feature_extraction.text.TfidfTransformer` now supports numpy - arrays as inputs, and produces numpy arrays for list inputs and numpy - array inputs. By `Andreas Müller_`. - - :class:`feature_selection.SelectFromModel` now validates the ``threshold`` parameter and sets the ``threshold_`` attribute during the call to ``fit``, and no longer during the call to ``transform```, by `Andreas diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 0f01d18cb2b9c..ddee7167272ab 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -10,7 +10,7 @@ from .base import BaseEstimator, ClassifierMixin, RegressorMixin from .utils import check_random_state -from .utils.validation import check_array, check_X_y +from .utils.validation import check_array from .utils.validation import check_consistent_length from .utils.validation import check_is_fitted from .utils.random import random_choice_csc @@ -117,7 +117,6 @@ def fit(self, X, y, sample_weight=None): self.sparse_output_ = sp.issparse(y) - X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) check_consistent_length(X, y) if not self.sparse_output_: @@ -398,8 +397,8 @@ def fit(self, X, y, sample_weight=None): "'mean', 'median', 'quantile' or 'constant'" % self.strategy) - X, y = check_X_y(X, y, accept_sparse=['csr', 'csc', 'coo'], - multi_output=True) + y = check_array(y, ensure_2d=False) + if len(y) == 0: raise ValueError("y must not be empty.") diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 72676b69a36fc..d3d4206d44c9c 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -29,8 +29,9 @@ from ..preprocessing import normalize from .hashing import FeatureHasher from .stop_words import ENGLISH_STOP_WORDS +from ..utils import deprecated from ..utils.fixes import frombuffer_empty, bincount -from ..utils.validation import check_is_fitted, check_array +from ..utils.validation import check_is_fitted __all__ = ['CountVectorizer', 'ENGLISH_STOP_WORDS', @@ -158,7 +159,8 @@ def _char_wb_ngrams(self, text_document): """Whitespace sensitive char-n-gram tokenization. Tokenize text_document into a sequence of character n-grams - excluding any whitespace (operating only inside word boundaries)""" + operating only inside word boundaries. n-grams at the edges + of words are padded with space.""" # normalize white spaces text_document = self._white_spaces.sub(" ", text_document) @@ -353,7 +355,7 @@ class HashingVectorizer(BaseEstimator, VectorizerMixin): analyzer : string, {'word', 'char', 'char_wb'} or callable Whether the feature should be made of word or character n-grams. Option 'char_wb' creates character n-grams only from text inside - word boundaries. + word boundaries; n-grams at the edges of words are padded with space. 
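
        For instance, shown via ``CountVectorizer``, which shares this
        analyzer (an editor's illustration, not part of the diff):

        >>> from sklearn.feature_extraction.text import CountVectorizer
        >>> v = CountVectorizer(analyzer='char_wb', ngram_range=(3, 3))
        >>> sorted(v.fit(['ab cd']).get_feature_names())
        [' ab', ' cd', 'ab ', 'cd ']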
If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input. @@ -552,7 +554,7 @@ class CountVectorizer(BaseEstimator, VectorizerMixin): analyzer : string, {'word', 'char', 'char_wb'} or callable Whether the feature should be made of word or character n-grams. Option 'char_wb' creates character n-grams only from text inside - word boundaries. + word boundaries; n-grams at the edges of words are padded with space. If a callable is passed it is used to extract the sequence of features out of the raw, unprocessed input. @@ -1022,8 +1024,7 @@ def fit(self, X, y=None): a matrix of term/token counts """ if not sp.issparse(X): - X = sp.csc_matrix(X, dtype=np.float64) - X = check_array(X, accept_sparse=["csc", "csr"]) + X = sp.csc_matrix(X) if self.use_idf: n_samples, n_features = X.shape df = _document_frequency(X) @@ -1056,19 +1057,18 @@ def transform(self, X, copy=True): ------- vectors : sparse matrix, [n_samples, n_features] """ - X = check_array(X, accept_sparse=["csr"], copy=copy, - dtype=[np.float64, np.float32]) + if hasattr(X, 'dtype') and np.issubdtype(X.dtype, np.float): + # preserve float family dtype + X = sp.csr_matrix(X, copy=copy) + else: + # convert counts or binary occurrences to floats + X = sp.csr_matrix(X, dtype=np.float64, copy=copy) n_samples, n_features = X.shape if self.sublinear_tf: - if sp.issparse(X): - np.log(X.data, X.data) - X.data += 1 - else: - mask = X != 0 - X[mask] = np.log(X[mask]) - X[mask] += 1 + np.log(X.data, X.data) + X.data += 1 if self.use_idf: check_is_fitted(self, '_idf_diag', 'idf vector is not fitted') diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index d370eda994047..4f8a94115dc5a 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -483,13 +483,13 @@ def partial_fit(self, X, y, classes=None, sample_weight=None): y : array-like, shape = [n_samples] Target values. - classes : array-like, shape = [n_classes], (default=None) + classes : array-like, shape = [n_classes] (default=None) List of all the classes that can possibly appear in the y vector. Must be provided at the first call to partial_fit, can be omitted in subsequent calls. - sample_weight : array-like, shape = [n_samples], (default=None) + sample_weight : array-like, shape = [n_samples] (default=None) Weights applied to individual samples (1. for unweighted). Returns From f4c9d60a930389a75a8a5076aba591d4046b67b9 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 15 May 2017 17:42:55 -0400 Subject: [PATCH 08/20] in SelectFromModel, don't store threshold_ in transform. If we called "fit", use estimates from last "fit". --- sklearn/feature_selection/from_model.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py index e27c0bd267bf9..262484c1ad1c0 100644 --- a/sklearn/feature_selection/from_model.py +++ b/sklearn/feature_selection/from_model.py @@ -129,20 +129,18 @@ def __init__(self, estimator, threshold=None, prefit=False, norm_order=1): self.norm_order = norm_order def _get_support_mask(self): + if hasattr(self, "_mask"): + return self._mask # SelectFromModel can directly call on transform. 
if self.prefit: estimator = self.estimator - elif hasattr(self, 'estimator_'): - estimator = self.estimator_ else: raise ValueError( - 'Either fit the model before transform or set "prefit=True"' - ' while passing the fitted estimator to the constructor.') - # XXX duplicate computation if we called fit before + 'Either fit SelectFromModel before transform or set "prefit=' + 'True" and pass a fitted estimator to the constructor.') scores = _get_feature_importances(estimator, self.norm_order) - self.threshold_ = _calculate_threshold(estimator, scores, - self.threshold) - return scores >= self.threshold_ + threshold_ = _calculate_threshold(estimator, scores, self.threshold) + return scores >= threshold_ def fit(self, X, y=None, **fit_params): """Fit the SelectFromModel meta-transformer. @@ -171,6 +169,7 @@ def fit(self, X, y=None, **fit_params): scores = _get_feature_importances(self.estimator_, self.norm_order) self.threshold_ = _calculate_threshold(self.estimator, scores, self.threshold) + self._mask = scores >= self.threshold_ return self @if_delegate_has_method('estimator') From 30bdd041a261823ed6ac08028129a601a31c13ac Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 15 May 2017 17:48:40 -0400 Subject: [PATCH 09/20] move score from EllipticEnvelope to OutlierDetectionMixin --- sklearn/covariance/outlier_detection.py | 52 ++++++++++++------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/sklearn/covariance/outlier_detection.py b/sklearn/covariance/outlier_detection.py index 3349f71af42d2..7cfa84d880422 100644 --- a/sklearn/covariance/outlier_detection.py +++ b/sklearn/covariance/outlier_detection.py @@ -101,6 +101,32 @@ def predict(self, X): return is_inlier + def score(self, X, y, sample_weight=None): + """Returns the mean accuracy on the given test data and labels. + + In multi-label classification, this is the subset accuracy + which is a harsh metric since you require for each sample that + each label set be correctly predicted. + + Parameters + ---------- + X : array-like, shape = (n_samples, n_features) + Test samples. + + y : array-like, shape = (n_samples) or (n_samples, n_outputs) + True labels for X. + + sample_weight : array-like, shape = [n_samples], optional + Sample weights. + + Returns + ------- + score : float + Mean accuracy of self.predict(X) wrt. y. + + """ + return accuracy_score(y, self.predict(X), sample_weight=sample_weight) + class EllipticEnvelope(OutlierDetectionMixin, MinCovDet): """An object for detecting outliers in a Gaussian distributed dataset. @@ -177,29 +203,3 @@ def fit(self, X, y=None): self.threshold_ = sp.stats.scoreatpercentile( self.dist_, 100. * (1. - self.contamination)) return self - - def score(self, X, y, sample_weight=None): - """Returns the mean accuracy on the given test data and labels. - - In multi-label classification, this is the subset accuracy - which is a harsh metric since you require for each sample that - each label set be correctly predicted. - - Parameters - ---------- - X : array-like, shape = (n_samples, n_features) - Test samples. - - y : array-like, shape = (n_samples) or (n_samples, n_outputs) - True labels for X. - - sample_weight : array-like, shape = [n_samples], optional - Sample weights. - - Returns - ------- - score : float - Mean accuracy of self.predict(X) wrt. y. 
- - """ - return accuracy_score(y, self.predict(X), sample_weight=sample_weight) From 5ed1174f666afcf6a3a30f2605529204ffce8833 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 15 May 2017 17:50:07 -0400 Subject: [PATCH 10/20] revert changes to Tfidf documentation --- doc/modules/feature_extraction.rst | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst index f4e59fbc63bd9..32e53f0817e6e 100644 --- a/doc/modules/feature_extraction.rst +++ b/doc/modules/feature_extraction.rst @@ -450,7 +450,7 @@ Let's take an example with the following counts. The first term is present 100% of the time hence not very interesting. The two other features only in less than 50% of the time hence probably more representative of the content of the documents:: - + >>> counts = [[3, 0, 1], ... [2, 0, 0], ... [3, 0, 0], @@ -460,6 +460,10 @@ content of the documents:: ... >>> tfidf = transformer.fit_transform(counts) >>> tfidf # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS + <6x3 sparse matrix of type '<... 'numpy.float64'>' + with 9 stored elements in Compressed Sparse ... format> + + >>> tfidf.toarray() # doctest: +ELLIPSIS array([[ 0.81940995, 0. , 0.57320793], [ 1. , 0. , 0. ], [ 1. , 0. , 0. ], @@ -519,7 +523,7 @@ And the L2-normalized tf-idf changes to = [0.8515, 0, 0.5243]`:: >>> transformer = TfidfTransformer() - >>> transformer.fit_transform(counts) + >>> transformer.fit_transform(counts).toarray() array([[ 0.85151335, 0. , 0.52433293], [ 1. , 0. , 0. ], [ 1. , 0. , 0. ], From adee7a342ff2c86ed4dabeb4320be870a4cc681d Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 15 May 2017 17:52:58 -0400 Subject: [PATCH 11/20] remove dummy input validation from whatsnew --- doc/whats_new.rst | 4 ---- 1 file changed, 4 deletions(-) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 63a8c960826bd..441ae13bf5238 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -351,10 +351,6 @@ API changes summary - :class:`features_selection.SelectFromModel` now has a ``partial_fit`` method only if the underlying estimator does. By `Andreas Müller`_. - - :class:`dummy.DummyClassifier` and :class:`dummy.DummyRegressor` now do - input validation on ``X`` and check whether ``X`` and ``y`` are of the - same length, by `Andreas Müller`_. - - :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method only if the underlying estimator does. By `Andreas Müller`_. From a83697f5f7a9e8392d2cde551db81b636433447f Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 15 May 2017 18:23:26 -0400 Subject: [PATCH 12/20] fix text feature tests --- .travis.yml | 2 +- doc/whats_new.rst | 13 ++++++------- sklearn/feature_extraction/tests/test_text.py | 10 ++++------ 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5ba455625c313..6a513d4a0f3d9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,7 +35,7 @@ env: - DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true" NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.1" CYTHON_VERSION="0.25.2" COVERAGE=true - # This environment use pytest to run the tests. It uses the newest + # This environment uses pytest to run the tests. It uses the newest # supported anaconda env. It also runs tests requiring Pandas. 
- USE_PYTEST="true" DISTRIB="conda" PYTHON_VERSION="3.6" INSTALL_MKL="true" NUMPY_VERSION="1.11.2" SCIPY_VERSION="0.18.1" PANDAS_VERSION="0.19.1" diff --git a/doc/whats_new.rst b/doc/whats_new.rst index 441ae13bf5238..56665be7caed3 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -206,23 +206,22 @@ Bug fixes - Fixed a bug where :class:`sklearn.linear_model.LassoLars` does not give the same result as the LassoLars implementation available - in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez ` in R (lars library). :issue:`7849` by `Jair Montoya Martinez`_ - Some ``fetch_`` functions in `sklearn.datasets` were ignoring the ``download_if_missing`` keyword. This was fixed in :issue:`7944` by :user:`Ralf Gommers `. - - Fix a bug regarding fitting :class:`sklearn.cluster.KMeans` with a sparse - array X and initial centroids, where X's means were unnecessarily being - subtracted from the centroids. :issue:`7872` by `Josh Karnofsky - `_. - - Fixed a bug in :class:`sklearn.ensemble.GradientBoostingClassifier` and :class:`sklearn.ensemble.GradientBoostingRegressor` where a float being compared to ``0.0`` using ``==`` caused a divide by zero error. This was fixed in :issue:`7970` by :user:`He Chen `. + - Fix a bug regarding fitting :class:`sklearn.cluster.KMeans` with a sparse + array X and initial centroids, where X's means were unnecessarily being + subtracted from the centroids. :issue:`7872` by `Josh Karnofsky + `_. + - Fix estimators to accept a ``sample_weight`` parameter of type ``pandas.Series`` in their ``fit`` function. :issue:`7825` by `Kathleen Chen`_. @@ -5107,4 +5106,4 @@ David Huard, Dave Morrill, Ed Schofield, Travis Oliphant, Pearu Peterson. .. _Anish Shah: https://github.com/AnishShah .. _Neeraj Gangwar: http://neerajgangwar.in -.. _Arthur Mensch: https://amensch.fr \ No newline at end of file +.. 
_Arthur Mensch: https://amensch.fr diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index ab8d9d39aadc2..341486abd3b1c 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -35,7 +35,6 @@ from functools import partial import pickle from io import StringIO -from scipy import sparse JUNK_FOOD_DOCS = ( @@ -310,7 +309,7 @@ def test_tf_idf_smoothing(): [1, 1, 0], [1, 0, 0]] tr = TfidfTransformer(smooth_idf=True, norm='l2') - tfidf = tr.fit_transform(X) + tfidf = tr.fit_transform(X).toarray() assert_true((tfidf >= 0).all()) # check normalization @@ -321,7 +320,7 @@ def test_tf_idf_smoothing(): [1, 1, 0], [1, 0, 0]] tr = TfidfTransformer(smooth_idf=True, norm='l2') - tfidf = tr.fit_transform(X) + tfidf = tr.fit_transform(X).toarray() assert_true((tfidf >= 0).all()) @@ -330,7 +329,7 @@ def test_tfidf_no_smoothing(): [1, 1, 0], [1, 0, 0]] tr = TfidfTransformer(smooth_idf=False, norm='l2') - tfidf = tr.fit_transform(X) + tfidf = tr.fit_transform(X).toarray() assert_true((tfidf >= 0).all()) # check normalization @@ -341,7 +340,6 @@ def test_tfidf_no_smoothing(): X = [[1, 1, 0], [1, 1, 0], [1, 0, 0]] - X = sparse.csr_matrix(X) tr = TfidfTransformer(smooth_idf=False, norm='l2') clean_warning_registry() @@ -359,7 +357,7 @@ def test_tfidf_no_smoothing(): def test_sublinear_tf(): X = [[1], [2], [3]] tr = TfidfTransformer(sublinear_tf=True, use_idf=False, norm=None) - tfidf = tr.fit_transform(X) + tfidf = tr.fit_transform(X).toarray() assert_equal(tfidf[0], 1) assert_greater(tfidf[1], tfidf[0]) assert_greater(tfidf[2], tfidf[1]) From 9ce47472c61ddf390f11d0ad19c2b2744c493002 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 15 May 2017 18:34:34 -0400 Subject: [PATCH 13/20] rewrite from_model threshold again... --- sklearn/feature_selection/from_model.py | 16 ++++++++++------ .../feature_selection/tests/test_from_model.py | 3 ++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py index 262484c1ad1c0..b1993f4eb4088 100644 --- a/sklearn/feature_selection/from_model.py +++ b/sklearn/feature_selection/from_model.py @@ -134,13 +134,16 @@ def _get_support_mask(self): # SelectFromModel can directly call on transform. if self.prefit: estimator = self.estimator + elif hasattr(self, 'estimator_'): + estimator = self.estimator_ else: raise ValueError( 'Either fit SelectFromModel before transform or set "prefit=' 'True" and pass a fitted estimator to the constructor.') scores = _get_feature_importances(estimator, self.norm_order) - threshold_ = _calculate_threshold(estimator, scores, self.threshold) - return scores >= threshold_ + threshold = _calculate_threshold(self.estimator, scores, + self.threshold) + return scores >= threshold def fit(self, X, y=None, **fit_params): """Fit the SelectFromModel meta-transformer. 
@@ -166,12 +169,13 @@ def fit(self, X, y=None, **fit_params):
                 "Since 'prefit=True', call transform directly")
         self.estimator_ = clone(self.estimator)
         self.estimator_.fit(X, y, **fit_params)
-        scores = _get_feature_importances(self.estimator_, self.norm_order)
-        self.threshold_ = _calculate_threshold(self.estimator, scores,
-                                               self.threshold)
-        self._mask = scores >= self.threshold_
         return self

+    @property
+    def threshold_(self):
+        scores = _get_feature_importances(self.estimator_, self.norm_order)
+        return _calculate_threshold(self.estimator, scores, self.threshold)
+
     @if_delegate_has_method('estimator')
     def partial_fit(self, X, y=None, **fit_params):
         """Fit the SelectFromModel meta-transformer only once.
diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index 9beeef78a17be..fc64f13723f8d 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -28,7 +28,8 @@ def test_invalid_input():
     clf = SGDClassifier(alpha=0.1, n_iter=10, shuffle=True, random_state=None)
     for threshold in ["gobbledigook", ".5 * gobbledigook"]:
         model = SelectFromModel(clf, threshold=threshold)
-        assert_raises(ValueError, model.fit, data, y)
+        model.fit(data, y)
+        assert_raises(ValueError, model.transform, data)


 def test_input_estimator_unchanged():

From f727e899353e6c4c9529b5c0fc685268f821e14e Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Tue, 16 May 2017 17:42:17 -0400
Subject: [PATCH 14/20] remove stray condition

---
 sklearn/feature_selection/from_model.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py
index b1993f4eb4088..f221d6d749457 100644
--- a/sklearn/feature_selection/from_model.py
+++ b/sklearn/feature_selection/from_model.py
@@ -129,8 +129,6 @@ def __init__(self, estimator, threshold=None, prefit=False, norm_order=1):
         self.norm_order = norm_order

     def _get_support_mask(self):
-        if hasattr(self, "_mask"):
-            return self._mask
         # SelectFromModel can directly call on transform.
         if self.prefit:
             estimator = self.estimator

From 8bbb7424a61639bba62e4125a7de5c390d2f4b3e Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Wed, 17 May 2017 16:23:56 -0400
Subject: [PATCH 15/20] fix self.estimator -> estimator, slightly more
 interesting test

---
 sklearn/feature_selection/from_model.py            | 3 +--
 sklearn/feature_selection/tests/test_from_model.py | 4 ++--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/sklearn/feature_selection/from_model.py b/sklearn/feature_selection/from_model.py
index f221d6d749457..dada33e9a75cc 100644
--- a/sklearn/feature_selection/from_model.py
+++ b/sklearn/feature_selection/from_model.py
@@ -139,8 +139,7 @@ def _get_support_mask(self):
                 'Either fit SelectFromModel before transform or set "prefit='
                 'True" and pass a fitted estimator to the constructor.')
         scores = _get_feature_importances(estimator, self.norm_order)
-        threshold = _calculate_threshold(self.estimator, scores,
-                                         self.threshold)
+        threshold = _calculate_threshold(estimator, scores, self.threshold)
         return scores >= threshold

     def fit(self, X, y=None, **fit_params):
diff --git a/sklearn/feature_selection/tests/test_from_model.py b/sklearn/feature_selection/tests/test_from_model.py
index fc64f13723f8d..6ef0d824b587c 100644
--- a/sklearn/feature_selection/tests/test_from_model.py
+++ b/sklearn/feature_selection/tests/test_from_model.py
@@ -176,10 +176,10 @@ def test_threshold_string():
 def test_threshold_without_refitting():
     """Test that the threshold can be set without refitting the model."""
     clf = SGDClassifier(alpha=0.1, n_iter=10, shuffle=True, random_state=0)
-    model = SelectFromModel(clf, threshold=0.1)
+    model = SelectFromModel(clf, threshold="0.1 * mean")
     model.fit(data, y)
     X_transform = model.transform(data)

     # Set a higher threshold to filter out more features.
-    model.threshold = 1.0
+    model.threshold = "1.0 * mean"
     assert_greater(X_transform.shape[1], model.transform(data).shape[1])

From 746ccdbd89cab7803449d2e6b7b655b3d1d8b6c4 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Wed, 17 May 2017 16:26:27 -0400
Subject: [PATCH 16/20] typo in comment

---
 sklearn/feature_extraction/text.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py
index d3d4206d44c9c..539e88973bcc0 100644
--- a/sklearn/feature_extraction/text.py
+++ b/sklearn/feature_extraction/text.py
@@ -29,7 +29,6 @@
 from ..preprocessing import normalize
 from .hashing import FeatureHasher
 from .stop_words import ENGLISH_STOP_WORDS
-from ..utils import deprecated
 from ..utils.fixes import frombuffer_empty, bincount
 from ..utils.validation import check_is_fitted
@@ -1089,7 +1088,7 @@ def transform(self, X, copy=True):
     @property
     def idf_(self):
         # if _idf_diag is not set, this will raise an attribute error,
-        # which means hasatt(self, "idf_") is False
+        # which means hasattr(self, "idf_") is False
         return np.ravel(self._idf_diag.sum(axis=0))

From 9564e0f04cf31901d96e5edf34480f7522c0b740 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Mon, 5 Jun 2017 13:37:56 +0200
Subject: [PATCH 17/20] Fix issues in SparseEncoder, add tests.
more explicit explanation of SparseEncoder change, add issue numbers to whatsnew

---
 doc/whats_new.rst                                 | 14 +++++++++-----
 sklearn/decomposition/dict_learning.py            | 12 ++++++++----
 sklearn/decomposition/tests/test_dict_learning.py | 13 +++++++++++++
 3 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 56665be7caed3..67d581bd1c6fe 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -245,14 +245,18 @@ Bug fixes
      :issue:`8006` by :user:`Sebastian Pölsterl `.

    - Fixes to the input validation in
-     :class:`sklearn.covariance.EllipticEnvelope` by `Andreas Müller`_.
+     :class:`sklearn.covariance.EllipticEnvelope`.
+     :issue:`8086` by `Andreas Müller`_.

-   - Fix shape output shape of
-     :class:`sklearn.decomposition.DictionaryLearning` transform for
-     one-dimensional data by `Andreas Müller`_.
+   - Fix output shape and bugs with n_jobs > 1 in
+     :class:`sklearn.decomposition.SparseEncoder` transform and :func:`sklearn.decomposition.sparse_encode`
+     for one-dimensional data and one component.
+     This also impacts the output shape of :class:`sklearn.decomposition.DictionaryLearning`.
+     :issue:`8086` by `Andreas Müller`_.

    - Several fixes to input validation in
-     :class:`multiclass.OutputCodeClassifier` by `Andreas Müller`_
+     :class:`multiclass.OutputCodeClassifier`
+     :issue:`8086` by `Andreas Müller`_.

    - Fix a bug where
      :class:`sklearn.ensemble.gradient_boosting.QuantileLossFunction` computed
diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py
index 9a302196526d8..b9bb0fcea864c 100644
--- a/sklearn/decomposition/dict_learning.py
+++ b/sklearn/decomposition/dict_learning.py
@@ -94,6 +94,11 @@ def _sparse_encode(X, dictionary, gram, cov=None, algorithm='lasso_lars',
     if X.ndim == 1:
         X = X[:, np.newaxis]
     n_samples, n_features = X.shape
+    n_components = dictionary.shape[0]
+    if dictionary.shape[1] != X.shape[1]:
+        raise ValueError("Dictionary and X have different numbers of features: "
+                         "dictionary.shape: {} X.shape: {}".format(
+                             dictionary.shape, X.shape))
     if cov is None and algorithm != 'lasso_cd':
         # overwriting cov is safe
         copy_cov = False
@@ -157,6 +162,8 @@ def _sparse_encode(X, dictionary, gram, cov=None, algorithm='lasso_lars',
         raise ValueError('Sparse coding method must be "lasso_lars" '
                          '"lasso_cd", "lasso", "threshold" or "omp", got %s.'
                         % algorithm)
+    if new_code.ndim != 2:
+        return new_code.reshape(n_samples, n_components)
     return new_code


@@ -281,10 +288,6 @@ def sparse_encode(X, dictionary, gram=None, cov=None, algorithm='lasso_lars',
                               max_iter=max_iter, check_input=False,
                               verbose=verbose)
-        # This ensure that dimensionality of code is always 2,
-        # consistent with the case n_jobs > 1
-        if code.ndim == 1:
-            code = code[:, np.newaxis]
         return code

     # Enter parallel code block
@@ -905,6 +908,7 @@ class SparseCoder(BaseEstimator, SparseCodingMixin):
     MiniBatchSparsePCA
     sparse_encode
     """
+    _required_parameters = ["dictionary"]

     def __init__(self, dictionary, transform_algorithm='omp',
                  transform_n_nonzero_coefs=None, transform_alpha=None,
diff --git a/sklearn/decomposition/tests/test_dict_learning.py b/sklearn/decomposition/tests/test_dict_learning.py
index 9df3528d33443..5bf9836aa6a9e 100644
--- a/sklearn/decomposition/tests/test_dict_learning.py
+++ b/sklearn/decomposition/tests/test_dict_learning.py
@@ -1,4 +1,5 @@
 import numpy as np
+import itertools

 from sklearn.exceptions import ConvergenceWarning

@@ -25,6 +26,18 @@
 X = rng_global.randn(n_samples, n_features)


+def test_sparse_encode_shapes_omp():
+    rng = np.random.RandomState(0)
+    algorithms = ['omp', 'lasso_lars', 'lasso_cd', 'lars', 'threshold']
+    for n_components, n_samples in itertools.product([1, 5], [1, 9]):
+        X_ = rng.randn(n_samples, n_features)
+        dictionary = rng.randn(n_components, n_features)
+        for algorithm, n_jobs in itertools.product(algorithms, [1, 3]):
+            code = sparse_encode(X_, dictionary, algorithm=algorithm,
+                                 n_jobs=n_jobs)
+            assert_equal(code.shape, (n_samples, n_components))
+
+
 def test_dict_learning_shapes():
     n_components = 5
     dico = DictionaryLearning(n_components, random_state=0).fit(X)

From bb7f085eeed0550bbd073fe44c2f6f62a4b553b6 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Tue, 6 Jun 2017 11:09:13 +0200
Subject: [PATCH 18/20] minor fixes in whats_new.rst

---
 doc/whats_new.rst | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index b3b27f9beac03..c0e874231ca2a 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -213,7 +213,7 @@ Bug fixes
    - Fixed a bug where :class:`sklearn.linear_model.LassoLars` does not give
      the same result as the LassoLars implementation available
-     in R (lars library). :issue:`7849` by `Jair Montoya Martinez`_
+     in R (lars library). :issue:`7849` by :user:`Jair Montoya Martinez `.

    - Some ``fetch_`` functions in `sklearn.datasets` were ignoring the
      ``download_if_missing`` keyword. This was fixed in :issue:`7944` by
@@ -226,8 +226,7 @@ Bug fixes
    - Fix a bug regarding fitting :class:`sklearn.cluster.KMeans` with a sparse
      array X and initial centroids, where X's means were unnecessarily being
-     subtracted from the centroids. :issue:`7872` by `Josh Karnofsky
-     `_.
+     subtracted from the centroids. :issue:`7872` by :user:`Josh Karnofsky `.

    - Fix estimators to accept a ``sample_weight`` parameter of type
      ``pandas.Series`` in their ``fit`` function. :issue:`7825` by
@@ -256,7 +255,7 @@ Bug fixes
      :issue:`8086` by `Andreas Müller`_.

    - Fix output shape and bugs with n_jobs > 1 in
-     :class:`sklearn.decomposition.SparseEncoder` transform and :func:`sklearn.decomposition.sparse_encode`
+     :class:`sklearn.decomposition.SparseCoder` transform and :func:`sklearn.decomposition.sparse_encode`
      for one-dimensional data and one component.
      This also impacts the output shape of :class:`sklearn.decomposition.DictionaryLearning`.
      :issue:`8086` by `Andreas Müller`_.
@@ -360,7 +359,7 @@ API changes summary
      ``fit``, and no longer during the call to ``transform``, by
      `Andreas Müller`_.

-   - :class:`features_selection.SelectFromModel` now has a ``partial_fit``
+   - :class:`feature_selection.SelectFromModel` now has a ``partial_fit``
      method only if the underlying estimator does. By `Andreas Müller`_.

    - :class:`multiclass.OneVsRestClassifier` now has a ``partial_fit`` method

From c18d646b4bf8b27e3fbf47b7d308476ba9ce55e6 Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Tue, 6 Jun 2017 11:09:26 +0200
Subject: [PATCH 19/20] slightly more consistency with tuples for shapes

---
 sklearn/covariance/outlier_detection.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/covariance/outlier_detection.py b/sklearn/covariance/outlier_detection.py
index 7cfa84d880422..9fe219ba5d0b6 100644
--- a/sklearn/covariance/outlier_detection.py
+++ b/sklearn/covariance/outlier_detection.py
@@ -113,10 +113,10 @@ def score(self, X, y, sample_weight=None):
         X : array-like, shape = (n_samples, n_features)
             Test samples.

-        y : array-like, shape = (n_samples) or (n_samples, n_outputs)
+        y : array-like, shape = (n_samples,) or (n_samples, n_outputs)
             True labels for X.

-        sample_weight : array-like, shape = [n_samples], optional
+        sample_weight : array-like, shape = (n_samples,), optional
             Sample weights.

         Returns

From 8a3ea13a47eef41af1e306ed80668c5e3c11159e Mon Sep 17 00:00:00 2001
From: Andreas Mueller
Date: Tue, 6 Jun 2017 13:20:14 +0200
Subject: [PATCH 20/20] not longer typo

---
 doc/whats_new.rst                     | 2 +-
 sklearn/cluster/tests/test_k_means.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index c0e874231ca2a..bb52411e2fba4 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -352,7 +352,7 @@ API changes summary
      :issue:`2879` by :user:`Stephen Hoover `.

-   - Gradient boosting base models are not longer estimators. By `Andreas Müller`_.
+   - Gradient boosting base models are no longer estimators. By `Andreas Müller`_.

    - :class:`feature_selection.SelectFromModel` now validates the ``threshold``
diff --git a/sklearn/cluster/tests/test_k_means.py b/sklearn/cluster/tests/test_k_means.py
index 31307e55801a5..38fcff94d7505 100644
--- a/sklearn/cluster/tests/test_k_means.py
+++ b/sklearn/cluster/tests/test_k_means.py
@@ -404,7 +404,7 @@ def test_minibatch_sensible_reassign_partial_fit():
 def test_minibatch_reassign():
     # Give a perfect initialization, but a large reassignment_ratio,
     # as a result all the centers should be reassigned and the model
-    # should not longer be good
+    # should no longer be good
     for this_X in (X, X_csr):
         mb_k_means = MiniBatchKMeans(n_clusters=n_clusters, batch_size=100,
                                      random_state=42)
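
[Editor's note, not part of the patch series] Patches 13-15 above change
SelectFromModel so that ``threshold`` is resolved lazily: ``fit`` no longer
precomputes a mask, ``threshold_`` becomes a property, and an invalid
threshold now surfaces at ``transform`` time. A minimal usage sketch of that
behaviour follows; it assumes a scikit-learn build that includes these
patches, and the ``make_classification`` data setup is illustrative only.

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.feature_selection import SelectFromModel
    from sklearn.linear_model import SGDClassifier

    X, y = make_classification(n_samples=100, n_features=10, random_state=0)
    clf = SGDClassifier(alpha=0.1, n_iter=10, shuffle=True, random_state=0)

    model = SelectFromModel(clf, threshold="0.1 * mean")
    model.fit(X, y)
    X_low = model.transform(X)

    # threshold_ is now computed on demand, so the cutoff can be raised
    # without refitting the underlying estimator:
    model.threshold = "1.0 * mean"
    X_high = model.transform(X)
    assert X_high.shape[1] <= X_low.shape[1]

    # Invalid thresholds are reported at transform time, not at fit time:
    bad = SelectFromModel(clf, threshold="gobbledigook").fit(X, y)
    try:
        bad.transform(X)
    except ValueError as exc:
        print("raised as expected:", exc)

Likewise, patch 17 makes sparse coding always return a 2-d code array, even
for a single sample or a single dictionary atom, with or without
``n_jobs > 1``; a sketch under the same assumptions:

    from sklearn.decomposition import sparse_encode

    rng = np.random.RandomState(0)
    X_one = rng.randn(1, 8)        # a single sample
    dictionary = rng.randn(1, 8)   # a single dictionary atom
    code = sparse_encode(X_one, dictionary, algorithm='omp')
    print(code.shape)              # (1, 1) rather than a 1-d array

Making ``threshold_`` a lazily computed property is what lets the second
``transform`` call above honour the updated ``threshold`` without refitting.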