From 773ae4cda63cc3c8b47bab0ffb5e97734f766a76 Mon Sep 17 00:00:00 2001 From: twosigmajab <40300730+twosigmajab@users.noreply.github.com> Date: Fri, 15 Jun 2018 15:58:58 -0400 Subject: [PATCH 01/24] DOC add missing requirements for building docs (#11292) Fixes #11286. --- doc/developers/contributing.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 0d6174f3b2d48..5e614911dce11 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -434,10 +434,9 @@ HTML output by building the documentation website. Building the documentation ^^^^^^^^^^^^^^^^^^^^^^^^^^ -Building the documentation requires the ``sphinx``, ``sphinx-gallery``, -``numpydoc``, ``matplotlib``, and ``Pillow`` packages:: +Building the documentation requires installing some additional packages:: - pip install sphinx sphinx-gallery numpydoc matplotlib Pillow + pip install sphinx sphinx-gallery numpydoc matplotlib Pillow pandas scikit-image To build the documentation, you need to be in the ``doc`` folder:: From 47891de7b50033b622bd2d2803fe30e16eb5f879 Mon Sep 17 00:00:00 2001 From: dlovell Date: Fri, 15 Jun 2018 17:26:15 -0400 Subject: [PATCH 02/24] DOC: replace TODO with link to the glossary (#11279) Fixes #9294 --- doc/developers/performance.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/doc/developers/performance.rst b/doc/developers/performance.rst index d3d6204ec328f..89ee4af1325ff 100644 --- a/doc/developers/performance.rst +++ b/doc/developers/performance.rst @@ -388,8 +388,7 @@ Checkout the official joblib documentation: .. _warm-restarts: -A sample algorithmic trick: warm restarts for cross validation +A sample algorithmic trick: warm restarts ============================================================== -TODO: demonstrate the warm restart tricks for cross validation of linear -regression with Coordinate Descent. +See the glossary entry for `warm_start `_ From 761355958319021e939ab954f9df64b98f392a4a Mon Sep 17 00:00:00 2001 From: Andrew Peng Date: Fri, 15 Jun 2018 17:50:01 -0400 Subject: [PATCH 03/24] FIX #11215 : Changing return in docstring to yields for generator functions (#11276) --- sklearn/model_selection/_split.py | 36 +++++++++++++++---------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index 866cb4cc53aa8..399f8df3a0ee2 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -76,8 +76,8 @@ def split(self, X, y=None, groups=None): Group labels for the samples used while splitting the dataset into train/test set. - Returns - ------- + Yields + ------ train : ndarray The training set indices for that split. @@ -301,8 +301,8 @@ def split(self, X, y=None, groups=None): Group labels for the samples used while splitting the dataset into train/test set. - Returns - ------- + Yields + ------ train : ndarray The training set indices for that split. @@ -647,8 +647,8 @@ def split(self, X, y, groups=None): groups : object Always ignored, exists for compatibility. - Returns - ------- + Yields + ------ train : ndarray The training set indices for that split. @@ -734,8 +734,8 @@ def split(self, X, y=None, groups=None): groups : array-like, with shape (n_samples,), optional Always ignored, exists for compatibility. - Returns - ------- + Yields + ------ train : ndarray The training set indices for that split. 
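# An aside on the numpydoc convention this patch applies (an illustration
# only, not part of the diff): generator functions such as ``split``
# document what they yield under a ``Yields`` section, while ``Returns``
# is reserved for plain functions. A minimal sketch with a hypothetical
# generator:
#
#     def count_to(n):
#         """Count from 0 to n - 1.
#
#         Yields
#         ------
#         i : int
#             The next integer in the sequence.
#         """
#         for i in range(n):
#             yield i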
@@ -1006,8 +1006,8 @@ def split(self, X, y=None, groups=None): Group labels for the samples used while splitting the dataset into train/test set. - Returns - ------- + Yields + ------ train : ndarray The training set indices for that split. @@ -1182,8 +1182,8 @@ def split(self, X, y=None, groups=None): Group labels for the samples used while splitting the dataset into train/test set. - Returns - ------- + Yields + ------ train : ndarray The training set indices for that split. @@ -1603,8 +1603,8 @@ def split(self, X, y, groups=None): groups : object Always ignored, exists for compatibility. - Returns - ------- + Yields + ------ train : ndarray The training set indices for that split. @@ -1763,8 +1763,8 @@ def split(self, X=None, y=None, groups=None): groups : object Always ignored, exists for compatibility. - Returns - ------- + Yields + ------ train : ndarray The training set indices for that split. @@ -1847,8 +1847,8 @@ def split(self, X=None, y=None, groups=None): groups : object Always ignored, exists for compatibility. - Returns - ------- + Yields + ------ train : ndarray The training set indices for that split. From 939542408dc3fed00670170331c0ff91ab62eba3 Mon Sep 17 00:00:00 2001 From: twosigmajab <40300730+twosigmajab@users.noreply.github.com> Date: Fri, 15 Jun 2018 23:53:30 -0400 Subject: [PATCH 04/24] DOC replace OpenHub/ohloh badge with star button (#11288) --- doc/developers/contributing.rst | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 5e614911dce11..dd3b95e15789d 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -53,11 +53,14 @@ project maintainers. Another way to contribute is to report issues you're facing, and give a "thumbs up" on issues that others reported and that are relevant to you. It also helps us if you spread the word: reference the project from your blog and articles, -link to it from your website, or simply say "I use it": +link to it from your website, or simply star to say "I use it": .. 
raw:: html - + Star + Submitting a bug report or a feature request ============================================ From e751d658c4d1539f9370d7d00c4b4ee9924a7fd0 Mon Sep 17 00:00:00 2001 From: Arthur Ozga Date: Sat, 16 Jun 2018 23:26:56 -0400 Subject: [PATCH 05/24] MAINT skip dataset downloading doctest (#11284) --- doc/conftest.py | 8 +++++++- doc/datasets/twenty_newsgroups.rst | 24 ++++++++++++++---------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/doc/conftest.py b/doc/conftest.py index 158fff5830acf..0c9c79a2dd7ed 100644 --- a/doc/conftest.py +++ b/doc/conftest.py @@ -6,6 +6,8 @@ from sklearn.utils.testing import SkipTest from sklearn.utils.testing import check_skip_network from sklearn.datasets import get_data_home +from sklearn.datasets.base import _pkl_filepath +from sklearn.datasets.twenty_newsgroups import CACHE_NAME from sklearn.utils.testing import install_mldata_mock from sklearn.utils.testing import uninstall_mldata_mock @@ -47,12 +49,16 @@ def setup_rcv1(): def setup_twenty_newsgroups(): data_home = get_data_home() - if not exists(join(data_home, '20news_home')): + cache_path = _pkl_filepath(get_data_home(), CACHE_NAME) + if not exists(cache_path): raise SkipTest("Skipping dataset loading doctests") def setup_working_with_text_data(): check_skip_network() + cache_path = _pkl_filepath(get_data_home(), CACHE_NAME) + if not exists(cache_path): + raise SkipTest("Skipping dataset loading doctests") def setup_compose(): diff --git a/doc/datasets/twenty_newsgroups.rst b/doc/datasets/twenty_newsgroups.rst index 23c11b2998ddd..5d6906c3cfa14 100644 --- a/doc/datasets/twenty_newsgroups.rst +++ b/doc/datasets/twenty_newsgroups.rst @@ -62,7 +62,7 @@ attribute is the integer index of the category:: >>> newsgroups_train.target.shape (11314,) >>> newsgroups_train.target[:10] - array([12, 6, 9, 8, 6, 7, 9, 2, 13, 19]) + array([ 7, 4, 4, 1, 14, 16, 13, 3, 2, 4]) It is possible to load only a sub-selection of the categories by passing the list of the categories to load to the @@ -78,7 +78,7 @@ list of the categories to load to the >>> newsgroups_train.target.shape (1073,) >>> newsgroups_train.target[:10] - array([1, 1, 1, 0, 1, 0, 0, 1, 1, 1]) + array([0, 1, 1, 1, 0, 1, 1, 0, 0, 0]) Converting text to vectors -------------------------- @@ -105,7 +105,7 @@ components by sample in a more than 30000-dimensional space (less than .5% non-zero features):: >>> vectors.nnz / float(vectors.shape[0]) - 159.01327433628319 + 159.01327... :func:`sklearn.datasets.fetch_20newsgroups_vectorized` is a function which returns ready-to-use tfidf features instead of file names. @@ -131,9 +131,11 @@ which is fast to train and achieves a decent F-score:: >>> vectors_test = vectorizer.transform(newsgroups_test.data) >>> clf = MultinomialNB(alpha=.01) >>> clf.fit(vectors, newsgroups_train.target) + MultinomialNB(alpha=0.01, class_prior=None, fit_prior=True) + >>> pred = clf.predict(vectors_test) >>> metrics.f1_score(newsgroups_test.target, pred, average='macro') - 0.88213592402729568 + 0.88213... (The example :ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py` shuffles the training and test data, instead of segmenting by time, and in that case @@ -150,10 +152,10 @@ Let's take a look at what the most informative features are: ... print("%s: %s" % (category, " ".join(feature_names[top10]))) ... 
>>> show_top10(clf, vectorizer, newsgroups_train.target_names) - alt.atheism: sgi livesey atheists writes people caltech com god keith edu - comp.graphics: organization thanks files subject com image lines university edu graphics - sci.space: toronto moon gov com alaska access henry nasa edu space - talk.religion.misc: article writes kent people christian jesus sandvik edu com god + alt.atheism: edu it and in you that is of to the + comp.graphics: edu in graphics it is for and of to the + sci.space: edu it that is in and space to of the + talk.religion.misc: not it you in is that and to of the You can now see many things that these features have overfit to: @@ -183,7 +185,7 @@ blocks, and quotation blocks respectively. >>> vectors_test = vectorizer.transform(newsgroups_test.data) >>> pred = clf.predict(vectors_test) >>> metrics.f1_score(pred, newsgroups_test.target, average='macro') - 0.77310350681274775 + 0.77310... This classifier lost over a lot of its F-score, just because we removed metadata that has little to do with topic classification. @@ -195,10 +197,12 @@ It loses even more if we also strip this metadata from the training data: >>> vectors = vectorizer.fit_transform(newsgroups_train.data) >>> clf = MultinomialNB(alpha=.01) >>> clf.fit(vectors, newsgroups_train.target) + MultinomialNB(alpha=0.01, class_prior=None, fit_prior=True) + >>> vectors_test = vectorizer.transform(newsgroups_test.data) >>> pred = clf.predict(vectors_test) >>> metrics.f1_score(newsgroups_test.target, pred, average='macro') - 0.76995175184521725 + 0.76995... Some other classifiers cope better with this harder version of the task. Try running :ref:`sphx_glr_auto_examples_model_selection_grid_search_text_feature_extraction.py` with and without From 13b33ed50432dc33f95026568f58e6fd9a81749f Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Sun, 17 Jun 2018 12:11:08 +0200 Subject: [PATCH 06/24] DOC: add references for CD in LASSO and duality gap criterion (#11302) --- doc/modules/linear_model.rst | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index 06259cd867d47..e2cc0ba2601ab 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -205,6 +205,20 @@ computes the coefficients along the full path of possible values. thus be used to perform feature selection, as detailed in :ref:`l1_feature_selection`. +The following two references explain the iterations +used in the coordinate descent solver of scikit-learn, as well as +the duality gap computation used for convergence control. + +.. topic:: References + + * "Regularization Path For Generalized linear Models by Coordinate Descent", + Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper + `_). + * "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," + S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, + in IEEE Journal of Selected Topics in Signal Processing, 2007 + (`Paper `_) + Setting regularization parameter -------------------------------- @@ -358,7 +372,19 @@ The class :class:`ElasticNetCV` can be used to set the parameters * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_and_elasticnet.py` * :ref:`sphx_glr_auto_examples_linear_model_plot_lasso_coordinate_descent_path.py` +The following two references explain the iterations +used in the coordinate descent solver of scikit-learn, as well as +the duality gap computation used for convergence control. + +.. 
topic:: References + * "Regularization Path For Generalized linear Models by Coordinate Descent", + Friedman, Hastie & Tibshirani, J Stat Softw, 2010 (`Paper + `_). + * "An Interior-Point Method for Large-Scale L1-Regularized Least Squares," + S. J. Kim, K. Koh, M. Lustig, S. Boyd and D. Gorinevsky, + in IEEE Journal of Selected Topics in Signal Processing, 2007 + (`Paper `_) .. _multi_task_elastic_net: From cb5ec0a8423d9f476f66500dab1617c3c7d11e24 Mon Sep 17 00:00:00 2001 From: Scott Gigante Date: Sun, 17 Jun 2018 10:25:30 -0400 Subject: [PATCH 07/24] Add sparse efficiency warning to randomized_svd for dok_matrix / lil_matrix (#11264) --- sklearn/utils/extmath.py | 21 +++++++++++++-------- sklearn/utils/tests/test_extmath.py | 15 +++++++++++++++ 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/sklearn/utils/extmath.py b/sklearn/utils/extmath.py index a619065f26878..2004b24f05c41 100644 --- a/sklearn/utils/extmath.py +++ b/sklearn/utils/extmath.py @@ -15,8 +15,7 @@ import warnings import numpy as np -from scipy import linalg -from scipy.sparse import issparse, csr_matrix +from scipy import linalg, sparse from . import check_random_state, deprecated from .fixes import np_version @@ -60,9 +59,9 @@ def row_norms(X, squared=False): Performs no input validation. """ - if issparse(X): - if not isinstance(X, csr_matrix): - X = csr_matrix(X) + if sparse.issparse(X): + if not isinstance(X, sparse.csr_matrix): + X = sparse.csr_matrix(X) norms = csr_row_norms(X) else: norms = np.einsum('ij,ij->i', X, X) @@ -131,7 +130,7 @@ def safe_sparse_dot(a, b, dense_output=False): dot_product : array or sparse matrix sparse if ``a`` or ``b`` is sparse and ``dense_output=False``. """ - if issparse(a) or issparse(b): + if sparse.issparse(a) or sparse.issparse(b): ret = a * b if dense_output and hasattr(ret, "toarray"): ret = ret.toarray() @@ -307,6 +306,12 @@ def randomized_svd(M, n_components, n_oversamples=10, n_iter='auto', analysis A. Szlam et al. 2014 """ + if isinstance(M, (sparse.lil_matrix, sparse.dok_matrix)): + warnings.warn("Calculating SVD of a {} is expensive. " + "csr_matrix is more efficient.".format( + type(M).__name__), + sparse.SparseEfficiencyWarning) + random_state = check_random_state(random_state) n_random = n_components + n_oversamples n_samples, n_features = M.shape @@ -620,7 +625,7 @@ def safe_min(X): Adapated from http://stackoverflow.com/q/13426580 """ - if issparse(X): + if sparse.issparse(X): if len(X.data) == 0: return 0 m = X.data.min() @@ -633,7 +638,7 @@ def make_nonnegative(X, min_value=0): """Ensure `X.min()` >= `min_value`.""" min_ = safe_min(X) if min_ < min_value: - if issparse(X): + if sparse.issparse(X): raise ValueError("Cannot make the data matrix" " nonnegative because it is sparse." " Adding a value to every entry would" diff --git a/sklearn/utils/tests/test_extmath.py b/sklearn/utils/tests/test_extmath.py index d89e2a1aa1223..522435d32deda 100644 --- a/sklearn/utils/tests/test_extmath.py +++ b/sklearn/utils/tests/test_extmath.py @@ -365,6 +365,21 @@ def test_randomized_svd_power_iteration_normalizer(): assert_greater(15, np.abs(error_2 - error)) +def test_randomized_svd_sparse_warnings(): + # randomized_svd throws a warning for lil and dok matrix + rng = np.random.RandomState(42) + X = make_low_rank_matrix(50, 20, effective_rank=10, random_state=rng) + n_components = 5 + for cls in (sparse.lil_matrix, sparse.dok_matrix): + X = cls(X) + assert_warns_message( + sparse.SparseEfficiencyWarning, + "Calculating SVD of a {} is expensive. 
" + "csr_matrix is more efficient.".format(cls.__name__), + randomized_svd, X, n_components, n_iter=1, + power_iteration_normalizer='none') + + def test_svd_flip(): # Check that svd_flip works in both situations, and reconstructs input. rs = np.random.RandomState(1999) From 0badbeac615679a3ca2b94de6a2504c056521669 Mon Sep 17 00:00:00 2001 From: Thomas Fan Date: Sun, 17 Jun 2018 10:54:37 -0400 Subject: [PATCH 08/24] FIX Uses self.scoring for score function (#11192) --- doc/whats_new/v0.20.rst | 5 +++ sklearn/linear_model/logistic.py | 37 ++++++++++++++++- sklearn/linear_model/tests/test_logistic.py | 44 +++++++++++++++++++++ 3 files changed, 85 insertions(+), 1 deletion(-) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 87569d8649d86..32b4ef3098263 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -332,6 +332,11 @@ Classifiers and regressors returning incorrect probabilities in the case of binary outcomes. :issue:`9939` by :user:`Roger Westover `. +- Fixed a bug in :class:`linear_model.LogisticRegressionCV` where the + ``score`` method always computes accuracy, not the metric given by + the ``scoring`` parameter. + :issue:`10998` by :user:`Thomas Fan `. + - Fixed a bug in :class:`linear_model.OrthogonalMatchingPursuit` that was broken when setting ``normalize=False``. :issue:`10071` by `Alexandre Gramfort`_. diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 3e8a104d57d75..30e8560940500 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -29,7 +29,8 @@ from ..utils.fixes import logsumexp from ..utils.optimize import newton_cg from ..utils.validation import check_X_y -from ..exceptions import NotFittedError, ConvergenceWarning +from..exceptions import (NotFittedError, ConvergenceWarning, + ChangedBehaviorWarning) from ..utils.multiclass import check_classification_targets from ..externals.joblib import Parallel, delayed from ..model_selection import check_cv @@ -1789,3 +1790,37 @@ def fit(self, X, y, sample_weight=None): self.C_ = np.asarray(self.C_) return self + + def score(self, X, y, sample_weight=None): + """Returns the score using the `scoring` option on the given + test data and labels. + + Parameters + ---------- + X : array-like, shape = (n_samples, n_features) + Test samples. + + y : array-like, shape = (n_samples,) + True labels for X. + + sample_weight : array-like, shape = [n_samples], optional + Sample weights. + + Returns + ------- + score : float + Score of self.predict(X) wrt. y. + + """ + + if self.scoring is not None: + warnings.warn("The long-standing behavior to use the " + "accuracy score has changed. The scoring " + "parameter is now used. 
" + "This warning will disappear in version 0.22.", + ChangedBehaviorWarning) + scoring = self.scoring or 'accuracy' + if isinstance(scoring, six.string_types): + scoring = get_scorer(scoring) + + return scoring(self, X, y, sample_weight=sample_weight) diff --git a/sklearn/linear_model/tests/test_logistic.py b/sklearn/linear_model/tests/test_logistic.py index e363fed2abb9d..9939644f4d4ea 100644 --- a/sklearn/linear_model/tests/test_logistic.py +++ b/sklearn/linear_model/tests/test_logistic.py @@ -22,6 +22,7 @@ from sklearn.utils.testing import assert_warns_message from sklearn.exceptions import ConvergenceWarning +from sklearn.exceptions import ChangedBehaviorWarning from sklearn.linear_model.logistic import ( LogisticRegression, logistic_regression_path, LogisticRegressionCV, @@ -92,6 +93,49 @@ def test_error(): assert_raise_message(ValueError, msg, LR(max_iter="test").fit, X, Y1) +def test_logistic_cv_mock_scorer(): + + class MockScorer(object): + def __init__(self): + self.calls = 0 + self.scores = [0.1, 0.4, 0.8, 0.5] + + def __call__(self, model, X, y, sample_weight=None): + score = self.scores[self.calls % len(self.scores)] + self.calls += 1 + return score + + mock_scorer = MockScorer() + Cs = [1, 2, 3, 4] + cv = 2 + + lr = LogisticRegressionCV(Cs=Cs, scoring=mock_scorer, cv=cv) + lr.fit(X, Y1) + + # Cs[2] has the highest score (0.8) from MockScorer + assert lr.C_[0] == Cs[2] + + # scorer called 8 times (cv*len(Cs)) + assert mock_scorer.calls == cv * len(Cs) + + # reset mock_scorer + mock_scorer.calls = 0 + with pytest.warns(ChangedBehaviorWarning): + custom_score = lr.score(X, lr.predict(X)) + + assert custom_score == mock_scorer.scores[0] + assert mock_scorer.calls == 1 + + +def test_logistic_cv_score_does_not_warn_by_default(): + lr = LogisticRegressionCV(cv=2) + lr.fit(X, Y1) + + with pytest.warns(None) as record: + lr.score(X, lr.predict(X)) + assert len(record) == 0 + + def test_lr_liblinear_warning(): n_samples, n_features = iris.data.shape target = iris.target_names[iris.target] From c80665f47c71104efe84e09124b30a68fef4874a Mon Sep 17 00:00:00 2001 From: Alexandre Gramfort Date: Mon, 18 Jun 2018 08:07:33 +0200 Subject: [PATCH 09/24] BLD fix sphx gallery errors (#11307) --- examples/manifold/plot_compare_methods.py | 2 +- examples/manifold/plot_t_sne_perplexity.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/manifold/plot_compare_methods.py b/examples/manifold/plot_compare_methods.py index 34e161dfb0461..3af18269aeaae 100644 --- a/examples/manifold/plot_compare_methods.py +++ b/examples/manifold/plot_compare_methods.py @@ -1,6 +1,6 @@ """ ========================================= - Comparison of Manifold Learning methods +Comparison of Manifold Learning methods ========================================= An illustration of dimensionality reduction on the S-curve dataset diff --git a/examples/manifold/plot_t_sne_perplexity.py b/examples/manifold/plot_t_sne_perplexity.py index c1cbe0001d0ee..0fbade5746afa 100644 --- a/examples/manifold/plot_t_sne_perplexity.py +++ b/examples/manifold/plot_t_sne_perplexity.py @@ -1,6 +1,6 @@ """ ============================================================================= - t-SNE: The effect of various perplexity values on the shape +t-SNE: The effect of various perplexity values on the shape ============================================================================= An illustration of t-SNE on the two concentric circles and the S-curve From 5a063ed57e71e9acbe983b07725d5af61c8d7e54 Mon Sep 17 00:00:00 2001 From: 
Yufeng
Date: Mon, 18 Jun 2018 06:34:35 -0700
Subject: [PATCH 10/24] proposal to use .joblib file extension (#11230)

Recommend using `filename.joblib` instead of `filename.pkl` for models
persisted via the joblib library, to reduce confusion when it comes time
to load a model, as it will be clearer whether a file was saved using the
`pickle` or the `joblib` library.
---
 doc/modules/model_persistence.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/modules/model_persistence.rst b/doc/modules/model_persistence.rst
index 15ecf3c2d88f6..f5173e5d9f3fe 100644
--- a/doc/modules/model_persistence.rst
+++ b/doc/modules/model_persistence.rst
@@ -42,12 +42,12 @@ is often the case for fitted scikit-learn estimators, but can only pickle to the
 disk and not to a string::

     >>> from sklearn.externals import joblib
-    >>> joblib.dump(clf, 'filename.pkl') # doctest: +SKIP
+    >>> joblib.dump(clf, 'filename.joblib') # doctest: +SKIP

 Later you can load back the pickled model (possibly in another Python
 process) with::

-    >>> clf = joblib.load('filename.pkl') # doctest:+SKIP
+    >>> clf = joblib.load('filename.joblib') # doctest:+SKIP

 .. note::

From 2aee027f784a394eb57d8b129bc43e98304bf3c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?=
Date: Mon, 18 Jun 2018 15:37:35 +0200
Subject: [PATCH 11/24] DOC: use .joblib file extension rather than .pkl

---
 doc/tutorial/basic/tutorial.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst
index ece691f7de973..781495df9931b 100644
--- a/doc/tutorial/basic/tutorial.rst
+++ b/doc/tutorial/basic/tutorial.rst
@@ -239,12 +239,12 @@ which is more efficient on big data but it can only pickle to the disk
 and not to a string::

     >>> from sklearn.externals import joblib
-    >>> joblib.dump(clf, 'filename.pkl') # doctest: +SKIP
+    >>> joblib.dump(clf, 'filename.joblib') # doctest: +SKIP

 Later, you can reload the pickled model (possibly in another Python
 process) with::

-    >>> clf = joblib.load('filename.pkl') # doctest:+SKIP
+    >>> clf = joblib.load('filename.joblib') # doctest:+SKIP

 .. note::

From 877ab46084044d40e4168edc5e43864995e8c051 Mon Sep 17 00:00:00 2001
From: twosigmajab
Date: Mon, 18 Jun 2018 18:42:39 -0400
Subject: [PATCH 12/24] DOC Add libraries.io and changelog links (#11298)

---
 README.rst        | 6 ++++++
 doc/whats_new.rst | 3 +++
 2 files changed, 9 insertions(+)

diff --git a/README.rst b/README.rst
index cf011277f539a..4df228acd4c49 100644
--- a/README.rst
+++ b/README.rst
@@ -78,6 +78,12 @@ or ``conda``::

 The documentation includes more detailed `installation instructions `_.

+Changelog
+---------
+
+See the `changelog `__
+for a history of notable changes to scikit-learn.
+
 Development
 -----------

diff --git a/doc/whats_new.rst b/doc/whats_new.rst
index 63b1b309b8449..0e7345836f482 100644
--- a/doc/whats_new.rst
+++ b/doc/whats_new.rst
@@ -8,6 +8,9 @@ Release History
 Release notes for current and recent releases are detailed on this page, with
 :ref:`previous releases ` linked below.

+**Tip:** `Subscribe to scikit-learn releases `__
+on libraries.io to be notified when new versions are released.
+
 .. include:: whats_new/v0.20.rst
 .. 
include:: whats_new/v0.19.rst From c9e48bfae22b9a6a9dd1871efbbc6836cd38e787 Mon Sep 17 00:00:00 2001 From: jeremiedbb <34657725+jeremiedbb@users.noreply.github.com> Date: Tue, 19 Jun 2018 02:40:33 +0200 Subject: [PATCH 13/24] DOC reorganize datasets documentation page (#11180) --- doc/datasets/covtype.rst | 3 +- doc/datasets/index.rst | 266 ++++++++++++------ doc/datasets/kddcup99.rst | 3 +- doc/datasets/labeled_faces.rst | 6 +- doc/datasets/mldata.rst | 82 ------ doc/datasets/olivetti_faces.rst | 2 +- doc/datasets/rcv1.rst | 2 +- doc/datasets/twenty_newsgroups.rst | 9 +- .../datasets/descr/boston_house_prices.rst | 16 +- sklearn/datasets/descr/breast_cancer.rst | 17 +- sklearn/datasets/descr/diabetes.rst | 33 ++- sklearn/datasets/descr/digits.rst | 17 +- sklearn/datasets/descr/iris.rst | 18 +- sklearn/datasets/descr/linnerud.rst | 15 +- sklearn/datasets/descr/wine_data.rst | 81 +++--- 15 files changed, 298 insertions(+), 272 deletions(-) delete mode 100644 doc/datasets/mldata.rst diff --git a/doc/datasets/covtype.rst b/doc/datasets/covtype.rst index c0ed4ea08af3d..5996ec74abf78 100644 --- a/doc/datasets/covtype.rst +++ b/doc/datasets/covtype.rst @@ -1,8 +1,7 @@ - .. _covtype: Forest covertypes -================= +----------------- The samples in this dataset correspond to 30×30m patches of forest in the US, collected for the task of predicting each patch's cover type, diff --git a/doc/datasets/index.rst b/doc/datasets/index.rst index 1d27bdfd7f628..aac8f372d247b 100644 --- a/doc/datasets/index.rst +++ b/doc/datasets/index.rst @@ -43,12 +43,15 @@ The datasets also contain a description in ``DESCR`` and some contain ``feature_names`` and ``target_names``. See the dataset descriptions below for details. +.. _toy_datasets: Toy datasets ============ scikit-learn comes with a few small standard datasets that do not -require to download any file from some external website. +require to download any file from some external website. + +*desc* .. autosummary:: @@ -67,46 +70,81 @@ These datasets are useful to quickly illustrate the behavior of the various algorithms implemented in scikit-learn. They are however often too small to be representative of real world machine learning tasks. -.. _sample_images: +.. toctree:: + :maxdepth: 2 + :hidden: -Sample images -============= + boston_house_prices + iris + diabetes + digits + linnerud + wine_data + breast_cancer -Scikit-learn also embed a couple of sample JPEG images published under Creative -Commons license by their authors. Those image can be useful to test algorithms -and pipeline on 2D data. +.. include:: ../../sklearn/datasets/descr/boston_house_prices.rst + +.. include:: ../../sklearn/datasets/descr/iris.rst + +.. include:: ../../sklearn/datasets/descr/diabetes.rst + +.. include:: ../../sklearn/datasets/descr/digits.rst + +.. include:: ../../sklearn/datasets/descr/linnerud.rst + +.. include:: ../../sklearn/datasets/descr/wine_data.rst + +.. include:: ../../sklearn/datasets/descr/breast_cancer.rst + +.. _real_world_datasets: + +Real world datasets +=================== + +*Add desc* .. autosummary:: :toctree: ../modules/generated/ :template: function.rst - load_sample_images - load_sample_image + fetch_olivetti_faces + fetch_20newsgroups + fetch_20newsgroups_vectorized + fetch_lfw_people + fetch_lfw_pairs + fetch_covtype + fetch_rcv1 + fetch_kddcup99 -.. image:: ../auto_examples/cluster/images/sphx_glr_plot_color_quantization_001.png - :target: ../auto_examples/cluster/plot_color_quantization.html - :scale: 30 - :align: right +.. 
toctree:: + :maxdepth: 2 + :hidden: -.. warning:: + olivetti_faces + twenty_newsgroups + labeled_faces + covtype + rcv1 + kddcup99 - The default coding of images is based on the ``uint8`` dtype to - spare memory. Often machine learning algorithms work best if the - input is converted to a floating point representation first. Also, - if you plan to use ``matplotlib.pyplpt.imshow`` don't forget to scale to the range - 0 - 1 as done in the following example. +.. include:: ./olivetti_faces.rst -.. topic:: Examples: +.. include:: ./twenty_newsgroups.rst - * :ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py` +.. include:: ./labeled_faces.rst + +.. include:: ./covtype.rst + +.. include:: ./rcv1.rst +.. include:: ./kddcup99.rst -.. _sample_generators: +.. _generated_datasets: -Sample generators -================= +Generated datasets +================== In addition, scikit-learn includes various random sample generators that can be used to build artificial datasets of controlled size and complexity. @@ -219,10 +257,50 @@ Generators for decomposition make_sparse_spd_matrix +.. _loading_other_datasets: + +Loading other datasets +====================== + +.. _sample_images: + +Sample images +------------- + +Scikit-learn also embed a couple of sample JPEG images published under Creative +Commons license by their authors. Those image can be useful to test algorithms +and pipeline on 2D data. + +.. autosummary:: + + :toctree: ../modules/generated/ + :template: function.rst + + load_sample_images + load_sample_image + +.. image:: ../auto_examples/cluster/images/sphx_glr_plot_color_quantization_001.png + :target: ../auto_examples/cluster/plot_color_quantization.html + :scale: 30 + :align: right + + +.. warning:: + + The default coding of images is based on the ``uint8`` dtype to + spare memory. Often machine learning algorithms work best if the + input is converted to a floating point representation first. Also, + if you plan to use ``matplotlib.pyplpt.imshow`` don't forget to scale to the range + 0 - 1 as done in the following example. + +.. topic:: Examples: + + * :ref:`sphx_glr_auto_examples_cluster_plot_color_quantization.py` + .. _libsvm_loader: Datasets in svmlight / libsvm format -==================================== +------------------------------------ scikit-learn includes utility functions for loading datasets in the svmlight / libsvm format. In this format, each line @@ -256,10 +334,93 @@ features:: _`Faster API-compatible implementation`: https://github.com/mblondel/svmlight-loader +.. + For doctests: + + >>> import numpy as np + >>> import os + >>> import tempfile + >>> # Create a temporary folder for the data fetcher + >>> custom_data_home = tempfile.mkdtemp() + >>> os.makedirs(os.path.join(custom_data_home, 'mldata')) + + +.. _mldata: + +Downloading datasets from the mldata.org repository +--------------------------------------------------- + +`mldata.org `_ is a public repository for machine learning +data, supported by the `PASCAL network `_ . + +The ``sklearn.datasets`` package is able to directly download data +sets from the repository using the function +:func:`sklearn.datasets.fetch_mldata`. 
+ +For example, to download the MNIST digit recognition database:: + + >>> from sklearn.datasets import fetch_mldata + >>> mnist = fetch_mldata('MNIST original', data_home=custom_data_home) + +The MNIST database contains a total of 70000 examples of handwritten digits +of size 28x28 pixels, labeled from 0 to 9:: + + >>> mnist.data.shape + (70000, 784) + >>> mnist.target.shape + (70000,) + >>> np.unique(mnist.target) + array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) + +After the first download, the dataset is cached locally in the path +specified by the ``data_home`` keyword argument, which defaults to +``~/scikit_learn_data/``:: + + >>> os.listdir(os.path.join(custom_data_home, 'mldata')) + ['mnist-original.mat'] + +Data sets in `mldata.org `_ do not adhere to a strict +naming or formatting convention. :func:`sklearn.datasets.fetch_mldata` is +able to make sense of the most common cases, but allows to tailor the +defaults to individual datasets: + +* The data arrays in `mldata.org `_ are most often + shaped as ``(n_features, n_samples)``. This is the opposite of the + ``scikit-learn`` convention, so :func:`sklearn.datasets.fetch_mldata` + transposes the matrix by default. The ``transpose_data`` keyword controls + this behavior:: + + >>> iris = fetch_mldata('iris', data_home=custom_data_home) + >>> iris.data.shape + (150, 4) + >>> iris = fetch_mldata('iris', transpose_data=False, + ... data_home=custom_data_home) + >>> iris.data.shape + (4, 150) + +* For datasets with multiple columns, :func:`sklearn.datasets.fetch_mldata` + tries to identify the target and data columns and rename them to ``target`` + and ``data``. This is done by looking for arrays named ``label`` and + ``data`` in the dataset, and failing that by choosing the first array to be + ``target`` and the second to be ``data``. This behavior can be changed with + the ``target_name`` and ``data_name`` keywords, setting them to a specific + name or index number (the name and order of the columns in the datasets + can be found at its `mldata.org `_ under the tab "Data":: + + >>> iris2 = fetch_mldata('datasets-UCI iris', target_name=1, data_name=0, + ... data_home=custom_data_home) + >>> iris3 = fetch_mldata('datasets-UCI iris', target_name='class', + ... data_name='double0', data_home=custom_data_home) + + +.. + >>> import shutil + >>> shutil.rmtree(custom_data_home) + .. _external_datasets: Loading from external datasets -============================== +------------------------------ scikit-learn works on any numeric data stored as numpy arrays or scipy sparse matrices. Other types that are convertible to numeric arrays such as pandas @@ -304,56 +465,3 @@ Note: if you manage your own numerical data it is recommended to use an optimized file format such as HDF5 to reduce data load times. Various libraries such as H5Py, PyTables and pandas provides a Python interface for reading and writing data in that format. - -.. make sure everything is in a toc tree - -.. toctree:: - :maxdepth: 2 - :hidden: - - olivetti_faces - twenty_newsgroups - mldata - labeled_faces - covtype - rcv1 - kddcup99 - - -.. include:: olivetti_faces.rst - -.. include:: twenty_newsgroups.rst - -.. include:: mldata.rst - -.. include:: labeled_faces.rst - -.. include:: covtype.rst - -.. include:: rcv1.rst - -.. include:: kddcup99.rst - -.. _boston_house_prices: - -.. include:: ../../sklearn/datasets/descr/boston_house_prices.rst - -.. _breast_cancer: - -.. include:: ../../sklearn/datasets/descr/breast_cancer.rst - -.. _diabetes: - -.. 
include:: ../../sklearn/datasets/descr/diabetes.rst - -.. _digits: - -.. include:: ../../sklearn/datasets/descr/digits.rst - -.. _iris: - -.. include:: ../../sklearn/datasets/descr/iris.rst - -.. _linnerud: - -.. include:: ../../sklearn/datasets/descr/linnerud.rst diff --git a/doc/datasets/kddcup99.rst b/doc/datasets/kddcup99.rst index 407b2d8e2c0bf..603f3d0e64c6b 100644 --- a/doc/datasets/kddcup99.rst +++ b/doc/datasets/kddcup99.rst @@ -1,8 +1,7 @@ - .. _kddcup99: Kddcup 99 dataset -================= +----------------- The KDD Cup '99 dataset was created by processing the tcpdump portions of the 1998 DARPA Intrusion Detection System (IDS) Evaluation dataset, diff --git a/doc/datasets/labeled_faces.rst b/doc/datasets/labeled_faces.rst index 0e70aca8aa705..a7b592ae1a941 100644 --- a/doc/datasets/labeled_faces.rst +++ b/doc/datasets/labeled_faces.rst @@ -1,7 +1,7 @@ .. _labeled_faces_in_the_wild: The Labeled Faces in the Wild face recognition dataset -====================================================== +------------------------------------------------------ This dataset is a collection of JPEG pictures of famous people collected over the internet, all details are available on the official website: @@ -25,7 +25,7 @@ face detector from various online websites. Usage ------ +~~~~~ ``scikit-learn`` provides two loaders that will automatically download, cache, parse the metadata files, decode the jpeg and convert the @@ -113,6 +113,6 @@ an evaluation ``10_folds`` set meant to compute performance metrics using a Examples --------- +~~~~~~~~ :ref:`sphx_glr_auto_examples_applications_plot_face_recognition.py` diff --git a/doc/datasets/mldata.rst b/doc/datasets/mldata.rst deleted file mode 100644 index b098abbdcef92..0000000000000 --- a/doc/datasets/mldata.rst +++ /dev/null @@ -1,82 +0,0 @@ -.. - For doctests: - - >>> import numpy as np - >>> import os - >>> import tempfile - >>> # Create a temporary folder for the data fetcher - >>> custom_data_home = tempfile.mkdtemp() - >>> os.makedirs(os.path.join(custom_data_home, 'mldata')) - - -.. _mldata: - -Downloading datasets from the mldata.org repository -=================================================== - -`mldata.org `_ is a public repository for machine learning -data, supported by the `PASCAL network `_ . - -The ``sklearn.datasets`` package is able to directly download data -sets from the repository using the function -:func:`sklearn.datasets.fetch_mldata`. - -For example, to download the MNIST digit recognition database:: - - >>> from sklearn.datasets import fetch_mldata - >>> mnist = fetch_mldata('MNIST original', data_home=custom_data_home) - -The MNIST database contains a total of 70000 examples of handwritten digits -of size 28x28 pixels, labeled from 0 to 9:: - - >>> mnist.data.shape - (70000, 784) - >>> mnist.target.shape - (70000,) - >>> np.unique(mnist.target) - array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]) - -After the first download, the dataset is cached locally in the path -specified by the ``data_home`` keyword argument, which defaults to -``~/scikit_learn_data/``:: - - >>> os.listdir(os.path.join(custom_data_home, 'mldata')) - ['mnist-original.mat'] - -Data sets in `mldata.org `_ do not adhere to a strict -naming or formatting convention. :func:`sklearn.datasets.fetch_mldata` is -able to make sense of the most common cases, but allows to tailor the -defaults to individual datasets: - -* The data arrays in `mldata.org `_ are most often - shaped as ``(n_features, n_samples)``. 
This is the opposite of the - ``scikit-learn`` convention, so :func:`sklearn.datasets.fetch_mldata` - transposes the matrix by default. The ``transpose_data`` keyword controls - this behavior:: - - >>> iris = fetch_mldata('iris', data_home=custom_data_home) - >>> iris.data.shape - (150, 4) - >>> iris = fetch_mldata('iris', transpose_data=False, - ... data_home=custom_data_home) - >>> iris.data.shape - (4, 150) - -* For datasets with multiple columns, :func:`sklearn.datasets.fetch_mldata` - tries to identify the target and data columns and rename them to ``target`` - and ``data``. This is done by looking for arrays named ``label`` and - ``data`` in the dataset, and failing that by choosing the first array to be - ``target`` and the second to be ``data``. This behavior can be changed with - the ``target_name`` and ``data_name`` keywords, setting them to a specific - name or index number (the name and order of the columns in the datasets - can be found at its `mldata.org `_ under the tab "Data":: - - >>> iris2 = fetch_mldata('datasets-UCI iris', target_name=1, data_name=0, - ... data_home=custom_data_home) - >>> iris3 = fetch_mldata('datasets-UCI iris', target_name='class', - ... data_name='double0', data_home=custom_data_home) - - -.. - >>> import shutil - >>> shutil.rmtree(custom_data_home) diff --git a/doc/datasets/olivetti_faces.rst b/doc/datasets/olivetti_faces.rst index 19c5601f7cac5..71be4f66a2fc1 100644 --- a/doc/datasets/olivetti_faces.rst +++ b/doc/datasets/olivetti_faces.rst @@ -2,7 +2,7 @@ .. _olivetti_faces: The Olivetti faces dataset -========================== +-------------------------- `This dataset contains a set of face images`_ taken between April 1992 and April diff --git a/doc/datasets/rcv1.rst b/doc/datasets/rcv1.rst index bcc0c95ef8f34..afbe797cc0c0b 100644 --- a/doc/datasets/rcv1.rst +++ b/doc/datasets/rcv1.rst @@ -2,7 +2,7 @@ .. _rcv1: RCV1 dataset -============ +------------ Reuters Corpus Volume I (RCV1) is an archive of over 800,000 manually categorized newswire stories made available by Reuters, Ltd. for research purposes. The dataset is extensively described in [1]_. diff --git a/doc/datasets/twenty_newsgroups.rst b/doc/datasets/twenty_newsgroups.rst index 5d6906c3cfa14..5aaca66c5d670 100644 --- a/doc/datasets/twenty_newsgroups.rst +++ b/doc/datasets/twenty_newsgroups.rst @@ -1,7 +1,7 @@ .. _20newsgroups: The 20 newsgroups text dataset -============================== +------------------------------ The 20 newsgroups dataset comprises around 18000 newsgroups posts on 20 topics split in two subsets: one for training (or development) @@ -19,7 +19,7 @@ returns ready-to-use features, i.e., it is not necessary to use a feature extractor. Usage ------ +~~~~~ The :func:`sklearn.datasets.fetch_20newsgroups` function is a data fetching / caching functions that downloads the data archive from @@ -81,7 +81,7 @@ list of the categories to load to the array([0, 1, 1, 1, 0, 1, 1, 0, 0, 0]) Converting text to vectors --------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~ In order to feed predictive or clustering models with the text data, one first need to turn the text into vectors of numerical values suitable @@ -115,7 +115,8 @@ ready-to-use tfidf features instead of file names. Filtering text for more realistic training ------------------------------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + It is easy for a classifier to overfit on particular things that appear in the 20 Newsgroups data, such as newsgroup headers. 
Many classifiers achieve very high F-scores, but their results would not generalize to other documents that diff --git a/sklearn/datasets/descr/boston_house_prices.rst b/sklearn/datasets/descr/boston_house_prices.rst index 31227b087dfff..dec9b999cd592 100644 --- a/sklearn/datasets/descr/boston_house_prices.rst +++ b/sklearn/datasets/descr/boston_house_prices.rst @@ -1,15 +1,13 @@ -Boston House Prices dataset -=========================== +.. _boston_dataset: -Notes ------- -Data Set Characteristics: +Boston house prices dataset +--------------------------- + +**Data Set Characteristics:** :Number of Instances: 506 - :Number of Attributes: 13 numeric/categorical predictive - - :Median Value (attribute 14) is usually the target + :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target. :Attribute Information (in order): - CRIM per capita crime rate by town @@ -46,7 +44,7 @@ pages 244-261 of the latter. The Boston house-price data has been used in many machine learning papers that address regression problems. -**References** +.. topic:: References - Belsley, Kuh & Welsch, 'Regression diagnostics: Identifying Influential Data and Sources of Collinearity', Wiley, 1980. 244-261. - Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning. In Proceedings on the Tenth International Conference of Machine Learning, 236-243, University of Massachusetts, Amherst. Morgan Kaufmann. diff --git a/sklearn/datasets/descr/breast_cancer.rst b/sklearn/datasets/descr/breast_cancer.rst index 547b41021ef2f..fea6b6f017c16 100644 --- a/sklearn/datasets/descr/breast_cancer.rst +++ b/sklearn/datasets/descr/breast_cancer.rst @@ -1,9 +1,10 @@ -Breast Cancer Wisconsin (Diagnostic) Database -============================================= +.. _breast_cancer_dataset: + +Breast cancer wisconsin (diagnostic) dataset +-------------------------------------------- + +**Data Set Characteristics:** -Notes ------ -Data Set Characteristics: :Number of Instances: 569 :Number of Attributes: 30 numeric, predictive attributes and the class @@ -103,8 +104,8 @@ This database is also available through the UW CS ftp server: ftp ftp.cs.wisc.edu cd math-prog/cpo-dataset/machine-learn/WDBC/ -References ----------- +.. topic:: References + - W.N. Street, W.H. Wolberg and O.L. Mangasarian. Nuclear feature extraction for breast tumor diagnosis. IS&T/SPIE 1993 International Symposium on Electronic Imaging: Science and Technology, volume 1905, pages 861-870, @@ -114,4 +115,4 @@ References July-August 1995. - W.H. Wolberg, W.N. Street, and O.L. Mangasarian. Machine learning techniques to diagnose breast cancer from fine-needle aspirates. Cancer Letters 77 (1994) - 163-171. + 163-171. \ No newline at end of file diff --git a/sklearn/datasets/descr/diabetes.rst b/sklearn/datasets/descr/diabetes.rst index df102a1bec1fa..f75beafd37b95 100644 --- a/sklearn/datasets/descr/diabetes.rst +++ b/sklearn/datasets/descr/diabetes.rst @@ -1,15 +1,14 @@ -Diabetes dataset -================ +.. _diabetes_dataset: -Notes ------ +Diabetes dataset +---------------- Ten baseline variables, age, sex, body mass index, average blood pressure, and six blood serum measurements were obtained for each of n = 442 diabetes patients, as well as the response of interest, a quantitative measure of disease progression one year after baseline. 
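.. An illustrative aside, not part of the upstream file: the dataset
   described above ships with scikit-learn and can be loaded directly;
   a minimal sketch::

       >>> from sklearn.datasets import load_diabetes
       >>> diabetes = load_diabetes()
       >>> diabetes.data.shape
       (442, 10)
       >>> diabetes.target.shape
       (442,)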
-Data Set Characteristics: +**Data Set Characteristics:** :Number of Instances: 442 @@ -17,17 +16,17 @@ Data Set Characteristics: :Target: Column 11 is a quantitative measure of disease progression one year after baseline - :Attributes: - :Age: - :Sex: - :Body mass index: - :Average blood pressure: - :S1: - :S2: - :S3: - :S4: - :S5: - :S6: + :Attribute Information: + - Age + - Sex + - Body mass index + - Average blood pressure + - S1 + - S2 + - S3 + - S4 + - S5 + - S6 Note: Each of these 10 feature variables have been mean centered and scaled by the standard deviation times `n_samples` (i.e. the sum of squares of each column totals 1). @@ -36,4 +35,4 @@ http://www4.stat.ncsu.edu/~boos/var.select/diabetes.html For more information see: Bradley Efron, Trevor Hastie, Iain Johnstone and Robert Tibshirani (2004) "Least Angle Regression," Annals of Statistics (with discussion), 407-499. -(http://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf) +(http://web.stanford.edu/~hastie/Papers/LARS/LeastAngle_2002.pdf) \ No newline at end of file diff --git a/sklearn/datasets/descr/digits.rst b/sklearn/datasets/descr/digits.rst index a30514474f524..b4ecb714a01b9 100644 --- a/sklearn/datasets/descr/digits.rst +++ b/sklearn/datasets/descr/digits.rst @@ -1,9 +1,10 @@ -Optical Recognition of Handwritten Digits Data Set -=================================================== +.. _digits_dataset: + +Optical recognition of handwritten digits dataset +-------------------------------------------------- + +**Data Set Characteristics:** -Notes ------ -Data Set Characteristics: :Number of Instances: 5620 :Number of Attributes: 64 :Attribute Information: 8x8 image of integer pixels in the range 0..16. @@ -31,8 +32,8 @@ T. Candela, D. L. Dimmick, J. Geist, P. J. Grother, S. A. Janet, and C. L. Wilson, NIST Form-Based Handprint Recognition System, NISTIR 5469, 1994. -References ----------- +.. topic:: References + - C. Kaynak (1995) Methods of Combining Multiple Classifiers and Their Applications to Handwritten Digit Recognition, MSc Thesis, Institute of Graduate Studies in Science and Engineering, Bogazici University. @@ -42,4 +43,4 @@ References Electrical and Electronic Engineering Nanyang Technological University. 2005. - Claudio Gentile. A New Approximate Maximal Margin Classification - Algorithm. NIPS. 2000. + Algorithm. NIPS. 2000. \ No newline at end of file diff --git a/sklearn/datasets/descr/iris.rst b/sklearn/datasets/descr/iris.rst index 25cada25f54fb..a35edc728c7d9 100644 --- a/sklearn/datasets/descr/iris.rst +++ b/sklearn/datasets/descr/iris.rst @@ -1,9 +1,10 @@ -Iris Plants Database -==================== +.. _iris_dataset: + +Iris plants dataset +-------------------- + +**Data Set Characteristics:** -Notes ------ -Data Set Characteristics: :Number of Instances: 150 (50 in each of three classes) :Number of Attributes: 4 numeric, predictive attributes and the class :Attribute Information: @@ -15,6 +16,7 @@ Data Set Characteristics: - Iris-Setosa - Iris-Versicolour - Iris-Virginica + :Summary Statistics: ============== ==== ==== ======= ===== ==================== @@ -43,8 +45,8 @@ data set contains 3 classes of 50 instances each, where each class refers to a type of iris plant. One class is linearly separable from the other 2; the latter are NOT linearly separable from each other. -References ----------- +.. topic:: References + - Fisher, R.A. 
"The use of multiple measurements in taxonomic problems" Annual Eugenics, 7, Part II, 179-188 (1936); also in "Contributions to Mathematical Statistics" (John Wiley, NY, 1950). @@ -58,4 +60,4 @@ References on Information Theory, May 1972, 431-433. - See also: 1988 MLC Proceedings, 54-64. Cheeseman et al"s AUTOCLASS II conceptual clustering system finds 3 classes in the data. - - Many, many more ... + - Many, many more ... \ No newline at end of file diff --git a/sklearn/datasets/descr/linnerud.rst b/sklearn/datasets/descr/linnerud.rst index d790d3c0c9086..848ee193e1adc 100644 --- a/sklearn/datasets/descr/linnerud.rst +++ b/sklearn/datasets/descr/linnerud.rst @@ -1,9 +1,10 @@ +.. _linnerrud_dataset: + Linnerrud dataset -================= +----------------- + +**Data Set Characteristics:** -Notes ------ -Data Set Characteristics: :Number of Instances: 20 :Number of Attributes: 3 :Missing Attribute Values: None @@ -16,6 +17,6 @@ The Linnerud dataset constains two small dataset: - *physiological*: Data frame with 20 observations on 3 physiological variables: Chins, Situps and Jumps. -References ----------- - * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: Editions Technic. +.. topic:: References + + * Tenenhaus, M. (1998). La regression PLS: theorie et pratique. Paris: Editions Technic. \ No newline at end of file diff --git a/sklearn/datasets/descr/wine_data.rst b/sklearn/datasets/descr/wine_data.rst index 3d3341874a584..f43e6524130bd 100644 --- a/sklearn/datasets/descr/wine_data.rst +++ b/sklearn/datasets/descr/wine_data.rst @@ -1,29 +1,30 @@ -Wine Data Database -==================== +.. _wine_dataset: + +Wine recognition dataset +------------------------ + +**Data Set Characteristics:** -Notes ------ -Data Set Characteristics: :Number of Instances: 178 (50 in each of three classes) :Number of Attributes: 13 numeric, predictive attributes and the class :Attribute Information: - - 1) Alcohol - - 2) Malic acid - - 3) Ash - - 4) Alcalinity of ash - - 5) Magnesium - - 6) Total phenols - - 7) Flavanoids - - 8) Nonflavanoid phenols - - 9) Proanthocyanins - - 10)Color intensity - - 11)Hue - - 12)OD280/OD315 of diluted wines - - 13)Proline - - class: - - class_0 - - class_1 - - class_2 + - Alcohol + - Malic acid + - Ash + - Alcalinity of ash + - Magnesium + - Total phenols + - Flavanoids + - Nonflavanoid phenols + - Proanthocyanins + - Color intensity + - Hue + - OD280/OD315 of diluted wines + - Proline + - class: + - class_0 + - class_1 + - class_2 :Summary Statistics: @@ -72,24 +73,22 @@ Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml]. Irvine, CA: University of California, School of Information and Computer Science. -References ----------- -(1) -S. Aeberhard, D. Coomans and O. de Vel, -Comparison of Classifiers in High Dimensional Settings, -Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of -Mathematics and Statistics, James Cook University of North Queensland. -(Also submitted to Technometrics). +.. topic:: References + + (1) S. Aeberhard, D. Coomans and O. de Vel, + Comparison of Classifiers in High Dimensional Settings, + Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of + Mathematics and Statistics, James Cook University of North Queensland. + (Also submitted to Technometrics). -The data was used with many others for comparing various -classifiers. The classes are separable, though only RDA -has achieved 100% correct classification. 
-(RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) -(All results using the leave-one-out technique) + The data was used with many others for comparing various + classifiers. The classes are separable, though only RDA + has achieved 100% correct classification. + (RDA : 100%, QDA 99.4%, LDA 98.9%, 1NN 96.1% (z-transformed data)) + (All results using the leave-one-out technique) -(2) -S. Aeberhard, D. Coomans and O. de Vel, -"THE CLASSIFICATION PERFORMANCE OF RDA" -Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of -Mathematics and Statistics, James Cook University of North Queensland. -(Also submitted to Journal of Chemometrics). + (2) S. Aeberhard, D. Coomans and O. de Vel, + "THE CLASSIFICATION PERFORMANCE OF RDA" + Tech. Rep. no. 92-01, (1992), Dept. of Computer Science and Dept. of + Mathematics and Statistics, James Cook University of North Queensland. + (Also submitted to Journal of Chemometrics). \ No newline at end of file From b67149eb0bd4e5693342afc086aff951e12232e2 Mon Sep 17 00:00:00 2001 From: Matthias Feurer Date: Wed, 20 Jun 2018 04:51:38 +0200 Subject: [PATCH 14/24] ENH Add refit_time_ attribute to model selection (#11310) --- doc/whats_new/v0.20.rst | 7 +++++++ sklearn/model_selection/_search.py | 14 ++++++++++++++ sklearn/model_selection/tests/test_search.py | 5 +++++ 3 files changed, 26 insertions(+) diff --git a/doc/whats_new/v0.20.rst b/doc/whats_new/v0.20.rst index 32b4ef3098263..b5a43b0b8fc3b 100644 --- a/doc/whats_new/v0.20.rst +++ b/doc/whats_new/v0.20.rst @@ -253,6 +253,13 @@ Model evaluation and meta-estimators return estimators fitted on each split. :issue:`9686` by :user:`Aurélien Bellet `. +- New ``refit_time_`` attribute will be stored in + :class:`model_selection.GridSearchCV` and + :class:`model_selection.RandomizedSearchCV` if ``refit`` is set to ``True``. + This will allow measuring the complete time it takes to perform + hyperparameter optimization and refitting the best model on the whole + dataset. :issue:`11310` by :user:`Matthias Feurer `. + Decomposition and manifold learning - Speed improvements for both 'exact' and 'barnes_hut' methods in diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 99d6096af73db..a339b9b167634 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -17,6 +17,7 @@ from functools import partial, reduce from itertools import product import operator +import time import warnings import numpy as np @@ -766,10 +767,13 @@ def _store(key_name, array, weights=None, splits=False, rank=False): if self.refit: self.best_estimator_ = clone(base_estimator).set_params( **self.best_params_) + refit_start_time = time.time() if y is not None: self.best_estimator_.fit(X, y, **fit_params) else: self.best_estimator_.fit(X, **fit_params) + refit_end_time = time.time() + self.refit_time_ = refit_end_time - refit_start_time # Store the only scorer not as a dict for single metric evaluation self.scorer_ = scorers if self.multimetric_ else scorers['score'] @@ -1076,6 +1080,11 @@ class GridSearchCV(BaseSearchCV): n_splits_ : int The number of cross-validation splits (folds/iterations). + refit_time_ : float + Seconds used for refitting the best model on the whole dataset. + + This is present only if ``refit`` is not False. + Notes ------ The parameters selected are those that maximize the score of the left out @@ -1387,6 +1396,11 @@ class RandomizedSearchCV(BaseSearchCV): n_splits_ : int The number of cross-validation splits (folds/iterations). 
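# An illustrative sketch, not part of the diff, of reading the new
# attribute once a search is fitted; it assumes ``refit=True`` (the
# default), since ``refit_time_`` is only set when the best model is
# refitted on the whole dataset:
#
#     from sklearn.datasets import load_iris
#     from sklearn.model_selection import GridSearchCV
#     from sklearn.svm import SVC
#
#     X, y = load_iris(return_X_y=True)
#     search = GridSearchCV(SVC(gamma='scale'), {'C': [1, 10]}, cv=3)
#     search.fit(X, y)
#     print(search.refit_time_)  # seconds spent refitting the best model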
+ refit_time_ : float + Seconds used for refitting the best model on the whole dataset. + + This is present only if ``refit`` is not False. + Notes ----- The parameters selected are those that maximize the score of the held-out diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py index f436c7b55cf36..0b7b0338f9ada 100644 --- a/sklearn/model_selection/tests/test_search.py +++ b/sklearn/model_selection/tests/test_search.py @@ -26,6 +26,7 @@ from sklearn.utils.testing import assert_array_equal from sklearn.utils.testing import assert_array_almost_equal from sklearn.utils.testing import assert_almost_equal +from sklearn.utils.testing import assert_greater_equal from sklearn.utils.testing import ignore_warnings from sklearn.utils.mocking import CheckingClassifier, MockDataFrame @@ -1172,6 +1173,10 @@ def test_search_cv_timing(): assert_true(search.cv_results_[key][0] == 0.0) assert_true(np.all(search.cv_results_[key] < 1)) + assert_true(hasattr(search, "refit_time_")) + assert_true(isinstance(search.refit_time_, float)) + assert_greater_equal(search.refit_time_, 0) + def test_grid_search_correct_score_results(): # test that correct scores are used From 95667387e2cd3c10881b078bbe5c8699e4157b9e Mon Sep 17 00:00:00 2001 From: Mark Roth Date: Tue, 19 Jun 2018 22:58:23 -0400 Subject: [PATCH 15/24] MAINT Fix #9350: Enable has_fit_parameter() and fit_score_takes_y() to work with @deprecated in Python 2 (#11277) --- sklearn/utils/deprecation.py | 3 +++ sklearn/utils/tests/test_estimator_checks.py | 15 +++++++++++++++ sklearn/utils/tests/test_validation.py | 10 ++++++++++ 3 files changed, 28 insertions(+) diff --git a/sklearn/utils/deprecation.py b/sklearn/utils/deprecation.py index 5621f436d9baf..fc06f9bc84d3b 100644 --- a/sklearn/utils/deprecation.py +++ b/sklearn/utils/deprecation.py @@ -78,6 +78,9 @@ def wrapped(*args, **kwargs): return fun(*args, **kwargs) wrapped.__doc__ = self._update_doc(wrapped.__doc__) + # Add a reference to the wrapped function so that we can introspect + # on function arguments in Python 2 (already works in Python 3) + wrapped.__wrapped__ = fun return wrapped diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index 53a67693843d9..049dff4baa920 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -9,12 +9,14 @@ from sklearn.externals import joblib from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.utils import deprecated from sklearn.utils.testing import (assert_raises_regex, assert_true, assert_equal, ignore_warnings) from sklearn.utils.estimator_checks import check_estimator from sklearn.utils.estimator_checks import set_random_state from sklearn.utils.estimator_checks import set_checking_parameters from sklearn.utils.estimator_checks import check_estimators_unfitted +from sklearn.utils.estimator_checks import check_fit_score_takes_y from sklearn.utils.estimator_checks import check_no_attributes_set_in_init from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier from sklearn.linear_model import LinearRegression, SGDClassifier @@ -176,6 +178,19 @@ def transform(self, X): return sp.csr_matrix(X) +def test_check_fit_score_takes_y_works_on_deprecated_fit(): + # Tests that check_fit_score_takes_y works on a class with + # a deprecated fit method + + class TestEstimatorWithDeprecatedFitMethod(BaseEstimator): + @deprecated("Deprecated for the purpose of testing " + 
"check_fit_score_takes_y") + def fit(self, X, y): + return self + + check_fit_score_takes_y("test", TestEstimatorWithDeprecatedFitMethod()) + + def test_check_estimator(): # tests that the estimator actually fails on "bad" estimators. # not a complete test of all checks, which are very extensive. diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 076e6d88440f1..5af26ac5b978f 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -22,6 +22,7 @@ from sklearn.utils.testing import assert_allclose_dense_sparse from sklearn.utils import as_float_array, check_array, check_symmetric from sklearn.utils import check_X_y +from sklearn.utils import deprecated from sklearn.utils.mocking import MockDataFrame from sklearn.utils.estimator_checks import NotAnArray from sklearn.random_projection import sparse_random_matrix @@ -563,6 +564,15 @@ def test_has_fit_parameter(): assert_true(has_fit_parameter(SVR, "sample_weight")) assert_true(has_fit_parameter(SVR(), "sample_weight")) + class TestClassWithDeprecatedFitMethod: + @deprecated("Deprecated for the purpose of testing has_fit_parameter") + def fit(self, X, y, sample_weight=None): + pass + + assert has_fit_parameter(TestClassWithDeprecatedFitMethod, + "sample_weight"), \ + "has_fit_parameter fails for class with deprecated fit method." + def test_check_symmetric(): arr_sym = np.array([[0, 1], [1, 2]]) From 2ce21c2395a1486f5719c2cbe7b29c3465e68ef1 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 20 Jun 2018 14:33:12 +1000 Subject: [PATCH 16/24] Fix skipping in conftest.py (#11318) --- doc/conftest.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/conftest.py b/doc/conftest.py index 0c9c79a2dd7ed..5423122d62b4b 100644 --- a/doc/conftest.py +++ b/doc/conftest.py @@ -70,17 +70,19 @@ def setup_compose(): def pytest_runtest_setup(item): fname = item.fspath.strpath - if fname.endswith('datasets/labeled_faces.rst'): + is_index = fname.endswith('datasets/index.rst') + if fname.endswith('datasets/labeled_faces.rst') or is_index: setup_labeled_faces() - elif fname.endswith('datasets/mldata.rst'): + elif fname.endswith('datasets/mldata.rst') or is_index: setup_mldata() - elif fname.endswith('datasets/rcv1.rst'): + elif fname.endswith('datasets/rcv1.rst') or is_index: setup_rcv1() - elif fname.endswith('datasets/twenty_newsgroups.rst'): + elif fname.endswith('datasets/twenty_newsgroups.rst') or is_index: setup_twenty_newsgroups() - elif fname.endswith('tutorial/text_analytics/working_with_text_data.rst'): + elif fname.endswith('tutorial/text_analytics/working_with_text_data.rst')\ + or is_index: setup_working_with_text_data() - elif fname.endswith('modules/compose.rst'): + elif fname.endswith('modules/compose.rst') or is_index: setup_compose() From 786c94dbbddd57aca6b9add4718b71814f651fb0 Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 20 Jun 2018 19:14:56 +1000 Subject: [PATCH 17/24] MAINT clarifications in ColumnTransformer._update_transformers (#11323) --- sklearn/compose/_column_transformer.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index 5afe342ddc7a3..abd8435305f6b 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -290,22 +290,25 @@ def get_feature_names(self): return feature_names def _update_fitted_transformers(self, transformers): + # transformers are 
fitted; excludes 'drop' cases transformers = iter(transformers) transformers_ = [] for name, old, column in self.transformers: if old == 'drop': - trans = old + trans = 'drop' elif old == 'passthrough': # FunctionTransformer is present in list of transformers, # so get next transformer, but save original string next(transformers) - trans = old + trans = 'passthrough' else: trans = next(transformers) transformers_.append((name, trans, column)) + # sanity check that transformers is exhausted + assert not list(transformers) self.transformers_ = transformers_ def _validate_output(self, result): From caa426f7f831583ba684f1db29d4ff15d7776b5a Mon Sep 17 00:00:00 2001 From: Joel Nothman Date: Wed, 20 Jun 2018 22:16:15 +1000 Subject: [PATCH 18/24] COSMIT fix syntax quirk --- sklearn/linear_model/logistic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 30e8560940500..4509f485d2604 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -29,8 +29,8 @@ from ..utils.fixes import logsumexp from ..utils.optimize import newton_cg from ..utils.validation import check_X_y -from..exceptions import (NotFittedError, ConvergenceWarning, - ChangedBehaviorWarning) +from ..exceptions import (NotFittedError, ConvergenceWarning, + ChangedBehaviorWarning) from ..utils.multiclass import check_classification_targets from ..externals.joblib import Parallel, delayed from ..model_selection import check_cv From 580026e036cf33f94ea07986b5d38149f3bde2a3 Mon Sep 17 00:00:00 2001 From: Georgi Peev Date: Fri, 15 Jun 2018 15:17:09 -0400 Subject: [PATCH 19/24] changed examples so they produce the same values on MacOS --- doc/modules/model_evaluation.rst | 6 +++--- sklearn/svm/classes.py | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 8c4874edf84c1..6c75998ba6e59 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -98,9 +98,9 @@ Usage examples: >>> from sklearn.model_selection import cross_val_score >>> iris = datasets.load_iris() >>> X, y = iris.data, iris.target - >>> clf = svm.SVC(gamma='scale', probability=True, random_state=0) - >>> cross_val_score(clf, X, y, scoring='neg_log_loss') # doctest: +ELLIPSIS - array([-0.10..., -0.16..., -0.07...]) + >>> clf = svm.SVC(gamma='scale', random_state=0) + >>> cross_val_score(clf, X, y, scoring='recall_macro') # doctest: +ELLIPSIS + array([0.980..., 0.960..., 0.979...]) >>> model = svm.SVC() >>> cross_val_score(model, X, y, scoring='wrong_choice') Traceback (most recent call last): diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py index db9360e64fb4f..9e314d4ee5e72 100644 --- a/sklearn/svm/classes.py +++ b/sklearn/svm/classes.py @@ -116,11 +116,11 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin, >>> from sklearn.svm import LinearSVC >>> from sklearn.datasets import make_classification >>> X, y = make_classification(n_features=4, random_state=0) - >>> clf = LinearSVC(random_state=0) + >>> clf = LinearSVC(random_state=0, tol=0.00001) >>> clf.fit(X, y) LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, intercept_scaling=1, loss='squared_hinge', max_iter=1000, - multi_class='ovr', penalty='l2', random_state=0, tol=0.0001, + multi_class='ovr', penalty='l2', random_state=0, tol=0.00001, verbose=0) >>> print(clf.coef_) [[0.08551385 0.39414796 0.49847831 0.37513797]] @@ -327,11 +327,11 @@ class 
LinearSVR(LinearModel, RegressorMixin): >>> from sklearn.svm import LinearSVR >>> from sklearn.datasets import make_regression >>> X, y = make_regression(n_features=4, random_state=0) - >>> regr = LinearSVR(random_state=0) + >>> regr = LinearSVR(random_state=0, tol=0.00001) >>> regr.fit(X, y) LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True, intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000, - random_state=0, tol=0.0001, verbose=0) + random_state=0, tol=0.00001, verbose=0) >>> print(regr.coef_) [16.35750999 26.91499923 42.30652207 60.47843124] >>> print(regr.intercept_) From ca6adf73e7e271a69cdd0eb07f89e6085f34b23e Mon Sep 17 00:00:00 2001 From: Georgi Peev Date: Fri, 15 Jun 2018 17:54:59 -0400 Subject: [PATCH 20/24] switched two constants to scientific notation --- sklearn/svm/classes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py index 9e314d4ee5e72..259e33a9ec661 100644 --- a/sklearn/svm/classes.py +++ b/sklearn/svm/classes.py @@ -120,7 +120,7 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin, >>> clf.fit(X, y) LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, intercept_scaling=1, loss='squared_hinge', max_iter=1000, - multi_class='ovr', penalty='l2', random_state=0, tol=0.00001, + multi_class='ovr', penalty='l2', random_state=0, tol=1e-05, verbose=0) >>> print(clf.coef_) [[0.08551385 0.39414796 0.49847831 0.37513797]] @@ -331,7 +331,7 @@ class LinearSVR(LinearModel, RegressorMixin): >>> regr.fit(X, y) LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True, intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000, - random_state=0, tol=0.00001, verbose=0) + random_state=0, tol=1e-05, verbose=0) >>> print(regr.coef_) [16.35750999 26.91499923 42.30652207 60.47843124] >>> print(regr.intercept_) From f473b29a2af0e3f959c19a5f25dd790f6dcd786f Mon Sep 17 00:00:00 2001 From: Georgi Peev Date: Mon, 18 Jun 2018 10:24:32 -0400 Subject: [PATCH 21/24] use slipsis for example values that depend on RNG library --- sklearn/svm/classes.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py index 259e33a9ec661..3411fc9dc3886 100644 --- a/sklearn/svm/classes.py +++ b/sklearn/svm/classes.py @@ -120,8 +120,7 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin, >>> clf.fit(X, y) LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, intercept_scaling=1, loss='squared_hinge', max_iter=1000, - multi_class='ovr', penalty='l2', random_state=0, tol=1e-05, - verbose=0) + multi_class='ovr', penalty='l2', random_state=0, tol=1e-05, verbose=0) >>> print(clf.coef_) [[0.08551385 0.39414796 0.49847831 0.37513797]] >>> print(clf.intercept_) @@ -333,7 +332,7 @@ class LinearSVR(LinearModel, RegressorMixin): intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000, random_state=0, tol=1e-05, verbose=0) >>> print(regr.coef_) - [16.35750999 26.91499923 42.30652207 60.47843124] + [16.35... 26.91... 42.30... 60.47...] >>> print(regr.intercept_) [-4.29756543] >>> print(regr.predict([[0, 0, 0, 0]])) @@ -352,7 +351,7 @@ class LinearSVR(LinearModel, RegressorMixin): sklearn.linear_model.SGDRegressor SGDRegressor can optimize the same cost function as LinearSVR - by adjusting the penalty and loss parameters. In addition it requires + by adjusting the penalty and loss parameters. 
In addition it .requires less memory, allows incremental (online) learning, and implements various loss functions and regularization regimes. """ From 58161fc4b922629007bebbd84a7ef5e018510bc1 Mon Sep 17 00:00:00 2001 From: Georgi Peev Date: Mon, 18 Jun 2018 11:34:26 -0400 Subject: [PATCH 22/24] addede more ellipsis --- sklearn/svm/classes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py index 3411fc9dc3886..f3b30b98b09e3 100644 --- a/sklearn/svm/classes.py +++ b/sklearn/svm/classes.py @@ -122,9 +122,9 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin, intercept_scaling=1, loss='squared_hinge', max_iter=1000, multi_class='ovr', penalty='l2', random_state=0, tol=1e-05, verbose=0) >>> print(clf.coef_) - [[0.08551385 0.39414796 0.49847831 0.37513797]] + [[0.085... 0.394... 0.498... 0.375...]] >>> print(clf.intercept_) - [0.28418066] + [0.284...] >>> print(clf.predict([[0, 0, 0, 0]])) [1] @@ -334,9 +334,9 @@ class LinearSVR(LinearModel, RegressorMixin): >>> print(regr.coef_) [16.35... 26.91... 42.30... 60.47...] >>> print(regr.intercept_) - [-4.29756543] + [-4.29...] >>> print(regr.predict([[0, 0, 0, 0]])) - [-4.29756543] + [-4.29...] See also -------- From 6492d8ac083e35ccc738c61f04cc4f3f059ccabb Mon Sep 17 00:00:00 2001 From: Georgi Peev Date: Tue, 19 Jun 2018 10:32:21 -0400 Subject: [PATCH 23/24] removed extra dot --- sklearn/svm/classes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py index f3b30b98b09e3..4680fd8514e3f 100644 --- a/sklearn/svm/classes.py +++ b/sklearn/svm/classes.py @@ -351,7 +351,7 @@ class LinearSVR(LinearModel, RegressorMixin): sklearn.linear_model.SGDRegressor SGDRegressor can optimize the same cost function as LinearSVR - by adjusting the penalty and loss parameters. In addition it .requires + by adjusting the penalty and loss parameters. In addition it requires less memory, allows incremental (online) learning, and implements various loss functions and regularization regimes. """ From 723cd095bea910f2040380dee803dfd63c63eb56 Mon Sep 17 00:00:00 2001 From: Guillaume Lemaitre Date: Sat, 14 Jul 2018 21:18:18 +0200 Subject: [PATCH 24/24] nitpicks --- sklearn/svm/classes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/svm/classes.py b/sklearn/svm/classes.py index 4680fd8514e3f..99d07b7d51fce 100644 --- a/sklearn/svm/classes.py +++ b/sklearn/svm/classes.py @@ -116,7 +116,7 @@ class LinearSVC(BaseEstimator, LinearClassifierMixin, >>> from sklearn.svm import LinearSVC >>> from sklearn.datasets import make_classification >>> X, y = make_classification(n_features=4, random_state=0) - >>> clf = LinearSVC(random_state=0, tol=0.00001) + >>> clf = LinearSVC(random_state=0, tol=1e-5) >>> clf.fit(X, y) LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, intercept_scaling=1, loss='squared_hinge', max_iter=1000, @@ -326,7 +326,7 @@ class LinearSVR(LinearModel, RegressorMixin): >>> from sklearn.svm import LinearSVR >>> from sklearn.datasets import make_regression >>> X, y = make_regression(n_features=4, random_state=0) - >>> regr = LinearSVR(random_state=0, tol=0.00001) + >>> regr = LinearSVR(random_state=0, tol=1e-5) >>> regr.fit(X, y) LinearSVR(C=1.0, dual=True, epsilon=0.0, fit_intercept=True, intercept_scaling=1.0, loss='epsilon_insensitive', max_iter=1000,
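
PATCH 14 above only sets ``refit_time_`` when ``refit`` is not ``False``,
because the attribute times the single refit of ``best_estimator_`` on the
full dataset after the cross-validation loop has finished. A minimal sketch
of reading it off a fitted search object (the dataset, estimator and
parameter grid here are arbitrary illustrations, not taken from the patch)::

    from sklearn.datasets import load_iris
    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    X, y = load_iris(return_X_y=True)
    search = GridSearchCV(SVC(gamma='scale'), {'C': [0.1, 1, 10]}, cv=3)
    search.fit(X, y)

    # Wall-clock seconds spent refitting best_estimator_ on all of X, y.
    # Per-split fit/score times live in cv_results_; this float covers only
    # the final refit, so it is always >= 0.
    print(search.refit_time_)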
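
PATCH 17 above encodes an invariant worth stating explicitly: after fitting,
``transformers_`` substitutes fitted estimator instances only for real
transformers, while ``'drop'`` and ``'passthrough'`` entries survive as plain
strings (the internal ``FunctionTransformer`` used for passthrough is
consumed from the fitted list but not stored). A small sketch of that
observable behaviour, with made-up data and column indices::

    import numpy as np
    from sklearn.compose import ColumnTransformer
    from sklearn.preprocessing import StandardScaler

    X = np.array([[0., 1., 2.],
                  [2., 4., 6.]])
    ct = ColumnTransformer([
        ('scale', StandardScaler(), [0]),  # replaced by a fitted instance
        ('keep', 'passthrough', [1]),      # stays the string 'passthrough'
        ('junk', 'drop', [2]),             # stays the string 'drop'
    ])
    ct.fit(X)
    print(ct.transformers_)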
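
The last six patches share one goal: doctest output must not depend on the
platform's RNG/BLAS details. Trailing digits of computed values are replaced
with ``...`` under the ``ELLIPSIS`` option, and ``tol=0.00001`` is spelled
``tol=1e-5`` so that the repr echoed by ``fit`` (``tol=1e-05``) matches
everywhere. A self-contained sketch of both mechanisms (the function and
value below are invented for illustration; this particular computation is
deterministic, unlike the SVM coefficients the patches guard)::

    import doctest

    def third():
        """Return 1/3.

        >>> third()  # doctest: +ELLIPSIS
        0.333...
        """
        return 1.0 / 3.0

    # The ELLIPSIS directive lets '...' absorb the tail of the float repr,
    # which is how the patches make the SVM doctests pass on both MacOS
    # and Linux.
    doctest.run_docstring_examples(third, {'third': third}, verbose=True)

    # Python normalizes small float literals to scientific notation, which
    # is why the examples write 1e-5 rather than 0.00001:
    print(repr(0.00001))  # prints '1e-05'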