From 9f13329971dab61ff1b3a15587e4340cfd9b7903 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 7 Jun 2017 14:53:18 +0200 Subject: [PATCH 01/12] work on fancy repr --- sklearn/base.py | 22 +++++++++++++++++----- sklearn/model_selection/_search.py | 12 ++++++------ 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 119696f5b3722..a8c038b8cd01f 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -121,7 +121,7 @@ def clone(estimator, safe=True): ############################################################################### -def _pprint(params, offset=0, printer=repr): +def _pprint(params, offset=0, printer=repr, cutoff=500): """Pretty print the dictionary 'params' Parameters @@ -150,9 +150,9 @@ def _pprint(params, offset=0, printer=repr): # architectures and versions. this_repr = '%s=%s' % (k, str(v)) else: - # use repr of the rest + # use printer of the rest this_repr = '%s=%s' % (k, printer(v)) - if len(this_repr) > 500: + if cutoff is not None and len(this_repr) > cutoff: this_repr = this_repr[:300] + '...' + this_repr[-100:] if i > 0: if (this_line_length + len(this_repr) >= 75 or '\n' in this_repr): @@ -284,9 +284,22 @@ def set_params(self, **params): setattr(self, key, value) return self + def _changed_params(self): + params = self.get_params(deep=False) + filtered_params = {} + default_params = {} + init_params = signature(self.__init__).parameters + for k, v in params.items(): + if v == init_params[k].default: + default_params[k] = v + else: + filtered_params[k] = v + return filtered_params, default_params + def __repr__(self): class_name = self.__class__.__name__ - return '%s(%s)' % (class_name, _pprint(self.get_params(deep=False), + params = self.get_params(deep=False) + return '%s(%s)' % (class_name, _pprint(params, offset=len(class_name),),) def __getstate__(self): @@ -316,7 +329,6 @@ def __setstate__(self, state): self.__dict__.update(state) - ############################################################################### class ClassifierMixin(object): """Mixin class for all classifiers in scikit-learn.""" diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 10d0b3171992b..452aea09772c1 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -386,7 +386,7 @@ def __init__(self, estimator, scoring=None, self.scoring = scoring self.estimator = estimator self.n_jobs = n_jobs - self.fit_params = fit_params if fit_params is not None else {} + self.fit_params = fit_params self.iid = iid self.refit = refit self.cv = cv @@ -836,7 +836,7 @@ class GridSearchCV(BaseSearchCV): kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=..., verbose=False), - fit_params={}, iid=..., n_jobs=1, + fit_params=None, iid=..., n_jobs=1, param_grid=..., pre_dispatch=..., refit=..., return_train_score=..., scoring=..., verbose=...) 
>>> sorted(clf.cv_results_.keys()) @@ -1196,10 +1196,10 @@ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None, self.n_iter = n_iter self.random_state = random_state super(RandomizedSearchCV, self).__init__( - estimator=estimator, scoring=scoring, fit_params=fit_params, - n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, - pre_dispatch=pre_dispatch, error_score=error_score, - return_train_score=return_train_score) + estimator=estimator, scoring=scoring, fit_params=fit_params, + n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, + pre_dispatch=pre_dispatch, error_score=error_score, + return_train_score=return_train_score) def _get_param_iterator(self): """Return ParameterSampler instance for the given distributions""" From e3b662e7cb627561cc5bf3f4c2000208d2e866d3 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 7 Jun 2017 14:58:32 +0200 Subject: [PATCH 02/12] add printoptions --- sklearn/base.py | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index a8c038b8cd01f..e0ed54f2bf531 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -13,6 +13,33 @@ from . import __version__ +_PRINTOPTIONS = {'parameters': 'all'} + + +def set_print(parameters=None): + """Set estimator print options. + + WARNING: This functionality is experimental and might be removed or changed + at any time. + + Parameters + ---------- + parameters : None, 'all' or 'changed', default=None + Which parameters to show when printing estimators. + If None, this setting is not changed, if 'all', + all parameters are shown, if 'changed', only the + parameters that are not at their default value are shown. + + Returns + ------- + printoptions : dict + Current print options. 
+ """ + if parameters is not None: + _PRINTOPTIONS['parameters'] = parameters + return _PRINTOPTIONS + + ############################################################################## def _first_and_last_element(arr): """Returns first and last element of numpy array or sparse matrix.""" @@ -286,19 +313,18 @@ def set_params(self, **params): def _changed_params(self): params = self.get_params(deep=False) - filtered_params = {} - default_params = {} - init_params = signature(self.__init__).parameters - for k, v in params.items(): - if v == init_params[k].default: - default_params[k] = v - else: - filtered_params[k] = v - return filtered_params, default_params + if _PRINTOPTIONS['parameters'] == 'changed': + filtered_params = {} + init_params = signature(self.__init__).parameters + for k, v in params.items(): + if v != init_params[k].default: + filtered_params[k] = v + return filtered_params + return params def __repr__(self): class_name = self.__class__.__name__ - params = self.get_params(deep=False) + params = self._changed_params() return '%s(%s)' % (class_name, _pprint(params, offset=len(class_name),),) From 00a10d81594330f44e61cfa13af8cf9061dab93a Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 16:32:22 +0200 Subject: [PATCH 03/12] use global option mechanism to change repr --- sklearn/__init__.py | 19 ++++++++++++++----- sklearn/base.py | 31 ++----------------------------- 2 files changed, 16 insertions(+), 34 deletions(-) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index b4916dd5925de..8b510840c1793 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -18,7 +18,11 @@ import os from contextlib import contextmanager as _contextmanager -_ASSUME_FINITE = bool(os.environ.get('SKLEARN_ASSUME_FINITE', False)) +_CONFIG = {'assume_finite': bool(os.environ.get('SKLEARN_ASSUME_FINITE', + False)), + 'show_parameters': bool(os.environ.get('SKLEARN_SHOW_PARAMETERS', + 'all')) + } def get_config(): @@ -29,10 +33,10 @@ def get_config(): config : dict Keys are parameter names that can be passed to :func:`set_config`. """ - return {'assume_finite': _ASSUME_FINITE} + return _CONFIG -def set_config(assume_finite=None): +def set_config(assume_finite=None, show_parameters=None): """Set global scikit-learn configuration Parameters @@ -42,10 +46,15 @@ def set_config(assume_finite=None): saving time, but leading to potential crashes. If False, validation for finiteness will be performed, avoiding error. + show_parameters : str, 'all' or 'changed' + Whether to include all estimator parameters in the + string representation or only the changed ones. """ - global _ASSUME_FINITE + global _CONFIG if assume_finite is not None: - _ASSUME_FINITE = assume_finite + _CONFIG.update(assume_finite=assume_finite) + if show_parameters is not None: + _CONFIG.update(show_parmeters=show_parameters) @_contextmanager diff --git a/sklearn/base.py b/sklearn/base.py index e0ed54f2bf531..ba3d118c8b029 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -10,34 +10,7 @@ from scipy import sparse from .externals import six from .utils.fixes import signature -from . import __version__ - - -_PRINTOPTIONS = {'parameters': 'all'} - - -def set_print(parameters=None): - """Set estimator print options. - - WARNING: This functionality is experimental and might be removed or changed - at any time. - - Parameters - ---------- - parameters : None, 'all' or 'changed', default=None - Which parameters to show when printing estimators. 
- If None, this setting is not changed, if 'all', - all parameters are shown, if 'changed', only the - parameters that are not at their default value are shown. - - Returns - ------- - printoptions : dict - Current print options. - """ - if parameters is not None: - _PRINTOPTIONS['parameters'] = parameters - return _PRINTOPTIONS +from . import __version__, get_config ############################################################################## @@ -313,7 +286,7 @@ def set_params(self, **params): def _changed_params(self): params = self.get_params(deep=False) - if _PRINTOPTIONS['parameters'] == 'changed': + if get_config()['show_parameters'] == 'changed': filtered_params = {} init_params = signature(self.__init__).parameters for k, v in params.items(): From 819827359805255e63a28b33dc4534ef2c42b6e7 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 16:39:15 +0200 Subject: [PATCH 04/12] typo --- sklearn/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 8b510840c1793..51c280639e120 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -54,7 +54,7 @@ def set_config(assume_finite=None, show_parameters=None): if assume_finite is not None: _CONFIG.update(assume_finite=assume_finite) if show_parameters is not None: - _CONFIG.update(show_parmeters=show_parameters) + _CONFIG.update(show_parameters=show_parameters) @_contextmanager From 63501914481d6ef59aceec39b15d65b42421799a Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 16:52:16 +0200 Subject: [PATCH 05/12] boolean option is neater, add test --- sklearn/__init__.py | 12 +++++----- sklearn/tests/test_base.py | 3 +++ sklearn/tests/test_config.py | 43 +++++++++++++++++++----------------- 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 51c280639e120..5ea9ae5d0ffc7 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -20,8 +20,8 @@ _CONFIG = {'assume_finite': bool(os.environ.get('SKLEARN_ASSUME_FINITE', False)), - 'show_parameters': bool(os.environ.get('SKLEARN_SHOW_PARAMETERS', - 'all')) + 'show_default_parameters': + bool(os.environ.get('SKLEARN_SHOW_DEFAULT_PARAMETERS', True)) } @@ -36,7 +36,7 @@ def get_config(): return _CONFIG -def set_config(assume_finite=None, show_parameters=None): +def set_config(assume_finite=None, show_default_parameters=None): """Set global scikit-learn configuration Parameters @@ -46,15 +46,15 @@ def set_config(assume_finite=None, show_parameters=None): saving time, but leading to potential crashes. If False, validation for finiteness will be performed, avoiding error. - show_parameters : str, 'all' or 'changed' + show_default_parameters : bool, optional Whether to include all estimator parameters in the string representation or only the changed ones. 
""" global _CONFIG if assume_finite is not None: _CONFIG.update(assume_finite=assume_finite) - if show_parameters is not None: - _CONFIG.update(show_parameters=show_parameters) + if show_default_parameters is not None: + _CONFIG.update(show_default_parameters=show_default_parameters) @_contextmanager diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 8112e7fd8196b..bbb53fd7eb1b2 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -201,6 +201,9 @@ def test_repr(): some_est = T(a=["long_params"] * 1000) assert_equal(len(repr(some_est)), 415) + with sklearn.config_context(show_default_parameters=False): + assert_equal(repr(test), "T(a=K(), b=K())") + def test_str(): # Smoke test the str of the base estimator diff --git a/sklearn/tests/test_config.py b/sklearn/tests/test_config.py index b968e7b7917ea..0c790226210db 100644 --- a/sklearn/tests/test_config.py +++ b/sklearn/tests/test_config.py @@ -1,40 +1,43 @@ from sklearn import get_config, set_config, config_context from sklearn.utils.testing import assert_equal, assert_raises +dict_true = {'assume_finite': True, 'show_default_parameters': True} +dict_false = {'assume_finite': False, 'show_default_parameters': True} + def test_config_context(): - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) # Not using as a context manager affects nothing config_context(assume_finite=True) - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) with config_context(assume_finite=True): - assert_equal(get_config(), {'assume_finite': True}) - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_true) + assert_equal(get_config(), dict_false) with config_context(assume_finite=True): with config_context(assume_finite=None): - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) with config_context(assume_finite=False): - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) with config_context(assume_finite=None): - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) # global setting will not be retained outside of context that # did not modify this setting set_config(assume_finite=True) - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) # No positional arguments assert_raises(TypeError, config_context, True) @@ -43,26 +46,26 @@ def test_config_context(): def test_config_context_exception(): - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) try: with config_context(assume_finite=True): - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) raise ValueError() except ValueError: pass - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) def test_set_config(): - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) set_config(assume_finite=None) - assert_equal(get_config(), {'assume_finite': False}) + 
assert_equal(get_config(), dict_false) set_config(assume_finite=True) - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) set_config(assume_finite=None) - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) set_config(assume_finite=False) - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) # No unknown arguments assert_raises(TypeError, set_config, do_something_else=True) From 85a32179c770e8f21fddc2dca5886b8098894a97 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 16:53:34 +0200 Subject: [PATCH 06/12] actually change in base --- sklearn/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/base.py b/sklearn/base.py index ba3d118c8b029..5e6482946b960 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -286,7 +286,7 @@ def set_params(self, **params): def _changed_params(self): params = self.get_params(deep=False) - if get_config()['show_parameters'] == 'changed': + if not get_config()['show_default_parameters']: filtered_params = {} init_params = signature(self.__init__).parameters for k, v in params.items(): From 1503eadced2945509a4d26f39bfc42568b7debd1 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 17:04:42 +0200 Subject: [PATCH 07/12] interpret empty string, False and FALSE as False --- sklearn/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 5ea9ae5d0ffc7..284be69e4014a 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -21,7 +21,8 @@ _CONFIG = {'assume_finite': bool(os.environ.get('SKLEARN_ASSUME_FINITE', False)), 'show_default_parameters': - bool(os.environ.get('SKLEARN_SHOW_DEFAULT_PARAMETERS', True)) + (os.environ.get('SKLEARN_SHOW_DEFAULT_PARAMETERS') + not in ["False", "FALSE", ""]) } From 9d699dd0a5741cd24969f3b924740844ffba2a4b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 17:45:21 +0200 Subject: [PATCH 08/12] run doctests with new option --- Makefile | 2 +- doc/modules/feature_extraction.rst | 12 ++---- doc/modules/gaussian_process.rst | 8 +--- doc/modules/kernel_approximation.rst | 6 +-- doc/modules/linear_model.rst | 24 ++++------- doc/modules/model_evaluation.rst | 12 ++---- doc/modules/model_persistence.rst | 5 +-- doc/modules/neighbors.rst | 2 +- doc/modules/neural_networks_supervised.rst | 18 ++------ doc/modules/pipeline.rst | 41 +++++++------------ doc/modules/preprocessing.rst | 16 ++++---- doc/modules/preprocessing_targets.rst | 2 +- doc/modules/sgd.rst | 8 +--- doc/modules/svm.rst | 23 +++-------- doc/tutorial/basic/tutorial.rst | 32 ++++----------- .../statistical_inference/model_selection.rst | 7 +--- .../supervised_learning.rst | 20 +++------ .../unsupervised_learning.rst | 13 +++--- 18 files changed, 73 insertions(+), 178 deletions(-) diff --git a/Makefile b/Makefile index aa6203f3cdbe7..360132dc6eecd 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ test-sphinxext: $(NOSETESTS) -s -v doc/sphinxext/ test-doc: ifeq ($(BITS),64) - $(NOSETESTS) -s -v doc/*.rst doc/modules/ doc/datasets/ \ + SKLEARN_SHOW_DEFAULT_PARAMETERS=False $(NOSETESTS) -s -v doc/*.rst doc/modules/ doc/datasets/ \ doc/developers doc/tutorial/basic doc/tutorial/statistical_inference \ doc/tutorial/text_analytics endif diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst index 32e53f0817e6e..e770ca2b36832 100644 --- a/doc/modules/feature_extraction.rst +++ 
b/doc/modules/feature_extraction.rst @@ -289,14 +289,9 @@ This model has many parameters, however the default values are quite reasonable (please see the :ref:`reference documentation ` for the details):: - >>> vectorizer = CountVectorizer(min_df=1) + >>> vectorizer = CountVectorizer() >>> vectorizer # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS - CountVectorizer(analyzer=...'word', binary=False, decode_error=...'strict', - dtype=<... 'numpy.int64'>, encoding=...'utf-8', input=...'content', - lowercase=True, max_df=1.0, max_features=None, min_df=1, - ngram_range=(1, 1), preprocessor=None, stop_words=None, - strip_accents=None, token_pattern=...'(?u)\\b\\w\\w+\\b', - tokenizer=None, vocabulary=None) + CountVectorizer() Let's use it to tokenize and count the word occurrences of a minimalistic corpus of text documents:: @@ -440,8 +435,7 @@ class:: >>> from sklearn.feature_extraction.text import TfidfTransformer >>> transformer = TfidfTransformer(smooth_idf=False) >>> transformer # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS - TfidfTransformer(norm=...'l2', smooth_idf=False, sublinear_tf=False, - use_idf=True) + TfidfTransformer(smooth_idf=False) Again please see the :ref:`reference documentation ` for the details on all the parameters. diff --git a/doc/modules/gaussian_process.rst b/doc/modules/gaussian_process.rst index 7fae49349f342..1d072f5c074e3 100644 --- a/doc/modules/gaussian_process.rst +++ b/doc/modules/gaussian_process.rst @@ -643,12 +643,8 @@ parameters or alternatively it uses the given parameters. >>> x = np.atleast_2d(np.linspace(0, 10, 1000)).T >>> gp = gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1) >>> gp.fit(X, y) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - GaussianProcess(beta0=None, corr=, - normalize=True, nugget=array(2.22...-15), - optimizer='fmin_cobyla', random_start=1, random_state=... 
- regr=, storage_mode='full', - theta0=array([[ 0.01]]), thetaL=array([[ 0.0001]]), - thetaU=array([[ 0.1]]), verbose=False) + GaussianProcess(theta0=array([[ 0.01]]), thetaL=array([[ 0.0001]]), + thetaU=array([[ 0.1]])) >>> y_pred, sigma2_pred = gp.predict(x, eval_MSE=True) diff --git a/doc/modules/kernel_approximation.rst b/doc/modules/kernel_approximation.rst index 72363faf66403..0e4d3fce9956d 100644 --- a/doc/modules/kernel_approximation.rst +++ b/doc/modules/kernel_approximation.rst @@ -61,11 +61,7 @@ a linear algorithm, for example a linear SVM:: >>> X_features = rbf_feature.fit_transform(X) >>> clf = SGDClassifier() # doctest: +NORMALIZE_WHITESPACE >>> clf.fit(X_features, y) - SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1, - eta0=0.0, fit_intercept=True, l1_ratio=0.15, - learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1, - penalty='l2', power_t=0.5, random_state=None, shuffle=True, - verbose=0, warm_start=False) + SGDClassifier() >>> clf.score(X_features, y) 1.0 diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index b3e82b56a48a2..7b77ef5a7a24e 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -45,7 +45,7 @@ and will store the coefficients :math:`w` of the linear model in its >>> from sklearn import linear_model >>> reg = linear_model.LinearRegression() >>> reg.fit ([[0, 0], [1, 1], [2, 2]], [0, 1, 2]) - LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False) + LinearRegression() >>> reg.coef_ array([ 0.5, 0.5]) @@ -101,10 +101,9 @@ arrays X, y and will store the coefficients :math:`w` of the linear model in its ``coef_`` member:: >>> from sklearn import linear_model - >>> reg = linear_model.Ridge (alpha = .5) + >>> reg = linear_model.Ridge(alpha=.5) >>> reg.fit ([[0, 0], [0, 0], [1, 1]], [0, .1, 1]) # doctest: +NORMALIZE_WHITESPACE - Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None, - normalize=False, random_state=None, solver='auto', tol=0.001) + Ridge(alpha=0.5) >>> reg.coef_ array([ 0.34545455, 0.34545455]) >>> reg.intercept_ #doctest: +ELLIPSIS @@ -140,8 +139,7 @@ as GridSearchCV except that it defaults to Generalized Cross-Validation >>> from sklearn import linear_model >>> reg = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0]) >>> reg.fit([[0, 0], [0, 0], [1, 1]], [0, .1, 1]) # doctest: +SKIP - RidgeCV(alphas=[0.1, 1.0, 10.0], cv=None, fit_intercept=True, scoring=None, - normalize=False) + RidgeCV(alphas=[0.1, 1.0, 10.0]) >>> reg.alpha_ # doctest: +SKIP 0.1 @@ -182,11 +180,9 @@ the algorithm to fit the coefficients. See :ref:`least_angle_regression` for another implementation:: >>> from sklearn import linear_model - >>> reg = linear_model.Lasso(alpha = 0.1) + >>> reg = linear_model.Lasso(alpha=0.1) >>> reg.fit([[0, 0], [1, 1]], [0, 1]) - Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000, - normalize=False, positive=False, precompute=False, random_state=None, - selection='cyclic', tol=0.0001, warm_start=False) + Lasso(alpha=0.1) >>> reg.predict([[1, 1]]) array([ 0.8]) @@ -454,9 +450,7 @@ function of the norm of its coefficients. >>> from sklearn import linear_model >>> reg = linear_model.LassoLars(alpha=.1) >>> reg.fit([[0, 0], [1, 1]], [0, 1]) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE - LassoLars(alpha=0.1, copy_X=True, eps=..., fit_intercept=True, - fit_path=True, max_iter=500, normalize=True, positive=False, - precompute='auto', verbose=False) + LassoLars(alpha=0.1) >>> reg.coef_ # doctest: +ELLIPSIS array([ 0.717157..., 0. 
]) @@ -617,9 +611,7 @@ Bayesian Ridge Regression is used for regression:: >>> Y = [0., 1., 2., 3.] >>> reg = linear_model.BayesianRidge() >>> reg.fit(X, Y) - BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True, - fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300, - normalize=False, tol=0.001, verbose=False) + BayesianRidge() After being fitted, the model can then be used to predict new values:: diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 7d65806acb807..64664f99c084b 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -802,10 +802,7 @@ with a svm classifier in a binary class problem:: >>> y = [-1, 1] >>> est = svm.LinearSVC(random_state=0) >>> est.fit(X, y) - LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, - intercept_scaling=1, loss='squared_hinge', max_iter=1000, - multi_class='ovr', penalty='l2', random_state=0, tol=0.0001, - verbose=0) + LinearSVC(random_state=0) >>> pred_decision = est.decision_function([[-2], [3], [0.5]]) >>> pred_decision # doctest: +ELLIPSIS array([-2.18..., 2.36..., 0.09...]) @@ -820,10 +817,7 @@ with a svm classifier in a multiclass problem:: >>> labels = np.array([0, 1, 2, 3]) >>> est = svm.LinearSVC() >>> est.fit(X, Y) - LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, - intercept_scaling=1, loss='squared_hinge', max_iter=1000, - multi_class='ovr', penalty='l2', random_state=None, tol=0.0001, - verbose=0) + LinearSVC() >>> pred_decision = est.decision_function([[-1], [2], [3]]) >>> y_true = [0, 2, 3] >>> hinge_loss(y_true, pred_decision, labels) #doctest: +ELLIPSIS @@ -1567,7 +1561,7 @@ Next, let's compare the accuracy of ``SVC`` and ``most_frequent``:: 0.63... >>> clf = DummyClassifier(strategy='most_frequent',random_state=0) >>> clf.fit(X_train, y_train) - DummyClassifier(constant=None, random_state=0, strategy='most_frequent') + DummyClassifier(random_state=0, strategy='most_frequent') >>> clf.score(X_test, y_test) # doctest: +ELLIPSIS 0.57... diff --git a/doc/modules/model_persistence.rst b/doc/modules/model_persistence.rst index 5b83bc28a7b1e..729086c9c9197 100644 --- a/doc/modules/model_persistence.rst +++ b/doc/modules/model_persistence.rst @@ -22,10 +22,7 @@ persistence model, namely `pickle >> iris = datasets.load_iris() >>> X, y = iris.data, iris.target >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC() >>> import pickle >>> s = pickle.dumps(clf) diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst index 1440c49403a5c..586e1314186db 100644 --- a/doc/modules/neighbors.rst +++ b/doc/modules/neighbors.rst @@ -478,7 +478,7 @@ for more complex methods that do not make this assumption. Usage of the default >>> y = np.array([1, 1, 1, 2, 2, 2]) >>> clf = NearestCentroid() >>> clf.fit(X, y) - NearestCentroid(metric='euclidean', shrink_threshold=None) + NearestCentroid() >>> print(clf.predict([[-0.8, -1]])) [1] diff --git a/doc/modules/neural_networks_supervised.rst b/doc/modules/neural_networks_supervised.rst index 292ed903eeffc..e07895b03cb79 100644 --- a/doc/modules/neural_networks_supervised.rst +++ b/doc/modules/neural_networks_supervised.rst @@ -90,13 +90,8 @@ training samples:: ... hidden_layer_sizes=(5, 2), random_state=1) ... 
>>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', - beta_1=0.9, beta_2=0.999, early_stopping=False, - epsilon=1e-08, hidden_layer_sizes=(5, 2), learning_rate='constant', - learning_rate_init=0.001, max_iter=200, momentum=0.9, - nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True, - solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False, - warm_start=False) + MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2), random_state=1, + solver='lbfgs') After fitting (training), the model can predict labels for new samples:: @@ -138,13 +133,8 @@ indices where the value is `1` represents the assigned classes of that sample:: ... hidden_layer_sizes=(15,), random_state=1) ... >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', - beta_1=0.9, beta_2=0.999, early_stopping=False, - epsilon=1e-08, hidden_layer_sizes=(15,), learning_rate='constant', - learning_rate_init=0.001, max_iter=200, momentum=0.9, - nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True, - solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False, - warm_start=False) + MLPClassifier(alpha=1e-05, hidden_layer_sizes=(15,), random_state=1, + solver='lbfgs') >>> clf.predict([[1., 2.]]) array([[1, 1]]) >>> clf.predict([[0., 0.]]) diff --git a/doc/modules/pipeline.rst b/doc/modules/pipeline.rst index c90f35753fb00..2cd0f02273ce8 100644 --- a/doc/modules/pipeline.rst +++ b/doc/modules/pipeline.rst @@ -40,9 +40,8 @@ is an estimator object:: >>> estimators = [('reduce_dim', PCA()), ('clf', SVC())] >>> pipe = Pipeline(estimators) >>> pipe # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - Pipeline(memory=None, - steps=[('reduce_dim', PCA(copy=True,...)), - ('clf', SVC(C=1.0,...))]) + Pipeline(steps=[('reduce_dim', PCA()), + ('clf', SVC())]) The utility function :func:`make_pipeline` is a shorthand for constructing pipelines; @@ -53,31 +52,24 @@ filling in the names automatically:: >>> from sklearn.naive_bayes import MultinomialNB >>> from sklearn.preprocessing import Binarizer >>> make_pipeline(Binarizer(), MultinomialNB()) # doctest: +NORMALIZE_WHITESPACE - Pipeline(memory=None, - steps=[('binarizer', Binarizer(copy=True, threshold=0.0)), - ('multinomialnb', MultinomialNB(alpha=1.0, - class_prior=None, - fit_prior=True))]) + Pipeline(steps=[('binarizer', Binarizer()), + ('multinomialnb', MultinomialNB())]) The estimators of a pipeline are stored as a list in the ``steps`` attribute:: >>> pipe.steps[0] - ('reduce_dim', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None, - svd_solver='auto', tol=0.0, whiten=False)) + ('reduce_dim', PCA()) and as a ``dict`` in ``named_steps``:: >>> pipe.named_steps['reduce_dim'] - PCA(copy=True, iterated_power='auto', n_components=None, random_state=None, - svd_solver='auto', tol=0.0, whiten=False) + PCA() Parameters of the estimators in the pipeline can be accessed using the ``__`` syntax:: >>> pipe.set_params(clf__C=10) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - Pipeline(memory=None, - steps=[('reduce_dim', PCA(copy=True, iterated_power='auto',...)), - ('clf', SVC(C=10, cache_size=200, class_weight=None,...))]) + Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC(C=10))]) Attributes of named_steps map to keys, enabling tab completion in interactive environments:: @@ -152,8 +144,8 @@ object:: >>> pipe = Pipeline(estimators, memory=cachedir) >>> pipe # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS Pipeline(..., - 
steps=[('reduce_dim', PCA(copy=True,...)), - ('clf', SVC(C=1.0,...))]) + steps=[('reduce_dim', PCA()), + ('clf', SVC())]) >>> # Clear the cache directory when you don't need it anymore >>> rmtree(cachedir) @@ -169,8 +161,7 @@ object:: >>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)]) >>> pipe.fit(digits.data, digits.target) ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - Pipeline(memory=None, - steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))]) + Pipeline(steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))]) >>> # The pca instance can be inspected directly >>> print(pca1.components_) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS [[ -1.77484909e-19 ... 4.07058917e-18]] @@ -243,10 +234,8 @@ and ``value`` is an estimator object:: >>> estimators = [('linear_pca', PCA()), ('kernel_pca', KernelPCA())] >>> combined = FeatureUnion(estimators) >>> combined # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - FeatureUnion(n_jobs=1, - transformer_list=[('linear_pca', PCA(copy=True,...)), - ('kernel_pca', KernelPCA(alpha=1.0,...))], - transformer_weights=None) + FeatureUnion(transformer_list=[('linear_pca', PCA()), + ('kernel_pca', KernelPCA())]) Like pipelines, feature unions have a shorthand constructor called @@ -258,10 +247,8 @@ and ignored by setting to ``None``:: >>> combined.set_params(kernel_pca=None) ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - FeatureUnion(n_jobs=1, - transformer_list=[('linear_pca', PCA(copy=True,...)), - ('kernel_pca', None)], - transformer_weights=None) + FeatureUnion(transformer_list=[('linear_pca', PCA()), + ('kernel_pca', None)]) .. topic:: Examples: diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst index 709239687158e..b12c7d34ea425 100644 --- a/doc/modules/preprocessing.rst +++ b/doc/modules/preprocessing.rst @@ -73,7 +73,7 @@ This class is hence suitable for use in the early steps of a >>> scaler = preprocessing.StandardScaler().fit(X) >>> scaler - StandardScaler(copy=True, with_mean=True, with_std=True) + StandardScaler() >>> scaler.mean_ # doctest: +ELLIPSIS array([ 1. ..., 0. ..., 0.33...]) @@ -286,7 +286,7 @@ This class is hence suitable for use in the early steps of a >>> normalizer = preprocessing.Normalizer().fit(X) # fit does nothing >>> normalizer - Normalizer(copy=True, norm='l2') + Normalizer() The normalizer instance can then be used on sample vectors as any transformer:: @@ -341,7 +341,7 @@ as each sample is treated independently of others:: >>> binarizer = preprocessing.Binarizer().fit(X) # fit does nothing >>> binarizer - Binarizer(copy=True, threshold=0.0) + Binarizer() >>> binarizer.transform(X) array([[ 1., 0., 1.], @@ -398,8 +398,7 @@ Continuing the example above:: >>> enc = preprocessing.OneHotEncoder() >>> enc.fit([[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]]) # doctest: +ELLIPSIS - OneHotEncoder(categorical_features='all', dtype=<... 'numpy.float64'>, - handle_unknown='error', n_values='auto', sparse=True) + OneHotEncoder() >>> enc.transform([[0, 1, 3]]).toarray() array([[ 1., 0., 0., 1., 0., 0., 0., 0., 1.]]) @@ -418,8 +417,7 @@ features, one has to explicitly set ``n_values``. For example, >>> # Note that there are missing categorical values for the 2nd and 3rd >>> # features >>> enc.fit([[1, 2, 3], [0, 2, 0]]) # doctest: +ELLIPSIS - OneHotEncoder(categorical_features='all', dtype=<... 
'numpy.float64'>, - handle_unknown='error', n_values=[2, 3, 4], sparse=True) + OneHotEncoder(n_values=[2, 3, 4]) >>> enc.transform([[1, 0, 0]]).toarray() array([[ 0., 1., 1., 0., 0., 1., 0., 0., 0.]]) @@ -453,7 +451,7 @@ that contain the missing values:: >>> from sklearn.preprocessing import Imputer >>> imp = Imputer(missing_values='NaN', strategy='mean', axis=0) >>> imp.fit([[1, 2], [np.nan, 3], [7, 6]]) - Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0) + Imputer() >>> X = [[np.nan, 2], [6, np.nan], [7, 6]] >>> print(imp.transform(X)) # doctest: +ELLIPSIS [[ 4. 2. ] @@ -466,7 +464,7 @@ The :class:`Imputer` class also supports sparse matrices:: >>> X = sp.csc_matrix([[1, 2], [0, 3], [7, 6]]) >>> imp = Imputer(missing_values=0, strategy='mean', axis=0) >>> imp.fit(X) - Imputer(axis=0, copy=True, missing_values=0, strategy='mean', verbose=0) + Imputer(missing_values=0) >>> X_test = sp.csc_matrix([[0, 2], [6, 0], [7, 6]]) >>> print(imp.transform(X_test)) # doctest: +ELLIPSIS [[ 4. 2. ] diff --git a/doc/modules/preprocessing_targets.rst b/doc/modules/preprocessing_targets.rst index 88663a55fa0d4..5b8ccb192f04b 100644 --- a/doc/modules/preprocessing_targets.rst +++ b/doc/modules/preprocessing_targets.rst @@ -16,7 +16,7 @@ matrix from a list of multi-class labels:: >>> from sklearn import preprocessing >>> lb = preprocessing.LabelBinarizer() >>> lb.fit([1, 2, 6, 4, 2]) - LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False) + LabelBinarizer() >>> lb.classes_ array([1, 2, 4, 6]) >>> lb.transform([1, 6]) diff --git a/doc/modules/sgd.rst b/doc/modules/sgd.rst index e8febda201bf7..f23bf4fbdcecc 100644 --- a/doc/modules/sgd.rst +++ b/doc/modules/sgd.rst @@ -59,13 +59,9 @@ for the training samples:: >>> from sklearn.linear_model import SGDClassifier >>> X = [[0., 0.], [1., 1.]] >>> y = [0, 1] - >>> clf = SGDClassifier(loss="hinge", penalty="l2") + >>> clf = SGDClassifier() >>> clf.fit(X, y) - SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1, - eta0=0.0, fit_intercept=True, l1_ratio=0.15, - learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1, - penalty='l2', power_t=0.5, random_state=None, shuffle=True, - verbose=0, warm_start=False) + SGDClassifier() After being fitted, the model can then be used to predict new values:: diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index 386865d3d0a8a..8f69f563a852c 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -77,10 +77,7 @@ n_features]`` holding the training samples, and an array y of class labels >>> y = [0, 1] >>> clf = svm.SVC() >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC() After being fitted, the model can then be used to predict new values:: @@ -121,10 +118,7 @@ n_classes)``:: >>> Y = [0, 1, 2, 3] >>> clf = svm.SVC(decision_function_shape='ovo') >>> clf.fit(X, Y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovo', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC(decision_function_shape='ovo') >>> dec = clf.decision_function([[1]]) >>> dec.shape[1] # 4 classes: 4*3/2 = 6 6 @@ -139,10 +133,7 @@ two classes, only one model is trained:: >>> lin_clf = svm.LinearSVC() >>> 
lin_clf.fit(X, Y) # doctest: +NORMALIZE_WHITESPACE - LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, - intercept_scaling=1, loss='squared_hinge', max_iter=1000, - multi_class='ovr', penalty='l2', random_state=None, tol=0.0001, - verbose=0) + LinearSVC() >>> dec = lin_clf.decision_function([[1]]) >>> dec.shape[1] 4 @@ -319,8 +310,7 @@ floating point values instead of integer values:: >>> y = [0.5, 2.5] >>> clf = svm.SVR() >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto', - kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False) + SVR() >>> clf.predict([[1, 1]]) array([ 1.5]) @@ -520,10 +510,7 @@ test vectors must be provided. >>> # linear kernel computation >>> gram = np.dot(X, X.T) >>> clf.fit(gram, y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', - kernel='precomputed', max_iter=-1, probability=False, - random_state=None, shrinking=True, tol=0.001, verbose=False) + SVC(kernel='precomputed') >>> # predict on training examples >>> clf.predict(gram) array([0, 1]) diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst index 89600953a870f..d090671b52f33 100644 --- a/doc/tutorial/basic/tutorial.rst +++ b/doc/tutorial/basic/tutorial.rst @@ -179,10 +179,7 @@ which produces a new array that contains all but the last entry of ``digits.data``:: >>> clf.fit(digits.data[:-1], digits.target[:-1]) # doctest: +NORMALIZE_WHITESPACE - SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC(C=100.0, gamma=0.001) Now you can predict new values, in particular, we can ask to the classifier what is the digit of our last image in the ``digits`` dataset, @@ -218,10 +215,7 @@ persistence model, namely `pickle >> iris = datasets.load_iris() >>> X, y = iris.data, iris.target >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC() >>> import pickle >>> s = pickle.dumps(clf) @@ -292,19 +286,13 @@ maintained:: >>> iris = datasets.load_iris() >>> clf = SVC() >>> clf.fit(iris.data, iris.target) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC() >>> list(clf.predict(iris.data[:3])) [0, 0, 0] >>> clf.fit(iris.data, iris.target_names[iris.target]) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC() >>> list(clf.predict(iris.data[:3])) # doctest: +NORMALIZE_WHITESPACE ['setosa', 'setosa', 'setosa'] @@ -330,18 +318,12 @@ more than once will overwrite what was learned by any previous ``fit()``:: >>> clf = SVC() >>> clf.set_params(kernel='linear').fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - 
decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC(kernel='linear') >>> clf.predict(X_test) array([1, 0, 1, 1, 0]) >>> clf.set_params(kernel='rbf').fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC() >>> clf.predict(X_test) array([0, 0, 0, 1, 0]) @@ -401,4 +383,4 @@ is similarly possible for an instance to be assigned multiple labels:: In this case, the classifier is fit upon instances each assigned multiple labels. The :class:`MultiLabelBinarizer ` is used to binarize the 2d array of multilabels to ``fit`` upon. As a result, -``predict()`` returns a 2d array with multiple predicted labels for each instance. \ No newline at end of file +``predict()`` returns a 2d array with multiple predicted labels for each instance. diff --git a/doc/tutorial/statistical_inference/model_selection.rst b/doc/tutorial/statistical_inference/model_selection.rst index 315ca420e4d19..98e10e14bd1bc 100644 --- a/doc/tutorial/statistical_inference/model_selection.rst +++ b/doc/tutorial/statistical_inference/model_selection.rst @@ -216,7 +216,7 @@ estimator during the construction and exposes an estimator API:: >>> clf = GridSearchCV(estimator=svc, param_grid=dict(C=Cs), ... n_jobs=-1) >>> clf.fit(X_digits[:1000], y_digits[:1000]) # doctest: +ELLIPSIS - GridSearchCV(cv=None,... + GridSearchCV(... >>> clf.best_score_ # doctest: +ELLIPSIS 0.925... >>> clf.best_estimator_.C # doctest: +ELLIPSIS @@ -266,10 +266,7 @@ parameter automatically by cross-validation:: >>> X_diabetes = diabetes.data >>> y_diabetes = diabetes.target >>> lasso.fit(X_diabetes, y_diabetes) - LassoCV(alphas=None, copy_X=True, cv=None, eps=0.001, fit_intercept=True, - max_iter=1000, n_alphas=100, n_jobs=1, normalize=False, positive=False, - precompute='auto', random_state=None, selection='cyclic', tol=0.0001, - verbose=False) + LassoCV() >>> # The estimator chose automatically its lambda: >>> lasso.alpha_ # doctest: +ELLIPSIS 0.01229... diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index e5342c5cad64a..3e44e7197352b 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -94,9 +94,7 @@ Scikit-learn documentation for more information about this type of classifier.) 
>>> from sklearn.neighbors import KNeighborsClassifier >>> knn = KNeighborsClassifier() >>> knn.fit(iris_X_train, iris_y_train) # doctest: +NORMALIZE_WHITESPACE - KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', - metric_params=None, n_jobs=1, n_neighbors=5, p=2, - weights='uniform') + KNeighborsClassifier() >>> knn.predict(iris_X_test) array([1, 2, 1, 0, 0, 0, 2, 1, 2, 0]) >>> iris_y_test @@ -176,7 +174,7 @@ Linear models: :math:`y = X\beta + \epsilon` >>> from sklearn import linear_model >>> regr = linear_model.LinearRegression() >>> regr.fit(diabetes_X_train, diabetes_y_train) - LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False) + LinearRegression() >>> print(regr.coef_) [ 0.30349955 -237.63931533 510.53060544 327.73698041 -814.13170937 492.81458798 102.84845219 184.60648906 743.51961675 76.09517222] @@ -327,9 +325,7 @@ application of Occam's razor: *prefer simpler models*. >>> best_alpha = alphas[scores.index(max(scores))] >>> regr.alpha = best_alpha >>> regr.fit(diabetes_X_train, diabetes_y_train) - Lasso(alpha=0.025118864315095794, copy_X=True, fit_intercept=True, - max_iter=1000, normalize=False, positive=False, precompute=False, - random_state=None, selection='cyclic', tol=0.0001, warm_start=False) + Lasso(alpha=0.025118864315095794) >>> print(regr.coef_) [ 0. -212.43764548 517.19478111 313.77959962 -160.8303982 -0. -187.19554705 69.38229038 508.66011217 71.84239008] @@ -370,10 +366,7 @@ function or **logistic** function: >>> logistic = linear_model.LogisticRegression(C=1e5) >>> logistic.fit(iris_X_train, iris_y_train) - LogisticRegression(C=100000.0, class_weight=None, dual=False, - fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, - solver='liblinear', tol=0.0001, verbose=0, warm_start=False) + LogisticRegression(C=100000.0) This is known as :class:`LogisticRegression`. @@ -454,10 +447,7 @@ classification --:class:`SVC` (Support Vector Classification). >>> from sklearn import svm >>> svc = svm.SVC(kernel='linear') >>> svc.fit(iris_X_train, iris_y_train) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC(kernel='linear') .. warning:: **Normalizing data** diff --git a/doc/tutorial/statistical_inference/unsupervised_learning.rst b/doc/tutorial/statistical_inference/unsupervised_learning.rst index be32fabd96cb8..b311006260880 100644 --- a/doc/tutorial/statistical_inference/unsupervised_learning.rst +++ b/doc/tutorial/statistical_inference/unsupervised_learning.rst @@ -38,8 +38,8 @@ algorithms. The simplest clustering algorithm is >>> y_iris = iris.target >>> k_means = cluster.KMeans(n_clusters=3) - >>> k_means.fit(X_iris) # doctest: +ELLIPSIS - KMeans(algorithm='auto', copy_x=True, init='k-means++', ... + >>> k_means.fit(X_iris) + KMeans(n_clusters=3) >>> print(k_means.labels_[::10]) [1 1 1 1 1 0 0 0 0 0 2 2 2 2 2] >>> print(y_iris[::10]) @@ -117,8 +117,8 @@ algorithms. The simplest clustering algorithm is ... face = misc.face(gray=True) >>> X = face.reshape((-1, 1)) # We need an (n_sample, n_feature) array >>> k_means = cluster.KMeans(n_clusters=5, n_init=1) - >>> k_means.fit(X) # doctest: +ELLIPSIS - KMeans(algorithm='auto', copy_x=True, init='k-means++', ... 
+ >>> k_means.fit(X) + KMeans(n_clusters=5, n_init=1) >>> values = k_means.cluster_centers_.squeeze() >>> labels = k_means.labels_ >>> face_compressed = np.choose(labels, values) @@ -215,7 +215,7 @@ transposed data. >>> agglo = cluster.FeatureAgglomeration(connectivity=connectivity, ... n_clusters=32) >>> agglo.fit(X) # doctest: +ELLIPSIS - FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',... + FeatureAgglomeration(connectivity=... >>> X_reduced = agglo.transform(X) >>> X_approx = agglo.inverse_transform(X_reduced) @@ -275,8 +275,7 @@ data by projecting on a principal subspace. >>> from sklearn import decomposition >>> pca = decomposition.PCA() >>> pca.fit(X) - PCA(copy=True, iterated_power='auto', n_components=None, random_state=None, - svd_solver='auto', tol=0.0, whiten=False) + PCA() >>> print(pca.explained_variance_) # doctest: +SKIP [ 2.18565811e+00 1.19346747e+00 8.43026679e-32] From 42eddd22f1e73bbe85ad11e56b0b059f86c338b2 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 17:53:56 +0200 Subject: [PATCH 09/12] fix repr for deprecated classes --- sklearn/base.py | 3 ++- sklearn/tests/test_base.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sklearn/base.py b/sklearn/base.py index 5e6482946b960..284d77b1d6be5 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -288,7 +288,8 @@ def _changed_params(self): params = self.get_params(deep=False) if not get_config()['show_default_parameters']: filtered_params = {} - init_params = signature(self.__init__).parameters + init = getattr(self.__init__, 'deprecated_original', self.__init__) + init_params = signature(init).parameters for k, v in params.items(): if v != init_params[k].default: filtered_params[k] = v diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index bbb53fd7eb1b2..45100368f3f26 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -61,6 +61,11 @@ def __init__(self, a=np.array([0])): self.a = a.copy() +@deprecated("This estimator is deprecated") +class DeprecatedEstimator(T): + pass + + class DeprecatedAttributeEstimator(BaseEstimator): def __init__(self, a=None, b=None): self.a = a @@ -205,6 +210,16 @@ def test_repr(): assert_equal(repr(test), "T(a=K(), b=K())") +@ignore_warnings(category=DeprecationWarning) +def test_short_repr_deprecated(): + with sklearn.config_context(show_default_parameters=False): + est = DeprecatedEstimator() + assert_equal(repr(est), "DeprecatedEstimator()") + + est = DeprecatedEstimator(a='c') + assert_equal(repr(est), "DeprecatedEstimator(a='c')") + + def test_str(): # Smoke test the str of the base estimator my_estimator = MyEstimator() From 484190d04d0bcbb5f8641f0fca55bb6be7b8fd84 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 17:57:54 +0200 Subject: [PATCH 10/12] minor fix for deprecated DP repr --- doc/modules/gaussian_process.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/gaussian_process.rst b/doc/modules/gaussian_process.rst index 1d072f5c074e3..0588864b6d1c3 100644 --- a/doc/modules/gaussian_process.rst +++ b/doc/modules/gaussian_process.rst @@ -643,7 +643,7 @@ parameters or alternatively it uses the given parameters. 
>>> x = np.atleast_2d(np.linspace(0, 10, 1000)).T >>> gp = gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1) >>> gp.fit(X, y) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - GaussianProcess(theta0=array([[ 0.01]]), thetaL=array([[ 0.0001]]), + GaussianProcess(...theta0=array([[ 0.01]]), thetaL=array([[ 0.0001]]), thetaU=array([[ 0.1]])) >>> y_pred, sigma2_pred = gp.predict(x, eval_MSE=True) From 4f2059a95b9c7a17255761925fa9f72f93e63504 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 18:02:06 +0200 Subject: [PATCH 11/12] added whatsnew entry --- doc/whats_new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index e7bb058b3c69b..13a033d9ac7fc 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -65,6 +65,11 @@ New features Enhancements ............ + - Simplified string representations (``repr``) of all estimators which + can be enabled via :func:`sklearn.set_config`. The simplified + representation only shows parameters with settings that differ + from the default parameter settings. :issue:`9039` by `Andreas Müller`_. + - Update Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in documentation build with Sphinx>1.5 :issue:`8010`, :issue:`7986` :user:`Oscar Najera ` From 5bbe0ed499eb626ebb98bdcb0dcf75173b905940 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 20:52:49 +0200 Subject: [PATCH 12/12] add simple repr to common tests --- sklearn/utils/estimator_checks.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 5c8c0e90c94c0..88eb4e685fe7c 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -11,6 +11,7 @@ from scipy.stats import rankdata import struct +import sklearn from sklearn.externals.six.moves import zip from sklearn.externals.joblib import hash, Memory from sklearn.utils.testing import assert_raises @@ -1553,6 +1554,8 @@ def check_parameters_default_constructible(name, Estimator): clone(estimator) # test __repr__ repr(estimator) + with sklearn.config_context(show_default_parameters=False): + repr(estimator) # test that set_params returns self assert_true(estimator.set_params() is estimator)
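
To illustrate how the option added by this series is meant to be used, here is a minimal usage sketch. It assumes the API as it stands at the end of the series (patches 05-07): a boolean ``show_default_parameters`` entry in the global configuration, toggled via ``sklearn.set_config`` / ``sklearn.config_context`` or the ``SKLEARN_SHOW_DEFAULT_PARAMETERS`` environment variable. It requires a scikit-learn build with these patches applied; the option is not part of any released API.

    # Sketch only: assumes a scikit-learn build with this patch series applied.
    import sklearn
    from sklearn.linear_model import Lasso

    est = Lasso(alpha=0.1)

    # Default behaviour: the repr lists every constructor parameter, e.g.
    # Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000, ...)
    print(repr(est))

    # Temporarily show only the parameters that differ from their defaults.
    with sklearn.config_context(show_default_parameters=False):
        print(repr(est))  # Lasso(alpha=0.1)

    # Or flip the setting globally for the rest of the session.
    sklearn.set_config(show_default_parameters=False)
    print(repr(est))  # Lasso(alpha=0.1)

The same behaviour can be selected for a whole process from the environment, as the Makefile change in patch 08 does for the doctest run; per patch 07, setting ``SKLEARN_SHOW_DEFAULT_PARAMETERS`` to an empty string, ``False`` or ``FALSE`` disables the display of default parameters at import time.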