From 9f13329971dab61ff1b3a15587e4340cfd9b7903 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 7 Jun 2017 14:53:18 +0200 Subject: [PATCH 01/12] work on fancy repr --- sklearn/base.py | 22 +++++++++++++++++----- sklearn/model_selection/_search.py | 12 ++++++------ 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index 119696f5b3722..a8c038b8cd01f 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -121,7 +121,7 @@ def clone(estimator, safe=True): ############################################################################### -def _pprint(params, offset=0, printer=repr): +def _pprint(params, offset=0, printer=repr, cutoff=500): """Pretty print the dictionary 'params' Parameters @@ -150,9 +150,9 @@ def _pprint(params, offset=0, printer=repr): # architectures and versions. this_repr = '%s=%s' % (k, str(v)) else: - # use repr of the rest + # use printer of the rest this_repr = '%s=%s' % (k, printer(v)) - if len(this_repr) > 500: + if cutoff is not None and len(this_repr) > cutoff: this_repr = this_repr[:300] + '...' + this_repr[-100:] if i > 0: if (this_line_length + len(this_repr) >= 75 or '\n' in this_repr): @@ -284,9 +284,22 @@ def set_params(self, **params): setattr(self, key, value) return self + def _changed_params(self): + params = self.get_params(deep=False) + filtered_params = {} + default_params = {} + init_params = signature(self.__init__).parameters + for k, v in params.items(): + if v == init_params[k].default: + default_params[k] = v + else: + filtered_params[k] = v + return filtered_params, default_params + def __repr__(self): class_name = self.__class__.__name__ - return '%s(%s)' % (class_name, _pprint(self.get_params(deep=False), + params = self.get_params(deep=False) + return '%s(%s)' % (class_name, _pprint(params, offset=len(class_name),),) def __getstate__(self): @@ -316,7 +329,6 @@ def __setstate__(self, state): self.__dict__.update(state) - ############################################################################### class ClassifierMixin(object): """Mixin class for all classifiers in scikit-learn.""" diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 10d0b3171992b..452aea09772c1 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -386,7 +386,7 @@ def __init__(self, estimator, scoring=None, self.scoring = scoring self.estimator = estimator self.n_jobs = n_jobs - self.fit_params = fit_params if fit_params is not None else {} + self.fit_params = fit_params self.iid = iid self.refit = refit self.cv = cv @@ -836,7 +836,7 @@ class GridSearchCV(BaseSearchCV): kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=..., verbose=False), - fit_params={}, iid=..., n_jobs=1, + fit_params=None, iid=..., n_jobs=1, param_grid=..., pre_dispatch=..., refit=..., return_train_score=..., scoring=..., verbose=...) 
>>> sorted(clf.cv_results_.keys()) @@ -1196,10 +1196,10 @@ def __init__(self, estimator, param_distributions, n_iter=10, scoring=None, self.n_iter = n_iter self.random_state = random_state super(RandomizedSearchCV, self).__init__( - estimator=estimator, scoring=scoring, fit_params=fit_params, - n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, - pre_dispatch=pre_dispatch, error_score=error_score, - return_train_score=return_train_score) + estimator=estimator, scoring=scoring, fit_params=fit_params, + n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose, + pre_dispatch=pre_dispatch, error_score=error_score, + return_train_score=return_train_score) def _get_param_iterator(self): """Return ParameterSampler instance for the given distributions""" From e3b662e7cb627561cc5bf3f4c2000208d2e866d3 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Wed, 7 Jun 2017 14:58:32 +0200 Subject: [PATCH 02/12] add printoptions --- sklearn/base.py | 46 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/sklearn/base.py b/sklearn/base.py index a8c038b8cd01f..e0ed54f2bf531 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -13,6 +13,33 @@ from . import __version__ +_PRINTOPTIONS = {'parameters': 'all'} + + +def set_print(parameters=None): + """Set estimator print options. + + WARNING: This functionality is experimental and might be removed or changed + at any time. + + Parameters + ---------- + parameters : None, 'all' or 'changed', default=None + Which parameters to show when printing estimators. + If None, this setting is not changed, if 'all', + all parameters are shown, if 'changed', only the + parameters that are not at their default value are shown. + + Returns + ------- + printoptions : dict + Current print options. 
+ """ + if parameters is not None: + _PRINTOPTIONS['parameters'] = parameters + return _PRINTOPTIONS + + ############################################################################## def _first_and_last_element(arr): """Returns first and last element of numpy array or sparse matrix.""" @@ -286,19 +313,18 @@ def set_params(self, **params): def _changed_params(self): params = self.get_params(deep=False) - filtered_params = {} - default_params = {} - init_params = signature(self.__init__).parameters - for k, v in params.items(): - if v == init_params[k].default: - default_params[k] = v - else: - filtered_params[k] = v - return filtered_params, default_params + if _PRINTOPTIONS['parameters'] == 'changed': + filtered_params = {} + init_params = signature(self.__init__).parameters + for k, v in params.items(): + if v != init_params[k].default: + filtered_params[k] = v + return filtered_params + return params def __repr__(self): class_name = self.__class__.__name__ - params = self.get_params(deep=False) + params = self._changed_params() return '%s(%s)' % (class_name, _pprint(params, offset=len(class_name),),) From 00a10d81594330f44e61cfa13af8cf9061dab93a Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 16:32:22 +0200 Subject: [PATCH 03/12] use global option mechanism to change repr --- sklearn/__init__.py | 19 ++++++++++++++----- sklearn/base.py | 31 ++----------------------------- 2 files changed, 16 insertions(+), 34 deletions(-) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index b4916dd5925de..8b510840c1793 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -18,7 +18,11 @@ import os from contextlib import contextmanager as _contextmanager -_ASSUME_FINITE = bool(os.environ.get('SKLEARN_ASSUME_FINITE', False)) +_CONFIG = {'assume_finite': bool(os.environ.get('SKLEARN_ASSUME_FINITE', + False)), + 'show_parameters': bool(os.environ.get('SKLEARN_SHOW_PARAMETERS', + 'all')) + } def get_config(): @@ -29,10 +33,10 @@ def get_config(): config : dict Keys are parameter names that can be passed to :func:`set_config`. """ - return {'assume_finite': _ASSUME_FINITE} + return _CONFIG -def set_config(assume_finite=None): +def set_config(assume_finite=None, show_parameters=None): """Set global scikit-learn configuration Parameters @@ -42,10 +46,15 @@ def set_config(assume_finite=None): saving time, but leading to potential crashes. If False, validation for finiteness will be performed, avoiding error. + show_parameters : str, 'all' or 'changed' + Whether to include all estimator parameters in the + string representation or only the changed ones. """ - global _ASSUME_FINITE + global _CONFIG if assume_finite is not None: - _ASSUME_FINITE = assume_finite + _CONFIG.update(assume_finite=assume_finite) + if show_parameters is not None: + _CONFIG.update(show_parmeters=show_parameters) @_contextmanager diff --git a/sklearn/base.py b/sklearn/base.py index e0ed54f2bf531..ba3d118c8b029 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -10,34 +10,7 @@ from scipy import sparse from .externals import six from .utils.fixes import signature -from . import __version__ - - -_PRINTOPTIONS = {'parameters': 'all'} - - -def set_print(parameters=None): - """Set estimator print options. - - WARNING: This functionality is experimental and might be removed or changed - at any time. - - Parameters - ---------- - parameters : None, 'all' or 'changed', default=None - Which parameters to show when printing estimators. 
- If None, this setting is not changed, if 'all', - all parameters are shown, if 'changed', only the - parameters that are not at their default value are shown. - - Returns - ------- - printoptions : dict - Current print options. - """ - if parameters is not None: - _PRINTOPTIONS['parameters'] = parameters - return _PRINTOPTIONS +from . import __version__, get_config ############################################################################## @@ -313,7 +286,7 @@ def set_params(self, **params): def _changed_params(self): params = self.get_params(deep=False) - if _PRINTOPTIONS['parameters'] == 'changed': + if get_config()['show_parameters'] == 'changed': filtered_params = {} init_params = signature(self.__init__).parameters for k, v in params.items(): From 819827359805255e63a28b33dc4534ef2c42b6e7 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 16:39:15 +0200 Subject: [PATCH 04/12] typo --- sklearn/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 8b510840c1793..51c280639e120 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -54,7 +54,7 @@ def set_config(assume_finite=None, show_parameters=None): if assume_finite is not None: _CONFIG.update(assume_finite=assume_finite) if show_parameters is not None: - _CONFIG.update(show_parmeters=show_parameters) + _CONFIG.update(show_parameters=show_parameters) @_contextmanager From 63501914481d6ef59aceec39b15d65b42421799a Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 16:52:16 +0200 Subject: [PATCH 05/12] boolean option is neater, add test --- sklearn/__init__.py | 12 +++++----- sklearn/tests/test_base.py | 3 +++ sklearn/tests/test_config.py | 43 +++++++++++++++++++----------------- 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 51c280639e120..5ea9ae5d0ffc7 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -20,8 +20,8 @@ _CONFIG = {'assume_finite': bool(os.environ.get('SKLEARN_ASSUME_FINITE', False)), - 'show_parameters': bool(os.environ.get('SKLEARN_SHOW_PARAMETERS', - 'all')) + 'show_default_parameters': + bool(os.environ.get('SKLEARN_SHOW_DEFAULT_PARAMETERS', True)) } @@ -36,7 +36,7 @@ def get_config(): return _CONFIG -def set_config(assume_finite=None, show_parameters=None): +def set_config(assume_finite=None, show_default_parameters=None): """Set global scikit-learn configuration Parameters @@ -46,15 +46,15 @@ def set_config(assume_finite=None, show_parameters=None): saving time, but leading to potential crashes. If False, validation for finiteness will be performed, avoiding error. - show_parameters : str, 'all' or 'changed' + show_default_parameters : bool, optional Whether to include all estimator parameters in the string representation or only the changed ones. 
""" global _CONFIG if assume_finite is not None: _CONFIG.update(assume_finite=assume_finite) - if show_parameters is not None: - _CONFIG.update(show_parameters=show_parameters) + if show_default_parameters is not None: + _CONFIG.update(show_default_parameters=show_default_parameters) @_contextmanager diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index 8112e7fd8196b..bbb53fd7eb1b2 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -201,6 +201,9 @@ def test_repr(): some_est = T(a=["long_params"] * 1000) assert_equal(len(repr(some_est)), 415) + with sklearn.config_context(show_default_parameters=False): + assert_equal(repr(test), "T(a=K(), b=K())") + def test_str(): # Smoke test the str of the base estimator diff --git a/sklearn/tests/test_config.py b/sklearn/tests/test_config.py index b968e7b7917ea..0c790226210db 100644 --- a/sklearn/tests/test_config.py +++ b/sklearn/tests/test_config.py @@ -1,40 +1,43 @@ from sklearn import get_config, set_config, config_context from sklearn.utils.testing import assert_equal, assert_raises +dict_true = {'assume_finite': True, 'show_default_parameters': True} +dict_false = {'assume_finite': False, 'show_default_parameters': True} + def test_config_context(): - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) # Not using as a context manager affects nothing config_context(assume_finite=True) - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) with config_context(assume_finite=True): - assert_equal(get_config(), {'assume_finite': True}) - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_true) + assert_equal(get_config(), dict_false) with config_context(assume_finite=True): with config_context(assume_finite=None): - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) with config_context(assume_finite=False): - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) with config_context(assume_finite=None): - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) # global setting will not be retained outside of context that # did not modify this setting set_config(assume_finite=True) - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) # No positional arguments assert_raises(TypeError, config_context, True) @@ -43,26 +46,26 @@ def test_config_context(): def test_config_context_exception(): - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) try: with config_context(assume_finite=True): - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) raise ValueError() except ValueError: pass - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) def test_set_config(): - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) set_config(assume_finite=None) - assert_equal(get_config(), {'assume_finite': False}) + 
assert_equal(get_config(), dict_false) set_config(assume_finite=True) - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) set_config(assume_finite=None) - assert_equal(get_config(), {'assume_finite': True}) + assert_equal(get_config(), dict_true) set_config(assume_finite=False) - assert_equal(get_config(), {'assume_finite': False}) + assert_equal(get_config(), dict_false) # No unknown arguments assert_raises(TypeError, set_config, do_something_else=True) From 85a32179c770e8f21fddc2dca5886b8098894a97 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 16:53:34 +0200 Subject: [PATCH 06/12] actually change in base --- sklearn/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/base.py b/sklearn/base.py index ba3d118c8b029..5e6482946b960 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -286,7 +286,7 @@ def set_params(self, **params): def _changed_params(self): params = self.get_params(deep=False) - if get_config()['show_parameters'] == 'changed': + if not get_config()['show_default_parameters']: filtered_params = {} init_params = signature(self.__init__).parameters for k, v in params.items(): From 1503eadced2945509a4d26f39bfc42568b7debd1 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 17:04:42 +0200 Subject: [PATCH 07/12] interpret empty string, False and FALSE as False --- sklearn/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/__init__.py b/sklearn/__init__.py index 5ea9ae5d0ffc7..284be69e4014a 100644 --- a/sklearn/__init__.py +++ b/sklearn/__init__.py @@ -21,7 +21,8 @@ _CONFIG = {'assume_finite': bool(os.environ.get('SKLEARN_ASSUME_FINITE', False)), 'show_default_parameters': - bool(os.environ.get('SKLEARN_SHOW_DEFAULT_PARAMETERS', True)) + (os.environ.get('SKLEARN_SHOW_DEFAULT_PARAMETERS') + not in ["False", "FALSE", ""]) } From 9d699dd0a5741cd24969f3b924740844ffba2a4b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 17:45:21 +0200 Subject: [PATCH 08/12] run doctests with new option --- Makefile | 2 +- doc/modules/feature_extraction.rst | 12 ++---- doc/modules/gaussian_process.rst | 8 +--- doc/modules/kernel_approximation.rst | 6 +-- doc/modules/linear_model.rst | 24 ++++------- doc/modules/model_evaluation.rst | 12 ++---- doc/modules/model_persistence.rst | 5 +-- doc/modules/neighbors.rst | 2 +- doc/modules/neural_networks_supervised.rst | 18 ++------ doc/modules/pipeline.rst | 41 +++++++------------ doc/modules/preprocessing.rst | 16 ++++---- doc/modules/preprocessing_targets.rst | 2 +- doc/modules/sgd.rst | 8 +--- doc/modules/svm.rst | 23 +++-------- doc/tutorial/basic/tutorial.rst | 32 ++++----------- .../statistical_inference/model_selection.rst | 7 +--- .../supervised_learning.rst | 20 +++------ .../unsupervised_learning.rst | 13 +++--- 18 files changed, 73 insertions(+), 178 deletions(-) diff --git a/Makefile b/Makefile index aa6203f3cdbe7..360132dc6eecd 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ test-sphinxext: $(NOSETESTS) -s -v doc/sphinxext/ test-doc: ifeq ($(BITS),64) - $(NOSETESTS) -s -v doc/*.rst doc/modules/ doc/datasets/ \ + SKLEARN_SHOW_DEFAULT_PARAMETERS=False $(NOSETESTS) -s -v doc/*.rst doc/modules/ doc/datasets/ \ doc/developers doc/tutorial/basic doc/tutorial/statistical_inference \ doc/tutorial/text_analytics endif diff --git a/doc/modules/feature_extraction.rst b/doc/modules/feature_extraction.rst index 32e53f0817e6e..e770ca2b36832 100644 --- a/doc/modules/feature_extraction.rst +++ 
b/doc/modules/feature_extraction.rst @@ -289,14 +289,9 @@ This model has many parameters, however the default values are quite reasonable (please see the :ref:`reference documentation ` for the details):: - >>> vectorizer = CountVectorizer(min_df=1) + >>> vectorizer = CountVectorizer() >>> vectorizer # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS - CountVectorizer(analyzer=...'word', binary=False, decode_error=...'strict', - dtype=<... 'numpy.int64'>, encoding=...'utf-8', input=...'content', - lowercase=True, max_df=1.0, max_features=None, min_df=1, - ngram_range=(1, 1), preprocessor=None, stop_words=None, - strip_accents=None, token_pattern=...'(?u)\\b\\w\\w+\\b', - tokenizer=None, vocabulary=None) + CountVectorizer() Let's use it to tokenize and count the word occurrences of a minimalistic corpus of text documents:: @@ -440,8 +435,7 @@ class:: >>> from sklearn.feature_extraction.text import TfidfTransformer >>> transformer = TfidfTransformer(smooth_idf=False) >>> transformer # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS - TfidfTransformer(norm=...'l2', smooth_idf=False, sublinear_tf=False, - use_idf=True) + TfidfTransformer(smooth_idf=False) Again please see the :ref:`reference documentation ` for the details on all the parameters. diff --git a/doc/modules/gaussian_process.rst b/doc/modules/gaussian_process.rst index 7fae49349f342..1d072f5c074e3 100644 --- a/doc/modules/gaussian_process.rst +++ b/doc/modules/gaussian_process.rst @@ -643,12 +643,8 @@ parameters or alternatively it uses the given parameters. >>> x = np.atleast_2d(np.linspace(0, 10, 1000)).T >>> gp = gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1) >>> gp.fit(X, y) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - GaussianProcess(beta0=None, corr=, - normalize=True, nugget=array(2.22...-15), - optimizer='fmin_cobyla', random_start=1, random_state=... 
- regr=, storage_mode='full', - theta0=array([[ 0.01]]), thetaL=array([[ 0.0001]]), - thetaU=array([[ 0.1]]), verbose=False) + GaussianProcess(theta0=array([[ 0.01]]), thetaL=array([[ 0.0001]]), + thetaU=array([[ 0.1]])) >>> y_pred, sigma2_pred = gp.predict(x, eval_MSE=True) diff --git a/doc/modules/kernel_approximation.rst b/doc/modules/kernel_approximation.rst index 72363faf66403..0e4d3fce9956d 100644 --- a/doc/modules/kernel_approximation.rst +++ b/doc/modules/kernel_approximation.rst @@ -61,11 +61,7 @@ a linear algorithm, for example a linear SVM:: >>> X_features = rbf_feature.fit_transform(X) >>> clf = SGDClassifier() # doctest: +NORMALIZE_WHITESPACE >>> clf.fit(X_features, y) - SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1, - eta0=0.0, fit_intercept=True, l1_ratio=0.15, - learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1, - penalty='l2', power_t=0.5, random_state=None, shuffle=True, - verbose=0, warm_start=False) + SGDClassifier() >>> clf.score(X_features, y) 1.0 diff --git a/doc/modules/linear_model.rst b/doc/modules/linear_model.rst index b3e82b56a48a2..7b77ef5a7a24e 100644 --- a/doc/modules/linear_model.rst +++ b/doc/modules/linear_model.rst @@ -45,7 +45,7 @@ and will store the coefficients :math:`w` of the linear model in its >>> from sklearn import linear_model >>> reg = linear_model.LinearRegression() >>> reg.fit ([[0, 0], [1, 1], [2, 2]], [0, 1, 2]) - LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False) + LinearRegression() >>> reg.coef_ array([ 0.5, 0.5]) @@ -101,10 +101,9 @@ arrays X, y and will store the coefficients :math:`w` of the linear model in its ``coef_`` member:: >>> from sklearn import linear_model - >>> reg = linear_model.Ridge (alpha = .5) + >>> reg = linear_model.Ridge(alpha=.5) >>> reg.fit ([[0, 0], [0, 0], [1, 1]], [0, .1, 1]) # doctest: +NORMALIZE_WHITESPACE - Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None, - normalize=False, random_state=None, solver='auto', tol=0.001) + Ridge(alpha=0.5) >>> reg.coef_ array([ 0.34545455, 0.34545455]) >>> reg.intercept_ #doctest: +ELLIPSIS @@ -140,8 +139,7 @@ as GridSearchCV except that it defaults to Generalized Cross-Validation >>> from sklearn import linear_model >>> reg = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0]) >>> reg.fit([[0, 0], [0, 0], [1, 1]], [0, .1, 1]) # doctest: +SKIP - RidgeCV(alphas=[0.1, 1.0, 10.0], cv=None, fit_intercept=True, scoring=None, - normalize=False) + RidgeCV(alphas=[0.1, 1.0, 10.0]) >>> reg.alpha_ # doctest: +SKIP 0.1 @@ -182,11 +180,9 @@ the algorithm to fit the coefficients. See :ref:`least_angle_regression` for another implementation:: >>> from sklearn import linear_model - >>> reg = linear_model.Lasso(alpha = 0.1) + >>> reg = linear_model.Lasso(alpha=0.1) >>> reg.fit([[0, 0], [1, 1]], [0, 1]) - Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000, - normalize=False, positive=False, precompute=False, random_state=None, - selection='cyclic', tol=0.0001, warm_start=False) + Lasso(alpha=0.1) >>> reg.predict([[1, 1]]) array([ 0.8]) @@ -454,9 +450,7 @@ function of the norm of its coefficients. >>> from sklearn import linear_model >>> reg = linear_model.LassoLars(alpha=.1) >>> reg.fit([[0, 0], [1, 1]], [0, 1]) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE - LassoLars(alpha=0.1, copy_X=True, eps=..., fit_intercept=True, - fit_path=True, max_iter=500, normalize=True, positive=False, - precompute='auto', verbose=False) + LassoLars(alpha=0.1) >>> reg.coef_ # doctest: +ELLIPSIS array([ 0.717157..., 0. 
]) @@ -617,9 +611,7 @@ Bayesian Ridge Regression is used for regression:: >>> Y = [0., 1., 2., 3.] >>> reg = linear_model.BayesianRidge() >>> reg.fit(X, Y) - BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True, - fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300, - normalize=False, tol=0.001, verbose=False) + BayesianRidge() After being fitted, the model can then be used to predict new values:: diff --git a/doc/modules/model_evaluation.rst b/doc/modules/model_evaluation.rst index 7d65806acb807..64664f99c084b 100644 --- a/doc/modules/model_evaluation.rst +++ b/doc/modules/model_evaluation.rst @@ -802,10 +802,7 @@ with a svm classifier in a binary class problem:: >>> y = [-1, 1] >>> est = svm.LinearSVC(random_state=0) >>> est.fit(X, y) - LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, - intercept_scaling=1, loss='squared_hinge', max_iter=1000, - multi_class='ovr', penalty='l2', random_state=0, tol=0.0001, - verbose=0) + LinearSVC(random_state=0) >>> pred_decision = est.decision_function([[-2], [3], [0.5]]) >>> pred_decision # doctest: +ELLIPSIS array([-2.18..., 2.36..., 0.09...]) @@ -820,10 +817,7 @@ with a svm classifier in a multiclass problem:: >>> labels = np.array([0, 1, 2, 3]) >>> est = svm.LinearSVC() >>> est.fit(X, Y) - LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, - intercept_scaling=1, loss='squared_hinge', max_iter=1000, - multi_class='ovr', penalty='l2', random_state=None, tol=0.0001, - verbose=0) + LinearSVC() >>> pred_decision = est.decision_function([[-1], [2], [3]]) >>> y_true = [0, 2, 3] >>> hinge_loss(y_true, pred_decision, labels) #doctest: +ELLIPSIS @@ -1567,7 +1561,7 @@ Next, let's compare the accuracy of ``SVC`` and ``most_frequent``:: 0.63... >>> clf = DummyClassifier(strategy='most_frequent',random_state=0) >>> clf.fit(X_train, y_train) - DummyClassifier(constant=None, random_state=0, strategy='most_frequent') + DummyClassifier(random_state=0, strategy='most_frequent') >>> clf.score(X_test, y_test) # doctest: +ELLIPSIS 0.57... diff --git a/doc/modules/model_persistence.rst b/doc/modules/model_persistence.rst index 5b83bc28a7b1e..729086c9c9197 100644 --- a/doc/modules/model_persistence.rst +++ b/doc/modules/model_persistence.rst @@ -22,10 +22,7 @@ persistence model, namely `pickle >> iris = datasets.load_iris() >>> X, y = iris.data, iris.target >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC() >>> import pickle >>> s = pickle.dumps(clf) diff --git a/doc/modules/neighbors.rst b/doc/modules/neighbors.rst index 1440c49403a5c..586e1314186db 100644 --- a/doc/modules/neighbors.rst +++ b/doc/modules/neighbors.rst @@ -478,7 +478,7 @@ for more complex methods that do not make this assumption. Usage of the default >>> y = np.array([1, 1, 1, 2, 2, 2]) >>> clf = NearestCentroid() >>> clf.fit(X, y) - NearestCentroid(metric='euclidean', shrink_threshold=None) + NearestCentroid() >>> print(clf.predict([[-0.8, -1]])) [1] diff --git a/doc/modules/neural_networks_supervised.rst b/doc/modules/neural_networks_supervised.rst index 292ed903eeffc..e07895b03cb79 100644 --- a/doc/modules/neural_networks_supervised.rst +++ b/doc/modules/neural_networks_supervised.rst @@ -90,13 +90,8 @@ training samples:: ... hidden_layer_sizes=(5, 2), random_state=1) ... 
>>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', - beta_1=0.9, beta_2=0.999, early_stopping=False, - epsilon=1e-08, hidden_layer_sizes=(5, 2), learning_rate='constant', - learning_rate_init=0.001, max_iter=200, momentum=0.9, - nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True, - solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False, - warm_start=False) + MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2), random_state=1, + solver='lbfgs') After fitting (training), the model can predict labels for new samples:: @@ -138,13 +133,8 @@ indices where the value is `1` represents the assigned classes of that sample:: ... hidden_layer_sizes=(15,), random_state=1) ... >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto', - beta_1=0.9, beta_2=0.999, early_stopping=False, - epsilon=1e-08, hidden_layer_sizes=(15,), learning_rate='constant', - learning_rate_init=0.001, max_iter=200, momentum=0.9, - nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True, - solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False, - warm_start=False) + MLPClassifier(alpha=1e-05, hidden_layer_sizes=(15,), random_state=1, + solver='lbfgs') >>> clf.predict([[1., 2.]]) array([[1, 1]]) >>> clf.predict([[0., 0.]]) diff --git a/doc/modules/pipeline.rst b/doc/modules/pipeline.rst index c90f35753fb00..2cd0f02273ce8 100644 --- a/doc/modules/pipeline.rst +++ b/doc/modules/pipeline.rst @@ -40,9 +40,8 @@ is an estimator object:: >>> estimators = [('reduce_dim', PCA()), ('clf', SVC())] >>> pipe = Pipeline(estimators) >>> pipe # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - Pipeline(memory=None, - steps=[('reduce_dim', PCA(copy=True,...)), - ('clf', SVC(C=1.0,...))]) + Pipeline(steps=[('reduce_dim', PCA()), + ('clf', SVC())]) The utility function :func:`make_pipeline` is a shorthand for constructing pipelines; @@ -53,31 +52,24 @@ filling in the names automatically:: >>> from sklearn.naive_bayes import MultinomialNB >>> from sklearn.preprocessing import Binarizer >>> make_pipeline(Binarizer(), MultinomialNB()) # doctest: +NORMALIZE_WHITESPACE - Pipeline(memory=None, - steps=[('binarizer', Binarizer(copy=True, threshold=0.0)), - ('multinomialnb', MultinomialNB(alpha=1.0, - class_prior=None, - fit_prior=True))]) + Pipeline(steps=[('binarizer', Binarizer()), + ('multinomialnb', MultinomialNB())]) The estimators of a pipeline are stored as a list in the ``steps`` attribute:: >>> pipe.steps[0] - ('reduce_dim', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None, - svd_solver='auto', tol=0.0, whiten=False)) + ('reduce_dim', PCA()) and as a ``dict`` in ``named_steps``:: >>> pipe.named_steps['reduce_dim'] - PCA(copy=True, iterated_power='auto', n_components=None, random_state=None, - svd_solver='auto', tol=0.0, whiten=False) + PCA() Parameters of the estimators in the pipeline can be accessed using the ``__`` syntax:: >>> pipe.set_params(clf__C=10) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - Pipeline(memory=None, - steps=[('reduce_dim', PCA(copy=True, iterated_power='auto',...)), - ('clf', SVC(C=10, cache_size=200, class_weight=None,...))]) + Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC(C=10))]) Attributes of named_steps map to keys, enabling tab completion in interactive environments:: @@ -152,8 +144,8 @@ object:: >>> pipe = Pipeline(estimators, memory=cachedir) >>> pipe # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS Pipeline(..., - 
steps=[('reduce_dim', PCA(copy=True,...)), - ('clf', SVC(C=1.0,...))]) + steps=[('reduce_dim', PCA()), + ('clf', SVC())]) >>> # Clear the cache directory when you don't need it anymore >>> rmtree(cachedir) @@ -169,8 +161,7 @@ object:: >>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)]) >>> pipe.fit(digits.data, digits.target) ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - Pipeline(memory=None, - steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))]) + Pipeline(steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))]) >>> # The pca instance can be inspected directly >>> print(pca1.components_) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS [[ -1.77484909e-19 ... 4.07058917e-18]] @@ -243,10 +234,8 @@ and ``value`` is an estimator object:: >>> estimators = [('linear_pca', PCA()), ('kernel_pca', KernelPCA())] >>> combined = FeatureUnion(estimators) >>> combined # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - FeatureUnion(n_jobs=1, - transformer_list=[('linear_pca', PCA(copy=True,...)), - ('kernel_pca', KernelPCA(alpha=1.0,...))], - transformer_weights=None) + FeatureUnion(transformer_list=[('linear_pca', PCA()), + ('kernel_pca', KernelPCA())]) Like pipelines, feature unions have a shorthand constructor called @@ -258,10 +247,8 @@ and ignored by setting to ``None``:: >>> combined.set_params(kernel_pca=None) ... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS - FeatureUnion(n_jobs=1, - transformer_list=[('linear_pca', PCA(copy=True,...)), - ('kernel_pca', None)], - transformer_weights=None) + FeatureUnion(transformer_list=[('linear_pca', PCA()), + ('kernel_pca', None)]) .. topic:: Examples: diff --git a/doc/modules/preprocessing.rst b/doc/modules/preprocessing.rst index 709239687158e..b12c7d34ea425 100644 --- a/doc/modules/preprocessing.rst +++ b/doc/modules/preprocessing.rst @@ -73,7 +73,7 @@ This class is hence suitable for use in the early steps of a >>> scaler = preprocessing.StandardScaler().fit(X) >>> scaler - StandardScaler(copy=True, with_mean=True, with_std=True) + StandardScaler() >>> scaler.mean_ # doctest: +ELLIPSIS array([ 1. ..., 0. ..., 0.33...]) @@ -286,7 +286,7 @@ This class is hence suitable for use in the early steps of a >>> normalizer = preprocessing.Normalizer().fit(X) # fit does nothing >>> normalizer - Normalizer(copy=True, norm='l2') + Normalizer() The normalizer instance can then be used on sample vectors as any transformer:: @@ -341,7 +341,7 @@ as each sample is treated independently of others:: >>> binarizer = preprocessing.Binarizer().fit(X) # fit does nothing >>> binarizer - Binarizer(copy=True, threshold=0.0) + Binarizer() >>> binarizer.transform(X) array([[ 1., 0., 1.], @@ -398,8 +398,7 @@ Continuing the example above:: >>> enc = preprocessing.OneHotEncoder() >>> enc.fit([[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]]) # doctest: +ELLIPSIS - OneHotEncoder(categorical_features='all', dtype=<... 'numpy.float64'>, - handle_unknown='error', n_values='auto', sparse=True) + OneHotEncoder() >>> enc.transform([[0, 1, 3]]).toarray() array([[ 1., 0., 0., 1., 0., 0., 0., 0., 1.]]) @@ -418,8 +417,7 @@ features, one has to explicitly set ``n_values``. For example, >>> # Note that there are missing categorical values for the 2nd and 3rd >>> # features >>> enc.fit([[1, 2, 3], [0, 2, 0]]) # doctest: +ELLIPSIS - OneHotEncoder(categorical_features='all', dtype=<... 
'numpy.float64'>, - handle_unknown='error', n_values=[2, 3, 4], sparse=True) + OneHotEncoder(n_values=[2, 3, 4]) >>> enc.transform([[1, 0, 0]]).toarray() array([[ 0., 1., 1., 0., 0., 1., 0., 0., 0.]]) @@ -453,7 +451,7 @@ that contain the missing values:: >>> from sklearn.preprocessing import Imputer >>> imp = Imputer(missing_values='NaN', strategy='mean', axis=0) >>> imp.fit([[1, 2], [np.nan, 3], [7, 6]]) - Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0) + Imputer() >>> X = [[np.nan, 2], [6, np.nan], [7, 6]] >>> print(imp.transform(X)) # doctest: +ELLIPSIS [[ 4. 2. ] @@ -466,7 +464,7 @@ The :class:`Imputer` class also supports sparse matrices:: >>> X = sp.csc_matrix([[1, 2], [0, 3], [7, 6]]) >>> imp = Imputer(missing_values=0, strategy='mean', axis=0) >>> imp.fit(X) - Imputer(axis=0, copy=True, missing_values=0, strategy='mean', verbose=0) + Imputer(missing_values=0) >>> X_test = sp.csc_matrix([[0, 2], [6, 0], [7, 6]]) >>> print(imp.transform(X_test)) # doctest: +ELLIPSIS [[ 4. 2. ] diff --git a/doc/modules/preprocessing_targets.rst b/doc/modules/preprocessing_targets.rst index 88663a55fa0d4..5b8ccb192f04b 100644 --- a/doc/modules/preprocessing_targets.rst +++ b/doc/modules/preprocessing_targets.rst @@ -16,7 +16,7 @@ matrix from a list of multi-class labels:: >>> from sklearn import preprocessing >>> lb = preprocessing.LabelBinarizer() >>> lb.fit([1, 2, 6, 4, 2]) - LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False) + LabelBinarizer() >>> lb.classes_ array([1, 2, 4, 6]) >>> lb.transform([1, 6]) diff --git a/doc/modules/sgd.rst b/doc/modules/sgd.rst index e8febda201bf7..f23bf4fbdcecc 100644 --- a/doc/modules/sgd.rst +++ b/doc/modules/sgd.rst @@ -59,13 +59,9 @@ for the training samples:: >>> from sklearn.linear_model import SGDClassifier >>> X = [[0., 0.], [1., 1.]] >>> y = [0, 1] - >>> clf = SGDClassifier(loss="hinge", penalty="l2") + >>> clf = SGDClassifier() >>> clf.fit(X, y) - SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1, - eta0=0.0, fit_intercept=True, l1_ratio=0.15, - learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1, - penalty='l2', power_t=0.5, random_state=None, shuffle=True, - verbose=0, warm_start=False) + SGDClassifier() After being fitted, the model can then be used to predict new values:: diff --git a/doc/modules/svm.rst b/doc/modules/svm.rst index 386865d3d0a8a..8f69f563a852c 100644 --- a/doc/modules/svm.rst +++ b/doc/modules/svm.rst @@ -77,10 +77,7 @@ n_features]`` holding the training samples, and an array y of class labels >>> y = [0, 1] >>> clf = svm.SVC() >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC() After being fitted, the model can then be used to predict new values:: @@ -121,10 +118,7 @@ n_classes)``:: >>> Y = [0, 1, 2, 3] >>> clf = svm.SVC(decision_function_shape='ovo') >>> clf.fit(X, Y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovo', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC(decision_function_shape='ovo') >>> dec = clf.decision_function([[1]]) >>> dec.shape[1] # 4 classes: 4*3/2 = 6 6 @@ -139,10 +133,7 @@ two classes, only one model is trained:: >>> lin_clf = svm.LinearSVC() >>> 
lin_clf.fit(X, Y) # doctest: +NORMALIZE_WHITESPACE - LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True, - intercept_scaling=1, loss='squared_hinge', max_iter=1000, - multi_class='ovr', penalty='l2', random_state=None, tol=0.0001, - verbose=0) + LinearSVC() >>> dec = lin_clf.decision_function([[1]]) >>> dec.shape[1] 4 @@ -319,8 +310,7 @@ floating point values instead of integer values:: >>> y = [0.5, 2.5] >>> clf = svm.SVR() >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1, gamma='auto', - kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False) + SVR() >>> clf.predict([[1, 1]]) array([ 1.5]) @@ -520,10 +510,7 @@ test vectors must be provided. >>> # linear kernel computation >>> gram = np.dot(X, X.T) >>> clf.fit(gram, y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', - kernel='precomputed', max_iter=-1, probability=False, - random_state=None, shrinking=True, tol=0.001, verbose=False) + SVC(kernel='precomputed') >>> # predict on training examples >>> clf.predict(gram) array([0, 1]) diff --git a/doc/tutorial/basic/tutorial.rst b/doc/tutorial/basic/tutorial.rst index 89600953a870f..d090671b52f33 100644 --- a/doc/tutorial/basic/tutorial.rst +++ b/doc/tutorial/basic/tutorial.rst @@ -179,10 +179,7 @@ which produces a new array that contains all but the last entry of ``digits.data``:: >>> clf.fit(digits.data[:-1], digits.target[:-1]) # doctest: +NORMALIZE_WHITESPACE - SVC(C=100.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC(C=100.0, gamma=0.001) Now you can predict new values, in particular, we can ask to the classifier what is the digit of our last image in the ``digits`` dataset, @@ -218,10 +215,7 @@ persistence model, namely `pickle >> iris = datasets.load_iris() >>> X, y = iris.data, iris.target >>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC() >>> import pickle >>> s = pickle.dumps(clf) @@ -292,19 +286,13 @@ maintained:: >>> iris = datasets.load_iris() >>> clf = SVC() >>> clf.fit(iris.data, iris.target) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC() >>> list(clf.predict(iris.data[:3])) [0, 0, 0] >>> clf.fit(iris.data, iris.target_names[iris.target]) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC() >>> list(clf.predict(iris.data[:3])) # doctest: +NORMALIZE_WHITESPACE ['setosa', 'setosa', 'setosa'] @@ -330,18 +318,12 @@ more than once will overwrite what was learned by any previous ``fit()``:: >>> clf = SVC() >>> clf.set_params(kernel='linear').fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - 
decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC(kernel='linear') >>> clf.predict(X_test) array([1, 0, 1, 1, 0]) >>> clf.set_params(kernel='rbf').fit(X, y) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC() >>> clf.predict(X_test) array([0, 0, 0, 1, 0]) @@ -401,4 +383,4 @@ is similarly possible for an instance to be assigned multiple labels:: In this case, the classifier is fit upon instances each assigned multiple labels. The :class:`MultiLabelBinarizer ` is used to binarize the 2d array of multilabels to ``fit`` upon. As a result, -``predict()`` returns a 2d array with multiple predicted labels for each instance. \ No newline at end of file +``predict()`` returns a 2d array with multiple predicted labels for each instance. diff --git a/doc/tutorial/statistical_inference/model_selection.rst b/doc/tutorial/statistical_inference/model_selection.rst index 315ca420e4d19..98e10e14bd1bc 100644 --- a/doc/tutorial/statistical_inference/model_selection.rst +++ b/doc/tutorial/statistical_inference/model_selection.rst @@ -216,7 +216,7 @@ estimator during the construction and exposes an estimator API:: >>> clf = GridSearchCV(estimator=svc, param_grid=dict(C=Cs), ... n_jobs=-1) >>> clf.fit(X_digits[:1000], y_digits[:1000]) # doctest: +ELLIPSIS - GridSearchCV(cv=None,... + GridSearchCV(... >>> clf.best_score_ # doctest: +ELLIPSIS 0.925... >>> clf.best_estimator_.C # doctest: +ELLIPSIS @@ -266,10 +266,7 @@ parameter automatically by cross-validation:: >>> X_diabetes = diabetes.data >>> y_diabetes = diabetes.target >>> lasso.fit(X_diabetes, y_diabetes) - LassoCV(alphas=None, copy_X=True, cv=None, eps=0.001, fit_intercept=True, - max_iter=1000, n_alphas=100, n_jobs=1, normalize=False, positive=False, - precompute='auto', random_state=None, selection='cyclic', tol=0.0001, - verbose=False) + LassoCV() >>> # The estimator chose automatically its lambda: >>> lasso.alpha_ # doctest: +ELLIPSIS 0.01229... diff --git a/doc/tutorial/statistical_inference/supervised_learning.rst b/doc/tutorial/statistical_inference/supervised_learning.rst index e5342c5cad64a..3e44e7197352b 100644 --- a/doc/tutorial/statistical_inference/supervised_learning.rst +++ b/doc/tutorial/statistical_inference/supervised_learning.rst @@ -94,9 +94,7 @@ Scikit-learn documentation for more information about this type of classifier.) 
>>> from sklearn.neighbors import KNeighborsClassifier >>> knn = KNeighborsClassifier() >>> knn.fit(iris_X_train, iris_y_train) # doctest: +NORMALIZE_WHITESPACE - KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski', - metric_params=None, n_jobs=1, n_neighbors=5, p=2, - weights='uniform') + KNeighborsClassifier() >>> knn.predict(iris_X_test) array([1, 2, 1, 0, 0, 0, 2, 1, 2, 0]) >>> iris_y_test @@ -176,7 +174,7 @@ Linear models: :math:`y = X\beta + \epsilon` >>> from sklearn import linear_model >>> regr = linear_model.LinearRegression() >>> regr.fit(diabetes_X_train, diabetes_y_train) - LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False) + LinearRegression() >>> print(regr.coef_) [ 0.30349955 -237.63931533 510.53060544 327.73698041 -814.13170937 492.81458798 102.84845219 184.60648906 743.51961675 76.09517222] @@ -327,9 +325,7 @@ application of Occam's razor: *prefer simpler models*. >>> best_alpha = alphas[scores.index(max(scores))] >>> regr.alpha = best_alpha >>> regr.fit(diabetes_X_train, diabetes_y_train) - Lasso(alpha=0.025118864315095794, copy_X=True, fit_intercept=True, - max_iter=1000, normalize=False, positive=False, precompute=False, - random_state=None, selection='cyclic', tol=0.0001, warm_start=False) + Lasso(alpha=0.025118864315095794) >>> print(regr.coef_) [ 0. -212.43764548 517.19478111 313.77959962 -160.8303982 -0. -187.19554705 69.38229038 508.66011217 71.84239008] @@ -370,10 +366,7 @@ function or **logistic** function: >>> logistic = linear_model.LogisticRegression(C=1e5) >>> logistic.fit(iris_X_train, iris_y_train) - LogisticRegression(C=100000.0, class_weight=None, dual=False, - fit_intercept=True, intercept_scaling=1, max_iter=100, - multi_class='ovr', n_jobs=1, penalty='l2', random_state=None, - solver='liblinear', tol=0.0001, verbose=0, warm_start=False) + LogisticRegression(C=100000.0) This is known as :class:`LogisticRegression`. @@ -454,10 +447,7 @@ classification --:class:`SVC` (Support Vector Classification). >>> from sklearn import svm >>> svc = svm.SVC(kernel='linear') >>> svc.fit(iris_X_train, iris_y_train) # doctest: +NORMALIZE_WHITESPACE - SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, - decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear', - max_iter=-1, probability=False, random_state=None, shrinking=True, - tol=0.001, verbose=False) + SVC(kernel='linear') .. warning:: **Normalizing data** diff --git a/doc/tutorial/statistical_inference/unsupervised_learning.rst b/doc/tutorial/statistical_inference/unsupervised_learning.rst index be32fabd96cb8..b311006260880 100644 --- a/doc/tutorial/statistical_inference/unsupervised_learning.rst +++ b/doc/tutorial/statistical_inference/unsupervised_learning.rst @@ -38,8 +38,8 @@ algorithms. The simplest clustering algorithm is >>> y_iris = iris.target >>> k_means = cluster.KMeans(n_clusters=3) - >>> k_means.fit(X_iris) # doctest: +ELLIPSIS - KMeans(algorithm='auto', copy_x=True, init='k-means++', ... + >>> k_means.fit(X_iris) + KMeans(n_clusters=3) >>> print(k_means.labels_[::10]) [1 1 1 1 1 0 0 0 0 0 2 2 2 2 2] >>> print(y_iris[::10]) @@ -117,8 +117,8 @@ algorithms. The simplest clustering algorithm is ... face = misc.face(gray=True) >>> X = face.reshape((-1, 1)) # We need an (n_sample, n_feature) array >>> k_means = cluster.KMeans(n_clusters=5, n_init=1) - >>> k_means.fit(X) # doctest: +ELLIPSIS - KMeans(algorithm='auto', copy_x=True, init='k-means++', ... 
+ >>> k_means.fit(X) + KMeans(n_clusters=5, n_init=1) >>> values = k_means.cluster_centers_.squeeze() >>> labels = k_means.labels_ >>> face_compressed = np.choose(labels, values) @@ -215,7 +215,7 @@ transposed data. >>> agglo = cluster.FeatureAgglomeration(connectivity=connectivity, ... n_clusters=32) >>> agglo.fit(X) # doctest: +ELLIPSIS - FeatureAgglomeration(affinity='euclidean', compute_full_tree='auto',... + FeatureAgglomeration(connectivity=... >>> X_reduced = agglo.transform(X) >>> X_approx = agglo.inverse_transform(X_reduced) @@ -275,8 +275,7 @@ data by projecting on a principal subspace. >>> from sklearn import decomposition >>> pca = decomposition.PCA() >>> pca.fit(X) - PCA(copy=True, iterated_power='auto', n_components=None, random_state=None, - svd_solver='auto', tol=0.0, whiten=False) + PCA() >>> print(pca.explained_variance_) # doctest: +SKIP [ 2.18565811e+00 1.19346747e+00 8.43026679e-32] From 42eddd22f1e73bbe85ad11e56b0b059f86c338b2 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 17:53:56 +0200 Subject: [PATCH 09/12] fix repr for deprecated classes --- sklearn/base.py | 3 ++- sklearn/tests/test_base.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/sklearn/base.py b/sklearn/base.py index 5e6482946b960..284d77b1d6be5 100644 --- a/sklearn/base.py +++ b/sklearn/base.py @@ -288,7 +288,8 @@ def _changed_params(self): params = self.get_params(deep=False) if not get_config()['show_default_parameters']: filtered_params = {} - init_params = signature(self.__init__).parameters + init = getattr(self.__init__, 'deprecated_original', self.__init__) + init_params = signature(init).parameters for k, v in params.items(): if v != init_params[k].default: filtered_params[k] = v diff --git a/sklearn/tests/test_base.py b/sklearn/tests/test_base.py index bbb53fd7eb1b2..45100368f3f26 100644 --- a/sklearn/tests/test_base.py +++ b/sklearn/tests/test_base.py @@ -61,6 +61,11 @@ def __init__(self, a=np.array([0])): self.a = a.copy() +@deprecated("This estimator is deprecated") +class DeprecatedEstimator(T): + pass + + class DeprecatedAttributeEstimator(BaseEstimator): def __init__(self, a=None, b=None): self.a = a @@ -205,6 +210,16 @@ def test_repr(): assert_equal(repr(test), "T(a=K(), b=K())") +@ignore_warnings(category=DeprecationWarning) +def test_short_repr_deprecated(): + with sklearn.config_context(show_default_parameters=False): + est = DeprecatedEstimator() + assert_equal(repr(est), "DeprecatedEstimator()") + + est = DeprecatedEstimator(a='c') + assert_equal(repr(est), "DeprecatedEstimator(a='c')") + + def test_str(): # Smoke test the str of the base estimator my_estimator = MyEstimator() From 484190d04d0bcbb5f8641f0fca55bb6be7b8fd84 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 17:57:54 +0200 Subject: [PATCH 10/12] minor fix for deprecated DP repr --- doc/modules/gaussian_process.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/modules/gaussian_process.rst b/doc/modules/gaussian_process.rst index 1d072f5c074e3..0588864b6d1c3 100644 --- a/doc/modules/gaussian_process.rst +++ b/doc/modules/gaussian_process.rst @@ -643,7 +643,7 @@ parameters or alternatively it uses the given parameters. 
>>> x = np.atleast_2d(np.linspace(0, 10, 1000)).T >>> gp = gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1) >>> gp.fit(X, y) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE - GaussianProcess(theta0=array([[ 0.01]]), thetaL=array([[ 0.0001]]), + GaussianProcess(...theta0=array([[ 0.01]]), thetaL=array([[ 0.0001]]), thetaU=array([[ 0.1]])) >>> y_pred, sigma2_pred = gp.predict(x, eval_MSE=True) From 4f2059a95b9c7a17255761925fa9f72f93e63504 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 18:02:06 +0200 Subject: [PATCH 11/12] added whatsnew entry --- doc/whats_new.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/doc/whats_new.rst b/doc/whats_new.rst index e7bb058b3c69b..13a033d9ac7fc 100644 --- a/doc/whats_new.rst +++ b/doc/whats_new.rst @@ -65,6 +65,11 @@ New features Enhancements ............ + - Simplified string representations (``repr``) of all estimators which + can be enabled via :func:`sklearn.set_config`. The simplified + representation only shows parameters with settings that differ + from the default parameter settings. :issue:`9039` by `Andreas Müller`_. + - Update Sphinx-Gallery from 0.1.4 to 0.1.7 for resolving links in documentation build with Sphinx>1.5 :issue:`8010`, :issue:`7986` :user:`Oscar Najera ` From 5bbe0ed499eb626ebb98bdcb0dcf75173b905940 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Jun 2017 20:52:49 +0200 Subject: [PATCH 12/12] add simple repr to common tests --- sklearn/utils/estimator_checks.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 5c8c0e90c94c0..88eb4e685fe7c 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -11,6 +11,7 @@ from scipy.stats import rankdata import struct +import sklearn from sklearn.externals.six.moves import zip from sklearn.externals.joblib import hash, Memory from sklearn.utils.testing import assert_raises @@ -1553,6 +1554,8 @@ def check_parameters_default_constructible(name, Estimator): clone(estimator) # test __repr__ repr(estimator) + with sklearn.config_context(show_default_parameters=False): + repr(estimator) # test that set_params returns self assert_true(estimator.set_params() is estimator)
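
To illustrate how the option added by this series is meant to be used, here is a minimal usage sketch. It assumes the API as it stands at the end of the series (patches 05-07): a boolean ``show_default_parameters`` entry in the global configuration, toggled via ``sklearn.set_config`` / ``sklearn.config_context`` or the ``SKLEARN_SHOW_DEFAULT_PARAMETERS`` environment variable. It requires a scikit-learn build with these patches applied; the option is not part of any released API.

    # Sketch only: assumes a scikit-learn build with this patch series applied.
    import sklearn
    from sklearn.linear_model import Lasso

    est = Lasso(alpha=0.1)

    # Default behaviour: the repr lists every constructor parameter, e.g.
    # Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000, ...)
    print(repr(est))

    # Temporarily show only the parameters that differ from their defaults.
    with sklearn.config_context(show_default_parameters=False):
        print(repr(est))  # Lasso(alpha=0.1)

    # Or flip the setting globally for the rest of the session.
    sklearn.set_config(show_default_parameters=False)
    print(repr(est))  # Lasso(alpha=0.1)

The same behaviour can be selected for a whole process from the environment, as the Makefile change in patch 08 does for the doctest run; per patch 07, setting ``SKLEARN_SHOW_DEFAULT_PARAMETERS`` to an empty string, ``False`` or ``FALSE`` disables the display of default parameters at import time.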