[RFC] Simple __repr__ with global flag by amueller · Pull Request #9039 · scikit-learn/scikit-learn · GitHub
[RFC] Simple __repr__ with global flag #9039


Closed
wants to merge 13 commits
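The diffs below illustrate the proposal: an estimator ``__repr__`` that prints only the parameters whose values differ from their constructor defaults (``Ridge(alpha=0.5)`` instead of the full parameter listing), plus a global flag to restore the current verbose behaviour; the Makefile change below drives the doctests through an environment variable named ``SKLEARN_SHOW_DEFAULT_PARAMETERS``. The snippet that follows is only a minimal sketch of that idea, not the code from this PR: the helper names and the ``inspect``-based default lookup are hypothetical, and only the flag name is taken from the diff.

    import inspect
    import os

    def _changed_params(estimator):
        """Parameters whose values differ from the constructor defaults."""
        init = inspect.signature(type(estimator).__init__)
        defaults = {name: p.default for name, p in init.parameters.items()
                    if name != "self" and p.default is not inspect.Parameter.empty}
        params = estimator.get_params(deep=False)
        # Compare reprs to sidestep ambiguous equality (e.g. numpy arrays);
        # a real implementation would need something more careful here.
        return {k: v for k, v in params.items()
                if repr(v) != repr(defaults.get(k, object()))}

    def simple_repr(estimator):
        """Short form such as 'Ridge(alpha=0.5)' instead of all parameters."""
        show_all = os.environ.get("SKLEARN_SHOW_DEFAULT_PARAMETERS", "True") == "True"
        params = (estimator.get_params(deep=False) if show_all
                  else _changed_params(estimator))
        args = ", ".join("%s=%r" % (k, v) for k, v in sorted(params.items()))
        return "%s(%s)" % (type(estimator).__name__, args)

With the flag set to ``False``, ``simple_repr(linear_model.Ridge(alpha=0.5))`` gives ``'Ridge(alpha=0.5)'``, matching the shortened doctest outputs in the documentation changes below.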
2 changes: 1 addition & 1 deletion Makefile
@@ -34,7 +34,7 @@ test-sphinxext:
$(NOSETESTS) -s -v doc/sphinxext/
test-doc:
ifeq ($(BITS),64)
$(NOSETESTS) -s -v doc/*.rst doc/modules/ doc/datasets/ \
SKLEARN_SHOW_DEFAULT_PARAMETERS=False $(NOSETESTS) -s -v doc/*.rst doc/modules/ doc/datasets/ \
doc/developers doc/tutorial/basic doc/tutorial/statistical_inference \
doc/tutorial/text_analytics
endif
12 changes: 3 additions & 9 deletions doc/modules/feature_extraction.rst
@@ -289,14 +289,9 @@ This model has many parameters, however the default values are quite
reasonable (please see the :ref:`reference documentation
<text_feature_extraction_ref>` for the details)::

>>> vectorizer = CountVectorizer(min_df=1)
>>> vectorizer = CountVectorizer()
>>> vectorizer # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
CountVectorizer(analyzer=...'word', binary=False, decode_error=...'strict',
dtype=<... 'numpy.int64'>, encoding=...'utf-8', input=...'content',
lowercase=True, max_df=1.0, max_features=None, min_df=1,
ngram_range=(1, 1), preprocessor=None, stop_words=None,
strip_accents=None, token_pattern=...'(?u)\\b\\w\\w+\\b',
tokenizer=None, vocabulary=None)
CountVectorizer()

Let's use it to tokenize and count the word occurrences of a minimalistic
corpus of text documents::
@@ -440,8 +435,7 @@ class::
>>> from sklearn.feature_extraction.text import TfidfTransformer
>>> transformer = TfidfTransformer(smooth_idf=False)
>>> transformer # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS
TfidfTransformer(norm=...'l2', smooth_idf=False, sublinear_tf=False,
use_idf=True)
TfidfTransformer(smooth_idf=False)

Again please see the :ref:`reference documentation
<text_feature_extraction_ref>` for the details on all the parameters.
8 changes: 2 additions & 6 deletions doc/modules/gaussian_process.rst
@@ -643,12 +643,8 @@ parameters or alternatively it uses the given parameters.
>>> x = np.atleast_2d(np.linspace(0, 10, 1000)).T
>>> gp = gaussian_process.GaussianProcess(theta0=1e-2, thetaL=1e-4, thetaU=1e-1)
>>> gp.fit(X, y) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
GaussianProcess(beta0=None, corr=<function squared_exponential at 0x...>,
normalize=True, nugget=array(2.22...-15),
optimizer='fmin_cobyla', random_start=1, random_state=...
regr=<function constant at 0x...>, storage_mode='full',
theta0=array([[ 0.01]]), thetaL=array([[ 0.0001]]),
thetaU=array([[ 0.1]]), verbose=False)
GaussianProcess(...theta0=array([[ 0.01]]), thetaL=array([[ 0.0001]]),
thetaU=array([[ 0.1]]))
>>> y_pred, sigma2_pred = gp.predict(x, eval_MSE=True)


6 changes: 1 addition & 5 deletions doc/modules/kernel_approximation.rst
@@ -61,11 +61,7 @@ a linear algorithm, for example a linear SVM::
>>> X_features = rbf_feature.fit_transform(X)
>>> clf = SGDClassifier() # doctest: +NORMALIZE_WHITESPACE
>>> clf.fit(X_features, y)
SGDClassifier(alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
eta0=0.0, fit_intercept=True, l1_ratio=0.15,
learning_rate='optimal', loss='hinge', n_iter=5, n_jobs=1,
penalty='l2', power_t=0.5, random_state=None, shuffle=True,
verbose=0, warm_start=False)
SGDClassifier()
>>> clf.score(X_features, y)
1.0

24 changes: 8 additions & 16 deletions doc/modules/linear_model.rst
@@ -45,7 +45,7 @@ and will store the coefficients :math:`w` of the linear model in its
>>> from sklearn import linear_model
>>> reg = linear_model.LinearRegression()
>>> reg.fit ([[0, 0], [1, 1], [2, 2]], [0, 1, 2])
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)
LinearRegression()
>>> reg.coef_
array([ 0.5, 0.5])

@@ -101,10 +101,9 @@ arrays X, y and will store the coefficients :math:`w` of the linear model in
its ``coef_`` member::

>>> from sklearn import linear_model
>>> reg = linear_model.Ridge (alpha = .5)
>>> reg = linear_model.Ridge(alpha=.5)
>>> reg.fit ([[0, 0], [0, 0], [1, 1]], [0, .1, 1]) # doctest: +NORMALIZE_WHITESPACE
Ridge(alpha=0.5, copy_X=True, fit_intercept=True, max_iter=None,
normalize=False, random_state=None, solver='auto', tol=0.001)
Ridge(alpha=0.5)
>>> reg.coef_
array([ 0.34545455, 0.34545455])
>>> reg.intercept_ #doctest: +ELLIPSIS
@@ -140,8 +139,7 @@ as GridSearchCV except that it defaults to Generalized Cross-Validation
>>> from sklearn import linear_model
>>> reg = linear_model.RidgeCV(alphas=[0.1, 1.0, 10.0])
>>> reg.fit([[0, 0], [0, 0], [1, 1]], [0, .1, 1]) # doctest: +SKIP
RidgeCV(alphas=[0.1, 1.0, 10.0], cv=None, fit_intercept=True, scoring=None,
normalize=False)
RidgeCV(alphas=[0.1, 1.0, 10.0])
>>> reg.alpha_ # doctest: +SKIP
0.1

@@ -182,11 +180,9 @@ the algorithm to fit the coefficients. See :ref:`least_angle_regression`
for another implementation::

>>> from sklearn import linear_model
>>> reg = linear_model.Lasso(alpha = 0.1)
>>> reg = linear_model.Lasso(alpha=0.1)
>>> reg.fit([[0, 0], [1, 1]], [0, 1])
Lasso(alpha=0.1, copy_X=True, fit_intercept=True, max_iter=1000,
normalize=False, positive=False, precompute=False, random_state=None,
selection='cyclic', tol=0.0001, warm_start=False)
Lasso(alpha=0.1)
>>> reg.predict([[1, 1]])
array([ 0.8])

@@ -454,9 +450,7 @@ function of the norm of its coefficients.
>>> from sklearn import linear_model
>>> reg = linear_model.LassoLars(alpha=.1)
>>> reg.fit([[0, 0], [1, 1]], [0, 1]) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
LassoLars(alpha=0.1, copy_X=True, eps=..., fit_intercept=True,
fit_path=True, max_iter=500, normalize=True, positive=False,
precompute='auto', verbose=False)
LassoLars(alpha=0.1)
>>> reg.coef_ # doctest: +ELLIPSIS
array([ 0.717157..., 0. ])

@@ -617,9 +611,7 @@ Bayesian Ridge Regression is used for regression::
>>> Y = [0., 1., 2., 3.]
>>> reg = linear_model.BayesianRidge()
>>> reg.fit(X, Y)
BayesianRidge(alpha_1=1e-06, alpha_2=1e-06, compute_score=False, copy_X=True,
fit_intercept=True, lambda_1=1e-06, lambda_2=1e-06, n_iter=300,
normalize=False, tol=0.001, verbose=False)
BayesianRidge()

After being fitted, the model can then be used to predict new values::

12 changes: 3 additions & 9 deletions doc/modules/model_evaluation.rst
@@ -802,10 +802,7 @@ with a svm classifier in a binary class problem::
>>> y = [-1, 1]
>>> est = svm.LinearSVC(random_state=0)
>>> est.fit(X, y)
LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
intercept_scaling=1, loss='squared_hinge', max_iter=1000,
multi_class='ovr', penalty='l2', random_state=0, tol=0.0001,
verbose=0)
LinearSVC(random_state=0)
>>> pred_decision = est.decision_function([[-2], [3], [0.5]])
>>> pred_decision # doctest: +ELLIPSIS
array([-2.18..., 2.36..., 0.09...])
@@ -820,10 +817,7 @@ with a svm classifier in a multiclass problem::
>>> labels = np.array([0, 1, 2, 3])
>>> est = svm.LinearSVC()
>>> est.fit(X, Y)
LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
intercept_scaling=1, loss='squared_hinge', max_iter=1000,
multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
verbose=0)
LinearSVC()
>>> pred_decision = est.decision_function([[-1], [2], [3]])
>>> y_true = [0, 2, 3]
>>> hinge_loss(y_true, pred_decision, labels) #doctest: +ELLIPSIS
@@ -1567,7 +1561,7 @@ Next, let's compare the accuracy of ``SVC`` and ``most_frequent``::
0.63...
>>> clf = DummyClassifier(strategy='most_frequent',random_state=0)
>>> clf.fit(X_train, y_train)
DummyClassifier(constant=None, random_state=0, strategy='most_frequent')
DummyClassifier(random_state=0, strategy='most_frequent')
>>> clf.score(X_test, y_test) # doctest: +ELLIPSIS
0.57...

5 changes: 1 addition & 4 deletions doc/modules/model_persistence.rst
@@ -22,10 +22,7 @@ persistence model, namely `pickle <https://docs.python.org/2/library/pickle.html
>>> iris = datasets.load_iris()
>>> X, y = iris.data, iris.target
>>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
max_iter=-1, probability=False, random_state=None, shrinking=True,
tol=0.001, verbose=False)
SVC()

>>> import pickle
>>> s = pickle.dumps(clf)
2 changes: 1 addition & 1 deletion doc/modules/neighbors.rst
@@ -478,7 +478,7 @@ for more complex methods that do not make this assumption. Usage of the default
>>> y = np.array([1, 1, 1, 2, 2, 2])
>>> clf = NearestCentroid()
>>> clf.fit(X, y)
NearestCentroid(metric='euclidean', shrink_threshold=None)
NearestCentroid()
>>> print(clf.predict([[-0.8, -1]]))
[1]

18 changes: 4 additions & 14 deletions doc/modules/neural_networks_supervised.rst
@@ -90,13 +90,8 @@ training samples::
... hidden_layer_sizes=(5, 2), random_state=1)
...
>>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE
MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto',
beta_1=0.9, beta_2=0.999, early_stopping=False,
epsilon=1e-08, hidden_layer_sizes=(5, 2), learning_rate='constant',
learning_rate_init=0.001, max_iter=200, momentum=0.9,
nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
warm_start=False)
MLPClassifier(alpha=1e-05, hidden_layer_sizes=(5, 2), random_state=1,
solver='lbfgs')

After fitting (training), the model can predict labels for new samples::

@@ -138,13 +133,8 @@ indices where the value is `1` represents the assigned classes of that sample::
... hidden_layer_sizes=(15,), random_state=1)
...
>>> clf.fit(X, y) # doctest: +NORMALIZE_WHITESPACE
MLPClassifier(activation='relu', alpha=1e-05, batch_size='auto',
beta_1=0.9, beta_2=0.999, early_stopping=False,
epsilon=1e-08, hidden_layer_sizes=(15,), learning_rate='constant',
learning_rate_init=0.001, max_iter=200, momentum=0.9,
nesterovs_momentum=True, power_t=0.5, random_state=1, shuffle=True,
solver='lbfgs', tol=0.0001, validation_fraction=0.1, verbose=False,
warm_start=False)
MLPClassifier(alpha=1e-05, hidden_layer_sizes=(15,), random_state=1,
solver='lbfgs')
>>> clf.predict([[1., 2.]])
array([[1, 1]])
>>> clf.predict([[0., 0.]])
41 changes: 14 additions & 27 deletions doc/modules/pipeline.rst
@@ -40,9 +40,8 @@ is an estimator object::
>>> estimators = [('reduce_dim', PCA()), ('clf', SVC())]
>>> pipe = Pipeline(estimators)
>>> pipe # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
Pipeline(memory=None,
steps=[('reduce_dim', PCA(copy=True,...)),
('clf', SVC(C=1.0,...))])
Pipeline(steps=[('reduce_dim', PCA()),
('clf', SVC())])

The utility function :func:`make_pipeline` is a shorthand
for constructing pipelines;
@@ -53,31 +52,24 @@ filling in the names automatically::
>>> from sklearn.naive_bayes import MultinomialNB
>>> from sklearn.preprocessing import Binarizer
>>> make_pipeline(Binarizer(), MultinomialNB()) # doctest: +NORMALIZE_WHITESPACE
Pipeline(memory=None,
steps=[('binarizer', Binarizer(copy=True, threshold=0.0)),
('multinomialnb', MultinomialNB(alpha=1.0,
class_prior=None,
fit_prior=True))])
Pipeline(steps=[('binarizer', Binarizer()),
('multinomialnb', MultinomialNB())])

The estimators of a pipeline are stored as a list in the ``steps`` attribute::

>>> pipe.steps[0]
('reduce_dim', PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
svd_solver='auto', tol=0.0, whiten=False))
('reduce_dim', PCA())

and as a ``dict`` in ``named_steps``::

>>> pipe.named_steps['reduce_dim']
PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
svd_solver='auto', tol=0.0, whiten=False)
PCA()

Parameters of the estimators in the pipeline can be accessed using the
``<estimator>__<parameter>`` syntax::

>>> pipe.set_params(clf__C=10) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
Pipeline(memory=None,
steps=[('reduce_dim', PCA(copy=True, iterated_power='auto',...)),
('clf', SVC(C=10, cache_size=200, class_weight=None,...))])
Pipeline(steps=[('reduce_dim', PCA()), ('clf', SVC(C=10))])

Attributes of named_steps map to keys, enabling tab completion in interactive environments::

@@ -152,8 +144,8 @@ object::
>>> pipe = Pipeline(estimators, memory=cachedir)
>>> pipe # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
Pipeline(...,
steps=[('reduce_dim', PCA(copy=True,...)),
('clf', SVC(C=1.0,...))])
steps=[('reduce_dim', PCA()),
('clf', SVC())])
>>> # Clear the cache directory when you don't need it anymore
>>> rmtree(cachedir)

@@ -169,8 +161,7 @@ object::
>>> pipe = Pipeline([('reduce_dim', pca1), ('clf', svm1)])
>>> pipe.fit(digits.data, digits.target)
... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
Pipeline(memory=None,
steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))])
Pipeline(steps=[('reduce_dim', PCA(...)), ('clf', SVC(...))])
>>> # The pca instance can be inspected directly
>>> print(pca1.components_) # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
[[ -1.77484909e-19 ... 4.07058917e-18]]
@@ -243,10 +234,8 @@ and ``value`` is an estimator object::
>>> estimators = [('linear_pca', PCA()), ('kernel_pca', KernelPCA())]
>>> combined = FeatureUnion(estimators)
>>> combined # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
FeatureUnion(n_jobs=1,
transformer_list=[('linear_pca', PCA(copy=True,...)),
('kernel_pca', KernelPCA(alpha=1.0,...))],
transformer_weights=None)
FeatureUnion(transformer_list=[('linear_pca', PCA()),
('kernel_pca', KernelPCA())])


Like pipelines, feature unions have a shorthand constructor called
@@ -258,10 +247,8 @@ and ignored by setting to ``None``::

>>> combined.set_params(kernel_pca=None)
... # doctest: +NORMALIZE_WHITESPACE, +ELLIPSIS
FeatureUnion(n_jobs=1,
transformer_list=[('linear_pca', PCA(copy=True,...)),
('kernel_pca', None)],
transformer_weights=None)
FeatureUnion(transformer_list=[('linear_pca', PCA()),
('kernel_pca', None)])

.. topic:: Examples:

16 changes: 7 additions & 9 deletions doc/modules/preprocessing.rst
@@ -73,7 +73,7 @@ This class is hence suitable for use in the early steps of a

>>> scaler = preprocessing.StandardScaler().fit(X)
>>> scaler
StandardScaler(copy=True, with_mean=True, with_std=True)
StandardScaler()

>>> scaler.mean_ # doctest: +ELLIPSIS
array([ 1. ..., 0. ..., 0.33...])
@@ -286,7 +286,7 @@ This class is hence suitable for use in the early steps of a

>>> normalizer = preprocessing.Normalizer().fit(X) # fit does nothing
>>> normalizer
Normalizer(copy=True, norm='l2')
Normalizer()


The normalizer instance can then be used on sample vectors as any transformer::
@@ -341,7 +341,7 @@ as each sample is treated independently of others::

>>> binarizer = preprocessing.Binarizer().fit(X) # fit does nothing
>>> binarizer
Binarizer(copy=True, threshold=0.0)
Binarizer()

>>> binarizer.transform(X)
array([[ 1., 0., 1.],
@@ -398,8 +398,7 @@ Continuing the example above::

>>> enc = preprocessing.OneHotEncoder()
>>> enc.fit([[0, 0, 3], [1, 1, 0], [0, 2, 1], [1, 0, 2]]) # doctest: +ELLIPSIS
OneHotEncoder(categorical_features='all', dtype=<... 'numpy.float64'>,
handle_unknown='error', n_values='auto', sparse=True)
OneHotEncoder()
>>> enc.transform([[0, 1, 3]]).toarray()
array([[ 1., 0., 0., 1., 0., 0., 0., 0., 1.]])

@@ -418,8 +417,7 @@ features, one has to explicitly set ``n_values``. For example,
>>> # Note that there are missing categorical values for the 2nd and 3rd
>>> # features
>>> enc.fit([[1, 2, 3], [0, 2, 0]]) # doctest: +ELLIPSIS
OneHotEncoder(categorical_features='all', dtype=<... 'numpy.float64'>,
handle_unknown='error', n_values=[2, 3, 4], sparse=True)
OneHotEncoder(n_values=[2, 3, 4])
>>> enc.transform([[1, 0, 0]]).toarray()
array([[ 0., 1., 1., 0., 0., 1., 0., 0., 0.]])

@@ -453,7 +451,7 @@ that contain the missing values::
>>> from sklearn.preprocessing import Imputer
>>> imp = Imputer(missing_values='NaN', strategy='mean', axis=0)
>>> imp.fit([[1, 2], [np.nan, 3], [7, 6]])
Imputer(axis=0, copy=True, missing_values='NaN', strategy='mean', verbose=0)
Imputer()
>>> X = [[np.nan, 2], [6, np.nan], [7, 6]]
>>> print(imp.transform(X)) # doctest: +ELLIPSIS
[[ 4. 2. ]
@@ -466,7 +464,7 @@ The :class:`Imputer` class also supports sparse matrices::
>>> X = sp.csc_matrix([[1, 2], [0, 3], [7, 6]])
>>> imp = Imputer(missing_values=0, strategy='mean', axis=0)
>>> imp.fit(X)
Imputer(axis=0, copy=True, missing_values=0, strategy='mean', verbose=0)
Imputer(missing_values=0)
>>> X_test = sp.csc_matrix([[0, 2], [6, 0], [7, 6]])
>>> print(imp.transform(X_test)) # doctest: +ELLIPSIS
[[ 4. 2. ]
2 changes: 1 addition & 1 deletion doc/modules/preprocessing_targets.rst
@@ -16,7 +16,7 @@ matrix from a list of multi-class labels::
>>> from sklearn import preprocessing
>>> lb = preprocessing.LabelBinarizer()
>>> lb.fit([1, 2, 6, 4, 2])
LabelBinarizer(neg_label=0, pos_label=1, sparse_output=False)
LabelBinarizer()
>>> lb.classes_
array([1, 2, 4, 6])
>>> lb.transform([1, 6])