doc: Standarize default documentation for feature_selection by kohakukun · Pull Request #17465 · scikit-learn/scikit-learn · GitHub
Merged 5 commits on Jun 7, 2020
2 changes: 1 addition & 1 deletion sklearn/feature_selection/_base.py
@@ -32,7 +32,7 @@ def get_support(self, indices=False):

Parameters
----------
- indices : boolean (default False)
+ indices : bool, default=False
If True, the return value will be an array of integers, rather
than a boolean mask.

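The `indices` switch documented in this hunk works with any fitted selector; a minimal sketch using `SelectKBest` (the selector and data are my choice for illustration, not part of this diff):

```python
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest, f_classif

X, y = load_iris(return_X_y=True)
selector = SelectKBest(f_classif, k=2).fit(X, y)

mask = selector.get_support()             # boolean mask of shape (n_features,)
idx = selector.get_support(indices=True)  # integer indices of the kept features
print(mask.dtype, idx)
```

Both calls describe the same selection; `indices=True` merely changes the representation from mask to index array.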
14 changes: 7 additions & 7 deletions sklearn/feature_selection/_from_model.py
@@ -73,7 +73,7 @@ class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator):
or a non-fitted estimator. The estimator must have either a
``feature_importances_`` or ``coef_`` attribute after fitting.

- threshold : string, float, optional default None
+ threshold : string or float, default=None
The threshold value to use for feature selection. Features whose
importance is greater or equal are kept while the others are
discarded. If "median" (resp. "mean"), then the ``threshold`` value is
@@ -83,26 +83,26 @@ class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator):
or implicitly (e.g, Lasso), the threshold used is 1e-5.
Otherwise, "mean" is used by default.

- prefit : bool, default False
+ prefit : bool, default=False
Whether a prefit model is expected to be passed into the constructor
directly or not. If True, ``transform`` must be called directly
and SelectFromModel cannot be used with ``cross_val_score``,
``GridSearchCV`` and similar utilities that clone the estimator.
Otherwise train the model using ``fit`` and then ``transform`` to do
feature selection.

- norm_order : non-zero int, inf, -inf, default 1
+ norm_order : non-zero int, inf, -inf, default=1
Order of the norm used to filter the vectors of coefficients below
``threshold`` in the case where the ``coef_`` attribute of the
estimator is of dimension 2.

- max_features : int or None, optional
+ max_features : int, default=None
The maximum number of features to select.
To only select based on ``max_features``, set ``threshold=-np.inf``.

.. versionadded:: 0.20

- importance_getter : str or callable, optional (default='auto')
+ importance_getter : str or callable, default='auto'
If 'auto', uses the feature importance either through a ``coef_``
attribute or ``feature_importances_`` attribute of estimator.

@@ -198,7 +198,7 @@ def fit(self, X, y=None, **fit_params):
X : array-like of shape (n_samples, n_features)
The training input samples.

- y : array-like, shape (n_samples,)
+ y : array-like of shape (n_samples,), default=None
The target values (integers that correspond to classes in
classification, real numbers in regression).

@@ -242,7 +242,7 @@ def partial_fit(self, X, y=None, **fit_params):
X : array-like of shape (n_samples, n_features)
The training input samples.

- y : array-like, shape (n_samples,)
+ y : array-like of shape (n_samples,), default=None
The target values (integers that correspond to classes in
classification, real numbers in regression).

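The `threshold`/`max_features` interplay documented in this file can be sketched as follows (the estimator and dataset are illustrative choices, not from the PR):

```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectFromModel
from sklearn.linear_model import LogisticRegression

X, y = load_iris(return_X_y=True)

# Default threshold=None: for this non-L1 estimator it falls back to "mean".
sfm = SelectFromModel(LogisticRegression(max_iter=1000)).fit(X, y)
X_reduced = sfm.transform(X)

# To select on max_features alone, disable the threshold as the docstring suggests.
sfm_cap = SelectFromModel(LogisticRegression(max_iter=1000),
                          max_features=2, threshold=-np.inf).fit(X, y)
X_top2 = sfm_cap.transform(X)
print(X_reduced.shape, X_top2.shape)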
46 changes: 25 additions & 21 deletions sklearn/feature_selection/_mutual_info.py
@@ -171,7 +171,7 @@ def _iterate_columns(X, columns=None):
X : ndarray or csc_matrix, shape (n_samples, n_features)
Matrix over which to iterate.

- columns : iterable or None, default None
+ columns : iterable or None, default=None
Indices of columns to iterate over. If None, iterate over all columns.

Yields
@@ -202,29 +202,29 @@ def _estimate_mi(X, y, discrete_features='auto', discrete_target=False,
X : array-like or sparse matrix, shape (n_samples, n_features)
Feature matrix.

- y : array-like, shape (n_samples,)
+ y : array-like of shape (n_samples,)
Target vector.

- discrete_features : {'auto', bool, array-like}, default 'auto'
+ discrete_features : {'auto', bool, array-like}, default='auto'
If bool, then determines whether to consider all features discrete
or continuous. If array, then it should be either a boolean mask
with shape (n_features,) or array with indices of discrete features.
If 'auto', it is assigned to False for dense `X` and to True for
sparse `X`.

- discrete_target : bool, default False
+ discrete_target : bool, default=False
Whether to consider `y` as a discrete variable.

- n_neighbors : int, default 3
+ n_neighbors : int, default=3
Number of neighbors to use for MI estimation for continuous variables,
see [1]_ and [2]_. Higher values reduce variance of the estimation, but
could introduce a bias.

- copy : bool, default True
+ copy : bool, default=True
Whether to make a copy of the given data. If set to False, the initial
data will be overwritten.

- random_state : int, RandomState instance or None, optional, default None
+ random_state : int, RandomState instance or None, default=None
Determines random number generation for adding small noise to
continuous variables in order to remove repeated values.
Pass an int for reproducible results across multiple function calls.
@@ -313,26 +313,26 @@ def mutual_info_regression(X, y, *, discrete_features='auto', n_neighbors=3,
X : array-like or sparse matrix, shape (n_samples, n_features)
Feature matrix.

- y : array-like, shape (n_samples,)
+ y : array-like of shape (n_samples,)
Target vector.

- discrete_features : {'auto', bool, array-like}, default 'auto'
+ discrete_features : {'auto', bool, array-like}, default='auto'
If bool, then determines whether to consider all features discrete
or continuous. If array, then it should be either a boolean mask
with shape (n_features,) or array with indices of discrete features.
If 'auto', it is assigned to False for dense `X` and to True for
sparse `X`.

- n_neighbors : int, default 3
+ n_neighbors : int, default=3
Number of neighbors to use for MI estimation for continuous variables,
see [2]_ and [3]_. Higher values reduce variance of the estimation, but
could introduce a bias.

- copy : bool, default True
+ copy : bool, default=True
Whether to make a copy of the given data. If set to False, the initial
data will be overwritten.

- random_state : int, RandomState instance or None, optional, default None
+ random_state : int, RandomState instance or None, default=None
Determines random number generation for adding small noise to
continuous variables in order to remove repeated values.
Pass an int for reproducible results across multiple function calls.
@@ -350,13 +350,15 @@ def mutual_info_regression(X, y, *, discrete_features='auto', n_neighbors=3,
For example, pixel intensities of an image are discrete features
(but hardly categorical) and you will get better results if mark them
as such. Also note, that treating a continuous variable as discrete and
- vice versa will usually give incorrect results, so be attentive about that.
+ vice versa will usually give incorrect results, so be attentive about
+ that.
2. True mutual information can't be negative. If its estimate turns out
to be negative, it is replaced by zero.

References
----------
- .. [1] `Mutual Information <https://en.wikipedia.org/wiki/Mutual_information>`_
+ .. [1] `Mutual Information
+    <https://en.wikipedia.org/wiki/Mutual_information>`_
on Wikipedia.
.. [2] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
information". Phys. Rev. E 69, 2004.
@@ -391,26 +393,26 @@ def mutual_info_classif(X, y, *, discrete_features='auto', n_neighbors=3,
X : array-like or sparse matrix, shape (n_samples, n_features)
Feature matrix.

- y : array-like, shape (n_samples,)
+ y : array-like of shape (n_samples,)
Target vector.

- discrete_features : {'auto', bool, array-like}, default 'auto'
+ discrete_features : {'auto', bool, array-like}, default='auto'
If bool, then determines whether to consider all features discrete
or continuous. If array, then it should be either a boolean mask
with shape (n_features,) or array with indices of discrete features.
If 'auto', it is assigned to False for dense `X` and to True for
sparse `X`.

- n_neighbors : int, default 3
+ n_neighbors : int, default=3
Number of neighbors to use for MI estimation for continuous variables,
see [2]_ and [3]_. Higher values reduce variance of the estimation, but
could introduce a bias.

- copy : bool, default True
+ copy : bool, default=True
Whether to make a copy of the given data. If set to False, the initial
data will be overwritten.

- random_state : int, RandomState instance or None, optional, default None
+ random_state : int, RandomState instance or None, default=None
Determines random number generation for adding small noise to
continuous variables in order to remove repeated values.
Pass an int for reproducible results across multiple function calls.
@@ -428,13 +430,15 @@ def mutual_info_classif(X, y, *, discrete_features='auto', n_neighbors=3,
For example, pixel intensities of an image are discrete features
(but hardly categorical) and you will get better results if mark them
as such. Also note, that treating a continuous variable as discrete and
- vice versa will usually give incorrect results, so be attentive about that.
+ vice versa will usually give incorrect results, so be attentive about
+ that.
2. True mutual information can't be negative. If its estimate turns out
to be negative, it is replaced by zero.

References
----------
- .. [1] `Mutual Information <https://en.wikipedia.org/wiki/Mutual_information>`_
+ .. [1] `Mutual Information
+    <https://en.wikipedia.org/wiki/Mutual_information>`_
on Wikipedia.
.. [2] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
information". Phys. Rev. E 69, 2004.
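Both mutual-information estimators documented in this file share the `n_neighbors`/`random_state` behaviour described above; a small synthetic check (data and seed are my own, not from the PR):

```python
import numpy as np
from sklearn.feature_selection import mutual_info_regression

rng = np.random.RandomState(0)
X = rng.rand(500, 3)
y = X[:, 0] + 0.05 * rng.randn(500)  # the target depends only on feature 0

mi = mutual_info_regression(X, y, n_neighbors=3, random_state=0)
print(mi)  # estimates are non-negative; feature 0 should dominate
```

As the notes state, any negative raw estimate is clipped to zero, so the returned array is always non-negative.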
22 changes: 11 additions & 11 deletions sklearn/feature_selection/_rfe.py
@@ -57,17 +57,17 @@ class RFE(SelectorMixin, MetaEstimatorMixin, BaseEstimator):
information about feature importance
(e.g. `coef_`, `feature_importances_`).

- n_features_to_select : int or None (default=None)
+ n_features_to_select : int or None, default=None
The number of features to select. If `None`, half of the features
are selected.

- step : int or float, optional (default=1)
+ step : int or float, default=1
If greater than or equal to 1, then ``step`` corresponds to the
(integer) number of features to remove at each iteration.
If within (0.0, 1.0), then ``step`` corresponds to the percentage
(rounded down) of features to remove at each iteration.

- verbose : int, (default=0)
+ verbose : int, default=0
Controls verbosity of output.

importance_getter : str or callable, default='auto'
@@ -368,23 +368,23 @@ class RFECV(RFE):
information about feature importance either through a ``coef_``
attribute or through a ``feature_importances_`` attribute.

- step : int or float, optional (default=1)
+ step : int or float, default=1
If greater than or equal to 1, then ``step`` corresponds to the
(integer) number of features to remove at each iteration.
If within (0.0, 1.0), then ``step`` corresponds to the percentage
(rounded down) of features to remove at each iteration.
Note that the last iteration may remove fewer than ``step`` features in
order to reach ``min_features_to_select``.

- min_features_to_select : int, (default=1)
+ min_features_to_select : int, default=1
The minimum number of features to be selected. This number of features
will always be scored, even if the difference between the original
feature count and ``min_features_to_select`` isn't divisible by
``step``.

.. versionadded:: 0.20

- cv : int, cross-validation generator or an iterable, optional
+ cv : int, cross-validation generator or an iterable, default=None
Determines the cross-validation splitting strategy.
Possible inputs for cv are:

@@ -404,23 +404,23 @@ class RFECV(RFE):
.. versionchanged:: 0.22
``cv`` default value of None changed from 3-fold to 5-fold.

- scoring : string, callable or None, optional, (default=None)
+ scoring : string, callable or None, default=None
A string (see model evaluation documentation) or
a scorer callable object / function with signature
``scorer(estimator, X, y)``.

- verbose : int, (default=0)
+ verbose : int, default=0
Controls verbosity of output.

- n_jobs : int or None, optional (default=None)
+ n_jobs : int or None, default=None
Number of cores to run in parallel while fitting across folds.
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
for more details.

.. versionadded:: 0.18

- importance_getter : str or callable, optional (default='auto')
+ importance_getter : str or callable, default='auto'
If 'auto', uses the feature importance either through a `coef_`
or `feature_importances_` attributes of estimator.

@@ -524,7 +524,7 @@ def fit(self, X, y, groups=None):
Target values (integers for classification, real numbers for
regression).

- groups : array-like of shape (n_samples,) or None
+ groups : array-like of shape (n_samples,) or None, default=None
Group labels for the samples used while splitting the dataset into
train/test set. Only used in conjunction with a "Group" :term:`cv`
instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).
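The `step`, `n_features_to_select`, and `min_features_to_select` parameters touched in this file behave as in the sketch below (the estimator and dataset are illustrative, not from the PR):

```python
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFE, RFECV
from sklearn.svm import SVR

X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)

# Eliminate one feature per iteration (step=1) until exactly five remain.
rfe = RFE(SVR(kernel="linear"), n_features_to_select=5, step=1).fit(X, y)

# Let cross-validation pick the count, but never go below two features.
rfecv = RFECV(SVR(kernel="linear"), step=1, cv=3,
              min_features_to_select=2).fit(X, y)
print(rfe.support_.sum(), rfecv.n_features_)
```

With a float `step` in (0.0, 1.0), the same loop would instead drop that fraction of the remaining features (rounded down) at each iteration.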