DOC Standardize default documentation for feature_selection (#17465) · viclafargue/scikit-learn@4848e5d · GitHub
[go: up one dir, main page]

Skip to content

Commit 4848e5d

Browse files
kohakukun authored and viclafargue committed
DOC Standardize default documentation for feature_selection (scikit-learn#17465)
* doc: Standardize default documentation for feature_selection
  - Change `default` documentation of parameters to follow the `default=<value>` template
* remove space at end-of-line
* fix linting issues
* address comments
* missing file
1 parent 535a1b7 commit 4848e5d

File tree

6 files changed

+80
-66
lines changed

6 files changed

+80
-66
lines changed

sklearn/feature_selection/_base.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ def get_support(self, indices=False):
3232
3333
Parameters
3434
----------
35-
indices : boolean (default False)
35+
indices : bool, default=False
3636
If True, the return value will be an array of integers, rather
3737
than a boolean mask.
3838

sklearn/feature_selection/_from_model.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator):
7373
or a non-fitted estimator. The estimator must have either a
7474
``feature_importances_`` or ``coef_`` attribute after fitting.
7575
76-
threshold : string, float, optional default None
76+
threshold : string or float, default=None
7777
The threshold value to use for feature selection. Features whose
7878
importance is greater or equal are kept while the others are
7979
discarded. If "median" (resp. "mean"), then the ``threshold`` value is
@@ -83,26 +83,26 @@ class SelectFromModel(MetaEstimatorMixin, SelectorMixin, BaseEstimator):
8383
or implicitly (e.g., Lasso), the threshold used is 1e-5.
8484
Otherwise, "mean" is used by default.
8585
86-
prefit : bool, default False
86+
prefit : bool, default=False
8787
Whether a prefit model is expected to be passed into the constructor
8888
directly or not. If True, ``transform`` must be called directly
8989
and SelectFromModel cannot be used with ``cross_val_score``,
9090
``GridSearchCV`` and similar utilities that clone the estimator.
9191
Otherwise train the model using ``fit`` and then ``transform`` to do
9292
feature selection.
9393
94-
norm_order : non-zero int, inf, -inf, default 1
94+
norm_order : non-zero int, inf, -inf, default=1
9595
Order of the norm used to filter the vectors of coefficients below
9696
``threshold`` in the case where the ``coef_`` attribute of the
9797
estimator is of dimension 2.
9898
99-
max_features : int or None, optional
99+
max_features : int, default=None
100100
The maximum number of features to select.
101101
To only select based on ``max_features``, set ``threshold=-np.inf``.
102102
103103
.. versionadded:: 0.20
104104
105-
importance_getter : str or callable, optional (default='auto')
105+
importance_getter : str or callable, default='auto'
106106
If 'auto', uses the feature importance either through a ``coef_``
107107
attribute or ``feature_importances_`` attribute of estimator.
108108
@@ -198,7 +198,7 @@ def fit(self, X, y=None, **fit_params):
198198
X : array-like of shape (n_samples, n_features)
199199
The training input samples.
200200
201-
y : array-like, shape (n_samples,)
201+
y : array-like of shape (n_samples,), default=None
202202
The target values (integers that correspond to classes in
203203
classification, real numbers in regression).
204204
@@ -242,7 +242,7 @@ def partial_fit(self, X, y=None, **fit_params):
242242
X : array-like of shape (n_samples, n_features)
243243
The training input samples.
244244
245-
y : array-like, shape (n_samples,)
245+
y : array-like of shape (n_samples,), default=None
246246
The target values (integers that correspond to classes in
247247
classification, real numbers in regression).
248248

sklearn/feature_selection/_mutual_info.py

Lines changed: 25 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ def _iterate_columns(X, columns=None):
171171
X : ndarray or csc_matrix, shape (n_samples, n_features)
172172
Matrix over which to iterate.
173173
174-
columns : iterable or None, default None
174+
columns : iterable or None, default=None
175175
Indices of columns to iterate over. If None, iterate over all columns.
176176
177177
Yields
@@ -202,29 +202,29 @@ def _estimate_mi(X, y, discrete_features='auto', discrete_target=False,
202202
X : array-like or sparse matrix, shape (n_samples, n_features)
203203
Feature matrix.
204204
205-
y : array-like, shape (n_samples,)
205+
y : array-like of shape (n_samples,)
206206
Target vector.
207207
208-
discrete_features : {'auto', bool, array-like}, default 'auto'
208+
discrete_features : {'auto', bool, array-like}, default='auto'
209209
If bool, then determines whether to consider all features discrete
210210
or continuous. If array, then it should be either a boolean mask
211211
with shape (n_features,) or array with indices of discrete features.
212212
If 'auto', it is assigned to False for dense `X` and to True for
213213
sparse `X`.
214214
215-
discrete_target : bool, default False
215+
discrete_target : bool, default=False
216216
Whether to consider `y` as a discrete variable.
217217
218-
n_neighbors : int, default 3
218+
n_neighbors : int, default=3
219219
Number of neighbors to use for MI estimation for continuous variables,
220220
see [1]_ and [2]_. Higher values reduce variance of the estimation, but
221221
could introduce a bias.
222222
223-
copy : bool, default True
223+
copy : bool, default=True
224224
Whether to make a copy of the given data. If set to False, the initial
225225
data will be overwritten.
226226
227-
random_state : int, RandomState instance or None, optional, default None
227+
random_state : int, RandomState instance or None, default=None
228228
Determines random number generation for adding small noise to
229229
continuous variables in order to remove repeated values.
230230
Pass an int for reproducible results across multiple function calls.
@@ -313,26 +313,26 @@ def mutual_info_regression(X, y, *, discrete_features='auto', n_neighbors=3,
313313
X : array-like or sparse matrix, shape (n_samples, n_features)
314314
Feature matrix.
315315
316-
y : array-like, shape (n_samples,)
316+
y : array-like of shape (n_samples,)
317317
Target vector.
318318
319-
discrete_features : {'auto', bool, array-like}, default 'auto'
319+
discrete_features : {'auto', bool, array-like}, default='auto'
320320
If bool, then determines whether to consider all features discrete
321321
or continuous. If array, then it should be either a boolean mask
322322
with shape (n_features,) or array with indices of discrete features.
323323
If 'auto', it is assigned to False for dense `X` and to True for
324324
sparse `X`.
325325
326-
n_neighbors : int, default 3
326+
n_neighbors : int, default=3
327327
Number of neighbors to use for MI estimation for continuous variables,
328328
see [2]_ and [3]_. Higher values reduce variance of the estimation, but
329329
could introduce a bias.
330330
331-
copy : bool, default True
331+
copy : bool, default=True
332332
Whether to make a copy of the given data. If set to False, the initial
333333
data will be overwritten.
334334
335-
random_state : int, RandomState instance or None, optional, default None
335+
random_state : int, RandomState instance or None, default=None
336336
Determines random number generation for adding small noise to
337337
continuous variables in order to remove repeated values.
338338
Pass an int for reproducible results across multiple function calls.
@@ -350,13 +350,15 @@ def mutual_info_regression(X, y, *, discrete_features='auto', n_neighbors=3,
350350
For example, pixel intensities of an image are discrete features
351351
(but hardly categorical) and you will get better results if you mark them
352352
as such. Also note that treating a continuous variable as discrete and
353-
vice versa will usually give incorrect results, so be attentive about that.
353+
vice versa will usually give incorrect results, so be attentive about
354+
that.
354355
2. True mutual information can't be negative. If its estimate turns out
355356
to be negative, it is replaced by zero.
356357
357358
References
358359
----------
359-
.. [1] `Mutual Information <https://en.wikipedia.org/wiki/Mutual_information>`_
360+
.. [1] `Mutual Information
361+
<https://en.wikipedia.org/wiki/Mutual_information>`_
360362
on Wikipedia.
361363
.. [2] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
362364
information". Phys. Rev. E 69, 2004.
@@ -391,26 +393,26 @@ def mutual_info_classif(X, y, *, discrete_features='auto', n_neighbors=3,
391393
X : array-like or sparse matrix, shape (n_samples, n_features)
392394
Feature matrix.
393395
394-
y : array-like, shape (n_samples,)
396+
y : array-like of shape (n_samples,)
395397
Target vector.
396398
397-
discrete_features : {'auto', bool, array-like}, default 'auto'
399+
discrete_features : {'auto', bool, array-like}, default='auto'
398400
If bool, then determines whether to consider all features discrete
399401
or continuous. If array, then it should be either a boolean mask
400402
with shape (n_features,) or array with indices of discrete features.
401403
If 'auto', it is assigned to False for dense `X` and to True for
402404
sparse `X`.
403405
404-
n_neighbors : int, default 3
406+
n_neighbors : int, default=3
405407
Number of neighbors to use for MI estimation for continuous variables,
406408
see [2]_ and [3]_. Higher values reduce variance of the estimation, but
407409
could introduce a bias.
408410
409-
copy : bool, default True
411+
copy : bool, default=True
410412
Whether to make a copy of the given data. If set to False, the initial
411413
data will be overwritten.
412414
413-
random_state : int, RandomState instance or None, optional, default None
415+
random_state : int, RandomState instance or None, default=None
414416
Determines random number generation for adding small noise to
415417
continuous variables in order to remove repeated values.
416418
Pass an int for reproducible results across multiple function calls.
@@ -428,13 +430,15 @@ def mutual_info_classif(X, y, *, discrete_features='auto', n_neighbors=3,
428430
For example, pixel intensities of an image are discrete features
429431
(but hardly categorical) and you will get better results if you mark them
430432
as such. Also note that treating a continuous variable as discrete and
431-
vice versa will usually give incorrect results, so be attentive about that.
433+
vice versa will usually give incorrect results, so be attentive about
434+
that.
432435
2. True mutual information can't be negative. If its estimate turns out
433436
to be negative, it is replaced by zero.
434437
435438
References
436439
----------
437-
.. [1] `Mutual Information <https://en.wikipedia.org/wiki/Mutual_information>`_
440+
.. [1] `Mutual Information
441+
<https://en.wikipedia.org/wiki/Mutual_information>`_
438442
on Wikipedia.
439443
.. [2] A. Kraskov, H. Stogbauer and P. Grassberger, "Estimating mutual
440444
information". Phys. Rev. E 69, 2004.

sklearn/feature_selection/_rfe.py

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -57,17 +57,17 @@ class RFE(SelectorMixin, MetaEstimatorMixin, BaseEstimator):
5757
information about feature importance
5858
(e.g. `coef_`, `feature_importances_`).
5959
60-
n_features_to_select : int or None (default=None)
60+
n_features_to_select : int or None, default=None
6161
The number of features to select. If `None`, half of the features
6262
are selected.
6363
64-
step : int or float, optional (default=1)
64+
step : int or float, default=1
6565
If greater than or equal to 1, then ``step`` corresponds to the
6666
(integer) number of features to remove at each iteration.
6767
If within (0.0, 1.0), then ``step`` corresponds to the percentage
6868
(rounded down) of features to remove at each iteration.
6969
70-
verbose : int, (default=0)
70+
verbose : int, default=0
7171
Controls verbosity of output.
7272
7373
importance_getter : str or callable, default='auto'
@@ -368,23 +368,23 @@ class RFECV(RFE):
368368
information about feature importance either through a ``coef_``
369369
attribute or through a ``feature_importances_`` attribute.
370370
371-
step : int or float, optional (default=1)
371+
step : int or float, default=1
372372
If greater than or equal to 1, then ``step`` corresponds to the
373373
(integer) number of features to remove at each iteration.
374374
If within (0.0, 1.0), then ``step`` corresponds to the percentage
375375
(rounded down) of features to remove at each iteration.
376376
Note that the last iteration may remove fewer than ``step`` features in
377377
order to reach ``min_features_to_select``.
378378
379-
min_features_to_select : int, (default=1)
379+
min_features_to_select : int, default=1
380380
The minimum number of features to be selected. This number of features
381381
will always be scored, even if the difference between the original
382382
feature count and ``min_features_to_select`` isn't divisible by
383383
``step``.
384384
385385
.. versionadded:: 0.20
386386
387-
cv : int, cross-validation generator or an iterable, optional
387+
cv : int, cross-validation generator or an iterable, default=None
388388
Determines the cross-validation splitting strategy.
389389
Possible inputs for cv are:
390390
@@ -404,23 +404,23 @@ class RFECV(RFE):
404404
.. versionchanged:: 0.22
405405
``cv`` default value of None changed from 3-fold to 5-fold.
406406
407-
scoring : string, callable or None, optional, (default=None)
407+
scoring : string, callable or None, default=None
408408
A string (see model evaluation documentation) or
409409
a scorer callable object / function with signature
410410
``scorer(estimator, X, y)``.
411411
412-
verbose : int, (default=0)
412+
verbose : int, default=0
413413
Controls verbosity of output.
414414
415-
n_jobs : int or None, optional (default=None)
415+
n_jobs : int or None, default=None
416416
Number of cores to run in parallel while fitting across folds.
417417
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
418418
``-1`` means using all processors. See :term:`Glossary <n_jobs>`
419419
for more details.
420420
421421
.. versionadded:: 0.18
422422
423-
importance_getter : str or callable, optional (default='auto')
423+
importance_getter : str or callable, default='auto'
424424
If 'auto', uses the feature importance either through a `coef_`
425425
or `feature_importances_` attributes of estimator.
426426
@@ -524,7 +524,7 @@ def fit(self, X, y, groups=None):
524524
Target values (integers for classification, real numbers for
525525
regression).
526526
527-
groups : array-like of shape (n_samples,) or None
527+
groups : array-like of shape (n_samples,) or None, default=None
528528
Group labels for the samples used while splitting the dataset into
529529
train/test set. Only used in conjunction with a "Group" :term:`cv`
530530
instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).

0 commit comments

Comments
 (0)
0