DOC Ensures that RobustScaler passes numpydoc validation (#21155) · scikit-learn/scikit-learn@0dbfd21 · GitHub

Commit 0dbfd21

jmloyola authored and glemaitre committed
DOC Ensures that RobustScaler passes numpydoc validation (#21155)
1 parent faa1a44 commit 0dbfd21

File tree

2 files changed (+40, -39 lines)

maint_tools/test_docstrings.py

Lines changed: 0 additions & 1 deletion
@@ -31,7 +31,6 @@
     "PatchExtractor",
     "PolynomialFeatures",
     "QuadraticDiscriminantAnalysis",
-    "RobustScaler",
     "SelfTrainingClassifier",
     "SparseRandomProjection",
     "SpectralBiclustering",

sklearn/preprocessing/_data.py

Lines changed: 40 additions & 38 deletions
@@ -1352,7 +1352,7 @@ class RobustScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     Centering and scaling happen independently on each feature by
     computing the relevant statistics on the samples in the training
     set. Median and interquartile range are then stored to be used on
-    later data using the ``transform`` method.
+    later data using the :meth:`transform` method.

     Standardization of a dataset is a common requirement for many
     machine learning estimators. Typically this is done by removing the mean
@@ -1367,31 +1367,33 @@ class RobustScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     Parameters
     ----------
     with_centering : bool, default=True
-        If True, center the data before scaling.
-        This will cause ``transform`` to raise an exception when attempted on
-        sparse matrices, because centering them entails building a dense
+        If `True`, center the data before scaling.
+        This will cause :meth:`transform` to raise an exception when attempted
+        on sparse matrices, because centering them entails building a dense
         matrix which in common use cases is likely to be too large to fit in
         memory.

     with_scaling : bool, default=True
-        If True, scale the data to interquartile range.
+        If `True`, scale the data to interquartile range.

     quantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0, \
-        default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR
-        Quantile range used to calculate ``scale_``.
+        default=(25.0, 75.0)
+        Quantile range used to calculate `scale_`. By default this is equal to
+        the IQR, i.e., `q_min` is the first quantile and `q_max` is the third
+        quantile.

         .. versionadded:: 0.18

     copy : bool, default=True
-        If False, try to avoid a copy and do inplace scaling instead.
+        If `False`, try to avoid a copy and do inplace scaling instead.
         This is not guaranteed to always work inplace; e.g. if the data is
         not a NumPy array or scipy.sparse CSR matrix, a copy may still be
         returned.

     unit_variance : bool, default=False
-        If True, scale data so that normally distributed features have a
+        If `True`, scale data so that normally distributed features have a
         variance of 1. In general, if the difference between the x-values of
-        ``q_max`` and ``q_min`` for a standard normal distribution is greater
+        `q_max` and `q_min` for a standard normal distribution is greater
         than 1, the dataset will be scaled down. If less than 1, the dataset
         will be scaled up.
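
To illustrate the reworded `quantile_range` and `with_centering` descriptions, here is a small, hand-written usage sketch (the data values are made up for illustration and are not part of the diff):

    import numpy as np
    from sklearn.preprocessing import RobustScaler

    # Toy data with an outlier in the first column.
    X = np.array([[1.0, -2.0, 2.0],
                  [-2.0, 1.0, 3.0],
                  [4.0, 1.0, -2.0],
                  [100.0, 1.0, 0.0]])

    # Use a wider quantile range than the default IQR of (25.0, 75.0).
    scaler = RobustScaler(with_centering=True, quantile_range=(10.0, 90.0))
    X_scaled = scaler.fit_transform(X)

    # center_ holds the per-feature medians, scale_ the per-feature
    # quantile ranges used for scaling.
    print(scaler.center_)
    print(scaler.scale_)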
@@ -1419,6 +1421,21 @@ class RobustScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):

         .. versionadded:: 1.0

+    See Also
+    --------
+    robust_scale : Equivalent function without the estimator API.
+    sklearn.decomposition.PCA : Further removes the linear correlation across
+        features with 'whiten=True'.
+
+    Notes
+    -----
+    For a comparison of the different scalers, transformers, and normalizers,
+    see :ref:`examples/preprocessing/plot_all_scaling.py
+    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
+
+    https://en.wikipedia.org/wiki/Median
+    https://en.wikipedia.org/wiki/Interquartile_range
+
     Examples
     --------
     >>> from sklearn.preprocessing import RobustScaler
@@ -1432,23 +1449,6 @@ class RobustScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     array([[ 0. , -2. ,  0. ],
            [-1. ,  0. ,  0.4],
            [ 1. ,  0. , -1.6]])
-
-    See Also
-    --------
-    robust_scale : Equivalent function without the estimator API.
-
-    :class:`~sklearn.decomposition.PCA`
-        Further removes the linear correlation across features with
-        'whiten=True'.
-
-    Notes
-    -----
-    For a comparison of the different scalers, transformers, and normalizers,
-    see :ref:`examples/preprocessing/plot_all_scaling.py
-    <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-
-    https://en.wikipedia.org/wiki/Median
-    https://en.wikipedia.org/wiki/Interquartile_range
     """

     def __init__(
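
The `See Also` entry points to PCA with `whiten=True` as a way to further remove linear correlation across features. A hand-written sketch of chaining the two (not taken from the diff; random toy data for illustration):

    import numpy as np
    from sklearn.decomposition import PCA
    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import RobustScaler

    rng = np.random.RandomState(0)
    X = rng.normal(size=(100, 3))  # toy data

    # Robust-scale the features, then decorrelate them with a whitened PCA.
    pipe = make_pipeline(RobustScaler(), PCA(whiten=True))
    X_white = pipe.fit_transform(X)

    # After whitening, the transformed features are uncorrelated with
    # (approximately) unit variance.
    print(np.cov(X_white, rowvar=False).round(2))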
@@ -1475,8 +1475,8 @@ def fit(self, X, y=None):
             The data used to compute the median and quantiles
             used for later scaling along the features axis.

-        y : None
-            Ignored.
+        y : Ignored
+            Not used, present here for API consistency by convention.

         Returns
         -------
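
The new `y : Ignored` wording reflects the scikit-learn convention that transformers accept `y` in `fit` only for pipeline compatibility. A quick illustrative sketch (toy data, not from the diff):

    import numpy as np
    from sklearn.preprocessing import RobustScaler

    X = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 100.0]])

    # Both calls fit the same statistics: y is accepted but never used.
    RobustScaler().fit(X)
    RobustScaler().fit(X, y=np.zeros(len(X)))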
@@ -1627,32 +1627,34 @@ def robust_scale(
         The data to center and scale.

     axis : int, default=0
-        axis used to compute the medians and IQR along. If 0,
+        Axis used to compute the medians and IQR along. If 0,
         independently scale each feature, otherwise (if 1) scale
         each sample.

     with_centering : bool, default=True
-        If True, center the data before scaling.
+        If `True`, center the data before scaling.

     with_scaling : bool, default=True
-        If True, scale the data to unit variance (or equivalently,
+        If `True`, scale the data to unit variance (or equivalently,
         unit standard deviation).

-    quantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0
-        default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR
-        Quantile range used to calculate ``scale_``.
+    quantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0,\
+        default=(25.0, 75.0)
+        Quantile range used to calculate `scale_`. By default this is equal to
+        the IQR, i.e., `q_min` is the first quantile and `q_max` is the third
+        quantile.

         .. versionadded:: 0.18

     copy : bool, default=True
-        set to False to perform inplace row normalization and avoid a
+        Set to `False` to perform inplace row normalization and avoid a
         copy (if the input is already a numpy array or a scipy.sparse
         CSR matrix and if axis is 1).

     unit_variance : bool, default=False
-        If True, scale data so that normally distributed features have a
+        If `True`, scale data so that normally distributed features have a
         variance of 1. In general, if the difference between the x-values of
-        ``q_max`` and ``q_min`` for a standard normal distribution is greater
+        `q_max` and `q_min` for a standard normal distribution is greater
         than 1, the dataset will be scaled down. If less than 1, the dataset
         will be scaled up.
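
`robust_scale` is the function counterpart of `RobustScaler`, so the reworded parameters can be exercised directly. A minimal hand-written sketch (values chosen only for illustration):

    import numpy as np
    from sklearn.preprocessing import robust_scale

    X = np.array([[1.0, -2.0, 2.0],
                  [-2.0, 1.0, 3.0],
                  [4.0, 1.0, -2.0]])

    # Scale each feature (axis=0) using the default IQR-based quantile_range.
    X_scaled = robust_scale(X, axis=0, with_centering=True, with_scaling=True)
    print(X_scaled)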

0 commit comments
