From 1e5255f07b8c5a7174ed43db74d5c03b806a074b Mon Sep 17 00:00:00 2001
From: Juan Martin Loyola
Date: Sun, 26 Sep 2021 22:55:52 -0300
Subject: [PATCH 1/3] Remove RobustScaler from DOCSTRING_IGNORE_LIST

---
 maint_tools/test_docstrings.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/maint_tools/test_docstrings.py b/maint_tools/test_docstrings.py
index 90d70a7c18101..d5b7ee4e740df 100644
--- a/maint_tools/test_docstrings.py
+++ b/maint_tools/test_docstrings.py
@@ -32,7 +32,6 @@
     "PatchExtractor",
     "PolynomialFeatures",
     "QuadraticDiscriminantAnalysis",
-    "RobustScaler",
     "SelfTrainingClassifier",
     "SparseRandomProjection",
     "SpectralBiclustering",

From 6adec4d32c0577b8807fb98915d8f92054e963cb Mon Sep 17 00:00:00 2001
From: Juan Martin Loyola
Date: Sun, 26 Sep 2021 22:56:28 -0300
Subject: [PATCH 2/3] Fix numpydocs from RobustScaler

---
 sklearn/preprocessing/_data.py | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index ecce5e6b6d096..c374bc8e40d82 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -1419,20 +1419,6 @@ class RobustScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
 
         .. versionadded:: 1.0
 
-    Examples
-    --------
-    >>> from sklearn.preprocessing import RobustScaler
-    >>> X = [[ 1., -2.,  2.],
-    ...      [ -2.,  1.,  3.],
-    ...      [ 4.,  1., -2.]]
-    >>> transformer = RobustScaler().fit(X)
-    >>> transformer
-    RobustScaler()
-    >>> transformer.transform(X)
-    array([[ 0. , -2. ,  0. ],
-           [-1. ,  0. ,  0.4],
-           [ 1. ,  0. , -1.6]])
-
     See Also
     --------
     robust_scale : Equivalent function without the estimator API.
@@ -1449,6 +1435,20 @@ class RobustScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
 
     https://en.wikipedia.org/wiki/Median
     https://en.wikipedia.org/wiki/Interquartile_range
+
+    Examples
+    --------
+    >>> from sklearn.preprocessing import RobustScaler
+    >>> X = [[ 1., -2.,  2.],
+    ...      [ -2.,  1.,  3.],
+    ...      [ 4.,  1., -2.]]
+    >>> transformer = RobustScaler().fit(X)
+    >>> transformer
+    RobustScaler()
+    >>> transformer.transform(X)
+    array([[ 0. , -2. ,  0. ],
+           [-1. ,  0. ,  0.4],
+           [ 1. ,  0. , -1.6]])
     """
 
     def __init__(

From 63023eac5f62c169763b61db76abb82a161529f4 Mon Sep 17 00:00:00 2001
From: Juan Martin Loyola
Date: Sun, 26 Sep 2021 23:28:11 -0300
Subject: [PATCH 3/3] Change docstrings to maintain consistency

---
 sklearn/preprocessing/_data.py | 52 ++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 25 deletions(-)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index c374bc8e40d82..db865456db7e0 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -1352,7 +1352,7 @@ class RobustScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     Centering and scaling happen independently on each feature by
     computing the relevant statistics on the samples in the training
     set. Median and interquartile range are then stored to be used on
-    later data using the ``transform`` method.
+    later data using the :meth:`transform` method.
 
     Standardization of a dataset is a common requirement for many
     machine learning estimators. Typically this is done by removing the mean
@@ -1367,31 +1367,33 @@ class RobustScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     Parameters
     ----------
     with_centering : bool, default=True
-        If True, center the data before scaling.
-        This will cause ``transform`` to raise an exception when attempted on
-        sparse matrices, because centering them entails building a dense
+        If `True`, center the data before scaling.
+        This will cause :meth:`transform` to raise an exception when attempted
+        on sparse matrices, because centering them entails building a dense
         matrix which in common use cases is likely to be too large to fit in
         memory.
 
     with_scaling : bool, default=True
-        If True, scale the data to interquartile range.
+        If `True`, scale the data to interquartile range.
 
     quantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0, \
-        default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR
-        Quantile range used to calculate ``scale_``.
+        default=(25.0, 75.0)
+        Quantile range used to calculate `scale_`. By default this is equal to
+        the IQR, i.e., `q_min` is the first quantile and `q_max` is the third
+        quantile.
 
         .. versionadded:: 0.18
 
     copy : bool, default=True
-        If False, try to avoid a copy and do inplace scaling instead.
+        If `False`, try to avoid a copy and do inplace scaling instead.
         This is not guaranteed to always work inplace; e.g. if the data is
         not a NumPy array or scipy.sparse CSR matrix, a copy may still be
         returned.
 
     unit_variance : bool, default=False
-        If True, scale data so that normally distributed features have a
+        If `True`, scale data so that normally distributed features have a
         variance of 1. In general, if the difference between the x-values of
-        ``q_max`` and ``q_min`` for a standard normal distribution is greater
+        `q_max` and `q_min` for a standard normal distribution is greater
         than 1, the dataset will be scaled down. If less than 1, the dataset
         will be scaled up.
 
@@ -1422,10 +1424,8 @@ class RobustScaler(_OneToOneFeatureMixin, TransformerMixin, BaseEstimator):
     See Also
     --------
     robust_scale : Equivalent function without the estimator API.
-
-    :class:`~sklearn.decomposition.PCA`
-        Further removes the linear correlation across features with
-        'whiten=True'.
+    sklearn.decomposition.PCA : Further removes the linear correlation across
+        features with 'whiten=True'.
 
     Notes
     -----
@@ -1475,8 +1475,8 @@ def fit(self, X, y=None):
             The data used to compute the median and quantiles
             used for later scaling along the features axis.
 
-        y : None
-            Ignored.
+        y : Ignored
+            Not used, present here for API consistency by convention.
 
         Returns
         -------
@@ -1627,32 +1627,34 @@ def robust_scale(
         The data to center and scale.
 
     axis : int, default=0
-        axis used to compute the medians and IQR along. If 0,
+        Axis used to compute the medians and IQR along. If 0,
         independently scale each feature, otherwise (if 1) scale
         each sample.
 
     with_centering : bool, default=True
-        If True, center the data before scaling.
+        If `True`, center the data before scaling.
 
     with_scaling : bool, default=True
-        If True, scale the data to unit variance (or equivalently,
+        If `True`, scale the data to unit variance (or equivalently,
        unit standard deviation).
 
-    quantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0
-        default=(25.0, 75.0), == (1st quantile, 3rd quantile), == IQR
-        Quantile range used to calculate ``scale_``.
+    quantile_range : tuple (q_min, q_max), 0.0 < q_min < q_max < 100.0,\
+        default=(25.0, 75.0)
+        Quantile range used to calculate `scale_`. By default this is equal to
+        the IQR, i.e., `q_min` is the first quantile and `q_max` is the third
+        quantile.
 
         .. versionadded:: 0.18
 
     copy : bool, default=True
-        set to False to perform inplace row normalization and avoid a
+        Set to `False` to perform inplace row normalization and avoid a
         copy (if the input is already a numpy array or a scipy.sparse
         CSR matrix and if axis is 1).
 
     unit_variance : bool, default=False
-        If True, scale data so that normally distributed features have a
+        If `True`, scale data so that normally distributed features have a
         variance of 1. In general, if the difference between the x-values of
-        ``q_max`` and ``q_min`` for a standard normal distribution is greater
+        `q_max` and `q_min` for a standard normal distribution is greater
         than 1, the dataset will be scaled down. If less than 1, the dataset
         will be scaled up.
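
For reference, the check that maint_tools/test_docstrings.py applies to
RobustScaler once it leaves DOCSTRING_IGNORE_LIST can be reproduced by hand.
The sketch below is illustrative rather than part of the patches above: it
assumes numpydoc is installed and that its public validate() helper returns a
report dict with an "errors" list of (code, message) pairs.

    # Minimal manual check (assumes numpydoc and scikit-learn are installed).
    from numpydoc.validate import validate

    # validate() takes the fully qualified import path of the object to check.
    report = validate("sklearn.preprocessing.RobustScaler")

    # Each entry pairs a numpydoc error code (e.g. "PR01") with a message;
    # an empty list means the docstring passes the validation the test enforces.
    for code, message in report["errors"]:
        print(f"{code}: {message}")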