diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 92b9f78139a4f..0fda258fe1cc2 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -19,8 +19,7 @@
 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_array
 from ..utils.deprecation import deprecated
-from ..utils.extmath import row_norms
-from ..utils.extmath import _incremental_mean_and_var
+from ..utils.extmath import _incremental_mean_and_var, row_norms
 from ..utils.sparsefuncs_fast import (
     inplace_csr_row_normalize_l1,
     inplace_csr_row_normalize_l2,
@@ -40,6 +39,7 @@
 from ._encoders import OneHotEncoder
 
 
+
 BOUNDS_THRESHOLD = 1e-7
 
 __all__ = [
@@ -378,7 +378,6 @@ def _reset(self):
 
         __init__ parameters are not touched.
         """
-
         # Checking one attribute is enough, becase they are all set together
         # in partial_fit
         if hasattr(self, "scale_"):
@@ -406,7 +405,6 @@ def fit(self, X, y=None):
         self : object
             Fitted scaler.
         """
-
         # Reset internal state before fitting
         self._reset()
         return self.partial_fit(X, y)
@@ -602,7 +600,7 @@ def minmax_scale(X, feature_range=(0, 1), *, axis=0, copy=True):
     For a comparison of the different scalers, transformers, and normalizers,
     see :ref:`examples/preprocessing/plot_all_scaling.py
     <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-    """  # noqa
+    """
     # Unlike the scaler object, this function allows 1d input.
     # If copy is required, it will be done inside the scaler object.
     X = check_array(
@@ -748,7 +746,7 @@ class StandardScaler(TransformerMixin, BaseEstimator):
      [ 1.  1.]]
     >>> print(scaler.transform([[2, 2]]))
     [[3. 3.]]
-    """  # noqa
+    """
 
     def __init__(self, *, copy=True, with_mean=True, with_std=True):
         self.with_mean = with_mean
@@ -760,7 +758,6 @@ def _reset(self):
 
         __init__ parameters are not touched.
         """
-
         # Checking one attribute is enough, becase they are all set together
         # in partial_fit
         if hasattr(self, "scale_"):
@@ -792,14 +789,12 @@ def fit(self, X, y=None, sample_weight=None):
         self : object
             Fitted scaler.
         """
-
         # Reset internal state before fitting
         self._reset()
         return self.partial_fit(X, y, sample_weight)
 
     def partial_fit(self, X, y=None, sample_weight=None):
-        """
-        Online computation of mean and std on X for later scaling.
+        """Online computation of mean and std on X for later scaling.
 
         All of X is processed as a single batch. This is intended for cases
         when :meth:`fit` is not feasible due to very large number of
@@ -1108,7 +1103,6 @@ def _reset(self):
 
         __init__ parameters are not touched.
         """
-
         # Checking one attribute is enough, becase they are all set together
         # in partial_fit
         if hasattr(self, "scale_"):
@@ -1138,8 +1132,7 @@ def fit(self, X, y=None):
         return self.partial_fit(X, y)
 
     def partial_fit(self, X, y=None):
-        """
-        Online computation of max absolute value of X for later scaling.
+        """Online computation of max absolute value of X for later scaling.
 
         All of X is processed as a single batch. This is intended for cases
         when :meth:`fit` is not feasible due to very large number of
@@ -1277,8 +1270,8 @@ def maxabs_scale(X, *, axis=0, copy=True):
 
     .. warning:: Risk of data leak
 
-        Do not use :func:`~sklearn.preprocessing.maxabs_scale` unless you know what
-        you are doing. A common mistake is to apply it to the entire data
+        Do not use :func:`~sklearn.preprocessing.maxabs_scale` unless you know
+        what you are doing. A common mistake is to apply it to the entire data
         *before* splitting into training and test sets. This will bias the
         model evaluation because information would have leaked from the test
         set to the training set.
@@ -1301,7 +1294,7 @@ def maxabs_scale(X, *, axis=0, copy=True):
     For a comparison of the different scalers, transformers, and normalizers,
     see :ref:`examples/preprocessing/plot_all_scaling.py
     <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-    """  # noqa
+    """
 
     # Unlike the scaler object, this function allows 1d input.
     # If copy is required, it will be done inside the scaler object.
@@ -1551,7 +1544,7 @@ def transform(self, X):
         return X
 
     def inverse_transform(self, X):
-        """Scale back the data to the original representation
+        """Scale back the data to the original representation.
 
         Parameters
         ----------
@@ -1597,7 +1590,7 @@ def robust_scale(
     copy=True,
     unit_variance=False,
 ):
-    """Standardize a dataset along any axis
+    """Standardize a dataset along any axis.
 
     Center to the median and component wise scale
     according to the interquartile range.
@@ -1759,7 +1752,6 @@ def normalize(X, norm="l2", *, axis=1, copy=True, return_norm=False):
     For a comparison of the different scalers, transformers, and normalizers,
     see :ref:`examples/preprocessing/plot_all_scaling.py
     <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-
     """
     if norm not in ("l1", "l2", "max"):
         raise ValueError("'%s' is not a supported norm" % norm)
@@ -1888,7 +1880,7 @@ def __init__(self, norm="l2", *, copy=True):
         self.copy = copy
 
     def fit(self, X, y=None):
-        """Do nothing and return the estimator unchanged
+        """Do nothing and return the estimator unchanged.
 
         This method is just there to implement the usual API and hence
         work in pipelines.
@@ -1910,7 +1902,7 @@ def fit(self, X, y=None):
         return self
 
     def transform(self, X, copy=None):
-        """Scale each non zero row of X to unit norm
+        """Scale each non zero row of X to unit norm.
 
         Parameters
         ----------
@@ -2191,7 +2183,6 @@ def fit(self, K, y=None):
         self : object
             Returns the instance itself.
         """
-
         K = self._validate_data(K, dtype=FLOAT_DTYPES)
 
         if K.shape[0] != K.shape[1]:
@@ -2677,7 +2668,6 @@ def _transform(self, X, inverse=False):
         X : ndarray of shape (n_samples, n_features)
             Projected data.
         """
-
         if sparse.issparse(X):
             for feature_idx in range(X.shape[1]):
                 column_slice = slice(X.indptr[feature_idx], X.indptr[feature_idx + 1])
diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index d460dd3c3f9ab..b5c7d7c117e21 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -18,9 +18,7 @@
 
 from ..utils.sparsefuncs import min_max_axis
 from ..utils import column_or_1d
-from ..utils.validation import check_array
-from ..utils.validation import check_is_fitted
-from ..utils.validation import _num_samples
+from ..utils.validation import _num_samples, check_array, check_is_fitted
 from ..utils.multiclass import unique_labels
 from ..utils.multiclass import type_of_target
 from ..utils._encode import _encode, _unique
@@ -845,7 +843,7 @@ def _build_cache(self):
         return self._cached_dict
 
     def _transform(self, y, class_mapping):
-        """Transforms the label sets with a given mapping
+        """Transforms the label sets with a given mapping.
 
         Parameters
         ----------
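Note: every hunk above applies the same mechanical cleanup: imports from the same module are merged into one sorted statement, docstring summaries move onto the opening-quote line and gain a terminal period, blank lines adjacent to docstring quotes are dropped, and `# noqa` markers made unnecessary by the rewrapping are removed. A minimal before/after sketch of the docstring convention, using a hypothetical class that is not scikit-learn code:

    class BeforeScaler:
        def _reset(self):
            """
            Reset internal state of the scaler
            """

            # The two-line summary, missing period, and blank line after
            # the docstring are what the diff cleans up.
            self.scale_ = None


    class AfterScaler:
        def _reset(self):
            """Reset internal state of the scaler."""
            # Summary sits on the opening-quote line, ends with a period,
            # and no blank line separates the docstring from the body.
            self.scale_ = None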