CLN some code cleansing in preprocessing by lorentzenchr · Pull Request #18686 · scikit-learn/scikit-learn · GitHub

CLN some code cleansing in preprocessing #18686

Merged · 7 commits · Aug 31, 2021
sklearn/preprocessing/_data.py (36 changes: 13 additions & 23 deletions)
@@ -19,8 +19,7 @@
 from ..base import BaseEstimator, TransformerMixin
 from ..utils import check_array
 from ..utils.deprecation import deprecated
-from ..utils.extmath import row_norms
-from ..utils.extmath import _incremental_mean_and_var
+from ..utils.extmath import _incremental_mean_and_var, row_norms
 from ..utils.sparsefuncs_fast import (
     inplace_csr_row_normalize_l1,
     inplace_csr_row_normalize_l2,
@@ -40,6 +39,7 @@
 
 from ._encoders import OneHotEncoder
 
+
 BOUNDS_THRESHOLD = 1e-7
 
 __all__ = [
@@ -378,7 +378,6 @@ def _reset(self):
 
         __init__ parameters are not touched.
         """
-
         # Checking one attribute is enough, becase they are all set together
         # in partial_fit
         if hasattr(self, "scale_"):
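Most hunks in this diff apply the same docstring conventions, in the spirit of PEP 257: the summary shares the line with the opening quotes and ends in a period, and no stray blank line follows the closing quotes. An illustrative sketch of the target shape (not itself part of the PR):

def _reset(self):
    """Reset internal data-dependent state of the scaler, if necessary.

    __init__ parameters are not touched.
    """
    # The body starts right after the docstring, with no blank line.
    ...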
@@ -406,7 +405,6 @@ def fit(self, X, y=None):
         self : object
             Fitted scaler.
         """
-
         # Reset internal state before fitting
         self._reset()
         return self.partial_fit(X, y)
@@ -602,7 +600,7 @@ def minmax_scale(X, feature_range=(0, 1), *, axis=0, copy=True):
     For a comparison of the different scalers, transformers, and normalizers,
     see :ref:`examples/preprocessing/plot_all_scaling.py
     <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-    """  # noqa
+    """
     # Unlike the scaler object, this function allows 1d input.
     # If copy is required, it will be done inside the scaler object.
     X = check_array(
@@ -748,7 +746,7 @@ class StandardScaler(TransformerMixin, BaseEstimator):
      [ 1.  1.]]
     >>> print(scaler.transform([[2, 2]]))
     [[3. 3.]]
-    """  # noqa
+    """
 
     def __init__(self, *, copy=True, with_mean=True, with_std=True):
         self.with_mean = with_mean
@@ -760,7 +758,6 @@ def _reset(self):
 
         __init__ parameters are not touched.
         """
-
         # Checking one attribute is enough, becase they are all set together
         # in partial_fit
         if hasattr(self, "scale_"):
@@ -792,14 +789,12 @@ def fit(self, X, y=None, sample_weight=None):
         self : object
             Fitted scaler.
         """
-
         # Reset internal state before fitting
         self._reset()
         return self.partial_fit(X, y, sample_weight)
 
     def partial_fit(self, X, y=None, sample_weight=None):
-        """
-        Online computation of mean and std on X for later scaling.
+        """Online computation of mean and std on X for later scaling.
 
         All of X is processed as a single batch. This is intended for cases
         when :meth:`fit` is not feasible due to very large number of
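As an aside, this incremental path is what makes out-of-core scaling possible. A minimal usage sketch, not part of the diff, with made-up data and chunk sizes:

import numpy as np
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
scaler = StandardScaler()

# Feed the data in chunks, e.g. when it is too large to load at once.
for chunk in np.array_split(rng.normal(loc=3.0, size=(10_000, 2)), 10):
    scaler.partial_fit(chunk)

# mean_ and var_ now summarize every chunk seen so far.
print(scaler.mean_)  # close to [3. 3.]
X_new = scaler.transform(rng.normal(loc=3.0, size=(5, 2)))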
@@ -1108,7 +1103,6 @@ def _reset(self):
 
         __init__ parameters are not touched.
         """
-
         # Checking one attribute is enough, becase they are all set together
         # in partial_fit
         if hasattr(self, "scale_"):
@@ -1138,8 +1132,7 @@ def fit(self, X, y=None):
         return self.partial_fit(X, y)
 
     def partial_fit(self, X, y=None):
-        """
-        Online computation of max absolute value of X for later scaling.
+        """Online computation of max absolute value of X for later scaling.
 
         All of X is processed as a single batch. This is intended for cases
         when :meth:`fit` is not feasible due to very large number of
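The same batch-wise pattern applies here; the running statistic is just the per-feature maximum absolute value. A small sketch, illustrative only:

import numpy as np
from sklearn.preprocessing import MaxAbsScaler

scaler = MaxAbsScaler()
scaler.partial_fit(np.array([[1.0, -2.0]]))
scaler.partial_fit(np.array([[-3.0, 0.5]]))

# The running per-feature max |x| over both batches:
print(scaler.max_abs_)  # [3. 2.]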
@@ -1277,8 +1270,8 @@ def maxabs_scale(X, *, axis=0, copy=True):
 
     .. warning:: Risk of data leak
 
-        Do not use :func:`~sklearn.preprocessing.maxabs_scale` unless you know what
-        you are doing. A common mistake is to apply it to the entire data
+        Do not use :func:`~sklearn.preprocessing.maxabs_scale` unless you know
+        what you are doing. A common mistake is to apply it to the entire data
         *before* splitting into training and test sets. This will bias the
         model evaluation because information would have leaked from the test
         set to the training set.
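Concretely, the leak-free pattern the warning points at is to estimate the scaling statistic on the training split only, via the estimator API rather than the convenience function. A minimal sketch with made-up data:

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MaxAbsScaler

X = np.random.RandomState(0).uniform(-5, 5, size=(100, 2))
X_train, X_test = train_test_split(X, random_state=0)

scaler = MaxAbsScaler()
X_train_scaled = scaler.fit_transform(X_train)  # statistics estimated on train only
X_test_scaled = scaler.transform(X_test)        # same statistics reused, no leak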
@@ -1301,7 +1294,7 @@ def maxabs_scale(X, *, axis=0, copy=True):
     For a comparison of the different scalers, transformers, and normalizers,
     see :ref:`examples/preprocessing/plot_all_scaling.py
     <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-    """  # noqa
+    """
     # Unlike the scaler object, this function allows 1d input.
 
     # If copy is required, it will be done inside the scaler object.
@@ -1551,7 +1544,7 @@ def transform(self, X):
         return X
 
     def inverse_transform(self, X):
-        """Scale back the data to the original representation
+        """Scale back the data to the original representation.
 
         Parameters
         ----------
@@ -1597,7 +1590,7 @@ def robust_scale(
     copy=True,
     unit_variance=False,
 ):
-    """Standardize a dataset along any axis
+    """Standardize a dataset along any axis.
 
     Center to the median and component wise scale
     according to the interquartile range.
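To see why the median/IQR choice matters: a single gross outlier barely moves robust statistics, unlike the mean and standard deviation. A small illustrative sketch:

import numpy as np
from sklearn.preprocessing import robust_scale

X = np.array([[1.0], [2.0], [3.0], [4.0], [100.0]])  # one gross outlier
print(robust_scale(X))
# The median is 3 and the IQR is 2, so the four regular points map to
# [-1, -0.5, 0, 0.5]; with mean/std scaling, the outlier would squash
# them all near the same negative value.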
@@ -1759,7 +1752,6 @@ def normalize(X, norm="l2", *, axis=1, copy=True, return_norm=False):
     For a comparison of the different scalers, transformers, and normalizers,
     see :ref:`examples/preprocessing/plot_all_scaling.py
     <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
-
     """
     if norm not in ("l1", "l2", "max"):
         raise ValueError("'%s' is not a supported norm" % norm)
@@ -1888,7 +1880,7 @@ def __init__(self, norm="l2", *, copy=True):
         self.copy = copy
 
     def fit(self, X, y=None):
-        """Do nothing and return the estimator unchanged
+        """Do nothing and return the estimator unchanged.
 
         This method is just there to implement the usual API and hence
         work in pipelines.
@@ -1910,7 +1902,7 @@ def fit(self, X, y=None):
         return self
 
     def transform(self, X, copy=None):
-        """Scale each non zero row of X to unit norm
+        """Scale each non zero row of X to unit norm.
 
         Parameters
         ----------
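Because fit is a no-op, Normalizer is stateless: there is nothing to learn from the data, which is exactly why it can slot into a pipeline like any other transformer. A minimal sketch with invented data:

import numpy as np
from sklearn.preprocessing import Normalizer

X = np.array([[4.0, 3.0], [0.0, 0.0]])
normalizer = Normalizer(norm="l2").fit(X)  # fit does nothing, returns self

print(normalizer.transform(X))
# [[0.8 0.6]    <- the row is divided by its L2 norm, 5.0
#  [0.  0. ]]   <- an all-zero row has no norm and is left untouched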
@@ -2191,7 +2183,6 @@ def fit(self, K, y=None):
         self : object
             Returns the instance itself.
         """
-
         K = self._validate_data(K, dtype=FLOAT_DTYPES)
 
         if K.shape[0] != K.shape[1]:
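For context, the shape check enforces that K is a square kernel (Gram) matrix; KernelCenterer then centers the data in the implicit feature space without ever forming it. A sketch, assuming a linear kernel and made-up data:

import numpy as np
from sklearn.metrics.pairwise import pairwise_kernels
from sklearn.preprocessing import KernelCenterer

X = np.random.RandomState(0).normal(size=(5, 3))
K = pairwise_kernels(X, metric="linear")  # (5, 5), square by construction

K_centered = KernelCenterer().fit(K).transform(K)
# Centering in feature space: row and column means of the centered
# kernel are numerically zero.
print(np.allclose(K_centered.mean(axis=0), 0.0))  # True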
@@ -2677,7 +2668,6 @@ def _transform(self, X, inverse=False):
         X : ndarray of shape (n_samples, n_features)
             Projected data.
         """
-
         if sparse.issparse(X):
             for feature_idx in range(X.shape[1]):
                 column_slice = slice(X.indptr[feature_idx], X.indptr[feature_idx + 1])
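The slice arithmetic above is standard CSC bookkeeping: for a csc_matrix, the non-zeros of column j sit in data[indptr[j]:indptr[j + 1]], with matching row indices in indices. A standalone sketch, outside the PR:

import numpy as np
from scipy import sparse

X = sparse.csc_matrix(np.array([[1.0, 0.0],
                                [0.0, 2.0],
                                [3.0, 0.0]]))
for j in range(X.shape[1]):
    col = slice(X.indptr[j], X.indptr[j + 1])
    print(f"column {j}: values {X.data[col]}, rows {X.indices[col]}")
# column 0: values [1. 3.], rows [0 2]
# column 1: values [2.], rows [1]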
sklearn/preprocessing/_label.py (6 changes: 2 additions & 4 deletions)
@@ -18,9 +18,7 @@
 
 from ..utils.sparsefuncs import min_max_axis
 from ..utils import column_or_1d
-from ..utils.validation import check_array
-from ..utils.validation import check_is_fitted
-from ..utils.validation import _num_samples
+from ..utils.validation import _num_samples, check_array, check_is_fitted
 from ..utils.multiclass import unique_labels
 from ..utils.multiclass import type_of_target
 from ..utils._encode import _encode, _unique
@@ -845,7 +843,7 @@ def _build_cache(self):
         return self._cached_dict
 
     def _transform(self, y, class_mapping):
-        """Transforms the label sets with a given mapping
+        """Transforms the label sets with a given mapping.
 
         Parameters
         ----------