MNT Remove utils.fixes after Python 3.10 bump (#31022) · lucyleeow/scikit-learn@973da6f · GitHub
Commit 973da6f

lesteve authored and lucyleeow committed
MNT Remove utils.fixes after Python 3.10 bump (scikit-learn#31022)
1 parent 759d453 commit 973da6f

File tree

19 files changed (+43, −292 lines)

19 files changed

+43
-292
lines changed

sklearn/cross_decomposition/_pls.py

Lines changed: 3 additions & 12 deletions

@@ -10,7 +10,7 @@
 from numbers import Integral, Real
 
 import numpy as np
-from scipy.linalg import svd
+from scipy.linalg import pinv, svd
 
 from ..base import (
     BaseEstimator,
@@ -24,20 +24,11 @@
 from ..utils import check_array, check_consistent_length
 from ..utils._param_validation import Interval, StrOptions
 from ..utils.extmath import svd_flip
-from ..utils.fixes import parse_version, sp_version
 from ..utils.validation import FLOAT_DTYPES, check_is_fitted, validate_data
 
 __all__ = ["PLSSVD", "PLSCanonical", "PLSRegression"]
 
 
-if sp_version >= parse_version("1.7"):
-    # Starting in scipy 1.7 pinv2 was deprecated in favor of pinv.
-    # pinv now uses the svd to compute the pseudo-inverse.
-    from scipy.linalg import pinv as pinv2
-else:
-    from scipy.linalg import pinv2
-
-
 def _pinv2_old(a):
     # Used previous scipy pinv2 that was updated in:
     # https://github.com/scipy/scipy/pull/10067
@@ -393,11 +384,11 @@ def fit(self, X, y=None, Y=None):
         # Compute transformation matrices (rotations_). See User Guide.
         self.x_rotations_ = np.dot(
             self.x_weights_,
-            pinv2(np.dot(self.x_loadings_.T, self.x_weights_), check_finite=False),
+            pinv(np.dot(self.x_loadings_.T, self.x_weights_), check_finite=False),
         )
         self.y_rotations_ = np.dot(
             self.y_weights_,
-            pinv2(np.dot(self.y_loadings_.T, self.y_weights_), check_finite=False),
+            pinv(np.dot(self.y_loadings_.T, self.y_weights_), check_finite=False),
         )
         self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)
         self.coef_ = (self.coef_ * self._y_std).T / self._x_std
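
Note: since scipy 1.7, scipy.linalg.pinv computes the pseudo-inverse via SVD, which is exactly what the deprecated pinv2 did, so the conditional import is dead code. A minimal standalone sketch of the call pattern (illustrative, not scikit-learn code):

import numpy as np
from scipy.linalg import pinv

# pinv returns the Moore-Penrose pseudo-inverse (SVD-based since scipy 1.7,
# the behaviour pinv2 used to provide).
A = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
A_pinv = pinv(A, check_finite=False)  # skip finiteness checks, as in _pls.py

# Defining property of the pseudo-inverse: A @ pinv(A) @ A == A.
assert np.allclose(A @ A_pinv @ A, A)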

sklearn/datasets/_lfw.py

Lines changed: 5 additions & 2 deletions

@@ -19,7 +19,6 @@
 
 from ..utils import Bunch
 from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params
-from ..utils.fixes import tarfile_extractall
 from ._base import (
     RemoteFileMetadata,
     _fetch_remote,
@@ -118,7 +117,11 @@ def _check_fetch_lfw(
 
     logger.debug("Decompressing the data archive to %s", data_folder_path)
     with tarfile.open(archive_path, "r:gz") as fp:
-        tarfile_extractall(fp, path=lfw_home)
+        # Use filter="data" to prevent the most dangerous security issues.
+        # For more details, see
+        # https://docs.python.org/3.9/library/tarfile.html#tarfile.TarFile.extractall
+        fp.extractall(path=lfw_home, filter="data")
+
     remove(archive_path)
 
     return lfw_home, data_folder_path
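
Note: the filter argument of TarFile.extractall (added in Python 3.12 and backported to maintenance releases of earlier versions) makes extraction refuse absolute paths, parent-directory traversal, and special files. A minimal standalone sketch; the archive and destination names are made up:

import tarfile

# filter="data" rejects dangerous members instead of writing them to disk.
with tarfile.open("archive.tar.gz", "r:gz") as fp:  # hypothetical archive
    fp.extractall(path="destination_dir", filter="data")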

sklearn/datasets/_twenty_newsgroups.py

Lines changed: 4 additions & 2 deletions

@@ -43,7 +43,6 @@
 from ..feature_extraction.text import CountVectorizer
 from ..utils import Bunch, check_random_state
 from ..utils._param_validation import Interval, StrOptions, validate_params
-from ..utils.fixes import tarfile_extractall
 from . import get_data_home, load_files
 from ._base import (
     RemoteFileMetadata,
@@ -82,7 +81,10 @@ def _download_20newsgroups(target_dir, cache_path, n_retries, delay):
 
     logger.debug("Decompressing %s", archive_path)
     with tarfile.open(archive_path, "r:gz") as fp:
-        tarfile_extractall(fp, path=target_dir)
+        # Use filter="data" to prevent the most dangerous security issues.
+        # For more details, see
+        # https://docs.python.org/3.9/library/tarfile.html#tarfile.TarFile.extractall
+        fp.extractall(path=target_dir, filter="data")
 
     with suppress(FileNotFoundError):
         os.remove(archive_path)
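
Note: same filter="data" pattern as in _lfw.py. To see what the filter actually blocks, here is a standalone sketch that builds an in-memory archive containing a path-traversal member; with filter="data" extraction raises instead of escaping the destination (file names are made up):

import io
import tarfile

# Build an in-memory tar containing a "../" member.
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w:gz") as tar:
    info = tarfile.TarInfo(name="../evil.txt")
    payload = b"outside the destination"
    info.size = len(payload)
    tar.addfile(info, io.BytesIO(payload))

buf.seek(0)
with tarfile.open(fileobj=buf, mode="r:gz") as tar:
    try:
        tar.extractall(path="destination_dir", filter="data")
    except tarfile.OutsideDestinationError as exc:
        print("blocked:", exc)  # member would land outside destination_dir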

sklearn/ensemble/_hist_gradient_boosting/binning.py

Lines changed: 3 additions & 2 deletions

@@ -14,7 +14,6 @@
 from ...base import BaseEstimator, TransformerMixin
 from ...utils import check_array, check_random_state
 from ...utils._openmp_helpers import _openmp_effective_n_threads
-from ...utils.fixes import percentile
 from ...utils.parallel import Parallel, delayed
 from ...utils.validation import check_is_fitted
 from ._binning import _map_to_bins
@@ -62,7 +61,9 @@ def _find_binning_thresholds(col_data, max_bins):
     # work on a fixed-size subsample of the full data.
     percentiles = np.linspace(0, 100, num=max_bins + 1)
     percentiles = percentiles[1:-1]
-    midpoints = percentile(col_data, percentiles, method="midpoint").astype(X_DTYPE)
+    midpoints = np.percentile(col_data, percentiles, method="midpoint").astype(
+        X_DTYPE
+    )
     assert midpoints.shape[0] == max_bins - 1
 
     # We avoid having +inf thresholds: +inf thresholds are only allowed in
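
Note: NumPy 1.22 renamed np.percentile's interpolation parameter to method, so the wrapper in utils.fixes is no longer needed. A standalone sketch of the midpoint method used for binning thresholds:

import numpy as np

data = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
# Interior percentile levels for max_bins=4, as in _find_binning_thresholds.
levels = np.linspace(0, 100, num=5)[1:-1]  # [25., 50., 75.]

# method="midpoint" (NumPy >= 1.22) averages the two nearest order
# statistics instead of interpolating between them.
print(np.percentile(data, levels, method="midpoint"))  # [2.5 4.5 6.5]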

sklearn/linear_model/tests/test_logistic.py

Lines changed: 0 additions & 2 deletions

@@ -743,8 +743,6 @@ def test_logistic_regression_solvers_multiclass_unpenalized(
     fit_intercept, global_random_seed
 ):
     """Test and compare solver results for unpenalized multinomial multiclass."""
-    # Our use of numpy.random.multinomial requires numpy >= 1.22
-    pytest.importorskip("numpy", minversion="1.22.0")
     # We want to avoid perfect separation.
     n_samples, n_features, n_classes = 100, 4, 3
     rng = np.random.RandomState(global_random_seed)
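
Note: with the minimum supported NumPy now at 1.22 or later, this runtime guard is dead code. For reference, pytest.importorskip skips a test when a dependency is missing or too old instead of failing it; a minimal sketch:

import pytest

def test_needs_recent_numpy():
    # Skips the test if numpy is absent or older than 1.22.0;
    # returns the imported module on success.
    np = pytest.importorskip("numpy", minversion="1.22.0")
    assert np.random.multinomial(10, [0.5, 0.5]).sum() == 10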

sklearn/metrics/tests/test_dist_metrics.py

Lines changed: 1 addition & 14 deletions

@@ -19,7 +19,7 @@
     create_memmap_backed_data,
     ignore_warnings,
 )
-from sklearn.utils.fixes import CSR_CONTAINERS, parse_version, sp_version
+from sklearn.utils.fixes import CSR_CONTAINERS
 
 
 def dist_func(x1, x2, p):
@@ -81,13 +81,6 @@ def test_cdist(metric_param_grid, X, Y, csr_container):
     # with scipy
     rtol_dict = {"rtol": 1e-6}
 
-    # TODO: Remove when scipy minimum version >= 1.7.0
-    # scipy supports 0<p<1 for minkowski metric >= 1.7.0
-    if metric == "minkowski":
-        p = kwargs["p"]
-        if sp_version < parse_version("1.7.0") and p < 1:
-            pytest.skip("scipy does not support 0<p<1 for minkowski metric < 1.7.0")
-
     D_scipy_cdist = cdist(X, Y, metric, **kwargs)
 
     dm = DistanceMetric.get_metric(metric, X.dtype, **kwargs)
@@ -172,12 +165,6 @@ def test_pdist(metric_param_grid, X, csr_container):
     # with scipy
     rtol_dict = {"rtol": 1e-6}
 
-    # TODO: Remove when scipy minimum version >= 1.7.0
-    # scipy supports 0<p<1 for minkowski metric >= 1.7.0
-    if metric == "minkowski":
-        p = kwargs["p"]
-        if sp_version < parse_version("1.7.0") and p < 1:
-            pytest.skip("scipy does not support 0<p<1 for minkowski metric < 1.7.0")
     D_scipy_pdist = cdist(X, X, metric, **kwargs)
 
     dm = DistanceMetric.get_metric(metric, X.dtype, **kwargs)
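
Note: scipy >= 1.7 accepts 0 < p < 1 for the Minkowski metric (a useful dissimilarity even though the triangle inequality fails there), so both skips are obsolete. A standalone sketch:

import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.RandomState(0)
X, Y = rng.rand(3, 2), rng.rand(4, 2)

# p < 1 is accepted by scipy >= 1.7; cdist computes (sum |dx|**p) ** (1/p).
D = cdist(X, Y, metric="minkowski", p=0.5)
print(D.shape)  # (3, 4)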

sklearn/neighbors/tests/test_neighbors.py

Lines changed: 7 additions & 9 deletions

@@ -54,8 +54,6 @@
     DIA_CONTAINERS,
     DOK_CONTAINERS,
     LIL_CONTAINERS,
-    parse_version,
-    sp_version,
 )
 from sklearn.utils.validation import check_random_state
 
@@ -120,13 +118,13 @@ def _generate_test_params_for(metric: str, n_features: int):
     rng = np.random.RandomState(1)
 
     if metric == "minkowski":
-        minkowski_kwargs = [dict(p=1.5), dict(p=2), dict(p=3), dict(p=np.inf)]
-        if sp_version >= parse_version("1.8.0.dev0"):
-            # TODO: remove the test once we no longer support scipy < 1.8.0.
-            # Recent scipy versions accept weights in the Minkowski metric directly:
-            # type: ignore
-            minkowski_kwargs.append(dict(p=3, w=rng.rand(n_features)))
-        return minkowski_kwargs
+        return [
+            dict(p=1.5),
+            dict(p=2),
+            dict(p=3),
+            dict(p=np.inf),
+            dict(p=3, w=rng.rand(n_features)),
+        ]
 
     if metric == "seuclidean":
         return [dict(V=rng.rand(n_features))]
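
Note: scipy >= 1.8 accepts a weight vector w for the Minkowski metric directly, which is why the weighted case can sit unconditionally in the parameter grid. A standalone sketch of the weighted distance:

import numpy as np
from scipy.spatial.distance import minkowski

rng = np.random.RandomState(1)
u, v, w = rng.rand(5), rng.rand(5), rng.rand(5)

# Weighted Minkowski (scipy >= 1.8): (sum(w * |u - v|**p)) ** (1/p).
d = minkowski(u, v, p=3, w=w)
assert np.isclose(d, ((w * np.abs(u - v) ** 3).sum()) ** (1 / 3))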

sklearn/preprocessing/_discretization.py

Lines changed: 1 addition & 16 deletions

@@ -11,7 +11,6 @@
 from ..utils import resample
 from ..utils._param_validation import Interval, Options, StrOptions
 from ..utils.deprecation import _deprecate_Xt_in_inverse_transform
-from ..utils.fixes import np_version, parse_version
 from ..utils.stats import _averaged_weighted_percentile, _weighted_percentile
 from ..utils.validation import (
     _check_feature_names_in,
@@ -346,26 +345,12 @@ def fit(self, X, y=None, sample_weight=None):
        elif self.strategy == "quantile":
            percentile_levels = np.linspace(0, 100, n_bins[jj] + 1)
 
-            # TODO: simplify the following when numpy min version >= 1.22.
-
            # method="linear" is the implicit default for any numpy
            # version. So we keep it version independent in that case by
            # using an empty param dict.
            percentile_kwargs = {}
            if quantile_method != "linear" and sample_weight is None:
-                if np_version < parse_version("1.22"):
-                    if quantile_method in ["averaged_inverted_cdf", "inverted_cdf"]:
-                        # The method parameter is not supported in numpy <
-                        # 1.22 but we can define unit sample weight to use
-                        # our own implementation instead:
-                        sample_weight = np.ones(X.shape[0], dtype=X.dtype)
-                    else:
-                        raise ValueError(
-                            f"quantile_method='{quantile_method}' is not "
-                            "supported with numpy < 1.22"
-                        )
-                else:
-                    percentile_kwargs["method"] = quantile_method
+                percentile_kwargs["method"] = quantile_method
 
            if sample_weight is None:
                bin_edges[jj] = np.asarray(
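
Note: because every supported NumPy version has the method keyword, the whole version branch collapses to a single assignment. A standalone sketch of a non-default quantile method:

import numpy as np

data = np.array([1.0, 2.0, 3.0, 4.0])
levels = np.linspace(0, 100, num=3)  # edges for two bins: [0., 50., 100.]

# method selects the quantile estimator (NumPy >= 1.22); "inverted_cdf"
# returns actual observations rather than interpolated values.
print(np.percentile(data, levels, method="inverted_cdf"))  # [1. 2. 4.]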

sklearn/preprocessing/_polynomial.py

Lines changed: 1 addition & 26 deletions

@@ -59,24 +59,6 @@ def _create_expansion(X, interaction_only, deg, n_features, cumulative_size=0):
     needs_int64 = max(max_indices, max_indptr) > max_int32
     index_dtype = np.int64 if needs_int64 else np.int32
 
-    # This is a pretty specific bug that is hard to work around by a user,
-    # hence we do not detail the entire bug and all possible avoidance
-    # mechanisms. Instead we recommend upgrading scipy or shrinking their data.
-    cumulative_size += expanded_col
-    if (
-        sp_version < parse_version("1.8.0")
-        and cumulative_size - 1 > max_int32
-        and not needs_int64
-    ):
-        raise ValueError(
-            "In scipy versions `<1.8.0`, the function `scipy.sparse.hstack`"
-            " sometimes produces negative columns when the output shape contains"
-            " `n_cols` too large to be represented by a 32bit signed"
-            " integer. To avoid this error, either use a version"
-            " of scipy `>=1.8.0` or alter the `PolynomialFeatures`"
-            " transformer to produce fewer than 2^31 output features."
-        )
-
     # Result of the expansion, modified in place by the
     # `_csr_polynomial_expansion` routine.
     expanded_data = np.empty(shape=total_nnz, dtype=X.data.dtype)
@@ -657,8 +639,7 @@ class SplineTransformer(TransformerMixin, BaseEstimator):
         may slow down subsequent estimators.
 
     sparse_output : bool, default=False
-        Will return sparse CSR matrix if set True else will return an array. This
-        option is only available with `scipy>=1.8`.
+        Will return sparse CSR matrix if set True else will return an array.
 
         .. versionadded:: 1.2
 
@@ -870,12 +851,6 @@ def fit(self, X, y=None, sample_weight=None):
         elif not np.all(np.diff(base_knots, axis=0) > 0):
             raise ValueError("knots must be sorted without duplicates.")
 
-        if self.sparse_output and sp_version < parse_version("1.8.0"):
-            raise ValueError(
-                "Option sparse_output=True is only available with scipy>=1.8.0, "
-                f"but here scipy=={sp_version} is used."
-            )
-
         # number of knots for base interval
         n_knots = base_knots.shape[0]
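
Note: with scipy >= 1.8 guaranteed by the new minimum versions, sparse_output needs no guard. A minimal sketch of the option (illustrative values):

import numpy as np
from sklearn.preprocessing import SplineTransformer

X = np.linspace(0, 10, num=20).reshape(-1, 1)

# sparse_output=True yields a scipy.sparse CSR result instead of a dense
# ndarray; with n_knots=5 and degree=3 each input column expands into
# n_knots + degree - 1 = 7 spline features.
splines = SplineTransformer(n_knots=5, degree=3, sparse_output=True)
out = splines.fit_transform(X)
print(out.shape)  # (20, 7)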

sklearn/preprocessing/tests/test_discretization.py

Lines changed: 0 additions & 16 deletions

@@ -13,7 +13,6 @@
     assert_array_equal,
     ignore_warnings,
 )
-from sklearn.utils.fixes import np_version, parse_version
 
 X = [[-2, 1.5, -4, -1], [-1, 2.5, -3, -0.5], [0, 3.5, -2, 0.5], [1, 4.5, -1, 2]]
 
@@ -688,18 +687,3 @@ def test_KBD_inverse_transform_Xt_deprecation(strategy, quantile_method):
 
     with pytest.warns(FutureWarning, match="Xt was renamed X in version 1.5"):
         kbd.inverse_transform(Xt=X)
-
-
-# TODO: remove this test when numpy min version >= 1.22
-@pytest.mark.skipif(
-    condition=np_version >= parse_version("1.22"),
-    reason="newer numpy versions do support the 'method' parameter",
-)
-def test_invalid_quantile_method_on_old_numpy():
-    expected_msg = (
-        "quantile_method='closest_observation' is not supported with numpy < 1.22"
-    )
-    with pytest.raises(ValueError, match=expected_msg):
-        KBinsDiscretizer(
-            quantile_method="closest_observation", strategy="quantile"
-        ).fit(X)

0 commit comments