MNT Remove utils.fixes after Python 3.10 bump (#31022) · lucyleeow/scikit-learn@973da6f · GitHub
Commit 973da6f

lesteve authored and lucyleeow committed
MNT Remove utils.fixes after Python 3.10 bump (scikit-learn#31022)
1 parent 759d453 commit 973da6f

File tree

19 files changed (+43, −292 lines)

19 files changed

+43
-292
lines changed

sklearn/cross_decomposition/_pls.py

Lines changed: 3 additions & 12 deletions

@@ -10,7 +10,7 @@
 from numbers import Integral, Real
 
 import numpy as np
-from scipy.linalg import svd
+from scipy.linalg import pinv, svd
 
 from ..base import (
     BaseEstimator,
@@ -24,20 +24,11 @@
 from ..utils import check_array, check_consistent_length
 from ..utils._param_validation import Interval, StrOptions
 from ..utils.extmath import svd_flip
-from ..utils.fixes import parse_version, sp_version
 from ..utils.validation import FLOAT_DTYPES, check_is_fitted, validate_data
 
 __all__ = ["PLSSVD", "PLSCanonical", "PLSRegression"]
 
 
-if sp_version >= parse_version("1.7"):
-    # Starting in scipy 1.7 pinv2 was deprecated in favor of pinv.
-    # pinv now uses the svd to compute the pseudo-inverse.
-    from scipy.linalg import pinv as pinv2
-else:
-    from scipy.linalg import pinv2
-
-
 def _pinv2_old(a):
     # Used previous scipy pinv2 that was updated in:
     # https://github.com/scipy/scipy/pull/10067
@@ -393,11 +384,11 @@ def fit(self, X, y=None, Y=None):
         # Compute transformation matrices (rotations_). See User Guide.
         self.x_rotations_ = np.dot(
             self.x_weights_,
-            pinv2(np.dot(self.x_loadings_.T, self.x_weights_), check_finite=False),
+            pinv(np.dot(self.x_loadings_.T, self.x_weights_), check_finite=False),
         )
         self.y_rotations_ = np.dot(
             self.y_weights_,
-            pinv2(np.dot(self.y_loadings_.T, self.y_weights_), check_finite=False),
+            pinv(np.dot(self.y_loadings_.T, self.y_weights_), check_finite=False),
         )
         self.coef_ = np.dot(self.x_rotations_, self.y_loadings_.T)
         self.coef_ = (self.coef_ * self._y_std).T / self._x_std
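
Note: since scipy 1.7, scipy.linalg.pinv computes the pseudo-inverse via SVD, which is exactly what the deprecated pinv2 did, so the conditional import is dead code. A minimal standalone sketch of the call pattern (illustrative, not scikit-learn code):

import numpy as np
from scipy.linalg import pinv

# pinv returns the Moore-Penrose pseudo-inverse (SVD-based since scipy 1.7,
# the behaviour pinv2 used to provide).
A = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])
A_pinv = pinv(A, check_finite=False)  # skip finiteness checks, as in _pls.py

# Defining property of the pseudo-inverse: A @ pinv(A) @ A == A.
assert np.allclose(A @ A_pinv @ A, A)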

sklearn/datasets/_lfw.py

Lines changed: 5 additions & 2 deletions

@@ -19,7 +19,6 @@
 
 from ..utils import Bunch
 from ..utils._param_validation import Hidden, Interval, StrOptions, validate_params
-from ..utils.fixes import tarfile_extractall
 from ._base import (
     RemoteFileMetadata,
     _fetch_remote,
@@ -118,7 +117,11 @@ def _check_fetch_lfw(
 
     logger.debug("Decompressing the data archive to %s", data_folder_path)
     with tarfile.open(archive_path, "r:gz") as fp:
-        tarfile_extractall(fp, path=lfw_home)
+        # Use filter="data" to prevent the most dangerous security issues.
+        # For more details, see
+        # https://docs.python.org/3.9/library/tarfile.html#tarfile.TarFile.extractall
+        fp.extractall(path=lfw_home, filter="data")
+
     remove(archive_path)
 
     return lfw_home, data_folder_path
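
Note: the filter argument of TarFile.extractall (added in Python 3.12 and backported to maintenance releases of earlier versions) makes extraction refuse absolute paths, parent-directory traversal, and special files. A minimal standalone sketch; the archive and destination names are made up:

import tarfile

# filter="data" rejects dangerous members instead of writing them to disk.
with tarfile.open("archive.tar.gz", "r:gz") as fp:  # hypothetical archive
    fp.extractall(path="destination_dir", filter="data")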

sklearn/datasets/_twenty_newsgroups.py

Lines changed: 4 additions & 2 deletions

@@ -43,7 +43,6 @@
 from ..feature_extraction.text import CountVectorizer
 from ..utils import Bunch, check_random_state
 from ..utils._param_validation import Interval, StrOptions, validate_params
-from ..utils.fixes import tarfile_extractall
 from . import get_data_home, load_files
 from ._base import (
     RemoteFileMetadata,
@@ -82,7 +81,10 @@ def _download_20newsgroups(target_dir, cache_path, n_retries, delay):
 
     logger.debug("Decompressing %s", archive_path)
     with tarfile.open(archive_path, "r:gz") as fp:
-        tarfile_extractall(fp, path=target_dir)
+        # Use filter="data" to prevent the most dangerous security issues.
+        # For more details, see
+        # https://docs.python.org/3.9/library/tarfile.html#tarfile.TarFile.extractall
+        fp.extractall(path=target_dir, filter="data")
 
     with suppress(FileNotFoundError):
         os.remove(archive_path)
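
Note: same filter="data" pattern as in _lfw.py. To see what the filter actually blocks, here is a standalone sketch that builds an in-memory archive containing a path-traversal member; with filter="data" extraction raises instead of escaping the destination (file names are made up):

import io
import tarfile

# Build an in-memory tar containing a "../" member.
buf = io.BytesIO()
with tarfile.open(fileobj=buf, mode="w:gz") as tar:
    info = tarfile.TarInfo(name="../evil.txt")
    payload = b"outside the destination"
    info.size = len(payload)
    tar.addfile(info, io.BytesIO(payload))

buf.seek(0)
with tarfile.open(fileobj=buf, mode="r:gz") as tar:
    try:
        tar.extractall(path="destination_dir", filter="data")
    except tarfile.OutsideDestinationError as exc:
        print("blocked:", exc)  # member would land outside destination_dir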

sklearn/ensemble/_hist_gradient_boosting/binning.py

Lines changed: 3 additions & 2 deletions

@@ -14,7 +14,6 @@
 from ...base import BaseEstimator, TransformerMixin
 from ...utils import check_array, check_random_state
 from ...utils._openmp_helpers import _openmp_effective_n_threads
-from ...utils.fixes import percentile
 from ...utils.parallel import Parallel, delayed
 from ...utils.validation import check_is_fitted
 from ._binning import _map_to_bins
@@ -62,7 +61,9 @@ def _find_binning_thresholds(col_data, max_bins):
     # work on a fixed-size subsample of the full data.
     percentiles = np.linspace(0, 100, num=max_bins + 1)
     percentiles = percentiles[1:-1]
-    midpoints = percentile(col_data, percentiles, method="midpoint").astype(X_DTYPE)
+    midpoints = np.percentile(col_data, percentiles, method="midpoint").astype(
+        X_DTYPE
+    )
     assert midpoints.shape[0] == max_bins - 1
 
     # We avoid having +inf thresholds: +inf thresholds are only allowed in
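
Note: NumPy 1.22 renamed np.percentile's interpolation parameter to method, so the wrapper in utils.fixes is no longer needed. A standalone sketch of the midpoint method used for binning thresholds:

import numpy as np

data = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0])
# Interior percentile levels for max_bins=4, as in _find_binning_thresholds.
levels = np.linspace(0, 100, num=5)[1:-1]  # [25., 50., 75.]

# method="midpoint" (NumPy >= 1.22) averages the two nearest order
# statistics instead of interpolating between them.
print(np.percentile(data, levels, method="midpoint"))  # [2.5 4.5 6.5]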

sklearn/linear_model/tests/test_logistic.py

Lines changed: 0 additions & 2 deletions

@@ -743,8 +743,6 @@ def test_logistic_regression_solvers_multiclass_unpenalized(
     fit_intercept, global_random_seed
 ):
     """Test and compare solver results for unpenalized multinomial multiclass."""
-    # Our use of numpy.random.multinomial requires numpy >= 1.22
-    pytest.importorskip("numpy", minversion="1.22.0")
     # We want to avoid perfect separation.
     n_samples, n_features, n_classes = 100, 4, 3
     rng = np.random.RandomState(global_random_seed)
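
Note: with the minimum supported NumPy now at 1.22 or later, this runtime guard is dead code. For reference, pytest.importorskip skips a test when a dependency is missing or too old instead of failing it; a minimal sketch:

import pytest

def test_needs_recent_numpy():
    # Skips the test if numpy is absent or older than 1.22.0;
    # returns the imported module on success.
    np = pytest.importorskip("numpy", minversion="1.22.0")
    assert np.random.multinomial(10, [0.5, 0.5]).sum() == 10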

sklearn/metrics/tests/test_dist_metrics.py

Lines changed: 1 addition & 14 deletions

@@ -19,7 +19,7 @@
     create_memmap_backed_data,
     ignore_warnings,
 )
-from sklearn.utils.fixes import CSR_CONTAINERS, parse_version, sp_version
+from sklearn.utils.fixes import CSR_CONTAINERS
 
 
 def dist_func(x1, x2, p):
@@ -81,13 +81,6 @@ def test_cdist(metric_param_grid, X, Y, csr_container):
     # with scipy
     rtol_dict = {"rtol": 1e-6}
 
-    # TODO: Remove when scipy minimum version >= 1.7.0
-    # scipy supports 0<p<1 for minkowski metric >= 1.7.0
-    if metric == "minkowski":
-        p = kwargs["p"]
-        if sp_version < parse_version("1.7.0") and p < 1:
-            pytest.skip("scipy does not support 0<p<1 for minkowski metric < 1.7.0")
-
     D_scipy_cdist = cdist(X, Y, metric, **kwargs)
 
     dm = DistanceMetric.get_metric(metric, X.dtype, **kwargs)
@@ -172,12 +165,6 @@ def test_pdist(metric_param_grid, X, csr_container):
     # with scipy
     rtol_dict = {"rtol": 1e-6}
 
-    # TODO: Remove when scipy minimum version >= 1.7.0
-    # scipy supports 0<p<1 for minkowski metric >= 1.7.0
-    if metric == "minkowski":
-        p = kwargs["p"]
-        if sp_version < parse_version("1.7.0") and p < 1:
-            pytest.skip("scipy does not support 0<p<1 for minkowski metric < 1.7.0")
     D_scipy_pdist = cdist(X, X, metric, **kwargs)
 
     dm = DistanceMetric.get_metric(metric, X.dtype, **kwargs)
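
Note: scipy >= 1.7 accepts 0 < p < 1 for the Minkowski metric (a useful dissimilarity even though the triangle inequality fails there), so both skips are obsolete. A standalone sketch:

import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.RandomState(0)
X, Y = rng.rand(3, 2), rng.rand(4, 2)

# p < 1 is accepted by scipy >= 1.7; cdist computes (sum |dx|**p) ** (1/p).
D = cdist(X, Y, metric="minkowski", p=0.5)
print(D.shape)  # (3, 4)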

sklearn/neighbors/tests/test_neighbors.py

Lines changed: 7 additions & 9 deletions

@@ -54,8 +54,6 @@
     DIA_CONTAINERS,
     DOK_CONTAINERS,
     LIL_CONTAINERS,
-    parse_version,
-    sp_version,
 )
 from sklearn.utils.validation import check_random_state
 
@@ -120,13 +118,13 @@ def _generate_test_params_for(metric: str, n_features: int):
     rng = np.random.RandomState(1)
 
     if metric == "minkowski":
-        minkowski_kwargs = [dict(p=1.5), dict(p=2), dict(p=3), dict(p=np.inf)]
-        if sp_version >= parse_version("1.8.0.dev0"):
-            # TODO: remove the test once we no longer support scipy < 1.8.0.
-            # Recent scipy versions accept weights in the Minkowski metric directly:
-            # type: ignore
-            minkowski_kwargs.append(dict(p=3, w=rng.rand(n_features)))
-        return minkowski_kwargs
+        return [
+            dict(p=1.5),
+            dict(p=2),
+            dict(p=3),
+            dict(p=np.inf),
+            dict(p=3, w=rng.rand(n_features)),
+        ]
 
     if metric == "seuclidean":
         return [dict(V=rng.rand(n_features))]
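
Note: scipy >= 1.8 accepts a weight vector w for the Minkowski metric directly, which is why the weighted case can sit unconditionally in the parameter grid. A standalone sketch of the weighted distance:

import numpy as np
from scipy.spatial.distance import minkowski

rng = np.random.RandomState(1)
u, v, w = rng.rand(5), rng.rand(5), rng.rand(5)

# Weighted Minkowski (scipy >= 1.8): (sum(w * |u - v|**p)) ** (1/p).
d = minkowski(u, v, p=3, w=w)
assert np.isclose(d, ((w * np.abs(u - v) ** 3).sum()) ** (1 / 3))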

sklearn/preprocessing/_discretization.py

Lines changed: 1 addition & 16 deletions

@@ -11,7 +11,6 @@
 from ..utils import resample
 from ..utils._param_validation import Interval, Options, StrOptions
 from ..utils.deprecation import _deprecate_Xt_in_inverse_transform
-from ..utils.fixes import np_version, parse_version
 from ..utils.stats import _averaged_weighted_percentile, _weighted_percentile
 from ..utils.validation import (
     _check_feature_names_in,
@@ -346,26 +345,12 @@ def fit(self, X, y=None, sample_weight=None):
        elif self.strategy == "quantile":
            percentile_levels = np.linspace(0, 100, n_bins[jj] + 1)
 
-            # TODO: simplify the following when numpy min version >= 1.22.
-
            # method="linear" is the implicit default for any numpy
            # version. So we keep it version independent in that case by
            # using an empty param dict.
            percentile_kwargs = {}
            if quantile_method != "linear" and sample_weight is None:
-                if np_version < parse_version("1.22"):
-                    if quantile_method in ["averaged_inverted_cdf", "inverted_cdf"]:
-                        # The method parameter is not supported in numpy <
-                        # 1.22 but we can define unit sample weight to use
-                        # our own implementation instead:
-                        sample_weight = np.ones(X.shape[0], dtype=X.dtype)
-                    else:
-                        raise ValueError(
-                            f"quantile_method='{quantile_method}' is not "
-                            "supported with numpy < 1.22"
-                        )
-                else:
-                    percentile_kwargs["method"] = quantile_method
+                percentile_kwargs["method"] = quantile_method
 
            if sample_weight is None:
                bin_edges[jj] = np.asarray(
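
Note: because every supported NumPy version has the method keyword, the whole version branch collapses to a single assignment. A standalone sketch of a non-default quantile method:

import numpy as np

data = np.array([1.0, 2.0, 3.0, 4.0])
levels = np.linspace(0, 100, num=3)  # edges for two bins: [0., 50., 100.]

# method selects the quantile estimator (NumPy >= 1.22); "inverted_cdf"
# returns actual observations rather than interpolated values.
print(np.percentile(data, levels, method="inverted_cdf"))  # [1. 2. 4.]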

sklearn/preprocessing/_polynomial.py

Lines changed: 1 addition & 26 deletions

@@ -59,24 +59,6 @@ def _create_expansion(X, interaction_only, deg, n_features, cumulative_size=0):
     needs_int64 = max(max_indices, max_indptr) > max_int32
     index_dtype = np.int64 if needs_int64 else np.int32
 
-    # This is a pretty specific bug that is hard to work around by a user,
-    # hence we do not detail the entire bug and all possible avoidance
-    # mechanisms. Instead we recommend upgrading scipy or shrinking their data.
-    cumulative_size += expanded_col
-    if (
-        sp_version < parse_version("1.8.0")
-        and cumulative_size - 1 > max_int32
-        and not needs_int64
-    ):
-        raise ValueError(
-            "In scipy versions `<1.8.0`, the function `scipy.sparse.hstack`"
-            " sometimes produces negative columns when the output shape contains"
-            " `n_cols` too large to be represented by a 32bit signed"
-            " integer. To avoid this error, either use a version"
-            " of scipy `>=1.8.0` or alter the `PolynomialFeatures`"
-            " transformer to produce fewer than 2^31 output features."
-        )
-
     # Result of the expansion, modified in place by the
     # `_csr_polynomial_expansion` routine.
     expanded_data = np.empty(shape=total_nnz, dtype=X.data.dtype)
@@ -657,8 +639,7 @@ class SplineTransformer(TransformerMixin, BaseEstimator):
         may slow down subsequent estimators.
 
     sparse_output : bool, default=False
-        Will return sparse CSR matrix if set True else will return an array. This
-        option is only available with `scipy>=1.8`.
+        Will return sparse CSR matrix if set True else will return an array.
 
         .. versionadded:: 1.2
 
@@ -870,12 +851,6 @@ def fit(self, X, y=None, sample_weight=None):
         elif not np.all(np.diff(base_knots, axis=0) > 0):
             raise ValueError("knots must be sorted without duplicates.")
 
-        if self.sparse_output and sp_version < parse_version("1.8.0"):
-            raise ValueError(
-                "Option sparse_output=True is only available with scipy>=1.8.0, "
-                f"but here scipy=={sp_version} is used."
-            )
-
         # number of knots for base interval
         n_knots = base_knots.shape[0]
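
Note: with scipy >= 1.8 guaranteed by the new minimum versions, sparse_output needs no guard. A minimal sketch of the option (illustrative values):

import numpy as np
from sklearn.preprocessing import SplineTransformer

X = np.linspace(0, 10, num=20).reshape(-1, 1)

# sparse_output=True yields a scipy.sparse CSR result instead of a dense
# ndarray; with n_knots=5 and degree=3 each input column expands into
# n_knots + degree - 1 = 7 spline features.
splines = SplineTransformer(n_knots=5, degree=3, sparse_output=True)
out = splines.fit_transform(X)
print(out.shape)  # (20, 7)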

sklearn/preprocessing/tests/test_discretization.py

Lines changed: 0 additions & 16 deletions

@@ -13,7 +13,6 @@
     assert_array_equal,
     ignore_warnings,
 )
-from sklearn.utils.fixes import np_version, parse_version
 
 X = [[-2, 1.5, -4, -1], [-1, 2.5, -3, -0.5], [0, 3.5, -2, 0.5], [1, 4.5, -1, 2]]
 
@@ -688,18 +687,3 @@ def test_KBD_inverse_transform_Xt_deprecation(strategy, quantile_method):
 
     with pytest.warns(FutureWarning, match="Xt was renamed X in version 1.5"):
         kbd.inverse_transform(Xt=X)
-
-
-# TODO: remove this test when numpy min version >= 1.22
-@pytest.mark.skipif(
-    condition=np_version >= parse_version("1.22"),
-    reason="newer numpy versions do support the 'method' parameter",
-)
-def test_invalid_quantile_method_on_old_numpy():
-    expected_msg = (
-        "quantile_method='closest_observation' is not supported with numpy < 1.22"
-    )
-    with pytest.raises(ValueError, match=expected_msg):
-        KBinsDiscretizer(
-            quantile_method="closest_observation", strategy="quantile"
-        ).fit(X)

0 commit comments