From 451dbea8c3586ca702b4090a29bdc87c27ffc5bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= <mat646@gmail.com>
Date: Wed, 23 Aug 2023 11:20:08 +0200
Subject: [PATCH 1/3] MAINT: Remove np.in1d and np.trapz usages

---
 .../plot_label_propagation_digits_active_learning.py         | 2 +-
 sklearn/datasets/_twenty_newsgroups.py                       | 2 +-
 sklearn/feature_extraction/image.py                          | 4 +++-
 sklearn/metrics/_plot/tests/test_precision_recall_display.py | 5 +++--
 sklearn/metrics/_plot/tests/test_roc_curve_display.py        | 5 +++--
 sklearn/metrics/_ranking.py                                  | 5 +++--
 sklearn/model_selection/_split.py                            | 4 ++--
 sklearn/model_selection/tests/test_search.py                 | 2 +-
 sklearn/model_selection/tests/test_split.py                  | 4 ++--
 sklearn/naive_bayes.py                                       | 2 +-
 sklearn/preprocessing/_label.py                              | 2 +-
 sklearn/tests/test_isotonic.py                               | 2 +-
 sklearn/utils/_encode.py                                     | 2 +-
 sklearn/utils/class_weight.py                                | 4 ++--
 14 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
index 215655a287c2d..efd953faa88d6 100644
--- a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
+++ b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
@@ -79,7 +79,7 @@
     # select up to 5 digit examples that the classifier is most uncertain about
     uncertainty_index = np.argsort(pred_entropies)[::-1]
     uncertainty_index = uncertainty_index[
-        np.in1d(uncertainty_index, unlabeled_indices)
+        np.isin(uncertainty_index, unlabeled_indices).ravel()
     ][:5]
 
     # keep track of indices that we get labels for
diff --git a/sklearn/datasets/_twenty_newsgroups.py b/sklearn/datasets/_twenty_newsgroups.py
index 95a7274c20f75..637cf8e4fc8d4 100644
--- a/sklearn/datasets/_twenty_newsgroups.py
+++ b/sklearn/datasets/_twenty_newsgroups.py
@@ -319,7 +319,7 @@ def fetch_20newsgroups(
         # Sort the categories to have the ordering of the labels
         labels.sort()
         labels, categories = zip(*labels)
-        mask = np.in1d(data.target, labels)
+        mask = np.isin(data.target, labels)
         data.filenames = data.filenames[mask]
         data.target = data.target[mask]
         # searchsorted to have continuous labels
diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py
index 1ef3895fe2818..13d97f5a80e4e 100644
--- a/sklearn/feature_extraction/image.py
+++ b/sklearn/feature_extraction/image.py
@@ -76,7 +76,9 @@ def _mask_edges_weights(mask, edges, weights=None):
     """Apply a mask to edges (weighted or not)"""
     inds = np.arange(mask.size)
     inds = inds[mask.ravel()]
-    ind_mask = np.logical_and(np.in1d(edges[0], inds), np.in1d(edges[1], inds))
+    ind_mask = np.logical_and(
+        np.isin(edges[0], inds).ravel(), np.isin(edges[1], inds).ravel()
+    )
     edges = edges[:, ind_mask]
     if weights is not None:
         weights = weights[ind_mask]
diff --git a/sklearn/metrics/_plot/tests/test_precision_recall_display.py b/sklearn/metrics/_plot/tests/test_precision_recall_display.py
index 3ca94bd96dbe3..772f4eac10ea1 100644
--- a/sklearn/metrics/_plot/tests/test_precision_recall_display.py
+++ b/sklearn/metrics/_plot/tests/test_precision_recall_display.py
@@ -2,6 +2,7 @@
 
 import numpy as np
 import pytest
+from scipy.integrate import trapz as trapezoid
 
 from sklearn.compose import make_column_transformer
 from sklearn.datasets import load_breast_cancer, make_classification
@@ -286,7 +287,7 @@ def test_plot_precision_recall_pos_label(pyplot, constructor_name, response_meth
     # we should obtain the statistics of the "cancer" class
     avg_prec_limit = 0.65
     assert display.average_precision < avg_prec_limit
-    assert -np.trapz(display.precision, display.recall) < avg_prec_limit
+    assert -trapezoid(display.precision, display.recall) < avg_prec_limit
 
     # otherwise we should obtain the statistics of the "not cancer" class
     if constructor_name == "from_estimator":
@@ -305,7 +306,7 @@ def test_plot_precision_recall_pos_label(pyplot, constructor_name, response_meth
         )
     avg_prec_limit = 0.95
     assert display.average_precision > avg_prec_limit
-    assert -np.trapz(display.precision, display.recall) > avg_prec_limit
+    assert -trapezoid(display.precision, display.recall) > avg_prec_limit
 
 
 @pytest.mark.parametrize("constructor_name", ["from_estimator", "from_predictions"])
diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
index b87005e877b77..1cc6bcc7b28b3 100644
--- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py
+++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pytest
 from numpy.testing import assert_allclose
+from scipy.integrate import trapz as trapezoid
 
 from sklearn.compose import make_column_transformer
 from sklearn.datasets import load_breast_cancer, load_iris
@@ -290,7 +291,7 @@ def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name):
     roc_auc_limit = 0.95679
 
     assert display.roc_auc == pytest.approx(roc_auc_limit)
-    assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
+    assert trapezoid(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
 
     if constructor_name == "from_estimator":
         display = RocCurveDisplay.from_estimator(
@@ -308,4 +309,4 @@ def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name):
         )
 
     assert display.roc_auc == pytest.approx(roc_auc_limit)
-    assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
+    assert trapezoid(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 166c2ce20eb87..3835c547d7717 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -24,6 +24,7 @@
 from numbers import Integral, Real
 
 import numpy as np
+from scipy.integrate import trapz as trapezoid
 from scipy.sparse import csr_matrix, issparse
 from scipy.stats import rankdata
 
@@ -104,9 +105,9 @@ def auc(x, y):
         else:
             raise ValueError("x is neither increasing nor decreasing : {}.".format(x))
 
-    area = direction * np.trapz(y, x)
+    area = direction * trapezoid(y, x)
     if isinstance(area, np.memmap):
-        # Reductions such as .sum used internally in np.trapz do not return a
+        # Reductions such as .sum used internally in trapezoid do not return a
         # scalar by default for numpy.memmap instances contrary to
         # regular numpy.ndarray instances.
         area = area.dtype.type(area)
diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py
index 2dceff9b22126..4d30538023abd 100644
--- a/sklearn/model_selection/_split.py
+++ b/sklearn/model_selection/_split.py
@@ -1973,8 +1973,8 @@ def _iter_indices(self, X, y, groups):
             # these are the indices of classes in the partition
             # invert them into data indices
 
-            train = np.flatnonzero(np.in1d(group_indices, group_train))
-            test = np.flatnonzero(np.in1d(group_indices, group_test))
+            train = np.flatnonzero(np.isin(group_indices, group_train))
+            test = np.flatnonzero(np.isin(group_indices, group_test))
 
             yield train, test
 
diff --git a/sklearn/model_selection/tests/test_search.py b/sklearn/model_selection/tests/test_search.py
index 04c3f1f156fab..50b519118a2b3 100644
--- a/sklearn/model_selection/tests/test_search.py
+++ b/sklearn/model_selection/tests/test_search.py
@@ -1418,7 +1418,7 @@ def test_grid_search_correct_score_results():
         expected_keys = ("mean_test_score", "rank_test_score") + tuple(
             "split%d_test_score" % cv_i for cv_i in range(n_splits)
         )
-        assert all(np.in1d(expected_keys, result_keys))
+        assert all(np.isin(expected_keys, result_keys))
 
         cv = StratifiedKFold(n_splits=n_splits)
         n_splits = grid_search.n_splits_
diff --git a/sklearn/model_selection/tests/test_split.py b/sklearn/model_selection/tests/test_split.py
index 151498205dd39..648f11041cfbf 100644
--- a/sklearn/model_selection/tests/test_split.py
+++ b/sklearn/model_selection/tests/test_split.py
@@ -987,8 +987,8 @@ def test_group_shuffle_split():
             # First test: no train group is in the test set and vice versa
             l_train_unique = np.unique(l[train])
             l_test_unique = np.unique(l[test])
-            assert not np.any(np.in1d(l[train], l_test_unique))
-            assert not np.any(np.in1d(l[test], l_train_unique))
+            assert not np.any(np.isin(l[train], l_test_unique))
+            assert not np.any(np.isin(l[test], l_train_unique))
 
             # Second test: train and test add up to all the data
             assert l[train].size + l[test].size == l.size
diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index 22e65f5062586..9ee664bf8b3a4 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -467,7 +467,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None):
         classes = self.classes_
 
         unique_y = np.unique(y)
-        unique_y_in_classes = np.in1d(unique_y, classes)
+        unique_y_in_classes = np.isin(unique_y, classes)
 
         if not np.all(unique_y_in_classes):
             raise ValueError(
diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py
index 3008710d3c3dc..41494f2649a01 100644
--- a/sklearn/preprocessing/_label.py
+++ b/sklearn/preprocessing/_label.py
@@ -553,7 +553,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
         y = column_or_1d(y)
 
         # pick out the known labels from y
-        y_in_classes = np.in1d(y, classes)
+        y_in_classes = np.isin(y, classes)
         y_seen = y[y_in_classes]
         indices = np.searchsorted(sorted_class, y_seen)
         indptr = np.hstack((0, np.cumsum(y_in_classes)))
diff --git a/sklearn/tests/test_isotonic.py b/sklearn/tests/test_isotonic.py
index 15e0d0d99cfb0..93df0221236b8 100644
--- a/sklearn/tests/test_isotonic.py
+++ b/sklearn/tests/test_isotonic.py
@@ -595,7 +595,7 @@ def test_isotonic_thresholds(increasing):
     # the data is already strictly monotonic which is not the case with
     # this random data)
     assert X_thresholds.shape[0] < X.shape[0]
-    assert np.in1d(X_thresholds, X).all()
+    assert np.isin(X_thresholds, X).all()
 
     # Output thresholds lie in the range of the training set:
     assert y_thresholds.max() <= y.max()
diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py
index fb3912b27dbfe..d17435b8aab5b 100644
--- a/sklearn/utils/_encode.py
+++ b/sklearn/utils/_encode.py
@@ -296,7 +296,7 @@ def is_valid(value):
         diff = np.setdiff1d(unique_values, known_values, assume_unique=True)
         if return_mask:
             if diff.size:
-                valid_mask = np.in1d(values, known_values)
+                valid_mask = np.isin(values, known_values).ravel()
             else:
                 valid_mask = np.ones(len(values), dtype=bool)
 
diff --git a/sklearn/utils/class_weight.py b/sklearn/utils/class_weight.py
index 19e7bcb7ba17a..049ab589c1c97 100644
--- a/sklearn/utils/class_weight.py
+++ b/sklearn/utils/class_weight.py
@@ -57,7 +57,7 @@ def compute_class_weight(class_weight, *, classes, y):
         # Find the weight of each class as present in y.
         le = LabelEncoder()
         y_ind = le.fit_transform(y)
-        if not all(np.in1d(classes, le.classes_)):
+        if not all(np.isin(classes, le.classes_)):
             raise ValueError("classes should have valid labels that are in y")
 
         recip_freq = len(y) / (len(le.classes_) * np.bincount(y_ind).astype(np.float64))
@@ -194,7 +194,7 @@ def compute_sample_weight(class_weight, y, *, indices=None):
 
         if classes_missing:
             # Make missing classes' weight zero
-            weight_k[np.in1d(y_full, list(classes_missing))] = 0.0
+            weight_k[np.isin(y_full, list(classes_missing))] = 0.0
 
         expanded_class_weight.append(weight_k)
 

From e522e0171a05f92a7a18c213b411d8605858da20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= <mat646@gmail.com>
Date: Wed, 30 Aug 2023 19:45:43 +0200
Subject: [PATCH 2/3] add try except for trapezoid import

---
 .../metrics/_plot/tests/test_precision_recall_display.py   | 7 ++++++-
 sklearn/metrics/_plot/tests/test_roc_curve_display.py      | 7 ++++++-
 sklearn/metrics/_ranking.py                                | 7 ++++++-
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/sklearn/metrics/_plot/tests/test_precision_recall_display.py b/sklearn/metrics/_plot/tests/test_precision_recall_display.py
index 629cf7e9771ef..072a490c5ad3d 100644
--- a/sklearn/metrics/_plot/tests/test_precision_recall_display.py
+++ b/sklearn/metrics/_plot/tests/test_precision_recall_display.py
@@ -2,7 +2,12 @@
 
 import numpy as np
 import pytest
-from scipy.integrate import trapz as trapezoid
+
+try:
+    from scipy.integrate import trapezoid
+except ImportError:
+    # NOTE: remove once 1.6.0 is minimum supported scipy version
+    from scipy.integrate import trapz as trapezoid
 
 from sklearn.compose import make_column_transformer
 from sklearn.datasets import load_breast_cancer, make_classification
diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
index 6ec10f64c6e46..112d0cb4bf3ba 100644
--- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py
+++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
@@ -1,7 +1,12 @@
 import numpy as np
 import pytest
 from numpy.testing import assert_allclose
-from scipy.integrate import trapz as trapezoid
+
+try:
+    from scipy.integrate import trapezoid
+except ImportError:
+    # NOTE: remove once 1.6.0 is minimum supported scipy version
+    from scipy.integrate import trapz as trapezoid
 
 from sklearn.compose import make_column_transformer
 from sklearn.datasets import load_breast_cancer, load_iris
diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index 3835c547d7717..fe93b0c821f78 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -24,10 +24,15 @@
 from numbers import Integral, Real
 
 import numpy as np
-from scipy.integrate import trapz as trapezoid
 from scipy.sparse import csr_matrix, issparse
 from scipy.stats import rankdata
 
+try:
+    from scipy.integrate import trapezoid
+except ImportError:
+    # NOTE: remove once 1.6.0 is minimum supported scipy version
+    from scipy.integrate import trapz as trapezoid
+
 from ..exceptions import UndefinedMetricWarning
 from ..preprocessing import label_binarize
 from ..utils import (

From 1888f6511110602481bbb9a30ea6d44164c62724 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Est=C3=A8ve?= <loic.esteve@ymail.com>
Date: Wed, 6 Sep 2023 15:45:21 +0200
Subject: [PATCH 3/3] Use utils.fixes + remove some unneeded ravel

---
 .../plot_label_propagation_digits_active_learning.py       | 2 +-
 sklearn/feature_extraction/image.py                        | 4 +---
 .../metrics/_plot/tests/test_precision_recall_display.py   | 7 +------
 sklearn/metrics/_plot/tests/test_roc_curve_display.py      | 7 +------
 sklearn/metrics/_ranking.py                                | 7 +------
 sklearn/utils/_encode.py                                   | 2 +-
 sklearn/utils/fixes.py                                     | 7 +++++++
 7 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
index efd953faa88d6..45af1d7891b2e 100644
--- a/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
+++ b/examples/semi_supervised/plot_label_propagation_digits_active_learning.py
@@ -79,7 +79,7 @@
     # select up to 5 digit examples that the classifier is most uncertain about
     uncertainty_index = np.argsort(pred_entropies)[::-1]
     uncertainty_index = uncertainty_index[
-        np.isin(uncertainty_index, unlabeled_indices).ravel()
+        np.isin(uncertainty_index, unlabeled_indices)
     ][:5]
 
     # keep track of indices that we get labels for
diff --git a/sklearn/feature_extraction/image.py b/sklearn/feature_extraction/image.py
index 13d97f5a80e4e..a2a23b9ec4f3d 100644
--- a/sklearn/feature_extraction/image.py
+++ b/sklearn/feature_extraction/image.py
@@ -76,9 +76,7 @@ def _mask_edges_weights(mask, edges, weights=None):
     """Apply a mask to edges (weighted or not)"""
     inds = np.arange(mask.size)
     inds = inds[mask.ravel()]
-    ind_mask = np.logical_and(
-        np.isin(edges[0], inds).ravel(), np.isin(edges[1], inds).ravel()
-    )
+    ind_mask = np.logical_and(np.isin(edges[0], inds), np.isin(edges[1], inds))
     edges = edges[:, ind_mask]
     if weights is not None:
         weights = weights[ind_mask]
diff --git a/sklearn/metrics/_plot/tests/test_precision_recall_display.py b/sklearn/metrics/_plot/tests/test_precision_recall_display.py
index 072a490c5ad3d..0173e5338d722 100644
--- a/sklearn/metrics/_plot/tests/test_precision_recall_display.py
+++ b/sklearn/metrics/_plot/tests/test_precision_recall_display.py
@@ -3,12 +3,6 @@
 import numpy as np
 import pytest
 
-try:
-    from scipy.integrate import trapezoid
-except ImportError:
-    # NOTE: remove once 1.6.0 is minimum supported scipy version
-    from scipy.integrate import trapz as trapezoid
-
 from sklearn.compose import make_column_transformer
 from sklearn.datasets import load_breast_cancer, make_classification
 from sklearn.exceptions import NotFittedError
@@ -22,6 +16,7 @@
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils import shuffle
+from sklearn.utils.fixes import trapezoid
 
 # TODO: Remove when https://github.com/numpy/numpy/issues/14397 is resolved
 pytestmark = pytest.mark.filterwarnings(
diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
index 112d0cb4bf3ba..8fd9f96576518 100644
--- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py
+++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py
@@ -2,12 +2,6 @@
 import pytest
 from numpy.testing import assert_allclose
 
-try:
-    from scipy.integrate import trapezoid
-except ImportError:
-    # NOTE: remove once 1.6.0 is minimum supported scipy version
-    from scipy.integrate import trapz as trapezoid
-
 from sklearn.compose import make_column_transformer
 from sklearn.datasets import load_breast_cancer, load_iris
 from sklearn.exceptions import NotFittedError
@@ -17,6 +11,7 @@
 from sklearn.pipeline import make_pipeline
 from sklearn.preprocessing import StandardScaler
 from sklearn.utils import shuffle
+from sklearn.utils.fixes import trapezoid
 
 
 @pytest.fixture(scope="module")
diff --git a/sklearn/metrics/_ranking.py b/sklearn/metrics/_ranking.py
index fe93b0c821f78..a7d4b5ef18d66 100644
--- a/sklearn/metrics/_ranking.py
+++ b/sklearn/metrics/_ranking.py
@@ -27,12 +27,6 @@
 from scipy.sparse import csr_matrix, issparse
 from scipy.stats import rankdata
 
-try:
-    from scipy.integrate import trapezoid
-except ImportError:
-    # NOTE: remove once 1.6.0 is minimum supported scipy version
-    from scipy.integrate import trapz as trapezoid
-
 from ..exceptions import UndefinedMetricWarning
 from ..preprocessing import label_binarize
 from ..utils import (
@@ -44,6 +38,7 @@
 from ..utils._encode import _encode, _unique
 from ..utils._param_validation import Interval, StrOptions, validate_params
 from ..utils.extmath import stable_cumsum
+from ..utils.fixes import trapezoid
 from ..utils.multiclass import type_of_target
 from ..utils.sparsefuncs import count_nonzero
 from ..utils.validation import _check_pos_label_consistency, _check_sample_weight
diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py
index d17435b8aab5b..b3bf1c2a317ec 100644
--- a/sklearn/utils/_encode.py
+++ b/sklearn/utils/_encode.py
@@ -296,7 +296,7 @@ def is_valid(value):
         diff = np.setdiff1d(unique_values, known_values, assume_unique=True)
         if return_mask:
             if diff.size:
-                valid_mask = np.isin(values, known_values).ravel()
+                valid_mask = np.isin(values, known_values)
             else:
                 valid_mask = np.ones(len(values), dtype=bool)
 
diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py
index aeb01f91590a2..e545054bff96c 100644
--- a/sklearn/utils/fixes.py
+++ b/sklearn/utils/fixes.py
@@ -200,3 +200,10 @@ def _contents(data_module):
     from numpy.exceptions import ComplexWarning, VisibleDeprecationWarning
 else:
     from numpy import ComplexWarning, VisibleDeprecationWarning  # type: ignore  # noqa
+
+
+# TODO: Remove when SciPy 1.6 is the minimum supported version
+try:
+    from scipy.integrate import trapezoid  # type: ignore  # noqa
+except ImportError:
+    from scipy.integrate import trapz as trapezoid  # type: ignore  # noqa