MAINT: Remove np.in1d and np.trapz usages · scikit-learn/scikit-learn@451dbea · GitHub
[go: up one dir, main page]

Skip to content
Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Appearance settings

Commit 451dbea

Browse files
committed
MAINT: Remove np.in1d and np.trapz usages
1 parent bb58543 commit 451dbea

File tree

14 files changed

+25
-20
lines changed

14 files changed

+25
-20
lines changed

examples/semi_supervised/plot_label_propagation_digits_active_learning.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@
7979
# select up to 5 digit examples that the classifier is most uncertain about
8080
uncertainty_index = np.argsort(pred_entropies)[::-1]
8181
uncertainty_index = uncertainty_index[
82-
np.in1d(uncertainty_index, unlabeled_indices)
82+
np.isin(uncertainty_index, unlabeled_indices).ravel()
8383
][:5]
8484

8585
# keep track of indices that we get labels for

sklearn/datasets/_twenty_newsgroups.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -319,7 +319,7 @@ def fetch_20newsgroups(
319319
# Sort the categories to have the ordering of the labels
320320
labels.sort()
321321
labels, categories = zip(*labels)
322-
mask = np.in1d(data.target, labels)
322+
mask = np.isin(data.target, labels)
323323
data.filenames = data.filenames[mask]
324324
data.target = data.target[mask]
325325
# searchsorted to have continuous labels

sklearn/feature_extraction/image.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,9 @@ def _mask_edges_weights(mask, edges, weights=None):
7676
"""Apply a mask to edges (weighted or not)"""
7777
inds = np.arange(mask.size)
7878
inds = inds[mask.ravel()]
79-
ind_mask = np.logical_and(np.in1d(edges[0], inds), np.in1d(edges[1], inds))
79+
ind_mask = np.logical_and(
80+
np.isin(edges[0], inds).ravel(), np.isin(edges[1], inds).ravel()
81+
)
8082
edges = edges[:, ind_mask]
8183
if weights is not None:
8284
weights = weights[ind_mask]

sklearn/metrics/_plot/tests/test_precision_recall_display.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import numpy as np
44
import pytest
5+
from scipy.integrate import trapz as trapezoid
56

67
from sklearn.compose import make_column_transformer
78
from sklearn.datasets import load_breast_cancer, make_classification
@@ -286,7 +287,7 @@ def test_plot_precision_recall_pos_label(pyplot, constructor_name, response_meth
286287
# we should obtain the statistics of the "cancer" class
287288
avg_prec_limit = 0.65
288289
assert display.average_precision < avg_prec_limit
289-
assert -np.trapz(display.precision, display.recall) < avg_prec_limit
290+
assert -trapezoid(display.precision, display.recall) < avg_prec_limit
290291

291292
# otherwise we should obtain the statistics of the "not cancer" class
292293
if constructor_name == "from_estimator":
@@ -305,7 +306,7 @@ def test_plot_precision_recall_pos_label(pyplot, constructor_name, response_meth
305306
)
306307
avg_prec_limit = 0.95
307308
assert display.average_precision > avg_prec_limit
308-
assert -np.trapz(display.precision, display.recall) > avg_prec_limit
309+
assert -trapezoid(display.precision, display.recall) > avg_prec_limit
309310

310311

311312
@pytest.mark.parametrize("constructor_name", ["from_estimator", "from_predictions"])

sklearn/metrics/_plot/tests/test_roc_curve_display.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import numpy as np
22
import pytest
33
from numpy.testing import assert_allclose
4+
from scipy.integrate import trapz as trapezoid
45

56
from sklearn.compose import make_column_transformer
67
from sklearn.datasets import load_breast_cancer, load_iris
@@ -290,7 +291,7 @@ def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name):
290291
roc_auc_limit = 0.95679
291292

292293
assert display.roc_auc == pytest.approx(roc_auc_limit)
293-
assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
294+
assert trapezoid(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
294295

295296
if constructor_name == "from_estimator":
296297
display = RocCurveDisplay.from_estimator(
@@ -308,4 +309,4 @@ def test_plot_roc_curve_pos_label(pyplot, response_method, constructor_name):
308309
)
309310

310311
assert display.roc_auc == pytest.approx(roc_auc_limit)
311-
assert np.trapz(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)
312+
assert trapezoid(display.tpr, display.fpr) == pytest.approx(roc_auc_limit)

sklearn/metrics/_ranking.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from numbers import Integral, Real
2525

2626
import numpy as np
27+
from scipy.integrate import trapz as trapezoid
2728
from scipy.sparse import csr_matrix, issparse
2829
from scipy.stats import rankdata
2930

@@ -104,9 +105,9 @@ def auc(x, y):
104105
else:
105106
raise ValueError("x is neither increasing nor decreasing : {}.".format(x))
106107

107-
area = direction * np.trapz(y, x)
108+
area = direction * trapezoid(y, x)
108109
if isinstance(area, np.memmap):
109-
# Reductions such as .sum used internally in np.trapz do not return a
110+
# Reductions such as .sum used internally in trapezoid do not return a
110111
# scalar by default for numpy.memmap instances contrary to
111112
# regular numpy.ndarray instances.
112113
area = area.dtype.type(area)

sklearn/model_selection/_split.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1973,8 +1973,8 @@ def _iter_indices(self, X, y, groups):
19731973
# these are the indices of classes in the partition
19741974
# invert them into data indices
19751975

1976-
train = np.flatnonzero(np.in1d(group_indices, group_train))
1977-
test = np.flatnonzero(np.in1d(group_indices, group_test))
1976+
train = np.flatnonzero(np.isin(group_indices, group_train))
1977+
test = np.flatnonzero(np.isin(group_indices, group_test))
19781978

19791979
yield train, test
19801980

sklearn/model_selection/tests/test_search.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1418,7 +1418,7 @@ def test_grid_search_correct_score_results():
14181418
expected_keys = ("mean_test_score", "rank_test_score") + tuple(
14191419
"split%d_test_score" % cv_i for cv_i in range(n_splits)
14201420
)
1421-
assert all(np.in1d(expected_keys, result_keys))
1421+
assert all(np.isin(expected_keys, result_keys))
14221422

14231423
cv = StratifiedKFold(n_splits=n_splits)
14241424
n_splits = grid_search.n_splits_

sklearn/model_selection/tests/test_split.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -987,8 +987,8 @@ def test_group_shuffle_split():
987987
# First test: no train group is in the test set and vice versa
988988
l_train_unique = np.unique(l[train])
989989
l_test_unique = np.unique(l[test])
990-
assert not np.any(np.in1d(l[train], l_test_unique))
991-
assert not np.any(np.in1d(l[test], l_train_unique))
990+
assert not np.any(np.isin(l[train], l_test_unique))
991+
assert not np.any(np.isin(l[test], l_train_unique))
992992

993993
# Second test: train and test add up to all the data
994994
assert l[train].size + l[test].size == l.size

sklearn/naive_bayes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -467,7 +467,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None):
467467
classes = self.classes_
468468

469469
unique_y = np.unique(y)
470-
unique_y_in_classes = np.in1d(unique_y, classes)
470+
unique_y_in_classes = np.isin(unique_y, classes)
471471

472472
if not np.all(unique_y_in_classes):
473473
raise ValueError(

sklearn/preprocessing/_label.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,7 @@ def label_binarize(y, *, classes, neg_label=0, pos_label=1, sparse_output=False)
553553
y = column_or_1d(y)
554554

555555
# pick out the known labels from y
556-
y_in_classes = np.in1d(y, classes)
556+
y_in_classes = np.isin(y, classes)
557557
y_seen = y[y_in_classes]
558558
indices = np.searchsorted(sorted_class, y_seen)
559559
indptr = np.hstack((0, np.cumsum(y_in_classes)))

sklearn/tests/test_isotonic.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,7 @@ def test_isotonic_thresholds(increasing):
595595
# the data is already strictly monotonic which is not the case with
596596
# this random data)
597597
assert X_thresholds.shape[0] < X.shape[0]
598-
assert np.in1d(X_thresholds, X).all()
598+
assert np.isin(X_thresholds, X).all()
599599

600600
# Output thresholds lie in the range of the training set:
601601
assert y_thresholds.max() <= y.max()

sklearn/utils/_encode.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ def is_valid(value):
296296
diff = np.setdiff1d(unique_values, known_values, assume_unique=True)
297297
if return_mask:
298298
if diff.size:
299-
valid_mask = np.in1d(values, known_values)
299+
valid_mask = np.isin(values, known_values).ravel()
300300
else:
301301
valid_mask = np.ones(len(values), dtype=bool)
302302

sklearn/utils/class_weight.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def compute_class_weight(class_weight, *, classes, y):
5757
# Find the weight of each class as present in y.
5858
le = LabelEncoder()
5959
y_ind = le.fit_transform(y)
60-
if not all(np.in1d(classes, le.classes_)):
60+
if not all(np.isin(classes, le.classes_)):
6161
raise ValueError("classes should have valid labels that are in y")
6262

6363
recip_freq = len(y) / (len(le.classes_) * np.bincount(y_ind).astype(np.float64))
@@ -194,7 +194,7 @@ def compute_sample_weight(class_weight, y, *, indices=None):
194194

195195
if classes_missing:
196196
# Make missing classes' weight zero
197-
weight_k[np.in1d(y_full, list(classes_missing))] = 0.0
197+
weight_k[np.isin(y_full, list(classes_missing))] = 0.0
198198

199199
expanded_class_weight.append(weight_k)
200200

0 commit comments

Comments (0)
0