@@ -12,6 +12,5 @@
 import numpy as np
 import pytest
-from scipy.sparse import csc_matrix, csr_matrix
 
 from sklearn.datasets import load_diabetes, load_iris, make_classification
 from sklearn.ensemble import IsolationForest
@@ -25,6 +24,7 @@
     assert_array_equal,
     ignore_warnings,
 )
+from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS
 
 # load iris & diabetes dataset
 iris = load_iris()
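
The import change swaps the direct scipy.sparse constructors for the container lists that the test suite centralizes in sklearn.utils.fixes. Roughly, and only as a sketch (the exact definitions live in sklearn/utils/fixes.py and may differ in detail), those lists are built so that the tests exercise both the classic sparse matrices and, on SciPy >= 1.8, the newer sparse-array containers:

# Sketch only: approximate shape of the container lists, not the real source.
import scipy
import scipy.sparse as sp
from packaging.version import parse as parse_version  # assumes the packaging package is available

CSC_CONTAINERS = [sp.csc_matrix]
CSR_CONTAINERS = [sp.csr_matrix]

# SciPy >= 1.8 ships sparse *array* containers (csc_array / csr_array); adding
# them lets the same tests cover both container families.
if parse_version(scipy.__version__) >= parse_version("1.8"):
    CSC_CONTAINERS.append(sp.csc_array)
    CSR_CONTAINERS.append(sp.csr_array)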
@@ -47,30 +47,30 @@ def test_iforest(global_random_seed):
             ).predict(X_test)
 
 
-def test_iforest_sparse(global_random_seed):
+@pytest.mark.parametrize("sparse_container", CSC_CONTAINERS + CSR_CONTAINERS)
+def test_iforest_sparse(global_random_seed, sparse_container):
     """Check IForest for various parameter settings on sparse input."""
     rng = check_random_state(global_random_seed)
     X_train, X_test = train_test_split(diabetes.data[:50], random_state=rng)
     grid = ParameterGrid({"max_samples": [0.5, 1.0], "bootstrap": [True, False]})
 
-    for sparse_format in [csc_matrix, csr_matrix]:
-        X_train_sparse = sparse_format(X_train)
-        X_test_sparse = sparse_format(X_test)
+    X_train_sparse = sparse_container(X_train)
+    X_test_sparse = sparse_container(X_test)
 
-        for params in grid:
-            # Trained on sparse format
-            sparse_classifier = IsolationForest(
-                n_estimators=10, random_state=global_random_seed, **params
-            ).fit(X_train_sparse)
-            sparse_results = sparse_classifier.predict(X_test_sparse)
+    for params in grid:
+        # Trained on sparse format
+        sparse_classifier = IsolationForest(
+            n_estimators=10, random_state=global_random_seed, **params
+        ).fit(X_train_sparse)
+        sparse_results = sparse_classifier.predict(X_test_sparse)
 
-            # Trained on dense format
-            dense_classifier = IsolationForest(
-                n_estimators=10, random_state=global_random_seed, **params
-            ).fit(X_train)
-            dense_results = dense_classifier.predict(X_test)
+        # Trained on dense format
+        dense_classifier = IsolationForest(
+            n_estimators=10, random_state=global_random_seed, **params
+        ).fit(X_train)
+        dense_results = dense_classifier.predict(X_test)
 
-            assert_array_equal(sparse_results, dense_results)
+        assert_array_equal(sparse_results, dense_results)
 
 
 def test_iforest_error():
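
With the explicit loop over sparse formats gone, pytest now collects one test case per container in CSC_CONTAINERS + CSR_CONTAINERS, so a failure points directly at the offending format instead of aborting the remaining iterations; the dense-versus-sparse comparison itself is unchanged. A minimal, self-contained version of the same pattern on toy data (the test name and data sizes below are hypothetical, not part of the patch):

import numpy as np
import pytest

from sklearn.ensemble import IsolationForest
from sklearn.utils.fixes import CSC_CONTAINERS, CSR_CONTAINERS


@pytest.mark.parametrize("sparse_container", CSC_CONTAINERS + CSR_CONTAINERS)
def test_toy_dense_sparse_equivalence(sparse_container):
    # Small random design matrix; the real test above uses the diabetes data.
    X = np.random.RandomState(0).normal(size=(40, 4))

    dense_pred = IsolationForest(n_estimators=10, random_state=0).fit(X).predict(X)
    sparse_pred = (
        IsolationForest(n_estimators=10, random_state=0)
        .fit(sparse_container(X))
        .predict(sparse_container(X))
    )

    # Predictions should not depend on the input container.
    np.testing.assert_array_equal(dense_pred, sparse_pred)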
@@ -314,13 +314,14 @@ def test_iforest_with_uniform_data():
     assert all(iforest.predict(np.ones((100, 10))) == 1)
 
 
-def test_iforest_with_n_jobs_does_not_segfault():
+@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
+def test_iforest_with_n_jobs_does_not_segfault(csc_container):
     """Check that Isolation Forest does not segfault with n_jobs=2
 
     Non-regression test for #23252
     """
     X, _ = make_classification(n_samples=85_000, n_features=100, random_state=0)
-    X = csc_matrix(X)
+    X = csc_container(X)
     IsolationForest(n_estimators=10, max_samples=256, n_jobs=2).fit(X)
 
 
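The same parametrization is applied to the #23252 non-regression test, so it now runs once per CSC container (the sparse matrix and, where SciPy provides it, the sparse array). One way to run only the tests touched by this diff, assuming you are at the root of a scikit-learn checkout with the package built:

import pytest

# -v prints one line per parametrized case, making the per-container test ids visible.
pytest.main(
    [
        "sklearn/ensemble/tests/test_iforest.py",
        "-k", "iforest_sparse or n_jobs_does_not_segfault",
        "-v",
    ]
)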