TST Extend tests for `scipy.sparse/*array` in `sklearn/linear_model/tests/test_sparse_coordinate_descent` · scikit-learn/scikit-learn@44ef975 · GitHub
[go: up one dir, main page]

Skip to content

Commit 44ef975

Browse files
TST Extend tests for scipy.sparse/*array in sklearn/linear_model/tests/test_sparse_coordinate_descent (#27237)
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com>
1 parent f2b5a2d commit 44ef975

File tree

1 file changed

+46
-32
lines changed

1 file changed

+46
-32
lines changed

sklearn/linear_model/tests/test_sparse_coordinate_descent.py

Lines changed: 46 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
create_memmap_backed_data,
1313
ignore_warnings,
1414
)
15+
from sklearn.utils.fixes import COO_CONTAINERS, CSC_CONTAINERS, LIL_CONTAINERS
1516

1617

1718
def test_sparse_coef():
@@ -23,9 +24,10 @@ def test_sparse_coef():
2324
assert clf.sparse_coef_.toarray().tolist()[0] == clf.coef_
2425

2526

26-
def test_lasso_zero():
27+
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
28+
def test_lasso_zero(csc_container):
2729
# Check that the sparse lasso can handle zero data without crashing
28-
X = sp.csc_matrix((3, 1))
30+
X = csc_container((3, 1))
2931
y = [0, 0, 0]
3032
T = np.array([[1], [2], [3]])
3133
clf = Lasso().fit(X, y)
@@ -36,11 +38,12 @@ def test_lasso_zero():
3638

3739

3840
@pytest.mark.parametrize("with_sample_weight", [True, False])
39-
def test_enet_toy_list_input(with_sample_weight):
41+
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
42+
def test_enet_toy_list_input(with_sample_weight, csc_container):
4043
# Test ElasticNet for various values of alpha and l1_ratio with list X
4144

4245
X = np.array([[-1], [0], [1]])
43-
X = sp.csc_matrix(X)
46+
X = csc_container(X)
4447
Y = [-1, 0, 1] # just a straight line
4548
T = np.array([[2], [3], [4]]) # test sample
4649
if with_sample_weight:
@@ -73,18 +76,19 @@ def test_enet_toy_list_input(with_sample_weight):
7376
assert_almost_equal(clf.dual_gap_, 0)
7477

7578

76-
def test_enet_toy_explicit_sparse_input():
79+
@pytest.mark.parametrize("lil_container", LIL_CONTAINERS)
80+
def test_enet_toy_explicit_sparse_input(lil_container):
7781
# Test ElasticNet for various values of alpha and l1_ratio with sparse X
7882
f = ignore_warnings
7983
# training samples
80-
X = sp.lil_matrix((3, 1))
84+
X = lil_container((3, 1))
8185
X[0, 0] = -1
8286
# X[1, 0] = 0
8387
X[2, 0] = 1
8488
Y = [-1, 0, 1] # just a straight line (the identity function)
8589

8690
# test samples
87-
T = sp.lil_matrix((3, 1))
91+
T = lil_container((3, 1))
8892
T[0, 0] = 2
8993
T[1, 0] = 3
9094
T[2, 0] = 4
@@ -113,6 +117,7 @@ def test_enet_toy_explicit_sparse_input():
113117

114118

115119
def make_sparse_data(
120+
sparse_container,
116121
n_samples=100,
117122
n_features=100,
118123
n_informative=10,
@@ -137,17 +142,24 @@ def make_sparse_data(
137142

138143
# generate training ground truth labels
139144
y = np.dot(X, w)
140-
X = sp.csc_matrix(X)
145+
X = sparse_container(X)
141146
if n_targets == 1:
142147
y = np.ravel(y)
143148
return X, y
144149

145150

146-
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
151+
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
152+
@pytest.mark.parametrize(
153+
"alpha, fit_intercept, positive",
154+
[(0.1, False, False), (0.1, True, False), (1e-3, False, True), (1e-3, True, True)],
155+
)
156+
def test_sparse_enet_not_as_toy_dataset(csc_container, alpha, fit_intercept, positive):
147157
n_samples, n_features, max_iter = 100, 100, 1000
148158
n_informative = 10
149159

150-
X, y = make_sparse_data(n_samples, n_features, n_informative, positive=positive)
160+
X, y = make_sparse_data(
161+
csc_container, n_samples, n_features, n_informative, positive=positive
162+
)
151163

152164
X_train, X_test = X[n_samples // 2 :], X[: n_samples // 2]
153165
y_train, y_test = y[n_samples // 2 :], y[: n_samples // 2]
@@ -188,18 +200,14 @@ def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
188200
assert np.sum(s_clf.coef_ != 0.0) < 2 * n_informative
189201

190202

191-
def test_sparse_enet_not_as_toy_dataset():
192-
_test_sparse_enet_not_as_toy_dataset(alpha=0.1, fit_intercept=False, positive=False)
193-
_test_sparse_enet_not_as_toy_dataset(alpha=0.1, fit_intercept=True, positive=False)
194-
_test_sparse_enet_not_as_toy_dataset(alpha=1e-3, fit_intercept=False, positive=True)
195-
_test_sparse_enet_not_as_toy_dataset(alpha=1e-3, fit_intercept=True, positive=True)
196-
197-
198-
def test_sparse_lasso_not_as_toy_dataset():
203+
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
204+
def test_sparse_lasso_not_as_toy_dataset(csc_container):
199205
n_samples = 100
200206
max_iter = 1000
201207
n_informative = 10
202-
X, y = make_sparse_data(n_samples=n_samples, n_informative=n_informative)
208+
X, y = make_sparse_data(
209+
csc_container, n_samples=n_samples, n_informative=n_informative
210+
)
203211

204212
X_train, X_test = X[n_samples // 2 :], X[: n_samples // 2]
205213
y_train, y_test = y[n_samples // 2 :], y[: n_samples // 2]
@@ -219,9 +227,10 @@ def test_sparse_lasso_not_as_toy_dataset():
219227
assert np.sum(s_clf.coef_ != 0.0) == n_informative
220228

221229

222-
def test_enet_multitarget():
230+
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
231+
def test_enet_multitarget(csc_container):
223232
n_targets = 3
224-
X, y = make_sparse_data(n_targets=n_targets)
233+
X, y = make_sparse_data(csc_container, n_targets=n_targets)
225234

226235
estimator = ElasticNet(alpha=0.01, precompute=False)
227236
# XXX: There is a bug when precompute is not False!
@@ -239,8 +248,9 @@ def test_enet_multitarget():
239248
assert_array_almost_equal(dual_gap[k], estimator.dual_gap_)
240249

241250

242-
def test_path_parameters():
243-
X, y = make_sparse_data()
251+
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
252+
def test_path_parameters(csc_container):
253+
X, y = make_sparse_data(csc_container)
244254
max_iter = 50
245255
n_alphas = 10
246256
clf = ElasticNetCV(
@@ -263,8 +273,9 @@ def test_path_parameters():
263273
@pytest.mark.parametrize("fit_intercept", [False, True])
264274
@pytest.mark.parametrize("n_samples, n_features", [(24, 6), (6, 24)])
265275
@pytest.mark.parametrize("with_sample_weight", [True, False])
276+
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
266277
def test_sparse_dense_equality(
267-
Model, fit_intercept, n_samples, n_features, with_sample_weight
278+
Model, fit_intercept, n_samples, n_features, with_sample_weight, csc_container
268279
):
269280
X, y = make_regression(
270281
n_samples=n_samples,
@@ -279,7 +290,7 @@ def test_sparse_dense_equality(
279290
sw = np.abs(np.random.RandomState(42).normal(scale=10, size=y.shape))
280291
else:
281292
sw = None
282-
Xs = sp.csc_matrix(X)
293+
Xs = csc_container(X)
283294
params = {"fit_intercept": fit_intercept}
284295
reg_dense = Model(**params).fit(X, y, sample_weight=sw)
285296
reg_sparse = Model(**params).fit(Xs, y, sample_weight=sw)
@@ -292,8 +303,9 @@ def test_sparse_dense_equality(
292303
assert_allclose(reg_sparse.coef_, reg_dense.coef_)
293304

294305

295-
def test_same_output_sparse_dense_lasso_and_enet_cv():
296-
X, y = make_sparse_data(n_samples=40, n_features=10)
306+
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
307+
def test_same_output_sparse_dense_lasso_and_enet_cv(csc_container):
308+
X, y = make_sparse_data(csc_container, n_samples=40, n_features=10)
297309
clfs = ElasticNetCV(max_iter=100)
298310
clfs.fit(X, y)
299311
clfd = ElasticNetCV(max_iter=100)
@@ -313,7 +325,8 @@ def test_same_output_sparse_dense_lasso_and_enet_cv():
313325
assert_array_almost_equal(clfs.alphas_, clfd.alphas_)
314326

315327

316-
def test_same_multiple_output_sparse_dense():
328+
@pytest.mark.parametrize("coo_container", COO_CONTAINERS)
329+
def test_same_multiple_output_sparse_dense(coo_container):
317330
l = ElasticNet()
318331
X = [
319332
[0, 1, 2, 3, 4],
@@ -332,20 +345,21 @@ def test_same_multiple_output_sparse_dense():
332345
predict_dense = l.predict(sample)
333346

334347
l_sp = ElasticNet()
335-
X_sp = sp.coo_matrix(X)
348+
X_sp = coo_container(X)
336349
l_sp.fit(X_sp, y)
337-
sample_sparse = sp.coo_matrix(sample)
350+
sample_sparse = coo_container(sample)
338351
predict_sparse = l_sp.predict(sample_sparse)
339352

340353
assert_array_almost_equal(predict_sparse, predict_dense)
341354

342355

343-
def test_sparse_enet_coordinate_descent():
356+
@pytest.mark.parametrize("csc_container", CSC_CONTAINERS)
357+
def test_sparse_enet_coordinate_descent(csc_container):
344358
"""Test that a warning is issued if model does not converge"""
345359
clf = Lasso(max_iter=2)
346360
n_samples = 5
347361
n_features = 2
348-
X = sp.csc_matrix((n_samples, n_features)) * 1e50
362+
X = csc_container((n_samples, n_features)) * 1e50
349363
y = np.ones(n_samples)
350364
warning_message = (
351365
"Objective did not converge. You might want "

0 commit comments

Comments (0)
0