8000 TST Extend tests for `scipy.sparse.*array` in `sklearn/feature_select… · REDVM/scikit-learn@74fd7bb · GitHub
[go: up one dir, main page]

Skip to content

Commit 74fd7bb

Browse files
Tialo, glemaitre, and OmarManzoor
authored and committed
TST Extend tests for scipy.sparse.*array in sklearn/feature_selection/tests/test_feature_select.py (scikit-learn#27239)
Co-authored-by: Guillaume Lemaitre <g.lemaitre58@gmail.com> Co-authored-by: Omar Salman <omar.salman@arbisoft.com>
1 parent c3a17a2 commit 74fd7bb

File tree

3 files changed

+26
-12
lines changed

3 files changed

+26
-12
lines changed

doc/whats_new/v1.4.rst

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,13 @@ Changelog
187187
``**score_params`` which are passed to the underlying scorer.
188188
:pr:`26525` by :user:`Omar Salman <OmarManzoor>`.
189189

190+
:mod:`sklearn.feature_selection`
191+
................................
192+
193+
- |Enhancement| :func:`feature_selection.f_regression` and :func:`feature_selection.r_regression` now
194+
support SciPy sparse arrays.
195+
:pr:`27239` by :user:`Yaroslav Korobko <Tialo>`.
196+
190197
:mod:`sklearn.pipeline`
191198
.......................
192199

sklearn/feature_selection/_univariate_selection.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -323,10 +323,13 @@ def r_regression(X, y, *, center=True, force_finite=True):
323323
# need not center X
324324
if center:
325325
y = y - np.mean(y)
326-
if issparse(X):
327-
X_means = X.mean(axis=0).getA1()
328-
else:
329-
X_means = X.mean(axis=0)
326+
# TODO: for SciPy <= 1.10, `isspmatrix(X)` returns `True` for sparse arrays.
327+
# Here, we check the output of the `.mean` operation that returns a `np.matrix`
328+
# for sparse matrices while a `np.array` for dense and sparse arrays.
329+
# We can reconsider using `isspmatrix` when the minimum version is
330+
# SciPy >= 1.11
331+
X_means = X.mean(axis=0)
332+
X_means = X_means.getA1() if isinstance(X_means, np.matrix) else X_means
330333
# Compute the scaled standard deviations via moments
331334
X_norms = np.sqrt(row_norms(X.T, squared=True) - n_samples * X_means**2)
332335
else:

sklearn/feature_selection/tests/test_feature_select.py

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
assert_array_equal,
3434
ignore_warnings,
3535
)
36+
from sklearn.utils.fixes import CSR_CONTAINERS
3637

3738
##############################################################################
3839
# Test the score functions
@@ -63,7 +64,8 @@ def test_f_oneway_ints():
6364
assert_array_almost_equal(p, pint, decimal=4)
6465

6566

66-
def test_f_classif():
67+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
68+
def test_f_classif(csr_container):
6769
# Test whether the F test yields meaningful results
6870
# on a simple simulated classification problem
6971
X, y = make_classification(
@@ -81,7 +83,7 @@ def test_f_classif():
8183
)
8284

8385
F, pv = f_classif(X, y)
84-
F_sparse, pv_sparse = f_classif(sparse.csr_matrix(X), y)
86+
F_sparse, pv_sparse = f_classif(csr_container(X), y)
8587
assert (F > 0).all()
8688
assert (pv > 0).all()
8789
assert (pv < 1).all()
@@ -113,7 +115,8 @@ def test_r_regression(center):
113115
assert_array_almost_equal(np_corr_coeffs, corr_coeffs, decimal=3)
114116

115117

116-
def test_f_regression():
118+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
119+
def test_f_regression(csr_container):
117120
# Test whether the F test yields meaningful results
118121
# on a simple simulated regression problem
119122
X, y = make_regression(
@@ -129,13 +132,13 @@ def test_f_regression():
129132

130133
# with centering, compare with sparse
131134
F, pv = f_regression(X, y, center=True)
132-
F_sparse, pv_sparse = f_regression(sparse.csr_matrix(X), y, center=True)
135+
F_sparse, pv_sparse = f_regression(csr_container(X), y, center=True)
133136
assert_allclose(F_sparse, F)
134137
assert_allclose(pv_sparse, pv)
135138

136139
# again without centering, compare with sparse
137140
F, pv = f_regression(X, y, center=False)
138-
F_sparse, pv_sparse = f_regression(sparse.csr_matrix(X), y, center=False)
141+
F_sparse, pv_sparse = f_regression(csr_container(X), y, center=False)
139142
assert_allclose(F_sparse, F)
140143
assert_allclose(pv_sparse, pv)
141144

@@ -356,7 +359,8 @@ def test_select_percentile_classif():
356359
assert_array_equal(support, gtruth)
357360

358361

359-
def test_select_percentile_classif_sparse():
362+
@pytest.mark.parametrize("csr_container", CSR_CONTAINERS)
363+
def test_select_percentile_classif_sparse(csr_container):
360364
# Test whether the relative univariate feature selection
361365
# gets the correct items in a simple classification problem
362366
# with the percentile heuristic
@@ -373,7 +377,7 @@ def test_select_percentile_classif_sparse():
373377
shuffle=False,
374378
random_state=0,
375379
)
376-
X = sparse.csr_matrix(X)
380+
X = csr_container(X)
377381
univariate_filter = SelectPercentile(f_classif, percentile=25)
378382
X_r = univariate_filter.fit(X, y).transform(X)
379383
X_r2 = (
@@ -393,7 +397,7 @@ def test_select_percentile_classif_sparse():
393397
assert X_r2inv.shape == X.shape
394398
assert_array_equal(X_r2inv[:, support_mask].toarray(), X_r.toarray())
395399
# Check other columns are empty
396-
assert X_r2inv.getnnz() == X_r.getnnz()
400+
assert X_r2inv.nnz == X_r.nnz
397401

398402

399403
##############################################################################

0 commit comments

Comments
 (0)
0