-
-
Notifications
You must be signed in to change notification settings - Fork 26k
FIX/ENH CheckingClassifier support parameters and sparse matrices #17259
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
2c8f8a4
0648950
e34277a
c619366
9fc76fc
16e4a3e
25abfee
31b3eb5
45a4860
8000
94805b8
f9c703a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
import numpy as np | ||
import pytest | ||
from scipy import sparse | ||
|
||
from numpy.testing import assert_array_equal | ||
from numpy.testing import assert_allclose | ||
|
||
from sklearn.datasets import load_iris | ||
from sklearn.utils import check_array | ||
from sklearn.utils import _safe_indexing | ||
from sklearn.utils._testing import _convert_container | ||
|
||
from sklearn.utils._mocking import CheckingClassifier | ||
|
||
|
||
@pytest.fixture
def iris():
    """Return the iris dataset as an ``(X, y)`` pair of numpy arrays."""
    return load_iris(return_X_y=True)
|
||
|
||
@pytest.mark.parametrize(
    "input_type", ["list", "array", "sparse", "dataframe"]
)
def test_checking_classifier(iris, input_type):
    """Check that CheckingClassifier outputs what we expect for each
    supported input container type.

    The mock classifier always predicts class 0, so predictions, scores,
    probabilities, and decision values are fully determined.
    """
    X, y = iris
    X = _convert_container(X, input_type)
    clf = CheckingClassifier()
    clf.fit(X, y)

    assert_array_equal(clf.classes_, np.unique(y))
    assert len(clf.classes_) == 3
    assert clf.n_features_in_ == 4

    y_pred = clf.predict(X)
    # `np.int` was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin `int` is the documented replacement.
    assert_array_equal(y_pred, np.zeros(y_pred.size, dtype=int))

    # score is 0 unless foo_param > 1, in which case it is 1
    assert clf.score(X) == pytest.approx(0)
    clf.set_params(foo_param=10)
    assert clf.fit(X, y).score(X) == pytest.approx(1)

    # probabilities put all mass on the first class
    y_proba = clf.predict_proba(X)
    assert y_proba.shape == (150, 3)
    assert_allclose(y_proba[:, 0], 1)
    assert_allclose(y_proba[:, 1:], 0)

    y_decision = clf.decision_function(X)
    assert y_decision.shape == (150, 3)
    assert_allclose(y_decision[:, 0], 1)
    assert_allclose(y_decision[:, 1:], 0)

    # check the shape in case of binary classification
    first_2_classes = np.logical_or(y == 0, y == 1)
    X = _safe_indexing(X, first_2_classes)
    y = _safe_indexing(y, first_2_classes)
    clf.fit(X, y)

    y_proba = clf.predict_proba(X)
    assert y_proba.shape == (100, 2)
    assert_allclose(y_proba[:, 0], 1)
    assert_allclose(y_proba[:, 1], 0)

    # binary decision_function is 1-D
    y_decision = clf.decision_function(X)
    assert y_decision.shape == (100,)
    assert_allclose(y_decision, 0)
|
||
|
||
def test_checking_classifier_with_params(iris):
    """The ``check_X`` callback and ``check_X_params`` kwargs are applied
    to the training data during ``fit``."""
    X, y = iris
    X_sparse = sparse.csr_matrix(X)

    def _require_sparse(arr):
        # Reject dense input so we can tell check_X was really invoked.
        if not sparse.issparse(arr):
            raise ValueError("X is not sparse")
        return True

    clf = CheckingClassifier(check_X=_require_sparse)
    with pytest.raises(ValueError, match="X is not sparse"):
        clf.fit(X, y)
    clf.fit(X_sparse, y)

    def _validate(arr, **kwargs):
        # Delegate validation to check_array with forwarded parameters.
        check_array(arr, **kwargs)
        return True

    clf = CheckingClassifier(
        check_X=_validate, check_X_params={"accept_sparse": False}
    )
    clf.fit(X, y)
    with pytest.raises(TypeError, match="A sparse matrix was passed"):
        clf.fit(X_sparse, y)
|
||
|
||
def test_checking_classifier_fit_params(iris):
    """``fit`` raises when a declared fit parameter does not have one
    entry per sample."""
    X, y = iris
    clf = CheckingClassifier(expected_fit_params=["sample_weight"])
    # Deliberately half-length: should trip the per-sample length check.
    half_weights = np.ones(len(X) // 2)

    with pytest.raises(AssertionError, match="Fit parameter sample_weight"):
        clf.fit(X, y, sample_weight=half_weights)
|
||
|
||
def test_checking_classifier_missing_fit_params(iris):
    """``fit`` raises when an expected fit parameter is not supplied."""
    X, y = iris
    classifier = CheckingClassifier(expected_fit_params=["sample_weight"])

    with pytest.raises(AssertionError, match="Expected fit parameter"):
        classifier.fit(X, y)
Uh oh!
There was an error while loading. Please reload this page.