[MRG+2] Pytest parametrize unit tests by rth · Pull Request #11074 · scikit-learn/scikit-learn · GitHub
[go: up one dir, main page]

Skip to content

[MRG+2] Pytest parametrize unit tests #11074

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Jun 8, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
070ca35
Migrate to pytest
HolgerPeters Feb 8, 2017
3d95262
Replace nosetests with pytest on travis
HolgerPeters Feb 8, 2017
607f1b9
Makefile to pytest
HolgerPeters Feb 8, 2017
c99041d
Merge branch 'master' into pytest-parametrize-non-common-tests
rth May 4, 2018
86d4908
Fix conflicts and add a few more parametrizations
rth May 4, 2018
85fe86a
More pytest parametrizations
rth May 4, 2018
b51cc6a
Parametrize test_forest.py
rth May 5, 2018
e720097
Migrate test_gradient_boosting.py
rth May 5, 2018
f7d092a
More parametrization
rth May 5, 2018
4140c99
Replace last yields in tests
rth May 5, 2018
c4663f6
More test parametrization
rth May 6, 2018
978a5f9
Merge remote-tracking branch 'upstream/master' into pytest-parametriz…
rth May 7, 2018
639488c
More parametrizations
rth May 7, 2018
75e4292
Pytest parametrize sklearn.ensemble
rth May 7, 2018
a060efa
Fix CI
rth May 7, 2018
f96210a
Fix CI
rth May 7, 2018
a581f55
Ensure sklearn/utils/testing.py doesn't import pytest
rth May 8, 2018
501bae7
Don't use dict(x, **y)
rth May 17, 2018
0bc2ccb
Remove skip decorators for nose from sklearn/utils/testing.py
rth May 17, 2018
fbfb976
Merge branch 'pytest-parametrize-part1' into pytest-parametrize-non-c…
rth May 23, 2018
4f4c543
Fix Py2 compatibility in test_mldata tmpdir fixture
rth May 24, 2018
b4b15d1
Fix Py2 compatibility in test_mldata tmpdir fixture
rth May 24, 2018
848774a
Merge branch 'master' into pytest-parametrize-non-common-tests
rth May 24, 2018
fb4a8cf
Merge branch 'pytest-parametrize-non-common-tests' of github.com:rth/…
rth May 24, 2018
6b17874
Merge branch 'master' into pytest-parametrize-non-common-tests
rth May 25, 2018
332abb3
Fix Python 2 compatibility
rth May 25, 2018
9e07655
PEP8
rth May 25, 2018
2cf2486
Merge branch 'master' into pytest-parametrize-non-common-tests
rth Jun 1, 2018
75f16c5
PEP8
rth Jun 1, 2018
a9f5e81
Merge branch 'master' into pytest-parametrize-non-common-tests
rth Jun 4, 2018
645fcd5
Hanmin's comments
rth Jun 5, 2018
dd89184
More parametrizations in sklearn/metrics/tests/
rth Jun 6, 2018
588fb49
Review comments and use sets in metrics/tests/test_common.py
rth Jun 7, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions sklearn/linear_model/tests/test_coordinate_descent.py
Original file line number Diff line number Diff line change
Expand Up @@ -706,17 +706,17 @@ def test_overrided_gram_matrix():
clf.fit, X, y)


def test_lasso_non_float_y():
@pytest.mark.parametrize('model', [ElasticNet, Lasso])
def test_lasso_non_float_y(model):
X = [[0, 0], [1, 1], [-1, -1]]
y = [0, 1, 2]
y_float = [0.0, 1.0, 2.0]

for model in [ElasticNet, Lasso]:
clf = model(fit_intercept=False)
clf.fit(X, y)
clf_float = model(fit_intercept=False)
clf_float.fit(X, y_float)
assert_array_equal(clf.coef_, clf_float.coef_)
clf = model(fit_intercept=False)
clf.fit(X, y)
clf_float = model(fit_intercept=False)
clf_float.fit(X, y_float)
assert_array_equal(clf.coef_, clf_float.coef_)


def test_enet_float_precision():
Expand Down
22 changes: 13 additions & 9 deletions sklearn/linear_model/tests/test_least_angle.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
import numpy as np
from scipy import linalg

import pytest

from sklearn.model_selection import train_test_split
from sklearn.utils.testing import assert_equal
from sklearn.utils.testing import assert_array_almost_equal
Expand Down Expand Up @@ -172,18 +174,20 @@ def test_no_path_all_precomputed():
assert_true(alpha_ == alphas_[-1])


def test_lars_precompute():
@pytest.mark.parametrize(
'classifier',
[linear_model.Lars, linear_model.LarsCV, linear_model.LassoLarsIC])
def test_lars_precompute(classifier):
# Check for different values of precompute
X, y = diabetes.data, diabetes.target
G = np.dot(X.T, X)
for classifier in [linear_model.Lars, linear_model.LarsCV,
linear_model.LassoLarsIC]:
clf = classifier(precompute=G)
output_1 = ignore_warnings(clf.fit)(X, y).coef_
for precompute in [True, False, 'auto', None]:
clf = classifier(precompute=precompute)
output_2 = clf.fit(X, y).coef_
assert_array_almost_equal(output_1, output_2, decimal=8)

clf = classifier(precompute=G)
output_1 = ignore_warnings(clf.fit)(X, y).coef_
for precompute in [True, False, 'auto', None]:
clf = classifier(precompute=precompute)
output_2 = clf.fit(X, y).coef_
assert_array_almost_equal(output_1, output_2, decimal=8)


def test_singular_matrix():
Expand Down
212 changes: 107 additions & 105 deletions sklearn/linear_model/tests/test_logistic.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import numpy as np
import scipy.sparse as sp
from scipy import linalg, optimize, sparse

import pytest

from sklearn.datasets import load_iris, make_classification
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
Expand Down Expand Up @@ -139,63 +142,63 @@ def test_predict_iris():
assert_greater(np.mean(pred == target), .95)


def test_multinomial_validation():
for solver in ['lbfgs', 'newton-cg', 'sag', 'saga']:
lr = LogisticRegression(C=-1, solver=solver, multi_class='multinomial')
assert_raises(ValueError, lr.fit, [[0, 1], [1, 0]], [0, 1])
@pytest.mark.parametrize('solver', ['lbfgs', 'newton-cg', 'sag', 'saga'])
def test_multinomial_validation(solver):
lr = LogisticRegression(C=-1, solver=solver, multi_class='multinomial')
assert_raises(ValueError, lr.fit, [[0, 1], [1, 0]], [0, 1])


def test_check_solver_option():
@pytest.mark.parametrize('LR', [LogisticRegression, LogisticRegressionCV])
def test_check_solver_option(LR):
X, y = iris.data, iris.target
for LR in [LogisticRegression, LogisticRegressionCV]:

msg = ('Logistic Regression supports only liblinear, newton-cg, '
'lbfgs, sag and saga solvers, got wrong_name')
lr = LR(solver="wrong_name")
msg = ('Logistic Regression supports only liblinear, newton-cg, '
'lbfgs, sag and saga solvers, got wrong_name')
lr = LR(solver="wrong_name")
assert_raise_message(ValueError, msg, lr.fit, X, y)

msg = "multi_class should be either multinomial or ovr, got wrong_name"
lr = LR(solver='newton-cg', multi_class="wrong_name")
assert_raise_message(ValueError, msg, lr.fit, X, y)

# only 'liblinear' solver
msg = "Solver liblinear does not support a multinomial backend."
lr = LR(solver='liblinear', multi_class='multinomial')
assert_raise_message(ValueError, msg, lr.fit, X, y)

# all solvers except 'liblinear'
for solver in ['newton-cg', 'lbfgs', 'sag']:
msg = ("Solver %s supports only l2 penalties, got l1 penalty." %
solver)
lr = LR(solver=solver, penalty='l1')
assert_raise_message(ValueError, msg, lr.fit, X, y)

msg = "multi_class should be either multinomial or ovr, got wrong_name"
lr = LR(solver='newton-cg', multi_class="wrong_name")
for solver in ['newton-cg', 'lbfgs', 'sag', 'saga']:
msg = ("Solver %s supports only dual=False, got dual=True" %
solver)
lr = LR(solver=solver, dual=True)
assert_raise_message(ValueError, msg, lr.fit, X, y)

# only 'liblinear' solver
msg = "Solver liblinear does not support a multinomial backend."
lr = LR(solver='liblinear', multi_class='multinomial')
assert_raise_message(ValueError, msg, lr.fit, X, y)

# all solvers except 'liblinear'
for solver in ['newton-cg', 'lbfgs', 'sag']:
msg = ("Solver %s supports only l2 penalties, got l1 penalty." %
solver)
lr = LR(solver=solver, penalty='l1')
assert_raise_message(ValueError, msg, lr.fit, X, y)
for solver in ['newton-cg', 'lbfgs', 'sag', 'saga']:
msg = ("Solver %s supports only dual=False, got dual=True" %
solver)
lr = LR(solver=solver, dual=True)
assert_raise_message(ValueError, msg, lr.fit, X, y)


def test_multinomial_binary():
@pytest.mark.parametrize('solver', ['lbfgs', 'newton-cg', 'sag', 'saga'])
def test_multinomial_binary(solver):
# Test multinomial LR on a binary problem.
target = (iris.target > 0).astype(np.intp)
target = np.array(["setosa", "not-setosa"])[target]

for solver in ['lbfgs', 'newton-cg', 'sag', 'saga']:
clf = LogisticRegression(solver=solver, multi_class='multinomial',
random_state=42, max_iter=2000)
clf.fit(iris.data, target)
clf = LogisticRegression(solver=solver, multi_class='multinomial',
random_state=42, max_iter=2000)
clf.fit(iris.data, target)

assert_equal(clf.coef_.shape, (1, iris.data.shape[1]))
assert_equal(clf.intercept_.shape, (1,))
assert_array_equal(clf.predict(iris.data), target)
assert_equal(clf.coef_.shape, (1, iris.data.shape[1]))
assert_equal(clf.intercept_.shape, (1,))
assert_array_equal(clf.predict(iris.data), target)

mlr = LogisticRegression(solver=solver, multi_class='multinomial',
random_state=42, fit_intercept=False)
mlr.fit(iris.data, target)
pred = clf.classes_[np.argmax(clf.predict_log_proba(iris.data),
axis=1)]
assert_greater(np.mean(pred == target), .9)
mlr = LogisticRegression(solver=solver, multi_class='multinomial',
random_state=42, fit_intercept=False)
mlr.fit(iris.data, target)
pred = clf.classes_[np.argmax(clf.predict_log_proba(iris.data),
axis=1)]
assert_greater(np.mean(pred == target), .9)


def test_multinomial_binary_probabilities():
Expand Down Expand Up @@ -1043,7 +1046,9 @@ def test_max_iter():
assert_equal(lr.n_iter_[0], max_iter)


def test_n_iter():
@pytest.mark.parametrize('solver',
['newton-cg', 'liblinear', 'sag', 'saga', 'lbfgs'])
def test_n_iter(solver):
# Test that self.n_iter_ has the correct format.
X, y = iris.data, iris.target
y_bin = y.copy()
Expand All @@ -1052,76 +1057,73 @@ def test_n_iter():
n_Cs = 4
n_cv_fold = 2

for solver in ['newton-cg', 'liblinear', 'sag', 'saga', 'lbfgs']:
# OvR case
n_classes = 1 if solver == 'liblinear' else np.unique(y).shape[0]
clf = LogisticRegression(tol=1e-2, multi_class='ovr',
solver=solver, C=1.,
random_state=42, max_iter=100)
clf.fit(X, y)
assert_equal(clf.n_iter_.shape, (n_classes,))
# OvR case
n_classes = 1 if solver == 'liblinear' else np.unique(y).shape[0]
clf = LogisticRegression(tol=1e-2, multi_class='ovr',
solver=solver, C=1.,
random_state=42, max_iter=100)
clf.fit(X, y)
assert_equal(clf.n_iter_.shape, (n_classes,))

n_classes = np.unique(y).shape[0]
clf = LogisticRegressionCV(tol=1e-2, multi_class='ovr',
solver=solver, Cs=n_Cs, cv=n_cv_fold,
random_state=42, max_iter=100)
clf.fit(X, y)
assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs))
clf.fit(X, y_bin)
assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs))

# multinomial case
n_classes = 1
if solver in ('liblinear', 'sag', 'saga'):
break

clf = LogisticRegression(tol=1e-2, multi_class='multinomial',
solver=solver, C=1.,
random_state=42, max_iter=100)
clf.fit(X, y)
assert_equal(clf.n_iter_.shape, (n_classes,))
n_classes = np.unique(y).shape[0]
clf = LogisticRegressionCV(tol=1e-2, multi_class='ovr',
solver=solver, Cs=n_Cs, cv=n_cv_fold,
random_state=42, max_iter=100)
clf.fit(X, y)
assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs))
clf.fit(X, y_bin)
assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs))

# multinomial case
n_classes = 1
if solver in ('liblinear', 'sag', 'saga'):
return

clf = LogisticRegression(tol=1e-2, multi_class='multinomial',
solver=solver, C=1.,
random_state=42, max_iter=100)
clf.fit(X, y)
assert_equal(clf.n_iter_.shape, (n_classes,))

clf = LogisticRegressionCV(tol=1e-2, multi_class='multinomial',
solver=solver, Cs=n_Cs, cv=n_cv_fold,
random_state=42, max_iter=100)
clf.fit(X, y)
assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs))
clf.fit(X, y_bin)
assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs))
clf = LogisticRegressionCV(tol=1e-2, multi_class='multinomial',
solver=solver, Cs=n_Cs, cv=n_cv_fold,
random_state=42, max_iter=100)
clf.fit(X, y)
assert_equal(clf.n_iter_.shape, (n_classes, n_cv_fold, n_Cs))
clf.fit(X, y_bin)
assert_equal(clf.n_iter_.shape, (1, n_cv_fold, n_Cs))


def test_warm_start():
@pytest.mark.parametrize('solver', ('newton-cg', 'sag', 'saga', 'lbfgs'))
@pytest.mark.parametrize('warm_start', (True, False))
@pytest.mark.parametrize('fit_intercept', (True, False))
@pytest.mark.parametrize('multi_class', ['ovr', 'multinomial'])
def test_warm_start(solver, warm_start, fit_intercept, multi_class):
# A 1-iteration second fit on same data should give almost same result
# with warm starting, and quite different result without warm starting.
# Warm starting does not work with liblinear solver.
X, y = iris.data, iris.target

solvers = ['newton-cg', 'sag', 'saga', 'lbfgs']

for warm_start in [True, False]:
for fit_intercept in [True, False]:
for solver in solvers:
for multi_class in ['ovr', 'multinomial']:
clf = LogisticRegression(tol=1e-4, multi_class=multi_class,
warm_start=warm_start,
solver=solver,
random_state=42, max_iter=100,
fit_intercept=fit_intercept)
with ignore_warnings(category=ConvergenceWarning):
clf.fit(X, y)
coef_1 = clf.coef_

clf.max_iter = 1
clf.fit(X, y)
cum_diff = np.sum(np.abs(coef_1 - clf.coef_))
msg = ("Warm starting issue with %s solver in %s mode "
"with fit_intercept=%s and warm_start=%s"
% (solver, multi_class, str(fit_intercept),
str(warm_start)))
if warm_start:
assert_greater(2.0, cum_diff, msg)
else:
assert_greater(cum_diff, 2.0, msg)
clf = LogisticRegression(tol=1e-4, multi_class=multi_class,
warm_start=warm_start,
solver=solver,
random_state=42, max_iter=100,
fit_intercept=fit_intercept)
with ignore_warnings(category=ConvergenceWarning):
clf.fit(X, y)
coef_1 = clf.coef_

clf.max_iter = 1
clf.fit(X, y)
cum_diff = np.sum(np.abs(coef_1 - clf.coef_))
msg = ("Warm starting issue with %s solver in %s mode "
"with fit_intercept=%s and warm_start=%s"
% (solver, multi_class, str(fit_intercept),
str(warm_start)))
if warm_start:
assert_greater(2.0, cum_diff, msg)
else:
assert_greater(cum_diff, 2.0, msg)


def test_saga_vs_liblinear():
Expand Down
Loading
0