TST Fix unreachable code in tests by VarIr · Pull Request #16110 · scikit-learn/scikit-learn


Merged · 20 commits · Feb 16, 2020
7 changes: 2 additions & 5 deletions sklearn/cluster/tests/test_bicluster.py
@@ -201,16 +201,13 @@ def test_project_and_cluster():
                             [0, 1],
                             [0, 0]])
     for mat in (data, csr_matrix(data)):
-        labels = model._project_and_cluster(data, vectors,
+        labels = model._project_and_cluster(mat, vectors,
                                             n_clusters=2)
         assert_almost_equal(v_measure_score(labels, [0, 0, 1, 1]), 1.0)


 def test_perfect_checkerboard():
-    # XXX test always skipped
-    raise SkipTest("This test is failing on the buildbot, but cannot"
-                   " reproduce. Temporarily disabling it until it can be"
-                   " reproduced and fixed.")
+    # XXX Previously failed on build bot (not reproducible)
     model = SpectralBiclustering(3, svd_method="arpack", random_state=0)

     S, rows, cols = make_checkerboard((30, 30), 3, noise=0,
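The change above fixes a classic unreachable-test bug: the loop iterates over a dense array and its csr_matrix copy, but the body kept passing the original data, so the sparse path was never exercised. A minimal, self-contained sketch of the fixed pattern (the check helper is hypothetical):

    import numpy as np
    from scipy.sparse import csr_matrix

    def check(mat):
        # Column sums work for both dense arrays and sparse matrices.
        return np.asarray(mat.sum(axis=0)).ravel()

    data = np.array([[1, 1], [0, 1], [0, 0]])
    for mat in (data, csr_matrix(data)):
        # Passing the loop variable `mat` (not `data`) is the whole fix;
        # otherwise the sparse case silently re-tests the dense input.
        np.testing.assert_array_equal(check(mat), [1, 2])
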
3 changes: 2 additions & 1 deletion sklearn/compose/tests/test_column_transformer.py
@@ -523,7 +523,8 @@ def predict(self, X):

     X_array = np.array([[0, 1, 2], [2, 4, 6]]).T
     ct = ColumnTransformer([('trans', NoTrans(), [0])])
-    assert_raise_message(TypeError, "All estimators should implement fit",
+    assert_raise_message(TypeError,
+                         "All estimators should implement fit and transform",
                          ct.fit, X_array)


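Asserting the complete message rather than a prefix keeps the test honest if the wording ever changes in a way a prefix match would hide. A minimal sketch of the same check written with pytest.raises (the validate helper and NoTransEstimator are hypothetical):

    import pytest

    class NoTransEstimator:
        # Deliberately provides neither fit nor transform.
        pass

    def validate(estimator):
        if not (hasattr(estimator, 'fit') and hasattr(estimator, 'transform')):
            raise TypeError("All estimators should implement fit and transform")

    def test_validate_rejects_incomplete_estimator():
        with pytest.raises(TypeError,
                           match="All estimators should implement fit and transform"):
            validate(NoTransEstimator())
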
16 changes: 6 additions & 10 deletions sklearn/datasets/tests/test_openml.py
@@ -102,7 +102,7 @@ def _fetch_dataset_from_openml(data_id, data_name, data_version,
     assert data_by_id.target.shape == (expected_observations,
                                        len(target_column))
     assert data_by_id.target_names == target_column
-    assert data_by_id.data.dtype == np.float64
+    assert data_by_id.data.dtype == expected_data_dtype
     assert data_by_id.target.dtype == expected_target_dtype
     assert len(data_by_id.feature_names) == expected_features
     for feature in data_by_id.feature_names:
@@ -118,11 +118,7 @@ def _fetch_dataset_from_openml(data_id, data_name, data_version,
     if compare_default_target:
         # check whether the data by id and data by id target are equal
         data_by_id_default = fetch_openml(data_id=data_id, cache=False)
-        if data_by_id.data.dtype == np.float64:
-            np.testing.assert_allclose(data_by_id.data,
-                                       data_by_id_default.data)
-        else:
-            assert np.array_equal(data_by_id.data, data_by_id_default.data)
+        np.testing.assert_allclose(data_by_id.data, data_by_id_default.data)
     if data_by_id.target.dtype == np.float64:
         np.testing.assert_allclose(data_by_id.target,
                                    data_by_id_default.target)
@@ -740,7 +736,7 @@ def test_fetch_openml_iris_multitarget(monkeypatch, gzip_response):
     _fetch_dataset_from_openml(data_id, data_name, data_version, target_column,
                                expected_observations, expected_features,
                                expected_missing,
-                               object, np.float64, expect_sparse=False,
+                               np.float64, np.float64, expect_sparse=False,
                                compare_default_target=False)


@@ -759,7 +755,7 @@ def test_fetch_openml_anneal(monkeypatch, gzip_response):
     _fetch_dataset_from_openml(data_id, data_name, data_version, target_column,
                                expected_observations, expected_features,
                                expected_missing,
-                               object, object, expect_sparse=False,
+                               np.float64, object, expect_sparse=False,
                                compare_default_target=True)


@@ -784,7 +780,7 @@ def test_fetch_openml_anneal_multitarget(monkeypatch, gzip_response):
     _fetch_dataset_from_openml(data_id, data_name, data_version, target_column,
                                expected_observations, expected_features,
                                expected_missing,
-                               object, object, expect_sparse=False,
+                               np.float64, object, expect_sparse=False,
                                compare_default_target=False)


@@ -802,7 +798,7 @@ def test_fetch_openml_cpu(monkeypatch, gzip_response):
     _fetch_dataset_from_openml(data_id, data_name, data_version, target_column,
                                expected_observations, expected_features,
                                expected_missing,
-                               object, np.float64, expect_sparse=False,
+                               np.float64, np.float64, expect_sparse=False,
                                compare_default_target=True)


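All of the dtype edits in this file follow from the first hunk: once _fetch_dataset_from_openml asserts the caller-supplied expected_data_dtype instead of a hard-coded np.float64, the np.array_equal fallback for non-float data is dead and each call site must state the dtype it really expects. A condensed sketch of the pattern (check_fetched_data is illustrative, not the real helper):

    import numpy as np

    def check_fetched_data(data, expected_data_dtype):
        # The expected dtype is an explicit argument, so there is no
        # dead "else" branch left over from a hard-coded dtype check.
        data = np.asarray(data)
        assert data.dtype == expected_data_dtype

    check_fetched_data([[1.0, 2.0]], np.float64)                  # numeric data
    check_fetched_data(np.array([["a"]], dtype=object), object)   # categorical data
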
4 changes: 2 additions & 2 deletions sklearn/decomposition/tests/test_fastica.py
@@ -158,7 +158,6 @@ def test_fastica_convergence_fail():
     s2 = np.ceil(np.sin(np.pi * t))
     s = np.c_[s1, s2].T
     center_and_norm(s)
-    s1, s2 = s

     # Mixing matrix
     mixing = rng.randn(6, 2)
@@ -170,7 +169,8 @@ def test_fastica_convergence_fail():
     assert_warns(ConvergenceWarning, ica.fit, m.T)


-def test_non_square_fastica(add_noise=False):
+@pytest.mark.parametrize('add_noise', [True, False])
+def test_non_square_fastica(add_noise):
     # Test the FastICA algorithm on very simple data.
     rng = np.random.RandomState(0)

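Two distinct cleanups here: the re-unpacking s1, s2 = s assigned names that were never read again, and the add_noise=False keyword default meant pytest only ever ran the noiseless case. Parametrizing turns both branches into collected test cases. A minimal sketch of that refactor:

    import pytest

    @pytest.mark.parametrize('add_noise', [True, False])
    def test_mean_stays_small(add_noise):
        # Both cases now appear in the test report; with the old keyword
        # default, only add_noise=False would ever have executed.
        signal = [-1.0, 0.0, 1.0]
        if add_noise:
            signal = [s + 1e-8 for s in signal]
        assert abs(sum(signal) / len(signal)) < 1e-6
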
7 changes: 0 additions & 7 deletions sklearn/decomposition/tests/test_nmf.py
@@ -1,6 +1,5 @@
 import numpy as np
 import scipy.sparse as sp
-import numbers

 from scipy import linalg
 from sklearn.decomposition import NMF, non_negative_factorization
@@ -10,7 +9,6 @@
 import pytest

 from sklearn.utils._testing import assert_raise_message
-from sklearn.utils._testing import assert_warns_message
 from sklearn.utils._testing import assert_array_equal
 from sklearn.utils._testing import assert_array_almost_equal
 from sklearn.utils._testing import assert_almost_equal
@@ -246,11 +244,6 @@ def _beta_divergence_dense(X, W, H, beta):

     Used as a reference for testing nmf._beta_divergence.
     """
-    if isinstance(X, numbers.Number):
-        W = np.array([[W]])
-        H = np.array([[H]])
-        X = np.array([[X]])
-
     WH = np.dot(W, H)

     if beta == 2:
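The removed isinstance(X, numbers.Number) branch promoted scalars to 1x1 matrices, but every caller of this reference helper passes arrays, so the branch could never run. For the beta = 2 case the surviving logic reduces to roughly this sketch (a simplified stand-in; the real helper also covers other beta values):

    import numpy as np

    def frobenius_divergence_dense(X, W, H):
        # beta = 2 branch of the reference beta-divergence: half the
        # squared Frobenius norm of the reconstruction error.
        WH = np.dot(W, H)
        return ((X - WH) ** 2).sum() / 2.

    X = np.array([[1.0, 0.5], [0.0, 2.0]])
    W = np.array([[1.0], [1.0]])
    H = np.array([[0.5, 1.0]])
    assert abs(frobenius_divergence_dense(X, W, H) - 0.875) < 1e-12
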
6 changes: 3 additions & 3 deletions sklearn/ensemble/_hist_gradient_boosting/tests/test_compare_lightgbm.py
@@ -13,9 +13,6 @@
     get_equivalent_estimator)


-pytest.importorskip("lightgbm")
-
-
 @pytest.mark.parametrize('seed', range(5))
 @pytest.mark.parametrize('min_samples_leaf', (1, 20))
 @pytest.mark.parametrize('n_samples, max_leaf_nodes', [
@@ -46,6 +43,7 @@ def test_same_predictions_regression(seed, min_samples_leaf, n_samples,
     # discrepancy between the initial values leads to biggish differences in
     # the predictions. These differences are much smaller with more
     # iterations.
+    pytest.importorskip("lightgbm")

     rng = np.random.RandomState(seed=seed)
     n_samples = n_samples
@@ -98,6 +96,7 @@ def test_same_predictions_regression(seed, min_samples_leaf, n_samples,
 def test_same_predictions_classification(seed, min_samples_leaf, n_samples,
                                          max_leaf_nodes):
     # Same as test_same_predictions_regression but for classification
+    pytest.importorskip("lightgbm")

     rng = np.random.RandomState(seed=seed)
     n_samples = n_samples
@@ -158,6 +157,7 @@ def test_same_predictions_classification(seed, min_samples_leaf, n_samples,
 def test_same_predictions_multiclass_classification(
         seed, min_samples_leaf, n_samples, max_leaf_nodes):
     # Same as test_same_predictions_regression but for classification
+    pytest.importorskip("lightgbm")

     rng = np.random.RandomState(seed=seed)
     n_samples = n_samples
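Moving pytest.importorskip("lightgbm") from module scope into each test changes when the skip happens: at module scope the whole file is skipped during collection, whereas inside the body, collection always succeeds and only the tests that genuinely need lightgbm are reported as skipped. A minimal sketch of the per-test pattern:

    import pytest

    def test_uses_optional_dependency():
        # importorskip returns the imported module when it is available
        # and skips just this test otherwise.
        lightgbm = pytest.importorskip("lightgbm")
        assert hasattr(lightgbm, "LGBMRegressor")

    def test_runs_without_the_dependency():
        # Still collected and run even when lightgbm is absent.
        assert 1 + 1 == 2
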
47 changes: 17 additions & 30 deletions sklearn/feature_extraction/tests/test_text.py
@@ -1,7 +1,6 @@
 # -*- coding: utf-8 -*-
 from collections.abc import Mapping
 import re
-import warnings

 import pytest
 from scipy import sparse
@@ -31,10 +30,11 @@
 from numpy.testing import assert_array_equal
 from sklearn.utils import IS_PYPY
 from sklearn.utils._testing import (assert_almost_equal,
-                                    assert_warns_message, assert_raise_message,
-                                    SkipTest, assert_no_warnings,
-                                    fails_if_pypy, assert_allclose_dense_sparse,
-                                    skip_if_32bit)
+                                    assert_warns_message, assert_raise_message,
+                                    assert_no_warnings,
+                                    fails_if_pypy,
+                                    assert_allclose_dense_sparse,
+                                    skip_if_32bit)
 from collections import defaultdict
 from functools import partial
 import pickle
@@ -296,18 +296,17 @@ def test_countvectorizer_custom_vocabulary_pipeline():

 def test_countvectorizer_custom_vocabulary_repeated_indices():
     vocab = {"pizza": 0, "beer": 0}
-    try:
-        CountVectorizer(vocabulary=vocab)
-    except ValueError as e:
-        assert "vocabulary contains repeated indices" in str(e).lower()
+    msg = "Vocabulary contains repeated indices"
+    with pytest.raises(ValueError, match=msg):
+        vect = CountVectorizer(vocabulary=vocab)
+        vect.fit(["pasta_siziliana"])


 def test_countvectorizer_custom_vocabulary_gap_index():
     vocab = {"pizza": 1, "beer": 2}
-    try:
-        CountVectorizer(vocabulary=vocab)
-    except ValueError as e:
-        assert "doesn't contain index" in str(e).lower()
+    with pytest.raises(ValueError, match="doesn't contain index"):
+        vect = CountVectorizer(vocabulary=vocab)
+        vect.fit(['pasta_verdura'])


def test_countvectorizer_stop_words():
Expand All @@ -326,20 +325,14 @@ def test_countvectorizer_stop_words():


def test_countvectorizer_empty_vocabulary():
try:
with pytest.raises(ValueError, match="empty vocabulary"):
vect = CountVectorizer(vocabulary=[])
vect.fit(["foo"])
assert False, "we shouldn't get here"
except ValueError as e:
assert "empty vocabulary" in str(e).lower()

try:
with pytest.raises(ValueError, match="empty vocabulary"):
v = CountVectorizer(max_df=1.0, stop_words="english")
# fit on stopwords only
v.fit(["to be or not to be", "and me too", "and so do you"])
assert False, "we shouldn't get here"
except ValueError as e:
assert "empty vocabulary" in str(e).lower()


def test_fit_countvectorizer_twice():
@@ -387,15 +380,9 @@ def test_tfidf_no_smoothing():
          [1, 0, 0]]
     tr = TfidfTransformer(smooth_idf=False, norm='l2')

-    with warnings.catch_warnings(record=True) as w:
-        1. / np.array([0.])
-        numpy_provides_div0_warning = len(w) == 1
-
     in_warning_message = 'divide by zero'
-    tfidf = assert_warns_message(RuntimeWarning, in_warning_message,
-                                 tr.fit_transform, X).toarray()
-    if not numpy_provides_div0_warning:
-        raise SkipTest("Numpy does not provide div 0 warnings.")
+    assert_warns_message(RuntimeWarning, in_warning_message,
+                         tr.fit_transform, X).toarray()


 def test_sublinear_tf():
@@ -1155,7 +1142,7 @@ def test_vectorizers_invalid_ngram_range(vec):
     message = ("Invalid value for ngram_range=%s "
                "lower boundary larger than the upper boundary."
                % str(invalid_range))
-    if isinstance(vec, HashingVectorizer):
+    if isinstance(vec, HashingVectorizer) and IS_PYPY:
         pytest.xfail(reason='HashingVectorizer is not supported on PyPy')

     assert_raise_message(
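The recurring fix in this file targets the same trap: a try/except whose only assertion lives in the except block passes silently when no exception is raised, and CountVectorizer does not validate its vocabulary until fit is called, so several of those asserts were unreachable. pytest.raises fails the test if the expected error never appears, and match checks the message against a regular expression. A minimal sketch:

    import pytest

    def parse_positive(value):
        if value <= 0:
            raise ValueError("value must be strictly positive")
        return value

    def test_parse_positive_rejects_zero():
        # If parse_positive stopped raising, pytest.raises would fail the
        # test, whereas the old try/except pattern passed silently.
        with pytest.raises(ValueError, match="strictly positive"):
            parse_positive(0)
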
12 changes: 4 additions & 8 deletions sklearn/feature_selection/tests/test_rfe.py
@@ -30,8 +30,8 @@ class MockClassifier:
     def __init__(self, foo_param=0):
         self.foo_param = foo_param

-    def fit(self, X, Y):
-        assert len(X) == len(Y)
+    def fit(self, X, y):
+        assert len(X) == len(y)
         self.coef_ = np.ones(X.shape[1], dtype=np.float64)
         return self

@@ -42,12 +42,8 @@ def predict(self, T):
     decision_function = predict
     transform = predict

-    def score(self, X=None, Y=None):
-        if self.foo_param > 1:
-            score = 1.
-        else:
-            score = 0.
-        return score
+    def score(self, X=None, y=None):
+        return 0.

     def get_params(self, deep=True):
         return {'foo_param': self.foo_param}
10 changes: 5 additions & 5 deletions sklearn/feature_selection/tests/test_variance_threshold.py
@@ -11,6 +11,7 @@
         [0, 2, 2, 3, 5],
         [1, 1, 2, 4, 0]]

+data2 = [[-0.13725701]] * 10

 def test_zero_variance():
     # Test VarianceThreshold with default setting, zero variance.
@@ -32,17 +33,16 @@ def test_variance_threshold():
     assert (len(data), 1) == X.shape


+@pytest.mark.skipif(np.var(data2) == 0,
+                    reason=('This test is not valid for this platform, '
+                            'as it relies on numerical instabilities.'))
 def test_zero_variance_floating_point_error():
     # Test that VarianceThreshold(0.0).fit eliminates features that have
     # the same value in every sample, even when floating point errors
     # cause np.var not to be 0 for the feature.
     # See #13691

-    data = [[-0.13725701]] * 10
-    if np.var(data) == 0:
-        pytest.skip('This test is not valid for this platform, as it relies '
-                    'on numerical instabilities.')
-    for X in [data, csr_matrix(data), csc_matrix(data), bsr_matrix(data)]:
+    for X in [data2, csr_matrix(data2), csc_matrix(data2), bsr_matrix(data2)]:
         msg = "No feature in X meets the variance threshold 0.00000"
         with pytest.raises(ValueError, match=msg):
             VarianceThreshold().fit(X)
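Hoisting the data-dependent skip into @pytest.mark.skipif evaluates the condition once at collection time and surfaces the reason in the report, instead of the test starting and bailing out halfway through. A minimal sketch of the same move, assuming (as the original does) that summing ten identical floats can leave a rounding residue:

    import numpy as np
    import pytest

    data2 = [[-0.13725701]] * 10

    @pytest.mark.skipif(np.var(data2) == 0,
                        reason='platform computes an exactly zero variance')
    def test_variance_is_nonzero_but_tiny():
        # Runs only where floating point rounding makes the variance of
        # ten identical values nonzero, which is what the test relies on.
        assert 0 < np.var(data2) < 1e-16
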
1 change: 1 addition & 0 deletions sklearn/gaussian_process/tests/test_kernels.py
@@ -250,6 +250,7 @@ def test_kernel_clone_after_set_params(kernel):
                                isotropic_kernels):
         length_scale = params['length_scale']
         if np.iterable(length_scale):
+            # XXX unreached code as of v0.22
             params['length_scale'] = length_scale[0]
             params['length_scale_bounds'] = bounds
         else:
3 changes: 1 addition & 2 deletions sklearn/impute/tests/test_impute.py
@@ -177,6 +177,7 @@ def test_imputation_mean_median():
         X[:, j] = np.hstack((v, z, p))

         if 0 == test_missing_values:
+            # XXX unreached code as of v0.22
             X_true[:, j] = np.hstack((v,
                                       np.repeat(
                                           true_statistics[j],
@@ -706,7 +707,6 @@ def test_iterative_imputer_truncated_normal_posterior():
     # note that starting from the wrong random seed will make this test fail
     # because random sampling doesn't occur at all when the imputation
     # is outside of the (min_value, max_value) range
-    pytest.importorskip("scipy", minversion="0.17.0")
     rng = np.random.RandomState(42)

     X = rng.normal(size=(5, 5))
@@ -763,7 +763,6 @@ def test_iterative_imputer_missing_at_transform(strategy):


 def test_iterative_imputer_transform_stochasticity():
-    pytest.importorskip("scipy", minversion="0.17.0")
     rng1 = np.random.RandomState(0)
     rng2 = np.random.RandomState(1)
     n = 100
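The deleted importorskip guards could never fire: scipy is a hard dependency of scikit-learn, and the installation floor already guarantees at least the requested version, so the skip was dead code. The minversion form remains useful for genuinely optional dependencies, sketched here with a hypothetical package name:

    import pytest

    def test_feature_backed_by_optional_package():
        # Skips when "somepkg" is missing or older than 1.2. For a hard
        # dependency whose floor already exceeds the bound, a guard like
        # this can never trigger and should be removed.
        somepkg = pytest.importorskip("somepkg", minversion="1.2")
        assert somepkg is not None
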
9 changes: 0 additions & 9 deletions sklearn/impute/tests/test_knn.py
@@ -5,18 +5,9 @@
 from sklearn.metrics.pairwise import nan_euclidean_distances
 from sklearn.metrics.pairwise import pairwise_distances
 from sklearn.neighbors import KNeighborsRegressor
-from sklearn.utils._mask import _get_mask
 from sklearn.utils._testing import assert_allclose


-def _missing_mean(X, missing_value):
-    masked_X = np.ma.array(X, mask=_get_mask(X, missing_value))
-    masked_X_mean = masked_X.mean(axis=0)
-    output = masked_X_mean.data
-    output[masked_X_mean.mask] = np.nan
-    return output
-
-
 @pytest.mark.parametrize("weights", ["uniform", "distance"])
 @pytest.mark.parametrize("n_neighbors", range(1, 6))
 def test_knn_imputer_shape(weights, n_neighbors):
2 changes: 1 addition & 1 deletion sklearn/linear_model/tests/test_logistic.py
@@ -1723,7 +1723,7 @@ def fit(X, y, **kw):
     if sys.platform == 'darwin' and solver == 'lbfgs':
         pytest.xfail('Issue #11924: LogisticRegressionCV(solver="lbfgs", '
                      'multi_class="multinomial") is nondterministic on '
-                     'MacOS.')  # pragma: no cover
+                     'MacOS.')
     assert_allclose(est_auto_multi.coef_, est_multi_multi.coef_)
     assert_allclose(est_auto_multi.predict_proba(X2),
                     est_multi_multi.predict_proba(X2))
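A "# pragma: no cover" comment tells coverage tooling to ignore a line, but this pytest.xfail call is genuinely reachable (macOS with the lbfgs solver), so the pragma misdescribed the code path and is dropped. A minimal sketch of a run-time xfail for a platform-specific flake:

    import sys

    import pytest

    def test_platform_sensitive_result():
        # Reachable on macOS, so it must not be excluded from coverage.
        if sys.platform == 'darwin':
            pytest.xfail('known nondeterminism on macOS for this check')
        assert round(0.1 + 0.2, 10) == round(0.3, 10)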