[MRG] MNT deprecate some more utils in estimator_checks.py by NicolasHug · Pull Request #15029 · scikit-learn/scikit-learn · GitHub

[MRG] MNT deprecate some more utils in estimator_checks.py #15029


Merged · 6 commits · Sep 20, 2019
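The change applies scikit-learn's usual two-release deprecation pattern to each helper: the public name is kept as a thin wrapper decorated with sklearn.utils.deprecated that forwards to a new underscore-prefixed implementation, and all internal call sites are switched to the private name. Below is a minimal sketch of that pattern, using set_checking_parameters as the example and a placeholder body (the real implementation is unchanged by this PR):

# Minimal sketch of the deprecation pattern applied in this PR: the public
# name becomes a thin wrapper that emits a DeprecationWarning on call and
# forwards to the new private implementation. Placeholder body only.
from sklearn.utils import deprecated


def _set_checking_parameters(estimator):
    # real implementation lives here, behaviour unchanged
    pass


@deprecated("set_checking_parameters is deprecated in version "
            "0.22 and will be removed in version 0.24.")
def set_checking_parameters(estimator):
    _set_checking_parameters(estimator)

Calling the public alias then emits a DeprecationWarning while behaving exactly like the private function, which is what the new tests in test_deprecated_utils.py assert.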
4 changes: 2 additions & 2 deletions sklearn/tests/test_common.py
@@ -32,7 +32,7 @@
from sklearn.utils.testing import SkipTest
from sklearn.utils.estimator_checks import (
_construct_instance,
set_checking_parameters,
_set_checking_parameters,
_set_check_estimator_ids,
check_parameters_default_constructible,
check_class_weight_balanced_linear_classifier,
@@ -93,7 +93,7 @@ def test_estimators(estimator, check):
# Common tests for estimator instances
with ignore_warnings(category=(DeprecationWarning, ConvergenceWarning,
UserWarning, FutureWarning)):
set_checking_parameters(estimator)
_set_checking_parameters(estimator)
check(estimator)


95 changes: 60 additions & 35 deletions sklearn/utils/estimator_checks.py
@@ -438,7 +438,13 @@ def _boston_subset(n_samples=200):
return BOSTON


@deprecated("set_checking_parameters is deprecated in version "
"0.22 and will be removed in version 0.24.")
def set_checking_parameters(estimator):
_set_checking_parameters(estimator)


def _set_checking_parameters(estimator):
# set parameters to speed up some estimators and
# avoid deprecated behaviour
params = estimator.get_params()
@@ -519,7 +525,7 @@ def set_checking_parameters(estimator):
estimator.set_params(handle_unknown='ignore')


class NotAnArray:
class _NotAnArray:
"""An object that is convertible to an array

Parameters
@@ -535,6 +541,13 @@ def __array__(self, dtype=None):
return self.data


@deprecated("NotAnArray is deprecated in version "
"0.22 and will be removed in version 0.24.")
class NotAnArray(_NotAnArray):
# TODO: remove in 0.24
pass


def _is_pairwise(estimator):
"""Returns True if estimator has a _pairwise attribute set to True.

@@ -569,7 +582,13 @@ def _is_pairwise_metric(estimator):
return bool(metric == 'precomputed')


@deprecated("pairwise_estimator_convert_X is deprecated in version "
"0.22 and will be removed in version 0.24.")
def pairwise_estimator_convert_X(X, estimator, kernel=linear_kernel):
return _pairwise_estimator_convert_X(X, estimator, kernel)


def _pairwise_estimator_convert_X(X, estimator, kernel=linear_kernel):

if _is_pairwise_metric(estimator):
return pairwise_distances(X, metric='euclidean')
@@ -616,7 +635,7 @@ def check_estimator_sparse_data(name, estimator_orig):
rng = np.random.RandomState(0)
X = rng.rand(40, 10)
X[X < .8] = 0
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
X_csr = sparse.csr_matrix(X)
tags = _safe_tags(estimator_orig)
if tags['binary_only']:
@@ -681,7 +700,7 @@ def check_sample_weights_pandas_series(name, estimator_orig):
X = np.array([[1, 1], [1, 2], [1, 3], [1, 4],
[2, 1], [2, 2], [2, 3], [2, 4],
[3, 1], [3, 2], [3, 3], [3, 4]])
X = pd.DataFrame(pairwise_estimator_convert_X(X, estimator_orig))
X = pd.DataFrame(_pairwise_estimator_convert_X(X, estimator_orig))
y = pd.Series([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2])
weights = pd.Series([1] * 12)
if _safe_tags(estimator, "multioutput_only"):
@@ -705,7 +724,7 @@ def check_sample_weights_list(name, estimator_orig):
estimator = clone(estimator_orig)
rnd = np.random.RandomState(0)
n_samples = 30
X = pairwise_estimator_convert_X(rnd.uniform(size=(n_samples, 3)),
X = _pairwise_estimator_convert_X(rnd.uniform(size=(n_samples, 3)),
estimator_orig)
if _safe_tags(estimator, 'binary_only'):
y = np.arange(n_samples) % 2
@@ -759,7 +778,7 @@ def check_sample_weights_invariance(name, estimator_orig):
def check_dtype_object(name, estimator_orig):
# check that estimators treat dtype object as numeric if possible
rng = np.random.RandomState(0)
X = pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig)
X = _pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig)
X = X.astype(object)
tags = _safe_tags(estimator_orig)
if tags['binary_only']:
@@ -818,7 +837,7 @@ def check_dict_unchanged(name, estimator_orig):
else:
X = 2 * rnd.uniform(size=(20, 3))

X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)

y = X[:, 0].astype(np.int)
estimator = clone(estimator_orig)
@@ -844,7 +863,13 @@ def check_dict_unchanged(name, estimator_orig):
'Estimator changes __dict__ during %s' % method)


@deprecated("is_public_parameter is deprecated in version "
"0.22 and will be removed in version 0.24.")
def is_public_parameter(attr):
return _is_public_parameter(attr)


def _is_public_parameter(attr):
return not (attr.startswith('_') or attr.endswith('_'))


@@ -857,7 +882,7 @@ def check_dont_overwrite_parameters(name, estimator_orig):
estimator = clone(estimator_orig)
rnd = np.random.RandomState(0)
X = 3 * rnd.uniform(size=(20, 3))
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
y = X[:, 0].astype(np.int)
if _safe_tags(estimator, 'binary_only'):
y[y == 2] = 1
@@ -875,7 +900,7 @@ def check_dont_overwrite_parameters(name, estimator_orig):
dict_after_fit = estimator.__dict__

public_keys_after_fit = [key for key in dict_after_fit.keys()
if is_public_parameter(key)]
if _is_public_parameter(key)]

attrs_added_by_fit = [key for key in public_keys_after_fit
if key not in dict_before_fit.keys()]
@@ -908,7 +933,7 @@ def check_fit2d_predict1d(name, estimator_orig):
# check by fitting a 2d array and predicting with a 1d array
rnd = np.random.RandomState(0)
X = 3 * rnd.uniform(size=(20, 3))
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
y = X[:, 0].astype(np.int)
tags = _safe_tags(estimator_orig)
if tags['binary_only']:
@@ -959,7 +984,7 @@ def check_methods_subset_invariance(name, estimator_orig):
# on mini batches or the whole set
rnd = np.random.RandomState(0)
X = 3 * rnd.uniform(size=(20, 3))
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
y = X[:, 0].astype(np.int)
if _safe_tags(estimator_orig, 'binary_only'):
y[y == 2] = 1
@@ -1001,7 +1026,7 @@ def check_fit2d_1sample(name, estimator_orig):
# the number of samples or the number of classes.
rnd = np.random.RandomState(0)
X = 3 * rnd.uniform(size=(1, 10))
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)

y = X[:, 0].astype(np.int)
estimator = clone(estimator_orig)
@@ -1034,7 +1059,7 @@ def check_fit2d_1feature(name, estimator_orig):
# informative message
rnd = np.random.RandomState(0)
X = 3 * rnd.uniform(size=(10, 1))
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
y = X[:, 0].astype(np.int)
estimator = clone(estimator_orig)
y = _enforce_estimator_tags_y(estimator, y)
@@ -1090,7 +1115,7 @@ def check_transformer_general(name, transformer, readonly_memmap=False):
random_state=0, n_features=2, cluster_std=0.1)
X = StandardScaler().fit_transform(X)
X -= X.min()
X = pairwise_estimator_convert_X(X, transformer)
X = _pairwise_estimator_convert_X(X, transformer)

if readonly_memmap:
X, y = create_memmap_backed_data([X, y])
@@ -1106,9 +1131,9 @@ def check_transformer_data_not_an_array(name, transformer):
# We need to make sure that we have non negative data, for things
# like NMF
X -= X.min() - .1
X = pairwise_estimator_convert_X(X, transformer)
this_X = NotAnArray(X)
this_y = NotAnArray(np.asarray(y))
X = _pairwise_estimator_convert_X(X, transformer)
this_X = _NotAnArray(X)
this_y = _NotAnArray(np.asarray(y))
_check_transformer(name, transformer, this_X, this_y)
# try the same with some list
_check_transformer(name, transformer, X.tolist(), y.tolist())
@@ -1212,7 +1237,7 @@ def check_pipeline_consistency(name, estimator_orig):
X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
random_state=0, n_features=2, cluster_std=0.1)
X -= X.min()
X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
estimator = clone(estimator_orig)
y = _enforce_estimator_tags_y(estimator, y)
set_random_state(estimator)
@@ -1238,7 +1263,7 @@ def check_fit_score_takes_y(name, estimator_orig):
rnd = np.random.RandomState(0)
n_samples = 30
X = rnd.uniform(size=(n_samples, 3))
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
if _safe_tags(estimator_orig, 'binary_only'):
y = np.arange(n_samples) % 2
else:
@@ -1267,7 +1292,7 @@ def check_fit_score_takes_y(name, estimator_orig):
def check_estimators_dtypes(name, estimator_orig):
rnd = np.random.RandomState(0)
X_train_32 = 3 * rnd.uniform(size=(20, 5)).astype(np.float32)
X_train_32 = pairwise_estimator_convert_X(X_train_32, estimator_orig)
X_train_32 = _pairwise_estimator_convert_X(X_train_32, estimator_orig)
X_train_64 = X_train_32.astype(np.float64)
X_train_int_64 = X_train_32.astype(np.int64)
X_train_int_32 = X_train_32.astype(np.int32)
@@ -1315,7 +1340,7 @@ def check_estimators_empty_data_messages(name, estimator_orig):
def check_estimators_nan_inf(name, estimator_orig):
# Checks that Estimator X's do not contain NaN or inf.
rnd = np.random.RandomState(0)
X_train_finite = pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)),
X_train_finite = _pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)),
estimator_orig)
X_train_nan = rnd.uniform(size=(10, 3))
X_train_nan[0, 0] = np.nan
@@ -1406,7 +1431,7 @@ def check_estimators_pickle(name, estimator_orig):

# some estimators can't do features less than 0
X -= X.min()
X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)

tags = _safe_tags(estimator_orig)
# include NaN values when the estimator should deal with them
@@ -1604,7 +1629,7 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False):
n_classes = len(classes)
n_samples, n_features = X.shape
classifier = clone(classifier_orig)
X = pairwise_estimator_convert_X(X, classifier)
X = _pairwise_estimator_convert_X(X, classifier)
y = _enforce_estimator_tags_y(classifier, y)

set_random_state(classifier)
@@ -1807,7 +1832,7 @@ def check_estimators_fit_returns_self(name, estimator_orig,
X, y = make_blobs(random_state=0, n_samples=21, centers=n_centers)
# some want non-negative input
X -= X.min()
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)

estimator = clone(estimator_orig)
y = _enforce_estimator_tags_y(estimator, y)
@@ -1843,7 +1868,7 @@ def check_supervised_y_2d(name, estimator_orig):
return
rnd = np.random.RandomState(0)
n_samples = 30
X = pairwise_estimator_convert_X(
X = _pairwise_estimator_convert_X(
rnd.uniform(size=(n_samples, 3)), estimator_orig
)
if tags['binary_only']:
@@ -1943,8 +1968,8 @@ def check_classifiers_classes(name, classifier_orig):
X_binary = X_multiclass[y_multiclass != 2]
y_binary = y_multiclass[y_multiclass != 2]

X_multiclass = pairwise_estimator_convert_X(X_multiclass, classifier_orig)
X_binary = pairwise_estimator_convert_X(X_binary, classifier_orig)
X_multiclass = _pairwise_estimator_convert_X(X_multiclass, classifier_orig)
X_binary = _pairwise_estimator_convert_X(X_binary, classifier_orig)

labels_multiclass = ["one", "two", "three"]
labels_binary = ["one", "two"]
@@ -1970,7 +1995,7 @@ def check_classifiers_classes(name, classifier_orig):
@ignore_warnings(category=(DeprecationWarning, FutureWarning))
def check_regressors_int(name, regressor_orig):
X, _ = _boston_subset()
X = pairwise_estimator_convert_X(X[:50], regressor_orig)
X = _pairwise_estimator_convert_X(X[:50], regressor_orig)
rnd = np.random.RandomState(0)
y = rnd.randint(3, size=X.shape[0])
y = _enforce_estimator_tags_y(regressor_orig, y)
@@ -1998,7 +2023,7 @@ def check_regressors_int(name, regressor_orig):
@ignore_warnings(category=(DeprecationWarning, FutureWarning))
def check_regressors_train(name, regressor_orig, readonly_memmap=False):
X, y = _boston_subset()
X = pairwise_estimator_convert_X(X, regressor_orig)
X = _pairwise_estimator_convert_X(X, regressor_orig)
y = StandardScaler().fit_transform(y.reshape(-1, 1)) # X is already scaled
y = y.ravel()
regressor = clone(regressor_orig)
@@ -2047,7 +2072,7 @@ def check_regressors_no_decision_function(name, regressor_orig):
regressor = clone(regressor_orig)

X = rng.normal(size=(10, 4))
X = pairwise_estimator_convert_X(X, regressor_orig)
X = _pairwise_estimator_convert_X(X, regressor_orig)
y = _enforce_estimator_tags_y(regressor, X[:, 0])

if hasattr(regressor, "n_components"):
@@ -2186,7 +2211,7 @@ def check_estimators_overwrite_params(name, estimator_orig):
X, y = make_blobs(random_state=0, n_samples=21, centers=n_centers)
# some want non-negative input
X -= X.min()
X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
estimator = clone(estimator_orig)
y = _enforce_estimator_tags_y(estimator, y)

@@ -2277,7 +2302,7 @@ def check_sparsify_coefficients(name, estimator_orig):
def check_classifier_data_not_an_array(name, estimator_orig):
X = np.array([[3, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 1],
[0, 3], [1, 0], [2, 0], [4, 4], [2, 3], [3, 2]])
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
y = [1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2]
y = _enforce_estimator_tags_y(estimator_orig, y)
check_estimators_data_not_an_array(name, estimator_orig, X, y)
@@ -2286,7 +2311,7 @@ def check_classifier_data_not_an_array(name, estimator_orig):
@ignore_warnings(category=DeprecationWarning)
def check_regressor_data_not_an_array(name, estimator_orig):
X, y = _boston_subset(n_samples=50)
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
y = _enforce_estimator_tags_y(estimator_orig, y)
check_estimators_data_not_an_array(name, estimator_orig, X, y)

@@ -2303,8 +2328,8 @@ def check_estimators_data_not_an_array(name, estimator_orig, X, y):
set_random_state(estimator_1)
set_random_state(estimator_2)

y_ = NotAnArray(np.asarray(y))
X_ = NotAnArray(np.asarray(X))
y_ = _NotAnArray(np.asarray(y))
X_ = _NotAnArray(np.asarray(X))

# fit
estimator_1.fit(X_, y_)
@@ -2638,7 +2663,7 @@ def check_fit_idempotent(name, estimator_orig):

n_samples = 100
X = rng.normal(loc=100, size=(n_samples, 2))
X = pairwise_estimator_convert_X(X, estimator)
X = _pairwise_estimator_convert_X(X, estimator)
if is_regressor(estimator_orig):
y = rng.normal(size=n_samples)
else:
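For context on the NotAnArray/_NotAnArray rename above: the helper is just a thin container implementing NumPy's __array__ protocol, which is what lets the check_*_data_not_an_array checks feed estimators something array-like that is not an ndarray. A standalone sketch of that protocol (the class name below is illustrative, not the one from the diff):

# Standalone sketch of the __array__ protocol that _NotAnArray relies on:
# np.asarray accepts any object that defines __array__ and uses its return
# value as the converted ndarray. The class name here is illustrative only.
import numpy as np


class ArrayConvertible:
    def __init__(self, data):
        self.data = np.asarray(data)

    def __array__(self, dtype=None):
        # NumPy calls this when converting the object to an ndarray
        return self.data


X = ArrayConvertible([[1.0, 2.0], [3.0, 4.0]])
print(np.asarray(X).shape)  # prints: (2, 2)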
24 changes: 24 additions & 0 deletions sklearn/utils/tests/test_deprecated_utils.py
@@ -3,7 +3,11 @@

from sklearn.dummy import DummyClassifier
from sklearn.utils.estimator_checks import choose_check_classifiers_labels
from sklearn.utils.estimator_checks import NotAnArray
from sklearn.utils.estimator_checks import enforce_estimator_tags_y
from sklearn.utils.estimator_checks import is_public_parameter
from sklearn.utils.estimator_checks import pairwise_estimator_convert_X
from sklearn.utils.estimator_checks import set_checking_parameters


# This file tests the utils that are deprecated
@@ -17,3 +21,23 @@ def test_choose_check_classifiers_labels_deprecated():
def test_enforce_estimator_tags_y():
with pytest.warns(DeprecationWarning, match="removed in version 0.24"):
enforce_estimator_tags_y(DummyClassifier(), np.array([0, 1]))


def test_notanarray():
with pytest.warns(DeprecationWarning, match="removed in version 0.24"):
NotAnArray([1, 2])


def test_is_public_parameter():
with pytest.warns(DeprecationWarning, match="removed in version 0.24"):
is_public_parameter('hello')


def test_pairwise_estimator_convert_X():
with pytest.warns(DeprecationWarning, match="removed in version 0.24"):
pairwise_estimator_convert_X([[1, 2]], DummyClassifier())


def test_set_checking_parameters():
with pytest.warns(DeprecationWarning, match="removed in version 0.24"):
set_checking_parameters(DummyClassifier())
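For third-party code that still imports these helpers from sklearn.utils.estimator_checks, a hypothetical transition snippet is sketched below: the deprecated public aliases keep working until 0.24, and the warning can be silenced explicitly while the calling code migrates.

# Hypothetical downstream usage during the deprecation window: the public
# alias still works but warns; silence the warning explicitly while the
# calling code migrates. Valid only while the alias exists (before 0.24).
import warnings

from sklearn.dummy import DummyClassifier
from sklearn.utils.estimator_checks import set_checking_parameters

with warnings.catch_warnings():
    warnings.simplefilter("ignore", DeprecationWarning)
    set_checking_parameters(DummyClassifier())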