[MRG] MNT deprecate some more utils in estimator_checks.py by NicolasHug · Pull Request #15029 · scikit-learn/scikit-learn · GitHub

[MRG] MNT deprecate some more utils in estimator_checks.py #15029


Merged · 6 commits · Sep 20, 2019
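The change applies scikit-learn's usual two-release deprecation pattern to each helper: the public name is kept as a thin wrapper decorated with sklearn.utils.deprecated that forwards to a new underscore-prefixed implementation, and all internal call sites are switched to the private name. Below is a minimal sketch of that pattern, using set_checking_parameters as the example and a placeholder body (the real implementation is unchanged by this PR):

# Minimal sketch of the deprecation pattern applied in this PR: the public
# name becomes a thin wrapper that emits a DeprecationWarning on call and
# forwards to the new private implementation. Placeholder body only.
from sklearn.utils import deprecated


def _set_checking_parameters(estimator):
    # real implementation lives here, behaviour unchanged
    pass


@deprecated("set_checking_parameters is deprecated in version "
            "0.22 and will be removed in version 0.24.")
def set_checking_parameters(estimator):
    _set_checking_parameters(estimator)

Calling the public alias then emits a DeprecationWarning while behaving exactly like the private function, which is what the new tests in test_deprecated_utils.py assert.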
4 changes: 2 additions & 2 deletions sklearn/tests/test_common.py
@@ -32,7 +32,7 @@
from sklearn.utils.testing import SkipTest
from sklearn.utils.estimator_checks import (
_construct_instance,
set_checking_parameters,
_set_checking_parameters,
_set_check_estimator_ids,
check_parameters_default_constructible,
check_class_weight_balanced_linear_classifier,
@@ -93,7 +93,7 @@ def test_estimators(estimator, check):
# Common tests for estimator instances
with ignore_warnings(category=(DeprecationWarning, ConvergenceWarning,
UserWarning, FutureWarning)):
set_checking_parameters(estimator)
_set_checking_parameters(estimator)
check(estimator)


95 changes: 60 additions & 35 deletions sklearn/utils/estimator_checks.py
@@ -438,7 +438,13 @@ def _boston_subset(n_samples=200):
return BOSTON


@deprecated("set_checking_parameters is deprecated in version "
"0.22 and will be removed in version 0.24.")
def set_checking_parameters(estimator):
_set_checking_parameters(estimator)


def _set_checking_parameters(estimator):
# set parameters to speed up some estimators and
# avoid deprecated behaviour
params = estimator.get_params()
@@ -519,7 +525,7 @@ def set_checking_parameters(estimator):
estimator.set_params(handle_unknown='ignore')


class NotAnArray:
class _NotAnArray:
"""An object that is convertible to an array

Parameters
@@ -535,6 +541,13 @@ def __array__(self, dtype=None):
return self.data


@deprecated("NotAnArray is deprecated in version "
"0.22 and will be removed in version 0.24.")
class NotAnArray(_NotAnArray):
# TODO: remove in 0.24
pass


def _is_pairwise(estimator):
"""Returns True if estimator has a _pairwise attribute set to True.

@@ -569,7 +582,13 @@ def _is_pairwise_metric(estimator):
return bool(metric == 'precomputed')


@deprecated("pairwise_estimator_convert_X is deprecated in version "
"0.22 and will be removed in version 0.24.")
def pairwise_estimator_convert_X(X, estimator, kernel=linear_kernel):
return _pairwise_estimator_convert_X(X, estimator, kernel)


def _pairwise_estimator_convert_X(X, estimator, kernel=linear_kernel):

if _is_pairwise_metric(estimator):
return pairwise_distances(X, metric='euclidean')
@@ -616,7 +635,7 @@ def check_estimator_sparse_data(name, estimator_orig):
rng = np.random.RandomState(0)
X = rng.rand(40, 10)
X[X < .8] = 0
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
X_csr = sparse.csr_matrix(X)
tags = _safe_tags(estimator_orig)
if tags['binary_only']:
@@ -681,7 +700,7 @@ def check_sample_weights_pandas_series(name, estimator_orig):
X = np.array([[1, 1], [1, 2], [1, 3], [1, 4],
[2, 1], [2, 2], [2, 3], [2, 4],
[3, 1], [3, 2], [3, 3], [3, 4]])
X = pd.DataFrame(pairwise_estimator_convert_X(X, estimator_orig))
X = pd.DataFrame(_pairwise_estimator_convert_X(X, estimator_orig))
y = pd.Series([1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 2])
weights = pd.Series([1] * 12)
if _safe_tags(estimator, "multioutput_only"):
@@ -705,7 +724,7 @@ def check_sample_weights_list(name, estimator_orig):
estimator = clone(estimator_orig)
rnd = np.random.RandomState(0)
n_samples = 30
X = pairwise_estimator_convert_X(rnd.uniform(size=(n_samples, 3)),
X = _pairwise_estimator_convert_X(rnd.uniform(size=(n_samples, 3)),
estimator_orig)
if _safe_tags(estimator, 'binary_only'):
y = np.arange(n_samples) % 2
@@ -759,7 +778,7 @@ def check_sample_weights_invariance(name, estimator_orig):
def check_dtype_object(name, estimator_orig):
# check that estimators treat dtype object as numeric if possible
rng = np.random.RandomState(0)
X = pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig)
X = _pairwise_estimator_convert_X(rng.rand(40, 10), estimator_orig)
X = X.astype(object)
tags = _safe_tags(estimator_orig)
if tags['binary_only']:
@@ -818,7 +837,7 @@ def check_dict_unchanged(name, estimator_orig):
else:
X = 2 * rnd.uniform(size=(20, 3))

X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)

y = X[:, 0].astype(np.int)
estimator = clone(estimator_orig)
@@ -844,7 +863,13 @@ def check_dict_unchanged(name, estimator_orig):
'Estimator changes __dict__ during %s' % method)


@deprecated("is_public_parameter is deprecated in version "
"0.22 and will be removed in version 0.24.")
def is_public_parameter(attr):
return _is_public_parameter(attr)


def _is_public_parameter(attr):
return not (attr.startswith('_') or attr.endswith('_'))


@@ -857,7 +882,7 @@ def check_dont_overwrite_parameters(name, estimator_orig):
estimator = clone(estimator_orig)
rnd = np.random.RandomState(0)
X = 3 * rnd.uniform(size=(20, 3))
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
y = X[:, 0].astype(np.int)
if _safe_tags(estimator, 'binary_only'):
y[y == 2] = 1
@@ -875,7 +900,7 @@ def check_dont_overwrite_parameters(name, estimator_orig):
dict_after_fit = estimator.__dict__

public_keys_after_fit = [key for key in dict_after_fit.keys()
if is_public_parameter(key)]
if _is_public_parameter(key)]

attrs_added_by_fit = [key for key in public_keys_after_fit
if key not in dict_before_fit.keys()]
@@ -908,7 +933,7 @@ def check_fit2d_predict1d(name, estimator_orig):
# check by fitting a 2d array and predicting with a 1d array
rnd = np.random.RandomState(0)
X = 3 * rnd.uniform(size=(20, 3))
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
y = X[:, 0].astype(np.int)
tags = _safe_tags(estimator_orig)
if tags['binary_only']:
@@ -959,7 +984,7 @@ def check_methods_subset_invariance(name, estimator_orig):
# on mini batches or the whole set
rnd = np.random.RandomState(0)
X = 3 * rnd.uniform(size=(20, 3))
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
y = X[:, 0].astype(np.int)
if _safe_tags(estimator_orig, 'binary_only'):
y[y == 2] = 1
@@ -1001,7 +1026,7 @@ def check_fit2d_1sample(name, estimator_orig):
# the number of samples or the number of classes.
rnd = np.random.RandomState(0)
X = 3 * rnd.uniform(size=(1, 10))
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)

y = X[:, 0].astype(np.int)
estimator = clone(estimator_orig)
@@ -1034,7 +1059,7 @@ def check_fit2d_1feature(name, estimator_orig):
# informative message
rnd = np.random.RandomState(0)
X = 3 * rnd.uniform(size=(10, 1))
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
y = X[:, 0].astype(np.int)
estimator = clone(estimator_orig)
y = _enforce_estimator_tags_y(estimator, y)
@@ -1090,7 +1115,7 @@ def check_transformer_general(name, transformer, readonly_memmap=False):
random_state=0, n_features=2, cluster_std=0.1)
X = StandardScaler().fit_transform(X)
X -= X.min()
X = pairwise_estimator_convert_X(X, transformer)
X = _pairwise_estimator_convert_X(X, transformer)

if readonly_memmap:
X, y = create_memmap_backed_data([X, y])
@@ -1106,9 +1131,9 @@ def check_transformer_data_not_an_array(name, transformer):
# We need to make sure that we have non negative data, for things
# like NMF
X -= X.min() - .1
X = pairwise_estimator_convert_X(X, transformer)
this_X = NotAnArray(X)
this_y = NotAnArray(np.asarray(y))
X = _pairwise_estimator_convert_X(X, transformer)
this_X = _NotAnArray(X)
this_y = _NotAnArray(np.asarray(y))
_check_transformer(name, transformer, this_X, this_y)
# try the same with some list
_check_transformer(name, transformer, X.tolist(), y.tolist())
@@ -1212,7 +1237,7 @@ def check_pipeline_consistency(name, estimator_orig):
X, y = make_blobs(n_samples=30, centers=[[0, 0, 0], [1, 1, 1]],
random_state=0, n_features=2, cluster_std=0.1)
X -= X.min()
X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
estimator = clone(estimator_orig)
y = _enforce_estimator_tags_y(estimator, y)
set_random_state(estimator)
@@ -1238,7 +1263,7 @@ def check_fit_score_takes_y(name, estimator_orig):
rnd = np.random.RandomState(0)
n_samples = 30
X = rnd.uniform(size=(n_samples, 3))
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
if _safe_tags(estimator_orig, 'binary_only'):
y = np.arange(n_samples) % 2
else:
@@ -1267,7 +1292,7 @@ def check_fit_score_takes_y(name, estimator_orig):
def check_estimators_dtypes(name, estimator_orig):
rnd = np.random.RandomState(0)
X_train_32 = 3 * rnd.uniform(size=(20, 5)).astype(np.float32)
X_train_32 = pairwise_estimator_convert_X(X_train_32, estimator_orig)
X_train_32 = _pairwise_estimator_convert_X(X_train_32, estimator_orig)
X_train_64 = X_train_32.astype(np.float64)
X_train_int_64 = X_train_32.astype(np.int64)
X_train_int_32 = X_train_32.astype(np.int32)
@@ -1315,7 +1340,7 @@ def check_estimators_empty_data_messages(name, estimator_orig):
def check_estimators_nan_inf(name, estimator_orig):
# Checks that Estimator X's do not contain NaN or inf.
rnd = np.random.RandomState(0)
X_train_finite = pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)),
X_train_finite = _pairwise_estimator_convert_X(rnd.uniform(size=(10, 3)),
estimator_orig)
X_train_nan = rnd.uniform(size=(10, 3))
X_train_nan[0, 0] = np.nan
@@ -1406,7 +1431,7 @@ def check_estimators_pickle(name, estimator_orig):

# some estimators can't do features less than 0
X -= X.min()
X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)

tags = _safe_tags(estimator_orig)
# include NaN values when the estimator should deal with them
@@ -1604,7 +1629,7 @@ def check_classifiers_train(name, classifier_orig, readonly_memmap=False):
n_classes = len(classes)
n_samples, n_features = X.shape
classifier = clone(classifier_orig)
X = pairwise_estimator_convert_X(X, classifier)
X = _pairwise_estimator_convert_X(X, classifier)
y = _enforce_estimator_tags_y(classifier, y)

set_random_state(classifier)
@@ -1807,7 +1832,7 @@ def check_estimators_fit_returns_self(name, estimator_orig,
X, y = make_blobs(random_state=0, n_samples=21, centers=n_centers)
# some want non-negative input
X -= X.min()
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)

estimator = clone(estimator_orig)
y = _enforce_estimator_tags_y(estimator, y)
@@ -1843,7 +1868,7 @@ def check_supervised_y_2d(name, estimator_orig):
return
rnd = np.random.RandomState(0)
n_samples = 30
X = pairwise_estimator_convert_X(
X = _pairwise_estimator_convert_X(
rnd.uniform(size=(n_samples, 3)), estimator_orig
)
if tags['binary_only']:
@@ -1943,8 +1968,8 @@ def check_classifiers_classes(name, classifier_orig):
X_binary = X_multiclass[y_multiclass != 2]
y_binary = y_multiclass[y_multiclass != 2]

X_multiclass = pairwise_estimator_convert_X(X_multiclass, classifier_orig)
X_binary = pairwise_estimator_convert_X(X_binary, classifier_orig)
X_multiclass = _pairwise_estimator_convert_X(X_multiclass, classifier_orig)
X_binary = _pairwise_estimator_convert_X(X_binary, classifier_orig)

labels_multiclass = ["one", "two", "three"]
labels_binary = ["one", "two"]
@@ -1970,7 +1995,7 @@ def check_classifiers_classes(name, classifier_orig):
@ignore_warnings(category=(DeprecationWarning, FutureWarning))
def check_regressors_int(name, regressor_orig):
X, _ = _boston_subset()
X = pairwise_estimator_convert_X(X[:50], regressor_orig)
X = _pairwise_estimator_convert_X(X[:50], regressor_orig)
rnd = np.random.RandomState(0)
y = rnd.randint(3, size=X.shape[0])
y = _enforce_estimator_tags_y(regressor_orig, y)
@@ -1998,7 +2023,7 @@ def check_regressors_int(name, regressor_orig):
@ignore_warnings(category=(DeprecationWarning, FutureWarning))
def check_regressors_train(name, regressor_orig, readonly_memmap=False):
X, y = _boston_subset()
X = pairwise_estimator_convert_X(X, regressor_orig)
X = _pairwise_estimator_convert_X(X, regressor_orig)
y = StandardScaler().fit_transform(y.reshape(-1, 1)) # X is already scaled
y = y.ravel()
regressor = clone(regressor_orig)
@@ -2047,7 +2072,7 @@ def check_regressors_no_decision_function(name, regressor_orig):
regressor = clone(regressor_orig)

X = rng.normal(size=(10, 4))
X = pairwise_estimator_convert_X(X, regressor_orig)
X = _pairwise_estimator_convert_X(X, regressor_orig)
y = _enforce_estimator_tags_y(regressor, X[:, 0])

if hasattr(regressor, "n_components"):
@@ -2186,7 +2211,7 @@ def check_estimators_overwrite_params(name, estimator_orig):
X, y = make_blobs(random_state=0, n_samples=21, centers=n_centers)
# some want non-negative input
X -= X.min()
X = pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
X = _pairwise_estimator_convert_X(X, estimator_orig, kernel=rbf_kernel)
estimator = clone(estimator_orig)
y = _enforce_estimator_tags_y(estimator, y)

@@ -2277,7 +2302,7 @@ def check_sparsify_coefficients(name, estimator_orig):
def check_classifier_data_not_an_array(name, estimator_orig):
X = np.array([[3, 0], [0, 1], [0, 2], [1, 1], [1, 2], [2, 1],
[0, 3], [1, 0], [2, 0], [4, 4], [2, 3], [3, 2]])
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
y = [1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2]
y = _enforce_estimator_tags_y(estimator_orig, y)
check_estimators_data_not_an_array(name, estimator_orig, X, y)
@@ -2286,7 +2311,7 @@ def check_classifier_data_not_an_array(name, estimator_orig):
@ignore_warnings(category=DeprecationWarning)
def check_regressor_data_not_an_array(name, estimator_orig):
X, y = _boston_subset(n_samples=50)
X = pairwise_estimator_convert_X(X, estimator_orig)
X = _pairwise_estimator_convert_X(X, estimator_orig)
y = _enforce_estimator_tags_y(estimator_orig, y)
check_estimators_data_not_an_array(name, estimator_orig, X, y)

@@ -2303,8 +2328,8 @@ def check_estimators_data_not_an_array(name, estimator_orig, X, y):
set_random_state(estimator_1)
set_random_state(estimator_2)

y_ = NotAnArray(np.asarray(y))
X_ = NotAnArray(np.asarray(X))
y_ = _NotAnArray(np.asarray(y))
X_ = _NotAnArray(np.asarray(X))

# fit
estimator_1.fit(X_, y_)
@@ -2638,7 +2663,7 @@ def check_fit_idempotent(name, estimator_orig):

n_samples = 100
X = rng.normal(loc=100, size=(n_samples, 2))
X = pairwise_estimator_convert_X(X, estimator)
X = _pairwise_estimator_convert_X(X, estimator)
if is_regressor(estimator_orig):
y = rng.normal(size=n_samples)
else:
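For context on the NotAnArray/_NotAnArray rename above: the helper is just a thin container implementing NumPy's __array__ protocol, which is what lets the check_*_data_not_an_array checks feed estimators something array-like that is not an ndarray. A standalone sketch of that protocol (the class name below is illustrative, not the one from the diff):

# Standalone sketch of the __array__ protocol that _NotAnArray relies on:
# np.asarray accepts any object that defines __array__ and uses its return
# value as the converted ndarray. The class name here is illustrative only.
import numpy as np


class ArrayConvertible:
    def __init__(self, data):
        self.data = np.asarray(data)

    def __array__(self, dtype=None):
        # NumPy calls this when converting the object to an ndarray
        return self.data


X = ArrayConvertible([[1.0, 2.0], [3.0, 4.0]])
print(np.asarray(X).shape)  # prints: (2, 2)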
24 changes: 24 additions & 0 deletions sklearn/utils/tests/test_deprecated_utils.py
@@ -3,7 +3,11 @@

from sklearn.dummy import DummyClassifier
from sklearn.utils.estimator_checks import choose_check_classifiers_labels
from sklearn.utils.estimator_checks import NotAnArray
from sklearn.utils.estimator_checks import enforce_estimator_tags_y
from sklearn.utils.estimator_checks import is_public_parameter
from sklearn.utils.estimator_checks import pairwise_estimator_convert_X
from sklearn.utils.estimator_checks import set_checking_parameters


# This file tests the utils that are deprecated
@@ -17,3 +21,23 @@ def test_choose_check_classifiers_labels_deprecated():
def test_enforce_estimator_tags_y():
with pytest.warns(DeprecationWarning, match="removed in version 0.24"):
enforce_estimator_tags_y(DummyClassifier(), np.array([0, 1]))


def test_notanarray():
with pytest.warns(DeprecationWarning, match="removed in version 0.24"):
NotAnArray([1, 2])


def test_is_public_parameter():
with pytest.warns(DeprecationWarning, match="removed in version 0.24"):
is_public_parameter('hello')


def test_pairwise_estimator_convert_X():
with pytest.warns(DeprecationWarning, match="removed in version 0.24"):
pairwise_estimator_convert_X([[1, 2]], DummyClassifier())


def test_set_checking_parameters():
with pytest.warns(DeprecationWarning, match="removed in version 0.24"):
set_checking_parameters(DummyClassifier())
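For third-party code that still imports these helpers from sklearn.utils.estimator_checks, a hypothetical transition snippet is sketched below: the deprecated public aliases keep working until 0.24, and the warning can be silenced explicitly while the calling code migrates.

# Hypothetical downstream usage during the deprecation window: the public
# alias still works but warns; silence the warning explicitly while the
# calling code migrates. Valid only while the alias exists (before 0.24).
import warnings

from sklearn.dummy import DummyClassifier
from sklearn.utils.estimator_checks import set_checking_parameters

with warnings.catch_warnings():
    warnings.simplefilter("ignore", DeprecationWarning)
    set_checking_parameters(DummyClassifier())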