diff --git a/doc/whats_new/v1.0.rst b/doc/whats_new/v1.0.rst index 4db2b097d2e75..5cac7e63e31fa 100644 --- a/doc/whats_new/v1.0.rst +++ b/doc/whats_new/v1.0.rst @@ -5,13 +5,23 @@ .. _changes_1_0_1: Version 1.0.1 -============= **In Development** Changelog --------- +Fixed models +------------ + +- |Fix| Non-fit methods in the following classes do not raise a UserWarning + when fitted on DataFrames with valid feature names: + :class:`covariance.EllipticEnvelope`, :class:`ensemble.IsolationForest`, + :class:`ensemble.AdaBoostClassifier`, :class:`neighbors.KNeighborsClassifier`, + :class:`neighbors.KNeighborsRegressor`, + :class:`neighbors.RadiusNeighborsClassifier`, + :class:`neighbors.RadiusNeighborsRegressor`. :pr:`21199` by `Thomas Fan`_. + :mod:`sklearn.calibration` .......................... @@ -25,6 +35,17 @@ Changelog the Bayesian priors. :pr:`21179` by :user:`Guillaume Lemaitre <glemaitre>`. +:mod:`sklearn.neighbors` +........................ + +- |Fix| :class:`neighbors.KNeighborsClassifier`, + :class:`neighbors.KNeighborsRegressor`, + :class:`neighbors.RadiusNeighborsClassifier`, + :class:`neighbors.RadiusNeighborsRegressor` with `metric="precomputed"` raises + an error for `bsr` and `dok` sparse matrices in methods: `fit`, `kneighbors` + and `radius_neighbors`, due to handling of explicit zeros in `bsr` and `dok` + :term:`sparse graph` formats. :pr:`21199` by `Thomas Fan`_. + .. _changes_1_0: Version 1.0.0 diff --git a/sklearn/covariance/_elliptic_envelope.py b/sklearn/covariance/_elliptic_envelope.py index b63a4a67a9cfd..569e478e38ff4 100644 --- a/sklearn/covariance/_elliptic_envelope.py +++ b/sklearn/covariance/_elliptic_envelope.py @@ -215,7 +215,6 @@ def score_samples(self, X): Opposite of the Mahalanobis distances. 
""" check_is_fitted(self) - X = self._validate_data(X, reset=False) return -self.mahalanobis(X) def predict(self, X): diff --git a/sklearn/ensemble/_iforest.py b/sklearn/ensemble/_iforest.py index e7562616b0cc5..2dc8d9d058d88 100644 --- a/sklearn/ensemble/_iforest.py +++ b/sklearn/ensemble/_iforest.py @@ -337,9 +337,9 @@ def predict(self, X): be considered as an inlier according to the fitted model. """ check_is_fitted(self) - X = self._validate_data(X, accept_sparse="csr", reset=False) - is_inlier = np.ones(X.shape[0], dtype=int) - is_inlier[self.decision_function(X) < 0] = -1 + decision_func = self.decision_function(X) + is_inlier = np.ones_like(decision_func, dtype=int) + is_inlier[decision_func < 0] = -1 return is_inlier def decision_function(self, X): diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py index a47937880d91c..8fd6cc0228137 100644 --- a/sklearn/ensemble/_weight_boosting.py +++ b/sklearn/ensemble/_weight_boosting.py @@ -676,8 +676,6 @@ def predict(self, X): y : ndarray of shape (n_samples,) The predicted classes. """ - X = self._check_X(X) - pred = self.decision_function(X) if self.n_classes_ == 2: @@ -852,8 +850,6 @@ def predict_proba(self, X): outputs is the same of that of the :term:`classes_` attribute. """ check_is_fitted(self) - X = self._check_X(X) - n_classes = self.n_classes_ if n_classes == 1: @@ -886,7 +882,6 @@ def staged_predict_proba(self, X): The class probabilities of the input samples. The order of outputs is the same of that of the :term:`classes_` attribute. """ - X = self._check_X(X) n_classes = self.n_classes_ @@ -912,7 +907,6 @@ def predict_log_proba(self, X): The class probabilities of the input samples. The order of outputs is the same of that of the :term:`classes_` attribute. 
""" - X = self._check_X(X) return np.log(self.predict_proba(X)) diff --git a/sklearn/neighbors/_classification.py b/sklearn/neighbors/_classification.py index ced21c7885962..d616eaa2f32a8 100644 --- a/sklearn/neighbors/_classification.py +++ b/sklearn/neighbors/_classification.py @@ -211,8 +211,6 @@ def predict(self, X): y : ndarray of shape (n_queries,) or (n_queries, n_outputs) Class labels for each data sample. """ - X = self._validate_data(X, accept_sparse="csr", reset=False) - neigh_dist, neigh_ind = self.kneighbors(X) classes_ = self.classes_ _y = self._y @@ -255,8 +253,6 @@ def predict_proba(self, X): The class probabilities of the input samples. Classes are ordered by lexicographic order. """ - X = self._validate_data(X, accept_sparse="csr", reset=False) - neigh_dist, neigh_ind = self.kneighbors(X) classes_ = self.classes_ @@ -271,7 +267,7 @@ def predict_proba(self, X): if weights is None: weights = np.ones_like(neigh_ind) - all_rows = np.arange(X.shape[0]) + all_rows = np.arange(n_queries) probabilities = [] for k, classes_k in enumerate(classes_): pred_labels = _y[:, k][neigh_ind] @@ -614,7 +610,6 @@ def predict_proba(self, X): by lexicographic order. """ - X = self._validate_data(X, accept_sparse="csr", reset=False) n_queries = _num_samples(X) neigh_dist, neigh_ind = self.radius_neighbors(X) diff --git a/sklearn/neighbors/_regression.py b/sklearn/neighbors/_regression.py index 620a7af1d6708..75ef124ad1711 100644 --- a/sklearn/neighbors/_regression.py +++ b/sklearn/neighbors/_regression.py @@ -226,8 +226,6 @@ def predict(self, X): y : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int Target values. """ - X = self._validate_data(X, accept_sparse="csr", reset=False) - neigh_dist, neigh_ind = self.kneighbors(X) weights = _get_weights(neigh_dist, self.weights) @@ -436,8 +434,6 @@ def predict(self, X): dtype=double Target values. 
""" - X = self._validate_data(X, accept_sparse="csr", reset=False) - neigh_dist, neigh_ind = self.radius_neighbors(X) weights = _get_weights(neigh_dist, self.weights) diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index 1e1f3a082786e..a9592ff9f2c51 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -1088,7 +1088,12 @@ def test_kneighbors_regressor_sparse( assert np.mean(knn.predict(X2).round() == y) > 0.95 X2_pre = sparsev(pairwise_distances(X, metric="euclidean")) - assert np.mean(knn_pre.predict(X2_pre).round() == y) > 0.95 + if sparsev in {dok_matrix, bsr_matrix}: + msg = "not supported due to its handling of explicit zeros" + with pytest.raises(TypeError, match=msg): + knn_pre.predict(X2_pre) + else: + assert np.mean(knn_pre.predict(X2_pre).round() == y) > 0.95 def test_neighbors_iris(): diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 914b4e6168247..37537bc1b0498 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -3779,7 +3779,14 @@ def check_dataframe_column_names_consistency(name, estimator_orig): check_methods.append((method, callable_method)) for _, method in check_methods: - method(X) # works + with warnings.catch_warnings(): + warnings.filterwarnings( + "error", + message="X does not have valid feature names", + category=UserWarning, + module="sklearn", + ) + method(X) # works without UserWarning for valid features invalid_names = [ (names[::-1], "Feature names must be in the same order as they were in fit."),