FIX Non-fit methods no longer raise UserWarning for valid dataframes by thomasjpfan · Pull Request #21199 · scikit-learn/scikit-learn

Merged
23 changes: 22 additions & 1 deletion doc/whats_new/v1.0.rst
@@ -5,13 +5,23 @@
.. _changes_1_0_1:

Version 1.0.1
=============

**In Development**

Changelog
---------

Fixed models
------------

- |Fix| Non-fit methods in the following classes no longer raise a UserWarning
when the estimator was fitted on DataFrames with valid feature names:
:class:`covariance.EllipticEnvelope`, :class:`ensemble.IsolationForest`,
:class:`ensemble.AdaBoostClassifier`, :class:`neighbors.KNeighborsClassifier`,
:class:`neighbors.KNeighborsRegressor`,
:class:`neighbors.RadiusNeighborsClassifier`,
:class:`neighbors.RadiusNeighborsRegressor`. :pr:`21199` by `Thomas Fan`_.
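
A minimal sketch of the fixed behavior (assumes pandas is installed; any of the
listed estimators would do):

    import warnings
    from sklearn.datasets import load_iris
    from sklearn.neighbors import KNeighborsClassifier

    X, y = load_iris(return_X_y=True, as_frame=True)  # X is a DataFrame with feature names
    knn = KNeighborsClassifier().fit(X, y)
    with warnings.catch_warnings():
        # escalate the warning so the example fails loudly if it ever reappears
        warnings.simplefilter("error", UserWarning)
        knn.predict(X)  # no "X does not have valid feature names" warning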

:mod:`sklearn.calibration`
..........................

@@ -25,6 +35,17 @@ Changelog
the Bayesian priors.
:pr:`21179` by :user:`Guillaume Lemaitre <glemaitre>`.

:mod:`sklearn.neighbors`
........................

- |Fix| :class:`neighbors.KNeighborsClassifier`,
:class:`neighbors.KNeighborsRegressor`,
:class:`neighbors.RadiusNeighborsClassifier`,
:class:`neighbors.RadiusNeighborsRegressor` with `metric="precomputed"` now raise
an error for `bsr` and `dok` sparse matrices in the `fit`, `kneighbors`,
and `radius_neighbors` methods, due to the handling of explicit zeros in the
`bsr` and `dok` :term:`sparse graph` formats. :pr:`21199` by `Thomas Fan`_.
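
A hedged sketch of the new error (the message text is taken from the test added
below and may evolve); `bsr` and `dok` precomputed matrices are rejected because
of how they store explicit zeros:

    import numpy as np
    from scipy.sparse import dok_matrix
    from sklearn.metrics import pairwise_distances
    from sklearn.neighbors import KNeighborsClassifier

    rng = np.random.RandomState(0)
    X = rng.rand(20, 3)
    y = rng.randint(0, 2, 20)
    dist = dok_matrix(pairwise_distances(X, metric="euclidean"))
    knn = KNeighborsClassifier(n_neighbors=3, metric="precomputed")
    try:
        knn.fit(dist, y)  # expected to raise TypeError after this change
    except TypeError as exc:
        print(exc)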

.. _changes_1_0:

Version 1.0.0
1 change: 0 additions & 1 deletion sklearn/covariance/_elliptic_envelope.py
@@ -215,7 +215,6 @@ def score_samples(self, X):
Opposite of the Mahalanobis distances.
"""
check_is_fitted(self)
X = self._validate_data(X, reset=False)
return -self.mahalanobis(X)

def predict(self, X):
6 changes: 3 additions & 3 deletions sklearn/ensemble/_iforest.py
@@ -337,9 +337,9 @@ def predict(self, X):
be considered as an inlier according to the fitted model.
"""
check_is_fitted(self)
X = self._validate_data(X, accept_sparse="csr", reset=False)
is_inlier = np.ones(X.shape[0], dtype=int)
is_inlier[self.decision_function(X) < 0] = -1
decision_func = self.decision_function(X)
is_inlier = np.ones_like(decision_func, dtype=int)
is_inlier[decision_func < 0] = -1
return is_inlier

def decision_function(self, X):
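
The hunk above drops the redundant `_validate_data` call in `predict`: the spurious
warning came from validating the input twice (the first validation converts a
DataFrame to an ndarray without feature names, and the second validation inside
`decision_function` then warned). A toy illustration of the new mask computation,
not library code:

    import numpy as np

    decision_func = np.array([0.2, -0.1, 0.05, -0.3])  # hypothetical anomaly scores
    is_inlier = np.ones_like(decision_func, dtype=int)
    is_inlier[decision_func < 0] = -1
    print(is_inlier)  # [ 1 -1  1 -1]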
6 changes: 0 additions & 6 deletions sklearn/ensemble/_weight_boosting.py
@@ -676,8 +676,6 @@ def predict(self, X):
y : ndarray of shape (n_samples,)
The predicted classes.
"""
X = self._check_X(X)

pred = self.decision_function(X)

if self.n_classes_ == 2:
@@ -852,8 +850,6 @@ def predict_proba(self, X):
outputs is the same of that of the :term:`classes_` attribute.
"""
check_is_fitted(self)
X = self._check_X(X)

n_classes = self.n_classes_

if n_classes == 1:
@@ -886,7 +882,6 @@ def staged_predict_proba(self, X):
The class probabilities of the input samples. The order of
outputs is the same of that of the :term:`classes_` attribute.
"""
X = self._check_X(X)

n_classes = self.n_classes_

@@ -912,7 +907,6 @@ def predict_log_proba(self, X):
The class probabilities of the input samples. The order of
outputs is the same of that of the :term:`classes_` attribute.
"""
X = self._check_X(X)
return np.log(self.predict_proba(X))


7 changes: 1 addition & 6 deletions sklearn/neighbors/_classification.py
@@ -211,8 +211,6 @@ def predict(self, X):
y : ndarray of shape (n_queries,) or (n_queries, n_outputs)
Class labels for each data sample.
"""
X = self._validate_data(X, accept_sparse="csr", reset=False)

neigh_dist, neigh_ind = self.kneighbors(X)
classes_ = self.classes_
_y = self._y
@@ -255,8 +253,6 @@ def predict_proba(self, X):
The class probabilities of the input samples. Classes are ordered
by lexicographic order.
"""
X = self._validate_data(X, accept_sparse="csr", reset=False)

neigh_dist, neigh_ind = self.kneighbors(X)

classes_ = self.classes_
@@ -271,7 +267,7 @@ def predict_proba(self, X):
if weights is None:
weights = np.ones_like(neigh_ind)

all_rows = np.arange(X.shape[0])
all_rows = np.arange(n_queries)
probabilities = []
for k, classes_k in enumerate(classes_):
pred_labels = _y[:, k][neigh_ind]
@@ -614,7 +610,6 @@ def predict_proba(self, X):
by lexicographic order.
"""

X = self._validate_data(X, accept_sparse="csr", reset=False)
n_queries = _num_samples(X)

neigh_dist, neigh_ind = self.radius_neighbors(X)
4 changes: 0 additions & 4 deletions sklearn/neighbors/_regression.py
@@ -226,8 +226,6 @@ def predict(self, X):
y : ndarray of shape (n_queries,) or (n_queries, n_outputs), dtype=int
Target values.
"""
X = self._validate_data(X, accept_sparse="csr", reset=False)

neigh_dist, neigh_ind = self.kneighbors(X)

weights = _get_weights(neigh_dist, self.weights)
@@ -436,8 +434,6 @@ def predict(self, X):
dtype=double
Target values.
"""
X = self._validate_data(X, accept_sparse="csr", reset=False)

neigh_dist, neigh_ind = self.radius_neighbors(X)

weights = _get_weights(neigh_dist, self.weights)
7 changes: 6 additions & 1 deletion sklearn/neighbors/tests/test_neighbors.py
@@ -1088,7 +1088,12 @@ def test_kneighbors_regressor_sparse(
assert np.mean(knn.predict(X2).round() == y) > 0.95

X2_pre = sparsev(pairwise_distances(X, metric="euclidean"))
assert np.mean(knn_pre.predict(X2_pre).round() == y) > 0.95
if sparsev in {dok_matrix, bsr_matrix}:
msg = "not supported due to its handling of explicit zeros"
with pytest.raises(TypeError, match=msg):
knn_pre.predict(X2_pre)
else:
assert np.mean(knn_pre.predict(X2_pre).round() == y) > 0.95


def test_neighbors_iris():
9 changes: 8 additions & 1 deletion sklearn/utils/estimator_checks.py
@@ -3779,7 +3779,14 @@ def check_dataframe_column_names_consistency(name, estimator_orig):
check_methods.append((method, callable_method))

for _, method in check_methods:
method(X) # works
with warnings.catch_warnings():
warnings.filterwarnings(
"error",
message="X does not have valid feature names",
category=UserWarning,
module="sklearn",
)
method(X) # works without UserWarning for valid features

invalid_names = [
(names[::-1], "Feature names must be in the same order as they were in fit."),
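
A standalone sketch of the warnings-as-errors pattern used in the common check
above (`might_warn` is a toy stand-in, not sklearn code): filtering the specific
message with "error" turns any occurrence of that UserWarning into an exception,
so the check fails whenever the warning is emitted for valid feature names.

    import warnings

    def might_warn():
        warnings.warn("X does not have valid feature names", UserWarning)

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "error", message="X does not have valid feature names", category=UserWarning
        )
        try:
            might_warn()
        except UserWarning as exc:
            print("escalated to error:", exc)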