ENH Adds n_feature_in_ checking to cluster #18727
Changes from all commits: a501ca1, 0fb69d4, f48580a, 357f268, 8360cfb, fa74c44, 4b8f634, 0bc1f28, ce54d6c, 5596764, 86bd36c
@@ -8,7 +8,6 @@
 import numpy as np

 from ..base import TransformerMixin
-from ..utils import check_array
 from ..utils.validation import check_is_fitted
 from scipy.sparse import issparse

@@ -38,10 +37,7 @@ def transform(self, X):
         """
         check_is_fitted(self)

-        X = check_array(X)
-        if len(self.labels_) != X.shape[1]:
-            raise ValueError("X has a different number of features than "
-                             "during fitting.")
+        X = self._validate_data(X, reset=False)
         if self.pooling_func == np.mean and not issparse(X):
             size = np.bincount(self.labels_)
             n_samples = X.shape[0]

Review comment (on the removed shape check): We're assuming that the invariant len(self.labels_) == X_train.shape[1] is enforced elsewhere, right? I guess this was only a workaround for not having n_features_in_ checking.
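For context, the replacement relies on BaseEstimator._validate_data: with reset=False it validates X and compares X.shape[1] against the n_features_in_ attribute recorded during fit, raising a ValueError on a mismatch instead of overwriting the attribute. Below is a minimal sketch, assuming a scikit-learn release that includes this PR; the printed error text is illustrative, not the exact wording.

```python
# Sketch of the check this diff delegates to _validate_data(X, reset=False).
import numpy as np
from sklearn.cluster import FeatureAgglomeration

rng = np.random.RandomState(0)
X_train = rng.rand(20, 6)                    # 6 features seen at fit time
agglo = FeatureAgglomeration(n_clusters=3).fit(X_train)
print(agglo.n_features_in_)                  # 6, recorded during fit

X_new = rng.rand(5, 4)                       # wrong number of features
try:
    agglo.transform(X_new)                   # calls _validate_data(X, reset=False)
except ValueError as exc:
    print(exc)                               # feature-count mismatch error
```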
@@ -854,15 +854,9 @@ def _validate_center_shape(self, X, centers):
                 f"match the number of features of the data {X.shape[1]}.")

     def _check_test_data(self, X):
-        X = check_array(X, accept_sparse='csr', dtype=[np.float64, np.float32],
-                        order='C', accept_large_sparse=False)
-        n_samples, n_features = X.shape
-        expected_n_features = self.cluster_centers_.shape[1]
-        if not n_features == expected_n_features:
-            raise ValueError(
-                f"Incorrect number of features. Got {n_features} features, "
-                f"expected {expected_n_features}.")
+        X = self._validate_data(X, accept_sparse='csr', reset=False,
+                                dtype=[np.float64, np.float32],
+                                order='C', accept_large_sparse=False)
         return X

     def _check_mkl_vcomp(self, X, n_samples):

Review comment (on _check_test_data): do we still need the function now? but fine with me.
(ogrisel marked this conversation as resolved.)
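The same pattern applies on the k-means side: KMeans.predict and related methods run their input through _check_test_data, so a feature-count mismatch is now reported by the shared n_features_in_ machinery rather than the hand-rolled "Incorrect number of features" message. The helper is kept, presumably, because it still centralizes the k-means-specific validation options (CSR sparse input, float dtypes, C order). A hedged sketch, again assuming a release that contains this change:

```python
# Sketch: predict-time input with a different number of features than
# seen at fit time is rejected by _validate_data(..., reset=False).
import numpy as np
from sklearn.cluster import KMeans

rng = np.random.RandomState(0)
X_train = rng.rand(50, 3)                    # 3 features at fit time
km = KMeans(n_clusters=2, n_init=10, random_state=0).fit(X_train)

X_bad = rng.rand(10, 5)                      # 5 features at predict time
try:
    km.predict(X_bad)                        # _check_test_data -> _validate_data
except ValueError as exc:
    print(exc)                               # consistency error from the n_features_in_ check
```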
Review comment: As a quick benchmark: