From 18fba6c6f7b51ce63bb5b435bf1828eac32ca158 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 1 Aug 2019 13:02:25 -0400 Subject: [PATCH 01/21] make check_is_fitted not take attributes --- benchmarks/bench_plot_nmf.py | 4 +-- doc/developers/contributing.rst | 2 +- sklearn/calibration.py | 4 +-- sklearn/cluster/_feature_agglomeration.py | 4 +-- sklearn/cluster/affinity_propagation_.py | 2 +- sklearn/cluster/birch.py | 4 +-- sklearn/cluster/k_means_.py | 8 +++--- sklearn/cluster/mean_shift_.py | 2 +- sklearn/compose/_column_transformer.py | 4 +-- sklearn/compose/_target.py | 2 +- sklearn/covariance/elliptic_envelope.py | 4 +-- sklearn/cross_decomposition/pls_.py | 6 ++-- sklearn/decomposition/base.py | 2 +- sklearn/decomposition/dict_learning.py | 2 +- sklearn/decomposition/factor_analysis.py | 8 +++--- sklearn/decomposition/fastica_.py | 4 +-- sklearn/decomposition/kernel_pca.py | 2 +- sklearn/decomposition/nmf.py | 4 +-- sklearn/decomposition/online_lda.py | 4 +-- sklearn/decomposition/pca.py | 2 +- sklearn/decomposition/sparse_pca.py | 2 +- sklearn/discriminant_analysis.py | 6 ++-- sklearn/dummy.py | 6 ++-- .../_hist_gradient_boosting/binning.py | 2 +- sklearn/ensemble/bagging.py | 8 +++--- sklearn/ensemble/forest.py | 10 +++---- sklearn/ensemble/gradient_boosting.py | 4 +-- sklearn/ensemble/iforest.py | 4 +-- sklearn/ensemble/voting.py | 10 +++---- sklearn/ensemble/weight_boosting.py | 10 +++---- sklearn/feature_extraction/text.py | 6 ++-- sklearn/feature_selection/rfe.py | 12 ++++---- .../feature_selection/univariate_selection.py | 12 ++++---- .../feature_selection/variance_threshold.py | 2 +- sklearn/gaussian_process/gpc.py | 10 +++---- sklearn/impute/_base.py | 4 +-- sklearn/impute/_iterative.py | 2 +- sklearn/kernel_approximation.py | 6 ++-- sklearn/kernel_ridge.py | 2 +- sklearn/linear_model/base.py | 4 +-- sklearn/linear_model/coordinate_descent.py | 2 +- sklearn/linear_model/logistic.py | 2 +- sklearn/linear_model/ransac.py | 4 +-- sklearn/linear_model/stochastic_gradient.py | 4 +-- sklearn/manifold/locally_linear.py | 2 +- sklearn/mixture/gaussian_mixture.py | 2 +- sklearn/model_selection/_search.py | 2 +- sklearn/multiclass.py | 20 ++++++------- sklearn/multioutput.py | 8 +++--- sklearn/naive_bayes.py | 8 +++--- sklearn/neighbors/base.py | 8 +++--- sklearn/neighbors/nca.py | 2 +- sklearn/neighbors/nearest_centroid.py | 2 +- .../neural_network/multilayer_perceptron.py | 6 ++-- sklearn/neural_network/rbm.py | 6 ++-- sklearn/preprocessing/_discretization.py | 4 +-- sklearn/preprocessing/_encoders.py | 8 +++--- sklearn/preprocessing/data.py | 28 +++++++++---------- sklearn/preprocessing/label.py | 12 ++++---- sklearn/random_projection.py | 2 +- sklearn/semi_supervised/label_propagation.py | 2 +- sklearn/svm/base.py | 4 +-- sklearn/tests/test_metaestimators.py | 2 +- sklearn/tree/tree.py | 12 ++++---- sklearn/utils/tests/test_estimator_checks.py | 2 +- sklearn/utils/tests/test_validation.py | 16 +++++------ sklearn/utils/validation.py | 8 +++--- 67 files changed, 187 insertions(+), 187 deletions(-) diff --git a/benchmarks/bench_plot_nmf.py b/benchmarks/bench_plot_nmf.py index 3ec7cea92cf2d..d8d34d8f952ce 100644 --- a/benchmarks/bench_plot_nmf.py +++ b/benchmarks/bench_plot_nmf.py @@ -213,13 +213,13 @@ def fit(self, X, y=None, **params): return self def transform(self, X): - check_is_fitted(self, 'components_') + check_is_fitted(self) H = self.components_ W, _, self.n_iter_ = self._fit_transform(X, H=H, update_H=False) return W def inverse_transform(self, W): - 
check_is_fitted(self, 'components_') + check_is_fitted(self) return np.dot(W, self.components_) def fit_transform(self, X, y=None, W=None, H=None): diff --git a/doc/developers/contributing.rst b/doc/developers/contributing.rst index 1ef8f2d03e14c..27d7236bf02d4 100644 --- a/doc/developers/contributing.rst +++ b/doc/developers/contributing.rst @@ -1354,7 +1354,7 @@ the correct interface more easily. ... def predict(self, X): ... ... # Check is fit had been called - ... check_is_fitted(self, ['X_', 'y_']) + ... check_is_fitted(self) ... ... # Input validation ... X = check_array(X) diff --git a/sklearn/calibration.py b/sklearn/calibration.py index 2c30cdabcb415..b88a8b8eb37ef 100644 --- a/sklearn/calibration.py +++ b/sklearn/calibration.py @@ -216,7 +216,7 @@ def predict_proba(self, X): C : array, shape (n_samples, n_classes) The predicted probas. """ - check_is_fitted(self, ["classes_", "calibrated_classifiers_"]) + check_is_fitted(self) X = check_array(X, accept_sparse=['csc', 'csr', 'coo'], force_all_finite=False) # Compute the arithmetic mean of the predictions of the calibrated @@ -244,7 +244,7 @@ def predict(self, X): C : array, shape (n_samples,) The predicted class. """ - check_is_fitted(self, ["classes_", "calibrated_classifiers_"]) + check_is_fitted(self) return self.classes_[np.argmax(self.predict_proba(X), axis=1)] diff --git a/sklearn/cluster/_feature_agglomeration.py b/sklearn/cluster/_feature_agglomeration.py index f20b8db7d535c..3b7767feedb00 100644 --- a/sklearn/cluster/_feature_agglomeration.py +++ b/sklearn/cluster/_feature_agglomeration.py @@ -36,7 +36,7 @@ def transform(self, X): Y : array, shape = [n_samples, n_clusters] or [n_clusters] The pooled values for each feature cluster. """ - check_is_fitted(self, "labels_") + check_is_fitted(self) X = check_array(X) if len(self.labels_) != X.shape[1]: @@ -71,7 +71,7 @@ def inverse_transform(self, Xred): A vector of size n_samples with the values of Xred assigned to each of the cluster of samples. """ - check_is_fitted(self, "labels_") + check_is_fitted(self) unil, inverse = np.unique(self.labels_, return_inverse=True) return Xred[..., inverse] diff --git a/sklearn/cluster/affinity_propagation_.py b/sklearn/cluster/affinity_propagation_.py index 487ade4012133..89c6ce9fe8b34 100644 --- a/sklearn/cluster/affinity_propagation_.py +++ b/sklearn/cluster/affinity_propagation_.py @@ -407,7 +407,7 @@ def predict(self, X): labels : ndarray, shape (n_samples,) Cluster labels. """ - check_is_fitted(self, "cluster_centers_indices_") + check_is_fitted(self) if not hasattr(self, "cluster_centers_"): raise ValueError("Predict method is not supported when " "affinity='precomputed'.") diff --git a/sklearn/cluster/birch.py b/sklearn/cluster/birch.py index 27b5038bb67a3..941b833e977f7 100644 --- a/sklearn/cluster/birch.py +++ b/sklearn/cluster/birch.py @@ -534,7 +534,7 @@ def partial_fit(self, X=None, y=None): return self._fit(X) def _check_fit(self, X): - check_is_fitted(self, ['subcluster_centers_', 'partial_fit_'], + check_is_fitted(self, all_or_any=any) if (hasattr(self, 'subcluster_centers_') and @@ -583,7 +583,7 @@ def transform(self, X): X_trans : {array-like, sparse matrix}, shape (n_samples, n_clusters) Transformed data. 
""" - check_is_fitted(self, 'subcluster_centers_') + check_is_fitted(self) return euclidean_distances(X, self.subcluster_centers_) def _global_clustering(self, X=None): diff --git a/sklearn/cluster/k_means_.py b/sklearn/cluster/k_means_.py index b7fbdf7da3ad1..27bdc934e4c0d 100644 --- a/sklearn/cluster/k_means_.py +++ b/sklearn/cluster/k_means_.py @@ -1033,7 +1033,7 @@ def transform(self, X): X_new : array, shape [n_samples, k] X transformed in the new space. """ - check_is_fitted(self, 'cluster_centers_') + check_is_fitted(self) X = self._check_test_data(X) return self._transform(X) @@ -1063,7 +1063,7 @@ def predict(self, X, sample_weight=None): labels : array, shape [n_samples,] Index of the cluster each sample belongs to. """ - check_is_fitted(self, 'cluster_centers_') + check_is_fitted(self) X = self._check_test_data(X) x_squared_norms = row_norms(X, squared=True) @@ -1090,7 +1090,7 @@ def score(self, X, y=None, sample_weight=None): score : float Opposite of the value of X on the K-means objective. """ - check_is_fitted(self, 'cluster_centers_') + check_is_fitted(self) X = self._check_test_data(X) x_squared_norms = row_norms(X, squared=True) @@ -1733,7 +1733,7 @@ def predict(self, X, sample_weight=None): labels : array, shape [n_samples,] Index of the cluster each sample belongs to. """ - check_is_fitted(self, 'cluster_centers_') + check_is_fitted(self) X = self._check_test_data(X) return self._labels_inertia_minibatch(X, sample_weight)[0] diff --git a/sklearn/cluster/mean_shift_.py b/sklearn/cluster/mean_shift_.py index 960ac28984721..e588ccd6df1c8 100644 --- a/sklearn/cluster/mean_shift_.py +++ b/sklearn/cluster/mean_shift_.py @@ -435,6 +435,6 @@ def predict(self, X): labels : array, shape [n_samples,] Index of the cluster each sample belongs to. """ - check_is_fitted(self, "cluster_centers_") + check_is_fitted(self) return pairwise_distances_argmin(X, self.cluster_centers_) diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index c0f537776cb6a..1d460b11dc480 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -341,7 +341,7 @@ def get_feature_names(self): feature_names : list of strings Names of the features produced by transform. """ - check_is_fitted(self, 'transformers_') + check_is_fitted(self) feature_names = [] for name, trans, _, _ in self._iter(fitted=True): if trans == 'drop': @@ -516,7 +516,7 @@ def transform(self, X): sparse matrices. """ - check_is_fitted(self, 'transformers_') + check_is_fitted(self) X = _check_X(X) if self._n_features > X.shape[1]: diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py index c1c3f4df4e95f..35b7ed6af962a 100644 --- a/sklearn/compose/_target.py +++ b/sklearn/compose/_target.py @@ -221,7 +221,7 @@ def predict(self, X): Predicted values. """ - check_is_fitted(self, "regressor_") + check_is_fitted(self) pred = self.regressor_.predict(X) if pred.ndim == 1: pred_trans = self.transformer_.inverse_transform( diff --git a/sklearn/covariance/elliptic_envelope.py b/sklearn/covariance/elliptic_envelope.py index 517f9a32dc9af..aa5e01ffa14b0 100644 --- a/sklearn/covariance/elliptic_envelope.py +++ b/sklearn/covariance/elliptic_envelope.py @@ -147,7 +147,7 @@ def decision_function(self, X): compatibility with other outlier detection algorithms. 
""" - check_is_fitted(self, 'offset_') + check_is_fitted(self) negative_mahal_dist = self.score_samples(X) return negative_mahal_dist - self.offset_ @@ -163,7 +163,7 @@ def score_samples(self, X): negative_mahal_distances : array-like, shape (n_samples, ) Opposite of the Mahalanobis distances. """ - check_is_fitted(self, 'offset_') + check_is_fitted(self) return -self.mahalanobis(X) def predict(self, X): diff --git a/sklearn/cross_decomposition/pls_.py b/sklearn/cross_decomposition/pls_.py index 175a472e6d4fb..94c517992e061 100644 --- a/sklearn/cross_decomposition/pls_.py +++ b/sklearn/cross_decomposition/pls_.py @@ -398,7 +398,7 @@ def transform(self, X, Y=None, copy=True): ------- x_scores if Y is not given, (x_scores, y_scores) otherwise. """ - check_is_fitted(self, 'x_mean_') + check_is_fitted(self) X = check_array(X, copy=copy, dtype=FLOAT_DTYPES) # Normalize X -= self.x_mean_ @@ -433,7 +433,7 @@ def predict(self, X, copy=True): This call requires the estimation of a p x q matrix, which may be an issue in high dimensional space. """ - check_is_fitted(self, 'x_mean_') + check_is_fitted(self) X = check_array(X, copy=copy, dtype=FLOAT_DTYPES) # Normalize X -= self.x_mean_ @@ -872,7 +872,7 @@ def transform(self, X, Y=None): Target vectors, where n_samples is the number of samples and n_targets is the number of response variables. """ - check_is_fitted(self, 'x_mean_') + check_is_fitted(self) X = check_array(X, dtype=np.float64) Xr = (X - self.x_mean_) / self.x_std_ x_scores = np.dot(Xr, self.x_weights_) diff --git a/sklearn/decomposition/base.py b/sklearn/decomposition/base.py index 3cbdb29723825..0dad8c6130d68 100644 --- a/sklearn/decomposition/base.py +++ b/sklearn/decomposition/base.py @@ -122,7 +122,7 @@ def transform(self, X): IncrementalPCA(batch_size=3, n_components=2) >>> ipca.transform(X) # doctest: +SKIP """ - check_is_fitted(self, ['mean_', 'components_'], all_or_any=all) + check_is_fitted(self, all_or_any=all) X = check_array(X) if self.mean_ is not None: diff --git a/sklearn/decomposition/dict_learning.py b/sklearn/decomposition/dict_learning.py index 56187948f8554..6fa6d1e7f2d6f 100644 --- a/sklearn/decomposition/dict_learning.py +++ b/sklearn/decomposition/dict_learning.py @@ -911,7 +911,7 @@ def transform(self, X): Transformed data """ - check_is_fitted(self, 'components_') + check_is_fitted(self) X = check_array(X) diff --git a/sklearn/decomposition/factor_analysis.py b/sklearn/decomposition/factor_analysis.py index f9d81737850ff..1306c4245a7f3 100644 --- a/sklearn/decomposition/factor_analysis.py +++ b/sklearn/decomposition/factor_analysis.py @@ -261,7 +261,7 @@ def transform(self, X): X_new : array-like, shape (n_samples, n_components) The latent variables of X. """ - check_is_fitted(self, 'components_') + check_is_fitted(self) X = check_array(X) Ih = np.eye(len(self.components_)) @@ -285,7 +285,7 @@ def get_covariance(self): cov : array, shape (n_features, n_features) Estimated covariance of data. """ - check_is_fitted(self, 'components_') + check_is_fitted(self) cov = np.dot(self.components_.T, self.components_) cov.flat[::len(cov) + 1] += self.noise_variance_ # modify diag inplace @@ -299,7 +299,7 @@ def get_precision(self): precision : array, shape (n_features, n_features) Estimated precision of data. 
""" - check_is_fitted(self, 'components_') + check_is_fitted(self) n_features = self.components_.shape[1] @@ -333,7 +333,7 @@ def score_samples(self, X): ll : array, shape (n_samples,) Log-likelihood of each sample under the current model """ - check_is_fitted(self, 'components_') + check_is_fitted(self) Xr = X - self.mean_ precision = self.get_precision() diff --git a/sklearn/decomposition/fastica_.py b/sklearn/decomposition/fastica_.py index d841926cdfc87..3f6f1af632494 100644 --- a/sklearn/decomposition/fastica_.py +++ b/sklearn/decomposition/fastica_.py @@ -574,7 +574,7 @@ def transform(self, X, copy=True): ------- X_new : array-like, shape (n_samples, n_components) """ - check_is_fitted(self, 'mixing_') + check_is_fitted(self) X = check_array(X, copy=copy, dtype=FLOAT_DTYPES) if self.whiten: @@ -597,7 +597,7 @@ def inverse_transform(self, X, copy=True): ------- X_new : array-like, shape (n_samples, n_features) """ - check_is_fitted(self, 'mixing_') + check_is_fitted(self) X = check_array(X, copy=(copy and self.whiten), dtype=FLOAT_DTYPES) X = np.dot(X, self.mixing_.T) diff --git a/sklearn/decomposition/kernel_pca.py b/sklearn/decomposition/kernel_pca.py index 555bd619c5a62..59785fed3ac0e 100644 --- a/sklearn/decomposition/kernel_pca.py +++ b/sklearn/decomposition/kernel_pca.py @@ -319,7 +319,7 @@ def transform(self, X): ------- X_new : array-like, shape (n_samples, n_components) """ - check_is_fitted(self, 'X_fit_') + check_is_fitted(self) # Compute centered gram matrix between X and training data X_fit_ K = self._centerer.transform(self._get_kernel(X, self.X_fit_)) diff --git a/sklearn/decomposition/nmf.py b/sklearn/decomposition/nmf.py index f64bc34b7fad7..0233688ae696e 100644 --- a/sklearn/decomposition/nmf.py +++ b/sklearn/decomposition/nmf.py @@ -1313,7 +1313,7 @@ def transform(self, X): W : array, shape (n_samples, n_components) Transformed data """ - check_is_fitted(self, 'n_components_') + check_is_fitted(self) W, _, n_iter_ = non_negative_factorization( X=X, W=None, H=self.components_, n_components=self.n_components_, @@ -1340,5 +1340,5 @@ def inverse_transform(self, W): .. versionadded:: 0.18 """ - check_is_fitted(self, 'n_components_') + check_is_fitted(self) return np.dot(W, self.components_) diff --git a/sklearn/decomposition/online_lda.py b/sklearn/decomposition/online_lda.py index c1d482f0a46c6..503cc24692e25 100644 --- a/sklearn/decomposition/online_lda.py +++ b/sklearn/decomposition/online_lda.py @@ -594,7 +594,7 @@ def _unnormalized_transform(self, X): doc_topic_distr : shape=(n_samples, n_components) Document topic distribution for X. """ - check_is_fitted(self, 'components_') + check_is_fitted(self) # make sure feature size is the same in fitted model and in X X = self._check_non_neg_array(X, "LatentDirichletAllocation.transform") @@ -748,7 +748,7 @@ def _perplexity_precomp_distr(self, X, doc_topic_distr=None, score : float Perplexity score. 
""" - check_is_fitted(self, 'components_') + check_is_fitted(self) X = self._check_non_neg_array(X, "LatentDirichletAllocation.perplexity") diff --git a/sklearn/decomposition/pca.py b/sklearn/decomposition/pca.py index 99e392020abaf..1bf3d6e6b19e6 100644 --- a/sklearn/decomposition/pca.py +++ b/sklearn/decomposition/pca.py @@ -569,7 +569,7 @@ def score_samples(self, X): ll : array, shape (n_samples,) Log-likelihood of each sample under the current model """ - check_is_fitted(self, 'mean_') + check_is_fitted(self) X = check_array(X) Xr = X - self.mean_ diff --git a/sklearn/decomposition/sparse_pca.py b/sklearn/decomposition/sparse_pca.py index 238f6cc4ef403..3ca14cb528bb8 100644 --- a/sklearn/decomposition/sparse_pca.py +++ b/sklearn/decomposition/sparse_pca.py @@ -221,7 +221,7 @@ def transform(self, X): X_new array, shape (n_samples, n_components) Transformed data. """ - check_is_fitted(self, 'components_') + check_is_fitted(self) X = check_array(X) X = X - self.mean_ diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 9634b303ea946..4a3542e204288 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -505,7 +505,7 @@ def transform(self, X): if self.solver == 'lsqr': raise NotImplementedError("transform not implemented for 'lsqr' " "solver (use 'svd' or 'eigen').") - check_is_fitted(self, ['xbar_', 'scalings_'], all_or_any=any) + check_is_fitted(self, all_or_any=any) X = check_array(X) if self.solver == 'svd': @@ -528,7 +528,7 @@ def predict_proba(self, X): C : array, shape (n_samples, n_classes) Estimated probabilities. """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) decision = self.decision_function(X) if self.classes_.size == 2: @@ -704,7 +704,7 @@ def fit(self, X, y): return self def _decision_function(self, X): - check_is_fitted(self, 'classes_') + check_is_fitted(self) X = check_array(X) norm2 = [] diff --git a/sklearn/dummy.py b/sklearn/dummy.py index 02d8a448c9766..067a956f6435d 100644 --- a/sklearn/dummy.py +++ b/sklearn/dummy.py @@ -171,7 +171,7 @@ def predict(self, X): y : array, shape = [n_samples] or [n_samples, n_outputs] Predicted target values for X. """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) # numpy random_state expects Python int and not long as size argument # under Windows @@ -249,7 +249,7 @@ def predict_proba(self, X): the model, where classes are ordered arithmetically, for each output. """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) # numpy random_state expects Python int and not long as size argument # under Windows @@ -498,7 +498,7 @@ def predict(self, X, return_std=False): y_std : array, shape = [n_samples] or [n_samples, n_outputs] Standard deviation of predictive distribution of query points. """ - check_is_fitted(self, "constant_") + check_is_fitted(self) n_samples = _num_samples(X) y = np.full((n_samples, self.n_outputs_), self.constant_, diff --git a/sklearn/ensemble/_hist_gradient_boosting/binning.py b/sklearn/ensemble/_hist_gradient_boosting/binning.py index a7ddc9a3ebb47..b35b2a2083b03 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/binning.py +++ b/sklearn/ensemble/_hist_gradient_boosting/binning.py @@ -143,7 +143,7 @@ def transform(self, X): The binned data (fortran-aligned). 
""" X = check_array(X, dtype=[X_DTYPE], force_all_finite=False) - check_is_fitted(self, ['bin_thresholds_', 'actual_n_bins_']) + check_is_fitted(self) if X.shape[1] != self.actual_n_bins_.shape[0]: raise ValueError( 'This estimator was fitted with {} features but {} got passed ' diff --git a/sklearn/ensemble/bagging.py b/sklearn/ensemble/bagging.py index 15096afefa810..7829b99b050f1 100644 --- a/sklearn/ensemble/bagging.py +++ b/sklearn/ensemble/bagging.py @@ -672,7 +672,7 @@ def predict_proba(self, X): The class probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, "classes_") + check_is_fitted(self) # Check data X = check_array( X, accept_sparse=['csr', 'csc'], dtype=None, @@ -722,7 +722,7 @@ def predict_log_proba(self, X): The class log-probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, "classes_") + check_is_fitted(self) if hasattr(self.base_estimator_, "predict_log_proba"): # Check data X = check_array( @@ -780,7 +780,7 @@ def decision_function(self, X): cases with ``k == 1``, otherwise ``k==n_classes``. """ - check_is_fitted(self, "classes_") + check_is_fitted(self) # Check data X = check_array( @@ -965,7 +965,7 @@ def predict(self, X): y : array of shape = [n_samples] The predicted values. """ - check_is_fitted(self, "estimators_features_") + check_is_fitted(self) # Check data X = check_array( X, accept_sparse=['csr', 'csc'], dtype=None, diff --git a/sklearn/ensemble/forest.py b/sklearn/ensemble/forest.py index a3513fdf32e41..b0fff1f6c9181 100644 --- a/sklearn/ensemble/forest.py +++ b/sklearn/ensemble/forest.py @@ -346,7 +346,7 @@ def _validate_y_class_weight(self, y): def _validate_X_predict(self, X): """Validate X whenever one tries to predict, apply, predict_proba""" - check_is_fitted(self, 'estimators_') + check_is_fitted(self) return self.estimators_[0]._validate_X_predict(X, check_input=True) @@ -362,7 +362,7 @@ def feature_importances_(self): trees consisting of only the root node, in which case it will be an array of zeros. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) all_importances = Parallel(n_jobs=self.n_jobs, **_joblib_parallel_args(prefer='threads'))( @@ -575,7 +575,7 @@ class in a leaf. The class probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) # Check data X = self._validate_X_predict(X) @@ -680,7 +680,7 @@ def predict(self, X): y : array of shape = [n_samples] or [n_samples, n_outputs] The predicted values. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) # Check data X = self._validate_X_predict(X) @@ -2026,5 +2026,5 @@ def transform(self, X): X_transformed : sparse matrix, shape=(n_samples, n_out) Transformed dataset. 
""" - check_is_fitted(self, 'one_hot_encoder_') + check_is_fitted(self) return self.one_hot_encoder_.transform(self.apply(X)) diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index cc43df36ba608..11813855d01d8 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -1390,7 +1390,7 @@ def _is_initialized(self): def _check_initialized(self): """Check that the estimator is initialized, raising an error if not.""" - check_is_fitted(self, 'estimators_') + check_is_fitted(self) def fit(self, X, y, sample_weight=None, monitor=None): """Fit the gradient boosting model. @@ -1741,7 +1741,7 @@ def _compute_partial_dependence_recursion(self, grid, target_features): (n_trees_per_iteration, n_samples) The value of the partial dependence function on each grid point. """ - check_is_fitted(self, 'estimators_', + check_is_fitted(self, msg="'estimator' parameter must be a fitted estimator") if self.init is not None: warnings.warn( diff --git a/sklearn/ensemble/iforest.py b/sklearn/ensemble/iforest.py index 8aaae2925ccaf..4cdeb9673ccdb 100644 --- a/sklearn/ensemble/iforest.py +++ b/sklearn/ensemble/iforest.py @@ -303,7 +303,7 @@ def predict(self, X): For each observation, tells whether or not (+1 or -1) it should be considered as an inlier according to the fitted model. """ - check_is_fitted(self, ["offset_"]) + check_is_fitted(self) X = check_array(X, accept_sparse='csr') is_inlier = np.ones(X.shape[0], dtype=int) is_inlier[self.decision_function(X) < 0] = -1 @@ -365,7 +365,7 @@ def score_samples(self, X): The lower, the more abnormal. """ # code structure from ForestClassifier/predict_proba - check_is_fitted(self, ["estimators_"]) + check_is_fitted(self) # Check data X = check_array(X, accept_sparse='csr') diff --git a/sklearn/ensemble/voting.py b/sklearn/ensemble/voting.py index 7900d28c1f782..69381a39d9ce3 100644 --- a/sklearn/ensemble/voting.py +++ b/sklearn/ensemble/voting.py @@ -296,7 +296,7 @@ def predict(self, X): Predicted class labels. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if self.voting == 'soft': maj = np.argmax(self.predict_proba(X), axis=1) @@ -317,7 +317,7 @@ def _collect_probas(self, X): def _predict_proba(self, X): """Predict class probabilities for X in 'soft' voting """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) avg = np.average(self._collect_probas(X), axis=0, weights=self._weights_not_none) return avg @@ -363,7 +363,7 @@ def transform(self, X): array-like of shape (n_samples, n_classifiers), being class labels predicted by each classifier. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if self.voting == 'soft': probas = self._collect_probas(X) @@ -477,7 +477,7 @@ def predict(self, X): y : array of shape (n_samples,) The predicted values. """ - check_is_fitted(self, "estimators_") + check_is_fitted(self) return np.average(self._predict(X), axis=1, weights=self._weights_not_none) @@ -495,5 +495,5 @@ def transform(self, X): array-like of shape (n_samples, n_classifiers), being values predicted by each regressor. 
""" - check_is_fitted(self, 'estimators_') + check_is_fitted(self) return self._predict(X) diff --git a/sklearn/ensemble/weight_boosting.py b/sklearn/ensemble/weight_boosting.py index 2d0ba849f9990..9d3f1611a9d70 100644 --- a/sklearn/ensemble/weight_boosting.py +++ b/sklearn/ensemble/weight_boosting.py @@ -674,7 +674,7 @@ def decision_function(self, X): values closer to -1 or 1 mean more like the first or second class in ``classes_``, respectively. """ - check_is_fitted(self, "n_classes_") + check_is_fitted(self) X = self._validate_data(X) n_classes = self.n_classes_ @@ -717,7 +717,7 @@ def staged_decision_function(self, X): values closer to -1 or 1 mean more like the first or second class in ``classes_``, respectively. """ - check_is_fitted(self, "n_classes_") + check_is_fitted(self) X = self._validate_data(X) n_classes = self.n_classes_ @@ -786,7 +786,7 @@ def predict_proba(self, X): The class probabilities of the input samples. The order of outputs is the same of that of the `classes_` attribute. """ - check_is_fitted(self, "n_classes_") + check_is_fitted(self) X = self._validate_data(X) n_classes = self.n_classes_ @@ -1109,7 +1109,7 @@ def predict(self, X): y : array of shape = [n_samples] The predicted regression values. """ - check_is_fitted(self, "estimator_weights_") + check_is_fitted(self) X = self._validate_data(X) return self._get_median_predict(X, len(self.estimators_)) @@ -1134,7 +1134,7 @@ def staged_predict(self, X): y : generator of array, shape = [n_samples] The predicted regression values. """ - check_is_fitted(self, "estimator_weights_") + check_is_fitted(self) X = self._validate_data(X) for i, _ in enumerate(self.estimators_, 1): diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index ed4d41cc464f8..01a7b70587f3d 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -452,7 +452,7 @@ def _validate_vocabulary(self): def _check_vocabulary(self): """Check if vocabulary is empty or missing (not fit-ed)""" msg = "%(name)s - Vocabulary wasn't fitted." - check_is_fitted(self, 'vocabulary_', msg=msg), + check_is_fitted(self, msg=msg), if len(self.vocabulary_) == 0: raise ValueError("Vocabulary is empty") @@ -1380,7 +1380,7 @@ def transform(self, X, copy=True): X.data += 1 if self.use_idf: - check_is_fitted(self, '_idf_diag', 'idf vector is not fitted') + check_is_fitted(self, 'idf vector is not fitted') expected_n_features = self._idf_diag.shape[0] if n_features != expected_n_features: @@ -1749,7 +1749,7 @@ def transform(self, raw_documents, copy=True): X : sparse matrix, [n_samples, n_features] Tf-idf-weighted document-term matrix. """ - check_is_fitted(self, '_tfidf', 'The tfidf vector is not fitted') + check_is_fitted(self, 'The tfidf vector is not fitted') X = super().transform(raw_documents) return self._tfidf.transform(X, copy=False) diff --git a/sklearn/feature_selection/rfe.py b/sklearn/feature_selection/rfe.py index ce4eb5ed8bd45..4e957e8463a7c 100644 --- a/sklearn/feature_selection/rfe.py +++ b/sklearn/feature_selection/rfe.py @@ -242,7 +242,7 @@ def predict(self, X): y : array of shape [n_samples] The predicted target values. """ - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.predict(self.transform(X)) @if_delegate_has_method(delegate='estimator') @@ -258,11 +258,11 @@ def score(self, X, y): y : array of shape [n_samples] The target values. 
""" - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.score(self.transform(X), y) def _get_support_mask(self): - check_is_fitted(self, 'support_') + check_is_fitted(self) return self.support_ @if_delegate_has_method(delegate='estimator') @@ -284,7 +284,7 @@ def decision_function(self, X): Regression and binary classification produce an array of shape [n_samples]. """ - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.decision_function(self.transform(X)) @if_delegate_has_method(delegate='estimator') @@ -304,7 +304,7 @@ def predict_proba(self, X): The class probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.predict_proba(self.transform(X)) @if_delegate_has_method(delegate='estimator') @@ -322,7 +322,7 @@ def predict_log_proba(self, X): The class log-probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.predict_log_proba(self.transform(X)) def _more_tags(self): diff --git a/sklearn/feature_selection/univariate_selection.py b/sklearn/feature_selection/univariate_selection.py index 554cb3d392b29..5b1cae1823e9c 100644 --- a/sklearn/feature_selection/univariate_selection.py +++ b/sklearn/feature_selection/univariate_selection.py @@ -429,7 +429,7 @@ def _check_params(self, X, y): % self.percentile) def _get_support_mask(self): - check_is_fitted(self, 'scores_') + check_is_fitted(self) # Cater for NaNs if self.percentile == 100: @@ -514,7 +514,7 @@ def _check_params(self, X, y): % (X.shape[1], self.k)) def _get_support_mask(self): - check_is_fitted(self, 'scores_') + check_is_fitted(self) if self.k == 'all': return np.ones(self.scores_.shape, dtype=bool) @@ -587,7 +587,7 @@ def __init__(self, score_func=f_classif, alpha=5e-2): self.alpha = alpha def _get_support_mask(self): - check_is_fitted(self, 'scores_') + check_is_fitted(self) return self.pvalues_ < self.alpha @@ -653,7 +653,7 @@ def __init__(self, score_func=f_classif, alpha=5e-2): self.alpha = alpha def _get_support_mask(self): - check_is_fitted(self, 'scores_') + check_is_fitted(self) n_features = len(self.pvalues_) sv = np.sort(self.pvalues_) @@ -716,7 +716,7 @@ def __init__(self, score_func=f_classif, alpha=5e-2): self.alpha = alpha def _get_support_mask(self): - check_is_fitted(self, 'scores_') + check_is_fitted(self) return (self.pvalues_ < self.alpha / len(self.pvalues_)) @@ -811,7 +811,7 @@ def _check_params(self, X, y): self._make_selector()._check_params(X, y) def _get_support_mask(self): - check_is_fitted(self, 'scores_') + check_is_fitted(self) selector = self._make_selector() selector.pvalues_ = self.pvalues_ diff --git a/sklearn/feature_selection/variance_threshold.py b/sklearn/feature_selection/variance_threshold.py index 7d98de82c9711..c9eb973dc86c3 100644 --- a/sklearn/feature_selection/variance_threshold.py +++ b/sklearn/feature_selection/variance_threshold.py @@ -87,6 +87,6 @@ def fit(self, X, y=None): return self def _get_support_mask(self): - check_is_fitted(self, 'variances_') + check_is_fitted(self) return self.variances_ > self.threshold diff --git a/sklearn/gaussian_process/gpc.py b/sklearn/gaussian_process/gpc.py index 6270409f129b1..5421f7e408472 100644 --- a/sklearn/gaussian_process/gpc.py +++ b/sklearn/gaussian_process/gpc.py @@ -255,7 +255,7 @@ def 
predict(self, X): C : array, shape = (n_samples,) Predicted target values for X, values are from ``classes_`` """ - check_is_fitted(self, ["X_train_", "y_train_", "pi_", "W_sr_", "L_"]) + check_is_fitted(self) # As discussed on Section 3.4.2 of GPML, for making hard binary # decisions, it is enough to compute the MAP of the posterior and @@ -279,7 +279,7 @@ def predict_proba(self, X): the model. The columns correspond to the classes in sorted order, as they appear in the attribute ``classes_``. """ - check_is_fitted(self, ["X_train_", "y_train_", "pi_", "W_sr_", "L_"]) + check_is_fitted(self) # Based on Algorithm 3.2 of GPML K_star = self.kernel_(self.X_train_, X) # K_star =k(x_star) @@ -663,7 +663,7 @@ def predict(self, X): C : array, shape = (n_samples,) Predicted target values for X, values are from ``classes_`` """ - check_is_fitted(self, ["classes_", "n_classes_"]) + check_is_fitted(self) X = check_array(X) return self.base_estimator_.predict(X) @@ -681,7 +681,7 @@ def predict_proba(self, X): the model. The columns correspond to the classes in sorted order, as they appear in the attribute `classes_`. """ - check_is_fitted(self, ["classes_", "n_classes_"]) + check_is_fitted(self) if self.n_classes_ > 2 and self.multi_class == "one_vs_one": raise ValueError("one_vs_one multi-class mode does not support " "predicting probability estimates. Use " @@ -735,7 +735,7 @@ def log_marginal_likelihood(self, theta=None, eval_gradient=False, hyperparameters at position theta. Only returned when eval_gradient is True. """ - check_is_fitted(self, ["classes_", "n_classes_"]) + check_is_fitted(self) if theta is None: if eval_gradient: diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py index 88516f70f2e66..e56802bc74326 100644 --- a/sklearn/impute/_base.py +++ b/sklearn/impute/_base.py @@ -373,7 +373,7 @@ def transform(self, X): X : {array-like, sparse matrix}, shape (n_samples, n_features) The input data to complete. """ - check_is_fitted(self, 'statistics_') + check_is_fitted(self) X = self._validate_input(X) @@ -653,7 +653,7 @@ def transform(self, X): will be boolean. """ - check_is_fitted(self, "features_") + check_is_fitted(self) X = self._validate_input(X) if X.shape[1] != self._n_features: diff --git a/sklearn/impute/_iterative.py b/sklearn/impute/_iterative.py index ecf94d5ccfb57..05e2f1484fccf 100644 --- a/sklearn/impute/_iterative.py +++ b/sklearn/impute/_iterative.py @@ -627,7 +627,7 @@ def transform(self, X): Xt : array-like, shape (n_samples, n_features) The imputed input data. """ - check_is_fitted(self, 'initial_imputer_') + check_is_fitted(self) if self.add_indicator: X_trans_indicator = self.indicator_.transform(X) diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py index 1ef79f48a0459..9d257427944dc 100644 --- a/sklearn/kernel_approximation.py +++ b/sklearn/kernel_approximation.py @@ -115,7 +115,7 @@ def transform(self, X): ------- X_new : array-like, shape (n_samples, n_components) """ - check_is_fitted(self, 'random_weights_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr') projection = safe_sparse_dot(X, self.random_weights_) @@ -222,7 +222,7 @@ def transform(self, X): ------- X_new : array-like, shape (n_samples, n_components) """ - check_is_fitted(self, 'random_weights_') + check_is_fitted(self) X = as_float_array(X, copy=True) X = check_array(X, copy=False) @@ -580,7 +580,7 @@ def transform(self, X): X_transformed : array, shape=(n_samples, n_components) Transformed data. 
""" - check_is_fitted(self, 'components_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr') kernel_params = self._get_kernel_params() diff --git a/sklearn/kernel_ridge.py b/sklearn/kernel_ridge.py index 392ae265a8f20..3d69066e342d6 100644 --- a/sklearn/kernel_ridge.py +++ b/sklearn/kernel_ridge.py @@ -188,6 +188,6 @@ def predict(self, X): C : array, shape = [n_samples] or [n_samples, n_targets] Returns predicted values. """ - check_is_fitted(self, ["X_fit_", "dual_coef_"]) + check_is_fitted(self) K = self._get_kernel(X, self.X_fit_) return np.dot(K, self.dual_coef_) diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index 51ff3a2d1588a..b408c8569529d 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -199,7 +199,7 @@ def fit(self, X, y): """Fit model.""" def _decision_function(self, X): - check_is_fitted(self, "coef_") + check_is_fitted(self) X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) return safe_sparse_dot(X, self.coef_.T, @@ -258,7 +258,7 @@ def decision_function(self, X): case, confidence score for self.classes_[1] where >0 means this class would be predicted. """ - check_is_fitted(self, 'coef_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr') diff --git a/sklearn/linear_model/coordinate_descent.py b/sklearn/linear_model/coordinate_descent.py index 646839a0a3ae6..1aebbfa5ba54e 100644 --- a/sklearn/linear_model/coordinate_descent.py +++ b/sklearn/linear_model/coordinate_descent.py @@ -785,7 +785,7 @@ def _decision_function(self, X): T : array, shape (n_samples,) The predicted decision function """ - check_is_fitted(self, 'n_iter_') + check_is_fitted(self) if sparse.isspmatrix(X): return safe_sparse_dot(X, self.coef_.T, dense_output=True) + self.intercept_ diff --git a/sklearn/linear_model/logistic.py b/sklearn/linear_model/logistic.py index 1ad01e5ddc656..432a5a7db3c0d 100644 --- a/sklearn/linear_model/logistic.py +++ b/sklearn/linear_model/logistic.py @@ -1628,7 +1628,7 @@ def predict_proba(self, X): Returns the probability of the sample for each class in the model, where classes are ordered as they are in ``self.classes_``. """ - check_is_fitted(self, 'coef_') + check_is_fitted(self) ovr = (self.multi_class in ["ovr", "warn"] or (self.multi_class == 'auto' and (self.classes_.size <= 2 or diff --git a/sklearn/linear_model/ransac.py b/sklearn/linear_model/ransac.py index b901e848f49bf..e868a31d17c8d 100644 --- a/sklearn/linear_model/ransac.py +++ b/sklearn/linear_model/ransac.py @@ -466,7 +466,7 @@ def predict(self, X): y : array, shape = [n_samples] or [n_samples, n_targets] Returns predicted values. """ - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.predict(X) @@ -488,6 +488,6 @@ def score(self, X, y): z : float Score of the prediction. """ - check_is_fitted(self, 'estimator_') + check_is_fitted(self) return self.estimator_.score(X, y) diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index afad2e94ed8c1..e80a6a7ec3ce4 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -982,7 +982,7 @@ def predict_proba(self): return self._predict_proba def _predict_proba(self, X): - check_is_fitted(self, "t_") + check_is_fitted(self) if self.loss == "log": return self._predict_proba_lr(X) @@ -1216,7 +1216,7 @@ def _decision_function(self, X): array, shape (n_samples,) Predicted target values per element in X. 
""" - check_is_fitted(self, ["t_", "coef_", "intercept_"], all_or_any=all) + check_is_fitted(self, all_or_any=all) X = check_array(X, accept_sparse='csr') diff --git a/sklearn/manifold/locally_linear.py b/sklearn/manifold/locally_linear.py index cf3c58486c27a..4e90d4876f4df 100644 --- a/sklearn/manifold/locally_linear.py +++ b/sklearn/manifold/locally_linear.py @@ -717,7 +717,7 @@ def transform(self, X): Because of scaling performed by this method, it is discouraged to use it together with methods that are not scale-invariant (like SVMs) """ - check_is_fitted(self, "nbrs_") + check_is_fitted(self) X = check_array(X) ind = self.nbrs_.kneighbors(X, n_neighbors=self.n_neighbors, diff --git a/sklearn/mixture/gaussian_mixture.py b/sklearn/mixture/gaussian_mixture.py index 120b72f06cd01..610af54cc343a 100644 --- a/sklearn/mixture/gaussian_mixture.py +++ b/sklearn/mixture/gaussian_mixture.py @@ -687,7 +687,7 @@ def _compute_lower_bound(self, _, log_prob_norm): return log_prob_norm def _check_is_fitted(self): - check_is_fitted(self, ['weights_', 'means_', 'precisions_cholesky_']) + check_is_fitted(self) def _get_parameters(self): return (self.weights_, self.means_, self.covariances_, diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py index 208ab536c8181..74284f3bdb2fd 100644 --- a/sklearn/model_selection/_search.py +++ b/sklearn/model_selection/_search.py @@ -437,7 +437,7 @@ def _check_is_fitted(self, method_name): 'attribute' % (type(self).__name__, method_name)) else: - check_is_fitted(self, 'best_estimator_') + check_is_fitted(self) @if_delegate_has_method(delegate=('best_estimator_', 'estimator')) def predict(self, X): diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 6315197ad7856..5a8dcebd4170b 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -113,17 +113,17 @@ def fit(self, X, y): return self def predict(self, X): - check_is_fitted(self, 'y_') + check_is_fitted(self) return np.repeat(self.y_, X.shape[0]) def decision_function(self, X): - check_is_fitted(self, 'y_') + check_is_fitted(self) return np.repeat(self.y_, X.shape[0]) def predict_proba(self, X): - check_is_fitted(self, 'y_') + check_is_fitted(self) return np.repeat([np.hstack([1 - self.y_, self.y_])], X.shape[0], axis=0) @@ -285,7 +285,7 @@ def predict(self, X): y : (sparse) array-like, shape = [n_samples, ], [n_samples, n_classes]. Predicted multi-class targets. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) n_samples = _num_samples(X) if self.label_binarizer_.y_type_ == "multiclass": @@ -337,7 +337,7 @@ def predict_proba(self, X): Returns the probability of the sample for each class in the model, where classes are ordered as they are in `self.classes_`. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) # Y[i, j] gives the probability that sample i has the label j. # In the multi-label case, these are not disjoint. 
Y = np.array([e.predict_proba(X)[:, 1] for e in self.estimators_]).T @@ -366,7 +366,7 @@ def decision_function(self, X): ------- T : array-like, shape = [n_samples, n_classes] """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if len(self.estimators_) == 1: return self.estimators_[0].decision_function(X) return np.array([est.decision_function(X).ravel() @@ -383,7 +383,7 @@ def n_classes_(self): @property def coef_(self): - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if not hasattr(self.estimators_[0], "coef_"): raise AttributeError( "Base estimator doesn't have a coef_ attribute.") @@ -394,7 +394,7 @@ def coef_(self): @property def intercept_(self): - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if not hasattr(self.estimators_[0], "intercept_"): raise AttributeError( "Base estimator doesn't have an intercept_ attribute.") @@ -603,7 +603,7 @@ def decision_function(self, X): ------- Y : array-like, shape = [n_samples, n_classes] """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) indices = self.pairwise_indices_ if indices is None: @@ -768,7 +768,7 @@ def predict(self, X): y : numpy array of shape [n_samples] Predicted multi-class targets. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) X = check_array(X) Y = np.array([_predict_binary(e, X) for e in self.estimators_]).T pred = euclidean_distances(Y, self.code_book_).argmin(axis=1) diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index 4411919c1821f..463b72d40f47a 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -185,7 +185,7 @@ def predict(self, X): Multi-output targets predicted across multiple predictors. Note: Separate models are generated for each predictor. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if not hasattr(self.estimator, "predict"): raise ValueError("The base estimator should implement" " a predict method") @@ -344,7 +344,7 @@ def predict_proba(self, X): The class probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) if not all([hasattr(estimator, "predict_proba") for estimator in self.estimators_]): raise ValueError("The base estimator should implement " @@ -370,7 +370,7 @@ def score(self, X, y): scores : float accuracy_score of self.predict(X) versus y """ - check_is_fitted(self, 'estimators_') + check_is_fitted(self) n_outputs_ = len(self.estimators_) if y.ndim == 1: raise ValueError("y must have at least two dimensions for " @@ -472,7 +472,7 @@ def predict(self, X): The predicted values. 
""" - check_is_fitted(self, 'estimators_') + check_is_fitted(self) X = check_array(X, accept_sparse=True) Y_pred_chain = np.zeros((X.shape[0], len(self.estimators_))) for chain_idx, estimator in enumerate(self.estimators_): diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index b3518c8f22e0c..aa14a7f085828 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -431,7 +431,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, return self def _joint_log_likelihood(self, X): - check_is_fitted(self, "classes_") + check_is_fitted(self) X = check_array(X) joint_log_likelihood = [] @@ -743,7 +743,7 @@ def _update_feature_log_prob(self, alpha): def _joint_log_likelihood(self, X): """Calculate the posterior log probability of the samples X""" - check_is_fitted(self, "classes_") + check_is_fitted(self) X = check_array(X, accept_sparse='csr') return (safe_sparse_dot(X, self.feature_log_prob_.T) + @@ -852,7 +852,7 @@ def _update_feature_log_prob(self, alpha): def _joint_log_likelihood(self, X): """Calculate the class scores for the samples in X.""" - check_is_fitted(self, "classes_") + check_is_fitted(self) X = check_array(X, accept_sparse="csr") jll = safe_sparse_dot(X, self.feature_log_prob_.T) @@ -963,7 +963,7 @@ def _update_feature_log_prob(self, alpha): def _joint_log_likelihood(self, X): """Calculate the posterior log probability of the samples X""" - check_is_fitted(self, "classes_") + check_is_fitted(self) X = check_array(X, accept_sparse='csr') diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index 9a05eb62c2afc..041c13aae5417 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -388,7 +388,7 @@ class from an array representing our data set and ask who's [2]]...) """ - check_is_fitted(self, ["_fit_method", "_fit_X"], all_or_any=any) + check_is_fitted(self, all_or_any=any) if n_neighbors is None: n_neighbors = self.n_neighbors @@ -543,7 +543,7 @@ def kneighbors_graph(self, X=None, n_neighbors=None, -------- NearestNeighbors.radius_neighbors_graph """ - check_is_fitted(self, ["_fit_method", "_fit_X"], all_or_any=any) + check_is_fitted(self, all_or_any=any) if n_neighbors is None: n_neighbors = self.n_neighbors @@ -691,7 +691,7 @@ class from an array representing our data set and ask who's For efficiency, `radius_neighbors` returns arrays of objects, where each object is a 1D array of indices or distances. """ - check_is_fitted(self, ["_fit_method", "_fit_X"], all_or_any=any) + check_is_fitted(self, all_or_any=any) if X is not None: query_is_train = False @@ -828,7 +828,7 @@ def radius_neighbors_graph(self, X=None, radius=None, mode='connectivity'): -------- kneighbors_graph """ - check_is_fitted(self, ["_fit_method", "_fit_X"], all_or_any=any) + check_is_fitted(self, all_or_any=any) if X is not None: X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) diff --git a/sklearn/neighbors/nca.py b/sklearn/neighbors/nca.py index 5060270ce1e61..68a72c92da865 100644 --- a/sklearn/neighbors/nca.py +++ b/sklearn/neighbors/nca.py @@ -258,7 +258,7 @@ def transform(self, X): If :meth:`fit` has not been called before. 
""" - check_is_fitted(self, ['components_']) + check_is_fitted(self) X = check_array(X) return np.dot(X, self.components_.T) diff --git a/sklearn/neighbors/nearest_centroid.py b/sklearn/neighbors/nearest_centroid.py index 5626189222e5a..3e1577469c920 100644 --- a/sklearn/neighbors/nearest_centroid.py +++ b/sklearn/neighbors/nearest_centroid.py @@ -191,7 +191,7 @@ def predict(self, X): be the distance matrix between the data to be predicted and ``self.centroids_``. """ - check_is_fitted(self, 'centroids_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr') return self.classes_[pairwise_distances( diff --git a/sklearn/neural_network/multilayer_perceptron.py b/sklearn/neural_network/multilayer_perceptron.py index ebe5f03801ed5..11e1c4a3ab793 100644 --- a/sklearn/neural_network/multilayer_perceptron.py +++ b/sklearn/neural_network/multilayer_perceptron.py @@ -970,7 +970,7 @@ def predict(self, X): y : array-like, shape (n_samples,) or (n_samples, n_classes) The predicted classes. """ - check_is_fitted(self, "coefs_") + check_is_fitted(self) y_pred = self._predict(X) if self.n_outputs_ == 1: @@ -1071,7 +1071,7 @@ def predict_proba(self, X): The predicted probability of the sample for each class in the model, where classes are ordered as they are in `self.classes_`. """ - check_is_fitted(self, "coefs_") + check_is_fitted(self) y_pred = self._predict(X) if self.n_outputs_ == 1: @@ -1332,7 +1332,7 @@ def predict(self, X): y : array-like, shape (n_samples, n_outputs) The predicted values. """ - check_is_fitted(self, "coefs_") + check_is_fitted(self) y_pred = self._predict(X) if y_pred.shape[1] == 1: return y_pred.ravel() diff --git a/sklearn/neural_network/rbm.py b/sklearn/neural_network/rbm.py index b2b6166d4d253..fa39f5f23d80c 100644 --- a/sklearn/neural_network/rbm.py +++ b/sklearn/neural_network/rbm.py @@ -116,7 +116,7 @@ def transform(self, X): h : array, shape (n_samples, n_components) Latent representations of the data. """ - check_is_fitted(self, "components_") + check_is_fitted(self) X = check_array(X, accept_sparse='csr', dtype=np.float64) return self._mean_hiddens(X) @@ -208,7 +208,7 @@ def gibbs(self, v): v_new : array-like, shape (n_samples, n_features) Values of the visible layer after one Gibbs step. """ - check_is_fitted(self, "components_") + check_is_fitted(self) if not hasattr(self, "random_state_"): self.random_state_ = check_random_state(self.random_state) h_ = self._sample_hiddens(v, self.random_state_) @@ -299,7 +299,7 @@ def score_samples(self, X): free energy on X, then on a randomly corrupted version of X, and returns the log of the logistic function of the difference. """ - check_is_fitted(self, "components_") + check_is_fitted(self) v = check_array(X, accept_sparse='csr') rng = check_random_state(self.random_state) diff --git a/sklearn/preprocessing/_discretization.py b/sklearn/preprocessing/_discretization.py index b7ffd96032d2a..1be7499f783ec 100644 --- a/sklearn/preprocessing/_discretization.py +++ b/sklearn/preprocessing/_discretization.py @@ -253,7 +253,7 @@ def transform(self, X): Xt : numeric array-like or sparse matrix Data in the binned space. """ - check_is_fitted(self, ["bin_edges_"]) + check_is_fitted(self) Xt = check_array(X, copy=True, dtype=FLOAT_DTYPES) n_features = self.n_bins_.shape[0] @@ -294,7 +294,7 @@ def inverse_transform(self, Xt): Xinv : numeric array-like Data in the original feature space. 
""" - check_is_fitted(self, ["bin_edges_"]) + check_is_fitted(self) if 'onehot' in self.encode: Xt = self._encoder.inverse_transform(Xt) diff --git a/sklearn/preprocessing/_encoders.py b/sklearn/preprocessing/_encoders.py index c1d3b1e80c352..6a16b484ad563 100644 --- a/sklearn/preprocessing/_encoders.py +++ b/sklearn/preprocessing/_encoders.py @@ -371,7 +371,7 @@ def transform(self, X): X_out : sparse matrix if sparse=True else a 2-d array Transformed input. """ - check_is_fitted(self, 'categories_') + check_is_fitted(self) # validation of X happens in _check_X called by _transform X_int, X_mask = self._transform(X, handle_unknown=self.handle_unknown) @@ -423,7 +423,7 @@ def inverse_transform(self, X): Inverse transformed array. """ - check_is_fitted(self, 'categories_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr') n_samples, _ = X.shape @@ -506,7 +506,7 @@ def get_feature_names(self, input_features=None): output_feature_names : array of string, length n_output_features """ - check_is_fitted(self, 'categories_') + check_is_fitted(self) cats = self.categories_ if input_features is None: input_features = ['x%d' % i for i in range(len(cats))] @@ -639,7 +639,7 @@ def inverse_transform(self, X): Inverse transformed array. """ - check_is_fitted(self, 'categories_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr') n_samples, _ = X.shape diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index 46530de8e6ad9..e70c98e48e898 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -387,7 +387,7 @@ def transform(self, X): X : array-like, shape [n_samples, n_features] Input data that will be transformed. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES, force_all_finite="allow-nan") @@ -404,7 +404,7 @@ def inverse_transform(self, X): X : array-like, shape [n_samples, n_features] Input data that will be transformed. It cannot be sparse. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) X = check_array(X, copy=self.copy, dtype=FLOAT_DTYPES, force_all_finite="allow-nan") @@ -756,7 +756,7 @@ def transform(self, X, copy=None): copy : bool, optional (default: None) Copy the input X or not. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) copy = copy if copy is not None else self.copy X = check_array(X, accept_sparse='csr', copy=copy, @@ -792,7 +792,7 @@ def inverse_transform(self, X, copy=None): X_tr : array-like, shape [n_samples, n_features] Transformed array. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) copy = copy if copy is not None else self.copy if sparse.issparse(X): @@ -957,7 +957,7 @@ def transform(self, X): X : {array-like, sparse matrix} The data that should be scaled. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite='allow-nan') @@ -976,7 +976,7 @@ def inverse_transform(self, X): X : {array-like, sparse matrix} The data that should be transformed back. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite='allow-nan') @@ -1206,7 +1206,7 @@ def transform(self, X): X : {array-like, sparse matrix} The data used to scale along the specified axis. 
""" - check_is_fitted(self, 'center_', 'scale_') + check_is_fitted(self, 'scale_') X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite='allow-nan') @@ -1229,7 +1229,7 @@ def inverse_transform(self, X): X : array-like The data used to scale along the specified axis. """ - check_is_fitted(self, 'center_', 'scale_') + check_is_fitted(self, 'scale_') X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite='allow-nan') @@ -1415,7 +1415,7 @@ def _combinations(n_features, degree, interaction_only, include_bias): @property def powers_(self): - check_is_fitted(self, 'n_input_features_') + check_is_fitted(self) combinations = self._combinations(self.n_input_features_, self.degree, self.interaction_only, @@ -1502,7 +1502,7 @@ def transform(self, X): The matrix of features, where NP is the number of polynomial features generated from the combination of inputs. """ - check_is_fitted(self, ['n_input_features_', 'n_output_features_']) + check_is_fitted(self) X = check_array(X, order='F', dtype=FLOAT_DTYPES, accept_sparse=('csr', 'csc')) @@ -2014,7 +2014,7 @@ def transform(self, K, copy=True): ------- K_new : numpy array of shape [n_samples1, n_samples2] """ - check_is_fitted(self, 'K_fit_all_') + check_is_fitted(self) K = check_array(K, copy=copy, dtype=FLOAT_DTYPES) @@ -2411,7 +2411,7 @@ def _check_inputs(self, X, accept_sparse_negative=False, copy=False): def _check_is_fitted(self, X): """Check the inputs before transforming""" - check_is_fitted(self, 'quantiles_') + check_is_fitted(self) # check that the dimension of X are adequate with the fitted data if X.shape[1] != self.quantiles_.shape[1]: raise ValueError('X does not have the same number of features as' @@ -2786,7 +2786,7 @@ def transform(self, X): X_trans : array-like, shape (n_samples, n_features) The transformed data. """ - check_is_fitted(self, 'lambdas_') + check_is_fitted(self) X = self._check_input(X, check_positive=True, check_shape=True) transform_function = {'box-cox': boxcox, @@ -2832,7 +2832,7 @@ def inverse_transform(self, X): X : array-like, shape (n_samples, n_features) The original data """ - check_is_fitted(self, 'lambdas_') + check_is_fitted(self) X = self._check_input(X, check_shape=True) if self.standardize: diff --git a/sklearn/preprocessing/label.py b/sklearn/preprocessing/label.py index a236589d1698d..118fc22fa7f11 100644 --- a/sklearn/preprocessing/label.py +++ b/sklearn/preprocessing/label.py @@ -256,7 +256,7 @@ def transform(self, y): ------- y : array-like of shape [n_samples] """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) y = column_or_1d(y, warn=True) # transform of empty array is empty array if _num_samples(y) == 0: @@ -277,7 +277,7 @@ def inverse_transform(self, y): ------- y : numpy array of shape [n_samples] """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) y = column_or_1d(y, warn=True) # inverse transform of empty array is empty array if _num_samples(y) == 0: @@ -465,7 +465,7 @@ def transform(self, y): Y : numpy array or CSR matrix of shape [n_samples, n_classes] Shape will be [n_samples, 1] for binary problems. """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) y_is_multilabel = type_of_target(y).startswith('multilabel') if y_is_multilabel and not self.y_type_.startswith('multilabel'): @@ -508,7 +508,7 @@ def inverse_transform(self, Y, threshold=None): linear model's decision_function method directly as the input of inverse_transform. 
""" - check_is_fitted(self, 'classes_') + check_is_fitted(self) if threshold is None: threshold = (self.pos_label + self.neg_label) / 2. @@ -911,7 +911,7 @@ def transform(self, y): A matrix such that `y_indicator[i, j] = 1` iff `classes_[j]` is in `y[i]`, and 0 otherwise. """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) class_to_index = self._build_cache() yt = self._transform(y, class_to_index) @@ -976,7 +976,7 @@ def inverse_transform(self, yt): The set of labels for each sample such that `y[i]` consists of `classes_[j]` for each `yt[i, j] == 1`. """ - check_is_fitted(self, 'classes_') + check_is_fitted(self) if yt.shape[1] != len(self.classes_): raise ValueError('Expected indicator for {0} classes, but got {1}' diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index 8297a42ab17f8..4f8c8af1283b2 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -402,7 +402,7 @@ def transform(self, X): """ X = check_array(X, accept_sparse=['csr', 'csc']) - check_is_fitted(self, 'components_') + check_is_fitted(self) if X.shape[1] != self.components_.shape[1]: raise ValueError( diff --git a/sklearn/semi_supervised/label_propagation.py b/sklearn/semi_supervised/label_propagation.py index 4820af8cb2b69..704a075d95932 100644 --- a/sklearn/semi_supervised/label_propagation.py +++ b/sklearn/semi_supervised/label_propagation.py @@ -184,7 +184,7 @@ def predict_proba(self, X): Normalized probability distributions across class labels """ - check_is_fitted(self, 'X_') + check_is_fitted(self) X_2d = check_array(X, accept_sparse=['csc', 'csr', 'coo', 'dok', 'bsr', 'lil', 'dia']) diff --git a/sklearn/svm/base.py b/sklearn/svm/base.py index 40f87baec06be..b2723cc7e0c2b 100644 --- a/sklearn/svm/base.py +++ b/sklearn/svm/base.py @@ -437,7 +437,7 @@ def _sparse_decision_function(self, X): self.probA_, self.probB_) def _validate_for_predict(self, X): - check_is_fitted(self, 'support_') + check_is_fitted(self) X = check_array(X, accept_sparse='csr', dtype=np.float64, order="C", accept_large_sparse=False) @@ -562,7 +562,7 @@ def predict(self, X): y_pred : array, shape (n_samples,) Class labels for samples in X. """ - check_is_fitted(self, "classes_") + check_is_fitted(self) if self.break_ties and self.decision_function_shape == 'ovo': raise ValueError("break_ties must be False when " "decision_function_shape is 'ovo'") diff --git a/sklearn/tests/test_metaestimators.py b/sklearn/tests/test_metaestimators.py index 822dd0edb5501..6eb9c61ec2b2d 100644 --- a/sklearn/tests/test_metaestimators.py +++ b/sklearn/tests/test_metaestimators.py @@ -65,7 +65,7 @@ def fit(self, X, y=None, *args, **kwargs): return True def _check_fit(self): - check_is_fitted(self, 'coef_') + check_is_fitted(self) @hides def inverse_transform(self, X, *args, **kwargs): diff --git a/sklearn/tree/tree.py b/sklearn/tree/tree.py index 69c3b1a8270b6..9f6bf979717cf 100644 --- a/sklearn/tree/tree.py +++ b/sklearn/tree/tree.py @@ -111,13 +111,13 @@ def get_depth(self): The depth of a tree is the maximum distance between the root and any leaf. """ - check_is_fitted(self, 'tree_') + check_is_fitted(self) return self.tree_.max_depth def get_n_leaves(self): """Returns the number of leaves of the decision tree. 
""" - check_is_fitted(self, 'tree_') + check_is_fitted(self) return self.tree_.n_leaves def fit(self, X, y, sample_weight=None, check_input=True, @@ -424,7 +424,7 @@ def predict(self, X, check_input=True): y : array of shape = [n_samples] or [n_samples, n_outputs] The predicted classes, or the predict values. """ - check_is_fitted(self, 'tree_') + check_is_fitted(self) X = self._validate_X_predict(X, check_input) proba = self.tree_.predict(X) n_samples = X.shape[0] @@ -478,7 +478,7 @@ def apply(self, X, check_input=True): ``[0; self.tree_.node_count)``, possibly with gaps in the numbering. """ - check_is_fitted(self, 'tree_') + check_is_fitted(self) X = self._validate_X_predict(X, check_input) return self.tree_.apply(X) @@ -520,7 +520,7 @@ def feature_importances_(self): ------- feature_importances_ : array, shape = [n_features] """ - check_is_fitted(self, 'tree_') + check_is_fitted(self) return self.tree_.compute_feature_importances() @@ -841,7 +841,7 @@ class in a leaf. The class probabilities of the input samples. The order of the classes corresponds to that in the attribute `classes_`. """ - check_is_fitted(self, 'tree_') + check_is_fitted(self) X = self._validate_X_predict(X, check_input) proba = self.tree_.predict(X) diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py index b4bd2daac00d7..b93c66f7cfbb6 100644 --- a/sklearn/utils/tests/test_estimator_checks.py +++ b/sklearn/utils/tests/test_estimator_checks.py @@ -169,7 +169,7 @@ def fit(self, X, y): return self def predict(self, X): - check_is_fitted(self, 'coef_') + check_is_fitted(self) X = check_array(X) return np.ones(X.shape[0]) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index e1a1270f21e63..14b13d94ca5d1 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -632,34 +632,34 @@ def test_check_symmetric(): def test_check_is_fitted(): # Check is ValueError raised when non estimator instance passed - assert_raises(ValueError, check_is_fitted, ARDRegression, "coef_") - assert_raises(TypeError, check_is_fitted, "SVR", "support_") + assert_raises(ValueError, check_is_fitted, ARDRegression) + assert_raises(TypeError, check_is_fitted, "SVR") ard = ARDRegression() svr = SVR() try: - assert_raises(NotFittedError, check_is_fitted, ard, "coef_") - assert_raises(NotFittedError, check_is_fitted, svr, "support_") + assert_raises(NotFittedError, check_is_fitted, ard) + assert_raises(NotFittedError, check_is_fitted, svr) except ValueError: assert False, "check_is_fitted failed with ValueError" # NotFittedError is a subclass of both ValueError and AttributeError try: - check_is_fitted(ard, "coef_", "Random message %(name)s, %(name)s") + check_is_fitted(ard, msg="Random message %(name)s, %(name)s") except ValueError as e: assert str(e) == "Random message ARDRegression, ARDRegression" try: - check_is_fitted(svr, "support_", "Another message %(name)s, %(name)s") + check_is_fitted(svr, msg="Another message %(name)s, %(name)s") except AttributeError as e: assert str(e) == "Another message SVR, SVR" ard.fit(*make_blobs()) svr.fit(*make_blobs()) - assert check_is_fitted(ard, "coef_") is None - assert check_is_fitted(svr, "support_") is None + assert check_is_fitted(ard) is None + assert check_is_fitted(svr) is None def test_check_consistent_length(): diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 2150a54ad75d9..b4e5bf8154e25 100644 --- a/sklearn/utils/validation.py +++ 
b/sklearn/utils/validation.py @@ -866,7 +866,7 @@ def check_symmetric(array, tol=1E-10, raise_warning=True, return array -def check_is_fitted(estimator, attributes, msg=None, all_or_any=all): +def check_is_fitted(estimator, *, msg=None, all_or_any=all): """Perform is_fitted validation for estimator. Checks if the estimator is fitted by verifying the presence of @@ -910,10 +910,10 @@ def check_is_fitted(estimator, attributes, msg=None, all_or_any=all): if not hasattr(estimator, 'fit'): raise TypeError("%s is not an estimator instance." % (estimator)) - if not isinstance(attributes, (list, tuple)): - attributes = [attributes] + attrs = [v for v in vars(estimator) if v.endswith("_") + and not v.startswith("__")] - if not all_or_any([hasattr(estimator, attr) for attr in attributes]): + if not len(attrs): raise NotFittedError(msg % {'name': type(estimator).__name__}) From e034ed80e20536e38b913e851eeed1427f7ebcc0 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 1 Aug 2019 13:07:18 -0400 Subject: [PATCH 02/21] cleanup, remove any_or_all --- sklearn/decomposition/base.py | 2 +- sklearn/discriminant_analysis.py | 2 +- .../_hist_gradient_boosting/gradient_boosting.py | 4 ++-- sklearn/ensemble/gradient_boosting.py | 10 +++++----- sklearn/kernel_approximation.py | 2 +- sklearn/linear_model/base.py | 4 ++-- sklearn/linear_model/stochastic_gradient.py | 2 +- sklearn/neighbors/base.py | 8 ++++---- sklearn/preprocessing/data.py | 4 ++-- sklearn/utils/validation.py | 12 +++--------- 10 files changed, 22 insertions(+), 28 deletions(-) diff --git a/sklearn/decomposition/base.py b/sklearn/decomposition/base.py index 0dad8c6130d68..2f11d8bd847b8 100644 --- a/sklearn/decomposition/base.py +++ b/sklearn/decomposition/base.py @@ -122,7 +122,7 @@ def transform(self, X): IncrementalPCA(batch_size=3, n_components=2) >>> ipca.transform(X) # doctest: +SKIP """ - check_is_fitted(self, all_or_any=all) + check_is_fitted(self) X = check_array(X) if self.mean_ is not None: diff --git a/sklearn/discriminant_analysis.py b/sklearn/discriminant_analysis.py index 4a3542e204288..efe39b8c3fb9a 100644 --- a/sklearn/discriminant_analysis.py +++ b/sklearn/discriminant_analysis.py @@ -505,7 +505,7 @@ def transform(self, X): if self.solver == 'lsqr': raise NotImplementedError("transform not implemented for 'lsqr' " "solver (use 'svd' or 'eigen').") - check_is_fitted(self, all_or_any=any) + check_is_fitted(self) X = check_array(X) if self.solver == 'svd': diff --git a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py index dc040ed1fa409..e66b755964058 100644 --- a/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py +++ b/sklearn/ensemble/_hist_gradient_boosting/gradient_boosting.py @@ -541,7 +541,7 @@ def _raw_predict(self, X): """ X = check_array(X, dtype=[X_DTYPE, X_BINNED_DTYPE], force_all_finite=False) - check_is_fitted(self, '_predictors') + check_is_fitted(self) if X.shape[1] != self.n_features_: raise ValueError( 'X has {} features but this estimator was trained with ' @@ -603,7 +603,7 @@ def _encode_y(self, y=None): @property def n_iter_(self): - check_is_fitted(self, '_predictors') + check_is_fitted(self) return len(self._predictors) def _more_tags(self): diff --git a/sklearn/ensemble/gradient_boosting.py b/sklearn/ensemble/gradient_boosting.py index 11813855d01d8..43c4dae31f66e 100644 --- a/sklearn/ensemble/gradient_boosting.py +++ b/sklearn/ensemble/gradient_boosting.py @@ -115,7 +115,7 @@ def predict(self, X): y : array, shape 
(n_samples,) Returns predicted values. """ - check_is_fitted(self, 'quantile') + check_is_fitted(self) y = np.empty((X.shape[0], 1), dtype=np.float64) y.fill(self.quantile) @@ -158,7 +158,7 @@ def predict(self, X): y : array, shape (n_samples,) Returns predicted values. """ - check_is_fitted(self, 'mean') + check_is_fitted(self) y = np.empty((X.shape[0], 1), dtype=np.float64) y.fill(self.mean) @@ -210,7 +210,7 @@ def predict(self, X): y : array, shape (n_samples,) Returns predicted values. """ - check_is_fitted(self, 'prior') + check_is_fitted(self) y = np.empty((X.shape[0], 1), dtype=np.float64) y.fill(self.prior) @@ -262,7 +262,7 @@ def predict(self, X): y : array, shape (n_samples,) Returns predicted values. """ - check_is_fitted(self, 'priors') + check_is_fitted(self) y = np.empty((X.shape[0], self.priors.shape[0]), dtype=np.float64) y[:] = self.priors @@ -316,7 +316,7 @@ def predict(self, X): y : array, shape (n_samples,) Returns predicted values. """ - check_is_fitted(self, 'n_classes') + check_is_fitted(self) y = np.empty((X.shape[0], self.n_classes), dtype=np.float64) y.fill(0.0) diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py index 9d257427944dc..82cb37104cbff 100644 --- a/sklearn/kernel_approximation.py +++ b/sklearn/kernel_approximation.py @@ -350,7 +350,7 @@ def transform(self, X): """ msg = ("%(name)s is not fitted. Call fit to set the parameters before" " calling transform") - check_is_fitted(self, "sample_interval_", msg=msg) + check_is_fitted(self, msg=msg) X = check_array(X, accept_sparse='csr') sparse = sp.issparse(X) diff --git a/sklearn/linear_model/base.py b/sklearn/linear_model/base.py index b408c8569529d..b36516e081392 100644 --- a/sklearn/linear_model/base.py +++ b/sklearn/linear_model/base.py @@ -327,7 +327,7 @@ def densify(self): self : estimator """ msg = "Estimator, %(name)s, must be fitted before densifying." - check_is_fitted(self, "coef_", msg=msg) + check_is_fitted(self, msg=msg) if sp.issparse(self.coef_): self.coef_ = self.coef_.toarray() return self @@ -357,7 +357,7 @@ def sparsify(self): self : estimator """ msg = "Estimator, %(name)s, must be fitted before sparsifying." - check_is_fitted(self, "coef_", msg=msg) + check_is_fitted(self, msg=msg) self.coef_ = sp.csr_matrix(self.coef_) return self diff --git a/sklearn/linear_model/stochastic_gradient.py b/sklearn/linear_model/stochastic_gradient.py index e80a6a7ec3ce4..50c91513c12db 100644 --- a/sklearn/linear_model/stochastic_gradient.py +++ b/sklearn/linear_model/stochastic_gradient.py @@ -1216,7 +1216,7 @@ def _decision_function(self, X): array, shape (n_samples,) Predicted target values per element in X. """ - check_is_fitted(self, all_or_any=all) + check_is_fitted(self) X = check_array(X, accept_sparse='csr') diff --git a/sklearn/neighbors/base.py b/sklearn/neighbors/base.py index 041c13aae5417..4f7ef38a4ae14 100644 --- a/sklearn/neighbors/base.py +++ b/sklearn/neighbors/base.py @@ -388,7 +388,7 @@ class from an array representing our data set and ask who's [2]]...) 
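# An illustrative sketch, not part of the diff: with the attributes argument
# removed, a custom error message is supplied through the msg keyword, as
# densify/sparsify do above. Here est stands for any unfitted estimator.
check_is_fitted(est, msg="Estimator, %(name)s, must be fitted before sparsifying.")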
""" - check_is_fitted(self, all_or_any=any) + check_is_fitted(self) if n_neighbors is None: n_neighbors = self.n_neighbors @@ -543,7 +543,7 @@ def kneighbors_graph(self, X=None, n_neighbors=None, -------- NearestNeighbors.radius_neighbors_graph """ - check_is_fitted(self, all_or_any=any) + check_is_fitted(self) if n_neighbors is None: n_neighbors = self.n_neighbors @@ -691,7 +691,7 @@ class from an array representing our data set and ask who's For efficiency, `radius_neighbors` returns arrays of objects, where each object is a 1D array of indices or distances. """ - check_is_fitted(self, all_or_any=any) + check_is_fitted(self) if X is not None: query_is_train = False @@ -828,7 +828,7 @@ def radius_neighbors_graph(self, X=None, radius=None, mode='connectivity'): -------- kneighbors_graph """ - check_is_fitted(self, all_or_any=any) + check_is_fitted(self) if X is not None: X = check_array(X, accept_sparse=['csr', 'csc', 'coo']) diff --git a/sklearn/preprocessing/data.py b/sklearn/preprocessing/data.py index e70c98e48e898..b3f09664f025d 100644 --- a/sklearn/preprocessing/data.py +++ b/sklearn/preprocessing/data.py @@ -1206,7 +1206,7 @@ def transform(self, X): X : {array-like, sparse matrix} The data used to scale along the specified axis. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite='allow-nan') @@ -1229,7 +1229,7 @@ def inverse_transform(self, X): X : array-like The data used to scale along the specified axis. """ - check_is_fitted(self, 'scale_') + check_is_fitted(self) X = check_array(X, accept_sparse=('csr', 'csc'), copy=self.copy, estimator=self, dtype=FLOAT_DTYPES, force_all_finite='allow-nan') diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index b4e5bf8154e25..48daa64da77ee 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -866,21 +866,18 @@ def check_symmetric(array, tol=1E-10, raise_warning=True, return array -def check_is_fitted(estimator, *, msg=None, all_or_any=all): +def check_is_fitted(estimator, *, msg=None): """Perform is_fitted validation for estimator. Checks if the estimator is fitted by verifying the presence of - "all_or_any" of the passed attributes and raises a NotFittedError with the - given message. + fitted attributes (ending with a trailing underscore) and otherwise + raises a NotFittedError with the given message. Parameters ---------- estimator : estimator instance. estimator instance for which the check is performed. - attributes : attribute name(s) given as string or a list/tuple of strings - Eg.: - ``["coef_", "estimator_", ...], "coef_"`` msg : string The default error message is, "This %(name)s instance is not fitted @@ -891,9 +888,6 @@ def check_is_fitted(estimator, *, msg=None, all_or_any=all): Eg. : "Estimator, %(name)s, must be fitted before sparsifying". - all_or_any : callable, {all, any}, default all - Specify whether all or any of the given attributes must exist. 
- Returns ------- None From 1dc925854709a6a157d32ba4248edb1e9047b77f Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 1 Aug 2019 13:18:24 -0400 Subject: [PATCH 03/21] fix LOF, birch, mixtures --- sklearn/cluster/birch.py | 3 +-- sklearn/mixture/base.py | 13 +++++-------- sklearn/mixture/bayesian_mixture.py | 6 ------ sklearn/neighbors/lof.py | 6 ++---- 4 files changed, 8 insertions(+), 20 deletions(-) diff --git a/sklearn/cluster/birch.py b/sklearn/cluster/birch.py index 941b833e977f7..11bb0f17a1dc6 100644 --- a/sklearn/cluster/birch.py +++ b/sklearn/cluster/birch.py @@ -534,8 +534,7 @@ def partial_fit(self, X=None, y=None): return self._fit(X) def _check_fit(self, X): - check_is_fitted(self, - all_or_any=any) + check_is_fitted(self) if (hasattr(self, 'subcluster_centers_') and X.shape[1] != self.subcluster_centers_.shape[1]): diff --git a/sklearn/mixture/base.py b/sklearn/mixture/base.py index 8920bef181226..26410fc5256af 100644 --- a/sklearn/mixture/base.py +++ b/sklearn/mixture/base.py @@ -15,6 +15,7 @@ from ..base import DensityMixin from ..exceptions import ConvergenceWarning from ..utils import check_array, check_random_state +from ..utils.validation import check_is_fitted from ..utils.fixes import logsumexp @@ -308,10 +309,6 @@ def _m_step(self, X, log_resp): """ pass - @abstractmethod - def _check_is_fitted(self): - pass - @abstractmethod def _get_parameters(self): pass @@ -334,7 +331,7 @@ def score_samples(self, X): log_prob : array, shape (n_samples,) Log probabilities of each data point in X. """ - self._check_is_fitted() + check_is_fitted(self) X = _check_X(X, None, self.means_.shape[1]) return logsumexp(self._estimate_weighted_log_prob(X), axis=1) @@ -369,7 +366,7 @@ def predict(self, X): labels : array, shape (n_samples,) Component labels. """ - self._check_is_fitted() + check_is_fitted(self) X = _check_X(X, None, self.means_.shape[1]) return self._estimate_weighted_log_prob(X).argmax(axis=1) @@ -388,7 +385,7 @@ def predict_proba(self, X): Returns the probability each Gaussian (state) in the model given each sample. """ - self._check_is_fitted() + check_is_fitted(self) X = _check_X(X, None, self.means_.shape[1]) _, log_resp = self._estimate_log_prob_resp(X) return np.exp(log_resp) @@ -410,7 +407,7 @@ def sample(self, n_samples=1): Component labels """ - self._check_is_fitted() + check_is_fitted(self) if n_samples < 1: raise ValueError( diff --git a/sklearn/mixture/bayesian_mixture.py b/sklearn/mixture/bayesian_mixture.py index 88c0ab66ae20a..b0cc600d077da 100644 --- a/sklearn/mixture/bayesian_mixture.py +++ b/sklearn/mixture/bayesian_mixture.py @@ -646,12 +646,6 @@ def _estimate_wishart_spherical(self, nk, xk, sk): # Contrary to the original bishop book, we normalize the covariances self.covariances_ /= self.degrees_of_freedom_ - def _check_is_fitted(self): - check_is_fitted(self, ['weight_concentration_', 'mean_precision_', - 'means_', 'degrees_of_freedom_', - 'covariances_', 'precisions_', - 'precisions_cholesky_']) - def _m_step(self, X, log_resp): """M step. diff --git a/sklearn/neighbors/lof.py b/sklearn/neighbors/lof.py index a58997502be91..f4f697565cd3e 100644 --- a/sklearn/neighbors/lof.py +++ b/sklearn/neighbors/lof.py @@ -313,8 +313,7 @@ def _predict(self, X=None): is_inlier : array, shape (n_samples,) Returns -1 for anomalies/outliers and +1 for inliers. 
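# An illustrative sketch, not part of the diff: the mixture and LOF classes
# fixed in this commit can drop their bespoke checks because fit() leaves
# trailing-underscore attributes (weights_, means_, covariances_, ...) that
# the generic check detects.
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.utils.validation import check_is_fitted

X = np.random.RandomState(0).randn(20, 2)
gm = GaussianMixture(n_components=1).fit(X)
check_is_fitted(gm)  # passes without naming any attribute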
""" - check_is_fitted(self, ["offset_", "negative_outlier_factor_", - "n_neighbors_", "_distances_fit_X_"]) + check_is_fitted(self) if X is not None: X = check_array(X, accept_sparse='csr') @@ -454,8 +453,7 @@ def _score_samples(self, X): The opposite of the Local Outlier Factor of each input samples. The lower, the more abnormal. """ - check_is_fitted(self, ["offset_", "negative_outlier_factor_", - "_distances_fit_X_"]) + check_is_fitted(self) X = check_array(X, accept_sparse='csr') distances_X, neighbors_indices_X = ( From d6034ea6eb4d8f07be8e2476e9480f7d4a8ff797 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 1 Aug 2019 13:29:58 -0400 Subject: [PATCH 04/21] remove unused method --- sklearn/mixture/gaussian_mixture.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/sklearn/mixture/gaussian_mixture.py b/sklearn/mixture/gaussian_mixture.py index 610af54cc343a..b7941365b2609 100644 --- a/sklearn/mixture/gaussian_mixture.py +++ b/sklearn/mixture/gaussian_mixture.py @@ -686,9 +686,6 @@ def _estimate_log_weights(self): def _compute_lower_bound(self, _, log_prob_norm): return log_prob_norm - def _check_is_fitted(self): - check_is_fitted(self) - def _get_parameters(self): return (self.weights_, self.means_, self.covariances_, self.precisions_cholesky_) From 3cb95ac2d76ecf3c494446d44838b88347e5f1e8 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Aug 2019 14:43:30 -0400 Subject: [PATCH 05/21] fix partial dependence function --- sklearn/ensemble/partial_dependence.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sklearn/ensemble/partial_dependence.py b/sklearn/ensemble/partial_dependence.py index 594ed39568c27..cce79342c393f 100644 --- a/sklearn/ensemble/partial_dependence.py +++ b/sklearn/ensemble/partial_dependence.py @@ -138,7 +138,7 @@ def partial_dependence(gbrt, target_variables, grid=None, X=None, """ if not isinstance(gbrt, BaseGradientBoosting): raise ValueError('gbrt has to be an instance of BaseGradientBoosting') - check_is_fitted(gbrt, 'estimators_') + check_is_fitted(gbrt) if (grid is None and X is None) or (grid is not None and X is not None): raise ValueError('Either grid or X must be specified') @@ -270,7 +270,7 @@ def plot_partial_dependence(gbrt, X, features, feature_names=None, if not isinstance(gbrt, BaseGradientBoosting): raise ValueError('gbrt has to be an instance of BaseGradientBoosting') - check_is_fitted(gbrt, 'estimators_') + check_is_fitted(gbrt) # set label_idx for multi-class GBRT if hasattr(gbrt, 'classes_') and np.size(gbrt.classes_) > 2: From 4d3a8b47ac98bc897c7675e21a2c8ac0d8571750 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Aug 2019 14:53:17 -0400 Subject: [PATCH 06/21] make change backward-compatible --- sklearn/utils/tests/test_validation.py | 5 +++++ sklearn/utils/validation.py | 7 +++++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 14b13d94ca5d1..ec812b64938bc 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -661,6 +661,11 @@ def test_check_is_fitted(): assert check_is_fitted(ard) is None assert check_is_fitted(svr) is None + assert_warns_message( + DeprecationWarning, + "Passing attributes to check_is_fitted is deprecated", + check_is_fitted, ard, ['coef_']) + def test_check_consistent_length(): check_consistent_length([1], [2], [3], [4], [5]) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 
48daa64da77ee..558c3e9de060f 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -155,7 +155,6 @@ def _num_samples(x): raise TypeError(message) - def check_memory(memory): """Check that ``memory`` is joblib.Memory-like. @@ -866,7 +865,7 @@ def check_symmetric(array, tol=1E-10, raise_warning=True, return array -def check_is_fitted(estimator, *, msg=None): +def check_is_fitted(estimator, attributes='deprecated', msg=None): """Perform is_fitted validation for estimator. Checks if the estimator is fitted by verifying the presence of @@ -897,6 +896,10 @@ def check_is_fitted(estimator, *, msg=None): NotFittedError If the attributes are not found. """ + if attributes != 'deprecated': + warnings.warn("Passing attributes to check_is_fitted is deprecated" + "and will be removed in 0.23. The attributes " + "argument is ignored.", DeprecationWarning) if msg is None: msg = ("This %(name)s instance is not fitted yet. Call 'fit' with " "appropriate arguments before using this method.") From 1181982935808ceb7b68ff697baed702471a7d9b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Aug 2019 15:25:36 -0400 Subject: [PATCH 07/21] also allow private fitted attributes --- sklearn/utils/validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 558c3e9de060f..d0fe8276d853f 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -907,7 +907,8 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): if not hasattr(estimator, 'fit'): raise TypeError("%s is not an estimator instance." % (estimator)) - attrs = [v for v in vars(estimator) if v.endswith("_") + attrs = [v for v in vars(estimator) + if (v.endswith("_") or v.startswith("_")) and not v.startswith("__")] if not len(attrs): From 7ed876d57c149b763fa4ac4d93c7c62f29e46446 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Aug 2019 15:48:57 -0400 Subject: [PATCH 08/21] slight refactoring in CountVectorizer to mess less with the vocabulary --- sklearn/feature_extraction/text.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 01a7b70587f3d..4944d23200418 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -32,7 +32,7 @@ from ..utils.validation import check_is_fitted, check_array, FLOAT_DTYPES from ..utils import _IS_32BIT from ..utils.fixes import _astype_copy_false -from ..exceptions import ChangedBehaviorWarning +from ..exceptions import ChangedBehaviorWarning, NotFittedError __all__ = ['HashingVectorizer', @@ -450,9 +450,11 @@ def _validate_vocabulary(self): self.fixed_vocabulary_ = False def _check_vocabulary(self): - """Check if vocabulary is empty or missing (not fit-ed)""" - msg = "%(name)s - Vocabulary wasn't fitted." 
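# An illustrative sketch, not part of the diff, of the detection rule after
# the "private fitted attributes" commit above; _is_fitted is a hypothetical
# helper mirroring the logic now inside check_is_fitted:
def _is_fitted(est):
    return any((v.endswith("_") or v.startswith("_"))
               and not v.startswith("__") for v in vars(est))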
- check_is_fitted(self, msg=msg), + """Check if vocabulary is empty or missing (not fitted)""" + if not hasattr(self, 'vocabulary_'): + self._validate_vocabulary() + if not self.fixed_vocabulary_: + raise NotFittedError("Vocabulary not fitted or provided") if len(self.vocabulary_) == 0: raise ValueError("Vocabulary is empty") @@ -1172,10 +1174,6 @@ def transform(self, raw_documents): raise ValueError( "Iterable over raw text documents expected, " "string object received.") - - if not hasattr(self, 'vocabulary_'): - self._validate_vocabulary() - self._check_vocabulary() # use the same matrix-building strategy as fit_transform @@ -1216,8 +1214,6 @@ def inverse_transform(self, X): def get_feature_names(self): """Array mapping from feature integer indices to feature name""" - if not hasattr(self, 'vocabulary_'): - self._validate_vocabulary() self._check_vocabulary() From 8701cc0e07a9d070e8d823edbc675a06453e5db8 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Aug 2019 15:52:42 -0400 Subject: [PATCH 09/21] added regression test for not being able to call inverse_transform before transform --- sklearn/feature_extraction/tests/test_text.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sklearn/feature_extraction/tests/test_text.py b/sklearn/feature_extraction/tests/test_text.py index 2bc1ad25bca63..e3be2c27955b6 100644 --- a/sklearn/feature_extraction/tests/test_text.py +++ b/sklearn/feature_extraction/tests/test_text.py @@ -261,6 +261,10 @@ def test_countvectorizer_custom_vocabulary(): assert set(vect.vocabulary_) == terms X = vect.transform(JUNK_FOOD_DOCS) assert X.shape[1] == len(terms) + v = typ(vocab) + vect = CountVectorizer(vocabulary=v) + inv = vect.inverse_transform(X) + assert len(inv) == X.shape[0] def test_countvectorizer_custom_vocabulary_pipeline(): From be4a90f0bf82d684f53e3bb405b7b51be115c83e Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 2 Aug 2019 16:16:35 -0400 Subject: [PATCH 10/21] add special check for classes --- sklearn/utils/tests/test_validation.py | 4 ++-- sklearn/utils/validation.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index ec812b64938bc..552bd85e91b99 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -631,8 +631,8 @@ def test_check_symmetric(): def test_check_is_fitted(): - # Check is ValueError raised when non estimator instance passed - assert_raises(ValueError, check_is_fitted, ARDRegression) + # Check is TypeError raised when non estimator instance passed + assert_raises(TypeError, check_is_fitted, ARDRegression) assert_raises(TypeError, check_is_fitted, "SVR") ard = ARDRegression() diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index d0fe8276d853f..06604bb583f66 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -14,7 +14,7 @@ import numpy as np import scipy.sparse as sp from distutils.version import LooseVersion -from inspect import signature +from inspect import signature, isclass from numpy.core.numeric import ComplexWarning import joblib @@ -900,6 +900,8 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): warnings.warn("Passing attributes to check_is_fitted is deprecated" "and will be removed in 0.23. 
The attributes " "argument is ignored.", DeprecationWarning) + if isclass(estimator): + raise TypeError("{} is a class, not an instance.".format(estimator)) if msg is None: msg = ("This %(name)s instance is not fitted yet. Call 'fit' with " "appropriate arguments before using this method.") From 7e330276fa23ce04f1489b7e4ac99ec0eead7072 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 5 Aug 2019 11:28:49 -0400 Subject: [PATCH 11/21] more functions to fix --- sklearn/tree/export.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sklearn/tree/export.py b/sklearn/tree/export.py index 99b5e54a4c9b7..8e2e0a486b756 100644 --- a/sklearn/tree/export.py +++ b/sklearn/tree/export.py @@ -11,7 +11,6 @@ # Li Li # Giuseppe Vettigli # License: BSD 3 clause -import warnings from io import StringIO from numbers import Integral @@ -743,7 +742,7 @@ def export_graphviz(decision_tree, out_file=None, max_depth=None, 'digraph Tree {... """ - check_is_fitted(decision_tree, 'tree_') + check_is_fitted(decision_tree) own_file = False return_string = False try: @@ -849,7 +848,7 @@ def export_text(decision_tree, feature_names=None, max_depth=10, | |--- petal width (cm) > 1.75 | | |--- class: 2 """ - check_is_fitted(decision_tree, 'tree_') + check_is_fitted(decision_tree) tree_ = decision_tree.tree_ class_names = decision_tree.classes_ right_child_fmt = "{} {} <= {}\n" From 09e41929c32fee78be87fe91916b925c2cc2adc0 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 6 Aug 2019 17:50:40 -0400 Subject: [PATCH 12/21] Update sklearn/utils/validation.py Co-Authored-By: Guillaume Lemaitre --- sklearn/utils/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 06604bb583f66..8b8e77414af09 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -913,7 +913,7 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): if (v.endswith("_") or v.startswith("_")) and not v.startswith("__")] - if not len(attrs): + if not attrs: raise NotFittedError(msg % {'name': type(estimator).__name__}) From 40af13e033a0b1b018ad06cc52923e6f4f23121b Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 6 Aug 2019 17:54:15 -0400 Subject: [PATCH 13/21] fix whitespace, keyword args --- sklearn/feature_extraction/text.py | 4 ++-- sklearn/utils/validation.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index 4944d23200418..2b7c52df8c102 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -1376,7 +1376,7 @@ def transform(self, X, copy=True): X.data += 1 if self.use_idf: - check_is_fitted(self, 'idf vector is not fitted') + check_is_fitted(self, msg='idf vector is not fitted') expected_n_features = self._idf_diag.shape[0] if n_features != expected_n_features: @@ -1745,7 +1745,7 @@ def transform(self, raw_documents, copy=True): X : sparse matrix, [n_samples, n_features] Tf-idf-weighted document-term matrix. 
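# An illustrative sketch, not part of the diff, of why the keyword matters
# in the hunk below: under the backward-compatible signature
# check_is_fitted(estimator, attributes='deprecated', msg=None), a bare
# positional string lands in the deprecated attributes slot.
check_is_fitted(self, 'The tfidf vector is not fitted')      # warns; message ignored
check_is_fitted(self, msg='The tfidf vector is not fitted')  # message actually used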
""" - check_is_fitted(self, 'The tfidf vector is not fitted') + check_is_fitted(self, msg='The tfidf vector is not fitted') X = super().transform(raw_documents) return self._tfidf.transform(X, copy=False) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 06604bb583f66..d38a98165a504 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -898,7 +898,7 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): """ if attributes != 'deprecated': warnings.warn("Passing attributes to check_is_fitted is deprecated" - "and will be removed in 0.23. The attributes " + " and will be removed in 0.23. The attributes " "argument is ignored.", DeprecationWarning) if isclass(estimator): raise TypeError("{} is a class, not an instance.".format(estimator)) From 86aebe7b5de423d9d720259376e687f4b079c269 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Aug 2019 11:11:04 -0400 Subject: [PATCH 14/21] remove extra blank line --- sklearn/utils/validation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 79aed001735a8..c32a92d4b4a5e 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -877,7 +877,6 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): estimator : estimator instance. estimator instance for which the check is performed. - msg : string The default error message is, "This %(name)s instance is not fitted yet. Call 'fit' with appropriate arguments before using this method." From ec25b3c5e24aaf2445e8fb091bf2bce3884d5389 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Thu, 8 Aug 2019 11:13:08 -0400 Subject: [PATCH 15/21] fix CI hopefully --- sklearn/utils/validation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index c32a92d4b4a5e..64e57a896ab4f 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -877,6 +877,8 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): estimator : estimator instance. estimator instance for which the check is performed. + attributes : deprecated, ignored + msg : string The default error message is, "This %(name)s instance is not fitted yet. Call 'fit' with appropriate arguments before using this method." From 9038c62a9892739b9103bd5f2f5aa9868b9f182c Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 9 Aug 2019 15:20:03 -0400 Subject: [PATCH 16/21] deprecate all_or_any in check_is_fittec --- sklearn/utils/validation.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 64e57a896ab4f..fccc8b87b9d14 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -865,7 +865,8 @@ def check_symmetric(array, tol=1E-10, raise_warning=True, return array -def check_is_fitted(estimator, attributes='deprecated', msg=None): +def check_is_fitted(estimator, attributes='deprecated', msg=None, + all_or_any='deprecated'): """Perform is_fitted validation for estimator. Checks if the estimator is fitted by verifying the presence of @@ -888,6 +889,8 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): Eg. : "Estimator, %(name)s, must be fitted before sparsifying". 
+ all_or_any : deprecated, ignored + Returns ------- None @@ -901,6 +904,10 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None): warnings.warn("Passing attributes to check_is_fitted is deprecated" " and will be removed in 0.23. The attributes " "argument is ignored.", DeprecationWarning) + if attributes != 'any_or_all': + warnings.warn("Passing any_or_all to check_is_fitted is deprecated" + " and will be removed in 0.23. The any_or_all " + "argument is ignored.", DeprecationWarning) if isclass(estimator): raise TypeError("{} is a class, not an instance.".format(estimator)) if msg is None: From 98625297bd1b4d9ddc2134dfa8476e20eefb09dd Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Fri, 9 Aug 2019 15:22:05 -0400 Subject: [PATCH 17/21] fix typo, add test for deprecation --- sklearn/utils/tests/test_validation.py | 4 ++++ sklearn/utils/validation.py | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index 552bd85e91b99..fd166a257bc37 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -665,6 +665,10 @@ def test_check_is_fitted(): DeprecationWarning, "Passing attributes to check_is_fitted is deprecated", check_is_fitted, ard, ['coef_']) + assert_warns_message( + DeprecationWarning, + "Passing all_or_any to check_is_fitted is deprecated", + check_is_fitted, ard, all_or_any=any) def test_check_consistent_length(): diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index fccc8b87b9d14..1e178b91a1245 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -904,8 +904,8 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None, warnings.warn("Passing attributes to check_is_fitted is deprecated" " and will be removed in 0.23. The attributes " "argument is ignored.", DeprecationWarning) - if attributes != 'any_or_all': - warnings.warn("Passing any_or_all to check_is_fitted is deprecated" + if all_or_any != 'deprecated': + warnings.warn("Passing all_or_any to check_is_fitted is deprecated" " and will be removed in 0.23. 
The any_or_all " "argument is ignored.", DeprecationWarning) if isclass(estimator): From e958e6275bb0087c613c0c4a0974716666e0ba27 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 12 Aug 2019 15:48:18 -0400 Subject: [PATCH 18/21] add comment on 0.23 removal of deprecated arguments to check_is_fitted --- sklearn/utils/tests/test_validation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sklearn/utils/tests/test_validation.py b/sklearn/utils/tests/test_validation.py index fd166a257bc37..897eea6df8264 100644 --- a/sklearn/utils/tests/test_validation.py +++ b/sklearn/utils/tests/test_validation.py @@ -661,6 +661,7 @@ def test_check_is_fitted(): assert check_is_fitted(ard) is None assert check_is_fitted(svr) is None + # to be removed in 0.23 assert_warns_message( DeprecationWarning, "Passing attributes to check_is_fitted is deprecated", From 0538f9104bf55f19395f3d97f74bfdfda62988bb Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 12 Aug 2019 15:49:31 -0400 Subject: [PATCH 19/21] Apply suggestions from code review Co-Authored-By: Guillaume Lemaitre --- sklearn/utils/validation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index 1e178b91a1245..ac79186a72cbc 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -890,6 +890,8 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None, Eg. : "Estimator, %(name)s, must be fitted before sparsifying". all_or_any : deprecated, ignored + .. deprecated:: 0.21 + `all_or_any` is deprecated, is currently ignored and will be removed in 0.23. Returns ------- From 11995c85fbfe5c31d664040303abb678a5dde046 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Mon, 12 Aug 2019 15:50:07 -0400 Subject: [PATCH 20/21] Update sklearn/utils/validation.py Co-Authored-By: Guillaume Lemaitre --- sklearn/utils/validation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index ac79186a72cbc..b598a9dd290e3 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -879,6 +879,8 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None, estimator instance for which the check is performed. attributes : deprecated, ignored + .. deprecated:: 0.22 + `attributes` is deprecated, is currently ignored and will be removed in 0.23. msg : string The default error message is, "This %(name)s instance is not fitted From 7463363c1d6db1c09da9691c00192a70b70aa1c8 Mon Sep 17 00:00:00 2001 From: Andreas Mueller Date: Tue, 13 Aug 2019 14:56:40 -0400 Subject: [PATCH 21/21] pep8 --- sklearn/utils/validation.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py index b598a9dd290e3..8a63f12f33035 100644 --- a/sklearn/utils/validation.py +++ b/sklearn/utils/validation.py @@ -880,7 +880,8 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None, attributes : deprecated, ignored .. deprecated:: 0.22 - `attributes` is deprecated, is currently ignored and will be removed in 0.23. + `attributes` is deprecated, is currently ignored and will be removed + in 0.23. msg : string The default error message is, "This %(name)s instance is not fitted @@ -892,8 +893,9 @@ def check_is_fitted(estimator, attributes='deprecated', msg=None, Eg. : "Estimator, %(name)s, must be fitted before sparsifying". all_or_any : deprecated, ignored - .. 
deprecated:: 0.21 - `all_or_any` is deprecated, is currently ignored and will be removed in 0.23. + .. deprecated:: 0.21 + `all_or_any` is deprecated, is currently ignored and will be removed + in 0.23. Returns -------