diff --git a/build_tools/circle/list_versions.py b/build_tools/circle/list_versions.py index e1f8d54b84ec5..00526f062f200 100755 --- a/build_tools/circle/list_versions.py +++ b/build_tools/circle/list_versions.py @@ -71,10 +71,8 @@ def get_file_size(version): "Web-based documentation is available for versions listed below:\n", ] -ROOT_URL = ( - "https://api.github.com/repos/scikit-learn/scikit-learn.github.io/contents/" # noqa -) -RAW_FMT = "https://raw.githubusercontent.com/scikit-learn/scikit-learn.github.io/master/%s/index.html" # noqa +ROOT_URL = "https://api.github.com/repos/scikit-learn/scikit-learn.github.io/contents/" +RAW_FMT = "https://raw.githubusercontent.com/scikit-learn/scikit-learn.github.io/master/%s/index.html" VERSION_RE = re.compile(r"scikit-learn ([\w\.\-]+) documentation") NAMED_DIRS = ["dev", "stable"] diff --git a/build_tools/update_environments_and_lock_files.py b/build_tools/update_environments_and_lock_files.py index cd2c8d95dcbce..8fee1b9e3cffc 100644 --- a/build_tools/update_environments_and_lock_files.py +++ b/build_tools/update_environments_and_lock_files.py @@ -643,9 +643,9 @@ def write_pip_lock_file(build_metadata): json_output = execute_command(["conda", "info", "--json"]) conda_info = json.loads(json_output) - environment_folder = [ + environment_folder = next( each for each in conda_info["envs"] if each.endswith(environment_name) - ][0] + ) environment_path = Path(environment_folder) pip_compile_path = environment_path / "bin" / "pip-compile" diff --git a/examples/ensemble/plot_forest_hist_grad_boosting_comparison.py b/examples/ensemble/plot_forest_hist_grad_boosting_comparison.py index 1bc3804ee4764..85e73a2298d36 100644 --- a/examples/ensemble/plot_forest_hist_grad_boosting_comparison.py +++ b/examples/ensemble/plot_forest_hist_grad_boosting_comparison.py @@ -143,7 +143,7 @@ for idx, result in enumerate(results): cv_results = result["cv_results"].round(3) model_name = result["model"] - param_name = list(param_grids[model_name].keys())[0] + param_name = next(iter(param_grids[model_name].keys())) cv_results[param_name] = cv_results["param_" + param_name] cv_results["model"] = model_name diff --git a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py index aabc8058dc407..4829e87bfda0b 100644 --- a/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py +++ b/examples/linear_model/plot_sgdocsvm_vs_ocsvm.py @@ -17,7 +17,7 @@ benefits of such an approximation in terms of computation time but rather to show that we obtain similar results on a toy dataset. 
-""" # noqa: E501 +""" # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause diff --git a/examples/model_selection/plot_grid_search_stats.py b/examples/model_selection/plot_grid_search_stats.py index a4f1c8e1417ba..febef9cb2ad98 100644 --- a/examples/model_selection/plot_grid_search_stats.py +++ b/examples/model_selection/plot_grid_search_stats.py @@ -230,8 +230,8 @@ def compute_corrected_ttest(differences, df, n_train, n_test): n = differences.shape[0] # number of test sets df = n - 1 -n_train = len(list(cv.split(X, y))[0][0]) -n_test = len(list(cv.split(X, y))[0][1]) +n_train = len(next(iter(cv.split(X, y)))[0]) +n_test = len(next(iter(cv.split(X, y)))[1]) t_stat, p_val = compute_corrected_ttest(differences, df, n_train, n_test) print(f"Corrected t-value: {t_stat:.3f}\nCorrected p-value: {p_val:.3f}") diff --git a/examples/neighbors/plot_species_kde.py b/examples/neighbors/plot_species_kde.py index 754f887f10138..a6c6808476673 100644 --- a/examples/neighbors/plot_species_kde.py +++ b/examples/neighbors/plot_species_kde.py @@ -33,7 +33,7 @@ `_ S. J. Phillips, R. P. Anderson, R. E. Schapire - Ecological Modelling, 190:231-259, 2006. -""" # noqa: E501 +""" # Authors: The scikit-learn developers # SPDX-License-Identifier: BSD-3-Clause diff --git a/pyproject.toml b/pyproject.toml index effa244a06086..ff0a9856b7802 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -148,7 +148,7 @@ preview = true # This enables us to use the explicit preview rules that we want only explicit-preview-rules = true # all rules can be found here: https://beta.ruff.rs/docs/rules/ -select = ["E", "F", "W", "I", "CPY001"] +select = ["E", "F", "W", "I", "CPY001", "RUF"] ignore=[ # space before : (needed for how black formats slicing) "E203", @@ -163,6 +163,13 @@ ignore=[ # F841 is in preview (july 2024), and we don't care much about it. # Local variable ... 
is assigned to but never used "F841", + # some RUF rules trigger too many changes + "RUF002", + "RUF003", + "RUF005", + "RUF012", + "RUF015", + "RUF021", ] [tool.ruff.lint.flake8-copyright] diff --git a/sklearn/_loss/__init__.py b/sklearn/_loss/__init__.py index bc348bbca8a15..97fdd884e517c 100644 --- a/sklearn/_loss/__init__.py +++ b/sklearn/_loss/__init__.py @@ -20,14 +20,14 @@ ) __all__ = [ - "HalfSquaredError", "AbsoluteError", - "PinballLoss", - "HuberLoss", - "HalfPoissonLoss", + "HalfBinomialLoss", "HalfGammaLoss", + "HalfMultinomialLoss", + "HalfPoissonLoss", + "HalfSquaredError", "HalfTweedieLoss", "HalfTweedieLossIdentity", - "HalfBinomialLoss", - "HalfMultinomialLoss", + "HuberLoss", + "PinballLoss", ] diff --git a/sklearn/cluster/__init__.py b/sklearn/cluster/__init__.py index a0545d3b90d56..de86a59e07113 100644 --- a/sklearn/cluster/__init__.py +++ b/sklearn/cluster/__init__.py @@ -26,21 +26,24 @@ from ._spectral import SpectralClustering, spectral_clustering __all__ = [ + "DBSCAN", + "HDBSCAN", + "OPTICS", "AffinityPropagation", "AgglomerativeClustering", "Birch", - "DBSCAN", - "OPTICS", - "cluster_optics_dbscan", - "cluster_optics_xi", - "compute_optics_graph", - "KMeans", "BisectingKMeans", "FeatureAgglomeration", + "KMeans", "MeanShift", "MiniBatchKMeans", + "SpectralBiclustering", "SpectralClustering", + "SpectralCoclustering", "affinity_propagation", + "cluster_optics_dbscan", + "cluster_optics_xi", + "compute_optics_graph", "dbscan", "estimate_bandwidth", "get_bin_seeds", @@ -50,7 +53,4 @@ "mean_shift", "spectral_clustering", "ward_tree", - "SpectralBiclustering", - "SpectralCoclustering", - "HDBSCAN", ] diff --git a/sklearn/cluster/_bicluster.py b/sklearn/cluster/_bicluster.py index 95f49056ef646..be5dac955f7f7 100644 --- a/sklearn/cluster/_bicluster.py +++ b/sklearn/cluster/_bicluster.py @@ -18,7 +18,7 @@ from ..utils.validation import assert_all_finite, validate_data from ._kmeans import KMeans, MiniBatchKMeans -__all__ = ["SpectralCoclustering", "SpectralBiclustering"] +__all__ = ["SpectralBiclustering", "SpectralCoclustering"] def _scale_normalize(X): diff --git a/sklearn/compose/__init__.py b/sklearn/compose/__init__.py index 9f20bc9856074..842a86ba21d9b 100644 --- a/sklearn/compose/__init__.py +++ b/sklearn/compose/__init__.py @@ -17,7 +17,7 @@ __all__ = [ "ColumnTransformer", - "make_column_transformer", "TransformedTargetRegressor", "make_column_selector", + "make_column_transformer", ] diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py index e088f534707d2..65eed27e3e07f 100644 --- a/sklearn/compose/_column_transformer.py +++ b/sklearn/compose/_column_transformer.py @@ -50,7 +50,7 @@ check_is_fitted, ) -__all__ = ["ColumnTransformer", "make_column_transformer", "make_column_selector"] +__all__ = ["ColumnTransformer", "make_column_selector", "make_column_transformer"] _ERR_MSG_1DCOLUMN = ( @@ -1352,10 +1352,8 @@ def _is_empty_column_selection(column): if hasattr(column, "dtype") and np.issubdtype(column.dtype, np.bool_): return not column.any() elif hasattr(column, "__len__"): - return ( - len(column) == 0 - or all(isinstance(col, bool) for col in column) - and not any(column) + return len(column) == 0 or ( + all(isinstance(col, bool) for col in column) and not any(column) ) else: return False diff --git a/sklearn/compose/tests/test_column_transformer.py b/sklearn/compose/tests/test_column_transformer.py index 704236def45b6..588976f18b265 100644 --- a/sklearn/compose/tests/test_column_transformer.py +++ 
b/sklearn/compose/tests/test_column_transformer.py @@ -361,7 +361,7 @@ def test_column_transformer_empty_columns(pandas, column_selection, callable_col X = X_array if callable_column: - column = lambda X: column_selection # noqa + column = lambda X: column_selection else: column = column_selection diff --git a/sklearn/conftest.py b/sklearn/conftest.py index 0c7e00a93c6aa..6af3a2a51c0ce 100644 --- a/sklearn/conftest.py +++ b/sklearn/conftest.py @@ -185,7 +185,7 @@ def pytest_collection_modifyitems(config, items): marker = pytest.mark.xfail( reason=( "know failure. See " - "https://github.com/scikit-learn/scikit-learn/issues/17797" # noqa + "https://github.com/scikit-learn/scikit-learn/issues/17797" ) ) item.add_marker(marker) diff --git a/sklearn/covariance/__init__.py b/sklearn/covariance/__init__.py index 989f3372b42e0..65817ef7b977b 100644 --- a/sklearn/covariance/__init__.py +++ b/sklearn/covariance/__init__.py @@ -27,13 +27,13 @@ ) __all__ = [ + "OAS", "EllipticEnvelope", "EmpiricalCovariance", "GraphicalLasso", "GraphicalLassoCV", "LedoitWolf", "MinCovDet", - "OAS", "ShrunkCovariance", "empirical_covariance", "fast_mcd", diff --git a/sklearn/cross_decomposition/__init__.py b/sklearn/cross_decomposition/__init__.py index cad873ed800c6..f78f33811e5c7 100644 --- a/sklearn/cross_decomposition/__init__.py +++ b/sklearn/cross_decomposition/__init__.py @@ -5,4 +5,4 @@ from ._pls import CCA, PLSSVD, PLSCanonical, PLSRegression -__all__ = ["PLSCanonical", "PLSRegression", "PLSSVD", "CCA"] +__all__ = ["CCA", "PLSSVD", "PLSCanonical", "PLSRegression"] diff --git a/sklearn/cross_decomposition/_pls.py b/sklearn/cross_decomposition/_pls.py index affc9f8f96c02..7183e6e15414a 100644 --- a/sklearn/cross_decomposition/_pls.py +++ b/sklearn/cross_decomposition/_pls.py @@ -27,7 +27,7 @@ from ..utils.fixes import parse_version, sp_version from ..utils.validation import FLOAT_DTYPES, check_is_fitted, validate_data -__all__ = ["PLSCanonical", "PLSRegression", "PLSSVD"] +__all__ = ["PLSSVD", "PLSCanonical", "PLSRegression"] if sp_version >= parse_version("1.7"): diff --git a/sklearn/datasets/__init__.py b/sklearn/datasets/__init__.py index 18c3cea4ea342..8863fe489f3b6 100644 --- a/sklearn/datasets/__init__.py +++ b/sklearn/datasets/__init__.py @@ -61,22 +61,22 @@ "dump_svmlight_file", "fetch_20newsgroups", "fetch_20newsgroups_vectorized", + "fetch_california_housing", + "fetch_covtype", "fetch_file", + "fetch_kddcup99", "fetch_lfw_pairs", "fetch_lfw_people", "fetch_olivetti_faces", - "fetch_species_distributions", - "fetch_california_housing", - "fetch_covtype", - "fetch_rcv1", - "fetch_kddcup99", "fetch_openml", + "fetch_rcv1", + "fetch_species_distributions", "get_data_home", + "load_breast_cancer", "load_diabetes", "load_digits", "load_files", "load_iris", - "load_breast_cancer", "load_linnerud", "load_sample_image", "load_sample_images", @@ -85,9 +85,9 @@ "load_wine", "make_biclusters", "make_blobs", + "make_checkerboard", "make_circles", "make_classification", - "make_checkerboard", "make_friedman1", "make_friedman2", "make_friedman3", diff --git a/sklearn/datasets/_kddcup99.py b/sklearn/datasets/_kddcup99.py index ab4db0522ef20..f379da42eb9df 100644 --- a/sklearn/datasets/_kddcup99.py +++ b/sklearn/datasets/_kddcup99.py @@ -376,7 +376,7 @@ def _fetch_brute_kddcup99( except Exception as e: raise OSError( "The cache for fetch_kddcup99 is invalid, please delete " - f"{str(kddcup_dir)} and run the fetch_kddcup99 again" + f"{kddcup_dir} and run the fetch_kddcup99 again" ) from e elif download_if_missing: 
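Aside: a minimal, self-contained sketch of the `next(...)` rewrite applied in the hunks above (`update_environments_and_lock_files.py`, `plot_forest_hist_grad_boosting_comparison.py`, `plot_grid_search_stats.py`). RUF015, the rule covering this pattern, is ignored in the pyproject hunk, so these are manual cleanups in the same spirit. The `envs` values below are made up for illustration:

    # Before: build the full filtered list, then keep only element 0.
    # After: the generator is consumed only until the first hit, and next()
    # raises StopIteration (rather than IndexError) when nothing matches.
    envs = ["/opt/conda", "/opt/conda/envs/doc", "/opt/conda/envs/test"]

    first_eager = [e for e in envs if e.endswith("doc")][0]
    first_lazy = next(e for e in envs if e.endswith("doc"))

    assert first_eager == first_lazy == "/opt/conda/envs/doc"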
diff --git a/sklearn/datasets/_openml.py b/sklearn/datasets/_openml.py index 6a23c5116227d..47ecdcd14de9d 100644 --- a/sklearn/datasets/_openml.py +++ b/sklearn/datasets/_openml.py @@ -20,7 +20,7 @@ import numpy as np from ..utils import Bunch -from ..utils._optional_dependencies import check_pandas_support # noqa +from ..utils._optional_dependencies import check_pandas_support from ..utils._param_validation import ( Integral, Interval, diff --git a/sklearn/datasets/_svmlight_format_io.py b/sklearn/datasets/_svmlight_format_io.py index b4c4c887b50dc..e3a833efb86c0 100644 --- a/sklearn/datasets/_svmlight_format_io.py +++ b/sklearn/datasets/_svmlight_format_io.py @@ -384,10 +384,8 @@ def get_data(): for f in files ] - if ( - zero_based is False - or zero_based == "auto" - and all(len(tmp[1]) and np.min(tmp[1]) > 0 for tmp in r) + if zero_based is False or ( + zero_based == "auto" and all(len(tmp[1]) and np.min(tmp[1]) > 0 for tmp in r) ): for _, indices, _, _, _ in r: indices -= 1 diff --git a/sklearn/datasets/tests/test_kddcup99.py b/sklearn/datasets/tests/test_kddcup99.py index 5f6e9c83a30b8..8fa5e397ead90 100644 --- a/sklearn/datasets/tests/test_kddcup99.py +++ b/sklearn/datasets/tests/test_kddcup99.py @@ -82,7 +82,7 @@ def test_corrupted_file_error_message(fetch_kddcup99_fxt, tmp_path): msg = ( "The cache for fetch_kddcup99 is invalid, please " - f"delete {str(kddcup99_dir)} and run the fetch_kddcup99 again" + f"delete {kddcup99_dir} and run the fetch_kddcup99 again" ) with pytest.raises(OSError, match=msg): diff --git a/sklearn/decomposition/__init__.py b/sklearn/decomposition/__init__.py index cd013fe9c7a93..6d3fa9b42895a 100644 --- a/sklearn/decomposition/__init__.py +++ b/sklearn/decomposition/__init__.py @@ -31,24 +31,24 @@ from ._truncated_svd import TruncatedSVD __all__ = [ + "NMF", + "PCA", "DictionaryLearning", + "FactorAnalysis", "FastICA", "IncrementalPCA", "KernelPCA", + "LatentDirichletAllocation", "MiniBatchDictionaryLearning", "MiniBatchNMF", "MiniBatchSparsePCA", - "NMF", - "PCA", "SparseCoder", "SparsePCA", + "TruncatedSVD", "dict_learning", "dict_learning_online", "fastica", "non_negative_factorization", "randomized_svd", "sparse_encode", - "FactorAnalysis", - "TruncatedSVD", - "LatentDirichletAllocation", ] diff --git a/sklearn/decomposition/_fastica.py b/sklearn/decomposition/_fastica.py index 2ef6162946574..a6fd837313fc5 100644 --- a/sklearn/decomposition/_fastica.py +++ b/sklearn/decomposition/_fastica.py @@ -25,7 +25,7 @@ from ..utils._param_validation import Interval, Options, StrOptions, validate_params from ..utils.validation import check_is_fitted, validate_data -__all__ = ["fastica", "FastICA"] +__all__ = ["FastICA", "fastica"] def _gs_decorrelation(w, W, j): diff --git a/sklearn/decomposition/tests/test_fastica.py b/sklearn/decomposition/tests/test_fastica.py index 0066d9faf17f2..22c9af52cd1d6 100644 --- a/sklearn/decomposition/tests/test_fastica.py +++ b/sklearn/decomposition/tests/test_fastica.py @@ -80,7 +80,7 @@ def test_fastica_simple(add_noise, global_random_seed, global_dtype): pytest.xfail( "FastICA instability with Ubuntu Atlas build with float32 " "global_dtype. For more details, see " - "https://github.com/scikit-learn/scikit-learn/issues/24131#issuecomment-1208091119" # noqa + "https://github.com/scikit-learn/scikit-learn/issues/24131#issuecomment-1208091119" ) # Test the FastICA algorithm on very simple data. 
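Aside: the explicit parentheses added in `_is_empty_column_selection` and `_svmlight_format_io` above (and later in `_spectral_embedding`, `_split.py`, `multiclass.py`, and `validation.py`) are purely clarifying: `and` binds tighter than `or`, so `a or b and c` has always parsed as `a or (b and c)`. RUF021, the rule that flags such chains, is ignored in the pyproject hunk above, so these spots appear to have been parenthesized by hand. A quick exhaustive check that the rewrites are behavior-preserving:

    from itertools import product

    # Over all eight boolean combinations, the unparenthesized and
    # parenthesized forms agree for both chain shapes used in the diff.
    for a, b, c in product([False, True], repeat=3):
        assert (a or b and c) == (a or (b and c))
        assert (a and b or c) == ((a and b) or c)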
diff --git a/sklearn/ensemble/__init__.py b/sklearn/ensemble/__init__.py index 2a8cf413be9da..62a538d340318 100644 --- a/sklearn/ensemble/__init__.py +++ b/sklearn/ensemble/__init__.py @@ -23,23 +23,23 @@ from ._weight_boosting import AdaBoostClassifier, AdaBoostRegressor __all__ = [ + "AdaBoostClassifier", + "AdaBoostRegressor", + "BaggingClassifier", + "BaggingRegressor", "BaseEnsemble", - "RandomForestClassifier", - "RandomForestRegressor", - "RandomTreesEmbedding", "ExtraTreesClassifier", "ExtraTreesRegressor", - "BaggingClassifier", - "BaggingRegressor", - "IsolationForest", "GradientBoostingClassifier", "GradientBoostingRegressor", - "AdaBoostClassifier", - "AdaBoostRegressor", - "VotingClassifier", - "VotingRegressor", - "StackingClassifier", - "StackingRegressor", "HistGradientBoostingClassifier", "HistGradientBoostingRegressor", + "IsolationForest", + "RandomForestClassifier", + "RandomForestRegressor", + "RandomTreesEmbedding", + "StackingClassifier", + "StackingRegressor", + "VotingClassifier", + "VotingRegressor", ] diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py index 5c2152f34e93d..890b8d7b23655 100644 --- a/sklearn/ensemble/_forest.py +++ b/sklearn/ensemble/_forest.py @@ -79,10 +79,10 @@ class calls the ``fit`` method of each sub-estimator on random samples from ._base import BaseEnsemble, _partition_estimators __all__ = [ - "RandomForestClassifier", - "RandomForestRegressor", "ExtraTreesClassifier", "ExtraTreesRegressor", + "RandomForestClassifier", + "RandomForestRegressor", "RandomTreesEmbedding", ] diff --git a/sklearn/ensemble/_voting.py b/sklearn/ensemble/_voting.py index bcf2d749725ff..f5325c89de18d 100644 --- a/sklearn/ensemble/_voting.py +++ b/sklearn/ensemble/_voting.py @@ -454,7 +454,7 @@ def _collect_probas(self, X): def _check_voting(self): if self.voting == "hard": raise AttributeError( - f"predict_proba is not available when voting={repr(self.voting)}" + f"predict_proba is not available when voting={self.voting!r}" ) return True diff --git a/sklearn/exceptions.py b/sklearn/exceptions.py index 1c9162dc760f9..7a7f1472ec48f 100644 --- a/sklearn/exceptions.py +++ b/sklearn/exceptions.py @@ -4,17 +4,17 @@ # SPDX-License-Identifier: BSD-3-Clause __all__ = [ - "NotFittedError", "ConvergenceWarning", "DataConversionWarning", "DataDimensionalityWarning", "EfficiencyWarning", + "EstimatorCheckFailedWarning", "FitFailedWarning", + "NotFittedError", + "PositiveSpectrumWarning", "SkipTestWarning", "UndefinedMetricWarning", - "PositiveSpectrumWarning", "UnsetMetadataPassedError", - "EstimatorCheckFailedWarning", ] diff --git a/sklearn/feature_extraction/__init__.py b/sklearn/feature_extraction/__init__.py index 3ca86d86bee68..0f8c53b4ffb6b 100644 --- a/sklearn/feature_extraction/__init__.py +++ b/sklearn/feature_extraction/__init__.py @@ -10,9 +10,9 @@ __all__ = [ "DictVectorizer", + "FeatureHasher", + "grid_to_graph", "image", "img_to_graph", - "grid_to_graph", "text", - "FeatureHasher", ] diff --git a/sklearn/feature_extraction/text.py b/sklearn/feature_extraction/text.py index e1bdfd5a7dee5..8d26539645866 100644 --- a/sklearn/feature_extraction/text.py +++ b/sklearn/feature_extraction/text.py @@ -28,9 +28,9 @@ from ._stop_words import ENGLISH_STOP_WORDS __all__ = [ - "HashingVectorizer", - "CountVectorizer", "ENGLISH_STOP_WORDS", + "CountVectorizer", + "HashingVectorizer", "TfidfTransformer", "TfidfVectorizer", "strip_accents_ascii", diff --git a/sklearn/feature_selection/__init__.py b/sklearn/feature_selection/__init__.py index 
fbb8f54350630..d0d2dcee909f4 100644 --- a/sklearn/feature_selection/__init__.py +++ b/sklearn/feature_selection/__init__.py @@ -28,23 +28,23 @@ from ._variance_threshold import VarianceThreshold __all__ = [ - "GenericUnivariateSelect", - "SequentialFeatureSelector", "RFE", "RFECV", + "GenericUnivariateSelect", "SelectFdr", "SelectFpr", + "SelectFromModel", "SelectFwe", "SelectKBest", - "SelectFromModel", "SelectPercentile", + "SelectorMixin", + "SequentialFeatureSelector", "VarianceThreshold", "chi2", "f_classif", "f_oneway", "f_regression", - "r_regression", "mutual_info_classif", "mutual_info_regression", - "SelectorMixin", + "r_regression", ] diff --git a/sklearn/gaussian_process/__init__.py b/sklearn/gaussian_process/__init__.py index 8dcbe3140415a..9fafaf67e4ed0 100644 --- a/sklearn/gaussian_process/__init__.py +++ b/sklearn/gaussian_process/__init__.py @@ -7,4 +7,4 @@ from ._gpc import GaussianProcessClassifier from ._gpr import GaussianProcessRegressor -__all__ = ["GaussianProcessRegressor", "GaussianProcessClassifier", "kernels"] +__all__ = ["GaussianProcessClassifier", "GaussianProcessRegressor", "kernels"] diff --git a/sklearn/impute/__init__.py b/sklearn/impute/__init__.py index 2f9ed9017c6cb..363d24d6a7f3e 100644 --- a/sklearn/impute/__init__.py +++ b/sklearn/impute/__init__.py @@ -13,7 +13,7 @@ # TODO: remove this check once the estimator is no longer experimental. from ._iterative import IterativeImputer # noqa -__all__ = ["MissingIndicator", "SimpleImputer", "KNNImputer"] +__all__ = ["KNNImputer", "MissingIndicator", "SimpleImputer"] # TODO: remove this check once the estimator is no longer experimental. diff --git a/sklearn/inspection/__init__.py b/sklearn/inspection/__init__.py index 8bb2b5dc575e9..8e0a1125ef041 100644 --- a/sklearn/inspection/__init__.py +++ b/sklearn/inspection/__init__.py @@ -9,8 +9,8 @@ from ._plot.partial_dependence import PartialDependenceDisplay __all__ = [ + "DecisionBoundaryDisplay", + "PartialDependenceDisplay", "partial_dependence", "permutation_importance", - "PartialDependenceDisplay", - "DecisionBoundaryDisplay", ] diff --git a/sklearn/inspection/_partial_dependence.py b/sklearn/inspection/_partial_dependence.py index 818f26f8a1c5f..3790eb8a9f78c 100644 --- a/sklearn/inspection/_partial_dependence.py +++ b/sklearn/inspection/_partial_dependence.py @@ -100,7 +100,7 @@ def _convert_custom_values(values): custom_values = {k: _convert_custom_values(v) for k, v in custom_values.items()} if any(v.ndim != 1 for v in custom_values.values()): error_string = ", ".join( - f"Feature {str(k)}: {v.ndim} dimensions" + f"Feature {k}: {v.ndim} dimensions" for k, v in custom_values.items() if v.ndim != 1 ) diff --git a/sklearn/inspection/_plot/decision_boundary.py b/sklearn/inspection/_plot/decision_boundary.py index 1ce189413eac9..b2cff9e12f8ce 100644 --- a/sklearn/inspection/_plot/decision_boundary.py +++ b/sklearn/inspection/_plot/decision_boundary.py @@ -204,8 +204,8 @@ def plot(self, plot_method="contourf", ax=None, xlabel=None, ylabel=None, **kwar Object that stores computed values. 
""" check_matplotlib_support("DecisionBoundaryDisplay.plot") - import matplotlib as mpl # noqa - import matplotlib.pyplot as plt # noqa + import matplotlib as mpl + import matplotlib.pyplot as plt if plot_method not in ("contourf", "contour", "pcolormesh"): raise ValueError( @@ -425,7 +425,7 @@ def from_estimator( """ check_matplotlib_support(f"{cls.__name__}.from_estimator") check_is_fitted(estimator) - import matplotlib as mpl # noqa + import matplotlib as mpl if not grid_resolution > 1: raise ValueError( diff --git a/sklearn/inspection/_plot/partial_dependence.py b/sklearn/inspection/_plot/partial_dependence.py index 788ec997a7fb5..400084d588f67 100644 --- a/sklearn/inspection/_plot/partial_dependence.py +++ b/sklearn/inspection/_plot/partial_dependence.py @@ -17,7 +17,7 @@ check_random_state, ) from ...utils._encode import _unique -from ...utils._optional_dependencies import check_matplotlib_support # noqa +from ...utils._optional_dependencies import check_matplotlib_support from ...utils._plotting import _validate_style_kwargs from ...utils.parallel import Parallel, delayed from .. import partial_dependence @@ -537,8 +537,8 @@ def from_estimator( <...> >>> plt.show() """ - check_matplotlib_support(f"{cls.__name__}.from_estimator") # noqa - import matplotlib.pyplot as plt # noqa + check_matplotlib_support(f"{cls.__name__}.from_estimator") + import matplotlib.pyplot as plt # set target_idx for multi-class estimators if hasattr(estimator, "classes_") and np.size(estimator.classes_) > 2: @@ -944,7 +944,7 @@ def _plot_one_way_partial_dependence( have the same scale and y limits. `pdp_lim[1]` is the global min and max for single partial dependence curves. """ - from matplotlib import transforms # noqa + from matplotlib import transforms if kind in ("individual", "both"): self._plot_ice_lines( @@ -1083,7 +1083,7 @@ def _plot_two_way_partial_dependence( heatmap_idx = np.unravel_index(pd_plot_idx, self.heatmaps_.shape) self.heatmaps_[heatmap_idx] = im else: - from matplotlib import transforms # noqa + from matplotlib import transforms XX, YY = np.meshgrid(feature_values[0], feature_values[1]) Z = avg_preds[self.target_idx].T @@ -1221,8 +1221,8 @@ def plot( """ check_matplotlib_support("plot_partial_dependence") - import matplotlib.pyplot as plt # noqa - from matplotlib.gridspec import GridSpecFromSubplotSpec # noqa + import matplotlib.pyplot as plt + from matplotlib.gridspec import GridSpecFromSubplotSpec if isinstance(self.kind, str): kind = [self.kind] * len(self.features) diff --git a/sklearn/isotonic.py b/sklearn/isotonic.py index fb47ca1dde68f..451d0544f672d 100644 --- a/sklearn/isotonic.py +++ b/sklearn/isotonic.py @@ -20,7 +20,7 @@ from .utils.fixes import parse_version, sp_base_version from .utils.validation import _check_sample_weight, check_is_fitted -__all__ = ["check_increasing", "isotonic_regression", "IsotonicRegression"] +__all__ = ["IsotonicRegression", "check_increasing", "isotonic_regression"] @validate_params( diff --git a/sklearn/linear_model/__init__.py b/sklearn/linear_model/__init__.py index 1ff28642bfb81..541f164daf46a 100644 --- a/sklearn/linear_model/__init__.py +++ b/sklearn/linear_model/__init__.py @@ -52,6 +52,7 @@ "BayesianRidge", "ElasticNet", "ElasticNetCV", + "GammaRegressor", "HuberRegressor", "Lars", "LarsCV", @@ -72,15 +73,18 @@ "PassiveAggressiveClassifier", "PassiveAggressiveRegressor", "Perceptron", + "PoissonRegressor", "QuantileRegressor", + "RANSACRegressor", "Ridge", "RidgeCV", "RidgeClassifier", "RidgeClassifierCV", "SGDClassifier", - "SGDRegressor", 
"SGDOneClassSVM", + "SGDRegressor", "TheilSenRegressor", + "TweedieRegressor", "enet_path", "lars_path", "lars_path_gram", @@ -88,8 +92,4 @@ "orthogonal_mp", "orthogonal_mp_gram", "ridge_regression", - "RANSACRegressor", - "PoissonRegressor", - "GammaRegressor", - "TweedieRegressor", ] diff --git a/sklearn/linear_model/_glm/__init__.py b/sklearn/linear_model/_glm/__init__.py index d0a51e65d3211..5c471c35096f8 100644 --- a/sklearn/linear_model/_glm/__init__.py +++ b/sklearn/linear_model/_glm/__init__.py @@ -9,8 +9,8 @@ ) __all__ = [ - "_GeneralizedLinearRegressor", - "PoissonRegressor", "GammaRegressor", + "PoissonRegressor", "TweedieRegressor", + "_GeneralizedLinearRegressor", ] diff --git a/sklearn/linear_model/_least_angle.py b/sklearn/linear_model/_least_angle.py index 25f956e5fadda..2945e00a1adda 100644 --- a/sklearn/linear_model/_least_angle.py +++ b/sklearn/linear_model/_least_angle.py @@ -554,7 +554,7 @@ def _lars_path_solver( Gram = None if X is None: raise ValueError("X and Gram cannot both be unspecified.") - elif isinstance(Gram, str) and Gram == "auto" or Gram is True: + elif (isinstance(Gram, str) and Gram == "auto") or Gram is True: if Gram is True or X.shape[0] > X.shape[1]: Gram = np.dot(X.T, X) else: @@ -1761,7 +1761,7 @@ def fit(self, X, y, **params): ) for train, test in cv.split(X, y, **routed_params.splitter.split) ) - all_alphas = np.concatenate(list(zip(*cv_paths))[0]) + all_alphas = np.concatenate(next(zip(*cv_paths))) # Unique also sorts all_alphas = np.unique(all_alphas) # Take at most max_n_alphas values diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py index 90dc6d6bc5e70..e58696d4d8296 100644 --- a/sklearn/linear_model/_ransac.py +++ b/sklearn/linear_model/_ransac.py @@ -256,7 +256,7 @@ class RANSACRegressor( For a more detailed example, see :ref:`sphx_glr_auto_examples_linear_model_plot_ransac.py` - """ # noqa: E501 + """ _parameter_constraints: dict = { "estimator": [HasMethods(["fit", "score", "predict"]), None], diff --git a/sklearn/manifold/__init__.py b/sklearn/manifold/__init__.py index 2266b6e08af88..349f7c1a4a7c4 100644 --- a/sklearn/manifold/__init__.py +++ b/sklearn/manifold/__init__.py @@ -10,13 +10,13 @@ from ._t_sne import TSNE, trustworthiness __all__ = [ - "locally_linear_embedding", - "LocallyLinearEmbedding", - "Isomap", "MDS", - "smacof", + "TSNE", + "Isomap", + "LocallyLinearEmbedding", "SpectralEmbedding", + "locally_linear_embedding", + "smacof", "spectral_embedding", - "TSNE", "trustworthiness", ] diff --git a/sklearn/manifold/_spectral_embedding.py b/sklearn/manifold/_spectral_embedding.py index d3d45ec0773c3..06a2ffbf27a36 100644 --- a/sklearn/manifold/_spectral_embedding.py +++ b/sklearn/manifold/_spectral_embedding.py @@ -333,9 +333,8 @@ def _spectral_embedding( laplacian, dd = csgraph_laplacian( adjacency, normed=norm_laplacian, return_diag=True ) - if ( - eigen_solver == "arpack" - or eigen_solver != "lobpcg" + if eigen_solver == "arpack" or ( + eigen_solver != "lobpcg" and (not sparse.issparse(laplacian) or n_nodes < 5 * n_components) ): # lobpcg used with eigen_solver='amg' has bugs for low number of nodes diff --git a/sklearn/metrics/__init__.py b/sklearn/metrics/__init__.py index 787df39a21979..ce86525acc368 100644 --- a/sklearn/metrics/__init__.py +++ b/sklearn/metrics/__init__.py @@ -95,12 +95,19 @@ ) __all__ = [ + "ConfusionMatrixDisplay", + "DetCurveDisplay", + "DistanceMetric", + "PrecisionRecallDisplay", + "PredictionErrorDisplay", + "RocCurveDisplay", "accuracy_score", 
"adjusted_mutual_info_score", "adjusted_rand_score", "auc", "average_precision_score", "balanced_accuracy_score", + "brier_score_loss", "calinski_harabasz_score", "check_scoring", "class_likelihood_ratios", @@ -108,25 +115,23 @@ "cluster", "cohen_kappa_score", "completeness_score", - "ConfusionMatrixDisplay", "confusion_matrix", "consensus_score", "coverage_error", - "d2_tweedie_score", "d2_absolute_error_score", "d2_log_loss_score", "d2_pinball_score", - "dcg_score", + "d2_tweedie_score", "davies_bouldin_score", - "DetCurveDisplay", + "dcg_score", "det_curve", - "DistanceMetric", "euclidean_distances", "explained_variance_score", "f1_score", "fbeta_score", "fowlkes_mallows_score", "get_scorer", + "get_scorer_names", "hamming_loss", "hinge_loss", "homogeneity_completeness_v_measure", @@ -136,20 +141,20 @@ "label_ranking_loss", "log_loss", "make_scorer", - "nan_euclidean_distances", "matthews_corrcoef", "max_error", "mean_absolute_error", - "mean_squared_error", - "mean_squared_log_error", + "mean_absolute_percentage_error", + "mean_gamma_deviance", "mean_pinball_loss", "mean_poisson_deviance", - "mean_gamma_deviance", + "mean_squared_error", + "mean_squared_log_error", "mean_tweedie_deviance", "median_absolute_error", - "mean_absolute_percentage_error", "multilabel_confusion_matrix", "mutual_info_score", + "nan_euclidean_distances", "ndcg_score", "normalized_mutual_info_score", "pair_confusion_matrix", @@ -158,24 +163,19 @@ "pairwise_distances_argmin_min", "pairwise_distances_chunked", "pairwise_kernels", - "PrecisionRecallDisplay", "precision_recall_curve", "precision_recall_fscore_support", "precision_score", - "PredictionErrorDisplay", "r2_score", "rand_score", "recall_score", - "RocCurveDisplay", "roc_auc_score", "roc_curve", - "root_mean_squared_log_error", "root_mean_squared_error", - "get_scorer_names", + "root_mean_squared_log_error", "silhouette_samples", "silhouette_score", "top_k_accuracy_score", "v_measure_score", "zero_one_loss", - "brier_score_loss", ] diff --git a/sklearn/metrics/_pairwise_distances_reduction/__init__.py b/sklearn/metrics/_pairwise_distances_reduction/__init__.py index ea605198e36d6..6b532e0fa8ff0 100644 --- a/sklearn/metrics/_pairwise_distances_reduction/__init__.py +++ b/sklearn/metrics/_pairwise_distances_reduction/__init__.py @@ -101,10 +101,10 @@ ) __all__ = [ - "BaseDistancesReductionDispatcher", "ArgKmin", - "RadiusNeighbors", "ArgKminClassMode", + "BaseDistancesReductionDispatcher", + "RadiusNeighbors", "RadiusNeighborsClassMode", "sqeuclidean_row_norms", ] diff --git a/sklearn/metrics/_plot/tests/test_det_curve_display.py b/sklearn/metrics/_plot/tests/test_det_curve_display.py index 403ea70109577..242468d177bfa 100644 --- a/sklearn/metrics/_plot/tests/test_det_curve_display.py +++ b/sklearn/metrics/_plot/tests/test_det_curve_display.py @@ -62,7 +62,7 @@ def test_det_curve_display( assert disp.estimator_name == "LogisticRegression" # cannot fail thanks to pyplot fixture - import matplotlib as mpl # noqal + import matplotlib as mpl assert isinstance(disp.line_, mpl.lines.Line2D) assert disp.line_.get_alpha() == 0.8 diff --git a/sklearn/metrics/_plot/tests/test_precision_recall_display.py b/sklearn/metrics/_plot/tests/test_precision_recall_display.py index 2ec34feb224da..022a5fbf28a91 100644 --- a/sklearn/metrics/_plot/tests/test_precision_recall_display.py +++ b/sklearn/metrics/_plot/tests/test_precision_recall_display.py @@ -112,7 +112,7 @@ def test_precision_recall_chance_level_line( chance_level_kw=chance_level_kw, ) - import matplotlib as mpl # 
noqa + import matplotlib as mpl assert isinstance(display.chance_level_, mpl.lines.Line2D) assert tuple(display.chance_level_.get_xdata()) == (0, 1) @@ -326,7 +326,7 @@ def test_precision_recall_prevalence_pos_label_reusable(pyplot, constructor_name ) assert display.chance_level_ is None - import matplotlib as mpl # noqa + import matplotlib as mpl # When calling from_estimator or from_predictions, # prevalence_pos_label should have been set, so that directly diff --git a/sklearn/metrics/_plot/tests/test_roc_curve_display.py b/sklearn/metrics/_plot/tests/test_roc_curve_display.py index e7e2abd7bd5f5..c8ad57beee1e0 100644 --- a/sklearn/metrics/_plot/tests/test_roc_curve_display.py +++ b/sklearn/metrics/_plot/tests/test_roc_curve_display.py @@ -105,7 +105,7 @@ def test_roc_curve_display_plotting( assert display.estimator_name == default_name - import matplotlib as mpl # noqal + import matplotlib as mpl assert isinstance(display.line_, mpl.lines.Line2D) assert display.line_.get_alpha() == 0.8 @@ -178,7 +178,7 @@ def test_roc_curve_chance_level_line( chance_level_kw=chance_level_kw, ) - import matplotlib as mpl # noqa + import matplotlib as mpl assert isinstance(display.line_, mpl.lines.Line2D) assert display.line_.get_alpha() == 0.8 diff --git a/sklearn/metrics/cluster/__init__.py b/sklearn/metrics/cluster/__init__.py index 6cb80a1edca9f..76020d80f8eb0 100644 --- a/sklearn/metrics/cluster/__init__.py +++ b/sklearn/metrics/cluster/__init__.py @@ -34,22 +34,22 @@ __all__ = [ "adjusted_mutual_info_score", - "normalized_mutual_info_score", "adjusted_rand_score", - "rand_score", + "calinski_harabasz_score", "completeness_score", - "pair_confusion_matrix", + "consensus_score", "contingency_matrix", + "davies_bouldin_score", + "entropy", "expected_mutual_information", + "fowlkes_mallows_score", "homogeneity_completeness_v_measure", "homogeneity_score", "mutual_info_score", - "v_measure_score", - "fowlkes_mallows_score", - "entropy", + "normalized_mutual_info_score", + "pair_confusion_matrix", + "rand_score", "silhouette_samples", "silhouette_score", - "calinski_harabasz_score", - "davies_bouldin_score", - "consensus_score", + "v_measure_score", ] diff --git a/sklearn/metrics/pairwise.py b/sklearn/metrics/pairwise.py index 843a373e6430e..c3e87b2452078 100644 --- a/sklearn/metrics/pairwise.py +++ b/sklearn/metrics/pairwise.py @@ -341,7 +341,7 @@ def euclidean_distances( Notes ----- - To achieve a better accuracy, `X_norm_squared` and `Y_norm_squared` may be + To achieve a better accuracy, `X_norm_squared` and `Y_norm_squared` may be unused if they are passed as `np.float32`. 
Examples diff --git a/sklearn/metrics/tests/test_score_objects.py b/sklearn/metrics/tests/test_score_objects.py index 0702be6c9ef7d..672ed8ae7eecc 100644 --- a/sklearn/metrics/tests/test_score_objects.py +++ b/sklearn/metrics/tests/test_score_objects.py @@ -621,7 +621,7 @@ def test_classification_scorer_sample_weight(): except TypeError as e: assert "sample_weight" in str(e), ( f"scorer {name} raises unhelpful exception when called " - f"with sample weights: {str(e)}" + f"with sample weights: {e}" ) @@ -667,7 +667,7 @@ def test_regression_scorer_sample_weight(): except TypeError as e: assert "sample_weight" in str(e), ( f"scorer {name} raises unhelpful exception when called " - f"with sample weights: {str(e)}" + f"with sample weights: {e}" ) diff --git a/sklearn/mixture/__init__.py b/sklearn/mixture/__init__.py index 6832f110e4cc6..c27263a0ed743 100644 --- a/sklearn/mixture/__init__.py +++ b/sklearn/mixture/__init__.py @@ -6,4 +6,4 @@ from ._bayesian_mixture import BayesianGaussianMixture from ._gaussian_mixture import GaussianMixture -__all__ = ["GaussianMixture", "BayesianGaussianMixture"] +__all__ = ["BayesianGaussianMixture", "GaussianMixture"] diff --git a/sklearn/mixture/tests/test_gaussian_mixture.py b/sklearn/mixture/tests/test_gaussian_mixture.py index e8144ada64f67..b9ee4e01b0120 100644 --- a/sklearn/mixture/tests/test_gaussian_mixture.py +++ b/sklearn/mixture/tests/test_gaussian_mixture.py @@ -182,7 +182,7 @@ def test_check_weights(): g.weights_init = weights_bad_shape msg = re.escape( "The parameter 'weights' should have the shape of " - f"({n_components},), but got {str(weights_bad_shape.shape)}" + f"({n_components},), but got {weights_bad_shape.shape}" ) with pytest.raises(ValueError, match=msg): g.fit(X) diff --git a/sklearn/model_selection/__init__.py b/sklearn/model_selection/__init__.py index 55b548ce45814..bed2a50f33d0d 100644 --- a/sklearn/model_selection/__init__.py +++ b/sklearn/model_selection/__init__.py @@ -53,37 +53,37 @@ __all__ = [ "BaseCrossValidator", "BaseShuffleSplit", + "FixedThresholdClassifier", "GridSearchCV", - "TimeSeriesSplit", - "KFold", "GroupKFold", "GroupShuffleSplit", + "KFold", + "LearningCurveDisplay", "LeaveOneGroupOut", "LeaveOneOut", "LeavePGroupsOut", "LeavePOut", - "RepeatedKFold", - "RepeatedStratifiedKFold", "ParameterGrid", "ParameterSampler", "PredefinedSplit", "RandomizedSearchCV", + "RepeatedKFold", + "RepeatedStratifiedKFold", "ShuffleSplit", - "StratifiedKFold", "StratifiedGroupKFold", + "StratifiedKFold", "StratifiedShuffleSplit", - "FixedThresholdClassifier", + "TimeSeriesSplit", "TunedThresholdClassifierCV", + "ValidationCurveDisplay", "check_cv", "cross_val_predict", "cross_val_score", "cross_validate", "learning_curve", - "LearningCurveDisplay", "permutation_test_score", "train_test_split", "validation_curve", - "ValidationCurveDisplay", ] diff --git a/sklearn/model_selection/_split.py b/sklearn/model_selection/_split.py index e4759c14e4ad5..ee85af7fe39e6 100644 --- a/sklearn/model_selection/_split.py +++ b/sklearn/model_selection/_split.py @@ -37,22 +37,22 @@ __all__ = [ "BaseCrossValidator", - "KFold", "GroupKFold", + "GroupShuffleSplit", + "KFold", "LeaveOneGroupOut", "LeaveOneOut", "LeavePGroupsOut", "LeavePOut", - "RepeatedStratifiedKFold", + "PredefinedSplit", "RepeatedKFold", + "RepeatedStratifiedKFold", "ShuffleSplit", - "GroupShuffleSplit", - "StratifiedKFold", "StratifiedGroupKFold", + "StratifiedKFold", "StratifiedShuffleSplit", - "PredefinedSplit", - "train_test_split", "check_cv", + "train_test_split", ] @@ 
-1088,9 +1088,8 @@ def _find_best_fold(self, y_counts_per_fold, y_cnt, group_y_counts): y_counts_per_fold[i] -= group_y_counts fold_eval = np.mean(std_per_class) samples_in_fold = np.sum(y_counts_per_fold[i]) - is_current_fold_better = ( - fold_eval < min_eval - or np.isclose(fold_eval, min_eval) + is_current_fold_better = fold_eval < min_eval or ( + np.isclose(fold_eval, min_eval) and samples_in_fold < min_samples_in_fold ) if is_current_fold_better: @@ -2442,11 +2441,8 @@ def _validate_shuffle_split(n_samples, test_size, train_size, default_test_size= test_size_type = np.asarray(test_size).dtype.kind train_size_type = np.asarray(train_size).dtype.kind - if ( - test_size_type == "i" - and (test_size >= n_samples or test_size <= 0) - or test_size_type == "f" - and (test_size <= 0 or test_size >= 1) + if (test_size_type == "i" and (test_size >= n_samples or test_size <= 0)) or ( + test_size_type == "f" and (test_size <= 0 or test_size >= 1) ): raise ValueError( "test_size={0} should be either positive and smaller" @@ -2454,11 +2450,8 @@ def _validate_shuffle_split(n_samples, test_size, train_size, default_test_size= "(0, 1) range".format(test_size, n_samples) ) - if ( - train_size_type == "i" - and (train_size >= n_samples or train_size <= 0) - or train_size_type == "f" - and (train_size <= 0 or train_size >= 1) + if (train_size_type == "i" and (train_size >= n_samples or train_size <= 0)) or ( + train_size_type == "f" and (train_size <= 0 or train_size >= 1) ): raise ValueError( "train_size={0} should be either positive and smaller" diff --git a/sklearn/model_selection/_validation.py b/sklearn/model_selection/_validation.py index 056248247d94b..2ae704baaefd1 100644 --- a/sklearn/model_selection/_validation.py +++ b/sklearn/model_selection/_validation.py @@ -46,11 +46,11 @@ from ._split import check_cv __all__ = [ - "cross_validate", - "cross_val_score", "cross_val_predict", - "permutation_test_score", + "cross_val_score", + "cross_validate", "learning_curve", + "permutation_test_score", "validation_curve", ] diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py index 1ddb36ca4fa8f..fa86201fb1d89 100644 --- a/sklearn/multiclass.py +++ b/sklearn/multiclass.py @@ -72,8 +72,8 @@ ) __all__ = [ - "OneVsRestClassifier", "OneVsOneClassifier", + "OneVsRestClassifier", "OutputCodeClassifier", ] diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py index b71fc082eb934..86a33d3d8d0b8 100644 --- a/sklearn/multioutput.py +++ b/sklearn/multioutput.py @@ -53,9 +53,9 @@ ) __all__ = [ - "MultiOutputRegressor", - "MultiOutputClassifier", "ClassifierChain", + "MultiOutputClassifier", + "MultiOutputRegressor", "RegressorChain", ] diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py index 0bb2daab25d0b..e5b03abbb903a 100644 --- a/sklearn/naive_bayes.py +++ b/sklearn/naive_bayes.py @@ -33,10 +33,10 @@ __all__ = [ "BernoulliNB", + "CategoricalNB", + "ComplementNB", "GaussianNB", "MultinomialNB", - "ComplementNB", - "CategoricalNB", ] diff --git a/sklearn/neighbors/__init__.py b/sklearn/neighbors/__init__.py index 02c4a28b9a6c4..4e0de99f5e7e3 100644 --- a/sklearn/neighbors/__init__.py +++ b/sklearn/neighbors/__init__.py @@ -21,22 +21,22 @@ from ._unsupervised import NearestNeighbors __all__ = [ + "VALID_METRICS", + "VALID_METRICS_SPARSE", "BallTree", "KDTree", "KNeighborsClassifier", "KNeighborsRegressor", "KNeighborsTransformer", + "KernelDensity", + "LocalOutlierFactor", "NearestCentroid", "NearestNeighbors", + "NeighborhoodComponentsAnalysis", "RadiusNeighborsClassifier", 
"RadiusNeighborsRegressor", "RadiusNeighborsTransformer", "kneighbors_graph", "radius_neighbors_graph", - "KernelDensity", - "LocalOutlierFactor", - "NeighborhoodComponentsAnalysis", "sort_graph_by_row_values", - "VALID_METRICS", - "VALID_METRICS_SPARSE", ] diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py index 72d27f444000e..767eee1358aa8 100644 --- a/sklearn/neighbors/_base.py +++ b/sklearn/neighbors/_base.py @@ -487,7 +487,7 @@ def _fit(self, X, y=None): if is_classifier(self): # Classification targets require a specific format - if y.ndim == 1 or y.ndim == 2 and y.shape[1] == 1: + if y.ndim == 1 or (y.ndim == 2 and y.shape[1] == 1): if y.ndim != 1: warnings.warn( ( @@ -1249,13 +1249,13 @@ class from an array representing our data set and ask who's ) if return_distance: neigh_dist_chunks, neigh_ind_chunks = zip(*chunked_results) - neigh_dist_list = sum(neigh_dist_chunks, []) - neigh_ind_list = sum(neigh_ind_chunks, []) + neigh_dist_list = list(itertools.chain.from_iterable(neigh_dist_chunks)) + neigh_ind_list = list(itertools.chain.from_iterable(neigh_ind_chunks)) neigh_dist = _to_object_array(neigh_dist_list) neigh_ind = _to_object_array(neigh_ind_list) results = neigh_dist, neigh_ind else: - neigh_ind_list = sum(chunked_results, []) + neigh_ind_list = list(itertools.chain.from_iterable(chunked_results)) results = _to_object_array(neigh_ind_list) if sort_results: diff --git a/sklearn/neighbors/tests/test_neighbors.py b/sklearn/neighbors/tests/test_neighbors.py index c9fb85fec9908..9bceeb5298433 100644 --- a/sklearn/neighbors/tests/test_neighbors.py +++ b/sklearn/neighbors/tests/test_neighbors.py @@ -165,7 +165,7 @@ def _weight_func(dist): ], ) @pytest.mark.parametrize("query_is_train", [False, True]) -@pytest.mark.parametrize("metric", COMMON_VALID_METRICS + DISTANCE_METRIC_OBJS) # type: ignore # noqa +@pytest.mark.parametrize("metric", COMMON_VALID_METRICS + DISTANCE_METRIC_OBJS) # type: ignore def test_unsupervised_kneighbors( global_dtype, n_samples, @@ -250,7 +250,7 @@ def test_unsupervised_kneighbors( (1000, 5, 100), ], ) -@pytest.mark.parametrize("metric", COMMON_VALID_METRICS + DISTANCE_METRIC_OBJS) # type: ignore # noqa +@pytest.mark.parametrize("metric", COMMON_VALID_METRICS + DISTANCE_METRIC_OBJS) # type: ignore @pytest.mark.parametrize("n_neighbors, radius", [(1, 100), (50, 500), (100, 1000)]) @pytest.mark.parametrize( "NeighborsMixinSubclass", diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py index edf96078e05c4..68b4344bab9e3 100644 --- a/sklearn/pipeline.py +++ b/sklearn/pipeline.py @@ -38,7 +38,7 @@ from .utils.parallel import Parallel, delayed from .utils.validation import check_is_fitted, check_memory -__all__ = ["Pipeline", "FeatureUnion", "make_pipeline", "make_union"] +__all__ = ["FeatureUnion", "Pipeline", "make_pipeline", "make_union"] @contextmanager diff --git a/sklearn/preprocessing/__init__.py b/sklearn/preprocessing/__init__.py index d5ea1fe15f036..48bb3aa6a7a4e 100644 --- a/sklearn/preprocessing/__init__.py +++ b/sklearn/preprocessing/__init__.py @@ -37,27 +37,27 @@ "KernelCenterer", "LabelBinarizer", "LabelEncoder", - "MultiLabelBinarizer", - "MinMaxScaler", "MaxAbsScaler", - "QuantileTransformer", + "MinMaxScaler", + "MultiLabelBinarizer", "Normalizer", "OneHotEncoder", "OrdinalEncoder", + "PolynomialFeatures", "PowerTransformer", + "QuantileTransformer", "RobustScaler", "SplineTransformer", "StandardScaler", "TargetEncoder", "add_dummy_feature", - "PolynomialFeatures", "binarize", - "normalize", - "scale", - "robust_scale", + 
"label_binarize", "maxabs_scale", "minmax_scale", - "label_binarize", - "quantile_transform", + "normalize", "power_transform", + "quantile_transform", + "robust_scale", + "scale", ] diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py index b7da0f3c0d4ce..74d7b1909c4e1 100644 --- a/sklearn/preprocessing/_data.py +++ b/sklearn/preprocessing/_data.py @@ -46,23 +46,23 @@ __all__ = [ "Binarizer", "KernelCenterer", - "MinMaxScaler", "MaxAbsScaler", + "MinMaxScaler", "Normalizer", "OneHotEncoder", + "PowerTransformer", + "QuantileTransformer", "RobustScaler", "StandardScaler", - "QuantileTransformer", - "PowerTransformer", "add_dummy_feature", "binarize", - "normalize", - "scale", - "robust_scale", "maxabs_scale", "minmax_scale", - "quantile_transform", + "normalize", "power_transform", + "quantile_transform", + "robust_scale", + "scale", ] diff --git a/sklearn/preprocessing/_label.py b/sklearn/preprocessing/_label.py index 345d55556459b..560713eb5df40 100644 --- a/sklearn/preprocessing/_label.py +++ b/sklearn/preprocessing/_label.py @@ -20,10 +20,10 @@ from ..utils.validation import _num_samples, check_array, check_is_fitted __all__ = [ - "label_binarize", "LabelBinarizer", "LabelEncoder", "MultiLabelBinarizer", + "label_binarize", ] diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py index 74741585f7761..81d32719a10ff 100644 --- a/sklearn/random_projection.py +++ b/sklearn/random_projection.py @@ -47,8 +47,8 @@ from .utils.validation import check_array, check_is_fitted, validate_data __all__ = [ - "SparseRandomProjection", "GaussianRandomProjection", + "SparseRandomProjection", "johnson_lindenstrauss_min_dim", ] diff --git a/sklearn/semi_supervised/__init__.py b/sklearn/semi_supervised/__init__.py index fba2488a753df..453cd5edc348b 100644 --- a/sklearn/semi_supervised/__init__.py +++ b/sklearn/semi_supervised/__init__.py @@ -10,4 +10,4 @@ from ._label_propagation import LabelPropagation, LabelSpreading from ._self_training import SelfTrainingClassifier -__all__ = ["SelfTrainingClassifier", "LabelPropagation", "LabelSpreading"] +__all__ = ["LabelPropagation", "LabelSpreading", "SelfTrainingClassifier"] diff --git a/sklearn/svm/__init__.py b/sklearn/svm/__init__.py index d9d2d33897863..a039d2e15abdd 100644 --- a/sklearn/svm/__init__.py +++ b/sklearn/svm/__init__.py @@ -10,12 +10,12 @@ from ._classes import SVC, SVR, LinearSVC, LinearSVR, NuSVC, NuSVR, OneClassSVM __all__ = [ + "SVC", + "SVR", "LinearSVC", "LinearSVR", "NuSVC", "NuSVR", "OneClassSVM", - "SVC", - "SVR", "l1_min_c", ] diff --git a/sklearn/tests/test_calibration.py b/sklearn/tests/test_calibration.py index 774a6f83ad1b6..16c8ac9261f27 100644 --- a/sklearn/tests/test_calibration.py +++ b/sklearn/tests/test_calibration.py @@ -660,7 +660,7 @@ def test_calibration_display_compute(pyplot, iris_data_binary, n_bins, strategy) assert viz.estimator_name == "LogisticRegression" # cannot fail thanks to pyplot fixture - import matplotlib as mpl # noqa + import matplotlib as mpl assert isinstance(viz.line_, mpl.lines.Line2D) assert viz.line_.get_alpha() == 0.8 diff --git a/sklearn/tree/__init__.py b/sklearn/tree/__init__.py index c961a811fe05c..c4b03b66eb6e5 100644 --- a/sklearn/tree/__init__.py +++ b/sklearn/tree/__init__.py @@ -19,6 +19,6 @@ "ExtraTreeClassifier", "ExtraTreeRegressor", "export_graphviz", - "plot_tree", "export_text", + "plot_tree", ] diff --git a/sklearn/tree/_reingold_tilford.py b/sklearn/tree/_reingold_tilford.py index 9801158166e1e..deb4d84f6d324 100644 --- 
a/sklearn/tree/_reingold_tilford.py +++ b/sklearn/tree/_reingold_tilford.py @@ -22,10 +22,10 @@ def __init__(self, tree, parent=None, depth=0, number=1): self.number = number def left(self): - return self.thread or len(self.children) and self.children[0] + return self.thread or (len(self.children) and self.children[0]) def right(self): - return self.thread or len(self.children) and self.children[-1] + return self.thread or (len(self.children) and self.children[-1]) def lbrother(self): n = None diff --git a/sklearn/utils/__init__.py b/sklearn/utils/__init__.py index 58bce9cfd6fe4..f724132e16daa 100644 --- a/sklearn/utils/__init__.py +++ b/sklearn/utils/__init__.py @@ -65,40 +65,40 @@ class parallel_backend(_joblib.parallel_backend): __all__ = [ - "murmurhash3_32", + "Bunch", + "ClassifierTags", + "DataConversionWarning", + "InputTags", + "RegressorTags", + "Tags", + "TargetTags", + "TransformerTags", + "all_estimators", "as_float_array", "assert_all_finite", + "check_X_y", "check_array", - "check_random_state", - "compute_class_weight", - "compute_sample_weight", - "column_or_1d", "check_consistent_length", - "check_X_y", + "check_random_state", "check_scalar", - "indexable", "check_symmetric", + "column_or_1d", + "compute_class_weight", + "compute_sample_weight", "deprecated", - "parallel_backend", - "register_parallel_backend", - "resample", - "shuffle", - "all_estimators", - "DataConversionWarning", "estimator_html_repr", - "Bunch", - "metadata_routing", - "safe_sqr", - "safe_mask", "gen_batches", "gen_even_slices", - "Tags", - "InputTags", - "TargetTags", - "ClassifierTags", - "RegressorTags", - "TransformerTags", "get_tags", + "indexable", + "metadata_routing", + "murmurhash3_32", + "parallel_backend", + "register_parallel_backend", + "resample", + "safe_mask", + "safe_sqr", + "shuffle", ] diff --git a/sklearn/utils/_array_api.py b/sklearn/utils/_array_api.py index e65ebcce169b2..59d408bf7ea71 100644 --- a/sklearn/utils/_array_api.py +++ b/sklearn/utils/_array_api.py @@ -85,7 +85,7 @@ def yield_namespace_device_dtype_combinations(include_numpy_namespaces=True): elif array_namespace == "array_api_strict": try: - import array_api_strict # noqa + import array_api_strict yield array_namespace, array_api_strict.Device("CPU_DEVICE"), "float64" yield array_namespace, array_api_strict.Device("device1"), "float32" @@ -196,8 +196,7 @@ def device(*array_list, remove_none=True, remove_types=(str,)): device_other = _single_array_device(array) if device_ != device_other: raise ValueError( - f"Input arrays use different devices: {str(device_)}, " - f"{str(device_other)}" + f"Input arrays use different devices: {device_}, {device_other}" ) return device_ @@ -325,7 +324,7 @@ def ensure_common_namespace_device(reference, *arrays): return arrays -def _check_device_cpu(device): # noqa +def _check_device_cpu(device): if device not in {"cpu", None}: raise ValueError(f"Unsupported device for NumPy: {device!r}") @@ -411,7 +410,7 @@ def astype(self, x, dtype, *, copy=True, casting="unsafe"): # astype is not defined in the top level NumPy namespace return x.astype(dtype, copy=copy, casting=casting) - def asarray(self, x, *, dtype=None, device=None, copy=None): # noqa + def asarray(self, x, *, dtype=None, device=None, copy=None): _check_device_cpu(device) # Support copy in NumPy namespace if copy is True: diff --git a/sklearn/utils/_available_if.py b/sklearn/utils/_available_if.py index b0da84189d1f3..91dee2641f20c 100644 --- a/sklearn/utils/_available_if.py +++ b/sklearn/utils/_available_if.py @@ -26,7 +26,7 @@ 
def __init__(self, fn, check, attribute_name): def _check(self, obj, owner): attr_err_msg = ( - f"This {repr(owner.__name__)} has no attribute {repr(self.attribute_name)}" + f"This {owner.__name__!r} has no attribute {self.attribute_name!r}" ) try: check_result = self.check(obj) diff --git a/sklearn/utils/_encode.py b/sklearn/utils/_encode.py index 045ce3e11919a..858e8b1c87cad 100644 --- a/sklearn/utils/_encode.py +++ b/sklearn/utils/_encode.py @@ -234,12 +234,12 @@ def _encode(values, *, uniques, check_unknown=True): try: return _map_to_integer(values, uniques) except KeyError as e: - raise ValueError(f"y contains previously unseen labels: {str(e)}") + raise ValueError(f"y contains previously unseen labels: {e}") else: if check_unknown: diff = _check_unknown(values, uniques) if diff: - raise ValueError(f"y contains previously unseen labels: {str(diff)}") + raise ValueError(f"y contains previously unseen labels: {diff}") return _searchsorted(uniques, values, xp=xp) @@ -285,10 +285,8 @@ def _check_unknown(values, known_values, return_mask=False): def is_valid(value): return ( value in uniques_set - or missing_in_uniques.none - and value is None - or missing_in_uniques.nan - and is_scalar_nan(value) + or (missing_in_uniques.none and value is None) + or (missing_in_uniques.nan and is_scalar_nan(value)) ) if return_mask: diff --git a/sklearn/utils/_joblib.py b/sklearn/utils/_joblib.py index 03c10397eea1c..d426b0080d83d 100644 --- a/sklearn/utils/_joblib.py +++ b/sklearn/utils/_joblib.py @@ -27,17 +27,17 @@ __all__ = [ - "parallel_backend", - "register_parallel_backend", - "cpu_count", - "Parallel", "Memory", + "Parallel", + "__version__", + "cpu_count", "delayed", + "dump", "effective_n_jobs", "hash", - "logger", - "dump", - "load", "joblib", - "__version__", + "load", + "logger", + "parallel_backend", + "register_parallel_backend", ] diff --git a/sklearn/utils/_metadata_requests.py b/sklearn/utils/_metadata_requests.py index cb2fb03050c39..ebfbc41c0eab8 100644 --- a/sklearn/utils/_metadata_requests.py +++ b/sklearn/utils/_metadata_requests.py @@ -1576,7 +1576,7 @@ def __getattr__(self, name): if not (hasattr(_obj, "get_metadata_routing") or isinstance(_obj, MetadataRouter)): raise AttributeError( - f"The given object ({repr(_obj.__class__.__name__)}) needs to either" + f"The given object ({_obj.__class__.__name__!r}) needs to either" " implement the routing method `get_metadata_routing` or be a" " `MetadataRouter` instance." ) diff --git a/sklearn/utils/_optional_dependencies.py b/sklearn/utils/_optional_dependencies.py index 1de7f4479b242..3bc8277fddab5 100644 --- a/sklearn/utils/_optional_dependencies.py +++ b/sklearn/utils/_optional_dependencies.py @@ -39,7 +39,7 @@ def check_pandas_support(caller_name): The pandas package. """ try: - import pandas # noqa + import pandas return pandas except ImportError as e: diff --git a/sklearn/utils/_response.py b/sklearn/utils/_response.py index 12cbff2230b17..9003699d4351d 100644 --- a/sklearn/utils/_response.py +++ b/sklearn/utils/_response.py @@ -195,7 +195,7 @@ def _get_response_values( If the response method can be applied to a classifier only and `estimator` is a regressor. 
""" - from sklearn.base import is_classifier, is_outlier_detector # noqa + from sklearn.base import is_classifier, is_outlier_detector if is_classifier(estimator): prediction_method = _check_response_method(estimator, response_method) diff --git a/sklearn/utils/_set_output.py b/sklearn/utils/_set_output.py index 963e5e5bf6d77..6980902594663 100644 --- a/sklearn/utils/_set_output.py +++ b/sklearn/utils/_set_output.py @@ -447,10 +447,8 @@ def _safe_set_output(estimator, *, transform=None): estimator : estimator instance Estimator instance. """ - set_output_for_transform = ( - hasattr(estimator, "transform") - or hasattr(estimator, "fit_transform") - and transform is not None + set_output_for_transform = hasattr(estimator, "transform") or ( + hasattr(estimator, "fit_transform") and transform is not None ) if not set_output_for_transform: # If estimator can not transform, then `set_output` does not need to be diff --git a/sklearn/utils/_tags.py b/sklearn/utils/_tags.py index ffb654c83637b..c8b1623682a0c 100644 --- a/sklearn/utils/_tags.py +++ b/sklearn/utils/_tags.py @@ -404,7 +404,7 @@ def get_tags(estimator) -> Tags: # `super().__sklearn_tags__()` but there is no `__sklearn_tags__` # method in the base class. warnings.warn( - f"The following error was raised: {str(exc)}. It seems that " + f"The following error was raised: {exc}. It seems that " "there are no classes that implement `__sklearn_tags__` " "in the MRO and/or all classes in the MRO call " "`super().__sklearn_tags__()`. Make sure to inherit from " diff --git a/sklearn/utils/_testing.py b/sklearn/utils/_testing.py index 5028818d0697f..bb0d807edc250 100644 --- a/sklearn/utils/_testing.py +++ b/sklearn/utils/_testing.py @@ -61,13 +61,13 @@ ) __all__ = [ - "assert_array_equal", + "SkipTest", + "assert_allclose", "assert_almost_equal", "assert_array_almost_equal", + "assert_array_equal", "assert_array_less", - "assert_allclose", "assert_run_python_script_without_output", - "SkipTest", ] SkipTest = unittest.case.SkipTest @@ -1273,7 +1273,7 @@ def _array_api_for_tests(array_namespace, device): f"{array_namespace} is not installed: not checking array_api input" ) try: - import array_api_compat # noqa + import array_api_compat except ImportError: raise SkipTest( "array_api_compat is not installed: not checking array_api input" diff --git a/sklearn/utils/discovery.py b/sklearn/utils/discovery.py index 40d5b5f8cf714..ffa57c37aa304 100644 --- a/sklearn/utils/discovery.py +++ b/sklearn/utils/discovery.py @@ -141,7 +141,7 @@ def is_abstract(c): "Parameter type_filter must be 'classifier', " "'regressor', 'transformer', 'cluster' or " "None, got" - f" {repr(type_filter)}." + f" {type_filter!r}." ) # drop duplicates, sort for reproducibility diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py index 6516b39219ff3..369e462c23d2f 100644 --- a/sklearn/utils/estimator_checks.py +++ b/sklearn/utils/estimator_checks.py @@ -4011,7 +4011,7 @@ def check_positive_only_tag_during_fit(name, estimator_orig): estimator.fit(X, y) except Exception as e: err_msg = ( - f"Estimator {repr(name)} raised {e.__class__.__name__} unexpectedly." + f"Estimator {name!r} raised {e.__class__.__name__} unexpectedly." " This happens when passing negative input values as X." " If negative values are not supported for this estimator instance," " then the tags.input_tags.positive_only tag needs to be set to True." 
diff --git a/sklearn/utils/fixes.py b/sklearn/utils/fixes.py
index 56f18c98f44d1..6155a31ee2a75 100644
--- a/sklearn/utils/fixes.py
+++ b/sklearn/utils/fixes.py
@@ -360,7 +360,7 @@ def _smallest_admissible_index_dtype(arrays=(), maxval=None, check_contents=Fals

 # TODO: Remove when Scipy 1.12 is the minimum supported version
 if sp_version < parse_version("1.12"):
-    from ..externals._scipy.sparse.csgraph import laplacian  # type: ignore  # noqa
+    from ..externals._scipy.sparse.csgraph import laplacian  # type: ignore
 else:
     from scipy.sparse.csgraph import laplacian  # type: ignore  # noqa  # pragma: no cover
diff --git a/sklearn/utils/multiclass.py b/sklearn/utils/multiclass.py
index 8bdcca3197d1a..5df206259c5d1 100644
--- a/sklearn/utils/multiclass.py
+++ b/sklearn/utils/multiclass.py
@@ -185,9 +185,8 @@ def is_multilabel(y):
         if y.format in ("dok", "lil"):
             y = y.tocsr()
         labels = xp.unique_values(y.data)
-        return (
-            len(y.data) == 0
-            or (labels.size == 1 or (labels.size == 2) and (0 in labels))
+        return len(y.data) == 0 or (
+            (labels.size == 1 or ((labels.size == 2) and (0 in labels)))
             and (y.dtype.kind in "biu" or _is_integral_float(labels))  # bool, int, uint
         )
     else:
@@ -318,8 +317,7 @@ def _raise_or_return():
     valid = (
         (isinstance(y, Sequence) or issparse(y) or hasattr(y, "__array__"))
         and not isinstance(y, str)
-        or is_array_api_compliant
-    )
+    ) or is_array_api_compliant

     if not valid:
         raise ValueError(
diff --git a/sklearn/utils/tests/test_indexing.py b/sklearn/utils/tests/test_indexing.py
index fa54c58413a3f..87fb5c77bcfbf 100644
--- a/sklearn/utils/tests/test_indexing.py
+++ b/sklearn/utils/tests/test_indexing.py
@@ -636,15 +636,15 @@ def test_shuffle_dont_convert_to_array(csc_container):
     a_s, b_s, c_s, d_s, e_s = shuffle(a, b, c, d, e, random_state=0)

     assert a_s == ["c", "b", "a"]
-    assert type(a_s) == list  # noqa: E721
+    assert type(a_s) == list

     assert_array_equal(b_s, ["c", "b", "a"])
     assert b_s.dtype == object

     assert c_s == [3, 2, 1]
-    assert type(c_s) == list  # noqa: E721
+    assert type(c_s) == list

     assert_array_equal(d_s, np.array([["c", 2], ["b", 1], ["a", 0]], dtype=object))
-    assert type(d_s) == MockDataFrame  # noqa: E721
+    assert type(d_s) == MockDataFrame

     assert_array_equal(e_s.toarray(), np.array([[4, 5], [2, 3], [0, 1]]))
diff --git a/sklearn/utils/tests/test_multiclass.py b/sklearn/utils/tests/test_multiclass.py
index 49f224b952d5d..199ffc2f751c6 100644
--- a/sklearn/utils/tests/test_multiclass.py
+++ b/sklearn/utils/tests/test_multiclass.py
@@ -545,7 +545,7 @@ def test_safe_split_with_precomputed_kernel():
     K = np.dot(X, X.T)

     cv = ShuffleSplit(test_size=0.25, random_state=0)
-    train, test = list(cv.split(X))[0]
+    train, test = next(iter(cv.split(X)))

     X_train, y_train = _safe_split(clf, X, y, train)
     K_train, y_train2 = _safe_split(clfp, K, y, train)
diff --git a/sklearn/utils/tests/test_set_output.py b/sklearn/utils/tests/test_set_output.py
index 360b081a2a0fb..2b756ada64a6d 100644
--- a/sklearn/utils/tests/test_set_output.py
+++ b/sklearn/utils/tests/test_set_output.py
@@ -336,7 +336,7 @@ def test_set_output_mro():
     class Base(_SetOutputMixin):
         def transform(self, X):
-            return "Base"  # noqa
+            return "Base"

     class A(Base):
         pass
diff --git a/sklearn/utils/validation.py b/sklearn/utils/validation.py
index 89f9df760e6f0..116d12fc5e8ad 100644
--- a/sklearn/utils/validation.py
+++ b/sklearn/utils/validation.py
@@ -988,7 +988,7 @@ def is_sparse(dtype):
     # When all dataframe columns are sparse, convert to a sparse array
     if hasattr(array, "sparse") and array.ndim > 1:
         with suppress(ImportError):
-            from pandas import SparseDtype  # noqa: F811
+            from pandas import SparseDtype

             def is_sparse(dtype):
                 return isinstance(dtype, SparseDtype)
@@ -1916,7 +1916,7 @@ def type_name(t):
     expected_include_boundaries = ("left", "right", "both", "neither")
     if include_boundaries not in expected_include_boundaries:
         raise ValueError(
-            f"Unknown value for `include_boundaries`: {repr(include_boundaries)}. "
+            f"Unknown value for `include_boundaries`: {include_boundaries!r}. "
             f"Possible values are: {expected_include_boundaries}."
         )
@@ -2315,10 +2315,8 @@ def _check_method_params(X, params, indices=None):
     method_params_validated = {}
     for param_key, param_value in params.items():
         if (
-            not _is_arraylike(param_value)
-            and not sp.issparse(param_value)
-            or _num_samples(param_value) != _num_samples(X)
-        ):
+            not _is_arraylike(param_value) and not sp.issparse(param_value)
+        ) or _num_samples(param_value) != _num_samples(X):
             # Non-indexable pass-through (for now for backward-compatibility).
             # https://github.com/scikit-learn/scikit-learn/issues/15805
             method_params_validated[param_key] = param_value
@@ -2927,7 +2925,7 @@ def validate_data(
         )

     no_val_X = isinstance(X, str) and X == "no_validation"
-    no_val_y = y is None or isinstance(y, str) and y == "no_validation"
+    no_val_y = y is None or (isinstance(y, str) and y == "no_validation")

     if no_val_X and no_val_y:
         raise ValueError("Validation should be done on X, y or both.")
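Two other patterns recur throughout the patch: single-element lookups rewritten from `list(x)[0]` to `next(iter(x))`, and `__all__` lists put into sorted order (the RUF015 and RUF022 checks, respectively). A small illustrative sketch with made-up data, not part of the patch:

    # RUF015: next(iter(x)) retrieves only the first element lazily,
    # whereas list(x)[0] materializes the whole iterable first. For a
    # generator of CV splits this avoids computing every split.
    splits = iter([([0, 1, 2], [3]), ([4, 5, 6], [7])])
    first_train, first_test = next(iter(splits))
    assert first_train == [0, 1, 2] and first_test == [3]

    # RUF022: the ordering enforced for __all__ appears to match plain
    # string sort order here (see the _joblib.py hunk above), so
    # uppercase names sort before lowercase ones.
    names = ["parallel_backend", "Memory", "cpu_count", "Parallel"]
    assert sorted(names) == ["Memory", "Parallel", "cpu_count", "parallel_backend"]

Both rewrites are behavior-preserving for the call sites touched above: each replaced `list(...)[0]` consumed an iterable that is known to be non-empty, and reordering `__all__` only changes the order in which names are exported, not the set of names.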