From a896f2ea0c496c7d55d2f84b20f84c638b228705 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Thu, 31 Oct 2024 17:42:54 +0100
Subject: [PATCH 01/28] Add input_tags.sparse to estimators and a matching estimator check

---
 sklearn/calibration.py                        | 11 +++---
 sklearn/cluster/_affinity_propagation.py      |  1 +
 sklearn/cluster/_bicluster.py                 |  1 +
 sklearn/cluster/_birch.py                     |  1 +
 sklearn/cluster/_bisect_k_means.py            |  1 +
 sklearn/cluster/_dbscan.py                    |  1 +
 sklearn/cluster/_hdbscan/hdbscan.py           |  1 +
 sklearn/cluster/_kmeans.py                    |  1 +
 sklearn/cluster/_spectral.py                  |  1 +
 sklearn/compose/_target.py                    |  1 +
 sklearn/decomposition/_incremental_pca.py     |  5 +++
 sklearn/decomposition/_kernel_pca.py          |  1 +
 sklearn/decomposition/_lda.py                 |  1 +
 sklearn/decomposition/_nmf.py                 |  1 +
 sklearn/decomposition/_pca.py                 |  1 +
 sklearn/decomposition/_truncated_svd.py       |  1 +
 sklearn/dummy.py                              |  2 ++
 sklearn/ensemble/_bagging.py                  |  1 +
 sklearn/ensemble/_base.py                     |  6 ++++
 sklearn/ensemble/_forest.py                   |  3 ++
 sklearn/ensemble/_gb.py                       |  2 ++
 sklearn/ensemble/_weight_boosting.py          |  2 ++
 sklearn/feature_selection/_from_model.py      |  1 +
 sklearn/feature_selection/_rfe.py             |  1 +
 sklearn/feature_selection/_sequential.py      |  1 +
 .../_univariate_selection.py                  |  1 +
 .../feature_selection/_variance_threshold.py  |  1 +
 sklearn/impute/_base.py                       |  2 ++
 sklearn/kernel_approximation.py               |  8 +++++
 sklearn/kernel_ridge.py                       |  1 +
 sklearn/linear_model/_base.py                 |  5 +++
 sklearn/linear_model/_coordinate_descent.py   |  8 +++++
 sklearn/linear_model/_glm/glm.py              |  1 +
 sklearn/linear_model/_huber.py                |  5 +++
 sklearn/linear_model/_logistic.py             |  6 ++++
 sklearn/linear_model/_quantile.py             |  5 +++
 sklearn/linear_model/_ransac.py               |  1 +
 sklearn/linear_model/_ridge.py                | 13 +++++++
 sklearn/linear_model/_stochastic_gradient.py  | 11 ++++++
 sklearn/manifold/_isomap.py                   |  1 +
 sklearn/manifold/_spectral_embedding.py       |  1 +
 .../_classification_threshold.py              |  3 +-
 sklearn/model_selection/_search.py            |  1 +
 sklearn/multiclass.py                         |  7 ++++
 sklearn/multioutput.py                        |  8 ++++-
 sklearn/naive_bayes.py                        |  8 +++++
 sklearn/neighbors/_base.py                    |  1 +
 sklearn/neighbors/_nearest_centroid.py        |  5 +++
 .../neural_network/_multilayer_perceptron.py  |  5 +++
 sklearn/neural_network/_rbm.py                |  1 +
 sklearn/pipeline.py                           |  1 +
 sklearn/preprocessing/_data.py                |  5 +++
 .../preprocessing/_function_transformer.py    |  1 +
 sklearn/preprocessing/_polynomial.py          |  5 +++
 sklearn/random_projection.py                  |  1 +
 sklearn/semi_supervised/_label_propagation.py |  5 +++
 sklearn/semi_supervised/_self_training.py     |  3 +-
 sklearn/svm/_classes.py                       |  7 ++++
 sklearn/tree/_classes.py                      |  2 ++
 sklearn/utils/estimator_checks.py             | 36 +++++++++++++++++++
 60 files changed, 216 insertions(+), 8 deletions(-)

diff --git a/sklearn/calibration.py b/sklearn/calibration.py
index 93035fef52b45..19e6516c30096 100644
--- a/sklearn/calibration.py
+++ b/sklearn/calibration.py
@@ -27,11 +27,7 @@
 from .model_selection import LeaveOneOut, check_cv, cross_val_predict
 from .preprocessing import LabelEncoder, label_binarize
 from .svm import LinearSVC
-from .utils import (
-    _safe_indexing,
-    column_or_1d,
-    indexable,
-)
+from .utils import _safe_indexing, column_or_1d, get_tags, indexable
 from .utils._param_validation import (
     HasMethods,
     Interval,
@@ -540,6 +536,11 @@ def get_metadata_routing(self):
         )
         return router
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = get_tags(self._get_estimator()).input_tags.sparse
+        return tags
+
 
 def _fit_classifier_calibrator_pair(
     estimator,
diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py
index 677421974bdc0..76b5f27a21af4 100644
--- a/sklearn/cluster/_affinity_propagation.py
+++ b/sklearn/cluster/_affinity_propagation.py
@@ -483,6 +483,7 @@ def __init__(
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.pairwise = self.affinity == "precomputed"
+        tags.input_tags.sparse = True
         return tags
 
     @_fit_context(prefer_skip_nested_validation=True)
diff --git a/sklearn/cluster/_bicluster.py b/sklearn/cluster/_bicluster.py
index 08cd63b58cbaa..16818b98c703b 100644
--- a/sklearn/cluster/_bicluster.py
+++ b/sklearn/cluster/_bicluster.py
@@ -195,6 +195,7 @@ def _k_means(self, data, n_clusters):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags._xfail_checks = {
             "check_estimators_dtypes": "raises nan error",
             "check_fit2d_1sample": "_scale_normalize fails",
diff --git a/sklearn/cluster/_birch.py b/sklearn/cluster/_birch.py
index 3e5f9d10a79e8..4d8abb43513dc 100644
--- a/sklearn/cluster/_birch.py
+++ b/sklearn/cluster/_birch.py
@@ -742,4 +742,5 @@ def _global_clustering(self, X=None):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
+        tags.input_tags.sparse = True
         return tags
diff --git a/sklearn/cluster/_bisect_k_means.py b/sklearn/cluster/_bisect_k_means.py
index 3c9ccdcf06414..77e24adbf8084 100644
--- a/sklearn/cluster/_bisect_k_means.py
+++ b/sklearn/cluster/_bisect_k_means.py
@@ -538,5 +538,6 @@ def _predict_recursive(self, X, sample_weight, cluster_node):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
         return tags
diff --git a/sklearn/cluster/_dbscan.py b/sklearn/cluster/_dbscan.py
index 7764bff94582f..d79c4f286d76d 100644
--- a/sklearn/cluster/_dbscan.py
+++ b/sklearn/cluster/_dbscan.py
@@ -473,4 +473,5 @@ def fit_predict(self, X, y=None, sample_weight=None):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.pairwise = self.metric == "precomputed"
+        tags.input_tags.sparse = True
         return tags
diff --git a/sklearn/cluster/_hdbscan/hdbscan.py b/sklearn/cluster/_hdbscan/hdbscan.py
index 8bf402a5081c9..0607bbb23ba51 100644
--- a/sklearn/cluster/_hdbscan/hdbscan.py
+++ b/sklearn/cluster/_hdbscan/hdbscan.py
@@ -1003,5 +1003,6 @@ def dbscan_clustering(self, cut_distance, min_cluster_size=5):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.input_tags.allow_nan = self.metric != "precomputed"
         return tags
diff --git a/sklearn/cluster/_kmeans.py b/sklearn/cluster/_kmeans.py
index 80958f8c845a2..654df8d69a0e4 100644
--- a/sklearn/cluster/_kmeans.py
+++ b/sklearn/cluster/_kmeans.py
@@ -1179,6 +1179,7 @@ def score(self, X, y=None, sample_weight=None):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         # TODO: replace by a statistical test, see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
diff --git a/sklearn/cluster/_spectral.py b/sklearn/cluster/_spectral.py
index ebfeccee677a9..6d1dcd093e803 100644
--- a/sklearn/cluster/_spectral.py
+++ b/sklearn/cluster/_spectral.py
@@ -794,6 +794,7 @@ def fit_predict(self, X, y=None):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.input_tags.pairwise = self.affinity in [
             "precomputed",
             "precomputed_nearest_neighbors",
diff --git a/sklearn/compose/_target.py b/sklearn/compose/_target.py
index d90ee17d13f49..86fc6294878b9 100644
--- a/sklearn/compose/_target.py
+++ b/sklearn/compose/_target.py
@@ -348,6 +348,7 @@ def __sklearn_tags__(self):
         regressor = self._get_regressor()
         tags = super().__sklearn_tags__()
         tags.regressor_tags.poor_score = True
+        tags.input_tags.sparse = get_tags(regressor).input_tags.sparse
         tags.target_tags.multi_output = get_tags(regressor).target_tags.multi_output
         return tags
 
diff --git a/sklearn/decomposition/_incremental_pca.py b/sklearn/decomposition/_incremental_pca.py
index b2caf81aa9793..35a894416f39a 100644
--- a/sklearn/decomposition/_incremental_pca.py
+++ b/sklearn/decomposition/_incremental_pca.py
@@ -418,3 +418,8 @@ def transform(self, X):
             return np.vstack(output)
         else:
             return super().transform(X)
+
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
diff --git a/sklearn/decomposition/_kernel_pca.py b/sklearn/decomposition/_kernel_pca.py
index d9757c7845be1..37ff77c8d7c64 100644
--- a/sklearn/decomposition/_kernel_pca.py
+++ b/sklearn/decomposition/_kernel_pca.py
@@ -566,6 +566,7 @@ def inverse_transform(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
         tags.input_tags.pairwise = self.kernel == "precomputed"
         return tags
diff --git a/sklearn/decomposition/_lda.py b/sklearn/decomposition/_lda.py
index 875c6e25fbb10..4580ff073bca5 100644
--- a/sklearn/decomposition/_lda.py
+++ b/sklearn/decomposition/_lda.py
@@ -549,6 +549,7 @@ def _em_step(self, X, total_samples, batch_update, parallel=None):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.positive_only = True
+        tags.input_tags.sparse = True
         tags.transformer_tags.preserves_dtype = ["float32", "float64"]
         return tags
 
diff --git a/sklearn/decomposition/_nmf.py b/sklearn/decomposition/_nmf.py
index 6be97f2223fb5..dc21e389f6849 100644
--- a/sklearn/decomposition/_nmf.py
+++ b/sklearn/decomposition/_nmf.py
@@ -1331,6 +1331,7 @@ def _n_features_out(self):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.positive_only = True
+        tags.input_tags.sparse = True
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
         return tags
 
diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index 24cb1649c5fee..ba5ca4dd4c680 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -851,4 +851,5 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
         tags.array_api_support = True
+        tags.input_tags.sparse = True
         return tags
diff --git a/sklearn/decomposition/_truncated_svd.py b/sklearn/decomposition/_truncated_svd.py
index b87a53684c140..b77882f5da78d 100644
--- a/sklearn/decomposition/_truncated_svd.py
+++ b/sklearn/decomposition/_truncated_svd.py
@@ -312,6 +312,7 @@ def inverse_transform(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
         return tags
 
diff --git a/sklearn/dummy.py b/sklearn/dummy.py
index 6332ff43cd482..aa07ca78810b0 100644
--- a/sklearn/dummy.py
+++ b/sklearn/dummy.py
@@ -423,6 +423,7 @@ def predict_log_proba(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.classifier_tags.poor_score = True
         tags.no_validation = True
         tags._xfail_checks = {
@@ -666,6 +667,7 @@ def predict(self, X, return_std=False):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.regressor_tags.poor_score = True
         tags.no_validation = True
         return tags
diff --git a/sklearn/ensemble/_bagging.py b/sklearn/ensemble/_bagging.py
index 423fc0ec6449a..b3b7590ce7fb1 100644
--- a/sklearn/ensemble/_bagging.py
+++ b/sklearn/ensemble/_bagging.py
@@ -642,6 +642,7 @@ def _get_estimator(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = get_tags(self._get_estimator()).input_tags.sparse
         tags.input_tags.allow_nan = get_tags(self._get_estimator()).input_tags.allow_nan
         # TODO: replace by a statistical test, see meta-issue #16298
         tags._xfail_checks = {
diff --git a/sklearn/ensemble/_base.py b/sklearn/ensemble/_base.py
index 2789dd234294e..3545c4e802d7c 100644
--- a/sklearn/ensemble/_base.py
+++ b/sklearn/ensemble/_base.py
@@ -292,11 +292,17 @@ def __sklearn_tags__(self):
                 get_tags(est[1]).input_tags.allow_nan if est[1] != "drop" else True
                 for est in self.estimators
             )
+            sparse = all(
+                get_tags(est[1]).input_tags.sparse if est[1] != "drop" else True
+                for est in self.estimators
+            )
         except Exception:
             # If `estimators` does not comply with our API (list of tuples) then it will
             # fail. In this case, we assume that `allow_nan` is False but the parameter
             # validation will raise an error during `fit`.
             allow_nan = False
+            sparse = False
         tags.input_tags.allow_nan = allow_nan
+        tags.input_tags.sparse = sparse
         tags.transformer_tags.preserves_dtype = []
         return tags
diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index 7c7663864ad92..126addb0b5f9c 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -1003,6 +1003,7 @@ def predict_log_proba(self, X):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.classifier_tags.multi_label = True
+        tags.input_tags.sparse = True
         return tags
 
 
@@ -1169,6 +1170,7 @@ def _compute_partial_dependence_recursion(self, grid, target_features):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.regressor_tags.multi_label = True
+        tags.input_tags.sparse = True
         return tags
 
 
@@ -3016,6 +3018,7 @@ def transform(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         # TODO: replace by a statistical test, see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index 8f85f2f7aa3cd..dca3d65b87925 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -1727,6 +1727,7 @@ def staged_predict_proba(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         # TODO: investigate failure see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
@@ -2194,6 +2195,7 @@ def apply(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         # TODO: investigate failure see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py
index 7780230b046cb..66363808455bf 100644
--- a/sklearn/ensemble/_weight_boosting.py
+++ b/sklearn/ensemble/_weight_boosting.py
@@ -860,6 +860,7 @@ def predict_log_proba(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         # TODO: replace by a statistical test, see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
@@ -1179,6 +1180,7 @@ def staged_predict(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         # TODO: replace by a statistical test, see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
diff --git a/sklearn/feature_selection/_from_model.py b/sklearn/feature_selection/_from_model.py
index d5476e3f06abf..850505dbf6b92 100644
--- a/sklearn/feature_selection/_from_model.py
+++ b/sklearn/feature_selection/_from_model.py
@@ -519,5 +519,6 @@ def get_metadata_routing(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         tags.input_tags.allow_nan = get_tags(self.estimator).input_tags.allow_nan
         return tags
diff --git a/sklearn/feature_selection/_rfe.py b/sklearn/feature_selection/_rfe.py
index 0282facf9fd31..6823759774c30 100644
--- a/sklearn/feature_selection/_rfe.py
+++ b/sklearn/feature_selection/_rfe.py
@@ -533,6 +533,7 @@ def __sklearn_tags__(self):
         if tags.regressor_tags is not None:
             tags.regressor_tags.poor_score = True
         tags.target_tags.required = True
+        tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         tags.input_tags.allow_nan = get_tags(self.estimator).input_tags.allow_nan
         return tags
 
diff --git a/sklearn/feature_selection/_sequential.py b/sklearn/feature_selection/_sequential.py
index ac5f13fd00e7d..676106c4a19ca 100644
--- a/sklearn/feature_selection/_sequential.py
+++ b/sklearn/feature_selection/_sequential.py
@@ -329,6 +329,7 @@ def _get_support_mask(self):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.allow_nan = get_tags(self.estimator).input_tags.allow_nan
+        tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         return tags
 
     def get_metadata_routing(self):
diff --git a/sklearn/feature_selection/_univariate_selection.py b/sklearn/feature_selection/_univariate_selection.py
index 7933818a6a19b..996d5423995d2 100644
--- a/sklearn/feature_selection/_univariate_selection.py
+++ b/sklearn/feature_selection/_univariate_selection.py
@@ -581,6 +581,7 @@ def _check_params(self, X, y):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.target_tags.required = True
+        tags.input_tags.sparse = True
         return tags
 
 
diff --git a/sklearn/feature_selection/_variance_threshold.py b/sklearn/feature_selection/_variance_threshold.py
index 1aab9080b964d..f26d70ecf8f82 100644
--- a/sklearn/feature_selection/_variance_threshold.py
+++ b/sklearn/feature_selection/_variance_threshold.py
@@ -137,4 +137,5 @@ def _get_support_mask(self):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.allow_nan = True
+        tags.input_tags.sparse = True
         return tags
diff --git a/sklearn/impute/_base.py b/sklearn/impute/_base.py
index faf1f9e23b678..7a8f2cc4483e2 100644
--- a/sklearn/impute/_base.py
+++ b/sklearn/impute/_base.py
@@ -739,6 +739,7 @@ def inverse_transform(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.input_tags.allow_nan = is_pandas_na(self.missing_values) or is_scalar_nan(
             self.missing_values
         )
@@ -1130,5 +1131,6 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.allow_nan = True
         tags.input_tags.string = True
+        tags.input_tags.sparse = True
         tags.transformer_tags.preserves_dtype = []
         return tags
diff --git a/sklearn/kernel_approximation.py b/sklearn/kernel_approximation.py
index 96f9b7e9d4778..dd16d2d5711ee 100644
--- a/sklearn/kernel_approximation.py
+++ b/sklearn/kernel_approximation.py
@@ -235,6 +235,11 @@ def transform(self, X):
 
         return data_sketch
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 class RBFSampler(ClassNamePrefixFeaturesOutMixin, TransformerMixin, BaseEstimator):
     """Approximate a RBF kernel feature map using random Fourier features.
@@ -404,6 +409,7 @@ def transform(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
         return tags
 
@@ -826,6 +832,7 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.requires_fit = False
         tags.input_tags.positive_only = True
+        tags.input_tags.sparse = True
         return tags
 
 
@@ -1094,6 +1101,7 @@ def _get_kernel_params(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags._xfail_checks = {
             "check_transformer_preserves_dtypes": (
                 "dtypes are preserved but not at a close enough precision"
diff --git a/sklearn/kernel_ridge.py b/sklearn/kernel_ridge.py
index 983b463508c5b..29e744647acc9 100644
--- a/sklearn/kernel_ridge.py
+++ b/sklearn/kernel_ridge.py
@@ -169,6 +169,7 @@ def _get_kernel(self, X, Y=None):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.input_tags.pairwise = self.kernel == "precomputed"
         return tags
 
diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index 3bb3b8b7626d8..7be8188981455 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -687,6 +687,11 @@ def rmatvec(b):
         self._set_intercept(X_offset, y_offset, X_scale)
         return self
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 def _check_precomputed_gram_matrix(
     X, precompute, X_offset, X_scale, rtol=None, atol=1e-5
diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py
index 2dbb83c82fbaa..b8430fe00b862 100644
--- a/sklearn/linear_model/_coordinate_descent.py
+++ b/sklearn/linear_model/_coordinate_descent.py
@@ -1149,6 +1149,11 @@ def _decision_function(self, X):
         else:
             return super()._decision_function(X)
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 ###############################################################################
 # Lasso model
@@ -2078,6 +2083,7 @@ def _is_multitask(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.target_tags.multi_output = False
         return tags
 
@@ -2359,6 +2365,7 @@ def _is_multitask(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.target_tags.multi_output = False
         return tags
 
@@ -2654,6 +2661,7 @@ def fit(self, X, y):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = False
         tags.target_tags.multi_output = True
         tags.target_tags.single_output = False
         return tags
diff --git a/sklearn/linear_model/_glm/glm.py b/sklearn/linear_model/_glm/glm.py
index 093a813f60550..fc31f9825d2e5 100644
--- a/sklearn/linear_model/_glm/glm.py
+++ b/sklearn/linear_model/_glm/glm.py
@@ -442,6 +442,7 @@ def score(self, X, y, sample_weight=None):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         try:
             # Create instance of BaseLoss if fit wasn't called yet. This is necessary as
             # TweedieRegressor might set the used loss during fit different from
diff --git a/sklearn/linear_model/_huber.py b/sklearn/linear_model/_huber.py
index 9e41cc4eae3b5..81fdfa51ead51 100644
--- a/sklearn/linear_model/_huber.py
+++ b/sklearn/linear_model/_huber.py
@@ -351,3 +351,8 @@ def fit(self, X, y, sample_weight=None):
         residual = np.abs(y - safe_sparse_dot(X, self.coef_) - self.intercept_)
         self.outliers_ = residual > self.scale_ * self.epsilon
         return self
+
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
diff --git a/sklearn/linear_model/_logistic.py b/sklearn/linear_model/_logistic.py
index fe5ee918066fa..014fae4287108 100644
--- a/sklearn/linear_model/_logistic.py
+++ b/sklearn/linear_model/_logistic.py
@@ -1459,6 +1459,7 @@ def predict_log_proba(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags._xfail_checks.update(
             {
                 "check_non_transformer_estimators_n_iter": (
@@ -2285,3 +2286,8 @@ def _get_scorer(self):
         """
         scoring = self.scoring or "accuracy"
         return get_scorer(scoring)
+
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
diff --git a/sklearn/linear_model/_quantile.py b/sklearn/linear_model/_quantile.py
index 883a41558f2f7..446d232958e8d 100644
--- a/sklearn/linear_model/_quantile.py
+++ b/sklearn/linear_model/_quantile.py
@@ -294,3 +294,8 @@ def fit(self, X, y, sample_weight=None):
             self.coef_ = params
             self.intercept_ = 0.0
         return self
+
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py
index 8b5b34317f5eb..18f061622cf54 100644
--- a/sklearn/linear_model/_ransac.py
+++ b/sklearn/linear_model/_ransac.py
@@ -723,6 +723,7 @@ def get_metadata_routing(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         # TODO: replace by a statistical test, see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 2b7b3708354e3..913f3a6cecce4 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -1251,6 +1251,10 @@ def fit(self, X, y, sample_weight=None):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.array_api_support = True
+        reject_sparse = (self.solver == "svd") or (
+            self.solver == "cholesky" and self.fit_intercept
+        )
+        tags.input_tags.sparse = not reject_sparse
         tags._xfail_checks.update(
             {
                 "check_non_transformer_estimators_n_iter": (
@@ -1577,6 +1581,10 @@ def fit(self, X, y, sample_weight=None):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        reject_sparse = (self.solver == "svd") or (
+            self.solver == "cholesky" and self.fit_intercept
+        )
+        tags.input_tags.sparse = not reject_sparse
         tags._xfail_checks.update(
             {
                 "check_non_transformer_estimators_n_iter": (
@@ -2550,6 +2558,11 @@ def _get_scorer(self):
     def cv_values_(self):
         return self.cv_results_
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 class RidgeCV(MultiOutputMixin, RegressorMixin, _BaseRidgeCV):
     """Ridge regression with built-in cross-validation.
diff --git a/sklearn/linear_model/_stochastic_gradient.py b/sklearn/linear_model/_stochastic_gradient.py
index 4d924a1ad00a6..d86ef9e1ef000 100644
--- a/sklearn/linear_model/_stochastic_gradient.py
+++ b/sklearn/linear_model/_stochastic_gradient.py
@@ -941,6 +941,11 @@ def fit(self, X, y, coef_init=None, intercept_init=None, sample_weight=None):
             sample_weight=sample_weight,
         )
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 class SGDClassifier(BaseSGDClassifier):
     """Linear classifiers (SVM, logistic regression, etc.) with SGD training.
@@ -1782,6 +1787,11 @@ def _fit_regressor(
         else:
             self.intercept_ = np.atleast_1d(intercept)
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 class SGDRegressor(BaseSGDRegressor):
     """Linear model fitted by minimizing a regularized empirical loss with SGD.
@@ -2656,6 +2666,7 @@ def predict(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         # TODO: replace by a statistical test, see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
diff --git a/sklearn/manifold/_isomap.py b/sklearn/manifold/_isomap.py
index ee302bc07b384..90154470c18a4 100644
--- a/sklearn/manifold/_isomap.py
+++ b/sklearn/manifold/_isomap.py
@@ -438,4 +438,5 @@ def transform(self, X):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
+        tags.input_tags.sparse = True
         return tags
diff --git a/sklearn/manifold/_spectral_embedding.py b/sklearn/manifold/_spectral_embedding.py
index ebd5d7c5b651b..d3d45ec0773c3 100644
--- a/sklearn/manifold/_spectral_embedding.py
+++ b/sklearn/manifold/_spectral_embedding.py
@@ -650,6 +650,7 @@ def __init__(
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.input_tags.pairwise = self.affinity in [
             "precomputed",
             "precomputed_nearest_neighbors",
diff --git a/sklearn/model_selection/_classification_threshold.py b/sklearn/model_selection/_classification_threshold.py
index 56bc26299a442..2790cf8f5a4ad 100644
--- a/sklearn/model_selection/_classification_threshold.py
+++ b/sklearn/model_selection/_classification_threshold.py
@@ -22,7 +22,7 @@
     _CurveScorer,
     _threshold_scores_to_class_labels,
 )
-from ..utils import _safe_indexing
+from ..utils import _safe_indexing, get_tags
 from ..utils._param_validation import HasMethods, Interval, RealNotInt, StrOptions
 from ..utils._response import _get_response_values_binary
 from ..utils.metadata_routing import (
@@ -212,6 +212,7 @@ def decision_function(self, X):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.classifier_tags.multi_class = False
+        tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         tags._xfail_checks = {
             "check_classifiers_train": "Threshold at probability 0.5 does not hold",
             "check_sample_weight_equivalence": (
diff --git a/sklearn/model_selection/_search.py b/sklearn/model_selection/_search.py
index 2935f7ce2465c..b7670d7c304d0 100644
--- a/sklearn/model_selection/_search.py
+++ b/sklearn/model_selection/_search.py
@@ -483,6 +483,7 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         # allows cross-validation to see 'precomputed' metrics
         tags.input_tags.pairwise = get_tags(self.estimator).input_tags.pairwise
+        tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         tags._xfail_checks = {
             "check_supervised_y_2d": "DataConversionWarning not caught",
             "check_requires_y_none": "Doesn't fail gracefully",
diff --git a/sklearn/multiclass.py b/sklearn/multiclass.py
index dca055ecbfb4a..1ddb36ca4fa8f 100644
--- a/sklearn/multiclass.py
+++ b/sklearn/multiclass.py
@@ -601,6 +601,7 @@ def __sklearn_tags__(self):
         """Indicate if wrapped estimator is using a precomputed Gram matrix"""
         tags = super().__sklearn_tags__()
         tags.input_tags.pairwise = get_tags(self.estimator).input_tags.pairwise
+        tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         return tags
 
     def get_metadata_routing(self):
@@ -1004,6 +1005,7 @@ def __sklearn_tags__(self):
         """Indicate if wrapped estimator is using a precomputed Gram matrix"""
         tags = super().__sklearn_tags__()
         tags.input_tags.pairwise = get_tags(self.estimator).input_tags.pairwise
+        tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         return tags
 
     def get_metadata_routing(self):
@@ -1276,3 +1278,8 @@ def get_metadata_routing(self):
             method_mapping=MethodMapping().add(caller="fit", callee="fit"),
         )
         return router
+
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
+        return tags
diff --git a/sklearn/multioutput.py b/sklearn/multioutput.py
index ebcd73e95d881..38b6eb4a7e0ec 100644
--- a/sklearn/multioutput.py
+++ b/sklearn/multioutput.py
@@ -25,7 +25,7 @@
     is_classifier,
 )
 from .model_selection import cross_val_predict
-from .utils import Bunch, check_random_state
+from .utils import Bunch, check_random_state, get_tags
 from .utils._param_validation import HasMethods, StrOptions
 from .utils._response import _get_response_values
 from .utils._user_interface import _print_elapsed_time
@@ -311,6 +311,7 @@ def predict(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         tags.target_tags.single_output = False
         tags.target_tags.multi_output = True
         return tags
@@ -829,6 +830,11 @@ def predict(self, X):
         """
         return self._get_predictions(X, output_method="predict")
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = get_tags(self.base_estimator).input_tags.sparse
+        return tags
+
 
 class ClassifierChain(MetaEstimatorMixin, ClassifierMixin, _BaseChain):
     """A multi-label model that arranges binary classifiers into a chain.
diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index fa99448f9d347..62cf52c4c2e49 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -880,6 +880,7 @@ def __init__(
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.input_tags.positive_only = True
         return tags
 
@@ -1028,6 +1029,7 @@ def __init__(
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.input_tags.positive_only = True
         return tags
 
@@ -1227,6 +1229,11 @@ def _joint_log_likelihood(self, X):
 
         return jll
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 class CategoricalNB(_BaseDiscreteNB):
     """Naive Bayes classifier for categorical features.
@@ -1432,6 +1439,7 @@ def partial_fit(self, X, y, classes=None, sample_weight=None):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = False
         tags.input_tags.positive_only = True
         # TODO: fix sample_weight handling of this estimator, see meta-issue #16298
         tags._xfail_checks = {
diff --git a/sklearn/neighbors/_base.py b/sklearn/neighbors/_base.py
index 1925e0dbc758c..20d16bfa57878 100644
--- a/sklearn/neighbors/_base.py
+++ b/sklearn/neighbors/_base.py
@@ -693,6 +693,7 @@ def _fit(self, X, y=None):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         # For cross-validation routines to split data correctly
         tags.input_tags.pairwise = self.metric == "precomputed"
         return tags
diff --git a/sklearn/neighbors/_nearest_centroid.py b/sklearn/neighbors/_nearest_centroid.py
index cb8d1dbf7107f..d52f6d128d052 100644
--- a/sklearn/neighbors/_nearest_centroid.py
+++ b/sklearn/neighbors/_nearest_centroid.py
@@ -332,3 +332,8 @@ def _check_euclidean_metric(self):
     predict_log_proba = available_if(_check_euclidean_metric)(
         DiscriminantAnalysisPredictionMixin.predict_log_proba
     )
+
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
diff --git a/sklearn/neural_network/_multilayer_perceptron.py b/sklearn/neural_network/_multilayer_perceptron.py
index 196203ce46763..47805857b5154 100644
--- a/sklearn/neural_network/_multilayer_perceptron.py
+++ b/sklearn/neural_network/_multilayer_perceptron.py
@@ -771,6 +771,11 @@ def _score_with_function(self, X, y, score_function):
 
         return score_function(y, y_pred)
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 class MLPClassifier(ClassifierMixin, BaseMultilayerPerceptron):
     """Multi-layer Perceptron classifier.
diff --git a/sklearn/neural_network/_rbm.py b/sklearn/neural_network/_rbm.py
index 49848e9f982cc..019fc3af83ef0 100644
--- a/sklearn/neural_network/_rbm.py
+++ b/sklearn/neural_network/_rbm.py
@@ -448,5 +448,6 @@ def __sklearn_tags__(self):
                 "fails for the score_samples method"
             ),
         }
+        tags.input_tags.sparse = True
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
         return tags
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 90a62d9e4e8ab..3c7b85c8abc9a 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -1060,6 +1060,7 @@ def __sklearn_tags__(self):
 
         try:
             tags.input_tags.pairwise = get_tags(self.steps[0][1]).input_tags.pairwise
+            tags.input_tags.sparse = get_tags(self.steps[0][1]).input_tags.sparse
         except (ValueError, AttributeError, TypeError):
             # This happens when the `steps` is not a list of (name, estimator)
             # tuples and `fit` is not called yet to validate the steps.
diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 74ea7431a5d72..8d428fe50c7f8 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -1130,6 +1130,7 @@ def inverse_transform(self, X, copy=None):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.allow_nan = True
+        tags.input_tags.sparse = not self.with_mean
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
         return tags
 
@@ -1363,6 +1364,7 @@ def inverse_transform(self, X):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.allow_nan = True
+        tags.input_tags.sparse = True
         return tags
 
 
@@ -2136,6 +2138,7 @@ def transform(self, X, copy=None):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.requires_fit = False
         tags.array_api_support = True
         return tags
@@ -2343,6 +2346,7 @@ def transform(self, X, copy=None):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.requires_fit = False
+        tags.input_tags.sparse = True
         return tags
 
 
@@ -3009,6 +3013,7 @@ def inverse_transform(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.input_tags.allow_nan = True
         return tags
 
diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py
index 02379273e302e..54166fdacb2ad 100644
--- a/sklearn/preprocessing/_function_transformer.py
+++ b/sklearn/preprocessing/_function_transformer.py
@@ -394,6 +394,7 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.no_validation = not self.validate
         tags.requires_fit = False
+        tags.input_tags.sparse = True
         return tags
 
     def set_output(self, *, transform=None):
diff --git a/sklearn/preprocessing/_polynomial.py b/sklearn/preprocessing/_polynomial.py
index 5a3239f113024..7ec9953680331 100644
--- a/sklearn/preprocessing/_polynomial.py
+++ b/sklearn/preprocessing/_polynomial.py
@@ -585,6 +585,11 @@ def transform(self, X):
                 XP = Xout
         return XP
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 class SplineTransformer(TransformerMixin, BaseEstimator):
     """Generate univariate B-spline bases for features.
diff --git a/sklearn/random_projection.py b/sklearn/random_projection.py
index 804bd1088d70a..bec365616da7b 100644
--- a/sklearn/random_projection.py
+++ b/sklearn/random_projection.py
@@ -463,6 +463,7 @@ def inverse_transform(self, X):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
+        tags.input_tags.sparse = True
         return tags
 
 
diff --git a/sklearn/semi_supervised/_label_propagation.py b/sklearn/semi_supervised/_label_propagation.py
index a2e25277cf450..3eaee712b93a4 100644
--- a/sklearn/semi_supervised/_label_propagation.py
+++ b/sklearn/semi_supervised/_label_propagation.py
@@ -336,6 +336,11 @@ def fit(self, X, y):
         self.transduction_ = transduction.ravel()
         return self
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 class LabelPropagation(BaseLabelPropagation):
     """Label Propagation classifier.
diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py
index 3e1709adaa267..32eb36c83b4c0 100644
--- a/sklearn/semi_supervised/_self_training.py
+++ b/sklearn/semi_supervised/_self_training.py
@@ -5,7 +5,7 @@
 import numpy as np
 
 from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone
-from ..utils import Bunch, safe_mask
+from ..utils import Bunch, get_tags, safe_mask
 from ..utils._param_validation import HasMethods, Hidden, Interval, StrOptions
 from ..utils.metadata_routing import (
     MetadataRouter,
@@ -635,6 +635,7 @@ def get_metadata_routing(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         tags._xfail_checks.update(
             {"check_non_transformer_estimators_n_iter": "n_iter_ can be 0."}
         )
diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index f4e4aa118c069..91e1a06d035e7 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -351,6 +351,7 @@ def fit(self, X, y, sample_weight=None):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         # TODO: replace by a statistical test when _dual=True, see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
@@ -615,6 +616,7 @@ def fit(self, X, y, sample_weight=None):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         # TODO: replace by a statistical test, see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
@@ -902,6 +904,7 @@ def __init__(
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = self.kernel != "precomputed"
         tags._xfail_checks = {
             # TODO: fix sample_weight handling of this estimator when probability=False
             # TODO: replace by a statistical test when probability=True
@@ -1177,6 +1180,7 @@ def __init__(
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = self.kernel != "precomputed"
         tags._xfail_checks = {
             "check_methods_subset_invariance": (
                 "fails for the decision_function method"
@@ -1388,6 +1392,7 @@ def __init__(
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = self.kernel != "precomputed"
         # TODO: fix sample_weight handling of this estimator, see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
@@ -1583,6 +1588,7 @@ def __init__(
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = self.kernel != "precomputed"
         # TODO: fix sample_weight handling of this estimator, see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
@@ -1850,6 +1856,7 @@ def predict(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         # TODO: fix sample_weight handling of this estimator, see meta-issue #16298
         tags._xfail_checks = {
             "check_sample_weight_equivalence": (
diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py
index 93246a1376e85..f94dd70aefe1c 100644
--- a/sklearn/tree/_classes.py
+++ b/sklearn/tree/_classes.py
@@ -1100,6 +1100,7 @@ def __sklearn_tags__(self):
         }
         tags.classifier_tags.multi_label = True
         tags.input_tags.allow_nan = allow_nan
+        tags.input_tags.sparse = True
         return tags
 
 
@@ -1442,6 +1443,7 @@ def __sklearn_tags__(self):
             "poisson",
         }
         tags.input_tags.allow_nan = allow_nan
+        tags.input_tags.sparse = True
         return tags
 
 
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 728fd71844118..1b6e7e79e4b7e 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -136,6 +136,7 @@ def _yield_checks(estimator):
     if hasattr(estimator, "sparsify"):
         yield check_sparsify_coefficients
 
+    yield check_estimator_sparse_tag
     yield check_estimator_sparse_array
     yield check_estimator_sparse_matrix
 
@@ -871,6 +872,41 @@ def check_array_api_input_and_values(
     )
 
 
+def check_estimator_sparse_tag(name, estimator_orig):
+    if SPARSE_ARRAY_PRESENT:
+        sparse_container = sparse.csr_array
+    else:
+        sparse_container = sparse.csr_matrix
+    estimator = clone(estimator_orig)
+
+    rng = np.random.RandomState(0)
+    n_samples = 15 if name == "SpectralCoclustering" else 40
+    X = rng.uniform(size=(n_samples, 3))
+    X[X < 0.6] = 0
+    y = rng.randint(0, 3, size=n_samples)
+    X = _enforce_estimator_tags_X(estimator, X)
+    y = _enforce_estimator_tags_y(estimator, y)
+    X = sparse_container(X)
+
+    tags = get_tags(estimator)
+    if tags.input_tags.sparse:
+        estimator.fit(X, y)  # should pass
+        return
+    else:
+        err_msg = (
+            f"Estimator {name} has input_tags.sparse=False "
+            "but didn't raise an error when fitted on sparse data."
+        )
+        with raises(
+            (TypeError, ValueError),
+            match=["sparse", "Sparse"],
+            may_pass=False,
+            err_msg=err_msg,
+        ):
+            estimator.fit(X, y)  # should fail
+        return
+
+
 def _check_estimator_sparse_container(name, estimator_orig, sparse_type):
     rng = np.random.RandomState(0)
     X = rng.uniform(size=(40, 3))

From b0e605d127cb4f6c8d15cb0e177740104bcb3d2f Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Mon, 4 Nov 2024 10:19:13 +0100
Subject: [PATCH 02/28] fix LinearRegression tag

---
 sklearn/linear_model/_base.py                    | 2 +-
 sklearn/utils/_test_common/instance_generator.py | 3 +++
 sklearn/utils/tests/test_estimator_checks.py     | 7 +++++--
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/sklearn/linear_model/_base.py b/sklearn/linear_model/_base.py
index 7be8188981455..bb71cbe9ed550 100644
--- a/sklearn/linear_model/_base.py
+++ b/sklearn/linear_model/_base.py
@@ -689,7 +689,7 @@ def rmatvec(b):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
+        tags.input_tags.sparse = not self.positive
         return tags
 
 
diff --git a/sklearn/utils/_test_common/instance_generator.py b/sklearn/utils/_test_common/instance_generator.py
index 846c132aa0feb..d60c5786a3025 100644
--- a/sklearn/utils/_test_common/instance_generator.py
+++ b/sklearn/utils/_test_common/instance_generator.py
@@ -533,6 +533,9 @@
         "check_dict_unchanged": dict(batch_size=10, max_iter=5, n_components=1)
     },
     LinearDiscriminantAnalysis: {"check_dict_unchanged": dict(n_components=1)},
+    LinearRegression: {
+        "check_estimator_sparse_tag": [dict(positive=False), dict(positive=True)]
+    },
     LocallyLinearEmbedding: {"check_dict_unchanged": dict(max_iter=5, n_components=1)},
     LogisticRegression: {
         "check_sample_weight_equivalence": [
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 29611a853938f..7d604eb0c61c1 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -495,7 +495,8 @@ def __sklearn_tags__(self):
 
 class RequiresPositiveXRegressor(LinearRegression):
     def fit(self, X, y):
-        X, y = validate_data(self, X, y, multi_output=True)
+        # reject sparse X to be able to call (X < 0).any()
+        X, y = validate_data(self, X, y, accept_sparse=False, multi_output=True)
         if (X < 0).any():
             raise ValueError("negative X values not supported!")
         return super().fit(X, y)
@@ -503,12 +504,14 @@ def fit(self, X, y):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.positive_only = True
+        # reject sparse X to be able to call (X < 0).any()
+        tags.input_tags.sparse = False
         return tags
 
 
 class RequiresPositiveYRegressor(LinearRegression):
     def fit(self, X, y):
-        X, y = validate_data(self, X, y, multi_output=True)
+        X, y = validate_data(self, X, y, accept_sparse=True, multi_output=True)
         if (y <= 0).any():
             raise ValueError("negative y values not supported!")
         return super().fit(X, y)

From 6c72527502bf89c716cf9cf24020416b486fc28e Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Mon, 4 Nov 2024 10:34:08 +0100
Subject: [PATCH 03/28] changelog

---
 doc/whats_new/upcoming_changes/sklearn.utils/30187.api.rst | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 doc/whats_new/upcoming_changes/sklearn.utils/30187.api.rst

diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/30187.api.rst b/doc/whats_new/upcoming_changes/sklearn.utils/30187.api.rst
new file mode 100644
index 0000000000000..46b5dfa96267a
--- /dev/null
+++ b/doc/whats_new/upcoming_changes/sklearn.utils/30187.api.rst
@@ -0,0 +1,4 @@
+- :func:`utils.estimator_checks.check_estimator_sparse_tag` ensures that
+  the estimator tag `input_tags.sparse` is consistent with its `fit`
+  method (acceting sparse input `X` or raising an error).
+  By :user:`Antoine Baker <antoinebaker>`

From c3065930002b5ef8a36a1c6f45a40a7fe47180c5 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Mon, 4 Nov 2024 16:13:02 +0100
Subject: [PATCH 04/28] changelog

---
 .../sklearn.utils/{30187.api.rst => 30187.fix.rst}                | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename doc/whats_new/upcoming_changes/sklearn.utils/{30187.api.rst => 30187.fix.rst} (100%)

diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/30187.api.rst b/doc/whats_new/upcoming_changes/sklearn.utils/30187.fix.rst
similarity index 100%
rename from doc/whats_new/upcoming_changes/sklearn.utils/30187.api.rst
rename to doc/whats_new/upcoming_changes/sklearn.utils/30187.fix.rst

From 6aadc95b3eec13df3ac7793712ed7fb79caa1939 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Thu, 14 Nov 2024 16:49:32 +0100
Subject: [PATCH 05/28] fix column transformer tag

---
 sklearn/compose/_column_transformer.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index f9f6419310a6d..c73c5befa6eb2 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -29,6 +29,7 @@
     _get_output_config,
     _safe_set_output,
 )
+from ..utils._tags import get_tags
 from ..utils.metadata_routing import (
     MetadataRouter,
     MethodMapping,
@@ -1322,6 +1323,10 @@ def get_metadata_routing(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = all(
+            get_tags(trans).input_tags.sparse if trans != "drop" else True
+            for name, trans, _ in self.transformers
+        )
         tags._xfail_checks = {
             "check_estimators_empty_data_messages": "FIXME",
             "check_estimators_nan_inf": "FIXME",

From 7979fa9a8b74b9bdadbebd4c4ea9f004d94886a2 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Fri, 15 Nov 2024 10:40:44 +0100
Subject: [PATCH 06/28] change error message

---
 sklearn/utils/estimator_checks.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 1b6e7e79e4b7e..f56095eecb819 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -891,11 +891,10 @@ def check_estimator_sparse_tag(name, estimator_orig):
     tags = get_tags(estimator)
     if tags.input_tags.sparse:
         estimator.fit(X, y)  # should pass
-        return
     else:
         err_msg = (
-            f"Estimator {name} has input_tags.sparse=False "
-            "but didn't raise an error when fitted on sparse data."
+            f"Estimator {name} with input_tags.sparse=False doesn't "
+            "fail gracefully when fitted on sparse data."
         )
         with raises(
             (TypeError, ValueError),
@@ -904,7 +903,6 @@ def check_estimator_sparse_tag(name, estimator_orig):
             err_msg=err_msg,
         ):
             estimator.fit(X, y)  # should fail
-        return
 
 
 def _check_estimator_sparse_container(name, estimator_orig, sparse_type):

From 6d7c2b1a07bb4bc4134841258ffcfad70a9ff62a Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Fri, 15 Nov 2024 14:57:09 +0100
Subject: [PATCH 07/28] changelog

---
 doc/whats_new/upcoming_changes/changed-models/30187.fix.rst    | 3 +++
 .../sklearn.utils/{30187.fix.rst => 30187.enhancement.rst}     | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 doc/whats_new/upcoming_changes/changed-models/30187.fix.rst
 rename doc/whats_new/upcoming_changes/sklearn.utils/{30187.fix.rst => 30187.enhancement.rst} (72%)

diff --git a/doc/whats_new/upcoming_changes/changed-models/30187.fix.rst b/doc/whats_new/upcoming_changes/changed-models/30187.fix.rst
new file mode 100644
index 0000000000000..0b662db8f8091
--- /dev/null
+++ b/doc/whats_new/upcoming_changes/changed-models/30187.fix.rst
@@ -0,0 +1,3 @@
+- The `tags.input_tags.sparse` flag was corrected for a
+  majority of estimators.
+  By :user:`Antoine Baker <antoinebaker>`
diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/30187.fix.rst b/doc/whats_new/upcoming_changes/sklearn.utils/30187.enhancement.rst
similarity index 72%
rename from doc/whats_new/upcoming_changes/sklearn.utils/30187.fix.rst
rename to doc/whats_new/upcoming_changes/sklearn.utils/30187.enhancement.rst
index 46b5dfa96267a..0a7bf5b06e8df 100644
--- a/doc/whats_new/upcoming_changes/sklearn.utils/30187.fix.rst
+++ b/doc/whats_new/upcoming_changes/sklearn.utils/30187.enhancement.rst
@@ -1,4 +1,4 @@
 - :func:`utils.estimator_checks.check_estimator_sparse_tag` ensures that
   the estimator tag `input_tags.sparse` is consistent with its `fit`
-  method (acceting sparse input `X` or raising an error).
+  method (acceting sparse input `X` or raising the appropriate error).
   By :user:`Antoine Baker <antoinebaker>`

From 7005797170f23441585cff0a7e184ef6e911c3c1 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Fri, 15 Nov 2024 18:19:44 +0100
Subject: [PATCH 08/28] fix passthrough sparse tag

---
 sklearn/compose/_column_transformer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index ba914ce7821f6..33a0b1bea2a10 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -1319,8 +1319,9 @@ def get_metadata_routing(self):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.sparse = all(
-            get_tags(trans).input_tags.sparse if trans != "drop" else True
+            get_tags(trans).input_tags.sparse
             for name, trans, _ in self.transformers
+            if trans not in {"passthrough", "drop"}
         )
         return tags
 

From 6962aa9344465c604bf5566cc98e7a50bb370794 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Fri, 15 Nov 2024 18:34:28 +0100
Subject: [PATCH 09/28] fix SelfTrainingClassifier

---
 sklearn/semi_supervised/_self_training.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py
index b28a6bd88305e..2f7391cb4f76f 100644
--- a/sklearn/semi_supervised/_self_training.py
+++ b/sklearn/semi_supervised/_self_training.py
@@ -616,5 +616,6 @@ def get_metadata_routing(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
+        if self.estimator is not None:
+            tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         return tags

From 2668fea2ff907a4dbc54692ac7efefdf02da6944 Mon Sep 17 00:00:00 2001
From: antoinebaker <antoinebaker@users.noreply.github.com>
Date: Fri, 15 Nov 2024 19:53:40 +0100
Subject: [PATCH 10/28] Apply suggestions from code review

Co-authored-by: Guillaume Lemaitre <guillaume@probabl.ai>
---
 .../upcoming_changes/changed-models/30187.fix.rst    |  3 +--
 .../sklearn.utils/30187.enhancement.rst              |  2 +-
 sklearn/cluster/_affinity_propagation.py             |  2 +-
 sklearn/decomposition/_pca.py                        |  2 +-
 sklearn/preprocessing/_function_transformer.py       |  2 +-
 sklearn/utils/estimator_checks.py                    | 12 +++++++++++-
 6 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/doc/whats_new/upcoming_changes/changed-models/30187.fix.rst b/doc/whats_new/upcoming_changes/changed-models/30187.fix.rst
index 0b662db8f8091..001b8840d9a7b 100644
--- a/doc/whats_new/upcoming_changes/changed-models/30187.fix.rst
+++ b/doc/whats_new/upcoming_changes/changed-models/30187.fix.rst
@@ -1,3 +1,2 @@
-- The `tags.input_tags.sparse` flag was corrected for a
-  majority of estimators.
+- The `tags.input_tags.sparse` flag was corrected for a majority of estimators.
   By :user:`Antoine Baker <antoinebaker>`
diff --git a/doc/whats_new/upcoming_changes/sklearn.utils/30187.enhancement.rst b/doc/whats_new/upcoming_changes/sklearn.utils/30187.enhancement.rst
index 0a7bf5b06e8df..de75f70cb552e 100644
--- a/doc/whats_new/upcoming_changes/sklearn.utils/30187.enhancement.rst
+++ b/doc/whats_new/upcoming_changes/sklearn.utils/30187.enhancement.rst
@@ -1,4 +1,4 @@
 - :func:`utils.estimator_checks.check_estimator_sparse_tag` ensures that
   the estimator tag `input_tags.sparse` is consistent with its `fit`
-  method (acceting sparse input `X` or raising the appropriate error).
+  method (accepting sparse input `X` or raising the appropriate error).
   By :user:`Antoine Baker <antoinebaker>`
diff --git a/sklearn/cluster/_affinity_propagation.py b/sklearn/cluster/_affinity_propagation.py
index 76b5f27a21af4..e5cb501984762 100644
--- a/sklearn/cluster/_affinity_propagation.py
+++ b/sklearn/cluster/_affinity_propagation.py
@@ -483,7 +483,7 @@ def __init__(
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.input_tags.pairwise = self.affinity == "precomputed"
-        tags.input_tags.sparse = True
+        tags.input_tags.sparse = self.affinity != "precomputed"
         return tags
 
     @_fit_context(prefer_skip_nested_validation=True)
diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index ba5ca4dd4c680..405f9e8c92c85 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -851,5 +851,5 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
         tags.array_api_support = True
-        tags.input_tags.sparse = True
+        tags.input_tags.sparse = self.svd_solver in ("auto", "arpack", "covariance_eigh")
         return tags
diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py
index 54166fdacb2ad..df84f5333357c 100644
--- a/sklearn/preprocessing/_function_transformer.py
+++ b/sklearn/preprocessing/_function_transformer.py
@@ -394,7 +394,7 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.no_validation = not self.validate
         tags.requires_fit = False
-        tags.input_tags.sparse = True
+        tags.input_tags.sparse = not self.validate or (self.validate and self.accept_sparse)
         return tags
 
     def set_output(self, *, transform=None):
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index c963040f1ff55..3ec4a795c2f5c 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1194,6 +1194,7 @@ def check_array_api_input_and_values(
 
 
 def check_estimator_sparse_tag(name, estimator_orig):
+    """Check that estimator tag related with accepting sparse data is properly set."""
     if SPARSE_ARRAY_PRESENT:
         sparse_container = sparse.csr_array
     else:
@@ -1211,7 +1212,16 @@ def check_estimator_sparse_tag(name, estimator_orig):
 
     tags = get_tags(estimator)
     if tags.input_tags.sparse:
-        estimator.fit(X, y)  # should pass
+        try:
+            estimator.fit(X, y)  # should pass
+        except Exception as e:
+            raise AssertionError(
+                f"Estimator {name} raised an exception: {e}. The tag "
+                "self.input_tags.sparse_tag might not be consistent with the "
+                "estimator's ability to handle sparse data (i.e. controlled by the "
+                "parameter `accept_sparse` in `validate_data` or `check_array` "
+                f"functions). Got input_tags.sparse={tags.input_tags.sparse}."
+            )
     else:
         err_msg = (
             f"Estimator {name} with input_tags.sparse=False doesn't "

From 705c41443f22b0e535859909d711b6cc410c5276 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Fri, 15 Nov 2024 19:55:46 +0100
Subject: [PATCH 11/28] add suggestions

---
 sklearn/decomposition/_incremental_pca.py   |  1 +
 sklearn/ensemble/_gb.py                     | 15 +++++--------
 sklearn/ensemble/_weight_boosting.py        | 15 +++++--------
 sklearn/linear_model/_coordinate_descent.py |  7 ++++--
 sklearn/naive_bayes.py                      |  8 +------
 sklearn/semi_supervised/_self_training.py   | 10 ++++++---
 sklearn/svm/_base.py                        |  6 +++++
 sklearn/svm/_classes.py                     | 25 ---------------------
 sklearn/tree/_classes.py                    |  7 ++++--
 9 files changed, 35 insertions(+), 59 deletions(-)

diff --git a/sklearn/decomposition/_incremental_pca.py b/sklearn/decomposition/_incremental_pca.py
index a8ea2dcd558aa..da617ef8fa787 100644
--- a/sklearn/decomposition/_incremental_pca.py
+++ b/sklearn/decomposition/_incremental_pca.py
@@ -421,5 +421,6 @@ def transform(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        # Beware that fit accepts sparse data but partial_fit doesn't
         tags.input_tags.sparse = True
         return tags
diff --git a/sklearn/ensemble/_gb.py b/sklearn/ensemble/_gb.py
index 34282db589ff9..fded8a535413d 100644
--- a/sklearn/ensemble/_gb.py
+++ b/sklearn/ensemble/_gb.py
@@ -1117,6 +1117,11 @@ def apply(self, X):
 
         return leaves
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 class GradientBoostingClassifier(ClassifierMixin, BaseGradientBoosting):
     """Gradient Boosting for classification.
@@ -1725,11 +1730,6 @@ def staged_predict_proba(self, X):
                 "loss=%r does not support predict_proba" % self.loss
             ) from e
 
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
-        return tags
-
 
 class GradientBoostingRegressor(RegressorMixin, BaseGradientBoosting):
     """Gradient Boosting for regression.
@@ -2190,8 +2190,3 @@ def apply(self, X):
         leaves = super().apply(X)
         leaves = leaves.reshape(X.shape[0], self.estimators_.shape[0])
         return leaves
-
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
-        return tags
diff --git a/sklearn/ensemble/_weight_boosting.py b/sklearn/ensemble/_weight_boosting.py
index 065c7204b0a91..8503c4fdb8ae7 100644
--- a/sklearn/ensemble/_weight_boosting.py
+++ b/sklearn/ensemble/_weight_boosting.py
@@ -312,6 +312,11 @@ def feature_importances_(self):
                 "feature_importances_ attribute"
             ) from e
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 def _samme_proba(estimator, n_classes, X):
     """Calculate algorithm 4, step 2, equation c) of Zhu et al [1].
@@ -858,11 +863,6 @@ def predict_log_proba(self, X):
         """
         return np.log(self.predict_proba(X))
 
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
-        return tags
-
 
 class AdaBoostRegressor(_RoutingNotSupportedMixin, RegressorMixin, BaseWeightBoosting):
     """An AdaBoost regressor.
@@ -1171,8 +1171,3 @@ def staged_predict(self, X):
 
         for i, _ in enumerate(self.estimators_, 1):
             yield self._get_median_predict(X, limit=i)
-
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
-        return tags
diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py
index b8430fe00b862..cfc2a1832b887 100644
--- a/sklearn/linear_model/_coordinate_descent.py
+++ b/sklearn/linear_model/_coordinate_descent.py
@@ -1869,6 +1869,11 @@ def get_metadata_routing(self):
         )
         return router
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 class LassoCV(RegressorMixin, LinearModelCV):
     """Lasso linear model with iterative fitting along a regularization path.
@@ -2083,7 +2088,6 @@ def _is_multitask(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
         tags.target_tags.multi_output = False
         return tags
 
@@ -2365,7 +2369,6 @@ def _is_multitask(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
         tags.target_tags.multi_output = False
         return tags
 
diff --git a/sklearn/naive_bayes.py b/sklearn/naive_bayes.py
index 3418e99e17c3c..0bb2daab25d0b 100644
--- a/sklearn/naive_bayes.py
+++ b/sklearn/naive_bayes.py
@@ -771,6 +771,7 @@ def _init_counters(self, n_classes, n_features):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
         tags.classifier_tags.poor_score = True
         return tags
 
@@ -880,7 +881,6 @@ def __init__(
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
         tags.input_tags.positive_only = True
         return tags
 
@@ -1029,7 +1029,6 @@ def __init__(
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
         tags.input_tags.positive_only = True
         return tags
 
@@ -1229,11 +1228,6 @@ def _joint_log_likelihood(self, X):
 
         return jll
 
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
-        return tags
-
 
 class CategoricalNB(_BaseDiscreteNB):
     """Naive Bayes classifier for categorical features.
diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py
index 2f7391cb4f76f..da5dd85d159d4 100644
--- a/sklearn/semi_supervised/_self_training.py
+++ b/sklearn/semi_supervised/_self_training.py
@@ -4,9 +4,13 @@
 
 import numpy as np
 
-from sklearn.base import ClassifierMixin
-
-from ..base import BaseEstimator, MetaEstimatorMixin, _fit_context, clone
+from ..base import (
+    BaseEstimator,
+    ClassifierMixin,
+    MetaEstimatorMixin,
+    _fit_context,
+    clone,
+)
 from ..utils import Bunch, get_tags, safe_mask
 from ..utils._param_validation import HasMethods, Hidden, Interval, StrOptions
 from ..utils.metadata_routing import (
diff --git a/sklearn/svm/_base.py b/sklearn/svm/_base.py
index 3e5024364df5c..f5b35f39a7daf 100644
--- a/sklearn/svm/_base.py
+++ b/sklearn/svm/_base.py
@@ -147,6 +147,7 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         # Used by cross_val_score.
         tags.input_tags.pairwise = self.kernel == "precomputed"
+        tags.input_tags.sparse = self.kernel != "precomputed"
         return tags
 
     @_fit_context(prefer_skip_nested_validation=True)
@@ -999,6 +1000,11 @@ def probB_(self):
         """
         return self._probB
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = self.kernel != "precomputed"
+        return tags
+
 
 def _get_liblinear_solver_type(multi_class, penalty, loss, dual):
     """Find the liblinear magic number for the solver.
diff --git a/sklearn/svm/_classes.py b/sklearn/svm/_classes.py
index f29f9baa0f139..0eb49a8c0832c 100644
--- a/sklearn/svm/_classes.py
+++ b/sklearn/svm/_classes.py
@@ -887,11 +887,6 @@ def __init__(
             random_state=random_state,
         )
 
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = self.kernel != "precomputed"
-        return tags
-
 
 class NuSVC(BaseSVC):
     """Nu-Support Vector Classification.
@@ -1155,11 +1150,6 @@ def __init__(
             random_state=random_state,
         )
 
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = self.kernel != "precomputed"
-        return tags
-
 
 class SVR(RegressorMixin, BaseLibSVM):
     """Epsilon-Support Vector Regression.
@@ -1354,11 +1344,6 @@ def __init__(
             random_state=None,
         )
 
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = self.kernel != "precomputed"
-        return tags
-
 
 class NuSVR(RegressorMixin, BaseLibSVM):
     """Nu Support Vector Regression.
@@ -1546,11 +1531,6 @@ def __init__(
             random_state=None,
         )
 
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = self.kernel != "precomputed"
-        return tags
-
 
 class OneClassSVM(OutlierMixin, BaseLibSVM):
     """Unsupervised Outlier Detection.
@@ -1807,8 +1787,3 @@ def predict(self, X):
         """
         y = super().predict(X)
         return np.asarray(y, dtype=np.intp)
-
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
-        return tags
diff --git a/sklearn/tree/_classes.py b/sklearn/tree/_classes.py
index f94dd70aefe1c..646aa7fb034c4 100644
--- a/sklearn/tree/_classes.py
+++ b/sklearn/tree/_classes.py
@@ -690,6 +690,11 @@ def feature_importances_(self):
 
         return self.tree_.compute_feature_importances()
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = True
+        return tags
+
 
 # =============================================================================
 # Public estimators
@@ -1100,7 +1105,6 @@ def __sklearn_tags__(self):
         }
         tags.classifier_tags.multi_label = True
         tags.input_tags.allow_nan = allow_nan
-        tags.input_tags.sparse = True
         return tags
 
 
@@ -1443,7 +1447,6 @@ def __sklearn_tags__(self):
             "poisson",
         }
         tags.input_tags.allow_nan = allow_nan
-        tags.input_tags.sparse = True
         return tags
 
 

From 5178539d5f995c305636e55de6ce14ba8db71a7a Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Fri, 15 Nov 2024 20:11:25 +0100
Subject: [PATCH 12/28] fix multitask

---
 sklearn/linear_model/_coordinate_descent.py | 2 +-
 sklearn/utils/estimator_checks.py           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py
index cfc2a1832b887..938331bd7f23f 100644
--- a/sklearn/linear_model/_coordinate_descent.py
+++ b/sklearn/linear_model/_coordinate_descent.py
@@ -1871,7 +1871,7 @@ def get_metadata_routing(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
+        tags.input_tags.sparse = not self._is_multitask()
         return tags
 
 
diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 3ec4a795c2f5c..5eafe9ab7836a 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1217,7 +1217,7 @@ def check_estimator_sparse_tag(name, estimator_orig):
         except Exception as e:
             raise AssertionError(
                 f"Estimator {name} raised an exception: {e}. The tag "
-                "self.input_tags.sparse_tag might not be consistent with the "
+                "self.input_tags.sparse might not be consistent with the "
                 "estimator's ability to handle sparse data (i.e. controlled by the "
                 "parameter `accept_sparse` in `validate_data` or `check_array` "
                 f"functions). Got input_tags.sparse={tags.input_tags.sparse}."

From e5a4458c184a8ce6eac49f24d4a8e3041b290fac Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Fri, 15 Nov 2024 20:19:44 +0100
Subject: [PATCH 13/28] black formatting

---
 sklearn/decomposition/_pca.py                  | 6 +++++-
 sklearn/preprocessing/_function_transformer.py | 4 +++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/sklearn/decomposition/_pca.py b/sklearn/decomposition/_pca.py
index 405f9e8c92c85..f8882a7a6b5d6 100644
--- a/sklearn/decomposition/_pca.py
+++ b/sklearn/decomposition/_pca.py
@@ -851,5 +851,9 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.transformer_tags.preserves_dtype = ["float64", "float32"]
         tags.array_api_support = True
-        tags.input_tags.sparse = self.svd_solver in ("auto", "arpack", "covariance_eigh")
+        tags.input_tags.sparse = self.svd_solver in (
+            "auto",
+            "arpack",
+            "covariance_eigh",
+        )
         return tags
diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py
index df84f5333357c..e4bb8e55ad62f 100644
--- a/sklearn/preprocessing/_function_transformer.py
+++ b/sklearn/preprocessing/_function_transformer.py
@@ -394,7 +394,9 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.no_validation = not self.validate
         tags.requires_fit = False
-        tags.input_tags.sparse = not self.validate or (self.validate and self.accept_sparse)
+        tags.input_tags.sparse = not self.validate or (
+            self.validate and self.accept_sparse
+        )
         return tags
 
     def set_output(self, *, transform=None):

From d6f277fd74eaaf66aec257029ec7fd57bc6a00ec Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Mon, 18 Nov 2024 11:56:24 +0100
Subject: [PATCH 14/28] add meta test

---
 sklearn/utils/estimator_checks.py            | 35 ++++++++++++------
 sklearn/utils/tests/test_estimator_checks.py | 37 ++++++++++++++++++++
 2 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 5eafe9ab7836a..3699a582e1cfa 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1193,6 +1193,12 @@ def check_array_api_input_and_values(
     )
 
 
+def _is_sparse_input_error(e):
+    if not (isinstance(e, TypeError) or isinstance(e, ValueError)):
+        return False
+    return True if re.search("[Ss]parse", str(e)) else False
+
+
 def check_estimator_sparse_tag(name, estimator_orig):
     """Check that estimator tag related with accepting sparse data is properly set."""
     if SPARSE_ARRAY_PRESENT:
@@ -1223,17 +1229,26 @@ def check_estimator_sparse_tag(name, estimator_orig):
                 f"functions). Got input_tags.sparse={tags.input_tags.sparse}."
             )
     else:
-        err_msg = (
-            f"Estimator {name} with input_tags.sparse=False doesn't "
-            "fail gracefully when fitted on sparse data."
+        try:
+            estimator.fit(X, y)  # should fail with appropriate error
+        except Exception as e:
+            if _is_sparse_input_error(e):
+                return
+            else:
+                raise AssertionError(
+                    f"Estimator {name} raised an exception: {e}. "
+                    "The estimator failed when fitted on sparse data in accordance "
+                    f"with its tag self.input_tags.sparse={tags.input_tags.sparse} "
+                    "but didn't raise the appropriate error : error message should "
+                    "state explicitly that sparse input is not supported if this is "
+                    "not the case, e.g. by using check_array(X, accept_sparse=False)."
+                )
+        raise AssertionError(
+            f"Estimator {name} didn't fail when fitted on sparse data "
+            "but should have according to its tag "
+            f"self.input_tags.sparse={tags.input_tags.sparse}. "
+            f"The tag is inconsistent and must be fixed."
         )
-        with raises(
-            (TypeError, ValueError),
-            match=["sparse", "Sparse"],
-            may_pass=False,
-            err_msg=err_msg,
-        ):
-            estimator.fit(X, y)  # should fail
 
 
 def _check_estimator_sparse_container(name, estimator_orig, sparse_type):
diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 7329277722dfb..d98a1ec1f4b92 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -72,6 +72,7 @@
     check_estimator_repr,
     check_estimator_sparse_array,
     check_estimator_sparse_matrix,
+    check_estimator_sparse_tag,
     check_estimator_tags_renamed,
     check_estimators_nan_inf,
     check_estimators_overwrite_params,
@@ -847,6 +848,42 @@ def test_check_outlier_corruption():
     check_outlier_corruption(1, 2, decision)
 
 
+def test_check_estimator_sparse_tag():
+    """Test that check_estimator_sparse_tag raises error when sparse tag is
+    misaligned."""
+
+    class EstimatorWithSparseConfig(BaseEstimator):
+        def __init__(self, tag_sparse, accept_sparse):
+            self.tag_sparse = tag_sparse
+            self.accept_sparse = accept_sparse
+
+        def fit(self, X, y=None):
+            validate_data(self, X, y, accept_sparse=self.accept_sparse)
+            return self
+
+        def __sklearn_tags__(self):
+            tags = super().__sklearn_tags__()
+            tags.input_tags.sparse = self.tag_sparse
+            return tags
+
+    test_cases = [
+        {"tag_sparse": True, "accept_sparse": True, "error_type": None},
+        {"tag_sparse": False, "accept_sparse": False, "error_type": None},
+        {"tag_sparse": False, "accept_sparse": True, "error_type": AssertionError},
+        {"tag_sparse": True, "accept_sparse": False, "error_type": AssertionError},
+    ]
+
+    for test_case in test_cases:
+        estimator = EstimatorWithSparseConfig(
+            test_case["tag_sparse"], test_case["accept_sparse"]
+        )
+        if test_case["error_type"] is None:
+            check_estimator_sparse_tag(estimator.__class__.__name__, estimator)
+        else:
+            with raises(test_case["error_type"]):
+                check_estimator_sparse_tag(estimator.__class__.__name__, estimator)
+
+
 def test_check_estimator_transformer_no_mixin():
     # check that TransformerMixin is not required for transformer tests to run
     # but it fails since the tag is not set

From 0bcc765a503865df41f77a57497a268fa2b3cee5 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Mon, 18 Nov 2024 14:31:36 +0100
Subject: [PATCH 15/28] add feature union

---
 sklearn/pipeline.py                              |  9 +++++++++
 sklearn/utils/_test_common/instance_generator.py | 12 ++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index f8422ae934d28..edb65e8f55a84 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -2114,6 +2114,15 @@ def get_metadata_routing(self):
 
         return router
 
+    def __sklearn_tags__(self):
+        tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = all(
+            get_tags(trans).input_tags.sparse
+            for name, trans in self.transformer_list
+            if trans not in {"passthrough", "drop"}
+        )
+        return tags
+
 
 def make_union(*transformers, n_jobs=None, verbose=False):
     """Construct a :class:`FeatureUnion` from the given transformers.
diff --git a/sklearn/utils/_test_common/instance_generator.py b/sklearn/utils/_test_common/instance_generator.py
index 7eeb0c848c508..a2e43308d0d0f 100644
--- a/sklearn/utils/_test_common/instance_generator.py
+++ b/sklearn/utils/_test_common/instance_generator.py
@@ -528,6 +528,18 @@
     FactorAnalysis: {"check_dict_unchanged": dict(max_iter=5, n_components=1)},
     FastICA: {"check_dict_unchanged": dict(max_iter=5, n_components=1)},
     FeatureAgglomeration: {"check_dict_unchanged": dict(n_clusters=1)},
+    FeatureUnion: {
+        "check_estimator_sparse_tag": [
+            dict(transformer_list=[("trans1", StandardScaler())]),
+            dict(
+                transformer_list=[
+                    ("trans1", StandardScaler(with_mean=False)),
+                    ("trans2", "drop"),
+                    ("trans3", "passthrough"),
+                ]
+            ),
+        ]
+    },
     GammaRegressor: {
         "check_sample_weight_equivalence": [
             dict(solver="newton-cholesky"),

From d862de777e31b77c81cde419377e585c5cfa178a Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Mon, 18 Nov 2024 14:48:24 +0100
Subject: [PATCH 16/28] check function transformer

---
 sklearn/preprocessing/_function_transformer.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/sklearn/preprocessing/_function_transformer.py b/sklearn/preprocessing/_function_transformer.py
index e4bb8e55ad62f..3fc33c59e76bd 100644
--- a/sklearn/preprocessing/_function_transformer.py
+++ b/sklearn/preprocessing/_function_transformer.py
@@ -394,9 +394,7 @@ def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.no_validation = not self.validate
         tags.requires_fit = False
-        tags.input_tags.sparse = not self.validate or (
-            self.validate and self.accept_sparse
-        )
+        tags.input_tags.sparse = not self.validate or self.accept_sparse
         return tags
 
     def set_output(self, *, transform=None):

From 21ec5858049616fb8dfb0a0370fffaeeb979e966 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Wed, 20 Nov 2024 17:45:59 +0100
Subject: [PATCH 17/28] catch invalid transformers list

---
 sklearn/compose/_column_transformer.py | 17 ++++++++++++-----
 sklearn/pipeline.py                    | 17 ++++++++++++-----
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index 33a0b1bea2a10..f347266ae204a 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -1318,11 +1318,18 @@ def get_metadata_routing(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = all(
-            get_tags(trans).input_tags.sparse
-            for name, trans, _ in self.transformers
-            if trans not in {"passthrough", "drop"}
-        )
+        try:
+            sparse = all(
+                get_tags(trans).input_tags.sparse
+                for name, trans, _ in self.transformers
+                if trans not in {"passthrough", "drop"}
+            )
+        except Exception:
+            # If `transformers` does not comply with our API (list of tuples)
+            # then it will fail. In this case, we assume that `sparse` is False
+            # but the parameter validation will raise an error during `fit`.
+            sparse = False
+        tags.input_tags.sparse = sparse
         return tags
 
 
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index edb65e8f55a84..e0d0f8184edb9 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -2116,11 +2116,18 @@ def get_metadata_routing(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = all(
-            get_tags(trans).input_tags.sparse
-            for name, trans in self.transformer_list
-            if trans not in {"passthrough", "drop"}
-        )
+        try:
+            sparse = all(
+                get_tags(trans).input_tags.sparse
+                for name, trans in self.transformer_list
+                if trans not in {"passthrough", "drop"}
+            )
+        except Exception:
+            # If `transformer_list` does not comply with our API (list of tuples)
+            # then it will fail. In this case, we assume that `sparse` is False
+            # but the parameter validation will raise an error during `fit`.
+            sparse = False
+        tags.input_tags.sparse = sparse
         return tags
 
 

From c6538342ed2057ffa2af810af42728bdaf7a09f4 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Tue, 26 Nov 2024 12:09:29 +0100
Subject: [PATCH 18/28] add todo

---
 sklearn/semi_supervised/_self_training.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py
index da5dd85d159d4..d19b57d033040 100644
--- a/sklearn/semi_supervised/_self_training.py
+++ b/sklearn/semi_supervised/_self_training.py
@@ -620,6 +620,7 @@ def get_metadata_routing(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        # TODO(1.8): remove together with base_estimator
         if self.estimator is not None:
             tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         return tags

From d9f4de33dcb63aae5c0c0b60fbe852a5805e6409 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Tue, 26 Nov 2024 12:09:55 +0100
Subject: [PATCH 19/28] remove outer function

---
 sklearn/utils/estimator_checks.py | 31 ++++++++++++++-----------------
 1 file changed, 14 insertions(+), 17 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 1d6f27b0a1b65..3c26116b717be 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1197,12 +1197,6 @@ def check_array_api_input_and_values(
     )
 
 
-def _is_sparse_input_error(e):
-    if not (isinstance(e, TypeError) or isinstance(e, ValueError)):
-        return False
-    return True if re.search("[Ss]parse", str(e)) else False
-
-
 def check_estimator_sparse_tag(name, estimator_orig):
     """Check that estimator tag related with accepting sparse data is properly set."""
     if SPARSE_ARRAY_PRESENT:
@@ -1233,20 +1227,23 @@ def check_estimator_sparse_tag(name, estimator_orig):
                 f"functions). Got input_tags.sparse={tags.input_tags.sparse}."
             )
     else:
+        err_msg = (
+            f"Estimator {name} raised an exception. "
+            "The estimator failed when fitted on sparse data in accordance "
+            f"with its tag self.input_tags.sparse={tags.input_tags.sparse} "
+            "but didn't raise the appropriate error : error message should "
+            "state explicitly that sparse input is not supported if this is "
+            "not the case, e.g. by using check_array(X, accept_sparse=False)."
+        )
         try:
             estimator.fit(X, y)  # should fail with appropriate error
-        except Exception as e:
-            if _is_sparse_input_error(e):
+        except (ValueError, TypeError) as e:
+            if re.search("[Ss]parse", str(e)):
+                # Got the right error type and mentioning sparse issue
                 return
-            else:
-                raise AssertionError(
-                    f"Estimator {name} raised an exception: {e}. "
-                    "The estimator failed when fitted on sparse data in accordance "
-                    f"with its tag self.input_tags.sparse={tags.input_tags.sparse} "
-                    "but didn't raise the appropriate error : error message should "
-                    "state explicitly that sparse input is not supported if this is "
-                    "not the case, e.g. by using check_array(X, accept_sparse=False)."
-                )
+            raise AssertionError(err_msg) from e
+        except Exception as e:
+            raise AssertionError(err_msg) from e
         raise AssertionError(
             f"Estimator {name} didn't fail when fitted on sparse data "
             "but should have according to its tag "

From a9fb7d711ce902565cc7c4fd0953a87feb3d1c17 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Tue, 26 Nov 2024 18:05:22 +0100
Subject: [PATCH 20/28] change pipeline tag

---
 sklearn/pipeline.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index e0d0f8184edb9..70b08d28192d9 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -1224,7 +1224,15 @@ def __sklearn_tags__(self):
                 tags.input_tags.pairwise = get_tags(
                     self.steps[0][1]
                 ).input_tags.pairwise
-                tags.input_tags.sparse = get_tags(self.steps[0][1]).input_tags.sparse
+            # WARNING: the sparse tag can be incorrect.
+            # Some Pipelines accepting sparse data are wrongly tagged sparse=False.
+            # For example Pipeline([PCA(), estimator]) accepts sparse data
+            # even if the estimator doesn't as PCA outputs a dense array.
+            tags.input_tags.sparse = all(
+                get_tags(step).input_tags.sparse
+                for name, step in self.steps
+                if step != "passthrough"
+            )
         except (ValueError, AttributeError, TypeError):
             # This happens when the `steps` is not a list of (name, estimator)
             # tuples and `fit` is not called yet to validate the steps.

From e85f94a94da301d54a99a66e31087d8f5b58b7a2 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Fri, 29 Nov 2024 16:16:01 +0100
Subject: [PATCH 21/28] tag RobustScaler

---
 sklearn/preprocessing/_data.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/sklearn/preprocessing/_data.py b/sklearn/preprocessing/_data.py
index 8d428fe50c7f8..f0d1defe61ca9 100644
--- a/sklearn/preprocessing/_data.py
+++ b/sklearn/preprocessing/_data.py
@@ -1739,6 +1739,7 @@ def inverse_transform(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.input_tags.sparse = not self.with_centering
         tags.input_tags.allow_nan = True
         return tags
 

From 8f2f3dbfaabdd1a2a68741753093a2b03d321def Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Fri, 29 Nov 2024 16:30:17 +0100
Subject: [PATCH 22/28] tag RANSAC

---
 sklearn/linear_model/_ransac.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py
index a5fa36aa4c468..99b52e5840e32 100644
--- a/sklearn/linear_model/_ransac.py
+++ b/sklearn/linear_model/_ransac.py
@@ -15,7 +15,7 @@
     clone,
 )
 from ..exceptions import ConvergenceWarning
-from ..utils import check_consistent_length, check_random_state
+from ..utils import check_consistent_length, check_random_state, get_tags
 from ..utils._bunch import Bunch
 from ..utils._param_validation import (
     HasMethods,
@@ -724,5 +724,8 @@ def get_metadata_routing(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = True
+        if self.estimator is None:
+            tags.input_tags.sparse = True
+        else:
+            tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         return tags

From 3ac38e10c2d0af50261842691e0251a49b932c74 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Fri, 29 Nov 2024 16:51:01 +0100
Subject: [PATCH 23/28] multi_output in LinearModelCV

---
 sklearn/linear_model/_coordinate_descent.py | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/sklearn/linear_model/_coordinate_descent.py b/sklearn/linear_model/_coordinate_descent.py
index 938331bd7f23f..b98cf08925910 100644
--- a/sklearn/linear_model/_coordinate_descent.py
+++ b/sklearn/linear_model/_coordinate_descent.py
@@ -1871,7 +1871,9 @@ def get_metadata_routing(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.input_tags.sparse = not self._is_multitask()
+        multitask = self._is_multitask()
+        tags.input_tags.sparse = not multitask
+        tags.target_tags.multi_output = multitask
         return tags
 
 
@@ -2086,11 +2088,6 @@ def _get_estimator(self):
     def _is_multitask(self):
         return False
 
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.target_tags.multi_output = False
-        return tags
-
     def fit(self, X, y, sample_weight=None, **params):
         """Fit Lasso model with coordinate descent.
 
@@ -2367,11 +2364,6 @@ def _get_estimator(self):
     def _is_multitask(self):
         return False
 
-    def __sklearn_tags__(self):
-        tags = super().__sklearn_tags__()
-        tags.target_tags.multi_output = False
-        return tags
-
     def fit(self, X, y, sample_weight=None, **params):
         """Fit ElasticNet model with coordinate descent.
 
@@ -3035,7 +3027,6 @@ def _is_multitask(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.target_tags.multi_output = True
         tags.target_tags.single_output = False
         return tags
 
@@ -3276,7 +3267,6 @@ def _is_multitask(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        tags.target_tags.multi_output = True
         tags.target_tags.single_output = False
         return tags
 

From 425b4736dab567a29613155dc02bdd9fdd6f4ebf Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Mon, 9 Dec 2024 10:47:09 +0100
Subject: [PATCH 24/28] raise from exception

---
 sklearn/utils/estimator_checks.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/sklearn/utils/estimator_checks.py b/sklearn/utils/estimator_checks.py
index 66333be57ff56..4ffc3143508b3 100644
--- a/sklearn/utils/estimator_checks.py
+++ b/sklearn/utils/estimator_checks.py
@@ -1220,19 +1220,20 @@ def check_estimator_sparse_tag(name, estimator_orig):
         try:
             estimator.fit(X, y)  # should pass
         except Exception as e:
-            raise AssertionError(
-                f"Estimator {name} raised an exception: {e}. The tag "
-                "self.input_tags.sparse might not be consistent with the "
-                "estimator's ability to handle sparse data (i.e. controlled by the "
-                "parameter `accept_sparse` in `validate_data` or `check_array` "
-                f"functions). Got input_tags.sparse={tags.input_tags.sparse}."
+            err_msg = (
+                f"Estimator {name} raised an exception. "
+                f"The tag self.input_tags.sparse={tags.input_tags.sparse} "
+                "might not be consistent with the estimator's ability to "
+                "handle sparse data (i.e. controlled by the parameter `accept_sparse`"
+                " in `validate_data` or `check_array` functions)."
             )
+            raise AssertionError(err_msg) from e
     else:
         err_msg = (
             f"Estimator {name} raised an exception. "
             "The estimator failed when fitted on sparse data in accordance "
             f"with its tag self.input_tags.sparse={tags.input_tags.sparse} "
-            "but didn't raise the appropriate error : error message should "
+            "but didn't raise the appropriate error: error message should "
             "state explicitly that sparse input is not supported if this is "
             "not the case, e.g. by using check_array(X, accept_sparse=False)."
         )

From 17ccb72608450b537f19bfb84a560c0e66988efa Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Wed, 11 Dec 2024 11:08:24 +0100
Subject: [PATCH 25/28] no cover

---
 sklearn/compose/_column_transformer.py |  5 ++---
 sklearn/ensemble/_base.py              | 13 +++++--------
 sklearn/pipeline.py                    |  5 ++---
 3 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/sklearn/compose/_column_transformer.py b/sklearn/compose/_column_transformer.py
index f347266ae204a..e088f534707d2 100644
--- a/sklearn/compose/_column_transformer.py
+++ b/sklearn/compose/_column_transformer.py
@@ -1319,7 +1319,7 @@ def get_metadata_routing(self):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         try:
-            sparse = all(
+            tags.input_tags.sparse = all(
                 get_tags(trans).input_tags.sparse
                 for name, trans, _ in self.transformers
                 if trans not in {"passthrough", "drop"}
@@ -1328,8 +1328,7 @@ def __sklearn_tags__(self):
             # If `transformers` does not comply with our API (list of tuples)
             # then it will fail. In this case, we assume that `sparse` is False
             # but the parameter validation will raise an error during `fit`.
-            sparse = False
-        tags.input_tags.sparse = sparse
+            pass  # pragma: no cover
         return tags
 
 
diff --git a/sklearn/ensemble/_base.py b/sklearn/ensemble/_base.py
index 5a51e8e015727..db5a0944a72c3 100644
--- a/sklearn/ensemble/_base.py
+++ b/sklearn/ensemble/_base.py
@@ -288,20 +288,17 @@ def get_params(self, deep=True):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         try:
-            allow_nan = all(
+            tags.input_tags.allow_nan = all(
                 get_tags(est[1]).input_tags.allow_nan if est[1] != "drop" else True
                 for est in self.estimators
             )
-            sparse = all(
+            tags.input_tags.sparse = all(
                 get_tags(est[1]).input_tags.sparse if est[1] != "drop" else True
                 for est in self.estimators
             )
         except Exception:
             # If `estimators` does not comply with our API (list of tuples) then it will
-            # fail. In this case, we assume that `allow_nan` is False but the parameter
-            # validation will raise an error during `fit`.
-            allow_nan = False
-            sparse = False
-        tags.input_tags.allow_nan = allow_nan
-        tags.input_tags.sparse = sparse
+            # fail. In this case, `allow_nan` and `sparse` keep their default value
+            # (False) but the parameter validation will raise an error during `fit`.
+            pass  # pragma: no cover
         return tags
diff --git a/sklearn/pipeline.py b/sklearn/pipeline.py
index 70b08d28192d9..6b750297e9f13 100644
--- a/sklearn/pipeline.py
+++ b/sklearn/pipeline.py
@@ -2125,7 +2125,7 @@ def get_metadata_routing(self):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         try:
-            sparse = all(
+            tags.input_tags.sparse = all(
                 get_tags(trans).input_tags.sparse
                 for name, trans in self.transformer_list
                 if trans not in {"passthrough", "drop"}
@@ -2134,8 +2134,7 @@ def __sklearn_tags__(self):
             # If `transformer_list` does not comply with our API (list of tuples)
             # then it will fail. In this case, we assume that `sparse` is False
             # but the parameter validation will raise an error during `fit`.
-            sparse = False
-        tags.input_tags.sparse = sparse
+            pass  # pragma: no cover
         return tags
 
 

From 8429e8f55d9a2b134b49ab5fa9d9e2521e419e67 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Wed, 11 Dec 2024 11:26:54 +0100
Subject: [PATCH 26/28] test raise inappropriate error

---
 sklearn/utils/tests/test_estimator_checks.py | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/sklearn/utils/tests/test_estimator_checks.py b/sklearn/utils/tests/test_estimator_checks.py
index 5e23fbe1620f7..b805bc1209f0c 100644
--- a/sklearn/utils/tests/test_estimator_checks.py
+++ b/sklearn/utils/tests/test_estimator_checks.py
@@ -854,11 +854,14 @@ def test_check_estimator_sparse_tag():
     misaligned."""
 
     class EstimatorWithSparseConfig(BaseEstimator):
-        def __init__(self, tag_sparse, accept_sparse):
+        def __init__(self, tag_sparse, accept_sparse, fit_error=None):
             self.tag_sparse = tag_sparse
             self.accept_sparse = accept_sparse
+            self.fit_error = fit_error
 
         def fit(self, X, y=None):
+            if self.fit_error:
+                raise self.fit_error
             validate_data(self, X, y, accept_sparse=self.accept_sparse)
             return self
 
@@ -876,7 +879,8 @@ def __sklearn_tags__(self):
 
     for test_case in test_cases:
         estimator = EstimatorWithSparseConfig(
-            test_case["tag_sparse"], test_case["accept_sparse"]
+            test_case["tag_sparse"],
+            test_case["accept_sparse"],
         )
         if test_case["error_type"] is None:
             check_estimator_sparse_tag(estimator.__class__.__name__, estimator)
@@ -884,6 +888,13 @@ def __sklearn_tags__(self):
             with raises(test_case["error_type"]):
                 check_estimator_sparse_tag(estimator.__class__.__name__, estimator)
 
+    # estimator `tag_sparse=accept_sparse=False` fails on sparse data
+    # but does not raise the appropriate error
+    for fit_error in [TypeError("unexpected error"), KeyError("other error")]:
+        estimator = EstimatorWithSparseConfig(False, False, fit_error)
+        with raises(AssertionError):
+            check_estimator_sparse_tag(estimator.__class__.__name__, estimator)
+
 
 def test_check_estimator_transformer_no_mixin():
     # check that TransformerMixin is not required for transformer tests to run

From 97d700dde492640717a5ef1ba3882d6099ec267b Mon Sep 17 00:00:00 2001
From: antoinebaker <antoinebaker@users.noreply.github.com>
Date: Thu, 12 Dec 2024 17:46:26 +0100
Subject: [PATCH 27/28] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Jérémie du Boisberranger <jeremie@probabl.ai>
---
 sklearn/linear_model/_ransac.py           | 2 +-
 sklearn/linear_model/_ridge.py            | 5 ++---
 sklearn/semi_supervised/_self_training.py | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/sklearn/linear_model/_ransac.py b/sklearn/linear_model/_ransac.py
index 99b52e5840e32..90dc6d6bc5e70 100644
--- a/sklearn/linear_model/_ransac.py
+++ b/sklearn/linear_model/_ransac.py
@@ -725,7 +725,7 @@ def get_metadata_routing(self):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         if self.estimator is None:
-            tags.input_tags.sparse = True
+            tags.input_tags.sparse = True  # default estimator is LinearRegression
         else:
             tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         return tags
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index b7bd5d417877a..1646c08e25d31 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -1251,10 +1251,9 @@ def fit(self, X, y, sample_weight=None):
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
         tags.array_api_support = True
-        reject_sparse = (self.solver == "svd") or (
-            self.solver == "cholesky" and self.fit_intercept
+        tags.input_tags.sparse = (self.solver != "svd") and (
+            self.solver != "cholesky" or not self.fit_intercept
         )
-        tags.input_tags.sparse = not reject_sparse
         return tags
 
 
diff --git a/sklearn/semi_supervised/_self_training.py b/sklearn/semi_supervised/_self_training.py
index d19b57d033040..4b469a2e9f8d8 100644
--- a/sklearn/semi_supervised/_self_training.py
+++ b/sklearn/semi_supervised/_self_training.py
@@ -620,7 +620,7 @@ def get_metadata_routing(self):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        # TODO(1.8): remove together with base_estimator
+        # TODO(1.8): remove the condition check together with base_estimator
         if self.estimator is not None:
             tags.input_tags.sparse = get_tags(self.estimator).input_tags.sparse
         return tags

From 66c3bd9e223bb6cb2fce070860673eee092139d8 Mon Sep 17 00:00:00 2001
From: Antoine Baker <antoine.baker59@gmail.com>
Date: Thu, 12 Dec 2024 17:50:13 +0100
Subject: [PATCH 28/28] suggestions from code review

---
 sklearn/ensemble/_forest.py    | 1 +
 sklearn/linear_model/_ridge.py | 5 ++---
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/sklearn/ensemble/_forest.py b/sklearn/ensemble/_forest.py
index ef9e6c0cdb437..f8dad7d6edd9a 100644
--- a/sklearn/ensemble/_forest.py
+++ b/sklearn/ensemble/_forest.py
@@ -1001,6 +1001,7 @@ def predict_log_proba(self, X):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
+        tags.classifier_tags.multi_label = True
         tags.input_tags.sparse = True
         return tags
 
diff --git a/sklearn/linear_model/_ridge.py b/sklearn/linear_model/_ridge.py
index 1646c08e25d31..9a94ba1caec1c 100644
--- a/sklearn/linear_model/_ridge.py
+++ b/sklearn/linear_model/_ridge.py
@@ -1573,10 +1573,9 @@ def fit(self, X, y, sample_weight=None):
 
     def __sklearn_tags__(self):
         tags = super().__sklearn_tags__()
-        reject_sparse = (self.solver == "svd") or (
-            self.solver == "cholesky" and self.fit_intercept
+        tags.input_tags.sparse = (self.solver != "svd") and (
+            self.solver != "cholesky" or not self.fit_intercept
         )
-        tags.input_tags.sparse = not reject_sparse
         return tags