scikit-learn
diff --git a/‎sklearn/cluster/affinity_propagation_.py
Lines changed: 1 addition & 1 deletion b/‎sklearn/cluster/affinity_propagation_.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎sklearn/cluster/hierarchical.py
Lines changed: 1 addition & 1 deletion b/‎sklearn/cluster/hierarchical.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎sklearn/cluster/k_means_.py
Lines changed: 1 addition & 1 deletion b/‎sklearn/cluster/k_means_.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎sklearn/cluster/mean_shift_.py
Lines changed: 1 addition & 1 deletion b/‎sklearn/cluster/mean_shift_.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎sklearn/cluster/spectral.py
Lines changed: 1 addition & 1 deletion b/‎sklearn/cluster/spectral.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎sklearn/tests/test_common.py
Lines changed: 3 additions & 0 deletions b/‎sklearn/tests/test_common.py
Lines changed: 3 additions & 0 deletions
diff --git a/‎sklearn/utils/estimator_checks.py
Lines changed: 20 additions & 27 deletions b/‎sklearn/utils/estimator_checks.py
Lines changed: 20 additions & 27 deletions
@@ -269,7 +269,7 @@ def __init__(self, damping=.5, max_iter=200, convergence_iter=15,
     def _pairwise(self):
         return self.affinity == "precomputed"
 
-    def fit(self, X):
+    def fit(self, X, y=None):
         """ Create affinity matrix from negative euclidean distances, then
         apply affinity propagation clustering.
 
 
@@ -680,7 +680,7 @@ def __init__(self, n_clusters=2, affinity="euclidean",
         self.affinity = affinity
         self.pooling_func = pooling_func
 
-    def fit(self, X):
+    def fit(self, X, y=None):
         """Fit the hierarchical clustering on the data
 
         Parameters
 
@@ -864,7 +864,7 @@ def predict(self, X):
         x_squared_norms = row_norms(X, squared=True)
         return _labels_inertia(X, x_squared_norms, self.cluster_centers_)[0]
 
-    def score(self, X):
+    def score(self, X, y=None):
         """Opposite of the value of X on the K-means objective.
 
         Parameters
 
@@ -320,7 +320,7 @@ def __init__(self, bandwidth=None, seeds=None, bin_seeding=False,
         self.cluster_all = cluster_all
         self.min_bin_freq = min_bin_freq
 
-    def fit(self, X):
+    def fit(self, X, y=None):
         """Perform clustering.
 
         Parameters
 
@@ -405,7 +405,7 @@ def __init__(self, n_clusters=8, eigen_solver=None, random_state=None,
         self.coef0 = coef0
         self.kernel_params = kernel_params
 
-    def fit(self, X):
+    def fit(self, X, y=None):
         """Creates an affinity matrix for X using the selected affinity,
         then applies spectral clustering to this affinity matrix.
 
 
@@ -55,6 +55,7 @@
     check_regressor_data_not_an_array,
     check_transformer_data_not_an_array,
     check_transformer_n_iter,
+    check_fit_score_takes_y,
     check_non_transformer_estimators_n_iter,
     CROSS_DECOMPOSITION)
 
@@ -87,6 +88,8 @@ def test_non_meta_estimators():
     estimators = all_estimators(type_filter=['classifier', 'regressor',
                                              'transformer', 'cluster'])
     for name, Estimator in estimators:
+        if hasattr(Estimator, "score") and name not in CROSS_DECOMPOSITION:
+            yield check_fit_score_takes_y, name, Estimator
         if name not in CROSS_DECOMPOSITION + ['Imputer']:
             # Test that all estimators check their input for NaN's and infs
             yield check_estimators_nan_inf, name, Estimator
 
@@ -23,8 +23,7 @@
 from sklearn.utils.testing import SkipTest
 from sklearn.utils.testing import check_skip_travis
 
-from sklearn.base import (clone, ClusterMixin, ClassifierMixin, RegressorMixin,
-                          TransformerMixin)
+from sklearn.base import clone, ClassifierMixin
 from sklearn.metrics import accuracy_score, adjusted_rand_score, f1_score
 
 from sklearn.lda import LDA
@@ -44,13 +43,6 @@
 CROSS_DECOMPOSITION = ['PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD']
 
 
-def is_supervised(estimator):
-    return (isinstance(estimator, ClassifierMixin)
-            or isinstance(estimator, RegressorMixin)
-            # transformers can all take a y
-            or isinstance(estimator, TransformerMixin))
-
-
 def _boston_subset(n_samples=200):
     global BOSTON
     if BOSTON is None:
@@ -131,10 +123,7 @@ def check_estimator_sparse_data(name, Estimator):
     set_fast_parameters(estimator)
     # fit and predict
     try:
-        if is_supervised(estimator):
-            estimator.fit(X, y)
-        else:
-            estimator.fit(X)
+        estimator.fit(X, y)
         if hasattr(estimator, "predict"):
             estimator.predict(X)
         if hasattr(estimator, 'predict_proba'):
@@ -252,6 +241,21 @@ def _check_transformer(name, Transformer, X, y):
             assert_raises(ValueError, transformer.transform, X.T)
 
 
+def check_fit_score_takes_y(name, Estimator):
+    # Check that the estimator accepts an optional y in both fit and
+    # score, so that it can be used as a step inside a pipeline.
+    rnd = np.random.RandomState(0)
+    X = rnd.uniform(size=(10, 3))
+    y = (X[:, 0] * 4).astype(int)  # np.int was removed in NumPy 1.24
+    y = multioutput_estimator_convert_y_2d(name, y)
+    with warnings.catch_warnings(record=True):  # record, do not emit
+        estimator = Estimator()
+    set_fast_parameters(estimator)
+    set_random_state(estimator)
+    estimator.fit(X, y)
+    estimator.score(X, y)
+
+
 def check_estimators_nan_inf(name, Estimator):
     rnd = np.random.RandomState(0)
     X_train_finite = rnd.uniform(size=(10, 3))
@@ -275,10 +279,7 @@ def check_estimators_nan_inf(name, Estimator):
             set_random_state(estimator, 1)
             # try to fit
             try:
-                if issubclass(Estimator, ClusterMixin):
-                    estimator.fit(X_train)
-                else:
-                    estimator.fit(X_train, y)
+                estimator.fit(X_train, y)
             except ValueError as e:
                 if 'inf' not in repr(e) and 'NaN' not in repr(e):
                     print(error_string_fit, Estimator, e)
@@ -291,12 +292,7 @@ def check_estimators_nan_inf(name, Estimator):
             else:
                 raise AssertionError(error_string_fit, Estimator)
             # actually fit
-            if issubclass(Estimator, ClusterMixin):
-                # All estimators except clustering algorithm
-                # support fitting with (optional) y
-                estimator.fit(X_train_finite)
-            else:
-                estimator.fit(X_train_finite, y)
+            estimator.fit(X_train_finite, y)
 
             # predict
             if hasattr(estimator, "predict"):
@@ -833,10 +829,7 @@ def check_estimators_overwrite_params(name, Estimator):
     set_random_state(estimator)
 
     params = estimator.get_params()
-    if is_supervised(estimator):
-        estimator.fit(X, y)
-    else:
-        estimator.fit(X)
+    estimator.fit(X, y)
     new_params = estimator.get_params()
     for k, v in params.items():
         assert_false(np.any(new_params[k] != v),