Add test that score takes y, fix KMeans, FIX pipeline compatibility o… · scikit-learn/scikit-learn@a105327 · GitHub
[go: up one dir, main page]

Skip to content

Commit a105327

Browse files
committed
Add test that score takes y, fix KMeans, FIX pipeline compatibility of clustering algorithms!
1 parent 1d08f08 commit a105327

File tree

13 files changed

+86
-49
lines changed

13 files changed

+86
-49
lines changed

doc/developers/index.rst

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -716,8 +716,11 @@ is not met, an exception of type ``ValueError`` should be raised.
716716
``y`` might be ignored in the case of unsupervised learning. However, to
717717
make it possible to use the estimator as part of a pipeline that can
718718
mix both supervised and unsupervised transformers, even unsupervised
719-
estimators are kindly asked to accept a ``y=None`` keyword argument in
719+
estimators need to accept a ``y=None`` keyword argument in
720720
the second position that is just ignored by the estimator.
721+
For the same reason, ``fit_predict``, ``fit_transform``, ``score``,
722+
``transform`` and ``partial_fit`` methods need to accept a ``y`` argument in
723+
the second place if they are implemented.
721724

722725
The method should return the object (``self``). This pattern is useful
723726
to be able to implement quick one liners in an IPython session such as::
@@ -857,9 +860,10 @@ last step, it needs to provide a ``fit`` or ``fit_transform`` function.
857860
To be able to evaluate the pipeline on any data but the training set,
858861
it also needs to provide a ``transform`` function.
859862
There are no special requirements for the last step in a pipeline, except that
860-
it has a ``fit`` function. All ``fit`` and ``fit_transform`` functions must
861-
take arguments ``X, y``, even if y is not used.
862-
863+
it has a ``fit`` function. All ``fit`` and ``fit_transform`` functions must
864+
take arguments ``X, y``, even if y is not used. Similarly, for ``score`` to be
865+
usable, the last step of the pipeline needs to have a ``score`` function that
866+
accepts an optional ``y``.
863867

864868
Working notes
865869
-------------

doc/whats_new.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,9 @@ Enhancements
176176
- Parallelized calculation of :func:`pairwise_distances` is now supported
177177
for scipy metrics and custom callables. By `Joel Nothman`_.
178178

</code>
179+
- Allow the fitting and scoring of all clustering algorithms in
180+
:class:`pipeline.Pipeline`. By `Andreas Müller`_.
181+
179182
Documentation improvements
180183
..........................
181184

sklearn/cluster/affinity_propagation_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,7 +269,7 @@ def __init__(self, damping=.5, max_iter=200, convergence_iter=15,
269269
def _pairwise(self):
270270
return self.affinity == "precomputed"
271271

272-
def fit(self, X):
272+
def fit(self, X, y=None):
273273
""" Create affinity matrix from negative euclidean distances, then
274274
apply affinity propagation clustering.
275275

sklearn/cluster/dbscan_.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ class DBSCAN(BaseEstimator, ClusterMixin):
189189
of the construction and query, as well as the memory required
190190
to store the tree. The optimal value depends
191191
on the nature of the problem.
192-
192+
193193
Attributes
194194
----------
195195
core_sample_indices_ : array, shape = [n_core_samples]
@@ -224,7 +224,7 @@ def __init__(self, eps=0.5, min_samples=5, metric='euclidean',
224224
self.p = p
225225
self.random_state = random_state
226226

227-
def fit(self, X, sample_weight=None):
227+
def fit(self, X, y=None, sample_weight=None):
228228
"""Perform DBSCAN clustering from features or distance matrix.
229229
230230
Parameters

sklearn/cluster/hierarchical.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -683,7 +683,7 @@ def __init__(self, n_clusters=2, affinity="euclidean",
683683
self.affinity = affinity
684684
self.pooling_func = pooling_func
685685

686-
def fit(self, X):
686+
def fit(self, X, y=None):
687687
"""Fit the hierarchical clustering on the data
688688
689689
Parameters

sklearn/cluster/k_means_.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -795,7 +795,7 @@ def fit(self, X, y=None):
795795
n_jobs=self.n_jobs)
796796
return self
797797

798-
def fit_predict(self, X):
798+
def fit_predict(self, X, y=None):
799799
"""Compute cluster centers and predict cluster index for each sample.
800800
801801
Convenience method; equivalent to calling fit(X) followed by
@@ -864,7 +864,7 @@ def predict(self, X):
864864
x_squared_norms = row_norms(X, squared=True)
865865
return _labels_inertia(X, x_squared_norms, self.cluster_centers_)[0]
866866

867-
def score(self, X):
867+
def score(self, X, y=None):
868868
"""Opposite of the value of X on the K-means objective.
869869
870870
Parameters

sklearn/cluster/mean_shift_.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -320,7 +320,7 @@ def __init__(self, bandwidth=None, seeds=None, bin_seeding=False,
320320
self.cluster_all = cluster_all
321321
self.min_bin_freq = min_bin_freq
322322

323-
def fit(self, X):
323+
def fit(self, X, y=None):
324324
"""Perform clustering.
325325
326326
Parameters

sklearn/cluster/spectral.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ def __init__(self, n_clusters=8, eigen_solver=None, random_state=None,
405405
self.coef0 = coef0
406406
self.kernel_params = kernel_params
407407

408-
def fit(self, X):
408+
def fit(self, X, y=None):
409409
"""Creates an affinity matrix for X using the selected affinity,
410410
then applies spectral clustering to this affinity matrix.
411411

sklearn/decomposition/dict_learning.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,6 @@ def dict_learning(X, n_components, alpha, max_iter=100, tol=1e-8,
412412
SparsePCA
413413
MiniBatchSparsePCA
414414
"""
415-
416415
if method not in ('lars', 'cd'):
417416
raise ValueError('Coding method %r not supported as a fit algorithm.'
418417
% method)
@@ -604,6 +603,8 @@ def dict_learning_online(X, n_components=2, alpha=1, n_iter=100,
604603
MiniBatchSparsePCA
605604
606605
"""
606+
if n_components is None:
607+
n_components = X.shape[1]
607608

608609
if method not in ('lars', 'cd'):
609610
raise ValueError('Coding method not supported as a fit algorithm.')
@@ -750,7 +751,7 @@ def transform(self, X, y=None):
750751
Transformed data
751752
752753
"""
753-
check_is_fitted(self, 'components_')
754+
check_is_fitted(self, 'components_')
754755

755756
# XXX : kwargs is not documented
756757
X = check_array(X)
@@ -1159,13 +1160,9 @@ def fit(self, X, y=None):
11591160
"""
11601161
random_state = check_random_state(self.random_state)
11611162
X = check_array(X)
1162-
if self.n_components is None:
1163-
n_components = X.shape[1]
1164-
else:
1165-
n_components = self.n_components
11661163

11671164
U, (A, B), self.n_iter_ = dict_learning_online(
1168-
X, n_components, self.alpha,
1165+
X, self.n_components, self.alpha,
11691166
n_iter=self.n_iter, return_code=False,
11701167
method=self.fit_algorithm,
11711168
n_jobs=self.n_jobs, dict_init=self.dict_init,

sklearn/decomposition/incremental_pca.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ def fit(self, X, y=None):
174174
self.partial_fit(X[batch])
175175
return self
176176

177-
def partial_fit(self, X):
177+
def partial_fit(self, X, y=None):
178178
"""Incremental fit with X. All of X is processed as a single batch.
179179
180180
Parameters

0 commit comments

Comments
 (0)
0