ENH Add random_state parameter to AffinityPropagation (#16801) · scikit-learn/scikit-learn@22a7d5b · GitHub
[go: up one dir, main page]

Skip to content

Commit 22a7d5b

Browse files
cmarmorwoolston.adminadrinjalaliNicolasHug
committed
ENH Add random_state parameter to AffinityPropagation (#16801)
* Added value checks and random state parameter to method * Changed default parameter to None instead of 0 * Added numpy RandomState to the check * Replaced inline validation with check_random_state from utils and pointed at glossary * Needed a different default parameter to pass the default way this has been working in the past * Updated to conform with flake8 standards * Add random_state to AffinityPropagation class. * Add test. * Add what's new entry and versionadded directive. * Add PR number. * Fix lint error due to this PR. * Use np.array_equal in test. * Update sklearn/cluster/_affinity_propagation.py Co-Authored-By: Adrin Jalali <adrin.jalali@gmail.com> * Homogenize parameter descriptions, default random_state to None. * Update sklearn/cluster/_affinity_propagation.py Co-Authored-By: Nicolas Hug <contact@nicolas-hug.com> * Update sklearn/cluster/_affinity_propagation.py Co-Authored-By: Nicolas Hug <contact@nicolas-hug.com> * Update sklearn/cluster/_affinity_propagation.py Co-Authored-By: Nicolas Hug <contact@nicolas-hug.com> * Update doc/whats_new/v0.23.rst Co-Authored-By: Nicolas Hug <contact@nicolas-hug.com> * Change test name. * Modify check in test. * Fix lint errors. * Address comment. * Address comment. * Add 'deprecation' and its corresponding test. * Fix lint errors. * Add random_state parameter to tests, to avoid FutureWarnings. * Move warning in fit. Modify tests. * Modify example. * Tentative fix for failures. * Document default value to 0. Revert docstring. * Explicit link to Glossary. * Fix default value. * Remove some warnings from tests. * Validate and test docstring. * Tentative fix. * Tentative fix. * Ignore FutureWarning in fit attribute test. * Set random_state to avoid FutureWarning in test_fit_docstring_attributes. * [doc build] Force documentation build. * Clarify warning message. 
Co-authored-by: rwoolston.admin <rwoolston.admin@LXO-DS-DEV.afcucorp.local> Co-authored-by: Adrin Jalali <adrin.jalali@gmail.com> Co-authored-by: Nicolas Hug <contact@nicolas-hug.com>
1 parent fa8f498 commit 22a7d5b

File tree

4 files changed

+105
-35
lines changed

4 files changed

+105
-35
lines changed

doc/whats_new/v0.23.rst

+4
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,10 @@ Changelog
143143
deprecated. It has no effect. :pr:`11950` by
144144
:user:`Jeremie du Boisberranger <jeremiedbb>`.
145145

146+
- |API| The ``random_state`` parameter has been added to
147+
:class:`cluster.AffinityPropagation`. :pr:`16801` by :user:`rcwoolston`
148+
and :user:`Chiara Marmo <cmarmo>`.
149+
146150
:mod:`sklearn.compose`
147151
......................
148152

sklearn/cluster/_affinity_propagation.py

+48-22
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111
from ..exceptions import ConvergenceWarning
1212
from ..base import BaseEstimator, ClusterMixin
13-
from ..utils import as_float_array, check_array
13+
from ..utils import as_float_array, check_array, check_random_state
1414
from ..utils.validation import check_is_fitted, _deprecate_positional_args
1515
from ..metrics import euclidean_distances
1616
from ..metrics import pairwise_distances_argmin
@@ -32,7 +32,7 @@ def all_equal_similarities():
3232

3333
def affinity_propagation(S, preference=None, convergence_iter=15, max_iter=200,
3434
damping=0.5, copy=True, verbose=False,
35-
return_n_iter=False):
35+
return_n_iter=False, random_state='warn'):
3636
"""Perform Affinity Propagation Clustering of data
3737
3838
Read more in the :ref:`User Guide <affinity_propagation>`.
@@ -72,6 +72,14 @@ def affinity_propagation(S, preference=None, convergence_iter=15, max_iter=200,
7272
return_n_iter : bool, default False
7373
Whether or not to return the number of iterations.
7474
75+
random_state : int or np.random.RandomState instance, default: 0
76+
Pseudo-random number generator to control the starting state.
77+
Use an int for reproducible results across function calls.
78+
See the :term:`Glossary <random_state>`.
79+
80+
.. versionadded:: 0.23
81+
This parameter was previously hardcoded as 0.
82+
7583
Returns
7684
-------
7785
@@ -133,7 +141,16 @@ def affinity_propagation(S, preference=None, convergence_iter=15, max_iter=200,
133141
if return_n_iter
134142
else (np.array([0]), np.array([0] * n_samples)))
135143

136-
random_state = np.random.RandomState(0)
144+
if random_state == 'warn':
145+
warnings.warn(("'random_state' has been introduced in 0.23. "
146+
"It will be set to None starting from 0.25 which "
147+
"means that results will differ at every function "
148+
"call. Set 'random_state' to None to silence this "
149+
"warning, or to 0 to keep the behavior of versions "
150+
"<0.23."),
151+
FutureWarning)
152+
random_state = 0
153+
random_state = check_random_state(random_state)
137154

138155
# Place preference on the diagonal of S
139156
S.flat[::(n_samples + 1)] = preference
@@ -274,6 +291,13 @@ class AffinityPropagation(ClusterMixin, BaseEstimator):
274291
verbose : bool, default=False
275292
Whether to be verbose.
276293
294+
random_state : int or np.random.RandomState instance, default=0
295+
Pseudo-random number generator to control the starting state.
296+
Use an int for reproducible results across function calls.
297+
See the :term:`Glossary <random_state>`.
298+
299+
.. versionadded:: 0.23
300+
This parameter was previously hardcoded as 0.
277301
278302
Attributes
279303
----------
@@ -292,23 +316,6 @@ class AffinityPropagation(ClusterMixin, BaseEstimator):
292316
n_iter_ : int
293317
Number of iterations taken to converge.
294318
295-
Examples
296-
--------
297-
>>> from sklearn.cluster import AffinityPropagation
298-
>>> import numpy as np
299-
>>> X = np.array([[1, 2], [1, 4], [1, 0],
300-
... [4, 2], [4, 4], [4, 0]])
301-
>>> clustering = AffinityPropagation().fit(X)
302-
>>> clustering
303-
AffinityPropagation()
304-
>>> clustering.labels_
305-
array([0, 0, 0, 1, 1, 1])
306-
>>> clustering.predict([[0, 0], [4, 4]])
307-
array([0, 1])
308-
>>> clustering.cluster_centers_
309-
array([[1, 2],
310-
[4, 2]])
311-
312319
Notes
313320
-----
314321
For an example, see :ref:`examples/cluster/plot_affinity_propagation.py
@@ -333,11 +340,28 @@ class AffinityPropagation(ClusterMixin, BaseEstimator):
333340
334341
Brendan J. Frey and Delbert Dueck, "Clustering by Passing Messages
335342
Between Data Points", Science Feb. 2007
343+
344+
Examples
345+
--------
346+
>>> from sklearn.cluster import AffinityPropagation
347+
>>> import numpy as np
348+
>>> X = np.array([[1, 2], [1, 4], [1, 0],
349+
... [4, 2], [4, 4], [4, 0]])
350+
>>> clustering = AffinityPropagation(random_state=5).fit(X)
351+
>>> clustering
352+
AffinityPropagation(random_state=5)
353+
>>> clustering.labels_
354+
array([0, 0, 0, 1, 1, 1])
355+
>>> clustering.predict([[0, 0], [4, 4]])
356+
array([0, 1])
357+
>>> clustering.cluster_centers_
358+
array([[1, 2],
359+
[4, 2]])
336360
"""
337361
@_deprecate_positional_args
338362
def __init__(self, *, damping=.5, max_iter=200, convergence_iter=15,
339363
copy=True, preference=None, affinity='euclidean',
340-
verbose=False):
364+
verbose=False, random_state='warn'):
341365

342366
self.damping = damping
343367
self.max_iter = max_iter
@@ -346,6 +370,7 @@ def __init__(self, *, damping=.5, max_iter=200, convergence_iter=15,
346370
self.verbose = verbose
347371
self.preference = preference
348372
self.affinity = affinity
373+
self.random_state = random_state
349374

350375
@property
351376
def _pairwise(self):
@@ -388,7 +413,8 @@ def fit(self, X, y=None):
388413
affinity_propagation(
389414
self.affinity_matrix_, self.preference, max_iter=self.max_iter,
390415
convergence_iter=self.convergence_iter, damping=self.damping,
391-
copy=self.copy, verbose=self.verbose, return_n_iter=True)
416+
copy=self.copy, verbose=self.verbose, return_n_iter=True,
417+
random_state=self.random_state)
392418

393419
if self.affinity != "precomputed":
394420
self.cluster_centers_ = X[self.cluster_centers_indices_].copy()

sklearn/cluster/tests/test_affinity_propagation.py

+49-13
Original file line numberDiff line numberDiff line change
@@ -33,16 +33,18 @@ def test_affinity_propagation():
3333
preference = np.median(S) * 10
3434
# Compute Affinity Propagation
3535
cluster_centers_indices, labels = affinity_propagation(
36-
S, preference=preference)
36+
S, preference=preference, random_state=39)
3737

3838
n_clusters_ = len(cluster_centers_indices)
3939

4040
assert n_clusters == n_clusters_
4141

42-
af = AffinityPropagation(preference=preference, affinity="precomputed")
42+
af = AffinityPropagation(preference=preference, affinity="precomputed",
43+
random_state=28)
4344
labels_precomputed = af.fit(S).labels_
4445

45-
af = AffinityPropagation(preference=preference, verbose=True)
46+
af = AffinityPropagation(preference=preference, verbose=True,
47+
random_state=37)
4648
labels = af.fit(X).labels_
4749

4850
assert_array_equal(labels, labels_precomputed)
@@ -55,24 +57,24 @@ def test_affinity_propagation():
5557

5658
# Test also with no copy
5759
_, labels_no_copy = affinity_propagation(S, preference=preference,
58-
copy=False)
60+
copy=False, random_state=74)
5961
assert_array_equal(labels, labels_no_copy)
6062

6163
# Test input validation
6264
with pytest.raises(ValueError):
6365
affinity_propagation(S[:, :-1])
6466
with pytest.raises(ValueError):
6567
affinity_propagation(S, damping=0)
66-
af = AffinityPropagation(affinity="unknown")
68+
af = AffinityPropagation(affinity="unknown", random_state=78)
6769
with pytest.raises(ValueError):
6870
af.fit(X)
69-
af_2 = AffinityPropagation(affinity='precomputed')
71+
af_2 = AffinityPropagation(affinity='precomputed', random_state=21)
7072
with pytest.raises(TypeError):
7173
af_2.fit(csr_matrix((3, 3)))
7274

7375
def test_affinity_propagation_predict():
7476
# Test AffinityPropagation.predict
75-
af = AffinityPropagation(affinity="euclidean")
77+
af = AffinityPropagation(affinity="euclidean", random_state=63)
7678
labels = af.fit_predict(X)
7779
labels2 = af.predict(X)
7880
assert_array_equal(labels, labels2)
@@ -87,7 +89,7 @@ def test_affinity_propagation_predict_error():
8789

8890
# Predict not supported when affinity="precomputed".
8991
S = np.dot(X, X.T)
90-
af = AffinityPropagation(affinity="precomputed")
92+
af = AffinityPropagation(affinity="precomputed", random_state=57)
9193
af.fit(S)
9294
with pytest.raises(ValueError):
9395
af.predict(X)
@@ -100,7 +102,7 @@ def test_affinity_propagation_fit_non_convergence():
100102
X = np.array([[0, 0], [1, 1], [-2, -2]])
101103

102104
# Force non-convergence by allowing only a single iteration
103-
af = AffinityPropagation(preference=-10, max_iter=1)
105+
af = AffinityPropagation(preference=-10, max_iter=1, random_state=82)
104106

105107
assert_warns(ConvergenceWarning, af.fit, X)
106108
assert_array_equal(np.empty((0, 2)), af.cluster_centers_)
@@ -129,7 +131,7 @@ def test_affinity_propagation_equal_mutual_similarities():
129131

130132
# setting different preferences
131133
cluster_center_indices, labels = assert_no_warnings(
132-
affinity_propagation, S, preference=[-20, -10])
134+
affinity_propagation, S, preference=[-20, -10], random_state=37)
133135

134136
# expect one cluster, with highest-preference sample as exemplar
135137
assert_array_equal([1], cluster_center_indices)
@@ -143,7 +145,8 @@ def test_affinity_propagation_predict_non_convergence():
143145

144146
# Force non-convergence by allowing only a single iteration
145147
af = assert_warns(ConvergenceWarning,
146-
AffinityPropagation(preference=-10, max_iter=1).fit, X)
148+
AffinityPropagation(preference=-10,
149+
max_iter=1, random_state=75).fit, X)
147150

148151
# At prediction time, consider new samples as noise since there are no
149152
# clusters
@@ -156,7 +159,8 @@ def test_affinity_propagation_non_convergence_regressiontest():
156159
X = np.array([[1, 0, 0, 0, 0, 0],
157160
[0, 1, 1, 1, 0, 0],
158161
[0, 0, 1, 0, 0, 1]])
159-
af = AffinityPropagation(affinity='euclidean', max_iter=2).fit(X)
162+
af = AffinityPropagation(affinity='euclidean',
163+
max_iter=2, random_state=34).fit(X)
160164
assert_array_equal(np.array([-1, -1, -1]), af.labels_)
161165

162166

@@ -181,14 +185,46 @@ def test_equal_similarities_and_preferences():
181185
assert _equal_similarities_and_preferences(S, np.array(0))
182186

183187

188+
def test_affinity_propagation_random_state():
189+
# Significance of random_state parameter
190+
# Generate sample data
191+
centers = [[1, 1], [-1, -1], [1, -1]]
192+
X, labels_true = make_blobs(n_samples=300, centers=centers,
193+
cluster_std=0.5, random_state=0)
194+
# random_state = 0
195+
ap = AffinityPropagation(convergence_iter=1, max_iter=2, random_state=0)
196+
ap.fit(X)
197+
centers0 = ap.cluster_centers_
198+
199+
# random_state = 76
200+
ap = AffinityPropagation(convergence_iter=1, max_iter=2, random_state=76)
201+
ap.fit(X)
202+
centers76 = ap.cluster_centers_
203+
204+
assert np.mean((centers0 - centers76) ** 2) > 1
205+
206+
207+
# FIXME: to be removed in 0.25
208+
def test_affinity_propagation_random_state_warning():
209+
# test that a warning is raised when random_state is not defined.
210+
X = np.array([[0, 0], [1, 1], [-2, -2]])
211+
match = ("'random_state' has been introduced in 0.23. "
212+
"It will be set to None starting from 0.25 which "
213+
"means that results will differ at every function "
214+
"call. Set 'random_state' to None to silence this "
215+
"warning, or to 0 to keep the behavior of versions "
216+
"<0.23.")
217+
with pytest.warns(FutureWarning, match=match):
218+
AffinityPropagation().fit(X)
219+
184220
@pytest.mark.parametrize('centers', [csr_matrix(np.zeros((1, 10))),
185221
np.zeros((1, 10))])
186222
def test_affinity_propagation_convergence_warning_dense_sparse(centers):
187223
"""Non-regression, see #13334"""
188224
rng = np.random.RandomState(42)
189225
X = rng.rand(40, 10)
190226
y = (4 * rng.rand(40)).astype(np.int)
191-
ap = AffinityPropagation()
227+
ap = AffinityPropagation(random_state=46)
192228
ap.fit(X, y)
193229
ap.cluster_centers_ = centers
194230
with pytest.warns(None) as record:

sklearn/tests/test_docstring_parameters.py

+4
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,10 @@ def test_fit_docstring_attributes(name, Estimator):
198198
if Estimator.__name__ == 'DummyClassifier':
199199
est.strategy = "stratified"
200200

201+
# TO BE REMOVED for v0.25 (avoid FutureWarning)
202+
if Estimator.__name__ == 'AffinityPropagation':
203+
est.random_state = 63
204+
201205
X, y = make_classification(n_samples=20, n_features=3,
202206
n_redundant=0, n_classes=2,
203207
random_state=2)

0 commit comments

Comments
 (0)
0