FIX Optics paper typo which resulted in undersized clusters (#13750) · scikit-learn/scikit-learn@05b209f · GitHub
[go: up one dir, main page]

Skip to content

Commit 05b209f

Browse files
qinhanmin2014 authored and jnothman committed
FIX Optics paper typo which resulted in undersized clusters (#13750)
1 parent 6627503 commit 05b209f

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

sklearn/cluster/optics_.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -844,7 +844,10 @@ def _xi_cluster(reachability_plot, predecessor_plot, ordering, xi, min_samples,
844844
# Find the first index from the right side which is almost
845845
# at the same level as the beginning of the detected
846846
# cluster.
847-
while (reachability_plot[c_end - 1] < D_max
847+
# Our implementation corrects a mistake in the original
848+
# paper, i.e., in Definition 11 4c, r(x) < r(sD) should be
849+
# r(x) > r(sD).
850+
while (reachability_plot[c_end - 1] > D_max
848851
and c_end > U_start):
849852
c_end -= 1
850853

sklearn/cluster/tests/test_optics.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def test_extract_xi():
9898
X, expected_labels = shuffle(X, expected_labels, random_state=rng)
9999

100100
clust = OPTICS(min_samples=3, min_cluster_size=2,
101-
max_eps=np.inf, cluster_method='xi',
101+
max_eps=20, cluster_method='xi',
102102
xi=0.4).fit(X)
103103
assert_array_equal(clust.labels_, expected_labels)
104104

@@ -110,7 +110,7 @@ def test_extract_xi():
110110
pytest.skip('FIXME (#13739): This is not stable across platforms.')
111111

112112
clust = OPTICS(min_samples=3, min_cluster_size=3,
113-
max_eps=np.inf, cluster_method='xi',
113+
max_eps=20, cluster_method='xi',
114114
xi=0.1).fit(X)
115115
# this may fail if the predecessor correction is not at work!
116116
assert_array_equal(clust.labels_, expected_labels)
@@ -129,9 +129,10 @@ def test_extract_xi():
129129

130130

131131
def test_cluster_hierarchy_():
132+
rng = np.random.RandomState(0)
132133
n_points_per_cluster = 100
133134
C1 = [0, 0] + 2 * rng.randn(n_points_per_cluster, 2)
134-
C2 = [0, 0] + 10 * rng.randn(n_points_per_cluster, 2)
135+
C2 = [0, 0] + 50 * rng.randn(n_points_per_cluster, 2)
135136
X = np.vstack((C1, C2))
136137
X = shuffle(X, random_state=0)
137138

0 commit comments

Comments
 (0)
0