[MRG] Fix KMeans convergence when tol==0 (#17959) · glemaitre/scikit-learn@7715819 · GitHub

Commit 7715819

jeremiedbb authored and glemaitre committed
[MRG] Fix KMeans convergence when tol==0 (scikit-learn#17959)
1 parent d78bfc8 commit 7715819

File tree

3 files changed: +84 -31 lines changed


doc/whats_new/v0.23.rst

Lines changed: 7 additions & 0 deletions
@@ -10,6 +10,13 @@ Version 0.23.2
 Changelog
 ---------
 
+:mod:`sklearn.cluster`
+......................
+
+- |Fix| Fixed a bug in :class:`cluster.KMeans` where rounding errors could
+  prevent convergence to be declared when `tol=0`. :pr:`17959` by
+  :user:`Jérémie du Boisberranger <jeremiedbb>`.
+
 :mod:`sklearn.ensemble`
 .......................
 

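As a usage illustration (not part of the commit), the fixed behavior can be checked the same way the updated test does. This assumes a build that includes the fix; `n_clusters=5` and the toy data are arbitrary choices mirroring the test below.

import numpy as np
from sklearn.cluster import KMeans

# With this fix, tol=0 no longer risks spinning until max_iter: once the
# labels stop changing, convergence is declared strictly.
X = np.random.RandomState(0).normal(size=(5000, 10))
km = KMeans(n_clusters=5, n_init=1, tol=0, max_iter=300,
            random_state=0).fit(X)
assert km.n_iter_ < 300
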
sklearn/cluster/_kmeans.py

Lines changed: 38 additions & 16 deletions
@@ -252,8 +252,6 @@ def k_means(X, n_clusters, *, sample_weight=None, init='k-means++',
         Relative tolerance with regards to Frobenius norm of the difference
         in the cluster centers of two consecutive iterations to declare
         convergence.
-        It's not advised to set `tol=0` since convergence might never be
-        declared due to rounding errors. Use a very small number instead.
 
     random_state : int, RandomState instance, default=None
         Determines random number generation for centroid initialization. Use
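
The caveat removed above existed because a tol-only check can fail at `tol=0`: recomputed means are not bitwise stable, so the squared center shift may stay strictly positive forever. A toy, pure-Python illustration of the rounding issue (not the library's code path):

a, b, c = 0.1, 0.2, 0.3
m1 = ((a + b) + c) / 3   # one summation order
m2 = ((c + b) + a) / 3   # same values, another order
center_shift_tot = (m1 - m2) ** 2
# Tiny but strictly positive, so `center_shift_tot <= 0` never triggers.
assert center_shift_tot > 0
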
@@ -413,6 +411,7 @@ def _kmeans_single_elkan(X, sample_weight, n_clusters, max_iter=300,
     centers_new = np.zeros_like(centers)
     weight_in_clusters = np.zeros(n_clusters, dtype=X.dtype)
     labels = np.full(n_samples, -1, dtype=np.int32)
+    labels_old = labels.copy()
     center_half_distances = euclidean_distances(centers) / 2
     distance_next_center = np.partition(np.asarray(center_half_distances),
                                         kth=1, axis=0)[1]
@@ -432,6 +431,8 @@ def _kmeans_single_elkan(X, sample_weight, n_clusters, max_iter=300,
     init_bounds(X, centers, center_half_distances,
                 labels, upper_bounds, lower_bounds)
 
+    strict_convergence = False
+
     for i in range(max_iter):
         elkan_iter(X, sample_weight, centers, centers_new,
                    weight_in_clusters, center_half_distances,
@@ -448,17 +449,24 @@ def _kmeans_single_elkan(X, sample_weight, n_clusters, max_iter=300,
             inertia = _inertia(X, sample_weight, centers, labels)
             print("Iteration {0}, inertia {1}" .format(i, inertia))
 
-        center_shift_tot = (center_shift**2).sum()
-        if center_shift_tot <= tol:
+        if np.array_equal(labels, labels_old):
+            # First check the labels for strict convergence.
             if verbose:
-                print("Converged at iteration {0}: "
-                      "center shift {1} within tolerance {2}"
-                      .format(i, center_shift_tot, tol))
+                print(f"Converged at iteration {i}: strict convergence.")
+            strict_convergence = True
             break
+        else:
+            # No strict convergence, check for tol based convergence.
+            center_shift_tot = (center_shift**2).sum()
+            if center_shift_tot <= tol:
+                if verbose:
+                    print(f"Converged at iteration {i}: center shift "
+                          f"{center_shift_tot} within tolerance {tol}.")
+                break
 
-        centers, centers_new = centers_new, centers
+        labels_old[:] = labels
 
-    if center_shift_tot > 0:
+    if not strict_convergence:
         # rerun E-step so that predicted labels match cluster centers
         elkan_iter(X, sample_weight, centers, centers, weight_in_clusters,
                    center_half_distances, distance_next_center,
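
For readers skimming the diff, here is a self-contained sketch of the control flow both solvers now share. `one_lloyd_iter` and `kmeans_sketch` are hypothetical stand-ins for the Cython kernels, not scikit-learn code; only the convergence logic mirrors the hunk above.

import numpy as np

def one_lloyd_iter(X, centers):
    # E-step: nearest center for each point; M-step: cluster means.
    d = ((X[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2)
    labels = d.argmin(axis=1).astype(np.int32)
    centers_new = np.array([X[labels == j].mean(axis=0)
                            if (labels == j).any() else centers[j]
                            for j in range(len(centers))])
    return labels, centers_new

def kmeans_sketch(X, n_clusters, max_iter=300, tol=0.0, seed=0):
    rng = np.random.RandomState(seed)
    centers = X[rng.choice(len(X), n_clusters, replace=False)]
    labels_old = np.full(len(X), -1, dtype=np.int32)
    strict_convergence = False
    for i in range(max_iter):
        labels, centers_new = one_lloyd_iter(X, centers)
        if np.array_equal(labels, labels_old):
            # Strict convergence: the partition stopped changing. This test
            # is exact, so it works even when rounding keeps the center
            # shift above 0.
            strict_convergence = True
            break
        # No strict convergence, fall back to the tol based check.
        if ((centers_new - centers) ** 2).sum() <= tol:
            break
        labels_old[:] = labels
        centers = centers_new
    if not strict_convergence:
        # Rerun the E-step so predicted labels match the final centers.
        labels, _ = one_lloyd_iter(X, centers)
    return centers, labels, i + 1
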
@@ -557,6 +565,7 @@ def _kmeans_single_lloyd(X, sample_weight, n_clusters, max_iter=300,
 
     centers_new = np.zeros_like(centers)
     labels = np.full(X.shape[0], -1, dtype=np.int32)
+    labels_old = labels.copy()
     weight_in_clusters = np.zeros(n_clusters, dtype=X.dtype)
     center_shift = np.zeros(n_clusters, dtype=X.dtype)
 
@@ -567,6 +576,8 @@ def _kmeans_single_lloyd(X, sample_weight, n_clusters, max_iter=300,
         lloyd_iter = lloyd_iter_chunked_dense
         _inertia = _inertia_dense
 
+    strict_convergence = False
+
     # Threadpoolctl context to limit the number of threads in second level of
     # nested parallelism (i.e. BLAS) to avoid oversubsciption.
     with threadpool_limits(limits=1, user_api="blas"):
@@ -578,17 +589,30 @@ def _kmeans_single_lloyd(X, sample_weight, n_clusters, max_iter=300,
                 inertia = _inertia(X, sample_weight, centers, labels)
                 print("Iteration {0}, inertia {1}" .format(i, inertia))
 
-            center_shift_tot = (center_shift**2).sum()
-            if center_shift_tot <= tol:
+            if np.array_equal(labels, labels_old):
+                # First check the labels for strict convergence.
                 if verbose:
-                    print("Converged at iteration {0}: "
-                          "center shift {1} within tolerance {2}"
-                          .format(i, center_shift_tot, tol))
+                    print(f"Converged at iteration {i}: strict convergence.")
+                strict_convergence = True
                 break
+            else:
+                # No strict convergence, check for tol based convergence.
+                center_shift_tot = (center_shift**2).sum()
+                if center_shift_tot <= tol:
+                    if verbose:
+                        print(f"Converged at iteration {i}: center shift "
+                              f"{center_shift_tot} within tolerance {tol}.")
+                    break
 
+            labels_old[:] = labels
+
+<<<<<<< HEAD
             centers, centers_new = centers_new, centers
 
         if center_shift_tot > 0:
+=======
+        if not strict_convergence:
+>>>>>>> fc06baef49... [MRG] Fix KMeans convergence when tol==0 (#17959)
             # rerun E-step so that predicted labels match cluster centers
             lloyd_iter(X, sample_weight, x_squared_norms, centers, centers,
                        weight_in_clusters, labels, center_shift, n_threads,
@@ -783,8 +807,6 @@ class KMeans(TransformerMixin, ClusterMixin, BaseEstimator):
         Relative tolerance with regards to Frobenius norm of the difference
         in the cluster centers of two consecutive iterations to declare
         convergence.
-        It's not advised to set `tol=0` since convergence might never be
-        declared due to rounding errors. Use a very small number instead.
 
     precompute_distances : {'auto', True, False}, default='auto'
         Precompute distances (faster but takes more memory).

sklearn/cluster/tests/test_k_means.py

Lines changed: 39 additions & 15 deletions
@@ -1,4 +1,5 @@
 """Testing for K-means"""
+import re
 import sys
 
 import numpy as np
@@ -135,10 +136,12 @@ def test_relocate_empty_clusters(representation):
     assert_allclose(centers_new, [[-36], [10], [9.5]])
 
 
-@pytest.mark.parametrize('distribution', ['normal', 'blobs'])
-@pytest.mark.parametrize('tol', [1e-2, 1e-4, 1e-8])
-def test_elkan_results(distribution, tol):
-    # check that results are identical between lloyd and elkan algorithms
+@pytest.mark.parametrize("distribution", ["normal", "blobs"])
+@pytest.mark.parametrize("array_constr", [np.array, sp.csr_matrix],
+                         ids=["dense", "sparse"])
+@pytest.mark.parametrize("tol", [1e-2, 1e-8, 1e-100, 0])
+def test_kmeans_elkan_results(distribution, array_constr, tol):
+    # Check that results are identical between lloyd and elkan algorithms
     rnd = np.random.RandomState(0)
     if distribution == 'normal':
         X = rnd.normal(size=(5000, 10))
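
The widened parametrization (sparse input, `tol=1e-100`, `tol=0`) pins down that both solvers agree in the regimes this fix targets. A hedged standalone version of the same check, assuming a build that includes the fix:

import numpy as np
from sklearn.cluster import KMeans

X = np.random.RandomState(0).normal(size=(1000, 10))
# Same init and single run, so "full" (Lloyd) and "elkan" should match.
km_full = KMeans(algorithm="full", n_clusters=5, random_state=0,
                 n_init=1, tol=0).fit(X)
km_elkan = KMeans(algorithm="elkan", n_clusters=5, random_state=0,
                  n_init=1, tol=0).fit(X)
assert np.array_equal(km_full.labels_, km_elkan.labels_)
assert np.allclose(km_full.cluster_centers_, km_elkan.cluster_centers_)
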
@@ -164,11 +167,12 @@ def test_kmeans_convergence(algorithm):
     # Check that KMeans stops when convergence is reached when tol=0. (#16075)
     rnd = np.random.RandomState(0)
     X = rnd.normal(size=(5000, 10))
+    max_iter = 300
 
-    km = KMeans(algorithm=algorithm, n_clusters=5, random_state=0, n_init=1,
-                tol=0, max_iter=300).fit(X)
+    km = KMeans(algorithm=algorithm, n_clusters=5, random_state=0,
+                n_init=1, tol=0, max_iter=max_iter).fit(X)
 
-    assert km.n_iter_ < 300
+    assert km.n_iter_ < max_iter
 
 
 @pytest.mark.parametrize('distribution', ['normal', 'blobs'])
@@ -439,9 +443,9 @@ def test_k_means_fit_predict(algo, dtype, constructor, seed, max_iter, tol):
     assert v_measure_score(labels_1, labels_2) == 1
 
 
-def test_mb_kmeans_verbose():
-    mb_k_means = MiniBatchKMeans(init="k-means++", n_clusters=n_clusters,
-                                 random_state=42, verbose=1)
+def test_minibatch_kmeans_verbose():
+    # Check verbose mode of MiniBatchKMeans for better coverage.
+    km = MiniBatchKMeans(n_clusters=n_clusters, random_state=42, verbose=1)
     old_stdout = sys.stdout
     sys.stdout = StringIO()
     try:
@@ -450,11 +454,31 @@ def test_mb_kmeans_verbose():
     sys.stdout = old_stdout
 
 
-def test_minibatch_init_with_large_k():
-    mb_k_means = MiniBatchKMeans(init='k-means++', init_size=10, n_clusters=20)
-    # Check that a warning is raised, as the number clusters is larger
-    # than the init_size
-    assert_warns(RuntimeWarning, mb_k_means.fit, X)
+@pytest.mark.parametrize("algorithm", ["full", "elkan"])
+@pytest.mark.parametrize("tol", [1e-2, 0])
+def test_kmeans_verbose(algorithm, tol, capsys):
+    # Check verbose mode of KMeans for better coverage.
+    X = np.random.RandomState(0).normal(size=(5000, 10))
+
+    KMeans(algorithm=algorithm, n_clusters=n_clusters, random_state=42,
+           init="random", n_init=1, tol=tol, verbose=1).fit(X)
+
+    captured = capsys.readouterr()
+
+    assert re.search(r"Initialization complete", captured.out)
+    assert re.search(r"Iteration [0-9]+, inertia", captured.out)
+
+    if tol == 0:
+        assert re.search(r"strict convergence", captured.out)
+    else:
+        assert re.search(r"center shift .* within tolerance", captured.out)
+
+
+def test_minibatch_kmeans_warning_init_size():
+    # Check that a warning is raised when init_size is smaller than n_clusters
+    with pytest.warns(RuntimeWarning,
+                      match=r"init_size.* should be larger than n_clusters"):
+        MiniBatchKMeans(init_size=10, n_clusters=20).fit(X)
 
 
 def test_minibatch_k_means_init_multiple_runs_with_explicit_centers():
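
The last hunk also swaps the `assert_warns` helper for `pytest.warns` with `match`, which regex-searches the warning message. The pattern in isolation, with a hypothetical `emit` standing in for `MiniBatchKMeans.fit`:

import warnings
import pytest

def emit():
    warnings.warn("init_size=10 should be larger than n_clusters=20",
                  RuntimeWarning)

def test_warning_message_is_matched():
    # `match` is applied with re.search against the warning message.
    with pytest.warns(RuntimeWarning,
                      match=r"init_size.* should be larger than n_clusters"):
        emit()
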
