@@ -252,8 +252,6 @@ def k_means(X, n_clusters, *, sample_weight=None, init='k-means++',
252
252
Relative tolerance with regards to Frobenius norm of the difference
253
253
in the cluster centers of two consecutive iterations to declare
254
254
convergence.
255
- It's not advised to set `tol=0` since convergence might never be
256
- declared due to rounding errors. Use a very small number instead.
257
255
258
256
random_state : int, RandomState instance, default=None
259
257
Determines random number generation for centroid initialization. Use
@@ -413,6 +411,7 @@ def _kmeans_single_elkan(X, sample_weight, n_clusters, max_iter=300,
413
411
centers_new = np .zeros_like (centers )
414
412
weight_in_clusters = np .zeros (n_clusters , dtype = X .dtype )
415
413
labels = np .full (n_samples , - 1 , dtype = np .int32 )
414
+ labels_old = labels .copy ()
416
415
center_half_distances = euclidean_distances (centers ) / 2
417
416
distance_next_center = np .partition (np .asarray (center_half_distances ),
418
417
kth = 1 , axis = 0 )[1 ]
@@ -432,6 +431,8 @@ def _kmeans_single_elkan(X, sample_weight, n_clusters, max_iter=300,
432
431
init_bounds (X , centers , center_half_distances ,
433
432
labels , upper_bounds , lower_bounds )
434
433
434
+ strict_convergence = False
435
+
435
436
for i in range (max_iter ):
436
437
elkan_iter (X , sample_weight , centers , centers_new ,
437
438
weight_in_clusters , center_half_distances ,
@@ -448,17 +449,24 @@ def _kmeans_single_elkan(X, sample_weight, n_clusters, max_iter=300,
448
449
inertia = _inertia (X , sample_weight , centers , labels )
449
450
print ("Iteration {0}, inertia {1}" .format (i , inertia ))
450
451
451
- center_shift_tot = ( center_shift ** 2 ). sum ()
452
- if center_shift_tot <= tol :
452
+ if np . array_equal ( labels , labels_old ):
453
+ # First check the labels for strict convergence.
453
454
if verbose :
454
- print ("Converged at iteration {0}: "
455
- "center shift {1} within tolerance {2}"
456
- .format (i , center_shift_tot , tol ))
455
+ print (f"Converged at iteration { i } : strict convergence." )
456
+ strict_convergence = True
457
457
break
458
+ else :
459
+ # No strict convergence, check for tol based convergence.
460
+ center_shift_tot = (center_shift ** 2 ).sum ()
461
+ if center_shift_tot <= tol :
462
+ if verbose :
463
+ print (f"Converged at iteration { i } : center shift "
464
+ f"{ center_shift_tot } within tolerance { tol } ." )
465
+ break
458
466
459
- centers , centers_new = centers_new , centers
467
+
468
labels_old [:] = labels
460
468
461
- if center_shift_tot > 0 :
469
+ if not strict_convergence :
462
470
# rerun E-step so that predicted labels match cluster centers
463
471
elkan_iter (X , sample_weight , centers , centers , weight_in_clusters ,
464
472
center_half_distances , distance_next_center ,
@@ -557,6 +565,7 @@ def _kmeans_single_lloyd(X, sample_weight, n_clusters, max_iter=300,
557
565
558
566
centers_new = np .zeros_like (centers )
559
567
labels = np .full (X .shape [0 ], - 1 , dtype = np .int32 )
568
+ labels_old = labels .copy ()
560
569
weight_in_clusters = np .zeros (n_clusters , dtype = X .dtype )
561
570
center_shift = np .zeros (n_clusters , dtype = X .dtype )
562
571
@@ -567,6 +576,8 @@ def _kmeans_single_lloyd(X, sample_weight, n_clusters, max_iter=300,
567
576
lloyd_iter = lloyd_iter_chunked_dense
568
577
_inertia = _inertia_dense
569
578
579
+ strict_convergence = False
580
+
570
581
# Threadpoolctl context to limit the number of threads in second level of
571
582
# nested parallelism (i.e. BLAS) to avoid oversubsciption.
572
583
with threadpool_limits (limits = 1 , user_api = "blas" ):
@@ -578,17 +589,30 @@ def _kmeans_single_lloyd(X, sample_weight, n_clusters, max_iter=300,
578
589
inertia = _inertia (X , sample_weight , centers , labels )
579
590
print ("Iteration {0}, inertia {1}" .format (i , inertia ))
580
591
581
- center_shift_tot = ( center_shift ** 2 ). sum ()
582
- if center_shift_tot <= tol :
592
+ if np . array_equal ( labels , labels_old ):
593
+ # First check the labels for strict convergence.
583
594
if verbose :
584
- print ("Converged at iteration {0}: "
585
- "center shift {1} within tolerance {2}"
586
- .format (i , center_shift_tot , tol ))
595
+ print (f"Converged at iteration { i } : strict convergence." )
596
+ strict_convergence = True
587
597
break
598
+ else :
599
+ # No strict convergence, check for tol based convergence.
600
+ center_shift_tot = (center_shift ** 2 ).sum ()
601
+ if center_shift_tot <= tol :
602
+ if verbose :
603
+ print (f"Converged at iteration { i } : center shift "
604
+ f"{ center_shift_tot } within tolerance { tol } ." )
605
+ break
588
606
607
+ labels_old [:] = labels
608
+
609
589
610
centers , centers_new = centers_new , centers
590
611
591
- if center_shift_tot > 0 :
612
+ if not strict_convergence :
592
616
# rerun E-step so that predicted labels match cluster centers
593
617
lloyd_iter (X , sample_weight , x_squared_norms , centers , centers ,
594
618
weight_in_clusters , labels , center_shift , n_threads ,
@@ -783,8 +807,6 @@ class KMeans(TransformerMixin, ClusterMixin, BaseEstimator):
783
807
Relative tolerance with regards to Frobenius norm of the difference
784
808
in the cluster centers of two consecutive iterations to declare
785
809
convergence.
786
- It's not advised to set `tol=0` since convergence might never be
787
- declared due to rounding errors. Use a very small number instead.
788
810
789
811
precompute_distances : {'auto', True, False}, default='auto'
790
812
Precompute distances (faster but takes more memory).
0 commit comments