@@ -27,7 +27,7 @@ def _smacof_single(
27
27
init = None ,
28
28
max_iter = 300 ,
29
29
verbose = 0 ,
30
- eps = 1e-3 ,
30
+ eps = 1e-6 ,
31
31
random_state = None ,
32
32
normalized_stress = False ,
33
33
):
@@ -59,18 +59,21 @@ def _smacof_single(
59
59
verbose : int, default=0
60
60
Level of verbosity.
61
61
62
- eps : float, default=1e-3
63
- Relative tolerance with respect to stress at which to declare
64
- convergence. The value of `eps` should be tuned separately depending
65
- on whether or not `normalized_stress` is being used.
62
+ eps : float, default=1e-6
63
+ The tolerance with respect to stress (normalized by the sum of squared
64
+ embedding distances) at which to declare convergence.
65
+
66
+ .. versionchanged:: 1.7
67
+ The default value for `eps` has changed from 1e-3 to 1e-6, as a result
68
+ of a bugfix in the computation of the convergence criterion.
66
69
67
70
random_state : int, RandomState instance or None, default=None
68
71
Determines the random number generator used to initialize the centers.
69
72
Pass an int for reproducible results across multiple function calls.
70
73
See :term:`Glossary <random_state>`.
71
74
72
75
normalized_stress : bool, default=False
73
- Whether use and return normalized stress value (Stress-1) instead of raw
76
+ Whether to return normalized stress value (Stress-1) instead of raw
74
77
stress.
75
78
76
79
.. versionadded:: 1.2
@@ -168,29 +171,32 @@ def _smacof_single(
168
171
# Compute stress
169
172
distances = euclidean_distances (X )
170
173
stress = ((distances .ravel () - disparities .ravel ()) ** 2 ).sum () / 2
171
- if normalized_stress :
172
- stress = np .sqrt (stress / ((disparities .ravel () ** 2 ).sum () / 2 ))
173
174
174
- normalization = np .sqrt ((X ** 2 ).sum (axis = 1 )).sum ()
175
175
if verbose >= 2 : # pragma: no cover
176
176
print (f"Iteration { it } , stress { stress :.4f} " )
177
177
if old_stress is not None :
178
- if (old_stress - stress / normalization ) < eps :
178
+ sum_squared_distances = (distances .ravel () ** 2 ).sum ()
179
+ if ((old_stress - stress ) / (sum_squared_distances / 2 )) < eps :
179
180
if verbose : # pragma: no cover
180
181
print ("Convergence criterion reached." )
181
182
break
182
- old_stress = stress / normalization
183
+ old_stress = stress
184
+
185
+ if normalized_stress :
186
+ sum_squared_distances = (distances .ravel () ** 2 ).sum ()
187
+ stress = np .sqrt (stress / (sum_squared_distances / 2 ))
183
188
184
189
return X , stress , it + 1
185
190
186
191
192
+ # TODO(1.9): change default `n_init` to 1, see PR #31117
187
193
@validate_params (
188
194
{
189
195
"dissimilarities" : ["array-like" ],
190
196
"metric" : ["boolean" ],
191
197
"n_components" : [Interval (Integral , 1 , None , closed = "left" )],
192
198
"init" : ["array-like" , None ],
193
- "n_init" : [Interval (Integral , 1 , None , closed = "left" )],
199
+ "n_init" : [Interval (Integral , 1 , None , closed = "left" ), StrOptions ({ "warn" }) ],
194
200
"n_jobs" : [Integral , None ],
195
201
"max_iter" : [Interval (Integral , 1 , None , closed = "left" )],
196
202
"verbose" : ["verbose" ],
@@ -207,11 +213,11 @@ def smacof(
207
213
metric = True ,
208
214
n_components = 2 ,
209
215
init = None ,
210
- n_init = 8 ,
216
+ n_init = "warn" ,
211
217
n_jobs = None ,
212
218
max_iter = 300 ,
213
219
verbose = 0 ,
214
- eps = 1e-3 ,
220
+ eps = 1e-6 ,
215
221
random_state = None ,
216
222
return_n_iter = False ,
217
223
normalized_stress = "auto" ,
@@ -262,6 +268,9 @@ def smacof(
262
268
determined by the run with the smallest final stress. If ``init`` is
263
269
provided, this option is overridden and a single run is performed.
264
270
271
+ .. versionchanged:: 1.9
272
The default value for `n_init` will change from 8 to 1 in version 1.9.
273
+
265
274
n_jobs : int, default=None
266
275
The number of jobs to use for the computation. If multiple
267
276
initializations are used (``n_init``), each run of the algorithm is
@@ -277,10 +286,13 @@ def smacof(
277
286
verbose : int, default=0
278
287
Level of verbosity.
279
288
280
- eps : float, default=1e-3
281
- Relative tolerance with respect to stress at which to declare
282
- convergence. The value of `eps` should be tuned separately depending
283
- on whether or not `normalized_stress` is being used.
289
+ eps : float, default=1e-6
290
+ The tolerance with respect to stress (normalized by the sum of squared
291
+ embedding distances) at which to declare convergence.
292
+
293
+ .. versionchanged:: 1.7
294
+ The default value for `eps` has changed from 1e-3 to 1e-6, as a result
295
+ of a bugfix in the computation of the convergence criterion.
284
296
285
297
random_state : int, RandomState instance or None, default=None
286
298
Determines the random number generator used to initialize the centers.
@@ -290,7 +302,7 @@ def smacof(
290
302
return_n_iter : bool, default=False
291
303
Whether or not to return the number of iterations.
292
304
293
- normalized_stress : bool or "auto" default="auto"
305
+ normalized_stress : bool or "auto", default="auto"
294
306
Whether to return normalized stress value (Stress-1) instead of raw
295
307
stress. By default, metric MDS returns raw stress while non-metric MDS
296
308
returns normalized stress.
@@ -335,17 +347,24 @@ def smacof(
335
347
>>> import numpy as np
336
348
>>> from sklearn.manifold import smacof
337
349
>>> from sklearn.metrics import euclidean_distances
338
- >>> X = np.array([[0, 1, 2], [1, 0, 3],[2, 3, 0]])
350
+ >>> X = np.array([[0, 1, 2], [1, 0, 3], [2, 3, 0]])
339
351
>>> dissimilarities = euclidean_distances(X)
340
- >>> mds_result , stress = smacof(dissimilarities, n_components=2, random_state=42)
341
- >>> np.round(mds_result, 5)
342
- array([[ 0.05352, -1.07253],
343
- [ 1.74231, -0.75675],
344
- [-1.79583, 1.82928]] )
345
- >>> np.round(stress, 5 ).item()
346
- 0.00128
352
+ >>> Z , stress = smacof(
353
+ ... dissimilarities, n_components=2, n_init=1, eps=1e-6, random_state=42
354
+ ... )
355
+ >>> Z.shape
356
+ (3, 2 )
357
+ >>> np.round(stress, 6 ).item()
358
+ 3.2e-05
347
359
"""
348
360
361
+ if n_init == "warn" :
362
+ warnings .warn (
363
+ "The default value of `n_init` will change from 8 to 1 in 1.9." ,
364
+ FutureWarning ,
365
+ )
366
+ n_init = 8
367
+
349
368
dissimilarities = check_array (dissimilarities )
350
369
random_state = check_random_state (random_state )
351
370
@@ -408,6 +427,7 @@ def smacof(
408
427
return best_pos , best_stress
409
428
410
429
430
+ # TODO(1.9): change default `n_init` to 1, see PR #31117
411
431
class MDS (BaseEstimator ):
412
432
"""Multidimensional scaling.
413
433
@@ -428,16 +448,22 @@ class MDS(BaseEstimator):
428
448
initializations. The final results will be the best output of the runs,
429
449
determined by the run with the smallest final stress.
430
450
451
+ .. versionchanged:: 1.9
452
+ The default value for `n_init` will change from 4 to 1 in version 1.9.
453
+
431
454
max_iter : int, default=300
432
455
Maximum number of iterations of the SMACOF algorithm for a single run.
433
456
434
457
verbose : int, default=0
435
458
Level of verbosity.
436
459
437
- eps : float, default=1e-3
438
- Relative tolerance with respect to stress at which to declare
439
- convergence. The value of `eps` should be tuned separately depending
440
- on whether or not `normalized_stress` is being used.
460
+ eps : float, default=1e-6
461
+ The tolerance with respect to stress (normalized by the sum of squared
462
+ embedding distances) at which to declare convergence.
463
+
464
+ .. versionchanged:: 1.7
465
+ The default value for `eps` has changed from 1e-3 to 1e-6, as a result
466
+ of a bugfix in the computation of the convergence criterion.
441
467
442
468
n_jobs : int, default=None
443
469
The number of jobs to use for the computation. If multiple
@@ -464,9 +490,9 @@ class MDS(BaseEstimator):
464
490
``fit_transform``.
465
491
466
492
normalized_stress : bool or "auto", default="auto"
467
- Whether use and return normalized stress value (Stress-1) instead of raw
468
- stress. By default, metric MDS uses raw stress while non-metric MDS uses
469
- normalized stress.
493
+ Whether to return normalized stress value (Stress-1) instead of raw
494
+ stress. By default, metric MDS returns raw stress while non-metric MDS
495
+ returns normalized stress.
470
496
471
497
.. versionadded:: 1.2
472
498
@@ -539,7 +565,7 @@ class MDS(BaseEstimator):
539
565
>>> X, _ = load_digits(return_X_y=True)
540
566
>>> X.shape
541
567
(1797, 64)
542
- >>> embedding = MDS(n_components=2, normalized_stress='auto' )
568
+ >>> embedding = MDS(n_components=2, n_init=1 )
543
569
>>> X_transformed = embedding.fit_transform(X[:100])
544
570
>>> X_transformed.shape
545
571
(100, 2)
@@ -554,7 +580,7 @@ class MDS(BaseEstimator):
554
580
_parameter_constraints : dict = {
555
581
"n_components" : [Interval (Integral , 1 , None , closed = "left" )],
556
582
"metric" : ["boolean" ],
557
- "n_init" : [Interval (Integral , 1 , None , closed = "left" )],
583
+ "n_init" : [Interval (Integral , 1 , None , closed = "left" ), StrOptions ({ "warn" }) ],
558
584
"max_iter" : [Interval (Integral , 1 , None , closed = "left" )],
559
585
"verbose" : ["verbose" ],
560
586
"eps" : [Interval (Real , 0.0 , None , closed = "left" )],
@@ -569,10 +595,10 @@ def __init__(
569
595
n_components = 2 ,
570
596
* ,
571
597
metric = True ,
572
- n_init = 4 ,
598
+ n_init = "warn" ,
573
599
max_iter = 300 ,
574
600
verbose = 0 ,
575
- eps = 1e-3 ,
601
+ eps = 1e-6 ,
576
602
n_jobs = None ,
577
603
random_state = None ,
578
604
dissimilarity = "euclidean" ,
@@ -646,10 +672,20 @@ def fit_transform(self, X, y=None, init=None):
646
672
X_new : ndarray of shape (n_samples, n_components)
647
673
X transformed in the new space.
648
674
"""
675
+
676
+ if self .n_init == "warn" :
677
+ warnings .warn (
678
+ "The default value of `n_init` will change from 4 to 1 in 1.9." ,
679
+ FutureWarning ,
680
+ )
681
+ self ._n_init = 4
682
+ else :
683
+ self ._n_init = self .n_init
684
+
649
685
X = validate_data (self , X )
650
686
if X .shape [0 ] == X .shape [1 ] and self .dissimilarity != "precomputed" :
651
687
warnings .warn (
652
- "The MDS API has changed. ``fit`` now constructs an "
688
+ "The MDS API has changed. ``fit`` now constructs a "
653
689
" dissimilarity matrix from data. To use a custom "
654
690
"dissimilarity matrix, set "
655
691
"``dissimilarity='precomputed'``."
@@ -665,7 +701,7 @@ def fit_transform(self, X, y=None, init=None):
665
701
metric = self .metric ,
666
702
n_components = self .n_components ,
667
703
init = init ,
668
- n_init = self .n_init ,
704
+ n_init = self ._n_init ,
669
705
n_jobs = self .n_jobs ,
670
706
max_iter = self .max_iter ,
671
707
verbose = self .verbose ,
0 commit comments