Minor changes in the classical mds · scikit-learn/scikit-learn@bf04a74 · GitHub
[go: up one dir, main page]

Skip to content

Commit bf04a74

Browse files
committed
Minor changes in the classical mds
- check_arrays was deprecated; changed to check_array. - The eigendecomposition is now done using scipy.sparse.linalg, which allows retrieving only the k largest eigenvalues and eigenvectors. - Uses sklearn.metrics.euclidean_distances when possible.
1 parent a720390 commit bf04a74

File tree

2 files changed

+11
-11
lines changed

2 files changed

+11
-11
lines changed

sklearn/manifold/mds.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
# Licence: BSD
77

88
import numpy as np
9+
from scipy.sparse import linalg
910

1011
import warnings
1112

@@ -106,7 +107,9 @@ def _smacof_single(similarities, metric=True, n_components=2, init=None,
106107
(disparities ** 2).sum())
107108

108109
# Compute stress
109-
stress = ((dis.ravel() - disparities.ravel()) ** 2).sum() / 2
110+
stress = (euclidean_distances(
111+
dis.ravel(), disparities.ravel()) ** 2).sum()
112+
stress /= 2
110113

111114
# Update X using the Guttman transform
112115
dis[dis == 0] = 1e-5
@@ -285,7 +288,7 @@ def svd_mds(similarities, n_components=2):
285288
overridden if initial array is provided.
286289
"""
287290

288-
similarities, = check_array(similarities, sparse_format='dense')
291+
similarities = check_array(similarities)
289292
n_samples = similarities.shape[0]
290293

291294
if similarities.shape[0] != similarities.shape[1]:
@@ -296,16 +299,15 @@ def svd_mds(similarities, n_components=2):
296299

297300
H = np.eye(*similarities.shape) - 1./n_samples*np.ones(similarities.shape)
298301
K = -0.5*np.dot(H, np.dot(similarities**2, H))
299-
w, V = np.linalg.eig(K)
302+
w, V = linalg.eigs(K, k=n_components)
300303
# Sort eigenvalues and eigenvectors in decreasing order
301-
ix = np.argsort(w)[::-1]
302-
w = w[ix]
303-
V = V[:, ix]
304304
if not np.all(w >= -1e-12):
305305
raise ValueError("similarities must be euclidean")
306306
X = np.sqrt(w[:n_components])*V[:, :n_components]
307307
dists = euclidean_distances(X)
308-
stress = ((similarities.ravel() - dists.ravel()) ** 2).sum() / 2
308+
stress = (euclidean_distances(
309+
similarities.ravel(), dists.ravel()) ** 2).sum()
310+
stress /= 2
309311
return X, stress
310312

311313

sklearn/manifold/tests/test_mds.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,8 @@ def test_MDS():
6565

6666
def test_svd_mds():
6767
# Generate 4 randomly chosen points
68-
Y = np.array([[1, 0, 1],
69-
[-1, 3, 2],
70-
[1, -2, 3],
71-
[2, -1, -3]])
68+
random_state = np.random.RandomState(seed=42)
69+
Y = random_state.randint(-10, 10, (10, 2))
7270
sim = euclidean_distances(Y)
7371
# calculate error or smacof-based solution
7472
X_smacof, smacof_stress = mds.smacof(sim, n_components=2, random_state=42)

0 commit comments

Comments
 (0)
0