From 6fcb429344031048f4b6fd33995b5f0ebb4a3afd Mon Sep 17 00:00:00 2001 From: ge72mum Date: Tue, 15 Nov 2022 11:13:46 +0100 Subject: [PATCH 1/4] Fixed future warning in plot_kmeans_assumptions --- examples/cluster/plot_kmeans_assumptions.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/cluster/plot_kmeans_assumptions.py b/examples/cluster/plot_kmeans_assumptions.py index 94f8ff6c58f52..5a33b7eef4092 100644 --- a/examples/cluster/plot_kmeans_assumptions.py +++ b/examples/cluster/plot_kmeans_assumptions.py @@ -27,7 +27,7 @@ X, y = make_blobs(n_samples=n_samples, random_state=random_state) # Incorrect number of clusters -y_pred = KMeans(n_clusters=2, random_state=random_state).fit_predict(X) +y_pred = KMeans(n_clusters=2, n_init="auto", random_state=random_state).fit_predict(X) plt.subplot(221) plt.scatter(X[:, 0], X[:, 1], c=y_pred) @@ -36,7 +36,7 @@ # Anisotropicly distributed data transformation = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]] X_aniso = np.dot(X, transformation) -y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_aniso) +y_pred = KMeans(n_clusters=3, n_init="auto", random_state=random_state).fit_predict(X_aniso) plt.subplot(222) plt.scatter(X_aniso[:, 0], X_aniso[:, 1], c=y_pred) @@ -46,7 +46,7 @@ X_varied, y_varied = make_blobs( n_samples=n_samples, cluster_std=[1.0, 2.5, 0.5], random_state=random_state ) -y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_varied) +y_pred = KMeans(n_clusters=3, n_init="auto", random_state=random_state).fit_predict(X_varied) plt.subplot(223) plt.scatter(X_varied[:, 0], X_varied[:, 1], c=y_pred) @@ -54,7 +54,7 @@ # Unevenly sized blobs X_filtered = np.vstack((X[y == 0][:500], X[y == 1][:100], X[y == 2][:10])) -y_pred = KMeans(n_clusters=3, random_state=random_state).fit_predict(X_filtered) +y_pred = KMeans(n_clusters=3, n_init="auto", random_state=random_state).fit_predict(X_filtered) plt.subplot(224) plt.scatter(X_filtered[:, 0], 
X_filtered[:, 1], c=y_pred) From 3b3cc113d34ce80b852b7561fc6a846a89318e93 Mon Sep 17 00:00:00 2001 From: ge72mum Date: Tue, 15 Nov 2022 11:53:41 +0100 Subject: [PATCH 2/4] fixed formatting --- examples/cluster/plot_kmeans_assumptions.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/cluster/plot_kmeans_assumptions.py b/examples/cluster/plot_kmeans_assumptions.py index 5a33b7eef4092..62e3f73c53da5 100644 --- a/examples/cluster/plot_kmeans_assumptions.py +++ b/examples/cluster/plot_kmeans_assumptions.py @@ -36,7 +36,8 @@ # Anisotropicly distributed data transformation = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]] X_aniso = np.dot(X, transformation) -y_pred = KMeans(n_clusters=3, n_init="auto", random_state=random_state).fit_predict(X_aniso) +y_pred = KMeans(n_clusters=3, n_init="auto", + random_state=random_state).fit_predict(X_aniso) plt.subplot(222) plt.scatter(X_aniso[:, 0], X_aniso[:, 1], c=y_pred) @@ -46,7 +47,8 @@ X_varied, y_varied = make_blobs( n_samples=n_samples, cluster_std=[1.0, 2.5, 0.5], random_state=random_state ) -y_pred = KMeans(n_clusters=3, n_init="auto", random_state=random_state).fit_predict(X_varied) +y_pred = KMeans(n_clusters=3, n_init="auto", + random_state=random_state).fit_predict(X_varied) plt.subplot(223) plt.scatter(X_varied[:, 0], X_varied[:, 1], c=y_pred) @@ -54,7 +56,8 @@ # Unevenly sized blobs X_filtered = np.vstack((X[y == 0][:500], X[y == 1][:100], X[y == 2][:10])) -y_pred = KMeans(n_clusters=3, n_init="auto", random_state=random_state).fit_predict(X_filtered) +y_pred = KMeans(n_clusters=3, n_init="auto", + random_state=random_state).fit_predict(X_filtered) plt.subplot(224) plt.scatter(X_filtered[:, 0], X_filtered[:, 1], c=y_pred) From 420ef21f0354e0e8eddc78f8b29c9ee0ac1fb404 Mon Sep 17 00:00:00 2001 From: ge72mum Date: Tue, 15 Nov 2022 12:02:06 +0100 Subject: [PATCH 3/4] fixed formatting --- examples/cluster/plot_kmeans_assumptions.py | 15 +++++++++------ 1 file changed, 9 
insertions(+), 6 deletions(-) diff --git a/examples/cluster/plot_kmeans_assumptions.py b/examples/cluster/plot_kmeans_assumptions.py index 62e3f73c53da5..e6d5519e4295e 100644 --- a/examples/cluster/plot_kmeans_assumptions.py +++ b/examples/cluster/plot_kmeans_assumptions.py @@ -36,8 +36,9 @@ # Anisotropicly distributed data transformation = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]] X_aniso = np.dot(X, transformation) -y_pred = KMeans(n_clusters=3, n_init="auto", - random_state=random_state).fit_predict(X_aniso) +y_pred = KMeans(n_clusters=3, n_init="auto", random_state=random_state).fit_predict( + X_aniso +) plt.subplot(222) plt.scatter(X_aniso[:, 0], X_aniso[:, 1], c=y_pred) @@ -47,8 +48,9 @@ X_varied, y_varied = make_blobs( n_samples=n_samples, cluster_std=[1.0, 2.5, 0.5], random_state=random_state ) -y_pred = KMeans(n_clusters=3, n_init="auto", - random_state=random_state).fit_predict(X_varied) +y_pred = KMeans(n_clusters=3, n_init="auto", random_state=random_state).fit_predict( + X_varied +) plt.subplot(223) plt.scatter(X_varied[:, 0], X_varied[:, 1], c=y_pred) @@ -56,8 +58,9 @@ # Unevenly sized blobs X_filtered = np.vstack((X[y == 0][:500], X[y == 1][:100], X[y == 2][:10])) -y_pred = KMeans(n_clusters=3, n_init="auto", - random_state=random_state).fit_predict(X_filtered) +y_pred = KMeans(n_clusters=3, n_init="auto", random_state=random_state).fit_predict( + X_filtered +) plt.subplot(224) plt.scatter(X_filtered[:, 0], X_filtered[:, 1], c=y_pred) From 2b7fc5b3d83d128a40b4ed1ae39d1f693fa8124d Mon Sep 17 00:00:00 2001 From: ge72mum Date: Tue, 15 Nov 2022 15:37:02 +0100 Subject: [PATCH 4/4] Explicitly set n_init=10 to obtain same plot --- examples/cluster/plot_kmeans_assumptions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/cluster/plot_kmeans_assumptions.py b/examples/cluster/plot_kmeans_assumptions.py index e6d5519e4295e..3bdfae86b4ff7 100644 --- a/examples/cluster/plot_kmeans_assumptions.py +++ 
b/examples/cluster/plot_kmeans_assumptions.py @@ -36,7 +36,7 @@ # Anisotropicly distributed data transformation = [[0.60834549, -0.63667341], [-0.40887718, 0.85253229]] X_aniso = np.dot(X, transformation) -y_pred = KMeans(n_clusters=3, n_init="auto", random_state=random_state).fit_predict( +y_pred = KMeans(n_clusters=3, n_init=10, random_state=random_state).fit_predict( X_aniso ) @@ -58,7 +58,7 @@ # Unevenly sized blobs X_filtered = np.vstack((X[y == 0][:500], X[y == 1][:100], X[y == 2][:10])) -y_pred = KMeans(n_clusters=3, n_init="auto", random_state=random_state).fit_predict( +y_pred = KMeans(n_clusters=3, n_init=10, random_state=random_state).fit_predict( X_filtered )