# 16/11/2024, 19:15 kmeans.ipynb - Colab
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import manhattan_distances
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from scipy.spatial.distance import cityblock
import matplotlib.pyplot as plt
# Step 1: Load the Iris dataset and expose its features and targets.
from sklearn.datasets import load_iris

data = load_iris()
X, y = data.data, data.target
# Tabular view of the same feature matrix, labelled with the feature names.
df = pd.DataFrame(data=X, columns=data.feature_names)
# Step 2: Define a custom K-Means class with Manhattan distance
class KMeansManhattan:
    """K-Means-style clustering that assigns points by Manhattan (L1) distance.

    Centroids are initialised from randomly chosen samples and refined by
    alternating nearest-centroid assignment with per-cluster mean updates,
    until the centroids stop moving or ``max_iter`` passes have run.

    Args:
        n_clusters: Number of clusters to form.
        max_iter: Upper bound on the number of assignment/update passes.
        random_state: Seed for the centroid initialisation. The default of 42
            reproduces the initial centroids of the seed that used to be
            hard-coded inside ``fit``.

    Attributes set by ``fit``:
        centroids: Array of shape (n_clusters, n_features).
        labels: Cluster index of every training sample under ``centroids``.
    """

    def __init__(self, n_clusters, max_iter=100, random_state=42):
        self.n_clusters = n_clusters
        self.max_iter = max_iter
        self.random_state = random_state

    @staticmethod
    def _nearest_centroid(X, centroids):
        """Return, for each row of X, the index of its L1-nearest centroid."""
        # Broadcast to (n_samples, n_clusters, n_features) so every
        # sample/centroid distance is computed in a single vectorised pass.
        distances = np.abs(X[:, np.newaxis, :] - centroids[np.newaxis, :, :]).sum(axis=2)
        # argmin keeps the lowest cluster index on ties.
        return distances.argmin(axis=1)

    def fit(self, X):
        """Cluster the rows of X and store ``centroids`` and ``labels``.

        Args:
            X: 2-D array-like of shape (n_samples, n_features).

        Returns:
            The fitted estimator itself, so calls can be chained.

        Raises:
            ValueError: If ``n_clusters`` exceeds the number of samples
                (raised by the sampling without replacement).
        """
        X = np.asarray(X, dtype=float)
        # A private generator keeps the initialisation reproducible without
        # reseeding the process-wide NumPy RNG as a side effect.
        rng = np.random.RandomState(self.random_state)
        seed_rows = rng.choice(X.shape[0], self.n_clusters, replace=False)
        centroids = X[seed_rows]

        for _ in range(self.max_iter):
            labels = self._nearest_centroid(X, centroids)
            # An empty cluster keeps its previous centroid instead of
            # collapsing to NaN.
            updated = np.array([
                X[labels == k].mean(axis=0) if np.any(labels == k) else centroids[k]
                for k in range(self.n_clusters)
            ])
            if np.array_equal(updated, centroids):
                break
            centroids = updated

        self.centroids = centroids
        # Re-assign against the final centroids so that labels always agree
        # with predict(X), even when max_iter ran out before convergence.
        self.labels = self._nearest_centroid(X, centroids)
        return self

    def predict(self, X):
        """Return the index of the L1-nearest fitted centroid for each row of X."""
        return self._nearest_centroid(np.asarray(X, dtype=float), self.centroids)
# Step 3: Split the dataset into training and testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Step 4: Apply K-Means clustering
n_clusters = 3
kmeans = KMeansManhattan(n_clusters=n_clusters)
kmeans.fit(X_train)
kmeans_train_labels = kmeans.labels
kmeans_test_labels = kmeans.predict(X_test)

# Step 5: KNN Classification
# The classifier is trained on cluster ids (not the true species), so its
# predictions are cluster ids as well.
knn = KNeighborsClassifier(n_neighbors=3, metric='manhattan')
knn.fit(X_train, kmeans_train_labels)
knn_test_labels = knn.predict(X_test)

# Step 6: Evaluate the results
# Cluster ids are arbitrary: cluster 0 need not correspond to class 0, so
# comparing them directly with y_test gives a meaningless score. Map every
# cluster to the majority true class among its training members first.
cluster_to_class = np.arange(n_clusters)  # an empty cluster keeps its own id
for cluster_id in range(n_clusters):
    member_classes = y_train[kmeans_train_labels == cluster_id]
    if member_classes.size:
        cluster_to_class[cluster_id] = np.bincount(member_classes).argmax()
kmeans_accuracy = accuracy_score(y_test, cluster_to_class[kmeans_test_labels])
knn_accuracy = accuracy_score(y_test, cluster_to_class[knn_test_labels])

# Step 7: Print results
print("K-Means Clustering and KNN Classification Results:")
print("=" * 50)
# K-Means Results
print("\n1. K-Means Clustering Results:")
print(f"Cluster Assignments for Test Samples: {kmeans_test_labels}")
print(f"K-Means Accuracy on Test Data: {kmeans_accuracy:.2f}")
# https://colab.research.google.com/drive/13PN2Rhm-conVmf6zItrTBMptNxCaVonT#scrollTo=U40-DOabMp5G&printMode=true 1/2
# 16/11/2024, 19:15 kmeans.ipynb - Colab
# KNN results: heading, raw predictions, then the accuracy to two decimals.
print(
    "\n2. KNN Classification Results:",
    f"KNN Predicted Labels for Test Samples: {knn_test_labels}",
    f"KNN Accuracy on Test Data: {knn_accuracy:.2f}",
    sep="\n",
)
# Closing remark.
print(
    "\n3. Conclusion:",
    "K-Means and KNN results show the unsupervised nature of K-Means, and KNN helps evaluate the clusters.",
    sep="\n",
)
# Recorded cell output:
# K-Means Clustering and KNN Classification Results:
# ==================================================
# 1. K-Means Clustering Results:
# Cluster Assignments for Test Samples: [2 0 1 2 2 0 2 1 2 2 1 0 0 0 0 2 1 2 2 1 0 2 0 1 1 1 1 1 0 0 0 0 2 0 0 2 2
#  0 0 0 2 2 2 0 0]
# K-Means Accuracy on Test Data: 0.49
# 2. KNN Classification Results:
# KNN Predicted Labels for Test Samples: [2 0 1 2 2 0 2 1 2 2 1 0 0 0 0 2 1 2 2 1 0 2 0 1 1 1 1 1 0 0 0 0 2 0 0 2 2
#  0 0 0 2 2 1 0 0]
# KNN Accuracy on Test Data: 0.51
# 3. Conclusion:
# K-Means and KNN results show the unsupervised nature of K-Means, and KNN helps evaluate the clusters.
# https://colab.research.google.com/drive/13PN2Rhm-conVmf6zItrTBMptNxCaVonT#scrollTo=U40-DOabMp5G&printMode=true 2/2