CHAPTER 4 – CLUSTER ANALYSIS
1. PARTITIONING METHODS
Partitioning methods are a widely used family of clustering algorithms in data mining. They divide a dataset into K non-overlapping clusters, with each object assigned to exactly one cluster. K-Means, used in the program below, repeatedly assigns each point to its nearest centroid and recomputes the centroids until they stabilise.
PROGRAM
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
data = {'x': [25, 34, 22, 27, 33, 33, 31, 22, 35, 34, 67, 54, 57, 43, 50,
              57, 59, 52, 65, 47, 49, 48, 35, 33, 44, 45, 38, 43, 51, 46],
        'y': [79, 51, 53, 78, 59, 74, 73, 57, 69, 75, 51, 32, 40, 47, 53,
              36, 35, 58, 59, 50, 25, 20, 14, 12, 20, 5, 29, 27, 8, 7]}
df = pd.DataFrame(data, columns=['x', 'y'])
# K-means Clustering
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0).fit(df)  # fixed random_state keeps the run reproducible
centroids = kmeans.cluster_centers_
print(centroids)
labels = kmeans.labels_
plt.scatter(df['x'], df['y'], c=labels.astype(float), s=50)
plt.scatter(centroids[:, 0], centroids[:, 1], c='red', s=50)
plt.show()
OUTPUT
The three centroid coordinates are printed, and a scatter plot of the points coloured by cluster is displayed with the centroids marked in red.
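The program above fixes K = 3 in advance. A common way to sanity-check that choice is the elbow method: run K-Means for several values of K and plot the within-cluster sum of squares (exposed by scikit-learn as the inertia_ attribute); the bend ("elbow") of the curve suggests a reasonable K. A minimal sketch, reusing the DataFrame df defined above (the range of K values is an arbitrary choice for illustration):
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Within-cluster sum of squares for K = 1..9 on the same df as above.
inertias = []
ks = range(1, 10)
for k in ks:
    km = KMeans(n_clusters=k, n_init=10, random_state=0).fit(df)
    inertias.append(km.inertia_)

plt.plot(list(ks), inertias, 'bo-')
plt.xlabel('Number of clusters K')
plt.ylabel('Inertia (within-cluster SSE)')
plt.title('Elbow Method')
plt.show()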
2. AGGLOMERATIVE CLUSTERING
Agglomerative clustering, commonly referred to as AGNES (AGglomerative NESting), works in a bottom-up manner: each observation starts as a single-element cluster (a leaf), and at each step the two most similar clusters are merged into a new, larger cluster (a node) until all points belong to one root cluster.
PROGRAM
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import AgglomerativeClustering
import scipy.cluster.hierarchy as shc
X = np.array([[2, 8], [8, 15], [3, 6], [6, 9], [8, 7], [10, 10]])
# Agglomerative Clustering
cluster = AgglomerativeClustering(n_clusters=3, linkage='ward')
# Ward linkage always uses Euclidean distance, so no metric argument is needed;
# the older affinity='euclidean' keyword has been removed in recent scikit-learn.
labels = cluster.fit_predict(X)
print(labels)
# Drawing Dendrograms
plt.figure(figsize=(10, 7))
plt.title("Employee Skill Dendrograms")
dend = shc.dendrogram(shc.linkage(X, method='ward'))
plt.show()
OUTPUT
[1 2 1 0 0 0]
A dendrogram of the six points is also displayed.
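The dendrogram only visualises the merge order. To cut the same hierarchy into a fixed number of flat clusters, SciPy's fcluster function can be applied to the linkage matrix. A minimal sketch using the same X as above (fcluster labels start at 1 and should correspond to the AgglomerativeClustering labels up to renaming):
import numpy as np
import scipy.cluster.hierarchy as shc

X = np.array([[2, 8], [8, 15], [3, 6], [6, 9], [8, 7], [10, 10]])
Z = shc.linkage(X, method='ward')                  # same linkage matrix as the dendrogram
flat = shc.fcluster(Z, t=3, criterion='maxclust')  # cut the tree into at most 3 clusters
print(flat)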
3. BALANCED ITERATIVE REDUCING AND CLUSTERING USING HIERARCHIES (BIRCH)
BIRCH summarises the dataset in a compact, in-memory clustering-feature (CF) tree, which makes it appropriate for very large datasets or for streaming data: it can find a good clustering solution with only a single scan of the data.
PROGRAM
from sklearn.cluster import Birch
X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]
brc = Birch(branching_factor=50, n_clusters=None,
            threshold=0.5, compute_labels=True)  # n_clusters=None: the CF-tree's subclusters are returned as the final clusters
brc.fit(X)
print(brc.predict(X))
OUTPUT
[0 0 0 1 1 1]
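Because BIRCH keeps only the CF-tree summary, it can also be fed data incrementally, which is what makes the single-scan, streaming use case practical. A minimal sketch using scikit-learn's partial_fit on the same toy points, split into two chunks as if they arrived as a stream (the chunk boundary is arbitrary):
from sklearn.cluster import Birch

X = [[0, 1], [0.3, 1], [-0.3, 1], [0, -1], [0.3, -1], [-0.3, -1]]
brc = Birch(branching_factor=50, n_clusters=None, threshold=0.5)
brc.partial_fit(X[:3])   # first chunk of the "stream"
brc.partial_fit(X[3:])   # second chunk
print(brc.predict(X))    # expected to match the batch result, [0 0 0 1 1 1]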
4. DENSITY-BASED CLUSTERING (DBSCAN)
DBSCAN locates regions of high density that are separated from one another by regions of low density; points that fall in low-density regions and belong to no cluster are labelled as noise (label -1).
PROGRAM
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
# Generate sample data
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers,
cluster_std=0.4, random_state=0)
X = StandardScaler().fit_transform(X)
# Compute DBSCAN
db = DBSCAN(eps=0.3, min_samples=10).fit(X)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_
# Number of clusters in labels, ignoring noise if present.
n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
n_noise = list(labels).count(-1)
print('Estimated number of clusters: %d' % n_clusters)
print('Estimated number of noise points: %d' % n_noise)
print("Homogeneity: %0.3f" %
metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" %
metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" %
metrics.v_measure_score(labels_true, labels))
print("Adjusted Rand Index: %0.3f" %
metrics.adjusted_rand_score(labels_true, labels))
print("Adjusted Mutual Information: %0.3f" %
metrics.adjusted_mutual_info_score(labels_true, labels))
print("Silhouette Coefficient: %0.3f" %
metrics.silhouette_score(X, labels))
# Plot result
# Black removed and is used for noise instead.
unique_labels = set(labels)
colors = plt.cm.Spectral(np.linspace(0, 1,
len(unique_labels)))
for k, col in zip(unique_labels, colors):
    if k == -1:
        # Black used for noise.
        col = [0, 0, 0, 1]
    class_member_mask = (labels == k)
    # Core samples are drawn with large markers...
    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
             markeredgecolor='k', markersize=14)
    # ...border (non-core) samples with small markers.
    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col),
             markeredgecolor='k', markersize=6)
plt.title('Estimated number of clusters: %d' % n_clusters)
plt.show()
OUTPUT
Estimated number of clusters: 3
Estimated number of noise points: 18
Homogeneity: 0.953
Completeness: 0.883
V-measure: 0.917
Adjusted Rand Index: 0.952
Adjusted Mutual Information: 0.916
Silhouette Coefficient: 0.626
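The DBSCAN result is sensitive to eps. A common heuristic for choosing it is the k-distance plot: sort every point's distance to roughly its min_samples-th nearest neighbour and look for the knee of the curve, which indicates a natural eps. A minimal sketch, assuming the standardised X from the program above (n_neighbors=10 mirrors min_samples=10; kneighbors counts the point itself as its own nearest neighbour):
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors

nbrs = NearestNeighbors(n_neighbors=10).fit(X)
distances, _ = nbrs.kneighbors(X)    # distances to the 10 nearest neighbours (self included)
k_dist = np.sort(distances[:, -1])   # farthest of those, sorted over all points
plt.plot(k_dist)
plt.xlabel('Points sorted by distance')
plt.ylabel('Distance to the 10th nearest neighbour')
plt.title('k-distance plot for choosing eps')
plt.show()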
5. OPTICS CLUSTERING VS DBSCAN CLUSTERING
The DBSCAN algorithm assumes a roughly constant density across clusters (a single global eps), whereas OPTICS orders the points by reachability distance and can therefore handle clusters of varying density; DBSCAN-style clusterings at any eps can also be extracted from the OPTICS reachability values.
PROGRAM
from sklearn.cluster import OPTICS, cluster_optics_dbscan
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy as np
# Generate sample data
np.random.seed(0)
n_points_per_cluster = 250
C1 = [-5, -2] + .8 * np.random.randn(n_points_per_cluster, 2)
C2 = [4, -1] + .1 * np.random.randn(n_points_per_cluster, 2)
C3 = [1, -2] + .2 * np.random.randn(n_points_per_cluster, 2)
C4 = [-2, 3] + .3 * np.random.randn(n_points_per_cluster, 2)
C5 = [3, -2] + 1.6 * np.random.randn(n_points_per_cluster, 2)
C6 = [5, 6] + 2 * np.random.randn(n_points_per_cluster, 2)
X = np.vstack((C1, C2, C3, C4, C5, C6))
clust = OPTICS(min_samples=50, xi=0.05,
min_cluster_size=0.05)
# Run the fit
clust.fit(X)
labels_050 = cluster_optics_dbscan(reachability=clust.reachability_,
core_distances=clust.core_distances_,
ordering=clust.ordering_,
eps=0.5)
labels_200 = cluster_optics_dbscan(reachability=clust.reachability_,
core_distances=clust.core_distances_,
ordering=clust.ordering_,
eps=2)
space = np.arange(len(X))
reachability = clust.reachability_[clust.ordering_]
labels = clust.labels_[clust.ordering_]
plt.figure(figsize=(10, 7))
G = gridspec.GridSpec(2, 3)
ax1 = plt.subplot(G[0, :])
ax2 = plt.subplot(G[1, 0])
# Reachability plot
colors = ['g.', 'r.', 'b.', 'y.', 'c.']
for klass, color in zip(range(0, 5), colors):
    Xk = space[labels == klass]
    Rk = reachability[labels == klass]
    ax1.plot(Xk, Rk, color, alpha=0.3)
ax1.plot(space[labels == -1], reachability[labels == -1], 'k.',
alpha=0.3)
ax1.plot(space, np.full_like(space, 2., dtype=float), 'k-',
alpha=0.5)
ax1.plot(space, np.full_like(space, 0.5, dtype=float), 'k-.',
alpha=0.5)
ax1.set_ylabel('Reachability (epsilon distance)')
ax1.set_title('Reachability Plot')
# OPTICS
colors = ['g.', 'r.', 'b.', 'y.', 'c.']
for klass, color in zip(range(0, 5), colors):
    Xk = X[clust.labels_ == klass]
    ax2.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3)
ax2.plot(X[clust.labels_ == -1, 0], X[clust.labels_ == -1, 1], 'k.', alpha=0.1)
ax2.set_title('Automatic Clustering\nOPTICS')
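# The DBSCAN label sets extracted above (labels_050 and labels_200) can be drawn
# next to the OPTICS result for comparison. The two panels below are an added
# sketch that fills the unused G[1, 1] and G[1, 2] cells of the grid.
ax3 = plt.subplot(G[1, 1])
colors = ['g.', 'r.', 'b.', 'c.']
for klass, color in zip(range(0, 4), colors):
    Xk = X[labels_050 == klass]
    ax3.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3)
ax3.plot(X[labels_050 == -1, 0], X[labels_050 == -1, 1], 'k+', alpha=0.1)
ax3.set_title('Clustering at 0.5 epsilon cut\nDBSCAN')
ax4 = plt.subplot(G[1, 2])
colors = ['g.', 'm.', 'y.', 'c.']
for klass, color in zip(range(0, 4), colors):
    Xk = X[labels_200 == klass]
    ax4.plot(Xk[:, 0], Xk[:, 1], color, alpha=0.3)
ax4.plot(X[labels_200 == -1, 0], X[labels_200 == -1, 1], 'k+', alpha=0.1)
ax4.set_title('Clustering at 2.0 epsilon cut\nDBSCAN')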
plt.tight_layout()
plt.show()
OUTPUT
A figure is displayed with the reachability plot on top and the clustering scatter plots below.