program-8
December 7, 2023
0.0.1 Implement the K-Means and DBSCAN clustering algorithms using an appropriate data set.
# Cell [5]: DBSCAN clustering of mall customers by annual income and
# spending score. (Reconstructed from garbled PDF line-wrapping.)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import DBSCAN

data = pd.read_csv("Mall_Customers.csv")
data.head()
print("Dataset shape:", data.shape)
data.isnull().any().any()  # quick null check — True iff any value is missing

# Feature matrix: the two columns we cluster on.
x = data.loc[:, ['Annual Income (k$)', 'Spending Score (1-100)']].values

# DBSCAN discovers the number of clusters itself: a core point needs at
# least min_samples=4 neighbours within eps=8 (same units as the features).
dbscan = DBSCAN(eps=8, min_samples=4).fit(x)
labels = dbscan.labels_  # cluster label per sample; -1 marks noise points

# Plot the clusters; the colormap colours noise (-1) distinctly as well.
plt.scatter(x[:, 0], x[:, 1], c=labels, cmap="plasma")
plt.xlabel("Income")
plt.ylabel("Spending Score")
plt.show()
Dataset shape: (200, 5)
# Cells [6]-[7]: re-import the libraries and load the data set.
# NOTE: these aliases (nm, mtp) differ from cell [5]'s (np, plt) but are
# kept because the later K-Means cells reference nm/mtp.
import numpy as nm
import matplotlib.pyplot as mtp
import pandas as pd

# Importing the dataset
dataset = pd.read_csv('Mall_Customers.csv')
dataset  # rich display of the frame (200 rows x 5 columns)
[7]: CustomerID Gender Age Annual Income (k$) Spending Score (1-100)
0 1 Male 19 15 39
1 2 Male 21 15 81
2 3 Female 20 16 6
3 4 Female 23 16 77
1
4 5 Female 31 17 40
.. … … … … …
195 196 Female 35 120 79
196 197 Female 45 126 28
197 198 Male 32 126 74
198 199 Male 32 137 18
199 200 Male 30 137 83
[200 rows x 5 columns]
[9]:
# Cell [8]: feature matrix for clustering. Select the columns by name
# (equivalent to the positional dataset.iloc[:, [3, 4]], but robust to
# column reordering): Annual Income (k$) and Spending Score (1-100).
x = dataset[['Annual Income (k$)', 'Spending Score (1-100)']].values
x[:5]  # preview the first rows instead of dumping all 200
[9]: array([[ 15,
39], [ 15,
81],
[ 16, 6],
[ 16, 77],
[ 17, 40],
[ 17, 76],
[ 18, 6],
[ 18, 94],
[ 19, 3],
[ 19, 72],
[ 19, 14],
[ 19, 99],
[ 20, 15],
[ 20, 77],
[ 20, 13],
[ 20, 79],
[ 21, 35],
[ 21, 66],
[ 23, 29],
[ 23, 98],
[ 24, 35],
[ 24, 73],
[ 25, 5],
[ 25, 73],
[ 28, 14],
[ 28, 82],
2
[ 28, 32],
[ 28, 61],
[ 29, 31],
[ 29, 87],
[ 30, 4],
[ 30, 73],
[ 33, 4],
[ 33, 92],
[ 33, 14],
[ 33, 81],
[ 34, 17],
[ 34, 73],
[ 37, 26],
[ 37, 75],
[ 38, 35],
[ 38, 92],
[ 39, 36],
[ 39, 61],
[ 39, 28],
[ 39, 65],
[ 40, 55],
[ 40, 47],
[ 40, 42],
[ 40, 42],
[ 42, 52],
[ 42, 60],
[ 43, 54],
[ 43, 60],
[ 43, 45],
[ 43, 41],
[ 44, 50],
[ 44, 46],
[ 46, 51],
[ 46, 46],
[ 46, 56],
[ 46, 55],
[ 47, 52],
[ 47, 59],
[ 48, 51],
[ 48, 59],
[ 48, 50],
[ 48, 48],
[ 48, 59],
[ 48, 47],
[ 49, 55],
[ 49, 42],
3
[ 50, 49],
[ 50, 56],
[ 54, 47],
[ 54, 54],
[ 54, 53],
[ 54, 48],
[ 54, 52],
[ 54, 42],
[ 54, 51],
[ 54, 55],
[ 54, 41],
[ 54, 44],
[ 54, 57],
[ 54, 46],
[ 57, 58], [ 57, 55],
[ 58, 60],
[ 58, 46],
[ 59, 55],
[ 59, 41],
[ 60, 49],
[ 60, 40],
[ 60, 42],
[ 60, 52],
[ 60, 47],
[ 60, 50],
[ 61, 42],
[ 61, 49],
[ 62, 41],
[ 62, 48],
[ 62, 59],
[ 62, 55],
[ 62, 56],
[ 62, 42],
[ 63, 50],
[ 63, 46],
[ 63, 43],
[ 63, 48],
[ 63, 52],
[ 63, 54],
[ 64, 42],
[ 64, 46],
[ 65, 48],
[ 65, 50],
[ 65, 43],
[ 65, 59],
[ 67, 43],
4
[ 67, 57],
[ 67, 56],
[ 67, 40],
[ 69, 58],
[ 69, 91],
[ 70, 29],
[ 70, 77],
[ 71, 35],
[ 71, 95],
[ 71, 11],
[ 71, 75],
[ 71, 9],
[ 71, 75],
[ 72, 34],
[ 72, 71], [ 73, 5],
[ 73, 88],
[ 73, 7],
[ 73, 73],
[ 74, 10],
[ 74, 72],
[ 75, 5],
[ 75, 93],
[ 76, 40],
[ 76, 87],
[ 77, 12],
[ 77, 97],
[ 77, 36],
[ 77, 74],
[ 78, 22],
[ 78, 90],
[ 78, 17],
[ 78, 88],
[ 78, 20],
[ 78, 76],
[ 78, 16],
[ 78, 89],
[ 78, 1],
[ 78, 78],
[ 78, 1],
[ 78, 73],
[ 79, 35],
[ 79, 83],
[ 81, 5],
[ 81, 93],
[ 85, 26],
[ 85, 75],
5
[ 86, 20],
[ 86, 95],
[ 87, 27],
[ 87, 63],
[ 87, 13],
[ 87, 75],
[ 87, 10],
[ 87, 92],
[ 88, 13],
[ 88, 86],
[ 88, 15],
[ 88, 69],
[ 93, 14],
[ 93, 90],
[ 97, 32], [ 97, 86],
[ 98, 15],
[ 98, 88],
[ 99, 39],
[ 99, 97],
[101, 24],
[101, 68],
[103, 17],
[103, 85],
[103, 23],
[103, 69],
[113, 8],
[113, 91],
[120, 16],
[120, 79],
[126, 28],
[126, 74],
[137, 18],
[137, 83]], dtype=int64)
# Cell [10]: finding the optimal number of clusters using the elbow method.
from sklearn.cluster import KMeans

wcss_list = []  # within-cluster sum of squares (inertia) for each k

# Fit K-Means for k = 1..10 and record the inertia.
for i in range(1, 11):
    # init='k-means++' (the extracted text garbled this as 'k-means+ +').
    # n_init=10 pins the pre-1.4 default explicitly, which also silences
    # the FutureWarning flood seen in this cell's output.
    kmeans = KMeans(n_clusters=i, init='k-means++', n_init=10, random_state=42)
    kmeans.fit(x)
    wcss_list.append(kmeans.inertia_)

mtp.plot(range(1, 11), wcss_list)
mtp.title('The Elbow Method Graph')  # fixed typo: 'Elobw' -> 'Elbow'
mtp.xlabel('Number of clusters(k)')
mtp.ylabel('wcss_list')
mtp.show()
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1412:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1436: UserWarning: KMeans is known to have a memory leak
on Windows with MKL, when there are less chunks than available
threads. You can avoid it by setting the environment variable
OMP_NUM_THREADS=1.
warnings.warn(
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1412:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1436: UserWarning: KMeans is known to have a memory leak
on Windows with MKL, when there are less chunks than available
threads. You can avoid it by setting the environment variable
OMP_NUM_THREADS=1.
warnings.warn(
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1412:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1436: UserWarning: KMeans is known to have a memory leak
on Windows with MKL, when there are less chunks than available
threads. You can avoid it by setting the environment variable
OMP_NUM_THREADS=1.
warnings.warn(
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1412:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in
7
1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1436: UserWarning: KMeans is known to have a memory leak
on Windows with MKL, when there are less chunks than available
threads. You can avoid it by setting the environment variable
OMP_NUM_THREADS=1.
warnings.warn(
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1412:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1436: UserWarning: KMeans is known to have a memory leak
on Windows with MKL, when there are less chunks than available
threads. You can avoid it by setting the environment variable
OMP_NUM_THREADS=1.
warnings.warn(
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1412:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1436: UserWarning: KMeans is known to have a memory leak
on Windows with MKL, when there are less chunks than available
threads. You can avoid it by setting the environment variable
OMP_NUM_THREADS=1.
warnings.warn(
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1412:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1436:
UserWarning: KMeans is known to have a memory leak on Windows with
MKL, when there are less chunks than available threads. You can
avoid it by setting the environment variable OMP_NUM_THREADS=1.
warnings.warn(
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1412:
8
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1436: UserWarning: KMeans is known to have a memory leak
on Windows with MKL, when there are less chunks than available
threads. You can avoid it by setting the environment variable
OMP_NUM_THREADS=1.
warnings.warn(
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1412:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1436: UserWarning: KMeans is known to have a memory leak
on Windows with MKL, when there are less chunks than available
threads. You can avoid it by setting the environment variable
OMP_NUM_THREADS=1.
warnings.warn(
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1412:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1436: UserWarning: KMeans is known to have a memory leak
on Windows with MKL, when there are less chunks than available
threads. You can avoid it by setting the environment variable
OMP_NUM_THREADS=1.
warnings.warn(
9
# Cell [11]: training the K-Means model with k=5 (chosen from the elbow
# plot above) and visualising the resulting customer segments.
# n_init=10 pins the pre-1.4 default explicitly and silences the warning.
kmeans = KMeans(n_clusters=5, init='k-means++', n_init=10, random_state=42)
y_predict = kmeans.fit_predict(x)

# One scatter call per cluster so each gets its own colour and legend
# entry (replaces five copy-pasted calls with a loop).
colors = ['blue', 'green', 'red', 'cyan', 'magenta']
for cluster, color in enumerate(colors):
    mtp.scatter(x[y_predict == cluster, 0], x[y_predict == cluster, 1],
                s=100, c=color, label=f'Cluster {cluster + 1}')

# Centroids drawn on top of the clusters.
mtp.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s=300, c='yellow', label='Centroid')
mtp.title('Clusters of customers')
mtp.xlabel('Annual Income (k$)')
mtp.ylabel('Spending Score (1-100)')
mtp.legend()
mtp.show()
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1412:
FutureWarning: The default value of `n_init` will change from 10 to
'auto' in
1.4. Set the value of `n_init` explicitly to suppress the warning
super()._check_params_vs_input(X, default_n_init=10)
C:\Users\shilpa\anaconda3\Lib\site-packages\sklearn\cluster\
_kmeans.py:1436: UserWarning: KMeans is known to have a memory
leak on Windows with MKL, when there are less chunks than
available threads. You can avoid it by setting the environment
variable OMP_NUM_THREADS=1. warnings.warn(
[ ]:
11