[go: up one dir, main page]

0% found this document useful (0 votes)
40 views2 pages

Airline Reservation

Download as txt, pdf, or txt
Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1/ 2

Unsupervised learning algorithm: k-means clustering in Python using scikit-learn

#Pandas is a Python library. Pandas is used to analyze data.


import pandas as pd

# Matplotlib is a comprehensive library for creating static, animated, and
# interactive visualizations in Python.
import matplotlib.pyplot as plt

# Seaborn works comfortably with pandas DataFrames; it uses matplotlib to
# draw its plots.
import seaborn as sns
# Scikit-learn is probably the most useful library for machine learning in
# Python. The sklearn library contains a lot of efficient tools for machine
# learning and statistical modeling, including classification, regression,
# clustering, and dimensionality reduction.
from sklearn.cluster import KMeans

# Global seaborn theme for every plot below: "paper" context, "Blues"
# palette, and the "whitegrid" style (white background with grid lines).
sns.set(
    context="paper",
    palette="Blues",
    style='whitegrid',
    font_scale=1,
    color_codes=True,
)

# Load the passenger table; the PassengerID column becomes the row index.
data = pd.read_csv(
    'E:\\DM\\passenger_details.csv',
    index_col='PassengerID',
)

# Show the first five rows as a quick sanity check.
print(data.head())

# Clustering is unsupervised, so there is no target (y) column. Select the
# two feature columns at zero-based positions 4 and 5 (counted after the
# index column has been removed) and take them as a NumPy array.
feature_positions = [4, 5]
X = data.iloc[:, feature_positions].values
print(X)

# Within-cluster sum of squares (WCSS) for each candidate cluster count.
wcss = []

# range(1, 5) tries 1, 2, 3 and 4 clusters (the upper bound is exclusive).
for i in range(1, 5):
    # k-means++ initialisation, 10 restarts, at most 300 iterations each;
    # the fixed random_state makes the search reproducible.
    kmeans = KMeans(
        n_clusters=i,
        init='k-means++',
        max_iter=300,
        n_init=10,
        random_state=0,
    )

    # Fit the model to the feature matrix.
    kmeans.fit(X)

    # inertia_ is the fitted model's sum of squared distances from each
    # sample to its closest centroid, i.e. the WCSS for this cluster count.
    wcss.append(kmeans.inertia_)

# Elbow plot: WCSS against the number of clusters. The "elbow" -- the point
# where the curve stops dropping sharply -- suggests how many clusters the
# data contains.
cluster_counts = range(1, 5)
plt.figure(figsize=(10, 5))
plt.plot(cluster_counts, wcss, color='red', marker='o')

# Title and axis labels, then render the figure.
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()
# Final model: two clusters with k-means++ centroid initialisation. max_iter,
# n_init and random_state match the elbow search so the fit is reproducible
# and the cluster labels (0/1) do not swap from one run to the next.
kmeans = KMeans(
    n_clusters=2,
    init='k-means++',
    max_iter=300,
    n_init=10,
    random_state=0,
)

# fit_predict fits the model and returns the cluster index assigned to each
# row of X.
y_kmeans = kmeans.fit_predict(X)

# Scatter plot of the two clusters together with their centroids.
plt.figure(figsize=(15, 7))

# One scatter call per cluster: the boolean mask (y_kmeans == cluster) picks
# the rows assigned to that cluster; column 0 of X goes on the x-axis and
# column 1 on the y-axis.
cluster_styles = (('yellow', 'Cluster 1'), ('blue', 'Cluster 2'))
for cluster, (color, label) in enumerate(cluster_styles):
    members = X[y_kmeans == cluster]
    plt.scatter(members[:, 0], members[:, 1], color=color, label=label, s=15)

# Centroids of the fitted model, drawn in red with the ',' (pixel) marker.
centroids = kmeans.cluster_centers_
plt.scatter(
    centroids[:, 0],
    centroids[:, 1],
    color='red',
    label='Centroids',
    s=20,
    marker=',',
)

# Switch the grid lines off for this figure.
plt.grid(False)
plt.title('Cluster_of_passenger')
plt.xlabel('Customer_ratings')
plt.ylabel('Age_of_passenger')

# The legend maps each colour to its label, giving the plot elements meaning.
plt.legend()
plt.show()

print("\n Running Successfully")

You might also like