[go: up one dir, main page]

0% found this document useful (0 votes)
40 views2 pages

Airline Reservation

Download as txt, pdf, or txt
Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1/ 2

Unsupervised learning algorithm: k-means clustering in Python using scikit-learn

# K-means clustering script: load passenger data, draw an elbow plot of the
# within-cluster sum of squares (WCSS) for several cluster counts, then fit a
# two-cluster model and plot the clusters with their centroids.

# pandas is a Python library used to load and analyze tabular data.
import pandas as pd

# Matplotlib is a comprehensive library for creating static, animated, and
# interactive visualizations in Python.
import matplotlib.pyplot as plt

# seaborn works comfortably with pandas data frames and uses matplotlib to
# draw its plots.
import seaborn as sns

# scikit-learn provides tools for machine learning and statistical modeling,
# including classification, regression, clustering and dimensionality
# reduction. Only the KMeans estimator is needed here.
from sklearn.cluster import KMeans

# 'whitegrid' style: white plot background with grid lines.
# NOTE(review): the font_scale value was cut off in the original line; 1 is
# seaborn's default and is used as a placeholder - confirm the intended value.
sns.set(context="paper", palette="Blues", style='whitegrid', font_scale=1)

# Read the data; the PassengerID column becomes the index.
data = pd.read_csv('E:\\DM\\passenger_details.csv', index_col='PassengerID')

# Print the first five rows.
print(data.head())

# Select the columns at 0-based positions 4 and 5 as the feature matrix.
# There is no target (y) value because clustering is unsupervised.
X = data.iloc[:, [4, 5]].values

# Cluster counts tried for the elbow plot: range(1, 5) yields k = 1, 2, 3, 4.
cluster_counts = range(1, 5)

# WCSS (inertia) of the fitted model for each cluster count.
wcss = []
for i in cluster_counts:
    # n_clusters = i: fit one model per candidate cluster count.
    kmeans = KMeans(n_clusters=i, init='k-means++', max_iter=300,
                    n_init=10, random_state=0)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)

# Elbow plot: the bend in the WCSS curve suggests how many clusters the data
# contains.
plt.figure()
plt.plot(cluster_counts, wcss, marker='o', color='red')
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()

# Final model with two clusters, using k-means++ centroid initialization.
kmeans = KMeans(n_clusters=2, init='k-means++')

# y_kmeans holds the cluster index assigned to each row of X.
y_kmeans = kmeans.fit_predict(X)

# Scatter plot of the two clusters, one color per cluster.
plt.figure()
plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], color='yellow',
            label='Cluster 1', s=15)
plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], color='blue',
            label='Cluster 2', s=15)

# NOTE(review): the centroid color was lost in the original line; 'red' is a
# placeholder chosen to contrast with the cluster colors - confirm.
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            color='red', label='Centroids', s=20, marker=',')

# The legend maps each color to its cluster / centroid label.
plt.legend()
plt.show()

print("\n Running Successfully")

You might also like