ML Lab Manual

DEPARTMENT OF COMPUTER SCIENCE AND ENGINEERING

1. Write a program to solve the water-jug problem using heuristic rules.
# Jug 1 capacity x = 4 litres, jug 2 capacity y = 3 litres; goal state is (2, 0).
j1 = 0
j2 = 0
x = 4
y = 3
print("initial state=(0,0)")
print("final state=(2,0)")
print("capacities=(4,3)")
while j1 != 2:                # stop once jug 1 holds 2 litres
    r = int(input("enter rule:"))
    if r == 1:                # rule 1: fill jug 1
        j1 = x
    elif r == 2:              # rule 2: fill jug 2
        j2 = y
    elif r == 3:              # rule 3: empty jug 1
        j1 = 0
    elif r == 4:              # rule 4: empty jug 2
        j2 = 0
    elif r == 5:              # rule 5: pour from jug 1 until jug 2 is full
        t = y - j2
        j1 -= t
        j2 = y
        if j1 < 0:
            j1 = 0
    elif r == 6:              # rule 6: pour from jug 2 until jug 1 is full
        t = x - j1
        j1 = x
        j2 -= t
        if j2 < 0:
            j2 = 0
    elif r == 7:              # rule 7: pour all of jug 1 into jug 2
        j2 += j1
        j1 = 0
        if j2 > y:            # clamp to jug 2's capacity
            j2 = y
    elif r == 8:              # rule 8: pour all of jug 2 into jug 1
        j1 += j2
        j2 = 0
        if j1 > x:            # clamp to jug 1's capacity
            j1 = x
    print(j1, j2)


Output

initial state=(0,0)
final state=(2,0)
capacities=(4,3)
enter rule:2
0 3
enter rule:8
3 0
enter rule:2
3 3
enter rule:6
4 2
enter rule:3
0 2
enter rule:8
2 0


2. Write a program to use BEST-FIRST SEARCH applied to the eight-puzzle problem.
class Solution:
    def solve(self, board):
        # Map each board state (flattened to a tuple) to its depth from the start.
        depths = {}
        flatten = []
        for i in range(len(board)):
            flatten += board[i]
        flatten = tuple(flatten)
        depths[flatten] = 0
        if flatten == (0, 1, 2, 3, 4, 5, 6, 7, 8):
            return 0
        return self.get_paths(depths)

    def get_paths(self, depths):
        # Expand states level by level until the goal state is generated.
        cnt = 0
        while True:
            current_nodes = [x for x in depths if depths[x] == cnt]
            if len(current_nodes) == 0:
                return -1
            for node in current_nodes:
                next_moves = self.find_next(node)
                for move in next_moves:
                    if move not in depths:
                        depths[move] = cnt + 1
                        if move == (0, 1, 2, 3, 4, 5, 6, 7, 8):
                            return cnt + 1
            cnt += 1

    def find_next(self, node):
        # Legal blank-tile (0) moves for each position on the 3x3 board.
        moves = {
            0: [1, 3],
            1: [0, 2, 4],
            2: [1, 5],
            3: [0, 4, 6],
            4: [1, 3, 5, 7],
            5: [2, 4, 8],
            6: [3, 7],
            7: [4, 6, 8],
            8: [5, 7],
        }
        results = []
        pos_0 = node.index(0)
        for move in moves[pos_0]:
            new_node = list(node)
            new_node[move], new_node[pos_0] = new_node[pos_0], new_node[move]
            results.append(tuple(new_node))
        return results

ob = Solution()
matrix = [
    [3, 1, 2],
    [4, 7, 5],
    [6, 8, 0]
]
print(ob.solve(matrix))

Input

matrix = [
    [3, 1, 2],
    [4, 7, 5],
    [6, 8, 0]
]

Output: 4
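The code above expands states level by level, an uninformed breadth-first search that returns the minimum move count. The title asks for best-first search, which orders expansion by a heuristic instead; a minimal sketch (not part of the original manual) using a misplaced-tiles heuristic and reusing find_next from the class above:

import heapq

def misplaced(state, goal=(0, 1, 2, 3, 4, 5, 6, 7, 8)):
    # Count tiles out of place, ignoring the blank (0).
    return sum(1 for i, t in enumerate(state) if t != 0 and t != goal[i])

def best_first(start, goal=(0, 1, 2, 3, 4, 5, 6, 7, 8)):
    heap = [(misplaced(start), start, 0)]   # (heuristic, state, depth)
    seen = {start}
    while heap:
        _, state, depth = heapq.heappop(heap)
        if state == goal:
            return depth
        for nxt in Solution().find_next(state):
            if nxt not in seen:
                seen.add(nxt)
                heapq.heappush(heap, (misplaced(nxt), nxt, depth + 1))
    return -1

print(best_first((3, 1, 2, 4, 7, 5, 6, 8, 0)))

Note that greedy best-first expands far fewer states but does not guarantee the minimum move count that the level-by-level search returns.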


3. Implement A* Search Algorithm using Python.

def aStarAlgo(start_node, stop_node):
    open_set = set(start_node)
    closed_set = set()
    g = {}                        # cost from the start node
    parents = {}                  # parent map for path reconstruction
    g[start_node] = 0
    parents[start_node] = start_node
    while len(open_set) > 0:
        n = None
        # Pick the open node with the lowest f(v) = g(v) + h(v).
        for v in open_set:
            if n == None or g[v] + heuristic(v) < g[n] + heuristic(n):
                n = v
        if n == stop_node or Graph_nodes[n] == None:
            pass
        else:
            for (m, weight) in get_neighbors(n):
                if m not in open_set and m not in closed_set:
                    open_set.add(m)
                    parents[m] = n
                    g[m] = g[n] + weight
                else:
                    # A shorter route to m was found: update and reopen if needed.
                    if g[m] > g[n] + weight:
                        g[m] = g[n] + weight
                        parents[m] = n
                        if m in closed_set:
                            closed_set.remove(m)
                            open_set.add(m)
        if n == None:
            print('Path does not exist!')
            return None
        if n == stop_node:
            path = []
            while parents[n] != n:
                path.append(n)
                n = parents[n]
            path.append(start_node)
            path.reverse()
            print('Path found: {}'.format(path))
            return path
        open_set.remove(n)
        closed_set.add(n)
    print('Path does not exist!')
    return None

def get_neighbors(v):
    if v in Graph_nodes:
        return Graph_nodes[v]
    else:
        return None

def heuristic(n):
    H_dist = {
        'A': 11,
        'B': 6,
        'C': 99,
        'D': 1,
        'E': 7,
        'G': 0,
    }
    return H_dist[n]

Graph_nodes = {
    'A': [('B', 2), ('E', 3)],
    'B': [('C', 1), ('G', 9)],
    'C': None,
    'E': [('D', 6)],
    'D': [('G', 1)],
}
aStarAlgo('A', 'G')

Output:

Path found: ['A', 'E', 'D', 'G']


4. For a given set of training data examples stored in a .CSV file, implement and demonstrate the
Candidate-Elimination algorithm to output a description of the set of all hypotheses consistent with
the training examples.
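The program reads a sports.csv holding the EnjoySport examples; a reconstruction from the rows the run itself prints (attribute order as shown in the output below):

sky,air_temp,humidity,wind,water,forecast,enjoy_sport
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes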
import csv

a = []
with open("sports.csv", "r") as csvfile:
    fdata = csv.reader(csvfile)
    for x in fdata:
        a.append(x)
        print(x)

num_att = len(a[0]) - 1

# Most specific hypothesis S and most general hypothesis G.
s = ['0'] * num_att
g = ['?'] * num_att

print(s)
print(g)
temp = []

# Initialise S with the first training example.
for i in range(0, num_att):
    s[i] = a[1][i]
print(s)
print('_______')

for i in range(1, len(a)):
    if a[i][num_att] == 'yes':
        # Positive example: generalise S wherever it disagrees.
        for j in range(0, num_att):
            if s[j] != a[i][j]:
                s[j] = '?'
        # Drop general hypotheses now inconsistent with S
        # (iterate in reverse so deletion does not skip entries).
        for j in range(0, num_att):
            for k in range(len(temp) - 1, -1, -1):
                if temp[k][j] != s[j] and temp[k][j] != '?':
                    del temp[k]
    if a[i][num_att] == 'no':
        # Negative example: specialise G on each attribute where S differs.
        for j in range(0, num_att):
            if a[i][j] != s[j] and s[j] != '?':
                g[j] = s[j]
                temp.append(g)
                g = ['?'] * num_att
    print(s)
    if len(temp) == 0:
        print(g)
    else:
        print(temp)
    print('_______')

Output

['sky', 'air_temp', 'humidity', 'wind', 'water', 'forecast', 'enjoy_sport']


['sunny', 'warm', 'normal', 'strong', 'warm', 'same', 'yes']
['sunny', 'warm', 'high', 'strong', 'warm', 'same', 'yes']
['rainy', 'cold', 'high', 'strong', 'warm', 'change', 'no']
['sunny', 'warm', 'high', 'strong', 'cool', 'change', 'yes']
['0', '0', '0', '0', '0', '0']
['?', '?', '?', '?', '?', '?']
['sunny', 'warm', 'normal', 'strong', 'warm', 'same']
_______
['sunny', 'warm', 'normal', 'strong', 'warm', 'same']
['?', '?', '?', '?', '?', '?']
_______
['sunny', 'warm', '?', 'strong', 'warm', 'same']
['?', '?', '?', '?', '?', '?']
_______
['sunny', 'warm', '?', 'strong', 'warm', 'same']
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', 'same']]
_______
['sunny', 'warm', '?', 'strong', '?', '?']
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
_______


5. Apply EM algorithm to cluster a set of data stored in a .CSV file. Use the same data set for
clustering using k-Means algorithm. Compare the results of these two algorithms and comment on
the quality of clustering. You can add Java/Python ML library classes/API in the program.

In[ 1 ]: from sklearn.cluster import KMeans

import numpy as np

import matplotlib.pyplot as plt

import pandas as pd

In[ 2 ]: data=pd.read_csv("sample.csv")

In[ 3 ]: df1=pd.DataFrame(data)

In[ 4 ]: print(df1)

Out[ ]:

Driver_ID Distance_Feature Speeding_Feature


0 3423311935 71.24 28
1 3423313212 52.53 25
2 3423313724 64.54 27
3 3423311373 55.69 22
4 3423310999 54.58 25
5 3423313857 41.91 10
6 3423312432 58.64 20
7 3423311434 52.02 8
8 3423311328 31.25 34
9 3423312488 44.31 19
10 3423311254 49.35 40
11 3423312943 58.07 45
12 3423312536 44.22 22
13 3423311542 55.73 19
14 3423312176 46.63 43
15 3423314176 52.97 32
16 3423314202 46.25 35
17 3423311346 51.55 27
18 3423310666 57.05 26
19 3423313527 58.45 30
20 3423312182 43.42 23
21 3423313590 55.68 37
22 3423312268 55.15 18

In[ 5 ]: f1 = df1['Distance_Feature'].values

In[ 6 ]: f2 = df1['Speeding_Feature'].values

In[ 7 ]: X = np.array(list(zip(f1, f2)))   # 2-D feature array (np.matrix is deprecated)


In[ 8 ]: plt.scatter(f1,f2)

Out[ ]:

<matplotlib.collections.PathCollection at 0x10ab32c8>

In[ 9 ]: colors = ['b', 'g', 'r']

In[ 10 ]: markers = ['o', 'v', 's']

In[ 11 ]: kmeans_model = KMeans(n_clusters=3).fit(X)

In[ 12 ]: for i, l in enumerate(kmeans_model.labels_):
              plt.plot(f1[i], f2[i], color=colors[l], marker=markers[l], ls='None')
          plt.xlim([0, 100])
          plt.ylim([0, 50])
          plt.show()

Out[ ]: (scatter plot of the data coloured by the three K-Means clusters)
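The exercise also asks for EM clustering on the same data. A minimal sketch using scikit-learn's GaussianMixture, which fits a Gaussian mixture by EM; n_components=3 mirrors the K-Means run, and random_state=0 is an illustrative choice:

In[ 13 ]: from sklearn.mixture import GaussianMixture
          gmm = GaussianMixture(n_components=3, random_state=0).fit(X)   # EM fit on the same features
          em_labels = gmm.predict(X)
          print("EM (GMM) labels:", em_labels)
          print("K-Means labels :", kmeans_model.labels_)

K-Means assigns each point to the nearest centroid (hard, roughly spherical clusters), while EM fits Gaussian densities and allows elliptical clusters, so the two label sets may disagree near cluster boundaries; comparing them addresses the quality question in the task.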


6. Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an
appropriate data set for building the decision tree and apply this knowledge to classify a new
sample.
In[ 1 ]: from sklearn.datasets import load_iris
In[ 2 ]: from sklearn import tree
In[ 3 ]: iris = load_iris()
In[ 4 ]: X, y = iris.data, iris.target
In[ 5 ]: clf = tree.DecisionTreeClassifier(criterion='entropy')  # entropy = ID3's information-gain criterion
In[ 6 ]: clf = clf.fit(X, y)
In[ 7 ]: tree.plot_tree(clf)

Out[ ]: (plot of the fitted decision tree)
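The exercise also asks to classify a new sample with the fitted tree; a short sketch (the measurements below are illustrative, not from the manual):

In[ 8 ]: sample = [[5.1, 3.5, 1.4, 0.2]]   # sepal length/width, petal length/width (cm)
         print(iris.target_names[clf.predict(sample)])

Out[ ]: ['setosa']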


7. Write a program to implement k-Nearest Neighbour algorithm to classify the iris data set. Print
both correct and wrong predictions. Java/Python ML library classes can be used for this problem.

In[ 1 ]: from sklearn.model_selection import train_test_split


from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets

In[ 2 ]: iris=datasets.load_iris()

print("Iris Data set loaded...")

Out[ ]:Iris Data set loaded...

In[ 3 ]: print(iris.feature_names)

Out[ ]:['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']

In[ 4 ]: x_train, x_test, y_train, y_test = train_test_split(iris.data,iris.target,test_size=0.1)


print("Dataset is split into training and testing...")
print("Size of trainng data and its label",x_train.shape,y_train.shape)
print("Size of trainng data and its label",x_test.shape, y_test.shape)

Out[ ]:Dataset is split into training and testing...


Size of trainng data and its label (135, 4) (135,)
Size of trainng data and its label (15, 4) (15,)

In[ 5 ]: for i in range(len(iris.target_names)):
             print("Label", i, "-", str(iris.target_names[i]))

Out[ ]:Label 0 - setosa

Label 1 - versicolor
Label 2 - virginica

In[ 6 ]: classifier = KNeighborsClassifier(n_neighbors=2)

In[ 7 ]: classifier.fit(x_train, y_train)

Out[ ]:KNeighborsClassifier(n_neighbors=2)

In[ 8 ]: y_pred=classifier.predict(x_test)

In[ 9 ]: print("Results of Classification using K-nn with K=2 ")

for r in range(0, len(x_test)):
    print(" Sample:", str(x_test[r]), " Actual-label:", str(y_test[r]),
          " Predicted-label:", str(y_pred[r]))

print("Classification Accuracy :", classifier.score(x_test, y_test))


Out[ ]:Results of Classification using K-nn with K=2

Sample: [7.9 3.8 6.4 2. ] Actual-label: 2 Predicted-label: 2


Sample: [5. 3.4 1.6 0.4] Actual-label: 0 Predicted-label: 0
Sample: [5.6 3. 4.1 1.3] Actual-label: 1 Predicted-label: 1
Sample: [4.3 3. 1.1 0.1] Actual-label: 0 Predicted-label: 0
Sample: [6.5 3.2 5.1 2. ] Actual-label: 2 Predicted-label: 2
Sample: [6.1 2.8 4. 1.3] Actual-label: 1 Predicted-label: 1
Sample: [4.8 3. 1.4 0.1] Actual-label: 0 Predicted-label: 0
Sample: [4.9 2.5 4.5 1.7] Actual-label: 2 Predicted-label: 1
Sample: [5. 3.4 1.5 0.2] Actual-label: 0 Predicted-label: 0
Sample: [6.9 3.1 5.1 2.3] Actual-label: 2 Predicted-label: 2
Sample: [5.4 3.4 1.5 0.4] Actual-label: 0 Predicted-label: 0
Sample: [6.2 2.2 4.5 1.5] Actual-label: 1 Predicted-label: 1
Sample: [6.4 3.1 5.5 1.8] Actual-label: 2 Predicted-label: 2
Sample: [7.4 2.8 6.1 1.9] Actual-label: 2 Predicted-label: 2
Sample: [5.4 3. 4.5 1.5] Actual-label: 1 Predicted-label: 1

Classification Accuracy : 0.9333333333333333

In[ 10 ]: from sklearn.metrics import classification_report, confusion_matrix

print('Confusion Matrix')

print(confusion_matrix(y_test,y_pred))

print('Accuracy Metrics')

print(classification_report(y_test,y_pred))

Out[ ]:Confusion Matrix

[[5 0 0]
[0 4 0]
[0 1 5]]
Accuracy Metrics
precision recall f1-score support

0 1.00 1.00 1.00 5


1 0.80 1.00 0.89 4
2 1.00 0.83 0.91 6

accuracy 0.93 15
macro avg 0.93 0.94 0.93 15
weighted avg 0.95 0.93 0.93 15
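Accuracy depends on the choice of k. A quick sweep over a few values, reusing the split above (a sketch; the k values chosen are illustrative):

In[ 11 ]: for k in (1, 3, 5, 7):
              knn = KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train)
              print("k =", k, "accuracy =", knn.score(x_test, y_test))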


8. Classification: Identifying to which category an object belongs.

In[ 1 ]: import numpy as np


import pandas as pd
In[ 2 ]: from sklearn import datasets
In[ 3 ]: wine=datasets.load_wine()
In[ 4 ]: print(wine)

In[ 5 ]: print(wine.feature_names)

Out[ ]: ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash',


'magnesium', 'total_phenols', 'flavanoids', 'nonflavanoid_phenols',
'proanthocyanins', 'color_intensity', 'hue',
'od280/od315_of_diluted_wines', 'proline']

In[ 6 ]: print(wine.target_names)

Out[ ]: ['class_0' 'class_1' 'class_2']

In[ 7 ]: x=pd.DataFrame(wine['data'])
print(x.head())

Out[ ]:       0     1     2     3      4     5     6     7     8     9    10    11      12
        0  14.23  1.71  2.43  15.6  127.0  2.80  3.06  0.28  2.29  5.64  1.04  3.92  1065.0
        1  13.20  1.78  2.14  11.2  100.0  2.65  2.76  0.26  1.28  4.38  1.05  3.40  1050.0
        2  13.16  2.36  2.67  18.6  101.0  2.80  3.24  0.30  2.81  5.68  1.03  3.17  1185.0
        3  14.37  1.95  2.50  16.8  113.0  3.85  3.49  0.24  2.18  7.80  0.86  3.45  1480.0
        4  13.24  2.59  2.87  21.0  118.0  2.80  2.69  0.39  1.82  4.32  1.04  2.93   735.0

In[ 8 ]: y = wine.target
         print(y)

Out[ ]: [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
         0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
         1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
         1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
         2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]

In[ 9 ]: from sklearn.model_selection import train_test_split


In[ 10 ]: x_train, x_test, y_train, y_test = train_test_split(wine.data,wine.target)
In[ 11 ]: from sklearn.naive_bayes import GaussianNB
In[ 12 ]: gnb=GaussianNB()
In[ 13 ]: gnb.fit(x_train, y_train)

Out[ ]:GaussianNB()

In[ 14 ]: y_pred=gnb.predict(x_test)
In[ 15 ]: from sklearn.metrics import classification_report, confusion_matrix
In[ 16 ]: print("Classification Accuracy :" , gnb.score(x_test,y_test))
print('Confusion Matrix')
print(confusion_matrix(y_test,y_pred))
print('Accuracy Metrics')
print(classification_report(y_test,y_pred))

Out[ ]:Classification Accuracy : 0.9555555555555556


Confusion Matrix
[[21 1 0]
[ 0 14 1]
[ 0 0 8]]
Accuracy Metrics
precision recall f1-score support

0 1.00 0.95 0.98 22


1 0.93 0.93 0.93 15
2 0.89 1.00 0.94 8

accuracy 0.96 45
macro avg 0.94 0.96 0.95 45
weighted avg 0.96 0.96 0.96 45
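Because train_test_split shuffles at random, the confusion matrix and scores above vary between runs; fixing random_state (the value 42 below is an illustrative choice) makes the split, and hence the report, reproducible:

In[ 17 ]: x_train, x_test, y_train, y_test = train_test_split(
              wine.data, wine.target, random_state=42)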


9. Regression: Predicting a continuous-valued attribute associated with an object.

import matplotlib.pyplot as plt
from scipy import stats

x = [5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6]
y = [99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86]

# Fit a least-squares line y = slope * x + intercept.
slope, intercept, r, p, std_err = stats.linregress(x, y)

def myfunc(x):
    return slope * x + intercept

mymodel = list(map(myfunc, x))
plt.scatter(x, y)
plt.plot(x, mymodel)
plt.show()

Output: (scatter plot of the points with the fitted regression line)
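stats.linregress also returns the correlation coefficient r, a measure of fit quality, and myfunc predicts y for unseen x; a brief sketch (the query value x = 10 is illustrative):

print("r =", r)                           # near -1 or 1 means a strong linear relationship
print("predicted y at x = 10:", myfunc(10))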


10. Pre-processing: Feature extraction and normalization.

In[ 1 ]: X = [[ 1., -1.,  2.],
              [ 2.,  0.,  0.],
              [ 0.,  1., -1.]]
In[ 2 ]: X
Out[ ]:[[1.0, -1.0, 2.0], [2.0, 0.0, 0.0], [0.0, 1.0, -1.0]]

In[ 3 ]: from sklearn import preprocessing

In[ 4 ]: X_normalized = preprocessing.normalize(X, norm='l1')

In[ 5 ]: X_normalized

Out[ ]:array([[ 0.25, -0.25,  0.5 ],
       [ 1.  ,  0.  ,  0.  ],
       [ 0.  ,  0.5 , -0.5 ]])
In[ 6 ]: normalizer = preprocessing.Normalizer().fit(X)

In[ 7 ]: normalizer

Out[ ]:Normalizer()

In[ 8 ]: normalizer.transform(X)

Out[ ]:array([[ 0.40824829, -0.40824829,  0.81649658],
       [ 1.        ,  0.        ,  0.        ],
       [ 0.        ,  0.70710678, -0.70710678]])
In[ 9 ]: normalizer.transform([[-1., 1., 0.]])

Out[ ]:array([[-0.70710678, 0.70710678, 0. ]])
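Normalizer rescales each row to unit norm; the other common normalization is feature-wise standardization. A minimal sketch with StandardScaler from the same preprocessing module, applied to the X above:

In[ 10 ]: scaler = preprocessing.StandardScaler().fit(X)
          scaler.transform(X)   # each column shifted to zero mean and scaled to unit variance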
