ML Lab Manual(1-9)
Machine learning laboratory (Anna University)
EX.NO: 1
CANDIDATE - ELIMINATION ALGORITHM
DATE:
AIM:
To implement and demonstrate the Candidate-Elimination algorithm to output a
description of the set of all hypotheses consistent with the training examples.
ALGORITHM:
Initialize G to the set of maximally general hypotheses in H
Initialize S to the set of maximally specific hypotheses in H
For each training example d, do
+ If d is a positive example
    + Remove from G any hypothesis inconsistent with d
    + For each hypothesis s in S that is not consistent with d
        + Remove s from S
        + Add to S all minimal generalizations h of s such that
            + h is consistent with d, and some member of G is more general than h
        + Remove from S any hypothesis that is more general than another hypothesis in S
+ If d is a negative example
    + Remove from S any hypothesis inconsistent with d
    + For each hypothesis g in G that is not consistent with d
        + Remove g from G
        + Add to G all minimal specializations h of g such that
            + h is consistent with d, and some member of S is more specific than h
        + Remove from G any hypothesis that is less general than another hypothesis in G
PROGRAM:
import numpy as np
import pandas as pd

data = pd.DataFrame(data=pd.read_csv('enjoysport.csv'))
concepts = np.array(data.iloc[:, 0:-1])
print(concepts)
target = np.array(data.iloc[:, -1])
print(target)

def learn(concepts, target):
    specific_h = concepts[0].copy()
    print("initialization of specific_h and general_h")
    print(specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print(general_h)
    for i, h in enumerate(concepts):
        # a positive example generalizes S and prunes the matching entries of G
        if target[i] == "yes":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
            print(specific_h)
        # a negative example specializes G using the attribute values kept in S
        if target[i] == "no":
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("steps of Candidate Elimination Algorithm", i + 1)
        print(specific_h)
        print(general_h)
    # drop the rows of general_h that stayed fully general
    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
    for i in indices:
        general_h.remove(['?', '?', '?', '?', '?', '?'])
    return specific_h, general_h

s_final, g_final = learn(concepts, target)
print("Final Specific_h:", s_final, sep="\n")
print("Final General_h:", g_final, sep="\n")
DATA SET:
Sky AirTemp Humidity Wind Water Forecast EnjoySport
sunny warm normal strong warm same yes
sunny warm high strong warm same yes
rainy cold high strong warm change no
sunny warm high strong cool change yes
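The program above reads these rows from a file named enjoysport.csv; pandas read_csv treats the first line as a header. The exact header names are not given in the manual, so the layout below is only a plausible sketch matching the table:

sky,airtemp,humidity,wind,water,forecast,enjoysport
sunny,warm,normal,strong,warm,same,yes
sunny,warm,high,strong,warm,same,yes
rainy,cold,high,strong,warm,change,no
sunny,warm,high,strong,cool,change,yes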
OUTPUT:
Final Specific_h:
['sunny' 'warm' '?' 'strong' '?' '?']
Final General_h:
[['sunny', '?', '?', '?', '?', '?'], ['?', 'warm', '?', '?', '?', '?']]
RESULT:
Thus the program to implement the Candidate-Elimination algorithm using the given dataset has been executed successfully and the output verified.
EX.NO: 2
DECISION TREE BASED ID3 ALGORITHM
DATE:
AIM:
To demonstrate the working of the decision tree based ID3 algorithm. Use an
appropriate data set for building the decision tree and apply this knowledge to classify a new
sample.
ALGORITHM:
* Create a Root node for the tree
* If all Examples are positive, Return the single-node tree Root, with label = +
* If all Examples are negative, Return the single-node tree Root, with label = -
* If Attributes is empty, Return the single-node tree Root, with label = most common value of Target_attribute in Examples
* Otherwise Begin
    * A ← the attribute from Attributes that best classifies Examples (the one with the highest information gain; see the sketch after this list)
    * The decision attribute for Root ← A
    * For each possible value, vi, of A,
        * Add a new tree branch below Root, corresponding to the test A = vi
        * Let Examples_vi be the subset of Examples that have value vi for A
        * If Examples_vi is empty
            * Then below this new branch add a leaf node with label = most common value of Target_attribute in Examples
            * Else below this new branch add the subtree ID3(Examples_vi, Target_attribute, Attributes - {A})
* End
* Return Root
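The "best classifies" criterion is information gain. As a minimal sketch (not part of the lab program), the snippet below computes the gain of the Wind attribute; the class labels and Wind values are copied from rows D1-D14 of the PlayTennis table in the DATA SET section.

import math

def entropy(labels):
    total = len(labels)
    return -sum((labels.count(v) / total) * math.log2(labels.count(v) / total)
                for v in set(labels))

# PlayTennis labels and Wind values for rows D1..D14 of the data set below
labels = ['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
wind = ['weak', 'strong', 'weak', 'weak', 'weak', 'strong', 'strong', 'weak', 'weak', 'weak',
        'strong', 'strong', 'weak', 'strong']

# Gain(S, Wind) = Entropy(S) - sum over v of |S_v|/|S| * Entropy(S_v)
gain = entropy(labels) - sum(
    (wind.count(v) / len(wind)) * entropy([l for l, w in zip(labels, wind) if w == v])
    for v in set(wind))
print(round(gain, 3))  # about 0.048, so Wind is a weaker split than Outlook (the root in the OUTPUT)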
PROGRAM:
import math
import csv

def load_csv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

def subtables(data, col, delete):
    # splits the data into sub-tables, one per value of the attribute in column col
    dic = {}
    coldata = [row[col] for row in data]
    attr = list(set(coldata))
    counts = [0] * len(attr)
    r = len(data)
    c = len(data[0])
    for x in range(len(attr)):
        for y in range(r):
            if data[y][col] == attr[x]:
                counts[x] += 1
    for x in range(len(attr)):
        dic[attr[x]] = [[0 for i in range(c)] for j in range(counts[x])]
        pos = 0
        for y in range(r):
            if data[y][col] == attr[x]:
                if delete:
                    del data[y][col]
                dic[attr[x]][pos] = data[y]
                pos += 1
    return attr, dic

def entropy(S):
    attr = list(set(S))
    if len(attr) == 1:
        return 0
    counts = [0, 0]
    for i in range(2):
        counts[i] = sum([1 for x in S if attr[i] == x]) / (len(S) * 1.0)
    sums = 0
    for cnt in counts:
        sums += -1 * cnt * math.log(cnt, 2)
    return sums

def compute_gain(data, col):
    attr, dic = subtables(data, col, delete=False)
    total_size = len(data)
    entropies = [0] * len(attr)
    ratio = [0] * len(attr)
    total_entropy = entropy([row[-1] for row in data])
    for x in range(len(attr)):
        ratio[x] = len(dic[attr[x]]) / (total_size * 1.0)
        entropies[x] = entropy([row[-1] for row in dic[attr[x]]])
        total_entropy -= ratio[x] * entropies[x]
    return total_entropy

def build_tree(data, features):
    lastcol = [row[-1] for row in data]
    if len(set(lastcol)) == 1:
        node = Node("")
        node.answer = lastcol[0]
        return node
    n = len(data[0]) - 1
    gains = [0] * n
    for col in range(n):
        gains[col] = compute_gain(data, col)
    split = gains.index(max(gains))
    node = Node(features[split])
    fea = features[:split] + features[split + 1:]
    attr, dic = subtables(data, split, delete=True)
    for x in range(len(attr)):
        child = build_tree(dic[attr[x]], fea)
        node.children.append((attr[x], child))
    return node

def print_tree(node, level):
    if node.answer != "":
        print(" " * level, node.answer)
        return
    print(" " * level, node.attribute)
    for value, n in node.children:
        print(" " * (level + 1), value)
        print_tree(n, level + 2)

def classify(node, x_test, features):
    if node.answer != "":
        print(node.answer)
        return
    pos = features.index(node.attribute)
    for value, n in node.children:
        if x_test[pos] == value:
            classify(n, x_test, features)

# Main program
dataset, features = load_csv("data3.csv")
node1 = build_tree(dataset, features)
print("The decision tree for the dataset using ID3 algorithm is")
print_tree(node1, 0)
testdata, features = load_csv("data3_test.csv")
for xtest in testdata:
    print("The test instance:", xtest)
    print("The label for test instance:", end=" ")
    classify(node1, xtest, features)
DATA SET:
Day Outlook Temperature Humidity Wind PlayTennis
D1 Sunny Hot High Weak No
D2 Sunny Hot High Strong No
D3 Overcast Hot High Weak Yes
D4 Rain Mild High Weak Yes
D5 Rain Cool Normal Weak Yes
D6 Rain Cool Normal Strong No
D7 Overcast Cool Normal Strong Yes
D8 Sunny Mild High Weak No
D9 Sunny Cool Normal Weak Yes
D10 Rain Mild Normal Weak Yes
D11 Sunny Mild Normal Strong Yes
D12 Overcast Mild High Strong Yes
D13 Overcast Hot Normal Weak Yes
D14 Rain Mild High Strong No
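The ID3 program reads this data from data3.csv and the test instances from data3_test.csv; load_csv treats the first row of each file as the header. Since the resulting tree splits on Outlook (see OUTPUT), the file presumably omits the Day column, which would otherwise give a trivially maximal gain. The header names and lowercase values below are only a plausible sketch consistent with the printed test instances:

data3.csv
outlook,temperature,humidity,wind,playtennis
sunny,hot,high,weak,no
sunny,hot,high,strong,no
overcast,hot,high,weak,yes
(and so on, one row per example D4-D14 of the table above)

data3_test.csv
outlook,temperature,humidity,wind
rain,cool,normal,strong
sunny,mild,normal,strong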
OUTPUT:
The decision tree for the dataset using ID3 algorithm is
Outlook
  rain
    Wind
      strong
        no
      weak
        yes
  overcast
    yes
  sunny
    Humidity
      normal
        yes
      high
        no
The test instance: ['rain', 'cool', 'normal', 'strong']
The label for test instance: no
The test instance: ['sunny', 'mild', 'normal', 'strong']
The label for test instance: yes
RESULT:
Thus the program to implement the decision tree based ID3 algorithm using the given dataset has been executed successfully and the output verified.
EX.NO: 3
BACKPROPAGATION ALGORITHM
DATE:
AIM:
To build an Artificial Neural Network by implementing the Backpropagation algorithm and test the same using appropriate data sets.
ALGORITHM:
* Create a feed-forward network with n_in inputs, n_hidden hidden units, and n_out output units
* Initialize all network weights to small random numbers
* Until the termination condition is met, Do
    * For each (x, t) in training examples, Do
        * Normalize the input
        * Propagate the input forward and the error backward, updating the weights (see the sketch after this list)
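The weight update used in the program below is the standard gradient-descent rule for sigmoid units; a minimal single-unit sketch of that rule (with illustrative numbers, not the lab data) is:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

x, t, lr = np.array([0.5, 1.0]), 0.9, 0.1   # illustrative input, target and learning rate
w, b = np.zeros(2), 0.0

o = sigmoid(w.dot(x) + b)        # forward pass
delta = (t - o) * o * (1 - o)    # error term: (target - output) * sigmoid derivative
w += lr * delta * x              # weight update
b += lr * delta                  # bias update
print(o, delta, w, b)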
PROGRAM:
import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # maximum of X array longitudinally
y = y / 100

# Sigmoid Function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of Sigmoid Function
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5000  # Setting training iterations
lr = 0.1      # Setting learning rate
inputlayer_neurons = 2   # number of features in data set
hiddenlayer_neurons = 3  # number of hidden layer neurons
output_neurons = 1       # number of neurons at output layer

# weight and bias initialization
# draws a random range of numbers uniformly of dim x*y
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward Propagation
    hinp1 = np.dot(X, wh)
    hinp = hinp1 + bh
    hlayer_act = sigmoid(hinp)
    outinp1 = np.dot(hlayer_act, wout)
    outinp = outinp1 + bout
    output = sigmoid(outinp)
    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    # how much hidden layer weights contributed to error
    hiddengrad = derivatives_sigmoid(hlayer_act)
    d_hiddenlayer = EH * hiddengrad
    # dot product of next-layer error and current-layer output
    wout += hlayer_act.T.dot(d_output) * lr
    wh += X.T.dot(d_hiddenlayer) * lr

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
OUTPUT:
Input:
[[0.66666667 1.        ]
 [0.33333333 0.55555556]
 [1.         0.66666667]]
Actual Output:
[[0.92]
 [0.86]
 [0.89]]
Predicted Output:
[[0.89726759]
 [0.87196896]
 [0.9000671 ]]
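The normalized Input block above follows directly from X = X/np.amax(X, axis=0): the column maxima of [[2, 9], [1, 5], [3, 6]] are 3 and 9, so for example [2, 9] becomes [0.667, 1.0]. A quick check:

import numpy as np
X = np.array([[2, 9], [1, 5], [3, 6]], dtype=float)
print(X / np.amax(X, axis=0))  # matches the Input block above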
RESULT:
Thus the program to implement the Backpropagation algorithm using the given dataset has been executed successfully and the output verified.
EX.NO: 4
NAIVE BAYESIAN CLASSIFIER
DATE:
AIM:
To implement the naive Bayesian classifier for a sample training data set stored as a
.CSV file.
ALGORITHM:
* The data set used in this program is the Pima Indians Diabetes problem.
* This data set comprises 768 observations of medical details for Pima Indian patients. The records describe instantaneous measurements taken from the patient, such as their age, the number of times pregnant and blood workup. All patients are women aged 21 or older. All attributes are numeric, and their units vary from attribute to attribute.
* The attributes are Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age, Outcome.
* Each record has a class value that indicates whether the patient suffered an onset of diabetes within 5 years of when the measurements were taken (1) or not (0).
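Because all attributes are numeric, the classifier below models P(attribute | class) with a normal density built from the per-class mean and standard deviation (the calculateprobability function in the program). A minimal sketch with illustrative numbers, not taken from the data set:

import math

def gaussian_probability(x, mean, stdev):
    exponent = math.exp(-((x - mean) ** 2) / (2 * stdev ** 2))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

# e.g. likelihood of a glucose reading of 120 under a hypothetical class model
print(gaussian_probability(120, mean=110.0, stdev=25.0))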
PROGRAM:
import csv
import random
import math

def loadcsv(filename):
    lines = csv.reader(open(filename, "r"))
    dataset = list(lines)
    for i in range(len(dataset)):
        # converting strings into numbers for processing
        dataset[i] = [float(x) for x in dataset[i]]
    return dataset

def splitdataset(dataset, splitratio):
    # 67% training size
    trainsize = int(len(dataset) * splitratio)
    trainset = []
    copy = list(dataset)
    while len(trainset) < trainsize:
        # generate indices for the dataset list randomly to pick elements for training data
        index = random.randrange(len(copy))
        trainset.append(copy.pop(index))
    return [trainset, copy]

def separatebyclass(dataset):
    separated = {}  # dictionary of classes 1 and 0
    # creates a dictionary of classes 1 and 0 where the values are the instances belonging to each class
    for i in range(len(dataset)):
        vector = dataset[i]
        if vector[-1] not in separated:
            separated[vector[-1]] = []
        separated[vector[-1]].append(vector)
    return separated

def mean(numbers):
    return sum(numbers) / float(len(numbers))

def stdev(numbers):
    avg = mean(numbers)
    variance = sum([pow(x - avg, 2) for x in numbers]) / float(len(numbers) - 1)
    return math.sqrt(variance)

def summarize(dataset):
    # creates a list of (mean, stdev) tuples, one per attribute
    summaries = [(mean(attribute), stdev(attribute)) for attribute in zip(*dataset)]
    del summaries[-1]  # excluding labels +ve or -ve
    return summaries

def summarizebyclass(dataset):
    separated = separatebyclass(dataset)
    # print(separated)
    summaries = {}
    for classvalue, instances in separated.items():
        # summaries is a dict of (mean, std) tuples for each class value
        summaries[classvalue] = summarize(instances)
    return summaries

def calculateprobability(x, mean, stdev):
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def calculateclassprobabilities(summaries, inputvector):
    # probabilities contains the probability of each class for the test instance
    probabilities = {}
    for classvalue, classsummaries in summaries.items():
        # class and attribute information as mean and sd
        probabilities[classvalue] = 1
        for i in range(len(classsummaries)):
            mean, stdev = classsummaries[i]  # mean and sd of every attribute for class 0 and 1 separately
            x = inputvector[i]               # test vector's i-th attribute
            probabilities[classvalue] *= calculateprobability(x, mean, stdev)  # use normal dist
    return probabilities

def predict(summaries, inputvector):
    # the class summaries and one test instance are passed
    probabilities = calculateclassprobabilities(summaries, inputvector)
    bestLabel, bestProb = None, -1
    for classvalue, probability in probabilities.items():
        # assigns the class which has the highest probability
        if bestLabel is None or probability > bestProb:
            bestProb = probability
            bestLabel = classvalue
    return bestLabel

def getpredictions(summaries, testset):
    predictions = []
    for i in range(len(testset)):
        result = predict(summaries, testset[i])
        predictions.append(result)
    return predictions

def getaccuracy(testset, predictions):
    correct = 0
    for i in range(len(testset)):
        if testset[i][-1] == predictions[i]:
            correct += 1
    return (correct / float(len(testset))) * 100.0

def main():
    filename = 'naivedata.csv'
    splitratio = 0.67
    dataset = loadcsv(filename)
    trainingset, testset = splitdataset(dataset, splitratio)
    print('Split {0} rows into train={1} and test={2} rows'.format(len(dataset), len(trainingset), len(testset)))
    # prepare model
    summaries = summarizebyclass(trainingset)
    # print(summaries)
    # test model
    predictions = getpredictions(summaries, testset)  # find the predictions of test data with the training data
    accuracy = getaccuracy(testset, predictions)
    print('Accuracy of the classifier is : {0}%'.format(accuracy))

main()
OUTPUT:
Split 768 rows into train=514 and test=254 rows
Accuracy of the classifier is : 71.65354330708661%
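The split sizes above follow from the 0.67 split ratio: int(768 * 0.67) = 514 training rows, leaving 254 for testing. A quick check:

print(int(768 * 0.67), 768 - int(768 * 0.67))  # 514 254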
RESULT:
Thus the program to implement the naive Bayesian classifier using the given dataset has been executed successfully and the output verified.
EX.NO: 5
NAIVE BAYESIAN CLASSIFIER USING ACCURACY, RECALL, PRECISION
DATE:
AIM:
To use the naive Bayesian classifier model to perform this task. Built-in Java classes/API can be used to write the program. Calculate the accuracy, precision, and recall for your data set.
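Accuracy, precision and recall can be computed from the confusion-matrix counts of the classifier's predictions; a minimal sketch with hypothetical counts (not results from any run) is:

# hypothetical confusion-matrix counts: true/false positives and negatives
tp, fp, tn, fn = 45, 10, 70, 15
accuracy = (tp + tn) / (tp + tn + fp + fn)
precision = tp / (tp + fp)
recall = tp / (tp + fn)
print(accuracy, precision, recall)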
ALGORITHM:
1. Collect all words, punctuation, and other tokens that occur in Examples → Vocabulary