[go: up one dir, main page]

0% found this document useful (0 votes)
5 views6 pages

Machine Learning Lab Manual 10 15

The document outlines the implementation of the Candidate-Elimination algorithm using a class structure to manage hypotheses based on training data stored in a CSV file. It details the initialization of specific and general boundaries, the processing of positive and negative examples, and the methods for generalizing and specializing hypotheses. The output demonstrates the resulting hypotheses consistent with the training examples.

Uploaded by

shrivinoj143
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
5 views6 pages

Machine Learning Lab Manual 10 15

The document outlines the implementation of the Candidate-Elimination algorithm using a class structure to manage hypotheses based on training data stored in a CSV file. It details the initialization of specific and general boundaries, the processing of positive and negative examples, and the methods for generalizing and specializing hypotheses. The output demonstrates the resulting hypotheses consistent with the training examples.

Uploaded by

shrivinoj143
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

2. For a given set of training data examples stored in a .

CSV file, implement and


demonstrate the Candidate-Elimination algorithm to output a description of the set of all
hypotheses consistent with the training examples.

class Holder:
factors={} #Initialize an empty dictionary
attributes = () #declaration of dictionaries parameters with an arbitrary length

'''
Constructor of class Holder holding two parameters,
self refers to the instance of the class
'''
def init (self,attr): #
self.attributes = attr
for i in attr:
self.factors[i]=[]

def add_values(self,factor,values):
self.factors[factor]=values

class CandidateElimination:
Positive={} #Initialize positive empty dictionary
Negative={} #Initialize negative empty dictionary

def init (self,data,fact):


self.num_factors = len(data[0][0])
self.factors = fact.factors
self.attr = fact.attributes
self.dataset = data

def run_algorithm(self):
'''
Initialize the specific and general boundaries, and loop the dataset against the
algorithm
'''
G = self.initializeG()
S = self.initializeS()

'''
Programmatically populate list in the iterating variable trial_set
'''
count=0
for trial_set in self.dataset:
if self.is_positive(trial_set): #if trial set/example consists of positive examples
G = self.remove_inconsistent_G(G,trial_set[0]) #remove inconsitent data from
the general boundary
S_new = S[:] #initialize the dictionary with no key-value pair
print (S_new)
for s in S:
if not self.consistent(s,trial_set[0]):
S_new.remove(s)
generalization = self.generalize_inconsistent_S(s,trial_set[0])
generalization = self.get_general(generalization,G)
if generalization:
S_new.append(generalization)
S = S_new[:]
S = self.remove_more_general(S)
print(S)

else:#if it is negative

S = self.remove_inconsistent_S(S,trial_set[0]) #remove inconsitent data from


the specific boundary
G_new = G[:] #initialize the dictionary with no key-value pair (dataset can
take any value)
print (G_new)
for g in G:
if self.consistent(g,trial_set[0]):
G_new.remove(g)
specializations = self.specialize_inconsistent_G(g,trial_set[0])
specializationss = self.get_specific(specializations,S)
if specializations != []:
G_new += specializationss
G = G_new[:]
G = self.remove_more_specific(G)
print(G)

print (S)
print (G)

def initializeS(self):
''' Initialize the specific boundary '''
S = tuple(['-' for factor in range(self.num_factors)]) #6 constraints in the vector
return [S]

def initializeG(self):
''' Initialize the general boundary '''
G = tuple(['?' for factor in range(self.num_factors)]) # 6 constraints in the vector
return [G]

def is_positive(self,trial_set):
''' Check if a given training trial_set is positive '''
if trial_set[1] == 'Y':
return True
elif trial_set[1] == 'N':
return False
else:
raise TypeError("invalid target value")

def match_factor(self,value1,value2):
''' Check for the factors values match,
necessary while checking the consistency of
training trial_set with the hypothesis '''
if value1 == '?' or value2 == '?':
return True
elif value1 == value2 :
return True
return False

def consistent(self,hypothesis,instance):
''' Check whether the instance is part of the hypothesis '''
for i,factor in enumerate(hypothesis):
if not self.match_factor(factor,instance[i]):
return False
return True

def remove_inconsistent_G(self,hypotheses,instance):
''' For a positive trial_set, the hypotheses in G
inconsistent with it should be removed '''
G_new = hypotheses[:]

for g in hypotheses:
if not self.consistent(g,instance):
G_new.remove(g)
return G_new

def remove_inconsistent_S(self,hypotheses,instance):
''' For a negative trial_set, the hypotheses in S
inconsistent with it should be removed '''
S_new = hypotheses[:]
for s in hypotheses:
if self.consistent(s,instance):
S_new.remove(s)
return S_new

def remove_more_general(self,hypotheses):
''' After generalizing S for a positive trial_set, the hypothesis in S
general than others in S should be removed '''
S_new = hypotheses[:]
for old in hypotheses:
for new in S_new:
if old!=new and self.more_general(new,old):
S_new.remove[new]
return S_new

def remove_more_specific(self,hypotheses):
''' After specializing G for a negative trial_set, the hypothesis in G
specific than others in G should be removed '''
G_new = hypotheses[:]
for old in hypotheses:
for new in G_new:
if old!=new and self.more_specific(new,old):
G_new.remove[new]
return G_new

def generalize_inconsistent_S(self,hypothesis,instance):
''' When a inconsistent hypothesis for positive trial_set is seen in the specific
boundary S,
it should be generalized to be consistent with the trial_set ... we will get one
hypothesis'''
hypo = list(hypothesis) # convert tuple to list for mutability
for i,factor in enumerate(hypo):
if factor == '-':
hypo[i] = instance[i]
elif not self.match_factor(factor,instance[i]):
hypo[i] = '?'
generalization = tuple(hypo) # convert list back to tuple for immutability
return generalization

def specialize_inconsistent_G(self,hypothesis,instance):
''' When a inconsistent hypothesis for negative trial_set is seen in the general
boundary G
should be specialized to be consistent with the trial_set.. we will get a set of
hypotheses '''
specializations = []
hypo = list(hypothesis) # convert tuple to list for mutability
for i,factor in enumerate(hypo):
if factor == '?':
values = self.factors[self.attr[i]]
for j in values:
if instance[i] != j:
hyp=hypo[:]
hyp[i]=j
hyp=tuple(hyp) # convert list back to tuple for immutability
specializations.append(hyp)
return specializations
def get_general(self,generalization,G):
''' Checks if there is more general hypothesis in G
for a generalization of inconsistent hypothesis in S
in case of positive trial_set and returns valid generalization '''

for g in G:
if self.more_general(g,generalization):
return generalization
return None

def get_specific(self,specializations,S):
''' Checks if there is more specific hypothesis in S
for each of hypothesis in specializations of an
inconsistent hypothesis in G in case of negative trial_set
and return the valid specializations'''
valid_specializations = []
for hypo in specializations:
for s in S:
if self.more_specific(s,hypo) or s==self.initializeS()[0]:
valid_specializations.append(hypo)
return valid_specializations

def exists_general(self,hypothesis,G):
'''Used to check if there exists a more general hypothesis in
general boundary for version space'''

for g in G:
if self.more_general(g,hypothesis):
return True
return False

def exists_specific(self,hypothesis,S):
'''Used to check if there exists a more specific hypothesis in
general boundary for version space'''

for s in S:
if self.more_specific(s,hypothesis):
return True
return False

def more_general(self,hyp1,hyp2):
''' Check whether hyp1 is more general than hyp2 '''
hyp = zip(hyp1,hyp2)
for i,j in hyp:
if i == '?':
continue
elif j == '?':
if i != '?':
return False
elif i != j:
return False
else:
continue
return True

def more_specific(self,hyp1,hyp2):
''' hyp1 more specific than hyp2 is
equivalent to hyp2 being more general than hyp1 '''
return self.more_general(hyp2,hyp1)

dataset=[(('sunny','warm','normal','strong','warm','same'),'Y'),(('sunny','warm','high','stron
g','warm','same'),'Y'),(('rainy','cold','high','strong','warm','change'),'N'),(('sunny','warm','hi
gh','strong','cool','change'),'Y')]
attributes =('Sky','Temp','Humidity','Wind','Water','Forecast')
f = Holder(attributes)
f.add_values('Sky',('sunny','rainy','cloudy')) #sky can be sunny rainy or cloudy
f.add_values('Temp',('cold','warm')) #Temp can be sunny cold or warm
f.add_values('Humidity',('normal','high')) #Humidity can be normal or high
f.add_values('Wind',('weak','strong')) #wind can be weak or strong
f.add_values('Water',('warm','cold')) #water can be warm or cold
f.add_values('Forecast',('same','change')) #Forecast can be same or change
a = CandidateElimination(dataset,f) #pass the dataset to the algorithm class and call the
run algoritm method
a.run_algorithm()

Output

[('sunny', 'warm', 'normal', 'strong', 'warm', 'same')]


[('sunny', 'warm', 'normal', 'strong', 'warm', 'same')]
[('sunny', 'warm', '?', 'strong', 'warm', 'same')]
[('?', '?', '?', '?', '?', '?')]
[('sunny', '?', '?', '?', '?', '?'), ('?', 'warm', '?', '?', '?', '?'), ('?', '?', '?', '?', '?', 'same')]
[('sunny', 'warm', '?', 'strong', 'warm', 'same')]
[('sunny', 'warm', '?', 'strong', '?', '?')]
[('sunny', 'warm', '?', 'strong', '?', '?')]
[('sunny', '?', '?', '?', '?', '?'), ('?', 'warm', '?', '?', '?', '?')]

You might also like