1)Find-S
Code:
import csv

def loadCsv(filename):
    # read the CSV file into a list of rows
    lines = csv.reader(open(filename, "r"))
    return list(lines)

attributes = ['Sky', 'Temp', 'Humidity', 'Wind', 'Water', 'Forecast']
print('Attributes =', attributes)
num_attributes = len(attributes)

filename = "ENJOYSPORT.csv"
dataset = loadCsv(filename)
print(dataset)

# start with the most specific hypothesis
hypothesis = ['0'] * num_attributes
print("Initial Hypothesis")
print(hypothesis)

print("The Hypotheses are")
# skip the header row; generalize the hypothesis on every positive example
for i in range(1, len(dataset)):
    target = dataset[i][-1]
    if target == '1':
        for j in range(num_attributes):
            if hypothesis[j] == '0':
                hypothesis[j] = dataset[i][j]
            if hypothesis[j] != dataset[i][j]:
                hypothesis[j] = '?'
    print(i + 1, '=', hypothesis)

print("Final Hypothesis")
print(hypothesis)
Output:
[['Sky', 'AirTemp', 'Humidity', 'Wind', 'Water', 'Forecast',
'EnjoySport'], ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same',
'1'], ['Sunny', 'Warm', 'High', 'Strong', 'Warm', 'Same', '1'],
['Rainy', 'Cold', 'High', 'Strong', 'Warm', 'Change', '0'], ['Sunny',
'Warm', 'High', 'Strong', 'Cool', 'Change', '1']]
Initial Hypothesis
['0', '0', '0', '0', '0', '0']
The Hypotheses are
2 = ['Sunny', 'Warm', 'Normal', 'Strong', 'Warm', 'Same']
3 = ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
4 = ['Sunny', 'Warm', '?', 'Strong', 'Warm', 'Same']
5 = ['Sunny', 'Warm', '?', 'Strong', '?', '?']
Final Hypothesis
['Sunny', 'Warm', '?', 'Strong', '?', '?']
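For reference, the ENJOYSPORT.csv file implied by the printed dataset above would be:

Sky,AirTemp,Humidity,Wind,Water,Forecast,EnjoySport
Sunny,Warm,Normal,Strong,Warm,Same,1
Sunny,Warm,High,Strong,Warm,Same,1
Rainy,Cold,High,Strong,Warm,Change,0
Sunny,Warm,High,Strong,Cool,Change,1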
2)Candidate Elimination
import numpy as np
import pandas as pd

data = pd.read_csv('ENJOYSPORT.csv')
concepts = np.array(data.iloc[:, 0:-1])
print("\nInstances are:\n", concepts)
target = np.array(data.iloc[:, -1])
print("\nTarget Values are: ", target)

def learn(concepts, target):
    # initialise S to the first instance and G to the most general boundary
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print("\nSpecific Boundary: ", specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print("\nGeneric Boundary: ", general_h)
    for i, h in enumerate(concepts):
        print("\nInstance", i + 1, "is ", h)
        if target[i] == 1:
            print("Instance is Positive ")
            # generalize S; drop the corresponding constraints from G
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        if target[i] == 0:
            print("Instance is Negative ")
            # specialize G against the negative example
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("Specific Boundary after ", i + 1, "Instance is ", specific_h)
        print("Generic Boundary after ", i + 1, "Instance is ", general_h)
        print("\n")
    # drop the rows of G that stayed fully general
    indices = [i for i, val in enumerate(general_h) if val == ['?', '?', '?', '?', '?', '?']]
    for i in indices:
        general_h.remove(['?', '?', '?', '?', '?', '?'])
    return specific_h, general_h

s_final, g_final = learn(concepts, target)
print("Final Specific_h: ", s_final, sep="\n")
print("Final General_h: ", g_final, sep="\n")
Output:
Instances are:
[['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same']
['Rainy' 'Cold' 'High' 'Strong' 'Warm' 'Change']
['Sunny' 'Warm' 'High' 'Strong' 'Cool' 'Change']]
Target Values are: [1 1 0 1]
Initialization of specific_h and general_h
Specific Boundary: ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
Generic Boundary: [['?', '?', '?', '?', '?', '?'], ['?', '?', '?',
'?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?',
'?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?']]
Instance 1 is ['Sunny' 'Warm' 'Normal' 'Strong' 'Warm' 'Same']
Instance is Positive
Specific Boundary after 1 Instance is ['Sunny' 'Warm' 'Normal'
'Strong' 'Warm' 'Same']
Generic Boundary after 1 Instance is [['?', '?', '?', '?', '?', '?'],
['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?',
'?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?']]
Instance 2 is ['Sunny' 'Warm' 'High' 'Strong' 'Warm' 'Same']
Instance is Positive
Specific Boundary after 2 Instance is ['Sunny' 'Warm' '?' 'Strong'
'Warm' 'Same']
Generic Boundary after 2 Instance is [['?', '?', '?', '?', '?', '?'],
['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?',
'?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'], ['?', '?',
'?', '?', '?', '?']]
Instance 3 is ['Rainy' 'Cold' 'High' 'Strong' 'Warm' 'Change']
Instance is Negative
Specific Boundary after 3 Instance is ['Sunny' 'Warm' '?' 'Strong'
'Warm' 'Same']
Generic Boundary after 3 Instance is [['Sunny', '?', '?', '?', '?',
'?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'],
['?', '?', '?', '?', '?', 'Same']]
Instance 4 is ['Sunny' 'Warm' 'High' 'Strong' 'Cool' 'Change']
Instance is Positive
Specific Boundary after 4 Instance is ['Sunny' 'Warm' '?' 'Strong' '?'
'?']
Generic Boundary after 4 Instance is [['Sunny', '?', '?', '?', '?',
'?'], ['?', 'Warm', '?', '?', '?', '?'], ['?', '?', '?', '?', '?',
'?'], ['?', '?', '?', '?', '?', '?'], ['?', '?', '?', '?', '?', '?'],
['?', '?', '?', '?', '?', '?']]
Final Specific_h:
['Sunny' 'Warm' '?' 'Strong' '?' '?']
Final General_h:
[['Sunny', '?', '?', '?', '?', '?'], ['?', 'Warm', '?', '?', '?', '?']]
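As a quick illustration (not part of the original listing), a hypothesis from the learned version space can be tested against a new instance; the helper function and the instance below are hypothetical:

def consistent(hypothesis, instance):
    # an instance satisfies a hypothesis if every non-'?' attribute matches
    return all(h == '?' or h == v for h, v in zip(hypothesis, instance))

new_instance = ['Sunny', 'Warm', 'High', 'Strong', 'Cool', 'Same']  # hypothetical
print(consistent(['Sunny', 'Warm', '?', 'Strong', '?', '?'], new_instance))  # True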
3)Decision Tree
import pandas as pd
import numpy as np

# read the play-tennis data using the CSV's own header;
# 'day' is only a row identifier, so keep it as the index
dataset = pd.read_csv('play_tennis.csv', index_col='day')
print(dataset)

def entropy(target_col):
    elements, counts = np.unique(target_col, return_counts=True)
    return np.sum([(-counts[i] / np.sum(counts)) * np.log2(counts[i] / np.sum(counts))
                   for i in range(len(elements))])

def InfoGain(data, split_attribute_name, target_name="play"):
    total_entropy = entropy(data[target_name])
    vals, counts = np.unique(data[split_attribute_name], return_counts=True)
    Weighted_Entropy = np.sum([(counts[i] / np.sum(counts)) *
                               entropy(data.where(data[split_attribute_name] == vals[i]).dropna()[target_name])
                               for i in range(len(vals))])
    return total_entropy - Weighted_Entropy

def ID3(data, originaldata, features, target_attribute_name="play", parent_node_class=None):
    # all examples share one label: return it
    if len(np.unique(data[target_attribute_name])) <= 1:
        return np.unique(data[target_attribute_name])[0]
    # no examples left: return the majority label of the original data
    elif len(data) == 0:
        return np.unique(originaldata[target_attribute_name])[
            np.argmax(np.unique(originaldata[target_attribute_name], return_counts=True)[1])]
    # no features left: return the parent's majority label
    elif len(features) == 0:
        return parent_node_class
    else:
        parent_node_class = np.unique(data[target_attribute_name])[
            np.argmax(np.unique(data[target_attribute_name], return_counts=True)[1])]
        # split on the feature with the highest information gain
        item_values = [InfoGain(data, feature, target_attribute_name) for feature in features]
        best_feature = features[np.argmax(item_values)]
        tree = {best_feature: {}}
        features = [i for i in features if i != best_feature]
        for value in np.unique(data[best_feature]):
            sub_data = data.where(data[best_feature] == value).dropna()
            tree[best_feature][value] = ID3(sub_data, dataset, features,
                                            target_attribute_name, parent_node_class)
        return tree

tree = ID3(dataset, dataset, list(dataset.columns[:-1]))
print('\nDisplay Tree\n', tree)
Output:
        outlook  temp humidity    wind play
day
D1 Sunny Hot High Weak No
D2 Sunny Hot High Strong No
D3 Overcast Hot High Weak Yes
D4 Rain Mild High Weak Yes
D5 Rain Cool Normal Weak Yes
D6 Rain Cool Normal Strong No
D7 Overcast Cool Normal Strong Yes
D8 Sunny Mild High Weak No
D9 Sunny Cool Normal Weak Yes
D10 Rain Mild Normal Weak Yes
D11 Sunny Mild Normal Strong Yes
D12 Overcast Mild High Strong Yes
D13 Overcast Hot Normal Weak Yes
D14 Rain Mild High Strong No
Display Tree
{'outlook': {'Overcast': 'Yes', 'Rain': {'wind': {'Strong': 'No',
'Weak': 'Yes'}}, 'Sunny': {'humidity': {'High': 'No', 'Normal': 'Yes'}}}}
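As an illustration (not part of the original listing), the nested-dict tree can classify a new day by walking it from the root; the sample instance below is hypothetical:

def classify(instance, tree):
    # walk the nested dict until a leaf label is reached
    if not isinstance(tree, dict):
        return tree
    attribute = next(iter(tree))
    return classify(instance, tree[attribute][instance[attribute]])

sample = {'outlook': 'Sunny', 'temp': 'Mild', 'humidity': 'Normal', 'wind': 'Weak'}
print(classify(sample, tree))  # expected: 'Yes'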
4)Backpropagation algorithm
import numpy as np

X = np.array(([2, 9], [1, 5], [3, 6]), dtype=float)
y = np.array(([92], [86], [89]), dtype=float)
X = X / np.amax(X, axis=0)  # normalise each feature column to [0, 1]
y = y / 100

# Sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Derivative of sigmoid (x is assumed to already be a sigmoid output)
def derivatives_sigmoid(x):
    return x * (1 - x)

# Variable initialization
epoch = 5                # number of training iterations
lr = 0.1                 # learning rate
inputlayer_neurons = 2   # number of features in the data set
hiddenlayer_neurons = 3  # number of hidden-layer neurons
output_neurons = 1       # number of neurons at the output layer

# weight and bias initialization: uniform random values of dim x*y
wh = np.random.uniform(size=(inputlayer_neurons, hiddenlayer_neurons))
bh = np.random.uniform(size=(1, hiddenlayer_neurons))
wout = np.random.uniform(size=(hiddenlayer_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

for i in range(epoch):
    # Forward propagation
    hinp = np.dot(X, wh) + bh
    hlayer_act = sigmoid(hinp)
    outinp = np.dot(hlayer_act, wout) + bout
    output = sigmoid(outinp)

    # Backpropagation
    EO = y - output
    outgrad = derivatives_sigmoid(output)
    d_output = EO * outgrad
    EH = d_output.dot(wout.T)
    hiddengrad = derivatives_sigmoid(hlayer_act)  # how much the hidden-layer weights contributed to the error
    d_hiddenlayer = EH * hiddengrad

    # weight updates (note: the biases bh and bout are left unchanged here)
    wout += hlayer_act.T.dot(d_output) * lr  # dot product of next-layer error and current-layer output
    wh += X.T.dot(d_hiddenlayer) * lr

    print("-----------Epoch-", i + 1, "Starts----------")
    print("Input: \n" + str(X))
    print("Actual Output: \n" + str(y))
    print("Predicted Output: \n", output)
    print("-----------Epoch-", i + 1, "Ends----------\n")

print("Input: \n" + str(X))
print("Actual Output: \n" + str(y))
print("Predicted Output: \n", output)
Input Dataset:
x1  x2  y
2   9   92
1   5   86
3   6   89
Output:
----------Epoch- 1 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.81946901]
[0.80312503]
[0.82285168]]
-----------Epoch- 1 Ends----------
-----------Epoch- 2 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.82027619]
[0.80391667]
[0.82366284]]
-----------Epoch- 2 Ends----------
-----------Epoch- 3 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.82106961]
[0.80469506]
[0.82446007]]
-----------Epoch- 3 Ends----------
-----------Epoch- 4 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.82184962]
[0.80546054]
[0.82524371]]
-----------Epoch- 4 Ends----------
-----------Epoch- 5 Starts----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.82261656]
[0.80621342]
[0.8260141 ]]
-----------Epoch- 5 Ends----------
Input:
[[0.66666667 1. ]
[0.33333333 0.55555556]
[1. 0.66666667]]
Actual Output:
[[0.92]
[0.86]
[0.89]]
Predicted Output:
[[0.82261656]
[0.80621342]
[0.8260141 ]]
5)Bayes classification
import csv
import numpy as np

def read_data(filename):
    with open(filename, 'r') as csvfile:
        datareader = csv.reader(csvfile)
        metadata = next(datareader)
        traindata = []
        for row in datareader:
            traindata.append(row[1:len(row)])  # drop the 'day' column
        return (metadata, traindata)

def splitDataset(dataset, splitRatio):
    # sequential (not random) split: the first splitRatio fraction goes to training
    trainSize = int(len(dataset) * splitRatio)
    trainSet = []
    testset = list(dataset)
    while len(trainSet) < trainSize:
        trainSet.append(testset.pop(0))
    return [trainSet, testset]

def classify(data, test):
    total_size = data.shape[0]
    print("\n")
    print("training data size=", total_size)
    print("test data size=", test.shape[0])
    countYes = 0
    countNo = 0
    print("\n")
    print("target count probability")
    # class priors
    for x in range(data.shape[0]):
        if data[x, data.shape[1] - 1] == 'Yes':
            countYes += 1
        if data[x, data.shape[1] - 1] == 'No':
            countNo += 1
    probYes = countYes / total_size
    probNo = countNo / total_size
    print('YES', "\t", countYes, "\t", probYes)
    print('No', "\t", countNo, "\t", probNo)

    prob0 = np.zeros((test.shape[1] - 1))
    prob1 = np.zeros((test.shape[1] - 1))
    accuracy = 0
    print("\n")
    print("instance prediction target")
    for t in range(test.shape[0]):
        # per-attribute conditional probabilities for this test instance
        for k in range(test.shape[1] - 1):
            count1 = count0 = 0
            for j in range(data.shape[0]):
                # how many times the value appeared with 'No'
                if test[t, k] == data[j, k] and data[j, data.shape[1] - 1] == 'No':
                    count0 += 1
                # how many times the value appeared with 'Yes'
                if test[t, k] == data[j, k] and data[j, data.shape[1] - 1] == 'Yes':
                    count1 += 1
            prob0[k] = count0 / countNo
            prob1[k] = count1 / countYes
        # naive Bayes: multiply the prior by the attribute likelihoods
        probno = probNo
        probyes = probYes
        for i in range(test.shape[1] - 1):
            probno = probno * prob0[i]
            probyes = probyes * prob1[i]
        predict = 'No' if probno > probyes else 'Yes'
        print(t + 1, "\t", predict, "\t ", test[t, test.shape[1] - 1])
        if predict == test[t, test.shape[1] - 1]:
            accuracy += 1
    final_accuracy = (accuracy / test.shape[0]) * 100
    print("accuracy", final_accuracy, "%")

metadata, traindata = read_data("play_tennis.csv")
print(traindata)
print("the attribute names of training data are:", metadata)
splitRatio = 0.6
trainingset, testset = splitDataset(traindata, splitRatio)
training = np.array(trainingset)
print("\n the training data set are:")
for x in trainingset:
    print(x)
testing = np.array(testset)
print("\n the test data set are:")
for x in testing:
    print(x)
classify(training, testing)
Output:
[['Sunny', 'Hot', 'High', 'Weak', 'No'], ['Sunny', 'Hot', 'High', 'Strong', 'No'], ['Overcast',
'Hot', 'High', 'Weak', 'Yes'], ['Rain', 'Mild', 'High', 'Weak', 'Yes'], ['Rain', 'Cool', 'Normal',
'Weak', 'Yes'], ['Rain', 'Cool', 'Normal', 'Strong', 'No'], ['Overcast', 'Cool', 'Normal',
'Strong', 'Yes'], ['Sunny', 'Mild', 'High', 'Weak', 'No'], ['Sunny', 'Cool', 'Normal', 'Weak',
'Yes'], ['Rain', 'Mild', 'Normal', 'Weak', 'Yes'], ['Sunny', 'Mild', 'Normal', 'Strong', 'Yes'],
['Overcast', 'Mild', 'High', 'Strong', 'Yes'], ['Overcast', 'Hot', 'Normal', 'Weak', 'Yes'],
['Rain', 'Mild', 'High', 'Strong', 'No']]
the attribute names of training data are: ['day', 'outlook', 'temp', 'humidity', 'wind', 'play']
the training data set are:
['Sunny', 'Hot', 'High', 'Weak', 'No']
['Sunny', 'Hot', 'High', 'Strong', 'No']
['Overcast', 'Hot', 'High', 'Weak', 'Yes']
['Rain', 'Mild', 'High', 'Weak', 'Yes']
['Rain', 'Cool', 'Normal', 'Weak', 'Yes']
['Rain', 'Cool', 'Normal', 'Strong', 'No']
['Overcast', 'Cool', 'Normal', 'Strong', 'Yes']
['Sunny', 'Mild', 'High', 'Weak', 'No']
the test data set are:
['Sunny' 'Cool' 'Normal' 'Weak' 'Yes']
['Rain' 'Mild' 'Normal' 'Weak' 'Yes']
['Sunny' 'Mild' 'Normal' 'Strong' 'Yes']
['Overcast' 'Mild' 'High' 'Strong' 'Yes']
['Overcast' 'Hot' 'Normal' 'Weak' 'Yes']
['Rain' 'Mild' 'High' 'Strong' 'No']
training data size= 8
test data size= 6
target count probability
YES 4 0.5
No 4 0.5
instance prediction target
1 No Yes
2 Yes Yes
3 No Yes
4 Yes Yes
5 Yes Yes
6 No No
accuracy 66.66666666666666 %
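A caveat about the raw counts above: if an attribute value never occurs with a class in the training set, prob0[k] or prob1[k] becomes zero and wipes out the whole product. A common fix, sketched below (not in the original program), is Laplace (add-one) smoothing; n_values is a hypothetical helper variable:

# inside the attribute loop, replace the raw estimates with smoothed ones
n_values = len(np.unique(data[:, k]))          # distinct values of attribute k
prob0[k] = (count0 + 1) / (countNo + n_values)
prob1[k] = (count1 + 1) / (countYes + n_values)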
6)Bayes classification for text classification
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

msg = pd.read_csv('text_classification.csv', names=['message', 'label'])
print('the dimension of the dataset', msg.shape)
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum
xtrain, xtest, ytrain, ytest = train_test_split(X, y)
print('\n the total number of training data:', ytrain.shape)
print('\n the total number of test data:', ytest.shape)

# build the bag-of-words document-term matrices
cv = CountVectorizer()
xtrain_dtm = cv.fit_transform(xtrain)
xtest_dtm = cv.transform(xtest)
print('\n the words or tokens in the text documents\n')
# note: newer scikit-learn versions rename this to cv.get_feature_names_out()
print(cv.get_feature_names())
df = pd.DataFrame(xtrain_dtm.toarray(), columns=cv.get_feature_names())

clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)
print('\n Accuracy of the classifier is', metrics.accuracy_score(ytest, predicted))
print('\n confusion matrix')
print(metrics.confusion_matrix(ytest, predicted))
print('\n the value of precision', metrics.precision_score(ytest, predicted))
print('\n the value of recall', metrics.recall_score(ytest, predicted))
Output:
the dimension of the dataset (18, 2)
the total number of training data: (13,)
the total number of test data: (5,)
the words or tokens in the text documents
['about', 'am', 'amazing', 'an', 'and', 'awesome', 'beers', 'boss',
'can', 'dance', 'deal', 'donot', 'enemy', 'feel', 'fun', 'good',
'great', 'have', 'he', 'holiday', 'horrible', 'house', 'is', 'juice',
'like', 'love', 'my', 'of', 'place', 'sandwich', 'sick', 'sworn',
'taste', 'the', 'these', 'this', 'tired', 'to', 'today', 'tomorrow',
'very', 'we', 'went', 'what', 'will', 'with']
Accuracy of the classifier is 0.6
confusion matrix
[[2 1]
[1 1]]
the value of precision 0.5
the value of recall 0.5
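The text_classification.csv file is read without a header, one "message,label" pair per line with pos/neg labels. The two rows below only illustrate the expected format; they are made-up examples, not the actual dataset:

I love this sandwich,pos
I am sick and tired of this place,neg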
7)Bayesian Belief network
pip install pgmpy   # run once in the shell (or '!pip install pgmpy' in a notebook)

import numpy as np
import pandas as pd
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.models import BayesianModel
from pgmpy.inference import VariableElimination

heartDisease = pd.read_csv('heart.csv')
heartDisease = heartDisease.replace('?', np.nan)

# display the data
print('Sample instances from the dataset are given below')
print(heartDisease.head())

# display the attribute names and datatypes
print('\n Attributes and datatypes')
print(heartDisease.dtypes)

# define the network structure
model = BayesianModel([('age', 'target'), ('sex', 'target'),
                       ('exang', 'target'), ('cp', 'target'),
                       ('target', 'restecg'), ('target', 'chol')])

# learning CPDs using Maximum Likelihood estimators
print("\n learning CPD using Maximum likelihood estimators")
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)
print(model.get_cpds('age'))
print(model.get_cpds('exang'))
print(model.get_cpds('sex'))
print(model.get_cpds('cp'))
print(model.get_cpds('restecg'))

print("\n inferencing with Bayesian Network:")
HeartDisease_infer = VariableElimination(model)
q1 = HeartDisease_infer.query(variables=['target'], evidence={'restecg': 1})
print(q1)
q2 = HeartDisease_infer.query(variables=['target'], evidence={'age': 40})
print(q2)
q3 = HeartDisease_infer.query(variables=['target'], evidence={'cp': 3})
print(q3)
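Note: newer pgmpy releases rename BayesianModel to BayesianNetwork; if the import above warns or fails, the equivalent (to the best of my knowledge) is:

# in newer pgmpy versions, BayesianModel has been renamed:
from pgmpy.models import BayesianNetwork
model = BayesianNetwork([('age', 'target'), ('sex', 'target'),
                         ('exang', 'target'), ('cp', 'target'),
                         ('target', 'restecg'), ('target', 'chol')])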
Output:
learning CPD using Maximum likelihood estimators
+---------+------------+
| age(29) | 0.00330033 |
+---------+------------+
| age(34) | 0.00660066 |
+---------+------------+
| age(35) | 0.0132013 |
+---------+------------+
| age(37) | 0.00660066 |
+---------+------------+
| age(38) | 0.00990099 |
+---------+------------+
| age(39) | 0.0132013 |
+---------+------------+
| age(40) | 0.00990099 |
+---------+------------+
| age(41) | 0.0330033 |
+---------+------------+
| age(42) | 0.0264026 |
+---------+------------+
| age(43) | 0.0264026 |
+---------+------------+
| age(44) | 0.0363036 |
+---------+------------+
| age(45) | 0.0264026 |
+---------+------------+
| age(46) | 0.0231023 |
+---------+------------+
| age(47) | 0.0165017 |
+---------+------------+
| age(48) | 0.0231023 |
+---------+------------+
| age(49) | 0.0165017 |
+---------+------------+
| age(50) | 0.0231023 |
+---------+------------+
| age(51) | 0.039604 |
+---------+------------+
| age(52) | 0.0429043 |
+---------+------------+
| age(53) | 0.0264026 |
+---------+------------+
| age(54) | 0.0528053 |
+---------+------------+
| age(55) | 0.0264026 |
+---------+------------+
| age(56) | 0.0363036 |
+---------+------------+
| age(57) | 0.0561056 |
+---------+------------+
| age(58) | 0.0627063 |
+---------+------------+
| age(59) | 0.0462046 |
+---------+------------+
| age(60) | 0.0363036 |
+---------+------------+
| age(61) | 0.0264026 |
+---------+------------+
| age(62) | 0.0363036 |
+---------+------------+
| age(63) | 0.029703 |
+---------+------------+
| age(64) | 0.0330033 |
+---------+------------+
| age(65) | 0.0264026 |
+---------+------------+
| age(66) | 0.0231023 |
+---------+------------+
| age(67) | 0.029703 |
+---------+------------+
| age(68) | 0.0132013 |
+---------+------------+
| age(69) | 0.00990099 |
+---------+------------+
| age(70) | 0.0132013 |
+---------+------------+
| age(71) | 0.00990099 |
+---------+------------+
| age(74) | 0.00330033 |
+---------+------------+
| age(76) | 0.00330033 |
+---------+------------+
| age(77) | 0.00330033 |
+---------+------------+
+----------+----------+
| exang(0) | 0.673267 |
+----------+----------+
| exang(1) | 0.326733 |
+----------+----------+
+--------+----------+
| sex(0) | 0.316832 |
+--------+----------+
| sex(1) | 0.683168 |
+--------+----------+
+-------+-----------+
| cp(0) | 0.471947 |
+-------+-----------+
| cp(1) | 0.165017 |
+-------+-----------+
| cp(2) | 0.287129 |
+-------+-----------+
| cp(3) | 0.0759076 |
+-------+-----------+
+------------+----------------------+----------------------+
| target | target(0) | target(1) |
+------------+----------------------+----------------------+
| restecg(0) | 0.572463768115942 | 0.4121212121212121 |
+------------+----------------------+----------------------+
| restecg(1) | 0.4057971014492754 | 0.5818181818181818 |
+------------+----------------------+----------------------+
| restecg(2) | 0.021739130434782608 | 0.006060606060606061 |
+------------+----------------------+----------------------+
inferencing with Bayesian Network:
Finding Elimination Order: : 100%
4/4 [00:00<00:00, 18.92it/s]
Eliminating: cp: 100%
4/4 [00:00<00:00, 6.14it/s]
+-----------+---------------+
| target | phi(target) |
+===========+===============+
| target(0) | 0.4242 |
+-----------+---------------+
| target(1) | 0.5758 |
+-----------+---------------+
Finding Elimination Order: : 100%
3/3 [00:00<00:00, 15.30it/s]
Eliminating: cp: 100%
3/3 [00:00<00:00, 36.25it/s]
+-----------+---------------+
| target | phi(target) |
+===========+===============+
| target(0) | 0.6527 |
+-----------+---------------+
| target(1) | 0.3473 |
+-----------+---------------+
Finding Elimination Order: : 0%
0/3 [00:00<?, ?it/s]
Eliminating: sex: 100%
3/3 [00:00<00:00, 38.03it/s]
+-----------+---------------+
| target | phi(target) |
+===========+===============+
| target(0) | 0.4588 |
+-----------+---------------+
| target(1) | 0.5412 |
+-----------+---------------+
8)EM and K-means
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np

iris = datasets.load_iris()
X = pd.DataFrame(iris.data)
X.columns = ['Sepal_Length', 'Sepal_Width', 'Petal_Length', 'Petal_Width']
y = pd.DataFrame(iris.target)
y.columns = ['Targets']

# build the K-means model; model.labels_ gives the cluster each sample belongs to
model = KMeans(n_clusters=3)
model.fit(X)

# visualise the clustering results
plt.figure(figsize=(14, 14))
colormap = np.array(['red', 'lime', 'black'])

# plot the real (true) classes
plt.subplot(2, 2, 1)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[y.Targets], s=40)
plt.title('Real Clusters')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# plot the model's classification
plt.subplot(2, 2, 2)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[model.labels_], s=40)
plt.title('K-Means Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# general EM for GMM: standardise the data so each feature has
# mean 0 and standard deviation 1
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(X)
xsa = scaler.transform(X)
xs = pd.DataFrame(xsa, columns=X.columns)

from sklearn.mixture import GaussianMixture
gmm = GaussianMixture(n_components=3)
gmm.fit(xs)
gmm_y = gmm.predict(xs)

plt.subplot(2, 2, 3)
plt.scatter(X.Petal_Length, X.Petal_Width, c=colormap[gmm_y], s=40)
plt.title('GMM Clustering')
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

print('Observation: the GMM (EM algorithm) based clustering matched the true labels more closely than K-means')
Output:
Observation: the GMM (EM algorithm) based clustering matched the true labels more closely than K-means
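To back the printed observation with a number, the adjusted Rand index from sklearn.metrics can score each clustering against the true iris labels (a sketch, not part of the original program):

from sklearn.metrics import adjusted_rand_score

# 1.0 means a perfect match with the true labels, 0 is chance level
print('K-means ARI:', adjusted_rand_score(iris.target, model.labels_))
print('GMM ARI:    ', adjusted_rand_score(iris.target, gmm_y))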
9)KNN algorithm
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets

# load dataset
iris = datasets.load_iris()
print("iris data set loaded...")

# split the data into train and test samples
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2)
print("Data set is split into training and testing..")
print("size of training data and its label", x_train.shape, y_train.shape)
print("size of testing data and its label", x_test.shape, y_test.shape)

# print label numbers and their names
for i in range(len(iris.target_names)):
    print("label", i, "-", str(iris.target_names[i]))

classifier = KNeighborsClassifier(n_neighbors=1)
classifier.fit(x_train, y_train)
y_pred = classifier.predict(x_test)

print("results of classification using K-NN with k=1")
for r in range(0, len(x_test)):
    print("sample:", str(x_test[r]), "Actual-label:", str(y_test[r]),
          "predicted-label:", str(y_pred[r]))
print("classification accuracy:", classifier.score(x_test, y_test))

from sklearn.metrics import classification_report, confusion_matrix
print("confusion matrix")
print(confusion_matrix(y_test, y_pred))
print("Accuracy Metrics")
print(classification_report(y_test, y_pred))
Output:
iris data set loaded...
Data set is split into training and testing..
size of training data and its label (120, 4) (120,)
size of testing data and its label (30, 4) (30,)
label 0 - setosa
label 1 - versicolor
label 2 - virginica
results of classification using K-NN with k=1
sample: [5.8 2.7 4.1 1. ] Actual-label: 1 predicted-label: 1
sample: [5.1 3.8 1.9 0.4] Actual-label: 0 predicted-label: 0
sample: [6.4 3.2 4.5 1.5] Actual-label: 1 predicted-label: 1
sample: [6.1 2.8 4.7 1.2] Actual-label: 1 predicted-label: 1
sample: [5.2 3.4 1.4 0.2] Actual-label: 0 predicted-label: 0
sample: [5.8 2.7 5.1 1.9] Actual-label: 2 predicted-label: 2
sample: [6.3 2.8 5.1 1.5] Actual-label: 2 predicted-label: 1
sample: [7.1 3. 5.9 2.1] Actual-label: 2 predicted-label: 2
sample: [6.7 2.5 5.8 1.8] Actual-label: 2 predicted-label: 2
sample: [6.8 2.8 4.8 1.4] Actual-label: 1 predicted-label: 1
sample: [5.1 3.7 1.5 0.4] Actual-label: 0 predicted-label: 0
sample: [5. 3.6 1.4 0.2] Actual-label: 0 predicted-label: 0
sample: [6.5 3. 5.8 2.2] Actual-label: 2 predicted-label: 2
sample: [6. 2.7 5.1 1.6] Actual-label: 1 predicted-label: 2
sample: [5.1 3.3 1.7 0.5] Actual-label: 0 predicted-label: 0
sample: [6.8 3.2 5.9 2.3] Actual-label: 2 predicted-label: 2
sample: [5.1 3.8 1.5 0.3] Actual-label: 0 predicted-label: 0
sample: [5.3 3.7 1.5 0.2] Actual-label: 0 predicted-label: 0
sample: [5.4 3.9 1.7 0.4] Actual-label: 0 predicted-label: 0
sample: [6.9 3.1 4.9 1.5] Actual-label: 1 predicted-label: 1
sample: [4.9 3.1 1.5 0.1] Actual-label: 0 predicted-label: 0
sample: [4.4 2.9 1.4 0.2] Actual-label: 0 predicted-label: 0
sample: [7.6 3. 6.6 2.1] Actual-label: 2 predicted-label: 2
sample: [6.3 3.4 5.6 2.4] Actual-label: 2 predicted-label: 2
sample: [5. 2. 3.5 1. ] Actual-label: 1 predicted-label: 1
sample: [6.5 3. 5.5 1.8] Actual-label: 2 predicted-label: 2
sample: [5.6 2.9 3.6 1.3] Actual-label: 1 predicted-label: 1
sample: [6.8 3. 5.5 2.1] Actual-label: 2 predicted-label: 2
sample: [6. 2.9 4.5 1.5] Actual-label: 1 predicted-label: 1
sample: [5.5 2.4 3.8 1.1] Actual-label: 1 predicted-label: 1
classification accuracy: 0.9333333333333333
confusion matrix
[[10 0 0]
[ 0 9 1]
[ 0 1 9]]
Accuracy Metrics
precision recall f1-score support
0 1.00 1.00 1.00 10
1 0.90 0.90 0.90 10
2 0.90 0.90 0.90 10
accuracy 0.93 30
macro avg 0.93 0.93 0.93 30
weighted avg 0.93 0.93 0.93 30
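Since k=1 memorises the training set and is sensitive to noisy neighbours, it can be worth sweeping a few values of k and comparing test accuracy; a short sketch (not part of the original listing):

# compare test accuracy for several choices of k
for k in [1, 3, 5, 7]:
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(x_train, y_train)
    print("k =", k, "accuracy =", knn.score(x_test, y_test))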
10)LWR algorithm
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# kernel smoothing function: Gaussian weights centred at 'point'
def kernel(point, xmat, k):
    m, n = np.shape(xmat)
    weights = np.mat(np.eye(m))
    for j in range(m):
        diff = point - xmat[j]
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * k**2))
    return weights

# function to return the local weights of each training example
def localWeight(point, xmat, ymat, k):
    wt = kernel(point, xmat, k)
    W = (xmat.T * (wt * xmat)).I * (xmat.T * wt * ymat.T)
    return W

# root function that drives the algorithm
def localWeightRegression(xmat, ymat, k):
    m, n = np.shape(xmat)
    ypred = np.zeros(m)
    for i in range(m):
        ypred[i] = xmat[i] * localWeight(xmat[i], xmat, ymat, k)
    return ypred

# import data
data = pd.read_csv('10-dataset.csv')

# place the columns in suitable data types
colA = np.array(data.total_bill)
colB = np.array(data.tip)
mcolA = np.mat(colA)
mcolB = np.mat(colB)
m = np.shape(mcolB)[1]
one = np.ones((1, m), dtype=int)

# horizontal stacking: prepend a bias column of ones
X = np.hstack((one.T, mcolA.T))
print(X.shape)

# predicting values using LWLR with bandwidth k = 0.8
ypred = localWeightRegression(X, mcolB, 0.8)

# plotting the predicted graph
xsort = X.copy()
xsort.sort(axis=0)
plt.scatter(colA, colB, color='blue')
plt.plot(xsort[:, 1], ypred[X[:, 1].argsort(0)], color='yellow', linewidth=5)
plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.show()
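The bandwidth k controls how local the fit is: a small k tracks the data closely and can overfit, while a large k approaches plain linear regression. As an optional extension (not in the original listing), fits for several bandwidths can be overlaid:

# overlay LWR fits for several bandwidths on the same scatter plot
plt.scatter(colA, colB, color='blue')
for k in [0.2, 0.8, 5.0]:
    yp = localWeightRegression(X, mcolB, k)
    plt.plot(xsort[:, 1], yp[X[:, 1].argsort(0)], label='k = %.1f' % k)
plt.xlabel('Total Bill')
plt.ylabel('Tip')
plt.legend()
plt.show()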
Output: a scatter plot of total bill vs. tip with the fitted LWR curve drawn through it.