Ex.No: 1 - Candidate-Elimination Algorithm
import numpy as np
import pandas as pd
data = pd.read_csv('Ex1_data.csv')
concepts = np.array(data.iloc[:, :-1])
target = np.array(data.iloc[:, -1])
def candidate_elimination(concepts, target):
    n_features = concepts.shape[1]
    # S starts as the first example (assumed positive); G starts maximally general.
    specific_h = concepts[0].copy()
    general_h = [['?' for _ in range(n_features)]]
    for i, instance in enumerate(concepts):
        if target[i] == "yes":
            # Positive example: generalize S just enough to cover it.
            for x in range(n_features):
                if instance[x] != specific_h[x]:
                    specific_h[x] = '?'
            # Prune G of hypotheses inconsistent with the updated S.
            general_h = [g for g in general_h if all(
                feature == '?' or feature == specific_h[x]
                for x, feature in enumerate(g)
            )]
        else:
            # Negative example: minimally specialize G to exclude it.
            general_h_new = []
            for g in general_h:
                for x in range(n_features):
                    if g[x] == "?":
                        # Specialize a '?' position with any value the
                        # negative instance does not have there.
                        for val in np.unique(concepts[:, x]):
                            if instance[x] != val:
                                g_new = g.copy()
                                g_new[x] = val
                                if g_new not in general_h_new:
                                    general_h_new.append(g_new)
                    elif g[x] != instance[x]:
                        # g already excludes the instance on this feature.
                        if g not in general_h_new:
                            general_h_new.append(g.copy())
            general_h = general_h_new
    # Drop the trivial all-'?' hypothesis.
    general_h = [g for g in general_h if g != ['?'] * n_features]
    return specific_h, general_h
s_final, g_final = candidate_elimination(concepts, target)
print("Final Specific Hypothesis:", s_final)
print("Final General Hypotheses:", g_final)
Ex.No: 2 - ID3 Decision Tree Algorithm
import math
import csv
class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""
def load_csv(filename):
    with open(filename, "r") as file:
        lines = csv.reader(file)
        dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers
def subtables(data, col, delete):
    # Group the rows by their value in column `col`.
    dic = {}
    coldata = [row[col] for row in data]
    attr = list(set(coldata))
    for value in attr:
        dic[value] = []
        for row in data:
            if row[col] == value:
                # Optionally drop the split column from the child rows.
                new_row = row[:col] + row[col+1:] if delete else row
                dic[value].append(new_row)
    return attr, dic
def entropy(S):
    # Shannon entropy of the class labels (last column) in S.
    if not S:
        return 0
    counts = {}
    for item in S:
        counts[item[-1]] = counts.get(item[-1], 0) + 1
    ent = 0
    for key in counts:
        prob = counts[key] / len(S)
        ent -= prob * math.log(prob, 2)
    return ent
def compute_gain(data, col):
    # Information gain = parent entropy minus the size-weighted
    # entropy of the subsets produced by splitting on `col`.
    attr, dic = subtables(data, col, delete=False)
    total_entropy = entropy(data)
    weighted_entropy = 0
    total_size = len(data)
    for value in attr:
        subset = dic[value]
        weighted_entropy += (len(subset) / total_size) * entropy(subset)
    return total_entropy - weighted_entropy
def build_tree(data, features):
    last_col = [row[-1] for row in data]
    if len(set(last_col)) == 1:
        # All rows share one class: return a leaf node.
        node = Node("")
        node.answer = last_col[0]
        return node
    # Split on the attribute with the highest information gain.
    n = len(data[0]) - 1
    gains = [compute_gain(data, col) for col in range(n)]
    split = gains.index(max(gains))
    node = Node(features[split])
    fea = features[:split] + features[split+1:]
    attr, dic = subtables(data, split, delete=True)
    for value in attr:
        child = build_tree(dic[value], fea)
        node.children.append((value, child))
    return node
def print_tree(node, level):
    if node.answer != "":
        print(" " * level, node.answer)
        return
    print(" " * level, node.attribute)
    for value, child in node.children:
        print(" " * (level + 1), value)
        print_tree(child, level + 2)
dataset, features = load_csv("Ex2_data.csv")
node = build_tree(dataset, features)
print("Decision Tree:")
print_tree(node, 0)
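load_csv pops the first row as headers, so Ex2_data.csv should start with the attribute names and end each row with the class label. A hypothetical PlayTennis-style sketch of the expected layout:
Outlook,Temperature,Humidity,Wind,PlayTennis
Sunny,Hot,High,Weak,No
Overcast,Hot,High,Weak,Yes
Rain,Mild,High,Strong,No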
Ex.No: 3 - Backpropagation Algorithm
import numpy as np
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    # Takes the sigmoid *output*, not the raw input: s'(z) = s(z) * (1 - s(z)).
    return x * (1 - x)
epoch = 7000
lr = 0.1
input_neurons = 2
hidden_neurons = 3
output_neurons = 1
wh = np.random.uniform(size=(input_neurons, hidden_neurons))
bh = np.random.uniform(size=(1, hidden_neurons))
wout = np.random.uniform(size=(hidden_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))
X = np.array([[2, 9], [1, 5], [3, 6]], dtype=float)
y = np.array([[92], [86], [89]], dtype=float)
X = X / np.max(X, axis=0)
y = y / 100
for _ in range(epoch):
    # Forward pass
    hidden_input = np.dot(X, wh) + bh
    hidden_output = sigmoid(hidden_input)
    output = sigmoid(np.dot(hidden_output, wout) + bout)
    # Backpropagation
    error = y - output
    d_output = error * sigmoid_derivative(output)
    error_hidden = d_output.dot(wout.T)
    d_hidden = error_hidden * sigmoid_derivative(hidden_output)
    # Update weights and biases
    wout += hidden_output.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hidden) * lr
    bh += np.sum(d_hidden, axis=0, keepdims=True) * lr
print("Predicted Output:\n", output)
Ex.No: 4 - Naive Bayesian Classifier
import csv
import random
import math
import numpy as np
def load_csv(filename):
    with open(filename, "r") as file:
        lines = csv.reader(file)
        dataset = [list(map(float, row)) for row in lines]
    return dataset
def split_dataset(dataset, split_ratio):
    train_size = int(len(dataset) * split_ratio)
    random.shuffle(dataset)
    return dataset[:train_size], dataset[train_size:]
def separate_by_class(dataset):
    separated = {}
    for row in dataset:
        class_val = row[-1]
        if class_val not in separated:
            separated[class_val] = []
        separated[class_val].append(row[:-1])
    return separated
def summarize(dataset):
    # One (mean, standard deviation) pair per attribute column.
    return [(np.mean(attr), np.std(attr)) for attr in zip(*dataset)]
def calculate_probability(x, mean, stdev):
    # Gaussian probability density of x under N(mean, stdev^2).
    if stdev == 0:
        return 1.0 if x == mean else 0.0
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent
def predict(summaries, input_vector):
    # Score each class by the product of its per-attribute Gaussian
    # likelihoods, then return the highest-scoring class.
    probabilities = {}
    for class_val, class_summaries in summaries.items():
        probabilities[class_val] = 1
        for i in range(len(class_summaries)):
            mean, stdev = class_summaries[i]
            x = input_vector[i]
            probabilities[class_val] *= calculate_probability(x, mean, stdev)
    return max(probabilities, key=probabilities.get)
dataset = load_csv('Ex4_data.csv')
train, test = split_dataset(dataset, 0.67)
summaries = separate_by_class(train)
summaries = {k: summarize(v) for k, v in summaries.items()}
predictions = [predict(summaries, row) for row in test]
accuracy = sum(1 for i in range(len(test)) if test[i][-1] == predictions[i]) / len(test)
print(f"Accuracy: {accuracy * 100}%")
Ex.No: 5 - Document Classification with Naive Bayes
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics
msg = pd.read_csv('Ex5_data.csv', names=['message', 'label'])
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)
count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)
clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)
print("Confusion Matrix:\n", metrics.confusion_matrix(ytest, predicted))
print("Precision:", metrics.precision_score(ytest, predicted))
print("Recall:", metrics.recall_score(ytest, predicted))
Ex.No: 6 - Bayesian Network for CORONA Diagnosis
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination
data = pd.read_csv('Ex6_data.csv').replace('?', np.nan)
model = BayesianModel([
    ('Fever', 'InfectionStatus'),
    ('Cough', 'InfectionStatus'),
    ('TravelHistory', 'InfectionStatus'),
    ('ContactWithConfirmed', 'InfectionStatus')
])
model.fit(data, estimator=MaximumLikelihoodEstimator)
infer = VariableElimination(model)
result = infer.query(variables=['InfectionStatus'], evidence={'Fever': 'Yes', 'Cough': 'Yes'})
print(result)
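model.fit estimates one conditional probability table per node, so Ex6_data.csv needs the five columns named in the network; the query above assumes the symptom columns are coded as Yes/No strings. A hypothetical fragment:
Fever,Cough,TravelHistory,ContactWithConfirmed,InfectionStatus
Yes,Yes,No,Yes,Positive
No,Yes,No,No,Negative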
Ex.No: 7 - EM and K-Means Clustering
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
data = pd.read_csv('Ex7_data.csv')
X = data.drop('Target', axis=1)
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)
kmeans = KMeans(n_clusters=3, n_init=10).fit(X_scaled)
gmm = GaussianMixture(n_components=3).fit(X_scaled)
plt.figure(figsize=(12, 5))
plt.subplot(121)
plt.scatter(X.iloc[:, 2], X.iloc[:, 3], c=kmeans.labels_)
plt.title('K-Means')
plt.subplot(122)
plt.scatter(X.iloc[:, 2], X.iloc[:, 3], c=gmm.predict(X_scaled))
plt.title('GMM')
plt.show()
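Since Ex7_data.csv is assumed to carry ground-truth labels in its Target column, both clusterings can be scored against them; a sketch using scikit-learn's adjusted Rand index (closer to 1 is better):
from sklearn.metrics import adjusted_rand_score
print("K-Means ARI:", adjusted_rand_score(data['Target'], kmeans.labels_))
print("GMM ARI:", adjusted_rand_score(data['Target'], gmm.predict(X_scaled)))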
Ex.No: 8 - k-Nearest Neighbors
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred = knn.predict(X_test)
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
Ex.No: 9 - Locally Weighted Regression
import numpy as np
import matplotlib.pyplot as plt
def local_regression(x0, X, Y, tau):
    # Prepend a bias term to X and to the query point x0.
    X = np.c_[np.ones(len(X)), X]
    x0 = np.r_[1, x0]
    # Gaussian kernel weights centred on the query point.
    weights = np.exp(-np.sum((X - x0) ** 2, axis=1) / (2 * tau ** 2))
    W = np.diag(weights)
    # Weighted least squares: beta = (X^T W X)^+ X^T W Y.
    beta = np.linalg.pinv(X.T @ W @ X) @ (X.T @ W @ Y)
    return x0 @ beta
X = np.linspace(-3, 3, 100)
Y = np.sin(X) + np.random.normal(0, 0.1, 100)
predictions = [local_regression(x, X, Y, 0.1) for x in X]
plt.scatter(X, Y)
plt.plot(X, predictions, color='red')
plt.show()
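The bandwidth tau controls how local each fit is: very small values chase the noise, while large values flatten toward a single global line. A sketch that overlays fits for a few bandwidths, reusing X, Y, and local_regression from above:
plt.scatter(X, Y, alpha=0.4)
for tau in [0.05, 0.3, 1.0]:
    preds = [local_regression(x, X, Y, tau) for x in X]
    plt.plot(X, preds, label=f"tau={tau}")
plt.legend()
plt.show()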