
ML - Lab Manual With Word File

The document contains implementations of various machine learning algorithms including Candidate-Elimination, ID3 Decision Tree, Backpropagation, Naive Bayesian Classifier, Document Classification with Naive Bayes, Bayesian Network for COVID-19 Diagnosis, EM and K-Means Clustering, k-Nearest Neighbors, and Locally Weighted Regression. Each algorithm is demonstrated with Python code and utilizes different datasets for training and testing. The document serves as a comprehensive guide for understanding and applying these algorithms in practical scenarios.


Ex.No: 1 - Candidate-Elimination Algorithm

import numpy as np
import pandas as pd

data = pd.read_csv('Ex1_data.csv')
concepts = np.array(data.iloc[:, :-1])
target = np.array(data.iloc[:, -1])

def candidate_elimination(concepts, target):
    n_features = concepts.shape[1]
    specific_h = concepts[0].copy()
    general_h = [['?' for _ in range(n_features)]]

    for i, instance in enumerate(concepts):
        if target[i] == "yes":
            # Positive example: generalise S just enough to cover it
            for x in range(n_features):
                if instance[x] != specific_h[x]:
                    specific_h[x] = '?'
            # Keep only general hypotheses still consistent with S
            general_h = [g for g in general_h if all(
                (feature == '?' or feature == specific_h[x])
                for x, feature in enumerate(g)
            )]
        else:
            # Negative example: specialise G just enough to exclude it
            general_h_new = []
            for g in general_h:
                for x in range(n_features):
                    if g[x] == "?":
                        for val in np.unique(concepts[:, x]):
                            if instance[x] != val:
                                g_new = g.copy()
                                g_new[x] = val
                                if g_new not in general_h_new:
                                    general_h_new.append(g_new)
                    elif g[x] != instance[x]:
                        if g not in general_h_new:
                            general_h_new.append(g.copy())
            general_h = general_h_new.copy()

    # Drop the maximally general hypothesis if it is still present
    general_h = [g for g in general_h if g != ['?' for _ in range(n_features)]]
    return specific_h, general_h

s_final, g_final = candidate_elimination(concepts, target)
print("Final Specific Hypothesis:", s_final)
print("Final General Hypotheses:", g_final)

Ex.No: 2 - ID3 Decision Tree Algorithm

import math
import csv

class Node:
    def __init__(self, attribute):
        self.attribute = attribute
        self.children = []
        self.answer = ""

def load_csv(filename):
    with open(filename, "r") as file:
        lines = csv.reader(file)
        dataset = list(lines)
    headers = dataset.pop(0)
    return dataset, headers

def subtables(data, col, delete):
    # Split data into sub-tables keyed by the values of column col
    dic = {}
    coldata = [row[col] for row in data]
    attr = list(set(coldata))
    counts = [0] * len(attr)
    for x in range(len(attr)):
        for y in range(len(data)):
            if data[y][col] == attr[x]:
                counts[x] += 1
    for x in range(len(attr)):
        dic[attr[x]] = []
        for y in range(len(data)):
            if data[y][col] == attr[x]:
                new_row = (data[y][:col] + data[y][col+1:]) if delete else data[y]
                dic[attr[x]].append(new_row)
    return attr, dic

def entropy(S):
    # Shannon entropy of the class labels (last column) in S
    if not S:
        return 0
    counts = {}
    for item in S:
        counts[item[-1]] = counts.get(item[-1], 0) + 1
    ent = 0
    for key in counts:
        prob = counts[key] / len(S)
        ent -= prob * math.log(prob, 2)
    return ent

def compute_gain(data, col):
    # Information gain of splitting data on column col
    attr, dic = subtables(data, col, delete=False)
    total_entropy = entropy(data)
    weighted_entropy = 0
    total_size = len(data)
    for x in range(len(attr)):
        subset = dic[attr[x]]
        weighted_entropy += (len(subset) / total_size) * entropy(subset)
    return total_entropy - weighted_entropy

def build_tree(data, features):
    last_col = [row[-1] for row in data]
    if len(set(last_col)) == 1:
        # All rows share one class label: return a leaf node
        node = Node("")
        node.answer = last_col[0]
        return node
    n = len(data[0]) - 1
    gains = [compute_gain(data, col) for col in range(n)]
    split = gains.index(max(gains))
    node = Node(features[split])
    fea = features[:split] + features[split+1:]
    attr, dic = subtables(data, split, delete=True)
    for x in range(len(attr)):
        child = build_tree(dic[attr[x]], fea)
        node.children.append((attr[x], child))
    return node

def print_tree(node, level):
    if node.answer != "":
        print(" " * level, node.answer)
        return
    print(" " * level, node.attribute)
    for value, child in node.children:
        print(" " * (level + 1), value)
        print_tree(child, level + 2)

dataset, features = load_csv("Ex2_data.csv")
node = build_tree(dataset, features)
print("Decision Tree:")
print_tree(node, 0)
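Ex2_data.csv is expected to be a categorical table whose last column is the class label (a PlayTennis-style file is the usual choice; the actual contents are not shown here). The tree builder can also be exercised on a small inline table like the assumed one below.

toy_features = ["Outlook", "Wind", "PlayTennis"]   # header row (assumed)
toy_data = [
    ["Sunny",    "Weak",   "No"],
    ["Sunny",    "Strong", "No"],
    ["Overcast", "Weak",   "Yes"],
    ["Rain",     "Weak",   "Yes"],
    ["Rain",     "Strong", "No"],
]
toy_tree = build_tree(toy_data, toy_features)
print_tree(toy_tree, 0)   # splits on Outlook first for this toy table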

Ex.No: 3 - Backpropagation Algorithm

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    # x is already a sigmoid output, so the derivative is s * (1 - s)
    return x * (1 - x)

epoch = 7000
lr = 0.1
input_neurons = 2
hidden_neurons = 3
output_neurons = 1

wh = np.random.uniform(size=(input_neurons, hidden_neurons))
bh = np.random.uniform(size=(1, hidden_neurons))
wout = np.random.uniform(size=(hidden_neurons, output_neurons))
bout = np.random.uniform(size=(1, output_neurons))

X = np.array([[2, 9], [1, 5], [3, 6]], dtype=float)
y = np.array([[92], [86], [89]], dtype=float)
X = X / np.max(X, axis=0)   # scale each feature column to [0, 1]
y = y / 100                 # scale targets to [0, 1]

for _ in range(epoch):
    # Forward pass
    hidden_input = np.dot(X, wh) + bh
    hidden_output = sigmoid(hidden_input)
    output = sigmoid(np.dot(hidden_output, wout) + bout)

    # Backpropagation
    error = y - output
    d_output = error * sigmoid_derivative(output)
    error_hidden = d_output.dot(wout.T)
    d_hidden = error_hidden * sigmoid_derivative(hidden_output)

    # Update weights and biases
    wout += hidden_output.T.dot(d_output) * lr
    bout += np.sum(d_output, axis=0, keepdims=True) * lr
    wh += X.T.dot(d_hidden) * lr
    bh += np.sum(d_hidden, axis=0, keepdims=True) * lr

print("Predicted Output:\n", output)

Ex.No: 4 - Naive Bayesian Classifier

import csv
import random
import math
import numpy as np

def load_csv(filename):
    with open(filename, "r") as file:
        lines = csv.reader(file)
        dataset = [list(map(float, row)) for row in lines]
    return dataset

def split_dataset(dataset, split_ratio):
    train_size = int(len(dataset) * split_ratio)
    random.shuffle(dataset)
    return dataset[:train_size], dataset[train_size:]

def separate_by_class(dataset):
    separated = {}
    for row in dataset:
        class_val = row[-1]
        if class_val not in separated:
            separated[class_val] = []
        separated[class_val].append(row[:-1])
    return separated

def summarize(dataset):
    # Per-attribute (mean, standard deviation) pairs
    summaries = [(np.mean(attr), np.std(attr)) for attr in zip(*dataset)]
    return summaries

def calculate_probability(x, mean, stdev):
    # Gaussian probability density of x under N(mean, stdev^2)
    if stdev == 0:
        return 1.0 if x == mean else 0.0
    exponent = math.exp(-(math.pow(x - mean, 2) / (2 * math.pow(stdev, 2))))
    return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

def predict(summaries, input_vector):
    # Class priors are treated as uniform; only the likelihoods are multiplied
    probabilities = {}
    for class_val, class_summaries in summaries.items():
        probabilities[class_val] = 1
        for i in range(len(class_summaries)):
            mean, stdev = class_summaries[i]
            x = input_vector[i]
            probabilities[class_val] *= calculate_probability(x, mean, stdev)
    return max(probabilities, key=probabilities.get)

dataset = load_csv('Ex4_data.csv')
train, test = split_dataset(dataset, 0.67)
summaries = separate_by_class(train)
summaries = {k: summarize(v) for k, v in summaries.items()}
predictions = [predict(summaries, row) for row in test]
accuracy = sum(1 for i in range(len(test)) if test[i][-1] == predictions[i]) / len(test)
print(f"Accuracy: {accuracy * 100}%")

Ex.No: 5 - Document Classification with Naive Bayes

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn import metrics

msg = pd.read_csv('Ex5_data.csv', names=['message', 'label'])
msg['labelnum'] = msg.label.map({'pos': 1, 'neg': 0})
X = msg.message
y = msg.labelnum

xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)

count_vect = CountVectorizer()
xtrain_dtm = count_vect.fit_transform(xtrain)
xtest_dtm = count_vect.transform(xtest)

clf = MultinomialNB().fit(xtrain_dtm, ytrain)
predicted = clf.predict(xtest_dtm)

print("Confusion Matrix:\n", metrics.confusion_matrix(ytest, predicted))
print("Precision:", metrics.precision_score(ytest, predicted))
print("Recall:", metrics.recall_score(ytest, predicted))


Ex.No: 6 - Bayesian Network for COVID-19 Diagnosis

import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel   # named BayesianNetwork in newer pgmpy releases
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# '?' marks missing entries in the CSV; replace them with NaN
data = pd.read_csv('Ex6_data.csv').replace('?', np.nan)

model = BayesianModel([
    ('Fever', 'InfectionStatus'),
    ('Cough', 'InfectionStatus'),
    ('TravelHistory', 'InfectionStatus'),
    ('ContactWithConfirmed', 'InfectionStatus')
])
model.fit(data, estimator=MaximumLikelihoodEstimator)

infer = VariableElimination(model)
result = infer.query(variables=['InfectionStatus'], evidence={'Fever': 'Yes', 'Cough': 'Yes'})
print(result)
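The network structure implies that Ex6_data.csv has five categorical columns: Fever, Cough, TravelHistory, ContactWithConfirmed and InfectionStatus. The state labels below (Yes/No, Positive/Negative) are assumptions for illustration; whatever labels the real file uses must also be the values passed as evidence to infer.query.

Fever,Cough,TravelHistory,ContactWithConfirmed,InfectionStatus
Yes,Yes,No,Yes,Positive
No,Yes,No,No,Negative
Yes,No,Yes,Yes,Positive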

Ex.No: 7 - EM and K-Means Clustering

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture   # GaussianMixture lives in sklearn.mixture, not sklearn.cluster
from sklearn.preprocessing import StandardScaler

data = pd.read_csv('Ex7_data.csv')
X = data.drop('Target', axis=1)

scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Fit both models on the standardised features so the two clusterings are comparable
kmeans = KMeans(n_clusters=3, n_init=10).fit(X_scaled)
gmm = GaussianMixture(n_components=3).fit(X_scaled)

plt.figure(figsize=(12, 5))
plt.subplot(121)
plt.scatter(X.iloc[:, 2], X.iloc[:, 3], c=kmeans.labels_)
plt.title('K-Means')
plt.subplot(122)
plt.scatter(X.iloc[:, 2], X.iloc[:, 3], c=gmm.predict(X_scaled))
plt.title('GMM')
plt.show()
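Beyond the scatter plots, the two clusterings can also be compared numerically. One optional addition (not part of the original listing) is the silhouette score from sklearn.metrics, which rewards tight, well-separated clusters.

from sklearn.metrics import silhouette_score
print("K-Means silhouette:", silhouette_score(X_scaled, kmeans.labels_))
print("GMM silhouette:    ", silhouette_score(X_scaled, gmm.predict(X_scaled)))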

Ex.No: 8 - k-Nearest Neighbors

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)

knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)
y_pred = knn.predict(X_test)

print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Ex.No: 9 - Locally Weighted Regression

import numpy as np
import matplotlib.pyplot as plt

def local_regression(x0, X, Y, tau):
    # Weighted least squares around the query point x0 with a Gaussian kernel of width tau
    X = np.c_[np.ones(len(X)), X]
    x0 = np.r_[1, x0]
    weights = np.exp(-np.sum((X - x0) ** 2, axis=1) / (2 * tau ** 2))
    W = np.diag(weights)
    beta = np.linalg.pinv(X.T @ W @ X) @ (X.T @ W @ Y)
    return x0 @ beta

X = np.linspace(-3, 3, 100)
Y = np.sin(X) + np.random.normal(0, 0.1, 100)

predictions = [local_regression(x, X, Y, 0.1) for x in X]

plt.scatter(X, Y)
plt.plot(X, predictions, color='red')
plt.show()
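The bandwidth tau controls how local each fit is: small values track the noise, large values approach an ordinary linear fit. An optional sweep over a few assumed values makes this bias-variance trade-off visible.

for tau in (0.05, 0.3, 1.0):
    preds = [local_regression(x, X, Y, tau) for x in X]
    plt.plot(X, preds, label=f"tau = {tau}")
plt.scatter(X, Y, s=10, alpha=0.5)
plt.legend()
plt.show()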
