Program 1
# Implement and demonstrate the FIND-S algorithm
import pandas as pd

def load_data(filename):
    """Load dataset from a CSV file."""
    return pd.read_csv(filename)

def find_s_algorithm(data):
    """Implements the Find-S algorithm for learning the most specific hypothesis."""
    # Extract attributes and target column
    attributes = data.iloc[:, :-1].values  # All columns except last
    target = data.iloc[:, -1].values       # Last column (Yes/No)

    # Initialize the most specific hypothesis (ϕ, ϕ, ϕ, ...)
    hypothesis = ["ϕ"] * len(attributes[0])

    # Find the first positive example to initialize the hypothesis
    for i in range(len(target)):
        if target[i].lower() == "yes":
            hypothesis = list(attributes[i])
            break

    # Iterate over all positive examples to refine the hypothesis
    for i in range(len(target)):
        if target[i].lower() == "yes":
            for j in range(len(hypothesis)):
                if hypothesis[j] != attributes[i][j]:
                    hypothesis[j] = "?"
    return hypothesis

if __name__ == "__main__":
    # Load dataset
    filename = r"C:\Users\rahul\OneDrive\Desktop\mldata\training_data.csv"  # Change filename as needed
    data = load_data(filename)

    # Run the Find-S algorithm
    specific_hypothesis = find_s_algorithm(data)

    # Print the result
    print("Most Specific Hypothesis:", specific_hypothesis)
Program 2
import pandas as pd
import numpy as np

def load_data(filename):
    """Load training data from a CSV file."""
    data = pd.read_csv(filename)
    return data

def candidate_elimination(data):
    """Implements the Candidate-Elimination algorithm."""
    attributes = data.columns[:-1]  # Exclude target column
    target = data.columns[-1]       # Target column

    # Initialize G to the most general hypothesis (general boundary)
    G = [['?' for _ in range(len(attributes))]]
    # Initialize S to the most specific hypothesis (specific boundary)
    S = ['ϕ' for _ in range(len(attributes))]

    for i, row in data.iterrows():
        instance = row.iloc[:-1].values  # Feature values
        label = row.iloc[-1]             # Class label

        if label == 'Yes':  # Positive example
            # Remove from G hypotheses that do not cover the positive example
            G = [g for g in G if is_consistent(instance, g)]
            # Generalize S where necessary
            if all(s == 'ϕ' for s in S):
                S = list(instance)  # Initialize S with the first positive example
            else:
                for j in range(len(S)):
                    if S[j] != instance[j]:
                        S[j] = '?'  # Generalize S
        elif label == 'No':  # Negative example
            # S is left unchanged: it must not cover a negative example if the data are consistent
            # Specialize G where necessary
            new_G = []
            for g in G:
                if is_consistent(instance, g):  # g wrongly covers the negative example
                    for j in range(len(g)):
                        if g[j] == '?':
                            for val in np.unique(data.iloc[:, j]):
                                if val != instance[j]:
                                    new_hypothesis = g.copy()
                                    new_hypothesis[j] = val
                                    new_G.append(new_hypothesis)
                else:
                    new_G.append(g)
            G = new_G
    return S, G

def is_consistent(instance, hypothesis):
    """Checks if an instance is consistent with (covered by) a hypothesis."""
    for i in range(len(instance)):
        if hypothesis[i] != '?' and hypothesis[i] != instance[i]:
            return False
    return True

if __name__ == "__main__":
    filename = "training_data.csv"  # Replace with actual CSV filename
    data = load_data(filename)
    S_final, G_final = candidate_elimination(data)
    print("Final Specific Hypothesis:", S_final)
    print("Final General Hypotheses:", G_final)
Program 3
# Write a program to demonstrate the working of the decision tree based ID3 algorithm.
# Use an appropriate data set for building the decision tree and apply this knowledge to
# classify a new sample.
import numpy as np
import pandas as pd
from collections import Counter

# Function to calculate entropy
def entropy(data):
    labels = data.iloc[:, -1]
    label_counts = Counter(labels)
    total = len(labels)
    return -sum((count / total) * np.log2(count / total) for count in label_counts.values())

# Function to calculate information gain
def info_gain(data, split_attribute):
    total_entropy = entropy(data)
    values, counts = np.unique(data[split_attribute], return_counts=True)
    weighted_entropy = sum(
        (counts[i] / sum(counts)) * entropy(data[data[split_attribute] == values[i]])
        for i in range(len(values))
    )
    return total_entropy - weighted_entropy

# Function to build the ID3 decision tree
def id3(data, attributes):
    labels = data.iloc[:, -1]
    if len(set(labels)) == 1:
        return labels.iloc[0]
    if len(attributes) == 0:
        return labels.mode()[0]
    best_attr = max(attributes, key=lambda attr: info_gain(data, attr))
    tree = {best_attr: {}}
    for value in np.unique(data[best_attr]):
        subset = data[data[best_attr] == value].drop(columns=[best_attr])
        tree[best_attr][value] = id3(subset, [attr for attr in attributes if attr != best_attr])
    return tree

# Function to classify a new sample
def classify(tree, sample):
    if not isinstance(tree, dict):
        return tree
    attribute = next(iter(tree))
    if sample[attribute] in tree[attribute]:
        return classify(tree[attribute][sample[attribute]], sample)
    else:
        return "Unknown"

# Sample dataset (PlayTennis)
data = pd.DataFrame({
    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast',
                'Sunny', 'Sunny', 'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool', 'Mild',
                    'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild'],
    'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal', 'High',
                 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High'],
    'Wind': ['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak',
             'Weak', 'Weak', 'Strong', 'Strong', 'Weak', 'Strong'],
    'PlayTennis': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes',
                   'Yes', 'Yes', 'Yes', 'No']
})

# Build the decision tree
tree = id3(data, ['Outlook', 'Temperature', 'Humidity', 'Wind'])
print("Decision Tree:")
print(tree)

# Classify a new sample
new_sample = {'Outlook': 'Sunny', 'Temperature': 'Cool', 'Humidity': 'High', 'Wind': 'Strong'}
result = classify(tree, new_sample)
print("Classification Result:", result)
Program 4
import numpy as np

# Activation function (Sigmoid) and its derivative
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# Training data (XOR problem)
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([[0], [1], [1], [0]])

# Initialize weights and biases
input_layer_neurons = 2
hidden_layer_neurons = 2
output_layer_neurons = 1
np.random.seed(42)
weights_input_hidden = np.random.uniform(size=(input_layer_neurons, hidden_layer_neurons))
weights_hidden_output = np.random.uniform(size=(hidden_layer_neurons, output_layer_neurons))
bias_hidden = np.random.uniform(size=(1, hidden_layer_neurons))
bias_output = np.random.uniform(size=(1, output_layer_neurons))

# Training the neural network
learning_rate = 0.5
epochs = 10000
for epoch in range(epochs):
    # Forward pass
    hidden_layer_activation = np.dot(X, weights_input_hidden) + bias_hidden
    hidden_layer_output = sigmoid(hidden_layer_activation)
    output_layer_activation = np.dot(hidden_layer_output, weights_hidden_output) + bias_output
    predicted_output = sigmoid(output_layer_activation)

    # Backpropagation
    error = y - predicted_output
    d_predicted_output = error * sigmoid_derivative(predicted_output)
    error_hidden_layer = d_predicted_output.dot(weights_hidden_output.T)
    d_hidden_layer = error_hidden_layer * sigmoid_derivative(hidden_layer_output)

    # Update weights and biases
    weights_hidden_output += hidden_layer_output.T.dot(d_predicted_output) * learning_rate
    bias_output += np.sum(d_predicted_output, axis=0, keepdims=True) * learning_rate
    weights_input_hidden += X.T.dot(d_hidden_layer) * learning_rate
    bias_hidden += np.sum(d_hidden_layer, axis=0, keepdims=True) * learning_rate

# Testing the trained ANN
def predict(sample):
    hidden_layer_activation = np.dot(sample, weights_input_hidden) + bias_hidden
    hidden_layer_output = sigmoid(hidden_layer_activation)
    output_layer_activation = np.dot(hidden_layer_output, weights_hidden_output) + bias_output
    return sigmoid(output_layer_activation)

# Test samples
test_samples = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
predictions = predict(test_samples)
print("Predictions:")
print(predictions)
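# Optional sketch (not in the original listing): the sigmoid outputs are probabilities in (0, 1);
# rounding them at 0.5 turns them into hard XOR class labels for easier comparison with y.
hard_labels = (predictions > 0.5).astype(int)
print("Thresholded predictions:", hard_labels.ravel())
print("Targets:                ", y.ravel())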
Program 5
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load dataset from CSV file
def load_data(filename):
    return pd.read_csv(filename)

# Train Naïve Bayes classifier
def train_naive_bayes(data):
    X = data.iloc[:, :-1]  # Features
    y = data.iloc[:, -1]   # Labels
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    model = GaussianNB()
    model.fit(X_train, y_train)
    return model, X_train, X_test, y_train, y_test

# Compute accuracy of the classifier
def compute_accuracy(model, X_test, y_test):
    y_pred = model.predict(X_test)
    return accuracy_score(y_test, y_pred)

# Main function
def main():
    filename = 'dataset.csv'  # Change this to your actual dataset file
    data = load_data(filename)
    model, X_train, X_test, y_train, y_test = train_naive_bayes(data)
    accuracy = compute_accuracy(model, X_test, y_test)
    print(f'Naïve Bayes Classifier Accuracy: {accuracy * 100:.2f}%')

    # Test with a few test samples
    sample_tests = X_test[:5]
    predictions = model.predict(sample_tests)
    print("Sample Test Predictions:")
    for i, pred in enumerate(predictions):
        print(f'Test {i+1}: Predicted - {pred}, Actual - {y_test.iloc[i]}')

if __name__ == "__main__":
    main()
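# Setup sketch (an assumption about the missing input file, not part of the original script):
# GaussianNB needs numeric feature columns with the class label in the last column. For a quick
# end-to-end test, scikit-learn's built-in Iris data can be exported in that layout as 'dataset.csv'.
from sklearn.datasets import load_iris
iris = load_iris(as_frame=True)
iris.frame.to_csv('dataset.csv', index=False)  # 4 numeric feature columns + 'target' label column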
Program 6
import weka.classifiers.bayes.NaiveBayes;
import weka.classifiers.Evaluation;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NaiveBayesTextClassifier {
    public static void main(String[] args) {
        try {
            // Load dataset from ARFF file (convert CSV to ARFF if needed)
            DataSource source = new DataSource("dataset.arff");
            Instances dataset = source.getDataSet();

            // Set class attribute (last column as label)
            dataset.setClassIndex(dataset.numAttributes() - 1);

            // Split dataset into training and testing sets (80% train, 20% test)
            int trainSize = (int) Math.round(dataset.numInstances() * 0.8);
            int testSize = dataset.numInstances() - trainSize;
            Instances trainSet = new Instances(dataset, 0, trainSize);
            Instances testSet = new Instances(dataset, trainSize, testSize);

            // Train Naïve Bayes model
            NaiveBayes model = new NaiveBayes();
            model.buildClassifier(trainSet);

            // Evaluate the model
            Evaluation eval = new Evaluation(trainSet);
            eval.evaluateModel(model, testSet);

            // Print evaluation metrics
            System.out.println("Accuracy: " + (1 - eval.errorRate()) * 100 + "%");
            System.out.println("Precision: " + eval.precision(1));
            System.out.println("Recall: " + eval.recall(1));
            System.out.println("F1 Score: " + eval.fMeasure(1));
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
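// Sketch of the expected input (an assumption, since dataset.arff is not shown here): Weka's
// ARFF format lists the attributes first and the data rows after @data, with the class
// attribute usually last. A minimal text-classification style file could look like this:
/*
@relation messages
@attribute text string
@attribute class {pos, neg}
@data
'good service, very happy', pos
'terrible experience, never again', neg
*/
// Note: with a raw string attribute, a StringToWordVector filter is normally applied before
// training NaiveBayes; a file whose features are already nominal or numeric can be used directly.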
Program 7
import pandas as pd
from pgmpy.models import BayesianModel
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Load Heart Disease dataset
data = pd.read_csv("heart_disease.csv")

# Define the Bayesian Network structure
model = BayesianModel([
    ('Age', 'HeartDisease'), ('Sex', 'HeartDisease'),
    ('ChestPain', 'HeartDisease'), ('Cholesterol', 'HeartDisease'),
    ('BloodPressure', 'HeartDisease')
])

# Learn CPDs (Conditional Probability Distributions) using Maximum Likelihood Estimation
model.fit(data, estimator=MaximumLikelihoodEstimator)

# Perform inference
inference = VariableElimination(model)

# Example query: Probability of Heart Disease given some conditions
query_result = inference.query(variables=['HeartDisease'],
                               evidence={'Age': 55, 'Cholesterol': 230})
print(query_result)
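# Note / sketch (assumptions: the column names above, and that Age and Cholesterol are stored
# as continuous values in the CSV): pgmpy fits these variables as discrete, and the evidence in
# the query must match states that actually occur in the data. Binning the continuous columns
# first makes the states explicit; in newer pgmpy releases BayesianModel is named BayesianNetwork.
data['Age'] = pd.cut(data['Age'], bins=[0, 40, 55, 120], labels=['young', 'middle', 'old'])
data['Cholesterol'] = pd.cut(data['Cholesterol'], bins=[0, 200, 240, 600],
                             labels=['normal', 'borderline', 'high'])
# After re-fitting the model on the binned data, the query would use the binned states, e.g.:
# inference.query(variables=['HeartDisease'], evidence={'Age': 'old', 'Cholesterol': 'high'})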
Program 8
import pandas as pd
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.preprocessing import StandardScaler

# Load dataset from CSV file
def load_data(filename):
    data = pd.read_csv(filename)
    return data

# Apply EM algorithm (Gaussian Mixture Model) for clustering
def em_clustering(data, n_clusters):
    gmm = GaussianMixture(n_components=n_clusters, random_state=42)
    labels = gmm.fit_predict(data)
    return labels

# Apply k-Means clustering
def kmeans_clustering(data, n_clusters):
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    labels = kmeans.fit_predict(data)
    return labels

# Main function
def main():
    filename = 'dataset.csv'  # Change this to your dataset file
    data = load_data(filename)

    # Preprocess data (standardization)
    scaler = StandardScaler()
    data_scaled = scaler.fit_transform(data)

    n_clusters = 3  # Change the number of clusters as needed

    # Apply EM clustering
    em_labels = em_clustering(data_scaled, n_clusters)
    # Apply k-Means clustering
    kmeans_labels = kmeans_clustering(data_scaled, n_clusters)

    # Evaluate clustering quality using the silhouette score
    em_silhouette = silhouette_score(data_scaled, em_labels)
    kmeans_silhouette = silhouette_score(data_scaled, kmeans_labels)

    print(f'EM Clustering Silhouette Score: {em_silhouette:.4f}')
    print(f'K-Means Clustering Silhouette Score: {kmeans_silhouette:.4f}')

    if em_silhouette > kmeans_silhouette:
        print("EM algorithm provides better clustering quality.")
    else:
        print("K-Means provides better clustering quality.")

if __name__ == "__main__":
    main()
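# Optional sketch (not part of the original script, reusing the imports above): rather than
# hard-coding n_clusters = 3, the silhouette score can be compared across a small range of
# cluster counts; the same loop works for KMeans by swapping the estimator.
def pick_n_clusters(data_scaled, candidates=(2, 3, 4, 5)):
    scores = {}
    for k in candidates:
        labels = GaussianMixture(n_components=k, random_state=42).fit_predict(data_scaled)
        scores[k] = silhouette_score(data_scaled, labels)
    return max(scores, key=scores.get), scores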
Program 9
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Load the Iris dataset
df = pd.read_csv("iris.csv")  # Ensure the file is in the correct directory

# Split dataset into features and target variable
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train k-NN classifier
k = 3  # Choose k value
knn = KNeighborsClassifier(n_neighbors=k)
knn.fit(X_train, y_train)

# Make predictions
y_pred = knn.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Collect correct and incorrect predictions
correct = []
incorrect = []
for i in range(len(y_test)):
    if y_pred[i] == y_test.iloc[i]:
        correct.append((X_test[i], y_pred[i]))
    else:
        incorrect.append((X_test[i], y_pred[i], y_test.iloc[i]))

print("\nCorrect Predictions:")
for item in correct:
    print(f'Predicted: {item[1]}')

print("\nIncorrect Predictions:")
for item in incorrect:
    print(f'Predicted: {item[1]}, Actual: {item[2]}')
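# Optional sketch (not in the original listing): the choice k = 3 is arbitrary; a quick way to
# compare settings is to re-fit the classifier for several odd k values and report test accuracy.
for k in (1, 3, 5, 7, 9):
    acc = accuracy_score(y_test, KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train).predict(X_test))
    print(f'k={k}: accuracy={acc * 100:.2f}%')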
Program 10
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

def kernel(x, x_point, tau):
    return np.exp(-np.sum((x - x_point) ** 2, axis=1) / (2 * tau ** 2))

def locally_weighted_regression(X_train, y_train, x_query, tau):
    W = np.diag(kernel(X_train, x_query, tau))
    theta = np.linalg.pinv(X_train.T @ W @ X_train) @ X_train.T @ W @ y_train
    return x_query @ theta

# Generate synthetic dataset
np.random.seed(42)
X = np.linspace(-5, 5, 100).reshape(-1, 1)
y = np.sin(X).ravel() + np.random.normal(0, 0.2, X.shape[0])

# Add bias term
X_bias = np.c_[np.ones(X.shape[0]), X]

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X_bias, y, test_size=0.2, random_state=42)

# Fit Locally Weighted Regression
tau = 0.5  # Bandwidth parameter
y_pred = np.array([locally_weighted_regression(X_train, y_train, x, tau) for x in X_test])

# Plot results
plt.scatter(X[:, 0], y, label='Data', color='blue', alpha=0.5)
plt.scatter(X_test[:, 1], y_pred, label='Predictions', color='red', marker='x')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.title('Locally Weighted Regression')
plt.show()
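# Optional sketch (not part of the original script): tau controls the kernel bandwidth; very
# small values overfit the noise while large values flatten the fit toward ordinary linear
# regression. Comparing mean squared error on the held-out points for a few settings shows this.
for t in (0.1, 0.5, 1.0, 5.0):
    preds = np.array([locally_weighted_regression(X_train, y_train, x, t) for x in X_test])
    mse = np.mean((preds - y_test) ** 2)
    print(f'tau={t}: test MSE={mse:.4f}')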