[ML EXPERIMENT PROGRAMS]
1. Linear Discriminant Analysis (LDA)
2. Decision Tree (ID3-style: sklearn's DecisionTreeClassifier with the entropy criterion)
3. Support Vector Machine
4. Sentiment Analysis using Random Forest
5. K-Nearest Neighbors on Iris Dataset (with correct and wrong predictions)
6. Bayesian Network for Heart Disease
7. Single-layer Perceptron
8. Multi-layer Perceptron
9. Two Sample T-test
10. K-Fold Cross Validation on Cancer Dataset
[Program Codes]
# 1. Linear Discriminant Analysis
# Fit an LDA classifier on the Iris dataset and report hold-out accuracy.
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

data = load_iris()
# Fixed random_state makes the split -- and the printed accuracy -- reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, random_state=42)
model = LDA()
model.fit(X_train, y_train)
predictions = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, predictions))
# 2. Decision Tree (ID3-style)
# The entropy criterion approximates ID3's information-gain splitting.
# Reuses load_iris / train_test_split / accuracy_score imported in program 1.
from sklearn.tree import DecisionTreeClassifier

data = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.3, random_state=42)
# random_state pins tie-breaking among equally good splits for repeatability.
model = DecisionTreeClassifier(criterion='entropy', random_state=42)
model.fit(X_train, y_train)
tree_predictions = model.predict(X_test)
print("Predictions:", tree_predictions)
# Report accuracy as well, consistent with the other classification programs.
print("Accuracy:", accuracy_score(y_test, tree_predictions))
# 3. Support Vector Machine
# Train a linear-kernel SVM on Iris and print its hold-out accuracy.
from sklearn import datasets
from sklearn.svm import SVC

iris = datasets.load_iris()
features, targets = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.3)
model = SVC(kernel='linear')
model.fit(X_train, y_train)
holdout_accuracy = model.score(X_test, y_test)
print("Accuracy:", holdout_accuracy)
# 4. Sentiment Analysis using Random Forest
# Bag-of-words features over a toy corpus; labels: 1 = positive, 0 = negative.
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_extraction.text import CountVectorizer

texts = ["good", "bad", "excellent", "poor", "great", "worst"]
labels = [1, 0, 1, 0, 1, 0]
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(texts)
# With only 6 samples, stratify keeps both classes balanced across the split,
# and random_state makes this tiny experiment reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.33, stratify=labels, random_state=42)
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
print("Prediction:", model.predict(X_test))
# 5. K-Nearest Neighbors on the Iris dataset
# The program index specifies KNN on Iris, but the original code silently
# reused the 6-sample sentiment matrix from program 4. Load Iris explicitly
# (load_iris / train_test_split come from program 1's imports). Fresh
# variable names leave the sentiment split intact for programs 7-8.
from sklearn.neighbors import KNeighborsClassifier

iris_data = load_iris()
Xk_train, Xk_test, yk_train, yk_test = train_test_split(
    iris_data.data, iris_data.target, test_size=0.3, random_state=42)
model = KNeighborsClassifier(n_neighbors=3)
model.fit(Xk_train, yk_train)
predictions = model.predict(Xk_test)
# Show each test sample's prediction next to its true label.
for p, a in zip(predictions, yk_test):
    print(f"Predicted: {p}, Actual: {a}, {'Correct' if p == a else 'Wrong'}")
# 6. Bayesian Network for heart disease
# pgmpy deprecated and later removed BayesianModel; the current class name
# is BayesianNetwork (same constructor signature).
from pgmpy.models import BayesianNetwork
from pgmpy.factors.discrete import TabularCPD
from pgmpy.inference import VariableElimination

model = BayesianNetwork([('Exercise', 'HeartDisease'), ('Diet', 'HeartDisease')])
cpd_ex = TabularCPD('Exercise', 2, [[0.7], [0.3]])
cpd_diet = TabularCPD('Diet', 2, [[0.6], [0.4]])
# Rows give P(HeartDisease=0 | ...) and P(HeartDisease=1 | ...); columns
# enumerate the (Exercise, Diet) combinations in evidence order.
cpd_hd = TabularCPD('HeartDisease', 2,
                    [[0.9, 0.8, 0.7, 0.1], [0.1, 0.2, 0.3, 0.9]],
                    evidence=['Exercise', 'Diet'], evidence_card=[2, 2])
model.add_cpds(cpd_ex, cpd_diet, cpd_hd)
# Raises if the CPDs are inconsistent with the graph structure.
model.check_model()
infer = VariableElimination(model)
print(infer.query(variables=['HeartDisease'], evidence={'Exercise': 1, 'Diet': 0}))
# 7. Single-layer Perceptron
# Trains on the dense form of the sparse bag-of-words split from program 4.
from sklearn.linear_model import Perceptron

dense_train = X_train.toarray()
dense_test = X_test.toarray()
clf = Perceptron()
clf.fit(dense_train, y_train)
print("Score:", clf.score(dense_test, y_test))
# 8. Multi-layer Perceptron
# One hidden layer of 10 units, trained on the same sentiment split as program 7.
from sklearn.neural_network import MLPClassifier

clf = MLPClassifier(hidden_layer_sizes=(10,), max_iter=1000)
clf.fit(X_train.toarray(), y_train)
mlp_score = clf.score(X_test.toarray(), y_test)
print("Score:", mlp_score)
# 9. Two Sample T-test
# Independent-samples (unpaired) t-test comparing the two groups' means.
from scipy.stats import ttest_ind

group1 = [23, 21, 25, 30, 28]
group2 = [20, 22, 19, 24, 25]
result = ttest_ind(group1, group2)
stat, p = result.statistic, result.pvalue
print(f"T-Statistic: {stat}, P-Value: {p}")
# 10. K-Fold Cross Validation on the breast-cancer dataset
# Reuses RandomForestClassifier imported in program 4.
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import cross_val_score, KFold

data = load_breast_cancer()
X, y = data.data, data.target
model = RandomForestClassifier(random_state=42)
# The dataset's rows are not randomly ordered, so unshuffled folds can be
# unrepresentative; shuffle with a fixed seed for fair, repeatable folds.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
scores = cross_val_score(model, X, y, cv=kf)
print("Cross-validation scores:", scores)
print("Mean accuracy:", scores.mean())