import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
# Shared helpers: the default and the tuned model are evaluated and plotted
# in exactly the same way, so that logic lives in one place.
def _report_metrics(heading, y_true, y_hat):
    """Print a heading followed by standard classification metrics.

    Args:
        heading: Banner line printed verbatim before the metrics.
        y_true: Ground-truth labels.
        y_hat: Predicted labels, aligned element-wise with ``y_true``.
    """
    print(heading)
    print("Confusion Matrix:\n", confusion_matrix(y_true, y_hat))
    print("Classification Report:\n", classification_report(y_true, y_hat))
    print("Accuracy:", accuracy_score(y_true, y_hat))


def _show_tree(model, title, feature_names, class_names):
    """Draw a fitted decision tree in a new 10x6 figure and display it.

    Args:
        model: Fitted decision tree estimator to draw.
        title: Text used as the figure title.
        feature_names: Names of the input features, in column order.
        class_names: Names of the target classes, in label order.
    """
    plt.figure(figsize=(10, 6))
    plot_tree(
        model,
        feature_names=feature_names,
        class_names=class_names,
        filled=True,
    )
    plt.title(title)
    plt.show()


# Step 2: Load data
iris = load_iris()
X = iris.data
y = iris.target
# ``target_names`` is a NumPy array; pass plot_tree a plain list instead,
# since some scikit-learn releases only accept a list for ``class_names``.
_class_names = iris.target_names.tolist()

# Step 3: Split data (80% train / 20% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 4: Train basic Decision Tree
clf = DecisionTreeClassifier(random_state=42)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)

# Step 5: Evaluate basic model
_report_metrics("=== Default Model Evaluation ===", y_test, y_pred)

# Step 6: Visualize basic tree
_show_tree(clf, "Default Decision Tree", iris.feature_names, _class_names)

# Step 7: Hyperparameter Tuning using GridSearchCV
# 4 depths x 3 split thresholds x 2 criteria = 24 candidates, each scored by
# 5-fold cross-validated accuracy on the training split only.
param_grid = {
    'max_depth': [2, 3, 4, 5],
    'min_samples_split': [2, 3, 4],
    'criterion': ['gini', 'entropy'],
}
grid_search = GridSearchCV(
    estimator=DecisionTreeClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
)
grid_search.fit(X_train, y_train)
print("\n=== Best Parameters ===")
print(grid_search.best_params_)

# Step 8: Evaluate tuned model on the held-out test split
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test)
_report_metrics("\n=== Tuned Model Evaluation ===", y_test, y_pred_best)

# Step 9: Visualize tuned tree
_show_tree(best_model, "Tuned Decision Tree", iris.feature_names, _class_names)