# Program 8
# Develop a program to load the Titanic dataset. Split the data into
# training and test sets. Train a decision tree classifier. Visualize the
# tree structure. Evaluate accuracy, precision, recall, and F1-score.
# Importing necessary libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
# Load the Titanic dataset from the Seaborn library (or any other source
# you prefer). For demonstration, we use Seaborn's built-in Titanic dataset.
titanic = sns.load_dataset('titanic')

# Display the first few rows of the dataset
print(titanic.head())

# Preprocessing the dataset
# Drop rows with missing values in the columns used below, for simplicity
# (missing values could be handled more intelligently if needed).
# The explicit .copy() gives us an independent frame, so the column
# assignments that follow do not trigger pandas' SettingWithCopyWarning.
titanic = titanic.dropna(
    subset=['age', 'embarked', 'sex', 'pclass', 'survived']
).copy()

# Convert categorical features to numerical codes
titanic['sex'] = titanic['sex'].map({'male': 0, 'female': 1})
titanic['embarked'] = titanic['embarked'].map({'C': 0, 'Q': 1, 'S': 2})

# Features and target variable
X = titanic[['pclass', 'sex', 'age', 'embarked']]
y = titanic['survived']
# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Build the decision tree (seeded as well) and fit it on the training
# partition; fit() returns the estimator itself, so the calls can be chained.
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict labels for the held-out test rows
y_pred = dt_classifier.predict(X_test)
# Score the predictions against the true test labels, applying each metric
# function in turn (accuracy, precision, recall, F1).
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

# Report the four metrics, one per line
print(
    f"Accuracy: {accuracy}",
    f"Precision: {precision}",
    f"Recall: {recall}",
    f"F1-Score: {f1}",
    sep="\n",
)
# Visualize the decision tree
plt.figure(figsize=(12, 8))
plot_tree(
    dt_classifier,
    # Pass a plain list of names: some scikit-learn releases reject a
    # pandas Index for this parameter.
    feature_names=list(X.columns),
    # Class labels listed in ascending order of the target values (0, 1)
    class_names=['Not Survived', 'Survived'],
    filled=True,
    rounded=True,
)
plt.title("Decision Tree for Titanic Dataset")
plt.show()