import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns
import graphviz
from sklearn import tree
# 1. Load the Titanic dataset
titanic = sns.load_dataset('titanic') # Using seaborn's built-in Titanic dataset
# 2. Data Preprocessing
# Drop rows with missing 'embarked' and 'age'
titanic.dropna(subset=['embarked', 'age'], inplace=True)
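# Note: dropping rows with missing 'age' and 'embarked' discards a substantial number
# of rows. An alternative (sketched here, not used in this run) is to impute the median
# age before dropping, e.g.:
#   titanic['age'] = titanic['age'].fillna(titanic['age'].median())
# The metrics reported below correspond to the row-dropping approach.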
# Select features and target
features = ['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare', 'embarked']
target = 'survived'
# Convert categorical features to numeric
titanic_encoded = titanic[features + [target]].copy()
label_encoders = {}
for col in ['sex', 'embarked']:
    le = LabelEncoder()
    titanic_encoded[col] = le.fit_transform(titanic_encoded[col])
    label_encoders[col] = le
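# Sketch (not in the original notebook): inspect which integer each category was mapped
# to. LabelEncoder stores the original categories in classes_, indexed by encoded value.
for col, le in label_encoders.items():
    print(col, "->", dict(zip(le.classes_, range(len(le.classes_)))))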
# 3. Split data
X = titanic_encoded[features]
y = titanic_encoded[target]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# 4. Train Decision Tree Classifier
clf = DecisionTreeClassifier(max_depth=4, random_state=42)
clf.fit(X_train, y_train)
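# Quick sketch (not in the original notebook): impurity-based feature importances of the
# fitted tree. The values sum to 1 and indicate how much each feature drove the splits.
importances = pd.Series(clf.feature_importances_, index=features).sort_values(ascending=False)
print(importances)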
# 5. Visualize the tree
plt.figure(figsize=(20,10))
tree.plot_tree(clf, feature_names=features, class_names=['Not Survived', 'Survived'], filled=True)
plt.title("Decision Tree - Titanic")
plt.show()
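# Optional sketch: render the same tree with export_graphviz and graphviz (both imported
# above but otherwise unused). In Colab, the Source object displays inline when it is the
# last expression in a cell.
dot_data = export_graphviz(clf, out_file=None, feature_names=features,
                           class_names=['Not Survived', 'Survived'],
                           filled=True, rounded=True)
graph = graphviz.Source(dot_data)
graph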
# 6. Evaluate Model
y_pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred))
print("Recall:", recall_score(y_test, y_pred))
print("F1 Score:", f1_score(y_test, y_pred))
# Optional: Full classification report
print("\nClassification Report:\n", classification_report(y_test, y_pred))
Accuracy: 0.7062937062937062
Precision: 0.723404255319149
Recall: 0.5396825396825397
F1 Score: 0.6181818181818182
Classification Report:
               precision    recall  f1-score   support

           0       0.70      0.84      0.76        80
           1       0.72      0.54      0.62        63

    accuracy                           0.71       143
   macro avg       0.71      0.69      0.69       143
weighted avg       0.71      0.71      0.70       143
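# Sketch (not in the original notebook): a confusion matrix makes the precision/recall
# trade-off above easier to read. Uses seaborn and matplotlib as already imported.
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(4, 3))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Not Survived', 'Survived'],
            yticklabels=['Not Survived', 'Survived'])
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix - Titanic Decision Tree")
plt.show()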