# Random forest
# Random Forest Example using scikit-learn
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# Fetch the bundled Breast Cancer dataset. The full `data` object is kept
# (not just X and y) because its target_names are read again when plotting.
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out 20% of the samples for testing; the fixed seed makes the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Build the Random Forest and train it on the training split in one
# expression; fit() returns the fitted estimator itself.
rf_clf = RandomForestClassifier(
    n_estimators=100,     # number of trees in the ensemble
    max_depth=None,       # no depth cap: grow until all leaves are pure
    min_samples_split=2,  # minimum samples a node needs to be split further
    n_jobs=-1,            # use all available cores
    random_state=42,      # fixed seed for reproducible results
).fit(X_train, y_train)
# Predict class labels for the held-out test samples
y_pred = rf_clf.predict(X_test)

# Fraction of correctly classified test samples, printed to 4 decimals
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f'Random Forest Accuracy: {accuracy:.4f}')

# Per-class precision / recall / F1 / support breakdown
print("\nClassification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred))
# Confusion matrix rendered as an annotated heatmap
# (rows are labelled as actual classes, columns as predicted classes)
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(6, 4))
ax = sns.heatmap(
    cm,
    annot=True,   # write the count inside every cell
    fmt='d',      # counts are integers
    cmap='Greens',
    xticklabels=data.target_names,
    yticklabels=data.target_names,
)
# Label the axes object returned by seaborn directly
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
ax.set_title('Random Forest Confusion Matrix')
plt.show()
# Sample output from a previous run:
#
# Random Forest Accuracy: 0.9649
#
# Classification Report:
#               precision    recall  f1-score   support
#
#            0       0.98      0.93      0.95        43
#            1       0.96      0.99      0.97        71
#
#     accuracy                           0.96       114
#    macro avg       0.97      0.96      0.96       114
# weighted avg       0.97      0.96      0.96       114