"""Probabilistic Supervised Learning - Naive Bayes: Create a dataset
and Perform the
necessary pre-processing steps. Train the model using Naive Bayes
Classifier. Give new test
data and predict the classification output. Analyze and write the
inference."""
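# Background (a brief sketch, not part of the original write-up): Gaussian
# Naive Bayes applies Bayes' theorem under a conditional-independence
# assumption,
#     P(y | x) ∝ P(y) * Π_i P(x_i | y),
# where each per-feature likelihood P(x_i | y) is modeled as a Gaussian whose
# mean and variance are estimated from the training data for each class.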
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import (confusion_matrix, accuracy_score,
                             precision_score, recall_score, f1_score)
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
# Create a synthetic dataset (e.g., 2 features, binary classification)
X, y = make_classification(n_samples=1000, n_features=2, n_informative=2,
                           n_redundant=0, n_classes=2, random_state=42)
# Create a DataFrame to inspect the data
df = pd.DataFrame(X, columns=['Feature 1', 'Feature 2'])
df['Class'] = y
# Inspect first few rows
df.head()
   Feature 1  Feature 2  Class
0  -0.999102  -0.663860      1
1   1.246686   1.153597      1
2   0.962777   0.859397      1
3  -2.957441   2.033645      1
4   1.141165   1.059449      1
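# The task calls for "necessary pre-processing steps"; a minimal sketch of the
# usual sanity checks on this frame (column names as defined above):
print(df.isnull().sum())           # make_classification produces no missing values
print(df['Class'].value_counts())  # class balance (roughly equal by default)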
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y,
test_size=0.2, random_state=42)
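# Note: for imbalanced targets a stratified split is the usual safeguard; a
# one-line variant (stratify is a standard train_test_split parameter):
# X_train, X_test, y_train, y_test = train_test_split(
#     X, y, test_size=0.2, random_state=42, stratify=y)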
# Standardize the features (GaussianNB estimates a per-feature mean and
# variance per class, so scaling is not strictly required, but it is harmless
# and keeps the pipeline consistent with other classifiers)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# Initialize the Gaussian Naive Bayes model
nb_model = GaussianNB()
# Train the model on the training data
nb_model.fit(X_train, y_train)
GaussianNB()
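# A quick look at what the model learned (assuming scikit-learn >= 1.0, where
# the per-class variances are exposed as `var_`; older versions used `sigma_`):
print("Class priors:", nb_model.class_prior_)
print("Per-class feature means:\n", nb_model.theta_)
print("Per-class feature variances:\n", nb_model.var_)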
# Make predictions on the test set
y_pred = nb_model.predict(X_test)
# Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
# Accuracy
accuracy = accuracy_score(y_test, y_pred)
# Precision
precision = precision_score(y_test, y_pred)
# Recall
recall = recall_score(y_test, y_pred)
# F1 Score (Harmonic mean of precision and recall)
f1 = f1_score(y_test, y_pred)
# Print the evaluation metrics
print("Confusion Matrix:\n", conf_matrix)
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
Confusion Matrix:
 [[93  8]
 [16 83]]
Accuracy: 0.8800
Precision: 0.9121
Recall: 0.8384
F1 Score: 0.8737
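# A consolidated per-class view (optional; classification_report is a standard
# sklearn helper and should agree with the numbers printed above):
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred, target_names=['Class 0', 'Class 1']))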
# New test data (Example: 2 samples with random feature values)
new_data = np.array([[0.5, -1.5], [-0.2, 0.8]])
# Standardize the new test data
new_data_scaled = scaler.transform(new_data)
# Predict using the trained Naive Bayes model
new_pred = nb_model.predict(new_data_scaled)
print(f"Predicted class for the new test data: {new_pred}")
Predicted class for the new test data: [0 1]
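# Posterior probabilities behind those predictions (a short sketch; GaussianNB
# exposes them via predict_proba, one row per sample, one column per class):
new_proba = nb_model.predict_proba(new_data_scaled)
print(f"Predicted class probabilities:\n{new_proba}")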
# Plot decision boundary and data points
xx, yy = np.meshgrid(
    np.linspace(X_train[:, 0].min() - 1, X_train[:, 0].max() + 1, 100),
    np.linspace(X_train[:, 1].min() - 1, X_train[:, 1].max() + 1, 100))
Z = nb_model.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(X_train[:, 0], X_train[:, 1], c=y_train, marker='o',
            edgecolor='k', label='Training data')
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_test, marker='s',
            edgecolor='k', label='Test data')
plt.title("Naive Bayes Classifier - Decision Boundary")
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.show()
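# Inference (a brief reading of the results above): on the held-out 20% split
# the Gaussian Naive Bayes model reaches 88% accuracy, with precision (0.9121)
# higher than recall (0.8384): it misses class-1 samples (16 false negatives)
# more often than it raises false alarms (8 false positives). Given only two
# informative features and the model's conditional-independence assumption,
# this is a reasonable baseline; the decision-boundary plot shows where the
# Gaussian class models overlap and the misclassifications concentrate.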