#Name:Utkarsh Salunke #Assignment 5
#Roll:TECO2425B050
#Time:10.30-12.30
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Load the dataset from the CSV file in the current working directory.
dataset = pd.read_csv('Social_Network_Ads.csv')

# Display the first few rows of the dataset as a quick sanity check.
print(dataset.head())
# Output recorded from a previous run (kept as comments so that the file
# remains valid Python):
#     User ID  Gender  Age  EstimatedSalary  Purchased
# 0  15624510    Male   19            19000          0
# 1  15810944    Male   35            20000          0
# 2  15668575  Female   26            43000          0
# 3  15603246  Female   27            57000          0
# 4  15804002    Male   19            76000          0
# Use the 'Age' and 'EstimatedSalary' columns (positions 2 and 3) as the
# features and the 'Purchased' column (position 4) as the target.
X = dataset.iloc[:, [2, 3]].values  # 'Age' and 'EstimatedSalary' as features
y = dataset.iloc[:, 4].values  # 'Purchased' as the target
# Hold out 25% of the samples as the test set; the fixed random_state makes
# the split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

# Feature scaling (important for logistic regression). The scaler is fitted
# on the training data only and then applied to both sets, so the test set
# does not influence the scaling parameters.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
# Initialize the logistic regression model with its default settings.
classifier = LogisticRegression()

# Train the model on the training set.
# NOTE: in a notebook this call echoes the estimator repr
# ("LogisticRegression()"); that echo is cell output, not code, so it must
# not appear here as a statement (it would build a throwaway estimator).
classifier.fit(X_train, y_train)

# Predict the test set results.
y_pred = classifier.predict(X_test)
# Compute the confusion matrix. scikit-learn puts the true labels on the rows
# and the predicted labels on the columns, with classes in sorted order, so
# for a binary 0/1 target the layout is:
#     [[TN, FP],
#      [FN, TP]]
cm = confusion_matrix(y_test, y_pred)

# Extract values from the confusion matrix. Class 1 is the positive class,
# which is also what precision_score / recall_score / f1_score use by default.
TP = cm[1, 1]  # True Positives: actual 1, predicted 1
FP = cm[0, 1]  # False Positives: actual 0, predicted 1
TN = cm[0, 0]  # True Negatives: actual 0, predicted 0
FN = cm[1, 0]  # False Negatives: actual 1, predicted 0

# Compute additional metrics.
accuracy = accuracy_score(y_test, y_pred)
error_rate = 1 - accuracy  # fraction of misclassified test samples
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print the results.
print(f"Confusion Matrix:\n{cm}")
print(f"True Positives (TP): {TP}")
print(f"False Positives (FP): {FP}")
print(f"True Negatives (TN): {TN}")
print(f"False Negatives (FN): {FN}")
print(f"Accuracy: {accuracy:.4f}")
print(f"Error Rate: {error_rate:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
# Output for the recorded run (kept as comments so that the file remains
# valid Python; the TP/TN counts are read from the matrix shown):
# Confusion Matrix:
# [[65  3]
#  [ 8 24]]
# True Positives (TP): 24
# False Positives (FP): 3
# True Negatives (TN): 65
# False Negatives (FN): 8
# Accuracy: 0.8900
# Error Rate: 0.1100
# Precision: 0.8889
# Recall: 0.7500
# F1 Score: 0.8136