Week 1


import pandas as pd

# Load the marks sheet: Name, Roll Number, Subject 1 .. Subject 6
df = pd.read_excel('student_marks.xlsx')

def pass_fail_status(row):
    # A student fails if any subject mark is below 50
    return "Fail" if any(mark < 50 for mark in row.iloc[2:]) else "Pass"

df["Status"] = df.apply(pass_fail_status, axis=1)
df_status = df[["Name", "Roll Number", "Status"]]
print(df_status)

Name Roll Number Status
0 Student 1 1 Fail
1 Student 2 2 Fail
2 Student 3 3 Fail
3 Student 4 4 Fail
4 Student 5 5 Fail
5 Student 6 6 Fail
6 Student 7 7 Fail
7 Student 8 8 Fail
8 Student 9 9 Fail
9 Student 10 10 Fail
10 Student 11 11 Fail
11 Student 12 12 Fail
12 Student 13 13 Fail
13 Student 14 14 Fail
14 Student 15 15 Fail
15 Student 16 16 Fail
16 Student 17 17 Pass
17 Student 18 18 Fail
18 Student 19 19 Fail
19 Student 20 20 Fail
20 Student 21 21 Fail
21 Student 22 22 Fail
22 Student 23 23 Fail
23 Student 24 24 Fail
24 Student 25 25 Fail
25 Student 26 26 Fail
26 Student 27 27 Fail
27 Student 28 28 Fail
28 Student 29 29 Fail
29 Student 30 30 Fail
30 Student 31 31 Pass
31 Student 32 32 Pass
32 Student 33 33 Pass
33 Student 34 34 Fail
34 Student 35 35 Fail
35 Student 36 36 Fail
36 Student 37 37 Fail
37 Student 38 38 Fail
38 Student 39 39 Fail
39 Student 40 40 Fail
40 Student 41 41 Fail
41 Student 42 42 Fail
42 Student 43 43 Fail
43 Student 44 44 Fail
44 Student 45 45 Fail
45 Student 46 46 Fail
46 Student 47 47 Fail
47 Student 48 48 Fail
48 Student 49 49 Fail
49 Student 50 50 Fail
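
The row-wise apply above works, but the same Status column can be computed without a Python-level loop. The sketch below is an equivalent vectorized version; it assumes the mark columns are the ones named "Subject 1" through "Subject 6", as in the printout further down.

import numpy as np

# Vectorized alternative to the apply-based pass_fail_status (a sketch).
marks = df.filter(like="Subject")                      # only the subject-mark columns
df["Status"] = np.where(marks.lt(50).any(axis=1), "Fail", "Pass")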

# Save the data, including the new Status column, for the next lab section
df.to_excel('Updated_Students_Data.xlsx', index=False)
print(df)

Name Roll Number Subject 1 Subject 2 Subject 3 Subject 4 \
0 Student 1 1 73 70 75 53
1 Student 2 2 52 51 54 33
2 Student 3 3 31 75 79 58
3 Student 4 4 40 79 47 42
4 Student 5 5 69 56 44 61
5 Student 6 6 45 67 78 33
6 Student 7 7 30 37 55 41
7 Student 8 8 70 56 32 76
8 Student 9 9 67 58 73 71
9 Student 10 10 59 74 36 74
10 Student 11 11 76 67 68 45
11 Student 12 12 67 56 70 30
12 Student 13 13 66 45 56 37
13 Student 14 14 47 67 32 37
14 Student 15 15 41 37 79 44
15 Student 16 16 51 59 36 64
16 Student 17 17 56 71 66 67
17 Student 18 18 41 37 75 50
18 Student 19 19 51 60 60 47
19 Student 20 20 57 56 41 34
20 Student 21 21 63 75 54 65
21 Student 22 22 40 40 61 30
22 Student 23 23 50 79 49 41

23 Student 24 24 35 70 55 75
24 Student 25 25 31 49 49 61
25 Student 26 26 42 70 36 50
26 Student 27 27 70 70 74 34
27 Student 28 28 41 47 75 60
28 Student 29 29 44 74 49 78
29 Student 30 30 34 76 69 41
30 Student 31 31 53 60 79 71
31 Student 32 32 77 54 67 55
32 Student 33 33 60 62 64 68
33 Student 34 34 68 75 53 46
34 Student 35 35 61 76 42 66
35 Student 36 36 64 51 68 39
36 Student 37 37 65 46 79 39
37 Student 38 38 38 46 33 66
38 Student 39 39 78 53 40 67
39 Student 40 40 77 44 68 50
40 Student 41 41 43 50 54 41
41 Student 42 42 60 46 60 36
42 Student 43 43 78 35 76 62
43 Student 44 44 40 57 53 64
44 Student 45 45 31 45 46 54
45 Student 46 46 47 63 58 30
46 Student 47 47 65 79 71 44
47 Student 48 48 51 75 33 37
48 Student 49 49 46 33 37 57
49 Student 50 50 67 49 75 68

Subject 5 Subject 6 Status
0 74 38 Fail
1 35 77 Fail
2 66 32 Fail
3 71 59 Fail
4 63 76 Fail

Data Preprocessing


import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder

data_file = 'Updated_Students_Data.xlsx'
student_data = pd.read_excel(data_file)

# Separate the features from the Pass/Fail label
features = student_data.drop(columns=['Status'])
labels = student_data['Status']

# Encode any text feature columns (e.g. Name) as integers
label_encoder = LabelEncoder()
for column in features.select_dtypes(include='object').columns:
    features[column] = label_encoder.fit_transform(features[column])

# Encode the label: Fail -> 0, Pass -> 1
labels = label_encoder.fit_transform(labels)

# Standardize the features to zero mean and unit variance
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

X_train, X_test, y_train, y_test = train_test_split(features_scaled, labels, test_size=0.2, random_state=42)
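
With only a few Pass students in the whole file (the class counts are printed at the end of this notebook), a plain random split can leave almost no positive examples for the cross-validation folds, which is what the "least populated class" warnings below are about. A stratified split is one way to keep the Pass/Fail ratio similar in the train and test sets; the sketch below reuses the variables defined in this cell.

# Stratified variant of the split above (a sketch): stratify=labels keeps the
# Pass/Fail proportions roughly equal in the train and test sets.
X_train, X_test, y_train, y_test = train_test_split(
    features_scaled, labels, test_size=0.2, random_state=42, stratify=labels
)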

Support Vector Machine


from sklearn.svm import SVC

# Hyperparameter grid for the SVM
svm_params = {
    'C': [0.1, 1.0, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': ['scale', 'auto']
}
svm_grid_search = GridSearchCV(SVC(), svm_params, cv=5)
svm_grid_search.fit(X_train, y_train)
svm_best_model = svm_grid_search.best_estimator_
svm_predictions = svm_best_model.predict(X_test)

/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_split.py:737: UserWarning: The least populated class in y has only ...
  warnings.warn(

Decision Tree


from sklearn.tree import DecisionTreeClassifier

# Hyperparameter grid for the decision tree
dt_params = {
    'max_depth': [3, 5, 10],
    'min_samples_split': [2, 5, 10],
    'criterion': ['gini', 'entropy']
}
dt_grid_search = GridSearchCV(DecisionTreeClassifier(), dt_params, cv=5)
dt_grid_search.fit(X_train, y_train)
dt_best_model = dt_grid_search.best_estimator_
dt_predictions = dt_best_model.predict(X_test)

/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_split.py:737: UserWarning: The least populated class in y has only ...
  warnings.warn(

KNN
from sklearn.neighbors import KNeighborsClassifier

# Hyperparameter grid for k-nearest neighbours
knn_params = {
    'n_neighbors': [3, 5, 7],
    'weights': ['uniform', 'distance'],
    'p': [1, 2]  # 1 for Manhattan distance, 2 for Euclidean
}
knn_grid_search = GridSearchCV(KNeighborsClassifier(), knn_params, cv=5)
knn_grid_search.fit(X_train, y_train)
knn_best_model = knn_grid_search.best_estimator_
knn_predictions = knn_best_model.predict(X_test)

/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_split.py:737: UserWarning: The least populated class in y has only ...
  warnings.warn(
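
Each fitted GridSearchCV object keeps the winning hyperparameters in best_params_ and the mean cross-validated score in best_score_. A quick way to compare the three searches, reusing the objects created above (a sketch):

# Print the selected hyperparameters and cross-validation accuracy for each model.
for name, grid in [("SVM", svm_grid_search),
                   ("Decision Tree", dt_grid_search),
                   ("KNN", knn_grid_search)]:
    print(f"{name}: best params = {grid.best_params_}, CV accuracy = {grid.best_score_:.2f}")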

Evaluating the models


from sklearn.metrics import accuracy_score, precision_score, recall_score

def evaluate_model(true_labels, predicted_labels):
    # Pass (encoded as 1) is treated as the positive class for precision and recall
    accuracy = accuracy_score(true_labels, predicted_labels)
    precision = precision_score(true_labels, predicted_labels)
    recall = recall_score(true_labels, predicted_labels)
    return accuracy, precision, recall

svm_accuracy, svm_precision, svm_recall = evaluate_model(y_test, svm_predictions)
dt_accuracy, dt_precision, dt_recall = evaluate_model(y_test, dt_predictions)
knn_accuracy, knn_precision, knn_recall = evaluate_model(y_test, knn_predictions)

print(f"SVM: Accuracy = {svm_accuracy:.2f}, Precision = {svm_precision:.2f}, Recall = {svm_recall:.2f}")
print(f"Decision Tree: Accuracy = {dt_accuracy:.2f}, Precision = {dt_precision:.2f}, Recall = {dt_recall:.2f}")
print(f"KNN: Accuracy = {knn_accuracy:.2f}, Precision = {knn_precision:.2f}, Recall = {knn_recall:.2f}")

SVM: Accuracy = 0.80, Precision = 0.00, Recall = 0.00
Decision Tree: Accuracy = 0.80, Precision = 0.00, Recall = 0.00
KNN: Accuracy = 0.90, Precision = 1.00, Recall = 0.50
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision is ill-defined and ...
  _warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1471: UndefinedMetricWarning: Precision is ill-defined and ...
  _warn_prf(average, modifier, msg_start, len(result))
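
A precision and recall of 0.00 means the SVM and the decision tree never predicted Pass on the test set, so precision has no denominator and scikit-learn sets it to 0 while emitting the UndefinedMetricWarning shown above. A confusion matrix makes this explicit; the sketch below assumes Fail was encoded as 0 and Pass as 1 by the LabelEncoder (its alphabetical default).

from sklearn.metrics import confusion_matrix

# Rows are the true classes (0 = Fail, 1 = Pass), columns the predicted classes.
for name, preds in [("SVM", svm_predictions),
                    ("Decision Tree", dt_predictions),
                    ("KNN", knn_predictions)]:
    print(name)
    print(confusion_matrix(y_test, preds, labels=[0, 1]))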

print(student_data['Status'].value_counts())

Status
Fail 46
Pass 4
Name: count, dtype: int64
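
With 46 Fail against 4 Pass, plain accuracy is misleading: a model that predicts Fail for every student already reaches 0.80 on this particular test split, which is exactly what the SVM and decision tree scores above reflect. Besides stratifying the split, one simple mitigation is to re-weight the minority class; both SVC and DecisionTreeClassifier accept a class_weight argument. The sketch below reuses the parameter grids defined earlier.

# Re-run the grid searches with class_weight='balanced', which weights each class
# inversely to its frequency so the few Pass examples count for more (a sketch).
svm_weighted = GridSearchCV(SVC(class_weight='balanced'), svm_params, cv=5)
svm_weighted.fit(X_train, y_train)

dt_weighted = GridSearchCV(DecisionTreeClassifier(class_weight='balanced'), dt_params, cv=5)
dt_weighted.fit(X_train, y_train)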
