e22cseu1389-assignment8-1
November 18, 2024
[4]: import pandas as pd
import numpy as np
# In[6]:
# Toy weather dataset, written one observation per row so each record can be
# read across. Note that "Windy" holds the strings "False"/"True", not booleans.
column_names = ["Temperature", "Humidity", "Windy", "Play Football"]
observations = [
    ("Hot", "Normal", "False", "No"),
    ("Mild", "High", "True", "No"),
    ("Cool", "High", "False", "Yes"),
    ("Mild", "High", "False", "Yes"),
    ("Cool", "Normal", "False", "Yes"),
    ("Cool", "Normal", "True", "No"),
    ("Cool", "Normal", "False", "Yes"),
    ("Mild", "High", "True", "No"),
    ("Cool", "Normal", "False", "Yes"),
    ("Cool", "Normal", "False", "Yes"),
    ("Mild", "Normal", "True", "Yes"),
    ("Mild", "High", "True", "No"),
    ("Hot", "Normal", "False", "No"),
    ("Cool", "Normal", "False", "No"),
]

# Transpose the rows into the column-oriented dict {column name: list of values}.
data = {
    name: list(values)
    for name, values in zip(column_names, zip(*observations))
}
df = pd.DataFrame(data)
df
[6]: Temperature Humidity Windy Play Football
0 Hot Normal False No
1 Mild High True No
2 Cool High False Yes
3 Mild High False Yes
4 Cool Normal False Yes
5 Cool Normal True No
6 Cool Normal False Yes
7 Mild High True No
8 Cool Normal False Yes
9 Cool Normal False Yes
10 Mild Normal True Yes
11 Mild High True No
12 Hot Normal False No
13 Cool Normal False No
# In[7]:
# df.shape is a (rows, columns) tuple.
dataset_shape = df.shape
print("Shape of the dataset:", dataset_shape)
Shape of the dataset: (14, 4)
# In[9]:
# Count every observed (Play Football, Temperature, Humidity, Windy)
# combination, then pivot the innermost key (Windy) into columns. A combination
# that never occurs for a given Windy value is filled with 0.
group_keys = ["Play Football", "Temperature", "Humidity", "Windy"]
combination_counts = df.groupby(group_keys).size()
frequency_table = combination_counts.unstack(fill_value=0)
frequency_table
[9]: Windy False True
Play Football Temperature Humidity
No Cool Normal 1 1
Hot Normal 2 0
Mild High 0 3
Yes Cool High 1 0
Normal 4 0
Mild High 1 0
Normal 0 1
# In[11]:
def _class_conditional(feature, label):
    """Estimate P(feature value | Play Football == label) from `df`.

    Returns a Series indexed by the feature values seen in that class; each
    entry is the value's count divided by the number of rows in the class.
    Values that never occur in the class are absent from the result rather
    than listed as 0.
    """
    in_class = df[df["Play Football"] == label]
    return in_class[feature].value_counts() / len(in_class)


likelihood_temp_yes = _class_conditional("Temperature", "Yes")
likelihood_temp_no = _class_conditional("Temperature", "No")
likelihood_humi_yes = _class_conditional("Humidity", "Yes")
likelihood_humi_no = _class_conditional("Humidity", "No")
likelihood_windy_yes = _class_conditional("Windy", "Yes")
likelihood_windy_no = _class_conditional("Windy", "No")

# One section per feature: the 'Yes' table followed by the 'No' table.
likelihood_report = [
    [
        ("Likelihood of Temperature given 'Yes':\n", likelihood_temp_yes),
        ("Likelihood of Temperature given 'No':\n", likelihood_temp_no),
    ],
    [
        ("Likelihood of Humidity given 'Yes':\n", likelihood_humi_yes),
        ("Likelihood of Humidity given 'No':\n", likelihood_humi_no),
    ],
    [
        ("Likelihood of Windy given 'Yes':\n", likelihood_windy_yes),
        ("Likelihood of Windy given 'No':\n", likelihood_windy_no),
    ],
]

for position, section in enumerate(likelihood_report):
    if position > 0:
        print()  # blank line between features, none after the last one
    for heading, table in section:
        print(heading, table)
Likelihood of Temperature given 'Yes':
Temperature
Cool 0.714286
Mild 0.285714
Name: count, dtype: float64
Likelihood of Temperature given 'No':
Temperature
Mild 0.428571
Hot 0.285714
Cool 0.285714
Name: count, dtype: float64
Likelihood of Humidity given 'Yes':
Humidity
Normal 0.714286
High 0.285714
Name: count, dtype: float64
Likelihood of Humidity given 'No':
Humidity
Normal 0.571429
High 0.428571
Name: count, dtype: float64
Likelihood of Windy given 'Yes':
Windy
False 0.857143
True 0.142857
Name: count, dtype: float64
Likelihood of Windy given 'No':
Windy
True 0.571429
False 0.428571
Name: count, dtype: float64
# In[12]:
def naive_bayes_manual(row, data=None):
    """Classify one observation with an unsmoothed categorical Naive Bayes rule.

    For each class ("Yes" / "No") the score is

        P(Temperature | class) * P(Humidity | class) * P(Windy | class) * P(class)

    where every probability is a plain relative frequency taken from the
    training frame. No smoothing is applied, so a feature value never seen in
    a class drives that class's score to 0.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide "Temperature", "Humidity" and "Windy". This is what
        ``DataFrame.apply(..., axis=1)`` passes in.
    data : pandas.DataFrame, optional
        Training frame with the three feature columns plus "Play Football".
        Defaults to the notebook-level ``df``, so existing calls such as
        ``df.apply(naive_bayes_manual, axis=1)`` keep working.

    Returns
    -------
    str
        "Yes" if the "Yes" score is strictly greater than the "No" score,
        otherwise "No" (ties go to "No").
    """
    if data is None:
        data = df  # fall back to the notebook's training frame

    target = "Play Football"
    features = ("Temperature", "Humidity", "Windy")
    total_rows = len(data)

    scores = {}
    for label in ("Yes", "No"):
        in_class = data[data[target] == label]
        class_size = len(in_class)
        if class_size == 0:
            # A class with no training rows cannot be scored; treat it as
            # impossible instead of dividing by zero.
            scores[label] = 0.0
            continue

        # Multiply the per-feature likelihoods first and the prior last, the
        # same order as the hand-written version, so scores are bit-identical.
        likelihood = 1.0
        for feature in features:
            matches = int((in_class[feature] == row[feature]).sum())
            likelihood = likelihood * (matches / class_size)

        # The prior is computed here, so the function does not depend on
        # module-level p_yes / p_no being defined first.
        prior = class_size / total_rows
        scores[label] = likelihood * prior

    if scores["Yes"] > scores["No"]:
        return "Yes"
    return "No"
# Class priors P(Yes) and P(No), estimated as label frequencies over all rows.
n_rows = len(df)
p_yes = len(df.loc[df["Play Football"] == "Yes"]) / n_rows
p_no = len(df.loc[df["Play Football"] == "No"]) / n_rows

# Classify every training row with the hand-written rule and store the result
# next to the true label.
df["Predicted Play Football"] = df.apply(naive_bayes_manual, axis=1)

report_columns = [
    "Temperature",
    "Humidity",
    "Windy",
    "Play Football",
    "Predicted Play Football",
]
print(df[report_columns])
Temperature Humidity Windy Play Football Predicted Play Football
0 Hot Normal False No No
1 Mild High True No No
2 Cool High False Yes Yes
3 Mild High False Yes No
4 Cool Normal False Yes Yes
5 Cool Normal True No No
6 Cool Normal False Yes Yes
7 Mild High True No No
8 Cool Normal False Yes Yes
9 Cool Normal False Yes Yes
10 Mild Normal True Yes No
11 Mild High True No No
12 Hot Normal False No No
13 Cool Normal False No Yes
# In[13]:
from sklearn.naive_bayes import CategoricalNB

# Integer codes for every categorical column. "Windy" is keyed by the strings
# "False"/"True" because that is how the column is stored in df.
encodings = {
    "Temperature": {"Hot": 0, "Mild": 1, "Cool": 2},
    "Humidity": {"Normal": 0, "High": 1},
    "Windy": {"False": 0, "True": 1},
    "Play Football": {"No": 0, "Yes": 1},
}

# Encode a copy so the original string-valued frame stays untouched.
df_numeric = df.copy()
for column, codes in encodings.items():
    df_numeric[column] = df_numeric[column].map(codes)

X = df_numeric[["Temperature", "Humidity", "Windy"]]
Y = df_numeric["Play Football"]

# NOTE(review): CategoricalNB is built with its default settings here, while
# the manual computation uses raw relative frequencies with no smoothing.
# Confirm the library defaults are what the comparison intends.
nb_model = CategoricalNB()
nb_model.fit(X, Y)
predictions = nb_model.predict(X)

# Decode 1 -> "Yes", anything else -> "No". This writes into the same
# "Predicted Play Football" column, replacing any values already stored there.
decoded = ["Yes" if code == 1 else "No" for code in predictions]
df["Predicted Play Football"] = decoded

report_columns = [
    "Temperature",
    "Humidity",
    "Windy",
    "Play Football",
    "Predicted Play Football",
]
print(df[report_columns])
Temperature Humidity Windy Play Football Predicted Play Football
0 Hot Normal False No No
1 Mild High True No No
2 Cool High False Yes Yes
3 Mild High False Yes No
4 Cool Normal False Yes Yes
5 Cool Normal True No No
6 Cool Normal False Yes Yes
7 Mild High True No No
8 Cool Normal False Yes Yes
9 Cool Normal False Yes Yes
10 Mild Normal True Yes No
11 Mild High True No No
12 Hot Normal False No No
13 Cool Normal False No Yes
# In[14]:
# Closing remark comparing the manual result with the model's prediction.
conclusion = (
    "We took Temperature = Cool, Humidity = High, Windy = False as input."
    "\nFrom step 5 (manual calculation) and step 6 (model prediction), we can see "
    "that the result match with each other."
)
print(conclusion)
We took Temperature = Cool, Humidity = High, Windy = False as input.
From step 5 (manual calculation) and step 6 (model prediction), we can see that
the result match with each other.