4/27/23, 10:19 PM DM_Lab_7.ipynb - Colaboratory
import pandas as pd
import numpy as np
# Load the tennis dataset and remove the 'Day' column so it is not part of
# the frame used in the later encoding and training cells.
PlayTennis = pd.read_csv('Tennis.csv')
PlayTennis = PlayTennis.drop(columns=['Day'])
PlayTennis
Outlook Temperature Humidity Wind PlayTennis
0 Sunny Hot High Weak No
1 Sunny Hot High Strong No
2 Overcast Hot High Weak Yes
3 Rain Mild High Weak Yes
4 Rain Cool Normal Weak Yes
5 Rain Cool Normal Strong No
6 Overcast Cool Normal Strong Yes
7 Sunny Mild High Weak No
8 Sunny Cool Normal Weak Yes
9 Rain Mild Normal Weak Yes
10 Sunny Mild Normal Strong Yes
11 Overcast Mild High Strong Yes
12 Overcast Hot Normal Weak Yes
13 Rain Mild High Strong No
from sklearn.preprocessing import LabelEncoder

# Replace every categorical column with integer codes.
#
# One encoder is fitted per column and kept in `encoders`, so any column can
# be mapped back to its original labels later with
# encoders[column].inverse_transform(...). Re-fitting a single shared
# LabelEncoder on each column in turn overwrites its fitted classes_ every
# time, which leaves only the last column decodable.
encoders = {}
for column in ['Outlook', 'Temperature', 'Humidity', 'Wind', 'PlayTennis']:
    encoders[column] = LabelEncoder()
    PlayTennis[column] = encoders[column].fit_transform(PlayTennis[column])

# Keep the existing name `Le`: it refers to the encoder fitted on the target
# column 'PlayTennis', which is the state a single shared encoder ended in.
Le = encoders['PlayTennis']
PlayTennis
Outlook Temperature Humidity Wind PlayTennis
0 2 1 0 1 0
1 2 1 0 0 0
2 0 1 0 1 1
3 1 2 0 1 1
4 1 0 1 1 1
5 1 0 1 0 0
6 0 0 1 0 1
7 2 2 0 1 0
8 2 0 1 1 1
9 1 2 1 1 1
10 2 2 1 0 1
11 0 2 0 0 1
12 0 1 1 1 1
13 1 2 0 0 0
from sklearn import tree

# Target is the encoded 'PlayTennis' column; features are all other columns.
y = PlayTennis['PlayTennis']
X = PlayTennis.drop(columns=['PlayTennis'])

# Fit a decision tree with the Gini criterion ('entropy' is the alternative
# the original comment points at).
# random_state is pinned because scikit-learn randomly permutes the features
# at each split; without it, equally good candidate splits may be resolved
# differently from one run to the next, giving a different tree.
clf = tree.DecisionTreeClassifier(criterion='gini', random_state=0)
clf = clf.fit(X, y)
https://colab.research.google.com/drive/1PAofDDIeD2BH3775jRzD5mXMYNwm4LKF#scrollTo=C1ZrSl3BTKVe&printMode=true 1/3
4/27/23, 10:19 PM DM_Lab_7.ipynb - Colaboratory
# Draw the fitted tree. Column names and original class labels are passed in
# so nodes read e.g. 'Outlook <= 0.5' rather than 'x[0] <= 0.5'.
# Le was last fitted on the 'PlayTennis' column, so Le.classes_ lists the
# original target labels in the order of their integer codes.
tree.plot_tree(clf, feature_names=list(X.columns), class_names=list(Le.classes_))
[Text(0.4444444444444444, 0.9, 'x[0] <= 0.5\ngini = 0.459\nsamples = 14\nvalue = [5, 9]'),
Text(0.3333333333333333, 0.7, 'gini = 0.0\nsamples = 4\nvalue = [0, 4]'),
Text(0.5555555555555556, 0.7, 'x[2] <= 0.5\ngini = 0.5\nsamples = 10\nvalue = [5, 5]'),
Text(0.3333333333333333, 0.5, 'x[0] <= 1.5\ngini = 0.32\nsamples = 5\nvalue = [4, 1]'),
Text(0.2222222222222222, 0.3, 'x[3] <= 0.5\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'),
Text(0.1111111111111111, 0.1, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'),
Text(0.3333333333333333, 0.1, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'),
Text(0.4444444444444444, 0.3, 'gini = 0.0\nsamples = 3\nvalue = [3, 0]'),
Text(0.7777777777777778, 0.5, 'x[3] <= 0.5\ngini = 0.32\nsamples = 5\nvalue = [1, 4]'),
Text(0.6666666666666666, 0.3, 'x[1] <= 1.0\ngini = 0.5\nsamples = 2\nvalue = [1, 1]'),
Text(0.5555555555555556, 0.1, 'gini = 0.0\nsamples = 1\nvalue = [1, 0]'),
Text(0.7777777777777778, 0.1, 'gini = 0.0\nsamples = 1\nvalue = [0, 1]'),
Text(0.8888888888888888, 0.3, 'gini = 0.0\nsamples = 3\nvalue = [0, 3]')]
import graphviz

# Export the fitted tree as DOT source (out_file=None makes export_graphviz
# return the source as a string) and wrap it for rendering in the notebook.
# Column names and original class labels are passed in so nodes are labelled
# by name rather than by x[i] index. Le was last fitted on the 'PlayTennis'
# column, so Le.classes_ lists the target labels in integer-code order.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=list(X.columns),
    class_names=list(Le.classes_),
)
graph = graphviz.Source(dot_data)
graph
x[0] <= 0.5 | gini = 0.459 | samples = 14 | value = [5, 9]
|-- True:  gini = 0.0 | samples = 4 | value = [0, 4]
`-- False: x[2] <= 0.5 | gini = 0.5 | samples = 10 | value = [5, 5]
    |-- True:  x[0] <= 1.5 | gini = 0.32 | samples = 5 | value = [4, 1]
    |   |-- True:  x[3] <= 0.5 | gini = 0.5 | samples = 2 | value = [1, 1]
    |   |   |-- True:  gini = 0.0 | samples = 1 | value = [1, 0]
    |   |   `-- False: gini = 0.0 | samples = 1 | value = [0, 1]
    |   `-- False: gini = 0.0 | samples = 3 | value = [3, 0]
    `-- False: x[3] <= 0.5 | gini = 0.32 | samples = 5 | value = [1, 4]
        |-- True:  x[1] <= 1.0 | gini = 0.5 | samples = 2 | value = [1, 1]
        |   |-- True:  gini = 0.0 | samples = 1 | value = [1, 0]
        |   `-- False: gini = 0.0 | samples = 1 | value = [0, 1]
        `-- False: gini = 0.0 | samples = 3 | value = [0, 3]
# Predict labels for the same rows the tree was fitted on, then compare them
# element-wise with the true labels (True where the prediction matches).
# NOTE(review): this measures agreement on the training data only; it is not
# a held-out accuracy estimate.
X_pred = clf.predict(X)
y.eq(X_pred)
https://colab.research.google.com/drive/1PAofDDIeD2BH3775jRzD5mXMYNwm4LKF#scrollTo=C1ZrSl3BTKVe&printMode=true 2/3
4/27/23, 10:19 PM DM_Lab_7.ipynb - Colaboratory
0 True
1 True
2 True
3 True
4 True
5 True
6 True
7 True
8 True
9 True
10 True
11 True
12 True
13 True
Name: PlayTennis, dtype: bool
check 0s completed at 10:18 PM
https://colab.research.google.com/drive/1PAofDDIeD2BH3775jRzD5mXMYNwm4LKF#scrollTo=C1ZrSl3BTKVe&printMode=true 3/3