[go: up one dir, main page]

0% found this document useful (0 votes)
17 views11 pages

Ml3.ipynb - Colab

Uploaded by

Atharva Dhorje
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
17 views11 pages

Ml3.ipynb - Colab

Uploaded by

Atharva Dhorje
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 11

11/10/2024, 23:20 ml3.ipynb - Colab

# Data handling (pandas / numpy) and plotting (matplotlib / seaborn) libraries
# used by the notebook cells below. One import per line.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

df = pd.read_csv('Churn_Modelling.csv')

df.head(10)

RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance NumOfProducts HasCrCard IsActiveMember
0 1 15634602 Hargrave 619 France Female 42 2 0.00 1 1 1
1 2 15647311 Hill 608 Spain Female 41 1 83807.86 1 0 1
2 3 15619304 Onio 502 France Female 42 8 159660.80 3 1 0
3 4 15701354 Boni 699 France Female 39 1 0.00 2 0 0
4 5 15737888 Mitchell 850 Spain Female 43 2 125510.82 1 1 1
5 6 15574012 Chu 645 Spain Male 44 8 113755.78 2 1 0
6 7 15592531 Bartlett 822 France Male 50 7 0.00 2 1 1
7 8 15656148 Obinna 376 Germany Female 29 4 115046.74 4 1 0
8 9 15792365 He 501 France Male 44 4 142051.0 2 0 1

df.isnull().sum()

0
RowNumber 0
CustomerId 0
Surname 0
CreditScore 0
Geography 0
Gender 0
Age 0
Tenure 0
Balance 0
NumOfProducts 0
HasCrCard 0
IsActiveMember 0
EstimatedSalary 0
Exited 0

df.describe().T

https://colab.research.google.com/drive/1Lr6payo0UxiyJ7Is_FNsk2D2LDxbwPX- 1/
11/10/2024, ml3.ipynb -
23:20 Colab
count mean std min 25% 50% 75% max
RowNumber 10000.0 5.000500e+03 2886.895680 1.00 2500.75 5.000500e+03 7.500250e+03 10000.00
CustomerId 10000.0 1.569094e+07 71936.186123 15565701.00 15628528.25 1.569074e+07 1.575323e+07 15815690.00
CreditScore 10000.0 6.505288e+02 96.653299 350.00 584.00 6.520000e+02 7.180000e+02 850.00
Age 10000.0 3.892180e+01 10.487806 18.00 32.00 3.700000e+01 4.400000e+01 92.00
Tenure 10000.0 5.012800e+00 2.892174 0.00 3.00 5.000000e+00 7.000000e+00 10.00
Balance 10000.0 7.648589e+04 62397.405202 0.00 0.00 9.719854e+04 1.276442e+05 250898.09
NumOfProducts 10000.0 1.530200e+00 0.581654 1.00 1.00 1.000000e+00 2.000000e+00 4.00
HasCrCard 10000.0 7.055000e-01 0.455840 0.00 0.00 1.000000e+00 1.000000e+00 1.00
IsActiveMember 10000.0 5.151000e-01 0.499797 0.00 0.00 1.000000e+00 1.000000e+00 1.00

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype
 0   RowNumber        10000 non-null  int64
 1   CustomerId       10000 non-null  int64
 2   Surname          10000 non-null  object
 3   CreditScore      10000 non-null  int64
 4   Geography        10000 non-null  object
 5   Gender           10000 non-null  object
 6   Age              10000 non-null  int64
 7   Tenure           10000 non-null  int64
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64
 10  HasCrCard        10000 non-null  int64
 11  IsActiveMember   10000 non-null  int64
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB

df[df['Balance']==0.0]

RowNumber CustomerI Surname CreditScor Geography Gender Age Tenure Balance NumOfProduct HasCrCard IsActiveM
d e s e
https://colab.research.google.com/drive/1Lr6payo0UxiyJ7Is_FNsk2D2LDxbwPX- 2/
0 1 15634602 Hargrave 619 France Female 42 2 0.0 1 1
11/10/2024, ml3.ipynb -
23:20 3617 rows × 14 columns Colab

sns.barplot(x='Exited',y='Balance',data=df)

https://colab.research.google.com/drive/1Lr6payo0UxiyJ7Is_FNsk2D2LDxbwPX- 3/
11/10/2024, ml3.ipynb -
23:20 Colab
<Axes: xlabel='Exited', ylabel='Balance'>

b_zero=df[df['Balance']==0.0]

b_zero.head()

RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance NumOfProducts HasCrCard IsActiveMember
0 1 15634602 Hargrave 619 France Female 42 2 0.0 1 1 1
3 4 15701354 Boni 699 France Female 39 1 0.0 2 0 0
6 7 15592531 Bartlett 822 France Male 50 7 0.0 2 1 1
11 12 15737173 Andrews 497 Spain Male 24 3 0.0 2 1 0
12 13 15632264 Kay 476 France Female 34 10 0.0 2 1 0

df['Geography'].unique()

array(['France', 'Spain', 'Germany'], dtype=object)

from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
df['Geography'] = le.fit_transform(df['Geography'])

df.head(10)

RowNumber CustomerId Surname CreditScore Geography Gender Age Tenure Balance NumOfProducts HasCrCard IsActiveMember
0 1 15634602 Hargrave 619 0 Female 42 2 0.00 1 1 1
1 2 15647311 Hill 608 2 Female 41 1 83807.86 1 0 1
2 3 15619304 Onio 502 0 Female 42 8 159660.80 3 1 0
3 4 15701354 Boni 699 0 Female 39 1 0.00 2 0 0
4 5 15737888 Mitchell 850 2 Female 43 2 125510.82 1 1 1
5 6 15574012 Chu 645 2 Male 44 8 113755.78 2 1 0
6 7 15592531 Bartlett 822 0 Male 50 7 0.00 2 1 1
7 8 15656148 Obinna 376 1 Female 29 4 115046.74 4 1 0
8 9 15792365 He 501 0 Male 44 4 142051.0 2 0 1

df['Gender'] = le.fit_transform(df['Gender'])
df

https://colab.research.google.com/drive/1Lr6payo0UxiyJ7Is_FNsk2D2LDxbwPX- 4/
11/10/2024, ml3.ipynb -
23:20 Colab
RowNumber CustomerId Surname CreditScor Geography Gender Age Tenure Balance NumOfProduct HasCrCard IsActiveMe
e s m
0 1 15634602 Hargrave 619 0 0 42 2 0.00 1 1

1 2 15647311 Hill 608 2 0 41 1 83807.86 1 0

2 3 15619304 Onio 502 0 0 42 8 159660.8 3 1


0
3 4 15701354 Boni 699 0 0 39 1 0.00 2 0

4 5 15737888 Mitchell 850 2 0 43 2 125510.8 1 1


2
... ... ... ... ... ... ... ... ... ... ... ...

9995 9996 15606229 Obijiaku 771 0 1 39 5 0.00 2 1

9996 9997 15569892 Johnstone 516 0 1 35 10 57369.61 1 1

9997 9998 15584532 Liu 709 0 0 36 7 0.00 1 0

9998 9999 15682355 Sabbatini 772 1 1 42 3 75075.31 2 1

10000 rows × 14 columns

df.drop(['RowNumber','CustomerId','Surname'],axis=1,inplace=True)

df

CreditScore Geography Gender Age Tenure Balance NumOfProduct HasCrCard IsActiveMembe EstimatedSalar Exited
s r y
0 619 0 0 42 2 0.00 1 1 1 101348.88 1

1 608 2 0 41 1 83807.86 1 0 1 112542.58 0

2 502 0 0 42 8 159660.8 3 1 0 113931.57 1


0
3 699 0 0 39 1 0.00 2 0 0 93826.63 0

4 850 2 0 43 2 125510.8 1 1 1 79084.10 0


2
... ... ... ... ... ... ... ... ... ... ... ...

9995 771 0 1 39 5 0.00 2 1 0 96270.64 0

9996 516 0 1 35 10 57369.61 1 1 1 101699.77 0

9997 709 0 0 36 7 0.00 1 0 1 42085.58 1

9998 772 1 1 42 3 75075.31 2 1 0 92888.52 1

9999 792 0 0 28 4 130142.7 1 1 0 38190.78 0


9

10000 rows × 11 columns

df[(df['Balance']==0) & (df['Exited']==0) & (df['IsActiveMember']==0)]

1424 rows × 11 columns


CreditScor Geography Gender Age Tenure Balance NumOfProduct HasCrCard IsActiveMembe EstimatedSalar Exited
e s r y
3 699 0 0 39 1 0.0 2 0 0 93826.63 0
df.describe().T
11 497 2 1 24 3 0.0 2 1 0 76390.01 0
https://colab.research.google.com/drive/1Lr6payo0UxiyJ7Is_FNsk2D2LDxbwPX-
12 476 0 0 34 10 0.0 2 1 0 26260.98 0 5/
11/10/2024, ml3.ipynb -
23:20 Colab
count mean std min 25% 50% 75% max

CreditScore 10000.0 650.528800 96.653299 350.00 584.00 652.000 718.0000 850.00

Geography 10000.0 0.746300 0.827529 0.00 0.00 0.000 1.0000 2.00

Gender 10000.0 0.545700 0.497932 0.00 0.00 1.000 1.0000 1.00

Age 10000.0 38.921800 10.487806 18.00 32.00 37.000 44.0000 92.00

Tenure 10000.0 5.012800 2.892174 0.00 3.00 5.000 7.0000 10.00

Balance 10000.0 76485.889288 62397.405202 0.00 0.00 97198.540 127644.2400 250898.09
NumOfProducts 10000.0 1.530200 0.581654 1.00 1.00 1.000 2.0000 4.00

HasCrCard 10000.0 0.705500 0.455840 0.00 0.00 1.000 1.0000 1.00

IsActiveMember 10000.0 0.515100 0.499797 0.00 0.00 1.000 1.0000 1.00

EstimatedSalary 10000.0 100090.239881 57510.492818 11.58 51002.11 100193.915 149388.2475 199992.48
Exited 10000.0 0.203700 0.402769 0.00 0.00 0.000 0.0000 1.00

# Open a 12x12-inch figure, then draw the annotated pairwise-correlation
# matrix of the DataFrame's columns onto it.
fig, ax = plt.subplots(figsize=(12, 12))
sns.heatmap(data=df.corr(), annot=True)

https://colab.research.google.com/drive/1Lr6payo0UxiyJ7Is_FNsk2D2LDxbwPX- 6/
11/10/2024, ml3.ipynb -
23:20 Colab
<Axes: >

sns.pairplot(df)

https://colab.research.google.com/drive/1Lr6payo0UxiyJ7Is_FNsk2D2LDxbwPX- 7/
11/10/2024, ml3.ipynb -
23:20 Colab
<seaborn.axisgrid.PairGrid at 0x7b32832bf8b0>

https://colab.research.google.com/drive/1Lr6payo0UxiyJ7Is_FNsk2D2LDxbwPX- 8/
11/10/2024, ml3.ipynb -
23:20 Colab

sns.histplot(df['CreditScore'])

<Axes: xlabel='CreditScore', ylabel='Count'>

sns.histplot(df['Geography'])

<Axes: xlabel='Geography', ylabel='Count'>

sns.histplot(df['Balance'])

https://colab.research.google.com/drive/1Lr6payo0UxiyJ7Is_FNsk2D2LDxbwPX- 9/
11/10/2024, ml3.ipynb -
23:20 Colab
<Axes: xlabel='Balance', ylabel='Count'>

sns.histplot(df['EstimatedSalary'])

<Axes: xlabel='EstimatedSalary', ylabel='Count'>

sns.histplot(df['NumOfProducts'])

<Axes: xlabel='NumOfProducts', ylabel='Count'>

from sklearn.model_selection import train_test_split

# Feature matrix: every column of df except the 'Exited' column.
x = df.drop('Exited', axis=1)

https://colab.research.google.com/drive/1Lr6payo0UxiyJ7Is_FNsk2D2LDxbwPX- 1
11/10/2024, ml3.ipynb -
y = df['Exited']
x_train,x_test,y_train,y_test = train_test_split(x,y,test_size=0.2,random_state=42)

from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()
nb.fit(x_train,y_train)

▾ GaussianNB i
GaussianNB()

y_pred = nb.predict(x_test)

y_pred

array([0, 0, 0, ..., 0, 0, 0])

# Evaluation helpers used by the cells that follow.
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    classification_report,
)

print(classification_report(y_test,y_pred))

precision recall f1-score support

0 0.81 0.97 0.88 1607


1 0.33 0.07 0.11 393

accuracy 0.79 2000


macro avg 0.57 0.52 0.50 2000
weighted avg 0.72 0.79 0.73 2000

accuracy_score(y_test,y_pred)

0.79

# Confusion matrix of the test-set predictions; in scikit-learn's layout
# rows are the true classes and columns are the predicted classes.
cm = confusion_matrix(y_test, y_pred)
cm

array([[1553, 54],
[ 366, 27]])

sns.heatmap(cm,annot=True,fmt='d')

<Axes: >

from sklearn.tree import DecisionTreeClassifier

# Fit a decision tree with default constructor arguments on the training split.
dt = DecisionTreeClassifier()
dt.fit(x_train, y_train)

▾ DecisionTreeClassifier i
DecisionTreeClassifier()
https://colab.research.google.com/drive/1Lr6payo0UxiyJ7Is_FNsk2D2LDxbwPX- 10

You might also like