0% found this document useful (0 votes)

79 views17 pages

Project3: Loading Library

1. The document loads libraries and data to analyze a personal loan dataset with 5000 customers and 14 variables. 2. Exploratory data analysis is performed including checking for missing data, outliers, and negative values. The Experience variable is made positive. 3. The data is imputed for missing values in the Family variable and categorical variables are converted to factors.

Uploaded by

Shreya Garg

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

79 views17 pages

Project3: Loading Library

Uploaded by

Shreya Garg

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as DOCX, PDF, TXT or read online on Scribd

You are on page 1/ 17

project3

#LOADING LIBRARY
library(readr)
library(readxl)
library(caTools)
library(rpart)
library(rpart.plot)
library(randomForest)
library(ROCR)
library(ineq)
library(StatMeasures)
library(rattle)

#LOADING DATA
Customerdata=read_excel("Thera Bank_Personal_Loan_Modelling-dataset-1.xlsx")
attach(Customerdata)

#Exploratory Data Analysis

#1 coloum name treatment
names(Customerdata)

## [1] "ID" "Age (in years)"

## [3] "Experience (in years)" "Income (in K/month)"
## [5] "ZIP Code" "Family members"
## [7] "CCAvg" "Education"
## [9] "Mortgage" "Personal Loan"
## [11] "Securities Account" "CD Account"
## [13] "Online" "CreditCard"

colnames(Customerdata)=make.names(colnames(Customerdata))

#2 Data overview
head(Customerdata)

## # A tibble: 6 x 14
## ID Age..in.years. Experience..in.~ Income..in.K.mo~ ZIP.Code
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 25 1 49 91107
## 2 2 45 19 34 90089
## 3 3 39 15 11 94720
## 4 4 35 9 100 94112
## 5 5 35 8 45 91330
## 6 6 37 13 29 92121
## # ... with 9 more variables: Family.members <dbl>, CCAvg <dbl>,
## # Education <dbl>, Mortgage <dbl>, Personal.Loan <dbl>,
## # Securities.Account <dbl>, CD.Account <dbl>, Online <dbl>,
## # CreditCard <dbl>

summary(Customerdata)

## ID Age..in.years. Experience..in.years. Income..in.K.month.

## Min. : 1 Min. :23.00 Min. :-3.0 Min. : 8.00
## 1st Qu.:1251 1st Qu.:35.00 1st Qu.:10.0 1st Qu.: 39.00
## Median :2500 Median :45.00 Median :20.0 Median : 64.00
## Mean :2500 Mean :45.34 Mean :20.1 Mean : 73.77
## 3rd Qu.:3750 3rd Qu.:55.00 3rd Qu.:30.0 3rd Qu.: 98.00
## Max. :5000 Max. :67.00 Max. :43.0 Max. :224.00
##
## ZIP.Code Family.members CCAvg Education
## Min. : 9307 Min. :1.000 Min. : 0.000 Min. :1.000
## 1st Qu.:91911 1st Qu.:1.000 1st Qu.: 0.700 1st Qu.:1.000
## Median :93437 Median :2.000 Median : 1.500 Median :2.000
## Mean :93153 Mean :2.397 Mean : 1.938 Mean :1.881
## 3rd Qu.:94608 3rd Qu.:3.000 3rd Qu.: 2.500 3rd Qu.:3.000
## Max. :96651 Max. :4.000 Max. :10.000 Max. :3.000
## NA's :18
## Mortgage Personal.Loan Securities.Account CD.Account
## Min. : 0.0 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.: 0.0 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median : 0.0 Median :0.000 Median :0.0000 Median :0.0000
## Mean : 56.5 Mean :0.096 Mean :0.1044 Mean :0.0604
## 3rd Qu.:101.0 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :635.0 Max. :1.000 Max. :1.0000 Max. :1.0000
##
## Online CreditCard
## Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.000
## Median :1.0000 Median :0.000
## Mean :0.5968 Mean :0.294
## 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.000
##

str(Customerdata)

## Classes 'tbl_df', 'tbl' and 'data.frame': 5000 obs. of 14 variables:

## $ ID : num 1 2 3 4 5 6 7 8 9 10 ...
## $ Age..in.years. : num 25 45 39 35 35 37 53 50 35 34 ...
## $ Experience..in.years.: num 1 19 15 9 8 13 27 24 10 9 ...
## $ Income..in.K.month. : num 49 34 11 100 45 29 72 22 81 180 ...
## $ ZIP.Code : num 91107 90089 94720 94112 91330 ...
## $ Family.members : num 4 3 1 1 4 4 2 1 3 1 ...
## $ CCAvg : num 1.6 1.5 1 2.7 1 0.4 1.5 0.3 0.6 8.9 ...
## $ Education : num 1 1 1 2 2 2 2 3 2 3 ...
## $ Mortgage : num 0 0 0 0 0 155 0 0 104 0 ...
## $ Personal.Loan : num 0 0 0 0 0 0 0 0 0 1 ...
## $ Securities.Account : num 1 1 0 0 0 0 0 0 0 0 ...
## $ CD.Account : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Online : num 0 0 0 0 0 1 1 0 1 0 ...
## $ CreditCard : num 0 0 0 0 1 0 0 1 0 0 ...

Customerdata$Education=as.factor(Customerdata$Education)
Customerdata$Personal.Loan=as.factor(Customerdata$Personal.Loan)
Customerdata$Securities.Account=as.factor(Customerdata$Securities.Account)
Customerdata$CD.Account=as.factor(Customerdata$CD.Account)
Customerdata$Online=as.factor(Customerdata$Online)
Customerdata$CreditCard=as.factor(Customerdata$CreditCard)

str(Customerdata)

## Classes 'tbl_df', 'tbl' and 'data.frame': 5000 obs. of 14 variables:

## $ ID : num 1 2 3 4 5 6 7 8 9 10 ...
## $ Age..in.years. : num 25 45 39 35 35 37 53 50 35 34 ...
## $ Experience..in.years.: num 1 19 15 9 8 13 27 24 10 9 ...
## $ Income..in.K.month. : num 49 34 11 100 45 29 72 22 81 180 ...
## $ ZIP.Code : num 91107 90089 94720 94112 91330 ...
## $ Family.members : num 4 3 1 1 4 4 2 1 3 1 ...
## $ CCAvg : num 1.6 1.5 1 2.7 1 0.4 1.5 0.3 0.6 8.9 ...
## $ Education : Factor w/ 3 levels "1","2","3": 1 1 1 2 2 2 2 3
2 3 ...
## $ Mortgage : num 0 0 0 0 0 155 0 0 104 0 ...
## $ Personal.Loan : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1
2 ...
## $ Securities.Account : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 1 1 1
1 ...
## $ CD.Account : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1
1 ...
## $ Online : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 2 1 2
1 ...
## $ CreditCard : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 2 1
1 ...

Customerdata=Customerdata[,-c(1,5)]

#4Checking Data set for missing value

library(mice)
library(VIM)
any(is.na.data.frame(Customerdata))

## [1] TRUE

impute=mice(Customerdata[,],m=3,seed=123)

##
## iter imp variable
## 1 1 Family.members
## 1 2 Family.members
## 1 3 Family.members
## 2 1 Family.members
## 2 2 Family.members
## 2 3 Family.members
## 3 1 Family.members
## 3 2 Family.members
## 3 3 Family.members
## 4 1 Family.members
## 4 2 Family.members
## 4 3 Family.members
## 5 1 Family.members
## 5 2 Family.members
## 5 3 Family.members

print(impute)

## Class: mids
## Number of multiple imputations: 3
## Imputation methods:
## Age..in.years. Experience..in.years. Income..in.K.month.
## "" "" ""
## Family.members CCAvg Education
## "pmm" "" ""
## Mortgage Personal.Loan Securities.Account
## "" "" ""
## CD.Account Online CreditCard
## "" "" ""
## PredictorMatrix:
## Age..in.years. Experience..in.years.
## Age..in.years. 0 1
## Experience..in.years. 1 0
## Income..in.K.month. 1 1
## Family.members 1 1
## CCAvg 1 1
## Education 1 1
## Income..in.K.month. Family.members CCAvg Education
## Age..in.years. 1 1 1 1
## Experience..in.years. 1 1 1 1
## Income..in.K.month. 0 1 1 1
## Family.members 1 0 1 1
## CCAvg 1 1 0 1
## Education 1 1 1 0
## Mortgage Personal.Loan Securities.Account CD.Account
## Age..in.years. 1 1 1 1
## Experience..in.years. 1 1 1 1
## Income..in.K.month. 1 1 1 1
## Family.members 1 1 1 1
## CCAvg 1 1 1 1
## Education 1 1 1 1
## Online CreditCard
## Age..in.years. 1 1
## Experience..in.years. 1 1
## Income..in.K.month. 1 1
## Family.members 1 1
## CCAvg 1 1
## Education 1 1

newdata=complete(impute,1)
any(is.na.data.frame(newdata))

## [1] FALSE

#3checking for outliers

boxplot(newdata)

#5Negative value treatment

newdata$Experience..in.years.=abs(newdata$Experience..in.years.)
summary(newdata)

## Age..in.years. Experience..in.years. Income..in.K.month. Family.members

## Min. :23.00 Min. : 0.00 Min. : 8.00 Min. :1.000
## 1st Qu.:35.00 1st Qu.:10.00 1st Qu.: 39.00 1st Qu.:1.000
## Median :45.00 Median :20.00 Median : 64.00 Median :2.000
## Mean :45.34 Mean :20.13 Mean : 73.77 Mean :2.396
## 3rd Qu.:55.00 3rd Qu.:30.00 3rd Qu.: 98.00 3rd Qu.:3.000
## Max. :67.00 Max. :43.00 Max. :224.00 Max. :4.000
## CCAvg Education Mortgage Personal.Loan
## Min. : 0.000 1:2096 Min. : 0.0 0:4520
## 1st Qu.: 0.700 2:1403 1st Qu.: 0.0 1: 480
## Median : 1.500 3:1501 Median : 0.0
## Mean : 1.938 Mean : 56.5
## 3rd Qu.: 2.500 3rd Qu.:101.0
## Max. :10.000 Max. :635.0
## Securities.Account CD.Account Online CreditCard
## 0:4478 0:4698 0:2016 0:3530
## 1: 522 1: 302 1:2984 1:1470
##
##
##
##

#Decision Tree.
#Spliting Data into train and test data.
seed=2000
set.seed(seed)
sample=sample.split(newdata,SplitRatio = 0.7)
train_data=subset(newdata,sample == TRUE)
test_data=subset(newdata,sample == FALSE)
nrow(train_data)

## [1] 3333

nrow(test_data)

## [1] 1667

#Checking Response variable

table(train_data$Personal.Loan)

##
## 0 1
## 3025 308

str(train_data)

## 'data.frame': 3333 obs. of 12 variables:

## $ Age..in.years. : num 25 45 39 35 37 34 65 29 48 59 ...
## $ Experience..in.years.: num 1 19 15 9 13 9 39 5 23 32 ...
## $ Income..in.K.month. : num 49 34 11 100 29 180 105 45 114 40 ...
## $ Family.members : num 4 3 1 1 4 1 4 3 2 4 ...
## $ CCAvg : num 1.6 1.5 1 2.7 0.4 8.9 2.4 0.1 3.8 2.5 ...
## $ Education : Factor w/ 3 levels "1","2","3": 1 1 1 2 2 3 3 2
3 2 ...
## $ Mortgage : num 0 0 0 0 155 0 0 0 0 0 ...
## $ Personal.Loan : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1
1 ...
## $ Securities.Account : Factor w/ 2 levels "0","1": 2 2 1 1 1 1 1 1 2
1 ...
## $ CD.Account : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1
1 ...
## $ Online : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 2 1
2 ...
## $ CreditCard : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1
1 ...

DT=train_data #DECISION TREE TRAIN DATA

RF=train_data #RANDOM FOREST TRAIN DATA
DS=test_data #DECISION TREE TEST DATA
RS=test_data #RANDOME FOREST TEST DATA

#Buliding Cart Model

Model1=rpart(formula = Personal.Loan~.,data=DT,method =
"class",cp=0,minsplit=100,minbucket=10,xval=10)
fancyRpartPlot(Model1)

Model1
## n= 3333
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 3333 308 0 (0.907590759 0.092409241)
## 2) Income..in.K.month.< 113.5 2671 56 0 (0.979034070 0.020965930)
## 4) CCAvg< 2.95 2475 11 0 (0.995555556 0.004444444) *
## 5) CCAvg>=2.95 196 45 0 (0.770408163 0.229591837)
## 10) CD.Account=0 178 32 0 (0.820224719 0.179775281) *
## 11) CD.Account=1 18 5 1 (0.277777778 0.722222222) *
## 3) Income..in.K.month.>=113.5 662 252 0 (0.619335347 0.380664653)
## 6) Education=1 449 50 0 (0.888641425 0.111358575)
## 12) Family.members< 2.5 399 0 0 (1.000000000 0.000000000) *
## 13) Family.members>=2.5 50 0 1 (0.000000000 1.000000000) *
## 7) Education=2,3 213 11 1 (0.051643192 0.948356808)
## 14) Income..in.K.month.< 116.5 18 7 0 (0.611111111 0.388888889) *
## 15) Income..in.K.month.>=116.5 195 0 1 (0.000000000 1.000000000) *

#Compleximity parameter chart

printcp(Model1)

##
## Classification tree:
## rpart(formula = Personal.Loan ~ ., data = DT, method = "class",
## cp = 0, minsplit = 100, minbucket = 10, xval = 10)
##
## Variables actually used in tree construction:
## [1] CCAvg CD.Account Education
## [4] Family.members Income..in.K.month.
##
## Root node error: 308/3333 = 0.092409
##
## n= 3333
##
## CP nsplit rel error xerror xstd
## 1 0.310065 0 1.00000 1.00000 0.054284
## 2 0.162338 2 0.37987 0.48052 0.038612
## 3 0.012987 3 0.21753 0.24351 0.027800
## 4 0.000000 6 0.17857 0.22727 0.026878

plotcp(Model1)
Pmodel=rpart(formula = Personal.Loan~.,data=DT,method =
"class",cp=0.04,minsplit=100,minbucket=100,xval=10)
fancyRpartPlot(Pmodel)
Pmodel

## n= 3333
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 3333 308 0 (0.90759076 0.09240924)
## 2) Income..in.K.month.< 113.5 2671 56 0 (0.97903407 0.02096593) *
## 3) Income..in.K.month.>=113.5 662 252 0 (0.61933535 0.38066465)
## 6) Education=1 449 50 0 (0.88864143 0.11135857) *
## 7) Education=2,3 213 11 1 (0.05164319 0.94835681) *

#Cart Model output Explanation

#First node shows that there are 91% chance that customer will accept personal loan 9%
chance of not accepting loan.
#Monthly income is the 1st variable that is split in decision tree hence it is the most
important variable for building strategy.
#Highest Risk segment is of 6% which means that 6% of customer will not accept personal
loan are being taken in that segment which is of Monthly Income >114 ,Education=1.
#second Risk segment is of 2% which means that 2% of customer will not accept personal
loan are being taken in that segment which is of Monthly Income >114, Education =1 and
Family.Members < 2.5
#Prediction
DT$Prediction=predict(Pmodel,data=DT,type = "class")
DT$probability=predict(Pmodel,data=DT,type = "prob")
tbl=table(Actual=DT$Personal.Loan,prediction=DT$Prediction)
(3014+252)/sum(tbl)

## [1] 0.979898

Z=Pmodel

#accurancy of above model is 97.9 %.

#PREDICTION USING SAME MODEL IN TEST DATA.
DS$Predict.class=predict(Pmodel,newdata = DS,type = "class")
tbl1=table(Actual.test=DS$Personal.Loan,prediction.test=DS$Predict.class)
tbl1

## prediction.test
## Actual.test 0 1
## 0 1489 6
## 1 47 125

(1489+144)/sum(tbl1)

## [1] 0.9796041

#Decision Tree Model Performance and Validation-Train Data.

#Confusion Matrix
DT$Prediction=predict(Pmodel,data=DT,type = "class")
tbl=table(Actual=DT$Personal.Loan,prediction=DT$Prediction)
tbl

## prediction
## Actual 0 1
## 0 3014 11
## 1 106 202

#1)Accuracy Of Model
(3014+252)/sum(tbl)

## [1] 0.979898

#2classification error
(11+56)/sum(tbl)

## [1] 0.02010201
#3)sensitivity(True Positive Rate)
252/(252+56)

## [1] 0.8181818

#4)Specifity(True Negative Rate)

3014/(11+3014)

## [1] 0.9963636

pobj=prediction(DT$probability[,2],DT$Personal.Loan)
perf <- performance(pobj, "tpr", "fpr")
plot(perf,main = "ROC curve")

KS <- max(attr(pobj, 'y.values')[[1]]-attr(perf, 'x.values')[[1]])

auc <- performance(pobj,"auc");
auc <- as.numeric(auc@y.values)
print(KS)

## [1] -Inf

auc

## [1] 0.8842803
gini=ineq(DT$probability,"gini")
print(gini)

## [1] 0.4767402

#Decision Tree Model Performance and Validation-Test Data.

#Confusion Matrix
DS$Predict.class=predict(Pmodel,newdata = DS,type = "class")
DS$probability1=predict(Pmodel,newdata = DS,type="prob")
tbl1=table(Actual.test=DS$Personal.Loan,prediction.test=DS$Predict.class)
tbl1

## prediction.test
## Actual.test 0 1
## 0 1489 6
## 1 47 125

(1489+144)/sum(tbl1)

## [1] 0.9796041

#1)Accuracy Of Model
(1489++144)/sum(tbl1)

## [1] 0.9796041

#2classification error
(6+28)/sum(tbl1)

## [1] 0.02039592

#3)sensitivity(True Positive Rate)

144/(144+28)

## [1] 0.8372093

#4)Specifity(True Negative Rate)

1489/(1489+6)

## [1] 0.9959866

pobj1=prediction(DS$probability1[,2],DS$Personal.Loan)
perf1 <- performance(pobj1, "tpr", "fpr")
plot(perf1,main = "ROC curve")
#Randome forest
print(sum(RF$Personal.Loan==1)/nrow(RF))

## [1] 0.09240924

rndforest=randomForest(Personal.Loan~.,data=RF,ntree=501,mtry=3,nodesize=10,i
mportance=TRUE)
rndforest

##
## Call:
## randomForest(formula = Personal.Loan ~ ., data = RF, ntree = 501,
mtry = 3, nodesize = 10, importance = TRUE)
## Type of random forest: classification
## Number of trees: 501
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 1.56%
## Confusion matrix:
## 0 1 class.error
## 0 3018 7 0.00231405
## 1 45 263 0.14610390

print(rndforest$err.rate)
## OOB 0 1
## [1,] 0.02977667 0.020720721 0.1313131
## [2,] 0.02933738 0.015512465 0.1744186
## [3,] 0.03137570 0.017621145 0.1759259
## [4,] 0.03066378 0.015013829 0.1950207
## [5,] 0.02920443 0.014380531 0.1797753
## [6,] 0.02917602 0.012707377 0.1923077
## [7,] 0.02753442 0.012064805 0.1796610
## [8,] 0.02718567 0.011576439 0.1800000
## [9,] 0.02687023 0.010781671 0.1824104
## [10,] 0.02363636 0.008018710 0.1758958

plot(rndforest)

rndforest=randomForest(Personal.Loan~.,data=RF,ntree=101,mtry=3,nodesize=10,i
mportance=TRUE)
print(rndforest$err.rate)

## OOB 0 1
## [1,] 0.04358553 0.019090909 0.2758621
## [2,] 0.03241895 0.012700166 0.2164948
## [3,] 0.02891566 0.011968085 0.1923077
## [4,] 0.03797922 0.014624506 0.2643678
## [5,] 0.03268846 0.011037528 0.2428571
## [6,] 0.03160920 0.009513742 0.2448980
## [7,] 0.03031250 0.010344828 0.2233333
## [8,] 0.02709360 0.010522743 0.1887417
## [9,] 0.02440513 0.008739496 0.1782178
## [10,] 0.02605271 0.008344459 0.2000000

plot(rndforest)

#TUNNING
set.seed(seed)
X=tuneRF(x=RF,y=RF$Personal.Loan,mtryStart =3,stepFactor = 1,ntreeTry
=500,trace = TRUE,
plot = TRUE,doBest = TRUE, nodesize=5,importance=TRUE)

## mtry = 3 OOB error = 0%

## Searching left ...
## Searching right ...

DM Assignment - Thena Bank
No ratings yet
DM Assignment - Thena Bank
39 pages
Thera Bank-Project
100% (12)
Thera Bank-Project
26 pages
Thera Bank Loan Prediction Model
100% (1)
Thera Bank Loan Prediction Model
24 pages
Predictive Modeling Mini Project
No ratings yet
Predictive Modeling Mini Project
25 pages
Project On Data Mining-Raveendra Babu Gaddam
No ratings yet
Project On Data Mining-Raveendra Babu Gaddam
29 pages
Cart Project
75% (4)
Cart Project
17 pages
Thera Bank - Project
100% (4)
Thera Bank - Project
34 pages
Supervised Decision Trees A Case Study For AllLife Bank
No ratings yet
Supervised Decision Trees A Case Study For AllLife Bank
50 pages
Note 4
No ratings yet
Note 4
18 pages
Project 5 PDF
100% (1)
Project 5 PDF
48 pages
Analysis
No ratings yet
Analysis
37 pages
ECN190 Term Project: Predicting Credit Card Default Risk: Introduction and Literature
No ratings yet
ECN190 Term Project: Predicting Credit Card Default Risk: Introduction and Literature
18 pages
Bank Marketing Data Analysis
No ratings yet
Bank Marketing Data Analysis
18 pages
R Analysis of Buying Patterns
No ratings yet
R Analysis of Buying Patterns
3 pages
Credit Card Default Analysis
No ratings yet
Credit Card Default Analysis
5 pages
Bank Rpubs
No ratings yet
Bank Rpubs
24 pages
Pract Person
No ratings yet
Pract Person
6 pages
Home Credit Data
No ratings yet
Home Credit Data
6 pages
Report
No ratings yet
Report
24 pages
FRA Group Assignment - Report
No ratings yet
FRA Group Assignment - Report
22 pages
Germany Credit Analysis
No ratings yet
Germany Credit Analysis
41 pages
SanatKulkarni - AP22110010183 - Assignment3-1
No ratings yet
SanatKulkarni - AP22110010183 - Assignment3-1
4 pages
R Data Analysis for Banking
No ratings yet
R Data Analysis for Banking
15 pages
RCode Group 4
No ratings yet
RCode Group 4
21 pages
Capastone Project Taiwan Customer Default
67% (3)
Capastone Project Taiwan Customer Default
36 pages
Machine Learning Transport Analysis
100% (4)
Machine Learning Transport Analysis
42 pages
Advanced Modelling Techniques Anurag Payel
No ratings yet
Advanced Modelling Techniques Anurag Payel
41 pages
21nku14 - Data Visualization Assignment
No ratings yet
21nku14 - Data Visualization Assignment
10 pages
R Machine Learning Lab Guide
0% (1)
R Machine Learning Lab Guide
9 pages
Insurance Premium Renewal Analysis
100% (3)
Insurance Premium Renewal Analysis
13 pages
Thera Bank: Targeting Loan Growth
100% (10)
Thera Bank: Targeting Loan Growth
79 pages
BDA MSC It
No ratings yet
BDA MSC It
35 pages
Machine Learning Project
67% (3)
Machine Learning Project
30 pages
Logistic Regression
100% (1)
Logistic Regression
29 pages
Credit Risk Modelling (EDA & Classification) - Kaggle
No ratings yet
Credit Risk Modelling (EDA & Classification) - Kaggle
21 pages
R Statistical Analysis Guide
No ratings yet
R Statistical Analysis Guide
52 pages
Employee Transport Prediction
100% (2)
Employee Transport Prediction
44 pages
Telecom Customer Churn
0% (1)
Telecom Customer Churn
39 pages
Ques On1 A)
No ratings yet
Ques On1 A)
21 pages
Week 4 LAB
No ratings yet
Week 4 LAB
26 pages
00 - Project - Your First Data Science Project - Jupyter Notebook
No ratings yet
00 - Project - Your First Data Science Project - Jupyter Notebook
8 pages
Knitted R Code - FRA Project
100% (1)
Knitted R Code - FRA Project
73 pages
Tables in R
No ratings yet
Tables in R
7 pages
ANZ Virtual Internship Module Model Answer For Task 1
No ratings yet
ANZ Virtual Internship Module Model Answer For Task 1
7 pages
EDA Python Code Cheatsheets
No ratings yet
EDA Python Code Cheatsheets
52 pages
Predictive+Modelling+-+Logistic+Regression+-+Student+Version-New2.3.ipynb - Colaboratory
No ratings yet
Predictive+Modelling+-+Logistic+Regression+-+Student+Version-New2.3.ipynb - Colaboratory
12 pages
Experiment 5
No ratings yet
Experiment 5
13 pages
Data Analysis 1
No ratings yet
Data Analysis 1
15 pages
Credit Card Default
No ratings yet
Credit Card Default
30 pages
Midterm Project Group 6
No ratings yet
Midterm Project Group 6
41 pages
R Working Manuals Students
No ratings yet
R Working Manuals Students
11 pages
R Working Materials Prep
No ratings yet
R Working Materials Prep
43 pages
Data Preparation
No ratings yet
Data Preparation
2 pages
Linear Regression in R
No ratings yet
Linear Regression in R
19 pages
A Note On R
No ratings yet
A Note On R
90 pages
Data Preprocessing & Visualization1
No ratings yet
Data Preprocessing & Visualization1
2 pages
Fruaddetectiondata2 CSV
No ratings yet
Fruaddetectiondata2 CSV
24 pages
ML LAB Manual-1
No ratings yet
ML LAB Manual-1
33 pages
21st Century Literature: From The Philippines and From The World
No ratings yet
21st Century Literature: From The Philippines and From The World
25 pages
Listening Section 1 - Practice (3.6.2023)
No ratings yet
Listening Section 1 - Practice (3.6.2023)
8 pages
Wako Chapter 9 Low Kick Rules
No ratings yet
Wako Chapter 9 Low Kick Rules
9 pages
Grade 6 Learning Intervention Plan
100% (19)
Grade 6 Learning Intervention Plan
2 pages
Ateneo Quick Reference Guide English
100% (1)
Ateneo Quick Reference Guide English
2 pages
Distortions in Indian History
No ratings yet
Distortions in Indian History
37 pages
ENGLISH LANGUAG-WPS Office
No ratings yet
ENGLISH LANGUAG-WPS Office
32 pages
Anthology of Gnosis
100% (4)
Anthology of Gnosis
56 pages
L223 Skid Steer Loader Fuse Box Guide
No ratings yet
L223 Skid Steer Loader Fuse Box Guide
2 pages
English 7: First Quarter Worksheet No. 1 Skimming
No ratings yet
English 7: First Quarter Worksheet No. 1 Skimming
3 pages
Patana News Volume 21 Issue 34
No ratings yet
Patana News Volume 21 Issue 34
26 pages
CBSE Class 6 Body Movements Worksheet PDF
100% (1)
CBSE Class 6 Body Movements Worksheet PDF
2 pages
Final Year Project Guide
No ratings yet
Final Year Project Guide
2 pages
Lucknow JEE Main Nurture TP - Phase-1 & 2 - Minor Test-6
No ratings yet
Lucknow JEE Main Nurture TP - Phase-1 & 2 - Minor Test-6
5 pages
Pages From DSSSBTGT SCIENCE Solved Papers TeachingExam - Watermark (10) - Compressed
No ratings yet
Pages From DSSSBTGT SCIENCE Solved Papers TeachingExam - Watermark (10) - Compressed
16 pages
Takenaka Corp vs. CIR
No ratings yet
Takenaka Corp vs. CIR
1 page
Copper Rock
No ratings yet
Copper Rock
1 page
Professional Responsibility Management and Ethics: Team:5
No ratings yet
Professional Responsibility Management and Ethics: Team:5
18 pages
Checkpoint Protocol
No ratings yet
Checkpoint Protocol
2 pages
gr12 Ela Unit4 Unitplanningorganizer
No ratings yet
gr12 Ela Unit4 Unitplanningorganizer
16 pages
Scroll Saw 40-100 16 1246
No ratings yet
Scroll Saw 40-100 16 1246
8 pages
Design and Implementation of A GPS-GSM Based Women Safety Device For Combating Sexual Assaults
No ratings yet
Design and Implementation of A GPS-GSM Based Women Safety Device For Combating Sexual Assaults
5 pages
Roswell - Shades, Mel Odom
No ratings yet
Roswell - Shades, Mel Odom
120 pages
Grade 11 Surface Area
No ratings yet
Grade 11 Surface Area
13 pages
Batch Manufacturing Review: Sr. No. Check Points Reference Documents Step 1: Introduction of New Product (BMR Review)
No ratings yet
Batch Manufacturing Review: Sr. No. Check Points Reference Documents Step 1: Introduction of New Product (BMR Review)
15 pages
Fortigate Sd-Wan Configuration
No ratings yet
Fortigate Sd-Wan Configuration
5 pages
13.24 Abdominal-Injury
100% (1)
13.24 Abdominal-Injury
42 pages
Norse Influence on English Language
No ratings yet
Norse Influence on English Language
3 pages
Mrunal's Weekly MockTest Pillar 1D1 Insurance Unacademy Dark
No ratings yet
Mrunal's Weekly MockTest Pillar 1D1 Insurance Unacademy Dark
21 pages
Specifications of Portable Suction
No ratings yet
Specifications of Portable Suction
1 page

Project3: Loading Library

Uploaded by

Project3: Loading Library

Uploaded by

project3

#Exploratory Data Analysis

## [1] "ID" "Age (in years)"

## ID Age..in.years. Experience..in.years. Income..in.K.month.

## Classes 'tbl_df', 'tbl' and 'data.frame': 5000 obs. of 14 variables:

## Classes 'tbl_df', 'tbl' and 'data.frame': 5000 obs. of 14 variables:

#4Checking Data set for missing value

#3checking for outliers

#5Negative value treatment

## Age..in.years. Experience..in.years. Income..in.K.month. Family.members

#Checking Response variable

## 'data.frame': 3333 obs. of 12 variables:

DT=train_data #DECISION TREE TRAIN DATA

#Buliding Cart Model

#Compleximity parameter chart

#Cart Model output Explanation

#accurancy of above model is 97.9 %.

#Decision Tree Model Performance and Validation-Train Data.

#4)Specifity(True Negative Rate)

KS <- max(attr(pobj, 'y.values')[[1]]-attr(perf, 'x.values')[[1]])

#Decision Tree Model Performance and Validation-Test Data.

#3)sensitivity(True Positive Rate)

#4)Specifity(True Negative Rate)

## mtry = 3 OOB error = 0%

You might also like