[go: up one dir, main page]

0% found this document useful (0 votes)
24 views7 pages

Lab-8 17mis7172

The document loads various libraries and reads in customer data and test data. It analyzes the data for missing values and encodes categorical variables. It builds decision tree and logistic regression models to predict customer churn. It evaluates the models on test data and calculates accuracy, precision, recall, and AUC.

Uploaded by

maneeshmogallpu
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
24 views7 pages

Lab-8 17mis7172

The document loads various libraries and reads in customer data and test data. It analyzes the data for missing values and encodes categorical variables. It builds decision tree and logistic regression models to predict customer churn. It evaluates the models on test data and calculates accuracy, precision, recall, and AUC.

Uploaded by

maneeshmogallpu
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 7

LAB-8

17MIS7172

library(plyr)

library(randomForest)

library(rpart)

library(rpart.plot)

library(caret)

library(ggplot2)

library(gridExtra)

library(olsrr)

# Read the training data (train.csv) and the test data (test.csv) into
# data frames. The paths are absolute and machine-specific.
dataset <- read.csv(file = 'C:/Users/Abhishek/Downloads/train.csv')
test1 <- read.csv(file = 'C:/Users/Abhishek/Downloads/test.csv')

# Exploratory checks on the training data.

# Per-column summary statistics.
summary(dataset)

# Missing-value count for every column in a single named vector.
# Working from the data frame itself (rather than naming each column by
# hand) means a renamed or misspelled column cannot silently report 0:
# `sum(is.na(dataset$no_such_column))` evaluates to 0 because `$` returns
# NULL for an unknown name.
colSums(is.na(dataset))

# Column types and a preview of the values.
str(dataset)

# Frequency of each State value in the training data and in the test data,
# printed one after the other so the two sets of states can be compared.
table(dataset$State)

table(test1$State)

# Encode the categorical columns of both data frames as numbers.

# State: integer codes taken from ONE shared set of levels (the training
# levels). Encoding each data frame with its own as.factor() call assigns
# codes by each file's own sorted set of values, so the same state can get a
# different code in train and test whenever one file lacks a state the other
# has. A state that appears only in the test data becomes NA here instead of
# silently receiving a code that means something else in the training data.
state_levels <- levels(as.factor(dataset$State))
dataset$State <- as.integer(factor(dataset$State, levels = state_levels))
test1$State <- as.integer(factor(test1$State, levels = state_levels))

# Two-level columns in the training data: factor codes 1/2 shifted to 0/1
# (first level in sort order -> 0, second -> 1). Kept as doubles.
for (binary_col in c("International.plan", "Voice.mail.plan", "Churn")) {
  dataset[[binary_col]] <- as.numeric(as.factor(dataset[[binary_col]])) - 1
}

# Test data: the two plan columns are intentionally left as factor codes
# 1/2 at this point; the script converts them to 0/1 later, after the model
# input columns have been subset out of test1. Converting them here as well
# would make that later `<= 1` recode map every row to 0.
test1$International.plan <- as.integer(as.factor(test1$International.plan))
test1$Voice.mail.plan <- as.integer(as.factor(test1$Voice.mail.plan))

# Test target: 0/1, same mapping as the training target.
test1$Churn <- as.numeric(as.factor(test1$Churn)) - 1
# Build a corrplot to visualize the correlation matrix.

library(corrplot)

# Correlation of every column of the training data with Churn.
cor(x = dataset, y = dataset$Churn)

# Full pairwise correlation matrix, drawn with the coefficients as numbers.
corrplot(
  corr = cor(dataset),
  method = "number",
  is.corr = FALSE
)

# Linear model of Churn on all remaining columns; it is the input to the
# p-value-based forward and backward stepwise selection below.
model <- lm(formula = Churn ~ ., data = dataset)

ols_step_forward_p(model)

ols_step_backward_p(model)

str(dataset)

#decision tree

library(rpart)

library(rpart.plot)

library(ROSE)

library(Metrics)

library(caret)

# Decision tree: fit a classification tree for Churn on the training data,
# then score it on the test data.

# Predictors used by the tree. The test columns are selected by NAME from
# this vector (not by position), so the selection keeps working if the
# column order of the CSV changes.
predictor_cols <- c(
  "International.plan", "Customer.service.calls", "Total.day.charge",
  "Voice.mail.plan", "Total.intl.charge", "Total.intl.calls",
  "Total.eve.minutes", "Total.night.charge", "Number.vmail.messages"
)

f <- rpart(
  Churn ~ International.plan + Customer.service.calls + Total.day.charge +
    Voice.mail.plan + Total.intl.charge + Total.intl.calls +
    Total.eve.minutes + Total.night.charge + Number.vmail.messages,
  method = "class",
  data = dataset
)

plot(f, uniform = TRUE, main = "Decision Tree")

text(f, use.n = TRUE, all = TRUE, cex = .6)

# Model inputs for the test data (also reused later in the script).
testnew <- test1[predictor_cols]

# The plan columns arrive as factor codes 1/2; convert them to the 0/1
# coding the training data uses (code 2 -> 1, anything else -> 0).
for (plan_col in c("International.plan", "Voice.mail.plan")) {
  plan_codes <- testnew[[plan_col]]
  testnew[[plan_col]] <- ifelse(plan_codes == 2, 1, 0)
}

predict_model <- predict(f, testnew, type = "class")

# Confusion matrix: actual class in rows, predicted class in columns.
predict_table <- table(Actual = test1$Churn, Predicted = predict_model)

predict_table

# Precision and recall for the churn class ("1").
# - Called through caret:: explicitly: Metrics is attached after caret in
#   this script and masks precision()/recall() with two-argument versions,
#   so the bare one-argument call fails.
# - Computed from the prediction and reference factors rather than from
#   predict_table, because caret's table method expects predictions in rows
#   and predict_table has the actual class in rows.
# - `relevant = "1"` reports the churn class; caret's default is the first
#   level, which here is "0".
actual_class <- factor(test1$Churn, levels = levels(predict_model))

caret::precision(data = predict_model, reference = actual_class,
                 relevant = "1")

caret::recall(data = predict_model, reference = actual_class,
              relevant = "1")

# Share of test rows whose predicted class equals the actual class.
Metrics::accuracy(actual = actual_class, predicted = predict_model)

#logistic

library(caTools)

library(ROCR)

# Logistic regression: fit on the training data, score the test inputs in
# `testnew` (built earlier in the script), then report ROC/AUC, a confusion
# matrix and accuracy.

# `family = binomial` is what makes this a logistic regression. Without it
# glm() uses its default gaussian family and fits an ordinary linear model,
# whose "response" predictions are not probabilities.
model <- glm(
  Churn ~ International.plan + Customer.service.calls + Total.day.charge +
    Voice.mail.plan + Total.intl.charge + Total.intl.calls +
    Total.eve.minutes + Total.night.charge + Number.vmail.messages,
  family = binomial,
  data = dataset
)

model

# Predicted churn probability for each test row.
predict_prob <- predict(model, testnew, type = "response")

predict_prob

# Hard 0/1 class labels at a 0.5 cutoff (used for the confusion matrix and
# accuracy only).
predict_reg <- ifelse(predict_prob > 0.5, 1, 0)

# ROC and AUC are computed from the probabilities, not from the thresholded
# labels: feeding 0/1 labels to prediction() leaves a single operating point,
# so the curve and the AUC no longer describe the model's ranking quality.
ROCPred <- prediction(predict_prob, test1$Churn)

ROCPer <- performance(ROCPred, measure = "tpr", x.measure = "fpr")

ROCPer

plot(ROCPer)

plot(
  ROCPer,
  colorize = TRUE,
  print.cutoffs.at = seq(0.1, 1, by = 0.1),
  main = "ROC CURVE"
)

# Diagonal reference line (true positive rate equal to false positive rate).
abline(a = 0, b = 1)

auc <- performance(ROCPred, measure = "auc")

auc <- auc@y.values[[1]]

auc

auc <- round(auc, 4)

legend(.6, .4, auc, title = "AUC", cex = 1)

# Confusion matrix: actual class in rows, predicted class in columns.
log_table <- table(test1$Churn, predict_reg)

log_table

# Share of test rows whose predicted class equals the actual class.
Metrics::accuracy(actual = test1$Churn, predicted = predict_reg)

OUTPUT:-

You might also like