> library(readxl)
> # Load the assignment data set from CSV into the data frame For.R.
> # The path is kept on a single line: a line break inside the quoted string
> # would become part of the file name and read.csv() would not find the file.
> For.R <- read.csv("G:\\My Drive\\XLRI\\Term 4\\Applied Econometrics\\Assignment 2\\Assignment 2_Data.csv")
> # Show the column names, types and first few values of the loaded data.
> str(For.R)
'data.frame': 1470 obs. of 13 variables:
$ Age : int 41 49 37 33 27 32 59 30 38 36 ...
$ Gender : chr "Female" "Male" "Male" "Female" ...
$ JobSatisfaction : int 4 2 3 3 2 4 1 3 3 3 ...
$ MonthlyIncome : int 5993 5130 2090 2909 3468 3068 2670 2693 9526 5237 ...
$ PercentSalaryHike : int 11 23 15 11 12 13 20 22 21 13 ...
$ PerformanceRating : int 3 4 3 3 3 3 4 4 4 3 ...
$ TotalWorkingYears : int 8 10 7 8 6 8 12 1 10 17 ...
$ TrainingTimesLastYear : int 0 3 3 3 3 2 3 2 2 3 ...
$ YearsAtCompany : int 6 10 0 8 2 7 1 1 9 7 ...
$ YearsInCurrentRole : int 4 7 0 7 2 7 0 0 7 7 ...
$ YearsSinceLastPromotion: int 0 1 0 3 2 3 0 0 1 7 ...
$ YearsWithCurrManager : int 5 7 0 0 2 6 0 0 8 7 ...
$ Attrition : chr "Yes" "No" "Yes" "No" ...
> # Recode the two character columns as numeric 0/1 indicators.
> # Gender: 1 where the value is "Female", 0 otherwise.
> For.R$Gender <- as.numeric(For.R$Gender == "Female")
> # Attrition: 1 where the value is "No", 0 otherwise, so the outcome used in
> # the models below equals 1 for rows recorded as "No".
> For.R$Attrition <- as.numeric(For.R$Attrition == "No")
> # Re-inspect the structure to confirm both columns are now numeric.
> str(For.R)
'data.frame': 1470 obs. of 13 variables:
$ Age : int 41 49 37 33 27 32 59 30 38 36 ...
$ Gender : num 1 0 0 1 0 0 1 0 0 0 ...
$ JobSatisfaction : int 4 2 3 3 2 4 1 3 3 3 ...
$ MonthlyIncome : int 5993 5130 2090 2909 3468 3068 2670 2693 9526 5237 ...
$ PercentSalaryHike : int 11 23 15 11 12 13 20 22 21 13 ...
$ PerformanceRating : int 3 4 3 3 3 3 4 4 4 3 ...
$ TotalWorkingYears : int 8 10 7 8 6 8 12 1 10 17 ...
$ TrainingTimesLastYear : int 0 3 3 3 3 2 3 2 2 3 ...
$ YearsAtCompany : int 6 10 0 8 2 7 1 1 9 7 ...
$ YearsInCurrentRole : int 4 7 0 7 2 7 0 0 7 7 ...
$ YearsSinceLastPromotion: int 0 1 0 3 2 3 0 0 1 7 ...
$ YearsWithCurrManager : int 5 7 0 0 2 6 0 0 8 7 ...
$ Attrition : num 0 1 0 1 1 1 1 1 1 1 ...
> # Linear probability model: least-squares regression of the 0/1 Attrition
> # indicator on the twelve other columns of For.R.
> model1 <- lm(
  For.R$Attrition ~
    For.R$Age + For.R$Gender + For.R$JobSatisfaction +
    For.R$MonthlyIncome + For.R$PercentSalaryHike +
    For.R$PerformanceRating + For.R$TotalWorkingYears +
    For.R$TrainingTimesLastYear + For.R$YearsAtCompany +
    For.R$YearsInCurrentRole + For.R$YearsSinceLastPromotion +
    For.R$YearsWithCurrManager
)
> # Print the coefficient table and fit statistics.
> summary(model1)
Call:
lm(formula = For.R$Attrition ~ For.R$Age + For.R$Gender + For.R$JobSatisfaction +
For.R$MonthlyIncome + For.R$PercentSalaryHike + For.R$PerformanceRating +
For.R$TotalWorkingYears + For.R$TrainingTimesLastYear + For.R$YearsAtCompany +
For.R$YearsInCurrentRole + For.R$YearsSinceLastPromotion +
For.R$YearsWithCurrManager)
Residuals:
Min 1Q Median 3Q Max
-1.09700 0.01972 0.13110 0.20845 0.38821
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.892e-01 1.055e-01 4.635 3.90e-06 ***
For.R$Age 4.091e-03 1.419e-03 2.883 0.003991 **
For.R$Gender 1.496e-02 1.897e-02 0.789 0.430416
For.R$JobSatisfaction 3.560e-02 8.424e-03 4.226 2.53e-05 ***
For.R$MonthlyIncome 6.789e-06 3.128e-06 2.170 0.030135 *
For.R$PercentSalaryHike 3.872e-03 4.010e-03 0.966 0.334382
For.R$PerformanceRating -3.697e-02 4.068e-02 -0.909 0.363690
For.R$TotalWorkingYears 7.120e-04 2.509e-03 0.284 0.776632
For.R$TrainingTimesLastYear 1.846e-02 7.206e-03 2.562 0.010511 *
For.R$YearsAtCompany -4.747e-03 3.146e-03 -1.509 0.131610
For.R$YearsInCurrentRole 1.341e-02 4.185e-03 3.205 0.001380 **
For.R$YearsSinceLastPromotion -1.247e-02 3.713e-03 -3.359 0.000801 ***
For.R$YearsWithCurrManager 1.288e-02 4.306e-03 2.991 0.002824 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3552 on 1457 degrees of freedom
Multiple R-squared: 0.07523, Adjusted R-squared: 0.06762
F-statistic: 9.878 on 12 and 1457 DF, p-value: < 2.2e-16
> # Reduced linear probability model. The seven regressors kept here are the
> # ones whose p-values in the model1 summary above are below 0.05.
> model2 <- lm(
  For.R$Attrition ~
    For.R$Age + For.R$JobSatisfaction + For.R$MonthlyIncome +
    For.R$TrainingTimesLastYear + For.R$YearsInCurrentRole +
    For.R$YearsSinceLastPromotion + For.R$YearsWithCurrManager
)
> # Print the coefficient table and fit statistics.
> summary(model2)
Call:
lm(formula = For.R$Attrition ~ For.R$Age + For.R$JobSatisfaction +
For.R$MonthlyIncome + For.R$TrainingTimesLastYear + For.R$YearsInCurrentRole +
For.R$YearsSinceLastPromotion + For.R$YearsWithCurrManager)
Residuals:
Min 1Q Median 3Q Max
-1.13679 0.02238 0.13368 0.20655 0.38999
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.369e-01 5.049e-02 8.652 < 2e-16 ***
For.R$Age 4.254e-03 1.171e-03 3.632 0.000291 ***
For.R$JobSatisfaction 3.522e-02 8.407e-03 4.189 2.97e-05 ***
For.R$MonthlyIncome 6.207e-06 2.417e-06 2.568 0.010318 *
For.R$TrainingTimesLastYear 1.845e-02 7.188e-03 2.566 0.010379 *
For.R$YearsInCurrentRole 1.108e-02 3.868e-03 2.865 0.004233 **
For.R$YearsSinceLastPromotion -1.416e-02 3.562e-03 -3.974 7.42e-05 ***
For.R$YearsWithCurrManager 9.897e-03 3.809e-03 2.599 0.009454 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 0.3551 on 1462 degrees of freedom
Multiple R-squared: 0.07271, Adjusted R-squared: 0.06827
F-statistic: 16.38 on 7 and 1462 DF, p-value: < 2.2e-16
> # Logit model: binomial GLM of the 0/1 Attrition indicator on the same twelve
> # regressors as model1.
> logitmodel1 <- glm(
  For.R$Attrition ~
    For.R$Age + For.R$Gender + For.R$JobSatisfaction +
    For.R$MonthlyIncome + For.R$PercentSalaryHike +
    For.R$PerformanceRating + For.R$TotalWorkingYears +
    For.R$TrainingTimesLastYear + For.R$YearsAtCompany +
    For.R$YearsInCurrentRole + For.R$YearsSinceLastPromotion +
    For.R$YearsWithCurrManager,
  family = "binomial"
)
> # Print the coefficient table, deviances and AIC.
> summary(logitmodel1)
Call:
glm(formula = For.R$Attrition ~ For.R$Age + For.R$Gender + For.R$JobSatisfaction +
For.R$MonthlyIncome + For.R$PercentSalaryHike + For.R$PerformanceRating +
For.R$TotalWorkingYears + For.R$TrainingTimesLastYear + For.R$YearsAtCompany +
For.R$YearsInCurrentRole + For.R$YearsSinceLastPromotion +
For.R$YearsWithCurrManager, family = "binomial")
Deviance Residuals:
Min 1Q Median 3Q Max
-3.0095 0.2713 0.4663 0.6515 1.2068
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.044e+00 8.377e-01 -1.247 0.212517
For.R$Age 3.027e-02 1.155e-02 2.621 0.008761 **
For.R$Gender 1.212e-01 1.539e-01 0.788 0.430974
For.R$JobSatisfaction 2.794e-01 6.701e-02 4.170 3.04e-05 ***
For.R$MonthlyIncome 8.042e-05 3.048e-05 2.639 0.008325 **
For.R$PercentSalaryHike 3.617e-02 3.256e-02 1.111 0.266669
For.R$PerformanceRating -3.265e-01 3.285e-01 -0.994 0.320229
For.R$TotalWorkingYears 7.739e-03 2.249e-02 0.344 0.730711
For.R$TrainingTimesLastYear 1.515e-01 5.943e-02 2.549 0.010815 *
For.R$YearsAtCompany -3.670e-02 3.111e-02 -1.180 0.238110
For.R$YearsInCurrentRole 1.305e-01 3.865e-02 3.376 0.000736 ***
For.R$YearsSinceLastPromotion -1.413e-01 3.573e-02 -3.954 7.70e-05 ***
For.R$YearsWithCurrManager 1.195e-01 3.874e-02 3.085 0.002034 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1298.6 on 1469 degrees of freedom
Residual deviance: 1172.2 on 1457 degrees of freedom
AIC: 1198.2
Number of Fisher Scoring iterations: 5
> # Reduced logit model: binomial GLM on the same seven regressors as model2.
> # glm() is required here: lm() has no 'family' argument, so passing one only
> # raises an "extra argument will be disregarded" warning and fits ordinary
> # least squares instead of a logit.
> logitmodel2 <- glm(
  For.R$Attrition ~
    For.R$Age + For.R$JobSatisfaction + For.R$MonthlyIncome +
    For.R$TrainingTimesLastYear + For.R$YearsInCurrentRole +
    For.R$YearsSinceLastPromotion + For.R$YearsWithCurrManager,
  family = "binomial"
)
> # Print the coefficient table, deviances and AIC.
> summary(logitmodel2)
Call:
glm(formula = For.R$Attrition ~ For.R$Age + For.R$JobSatisfaction +
For.R$MonthlyIncome + For.R$TrainingTimesLastYear + For.R$YearsInCurrentRole +
For.R$YearsSinceLastPromotion + For.R$YearsWithCurrManager,
family = "binomial")
Deviance Residuals:
Min 1Q Median 3Q Max
-3.1051 0.2772 0.4720 0.6521 1.2166
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -1.502e+00 4.003e-01 -3.752 0.000175 ***
For.R$Age 3.200e-02 9.690e-03 3.303 0.000957 ***
For.R$JobSatisfaction 2.751e-01 6.669e-02 4.125 3.71e-05 ***
For.R$MonthlyIncome 7.757e-05 2.456e-05 3.158 0.001588 **
For.R$TrainingTimesLastYear 1.526e-01 5.905e-02 2.584 0.009769 **
For.R$YearsInCurrentRole 1.140e-01 3.643e-02 3.130 0.001748 **
For.R$YearsSinceLastPromotion -1.574e-01 3.337e-02 -4.718 2.38e-06 ***
For.R$YearsWithCurrManager 1.001e-01 3.540e-02 2.828 0.004690 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1298.6 on 1469 degrees of freedom
Residual deviance: 1175.5 on 1462 degrees of freedom
AIC: 1191.5
Number of Fisher Scoring iterations: 5
> # Probit model: binomial GLM with a probit link on the same twelve regressors
> # as model1.
> probitmodel1 <- glm(
  For.R$Attrition ~
    For.R$Age + For.R$Gender + For.R$JobSatisfaction +
    For.R$MonthlyIncome + For.R$PercentSalaryHike +
    For.R$PerformanceRating + For.R$TotalWorkingYears +
    For.R$TrainingTimesLastYear + For.R$YearsAtCompany +
    For.R$YearsInCurrentRole + For.R$YearsSinceLastPromotion +
    For.R$YearsWithCurrManager,
  family = binomial(link = "probit")
)
> # Print the coefficient table, deviances and AIC.
> summary(probitmodel1)
Call:
glm(formula = For.R$Attrition ~ For.R$Age + For.R$Gender + For.R$JobSatisfaction +
For.R$MonthlyIncome + For.R$PercentSalaryHike + For.R$PerformanceRating +
For.R$TotalWorkingYears + For.R$TrainingTimesLastYear + For.R$YearsAtCompany +
For.R$YearsInCurrentRole + For.R$YearsSinceLastPromotion +
For.R$YearsWithCurrManager, family = binomial(link = "probit"))
Deviance Residuals:
Min 1Q Median 3Q Max
-3.0559 0.2625 0.4802 0.6611 1.1496
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.776e-01 4.676e-01 -1.021 0.307077
For.R$Age 1.498e-02 6.338e-03 2.364 0.018083 *
For.R$Gender 6.800e-02 8.515e-02 0.799 0.424517
For.R$JobSatisfaction 1.560e-01 3.723e-02 4.190 2.79e-05 ***
For.R$MonthlyIncome 4.129e-05 1.588e-05 2.601 0.009304 **
For.R$PercentSalaryHike 1.960e-02 1.803e-02 1.087 0.276989
For.R$PerformanceRating -1.704e-01 1.823e-01 -0.934 0.350083
For.R$TotalWorkingYears 6.933e-03 1.213e-02 0.572 0.567517
For.R$TrainingTimesLastYear 8.766e-02 3.287e-02 2.667 0.007657 **
For.R$YearsAtCompany -2.839e-02 1.601e-02 -1.773 0.076237 .
For.R$YearsInCurrentRole 7.026e-02 2.045e-02 3.436 0.000591 ***
For.R$YearsSinceLastPromotion -7.068e-02 1.852e-02 -3.816 0.000135 ***
For.R$YearsWithCurrManager 6.796e-02 2.067e-02 3.288 0.001008 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1298.6 on 1469 degrees of freedom
Residual deviance: 1175.3 on 1457 degrees of freedom
AIC: 1201.3
Number of Fisher Scoring iterations: 5
> # Reduced probit model: binomial GLM with a probit link on the same seven
> # regressors as model2.
> probitmodel2 <- glm(
  For.R$Attrition ~
    For.R$Age + For.R$JobSatisfaction + For.R$MonthlyIncome +
    For.R$TrainingTimesLastYear + For.R$YearsInCurrentRole +
    For.R$YearsSinceLastPromotion + For.R$YearsWithCurrManager,
  family = binomial(link = "probit")
)
> # Print the coefficient table, deviances and AIC.
> summary(probitmodel2)
Call:
glm(formula = For.R$Attrition ~ For.R$Age + For.R$JobSatisfaction +
For.R$MonthlyIncome + For.R$TrainingTimesLastYear + For.R$YearsInCurrentRole +
For.R$YearsSinceLastPromotion + For.R$YearsWithCurrManager,
family = binomial(link = "probit"))
Deviance Residuals:
Min 1Q Median 3Q Max
-3.2550 0.2755 0.4849 0.6603 1.1435
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -6.874e-01 2.236e-01 -3.075 0.00211 **
For.R$Age 1.629e-02 5.287e-03 3.081 0.00206 **
For.R$JobSatisfaction 1.504e-01 3.698e-02 4.067 4.77e-05 ***
For.R$MonthlyIncome 3.921e-05 1.254e-05 3.127 0.00177 **
For.R$TrainingTimesLastYear 8.561e-02 3.258e-02 2.628 0.00859 **
For.R$YearsInCurrentRole 5.646e-02 1.921e-02 2.939 0.00330 **
For.R$YearsSinceLastPromotion -8.139e-02 1.752e-02 -4.646 3.38e-06 ***
For.R$YearsWithCurrManager 5.304e-02 1.876e-02 2.828 0.00468 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 1298.6 on 1469 degrees of freedom
Residual deviance: 1180.4 on 1462 degrees of freedom
AIC: 1196.4
Number of Fisher Scoring iterations: 5