[go: up one dir, main page]

0% found this document useful (0 votes)
4 views26 pages

Python Project - Checkpoint

Uploaded by

anuragsingh0406
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
4 views26 pages

Python Project - Checkpoint

Uploaded by

anuragsingh0406
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 26

import pandas as pd

import matplotlib.pyplot as plt


import seaborn as sns

# Load the automobile dataset from the working directory and preview the
# first rows.
df = pd.read_csv("automobile.csv")
df.head()

Age Gender Profession Marital_status Education


No_of_Dependents \
0 53 Male Business Married Post Graduate
4
1 53 Femal Salaried Married Post Graduate
4
2 53 Female Salaried Married Post Graduate
3
3 53 Female Salaried Married Graduate
2
4 53 Male Salaried Married Post Graduate
3

Personal_loan House_loan Partner_working Salary Partner_salary \


0 No No Yes 99300 70700.0
1 Yes No Yes 95500 70300.0
2 No No Yes 97300 60700.0
3 Yes No Yes 72500 70300.0
4 No No Yes 79700 60200.0

Total_salary Price Make


0 170000 61000 SUV
1 165800 61000 SUV
2 158000 57000 SUV
3 142800 61000 SUV
4 139900 57000 SUV

# Print column dtypes and non-null counts for the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1581 entries, 0 to 1580
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Age 1581 non-null int64
1 Gender 1528 non-null object
2 Profession 1581 non-null object
3 Marital_status 1581 non-null object
4 Education 1581 non-null object
5 No_of_Dependents 1581 non-null int64
6 Personal_loan 1581 non-null object
7 House_loan 1581 non-null object
8 Partner_working 1581 non-null object
9 Salary 1581 non-null int64
10 Partner_salary 1475 non-null float64
11 Total_salary 1581 non-null int64
12 Price 1581 non-null int64
13 Make 1581 non-null object
dtypes: float64(1), int64(5), object(8)
memory usage: 173.1+ KB

# Count missing values per column.
df.isnull().sum()

Age 0
Gender 53
Profession 0
Marital_status 0
Education 0
No_of_Dependents 0
Personal_loan 0
House_loan 0
Partner_working 0
Salary 0
Partner_salary 106
Total_salary 0
Price 0
Make 0
dtype: int64

# Boolean flag per row marking rows that duplicate an earlier row.
df.duplicated()

0 False
1 False
2 False
3 False
4 False
...
1576 False
1577 False
1578 False
1579 False
1580 False
Length: 1581, dtype: bool

# Impute missing Gender values with the most frequent category.
# The result is assigned back to the column rather than using
# fillna(inplace=True) on the column selection: the in-place form operates on
# an intermediate object, emits a FutureWarning in pandas 2.x and leaves df
# unchanged when Copy-on-Write is enabled.
df['Gender'] = df['Gender'].fillna(df['Gender'].mode()[0])

# List the distinct Gender labels present in the column.
df['Gender'].unique()

array(['Male', 'Femal', 'Female', 'Femle'], dtype=object)

# Recompute Partner_salary for every row as Total_salary - Salary.
# NOTE(review): this overwrites the existing non-null values as well, not only
# the missing entries — confirm that is intended.
df['Partner_salary']=df['Total_salary']-df['Salary']

# Re-count missing values per column.
df.isnull().sum()
Age 0
Gender 0
Profession 0
Marital_status 0
Education 0
No_of_Dependents 0
Personal_loan 0
House_loan 0
Partner_working 0
Salary 0
Partner_salary 0
Total_salary 0
Price 0
Make 0
dtype: int64

# Map the misspelled labels 'Femal' and 'Femle' onto 'Female'.
df['Gender'] = df['Gender'].replace(['Femal', 'Femle'], 'Female')

# List the distinct Gender labels again.
df['Gender'].unique()

array(['Male', 'Female'], dtype=object)

# Print the number of rows per Gender label.
print(df['Gender'].value_counts())

Gender
Male 1252
Female 329
Name: count, dtype: int64

# Summary statistics (count, mean, std, quartiles, min/max) for the numeric columns.
df.describe()

Age No_of_Dependents Salary Partner_salary \


count 1581.000000 1581.000000 1581.000000 1581.000000
mean 31.922201 2.457938 60392.220114 19233.776091
std 8.425978 0.943483 14674.825044 19670.391171
min 22.000000 0.000000 30000.000000 0.000000
25% 25.000000 2.000000 51900.000000 0.000000
50% 29.000000 2.000000 59500.000000 25100.000000
75% 38.000000 3.000000 71800.000000 38100.000000
max 54.000000 4.000000 99300.000000 80500.000000

Total_salary Price
count 1581.000000 1581.000000
mean 79625.996205 35597.722960
std 25545.857768 13633.636545
min 30000.000000 18000.000000
25% 60500.000000 25000.000000
50% 78000.000000 31000.000000
75% 95900.000000 47000.000000
max 171000.000000 70000.000000
# Preview the first rows of the frame.
df.head()

Age Gender Profession Marital_status Education


No_of_Dependents \
0 53 Male Business Married Post Graduate
4
1 53 Female Salaried Married Post Graduate
4
2 53 Female Salaried Married Post Graduate
3
3 53 Female Salaried Married Graduate
2
4 53 Male Salaried Married Post Graduate
3

Personal_loan House_loan Partner_working Salary Partner_salary \


0 No No Yes 99300 70700
1 Yes No Yes 95500 70300
2 No No Yes 97300 60700
3 Yes No Yes 72500 70300
4 No No Yes 79700 60200

Total_salary Price Make


0 170000 61000 SUV
1 165800 61000 SUV
2 158000 57000 SUV
3 142800 61000 SUV
4 139900 57000 SUV

# Preview the last rows of the frame.
df.tail()

Age Gender Profession Marital_status Education No_of_Dependents


\
1576 22 Male Salaried Single Graduate 2

1577 22 Male Business Married Graduate 4

1578 22 Male Business Single Graduate 2

1579 22 Male Business Married Graduate 3

1580 22 Male Salaried Married Graduate 4

Personal_loan House_loan Partner_working Salary Partner_salary


\
1576 No Yes No 33300 0

1577 No No No 32000 0

1578 No Yes No 32900 0


1579 Yes Yes No 32200 0

1580 No No No 31600 0

Total_salary Price Make


1576 33300 27000 Hatchback
1577 32000 31000 Hatchback
1578 32900 30000 Hatchback
1579 32200 24000 Hatchback
1580 31600 31000 Hatchback

# Univariate bar charts for the categorical columns, one figure per column.
# hue mirrors x so that each category's bar gets its own colour.
for col in ['Gender', 'Profession', 'Marital_status', 'Education',
            'Personal_loan']:
    sns.countplot(data=df, x=col, hue=col)
    plt.title(f'Bar chart-{col}')
    plt.xlabel(col)
    plt.ylabel('Count')
    plt.show()

# Percentage share of each Personal_loan category.
df['Personal_loan'].value_counts(normalize=True) * 100

Personal_loan
Yes 50.094877
No 49.905123
Name: proportion, dtype: float64

# Bar chart of the House_loan categories; hue mirrors x so each bar gets its
# own colour.
sns.countplot(data=df, x='House_loan', hue='House_loan')
plt.title('Bar chart-House_loan')
plt.xlabel('House_loan')
plt.ylabel('Count')
plt.show()

# Percentage share of each House_loan category.
df['House_loan'].value_counts(normalize=True) * 100

House_loan
No 66.666667
Yes 33.333333
Name: proportion, dtype: float64

# Bar chart of the Partner_working categories.
sns.countplot(data=df, x='Partner_working', hue='Partner_working')
plt.title('Bar chart-Partner_working')
plt.xlabel('Partner_working')
plt.ylabel('Count')
plt.show()

# Bar chart of the Make column, labelled as vehicle type.
sns.countplot(data=df, x='Make', hue='Make')
plt.title('Bar chart-Vehicle_type')
plt.xlabel('Vehicle_type')
plt.ylabel('Count')
plt.show()

# Age distribution with a KDE overlay.
sns.histplot(data=df, x='Age', kde=True)
plt.title('Histogram-Age')
plt.xlabel('Age')
plt.ylabel('Count')
plt.show()

# Summary statistics for Age.
df['Age'].describe()

count 1581.000000
mean 31.922201
std 8.425978
min 22.000000
25% 25.000000
50% 29.000000
75% 38.000000
max 54.000000
Name: Age, dtype: float64

# Histogram with a KDE overlay for each salary/price column, one figure each.
for numeric_col in ('Salary', 'Partner_salary', 'Total_salary', 'Price'):
    sns.histplot(data=df, x=numeric_col, kde=True)
    plt.title(f'Histogram-{numeric_col}')
    plt.xlabel(numeric_col)
    plt.ylabel('Count')
    plt.show()

# Grid of pairwise plots built from df.
sns.pairplot(data=df)

# Correlation matrix restricted to the six numeric columns of interest.
selected_cols = [
    'Age',
    'No_of_Dependents',
    'Salary',
    'Partner_salary',
    'Total_salary',
    'Price',
]
df_selected = df[selected_cols]
correlation_table = df_selected.corr()
print(correlation_table)

Age No_of_Dependents Salary Partner_salary


\
Age 1.000000 -0.189614 0.616899 0.135702

No_of_Dependents -0.189614 1.000000 -0.031746 0.144320


Salary 0.616899 -0.031746 1.000000 0.087155

Partner_salary 0.135702 0.144320 0.087155 1.000000

Total_salary 0.458869 0.092890 0.641560 0.820069

Price 0.797831 -0.135839 0.409920 0.171875

Total_salary Price
Age 0.458869 0.797831
No_of_Dependents 0.092890 -0.135839
Salary 0.641560 0.409920
Partner_salary 0.820069 0.171875
Total_salary 1.000000 0.367823
Price 0.367823 1.000000

# Heatmap of the correlation matrix, each cell annotated to two decimals.
sns.heatmap(
    data=correlation_table,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
)
plt.title('Correlation Heatmap')
plt.show()
# Count of each Make split by Gender.
sns.countplot(data=df, x='Gender', hue='Make')
plt.title('Bar chart-Gender')
plt.xlabel('Gender')
plt.ylabel('Count')
plt.show()

# Count of each Make split by Profession. The title names Profession so that
# it matches the x-axis of this figure.
sns.countplot(data=df, x='Profession', hue='Make')
plt.title('Bar chart-Profession')
plt.xlabel('Profession')
plt.ylabel('Count')
plt.show()

# Combine Profession and Gender into one label (e.g. 'Salaried_Male') so the
# Make counts can be compared across both attributes at once.
df['Profession_Gender'] = df['Profession'] + '_' + df['Gender']
sns.countplot(data=df, x='Profession_Gender', hue='Make')
plt.title('Bar chart - Profession and Gender')
plt.xlabel('Profession and Gender')
plt.ylabel('Count')
plt.show()
# Total amount spent on vehicles per Gender.
# sns.barplot aggregates with the mean by default; estimator='sum' makes the
# bars show the totals that the title promises, and errorbar=None drops the
# confidence-interval whiskers, which are not meaningful for a sum.
sns.barplot(data=df, x='Gender', y='Price', estimator='sum', errorbar=None)
plt.title('Total amount spent on vehicles by Gender')
plt.xlabel('Gender')
plt.ylabel('Price')
plt.show()

# Total amount spent on vehicles, split by whether a personal loan was taken.
sns.barplot(data=df, x='Personal_loan', y='Price', estimator='sum',
            errorbar=None)
plt.title('Total amount spent on vehicles by Personal Loan')
plt.xlabel('Personal_loan')
plt.ylabel('Price')
plt.show()
# Price distribution grouped by whether the partner is working.
# The title is kept on a single line: a plain quoted string cannot span a
# line break.
sns.boxplot(data=df, x='Partner_working', y='Price')
plt.title('Influence of Partner working on purchase of High-priced cars')
plt.xlabel('Partner_working')
plt.ylabel('Price')
plt.show()

You might also like