import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Load the automobile dataset; the path is relative to the working directory.
df=pd.read_csv("automobile.csv")
# Preview the first five rows to check the columns were parsed as expected.
df.head()
Age Gender Profession Marital_status Education
No_of_Dependents \
0 53 Male Business Married Post Graduate
4
1 53 Femal Salaried Married Post Graduate
4
2 53 Female Salaried Married Post Graduate
3
3 53 Female Salaried Married Graduate
2
4 53 Male Salaried Married Post Graduate
3
Personal_loan House_loan Partner_working Salary Partner_salary \
0 No No Yes 99300 70700.0
1 Yes No Yes 95500 70300.0
2 No No Yes 97300 60700.0
3 Yes No Yes 72500 70300.0
4 No No Yes 79700 60200.0
Total_salary Price Make
0 170000 61000 SUV
1 165800 61000 SUV
2 158000 57000 SUV
3 142800 61000 SUV
4 139900 57000 SUV
# Structural summary: index range, per-column non-null counts, dtypes, memory.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1581 entries, 0 to 1580
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Age 1581 non-null int64
1 Gender 1528 non-null object
2 Profession 1581 non-null object
3 Marital_status 1581 non-null object
4 Education 1581 non-null object
5 No_of_Dependents 1581 non-null int64
6 Personal_loan 1581 non-null object
7 House_loan 1581 non-null object
8 Partner_working 1581 non-null object
9 Salary 1581 non-null int64
10 Partner_salary 1475 non-null float64
11 Total_salary 1581 non-null int64
12 Price 1581 non-null int64
13 Make 1581 non-null object
dtypes: float64(1), int64(5), object(8)
memory usage: 173.1+ KB
# Count the missing values in each column.
df.isnull().sum()
Age 0
Gender 53
Profession 0
Marital_status 0
Education 0
No_of_Dependents 0
Personal_loan 0
House_loan 0
Partner_working 0
Salary 0
Partner_salary 106
Total_salary 0
Price 0
Make 0
dtype: int64
# Boolean Series marking rows that exactly repeat an earlier row.
df.duplicated()
0 False
1 False
2 False
3 False
4 False
...
1576 False
1577 False
1578 False
1579 False
1580 False
Length: 1581, dtype: bool
# Fill missing Gender values with the most frequent category.
# The result is assigned back instead of calling fillna(inplace=True) on the
# df['Gender'] selection: the in-place form operates on an intermediate
# object, raises a FutureWarning on pandas 2.x and leaves df unchanged under
# Copy-on-Write.
df['Gender'] = df['Gender'].fillna(df['Gender'].mode()[0])
# List the distinct labels to spot inconsistent spellings.
df['Gender'].unique()
array(['Male', 'Femal', 'Female', 'Femle'], dtype=object)
# Derive the partner's salary as household total minus own salary.
# NOTE: this overwrites every row of the column, not only the missing entries.
df['Partner_salary']=df['Total_salary']-df['Salary']
# Re-count missing values per column after the fill steps.
df.isnull().sum()
Age 0
Gender 0
Profession 0
Marital_status 0
Education 0
No_of_Dependents 0
Personal_loan 0
House_loan 0
Partner_working 0
Salary 0
Partner_salary 0
Total_salary 0
Price 0
Make 0
dtype: int64
# Collapse the misspelled labels into the single 'Female' category.
df['Gender'] = df['Gender'].replace(['Femal', 'Femle'], 'Female')
# Confirm only the expected labels remain.
df['Gender'].unique()
array(['Male', 'Female'], dtype=object)
# Print the number of rows in each Gender category.
print(df['Gender'].value_counts())
Gender
Male 1252
Female 329
Name: count, dtype: int64
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()
Age No_of_Dependents Salary Partner_salary \
count 1581.000000 1581.000000 1581.000000 1581.000000
mean 31.922201 2.457938 60392.220114 19233.776091
std 8.425978 0.943483 14674.825044 19670.391171
min 22.000000 0.000000 30000.000000 0.000000
25% 25.000000 2.000000 51900.000000 0.000000
50% 29.000000 2.000000 59500.000000 25100.000000
75% 38.000000 3.000000 71800.000000 38100.000000
max 54.000000 4.000000 99300.000000 80500.000000
Total_salary Price
count 1581.000000 1581.000000
mean 79625.996205 35597.722960
std 25545.857768 13633.636545
min 30000.000000 18000.000000
25% 60500.000000 25000.000000
50% 78000.000000 31000.000000
75% 95900.000000 47000.000000
max 171000.000000 70000.000000
# Preview the first five rows again now that Gender and Partner_salary are set.
df.head()
Age Gender Profession Marital_status Education
No_of_Dependents \
0 53 Male Business Married Post Graduate
4
1 53 Female Salaried Married Post Graduate
4
2 53 Female Salaried Married Post Graduate
3
3 53 Female Salaried Married Graduate
2
4 53 Male Salaried Married Post Graduate
3
Personal_loan House_loan Partner_working Salary Partner_salary \
0 No No Yes 99300 70700
1 Yes No Yes 95500 70300
2 No No Yes 97300 60700
3 Yes No Yes 72500 70300
4 No No Yes 79700 60200
Total_salary Price Make
0 170000 61000 SUV
1 165800 61000 SUV
2 158000 57000 SUV
3 142800 61000 SUV
4 139900 57000 SUV
# Preview the last five rows.
df.tail()
Age Gender Profession Marital_status Education No_of_Dependents
\
1576 22 Male Salaried Single Graduate 2
1577 22 Male Business Married Graduate 4
1578 22 Male Business Single Graduate 2
1579 22 Male Business Married Graduate 3
1580 22 Male Salaried Married Graduate 4
Personal_loan House_loan Partner_working Salary Partner_salary
\
1576 No Yes No 33300 0
1577 No No No 32000 0
1578 No Yes No 32900 0
1579 Yes Yes No 32200 0
1580 No No No 31600 0
Total_salary Price Make
1576 33300 27000 Hatchback
1577 32000 31000 Hatchback
1578 32900 30000 Hatchback
1579 32200 24000 Hatchback
1580 31600 31000 Hatchback
# Count plots for the categorical attributes, one figure per column.
for column in ['Gender', 'Profession', 'Marital_status', 'Education',
               'Personal_loan']:
    # hue repeats the x variable so every category is drawn in its own colour.
    sns.countplot(data=df, x=column, hue=column)
    plt.title(f'Bar chart-{column}')
    plt.xlabel(column)
    plt.ylabel('Count')
    plt.show()
# Percentage share of each Personal_loan category.
df['Personal_loan'].value_counts(normalize=True) * 100
Personal_loan
Yes 50.094877
No 49.905123
Name: proportion, dtype: float64
# Count plot of the House_loan categories.
sns.countplot(data=df, x='House_loan', hue='House_loan')
plt.title('Bar chart-House_loan')
plt.xlabel('House_loan')
plt.ylabel('Count')
plt.show()
# Percentage share of each House_loan category.
df['House_loan'].value_counts(normalize=True) * 100
House_loan
No 66.666667
Yes 33.333333
Name: proportion, dtype: float64
# Count plots for Partner_working and for vehicle type (the Make column).
# Each pair is (column to count, label used in the title and on the x axis).
for column, label in [('Partner_working', 'Partner_working'),
                      ('Make', 'Vehicle_type')]:
    sns.countplot(data=df, x=column, hue=column)
    plt.title(f'Bar chart-{label}')
    plt.xlabel(label)
    plt.ylabel('Count')
    plt.show()
# Distribution of Age with a kernel-density overlay.
age_ax = sns.histplot(data=df, x='Age', kde=True)
age_ax.set_title('Histogram-Age')
age_ax.set_xlabel('Age')
age_ax.set_ylabel('Count')
plt.show()
# Numeric summary of the same column.
df['Age'].describe()
count 1581.000000
mean 31.922201
std 8.425978
min 22.000000
25% 25.000000
50% 29.000000
75% 38.000000
max 54.000000
Name: Age, dtype: float64
# Histograms (with KDE overlay) for the salary and price columns, one figure
# per column.
for money_col in ['Salary', 'Partner_salary', 'Total_salary', 'Price']:
    sns.histplot(data=df, x=money_col, kde=True)
    plt.title(f'Histogram-{money_col}')
    plt.xlabel(money_col)
    plt.ylabel('Count')
    plt.show()
# Grid of pairwise relationships between the columns of df; the trailing
# semicolon keeps a notebook from echoing the returned grid object.
sns.pairplot(data=df);
# Correlation matrix restricted to the numeric attributes of interest.
selected_cols = [
    'Age',
    'No_of_Dependents',
    'Salary',
    'Partner_salary',
    'Total_salary',
    'Price',
]
df_selected = df.loc[:, selected_cols]
correlation_table = df_selected.corr()
print(correlation_table)
Age No_of_Dependents Salary Partner_salary
\
Age 1.000000 -0.189614 0.616899 0.135702
No_of_Dependents -0.189614 1.000000 -0.031746 0.144320
Salary 0.616899 -0.031746 1.000000 0.087155
Partner_salary 0.135702 0.144320 0.087155 1.000000
Total_salary 0.458869 0.092890 0.641560 0.820069
Price 0.797831 -0.135839 0.409920 0.171875
Total_salary Price
Age 0.458869 0.797831
No_of_Dependents 0.092890 -0.135839
Salary 0.641560 0.409920
Partner_salary 0.820069 0.171875
Total_salary 1.000000 0.367823
Price 0.367823 1.000000
# Annotated heatmap of the correlation matrix, values shown to two decimals.
heatmap_ax = sns.heatmap(
    data=correlation_table,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
)
heatmap_ax.set_title('Correlation Heatmap')
plt.show()
# Vehicle type (Make) counts split by Gender and then by Profession.
# Each title names the column on the x axis, so the Profession chart is no
# longer labelled as a Gender chart.
for column in ['Gender', 'Profession']:
    sns.countplot(data=df, x=column, hue='Make')
    plt.title(f'Bar chart-{column} by Make')
    plt.xlabel(column)
    plt.ylabel('Count')
    plt.show()
# Combined 'Profession_Gender' key (e.g. 'Salaried_Male') so vehicle type can
# be counted per profession/gender pair. This adds a helper column to df.
df['Profession_Gender'] = df['Profession'] + '_' + df['Gender']
combo_ax = sns.countplot(data=df, x='Profession_Gender', hue='Make')
combo_ax.set_title('Bar chart - Profession and Gender')
combo_ax.set_xlabel('Profession and Gender')
combo_ax.set_ylabel('Count')
plt.show()
# Total spend on vehicles per group. barplot aggregates with the mean by
# default, so the sum is requested explicitly to match the titles. Error bars
# are turned off because a bootstrap interval around a total is not
# informative here.
for column, label in [('Gender', 'Gender'),
                      ('Personal_loan', 'Personal Loan')]:
    sns.barplot(data=df, x=column, y='Price', estimator='sum', errorbar=None)
    plt.title(f'Total amount spent on vehicles by {label}')
    plt.xlabel(column)
    plt.ylabel('Total Price')
    plt.show()
# Price distribution by whether the partner works, to see how a second income
# relates to buying higher-priced cars.
sns.boxplot(data=df, x='Partner_working', y='Price')
# The title is kept on one line: a single-quoted string cannot span lines.
plt.title('Influence of Partner working on purchase of High-priced cars')
plt.xlabel('Partner_working')
plt.ylabel('Price')
plt.show()