[go: up one dir, main page]

0% found this document useful (0 votes)
31 views · 6 pages

Untitled 21

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
31 views · 6 pages

Untitled 21

Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
You are on page 1/ 6

import pandas as pd

import numpy as np
import seaborn as sn
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

import numpy as np # linear algebra


import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter)
# will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file
# found below it. os.walk yields nothing for a missing directory, so this
# prints nothing when '/kaggle/input' does not exist.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Read the CSV file into a DataFrame and display its first five rows.
df = pd.read_csv("/content/auto1.csv")

df.head(n=5)

symboling normalized-losses make fuel-type aspiration \


0 3 NaN alfa-romero gas std
1 3 NaN alfa-romero gas std
2 1 NaN alfa-romero gas std
3 2 164.0 audi gas std
4 2 164.0 audi gas std

  num-of-doors   body-style drive-wheels engine-location  wheel-base  ...  \
0          two  convertible          rwd           front        88.6  ...
1          two  convertible          rwd           front        88.6  ...
2          two    hatchback          rwd           front        94.5  ...
3         four        sedan          fwd           front        99.8  ...
4         four        sedan          4wd           front        99.4  ...

   engine-size  fuel-system  bore  stroke  compression-ratio  horsepower  \
0          130         mpfi  3.47    2.68                9.0       111.0
1          130         mpfi  3.47    2.68                9.0       111.0
2          152         mpfi  2.68    3.47                9.0       154.0
3          109         mpfi  3.19    3.40               10.0       102.0
4          136         mpfi  3.19    3.40                8.0       115.0

peak-rpm city-mpg highway-mpg price


0 5000.0 21 27 13495.0
1 5000.0 21 27 16500.0
2 5000.0 19 26 16500.0
3 5500.0 24 30 13950.0
4 5500.0 18 22 17450.0

[5 rows x 26 columns]

# Print a summary of df: its index range, each column's non-null count
# and dtype, and the frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 205 entries, 0 to 204
Data columns (total 26 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 symboling 205 non-null int64
1 normalized-losses 164 non-null float64
2 make 205 non-null object
3 fuel-type 205 non-null object
4 aspiration 205 non-null object
5 num-of-doors 203 non-null object
6 body-style 205 non-null object
7 drive-wheels 205 non-null object
8 engine-location 205 non-null object
9 wheel-base 205 non-null float64
10 length 205 non-null float64
11 width 205 non-null float64
12 height 205 non-null float64
13 curb-weight 205 non-null int64
14 engine-type 205 non-null object
15 num-of-cylinders 205 non-null object
16 engine-size 205 non-null int64
17 fuel-system 205 non-null object
18 bore 201 non-null float64
19 stroke 201 non-null float64
20 compression-ratio 205 non-null float64
21 horsepower 203 non-null float64
22 peak-rpm 203 non-null float64
23 city-mpg 205 non-null int64
24 highway-mpg 205 non-null int64
25 price 201 non-null float64
dtypes: float64(11), int64(5), object(10)
memory usage: 41.8+ KB
# Convert every "?" placeholder in df to NaN (in place), write the frame to
# "auto1.csv" without the index column, then read that file back as df2
# and display its first ten rows.
df.replace(to_replace="?", value=np.nan, inplace=True)
df.to_csv(path_or_buf="auto1.csv", index=False)

df2 = pd.read_csv("auto1.csv")

df2.head(n=10)

symboling normalized-losses make fuel-type aspiration \


0 3 NaN alfa-romero gas std
1 3 NaN alfa-romero gas std
2 1 NaN alfa-romero gas std
3 2 164.0 audi gas std
4 2 164.0 audi gas std
5 2 NaN audi gas std
6 1 158.0 audi gas std
7 1 NaN audi gas std
8 1 158.0 audi gas turbo
9 0 NaN audi gas turbo

  num-of-doors   body-style drive-wheels engine-location  wheel-base  ...  \
0          two  convertible          rwd           front        88.6  ...
1          two  convertible          rwd           front        88.6  ...
2          two    hatchback          rwd           front        94.5  ...
3         four        sedan          fwd           front        99.8  ...
4         four        sedan          4wd           front        99.4  ...
5          two        sedan          fwd           front        99.8  ...
6         four        sedan          fwd           front       105.8  ...
7         four        wagon          fwd           front       105.8  ...
8         four        sedan          fwd           front       105.8  ...
9          two    hatchback          4wd           front        99.5  ...

   engine-size  fuel-system  bore  stroke  compression-ratio  horsepower  \
0          130         mpfi  3.47    2.68                9.0       111.0
1          130         mpfi  3.47    2.68                9.0       111.0
2          152         mpfi  2.68    3.47                9.0       154.0
3          109         mpfi  3.19    3.40               10.0       102.0
4          136         mpfi  3.19    3.40                8.0       115.0
5          136         mpfi  3.19    3.40                8.5       110.0
6          136         mpfi  3.19    3.40                8.5       110.0
7          136         mpfi  3.19    3.40                8.5       110.0
8          131         mpfi  3.13    3.40                8.3       140.0
9          131         mpfi  3.13    3.40                7.0       160.0

peak-rpm city-mpg highway-mpg price


0 5000.0 21 27 13495.0
1 5000.0 21 27 16500.0
2 5000.0 19 26 16500.0
3 5500.0 24 30 13950.0
4 5500.0 18 22 17450.0
5 5500.0 19 25 15250.0
6 5500.0 19 25 17710.0
7 5500.0 19 25 18920.0
8 5500.0 17 20 23875.0
9 5500.0 16 22 NaN

[10 rows x 26 columns]

# Count the missing (NaN) entries in each column of df2.
df2.isnull().sum()

symboling 0
normalized-losses 41
make 0
fuel-type 0
aspiration 0
num-of-doors 2
body-style 0
drive-wheels 0
engine-location 0
wheel-base 0
length 0
width 0
height 0
curb-weight 0
engine-type 0
num-of-cylinders 0
engine-size 0
fuel-system 0
bore 4
stroke 4
compression-ratio 0
horsepower 2
peak-rpm 2
city-mpg 0
highway-mpg 0
price 4
dtype: int64

# Compute one fill value per numeric column that contains missing entries.
# NOTE(review): the printed labels say "Average", but each value is the
# column *median* -- confirm which statistic is intended.
a_n = df2["normalized-losses"].astype(float).median()
print("Average of normalized-losses: ", a_n)

a_b = df2["bore"].astype(float).median()
print("Average of bore: ", a_b)

a_s = df2["stroke"].astype(float).median()
print("Average of stroke:", a_s)

a_h = df2["horsepower"].astype(float).median()
print("Average horsepower:", a_h)

a_p = df2["peak-rpm"].astype(float).median()
print("Average peak rpm:", a_p)

Average of normalized-losses: 115.0


Average of bore: 3.31
Average of stroke: 3.29
Average horsepower: 95.0
Average peak rpm: 5200.0

# Fill the missing entries of each numeric column with the value computed
# for it earlier (a_n, a_s, a_b, a_h, a_p).
#
# The result is assigned back to the column instead of calling
# `df2[col].replace(..., inplace=True)`: that form mutates a temporary
# Series selected from the frame, which triggers a chained-assignment
# warning on pandas 2.x and leaves df2 unchanged under Copy-on-Write.
df2["normalized-losses"] = df2["normalized-losses"].fillna(a_n)
df2["stroke"] = df2["stroke"].fillna(a_s)
df2["bore"] = df2["bore"].fillna(a_b)
df2["horsepower"] = df2["horsepower"].fillna(a_h)
df2["peak-rpm"] = df2["peak-rpm"].fillna(a_p)

# Show the rows whose 'num-of-doors' value is still missing.
df2[df2["num-of-doors"].isna()]

    symboling  normalized-losses   make fuel-type aspiration num-of-doors  \
27          1              148.0  dodge       gas      turbo          NaN
63          0              115.0  mazda    diesel        std          NaN

   body-style drive-wheels engine-location  wheel-base  ...  engine-size  \
27      sedan          fwd           front        93.7  ...           98
63      sedan          fwd           front        98.8  ...          122

   fuel-system  bore  stroke  compression-ratio  horsepower  peak-rpm  city-mpg  \
27        mpfi  3.03    3.39                7.6       102.0    5500.0        24
63         idi  3.39    3.39               22.7        64.0    4650.0        36

highway-mpg price
27 30 8558.0
63 42 10795.0

[2 rows x 26 columns]

You might also like