3/23/23, 2:01 PM BCA II Year B Section Session 03 - 23.03.23 - Jupyter Notebook
In [2]:
from random import randint

# Draw ten pseudo-random integers between 1 and 44 (randint includes both
# end points) and print them one per line. No seed is set, so the values
# differ on every run.
print(*(randint(1, 44) for _ in range(10)), sep='\n')
38
1
5
32
2
33
37
35
12
16
localhost:8888/notebooks/BCA II Year B Section Session 03 - 23.03.23.ipynb 1/12
3/23/23, 2:01 PM BCA II Year B Section Session 03 - 23.03.23 - Jupyter Notebook
In [3]:
import pandas as pd

# Read the IPL match records from a CSV located in the notebook's working
# directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='IPL_Matches_2008_2022.csv')

# A bare trailing expression makes Jupyter render the frame as the cell output.
df
Out[3]:
ID City Date Season MatchNumber Team1 Team2 Ven
Narend
2022- Rajasthan Gujarat Mo
0 1312200 Ahmedabad 2022 Final
05-29 Royals Titans Stadiu
Ahmedab
Narend
Royal
2022- Rajasthan Mo
1 1312199 Ahmedabad 2022 Qualifier 2 Challengers
05-27 Royals Stadiu
Bangalore
Ahmedab
Royal Lucknow Ed
2022-
2 1312198 Kolkata 2022 Eliminator Challengers Super Garden
05-25
Bangalore Giants Kolka
Ed
2022- Rajasthan Gujarat
3 1312197 Kolkata 2022 Qualifier 1 Garden
05-24 Royals Titans
Kolka
Wankhe
2022- Sunrisers Punjab
4 1304116 Mumbai 2022 70 Stadiu
05-22 Hyderabad Kings
Mumb
... ... ... ... ... ... ... ...
Kolkata
2008- Deccan Ed
945 335986 Kolkata 2007/08 4 Knight
04-20 Chargers Garde
Riders
Royal
2008- Mumbai Wankhe
946 335985 Mumbai 2007/08 5 Challengers
04-20 Indians Stadiu
Bangalore
2008- Delhi Rajasthan Feroz Sh
947 335984 Delhi 2007/08 3
04-19 Daredevils Royals Ko
Punj
Chennai Crick
2008- Kings XI
948 335983 Chandigarh 2007/08 2 Super Associati
04-19 Punjab
Kings Stadiu
Moh
Royal Kolkata
2008-
949 335982 Bangalore 2007/08 1 Challengers Knight Chinnaswam
04-18
Bangalore Riders Stadiu
950 rows × 20 columns
localhost:8888/notebooks/BCA II Year B Section Session 03 - 23.03.23.ipynb 2/12
3/23/23, 2:01 PM BCA II Year B Section Session 03 - 23.03.23 - Jupyter Notebook
In [4]:
# Show the column labels of df.
1 df.columns
Out[4]:
Index(['ID', 'City', 'Date', 'Season', 'MatchNumber', 'Team1', 'Team2',
'Venue', 'TossWinner', 'TossDecision', 'SuperOver', 'WinningTeam',
'WonBy', 'Margin', 'method', 'Player_of_Match', 'Team1Players',
'Team2Players', 'Umpire1', 'Umpire2'],
dtype='object')
In [5]:
# Count the missing values in every column of df (one total per column).
1 df.isnull().sum()
Out[5]:
ID 0
City 51
Date 0
Season 0
MatchNumber 0
Team1 0
Team2 0
Venue 0
TossWinner 0
TossDecision 0
SuperOver 4
WinningTeam 4
WonBy 0
Margin 18
method 931
Player_of_Match 4
Team1Players 0
Team2Players 0
Umpire1 0
Umpire2 0
dtype: int64
In [6]:
# Replace every missing value in the 'method' column with the literal string
# 'NA'; all other columns are left untouched.
# NOTE: 'NA' is one of the strings pandas.read_csv treats as missing by
# default, so this placeholder turns back into NaN if the frame is written
# to CSV and re-read with default settings.
df = df.fillna({'method': 'NA'})
localhost:8888/notebooks/BCA II Year B Section Session 03 - 23.03.23.ipynb 3/12
3/23/23, 2:01 PM BCA II Year B Section Session 03 - 23.03.23 - Jupyter Notebook
In [7]:
# Re-count missing values per column to check the effect of the fill on 'method'.
1 df.isnull().sum()
Out[7]:
ID 0
City 51
Date 0
Season 0
MatchNumber 0
Team1 0
Team2 0
Venue 0
TossWinner 0
TossDecision 0
SuperOver 4
WinningTeam 4
WonBy 0
Margin 18
method 0
Player_of_Match 4
Team1Players 0
Team2Players 0
Umpire1 0
Umpire2 0
dtype: int64
In [9]:
# Boolean Series: True where 'SuperOver' is missing, False elsewhere.
1 df['SuperOver'].isnull()
Out[9]:
0 False
1 False
2 False
3 False
4 False
...
945 False
946 False
947 False
948 False
949 False
Name: SuperOver, Length: 950, dtype: bool
localhost:8888/notebooks/BCA II Year B Section Session 03 - 23.03.23.ipynb 4/12
3/23/23, 2:01 PM BCA II Year B Section Session 03 - 23.03.23 - Jupyter Notebook
In [10]:
# Display only the rows whose 'SuperOver' value is missing.
df.loc[df['SuperOver'].isna()]
Out[10]:
ID City Date Season MatchNumber Team1 Team2 Venu
Royal
2019- Rajasthan M.Chinnaswam
205 1178424 Bengaluru 2019 49 Challengers
04-30 Royals Stadium
Bangalore
Royal
2015- Delhi M Chinnaswam
437 829813 Bangalore 2015 55 Challengers
05-17 Daredevils Stadium
Bangalore
Royal
2015- Rajasthan M Chinnaswam
464 829763 Bangalore 2015 29 Challengers
04-29 Royals Stadium
Bangalore
2011- Delhi Pune Feroz Sha
708 501265 Delhi 2011 68
05-21 Daredevils Warriors Kot
In [11]:
# Discard the rows that have no 'SuperOver' value and rebind df to the result.
df = df.dropna(subset=['SuperOver'])
In [12]:
# Re-count missing values per column after dropping the rows with no 'SuperOver' value.
1 df.isnull().sum()
Out[12]:
ID 0
City 51
Date 0
Season 0
MatchNumber 0
Team1 0
Team2 0
Venue 0
TossWinner 0
TossDecision 0
SuperOver 0
WinningTeam 0
WonBy 0
Margin 14
method 0
Player_of_Match 0
Team1Players 0
Team2Players 0
Umpire1 0
Umpire2 0
dtype: int64
localhost:8888/notebooks/BCA II Year B Section Session 03 - 23.03.23.ipynb 5/12
3/23/23, 2:01 PM BCA II Year B Section Session 03 - 23.03.23 - Jupyter Notebook
In [14]:
# Where 'City' is missing, copy that row's 'Venue' value into it; rows that
# already have a city are left as they are.
city_missing = df['City'].isna()
df.loc[city_missing, 'City'] = df.loc[city_missing, 'Venue']
In [15]:
# Re-count missing values per column after filling 'City' from 'Venue'.
1 df.isnull().sum()
Out[15]:
ID 0
City 0
Date 0
Season 0
MatchNumber 0
Team1 0
Team2 0
Venue 0
TossWinner 0
TossDecision 0
SuperOver 0
WinningTeam 0
WonBy 0
Margin 14
method 0
Player_of_Match 0
Team1Players 0
Team2Players 0
Umpire1 0
Umpire2 0
dtype: int64
In [16]:
# Impute missing 'Margin' values with the mean of the non-missing ones.
# NOTE(review): the mean is taken over all rows regardless of 'WonBy'; if
# 'WonBy' distinguishes different kinds of margin (e.g. runs vs. wickets),
# this single average mixes units — confirm before relying on it.
margin_mean = df['Margin'].mean()
df['Margin'] = df['Margin'].fillna(margin_mean)
localhost:8888/notebooks/BCA II Year B Section Session 03 - 23.03.23.ipynb 6/12
3/23/23, 2:01 PM BCA II Year B Section Session 03 - 23.03.23 - Jupyter Notebook
In [17]:
# Final per-column missing-value count after imputing 'Margin'.
1 df.isnull().sum()
Out[17]:
ID 0
City 0
Date 0
Season 0
MatchNumber 0
Team1 0
Team2 0
Venue 0
TossWinner 0
TossDecision 0
SuperOver 0
WinningTeam 0
WonBy 0
Margin 0
method 0
Player_of_Match 0
Team1Players 0
Team2Players 0
Umpire1 0
Umpire2 0
dtype: int64
localhost:8888/notebooks/BCA II Year B Section Session 03 - 23.03.23.ipynb 7/12
3/23/23, 2:01 PM BCA II Year B Section Session 03 - 23.03.23 - Jupyter Notebook
In [18]:
import plotly.express as ex

# Horizontal box plot of the 'Margin' column of df (Margin on the x axis).
ex.box(df, x='Margin')
In [19]:
import pandas as pd

# Read the car dataset from a CSV located in the notebook's working directory.
cars = pd.read_csv(filepath_or_buffer='car data.csv')
localhost:8888/notebooks/BCA II Year B Section Session 03 - 23.03.23.ipynb 8/12
3/23/23, 2:01 PM BCA II Year B Section Session 03 - 23.03.23 - Jupyter Notebook
In [20]:
# Display the cars frame as the cell output.
1 cars
Out[20]:
Car_Name Year Selling_Price Present_Price Kms_Driven Fuel_Type Seller_Type Tra
0 ritz 2014 3.35 5.59 27000 Petrol Dealer
1 sx4 2013 4.75 9.54 43000 Diesel Dealer
2 ciaz 2017 7.25 9.85 6900 Petrol Dealer
3 wagon r 2011 2.85 4.15 5200 Petrol Dealer
4 swift 2014 4.60 6.87 42450 Diesel Dealer
... ... ... ... ... ... ... ...
296 city 2016 9.50 11.60 33988 Diesel Dealer
297 brio 2015 4.00 5.90 60000 Petrol Dealer
298 city 2009 3.35 11.00 87934 Petrol Dealer
299 city 2017 11.50 12.50 9000 Diesel Dealer
300 brio 2016 5.30 5.90 5464 Petrol Dealer
301 rows × 9 columns
# Preview 'Kms_Driven' divided by 10000. The quotient is only displayed, not
# assigned, so the cars frame itself is left unchanged.
cars['Kms_Driven'].div(10000)
In [22]:
# Display the cars frame again.
1 cars
Out[22]:
Car_Name Year Selling_Price Present_Price Kms_Driven Fuel_Type Seller_Type Tra
0 ritz 2014 3.35 5.59 27000 Petrol Dealer
1 sx4 2013 4.75 9.54 43000 Diesel Dealer
2 ciaz 2017 7.25 9.85 6900 Petrol Dealer
3 wagon r 2011 2.85 4.15 5200 Petrol Dealer
4 swift 2014 4.60 6.87 42450 Diesel Dealer
... ... ... ... ... ... ... ...
296 city 2016 9.50 11.60 33988 Diesel Dealer
297 brio 2015 4.00 5.90 60000 Petrol Dealer
298 city 2009 3.35 11.00 87934 Petrol Dealer
299 city 2017 11.50 12.50 9000 Diesel Dealer
300 brio 2016 5.30 5.90 5464 Petrol Dealer
301 rows × 9 columns
localhost:8888/notebooks/BCA II Year B Section Session 03 - 23.03.23.ipynb 9/12
3/23/23, 2:01 PM BCA II Year B Section Session 03 - 23.03.23 - Jupyter Notebook
In [24]:
# Target interval that 'Kms_Driven' will be rescaled onto.
newmin, newmax = 1, 10

# Smallest and largest observed 'Kms_Driven' values: the source interval.
oldmin = min(cars['Kms_Driven'])
oldmax = max(cars['Kms_Driven'])
In [25]:
# (number of rows, number of columns) of the cars frame.
1 cars.shape
Out[25]:
(301, 9)
In [30]:
# Min-max rescaling of 'Kms_Driven' from [oldmin, oldmax] onto [newmin, newmax]:
#
#     new_x = (x - oldmin) / (oldmax - oldmin) * (newmax - newmin) + newmin
#
# The formula is applied to the whole column in one vectorised expression
# rather than row by row. Besides being faster, this does not depend on the
# index labels being exactly 0..n-1, which a positional loop using
# cars['Kms_Driven'][i] silently requires.
kms_rescaled = (cars['Kms_Driven'] - oldmin) / (oldmax - oldmin) * (newmax - newmin) + newmin

# Keep X as a plain list of floats, one entry per row of cars, in row order.
X = kms_rescaled.tolist()
X
1.6306306306306306,
1.7376936936936938,
1.4414414414414414,
1.0342342342342343,
1.8918918918918919,
1.8068468468468468,
2.0158378378378377,
1.3513513513513513,
1.9844684684684686,
1.2828828828828829,
1.7935495495495495,
1.8018018018018018,
1.9178198198198197,
1.9675675675675675,
1.6936936936936937,
1.8018018018018018,
1.8018018018018018,
1.8918558558558558,
1.8696756756756756,
3.2792792792792795,
In [31]:
# Attach the rescaled values as a new 'New_Kms_Driven' column (appended as
# the last column); X must hold one value per row of cars.
cars = cars.assign(New_Kms_Driven=X)
localhost:8888/notebooks/BCA II Year B Section Session 03 - 23.03.23.ipynb 10/12
3/23/23, 2:01 PM BCA II Year B Section Session 03 - 23.03.23 - Jupyter Notebook
In [32]:
# Display the cars frame, now including the 'New_Kms_Driven' column.
1 cars
Out[32]:
Car_Name Year Selling_Price Present_Price Kms_Driven Fuel_Type Seller_Type Tra
0 ritz 2014 3.35 5.59 27000 Petrol Dealer
1 sx4 2013 4.75 9.54 43000 Diesel Dealer
2 ciaz 2017 7.25 9.85 6900 Petrol Dealer
3 wagon r 2011 2.85 4.15 5200 Petrol Dealer
4 swift 2014 4.60 6.87 42450 Diesel Dealer
... ... ... ... ... ... ... ...
296 city 2016 9.50 11.60 33988 Diesel Dealer
297 brio 2015 4.00 5.90 60000 Petrol Dealer
298 city 2009 3.35 11.00 87934 Petrol Dealer
299 city 2017 11.50 12.50 9000 Diesel Dealer
300 brio 2016 5.30 5.90 5464 Petrol Dealer
301 rows × 10 columns
In [33]:
# Sanity check: (smallest, largest) value of the rescaled column.
1 min(cars['New_Kms_Driven']),max(cars['New_Kms_Driven'])
Out[33]:
(1.0, 10.0)
localhost:8888/notebooks/BCA II Year B Section Session 03 - 23.03.23.ipynb 11/12
3/23/23, 2:01 PM BCA II Year B Section Session 03 - 23.03.23 - Jupyter Notebook
In [34]:
# Summary statistics (count, mean, std, min, quartiles, max) for the columns of cars.
1 cars.describe()
Out[34]:
Year Selling_Price Present_Price Kms_Driven Owner New_Kms_Drive
count 301.000000 301.000000 301.000000 301.000000 301.000000 301.00000
mean 2013.627907 4.661296 7.628472 36947.205980 0.043189 1.65670
std 2.891554 5.082812 8.644115 38886.883882 0.247915 0.70066
min 2003.000000 0.100000 0.320000 500.000000 0.000000 1.00000
25% 2012.000000 0.900000 1.200000 15000.000000 0.000000 1.26126
50% 2014.000000 3.600000 6.400000 32000.000000 0.000000 1.56756
75% 2016.000000 6.000000 9.900000 48767.000000 0.000000 1.86967
max 2018.000000 35.000000 92.600000 500000.000000 3.000000 10.00000
In [ ]:
# Z-score standardisation of 'Kms_Driven':
#
#     new_x = (x - Mean) / STD
#
# NOTE(review): this cell was unfinished (`new_x=` had no right-hand side);
# the z-score formula is assumed from the Mean / STD names — confirm.
#
# Mean and STD are computed from the column rather than pasted in as rounded
# constants. Series.std() uses the sample standard deviation (ddof=1), the
# same statistic that describe() reports as 'std'.
Mean = cars['Kms_Driven'].mean()
STD = cars['Kms_Driven'].std()

# Vectorised over the whole column; X is a plain list with one standardised
# value per row of cars, in row order.
X = ((cars['Kms_Driven'] - Mean) / STD).tolist()
X
localhost:8888/notebooks/BCA II Year B Section Session 03 - 23.03.23.ipynb 12/12