9/23/24, 10:21 AM pandas - Jupyter Notebook
In [1]: import numpy as np
In [6]: arr=np.array([1,2,3,4])
arr
Out[6]: array([1, 2, 3, 4])
In [4]: arr1=arr.reshape(2,2)
In [7]: arr1
arr1.ndim
Out[7]: 2
In [8]: arr1.mean()
Out[8]: 2.5
In [20]: import pandas as pd
In [23]: df=pd.read_csv("D:\myFolder\SKCET\cricket.csv")
In [24]: df
Out[24]: player_name score country
0 Rohit Sharma 264 india
1 Martin Guptill 237 NaN
2 Virender Sehwag 219 india
3 Chris Gayle 215 wind
4 Rohit Sharma 209 india
5 Sachin Tendulkar 200 india
6 Charles Coventry 194 NaN
7 Saeed Anwar 194 pak
8 Sir Viv Richards 189 wind
9 Sanath Jayasuriya 189 NaN
10 Martin Guptill 189 NaN
11 Gary Kirsten 188 SA
12 Sachin Tendulkar 186 india
13 Shane Watson 185 Aus
14 Virat Kohli 183 india
15 Sourav Ganguly 183 NaN
16 MS Dhoni 183 india
localhost:8888/notebooks/pandas.ipynb# 1/11
9/23/24, 10:21 AM pandas - Jupyter Notebook
In [38]: df=pd.read_csv("C://Users/naren/AppData/Local/Programs/Python/Python310/cri
In [35]: df
Out[35]: player_name score country
0 Rohit Sharma 264 india
1 Martin Guptill 237 NaN
2 Virender Sehwag 219 india
3 Chris Gayle 215 wind
4 Rohit Sharma 209 india
5 Sachin Tendulkar 200 india
6 Charles Coventry 194 NaN
7 Saeed Anwar 194 pak
8 Sir Viv Richards 189 wind
9 Sanath Jayasuriya 189 NaN
10 Martin Guptill 189 NaN
11 Gary Kirsten 188 SA
12 Sachin Tendulkar 186 india
13 Shane Watson 185 Aus
14 Virat Kohli 183 india
15 Sourav Ganguly 183 NaN
16 MS Dhoni 183 india
In [39]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 player_name 17 non-null object
1 score 17 non-null int64
2 country 12 non-null object
dtypes: int64(1), object(2)
memory usage: 536.0+ bytes
In [40]: df.to_string()
Out[40]: ' player_name score country\n0 Rohit Sharma 264 indi
a\n1 Martin Guptill 237 NaN\n2 Virender Sehwag 219 in
dia\n3 Chris Gayle 215 wind\n4 Rohit Sharma 209
india\n5 Sachin Tendulkar 200 india\n6 Charles Coventry 194
NaN\n7 Saeed Anwar 194 pak\n8 Sir Viv Richards 189
wind\n9 Sanath Jayasuriya 189 NaN\n10 Martin Guptill 189
NaN\n11 Gary Kirsten 188 SA\n12 Sachin Tendulkar 186
india\n13 Shane Watson 185 Aus\n14 Virat Kohli 183
india\n15 Sourav Ganguly 183 NaN\n16 MS Dhoni 183
india'
localhost:8888/notebooks/pandas.ipynb# 2/11
9/23/24, 10:21 AM pandas - Jupyter Notebook
In [41]: df.to_string
Out[41]: <bound method DataFrame.to_string of player_name score country
0 Rohit Sharma 264 india
1 Martin Guptill 237 NaN
2 Virender Sehwag 219 india
3 Chris Gayle 215 wind
4 Rohit Sharma 209 india
5 Sachin Tendulkar 200 india
6 Charles Coventry 194 NaN
7 Saeed Anwar 194 pak
8 Sir Viv Richards 189 wind
9 Sanath Jayasuriya 189 NaN
10 Martin Guptill 189 NaN
11 Gary Kirsten 188 SA
12 Sachin Tendulkar 186 india
13 Shane Watson 185 Aus
14 Virat Kohli 183 india
15 Sourav Ganguly 183 NaN
16 MS Dhoni 183 india>
In [42]: df.loc[0]
Out[42]: player_name Rohit Sharma
score 264
country india
Name: 0, dtype: object
In [43]: df.head()
Out[43]: player_name score country
0 Rohit Sharma 264 india
1 Martin Guptill 237 NaN
2 Virender Sehwag 219 india
3 Chris Gayle 215 wind
4 Rohit Sharma 209 india
In [44]: df.tail()
Out[44]: player_name score country
12 Sachin Tendulkar 186 india
13 Shane Watson 185 Aus
14 Virat Kohli 183 india
15 Sourav Ganguly 183 NaN
16 MS Dhoni 183 india
localhost:8888/notebooks/pandas.ipynb# 3/11
9/23/24, 10:21 AM pandas - Jupyter Notebook
In [45]: df.mean()
C:\Users\naren\AppData\Local\Temp/ipykernel_18348/3698961737.py:1: FutureW
arning: Dropping of nuisance columns in DataFrame reductions (with 'numeri
c_only=None') is deprecated; in a future version this will raise TypeErro
r. Select only valid columns before calling the reduction.
df.mean()
Out[45]: score 200.411765
dtype: float64
In [46]: df['score'].mean()
Out[46]: 200.41176470588235
In [47]: df.dropna()
Out[47]: player_name score country
0 Rohit Sharma 264 india
2 Virender Sehwag 219 india
3 Chris Gayle 215 wind
4 Rohit Sharma 209 india
5 Sachin Tendulkar 200 india
7 Saeed Anwar 194 pak
8 Sir Viv Richards 189 wind
11 Gary Kirsten 188 SA
12 Sachin Tendulkar 186 india
13 Shane Watson 185 Aus
14 Virat Kohli 183 india
16 MS Dhoni 183 india
In [48]: import pandas as pd
In [49]: data={"player_name":["sachin","dravid"], "score":[85,102]}
In [50]: data
Out[50]: {'player_name': ['sachin', 'dravid'], 'score': [85, 102]}
In [51]: df=pd.DataFrame(data)
localhost:8888/notebooks/pandas.ipynb# 4/11
9/23/24, 10:21 AM pandas - Jupyter Notebook
In [52]: df
Out[52]: player_name score
0 sachin 85
1 dravid 102
In [55]: df=pd.read_csv("C://Users/naren/AppData/Local/Programs/Python/Python310/cri
In [56]: df
Out[56]: player_name score country
0 Rohit Sharma 264 india
1 Martin Guptill 237 NaN
2 Virender Sehwag 219 india
3 Chris Gayle 215 wind
4 Rohit Sharma 209 india
5 Sachin Tendulkar 200 india
6 Charles Coventry 194 NaN
7 Saeed Anwar 194 pak
8 Sir Viv Richards 189 wind
9 Sanath Jayasuriya 189 NaN
10 Martin Guptill 189 NaN
11 Gary Kirsten 188 SA
12 Sachin Tendulkar 186 india
13 Shane Watson 185 Aus
14 Virat Kohli 183 india
15 Sourav Ganguly 183 NaN
16 MS Dhoni 183 india
In [57]: df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17 entries, 0 to 16
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 player_name 17 non-null object
1 score 17 non-null int64
2 country 12 non-null object
dtypes: int64(1), object(2)
memory usage: 536.0+ bytes
localhost:8888/notebooks/pandas.ipynb# 5/11
9/23/24, 10:21 AM pandas - Jupyter Notebook
In [58]: df.dropna()
Out[58]: player_name score country
0 Rohit Sharma 264 india
2 Virender Sehwag 219 india
3 Chris Gayle 215 wind
4 Rohit Sharma 209 india
5 Sachin Tendulkar 200 india
7 Saeed Anwar 194 pak
8 Sir Viv Richards 189 wind
11 Gary Kirsten 188 SA
12 Sachin Tendulkar 186 india
13 Shane Watson 185 Aus
14 Virat Kohli 183 india
16 MS Dhoni 183 india
In [59]: df.head()
Out[59]: player_name score country
0 Rohit Sharma 264 india
1 Martin Guptill 237 NaN
2 Virender Sehwag 219 india
3 Chris Gayle 215 wind
4 Rohit Sharma 209 india
In [61]: df['score'].median()
Out[61]: 189.0
localhost:8888/notebooks/pandas.ipynb# 6/11
9/23/24, 10:21 AM pandas - Jupyter Notebook
In [63]: df.fillna("NK")
Out[63]: player_name score country
0 Rohit Sharma 264 india
1 Martin Guptill 237 NK
2 Virender Sehwag 219 india
3 Chris Gayle 215 wind
4 Rohit Sharma 209 india
5 Sachin Tendulkar 200 india
6 Charles Coventry 194 NK
7 Saeed Anwar 194 pak
8 Sir Viv Richards 189 wind
9 Sanath Jayasuriya 189 NK
10 Martin Guptill 189 NK
11 Gary Kirsten 188 SA
12 Sachin Tendulkar 186 india
13 Shane Watson 185 Aus
14 Virat Kohli 183 india
15 Sourav Ganguly 183 NK
16 MS Dhoni 183 india
In [1]: readDF=pd.read_csv("C:\Users\naren\AppData\Local\Programs\Python\Python311\
File "C:\Users\naren\AppData\Local\Temp/ipykernel_35668/930507820.py", l
ine 1
readDF=pd.read_csv("C:\Users\naren\AppData\Local\Programs\Python\Pytho
n311\Data.csv")
^
SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in p
osition 2-3: truncated \UXXXXXXXX escape
In [4]: import pandas as pd
readDF=pd.read_csv("C://Users/naren/AppData/Local/Programs/Python/Python311
localhost:8888/notebooks/pandas.ipynb# 7/11
9/23/24, 10:21 AM pandas - Jupyter Notebook
In [5]: readDF
Out[5]: Country Age Salary Purchased
0 France 44.0 72000.0 No
1 Spain 27.0 48000.0 Yes
2 Germany 30.0 54000.0 No
3 Spain 38.0 61000.0 No
4 Germany 40.0 NaN Yes
5 France 35.0 58000.0 Yes
6 Spain NaN 52000.0 No
7 France 48.0 79000.0 NaN
8 Germany 50.0 83000.0 No
9 France 37.0 67000.0 Yes
10 France 35.0 58000.0 Yes
11 Spain NaN 52000.0 No
12 France 34.0 79000.0 Yes
In [8]: data
--------------------------------------------------------------------------
-
TypeError Traceback (most recent call las
t)
~\AppData\Local\Temp/ipykernel_35668/2092712167.py in <module>
----> 1 pd['Country']
TypeError: 'module' object is not subscriptable
In [9]: readDF.loc[0]
Out[9]: Country France
Age 44.0
Salary 72000.0
Purchased No
Name: 0, dtype: object
In [10]: readDF.loc[0:4]
Out[10]: Country Age Salary Purchased
0 France 44.0 72000.0 No
1 Spain 27.0 48000.0 Yes
2 Germany 30.0 54000.0 No
3 Spain 38.0 61000.0 No
4 Germany 40.0 NaN Yes
localhost:8888/notebooks/pandas.ipynb# 8/11
9/23/24, 10:21 AM pandas - Jupyter Notebook
In [11]: readDF.head(6)
Out[11]: Country Age Salary Purchased
0 France 44.0 72000.0 No
1 Spain 27.0 48000.0 Yes
2 Germany 30.0 54000.0 No
3 Spain 38.0 61000.0 No
4 Germany 40.0 NaN Yes
5 France 35.0 58000.0 Yes
In [12]: readDF.head()
Out[12]: Country Age Salary Purchased
0 France 44.0 72000.0 No
1 Spain 27.0 48000.0 Yes
2 Germany 30.0 54000.0 No
3 Spain 38.0 61000.0 No
4 Germany 40.0 NaN Yes
In [13]: readDF.tail(6)
Out[13]: Country Age Salary Purchased
7 France 48.0 79000.0 NaN
8 Germany 50.0 83000.0 No
9 France 37.0 67000.0 Yes
10 France 35.0 58000.0 Yes
11 Spain NaN 52000.0 No
12 France 34.0 79000.0 Yes
In [14]: readDF.tail()
Out[14]: Country Age Salary Purchased
8 Germany 50.0 83000.0 No
9 France 37.0 67000.0 Yes
10 France 35.0 58000.0 Yes
11 Spain NaN 52000.0 No
12 France 34.0 79000.0 Yes
localhost:8888/notebooks/pandas.ipynb# 9/11
9/23/24, 10:21 AM pandas - Jupyter Notebook
In [15]: readDF.mean()
C:\Users\naren\AppData\Local\Temp/ipykernel_35668/1770479177.py:1: FutureW
arning: Dropping of nuisance columns in DataFrame reductions (with 'numeri
c_only=None') is deprecated; in a future version this will raise TypeErro
r. Select only valid columns before calling the reduction.
readDF.mean()
Out[15]: Age 38.000000
Salary 63583.333333
dtype: float64
In [16]: M=readDF['Age'].mean()
# An assignment echoes nothing; evaluate the name on the last line so the value is shown.
M
Out[16]: 38.0
In [18]: readDF.fillna(readDF['Age'].mean())
Out[18]: Country Age Salary Purchased
0 France 44.0 72000.0 No
1 Spain 27.0 48000.0 Yes
2 Germany 30.0 54000.0 No
3 Spain 38.0 61000.0 No
4 Germany 40.0 38.0 Yes
5 France 35.0 58000.0 Yes
6 Spain 38.0 52000.0 No
7 France 48.0 79000.0 38.0
8 Germany 50.0 83000.0 No
9 France 37.0 67000.0 Yes
10 France 35.0 58000.0 Yes
11 Spain 38.0 52000.0 No
12 France 34.0 79000.0 Yes
localhost:8888/notebooks/pandas.ipynb# 10/11
9/23/24, 10:21 AM pandas - Jupyter Notebook
In [19]: readDF
Out[19]: Country Age Salary Purchased
0 France 44.0 72000.0 No
1 Spain 27.0 48000.0 Yes
2 Germany 30.0 54000.0 No
3 Spain 38.0 61000.0 No
4 Germany 40.0 NaN Yes
5 France 35.0 58000.0 Yes
6 Spain NaN 52000.0 No
7 France 48.0 79000.0 NaN
8 Germany 50.0 83000.0 No
9 France 37.0 67000.0 Yes
10 France 35.0 58000.0 Yes
11 Spain NaN 52000.0 No
12 France 34.0 79000.0 Yes
In [ ]:
localhost:8888/notebooks/pandas.ipynb# 11/11