Empty DataFrame
import pandas as pd

# With no data supplied the constructor returns a frame that has
# neither rows nor columns.
dFrameEmt = pd.DataFrame(data=None)
print(dFrameEmt)
Empty DataFrame
Columns: []
Index: []
Data Frame from Numpy ndarrays
import numpy as np

# Three 1-D arrays; array3 has one element more than the other two.
array1 = np.array([10, 20, 30])
array2 = np.array([100, 200, 300])
array3 = np.array([-10, -20, -30, -40])

# A single 1-D array becomes one column labelled 0.
dFrame = pd.DataFrame(data=array1)
print(dFrame)

# A list of arrays becomes one row per array; the shorter rows are padded
# with NaN in the extra column.
dFrame2 = pd.DataFrame(data=[array1, array3, array2])
print(dFrame2)
0
0 10
1 20
2 30
0 1 2 3
0 10 20 30 NaN
1 -10 -20 -30 -40.0
2 100 200 300 NaN
Giving Index And Columns
# Explicit row labels 1..3 and a single column named 'A'.
dFrame3 = pd.DataFrame(
    data=[10, 20, 30],
    index=[1, 2, 3],
    columns=['A'],
)
print(dFrame3)
A
1 10
2 20
3 30
# The second positional argument of DataFrame is the row index, spelled out
# by keyword here; the single column keeps its default label 0.
dFrame3 = pd.DataFrame(data=[10, 20, 30], index=[100, 200, 300])
print(dFrame3)
0
100 10
200 20
300 30
Creating Data Frame From List
import pandas as pd

# A flat list becomes a single column (label 0) with the default 0..n-1 index.
list1 = [12, 14, 16, 18, 20]
dFrame4 = pd.DataFrame(data=list1)
print(dFrame4)
0
0 12
1 14
2 16
3 18
4 20
# Two equal-length lists: each list becomes one row of the frame.
list1 = [12, 14, 16, 18, 20]
list2 = [13, 15, 17, 19, 21]
dFrame5 = pd.DataFrame(data=[list1, list2])
print(dFrame5)
0 1 2 3 4
0 12 14 16 18 20
1 13 15 17 19 21
# Same two rows as before, now with row labels 1 and 2 and column
# labels 'A'..'E' passed by keyword instead of by position.
list1 = [12, 14, 16, 18, 20]
list2 = [13, 15, 17, 19, 21]
dFrame5 = pd.DataFrame(
    data=[list1, list2],
    index=[1, 2],
    columns=list('ABCDE'),
)
print(dFrame5)
A B C D E
1 12 14 16 18 20
2 13 15 17 19 21
Creating Data Frame From Dictionary
import pandas as pd

# dict1 is a one-element list holding a dict: the keys become the column
# labels and the dict itself becomes row 0.
dict1 = [{'a': 10, 'b': 20, 'c': 30, 'd': 40}]
dFrame6 = pd.DataFrame(data=dict1)
print(dFrame6)
a b c d
0 10 20 30 40
# Two dicts with only partly overlapping keys: the columns are the union of
# the keys, and a key missing from a dict shows up as NaN in that row.
dict1 = {'a': 10, 'b': 20, 'c': 30, 'd': 40}
dict2 = {'a': 20, 'e': 30, 'f': 50, 'b': 10, 'c': 5}
dFrame7 = pd.DataFrame(data=[dict1, dict2])
print(dFrame7)
a b c d e f
0 10 20 30 40.0 NaN NaN
1 20 10 5 NaN 30.0 50.0
List of Dictionaries
import pandas as pd

# One dict per row; the first dict has no 'c', so row 0 gets NaN there.
listdict = [
    {'a': 2, 'b': 4},
    {'a': 1, 'b': 3, 'c': 5},
]
dFrame8 = pd.DataFrame(data=listdict)
print(dFrame8)
a b c
0 2 4 NaN
1 1 3 5.0
Dictionary of Lists
# Dictionary of lists: each key becomes a column label and each list the
# values of that column. (The value 2797 had been split across two lines,
# which made the literal a syntax error.)
dictForest = {
    'State': ['Assam', 'Delhi', 'Kerala'],
    'GArea': [78438, 1483, 38852],
    'VDF': [2797, 6.72, 1663],
}
dFrame9 = pd.DataFrame(dictForest)
print(dFrame9)
State GArea VDF
0 Assam 78438 2797.00
1 Delhi 1483 6.72
2 Kerala 38852 1663.00
Changing sequence of columns
# Passing columns= together with a dict selects the listed keys and fixes
# the order in which they appear as columns.
dFrame9a = pd.DataFrame(
    data=dictForest,
    columns=['State', 'VDF', 'GArea'],
)
print(dFrame9a)
State VDF GArea
0 Assam 2797.00 78438
1 Delhi 6.72 1483
2 Kerala 1663.00 38852
Data Frame from Series
# Two Series with explicit integer labels; only S1 is used in this cell.
S1 = pd.Series(data=[2, 4, 6, 8, 10], index=[1, 2, 3, 4, 5])
S2 = pd.Series(data=[12, 14, 16, 18, 20], index=[6, 7, 8, 9, 10])

# A single Series becomes one column (label 0) and keeps its index as rows.
dFrame10 = pd.DataFrame(data=S1)
print(dFrame10)
0
1 2
2 4
3 6
4 8
5 10
# Two Series whose indexes only partly overlap (S1 has label 4, S2 has 6).
S1 = pd.Series(data=[2, 4, 6, 8, 10], index=[1, 2, 3, 4, 5])
S2 = pd.Series(data=[12, 14, 16, 18, 20], index=[1, 2, 3, 6, 5])

# Each Series becomes a row; its index labels are matched to columns, and a
# label that a Series lacks is filled with NaN.
dFrame11 = pd.DataFrame(data=[S1, S2])
print(dFrame11)
1 2 3 4 5 6
0 2.0 4.0 6.0 8.0 10.0 NaN
1 12.0 14.0 16.0 NaN 20.0 18.0
Dictionary of Series
import pandas as pd

# Dictionary of Series: one Series of marks per student, each indexed by
# subject. The dict keys become the column labels and the shared Series index
# becomes the row labels.
# (The original literal had 'Ramit' and 'Maths' split across lines, which is
# a syntax error; every Series now also passes its index by keyword.)
ResultSheet = {
    'Arnab': pd.Series([90, 91, 97], index=['Maths', 'Science', 'Hindi']),
    'Ramit': pd.Series([92, 81, 96], index=['Maths', 'Science', 'Hindi']),
    'Samriddhi': pd.Series([89, 98, 81], index=['Maths', 'Science', 'Hindi']),
    'Riya': pd.Series([79, 80, 90], index=['Maths', 'Science', 'Hindi']),
    'Mallika': pd.Series([90, 89, 80], index=['Maths', 'Science', 'Hindi']),
}
dFrame12 = pd.DataFrame(ResultSheet)
print(dFrame12)
Arnab Ramit Samriddhi Riya Mallika
Maths 90 92 89 79 90
Science 91 81 98 80 89
Hindi 97 96 81 90 80
Operations on rows and columns
Adding a new column
# 'Preeti' is not yet a column of dFrame12, so this assignment appends it;
# the list supplies one mark per existing row.
dFrame12['Preeti']=[89,78,90] #adding new column
print(dFrame12)
Arnab Ramit Samriddhi Riya Mallika Preeti
Maths 90 92 89 79 90 89
Science 91 81 98 80 89 78
Hindi 97 96 81 90 80 90
# 'Ramit' already exists, so the same assignment syntax overwrites its values
# instead of adding a column.
dFrame12['Ramit']=[91,93,80] #updating column value
print(dFrame12)
Arnab Ramit Samriddhi Riya Mallika Preeti
Maths 90 91 89 79 90 89
Science 91 93 98 80 89 78
Hindi 97 80 81 90 80 90
# Assigning a single scalar sets every row of the column to that value.
dFrame12['Arnab']=90 #entire column having same value
print(dFrame12)
Arnab Ramit Samriddhi Riya Mallika Preeti
Maths 90 91 89 79 90 89
Science 90 93 98 80 89 78
Hindi 90 80 81 90 80 90
Adding a new row
# Assigning through .loc with a row label that does not exist yet appends a
# new row; the list supplies one value per column.
dFrame12.loc['English']=[85,86,83,80,90,89] #adding new row
# Keep an independent snapshot of the table in this state: the later
# deleting / boolean-indexing / slicing examples read from dFrame13, which was
# never defined, while dFrame12 itself keeps being modified below.
dFrame13=dFrame12.copy()
print(dFrame12)
Arnab Ramit Samriddhi Riya Mallika Preeti
Maths 90 91 89 79 90 89
Science 90 93 98 80 89 78
Hindi 90 80 81 90 80 90
English 85 86 83 80 90 89
# 'English' is already a row label at this point, so .loc replaces the
# values of that row rather than adding another one.
dFrame12.loc['English']=[90,89,70,89,78,91] #updating a row
print(dFrame12)
Arnab Ramit Samriddhi Riya Mallika Preeti
Maths 90 91 89 79 90 89
Science 90 93 98 80 89 78
Hindi 90 80 81 90 80 90
English 90 89 70 89 78 91
# Work on an independent copy. A plain assignment (dFrame14=dFrame12) only
# creates a second name for the same object, so zeroing the row there would
# also wipe the 'Maths' marks in dFrame12, which later examples still read.
dFrame14=dFrame12.copy()
dFrame14.loc['Maths']=0 #changing values of a row to 0
print(dFrame14)
Arnab Ramit Samriddhi Riya Mallika Preeti
Maths 0 0 0 0 0 0
Science 90 93 98 80 89 78
Hindi 90 80 81 90 80 90
English 90 89 70 89 78 91
# Zero out a copy rather than dFrame14 itself: dFrame15=dFrame14 would be an
# alias, and zeroing through it would destroy every frame that shares the
# same underlying object.
dFrame15=dFrame14.copy()
dFrame15.loc[:]=0 #changing values of data frame to 0
print(dFrame15)
Arnab Ramit Samriddhi Riya Mallika Preeti
Maths 0 0 0 0 0 0
Science 0 0 0 0 0 0
Hindi 0 0 0 0 0 0
English 0 0 0 0 0 0
Deleting rows or columns from a Data frame
# Starts from dFrame13, which must already exist (presumably a saved copy of
# the marks table including the 'English' row -- confirm where it is created).
# drop() returns a new frame, so dFrame13 itself is left unchanged.
dFrame16 = dFrame13.drop(index='Science')  # deleting the row labelled 'Science'
print(dFrame16)
Arnab Ramit Samriddhi Riya Mallika Preeti
Maths 90 91 89 79 90 89
Hindi 90 80 81 90 80 90
English 85 86 83 80 90 89
# columns='Riya' is the keyword spelling of drop('Riya', axis=1); the result
# is rebound to dFrame16.
dFrame16 = dFrame16.drop(columns='Riya')  # deleting the column labelled 'Riya'
print(dFrame16)
Arnab Ramit Samriddhi Mallika Preeti
Maths 90 91 89 90 89
Hindi 90 80 81 80 90
English 85 86 83 90 89
# .loc[row_label, column_label] returns the single cell value. Note that the
# result is assigned back to dFrame16, so from here on that name refers to
# the scalar and no longer to a DataFrame.
dFrame16=dFrame16.loc['English','Preeti'] #getting value 89
print(dFrame16)
89
Renaming row labels of a DataFrame
# rename() returns a relabelled copy, so dFrame12 keeps its subject names.
# (The 'English' key had been split across two lines, a syntax error.)
dFrame17=dFrame12.rename(
    {'Maths':'Sub1','Science':'Sub2','Hindi':'Sub3','English':'Sub4'},
    axis='index',
) #changing row label
print(dFrame17)
Arnab Ramit Samriddhi Riya Mallika Preeti
Sub1 90 91 89 79 90 89
Sub2 91 93 98 80 89 78
Sub3 97 80 81 90 80 90
Sub4 90 89 70 89 78 91
# rename() with axis='columns' relabels columns and returns a copy. 'Riya' is
# not in the mapping, so that column keeps its name.
# (The 'Mallika' key had been split across two lines, a syntax error.)
dFrame18=dFrame12.rename(
    {'Arnab':'Stu1','Ramit':'Stu2','Samriddhi':'Stu3','Mallika':'Stu4','Preeti':'Stu5'},
    axis='columns',
) #changing column label
print(dFrame18)
Stu1 Stu2 Stu3 Riya Stu4 Stu5
Maths 90 91 89 79 90 89
Science 91 93 98 80 89 78
Hindi 97 80 81 90 80 90
English 90 89 70 89 78 91
Accessing Data Frames By Indexing
Label Based indexing
# dFrame19 is just a second name for dFrame12 (no copy is made).
dFrame19=dFrame12 #row
# A single row label returns that row as a Series indexed by column name.
dFrame19.loc['Science']
Arnab 90
Ramit 93
Samriddhi 98
Riya 80
Mallika 89
Preeti 78
Name: Science, dtype: int64
# dFrame20 is just a second name for dFrame12 (no copy is made).
dFrame20=dFrame12 #column
# ':' selects every row and 'Arnab' a single column, so the result is that
# column as a Series indexed by row label.
dFrame20.loc[:,'Arnab']
Maths 90
Science 90
Hindi 90
English 90
Name: Arnab, dtype: int64
Or
# dFrame20 is just a second name for dFrame12 (no copy is made).
dFrame20=dFrame12 #column
# Indexing the frame with a column label in square brackets selects the
# 'Arnab' column directly.
print(dFrame20['Arnab'])
Maths 90
Science 90
Hindi 90
English 90
Name: Arnab, dtype: int64
# Default integer index 0..5, so the label 2 picks the third row.
dFrame21 = pd.DataFrame(data=[10, 20, 30, 40, 50, 60])
dFrame21.loc[2]  # single label -> that row as a Series
0 30
Name: 2, dtype: int64
# Default integer index 0..5; a list of labels selects several rows at once.
dFrame22 = pd.DataFrame(data=[20, 30, 40, 50, 60, 70])
dFrame22.loc[[2, 3]]  # list of labels -> a DataFrame with those rows
0
2 40
3 50
Boolean Indexing
# Element-wise comparison of the 'Maths' row: yields one True/False per column.
dFrame13.loc['Maths']>90 #row
Arnab False
Ramit True
Samriddhi False
Riya False
Mallika False
Preeti False
Name: Maths, dtype: bool
# Element-wise test of the 'English' row for marks below 90.
dFrame13.loc['English']<90
Arnab False
Ramit True
Samriddhi True
Riya True
Mallika True
Preeti False
Name: English, dtype: bool
# Element-wise test of the 'Arnab' column: yields one True/False per row.
dFrame13.loc[:,'Arnab']>70 #column
Maths True
Science True
Hindi True
English True
Name: Arnab, dtype: bool
Slicing
# Label slice over rows; the output lists both 'Maths' and 'Science', i.e.
# the end label is included.
dFrame13.loc['Maths':'Science'] #rows
Arnab Ramit Samriddhi Riya Mallika Preeti
Maths 90 91 89 79 90 89
Science 90 93 98 80 89 78
# ':' keeps every row; the column label slice 'Arnab':'Ramit' keeps both of
# those columns (the end label is included).
dFrame13.loc[:,'Arnab':'Ramit'] #columns
Arnab Ramit
Maths 90 91
Science 90 93
Hindi 90 80
English 90 89
# Row slice 'Maths'..'Science' combined with a list of two column labels.
# A single label ('Arnab') would return a one-column Series, not the
# two-column Arnab/Preeti table shown in the output.
dFrame13.loc['Maths':'Science',['Arnab','Preeti']] #2 rows and 2 columns
Arnab Preeti
Maths 90 89
Science 90 78
# Row slice 'Maths'..'Science' combined with the column slice
# 'Arnab'..'Preeti'; both end labels are included, so every column from
# 'Arnab' through 'Preeti' is returned.
dFrame13.loc['Maths':'Science','Arnab':'Preeti'] #2 rows and a range of columns
Arnab Ramit Samriddhi Riya Mallika Preeti
Maths 90 91 89 79 90 89
Science 90 93 98 80 89 78