Q[1] Write a Pandas program to append a list of dictionaries or series to an existing
DataFrame and display the combined data.
import pandas as pd
import numpy as np
# Records already held in the existing DataFrame.
exam_dic1 = {
    'name': ['jaynandan', 'shivnandan', 'dilkhush', 'jitendra', 'Abhishek',
             'raushan', 'rajesh', 'Kartik', 'Kavita', 'Pooja'],
    'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
}
exam_data1 = pd.DataFrame(exam_dic1)

# New records, with the same columns, to be appended below the existing ones.
exam_dic2 = {
    'name': ['Rohan', 'amar', 'santosh', 'badal', 'ravish'],
    'perc': [89.5, 92, 90.5, 91.5, 90],
    'qualify': ['yes', 'yes', 'yes', 'yes', 'yes'],
}
exam_data2 = pd.DataFrame(exam_dic2)

print("Original DataFrames:")
print(exam_data1)
print("-------------------------------------")
print(exam_data2)

print("\nJoin the said two dataframes along rows:")
# axis=0 stacks the rows of exam_data2 under those of exam_data1.
# (axis=1 would place the two frames side by side and pad the shorter one
# with NaN, which is not an append.)
result_data = pd.concat([exam_data1, exam_data2], axis=0)
print(result_data)
# Output:-
Q.[2] Create a data frame using dictionary with column heading, i.e., Name, Age,
Percentage and qualify.
import pandas as pd
import numpy as np
# Column-oriented student records: each list below becomes one column.
student_names = ['jaynandan', 'shivnandan', 'dilkhush', 'jitendra', 'Abhishek',
                 'raushan', 'rajesh', 'Kartik', 'Kavita', 'Pooja']
student_ages = [20, 30, 45, 35, 60, 80, 70, 60, 14, 32]
# np.nan marks students whose percentage is not available.
student_percentages = [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89]
student_qualified = ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes']

# The dictionary keys become the column headings, in this order.
Data = {
    'name': student_names,
    'Age': student_ages,
    'perc': student_percentages,
    'qualify': student_qualified,
}
df = pd.DataFrame(data=Data)
print(df)
Output:
a. Write a Pandas program to select the rows from the DataFrame in the previous program
where the percentage is greater than 70.
import pandas as pd
import numpy as np
# Student records; the keys are the column headings of the DataFrame.
Data = {
    'name': ['jaynandan', 'shivnandan', 'dilkhush', 'jitendra', 'Abhishek',
             'raushan', 'rajesh', 'Kartik', 'Kavita', 'Pooja'],
    'Age': [20, 30, 45, 35, 60, 80, 70, 60, 14, 32],
    'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
}
df = pd.DataFrame(Data)

# Boolean mask that is True for rows whose percentage exceeds 70.
# NaN percentages compare as False, so those rows are left out.
above_70 = df['perc'] > 70
print(df[above_70])
Output:-
b. Write a Pandas program to select the rows where the percentage is between 70
and 90.
import pandas as pd
import numpy as np
# Student records; the keys are the column headings of the DataFrame.
Data = {
    'name': ['jaynandan', 'shivnandan', 'dilkhush', 'jitendra', 'Abhishek',
             'raushan', 'rajesh', 'Kartik', 'Kavita', 'Pooja'],
    'Age': [20, 30, 45, 35, 60, 80, 70, 60, 14, 32],
    'perc': [79.5, 85, 90.5, np.nan, 75, 65, 56, np.nan, 29, 89],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
}
df = pd.DataFrame(Data)

# Series.between includes both end points by default, so this keeps rows with
# 70 <= perc <= 90. Rows whose percentage is NaN are not selected.
print(df[df['perc'].between(70, 90)])
OUTPUT:-
[3] Write a Pandas program to join the two given DataFrames using the
column headers, i.e., Name, Age, Department, and Percentage.
=> a. Along rows and assign all data.
import pandas as pd
import numpy as np
# First set of exam records (ten students).
exam_dic1 = {
    'name': ['jaynandan', 'shivnandan', 'jitendra', 'dilkhush', 'roshan',
             'raju', 'Mohan', 'Kartik', 'Kavita', 'Pooja'],
    'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
    'Age': [20, 30, 45, 35, 60, 80, 70, 60, 14, 32],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
}
# Second set of exam records (five students); same columns, different order.
exam_dic2 = {
    'name': ['Parveen', 'yaduvanshi', 'Ashaz', 'yadavjee', 'Ahir'],
    'Age': [20, 30, 45, 35, 60],
    'perc': [89.5, 92, 90.5, 91.5, 90],
    'qualify': ['yes', 'yes', 'yes', 'yes', 'yes'],
}
exam_data1 = pd.DataFrame(exam_dic1)
exam_data2 = pd.DataFrame(exam_dic2)

print("Original DataFrames:")
for frame, trailer in ((exam_data1, "-------------------------------------"),
                       (exam_data2, "\nJoin the said two dataframes along rows:")):
    print(frame)
    print(trailer)

# concat stacks along rows by default and matches columns by name, so the
# differing column order of the two frames does not matter.
result_data = pd.concat((exam_data1, exam_data2))
print(result_data)
OUTPUT:-
b. Along columns and assign all data.
import pandas as pd
import numpy as np
# First set of exam records (ten students).
exam_dic1 = {
    'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit', 'Matthew',
             'Kartik', 'Kavita', 'Pooja'],
    'Age': [20, 30, 45, 35, 60, 80, 70, 60, 14, 32],
    # The comma after this list was missing, which made the literal invalid.
    'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
}
exam_data1 = pd.DataFrame(exam_dic1)

# Second set of exam records (five students).
exam_dic2 = {
    'name': ['Parveen', 'Ahil', 'Ashaz', 'Shifin', 'Hanash'],
    'Age': [20, 30, 45, 35, 60],
    'perc': [89.5, 92, 90.5, 91.5, 90],
    'qualify': ['yes', 'yes', 'yes', 'yes', 'yes'],
}
exam_data2 = pd.DataFrame(exam_dic2)

print("Original DataFrames:")
print(exam_data1)
print("-------------------------------------")
print(exam_data2)

print("\nJoin the said two dataframes along columns:")
# axis=1 places the frames side by side, aligned on the row index; the
# shorter frame is padded with NaN for index labels it does not have.
result_data = pd.concat([exam_data1, exam_data2], axis=1)
print(result_data)
OUTPUTS:-
c. Sorting the concatenated dataset by age
d. Filter out rows based on different criteria such as duplicate rows.
import pandas as pd
# Sales records in which some (Name, Sales) rows occur more than once.
data = {
    'Name': ['Aman', 'Rohit', 'Deepika', 'Aman', 'Deepika', 'Sohit', 'Geeta'],
    'Sales': [8500, 4500, 9200, 8500, 9200, 9600, 8400],
}
sales = pd.DataFrame(data)

# keep=False flags every occurrence of a repeated row, not only the later copies.
is_repeated = sales.duplicated(keep=False)
duplicated = sales[is_repeated]
print("duplicate Row:\n", duplicated)
OUTPUT :-
Q 4 Write a program to create a DataFrame from the ‘student_result.csv’ file using
Pandas and perform the following operations:
import pandas as pd
import csv
# Load the CSV file from the current working directory into a DataFrame and show it.
df = pd.read_csv(filepath_or_buffer="student_result.csv")
print(df)
A. To display row labels, column labels, data types of each column and the dimensions
import pandas as pd
import csv
# Reading the data
df = pd.read_csv("student_result.csv")
# Display the row labels (the index)
print(df.index)
# Display the column labels
print(df.columns)
# Display the data type of each column
print(df.dtypes)
# Display the number of dimensions (axes) of the DataFrame
print(df.ndim)
# Display the column names and their types. info() writes its report to
# stdout itself and returns None, so wrapping it in print() would emit a
# stray "None" line after the report.
df.info()
B . To display the shape (number of rows and columns) of the CSV file.
import pandas as pd
import csv
# Load the CSV file into a DataFrame.
df = pd.read_csv("student_result.csv")
# shape is a (number of rows, number of columns) tuple; show it as such.
row_count, column_count = df.shape
print((row_count, column_count))
OUTPUT :-
C . To display Admission_No, Gender and Percentage from ‘student_result.csv’ file.
import pandas as pd
import csv
# Display Adm_No, Gender and Percentage from the ‘student_result.csv’ file.
# NOTE(review): the columns requested here ("id", 'gender', 'sciences.grade')
# differ from the 'PERCENTAGE' column another snippet reads from the same
# file — confirm the actual header names in the CSV.
wanted_columns = ["id", 'gender', 'sciences.grade']
# usecols restricts parsing to the listed columns only.
df = pd.read_csv("student_result.csv", usecols=wanted_columns)
print("To display Adm_No, Gender and Percentage from ‘student_result.csv’ file.")
print(df)
D. To display the first 5 and last 5 records from ‘student_result.csv’ file
import pandas as pd
import csv
# Load the records, then show the first five and the last five of them.
df1 = pd.read_csv("student_result.csv")
first_five = df1.head(5)
last_five = df1.tail(5)
print(first_five)
print(last_five)
E. To modify the Percentage of student below 40 with NaN value in DataFrame.
import pandas as pd
import numpy as np
import csv
# Show the data exactly as stored in the file.
df = pd.read_csv("student_result.csv")
print(df)

# Load a second, independent copy to modify, so df keeps the original values.
df2 = pd.read_csv("student_result.csv")
print(df2)
print("To modify the Percentage of student below 40 with NaN value.")
# Rows whose PERCENTAGE is below 40 get NaN in that column; other columns
# and other rows are left untouched.
below_40 = df2['PERCENTAGE'] < 40
df2.loc[below_40, 'PERCENTAGE'] = np.nan
print(df2)
Q [5] Write a program to find the sum of each column, and find the column with the
lowest mean (Consider Student result file with marks in different subject)
import pandas as pd
# Pass percentage per subject (columns) for each year (row labels).
years = ('2017', '2018', '2019', '2020')
subject_scores = {
    'Phy': (95.4, 96.4, 99.2, 97.4),
    'Che': (96.5, 97.4, 100, 99.2),
    'Maths': (90.2, 92.6, 97.4, 98.0),
    'Eng': (99.2, 100, 100, 100),
    'IP': (95.6, 100, 100, 100),
}
# Nested mapping {subject: {year: percentage}}; the inner keys become the index.
Pass_Perc = {
    subject: dict(zip(years, scores))
    for subject, scores in subject_scores.items()
}
df = pd.DataFrame(Pass_Perc)
print(df)

# Sum of each column
print()
print('Column wise sum in datframe is :')
column_sums = df.sum(axis=0)
print(column_sums)

# Mean value of each column, rounded to one decimal place for display
print()
print('Column wise mean value are:')
column_means = df.mean(axis=0)
print(column_means.round(1))

# Label of the column with the lowest (unrounded) mean
print()
print('Column with minimum mean value is:')
print(column_means.idxmin())
OUTPUTS :-
Q 6. Read the total marks of all students and show a line plot with the following style
properties. The generated line plot must include the following style properties: –
import pandas as pd
import matplotlib.pyplot as plt
# Load the data to plot.
# NOTE(review): absolute, machine-specific Windows path — the script only
# runs where this exact file exists; confirm the intended location.
df = pd.read_csv("D:\\Python\\Articles\\matplotlib\\sales_data.csv")
# y-values (total_profit column) and x-values (month_number column) as plain lists.
profitList = df['total_profit'].tolist()
monthList = df['month_number'].tolist()

plt.plot(monthList, profitList, label='Month-wise Profit data of last year')
plt.xlabel('Month number')
plt.ylabel('Profit in dollar')
# One tick per value of month_number on the x-axis.
plt.xticks(monthList)
plt.title('Company profit per month')
plt.yticks([100000, 200000, 300000, 400000, 500000])
# The label given to plot() is only drawn when a legend is requested.
plt.legend()
plt.show()