0% found this document useful (0 votes)

19 views5 pages

Dsbda Assignment 1

Uploaded by

ngak1214

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

19 views5 pages

Dsbda Assignment 1

Uploaded by

ngak1214

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as TXT, PDF, TXT or read online on Scribd

You are on page 1/ 5

DSBDA ASSIGNMENT 1

Importing Libraries

import pandas as pd

Loading csv file

# Load the students-performance dataset into a DataFrame.
# The path is one string literal; it must stay on a single line (a line break
# inside the quotes is a syntax error).
data = pd.read_csv(
    "C:/Users/Neha/Desktop/FOLDERS/6th SEM/DSBDA_PRACTICALS/StudentsPerformance.csv"
)
data.columns
Index(['gender', 'race/ethnicity', 'parental level of education', 'lunch',
'test preparation course', 'math score', 'reading score',
'writing score'],
dtype='object')

data.gender
0 female
1 female
2 female
3 male
4 male
...
995 female
996 male
997 female
998 female
999 female
Name: gender, Length: 1000, dtype: object

data['math score']
0 72
1 69
2 90
3 47
4 76
..
995 88
996 62
997 59
998 68
999 77
Name: math score, Length: 1000, dtype: int64

data.isnull() #checks if there are any null values

gender race/ethnicity parental level of education lunch test
preparation course math score reading score writing score
0 False False False False False False False False
1 False False False False False False False False
2 False False False False False False False False
3 False False False False False False False False
4 False False False False False False False False
... ... ... ... ... ... ... ... ...
995 False False False False False False False False
996 False False False False False False False False
997 False False False False False False False False
998 False False False False False False False False
999 False False False False False False False False
# Load the scores file into a DataFrame.
# The path is one string literal; it must stay on a single line (a line break
# inside the quotes is a syntax error).
data1 = pd.read_csv(
    "C:/Users/Neha/Desktop/FOLDERS/6th SEM/DSBDA_PRACTICALS/score.csv"
)
data1.columns
Index(['Name', 'M1 Score', 'M2 Score'], dtype='object')

WRITING TO A CSV FILE USING

1)USING csv.writer

import csv

header = ['Name', 'M1 Score', 'M2 Score']
data1 = [['Alex', 62, 80], ['Brad', 45, 56], ['Joey', 85, 98]]
filename = 'score.csv'

# newline="" hands line-ending control to the csv module, as its
# documentation recommends for files passed to csv.writer.
with open(filename, 'w', newline="") as file:
    csvwriter = csv.writer(file)  # create a csvwriter object
    csvwriter.writerow(header)    # write the header
    csvwriter.writerows(data1)    # write the rest of the data

2) USING csv.writer

3) USING .writelines()

header = ['Name', 'M1 Score', 'M2 Score']

data1 = [['Alex', 62, 80], ['Brad', 45, 56], ['Joey', 85, 98]]
filename = 'score.csv'

# Build every output line first, then emit them with one writelines() call.
# str.join places commas only between fields, so no line ends with a stray
# comma, and no loop variable rebinds the `header` list.
lines = [
    ','.join(str(field) for field in row) + '\n'
    for row in [header, *data1]
]
with open(filename, 'w') as file:
    file.writelines(lines)

4)USING pandas

header = ['Name', 'M1 Score', 'M2 Score']

# Rows with fewer than three values are padded with missing values by pandas,
# which is what the isnull() checks that follow report.
data1 = [['Alex', 62, 80], ['Brad', 56], ['Joey', 85, 98], [85, 98]]
data1 = pd.DataFrame(data1, columns=header)
# The index=False argument specifies that the row indices should not be
# included in the CSV file.
data1.to_csv('score.csv', index=False)

data1.isnull()
Name M1 Score M2 Score
0 False False False
1 False False True
2 False False False
3 False False True
data1.Name.isnull()
0 False
1 False
2 False
3 False
Name: Name, dtype: bool

data1.isnull().sum()
Name 0
M1 Score 0
M2 Score 2
dtype: int64

Count NaN(Not a Number) values in Pandas DataFrame

1) Using Numpy

import pandas as pd
import numpy as np

data2={'set_of_numbers':[1,2,3,4,5,np.nan,6,7,np.nan,np.nan,8,9,10,np.nan]}
df=pd.DataFrame(data2)
print(df)
set_of_numbers
0 1.0
1 2.0
2 3.0
3 4.0
4 5.0
5 NaN
6 6.0
7 7.0
8 NaN
9 NaN
10 8.0
11 9.0
12 10.0
13 NaN

Count number of NaN values in the DataFrame

import pandas as pd
import numpy as np

data3={'first_set':[1,2,3,4,5,np.nan,6,7,np.nan,np.nan],
'second_set':['a','b',np.nan,np.nan,'c','d','e',np.nan,np.nan,'f'],
'third_set':['aa',np.nan,'bb','cc',np.nan,np.nan,'dd',np.nan,np.nan,'ee']}
df1=pd.DataFrame(data3,columns=['first_set','second_set','third_set'])
print(df1)
first_set second_set third_set
0 1.0 a aa
1 2.0 b NaN
2 3.0 NaN bb
3 4.0 NaN cc
4 5.0 c NaN
5 NaN d NaN
6 6.0 e dd
7 7.0 NaN NaN
8 NaN NaN NaN
9 NaN f ee

# Count NaN values under a single DataFrame column

count_nan=df1['first_set'].isna().sum()
print('count of NaN :'+ str(count_nan))
count of NaN :3

# Count NaN values under the entire DataFrame

count_nan_full=df1.isna().sum().sum()
print('count of entire NaN :'+ str(count_nan_full))
count of entire NaN :12

# Count NaN values across a single DataFrame row

count_nan_row=df1.loc[[7]].isna().sum().sum()
print('count of row NaN :'+ str(count_nan_row))
count of row NaN :2

Remove Duplicates from Pandas DataFrame

import pandas as pd
# Sample data with repeated (Color, Shape) pairs for the de-duplication demo.
# Each string literal stays on one line so no value picks up a stray newline.
boxes = {
    'Color': ['Green', 'Green', 'Green', 'Blue', 'Blue', 'Red', 'Red', 'Red'],
    'Shape': ['Rectangle', 'Rectangle', 'Square', 'Rectangle', 'Square',
              'Square', 'Square', 'Rectangle'],
}
df2 = pd.DataFrame(boxes, columns=['Color', 'Shape'])
print(df2)
Color Shape
0 Green Rectangle
1 Green Rectangle
2 Green Square
3 Blue Rectangle
4 Blue Square
5 Red Square
6 Red Square
7 Red Rectangle

df2_duplicates_removed=df2.drop_duplicates()
print(df2_duplicates_removed)
#only the distinct values across the two columns remain
Color Shape
0 Green Rectangle
2 Green Square
3 Blue Rectangle
4 Blue Square
5 Red Square
7 Red Rectangle

# to remove the duplicates on a specific column

df2_duplicates_removed=df2.drop_duplicates(subset=['Color'])
print(df2_duplicates_removed)
Color Shape
0 Green Rectangle
3 Blue Rectangle
5 Red Square

df2_duplicates_removed=df2.drop_duplicates(subset=['Shape'])
print(df2_duplicates_removed)
Color Shape
0 Green Rectangle
2 Green Square

# Get the Descriptive Statistics for Pandas DataFrame

#step 1 -- collect data

#step 2 --create dataframe
import pandas as pd

data_a = {'product': ['A', 'B', 'C', 'c','D'], 'price': [22000, 27000, 25000,
29000, 35000], 'year': [2014, 2015, 2016, 2017, 2018] }

df_a = pd.DataFrame(data_a)
df_a
product price year
0 A 22000 2014
1 B 27000 2015
2 C 25000 2016
3 c 29000 2017
4 D 35000 2018

#step 3 -- Get the Descriptive Statistics for Pandas DataFrame

# Descriptive Statistics for column price

stats_numeric=df_a['price'].describe()
stats_numeric
count 5.000000
mean 27600.000000
std 4878.524367
min 22000.000000
25% 25000.000000
50% 27000.000000
75% 29000.000000
max 35000.000000
Name: price, dtype: float64

# Descriptive Statistics for column product

stats_categorical= df_a['product'].describe()
stats_categorical
count 5
unique 5
top A
freq 1
Name: product, dtype: object

# Descriptive Statistics for entire dataframe

stats=df_a.describe(include='all')
stats
product price year
count 5 5.000000 5.000000
unique 5 NaN NaN
top A NaN NaN
freq 1 NaN NaN
mean NaN 27600.000000 2016.000000
std NaN 4878.524367 1.581139
min NaN 22000.000000 2014.000000
25% NaN 25000.000000 2015.000000
50% NaN 27000.000000 2016.000000
75% NaN 29000.000000 2017.000000
max NaN 35000.000000 2018.000000

12 Pandas
100% (1)
12 Pandas
21 pages
Unit 3 Python B.SC IT
No ratings yet
Unit 3 Python B.SC IT
18 pages
Python DataFrame Techniques
No ratings yet
Python DataFrame Techniques
10 pages
Pandas 2 Complete Notes Class XII
No ratings yet
Pandas 2 Complete Notes Class XII
18 pages
Unit3 - 3) Pandas - Ipynb - Colab
No ratings yet
Unit3 - 3) Pandas - Ipynb - Colab
11 pages
GR12 Record Programs 6TH Onwards
No ratings yet
GR12 Record Programs 6TH Onwards
18 pages
Assignments IP Class 12
No ratings yet
Assignments IP Class 12
9 pages
Pandas Series and DataFrame Guide
No ratings yet
Pandas Series and DataFrame Guide
98 pages
DS Manual 1
No ratings yet
DS Manual 1
96 pages
Week 3 GGG
No ratings yet
Week 3 GGG
17 pages
EDA Lab Manual
No ratings yet
EDA Lab Manual
93 pages
EDA Lab Manual
100% (2)
EDA Lab Manual
93 pages
12 Pandas
No ratings yet
12 Pandas
14 pages
Numpy Boolean Indexing: Filter
No ratings yet
Numpy Boolean Indexing: Filter
39 pages
4 PythonPandas
No ratings yet
4 PythonPandas
8 pages
Xii Record (Dataframe & CSV)
No ratings yet
Xii Record (Dataframe & CSV)
11 pages
Exp 3
No ratings yet
Exp 3
10 pages
Revision Notes DataFrame XII IP
No ratings yet
Revision Notes DataFrame XII IP
8 pages
Pandas
No ratings yet
Pandas
8 pages
Pandas - DataFrames Creation
No ratings yet
Pandas - DataFrames Creation
2 pages
Practical File Programs
No ratings yet
Practical File Programs
8 pages
Pandas DataFrame and Series Operations
No ratings yet
Pandas DataFrame and Series Operations
74 pages
AD3301 - Data - Transformation - Ipynb - Colaboratory
No ratings yet
AD3301 - Data - Transformation - Ipynb - Colaboratory
27 pages
Data Frame Demo
No ratings yet
Data Frame Demo
73 pages
Sakina Assign1 Batch3
No ratings yet
Sakina Assign1 Batch3
8 pages
Dataframe
No ratings yet
Dataframe
19 pages
Pandas 1705297450
No ratings yet
Pandas 1705297450
21 pages
Series and Pandas Methods
No ratings yet
Series and Pandas Methods
5 pages
UNIT-4 Important Q-A
No ratings yet
UNIT-4 Important Q-A
28 pages
Pandas
No ratings yet
Pandas
44 pages
Data Science Practicals - Ipynb
No ratings yet
Data Science Practicals - Ipynb
54 pages
Answers Practical File
No ratings yet
Answers Practical File
19 pages
Pandas Part-2
No ratings yet
Pandas Part-2
9 pages
Data Sci
No ratings yet
Data Sci
29 pages
Only Pandas
No ratings yet
Only Pandas
8 pages
Unit 1 Python Pandas
No ratings yet
Unit 1 Python Pandas
20 pages
MCQ On Dataframe
No ratings yet
MCQ On Dataframe
11 pages
Ip Practical
No ratings yet
Ip Practical
23 pages
002 Python Pandas
No ratings yet
002 Python Pandas
19 pages
B "Hello, World!" Print (B (2:5) ) Llo
No ratings yet
B "Hello, World!" Print (B (2:5) ) Llo
52 pages
Exp3 Python
No ratings yet
Exp3 Python
15 pages
PDF&Rendition 1
No ratings yet
PDF&Rendition 1
47 pages
Pandas Commands
No ratings yet
Pandas Commands
3 pages
Chapter 2 Python Pandas - II
No ratings yet
Chapter 2 Python Pandas - II
19 pages
Practical File Questions With Answers
No ratings yet
Practical File Questions With Answers
7 pages
Python Pandas-DataFrames Complete - Jupyter Notebook
No ratings yet
Python Pandas-DataFrames Complete - Jupyter Notebook
34 pages
Data Science Practical Book - Ipynb
No ratings yet
Data Science Practical Book - Ipynb
21 pages
Pandas Cheat Sheet
No ratings yet
Pandas Cheat Sheet
2 pages
Oddstudents
No ratings yet
Oddstudents
35 pages
Ai Tools and Applications-Lab
No ratings yet
Ai Tools and Applications-Lab
33 pages
Pandas Guide for Beginners
No ratings yet
Pandas Guide for Beginners
18 pages
Pandas Py
No ratings yet
Pandas Py
20 pages
DA Lab Manual r22
No ratings yet
DA Lab Manual r22
31 pages
Panda Merged
No ratings yet
Panda Merged
19 pages
Numpy - Pandas - Lab - Jupyter Notebook
No ratings yet
Numpy - Pandas - Lab - Jupyter Notebook
29 pages
Pandas Cheat Sheet........
No ratings yet
Pandas Cheat Sheet........
11 pages
Practical File Python
No ratings yet
Practical File Python
25 pages
Create A Pandas Series From A Dictionary of Values and An Ndarray
No ratings yet
Create A Pandas Series From A Dictionary of Values and An Ndarray
15 pages
Track Stick User Guide
No ratings yet
Track Stick User Guide
53 pages
Quick Start
No ratings yet
Quick Start
4 pages
Unit 3 - ICT Skills-IV
No ratings yet
Unit 3 - ICT Skills-IV
18 pages
DriveCleanup: Remove Non-Present USB Devices
No ratings yet
DriveCleanup: Remove Non-Present USB Devices
2 pages
Table Top Exercise
No ratings yet
Table Top Exercise
32 pages
AutoCAD 3D Modeling Basics Module
No ratings yet
AutoCAD 3D Modeling Basics Module
108 pages
Chapter 3 Database
No ratings yet
Chapter 3 Database
19 pages
J-STD-0 1 6-1 99
No ratings yet
J-STD-0 1 6-1 99
6 pages
SCC 5.9 UserManual
No ratings yet
SCC 5.9 UserManual
148 pages
Wpox8 User Guide en
No ratings yet
Wpox8 User Guide en
282 pages
Thecus Y.E.S Box N2100 Itunes Configuration Guide
No ratings yet
Thecus Y.E.S Box N2100 Itunes Configuration Guide
10 pages
NetBackup1011 Dedupe Guide
No ratings yet
NetBackup1011 Dedupe Guide
653 pages
EPP 6 Q1 Quiz 4
No ratings yet
EPP 6 Q1 Quiz 4
3 pages
API Contact Model Tutorial
No ratings yet
API Contact Model Tutorial
7 pages
Chapter 1
No ratings yet
Chapter 1
12 pages
Computer Fundamentals Study Material
No ratings yet
Computer Fundamentals Study Material
36 pages
File Handling in C With Examples (Fopen, Fread, Fwrite, Fseek)
No ratings yet
File Handling in C With Examples (Fopen, Fread, Fwrite, Fseek)
9 pages
It Application Tools in Business
No ratings yet
It Application Tools in Business
22 pages
AVEVA Asset Information Management Advanced On AVEVA Connect v1.5
No ratings yet
AVEVA Asset Information Management Advanced On AVEVA Connect v1.5
13 pages
BIM Materials Guide
No ratings yet
BIM Materials Guide
7 pages
77 Useful Linux Commands and Utilities
No ratings yet
77 Useful Linux Commands and Utilities
12 pages
Multithumb Image Plugin For Joomla New Version 2 0
100% (9)
Multithumb Image Plugin For Joomla New Version 2 0
47 pages
POPcom Controller
No ratings yet
POPcom Controller
52 pages
Plan and Prepare For Task To Be Undertaken TO BE CONTINUE
100% (1)
Plan and Prepare For Task To Be Undertaken TO BE CONTINUE
80 pages
MAXDictio User Guide
No ratings yet
MAXDictio User Guide
76 pages
Introduction To The Lab: Ceng/Bilm 362 Computer Networks Lab Manual
No ratings yet
Introduction To The Lab: Ceng/Bilm 362 Computer Networks Lab Manual
26 pages
Deep Spar Disk Imager
No ratings yet
Deep Spar Disk Imager
3 pages
Mitutoyo Sj-210 User Manual: Quick Links
No ratings yet
Mitutoyo Sj-210 User Manual: Quick Links
420 pages
SAS Deployment Wizard
100% (1)
SAS Deployment Wizard
102 pages

Dsbda Assignment 1

Uploaded by

Dsbda Assignment 1

Uploaded by

DSBDA ASSIGNMENT 1

Loading csv file

data.isnull() #checks if there are any null values

WRITING TO A CSV FILE USING

csvwriter.writerow(header) # write the header

header=['Name','M1 Score','M2 Score']

header = ['Name', 'M1 Score', 'M2 Score']

Count NaN(Not a Number) values in Pandas DataFrame

Count number of NaN values in the DataFrame

# Count NaN values under a single DataFrame column

# Count NaN values under the entire DataFrame

# Count NaN values across a single DataFrame row

Remove Duplicates from Pandas DataFrame

# to remove the duplicates on a specific column

# Get the Descriptive Statistics for Pandas DataFrame

#step 1 -- collect data

#step 3 -- Get the Descriptive Statistics for Pandas DataFrame

# Descriptive Statistics for column price

# Descriptive Statistics for column product

# Descriptive Statistics for entire dataframe

You might also like