[go: up one dir, main page]

0% found this document useful (0 votes)
3 views34 pages

Unit 3

The document provides an overview of various data visualization techniques using Matplotlib in Python, including scatter plots, histograms, bar charts, pie charts, line plots, and heatmaps. It includes code examples for creating each type of plot, demonstrating features such as color customization, transparency, and annotations. Additionally, it discusses the differences between histograms and bar charts, as well as how to visualize data like crime rates and temperature variations using heatmaps.

Uploaded by

Amira Shaikh
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PPTX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
3 views34 pages

Unit 3

The document provides an overview of various data visualization techniques using Matplotlib in Python, including scatter plots, histograms, bar charts, pie charts, line plots, and heatmaps. It includes code examples for creating each type of plot, demonstrating features such as color customization, transparency, and annotations. Additionally, it discusses the differences between histograms and bar charts, as well as how to visualize data like crime rates and temperature variations using heatmaps.

Uploaded by

Amira Shaikh
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PPTX, PDF, TXT or read online on Scribd
You are on page 1/ 34

Graphs

Matplotlib is a low-level graph-plotting library in Python that serves as a visualization utility.
Scatter Plot

import matplotlib.pyplot as plt

# Scatter plot: one dot is drawn per (x[i], y[i]) pair.
x = [5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6]
y = [99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86]

plt.scatter(x, y)
plt.show()
Compare

import matplotlib.pyplot as plt


import numpy as np

# Day one: the age and speed of 13 cars.
x = np.array([5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6])
y = np.array([99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86])
plt.scatter(x, y)

# Day two: the age and speed of 15 cars.
# The second scatter() call adds to the same figure, so both days can be
# compared side by side (each call gets its own default color).
x = np.array([2, 2, 8, 1, 15, 8, 12, 9, 7, 3, 11, 4, 7, 14, 12])
y = np.array([100, 105, 84, 105, 90, 99, 90, 95, 94, 100, 79, 112, 91, 80, 85])
plt.scatter(x, y)

# In a non-interactive script nothing is displayed until show() is called.
plt.show()
Colors

import matplotlib.pyplot as plt


import numpy as np

# First data set, drawn with a named color.
x = np.array([5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6])
y = np.array([99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86])
plt.scatter(x, y, color='hotpink')

# Second data set, drawn on the same figure with a hexadecimal color.
x = np.array([2, 2, 8, 1, 15, 8, 12, 9, 7, 3, 11, 4, 7, 14, 12])
y = np.array([100, 105, 84, 105, 90, 99, 90, 95, 94, 100, 79, 112, 91, 80, 85])
plt.scatter(x, y, color='#88c999')

plt.show()
Size

import matplotlib.pyplot as plt


import numpy as np

x = np.array([5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6])
y = np.array([99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86])
# One marker size per point; must have the same length as x and y.
sizes = np.array([20, 50, 100, 200, 500, 1000, 60, 90, 10, 300, 600, 800, 75])

plt.scatter(x, y, s=sizes)

plt.show()
Alpha

You can adjust the transparency of the dots with the alpha argument.

import matplotlib.pyplot as plt


import numpy as np

x = np.array([5, 7, 8, 7, 2, 17, 2, 9, 4, 11, 12, 9, 6])
y = np.array([99, 86, 87, 88, 111, 86, 103, 87, 94, 78, 77, 85, 86])
# One marker size per point; must have the same length as x and y.
sizes = np.array([20, 50, 100, 200, 500, 1000, 60, 90, 10, 300, 600, 800, 75])

# alpha=0.5 makes the dots half transparent so overlapping dots stay visible.
plt.scatter(x, y, s=sizes, alpha=0.5)

plt.show()
Histogram
A histogram is a graph showing frequency distributions.
It is a graph showing the number of observations within each given interval.
import matplotlib.pyplot as plt
import numpy as np

# Draw 250 samples from a normal distribution with mean 170 and standard
# deviation 10, then plot how often values fall into each interval.
x = np.random.normal(loc=170, scale=10, size=250)

plt.hist(x)
plt.show()
import matplotlib.pyplot as plt
import numpy as np
# Seed the generator so the same 1000 samples are produced on every run.
np.random.seed(42)
data = np.random.randn(1000)  # 1000 values from a standard normal distribution

# Histogram with 30 bins, black bar outlines and slight transparency.
plt.hist(data, bins=30, edgecolor='black', alpha=0.7)

# Title and axis labels.
plt.title('Histogram Example')
plt.xlabel('Value')
plt.ylabel('Frequency')

plt.show()
Bar chart

import matplotlib.pyplot as plt


import numpy as np

# Vertical bar chart: x holds the category labels, y the bar heights.
x = np.array(["A", "B", "C", "D"])
y = np.array([3, 8, 1, 10])

plt.bar(x, height=y)
plt.show()
# plt.bar() also accepts plain Python lists for the categories and heights.
x = ["APPLES", "BANANAS"]
y = [400, 350]
plt.bar(x, y)
# In a non-interactive script nothing is displayed until show() is called.
plt.show()
Horizontal Bars

import matplotlib.pyplot as plt


import numpy as np

# Horizontal bar chart: x holds the category labels (placed on the vertical
# axis), y the length of each bar.
x = np.array(["A", "B", "C", "D"])
y = np.array([3, 8, 1, 10])

plt.barh(x, width=y)
plt.show()
Bar Color

import matplotlib.pyplot as plt


import numpy as np

# Same bar chart, with every bar filled in red via the color argument.
x = np.array(["A", "B", "C", "D"])
y = np.array([3, 8, 1, 10])

plt.bar(x, height=y, color="red")
plt.show()

Color Names Supported by All Browsers


All modern browsers support the following 140
color names
Bar Width

import matplotlib.pyplot as plt


import numpy as np

# Vertical bars drawn much thinner than usual by passing width=0.1.
x = np.array(["A", "B", "C", "D"])
y = np.array([3, 8, 1, 10])

plt.bar(x, height=y, width=0.1)
plt.show()

The default width value is 0.8


Bar Height

import matplotlib.pyplot as plt


import numpy as np

# For horizontal bars the thickness is controlled by height, not width;
# here each bar is drawn with height=0.1.
x = np.array(["A", "B", "C", "D"])
y = np.array([3, 8, 1, 10])

plt.barh(x, width=y, height=0.1)
plt.show()

The default height value is 0.8


• Histograms visualize quantitative (numerical) data, whereas bar charts display categorical variables.
Creating Pie Charts

import matplotlib.pyplot as plt


import numpy as np

# Pie chart with one wedge per value; each wedge's share of the circle is
# its value divided by the sum of all values.
y = np.array([35, 25, 25, 15])

plt.pie(y)
plt.show()
Labels

import matplotlib.pyplot as plt


import numpy as np

y = np.array([35, 25, 25, 15])
# One label per wedge, in the same order as the values in y.
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]

plt.pie(y, labels=mylabels)
plt.show()
Explode

import matplotlib.pyplot as plt


import numpy as np

y = np.array([35, 25, 25, 15])
# One label per wedge, in the same order as the values in y.
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]
# One offset per wedge: only the first wedge ("Apples") is pulled out
# from the center; 0 leaves a wedge in place.
myexplode = [0.2, 0, 0, 0]

plt.pie(y, labels=mylabels, explode=myexplode)
plt.show()
Shadow
import matplotlib.pyplot as plt
import numpy as np

y = np.array([35, 25, 25, 15])
# One label per wedge, in the same order as the values in y.
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]
# One offset per wedge: only the first wedge ("Apples") is pulled out.
myexplode = [0.2, 0, 0, 0]

# shadow=True draws a drop shadow beneath the pie.
plt.pie(y, labels=mylabels, explode=myexplode, shadow=True)
plt.show()
Legend

import matplotlib.pyplot as plt


import numpy as np

y = np.array([35, 25, 25, 15])
# One label per wedge, in the same order as the values in y.
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]

plt.pie(y, labels=mylabels)
# The legend entries are taken from the wedge labels passed to pie().
plt.legend()
plt.show()
Legend With Header

import matplotlib.pyplot as plt


import numpy as np

y = np.array([35, 25, 25, 15])
# One label per wedge, in the same order as the values in y.
mylabels = ["Apples", "Bananas", "Cherries", "Dates"]

plt.pie(y, labels=mylabels)
# title adds a header line above the legend entries.
plt.legend(title="Four Fruits:")
plt.show()
Line

import numpy as np
import matplotlib.pyplot as plt

# Average pulse (x) against calorie burnage (y).
x = np.array([80, 85, 90, 95, 100, 105, 110, 115, 120, 125])
y = np.array([240, 250, 260, 270, 280, 290, 300, 310, 320, 330])

plt.title("Sports Watch Data")
plt.xlabel("Average Pulse")
plt.ylabel("Calorie Burnage")

plt.plot(x, y)

# Show only the grid lines that belong to the x-axis.
plt.grid(axis='x')

plt.show()
Linestyle

import matplotlib.pyplot as plt


import numpy as np

# With a single array, plot() uses the indices 0, 1, 2, ... as x values.
ypoints = np.array([3, 8, 1, 10])

# Dotted line.
plt.plot(ypoints, linestyle='dotted')
plt.show()

# Dashed line.
plt.plot(ypoints, linestyle='dashed')
plt.show()

# Shorter syntax: ls is an alias for linestyle and ':' stands for 'dotted'.
plt.plot(ypoints, ls=':')
plt.show()
Plotting Without Line

import matplotlib.pyplot as plt


import numpy as np

# Two points, (1, 3) and (8, 10).
xpoints = np.array([1, 8])
ypoints = np.array([3, 10])

# The 'o' format string draws circle markers and no connecting line.
plt.plot(xpoints, ypoints, 'o')
plt.show()
import matplotlib.pyplot as plt
import numpy as np

# Four points, joined by straight line segments in the order given.
xpoints = np.array([1, 2, 6, 8])
ypoints = np.array([3, 8, 1, 10])

plt.plot(xpoints, ypoints)
plt.show()
• A heat map is a two-dimensional representation of data
in which various values are represented by colors. A
simple heat map provides an immediate visual
summary of information across two axes, allowing users
to quickly grasp the most important or relevant data
points.

• A heatmap is a graphical representation of data where values are depicted using colors. The data is typically arranged in a grid or matrix format, with each cell assigned a color based on its value.
Basic Heatmap

import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt

# Generate a 2-D 10x10 matrix of random integers. randint() excludes
# `high`, so the values range from 1 to 99.
data = np.random.randint(low=1, high=100, size=(10, 10))
print("The data to be plotted:\n")
print(data)

# Plot the heatmap: one colored cell per matrix entry.
hm = sn.heatmap(data=data)

# Display the plotted heatmap.
plt.show()
Anchoring the colormap
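If we set the vmin value to 30 and the vmax value to 70, the color scale covers only the range 30 to 70. Every cell is still displayed, but all values at or below 30 share the color at the low end of the colormap and all values at or above 70 share the color at the high end, so only values between 30 and 70 can be told apart by color. This is called anchoring the colormap.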
# importing the modules
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt

# Generate a 2-D 10x10 matrix of random integers. randint() excludes
# `high`, so the values range from 1 to 99.
data = np.random.randint(low=1, high=100, size=(10, 10))

# Limits used to anchor the colormap: the color scale spans vmin..vmax
# instead of the minimum and maximum of the data.
vmin = 30
vmax = 70

# Plot the heatmap.
hm = sn.heatmap(data=data, vmin=vmin, vmax=vmax)

# Display the plotted heatmap.
plt.show()


Choosing the colormap

we’ll be using tab20.


# importing the modules
import numpy as np
import seaborn as sn
import matplotlib.pyplot as plt

# Generate a 2-D 10x10 matrix of random integers. randint() excludes
# `high`, so the values range from 1 to 99.
data = np.random.randint(low=1, high=100, size=(10, 10))

# Name of the Matplotlib colormap used to color the cells.
cmap = "tab20"

# Plot the heatmap.
hm = sn.heatmap(data=data, cmap=cmap)

# Display the plotted heatmap.
plt.show()
Displaying the cell values

# NOTE: this snippet relies on np, sn and plt having been imported already.

# Generate a 2-D 10x10 matrix of random integers. randint() excludes
# `high`, so the values range from 1 to 99.
data = np.random.randint(low=1, high=100, size=(10, 10))

# annot=True writes each cell's value inside the cell.
annot = True

# Plot the heatmap.
hm = sn.heatmap(data=data, annot=annot)

# Display the plotted heatmap.
plt.show()
Crime rate in a city
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Generate synthetic crime rate data (10x10 city grid).
np.random.seed(42)  # fixed seed so the same grid is produced on every run
city_size = (10, 10)  # grid representing city blocks
# Poisson distribution for crime occurrences (mean of 5 per block).
crime_data = np.random.poisson(lam=5, size=city_size)

# Create the heatmap. annot=True writes the count in each cell and
# fmt="d" formats it as an integer.
plt.figure(figsize=(8, 6))
sns.heatmap(
    crime_data,
    cmap="Reds",
    annot=True,
    fmt="d",
    linewidths=0.5,
    cbar=True,
)

# Labels and title.
plt.title("Crime Rate Heatmap of a City")
plt.xlabel("City Blocks (X-axis)")
plt.ylabel("City Blocks (Y-axis)")

# Show the plot.
plt.show()
Temperature variation
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Generate synthetic temperature data (10x10 grid representing a region).
np.random.seed(42)  # fixed seed so the same grid is produced on every run
region_size = (10, 10)  # grid representing different parts of the region
# Temperatures in °C, drawn uniformly from the interval [15, 40).
temperature_data = np.random.uniform(low=15, high=40, size=region_size)

# Create the heatmap. annot=True writes the value in each cell and
# fmt=".1f" formats it with one decimal place.
plt.figure(figsize=(8, 6))
sns.heatmap(
    temperature_data,
    cmap="coolwarm",
    annot=True,
    fmt=".1f",
    linewidths=0.5,
    cbar=True,
)

# Labels and title.
plt.title("Temperature Variation Heatmap Across a Region")
plt.xlabel("Region Grid (X-axis)")
plt.ylabel("Region Grid (Y-axis)")

# Show the plot.
plt.show()
import pandas as pd
import matplotlib.pyplot as plt

# Read the salary data set into a DataFrame.
file_path = "Salaries.csv"  # Ensure the correct path to your file
df = pd.read_csv(file_path)

# Scatter plot with years since PhD on the x-axis and salary on the y-axis;
# the half-transparent blue dots keep overlapping points visible.
plt.figure(figsize=(8, 6))
plt.scatter(x=df["yrs.since.phd"], y=df["salary"], alpha=0.5, color='b')
plt.title("Scatter Plot of Salary vs. Years Since PhD")
plt.xlabel("Years Since PhD")
plt.ylabel("Salary")
plt.grid(True)
plt.show()

You might also like