Q1.
Write a program to compute summary statistics such as the mean, median, and mode of the
given data.
Mean:
Code:
import numpy as np

# Sample observations whose arithmetic mean is reported.
speed = [30, 35, 45, 43, 40, 45, 55]

# np.mean returns the sum of the values divided by their count.
a = np.mean(speed)
print("mean = ", a)
Output:
Median:
Code:
import numpy as np

# Sample observations whose median is reported.
speed = [30, 35, 45, 43, 40, 45, 55]

# With seven values the median is the fourth value of the sorted data.
a = np.median(speed)
print("median = ", a)
Output:
Mode:
Code:
from scipy import stats

# Sample observations; 45 is the only value that occurs twice.
speed = [30, 35, 45, 43, 40, 45, 55]

# stats.mode returns a result object carrying the modal value and its count.
a = stats.mode(speed)
print("mode = ", a)
1
Output:
2
Q2. Write a program to compute summary statistics such as the standard deviation and
variance of the given data.
Variance:
Code:
import numpy as np

# Sample observations whose variance is reported.
num = [1, 2, 3, 45, 5, 6, 66]

# np.var uses ddof=0 by default, i.e. it divides by N (population variance).
a = np.var(num)
print("Variance = ", a)
Output:
Standard Deviation:
Code:
import numpy as np

# Sample observations whose standard deviation is reported.
num = [1, 2, 3, 45, 5, 6, 66]

# np.std uses ddof=0 by default, i.e. the square root of the population variance.
a = np.std(num)
print("Standard Deviation = ", a)
3
Q3. Write a python program to demonstrate Regression analysis with residual plots on a
given dataset.
Code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

# Load the dataset from the working directory.
data = pd.read_csv("headbrain.csv")

# Show the first rows; print() makes the preview visible when this is run
# as a script rather than in a notebook cell.
print(data.head())
Output:
Code:
4
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols

data = pd.read_csv('headbrain.csv')

# The formula interface needs column names that are valid Python
# identifiers, so the two columns used in the model are renamed.
data.rename(
    columns={
        'Brain Weight(grams)': 'Brain_Weight',
        'Head Size(cm^3)': 'Head_Size',
    },
    inplace=True,
)

# Ordinary least squares fit of brain weight on head size.
linear_model = ols('Brain_Weight ~ Head_Size', data=data).fit()
print(linear_model.summary())

# Draw the regression diagnostic plots for the Head_Size regressor
# onto a single pre-sized figure.
fig = plt.figure(figsize=(14, 8))
fig = sm.graphics.plot_regress_exog(linear_model, "Head_Size", fig=fig)
plt.show()
5
Output:
Q4. Write a python program to calculate the variance.
1. Using statistics variance
Code:
import statistics

# Ten sample observations.
a = [
    -0.372, 0.073, -0.441, -0.577, 0.463,
    0.569, -0.559, 0.300, -0.903, 0.442,
]

# statistics.variance computes the sample variance (divides by N - 1).
var = statistics.variance(a)

print("Variance = ", var)
Output:
6
2. Using numpy.var
Code:
import numpy as np

# Ten sample observations.
a = [
    -0.372, 0.073, -0.441, -0.577, 0.463,
    0.569, -0.559, 0.300, -0.903, 0.442,
]

# np.var defaults to ddof=0, so this is the population variance (divides by N).
var = np.var(a)

print("Variance = ", var)
Q5. Write a program to create normal curve using python.
Code:
7
Output:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm

# Parameters for the normal distribution
mean = 0  # mean
std_dev = 1  # standard deviation

# Generate a range of x values spanning four standard deviations either side
x = np.linspace(mean - 4*std_dev, mean + 4*std_dev, 1000)

# Calculate the normal distribution values (y values)
y = norm.pdf(x, mean, std_dev)

# Create the plot
plt.figure(figsize=(10, 5))
plt.plot(x, y, label=f'Normal Distribution\nMean = {mean}, Std Dev = {std_dev}', color='Red')
plt.fill_between(x, y, alpha=0.3, color='Red')
plt.title('Normal Distribution Curve')
plt.xlabel('X Value')
plt.ylabel('Probability Density Function')
# Dashed vertical line marking the mean
plt.axvline(mean, color='Blue', linestyle='--', linewidth=2, label='Mean')
plt.legend()
plt.grid()
plt.show()
8
Q6. Write a python program for correlation with scatter plot.
Code:
import matplotlib.pyplot as plt
import numpy as np
# Generate sample data
np.random.seed(0)  # For reproducibility
x = np.random.rand(100) * 100  # 100 random values scaled to 0-100
y = x * 0.5 + np.random.normal(0, 10, 100)  # Linear relationship with some noise

# Calculate the correlation coefficient; np.corrcoef returns the 2x2
# correlation matrix, and element [0, 1] is the x-y correlation.
correlation_coefficient = np.corrcoef(x, y)[0, 1]

# Print the correlation coefficient
print(f"Correlation coefficient: {correlation_coefficient:.2f}")
9
Output:
# Plot the data points
plt.scatter(x, y, label='Data')
plt.title('Scatter Plot with Correlation')

# Fit a line to the data: a degree-1 polyfit returns slope m and intercept b
m, b = np.polyfit(x, y, 1)
plt.plot(x, m*x + b, color='red', label='Line of Best Fit')

plt.xlabel('X values')
plt.ylabel('Y values')
plt.grid()
plt.legend()
plt.show()
10
Output:
Q7. Write a python program to read total profit of all months and show it using a line plot.
Code:
import matplotlib.pyplot as plt
import numpy as np

# Create Dataset: one entry per month (1-12) in every list
month_number = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
facecream = [2500, 2630, 2140, 3400, 3600, 2760, 2980, 3700, 3540, 1990, 2340, 2900]
facewash = [1500, 1200, 1340, 1130, 1740, 1555, 1120, 1400, 1780, 1890, 2100, 1760]
toothpaste = [5200, 5100, 4550, 5870, 4560, 4890, 4780, 5860, 6100, 8300, 7300, 7400]
bathingshop = [9200, 6100, 9550, 8870, 7760, 7490, 8980, 9960, 8100, 10300, 13300, 14400]
shampoo = [1200, 2100, 3550, 1870, 1560, 1890, 1780, 2860, 2100, 2300, 2400, 1800]
moisturizer = [1500, 1200, 1340, 1130, 1740, 1555, 1120, 1400, 1780, 1890, 2100, 1760]
total_units = [21100, 18330, 22470, 22270, 20960, 20140, 29550, 36140, 23400, 26670, 41280, 30020]
total_profit = [211000, 183300, 224700, 222700, 209600, 201400, 295500, 361400, 234000, 266700, 412800, 300200]

# Dashed red line with black-filled circular markers at each month
plt.plot(
    month_number,
    total_profit,
    linestyle='--',
    color='red',
    linewidth=3,
    marker='o',
    mfc='k',
    label="Profit data of last year",
)
plt.xlabel("Month Number")
# The y-axis carries total_profit, so it is labelled as profit rather than units sold.
plt.ylabel("Total profit")
plt.title("Company Sales Data of Last Year")
plt.legend(loc=4)  # loc=4 is the lower-right corner
plt.ylim(100000)  # start the y-axis at 100000; the upper limit stays automatic
plt.show()
Output:
Q8. Write a python program to read all product sales data and show it using a multiline
plot.
Code:
import matplotlib.pyplot as plt
import numpy as np

# Create Dataset: one entry per month (1-12) in every list
month_number = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
facecream = [2500, 2630, 2140, 3400, 3600, 2760, 2980, 3700, 3540, 1990, 2340, 2900]
facewash = [1500, 1200, 1340, 1130, 1740, 1555, 1120, 1400, 1780, 1890, 2100, 1760]
toothpaste = [5200, 5100, 4550, 5870, 4560, 4890, 4780, 5860, 6100, 8300, 7300, 7400]
bathingshop = [9200, 6100, 9550, 8870, 7760, 7490, 8980, 9960, 8100, 10300, 13300, 14400]
shampoo = [1200, 2100, 3550, 1870, 1560, 1890, 1780, 2860, 2100, 2300, 2400, 1800]
moisturizer = [1500, 1200, 1340, 1130, 1740, 1555, 1120, 1400, 1780, 1890, 2100, 1760]
total_units = [21100, 18330, 22470, 22270, 20960, 20140, 29550, 36140, 23400, 26670, 41280, 30020]
total_profit = [211000, 183300, 224700, 222700, 209600, 201400, 295500, 361400, 234000, 266700, 412800, 300200]

# One solid line with circular markers per product
plt.plot(month_number, facecream, linestyle='-', label="Face cream Sales Data", marker='o')
plt.plot(month_number, facewash, linestyle='-', label="Face Wash Sales Data", marker='o')
plt.plot(month_number, toothpaste, linestyle='-', label="ToothPaste Sales Data", marker='o')
plt.plot(month_number, bathingshop, linestyle='-', label="Bathingshop Sales Data", marker='o')
plt.plot(month_number, moisturizer, linestyle='-', label="Moisturizer Sales Data", marker='o')
plt.plot(month_number, shampoo, linestyle='-', label="Shampoo Sales Data", marker='o')

plt.xlabel("Months Number")
plt.ylabel("Sales units in number")
plt.legend(loc=2)  # loc=2 is the upper-left corner
plt.ylim(1000)  # start the y-axis at 1000; the upper limit stays automatic
plt.show()
Output:
13
Q9. Write a python program to read toothpaste sales data of each month and show it using
a scatter plot.
Code:
import matplotlib.pyplot as plt
import numpy as np

# Create Dataset: one entry per month (1-12) in every list
month_number = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
facecream = [2500, 2630, 2140, 3400, 3600, 2760, 2980, 3700, 3540, 1990, 2340, 2900]
facewash = [1500, 1200, 1340, 1130, 1740, 1555, 1120, 1400, 1780, 1890, 2100, 1760]
toothpaste = [5200, 5100, 4550, 5870, 4560, 4890, 4780, 5860, 6100, 8300, 7300, 7400]
bathingshop = [9200, 6100, 9550, 8870, 7760, 7490, 8980, 9960, 8100, 10300, 13300, 14400]
shampoo = [1200, 2100, 3550, 1870, 1560, 1890, 1780, 2860, 2100, 2300, 2400, 1800]
moisturizer = [1500, 1200, 1340, 1130, 1740, 1555, 1120, 1400, 1780, 1890, 2100, 1760]
total_units = [21100, 18330, 22470, 22270, 20960, 20140, 29550, 36140, 23400, 26670, 41280, 30020]
total_profit = [211000, 183300, 224700, 222700, 209600, 201400, 295500, 361400, 234000, 266700, 412800, 300200]

# One point per month for the toothpaste series
plt.scatter(month_number, toothpaste, label="Toothpaste Sales Data")
plt.legend(loc=2)  # loc=2 is the upper-left corner
plt.grid(linestyle='--', linewidth=1)
plt.title("Tooth Paste sales data")
plt.xlabel("Month Number")
plt.ylabel("Number of units Sold")
plt.show()
Output:
15
Q10. Write a python program to read face cream and face wash product sales data and
show it using bar chart.
Code:
import matplotlib.pyplot as plt
import numpy as np

# Create Dataset: one entry per month (1-12) in every list
month_number = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
facecream = [2500, 2630, 2140, 3400, 3600, 2760, 2980, 3700, 3540, 1990, 2340, 2900]
facewash = [1500, 1200, 1340, 1130, 1740, 1555, 1120, 1400, 1780, 1890, 2100, 1760]
toothpaste = [5200, 5100, 4550, 5870, 4560, 4890, 4780, 5860, 6100, 8300, 7300, 7400]
bathingshop = [9200, 6100, 9550, 8870, 7760, 7490, 8980, 9960, 8100, 10300, 13300, 14400]
shampoo = [1200, 2100, 3550, 1870, 1560, 1890, 1780, 2860, 2100, 2300, 2400, 1800]
moisturizer = [1500, 1200, 1340, 1130, 1740, 1555, 1120, 1400, 1780, 1890, 2100, 1760]
total_units = [21100, 18330, 22470, 22270, 20960, 20140, 29550, 36140, 23400, 26670, 41280, 30020]
total_profit = [211000, 183300, 224700, 222700, 209600, 201400, 295500, 361400, 234000, 266700, 412800, 300200]

plt.figure()
# month_number is converted to an array so a 0.5 offset can be added,
# placing each facewash bar directly beside that month's facecream bar.
plt.bar(np.array(month_number), facecream, width=0.5, label="Facecream")
plt.bar(np.array(month_number) + 0.5, facewash, width=0.5, label="Facewash")
plt.legend(loc=1)  # loc=1 is the upper-right corner
plt.grid(linestyle='--', linewidth=.5)
plt.xticks(np.array(month_number))
plt.xlabel("Months")
plt.ylabel("Sales Units in Number")
plt.title("Facewash and facecream sales data")
plt.show()
Output:
16
Q11. Write a python program to read the total profit of each month and show it using
histogram to see the most common profit ranges.
Code:
import matplotlib.pyplot as plt
import numpy as np

# Create Dataset: one entry per month (1-12) in every list
month_number = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
facecream = [2500, 2630, 2140, 3400, 3600, 2760, 2980, 3700, 3540, 1990, 2340, 2900]
facewash = [1500, 1200, 1340, 1130, 1740, 1555, 1120, 1400, 1780, 1890, 2100, 1760]
toothpaste = [5200, 5100, 4550, 5870, 4560, 4890, 4780, 5860, 6100, 8300, 7300, 7400]
bathingshop = [9200, 6100, 9550, 8870, 7760, 7490, 8980, 9960, 8100, 10300, 13300, 14400]
shampoo = [1200, 2100, 3550, 1870, 1560, 1890, 1780, 2860, 2100, 2300, 2400, 1800]
moisturizer = [1500, 1200, 1340, 1130, 1740, 1555, 1120, 1400, 1780, 1890, 2100, 1760]
total_units = [21100, 18330, 22470, 22270, 20960, 20140, 29550, 36140, 23400, 26670, 41280, 30020]
total_profit = [211000, 183300, 224700, 222700, 209600, 201400, 295500, 361400, 234000, 266700, 412800, 300200]

# Histogram of the monthly profit values using matplotlib's default binning
plt.hist(total_profit, label="Profit Data")
plt.xlabel("Profit range in Dollar")
plt.ylabel("Actual Profit in Dollar")
plt.title("Profit Data")
plt.legend(loc=2)  # loc=2 is the upper-left corner
plt.show()
Output:
Q12. Write a python program to calculate total sale data for last year for each product and
show it using a pie chart.
Code:
import matplotlib.pyplot as plt
import numpy as np

# Create Dataset: one entry per month (1-12) in every list
month_number = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
facecream = [2500, 2630, 2140, 3400, 3600, 2760, 2980, 3700, 3540, 1990, 2340, 2900]
facewash = [1500, 1200, 1340, 1130, 1740, 1555, 1120, 1400, 1780, 1890, 2100, 1760]
toothpaste = [5200, 5100, 4550, 5870, 4560, 4890, 4780, 5860, 6100, 8300, 7300, 7400]
bathingshop = [9200, 6100, 9550, 8870, 7760, 7490, 8980, 9960, 8100, 10300, 13300, 14400]
shampoo = [1200, 2100, 3550, 1870, 1560, 1890, 1780, 2860, 2100, 2300, 2400, 1800]
moisturizer = [1500, 1200, 1340, 1130, 1740, 1555, 1120, 1400, 1780, 1890, 2100, 1760]
total_units = [21100, 18330, 22470, 22270, 20960, 20140, 29550, 36140, 23400, 26670, 41280, 30020]
total_profit = [211000, 183300, 224700, 222700, 209600, 201400, 295500, 361400, 234000, 266700, 412800, 300200]

# Yearly total per product, in the same order as the labels below
total_sale = [
    sum(facecream),
    sum(facewash),
    sum(toothpaste),
    sum(bathingshop),
    sum(shampoo),
    sum(moisturizer),
]
label = ['Facecream', 'Facewash', 'Toothpaste', 'Bathingsoap', 'Shampoo', 'Moisturizer']

# autopct prints each wedge's share as a percentage with one decimal place
plt.pie(total_sale, autopct='%1.1f%%', labels=label)
plt.legend(loc=4)  # loc=4 is the lower-right corner
plt.show()
Output:
19
Q13. Write a python program to find the correlation between variables of iris data. Also
create a heatmap using seaborn to present their relations.
Code:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

iris = pd.read_csv("iris.csv")

# Drop id column
iris = iris.drop('Id', axis=1)

# Keep the first four remaining columns for the correlation matrix
# (presumably the numeric measurements; verify against the CSV layout)
X = iris.iloc[:, 0:4]

f, ax = plt.subplots(figsize=(10, 8))
corr = X.corr()
print(corr)

# An all-zero mask hides no cells, so the full matrix is drawn.
sns.heatmap(
    corr,
    mask=np.zeros_like(corr),
    cmap=sns.diverging_palette(220, 10, as_cmap=True),
    square=True,
    ax=ax,
    linewidths=.5,
)
plt.show()
20
Output:
21
Q14. Write a python program to create a box plot which shows the distribution of
quantitative data in a way that facilitates comparisons between variables or across levels of
a categorical variable of iris dataset. Use seaborn.
Code:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

iris = pd.read_csv("iris.csv")

# Drop id column. The original referenced an undefined name `dataset`; the
# frame loaded above is `iris`. The index column may be called 'Id' or
# 'Unnamed: 0' depending on how the CSV was written, so both are dropped
# when present and errors='ignore' skips whichever is absent.
iris = iris.drop(columns=['Id', 'Unnamed: 0'], errors='ignore')

box_data = iris  # variable representing the data array
box_target = iris.Species  # variable representing the labels array

# Figure size must be configured before the plot is created to take effect.
sns.set(rc={'figure.figsize': (10, 10)})
sns.boxplot(data=box_data, width=0.5, fliersize=5)
plt.show()
Output:
22
Q15. Write a python program to read the mobile company name and stock data and show it
using waffle chart.
Code:
import pandas as pd
import matplotlib.pyplot as plt
from pywaffle import Waffle

# Sample data: Mobile company names and their stock data
# (in market capitalization or stock price)
data = {
    'Company': ['Apple', 'Samsung', 'Xiaomi', 'OnePlus', 'realme', 'Vivo'],
    'Stock Price': [150, 100, 70, 80, 90, 120],
}

# Create a DataFrame
df = pd.DataFrame(data)

# Normalize the stock prices to get proportions for the waffle chart
total = df['Stock Price'].sum()
df['Proportion'] = (df['Stock Price'] / total) * 100

# Create a Waffle Chart
fig = plt.figure(
    FigureClass=Waffle,
    rows=5,
    # Passed as a plain list of floats rather than a pandas Series.
    values=df['Proportion'].tolist(),
    labels=[
        f"{company} ({stock}%)"
        for company, stock in zip(df['Company'], df['Proportion'].round(1))
    ],
    figsize=(10, 15),
    colors=["#FF9999", "#66B2FF", "#99FF99", "#FFCC99", "#FF66B2", "#FF5733"],
    legend={'loc': 'upper left', 'bbox_to_anchor': (1, 1)},
    icons='mobile-alt',  # Optional: adds mobile icons in place of squares
    icon_size=25,
    icon_legend=True,
)
plt.show()
Output:
23
24