Exp-3_2 - Jupyter Notebook
In [19]: print("20201CST0182")
         print("PATTABHI RAMANJANEYULU")
20201CST0182
PATTABHI RAMANJANEYULU
In [2]: import pandas as pd
        import numpy as np
        import matplotlib.pyplot as plt
        from sklearn.linear_model import LinearRegression
        from sklearn.preprocessing import PolynomialFeatures
        from sklearn.metrics import mean_squared_error, r2_score
        from sklearn.model_selection import learning_curve
        from sklearn.model_selection import train_test_split
In [3]: url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"
        data = pd.read_csv(url, sep=';')
        data
Out[3]:
      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  alcohol  quality
0               7.0              0.27         0.36            20.7      0.045                 45.0                 170.0  1.00100  3.00       0.45      8.8        6
1               6.3              0.30         0.34             1.6      0.049                 14.0                 132.0  0.99400  3.30       0.49      9.5        6
2               8.1              0.28         0.40             6.9      0.050                 30.0                  97.0  0.99510  3.26       0.44     10.1        6
3               7.2              0.23         0.32             8.5      0.058                 47.0                 186.0  0.99560  3.19       0.40      9.9        6
4               7.2              0.23         0.32             8.5      0.058                 47.0                 186.0  0.99560  3.19       0.40      9.9        6
...             ...               ...          ...             ...        ...                  ...                   ...      ...   ...        ...      ...      ...
4893            6.2              0.21         0.29             1.6      0.039                 24.0                  92.0  0.99114  3.27       0.50     11.2        6
4894            6.6              0.32         0.36             8.0      0.047                 57.0                 168.0  0.99490  3.15       0.46      9.6        5
4895            6.5              0.24         0.19             1.2      0.041                 30.0                 111.0  0.99254  2.99       0.46      9.4        6
4896            5.5              0.29         0.30             1.1      0.022                 20.0                 110.0  0.98869  3.34       0.38     12.8        7
4897            6.0              0.21         0.38             0.8      0.020                 22.0                  98.0  0.98941  3.26       0.32     11.8        6

4898 rows × 12 columns
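This is the UCI white-wine quality dataset: 4,898 samples, 11 physicochemical features (acidity, residual sugar, sulfur dioxide, density, pH, sulphates, alcohol) and a sensory quality score as the target.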
In [4]: print(data.isnull().sum())
fixed acidity 0
volatile acidity 0
citric acid 0
residual sugar 0
chlorides 0
free sulfur dioxide 0
total sulfur dioxide 0
density 0
pH 0
sulphates 0
alcohol 0
quality 0
dtype: int64
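No column contains missing values, so the data can be used for modelling without any imputation step.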
In [5]: X = data[['alcohol']].values
        y = data['quality'].values
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)
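Only the alcohol column is used as a single predictor of quality, with an 80/20 train/test split (random_state=0 makes it reproducible). A quick shape check, added here as a sketch rather than part of the original sheet, confirms the split sizes:

    # Sketch (not in the original notebook): verify the 80/20 split of 4,898 rows
    print(X_train.shape, X_test.shape)   # expected: (3918, 1) (980, 1)
    print(y_train.shape, y_test.shape)   # expected: (3918,) (980,)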
In [6]: lin_regressor = LinearRegression()
        lin_regressor.fit(X_train, y_train)
Out[6]: LinearRegression()
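The fitted model is an ordinary least-squares line, quality ≈ intercept + coef × alcohol. If desired, the learned parameters can be inspected (a sketch, not part of the original sheet):

    # Sketch: inspect the fitted slope and intercept of the least-squares line
    print(lin_regressor.coef_, lin_regressor.intercept_)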
In [7]: poly = PolynomialFeatures(degree=2)
In [8]: X_poly_train = poly.fit_transform(X_train)
        X_poly_test = poly.transform(X_test)
        poly_regressor = LinearRegression()
        poly_regressor.fit(X_poly_train, y_train)
Out[8]: LinearRegression()
In [9]: X_poly_test
Out[9]: array([[ 1. , 10.7 , 114.49],
[ 1. , 9.8 , 96.04],
[ 1. , 10.8 , 116.64],
...,
[ 1. , 9.4 , 88.36],
[ 1. , 9.5 , 90.25],
[ 1. , 8.9 , 79.21]])
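Each row of the transformed matrix is [1, alcohol, alcohol²]; in the first row, 10.7² = 114.49. In scikit-learn 1.0 and later the generated column names can be confirmed directly (a sketch, not in the original sheet):

    # Sketch: confirm the generated feature columns (requires scikit-learn >= 1.0)
    print(poly.get_feature_names_out(['alcohol']))  # ['1' 'alcohol' 'alcohol^2']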
In [10]: y_pred_lin = lin_regressor.predict(X_test)
         y_pred_poly = poly_regressor.predict(X_poly_test)
In [11]: print('Linear Regression Metrics:')
         mse_lin = mean_squared_error(y_test, y_pred_lin)
         rmse_lin = np.sqrt(mse_lin)
         r2_lin = r2_score(y_test, y_pred_lin)
         print('Mean Squared Error: ', mse_lin)
         print('Root Mean Squared Error: ', rmse_lin)
         print('R-squared: ', r2_lin)
Linear Regression Metrics:
Mean Squared Error: 0.7306442340192562
Root Mean Squared Error: 0.8547773008329457
R-squared: 0.1710201454832172
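An RMSE of about 0.85 means the linear model's predictions are typically off by a little under one point on the quality scale, and R² ≈ 0.17 indicates that alcohol alone explains only about 17% of the variance in quality.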
In [12]: print('Polynomial Regression Metrics:')
         mse_poly = mean_squared_error(y_test, y_pred_poly)
         rmse_poly = np.sqrt(mse_poly)
         r2_poly = r2_score(y_test, y_pred_poly)
         print('Mean Squared Error: ', mse_poly)
         print('Root Mean Squared Error: ', rmse_poly)
         print('R-squared: ', r2_poly)
Polynomial Regression Metrics:
Mean Squared Error: 0.7321933575255221
Root Mean Squared Error: 0.8556829772325274
R-squared: 0.16926252923298202
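The quadratic model performs essentially the same as the linear one (R² 0.1693 vs. 0.1710, RMSE 0.8557 vs. 0.8548), so the added alcohol² term buys no predictive power: a straight line already captures the alcohol-quality relationship about as well as a parabola does.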
In [13]: train_sizes_lin, train_scores_lin, test_scores_lin = learning_curve(lin_regressor, X, y, cv=5)
In [14]: test_scores_lin
Out[14]: array([[0.17190738, 0.18859221, 0.19393544, 0.19501437, 0.01110232],
[0.15952351, 0.19176156, 0.19151582, 0.20026073, 0.02371787],
[0.15951429, 0.19624999, 0.20252129, 0.21833535, 0.0691351 ],
[0.1595731 , 0.19619985, 0.20477136, 0.2174897 , 0.05734617],
[0.15535547, 0.19257443, 0.20308835, 0.22426451, 0.08084605]])
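learning_curve returns one row per training-set size (five by default, from 10% to 100% of the available data) and one column per CV fold, each entry being an R² score. Note the much lower scores in the fifth column: with the default unshuffled KFold split the folds are contiguous slices, and the last slice apparently differs from the rest of the data.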
In [15]: train_sizes_poly, train_scores_poly, test_scores_poly = learning_curve(poly_regressor, X_poly_train, y_train, cv=5)
         train_mean_lin = np.mean(train_scores_lin, axis=1)
         train_std_lin = np.std(train_scores_lin, axis=1)
         test_mean_lin = np.mean(test_scores_lin, axis=1)
         test_std_lin = np.std(test_scores_lin, axis=1)
         train_mean_poly = np.mean(train_scores_poly, axis=1)
         train_std_poly = np.std(train_scores_poly, axis=1)
         test_mean_poly = np.mean(test_scores_poly, axis=1)
         test_std_poly = np.std(test_scores_poly, axis=1)
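Averaging over axis=1 collapses the five CV folds into a single mean score per training-set size, with the standard deviations kept so the learning curves below can be drawn as mean lines with shaded ±1 std bands.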
In [21]: print("20201CST0182")
         print("PATTABHI RAMANJANEYULU")
         plt.figure(figsize=(12,6))
         plt.subplot(1,2,1)
         plt.plot(train_sizes_lin, train_mean_lin, label='Training score')
         plt.plot(train_sizes_lin, test_mean_lin, label='Cross-validation score')
         # shaded bands show ±1 std across CV folds (alpha value assumed; original lines truncated)
         plt.fill_between(train_sizes_lin, train_mean_lin - train_std_lin, train_mean_lin + train_std_lin, alpha=0.1)
         plt.fill_between(train_sizes_lin, test_mean_lin - test_std_lin, test_mean_lin + test_std_lin, alpha=0.1)
         plt.xlabel('Training set size')
         plt.ylabel('Score')
         plt.legend(loc='best')
         plt.show()
20201CST0182
PATTABHI RAMANJANEYULU
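[Figure: learning curve for the linear model, training vs. cross-validation score against training-set size, with shaded ±1 std bands]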
In [22]: print("20201CST0182")
         print("PATTABHI RAMANJANEYULU")

         plt.plot(train_sizes_lin, train_mean_lin, label='Training Score (Linear Regression)')
         plt.plot(train_sizes_lin, test_mean_lin, label='Validation Score (Linear Regression)')

         plt.plot(train_sizes_poly, train_mean_poly, label='Training Score (Polynomial Regression)')
         plt.plot(train_sizes_poly, test_mean_poly, label='Validation Score (Polynomial Regression)')

         plt.title("Learning Curves for Linear and Polynomial Regression")
         plt.xlabel("Training examples")
         plt.ylabel("Score")

         plt.legend(loc="best")

         plt.show()
20201CST0182
PATTABHI RAMANJANEYULU
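[Figure: learning curves for linear and polynomial regression plotted together, training and validation score vs. number of training examples]

Consistent with Out[14] and the test-set metrics, the validation score for the linear model levels off around R² ≈ 0.17, and the near-identical test metrics suggest the polynomial curves track it closely: a high-bias setting where more data or a quadratic term changes little.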