[go: up one dir, main page]

0% found this document useful (0 votes)
9 views4 pages

Lab - 8 - 21130616 - TranThanhVu - Ipynb - Colab

Download as pdf or txt
Download as pdf or txt
Download as pdf or txt
You are on page 1/ 4

06/05/2024, 23:54 Lab_8_21130616_TranThanhVu.ipynb - Colab

The main aim of this lab is to work with the pipeline technique and the MultilayerPerceptron algorithm.

keyboard_arrow_down Import libraries


from sklearn.pipeline import Pipeline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import SimpleImputer
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression,LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics
from prettytable import PrettyTable
from sklearn import svm, datasets
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.model_selection import GridSearchCV
from sklearn import set_config
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.neural_network import MLPClassifier

from google.colab import drive


# Mount Google Drive at /content/gdrive so files stored there can be read.
drive.mount('/content/gdrive')
# IPython magic (not plain Python): change the working directory to the data
# folder, so the relative CSV file names read by later cells resolve from here.
%cd '/content/gdrive/MyDrive/ML_Data/lab6'

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
/content/gdrive/MyDrive/ML_Data/lab6

keyboard_arrow_down Task 1. With iris dataset


Apply a pipeline that includes preprocessing steps (e.g., StandardScaler, SimpleImputer, feature selection, KBinsDiscretizer, …) and
classification algorithms (e.g., Random forest, kNN, Naïve Bayes).

# Task 1: train each classifier on iris behind the same preprocessing
# pipeline and print its test accuracy.
# The dict is named `classifiers` (not `map`) so the builtin map() is not
# shadowed for the rest of the notebook session.
classifiers = {
    'clf': RandomForestClassifier(),
    'kNN': KNeighborsClassifier(),
}
data = datasets.load_iris()
X, y = data.data, data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
for name, estimator in classifiers.items():
    # Steps run in order: standardize, mean-impute, then the classifier,
    # which is registered under its dict key as the final step name.
    pipe_lr = Pipeline([
        ('scl', StandardScaler()),
        ('si', SimpleImputer(strategy='mean')),
        (name, estimator),
    ])
    pipe_lr.fit(X_train, y_train)
    # Predict the held-out split.
    y_pred = pipe_lr.predict(X_test)
    # Accuracy two ways: the pipeline's own score() ...
    print(pipe_lr.score(X_test, y_test))
    # ... and accuracy_score() on the explicit predictions.
    print(accuracy_score(y_test, y_pred))

1.0
1.0
1.0
1.0

https://colab.research.google.com/drive/1lh4gnwJbiX5zT9aSW6j345ZU5R_-64hF?hl=vi#printMode=true 1/4
06/05/2024, 23:54 Lab_8_21130616_TranThanhVu.ipynb - Colab

keyboard_arrow_down Task 2. With fashion dataset


2.1. Apply MultilayerPerceptron classification with 1 hidden layer having 10 nodes

# Task 2.1: MLP classifier with one hidden layer of 10 nodes on the fashion data.
train = pd.read_csv('fashion_train.csv')
test = pd.read_csv('fashion_test.csv')

# Take the label as a 1-D Series (train["y"]) rather than a one-column
# DataFrame (train[["y"]]); the column-vector form makes scikit-learn emit
# a DataConversionWarning when fitting.
X_train = train.drop(columns="y")
y_train = train["y"]
X_test = test.drop(columns="y")
y_test = test["y"]

# NOTE(review): the recorded run of this cell ended with an lbfgs
# ConvergenceWarning that suggests scaling the data; consider putting a
# StandardScaler in front of the classifier - confirm against the lab brief.
clf = MLPClassifier(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(10,),  # one-element tuple; `(10)` is just the int 10
    random_state=1,
    activation='tanh',
    max_iter=1000,
)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)

/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:1098: DataConversionWarning: A column-vector y


y = column_or_1d(y, warn=True)
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:541: ConvergenceWarning: lbfgs failed to conver
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
https://scikit-learn.org/stable/modules/preprocessing.html
self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
0.522

2.2. Apply MultilayerPerceptron algorithm with the following settings (the first hidden layer has 250 neurons, the second one has 100
neurons).

# Task 2.2: MLP classifier with two hidden layers (250 neurons, then 100).
clf = MLPClassifier(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(250, 100),
    random_state=1,
    activation='tanh',
    max_iter=1000,
)
# np.ravel flattens the labels to 1-D whether y_train is a Series or a
# one-column DataFrame, which avoids scikit-learn's DataConversionWarning.
clf.fit(X_train, np.ravel(y_train))
y_pred = clf.predict(X_test)
accuracy_score(y_test, y_pred)

/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:1098: DataConversionWarning: A column-vector y


y = column_or_1d(y, warn=True)
0.784

2.3. Find the best hyperparameters using GridSearchCV

# Task 2.3: search MLP hyperparameters on the fashion data with 5-fold CV.
param_grid = {
    'hidden_layer_sizes': [(150, 100, 50), (120, 80, 40), (100, 50, 30)],
    'max_iter': [50, 100, 150],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
}
clf = MLPClassifier()
# n_jobs=2 runs up to two jobs in parallel (n_jobs=-1 would use all processors).
# The search is bound to `grid_fashion` because the Task 2.4 comparison reads
# the fitted search under that name; `grid` is kept as an alias.
grid_fashion = GridSearchCV(estimator=clf, param_grid=param_grid, n_jobs=2, cv=5)
grid = grid_fashion
# Flatten the labels to 1-D to avoid scikit-learn's DataConversionWarning.
grid_fashion.fit(X_train, np.ravel(y_train))
# Keep the test-set predictions instead of discarding the call's result.
y_pred = grid_fashion.predict(X_test)
print(grid_fashion.best_params_)

/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:1098: DataConversionWarning: A column-vector y


y = column_or_1d(y, warn=True)
{'activation': 'relu', 'hidden_layer_sizes': (150, 100, 50), 'max_iter': 50, 'solver': 'adam'}
/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:686: ConvergenceWarning: Stochastic Optimizer:
warnings.warn(

https://colab.research.google.com/drive/1lh4gnwJbiX5zT9aSW6j345ZU5R_-64hF?hl=vi#printMode=true 2/4
06/05/2024, 23:54 Lab_8_21130616_TranThanhVu.ipynb - Colab

2.4. Compare the MultilayerPerceptron using the best hyperparameters in 2.3 and other classification algorithms (i.e., Random forest,
kNN, Naïve Bayes) in terms of accuracy, precision, recall, and F1

# Task 2.4: one table row per model - three baseline classifiers plus the
# best estimator found by the fashion grid search.
# NOTE(review): getScore and grid_fashion are not defined in the cells shown
# here; presumably they come from other cells - confirm.
table2 = PrettyTable(["algo", "Accuracy", "Precision", "Recall", "F1"])
y_train_flat = y_train.values.ravel()
y_test_flat = y_test.values.ravel()
for make_model in (RandomForestClassifier, KNeighborsClassifier, GaussianNB):
    # getScore receives two separate fresh instances as its first two arguments.
    table2.add_row(
        getScore(make_model(), make_model(), X_train, X_test, y_train_flat, y_test_flat)
    )
# The tuned model is passed with fit=False.
table2.add_row(
    getScore(
        grid_fashion,
        grid_fashion.best_estimator_,
        X_train,
        X_test,
        y_train_flat,
        y_test_flat,
        fit=False,
    )
)
print(table2)

+--------------------------------------------------------------------------------+----------+---------------------+---------------------
| algo | Accuracy | Precision | Recall
+--------------------------------------------------------------------------------+----------+---------------------+---------------------
| RandomForestClassifier() | 0.472 | 0.5078111784127922 | 0.46880913502793514
| KNeighborsClassifier() | 0.516 | 0.541269501536488 | 0.5154440130909421
| GaussianNB() | 0.175 | 0.069421918767507 | 0.16362223756303312
| MLPClassifier(activation='tanh', hidden_layer_sizes=(100, 50), max_iter=10000) | 0.391 | 0.40307983346332776 | 0.3904590904545075
+--------------------------------------------------------------------------------+----------+---------------------+---------------------
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and be
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and be
_warn_prf(average, modifier, msg_start, len(result))
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and be
_warn_prf(average, modifier, msg_start, len(result))

keyboard_arrow_down Task 3. With breast cancer dataset


3.1. Apply GridSearchCV to MultilayerPerceptron to find the best hyperparameters (the setting of hyperparameters chosen by students)

# Task 3.1: tune an MLP on the breast-cancer data with GridSearchCV.
cancer = datasets.load_breast_cancer()
X = cancer.data
y = cancer.target
# Split BEFORE selecting features: fitting SelectKBest on the full dataset
# would let the test rows influence which columns are kept (data leakage).
# random_state makes the split reproducible, matching the Task 1 split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Score the features on the training rows only, then apply the same column
# choice to both splits.
selector = SelectKBest(chi2, k=10).fit(X_train, y_train)
X_train = selector.transform(X_train)
X_test = selector.transform(X_test)

param_grid = {
    'hidden_layer_sizes': [(100, 50), (100, 60, 20), (100,)],
    'activation': ['tanh', 'relu'],
}
grid_cancer = GridSearchCV(
    estimator=MLPClassifier(max_iter=10000),
    param_grid=param_grid,
    n_jobs=-1,
)
grid_cancer.fit(X_train, y_train)
grid_cancer.best_estimator_

▾ MLPClassifier
MLPClassifier(activation='tanh', hidden_layer_sizes=(100, 60, 20),
max_iter=10000)

3.2. Compare the MultilayerPerceptron using the best hyperparameters in 3.1 and other classification algorithms (i.e., Random forest,
kNN, Naïve Bayes) in terms of accuracy, precision, recall, and F1

# Task 3.2: one table row per model - three baseline classifiers plus the
# best estimator found by the breast-cancer grid search.
# NOTE(review): getScore is not defined in the cells shown here; presumably
# it comes from another cell - confirm.
table3 = PrettyTable(["algo", "Accuracy", "Precision", "Recall", "F1"])
for make_model in (RandomForestClassifier, KNeighborsClassifier, GaussianNB):
    # getScore receives two separate fresh instances as its first two arguments.
    table3.add_row(
        getScore(make_model(), make_model(), X_train, X_test, y_train, y_test)
    )
# The tuned model is passed with fit=False.
table3.add_row(
    getScore(
        grid_cancer,
        grid_cancer.best_estimator_,
        X_train,
        X_test,
        y_train,
        y_test,
        fit=False,
    )
)
print(table3)

+--------------------------------------------------------------------+--------------------+--------------------+--------------------+---
| algo | Accuracy | Precision | Recall |
+--------------------------------------------------------------------+--------------------+--------------------+--------------------+---
| RandomForestClassifier() | 0.956140350877193 | 0.9603978300180831 | 0.9407894736842105 | 0.
| KNeighborsClassifier() | 0.956140350877193 | 0.9534924534924535 | 0.9473684210526316 | 0.
| GaussianNB() | 0.9473684210526315 | 0.9634146341463414 | 0.9210526315789473 | 0.
| MLPClassifier(activation='tanh', hidden_layer_sizes=(100, 60, 20), | 0.9122807017543859 | 0.9013157894736843 | 0.9013157894736843 | 0.
| max_iter=10000) | | | |
+--------------------------------------------------------------------+--------------------+--------------------+--------------------+---

https://colab.research.google.com/drive/1lh4gnwJbiX5zT9aSW6j345ZU5R_-64hF?hl=vi#printMode=true 3/4
06/05/2024, 23:54 Lab_8_21130616_TranThanhVu.ipynb - Colab

keyboard_arrow_down Task 4. With mobile price classification dataset


4.1. Build your own Neural Network using MultilayerPerceptron

# Task 4.1: hand-configured MLP on the mobile price classification data.
mobile = pd.read_csv("mobile.csv")
X = mobile.drop(columns="price_range")
# 1-D Series target (not a one-column DataFrame) so scikit-learn does not
# emit a DataConversionWarning when fitting.
y = mobile["price_range"]
# Split BEFORE selecting features so the test rows cannot influence which
# columns are kept (data leakage); random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Score the features on the training rows only, then apply the same column
# choice to both splits.
selector = SelectKBest(chi2, k=10).fit(X_train, y_train)
X_train = selector.transform(X_train)
X_test = selector.transform(X_test)

myMLP = MLPClassifier(max_iter=10000, hidden_layer_sizes=(200, 100, 20))
myMLP.fit(X_train, y_train)
table4 = PrettyTable(["algo", "Accuracy", "Precision", "Recall", "F1"])
# The model is fitted above, so getScore is called with fit=False.
# NOTE(review): getScore is not defined in the cells shown here - confirm.
table4.add_row(getScore(myMLP, myMLP, X_train, X_test, y_train, y_test, fit=False))
print(table4)

/usr/local/lib/python3.10/dist-packages/sklearn/neural_network/_multilayer_perceptron.py:1098: DataConversionWarning: A column-vector y


y = column_or_1d(y, warn=True)
+------------------------------------------------------------------+----------+--------------------+---------------------+--------------
| algo | Accuracy | Precision | Recall | F1
+------------------------------------------------------------------+----------+--------------------+---------------------+--------------
| MLPClassifier(hidden_layer_sizes=(200, 100, 20), max_iter=10000) | 0.4725 | 0.4774781648910644 | 0.45940853897375633 | 0.38861867595
+------------------------------------------------------------------+----------+--------------------+---------------------+--------------
/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Precision is ill-defined and be
_warn_prf(average, modifier, msg_start, len(result))

4.2. Apply GridSearchCV to MultilayerPerceptron to find the best hyperparameters (the setting of hyperparameters chosen by students)

# Task 4.2: tune an MLP on the mobile price data with GridSearchCV.
# The grid is spelled out here instead of reusing whatever `param_grid` an
# earlier cell last assigned, so the result does not depend on which cells
# were run (or re-run) before this one. The values match the Task 3.1 grid.
mobile_param_grid = {
    'hidden_layer_sizes': [(100, 50), (100, 60, 20), (100,)],
    'activation': ['tanh', 'relu'],
}
grid_mobile = GridSearchCV(
    estimator=MLPClassifier(max_iter=10000),
    param_grid=mobile_param_grid,
    n_jobs=-1,
)
# Keep the original (misspelled) name available for any code that uses it.
grid_moblie = grid_mobile
grid_mobile.fit(X_train, y_train.values.ravel())
grid_mobile.best_estimator_

▾ MLPClassifier
MLPClassifier(activation='tanh', max_iter=10000)

Finally,
save a copy to your GitHub. Remember to rename the notebook.

https://colab.research.google.com/drive/1lh4gnwJbiX5zT9aSW6j345ZU5R_-64hF?hl=vi#printMode=true 4/4

You might also like