8000 add a code file of the multi-layer perceptron classifier from scrach by WeiYFan · Pull Request #12754 · TheAlgorithms/Python · GitHub

add a code file of the multi-layer perceptron classifier from scrach #12754


Closed · wants to merge 11 commits
Update multilayer_perceptron_classifier_from_scratch.py
WeiYFan authored May 14, 2025
commit ad745ee8c80b2f51966c4c20dce67fdf96949398
178 changes: 82 additions & 96 deletions machine_learning/multilayer_perceptron_classifier_from_scratch.py
@@ -1,20 +1,19 @@
import numpy as np
from tqdm import tqdm
from numpy.random import default_rng

Check failure on line 3 (GitHub Actions / ruff): I001 Import block is un-sorted or un-formatted [machine_learning/multilayer_perceptron_classifier_from_scratch.py:1:1]
from numpy.random import seed

seed(42)
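For what it's worth, an import block that would satisfy I001 (and set up for the NPY002 hits further down) might look like the sketch below. This is only a suggestion, not the committed code; `rng` is a name introduced here for illustration:

import numpy as np
from numpy.random import default_rng
from tqdm import tqdm

rng = default_rng(42)  # seeded Generator; replaces the legacy global seed(42)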


class Dataloader:
"""
DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting.
DataLoader class for handling dataset operations. Supports:
- data shuffling
- one-hot encoding
- train/test splitting

Example usage:
>>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]]
>>> y = [0, 1, 0, 0]
>>> loader = Dataloader(X, y)
>>> train_X, train_y, test_X, test_y = loader.get_Train_test_data()
>>> train_X, train_y, test_X, test_y = loader.get_train_test_data()
>>> train_X.shape
(3, 2)
>>> len(train_y)
@@ -36,7 +35,8 @@

def __init__(self, features: list[list[float]], labels: list[int]) -> None:
"""
Initializes the Dataloader instance with feature matrix features and labels labels.
Initializes the Dataloader instance with a feature matrix (`features`)
and corresponding labels (`labels`).

Args:
features: Feature matrix of shape (n_samples, n_features).
@@ -48,11 +48,10 @@
self.y = np.array(labels)
self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed

def get_Train_test_data(
self,
) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
def get_train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]:

Check failure on line 51 (GitHub Actions / ruff): E501 Line too long (115 > 88) [machine_learning/multilayer_perceptron_classifier_from_scratch.py:51:89]
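One way to clear this E501 is simply to keep the wrapped signature this commit replaced, a sketch:

    def get_train_test_data(
        self,
    ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: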
"""
Splits the data into training and testing sets. Here, we manually split the data.
Splits the data into training and testing sets.
Here, we manually split the data.

Returns:
A tuple containing:
@@ -61,21 +60,13 @@
- Test data
- Test labels
"""
train_data = np.array(
[self.X[0], self.X[1], self.X[2]]
) # First 3 samples for training
train_labels = [
np.array([self.y[0]]),
np.array([self.y[1]]),
np.array([self.y[2]]),
] # Labels as np.ndarray
train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training

Check failure on line 63 (GitHub Actions / ruff): E501 Line too long (96 > 88) [machine_learning/multilayer_perceptron_classifier_from_scratch.py:63:89]
train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray

Check failure on line 64 (GitHub Actions / ruff): E501 Line too long (116 > 88) [machine_learning/multilayer_perceptron_classifier_from_scratch.py:64:89]
test_data = np.array([self.X[3]]) # Last sample for testing
test_labels = [np.array([self.y[3]])] # Labels as np.ndarray
return train_data, train_labels, test_data, test_labels

def shuffle_data(
self, paired_data: list[tuple[np.ndarray, int]]
) -> list[tuple[np.ndarray, int]]:
def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]:

Check failure on line 69 (GitHub Actions / ruff): E501 Line too long (102 > 88) [machine_learning/multilayer_perceptron_classifier_from_scratch.py:69:89]
"""
Shuffles the data randomly.

@@ -89,7 +80,7 @@
return paired_data

def get_inout_dim(self) -> tuple[int, int]:
train_data, train_labels, test_data, test_labels = self.get_Train_test_data()
train_data, train_labels, test_data, test_labels = self.get_train_test_data()
in_dim = train_data[0].shape[0]
out_dim = len(train_labels)
return in_dim, out_dim
@@ -112,44 +103,43 @@
return one_hot


class MLP:
"""
A custom MLP class for implementing a simple multi-layer perceptron with
forward propagation, backpropagation.

Attributes:
learning_rate (float): Learning rate for gradient descent.
gamma (float): Parameter to control learning rate adjustment.
epoch (int): Number of epochs for training.
hidden_dim (int): Dimension of the hidden layer.
batch_size (int): Number of samples per mini-batch.
train_loss (List[float]): List to store training loss for each fold.
train_accuracy (List[float]): List to store training accuracy for each fold.
test_loss (List[float]): List to store test loss for each fold.
test_accuracy (List[float]): List to store test accuracy for each fold.
dataloader (Dataloader): DataLoader object for handling training data.
inter_variable (dict): Dictionary to store intermediate variables for backpropagation.
weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold.

Methods:
get_inout_dim:obtain input dimension and output dimension.
relu: Apply the ReLU activation function.
relu_derivative: Compute the derivative of the ReLU function.
forward: Perform a forward pass through the network.
back_prop: Perform backpropagation to compute gradients.
update_weights: Update the weights using gradients.
update_learning_rate: Adjust the learning rate based on test accuracy.
accuracy: Compute accuracy of the model.
loss: Compute weighted MSE loss.
train: Train the MLP over multiple folds with early stopping.


class MLP():

Check failure on line 106 (GitHub Actions / ruff): UP039 Unnecessary parentheses after class definition [machine_learning/multilayer_perceptron_classifier_from_scratch.py:106:10]
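The UP039 fix is mechanical: drop the empty parentheses.

    class MLP:  # no parentheses needed when there are no base classes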
"""

def __init__(
self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2
):
self.learning_rate = learning_rate #
A custom MLP class for implementing a simple multi-layer perceptron with
forward propagation, backpropagation.

Attributes:
learning_rate (float): Learning rate for gradient descent.
gamma (float): Parameter to control learning rate adjustment.
epoch (int): Number of epochs for training.
hidden_dim (int): Dimension of the hidden layer.
batch_size (int): Number of samples per mini-batch.
train_loss (List[float]): List to store training loss for each fold.
train_accuracy (List[float]): List to store training accuracy for each fold.
test_loss (List[float]): List to store test loss for each fold.
test_accuracy (List[float]): List to store test accuracy for each fold.
dataloader (Dataloader): DataLoader object for handling training data.
inter_variable (dict):
Dictionary to store intermediate variables for backpropagation.
weights1_list (List[Tuple[np.ndarray, np.ndarray]]):
List of weights for each fold.

Methods:
get_inout_dim:obtain input dimension and output dimension.
relu: Apply the ReLU activation function.
relu_derivative: Compute the derivative of the ReLU function.
forward: Perform a forward pass through the network.
back_prop: Perform backpropagation to compute gradients.
update_weights: Update the weights using gradients.
update_learning_rate: Adjust the learning rate based on test accuracy.
accuracy: Compute accuracy of the model.
loss: Compute weighted MSE loss.
train: Train the MLP over multiple folds with early stopping.


"""
def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2):

Check failure on line 141 (GitHub Actions / ruff): E501 Line too long (92 > 88) [machine_learning/multilayer_perceptron_classifier_from_scratch.py:141:89]
self.learning_rate = learning_rate
self.gamma = gamma # learning_rate decay hyperparameter gamma
self.epoch = epoch
self.hidden_dim = hidden_dim
@@ -198,10 +188,10 @@
"""

in_dim, out_dim = self.dataloader.get_inout_dim() # in_dim here is image dim
W1 = np.random.randn(in_dim + 1, self.hidden_dim) * 0.01 # (in_dim, hidden)
w1 = np.random.randn(in_dim + 1, self.hidden_dim) * 0.01 # (in_dim, hidden)

Check failure on line 191 (GitHub Actions / ruff): NPY002 Replace legacy `np.random.randn` call with `np.random.Generator` [machine_learning/multilayer_perceptron_classifier_from_scratch.py:191:14]

W2 = np.random.randn(self.hidden_dim, out_dim) * 0.01 # (hidden, output)
return W1, W2
w2 = np.random.randn(self.hidden_dim, out_dim) * 0.01 # (hidden, output)

Check failure on line 193 (GitHub Actions / ruff): NPY002 Replace legacy `np.random.randn` call with `np.random.Generator` [machine_learning/multilayer_perceptron_classifier_from_scratch.py:193:14]
return w1, w2
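A Generator-based initializer that would clear both NPY002 hits could look like this sketch. The enclosing method name is not visible in this hunk, so `get_initial_weights` is a placeholder, and `rng` is the module-level Generator sketched near the imports; the shapes follow the comments in the diff:

    def get_initial_weights(self) -> tuple[np.ndarray, np.ndarray]:
        in_dim, out_dim = self.dataloader.get_inout_dim()
        # Generator.standard_normal replaces the legacy np.random.randn
        w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * 0.01  # (in_dim + 1, hidden)
        w2 = rng.standard_normal((self.hidden_dim, out_dim)) * 0.01  # (hidden, output)
        return w1, w2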

def relu(self, input_array: np.ndarray) -> np.ndarray:
"""
@@ -231,12 +221,13 @@
"""
return (input_array > 0).astype(float)


def forward(
self,
input_data: np.ndarray,
W1: np.ndarray,
W2: np.ndarray,
no_gradient: bool = False,
self,
input_data: np.ndarray,
W1: np.ndarray,

Check failure on line 228 (GitHub Actions / ruff): N803 Argument name `W1` should be lowercase [machine_learning/multilayer_perceptron_classifier_from_scratch.py:228:13]
W2: np.ndarray,
no_gradient: bool = False
) -> np.ndarray:
"""
Performs a forward pass through the neural network with one hidden layer.
@@ -276,11 +267,11 @@
return a2
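As for the N803 warning above, lowercasing the parameter names (and updating call sites to match) is all ruff asks for; a sketch of the signature:

    def forward(
        self,
        input_data: np.ndarray,
        w1: np.ndarray,  # lowercase name satisfies N803
        w2: np.ndarray,
        no_gradient: bool = False,
    ) -> np.ndarray: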

def back_prop(
self,
input_data: np.ndarray,
true_labels: np.ndarray,
W1: np.ndarray,
W2: np.ndarray,
self,
input_data: np.ndarray,
true_labels: np.ndarray,
W1: np.ndarray,
W2: np.ndarray
) -> tuple[np.ndarray, np.ndarray]:
"""
Performs backpropagation to compute gradients for the weights.
@@ -322,22 +313,20 @@
grad_w2 = (
np.dot(a1.T, delta_k) / batch_size
) # (hidden, batch).dot(batch, output) = (hidden, output)
input_data_flat = input_data.reshape(
input_data.shape[0], -1
) # (batch_size, input_dim)
input_data_flat = input_data.reshape(input_data.shape[0], -1) # (batch_size, input_dim)
grad_w1 = (
np.dot(input_data_flat.T, delta_j) / batch_size
) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden)

return grad_w1, grad_w2
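For readers tracking the gradient algebra, the shape bookkeeping in this hunk follows the usual two-layer chain rule. A commented sketch; the output-layer delta depends on the loss and output activation, which this hunk does not show, and z1/z2 denote the pre-activations:

    # delta_k = dL/dz2                          # (batch, output)
    # grad_w2 = a1.T @ delta_k / batch_size     # (hidden, output)
    # delta_j = (delta_k @ w2.T) * relu'(z1)    # (batch, hidden)
    # grad_w1 = x.T @ delta_j / batch_size      # (input, hidden)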

def update_weights(
self,
w1: np.ndarray,
w2: np.ndarray,
grad_w1: np.ndarray,
grad_w2: np.ndarray,
learning_rate: float,
self,
w1: np.ndarray,
w2: np.ndarray,
grad_w1: np.ndarray,
grad_w2: np.ndarray,
learning_rate: float
) -> tuple[np.ndarray, np.ndarray]:
"""
Updates the weight matrices using the computed gradients and learning rate.
@@ -372,6 +361,7 @@
w2 -= learning_rate * grad_w2
return w1, w2


def update_learning_rate(self, learning_rate: float) -> float:
"""
Updates the learning rate by applying the decay factor gamma.
@@ -462,18 +452,17 @@
>>> y = [0, 1, 0, 0]
>>> loader = Dataloader(X, y)
>>> mlp = MLP(loader, epoch=2, learning_rate=0.1, hidden_dim=2)
>>> mlp.train()
Test accuracy: 1.0
>>> mlp.train() #doctest:+ELLIPSIS
Test accuracy: ...
"""

learning_rate = self.learning_rate
train_data, train_labels, test_data, test_labels = (
self.dataloader.get_Train_test_data()
)
train_data, train_labels, test_data, test_labels = self.dataloader.get_train_test_data()

train_data = np.c_[train_data, np.ones(train_data.shape[0])]
test_data = np.c_[test_data, np.ones(test_data.shape[0])]


_, total_label_num = self.dataloader.get_inout_dim()

train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num)
@@ -488,16 +477,13 @@

for j in tqdm(range(self.epoch)):
for k in range(0, train_data.shape[0], batch_size): # retrieve every image
batch_imgs = train_data[k : k + batch_size]
batch_labels = train_labels[k : k + batch_size]

output = self.forward(
input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False
)
batch_imgs = train_data[k: k + batch_size]
batch_labels = train_labels[k: k + batch_size]

output = self.forward(input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False)

grad_W1, grad_W2 = self.back_prop(
input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2
)
grad_W1, grad_W2 = self.back_prop(input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2)

W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate)

@@ -512,7 +498,7 @@

self.test_accuracy = test_accuracy_list
self.test_loss = test_loss_list
print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list))
print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list))


if __name__ == "__main__":