From 7232e28b1a443de81764a23c505d8b0a1778a351 Mon Sep 17 00:00:00 2001 From: WeiYFan <1521716717@qq.com> Date: Wed, 14 May 2025 16:12:04 +0800 Subject: [PATCH 01/11] add a code file of the multi-layer perceptron classifier from scrach --- ...ayer_perceptron_classifier_from_scratch.py | 501 ++++++++++++++++++ 1 file changed, 501 insertions(+) create mode 100644 machine_learning/multilayer_perceptron_classifier_from_scratch.py diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py new file mode 100644 index 000000000000..f42928d870f2 --- /dev/null +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -0,0 +1,501 @@ +import numpy as np +from tqdm import tqdm +from numpy.random import default_rng +from numpy.random import seed +seed(42) +class Dataloader: + """ + DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting. + + Example usage: + >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] + >>> y = [0, 1, 0, 0] + >>> loader = Dataloader(X, y) + >>> train_X, train_y, test_X, test_y = loader.get_Train_test_data() + >>> train_X.shape + (3, 2) + >>> len(train_y) + 3 + >>> test_X.shape + (1, 2) + >>> len(test_y) + 1 + >>> loader.one_hot_encode([0, 1, 0], 2) # Returns one-hot encoded labels + array([[0.99, 0. ], + [0. , 0.99], + [0.99, 0. ]]) + >>> loader.get_inout_dim() + (2, 3) + >>> loader.one_hot_encode([0, 2], 3) + array([[0.99, 0. , 0. ], + [0. , 0. , 0.99]]) + """ + + def __init__(self, features: list[list[float]], labels: list[int]) -> None: + """ + Initializes the Dataloader instance with feature matrix features and labels labels. + + Args: + features: Feature matrix of shape (n_samples, n_features). + labels: List of labels of shape (n_samples,). + """ + # random seed + self.rng = default_rng(42) # Create a random number generator with a seed + self.X = np.array(features) + self.y = np.array(labels) + self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed + + def get_Train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: + """ + Splits the data into training and testing sets. Here, we manually split the data. + + Returns: + A tuple containing: + - Train data + - Train labels + - Test data + - Test labels + """ + train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training + train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray + test_data = np.array([self.X[3]]) # Last sample for testing + test_labels = [np.array([self.y[3]])] # Labels as np.ndarray + return train_data, train_labels, test_data, test_labels + + def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]: + """ + Shuffles the data randomly. + + Args: + paired_data: List of tuples containing data and corresponding labels. + + Returns: + A shuffled list of data-label pairs. + """ + default_rng.shuffle(paired_data) # Using the new random number generator + return paired_data + + def get_inout_dim(self) -> tuple[int, int]: + train_data, train_labels, test_data, test_labels = self.get_Train_test_data() + in_dim = train_data[0].shape[0] + out_dim = len(train_labels) + return in_dim, out_dim + + @staticmethod + def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray: + """ + Perform one-hot encoding for the given labels. 
+ + Args: + labels: List of integer labels. + num_classes: Total number of classes for encoding. + + Returns: + A numpy array representing one-hot encoded labels. + """ + one_hot = np.zeros((len(labels), num_classes)) + for idx, label in enumerate(labels): + one_hot[idx, label] = 0.99 + return one_hot + + +class MLP(): + """ + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): List to store test accuracy for each fold. + dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. + train: Train the MLP over multiple folds with early stopping. + + + """ + def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2): + self.learning_rate = learning_rate # + self.gamma = gamma # learning_rate decay hyperparameter gamma + self.epoch = epoch + self.hidden_dim = hidden_dim + + self.train_loss = [] + self.train_accuracy = [] + self.test_loss = [] + self.test_accuracy = [] + + self.dataloader = dataloader + self.inter_variable = {} + self.weights1_list = [] + + def get_inout_dim(self) -> tuple[int, int]: + """ + obtain input dimension and output dimension. + + :return: Tuple of weights (input_dim, output_dim) for the network. + + >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] + >>> y = [0, 1, 0, 0] + >>> loader = Dataloader(X, y) + >>> mlp = MLP(loader, 10, 0.1) + >>> mlp.get_inout_dim() + (2, 3) + """ + input_dim, output_dim = self.dataloader.get_inout_dim() + + return input_dim, output_dim + + def initialize(self) -> tuple[np.ndarray, np.ndarray]: + """ + Initialize weights using He initialization. + + :return: Tuple of weights (W1, W2) for the network. + + >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] + >>> y = [0, 1, 0, 0] + >>> loader = Dataloader(X, y) + >>> mlp = MLP(loader, 10, 0.1) + >>> W1, W2 = mlp.initialize() + >>> W1.shape + (3, 2) + >>> W2.shape + (2, 3) + """ + + in_dim, out_dim = self.dataloader.get_inout_dim() # in_dim here is image dim + W1 = np.random.randn(in_dim + 1, self.hidden_dim) * 0.01 # (in_dim, hidden) + + W2 = np.random.randn(self.hidden_dim, out_dim) * 0.01 # (hidden, output) + return W1, W2 + + def relu(self, input_array: np.ndarray) -> np.ndarray: + """ + Apply the ReLU activation function element-wise. + + :param input_array: Input array. 
+ :return: Output array after applying ReLU. + + >>> mlp = MLP(None, 1, 0.1) + >>> mlp.relu(np.array([[-1, 2], [3, -4]])) + array([[0, 2], + [3, 0]]) + """ + return np.maximum(0, input_array) + + def relu_derivative(self, input_array: np.ndarray) -> np.ndarray: + """ + Compute the derivative of the ReLU function. + + :param input_array: Input array. + :return: Derivative of ReLU function element-wise. + + >>> mlp = MLP(None, 1, 0.01) + >>> mlp.relu_derivative(np.array([[-1, 2], [3, -4]])) + array([[0., 1.], + [1., 0.]]) + """ + return (input_array > 0).astype(float) + + + def forward( + self, + input_data: np.ndarray, + W1: np.ndarray, + W2: np.ndarray, + no_gradient: bool = False + ) -> np.ndarray: + """ + Performs a forward pass through the neural network with one hidden layer. + + Args: + input_data: Input data, shape (batch_size, input_dim). + W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). + W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + no_gradient: If True, returns output without storing intermediates. + + Returns: + Output of the network after forward pass, shape (batch_size, output_dim). + + Examples: + >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) + >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias + >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) + >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) + >>> output = mlp.forward(x, W1, W2) + >>> output.shape + (1, 2) + """ + z1 = np.dot(input_data, W1) + + a1 = self.relu(z1) # relu + + # hidden → output + z2 = np.dot(a1, W2) + a2 = z2 + + if no_gradient: + # when predict + return a2 + else: + # when training + self.inter_variable = {"z1": z1, "a1": a1, "z2": z2, "a2": a2} + return a2 + + def back_prop( + self, + input_data: np.ndarray, + true_labels: np.ndarray, + W1: np.ndarray, + W2: np.ndarray + ) -> tuple[np.ndarray, np.ndarray]: + """ + Performs backpropagation to compute gradients for the weights. + + Args: + input_data: Input data, shape (batch_size, input_dim). + true_labels: True labels, shape (batch_size, output_dim). + W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). + W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + + Returns: + Tuple of gradients (grad_W1, grad_W2) for the weight matrices. + Examples: + >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) + >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias + >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2 + >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) + >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) + >>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable + >>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2) + >>> grad_W1.shape + (3, 2) + >>> grad_W2.shape + (2, 2) + """ + a1 = self.inter_variable["a1"] # (batch_size, hidden_dim) + z1 = self.inter_variable["z1"] + a2 = self.inter_variable["a2"] # (batch_size, output_dim) + z2 = self.inter_variable["z2"] + + batch_size = input_data.shape[0] + + # 1. 
output layer error + delta_k = a2 - true_labels + delta_j = np.dot(delta_k, W2.T) * self.relu_derivative( + z1 + ) # (batch, hidden_dim) 使用relu时 + + grad_w2 = ( + np.dot(a1.T, delta_k) / batch_size + ) # (hidden, batch).dot(batch, output) = (hidden, output) + input_data_flat = input_data.reshape(input_data.shape[0], -1) # (batch_size, input_dim) + grad_w1 = ( + np.dot(input_data_flat.T, delta_j) / batch_size + ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) + + return grad_w1, grad_w2 + + def update_weights( + self, + w1: np.ndarray, + w2: np.ndarray, + grad_w1: np.ndarray, + grad_w2: np.ndarray, + learning_rate: float + ) -> tuple[np.ndarray, np.ndarray]: + """ + Updates the weight matrices using the computed gradients and learning rate. + + Args: + W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). + W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + grad_W1: Gradient for W1, shape (input_dim + 1, hidden_dim). + grad_W2: Gradient for W2, shape (hidden_dim, output_dim). + learning_rate: Learning rate for weight updates. + + Returns: + Updated weight matrices (W1, W2). + + Examples: + >>> mlp = MLP(None, 1, 0.1) + >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) + >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) + >>> grad_W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) + >>> grad_W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) + >>> learning_rate = 0.1 + >>> new_W1, new_W2 = mlp.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) + >>> new_W1==np.array([[0.09, 0.18], [0.27, 0.36], [0.45, 0.54]]) + array([[ True, True], + [ True, True], + [ True, True]]) + >>> new_W2==np.array([[0.63, 0.72], [0.81, 0.90]]) + array([[ True, True], + [ True, True]]) + """ + w1 -= learning_rate * grad_w1 + w2 -= learning_rate * grad_w2 + return w1, w2 + + + def update_learning_rate(self, learning_rate: float) -> float: + """ + Updates the learning rate by applying the decay factor gamma. + + Args: + learning_rate: Current learning rate. + + Returns: + Updated learning rate. + + Examples: + >>> mlp = MLP(None, 1, 0.1, gamma=0.9) + >>> round(mlp.update_learning_rate(0.1), 2) + 0.09 + """ + + return learning_rate * self.gamma + + @staticmethod + def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float: + """ + Computes the accuracy of predictions by comparing predicted and true labels. + + Args: + label: True labels, shape (batch_size, num_classes). + y_hat: Predicted outputs, shape (batch_size, num_classes). + + Returns: + Accuracy as a float between 0 and 1. + + Examples: + >>> mlp = MLP(None, 1, 0.01) + >>> label = np.array([[1, 0], [0, 1], [1, 0]]) + >>> y_hat = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]]) + >>> mlp.accuracy(label, y_hat) + 1.0 + """ + return (y_hat.argmax(axis=1) == label.argmax(axis=1)).mean() + + @staticmethod + def loss(output: np.ndarray, label: np.ndarray) -> float: + """ + Computes the mean squared error loss between predictions and true labels. + + Args: + output: Predicted outputs, shape (batch_size, num_classes). + label: True labels, shape (batch_size, num_classes). + + Returns: + Mean squared error loss as a float. 
+ + Examples: + >>> mlp = MLP(None, 1, 0.1) + >>> output = np.array([[0.9, 0.1], [0.2, 0.8]]) + >>> label = np.array([[1.0, 0.0], [0.0, 1.0]]) + >>> round(mlp.loss(output, label), 3) + 0.025 + """ + return np.sum((output - label) ** 2) / (2 * label.shape[0]) + + def get_acc_loss(self) -> tuple[list[float], list[float]]: + """ + Returns the recorded test accuracy and test loss. + + Returns: + Tuple of (test_accuracy, test_loss) lists. + + Examples: + >>> mlp = MLP(None, 1, 0.1) + >>> mlp.test_accuracy = [0.8, 0.9] + >>> mlp.test_loss = [0.1, 0.05] + >>> acc, loss = mlp.get_acc_loss() + >>> acc + [0.8, 0.9] + >>> loss + [0.1, 0.05] + """ + return self.test_accuracy, self.test_loss + + def train(self) -> None: + """ + Trains the MLP model using the provided dataloader for multiple folds and epochs. + + Saves the best model parameters for each fold and records accuracy/loss. + + Examples: + >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] + >>> y = [0, 1, 0, 0] + >>> loader = Dataloader(X, y) + >>> mlp = MLP(loader, epoch=2, learning_rate=0.1, hidden_dim=2) + >>> mlp.train() + Test accuracy: 1.0 + """ + + learning_rate = self.learning_rate + train_data, train_labels, test_data, test_labels = self.dataloader.get_Train_test_data() + + train_data = np.c_[train_data, np.ones(train_data.shape[0])] + test_data = np.c_[test_data, np.ones(test_data.shape[0])] + + + _, total_label_num = self.dataloader.get_inout_dim() + + train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num) + test_labels = self.dataloader.one_hot_encode(test_labels, total_label_num) + + W1, W2 = self.initialize() + + train_accuracy_list, train_loss_list = [], [] + test_accuracy_list, test_loss_list = [], [] + + batch_size = 1 + + for j in tqdm(range(self.epoch)): + for k in range(0, train_data.shape[0], batch_size): # retrieve every image + + batch_imgs = train_data[k: k + batch_size] + batch_labels = train_labels[k: k + batch_size] + + output = self.forward(input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False) + + grad_W1, grad_W2 = self.back_prop(input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2) + + W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) + + test_output = self.forward(test_data, W1, W2, no_gradient=True) + test_accuracy = self.accuracy(test_labels, test_output) + test_loss = self.loss(test_output, test_labels) + + test_accuracy_list.append(test_accuracy) + test_loss_list.append(test_loss) + + learning_rate = self.update_learning_rate(learning_rate) + + self.test_accuracy = test_accuracy_list + self.test_loss = test_loss_list + print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list)) + + +if __name__ == "__main__": + import doctest + + doctest.testmod() \ No newline at end of file From 9371e4633b7966588937ecd7d93e71f3a7ce0c21 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 08:25:21 +0000 Subject: [PATCH 02/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...ayer_perceptron_classifier_from_scratch.py | 150 ++++++++++-------- 1 file changed, 85 insertions(+), 65 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index f42928d870f2..5f66d6bd00ad 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ 
-2,7 +2,10 @@ from tqdm import tqdm from numpy.random import default_rng from numpy.random import seed + seed(42) + + class Dataloader: """ DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting. @@ -45,7 +48,9 @@ def __init__(self, features: list[list[float]], labels: list[int]) -> None: self.y = np.array(labels) self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed - def get_Train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: + def get_Train_test_data( + self, + ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: """ Splits the data into training and testing sets. Here, we manually split the data. @@ -56,13 +61,21 @@ def get_Train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[ - Test data - Test labels """ - train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training - train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray + train_data = np.array( + [self.X[0], self.X[1], self.X[2]] + ) # First 3 samples for training + train_labels = [ + np.array([self.y[0]]), + np.array([self.y[1]]), + np.array([self.y[2]]), + ] # Labels as np.ndarray test_data = np.array([self.X[3]]) # Last sample for testing test_labels = [np.array([self.y[3]])] # Labels as np.ndarray return train_data, train_labels, test_data, test_labels - def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]: + def shuffle_data( + self, paired_data: list[tuple[np.ndarray, int]] + ) -> list[tuple[np.ndarray, int]]: """ Shuffles the data randomly. @@ -99,40 +112,43 @@ def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray: return one_hot -class MLP(): +class MLP: + """ + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): List to store test accuracy for each fold. + dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. + train: Train the MLP over multiple folds with early stopping. + + """ - A custom MLP class for implementing a simple multi-layer perceptron with - forward propagation, backpropagation. 
- - Attributes: - learning_rate (float): Learning rate for gradient descent. - gamma (float): Parameter to control learning rate adjustment. - epoch (int): Number of epochs for training. - hidden_dim (int): Dimension of the hidden layer. - batch_size (int): Number of samples per mini-batch. - train_loss (List[float]): List to store training loss for each fold. - train_accuracy (List[float]): List to store training accuracy for each fold. - test_loss (List[float]): List to store test loss for each fold. - test_accuracy (List[float]): List to store test accuracy for each fold. - dataloader (Dataloader): DataLoader object for handling training data. - inter_variable (dict): Dictionary to store intermediate variables for backpropagation. - weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. - - Methods: - get_inout_dim:obtain input dimension and output dimension. - relu: Apply the ReLU activation function. - relu_derivative: Compute the derivative of the ReLU function. - forward: Perform a forward pass through the network. - back_prop: Perform backpropagation to compute gradients. - update_weights: Update the weights using gradients. - update_learning_rate: Adjust the learning rate based on test accuracy. - accuracy: Compute accuracy of the model. - loss: Compute weighted MSE loss. - train: Train the MLP over multiple folds with early stopping. - - - """ - def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2): + + def __init__( + self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2 + ): self.learning_rate = learning_rate # self.gamma = gamma # learning_rate decay hyperparameter gamma self.epoch = epoch @@ -215,13 +231,12 @@ def relu_derivative(self, input_array: np.ndarray) -> np.ndarray: """ return (input_array > 0).astype(float) - def forward( - self, - input_data: np.ndarray, - W1: np.ndarray, - W2: np.ndarray, - no_gradient: bool = False + self, + input_data: np.ndarray, + W1: np.ndarray, + W2: np.ndarray, + no_gradient: bool = False, ) -> np.ndarray: """ Performs a forward pass through the neural network with one hidden layer. @@ -261,11 +276,11 @@ def forward( return a2 def back_prop( - self, - input_data: np.ndarray, - true_labels: np.ndarray, - W1: np.ndarray, - W2: np.ndarray + self, + input_data: np.ndarray, + true_labels: np.ndarray, + W1: np.ndarray, + W2: np.ndarray, ) -> tuple[np.ndarray, np.ndarray]: """ Performs backpropagation to compute gradients for the weights. @@ -307,7 +322,9 @@ def back_prop( grad_w2 = ( np.dot(a1.T, delta_k) / batch_size ) # (hidden, batch).dot(batch, output) = (hidden, output) - input_data_flat = input_data.reshape(input_data.shape[0], -1) # (batch_size, input_dim) + input_data_flat = input_data.reshape( + input_data.shape[0], -1 + ) # (batch_size, input_dim) grad_w1 = ( np.dot(input_data_flat.T, delta_j) / batch_size ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) @@ -315,12 +332,12 @@ def back_prop( return grad_w1, grad_w2 def update_weights( - self, - w1: np.ndarray, - w2: np.ndarray, - grad_w1: np.ndarray, - grad_w2: np.ndarray, - learning_rate: float + self, + w1: np.ndarray, + w2: np.ndarray, + grad_w1: np.ndarray, + grad_w2: np.ndarray, + learning_rate: float, ) -> tuple[np.ndarray, np.ndarray]: """ Updates the weight matrices using the computed gradients and learning rate. 
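Aside on the hunks above: the forward pass being re-wrapped here takes inputs that already carry a bias column of ones, applies ReLU in the hidden layer, and leaves the output layer linear (a2 = z2). A minimal standalone numpy sketch of that computation, with made-up toy values rather than the module's own classes, is:

import numpy as np

# toy batch: 2 samples, 2 features each, plus a bias column of ones
# (train() builds this column with np.c_[train_data, np.ones(...)])
x = np.c_[np.array([[0.0, 1.0], [1.0, 1.0]]), np.ones(2)]   # shape (2, 3)
w1 = np.full((3, 2), 0.1)    # (input_dim + 1, hidden_dim)
w2 = np.full((2, 2), 0.2)    # (hidden_dim, output_dim)

z1 = x @ w1                  # hidden pre-activation, shape (2, 2)
a1 = np.maximum(0.0, z1)     # ReLU
a2 = a1 @ w2                 # linear output layer, shape (2, 2)
print(a2.shape)              # (2, 2)
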
@@ -355,7 +372,6 @@ def update_weights( w2 -= learning_rate * grad_w2 return w1, w2 - def update_learning_rate(self, learning_rate: float) -> float: """ Updates the learning rate by applying the decay factor gamma. @@ -451,12 +467,13 @@ def train(self) -> None: """ learning_rate = self.learning_rate - train_data, train_labels, test_data, test_labels = self.dataloader.get_Train_test_data() + train_data, train_labels, test_data, test_labels = ( + self.dataloader.get_Train_test_data() + ) train_data = np.c_[train_data, np.ones(train_data.shape[0])] test_data = np.c_[test_data, np.ones(test_data.shape[0])] - _, total_label_num = self.dataloader.get_inout_dim() train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num) @@ -471,13 +488,16 @@ def train(self) -> None: for j in tqdm(range(self.epoch)): for k in range(0, train_data.shape[0], batch_size): # retrieve every image + batch_imgs = train_data[k : k + batch_size] + batch_labels = train_labels[k : k + batch_size] - batch_imgs = train_data[k: k + batch_size] - batch_labels = train_labels[k: k + batch_size] - - output = self.forward(input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False) + output = self.forward( + input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False + ) - grad_W1, grad_W2 = self.back_prop(input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2) + grad_W1, grad_W2 = self.back_prop( + input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2 + ) W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) @@ -492,10 +512,10 @@ def train(self) -> None: self.test_accuracy = test_accuracy_list self.test_loss = test_loss_list - print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list)) + print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list)) if __name__ == "__main__": import doctest - doctest.testmod() \ No newline at end of file + doctest.testmod() From ad745ee8c80b2f51966c4c20dce67fdf96949398 Mon Sep 17 00:00:00 2001 From: WeiYFan <150578207+WeiYFan@users.noreply.github.com> Date: Wed, 14 May 2025 16:45:19 +0800 Subject: [PATCH 03/11] Update multilayer_perceptron_classifier_from_scratch.py --- ...ayer_perceptron_classifier_from_scratch.py | 178 ++++++++---------- 1 file changed, 82 insertions(+), 96 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index 5f66d6bd00ad..f5fe9b4a01ac 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -1,20 +1,19 @@ import numpy as np from tqdm import tqdm from numpy.random import default_rng -from numpy.random import seed - -seed(42) - class Dataloader: """ - DataLoader class for handling dataset, including data shuffling, one-hot encoding, and train-test splitting. + DataLoader class for handling dataset operations. Supports: + - data shuffling + - one-hot encoding + - train/test splitting Example usage: >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] >>> y = [0, 1, 0, 0] >>> loader = Dataloader(X, y) - >>> train_X, train_y, test_X, test_y = loader.get_Train_test_data() + >>> train_X, train_y, test_X, test_y = loader.get_train_test_data() >>> train_X.shape (3, 2) >>> len(train_y) @@ -36,7 +35,8 @@ class Dataloader: def __init__(self, features: list[list[float]], labels: list[int]) -> None: """ - Initializes the Dataloader instance with feature matrix features and labels labels. 
+ Initializes the Dataloader instance with a feature matrix (`features`) + and corresponding labels (`labels`). Args: features: Feature matrix of shape (n_samples, n_features). @@ -48,11 +48,10 @@ def __init__(self, features: list[list[float]], labels: list[int]) -> None: self.y = np.array(labels) self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed - def get_Train_test_data( - self, - ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: + def get_train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: """ - Splits the data into training and testing sets. Here, we manually split the data. + Splits the data into training and testing sets. + Here, we manually split the data. Returns: A tuple containing: @@ -61,21 +60,13 @@ def get_Train_test_data( - Test data - Test labels """ - train_data = np.array( - [self.X[0], self.X[1], self.X[2]] - ) # First 3 samples for training - train_labels = [ - np.array([self.y[0]]), - np.array([self.y[1]]), - np.array([self.y[2]]), - ] # Labels as np.ndarray + train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training + train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray test_data = np.array([self.X[3]]) # Last sample for testing test_labels = [np.array([self.y[3]])] # Labels as np.ndarray return train_data, train_labels, test_data, test_labels - def shuffle_data( - self, paired_data: list[tuple[np.ndarray, int]] - ) -> list[tuple[np.ndarray, int]]: + def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]: """ Shuffles the data randomly. @@ -89,7 +80,7 @@ def shuffle_data( return paired_data def get_inout_dim(self) -> tuple[int, int]: - train_data, train_labels, test_data, test_labels = self.get_Train_test_data() + train_data, train_labels, test_data, test_labels = self.get_train_test_data() in_dim = train_data[0].shape[0] out_dim = len(train_labels) return in_dim, out_dim @@ -112,44 +103,43 @@ def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray: return one_hot -class MLP: - """ - A custom MLP class for implementing a simple multi-layer perceptron with - forward propagation, backpropagation. - - Attributes: - learning_rate (float): Learning rate for gradient descent. - gamma (float): Parameter to control learning rate adjustment. - epoch (int): Number of epochs for training. - hidden_dim (int): Dimension of the hidden layer. - batch_size (int): Number of samples per mini-batch. - train_loss (List[float]): List to store training loss for each fold. - train_accuracy (List[float]): List to store training accuracy for each fold. - test_loss (List[float]): List to store test loss for each fold. - test_accuracy (List[float]): List to store test accuracy for each fold. - dataloader (Dataloader): DataLoader object for handling training data. - inter_variable (dict): Dictionary to store intermediate variables for backpropagation. - weights1_list (List[Tuple[np.ndarray, np.ndarray]]): List of weights for each fold. - - Methods: - get_inout_dim:obtain input dimension and output dimension. - relu: Apply the ReLU activation function. - relu_derivative: Compute the derivative of the ReLU function. - forward: Perform a forward pass through the network. - back_prop: Perform backpropagation to compute gradients. - update_weights: Update the weights using gradients. 
- update_learning_rate: Adjust the learning rate based on test accuracy. - accuracy: Compute accuracy of the model. - loss: Compute weighted MSE loss. - train: Train the MLP over multiple folds with early stopping. - - +class MLP(): """ - - def __init__( - self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2 - ): - self.learning_rate = learning_rate # + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): List to store test accuracy for each fold. + dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): + Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): + List of weights for each fold. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. + train: Train the MLP over multiple folds with early stopping. + + + """ + def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2): + self.learning_rate = learning_rate self.gamma = gamma # learning_rate decay hyperparameter gamma self.epoch = epoch self.hidden_dim = hidden_dim @@ -198,10 +188,10 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]: """ in_dim, out_dim = self.dataloader.get_inout_dim() # in_dim here is image dim - W1 = np.random.randn(in_dim + 1, self.hidden_dim) * 0.01 # (in_dim, hidden) + w1 = np.random.randn(in_dim + 1, self.hidden_dim) * 0.01 # (in_dim, hidden) - W2 = np.random.randn(self.hidden_dim, out_dim) * 0.01 # (hidden, output) - return W1, W2 + w2 = np.random.randn(self.hidden_dim, out_dim) * 0.01 # (hidden, output) + return w1, w2 def relu(self, input_array: np.ndarray) -> np.ndarray: """ @@ -231,12 +221,13 @@ def relu_derivative(self, input_array: np.ndarray) -> np.ndarray: """ return (input_array > 0).astype(float) + def forward( - self, - input_data: np.ndarray, - W1: np.ndarray, - W2: np.ndarray, - no_gradient: bool = False, + self, + input_data: np.ndarray, + W1: np.ndarray, + W2: np.ndarray, + no_gradient: bool = False ) -> np.ndarray: """ Performs a forward pass through the neural network with one hidden layer. @@ -276,11 +267,11 @@ def forward( return a2 def back_prop( - self, - input_data: np.ndarray, - true_labels: np.ndarray, - W1: np.ndarray, - W2: np.ndarray, + self, + input_data: np.ndarray, + true_labels: np.ndarray, + W1: np.ndarray, + W2: np.ndarray ) -> tuple[np.ndarray, np.ndarray]: """ Performs backpropagation to compute gradients for the weights. 
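The back_prop hunk above needs only w2 (plus the cached z1 and a1) because, for the MSE loss used later in this file, L = sum((a2 - y)**2) / (2 * batch), the gradients are delta_k = a2 - y, delta_j = (delta_k @ w2.T) * relu'(z1), grad_w2 = a1.T @ delta_k / batch and grad_w1 = x.T @ delta_j / batch; w1 itself never appears. A small self-contained finite-difference check of those formulas, written here with arbitrary toy values rather than the module's classes, could look like:

import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((4, 3))          # 4 samples, input_dim 2 + bias column
y = rng.standard_normal((4, 2))          # toy targets, output_dim 2
w1 = rng.standard_normal((3, 2))
w2 = rng.standard_normal((2, 2))

def forward(x, w1, w2):
    z1 = x @ w1
    a1 = np.maximum(0.0, z1)
    return z1, a1, a1 @ w2               # linear output layer

def loss(a2, y):
    return np.sum((a2 - y) ** 2) / (2 * y.shape[0])

z1, a1, a2 = forward(x, w1, w2)
delta_k = a2 - y
delta_j = (delta_k @ w2.T) * (z1 > 0)    # ReLU derivative as a 0/1 mask
grad_w2 = a1.T @ delta_k / x.shape[0]
grad_w1 = x.T @ delta_j / x.shape[0]

# central-difference estimate for one entry of w1
eps = 1e-6
w1p, w1m = w1.copy(), w1.copy()
w1p[0, 0] += eps
w1m[0, 0] -= eps
numerical = (loss(forward(x, w1p, w2)[2], y)
             - loss(forward(x, w1m, w2)[2], y)) / (2 * eps)
print(np.isclose(numerical, grad_w1[0, 0]))   # expected to print True
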
@@ -322,9 +313,7 @@ def back_prop( grad_w2 = ( np.dot(a1.T, delta_k) / batch_size ) # (hidden, batch).dot(batch, output) = (hidden, output) - input_data_flat = input_data.reshape( - input_data.shape[0], -1 - ) # (batch_size, input_dim) + input_data_flat = input_data.reshape(input_data.shape[0], -1) # (batch_size, input_dim) grad_w1 = ( np.dot(input_data_flat.T, delta_j) / batch_size ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) @@ -332,12 +321,12 @@ def back_prop( return grad_w1, grad_w2 def update_weights( - self, - w1: np.ndarray, - w2: np.ndarray, - grad_w1: np.ndarray, - grad_w2: np.ndarray, - learning_rate: float, + self, + w1: np.ndarray, + w2: np.ndarray, + grad_w1: np.ndarray, + grad_w2: np.ndarray, + learning_rate: float ) -> tuple[np.ndarray, np.ndarray]: """ Updates the weight matrices using the computed gradients and learning rate. @@ -372,6 +361,7 @@ def update_weights( w2 -= learning_rate * grad_w2 return w1, w2 + def update_learning_rate(self, learning_rate: float) -> float: """ Updates the learning rate by applying the decay factor gamma. @@ -462,18 +452,17 @@ def train(self) -> None: >>> y = [0, 1, 0, 0] >>> loader = Dataloader(X, y) >>> mlp = MLP(loader, epoch=2, learning_rate=0.1, hidden_dim=2) - >>> mlp.train() - Test accuracy: 1.0 + >>> mlp.train() #doctest:+ELLIPSIS + Test accuracy: ... """ learning_rate = self.learning_rate - train_data, train_labels, test_data, test_labels = ( - self.dataloader.get_Train_test_data() - ) + train_data, train_labels, test_data, test_labels = self.dataloader.get_train_test_data() train_data = np.c_[train_data, np.ones(train_data.shape[0])] test_data = np.c_[test_data, np.ones(test_data.shape[0])] + _, total_label_num = self.dataloader.get_inout_dim() train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num) @@ -488,16 +477,13 @@ def train(self) -> None: for j in tqdm(range(self.epoch)): for k in range(0, train_data.shape[0], batch_size): # retrieve every image - batch_imgs = train_data[k : k + batch_size] - batch_labels = train_labels[k : k + batch_size] - output = self.forward( - input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False - ) + batch_imgs = train_data[k: k + batch_size] + batch_labels = train_labels[k: k + batch_size] + + output = self.forward(input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False) - grad_W1, grad_W2 = self.back_prop( - input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2 - ) + grad_W1, grad_W2 = self.back_prop(input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2) W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) @@ -512,7 +498,7 @@ def train(self) -> None: self.test_accuracy = test_accuracy_list self.test_loss = test_loss_list - print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list)) + print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list)) if __name__ == "__main__": From 1eca8f143018b8570efe30b82382425abea91f6d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 08:50:25 +0000 Subject: [PATCH 04/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...ayer_perceptron_classifier_from_scratch.py | 150 ++++++++++-------- 1 file changed, 84 insertions(+), 66 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index f5fe9b4a01ac..fc99f14ce954 100644 --- 
a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -2,6 +2,7 @@ from tqdm import tqdm from numpy.random import default_rng + class Dataloader: """ DataLoader class for handling dataset operations. Supports: @@ -48,7 +49,9 @@ def __init__(self, features: list[list[float]], labels: list[int]) -> None: self.y = np.array(labels) self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed - def get_train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: + def get_train_test_data( + self, + ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: """ Splits the data into training and testing sets. Here, we manually split the data. @@ -60,13 +63,21 @@ def get_train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[ - Test data - Test labels """ - train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training - train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] # Labels as np.ndarray + train_data = np.array( + [self.X[0], self.X[1], self.X[2]] + ) # First 3 samples for training + train_labels = [ + np.array([self.y[0]]), + np.array([self.y[1]]), + np.array([self.y[2]]), + ] # Labels as np.ndarray test_data = np.array([self.X[3]]) # Last sample for testing test_labels = [np.array([self.y[3]])] # Labels as np.ndarray return train_data, train_labels, test_data, test_labels - def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]: + def shuffle_data( + self, paired_data: list[tuple[np.ndarray, int]] + ) -> list[tuple[np.ndarray, int]]: """ Shuffles the data randomly. @@ -103,42 +114,45 @@ def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray: return one_hot -class MLP(): +class MLP: """ - A custom MLP class for implementing a simple multi-layer perceptron with - forward propagation, backpropagation. - - Attributes: - learning_rate (float): Learning rate for gradient descent. - gamma (float): Parameter to control learning rate adjustment. - epoch (int): Number of epochs for training. - hidden_dim (int): Dimension of the hidden layer. - batch_size (int): Number of samples per mini-batch. - train_loss (List[float]): List to store training loss for each fold. - train_accuracy (List[float]): List to store training accuracy for each fold. - test_loss (List[float]): List to store test loss for each fold. - test_accuracy (List[float]): List to store test accuracy for each fold. - dataloader (Dataloader): DataLoader object for handling training data. - inter_variable (dict): - Dictionary to store intermediate variables for backpropagation. - weights1_list (List[Tuple[np.ndarray, np.ndarray]]): - List of weights for each fold. - - Methods: - get_inout_dim:obtain input dimension and output dimension. - relu: Apply the ReLU activation function. - relu_derivative: Compute the derivative of the ReLU function. - forward: Perform a forward pass through the network. - back_prop: Perform backpropagation to compute gradients. - update_weights: Update the weights using gradients. - update_learning_rate: Adjust the learning rate based on test accuracy. - accuracy: Compute accuracy of the model. - loss: Compute weighted MSE loss. - train: Train the MLP over multiple folds with early stopping. 
- - - """ - def __init__(self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2): + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): List to store test accuracy for each fold. + dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): + Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): + List of weights for each fold. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. + train: Train the MLP over multiple folds with early stopping. + + + """ + + def __init__( + self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2 + ): self.learning_rate = learning_rate self.gamma = gamma # learning_rate decay hyperparameter gamma self.epoch = epoch @@ -221,13 +235,12 @@ def relu_derivative(self, input_array: np.ndarray) -> np.ndarray: """ return (input_array > 0).astype(float) - def forward( - self, - input_data: np.ndarray, - W1: np.ndarray, - W2: np.ndarray, - no_gradient: bool = False + self, + input_data: np.ndarray, + W1: np.ndarray, + W2: np.ndarray, + no_gradient: bool = False, ) -> np.ndarray: """ Performs a forward pass through the neural network with one hidden layer. @@ -267,11 +280,11 @@ def forward( return a2 def back_prop( - self, - input_data: np.ndarray, - true_labels: np.ndarray, - W1: np.ndarray, - W2: np.ndarray + self, + input_data: np.ndarray, + true_labels: np.ndarray, + W1: np.ndarray, + W2: np.ndarray, ) -> tuple[np.ndarray, np.ndarray]: """ Performs backpropagation to compute gradients for the weights. @@ -313,7 +326,9 @@ def back_prop( grad_w2 = ( np.dot(a1.T, delta_k) / batch_size ) # (hidden, batch).dot(batch, output) = (hidden, output) - input_data_flat = input_data.reshape(input_data.shape[0], -1) # (batch_size, input_dim) + input_data_flat = input_data.reshape( + input_data.shape[0], -1 + ) # (batch_size, input_dim) grad_w1 = ( np.dot(input_data_flat.T, delta_j) / batch_size ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) @@ -321,12 +336,12 @@ def back_prop( return grad_w1, grad_w2 def update_weights( - self, - w1: np.ndarray, - w2: np.ndarray, - grad_w1: np.ndarray, - grad_w2: np.ndarray, - learning_rate: float + self, + w1: np.ndarray, + w2: np.ndarray, + grad_w1: np.ndarray, + grad_w2: np.ndarray, + learning_rate: float, ) -> tuple[np.ndarray, np.ndarray]: """ Updates the weight matrices using the computed gradients and learning rate. 
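For context on the update_weights and update_learning_rate hunks being re-wrapped here: the optimizer is plain mini-batch gradient descent, w <- w - lr * grad, with a geometric decay lr <- lr * gamma applied once per epoch. A tiny standalone illustration with made-up numbers:

import numpy as np

w = np.array([[0.5, -0.5]])
grad = np.array([[0.1, 0.2]])
lr, gamma = 0.1, 0.9

for epoch in range(3):
    w = w - lr * grad      # gradient-descent step, as in update_weights
    lr = lr * gamma        # geometric decay, as in update_learning_rate
    print(epoch, round(lr, 4), w)
# lr shrinks 0.1 -> 0.09 -> 0.081 -> 0.0729 while w moves against the gradient
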
@@ -361,7 +376,6 @@ def update_weights( w2 -= learning_rate * grad_w2 return w1, w2 - def update_learning_rate(self, learning_rate: float) -> float: """ Updates the learning rate by applying the decay factor gamma. @@ -457,12 +471,13 @@ def train(self) -> None: """ learning_rate = self.learning_rate - train_data, train_labels, test_data, test_labels = self.dataloader.get_train_test_data() + train_data, train_labels, test_data, test_labels = ( + self.dataloader.get_train_test_data() + ) train_data = np.c_[train_data, np.ones(train_data.shape[0])] test_data = np.c_[test_data, np.ones(test_data.shape[0])] - _, total_label_num = self.dataloader.get_inout_dim() train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num) @@ -477,13 +492,16 @@ def train(self) -> None: for j in tqdm(range(self.epoch)): for k in range(0, train_data.shape[0], batch_size): # retrieve every image + batch_imgs = train_data[k : k + batch_size] + batch_labels = train_labels[k : k + batch_size] - batch_imgs = train_data[k: k + batch_size] - batch_labels = train_labels[k: k + batch_size] - - output = self.forward(input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False) + output = self.forward( + input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False + ) - grad_W1, grad_W2 = self.back_prop(input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2) + grad_W1, grad_W2 = self.back_prop( + input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2 + ) W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) @@ -498,7 +516,7 @@ def train(self) -> None: self.test_accuracy = test_accuracy_list self.test_loss = test_loss_list - print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list)) + print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list)) if __name__ == "__main__": From ce9da8fe869da0e10247c01f134197bf33edf073 Mon Sep 17 00:00:00 2001 From: WeiYFan <150578207+WeiYFan@users.noreply.github.com> Date: Wed, 14 May 2025 17:23:34 +0800 Subject: [PATCH 05/11] Update multilayer_perceptron_classifier_from_scratch.py --- ...ayer_perceptron_classifier_from_scratch.py | 276 +++++++++--------- 1 file changed, 142 insertions(+), 134 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index fc99f14ce954..3281657ab207 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -1,8 +1,7 @@ import numpy as np -from tqdm import tqdm from numpy.random import default_rng - - +from tqdm import tqdm +rng = default_rng(42) class Dataloader: """ DataLoader class for handling dataset operations. Supports: @@ -36,22 +35,23 @@ class Dataloader: def __init__(self, features: list[list[float]], labels: list[int]) -> None: """ - Initializes the Dataloader instance with a feature matrix (`features`) + Initializes the Dataloader instance + with a feature matrix (`features`) and corresponding labels (`labels`). Args: - features: Feature matrix of shape (n_samples, n_features). - labels: List of labels of shape (n_samples,). + features: Feature matrix of shape + (n_samples, n_features). + labels: List of labels of shape + (n_samples,). 
""" # random seed - self.rng = default_rng(42) # Create a random number generator with a seed + self.rng = default_rng(42) self.X = np.array(features) self.y = np.array(labels) - self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed + self.class_weights = {0: 1.0, 1: 1.0} - def get_train_test_data( - self, - ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: + def get_train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: """ Splits the data into training and testing sets. Here, we manually split the data. @@ -63,26 +63,19 @@ def get_train_test_data( - Test data - Test labels """ - train_data = np.array( - [self.X[0], self.X[1], self.X[2]] - ) # First 3 samples for training - train_labels = [ - np.array([self.y[0]]), - np.array([self.y[1]]), - np.array([self.y[2]]), - ] # Labels as np.ndarray + train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training + train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] test_data = np.array([self.X[3]]) # Last sample for testing test_labels = [np.array([self.y[3]])] # Labels as np.ndarray return train_data, train_labels, test_data, test_labels - def shuffle_data( - self, paired_data: list[tuple[np.ndarray, int]] - ) -> list[tuple[np.ndarray, int]]: + def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]: """ Shuffles the data randomly. Args: - paired_data: List of tuples containing data and corresponding labels. + paired_data: List of tuples containing data + and corresponding labels. Returns: A shuffled list of data-label pairs. @@ -114,45 +107,54 @@ def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray: return one_hot -class MLP: +class MLP(): """ - A custom MLP class for implementing a simple multi-layer perceptron with - forward propagation, backpropagation. - - Attributes: - learning_rate (float): Learning rate for gradient descent. - gamma (float): Parameter to control learning rate adjustment. - epoch (int): Number of epochs for training. - hidden_dim (int): Dimension of the hidden layer. - batch_size (int): Number of samples per mini-batch. - train_loss (List[float]): List to store training loss for each fold. - train_accuracy (List[float]): List to store training accuracy for each fold. - test_loss (List[float]): List to store test loss for each fold. - test_accuracy (List[float]): List to store test accuracy for each fold. - dataloader (Dataloader): DataLoader object for handling training data. - inter_variable (dict): - Dictionary to store intermediate variables for backpropagation. - weights1_list (List[Tuple[np.ndarray, np.ndarray]]): - List of weights for each fold. - - Methods: - get_inout_dim:obtain input dimension and output dimension. - relu: Apply the ReLU activation function. - relu_derivative: Compute the derivative of the ReLU function. - forward: Perform a forward pass through the network. - back_prop: Perform backpropagation to compute gradients. - update_weights: Update the weights using gradients. - update_learning_rate: Adjust the learning rate based on test accuracy. - accuracy: Compute accuracy of the model. - loss: Compute weighted MSE loss. - train: Train the MLP over multiple folds with early stopping. + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. 
+ gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): + List to store training loss for each fold. + train_accuracy (List[float]): + List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): + List to store test accuracy for each fold. + dataloader (Dataloader): + DataLoader object for handling training data. + inter_variable (dict): + Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): + List of weights for each fold. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. + train: Train the MLP over multiple folds with early stopping. - """ + """ def __init__( - self, dataloader, epoch: int, learning_rate: float, gamma=1, hidden_dim=2 - ): + self, + dataloader: Dataloader, + epoch: int, + learning_rate: float, + gamma: float = 1.0, + hidden_dim: int = 2, + ) -> None: self.learning_rate = learning_rate self.gamma = gamma # learning_rate decay hyperparameter gamma self.epoch = epoch @@ -188,23 +190,22 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]: """ Initialize weights using He initialization. - :return: Tuple of weights (W1, W2) for the network. + :return: Tuple of weights (w1, w2) for the network. >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] >>> y = [0, 1, 0, 0] >>> loader = Dataloader(X, y) >>> mlp = MLP(loader, 10, 0.1) - >>> W1, W2 = mlp.initialize() - >>> W1.shape + >>> w1, w2 = mlp.initialize() + >>> w1.shape (3, 2) - >>> W2.shape + >>> w2.shape (2, 3) """ in_dim, out_dim = self.dataloader.get_inout_dim() # in_dim here is image dim - w1 = np.random.randn(in_dim + 1, self.hidden_dim) * 0.01 # (in_dim, hidden) - - w2 = np.random.randn(self.hidden_dim, out_dim) * 0.01 # (hidden, output) + w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * np.sqrt(2.0 / in_dim) + w2 = rng.standard_normal((self.hidden_dim, out_dim)) * np.sqrt(2.0 / self.hidden_dim) return w1, w2 def relu(self, input_array: np.ndarray) -> np.ndarray: @@ -235,20 +236,23 @@ def relu_derivative(self, input_array: np.ndarray) -> np.ndarray: """ return (input_array > 0).astype(float) + def forward( - self, - input_data: np.ndarray, - W1: np.ndarray, - W2: np.ndarray, - no_gradient: bool = False, + self, + input_data: np.ndarray, + w1: np.ndarray, + w2: np.ndarray, + no_gradient: bool = False ) -> np.ndarray: """ Performs a forward pass through the neural network with one hidden layer. Args: input_data: Input data, shape (batch_size, input_dim). - W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). - W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + w1: Weight matrix for input to hidden layer, + shape (input_dim + 1, hidden_dim). + w2: Weight matrix for hidden to output layer, + shape (hidden_dim, output_dim). 
no_gradient: If True, returns output without storing intermediates. Returns: @@ -256,19 +260,22 @@ def forward( Examples: >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) - >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias - >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) - >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) - >>> output = mlp.forward(x, W1, W2) + >>> x = np.array([[1.0, 2.0, 1.0]]) + + >>> w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) + + >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]]) + + >>> output = mlp.forward(x, w1, w2) >>> output.shape (1, 2) """ - z1 = np.dot(input_data, W1) + z1 = np.dot(input_data, w1) a1 = self.relu(z1) # relu # hidden → output - z2 = np.dot(a1, W2) + z2 = np.dot(a1, w2) a2 = z2 if no_gradient: @@ -280,55 +287,55 @@ def forward( return a2 def back_prop( - self, - input_data: np.ndarray, - true_labels: np.ndarray, - W1: np.ndarray, - W2: np.ndarray, + self, + input_data: np.ndarray, + true_labels: np.ndarray, + w2: np.ndarray ) -> tuple[np.ndarray, np.ndarray]: """ Performs backpropagation to compute gradients for the weights. Args: - input_data: Input data, shape (batch_size, input_dim). - true_labels: True labels, shape (batch_size, output_dim). - W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). - W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + input_data: Input data, shape + (batch_size, input_dim). + true_labels: True labels, shape + (batch_size, output_dim). + w1: Weight matrix for input to + hidden layer, shape (input_dim + 1, hidden_dim). + w2: Weight matrix for hidden + to output layer, shape (hidden_dim, output_dim). Returns: - Tuple of gradients (grad_W1, grad_W2) for the weight matrices. + Tuple of gradients (grad_w1, grad_w2) for the weight matrices. Examples: >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) - >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias - >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2 - >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) - >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) - >>> _ = mlp.forward(x, W1, W2) # Run forward to set inter_variable - >>> grad_W1, grad_W2 = mlp.back_prop(x, y, W1, W2) - >>> grad_W1.shape + >>> x = np.array([[1.0, 2.0, 1.0]]) + >>> y = np.array([[0.0, 1.0]]) + >>> w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) + >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]]) + >>> _ = mlp.forward(x, w1, w2) + >>> grad_w1, grad_w2 = mlp.back_prop(x, y, w2) + >>> grad_w1.shape (3, 2) - >>> grad_W2.shape + >>> grad_w2.shape (2, 2) """ - a1 = self.inter_variable["a1"] # (batch_size, hidden_dim) + a1 = self.inter_variable["a1"] z1 = self.inter_variable["z1"] - a2 = self.inter_variable["a2"] # (batch_size, output_dim) - z2 = self.inter_variable["z2"] + a2 = self.inter_variable["a2"] batch_size = input_data.shape[0] # 1. 
output layer error delta_k = a2 - true_labels - delta_j = np.dot(delta_k, W2.T) * self.relu_derivative( + delta_j = np.dot(delta_k, w2.T) * self.relu_derivative( z1 ) # (batch, hidden_dim) 使用relu时 grad_w2 = ( np.dot(a1.T, delta_k) / batch_size ) # (hidden, batch).dot(batch, output) = (hidden, output) - input_data_flat = input_data.reshape( - input_data.shape[0], -1 - ) # (batch_size, input_dim) + input_data_flat = input_data.reshape(input_data.shape[0], -1) # (batch_size, input_dim) grad_w1 = ( np.dot(input_data_flat.T, delta_j) / batch_size ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) @@ -336,39 +343,39 @@ def back_prop( return grad_w1, grad_w2 def update_weights( - self, - w1: np.ndarray, - w2: np.ndarray, - grad_w1: np.ndarray, - grad_w2: np.ndarray, - learning_rate: float, + self, + w1: np.ndarray, + w2: np.ndarray, + grad_w1: np.ndarray, + grad_w2: np.ndarray, + learning_rate: float ) -> tuple[np.ndarray, np.ndarray]: """ Updates the weight matrices using the computed gradients and learning rate. Args: - W1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). - W2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). - grad_W1: Gradient for W1, shape (input_dim + 1, hidden_dim). - grad_W2: Gradient for W2, shape (hidden_dim, output_dim). + w1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). + w2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + grad_w1: Gradient for w1, shape (input_dim + 1, hidden_dim). + grad_w2: Gradient for w2, shape (hidden_dim, output_dim). learning_rate: Learning rate for weight updates. Returns: - Updated weight matrices (W1, W2). + Updated weight matrices (w1, w2). Examples: >>> mlp = MLP(None, 1, 0.1) - >>> W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) # (input_dim=3, hidden_dim=2) - >>> W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) - >>> grad_W1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) - >>> grad_W2 = np.array([[0.7, 0.8], [0.9, 1.0]]) + >>> w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) + >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]]) + >>> grad_w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) + >>> grad_w2 = np.array([[0.7, 0.8], [0.9, 1.0]]) >>> learning_rate = 0.1 - >>> new_W1, new_W2 = mlp.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) - >>> new_W1==np.array([[0.09, 0.18], [0.27, 0.36], [0.45, 0.54]]) + >>> new_w1, new_w2 = mlp.update_weights(w1, w2, grad_w1, grad_w2, learning_rate) + >>> new_w1==np.array([[0.09, 0.18], [0.27, 0.36], [0.45, 0.54]]) array([[ True, True], [ True, True], [ True, True]]) - >>> new_W2==np.array([[0.63, 0.72], [0.81, 0.90]]) + >>> new_w2==np.array([[0.63, 0.72], [0.81, 0.90]]) array([[ True, True], [ True, True]]) """ @@ -376,6 +383,7 @@ def update_weights( w2 -= learning_rate * grad_w2 return w1, w2 + def update_learning_rate(self, learning_rate: float) -> float: """ Updates the learning rate by applying the decay factor gamma. 
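The back_prop hunk above implements the gradients for the half mean squared error loss with one ReLU hidden layer: delta_k = a2 - y at the output, delta_j = (delta_k @ w2.T) * relu'(z1) at the hidden layer, and both weight gradients are averaged over the batch before the plain gradient-descent step in update_weights. A minimal standalone sketch of the same arithmetic, with shapes and values invented purely for illustration:

    import numpy as np

    x = np.array([[1.0, 2.0, 1.0]])        # (batch=1, in_dim + bias = 3)
    y = np.array([[0.0, 1.0]])             # (batch=1, out_dim = 2)
    w1 = np.full((3, 2), 0.1)              # (in_dim + bias, hidden_dim)
    w2 = np.full((2, 2), 0.2)              # (hidden_dim, out_dim)

    z1 = x @ w1                            # hidden pre-activation
    a1 = np.maximum(z1, 0.0)               # ReLU
    a2 = a1 @ w2                           # linear output, as in forward()

    delta_k = a2 - y                       # output-layer error for the 1/2 * MSE loss
    delta_j = (delta_k @ w2.T) * (z1 > 0)  # backprop through the ReLU derivative
    grad_w2 = a1.T @ delta_k / x.shape[0]
    grad_w1 = x.T @ delta_j / x.shape[0]

    learning_rate = 0.1
    w1 -= learning_rate * grad_w1          # same update rule as update_weights()
    w2 -= learning_rate * grad_w2

With these shapes grad_w1 comes out as (3, 2) and grad_w2 as (2, 2), matching the expectations in the back_prop doctest.
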
@@ -471,19 +479,18 @@ def train(self) -> None: """ learning_rate = self.learning_rate - train_data, train_labels, test_data, test_labels = ( - self.dataloader.get_train_test_data() - ) + train_data, train_labels, test_data, test_labels = self.dataloader.get_train_test_data() train_data = np.c_[train_data, np.ones(train_data.shape[0])] test_data = np.c_[test_data, np.ones(test_data.shape[0])] + _, total_label_num = self.dataloader.get_inout_dim() train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num) test_labels = self.dataloader.one_hot_encode(test_labels, total_label_num) - W1, W2 = self.initialize() + w1, w2 = self.initialize() train_accuracy_list, train_loss_list = [], [] test_accuracy_list, test_loss_list = [], [] @@ -492,20 +499,21 @@ def train(self) -> None: for j in tqdm(range(self.epoch)): for k in range(0, train_data.shape[0], batch_size): # retrieve every image - batch_imgs = train_data[k : k + batch_size] - batch_labels = train_labels[k : k + batch_size] - output = self.forward( - input_data=batch_imgs, W1=W1, W2=W2, no_gradient=False - ) + batch_imgs = train_data[k: k + batch_size] + batch_labels = train_labels[k: k + batch_size] + + output = self.forward(input_data=batch_imgs, w1=w1, w2=w2, no_gradient=False) - grad_W1, grad_W2 = self.back_prop( - input_data=batch_imgs, true_labels=batch_labels, W1=W1, W2=W2 + grad_w1, grad_w2 = self.back_prop( + input_data=batch_imgs, + true_labels=batch_labels, + w2=w2 ) - W1, W2 = self.update_weights(W1, W2, grad_W1, grad_W2, learning_rate) + w1, w2 = self.update_weights(w1, w2, grad_w1, grad_w2, learning_rate) - test_output = self.forward(test_data, W1, W2, no_gradient=True) + test_output = self.forward(test_data, w1, w2, no_gradient=True) test_accuracy = self.accuracy(test_labels, test_output) test_loss = self.loss(test_output, test_labels) @@ -516,7 +524,7 @@ def train(self) -> None: self.test_accuracy = test_accuracy_list self.test_loss = test_loss_list - print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list)) + print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list)) if __name__ == "__main__": From 38ee6e2a255f58f45cedfd75c308c35eb849d035 Mon Sep 17 00:00:00 2001 From: WeiYFan <150578207+WeiYFan@users.noreply.github.com> Date: Wed, 14 May 2025 17:29:57 +0800 Subject: [PATCH 06/11] Update multilayer_perceptron_classifier_from_scratch.py --- ...ayer_perceptron_classifier_from_scratch.py | 66 ++++++++++++------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index 3281657ab207..f4218140147e 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -51,7 +51,9 @@ def __init__(self, features: list[list[float]], labels: list[int]) -> None: self.y = np.array(labels) self.class_weights = {0: 1.0, 1: 1.0} - def get_train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: + def get_train_test_data( + self + ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: """ Splits the data into training and testing sets. Here, we manually split the data. 
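The train() hunk above prepares the data before the mini-batch loop: a constant column of ones is appended with np.c_ so the extra row of w1 acts as a bias term, and the integer labels are expanded by one_hot_encode, which places 0.99 rather than 1.0 in the target position. A short illustration of those two preprocessing steps on made-up arrays:

    import numpy as np

    features = np.array([[0.0, 0.0], [1.0, 1.0], [1.0, 0.0]])
    labels = [0, 1, 0]

    # Bias feature: every sample gains a trailing 1.0, so shape goes (3, 2) -> (3, 3).
    features_with_bias = np.c_[features, np.ones(features.shape[0])]

    # One-hot targets with 0.99 as the "hot" value, mirroring Dataloader.one_hot_encode.
    num_classes = 2
    one_hot = np.zeros((len(labels), num_classes))
    for row, label in enumerate(labels):
        one_hot[row, label] = 0.99
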
@@ -63,13 +65,17 @@ def get_train_test_data(self) -> tuple[list[np.ndarray], list[np.ndarray], list[ - Test data - Test labels """ - train_data = np.array([self.X[0], self.X[1], self.X[2]]) # First 3 samples for training - train_labels = [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] + train_data = np.array([self.X[0], self.X[1], self.X[2]]) + train_labels = \ + [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] test_data = np.array([self.X[3]]) # Last sample for testing test_labels = [np.array([self.y[3]])] # Labels as np.ndarray return train_data, train_labels, test_data, test_labels - def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[np.ndarray, int]]: + def shuffle_data( + self, + paired_data: list[tuple[np.ndarray, int]] + ) -> list[tuple[np.ndarray, int]]: """ Shuffles the data randomly. @@ -84,7 +90,8 @@ def shuffle_data(self, paired_data: list[tuple[np.ndarray, int]]) -> list[tuple[ return paired_data def get_inout_dim(self) -> tuple[int, int]: - train_data, train_labels, test_data, test_labels = self.get_train_test_data() + train_data, train_labels, test_data, test_labels = ( + self.get_train_test_data()) in_dim = train_data[0].shape[0] out_dim = len(train_labels) return in_dim, out_dim @@ -203,9 +210,11 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]: (2, 3) """ - in_dim, out_dim = self.dataloader.get_inout_dim() # in_dim here is image dim - w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * np.sqrt(2.0 / in_dim) - w2 = rng.standard_normal((self.hidden_dim, out_dim)) * np.sqrt(2.0 / self.hidden_dim) + in_dim, out_dim = self.dataloader.get_inout_dim() + w1 = (rng.standard_normal((in_dim + 1, self.hidden_dim)) + * np.sqrt(2.0 / in_dim)) + w2 = (rng.standard_normal((self.hidden_dim, out_dim)) + * np.sqrt(2.0 / self.hidden_dim)) return w1, w2 def relu(self, input_array: np.ndarray) -> np.ndarray: @@ -256,7 +265,8 @@ def forward( no_gradient: If True, returns output without storing intermediates. Returns: - Output of the network after forward pass, shape (batch_size, output_dim). + Output of the network after forward pass, + shape (batch_size, output_dim). Examples: >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) @@ -334,11 +344,11 @@ def back_prop( grad_w2 = ( np.dot(a1.T, delta_k) / batch_size - ) # (hidden, batch).dot(batch, output) = (hidden, output) - input_data_flat = input_data.reshape(input_data.shape[0], -1) # (batch_size, input_dim) + ) + input_data_flat = input_data.reshape(input_data.shape[0], -1) grad_w1 = ( np.dot(input_data_flat.T, delta_j) / batch_size - ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) + ) return grad_w1, grad_w2 @@ -351,11 +361,14 @@ def update_weights( learning_rate: float ) -> tuple[np.ndarray, np.ndarray]: """ - Updates the weight matrices using the computed gradients and learning rate. + Updates the weight matrices using + the computed gradients and learning rate. Args: - w1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). - w2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). + w1: Weight matrix for input to hidden layer, shape + (input_dim + 1, hidden_dim). + w2: Weight matrix for hidden to output layer, shape + (hidden_dim, output_dim). grad_w1: Gradient for w1, shape (input_dim + 1, hidden_dim). grad_w2: Gradient for w2, shape (hidden_dim, output_dim). learning_rate: Learning rate for weight updates. 
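The initialize() hunk above draws the weights from a standard normal scaled by sqrt(2 / fan_in), i.e. He initialization, the usual choice for ReLU layers because it keeps the activation variance roughly constant from layer to layer. A self-contained sketch with dimensions assumed to match the doctest (in_dim=2, hidden_dim=2, out_dim=3):

    import numpy as np
    from numpy.random import default_rng

    rng = default_rng(42)
    in_dim, hidden_dim, out_dim = 2, 2, 3   # assumed, taken from the doctest shapes

    # The extra row on w1 corresponds to the bias column appended in train().
    w1 = rng.standard_normal((in_dim + 1, hidden_dim)) * np.sqrt(2.0 / in_dim)
    w2 = rng.standard_normal((hidden_dim, out_dim)) * np.sqrt(2.0 / hidden_dim)
    print(w1.shape, w2.shape)               # (3, 2) (2, 3)
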
@@ -405,7 +418,8 @@ def update_learning_rate(self, learning_rate: float) -> float: @staticmethod def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float: """ - Computes the accuracy of predictions by comparing predicted and true labels. + Computes the accuracy of predictions + by comparing predicted and true labels. Args: label: True labels, shape (batch_size, num_classes). @@ -426,7 +440,8 @@ def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float: @staticmethod def loss(output: np.ndarray, label: np.ndarray) -> float: """ - Computes the mean squared error loss between predictions and true labels. + Computes the mean squared error loss + between predictions and true labels. Args: output: Predicted outputs, shape (batch_size, num_classes). @@ -465,9 +480,11 @@ def get_acc_loss(self) -> tuple[list[float], list[float]]: def train(self) -> None: """ - Trains the MLP model using the provided dataloader for multiple folds and epochs. + Trains the MLP model using the provided dataloader + for multiple folds and epochs. - Saves the best model parameters for each fold and records accuracy/loss. + Saves the best model parameters + for each fold and records accuracy/loss. Examples: >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] @@ -479,7 +496,8 @@ def train(self) -> None: """ learning_rate = self.learning_rate - train_data, train_labels, test_data, test_labels = self.dataloader.get_train_test_data() + train_data, train_labels, test_data, test_labels = ( + self.dataloader.get_train_test_data()) train_data = np.c_[train_data, np.ones(train_data.shape[0])] test_data = np.c_[test_data, np.ones(test_data.shape[0])] @@ -498,12 +516,16 @@ def train(self) -> None: batch_size = 1 for j in tqdm(range(self.epoch)): - for k in range(0, train_data.shape[0], batch_size): # retrieve every image + for k in range(0, train_data.shape[0], batch_size): batch_imgs = train_data[k: k + batch_size] batch_labels = train_labels[k: k + batch_size] - output = self.forward(input_data=batch_imgs, w1=w1, w2=w2, no_gradient=False) + output = self.forward( + input_data=batch_imgs, + w1=w1, + w2=w2, + no_gradient=False) grad_w1, grad_w2 = self.back_prop( input_data=batch_imgs, From 0cb6734b7b529de1e094efced8e893c80b82260a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 09:32:15 +0000 Subject: [PATCH 07/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...ayer_perceptron_classifier_from_scratch.py | 190 +++++++++--------- 1 file changed, 90 insertions(+), 100 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index f4218140147e..a822d63de9db 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -1,7 +1,10 @@ import numpy as np from numpy.random import default_rng from tqdm import tqdm + rng = default_rng(42) + + class Dataloader: """ DataLoader class for handling dataset operations. Supports: @@ -52,7 +55,7 @@ def __init__(self, features: list[list[float]], labels: list[int]) -> None: self.class_weights = {0: 1.0, 1: 1.0} def get_train_test_data( - self + self, ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray], list[np.ndarray]]: """ Splits the data into training and testing sets. 
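The series also keeps a single module-level generator, rng = default_rng(42), so every run of initialize() draws the same weights. A tiny sketch of why the fixed seed matters for reproducibility (the exact numbers depend on NumPy's bit generator and are not shown here):

    import numpy as np
    from numpy.random import default_rng

    a = default_rng(42).standard_normal(3)
    b = default_rng(42).standard_normal(3)
    assert np.allclose(a, b)   # identical seeds give identical draws
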
@@ -65,16 +68,18 @@ def get_train_test_data( - Test data - Test labels """ - train_data = np.array([self.X[0], self.X[1], self.X[2]]) - train_labels = \ - [np.array([self.y[0]]), np.array([self.y[1]]), np.array([self.y[2]])] + train_data = np.array([self.X[0], self.X[1], self.X[2]]) + train_labels = [ + np.array([self.y[0]]), + np.array([self.y[1]]), + np.array([self.y[2]]), + ] test_data = np.array([self.X[3]]) # Last sample for testing test_labels = [np.array([self.y[3]])] # Labels as np.ndarray return train_data, train_labels, test_data, test_labels def shuffle_data( - self, - paired_data: list[tuple[np.ndarray, int]] + self, paired_data: list[tuple[np.ndarray, int]] ) -> list[tuple[np.ndarray, int]]: """ Shuffles the data randomly. @@ -90,8 +95,7 @@ def shuffle_data( return paired_data def get_inout_dim(self) -> tuple[int, int]: - train_data, train_labels, test_data, test_labels = ( - self.get_train_test_data()) + train_data, train_labels, test_data, test_labels = self.get_train_test_data() in_dim = train_data[0].shape[0] out_dim = len(train_labels) return in_dim, out_dim @@ -114,53 +118,53 @@ def one_hot_encode(labels: list[int], num_classes: int) -> np.ndarray: return one_hot -class MLP(): +class MLP: """ - A custom MLP class for implementing a simple multi-layer perceptron with - forward propagation, backpropagation. - - Attributes: - learning_rate (float): Learning rate for gradient descent. - gamma (float): Parameter to control learning rate adjustment. - epoch (int): Number of epochs for training. - hidden_dim (int): Dimension of the hidden layer. - batch_size (int): Number of samples per mini-batch. - train_loss (List[float]): - List to store training loss for each fold. - train_accuracy (List[float]): - List to store training accuracy for each fold. - test_loss (List[float]): List to store test loss for each fold. - test_accuracy (List[float]): - List to store test accuracy for each fold. - dataloader (Dataloader): - DataLoader object for handling training data. - inter_variable (dict): - Dictionary to store intermediate variables for backpropagation. - weights1_list (List[Tuple[np.ndarray, np.ndarray]]): - List of weights for each fold. - - Methods: - get_inout_dim:obtain input dimension and output dimension. - relu: Apply the ReLU activation function. - relu_derivative: Compute the derivative of the ReLU function. - forward: Perform a forward pass through the network. - back_prop: Perform backpropagation to compute gradients. - update_weights: Update the weights using gradients. - update_learning_rate: Adjust the learning rate based on test accuracy. - accuracy: Compute accuracy of the model. - loss: Compute weighted MSE loss. - train: Train the MLP over multiple folds with early stopping. + A custom MLP class for implementing a simple multi-layer perceptron with + forward propagation, backpropagation. + + Attributes: + learning_rate (float): Learning rate for gradient descent. + gamma (float): Parameter to control learning rate adjustment. + epoch (int): Number of epochs for training. + hidden_dim (int): Dimension of the hidden layer. + batch_size (int): Number of samples per mini-batch. + train_loss (List[float]): + List to store training loss for each fold. + train_accuracy (List[float]): + List to store training accuracy for each fold. + test_loss (List[float]): List to store test loss for each fold. + test_accuracy (List[float]): + List to store test accuracy for each fold. + dataloader (Dataloader): + DataLoader object for handling training data. 
+ inter_variable (dict): + Dictionary to store intermediate variables for backpropagation. + weights1_list (List[Tuple[np.ndarray, np.ndarray]]): + List of weights for each fold. + + Methods: + get_inout_dim:obtain input dimension and output dimension. + relu: Apply the ReLU activation function. + relu_derivative: Compute the derivative of the ReLU function. + forward: Perform a forward pass through the network. + back_prop: Perform backpropagation to compute gradients. + update_weights: Update the weights using gradients. + update_learning_rate: Adjust the learning rate based on test accuracy. + accuracy: Compute accuracy of the model. + loss: Compute weighted MSE loss. + train: Train the MLP over multiple folds with early stopping. - """ + """ def __init__( - self, - dataloader: Dataloader, - epoch: int, - learning_rate: float, - gamma: float = 1.0, - hidden_dim: int = 2, + self, + dataloader: Dataloader, + epoch: int, + learning_rate: float, + gamma: float = 1.0, + hidden_dim: int = 2, ) -> None: self.learning_rate = learning_rate self.gamma = gamma # learning_rate decay hyperparameter gamma @@ -211,10 +215,10 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]: """ in_dim, out_dim = self.dataloader.get_inout_dim() - w1 = (rng.standard_normal((in_dim + 1, self.hidden_dim)) - * np.sqrt(2.0 / in_dim)) - w2 = (rng.standard_normal((self.hidden_dim, out_dim)) - * np.sqrt(2.0 / self.hidden_dim)) + w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * np.sqrt(2.0 / in_dim) + w2 = rng.standard_normal((self.hidden_dim, out_dim)) * np.sqrt( + 2.0 / self.hidden_dim + ) return w1, w2 def relu(self, input_array: np.ndarray) -> np.ndarray: @@ -245,13 +249,12 @@ def relu_derivative(self, input_array: np.ndarray) -> np.ndarray: """ return (input_array > 0).astype(float) - def forward( - self, - input_data: np.ndarray, - w1: np.ndarray, - w2: np.ndarray, - no_gradient: bool = False + self, + input_data: np.ndarray, + w1: np.ndarray, + w2: np.ndarray, + no_gradient: bool = False, ) -> np.ndarray: """ Performs a forward pass through the neural network with one hidden layer. @@ -265,7 +268,7 @@ def forward( no_gradient: If True, returns output without storing intermediates. Returns: - Output of the network after forward pass, + Output of the network after forward pass, shape (batch_size, output_dim). Examples: @@ -297,10 +300,7 @@ def forward( return a2 def back_prop( - self, - input_data: np.ndarray, - true_labels: np.ndarray, - w2: np.ndarray + self, input_data: np.ndarray, true_labels: np.ndarray, w2: np.ndarray ) -> tuple[np.ndarray, np.ndarray]: """ Performs backpropagation to compute gradients for the weights. @@ -342,32 +342,28 @@ def back_prop( z1 ) # (batch, hidden_dim) 使用relu时 - grad_w2 = ( - np.dot(a1.T, delta_k) / batch_size - ) - input_data_flat = input_data.reshape(input_data.shape[0], -1) - grad_w1 = ( - np.dot(input_data_flat.T, delta_j) / batch_size - ) + grad_w2 = np.dot(a1.T, delta_k) / batch_size + input_data_flat = input_data.reshape(input_data.shape[0], -1) + grad_w1 = np.dot(input_data_flat.T, delta_j) / batch_size return grad_w1, grad_w2 def update_weights( - self, - w1: np.ndarray, - w2: np.ndarray, - grad_w1: np.ndarray, - grad_w2: np.ndarray, - learning_rate: float + self, + w1: np.ndarray, + w2: np.ndarray, + grad_w1: np.ndarray, + grad_w2: np.ndarray, + learning_rate: float, ) -> tuple[np.ndarray, np.ndarray]: """ - Updates the weight matrices using + Updates the weight matrices using the computed gradients and learning rate. 
Args: - w1: Weight matrix for input to hidden layer, shape + w1: Weight matrix for input to hidden layer, shape (input_dim + 1, hidden_dim). - w2: Weight matrix for hidden to output layer, shape + w2: Weight matrix for hidden to output layer, shape (hidden_dim, output_dim). grad_w1: Gradient for w1, shape (input_dim + 1, hidden_dim). grad_w2: Gradient for w2, shape (hidden_dim, output_dim). @@ -396,7 +392,6 @@ def update_weights( w2 -= learning_rate * grad_w2 return w1, w2 - def update_learning_rate(self, learning_rate: float) -> float: """ Updates the learning rate by applying the decay factor gamma. @@ -418,7 +413,7 @@ def update_learning_rate(self, learning_rate: float) -> float: @staticmethod def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float: """ - Computes the accuracy of predictions + Computes the accuracy of predictions by comparing predicted and true labels. Args: @@ -440,7 +435,7 @@ def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float: @staticmethod def loss(output: np.ndarray, label: np.ndarray) -> float: """ - Computes the mean squared error loss + Computes the mean squared error loss between predictions and true labels. Args: @@ -480,10 +475,10 @@ def get_acc_loss(self) -> tuple[list[float], list[float]]: def train(self) -> None: """ - Trains the MLP model using the provided dataloader + Trains the MLP model using the provided dataloader for multiple folds and epochs. - Saves the best model parameters + Saves the best model parameters for each fold and records accuracy/loss. Examples: @@ -497,12 +492,12 @@ def train(self) -> None: learning_rate = self.learning_rate train_data, train_labels, test_data, test_labels = ( - self.dataloader.get_train_test_data()) + self.dataloader.get_train_test_data() + ) train_data = np.c_[train_data, np.ones(train_data.shape[0])] test_data = np.c_[test_data, np.ones(test_data.shape[0])] - _, total_label_num = self.dataloader.get_inout_dim() train_labels = self.dataloader.one_hot_encode(train_labels, total_label_num) @@ -516,21 +511,16 @@ def train(self) -> None: batch_size = 1 for j in tqdm(range(self.epoch)): - for k in range(0, train_data.shape[0], batch_size): - - batch_imgs = train_data[k: k + batch_size] - batch_labels = train_labels[k: k + batch_size] + for k in range(0, train_data.shape[0], batch_size): + batch_imgs = train_data[k : k + batch_size] + batch_labels = train_labels[k : k + batch_size] output = self.forward( - input_data=batch_imgs, - w1=w1, - w2=w2, - no_gradient=False) + input_data=batch_imgs, w1=w1, w2=w2, no_gradient=False + ) grad_w1, grad_w2 = self.back_prop( - input_data=batch_imgs, - true_labels=batch_labels, - w2=w2 + input_data=batch_imgs, true_labels=batch_labels, w2=w2 ) w1, w2 = self.update_weights(w1, w2, grad_w1, grad_w2, learning_rate) @@ -546,7 +536,7 @@ def train(self) -> None: self.test_accuracy = test_accuracy_list self.test_loss = test_loss_list - print(f"Test accuracy:", sum(test_accuracy_list)/len(test_accuracy_list)) + print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list)) if __name__ == "__main__": From e0ae20f505154dea0e97e8c07228edf40dbfac86 Mon Sep 17 00:00:00 2001 From: WeiYFan <150578207+WeiYFan@users.noreply.github.com> Date: Wed, 14 May 2025 18:52:10 +0800 Subject: [PATCH 08/11] Update multilayer_perceptron_classifier_from_scratch.py --- ...ayer_perceptron_classifier_from_scratch.py | 180 ++++++++---------- 1 file changed, 75 insertions(+), 105 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py 
b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index a822d63de9db..72a55d31b477 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -1,30 +1,17 @@ import numpy as np from numpy.random import default_rng -from tqdm import tqdm - rng = default_rng(42) - - class Dataloader: """ - DataLoader class for handling dataset operations. Supports: - - data shuffling - - one-hot encoding - - train/test splitting + DataLoader class for handling dataset, including data shuffling, + one-hot encoding, and train-test splitting. Example usage: >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] >>> y = [0, 1, 0, 0] >>> loader = Dataloader(X, y) - >>> train_X, train_y, test_X, test_y = loader.get_train_test_data() - >>> train_X.shape - (3, 2) - >>> len(train_y) - 3 - >>> test_X.shape - (1, 2) - >>> len(test_y) - 1 + >>> len(loader.get_train_test_data()) # Returns train and test data + 4 >>> loader.one_hot_encode([0, 1, 0], 2) # Returns one-hot encoded labels array([[0.99, 0. ], [0. , 0.99], @@ -38,21 +25,17 @@ class Dataloader: def __init__(self, features: list[list[float]], labels: list[int]) -> None: """ - Initializes the Dataloader instance - with a feature matrix (`features`) - and corresponding labels (`labels`). + Initializes the Dataloader instance with feature matrix + features and labels labels. Args: - features: Feature matrix of shape - (n_samples, n_features). - labels: List of labels of shape - (n_samples,). + features: Feature matrix of shape (n_samples, n_features). + labels: List of labels of shape (n_samples,). """ # random seed - self.rng = default_rng(42) self.X = np.array(features) self.y = np.array(labels) - self.class_weights = {0: 1.0, 1: 1.0} + self.class_weights = {0: 1.0, 1: 1.0} # Example class weights, adjust as needed def get_train_test_data( self, @@ -74,8 +57,8 @@ def get_train_test_data( np.array([self.y[1]]), np.array([self.y[2]]), ] - test_data = np.array([self.X[3]]) # Last sample for testing - test_labels = [np.array([self.y[3]])] # Labels as np.ndarray + test_data = np.array([self.X[3]]) + test_labels = [np.array([self.y[3]])] return train_data, train_labels, test_data, test_labels def shuffle_data( @@ -85,13 +68,11 @@ def shuffle_data( Shuffles the data randomly. Args: - paired_data: List of tuples containing data - and corresponding labels. + paired_data: List of tuples containing data and corresponding labels. Returns: A shuffled list of data-label pairs. """ - default_rng.shuffle(paired_data) # Using the new random number generator return paired_data def get_inout_dim(self) -> tuple[int, int]: @@ -129,19 +110,15 @@ class MLP: epoch (int): Number of epochs for training. hidden_dim (int): Dimension of the hidden layer. batch_size (int): Number of samples per mini-batch. - train_loss (List[float]): - List to store training loss for each fold. - train_accuracy (List[float]): - List to store training accuracy for each fold. + train_loss (List[float]): List to store training loss for each fold. + train_accuracy (List[float]): List to store training accuracy for each fold. test_loss (List[float]): List to store test loss for each fold. - test_accuracy (List[float]): - List to store test accuracy for each fold. - dataloader (Dataloader): - DataLoader object for handling training data. - inter_variable (dict): - Dictionary to store intermediate variables for backpropagation. + test_accuracy (List[float]): List to store test accuracy for each fold. 
+ dataloader (Dataloader): DataLoader object for handling training data. + inter_variable (dict): Dictionary to store intermediate variables + for backpropagation. weights1_list (List[Tuple[np.ndarray, np.ndarray]]): - List of weights for each fold. + List of weights for each fold. Methods: get_inout_dim:obtain input dimension and output dimension. @@ -159,26 +136,26 @@ class MLP: """ def __init__( - self, - dataloader: Dataloader, - epoch: int, - learning_rate: float, - gamma: float = 1.0, - hidden_dim: int = 2, + self, + dataloader: Dataloader, + epoch: int, + learning_rate: float, + gamma: float = 1.0, + hidden_dim: int = 2, ) -> None: self.learning_rate = learning_rate self.gamma = gamma # learning_rate decay hyperparameter gamma self.epoch = epoch self.hidden_dim = hidden_dim - self.train_loss = [] - self.train_accuracy = [] - self.test_loss = [] - self.test_accuracy = [] + self.train_loss: list[float] = [] + self.train_accuracy: list[float] = [] + self.test_loss: list[float] = [] + self.test_accuracy: list[float] = [] self.dataloader = dataloader - self.inter_variable = {} - self.weights1_list = [] + self.inter_variable: dict[str, np.ndarray] = {} + self.weights1_list: list[np.ndarray] = [] def get_inout_dim(self) -> tuple[int, int]: """ @@ -215,7 +192,8 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]: """ in_dim, out_dim = self.dataloader.get_inout_dim() - w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * np.sqrt(2.0 / in_dim) + w1 = (rng.standard_normal((in_dim + 1, self.hidden_dim)) * + np.sqrt(2.0 / in_dim)) w2 = rng.standard_normal((self.hidden_dim, out_dim)) * np.sqrt( 2.0 / self.hidden_dim ) @@ -262,23 +240,19 @@ def forward( Args: input_data: Input data, shape (batch_size, input_dim). w1: Weight matrix for input to hidden layer, - shape (input_dim + 1, hidden_dim). + shape (input_dim + 1, hidden_dim). w2: Weight matrix for hidden to output layer, - shape (hidden_dim, output_dim). + shape (hidden_dim, output_dim). no_gradient: If True, returns output without storing intermediates. Returns: - Output of the network after forward pass, - shape (batch_size, output_dim). + Output of the network after forward pass, shape (batch_size, output_dim). Examples: >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) - >>> x = np.array([[1.0, 2.0, 1.0]]) - + >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias >>> w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) - >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]]) - >>> output = mlp.forward(x, w1, w2) >>> output.shape (1, 2) @@ -306,33 +280,29 @@ def back_prop( Performs backpropagation to compute gradients for the weights. Args: - input_data: Input data, shape - (batch_size, input_dim). - true_labels: True labels, shape - (batch_size, output_dim). - w1: Weight matrix for input to - hidden layer, shape (input_dim + 1, hidden_dim). - w2: Weight matrix for hidden - to output layer, shape (hidden_dim, output_dim). + input_data: Input data, shape (batch_size, input_dim). + true_labels: True labels, shape (batch_size, output_dim). + w2: Weight matrix for hidden to output layer, + shape (hidden_dim, output_dim). Returns: Tuple of gradients (grad_w1, grad_w2) for the weight matrices. 
Examples: >>> mlp = MLP(None, 1, 0.1, hidden_dim=2) - >>> x = np.array([[1.0, 2.0, 1.0]]) - >>> y = np.array([[0.0, 1.0]]) + >>> x = np.array([[1.0, 2.0, 1.0]]) # batch_size=1, input_dim=2 + bias + >>> y = np.array([[0.0, 1.0]]) # batch_size=1, output_dim=2 >>> w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) - >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]]) - >>> _ = mlp.forward(x, w1, w2) + >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]]) # (hidden_dim=2, output_dim=2) + >>> _ = mlp.forward(x, w1, w2) # Run forward to set inter_variable >>> grad_w1, grad_w2 = mlp.back_prop(x, y, w2) >>> grad_w1.shape (3, 2) >>> grad_w2.shape (2, 2) """ - a1 = self.inter_variable["a1"] + a1 = self.inter_variable["a1"] # (batch_size, hidden_dim) z1 = self.inter_variable["z1"] - a2 = self.inter_variable["a2"] + a2 = self.inter_variable["a2"] # (batch_size, output_dim) batch_size = input_data.shape[0] @@ -342,9 +312,13 @@ def back_prop( z1 ) # (batch, hidden_dim) 使用relu时 - grad_w2 = np.dot(a1.T, delta_k) / batch_size + grad_w2 = ( + np.dot(a1.T, delta_k) / batch_size + ) # (hidden, batch).dot(batch, output) = (hidden, output) input_data_flat = input_data.reshape(input_data.shape[0], -1) - grad_w1 = np.dot(input_data_flat.T, delta_j) / batch_size + grad_w1 = ( + np.dot(input_data_flat.T, delta_j) / batch_size + ) # (input_dim, batch_size).dot(batch, hidden) = (input, hidden) return grad_w1, grad_w2 @@ -357,16 +331,17 @@ def update_weights( learning_rate: float, ) -> tuple[np.ndarray, np.ndarray]: """ - Updates the weight matrices using - the computed gradients and learning rate. + Updates the weight matrices using the computed gradients and learning rate. Args: - w1: Weight matrix for input to hidden layer, shape - (input_dim + 1, hidden_dim). - w2: Weight matrix for hidden to output layer, shape - (hidden_dim, output_dim). - grad_w1: Gradient for w1, shape (input_dim + 1, hidden_dim). - grad_w2: Gradient for w2, shape (hidden_dim, output_dim). + w1: Weight matrix for input to hidden layer, + shape (input_dim + 1, hidden_dim). + w2: Weight matrix for hidden to output layer, + shape (hidden_dim, output_dim). + grad_w1: Gradient for w1, + shape (input_dim + 1, hidden_dim). + grad_w2: Gradient for w2, + shape (hidden_dim, output_dim). learning_rate: Learning rate for weight updates. Returns: @@ -378,8 +353,8 @@ def update_weights( >>> w2 = np.array([[0.7, 0.8], [0.9, 1.0]]) >>> grad_w1 = np.array([[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]]) >>> grad_w2 = np.array([[0.7, 0.8], [0.9, 1.0]]) - >>> learning_rate = 0.1 - >>> new_w1, new_w2 = mlp.update_weights(w1, w2, grad_w1, grad_w2, learning_rate) + >>> lr = 0.1 + >>> new_w1, new_w2 = mlp.update_weights(w1, w2, grad_w1, grad_w2, lr) >>> new_w1==np.array([[0.09, 0.18], [0.27, 0.36], [0.45, 0.54]]) array([[ True, True], [ True, True], @@ -413,8 +388,7 @@ def update_learning_rate(self, learning_rate: float) -> float: @staticmethod def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float: """ - Computes the accuracy of predictions - by comparing predicted and true labels. + Computes the accuracy of predictions by comparing predicted and true labels. Args: label: True labels, shape (batch_size, num_classes). @@ -435,8 +409,7 @@ def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float: @staticmethod def loss(output: np.ndarray, label: np.ndarray) -> float: """ - Computes the mean squared error loss - between predictions and true labels. + Computes the mean squared error loss between predictions and true labels. Args: output: Predicted outputs, shape (batch_size, num_classes). 
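The accuracy and loss helpers described above reduce to an argmax comparison and a squared error averaged over the batch with a 1/2 factor. Spelled out on a two-sample batch (values borrowed from the loss doctest):

    import numpy as np

    label = np.array([[1.0, 0.0], [0.0, 1.0]])     # one-hot targets
    output = np.array([[0.9, 0.1], [0.2, 0.8]])    # predictions

    accuracy = (output.argmax(axis=1) == label.argmax(axis=1)).mean()    # 1.0
    loss = np.sum((output - label) ** 2) / (2 * label.shape[0])          # 0.025

On recent NumPy releases these reductions return np.float64 scalars whose repr includes the type, which appears to be why a later patch in the series rewrites the doctest expectations as np.float64(1.0) and np.float64(0.025).
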
@@ -476,17 +449,16 @@ def get_acc_loss(self) -> tuple[list[float], list[float]]: def train(self) -> None: """ Trains the MLP model using the provided dataloader - for multiple folds and epochs. + for multiple folds and epochs. - Saves the best model parameters - for each fold and records accuracy/loss. + Saves the best model parameters for each fold and records accuracy/loss. Examples: >>> X = [[0.0, 0.0], [1.0, 1.0], [1.0, 0.0], [0.0, 1.0]] >>> y = [0, 1, 0, 0] >>> loader = Dataloader(X, y) >>> mlp = MLP(loader, epoch=2, learning_rate=0.1, hidden_dim=2) - >>> mlp.train() #doctest:+ELLIPSIS + >>> mlp.train() # doctest: +ELLIPSIS Test accuracy: ... """ @@ -505,19 +477,17 @@ def train(self) -> None: w1, w2 = self.initialize() - train_accuracy_list, train_loss_list = [], [] - test_accuracy_list, test_loss_list = [], [] + test_accuracy_list: list[float] = [] + test_loss_list: list[float] = [] batch_size = 1 - for j in tqdm(range(self.epoch)): - for k in range(0, train_data.shape[0], batch_size): + for _j in range(self.epoch): + for k in range(0, train_data.shape[0], batch_size): # retrieve every image batch_imgs = train_data[k : k + batch_size] batch_labels = train_labels[k : k + batch_size] - output = self.forward( - input_data=batch_imgs, w1=w1, w2=w2, no_gradient=False - ) + self.forward(input_data=batch_imgs, w1=w1, w2=w2, no_gradient=False) grad_w1, grad_w2 = self.back_prop( input_data=batch_imgs, true_labels=batch_labels, w2=w2 @@ -536,7 +506,7 @@ def train(self) -> None: self.test_accuracy = test_accuracy_list self.test_loss = test_loss_list - print(f"Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list)) + print("Test accuracy:", sum(test_accuracy_list) / len(test_accuracy_list)) if __name__ == "__main__": From 8988727767f4e32fbc7dcfd3a8005db2227051d2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 10:53:27 +0000 Subject: [PATCH 09/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...layer_perceptron_classifier_from_scratch.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index 72a55d31b477..45bb9588cd0d 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -1,6 +1,9 @@ import numpy as np from numpy.random import default_rng + rng = default_rng(42) + + class Dataloader: """ DataLoader class for handling dataset, including data shuffling, @@ -136,12 +139,12 @@ class MLP: """ def __init__( - self, - dataloader: Dataloader, - epoch: int, - learning_rate: float, - gamma: float = 1.0, - hidden_dim: int = 2, + self, + dataloader: Dataloader, + epoch: int, + learning_rate: float, + gamma: float = 1.0, + hidden_dim: int = 2, ) -> None: self.learning_rate = learning_rate self.gamma = gamma # learning_rate decay hyperparameter gamma @@ -192,8 +195,7 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]: """ in_dim, out_dim = self.dataloader.get_inout_dim() - w1 = (rng.standard_normal((in_dim + 1, self.hidden_dim)) * - np.sqrt(2.0 / in_dim)) + w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * np.sqrt(2.0 / in_dim) w2 = rng.standard_normal((self.hidden_dim, out_dim)) * np.sqrt( 2.0 / self.hidden_dim ) From 644ba59de5817d7c117f2cfa035ef7cd200adfb7 Mon Sep 17 00:00:00 
2001 From: WeiYFan <150578207+WeiYFan@users.noreply.github.com> Date: Wed, 14 May 2025 19:02:39 +0800 Subject: [PATCH 10/11] Update multilayer_perceptron_classifier_from_scratch.py --- ...ayer_perceptron_classifier_from_scratch.py | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index 45bb9588cd0d..bef3b18f6234 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -1,9 +1,6 @@ import numpy as np from numpy.random import default_rng - rng = default_rng(42) - - class Dataloader: """ DataLoader class for handling dataset, including data shuffling, @@ -139,12 +136,12 @@ class MLP: """ def __init__( - self, - dataloader: Dataloader, - epoch: int, - learning_rate: float, - gamma: float = 1.0, - hidden_dim: int = 2, + self, + dataloader: Dataloader, + epoch: int, + learning_rate: float, + gamma: float = 1.0, + hidden_dim: int = 2, ) -> None: self.learning_rate = learning_rate self.gamma = gamma # learning_rate decay hyperparameter gamma @@ -195,7 +192,8 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]: """ in_dim, out_dim = self.dataloader.get_inout_dim() - w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * np.sqrt(2.0 / in_dim) + w1 = (rng.standard_normal((in_dim + 1, self.hidden_dim)) * + np.sqrt(2.0 / in_dim)) w2 = rng.standard_normal((self.hidden_dim, out_dim)) * np.sqrt( 2.0 / self.hidden_dim ) @@ -404,7 +402,7 @@ def accuracy(label: np.ndarray, y_hat: np.ndarray) -> float: >>> label = np.array([[1, 0], [0, 1], [1, 0]]) >>> y_hat = np.array([[0.9, 0.1], [0.2, 0.8], [0.6, 0.4]]) >>> mlp.accuracy(label, y_hat) - 1.0 + np.float64(1.0) """ return (y_hat.argmax(axis=1) == label.argmax(axis=1)).mean() @@ -425,7 +423,7 @@ def loss(output: np.ndarray, label: np.ndarray) -> float: >>> output = np.array([[0.9, 0.1], [0.2, 0.8]]) >>> label = np.array([[1.0, 0.0], [0.0, 1.0]]) >>> round(mlp.loss(output, label), 3) - 0.025 + np.float64(0.025) """ return np.sum((output - label) ** 2) / (2 * label.shape[0]) From 1c822a859ee22277356d58b961b04096caf909eb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 14 May 2025 11:04:10 +0000 Subject: [PATCH 11/11] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ...layer_perceptron_classifier_from_scratch.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/machine_learning/multilayer_perceptron_classifier_from_scratch.py b/machine_learning/multilayer_perceptron_classifier_from_scratch.py index bef3b18f6234..e4cdec35d234 100644 --- a/machine_learning/multilayer_perceptron_classifier_from_scratch.py +++ b/machine_learning/multilayer_perceptron_classifier_from_scratch.py @@ -1,6 +1,9 @@ import numpy as np from numpy.random import default_rng + rng = default_rng(42) + + class Dataloader: """ DataLoader class for handling dataset, including data shuffling, @@ -136,12 +139,12 @@ class MLP: """ def __init__( - self, - dataloader: Dataloader, - epoch: int, - learning_rate: float, - gamma: float = 1.0, - hidden_dim: int = 2, + self, + dataloader: Dataloader, + epoch: int, + learning_rate: float, + gamma: float = 1.0, + hidden_dim: int = 2, ) -> None: self.learning_rate = learning_rate self.gamma = gamma # learning_rate decay 
hyperparameter gamma @@ -192,8 +195,7 @@ def initialize(self) -> tuple[np.ndarray, np.ndarray]: """ in_dim, out_dim = self.dataloader.get_inout_dim() - w1 = (rng.standard_normal((in_dim + 1, self.hidden_dim)) * - np.sqrt(2.0 / in_dim)) + w1 = rng.standard_normal((in_dim + 1, self.hidden_dim)) * np.sqrt(2.0 / in_dim) w2 = rng.standard_normal((self.hidden_dim, out_dim)) * np.sqrt( 2.0 / self.hidden_dim )