Solution Review: Train the XOR Multilayer Perceptron
Explore the process of training a multilayer perceptron to solve the XOR problem. Learn to implement forward propagation, calculate loss, apply backpropagation to compute gradients, and update parameters effectively. This lesson helps you understand key steps in neural network training using Python and NumPy.
We'll cover the following...
Solution
Python 3.5
import numpy as np
from matplotlib import pyplot as plt

def sigmoid(z):
    """Sigmoid activation function on input z"""
    return 1 / (1 + np.exp(-z))  # squashes z into the range (0, 1)

def forward_propagation(X, Y, W1, b1, W2, b2):
    """Computes the forward propagation operation of a neural network and
    returns the output after applying the sigmoid activation function"""
    net_h = np.dot(W1, X) + b1      # net (pre-activation) value at the hidden layer
    out_h = sigmoid(net_h)          # hidden layer output after applying sigmoid
    net_y = np.dot(W2, out_h) + b2  # net (pre-activation) value at the output layer
    out_y = sigmoid(net_y)          # actual output at the output layer
    return out_h, out_y

def calculate_error(y, y_predicted):
    """Computes the cross-entropy error"""
    loss = np.sum(-y * np.log(y_predicted) - (1 - y) * np.log(1 - y_predicted))
    return loss

def backward_propagation(X, Y, out_h, out_y, W2):
    """Computes the backpropagation operation of a neural network and
    returns the derivatives of the weights and biases"""
    l2_error = out_y - Y                             # actual - target
    dW2 = np.dot(l2_error, out_h.T)                  # derivative of layer 2 weights: dot product of layer 2 error and hidden layer output
    db2 = np.sum(l2_error, axis=1, keepdims=True)    # derivative of layer 2 bias is simply the error at layer 2
    dh = np.dot(W2.T, l2_error)                      # propagate the layer 2 error back through the layer 2 weights
    l1_error = np.multiply(dh, out_h * (1 - out_h))  # layer 1 error, scaled by the sigmoid derivative
    dW1 = np.dot(l1_error, X.T)                      # derivative of layer 1 weights: dot product of layer 1 error and input
    db1 = np.sum(l1_error, axis=1, keepdims=True)    # derivative of layer 1 bias is simply the error at layer 1
    return dW1, db1, dW2, db2                        # return the derivatives of the parameters

def update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate):
    """Updates the weights and biases and returns their values"""
    W1 = W1 - learning_rate * dW1
    W2 = W2 - learning_rate * dW2
    b1 = b1 - learning_rate * db1
    b2 = b2 - learning_rate * db2
    return W1, b1, W2, b2

def train(X, Y, W1, b1, W2, b2, num_iterations, losses, learning_rate):
    """Trains the neural network and returns the updated weights, biases, and losses"""
    for i in range(num_iterations):
        A1, A2 = forward_propagation(X, Y, W1, b1, W2, b2)
        losses[i, 0] = calculate_error(Y, A2)
        dW1, db1, dW2, db2 = backward_propagation(X, Y, A1, A2, W2)
        W1, b1, W2, b2 = update_parameters(W1, b1, W2, b2, dW1, db1, dW2, db2, learning_rate)
    return W1, b1, W2, b2, losses

np.random.seed(42)  # seed the RNG so the same random initial weights are generated on every run

# Initializing parameters
X = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])
Y = np.array([[0, 1, 1, 0]])  # XOR targets

n_h = 2            # hidden units
n_x = X.shape[0]   # input features
n_y = Y.shape[0]   # output units

W1 = np.random.randn(n_h, n_x)
b1 = np.zeros((n_h, 1))
W2 = np.random.randn(n_y, n_h)
b2 = np.zeros((n_y, 1))

num_iterations = 100000
learning_rate = 0.01
losses = np.zeros((num_iterations, 1))

W1, b1, W2, b2, losses = train(X, Y, W1, b1, W2, b2, num_iterations, losses, learning_rate)

print("After training:\n")
print("W1:\n", W1)
print("b1:\n", b1)
print("W2:\n", W2)
print("b2:\n", b2)
print("losses:\n", losses)

# Evaluating the performance
plt.figure()
plt.plot(losses)
plt.xlabel("EPOCHS")
plt.ylabel("Loss value")
plt.savefig('output/legend.png')  # save the figure before showing it so the saved image is not blank
plt.show()

# Predicting values
A1, A2 = forward_propagation(X, Y, W1, b1, W2, b2)
pred = (A2 > 0.5) * 1.0
print("Predicted labels:", pred)
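A useful sanity check for the backward_propagation implementation is to compare its analytic gradients against numerical (finite-difference) gradients of the loss. The sketch below is a minimal example of that idea, not part of the original solution; it assumes the functions and parameters defined in the code above are available in the same session, and the step size eps is an arbitrary choice.

# Minimal gradient check for dW2 (assumes the solution code above has been run)
eps = 1e-6

out_h, out_y = forward_propagation(X, Y, W1, b1, W2, b2)
dW1, db1, dW2, db2 = backward_propagation(X, Y, out_h, out_y, W2)

numeric_dW2 = np.zeros_like(W2)
for i in range(W2.shape[0]):
    for j in range(W2.shape[1]):
        W2_plus = W2.copy();  W2_plus[i, j] += eps
        W2_minus = W2.copy(); W2_minus[i, j] -= eps
        _, out_plus = forward_propagation(X, Y, W1, b1, W2_plus, b2)
        _, out_minus = forward_propagation(X, Y, W1, b1, W2_minus, b2)
        # central-difference approximation of d(loss)/d(W2[i, j])
        numeric_dW2[i, j] = (calculate_error(Y, out_plus) - calculate_error(Y, out_minus)) / (2 * eps)

print("max |analytic - numeric| for dW2:", np.max(np.abs(dW2 - numeric_dW2)))

If backpropagation is implemented correctly, the printed difference should be very small (on the order of the finite-difference error).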
Explanation
The network has two input units, a hidden layer of two sigmoid units, and a single sigmoid output unit. The weights W1 and W2 are initialized from a standard normal distribution (seeded with 42 for reproducibility), and the biases start at zero. Each of the 100,000 training iterations repeats four steps: forward_propagation computes the hidden activations and the output for all four XOR examples at once; calculate_error measures the cross-entropy loss between the predictions and the XOR targets; backward_propagation computes the gradients of the loss with respect to the weights and biases of both layers; and update_parameters performs a gradient-descent step, moving each parameter against its gradient scaled by the learning rate. After training, the recorded losses are plotted against the epochs to confirm that the loss decreases, and the predicted labels are obtained by thresholding the network's outputs at 0.5.
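For reference, the gradients computed in backward_propagation correspond to the expressions below. The symbols are introduced here only for notation (they do not appear in the code): $a_h$ is out_h, $\hat{y}$ is out_y, $\odot$ is element-wise multiplication, and the sums run over the training examples.

$$
\delta_2 = \hat{y} - y, \qquad dW_2 = \delta_2\, a_h^{\top}, \qquad db_2 = \sum \delta_2,
$$

$$
\delta_1 = \left(W_2^{\top} \delta_2\right) \odot a_h \odot (1 - a_h), \qquad dW_1 = \delta_1\, X^{\top}, \qquad db_1 = \sum \delta_1.
$$

The simple form of $\delta_2$ comes from pairing the sigmoid output with the cross-entropy loss: the derivative of the loss with respect to the output layer's pre-activation reduces to the difference between the prediction and the target.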