Solution Review: Training - 3 Layered Neural Network
Explore how to train a three-layer neural network for letter classification. Understand the workflow of forward propagation, error calculation, backward propagation, and parameter updates to improve model accuracy during training.
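Before looking at the full solution, a small worked example of the error-calculation step can make the loss concrete. The snippet below is only an illustration (the prediction values are made up, not produced by the network): it applies the same cross-entropy expression used by the solution's calculate_error function to one one-hot label and one softmax-style prediction.

import numpy as np

# Illustration of the error-calculation step with made-up values:
# a one-hot label for the letter A and a hypothetical softmax output
y_true = np.array([[1, 0, 0]])
y_pred = np.array([[0.7, 0.2, 0.1]])

# Same expression as calculate_error in the solution below
loss = np.sum(-y_true * np.log(y_pred) - (1 - y_true) * np.log(1 - y_pred))
print(round(loss, 3))  # ~0.685; the loss shrinks as y_pred moves toward y_true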
Solution
Python 3.5
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(z):
    """Compute sigmoid values for each set of scores in z"""
    return 1 / (1 + np.exp(-z))

def softmax(x):
    """Compute softmax values for each set of scores in x"""
    return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

def forward_propagation(x, w1, w2, w3, b1, b2, b3):
    """Computes the forward propagation operation for the 3-layered
    neural network and returns the output at the 2 hidden layers
    and the output layer"""
    net_h1 = np.dot(x, w1) + b1       # net output at the first hidden layer
    out_h1 = sigmoid(net_h1)          # applying the sigmoid activation to the first hidden layer net output
    net_h2 = np.dot(out_h1, w2) + b2  # net output at the second hidden layer
    out_h2 = sigmoid(net_h2)          # applying the sigmoid activation to the second hidden layer net output
    net_y = np.dot(out_h2, w3) + b3   # net output at the output layer
    out_y = softmax(net_y)            # applying the softmax activation to the net output of the output layer
    return out_h1, out_h2, out_y

def backpropagation(y, out_y, out_h2, out_h1, w3, w2, x):
    """Computes the backpropagation operation for the
    3-layered neural network and returns the gradients
    of weights and biases"""
    # Back propagating the error from the output layer to the second hidden layer
    l3_error = out_y - y                           # Calculating the error at layer 3
    dW3 = np.dot(out_h2.T, l3_error)               # Change in weights at layer 3
    db3 = np.sum(l3_error, axis=0, keepdims=True)  # Change in bias at layer 3

    # Back propagating the error from the second hidden layer to the first hidden layer
    dh2 = np.dot(w3, l3_error.T)
    l2_error = np.multiply(dh2.T, out_h2 * (1 - out_h2))  # Calculating the error at layer 2
    dW2 = np.dot(out_h1.T, l2_error)               # Change in weights at layer 2
    db2 = np.sum(l2_error, axis=0, keepdims=True)  # Change in bias at layer 2

    # Back propagating the error from the first hidden layer to the input layer
    dh1 = np.dot(w2, l2_error.T)
    l1_error = np.multiply(dh1.T, out_h1 * (1 - out_h1))  # Calculating the error at layer 1
    dW1 = np.dot(x.T, l1_error)                    # Change in weights at layer 1
    db1 = np.sum(l1_error, axis=0, keepdims=True)  # Change in bias at layer 1

    return dW1, dW2, dW3, db1, db2, db3

def update_parameters(w1, dW1, b1, db1, w2, dW2, b2, db2, w3, dW3, b3, db3, learning_rate):
    """Update parameters after the gradient descent operation"""
    w1 = w1 - learning_rate * dW1
    b1 = b1 - learning_rate * db1
    w2 = w2 - learning_rate * dW2
    b2 = b2 - learning_rate * db2
    w3 = w3 - learning_rate * dW3
    b3 = b3 - learning_rate * db3
    return w1, b1, w2, b2, w3, b3

def calculate_error(y, y_predicted):
    """Calculate the cross-entropy loss"""
    loss = np.sum(- y * np.log(y_predicted) - (1 - y) * np.log(1 - y_predicted))
    return loss

def train(x, y, w1, w2, w3, b1, b2, b3, epochs, learning_rate):
    """Train the 3-layered neural network"""
    losses = np.zeros((epochs, 1))
    for i in range(epochs):
        out_h1, out_h2, out_y = forward_propagation(x, w1, w2, w3, b1, b2, b3)
        losses[i, 0] = calculate_error(y, out_y)
        dW1, dW2, dW3, db1, db2, db3 = backpropagation(y, out_y, out_h2, out_h1, w3, w2, x)
        w1, b1, w2, b2, w3, b3 = update_parameters(w1, dW1, b1, db1, w2, dW2, b2, db2,
                                                   w3, dW3, b3, db3, learning_rate)
    return w1, b1, w2, b2, w3, b3, losses

# Creating the data set
# A
a = [0, 0, 1, 1, 0, 0,
     0, 1, 0, 0, 1, 0,
     1, 1, 1, 1, 1, 1,
     1, 0, 0, 0, 0, 1,
     1, 0, 0, 0, 0, 1]
# B
b = [0, 1, 1, 1, 1, 0,
     0, 1, 0, 0, 1, 0,
     0, 1, 1, 1, 1, 0,
     0, 1, 0, 0, 1, 0,
     0, 1, 1, 1, 1, 0]
# C
c = [0, 1, 1, 1, 1, 0,
     0, 1, 0, 0, 0, 0,
     0, 1, 0, 0, 0, 0,
     0, 1, 0, 0, 0, 0,
     0, 1, 1, 1, 1, 0]

# Creating the labels
y = [[1, 0, 0],
     [0, 1, 0],
     [0, 0, 1]]

# Converting the data into a NumPy array
x = np.array([a, b, c])
# Labels are also converted into a NumPy array
y = np.array(y)

np.random.seed(42)  # seed function to generate the same random values

n_x = 30   # number of nodes in the input layer
n_h1 = 5   # number of nodes in the first hidden layer
n_h2 = 4   # number of nodes in the second hidden layer
n_y = 3    # number of nodes in the output layer

w1 = np.random.randn(n_x, n_h1)   # weights of the first hidden layer
w2 = np.random.randn(n_h1, n_h2)  # weights of the second hidden layer
w3 = np.random.randn(n_h2, n_y)   # weights of the output layer
b1 = np.zeros((1, n_h1))          # bias of the first hidden layer
b2 = np.zeros((1, n_h2))          # bias of the second hidden layer
b3 = np.zeros((1, n_y))           # bias of the output layer

epochs = 1000
learning_rate = 0.5

# Train the neural network
w1, b1, w2, b2, w3, b3, losses = train(x, y, w1, w2, w3, b1, b2, b3, epochs, learning_rate)

# Evaluating the performance by plotting the loss over the epochs
plt.figure()
plt.plot(losses)
plt.xlabel("EPOCHS")
plt.ylabel("Loss value")
plt.savefig('output/legend.png')
plt.show()
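After train returns, the learned weights and biases can be reused for prediction. The following usage sketch is not part of the original solution; it assumes the variables from the script above (a, forward_propagation, and the trained w1 ... b3) are still in scope, feeds the letter-A pattern through the trained network, and reads off the most probable class with np.argmax.

# Usage sketch (assumes the solution's variables are still in scope):
# classify one of the training patterns with the trained parameters
letters = ["A", "B", "C"]  # class index -> letter, matching the order of the labels in y
_, _, out_y = forward_propagation(np.array([a]), w1, w2, w3, b1, b2, b3)
predicted = letters[int(np.argmax(out_y, axis=1)[0])]  # pick the class with the highest softmax score
print("Predicted letter:", predicted)  # expected to print "A" once training has converged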
Explanation
train function:
The function takes as parameters the weights w1, w2, and w3, and the biases b1, ...
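For reference, a quick way to confirm what the solution passes into train is to check the array shapes set up in the script above. This check is my own addition, not part of the lesson, and it assumes the solution's variables are in scope.

# Shape check for the arguments passed to train (assumes the solution's variables are in scope)
assert x.shape == (3, 30)                            # three letter patterns, 30 pixels each
assert y.shape == (3, 3)                             # one-hot labels for A, B, and C
assert w1.shape == (30, 5) and b1.shape == (1, 5)    # first hidden layer
assert w2.shape == (5, 4) and b2.shape == (1, 4)     # second hidden layer
assert w3.shape == (4, 3) and b3.shape == (1, 3)     # output layer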