
Why isn't the backpropagation in my neural network correctly making it learn?

I have to code a neural network from scratch, using only the numpy external library. As input I have images, each of which I turn into a 4200-length vector of binary data, together with a label saying whether or not the image is a face. I find that the neural network isn't learning.
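Concretely, the training arrays look roughly like this (the actual image loading and binarization code is omitted; the names and dataset size below are just illustrative):

import numpy as np

# Illustrative shapes only; the real loading/binarization code is not shown.
n_images = 100                                   # hypothetical dataset size
data = np.zeros((n_images, 4200), dtype=float)   # one flattened 0/1 image per row
labels = np.zeros(n_images, dtype=int)           # 1 = face, 0 = not a face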

Here is the code I used:

import numpy as np


def testNeuralNetwork(weights, data, labels):
    # Unpack the flat weight vector into the two layer matrices.
    input_size = data.shape[1]
    theta1 = weights[:input_size * (input_size+1)].reshape(input_size, input_size+1)
    theta2 = weights[input_size * (input_size+1):].reshape(1, input_size+1)
    bias1, bias2, threshold, correct = 1, 1, 0.5, 0
    for i in range(data.shape[0]):
        current_input = data[i]
        true_out = labels[i]
        # Prepend the bias unit to the input, then run a forward pass.
        current_input = np.insert(current_input, 0, bias1)
        (_, expected_out) = forward_prop(current_input, theta1, theta2, bias2)
        binary_prediction = (expected_out > threshold).astype(int)
        if true_out == binary_prediction:
            correct += 1
    return correct / data.shape[0]


# input: 4200 for face
# 4200 x 4201 = theta1
# 1 x 4201 = theta2
# choose lambda=0
# choose alpha=1
def trainNeuralNetwork(data, labels):
    # initialize
    input_size = data.shape[1]
    totalWeightSize = input_size*(input_size+1) + (input_size+1)
    bound = 1 / np.sqrt(input_size)
    weights = np.random.uniform(-bound, bound, totalWeightSize)
    theta1 = weights[:input_size * (input_size+1)].reshape(input_size, input_size+1)
    theta2 = weights[input_size * (input_size+1):].reshape(1, input_size+1)
    bias1, bias2 = 1, 1
    for j in range(10):
        print("Epoch " + str(j))
        # Gradient accumulators for the two weight matrices.
        delta1 = np.zeros_like(theta1)
        delta2 = np.zeros_like(theta2)
        for i in range(data.shape[0]):
            # forward prop
            current_input = np.insert(data[i], 0, bias1)
            true_out = labels[i]
            a2, expected_out = forward_prop(current_input, theta1, theta2, bias2)

            # Backprop
            d3 = expected_out - true_out
            gz2 = a2 * (1 - a2)
            d2 = (theta2.T @ d3) * gz2
            # dropping derivative corresponding to bias in hidden layer
            d2 = d2[1:]
            delta1 = delta1 + np.outer(d2, current_input)
            delta2 = delta2 + np.outer(d3, a2)
        AvgRegGrad1 = (1/input_size) * delta1
        AvgRegGrad2 = (1/input_size) * delta2
        theta1 = theta1 - AvgRegGrad1
        theta2 = theta2 - AvgRegGrad2
    # Flatten the two matrices back into a single weight vector.
    flat_weights1 = theta1.flatten()
    flat_weights2 = theta2.flatten()
    weights_final = np.concatenate((flat_weights1, flat_weights2))
    return weights_final


def forward_prop(input, theta1, theta2, bias2):
    # Hidden layer: linear combination followed by the sigmoid activation.
    z2 = theta1 @ input
    a2 = sigmoidFunc(z2)
    # Prepend the hidden-layer bias unit before the output layer.
    a2 = np.insert(a2, 0, bias2)
    z3 = theta2 @ a2
    expected_out = sigmoidFunc(z3)
    return (a2, expected_out)


def sigmoidFunc(x):
    return 1 / (1 + np.exp(-x))
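A minimal driver along these lines exercises the functions above end to end; random arrays stand in for the real face images here, and the input width is shrunk only to keep the run quick (the real vectors are 4200 long):

rng = np.random.default_rng(0)
n_samples, input_size = 50, 20   # the real data uses input_size = 4200
data = rng.integers(0, 2, size=(n_samples, input_size)).astype(float)
labels = rng.integers(0, 2, size=n_samples)

weights = trainNeuralNetwork(data, labels)
accuracy = testNeuralNetwork(weights, data, labels)
print("Accuracy on the training set:", accuracy)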
