I have to code a neural network from scratch, using only NumPy as an external library. As input data I have images, each of which I turn into a 4200-length vector of binary values, together with a label indicating whether or not the image is a face. I find that the neural network isn't learning.
Here is the code I used:
def testNeuralNetwork(weights, data, labels): input_size = data.shape[1] theta1 = weights[:input_size * (input_size+1)].reshape(input_size, input_size+1) theta2 = weights[input_size * (input_size+1):].reshape(1, input_size+1) bias1, bias2, threshold, correct = 1, 1, 0.5, 0 for i in range(data.shape[0]): current_input = data[i] true_out = labels[i] current_input = np.insert(current_input, 0, bias1) (_, expected_out) = forward_prop(current_input, theta1, theta2, bias2) binary_prediction = (expected_out > threshold).astype(int) if true_out == binary_prediction: correct+=1 return correct/data.shape[0]# input: 4200 for face# 4200 x 4201 = theta1# 1 x 4201 = theta2# choose lambda=0# choose alpha=1def trainNeuralNetwork(data, labels): # initialize input_size = data.shape[1] totalWeightSize = input_size*(input_size+1)+(input_size+1) bound = 1 / np.sqrt(input_size) weights = np.random.uniform(-bound, bound, totalWeightSize) theta1 = weights[:input_size * (input_size+1)].reshape(input_size, input_size+1) theta2 = weights[input_size * (input_size+1):].reshape(1, input_size+1) bias1, bias2 = 1, 1 for j in range(10): print("Epoch " + str(j)) delta1 = np.zeros_like(theta1) delta2 = np.zeros_like(theta2) for i in range(data.shape[0]): # forward prop current_input = np.insert(data[i], 0, bias1) true_out = labels[i] a2, expected_out = forward_prop(current_input, theta1, theta2, bias2) # Backprop d3 = expected_out - true_out gz2 = a2 * (1 - a2) d2 = (theta2.T @ d3) * gz2 # dropping derivative corresponding to bias in hidden layer d2 = d2[1:] delta1 = delta1 + np.outer(d2, current_input) delta2 = delta2 + np.outer(d3, a2) AvgRegGrad1 = (1/input_size)*delta1 AvgRegGrad2 = (1/input_size)*delta2 theta1 = theta1 - AvgRegGrad1 theta2 = theta2 - AvgRegGrad2 flat_weights1 = theta1.flatten() flat_weights2 = theta2.flatten() weights_final = np.concatenate((flat_weights1, flat_weights2)) return weights_finaldef forward_prop(input, theta1, theta2, bias2): z2 = theta1 @ input a2 = 
sigmoidFunc(z2) a2 = np.insert(a2, 0, bias2) z3 = theta2 @ a2 expected_out = sigmoidFunc(z3) return (a2, expected_out)def sigmoidFunc(x): return 1 / (1 + np.exp(-x))