diff --git a/Java/Algorithms/Neurons/FFNN.java b/Java/Algorithms/Neurons/FFNN.java
new file mode 100644
index 00000000..d1c75560
--- /dev/null
+++ b/Java/Algorithms/Neurons/FFNN.java
@@ -0,0 +1,139 @@
+# Libraries
+# Standard library
+import random
+
+# Third-party libraries
+import numpy as np
+
+# from .ActivationFunction import Sigmoid
+
+
+class Network:
+    """A module to implement the stochastic gradient descent learning
+    algorithm for a feedforward neural network. Gradients are calculated
+    using backpropagation."""
+
+    def __init__(self, sizes):
+        """The list ``sizes`` contains the number of neurons in the
+        respective layers of the network. For example, if the list
+        was [2, 3, 1] then it would be a three-layer network, with the
+        first layer containing 2 neurons, the second layer 3 neurons,
+        and the third layer 1 neuron. The biases and weights for the
+        network are initialized randomly, using a Gaussian
+        distribution with mean 0 and variance 1. Note that the first
+        layer is assumed to be an input layer, and by convention we
+        won't set any biases for those neurons, since biases are only
+        ever used in computing the outputs from later layers."""
+        self.num_layers = len(sizes)
+        self.sizes = sizes
+        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
+        self.weights = [np.random.randn(y, x) for x, y in zip(sizes[:-1], sizes[1:])]
+
+    def feedforward(self, a):
+        """Return the output of the network if ``a`` is input."""
+        for b, w in zip(self.biases, self.weights):
+            a = sigmoid(np.dot(w, a) + b)
+        return a
+
+    def SGD(self, training_data, epochs, mini_batch_size, eta, test_data=None):
+        """Train the neural network using mini-batch stochastic
+        gradient descent. The ``training_data`` is a list of tuples
+        ``(x, y)`` representing the training inputs and the desired
+        outputs. The other non-optional parameters are
+        self-explanatory. If ``test_data`` is provided then the
+        network will be evaluated against the test data after each
+        epoch and partial progress printed out. This is useful for
+        tracking progress, but slows things down substantially."""
+        if test_data:
+            n_test = len(test_data)
+        n = len(training_data)
+        for j in range(epochs):
+            random.shuffle(training_data)
+            mini_batches = [
+                training_data[k : k + mini_batch_size]
+                for k in range(0, n, mini_batch_size)
+            ]
+            for mini_batch in mini_batches:
+                self.update_mini_batch(mini_batch, eta)
+            if test_data:
+                print(
+                    "Epoch {0}: {1} / {2}".format(
+                        j, self.evaluate(test_data), n_test
+                    )
+                )
+            else:
+                print("Epoch {0} complete".format(j))
+
+    def update_mini_batch(self, mini_batch, eta):
+        """Update the network's weights and biases by applying
+        gradient descent using backpropagation to a single mini batch.
+        The ``mini_batch`` is a list of tuples ``(x, y)``, and ``eta``
+        is the learning rate."""
+        nabla_b = [np.zeros(b.shape) for b in self.biases]
+        nabla_w = [np.zeros(w.shape) for w in self.weights]
+        for x, y in mini_batch:
+            delta_nabla_b, delta_nabla_w = self.backprop(x, y)
+            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, delta_nabla_b)]
+            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, delta_nabla_w)]
+        self.weights = [
+            w - (eta / len(mini_batch)) * nw for w, nw in zip(self.weights, nabla_w)
+        ]
+        self.biases = [
+            b - (eta / len(mini_batch)) * nb for b, nb in zip(self.biases, nabla_b)
+        ]
+
+    def backprop(self, x, y):
+        """Return a tuple ``(nabla_b, nabla_w)`` representing the gradient
+        for the cost function C_x. ``nabla_b`` and ``nabla_w`` are
+        layer-by-layer lists of numpy arrays, similar to
+        ``self.biases`` and ``self.weights``."""
+        nabla_b = [np.zeros(b.shape) for b in self.biases]
+        nabla_w = [np.zeros(w.shape) for w in self.weights]
+        # feedforward
+        activation = x
+        activations = [x]  # list to store all the activations, layer by layer
+        zs = []  # list to store all the z vectors, layer by layer
+        for b, w in zip(self.biases, self.weights):
+            z = np.dot(w, activation) + b
+            zs.append(z)
+            activation = sigmoid(z)
+            activations.append(activation)
+        # backward pass
+        delta = self.cost_derivative(activations[-1], y) * sigmoid_prime(zs[-1])
+        nabla_b[-1] = delta
+        nabla_w[-1] = np.dot(delta, activations[-2].transpose())
+        # Note that the variable l in the loop below is used a little
+        # differently. l = 1 means the last layer of neurons, l = 2 is the
+        # second-last layer, and so on. It's a renumbering of the
+        # scheme in the book, used here to take advantage of the fact
+        # that Python can use negative indices in lists.
+        for l in range(2, self.num_layers):
+            z = zs[-l]
+            sp = sigmoid_prime(z)
+            delta = np.dot(self.weights[-l + 1].transpose(), delta) * sp
+            nabla_b[-l] = delta
+            nabla_w[-l] = np.dot(delta, activations[-l - 1].transpose())
+        return (nabla_b, nabla_w)
+
+    def evaluate(self, test_data):
+        """Return the number of test inputs for which the neural
+        network outputs the correct result. Note that the neural
+        network's output is assumed to be the index of whichever
+        neuron in the final layer has the highest activation."""
+        test_results = [(np.argmax(self.feedforward(x)), y) for (x, y) in test_data]
+        return sum(int(x == y) for (x, y) in test_results)
+
+    def cost_derivative(self, output_activations, y):
+        """Return the vector of partial derivatives \\partial C_x /
+        \\partial a for the output activations."""
+        return output_activations - y
+
+
+# Miscellaneous functions
+def sigmoid(z):
+    """The sigmoid function."""
+    return 1.0 / (1.0 + np.exp(-z))
+
+
+def sigmoid_prime(z):
+    """Derivative of the sigmoid function."""
+    return sigmoid(z) * (1 - sigmoid(z))
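+
+
+if __name__ == "__main__":
+    # Minimal usage sketch, not part of the original module: it only exercises
+    # the Network API on synthetic random data. The 784-30-10 layout, the
+    # sample counts, and the hyperparameters below are illustrative assumptions;
+    # in practice you would substitute real (x, y) pairs such as MNIST
+    # image/label column vectors.
+    np.random.seed(0)
+
+    def one_hot(label, n=10):
+        """Return an (n, 1) one-hot column vector for ``label``."""
+        v = np.zeros((n, 1))
+        v[label] = 1.0
+        return v
+
+    # Training targets are one-hot column vectors; test targets are plain
+    # integer labels, matching what ``evaluate`` compares against argmax.
+    training_data = [
+        (np.random.randn(784, 1), one_hot(np.random.randint(10)))
+        for _ in range(100)
+    ]
+    test_data = [(np.random.randn(784, 1), np.random.randint(10)) for _ in range(20)]
+
+    net = Network([784, 30, 10])
+    net.SGD(training_data, epochs=2, mini_batch_size=10, eta=3.0, test_data=test_data)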