MNIST handwritten digit recognition with deep learning, written in pure Perl
This article presents a deep learning program for MNIST handwritten digit recognition written in pure Perl.
The MNIST handwritten digit data, obtained from THE MNIST DATABASE of handwritten digits, is placed under the data directory.
This Perl deep learning code is based on the sample source code of Neural Networks and Deep Learning.
Because it is pure Perl, it is useful for Perl users who want an overview of deep learning algorithms. It uses no special libraries, so anyone who knows if statements, for statements, and arrays can read the source code. And because it consists only of those constructs, you can also use it as a reference for writing deep learning logic in other programming languages (a small standalone example follows below).
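As an illustration of that claim, the core operation of one neural network layer, multiplying a weight matrix by an input vector and adding a bias, needs nothing but for statements and arrays. The sketch below is a simplified, self-contained example for this article and is not part of network.pl itself.

use strict;
use warnings;

# A layer with 3 inputs and 2 outputs: outputs = weights * inputs + biases
my $weights = [[0.1, 0.2, 0.3], [0.4, 0.5, 0.6]];   # 2 x 3 weight matrix
my $biases  = [0.01, 0.02];
my $inputs  = [1, 0, 1];

my $outputs = [];
for (my $row = 0; $row < @$weights; $row++) {
  my $sum = $biases->[$row];
  for (my $col = 0; $col < @{$weights->[$row]}; $col++) {
    $sum += $weights->[$row][$col] * $inputs->[$col];
  }
  $outputs->[$row] = $sum;
}

print "@$outputs\n";    # prints "0.41 1.02"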
At present, Perl's numerical computation performance is a bottleneck, so this code runs slowly and needs performance improvements. By partially rewriting it in SPVM, a language that is transpiled to C, and tuning it through trial and error, it should reach a practical speed.
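As a rough, hypothetical sketch of what such a partial rewrite could look like (the class MyHelper and its method are illustrative assumptions made for this article, not part of network.pl), a numerical helper written in SPVM and called from Perl might be:

# lib/SPVM/MyHelper.spvm (hypothetical example)
class MyHelper {
  static method array_sum : double ($nums : double[]) {
    my $total = 0.0;
    for (my $i = 0; $i < @$nums; $i++) {
      $total += $nums->[$i];
    }
    return $total;
  }
}

# Caller in Perl (hypothetical example)
use SPVM 'MyHelper';
my $total = SPVM::MyHelper->array_sum([0.1, 0.2, 0.3]);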
# network.pl
use strict;
use warnings;
use FindBin;
use List::Util 'shuffle';

# Learning rate
my $learning_rate = 3;

# Epoch count - number of times the training set is run
my $epoch_count = 30;

# Mini-batch size
my $mini_batch_size = 10;

# Number of neurons in each layer
# 28 * 28 = 784 monochrome image pixels (input)
# 30 neurons in the hidden layer (intermediate output)
# Classification into the 10 classes 0 to 9 (output)
my $neurons_count_in_layers = [784, 30, 10];

# Information about the function that converts m inputs to n outputs in each layer:
# number of inputs, number of outputs, biases, weights
my $m_to_n_func_infos = [];

# Create the m-to-n conversion function information from the number of neurons in each layer
for (my $i = 0; $i < @$neurons_count_in_layers - 1; $i++) {
  my $inputs_length = $neurons_count_in_layers->[$i];
  my $outputs_length = $neurons_count_in_layers->[$i + 1];

  # Initialize the biases with 0
  my $biases = array_new_zero($outputs_length);

  # Initialize the weights (He initialization). The weights are a column-major matrix
  my $weights_mat = mat_new_zero($outputs_length, $inputs_length);
  my $weights_length = $weights_mat->{rows_length} * $weights_mat->{columns_length};
  $weights_mat->{values} = array_create_he_init_value($weights_length, $inputs_length);

  # Set the conversion function information
  $m_to_n_func_infos->[$i] = {
    inputs_length => $inputs_length,
    outputs_length => $outputs_length,
    biases => $biases,
    weights_mat => $weights_mat,
  };
}

# Read the MNIST image information - handwritten training data used as input
my $mnist_train_image_file = "$FindBin::Bin/data/train-images-idx3-ubyte";
my $mnist_train_image_info = load_mnist_train_image_file($mnist_train_image_file);

# Read the MNIST label information - expected output of the handwritten training data
my $mnist_train_label_file = "$FindBin::Bin/data/train-labels-idx1-ubyte";
my $mnist_train_label_info = load_mnist_train_label_file($mnist_train_label_file);

# Training data indexes (only the first 40,000 are used as training data; the remaining 10,000 are used as validation data)
my @training_data_indexes = (0 .. 40000);

# Information on each conversion function in mini-batch units
my $m_to_n_func_mini_batch_infos = [];

# Initialize the totals of the bias gradients and the weight gradients of each conversion function in the mini-batch to 0.
# Initialized here so that the memory area can be reused
for (my $m_to_n_func_index = 0; $m_to_n_func_index < @$m_to_n_func_infos; $m_to_n_func_index++) {
  my $m_to_n_func_info = $m_to_n_func_infos->[$m_to_n_func_index];
  my $biases = $m_to_n_func_info->{biases};
  my $weights_mat = $m_to_n_func_info->{weights_mat};

  # Bias length
  my $biases_length = @$biases;

  # Initialize the total of the bias gradients of each conversion function in the mini-batch to 0
  my $biase_grad_totals = array_new_zero($biases_length);
  $m_to_n_func_mini_batch_infos->[$m_to_n_func_index]{biase_grad_totals} = $biase_grad_totals;

  # Initialize the total of the weight gradients of each conversion function in the mini-batch to 0
  my $weight_grad_totals_mat = mat_new_zero($weights_mat->{rows_length}, $weights_mat->{columns_length});
  $m_to_n_func_mini_batch_infos->[$m_to_n_func_index]{weight_grad_totals_mat} = $weight_grad_totals_mat;
}

# Total number of executions
my $total_count = 0;

# The number of correct answers
my $answer_match_count = 0;

# Run the training set as many times as the epoch count
for (my $epoch_index = 0; $epoch_index < $epoch_count; $epoch_index++) {

  # Shuffle the indexes of the training data (training in random order is said to generalize better)
  my @training_data_indexes_shuffle = shuffle @training_data_indexes;

  my $count = 0;

  # Learn in mini-batch size units
  my $backprop_count = 0;

  while (my @indexed_for_mini_batch = splice(@training_data_indexes_shuffle, 0, $mini_batch_size)) {

    # Reset the totals of the bias gradients and the weight gradients of each conversion function in the mini-batch to 0
    for (my $m_to_n_func_index = 0; $m_to_n_func_index < @$m_to_n_func_mini_batch_infos; $m_to_n_func_index++) {
      my $m_to_n_func_info = $m_to_n_func_infos->[$m_to_n_func_index];
      my $biases = $m_to_n_func_info->{biases};
      my $weights_mat = $m_to_n_func_info->{weights_mat};

      # Reset the total of the bias gradients of each conversion function in the mini-batch to 0
      $m_to_n_func_mini_batch_infos->[$m_to_n_func_index]{biase_grad_totals}
        = array_new_zero(scalar @{$m_to_n_func_mini_batch_infos->[$m_to_n_func_index]{biase_grad_totals}});

      # Reset the total of the weight gradients of each conversion function in the mini-batch to 0
      $m_to_n_func_mini_batch_infos->[$m_to_n_func_index]{weight_grad_totals_mat}{values}
        = array_new_zero(scalar @{$m_to_n_func_mini_batch_infos->[$m_to_n_func_index]{weight_grad_totals_mat}{values}});
    }

    for my $training_data_index (@indexed_for_mini_batch) {
      # Get the gradients of the loss function with respect to the weights and biases using backpropagation
      my $m_to_n_func_grad_infos = backprop($m_to_n_func_infos, $mnist_train_image_info, $mnist_train_label_info, $training_data_index);

      # Gradients of the loss function with respect to the biases
      my $bias_grads = $m_to_n_func_grad_infos->{biases};

      # Gradients of the loss function with respect to the weights
      my $weight_grads_mat = $m_to_n_func_grad_infos->{weights_mat};

      # Add the bias gradients and the weight gradients of each conversion function to the mini-batch totals
      for (my $m_to_n_func_index = 0; $m_to_n_func_index < @$m_to_n_func_mini_batch_infos; $m_to_n_func_index++) {
        my $m_to_n_func_info = $m_to_n_func_infos->[$m_to_n_func_index];

        # Add the bias gradients of each conversion function in the mini-batch
        array_add_inplace($m_to_n_func_mini_batch_infos->[$m_to_n_func_index]{biase_grad_totals}, $bias_grads->[$m_to_n_func_index]);

        # Add the weight gradients of each conversion function in the mini-batch
        array_add_inplace($m_to_n_func_mini_batch_infos->[$m_to_n_func_index]{weight_grad_totals_mat}{values}, $weight_grads_mat->[$m_to_n_func_index]{values});
      }
    }

    # Update the biases and weights of each conversion function using the gradient totals of the mini-batch
    for (my $m_to_n_func_index = 0; $m_to_n_func_index < @$m_to_n_func_infos; $m_to_n_func_index++) {

      # Update the biases of each conversion function (take the learning rate into account and divide by the mini-batch size)
      update_params($m_to_n_func_infos->[$m_to_n_func_index]{biases}, $m_to_n_func_mini_batch_infos->[$m_to_n_func_index]{biase_grad_totals}, $learning_rate, $mini_batch_size);

      # Update the weights of each conversion function (take the learning rate into account and divide the gradient total by the mini-batch size)
      update_params($m_to_n_func_infos->[$m_to_n_func_index]{weights_mat}{values}, $m_to_n_func_mini_batch_infos->[$m_to_n_func_index]{weight_grad_totals_mat}{values}, $learning_rate, $mini_batch_size);
    }
  }
}

# Update the parameters, taking the learning rate and the mini-batch size into account
sub update_params {
  my ($params, $param_grads, $learning_rate, $mini_batch_size) = @_;

  for (my $param_index = 0; $param_index < @$params; $param_index++) {
    my $update_diff = ($learning_rate / $mini_batch_size) * $param_grads->[$param_index];
    $params->[$param_index] -= $update_diff;
  }
}

my $count = 0;

# Backpropagation
sub backprop {
  my ($m_to_n_func_infos, $mnist_train_image_info, $mnist_train_label_info, $training_data_index) = @_;

  my $first_inputs_length = $m_to_n_func_infos->[0]{inputs_length};

  # Inputs (convert values 0 to 255 into values 0 to 1)
  my $image_unit_length = $mnist_train_image_info->{rows_count} * $mnist_train_image_info->{columns_count};
  my $mnist_train_image_data = $mnist_train_image_info->{data};
  my $first_inputs_packed = substr($mnist_train_image_data, $image_unit_length * $training_data_index, $image_unit_length);
  my $first_inputs_raw = [unpack("C$first_inputs_length", $first_inputs_packed)];
  my $first_inputs = array_div_scalar($first_inputs_raw, 255);

  # Probability distribution of the expected output
  my $label_number = $mnist_train_label_info->{label_numbers}[$training_data_index];
  my $desired_outputs = probabilize_desired_outputs($label_number);

  # Bias gradients of each conversion function
  my $bias_grads_in_m_to_n_funcs = [];

  # Weight gradients of each conversion function
  my $weight_grads_mat_in_m_to_n_funcs = [];

  # Initialize the bias gradients and the weight gradients
  for (my $m_to_n_func_index = 0; $m_to_n_func_index < @$m_to_n_func_infos; $m_to_n_func_index++) {
    my $inputs_length = $m_to_n_func_infos->[$m_to_n_func_index]{inputs_length};
    my $outputs_length = $m_to_n_func_infos->[$m_to_n_func_index]{outputs_length};

    # Initialize the bias gradients to 0
    $bias_grads_in_m_to_n_funcs->[$m_to_n_func_index] = array_new_zero($outputs_length);

    # Initialize the weight gradients to 0
    $weight_grads_mat_in_m_to_n_funcs->[$m_to_n_func_index] = mat_new_zero($outputs_length, $inputs_length);
  }

  # Inputs of each layer
  my $inputs_in_m_to_n_funcs = [$first_inputs];

  # Activated outputs of each layer
  my $outputs_in_m_to_n_funcs = [];

  # Obtain the output of the output layer from the input of the input layer.
  # Save the outputs and the activated outputs of each layer for backpropagation
  for (my $m_to_n_func_index = 0; $m_to_n_func_index < @$m_to_n_func_infos; $m_to_n_func_index++) {
    my $cur_inputs = $inputs_in_m_to_n_funcs->[-1];
    my $inputs_length = $m_to_n_func_infos->[$m_to_n_func_index]{inputs_length};
    my $outputs_length = $m_to_n_func_infos->[$m_to_n_func_index]{outputs_length};

    # Weights matrix
    my $weights_mat = $m_to_n_func_infos->[$m_to_n_func_index]{weights_mat};

    # Inputs matrix (a column vector holding the current inputs)
    my $cur_inputs_rows_length = $inputs_length;
    my $cur_inputs_columns_length = 1;
    my $cur_inputs_mat = {
      rows_length => $cur_inputs_rows_length,
      columns_length => $cur_inputs_columns_length,
      values => $cur_inputs,
    };

    # Matrix product of the weights and the inputs
    my $mul_weights_inputs_mat = mat_mul($weights_mat, $cur_inputs_mat);
    my $mul_weights_inputs = $mul_weights_inputs_mat->{values};

    # Biases
    my $biases = $m_to_n_func_infos->[$m_to_n_func_index]{biases};

    # Outputs - the sum of the weight-input matrix product and the biases
    my $outputs = array_add($mul_weights_inputs, $biases);

    # Activated outputs - apply the activation function to the outputs
    my $activate_outputs = array_sigmoid($outputs);

    # Save the outputs for backpropagation
    push @$outputs_in_m_to_n_funcs, $outputs;

    # Save the activated outputs as the next inputs for backpropagation
    push @$inputs_in_m_to_n_funcs, $activate_outputs;
  }

  # Last outputs
  my $last_outputs = $outputs_in_m_to_n_funcs->[-1];

  # Last activated outputs
  my $last_activate_outputs = pop @$inputs_in_m_to_n_funcs;

  # Softmax function
  # my $softmax_outputs = softmax($last_activate_outputs);

  # Cost
  my $cost = cross_entropy_cost($last_activate_outputs, $desired_outputs);
  print "Cost: " . sprintf("%.3f", $cost) . "\n";

  # Whether the answer is correct
  my $answer = max_index($last_activate_outputs);
  # my $answer = max_index($softmax_outputs);
  my $desired_answer = max_index($desired_outputs);
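The listing above calls several small helper subroutines (array_new_zero, mat_new_zero, mat_mul, array_sigmoid, cross_entropy_cost, max_index, and so on) that are defined elsewhere in network.pl and are not shown in this excerpt. As a hedged sketch inferred from how they are called above, and not the author's exact definitions, a few of them might look like this in pure Perl:

# Create an array of the given length filled with 0 (sketch)
sub array_new_zero {
  my ($length) = @_;
  return [(0) x $length];
}

# Create a rows x columns matrix filled with 0, stored as a flat values array (sketch)
sub mat_new_zero {
  my ($rows_length, $columns_length) = @_;
  return {
    rows_length => $rows_length,
    columns_length => $columns_length,
    values => [(0) x ($rows_length * $columns_length)],
  };
}

# Apply the sigmoid function 1 / (1 + exp(-x)) to every element (sketch)
sub array_sigmoid {
  my ($nums) = @_;
  return [map { 1 / (1 + exp(-$_)) } @$nums];
}

# Cross-entropy cost between the activated outputs and the desired outputs (sketch)
sub cross_entropy_cost {
  my ($outputs, $desired_outputs) = @_;
  my $cost = 0;
  for (my $i = 0; $i < @$outputs; $i++) {
    my $a = $outputs->[$i];
    my $y = $desired_outputs->[$i];
    $cost += -($y * log($a) + (1 - $y) * log(1 - $a));
  }
  return $cost;
}

# Index of the largest element - used to turn the output layer into a digit 0-9 (sketch)
sub max_index {
  my ($nums) = @_;
  my $max_index = 0;
  for (my $i = 1; $i < @$nums; $i++) {
    $max_index = $i if $nums->[$i] > $nums->[$max_index];
  }
  return $max_index;
}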