neuralnetwork.h 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. #ifndef NEURALNETWORK_H
  2. #define NEURALNETWORK_H
  #include <cmath>
  #include <fstream>
  #include <iostream>
  #include <random>
  #include <string>
  #include <vector>

  #include <betterthanmnist.h>
  7. using namespace std;
  // ---- Network topology -------------------------------------------------
  // Number of values fed into the network per sample.
  // NOTE(review): 784 presumably corresponds to a 28x28 image - confirm.
  constexpr unsigned c_numInputNeurons = 784;
  // Width of the single hidden layer.
  constexpr unsigned c_numHiddenNeurons = 30;
  // One output neuron per class label.
  constexpr unsigned c_numOutputNeurons = 10;

  // ---- Training defaults ------------------------------------------------
  // Not referenced by NeuralNetwork in this header; presumably consumed by
  // the caller that drives training - confirm.
  constexpr unsigned c_trainingGenerations = 30;
  // Default number of samples accumulated before parameters are updated.
  constexpr unsigned c_batchSize = 10;
  // Default learning rate (divided by the batch sample count when applied).
  constexpr float c_learningRate = 3.0f;
  // Number of samples consumed by a single NeuralNetwork::Train() call.
  constexpr unsigned c_numTestsForGen = 300;

  // File that NeuralNetwork::initialize() reads stored parameters from.
  const std::string c_init_filename = "init90.txt";
  16. class NeuralNetwork
  17. {
  18. public:
  19. NeuralNetwork (unsigned b_size = c_batchSize, float l_rate = c_learningRate){
  20. my_batch_size = b_size;
  21. my_learning_rate = l_rate;
  22. random_device r;
  23. normal_distribution<float> dist(0, 1);
  24. for (float& f : hiddenLayerBiases)
  25. f = dist(r);
  26. for (float& f : outputLayerBiases)
  27. f = dist(r);
  28. for (float& f : hiddenLayerWeights)
  29. f = dist(r);
  30. for (float& f : outputLayerWeights)
  31. f = dist(r);
  32. cout<<"[+] Network created."<<endl;
  33. }
  34. void fill_arr(float* arr,unsigned arr_size, float fill){
  35. for (unsigned i = 0;i < arr_size; ++i)
  36. arr[i] = fill;
  37. }
  38. void Train (BetterThanMnist& trainingData){
  39. unsigned trainingIndex = 0;
  40. while (trainingIndex < c_numTestsForGen){
  41. fill_arr(batchHiddenLayerBiasesDeltaDer, c_numHiddenNeurons, 0.0f);
  42. fill_arr(batchOutputLayerBiasesDeltaDer, c_numOutputNeurons, 0.0f);
  43. fill_arr(batchHiddenLayerWeightsDeltaDer, c_numInputNeurons*c_numHiddenNeurons, 0.0f);
  44. fill_arr(batchOutputLayerWeightsDeltaDer, c_numHiddenNeurons*c_numOutputNeurons, 0.0f);
  45. unsigned batchIndex = 0;
  46. while (batchIndex < my_batch_size && trainingIndex < c_numTestsForGen){
  47. unsigned imageLabel = 0;
  48. vector<float> pixels = trainingData.GetImage(imageLabel);
  49. ForwardPass(pixels);
  50. BackwardPass(pixels, imageLabel);
  51. // sum derivatives
  52. for (unsigned i = 0; i < c_numHiddenNeurons; ++i)
  53. batchHiddenLayerBiasesDeltaDer[i] += hiddenLayerBiasesDeltaDerGradient[i];
  54. for (unsigned i = 0; i < c_numOutputNeurons; ++i)
  55. batchOutputLayerBiasesDeltaDer[i] += outputLayerBiasesDeltaDerGradient[i];
  56. for (unsigned i = 0; i < c_numInputNeurons*c_numHiddenNeurons; ++i)
  57. batchHiddenLayerWeightsDeltaDer[i] += hiddenLayerWeightsDeltaDer[i];
  58. for (unsigned i = 0; i < c_numHiddenNeurons*c_numOutputNeurons; ++i)
  59. batchOutputLayerWeightsDeltaDer[i] += outputLayerWeightsDeltaDer[i];
  60. ++trainingIndex;
  61. ++batchIndex;
  62. }
  63. float batchLearningRate = my_learning_rate / float(batchIndex);
  64. // apply training
  65. for (unsigned i = 0; i < c_numHiddenNeurons; ++i)
  66. hiddenLayerBiases[i] -= batchHiddenLayerBiasesDeltaDer[i] * batchLearningRate;
  67. for (unsigned i = 0; i < c_numOutputNeurons; ++i)
  68. outputLayerBiases[i] -= batchOutputLayerBiasesDeltaDer[i] * batchLearningRate;
  69. for (unsigned i = 0; i < c_numInputNeurons*c_numHiddenNeurons; ++i)
  70. hiddenLayerWeights[i] -= batchHiddenLayerWeightsDeltaDer[i] * batchLearningRate;
  71. for (unsigned i = 0; i < c_numHiddenNeurons*c_numOutputNeurons; ++i)
  72. outputLayerWeights[i] -= batchOutputLayerWeightsDeltaDer[i] * batchLearningRate;
  73. }
  74. }
  75. // input pixels -> answer
  76. unsigned ForwardPass (vector<float>& pixels){
  77. // hidden layer
  78. for (unsigned neuronIndex = 0; neuronIndex < c_numHiddenNeurons; ++neuronIndex){
  79. float sum = hiddenLayerBiases[neuronIndex];
  80. for (unsigned inputIndex = 0; inputIndex < c_numInputNeurons; ++inputIndex)
  81. sum += pixels[inputIndex] * hiddenLayerWeights[HiddenLayerWeightIndex(inputIndex, neuronIndex)];
  82. hiddenLayerVals[neuronIndex] = 1.0f / (1.0f + exp(-sum));
  83. }
  84. // output layer
  85. for (unsigned neuronIndex = 0; neuronIndex < c_numOutputNeurons; ++neuronIndex){
  86. float sum = outputLayerBiases[neuronIndex];
  87. for (unsigned inputIndex = 0; inputIndex < c_numHiddenNeurons; ++inputIndex)
  88. sum += hiddenLayerVals[inputIndex] * outputLayerWeights[OutputLayerWeightIndex(inputIndex, neuronIndex)];
  89. outputLayerVals[neuronIndex] = 1.0f / (1.0f + exp(-sum));
  90. }
  91. // return max value
  92. float maxOutput = outputLayerVals[0];
  93. unsigned maxLabel = 0;
  94. for (unsigned neuronIndex = 1; neuronIndex < c_numOutputNeurons; ++neuronIndex){
  95. if (outputLayerVals[neuronIndex] > maxOutput){
  96. maxOutput = outputLayerVals[neuronIndex];
  97. maxLabel = neuronIndex;
  98. }
  99. }
  100. return maxLabel;
  101. }
  102. const float* GetHiddenLayerBiases () const { return hiddenLayerBiases; }
  103. const float* GetOutputLayerBiases () const { return outputLayerBiases; }
  104. const float* GetHiddenLayerWeights () const { return hiddenLayerWeights; }
  105. const float* GetOutputLayerWeights () const { return outputLayerWeights; }
  106. void initialize(){
  107. fstream fin;
  108. fin.open(c_init_filename);
  109. for (float& f : hiddenLayerBiases)
  110. fin >> f;
  111. for (float& f : outputLayerBiases)
  112. fin >> f;
  113. for (float& f : hiddenLayerWeights)
  114. fin >> f;
  115. for (float& f : outputLayerWeights)
  116. fin >> f;
  117. cout<<"[+] Neural Network iniitalized."<<endl;
  118. }
  119. private:
  120. unsigned HiddenLayerWeightIndex (unsigned inputIndex, unsigned hiddenLayerNeuronIndex){
  121. return hiddenLayerNeuronIndex * c_numInputNeurons + inputIndex;
  122. }
  123. unsigned OutputLayerWeightIndex (unsigned hiddenLayerNeuronIndex, unsigned outputLayerNeuronIndex){
  124. return outputLayerNeuronIndex * c_numHiddenNeurons + hiddenLayerNeuronIndex;
  125. }
  126. void BackwardPass (vector<float>& pixels, unsigned correctLabel){
  127. // output layer
  128. for (unsigned neuronIndex = 0; neuronIndex < c_numOutputNeurons; ++neuronIndex){
  129. float bestOutput = (correctLabel == neuronIndex) ? 1.0f : 0.0f;
  130. float deltaError = outputLayerVals[neuronIndex] - bestOutput;
  131. float delta0 = outputLayerVals[neuronIndex] * (1.0f - outputLayerVals[neuronIndex]);
  132. outputLayerBiasesDeltaDerGradient[neuronIndex] = deltaError * delta0;
  133. for (unsigned inputIndex = 0; inputIndex < c_numHiddenNeurons; ++inputIndex)
  134. outputLayerWeightsDeltaDer[OutputLayerWeightIndex(inputIndex, neuronIndex)] = outputLayerBiasesDeltaDerGradient[neuronIndex] * hiddenLayerVals[inputIndex];
  135. }
  136. // hidden layer
  137. for (unsigned neuronIndex = 0; neuronIndex < c_numHiddenNeurons; ++neuronIndex){
  138. float deltaErrorSum = 0.0f;
  139. for (unsigned destinationNeuronIndex = 0; destinationNeuronIndex < c_numOutputNeurons; ++destinationNeuronIndex)
  140. deltaErrorSum += outputLayerBiasesDeltaDerGradient[destinationNeuronIndex] * outputLayerWeights[OutputLayerWeightIndex(neuronIndex, destinationNeuronIndex)];
  141. float delta0 = hiddenLayerVals[neuronIndex] * (1.0f - hiddenLayerVals[neuronIndex]);
  142. hiddenLayerBiasesDeltaDerGradient[neuronIndex] = deltaErrorSum * delta0;
  143. for (unsigned inputIndex = 0; inputIndex < c_numInputNeurons; ++inputIndex)
  144. hiddenLayerWeightsDeltaDer[HiddenLayerWeightIndex(inputIndex, neuronIndex)] = hiddenLayerBiasesDeltaDerGradient[neuronIndex] * pixels[inputIndex];
  145. }
  146. }
  147. private:
  148. unsigned my_batch_size;
  149. float my_learning_rate;
  150. // biases and weights
  151. float hiddenLayerBiases[c_numHiddenNeurons];
  152. float outputLayerBiases[c_numOutputNeurons];
  153. float hiddenLayerWeights[c_numInputNeurons*c_numHiddenNeurons];
  154. float outputLayerWeights[c_numHiddenNeurons*c_numOutputNeurons];
  155. // neuron activation values
  156. float hiddenLayerVals[c_numHiddenNeurons];
  157. float outputLayerVals[c_numOutputNeurons];
  158. // derivatives of biases and weights for every test. GRADIENT
  159. float hiddenLayerBiasesDeltaDerGradient[c_numHiddenNeurons];
  160. float outputLayerBiasesDeltaDerGradient[c_numOutputNeurons];
  161. float hiddenLayerWeightsDeltaDer[c_numInputNeurons*c_numHiddenNeurons];
  162. float outputLayerWeightsDeltaDer[c_numHiddenNeurons*c_numOutputNeurons];
  163. // derivatives of biases and weights for every batch. Average of all items in batch.
  164. float batchHiddenLayerBiasesDeltaDer[c_numHiddenNeurons];
  165. float batchOutputLayerBiasesDeltaDer[c_numOutputNeurons];
  166. float batchHiddenLayerWeightsDeltaDer[c_numInputNeurons*c_numHiddenNeurons];
  167. float batchOutputLayerWeightsDeltaDer[c_numHiddenNeurons*c_numOutputNeurons];
  168. };
  169. #endif // NEURALNETWORK_H