  // markovNames.h (5.0 KB)
  /*
  Copyright (C) 2015 Jochem Raat

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
  */
  #pragma once

  #include <map>
  #include <random>
  #include <string>
  #include <vector>
  // The data stored for one node of the markov chain.
  //
  // The string to which this element refers does not need to be stored
  // in here, because the element is obtained by requesting the element
  // for a certain string from the map.
  struct MarkovElement {
    // The number of times the element has occurred. Starts at zero so
    // that a default-constructed element never carries an indeterminate
    // count.
    int frequency = 0;

    // A map, which maps every possible char which can follow the
    // current element to the number of times it has occurred after
    // this element.
    // NOTE: the possibility that this is the end of the name is
    // marked by the '\n' character, which represents a line break.
    std::map<char, int> possibilities;
  };
  // The BeginElement is different from normal elements in that its
  // possibilities are strings of groupSize characters.
  struct BeginElement {
    // The number of beginnings. Starts at zero so that a
    // default-constructed element never carries an indeterminate count.
    int frequency = 0;

    // A map, which maps every string of groupSize characters which is
    // possible at the beginning to the number of times it has occurred.
    std::map<std::string, int> possibilities;
  };
  42. // A map, which connects strings of groupSize characters to their
  43. // MarkovElement, which includes the number of times the string has
  44. // occured and all the possible following characters and their
  45. // frequencies.
  46. typedef std::map<std::string, MarkovElement> MarkovMap;
  47. // The main struct in which the representation used to create names
  48. // from markov chains is stored.
  49. struct MarkovData {
  50. // The BeginElement which includes all possible beginnings.
  51. BeginElement beginnings;
  52. // The MarkovMap which maps all possible strings of groupSize
  53. // characters to their MarkovElement's.
  54. MarkovMap markovMap;
  55. };
  56. class MarkovNames {
  57. public:
  58. // The constructor for the MarkovNames class
  59. MarkovNames(
  60. // The name of the file in which the real names are stored,
  61. // from which the markov chains can be trained.
  62. std::string trainingFileName,
  63. // The number of characters which each markov node represents
  64. int groupSize,
  65. // The maximum number of characters allowed in a name
  66. int maxLength,
  67. // A pointer to the random number generator to use
  68. std::mt19937 *mt);
  69. // Return a name randomly generated from the markov data. Before
  70. // being returned, the name is checked according to a few
  71. // criteria, like words not being too long and the whole name not
  72. // being to short, if it doesn't pass these tests, a new name is
  73. // generated, which will have to pass the same checks.
  74. std::string createName();
  75. private:
  76. // Variables:
  77. std::string trainingFileName; // The filename of the training file
  78. int groupSize; // The number of characters in each markov node
  79. int maxLength; // The maximum number of characters allowed in a
  80. // name
  81. int maxWords; // The maximum number of words allowed in a name
  82. int maxWordLength; // The maximum length of a word allowed in a
  83. // name
  84. MarkovData markovData; // The representation of the markov chain
  85. std::mt19937 *mt; // The random number generator
  86. // Functions:
  87. // Returns a vector of names, taken from the training file.
  88. std::vector<std::string> getTrainingNames();
  89. // Create the markov data from the training data.
  90. void train();
  91. // Create the markov data associated with one name.
  92. void trainName(std::string name);
  93. // Add a string, character combination to the MarkovMap pointed to
  94. // by map.
  95. void addToMarkovMap(MarkovMap *map, // The map to insert into
  96. std::string current, // The string to insert
  97. char next); // The character to insert
  98. // Add a possibility to a possibilities map, pointed to by
  99. // possibilities.
  100. void addToPossibilities(std::map<char, int> *possibilities,
  101. char next);
  102. // Generate a name from the markov data, but don't check if it is
  103. // good enough, just return it.
  104. std::string getName();
  105. // Return a beginning string of groupSize characters, according to
  106. // the probabilities indicated by the frequencies in the markov
  107. // data.
  108. std::string chooseBeginning();
  109. // Choose a following character after the string of groupSize
  110. // character current, according to the probabilities indicated by
  111. // the frequencies in the markov data.
  112. char chooseNext(std::string current);
  113. };