/*
  Copyright (C) 2015 Jochem Raat

  This program is free software: you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation, either version 3 of the License, or
  (at your option) any later version.

  This program is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
- #pragma once
- #include <string>
- #include <vector>
- #include <map>
- #include <random>
// Statistics recorded for one group of characters in the markov chain.
// The string this element refers to is not stored here, because the
// element is obtained by requesting it for that string from the map.
struct MarkovElement {
    // The number of times the element has occurred. Starts at zero so
    // that a default-constructed element is a valid, empty counter
    // rather than holding an indeterminate value.
    int frequency = 0;

    // Maps every char which can follow the current element to the
    // number of times it has occurred after this element.
    // NOTE: the possibility that this is the end of the name is marked
    // by the '\n' character, which represents a line break.
    std::map<char, int> possibilities;
};
// The BeginElement is different from normal elements in that its
// possibilities are strings of groupSize characters instead of single
// characters.
struct BeginElement {
    // The number of beginnings. Starts at zero so that a
    // default-constructed element is a valid, empty counter rather
    // than holding an indeterminate value.
    int frequency = 0;

    // Maps every string of groupSize characters which is possible at
    // the beginning to the number of times it has occurred.
    std::map<std::string, int> possibilities;
};
- // A map, which connects strings of groupSize characters to their
- // MarkovElement, which includes the number of times the string has
- // occured and all the possible following characters and their
- // frequencies.
- typedef std::map<std::string, MarkovElement> MarkovMap;
- // The main struct in which the representation used to create names
- // from markov chains is stored.
- struct MarkovData {
- // The BeginElement which includes all possible beginnings.
- BeginElement beginnings;
- // The MarkovMap which maps all possible strings of groupSize
- // characters to their MarkovElement's.
- MarkovMap markovMap;
- };
- class MarkovNames {
- public:
- // The constructor for the MarkovNames class
- MarkovNames(
- // The name of the file in which the real names are stored,
- // from which the markov chains can be trained.
- std::string trainingFileName,
- // The number of characters which each markov node represents
- int groupSize,
- // The maximum number of characters allowed in a name
- int maxLength,
- // A pointer to the random number generator to use
- std::mt19937 *mt);
- // Return a name randomly generated from the markov data. Before
- // being returned, the name is checked according to a few
- // criteria, like words not being too long and the whole name not
- // being to short, if it doesn't pass these tests, a new name is
- // generated, which will have to pass the same checks.
- std::string createName();
- private:
- // Variables:
- std::string trainingFileName; // The filename of the training file
- int groupSize; // The number of characters in each markov node
- int maxLength; // The maximum number of characters allowed in a
- // name
- int maxWords; // The maximum number of words allowed in a name
- int maxWordLength; // The maximum length of a word allowed in a
- // name
- MarkovData markovData; // The representation of the markov chain
- std::mt19937 *mt; // The random number generator
- // Functions:
- // Returns a vector of names, taken from the training file.
- std::vector<std::string> getTrainingNames();
- // Create the markov data from the training data.
- void train();
- // Create the markov data associated with one name.
- void trainName(std::string name);
- // Add a string, character combination to the MarkovMap pointed to
- // by map.
- void addToMarkovMap(MarkovMap *map, // The map to insert into
- std::string current, // The string to insert
- char next); // The character to insert
- // Add a possibility to a possibilities map, pointed to by
- // possibilities.
- void addToPossibilities(std::map<char, int> *possibilities,
- char next);
- // Generate a name from the markov data, but don't check if it is
- // good enough, just return it.
- std::string getName();
- // Return a beginning string of groupSize characters, according to
- // the probabilities indicated by the frequencies in the markov
- // data.
- std::string chooseBeginning();
- // Choose a following character after the string of groupSize
- // character current, according to the probabilities indicated by
- // the frequencies in the markov data.
- char chooseNext(std::string current);
- };
|