123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325 |
- --[[
- Copyright 2019 NetherEran.
- This program is licensed under the MIT license
- This provides a markov chain object. This can be used to create
- themed nonsense based on input strings.
- How it works:
- -cut input strings into word lists by splitting at the space characters
- --put technical START and END words at the start and the end
- -store the propabilities for each word to follow after another word
-
- -to create own sentences, start at the START word and randomly
- walk according to the stored propabilities until the END word
- or a maximum sentence length is reached
- ]]
- local MAX_SENTENCE_LENGTH = 30 --in words
- --we use numeric indices for start and end nodes so input (which is
- --strings) can't mess with it
- local START = 1
- local END = 2
- --node related stuff
- local node_prototype = {}
- local function link(node, to_link)
- node.link_count = node.link_count + 1
- node.links[to_link] = (node.links[to_link] or 0) + 1
- end
- --returns a node object that stores what it links, where it's linked
- --from and the weight of its links
- local function MarkovNode()
- local prototype = {}
- prototype.links = {} --word -> weight
- prototype.linked_by = {} --word array
- prototype.link_count = 0
-
- for k, v in pairs(node_prototype)
- do
- prototype[k] = v
- end
- return prototype
- end
- --graph related stuff
- local graph_prototype = {}
- --links two words or increases the weight if they're already linked
- function graph_prototype:link(word1, word2)
- link(self.nodes[word1], word2)
- local reverse = self.nodes[word2].linked_by
- for _, name in ipairs(reverse)
- do
- if name == word1
- then
- return
- end
- end
- table.insert(reverse, word1)
- end
- --splits a string at its space characters
- local function cutup(str)
- local pieces = {}
- pieces[0] = START
- local i = 1
- for k in string.gmatch(str, "([^%s]+)")
- do
- pieces[i] = k
- i = i + 1
- end
- pieces[i] = END
-
-
- if pieces[1] == END
- then
- return
- else
- return pieces
- end
- end
- --cuts a line into words and puts them into the graph as nodes and does weight stuff
- function graph_prototype:learn_line(line)
- local input = cutup(line)
- if not input
- then
- return
- end
- for i, v in ipairs(input)
- do
- self:link(input[i - 1], v)
- end
- end
- --read a file and treat each line as a line to learn from
- function graph_prototype:learn_from_file(filename)
- for line in io.lines(filename)
- do
- self:learn_line(line)
- end
- end
- --randomly pick an order of words based on the links and weights
- function graph_prototype:randomwalk()
- local path = {}
- local current = START
- local sentence_length = 0
- while current ~= END and sentence_length < MAX_SENTENCE_LENGTH
- do
- sentence_length = sentence_length + 1
- local node = self.nodes[current]
- local rand = math.random(node.link_count + 1)
-
- for k, v in pairs(node.links)
- do
- rand = rand - v
- if rand <= 0
- then
- current = k
- table.insert(path, k)
- break
- end
- end
- end
- path[#path] = nil
- return path
- end
- --returns a sentense based on a randomwalk
- function graph_prototype:get_sentence()
- local path = self:randomwalk()
- return table.concat(path, " ")
- end
- --counts the nodes of a graph without START and END
- function graph_prototype:count_known_words()
- local count = 0
- for name, _ in pairs(self.nodes)
- do
- if not(name == START or name == END)
- then
- count = count + 1
- end
- end
- return count
- end
- --checks if a table contains a value
- local function contains(table, value)
- for k, v in pairs(table)
- do
- if v == value
- then
- return true
- end
- end
- return false
- end
- --returns dead nodes. A node is dead when it can not be reached from
- --START or END can't be reached from it
- local function get_dead_nodes(graph)
- --calculate the distance to each node from START
- local sdistances = {}
- sdistances[START] = 0
- local to_visit = {START}
- while #to_visit > 0
- do
- local current = table.remove(to_visit)
- local current_distance = sdistances[current] + 1
- for node, _ in pairs(graph.nodes[current].links)
- do
- if current_distance < (sdistances[node] or math.huge)
- then
- sdistances[node] = current_distance
- if not contains(to_visit, node)
- then
- table.insert(to_visit, node)
- end
- end
- end
- end
- --calculate the distance to each node from END (same as above but backwards)
- local edistances = {}
- edistances[END] = 0
- table.insert(to_visit, END)
- while #to_visit > 0
- do
- local current = table.remove(to_visit)
- local current_distance = edistances[current] + 1
- for _, node in pairs(graph.nodes[current].linked_by)
- do
- if current_distance < (edistances[node] or math.huge)
- then
- edistances[node] = current_distance
- if not contains(to_visit, node)
- then
- table.insert(to_visit, node)
- end
- end
- end
- end
-
- --a node is dead when if and only if it has no distance to START or END
- local dead = {}
- for name, node in pairs(graph.nodes)
- do
- if not (edistances[name] and sdistances[name])
- then
-
- table.insert(dead, name)
- end
- end
- return dead
- end
- --removes a node and the links to it
- local function remove(graph, to_remove)
- --don't remove start or end
- if to_remove == START or to_remove == END
- then
- return
- end
- --remove links to to_remove
- for i, name in ipairs(graph.nodes[to_remove].linked_by)
- do
- local node = graph.nodes[name]
- node.link_count = node.link_count - node.links[to_remove]
- node.links[to_remove] = nil
- end
- --remove notes that to_remove links nodes
- for name, _ in pairs(graph.nodes[to_remove].links)
- do
- local node = graph.nodes[name]
- for i, v in ipairs(node.linked_by)
- do
- if v == to_remove
- then
- table.remove(node.linked_by, i)
- break
- end
- end
- end
- --actually remove node
- graph.nodes[to_remove] = nil
- end
- --removes a node, and the nodes that become dead by removing it
- function graph_prototype:remove_node(to_remove)
- remove(self, to_remove)
- local dead = get_dead_nodes(self)
- for i, name in ipairs(dead)
- do
- remove(self, name)
- end
- end
- --returns the node that is has appeared the least often
- local function get_least_used(nodes)
- local least_uses = math.huge
- local least_used = nil
-
- for name, node in pairs(nodes)
- do
- if node.link_count < least_uses and
- not(name == END or name == START)
- then
- least_uses = node.link_count
- least_used = name
- end
- end
- return least_used
- end
- --cuts the amount of known words to 'to' words maximum.
- function graph_prototype:cull(to)
- local count = self:count_known_words()
- while count > to
- do
- local least_used = get_least_used(self.nodes)
- if least_used
- then
- self:remove_node(least_used)
- end
- count = self:count_known_words()
- end
- end
- local nodes_meta = {}
- --create new MarkovNode when indexed
- function nodes_meta.__index(self, key)
- self[key] = MarkovNode()
- return self[key]
- end
- --returns a MarkovGraph object
- local function MarkovGraph()
- local prototype = {}
- prototype.nodes = {} --word -> MarkovNode
- setmetatable(prototype.nodes, nodes_meta)
-
- for k, v in pairs(graph_prototype)
- do
- prototype[k] = v
- end
-
- return prototype
- end
- return MarkovGraph
|