tdp.clu_0 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. % TDP CLU
  2. %
  3. % A general, non-forgetful, top-down parser
  4. str = string;
  5. nterm = string;
  6. rule = oneof[or: nt_pair,
  7. and: nt_pair,
  8. term: str];
  9. nt_pair = record[nt1, nt2: nterm];
  10. task = record[nt: nterm,
  11. s: str];
  12. % Because of the simple grammar form, a derivation (parse tree) can be
  13. % represented by a list of non-terminals.
  14. % This cluster has only the bare essentials needed for the program.
  15. deriv = cluster is create, % nterm -> deriv
  16. concat, % deriv, deriv -> deriv
  17. elements; % deriv --> nterm
  18. rep = oneof[pair: pair,
  19. one: nterm];
  20. pair = record[head, tail: deriv];
  21. create = proc (nt: nterm) returns (cvt);
  22. return(rep$make_one(nt));
  23. end create;
  24. concat = proc (d1, d2: deriv) returns (cvt);
  25. return(rep$make_pair(pair${head: d1, tail: d2}));
  26. end concat;
  27. elements = iter (d: cvt) yields (nterm);
  28. tagcase d
  29. tag pair (p: pair):
  30. for nt: nterm in elements(p.head) do
  31. yield(nt);
  32. end;
  33. for nt: nterm in elements(p.tail) do
  34. yield(nt);
  35. end;
  36. tag one (nt: nterm):
  37. yield(nt);
  38. end;
  39. end elements;
  40. end deriv;
  41. % A simple hash table mapping strings to objects of type 't'.
  42. table = cluster [t: type] is create, % -> table[t]
  43. lookup, % table[t], str -> t => not_found
  44. enter, % table[t], str, t => already_exists
  45. alter, % table[t], str, t ->
  46. remove; % table[t], str => not_found
  47. rep = array[bucket];
  48. bucket = array[entry];
  49. entry = record[key: str,
  50. value: t];
  51. size = 211;
  52. create = proc () returns (cvt);
  53. tab: rep := rep$predict(1, size);
  54. for i: int in int$from_to(1, size) do
  55. rep$addh(tab, bucket$create(1));
  56. end;
  57. return(tab);
  58. end create;
  59. lookup = proc (tab: cvt, key: str) returns (t) signals (not_found);
  60. for ent: entry in bucket$elements(tab[hash(key, size)]) do
  61. if key = ent.key
  62. then return(ent.value); end;
  63. end;
  64. signal not_found;
  65. end lookup;
  66. enter = proc (tab: cvt, key: str, value: t) signals (already_exists);
  67. buck: bucket := tab[hash(key, size)];
  68. for ent: entry in bucket$elements(buck) do
  69. if key = ent.key
  70. then signal already_exists; end;
  71. end;
  72. bucket$addh(buck, entry${key: key,
  73. value: value});
  74. end enter;
  75. alter = proc (tab: cvt, key: str, value: t);
  76. buck: bucket := tab[hash(key, size)];
  77. for ent: entry in bucket$elements(buck) do
  78. if key = ent.key
  79. then ent.value := value;
  80. return;
  81. end;
  82. end;
  83. bucket$addh(buck, entry${key: key,
  84. value: value});
  85. end alter;
  86. remove = proc (tab: cvt, key: str) signals (not_found);
  87. buck: bucket := tab[hash(key, size)];
  88. for i: int in bucket$indexes(buck) do
  89. if key = buck[i].key
  90. then h: int := bucket$high(buck);
  91. if i < h
  92. then buck[i] := buck[h]; end;
  93. bucket$remh(buck);
  94. return;
  95. end;
  96. end;
  97. signal not_found;
  98. end remove;
  99. end table;
  100. % Only three kinds of productions are allowed, with the following syntax:
  101. % nterm=nterm|nterm
  102. % nterm=nterm nterm
  103. % nterm=terminal
  104. % Productions must be separated by newlines.
  105. grammar = cluster is create, % nterm, str -> grammar
  106. % => bad_format, multiple_rules
  107. alter, % grammar, str => bad_format
  108. remove, % grammar, nterm => no_rule
  109. get_start_symbol, % grammar -> nterm
  110. set_start_symbol, % grammar, nterm ->
  111. get_rule; % grammar, nterm -> rule => no_rule
  112. rep = record[start: nterm,
  113. rules: rtab];
  114. rtab = table[rule];
  115. create = proc (start: nterm, s: str) returns (cvt) signals (bad_format, multiple_rules);
  116. tab: rtab := rtab$create();
  117. for nt: nterm, r: rule in get_rules(s) do
  118. rtab$enter(tab, nt, r);
  119. end;
  120. except when bad_format: signal bad_format;
  121. when already_exists: signal multiple_rules;
  122. end;
  123. return(rep${start: start,
  124. rules: tab});
  125. end create;
  126. alter = proc (g: cvt, s: str) signals (bad_format);
  127. tab: rtab := g.rules;
  128. for nt: nterm, r: rule in get_rules(s) do
  129. rtab$alter(tab, nt, r);
  130. end;
  131. except when bad_format: signal bad_format; end;
  132. end alter;
  133. get_rules = iter (s: str) yields (nterm, rule) signals (bad_format);
  134. while str$size(s) > 0 do
  135. i: int := str$indexc('\n', s);
  136. p: str;
  137. if i > 0
  138. then p := str$substr(s, 1, i - 1);
  139. s := str$rest(s, i + 1);
  140. else p := s;
  141. s := "";
  142. end;
  143. i := str$indexc('=', p);
  144. if i = 0
  145. then signal bad_format; end;
  146. nt: nterm := str$substr(p, 1, i - 1);
  147. p := str$rest(p, i + 1);
  148. r: rule;
  149. i := str$indexc('|', p);
  150. if i > 1
  151. then nt1: nterm := str$substr(p, 1, i - 1);
  152. nt2: nterm := str$rest(p, i + 1);
  153. r := rule$make_or(nt_pair${nt1: nt1, nt2: nt2});
  154. else i := str$indexc(' ', p);
  155. if i > 1
  156. then nt1: nterm := str$substr(p, 1, i - 1);
  157. nt2: nterm := str$rest(p, i + 1);
  158. r := rule$make_and(nt_pair${nt1: nt1, nt2: nt2});
  159. else r := rule$make_term(p);
  160. end;
  161. end;
  162. yield(nt, r);
  163. end;
  164. end get_rules;
  165. remove = proc (g: cvt, nt: nterm) signals (no_rule);
  166. rtab$remove(g.rules, nt);
  167. except when not_found: signal no_rule; end;
  168. end remove;
  169. get_start_symbol = proc (g: cvt) returns (nterm);
  170. return(g.start);
  171. end get_start_symbol;
  172. set_start_symbol = proc (g: cvt, nt: nterm);
  173. g.start := nt;
  174. end set_start_symbol;
  175. get_rule = proc (g: cvt, nt: nterm) returns (rule) signals (no_rule);
  176. return(rtab$lookup(g.rules, nt));
  177. except when not_found: signal no_rule; end;
  178. end get_rule;
  179. end grammar;
  180. % An environment is used to hold the grammar and the results of all sub-parses (tasks).
  181. env = cluster is create, % grammar -> env
  182. get_rule, % env, nterm -> rule => no_rule
  183. splits_exist, % env, task -> bool
  184. splits, % env, task --> deriv, str
  185. add_split; % env, task, deriv, str ->
  186. rep = record[gram: grammar,
  187. table: etab,
  188. task: task,
  189. splits: splitlist];
  190. etab = table[entrylist];
  191. entrylist = array[entry];
  192. entry = record[nterm: nterm,
  193. splits: splitlist];
  194. splitlist = array[split];
  195. split = record[deriv: deriv,
  196. rest: str];
  197. create = proc (g: grammar) returns (cvt);
  198. return(rep${gram: g,
  199. table: etab$create(),
  200. task: task${nt, s: ""},
  201. splits: splitlist$create(1)});
  202. end create;
  203. get_rule = proc (e: cvt, nt: nterm) returns (rule) signals (no_rule);
  204. return(grammar$get_rule(e.gram, nt));
  205. except when no_rule: signal no_rule; end;
  206. end get_rule;
  207. splits_exist = proc (e: env, t: task) returns (bool);
  208. return(splitlist$size(search(e, t)) > 0);
  209. end splits_exist;
  210. splits = iter (e: env, t: task) yields (deriv, str);
  211. for sp: split in splitlist$elements(search(e, t)) do
  212. yield(sp.deriv, sp.rest);
  213. end;
  214. end splits;
  215. add_split = proc (e: env, t: task, d: deriv, rest: str);
  216. splitlist$addh(search(e, t), split${deriv: d,
  217. rest: rest});
  218. end add_split;
  219. search = proc (e: cvt, t: task) returns (splitlist);
  220. if e.task = t
  221. then return(e.splits); end;
  222. e.task := t;
  223. nt: nterm := t.nt;
  224. ents: entrylist;
  225. sp: splitlist;
  226. begin
  227. ents := etab$lookup(e.table, t.s);
  228. for ent: entry in entrylist$elements(ents) do
  229. if nt = ent.nterm
  230. then sp := ent.splits;
  231. e.splits := sp;
  232. return(sp); end;
  233. end;
  234. end; except when not_found: ents := entrylist$create(1);
  235. etab$enter(e.table, t.s, ents);
  236. end;
  237. sp := splitlist$create(1);
  238. e.splits := sp;
  239. entrylist$addh(ents, entry${nterm: nt,
  240. splits: sp});
  241. return(sp);
  242. end search;
  243. end env;
  244. % Parse prefixes of 's' starting from 'nt', yielding derivation and rest of 's'
  245. splits1 = iter (nt: nterm, s: str, e: env) yields (deriv, str);
  246. d0: deriv := deriv$create(nt);
  247. tagcase env$get_rule(e, nt)
  248. tag or (pair: nt_pair):
  249. for d: deriv, rest: str in splits(pair.nt1, s, e) do
  250. yield(d0 || d, rest);
  251. end;
  252. for d: deriv, rest: str in splits(pair.nt2, s, e) do
  253. yield(d0 || d, rest);
  254. end;
  255. tag and (pair: nt_pair):
  256. for d1: deriv, rest1: str in splits(pair.nt1, s, e) do
  257. for d2: deriv, rest2: str in splits(pair.nt2, rest1, e) do
  258. yield(d0 || d1 || d2, rest2);
  259. end;
  260. end;
  261. tag term (term: str):
  262. if str$indexs(term, s) = 1
  263. then yield(d0, str$rest(s, str$size(term) + 1)); end;
  264. end;
  265. except when no_rule: ; end;
  266. end splits1;
  267. % If the task has already been run, pull the stored splits out of the environment
  268. splits = iter (nt: nterm, s: str, e: env) yields (deriv, str);
  269. t: task := task${nt: nt, s: s};
  270. if env$splits_exist(e, t)
  271. then for d: deriv, rest: str in env$splits(e, t) do
  272. yield(d, rest);
  273. end;
  274. else for d: deriv, rest: str in splits1(nt, s, e) do
  275. yield(d, rest);
  276. env$add_split(e, t, d, rest);
  277. end;
  278. end;
  279. end splits;
  280. parse = proc (s: str, g: grammar) returns (deriv) signals (no_derivation);
  281. for d: deriv, rest: str in splits(g.start_symbol, s, env$create(g)) do
  282. if rest = ""
  283. then return(d); end;
  284. end;
  285. signal no_derivation;
  286. end parse;
  287. output = proc (s: string, g: grammar) signals (no_derivation);
  288. f: file := file$tyo();
  289. n: int := 0;
  290. for s in deriv$elements(parse(s, g)) do
  291. z: int := str$size(s);
  292. if n + z > 70
  293. then file$putc(f, '\n');
  294. n := 0;
  295. end;
  296. file$puts(f, s);
  297. file$putc(f, ' ');
  298. n := n + z + 1;
  299. end;
  300. except when no_derivation: signal no_derivation; end;
  301. file$putc(f, '\n');
  302. end output;