123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347 |
- % TDP CLU
- %
- % A general, non-forgetful, top-down parser
- str = string;
- nterm = string;
- rule = oneof[or: nt_pair,
- and: nt_pair,
- term: str];
- nt_pair = record[nt1, nt2: nterm];
- task = record[nt: nterm,
- s: str];
- % Because of the simple grammar form, a derivation (parse tree) can be
- % represented by a list of non-terminals.
- % This cluster has only the bare essentials needed for the program.
- deriv = cluster is create, % nterm -> deriv
- concat, % deriv, deriv -> deriv
- elements; % deriv --> nterm
- rep = oneof[pair: pair,
- one: nterm];
- pair = record[head, tail: deriv];
- create = proc (nt: nterm) returns (cvt);
- return(rep$make_one(nt));
- end create;
- concat = proc (d1, d2: deriv) returns (cvt);
- return(rep$make_pair(pair${head: d1, tail: d2}));
- end concat;
- elements = iter (d: cvt) yields (nterm);
- tagcase d
- tag pair (p: pair):
- for nt: nterm in elements(p.head) do
- yield(nt);
- end;
- for nt: nterm in elements(p.tail) do
- yield(nt);
- end;
- tag one (nt: nterm):
- yield(nt);
- end;
- end elements;
- end deriv;
- % A simple hash table mapping strings to objects of type 't'.
- table = cluster [t: type] is create, % -> table[t]
- lookup, % table[t], str -> t => not_found
- enter, % table[t], str, t => already_exists
- alter, % table[t], str, t ->
- remove; % table[t], str => not_found
- rep = array[bucket];
- bucket = array[entry];
- entry = record[key: str,
- value: t];
- size = 211;
- create = proc () returns (cvt);
- tab: rep := rep$predict(1, size);
- for i: int in int$from_to(1, size) do
- rep$addh(tab, bucket$create(1));
- end;
- return(tab);
- end create;
- lookup = proc (tab: cvt, key: str) returns (t) signals (not_found);
- for ent: entry in bucket$elements(tab[hash(key, size)]) do
- if key = ent.key
- then return(ent.value); end;
- end;
- signal not_found;
- end lookup;
- enter = proc (tab: cvt, key: str, value: t) signals (already_exists);
- buck: bucket := tab[hash(key, size)];
- for ent: entry in bucket$elements(buck) do
- if key = ent.key
- then signal already_exists; end;
- end;
- bucket$addh(buck, entry${key: key,
- value: value});
- end enter;
- alter = proc (tab: cvt, key: str, value: t);
- buck: bucket := tab[hash(key, size)];
- for ent: entry in bucket$elements(buck) do
- if key = ent.key
- then ent.value := value;
- return;
- end;
- end;
- bucket$addh(buck, entry${key: key,
- value: value});
- end alter;
- remove = proc (tab: cvt, key: str) signals (not_found);
- buck: bucket := tab[hash(key, size)];
- for i: int in bucket$indexes(buck) do
- if key = buck[i].key
- then h: int := bucket$high(buck);
- if i < h
- then buck[i] := buck[h]; end;
- bucket$remh(buck);
- return;
- end;
- end;
- signal not_found;
- end remove;
- end table;
- % Only three kinds of productions are allowed, with the following syntax:
- % nterm=nterm|nterm
- % nterm=nterm nterm
- % nterm=terminal
- % Productions must be separated by newlines.
- grammar = cluster is create, % nterm, str -> grammar
- % => bad_format, multiple_rules
- alter, % grammar, str => bad_format
- remove, % grammar, nterm => no_rule
- get_start_symbol, % grammar -> nterm
- set_start_symbol, % grammar, nterm ->
- get_rule; % grammar, nterm -> rule => no_rule
- rep = record[start: nterm,
- rules: rtab];
- rtab = table[rule];
- create = proc (start: nterm, s: str) returns (cvt) signals (bad_format, multiple_rules);
- tab: rtab := rtab$create();
- for nt: nterm, r: rule in get_rules(s) do
- rtab$enter(tab, nt, r);
- end;
- except when bad_format: signal bad_format;
- when already_exists: signal multiple_rules;
- end;
- return(rep${start: start,
- rules: tab});
- end create;
- alter = proc (g: cvt, s: str) signals (bad_format);
- tab: rtab := g.rules;
- for nt: nterm, r: rule in get_rules(s) do
- rtab$alter(tab, nt, r);
- end;
- except when bad_format: signal bad_format; end;
- end alter;
- get_rules = iter (s: str) yields (nterm, rule) signals (bad_format);
- while str$size(s) > 0 do
- i: int := str$indexc('\n', s);
- p: str;
- if i > 0
- then p := str$substr(s, 1, i - 1);
- s := str$rest(s, i + 1);
- else p := s;
- s := "";
- end;
- i := str$indexc('=', p);
- if i = 0
- then signal bad_format; end;
- nt: nterm := str$substr(p, 1, i - 1);
- p := str$rest(p, i + 1);
- r: rule;
- i := str$indexc('|', p);
- if i > 1
- then nt1: nterm := str$substr(p, 1, i - 1);
- nt2: nterm := str$rest(p, i + 1);
- r := rule$make_or(nt_pair${nt1: nt1, nt2: nt2});
- else i := str$indexc(' ', p);
- if i > 1
- then nt1: nterm := str$substr(p, 1, i - 1);
- nt2: nterm := str$rest(p, i + 1);
- r := rule$make_and(nt_pair${nt1: nt1, nt2: nt2});
- else r := rule$make_term(p);
- end;
- end;
- yield(nt, r);
- end;
- end get_rules;
- remove = proc (g: cvt, nt: nterm) signals (no_rule);
- rtab$remove(g.rules, nt);
- except when not_found: signal no_rule; end;
- end remove;
- get_start_symbol = proc (g: cvt) returns (nterm);
- return(g.start);
- end get_start_symbol;
- set_start_symbol = proc (g: cvt, nt: nterm);
- g.start := nt;
- end set_start_symbol;
- get_rule = proc (g: cvt, nt: nterm) returns (rule) signals (no_rule);
- return(rtab$lookup(g.rules, nt));
- except when not_found: signal no_rule; end;
- end get_rule;
- end grammar;
- % An environment is used to hold the grammar and the results of all sub-parses (tasks).
- env = cluster is create, % grammar -> env
- get_rule, % env, nterm -> rule => no_rule
- splits_exist, % env, task -> bool
- splits, % env, task --> deriv, str
- add_split; % env, task, deriv, str ->
- rep = record[gram: grammar,
- table: etab,
- task: task,
- splits: splitlist];
- etab = table[entrylist];
- entrylist = array[entry];
- entry = record[nterm: nterm,
- splits: splitlist];
- splitlist = array[split];
- split = record[deriv: deriv,
- rest: str];
- create = proc (g: grammar) returns (cvt);
- return(rep${gram: g,
- table: etab$create(),
- task: task${nt, s: ""},
- splits: splitlist$create(1)});
- end create;
- get_rule = proc (e: cvt, nt: nterm) returns (rule) signals (no_rule);
- return(grammar$get_rule(e.gram, nt));
- except when no_rule: signal no_rule; end;
- end get_rule;
- splits_exist = proc (e: env, t: task) returns (bool);
- return(splitlist$size(search(e, t)) > 0);
- end splits_exist;
- splits = iter (e: env, t: task) yields (deriv, str);
- for sp: split in splitlist$elements(search(e, t)) do
- yield(sp.deriv, sp.rest);
- end;
- end splits;
- add_split = proc (e: env, t: task, d: deriv, rest: str);
- splitlist$addh(search(e, t), split${deriv: d,
- rest: rest});
- end add_split;
- search = proc (e: cvt, t: task) returns (splitlist);
- if e.task = t
- then return(e.splits); end;
- e.task := t;
- nt: nterm := t.nt;
- ents: entrylist;
- sp: splitlist;
- begin
- ents := etab$lookup(e.table, t.s);
- for ent: entry in entrylist$elements(ents) do
- if nt = ent.nterm
- then sp := ent.splits;
- e.splits := sp;
- return(sp); end;
- end;
- end; except when not_found: ents := entrylist$create(1);
- etab$enter(e.table, t.s, ents);
- end;
- sp := splitlist$create(1);
- e.splits := sp;
- entrylist$addh(ents, entry${nterm: nt,
- splits: sp});
- return(sp);
- end search;
- end env;
- % Parse prefixes of 's' starting from 'nt', yielding derivation and rest of 's'
- splits1 = iter (nt: nterm, s: str, e: env) yields (deriv, str);
- d0: deriv := deriv$create(nt);
- tagcase env$get_rule(e, nt)
- tag or (pair: nt_pair):
- for d: deriv, rest: str in splits(pair.nt1, s, e) do
- yield(d0 || d, rest);
- end;
- for d: deriv, rest: str in splits(pair.nt2, s, e) do
- yield(d0 || d, rest);
- end;
- tag and (pair: nt_pair):
- for d1: deriv, rest1: str in splits(pair.nt1, s, e) do
- for d2: deriv, rest2: str in splits(pair.nt2, rest1, e) do
- yield(d0 || d1 || d2, rest2);
- end;
- end;
- tag term (term: str):
- if str$indexs(term, s) = 1
- then yield(d0, str$rest(s, str$size(term) + 1)); end;
- end;
- except when no_rule: ; end;
- end splits1;
- % If the task has already been run, pull the stored splits out of the environment
- splits = iter (nt: nterm, s: str, e: env) yields (deriv, str);
- t: task := task${nt: nt, s: s};
- if env$splits_exist(e, t)
- then for d: deriv, rest: str in env$splits(e, t) do
- yield(d, rest);
- end;
- else for d: deriv, rest: str in splits1(nt, s, e) do
- yield(d, rest);
- env$add_split(e, t, d, rest);
- end;
- end;
- end splits;
- parse = proc (s: str, g: grammar) returns (deriv) signals (no_derivation);
- for d: deriv, rest: str in splits(g.start_symbol, s, env$create(g)) do
- if rest = ""
- then return(d); end;
- end;
- signal no_derivation;
- end parse;
- output = proc (s: string, g: grammar) signals (no_derivation);
- f: file := file$tyo();
- n: int := 0;
- for s in deriv$elements(parse(s, g)) do
- z: int := str$size(s);
- if n + z > 70
- then file$putc(f, '\n');
- n := 0;
- end;
- file$puts(f, s);
- file$putc(f, ' ');
- n := n + z + 1;
- end;
- except when no_derivation: signal no_derivation; end;
- file$putc(f, '\n');
- end output;
|