123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188 |
- (* 'a is a parameter specifying what kind of result tree will be returned *)
- type pstring = char list;
- type 'a parse_data = ('a * pstring);
- type 'a parse_result = 'a parse_data list;
- datatype 'a result = ParseFailure
- | ParseSuccess of 'a parse_result;
- type 'a parser = pstring -> 'a result;
- (*
- result :: a -> Parser a
- result v = \inp -> [(v, inp)]
- *)
- fun result value input = ParseSuccess [(value, input)];
- fun zero input = ParseFailure;
- fun item [] = ParseFailure
- | item (c::cs) =
- let
- val value = c;
- val unconsumed_input = cs;
- in
- ParseSuccess [(value, unconsumed_input)]
- end;
- (* The paper makes use of a concat function, which flattens nested lists of
- depth 2 to a flat list. *)
- fun concat nil = nil
- | concat (nil::rest_outer) = concat rest_outer
- | concat ((first::rest_inner)::rest_outer) =
- first :: concat (rest_inner :: rest_outer);
- (* The paper also makes use of a list comprehension, which does not exist in
- SML. To emulate it we write a compose function. *)
- fun compose inner outer =
- fn any => outer (inner any);
- (* The parser bind "integrates {sequencing of parsers} with {processing of their
- result values}. bind has the following signature:
- Take
- - a Parser of type 'a,
- - a function which maps from type 'a to a Parser of type 'b
- Return
- - a Parser of type 'b
- The Parser of type 'b will be constructed from Parser of type 'a and the mapping
- function. *)
- fun bind p f (input: pstring) =
- ParseSuccess
- (concat
- (map
- (fn (value, unconsumed_input) => f value unconsumed_input)
- (p input)));
- fun sequence p q =
- bind p (fn x =>
- (bind q (fn y =>
- result x y)));
- fun bind (p: 'a parser) (f: 'a -> 'b parser) =
- (* bind must return a parser itself, so that we can combine parsers into new
- parsers. A parser always takes some input string, here called `input`. *)
- (fn input: pstring =>
- (ParseSuccess
- (concat
- (map
- (fn ((value: 'a, unconsumed_input): 'a parse_data) =>
- ((f value): 'b parser) unconsumed_input)
- (p input)))): 'b result): 'b parser;
- fun bind (p: 'a parser) (f: 'a -> 'b parser) =
- (* bind must return a parser itself, so that we can combine parsers into new
- parsers. A parser always takes some input string, here called `input`. *)
- (fn (input: pstring) =>
- let
- val result_of_parser_a = p input;
- in
- result_of_parser_a
- end): 'b parser;
- (* =========================================================
- Approach from https://invidio.xamh.de/watch?v=RDalzi7mhdY
- ========================================================= *)
- (* (matched char, remaining chars, failure message) *)
- type result2 = (pstring * char list * string);
- fun pchar2 (char_to_match: char) (input: pstring) =
- if List.null input
- then
- ([], input, "no characters left to match"): result2
- else
- let
- val (first_char::rest_chars) = input;
- in
- if first_char = char_to_match
- then
- ([first_char], rest_chars, ""): result2
- else
- ([], input, "first character does not match"): result2
- end;
- (* 3 - introduce Success and Failure datatype *)
- type result3 = (char * char list);
- datatype 'a result = Success of result3
- | Failure of string;
- fun pchar3 (char_to_match: char) (input: pstring) =
- if List.null input
- then
- Failure "no characters left to match"
- else
- let
- val (first_char::rest_chars) = input;
- in
- if first_char = char_to_match
- then
- Success (first_char, rest_chars)
- else
- Failure "first character does not match"
- end;
- (* 4 - wrap function as a type "parser" *)
- type 'a parser_outcome_value = ('a list * pstring);
- datatype 'a ParseOutcome = Success of 'a parser_outcome_value
- | Failure of string;
- datatype 'a Parser = Parser of (pstring -> 'a ParseOutcome);
- fun pchar (char_to_match: char) =
- let
- fun parsing_func input =
- if List.null input
- then
- Failure "no characters left to match"
- else
- let
- val (first_char::rest_chars) = input;
- in
- if first_char = char_to_match
- then
- Success ([first_char], rest_chars)
- else
- Failure "first character does not match"
- end;
- in
- Parser parsing_func
- end;
- fun run parser input =
- let
- val (Parser parsing_func) = parser;
- in
- parsing_func input
- end;
- fun pcompose parser1 parser2 =
- let
- fun parsing_func input =
- let
- val result1 = run parser1 input;
- in
- case result1 of
- Failure err => result1
- | Success (value1, remaining1) =>
- let
- val result2 = run parser2 remaining1;
- in
- case result2 of
- Failure err => result2
- | Success (value2, remaining2) =>
- Success (value1 @ value2, remaining2)
- end
- end
- in
- Parser parsing_func
- end;
|