Lexer.java 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401
  1. // Copyright (c) 1999, 2004 Per M.A. Bothner.
  2. // This is free software; for terms and warranty disclaimer see ./COPYING.
  3. package gnu.text;
  4. import gnu.kawa.io.InPort;
  5. import java.io.*;
  6. /**
  7. * Framework for implementing lexical scanners and parsers.
  8. * @author Per Bothner
  9. */
  10. public class Lexer extends Reader
  11. {
  12. protected InPort port;
  13. private boolean interactive;
  14. protected boolean tentative;
  15. public Lexer(InPort port)
  16. {
  17. this.port = port;
  18. }
  19. public Lexer(InPort port, SourceMessages messages)
  20. {
  21. this.port = port;
  22. this.messages = messages;
  23. }
  24. /** Enter a nested expression.
  25. * This is used in interactive mode to control whether to continue
  26. * past end of line, depending on whether the expression is incomplete.
  27. * @param promptChar Used in prompt string to indicate type of nesting.
  28. * @return The previous value of promptChar, to be passed to popNesting.
  29. */
  30. public char pushNesting (char promptChar)
  31. {
  32. nesting++;
  33. InPort port = getPort();
  34. char save = port.readState;
  35. port.readState = promptChar;
  36. return save;
  37. }
  38. /** Exit a nested expression, reversing pushNesting
  39. * @param save Saved values return by prior pushNesting
  40. */
  41. public void popNesting (char save)
  42. {
  43. InPort port = getPort();
  44. port.readState = save;
  45. nesting--;
  46. }
  47. protected int nesting;
  48. public final InPort getPort() { return port; }
  49. public void close() throws java.io.IOException
  50. {
  51. port.close();
  52. }
  53. public int read() throws java.io.IOException
  54. {
  55. return port.read();
  56. }
  57. /** Read a Unicode character (codepoint) by checking for surrogates.
  58. * @deprecated Use {@link #readCodePoint}.
  59. */
  60. @Deprecated public int readUnicodeChar() throws java.io.IOException {
  61. return port.readCodePoint();
  62. }
  63. /** Read a Unicode character (codepoint) by checking for surrogates.
  64. */
  65. public int readCodePoint() throws java.io.IOException {
  66. return port.readCodePoint();
  67. }
  68. public int read(char[] buf, int offset, int length)
  69. throws java.io.IOException
  70. {
  71. return port.read(buf, offset, length);
  72. }
  73. public void unread(int ch) throws java.io.IOException
  74. {
  75. if (ch >= 0)
  76. port.unread();
  77. }
  78. public int peek() throws java.io.IOException
  79. {
  80. return port.peek();
  81. }
  82. public void skip() throws java.io.IOException
  83. {
  84. port.skip();
  85. }
  86. protected void unread() throws java.io.IOException
  87. {
  88. port.unread();
  89. }
  90. protected void unread_quick() throws java.io.IOException
  91. {
  92. port.unread_quick();
  93. }
  94. /**
  95. * Check if the next character matches a given character.
  96. * @param ch The character to match against.
  97. * @return if the character read matches
  98. * On a match, the position is advanced following that character.
  99. */
  100. public boolean checkNext(char ch)
  101. throws java.io.IOException
  102. {
  103. int r = port.read();
  104. if (r == ch)
  105. return true;
  106. if (r >= 0)
  107. port.unread_quick();
  108. return false;
  109. }
  110. protected void skip_quick() throws java.io.IOException
  111. {
  112. port.skip_quick();
  113. }
  114. SourceMessages messages = null;
  115. public SourceMessages getMessages () { return messages; }
  116. public void setMessages (SourceMessages messages)
  117. { this.messages = messages; }
  118. /** Returns true if any error were seen. Prints and clears the errors.
  119. * @param out where to write the error message to
  120. * @param max maximum number of messages to print (can be 0) */
  121. public boolean checkErrors(PrintWriter out, int max)
  122. {
  123. return messages != null && messages.checkErrors(out, max);
  124. }
  125. public SourceError getErrors()
  126. { return messages == null ? null : messages.getErrors(); }
  127. public boolean seenErrors()
  128. { return messages != null && messages.seenErrors(); }
  129. public void clearErrors() { if (messages != null) messages.clearErrors(); }
  130. public void error(char severity, String filename, int line, int column,
  131. String message)
  132. {
  133. if (messages == null)
  134. messages = new SourceMessages();
  135. messages.error(severity, filename, line, column, message);
  136. }
  137. public void error(char severity, String message)
  138. {
  139. int line = port.getLineNumber();
  140. int column = port.getColumnNumber();
  141. error(severity, port.getName(), line + 1, column >= 0 ? column + 1 : 0,
  142. message);
  143. }
  144. public void error(String message)
  145. {
  146. error('e', message);
  147. }
  148. public void fatal(String message) throws SyntaxException
  149. {
  150. error('f', message);
  151. throw new SyntaxException(messages);
  152. }
  153. public void eofError(String msg) throws SyntaxException
  154. {
  155. fatal(msg);
  156. }
  157. public void eofError(String message, int startLine, int startColumn)
  158. throws SyntaxException
  159. {
  160. error('f', port.getName(), startLine, startColumn, message);
  161. throw new SyntaxException(messages);
  162. }
  163. /** Read an optional signed integer.
  164. * If there is no integer in the input stream, return 1.
  165. * For excessively large exponents, return Integer.MIN_VALUE
  166. * or Integer.MAX_VALUE.
  167. */
  168. public int readOptionalExponent()
  169. throws java.io.IOException
  170. {
  171. int sign = read();
  172. boolean overflow = false;
  173. int c;
  174. if (sign == '+' || sign == '-')
  175. c = read();
  176. else
  177. {
  178. c = sign;
  179. sign = 0;
  180. }
  181. int value;
  182. if (c < 0 || (value = Character.digit ((char)c, 10)) < 0)
  183. {
  184. if (sign != 0)
  185. error("exponent sign not followed by digit");
  186. value = 1;
  187. }
  188. else
  189. {
  190. int max = (Integer.MAX_VALUE - 9) / 10;
  191. for (;;)
  192. {
  193. c = read();
  194. int d = Character.digit ((char)c, 10);
  195. if (d < 0)
  196. break;
  197. if (value > max)
  198. overflow = true;
  199. value = 10 * value + d;
  200. }
  201. }
  202. if (c >= 0)
  203. unread(c);
  204. if (sign == '-')
  205. value = -value;
  206. if (overflow)
  207. return sign == '-' ? Integer.MIN_VALUE : Integer.MAX_VALUE;
  208. return value;
  209. }
  210. /** Scan until a given delimiter.
  211. * On success, text upto the delimiter is in then tokenBuffer (with
  212. * tokenBufferLength marking its length); the delimiter is not included.
  213. */
  214. public boolean readDelimited(String delimiter)
  215. throws java.io.IOException, SyntaxException
  216. {
  217. tokenBufferLength = 0;
  218. int dlen = delimiter.length();
  219. char last = delimiter.charAt(dlen-1);
  220. for (;;)
  221. {
  222. int ch = read();
  223. if (ch < 0)
  224. return false;
  225. int dstart, j;
  226. // Look for a match for the last delimiter character.
  227. if (ch == last
  228. && (dstart = tokenBufferLength - (j = dlen - 1)) >= 0)
  229. {
  230. // Check that the initial part of the delimiter has also been seen.
  231. do
  232. {
  233. if (j == 0)
  234. {
  235. tokenBufferLength = dstart;
  236. return true;
  237. }
  238. j--;
  239. }
  240. while (tokenBuffer[dstart+j] == delimiter.charAt(j));
  241. }
  242. tokenBufferAppend((char) ch);
  243. }
  244. }
  245. /** Read digits, up to the first non-digit or the buffer limit
  246. * @param ival previously-seen digits or -2 if no digits seen
  247. * @return the digits seen as a non-negative long, or -1 on overflow,
  248. * or -2 if no digits seen
  249. */
  250. public static long readDigitsInBuffer (InPort port, long ival, int radix)
  251. {
  252. int i = port.pos;
  253. if (i >= port.limit)
  254. return ival;
  255. for (;;)
  256. {
  257. char c = port.buffer[i];
  258. int dval = Character.digit(c, radix);
  259. if (dval < 0)
  260. break;
  261. if (ival == -2) // initial digits
  262. ival = dval;
  263. else if (ival == -1)
  264. ;
  265. else if (ival > (Long.MAX_VALUE - dval) / radix)
  266. ival = -1;
  267. else
  268. ival = ival * radix + dval;
  269. if (++i >= port.limit)
  270. break;
  271. }
  272. port.pos = i;
  273. return ival;
  274. }
  275. public static long readDigits(InPort port, int radix) throws IOException {
  276. long ival = -2;
  277. for (;;) {
  278. ival = readDigitsInBuffer(port, ival, radix);
  279. if (port.pos < port.limit || port.peek() < 0)
  280. break;
  281. }
  282. return ival;
  283. }
  284. public int readIntDigits() throws IOException {
  285. long lval = readDigits(port, 10);
  286. int ival = (int) lval;
  287. if (ival == -1 || ival != lval)
  288. return Integer.MAX_VALUE;
  289. return ival < 0 ? -1 : ival;
  290. }
  291. public String getName() { return port.getName(); }
  292. /** Get the current line number.
  293. * The "first" line is number number 0. */
  294. public int getLineNumber() { return port.getLineNumber(); }
  295. /** Return the current (zero-based) column number. */
  296. public int getColumnNumber() { return port.getColumnNumber(); }
  297. public boolean isInteractive() { return interactive; }
  298. public void setInteractive(boolean v) { interactive = v; }
  299. /** True if input may be incomplete or actively edited.
  300. * Used for command-completion and on-the-fly error checking. */
  301. public boolean isTentative() { return tentative; }
  302. public void setTentative(boolean v) { tentative = v; }
  303. /** For building tokens of various kinds. */
  304. public char[] tokenBuffer = new char[100];
  305. /** The number of chars of tokenBuffer that are used. */
  306. public int tokenBufferLength = 0;
  307. /** Append one character to tokenBuffer, resizing it if need be. */
  308. public void tokenBufferAppend(int ch)
  309. {
  310. if (ch >= 0x10000)
  311. {
  312. tokenBufferAppend(((ch - 0x10000) >> 10) + 0xD800);
  313. ch = (ch & 0x3FF) + 0xDC00;
  314. // fall through to append low surrogate.
  315. }
  316. int len = tokenBufferLength;
  317. char[] buffer = tokenBuffer;
  318. if (len == tokenBuffer.length)
  319. {
  320. tokenBuffer = new char[2 * len];
  321. System.arraycopy(buffer, 0, tokenBuffer, 0, len);
  322. buffer = tokenBuffer;
  323. }
  324. buffer[len] = (char) ch;
  325. tokenBufferLength = len + 1;
  326. }
  327. public String tokenBufferString ()
  328. {
  329. return new String(tokenBuffer, 0, tokenBufferLength);
  330. }
  331. private int saveTokenBufferLength = -1;
  332. /** Start tentative parsing. Must be followed by a reset. */
  333. public void mark ()
  334. throws java.io.IOException
  335. {
  336. if (saveTokenBufferLength >= 0)
  337. throw new Error("internal error: recursive call to mark not allowed");
  338. port.mark(Integer.MAX_VALUE);
  339. saveTokenBufferLength = tokenBufferLength;
  340. }
  341. /** Stop tentative parsing. Return to position where we called mark. */
  342. public void reset ()
  343. throws java.io.IOException
  344. {
  345. if (saveTokenBufferLength < 0)
  346. throw new Error("internal error: reset called without prior mark");
  347. port.reset();
  348. saveTokenBufferLength = -1;
  349. }
  350. }