ReaderDispatchMisc.java 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. // Copyright (c) 2001 Per M.A. Bothner
  2. // This is free software; for terms and warranty disclaimer see ./COPYING.
  3. package gnu.kawa.lispexpr;
  4. import gnu.text.*;
  5. import gnu.mapping.*;
  6. import gnu.bytecode.PrimType;
  7. import gnu.bytecode.Type;
  8. import gnu.lists.*;
  9. import gnu.kawa.io.InPort;
  10. import gnu.kawa.io.OutPort;
  11. import gnu.kawa.util.GeneralHashTable;
  12. /* #ifdef use:java.util.regex */
  13. import java.util.regex.*;
  14. /* #endif */
  15. public class ReaderDispatchMisc extends ReadTableEntry
  16. {
  17. /** A code which specifies which particular reader-action to perform.
  18. * The code is one the CommonLisp or Scheme '#' reader characters.
  19. * For example, if code=='x' then read a hexadecimal integer.
  20. * If code==-1, perform the standard action for the character read. */
  21. protected int code;
  22. private static ReaderDispatchMisc instance = new ReaderDispatchMisc();
  23. public static ReaderDispatchMisc getInstance() { return instance; }
  24. public ReaderDispatchMisc()
  25. {
  26. code = -1;
  27. }
  28. public ReaderDispatchMisc(int code)
  29. {
  30. this.code = code;
  31. }
  32. public Object read (Lexer in, int ch, int count)
  33. throws java.io.IOException, SyntaxException
  34. {
  35. LispReader reader = (LispReader) in;
  36. char saveReadState = '\0';
  37. InPort port;
  38. int length;
  39. String name;
  40. if (code >= 0)
  41. ch = code;
  42. switch (ch)
  43. {
  44. case '*':
  45. name = reader.readTokenString(-1, ReadTable.getCurrent());
  46. int nlen = name.length();
  47. int len = nlen;
  48. if (count >= 0) {
  49. if (nlen > count)
  50. in.error("too many bits in bit vector");
  51. len = count;
  52. }
  53. boolean[] arr = new boolean[len];
  54. char prev = '0';
  55. for (int i = 0; i < len; i++) {
  56. char c = i < nlen ? name.charAt(i) : prev;
  57. prev = c;
  58. if (c == '1' || c == 't' || c == 'F')
  59. arr[i] = true;
  60. else if (! (c == '0' || c == 'f' || c == 'F')) {
  61. prev = '0';
  62. in.error("invalid character (at offset "+i+") in bitvector");
  63. }
  64. }
  65. return new BitVector(arr);
  66. case ':':
  67. // Handle Guile-style keyword syntax: '#:KEYWORD'
  68. // Note this conflicts with Common Lisp uninterned symbols. FIXME
  69. name = reader.readTokenString(-1, ReadTable.getCurrent());
  70. return gnu.expr.Keyword.make(name.intern());
  71. case '\\':
  72. return LispReader.readCharacter(reader);
  73. case '!':
  74. return LispReader.readSpecial(reader);
  75. case 'T':
  76. case 'F':
  77. int startPos = reader.tokenBufferLength;
  78. while (ch >= 0 && Character.isLetterOrDigit(ch)) {
  79. reader.tokenBufferAppend(ch);
  80. ch = reader.read();
  81. }
  82. reader.unread(ch);
  83. name = new String(reader.tokenBuffer, startPos,
  84. reader.tokenBufferLength - startPos);
  85. reader.tokenBufferLength = startPos;
  86. String nameLC = name.toLowerCase();
  87. if (nameLC.equals("t") || nameLC.equals("true"))
  88. return Boolean.TRUE;
  89. if (nameLC.equals("f") || nameLC.equals("false"))
  90. return Boolean.FALSE;
  91. PrimType elementType;
  92. if (nameLC.equals("f32"))
  93. elementType = LangPrimType.floatType;
  94. else if (nameLC.equals("f64"))
  95. elementType = LangPrimType.doubleType;
  96. else
  97. {
  98. in.error("unexpected characters following '#'");
  99. return Boolean.FALSE;
  100. }
  101. return LispReader.readGeneralArray(reader, count, elementType);
  102. case 'S':
  103. case 'U':
  104. int size = reader.readIntDigits();
  105. switch (size) {
  106. case 8:
  107. elementType = ch == 'U' ? LangPrimType.unsignedByteType
  108. : LangPrimType.byteType;
  109. break;
  110. case 16:
  111. elementType = ch == 'U' ? LangPrimType.unsignedShortType
  112. : LangPrimType.shortType;
  113. break;
  114. case 32:
  115. elementType = ch == 'U' ? LangPrimType.unsignedIntType
  116. : LangPrimType.intType;
  117. break;
  118. case 64:
  119. elementType = ch == 'U' ? LangPrimType.unsignedLongType
  120. : LangPrimType.longType;
  121. break;
  122. default:
  123. in.error("expected 8, 16, 32, or 64 after #S or #U");
  124. elementType = null;
  125. }
  126. return LispReader.readGeneralArray(reader, count, elementType);
  127. case 'R':
  128. if (count > 36)
  129. {
  130. StringBuilder sbuf = new StringBuilder("the radix ");
  131. if (count < Integer.MAX_VALUE)
  132. {
  133. sbuf.append(count);
  134. sbuf.append(' ');
  135. }
  136. sbuf.append("is too big (max is 36)");
  137. in.error(sbuf.toString());
  138. count = 36;
  139. }
  140. return LispReader.readNumberWithRadix(0, reader, count);
  141. case 'X':
  142. return LispReader.readNumberWithRadix(0, reader, 16);
  143. case 'D':
  144. return LispReader.readNumberWithRadix(0, reader, 10);
  145. case 'O':
  146. return LispReader.readNumberWithRadix(0, reader, 8);
  147. case 'B':
  148. return LispReader.readNumberWithRadix(0, reader, 2);
  149. case 'I':
  150. case 'E':
  151. reader.tokenBufferAppend('#');
  152. reader.tokenBufferAppend(ch);
  153. return LispReader.readNumberWithRadix(2, reader, 0);
  154. case 'A':
  155. return LispReader.readGeneralArray(reader, count, null);
  156. /* #ifdef use:java.util.regex */
  157. case '/':
  158. return readRegex(in, ch, count);
  159. /* #endif */
  160. case ';':
  161. port = reader.getPort();
  162. if (port instanceof InPort)
  163. {
  164. saveReadState = ((InPort) port).readState;
  165. ((InPort) port).readState = ';';
  166. }
  167. try
  168. {
  169. reader.readObject();
  170. }
  171. finally
  172. {
  173. if (port instanceof InPort)
  174. ((InPort) port).readState = saveReadState;
  175. }
  176. return Values.empty;
  177. case ',':
  178. return ReaderDispatchSyntaxQuote.readNamedConstructor(reader);
  179. case '=':
  180. return reader.readObject(count, false);
  181. case '#':
  182. if (in instanceof LispReader)
  183. {
  184. GeneralHashTable<Integer,Object> map
  185. = ((LispReader) in).sharedStructureTable;
  186. if (map != null)
  187. {
  188. Integer key = Integer.valueOf(count);
  189. Object object = map.get(key, in);
  190. if (object != in)
  191. return object;
  192. }
  193. }
  194. in.error("an unrecognized #n# back-reference was read");
  195. return Boolean.FALSE;
  196. default:
  197. in.error("An invalid #-construct was read.");
  198. return Values.empty;
  199. }
  200. }
  201. /* #ifdef use:java.util.regex */
  202. public static Pattern readRegex (Lexer in, int ch, int count)
  203. throws java.io.IOException, SyntaxException
  204. {
  205. int startPos = in.tokenBufferLength;
  206. InPort port = in.getPort();
  207. char saveReadState = '\0';
  208. int flags = 0;
  209. if (port instanceof InPort)
  210. {
  211. saveReadState = ((InPort) port).readState;
  212. ((InPort) port).readState = '/';
  213. }
  214. try
  215. {
  216. for (;;)
  217. {
  218. int next;
  219. int c = port.read();
  220. if (c < 0)
  221. in.eofError("unexpected EOF in regex literal");
  222. if (c == ch)
  223. break;
  224. if (c == '\\')
  225. {
  226. c = port.read();
  227. if ((c == ' ' || c == '\t' || c == '\r' || c == '\n')
  228. && in instanceof LispReader)
  229. {
  230. c = ((LispReader) in).readEscape(c);
  231. if (c == -2)
  232. continue;
  233. }
  234. if (c < 0)
  235. in.eofError("unexpected EOF in regex literal");
  236. if (c != ch)
  237. in.tokenBufferAppend('\\');
  238. }
  239. in.tokenBufferAppend(c);
  240. }
  241. String pattern = new String(in.tokenBuffer, startPos,
  242. in.tokenBufferLength - startPos);
  243. for (;;)
  244. {
  245. int c = in.peek();
  246. if (c == 'i' || c == 'I')
  247. flags |= Pattern.CASE_INSENSITIVE|Pattern.UNICODE_CASE;
  248. else if (c == 's' || c == 'S')
  249. flags |= Pattern.DOTALL;
  250. else if (c == 'm' || c == 'M')
  251. flags |= Pattern.MULTILINE;
  252. /* Think this through more before adding this feature:
  253. Perhaps we should use the 'x' handling from
  254. gnu.xquery.util.StringUtils.makePattern (which is
  255. smart enogh to handle space in character classes).
  256. Perhaps we should handle Scheme comments?
  257. else if (c == 'x' || c == 'X')
  258. flags |= Pattern.COMMENTS;
  259. */
  260. else if (Character.isLetter(c))
  261. {
  262. in.error("unrecognized regex option '"+((char) c)+'\'');
  263. }
  264. else
  265. break;
  266. in.skip();
  267. }
  268. return Pattern.compile(pattern, flags);
  269. }
  270. finally
  271. {
  272. in.tokenBufferLength = startPos;
  273. if (port instanceof InPort)
  274. ((InPort) port).readState = saveReadState;
  275. }
  276. }
  277. /* #endif */
  278. }