ReaderExtendedLiteral.java 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445
  1. // Copyright (c) 2012, 2013 Per M.A. Bothner
  2. // This is free software; for terms and warranty disclaimer see ../../../COPYING.
  3. package gnu.kawa.lispexpr;
  4. import gnu.kawa.io.InPort;
  5. import gnu.mapping.*;
  6. import gnu.lists.*;
  7. import gnu.expr.*;
  8. import gnu.text.*;
  9. import java.io.*;
  10. import gnu.xml.XName; // FIXME - not available if --disable-xml
  11. public class ReaderExtendedLiteral extends ReaderConstituent {
    /** Operator symbol used by {@code readNamedLiteral} when the literal has no tag. */
    static final Symbol qstringSymbol = Symbol.valueOf("$string$");
    /** Function symbol placed at the head of a {@code ~}-style format item. */
    static final Symbol formatSymbol = Symbol.valueOf("$format$");
    /** Function symbol placed at the head of a {@code %}-style format item. */
    static final Symbol sprintfSymbol = Symbol.valueOf("$sprintf$");
    /** Marker inserted before the values of an enclosed expression in literal content. */
    static final Symbol startEnclosedSymbol = Symbol.valueOf("$<<$");
    /** Marker inserted after the values of an enclosed expression. */
    static final Symbol endEnclosedSymbol = Symbol.valueOf("$>>$");
    /** Character that introduces escapes inside literal content; {@code '&'} by default. */
    public char escapeChar;
    /**
     * Creates a reader entry that uses the given escape character.
     *
     * @param escapeChar character that introduces escapes inside literal content
     */
    public ReaderExtendedLiteral(char escapeChar) {
        super(ReadTable.CONSTITUENT);
        this.escapeChar = escapeChar;
    }
    /** Creates a reader entry that uses {@code '&'} as the escape character. */
    public ReaderExtendedLiteral() { this('&'); }
  23. public Object read(Lexer in, int ch, int count)
  24. throws java.io.IOException, SyntaxException {
  25. LispReader reader = (LispReader) in;
  26. int startPos = reader.tokenBufferLength;
  27. ReadTable rtable = ReadTable.getCurrent();
  28. int startLine = reader.getLineNumber() + 1;
  29. int startColumn = reader.getColumnNumber() - 2;
  30. in.tokenBufferAppend(ch);
  31. int next = reader.read();
  32. next = scanTag(reader, next);
  33. Object result;
  34. if (next == '{' || next == '[') {
  35. int len = reader.tokenBufferLength - startPos - 1;
  36. String tag = len == 0 ? null
  37. : new String(reader.tokenBuffer, startPos+1, len);
  38. reader.tokenBufferLength = startPos;
  39. result = readNamedLiteral(reader, rtable, tag, next, startLine, startColumn);
  40. } else {
  41. result = reader.readAndHandleToken(next, startPos, rtable);
  42. }
  43. return result;
  44. }
  45. protected int enclosedExprDelim(int ch, LispReader reader) {
  46. return ch == '[' ? ']' : -1;
  47. }
  48. public Object readNamedLiteral(LispReader reader, ReadTable rtable,
  49. String tag, int next,
  50. int startLine, int startColumn)
  51. throws java.io.IOException, SyntaxException {
  52. Object operator = tag == null ? qstringSymbol
  53. : LispLanguage.constructNamespace.getSymbol(tag);
  54. Pair result = PairWithPosition.make(operator, null,
  55. reader.getName(),
  56. startLine, startColumn);
  57. Pair rtail = result;
  58. int endDelimiter = enclosedExprDelim(next, reader);
  59. if (endDelimiter >= 0 && tag != null) {
  60. int line = reader.getLineNumber() + 1;
  61. int column = reader.getColumnNumber();
  62. rtail = readEnclosed(reader, rtable, rtail, next, endDelimiter);
  63. Pair endMarker = reader.makePair(endEnclosedSymbol, LList.Empty,
  64. reader.getLineNumber() + 1,
  65. reader.getColumnNumber());
  66. rtail.setCdrBackdoor(endMarker);
  67. rtail = endMarker;
  68. next = reader.read();
  69. }
  70. if (next == '{') {
  71. readContent(reader, '}', rtail);
  72. }
  73. else if (tag == null) {
  74. reader.error("unexpected character after "+escapeChar);
  75. } else
  76. reader.unread(next);
  77. return result;
  78. }
  79. protected Object checkDelim(LispReader reader, int next, int delimiter)
  80. throws java.io.IOException, SyntaxException {
  81. return next == delimiter || next < 0 ? Special.eof : null;
  82. }
    /**
     * Tells whether a content character opens a nested level; {@code readContent}
     * increments its nesting counter for each such character.
     *
     * @param next the character to test
     * @return {@code true} only for <code>'{'</code>
     */
    protected boolean isNestableStartDelim(int next) {
        return next == '{';
    }
    /**
     * Tells whether a closing delimiter takes part in nesting; when it does,
     * {@code readContent} only stops once its nesting counter reaches zero.
     *
     * @param next the character to test
     * @return {@code true} only for <code>'}'</code>
     */
    protected boolean isNestableEndDelim(int next) {
        return next == '}';
    }
  89. public Pair readContent(LispReader reader, char delimiter, Pair head)
  90. throws java.io.IOException, SyntaxException {
  91. Pair resultTail = head;
  92. reader.tokenBufferLength = 0;
  93. int braceNesting = 1;
  94. // If lineStart >= 0 then it is an index into tokenBuffer
  95. // such that &| should delete up to lineStart.
  96. // However, there is an error if we've seen a non-space,
  97. // and so we use nonSpace to mark the first non-space character
  98. // on the line.
  99. int lineStart = -1;
  100. int nonSpace = -1;
  101. for (;;) {
  102. Object item = null;
  103. int line = reader.getLineNumber() + 1;
  104. int column = reader.getColumnNumber();
  105. int next = reader.readCodePoint();
  106. if (next == '\r' || next == '\n') {
  107. // As a special case, if this is the first newline
  108. // since the start of the literal text, and we haven't
  109. // seen any whitespace, then &| also delete this newline.
  110. if (lineStart < 0 && nonSpace < 0)
  111. lineStart = 0;
  112. else
  113. lineStart = reader.tokenBufferLength + 1;
  114. nonSpace = -1;
  115. }
  116. else if (nonSpace < 0 && next != ' ' && next != '\t') {
  117. nonSpace = reader.tokenBufferLength;
  118. }
  119. if (next < 0) {
  120. reader.eofError("unexpected end-of-file");
  121. }
  122. else if (next == delimiter
  123. && (! isNestableEndDelim(next)
  124. || --braceNesting == 0))
  125. item = Special.eof;
  126. else if (next == escapeChar) {
  127. int next1 = reader.peek();
  128. if (next1 == '|') {
  129. int skipped = 0;
  130. int blen = reader.tokenBufferLength;
  131. if (lineStart < 0) {
  132. reader.error('e', reader.getName(),
  133. line, column+1,
  134. "invalid '"+escapeChar+"|'");
  135. } else if (nonSpace != reader.tokenBufferLength) {
  136. reader.error('e', reader.getName(),
  137. line,
  138. nonSpace - lineStart + 1,
  139. "non-whitespace before '"+escapeChar+"|'");
  140. }
  141. else
  142. reader.tokenBufferLength = lineStart;
  143. reader.skip();
  144. continue;
  145. } else if (next1 == '-') {
  146. reader.skip();
  147. boolean complained = false;
  148. for (;;) {
  149. next = reader.read();
  150. if (next == '\r' || next == '\n')
  151. break;
  152. if (! complained && next != ' ' && next != '\t') {
  153. reader.error('e', reader.getName(),
  154. reader.getLineNumber() + 1,
  155. reader.getColumnNumber(),
  156. "non-whitespace after '"+escapeChar+"-'");
  157. complained = true;
  158. }
  159. }
  160. lineStart = reader.tokenBufferLength;
  161. nonSpace = -1;
  162. continue;
  163. } else if (next1 == '#') {
  164. reader.skip();
  165. next = reader.read();
  166. if (next == '|') {
  167. ReaderNestedComment.getLispInstance()
  168. .readNestedComment(reader);
  169. } else
  170. readCharRef(reader, next);
  171. }
  172. } else {
  173. if (isNestableStartDelim(next))
  174. braceNesting++;
  175. reader.tokenBufferAppend(next);
  176. if (next == ']' && delimiter == '<') {
  177. if (reader.peek() == ']') {
  178. reader.skip();
  179. reader.tokenBufferAppend(']');
  180. if (reader.peek() == '>') {
  181. reader.error('w', reader.getName(), line, column+1,
  182. "literal ']]>' is only valid following '<![CDATA['");
  183. }
  184. }
  185. }
  186. next = ' ';
  187. }
  188. if (reader.tokenBufferLength > 0
  189. && (next == delimiter || next == escapeChar || next < 0)) {
  190. String text = reader.tokenBufferString();
  191. reader.tokenBufferLength = 0;
  192. Object tnode = wrapText(text);
  193. Pair pair = PairWithPosition.make(tnode, reader.makeNil(),
  194. null, -1, -1); // FIXME
  195. resultTail.setCdrBackdoor(pair);
  196. resultTail = pair;
  197. }
  198. if (next == escapeChar) {
  199. ReadTable rtable = ReadTable.getCurrent();
  200. next = reader.read();
  201. int endDelimiter = enclosedExprDelim(next, reader);
  202. if (endDelimiter >= 0 || next == '(') {
  203. Pair qq =
  204. reader.makePair(startEnclosedSymbol, LList.Empty,
  205. line, column);
  206. resultTail.setCdrBackdoor(qq);
  207. resultTail = qq;
  208. resultTail = readEnclosed(reader, rtable, resultTail, next, endDelimiter);
  209. item = endEnclosedSymbol;
  210. }
  211. else if (next == '~' || next == '%') {
  212. boolean sawQuote = false;
  213. boolean printfStyle = next == '%';
  214. boolean needEnclosed;
  215. int magic = next;
  216. for (;;) {
  217. reader.tokenBufferAppend(next);
  218. next = reader.read();
  219. if (next < 0 || next == '\n') {
  220. reader.error('e', "non-terminated format specifier");
  221. needEnclosed = false;
  222. break;
  223. }
  224. if (sawQuote)
  225. sawQuote = false;
  226. else if (next == '\'' && magic == '~')
  227. sawQuote = true;
  228. // Prefix characters allowed in a format directive.
  229. // We should probably be more restrictive.
  230. else if ((next >= '0' && next <= '9')
  231. || next == '+' || next == '-' || next == ' '
  232. || (printfStyle
  233. ? (next == '.' || next == '*')
  234. : (next == ',' || next == '#'
  235. || next == 'v' || next == 'V'
  236. || next == ':' || next == '@')))
  237. ; // prefix directive part
  238. else {
  239. // next is (hopefully) a directive character
  240. reader.tokenBufferAppend(next);
  241. next = reader.read();
  242. if (next == '[' || next == '(') {
  243. needEnclosed = true;
  244. break;
  245. } else if (next != magic) {
  246. reader.unread(next);
  247. needEnclosed = false;
  248. break;
  249. }
  250. // if next==magic continue to read next specifier.
  251. }
  252. }
  253. String fmt = reader.tokenBufferString();
  254. endDelimiter = enclosedExprDelim(next, reader);
  255. reader.tokenBufferLength = 0;
  256. Pair ffmt = reader.makePair(fmt, LList.Empty, line, column);
  257. Object fun = printfStyle ? sprintfSymbol : formatSymbol;
  258. Pair fhead = reader.makePair(fun, ffmt,
  259. line, column);
  260. if (needEnclosed)
  261. readEnclosed(reader, rtable, ffmt, next, endDelimiter);
  262. item = fhead;
  263. }
  264. else {
  265. int startPos = reader.tokenBufferLength;
  266. next = scanTag(reader, next);
  267. String str = new String(reader.tokenBuffer, startPos,
  268. reader.tokenBufferLength-startPos);
  269. reader.tokenBufferLength = startPos;
  270. reader.unread(next);
  271. if (next == '[' || next == '{') {
  272. item = readNamedLiteral(reader, rtable, str, reader.read(),
  273. line, column);
  274. } else if (next == ';') {
  275. item = checkEntity(reader, str);
  276. } else {
  277. reader.error('e', "expected '[', '{', or ';'");
  278. }
  279. }
  280. }
  281. else {
  282. item = checkDelim(reader, next, delimiter);
  283. }
  284. if (item == Special.eof)
  285. break;
  286. if (item != null) {
  287. Pair pair = PairWithPosition.make(item, reader.makeNil(),
  288. reader.getName(),
  289. line, column+1);
  290. resultTail.setCdrBackdoor(pair);
  291. resultTail = pair;
  292. }
  293. }
  294. return resultTail;
  295. }
    /**
     * Converts a run of literal text into the item that {@code readContent}
     * appends to its result list.  This implementation returns the text
     * unchanged; being protected and non-final, it can be overridden.
     *
     * @param text the accumulated literal text
     * @return the object to store in the result list
     */
    protected Object wrapText(String text) {
        return text;
    }
  299. protected Object readEnclosedSingleExpression (LispReader reader, ReadTable readTable, int ch)
  300. throws IOException, SyntaxException {
  301. if (ch == '(') {
  302. reader.unread(ch);
  303. return reader.readObject();
  304. } else {
  305. int endDelimiter = enclosedExprDelim(ch, reader);
  306. Pair head = new Pair(null, LList.Empty);
  307. int line = reader.getLineNumber() + 1;
  308. int column = reader.getColumnNumber() + 1; // Column after '['
  309. Pair tail = readEnclosedExpressions(reader, readTable, head, endDelimiter);
  310. if (head == tail) {
  311. reader.error('e', reader.getName(), line, column,
  312. "missing expression");
  313. return "<missing>";
  314. }
  315. Pair first = (Pair) head.getCdr();
  316. if (first.getCdr() != LList.Empty)
  317. reader.error('e', reader.getName(), line, column,
  318. "too many expressions");
  319. return first.getCar();
  320. }
  321. }
  322. protected Pair readEnclosed(LispReader reader, ReadTable readTable, Pair last, int startDelimiter, int endDelimiter)
  323. throws IOException, SyntaxException {
  324. if (startDelimiter == '(') {
  325. return reader.readValuesAndAppend('(', readTable, last);
  326. } else {
  327. return readEnclosedExpressions(reader, readTable, last,
  328. endDelimiter);
  329. }
  330. }
  331. /** Read expressions enclosed by '[' and ']'.
  332. * Assume '[' has already been read.
  333. */
  334. protected Pair readEnclosedExpressions(LispReader reader, ReadTable readTable, Pair last, int endDelimiter)
  335. throws IOException, SyntaxException {
  336. InPort port = reader.getPort();
  337. char saveReadState = reader.pushNesting('[');
  338. int startLine = port.getLineNumber();
  339. int startColumn = port.getColumnNumber();
  340. try {
  341. for (;;) {
  342. int line = port.getLineNumber();
  343. int column = port.getColumnNumber();
  344. int ch = port.read();
  345. if (ch == endDelimiter)
  346. break;
  347. if (ch < 0)
  348. reader.eofError("unexpected EOF in list starting here",//FIXME
  349. startLine + 1, startColumn);
  350. last = reader.readValuesAndAppend(ch, readTable, last);
  351. }
  352. return last;
  353. }
  354. finally
  355. {
  356. reader.popNesting(saveReadState);
  357. }
  358. }
  359. private int scanTag(LispReader reader, int next)
  360. throws IOException, SyntaxException {
  361. if (XName.isNameStart(next)) {
  362. for (;;) {
  363. reader.tokenBufferAppend(next);
  364. next = reader.read();
  365. if (! XName.isNamePart(next)) {
  366. break;
  367. }
  368. }
  369. } else if (next == '`' || next == '<' || next == '>') {
  370. int nextnext = reader.peek();
  371. if (next == '>' && nextnext == '>') {
  372. reader.tokenBufferAppend(next);
  373. reader.skip();
  374. nextnext = reader.peek();
  375. }
  376. if (nextnext == '{' || nextnext == '[') {
  377. reader.tokenBufferAppend(next);
  378. next = reader.read();
  379. }
  380. }
  381. return next;
  382. }
  383. Object checkEntity(LispReader reader, String str)
  384. throws IOException, SyntaxException {
  385. int next = reader.read();
  386. if (next != ';') {
  387. reader.unread(next);
  388. reader. error("invalid entity reference");
  389. }
  390. return LispLanguage.entityNamespace.getSymbol(str);
  391. }
  392. /** Read a character reference, assuming {@code "&#"} have been read. */
  393. void readCharRef (LispReader reader, int next)
  394. throws IOException, SyntaxException {
  395. int base;
  396. if (next == 'x') {
  397. base = 16;
  398. next = reader.read();
  399. } else
  400. base = 10;
  401. int value = 0;
  402. while (next >= 0) {
  403. char ch = (char) next;
  404. int digit = Character.digit((char) ch, base);
  405. if (digit < 0)
  406. break;
  407. if (value >= 0x8000000)
  408. break; // Overflow likely.
  409. value = value * base;
  410. value += digit;
  411. next = reader.read();
  412. }
  413. if (next != ';') {
  414. reader.unread(next);
  415. reader.error("invalid character reference");
  416. }
  417. // See definition of 'Char' in XML 1.1 2nd ed Specification.
  418. else if ((value > 0 && value <= 0xD7FF)
  419. || (value >= 0xE000 && value <= 0xFFFD)
  420. || (value >= 0x10000 && value <= 0x10FFFF)) {
  421. reader.tokenBufferAppend(value);
  422. }
  423. else
  424. reader.error("invalid character value "+value);
  425. }
  426. }