tokenstream.cpp 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #include "tokenstream.h"
  4. #include "../math/math.h"
  5. namespace embree
  6. {
  7. /* shorthands for common sets of characters */
  8. const std::string TokenStream::alpha = "abcdefghijklmnopqrstuvwxyz";
  9. const std::string TokenStream::ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  10. const std::string TokenStream::numbers = "0123456789";
  11. const std::string TokenStream::separators = "\n\t\r ";
  12. const std::string TokenStream::stringChars = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 _.,+-=:/*\\";
  /* Fills a 256-entry lookup table so that map[c] is true exactly for the
   * byte values c that occur in chrs. Characters are indexed by their
   * unsigned 8-bit value, so bytes >= 0x80 land in the upper half of the table. */
  static void createCharMap(bool map[256], const std::string& chrs)
  {
    /* start from the empty set */
    for (bool* entry = map; entry != map + 256; ++entry)
      *entry = false;

    /* mark every character of the set */
    for (std::string::const_iterator it = chrs.begin(); it != chrs.end(); ++it)
      map[static_cast<uint8_t>(*it)] = true;
  }
  18. /* build full tokenizer that takes list of valid characters and keywords */
  19. TokenStream::TokenStream(const Ref<Stream<int> >& cin, //< stream to read from
  20. const std::string& alpha, //< valid characters for identifiers
  21. const std::string& seps, //< characters that act as separators
  22. const std::vector<std::string>& symbols) //< symbols
  23. : cin(cin), symbols(symbols)
  24. {
  25. createCharMap(isAlphaMap,alpha);
  26. createCharMap(isSepMap,seps);
  27. createCharMap(isStringCharMap,stringChars);
  28. }
  29. bool TokenStream::decDigits(std::string& str_o)
  30. {
  31. bool ok = false;
  32. std::string str;
  33. if (cin->peek() == '+' || cin->peek() == '-') str += (char)cin->get();
  34. while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
  35. if (ok) str_o += str;
  36. else cin->unget(str.size());
  37. return ok;
  38. }
  39. bool TokenStream::decDigits1(std::string& str_o)
  40. {
  41. bool ok = false;
  42. std::string str;
  43. while (isDigit(cin->peek())) { ok = true; str += (char)cin->get(); }
  44. if (ok) str_o += str; else cin->unget(str.size());
  45. return ok;
  46. }
  47. bool TokenStream::trySymbol(const std::string& symbol)
  48. {
  49. size_t pos = 0;
  50. while (pos < symbol.size()) {
  51. if (symbol[pos] != cin->peek()) { cin->unget(pos); return false; }
  52. cin->drop(); pos++;
  53. }
  54. return true;
  55. }
  56. bool TokenStream::trySymbols(Token& token, const ParseLocation& loc)
  57. {
  58. for (size_t i=0; i<symbols.size(); i++) {
  59. if (!trySymbol(symbols[i])) continue;
  60. token = Token(symbols[i],Token::TY_SYMBOL,loc);
  61. return true;
  62. }
  63. return false;
  64. }
  65. bool TokenStream::tryFloat(Token& token, const ParseLocation& loc)
  66. {
  67. bool ok = false;
  68. std::string str;
  69. if (trySymbol("nan")) {
  70. token = Token(float(nan));
  71. return true;
  72. }
  73. if (trySymbol("+inf")) {
  74. token = Token(float(pos_inf));
  75. return true;
  76. }
  77. if (trySymbol("-inf")) {
  78. token = Token(float(neg_inf));
  79. return true;
  80. }
  81. if (decDigits(str))
  82. {
  83. if (cin->peek() == '.') {
  84. str += (char)cin->get();
  85. decDigits(str);
  86. if (cin->peek() == 'e' || cin->peek() == 'E') {
  87. str += (char)cin->get();
  88. if (decDigits(str)) ok = true; // 1.[2]E2
  89. }
  90. else ok = true; // 1.[2]
  91. }
  92. else if (cin->peek() == 'e' || cin->peek() == 'E') {
  93. str += (char)cin->get();
  94. if (decDigits(str)) ok = true; // 1E2
  95. }
  96. }
  97. else
  98. {
  99. if (cin->peek() == '.') {
  100. str += (char)cin->get();
  101. if (decDigits(str)) {
  102. if (cin->peek() == 'e' || cin->peek() == 'E') {
  103. str += (char)cin->get();
  104. if (decDigits(str)) ok = true; // .3E2
  105. }
  106. else ok = true; // .3
  107. }
  108. }
  109. }
  110. if (ok) {
  111. token = Token((float)atof(str.c_str()),loc);
  112. }
  113. else cin->unget(str.size());
  114. return ok;
  115. }
  116. bool TokenStream::tryInt(Token& token, const ParseLocation& loc) {
  117. std::string str;
  118. if (decDigits(str)) {
  119. token = Token(atoi(str.c_str()),loc);
  120. return true;
  121. }
  122. return false;
  123. }
  124. bool TokenStream::tryString(Token& token, const ParseLocation& loc)
  125. {
  126. std::string str;
  127. if (cin->peek() != '\"') return false;
  128. cin->drop();
  129. while (cin->peek() != '\"') {
  130. const int c = cin->get();
  131. if (!isStringChar(c)) THROW_RUNTIME_ERROR("invalid string character "+std::string(1,c)+" at "+loc.str());
  132. str += (char)c;
  133. }
  134. cin->drop();
  135. token = Token(str,Token::TY_STRING,loc);
  136. return true;
  137. }
  138. bool TokenStream::tryIdentifier(Token& token, const ParseLocation& loc)
  139. {
  140. std::string str;
  141. if (!isAlpha(cin->peek())) return false;
  142. str += (char)cin->get();
  143. while (isAlphaNum(cin->peek())) str += (char)cin->get();
  144. token = Token(str,Token::TY_IDENTIFIER,loc);
  145. return true;
  146. }
  147. void TokenStream::skipSeparators()
  148. {
  149. /* skip separators */
  150. while (cin->peek() != EOF && isSeparator(cin->peek()))
  151. cin->drop();
  152. }
  153. Token TokenStream::next()
  154. {
  155. Token token;
  156. skipSeparators();
  157. ParseLocation loc = cin->loc();
  158. if (trySymbols (token,loc)) return token; /**< try to parse a symbol */
  159. if (tryFloat (token,loc)) return token; /**< try to parse float */
  160. if (tryInt (token,loc)) return token; /**< try to parse integer */
  161. if (tryString (token,loc)) return token; /**< try to parse string */
  162. if (tryIdentifier(token,loc)) return token; /**< try to parse identifier */
  163. if (cin->peek() == EOF ) return Token(loc); /**< return EOF token */
  164. return Token((char)cin->get(),loc); /**< return invalid character token */
  165. }
  166. }