tokenstream.h 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. // Copyright 2009-2021 Intel Corporation
  2. // SPDX-License-Identifier: Apache-2.0
  3. #pragma once
  4. #include "stream.h"
  5. #include <string>
  6. #include <vector>
  7. namespace embree
  8. {
  9. /*! token class */
  10. class Token
  11. {
  12. public:
  13. enum Type { TY_EOF, TY_CHAR, TY_INT, TY_FLOAT, TY_IDENTIFIER, TY_STRING, TY_SYMBOL };
  14. Token ( const ParseLocation& loc = ParseLocation()) : ty(TY_EOF ), loc(loc) {}
  15. Token (char c, const ParseLocation& loc = ParseLocation()) : ty(TY_CHAR ), c(c), loc(loc) {}
  16. Token (int i, const ParseLocation& loc = ParseLocation()) : ty(TY_INT ), i(i), loc(loc) {}
  17. Token (float f,const ParseLocation& loc = ParseLocation()) : ty(TY_FLOAT), f(f), loc(loc) {}
  18. Token (std::string str, Type ty, const ParseLocation& loc = ParseLocation()) : ty(ty), str(str), loc(loc) {}
  19. static Token Eof() { return Token(); }
  20. static Token Sym(std::string str) { return Token(str,TY_SYMBOL); }
  21. static Token Str(std::string str) { return Token(str,TY_STRING); }
  22. static Token Id (std::string str) { return Token(str,TY_IDENTIFIER); }
  23. char Char() const {
  24. if (ty == TY_CHAR) return c;
  25. THROW_RUNTIME_ERROR(loc.str()+": character expected");
  26. }
  27. int Int() const {
  28. if (ty == TY_INT) return i;
  29. THROW_RUNTIME_ERROR(loc.str()+": integer expected");
  30. }
  31. float Float(bool cast = true) const {
  32. if (ty == TY_FLOAT) return f;
  33. if (ty == TY_INT && cast) return (float)i;
  34. THROW_RUNTIME_ERROR(loc.str()+": float expected");
  35. }
  36. std::string Identifier() const {
  37. if (ty == TY_IDENTIFIER) return str;
  38. THROW_RUNTIME_ERROR(loc.str()+": identifier expected");
  39. }
  40. std::string String() const {
  41. if (ty == TY_STRING) return str;
  42. THROW_RUNTIME_ERROR(loc.str()+": string expected");
  43. }
  44. std::string Symbol() const {
  45. if (ty == TY_SYMBOL) return str;
  46. THROW_RUNTIME_ERROR(loc.str()+": symbol expected");
  47. }
  48. const ParseLocation& Location() const { return loc; }
  49. friend bool operator==(const Token& a, const Token& b)
  50. {
  51. if (a.ty != b.ty) return false;
  52. if (a.ty == TY_CHAR) return a.c == b.c;
  53. if (a.ty == TY_INT) return a.i == b.i;
  54. if (a.ty == TY_FLOAT) return a.f == b.f;
  55. if (a.ty == TY_IDENTIFIER) return a.str == b.str;
  56. if (a.ty == TY_STRING) return a.str == b.str;
  57. if (a.ty == TY_SYMBOL) return a.str == b.str;
  58. return true;
  59. }
  60. friend bool operator!=(const Token& a, const Token& b) {
  61. return !(a == b);
  62. }
  63. friend bool operator <( const Token& a, const Token& b ) {
  64. if (a.ty != b.ty) return (int)a.ty < (int)b.ty;
  65. if (a.ty == TY_CHAR) return a.c < b.c;
  66. if (a.ty == TY_INT) return a.i < b.i;
  67. if (a.ty == TY_FLOAT) return a.f < b.f;
  68. if (a.ty == TY_IDENTIFIER) return a.str < b.str;
  69. if (a.ty == TY_STRING) return a.str < b.str;
  70. if (a.ty == TY_SYMBOL) return a.str < b.str;
  71. return false;
  72. }
  73. friend std::ostream& operator<<(std::ostream& cout, const Token& t)
  74. {
  75. if (t.ty == TY_EOF) return cout << "eof";
  76. if (t.ty == TY_CHAR) return cout << "Char(" << t.c << ")";
  77. if (t.ty == TY_INT) return cout << "Int(" << t.i << ")";
  78. if (t.ty == TY_FLOAT) return cout << "Float(" << t.f << ")";
  79. if (t.ty == TY_IDENTIFIER) return cout << "Id(" << t.str << ")";
  80. if (t.ty == TY_STRING) return cout << "String(" << t.str << ")";
  81. if (t.ty == TY_SYMBOL) return cout << "Symbol(" << t.str << ")";
  82. return cout << "unknown";
  83. }
  84. private:
  85. Type ty; //< the type of the token
  86. union {
  87. char c; //< data for char tokens
  88. int i; //< data for int tokens
  89. float f; //< data for float tokens
  90. };
  91. std::string str; //< data for string and identifier tokens
  92. ParseLocation loc; //< the location the token is from
  93. };
  94. /*! build full tokenizer that takes list of valid characters and keywords */
  95. class TokenStream : public Stream<Token>
  96. {
  97. public:
  98. /*! shorthands for common sets of characters */
  99. static const std::string alpha;
  100. static const std::string ALPHA;
  101. static const std::string numbers;
  102. static const std::string separators;
  103. static const std::string stringChars;
  104. public:
  105. TokenStream(const Ref<Stream<int> >& cin,
  106. const std::string& alpha, //< valid characters for identifiers
  107. const std::string& seps, //< characters that act as separators
  108. const std::vector<std::string>& symbols = std::vector<std::string>()); //< symbols
  109. public:
  110. ParseLocation location() { return cin->loc(); }
  111. Token next();
  112. bool trySymbol(const std::string& symbol);
  113. private:
  114. void skipSeparators();
  115. bool decDigits(std::string& str);
  116. bool decDigits1(std::string& str);
  117. bool trySymbols(Token& token, const ParseLocation& loc);
  118. bool tryFloat(Token& token, const ParseLocation& loc);
  119. bool tryInt(Token& token, const ParseLocation& loc);
  120. bool tryString(Token& token, const ParseLocation& loc);
  121. bool tryIdentifier(Token& token, const ParseLocation& loc);
  122. Ref<Stream<int> > cin;
  123. bool isSepMap[256];
  124. bool isAlphaMap[256];
  125. bool isStringCharMap[256];
  126. std::vector<std::string> symbols;
  127. /*! checks if a character is a separator */
  128. __forceinline bool isSeparator(unsigned int c) const { return c<256 && isSepMap[c]; }
  129. /*! checks if a character is a number */
  130. __forceinline bool isDigit(unsigned int c) const { return c >= '0' && c <= '9'; }
  131. /*! checks if a character is valid inside a string */
  132. __forceinline bool isStringChar(unsigned int c) const { return c<256 && isStringCharMap[c]; }
  133. /*! checks if a character is legal for an identifier */
  134. __forceinline bool isAlpha(unsigned int c) const { return c<256 && isAlphaMap[c]; }
  135. __forceinline bool isAlphaNum(unsigned int c) const { return isAlpha(c) || isDigit(c); }
  136. };
  137. }