SourceCodeToTokenize.hpp 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. //@+leo-ver=5-thin
  2. //@+node:caminhante.20210809161411.1: * @file SourceCodeToTokenize.hpp
  3. //@@language cplusplus
  4. #pragma once
  5. using namespace std;
  6. //@+others
  7. //@+node:caminhante.20210809161759.1: ** /includes
  8. #include <cassert>
  9. #include <string>
  10. #include <cstring>
  11. #include <fstream>
  12. #include <iostream>
  13. #include <streambuf>
  14. #include <sstream>
  15. #include <vector>
  16. #include <limits>
  17. #include <iterator>
  18. #include <numeric>
  19. #include <algorithm>
  20. #include <functional>
  21. //@+node:caminhante.20210809161826.1: ** just_pass
  // Returns a predicate that accepts every value it is given.
  // Intended as a "no restriction" length check for callers that take a
  // function<bool(int)> acceptance test.
  inline std::function<bool(int)> just_pass () {
    // The original declaration (`static a = ...`) had no type and did not
    // compile; the static is given the function type that is returned.
    static const std::function<bool(int)> accept_anything =
      [](int) -> bool { return true; };
    return accept_anything;
  }
  26. //@+node:caminhante.20210809161836.1: ** SourceCodeToTokenize
  // Cursor over an in-memory copy of a source text, with helpers that pull
  // whitespace runs, tokens, literals, numbers and quoted strings off the
  // front while tracking the current line and column.
  //
  // Every read_*/whitespace/next_token helper either consumes exactly the text
  // it returns, or consumes nothing and returns an empty result.
  class SourceCodeToTokenize {
  //@+others
  //@+node:caminhante.20210809161902.1: *3* private
  private:
    // Offset of the first character of the line being read, and that line's
    // 1-based number.
    std::size_t beginning_last_line=0, current_line=1;
    // Snapshot written by save_reading_state() and read back by
    // restore_reading_state().
    std::size_t saved_last_line=0, saved_line=1, saved_position=0;
    // The complete source text; it is not modified after construction.
    std::string input;
    // Offset in `input` of the next character to be read.
    std::size_t cursor=0;
  //@+node:caminhante.20210809161914.1: *3* public
  public:
    // Tokenizes the given text.
    SourceCodeToTokenize (std::string i) { input.swap(i); }
    // Tokenizes everything that can still be read from `i`.
    // Streams are not copyable, so the stream is taken by reference (the
    // original by-value parameter could never be called).
    SourceCodeToTokenize (std::istream &i) {
      std::ostringstream slurp;
      slurp << i.rdbuf();
      input = slurp.str();
    }
  //@+others
  //@+node:caminhante.20210809161947.1: *4* Read tokens
  //@+node:caminhante.20210809162104.1: *5* next
    // Consumes one character and returns it as a non-negative value,
    // or returns -1 when the end of the text has been reached.
    int next () {
      if (reached_end()) return -1;
      const unsigned char c = static_cast<unsigned char>(input[cursor]);
      ++cursor;
      // Must run after the cursor moved: new_line() records the cursor as the
      // start of the following line.
      stats_for_next_char(c);
      return c;
    }
  //@+node:caminhante.20210809162111.1: *5* peek
    // Returns the next character without consuming it, or -1 at the end.
    int peek () const {
      if (reached_end()) return -1;
      return static_cast<unsigned char>(input[cursor]);
    }
  //@+node:caminhante.20210809162116.1: *5* whitespace
    // If the next character is whitespace, consumes and returns everything up
    // to (not including) the first graphic character; otherwise returns "".
    std::string whitespace () {
      const int c = peek();
      if (c < 0 || !std::isspace(c)) return "";
      // <cctype> classifiers require a value representable as unsigned char.
      std::function<bool(char)> p = [](char ch) -> bool {
        return std::isgraph(static_cast<unsigned char>(ch)) != 0;
      };
      return extract_until_or_preserve_state(p);
    }
  //@+node:caminhante.20210809162122.1: *5* next_token
    // If the next character is graphic, consumes and returns everything up to
    // (not including) the first whitespace character; otherwise returns "".
    std::string next_token () {
      const int c = peek();
      if (c < 0 || !std::isgraph(c)) return "";
      std::function<bool(char)> p = [](char ch) -> bool {
        return std::isspace(static_cast<unsigned char>(ch)) != 0;
      };
      return extract_until_or_preserve_state(p);
    }
  //@+node:caminhante.20210809162129.1: *5* read_literal
    // Consumes `literal` and returns true when the remaining text starts with
    // it. Returns false and consumes nothing otherwise; an empty literal
    // never matches.
    bool read_literal (std::string literal) {
      if (reached_end()) return false;
      std::size_t lit_pos = 0;
      // Stops the scan on the first mismatch, or on the character that
      // follows a completely matched literal.
      std::function<bool(char)> p = [&](char c) -> bool {
        if (lit_pos >= literal.length()) return true;
        if (c != literal[lit_pos]) return true;
        ++lit_pos;
        return false;
      };
      // Only a scan that covered the whole literal counts as a match.
      std::function<bool(int)> a = [&](int l) -> bool {
        return static_cast<std::size_t>(l) == literal.length();
      };
      return !extract_until_or_preserve_state(p,a).empty();
    }
  //@+node:caminhante.20210809162135.1: *5* read_number
    // Consumes and returns a run of decimal digits, or returns "" when the
    // next character is not a digit.
    std::string read_number () {
      const int c = peek();
      if (c < 0 || !std::isdigit(c)) return "";
      std::function<bool(char)> p = [](char ch) -> bool {
        return std::isdigit(static_cast<unsigned char>(ch)) == 0;
      };
      return extract_until_or_preserve_state(p);
    }
  //@+node:caminhante.20210809162140.1: *5* read_string
    // Consumes and returns a quoted string, including both quote characters.
    // The string may be delimited by " or '; a backslash makes the character
    // after it part of the string, so \" and \\ do not end or extend it.
    // Returns "" and consumes nothing when the next character is not a quote
    // or when no closing quote is found.
    std::string read_string () {
      const int first = peek();
      if (first != '"' && first != '\'') return "";
      const char quote = static_cast<char>(first);
      bool opened = false;   // opening quote already scanned
      bool escaped = false;  // previous character was an unescaped backslash
      bool closed = false;   // closing quote already scanned
      std::function<bool(char)> parse_string = [&](char c) -> bool {
        if (closed) return true;  // first character after the closing quote
        if (!opened) { opened = true; return false; }
        if (escaped) { escaped = false; return false; }
        if (c == '\\') { escaped = true; return false; }
        if (c == quote) closed = true;
        return false;
      };
      // Evaluated after the scan has finished, so `closed` tells whether the
      // string was terminated.
      std::function<bool(int)> terminated = [&](int) -> bool { return closed; };
      return extract_until_or_preserve_state(parse_string,terminated);
    }
  //@+node:caminhante.20210809162010.1: *4* Basic statistics
  //@+node:caminhante.20210809162204.1: *5* reached_end
    // True when every character of the text has been consumed.
    bool reached_end () const {
      return cursor >= input.size();
    }
  //@+node:caminhante.20210809162208.1: *5* position
    // Zero-based offset of the next character to be read; equals length()
    // once the end has been reached.
    std::size_t position () const {
      return cursor;
    }
  //@+node:caminhante.20210809162214.1: *5* column
    // One-based column of the next character to be read within its line.
    std::size_t column () const {
      return cursor-beginning_last_line+1;
    }
  //@+node:caminhante.20210809162220.1: *5* length
    // Total number of characters in the text.
    std::size_t length () const {
      return input.size();
    }
  //@+node:caminhante.20210809162223.1: *5* line
    // One-based number of the line that holds the next character to be read.
    std::size_t line () const {
      return current_line;
    }
  //@-others
  //@+node:caminhante.20210809162021.1: *3* private
  private:
  //@+others
  //@+node:caminhante.20210809162308.1: *4* extract_until_or_preserve_state
    // Measures the run of characters from the cursor up to (not including)
    // the first one for which `predicate` returns true, or up to the end of
    // the text. When the run is non-empty and `acceptable` approves its
    // length, the run is consumed and returned; otherwise nothing is consumed
    // and "" is returned. `acceptable` defaults to accepting any length.
    std::string extract_until_or_preserve_state (
      std::function<bool(char)> &predicate,
      const std::function<bool(int)> &acceptable = [](int) -> bool { return true; }) {
      if (reached_end()) return "";
      save_reading_state();
      const int extractlen = skip_until(predicate);
      restore_reading_state();
      if (extractlen <= 0 || !acceptable(extractlen)) return "";
      const std::string extracted =
        input.substr(cursor, static_cast<std::size_t>(extractlen));
      // Consume through next() so line and column bookkeeping is updated at
      // the exact position of every newline.
      for (int i = 0; i < extractlen; ++i) next();
      return extracted;
    }
  //@+node:caminhante.20210809162315.1: *4* skip_until
    // Advances the cursor to the first character for which `predicate`
    // returns true (or to the end) and returns how many characters were
    // skipped. Line statistics are not updated.
    int skip_until (std::function<bool(char)> &predicate) {
      const std::size_t start_pos = cursor;
      while (cursor < input.size() && !predicate(input[cursor])) ++cursor;
      return static_cast<int>(cursor-start_pos);
    }
  //@+node:caminhante.20210809162322.1: *4* save_reading_state
    // Remembers the cursor and the line statistics.
    void save_reading_state () {
      saved_position = cursor;
      saved_last_line = beginning_last_line;
      saved_line = current_line;
    }
  //@+node:caminhante.20210809162329.1: *4* restore_reading_state
    // Returns to the state remembered by the last save_reading_state().
    void restore_reading_state () {
      cursor = saved_position;
      beginning_last_line = saved_last_line;
      current_line = saved_line;
    }
  //@+node:caminhante.20210809162334.1: *4* stats_for_next_char
    // Updates the line statistics for a character that was just consumed.
    void stats_for_next_char (int c) {
      if (c == '\n') new_line();
    }
  //@+node:caminhante.20210809162341.1: *4* new_line
    // Starts a new line at the cursor. Called right after a newline has been
    // consumed, so the cursor already points at the first character of the
    // new line.
    void new_line () {
      beginning_last_line = cursor;
      current_line++;
    }
  //@-others
  //@-others
  };
  181. //@-others
  182. //@-leo