123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183 |
- //@+leo-ver=5-thin
- //@+node:caminhante.20210809161411.1: * @file SourceCodeToTokenize.hpp
- //@@language cplusplus
- #pragma once
- using namespace std;
- //@+others
- //@+node:caminhante.20210809161759.1: ** /includes
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstddef>
#include <cstring>
#include <fstream>
#include <functional>
#include <iostream>
#include <iterator>
#include <limits>
#include <numeric>
#include <sstream>
#include <streambuf>
#include <string>
#include <utility>
#include <vector>
- //@+node:caminhante.20210809161826.1: ** just_pass
// Returns a length check that approves every value it is given.
// The argument is ignored; the result is always true.
inline std::function<bool(int)> just_pass () {
  // The original declaration had no type specifier ("static a = ...") and
  // could not compile; "auto" lets the closure type be deduced.
  static const auto a = [](int) -> bool { return true; };
  return a;
}
- //@+node:caminhante.20210809161836.1: ** SourceCodeToTokenize
// Character-level reader over a source text held entirely in memory.
// It hands out single characters, whitespace runs, tokens, literals, numbers
// and quoted strings while tracking the current line and column.
// Every read_*/whitespace/next_token call either consumes exactly what it
// returns or consumes nothing.
class SourceCodeToTokenize {
  //@+others
  //@+node:caminhante.20210809161902.1: *3* private
  private:
    // Offset of the first character of the current line, and the 1-based
    // number of that line.
    std::size_t beginning_last_line = 0, current_line = 1;
    // Snapshot taken by save_reading_state() for a later rewind.
    std::size_t saved_last_line = 0, saved_line = 1, saved_position = 0;
    // The whole text being tokenized and the index of the next unread char.
    std::string input;
    std::size_t read_position = 0;
  //@+node:caminhante.20210809161914.1: *3* public
  public:
    // Tokenize the given text.
    SourceCodeToTokenize (std::string i) : input(std::move(i)) {}
    // Tokenize everything that can still be read from the stream.
    // Taken by reference: streams are not copyable, so the former by-value
    // parameter could never be called.
    SourceCodeToTokenize (std::istream &i)
      : input(std::istreambuf_iterator<char>(i), std::istreambuf_iterator<char>()) {}
    //@+others
    //@+node:caminhante.20210809161947.1: *4* Read tokens
    //@+node:caminhante.20210809162104.1: *5* next
    // Consumes one character and returns it as a non-negative int,
    // or -1 when the end of the input has been reached.
    int next () {
      if (reached_end()) return -1;
      const char c = input[read_position++];
      stats_for_next_char(c);
      return static_cast<unsigned char>(c);
    }
    //@+node:caminhante.20210809162111.1: *5* peek
    // Returns the next character without consuming it, or -1 at the end.
    int peek () const {
      if (reached_end()) return -1;
      return static_cast<unsigned char>(input[read_position]);
    }
    //@+node:caminhante.20210809162116.1: *5* whitespace
    // If the next character is whitespace (isspace), consumes and returns
    // everything up to, but not including, the next isgraph character.
    // Returns "" and consumes nothing otherwise.
    std::string whitespace () {
      if (!std::isspace(peek())) return "";
      // <cctype> functions need an unsigned char value, never a negative char.
      std::function<bool(char)> p = [](char c) -> bool {
        return std::isgraph(static_cast<unsigned char>(c)) != 0;
      };
      return extract_until_or_preserve_state(p);
    }
    //@+node:caminhante.20210809162122.1: *5* next_token
    // If the next character is printable and not a space (isgraph), consumes
    // and returns everything up to, but not including, the next isspace
    // character. Returns "" and consumes nothing otherwise.
    std::string next_token () {
      if (!std::isgraph(peek())) return "";
      std::function<bool(char)> p = [](char c) -> bool {
        return std::isspace(static_cast<unsigned char>(c)) != 0;
      };
      return extract_until_or_preserve_state(p);
    }
    //@+node:caminhante.20210809162129.1: *5* read_literal
    // Consumes `literal` and returns true when the input continues with
    // exactly that text; otherwise returns false and consumes nothing.
    // An empty literal never matches.
    bool read_literal (const std::string &literal) {
      if (literal.empty() || reached_end()) return false;
      std::size_t lit_pos = 0;
      // Stop at the first mismatch, or right after the last matched char.
      std::function<bool(char)> p = [&](char c) -> bool {
        if (lit_pos == literal.length() || c != literal[lit_pos]) return true;
        ++lit_pos;
        return false;
      };
      // Only a run covering the whole literal counts as a match.
      std::function<bool(int)> a = [&](int l) -> bool {
        return static_cast<std::size_t>(l) == literal.length();
      };
      return !extract_until_or_preserve_state(p,a).empty();
    }
    //@+node:caminhante.20210809162135.1: *5* read_number
    // Consumes and returns the run of decimal digits at the current position,
    // or returns "" when the next character is not a digit.
    std::string read_number () {
      if (!std::isdigit(peek())) return "";
      std::function<bool(char)> p = [](char c) -> bool {
        return std::isdigit(static_cast<unsigned char>(c)) == 0;
      };
      return extract_until_or_preserve_state(p);
    }
    //@+node:caminhante.20210809162140.1: *5* read_string
    // If the next character is " or ', consumes and returns the quoted string
    // including both quotes. A backslash escapes the character after it, so
    // an escaped quote does not close the string and an escaped backslash
    // does not escape the closing quote. A string with no closing quote runs
    // to the end of the input. Returns "" when not positioned on a quote.
    std::string read_string () {
      if (peek() != '"' && peek() != '\'') return "";
      const char first_char = static_cast<char>(peek());
      bool ignore_next = false;
      unsigned int expected_quotes_ahead = 2;
      std::function<bool(char)> parse_string = [&](char c) -> bool {
        // Both quotes seen: this is the first character after the string.
        if (expected_quotes_ahead == 0) return true;
        if (ignore_next) {
          // Escaped character (possibly a backslash or a quote): skip it.
          ignore_next = false;
          return false; }
        if (c == '\\') {
          ignore_next = true;
          return false; }
        if (c == first_char) expected_quotes_ahead--;
        return false;
      };
      return extract_until_or_preserve_state(parse_string);
    }
    //@+node:caminhante.20210809162010.1: *4* Basic statistics
    //@+node:caminhante.20210809162204.1: *5* reached_end
    // True when every character of the input has been consumed.
    bool reached_end () const {
      return read_position >= input.size();
    }
    //@+node:caminhante.20210809162208.1: *5* position
    // Zero-based offset of the next unread character.
    std::size_t position () const {
      return read_position;
    }
    //@+node:caminhante.20210809162214.1: *5* column
    // One-based column of the next unread character within its line.
    std::size_t column () const {
      return position()-beginning_last_line+1;
    }
    //@+node:caminhante.20210809162220.1: *5* length
    // Total number of characters in the input.
    std::size_t length () const {
      return input.size();
    }
    //@+node:caminhante.20210809162223.1: *5* line
    // One-based number of the line holding the next unread character.
    std::size_t line () const {
      return current_line;
    }
    //@-others
  //@+node:caminhante.20210809162021.1: *3* private
  private:
    //@+others
    //@+node:caminhante.20210809162308.1: *4* extract_until_or_preserve_state
    // Consumes characters until `predicate` returns true for the next one or
    // the input ends, and returns the consumed text. When `acceptable` is
    // non-empty and rejects the number of consumed characters, the reader is
    // rewound to where it started and "" is returned. An empty `acceptable`
    // accepts every length.
    std::string extract_until_or_preserve_state (
      const std::function<bool(char)> &predicate,
      const std::function<bool(int)> &acceptable = std::function<bool(int)>()) {
      if (reached_end()) return "";
      save_reading_state();
      const int extractlen = skip_until(predicate);
      if (acceptable && !acceptable(extractlen)) {
        restore_reading_state();
        return "";
      }
      return input.substr(saved_position, static_cast<std::size_t>(extractlen));
    }
    //@+node:caminhante.20210809162315.1: *4* skip_until
    // Advances past every character for which `predicate` is false, updating
    // the line statistics, and returns how many characters were skipped.
    // The character that satisfied the predicate is left unread.
    int skip_until (const std::function<bool(char)> &predicate) {
      const std::size_t start_pos = position();
      while (!reached_end() && !predicate(input[read_position])) {
        const char c = input[read_position++];
        stats_for_next_char(c);
      }
      return static_cast<int>(position()-start_pos);
    }
    //@+node:caminhante.20210809162322.1: *4* save_reading_state
    // Remembers the read offset and the line statistics.
    void save_reading_state () {
      saved_position = read_position;
      saved_last_line = beginning_last_line;
      saved_line = current_line;
    }
    //@+node:caminhante.20210809162329.1: *4* restore_reading_state
    // Rewinds to the state captured by the last save_reading_state().
    void restore_reading_state () {
      read_position = saved_position;
      beginning_last_line = saved_last_line;
      current_line = saved_line;
    }
    //@+node:caminhante.20210809162334.1: *4* stats_for_next_char
    // Updates the line statistics for a character that was just consumed.
    void stats_for_next_char (int c) {
      if (c == '\n') new_line();
    }
    //@+node:caminhante.20210809162341.1: *4* new_line
    // Called right after a newline has been consumed: the read offset already
    // points at the first character of the new line.
    void new_line () {
      beginning_last_line = position();
      current_line++;
    }
    //@-others
  //@-others
};
- //@-others
- //@-leo
|