// This file is part of BOINC.
// http://boinc.berkeley.edu
// Copyright (C) 2008 University of California
//
// BOINC is free software; you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License
// as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.
//
// BOINC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
// See the GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with BOINC.  If not, see <http://www.gnu.org/licenses/>.
- #ifndef BOINC_PARSE_H
- #define BOINC_PARSE_H
- #include <cstdio>
- #include <stdlib.h>
- #include <string.h>
- #include <errno.h>
- #include "config.h"
- #include "miofile.h"
- #include "error_numbers.h"
- #include "str_util.h"
- // see parse_test.cpp for example usage of XML_PARSER
- #define XML_PARSE_COMMENT 1 // scan_tag(): the tag was a comment; get_aux() skips these and keeps reading
- #define XML_PARSE_EOF 2 // the input returned 0 or EOF before the item was complete
- #define XML_PARSE_CDATA 3 // scan_tag(): a CDATA section was read; get() reports it as non-tag data
- #define XML_PARSE_TAG 4 // scan_tag(): a tag was read up to its closing '>'
- #define XML_PARSE_DATA 5 // copy_until_tag(): text was copied up to the next '<'
- #define XML_PARSE_OVERFLOW 6 // the item didn't fit in the caller's buffer
- #define TAG_BUF_LEN 4096
- // max tag length; this is the size in bytes of XML_PARSER::parsed_tag
- #define ELEMENT_BUF_LEN 65536
- // max element length (matches BLOB_SIZE, max size of XML fields in DB)
- struct XML_PARSER {
- int scan_comment();
- int scan_cdata(char*, int);
- char parsed_tag[TAG_BUF_LEN];
- bool is_tag;
- MIOFILE* f;
- XML_PARSER(MIOFILE*);
- void init(MIOFILE* mf) {
- f = mf;
- }
- // read and copy text to buf; stop when find a <;
- // ungetc() that so we read it again
- // Return XML_PARSE_DATA if successful
- //
- inline int copy_until_tag(char* buf, int len) {
- int c;
- while (1) {
- c = f->_getc();
- if (!c || c == EOF) return XML_PARSE_EOF;
- if (c == '<') {
- f->_ungetc(c);
- *buf = 0;
- return XML_PARSE_DATA;
- }
- if (--len <= 0) {
- return XML_PARSE_OVERFLOW;
- }
- *buf++ = (char)c;
- }
- }
- // return true if EOF or error
- //
- inline bool get(
- char* buf, int len, bool& _is_tag, char* attr_buf=0, int attr_len=0
- ) {
- switch (get_aux(buf, len, attr_buf, attr_len)) {
- case XML_PARSE_EOF:
- case XML_PARSE_OVERFLOW:
- return true;
- case XML_PARSE_TAG:
- _is_tag = true;
- break;
- case XML_PARSE_DATA:
- case XML_PARSE_CDATA:
- default:
- _is_tag = false;
- break;
- }
- return false;
- }
- inline bool get_tag(char* ab=0, int al=0) {
- if (get(parsed_tag, sizeof(parsed_tag), is_tag, ab, al)) {
- return true;
- }
- if (strlen(parsed_tag) > TAG_BUF_LEN-10) {
- parsed_tag[TAG_BUF_LEN-10] = 0;
- }
- return false;
- }
- inline bool match_tag(const char* tag) {
- return !strcmp(parsed_tag, tag);
- }
- // read until find non-whitespace char.
- // Return the char in the reference param
- // Return true iff reached EOF
- //
- inline bool scan_nonws(int& first_char) {
- int c;
- while (1) {
- c = f->_getc();
- if (!c || c == EOF) return true;
- if (isascii(c) && isspace(c)) continue;
- first_char = c;
- return false;
- }
- }
- // Scan something, either tag or text.
- // Strip whitespace at start and end
- // (however, the supplied buffer must accommodate this white space).
- // Ignore comments.
- // Return true iff reached EOF
- //
- inline int get_aux(
- char* buf, int len, char* attr_buf, int attr_len
- ) {
- bool eof;
- int c, retval;
- while (1) {
- eof = scan_nonws(c);
- if (eof) return XML_PARSE_EOF;
- if (c == '<') {
- retval = scan_tag(buf, len, attr_buf, attr_len);
- if (retval == XML_PARSE_EOF) return retval;
- if (retval == XML_PARSE_OVERFLOW) return retval;
- if (retval == XML_PARSE_COMMENT) continue;
- } else {
- buf[0] = (char)c;
- retval = copy_until_tag(buf+1, len-1);
- if (retval != XML_PARSE_DATA) return retval;
- }
- strip_whitespace(buf);
- return retval;
- }
- }
- // we just read a <; read until we find a >.
- // Given <tag [attr=val attr=val] [/]>:
- // - copy tag (or tag/) to buf
- // - copy "attr=val attr=val" to attr_buf
- //
- // Return either
- // XML_PARSE_TAG
- // XML_PARSE_COMMENT
- // XML_PARSE_EOF
- // XML_PARSE_CDATA
- //
- inline int scan_tag(
- char* buf, int _tag_len, char* attr_buf=0, int attr_len=0
- ) {
- int c;
- char* buf_start = buf;
- bool found_space = false;
- int tag_len = _tag_len;
- for (int i=0; ; i++) {
- c = f->_getc();
- if (!c || c == EOF) return XML_PARSE_EOF;
- if (c == '>') {
- *buf = 0;
- if (attr_buf) *attr_buf = 0;
- return XML_PARSE_TAG;
- }
- if (isascii(c) && isspace(c)) {
- if (found_space && attr_buf) {
- if (--attr_len > 0) {
- *attr_buf++ = (char)c;
- }
- }
- found_space = true;
- } else if (c == '/') {
- if (--tag_len > 0) {
- *buf++ = (char)c;
- } else {
- return XML_PARSE_OVERFLOW;
- }
- } else {
- if (found_space) {
- if (attr_buf) {
- if (--attr_len > 0) {
- *attr_buf++ = (char)c;
- }
- }
- } else {
- if (--tag_len > 0) {
- *buf++ = (char)c;
- } else {
- return XML_PARSE_OVERFLOW;
- }
- }
- }
- // check for comment start
- //
- if (i==2 && !strncmp(buf_start, "!--", 3)) {
- return scan_comment();
- }
- if (i==7 && !strncmp(buf_start, "![CDATA[", 8)) {
- return scan_cdata(buf_start, tag_len);
- }
- }
- }
- // copy everything up to (but not including) the given end tag.
- // The copied text may include XML tags.
- // strips start/end whitespace.
- //
- inline int element_contents(const char* end_tag, char* buf, int buflen) {
- int n=0;
- int retval=0;
- while (1) {
- if (n == buflen-1) {
- retval = ERR_XML_PARSE;
- break;
- }
- int c = f->_getc();
- if (!c || c == EOF) {
- retval = ERR_XML_PARSE;
- break;
- }
- buf[n++] = (char)c;
- buf[n] = 0;
- char* p = strstr(buf, end_tag);
- if (p) {
- *p = 0;
- break;
- }
- }
- buf[n] = 0;
- strip_whitespace(buf);
- return retval;
- }
- bool parse_str_aux(const char*, char*, int);
- // interface starts here
- //
- bool parse_start(const char*);
- bool parse_str(const char*, char*, int);
- bool parse_string(const char*, std::string&);
- bool parse_int(const char*, int&);
- bool parse_long(const char*, long&);
- bool parse_double(const char*, double&);
- bool parse_ulong(const char*, unsigned long&);
- bool parse_ulonglong(const char*, unsigned long long&);
- bool parse_bool(const char*, bool&);
- int copy_element(std::string&);
- void skip_unexpected(const char*, bool verbose, const char*);
- void skip_unexpected(bool verbose=false, const char* msg="") {
- skip_unexpected(parsed_tag, verbose, msg);
- }
- };
- extern bool boinc_is_finite(double); // defined elsewhere; parse_double() below rejects values for which this returns false
- /////////////// START DEPRECATED XML PARSER
- // Deprecated because it makes assumptions about
- // the format of the XML being parsed
- ///////////////
// Report whether 'tag' occurs anywhere in 'buf'.
// This is a plain substring search: nothing checks that
// the match is actually an XML tag.
//
inline bool match_tag(const char* buf, const char* tag) {
    return strstr(buf, tag) != NULL;
}

// Same, for text held in a std::string
//
inline bool match_tag(const std::string &s, const char* tag) {
    const char* text = s.c_str();
    return match_tag(text, tag);
}
- #if defined(_WIN32) && !defined(__MINGW32__)
- #define boinc_strtoull _strtoui64
- #else
- #if defined(HAVE_STRTOULL) || defined(__MINGW32__)
- #define boinc_strtoull strtoull
- #else
// Replacement for strtoull(), used where the platform doesn't have one.
// Parses an unsigned decimal number; if the text contains "0x" or "0X",
// parses a hexadecimal number starting at that prefix instead.
// Only the first 63 characters of s are looked at.
// The end-pointer and base arguments exist only so that the signature
// matches strtoull(); they are ignored.
// Returns 0 if no number is found.
//
inline unsigned long long boinc_strtoull(const char *s, char **, int) {
    char buf[64];
    unsigned long long y = 0;
        // stays 0 if sscanf() converts nothing

    // strncpy() doesn't write a NUL if s has 63 or more characters,
    // so terminate the copy explicitly
    //
    strncpy(buf, s, sizeof(buf)-1);
    buf[sizeof(buf)-1] = 0;

    // No need to strip white space from buf first:
    // the sscanf() conversions skip leading white space
    // and stop at the first character that isn't part of the number.
    //
    const char* p = strstr(buf, "0x");
    if (!p) p = strstr(buf, "0X");
    if (p) {
        sscanf(p, "%llx", &y);
    } else {
        sscanf(buf, "%llu", &y);
    }
    return y;
}
- #endif
- #endif
// parse an integer of the form <tag>1234</tag>
// return true if it's there, and store the value in x;
// x is not changed if false is returned.
// The number may be decimal, octal (leading 0) or hex (leading 0x).
// Return false if the tag isn't in buf, if strtol() reports an error,
// or if the value doesn't fit in an int.
// Note: this doesn't check for the end tag.
// Whether text with no digits after the tag is accepted (as 0)
// depends on whether the C library's strtol() sets errno in that case.
//
inline bool parse_int(const char* buf, const char* tag, int& x) {
    const char* p = strstr(buf, tag);
    if (!p) return false;
    errno = 0;
    const long v = strtol(p+strlen(tag), 0, 0);    // this parses 0xabcd correctly
    if (errno) return false;

    // Where long is wider than int, strtol() accepts values that
    // don't fit in an int.  Reject them rather than truncate silently;
    // where long and int have the same size strtol() has already failed.
    //
    const int y = (int)v;
    if (y != v) return false;
    x = y;
    return true;
}
- // Same, for doubles
- //
- inline bool parse_double(const char* buf, const char* tag, double& x) {
- double y;
- const char* p = strstr(buf, tag);
- if (!p) return false;
- errno = 0;
- y = strtod(p+strlen(tag), NULL);
- if (errno) return false;
- if (!boinc_is_finite(y)) {
- return false;
- }
- x = y;
- return true;
- }
- extern bool parse(char* , char* ); // NOTE(review): parameter roles aren't named here; see the definition
- extern bool parse_str(const char*, const char*, char*, int); // char-buffer variant; the int is presumably the buffer size - confirm
- extern bool parse_str(const char* buf, const char* tag, std::string& dest); // std::string variant of the above
- extern void parse_attr(const char* buf, const char* attrname, char* out, int len); // NOTE(review): len is presumably the size of out - confirm
- extern bool parse_bool(const char*, const char*, bool&); // presumably (buf, tag, result), like parse_int() above - confirm
- /////////////// END DEPRECATED XML PARSER
- extern int copy_stream(FILE* in, FILE* out); // declarations only from here on; the definitions are not in this header
- extern int strcatdup(char*& p, char* buf); // NOTE(review): p is passed by reference, so presumably it can be reallocated - confirm
- extern int dup_element_contents(FILE* in, const char* end_tag, char** pp); // NOTE(review): "dup" and the char** suggest a heap-allocated result owned by the caller - confirm
- extern int dup_element(FILE* in, const char* end_tag, char** pp); // NOTE(review): as above - confirm how this differs from dup_element_contents()
- extern int copy_element_contents(FILE* in, const char* end_tag, char* p, size_t len); // fixed-size buffer variant
- extern int copy_element_contents(FILE* in, const char* end_tag, std::string&); // std::string variant
- extern void replace_element_contents(
- char* buf, const char* start, const char* end, const char* replacement
- );
- extern bool remove_element(char* buf, const char* start, const char* end);
- extern bool str_replace(char* str, const char* old, const char* neww); // "neww" because "new" is a C++ keyword
- extern char* sgets(char* buf, int len, char* &in); // NOTE(review): presumably an fgets() analogue reading from the string 'in' - confirm
- extern void non_ascii_escape(const char*, char*, int len); // NOTE(review): presumably (in, out, size of out) - confirm
- extern void xml_escape(const char*, char*, int len); // NOTE(review): presumably (in, out, size of out) - confirm
- extern void xml_unescape(std::string&); // std::string variant
- extern void xml_unescape(char*); // C-string variant
- extern void extract_venue(const char*, const char*, char*, int len);
- extern int skip_unrecognized(char* buf, MIOFILE&);
- #endif
|