xml_parser.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593
  1. /*************************************************************************/
  2. /* xml_parser.cpp */
  3. /*************************************************************************/
  4. /* This file is part of: */
  5. /* GODOT ENGINE */
  6. /* http://www.godotengine.org */
  7. /*************************************************************************/
  8. /* Copyright (c) 2007-2015 Juan Linietsky, Ariel Manzur. */
  9. /* */
  10. /* Permission is hereby granted, free of charge, to any person obtaining */
  11. /* a copy of this software and associated documentation files (the */
  12. /* "Software"), to deal in the Software without restriction, including */
  13. /* without limitation the rights to use, copy, modify, merge, publish, */
  14. /* distribute, sublicense, and/or sell copies of the Software, and to */
  15. /* permit persons to whom the Software is furnished to do so, subject to */
  16. /* the following conditions: */
  17. /* */
  18. /* The above copyright notice and this permission notice shall be */
  19. /* included in all copies or substantial portions of the Software. */
  20. /* */
  21. /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
  22. /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
  23. /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.*/
  24. /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
  25. /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
  26. /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
  27. /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
  28. /*************************************************************************/
  29. #include "xml_parser.h"
  30. #include "print_string.h"
  31. //#define DEBUG_XML
// Applies VARIANT_ENUM_CAST to the parser's node-type enum.
// NOTE(review): presumably this lets XMLParser::NodeType be converted to/from
// Variant for the methods bound in _bind_methods() - confirm against the macro.
VARIANT_ENUM_CAST(XMLParser::NodeType);
  33. static bool _equalsn(const CharType* str1, const CharType* str2, int len) {
  34. int i;
  35. for(i=0; i < len && str1[i] && str2[i] ; ++i)
  36. if (str1[i] != str2[i])
  37. return false;
  38. // if one (or both) of the strings was smaller then they
  39. // are only equal if they have the same lenght
  40. return (i == len) || (str1[i] == 0 && str2[i] == 0);
  41. }
  42. String XMLParser::_replace_special_characters(const String& origstr) {
  43. int pos = origstr.find("&");
  44. int oldPos = 0;
  45. if (pos == -1)
  46. return origstr;
  47. String newstr;
  48. while(pos != -1 && pos < origstr.length()-2) {
  49. // check if it is one of the special characters
  50. int specialChar = -1;
  51. for (int i=0; i<(int)special_characters.size(); ++i)
  52. {
  53. const CharType* p = &origstr[pos]+1;
  54. if (_equalsn(&special_characters[i][1], p, special_characters[i].length()-1))
  55. {
  56. specialChar = i;
  57. break;
  58. }
  59. }
  60. if (specialChar != -1)
  61. {
  62. newstr+=(origstr.substr(oldPos, pos - oldPos));
  63. newstr+=(special_characters[specialChar][0]);
  64. pos += special_characters[specialChar].length();
  65. }
  66. else
  67. {
  68. newstr+=(origstr.substr(oldPos, pos - oldPos + 1));
  69. pos += 1;
  70. }
  71. // find next &
  72. oldPos = pos;
  73. pos = origstr.find("&", pos);
  74. }
  75. if (oldPos < origstr.length()-1)
  76. newstr+=(origstr.substr(oldPos, origstr.length()-oldPos));
  77. return newstr;
  78. }
  79. static inline bool _is_white_space(char c)
  80. {
  81. return (c==' ' || c=='\t' || c=='\n' || c=='\r');
  82. }
  83. //! sets the state that text was found. Returns true if set should be set
  84. bool XMLParser::_set_text(char* start, char* end) {
  85. // check if text is more than 2 characters, and if not, check if there is
  86. // only white space, so that this text won't be reported
  87. if (end - start < 3)
  88. {
  89. char* p = start;
  90. for(; p != end; ++p)
  91. if (!_is_white_space(*p))
  92. break;
  93. if (p == end)
  94. return false;
  95. }
  96. // set current text to the parsed text, and replace xml special characters
  97. String s = String::utf8(start, (int)(end - start));
  98. node_name = _replace_special_characters(s);
  99. // current XML node type is text
  100. node_type = NODE_TEXT;
  101. return true;
  102. }
  103. void XMLParser::_parse_closing_xml_element() {
  104. node_type = NODE_ELEMENT_END;
  105. node_empty = false;
  106. attributes.clear();
  107. ++P;
  108. const char* pBeginClose = P;
  109. while(*P != '>')
  110. ++P;
  111. node_name = String::utf8(pBeginClose, (int)(P - pBeginClose));
  112. #ifdef DEBUG_XML
  113. print_line("XML CLOSE: "+node_name);
  114. #endif
  115. ++P;
  116. }
  117. void XMLParser::_ignore_definition() {
  118. node_type = NODE_UNKNOWN;
  119. char *F=P;
  120. // move until end marked with '>' reached
  121. while(*P != '>')
  122. ++P;
  123. node_name.parse_utf8(F,P-F);
  124. ++P;
  125. }
  126. bool XMLParser::_parse_cdata() {
  127. if (*(P+1) != '[')
  128. return false;
  129. node_type = NODE_CDATA;
  130. // skip '<![CDATA['
  131. int count=0;
  132. while( *P && count<8 )
  133. {
  134. ++P;
  135. ++count;
  136. }
  137. if (!*P)
  138. return true;
  139. char *cDataBegin = P;
  140. char *cDataEnd = 0;
  141. // find end of CDATA
  142. while(*P && !cDataEnd) {
  143. if (*P == '>' &&
  144. (*(P-1) == ']') &&
  145. (*(P-2) == ']'))
  146. {
  147. cDataEnd = P - 2;
  148. }
  149. ++P;
  150. }
  151. if ( cDataEnd )
  152. node_name = String::utf8(cDataBegin, (int)(cDataEnd - cDataBegin));
  153. else
  154. node_name = "";
  155. #ifdef DEBUG_XML
  156. print_line("XML CDATA: "+node_name);
  157. #endif
  158. return true;
  159. }
  160. void XMLParser::_parse_comment() {
  161. node_type = NODE_COMMENT;
  162. P += 1;
  163. char *pCommentBegin = P;
  164. int count = 1;
  165. // move until end of comment reached
  166. while(count)
  167. {
  168. if (*P == '>')
  169. --count;
  170. else
  171. if (*P == '<')
  172. ++count;
  173. ++P;
  174. }
  175. P -= 3;
  176. node_name = String::utf8(pCommentBegin+2, (int)(P - pCommentBegin-2));
  177. P += 3;
  178. #ifdef DEBUG_XML
  179. print_line("XML COMMENT: "+node_name);
  180. #endif
  181. }
  182. void XMLParser::_parse_opening_xml_element() {
  183. node_type = NODE_ELEMENT;
  184. node_empty = false;
  185. attributes.clear();
  186. // find name
  187. const char* startName = P;
  188. // find end of element
  189. while(*P != '>' && !_is_white_space(*P))
  190. ++P;
  191. const char* endName = P;
  192. // find attributes
  193. while(*P != '>')
  194. {
  195. if (_is_white_space(*P))
  196. ++P;
  197. else
  198. {
  199. if (*P != '/')
  200. {
  201. // we've got an attribute
  202. // read the attribute names
  203. const char* attributeNameBegin = P;
  204. while(!_is_white_space(*P) && *P != '=')
  205. ++P;
  206. const char* attributeNameEnd = P;
  207. ++P;
  208. // read the attribute value
  209. // check for quotes and single quotes, thx to murphy
  210. while( (*P != '\"') && (*P != '\'') && *P)
  211. ++P;
  212. if (!*P) // malformatted xml file
  213. return;
  214. const char attributeQuoteChar = *P;
  215. ++P;
  216. const char* attributeValueBegin = P;
  217. while(*P != attributeQuoteChar && *P)
  218. ++P;
  219. if (!*P) // malformatted xml file
  220. return;
  221. const char* attributeValueEnd = P;
  222. ++P;
  223. Attribute attr;
  224. attr.name = String::utf8(attributeNameBegin,
  225. (int)(attributeNameEnd - attributeNameBegin));
  226. String s =String::utf8(attributeValueBegin,
  227. (int)(attributeValueEnd - attributeValueBegin));
  228. attr.value = _replace_special_characters(s);
  229. attributes.push_back(attr);
  230. }
  231. else
  232. {
  233. // tag is closed directly
  234. ++P;
  235. node_empty = true;
  236. break;
  237. }
  238. }
  239. }
  240. // check if this tag is closing directly
  241. if (endName > startName && *(endName-1) == '/')
  242. {
  243. // directly closing tag
  244. node_empty = true;
  245. endName--;
  246. }
  247. node_name = String::utf8(startName, (int)(endName - startName));
  248. #ifdef DEBUG_XML
  249. print_line("XML OPEN: "+node_name);
  250. #endif
  251. ++P;
  252. }
  253. void XMLParser::_parse_current_node() {
  254. char* start = P;
  255. node_offset = P - data;
  256. // more forward until '<' found
  257. while(*P != '<' && *P)
  258. ++P;
  259. if (!*P)
  260. return;
  261. if (P - start > 0)
  262. {
  263. // we found some text, store it
  264. if (_set_text(start, P))
  265. return;
  266. }
  267. ++P;
  268. // based on current token, parse and report next element
  269. switch(*P)
  270. {
  271. case '/':
  272. _parse_closing_xml_element();
  273. break;
  274. case '?':
  275. _ignore_definition();
  276. break;
  277. case '!':
  278. if (!_parse_cdata())
  279. _parse_comment();
  280. break;
  281. default:
  282. _parse_opening_xml_element();
  283. break;
  284. }
  285. }
  286. uint64_t XMLParser::get_node_offset() const {
  287. return node_offset;
  288. };
  289. Error XMLParser::seek(uint64_t p_pos) {
  290. ERR_FAIL_COND_V(!data, ERR_FILE_EOF)
  291. ERR_FAIL_COND_V(p_pos >= length, ERR_FILE_EOF);
  292. P = data + p_pos;
  293. return read();
  294. };
  295. void XMLParser::_bind_methods() {
  296. ObjectTypeDB::bind_method(_MD("read"),&XMLParser::read);
  297. ObjectTypeDB::bind_method(_MD("get_node_type"),&XMLParser::get_node_type);
  298. ObjectTypeDB::bind_method(_MD("get_node_name"),&XMLParser::get_node_name);
  299. ObjectTypeDB::bind_method(_MD("get_node_data"),&XMLParser::get_node_data);
  300. ObjectTypeDB::bind_method(_MD("get_node_offset"),&XMLParser::get_node_offset);
  301. ObjectTypeDB::bind_method(_MD("get_attribute_count"),&XMLParser::get_attribute_count);
  302. ObjectTypeDB::bind_method(_MD("get_attribute_name"),&XMLParser::get_attribute_name);
  303. ObjectTypeDB::bind_method(_MD("get_attribute_value"),(String (XMLParser::*)(int) const) &XMLParser::get_attribute_value);
  304. ObjectTypeDB::bind_method(_MD("has_attribute"),&XMLParser::has_attribute);
  305. ObjectTypeDB::bind_method(_MD("get_named_attribute_value"), (String (XMLParser::*)(const String&) const) &XMLParser::get_attribute_value);
  306. ObjectTypeDB::bind_method(_MD("get_named_attribute_value_safe"), &XMLParser::get_attribute_value_safe);
  307. ObjectTypeDB::bind_method(_MD("is_empty"),&XMLParser::is_empty);
  308. ObjectTypeDB::bind_method(_MD("get_current_line"),&XMLParser::get_current_line);
  309. ObjectTypeDB::bind_method(_MD("skip_section"),&XMLParser::skip_section);
  310. ObjectTypeDB::bind_method(_MD("seek"),&XMLParser::seek);
  311. ObjectTypeDB::bind_method(_MD("open","file"),&XMLParser::open);
  312. ObjectTypeDB::bind_method(_MD("open_buffer","buffer"),&XMLParser::open_buffer);
  313. BIND_CONSTANT( NODE_NONE );
  314. BIND_CONSTANT( NODE_ELEMENT );
  315. BIND_CONSTANT( NODE_ELEMENT_END );
  316. BIND_CONSTANT( NODE_TEXT );
  317. BIND_CONSTANT( NODE_COMMENT );
  318. BIND_CONSTANT( NODE_CDATA );
  319. BIND_CONSTANT( NODE_UNKNOWN );
  320. };
  321. Error XMLParser::read() {
  322. // if not end reached, parse the node
  323. if (P && (P - data) < length - 1 && *P != 0)
  324. {
  325. _parse_current_node();
  326. return OK;
  327. }
  328. return ERR_FILE_EOF;
  329. }
  330. XMLParser::NodeType XMLParser::get_node_type() {
  331. return node_type;
  332. }
  333. String XMLParser::get_node_data() const {
  334. ERR_FAIL_COND_V( node_type != NODE_TEXT, "");
  335. return node_name;
  336. }
  337. String XMLParser::get_node_name() const {
  338. ERR_FAIL_COND_V( node_type == NODE_TEXT, "");
  339. return node_name;
  340. }
  341. int XMLParser::get_attribute_count() const {
  342. return attributes.size();
  343. }
  344. String XMLParser::get_attribute_name(int p_idx) const {
  345. ERR_FAIL_INDEX_V(p_idx,attributes.size(),"");
  346. return attributes[p_idx].name;
  347. }
  348. String XMLParser::get_attribute_value(int p_idx) const {
  349. ERR_FAIL_INDEX_V(p_idx,attributes.size(),"");
  350. return attributes[p_idx].value;
  351. }
  352. bool XMLParser::has_attribute(const String& p_name) const {
  353. for(int i=0;i<attributes.size();i++) {
  354. if (attributes[i].name==p_name)
  355. return true;
  356. }
  357. return false;
  358. }
  359. String XMLParser::get_attribute_value(const String& p_name) const {
  360. int idx=-1;
  361. for(int i=0;i<attributes.size();i++) {
  362. if (attributes[i].name==p_name) {
  363. idx=i;
  364. break;
  365. }
  366. }
  367. if (idx<0) {
  368. ERR_EXPLAIN("Attribute not found: "+p_name);
  369. }
  370. ERR_FAIL_COND_V(idx<0,"");
  371. return attributes[idx].value;
  372. }
  373. String XMLParser::get_attribute_value_safe(const String& p_name) const {
  374. int idx=-1;
  375. for(int i=0;i<attributes.size();i++) {
  376. if (attributes[i].name==p_name) {
  377. idx=i;
  378. break;
  379. }
  380. }
  381. if (idx<0)
  382. return "";
  383. return attributes[idx].value;
  384. }
  385. bool XMLParser::is_empty() const {
  386. return node_empty;
  387. }
  388. Error XMLParser::open_buffer(const Vector<uint8_t>& p_buffer) {
  389. ERR_FAIL_COND_V(p_buffer.size()==0,ERR_INVALID_DATA);
  390. length = p_buffer.size();
  391. data = memnew_arr( char, length+1);
  392. copymem(data,p_buffer.ptr(),length);
  393. data[length]=0;
  394. P=data;
  395. return OK;
  396. }
  397. Error XMLParser::open(const String& p_path) {
  398. Error err;
  399. FileAccess * file = FileAccess::open(p_path,FileAccess::READ,&err);
  400. if (err) {
  401. ERR_FAIL_COND_V(err!=OK,err);
  402. }
  403. length = file->get_len();
  404. ERR_FAIL_COND_V(length<1, ERR_FILE_CORRUPT);
  405. data = memnew_arr( char, length+1);
  406. file->get_buffer((uint8_t*)data,length);
  407. data[length]=0;
  408. P=data;
  409. memdelete(file);
  410. return OK;
  411. }
  412. void XMLParser::skip_section() {
  413. // skip if this element is empty anyway.
  414. if (is_empty())
  415. return;
  416. // read until we've reached the last element in this section
  417. int tagcount = 1;
  418. while(tagcount && read()==OK)
  419. {
  420. if (get_node_type() == XMLParser::NODE_ELEMENT &&
  421. !is_empty())
  422. {
  423. ++tagcount;
  424. }
  425. else
  426. if (get_node_type() == XMLParser::NODE_ELEMENT_END)
  427. --tagcount;
  428. }
  429. }
  430. void XMLParser::close() {
  431. if (data)
  432. memdelete_arr(data);
  433. data=NULL;
  434. length=0;
  435. P=NULL;
  436. node_empty=false;
  437. node_type=NODE_NONE;
  438. node_offset = 0;
  439. }
  440. int XMLParser::get_current_line() const {
  441. return 0;
  442. }
  443. XMLParser::XMLParser() {
  444. data=NULL;
  445. close();
  446. special_characters.push_back("&amp;");
  447. special_characters.push_back("<lt;");
  448. special_characters.push_back(">gt;");
  449. special_characters.push_back("\"quot;");
  450. special_characters.push_back("'apos;");
  451. }
  452. XMLParser::~XMLParser() {
  453. if (data)
  454. memdelete_arr(data);
  455. }