XmlParser.cpp 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279
  1. /* ----------------------------------------------------------------- */
  2. /* The HMM-Based Singing Voice Synthesis System "Sinsy" */
  3. /* developed by Sinsy Working Group */
  4. /* http://sinsy.sourceforge.net/ */
  5. /* ----------------------------------------------------------------- */
  6. /* */
  7. /* Copyright (c) 2009-2015 Nagoya Institute of Technology */
  8. /* Department of Computer Science */
  9. /* */
  10. /* All rights reserved. */
  11. /* */
  12. /* Redistribution and use in source and binary forms, with or */
  13. /* without modification, are permitted provided that the following */
  14. /* conditions are met: */
  15. /* */
  16. /* - Redistributions of source code must retain the above copyright */
  17. /* notice, this list of conditions and the following disclaimer. */
  18. /* - Redistributions in binary form must reproduce the above */
  19. /* copyright notice, this list of conditions and the following */
  20. /* disclaimer in the documentation and/or other materials provided */
  21. /* with the distribution. */
  22. /* - Neither the name of the Sinsy working group nor the names of */
  23. /* its contributors may be used to endorse or promote products */
  24. /* derived from this software without specific prior written */
  25. /* permission. */
  26. /* */
  27. /* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND */
  28. /* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  29. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  30. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  31. /* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS */
  32. /* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, */
  33. /* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED */
  34. /* TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, */
  35. /* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON */
  36. /* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, */
  37. /* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY */
  38. /* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  39. /* POSSIBILITY OF SUCH DAMAGE. */
  40. /* ----------------------------------------------------------------- */
  41. #include <sstream>
  42. #include <algorithm>
  43. #include <stack>
  44. #include <stdexcept>
  45. #include "XmlParser.h"
  46. #include "InputFile.h"
  47. #include "util_string.h"
  48. #include "util_log.h"
  49. namespace sinsy
  50. {
  51. namespace
  52. {
  53. /*!
  54. get char
  55. */
  56. char getChar(IReadableStream& stream) throw (StreamException)
  57. {
  58. char c = '\0';
  59. int result = stream.read(static_cast<void*>(&c), 1);
  60. if (0 == result) {
  61. throw StreamException("getChar() end of file");
  62. } else if (result < 0) {
  63. throw StreamException("getChar() error");
  64. }
  65. return c;
  66. }
  67. /*!
  68. get next valid char
  69. */
  70. char getNextValidChar(IReadableStream& stream) throw (StreamException)
  71. {
  72. for ( ; ; ) {
  73. char c(getChar(stream));
  74. if (!isBlank(c)) {
  75. return c;
  76. }
  77. }
  78. }
  79. /*!
  80. get next tag
  81. @param fp input fp
  82. @param data buffer to store data up to the next tag
  83. @param tag buffer to store the next tag (not contain '<' and '>')
  84. */
  85. void getNextTag(IReadableStream& stream, std::string& data, std::string& tag) throw (StreamException)
  86. {
  87. std::ostringstream oss;
  88. // skip blanks
  89. char c = getNextValidChar(stream);
  90. while ('<' != c) {
  91. oss << c;
  92. c = getChar(stream);
  93. }
  94. data = oss.str();
  95. // remove blanks from the tail of data
  96. cutBlanks(data);
  97. oss.str("");
  98. // skip blanks
  99. c = getNextValidChar(stream);
  100. while ('>' != c) {
  101. oss << c;
  102. c = getChar(stream);
  103. }
  104. tag = oss.str();
  105. // remove blanks from the tail of tag
  106. cutBlanks(tag);
  107. }
  108. /*!
  109. create xml data
  110. */
  111. XmlData* createXmlData(const std::string& tag, const std::string& data = "")
  112. {
  113. size_t idx = findFirstOfBlank(tag);
  114. if (0 == idx) {
  115. StreamException("createXmlData() no tag");
  116. }
  117. if (std::string::npos == idx) { // no attributres
  118. return new XmlData(tag);
  119. }
  120. XmlData* ret(new XmlData(tag.substr(0, idx)));
  121. const size_t sz(tag.size());
  122. while (idx < sz) {
  123. size_t at(tag.find('=', idx)); // search '='
  124. if (std::string::npos == at) {
  125. delete ret;
  126. throw StreamException("createXmlData() '=' is not exist");
  127. }
  128. if (at == idx) {
  129. delete ret;
  130. throw StreamException("createXmlData() '=' is at the head of tag");
  131. }
  132. std::string key(tag.substr(idx, at - idx)); // cut up to '='
  133. cutBlanks(key);
  134. idx = at + 1;
  135. idx = findFirstNotOfBlank(tag, idx);
  136. char quotation = '\0';
  137. // attribute value must start with ' or "
  138. if (('\'' == tag[idx]) || ('\"' == tag[idx])) {
  139. quotation = tag[idx];
  140. } else {
  141. delete ret;
  142. throw StreamException("xml attribute value needs \" or \'");
  143. }
  144. ++idx;
  145. size_t start(idx);
  146. // search last ' or "
  147. for ( ; ; ) {
  148. idx = tag.find(quotation, idx);
  149. if (std::string::npos == idx) {
  150. delete ret;
  151. throw StreamException("xml attribute value needs \" or \'");
  152. }
  153. // last ' or " following \ should be ignored
  154. if ((start != idx) || ('\\' != tag[idx - 1])) {
  155. break;
  156. } else {
  157. ++idx;
  158. }
  159. }
  160. std::string value(tag.substr(start, idx - start));
  161. ret->addAttribute(key, value);
  162. ++idx;
  163. idx = findFirstNotOfBlank(tag, idx);
  164. }
  165. return ret;
  166. }
  167. };
  168. /*!
  169. constructor
  170. */
  171. XmlParser::XmlParser()
  172. {
  173. }
  174. /*!
  175. destructor
  176. */
  177. XmlParser::~XmlParser()
  178. {
  179. }
  180. /*!
  181. read data from stream
  182. */
  183. XmlData* XmlParser::read(IReadableStream& stream, std::string& encoding) throw (StreamException)
  184. {
  185. XmlData* topData(NULL);
  186. std::stack<XmlData*> dataStack;
  187. std::string data;
  188. std::string tag;
  189. try {
  190. for ( ; ; ) {
  191. getNextTag(stream, data, tag);
  192. if ('/' == tag[0]) { // end tag
  193. tag.erase(0, 1);
  194. cutBlanks(tag);
  195. if (tag.empty()) {
  196. throw StreamException("end tag is empty : </ >");
  197. }
  198. if (dataStack.empty()) {
  199. throw StreamException("start tag is needed before end tag");
  200. }
  201. XmlData* xd(dataStack.top());
  202. if (0 != tag.compare(xd->getTag())) {
  203. throw StreamException("start tag and end tag are not match");
  204. }
  205. xd->setData(data);
  206. dataStack.pop();
  207. if (dataStack.empty()) {
  208. break;
  209. }
  210. } else if ('?' == tag[0]) { // processing instruction
  211. if (0 == tag.compare(0, 4, "?xml")) { // if <?xml ... ?> tag, read character encoding
  212. tag = tag.substr(1, tag.size() - 2);
  213. cutBlanks(tag);
  214. if (tag.empty()) {
  215. throw StreamException("start tag is empty : < />");
  216. }
  217. XmlData* xd(createXmlData(tag));
  218. std::string enc = xd->getAttribute("encoding");
  219. if (!enc.empty()) {
  220. encoding = enc;
  221. }
  222. delete xd;
  223. }
  224. } else if ('!' == tag[0]) { // skip
  225. } else { // start tag
  226. bool hasEndTag(true);
  227. if ('/' == tag[tag.size() - 1]) { // start tag which donot need end tag
  228. tag.erase(tag.size() - 1, 1);
  229. cutBlanks(tag);
  230. if (tag.empty()) {
  231. throw StreamException("start tag is empty : < />");
  232. }
  233. data.clear(); // fail safe
  234. hasEndTag = false;
  235. }
  236. XmlData* xd(createXmlData(tag));
  237. if (dataStack.empty()) {
  238. topData = xd;
  239. } else {
  240. dataStack.top()->addChild(xd);
  241. }
  242. if (hasEndTag) {
  243. dataStack.push(xd);
  244. }
  245. }
  246. }
  247. } catch (const std::exception& ex) {
  248. ERR_MSG("XML parsing error : " << ex.what());
  249. delete topData;
  250. throw;
  251. }
  252. return topData;
  253. }
  254. }; // namespace sinsy