xmlparser.nim 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2010 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module parses an XML document and creates its XML tree representation.
  10. import std/[streams, parsexml, strtabs, xmltree]
  11. when defined(nimPreviewSlimSystem):
  12. import std/syncio
  13. type
  14. XmlError* = object of ValueError ## Exception that is raised
  15. ## for invalid XML.
  16. errors*: seq[string] ## All detected parsing errors.
  17. proc raiseInvalidXml(errors: seq[string]) =
  18. var e: ref XmlError
  19. new(e)
  20. e.msg = errors[0]
  21. e.errors = errors
  22. raise e
  23. proc addNode(father, son: XmlNode) =
  24. if son != nil: add(father, son)
  25. proc parse(x: var XmlParser, errors: var seq[string]): XmlNode {.gcsafe.}
  26. proc untilElementEnd(x: var XmlParser, result: XmlNode,
  27. errors: var seq[string]) =
  28. while true:
  29. case x.kind
  30. of xmlElementEnd:
  31. if x.elementName == result.tag:
  32. next(x)
  33. else:
  34. errors.add(errorMsg(x, "</" & result.tag & "> expected"))
  35. # do not skip it here!
  36. break
  37. of xmlEof:
  38. errors.add(errorMsg(x, "</" & result.tag & "> expected"))
  39. break
  40. else:
  41. result.addNode(parse(x, errors))
  42. proc parse(x: var XmlParser, errors: var seq[string]): XmlNode =
  43. result = nil
  44. case x.kind
  45. of xmlComment:
  46. result = newComment(x.charData)
  47. next(x)
  48. of xmlCharData, xmlWhitespace:
  49. result = newText(x.charData)
  50. next(x)
  51. of xmlPI, xmlSpecial:
  52. # we just ignore processing instructions for now
  53. next(x)
  54. of xmlError:
  55. errors.add(errorMsg(x))
  56. next(x)
  57. of xmlElementStart: ## ``<elem>``
  58. result = newElement(x.elementName)
  59. next(x)
  60. untilElementEnd(x, result, errors)
  61. of xmlElementEnd:
  62. errors.add(errorMsg(x, "unexpected ending tag: " & x.elementName))
  63. of xmlElementOpen:
  64. result = newElement(x.elementName)
  65. next(x)
  66. result.attrs = newStringTable()
  67. while true:
  68. case x.kind
  69. of xmlAttribute:
  70. result.attrs[x.attrKey] = x.attrValue
  71. next(x)
  72. of xmlElementClose:
  73. next(x)
  74. break
  75. of xmlError:
  76. errors.add(errorMsg(x))
  77. next(x)
  78. break
  79. else:
  80. errors.add(errorMsg(x, "'>' expected"))
  81. next(x)
  82. break
  83. untilElementEnd(x, result, errors)
  84. of xmlAttribute, xmlElementClose:
  85. errors.add(errorMsg(x, "<some_tag> expected"))
  86. next(x)
  87. of xmlCData:
  88. result = newCData(x.charData)
  89. next(x)
  90. of xmlEntity:
  91. ## &entity;
  92. result = newEntity(x.entityName)
  93. next(x)
  94. of xmlEof: discard
  95. proc parseXml*(s: Stream, filename: string,
  96. errors: var seq[string], options: set[XmlParseOption] = {reportComments}): XmlNode =
  97. ## Parses the XML from stream ``s`` and returns a ``XmlNode``. Every
  98. ## occurred parsing error is added to the ``errors`` sequence.
  99. result = nil
  100. var x: XmlParser = default(XmlParser)
  101. open(x, s, filename, options)
  102. while true:
  103. x.next()
  104. case x.kind
  105. of xmlElementOpen, xmlElementStart:
  106. result = parse(x, errors)
  107. break
  108. of xmlComment, xmlWhitespace, xmlSpecial, xmlPI: discard # just skip it
  109. of xmlError:
  110. errors.add(errorMsg(x))
  111. else:
  112. errors.add(errorMsg(x, "<some_tag> expected"))
  113. break
  114. close(x)
  115. proc parseXml*(s: Stream, options: set[XmlParseOption] = {reportComments}): XmlNode =
  116. ## Parses the XML from stream ``s`` and returns a ``XmlNode``. All parsing
  117. ## errors are turned into an ``XmlError`` exception.
  118. var errors: seq[string] = @[]
  119. result = parseXml(s, "unknown_xml_doc", errors, options)
  120. if errors.len > 0: raiseInvalidXml(errors)
  121. proc parseXml*(str: string, options: set[XmlParseOption] = {reportComments}): XmlNode =
  122. ## Parses the XML from string ``str`` and returns a ``XmlNode``. All parsing
  123. ## errors are turned into an ``XmlError`` exception.
  124. parseXml(newStringStream(str), options)
  125. proc loadXml*(path: string, errors: var seq[string], options: set[XmlParseOption] = {reportComments}): XmlNode =
  126. ## Loads and parses XML from file specified by ``path``, and returns
  127. ## a ``XmlNode``. Every occurred parsing error is added to the ``errors``
  128. ## sequence.
  129. var s = newFileStream(path, fmRead)
  130. if s == nil: raise newException(IOError, "Unable to read file: " & path)
  131. result = parseXml(s, path, errors, options)
  132. proc loadXml*(path: string, options: set[XmlParseOption] = {reportComments}): XmlNode =
  133. ## Loads and parses XML from file specified by ``path``, and returns
  134. ## a ``XmlNode``. All parsing errors are turned into an ``XmlError``
  135. ## exception.
  136. var errors: seq[string] = @[]
  137. result = loadXml(path, errors, options)
  138. if errors.len > 0: raiseInvalidXml(errors)
  139. when isMainModule:
  140. when not defined(testing):
  141. import std/os
  142. var errors: seq[string] = @[]
  143. var x = loadXml(paramStr(1), errors)
  144. for e in items(errors): echo e
  145. var f: File
  146. if open(f, "xmltest.txt", fmWrite):
  147. f.write($x)
  148. f.close()
  149. else:
  150. quit("cannot write test.txt")
  151. else:
  152. block: # correctly parse ../../tests/testdata/doc1.xml
  153. let filePath = "tests/testdata/doc1.xml"
  154. var errors: seq[string] = @[]
  155. var xml = loadXml(filePath, errors)
  156. assert(errors.len == 0, "The file tests/testdata/doc1.xml should be parsed without errors.")
  157. block bug1518:
  158. var err: seq[string] = @[]
  159. assert $parsexml(newStringStream"<tag>One &amp; two</tag>", "temp.xml",
  160. err) == "<tag>One &amp; two</tag>"