#
#
#            Nim's Runtime Library
#        (c) Copyright 2010 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## This module parses an XML document and creates its XML tree representation.
  10. import streams, parsexml, strtabs, xmltree
  11. when defined(nimPreviewSlimSystem):
  12. import std/syncio
  13. type
  14. XmlError* = object of ValueError ## Exception that is raised
  15. ## for invalid XML.
  16. errors*: seq[string] ## All detected parsing errors.
  17. proc raiseInvalidXml(errors: seq[string]) =
  18. var e: ref XmlError
  19. new(e)
  20. e.msg = errors[0]
  21. e.errors = errors
  22. raise e
  23. proc addNode(father, son: XmlNode) =
  24. if son != nil: add(father, son)
  25. proc parse(x: var XmlParser, errors: var seq[string]): XmlNode {.gcsafe.}
  26. proc untilElementEnd(x: var XmlParser, result: XmlNode,
  27. errors: var seq[string]) =
  28. while true:
  29. case x.kind
  30. of xmlElementEnd:
  31. if x.elementName == result.tag:
  32. next(x)
  33. else:
  34. errors.add(errorMsg(x, "</" & result.tag & "> expected"))
  35. # do not skip it here!
  36. break
  37. of xmlEof:
  38. errors.add(errorMsg(x, "</" & result.tag & "> expected"))
  39. break
  40. else:
  41. result.addNode(parse(x, errors))
  42. proc parse(x: var XmlParser, errors: var seq[string]): XmlNode =
  43. case x.kind
  44. of xmlComment:
  45. result = newComment(x.charData)
  46. next(x)
  47. of xmlCharData, xmlWhitespace:
  48. result = newText(x.charData)
  49. next(x)
  50. of xmlPI, xmlSpecial:
  51. # we just ignore processing instructions for now
  52. next(x)
  53. of xmlError:
  54. errors.add(errorMsg(x))
  55. next(x)
  56. of xmlElementStart: ## ``<elem>``
  57. result = newElement(x.elementName)
  58. next(x)
  59. untilElementEnd(x, result, errors)
  60. of xmlElementEnd:
  61. errors.add(errorMsg(x, "unexpected ending tag: " & x.elementName))
  62. of xmlElementOpen:
  63. result = newElement(x.elementName)
  64. next(x)
  65. result.attrs = newStringTable()
  66. while true:
  67. case x.kind
  68. of xmlAttribute:
  69. result.attrs[x.attrKey] = x.attrValue
  70. next(x)
  71. of xmlElementClose:
  72. next(x)
  73. break
  74. of xmlError:
  75. errors.add(errorMsg(x))
  76. next(x)
  77. break
  78. else:
  79. errors.add(errorMsg(x, "'>' expected"))
  80. next(x)
  81. break
  82. untilElementEnd(x, result, errors)
  83. of xmlAttribute, xmlElementClose:
  84. errors.add(errorMsg(x, "<some_tag> expected"))
  85. next(x)
  86. of xmlCData:
  87. result = newCData(x.charData)
  88. next(x)
  89. of xmlEntity:
  90. ## &entity;
  91. result = newEntity(x.entityName)
  92. next(x)
  93. of xmlEof: discard
  94. proc parseXml*(s: Stream, filename: string,
  95. errors: var seq[string], options: set[XmlParseOption] = {reportComments}): XmlNode =
  96. ## Parses the XML from stream ``s`` and returns a ``XmlNode``. Every
  97. ## occurred parsing error is added to the ``errors`` sequence.
  98. var x: XmlParser
  99. open(x, s, filename, options)
  100. while true:
  101. x.next()
  102. case x.kind
  103. of xmlElementOpen, xmlElementStart:
  104. result = parse(x, errors)
  105. break
  106. of xmlComment, xmlWhitespace, xmlSpecial, xmlPI: discard # just skip it
  107. of xmlError:
  108. errors.add(errorMsg(x))
  109. else:
  110. errors.add(errorMsg(x, "<some_tag> expected"))
  111. break
  112. close(x)
  113. proc parseXml*(s: Stream, options: set[XmlParseOption] = {reportComments}): XmlNode =
  114. ## Parses the XML from stream ``s`` and returns a ``XmlNode``. All parsing
  115. ## errors are turned into an ``XmlError`` exception.
  116. var errors: seq[string] = @[]
  117. result = parseXml(s, "unknown_xml_doc", errors, options)
  118. if errors.len > 0: raiseInvalidXml(errors)
  119. proc parseXml*(str: string, options: set[XmlParseOption] = {reportComments}): XmlNode =
  120. ## Parses the XML from string ``str`` and returns a ``XmlNode``. All parsing
  121. ## errors are turned into an ``XmlError`` exception.
  122. parseXml(newStringStream(str), options)
  123. proc loadXml*(path: string, errors: var seq[string], options: set[XmlParseOption] = {reportComments}): XmlNode =
  124. ## Loads and parses XML from file specified by ``path``, and returns
  125. ## a ``XmlNode``. Every occurred parsing error is added to the ``errors``
  126. ## sequence.
  127. var s = newFileStream(path, fmRead)
  128. if s == nil: raise newException(IOError, "Unable to read file: " & path)
  129. result = parseXml(s, path, errors, options)
  130. proc loadXml*(path: string, options: set[XmlParseOption] = {reportComments}): XmlNode =
  131. ## Loads and parses XML from file specified by ``path``, and returns
  132. ## a ``XmlNode``. All parsing errors are turned into an ``XmlError``
  133. ## exception.
  134. var errors: seq[string] = @[]
  135. result = loadXml(path, errors, options)
  136. if errors.len > 0: raiseInvalidXml(errors)
  137. when isMainModule:
  138. when not defined(testing):
  139. import os
  140. var errors: seq[string] = @[]
  141. var x = loadXml(paramStr(1), errors)
  142. for e in items(errors): echo e
  143. var f: File
  144. if open(f, "xmltest.txt", fmWrite):
  145. f.write($x)
  146. f.close()
  147. else:
  148. quit("cannot write test.txt")
  149. else:
  150. block: # correctly parse ../../tests/testdata/doc1.xml
  151. let filePath = "tests/testdata/doc1.xml"
  152. var errors: seq[string] = @[]
  153. var xml = loadXml(filePath, errors)
  154. assert(errors.len == 0, "The file tests/testdata/doc1.xml should be parsed without errors.")
  155. block bug1518:
  156. var err: seq[string] = @[]
  157. assert $parsexml(newStringStream"<tag>One &amp; two</tag>", "temp.xml",
  158. err) == "<tag>One &amp; two</tag>"