xmlparser.nim 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2010 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## This module parses an XML document and creates its XML tree representation.
  10. import streams, parsexml, strtabs, xmltree
  11. type
  12. XmlError* = object of ValueError ## Exception that is raised
  13. ## for invalid XML.
  14. errors*: seq[string] ## All detected parsing errors.
  15. proc raiseInvalidXml(errors: seq[string]) =
  16. var e: ref XmlError
  17. new(e)
  18. e.msg = errors[0]
  19. e.errors = errors
  20. raise e
  21. proc addNode(father, son: XmlNode) =
  22. if son != nil: add(father, son)
  23. proc parse(x: var XmlParser, errors: var seq[string]): XmlNode {.gcsafe.}
  24. proc untilElementEnd(x: var XmlParser, result: XmlNode,
  25. errors: var seq[string]) =
  26. while true:
  27. case x.kind
  28. of xmlElementEnd:
  29. if x.elementName == result.tag:
  30. next(x)
  31. else:
  32. errors.add(errorMsg(x, "</" & result.tag & "> expected"))
  33. # do not skip it here!
  34. break
  35. of xmlEof:
  36. errors.add(errorMsg(x, "</" & result.tag & "> expected"))
  37. break
  38. else:
  39. result.addNode(parse(x, errors))
  40. proc parse(x: var XmlParser, errors: var seq[string]): XmlNode =
  41. case x.kind
  42. of xmlComment:
  43. result = newComment(x.charData)
  44. next(x)
  45. of xmlCharData, xmlWhitespace:
  46. result = newText(x.charData)
  47. next(x)
  48. of xmlPI, xmlSpecial:
  49. # we just ignore processing instructions for now
  50. next(x)
  51. of xmlError:
  52. errors.add(errorMsg(x))
  53. next(x)
  54. of xmlElementStart: ## ``<elem>``
  55. result = newElement(x.elementName)
  56. next(x)
  57. untilElementEnd(x, result, errors)
  58. of xmlElementEnd:
  59. errors.add(errorMsg(x, "unexpected ending tag: " & x.elementName))
  60. of xmlElementOpen:
  61. result = newElement(x.elementName)
  62. next(x)
  63. result.attrs = newStringTable()
  64. while true:
  65. case x.kind
  66. of xmlAttribute:
  67. result.attrs[x.attrKey] = x.attrValue
  68. next(x)
  69. of xmlElementClose:
  70. next(x)
  71. break
  72. of xmlError:
  73. errors.add(errorMsg(x))
  74. next(x)
  75. break
  76. else:
  77. errors.add(errorMsg(x, "'>' expected"))
  78. next(x)
  79. break
  80. untilElementEnd(x, result, errors)
  81. of xmlAttribute, xmlElementClose:
  82. errors.add(errorMsg(x, "<some_tag> expected"))
  83. next(x)
  84. of xmlCData:
  85. result = newCData(x.charData)
  86. next(x)
  87. of xmlEntity:
  88. ## &entity;
  89. result = newEntity(x.entityName)
  90. next(x)
  91. of xmlEof: discard
  92. proc parseXml*(s: Stream, filename: string,
  93. errors: var seq[string]): XmlNode =
  94. ## Parses the XML from stream ``s`` and returns a ``XmlNode``. Every
  95. ## occurred parsing error is added to the ``errors`` sequence.
  96. var x: XmlParser
  97. open(x, s, filename, {reportComments})
  98. while true:
  99. x.next()
  100. case x.kind
  101. of xmlElementOpen, xmlElementStart:
  102. result = parse(x, errors)
  103. break
  104. of xmlComment, xmlWhitespace, xmlSpecial, xmlPI: discard # just skip it
  105. of xmlError:
  106. errors.add(errorMsg(x))
  107. else:
  108. errors.add(errorMsg(x, "<some_tag> expected"))
  109. break
  110. close(x)
  111. proc parseXml*(s: Stream): XmlNode =
  112. ## Parses the XML from stream ``s`` and returns a ``XmlNode``. All parsing
  113. ## errors are turned into an ``XmlError`` exception.
  114. var errors: seq[string] = @[]
  115. result = parseXml(s, "unknown_xml_doc", errors)
  116. if errors.len > 0: raiseInvalidXml(errors)
  117. proc parseXml*(str: string): XmlNode =
  118. ## Parses the XML from string ``str`` and returns a ``XmlNode``. All parsing
  119. ## errors are turned into an ``XmlError`` exception.
  120. parseXml(newStringStream(str))
  121. proc loadXml*(path: string, errors: var seq[string]): XmlNode =
  122. ## Loads and parses XML from file specified by ``path``, and returns
  123. ## a ``XmlNode``. Every occurred parsing error is added to the ``errors``
  124. ## sequence.
  125. var s = newFileStream(path, fmRead)
  126. if s == nil: raise newException(IOError, "Unable to read file: " & path)
  127. result = parseXml(s, path, errors)
  128. proc loadXml*(path: string): XmlNode =
  129. ## Loads and parses XML from file specified by ``path``, and returns
  130. ## a ``XmlNode``. All parsing errors are turned into an ``XmlError``
  131. ## exception.
  132. var errors: seq[string] = @[]
  133. result = loadXml(path, errors)
  134. if errors.len > 0: raiseInvalidXml(errors)
  135. when isMainModule:
  136. when not defined(testing):
  137. import os
  138. var errors: seq[string] = @[]
  139. var x = loadXml(paramStr(1), errors)
  140. for e in items(errors): echo e
  141. var f: File
  142. if open(f, "xmltest.txt", fmWrite):
  143. f.write($x)
  144. f.close()
  145. else:
  146. quit("cannot write test.txt")
  147. else:
  148. block: # correctly parse ../../tests/testdata/doc1.xml
  149. let filePath = "tests/testdata/doc1.xml"
  150. var errors: seq[string] = @[]
  151. var xml = loadXml(filePath, errors)
  152. assert(errors.len == 0, "The file tests/testdata/doc1.xml should be parsed without errors.")
  153. block bug1518:
  154. var err: seq[string] = @[]
  155. assert $parsexml(newStringStream"<tag>One &amp; two</tag>", "temp.xml", err) == "<tag>One &amp; two</tag>"