xmltree.nim 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403
  1. #
  2. #
  3. # Nim's Runtime Library
  4. # (c) Copyright 2012 Andreas Rumpf
  5. #
  6. # See the file "copying.txt", included in this
  7. # distribution, for details about the copyright.
  8. #
  9. ## A simple XML tree.
  10. import macros, strtabs, strutils
  11. type
  12. XmlNode* = ref XmlNodeObj ## an XML tree consists of ``XmlNode``'s.
  13. XmlNodeKind* = enum ## different kinds of ``XmlNode``'s
  14. xnText, ## a text element
  15. xnElement, ## an element with 0 or more children
  16. xnCData, ## a CDATA node
  17. xnEntity, ## an entity (like ``&thing;``)
  18. xnComment ## an XML comment
  19. XmlAttributes* = StringTableRef ## an alias for a string to string mapping
  20. XmlNodeObj {.acyclic.} = object
  21. case k: XmlNodeKind # private, use the kind() proc to read this field.
  22. of xnText, xnComment, xnCData, xnEntity:
  23. fText: string
  24. of xnElement:
  25. fTag: string
  26. s: seq[XmlNode]
  27. fAttr: XmlAttributes
  28. fClientData: int ## for other clients
  29. proc newXmlNode(kind: XmlNodeKind): XmlNode =
  30. ## creates a new ``XmlNode``.
  31. new(result)
  32. result.k = kind
  33. proc newElement*(tag: string): XmlNode =
  34. ## creates a new ``PXmlNode`` of kind ``xnText`` with the given `tag`.
  35. result = newXmlNode(xnElement)
  36. result.fTag = tag
  37. result.s = @[]
  38. # init attributes lazily to safe memory
  39. proc newText*(text: string): XmlNode =
  40. ## creates a new ``PXmlNode`` of kind ``xnText`` with the text `text`.
  41. result = newXmlNode(xnText)
  42. result.fText = text
  43. proc newComment*(comment: string): XmlNode =
  44. ## creates a new ``PXmlNode`` of kind ``xnComment`` with the text `comment`.
  45. result = newXmlNode(xnComment)
  46. result.fText = comment
  47. proc newCData*(cdata: string): XmlNode =
  48. ## creates a new ``PXmlNode`` of kind ``xnComment`` with the text `cdata`.
  49. result = newXmlNode(xnCData)
  50. result.fText = cdata
  51. proc newEntity*(entity: string): XmlNode =
  52. ## creates a new ``PXmlNode`` of kind ``xnEntity`` with the text `entity`.
  53. result = newXmlNode(xnEntity)
  54. result.fText = entity
  55. proc text*(n: XmlNode): string {.inline.} =
  56. ## gets the associated text with the node `n`. `n` can be a CDATA, Text,
  57. ## comment, or entity node.
  58. assert n.k in {xnText, xnComment, xnCData, xnEntity}
  59. result = n.fText
  60. proc `text=`*(n: XmlNode, text: string){.inline.} =
  61. ## sets the associated text with the node `n`. `n` can be a CDATA, Text,
  62. ## comment, or entity node.
  63. assert n.k in {xnText, xnComment, xnCData, xnEntity}
  64. n.fText = text
  65. proc rawText*(n: XmlNode): string {.inline.} =
  66. ## returns the underlying 'text' string by reference.
  67. ## This is only used for speed hacks.
  68. shallowCopy(result, n.fText)
  69. proc rawTag*(n: XmlNode): string {.inline.} =
  70. ## returns the underlying 'tag' string by reference.
  71. ## This is only used for speed hacks.
  72. shallowCopy(result, n.fTag)
  73. proc innerText*(n: XmlNode): string =
  74. ## gets the inner text of `n`:
  75. ##
  76. ## - If `n` is `xnText` or `xnEntity`, returns its content.
  77. ## - If `n` is `xnElement`, runs recursively on each child node and
  78. ## concatenates the results.
  79. ## - Otherwise returns an empty string.
  80. proc worker(res: var string, n: XmlNode) =
  81. case n.k
  82. of xnText, xnEntity:
  83. res.add(n.fText)
  84. of xnElement:
  85. for sub in n.s:
  86. worker(res, sub)
  87. else:
  88. discard
  89. result = ""
  90. worker(result, n)
  91. proc tag*(n: XmlNode): string {.inline.} =
  92. ## gets the tag name of `n`. `n` has to be an ``xnElement`` node.
  93. assert n.k == xnElement
  94. result = n.fTag
  95. proc `tag=`*(n: XmlNode, tag: string) {.inline.} =
  96. ## sets the tag name of `n`. `n` has to be an ``xnElement`` node.
  97. assert n.k == xnElement
  98. n.fTag = tag
  99. proc add*(father, son: XmlNode) {.inline.} =
  100. ## adds the child `son` to `father`.
  101. add(father.s, son)
  102. proc insert*(father, son: XmlNode, index: int) {.inline.} =
  103. ## insert the child `son` to a given position in `father`.
  104. assert father.k == xnElement and son.k == xnElement
  105. if len(father.s) > index:
  106. insert(father.s, son, index)
  107. else:
  108. insert(father.s, son, len(father.s))
  109. proc len*(n: XmlNode): int {.inline.} =
  110. ## returns the number `n`'s children.
  111. if n.k == xnElement: result = len(n.s)
  112. proc kind*(n: XmlNode): XmlNodeKind {.inline.} =
  113. ## returns `n`'s kind.
  114. result = n.k
  115. proc `[]`* (n: XmlNode, i: int): XmlNode {.inline.} =
  116. ## returns the `i`'th child of `n`.
  117. assert n.k == xnElement
  118. result = n.s[i]
  119. proc delete*(n: XmlNode, i: Natural) {.noSideEffect.} =
  120. ## delete the `i`'th child of `n`.
  121. assert n.k == xnElement
  122. n.s.delete(i)
  123. proc `[]`* (n: var XmlNode, i: int): var XmlNode {.inline.} =
  124. ## returns the `i`'th child of `n` so that it can be modified
  125. assert n.k == xnElement
  126. result = n.s[i]
  127. iterator items*(n: XmlNode): XmlNode {.inline.} =
  128. ## iterates over any child of `n`.
  129. assert n.k == xnElement
  130. for i in 0 .. n.len-1: yield n[i]
  131. iterator mitems*(n: var XmlNode): var XmlNode {.inline.} =
  132. ## iterates over any child of `n`.
  133. assert n.k == xnElement
  134. for i in 0 .. n.len-1: yield n[i]
  135. proc attrs*(n: XmlNode): XmlAttributes {.inline.} =
  136. ## gets the attributes belonging to `n`.
  137. ## Returns `nil` if attributes have not been initialised for this node.
  138. assert n.k == xnElement
  139. result = n.fAttr
  140. proc `attrs=`*(n: XmlNode, attr: XmlAttributes) {.inline.} =
  141. ## sets the attributes belonging to `n`.
  142. assert n.k == xnElement
  143. n.fAttr = attr
  144. proc attrsLen*(n: XmlNode): int {.inline.} =
  145. ## returns the number of `n`'s attributes.
  146. assert n.k == xnElement
  147. if not isNil(n.fAttr): result = len(n.fAttr)
  148. proc clientData*(n: XmlNode): int {.inline.} =
  149. ## gets the client data of `n`. The client data field is used by the HTML
  150. ## parser and generator.
  151. result = n.fClientData
  152. proc `clientData=`*(n: XmlNode, data: int) {.inline.} =
  153. ## sets the client data of `n`. The client data field is used by the HTML
  154. ## parser and generator.
  155. n.fClientData = data
  156. proc addEscaped*(result: var string, s: string) =
  157. ## same as ``result.add(escape(s))``, but more efficient.
  158. for c in items(s):
  159. case c
  160. of '<': result.add("&lt;")
  161. of '>': result.add("&gt;")
  162. of '&': result.add("&amp;")
  163. of '"': result.add("&quot;")
  164. of '\'': result.add("&#x27;")
  165. of '/': result.add("&#x2F;")
  166. else: result.add(c)
  167. proc escape*(s: string): string =
  168. ## escapes `s` for inclusion into an XML document.
  169. ## Escapes these characters:
  170. ##
  171. ## ------------ -------------------
  172. ## char is converted to
  173. ## ------------ -------------------
  174. ## ``<`` ``&lt;``
  175. ## ``>`` ``&gt;``
  176. ## ``&`` ``&amp;``
  177. ## ``"`` ``&quot;``
  178. ## ``'`` ``&#x27;``
  179. ## ``/`` ``&#x2F;``
  180. ## ------------ -------------------
  181. result = newStringOfCap(s.len)
  182. addEscaped(result, s)
  183. proc addIndent(result: var string, indent: int, addNewLines: bool) =
  184. if addNewLines:
  185. result.add("\n")
  186. for i in 1..indent: result.add(' ')
  187. proc noWhitespace(n: XmlNode): bool =
  188. #for i in 1..n.len-1:
  189. # if n[i].kind != n[0].kind: return true
  190. for i in 0..n.len-1:
  191. if n[i].kind in {xnText, xnEntity}: return true
  192. proc add*(result: var string, n: XmlNode, indent = 0, indWidth = 2,
  193. addNewLines=true) =
  194. ## adds the textual representation of `n` to `result`.
  195. proc addEscapedAttr(result: var string, s: string) =
  196. # `addEscaped` alternative with less escaped characters.
  197. # Only to be used for escaping attribute values enclosed in double quotes!
  198. for c in items(s):
  199. case c
  200. of '<': result.add("&lt;")
  201. of '>': result.add("&gt;")
  202. of '&': result.add("&amp;")
  203. of '"': result.add("&quot;")
  204. else: result.add(c)
  205. if n == nil: return
  206. case n.k
  207. of xnElement:
  208. result.add('<')
  209. result.add(n.fTag)
  210. if not isNil(n.fAttr):
  211. for key, val in pairs(n.fAttr):
  212. result.add(' ')
  213. result.add(key)
  214. result.add("=\"")
  215. result.addEscapedAttr(val)
  216. result.add('"')
  217. if n.len > 0:
  218. result.add('>')
  219. if n.len > 1:
  220. if noWhitespace(n):
  221. # for mixed leaves, we cannot output whitespace for readability,
  222. # because this would be wrong. For example: ``a<b>b</b>`` is
  223. # different from ``a <b>b</b>``.
  224. for i in 0..n.len-1:
  225. result.add(n[i], indent+indWidth, indWidth, addNewLines)
  226. else:
  227. for i in 0..n.len-1:
  228. result.addIndent(indent+indWidth, addNewLines)
  229. result.add(n[i], indent+indWidth, indWidth, addNewLines)
  230. result.addIndent(indent, addNewLines)
  231. else:
  232. result.add(n[0], indent+indWidth, indWidth, addNewLines)
  233. result.add("</")
  234. result.add(n.fTag)
  235. result.add(">")
  236. else:
  237. result.add(" />")
  238. of xnText:
  239. result.addEscaped(n.fText)
  240. of xnComment:
  241. result.add("<!-- ")
  242. result.addEscaped(n.fText)
  243. result.add(" -->")
  244. of xnCData:
  245. result.add("<![CDATA[")
  246. result.add(n.fText)
  247. result.add("]]>")
  248. of xnEntity:
  249. result.add('&')
  250. result.add(n.fText)
  251. result.add(';')
  252. const
  253. xmlHeader* = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
  254. ## header to use for complete XML output
  255. proc `$`*(n: XmlNode): string =
  256. ## converts `n` into its string representation. No ``<$xml ...$>`` declaration
  257. ## is produced, so that the produced XML fragments are composable.
  258. result = ""
  259. result.add(n)
  260. proc newXmlTree*(tag: string, children: openArray[XmlNode],
  261. attributes: XmlAttributes = nil): XmlNode =
  262. ## creates a new XML tree with `tag`, `children` and `attributes`
  263. result = newXmlNode(xnElement)
  264. result.fTag = tag
  265. newSeq(result.s, children.len)
  266. for i in 0..children.len-1: result.s[i] = children[i]
  267. result.fAttr = attributes
  268. proc xmlConstructor(a: NimNode): NimNode {.compileTime.} =
  269. if a.kind == nnkCall:
  270. result = newCall("newXmlTree", toStrLit(a[0]))
  271. var attrs = newNimNode(nnkBracket, a)
  272. var newStringTabCall = newCall(bindSym"newStringTable", attrs,
  273. bindSym"modeCaseSensitive")
  274. var elements = newNimNode(nnkBracket, a)
  275. for i in 1..a.len-1:
  276. if a[i].kind == nnkExprEqExpr:
  277. # In order to support attributes like `data-lang` we have to
  278. # replace whitespace because `toStrLit` gives `data - lang`.
  279. let attrName = toStrLit(a[i][0]).strVal.replace(" ", "")
  280. attrs.add(newStrLitNode(attrName))
  281. attrs.add(a[i][1])
  282. #echo repr(attrs)
  283. else:
  284. elements.add(a[i])
  285. result.add(elements)
  286. if attrs.len > 1:
  287. #echo repr(newStringTabCall)
  288. result.add(newStringTabCall)
  289. else:
  290. result = newCall("newXmlTree", toStrLit(a))
  291. macro `<>`*(x: untyped): untyped =
  292. ## Constructor macro for XML. Example usage:
  293. ##
  294. ## .. code-block:: nim
  295. ## <>a(href="http://nim-lang.org", newText("Nim rules."))
  296. ##
  297. ## Produces an XML tree for::
  298. ##
  299. ## <a href="http://nim-lang.org">Nim rules.</a>
  300. ##
  301. result = xmlConstructor(x)
  302. proc child*(n: XmlNode, name: string): XmlNode =
  303. ## Finds the first child element of `n` with a name of `name`.
  304. ## Returns `nil` on failure.
  305. assert n.kind == xnElement
  306. for i in items(n):
  307. if i.kind == xnElement:
  308. if i.tag == name:
  309. return i
  310. proc attr*(n: XmlNode, name: string): string =
  311. ## Finds the first attribute of `n` with a name of `name`.
  312. ## Returns "" on failure.
  313. assert n.kind == xnElement
  314. if n.attrs == nil: return ""
  315. return n.attrs.getOrDefault(name)
  316. proc findAll*(n: XmlNode, tag: string, result: var seq[XmlNode]) =
  317. ## Iterates over all the children of `n` returning those matching `tag`.
  318. ##
  319. ## Found nodes satisfying the condition will be appended to the `result`
  320. ## sequence, which can't be nil or the proc will crash. Usage example:
  321. ##
  322. ## .. code-block::
  323. ## var
  324. ## html: XmlNode
  325. ## tags: seq[XmlNode] = @[]
  326. ##
  327. ## html = buildHtml()
  328. ## findAll(html, "img", tags)
  329. ## for imgTag in tags:
  330. ## process(imgTag)
  331. assert n.k == xnElement
  332. for child in n.items():
  333. if child.k != xnElement:
  334. continue
  335. if child.tag == tag:
  336. result.add(child)
  337. child.findAll(tag, result)
  338. proc findAll*(n: XmlNode, tag: string): seq[XmlNode] =
  339. ## Shortcut version to assign in let blocks. Example:
  340. ##
  341. ## .. code-block::
  342. ## var html: XmlNode
  343. ##
  344. ## html = buildHtml(html)
  345. ## for imgTag in html.findAll("img"):
  346. ## process(imgTag)
  347. newSeq(result, 0)
  348. findAll(n, tag, result)
  349. when isMainModule:
  350. assert """<a href="http://nim-lang.org">Nim rules.</a>""" ==
  351. $(<>a(href="http://nim-lang.org", newText("Nim rules.")))