# thtmlparser.nim

# Expected-output spec for this test file (presumably consumed by the test
# runner): `@[]` is what `echo errors` prints in block t2813 and `true` is the
# single line printed by block t2814 once both of its cases pass.
discard """
  output: '''
@[]
true
'''
"""
  7. import htmlparser
  8. import xmltree
  9. import strutils
  10. from streams import newStringStream
  11. block t2813:
  12. const
  13. html = """
  14. <html>
  15. <head>
  16. <title>Test</title>
  17. </head>
  18. <body>
  19. <table>
  20. <thead>
  21. <tr><td>A</td></tr>
  22. <tr><td>B</td></tr>
  23. </thead>
  24. <tbody>
  25. <tr><td></td>A<td></td></tr>
  26. <tr><td></td>B<td></td></tr>
  27. <tr><td></td>C<td></td></tr>
  28. </tbody>
  29. <tfoot>
  30. <tr><td>A</td></tr>
  31. </tfoot>
  32. </table>
  33. </body>
  34. </html>
  35. """
  36. var errors: seq[string] = @[]
  37. let tree = parseHtml(newStringStream(html), "test.html", errors)
  38. echo errors # Errors: </thead> expected,...
  39. var len = tree.findAll("tr").len # len = 6
  40. var rows: seq[XmlNode] = @[]
  41. for n in tree.findAll("table"):
  42. n.findAll("tr", rows) # len = 2
  43. break
  44. assert tree.findAll("tr").len == rows.len
  45. block t2814:
  46. ## builds the two cases below and test that
  47. ## ``//[dd,li]`` has "<p>that</p>" as children
  48. ##
  49. ## <dl>
  50. ## <dt>this</dt>
  51. ## <dd>
  52. ## <p>that</p>
  53. ## </dd>
  54. ## </dl>
  55. ##
  56. ## <ul>
  57. ## <li>
  58. ## <p>that</p>
  59. ## </li>
  60. ## </ul>
  61. for ltype in [["dl","dd"], ["ul","li"]]:
  62. let desc_item = if ltype[0]=="dl": "<dt>this</dt>" else: ""
  63. let item = "$1<$2><p>that</p></$2>" % [desc_item, ltype[1]]
  64. let list = """ <$1>
  65. $2
  66. </$1> """ % [ltype[0], item]
  67. var errors : seq[string] = @[]
  68. let parseH = parseHtml(newStringStream(list),"statichtml", errors =errors)
  69. if $parseH.findAll(ltype[1])[0].child("p") != "<p>that</p>":
  70. echo "case " & ltype[0] & " failed !"
  71. quit(2)
  72. echo "true"
  73. block t6154:
  74. let foo = """
  75. <!DOCTYPE html>
  76. <html>
  77. <head>
  78. <title> foobar </title>
  79. </head>
  80. <body>
  81. <p class=foo id=bar></p>
  82. <p something=&#9;foo&#9;bar&#178;></p>
  83. <p something= &#9;foo&#9;bar&#178; foo =bloo></p>
  84. <p class="foo2" id="bar2"></p>
  85. <p wrong= ></p>
  86. <p data-foo data-bar="correct!" enabled ></p>
  87. <p quux whatever></p>
  88. </body>
  89. </html>
  90. """
  91. var errors: seq[string] = @[]
  92. let html = parseHtml(newStringStream(foo), "statichtml", errors=errors)
  93. doAssert "statichtml(11, 18) Error: attribute value expected" in errors
  94. let ps = html.findAll("p")
  95. doAssert ps.len == 7
  96. doAssert ps[0].attrsLen == 2
  97. doAssert ps[0].attr("class") == "foo"
  98. doAssert ps[0].attr("id") == "bar"
  99. doassert ps[0].len == 0
  100. doAssert ps[1].attrsLen == 1
  101. doAssert ps[1].attr("something") == "\tfoo\tbar²"
  102. doassert ps[1].len == 0
  103. doAssert ps[2].attrsLen == 2
  104. doAssert ps[2].attr("something") == "\tfoo\tbar²"
  105. doAssert ps[2].attr("foo") == "bloo"
  106. doassert ps[2].len == 0
  107. doAssert ps[3].attrsLen == 2
  108. doAssert ps[3].attr("class") == "foo2"
  109. doAssert ps[3].attr("id") == "bar2"
  110. doassert ps[3].len == 0
  111. doAssert ps[4].attrsLen == 1
  112. doAssert ps[4].attr("wrong") == ""
  113. doAssert ps[5].attrsLen == 3
  114. doAssert ps[5].attr("data-foo") == ""
  115. doAssert ps[5].attr("data-bar") == "correct!"
  116. doAssert ps[5].attr("enabled") == ""
  117. doassert ps[5].len == 0
  118. doAssert ps[6].attrsLen == 2
  119. doAssert ps[6].attr("quux") == ""
  120. doAssert ps[6].attr("whatever") == ""
  121. doassert ps[6].len == 0