DOMTreeBuilderTest.php 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744
  1. <?php
  2. /**
  3. * @file
  4. * Test the Tree Builder.
  5. */
  6. namespace Masterminds\HTML5\Tests\Parser;
  7. use Masterminds\HTML5\Parser\Scanner;
  8. use Masterminds\HTML5\Parser\Tokenizer;
  9. use Masterminds\HTML5\Parser\DOMTreeBuilder;
  10. /**
  11. * These tests are functional, not necessarily unit tests.
  12. */
  13. class DOMTreeBuilderTest extends \Masterminds\HTML5\Tests\TestCase
  14. {
  15. protected $errors = array();
  16. /**
  17. * Convenience function for parsing.
  18. */
  19. protected function parse($string, array $options = array())
  20. {
  21. $treeBuilder = new DOMTreeBuilder(false, $options);
  22. $scanner = new Scanner($string);
  23. $parser = new Tokenizer($scanner, $treeBuilder);
  24. $parser->parse();
  25. $this->errors = $treeBuilder->getErrors();
  26. return $treeBuilder->document();
  27. }
  28. /**
  29. * Utility function for parsing a fragment of HTML5.
  30. */
  31. protected function parseFragment($string)
  32. {
  33. $treeBuilder = new DOMTreeBuilder(true);
  34. $scanner = new Scanner($string);
  35. $parser = new Tokenizer($scanner, $treeBuilder);
  36. $parser->parse();
  37. $this->errors = $treeBuilder->getErrors();
  38. return $treeBuilder->fragment();
  39. }
  40. public function testDocument()
  41. {
  42. $html = '<!DOCTYPE html><html></html>';
  43. $doc = $this->parse($html);
  44. $this->assertEquals('UTF-8', $doc->encoding);
  45. $this->assertInstanceOf('\DOMDocument', $doc);
  46. $this->assertEquals('html', $doc->documentElement->tagName);
  47. $this->assertEquals('http://www.w3.org/1999/xhtml', $doc->documentElement->namespaceURI);
  48. }
  49. public function testBareAmpersand()
  50. {
  51. $html = "<!doctype html>
  52. <html>
  53. <body>
  54. <img src='a&b' />
  55. <img src='a&=' />
  56. <img src='a&=c' />
  57. <img src='a&=9' />
  58. </body>
  59. </html>";
  60. $doc = $this->parse($html);
  61. $this->assertEmpty($this->errors);
  62. $this->assertXmlStringEqualsXmlString('
  63. <!DOCTYPE html>
  64. <html xmlns="http://www.w3.org/1999/xhtml"><body>
  65. <img src="a&amp;b"/>
  66. <img src="a&amp;="/>
  67. <img src="a&amp;=c"/>
  68. <img src="a&amp;=9"/>
  69. </body>
  70. </html>', $doc->saveXML());
  71. }
  72. public function testBareAmpersandNotAllowedInAttributes()
  73. {
  74. $html = "<!doctype html>
  75. <html>
  76. <body>
  77. <img src='a&' />
  78. <img src='a&+' />
  79. </body>
  80. </html>";
  81. $doc = $this->parse($html);
  82. $this->assertCount(2, $this->errors);
  83. $this->assertXmlStringEqualsXmlString('
  84. <!DOCTYPE html>
  85. <html xmlns="http://www.w3.org/1999/xhtml"><body>
  86. <img src="a&amp;"/>
  87. <img src="a&amp;+"/>
  88. </body>
  89. </html>', $doc->saveXML());
  90. }
  91. public function testBareAmpersandNotAllowedInBody()
  92. {
  93. $html = '<!doctype html>
  94. <html>
  95. <body>
  96. a&b
  97. a&=
  98. a&=c
  99. a&=9
  100. a&+
  101. a& -- valid
  102. </body>
  103. </html>';
  104. $doc = $this->parse($html);
  105. $this->assertCount(5, $this->errors);
  106. $this->assertXmlStringEqualsXmlString('
  107. <!DOCTYPE html>
  108. <html xmlns="http://www.w3.org/1999/xhtml"><body>
  109. a&amp;b
  110. a&amp;=
  111. a&amp;=c
  112. a&amp;=9
  113. a&amp;+
  114. a&amp; -- valid
  115. </body>
  116. </html>', $doc->saveXML());
  117. }
  118. public function testEntityAtEndOfFile()
  119. {
  120. $fragment = $this->parseFragment('&#');
  121. $this->assertInstanceOf('DOMDocumentFragment', $fragment);
  122. $this->assertSame('&#', $fragment->textContent);
  123. $this->assertEquals('Line 1, Col 2: Expected &#DEC; &#HEX;, got EOF', $this->errors[0]);
  124. }
  125. public function testStrangeCapitalization()
  126. {
  127. $html = '<!doctype html>
  128. <html>
  129. <head>
  130. <Title>Hello, world!</TitlE>
  131. </head>
  132. <body>TheBody<script>foo</script></body>
  133. </html>';
  134. $doc = $this->parse($html);
  135. $this->assertInstanceOf('\DOMDocument', $doc);
  136. $this->assertEquals('html', $doc->documentElement->tagName);
  137. $xpath = new \DOMXPath($doc);
  138. $xpath->registerNamespace('x', 'http://www.w3.org/1999/xhtml');
  139. $this->assertEquals('Hello, world!', $xpath->query('//x:title')->item(0)->nodeValue);
  140. $this->assertEquals('foo', $xpath->query('//x:script')->item(0)->nodeValue);
  141. }
  142. public function testDocumentWithDisabledNamespaces()
  143. {
  144. $html = '<!DOCTYPE html><html></html>';
  145. $doc = $this->parse($html, array('disable_html_ns' => true));
  146. $this->assertInstanceOf('\DOMDocument', $doc);
  147. $this->assertEquals('html', $doc->documentElement->tagName);
  148. $this->assertNull($doc->documentElement->namespaceURI);
  149. }
  150. public function testDocumentWithATargetDocument()
  151. {
  152. $targetDom = new \DOMDocument();
  153. $html = '<!DOCTYPE html><html></html>';
  154. $doc = $this->parse($html, array('target_document' => $targetDom));
  155. $this->assertInstanceOf('\DOMDocument', $doc);
  156. $this->assertSame($doc, $targetDom);
  157. $this->assertEquals('html', $doc->documentElement->tagName);
  158. }
  159. public function testDocumentFakeAttrAbsence()
  160. {
  161. $html = '<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><body>foo</body></html>';
  162. $doc = $this->parse($html, array('xmlNamespaces' => true));
  163. $xp = new \DOMXPath($doc);
  164. $this->assertEquals(0, $xp->query('//@html5-php-fake-id-attribute')->length);
  165. }
  166. public function testFragment()
  167. {
  168. $html = '<div>test</div><span>test2</span>';
  169. $doc = $this->parseFragment($html);
  170. $this->assertInstanceOf('\DOMDocumentFragment', $doc);
  171. $this->assertTrue($doc->hasChildNodes());
  172. $this->assertEquals('div', $doc->childNodes->item(0)->tagName);
  173. $this->assertEquals('test', $doc->childNodes->item(0)->textContent);
  174. $this->assertEquals('span', $doc->childNodes->item(1)->tagName);
  175. $this->assertEquals('test2', $doc->childNodes->item(1)->textContent);
  176. }
  177. public function testElements()
  178. {
  179. $html = '<!DOCTYPE html><html><head><title></title></head><body></body></html>';
  180. $doc = $this->parse($html);
  181. $root = $doc->documentElement;
  182. $this->assertEquals('html', $root->tagName);
  183. $this->assertEquals('html', $root->localName);
  184. $this->assertEquals('html', $root->nodeName);
  185. $this->assertEquals(2, $root->childNodes->length);
  186. $kids = $root->childNodes;
  187. $this->assertEquals('head', $kids->item(0)->tagName);
  188. $this->assertEquals('body', $kids->item(1)->tagName);
  189. $head = $kids->item(0);
  190. $this->assertEquals(1, $head->childNodes->length);
  191. $this->assertEquals('title', $head->childNodes->item(0)->tagName);
  192. }
  193. public function testImplicitNamespaces()
  194. {
  195. $dom = $this->parse('<!DOCTYPE html><html><body><a xlink:href="bar">foo</a></body></html>');
  196. $a = $dom->getElementsByTagName('a')->item(0);
  197. $attr = $a->getAttributeNode('xlink:href');
  198. $this->assertEquals('http://www.w3.org/1999/xlink', $attr->namespaceURI);
  199. $dom = $this->parse('<!DOCTYPE html><html><body><a xml:base="bar">foo</a></body></html>');
  200. $a = $dom->getElementsByTagName('a')->item(0);
  201. $attr = $a->getAttributeNode('xml:base');
  202. $this->assertEquals('http://www.w3.org/XML/1998/namespace', $attr->namespaceURI);
  203. }
  204. public function testCustomImplicitNamespaces()
  205. {
  206. $dom = $this->parse('<!DOCTYPE html><html><body><a t:href="bar">foo</a></body></html>', array(
  207. 'implicitNamespaces' => array(
  208. 't' => 'http://www.example.com',
  209. ),
  210. ));
  211. $a = $dom->getElementsByTagName('a')->item(0);
  212. $attr = $a->getAttributeNode('t:href');
  213. $this->assertEquals('http://www.example.com', $attr->namespaceURI);
  214. $dom = $this->parse('<!DOCTYPE html><html><body><t:a>foo</t:a></body></html>', array(
  215. 'implicitNamespaces' => array(
  216. 't' => 'http://www.example.com',
  217. ),
  218. ));
  219. $list = $dom->getElementsByTagNameNS('http://www.example.com', 'a');
  220. $this->assertEquals(1, $list->length);
  221. }
  222. public function testXmlNamespaces()
  223. {
  224. $dom = $this->parse(
  225. '<!DOCTYPE html><html>
  226. <t:body xmlns:t="http://www.example.com">
  227. <a t:href="bar">foo</a>
  228. </body>
  229. <div>foo</div>
  230. </html>', array(
  231. 'xmlNamespaces' => true,
  232. ));
  233. $a = $dom->getElementsByTagName('a')->item(0);
  234. $attr = $a->getAttributeNode('t:href');
  235. $this->assertEquals('http://www.example.com', $attr->namespaceURI);
  236. $list = $dom->getElementsByTagNameNS('http://www.example.com', 'body');
  237. $this->assertEquals(1, $list->length);
  238. }
  239. public function testXmlNamespaceNesting()
  240. {
  241. $dom = $this->parse(
  242. '<!DOCTYPE html><html>
  243. <body xmlns:x="http://www.prefixed.com" id="body">
  244. <a id="bar1" xmlns="http://www.prefixed.com/bar1">
  245. <b id="bar4" xmlns="http://www.prefixed.com/bar4"><x:prefixed id="prefixed"/></b>
  246. </a>
  247. <svg id="svg"></svg>
  248. <c id="bar2" xmlns="http://www.prefixed.com/bar2"></c>
  249. <div id="div"></div>
  250. <d id="bar3"></d>
  251. <xn:d xmlns:xn="http://www.prefixed.com/xn" xmlns="http://www.prefixed.com/bar5_x" id="bar5"><x id="bar5_x"/></xn:d>
  252. </body>
  253. </html>', array(
  254. 'xmlNamespaces' => true,
  255. ));
  256. $this->assertEmpty($this->errors);
  257. $div = $dom->getElementById('div');
  258. $this->assertEquals('http://www.w3.org/1999/xhtml', $div->namespaceURI);
  259. $body = $dom->getElementById('body');
  260. $this->assertEquals('http://www.w3.org/1999/xhtml', $body->namespaceURI);
  261. $bar1 = $dom->getElementById('bar1');
  262. $this->assertEquals('http://www.prefixed.com/bar1', $bar1->namespaceURI);
  263. $bar2 = $dom->getElementById('bar2');
  264. $this->assertEquals('http://www.prefixed.com/bar2', $bar2->namespaceURI);
  265. $bar3 = $dom->getElementById('bar3');
  266. $this->assertEquals('http://www.w3.org/1999/xhtml', $bar3->namespaceURI);
  267. $bar4 = $dom->getElementById('bar4');
  268. $this->assertEquals('http://www.prefixed.com/bar4', $bar4->namespaceURI);
  269. $svg = $dom->getElementById('svg');
  270. $this->assertEquals('http://www.w3.org/2000/svg', $svg->namespaceURI);
  271. $prefixed = $dom->getElementById('prefixed');
  272. $this->assertEquals('http://www.prefixed.com', $prefixed->namespaceURI);
  273. $prefixed = $dom->getElementById('bar5');
  274. $this->assertEquals('http://www.prefixed.com/xn', $prefixed->namespaceURI);
  275. $prefixed = $dom->getElementById('bar5_x');
  276. $this->assertEquals('http://www.prefixed.com/bar5_x', $prefixed->namespaceURI);
  277. }
  278. public function testMoveNonInlineElements()
  279. {
  280. $doc = $this->parse('<p>line1<br/><hr/>line2</p>');
  281. $this->assertEquals('<html xmlns="http://www.w3.org/1999/xhtml"><p>line1<br/></p><hr/>line2</html>', $doc->saveXML($doc->documentElement), 'Move non-inline elements outside of inline containers.');
  282. $doc = $this->parse('<p>line1<div>line2</div></p>');
  283. $this->assertEquals('<html xmlns="http://www.w3.org/1999/xhtml"><p>line1</p><div>line2</div></html>', $doc->saveXML($doc->documentElement), 'Move non-inline elements outside of inline containers.');
  284. }
  285. public function testAttributes()
  286. {
  287. $html = "<!DOCTYPE html>
  288. <html>
  289. <head><title></title></head>
  290. <body id='a' class='b c'></body>
  291. </html>";
  292. $doc = $this->parse($html);
  293. $root = $doc->documentElement;
  294. $body = $root->GetElementsByTagName('body')->item(0);
  295. $this->assertEquals('body', $body->tagName);
  296. $this->assertTrue($body->hasAttributes());
  297. $this->assertEquals('a', $body->getAttribute('id'));
  298. $this->assertEquals('b c', $body->getAttribute('class'));
  299. $body2 = $doc->getElementById('a');
  300. $this->assertEquals('body', $body2->tagName);
  301. $this->assertEquals('a', $body2->getAttribute('id'));
  302. }
  303. public function testSVGAttributes()
  304. {
  305. $html = "<!DOCTYPE html>
  306. <html><body>
  307. <svg width='150' viewbox='2'>
  308. <rect textlength='2'/>
  309. <animatecolor>foo</animatecolor>
  310. </svg>
  311. </body></html>";
  312. $doc = $this->parse($html);
  313. $root = $doc->documentElement;
  314. $svg = $root->getElementsByTagName('svg')->item(0);
  315. $this->assertTrue($svg->hasAttribute('viewBox'));
  316. $rect = $root->getElementsByTagName('rect')->item(0);
  317. $this->assertTrue($rect->hasAttribute('textLength'));
  318. $ac = $root->getElementsByTagName('animateColor');
  319. $this->assertEquals(1, $ac->length);
  320. }
  321. public function testMathMLAttribute()
  322. {
  323. $html = '<!doctype html>
  324. <html lang="en">
  325. <body>
  326. <math>
  327. <mi>x</mi>
  328. <csymbol definitionurl="http://www.example.com/mathops/multiops.html#plusminus">
  329. <mo>&PlusMinus;</mo>
  330. </csymbol>
  331. <mi>y</mi>
  332. </math>
  333. </body>
  334. </html>';
  335. $doc = $this->parse($html);
  336. $root = $doc->documentElement;
  337. $csymbol = $root->getElementsByTagName('csymbol')->item(0);
  338. $this->assertTrue($csymbol->hasAttribute('definitionURL'));
  339. }
  340. public function testMissingHtmlTag()
  341. {
  342. $html = '<!DOCTYPE html><title>test</title>';
  343. $doc = $this->parse($html);
  344. $this->assertEquals('html', $doc->documentElement->tagName);
  345. $this->assertEquals('title', $doc->documentElement->childNodes->item(0)->tagName);
  346. }
  347. public function testComment()
  348. {
  349. $html = '<html><!--Hello World.--></html>';
  350. $doc = $this->parse($html);
  351. $comment = $doc->documentElement->childNodes->item(0);
  352. $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
  353. $this->assertEquals('Hello World.', $comment->data);
  354. $html = '<!--Hello World.--><html></html>';
  355. $doc = $this->parse($html);
  356. $comment = $doc->childNodes->item(1);
  357. $this->assertEquals(XML_COMMENT_NODE, $comment->nodeType);
  358. $this->assertEquals('Hello World.', $comment->data);
  359. $comment = $doc->childNodes->item(2);
  360. $this->assertEquals(XML_ELEMENT_NODE, $comment->nodeType);
  361. $this->assertEquals('html', $comment->tagName);
  362. }
  363. public function testCDATA()
  364. {
  365. $html = '<!DOCTYPE html><html><math><![CDATA[test]]></math></html>';
  366. $doc = $this->parse($html);
  367. $wrapper = $doc->getElementsByTagName('math')->item(0);
  368. $this->assertEquals(1, $wrapper->childNodes->length);
  369. $cdata = $wrapper->childNodes->item(0);
  370. $this->assertEquals(XML_CDATA_SECTION_NODE, $cdata->nodeType);
  371. $this->assertEquals('test', $cdata->data);
  372. }
  373. public function testText()
  374. {
  375. $html = '<!DOCTYPE html><html><head></head><body><math>test</math></body></html>';
  376. $doc = $this->parse($html);
  377. $wrapper = $doc->getElementsByTagName('math')->item(0);
  378. $this->assertEquals(1, $wrapper->childNodes->length);
  379. $data = $wrapper->childNodes->item(0);
  380. $this->assertEquals(XML_TEXT_NODE, $data->nodeType);
  381. $this->assertEquals('test', $data->data);
  382. // The DomTreeBuilder has special handling for text when in before head mode.
  383. $html = '<!DOCTYPE html><html>
  384. Foo<head></head><body></body></html>';
  385. $doc = $this->parse($html);
  386. $this->assertEquals('Line 0, Col 0: Unexpected text. Ignoring: Foo', $this->errors[0]);
  387. $headElement = $doc->documentElement->firstChild;
  388. $this->assertEquals('head', $headElement->tagName);
  389. }
  390. public function testParseErrors()
  391. {
  392. $html = '<!DOCTYPE html><html><math><![CDATA[test';
  393. $doc = $this->parse($html);
  394. // We're JUST testing that we can access errors. Actual testing of
  395. // error messages happen in the Tokenizer's tests.
  396. $this->assertGreaterThan(0, count($this->errors));
  397. $this->assertTrue(is_string($this->errors[0]));
  398. }
  399. public function testProcessingInstruction()
  400. {
  401. // Test the simple case, which is where PIs are inserted into the DOM.
  402. $doc = $this->parse('<!DOCTYPE html><html><?foo bar?>');
  403. $this->assertEquals(1, $doc->documentElement->childNodes->length);
  404. $pi = $doc->documentElement->firstChild;
  405. $this->assertInstanceOf('\DOMProcessingInstruction', $pi);
  406. $this->assertEquals('foo', $pi->nodeName);
  407. $this->assertEquals('bar', $pi->data);
  408. // Leading xml PIs should be ignored.
  409. $doc = $this->parse('<?xml version="1.0"?><!DOCTYPE html><html><head></head></html>');
  410. $this->assertEquals(2, $doc->childNodes->length);
  411. $this->assertInstanceOf('\DOMDocumentType', $doc->childNodes->item(0));
  412. $this->assertInstanceOf('\DOMElement', $doc->childNodes->item(1));
  413. }
  414. public function testAutocloseP()
  415. {
  416. $html = '<!DOCTYPE html><html><body><p><figure></body></html>';
  417. $doc = $this->parse($html);
  418. $p = $doc->getElementsByTagName('p')->item(0);
  419. $this->assertEquals(0, $p->childNodes->length);
  420. $this->assertEquals('figure', $p->nextSibling->tagName);
  421. }
  422. public function testAutocloseLI()
  423. {
  424. $html = '<!doctype html>
  425. <html lang="en">
  426. <body>
  427. <ul><li>Foo<li>Bar<li>Baz</ul>
  428. </body>
  429. </html>';
  430. $doc = $this->parse($html);
  431. $length = $doc->getElementsByTagName('ul')->item(0)->childNodes->length;
  432. $this->assertEquals(3, $length);
  433. }
  434. public function testMathML()
  435. {
  436. $html = '<!doctype html>
  437. <html lang="en">
  438. <body>
  439. <math xmlns="http://www.w3.org/1998/Math/MathML">
  440. <mi>x</mi>
  441. <csymbol definitionurl="http://www.example.com/mathops/multiops.html#plusminus">
  442. <mo>&PlusMinus;</mo>
  443. </csymbol>
  444. <mi>y</mi>
  445. </math>
  446. </body>
  447. </html>';
  448. $doc = $this->parse($html);
  449. $math = $doc->getElementsByTagName('math')->item(0);
  450. $this->assertEquals('math', $math->tagName);
  451. $this->assertEquals('math', $math->nodeName);
  452. $this->assertEquals('math', $math->localName);
  453. $this->assertEquals('http://www.w3.org/1998/Math/MathML', $math->namespaceURI);
  454. }
  455. public function testSVG()
  456. {
  457. $html = '<!doctype html>
  458. <html lang="en">
  459. <body>
  460. <svg width="150" height="100" viewBox="0 0 3 2" xmlns="http://www.w3.org/2000/svg">
  461. <rect width="1" height="2" x="2" fill="#d2232c" />
  462. <text font-family="Verdana" font-size="32">
  463. <textpath xlink:href="#Foo">
  464. Test Text.
  465. </textPath>
  466. </text>
  467. </svg>
  468. </body>
  469. </html>';
  470. $doc = $this->parse($html);
  471. $svg = $doc->getElementsByTagName('svg')->item(0);
  472. $this->assertEquals('svg', $svg->tagName);
  473. $this->assertEquals('svg', $svg->nodeName);
  474. $this->assertEquals('svg', $svg->localName);
  475. $this->assertEquals('http://www.w3.org/2000/svg', $svg->namespaceURI);
  476. $textPath = $doc->getElementsByTagName('textPath')->item(0);
  477. $this->assertEquals('textPath', $textPath->tagName);
  478. }
  479. public function testNoScript()
  480. {
  481. $html = '<!DOCTYPE html><html><head><noscript>No JS</noscript></head></html>';
  482. $doc = $this->parse($html);
  483. $this->assertEmpty($this->errors);
  484. $noscript = $doc->getElementsByTagName('noscript')->item(0);
  485. $this->assertEquals('noscript', $noscript->tagName);
  486. $html = '<!DOCTYPE html><html><body><noscript><p>No JS</p></noscript></body></html>';
  487. $doc = $this->parse($html);
  488. $this->assertEmpty($this->errors);
  489. $p = $doc->getElementsByTagName('p')->item(0);
  490. $this->assertEquals('p', $p->tagName);
  491. }
  492. /**
  493. * Regression for issue #13.
  494. */
  495. public function testRegressionHTMLNoBody()
  496. {
  497. $html = '<!DOCTYPE html><html><span id="test">Test</span></html>';
  498. $doc = $this->parse($html);
  499. $span = $doc->getElementById('test');
  500. $this->assertEmpty($this->errors);
  501. $this->assertEquals('span', $span->tagName);
  502. $this->assertEquals('Test', $span->textContent);
  503. }
  504. public function testInstructionProcessor()
  505. {
  506. $string = '<!DOCTYPE html><html><?foo bar ?></html>';
  507. $treeBuilder = new DOMTreeBuilder();
  508. $is = new InstructionProcessorMock();
  509. $treeBuilder->setInstructionProcessor($is);
  510. $scanner = new Scanner($string);
  511. $parser = new Tokenizer($scanner, $treeBuilder);
  512. $parser->parse();
  513. $dom = $treeBuilder->document();
  514. $div = $dom->getElementsByTagName('div')->item(0);
  515. $this->assertEquals(1, $is->count);
  516. $this->assertEquals('foo', $is->name);
  517. $this->assertEquals('bar ', $is->data);
  518. $this->assertEquals('div', $div->tagName);
  519. $this->assertEquals('foo', $div->textContent);
  520. }
  521. public function testSelectGroupedOptions()
  522. {
  523. $html = <<<EOM
  524. <!DOCTYPE html>
  525. <html>
  526. <head>
  527. <title>testSelectGroupedOptions</title>
  528. </head>
  529. <body>
  530. <select>
  531. <optgroup id="first" label="first">
  532. <option value="foo">foo</option>
  533. <option value="bar">bar</option>
  534. <option value="baz">baz</option>
  535. </optgroup>
  536. <optgroup id="second" label="second">
  537. <option value="lorem">lorem</option>
  538. <option value="ipsum">ipsum</option>
  539. </optgroup>
  540. </select>
  541. </body>
  542. </html>
  543. EOM;
  544. $dom = $this->parse($html);
  545. $this->assertSame(3, $dom->getElementById('first')->getElementsByTagName('option')->length);
  546. $this->assertSame(2, $dom->getElementById('second')->getElementsByTagName('option')->length);
  547. }
  548. public function testVoidTag()
  549. {
  550. $html = <<<EOM
  551. <!DOCTYPE html>
  552. <html>
  553. <head>
  554. <title>testVoidTag</title>
  555. <meta>
  556. <meta>
  557. </head>
  558. <body></body>
  559. </html>
  560. EOM;
  561. $dom = $this->parse($html);
  562. $this->assertSame(2, $dom->getElementsByTagName('meta')->length);
  563. $this->assertSame(0, $dom->getElementsByTagName('meta')->item(0)->childNodes->length);
  564. $this->assertSame(0, $dom->getElementsByTagName('meta')->item(1)->childNodes->length);
  565. }
  566. public function testIgnoreSelfClosingTag()
  567. {
  568. $html = <<<EOM
  569. <!DOCTYPE html>
  570. <html>
  571. <head>
  572. <title>testIllegalSelfClosingTag</title>
  573. </head>
  574. <body>
  575. <div /><span>Hello, World!</span></div>
  576. </body>
  577. </html>
  578. EOM;
  579. $dom = $this->parse($html);
  580. $this->assertSame(1, $dom->getElementsByTagName('div')->item(0)->childNodes->length);
  581. }
  582. public function testIAudioInParagraph()
  583. {
  584. $html = <<<EOM
  585. <!DOCTYPE html>
  586. <html>
  587. <head>
  588. <title>testIllegalSelfClosingTag</title>
  589. </head>
  590. <body>
  591. <p>
  592. <audio preload="none" controls="controls">
  593. <source src="https://example.com/test.mp3" type="audio/mpeg" />
  594. Your browser does not support the audio element.
  595. </audio>
  596. </p>
  597. </body>
  598. </html>>
  599. </html>
  600. EOM;
  601. $dom = $this->parse($html);
  602. $audio = $dom->getElementsByTagName('audio')->item(0);
  603. $this->assertSame('p', $audio->parentNode->nodeName);
  604. $this->assertSame(3, $audio->childNodes->length);
  605. }
  606. public function testClosingBr()
  607. {
  608. $html = <<<EOM
  609. <!DOCTYPE html>
  610. <html>
  611. <head>
  612. <title>testClosingBr</title>
  613. </head>
  614. <body>
  615. <p>
  616. This line ends with a normal line break <br class="attribute-should-be-retained">
  617. This line ends with a line break marked up as a closing tag </br class="attribute-should-be-discarded">
  618. </p>
  619. </body>
  620. </html>>
  621. </html>
  622. EOM;
  623. $dom = $this->parse($html);
  624. $this->assertSame(2, $dom->getElementsByTagName('br')->length);
  625. $this->assertSame(1, $dom->getElementsByTagName('br')->item(0)->attributes->length);
  626. $this->assertSame(0, $dom->getElementsByTagName('br')->item(1)->attributes->length);
  627. }
  628. }