Html5Test.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. <?php
  2. namespace Masterminds\HTML5\Tests;
  3. use Masterminds\HTML5;
  4. class Html5Test extends TestCase
  5. {
  6. /**
  7. * @var HTML5
  8. */
  9. private $html5;
  10. public function setUp()
  11. {
  12. $this->html5 = $this->getInstance();
  13. }
  14. /**
  15. * Parse and serialize a string.
  16. */
  17. protected function cycle($html)
  18. {
  19. $dom = $this->html5->loadHTML('<!DOCTYPE html><html><body>' . $html . '</body></html>');
  20. $out = $this->html5->saveHTML($dom);
  21. return $out;
  22. }
  23. protected function cycleFragment($fragment)
  24. {
  25. $dom = $this->html5->loadHTMLFragment($fragment);
  26. $out = $this->html5->saveHTML($dom);
  27. return $out;
  28. }
  29. public function testImageTagsInSvg()
  30. {
  31. $html = '<!DOCTYPE html>
  32. <html>
  33. <head>
  34. <title>foo</title>
  35. </head>
  36. <body>
  37. <svg>
  38. <image height="10" width="10"></image>
  39. </svg>
  40. </body>
  41. </html>';
  42. $doc = $this->html5->loadHTML($html);
  43. $this->assertInstanceOf('DOMElement', $doc->getElementsByTagName('image')->item(0));
  44. $this->assertEmpty($this->html5->getErrors());
  45. }
  46. public function testLoadOptions()
  47. {
  48. // doc
  49. $dom = $this->html5->loadHTML($this->wrap('<t:tag/>'), array(
  50. 'implicitNamespaces' => array('t' => 'http://example.com'),
  51. 'xmlNamespaces' => true,
  52. ));
  53. $this->assertInstanceOf('\DOMDocument', $dom);
  54. $this->assertEmpty($this->html5->getErrors());
  55. $this->assertFalse($this->html5->hasErrors());
  56. $xpath = new \DOMXPath($dom);
  57. $xpath->registerNamespace('t', 'http://example.com');
  58. $this->assertEquals(1, $xpath->query('//t:tag')->length);
  59. // doc fragment
  60. $frag = $this->html5->loadHTMLFragment('<t:tag/>', array(
  61. 'implicitNamespaces' => array('t' => 'http://example.com'),
  62. 'xmlNamespaces' => true,
  63. ));
  64. $this->assertInstanceOf('\DOMDocumentFragment', $frag);
  65. $this->assertEmpty($this->html5->getErrors());
  66. $this->assertFalse($this->html5->hasErrors());
  67. $frag->ownerDocument->appendChild($frag);
  68. $xpath = new \DOMXPath($frag->ownerDocument);
  69. $xpath->registerNamespace('t', 'http://example.com');
  70. $this->assertEquals(1, $xpath->query('//t:tag', $frag)->length);
  71. }
  72. public function testEncodingUtf8()
  73. {
  74. $dom = $this->html5->load(__DIR__ . '/Fixtures/encoding/utf-8.html');
  75. $this->assertInstanceOf('\DOMDocument', $dom);
  76. $this->assertEmpty($this->html5->getErrors());
  77. $this->assertFalse($this->html5->hasErrors());
  78. $this->assertContains('Žťčýů', $dom->saveHTML());
  79. }
  80. public function testEncodingWindows1252()
  81. {
  82. $dom = $this->html5->load(__DIR__ . '/Fixtures/encoding/windows-1252.html', array(
  83. 'encoding' => 'Windows-1252',
  84. ));
  85. $this->assertInstanceOf('\DOMDocument', $dom);
  86. $this->assertEmpty($this->html5->getErrors());
  87. $this->assertFalse($this->html5->hasErrors());
  88. $dumpedAsUtf8 = mb_convert_encoding($dom->saveHTML(), 'UTF-8', 'Windows-1252');
  89. $this->assertNotFalse(mb_strpos($dumpedAsUtf8, 'Ž'));
  90. $this->assertNotFalse(mb_strpos($dumpedAsUtf8, 'è'));
  91. $this->assertNotFalse(mb_strpos($dumpedAsUtf8, 'ý'));
  92. $this->assertNotFalse(mb_strpos($dumpedAsUtf8, 'ù'));
  93. }
  94. public function testErrors()
  95. {
  96. $dom = $this->html5->loadHTML('<xx as>');
  97. $this->assertInstanceOf('\DOMDocument', $dom);
  98. $this->assertNotEmpty($this->html5->getErrors());
  99. $this->assertTrue($this->html5->hasErrors());
  100. }
  101. public function testLoad()
  102. {
  103. $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
  104. $this->assertInstanceOf('\DOMDocument', $dom);
  105. $this->assertEmpty($this->html5->getErrors());
  106. $this->assertFalse($this->html5->hasErrors());
  107. $file = fopen(__DIR__ . '/Html5Test.html', 'r');
  108. $dom = $this->html5->load($file);
  109. $this->assertInstanceOf('\DOMDocument', $dom);
  110. $this->assertEmpty($this->html5->getErrors());
  111. $dom = $this->html5->loadHTMLFile(__DIR__ . '/Html5Test.html');
  112. $this->assertInstanceOf('\DOMDocument', $dom);
  113. $this->assertEmpty($this->html5->getErrors());
  114. }
  115. public function testLoadHTML()
  116. {
  117. $contents = file_get_contents(__DIR__ . '/Html5Test.html');
  118. $dom = $this->html5->loadHTML($contents);
  119. $this->assertInstanceOf('\DOMDocument', $dom);
  120. $this->assertEmpty($this->html5->getErrors());
  121. }
  122. public function testLoadHTMLWithComments()
  123. {
  124. $contents = '<!--[if lte IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]-->
  125. <!--[if gt IE 8]> <!--><html class="no-js" lang="en"><!--<![endif]-->
  126. </html>';
  127. $dom = $this->html5->loadHTML($contents);
  128. $this->assertInstanceOf('\DOMDocument', $dom);
  129. $expected = '<!DOCTYPE html>
  130. <!--[if lte IE 8]> <html class="no-js lt-ie9" lang="en"> <![endif]--><!--[if gt IE 8]> <!--><html class="no-js" lang="en"><!--<![endif]--></html>
  131. ';
  132. $this->assertEquals($expected, $this->html5->saveHTML($dom));
  133. }
  134. public function testLoadHTMLFragment()
  135. {
  136. $fragment = '<section id="Foo"><div class="Bar">Baz</div></section>';
  137. $dom = $this->html5->loadHTMLFragment($fragment);
  138. $this->assertInstanceOf('\DOMDocumentFragment', $dom);
  139. $this->assertEmpty($this->html5->getErrors());
  140. }
  141. public function testSaveHTML()
  142. {
  143. $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
  144. $this->assertInstanceOf('\DOMDocument', $dom);
  145. $this->assertEmpty($this->html5->getErrors());
  146. $saved = $this->html5->saveHTML($dom);
  147. $this->assertRegExp('|<p>This is a test.</p>|', $saved);
  148. }
  149. public function testSaveHTMLFragment()
  150. {
  151. $fragment = '<section id="Foo"><div class="Bar">Baz</div></section>';
  152. $dom = $this->html5->loadHTMLFragment($fragment);
  153. $string = $this->html5->saveHTML($dom);
  154. $this->assertEquals($fragment, $string);
  155. }
  156. public function testSave()
  157. {
  158. $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
  159. $this->assertInstanceOf('\DOMDocument', $dom);
  160. $this->assertEmpty($this->html5->getErrors());
  161. // Test resource
  162. $file = fopen('php://temp', 'w');
  163. $this->html5->save($dom, $file);
  164. $content = stream_get_contents($file, -1, 0);
  165. $this->assertRegExp('|<p>This is a test.</p>|', $content);
  166. // Test file
  167. $tmpfname = tempnam(sys_get_temp_dir(), 'html5-php');
  168. $this->html5->save($dom, $tmpfname);
  169. $content = file_get_contents($tmpfname);
  170. $this->assertRegExp('|<p>This is a test.</p>|', $content);
  171. unlink($tmpfname);
  172. }
  173. // This test reads a document into a dom, turn the dom into a document,
  174. // then tries to read that document again. This makes sure we are reading,
  175. // and generating a document that works at a high level.
  176. public function testItWorks()
  177. {
  178. $dom = $this->html5->load(__DIR__ . '/Html5Test.html');
  179. $this->assertInstanceOf('\DOMDocument', $dom);
  180. $this->assertEmpty($this->html5->getErrors());
  181. $saved = $this->html5->saveHTML($dom);
  182. $dom2 = $this->html5->loadHTML($saved);
  183. $this->assertInstanceOf('\DOMDocument', $dom2);
  184. $this->assertEmpty($this->html5->getErrors());
  185. }
  186. public function testConfig()
  187. {
  188. $html5 = $this->getInstance();
  189. $options = $html5->getOptions();
  190. $this->assertEquals(false, $options['encode_entities']);
  191. $html5 = $this->getInstance(array(
  192. 'foo' => 'bar',
  193. 'encode_entities' => true,
  194. ));
  195. $options = $html5->getOptions();
  196. $this->assertEquals('bar', $options['foo']);
  197. $this->assertEquals(true, $options['encode_entities']);
  198. // Need to reset to original so future tests pass as expected.
  199. // $this->getInstance()->setOption('encode_entities', false);
  200. }
  201. public function testSvg()
  202. {
  203. $dom = $this->html5->loadHTML(
  204. '<!doctype html>
  205. <html lang="en">
  206. <body>
  207. <div id="foo" class="bar baz">foo bar baz</div>
  208. <svg width="150" height="100" viewBox="0 0 3 2">
  209. <rect width="1" height="2" x="0" fill="#008d46" />
  210. <rect width="1" height="2" x="1" fill="#ffffff" />
  211. <rect width="1" height="2" x="2" fill="#d2232c" />
  212. <text font-family="Verdana" font-size="32">
  213. <textPath xlink:href="#Foo">
  214. Test Text.
  215. </textPath>
  216. </text>
  217. </svg>
  218. </body>
  219. </html>');
  220. $this->assertEmpty($this->html5->getErrors());
  221. // Test a mixed case attribute.
  222. $list = $dom->getElementsByTagName('svg');
  223. $this->assertNotEmpty($list->length);
  224. $svg = $list->item(0);
  225. $this->assertEquals('0 0 3 2', $svg->getAttribute('viewBox'));
  226. $this->assertFalse($svg->hasAttribute('viewbox'));
  227. // Test a mixed case tag.
  228. // Note: getElementsByTagName is not case sensitive.
  229. $list = $dom->getElementsByTagName('textPath');
  230. $this->assertNotEmpty($list->length);
  231. $textPath = $list->item(0);
  232. $this->assertEquals('textPath', $textPath->tagName);
  233. $this->assertNotEquals('textpath', $textPath->tagName);
  234. $html = $this->html5->saveHTML($dom);
  235. $this->assertRegExp('|<svg width="150" height="100" viewBox="0 0 3 2">|', $html);
  236. $this->assertRegExp('|<rect width="1" height="2" x="0" fill="#008d46" />|', $html);
  237. }
  238. public function testMathMl()
  239. {
  240. $dom = $this->html5->loadHTML(
  241. '<!doctype html>
  242. <html lang="en">
  243. <body>
  244. <div id="foo" class="bar baz" definitionURL="http://example.com">foo bar baz</div>
  245. <math>
  246. <mi>x</mi>
  247. <csymbol definitionURL="http://www.example.com/mathops/multiops.html#plusminus">
  248. <mo>&PlusMinus;</mo>
  249. </csymbol>
  250. <mi>y</mi>
  251. </math>
  252. </body>
  253. </html>');
  254. $this->assertEmpty($this->html5->getErrors());
  255. $list = $dom->getElementsByTagName('math');
  256. $this->assertNotEmpty($list->length);
  257. $list = $dom->getElementsByTagName('div');
  258. $this->assertNotEmpty($list->length);
  259. $div = $list->item(0);
  260. $this->assertEquals('http://example.com', $div->getAttribute('definitionurl'));
  261. $this->assertFalse($div->hasAttribute('definitionURL'));
  262. $list = $dom->getElementsByTagName('csymbol');
  263. $csymbol = $list->item(0);
  264. $this->assertEquals('http://www.example.com/mathops/multiops.html#plusminus', $csymbol->getAttribute('definitionURL'));
  265. $this->assertFalse($csymbol->hasAttribute('definitionurl'));
  266. $html = $this->html5->saveHTML($dom);
  267. $this->assertRegExp('|<csymbol definitionURL="http://www.example.com/mathops/multiops.html#plusminus">|', $html);
  268. $this->assertRegExp('|<mi>y</mi>|', $html);
  269. }
  270. public function testUnknownElements()
  271. {
  272. // The : should not have special handling accourding to section 2.9 of the
  273. // spec. This is differenant than XML. Since we don't know these elements
  274. // they are handled as normal elements. Note, to do this is really
  275. // an invalid example and you should not embed prefixed xml in html5.
  276. $dom = $this->html5->loadHTMLFragment(
  277. '<f:rug>
  278. <f:name>Big rectangle thing</f:name>
  279. <f:width>40</f:width>
  280. <f:length>80</f:length>
  281. </f:rug>
  282. <sarcasm>um, yeah</sarcasm>');
  283. $this->assertEmpty($this->html5->getErrors());
  284. $markup = $this->html5->saveHTML($dom);
  285. $this->assertRegExp('|<f:name>Big rectangle thing</f:name>|', $markup);
  286. $this->assertRegExp('|<sarcasm>um, yeah</sarcasm>|', $markup);
  287. }
  288. public function testElements()
  289. {
  290. // Should have content.
  291. $res = $this->cycle('<div>FOO</div>');
  292. $this->assertRegExp('|<div>FOO</div>|', $res);
  293. // Should be empty
  294. $res = $this->cycle('<span></span>');
  295. $this->assertRegExp('|<span></span>|', $res);
  296. // Should have content.
  297. $res = $this->cycleFragment('<div>FOO</div>');
  298. $this->assertRegExp('|<div>FOO</div>|', $res);
  299. // Should be empty
  300. $res = $this->cycleFragment('<span></span>');
  301. $this->assertRegExp('|<span></span>|', $res);
  302. // Elements with dashes and underscores
  303. $res = $this->cycleFragment('<sp-an></sp-an>');
  304. $this->assertRegExp('|<sp-an></sp-an>|', $res);
  305. $res = $this->cycleFragment('<sp_an></sp_an>');
  306. $this->assertRegExp('|<sp_an></sp_an>|', $res);
  307. // Should have no closing tag.
  308. $res = $this->cycle('<hr>');
  309. $this->assertRegExp('|<hr></body>|', $res);
  310. }
  311. public function testAttributes()
  312. {
  313. $res = $this->cycle('<use xlink:href="#svg-track" xmlns:xlink="http://www.w3.org/1999/xlink"></use>');
  314. $this->assertContains('<use xlink:href="#svg-track" xmlns:xlink="http://www.w3.org/1999/xlink"></use>', $res);
  315. $res = $this->cycle('<div attr="val">FOO</div>');
  316. $this->assertRegExp('|<div attr="val">FOO</div>|', $res);
  317. // XXX: Note that spec does NOT require attrs in the same order.
  318. $res = $this->cycle('<div attr="val" class="even">FOO</div>');
  319. $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res);
  320. $res = $this->cycle('<div xmlns:foo="http://example.com">FOO</div>');
  321. $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res);
  322. $res = $this->cycleFragment('<div attr="val">FOO</div>');
  323. $this->assertRegExp('|<div attr="val">FOO</div>|', $res);
  324. // XXX: Note that spec does NOT require attrs in the same order.
  325. $res = $this->cycleFragment('<div attr="val" class="even">FOO</div>');
  326. $this->assertRegExp('|<div attr="val" class="even">FOO</div>|', $res);
  327. $res = $this->cycleFragment('<div xmlns:foo="http://example.com">FOO</div>');
  328. $this->assertRegExp('|<div xmlns:foo="http://example.com">FOO</div>|', $res);
  329. }
  330. public function testPCData()
  331. {
  332. $res = $this->cycle('<a>This is a test.</a>');
  333. $this->assertRegExp('|This is a test.|', $res);
  334. $res = $this->cycleFragment('<a>This is a test.</a>');
  335. $this->assertRegExp('|This is a test.|', $res);
  336. $res = $this->cycle('This
  337. is
  338. a
  339. test.');
  340. // Check that newlines are there, but don't count spaces.
  341. $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res);
  342. $res = $this->cycleFragment('This
  343. is
  344. a
  345. test.');
  346. // Check that newlines are there, but don't count spaces.
  347. $this->assertRegExp('|This\n\s*is\n\s*a\n\s*test.|', $res);
  348. $res = $this->cycle('<a>This <em>is</em> a test.</a>');
  349. $this->assertRegExp('|This <em>is</em> a test.|', $res);
  350. $res = $this->cycleFragment('<a>This <em>is</em> a test.</a>');
  351. $this->assertRegExp('|This <em>is</em> a test.|', $res);
  352. }
  353. public function testUnescaped()
  354. {
  355. $res = $this->cycle('<script>2 < 1</script>');
  356. $this->assertRegExp('|2 < 1|', $res);
  357. $res = $this->cycle('<style>div>div>div</style>');
  358. $this->assertRegExp('|div>div>div|', $res);
  359. $res = $this->cycleFragment('<script>2 < 1</script>');
  360. $this->assertRegExp('|2 < 1|', $res);
  361. $res = $this->cycleFragment('<style>div>div>div</style>');
  362. $this->assertRegExp('|div>div>div|', $res);
  363. }
  364. public function testEntities()
  365. {
  366. $res = $this->cycle('<a>Apples &amp; bananas.</a>');
  367. $this->assertRegExp('|Apples &amp; bananas.|', $res);
  368. $res = $this->cycleFragment('<a>Apples &amp; bananas.</a>');
  369. $this->assertRegExp('|Apples &amp; bananas.|', $res);
  370. $res = $this->cycleFragment('<p>R&D</p>');
  371. $this->assertRegExp('|R&amp;D|', $res);
  372. }
  373. public function testCaseSensitiveTags()
  374. {
  375. $dom = $this->html5->loadHTML(
  376. '<html><body><Button color="red">Error</Button></body></html>',
  377. array(
  378. 'xmlNamespaces' => true,
  379. )
  380. );
  381. $out = $this->html5->saveHTML($dom);
  382. $this->assertRegExp('|<html><body><Button color="red">Error</Button></body></html>|', $out);
  383. }
  384. public function testComment()
  385. {
  386. $res = $this->cycle('a<!-- This is a test. -->b');
  387. $this->assertRegExp('|<!-- This is a test. -->|', $res);
  388. $res = $this->cycleFragment('a<!-- This is a test. -->b');
  389. $this->assertRegExp('|<!-- This is a test. -->|', $res);
  390. }
  391. public function testCDATA()
  392. {
  393. $res = $this->cycle('a<![CDATA[ This <is> a test. ]]>b');
  394. $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res);
  395. $res = $this->cycleFragment('a<![CDATA[ This <is> a test. ]]>b');
  396. $this->assertRegExp('|<!\[CDATA\[ This <is> a test\. \]\]>|', $res);
  397. }
  398. }