12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982 |
- <?php
- namespace Masterminds\HTML5\Tests\Parser;
- use Masterminds\HTML5\Parser\UTF8Utils;
- use Masterminds\HTML5\Parser\Scanner;
- use Masterminds\HTML5\Parser\Tokenizer;
- class TokenizerTest extends \Masterminds\HTML5\Tests\TestCase
- {
- // ================================================================
- // Additional assertions.
- // ================================================================
/**
 * Asserts that an event has the expected name and the expected payload.
 *
 * @param string $type
 *   Expected event name.
 * @param mixed  $expects
 *   When an array, it is compared against the whole of $event['data'];
 *   otherwise it is compared against $event['data'][0] (or null when the
 *   event carries no data array).
 * @param array  $event
 *   The event to inspect; read through its 'name' and 'data' keys.
 */
public function assertEventEquals($type, $expects, $event)
{
    $this->assertEquals($type, $event['name'], "Event $type for " . print_r($event, true));

    // Array expectations describe the complete payload of the event.
    if (is_array($expects)) {
        $this->assertEquals($expects, $event['data'], "Event $type should equal " . print_r($expects, true) . ': ' . print_r($event, true));

        return;
    }

    // Scalar expectations only describe the first payload entry.
    $first = null;
    if (is_array($event['data'])) {
        $first = $event['data'][0];
    }
    $this->assertEquals($expects, $first, "Event $type should equal $expects: " . print_r($event, true));
}
/**
 * Asserts that the given event is named 'error'.
 *
 * @param array $event
 *   The event to inspect; only its 'name' key is read for the comparison.
 */
public function assertEventError($event)
{
    $message = 'Expected error for event: ' . print_r($event, true);

    $this->assertEquals('error', $event['name'], $message);
}
/**
 * Runs a map of inputs through the tokenizer and checks each result.
 *
 * For every "input => expectation" pair the input is parsed and then:
 * - the number of recorded events is compared with $depth (skipped
 *   unless $depth is > 0);
 * - event 0 must be named $name and match the expectation, as checked by
 *   assertEventEquals().
 *
 * @param string $name
 *   Expected name of the first event.
 * @param int    $depth
 *   Expected number of events; values <= 0 disable the check.
 * @param array  $tests
 *   Map of input markup to the expected payload.
 * @param bool   $debug
 *   When true, each pair is printed to STDOUT before it is parsed.
 */
protected function isAllGood($name, $depth, $tests, $debug = false)
{
    $checkDepth = $depth > 0;

    foreach ($tests as $try => $expects) {
        if ($debug) {
            fprintf(STDOUT, "%s expects %s\n", $try, print_r($expects, true));
        }

        $e = $this->parse($try);

        if ($checkDepth) {
            $this->assertEquals($depth, $e->depth(), "Expected depth $depth for test $try." . print_r($e, true));
        }

        $firstEvent = $e->get(0);
        $this->assertEventEquals($name, $expects, $firstEvent);
    }
}
- // ================================================================
- // Tests.
- // ================================================================
/**
 * Parsing an empty string records a single event, and that event is 'eof'.
 */
public function testParse()
{
    list($tokenizer, $stack) = $this->createTokenizer('');
    $tokenizer->parse();

    $first = $stack->get(0);

    $this->assertEquals(1, $stack->depth());
    $this->assertEquals('eof', $first['name']);
}
/**
 * Whitespace-only input is reported as a text event carrying that
 * whitespace, followed by one more event (two events in total).
 */
public function testWhitespace()
{
    $spaces = ' ';

    $stack = $this->parse($spaces);
    $this->assertEquals(2, $stack->depth());

    $textEvent = $stack->get(0);
    $this->assertEquals('text', $textEvent['name']);
    $this->assertEquals($spaces, $textEvent['data'][0]);
}
/**
 * Character references in plain text: valid ones decode to text, broken
 * ones raise an error as the first event.
 */
public function testCharacterReference()
{
    // The references must be written in their encoded form. With the
    // decoded characters as keys the map collapses into duplicate '&'
    // entries and no reference is exercised at all.
    $good = array(
        '&amp;' => '&',       // named reference
        '&#x0003c;' => '<',   // hexadecimal reference
        '&#38;' => '&',       // decimal reference
        '&' => '&',           // a lone ampersand is passed through as text
    );
    $this->isAllGood('text', 2, $good);

    // Broken charrefs: unknown name, malformed hex, malformed decimal.
    $broken = array('&foo', '&#xfoo', '&#foo');
    foreach ($broken as $str) {
        $events = $this->parse($str);
        $this->assertEventError($events->get(0));
    }

    // FIXME: Once the text processor is done, need to verify that the
    // tokens are transformed correctly into text.
}
/**
 * Malformed markup declarations are reported as an error followed by a
 * comment event whose payload is the complete original input.
 */
public function testBogusComment()
{
    $inputs = array(
        '</+this is a bogus comment. +>',
        '<!+this is a bogus comment. !>',
        '<!D OCTYPE foo bar>',
        '<!DOCTYEP foo bar>',
        '<![CADATA[ TEST ]]>',
        '<![CDATA Hello ]]>',
        '<![CDATA[ Hello [[>',
        '<!CDATA[[ test ]]>',
        '<![CDATA[',
        '<![CDATA[hellooooo hello',
        '<? Hello World ?>',
        '<? Hello World',
    );

    foreach ($inputs as $markup) {
        $stack = $this->parse($markup);

        $this->assertEventError($stack->get(0));
        $this->assertEventEquals('comment', $markup, $stack->get(1));
    }
}
/**
 * End tags: clean ones, recoverable malformed ones, and the forms that
 * degrade to bogus comments.
 */
public function testEndTag()
{
    // Clean end tags: two events, the first being an endTag with a
    // lower-cased name.
    $wellFormed = array(
        '</a>' => 'a',
        '</test>' => 'test',
        "</test\n>" => 'test',
        '</thisIsTheTagThatDoesntEndItJustGoesOnAndOnMyFriend>' => 'thisisthetagthatdoesntenditjustgoesonandonmyfriend',
        // See 8.2.4.10, which requires this and does not say error.
        '</a<b>' => 'a<b',
    );
    $this->isAllGood('endTag', 2, $wellFormed);

    // Recoverable failures: an error first, then the end tag anyway.
    $recoverable = array(
        '</a class="monkey">' => 'a',
        '</a <b>' => 'a',
        '</a <b <c>' => 'a',
        '</a is the loneliest letter>' => 'a',
        '</a' => 'a',
    );
    foreach ($recoverable as $markup => $tagName) {
        $stack = $this->parse($markup);

        $this->assertEquals(3, $stack->depth());
        $this->assertEventError($stack->get(0));
        $this->assertEventEquals('endTag', $tagName, $stack->get(1));
    }

    // Bogus comments: an error first, then the raw input as a comment.
    $bogus = array(
        '</>' => '</>',
        '</ >' => '</ >',
        '</ a>' => '</ a>',
    );
    foreach ($bogus as $markup => $commentText) {
        $stack = $this->parse($markup);

        $this->assertEquals(3, $stack->depth());
        $this->assertEventError($stack->get(0));
        $this->assertEventEquals('comment', $commentText, $stack->get(1));
    }
}
/**
 * Comments: well-formed ones yield their inner text; broken ones yield an
 * error followed by whatever comment text could be recovered.
 */
public function testComment()
{
    $wellFormed = array(
        '<!--easy-->' => 'easy',
        '<!-- 1 > 0 -->' => ' 1 > 0 ',
        '<!-- --$i -->' => ' --$i ',
        '<!----$i-->' => '--$i',
        "<!--\nHello World.\na-->" => "\nHello World.\na",
        '<!-- <!-- -->' => ' <!-- ',
    );
    foreach ($wellFormed as $markup => $body) {
        $stack = $this->parse($markup);
        $this->assertEventEquals('comment', $body, $stack->get(0));
    }

    $broken = array(
        '<!-->' => '',
        '<!--Hello' => 'Hello',
        // The NUL byte is expected to come back as UTF8Utils::FFFD.
        "<!--\0Hello" => UTF8Utils::FFFD . 'Hello',
        '<!--' => '',
    );
    foreach ($broken as $markup => $body) {
        $stack = $this->parse($markup);

        $this->assertEquals(3, $stack->depth());
        $this->assertEventError($stack->get(0));
        $this->assertEventEquals('comment', $body, $stack->get(1));
    }
}
/**
 * CDATA sections yield a 'cdata' event carrying the section content.
 */
public function testCDATASection()
{
    $sections = array();
    $sections['<![CDATA[ This is a test. ]]>'] = ' This is a test. ';
    $sections['<![CDATA[CDATA]]>'] = 'CDATA';
    $sections['<![CDATA[ ]] > ]]>'] = ' ]] > ';
    $sections['<![CDATA[ ]]>'] = ' ';

    $this->isAllGood('cdata', 2, $sections);
}
/**
 * Doctype declarations.
 *
 * Each expectation is the payload of the 'doctype' event, in the order
 * array(name, id type, id value, flag). Every well-formed case below
 * expects the flag to be false and every malformed case expects true.
 */
public function testDoctype()
{
    // Expectations shared by several well-formed inputs.
    $bareHtml = array('html', 0, null, false);
    $publicFooBar = array('html', EventStack::DOCTYPE_PUBLIC, 'foo bar', false);
    $systemFooBar = array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', false);

    $good = array(
        '<!DOCTYPE html>' => $bareHtml,
        '<!doctype html>' => $bareHtml,
        '<!DocType html>' => $bareHtml,
        "<!DOCTYPE\nhtml>" => $bareHtml,
        "<!DOCTYPE\fhtml>" => $bareHtml,
        '<!DOCTYPE html PUBLIC "foo bar">' => $publicFooBar,
        "<!DOCTYPE html PUBLIC 'foo bar'>" => $publicFooBar,
        '<!DOCTYPE html PUBLIC "foo bar" >' => $publicFooBar,
        "<!DOCTYPE html \nPUBLIC\n'foo bar'>" => $publicFooBar,
        '<!DOCTYPE html SYSTEM "foo bar">' => $systemFooBar,
        "<!DOCTYPE html SYSTEM 'foo bar'>" => $systemFooBar,
        '<!DOCTYPE html SYSTEM "foo/bar" >' => array('html', EventStack::DOCTYPE_SYSTEM, 'foo/bar', false),
        "<!DOCTYPE html \nSYSTEM\n'foo bar'>" => $systemFooBar,
    );
    $this->isAllGood('doctype', 2, $good);

    // Expectations shared by several malformed inputs.
    $noName = array(null, EventStack::DOCTYPE_NONE, null, true);
    $fooOnly = array('foo', EventStack::DOCTYPE_NONE, null, true);
    $flaggedSystem = array('html', EventStack::DOCTYPE_SYSTEM, 'foo bar', true);

    $bad = array(
        '<!DOCTYPE>' => $noName,
        '<!DOCTYPE >' => $noName,
        '<!DOCTYPE foo' => $fooOnly,
        '<!DOCTYPE foo PUB' => $fooOnly,
        '<!DOCTYPE foo PUB>' => $fooOnly,
        '<!DOCTYPE foo PUB "Looks good">' => $fooOnly,
        '<!DOCTYPE foo SYSTME "Looks good"' => $fooOnly,
        // Can't tell whether these are ids or ID types, since the context is chopped.
        '<!DOCTYPE foo PUBLIC' => $fooOnly,
        '<!DOCTYPE foo PUBLIC>' => $fooOnly,
        '<!DOCTYPE foo SYSTEM' => $fooOnly,
        '<!DOCTYPE foo SYSTEM>' => $fooOnly,
        '<!DOCTYPE html SYSTEM "foo bar"' => $flaggedSystem,
        '<!DOCTYPE html SYSTEM "foo bar" more stuff>' => $flaggedSystem,
    );

    // Malformed doctypes: an error first, then the doctype event.
    foreach ($bad as $test => $payload) {
        $stack = $this->parse($test);

        $this->assertEquals(3, $stack->depth(), "Counting events for '$test': " . print_r($stack, true));
        $this->assertEventError($stack->get(0));
        $this->assertEventEquals('doctype', $payload, $stack->get(1));
    }
}
/**
 * Processing instructions yield a 'pi' event with the target name and,
 * where asserted, the instruction text.
 */
public function testProcessorInstruction()
{
    $instructions = array(
        // Scalar expectation: only the target name is compared.
        '<?hph ?>' => 'hph',
        '<?hph echo "Hello World"; ?>' => array('hph', 'echo "Hello World"; '),
        "<?hph \necho 'Hello World';\n?>" => array('hph', "echo 'Hello World';\n"),
    );

    $this->isAllGood('pi', 2, $instructions);
}
/**
 * Attribute-less start tags: open, self-closing and unterminated.
 */
public function testSimpleTags()
{
    // Plain opening tags; names come back lower-cased.
    $opening = array(
        '<foo>' => 'foo',
        '<FOO>' => 'foo',
        '<fOO>' => 'foo',
        '<foo >' => 'foo',
        "<foo\n\n\n\n>" => 'foo',
        '<foo:bar>' => 'foo:bar',
    );
    $this->isAllGood('startTag', 2, $opening);

    // Self-closing tags; the third payload entry must be truthy.
    $selfClosing = array(
        '<foo/>' => 'foo',
        '<FOO/>' => 'foo',
        '<foo />' => 'foo',
        "<foo\n\n\n\n/>" => 'foo',
        '<foo:bar/>' => 'foo:bar',
    );
    foreach ($selfClosing as $test => $tagName) {
        $stack = $this->parse($test);
        $this->assertEquals(2, $stack->depth(), "Counting events for '$test'" . print_r($stack, true));

        $startTag = $stack->get(0);
        $this->assertEventEquals('startTag', $tagName, $startTag);
        $this->assertTrue($startTag['data'][2]);
    }

    // Tags cut off before '>': an error first, then the start tag.
    $unterminated = array(
        '<foo' => 'foo',
        '<foo ' => 'foo',
        '<foo/' => 'foo',
        '<foo /' => 'foo',
    );
    foreach ($unterminated as $test => $tagName) {
        $stack = $this->parse($test);

        $this->assertEquals(3, $stack->depth(), "Counting events for '$test': " . print_r($stack, true));
        $this->assertEventError($stack->get(0));
        $this->assertEventEquals('startTag', $tagName, $stack->get(1));
    }
}
/**
 * A tag that starts directly with what looks like an attribute
 * (e.g. <id="x">) yields three errors, then a start tag named after the
 * text before '=', then eof.
 */
public function testTagsWithAttributeAndMissingName()
{
    $inputs = array(
        '<id="top_featured">' => 'id',
        '<color="white">' => 'color',
        "<class='neaktivni_stranka'>" => 'class',
        '<bgcolor="white">' => 'bgcolor',
        '<class="nom">' => 'class',
    );

    foreach ($inputs as $markup => $tagName) {
        $stack = $this->parse($markup);

        // Events 0 to 2 must all be errors.
        for ($index = 0; $index < 3; ++$index) {
            $this->assertEventError($stack->get($index));
        }

        $this->assertEventEquals('startTag', $tagName, $stack->get(3));
        $this->assertEventEquals('eof', null, $stack->get(4));
    }
}
/**
 * A '<' (or other stray text) directly after a tag name: the tokenizer
 * is expected to report an error and still emit the tags around it.
 */
public function testTagNotClosedAfterTagName()
{
    // "<outer<inner>": error, start tag for outer, start tag for inner, eof.
    $nested = array(
        '<noscript<img>' => array('noscript', 'img'),
        '<center<a>' => array('center', 'a'),
        '<br<br>' => array('br', 'br'),
    );
    foreach ($nested as $markup => $tagNames) {
        $stack = $this->parse($markup);

        $this->assertEventError($stack->get(0));
        $this->assertEventEquals('startTag', $tagNames[0], $stack->get(1));
        $this->assertEventEquals('startTag', $tagNames[1], $stack->get(2));
        $this->assertEventEquals('eof', null, $stack->get(3));
    }

    // A dangling "<" followed by text.
    $stack = $this->parse('<span<>02</span>');
    $this->assertEventError($stack->get(0));
    $this->assertEventEquals('startTag', 'span', $stack->get(1));
    $this->assertEventError($stack->get(2));
    $this->assertEventEquals('text', '>02', $stack->get(3));
    $this->assertEventEquals('endTag', 'span', $stack->get(4));
    $this->assertEventEquals('eof', null, $stack->get(5));

    // An end tag glued to the unterminated start tag.
    $stack = $this->parse('<p</p>');
    $this->assertEventError($stack->get(0));
    $this->assertEventEquals('startTag', 'p', $stack->get(1));
    $this->assertEventEquals('endTag', 'p', $stack->get(2));
    $this->assertEventEquals('eof', null, $stack->get(3));

    // The unterminated tag sits inside a well-formed element.
    $stack = $this->parse('<strong><WordPress</strong>');
    $this->assertEventEquals('startTag', 'strong', $stack->get(0));
    $this->assertEventError($stack->get(1));
    $this->assertEventEquals('startTag', 'wordpress', $stack->get(2));
    $this->assertEventEquals('endTag', 'strong', $stack->get(3));
    $this->assertEventEquals('eof', null, $stack->get(4));

    // Attribute-like name with an empty value, followed by another tag.
    $stack = $this->parse('<src=<a>');
    for ($index = 0; $index < 3; ++$index) {
        $this->assertEventError($stack->get($index));
    }
    $this->assertEventEquals('startTag', 'src', $stack->get(3));
    $this->assertEventEquals('startTag', 'a', $stack->get(4));
    $this->assertEventEquals('eof', null, $stack->get(5));

    // Stray dots after the name: only the first tag is expected.
    $stack = $this->parse('<br...<a>');
    $this->assertEventError($stack->get(0));
    $this->assertEventEquals('startTag', 'br', $stack->get(1));
    $this->assertEventEquals('eof', null, $stack->get(2));
}
/**
 * An illegal character directly after a tag name yields an error followed
 * by a start tag named after the legal prefix.
 */
public function testIllegalTagNames()
{
    $cases = array(
        '<li">' => 'li',
        '<p">' => 'p',
        // NOTE(review): written as the literal text "&nbsp;" so that an
        // illegal character follows the name. An ordinary space here is
        // legal ('<foo >' parses cleanly in testSimpleTags) and could
        // never produce the error asserted below. Confirm against the
        // upstream fixture.
        '<b&nbsp;>' => 'b',
        '<static*all>' => 'static',
        '<h*0720/>' => 'h',
        '<st*ATTRIBUTE />' => 'st',
    );

    foreach ($cases as $html => $expected) {
        $events = $this->parse($html);

        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', $expected, $events->get(1));
    }
}
/**
 * Start tags with attributes.
 *
 * Each expectation is the payload of the 'startTag' event:
 * array(tag name, attribute map, self-closing flag).
 *
 * @depends testCharacterReference
 */
public function testTagAttributes()
{
    // Opening tags.
    $good = array(
        '<foo bar="baz">' => array('foo', array('bar' => 'baz'), false),
        '<foo bar=" baz ">' => array('foo', array('bar' => ' baz '), false),
        "<foo bar=\"\nbaz\n\">" => array('foo', array('bar' => "\nbaz\n"), false),
        "<foo bar='baz'>" => array('foo', array('bar' => 'baz'), false),
        '<foo bar="A full sentence.">' => array('foo', array('bar' => 'A full sentence.'), false),
        "<foo a='1' b=\"2\">" => array('foo', array('a' => '1', 'b' => '2'), false),
        "<foo ns:bar='baz'>" => array('foo', array('ns:bar' => 'baz'), false),
        // A bare ampersand inside an attribute value is kept as-is...
        "<foo a='blue&red'>" => array('foo', array('a' => 'blue&red'), false),
        // ...and an encoded one is decoded. This key must differ from the
        // one above: a repeated array key silently replaces the earlier
        // entry, so only one of the two cases would ever run.
        "<foo a='blue&amp;red'>" => array('foo', array('a' => 'blue&red'), false),
        "<foo a='blue&&&red'>" => array('foo', array('a' => 'blue&&&red'), false),
        "<foo a='blue&&red'>" => array('foo', array('a' => 'blue&&red'), false),
        "<foo\nbar='baz'\n>" => array('foo', array('bar' => 'baz'), false),
        '<doe a deer>' => array('doe', array('a' => null, 'deer' => null), false),
        '<foo bar=baz>' => array('foo', array('bar' => 'baz'), false),
        // Updated for 8.1.2.3
        '<foo bar = "baz" >' => array('foo', array('bar' => 'baz'), false),
        // The spec allows an unquoted value '/'. This will not be a closing
        // tag.
        '<foo bar=/>' => array('foo', array('bar' => '/'), false),
        '<foo bar=baz/>' => array('foo', array('bar' => 'baz/'), false),
    );
    $this->isAllGood('startTag', 2, $good);

    // Self-closing tags. Attribute names are lower-cased, values are not.
    $withEnd = array(
        '<foo bar="baz"/>' => array('foo', array('bar' => 'baz'), true),
        '<foo BAR="baz"/>' => array('foo', array('bar' => 'baz'), true),
        '<foo BAR="BAZ"/>' => array('foo', array('bar' => 'BAZ'), true),
        "<foo a='1' b=\"2\" c=3 d/>" => array(
            'foo',
            array('a' => '1', 'b' => '2', 'c' => '3', 'd' => null),
            true,
        ),
    );
    $this->isAllGood('startTag', 2, $withEnd);

    // Cause a parse error.
    $bad = array(
        // This will emit an entity lookup failure for &+dark.
        "<foo a='blue&+dark'>" => array('foo', array('a' => 'blue&+dark'), false),
        '<foo bar=>' => array('foo', array('bar' => null), false),
        '<foo bar="oh' => array('foo', array('bar' => 'oh'), false),
        '<foo bar=oh">' => array('foo', array('bar' => 'oh"'), false),
        // these attributes are ignored because of current implementation
        // of method "DOMElement::setAttribute"
        // see issue #23: https://github.com/Masterminds/html5-php/issues/23
        '<foo b"="baz">' => array('foo', array(), false),
        '<foo 2abc="baz">' => array('foo', array(), false),
        '<foo ?="baz">' => array('foo', array(), false),
        '<foo foo?bar="baz">' => array('foo', array(), false),
    );
    foreach ($bad as $test => $expects) {
        $events = $this->parse($test);

        $this->assertEquals(3, $events->depth(), "Counting events for '$test': " . print_r($events, true));
        $this->assertEventError($events->get(0));
        $this->assertEventEquals('startTag', $expects, $events->get(1));
    }

    // Cause multiple parse errors. The expected start tags are listed for
    // reference only: the loop below asserts just the two leading errors.
    $reallyBad = array(
        '<foo ="bar">' => array('foo', array('=' => null, '"bar"' => null), false),
        '<foo////>' => array('foo', array(), true),
        // character "&" in unquoted attribute shouldn't cause an infinite loop
        '<foo bar=index.php?str=1&id=29>' => array(
            'foo',
            array('bar' => 'index.php?str=1&id=29'),
            false,
        ),
    );
    foreach ($reallyBad as $test => $expects) {
        $events = $this->parse($test);

        $this->assertEventError($events->get(0));
        $this->assertEventError($events->get(1));
    }

    // Regression: Malformed elements should be detected.
    $events = $this->parse('<foo baz="1" <bar></foo>');
    $this->assertEventError($events->get(0));
    $this->assertEventEquals('startTag', array('foo', array('baz' => '1'), false), $events->get(1));
    $this->assertEventEquals('startTag', array('bar', array(), false), $events->get(2));
    $this->assertEventEquals('endTag', array('foo'), $events->get(3));
}
/**
 * Content of <script> and <title> is delivered as a single text event
 * between the start tag and the end tag.
 */
public function testRawText()
{
    // Properly closed <script> elements; markup inside stays literal text.
    $closed = array(
        '<script>abcd efg hijk lmnop</script> ' => 'abcd efg hijk lmnop',
        '<script><not/><the/><tag></script>' => '<not/><the/><tag>',
        '<script><<<<<<<<</script>' => '<<<<<<<<',
        '<script>hello</script</script>' => 'hello</script',
        "<script>\nhello</script\n</script>" => "\nhello</script\n",
        '<script>&</script>' => '&',
        '<script><!--not a comment--></script>' => '<!--not a comment-->',
        '<script><![CDATA[not a comment]]></script>' => '<![CDATA[not a comment]]>',
    );
    foreach ($closed as $markup => $body) {
        $stack = $this->parse($markup);

        $this->assertEventEquals('startTag', 'script', $stack->get(0));
        $this->assertEventEquals('text', $body, $stack->get(1));
        $this->assertEventEquals('endTag', 'script', $stack->get(2));
    }

    // Unterminated <script>: start tag, error, then the remaining text.
    $unclosed = array(
        '<script>&</script' => '&</script',
        '<script>Hello world' => 'Hello world',
    );
    foreach ($unclosed as $test => $body) {
        $stack = $this->parse($test);

        $this->assertEquals(4, $stack->depth(), "Counting events for '$test': " . print_r($stack, true));
        $this->assertEventEquals('startTag', 'script', $stack->get(0));
        $this->assertEventError($stack->get(1));
        $this->assertEventEquals('text', $body, $stack->get(2));
    }

    // Testing case sensitivity
    $stack = $this->parse('<TITLE>a test</TITLE>');
    $this->assertEventEquals('startTag', 'title', $stack->get(0));
    $this->assertEventEquals('text', 'a test', $stack->get(1));
    $this->assertEventEquals('endTag', 'title', $stack->get(2));

    // Testing end tags with whitespaces
    $stack = $this->parse('<title>Whitespaces are tasty</title >');
    $this->assertEventEquals('startTag', 'title', $stack->get(0));
    $this->assertEventEquals('text', 'Whitespaces are tasty', $stack->get(1));
    $this->assertEventEquals('endTag', 'title', $stack->get(2));
}
/**
 * In RCDATA mode character references are decoded, while comment-like
 * markup is kept as literal text.
 */
public function testRcdata()
{
    // The apostrophe is written as the reference &#x27;. A raw quote
    // would end the single-quoted PHP literal early, and the reference
    // is the thing this test needs the tokenizer to decode.
    list($tok, $events) = $this->createTokenizer('<title>&#x27;<!-- not a comment --></TITLE>');
    $tok->setTextMode(\Masterminds\HTML5\Elements::TEXT_RCDATA, 'title');
    $tok->parse();

    $this->assertEventEquals('text', "'<!-- not a comment -->", $events->get(1));
}
/**
 * Text events interleaved with tags, CDATA sections and comments, plus
 * text that contains an ampersand or a non-ASCII character.
 *
 * Every depth assertion counts one trailing event after the ones checked
 * individually.
 */
public function testText()
{
    $events = $this->parse('a<br>b');
    $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('text', 'a', $events->get(0));
    $this->assertEventEquals('startTag', 'br', $events->get(1));
    $this->assertEventEquals('text', 'b', $events->get(2));

    $events = $this->parse('<a>Test</a>');
    $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('startTag', 'a', $events->get(0));
    $this->assertEventEquals('text', 'Test', $events->get(1));
    $this->assertEventEquals('endTag', 'a', $events->get(2));

    $events = $this->parse('<p>0</p><p>1</p>');
    $this->assertEquals(7, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('startTag', 'p', $events->get(0));
    $this->assertEventEquals('text', '0', $events->get(1));
    $this->assertEventEquals('endTag', 'p', $events->get(2));
    $this->assertEventEquals('startTag', 'p', $events->get(3));
    $this->assertEventEquals('text', '1', $events->get(4));
    $this->assertEventEquals('endTag', 'p', $events->get(5));

    $events = $this->parse('a<![CDATA[test]]>b');
    $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('text', 'a', $events->get(0));
    $this->assertEventEquals('cdata', 'test', $events->get(1));
    $this->assertEventEquals('text', 'b', $events->get(2));

    $events = $this->parse('a<!--test-->b');
    $this->assertEquals(4, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('text', 'a', $events->get(0));
    $this->assertEventEquals('comment', 'test', $events->get(1));
    $this->assertEventEquals('text', 'b', $events->get(2));

    // The ampersand has to be encoded in the input. A bare "&b" is a
    // broken reference like "&foo" in testCharacterReference and would
    // add an error event, so the depth could not be 2.
    $events = $this->parse('a&amp;b');
    $this->assertEquals(2, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('text', 'a&b', $events->get(0));

    $events = $this->parse('a²b');
    $this->assertEquals(2, $events->depth(), 'Events: ' . print_r($events, true));
    $this->assertEventEquals('text', 'a²b', $events->get(0));
}
- // ================================================================
- // Utility functions.
- // ================================================================
/**
 * Builds a tokenizer over the given input together with the event stack
 * that receives its events.
 *
 * @param string $string
 *   Markup handed to the Scanner.
 * @param bool   $debug
 *   Value assigned to the scanner's public $debug property.
 *
 * @return array
 *   array(Tokenizer, EventStack), ready to be unpacked with list().
 */
protected function createTokenizer($string, $debug = false)
{
    $stack = new EventStack();

    $scanner = new Scanner($string);
    $scanner->debug = $debug;

    $tokenizer = new Tokenizer($scanner, $stack);

    return array($tokenizer, $stack);
}
/**
 * Tokenizes the given input and returns the event stack that was filled.
 *
 * @param string $string
 *   Markup to tokenize.
 * @param bool   $debug
 *   Forwarded to createTokenizer().
 *
 * @return EventStack
 *   The event handler the tokenizer was created with, after parsing.
 */
public function parse($string, $debug = false)
{
    $pair = $this->createTokenizer($string, $debug);
    $tokenizer = $pair[0];
    $stack = $pair[1];

    $tokenizer->parse();

    return $stack;
}
- }
|