MarkdownExtra.php 53 KB


  1. <?php
  2. /**
  3. * Markdown Extra - A text-to-HTML conversion tool for web writers
  4. *
  5. * @package php-markdown
  6. * @author Michel Fortin <michel.fortin@michelf.com>
  7. * @copyright 2004-2019 Michel Fortin <https://michelf.com/projects/php-markdown/>
  8. * @copyright (Original Markdown) 2004-2006 John Gruber <https://daringfireball.net/projects/markdown/>
  9. */
  10. namespace Michelf;
  11. /**
  12. * Markdown Extra Parser Class
  13. */
  14. class MarkdownExtra extends \Michelf\Markdown {
  15. /**
  16. * Configuration variables
  17. */
  18. /**
  19. * Prefix for footnote ids.
  20. * @var string
  21. */
  22. public $fn_id_prefix = "";
  23. /**
  24. * Optional title attribute for footnote links.
  25. * @var string
  26. */
  27. public $fn_link_title = "";
  28. /**
  29. * Optional class attribute for footnote links.
  30. * @var string
  31. */
  32. public $fn_link_class = "footnote-ref";
  /**
  * Optional class attribute for footnote backlinks.
  * @var string
  */
  33. public $fn_backlink_class = "footnote-backref";
  34. /**
  35. * Content to be displayed within footnote backlinks. The default is '↩';
  36. * the U+FE0E on the end is a Unicode variant selector used to prevent iOS
  37. * from displaying the arrow character as an emoji.
  38. * Optionally use '^^' and '%%' to refer to the footnote number and
  39. * reference number respectively. {@see parseFootnotePlaceholders()}
  40. * @var string
  41. */
  42. public $fn_backlink_html = '&#8617;&#xFE0E;';
  43. /**
  44. * Optional title attribute for footnote backlinks, for added
  45. * accessibility (to ensure backlink uniqueness).
  46. * Use '^^' and '%%' to refer to the footnote number and reference number
  47. * respectively. {@see parseFootnotePlaceholders()}
  48. * @var string
  49. */
  50. public $fn_backlink_title = "";
  /**
  * Optional aria-label attribute for footnote backlinks. Accepts the same
  * '^^' and '%%' placeholders as $fn_backlink_title.
  * @var string
  */
  51. public $fn_backlink_label = "";
  52. /**
  53. * Class name for table cell alignment (%% replaced left/center/right)
  54. * For instance: 'go-%%' becomes 'go-left' or 'go-right' or 'go-center'
  55. * If empty, the align attribute is used instead of a class name.
  56. * @var string
  57. */
  58. public $table_align_class_tmpl = '';
  59. /**
  60. * Optional class prefix for fenced code block.
  61. * @var string
  62. */
  63. public $code_class_prefix = "";
  64. /**
  65. * Class attribute for code blocks goes on the `code` tag;
  66. * setting this to true will put attributes on the `pre` tag instead.
  67. * @var boolean
  68. */
  69. public $code_attr_on_pre = false;
  70. /**
  71. * Predefined abbreviations, as an array mapping each abbreviation word
  72. * to its description. They are loaded into the parser state by setup().
  73. * @var array
  */
  74. public $predef_abbr = array();
  75. /**
  76. * Only convert atx-style headers if there's a space between the # and the header text
  77. * @var boolean
  78. */
  79. public $hashtag_protection = false;
  80. /**
  81. * Determines whether the footnote list should be omitted from the end of the
  82. * document. If true, footnote html can be retrieved from $this->footnotes_assembled.
  83. * @var boolean
  84. */
  85. public $omit_footnotes = false;
  86. /**
  87. * After parsing, the HTML for the list of footnotes appears here.
  88. * This is available only if $omit_footnotes == true.
  89. *
  90. * Note: when placing the content of `footnotes_assembled` on the page,
  91. * consider adding the attribute `role="doc-endnotes"` to the `div` or
  92. * `section` that will enclose the list of footnotes so they are
  93. * reachable to accessibility tools the same way they would be with the
  94. * default HTML output.
  95. * @var null|string
  96. */
  97. public $footnotes_assembled = null;
  98. /**
  99. * Parser implementation
  100. */
  /**
   * Build the parser: register the Extra-specific escape characters and
   * transformation passes, then delegate to the parent constructor.
   *
   * All registrations happen before parent::__construct() is called, since
   * the parent constructor initializes the escape table and sorts the gamuts.
   * @return void
   */
  public function __construct() {
      // Extra escapable characters; must be in place before the parent
      // constructor initializes its table.
      $this->escape_chars = $this->escape_chars . ':|';

      // Additional passes, keyed by method name with their sort priority.
      $extra_document_passes = array(
          "doFencedCodeBlocks" => 5,
          "stripFootnotes" => 15,
          "stripAbbreviations" => 25,
          "appendFootnotes" => 50,
      );
      $extra_block_passes = array(
          "doFencedCodeBlocks" => 5,
          "doTables" => 15,
          "doDefLists" => 45,
      );
      $extra_span_passes = array(
          "doFootnotes" => 5,
          "doAbbreviations" => 70,
      );

      // Array union keeps any entry already registered under the same name.
      $this->document_gamut += $extra_document_passes;
      $this->block_gamut += $extra_block_passes;
      $this->span_gamut += $extra_span_passes;

      $this->enhanced_ordered_list = true;

      // The parent constructor does the sorting of the gamuts.
      parent::__construct();
  }
  129. /**
  130. * Extra variables used during extra transformations.
  131. * All of the arrays below are emptied by setup() and teardown().
  * NOTE(review): the exact contents of the four footnote arrays are set by
  * footnote methods that are not visible in this part of the file.
  * @var array
  132. */
  133. protected $footnotes = array();
  134. protected $footnotes_ordered = array();
  135. protected $footnotes_ref_count = array();
  136. protected $footnotes_numbers = array();
  // Abbreviation word => trimmed description; seeded from $predef_abbr in
  // setup(). The property name is misspelled ("desciptions"); it is left
  // unchanged here because other code refers to it by this name.
  137. protected $abbr_desciptions = array();
  138. /**
  * Regex alternation of the preg_quote()d abbreviation words, joined with
  * '|'. Empty string when no abbreviation is defined.
  * @var string
  */
  139. protected $abbr_word_re = '';
  140. /**
  141. * Give the current footnote number.
  142. * @var integer
  143. */
  144. protected $footnote_counter = 1;
  145. /**
  146. * Ref attribute for links: maps a lower-cased link id to the HTML attribute
  147. * string built by doExtraAttributes() for that link definition.
  * @var array
  148. */
  149. protected $ref_attr = array();
  /**
   * Setting up Extra-specific variables.
   *
   * Runs the parent setup, resets all per-document state (footnotes,
   * abbreviations, link attributes) and then loads the predefined
   * abbreviations into $abbr_word_re and $abbr_desciptions.
   * @return void
   */
  protected function setup() {
      parent::setup();

      $this->footnotes = array();
      $this->footnotes_ordered = array();
      $this->footnotes_ref_count = array();
      $this->footnotes_numbers = array();
      $this->abbr_desciptions = array();
      $this->abbr_word_re = '';
      $this->footnote_counter = 1;
      $this->footnotes_assembled = null;

      // Attributes attached to link definitions belong to a single document;
      // without this reset they would survive into the next transform and
      // could be applied to an unrelated link that shares the same id.
      $this->ref_attr = array();

      foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
          // Compare against '' explicitly: a plain truthiness test treats the
          // pattern "0" as empty and would drop the '|' separator after it.
          if ($this->abbr_word_re !== '')
              $this->abbr_word_re .= '|';
          // Numeric-looking keys arrive as integers, hence the cast.
          $this->abbr_word_re .= preg_quote((string) $abbr_word);
          $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
      }
  }
  /**
   * Clearing Extra-specific variables.
   *
   * Empties the footnote and abbreviation state once a document has been
   * processed. The assembled footnote HTML is kept when $omit_footnotes is
   * set, so that it can still be read after parsing.
   * @return void
   */
  protected function teardown() {
      $this->footnotes
          = $this->footnotes_ordered
          = $this->footnotes_ref_count
          = $this->footnotes_numbers
          = $this->abbr_desciptions
          = array();
      $this->abbr_word_re = '';

      if (!$this->omit_footnotes) {
          $this->footnotes_assembled = null;
      }

      parent::teardown();
  }
  184. /**
  185. * Extra attribute parser
  186. */
  187. /**
  188. * Expression to use to catch attributes (includes the braces)
  *
  * Matches `{`, then one or more components each optionally preceded by
  * spaces, then optional spaces and `}`. A component starts with `#`, `.`
  * or a lowercase letter and continues with one or more characters from
  * `[-_:a-zA-Z0-9=]`. The text between the braces is captured as one group.
  * The pattern has no delimiters; it is interpolated into larger regexes.
  189. * @var string
  190. */
  191. protected $id_class_attr_catch_re = '\{((?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,})[ ]*\}';
  192. /**
  193. * Expression to use when parsing in a context when no capture is desired
  * (same pattern as above, without the capturing group)
  194. * @var string
  195. */
  196. protected $id_class_attr_nocatch_re = '\{(?>[ ]*[#.a-z][-_:a-zA-Z0-9=]+){1,}[ ]*\}';
  /**
   * Parse attributes caught by the $this->id_class_attr_catch_re expression
   * and return the HTML-formatted list of attributes.
   *
   * Supported components are `.class`, `#id` and `key=value`. Only the first
   * `#id` is used. `key=value` pairs are dropped when $this->no_markup is set.
   *
   * In addition, this method also supports supplying a default Id value,
   * which will be used to populate the id attribute in case no `#id` was
   * given. A falsy default (null, '', 0, "0") counts as "no default".
   *
   * @param string $tag_name Name of the tag the attributes are for (unused here)
   * @param string|null $attr Attribute text from between the braces; may be
   *                          null or '' when the optional group did not match
   * @param mixed $defaultIdValue
   * @param array $classes Class names to emit ahead of those found in $attr
   * @return string Attribute string with a leading space, or "" if empty
   */
  protected function doExtraAttributes($tag_name, $attr, $defaultIdValue = null, $classes = array()) {
      if (empty($attr) && !$defaultIdValue && empty($classes)) {
          return "";
      }

      // Split on components. Callers pass null when the attribute group did
      // not take part in the match; cast so PCRE always receives a string.
      preg_match_all('/[#.a-z][-_:a-zA-Z0-9=]+/', (string) $attr, $matches);
      $elements = $matches[0];

      // Handle classes and IDs (only first ID taken into account)
      $attributes = array();
      $id = false;
      foreach ($elements as $element) {
          if ($element[0] === '.') {
              $classes[] = substr($element, 1);
          } else if ($element[0] === '#') {
              if ($id === false) $id = substr($element, 1);
          } else if (strpos($element, '=') > 0) {
              $parts = explode('=', $element, 2);
              $attributes[] = $parts[0] . '="' . $parts[1] . '"';
          }
      }

      // Fall back to the default id only when no explicit id was given.
      if ($id === false || $id === '') {
          $id = $defaultIdValue ? $defaultIdValue : false;
      }

      // Compose attributes as string
      $attr_str = "";
      // Strict test rather than !empty(): an explicit id of "0" (from `{#0}`)
      // is a legitimate id and must not be discarded.
      if ($id !== false) {
          $attr_str .= ' id="' . $this->encodeAttribute($id) . '"';
      }
      if (!empty($classes)) {
          $attr_str .= ' class="' . implode(" ", $classes) . '"';
      }
      if (!$this->no_markup && !empty($attributes)) {
          $attr_str .= ' ' . implode(" ", $attributes);
      }
      return $attr_str;
  }
  /**
   * Remove link definitions from the text, recording each one (URL, title
   * and extra attributes) through _stripLinkDefinitions_callback().
   * @param string $text
   * @return string The text with every link definition removed
   */
  protected function stripLinkDefinitions($text) {
      // A definition may be indented by fewer spaces than one tab width.
      $max_indent = $this->tab_width - 1;

      // Link defs are in the form: ^[id]: url "optional title"
      $definition_re = '{
              ^[ ]{0,'.$max_indent.'}\[(.+)\][ ]?: # id = $1
                [ ]*
                \n? # maybe *one* newline
                [ ]*
              (?:
                <(.+?)> # url = $2
              |
                (\S+?) # url = $3
              )
                [ ]*
                \n? # maybe one newline
                [ ]*
              (?:
                (?<=\s) # lookbehind for whitespace
                ["(]
                (.*?) # title = $4
                [")]
                [ ]*
              )? # title is optional
              (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr
              (?:\n+|\Z)
          }xm';

      return preg_replace_callback(
          $definition_re,
          array($this, '_stripLinkDefinitions_callback'),
          $text
      );
  }
  /**
   * Strip link definition callback.
   *
   * Records the URL, title and extra attributes of one link definition under
   * its lower-cased id, and removes the definition from the text.
   *
   * @param array $matches Groups from stripLinkDefinitions(): 1 = id,
   *                       2 = <url>, 3 = bare url, 4 = title, 5 = attributes
   * @return string Always '' (the definition is removed)
   */
  protected function _stripLinkDefinitions_callback($matches) {
      $link_id = strtolower($matches[1]);
      $url = $matches[2] == '' ? $matches[3] : $matches[2];

      // PCRE reports an unmatched group as '' whenever a later group matched,
      // and omits it otherwise. So for `[id]: url {.class}` the title group
      // is '' rather than missing. Treat both cases as "no title"; storing ''
      // would make the anchor/image callbacks emit an empty title="".
      $title = isset($matches[4]) ? $matches[4] : '';
      $attr = isset($matches[5]) ? $matches[5] : '';

      $this->urls[$link_id] = $url;
      $this->titles[$link_id] = ($title !== '') ? $title : null;
      $this->ref_attr[$link_id] = $this->doExtraAttributes("", $attr);

      return ''; // String that will replace the block
  }
  297. /**
  298. * HTML block parser
  *
  * The properties below are regex alternation fragments (tag names separated
  * by `|`); they are interpolated into larger patterns by the
  * _hashHTMLBlocks_* methods.
  299. */
  300. /**
  301. * Tags that are always treated as block tags
  302. * @var string
  303. */
  304. protected $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|figure';
  305. /**
  306. * Tags treated as block tags only if the opening tag is alone on its line
  *
  * Note: `iframe` is also listed in $block_tags_re, and that list is tested
  * first in _hashHTMLBlocks_inMarkdown(), so it is always handled as a block.
  307. * @var string
  308. */
  309. protected $context_block_tags_re = 'script|noscript|style|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video';
  310. /**
  311. * Tags where markdown="1" default to span mode:
  312. * @var string
  313. */
  314. protected $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
  315. /**
  316. * Tags which must not have their contents modified, no matter where
  317. * they appear
  318. * @var string
  319. */
  320. protected $clean_tags_re = 'script|style|math|svg';
  321. /**
  322. * Tags that do not need to be closed.
  323. * @var string
  324. */
  325. protected $auto_close_tags_re = 'hr|img|param|source|track';
  /**
   * Hashify HTML blocks and "clean tags".
   *
   * Only block-level HTML (headers, lists, tables, ...) is hashed, so that
   * paragraphs wrapped in non-block-level tags such as anchors, phrase
   * emphasis and spans still get their <p> tags later on.
   *
   * The work is done by _hashHTMLBlocks_inMarkdown(), which hands over to
   * _hashHTMLBlocks_inHTML() when it meets a block tag; that one calls back
   * into _hashHTMLBlocks_inMarkdown() for tags carrying markdown="1". The
   * two methods are mutually recursive.
   *
   * When $this->no_markup is set the text is returned untouched.
   * @param string $text
   * @return string
   */
  protected function hashHTMLBlocks($text) {
      if (!$this->no_markup) {
          // Only the processed text is of interest here; the "remaining
          // text" element of the result is ignored.
          $result = $this->_hashHTMLBlocks_inMarkdown($text);
          $text = $result[0];
      }
      return $text;
  }
  351. /**
  352. * Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
  353. *
  354. * * $indent is the number of spaces to be ignored when checking for code
  355. * blocks. This is important because if we don't take the indent into
  356. * account, something like this (which looks right) won't work as expected:
  357. *
  358. * <div>
  359. * <div markdown="1">
  360. * Hello World. <-- Is this a Markdown code block or text?
  361. * </div> <-- Is this a Markdown code block or a real tag?
  362. * <div>
  363. *
  364. * If you don't like this, just don't indent the tag on which
  365. * you apply the markdown="1" attribute.
  366. *
  367. * * If $enclosing_tag_re is not empty, stops at the first unmatched closing
  368. * tag with that name. Nested tags supported.
  369. *
  370. * * If $span is true, text inside must be treated as span. So any double
  371. * newline will be replaced by a single newline so that it does not create
  372. * paragraphs.
  373. *
  374. * Returns an array of that form: ( processed text , remaining text )
  375. * When the scan stops on the closing tag of the enclosing element, the
  * remaining text starts with that closing tag.
  *
  376. * @param string $text
  377. * @param integer $indent
  378. * @param string $enclosing_tag_re
  379. * @param boolean $span
  380. * @return array
  381. */
  382. protected function _hashHTMLBlocks_inMarkdown($text, $indent = 0,
  383. $enclosing_tag_re = '', $span = false)
  384. {
  385. if ($text === '') return array('', '');
  386. // Regex to check for the presence of newlines around a block tag.
  387. $newline_before_re = '/(?:^\n?|\n\n)*$/';
  388. $newline_after_re =
  389. '{
  390. ^ # Start of text following the tag.
  391. (?>[ ]*<!--.*?-->)? # Optional comment.
  392. [ ]*\n # Must be followed by newline.
  393. }xs';
  394. // Regex to match any tag.
  // Besides tags it also matches HTML comments, processing instructions,
  // CDATA blocks, code span markers and (outside span mode) indented and
  // fenced code blocks, so that their content is skipped rather than parsed.
  395. $block_tag_re =
  396. '{
  397. ( # $2: Capture whole tag.
  398. </? # Any opening or closing tag.
  399. (?> # Tag name.
  400. ' . $this->block_tags_re . ' |
  401. ' . $this->context_block_tags_re . ' |
  402. ' . $this->clean_tags_re . ' |
  403. (?!\s)'.$enclosing_tag_re . '
  404. )
  405. (?:
  406. (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
  407. (?>
  408. ".*?" | # Double quotes (can contain `>`)
  409. \'.*?\' | # Single quotes (can contain `>`)
  410. .+? # Anything but quotes and `>`.
  411. )*?
  412. )?
  413. > # End of tag.
  414. |
  415. <!-- .*? --> # HTML Comment
  416. |
  417. <\?.*?\?> | <%.*?%> # Processing instruction
  418. |
  419. <!\[CDATA\[.*?\]\]> # CData Block
  420. ' . ( !$span ? ' # If not in span.
  421. |
  422. # Indented code block
  423. (?: ^[ ]*\n | ^ | \n[ ]*\n )
  424. [ ]{' . ($indent + 4) . '}[^\n]* \n
  425. (?>
  426. (?: [ ]{' . ($indent + 4) . '}[^\n]* | [ ]* ) \n
  427. )*
  428. |
  429. # Fenced code block marker
  430. (?<= ^ | \n )
  431. [ ]{0,' . ($indent + 3) . '}(?:~{3,}|`{3,})
  432. [ ]*
  433. (?: \.?[-_:a-zA-Z0-9]+ )? # standalone class name
  434. [ ]*
  435. (?: ' . $this->id_class_attr_nocatch_re . ' )? # extra attributes
  436. [ ]*
  437. (?= \n )
  438. ' : '' ) . ' # End (if not is span).
  439. |
  440. # Code span marker
  441. # Note, this regex needs to go after backtick fenced
  442. # code blocks but it should also be kept outside of the
  443. # "if not in span" condition adding backticks to the parser
  444. `+
  445. )
  446. }xs';
  447. $depth = 0; // Current depth inside the tag tree.
  448. $parsed = ""; // Parsed text that will be returned.
  449. // Loop through every tag until we find the closing tag of the parent
  450. // or loop until reaching the end of text if no parent tag specified.
  451. do {
  452. // Split the text using the first $block_tag_re match found.
  453. // Text before pattern will be first in the array, text after
  454. // pattern will be at the end, and between will be any catches made
  455. // by the pattern.
  456. $parts = preg_split($block_tag_re, $text, 2,
  457. PREG_SPLIT_DELIM_CAPTURE);
  458. // If in Markdown span mode, add an empty-string span-level hash
  459. // after each newline to prevent triggering any block element.
  460. if ($span) {
  461. $void = $this->hashPart("", ':');
  462. $newline = "\n$void";
  463. $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
  464. }
  465. $parsed .= $parts[0]; // Text before current tag.
  466. // If end of $text has been reached. Stop loop.
  // (A successful split yields three parts: before, tag, after.)
  467. if (count($parts) < 3) {
  468. $text = "";
  469. break;
  470. }
  471. $tag = $parts[1]; // Tag to handle.
  472. $text = $parts[2]; // Remaining text after current tag.
  473. // Check for: Fenced code block marker.
  474. // Note: need to recheck the whole tag to disambiguate backtick
  475. // fences from code spans
  476. if (preg_match('{^\n?([ ]{0,' . ($indent + 3) . '})(~{3,}|`{3,})[ ]*(?:\.?[-_:a-zA-Z0-9]+)?[ ]*(?:' . $this->id_class_attr_nocatch_re . ')?[ ]*\n?$}', $tag, $capture)) {
  477. // Fenced code block marker: find matching end marker.
  478. $fence_indent = strlen($capture[1]); // use captured indent in re
  479. $fence_re = $capture[2]; // use captured fence in re
  480. if (preg_match('{^(?>.*\n)*?[ ]{' . ($fence_indent) . '}' . $fence_re . '[ ]*(?:\n|$)}', $text,
  481. $matches))
  482. {
  483. // End marker found: pass text unchanged until marker.
  484. $parsed .= $tag . $matches[0];
  485. $text = substr($text, strlen($matches[0]));
  486. }
  487. else {
  488. // No end marker: just skip it.
  489. $parsed .= $tag;
  490. }
  491. }
  492. // Check for: Indented code block.
  // (Of the remaining alternatives, only this one can start with a newline
  // or a space; tags start with "<" and code spans with a backtick.)
  493. else if ($tag[0] === "\n" || $tag[0] === " ") {
  494. // Indented code block: pass it unchanged, will be handled
  495. // later.
  496. $parsed .= $tag;
  497. }
  498. // Check for: Code span marker
  499. // Note: need to check this after backtick fenced code blocks
  500. else if ($tag[0] === "`") {
  501. // Find corresponding end marker.
  // The end marker is a run of backticks of the same length that is not
  // adjacent to another backtick; the search does not cross a blank line.
  502. $tag_re = preg_quote($tag);
  503. if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)' . $tag_re . '(?!`)}',
  504. $text, $matches))
  505. {
  506. // End marker found: pass text unchanged until marker.
  507. $parsed .= $tag . $matches[0];
  508. $text = substr($text, strlen($matches[0]));
  509. }
  510. else {
  511. // Unmatched marker: just skip it.
  512. $parsed .= $tag;
  513. }
  514. }
  515. // Check for: Opening Block level tag or
  516. // Opening Context Block tag (like ins and del)
  517. // used as a block tag (tag is alone on its line).
  518. else if (preg_match('{^<(?:' . $this->block_tags_re . ')\b}', $tag) ||
  519. ( preg_match('{^<(?:' . $this->context_block_tags_re . ')\b}', $tag) &&
  520. preg_match($newline_before_re, $parsed) &&
  521. preg_match($newline_after_re, $text) )
  522. )
  523. {
  524. // Need to parse tag and following text using the HTML parser.
  525. list($block_text, $text) =
  526. $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
  527. // Make sure it stays outside of any paragraph by adding newlines.
  528. $parsed .= "\n\n$block_text\n\n";
  529. }
  530. // Check for: Clean tag (like script, math)
  531. // HTML Comments, processing instructions.
  532. else if (preg_match('{^<(?:' . $this->clean_tags_re . ')\b}', $tag) ||
  533. $tag[1] === '!' || $tag[1] === '?')
  534. {
  535. // Need to parse tag and following text using the HTML parser.
  536. // (don't check for markdown attribute)
  537. list($block_text, $text) =
  538. $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
  539. $parsed .= $block_text;
  540. }
  541. // Check for: Tag with same name as enclosing tag.
  542. else if ($enclosing_tag_re !== '' &&
  543. // Same name as enclosing tag.
  544. preg_match('{^</?(?:' . $enclosing_tag_re . ')\b}', $tag))
  545. {
  546. // Increase/decrease nested tag count.
  // A tag ending in "/>" is self-closing and leaves the depth unchanged.
  547. if ($tag[1] === '/') {
  548. $depth--;
  549. } else if ($tag[strlen($tag)-2] !== '/') {
  550. $depth++;
  551. }
  552. if ($depth < 0) {
  553. // Going out of parent element. Clean up and break so we
  554. // return to the calling function.
  // The closing tag is put back in front of the remaining text so the
  // caller sees it.
  555. $text = $tag . $text;
  556. break;
  557. }
  558. $parsed .= $tag;
  559. }
  560. else {
  // Anything else (for example a closing block tag without a matching
  // opening tag) is copied through unchanged.
  561. $parsed .= $tag;
  562. }
  563. } while ($depth >= 0);
  564. return array($parsed, $text);
  565. }
  /**
   * Parse HTML, calling _hashHTMLBlocks_inMarkdown for block tags.
   *
   * * Calls $hash_method to convert any blocks.
   * * Stops when the first opening tag closes.
   * * $md_attr indicates if the use of the `markdown="1"` attribute is
   *   allowed (it is not inside clean tags). The accepted values of the
   *   attribute are exactly `1`, `block` and `span`; any other value leaves
   *   the tag untouched.
   *
   * If the end of the text is reached while tags are still open, the text is
   * handed back unchanged except that its first character is returned as
   * "processed", so that the caller always makes progress.
   *
   * Returns an array of that form: ( processed text , remaining text )
   * @param string $text        Text starting with the opening tag
   * @param string $hash_method Name of the method used to hash each block
   * @param bool $md_attr       Handle `markdown="1"` attribute
   * @return array
   */
  protected function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
      if ($text === '') return array('', '');

      // Regex to match `markdown` attribute inside of a tag.
      $markdown_attr_re = '
          {
              \s* # Eat whitespace before the `markdown` attribute
              markdown
              \s*=\s*
              (?>
                  (["\']) # $1: quote delimiter
                  (.*?) # $2: attribute value
                  \1 # matching delimiter
              |
                  ([^\s>]*) # $3: unquoted attribute value
              )
              () # $4: make $3 always defined (avoid warnings)
          }xs';

      // Regex to match any tag.
      $tag_re = '{
              ( # $2: Capture whole tag.
                  </? # Any opening or closing tag.
                      [\w:$]+ # Tag name.
                      (?:
                          (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
                          (?>
                              ".*?" | # Double quotes (can contain `>`)
                              \'.*?\' | # Single quotes (can contain `>`)
                              .+? # Anything but quotes and `>`.
                          )*?
                      )?
                  > # End of tag.
              |
                  <!-- .*? --> # HTML Comment
              |
                  <\?.*?\?> | <%.*?%> # Processing instruction
              |
                  <!\[CDATA\[.*?\]\]> # CData Block
              )
          }xs';

      $original_text = $text; // Save original text in case of failure.

      $depth = 0; // Current depth inside the tag tree.
      $block_text = ""; // Temporary text holder for current text.
      $parsed = ""; // Parsed text that will be returned.
      $base_tag_name_re = '';

      // Get the name of the starting tag.
      // (This pattern makes $base_tag_name_re safe without quoting.)
      if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
          $base_tag_name_re = $matches[1];

      // Loop through every tag until we find the corresponding closing tag.
      do {
          // Split the text using the first $tag_re match found.
          // Text before pattern will be first in the array, text after
          // pattern will be at the end, and between will be any catches made
          // by the pattern.
          $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);

          if (count($parts) < 3) {
              // End of $text reached with unbalanced tag(s).
              // In that case, we return original text unchanged and pass the
              // first character as filtered to prevent an infinite loop in the
              // parent function.
              return array($original_text[0], substr($original_text, 1));
          }

          $block_text .= $parts[0]; // Text before current tag.
          $tag = $parts[1]; // Tag to handle.
          $text = $parts[2]; // Remaining text after current tag.

          // Check for: Auto-close tag (like <hr/>)
          // Comments and Processing Instructions.
          if (preg_match('{^</?(?:' . $this->auto_close_tags_re . ')\b}', $tag) ||
              $tag[1] === '!' || $tag[1] === '?')
          {
              // Just add the tag to the block as if it was text.
              $block_text .= $tag;
          }
          else {
              // Increase/decrease nested tag count. Only do so if
              // the tag's name match base tag's.
              if (preg_match('{^</?' . $base_tag_name_re . '\b}', $tag)) {
                  if ($tag[1] === '/') {
                      $depth--;
                  } else if ($tag[strlen($tag)-2] !== '/') {
                      // Not self-closing ("/>"), so one level deeper.
                      $depth++;
                  }
              }

              // Check for `markdown="1"` attribute and handle it.
              // The alternatives are grouped so that both anchors apply to
              // each of them; the ungrouped form `^1|block|span$` would also
              // accept values such as "10", "blockquote" or "myspan".
              if ($md_attr &&
                  preg_match($markdown_attr_re, $tag, $attr_m) &&
                  preg_match('/^(?:1|block|span)$/', $attr_m[2] . $attr_m[3]))
              {
                  // Remove `markdown` attribute from opening tag.
                  $tag = preg_replace($markdown_attr_re, '', $tag);

                  // Check if text inside this tag must be parsed in span mode.
                  $mode = $attr_m[2] . $attr_m[3];
                  $span_mode = $mode === 'span' || ($mode !== 'block' &&
                      preg_match('{^<(?:' . $this->contain_span_tags_re . ')\b}', $tag));

                  // Calculate indent before tag.
                  if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
                      $strlen = $this->utf8_strlen;
                      $indent = $strlen($matches[1], 'UTF-8');
                  } else {
                      $indent = 0;
                  }

                  // End preceding block with this tag.
                  $block_text .= $tag;
                  $parsed .= $this->$hash_method($block_text);

                  // Get enclosing tag name for the ParseMarkdown function.
                  // (This pattern makes $tag_name_re safe without quoting.)
                  preg_match('/^<([\w:$]*)\b/', $tag, $matches);
                  $tag_name_re = $matches[1];

                  // Parse the content using the HTML-in-Markdown parser.
                  list ($block_text, $text)
                      = $this->_hashHTMLBlocks_inMarkdown($text, $indent,
                          $tag_name_re, $span_mode);

                  // Outdent markdown text.
                  if ($indent > 0) {
                      $block_text = preg_replace("/^[ ]{1,$indent}/m", "",
                          $block_text);
                  }

                  // Append tag content to parsed text.
                  if (!$span_mode) {
                      $parsed .= "\n\n$block_text\n\n";
                  } else {
                      $parsed .= (string) $block_text;
                  }

                  // Start over with a new block.
                  $block_text = "";
              }
              else $block_text .= $tag;
          }

      } while ($depth > 0);

      // Hash last block text that wasn't processed inside the loop.
      $parsed .= $this->$hash_method($block_text);

      return array($parsed, $text);
  }
  /**
   * Hash a "clean" tag. Whenever a function inserts a "clean" tag in $text,
   * it passes through this method and is automatically escaped, blocking
   * invalid nested overlap.
   * @param string $text
   * @return string The result of hashPart() for $text with boundary 'C'
   */
  protected function hashClean($text) {
      // 'C' is the boundary character handed to hashPart() for clean tags.
      $clean_boundary = 'C';
      return $this->hashPart($text, $clean_boundary);
  }
  /**
   * Turn Markdown link shortcuts into XHTML <a> tags.
   *
   * Three forms are handled, in this order: full reference links
   * `[text][id]`, inline links `[text](url "title"){attrs}`, and shortcut
   * references `[text]`. Does nothing when already inside an anchor.
   * @param string $text
   * @return string
   */
  protected function doAnchors($text) {
      // Re-entrancy guard: while an anchor is being built, leave text as is.
      if ($this->in_anchor) {
          return $text;
      }
      $this->in_anchor = true;

      $reference_handler = array($this, '_doAnchors_reference_callback');
      $inline_handler = array($this, '_doAnchors_inline_callback');

      // Reference-style links: [link text] [id]
      $full_reference_re = '{
          ( # wrap whole match in $1
            \[
              (' . $this->nested_brackets_re . ') # link text = $2
            \]
            [ ]? # one optional space
            (?:\n[ ]*)? # one optional newline followed by spaces
            \[
              (.*?) # id = $3
            \]
          )
          }xs';

      // Inline-style links: [link text](url "optional title")
      $inline_re = '{
          ( # wrap whole match in $1
            \[
              (' . $this->nested_brackets_re . ') # link text = $2
            \]
            \( # literal paren
              [ \n]*
              (?:
                <(.+?)> # href = $3
              |
                (' . $this->nested_url_parenthesis_re . ') # href = $4
              )
              [ \n]*
              ( # $5
                ([\'"]) # quote char = $6
                (.*?) # Title = $7
                \6 # matching quote
                [ \n]* # ignore any spaces/tabs between closing quote and )
              )? # title is optional
            \)
            (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes
          )
          }xs';

      // Reference-style shortcuts: [link text]
      $shortcut_re = '{
          ( # wrap whole match in $1
            \[
              ([^\[\]]+) # link text = $2; can\'t contain [ or ]
            \]
          )
          }xs';

      // The shortcut form must come last in case you've also got
      // [link text][1] or [link text](/foo).
      $text = preg_replace_callback($full_reference_re, $reference_handler, $text);
      $text = preg_replace_callback($inline_re, $inline_handler, $text);
      $text = preg_replace_callback($shortcut_re, $reference_handler, $text);

      $this->in_anchor = false;
      return $text;
  }
  /**
   * Callback for reference anchors.
   *
   * Looks the link id up in the stored link definitions and builds the
   * hashed <a> element. If the id is unknown, the matched text is returned
   * unchanged.
   * @param array $matches 1 = whole match, 2 = link text, 3 = id (optional)
   * @return string
   */
  protected function _doAnchors_reference_callback($matches) {
      $link_text = $matches[2];

      // A missing or empty id ([this][] or [this]) means the link text
      // itself is the id.
      $link_id = isset($matches[3]) ? $matches[3] : '';
      if ($link_id === '') {
          $link_id = $link_text;
      }

      // lower-case and turn embedded newlines into spaces
      $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

      if (!isset($this->urls[$link_id])) {
          // No such definition: leave the source text intact.
          return $matches[1];
      }

      $href = $this->encodeURLAttribute($this->urls[$link_id]);
      $anchor = "<a href=\"$href\"";

      if (isset($this->titles[$link_id])) {
          $title_attr = $this->encodeAttribute($this->titles[$link_id]);
          $anchor .= " title=\"$title_attr\"";
      }
      if (isset($this->ref_attr[$link_id])) {
          $anchor .= $this->ref_attr[$link_id];
      }

      $inner_html = $this->runSpanGamut($link_text);
      $anchor .= ">$inner_html</a>";

      return $this->hashPart($anchor);
  }
  /**
   * Callback for inline anchors.
   *
   * Builds the hashed <a> element for `[text](url "title"){attrs}`.
   * A title attribute is emitted only when a quoted title was actually
   * written (an explicit empty title `""` is kept as title="").
   *
   * @param array $matches Groups from doAnchors(): 2 = link text,
   *                       3 = <url>, 4 = bare url, 6 = title quote char,
   *                       7 = title, 8 = id/class attributes
   * @return string
   */
  protected function _doAnchors_inline_callback($matches) {
      // The link text goes through the span gamut exactly once, as in
      // _doAnchors_reference_callback(); a second pass is redundant.
      $link_text = $this->runSpanGamut($matches[2]);
      $url = $matches[3] === '' ? $matches[4] : $matches[3];

      // PCRE reports an unmatched optional group as '' when a later group
      // matched, and omits it otherwise. The quote group tells us reliably
      // whether a title was written: it is never empty when it matched.
      $title_quote = isset($matches[6]) ? $matches[6] : '';
      $title = isset($matches[7]) ? $matches[7] : '';
      $attr = $this->doExtraAttributes("a", isset($matches[8]) ? $matches[8] : '');

      // if the URL was of the form <s p a c e s> it got caught by the HTML
      // tag parser and hashed. Need to reverse the process before using the URL.
      $unhashed = $this->unhash($url);
      if ($unhashed !== $url)
          $url = preg_replace('/^<(.*)>$/', '\1', $unhashed);

      $url = $this->encodeURLAttribute($url);

      $result = "<a href=\"$url\"";
      if ($title_quote !== '') {
          $title = $this->encodeAttribute($title);
          $result .= " title=\"$title\"";
      }
      $result .= $attr;
      $result .= ">$link_text</a>";

      return $this->hashPart($result);
  }
  /**
   * Turn Markdown image shortcuts into <img> tags.
   *
   * Reference-style images `![alt][id]` are handled first, then inline
   * images `![alt](url "title"){attrs}`.
   * @param string $text
   * @return string
   */
  protected function doImages($text) {
      // Reference-style labeled images: ![alt text][id]
      $reference_re = '{
          ( # wrap whole match in $1
            !\[
              (' . $this->nested_brackets_re . ') # alt text = $2
            \]
            [ ]? # one optional space
            (?:\n[ ]*)? # one optional newline followed by spaces
            \[
              (.*?) # id = $3
            \]
          )
          }xs';

      // Inline images: ![alt text](url "optional title")
      // Don't forget: encode * and _
      $inline_re = '{
          ( # wrap whole match in $1
            !\[
              (' . $this->nested_brackets_re . ') # alt text = $2
            \]
            \s? # One optional whitespace character
            \( # literal paren
              [ \n]*
              (?:
                <(\S*)> # src url = $3
              |
                (' . $this->nested_url_parenthesis_re . ') # src url = $4
              )
              [ \n]*
              ( # $5
                ([\'"]) # quote char = $6
                (.*?) # title = $7
                \6 # matching quote
                [ \n]*
              )? # title is optional
            \)
            (?:[ ]? ' . $this->id_class_attr_catch_re . ' )? # $8 = id/class attributes
          )
          }xs';

      $text = preg_replace_callback(
          $reference_re,
          array($this, '_doImages_reference_callback'),
          $text
      );

      return preg_replace_callback(
          $inline_re,
          array($this, '_doImages_inline_callback'),
          $text
      );
  }
  /**
   * Callback for reference-style images.
   *
   * Looks the (lower-cased) reference id up in $this->urls; when it is
   * unknown the matched text is returned untouched.
   * @param array $matches $1 = whole match, $2 = alt text, $3 = id
   * @return string
   */
  protected function _doImages_reference_callback($matches) {
      $alt = $matches[2];

      // An empty id (![this][]) means the alt text doubles as the id.
      $id = strtolower($matches[3]);
      if ($id === "") {
          $id = strtolower($alt);
      }

      $alt = $this->encodeAttribute($alt);

      // Unknown reference: leave the source text intact.
      if (!isset($this->urls[$id])) {
          return $matches[1];
      }

      $src = $this->encodeURLAttribute($this->urls[$id]);
      $tag = "<img src=\"$src\" alt=\"$alt\"";
      if (isset($this->titles[$id])) {
          $tag .= " title=\"" . $this->encodeAttribute($this->titles[$id]) . "\"";
      }
      if (isset($this->ref_attr[$id])) {
          $tag .= $this->ref_attr[$id];
      }
      $tag .= $this->empty_element_suffix;

      return $this->hashPart($tag);
  }
  /**
   * Callback for inline images.
   * @param array $matches $2 = alt text, $3/$4 = src url, $7 = optional
   *                       title, $8 = optional id/class attribute block
   * @return string
   */
  protected function _doImages_inline_callback($matches) {
      // The source URL is captured in $3 when non-empty, otherwise in $4.
      $src = $matches[3];
      if ($src === '') {
          $src = $matches[4];
      }

      // $8 is optional and may be missing from the match array.
      $extra = $this->doExtraAttributes("img", isset($matches[8]) ? $matches[8] : null);

      $alt = $this->encodeAttribute($matches[2]);
      $src = $this->encodeURLAttribute($src);

      $tag = "<img src=\"$src\" alt=\"$alt\"";
      if (isset($matches[7])) {
          // The title is attribute-encoded here and wrapped in quotes.
          $tag .= " title=\"" . $this->encodeAttribute($matches[7]) . "\"";
      }
      $tag .= $extra . $this->empty_element_suffix;

      return $this->hashPart($tag);
  }
  /**
   * Process markdown headers. Redefined to add ID and class attribute support.
   *
   * Setext-style headers are converted first, then atx-style headers. In
   * both patterns the optional {#id .class} block is captured by the group
   * contained in $this->id_class_attr_catch_re.
   * @param string $text
   * @return string
   */
  protected function doHeaders($text) {
      // Setext-style headers:
      //   Header 1 {#header1}
      //   ========
      //
      //   Header 2 {#header2 .class1 .class2}
      //   --------
      //
      // Capture groups consumed by _doHeaders_callback_setext():
      // $1 = text, $2 = attribute block, $3 = underline.
      $text = preg_replace_callback(
          '{
          (^.+?) # $1: Header text
          (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )? # $2 = id/class attributes
          [ ]*\n(=+|-+)[ ]*\n+ # $3: Header footer
          }mx',
          array($this, '_doHeaders_callback_setext'), $text);

      // atx-style headers:
      //   # Header 1 {#header1}
      //   ## Header 2 {#header2}
      //   ## Header 2 with closing hashes ## {#header3.class1.class2}
      //   ...
      //   ###### Header 6 {.class2}
      //
      // With hashtag_protection enabled at least one space is required
      // between the leading #'s and the header text.
      $text = preg_replace_callback('{
          ^(\#{1,6}) # $1 = string of #\'s
          [ ]'.($this->hashtag_protection ? '+' : '*').'
          (.+?) # $2 = Header text
          [ ]*
          \#* # optional closing #\'s (not counted)
          (?:[ ]+ ' . $this->id_class_attr_catch_re . ' )? # $3 = id/class attributes
          [ ]*
          \n+
          }xm',
          array($this, '_doHeaders_callback_atx'), $text);

      return $text;
  }
  /**
   * Callback for setext headers.
   * @param array $matches $1 = header text, $2 = id/class attribute block,
   *                       $3 = underline made of = or - characters
   * @return string
   */
  protected function _doHeaders_callback_setext($matches) {
      $title = $matches[1];
      $underline = $matches[3];

      // A single dash under text that starts with "- " is left unchanged
      // instead of being turned into a header.
      if ($underline === '-' && preg_match('{^- }', $title)) {
          return $matches[0];
      }

      // "=" underlines give <h1>, "-" underlines give <h2>.
      $level = 2;
      if ($underline[0] === '=') {
          $level = 1;
      }

      // Optional user callback producing a default id from the header text.
      $defaultId = null;
      if (is_callable($this->header_id_func)) {
          $defaultId = call_user_func($this->header_id_func, $title);
      }

      $attr = $this->doExtraAttributes(
          "h$level",
          isset($matches[2]) ? $matches[2] : null,
          $defaultId
      );
      $html = "<h$level$attr>" . $this->runSpanGamut($title) . "</h$level>";

      return "\n" . $this->hashBlock($html) . "\n\n";
  }
  /**
   * Callback for atx headers.
   * @param array $matches $1 = leading #'s, $2 = header text,
   *                       $3 = optional id/class attribute block
   * @return string
   */
  protected function _doHeaders_callback_atx($matches) {
      // The header level is the number of leading # characters.
      $level = strlen($matches[1]);
      $title = $matches[2];

      // Optional user callback producing a default id from the header text.
      $defaultId = null;
      if (is_callable($this->header_id_func)) {
          $defaultId = call_user_func($this->header_id_func, $title);
      }

      // $3 is absent from the match array when no attribute block was given.
      $attr = $this->doExtraAttributes(
          "h$level",
          isset($matches[3]) ? $matches[3] : null,
          $defaultId
      );
      $html = "<h$level$attr>" . $this->runSpanGamut($title) . "</h$level>";

      return "\n" . $this->hashBlock($html) . "\n\n";
  }
  /**
   * Form HTML tables.
   *
   * Tables written with a leading pipe on every row are handled first
   * (the leading pipes are stripped by _doTable_leadingPipe_callback()),
   * then tables written without leading pipes.
   * @param string $text
   * @return string
   */
  protected function doTables($text) {
      $less_than_tab = $this->tab_width - 1;

      // Find tables with leading pipe.
      //
      //   | Header 1 | Header 2
      //   | -------- | --------
      //   | Cell 1   | Cell 2
      //   | Cell 3   | Cell 4
      $text = preg_replace_callback('
          {
          ^ # Start of a line
          [ ]{0,' . $less_than_tab . '} # Allowed whitespace.
          [|] # Optional leading pipe (present)
          (.+) \n # $1: Header row (at least one pipe)
          [ ]{0,' . $less_than_tab . '} # Allowed whitespace.
          [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
          ( # $3: Cells
          (?>
          [ ]* # Allowed whitespace.
          [|] .* \n # Row content.
          )*
          )
          (?=\n|\Z) # Stop at final double newline.
          }xm',
          array($this, '_doTable_leadingPipe_callback'), $text);

      // Find tables without leading pipe.
      //
      //   Header 1 | Header 2
      //   -------- | --------
      //   Cell 1   | Cell 2
      //   Cell 3   | Cell 4
      //
      // The callback is referenced by its declared name, _doTable_callback.
      // PHP resolves method names case-insensitively, but the exact
      // spelling keeps the reference searchable and consistent.
      $text = preg_replace_callback('
          {
          ^ # Start of a line
          [ ]{0,' . $less_than_tab . '} # Allowed whitespace.
          (\S.*[|].*) \n # $1: Header row (at least one pipe)
          [ ]{0,' . $less_than_tab . '} # Allowed whitespace.
          ([-:]+[ ]*[|][-| :]*) \n # $2: Header underline
          ( # $3: Cells
          (?>
          .* [|] .* \n # Row content
          )*
          )
          (?=\n|\Z) # Stop at final double newline.
          }xm',
          array($this, '_doTable_callback'), $text);

      return $text;
  }
  /**
   * Callback for tables written with a leading pipe: strips the leading
   * pipe from every body row and delegates to _doTable_callback().
   * @param array $matches $1 = header row, $2 = underline, $3 = body rows
   * @return string
   */
  protected function _doTable_leadingPipe_callback($matches) {
      // The header and underline pipes were consumed outside the capture
      // groups; only the body rows still carry their leading pipe.
      $body = preg_replace('/^ *[|]/m', '', $matches[3]);

      return $this->_doTable_callback(
          array($matches[0], $matches[1], $matches[2], $body)
      );
  }
  /**
   * Build the alignment attribute for a table cell.
   *
   * When $this->table_align_class_tmpl is non-empty, a class attribute is
   * produced with every '%%' in the template replaced by the alignment
   * name; otherwise a plain align attribute is produced.
   * @param string $alignname
   * @return string Attribute string with a leading space
   */
  protected function _doTable_makeAlignAttr($alignname) {
      $template = $this->table_align_class_tmpl;

      if (!empty($template)) {
          $classname = str_replace('%%', $alignname, $template);
          return " class=\"$classname\"";
      }

      return " align=\"$alignname\"";
  }
  /**
   * Callback for processing tables.
   * @param array $matches $1 = header row, $2 = header underline,
   *                       $3 = body rows (leading pipes already removed)
   * @return string
   */
  protected function _doTable_callback($matches) {
      // Remove any trailing pipe on each line of the three parts.
      $trailing_pipe_re = '/[|] *$/m';
      $head = preg_replace($trailing_pipe_re, '', $matches[1]);
      $underline = preg_replace($trailing_pipe_re, '', $matches[2]);
      $content = preg_replace($trailing_pipe_re, '', $matches[3]);

      // Read the column alignment from the header underline. Patterns are
      // tried in this order; a column matching none gets no attribute.
      $align_patterns = array(
          'right'  => '/^ *-+: *$/',
          'center' => '/^ *:-+: *$/',
          'left'   => '/^ *:-+ *$/',
      );
      $attr = array();
      foreach (preg_split('/ *[|] */', $underline) as $n => $separator) {
          $attr[$n] = '';
          foreach ($align_patterns as $alignname => $pattern) {
              if (preg_match($pattern, $separator)) {
                  $attr[$n] = $this->_doTable_makeAlignAttr($alignname);
                  break;
              }
          }
      }

      // Parse span elements (code spans, character escapes, inline HTML
      // tags) first so that pipes inside them are ignored by the split.
      $headers = preg_split('/ *[|] */', $this->parseSpan($head));
      $col_count = count($headers);
      $attr = array_pad($attr, $col_count, '');

      // Column headers.
      $html = "<table>\n" . "<thead>\n" . "<tr>\n";
      foreach ($headers as $n => $header) {
          $html .= " <th" . $attr[$n] . ">" . $this->runSpanGamut(trim($header)) . "</th>\n";
      }
      $html .= "</tr>\n" . "</thead>\n";

      // Body: one <tr> per line of content.
      $html .= "<tbody>\n";
      foreach (explode("\n", trim($content, "\n")) as $row) {
          // Same span parsing as for the header row, then split into at
          // most $col_count cells and pad short rows with empty cells.
          $cells = preg_split('/ *[|] */', $this->parseSpan($row), $col_count);
          $cells = array_pad($cells, $col_count, '');

          $html .= "<tr>\n";
          foreach ($cells as $n => $cell) {
              $html .= " <td" . $attr[$n] . ">" . $this->runSpanGamut(trim($cell)) . "</td>\n";
          }
          $html .= "</tr>\n";
      }
      $html .= "</tbody>\n" . "</table>";

      return $this->hashBlock($html) . "\n";
  }
  /**
   * Form HTML definition lists.
   *
   * Finds every whole definition list located at the start of the text or
   * right after a blank line and hands it to _doDefLists_callback().
   * @param string $text
   * @return string
   */
  protected function doDefLists($text) {
      $less_than_tab = $this->tab_width - 1;

      // Pattern matching one entire dl list (meant to be embedded in a
      // pattern compiled with the m and x modifiers).
      $list_re = '(?>
          ( # $1 = whole list
          ( # $2
          [ ]{0,' . $less_than_tab . '}
          ((?>.*\S.*\n)+) # $3 = defined term
          \n?
          [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
          )
          (?s:.+?)
          ( # $4
          \z
          |
          \n{2,}
          (?=\S)
          (?! # Negative lookahead for another term
          [ ]{0,' . $less_than_tab . '}
          (?: \S.*\n )+? # defined term
          \n?
          [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
          )
          (?! # Negative lookahead for another definition
          [ ]{0,' . $less_than_tab . '}:[ ]+ # colon starting definition
          )
          )
          )
          )'; // mx

      $anchored_re = '{
          (?>\A\n?|(?<=\n\n))
          ' . $list_re . '
          }mx';

      return preg_replace_callback(
          $anchored_re,
          array($this, '_doDefLists_callback'),
          $text
      );
  }
  /**
   * Callback for processing definition lists: converts the items of one
   * matched list and wraps them in a <dl> element.
   * @param array $matches $1 = whole list text
   * @return string
   */
  protected function _doDefLists_callback($matches) {
      // Convert terms and definitions, dropping surrounding whitespace.
      $items = trim($this->processDefListItems($matches[1]));

      return $this->hashBlock("<dl>\n" . $items . "\n</dl>") . "\n\n";
  }
  /**
   * Process the contents of a single definition list, splitting it
   * into individual term and definition list items.
   *
   * Terms are converted first (producing <dt> markup), then definitions;
   * the definition pattern relies on the already generated <dt> tags to
   * know where a definition ends.
   * @param string $list_str
   * @return string
   */
  protected function processDefListItems($list_str) {
      $less_than_tab = $this->tab_width - 1;

      // Trim trailing blank lines:
      $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

      // Process definition terms.
      // The lookahead for the definition mark uses $less_than_tab, like
      // every other indentation bound in the definition list patterns,
      // rather than a hard-coded 3 that is only right for tab_width 4.
      $list_str = preg_replace_callback('{
          (?>\A\n?|\n\n+) # leading line
          ( # definition terms = $1
          [ ]{0,' . $less_than_tab . '} # leading whitespace
          (?!\:[ ]|[ ]) # negative lookahead for a definition
          # mark (colon) or more whitespace.
          (?> \S.* \n)+? # actual term (not whitespace).
          )
          (?=\n?[ ]{0,' . $less_than_tab . '}:[ ]) # lookahead for following line feed
          # with a definition mark.
          }xm',
          array($this, '_processDefListItems_callback_dt'), $list_str);

      // Process actual definitions.
      $list_str = preg_replace_callback('{
          \n(\n+)? # leading line = $1
          ( # marker space = $2
          [ ]{0,' . $less_than_tab . '} # whitespace before colon
          \:[ ]+ # definition mark (colon)
          )
          ((?s:.+?)) # definition text = $3
          (?= \n+ # stop at next definition mark,
          (?: # next term or end of text
          [ ]{0,' . $less_than_tab . '} \:[ ] |
          <dt> | \z
          )
          )
          }xm',
          array($this, '_processDefListItems_callback_dd'), $list_str);

      return $list_str;
  }
  /**
   * Callback for <dt> elements in definition lists. Every line of the
   * matched term block becomes its own <dt> element.
   * @param array $matches $1 = one or more term lines
   * @return string
   */
  protected function _processDefListItems_callback_dt($matches) {
      $items = array();
      foreach (explode("\n", trim($matches[1])) as $line) {
          $items[] = "<dt>" . $this->runSpanGamut(trim($line)) . "</dt>";
      }

      // Each <dt> sits on its own line, with a newline before and after
      // the whole group.
      return "\n" . implode("\n", $items) . "\n";
  }
  /**
   * Callback for <dd> elements in definition lists.
   *
   * A definition preceded by a blank line, or containing one, is run
   * through the block gamut; any other definition is treated as a single
   * span of text.
   * @param array $matches $1 = blank line(s) before the marker,
   *                       $2 = marker (indentation, colon, spaces),
   *                       $3 = definition text
   * @return string
   */
  protected function _processDefListItems_callback_dd($matches) {
      $body = $matches[3];
      $as_block = $matches[1] || preg_match('/\n{2,}/', $body);

      if (!$as_block) {
          $body = $this->runSpanGamut($this->outdent(rtrim($body)));
          return "\n<dd>" . $body . "</dd>\n";
      }

      // Replace the marker with whitespace of the same width so the first
      // line is indented like its continuation lines before outdenting.
      $padding = str_repeat(' ', strlen($matches[2]));
      $body = $this->runBlockGamut($this->outdent($padding . $body . "\n\n"));

      return "\n<dd>" . "\n" . $body . "\n" . "</dd>\n";
  }
  /**
   * Adding the fenced code block syntax to regular Markdown:
   *
   *     ~~~
   *     Code block
   *     ~~~
   *
   * The fence is three or more tildes or backticks. The opening fence may
   * be followed by a standalone class name and/or an id/class attribute
   * block, and the closing fence must repeat the opening marker exactly.
   * @param string $text
   * @return string
   */
  protected function doFencedCodeBlocks($text) {
      $fence_re = '{
          (?:\n|\A)
          # 1: Opening marker
          (
          (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
          )
          [ ]*
          (?:
          \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
          )?
          [ ]*
          (?:
          ' . $this->id_class_attr_catch_re . ' # 3: Extra attributes
          )?
          [ ]* \n # Whitespace and newline following marker.
          # 4: Content
          (
          (?>
          (?!\1 [ ]* \n) # Not a closing marker.
          .*\n+
          )+
          )
          # Closing marker.
          \1 [ ]* (?= \n )
          }xm';

      return preg_replace_callback(
          $fence_re,
          array($this, '_doFencedCodeBlocks_callback'),
          $text
      );
  }
  /**
   * Callback to process fenced code blocks.
   * @param array $matches $2 = standalone class name, $3 = id/class
   *                       attribute block, $4 = code content
   * @return string
   */
  protected function _doFencedCodeBlocks_callback($matches) {
      $classname = $matches[2];
      $code = $matches[4];

      // A user-supplied content function replaces the default escaping.
      if ($this->code_block_content_func) {
          $code = call_user_func($this->code_block_content_func, $code, $classname);
      } else {
          $code = htmlspecialchars($code, ENT_NOQUOTES);
      }

      // Leading newlines become one <br> each.
      $code = preg_replace_callback(
          '/^\n+/',
          array($this, '_doFencedCodeBlocks_newlines'),
          $code
      );

      // The standalone class name, minus any leading dot, is prefixed with
      // code_class_prefix.
      $classes = array();
      if ($classname !== "") {
          if ($classname[0] === '.') {
              $classname = substr($classname, 1);
          }
          $classes[] = $this->code_class_prefix . $classname;
      }

      // Attributes go on either <pre> or <code>, per code_attr_on_pre.
      $on_pre = $this->code_attr_on_pre;
      $attr_str = $this->doExtraAttributes($on_pre ? "pre" : "code", $matches[3], null, $classes);
      if ($on_pre) {
          $html = "<pre$attr_str><code>$code</code></pre>";
      } else {
          $html = "<pre><code$attr_str>$code</code></pre>";
      }

      return "\n\n" . $this->hashBlock($html) . "\n\n";
  }
  /**
   * Replace a run of newlines in a fenced code block with the same number
   * of <br> elements.
   * @param array $matches $0 = the run of newline characters
   * @return string
   */
  protected function _doFencedCodeBlocks_newlines($matches) {
      $line_break = "<br" . $this->empty_element_suffix;
      $count = strlen($matches[0]);

      return str_repeat($line_break, $count);
  }
  /**
   * Redefining emphasis markers so that emphasis by underscore does not
   * work in the middle of a word.
   *
   * The '' key holds the pattern for an opening marker; the other keys
   * hold the closing pattern for that specific marker.
   * @var array
   */
  protected $em_relist = array(
      ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?![\.,:;]?\s)',
      '*' => '(?<![\s*])\*(?!\*)',
      '_' => '(?<![\s_])_(?![a-zA-Z0-9_])',
  );

  /**
   * Strong emphasis markers (** and __), following the same rules as
   * $em_relist.
   * @var array
   */
  protected $strong_relist = array(
      ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?![\.,:;]?\s)',
      '**' => '(?<![\s*])\*\*(?!\*)',
      '__' => '(?<![\s_])__(?![a-zA-Z0-9_])',
  );

  /**
   * Combined emphasis + strong markers (*** and ___), following the same
   * rules as $em_relist.
   * @var array
   */
  protected $em_strong_relist = array(
      ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?![\.,:;]?\s)',
      '***' => '(?<![\s*])\*\*\*(?!\*)',
      '___' => '(?<![\s_])___(?![a-zA-Z0-9_])',
  );
  /**
   * Parse text into paragraphs.
   *
   * The text is split on blank lines; each chunk goes through the span
   * gamut and, unless it matches the block/clean tag hash pattern, is
   * wrapped in <p> tags. Remaining hashes are expanded at the end.
   * @param string $text String to process in paragraphs
   * @param boolean $wrap_in_p Whether paragraphs should be wrapped in <p> tags
   * @return string HTML output
   */
  protected function formParagraphs($text, $wrap_in_p = true) {
      // Strip leading and trailing newlines, then split on blank lines.
      $stripped = preg_replace('/\A\n+|\n+\z/', '', $text);
      $chunks = preg_split('/\n{2,}/', $stripped, -1, PREG_SPLIT_NO_EMPTY);

      $rendered = array();
      foreach ($chunks as $key => $chunk) {
          $chunk = trim($this->runSpanGamut($chunk));

          // Clean tag hashes & block tag hashes are left alone.
          $is_hashed = preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $chunk);
          if ($wrap_in_p && !$is_hashed) {
              $chunk = "<p>$chunk</p>";
          }

          $rendered[$key] = $chunk;
      }

      // Join the chunks back together and expand any hash still present.
      return $this->unhash(implode("\n\n", $rendered));
  }
  /**
   * Footnotes - Strips footnote definitions from text and stores their
   * content in $this->footnotes (see _stripFootnotes_callback()).
   * @param string $text
   * @return string Text with the footnote definitions removed
   */
  protected function stripFootnotes($text) {
      $less_than_tab = $this->tab_width - 1;

      // Footnote definitions are in the form: [^id]: footnote text
      // The text may continue on following lines; it stops at another
      // footnote or link definition, or at a blank line followed by
      // content indented by less than one tab width. That bound uses
      // $less_than_tab, like the opening indentation check, rather than a
      // hard-coded 3 that is only right for tab_width 4.
      $text = preg_replace_callback('{
          ^[ ]{0,' . $less_than_tab . '}\[\^(.+?)\][ ]?: # note_id = $1
          [ ]*
          \n? # maybe *one* newline
          ( # text = $2 (no blank lines allowed)
          (?:
          .+ # actual text
          |
          \n # newlines but
          (?!\[.+?\][ ]?:\s)# negative lookahead for footnote or link definition marker.
          (?!\n+[ ]{0,' . $less_than_tab . '}\S)# ensure line is not blank and followed
          # by non-indented content
          )*
          )
          }xm',
          array($this, '_stripFootnotes_callback'),
          $text);

      return $text;
  }
  /**
   * Callback for stripping footnotes: records the outdented footnote text
   * under its prefixed id and removes the definition from the document.
   * @param array $matches $1 = footnote id, $2 = footnote text
   * @return string Always the empty string
   */
  protected function _stripFootnotes_callback($matches) {
      $key = $this->fn_id_prefix . $matches[1];
      $body = $this->outdent($matches[2]);
      $this->footnotes[$key] = $body;

      // The whole definition block is replaced by nothing.
      return '';
  }
  /**
   * Replace footnote references in $text [^id] with a special text-token
   * which will be replaced by the actual footnote marker in appendFootnotes.
   * Nothing is replaced while $this->in_anchor is set.
   * @param string $text
   * @return string
   */
  protected function doFootnotes($text) {
      if ($this->in_anchor) {
          return $text;
      }

      $token = "F\x1Afn:\\1\x1A:";
      return preg_replace('{\[\^(.+?)\]}', $token, $text);
  }
  /**
   * Append footnote list to text.
   *
   * Footnote tokens are first turned into markers. When at least one
   * footnote was referenced the footnote HTML is assembled, and it is
   * appended to the text unless $this->omit_footnotes is set.
   * @param string $text
   * @return string
   */
  protected function appendFootnotes($text) {
      $text = preg_replace_callback(
          '{F\x1Afn:(.*?)\x1A:}',
          array($this, '_appendFootnotes_callback'),
          $text
      );

      // No footnote was referenced: nothing to assemble or append.
      if (empty($this->footnotes_ordered)) {
          return $text;
      }

      // Assembled even when the result is not appended below.
      $this->_doFootnotes();
      if ($this->omit_footnotes) {
          return $text;
      }

      return $text
          . "\n\n"
          . "<div class=\"footnotes\" role=\"doc-endnotes\">\n"
          . "<hr" . $this->empty_element_suffix . "\n"
          . $this->footnotes_assembled
          . "</div>";
  }
  /**
   * Generates the HTML for footnotes. Called by appendFootnotes, even if
   * footnotes are not being appended. The result is stored in
   * $this->footnotes_assembled.
   * @return void
   */
  protected function _doFootnotes() {
      // Attributes shared by every backlink.
      $attr = array();
      if ($this->fn_backlink_class !== "") {
          $attr['class'] = " class=\"" . $this->encodeAttribute($this->fn_backlink_class) . "\"";
      }
      $attr['role'] = " role=\"doc-backlink\"";

      $html = "<ol>\n\n";

      // The queue is re-checked on every pass (instead of using foreach)
      // because the marker callback run below can add entries to it.
      for ($num = 1; !empty($this->footnotes_ordered); ++$num) {
          $footnote = reset($this->footnotes_ordered);
          $note_id = key($this->footnotes_ordered);
          $ref_count = $this->footnotes_ref_count[$note_id];
          unset(
              $this->footnotes_ordered[$note_id],
              $this->footnotes_ref_count[$note_id],
              $this->footnotes[$note_id]
          );

          // Two trailing newlines are appended before block parsing.
          $footnote = $this->runBlockGamut($footnote . "\n\n");
          $footnote = preg_replace_callback(
              '{F\x1Afn:(.*?)\x1A:}',
              array($this, '_appendFootnotes_callback'),
              $footnote
          );

          $note_id = $this->encodeAttribute($note_id);

          // Prepare one backlink per reference to this footnote.
          // No backlink is created when the backlink HTML is blank.
          $backlink = "";
          if (!empty($this->fn_backlink_html)) {
              if (!empty($this->fn_backlink_title)) {
                  $attr['title'] = ' title="' . $this->encodeAttribute($this->fn_backlink_title) . '"';
              }
              if (!empty($this->fn_backlink_label)) {
                  $attr['label'] = ' aria-label="' . $this->encodeAttribute($this->fn_backlink_label) . '"';
              }
              $attr_template = implode('', $attr);

              $links = array();
              for ($ref_num = 1; $ref_num <= $ref_count; ++$ref_num) {
                  $link_attr = $this->parseFootnotePlaceholders($attr_template, $num, $ref_num);
                  $link_text = $this->parseFootnotePlaceholders($this->fn_backlink_html, $num, $ref_num);
                  // The first reference has no number in its anchor id.
                  $mark = $ref_num > 1 ? $ref_num : '';
                  $links[] = "<a href=\"#fnref$mark:$note_id\"$link_attr>$link_text</a>";
              }
              $backlink = implode(' ', $links);
          }

          // Add backlink to last paragraph; create new paragraph if needed.
          if (!empty($backlink)) {
              if (preg_match('{</p>$}', $footnote)) {
                  $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
              } else {
                  $footnote .= "\n\n<p>$backlink</p>";
              }
          }

          $html .= "<li id=\"fn:$note_id\" role=\"doc-endnote\">\n"
              . $footnote . "\n"
              . "</li>\n\n";
      }

      $html .= "</ol>\n";
      $this->footnotes_assembled = $html;
  }
  /**
   * Callback for appending footnotes: turns a footnote token into its
   * superscript marker.
   *
   * The first reference to a footnote queues its content in
   * $this->footnotes_ordered and assigns its number; later references
   * only increase the reference count. A token without a matching
   * footnote is turned back into its [^id] source form.
   * @param array $matches $1 = footnote id as written in the text
   * @return string
   */
  protected function _appendFootnotes_callback($matches) {
      $node_id = $this->fn_id_prefix . $matches[1];

      // No such footnote: restore the original reference text.
      if (!isset($this->footnotes[$node_id])) {
          return "[^" . $matches[1] . "]";
      }

      if (!isset($this->footnotes_numbers[$node_id])) {
          // First reference: transfer the footnote content to the ordered
          // list and give it its number.
          $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
          $this->footnotes_ref_count[$node_id] = 1;
          $this->footnotes_numbers[$node_id] = $this->footnote_counter++;
          $ref_count_mark = '';
      } else {
          // Repeated reference: the new count becomes part of the id.
          $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
      }
      $num = $this->footnotes_numbers[$node_id];

      $attr = "";
      if ($this->fn_link_class !== "") {
          $attr .= " class=\"" . $this->encodeAttribute($this->fn_link_class) . "\"";
      }
      if ($this->fn_link_title !== "") {
          $attr .= " title=\"" . $this->encodeAttribute($this->fn_link_title) . "\"";
      }
      $attr .= " role=\"doc-noteref\"";
      // '%%' in the attributes stands for the footnote number.
      $attr = str_replace("%%", $num, $attr);

      $node_id = $this->encodeAttribute($node_id);

      return "<sup id=\"fnref$ref_count_mark:$node_id\">"
          . "<a href=\"#fn:$node_id\"$attr>$num</a>"
          . "</sup>";
  }
  /**
   * Build footnote label by evaluating any placeholders.
   * - ^^ footnote number
   * - %% footnote reference number (Nth reference to footnote number)
   * @param string $label
   * @param int $footnote_number
   * @param int $reference_number
   * @return string
   */
  protected function parseFootnotePlaceholders($label, $footnote_number, $reference_number) {
      // Substitutions are applied one after the other: ^^ first, then %%.
      $with_number = str_replace('^^', $footnote_number, $label);

      return str_replace('%%', $reference_number, $with_number);
  }
  /**
   * Abbreviations - strips abbreviation definitions from text; the
   * callback stores each description for later use.
   * @param string $text
   * @return string
   */
  protected function stripAbbreviations($text) {
      $less_than_tab = $this->tab_width - 1;

      // Abbreviation definitions are in the form: *[abbr]: description
      $definition_re = '{
          ^[ ]{0,' . $less_than_tab . '}\*\[(.+?)\][ ]?: # abbr_id = $1
          (.*) # text = $2 (no blank lines allowed)
          }xm';

      return preg_replace_callback(
          $definition_re,
          array($this, '_stripAbbreviations_callback'),
          $text
      );
  }
  /**
   * Callback for stripping abbreviations: adds the abbreviation to the
   * alternation in $this->abbr_word_re and records its description.
   * @param array $matches $1 = abbreviation, $2 = description
   * @return string Always the empty string
   */
  protected function _stripAbbreviations_callback($matches) {
      $word = $matches[1];

      // Alternatives are separated by '|' once the pattern is non-empty.
      $separator = $this->abbr_word_re ? '|' : '';
      $this->abbr_word_re .= $separator . preg_quote($word);

      $this->abbr_desciptions[$word] = trim($matches[2]);

      // The whole definition line is replaced by nothing.
      return '';
  }
  /**
   * Find defined abbreviations in text and wrap them in <abbr> elements.
   * The text is returned unchanged when $this->abbr_word_re is empty.
   * @param string $text
   * @return string
   */
  protected function doAbbreviations($text) {
      if (!$this->abbr_word_re) {
          return $text;
      }

      // The /x modifier cannot be used here because abbr_word_re may
      // contain significant spaces. A match may not be preceded or
      // followed by a word character or \x1A.
      $pattern = '{'
          . '(?<![\w\x1A])'
          . '(?:' . $this->abbr_word_re . ')'
          . '(?![\w\x1A])'
          . '}';

      return preg_replace_callback(
          $pattern,
          array($this, '_doAbbreviations_callback'),
          $text
      );
  }
  /**
   * Callback for processing abbreviations: wraps a matched abbreviation in
   * an <abbr> element, with a title attribute when a non-empty description
   * is stored for it.
   * @param array $matches $0 = matched abbreviation
   * @return string
   */
  protected function _doAbbreviations_callback($matches) {
      $abbr = $matches[0];

      // No stored entry for this exact text: leave it as is.
      if (!isset($this->abbr_desciptions[$abbr])) {
          return $abbr;
      }

      $desc = $this->abbr_desciptions[$abbr];
      if (empty($desc)) {
          $element = "<abbr>$abbr</abbr>";
      } else {
          $desc = $this->encodeAttribute($desc);
          $element = "<abbr title=\"$desc\">$abbr</abbr>";
      }

      return $this->hashPart($element);
  }
  1674. }