creole.pl 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678
  1. #!/usr/bin/env perl
  2. use strict;
  3. use v5.10;
  4. # ====================[ creole.pl ]====================
  5. =head1 NAME
  6. creole - An Oddmuse module for marking up Oddmuse Wiki pages according to the
  7. Wiki Creole standard, a Wiki-agnostic syntax scheme.
  8. =head1 INSTALLATION
  9. creole is easily installable; move this file into the B<wiki/modules/>
  10. directory for your Oddmuse Wiki.
  11. =cut
  12. AddModuleDescription('creole.pl', 'Creole Markup Extension');
  13. our ($q, $bol, %InterSite, $FullUrlPattern, $FreeLinkPattern, $FreeInterLinkPattern, $InterSitePattern, @MyRules, %RuleOrder, @MyInitVariables, @HtmlStack, @HtmlAttrStack);
  14. # ....................{ CONFIGURATION }....................
  15. =head1 CONFIGURATION
  16. creole is easily configurable; set these variables in the B<wiki/config.pl>
  17. file for your Oddmuse Wiki.
  18. =cut
  19. our ($CreoleLineBreaks,
  20. $CreoleTildeAlternative,
  21. $CreoleTableCellsContainBlockLevelElements,
  22. $CreoleDashStyleUnorderedLists);
  23. =head2 $CreoleLineBreaks
  24. A boolean that, if true, causes this extension to convert single newlines in
  25. page text to genuine linebreaks (i.e., the <br> tag) in the HTML for that
  26. page. (If false, this extension consumes single newlines without actually
  27. converting them into anything; they will be ignored, wherever found.)
  28. Irregardless of this booleans setting, this extension always converts two
  29. newlines to a paragraph break (i.e., the <p> tag).
  30. =cut
  31. $CreoleLineBreaks = 0;
  32. =head2 $CreoleTildeAlternative
  33. A boolean that, if true, prevents this extension from consuming the tilde ~
  34. character, when that character appears in front of an a-z, A-Z, or 0-9
  35. character. (If false, this extension consumes such tilde ~ characters.)
  36. =cut
  37. $CreoleTildeAlternative = 0;
  38. =head2 $CreoleTableCellsContainBlockLevelElements
  39. A boolean that, if true, permits table cell markup to embed block level
  40. elements in table cells. (By default, this boolean is false.)
  41. You are encouraged to enable this boolean, as it significantly improves the
  42. "stuff" you can do with Wiki Creole table syntax. For example, enabling this
  43. boolean permits you to embed nested lists in tables.
  44. Block level elements are such "high-level" entities as paragraphs, blockquotes,
  45. list items, and so on. Thus, enabling this boolean permits you to embed multiple
  46. paragraphs, blockquotes, and so on in individual table cells.
  47. Please note: enabling this boolean permits non-conformant syntax -- that is,
  48. syntax which no longer conforms to the Wiki Creole standard. (In general,
  49. unless you have significant amounts of Wiki Creole table markup strictly
  50. conforming to the Wiki Creole standard, this shouldn't be an issue.)
  51. Please note: enabling this boolean also requires you explicitly close the last
  52. table cell of a cell with a "|" character. (This character is optional under
  53. the Wiki Creole standard, but not under this non-conformant alteration.)
  54. =cut
  55. $CreoleTableCellsContainBlockLevelElements = 0;
  56. =head2 $CreoleDashStyleUnorderedLists
  57. A boolean that, if true, permits unordered list items to be prefixed with either
  58. a '-' dash or an '*' asterisk or, if false, requires unordered list items to be
  59. prefixed with an '*' asterisk, only. (By default, this boolean is false.)
  60. Please note: enabling this boolean permits non-conformant syntax -- that is,
  61. syntax which no longer conforms to the Wiki Creole standard. Unless your Wiki
  62. requires it, you are encouraged not to set this boolean.
  63. =cut
  64. $CreoleDashStyleUnorderedLists = 0;
  65. # ....................{ INITIALIZATION }....................
  66. push(@MyInitVariables, \&CreoleInit);
  67. # A boolean that is true if the "creoleaddition.pl" module is also installed.
  68. my $CreoleIsCreoleAddition;
  69. # A boolean set by CreoleRule() to true, if a new table cell has just been
  70. # started. This allows testing, elsewhere, of whether we are at the start of a
  71. # a new table cell. Why test that? Because. If we are indeed at the start of a
  72. # a new table cell, we should behave as if the "$bol" boolean is true: we should
  73. # allow block level elements at the start of this new table cell.
  74. #
  75. # Of course, we have to set this to false immediately after matching past the
  76. # start of that table cell. This is what RunMyRulesCreole() does.
  77. my $CreoleIsTableCellBol;
  78. # A regular expression matching Wiki Creole-style table cells.
  79. my $CreoleTableCellPattern = '[ \t]*(\|+)(=)?\n?([ \t]*)';
  80. # A regular expression matching Wiki Creole-style pipe delimiters in links.
  81. my $CreoleLinkPipePattern = '[ \t]*\|[ \t]*';
  82. # A regular expression matching Wiki Creole-style link text. This expression
  83. # takes into account the fact that such text is always optional.
  84. my $CreoleLinkTextPattern = "($CreoleLinkPipePattern(.+?))?";
  85. # The html tag and string of html tag attributes for the current Creole header.
  86. # This prevents an otherwise necessary, costly evaluation of test statements
  87. # resembling:
  88. #
  89. # if (InElement('h1') or InElement('h2') or InElement('h3') or
  90. # InElement('h4') or InElement('h5') or InElement('h6')) { ... }
  91. #
  92. # As Creole headers cannot span blocks or lines, this should be a safe caching.
  93. my ($CreoleHeaderHtmlTag, $CreoleHeaderHtmlTagAttr);
  94. sub CreoleInit {
  95. $CreoleIsCreoleAddition = defined &CreoleAdditionRule;
  96. $CreoleIsTableCellBol =
  97. $CreoleHeaderHtmlTag =
  98. $CreoleHeaderHtmlTagAttr = '';
  99. # This is the "code magic" enabling block-level elements in multi-line
  100. # table cells.
  101. if ($CreoleTableCellsContainBlockLevelElements) {
  102. SetHtmlEnvironmentContainer('td');
  103. SetHtmlEnvironmentContainer('th');
  104. }
  105. # FIXME: The following changes interfere with the bbcode extension.
  106. # To achieve something similar, we often see sites with an InterMap
  107. # entry called Self, eg. from http://emacswiki.org/InterMap: Self
  108. # /cgi-bin/emacs? -- which allows you to link to Self:action=index.
  109. # Permit page authors to link to URLs resembling:
  110. # "See [[/?action=index|the site map]]."
  111. #
  112. # Which Oddmuse converts to HTML resembling:
  113. # "See <a href="/?action=index">the site map</a>."
  114. #
  115. # When not using this extension, authors must add this Wiki's base URL:
  116. # "See [[http://www.oddmuse.com/cgi-bin/oddmuse?action=index|the site map]]."
  117. # my $UrlChars = '[-a-zA-Z0-9/@=+$_~*.,;:?!\'"()&#%]'; # see RFC 2396
  118. # $FullUrlPattern = "((?:$UrlProtocols:|/)$UrlChars+)";
  119. # Permit page authors to link to other pages having semicolons in their names.
  120. # my $LinkCharsSansZero = "-;,.()' _1-9A-Za-z\x{0080}-\x{fffd}";
  121. # my $LinkChars = $LinkCharsSansZero.'0';
  122. # $FreeLinkPattern = "([$LinkCharsSansZero]|[$LinkChars][$LinkChars]+)";
  123. }
  124. # ....................{ MARKUP }....................
  125. push(@MyRules,
  126. \&CreoleRule,
  127. \&CreoleHeadingRule,
  128. \&CreoleListAndNewLineRule);
  129. # Creole link rules conflict with Oddmuse's default LinkRule.
  130. $RuleOrder{\&CreoleRule} = -10;
  131. # Creole heading rules must come after the TocRule.
  132. $RuleOrder{\&CreoleHeadingRule} = 100;
  133. # List items must come later than MarkupRule because *foo* at the
  134. # beginning of a line should be bold, not the list item foo*. Also,
  135. # newlines must come after list items, otherwise this will add a lot
  136. # of useless "</br>" tags.
  137. $RuleOrder{\&CreoleListAndNewLineRule} = 180;
  138. # Oddmuse's built-in ListRule conflicts with above CreoleListAndNewLineRule.
  139. # Thus, we ensure the latter is applied before the former.
  140. $RuleOrder{\&ListRule} = 190;
  141. =head2 CreoleRule
  142. Handles the large part of Wiki Creole syntax.
  143. Technically, as Oddmuse's default C<LinkRules> function also conflicts with
  144. this extension's link rules and does not comply, in any case, with the Wiki
  145. Creole rules for links, we should also nullify Oddmuse's default C<LinkRules>
  146. function. Sadly, we don't. Why? Since existing Oddmuse Wikis using this
  147. extension depend on Oddmuse's default C<LinkRules> function, and as it's no
  148. terrible harm to let that function be, we have to let it be. Bah!
  149. =cut
  150. sub CreoleRule {
  151. # "$is_interlinking" is a boolean that, if true, indicates this rule should
  152. # make interlinks (i.e., links to Wiki pages on other, external Wikis) and,
  153. # and, if false, should not. (Typically, Oddmuse sets this to false when
  154. # including external HTML pages into local Wiki pages.)
  155. my ($is_interlinking, $is_intraanchoring) = @_;
  156. # horizontal rule
  157. # ----
  158. if ($bol && m/\G[ \t]*----[ \t]*(\n|$)/cg) {
  159. return CloseHtmlEnvironments().$q->hr().AddHtmlEnvironment('p');
  160. }
  161. # {{{
  162. # nowiki block
  163. # }}}
  164. elsif ($bol && m/\G\{\{\{[ \t]*\n(.*?)\n\}\}\}[ \t]*(\n|$)/cgs) {
  165. my $str = $1;
  166. return CloseHtmlEnvironments()
  167. .$q->pre({-class=> 'real'}, $str)
  168. .AddHtmlEnvironment('p');
  169. }
  170. # escape next char (and prevent // in URLs from enabling italics)
  171. # ~
  172. elsif (m/\G(~($FullUrlPattern|\S))/cg) {
  173. return
  174. ($CreoleTildeAlternative and
  175. index( 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  176. .'abcdefghijklmnopqrstuvwxyz'
  177. .'0123456789', $2) != -1)
  178. ? $1 # tilde stays
  179. : $2; # tilde disappears
  180. }
  181. # **bold**
  182. elsif (m/\G\*\*/cg) { return AddOrCloseHtmlEnvironment('strong'); }
  183. # //italic//
  184. elsif (m/\G\/\//cg) { return AddOrCloseHtmlEnvironment('em'); }
  185. # {{{preformatted code}}}
  186. elsif (m/\G\{\{\{(.*?}*)\}\}\}/cg) { return $q->code($1); }
  187. # download: {{pic}} and {{pic|text}}
  188. elsif (m/\G(\{\{$FreeLinkPattern$CreoleLinkTextPattern\}\})/cgs) {
  189. my $text = $4 || $2;
  190. return GetCreoleLinkHtml($1, GetDownloadLink(FreeToNormal($2), 1, undef, $text), $text);
  191. }
  192. # image link: {{url}} and {{url|text}}
  193. elsif (m/\G\{\{$FullUrlPattern$CreoleLinkTextPattern\}\}/cgs) {
  194. return GetCreoleImageHtml(
  195. $q->a({-href=> UnquoteHtml($1),
  196. -class=> 'image outside'},
  197. $q->img({-src=> UnquoteHtml($1),
  198. -alt=> UnquoteHtml($3),
  199. -title=> UnquoteHtml($3),
  200. -class=> 'url outside',
  201. -loading=>'lazy'})));
  202. }
  203. # image link: [[link|{{pic}}]] and [[link|{{pic|text}}]]
  204. elsif (m/\G(\[\[$FreeLinkPattern$CreoleLinkPipePattern
  205. \{\{$FreeLinkPattern$CreoleLinkTextPattern\}\}\]\])/cgsx) {
  206. my $text = $5 || $2;
  207. return GetCreoleLinkHtml($1, GetCreoleImageHtml(
  208. ScriptLink(UrlEncode(FreeToNormal($2)),
  209. $q->img({-src=> GetDownloadLink(FreeToNormal($3), 2),
  210. -alt=> UnquoteHtml($text),
  211. -title=> UnquoteHtml($text),
  212. -class=> 'upload',
  213. -loading=>'lazy'}), 'image')), $text);
  214. }
  215. # image link: [[link|{{url}}]] and [[link|{{url|text}}]]
  216. elsif (m/\G(\[\[$FreeLinkPattern$CreoleLinkPipePattern
  217. \{\{$FullUrlPattern$CreoleLinkTextPattern\}\}\]\])/cgsx) {
  218. my $text = $5 || $2;
  219. return GetCreoleLinkHtml($1, GetCreoleImageHtml(
  220. ScriptLink(UrlEncode(FreeToNormal($2)),
  221. $q->img({-src=> UnquoteHtml($3),
  222. -alt=> UnquoteHtml($text),
  223. -title=> UnquoteHtml($text),
  224. -class=> 'url outside',
  225. -loading=>'lazy'}), 'image')), $text);
  226. }
  227. # image link: [[url|{{pic}}]] and [[url|{{pic|text}}]]
  228. elsif (m/\G(\[\[$FullUrlPattern$CreoleLinkPipePattern
  229. \{\{$FreeLinkPattern$CreoleLinkTextPattern\}\}\]\])/cgsx) {
  230. my $text = $5 || $2;
  231. return GetCreoleLinkHtml($1, GetCreoleImageHtml(
  232. $q->a({-href=> UnquoteHtml($2), -class=> 'image outside'},
  233. $q->img({-src=> GetDownloadLink(FreeToNormal($3), 2),
  234. -alt=> UnquoteHtml($text),
  235. -title=> UnquoteHtml($text),
  236. -class=> 'upload',
  237. -loading=>'lazy'}))), $text);
  238. }
  239. # image link: [[url|{{url}}]] and [[url|{{url|text}}]]
  240. elsif (m/\G\[\[$FullUrlPattern$CreoleLinkPipePattern
  241. \{\{$FullUrlPattern$CreoleLinkTextPattern\}\}\]\]/cgsx) {
  242. return GetCreoleImageHtml(
  243. $q->a({-href=> UnquoteHtml($1), -class=> 'image outside'},
  244. $q->img({-src=> UnquoteHtml($2),
  245. -alt=> UnquoteHtml($4),
  246. -title=> UnquoteHtml($4),
  247. -class=> 'url outside',
  248. -loading=>'lazy'})));
  249. }
  250. # link: [[url]] and [[url|text]]
  251. elsif (m/\G\[\[$FullUrlPattern$CreoleLinkTextPattern\]\]/cgs) {
  252. # Permit embedding of Creole syntax within link text. (Rather complicated,
  253. # but it does the job remarkably.)
  254. my $link_url = $1;
  255. my $link_text = $3 ? CreoleRuleRecursive($3, @_) : $link_url;
  256. # GetUrl() takes parameters resembling:
  257. # ~ the link's URL.
  258. # ~ the link's text (to be displayed for that URL).
  259. # ~ a boolean (to be used Gods' know how).
  260. return GetUrl($link_url, $link_text, 1);
  261. }
  262. # link: [[page]] and [[page|text]]
  263. elsif (m/\G(\[\[$FreeLinkPattern$CreoleLinkTextPattern\]\])/cgs) {
  264. my $markup = $1;
  265. my $page_name = $2;
  266. my $link_text = $4 ? CreoleRuleRecursive($4, @_) : $page_name;
  267. return GetCreoleLinkHtml($markup,
  268. GetPageOrEditLink($page_name, $link_text, 0, 1), $link_text);
  269. }
  270. # interlink: [[Wiki:page]] and [[Wiki:page|text]]
  271. elsif ($is_interlinking and
  272. m/\G(\[\[$FreeInterLinkPattern$CreoleLinkTextPattern\]\])/cgs) {
  273. my $markup = $1;
  274. my $interlink = $2;
  275. my $interlink_text = $4;
  276. my ($site_name, $page_name) = $interlink =~ m~^($InterSitePattern):(.*)$~;
  277. # Permit embedding of Creole syntax within interlink text. We operate on
  278. # "$interlink_text", rather than "$4", since that ordinal has already been
  279. # overridden by the above regular expression match.
  280. $interlink_text = $interlink_text
  281. ? CreoleRuleRecursive($interlink_text, @_)
  282. : $q->span({-class=> 'site'}, $site_name)
  283. .$q->span({-class=> 'separator'}, ':')
  284. .$q->span({-class=> 'page'}, $page_name);
  285. # If the Wiki for this interlink is a registered Wiki (that is, it appears
  286. # in this Wiki's "$InterMap" page), then produce an interlink to it;
  287. # otherwise, produce a normal intralink to a page on this Wiki.
  288. return GetCreoleLinkHtml($markup,
  289. $InterSite{$site_name}
  290. ? GetInterLink ($interlink, $interlink_text, 0, 1)
  291. : GetPageOrEditLink($page_name, $interlink_text, 0, 1), $interlink_text);
  292. }
  293. #
  294. # Table syntax is matched last (or nearly last), so as to allow other Creole-
  295. # specific syntax within tables.
  296. #
  297. # tables using | -- end of the table (two newlines) or row (one newline)
  298. elsif (InElement('table')) {
  299. # We know that this is the end of this table row, if we match:
  300. # * an explicit "|" character followed by: a newline character and
  301. # another "|" character; or
  302. # * an explicit newline character followed by: a "|" character.
  303. #
  304. # That is to say, the "|" character terminating a table row is optional.
  305. #
  306. # In either case, the newline character signifies the end of this table
  307. # row and the "|" character that follows it signifies the start of a new
  308. # row. We avoid consuming the "|" character by matching it with a lookahead.
  309. if (m/\G([ \t]*\|)?[ \t]*\n(?=$CreoleTableCellPattern)/cg) {
  310. return CloseHtmlEnvironmentUntil('table').AddHtmlEnvironment('tr');
  311. }
  312. # If block level elements are allowed in table cells, we know that this is
  313. # the end of the table, if we match:
  314. # * an explicit "|" character followed by: a newline character not
  315. # followed by another "|" character, or an implicit end-of-page.
  316. #
  317. # Otherwise, we know that this is the end of the table, if we match:
  318. # * an explicit "|" character followed by: a newline character not
  319. # followed by another "|" character, or an implicit end-of-page; or
  320. # * two newline characters.
  321. #
  322. # This condition should appear after the end-of-row test, above.
  323. elsif (m/\G[ \t]*\|[ \t]*(\n|$)/cg or
  324. (!$CreoleTableCellsContainBlockLevelElements and m/\G[ \t]*\n\n/cg)) {
  325. # Note: we do not call "CloseHtmlEnvironmentsCreoleOld", as that function
  326. # refers to the Oddmuse built-in. If another module with name
  327. # lexically following "creole.pl" also redefines the built-in
  328. # "CloseHtmlEnvironments" function, then calling the
  329. # "CloseHtmlEnvironmentsCreoleOld" function causes that other
  330. # module's redefinition to not be called. (Yes; an entangling mess
  331. # we've made for ourselves, here. Clearly, this needs a rethink in
  332. # some later Oddmuse refactoring.)
  333. return CloseHtmlEnvironment('table').AddHtmlEnvironment('p');
  334. }
  335. # Lastly, we know this this is start of a new table cell (and possibly also
  336. # the end of the last table cell), if we match:
  337. # * an explicit "|" character.
  338. #
  339. # This condition should appear after the end-of-table test, above.
  340. elsif (m/\G$CreoleTableCellPattern/cg) {
  341. # This is the start of a new table cell. However, we only consider that
  342. # equivalent to the "$bol" variable when the
  343. # "$CreoleTableCellsContainBlockLevelElements" variable is enabled. (In
  344. # other words, we only declare that we may insert block level elements at
  345. # the start of this new table cell, when we allow block level elements in
  346. # table cells. Yum.)
  347. $CreoleIsTableCellBol = $CreoleTableCellsContainBlockLevelElements;
  348. my $tag = $2 ? 'th' : 'td';
  349. my $column_span = length($1);
  350. my $is_right_justified = $3;
  351. # Now that we've retrieved all numbered matches, match another lookahead.
  352. my $is_left_justified = m/\G(?=[^\n|]*?[ \t]+\|)/;
  353. my $attributes = $column_span == 1 ? '' : qq{colspan="$column_span"};
  354. if ($is_left_justified and
  355. $is_right_justified) { $attributes .= 'align="center"' }
  356. elsif ($is_right_justified) { $attributes .= 'align="right"' }
  357. elsif ($is_left_justified) { $attributes .= 'align="left"' }
  358. return
  359. (InElement('td') || InElement('th') ? CloseHtmlEnvironmentUntil('tr') : '')
  360. .AddHtmlEnvironment($tag, $attributes);
  361. }
  362. }
  363. # tables using | -- an ordinary table cell
  364. #
  365. # Please note that order is important, here; this should appear after all
  366. # markup dependent on being in a current table.
  367. #
  368. # Also, the "|" character also signifies the start of a new table cell. Thus,
  369. # we avoid consuming that character by matching it with a lookahead.
  370. elsif ($bol and m/\G(?=$CreoleTableCellPattern)/cg) {
  371. return OpenHtmlEnvironment('table', 1, 'user').AddHtmlEnvironment('tr');
  372. }
  373. return;
  374. }
  375. sub CreoleHeadingRule {
  376. # header opening: = to ====== for h1 to h6
  377. #
  378. # header opening and closing have been partitioned into two separate
  379. # conditional matches rather than congealed into one conditional match. Why?
  380. # Because, in so doing, we permit application of other markup rules,
  381. # elsewhere, to header text. This, in turn, permits insertion and
  382. # interpretation of complex markup in header text; e.g.,
  383. # == //This Is a **Level-2** Header %%Having Complex Markup%%.// ==
  384. if ($bol and m~\G(\s*\n)*(=+)[ \t]*~cg) {
  385. my $header_depth = length($2);
  386. ($CreoleHeaderHtmlTag, $CreoleHeaderHtmlTagAttr) = $header_depth <= 6
  387. ? ('h'.$header_depth, '')
  388. : ('h6', qq{class="h$header_depth"});
  389. return CloseHtmlEnvironments()
  390. . AddHtmlEnvironment($CreoleHeaderHtmlTag, $CreoleHeaderHtmlTagAttr);
  391. }
  392. # header closing: = to ======, newline, or EOF
  393. #
  394. # Note: partitioning this from the heading opening conditional, above,
  395. # typically causes Oddmuse to insert an extraneous space at the end of
  396. # header tags. This is non-dangerous, fortunately; and changes nothing.
  397. elsif ($CreoleHeaderHtmlTag and m~\G[ \t]*=*[ \t]*(\n|$)~cg) {
  398. my $header_html =
  399. CloseHtmlEnvironment($CreoleHeaderHtmlTag, '^'.$CreoleHeaderHtmlTagAttr.'$')
  400. .AddHtmlEnvironment('p');
  401. $CreoleHeaderHtmlTag = $CreoleHeaderHtmlTagAttr = '';
  402. return $header_html;
  403. }
  404. return;
  405. }
  406. sub CreoleListAndNewLineRule {
  407. my $is_in_list_item = InElement('li');
  408. # # numbered list
  409. # * bullet list (nestable; needs space when nested to disambiguate from bold)
  410. if (($bol and m/\G[ \t]*([#*])[ \t]*/cg) or
  411. ($is_in_list_item and m/\G[ \t]*\n+[ \t]*(#+)[ \t]*/cg) or
  412. ($is_in_list_item and m/\G[ \t]*\n+[ \t]*(\*+)[ \t]+/cg)) {
  413. # Note: the first line of this return statement is --not-- equivalent to:
  414. # "return CloseHtmlEnvironmentUntil('li')", as that line does not permit
  415. # modules overriding the CloseHtmlEnvironments() function to "have a say."
  416. return ($is_in_list_item ? CloseHtmlEnvironmentUntil('li') : CloseHtmlEnvironments())
  417. .OpenHtmlEnvironment(substr($1, 0, 1) eq '#' ? 'ol' : 'ul', length($1), '', 'ol|ul')
  418. .AddHtmlEnvironment('li');
  419. }
  420. # - bullet list (not nestable; always needs space)
  421. elsif ($CreoleDashStyleUnorderedLists and (
  422. ($bol and m/\G[ \t]*(-)[ \t]+/cg) or
  423. ($is_in_list_item and m/\G[ \t]*\n+[ \t]*(-)[ \t]+/cg))) {
  424. return ($is_in_list_item ? CloseHtmlEnvironmentUntil('li') : CloseHtmlEnvironments())
  425. .OpenHtmlEnvironment('ul', length($1))
  426. .AddHtmlEnvironment ('li');
  427. }
  428. # paragraphs: at least two newlines
  429. elsif (m/\G\s*\n(\s*\n)+/cg) {
  430. return CloseHtmlEnvironments().AddHtmlEnvironment('p');
  431. }
  432. # line break: one newline or explicit "\\"
  433. #
  434. # Note, single newlines not matched by this conditional will be converted into
  435. # a single space. (In general, this is what you want.)
  436. elsif (($CreoleLineBreaks and m/\G\s*\n/cg) or m/\G\\\\(\s*\n?)/cg) {
  437. return $q->br();
  438. }
  439. return;
  440. }
  441. # ....................{ HTML }....................
  442. =head2 GetCreoleImageHtml
  443. Returns the passed HTML image, conditionally wrapped within an HTML paragraph
  444. tag having an necessary image class when the passed HTML also represents such a
  445. new paragraph. Difficult to explain, isn't she?
  446. =cut
  447. sub GetCreoleImageHtml {
  448. my $image_html = shift;
  449. return
  450. ($bol ? CloseHtmlEnvironments().AddHtmlEnvironment('p', 'class="image"') : '')
  451. .$image_html;
  452. }
  453. =head2 GetCreoleLinkHtml
  454. Marks the passed HTML as a dirty block, unless this HTML belongs to an HTML
  455. header tag. Such tags may not contain dirty blocks! Most Oddmuse modules using
  456. header tags (e.g., "sidebar.pl", "toc.pl") require, as a caching efficiency,
  457. header text to be clean. This is a nearly necessary efficiency, since
  458. regeneration of markup for those modules is an often costly operation. (We
  459. certainly don't want to regenerate the Table of Contents for each page having at
  460. least one header having at least one dirty link whenever an external user browses
  461. to that page!)
  462. Thus, if in a header, this function cleans links out of the passed HTML and
  463. returns the resultant HTML (to the current clean block). Otherwise, this
  464. function appends the resultant HTML to a new dirty block, prints it, and returns
  465. it. (This does not print the resultant HTML when clean, since clean blocks are
  466. printed, automatically, by the next call to C<Dirty>.)
  467. This function, lastly, accepts three function parameters. These are:
  468. =over
  469. =item C<$markup>. (This is the Wiki markup string to be marked as dirty when it
  470. is not embedded in a Creole header.)
  471. =item C<$html>. (This is the HTML string to be marked as dirty when this HTML
  472. is not embedded in a Creole header.)
  473. =item C<$text>. (This is the text string to be marked as clean when this HTML
  474. is embedded within a Creole header.)
  475. =back
  476. Creole functions, above, should **not** call the C<Dirty> function directly.
  477. Rather, they should always call this function...with appropriate parameters.
  478. =cut
  479. sub GetCreoleLinkHtml {
  480. my ($markup, $html, $link_text) = @_;
  481. if ($CreoleHeaderHtmlTag) { return $link_text; }
  482. else {
  483. Dirty($markup);
  484. print $html;
  485. return '';
  486. }
  487. }
  488. # ....................{ FUNCTIONS }....................
  489. *RunMyRulesCreoleOld = \&RunMyRules;
  490. *RunMyRules = \&RunMyRulesCreole;
  491. =head2 RunMyRulesCreole
  492. Runs all markup rules for the current block of page markup. This redefinition
  493. ensures that the beginning of a table cell is considered the beginning of a
  494. block-level element -- that, in other words, the C<$bol> global be set to 1.
  495. If the C<$CreoleTableCellsContainBlockLevelElements> option is set to 0 (the
  496. default), then this function is, effectively, a no-op - and just calls the
  497. default C<RunMyRules> function.
  498. =cut
  499. sub RunMyRulesCreole {
  500. # See documentation for the "$CreoleIsTableCellBol" variable, above.
  501. my $creole_is_table_cell_bol_last = $CreoleIsTableCellBol;
  502. $bol = 1 if $CreoleIsTableCellBol;
  503. my $html = RunMyRulesCreoleOld(@_);
  504. $CreoleIsTableCellBol = '' if $creole_is_table_cell_bol_last;
  505. return $html;
  506. }
  507. =head2 CreoleRuleRecursive
  508. Calls C<CreoleRule> on the passed string, from within some existing call to
  509. C<CreoleRule>. This function ensures, among other safeties, that the
  510. C<CreoleRule> function is not recursed into more than once.
  511. =cut
  512. our $CreoleRuleRecursing; # must have a variable to localize below
  513. sub CreoleRuleRecursive {
  514. my $markup = shift;
  515. return $markup if $CreoleRuleRecursing; # avoid infinite loops
  516. local $CreoleRuleRecursing = 1; # use local for the mod_perl case
  517. local $bol = 0; # prevent block level element handling
  518. # Preserve global variables.
  519. my ($oldpos, $old_) = (pos, $_);
  520. my @oldHtmlStack = @HtmlStack;
  521. my @oldHtmlAttrStack = @HtmlAttrStack;
  522. # Reset global variables.
  523. $_ = $markup;
  524. @HtmlStack = @HtmlAttrStack = ();
  525. my ($html, $html_creole) = ('', '');
  526. # The contents of this loop are, in part, hacked from the guts of Oddmuse's
  527. # ApplyRules() function. We cannot simply call that function, as it "cleans"
  528. # the HTML converted from the text passed to it, rather than returns that
  529. # HTML.
  530. while (1) {
  531. if ($html_creole = CreoleRule(@_) or
  532. ($CreoleIsCreoleAddition and # try "creoleaddition.pl", too.
  533. $html_creole = CreoleAdditionRule(@_))) {
  534. $html .= $html_creole;
  535. }
  536. elsif (m/\G&amp;([a-z]+|#[0-9]+|#x[a-fA-F0-9]+);/cg) { # entity references
  537. $html .= "&$1;";
  538. }
  539. elsif (m/\G\s+/cg) {
  540. $html .= ' ';
  541. }
  542. elsif ( m/\G([A-Za-z\x{0080}-\x{fffd}]+([ \t]+[a-z\x{0080}-\x{fffd}]+)*[ \t]+)/cg
  543. or m/\G([A-Za-z\x{0080}-\x{fffd}]+)/cg
  544. or m/\G(\S)/cg) {
  545. $html .= $1; # multiple words but do not match http://foo
  546. }
  547. else { last; }
  548. }
  549. # Restore global variables, in reverse order.
  550. @HtmlAttrStack = @oldHtmlAttrStack;
  551. @HtmlStack = @oldHtmlStack;
  552. ($_, pos) = ($old_, $oldpos);
  553. # Allow entrance into this function, again.
  554. $CreoleRuleRecursing = 0;
  555. return $html;
  556. }
  557. =head1 COPYRIGHT AND LICENSE
  558. The information below applies to everything in this distribution,
  559. except where noted.
  560. Copyleft 2008 by Brian Curry <http://raiazome.com>.
  561. Copyright 2008 by Weakish Jiang <weakish@gmail.com>.
  562. Copyright 2006, 2007 by Alex Schroeder <alex@gnu.org>.
  563. This program is free software; you can redistribute it and/or modify
  564. it under the terms of the GNU General Public License as published by
  565. the Free Software Foundation; either version 3 of the License, or
  566. (at your option) any later version.
  567. This program is distributed in the hope that it will be useful,
  568. but WITHOUT ANY WARRANTY; without even the implied warranty of
  569. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  570. GNU General Public License for more details.
  571. You should have received a copy of the GNU General Public License
  572. along with this program. If not, see L<http://www.gnu.org/licenses/>.
  573. =cut