markdown-rule.pl 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. #! /usr/bin/perl
  2. # Copyright (C) 2014–2022 Alex Schroeder <alex@gnu.org>
  3. # This program is free software: you can redistribute it and/or modify it under
  4. # the terms of the GNU General Public License as published by the Free Software
  5. # Foundation, either version 3 of the License, or (at your option) any later
  6. # version.
  7. #
  8. # This program is distributed in the hope that it will be useful, but WITHOUT
  9. # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  10. # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
  11. #
  12. # You should have received a copy of the GNU General Public License along with
  13. # this program. If not, see <http://www.gnu.org/licenses/>.
  14. use strict;
  15. use v5.10;
  16. AddModuleDescription('markdown-rule.pl', 'Markdown Rule Extension');
  17. our ($q, $bol, %RuleOrder, @MyRules, $UrlProtocols, $FullUrlPattern, @HtmlStack, $Fragment);
  18. push(@MyRules, \&MarkdownRule);
  19. # Since we want this package to be a simple add-on, we try and avoid
  20. # all conflicts by going *last*. The use of # for numbered lists by
  21. # Usemod conflicts with the use of # for headings, for example.
  22. $RuleOrder{\&MarkdownRule} = 200;
  23. # http://daringfireball.net/projects/markdown/syntax
  24. # https://help.github.com/articles/markdown-basics
  25. # https://help.github.com/articles/github-flavored-markdown
  26. sub MarkdownRule {
  27. my $alignment;
  28. # \escape
  29. if (m/\G\\([-#>*`=])/cg) {
  30. return $1;
  31. }
  32. # atx headers
  33. elsif ($bol and m~\G(\s*\n)*(#{1,6})[ \t]*~cg) {
  34. my $header_depth = length($2);
  35. return CloseHtmlEnvironments()
  36. . AddHtmlEnvironment("h" . $header_depth);
  37. }
  38. # end atx header at a newline
  39. elsif ((InElement('h1') or InElement('h2') or InElement('h3') or
  40. InElement('h4') or InElement('h5') or InElement('h6'))
  41. and m/\G\n/cg) {
  42. return CloseHtmlEnvironments()
  43. . AddHtmlEnvironment("p");
  44. }
  45. # > blockquote
  46. # with continuation
  47. elsif ($bol and m/\G((?:&gt;.*\n?)+)/cg) {
  48. Clean(CloseHtmlEnvironments());
  49. Dirty($1);
  50. my $text = $1;
  51. my ($oldpos, $old_) = ((pos), $_);
  52. print '<blockquote>';
  53. $text =~ s/^&gt; ?//gm;
  54. ApplyRules($text, 1, 1, undef, 'p'); # local links, anchors, no revision, start with p
  55. print '</blockquote>';
  56. Clean(AddHtmlEnvironment('p')); # if dirty block is looked at later, this will disappear
  57. ($_, pos) = ($old_, $oldpos); # restore \G (assignment order matters!)
  58. }
  59. # """ = blockquote, too
  60. elsif ($bol and m/\G("""[ \t]*\n(.*?)\n"""[ \t]*(?:\n|$))/cgs) {
  61. Clean(CloseHtmlEnvironments());
  62. Dirty($1);
  63. my ($oldpos, $old_) = ((pos), $_);
  64. print '<blockquote>';
  65. ApplyRules($2, 1, 1, undef, 'p'); # local links, anchors, no revision, start with p
  66. print '</blockquote>';
  67. Clean(AddHtmlEnvironment('p')); # if dirty block is looked at later, this will disappear
  68. ($_, pos) = ($old_, $oldpos); # restore \G (assignment order matters!)
  69. }
  70. # ``` = code
  71. elsif ($bol and m/\G```[ \t]*\n(.*?)\n```[ \t]*(\n|$)/cgs) {
  72. return CloseHtmlEnvironments() . $q->pre($1)
  73. . AddHtmlEnvironment("p");
  74. }
  75. # ` = code may not start with a newline
  76. elsif (m/\G`([^\n`][^`]*)`/cg) {
  77. return $q->code($1);
  78. }
  79. # ***bold and italic***
  80. elsif (not InElement('strong') and not InElement('em') and m/\G\*\*\*/cg) {
  81. return AddHtmlEnvironment('em') . AddHtmlEnvironment('strong');
  82. }
  83. elsif (InElement('strong') and InElement('em') and m/\G\*\*\*/cg) {
  84. return CloseHtmlEnvironment('strong') . CloseHtmlEnvironment('em');
  85. }
  86. # **bold**
  87. elsif (m/\G\*\*/cg) {
  88. return AddOrCloseHtmlEnvironment('strong');
  89. }
  90. # *italic* (closing before adding environment!)
  91. elsif (InElement('em') and m/\G\*/cg) {
  92. return CloseHtmlEnvironment('em');
  93. }
  94. elsif ($bol and m/\G\*/cg or m/\G(?<=\P{Word})\*/cg) {
  95. return AddHtmlEnvironment('em');
  96. }
  97. # ~~strikethrough~~ (deleted)
  98. elsif (m/\G~~/cg) {
  99. return AddOrCloseHtmlEnvironment('del');
  100. }
  101. # indented lists = nested lists
  102. elsif ($bol and m/\G(\s*\n)*()([*-]|\d+\.)[ \t]+/cg
  103. or InElement('li') && m/\G(\s*\n)+( *)([*-]|\d+\.)[ \t]+/cg) {
  104. my $nesting_goal = int(length($2)/4) + 1;
  105. my $tag = ($3 eq '*' or $3 eq '-') ? 'ul' : 'ol';
  106. my $nesting_current = 0;
  107. my @nesting = grep(/^[uo]l$/, @HtmlStack);
  108. my $html = CloseHtmlEnvironmentUntil('li'); # but don't close li element
  109. # warn "\@nesting is (@nesting)\n";
  110. # warn " goal is $nesting_goal\n";
  111. # warn " tag is $3 > $tag\n";
  112. while (@nesting > $nesting_goal) {
  113. $html .= CloseHtmlEnvironment(pop(@nesting));
  114. # warn " pop\n";
  115. }
  116. # if have the correct nesting level, but the wrong type, close it
  117. if (@nesting == $nesting_goal
  118. and $nesting[$#nesting] ne $tag) {
  119. $html .= CloseHtmlEnvironment(pop(@nesting));
  120. # warn " switch\n";
  121. }
  122. # now add a list of the appropriate type
  123. if (@nesting < $nesting_goal) {
  124. $html .= AddHtmlEnvironment($tag);
  125. # warn " add $tag\n";
  126. }
  127. # and a new list item
  128. if (InElement('li')) {
  129. $html .= CloseHtmlEnvironmentUntil($nesting[$#nesting]);
  130. # warn " close li\n";
  131. }
  132. $html .= AddHtmlEnvironment('li');
  133. # warn " add li\n";
  134. return $html;
  135. }
  136. # beginning of a table
  137. elsif ($bol and !InElement('table') and m/\G\|/cg) {
  138. # warn pos . " beginning of a table";
  139. $alignment = 'style="text-align: right"' if m/\G([ \t]+)/cg;
  140. $alignment = 'style="text-align: center"' if $alignment and m/\G(?=[^|]+[ \t]+\|)/cg;
  141. $Fragment =~ s/[ \t]+$//; # cleanup trailing whitespace if previous column was centered
  142. return OpenHtmlEnvironment('table',1)
  143. . AddHtmlEnvironment('tr')
  144. . AddHtmlEnvironment('th', $alignment);
  145. }
  146. # end of a row and beginning of a new row
  147. elsif (InElement('table') and m/\G\|?\n\|/cg) {
  148. # warn pos . " end of a row and beginning of a new row";
  149. $alignment = 'style="text-align: right"' if m/\G([ \t]+)/cg;
  150. $alignment = 'style="text-align: center"' if $alignment and m/\G(?=[^|]+[ \t]+\|)/cg;
  151. $Fragment =~ s/[ \t]+$//; # cleanup trailing whitespace if previous column was centered
  152. return CloseHtmlEnvironment('tr')
  153. . AddHtmlEnvironment('tr')
  154. . AddHtmlEnvironment('td', $alignment);
  155. }
  156. # otherwise the table ends
  157. elsif (InElement('table') and m/\G\|?(\n|$)/cg) {
  158. # warn pos . " otherwise the table ends";
  159. $Fragment =~ s/[ \t]+$//; # cleanup trailing whitespace if previous column was centered
  160. return CloseHtmlEnvironment('table')
  161. . AddHtmlEnvironment('p');
  162. }
  163. # continuation of the first row
  164. elsif (InElement('th') and m/\G\|/cg) {
  165. # warn pos . " continuation of the first row";
  166. $alignment = 'style="text-align: right"' if m/\G([ \t]+)/cg;
  167. $alignment = 'style="text-align: center"' if $alignment and m/\G(?=[^|]+[ \t]+\|)/cg;
  168. $Fragment =~ s/[ \t]+$//; # cleanup trailing whitespace if previous column was centered
  169. return CloseHtmlEnvironment('th')
  170. . AddHtmlEnvironment('th', $alignment);
  171. }
  172. # continuation of other rows
  173. elsif (InElement('td') and m/\G\|/cg) {
  174. # warn pos . " continuation of other rows";
  175. $alignment = 'style="text-align: right"' if m/\G([ \t]+)/cg;
  176. $alignment = 'style="text-align: center"' if $alignment and m/\G(?=[^|]+[ \t]+\|)/cg;
  177. $Fragment =~ s/[ \t]+$//; # cleanup trailing whitespace if previous column was centered
  178. return CloseHtmlEnvironment('td')
  179. . AddHtmlEnvironment('td', $alignment);
  180. }
  181. # whitespace indentation = code
  182. elsif ($bol and m/\G(\s*\n)*( .+)\n?/cg) {
  183. my $str = substr($2, 4);
  184. while (m/\G( .*)\n?/cg) {
  185. $str .= "\n" . substr($1, 4);
  186. }
  187. return OpenHtmlEnvironment('pre',1) . $str; # always level 1
  188. }
  189. # link: [an example](http://example.com/ "Title")
  190. elsif (m/\G\[((?:[^]\n]+\n?)+)\]\((\S+)(\s+"(.+?)")?\)/cg) {
  191. my ($text, $url, $title) = ($1, $2, $4);
  192. my %params;
  193. $params{-href} = $url;
  194. $params{-class} = "url";
  195. $params{-title} = $title if $title;
  196. return $q->a(\%params, $text);
  197. }
  198. # link: [an example](#foo "Title")
  199. elsif (m/\G\[((?:[^]\n]+\n?)+)\]\((#\S)+(\s+"(.+?)")?\)/cg) {
  200. my ($text, $url, $title) = ($1, $2, $4);
  201. my %params;
  202. $params{-href} = $url;
  203. $params{-class} = "named-anchor";
  204. $params{-title} = $title if $title;
  205. return $q->a(\%params, $text);
  206. }
  207. # setext headers (must come after block quotes)
  208. elsif ($bol and m/\G((\s*\n)*(.+?)[ \t]*\n(-+|=+)[ \t]*\n)/cg) {
  209. return CloseHtmlEnvironments()
  210. . (substr($4,0,1) eq '=' ? $q->h2($3) : $q->h3($3))
  211. . AddHtmlEnvironment('p');
  212. }
  213. return;
  214. }
  215. push(@MyRules, \&MarkdownExtraRule);
  216. sub MarkdownExtraRule {
  217. # __italic underline__
  218. if (m/\G__/cg) {
  219. return AddOrCloseHtmlEnvironment('em', 'style="font-style: normal; text-decoration: underline"');
  220. }
  221. # _underline_ (closing before adding environment!)
  222. elsif (InElement('em', 'style="font-style: normal; text-decoration: underline"') and m/\G_/cg) {
  223. return CloseHtmlEnvironment('em');
  224. }
  225. elsif ($bol and m/\G_/cg or m/\G(?<=\P{Word})_(?=\S)/cg) {
  226. return AddHtmlEnvironment('em', 'style="font-style: normal; text-decoration: underline"');
  227. }
  228. # //italic//
  229. elsif (m/\G\/\//cg) {
  230. return AddOrCloseHtmlEnvironment('em');
  231. }
  232. # /italic/ (closing before adding environment!)
  233. elsif (InElement('em') and m/\G\//cg) {
  234. return CloseHtmlEnvironment('em');
  235. }
  236. elsif ($bol and m/\G\//cg or m/\G(?<=[|[:space:]])\/(?=\S)/cg) {
  237. return AddHtmlEnvironment('em');
  238. }
  239. return;
  240. }