markup.pl 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  1. # Copyright (C) 2004–2015 Alex Schroeder <alex@gnu.org>
  2. #
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 3 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. # GNU General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  15. use strict;
  16. use v5.10;
  17. AddModuleDescription('markup.pl', 'Markup Extension');
  18. our ($q, $bol, @MyRules, %RuleOrder, @MyInitVariables);
  19. our (%MarkupPairs, %MarkupForcedPairs, %MarkupSingles, %MarkupLines,
  20. $MarkupQuotes, $MarkupQuoteTable);
  21. $MarkupQuotes = 1;
  22. # $MarkupQuotes 'hi' "hi" I'm Favored in
  23. # 0 'hi' "hi" I'm Typewriters
  24. # 1 ‘hi’ “hi” I’m Britain and North America
  25. # 2 ‹hi› «hi» I’m France and Italy
  26. # 3 ›hi‹ »hi« I’m Germany
  27. # 4 ‚hi’ „hi” I’m Germany
  28. # 0 1 2 3 4
  29. $MarkupQuoteTable = [[ "'", "'", '"', '"' , "'" ], # 0
  30. ['&#x2018;', '&#x2019;', '&#x201d;', '&#x201c;', '&#x2019;'], # 1
  31. ['&#x2039;', '&#x203a;', '&#x00bb;', '&#x00ab;', '&#x2019;'], # 2
  32. ['&#x203a;', '&#x2039;', '&#x00ab;', '&#x00bb;', '&#x2019;'], # 3
  33. ['&#x201a;', '&#x2018;', '&#x201c;', '&#x201e;', '&#x2019;'], # 4
  34. ];
  35. # $MarkupQuoteTable->[2]->[0] ‹
  36. # $MarkupQuoteTable->[2]->[1] ›
  37. # $MarkupQuoteTable->[2]->[2] »
  38. # $MarkupQuoteTable->[2]->[3] «
  39. # $MarkupQuoteTable->[2]->[4] ’
  40. push(@MyRules, \&MarkupRule);
  41. # The ---- rule in usemod.pl conflicts with the --- rule
  42. $RuleOrder{\&MarkupRule} = 150;
  43. %MarkupPairs = ('*' => 'b',
  44. '/' => 'i',
  45. '_' => ['em', {'style'=>'text-decoration: underline; font-style: normal;'}],
  46. '~' => 'em',
  47. );
  48. %MarkupForcedPairs = ("{{{\n" => ['pre', {}, '}}}'], # don't use undef instead of {}
  49. '##' => 'code',
  50. '%%' => 'span',
  51. '**' => 'b',
  52. '//' => 'i',
  53. '__' => ['em', {'style'=>'text-decoration: underline; font-style: normal;'}],
  54. '~~' => 'em',
  55. );
  56. # This could be done using macros, however: If we convert to the
  57. # numbered entity, the next person editing finds it hard to read. If
  58. # we convert to a unicode character, it is no longer obvious how to
  59. # achieve it.
  60. %MarkupSingles = ('...' => '&#x2026;', # HORIZONTAL ELLIPSIS
  61. '---' => '&#x2014;', # EM DASH
  62. '-- ' => '&#x2013; ', # EN DASH
  63. '-> ' => '&#x2192;&#x00a0;', # RIGHTWARDS ARROW, NO-BREAK SPACE
  64. '<-' => '&#8592;',
  65. '<--' => '&#8592;',
  66. '-->' => '&#x2192;',
  67. '=>' => '&#8658;',
  68. '==>' => '&#8658;',
  69. '<=>' => '&#8660;',
  70. '+/-' => '&#x00b1;',
  71. );
  72. %MarkupLines = ('>' => 'pre',
  73. );
  74. # either a single letter, or a string that begins with a single letter and ends with a non-space
  75. my $words = '([A-Za-z\x{0080}-\x{fffd}](?:[-%.,:;\'"!?0-9 A-Za-z\x{0080}-\x{fffd}]*?[-%.,:;\'"!?0-9A-Za-z\x{0080}-\x{fffd}])?)';
  76. # zero-width assertion to prevent km/h from counting
  77. my $nowordstart = '(?:(?<=[^-0-9A-Za-z\x{0080}-\x{fffd}])|^)';
  78. # zero-width look-ahead assertion to prevent km/h from counting
  79. my $nowordend = '(?=[^-0-9A-Za-z\x{0080}-\x{fffd}]|$)';
  80. my $markup_pairs_re = '';
  81. my $markup_forced_pairs_re = '';
  82. my $markup_singles_re = '';
  83. my $markup_lines_re = '';
  84. # do not add all block elements, because not all of them make sense,
  85. # as they cannot be nested -- thus it would not be possible to put
  86. # list items inside a list element, for example.
  87. my %block_element = map { $_ => 1 } qw(p blockquote address div h1 h2
  88. h3 h4 h5 h6 pre);
  89. # do this later so that the user can customize the vars
  90. push(@MyInitVariables, \&MarkupInit);
  91. sub MarkupInit {
  92. $markup_pairs_re = '\G([' . join('', (map { quotemeta(QuoteHtml($_)) }
  93. keys(%MarkupPairs))) . '])';
  94. $markup_pairs_re = qr/${nowordstart}${markup_pairs_re}${words}\1${nowordend}/;
  95. $markup_forced_pairs_re = '\G(' . join('|', (map { quotemeta(QuoteHtml($_)) }
  96. keys(%MarkupForcedPairs))) . ')';
  97. $markup_forced_pairs_re = qr/$markup_forced_pairs_re/;
  98. $markup_singles_re = '\G(' . join('|', (map { quotemeta(QuoteHtml($_)) }
  99. sort {$b cmp $a} # longer regex first
  100. keys(%MarkupSingles))) . ')';
  101. $markup_singles_re = qr/$markup_singles_re/;
  102. $markup_lines_re = '\G(' . join('|', (map { quotemeta(QuoteHtml($_)) }
  103. keys(%MarkupLines))) . ')(.*\n?)';
  104. $markup_lines_re = qr/$markup_lines_re/;
  105. }
  106. sub MarkupTag {
  107. my ($tag, $str) = @_;
  108. my ($start, $end);
  109. if (ref($tag)) {
  110. my $arrayref = $tag;
  111. my ($tag, $hashref) = @{$arrayref};
  112. my %hash = %{$hashref};
  113. $start = $end = $tag;
  114. foreach my $attr (keys %hash) {
  115. $start .= ' ' . $attr . '="' . $hash{$attr} . '"';
  116. }
  117. } else {
  118. $start = $end = $tag;
  119. }
  120. my $result = "<$start>$str</$end>";
  121. $result = CloseHtmlEnvironments() . $result . AddHtmlEnvironment('p')
  122. if $block_element{$start};
  123. return $result;
  124. }
  125. sub MarkupRule {
  126. if ($bol and %MarkupLines and m/$markup_lines_re/cg) {
  127. my ($tag, $str) = ($1, $2);
  128. $str = $q->span($tag) . $str;
  129. while (m/$markup_lines_re/cg) {
  130. $str .= $q->span($1) . $2;
  131. }
  132. return CloseHtmlEnvironments()
  133. . MarkupTag($MarkupLines{UnquoteHtml($tag)}, $str)
  134. . AddHtmlEnvironment('p');
  135. } elsif (%MarkupSingles and m/$markup_singles_re/cg) {
  136. return $MarkupSingles{UnquoteHtml($1)};
  137. } elsif (%MarkupForcedPairs and m/$markup_forced_pairs_re/cg) {
  138. my $tag = $1;
  139. my $start = $tag;
  140. my $end = $tag;
  141. # handle different end tag
  142. my $data = $MarkupForcedPairs{UnquoteHtml($tag)};
  143. if (ref($data)) {
  144. my @data = @{$data};
  145. $start = $data[0] if $data[0];
  146. $end = $data[2] if $data[2];
  147. }
  148. my $endre = quotemeta($end);
  149. $endre .= '[ \t]*\n?' if $block_element{$start}; # skip trailing whitespace if block
  150. # may match the empty string, or multiple lines, but may not span
  151. # paragraphs.
  152. if ($endre and m/\G$endre/cg) {
  153. return $tag . $end;
  154. } elsif ($tag eq $end && m/\G((:?.+?\n)*?.+?)$endre/cg) { # may not span paragraphs
  155. return MarkupTag($data, $1);
  156. } elsif ($tag ne $end && m/\G((:?.|\n)+?)$endre/cg) {
  157. return MarkupTag($data, $1);
  158. } else {
  159. return $tag;
  160. }
  161. } elsif (%MarkupPairs and m/$markup_pairs_re/cg) {
  162. return MarkupTag($MarkupPairs{UnquoteHtml($1)}, $2);
  163. } elsif ($MarkupPairs{'/'} and m|\G~/|cg) {
  164. return '~/'; # fix ~/elisp/ example
  165. } elsif ($MarkupPairs{'/'} and m|\G(/[-A-Za-z0-9\x{0080}-\x{fffd}/]+/$words/)|cg) {
  166. return $1; # fix /usr/share/lib/! example
  167. }
  168. # "foo
  169. elsif ($MarkupQuotes and (m/\G(?<=[[:space:]])"/cg
  170. or pos == 0 and m/\G"/cg)) {
  171. return $MarkupQuoteTable->[$MarkupQuotes]->[3];
  172. }
  173. # foo"
  174. elsif ($MarkupQuotes and (m/\G"(?=[[:space:][:punct:]])/cg
  175. or m/\G"\z/cg)) {
  176. return $MarkupQuoteTable->[$MarkupQuotes]->[2];
  177. }
  178. # foo."
  179. elsif ($MarkupQuotes and (m/\G(?<=[[:punct:]])"/cg)) {
  180. return $MarkupQuoteTable->[$MarkupQuotes]->[3];
  181. }
  182. # single quotes at the beginning of the buffer
  183. elsif ($MarkupQuotes and pos == 0 and m/\G'/cg) {
  184. return $MarkupQuoteTable->[$MarkupQuotes]->[0];
  185. }
  186. # 'foo
  187. elsif ($MarkupQuotes and (m/\G(?<=[[:space:]])'/cg
  188. or pos == 0 and m/\G'/cg)) {
  189. return $MarkupQuoteTable->[$MarkupQuotes]->[0];
  190. }
  191. # foo'
  192. elsif ($MarkupQuotes and (m/\G'(?=[[:space:][:punct:]])/cg
  193. or m/\G'\z/cg)) {
  194. return $MarkupQuoteTable->[$MarkupQuotes]->[1];
  195. }
  196. # foo's
  197. elsif ($MarkupQuotes and m/\G(?<![[:space:]])'(?![[:space:][:punct:]])/cg) {
  198. return $MarkupQuoteTable->[$MarkupQuotes]->[4];
  199. }
  200. return;
  201. }