tags.pl 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. use strict;
  2. use v5.10;
  3. =encoding utf8
  4. =head1 NAME
  5. tags - an Oddmuse module that implements tagging of pages and
  6. searching for tagged pages
  7. =head1 SYNOPSIS
  8. This module recognises the pattern C<[[tag:foo]]> on a page and will
  9. render this as a link to all pages tagged foo, as well as a link to
  10. the RSS feed for all pages tagged foo.
  11. Alternatively, the pattern C<[[tag:foo|bar]]> is also recognized. The
  12. only difference is that this will look like a link to bar instead of
  13. foo.
  14. When searching for a term of the form C<tag:foo> the term "foo" be
  15. searched in a separate tag index, making it much faster.
  16. You can also negate this particular form by using C<-tag:foo>.
  17. These searches will also work for Journal Pages, Recent Changes, and
  18. RSS feed.
  19. =head1 INSTALLATION
  20. Installing a module is easy: Create a modules subdirectory in your
  21. data directory, and put the Perl file in there. It will be loaded
  22. automatically.
  23. =cut
  24. AddModuleDescription('tags.pl', 'Tagging Extension');
  25. =head1 CONFIGURATION
  26. =head2 $TagUrl and $TagFeed
  27. These variable will be used to link the tags. By default, they will
  28. point at the wiki itself, using C<$ScriptName>. They use C<%s> as a
  29. placeholder for the tag.
  30. Example:
  31. $TagUrl = 'http://technorati.com/tag/%s';
  32. $TagFeed = 'http://feeds.technorati.com/tag/%s';
  33. By default, these two will point to the list of recent changes,
  34. filtered by the appropriate tag, formatted as HTML or RSS
  35. respectively.
  36. =head2 $TagFeedIcon
  37. This variable should point to an RSS icon. You can get one from
  38. L<http://www.feedicons.com/>, for example.
  39. Example:
  40. $TagFeedIcon = 'http://www.example.org/pics/rss.png';
  41. =head2 $TagCloudSize
  42. The number of most used tags when looking at the tag cloud. The
  43. default is 50.
  44. Example:
  45. $TagCloudSize = 20;
  46. =cut
  47. our ($q, $Now, %Action, %Page, $FreeLinkPattern, @MyInitVariables, @MyRules, @MyAdminCode, $DataDir, $ScriptName);
  48. our ($TagUrl, $TagFeed, $TagFeedIcon, $TagFile, $TagCloudSize);
  49. push(@MyInitVariables, \&TagsInit);
  50. sub TagsInit {
  51. $TagUrl = ScriptUrl('action=rc;rcfilteronly=tag:%s') unless $TagUrl;
  52. $TagFeed = ScriptUrl('action=rss;rcfilteronly=tag:%s') unless $TagFeed;
  53. $TagCloudSize = 50 unless $TagCloudSize;
  54. $TagFile = "$DataDir/tag.db";
  55. }
  56. sub TagsGetLink {
  57. my ($url, $id) = @_;
  58. $id = UrlEncode($id);
  59. $url =~ s/\%s/$id/g or $url .= $id;
  60. return $url;
  61. }
  62. sub TagReadHash {
  63. require Storable;
  64. return %{ Storable::retrieve(encode_utf8($TagFile)) } if IsFile($TagFile);
  65. }
  66. # returns undef if encountering an error
  67. sub TagWriteHash {
  68. my $h = shift;
  69. require Storable;
  70. return Storable::store($h, encode_utf8($TagFile));
  71. }
  72. push(@MyRules, \&TagsRule);
  73. sub TagsRule {
  74. if (m/\G(\[\[tag:$FreeLinkPattern\]\])/cg
  75. or m/\G(\[\[tag:$FreeLinkPattern\|([^]|]+)\]\])/cg) {
  76. # [[tag:Free Link]], [[tag:Free Link|alt text]]
  77. my ($tag, $text) = ($2, $3);
  78. my $html = $q->a({-href=>TagsGetLink($TagUrl, $tag),
  79. -class=>'outside tag',
  80. -title=>T('Tag'),
  81. -rel=>'tag'
  82. }, $text || $tag);
  83. if ($TagFeedIcon) {
  84. $html .= ' ' . $q->a({-href=>TagsGetLink($TagFeed, $tag),
  85. -class=>'feed tag',
  86. -title=>T('Feed for this tag'),
  87. -rel=>'feed'
  88. }, $q->img({-src=>$TagFeedIcon,
  89. -alt=>T('RSS'),
  90. -loading=>'lazy'}));
  91. }
  92. return $html;
  93. }
  94. return;
  95. }
  96. =pod
  97. When saving, a tags db is written to disk. If it doesn't exist, it
  98. will be regenerated.
  99. =cut
  100. *OldTagSave = \&Save;
  101. *Save = \&NewTagSave;
  102. sub NewTagSave { # called within a lock!
  103. OldTagSave(@_);
  104. my $id = shift;
  105. # Within a tag, space is replaced by _ as in foo_bar.
  106. my %tag = map { lc(FreeToNormal($_)) => 1 }
  107. ($Page{text} =~ m/\[\[tag:$FreeLinkPattern\]\]/g,
  108. $Page{text} =~ m/\[\[tag:$FreeLinkPattern\|([^]|]+)\]\]/g);
  109. # open the DB file
  110. my %h = TagReadHash();
  111. # For each tag we list the files tagged. Add the current file for
  112. # all those tags where it is missing.
  113. foreach my $tag (keys %tag) {
  114. my %file = map {$_=>1} @{$h{$tag}};
  115. if (not $file{$id}) {
  116. $file{$id} = 1;
  117. $h{$tag} = [keys %file];
  118. }
  119. }
  120. # For each file in our hash, we have a reverse lookup of all the
  121. # tags used. This allows us to delete the references that no longer
  122. # show up without looping through them all. The files are indexed
  123. # with a starting underscore because this is an illegal tag name.
  124. foreach my $tag (@{$h{"_$id"}}) {
  125. # If the tag we're looking at is no longer listed, we have work to
  126. # do.
  127. if (!$tag{$tag}) {
  128. my %file = map {$_=>1} @{$h{$tag}};
  129. delete $file{$id};
  130. if (%file) {
  131. $h{$tag} = [keys %file];
  132. } else {
  133. delete $h{$tag};
  134. }
  135. }
  136. }
  137. # Store the new reverse lookup of all the tags used on the current
  138. # page. If no more tags appear on this page, delete the entry.
  139. if (%tag) {
  140. $h{"_$id"} = [keys %tag];
  141. } else {
  142. delete $h{"_$id"};
  143. }
  144. TagWriteHash(\%h);
  145. }
  146. =pod
  147. When a page expires, the relevant pages and references have to be
  148. removed from the tags db.
  149. =cut
  150. *OldTagDeletePage = \&DeletePage;
  151. *DeletePage = \&NewTagDeletePage;
  152. sub NewTagDeletePage { # called within a lock!
  153. my $id = shift;
  154. # open the DB file
  155. my %h = TagReadHash();
  156. # For each file in our hash, we have a reverse lookup of all the
  157. # tags used. This allows us to delete the references that no longer
  158. # show up without looping through them all.
  159. foreach my $tag (@{$h{"_$id"}}) {
  160. my %file = map {$_=>1} @{$h{$tag}};
  161. delete $file{$id};
  162. if (%file) {
  163. $h{$tag} = [keys %file];
  164. } else {
  165. delete $h{$tag};
  166. }
  167. }
  168. # Delete reverse lookup entry.
  169. delete $h{"_$id"};
  170. TagWriteHash(\%h);
  171. # Return any error codes?
  172. return OldTagDeletePage($id, @_);
  173. }
  174. =pod
  175. When searching, the tags db is read and used. This works by scanning
  176. the search string for tag:foo and -tag:bar elements, searching for
  177. those, and then calling the grep filter code with the new list of
  178. pages and a new search term without the tag terms.
  179. =cut
  180. sub TagFind {
  181. my @tags = @_;
  182. # open the DB file
  183. my %h = TagReadHash();
  184. my %page;
  185. foreach my $tag (@tags) {
  186. foreach my $id (@{$h{lc($tag)}}) {
  187. $page{$id} = 1;
  188. }
  189. }
  190. my @result = sort keys %page;
  191. return @result;
  192. }
  193. sub TagsTerms {
  194. my $string = shift;
  195. return grep(/./, $string =~ /\"([^\"]+)\"|(\S+)/g);
  196. }
  197. *OldTagFiltered = \&Filtered;
  198. *Filtered = \&NewTagFiltered;
  199. sub NewTagFiltered { # called within a lock!
  200. my ($string, @pages) = @_;
  201. my %page = map { $_ => 1 } @pages;
  202. # looking at all the "tag:SOME TERMS" and and tag:TERM
  203. my @tagterms = map { FreeToNormal($_) } grep(/^-?tag:/, TagsTerms($string));
  204. my @positives = map {substr($_, 4)} grep(/^tag:/, @tagterms);
  205. my @negatives = map {substr($_, 5)} grep(/^-tag:/, @tagterms);
  206. if (@positives) {
  207. my %found;
  208. foreach my $id (TagFind(@positives)) {
  209. $found{$id} = 1 if $page{$id};
  210. }
  211. %page = %found;
  212. }
  213. # remove the negatives
  214. foreach my $id (TagFind(@negatives)) {
  215. delete $page{$id};
  216. }
  217. # filter out the tags from the search string, and add quotes which might have
  218. # been stripped
  219. $string = join(' ', map { qq{"$_"} } grep(!/^-?tag:/, TagsTerms($string)));
  220. # run the old code for any remaining search terms
  221. return OldTagFiltered($string, sort keys %page);
  222. }
  223. =pod
  224. There remains a problem: The real search code will still be in
  225. operation, and terms of the form -tag:foo will never match. That's why
  226. the code that does the ordinary search has to be changed as well.
  227. We're need to remove all tag terms (again) in order to not confuse it.
  228. =cut
  229. *OldTagSearchString = \&SearchString;
  230. *SearchString = \&NewTagSearchString;
  231. sub NewTagSearchString {
  232. my ($string, @rest) = @_;
  233. # filter out the negative tags from the search string, and add quotes which
  234. # might have been stripped
  235. $string = join(' ', map { NormalToFree($_) } map { qq{"$_"} } grep(!/^-tag:/, TagsTerms($string)));
  236. return 1 unless $string;
  237. return OldTagSearchString($string, @rest);
  238. }
  239. =pod
  240. We also want to provide a visual feedback of tag importance using a
  241. "tag cloud" -- larger font size means that a tag has been used more
  242. often.
  243. =cut
  244. $Action{tagcloud} = \&TagCloud;
  245. sub TagCloud {
  246. print GetHeader('', T('Tag Cloud'), ''),
  247. $q->start_div({-class=>'content cloud'});
  248. require HTML::TagCloud;
  249. my $cloud = HTML::TagCloud->new;
  250. # open the DB file
  251. my %h = TagReadHash();
  252. foreach my $tag (grep !/^_/, keys %h) {
  253. $cloud->add(NormalToFree($tag), "$ScriptName?search=tag:" . UrlEncode($tag), scalar @{$h{$tag}});
  254. }
  255. print $cloud->html_and_css($TagCloudSize);
  256. print '</div>';
  257. PrintFooter();
  258. }
  259. =pod
  260. Finally, we need to provide the means to reindex the entire site. The
  261. Reindex Action will do this. This should only be necessary when you
  262. install the module, and when you suspect that the tag.db is out of
  263. sync such as after a restoration from backup.
  264. Example:
  265. http://example.org/cgi-bin/wiki?action=reindex
  266. =cut
  267. $Action{reindex} = \&DoTagsReindex;
  268. sub DoTagsReindex {
  269. if (not UserIsAdmin()
  270. and IsFile($TagFile)
  271. and $Now - Modified($TagFile) < 0.5) {
  272. ReportError(T('Rebuilding index not done.'), '403 FORBIDDEN',
  273. 0, T('(Rebuilding the index can only be done once every 12 hours.)'));
  274. }
  275. # Request the main lock, because we want to prevent anybody from
  276. # saving while we are reindexing.
  277. RequestLockOrError();
  278. print GetHttpHeader('text/plain');
  279. # open the DB file
  280. require Storable;
  281. my %h = ();
  282. foreach my $id (AllPagesList()) {
  283. print "$id\n";
  284. OpenPage($id);
  285. my %tag = map { lc(FreeToNormal($_)) => 1 }
  286. ($Page{text} =~ m/\[\[tag:$FreeLinkPattern\]\]/g,
  287. $Page{text} =~ m/\[\[tag:$FreeLinkPattern\|([^]|]+)\]\]/g);
  288. next unless %tag;
  289. # For each tag we list the files tagged. Add the current file for
  290. # all tags.
  291. foreach my $tag (keys %tag) {
  292. push(@{$h{$tag}}, $id);
  293. }
  294. # Store the reverse lookup of all the tags used on the current
  295. # page.
  296. $h{"_$id"} = [keys %tag];
  297. }
  298. if (TagWriteHash(\%h)) {
  299. print "Saved tag file.\n";
  300. } else {
  301. print "Error saving tag file.\n";
  302. }
  303. ReleaseLock();
  304. }
  305. =pod
  306. If you want to debug the data structure, use the Tag List Action. All
  307. keys starting with an underscore are pagenames, the others are tags.
  308. Example:
  309. http://example.org/cgi-bin/wiki?action=taglist
  310. =cut
  311. $Action{taglist} = \&TagList;
  312. sub TagList {
  313. print GetHttpHeader('text/plain');
  314. # open the DB file
  315. my %h = TagReadHash();
  316. foreach my $id (sort keys %h) {
  317. print "$id: " . join(', ', @{$h{$id}}) . "\n";
  318. }
  319. TagWriteHash(\%h);
  320. }
  321. =pod
  322. Both these actions are of course available from the Administration
  323. menu.
  324. =cut
  325. push(@MyAdminCode, \&TagsMenu);
  326. sub TagsMenu {
  327. my ($id, $menuref, $restref) = @_;
  328. push(@$menuref,
  329. ScriptLink('action=reindex', T('Rebuild tag index'), 'reindex')
  330. . ', ' . ScriptLink('action=tagcloud', T('tag cloud'), 'tagcloud'));
  331. }
  332. =head1 COPYRIGHT AND LICENSE
  333. Copyright (C) 2005–2019 Alex Schroeder <alex@gnu.org>
  334. This program is free software; you can redistribute it and/or modify
  335. it under the terms of the GNU General Public License as published by
  336. the Free Software Foundation; either version 3 of the License, or (at
  337. your option) any later version.
  338. This program is distributed in the hope that it will be useful, but
  339. WITHOUT ANY WARRANTY; without even the implied warranty of
  340. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  341. General Public License for more details.
  342. You should have received a copy of the GNU General Public License
  343. along with this program. If not, see <http://www.gnu.org/licenses/>.
  344. =cut