retag 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. #!/usr/bin/perl -w
  2. #
  3. # Copyright (C) 2007 Alex Schroeder <alex@gnu.org>
  4. #
  5. # This program is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation; either version 3 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. require LWP;
  18. use Getopt::Std;
  19. our ($opt_v, $opt_w, $opt_f);
  20. # We make our own specialization of LWP::UserAgent that asks for
  21. # user/password if document is protected.
  22. {
  23. package RequestAgent;
  24. @ISA = qw(LWP::UserAgent);
  25. sub new {
  26. my $self = LWP::UserAgent::new(@_);
  27. $self;
  28. }
  29. sub get_basic_credentials {
  30. my($self, $realm, $uri) = @_;
  31. return split(':', $main::opt_w, 2);
  32. }
  33. }
  34. my $usage = qq{$0 [-i URL] [-t SECONDS]
  35. \t[-u USERNAME] [-p PASSWORD] [-w USERNAME:PASSWORD]
  36. \t[-f FORMAT] [-a TAG] [-d TAG] [TARGET]
  37. TARGET is the base URL for the wiki. Visiting this URL should show you
  38. its homepage.
  39. You add a TAG using -a and delete it using -d. Multiple tags can be
  40. separated by a space or a comma.
  41. FORMAT defaults to [[tag:TheTag]]. If you use just words, specify -f1.
  42. Provide the page names to retag on STDIN or use -i to point to a page.
  43. You can use the index action with the raw parameter. See example
  44. below.
  45. The list of page names should use the MIME type text/plain.
  46. By default, retag will tag a page every five seconds. Use -t to
  47. override this. SECONDS is the number of seconds to wait between
  48. requests.
  49. The edits will show up on the list of changes as anonymous edits. If
  50. you want to provide a USERNAME, you can use -u to do so.
  51. If you want to tag pages on a locked wiki, you need to provide a
  52. PASSWORD using -p.
  53. On the other hand, if your wiki is protected by so-called "basic
  54. authentication" -- that is, if you need to provide a username and
  55. password before you can even view the site -- then you can pass those
  56. along using the -w option. Separate username and password using a
  57. colon.
  58. Example:
  59. retag -i 'http://www.emacswiki.org/cgi-bin/alex?search=tag%3Akitsunemori+2006+2007;context=0;raw=1' \\
  60. -u AlexSchroeder -a MondayGroup http://www.emacswiki.org/cgi-bin/alex
  61. };
  62. sub UrlEncode {
  63. my $str = shift;
  64. return '' unless $str;
  65. my @letters = split(//, $str);
  66. my @safe = ('a' .. 'z', 'A' .. 'Z', '0' .. '9', '-', '_', '.', '!', '~', '*', "'", '(', ')', '#');
  67. foreach my $letter (@letters) {
  68. my $pattern = quotemeta($letter);
  69. if (not grep(/$pattern/, @safe)) {
  70. $letter = sprintf("%%%02x", ord($letter));
  71. }
  72. }
  73. return join('', @letters);
  74. }
  75. sub GetRaw {
  76. my ($uri) = @_;
  77. my $ua = RequestAgent->new;
  78. my $response = $ua->get($uri);
  79. print "no response\n" unless $response->code;
  80. print "GET ", $response->code, " ", $response->message, "\n" if $opt_v;
  81. return $response->content if $response->is_success;
  82. }
  83. my $FreeLinkPattern = "([-,.()' _0-9A-Za-z\x80-\xff]+)";
  84. sub PostRaw {
  85. my ($uri, $id, $data, $username, $password) = @_;
  86. my $ua = RequestAgent->new;
  87. my $response = $ua->post($uri, {title=>$id, text=>$data, raw=>1,
  88. question=>1, recent_edit=>'on',
  89. username=>$username, pwd=>$password});
  90. my $status = $response->code . ' ' . $response->message;
  91. warn "POST $id failed: $status.\n" unless $response->is_success;
  92. }
  93. sub tag {
  94. my ($target, $interval, $username, $password,
  95. $pageref, $addref, $delref) = @_;
  96. foreach my $id (@$pageref) {
  97. print "$id\n";
  98. my $page = UrlEncode ($id);
  99. my $data = GetRaw("$target?action=browse;id=$page;raw=1");
  100. # Every page starts with a new copy.
  101. my %tags = map { $_ => 1 } @$addref;
  102. # The current code does not remove tags sprinkled all over the
  103. # page. The code will in fact add those tags to the final tagline.
  104. if ($data =~ /\n\nTags: (.*)/) {
  105. my $tags = $1;
  106. if ($opt_f) {
  107. foreach my $tag (split /,\s*/, $1) {
  108. $tags{$tag} = 1;
  109. }
  110. } else {
  111. while ($tags =~ /\[\[tag:$FreeLinkPattern(\|[^]|]+)?\]\]/ogi) {
  112. $tags{$1} = 1;
  113. }
  114. }
  115. foreach my $tag (@$delref) {
  116. delete $tags{$tag};
  117. }
  118. }
  119. my $newtags;
  120. if ($opt_f) {
  121. $newtags = join(', ', sort keys %tags);
  122. } else {
  123. $newtags = join(' ', map { "\[\[tag:$_\]\]" } sort keys %tags);
  124. }
  125. # The code will not remove the tagline if the last tag is removed.
  126. # It will add a tagline if there is none.
  127. $data =~ s/\n\nTags: .*/\n\nTags: $newtags/ or $data .= "\n\nTags: $newtags";
  128. PostRaw($target, $id, $data, $username, $password);
  129. sleep($interval);
  130. }
  131. }
  132. sub main {
  133. our($opt_h, $opt_i, $opt_t, $opt_d, $opt_u, $opt_p);
  134. getopts('hvi:t:u:p:w:a:d:f:');
  135. die $usage if $opt_h;
  136. die "Missing tags to add or delete. Use -a TAG or -d TAG.\n"
  137. unless $opt_a or $opt_d;
  138. my $interval = $opt_t ? $opt_t : 5;
  139. my (@add, @delete);
  140. @add = split(/[ ,]+/, $opt_a) if $opt_a;
  141. @delete = split(/[ ,]+/, $opt_d) if $opt_d;
  142. my $username = $opt_u;
  143. my $password = $opt_p;
  144. my $target = shift(@ARGV);
  145. die "You need to provide exactly one target URL. Use -h for more help.\n"
  146. unless $target and not @ARGV;
  147. my @pages = ();
  148. if ($opt_i) {
  149. my $data = GetRaw($opt_i);
  150. @pages = split(/\n/, $data);
  151. } else {
  152. print "List of pages:\n";
  153. while (<STDIN>) {
  154. chomp;
  155. push(@pages, $_);
  156. }
  157. }
  158. die "The list of pages is missing. Use -i.\n" unless @pages;
  159. tag($target, $interval, $username, $password, \@pages, \@add, \@delete);
  160. }
  161. main();