wikicopy 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. #!/usr/bin/perl -w
  2. #
  3. # Copyright (C) 2004, 2005, 2006 Alex Schroeder <alex@emacswiki.org>
  4. #
  5. # This program is free software; you can redistribute it and/or modify
  6. # it under the terms of the GNU General Public License as published by
  7. # the Free Software Foundation; either version 2 of the License, or
  8. # (at your option) any later version.
  9. #
  10. # This program is distributed in the hope that it will be useful,
  11. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. # GNU General Public License for more details.
  14. #
  15. # You should have received a copy of the GNU General Public License
  16. # along with this program; if not, write to the
  17. # Free Software Foundation, Inc.
  18. # 59 Temple Place, Suite 330
  19. # Boston, MA 02111-1307 USA
  20. require LWP;
  21. use Getopt::Std;
  22. our ($opt_v, $opt_w);
  23. # We make our own specialization of LWP::UserAgent that asks for
  24. # user/password if document is protected.
  25. {
  26. package RequestAgent;
  27. @ISA = qw(LWP::UserAgent);
  28. sub new {
  29. my $self = LWP::UserAgent::new(@_);
  30. $self;
  31. }
  32. sub get_basic_credentials {
  33. my($self, $realm, $uri) = @_;
  34. return split(':', $main::opt_w, 2);
  35. }
  36. }
  37. my $usage = qq{$0 [-i URL] [-d STRING] [-t SECONDS]
  38. \t[-u USERNAME] [-p PASSWORD] [-w USERNAME:PASSWORD]
  39. \t[-q QUESTION] [-a ANSWER] [-z SECRET]
  40. \t[SOURCE] TARGET
  41. SOURCE and TARGET are the base URLs for the two wikis. Visiting these
  42. two URLs should show you the respective homepages.
  43. Provide the page names to copy on STDIN or use -i to point to a page.
  44. You can use the index action with the raw parameter from the source
  45. wiki to copy all the pages. See example below.
  46. The list of page names should use the MIME type text/plain.
  47. By default, wikicopy will copy a page every five seconds. Use -t to
  48. override this. SECONDS is the number of seconds to wait between
  49. requests.
  50. If you use -d instead of providing a SOURCE, all the pages will be
  51. replaced with STRING. This is useful when replacing the page content
  52. with "DeletedPage", for example.
  53. -d Delete target pages instead of providing SOURCE (default: none)
  54. -s The summary for RecentChanges (default: none)
  55. -u The username for RecentChanges (default: none)
  56. -p The password to use for locked pages (default: none)
  57. -w The username:password combo for basic authentication (default:none)
  58. -q The question number to answer (default: 0, ie. the first question)
  59. -a The answer to the question (default: none)
  60. -z Alternatively, the secret key (default: question)
  61. -v Verbose output for debugging (default: none)
  62. Examples:
  63. wikicopy -i 'http://www.emacswiki.org/cgi-bin/alex?action=index;raw=1' \\
  64. http://www.emacswiki.org/cgi-bin/alex \\
  65. http://localhost/cgi-bin/wiki.pl
  66. wikicopy -d DeletedPage http://localhost/cgi-bin/wiki.pl < list.txt
  67. wikicopy -v -u 'ElGordo' -w 'simple:mind' \\
  68. -i 'http://www.communitywiki.org/odd/LosAngelesEcoVillage?action=index;raw=1' \\
  69. 'http://www.communitywiki.org/odd/LosAngelesEcoVillage' \\
  70. 'http://www.tentacle.net/~eeio/cgi/wiki.cgi'
  71. };
  72. sub UrlEncode {
  73. my $str = shift;
  74. return '' unless $str;
  75. my @letters = split(//, $str);
  76. my @safe = ('a' .. 'z', 'A' .. 'Z', '0' .. '9', '-', '_', '.', '!', '~', '*', "'", '(', ')', '#');
  77. foreach my $letter (@letters) {
  78. my $pattern = quotemeta($letter);
  79. if (not grep(/$pattern/, @safe)) {
  80. $letter = sprintf("%%%02x", ord($letter));
  81. }
  82. }
  83. return join('', @letters);
  84. }
  85. sub GetRaw {
  86. my ($uri) = @_;
  87. my $ua = RequestAgent->new;
  88. my $response = $ua->get($uri);
  89. print "no response\n" unless $response->code;
  90. print "GET ", $response->code, " ", $response->message, "\n" if $opt_v;
  91. return $response->content if $response->is_success;
  92. }
  93. sub post {
  94. my ($uri, $id, $data, $minor, $summary, $username, $password,
  95. $question, $answer, $secret) = @_;
  96. my $ua = RequestAgent->new;
  97. my %params = (title=>$id, text=>$data, raw=>1,
  98. username=>$username, pwd=>$password,
  99. summary=>$summary, question_num=>$question,
  100. answer=>$answer, $secret=>1,
  101. recent_edit=>$minor);
  102. if ($opt_v) {
  103. foreach my $key (keys %params) {
  104. my $value = $params{$key} || '(none)';
  105. $value = substr($value,0,50) . '...'
  106. if $key eq 'text' and length($value) > 53;
  107. warn "$key: $value\n";
  108. }
  109. }
  110. my $response = $ua->post($uri, \%params);
  111. my $status = $response->code . ' ' . $response->message;
  112. warn "POST $id failed: $status.\n" unless $response->is_success;
  113. }
  114. sub copy {
  115. my ($source, $replacement, $target, $interval, $minor, $summary,
  116. $username, $password, $question, $answer, $secret,
  117. @pages) = @_;
  118. foreach my $id (@pages) {
  119. print "$id\n";
  120. my $page = UrlEncode ($id);
  121. # fix URL for other wikis
  122. my $data = $replacement || GetRaw("$source?action=browse;id=$page;raw=1");
  123. next unless $data;
  124. post($target, $id, $data, $minor, $summary, $username, $password,
  125. $question, $answer, $secret);
  126. sleep($interval);
  127. }
  128. }
  129. sub main {
  130. our($opt_m, $opt_i, $opt_t, $opt_d, $opt_s, $opt_u, $opt_p,
  131. $opt_q, $opt_a, $opt_z);
  132. getopts('mi:t:d:s:u:p:q:a:z:w:v');
  133. my $interval = $opt_t ? $opt_t : 5;
  134. my $replacement = $opt_d;
  135. my ($source, $target);
  136. $source = shift(@ARGV) unless $replacement;
  137. $target = shift(@ARGV);
  138. die $usage if not $target or @ARGV; # not enough or too many
  139. my @pages = ();
  140. if ($opt_i) {
  141. my $data = GetRaw($opt_i);
  142. @pages = split(/\n/, $data);
  143. } else {
  144. print "List of pages:\n";
  145. while (<STDIN>) {
  146. chomp;
  147. push(@pages, $_);
  148. }
  149. }
  150. die "The list of pages is missing. Did you use -i?\n" unless @pages;
  151. copy($source, $replacement, $target, $interval, $opt_m ? 'on' : '', $opt_s,
  152. $opt_u, $opt_p, $opt_q, $opt_a, $opt_z||'question',
  153. @pages);
  154. }
  155. main();