123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221 |
- #! /usr/bin/perl -w
- use CGI qw/:standard/;
- use CGI::Carp qw(fatalsToBrowser);
- if (param('separator') eq 'UseMod 0.92' or param('separator') eq 'UseMod 1.00') {
- $FS = "\xb3";
- } elsif (param('separator') eq 'UseMod 1.00 with $NewFS set') {
- $FS = "\x1e\xff\xfe\x1e";
- } else {
- $FS = "\x1e";
- }
- $NewFS = "\x1e";
- # override $FS if you want!
- print header() . start_html('Upgrading Files'), p;
- print q{Upgrade version: $Id: upgrade-files.pl,v 1.16 2010/11/06 11:51:18 as Exp $}, "\n";
- if (not param('dir')) {
- print start_form, p, '$DataDir: ', textfield('dir', '/tmp/oddmuse'),
- p, radio_group('separator', ['Oddmuse', 'UseMod 0.92', 'UseMod 1.00',
- 'UseMod 1.00 with $NewFS set']),
- p, checkbox('convert', 'checked', 'on', 'Convert Latin-1 to UTF-8'),
- p, submit('Ok'), "\n", end_form;
- } elsif (param('dir') and not param('sure')) {
- print start_form, hidden('sure', 'yes'), hidden('dir', param('dir')),
- hidden('separator', param('separator')), hidden('convert', param('convert')),
- p, '$DataDir: ', param('dir'),
- p, 'separator used when reading pages: ',
- join(', ', map { sprintf('0x%x', ord($_)) } split (//, $FS)),
- p, 'separator used when writing pages: ',
- join(', ', map { sprintf('0x%x', ord($_)) } split (//, $NewFS)),
- p, 'Convert Latin-1 to UTF-8: ', param('convert') ? 'Yes' : 'No',
- p, submit('Confirm'), "\n", end_form;
- } else {
- rewrite(scalar(param('dir')));
- }
- print end_html();
- sub rewrite {
- my ($directory) = @_;
- $FS1 = $FS . "1";
- $FS2 = $FS . "2";
- $FS3 = $FS . "3";
- my @files = glob("$directory/page/*/*.db");
- if (not @files) {
- print "$directory does not seem to be a data directory.\n";
- return;
- }
- print '<pre>';
- foreach my $file (@files) {
- print "Reading page $file...\n";
- my %page = split(/$FS1/, read_file($file), -1);
- %section = split(/$FS2/, $page{text_default}, -1);
- %text = split(/$FS3/, $section{data}, -1);
- $file =~ s!/([A-Z]|other)/!/!;
- $file =~ s/\.db$/.pg/ or die "Invalid page name\n";
- print "Writing $file...\n";
- write_page_file($file);
- }
- print '</pre>';
- @files = glob("$directory/referer/*/*.rb");
- print '<pre>';
- foreach my $file (@files) {
- print "Reading refer $file...\n";
- my $data = read_file($file);
- $data =~ s/$FS1/$NewFS/g;
- $file =~ s!/([A-Z]|other)/!/!;
- $file =~ s/\.rb$/.rf/ or die "Invalid page name\n";
- print "Writing $file...\n";
- write_file($file, $data);
- }
- print '</pre>';
- @files = glob("$directory/keep/*/*.kp");
- foreach my $file (@files) {
- print '<pre>';
- print "Reading keep $file...\n";
- my $data = read_file($file);
- my @list = split(/$FS1/, $data);
- my $out = $file;
- $out =~ s!/([A-Z]|other)/!/!;
- $out =~ s/\.kp$// or die "Invalid keep name\n";
- # We introduce a new variable $dir, here, instead of using $out,
- # because $out will be part of the filename later on, and the
- # filename will be converted in write_file. To convert $out to
- # utf8 would double-encode the directory part of the filename.
- my $dir = param('convert') ? utf8($out) : $out;
- print "Creating $out...\n";
- mkdir($dir) or die "Cannot create directory $dir\n" unless -d $dir;
- foreach my $keep (@list) {
- next unless $keep;
- %section = split(/$FS2/, $keep, -1);
- %text = split(/$FS3/, $section{data}, -1);
- my $current = "$out/$section{'revision'}.kp";
- print "Writing $current...\n";
- write_keep_file($current);
- }
- print '</pre>';
- }
- @files = glob("$directory/*rclog");
- print '<pre>';
- foreach my $file (@files) {
- print "Reading $file...\n";
- my $data = read_file($file);
- @rc = split(/\n/, $data);
- foreach (@rc) {
- my ($ts, $pagename, $summary, $minor, $host, $kind, $extraTemp)
- = split(/$FS3/, $_);
- my %extra = split(/$FS2/, $extraTemp, -1);
- foreach ('name', 'revision', 'languages', 'cluster') {
- $extra{$_} = '' unless $extra{$_};
- }
- $extra{languages} =~ s/$FS1/,/g;
- $_ = join($NewFS, $ts, $pagename, $minor, $summary, $host,
- $extra{name}, $extra{revision}, $extra{languages}, $extra{cluster});
- }
- $data = join("\n", @rc) . "\n";
- $file =~ s/log$/.log/;
- print "Writing $file...\n";
- write_file($file, $data);
- }
- print '</pre>';
- print p, "Done.\n";
- }
- sub read_file {
- my ($filename) = @_;
- my ($data);
- local $/ = undef; # Read complete files
- open(F, "<$filename") or die "can't read $filename: $!";
- $data=<F>;
- close F;
- return $data;
- }
- sub write_file {
- my ($filename, $data) = @_;
- if (param('convert')) {
- $filename = utf8($filename);
- $data = utf8($data);
- }
- open(F, ">$filename") or die "can't write $filename: $!";
- print F $data;
- close F;
- }
- sub cache {
- $_ = shift;
- return "" unless $_;
- my ($block, $flag) = split(/$FS2/, $_);
- my @blocks = split(/$FS3/, $block);
- my @flags = split(/$FS3/, $flag);
- return 'blocks: ' . escape_newlines(join($NewFS, @blocks)) . "\n"
- . 'flags: ' . escape_newlines(join($NewFS, @flags)) . "\n";
- }
- sub escape_newlines {
- $_ = shift;
- $_ =~ s/\n/\n\t/g if $_;
- return $_;
- }
- # Skip the info encoded in the filename (page name). We need the info
- # stored in the rclog (summary, ip, host, username) for the history
- # page. Don't trust the modification dates of the files themselves,
- # which is why we have the timestamp in the file, too. We need the
- # timestamp when expiring old keep files. We need all the info in the
- # page file that will eventually end up in the keep file.
- sub basic_data {
- my $data = 'ts: ' . $section{ts} . "\n" if $section{ts};
- $data .= 'keep-ts: ' . $section{keepts} . "\n" if $section{keepts};
- $data .= 'revision: ' . $section{revision} . "\n" if $section{revision};
- $data .= 'summary: ' . $section{summary} . "\n" if $section{summary};
- $data .= 'summary: ' . $text{summary} . "\n" if $text{summary} and not $section{summary};
- $data .= 'username: ' . $section{username} . "\n" if $section{username};
- $data .= 'ip: ' . $section{ip} . "\n" if $section{ip};
- $data .= 'host: ' . $section{host} . "\n" if $section{host};
- $data .= 'minor: ' . $text{minor} . "\n" if $text{minor};
- # $data .= 'oldmajor: ' . $page{cache_oldmajor} . "\n" if $page{cache_oldmajor};
- $data .= 'text: ' . escape_newlines($text{text}) . "\n";
- return $data;
- }
- sub write_page_file {
- my $file = shift;
- my $data = basic_data();
- $data .= cache($page{cache_blocks});
- $data .= 'diff-major: ' . escape_newlines($page{cache_diff_default_major}) . "\n"
- if $page{cache_diff_default_major};
- $data .= 'diff-minor: ' . escape_newlines($page{cache_diff_default_minor}) . "\n"
- if $page{cache_diff_default_minor};
- write_file($file, $data);
- }
- sub write_keep_file {
- my $file = shift;
- my $data = basic_data();
- write_file($file, $data);
- }
- # This Latin-1 to UTF-8 conversion was written by Skalman on the
- # Oddmuse Wiki. He says: I added a quick, dirty and completely
- # unreadable hack to convert all characters above 0x7F:
- # s/([\x80-\xff])/chr(0xc0+(ord($1)>>6)).chr(ord($1)&0b00111111|0b10000000)/ge;
- # Reading the UTF-8 and Unicode FAQ, I convert every character to
- # (binary) 110xxxxx 10xxxxxx where the 'x' marks the bits of the
- # original ISO-8859-1 character. That is: take the two most
- # significant bits of the caracter and add them to 0xC0 (first byte),
- # then replace the first two bits with 10 (second byte).
- sub utf8 {
- $_ = shift;
- s/([\x80-\xff])/chr(0xc0+(ord($1)>>6)).chr(ord($1)&0b00111111|0b10000000)/ge;
- return $_;
- }
|