undo-double-quotes.pl 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. #! /usr/bin/perl -w
  2. # Copyright (C) 2015 Alex Schroeder <alex@gnu.org>
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 3 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. # GNU General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  16. use utf8;
  17. use strict;
  18. use warnings;
  19. undef $/; # slurp
  20. my %index = ();
  21. my $verbose = '';
  22. sub write_file {
  23. my ($file, $data) = @_;
  24. return unless $data;
  25. open(my $fh, '>:utf8', $file) or die "Cannot write $file: $!";
  26. print $fh $data;
  27. close($fh);
  28. }
  29. sub replacement_block {
  30. my ($block, $pos, @no_go) = @_;
  31. while (@no_go) {
  32. my $first = shift @no_go;
  33. print "Is $pos between " . $first->[0] . " and " . $first->[1] . "?\n" if $verbose;
  34. return $block if $pos >= $first->[0] and $pos <= $first->[1];
  35. }
  36. return "";
  37. }
  38. sub translate_file {
  39. my ($data) = @_;
  40. my @no_go = ();
  41. while ($data =~ /( <nowiki>.*?<\/nowiki>
  42. | <code>.*?<\/code>
  43. | ^ <pre> (.*\n)+ <\/pre>
  44. | ^ {{{ (.*\n)+ }}} )/gmx) {
  45. push @no_go, [pos($data) - length $1, pos($data)];
  46. print "no go from " . $no_go[-1]->[0] . ".." . $no_go[-1]->[1] . " for $1\n" if $verbose;
  47. }
  48. # The problem is that these replacements don't adjust @no_go! Perhaps it is good enough?
  49. my $subs = '';
  50. $subs = $subs || $data =~ s/ ( \[\/quote\] \n \n \[quote\] ) /replacement_block($1, pos($data), @no_go)/gex;
  51. return $data if $subs;
  52. }
  53. sub read_file {
  54. my $file = shift;
  55. open(my $fh, '<:utf8', $file) or die "Cannot read $file: $!";
  56. my $data = <$fh>;
  57. close($fh);
  58. return $data;
  59. }
  60. sub main {
  61. my ($dir) = @_;
  62. mkdir($dir . '-new') or die "Cannot create $dir-new: $!";
  63. print "Indexing files\n";
  64. foreach my $file (glob("$dir/.* $dir/*")) {
  65. next unless $file =~ /$dir\/(.+)/;
  66. my $id = $1;
  67. next if $id eq ".";
  68. next if $id eq "..";
  69. $index{$id} = 1;
  70. }
  71. print "Converting files\n";
  72. foreach my $id (sort keys %index) {
  73. # this is where you debug a particular page
  74. # $verbose = $id eq '2014-12-18_Emacs_Wiki_Migration';
  75. write_file("$dir-new/$id", translate_file(read_file("$dir/$id")));
  76. }
  77. }
  78. use Getopt::Long;
  79. my $dir = 'raw';
  80. my $help = '';
  81. GetOptions ("dir=s" => \$dir,
  82. "help" => \$help);
  83. if ($help) {
  84. print qq{
  85. Usage: $0 [--dir=DIR]
  86. You need to use the raw.pl script to create a directory full of raw
  87. wiki text files.
  88. --dir=DIR is where the raw wiki text files are. Default: raw. The
  89. converted files will be stored in DIR-new, ie. in raw-new by
  90. default.
  91. Example: $0 --dir=~/alexschroeder/raw
  92. }
  93. } else {
  94. main ($dir);
  95. }