parse-mozilla-encoding-table.pl 2.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
#!/usr/bin/perl
# parse-mozilla-encoding-table.pl, version 0.6
#
# Script to disassemble existing Mozilla *.uf or *.ut files
# back to source conversion tables.
# by Anthony Fok <anthony@thizlinux.com>, ThizLinux Laboratory Ltd., 2002/11/27
# License: GNU General Public License, version 2 or newer
#
# Used for verifying HKSCS-1999 hkscs.uf and hkscs.ut so that I can make
# new ones for HKSCS-2001.  This script is quick-and-dirty and not very
# robust, so if the debug output of fromu/tou ever changes, this script
# will need to be modified too. :-)
  use strict;
  use warnings;

  # Format a number as an upper-case hexadecimal string of at least four
  # digits.  Every source key stored in the table goes through this, so
  # all three item formats agree on how a key is spelled.
  sub hex4 {
      my ($number) = @_;
      return sprintf '%04X', $number;
  }

  # Read the next line from $fh and match it against $pattern.
  #
  #   $fh      - input handle positioned at the line to check
  #   $pattern - compiled regex (qr//) the line must match
  #   $what    - short description used in the error message
  #
  # Returns the first capture group, or 1 when the pattern has none.
  # Dies if the input is exhausted or if the line does not match.
  sub expect_line {
      my ($fh, $pattern, $what) = @_;

      my $line = <$fh>;
      die "Unexpected end of file while looking for $what\n"
          unless defined $line;

      if ($line !~ $pattern) {
          chomp(my $shown = $line);
          die "Line $.: expected $what, got '$shown'\n";
      }
      return defined $1 ? $1 : 1;
  }

  # Parse the item dump readable from $fh.
  #
  # Lines outside "Begin of Item <hex>" ... "End of Item <hex>" are
  # ignored.  Each item is followed by a "Format" line (0, 1 or 2) and a
  # "srcBegin" line, then by format-specific lines:
  #   Format 0 (range):   srcEnd, destBegin
  #   Format 1 (mapping): srcEnd, mappingOffset, "Mapping =", then lines
  #                       of whitespace-separated values
  #   Format 2 (single):  destBegin
  #
  # Returns a hash reference mapping source code (hex string) to
  # destination code (hex string).  Mapping values equal to "FFFD" are
  # skipped.  Dies on any structural inconsistency, including input that
  # ends in the middle of an item.
  sub parse_table {
      my ($fh) = @_;

      my %data;
      my $mapping_pos = 0;    # running count of mapping values consumed
      my $previous_id;

      while (my $line = <$fh>) {
          next unless $line =~ /^Begin of Item ([[:xdigit:]]+)/;
          my $item_id = $1;

          # Item numbers must be consecutive.
          die "Item $item_id does not directly follow item $previous_id\n"
              if defined $previous_id
              and hex($previous_id) + 1 != hex($item_id);
          $previous_id = $item_id;

          my $end_of_item = qr/^End of Item \Q$item_id\E\s*$/;
          my $end_label   = "'End of Item $item_id'";

          my $format = expect_line($fh, qr/Format ([012])/, 'Format');
          my $src_begin = hex(
              expect_line($fh, qr/srcBegin = ([[:xdigit:]]+)/, 'srcBegin'));

          if ($format == 0) {    # Range
              my $src_end = hex(
                  expect_line($fh, qr/srcEnd = ([[:xdigit:]]+)/, 'srcEnd'));
              my $dest_begin = hex(
                  expect_line($fh, qr/destBegin = ([[:xdigit:]]+)/,
                      'destBegin'));

              for my $src ($src_begin .. $src_end) {
                  $data{ hex4($src) } = hex4($dest_begin + $src - $src_begin);
              }
              expect_line($fh, $end_of_item, $end_label);
          }
          elsif ($format == 1) {    # Mapping
              my $src_end = hex(
                  expect_line($fh, qr/srcEnd = ([[:xdigit:]]+)/, 'srcEnd'));
              my $mapping_offset = hex(
                  expect_line($fh, qr/mappingOffset = ([[:xdigit:]]+)/,
                      'mappingOffset'));

              # Each mapping must start where the previous one stopped.
              die "Item $item_id: mappingOffset is $mapping_offset, "
                  . "expected $mapping_pos\n"
                  unless $mapping_offset == $mapping_pos;

              expect_line($fh, qr/Mapping =\s*$/, "'Mapping ='");

              # Consume value lines up to the terminator.  Stopping on
              # end-of-file as well keeps a truncated dump from looping
              # forever.
              my $row;
              while (defined($row = <$fh>)) {
                  last if $row =~ /^End of Item/;

                  # split ' ' discards surrounding whitespace, so the
                  # line needs no chop/chomp first.
                  for my $dest (split ' ', $row) {
                      my $key = hex4(
                          $src_begin - $mapping_offset + $mapping_pos++);
                      next if $dest eq "FFFD";
                      if (defined $data{$key}) {
                          print "Error: doubly defined. $key was $data{$key}, and now $dest.\n";
                      }
                      else {
                          $data{$key} = $dest;
                      }
                  }
              }
              die "Unexpected end of file inside the mapping of item $item_id\n"
                  unless defined $row;

              # The number of values read must cover srcBegin..srcEnd.
              my $seen   = $mapping_pos - $mapping_offset;
              my $wanted = $src_end - $src_begin + 1;
              die "Item $item_id: read $seen mapping values, expected $wanted\n"
                  unless $seen == $wanted;

              if ($row !~ $end_of_item) {
                  chomp $row;
                  die "Line $.: expected $end_label, got '$row'\n";
              }
          }
          else {    # Single ($format == 2)
              my $dest_begin = hex(
                  expect_line($fh, qr/destBegin = ([[:xdigit:]]+)/,
                      'destBegin'));

              # Store in the same canonical spelling as the other formats.
              $data{ hex4($src_begin) } = hex4($dest_begin);
              expect_line($fh, $end_of_item, $end_label);
          }
      }

      return \%data;
  }

  # Render the parsed table as a list of output lines.
  #
  #   $data - hash reference as returned by parse_table()
  #   $mode - "ut": "0x<source>\t0x<dest>"; "uf": "0x<dest>\t0x<source>"
  #
  # Lines are ordered by the numeric value of the source code, so keys of
  # different lengths still sort correctly.  Dies on any other mode.
  sub table_lines {
      my ($data, $mode) = @_;

      die "Unknown mode '$mode'\n"
          unless $mode eq "ut" or $mode eq "uf";

      my @lines;
      for my $key (sort { hex($a) <=> hex($b) or $a cmp $b } keys %$data) {
          push @lines,
              $mode eq "ut"
              ? "0x$key\t0x$data->{$key}\n"
              : "0x$data->{$key}\t0x$key\n";
      }
      return @lines;
  }

  # Entry point: takes the name of a *.ut or *.uf dump, prints the file
  # name followed by the reconstructed conversion table on standard
  # output.
  sub main {
      my ($filename) = @_;

      die "Usage: $0 <file.ut|file.uf>\n" unless defined $filename;

      my ($mode) = $filename =~ /\.(ut|uf)$/
          or die "$filename: file name must end in .ut or .uf\n";
      print $filename, "\n";

      # Three-argument open: the file name is never parsed for a mode.
      open(my $in, '<', $filename)
          or die "Cannot open $filename: $!\n";
      my $data = parse_table($in);
      close $in;

      # Generate conversion table
      print table_lines($data, $mode);
      return;
  }

  # Run only when executed as a script, so the subs above can be loaded
  # and called on their own.
  main(@ARGV) unless caller;

  1;