sbcsgen.pl 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119
  1. #!/usr/bin/env perl
  2. # This script generates sbcsdat.c (the data for all the SBCSes) from its
  3. # source form sbcs.dat.
  4. use warnings;
  5. use Getopt::Long;
  6. use File::Basename;
  # ----------------------------------------------------------------------
  # Main program: read sbcs.dat (located next to this script) and write the
  # generated C source to the output file (default sbcsdat.c, overridable
  # with -o/--output).
  #
  # Input lines recognised by the parser below:
  #   charset NAME               start a new character set
  #   sortpriority LO-HI DELTA   add DELTA to the priority of bytes LO..HI (hex)
  #   <hex or XXXX tokens>       table values; XXXX marks an unmapped byte
  # Any other line is ignored.
  # ----------------------------------------------------------------------
  my $infile  = (dirname __FILE__) . "/sbcs.dat";
  my $outfile = "sbcsdat.c";
  my $usage = "usage: sbcsgen.pl [-o OUTFILE]\n";
  GetOptions("o|output=s" => \$outfile)
      or die $usage;

  # Fail loudly if the data file is missing: an unchecked open would
  # otherwise produce an output file containing no charsets at all.
  open my $in, '<', $infile
      or die "sbcsgen.pl: cannot open $infile for reading: $!\n";

  my $out;
  if ($outfile eq '-') {
      # The previous two-argument open treated ">-" as standard output;
      # keep that behaviour for anyone relying on "-o -".
      $out = \*STDOUT;
  } else {
      open $out, '>', $outfile
          or die "sbcsgen.pl: cannot open $outfile for writing: $!\n";
  }

  # outcharset() prints to the currently selected handle, so route the
  # default output to the generated file for the rest of the run.
  my $previous_handle = select $out;

  print "$_\n" for (
      '/*',
      ' * sbcsdat.c - data definitions for single-byte character sets.',
      ' *',
      ' * Generated by sbcsgen.pl from sbcs.dat.',
      ' * You should edit those files rather than editing this one.',
      ' */',
      '',
      '#ifndef ENUM_CHARSETS',
      '',
      '#include "charset.h"',
      '#include "internal.h"',
      '',
  );

  my $charsetname  = undef;   # name from the most recent "charset" line
  my @vals         = ();      # table values collected so far for that charset
  my @charsetnames = ();      # every charset emitted, in input order
  my @sortpriority = ();      # per-byte priority used to pick reverse mappings

  while (my $line = <$in>) {
      chomp $line;
      $line =~ tr/\r//d;      # tolerate CRLF line endings

      if ($line =~ /^charset (.*)$/) {
          $charsetname  = $1;
          @vals         = ();
          @sortpriority = map { 0 } 0..255;
      } elsif ($line =~ /^sortpriority ([^-]*)-([^-]*) (.*)$/) {
          my ($low, $high, $delta) = ($1, $2, $3);
          for my $byte (hex($low) .. hex($high)) {
              $sortpriority[$byte] += $delta;
          }
      } elsif ($line =~ /^[0-9a-fA-FX]/) {
          push @vals, map { $_ eq "XXXX" ? -1 : hex $_ } split / +/, $line;
          next if scalar @vals < 256;

          my $label = defined $charsetname ? $charsetname : "(unnamed)";
          die "$infile:$.: charset $label has more than 256 values\n"
              if scalar @vals > 256;

          # A complete table with no active "charset" line would produce a
          # nameless C definition (or an obscure failure inside outcharset).
          die "$infile:$.: 256 values seen with no preceding charset line\n"
              unless defined $charsetname;

          # outcharset is defined further down with a prototype; the &-form
          # avoids a "called too early to check prototype" warning.
          &outcharset($charsetname, \@vals, \@sortpriority);
          push @charsetnames, $charsetname;

          $charsetname  = undef;
          @vals         = ();
          @sortpriority = map { 0 } 0..255;
      }
  }

  print "#else /* ENUM_CHARSETS */\n";
  print "\n";
  for my $name (@charsetnames) {
      print "ENUM_CHARSET($name)\n";
  }
  print "\n";
  print "#endif /* ENUM_CHARSETS */\n";

  # Restore the default handle and make sure buffered output really hit the
  # disk; write errors only surface reliably at close time.
  select $previous_handle;
  close $in;
  close $out
      or die "sbcsgen.pl: error writing $outfile: $!\n";
  # outcharset(NAME, VALS, SORTPRIORITY)
  #
  # Print the C definitions for one single-byte charset to the currently
  # selected output handle.
  #
  #   NAME          identifier used in the emitted names data_NAME and
  #                 charset_NAME
  #   VALS          reference to an array of 256 numbers; a negative entry
  #                 marks an unmapped byte and is emitted as ERROR
  #                 (presumably a macro from the included C headers - confirm)
  #   SORTPRIORITY  reference to an array giving a numeric priority per byte;
  #                 must be defined for every mapped byte
  #
  # Output consists of the forward table (value for each byte), then a list
  # of byte values ordered by the value they map to, then the length of that
  # list.  When several bytes map to the same value only one is listed: the
  # one with the highest priority, ties broken by the lowest byte value.
  #
  # Dies if a mapped byte has no defined sort priority.
  sub outcharset($$$) {
      my ($name, $vals, $sortpriority) = @_;
      my @candidates;     # [byte, value, priority] for every mapped byte

      print "static const sbcs_data data_$name = {\n";
      print " {\n";

      # Forward table, eight entries per output line.
      my $prefix = " ";
      for my $byte (0 .. 255) {
          my $value = $vals->[$byte];
          if ($value < 0) {
              printf "%sERROR ", $prefix;
          } else {
              printf "%s0x%04x", $prefix, $value;
              die "ooh? $byte\n" unless defined $sortpriority->[$byte];
              push @candidates, [$byte, $value, 0 + $sortpriority->[$byte]];
          }
          $prefix = ($byte % 8 == 7) ? ",\n " : ", ";
      }
      print "\n },\n {\n";

      # Order by mapped value ascending; among bytes sharing a value put the
      # highest priority first, then the lowest byte, so that the first entry
      # seen for each value is the preferred one.
      my @ordered = sort {
             $a->[1] <=> $b->[1]
          || $b->[2] <=> $a->[2]
          || $a->[0] <=> $b->[0]
      } @candidates;

      # Reverse table: one byte per distinct value, eight per output line.
      $prefix = " ";
      my $last_value = -1;    # mapped values are never negative
      my $count      = 0;
      for my $entry (@ordered) {
          my ($byte, $value) = @$entry;
          next if $value == $last_value;      # low-priority alternative
          $last_value = $value;
          printf "%s0x%02x", $prefix, $byte;
          $prefix = ($count % 8 == 7) ? ",\n " : ", ";
          $count++;
      }
      printf "\n },\n %d\n", $count;
      print "};\n";
      print "const charset_spec charset_$name = {\n" .
          " $name, read_sbcs, write_sbcs, &data_$name\n};\n\n";
  }