123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500 |
- #!/usr/bin/perl
- # -*- Mode: Perl; tab-width: 2; indent-tabs-mode: nil; -*-
- # This Source Code Form is subject to the terms of the Mozilla Public
- # License, v. 2.0. If a copy of the MPL was not distributed with this
- # file, You can obtain one at http://mozilla.org/MPL/2.0/.
- use XML::LibXSLT;
- use XML::LibXML;
- use LWP::Simple;
# Files written by this script.
our $FILE_UNICODE        = "unicode.xml";
our $FILE_DICTIONARY     = "dictionary.xml";
our $FILE_DIFFERENCES    = "differences.txt";
our $FILE_NEW_DICTIONARY = "new_dictionary.txt";
our $FILE_SYNTAX_ERRORS  = "syntax_errors.txt";
our $FILE_JS             = "tests/stretchy-and-large-operators.js";

# Inputs.
#  - $MOZ_DICTIONARY:    our dictionary (property file)
#  - $WG_DICTIONARY_URL: dictionary provided by the W3C in "XML Entity
#                        Definitions for Characters"
#  - $DICTIONARY_XSL:    XSL stylesheet to extract relevant data from the
#                        dictionary
our $MOZ_DICTIONARY    = "mathfont.properties";
our $WG_DICTIONARY_URL = "http://www.w3.org/2003/entities/2007xml/unicode.xml";
our $DICTIONARY_XSL    = "operatorDictionary.xsl";

# The W3C dictionary transformed with operatorDictionary.xsl. It defaults to
# the file written by the "download" mode.
our $WG_DICTIONARY = $FILE_DICTIONARY;
################################################################################
# Command-line handling.
#
# The first argument selects the mode; "download" and "compare" accept one
# optional extra argument, the other modes accept none. Any other invocation
# prints the usage and quits.
#
# The "download" and "clean" modes are handled entirely here and exit; the
# other modes continue with the rest of the script.

# Highest value of $#ARGV allowed for each mode.
my %max_last_arg_index = (
  "download" => 1,
  "compare"  => 1,
  "check"    => 0,
  "make-js"  => 0,
  "clean"    => 0,
);

if (!(@ARGV &&
      exists($max_last_arg_index{$ARGV[0]}) &&
      $#ARGV <= $max_last_arg_index{$ARGV[0]})) {
  usage();
}

if ($ARGV[0] eq "download") {
  # The optional argument overrides the location of the W3C data.
  if ($#ARGV == 1) {
    $WG_DICTIONARY_URL = $ARGV[1];
  }

  print "Downloading $WG_DICTIONARY_URL...\n";
  # getstore() returns the HTTP status code. Report a failure instead of
  # silently converting whatever $FILE_UNICODE happens to contain.
  my $status = getstore($WG_DICTIONARY_URL, $FILE_UNICODE);
  if (!is_success($status)) {
    print STDERR
      "warning: download of $WG_DICTIONARY_URL failed (status $status)\n";
    if (!-e $FILE_UNICODE) {
      die("Couldn't download $WG_DICTIONARY_URL and no local " .
          "$FILE_UNICODE is available!\n");
    }
    print STDERR "warning: using the existing $FILE_UNICODE\n";
  }

  print "Converting $FILE_UNICODE into $FILE_DICTIONARY...\n";
  my $xslt = XML::LibXSLT->new();
  my $source = XML::LibXML->load_xml(location => $FILE_UNICODE);
  my $style_doc = XML::LibXML->load_xml(location => $DICTIONARY_XSL,
                                        no_cdata => 1);
  my $stylesheet = $xslt->parse_stylesheet($style_doc);
  my $results = $stylesheet->transform($source);

  open(my $dictionary_file, ">", $FILE_DICTIONARY) ||
    die("Couldn't open $FILE_DICTIONARY: $!");
  print {$dictionary_file} $stylesheet->output_as_bytes($results);
  # Buffered write errors only show up when the handle is closed.
  close($dictionary_file) ||
    die("Couldn't write $FILE_DICTIONARY: $!");
  exit 0;
}

if ($ARGV[0] eq "clean") {
  # Remove every file generated by the "download", "compare" and "check"
  # modes.
  unlink($FILE_UNICODE,
         $FILE_DICTIONARY,
         $FILE_DIFFERENCES,
         $FILE_NEW_DICTIONARY,
         $FILE_SYNTAX_ERRORS);
  exit 0;
}

if ($ARGV[0] eq "compare" && $#ARGV == 1) {
  # The optional argument overrides the transformed W3C dictionary to use.
  $WG_DICTIONARY = $ARGV[1];
}
################################################################################
# structure of the dictionary used by this script:
# - key: same as in mathfont.properties
# - table:
#    index | value
#      0   | description
#      1   | lspace
#      2   | rspace
#      3   | minsize
#      4   | largeop
#      5   | movablelimits
#      6   | stretchy
#      7   | separator
#      8   | accent
#      9   | fence
#     10   | symmetric
#     11   | priority
#     12   | linebreakstyle
#     13   | direction
#     14   | integral
#     15   | mirrorable

# 1) build %moz_hash from $MOZ_DICTIONARY
#
# Only the lines starting with "operator." are considered. They are expected
# to look like "<key> = <properties> # <description>". The properties that are
# absent get the default value lspace=5, rspace=5 and minsize=1; the boolean
# properties are true when their name appears in <properties>.
print "loading $MOZ_DICTIONARY...\n";
open(my $moz_file, "<", $MOZ_DICTIONARY) ||
  die("Couldn't open $MOZ_DICTIONARY: $!");

print "building dictionary...\n";
while (my $line = <$moz_file>) {
  next unless ($line =~ m/^operator\.(.*)$/);

  # Skip the lines that do not have the expected shape: otherwise the capture
  # variables of the previous match would be used and a bogus entry stored.
  if ($line !~ m/^([\w|\.|\\]*)\s=\s(.*)\s#\s(.*)$/) {
    print STDERR "warning: ignoring malformed entry: $line";
    next;
  }

  # 1.1) build the key
  my ($key, $properties, $description) = ($1, $2, $3);

  # 1.2) build the array
  my @value = ();
  $value[0] = $description;
  $value[1] = ($properties =~ m/^(.*)lspace:(\d)(.*)$/) ? $2 : "5";
  $value[2] = ($properties =~ m/^(.*)rspace:(\d)(.*)$/) ? $2 : "5";
  $value[3] = ($properties =~ m/^(.*)minsize:(\d)(.*)$/) ? $2 : "1";
  $value[4] = ($properties =~ m/largeop/);
  $value[5] = ($properties =~ m/movablelimits/);
  $value[6] = ($properties =~ m/stretchy/);
  $value[7] = ($properties =~ m/separator/);
  $value[8] = ($properties =~ m/accent/);
  $value[9] = ($properties =~ m/fence/);
  $value[10] = ($properties =~ m/symmetric/);
  $value[11] = ""; # we don't store "priority" in our dictionary
  $value[12] = ""; # we don't store "linebreakstyle" in our dictionary
  $value[13] =
    ($properties =~ m/^(.*)direction:([a-z]*)(.*)$/) ? $2 : "";
  $value[14] = ($properties =~ m/integral/);
  $value[15] = ($properties =~ m/mirrorable/);

  # 1.3) save the key and value
  $moz_hash{$key} = [ @value ];
}
close($moz_file);
################################################################################
# 2) If mode "make-js", generate tests/stretchy-and-large-operators.js and quit.
#    If mode "check", verify validity of our operator dictionary and quit.
#    If mode "compare", go to step 3)
if ($ARGV[0] eq "make-js") {
  # Write a JavaScript array with one item per large operator (third field
  # 'l') and one item per stretchy operator (third field is the first letter
  # of its direction).
  print "generating file $FILE_JS...\n";
  open(my $js_file, ">", $FILE_JS) ||
    die("Couldn't open $FILE_JS: $!");

  print {$js_file} "// This file is automatically generated. Do not edit.\n";
  print {$js_file} "var stretchy_and_large_operators = [";

  # Iterate in sorted order so that the generated file is the same on every
  # run (the order of "keys" on a hash is not stable).
  foreach my $key (sort(keys(%moz_hash))) {
    my @moz = @{ $moz_hash{$key} };

    # Ignore keys that are not of the form operator.<characters>.<form>
    # rather than reusing the captures of a previous match.
    next unless
      ($key =~ m/^operator\.([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
    my ($characters, $form) = ($1, $2);

    # The leading backslash is doubled in the output so that the name is a
    # readable "\uNNNN.form: " label once parsed as a JavaScript string.
    my $opname = "\\${characters}.${form}: ";

    if ($moz[4]) {
      print {$js_file} "['$opname', '$characters','l','$form'],";
    }

    if ($moz[6]) {
      my $direction = substr($moz[13], 0, 1);
      print {$js_file} "['$opname', '$characters','$direction','$form'],";
    }
  }

  print {$js_file} "];\n";
  close($js_file) ||
    die("Couldn't write $FILE_JS: $!");
  exit 0;
}
if ($ARGV[0] eq "check") {
  # Verify our operator dictionary and write the problems found to
  # $FILE_SYNTAX_ERRORS:
  #  - each entry must have a valid direction and consistent "integral" data;
  #  - all the forms (prefix, infix, postfix) of an operator must have the
  #    same direction.
  print "checking operator dictionary...\n";
  open(my $report, ">", $FILE_SYNTAX_ERRORS) ||
    die("Couldn't open $FILE_SYNTAX_ERRORS: $!");

  my $nb_errors = 0;
  my $nb_warnings = 0;

  # check the validity of our private data
  # (sorted so that the report is the same on every run)
  foreach my $key (sort(keys(%moz_hash))) {
    my @moz = @{ $moz_hash{$key} };
    my @problems = ();

    if (!($moz[13] eq "" ||
          $moz[13] eq "horizontal" ||
          $moz[13] eq "vertical")) {
      $nb_errors++;
      push(@problems, "error: invalid direction \"$moz[13]\"\n");
    }

    if (!$moz[4] && $moz[14]) {
      $nb_warnings++;
      push(@problems, "warning: operator is integral but not largeop\n");
    }

    if (($moz[0] =~ m/[iI]ntegral/) && !$moz[14]) {
      $nb_warnings++;
      push(@problems, "warning: operator contains the term \"integral\" in its comment, but is not integral\n");
    }

    if (@problems) {
      # The messages are followed by the offending entry and a blank line.
      print {$report} @problems, generateEntry($key, @moz), "\n";
    }
  }

  # check that all forms have the same direction.
  foreach my $key (sort(keys(%moz_hash))) {
    # The operator may already have been removed from the hash table while
    # handling one of its other forms.
    next unless exists($moz_hash{$key});
    next unless ($key =~ m/^([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
    my $operator = $1;

    # Keys of the forms that exist for this operator, in the order
    # prefix, infix, postfix. The list contains at least $key itself.
    my @form_keys = grep { exists($moz_hash{$_}) }
                    map { "$operator.$_" } ("prefix", "infix", "postfix");

    # All the forms agree if and only if they all agree with the first one.
    my $direction = $moz_hash{$form_keys[0]}[13];
    my @mismatches = grep { $moz_hash{$_}[13] ne $direction } @form_keys;

    if (@mismatches) {
      $nb_errors++;
      print {$report}
        "error: operator has a stretchy form, but all forms";
      print {$report}
        " have not the same direction\n";
      foreach my $form_key (@form_keys) {
        print {$report} generateEntry($form_key, @{ $moz_hash{$form_key} });
      }
      print {$report} "\n";
    }

    # Remove every form of the operator so that it is only reported once.
    # (The prefix form used to be deleted with the key "$key.prefix", that is
    # $key concatenated with "prefix", which never matched any entry.)
    delete @moz_hash{@form_keys};
  }

  close($report) ||
    die("Couldn't write $FILE_SYNTAX_ERRORS: $!");

  print "\n";
  if ($nb_errors > 0 || $nb_warnings > 0) {
    print "$nb_errors error(s) found\n";
    print "$nb_warnings warning(s) found\n";
    print "See output file $FILE_SYNTAX_ERRORS.\n\n";
  } else {
    print "No error found.\n\n";
  }

  exit 0;
}
################################################################################
# 3) build %wg_hash and @wg_keys from the page $WG_DICTIONARY
#
# Each /root/entry element of the document provides one entry. @wg_keys
# records the keys in document order and is reversed at the end, so that
# pop(@wg_keys) returns them in document order.
print "loading $WG_DICTIONARY...\n";
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file($WG_DICTIONARY);

print "building dictionary...\n";
@wg_keys = ();

foreach my $entry ($doc->findnodes('/root/entry')) {
  # 3.1) build the key
  my $key = "operator.";

  # The "unicode" attribute is read as a "-"-separated list of code points.
  # A trailing "-" is appended so that every code point is followed by one.
  my $codepoints = $entry->getAttribute("unicode") . "-";
  while (1) {
    if ($codepoints =~ m/^U?0(\w*)-(.*)$/) {
      # Concatenate .\uNNNN
      $key = "$key\\u$1";
      $codepoints = $2;
    } elsif ($codepoints =~ m/^U?([1-9A-Fa-f][0-9A-Fa-f]{4,5})-(.*)$/ &&
             hex($1) <= 0x10FFFF) {
      # Code point without a leading zero, that is above U+FFFF: concatenate
      # the two \uNNNN of its UTF-16 surrogate pair instead of dropping it.
      # NOTE(review): this assumes mathfont.properties spells such characters
      # as surrogate pairs - confirm against the property file.
      my $offset = hex($1) - 0x10000;
      $codepoints = $2;
      $key = sprintf("%s\\u%04X\\u%04X", $key,
                     0xD800 + ($offset >> 10),
                     0xDC00 + ($offset & 0x3FF));
    } else {
      last;
    }
  }

  $key = "$key." . $entry->getAttribute("form"); # "Form"

  # 3.2) build the array
  my @value = ();
  $value[0] = lc($entry->getAttribute("description"));

  # Missing attributes get the default value.
  $value[1] = $entry->getAttribute("lspace");
  if ($value[1] eq "") { $value[1] = "5"; }
  $value[2] = $entry->getAttribute("rspace");
  if ($value[2] eq "") { $value[2] = "5"; }
  $value[3] = $entry->getAttribute("minsize");
  if ($value[3] eq "") { $value[3] = "1"; }

  # The boolean properties are true when their name appears in the
  # "properties" attribute.
  my $properties = $entry->getAttribute("properties");
  $value[4] = ($properties =~ m/largeop/);
  $value[5] = ($properties =~ m/movablelimits/);
  $value[6] = ($properties =~ m/stretchy/);
  $value[7] = ($properties =~ m/separator/);
  $value[8] = ($properties =~ m/accent/);
  $value[9] = ($properties =~ m/fence/);
  $value[10] = ($properties =~ m/symmetric/);
  $value[15] = ($properties =~ m/mirrorable/);
  $value[11] = $entry->getAttribute("priority");
  $value[12] = $entry->getAttribute("linebreakstyle");

  # not stored in the WG dictionary
  $value[13] = ""; # direction
  $value[14] = ""; # integral

  # 3.3) save the key and value
  push(@wg_keys, $key);
  $wg_hash{$key} = [ @value ];
}
@wg_keys = reverse(@wg_keys);
################################################################################
# 4) Compare the two dictionaries and output the result
#
# $FILE_DIFFERENCES receives one record per conflicting, new or obsolete entry
# and $FILE_NEW_DICTIONARY a candidate replacement for our dictionary.
print "comparing dictionaries...\n";
open(my $differences, ">", $FILE_DIFFERENCES) ||
  die("Couldn't open $FILE_DIFFERENCES: $!");
open(my $new_dictionary, ">", $FILE_NEW_DICTIONARY) ||
  die("Couldn't open $FILE_NEW_DICTIONARY: $!");

my ($conflicting, $conflicting_stretching) = (0, 0);
my ($new, $new_stretching) = (0, 0);
my ($obsolete, $obsolete_stretching) = (0, 0);
my $unchanged = 0;

# 4.1) look to the entries of the WG dictionary
# (@wg_keys has been reversed, so pop() follows the order of the document)
while (my $key = pop(@wg_keys)) {
  my @wg = @{ $wg_hash{$key} };
  delete $wg_hash{$key};
  my $wg_value = generateCommon(@wg);

  if (exists($moz_hash{$key})) {
    # entry is in both dictionary
    my @moz = @{ $moz_hash{$key} };
    delete $moz_hash{$key};
    my $moz_value = generateCommon(@moz);

    if ($moz_value ne $wg_value) {
      # conflicting entry
      print {$differences} "[conflict]";
      $conflicting++;
      if ($moz[6] != $wg[6]) {
        print {$differences} "[stretching]";
        $conflicting_stretching++;
      }
      print {$differences} " - $key ($wg[0])\n";
      print {$differences} "-$moz_value\n+$wg_value\n\n";
    } else {
      # unchanged entry
      $unchanged++;
    }

    # In both cases the new entry uses the data of the WG, completed with our
    # private data.
    print {$new_dictionary} completeCommon($wg_value, $key, @moz, @wg);
  } else {
    # we don't have this entry in our dictionary yet
    print {$differences} "[new entry]";
    $new++;
    if ($wg[6]) {
      print {$differences} "[stretching]";
      $new_stretching++;
    }
    print {$differences} " - $key ($wg[0])\n";
    print {$differences} "-\n+$wg_value\n\n";
    # Only the data of the WG is available for this entry.
    print {$new_dictionary} completeCommon($wg_value, $key, @wg);
  }
}

print {$new_dictionary}
  "\n# Entries below are not part of the official MathML dictionary\n\n";

# 4.2) look in our dictionary the remaining entries
foreach my $key (sort(keys(%moz_hash))) {
  my @moz = @{ $moz_hash{$key} };
  my $moz_value = generateCommon(@moz);

  print {$differences} "[obsolete entry]";
  $obsolete++;
  if ($moz[6]) {
    print {$differences} "[stretching]";
    $obsolete_stretching++;
  }
  print {$differences} " - $key ($moz[0])\n";
  print {$differences} "-$moz_value\n+\n\n";
  # Only our own data is available for this entry.
  print {$new_dictionary} completeCommon($moz_value, $key, @moz);
}

# Buffered write errors only show up when the handles are closed.
close($differences) ||
  die("Couldn't write $FILE_DIFFERENCES: $!");
close($new_dictionary) ||
  die("Couldn't write $FILE_NEW_DICTIONARY: $!");

print "\n";
print "- $obsolete obsolete entries ";
print "($obsolete_stretching of them are related to stretching)\n";
print "- $unchanged unchanged entries\n";
print "- $conflicting conflicting entries ";
print "($conflicting_stretching of them are related to stretching)\n";
print "- $new new entries ";
print "($new_stretching of them are related to stretching)\n";
print "\nSee output files $FILE_DIFFERENCES and $FILE_NEW_DICTIONARY.\n\n";
# The command is separated from the sentence and named as in the usage text.
print "After having modified the dictionary, please run ";
print "./updateOperatorDictionary.pl check\n\n";
exit 0;
- ################################################################################
sub usage {
  # Print the command syntax accepted by the script on the standard output,
  # then terminate the script with the exit status 0.
  print(
    "usage:\n",
    " ./updateOperatorDictionary.pl download [unicode.xml]\n",
    " ./updateOperatorDictionary.pl compare [dictionary.xml]\n",
    " ./updateOperatorDictionary.pl check\n",
    " ./updateOperatorDictionary.pl make-js\n",
    " ./updateOperatorDictionary.pl clean\n",
  );
  exit 0;
}
sub generateCommon {
  # helper function to generate the string of data shared by both dictionaries
  #
  # Parameters: the table of an entry (see the indices below).
  # Returns: "lspace:<1> rspace:<2>", followed by " minsize:<3>" when the
  # minsize is not "1" and by the name of each boolean property that is true,
  # separated by single spaces.
  #
  # The string is built in lexical variables: the package variable $entry of
  # the caller is left untouched.
  my (@v) = @_;

  my @parts = ("lspace:$v[1]", "rspace:$v[2]");
  if ($v[3] ne "1") { push(@parts, "minsize:$v[3]"); }

  # Boolean properties, in the order in which they are written.
  my @flags = (
    [4, "largeop"],
    [5, "movablelimits"],
    [6, "stretchy"],
    [7, "separator"],
    [8, "accent"],
    [9, "fence"],
    [10, "symmetric"],
    [15, "mirrorable"],
  );
  foreach my $flag (@flags) {
    my ($index, $name) = @{ $flag };
    if ($v[$index]) { push(@parts, $name); }
  }

  return join(" ", @parts);
}
sub completeCommon {
  # helper to add key and private data to generateCommon
  #
  # Parameters:
  #  - $entry: string returned by generateCommon
  #  - $key:   key of the entry
  #  - the table of our dictionary followed by the table of the WG
  #    dictionary, flattened into the argument list. When a single table is
  #    given, it is used for both.
  # Returns: the line "<key> = <entry> [private data] # <description>\n".
  my ($entry, $key, @fields) = @_;

  # The two tables arrive as one flat list, so they are separated again using
  # the number of elements of a table (indices 0 to 15). Assigning the
  # arguments to two arrays would leave the second one always empty.
  my $table_size = 16;
  my @v_moz = @fields;
  my @v_wg = @fields;
  if (@fields > $table_size) {
    @v_moz = @fields[0 .. $table_size - 1];
    @v_wg = @fields[$table_size .. $#fields];
  }

  # generateCommon already writes "mirrorable": do not write it twice.
  my $has_mirrorable = ($entry =~ m/\bmirrorable\b/);

  $entry = "$key = $entry";

  if ($v_moz[13]) { $entry = "$entry direction:$v_moz[13]"; }
  if ($v_moz[14]) { $entry = "$entry integral"; }
  if ($v_moz[15] && !$has_mirrorable) { $entry = "$entry mirrorable"; }

  if ($v_moz[0]) {
    # keep our previous comment
    $entry = "$entry # $v_moz[0]";
  } else {
    # otherwise use the description given by the WG
    $entry = "$entry # $v_wg[0]";
  }

  $entry = "$entry\n";
  return $entry;
}
sub generateEntry {
  # helper function to generate an entry of our operator dictionary
  #
  # Parameters: the key of the entry, followed by its table.
  # Returns: the line built by completeCommon from the string returned by
  # generateCommon, with the table used both as our data and as the data of
  # the WG.
  my ($key, @moz) = @_;

  # Lexical variable: the package variable $entry is not used as a temporary.
  my $common = generateCommon(@moz);
  return completeCommon($common, $key, @moz, @moz);
}
|