123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262 |
- import argparse
- import glob
- import HTMLParser
- import logging
- import os
- import re
- import sys
- import urllib2
- # Import compare-locales parser from parent folder.
- script_path = os.path.dirname(os.path.realpath(__file__))
- compare_locales_path = os.path.join(script_path, '../../../compare-locales')
- sys.path.insert(0, compare_locales_path)
- from compare_locales import parser
# Log every message as " [LEVEL] message", showing INFO and above.
logging.basicConfig(level=logging.INFO, format=' [%(levelname)s] %(message)s')

# MPL 2.0 license header written at the top of newly created properties
# files when no header can be derived from the en-US file.
DEFAULT_HEADER = ''.join([
    '# This Source Code Form is subject to the terms of the Mozilla Public\n',
    '# License, v. 2.0. If a copy of the MPL was not distributed with this\n',
    '# file, You can obtain one at http://mozilla.org/MPL/2.0/.\n',
])

# Base url of the en-US devtools properties files on mozilla-central. These
# files are downloaded and parsed for localization notes.
CENTRAL_BASE_URL = ('https://hg.mozilla.org/mozilla-central/raw-file/tip/'
                    'devtools/client/locales/en-US/')
# Shared parser instance used to unescape HTML entities found in DTD values.
HTML_PARSER = HTMLParser.HTMLParser()

# Lines of the properties files fetched over the network, keyed by filename.
central_prop_cache = dict()

# Parsed entities of the existing DTD files, keyed by DTD path.
dtd_entities_cache = dict()
def get_central_prop_content(prop_filename):
    """Return the lines of a properties file fetched from mozilla-central.

    The file is downloaded from CENTRAL_BASE_URL + prop_filename. The result
    (including the empty result of a failed download) is stored in
    central_prop_cache, so each file is requested at most once per run.

    Args:
        prop_filename: name of the properties file, appended to
            CENTRAL_BASE_URL to build the download url.

    Returns:
        The list of lines of the file as returned by readlines(), or an
        empty list if the file could not be retrieved or read.
    """
    if prop_filename in central_prop_cache:
        return central_prop_cache[prop_filename]

    url = CENTRAL_BASE_URL + prop_filename
    logging.info('loading localization file from central: {%s}' % url)

    lines = []
    try:
        response = urllib2.urlopen(url)
        try:
            lines = response.readlines()
        finally:
            # Always release the connection, even if reading fails midway.
            response.close()
    except Exception:
        # Best effort: a missing file only means there is no localization
        # note or header to copy. Unlike a bare except, this lets
        # KeyboardInterrupt and SystemExit propagate.
        logging.error('failed to load properties file from central: {%s}'
                      % url)
        lines = []

    central_prop_cache[prop_filename] = lines
    return lines
def get_localization_note(prop_name, prop_filename):
    """Return the current en-US localization note for a property.

    The note is the uninterrupted run of comment lines (lines starting with
    '#') located immediately above the line defining prop_name in the en-US
    properties file retrieved by get_central_prop_content.

    Args:
        prop_name: key of the property to look up.
        prop_filename: name of the properties file containing the key.

    Returns:
        The comment lines joined with '\\n', or an empty string if the
        property has no comment above it or is not defined in the file.
    """
    prop_content = get_central_prop_content(prop_filename)

    # A definition line starts with the key, optionally followed by
    # whitespace, then '='. Compiled once rather than on every line.
    definition_regex = re.compile('^' + re.escape(prop_name) + r'\s*=')

    comment_buffer = []
    for line in prop_content:
        # Remove line breaks.
        line = line.strip('\n').strip('\r')

        if line.startswith('#'):
            # Comment line, add to the current comment buffer.
            comment_buffer.append(line)
        elif definition_regex.search(line):
            # Property found, the current comment buffer is the localization
            # note.
            return '\n'.join(comment_buffer)
        else:
            # No match, not a comment, reinitialize the comment buffer.
            comment_buffer = []

    # Property not found: comments left in the buffer (e.g. trailing comments
    # at the end of the file) belong to no property and must not be returned.
    return ''
def get_dtd_entities(dtd_path):
    """Return the parsed DTD entities for dtd_path, parsing it on first use.

    The file is read and parsed with the compare-locales '.dtd' parser. The
    value returned by its parse() method is stored in dtd_entities_cache
    under dtd_path and reused for later calls with the same path.
    """
    try:
        return dtd_entities_cache[dtd_path]
    except KeyError:
        # Not parsed yet, fall through and parse the file.
        pass

    reader = parser.getParser('.dtd')
    reader.readFile(dtd_path)
    parsed = reader.parse()
    dtd_entities_cache[dtd_path] = parsed
    return parsed
def get_translation_from_dtd(dtd_path, entity_name):
    """Return the value of entity_name in the DTD file at dtd_path.

    HTML entities in the value are unescaped and the result is encoded as
    UTF-8. Returns None when the DTD file does not define entity_name.
    """
    entities, index_by_name = get_dtd_entities(dtd_path)

    if entity_name in index_by_name:
        entity = entities[index_by_name[entity_name]]
        unescaped = HTML_PARSER.unescape(entity.val)
        return unescaped.encode('utf-8')

    # Translation is missing.
    return None
def get_properties_header(prop_filename):
    """Return the header and file wide comments of a properties file.

    The header is every line of the en-US file (as retrieved by
    get_central_prop_content) located before the first property definition
    or the first "LOCALIZATION NOTE (...)" comment. Lines are joined with
    '\\n'. DEFAULT_HEADER is returned when the en-US file has no content.
    """
    lines = get_central_prop_content(prop_filename)
    if not lines:
        # Nothing to extract the header from, use the default license header.
        return DEFAULT_HEADER

    # Regexp matching keys, extracted from parser.py.
    entity_pattern = ('^(\s*)'
                      '((?:[#!].*?\n\s*)*)'
                      '([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*')
    # Regexp matching a localization note attached to a specific key.
    loc_note_pattern = ('^(\s*)'
                        '\#\s*LOCALIZATION NOTE\s*\([^)]+\)')

    header_lines = []
    for raw_line in lines:
        # Remove line breaks.
        text = raw_line.strip('\n').strip('\r')
        if re.search(entity_pattern, text) or re.search(loc_note_pattern,
                                                        text):
            # First entity or localization note reached: header is complete.
            break
        header_lines.append(text)

    return '\n'.join(header_lines)
def create_properties_file(prop_path):
    """Create a new properties file at prop_path, containing only a header.

    The header is the one returned by get_properties_header for the basename
    of prop_path. An existing file at prop_path is overwritten.

    Args:
        prop_path: path of the properties file to create.
    """
    logging.info('creating new *.properties file: {%s}' % prop_path)

    header = get_properties_header(os.path.basename(prop_path))

    # The context manager closes the file even if the write fails.
    with open(prop_path, 'w') as prop_file:
        prop_file.write(header)
def migrate_string(dtd_path, prop_path, dtd_name, prop_name):
    """Migrate a single string entry from a dtd file to a properties file.

    The translation of dtd_name is read from dtd_path and appended to
    prop_path as "prop_name=translation", preceded by the en-US localization
    note of prop_name when one is found. The properties file is created if
    it does not exist yet.

    Nothing is written (and a message is logged) when the dtd file or the
    translation is missing, when the properties file cannot be created, or
    when prop_name is already defined in the properties file.

    Args:
        dtd_path: path of the source dtd file.
        prop_path: path of the destination properties file.
        dtd_name: name of the entity to read in the dtd file.
        prop_name: key to use in the properties file.
    """
    if not os.path.isfile(dtd_path):
        logging.error('dtd file can not be found at: {%s}' % dtd_path)
        return

    translation = get_translation_from_dtd(dtd_path, dtd_name)
    if not translation:
        logging.error('translation could not be found for: {%s} in {%s}'
                      % (dtd_name, dtd_path))
        return

    # Create properties file if missing.
    if not os.path.isfile(prop_path):
        create_properties_file(prop_path)

    if not os.path.isfile(prop_path):
        logging.error('could not create new properties file at: {%s}'
                      % prop_path)
        return

    prop_line = prop_name + '=' + translation + '\n'

    with open(prop_path, 'r') as prop_file:
        prop_file_content = prop_file.read()

    # Skip the string if it already exists in the destination file. The
    # comparison is anchored at the start of a line: a plain substring test
    # would also match a different key ending with prop_name (for instance
    # "other.label=X" when migrating "label=X").
    if ('\n' + prop_line) in ('\n' + prop_file_content):
        logging.warning('string already migrated, skipping: {%s}' % prop_name)
        return

    # Skip the string and log an error if an existing entry is found, but with
    # a different value.
    if re.search('(^|\n)' + re.escape(prop_name) + r'\s*=', prop_file_content):
        logging.error('existing string found, skipping: {%s}' % prop_name)
        return

    prop_filename = os.path.basename(prop_path)
    logging.info('migrating {%s} in {%s}' % (prop_name, prop_filename))

    # Look up the note before opening the destination file, so the file is
    # not held open during a possible network request.
    localization_note = get_localization_note(prop_name, prop_filename)
    if not localization_note:
        logging.warning('localization notes could not be found for: {%s}'
                        % prop_name)

    with open(prop_path, 'a') as prop_file:
        if localization_note:
            prop_file.write('\n' + localization_note)
        prop_file.write('\n' + prop_line)
def migrate_conf(conf_path, l10n_path):
    """Apply the migration instructions of a configuration file.

    Each instruction line is expected to use the syntax:
        ${prop_path}:${prop_name} = ${dtd_path}:${dtd_name}
    where both paths are relative to l10n_path. Lines without ' = ' are
    ignored silently. Lines containing ' = ' that do not match the syntax
    are skipped with a warning, so one bad line does not abort the
    migration.

    Args:
        conf_path: path of the configuration file to read.
        l10n_path: folder against which the dtd and properties paths of the
            configuration file are resolved.
    """
    with open(conf_path, 'r') as conf_file:
        lines = conf_file.readlines()

    for line_number, line in enumerate(lines, 1):
        # Remove line breaks.
        line = line.strip('\n').strip('\r')

        # Skip lines that are not instructions (comments, blank lines...).
        if ' = ' not in line:
            continue

        try:
            prop_info, dtd_info = line.split(' = ')
            prop_path, prop_name = prop_info.split(':')
            dtd_path, dtd_name = dtd_info.split(':')
        except ValueError:
            # Wrong number of ' = ' or ':' separators.
            logging.warning('skipping malformed line %d in {%s}: {%s}'
                            % (line_number, conf_path, line))
            continue

        migrate_string(os.path.join(l10n_path, dtd_path),
                       os.path.join(l10n_path, prop_path),
                       dtd_name, prop_name)
def main():
    """Command line entry point: migrate devtools localized strings.

    Expects the path to a l10n repository and, through -c/--config, the path
    to a configuration file or to a folder of configuration files. Each
    configuration file is applied with migrate_conf to the devtools/client/
    folder of the l10n repository.

    Exits with status 1 if the l10n path or the config path is invalid.
    """
    # Read command line arguments.
    arg_parser = argparse.ArgumentParser(
        description='Migrate devtools localized strings.')
    arg_parser.add_argument('path', type=str, help='path to l10n repository')
    # Required: without it there is nothing to migrate, and the path checks
    # below would fail with a TypeError on None.
    arg_parser.add_argument('-c', '--config', type=str, required=True,
                            help='path to configuration file or folder')
    args = arg_parser.parse_args()

    # Retrieve path to devtools localization files in l10n repository.
    devtools_l10n_path = os.path.join(args.path, 'devtools/client/')
    if not os.path.exists(devtools_l10n_path):
        logging.error('l10n path is invalid: {%s}' % devtools_l10n_path)
        sys.exit(1)
    logging.info('l10n path is valid: {%s}' % devtools_l10n_path)

    # Retrieve configuration files to apply.
    if os.path.isdir(args.config):
        # Join with the folder so its content is listed whether or not the
        # path ends with a separator. Keep regular files only, in a stable
        # order.
        candidates = glob.glob(os.path.join(args.config, '*'))
        conf_files = sorted(path for path in candidates
                            if os.path.isfile(path))
    elif os.path.isfile(args.config):
        conf_files = [args.config]
    else:
        logging.error('config path is invalid: {%s}' % args.config)
        sys.exit(1)

    # Perform migration for each configuration file.
    for conf_file in conf_files:
        logging.info('performing migration for config file: {%s}' % conf_file)
        migrate_conf(conf_file, devtools_l10n_path)


if __name__ == '__main__':
    main()
|