main.py 9.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262
  1. import argparse
  2. import glob
  3. import HTMLParser
  4. import logging
  5. import os
  6. import re
  7. import sys
  8. import urllib2
  9. # Import compare-locales parser from parent folder.
  10. script_path = os.path.dirname(os.path.realpath(__file__))
  11. compare_locales_path = os.path.join(script_path, '../../../compare-locales')
  12. sys.path.insert(0, compare_locales_path)
  13. from compare_locales import parser
  14. # Configure logging format and level
  15. logging.basicConfig(format=' [%(levelname)s] %(message)s', level=logging.INFO)
  16. # License header to use when creating new properties files.
  17. DEFAULT_HEADER = ('# This Source Code Form is subject to the terms of the '
  18. 'Mozilla Public\n# License, v. 2.0. If a copy of the MPL '
  19. 'was not distributed with this\n# file, You can obtain '
  20. 'one at http://mozilla.org/MPL/2.0/.\n')
  21. # Base url to retrieve properties files on central, that will be parsed for
  22. # localization notes.
  23. CENTRAL_BASE_URL = ('https://hg.mozilla.org/'
  24. 'mozilla-central/raw-file/tip/'
  25. 'devtools/client/locales/en-US/')
  26. # HTML parser to translate HTML entities in dtd files.
  27. HTML_PARSER = HTMLParser.HTMLParser()
  28. # Cache to store properties files retrieved over the network.
  29. central_prop_cache = {}
  30. # Cache the parsed entities from the existing DTD files.
  31. dtd_entities_cache = {}
  32. # Retrieve the content of the current version of a properties file for the
  33. # provided filename, from devtools/client on mozilla central. Will return an
  34. # empty array if the file can't be retrieved or read.
  35. def get_central_prop_content(prop_filename):
  36. if prop_filename in central_prop_cache:
  37. return central_prop_cache[prop_filename]
  38. url = CENTRAL_BASE_URL + prop_filename
  39. logging.info('loading localization file from central: {%s}' % url)
  40. try:
  41. central_prop_cache[prop_filename] = urllib2.urlopen(url).readlines()
  42. except:
  43. logging.error('failed to load properties file from central: {%s}'
  44. % url)
  45. central_prop_cache[prop_filename] = []
  46. return central_prop_cache[prop_filename]
  47. # Retrieve the current en-US localization notes for the provided prop_name.
  48. def get_localization_note(prop_name, prop_filename):
  49. prop_content = get_central_prop_content(prop_filename)
  50. comment_buffer = []
  51. for i, line in enumerate(prop_content):
  52. # Remove line breaks.
  53. line = line.strip('\n').strip('\r')
  54. if line.startswith('#'):
  55. # Comment line, add to the current comment buffer.
  56. comment_buffer.append(line)
  57. elif re.search('(^|\n)' + re.escape(prop_name) + '\s*=', line):
  58. # Property found, the current comment buffer is the localization
  59. # note.
  60. break;
  61. else:
  62. # No match, not a comment, reinitialize the comment buffer.
  63. comment_buffer = []
  64. return '\n'.join(comment_buffer)
  65. # Retrieve the parsed DTD entities for a provided path. Results are cached by
  66. # dtd path.
  67. def get_dtd_entities(dtd_path):
  68. if dtd_path in dtd_entities_cache:
  69. return dtd_entities_cache[dtd_path]
  70. dtd_parser = parser.getParser('.dtd')
  71. dtd_parser.readFile(dtd_path)
  72. dtd_entities_cache[dtd_path] = dtd_parser.parse()
  73. return dtd_entities_cache[dtd_path]
  74. # Extract the value of an entity in a dtd file.
  75. def get_translation_from_dtd(dtd_path, entity_name):
  76. entities, map = get_dtd_entities(dtd_path)
  77. if entity_name not in map:
  78. # Bail out if translation is missing.
  79. return
  80. key = map[entity_name]
  81. entity = entities[key]
  82. translation = HTML_PARSER.unescape(entity.val)
  83. return translation.encode('utf-8')
  84. # Extract the header and file wide comments for the provided properties file
  85. # filename.
  86. def get_properties_header(prop_filename):
  87. prop_content = get_central_prop_content(prop_filename)
  88. # if the file content is empty, return the default license header.
  89. if len(prop_content) == 0:
  90. return DEFAULT_HEADER
  91. header_buffer = []
  92. for i, line in enumerate(prop_content):
  93. # remove line breaks.
  94. line = line.strip('\n').strip('\r')
  95. # regexp matching keys extracted form parser.py.
  96. is_entity_line = re.search('^(\s*)'
  97. '((?:[#!].*?\n\s*)*)'
  98. '([^#!\s\n][^=:\n]*?)\s*[:=][ \t]*', line)
  99. is_loc_note = re.search('^(\s*)'
  100. '\#\s*LOCALIZATION NOTE\s*\([^)]+\)', line)
  101. if is_entity_line or is_loc_note:
  102. # header finished, break the loop.
  103. break
  104. else:
  105. # header line, add to the current buffer.
  106. header_buffer.append(line)
  107. # concatenate the current buffer and return.
  108. return '\n'.join(header_buffer)
  109. # Create a new properties file at the provided path.
  110. def create_properties_file(prop_path):
  111. logging.info('creating new *.properties file: {%s}' % prop_path)
  112. prop_filename = os.path.basename(prop_path)
  113. header = get_properties_header(prop_filename)
  114. prop_file = open(prop_path, 'w+')
  115. prop_file.write(header)
  116. prop_file.close()
  117. # Migrate a single string entry for a dtd to a properties file.
  118. def migrate_string(dtd_path, prop_path, dtd_name, prop_name):
  119. if not os.path.isfile(dtd_path):
  120. logging.error('dtd file can not be found at: {%s}' % dtd_path)
  121. return
  122. translation = get_translation_from_dtd(dtd_path, dtd_name)
  123. if not translation:
  124. logging.error('translation could not be found for: {%s} in {%s}'
  125. % (dtd_name, dtd_path))
  126. return
  127. # Create properties file if missing.
  128. if not os.path.isfile(prop_path):
  129. create_properties_file(prop_path)
  130. if not os.path.isfile(prop_path):
  131. logging.error('could not create new properties file at: {%s}'
  132. % prop_path)
  133. return
  134. prop_line = prop_name + '=' + translation + '\n'
  135. # Skip the string if it already exists in the destination file.
  136. prop_file_content = open(prop_path, 'r').read()
  137. if prop_line in prop_file_content:
  138. logging.warning('string already migrated, skipping: {%s}' % prop_name)
  139. return
  140. # Skip the string and log an error if an existing entry is found, but with
  141. # a different value.
  142. if re.search('(^|\n)' + re.escape(prop_name) + '\s*=', prop_file_content):
  143. logging.error('existing string found, skipping: {%s}' % prop_name)
  144. return
  145. prop_filename = os.path.basename(prop_path)
  146. logging.info('migrating {%s} in {%s}' % (prop_name, prop_filename))
  147. with open(prop_path, 'a') as prop_file:
  148. localization_note = get_localization_note(prop_name, prop_filename)
  149. if len(localization_note):
  150. prop_file.write('\n' + localization_note)
  151. else:
  152. logging.warning('localization notes could not be found for: {%s}'
  153. % prop_name)
  154. prop_file.write('\n' + prop_line)
  155. # Apply the migration instructions in the provided configuration file.
  156. def migrate_conf(conf_path, l10n_path):
  157. f = open(conf_path, 'r')
  158. lines = f.readlines()
  159. f.close()
  160. for i, line in enumerate(lines):
  161. # Remove line breaks.
  162. line = line.strip('\n').strip('\r')
  163. # Skip invalid lines.
  164. if ' = ' not in line:
  165. continue
  166. # Expected syntax: ${prop_path}:${prop_name} = ${dtd_path}:${dtd_name}.
  167. prop_info, dtd_info = line.split(' = ')
  168. prop_path, prop_name = prop_info.split(':')
  169. dtd_path, dtd_name = dtd_info.split(':')
  170. dtd_path = os.path.join(l10n_path, dtd_path)
  171. prop_path = os.path.join(l10n_path, prop_path)
  172. migrate_string(dtd_path, prop_path, dtd_name, prop_name)
  173. def main():
  174. # Read command line arguments.
  175. arg_parser = argparse.ArgumentParser(
  176. description='Migrate devtools localized strings.')
  177. arg_parser.add_argument('path', type=str, help='path to l10n repository')
  178. arg_parser.add_argument('-c', '--config', type=str,
  179. help='path to configuration file or folder')
  180. args = arg_parser.parse_args()
  181. # Retrieve path to devtools localization files in l10n repository.
  182. devtools_l10n_path = os.path.join(args.path, 'devtools/client/')
  183. if not os.path.exists(devtools_l10n_path):
  184. logging.error('l10n path is invalid: {%s}' % devtools_l10n_path)
  185. exit()
  186. logging.info('l10n path is valid: {%s}' % devtools_l10n_path)
  187. # Retrieve configuration files to apply.
  188. if os.path.isdir(args.config):
  189. conf_files = glob.glob(args.config + '*')
  190. elif os.path.isfile(args.config):
  191. conf_files = [args.config]
  192. else:
  193. logging.error('config path is invalid: {%s}' % args.config)
  194. exit()
  195. # Perform migration for each configuration file.
  196. for conf_file in conf_files:
  197. logging.info('performing migration for config file: {%s}' % conf_file)
  198. migrate_conf(conf_file, devtools_l10n_path)
  199. if __name__ == '__main__':
  200. main()