fix_unicode.py 2.4 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273
#
# Copyright (c) Contributors to the Open 3D Engine Project.
# For complete copyright and license terms please see the LICENSE at the root of this distribution.
#
# SPDX-License-Identifier: Apache-2.0 OR MIT
#
#
  8. import argparse
  9. import fnmatch
  10. import os
  11. handled_file_patterns = [
  12. '*.c', '*.cc', '*.cpp', '*.cxx', '*.h', '*.hpp', '*.hxx', '*.inl', '*.m', '*.mm', '*.cs', '*.java',
  13. '*.py', '*.lua', '*.bat', '*.cmd', '*.sh', '*.js',
  14. '*.cmake', 'CMakeLists.txt'
  15. ]
  16. replacement_map = {
  17. 0xA0: ' ',
  18. 0xA6: '|',
  19. 0x2019: '\'',
  20. 0x2014: '-',
  21. 0x2191: '^',
  22. 0x2212: '-',
  23. 0x2217: '*',
  24. 0x2248: 'is close to',
  25. 0xFEFF: '',
  26. }
  27. def fixUnicode(input_file):
  28. try:
  29. basename = os.path.basename(input_file)
  30. for pattern in handled_file_patterns:
  31. if fnmatch.fnmatch(basename, pattern):
  32. with open(input_file, 'r', encoding='utf-8', errors='replace') as fh:
  33. fileContents = fh.read()
  34. modified = False
  35. for uni, repl in replacement_map.items():
  36. uni_str = chr(uni)
  37. if uni_str in fileContents:
  38. fileContents = fileContents.replace(uni_str, repl)
  39. modified = True
  40. if modified:
  41. with open(input_file, 'w') as destination_file:
  42. destination_file.writelines(fileContents)
  43. print(f'[INFO] Patched {input_file}')
  44. break
  45. except (IOError, UnicodeDecodeError) as err:
  46. print('[ERROR] reading {}: {}'.format(input_file, err))
  47. return
  48. def main():
  49. """script main function"""
  50. parser = argparse.ArgumentParser(description='This script replaces unicode characters, some of them are replaced for spaces (e.g. xA0), others are replaced with the escape sequence',
  51. formatter_class=argparse.RawTextHelpFormatter)
  52. parser.add_argument('file_or_dir', type=str, nargs='+',
  53. help='list of files or directories to search within for files to fix up unicode characters')
  54. args = parser.parse_args()
  55. for input_file in args.file_or_dir:
  56. if os.path.isdir(input_file):
  57. for dp, dn, filenames in os.walk(input_file):
  58. for f in filenames:
  59. fixUnicode(os.path.join(dp, f))
  60. else:
  61. fixUnicode(input_file)
  62. #entrypoint
  63. if __name__ == '__main__':
  64. main()