convert.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. import json
  2. def read(input_file_name):
  3. with open(input_file_name, mode="r") as opened_file:
  4. content = json.load(opened_file)
  5. return content
  6. def convert(json_content):
  7. vocabulary_metadata = json_content["metadata"]
  8. del vocabulary_metadata["learned_percentage"]
  9. del vocabulary_metadata["count"]
  10. vocabulary_metadata["language_id_to_name"] = {
  11. "english": "English",
  12. "english_phonetic_script": "IPA",
  13. "chinese_simplified": "Chinese (simplified)",
  14. "chinese_traditional": "Chinese (traditional)",
  15. "pinyin": "Pīnyīn",
  16. "pinyin_numbered": "Pīnyīn numbered"
  17. }
  18. words = []
  19. for ind in json_content["words"]:
  20. word = json_content["words"][ind]
  21. print(f"parsing word: {word}")
  22. words.append({
  23. "metadata": {
  24. "identifier": ind,
  25. "learned": word["metadata"]["learned"],
  26. "relevance_level": 5,
  27. "tags": []
  28. },
  29. "meanings": [
  30. {
  31. "translation": {
  32. "english": word["translation_data"]["english"],
  33. "english_phonetic_script": "(add IPA)",
  34. "pinyin": word["translation_data"]["pinyin"],
  35. "pinyin_numbered": word["translation_data"]["pinyin_numbered"],
  36. "chinese_simplified": word["translation_data"]["simplified"],
  37. "chinese_traditional": word["translation_data"]["traditional"]
  38. },
  39. "description": [],
  40. "examples": []
  41. }
  42. ]
  43. })
  44. pass
  45. updated_vocabulary = {
  46. "metadata": vocabulary_metadata,
  47. "words": words
  48. }
  49. return updated_vocabulary
  50. def write(content, output_file_name):
  51. with open(output_file_name, mode='w', encoding='utf8') as write_file:
  52. write_file.write(json.dumps(content, indent=4, ensure_ascii=False))
  53. if __name__ == "__main__":
  54. for ind in range(1,7):
  55. write(convert(read(f"hsk-{ind}.json")), f"hsk-{ind}-updated.json")