  1. """The parser for S-expressions in Json"""
  2. from sly import Lexer, Parser
  3. import json
  4. """
  5. EBNF of the syntax that this parser reads:
  6. <symbol> :: = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" | "Y" | "Z" | "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" | "y" | "z" | "-" | "_"
  7. <NUMBER> ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
  8. <NAME> ::= <symbol> | <symbol> <NUMBER> | <NUMBER> <symbol> | <symbol> <symbol>
  9. <LPAREN> ::= "("
  10. <RPAREN> ::= ")"
  11. <Object> ::= <NUMBER> | <NAME>
  12. <List> ::= <Object> <List>
  13. <Main construction> ::= <LPAREN> <List> <RPAREN>
  14. """
class Lex:
    """A node of the parse tree. When the parser recognizes a term, it stores
    it in an instance of this class. `data` always holds a NAME or a NUMBER."""
    def __init__(self, data):
        self.data = data
        self.master = None  # parent node (currently unused)
        self.slaves = []    # child nodes
    def __str__(self):
        return str(self.data)
    def get_serializable(self):
        """Recursively convert the token tree to dictionaries and lists
        so that Python's json module can serialize it directly."""
        res = []
        for i in self.slaves:
            res.append(i.get_serializable())
        res.reverse()
        if len(res) > 0:
            d = dict()
            if len(res) == 1:
                res = res[0]
            else:
                # This branch removes unnecessary nesting of lists and
                # dictionaries. The extra nesting would not make the output
                # incorrect, but it significantly hurts readability, so it is
                # flattened here: sibling dicts are merged into one dict and
                # sibling lists are spliced into one list.
                final = []
                main_dict = dict()
                for i in res:
                    if isinstance(i, dict):
                        for k, v in i.items():
                            main_dict[k] = v
                    elif isinstance(i, list):
                        for j in i:
                            final.append(j)
                    else:
                        final.append(i)
                if len(main_dict) > 0:
                    final.append(main_dict)
                res = final
            d[str(self.data)] = res
        else:
            d = self.data
        return d
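# A minimal sketch of how a Lex tree serializes (hand-built example, not part
# of the original source):
#
#   root = Lex('person')
#   child = Lex('name')
#   child.slaves.append(Lex('Alice'))
#   root.slaves.append(child)
#   root.get_serializable()  ->  {'person': {'name': 'Alice'}}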
class LexList:
    """When several tokens appear side by side, they are combined into a list"""
    def __init__(self):
        self.list = []
    def __str__(self):
        return 'List of ' + str(len(self.list)) + ' lexemes'
    def get_serializable(self):
        """Convert each element of the token list to dictionaries and Python lists"""
        res = []
        for i in self.list:
            res.append(i.get_serializable())
        if len(res) == 1:
            res = res[0]
        else:
            # Same flattening as in Lex.get_serializable above.
            final = []
            main_dict = dict()
            for i in res:
                if isinstance(i, dict):
                    for k, v in i.items():
                        main_dict[k] = v
                elif isinstance(i, list):
                    for j in i:
                        final.append(j)
                else:
                    final.append(i)
            if len(main_dict) > 0:
                final.append(main_dict)
            res = final
        return res
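# Sketch of LexList serialization (hand-built, illustrative): elements are
# serialized one by one and flattened a single level:
#
#   lst = LexList()
#   lst.list.append(Lex('b'))
#   lst.list.append(Lex('a'))
#   lst.get_serializable()  ->  ['b', 'a']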
class CalcLexer(Lexer):
    """The lexer. Splits the input string into tokens."""
    tokens = {NAME, NUMBER, LPAREN, RPAREN}
    ignore = ' \t'
    # Tokens
    NAME = r'("[a-zа-яА-ЯA-Z.0-9_\- \/\*]*"|[а-яА-Я-a-zA-Z_.]+[.а-яА-Я0-9-a-zA-Z_]*)'  # r'[-a-zA-Z_]+[0-9-a-zA-Z_]*'
    NUMBER = r'\d+'
    # Special symbols
    LPAREN = r'\('
    RPAREN = r'\)'
    # Ignored patterns
    ignore_newline = r'\n+'
    ignore_comments = r'\/\*.*\*\/'  # ignore /* ... */ comments
    def error(self, t):
        # Skip illegal characters instead of aborting
        self.index += 1
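# Tokenization sketch (illustrative; the token stream below is derived by hand
# from the patterns above, not captured from a run):
#
#   lexer = CalcLexer()
#   for tok in lexer.tokenize('(key "some value")'):
#       print(tok.type, tok.value)
#
# Expected types: LPAREN, NAME, NAME, RPAREN -- the quoted string is matched
# as a single NAME token, quotes included (they are stripped by the parser).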
class CalcParser(Parser):
    """The parser. Assembles a tree of Lex and LexList instances from the tokens."""
    tokens = CalcLexer.tokens
    precedence = (
        ('left', NAME),
    )
    def __init__(self):
        self.root = None
        self.errors = False
        self.is_comm = False
    def error(self, token):
        if not self.errors:
            print('Syntax error!')
            self.errors = True
    @_('term')
    def expr(self, p):
        return p[0]
    @_('term expr')
    def expr(self, p):
        """Merge adjacent objects (separated by whitespace) into one list"""
        obj = LexList()
        if isinstance(p[1], LexList):
            for i in p[1].list:
                obj.list.append(i)
        else:
            obj.list.append(p[1])
        if isinstance(p[0], LexList):
            for i in p[0].list:
                obj.list.append(i)
        else:
            obj.list.append(p[0])
        return obj
    @_('NUMBER')
    def term(self, p):
        if not self.is_comm:
            obj = Lex(int(p.NUMBER))
            return obj
    @_('NAME')
    def term(self, p):
        if not self.is_comm:
            obj = Lex(str(p.NAME).replace('"', ''))
            return obj
    @_('LPAREN expr RPAREN')
    def term(self, p):
        """The main semantic construction.
        The first token inside the parentheses becomes the head of the set of
        objects that follow it (recursion is possible)"""
        if isinstance(p[1], Lex):
            # A single object in parentheses: it is its own subtree, but we
            # still record it as the root so the caller can retrieve it.
            self.root = p[1]
            return p[1]
        obj = p[1].list.pop()
        for i in p[1].list:
            obj.slaves.append(i)
        self.root = obj
        return obj
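# End-to-end sketch (illustrative; input given as a string rather than a file):
#
#   lexer = CalcLexer()
#   parser = CalcParser()
#   parser.parse(lexer.tokenize('(greeting (text "hello world"))'))
#   parser.root.get_serializable()  ->  {'greeting': {'text': 'hello world'}}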
if __name__ == '__main__':
    # Entry point. Reads the input from a file, parses it, and converts the
    # parse tree first to a serializable structure and then to JSON.
    lexer = CalcLexer()
    parser = CalcParser()
    text = input('Enter file name: ')
    if text:
        try:
            with open(text, 'r') as content_file:
                content = content_file.read()
        except FileNotFoundError:
            print('File does not exist!')
        else:
            parser.parse(lexer.tokenize(content))
            if not parser.errors and parser.root is not None:
                serializable = parser.root.get_serializable()
                print('Output JSON:')
                print(json.dumps(serializable, indent=1, ensure_ascii=False))
            else:
                print('No output JSON due to syntax error.')
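# Example session (illustrative; the file name and its contents are
# hypothetical):
#
#   $ python sparser.py
#   Enter file name: example.sexp        # file contains: (config (port 8080))
#   Output JSON:
#   {
#    "config": {
#     "port": 8080
#    }
#   }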