sexpr.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113
  1. #!/usr/bin/env python
  2. # Copyright (c) 2014 Ingo Ruhnke <grumbel@gmail.com>
  3. #
  4. # This software is provided 'as-is', without any express or implied
  5. # warranty. In no event will the authors be held liable for any damages
  6. # arising from the use of this software.
  7. #
  8. # Permission is granted to anyone to use this software for any purpose,
  9. # including commercial applications, and to alter it and redistribute it
  10. # freely, subject to the following restrictions:
  11. #
  12. # 1. The origin of this software must not be misrepresented; you must not
  13. # claim that you wrote the original software. If you use this software
  14. # in a product, an acknowledgment in the product documentation would be
  15. # appreciated but is not required.
  16. # 2. Altered source versions must be plainly marked as such, and must not be
  17. # misrepresented as being the original software.
  18. # 3. This notice may not be removed or altered from any source distribution.
  19. import re
  20. import codecs
  21. def parse(text):
  22. stack = [[]]
  23. state = 'list'
  24. i = 0
  25. line = 1
  26. column = 0
  27. while i < len(text):
  28. c = text[i]
  29. if c == '\n':
  30. line += 1
  31. column = 0
  32. else:
  33. column += 1
  34. if state == 'list':
  35. if c == '(':
  36. stack.append([])
  37. elif c == ')':
  38. stack[-2].append(stack.pop())
  39. elif c == "\"":
  40. state = 'string'
  41. atom = ""
  42. elif c == ";":
  43. state = 'comment'
  44. elif c.isalpha():
  45. state = 'symbol'
  46. atom = c
  47. elif c.isdigit():
  48. state = 'number'
  49. atom = c
  50. elif c.isspace():
  51. pass
  52. else:
  53. raise Exception("%d:%d: error: unexpected character: '%s'" % (line, column, c))
  54. elif state == 'comment':
  55. if c == '\n':
  56. state = 'list'
  57. else:
  58. pass
  59. elif state == 'string':
  60. if c == "\\":
  61. i += 1
  62. atom += text[i]
  63. elif c == "\"":
  64. stack[-1].append(atom)
  65. state = 'list'
  66. else:
  67. atom += c
  68. elif state == 'number':
  69. if not c.isdigit() or c != ".":
  70. stack[-1].append(int(atom))
  71. state = 'list'
  72. i -= 1
  73. else:
  74. atom += c
  75. elif state == 'symbol':
  76. if c.isspace() or c == '(' or c == ')':
  77. stack[-1].append(atom)
  78. state = 'list'
  79. i -= 1
  80. else:
  81. atom += c
  82. # print c, stack
  83. i += 1
  84. if len(stack) == 1:
  85. return stack[0]
  86. else:
  87. raise Exception("error: list not closed")
  88. if __name__ == "__main__":
  89. print "parsing..."
  90. result = parse(r'(() ("bar" foo) ()) () bar ')
  91. print "1.", result
  92. print "2.", parse(""";;comment
  93. ("Hello World" 5 1 123) ("Hello" 123 123 "foobar") ;; comment""")
  94. print "3.", parse(r'(8(8)8)')
  95. print "4.", parse(r'')
  96. print "5.", parse(r' ')
  97. with codecs.open("white.stf", encoding='utf-8') as fin:
  98. print "6.", parse(fin.read())
  99. # EOF #