sexpr.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112
  1. #!/usr/bin/env python
  2. # Copyright (c) 2014 Ingo Ruhnke <grumbel@gmail.com>
  3. #
  4. # This software is provided 'as-is', without any express or implied
  5. # warranty. In no event will the authors be held liable for any damages
  6. # arising from the use of this software.
  7. #
  8. # Permission is granted to anyone to use this software for any purpose,
  9. # including commercial applications, and to alter it and redistribute it
  10. # freely, subject to the following restrictions:
  11. #
  12. # 1. The origin of this software must not be misrepresented; you must not
  13. # claim that you wrote the original software. If you use this software
  14. # in a product, an acknowledgment in the product documentation would be
  15. # appreciated but is not required.
  16. # 2. Altered source versions must be plainly marked as such, and must not be
  17. # misrepresented as being the original software.
  18. # 3. This notice may not be removed or altered from any source distribution.
  19. import codecs
  20. def parse(text):
  21. stack = [[]]
  22. state = 'list'
  23. i = 0
  24. line = 1
  25. column = 0
  26. while i < len(text):
  27. c = text[i]
  28. if c == '\n':
  29. line += 1
  30. column = 0
  31. else:
  32. column += 1
  33. if state == 'list':
  34. if c == '(':
  35. stack.append([])
  36. elif c == ')':
  37. stack[-2].append(stack.pop())
  38. elif c == "\"":
  39. state = 'string'
  40. atom = ""
  41. elif c == ";":
  42. state = 'comment'
  43. elif c.isalpha():
  44. state = 'symbol'
  45. atom = c
  46. elif c.isdigit():
  47. state = 'number'
  48. atom = c
  49. elif c.isspace():
  50. pass
  51. else:
  52. raise Exception("%d:%d: error: unexpected character: '%s'" % (line, column, c))
  53. elif state == 'comment':
  54. if c == '\n':
  55. state = 'list'
  56. else:
  57. pass
  58. elif state == 'string':
  59. if c == "\\":
  60. i += 1
  61. atom += text[i]
  62. elif c == "\"":
  63. stack[-1].append(atom)
  64. state = 'list'
  65. else:
  66. atom += c
  67. elif state == 'number':
  68. if not c.isdigit() or c != ".":
  69. stack[-1].append(int(atom))
  70. state = 'list'
  71. i -= 1
  72. else:
  73. atom += c
  74. elif state == 'symbol':
  75. if c.isspace() or c == '(' or c == ')':
  76. stack[-1].append(atom)
  77. state = 'list'
  78. i -= 1
  79. else:
  80. atom += c
  81. # print c, stack
  82. i += 1
  83. if len(stack) == 1:
  84. return stack[0]
  85. else:
  86. raise Exception("error: list not closed")
  87. if __name__ == "__main__":
  88. print "parsing..."
  89. result = parse(r'(() ("bar" foo) ()) () bar ')
  90. print "1.", result
  91. print "2.", parse(""";;comment
  92. ("Hello World" 5 1 123) ("Hello" 123 123 "foobar") ;; comment""")
  93. print "3.", parse(r'(8(8)8)')
  94. print "4.", parse(r'')
  95. print "5.", parse(r' ')
  96. with codecs.open("white.stf", encoding='utf-8') as fin:
  97. print "6.", parse(fin.read())
  98. # EOF #