/lang/python/utils/parserll.py

https://github.com/tfviv79/junk · Python · 272 lines · 237 code · 28 blank · 7 comment · 37 complexity · ac7b1a88b90a90c609c6766af12a1ff0 MD5 · raw file

  1. #!/usr/bin/env python
  2. ## encoding: utf-8
  3. import collections
  # Outcome of one parse step: ``flg`` is the success flag, ``ok`` holds the
  # parsed value on success and ``err`` holds the message on failure.
  Result = collections.namedtuple("Result", ["flg", "ok", "err"])


  def Result_ok(x):
      """Build a successful Result carrying the value *x*."""
      return Result(flg=True, ok=x, err=None)


  def Result_err(x, *args):
      """Build a failed Result whose message is *x* %-formatted with *args*."""
      message = x % args
      return Result(flg=False, ok=None, err=message)
  def is_iter(l):
      """Return True when *l* is iterable but is not a ``str``.

      Strings are excluded so that callers treat them as single values
      instead of descending into their characters.
      """
      # The ``collections.Iterable`` alias was removed in Python 3.10; the
      # abstract base class lives in ``collections.abc``.
      from collections.abc import Iterable

      return isinstance(l, Iterable) and not isinstance(l, str)
  def flatten(l, nested=-1):
      """Return a new list with the nested iterables of *l* expanded in place.

      *nested* is decremented on every level of recursion and expansion stops
      at the level where it reaches 0; the default of -1 therefore never hits
      0 and expands every level. Elements for which ``is_iter`` is false are
      copied through unchanged.
      """
      flat = []
      for item in l:
          descend = nested != 0 and is_iter(item)
          if descend:
              flat += flatten(item, nested - 1)
          else:
              flat.append(item)
      return flat
  class InputSource(object):
      """Cursor over an indexable input *s* at offset *pos*.

      ``next`` returns a new cursor instead of modifying this one, so an
      earlier cursor can be kept and reused to rewind.
      """

      def __init__(self, s, pos=0):
          self.s = s
          self.pos = pos

      def read(self):
          """Return a Result with the element at the cursor, or an "EOF" error."""
          if self.pos < len(self.s):
              return Result_ok(self.s[self.pos])
          return Result_err("EOF")

      def next(self):
          """Return a new InputSource positioned one element further on."""
          advanced = self.pos + 1
          return InputSource(self.s, advanced)
  27. def _checker_is_digit(ch):
  28. return '0' <= ch <= '9'
  29. def _checker_is_alpha_lower(ch):
  30. return 'a' <= ch <= 'z'
  31. def _checker_is_alpha_upper(ch):
  32. return 'A' <= ch <= 'Z'
  def _checker_is_alpha(ch):
      """Return True when *ch* is a lower-case or an upper-case ASCII letter."""
      if _checker_is_alpha_lower(ch):
          return True
      return _checker_is_alpha_upper(ch)
  def _checker_is_alphanum(ch):
      """Return True when *ch* is an ASCII letter or a digit."""
      if _checker_is_alpha(ch):
          return True
      return _checker_is_digit(ch)
  37. def _checker_is_space(ch):
  38. return ch == ' ' or ch == '\t'
  39. def _checker_is_char(ch):
  40. def _checker_is_char_inner(c):
  41. return ch == c
  42. return _checker_is_char_inner
  43. def _checker_in(container):
  44. def _checker_in_inner(c):
  45. return c in container
  46. return _checker_in_inner
  class Parser(object):
      """Wrap a parse function and offer combinators for composing parsers.

      ``proc`` is a callable taking an input source and returning a
      ``(source, result)`` pair. ``result`` exposes ``flg`` (success flag) and
      ``ok`` (parsed value). Every combinator returns a new Parser.
      """

      def __init__(self, proc):
          self.proc = proc

      def parse(self, s):
          """Run the wrapped function on *s* and return ``(source, result)``."""
          s, ret = self.proc(s)
          return s, ret

      ## combinator functions
      def n(self, p):
          """Run this parser, then *p*; on success keep only *p*'s value."""
          def proc(s):
              s, ret = self.parse(s)
              if not ret.flg:
                  return s, ret
              s, ret2 = p.parse(s)
              if not ret2.flg:
                  return s, ret2
              return s, Result_ok(ret2.ok)
          return Parser(proc)

      def p(self, p):
          """Run this parser, then *p*; on success keep only this parser's value."""
          def proc(s):
              s, ret = self.parse(s)
              if not ret.flg:
                  return s, ret
              s, ret2 = p.parse(s)
              if not ret2.flg:
                  return s, ret2
              return s, Result_ok(ret.ok)
          return Parser(proc)

      def maybe(self, p):
          """Run this parser, then optionally *p*.

          The value is a list holding this parser's value, followed by *p*'s
          value when *p* succeeded. When *p* fails, the source is rewound to
          where it stood right after this parser.
          """
          def proc(s):
              acc = []
              s, ret = self.parse(s)
              if not ret.flg:
                  return s, ret
              acc.append(ret.ok)
              copy_s = s
              s, ret = p.parse(s)
              if not ret.flg:
                  s = copy_s
              else:
                  acc.append(ret.ok)
              return s, Result_ok(acc)
          return Parser(proc)

      def many(self, p):
          """Run this parser, then *p* repeatedly until *p* fails.

          The value is a list of this parser's value followed by every value
          *p* produced. The source is rewound to just before *p*'s failed
          attempt.
          """
          def proc(s):
              acc = []
              s, ret = self.parse(s)
              if not ret.flg:
                  return s, ret
              acc.append(ret.ok)
              while True:
                  copy_s = s
                  s, ret = p.parse(s)
                  if not ret.flg:
                      s = copy_s
                      break
                  acc.append(ret.ok)
              return s, Result_ok(acc)
          return Parser(proc)

      def many1(self, p):
          """Run this parser and *p* in sequence, then *p* repeatedly.

          The first list element is the ``[self value, p value]`` pair built
          by ``c``; each further element is one more value from *p*.
          """
          return self.c(p).many(p)

      def c(self, p):
          """Run this parser, then *p*; the value is the list of both values."""
          def parse_continue(s):
              ret_a = []
              s, ret = self.proc(s)
              if not ret.flg:
                  return s, ret
              ret_a.append(ret.ok)
              s, ret = p.proc(s)
              if not ret.flg:
                  return s, ret
              ret_a.append(ret.ok)
              return s, Result_ok(ret_a)
          return Parser(parse_continue)

      def o(self, p):
          """Try this parser; if it fails, try *p* from the same starting source.

          Returns the pair of whichever alternative succeeds first. When both
          fail, the starting source is returned with a "not matched" error.
          """
          def parse_selection(s):
              # proc returns a (source, result) pair, so both halves must be
              # unpacked before ``flg`` can be read from the result.
              ns, ret = self.proc(s)
              if ret.flg:
                  return ns, ret
              # ``s`` was never rebound, so the alternative starts where this
              # parser started.
              ns, ret = p.proc(s)
              if ret.flg:
                  return ns, ret
              return s, Result_err("not matched")
          return Parser(parse_selection)

      ## translator
      def trans(self, proc):
          """Return a parser whose success value is ``proc(value)``.

          Failures of this parser are passed through untouched.
          """
          def trans_func(s):
              s, ret = self.proc(s)
              if not ret.flg:
                  return s, ret
              new_result = proc(ret.ok)
              return s, Result_ok(new_result)
          return Parser(trans_func)

      def to_s(self):
          """Return a parser that joins ``str()`` of each value element."""
          def trans_to_s(ok):
              return "".join([str(r) for r in ok])
          return self.trans(trans_to_s)

      def flat(self, nested=-1):
          """Return a parser that passes the value through ``flatten``."""
          def trans_flat(ok):
              return flatten(ok, nested)
          return self.trans(trans_flat)
  150. ##base parser parts
  def checkparser(checker, name=None):
      """Build a Parser that accepts one element satisfying *checker*.

      *name* is used in the failure message and defaults to
      ``checker.__name__``. On success the parser returns the advanced source
      and the element read; otherwise it returns the unchanged source with
      either the read error or an "<element> is not <name>" error.
      """
      label = checker.__name__ if name is None else name

      def check(s):
          c = s.read()
          if not c.flg:
              # Reading failed (e.g. end of input): pass the error through.
              return s, c
          if not checker(c.ok):
              return s, Result_err("%s is not %s", c.ok, label)
          return s.next(), Result_ok(c.ok)
      return Parser(check)
  def wrapper(parser):
      """Build a Parser that obtains its real parser only at parse time.

      *parser* is a zero-argument callable returning a parser. It is called
      on every parse, after a trace line showing the current read result has
      been printed.
      """
      def wrapped_parser(s):
          current = s.read()
          print("#### wraped %s" % (current,))
          delegate = parser()
          return delegate.parse(s)
      return Parser(wrapped_parser)
  def many(p):
      """Build a Parser applying *p* zero or more times.

      The value is the list of values *p* produced (possibly empty). The
      returned source is the one from just before *p*'s failed attempt.
      """
      def proc(s):
          values = []
          while True:
              advanced, outcome = p.parse(s)
              if not outcome.flg:
                  # Discard the failed attempt and keep the last good source.
                  return s, Result_ok(values)
              values.append(outcome.ok)
              s = advanced
      return Parser(proc)
  def many1(p):
      """Build a parser requiring *p* once, followed by zero or more *p*.

      Delegates to the ``many`` method of *p*, passing *p* itself as the
      repeated parser.
      """
      one_or_more = p.many(p)
      return one_or_more
  def o(*parsers):
      """Build a Parser that returns the first success among *parsers*.

      Every alternative is tried from the same starting source. When none
      succeeds, or when no parsers were given, the starting source is
      returned together with a "not matched" error.
      """
      def proc(s):
          for parser in parsers:
              advanced, ret = parser.parse(s)
              if ret.flg:
                  return advanced, ret
          # ``s`` is never rebound, so this is the starting source. Returning
          # it (not a loop-local copy) also keeps the no-parsers case defined.
          return s, Result_err("not matched")
      return Parser(proc)
  def c(*parsers):
      """Build a Parser running *parsers* in sequence.

      The value is the list of every parser's value. The first failure is
      returned immediately along with the source that parser returned.
      """
      def proc(s):
          values = []
          for step in parsers:
              s, outcome = step.parse(s)
              if outcome.flg:
                  values.append(outcome.ok)
              else:
                  return s, outcome
          return s, Result_ok(values)
      return Parser(proc)
  def maybe(p):
      """Build a Parser that makes *p* optional.

      When *p* succeeds its result is returned as is. When it fails, the
      starting source is returned with a successful empty-string value.
      """
      def proc(s):
          advanced, outcome = p.parse(s)
          if outcome.flg:
              return advanced, outcome
          return s, Result_ok("")
      return Parser(proc)
  211. ##base parser
  def char1(ch):
      """Build a Parser accepting exactly one element equal to *ch*.

      *ch* is also used as the name shown in the failure message.
      """
      return checkparser(name=ch, checker=lambda x: x == ch)
  # Single-element parsers built from the character-class predicates.
  one_digit = checkparser(_checker_is_digit, "digit")
  one_alpha = checkparser(_checker_is_alpha, "alpha")
  one_alphanum = checkparser(_checker_is_alphanum, "alphanum")
  space = checkparser(_checker_is_space, "space")
  # Zero or more spaces/tabs; the value is a list of the characters read.
  spaces = many(space)
  # One or more digits, joined into a single string.
  digit = many1(one_digit).to_s()
  sign = o(char1("-"), char1("+"))
  # Optional sign followed by digits, joined into a single string.
  intp = maybe(sign).c(digit).to_s()
  # The optional fraction is joined into a string before the outer to_s():
  # otherwise the outer join would call str() on the nested ['.', digits]
  # list and embed that list's repr in the parsed value.
  # NOTE(review): the fraction reuses intp, so a sign right after the dot is
  # accepted -- confirm whether that is intended.
  floatp = intp.maybe(char1(".").c(intp).to_s()).to_s()
  # Leading underscore or letter, then underscores or alphanumerics; the value
  # stays a list of characters (no to_s() is applied).
  identity = o(char1("_"), one_alpha).many(o(char1("_"), one_alphanum))
  224. ##helper function
  def parse(parser, s):
      """Parse *s* with *parser* from the start and return only the Result.

      *s* is wrapped in a fresh InputSource; the source returned by the
      parser is discarded.
      """
      outcome = parser.parse(InputSource(s))
      return outcome[1]
  if __name__ == "__main__":
      import sys
      import unittest

      def testparse(parser, s):
          """Print the input text and the Result of parsing it with *parser*."""
          print("INPUT:", s)
          outcome = parse(parser, s)
          print("RESULT:", outcome)

      def _exp():
          """Print and return the module-level ``exp`` parser.

          ``exp`` is looked up when this function is called, so ``term`` can
          refer to it (through ``wrapper``) before ``exp`` is assigned.
          """
          global exp
          print("###exp", exp)
          return exp

      # Small arithmetic-expression grammar used as a demo.
      mul = o(char1("*"), char1("/"))
      term = o(intp, wrapper(_exp))
      mul_tail = spaces.n(mul).p(spaces).c(term)
      add_op = term.many(mul_tail)
      add_tail = spaces.n(sign).p(spaces).c(add_op)
      exp = add_op.many(add_tail)

      testparse(exp, "-1234567890 + -3")
      # An optional first command-line argument is parsed as a second sample.
      if len(sys.argv) > 1:
          testparse(exp, sys.argv[1])