PageRenderTime 59ms CodeModel.GetById 32ms RepoModel.GetById 0ms app.codeStats 0ms

/examples/graph/parser.py

https://github.com/tailhook/pyzza
Python | 355 lines | 312 code | 26 blank | 17 comment | 53 complexity | 492e4db2e285542795be2b730885e496 MD5 | raw file
  1. from string import repr
  2. from graph import Graph, Digraph
# Token class hierarchy for the DOT-like graph language tokenizer.
# Every token kind is its own class so the parser can dispatch with
# isinstance(); only TokenName and TokenString carry a payload value.
# __slots__ keeps the instances lightweight (this is Pyzza, compiled
# to AVM2 sealed classes).
class Token:
    __slots__ = []
    def __init__(self): pass
class TokenEnd(Token):
    # End of input.
    __slots__ = []
    def __init__(self): pass
class TokenName(Token):
    # Bare identifier; `value` holds the identifier text.
    __slots__ = ['value']
    def __init__(self, value): self.value = value
class TokenString(Token):
    # Double-quoted string; `value` holds the unescaped contents.
    __slots__ = ['value']
    def __init__(self, value): self.value = value
class TokenSpace(Token):
    # Whitespace marker — never returned to the parser, only used in
    # the token_chars lookup table.
    __slots__ = []
    def __init__(self): pass
class TokenComment(Token):
    # '#' comment marker — also table-only, skipped by the tokenizer.
    __slots__ = []
    def __init__(self): pass
class TokenLbrace(Token):
    __slots__ = []
    def __init__(self): pass
class TokenRbrace(Token):
    __slots__ = []
    def __init__(self): pass
class TokenLbracket(Token):
    __slots__ = []
    def __init__(self): pass
class TokenRbracket(Token):
    __slots__ = []
    def __init__(self): pass
class TokenEq(Token):
    __slots__ = []
    def __init__(self): pass
class TokenSemicolon(Token):
    __slots__ = []
    def __init__(self): pass
class TokenComma(Token):
    __slots__ = []
    def __init__(self): pass
class TokenEdge(Token):
    # Undirected edge operator '--'.
    __slots__ = []
    def __init__(self): pass
class TokenDiedge(Token):
    # Directed edge operator '->'.
    __slots__ = []
    def __init__(self): pass
  48. token_chars = {
  49. '{': TokenLbrace,
  50. '}': TokenRbrace,
  51. '[': TokenLbracket,
  52. ']': TokenRbracket,
  53. '=': TokenEq,
  54. ';': TokenSemicolon,
  55. ',': TokenComma,
  56. '-': TokenEdge,
  57. '"': TokenString,
  58. '#': TokenComment,
  59. ' ': TokenSpace,
  60. '\r': TokenSpace,
  61. '\n': TokenSpace,
  62. ' ': TokenSpace,
  63. }
# Register every identifier character as starting a TokenName.
# NOTE: this is Pyzza (Python-like syntax on the AVM2 runtime), so
# strings use the ActionScript API: `.length` property and `.charAt(i)`.
alnum = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_0123456789'
for i in range(alnum.length):
    token_chars[alnum.charAt(i)] = TokenName
# Backslash escape sequences recognized inside double-quoted strings.
# Escapes mapping to '' are swallowed entirely (e.g. an escaped literal
# newline acts as a line continuation).
escapes = {
    'n': '\n',
    'N': '', # probably means nothing
    'r': '\r',
    '\\': '\\',
    '"': '"',
    '\n': '',  # backslash-newline: line continuation
    '\r': '',
}
@package('graph')
class SyntaxError(Error):
    # Raised on any tokenizing or parsing failure.  Inherits from the
    # AVM2 `Error` class (Pyzza runtime), not Python's exception tree;
    # the name intentionally mirrors the builtin it shadows.
    __slots__ = []
    def __init__(self, message):
        super().__init__(message)
class Tokenizer:
    """Splits DOT-like source text into Token instances.

    Provides one token of lookahead via peek()/next() and tracks the
    current line and line start for error positions.  String handling
    uses the ActionScript String API (.length, .charAt, .substring,
    .charCodeAt) because this is Pyzza code.
    """
    def __init__(self, string):
        self.data = string
        self.index = 0       # index of the next character to read
        self.line = 1        # current line number (1-based)
        self.line_start = 0  # index of the first char of the current line
        self.token = None    # one-token lookahead buffer
    def peek(self):
        # Return the next token without consuming it.
        if self.token == None:
            self.token = self._next()
        return self.token
    def next(self):
        # Return and consume the next token (drains the lookahead first).
        if self.token != None:
            res = self.token
            self.token = None
            return res
        return self._next()
    def _next(self):
        # Scan one token from the raw input.  Whitespace and comments are
        # consumed here and never surface to the caller.
        data = self.data
        if self.index >= data.length:
            return TokenEnd()
        ch = data.charAt(self.index)
        self.index += 1
        tt = token_chars[ch]
        if tt == TokenSpace:
            # Skip a run of whitespace, bumping the line counter on '\n'.
            while self.index < data.length:
                if ch == '\n':
                    self.line += 1
                    self.line_start = self.index
                ch = data.charAt(self.index)
                if token_chars[ch] != TokenSpace:
                    break
                self.index += 1
            else:
                # Ran off the end of input while inside whitespace.
                return TokenEnd()
            return self.next() # skipping whitespace
        elif tt == TokenName:
            # Greedily consume identifier characters; `start` is the char
            # already consumed above.
            start = self.index - 1
            while self.index < data.length:
                if token_chars[data.charAt(self.index)] != TokenName:
                    break
                self.index += 1
            end = self.index
            return TokenName(data.substring(start, end))
        elif tt == TokenString:
            # Accumulate until the matching quote; process '\' escapes.
            res = ""
            while self.index < data.length:
                ch1 = data.charAt(self.index)
                self.index += 1
                if ch1 == '\\':
                    if self.index >= data.length:
                        self.syntax_error()
                    res += escapes[data.charAt(self.index)]
                    self.index += 1
                elif ch1 == ch:
                    # Closing quote (ch still holds the opening one).
                    return TokenString(res)
                elif ch1 == '\r' or ch1 == '\n':
                    # Raw newline inside a string is illegal.
                    raise self.syntax_error()
                else:
                    res += ch1
            else:
                # EOF before the closing quote.
                self.syntax_error()
        elif tt == TokenComment:
            # '#' comment: skip to end of line, then re-scan.
            while self.index < data.length:
                ch = data.charAt(self.index)
                if ch == '\n' or ch == '\r':
                    break
                self.index += 1
            else:
                return TokenEnd()
            return self.next() # skipping comment
        elif tt == TokenEdge:
            # '-' must be followed by '-' (undirected) or '>' (directed).
            if self.index >= data.length:
                self.syntax_error("Unexpected EOF")
            tok = None
            ch = data.charAt(self.index)
            if ch == '-':
                tok = TokenEdge()
            elif ch == '>':
                tok = TokenDiedge()
            else:
                self.syntax_error()
            self.index += 1
            return tok
        else:
            if tt:
                # Single-character token: instantiate its class.
                return (Class(tt))()
            else:
                # Character not in token_chars at all.
                self.syntax_error("Wrong char {0!r} (0x{1:02x})"
                    .format(ch, float(ch.charCodeAt(0))))
    def syntax_error(self, message="SyntaxError"):
        # Raise a SyntaxError annotated with the current line/column.
        raise SyntaxError(message + ' at line {0:d} char {1:d}'
            .format(self.line, self.index - self.line_start))
  175. @package('graph')
  176. class Parser:
  177. ############
  178. # Grammar:
  179. # graph: type NAME '{' graphbody '}'
  180. # type: 'digraph' | 'graph'
  181. # graphbody: entity*
  182. # entity: prototype | node | edge | subgraph | anonsubgraph;
  183. # prototype: 'graph' params ';' | 'node' params ';' | 'edge' params ';'
  184. # node: NAME params? ';'
  185. # edge: NAME '--' NAME params? ';'
  186. # subgraph: 'subgraph' NAME '{' graphbody '}'
  187. # anonsubgraph: '{' graphbody '}'
  188. ############
  189. def __init__(self):
  190. pass
  191. def parse(self, data):
  192. self.tokenizer = Tokenizer(data.replace('\r\n', '\n'))
  193. return self.parse_graph()
  194. # parser state functions
  195. def parse_graph(self):
  196. tok = self.get_token(TokenName)
  197. name = self.get_token(TokenName)
  198. if tok.value == 'graph':
  199. res = Graph(name.value)
  200. elif tok.value == 'digraph':
  201. res = Digraph(name.value)
  202. else:
  203. self.syntax_error('Wrong graph declaration')
  204. lbrace = self.get_token(TokenLbrace)
  205. self.parse_graphbody(res)
  206. rbrace = self.get_token(TokenRbrace)
  207. self.get_token(TokenEnd)
  208. return res
  209. def parse_graphbody(self, graph):
  210. tok = self.tokenizer.peek()
  211. while not isinstance(tok,TokenRbrace):
  212. if isinstance(tok, TokenName) or isinstance(tok, TokenString):
  213. if tok.value == 'graph':
  214. self.parse_graphparams(graph)
  215. elif tok.value == 'node':
  216. self.parse_nodedefaults(graph)
  217. elif tok.value == 'edge':
  218. self.parse_edgedefaults(graph)
  219. elif tok.value == 'subgraph':
  220. self.parse_subgraph(graph)
  221. else:
  222. # node or edge
  223. name = self.tokenizer.next()
  224. nex = self.tokenizer.peek()
  225. if isinstance(nex, TokenEdge) or isinstance(nex,TokenDiedge):
  226. self.parse_edge(graph, name)
  227. else:
  228. self.parse_node(graph, name)
  229. elif isinstance(tok, TokenLbrace):
  230. self.parse_anonsub(graph)
  231. else:
  232. self.syntax_error(tok, [TokenName, TokenString,
  233. TokenLbrace, TokenRbrace])
  234. tok = self.tokenizer.peek()
  235. def parse_graphparams(self, graph):
  236. tok = self.get_token(TokenName)
  237. val = self.parse_params()
  238. self.get_token(TokenSemicolon)
  239. graph.update_properties(val)
  240. def parse_nodedefaults(self, graph):
  241. tok = self.get_token(TokenName)
  242. val = self.parse_params()
  243. self.get_token(TokenSemicolon)
  244. graph.update_node_defaults(val)
  245. def parse_edgedefaults(self, graph):
  246. tok = self.get_token(TokenName)
  247. val = self.parse_params()
  248. self.get_token(TokenSemicolon)
  249. graph.update_edge_defaults(val)
  250. def parse_subgraph(self, graph):
  251. tok = self.get_token(TokenName)
  252. name = self.get_token(TokenName)
  253. sub = graph.add_subgraph(name.value)
  254. self.get_token(TokenLbrace)
  255. self.parse_graphbody(sub)
  256. self.get_token(TokenRbrace)
  257. def parse_anonsub(self, graph):
  258. sub = graph.add_anonsub()
  259. self.get_token(TokenLbrace)
  260. self.parse_graphbody(sub)
  261. self.get_token(TokenRbrace)
  262. def parse_edge(self, graph, start):
  263. tok = self.tokenizer.next()
  264. if isinstance(tok, TokenEdge):
  265. if graph.directed:
  266. self.syntax_error(tok, [TokenDiedge])
  267. elif isinstance(tok, TokenDiedge):
  268. if not graph.directed:
  269. self.syntax_error(tok, [TokenEdge])
  270. else:
  271. self.syntax_error()
  272. end = self.get_string()
  273. if isinstance(self.tokenizer.peek(), TokenLbracket):
  274. val = self.parse_params()
  275. else:
  276. val = {}
  277. self.get_token(TokenSemicolon)
  278. graph.add_edge(start.value, end.value, val)
  279. def parse_node(self, graph, name):
  280. if isinstance(self.tokenizer.peek(), TokenLbracket):
  281. val = self.parse_params()
  282. else:
  283. val = {}
  284. self.get_token(TokenSemicolon)
  285. graph.add_node(name.value, val)
  286. def parse_params(self):
  287. self.get_token(TokenLbracket)
  288. tok = self.tokenizer.next()
  289. if isinstance(tok, TokenRbracket):
  290. return {}
  291. name = tok
  292. eq = self.get_token(TokenEq)
  293. value = self.get_string()
  294. res = {str(name.value): value.value}
  295. while True:
  296. tok = self.tokenizer.next()
  297. if isinstance(tok, TokenRbracket):
  298. break
  299. elif isinstance(tok, TokenComma):
  300. pass
  301. else:
  302. self.syntax_error(tok, [TokenRbracket, TokenComma])
  303. name = self.get_token(TokenName)
  304. eq = self.get_token(TokenEq)
  305. value = self.get_string()
  306. res[str(name.value)] = value.value
  307. return res
  308. # Utility functions
  309. def get_token(self, type):
  310. tok = self.tokenizer.next()
  311. if not isinstance(tok, type):
  312. self.syntax_error(tok, [type])
  313. return tok
  314. def get_string(self):
  315. tok = self.tokenizer.next()
  316. if isinstance(tok, TokenString) or isinstance(tok, TokenName):
  317. return tok
  318. self.syntax_error(tok, [TokenString, TokenComma])
  319. def syntax_error(self, has, expected):
  320. # TODO: show line number, position and expected tokens
  321. raise SyntaxError('Unexpected token {0!r}, need one of {1} '
  322. 'at line {2:d} char {3:d}'.format(has, expected,
  323. self.tokenizer.line, self.tokenizer.index -\
  324. self.tokenizer.line_start))