PageRenderTime 78ms CodeModel.GetById 38ms RepoModel.GetById 0ms app.codeStats 0ms

/dominic/xpath/parser.py

http://github.com/gabrielfalcao/dominic
Python | 420 lines | 374 code | 46 blank | 0 comment | 57 complexity | 967da3d37fabcb7c0e943b8b08e39136 MD5 | raw file
Possible License(s): JSON
  1. import xpath.expr as X
  2. from xpath.yappsrt import *
  3. from string import *
  4. import re
  5. class XPathScanner(Scanner):
  6. patterns = [
  7. ("r'\\:'", re.compile('\\:')),
  8. ("r'node'", re.compile('node')),
  9. ("r'text'", re.compile('text')),
  10. ("r'comment'", re.compile('comment')),
  11. ("r'processing-instruction'", re.compile('processing-instruction')),
  12. ("r'\\,'", re.compile('\\,')),
  13. ("r'\\.'", re.compile('\\.')),
  14. ("r'\\$'", re.compile('\\$')),
  15. ("r'\\)'", re.compile('\\)')),
  16. ("r'\\('", re.compile('\\(')),
  17. ("r'\\]'", re.compile('\\]')),
  18. ("r'\\['", re.compile('\\[')),
  19. ("r'\\*'", re.compile('\\*')),
  20. ("r':'", re.compile(':')),
  21. ("r'\\.\\.'", re.compile('\\.\\.')),
  22. ("r'@'", re.compile('@')),
  23. ("r'::'", re.compile('::')),
  24. ("r'\\/\\/'", re.compile('\\/\\/')),
  25. ("r'\\/'", re.compile('\\/')),
  26. ("r'\\-'", re.compile('\\-')),
  27. ("'\\|'", re.compile('\\|')),
  28. ("r'and'", re.compile('and')),
  29. ("r'or'", re.compile('or')),
  30. ('\\s+', re.compile('\\s+')),
  31. ('END', re.compile('$')),
  32. ('FORWARD_AXIS_NAME', re.compile('child|descendant-or-self|attribute|self|descendant|following-sibling|following|namespace')),
  33. ('REVERSE_AXIS_NAME', re.compile('parent|preceding-sibling|preceding|ancestor-or-self|ancestor')),
  34. ('NCNAME', re.compile('[a-zA-Z_][a-zA-Z0-9_\\-\\.\\w]*(?!\\()')),
  35. ('FUNCNAME', re.compile('[a-zA-Z_][a-zA-Z0-9_\\-\\.\\w]*')),
  36. ('DQUOTE', re.compile('\\"(?:[^\\"])*\\"')),
  37. ('SQUOTE', re.compile("\\'(?:[^\\'])*\\'")),
  38. ('NUMBER', re.compile('((\\.[0-9]+)|([0-9]+(\\.[0-9]*)?))([eE][\\+\\-]?[0-9]+)?')),
  39. ('EQ_COMP', re.compile('\\!?\\=')),
  40. ('REL_COMP', re.compile('[\\<\\>]\\=?')),
  41. ('ADD_COMP', re.compile('[\\+\\-]')),
  42. ('MUL_COMP', re.compile('\\*|div|mod')),
  43. ]
  44. def __init__(self, str):
  45. Scanner.__init__(self,None,['\\s+'],str)
  46. class XPath(Parser):
  47. def XPath(self):
  48. Expr = self.Expr()
  49. END = self._scan('END')
  50. return Expr
  51. def Expr(self):
  52. OrExpr = self.OrExpr()
  53. return OrExpr
  54. def OrExpr(self):
  55. AndExpr = self.AndExpr()
  56. Expr = AndExpr
  57. while self._peek("r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'or'":
  58. self._scan("r'or'")
  59. AndExpr = self.AndExpr()
  60. Expr = X.OrExpr('or', Expr, AndExpr)
  61. return Expr
  62. def AndExpr(self):
  63. EqualityExpr = self.EqualityExpr()
  64. Expr = EqualityExpr
  65. while self._peek("r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'and'":
  66. self._scan("r'and'")
  67. EqualityExpr = self.EqualityExpr()
  68. Expr = X.AndExpr('and', Expr, EqualityExpr)
  69. return Expr
  70. def EqualityExpr(self):
  71. RelationalExpr = self.RelationalExpr()
  72. Expr = RelationalExpr
  73. while self._peek('EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'EQ_COMP':
  74. EQ_COMP = self._scan('EQ_COMP')
  75. RelationalExpr = self.RelationalExpr()
  76. Expr = X.EqualityExpr(EQ_COMP, Expr, RelationalExpr)
  77. return Expr
  78. def RelationalExpr(self):
  79. AdditiveExpr = self.AdditiveExpr()
  80. Expr = AdditiveExpr
  81. while self._peek('REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'REL_COMP':
  82. REL_COMP = self._scan('REL_COMP')
  83. AdditiveExpr = self.AdditiveExpr()
  84. Expr = X.EqualityExpr(REL_COMP, Expr, AdditiveExpr)
  85. return Expr
  86. def AdditiveExpr(self):
  87. MultiplicativeExpr = self.MultiplicativeExpr()
  88. Expr = MultiplicativeExpr
  89. while self._peek('ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'ADD_COMP':
  90. ADD_COMP = self._scan('ADD_COMP')
  91. MultiplicativeExpr = self.MultiplicativeExpr()
  92. Expr = X.ArithmeticalExpr(ADD_COMP, Expr, MultiplicativeExpr)
  93. return Expr
  94. def MultiplicativeExpr(self):
  95. UnionExpr = self.UnionExpr()
  96. Expr = UnionExpr
  97. while self._peek('MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == 'MUL_COMP':
  98. MUL_COMP = self._scan('MUL_COMP')
  99. UnionExpr = self.UnionExpr()
  100. Expr = X.ArithmeticalExpr(MUL_COMP, Expr, UnionExpr)
  101. return Expr
  102. def UnionExpr(self):
  103. UnaryExpr = self.UnaryExpr()
  104. Expr = UnaryExpr
  105. while self._peek("'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "'\\|'":
  106. self._scan("'\\|'")
  107. UnaryExpr = self.UnaryExpr()
  108. Expr = X.UnionExpr('|', Expr, UnaryExpr)
  109. return Expr
  110. def UnaryExpr(self):
  111. _token_ = self._peek("r'\\-'", "r'\\/'", "r'\\/\\/'", "r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
  112. if _token_ == "r'\\-'":
  113. self._scan("r'\\-'")
  114. ValueExpr = self.ValueExpr()
  115. return X.NegationExpr(ValueExpr)
  116. else:
  117. ValueExpr = self.ValueExpr()
  118. return ValueExpr
  119. def ValueExpr(self):
  120. PathExpr = self.PathExpr()
  121. return PathExpr
  122. def PathExpr(self):
  123. _token_ = self._peek("r'\\/'", "r'\\/\\/'", "r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
  124. if _token_ == "r'\\/'":
  125. self._scan("r'\\/'")
  126. path = None
  127. if self._peek("r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME', "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") not in ["'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'"]:
  128. RelativePathExpr = self.RelativePathExpr()
  129. path = RelativePathExpr
  130. return X.AbsolutePathExpr(path)
  131. elif _token_ == "r'\\/\\/'":
  132. self._scan("r'\\/\\/'")
  133. RelativePathExpr = self.RelativePathExpr()
  134. step = X.AxisStep('descendant-or-self')
  135. RelativePathExpr.steps.insert(0, step)
  136. return X.AbsolutePathExpr(RelativePathExpr)
  137. else:
  138. RelativePathExpr = self.RelativePathExpr()
  139. return RelativePathExpr
  140. def RelativePathExpr(self):
  141. StepExpr = self.StepExpr()
  142. steps = [StepExpr]
  143. while self._peek("r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") in ["r'\\/'", "r'\\/\\/'"]:
  144. _token_ = self._peek("r'\\/'", "r'\\/\\/'")
  145. if _token_ == "r'\\/'":
  146. self._scan("r'\\/'")
  147. else:# == "r'\\/\\/'"
  148. self._scan("r'\\/\\/'")
  149. steps.append(X.AxisStep('descendant-or-self'))
  150. StepExpr = self.StepExpr()
  151. steps.append(StepExpr)
  152. return X.PathExpr(steps)
  153. def StepExpr(self):
  154. _token_ = self._peek("r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
  155. if _token_ not in ["r'\\('", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE']:
  156. AxisStep = self.AxisStep()
  157. return AxisStep
  158. else:
  159. FilterExpr = self.FilterExpr()
  160. return FilterExpr
  161. def AxisStep(self):
  162. _token_ = self._peek('FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
  163. if _token_ not in ['REVERSE_AXIS_NAME', "r'\\.\\.'"]:
  164. ForwardStep = self.ForwardStep()
  165. step = ForwardStep
  166. else:# in ['REVERSE_AXIS_NAME', "r'\\.\\.'"]
  167. ReverseStep = self.ReverseStep()
  168. step = ReverseStep
  169. expr = X.AxisStep(*step)
  170. if self._peek("r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\['":
  171. PredicateList = self.PredicateList()
  172. expr = X.PredicateList(expr, PredicateList, step[0])
  173. return expr
  174. def ForwardStep(self):
  175. _token_ = self._peek('FORWARD_AXIS_NAME', "r'@'", "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
  176. if _token_ == 'FORWARD_AXIS_NAME':
  177. ForwardAxis = self.ForwardAxis()
  178. NodeTest = self.NodeTest()
  179. return [ForwardAxis, NodeTest]
  180. else:
  181. AbbrevForwardStep = self.AbbrevForwardStep()
  182. return AbbrevForwardStep
  183. def ForwardAxis(self):
  184. FORWARD_AXIS_NAME = self._scan('FORWARD_AXIS_NAME')
  185. self._scan("r'::'")
  186. return FORWARD_AXIS_NAME
  187. def AbbrevForwardStep(self):
  188. axis = 'child'
  189. if self._peek("r'@'", "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME') == "r'@'":
  190. self._scan("r'@'")
  191. axis = 'attribute'
  192. NodeTest = self.NodeTest()
  193. return [axis, NodeTest]
  194. def ReverseStep(self):
  195. _token_ = self._peek('REVERSE_AXIS_NAME', "r'\\.\\.'")
  196. if _token_ == 'REVERSE_AXIS_NAME':
  197. ReverseAxis = self.ReverseAxis()
  198. NodeTest = self.NodeTest()
  199. return [ReverseAxis, NodeTest]
  200. else:# == "r'\\.\\.'"
  201. AbbrevReverseStep = self.AbbrevReverseStep()
  202. return AbbrevReverseStep
  203. def ReverseAxis(self):
  204. REVERSE_AXIS_NAME = self._scan('REVERSE_AXIS_NAME')
  205. self._scan("r'::'")
  206. return REVERSE_AXIS_NAME
  207. def AbbrevReverseStep(self):
  208. self._scan("r'\\.\\.'")
  209. return ['parent', None]
  210. def NodeTest(self):
  211. _token_ = self._peek("r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME')
  212. if _token_ not in ["r'\\*'", 'NCNAME']:
  213. KindTest = self.KindTest()
  214. return KindTest
  215. else:# in ["r'\\*'", 'NCNAME']
  216. NameTest = self.NameTest()
  217. return NameTest
  218. def NameTest(self):
  219. prefix = None
  220. WildcardOrNCName = self.WildcardOrNCName()
  221. localpart = WildcardOrNCName
  222. if self._peek("r':'", "r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r':'":
  223. self._scan("r':'")
  224. WildcardOrNCName = self.WildcardOrNCName()
  225. prefix = localpart
  226. localpart = WildcardOrNCName
  227. return X.NameTest(prefix, localpart)
  228. def WildcardOrNCName(self):
  229. _token_ = self._peek("r'\\*'", 'NCNAME')
  230. if _token_ == "r'\\*'":
  231. self._scan("r'\\*'")
  232. return '*'
  233. else:# == 'NCNAME'
  234. NCNAME = self._scan('NCNAME')
  235. return NCNAME
  236. def FilterExpr(self):
  237. PrimaryExpr = self.PrimaryExpr()
  238. if self._peek("r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\['":
  239. PredicateList = self.PredicateList()
  240. PrimaryExpr = X.PredicateList(PrimaryExpr,PredicateList)
  241. return PrimaryExpr
  242. def PredicateList(self):
  243. Predicate = self.Predicate()
  244. predicates = [Predicate]
  245. while self._peek("r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\['":
  246. Predicate = self.Predicate()
  247. predicates.append(Predicate)
  248. return predicates
  249. def Predicate(self):
  250. self._scan("r'\\['")
  251. Expr = self.Expr()
  252. self._scan("r'\\]'")
  253. return Expr
  254. def PrimaryExpr(self):
  255. _token_ = self._peek("r'\\('", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE')
  256. if _token_ not in ["r'\\('", "r'\\$'", "r'\\.'", 'FUNCNAME']:
  257. Literal = self.Literal()
  258. return X.LiteralExpr(Literal)
  259. elif _token_ == "r'\\$'":
  260. VariableReference = self.VariableReference()
  261. return VariableReference
  262. elif _token_ == "r'\\('":
  263. self._scan("r'\\('")
  264. Expr = self.Expr()
  265. self._scan("r'\\)'")
  266. return Expr
  267. elif _token_ == "r'\\.'":
  268. ContextItemExpr = self.ContextItemExpr()
  269. return ContextItemExpr
  270. else:# == 'FUNCNAME'
  271. FunctionCall = self.FunctionCall()
  272. return FunctionCall
  273. def VariableReference(self):
  274. self._scan("r'\\$'")
  275. QName = self.QName()
  276. return X.VariableReference(*QName)
  277. def ContextItemExpr(self):
  278. self._scan("r'\\.'")
  279. return X.AxisStep('self')
  280. def FunctionCall(self):
  281. FUNCNAME = self._scan('FUNCNAME')
  282. self._scan("r'\\('")
  283. args = []
  284. if self._peek("r'\\,'", "r'\\)'", "r'\\-'", "r'\\/'", "r'\\/\\/'", "r'\\('", 'FORWARD_AXIS_NAME', "r'@'", 'REVERSE_AXIS_NAME', "r'\\.\\.'", "r'\\$'", "r'\\.'", 'FUNCNAME', 'NUMBER', 'DQUOTE', 'SQUOTE', "r'processing-instruction'", "r'comment'", "r'text'", "r'node'", "r'\\*'", 'NCNAME') not in ["r'\\,'", "r'\\)'"]:
  285. Expr = self.Expr()
  286. args.append(Expr)
  287. while self._peek("r'\\,'", "r'\\)'") == "r'\\,'":
  288. self._scan("r'\\,'")
  289. Expr = self.Expr()
  290. args.append(Expr)
  291. self._scan("r'\\)'")
  292. return X.Function(FUNCNAME, args)
  293. def KindTest(self):
  294. _token_ = self._peek("r'processing-instruction'", "r'comment'", "r'text'", "r'node'")
  295. if _token_ == "r'processing-instruction'":
  296. PITest = self.PITest()
  297. return PITest
  298. elif _token_ == "r'comment'":
  299. CommentTest = self.CommentTest()
  300. return CommentTest
  301. elif _token_ == "r'text'":
  302. TextTest = self.TextTest()
  303. return TextTest
  304. else:# == "r'node'"
  305. AnyKindTest = self.AnyKindTest()
  306. return AnyKindTest
  307. def PITest(self):
  308. self._scan("r'processing-instruction'")
  309. name = None
  310. self._scan("r'\\('")
  311. if self._peek('NCNAME', "r'\\)'", 'DQUOTE', 'SQUOTE') != "r'\\)'":
  312. _token_ = self._peek('NCNAME', 'DQUOTE', 'SQUOTE')
  313. if _token_ == 'NCNAME':
  314. NCNAME = self._scan('NCNAME')
  315. name = NCNAME
  316. else:# in ['DQUOTE', 'SQUOTE']
  317. StringLiteral = self.StringLiteral()
  318. name = StringLiteral
  319. self._scan("r'\\)'")
  320. return X.PITest(name)
  321. def CommentTest(self):
  322. self._scan("r'comment'")
  323. self._scan("r'\\('")
  324. self._scan("r'\\)'")
  325. return X.CommentTest()
  326. def TextTest(self):
  327. self._scan("r'text'")
  328. self._scan("r'\\('")
  329. self._scan("r'\\)'")
  330. return X.TextTest()
  331. def AnyKindTest(self):
  332. self._scan("r'node'")
  333. self._scan("r'\\('")
  334. self._scan("r'\\)'")
  335. return X.AnyKindTest()
  336. def Literal(self):
  337. _token_ = self._peek('NUMBER', 'DQUOTE', 'SQUOTE')
  338. if _token_ == 'NUMBER':
  339. NumericLiteral = self.NumericLiteral()
  340. return NumericLiteral
  341. else:# in ['DQUOTE', 'SQUOTE']
  342. StringLiteral = self.StringLiteral()
  343. return StringLiteral
  344. def NumericLiteral(self):
  345. NUMBER = self._scan('NUMBER')
  346. return float(NUMBER)
  347. def StringLiteral(self):
  348. _token_ = self._peek('DQUOTE', 'SQUOTE')
  349. if _token_ == 'DQUOTE':
  350. DQUOTE = self._scan('DQUOTE')
  351. return DQUOTE[1:-1]
  352. else:# == 'SQUOTE'
  353. SQUOTE = self._scan('SQUOTE')
  354. return SQUOTE[1:-1]
  355. def QName(self):
  356. NCNAME = self._scan('NCNAME')
  357. name = NCNAME
  358. if self._peek("r'\\:'", "r'\\['", "r'\\/'", "r'\\/\\/'", "'\\|'", 'MUL_COMP', 'ADD_COMP', 'REL_COMP', 'EQ_COMP', "r'and'", "r'or'", 'END', "r'\\]'", "r'\\)'", "r'\\,'") == "r'\\:'":
  359. self._scan("r'\\:'")
  360. NCNAME = self._scan('NCNAME')
  361. return (name, NCNAME)
  362. return (None, name)
  363. def parse(rule, text):
  364. P = XPath(XPathScanner(text))
  365. return wrap_error_reporter(P, rule)
  366. if __name__ == '__main__':
  367. from sys import argv, stdin
  368. if len(argv) >= 2:
  369. if len(argv) >= 3:
  370. f = open(argv[2],'r')
  371. else:
  372. f = stdin
  373. print parse(argv[1], f.read())
  374. else: print 'Args: <rule> [<filename>]'