PageRenderTime 57ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/lib-python/2.7/xml/etree/ElementPath.py

https://bitbucket.org/bwesterb/pypy
Python | 303 lines | 232 code | 12 blank | 59 comment | 31 complexity | 0d55c30659fc0b3e74180607863e54ad MD5 | raw file
  1. #
  2. # ElementTree
  3. # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
  4. #
  5. # limited xpath support for element trees
  6. #
  7. # history:
  8. # 2003-05-23 fl created
  9. # 2003-05-28 fl added support for // etc
  10. # 2003-08-27 fl fixed parsing of periods in element names
  11. # 2007-09-10 fl new selection engine
  12. # 2007-09-12 fl fixed parent selector
  13. # 2007-09-13 fl added iterfind; changed findall to return a list
  14. # 2007-11-30 fl added namespaces support
  15. # 2009-10-30 fl added child element value filter
  16. #
  17. # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
  18. #
  19. # fredrik@pythonware.com
  20. # http://www.pythonware.com
  21. #
  22. # --------------------------------------------------------------------
  23. # The ElementTree toolkit is
  24. #
  25. # Copyright (c) 1999-2009 by Fredrik Lundh
  26. #
  27. # By obtaining, using, and/or copying this software and/or its
  28. # associated documentation, you agree that you have read, understood,
  29. # and will comply with the following terms and conditions:
  30. #
  31. # Permission to use, copy, modify, and distribute this software and
  32. # its associated documentation for any purpose and without fee is
  33. # hereby granted, provided that the above copyright notice appears in
  34. # all copies, and that both that copyright notice and this permission
  35. # notice appear in supporting documentation, and that the name of
  36. # Secret Labs AB or the author not be used in advertising or publicity
  37. # pertaining to distribution of the software without specific, written
  38. # prior permission.
  39. #
  40. # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
  41. # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
  42. # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
  43. # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
  44. # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  45. # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
  46. # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  47. # OF THIS SOFTWARE.
  48. # --------------------------------------------------------------------
  49. # Licensed to PSF under a Contributor Agreement.
  50. # See http://www.python.org/psf/license for licensing details.
  51. ##
  52. # Implementation module for XPath support. There's usually no reason
  53. # to import this module directly; the <b>ElementTree</b> does this for
  54. # you, if needed.
  55. ##
  56. import re
  57. xpath_tokenizer_re = re.compile(
  58. "("
  59. "'[^']*'|\"[^\"]*\"|"
  60. "::|"
  61. "//?|"
  62. "\.\.|"
  63. "\(\)|"
  64. "[/.*:\[\]\(\)@=])|"
  65. "((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
  66. "\s+"
  67. )
  68. def xpath_tokenizer(pattern, namespaces=None):
  69. for token in xpath_tokenizer_re.findall(pattern):
  70. tag = token[1]
  71. if tag and tag[0] != "{" and ":" in tag:
  72. try:
  73. prefix, uri = tag.split(":", 1)
  74. if not namespaces:
  75. raise KeyError
  76. yield token[0], "{%s}%s" % (namespaces[prefix], uri)
  77. except KeyError:
  78. raise SyntaxError("prefix %r not found in prefix map" % prefix)
  79. else:
  80. yield token
  81. def get_parent_map(context):
  82. parent_map = context.parent_map
  83. if parent_map is None:
  84. context.parent_map = parent_map = {}
  85. for p in context.root.iter():
  86. for e in p:
  87. parent_map[e] = p
  88. return parent_map
  89. def prepare_child(next, token):
  90. tag = token[1]
  91. def select(context, result):
  92. for elem in result:
  93. for e in elem:
  94. if e.tag == tag:
  95. yield e
  96. return select
  97. def prepare_star(next, token):
  98. def select(context, result):
  99. for elem in result:
  100. for e in elem:
  101. yield e
  102. return select
  103. def prepare_self(next, token):
  104. def select(context, result):
  105. for elem in result:
  106. yield elem
  107. return select
  108. def prepare_descendant(next, token):
  109. token = next()
  110. if token[0] == "*":
  111. tag = "*"
  112. elif not token[0]:
  113. tag = token[1]
  114. else:
  115. raise SyntaxError("invalid descendant")
  116. def select(context, result):
  117. for elem in result:
  118. for e in elem.iter(tag):
  119. if e is not elem:
  120. yield e
  121. return select
  122. def prepare_parent(next, token):
  123. def select(context, result):
  124. # FIXME: raise error if .. is applied at toplevel?
  125. parent_map = get_parent_map(context)
  126. result_map = {}
  127. for elem in result:
  128. if elem in parent_map:
  129. parent = parent_map[elem]
  130. if parent not in result_map:
  131. result_map[parent] = None
  132. yield parent
  133. return select
  134. def prepare_predicate(next, token):
  135. # FIXME: replace with real parser!!! refs:
  136. # http://effbot.org/zone/simple-iterator-parser.htm
  137. # http://javascript.crockford.com/tdop/tdop.html
  138. signature = []
  139. predicate = []
  140. while 1:
  141. token = next()
  142. if token[0] == "]":
  143. break
  144. if token[0] and token[0][:1] in "'\"":
  145. token = "'", token[0][1:-1]
  146. signature.append(token[0] or "-")
  147. predicate.append(token[1])
  148. signature = "".join(signature)
  149. # use signature to determine predicate type
  150. if signature == "@-":
  151. # [@attribute] predicate
  152. key = predicate[1]
  153. def select(context, result):
  154. for elem in result:
  155. if elem.get(key) is not None:
  156. yield elem
  157. return select
  158. if signature == "@-='":
  159. # [@attribute='value']
  160. key = predicate[1]
  161. value = predicate[-1]
  162. def select(context, result):
  163. for elem in result:
  164. if elem.get(key) == value:
  165. yield elem
  166. return select
  167. if signature == "-" and not re.match("\d+$", predicate[0]):
  168. # [tag]
  169. tag = predicate[0]
  170. def select(context, result):
  171. for elem in result:
  172. if elem.find(tag) is not None:
  173. yield elem
  174. return select
  175. if signature == "-='" and not re.match("\d+$", predicate[0]):
  176. # [tag='value']
  177. tag = predicate[0]
  178. value = predicate[-1]
  179. def select(context, result):
  180. for elem in result:
  181. for e in elem.findall(tag):
  182. if "".join(e.itertext()) == value:
  183. yield elem
  184. break
  185. return select
  186. if signature == "-" or signature == "-()" or signature == "-()-":
  187. # [index] or [last()] or [last()-index]
  188. if signature == "-":
  189. index = int(predicate[0]) - 1
  190. else:
  191. if predicate[0] != "last":
  192. raise SyntaxError("unsupported function")
  193. if signature == "-()-":
  194. try:
  195. index = int(predicate[2]) - 1
  196. except ValueError:
  197. raise SyntaxError("unsupported expression")
  198. else:
  199. index = -1
  200. def select(context, result):
  201. parent_map = get_parent_map(context)
  202. for elem in result:
  203. try:
  204. parent = parent_map[elem]
  205. # FIXME: what if the selector is "*" ?
  206. elems = list(parent.findall(elem.tag))
  207. if elems[index] is elem:
  208. yield elem
  209. except (IndexError, KeyError):
  210. pass
  211. return select
  212. raise SyntaxError("invalid predicate")
# Dispatch table used by iterfind: maps the operator part of a token to
# the prepare_* factory that builds the selector for that path step.  The
# empty-string key is looked up for tokens whose operator part is empty.
ops = {
    "": prepare_child,
    "*": prepare_star,
    ".": prepare_self,
    "..": prepare_parent,
    "//": prepare_descendant,
    "[": prepare_predicate,
    }
# Cache of compiled selector lists; iterfind fills it and clears it once
# it holds more than 100 entries.
_cache = {}
class _SelectorContext:
    """State handed to every selector function during one iterfind call."""
    # Child -> parent mapping; stays None until get_parent_map builds it.
    parent_map = None
    def __init__(self, root):
        # Element the search starts from; get_parent_map walks root.iter().
        self.root = root
  226. # --------------------------------------------------------------------
  227. ##
  228. # Generate all matching objects.
  229. def iterfind(elem, path, namespaces=None):
  230. # compile selector pattern
  231. if path[-1:] == "/":
  232. path = path + "*" # implicit all (FIXME: keep this?)
  233. try:
  234. selector = _cache[path]
  235. except KeyError:
  236. if len(_cache) > 100:
  237. _cache.clear()
  238. if path[:1] == "/":
  239. raise SyntaxError("cannot use absolute path on element")
  240. next = iter(xpath_tokenizer(path, namespaces)).next
  241. token = next()
  242. selector = []
  243. while 1:
  244. try:
  245. selector.append(ops[token[0]](next, token))
  246. except StopIteration:
  247. raise SyntaxError("invalid path")
  248. try:
  249. token = next()
  250. if token[0] == "/":
  251. token = next()
  252. except StopIteration:
  253. break
  254. _cache[path] = selector
  255. # execute selector pattern
  256. result = [elem]
  257. context = _SelectorContext(elem)
  258. for select in selector:
  259. result = select(context, result)
  260. return result
  261. ##
  262. # Find first matching object.
  263. def find(elem, path, namespaces=None):
  264. try:
  265. return iterfind(elem, path, namespaces).next()
  266. except StopIteration:
  267. return None
  268. ##
  269. # Find all matching objects.
  270. def findall(elem, path, namespaces=None):
  271. return list(iterfind(elem, path, namespaces))
  272. ##
  273. # Find text for first matching object.
  274. def findtext(elem, path, default=None, namespaces=None):
  275. try:
  276. elem = iterfind(elem, path, namespaces).next()
  277. return elem.text or ""
  278. except StopIteration:
  279. return default