/Lib/xml/etree/ElementPath.py

http://unladen-swallow.googlecode.com/ · Python · 198 lines · 105 code · 24 blank · 69 comment · 46 complexity · 7954605cb3782c7c2f94674092e28588 MD5 · raw file

  1. #
  2. # ElementTree
  3. # $Id: ElementPath.py 1858 2004-06-17 21:31:41Z Fredrik $
  4. #
  5. # limited xpath support for element trees
  6. #
  7. # history:
  8. # 2003-05-23 fl created
  9. # 2003-05-28 fl added support for // etc
  10. # 2003-08-27 fl fixed parsing of periods in element names
  11. #
  12. # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
  13. #
  14. # fredrik@pythonware.com
  15. # http://www.pythonware.com
  16. #
  17. # --------------------------------------------------------------------
  18. # The ElementTree toolkit is
  19. #
  20. # Copyright (c) 1999-2004 by Fredrik Lundh
  21. #
  22. # By obtaining, using, and/or copying this software and/or its
  23. # associated documentation, you agree that you have read, understood,
  24. # and will comply with the following terms and conditions:
  25. #
  26. # Permission to use, copy, modify, and distribute this software and
  27. # its associated documentation for any purpose and without fee is
  28. # hereby granted, provided that the above copyright notice appears in
  29. # all copies, and that both that copyright notice and this permission
  30. # notice appear in supporting documentation, and that the name of
  31. # Secret Labs AB or the author not be used in advertising or publicity
  32. # pertaining to distribution of the software without specific, written
  33. # prior permission.
  34. #
  35. # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
  36. # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
  37. # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
  38. # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
  39. # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
  40. # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
  41. # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
  42. # OF THIS SOFTWARE.
  43. # --------------------------------------------------------------------
  44. # Licensed to PSF under a Contributor Agreement.
  45. # See http://www.python.org/2.4/license for licensing details.
  46. ##
  47. # Implementation module for XPath support. There's usually no reason
  48. # to import this module directly; the <b>ElementTree</b> does this for
  49. # you, if needed.
  50. ##
  51. import re
  # Tokenizer for the supported XPath subset.  The compiled pattern's
  # ``findall`` is bound directly, so ``xpath_tokenizer(path)`` returns a
  # list of ``(op, tag)`` string pairs, one per match:
  #   op  -- an operator: "::", "..", "()" or one of / . * : [ ] ( ) @ =
  #   tag -- a name, optionally prefixed with a "{namespace}" part
  # Exactly one of the two is non-empty, except for a run of whitespace,
  # which matches the last alternative and yields ("", "").
  #
  # The pattern is written as raw strings so the backslashes reach the re
  # module untouched instead of being parsed as (invalid) string escapes.
  xpath_tokenizer = re.compile(
      r"(::|\.\.|\(\)|[/.*:\[\]\(\)@=])"      # group 1: operator
      r"|((?:\{[^}]+\})?[^/:\[\]\(\)@=\s]+)"  # group 2: [{namespace}]name
      r"|\s+"                                 # whitespace: both groups empty
      ).findall
  # Marker type: Path stores an instance of this class in its step list
  # wherever the expression contained "//".  It carries no state; only its
  # type is ever inspected (via isinstance).
  class xpath_descendant_or_self:
      """Placeholder for a "//" step in a compiled path."""
  ##
  # Wrapper for a compiled XPath.
  #
  # Only relative 'path/path'-style expressions are supported.  A step is
  # a tag name, "*" (any tag) or "." (the current element); steps are
  # separated by "/" (children) or "//" (all descendants).

  class Path:

      ##
      # Create a Path instance from an XPath expression.
      #
      # @param path The XPath expression, as a string.
      # @exception SyntaxError If the expression starts with "/", ends
      #     with "//", or uses syntax outside the supported subset.

      def __init__(self, path):
          tokens = xpath_tokenizer(path)
          # the current version supports 'path/path'-style expressions only
          # compiled steps: tag strings, "*", or xpath_descendant_or_self
          # marker instances
          self.path = []
          # set only when the whole path is one string step; enables the
          # direct child scan used by find() and findtext()
          self.tag = None
          if tokens and tokens[0][0] == "/":
              raise SyntaxError("cannot use absolute path on element")
          # walk the token list by index rather than popping from the front
          pos = 0
          count = len(tokens)
          while pos < count:
              op, tag = tokens[pos]
              pos = pos + 1
              if tag or op == "*":
                  self.path.append(tag or op)
              elif op == ".":
                  pass
              elif op == "/":
                  # a "/" where a step is expected is the second half of "//"
                  self.path.append(xpath_descendant_or_self())
                  continue
              else:
                  raise SyntaxError("unsupported path syntax (%s)" % op)
              # a step must be followed by a separator or the end of input
              if pos < count:
                  op, tag = tokens[pos]
                  pos = pos + 1
                  if op != "/":
                      raise SyntaxError(
                          "expected path separator (%s)" % (op or tag)
                          )
          if self.path and isinstance(self.path[-1], xpath_descendant_or_self):
              raise SyntaxError("path cannot end with //")
          if len(self.path) == 1 and isinstance(self.path[0], type("")):
              self.tag = self.path[0]

      ##
      # (Internal) Return the first element matching this path.
      #
      # @param element The element to search from.
      # @return The first match, or None if there is none.

      def _first(self, element):
          tag = self.tag
          if tag is None:
              nodeset = self.findall(element)
              if not nodeset:
                  return None
              return nodeset[0]
          # single-step path: scan the direct children.  "*" has to be
          # treated as a wildcard here as well, otherwise a lone "*" would
          # be compared literally against elem.tag and never match, unlike
          # the same path in findall().
          for elem in element:
              if tag == "*" or elem.tag == tag:
                  return elem
          return None

      ##
      # (Internal) Iterate over a node and its descendants, filtered by tag.
      #
      # @param node The subtree root.
      # @param tag Tag to look for, or None for all elements.
      # @return An iterable of elements, as produced by the element itself.

      def _subtree(self, node, tag):
          # Prefer iter(); getiterator() is the older spelling and is no
          # longer available on current element implementations.
          walk = getattr(node, "iter", None)
          if walk is None:
              walk = node.getiterator
          return walk(tag)

      ##
      # Find first matching object.
      #
      # @param element The element to search from.
      # @return The first matching element, or None if nothing matched.

      def find(self, element):
          return self._first(element)

      ##
      # Find text for first matching object.
      #
      # @param element The element to search from.
      # @param default What to return if nothing matched.
      # @return The text of the first matching element ("" if that
      #     element has no text), or the default value if nothing matched.

      def findtext(self, element, default=None):
          # compare against None explicitly; do not rely on the truth
          # value of the matched element
          match = self._first(element)
          if match is None:
              return default
          return match.text or ""

      ##
      # Find all matching objects.
      #
      # @param element The element to search from.
      # @return A list of matching elements; empty if nothing matched.
      #     For a path without any steps (such as "."), the list holds
      #     the element itself.

      def findall(self, element):
          nodeset = [element]
          steps = self.path
          count = len(steps)
          index = 0
          while index < count:
              step = steps[index]
              index = index + 1
              matches = []
              if isinstance(step, xpath_descendant_or_self):
                  # "//": if the next step is a string (tag or "*"), fold
                  # it into the descendant search and consume it as well;
                  # otherwise collect every descendant
                  tag = None
                  if index < count and isinstance(steps[index], type("")):
                      tag = steps[index]
                      index = index + 1
                  for node in nodeset:
                      found = list(self._subtree(node, tag))
                      # the subtree walk can yield the start node itself
                      # as its first item; drop it so only descendants
                      # remain
                      if found and found[0] is node:
                          matches.extend(found[1:])
                      else:
                          matches.extend(found)
              else:
                  # plain step: filter the children of every current node
                  for node in nodeset:
                      for child in node:
                          if step == "*" or child.tag == step:
                              matches.append(child)
              if not matches:
                  return []
              nodeset = matches
          return nodeset
  # Compiled Path objects, keyed by the path expression they were built from.
  _cache = {}

  ##
  # (Internal) Compile path.
  #
  # Returns the cached Path for an expression seen before; otherwise
  # builds a new one and stores it.  Once the cache holds 100 entries it
  # is emptied completely before the new entry is added.
  #
  # @param path The XPath expression.
  # @return A Path instance.

  def _compile(path):
      try:
          return _cache[path]
      except KeyError:
          pass
      compiled = Path(path)
      if len(_cache) >= 100:
          _cache.clear()
      _cache[path] = compiled
      return compiled
  ##
  # Find first matching object.
  #
  # @param element The element to search from.
  # @param path The XPath expression to look for.
  # @return The first matching element, or None if nothing matched.

  def find(element, path):
      compiled = _compile(path)
      return compiled.find(element)
  ##
  # Find text for first matching object.
  #
  # @param element The element to search from.
  # @param path The XPath expression to look for.
  # @param default What to return if nothing matched.
  # @return The text of the first matching element ("" if that element
  #     has no text), or the default value if nothing matched.

  def findtext(element, path, default=None):
      compiled = _compile(path)
      return compiled.findtext(element, default)
  ##
  # Find all matching objects.
  #
  # @param element The element to search from.
  # @param path The XPath expression to look for.
  # @return A list of matching elements; empty if nothing matched.

  def findall(element, path):
      compiled = _compile(path)
      return compiled.findall(element)