PageRenderTime 45ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 1ms

/win32/ppython/App/Lib/site-packages/pip-1.4.1-py2.7.egg/pip/vendor/html5lib/treebuilders/etree.py

https://gitlab.com/minoca/tools
Python | 337 lines | 302 code | 31 blank | 4 comment | 33 complexity | 9a85049f285ae37de3d93c1561f81640 MD5 | raw file
  1. from __future__ import absolute_import, division, unicode_literals
  2. from pip.vendor.six import text_type
  3. import re
  4. from . import _base
  5. from .. import ihatexml
  6. from .. import constants
  7. from ..constants import namespaces
  8. from ..utils import moduleFactoryFactory
  9. tag_regexp = re.compile("{([^}]*)}(.*)")
  10. def getETreeBuilder(ElementTreeImplementation, fullTree=False):
  11. ElementTree = ElementTreeImplementation
  12. ElementTreeCommentType = ElementTree.Comment("asd").tag
  13. class Element(_base.Node):
  14. def __init__(self, name, namespace=None):
  15. self._name = name
  16. self._namespace = namespace
  17. self._element = ElementTree.Element(self._getETreeTag(name,
  18. namespace))
  19. if namespace is None:
  20. self.nameTuple = namespaces["html"], self._name
  21. else:
  22. self.nameTuple = self._namespace, self._name
  23. self.parent = None
  24. self._childNodes = []
  25. self._flags = []
  26. def _getETreeTag(self, name, namespace):
  27. if namespace is None:
  28. etree_tag = name
  29. else:
  30. etree_tag = "{%s}%s" % (namespace, name)
  31. return etree_tag
  32. def _setName(self, name):
  33. self._name = name
  34. self._element.tag = self._getETreeTag(self._name, self._namespace)
  35. def _getName(self):
  36. return self._name
  37. name = property(_getName, _setName)
  38. def _setNamespace(self, namespace):
  39. self._namespace = namespace
  40. self._element.tag = self._getETreeTag(self._name, self._namespace)
  41. def _getNamespace(self):
  42. return self._namespace
  43. namespace = property(_getNamespace, _setNamespace)
  44. def _getAttributes(self):
  45. return self._element.attrib
  46. def _setAttributes(self, attributes):
  47. # Delete existing attributes first
  48. # XXX - there may be a better way to do this...
  49. for key in list(self._element.attrib.keys()):
  50. del self._element.attrib[key]
  51. for key, value in attributes.items():
  52. if isinstance(key, tuple):
  53. name = "{%s}%s" % (key[2], key[1])
  54. else:
  55. name = key
  56. self._element.set(name, value)
  57. attributes = property(_getAttributes, _setAttributes)
  58. def _getChildNodes(self):
  59. return self._childNodes
  60. def _setChildNodes(self, value):
  61. del self._element[:]
  62. self._childNodes = []
  63. for element in value:
  64. self.insertChild(element)
  65. childNodes = property(_getChildNodes, _setChildNodes)
  66. def hasContent(self):
  67. """Return true if the node has children or text"""
  68. return bool(self._element.text or len(self._element))
  69. def appendChild(self, node):
  70. self._childNodes.append(node)
  71. self._element.append(node._element)
  72. node.parent = self
  73. def insertBefore(self, node, refNode):
  74. index = list(self._element).index(refNode._element)
  75. self._element.insert(index, node._element)
  76. node.parent = self
  77. def removeChild(self, node):
  78. self._element.remove(node._element)
  79. node.parent = None
  80. def insertText(self, data, insertBefore=None):
  81. if not(len(self._element)):
  82. if not self._element.text:
  83. self._element.text = ""
  84. self._element.text += data
  85. elif insertBefore is None:
  86. # Insert the text as the tail of the last child element
  87. if not self._element[-1].tail:
  88. self._element[-1].tail = ""
  89. self._element[-1].tail += data
  90. else:
  91. # Insert the text before the specified node
  92. children = list(self._element)
  93. index = children.index(insertBefore._element)
  94. if index > 0:
  95. if not self._element[index - 1].tail:
  96. self._element[index - 1].tail = ""
  97. self._element[index - 1].tail += data
  98. else:
  99. if not self._element.text:
  100. self._element.text = ""
  101. self._element.text += data
  102. def cloneNode(self):
  103. element = type(self)(self.name, self.namespace)
  104. for name, value in self.attributes.items():
  105. element.attributes[name] = value
  106. return element
  107. def reparentChildren(self, newParent):
  108. if newParent.childNodes:
  109. newParent.childNodes[-1]._element.tail += self._element.text
  110. else:
  111. if not newParent._element.text:
  112. newParent._element.text = ""
  113. if self._element.text is not None:
  114. newParent._element.text += self._element.text
  115. self._element.text = ""
  116. _base.Node.reparentChildren(self, newParent)
  117. class Comment(Element):
  118. def __init__(self, data):
  119. # Use the superclass constructor to set all properties on the
  120. # wrapper element
  121. self._element = ElementTree.Comment(data)
  122. self.parent = None
  123. self._childNodes = []
  124. self._flags = []
  125. def _getData(self):
  126. return self._element.text
  127. def _setData(self, value):
  128. self._element.text = value
  129. data = property(_getData, _setData)
  130. class DocumentType(Element):
  131. def __init__(self, name, publicId, systemId):
  132. Element.__init__(self, "<!DOCTYPE>")
  133. self._element.text = name
  134. self.publicId = publicId
  135. self.systemId = systemId
  136. def _getPublicId(self):
  137. return self._element.get("publicId", "")
  138. def _setPublicId(self, value):
  139. if value is not None:
  140. self._element.set("publicId", value)
  141. publicId = property(_getPublicId, _setPublicId)
  142. def _getSystemId(self):
  143. return self._element.get("systemId", "")
  144. def _setSystemId(self, value):
  145. if value is not None:
  146. self._element.set("systemId", value)
  147. systemId = property(_getSystemId, _setSystemId)
  148. class Document(Element):
  149. def __init__(self):
  150. Element.__init__(self, "DOCUMENT_ROOT")
  151. class DocumentFragment(Element):
  152. def __init__(self):
  153. Element.__init__(self, "DOCUMENT_FRAGMENT")
  154. def testSerializer(element):
  155. rv = []
  156. def serializeElement(element, indent=0):
  157. if not(hasattr(element, "tag")):
  158. element = element.getroot()
  159. if element.tag == "<!DOCTYPE>":
  160. if element.get("publicId") or element.get("systemId"):
  161. publicId = element.get("publicId") or ""
  162. systemId = element.get("systemId") or ""
  163. rv.append("""<!DOCTYPE %s "%s" "%s">""" %
  164. (element.text, publicId, systemId))
  165. else:
  166. rv.append("<!DOCTYPE %s>" % (element.text,))
  167. elif element.tag == "DOCUMENT_ROOT":
  168. rv.append("#document")
  169. if element.text is not None:
  170. rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
  171. if element.tail is not None:
  172. raise TypeError("Document node cannot have tail")
  173. if hasattr(element, "attrib") and len(element.attrib):
  174. raise TypeError("Document node cannot have attributes")
  175. elif element.tag == ElementTreeCommentType:
  176. rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
  177. else:
  178. assert isinstance(element.tag, text_type), \
  179. "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
  180. nsmatch = tag_regexp.match(element.tag)
  181. if nsmatch is None:
  182. name = element.tag
  183. else:
  184. ns, name = nsmatch.groups()
  185. prefix = constants.prefixes[ns]
  186. name = "%s %s" % (prefix, name)
  187. rv.append("|%s<%s>" % (' ' * indent, name))
  188. if hasattr(element, "attrib"):
  189. attributes = []
  190. for name, value in element.attrib.items():
  191. nsmatch = tag_regexp.match(name)
  192. if nsmatch is not None:
  193. ns, name = nsmatch.groups()
  194. prefix = constants.prefixes[ns]
  195. attr_string = "%s %s" % (prefix, name)
  196. else:
  197. attr_string = name
  198. attributes.append((attr_string, value))
  199. for name, value in sorted(attributes):
  200. rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
  201. if element.text:
  202. rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
  203. indent += 2
  204. for child in element:
  205. serializeElement(child, indent)
  206. if element.tail:
  207. rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
  208. serializeElement(element, 0)
  209. return "\n".join(rv)
  210. def tostring(element):
  211. """Serialize an element and its child nodes to a string"""
  212. rv = []
  213. filter = ihatexml.InfosetFilter()
  214. def serializeElement(element):
  215. if isinstance(element, ElementTree.ElementTree):
  216. element = element.getroot()
  217. if element.tag == "<!DOCTYPE>":
  218. if element.get("publicId") or element.get("systemId"):
  219. publicId = element.get("publicId") or ""
  220. systemId = element.get("systemId") or ""
  221. rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
  222. (element.text, publicId, systemId))
  223. else:
  224. rv.append("<!DOCTYPE %s>" % (element.text,))
  225. elif element.tag == "DOCUMENT_ROOT":
  226. if element.text is not None:
  227. rv.append(element.text)
  228. if element.tail is not None:
  229. raise TypeError("Document node cannot have tail")
  230. if hasattr(element, "attrib") and len(element.attrib):
  231. raise TypeError("Document node cannot have attributes")
  232. for child in element:
  233. serializeElement(child)
  234. elif element.tag == ElementTreeCommentType:
  235. rv.append("<!--%s-->" % (element.text,))
  236. else:
  237. # This is assumed to be an ordinary element
  238. if not element.attrib:
  239. rv.append("<%s>" % (filter.fromXmlName(element.tag),))
  240. else:
  241. attr = " ".join(["%s=\"%s\"" % (
  242. filter.fromXmlName(name), value)
  243. for name, value in element.attrib.items()])
  244. rv.append("<%s %s>" % (element.tag, attr))
  245. if element.text:
  246. rv.append(element.text)
  247. for child in element:
  248. serializeElement(child)
  249. rv.append("</%s>" % (element.tag,))
  250. if element.tail:
  251. rv.append(element.tail)
  252. serializeElement(element)
  253. return "".join(rv)
  254. class TreeBuilder(_base.TreeBuilder):
  255. documentClass = Document
  256. doctypeClass = DocumentType
  257. elementClass = Element
  258. commentClass = Comment
  259. fragmentClass = DocumentFragment
  260. implementation = ElementTreeImplementation
  261. def testSerializer(self, element):
  262. return testSerializer(element)
  263. def getDocument(self):
  264. if fullTree:
  265. return self.document._element
  266. else:
  267. if self.defaultNamespace is not None:
  268. return self.document._element.find(
  269. "{%s}html" % self.defaultNamespace)
  270. else:
  271. return self.document._element.find("html")
  272. def getFragment(self):
  273. return _base.TreeBuilder.getFragment(self)._element
  274. return locals()
# Public entry point built by wrapping getETreeBuilder with
# moduleFactoryFactory (imported from ..utils).
# NOTE(review): presumably returns a module-like namespace per ElementTree
# implementation -- confirm against moduleFactoryFactory.
getETreeModule = moduleFactoryFactory(getETreeBuilder)