/python3/lib/python3.9/site-packages/pip/_vendor/html5lib/treebuilders/etree.py
Python | 343 lines | 305 code | 33 blank | 5 comment | 33 complexity | f8fd3d35dae80eed93550ea3ecda54ea MD5 | raw file
1from __future__ import absolute_import, division, unicode_literals
2# pylint:disable=protected-access
3
4from pip._vendor.six import text_type
5
6import re
7
8from copy import copy
9
10from . import base
11from .. import _ihatexml
12from .. import constants
13from ..constants import namespaces
14from .._utils import moduleFactoryFactory
15
# Matches an ElementTree-style qualified name of the form "{namespace}local";
# group 1 is the text between the braces, group 2 is everything after them.
tag_regexp = re.compile("{([^}]*)}(.*)")
17
18
def getETreeBuilder(ElementTreeImplementation, fullTree=False):
    """Create the tree-builder classes bound to one ElementTree implementation.

    ``ElementTreeImplementation`` must expose the parts of the ElementTree
    API used below (``Element``, ``Comment`` and ``ElementTree``).

    ``fullTree`` selects what ``TreeBuilder.getDocument`` returns: the
    synthetic ``DOCUMENT_ROOT`` element when true, otherwise the ``html``
    element found directly beneath it.

    Returns ``locals()``, i.e. a dict containing every name defined in this
    function (the node classes, the two serializers and ``TreeBuilder``).
    """
    ElementTree = ElementTreeImplementation
    # Comments are recognised by comparing an element's tag with the tag of
    # a throw-away comment node created here.
    ElementTreeCommentType = ElementTree.Comment("asd").tag

    class Element(base.Node):
        """Node wrapper that mirrors its state into an ElementTree element.

        The wrapped element lives in ``_element``; ``_childNodes`` holds the
        wrapper objects of the children in the same order.
        """

        def __init__(self, name, namespace=None):
            self._name = name
            self._namespace = namespace
            self._element = ElementTree.Element(self._getETreeTag(name,
                                                                  namespace))
            if namespace is None:
                self.nameTuple = namespaces["html"], self._name
            else:
                self.nameTuple = self._namespace, self._name
            self.parent = None
            self._childNodes = []
            self._flags = []

        def _getETreeTag(self, name, namespace):
            """Return ``name``, or ``{namespace}name`` when a namespace is given."""
            if namespace is None:
                etree_tag = name
            else:
                etree_tag = "{%s}%s" % (namespace, name)
            return etree_tag

        def _setName(self, name):
            self._name = name
            self._element.tag = self._getETreeTag(self._name, self._namespace)

        def _getName(self):
            return self._name

        name = property(_getName, _setName)

        def _setNamespace(self, namespace):
            self._namespace = namespace
            self._element.tag = self._getETreeTag(self._name, self._namespace)

        def _getNamespace(self):
            return self._namespace

        namespace = property(_getNamespace, _setNamespace)

        def _getAttributes(self):
            return self._element.attrib

        def _setAttributes(self, attributes):
            """Replace all attributes of the wrapped element.

            Tuple keys are turned into ``{key[2]}key[1]`` names; any other
            key is used unchanged.
            """
            el_attrib = self._element.attrib
            el_attrib.clear()
            if attributes:
                # calling .items _always_ allocates, and the above truthy check is cheaper than the
                # allocation on average
                for key, value in attributes.items():
                    if isinstance(key, tuple):
                        # presumably (prefix, local name, namespace) --
                        # verify against the caller
                        name = "{%s}%s" % (key[2], key[1])
                    else:
                        name = key
                    el_attrib[name] = value

        attributes = property(_getAttributes, _setAttributes)

        def _getChildNodes(self):
            return self._childNodes

        def _setChildNodes(self, value):
            """Drop all current children and append every node in ``value``."""
            del self._element[:]
            self._childNodes = []
            for element in value:
                # appendChild updates both the wrapper list and the wrapped
                # element (this class defines no ``insertChild``).
                self.appendChild(element)

        childNodes = property(_getChildNodes, _setChildNodes)

        def hasContent(self):
            """Return true if the node has children or text"""
            return bool(self._element.text or len(self._element))

        def appendChild(self, node):
            self._childNodes.append(node)
            self._element.append(node._element)
            node.parent = self

        def insertBefore(self, node, refNode):
            """Insert ``node`` immediately before the child ``refNode``.

            Raises ``ValueError`` when ``refNode`` is not a child of the
            wrapped element.
            """
            index = list(self._element).index(refNode._element)
            self._element.insert(index, node._element)
            # Keep the wrapper list in step with the wrapped element so that
            # removeChild/reparentChildren can later find this node.
            self._childNodes.insert(index, node)
            node.parent = self

        def removeChild(self, node):
            self._childNodes.remove(node)
            self._element.remove(node._element)
            node.parent = None

        def insertText(self, data, insertBefore=None):
            """Add ``data`` to this node's text content.

            ElementTree has no text nodes, so the data is appended to the
            element's ``text`` (no children, or inserting before the first
            child) or to the ``tail`` of the preceding child.
            """
            if not len(self._element):
                if not self._element.text:
                    self._element.text = ""
                self._element.text += data
            elif insertBefore is None:
                # Insert the text as the tail of the last child element
                if not self._element[-1].tail:
                    self._element[-1].tail = ""
                self._element[-1].tail += data
            else:
                # Insert the text before the specified node
                children = list(self._element)
                index = children.index(insertBefore._element)
                if index > 0:
                    if not self._element[index - 1].tail:
                        self._element[index - 1].tail = ""
                    self._element[index - 1].tail += data
                else:
                    if not self._element.text:
                        self._element.text = ""
                    self._element.text += data

        def cloneNode(self):
            """Return a new node with the same name, namespace and attributes.

            Children and text are not copied.
            """
            element = type(self)(self.name, self.namespace)
            if self._element.attrib:
                element._element.attrib = copy(self._element.attrib)
            return element

        def reparentChildren(self, newParent):
            """Move this node's leading text and its children to ``newParent``."""
            text = self._element.text
            if newParent.childNodes:
                # The text goes after newParent's last child.  Either the
                # tail or the text may be None, so avoid a bare ``+=``.
                if text:
                    last = newParent.childNodes[-1]._element
                    last.tail = (last.tail or "") + text
            else:
                if not newParent._element.text:
                    newParent._element.text = ""
                if text is not None:
                    newParent._element.text += text
            self._element.text = ""
            base.Node.reparentChildren(self, newParent)

    class Comment(Element):
        """Wrapper around an ElementTree comment node."""

        def __init__(self, data):
            # Element.__init__ is not called: the wrapped element is created
            # with ElementTree.Comment, and only the bookkeeping attributes
            # below are initialised (no _name, _namespace or nameTuple).
            self._element = ElementTree.Comment(data)
            self.parent = None
            self._childNodes = []
            self._flags = []

        def _getData(self):
            return self._element.text

        def _setData(self, value):
            self._element.text = value

        data = property(_getData, _setData)

    class DocumentType(Element):
        """Doctype stored as a ``<!DOCTYPE>`` element.

        The doctype name is kept in the element text; the public and system
        identifiers are kept as the ``publicId``/``systemId`` attributes.
        """

        def __init__(self, name, publicId, systemId):
            Element.__init__(self, "<!DOCTYPE>")
            self._element.text = name
            self.publicId = publicId
            self.systemId = systemId

        def _getPublicId(self):
            return self._element.get("publicId", "")

        def _setPublicId(self, value):
            # A None value leaves the attribute unset.
            if value is not None:
                self._element.set("publicId", value)

        publicId = property(_getPublicId, _setPublicId)

        def _getSystemId(self):
            return self._element.get("systemId", "")

        def _setSystemId(self, value):
            # A None value leaves the attribute unset.
            if value is not None:
                self._element.set("systemId", value)

        systemId = property(_getSystemId, _setSystemId)

    class Document(Element):
        """Document node, stored as a ``DOCUMENT_ROOT`` element."""

        def __init__(self):
            Element.__init__(self, "DOCUMENT_ROOT")

    class DocumentFragment(Element):
        """Fragment node, stored as a ``DOCUMENT_FRAGMENT`` element."""

        def __init__(self):
            Element.__init__(self, "DOCUMENT_FRAGMENT")

    def testSerializer(element):
        """Serialize ``element`` to an indented, "|"-prefixed tree dump.

        ``element`` may also be an object without a ``tag`` attribute that
        offers ``getroot()``.  Raises ``TypeError`` if a ``DOCUMENT_ROOT``
        element carries a tail or attributes.
        """
        rv = []

        def serializeElement(element, indent=0):
            if not hasattr(element, "tag"):
                element = element.getroot()
            if element.tag == "<!DOCTYPE>":
                if element.get("publicId") or element.get("systemId"):
                    publicId = element.get("publicId") or ""
                    systemId = element.get("systemId") or ""
                    rv.append("""<!DOCTYPE %s "%s" "%s">""" %
                              (element.text, publicId, systemId))
                else:
                    rv.append("<!DOCTYPE %s>" % (element.text,))
            elif element.tag == "DOCUMENT_ROOT":
                rv.append("#document")
                if element.text is not None:
                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
                if element.tail is not None:
                    raise TypeError("Document node cannot have tail")
                if hasattr(element, "attrib") and len(element.attrib):
                    raise TypeError("Document node cannot have attributes")
            elif element.tag == ElementTreeCommentType:
                rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
            else:
                assert isinstance(element.tag, text_type), \
                    "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
                nsmatch = tag_regexp.match(element.tag)

                if nsmatch is None:
                    name = element.tag
                else:
                    # "{namespace}local" is printed as "prefix local"
                    ns, name = nsmatch.groups()
                    prefix = constants.prefixes[ns]
                    name = "%s %s" % (prefix, name)
                rv.append("|%s<%s>" % (' ' * indent, name))

                if hasattr(element, "attrib"):
                    attributes = []
                    for name, value in element.attrib.items():
                        nsmatch = tag_regexp.match(name)
                        if nsmatch is not None:
                            ns, name = nsmatch.groups()
                            prefix = constants.prefixes[ns]
                            attr_string = "%s %s" % (prefix, name)
                        else:
                            attr_string = name
                        attributes.append((attr_string, value))

                    # Attributes are emitted in sorted order.
                    for name, value in sorted(attributes):
                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
                if element.text:
                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
            indent += 2
            for child in element:
                serializeElement(child, indent)
            if element.tail:
                # The tail follows the element, so it is printed at the
                # element's own indentation level.
                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
        serializeElement(element, 0)

        return "\n".join(rv)

    def tostring(element):  # pylint:disable=unused-variable
        """Serialize an element and its child nodes to a string"""
        rv = []
        infoset_filter = _ihatexml.InfosetFilter()

        def serializeElement(element):
            if isinstance(element, ElementTree.ElementTree):
                element = element.getroot()

            if element.tag == "<!DOCTYPE>":
                if element.get("publicId") or element.get("systemId"):
                    publicId = element.get("publicId") or ""
                    systemId = element.get("systemId") or ""
                    rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
                              (element.text, publicId, systemId))
                else:
                    rv.append("<!DOCTYPE %s>" % (element.text,))
            elif element.tag == "DOCUMENT_ROOT":
                if element.text is not None:
                    rv.append(element.text)
                if element.tail is not None:
                    raise TypeError("Document node cannot have tail")
                if hasattr(element, "attrib") and len(element.attrib):
                    raise TypeError("Document node cannot have attributes")

                for child in element:
                    serializeElement(child)

            elif element.tag == ElementTreeCommentType:
                rv.append("<!--%s-->" % (element.text,))
            else:
                # This is assumed to be an ordinary element
                # Convert the tag once so that the start tag (with or
                # without attributes) and the end tag always agree.
                tag = infoset_filter.fromXmlName(element.tag)
                if not element.attrib:
                    rv.append("<%s>" % (tag,))
                else:
                    attr = " ".join(["%s=\"%s\"" % (
                        infoset_filter.fromXmlName(name), value)
                        for name, value in element.attrib.items()])
                    rv.append("<%s %s>" % (tag, attr))
                if element.text:
                    rv.append(element.text)

                for child in element:
                    serializeElement(child)

                rv.append("</%s>" % (tag,))

            if element.tail:
                rv.append(element.tail)

        serializeElement(element)

        return "".join(rv)

    class TreeBuilder(base.TreeBuilder):  # pylint:disable=unused-variable
        """Tree builder wired to the node classes defined above."""

        documentClass = Document
        doctypeClass = DocumentType
        elementClass = Element
        commentClass = Comment
        fragmentClass = DocumentFragment
        implementation = ElementTreeImplementation

        def testSerializer(self, element):
            # Resolves to the enclosing function's testSerializer, not to
            # this method (class scope is skipped in name lookup).
            return testSerializer(element)

        def getDocument(self):
            """Return the document root, or its ``html`` child unless ``fullTree``."""
            if fullTree:
                return self.document._element
            if self.defaultNamespace is not None:
                return self.document._element.find(
                    "{%s}html" % self.defaultNamespace)
            return self.document._element.find("html")

        def getFragment(self):
            """Return the ElementTree element of the fragment built by the base class."""
            return base.TreeBuilder.getFragment(self)._element

    return locals()
341
342
# Public entry point: getETreeBuilder wrapped by moduleFactoryFactory
# (imported from .._utils at the top of this file).
getETreeModule = moduleFactoryFactory(getETreeBuilder)