PageRenderTime 133ms CodeModel.GetById 32ms app.highlight 87ms RepoModel.GetById 1ms app.codeStats 0ms

/python3/lib/python3.9/site-packages/pip/_vendor/html5lib/treebuilders/etree.py

https://gitlab.com/Alioth-Project/clang-r445002
Python | 343 lines | 305 code | 33 blank | 5 comment | 33 complexity | f8fd3d35dae80eed93550ea3ecda54ea MD5 | raw file
  1from __future__ import absolute_import, division, unicode_literals
  2# pylint:disable=protected-access
  3
  4from pip._vendor.six import text_type
  5
  6import re
  7
  8from copy import copy
  9
 10from . import base
 11from .. import _ihatexml
 12from .. import constants
 13from ..constants import namespaces
 14from .._utils import moduleFactoryFactory
 15
 16tag_regexp = re.compile("{([^}]*)}(.*)")
 17
 18
 19def getETreeBuilder(ElementTreeImplementation, fullTree=False):
 20    ElementTree = ElementTreeImplementation
 21    ElementTreeCommentType = ElementTree.Comment("asd").tag
 22
 23    class Element(base.Node):
 24        def __init__(self, name, namespace=None):
 25            self._name = name
 26            self._namespace = namespace
 27            self._element = ElementTree.Element(self._getETreeTag(name,
 28                                                                  namespace))
 29            if namespace is None:
 30                self.nameTuple = namespaces["html"], self._name
 31            else:
 32                self.nameTuple = self._namespace, self._name
 33            self.parent = None
 34            self._childNodes = []
 35            self._flags = []
 36
 37        def _getETreeTag(self, name, namespace):
 38            if namespace is None:
 39                etree_tag = name
 40            else:
 41                etree_tag = "{%s}%s" % (namespace, name)
 42            return etree_tag
 43
 44        def _setName(self, name):
 45            self._name = name
 46            self._element.tag = self._getETreeTag(self._name, self._namespace)
 47
 48        def _getName(self):
 49            return self._name
 50
 51        name = property(_getName, _setName)
 52
 53        def _setNamespace(self, namespace):
 54            self._namespace = namespace
 55            self._element.tag = self._getETreeTag(self._name, self._namespace)
 56
 57        def _getNamespace(self):
 58            return self._namespace
 59
 60        namespace = property(_getNamespace, _setNamespace)
 61
 62        def _getAttributes(self):
 63            return self._element.attrib
 64
 65        def _setAttributes(self, attributes):
 66            el_attrib = self._element.attrib
 67            el_attrib.clear()
 68            if attributes:
 69                # calling .items _always_ allocates, and the above truthy check is cheaper than the
 70                # allocation on average
 71                for key, value in attributes.items():
 72                    if isinstance(key, tuple):
 73                        name = "{%s}%s" % (key[2], key[1])
 74                    else:
 75                        name = key
 76                    el_attrib[name] = value
 77
 78        attributes = property(_getAttributes, _setAttributes)
 79
 80        def _getChildNodes(self):
 81            return self._childNodes
 82
 83        def _setChildNodes(self, value):
 84            del self._element[:]
 85            self._childNodes = []
 86            for element in value:
 87                self.insertChild(element)
 88
 89        childNodes = property(_getChildNodes, _setChildNodes)
 90
 91        def hasContent(self):
 92            """Return true if the node has children or text"""
 93            return bool(self._element.text or len(self._element))
 94
 95        def appendChild(self, node):
 96            self._childNodes.append(node)
 97            self._element.append(node._element)
 98            node.parent = self
 99
100        def insertBefore(self, node, refNode):
101            index = list(self._element).index(refNode._element)
102            self._element.insert(index, node._element)
103            node.parent = self
104
105        def removeChild(self, node):
106            self._childNodes.remove(node)
107            self._element.remove(node._element)
108            node.parent = None
109
110        def insertText(self, data, insertBefore=None):
111            if not(len(self._element)):
112                if not self._element.text:
113                    self._element.text = ""
114                self._element.text += data
115            elif insertBefore is None:
116                # Insert the text as the tail of the last child element
117                if not self._element[-1].tail:
118                    self._element[-1].tail = ""
119                self._element[-1].tail += data
120            else:
121                # Insert the text before the specified node
122                children = list(self._element)
123                index = children.index(insertBefore._element)
124                if index > 0:
125                    if not self._element[index - 1].tail:
126                        self._element[index - 1].tail = ""
127                    self._element[index - 1].tail += data
128                else:
129                    if not self._element.text:
130                        self._element.text = ""
131                    self._element.text += data
132
133        def cloneNode(self):
134            element = type(self)(self.name, self.namespace)
135            if self._element.attrib:
136                element._element.attrib = copy(self._element.attrib)
137            return element
138
139        def reparentChildren(self, newParent):
140            if newParent.childNodes:
141                newParent.childNodes[-1]._element.tail += self._element.text
142            else:
143                if not newParent._element.text:
144                    newParent._element.text = ""
145                if self._element.text is not None:
146                    newParent._element.text += self._element.text
147            self._element.text = ""
148            base.Node.reparentChildren(self, newParent)
149
150    class Comment(Element):
151        def __init__(self, data):
152            # Use the superclass constructor to set all properties on the
153            # wrapper element
154            self._element = ElementTree.Comment(data)
155            self.parent = None
156            self._childNodes = []
157            self._flags = []
158
159        def _getData(self):
160            return self._element.text
161
162        def _setData(self, value):
163            self._element.text = value
164
165        data = property(_getData, _setData)
166
167    class DocumentType(Element):
168        def __init__(self, name, publicId, systemId):
169            Element.__init__(self, "<!DOCTYPE>")
170            self._element.text = name
171            self.publicId = publicId
172            self.systemId = systemId
173
174        def _getPublicId(self):
175            return self._element.get("publicId", "")
176
177        def _setPublicId(self, value):
178            if value is not None:
179                self._element.set("publicId", value)
180
181        publicId = property(_getPublicId, _setPublicId)
182
183        def _getSystemId(self):
184            return self._element.get("systemId", "")
185
186        def _setSystemId(self, value):
187            if value is not None:
188                self._element.set("systemId", value)
189
190        systemId = property(_getSystemId, _setSystemId)
191
192    class Document(Element):
193        def __init__(self):
194            Element.__init__(self, "DOCUMENT_ROOT")
195
196    class DocumentFragment(Element):
197        def __init__(self):
198            Element.__init__(self, "DOCUMENT_FRAGMENT")
199
200    def testSerializer(element):
201        rv = []
202
203        def serializeElement(element, indent=0):
204            if not(hasattr(element, "tag")):
205                element = element.getroot()
206            if element.tag == "<!DOCTYPE>":
207                if element.get("publicId") or element.get("systemId"):
208                    publicId = element.get("publicId") or ""
209                    systemId = element.get("systemId") or ""
210                    rv.append("""<!DOCTYPE %s "%s" "%s">""" %
211                              (element.text, publicId, systemId))
212                else:
213                    rv.append("<!DOCTYPE %s>" % (element.text,))
214            elif element.tag == "DOCUMENT_ROOT":
215                rv.append("#document")
216                if element.text is not None:
217                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
218                if element.tail is not None:
219                    raise TypeError("Document node cannot have tail")
220                if hasattr(element, "attrib") and len(element.attrib):
221                    raise TypeError("Document node cannot have attributes")
222            elif element.tag == ElementTreeCommentType:
223                rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
224            else:
225                assert isinstance(element.tag, text_type), \
226                    "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
227                nsmatch = tag_regexp.match(element.tag)
228
229                if nsmatch is None:
230                    name = element.tag
231                else:
232                    ns, name = nsmatch.groups()
233                    prefix = constants.prefixes[ns]
234                    name = "%s %s" % (prefix, name)
235                rv.append("|%s<%s>" % (' ' * indent, name))
236
237                if hasattr(element, "attrib"):
238                    attributes = []
239                    for name, value in element.attrib.items():
240                        nsmatch = tag_regexp.match(name)
241                        if nsmatch is not None:
242                            ns, name = nsmatch.groups()
243                            prefix = constants.prefixes[ns]
244                            attr_string = "%s %s" % (prefix, name)
245                        else:
246                            attr_string = name
247                        attributes.append((attr_string, value))
248
249                    for name, value in sorted(attributes):
250                        rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
251                if element.text:
252                    rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
253            indent += 2
254            for child in element:
255                serializeElement(child, indent)
256            if element.tail:
257                rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
258        serializeElement(element, 0)
259
260        return "\n".join(rv)
261
262    def tostring(element):  # pylint:disable=unused-variable
263        """Serialize an element and its child nodes to a string"""
264        rv = []
265        filter = _ihatexml.InfosetFilter()
266
267        def serializeElement(element):
268            if isinstance(element, ElementTree.ElementTree):
269                element = element.getroot()
270
271            if element.tag == "<!DOCTYPE>":
272                if element.get("publicId") or element.get("systemId"):
273                    publicId = element.get("publicId") or ""
274                    systemId = element.get("systemId") or ""
275                    rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" %
276                              (element.text, publicId, systemId))
277                else:
278                    rv.append("<!DOCTYPE %s>" % (element.text,))
279            elif element.tag == "DOCUMENT_ROOT":
280                if element.text is not None:
281                    rv.append(element.text)
282                if element.tail is not None:
283                    raise TypeError("Document node cannot have tail")
284                if hasattr(element, "attrib") and len(element.attrib):
285                    raise TypeError("Document node cannot have attributes")
286
287                for child in element:
288                    serializeElement(child)
289
290            elif element.tag == ElementTreeCommentType:
291                rv.append("<!--%s-->" % (element.text,))
292            else:
293                # This is assumed to be an ordinary element
294                if not element.attrib:
295                    rv.append("<%s>" % (filter.fromXmlName(element.tag),))
296                else:
297                    attr = " ".join(["%s=\"%s\"" % (
298                        filter.fromXmlName(name), value)
299                        for name, value in element.attrib.items()])
300                    rv.append("<%s %s>" % (element.tag, attr))
301                if element.text:
302                    rv.append(element.text)
303
304                for child in element:
305                    serializeElement(child)
306
307                rv.append("</%s>" % (element.tag,))
308
309            if element.tail:
310                rv.append(element.tail)
311
312        serializeElement(element)
313
314        return "".join(rv)
315
316    class TreeBuilder(base.TreeBuilder):  # pylint:disable=unused-variable
317        documentClass = Document
318        doctypeClass = DocumentType
319        elementClass = Element
320        commentClass = Comment
321        fragmentClass = DocumentFragment
322        implementation = ElementTreeImplementation
323
324        def testSerializer(self, element):
325            return testSerializer(element)
326
327        def getDocument(self):
328            if fullTree:
329                return self.document._element
330            else:
331                if self.defaultNamespace is not None:
332                    return self.document._element.find(
333                        "{%s}html" % self.defaultNamespace)
334                else:
335                    return self.document._element.find("html")
336
337        def getFragment(self):
338            return base.TreeBuilder.getFragment(self)._element
339
340    return locals()
341
342
# Public entry point: getETreeBuilder wrapped by moduleFactoryFactory (imported
# from .._utils).  NOTE(review): presumably this builds and caches one module
# per ElementTree implementation from the names getETreeBuilder returns --
# confirm against _utils.
getETreeModule = moduleFactoryFactory(getETreeBuilder)