/atom/core.py
http://radioappz.googlecode.com/ · Python · 534 lines · 375 code · 46 blank · 113 comment · 97 complexity · 29c03e952ce9d3f7047b1dd2a4f8daf6 MD5 · raw file
- #!/usr/bin/env python
- #
- # Copyright (C) 2008 Google Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # This module is used for version 2 of the Google Data APIs.
- __author__ = 'j.s@google.com (Jeff Scudder)'
- import inspect
- try:
- from xml.etree import cElementTree as ElementTree
- except ImportError:
- try:
- import cElementTree as ElementTree
- except ImportError:
- try:
- from xml.etree import ElementTree
- except ImportError:
- from elementtree import ElementTree
# Default character encoding: used by parse() to encode unicode input when no
# encoding is given, and by XmlElement._attach_members() to decode byte
# strings when no encoding is given.
STRING_ENCODING = 'utf-8'
class XmlElement(object):
    """Represents an element node in an XML document.

    Subclasses describe their XML schema declaratively: public class
    attributes name the expected child elements and attributes (see
    _list_xml_members) and _qname holds the element's qualified name, either
    as a single string or as a tuple with one qname per schema version.

    The text member is a UTF-8 encoded str or unicode.
    """
    _qname = None
    _other_elements = None
    _other_attributes = None
    # The rule set contains mappings for XML qnames to child members and the
    # appropriate member classes.
    _rule_set = None
    _members = None
    text = None

    def __init__(self, text=None, *args, **kwargs):
        """Creates the element and initializes every declared XML member.

        Args:
          text: str or unicode (optional) The text content of the element.
          *args: Accepted but not used by this constructor.
          **kwargs: Initial values for declared XML members, keyed by member
              name. Members that are not supplied default to an empty list
              (repeating elements) or None. Keys which do not name a declared
              member are not used.
        """
        cls = self.__class__
        # Look in the class __dict__ so that a member list cached on a
        # superclass is not mistaken for this class's own list.
        if '_members' not in cls.__dict__ or cls._members is None:
            cls._members = tuple(cls._list_xml_members())
        for member_name, member_type in cls._members:
            if member_name in kwargs:
                setattr(self, member_name, kwargs[member_name])
            elif isinstance(member_type, list):
                setattr(self, member_name, [])
            else:
                setattr(self, member_name, None)
        self._other_elements = []
        self._other_attributes = {}
        if text is not None:
            self.text = text

    def _list_xml_members(cls):
        """Lists all class members which are XML elements or attributes.

        The following members would be considered XML members:
        foo = 'abc' - indicates an XML attribute with the qname abc
        foo = SomeElement - indicates an XML child element
        foo = [AnElement] - indicates a repeating XML child element, each
            instance will be stored in a list in this member
        foo = ('att1', '{http://example.com/namespace}att2') - indicates an
            XML attribute which has different parsing rules in different
            versions of the protocol. Version 1 of the XML parsing rules will
            look for an attribute with the qname 'att1' but version 2 of the
            parsing rules will look for a namespaced attribute with the local
            name of 'att2' and an XML namespace of
            'http://example.com/namespace'.

        Members whose names start with an underscore, and the text member,
        are never treated as XML members.

        Returns:
          A list of (member_name, member_definition) pairs.
        """
        members = []
        for pair in inspect.getmembers(cls):
            if pair[0].startswith('_') or pair[0] == 'text':
                continue
            member_type = pair[1]
            if (isinstance(member_type, (tuple, list))
                    or isinstance(member_type, (str, unicode))
                    or (inspect.isclass(member_type)
                        and issubclass(member_type, XmlElement))):
                members.append(pair)
        return members

    _list_xml_members = classmethod(_list_xml_members)

    def _get_rules(cls, version):
        """Initializes the _rule_set for the class, used when parsing XML.

        This method is used internally for parsing and generating XML for an
        XmlElement. It is not recommended that you call this method directly.

        Args:
          version: int The version of the XML rules requested. Versions above
              2 fall back to version 2.

        Returns:
          A tuple containing the XML parsing rules for the appropriate
          version.

          The tuple looks like:
          (qname, {sub_element_qname: (member_name, member_class, repeating),
                   ..},
           {attribute_qname: member_name})

          To give a couple of concrete examples, the atom.data.Control
          _get_rules with version of 2 will return:
          ('{http://www.w3.org/2007/app}control',
           {'{http://www.w3.org/2007/app}draft': ('draft',
                                                  <class 'atom.data.Draft'>,
                                                  False)},
           {})
          Calling _get_rules with version 1 on gdata.data.FeedLink will
          produce:
          ('{http://schemas.google.com/g/2005}feedLink',
           {'{http://www.w3.org/2005/Atom}feed': ('feed',
                                                  <class 'gdata.data.GDFeed'>,
                                                  False)},
           {'href': 'href', 'readOnly': 'read_only',
            'countHint': 'count_hint', 'rel': 'rel'})
        """
        # Look for the rule set in the class __dict__ proxy so that only the
        # _rule_set for this class will be found; this avoids picking up
        # rule sets cached on superclasses.
        # The cache has fixed slots for two XML versions, which is a bit
        # faster than growing a list on demand. If and when there is a
        # version 3, this list will need to be expanded.
        if '_rule_set' not in cls.__dict__ or cls._rule_set is None:
            cls._rule_set = [None, None]
        # If a version higher than 2 is requested, fall back to version 2
        # because 2 is currently the highest supported version.
        if version > 2:
            return cls._get_rules(2)
        if cls._rule_set[version - 1] is not None:
            return cls._rule_set[version - 1]
        # The rule set for each version consists of the qname for this
        # element ('{namespace}tag'), a dictionary (elements) for looking up
        # the corresponding class member when given a child element's qname,
        # and a dictionary (attributes) for looking up the corresponding
        # class member when given an XML attribute's qname.
        elements = {}
        attributes = {}
        if '_members' not in cls.__dict__ or cls._members is None:
            cls._members = tuple(cls._list_xml_members())
        for member_name, target in cls._members:
            if isinstance(target, list):
                # This member points to a repeating element.
                elements[_get_qname(target[0], version)] = (
                    member_name, target[0], True)
            elif isinstance(target, tuple):
                # This member points to a versioned XML attribute; versions
                # beyond the end of the tuple reuse the last entry.
                if version <= len(target):
                    attributes[target[version - 1]] = member_name
                else:
                    attributes[target[-1]] = member_name
            elif isinstance(target, (str, unicode)):
                # This member points to an XML attribute.
                attributes[target] = member_name
            elif issubclass(target, XmlElement):
                # This member points to a single occurrence element.
                elements[_get_qname(target, version)] = (
                    member_name, target, False)
        version_rules = (_get_qname(cls, version), elements, attributes)
        cls._rule_set[version - 1] = version_rules
        return version_rules

    _get_rules = classmethod(_get_rules)

    def get_elements(self, tag=None, namespace=None, version=1):
        """Find all sub elements which match the tag and namespace.

        To find all elements in this object, call get_elements with the tag
        and namespace both set to None (the default). This method searches
        through the object's members and the elements stored in
        _other_elements which did not match any of the XML parsing rules for
        this class.

        Args:
          tag: str
          namespace: str
          version: int Specifies the version of the XML rules to be used when
              searching for matching elements.

        Returns:
          A list of the matching XmlElements.
        """
        matches = []
        ignored1, elements, ignored2 = self.__class__._get_rules(version)
        if elements:
            for qname, element_def in elements.iteritems():
                member = getattr(self, element_def[0])
                if member and _qname_matches(tag, namespace, qname):
                    if element_def[2]:
                        # If this is a repeating element, copy all instances
                        # into the result list.
                        matches.extend(member)
                    else:
                        matches.append(member)
        for element in self._other_elements:
            if _qname_matches(tag, namespace, element._qname):
                matches.append(element)
        return matches

    GetElements = get_elements
    # FindExtensions and FindChildren are provided for backwards
    # compatibility to the atom.AtomBase class.
    # However, FindExtensions may return more results than the v1
    # atom.AtomBase method does, because get_elements searches both the
    # expected children and the unexpected "other elements". The old
    # AtomBase.FindExtensions method searched only "other elements" AKA
    # extension_elements.
    FindExtensions = get_elements
    FindChildren = get_elements

    def get_attributes(self, tag=None, namespace=None, version=1):
        """Find all attributes which match the tag and namespace.

        To find all attributes in this object, call get_attributes with the
        tag and namespace both set to None (the default). This method
        searches through the object's members and the attributes stored in
        _other_attributes which did not fit any of the XML parsing rules for
        this class.

        Args:
          tag: str
          namespace: str
          version: int Specifies the version of the XML rules to be used when
              searching for matching attributes.

        Returns:
          A list of XmlAttribute objects for the matching attributes.
        """
        matches = []
        ignored1, ignored2, attributes = self.__class__._get_rules(version)
        if attributes:
            for qname, attribute_def in attributes.iteritems():
                if isinstance(attribute_def, (list, tuple)):
                    attribute_def = attribute_def[0]
                member = getattr(self, attribute_def)
                # TODO: ensure this hasn't broken existing behavior.
                if member and _qname_matches(tag, namespace, qname):
                    matches.append(XmlAttribute(qname, member))
        for qname, value in self._other_attributes.iteritems():
            if _qname_matches(tag, namespace, qname):
                matches.append(XmlAttribute(qname, value))
        return matches

    GetAttributes = get_attributes

    def _harvest_tree(self, tree, version=1):
        """Populates object members from the data in the tree Element.

        Child elements and attributes which match this class's rules for the
        given version are stored in the corresponding members; everything
        else is kept in _other_elements and _other_attributes.

        Args:
          tree: The ElementTree element to read from.
          version: int The version of the XML rules to apply.
        """
        qname, elements, attributes = self.__class__._get_rules(version)
        for element in tree:
            if elements and element.tag in elements:
                member_name, member_class, repeating = elements[element.tag]
                child = _xml_element_from_tree(element, member_class, version)
                if repeating:
                    # Make sure the member is a list before appending.
                    if getattr(self, member_name) is None:
                        setattr(self, member_name, [])
                    getattr(self, member_name).append(child)
                else:
                    setattr(self, member_name, child)
            else:
                self._other_elements.append(
                    _xml_element_from_tree(element, XmlElement, version))
        for attrib, value in tree.attrib.iteritems():
            if attributes and attrib in attributes:
                setattr(self, attributes[attrib], value)
            else:
                self._other_attributes[attrib] = value
        if tree.text:
            self.text = tree.text

    def _to_tree(self, version=1, encoding=None):
        """Builds a new ElementTree element holding this object's XML data."""
        new_tree = ElementTree.Element(_get_qname(self, version))
        self._attach_members(new_tree, version, encoding)
        return new_tree

    def _attach_members(self, tree, version=1, encoding=None):
        """Convert members to XML elements/attributes and add them to the tree.

        Args:
          tree: An ElementTree.Element which will be modified. The members of
              this object will be added as child elements or attributes
              according to the rules returned by _get_rules. The elements and
              attributes stored in _other_elements and _other_attributes are
              also added as children of this tree.
          version: int The version of the XML rules used to look up members;
              it is also passed on to child elements.
          encoding: str (optional) Used to decode byte-string text and
              unexpected attribute values. Defaults to STRING_ENCODING.
        """
        qname, elements, attributes = self.__class__._get_rules(version)
        encoding = encoding or STRING_ENCODING
        # Add the expected elements and attributes to the tree.
        # NOTE(review): _become_child is called without the encoding, so
        # descendants always decode with the default STRING_ENCODING.
        if elements:
            for tag, element_def in elements.iteritems():
                member = getattr(self, element_def[0])
                # If this is a repeating element and there are members in
                # the list.
                if member and element_def[2]:
                    for instance in member:
                        instance._become_child(tree, version)
                elif member:
                    member._become_child(tree, version)
        if attributes:
            for attribute_tag, member_name in attributes.iteritems():
                value = getattr(self, member_name)
                if value:
                    tree.attrib[attribute_tag] = value
        # Add the unexpected (other) elements and attributes to the tree.
        for element in self._other_elements:
            element._become_child(tree, version)
        for key, value in self._other_attributes.iteritems():
            # I'm not sure if unicode can be used in the attribute name, so
            # for now we assume the encoding is correct for the attribute
            # name.
            if not isinstance(value, unicode):
                value = value.decode(encoding)
            tree.attrib[key] = value
        if self.text:
            if isinstance(self.text, unicode):
                tree.text = self.text
            else:
                tree.text = self.text.decode(encoding)

    def to_string(self, version=1, encoding=None):
        """Converts this object to XML."""
        return ElementTree.tostring(self._to_tree(version, encoding))

    ToString = to_string

    def __str__(self):
        return self.to_string()

    def _become_child(self, tree, version=1):
        """Adds a child element to tree with the XML data in self."""
        new_child = ElementTree.Element('')
        tree.append(new_child)
        new_child.tag = _get_qname(self, version)
        self._attach_members(new_child, version)

    def __get_extension_elements(self):
        return self._other_elements

    def __set_extension_elements(self, elements):
        self._other_elements = elements

    extension_elements = property(
        __get_extension_elements, __set_extension_elements,
        """Provides backwards compatibility for v1 atom.AtomBase classes.""")

    def __get_extension_attributes(self):
        return self._other_attributes

    def __set_extension_attributes(self, attributes):
        self._other_attributes = attributes

    extension_attributes = property(
        __get_extension_attributes, __set_extension_attributes,
        """Provides backwards compatibility for v1 atom.AtomBase classes.""")

    def _get_tag(self, version=1):
        """Returns the local tag name (the qname without its namespace)."""
        qname = _get_qname(self, version)
        return qname[qname.find('}') + 1:]

    def _get_namespace(self, version=1):
        """Returns the namespace part of the qname, or None if there is none."""
        qname = _get_qname(self, version)
        if qname.startswith('{'):
            return qname[1:qname.find('}')]
        return None

    def _set_tag(self, tag):
        """Replaces the local tag of the version 1 qname, keeping its namespace.

        For a versioned (tuple) qname only the first entry is changed. The
        new value is stored on the instance; the class-level _qname is left
        alone. If no qname has been set yet, the tag becomes the qname.

        Args:
          tag: str The new local tag name.
        """
        versioned = isinstance(self._qname, tuple)
        if versioned:
            current = self._qname[0]
        else:
            current = self._qname
        if current is not None and current.startswith('{'):
            new_qname = '{%s}%s' % (current[1:current.find('}')], tag)
        else:
            new_qname = tag
        if versioned:
            # Tuples are immutable (no copy(), no item assignment), so build
            # a new tuple with the first entry replaced.
            self._qname = (new_qname,) + self._qname[1:]
        else:
            self._qname = new_qname

    def _set_namespace(self, namespace):
        """Replaces the namespace of the version 1 qname, keeping its tag.

        For a versioned (tuple) qname only the first entry is changed. The
        new value is stored on the instance; the class-level _qname is left
        alone.

        Args:
          namespace: str The new namespace. A false value (None or '')
              removes the namespace, leaving just the local tag.

        Raises:
          AttributeError: if this element has no qname yet, since there is
              no local tag to combine the namespace with.
        """
        versioned = isinstance(self._qname, tuple)
        if versioned:
            current = self._qname[0]
        else:
            current = self._qname
        local_tag = current[current.find('}') + 1:]
        if namespace:
            new_qname = '{%s}%s' % (namespace, local_tag)
        else:
            new_qname = local_tag
        if versioned:
            # Tuples are immutable (no copy(), no item assignment), so build
            # a new tuple with the first entry replaced.
            self._qname = (new_qname,) + self._qname[1:]
        else:
            self._qname = new_qname

    tag = property(
        _get_tag, _set_tag,
        """Provides backwards compatibility for v1 atom.AtomBase classes.""")
    namespace = property(
        _get_namespace, _set_namespace,
        """Provides backwards compatibility for v1 atom.AtomBase classes.""")
    # Provided for backwards compatibility to atom.ExtensionElement
    children = extension_elements
    attributes = extension_attributes
- def _get_qname(element, version):
- if isinstance(element._qname, tuple):
- if version <= len(element._qname):
- return element._qname[version-1]
- else:
- return element._qname[-1]
- else:
- return element._qname
- def _qname_matches(tag, namespace, qname):
- """Logic determines if a QName matches the desired local tag and namespace.
- This is used in XmlElement.get_elements and XmlElement.get_attributes to
- find matches in the element's members (among all expected-and-unexpected
- elements-and-attributes).
- Args:
- expected_tag: string
- expected_namespace: string
- qname: string in the form '{xml_namespace}localtag' or 'tag' if there is
- no namespace.
- Returns:
- boolean True if the member's tag and namespace fit the expected tag and
- namespace.
- """
- # If there is no expected namespace or tag, then everything will match.
- if qname is None:
- member_tag = None
- member_namespace = None
- else:
- if qname.startswith('{'):
- member_namespace = qname[1:qname.index('}')]
- member_tag = qname[qname.index('}') + 1:]
- else:
- member_namespace = None
- member_tag = qname
- return ((tag is None and namespace is None)
- # If there is a tag, but no namespace, see if the local tag matches.
- or (namespace is None and member_tag == tag)
- # There was no tag, but there was a namespace so see if the namespaces
- # match.
- or (tag is None and member_namespace == namespace)
- # There was no tag, and the desired elements have no namespace, so check
- # to see that the member's namespace is None.
- or (tag is None and namespace == ''
- and member_namespace is None)
- # The tag and the namespace both match.
- or (tag == member_tag
- and namespace == member_namespace)
- # The tag matches, and the expected namespace is the empty namespace,
- # check to make sure the member's namespace is None.
- or (tag == member_tag and namespace == ''
- and member_namespace is None))
def parse(xml_string, target_class=None, version=1, encoding=None):
    """Parses the XML string according to the rules for the target_class.

    Args:
      xml_string: str or unicode
      target_class: XmlElement or a subclass. If None is specified, the
          XmlElement class is used.
      version: int (optional) The version of the schema which should be used
          when converting the XML into an object. The default is 1.
      encoding: str (optional) The character encoding used to turn a unicode
          xml_string into bytes before it is parsed. Defaults to
          STRING_ENCODING. It is not applied when xml_string is already a
          byte string.

    Returns:
      The result of _xml_element_from_tree for the parsed root element.
    """
    if target_class is None:
        target_class = XmlElement
    if isinstance(xml_string, unicode):
        # The XML parser is handed bytes, so encode unicode input first.
        if encoding is None:
            encoding = STRING_ENCODING
        xml_string = xml_string.encode(encoding)
    root = ElementTree.fromstring(xml_string)
    return _xml_element_from_tree(root, target_class, version)


Parse = parse
xml_element_from_string = parse
XmlElementFromString = xml_element_from_string
- def _xml_element_from_tree(tree, target_class, version=1):
- if target_class._qname is None:
- instance = target_class()
- instance._qname = tree.tag
- instance._harvest_tree(tree, version)
- return instance
- # TODO handle the namespace-only case
- # Namespace only will be used with Google Spreadsheets rows and
- # Google Base item attributes.
- elif tree.tag == _get_qname(target_class, version):
- instance = target_class()
- instance._harvest_tree(tree, version)
- return instance
- return None
class XmlAttribute(object):
    """Pairs an XML attribute's qualified name with its value."""

    def __init__(self, qname, value):
        """Stores the attribute's qname (as _qname) and its value."""
        self.value = value
        self._qname = qname