PageRenderTime 50ms CodeModel.GetById 13ms app.highlight 30ms RepoModel.GetById 2ms app.codeStats 0ms

/atom/core.py

http://radioappz.googlecode.com/
Python | 534 lines | 444 code | 16 blank | 74 comment | 30 complexity | 29c03e952ce9d3f7047b1dd2a4f8daf6 MD5 | raw file
  1#!/usr/bin/env python
  2#
  3#    Copyright (C) 2008 Google Inc.
  4#
  5#   Licensed under the Apache License, Version 2.0 (the "License");
  6#   you may not use this file except in compliance with the License.
  7#   You may obtain a copy of the License at
  8#
  9#       http://www.apache.org/licenses/LICENSE-2.0
 10#
 11#   Unless required by applicable law or agreed to in writing, software
 12#   distributed under the License is distributed on an "AS IS" BASIS,
 13#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14#   See the License for the specific language governing permissions and
 15#   limitations under the License.
 16
 17
 18# This module is used for version 2 of the Google Data APIs.
 19
 20
 21__author__ = 'j.s@google.com (Jeff Scudder)'
 22
 23
 24import inspect
 25try:
 26  from xml.etree import cElementTree as ElementTree
 27except ImportError:
 28  try:
 29    import cElementTree as ElementTree
 30  except ImportError:
 31    try:
 32      from xml.etree import ElementTree
 33    except ImportError:
 34      from elementtree import ElementTree
 35
 36
# Default character encoding for this module: parse() uses it to encode
# unicode input when no encoding is given, and XmlElement._attach_members
# uses it to decode byte-string text and unexpected attribute values.
STRING_ENCODING = 'utf-8'
 38
 39
 40class XmlElement(object):
 41  """Represents an element node in an XML document.
 42
 43  The text member is a UTF-8 encoded str or unicode.
 44  """
 45  _qname = None
 46  _other_elements = None
 47  _other_attributes = None
 48  # The rule set contains mappings for XML qnames to child members and the
 49  # appropriate member classes.
 50  _rule_set = None
 51  _members = None
 52  text = None
 53
 54  def __init__(self, text=None, *args, **kwargs):
 55    if ('_members' not in self.__class__.__dict__
 56        or self.__class__._members is None):
 57      self.__class__._members = tuple(self.__class__._list_xml_members())
 58    for member_name, member_type in self.__class__._members:
 59      if member_name in kwargs:
 60        setattr(self, member_name, kwargs[member_name])
 61      else:
 62        if isinstance(member_type, list):
 63          setattr(self, member_name, [])
 64        else:
 65          setattr(self, member_name, None)
 66    self._other_elements = []
 67    self._other_attributes = {}
 68    if text is not None:
 69      self.text = text
 70
 71  def _list_xml_members(cls):
 72    """Generator listing all members which are XML elements or attributes.
 73
 74    The following members would be considered XML members:
 75    foo = 'abc' - indicates an XML attribute with the qname abc
 76    foo = SomeElement - indicates an XML child element
 77    foo = [AnElement] - indicates a repeating XML child element, each instance
 78        will be stored in a list in this member
 79    foo = ('att1', '{http://example.com/namespace}att2') - indicates an XML
 80        attribute which has different parsing rules in different versions of
 81        the protocol. Version 1 of the XML parsing rules will look for an
 82        attribute with the qname 'att1' but verion 2 of the parsing rules will
 83        look for a namespaced attribute with the local name of 'att2' and an
 84        XML namespace of 'http://example.com/namespace'.
 85    """
 86    members = []
 87    for pair in inspect.getmembers(cls):
 88      if not pair[0].startswith('_') and pair[0] != 'text':
 89        member_type = pair[1]
 90        if (isinstance(member_type, tuple) or isinstance(member_type, list)
 91            or isinstance(member_type, (str, unicode))
 92            or (inspect.isclass(member_type)
 93                and issubclass(member_type, XmlElement))):
 94          members.append(pair)
 95    return members
 96
 97  _list_xml_members = classmethod(_list_xml_members)
 98
 99  def _get_rules(cls, version):
100    """Initializes the _rule_set for the class which is used when parsing XML.
101
102    This method is used internally for parsing and generating XML for an
103    XmlElement. It is not recommended that you call this method directly.
104
105    Returns:
106      A tuple containing the XML parsing rules for the appropriate version.
107
108      The tuple looks like:
109      (qname, {sub_element_qname: (member_name, member_class, repeating), ..},
110       {attribute_qname: member_name})
111
112      To give a couple of concrete example, the atom.data.Control _get_rules
113      with version of 2 will return:
114      ('{http://www.w3.org/2007/app}control',
115       {'{http://www.w3.org/2007/app}draft': ('draft',
116                                              <class 'atom.data.Draft'>,
117                                              False)},
118       {})
119      Calling _get_rules with version 1 on gdata.data.FeedLink will produce:
120      ('{http://schemas.google.com/g/2005}feedLink',
121       {'{http://www.w3.org/2005/Atom}feed': ('feed',
122                                              <class 'gdata.data.GDFeed'>,
123                                              False)},
124       {'href': 'href', 'readOnly': 'read_only', 'countHint': 'count_hint',
125        'rel': 'rel'})
126    """
127    # Initialize the _rule_set to make sure there is a slot available to store
128    # the parsing rules for this version of the XML schema.
129    # Look for rule set in the class __dict__ proxy so that only the
130    # _rule_set for this class will be found. By using the dict proxy
131    # we avoid finding rule_sets defined in superclasses.
132    # The four lines below provide support for any number of versions, but it
133    # runs a bit slower then hard coding slots for two versions, so I'm using
134    # the below two lines.
135    #if '_rule_set' not in cls.__dict__ or cls._rule_set is None:
136    #  cls._rule_set = []
137    #while len(cls.__dict__['_rule_set']) < version:
138    #  cls._rule_set.append(None)
139    # If there is no rule set cache in the class, provide slots for two XML
140    # versions. If and when there is a version 3, this list will need to be
141    # expanded.
142    if '_rule_set' not in cls.__dict__ or cls._rule_set is None:
143      cls._rule_set = [None, None]
144    # If a version higher than 2 is requested, fall back to version 2 because
145    # 2 is currently the highest supported version.
146    if version > 2:
147      return cls._get_rules(2)
148    # Check the dict proxy for the rule set to avoid finding any rule sets
149    # which belong to the superclass. We only want rule sets for this class.
150    if cls._rule_set[version-1] is None:
151      # The rule set for each version consists of the qname for this element
152      # ('{namespace}tag'), a dictionary (elements) for looking up the
153      # corresponding class member when given a child element's qname, and a
154      # dictionary (attributes) for looking up the corresponding class member
155      # when given an XML attribute's qname.
156      elements = {}
157      attributes = {}
158      if ('_members' not in cls.__dict__ or cls._members is None):
159        cls._members = tuple(cls._list_xml_members())
160      for member_name, target in cls._members:
161        if isinstance(target, list):
162          # This member points to a repeating element.
163          elements[_get_qname(target[0], version)] = (member_name, target[0],
164              True)
165        elif isinstance(target, tuple):
166          # This member points to a versioned XML attribute.
167          if version <= len(target):
168            attributes[target[version-1]] = member_name
169          else:
170            attributes[target[-1]] = member_name
171        elif isinstance(target, (str, unicode)):
172          # This member points to an XML attribute.
173          attributes[target] = member_name
174        elif issubclass(target, XmlElement):
175          # This member points to a single occurance element.
176          elements[_get_qname(target, version)] = (member_name, target, False)
177      version_rules = (_get_qname(cls, version), elements, attributes)
178      cls._rule_set[version-1] = version_rules
179      return version_rules
180    else:
181      return cls._rule_set[version-1]
182
183  _get_rules = classmethod(_get_rules)
184
185  def get_elements(self, tag=None, namespace=None, version=1):
186    """Find all sub elements which match the tag and namespace.
187
188    To find all elements in this object, call get_elements with the tag and
189    namespace both set to None (the default). This method searches through
190    the object's members and the elements stored in _other_elements which
191    did not match any of the XML parsing rules for this class.
192
193    Args:
194      tag: str
195      namespace: str
196      version: int Specifies the version of the XML rules to be used when
197               searching for matching elements.
198
199    Returns:
200      A list of the matching XmlElements.
201    """
202    matches = []
203    ignored1, elements, ignored2 = self.__class__._get_rules(version)
204    if elements:
205      for qname, element_def in elements.iteritems():
206        member = getattr(self, element_def[0])
207        if member:
208          if _qname_matches(tag, namespace, qname):
209            if element_def[2]:
210              # If this is a repeating element, copy all instances into the
211              # result list.
212              matches.extend(member)
213            else:
214              matches.append(member)
215    for element in self._other_elements:
216      if _qname_matches(tag, namespace, element._qname):
217        matches.append(element)
218    return matches
219
220  GetElements = get_elements
221  # FindExtensions and FindChildren are provided for backwards compatibility
222  # to the atom.AtomBase class.
223  # However, FindExtensions may return more results than the v1 atom.AtomBase
224  # method does, because get_elements searches both the expected children
225  # and the unexpected "other elements". The old AtomBase.FindExtensions
226  # method searched only "other elements" AKA extension_elements.
227  FindExtensions = get_elements
228  FindChildren = get_elements
229
230  def get_attributes(self, tag=None, namespace=None, version=1):
231    """Find all attributes which match the tag and namespace.
232
233    To find all attributes in this object, call get_attributes with the tag
234    and namespace both set to None (the default). This method searches
235    through the object's members and the attributes stored in
236    _other_attributes which did not fit any of the XML parsing rules for this
237    class.
238
239    Args:
240      tag: str
241      namespace: str
242      version: int Specifies the version of the XML rules to be used when
243               searching for matching attributes.
244
245    Returns:
246      A list of XmlAttribute objects for the matching attributes.
247    """
248    matches = []
249    ignored1, ignored2, attributes = self.__class__._get_rules(version)
250    if attributes:
251      for qname, attribute_def in attributes.iteritems():
252        if isinstance(attribute_def, (list, tuple)):
253          attribute_def = attribute_def[0]
254        member = getattr(self, attribute_def)
255        # TODO: ensure this hasn't broken existing behavior.
256        #member = getattr(self, attribute_def[0])
257        if member:
258          if _qname_matches(tag, namespace, qname):
259            matches.append(XmlAttribute(qname, member))
260    for qname, value in self._other_attributes.iteritems():
261      if _qname_matches(tag, namespace, qname):
262        matches.append(XmlAttribute(qname, value))
263    return matches
264
265  GetAttributes = get_attributes
266
267  def _harvest_tree(self, tree, version=1):
268    """Populates object members from the data in the tree Element."""
269    qname, elements, attributes = self.__class__._get_rules(version)
270    for element in tree:
271      if elements and element.tag in elements:
272        definition = elements[element.tag]
273        # If this is a repeating element, make sure the member is set to a
274        # list.
275        if definition[2]:
276          if getattr(self, definition[0]) is None:
277            setattr(self, definition[0], [])
278          getattr(self, definition[0]).append(_xml_element_from_tree(element,
279              definition[1], version))
280        else:
281          setattr(self, definition[0], _xml_element_from_tree(element,
282              definition[1], version))
283      else:
284        self._other_elements.append(_xml_element_from_tree(element, XmlElement,
285                                                           version))
286    for attrib, value in tree.attrib.iteritems():
287      if attributes and attrib in attributes:
288        setattr(self, attributes[attrib], value)
289      else:
290        self._other_attributes[attrib] = value
291    if tree.text:
292      self.text = tree.text
293
294  def _to_tree(self, version=1, encoding=None):
295    new_tree = ElementTree.Element(_get_qname(self, version))
296    self._attach_members(new_tree, version, encoding)
297    return new_tree
298
299  def _attach_members(self, tree, version=1, encoding=None):
300    """Convert members to XML elements/attributes and add them to the tree.
301
302    Args:
303      tree: An ElementTree.Element which will be modified. The members of
304            this object will be added as child elements or attributes
305            according to the rules described in _expected_elements and
306            _expected_attributes. The elements and attributes stored in
307            other_attributes and other_elements are also added a children
308            of this tree.
309      version: int Ingnored in this method but used by VersionedElement.
310      encoding: str (optional)
311    """
312    qname, elements, attributes = self.__class__._get_rules(version)
313    encoding = encoding or STRING_ENCODING
314    # Add the expected elements and attributes to the tree.
315    if elements:
316      for tag, element_def in elements.iteritems():
317        member = getattr(self, element_def[0])
318        # If this is a repeating element and there are members in the list.
319        if member and element_def[2]:
320          for instance in member:
321            instance._become_child(tree, version)
322        elif member:
323          member._become_child(tree, version)
324    if attributes:
325      for attribute_tag, member_name in attributes.iteritems():
326        value = getattr(self, member_name)
327        if value:
328          tree.attrib[attribute_tag] = value
329    # Add the unexpected (other) elements and attributes to the tree.
330    for element in self._other_elements:
331      element._become_child(tree, version)
332    for key, value in self._other_attributes.iteritems():
333      # I'm not sure if unicode can be used in the attribute name, so for now
334      # we assume the encoding is correct for the attribute name.
335      if not isinstance(value, unicode):
336        value = value.decode(encoding)
337      tree.attrib[key] = value
338    if self.text:
339      if isinstance(self.text, unicode):
340        tree.text = self.text
341      else:
342        tree.text = self.text.decode(encoding)
343
344  def to_string(self, version=1, encoding=None):
345    """Converts this object to XML."""
346    return ElementTree.tostring(self._to_tree(version, encoding))
347
348  ToString = to_string
349
350  def __str__(self):
351    return self.to_string()
352
353  def _become_child(self, tree, version=1):
354    """Adds a child element to tree with the XML data in self."""
355    new_child = ElementTree.Element('')
356    tree.append(new_child)
357    new_child.tag = _get_qname(self, version)
358    self._attach_members(new_child, version)
359
360  def __get_extension_elements(self):
361    return self._other_elements
362
363  def __set_extension_elements(self, elements):
364    self._other_elements = elements
365
366  extension_elements = property(__get_extension_elements,
367      __set_extension_elements,
368      """Provides backwards compatibility for v1 atom.AtomBase classes.""")
369
370  def __get_extension_attributes(self):
371    return self._other_attributes
372
373  def __set_extension_attributes(self, attributes):
374    self._other_attributes = attributes
375
376  extension_attributes = property(__get_extension_attributes,
377      __set_extension_attributes,
378      """Provides backwards compatibility for v1 atom.AtomBase classes.""")
379
380  def _get_tag(self, version=1):
381    qname = _get_qname(self, version)
382    return qname[qname.find('}')+1:]
383
384  def _get_namespace(self, version=1):
385    qname = _get_qname(self, version)
386    if qname.startswith('{'):
387      return qname[1:qname.find('}')]
388    else:
389      return None
390
391  def _set_tag(self, tag):
392    if isinstance(self._qname, tuple):
393      self._qname = self._qname.copy()
394      if self._qname[0].startswith('{'):
395        self._qname[0] = '{%s}%s' % (self._get_namespace(1), tag)
396      else:
397        self._qname[0] = tag
398    else:
399      if self._qname.startswith('{'):
400        self._qname = '{%s}%s' % (self._get_namespace(), tag)
401      else:
402        self._qname = tag
403
404  def _set_namespace(self, namespace):
405    if isinstance(self._qname, tuple):
406      self._qname = self._qname.copy()
407      if namespace:
408         self._qname[0] = '{%s}%s' % (namespace, self._get_tag(1))
409      else:
410         self._qname[0] = self._get_tag(1)
411    else:
412      if namespace:
413         self._qname = '{%s}%s' % (namespace, self._get_tag(1))
414      else:
415         self._qname = self._get_tag(1)
416
417  tag = property(_get_tag, _set_tag,
418      """Provides backwards compatibility for v1 atom.AtomBase classes.""")
419
420  namespace = property(_get_namespace, _set_namespace,
421      """Provides backwards compatibility for v1 atom.AtomBase classes.""")
422
423  # Provided for backwards compatibility to atom.ExtensionElement
424  children = extension_elements
425  attributes = extension_attributes
426
427
def _get_qname(element, version):
  """Returns the qname which applies to element for the given version.

  Args:
    element: An object (class or instance) with a _qname member which is
        either a single qname or a tuple holding one qname per version.
    version: int The 1-based version of the XML rules.

  Returns:
    The _qname itself when it is not a tuple. Otherwise the tuple entry for
    the requested version, or the last entry when the tuple has fewer entries
    than the requested version.
  """
  qname = element._qname
  if not isinstance(qname, tuple):
    return qname
  if version > len(qname):
    # No qname was declared for this version, use the most recent one.
    return qname[-1]
  return qname[version-1]
436
437
def _qname_matches(tag, namespace, qname):
  """Logic determines if a QName matches the desired local tag and namespace.

  This is used in XmlElement.get_elements and XmlElement.get_attributes to
  find matches in the element's members (among all expected-and-unexpected
  elements-and-attributes).

  Args:
    tag: string The desired local tag, or None to accept any tag.
    namespace: string The desired XML namespace, None to accept any
        namespace, or '' to accept only members which have no namespace.
    qname: string in the form '{xml_namespace}localtag' or 'tag' if there is
           no namespace. May be None, which is treated as having neither a
           tag nor a namespace.

  Returns:
    boolean True if the member's tag and namespace fit the expected tag and
    namespace.
  """
  # Split the member's qname into its namespace and local tag.
  member_namespace = None
  member_tag = qname
  if qname is not None and qname.startswith('{'):
    closing_brace = qname.index('}')
    member_namespace = qname[1:closing_brace]
    member_tag = qname[closing_brace + 1:]
  # If there is no expected tag, then every tag will match.
  tag_ok = tag is None or tag == member_tag
  # If there is no expected namespace, then every namespace will match. The
  # empty string is the way to ask for members which have no namespace.
  namespace_ok = (namespace is None
      or namespace == member_namespace
      or (namespace == '' and member_namespace is None))
  return tag_ok and namespace_ok
483
484
def parse(xml_string, target_class=None, version=1, encoding=None):
  """Parses the XML string according to the rules for the target_class.

  Args:
    xml_string: str or unicode
    target_class: XmlElement or a subclass. If None is specified, the
        XmlElement class is used.
    version: int (optional) The version of the schema which should be used when
        converting the XML into an object. The default is 1.
    encoding: str (optional) The character encoding used to turn a unicode
        xml_string into bytes before it is parsed. It is not used when
        xml_string is already a str. Default is STRING_ENCODING ('utf-8').

  Returns:
    The object built by _xml_element_from_tree for the root element: an
    instance of target_class, or None if the root element's tag is not the
    qname of target_class.
  """
  if isinstance(xml_string, unicode):
    # The XML parser is handed bytes, so encode unicode input first.
    codec = encoding
    if codec is None:
      codec = STRING_ENCODING
    xml_string = xml_string.encode(codec)
  root = ElementTree.fromstring(xml_string)
  if target_class is None:
    target_class = XmlElement
  return _xml_element_from_tree(root, target_class, version)


# Alternate names under which parse is also available.
Parse = parse
xml_element_from_string = parse
XmlElementFromString = xml_element_from_string
511
512
def _xml_element_from_tree(tree, target_class, version=1):
  """Converts an ElementTree element into an instance of target_class.

  Args:
    tree: The ElementTree.Element to convert.
    target_class: XmlElement or a subclass. A class whose _qname is None
        accepts any element and the new instance takes the element's tag as
        its _qname.
    version: int The version of the XML rules used for matching the tag and
        for populating the new instance.

  Returns:
    A new target_class instance populated from tree, or None if target_class
    declares a qname which differs from the tag of tree.
  """
  accepts_any_tag = target_class._qname is None
  # TODO handle the namespace-only case
  # Namespace only will be used with Google Spreadsheets rows and
  # Google Base item attributes.
  if not accepts_any_tag and tree.tag != _get_qname(target_class, version):
    return None
  new_element = target_class()
  if accepts_any_tag:
    new_element._qname = tree.tag
  new_element._harvest_tree(tree, version)
  return new_element
527
528
class XmlAttribute(object):
  """Pairs the qname of an XML attribute with the attribute's value.

  Instances are what XmlElement.get_attributes returns for each match.
  """

  def __init__(self, qname, value):
    """Stores qname in the _qname member and value in the value member."""
    self.value = value
    self._qname = qname
534