/atom/core.py

http://radioappz.googlecode.com/ · Python · 534 lines · 375 code · 46 blank · 113 comment · 97 complexity · 29c03e952ce9d3f7047b1dd2a4f8daf6 MD5 · raw file

  1. #!/usr/bin/env python
  2. #
  3. # Copyright (C) 2008 Google Inc.
  4. #
  5. # Licensed under the Apache License, Version 2.0 (the "License");
  6. # you may not use this file except in compliance with the License.
  7. # You may obtain a copy of the License at
  8. #
  9. # http://www.apache.org/licenses/LICENSE-2.0
  10. #
  11. # Unless required by applicable law or agreed to in writing, software
  12. # distributed under the License is distributed on an "AS IS" BASIS,
  13. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. # See the License for the specific language governing permissions and
  15. # limitations under the License.
  16. # This module is used for version 2 of the Google Data APIs.
  17. __author__ = 'j.s@google.com (Jeff Scudder)'
  18. import inspect
  19. try:
  20. from xml.etree import cElementTree as ElementTree
  21. except ImportError:
  22. try:
  23. import cElementTree as ElementTree
  24. except ImportError:
  25. try:
  26. from xml.etree import ElementTree
  27. except ImportError:
  28. from elementtree import ElementTree
  29. STRING_ENCODING = 'utf-8'
  30. class XmlElement(object):
  31. """Represents an element node in an XML document.
  32. The text member is a UTF-8 encoded str or unicode.
  33. """
  34. _qname = None
  35. _other_elements = None
  36. _other_attributes = None
  37. # The rule set contains mappings for XML qnames to child members and the
  38. # appropriate member classes.
  39. _rule_set = None
  40. _members = None
  41. text = None
  42. def __init__(self, text=None, *args, **kwargs):
  43. if ('_members' not in self.__class__.__dict__
  44. or self.__class__._members is None):
  45. self.__class__._members = tuple(self.__class__._list_xml_members())
  46. for member_name, member_type in self.__class__._members:
  47. if member_name in kwargs:
  48. setattr(self, member_name, kwargs[member_name])
  49. else:
  50. if isinstance(member_type, list):
  51. setattr(self, member_name, [])
  52. else:
  53. setattr(self, member_name, None)
  54. self._other_elements = []
  55. self._other_attributes = {}
  56. if text is not None:
  57. self.text = text
  58. def _list_xml_members(cls):
  59. """Generator listing all members which are XML elements or attributes.
  60. The following members would be considered XML members:
  61. foo = 'abc' - indicates an XML attribute with the qname abc
  62. foo = SomeElement - indicates an XML child element
  63. foo = [AnElement] - indicates a repeating XML child element, each instance
  64. will be stored in a list in this member
  65. foo = ('att1', '{http://example.com/namespace}att2') - indicates an XML
  66. attribute which has different parsing rules in different versions of
  67. the protocol. Version 1 of the XML parsing rules will look for an
  68. attribute with the qname 'att1' but verion 2 of the parsing rules will
  69. look for a namespaced attribute with the local name of 'att2' and an
  70. XML namespace of 'http://example.com/namespace'.
  71. """
  72. members = []
  73. for pair in inspect.getmembers(cls):
  74. if not pair[0].startswith('_') and pair[0] != 'text':
  75. member_type = pair[1]
  76. if (isinstance(member_type, tuple) or isinstance(member_type, list)
  77. or isinstance(member_type, (str, unicode))
  78. or (inspect.isclass(member_type)
  79. and issubclass(member_type, XmlElement))):
  80. members.append(pair)
  81. return members
  82. _list_xml_members = classmethod(_list_xml_members)
  83. def _get_rules(cls, version):
  84. """Initializes the _rule_set for the class which is used when parsing XML.
  85. This method is used internally for parsing and generating XML for an
  86. XmlElement. It is not recommended that you call this method directly.
  87. Returns:
  88. A tuple containing the XML parsing rules for the appropriate version.
  89. The tuple looks like:
  90. (qname, {sub_element_qname: (member_name, member_class, repeating), ..},
  91. {attribute_qname: member_name})
  92. To give a couple of concrete example, the atom.data.Control _get_rules
  93. with version of 2 will return:
  94. ('{http://www.w3.org/2007/app}control',
  95. {'{http://www.w3.org/2007/app}draft': ('draft',
  96. <class 'atom.data.Draft'>,
  97. False)},
  98. {})
  99. Calling _get_rules with version 1 on gdata.data.FeedLink will produce:
  100. ('{http://schemas.google.com/g/2005}feedLink',
  101. {'{http://www.w3.org/2005/Atom}feed': ('feed',
  102. <class 'gdata.data.GDFeed'>,
  103. False)},
  104. {'href': 'href', 'readOnly': 'read_only', 'countHint': 'count_hint',
  105. 'rel': 'rel'})
  106. """
  107. # Initialize the _rule_set to make sure there is a slot available to store
  108. # the parsing rules for this version of the XML schema.
  109. # Look for rule set in the class __dict__ proxy so that only the
  110. # _rule_set for this class will be found. By using the dict proxy
  111. # we avoid finding rule_sets defined in superclasses.
  112. # The four lines below provide support for any number of versions, but it
  113. # runs a bit slower then hard coding slots for two versions, so I'm using
  114. # the below two lines.
  115. #if '_rule_set' not in cls.__dict__ or cls._rule_set is None:
  116. # cls._rule_set = []
  117. #while len(cls.__dict__['_rule_set']) < version:
  118. # cls._rule_set.append(None)
  119. # If there is no rule set cache in the class, provide slots for two XML
  120. # versions. If and when there is a version 3, this list will need to be
  121. # expanded.
  122. if '_rule_set' not in cls.__dict__ or cls._rule_set is None:
  123. cls._rule_set = [None, None]
  124. # If a version higher than 2 is requested, fall back to version 2 because
  125. # 2 is currently the highest supported version.
  126. if version > 2:
  127. return cls._get_rules(2)
  128. # Check the dict proxy for the rule set to avoid finding any rule sets
  129. # which belong to the superclass. We only want rule sets for this class.
  130. if cls._rule_set[version-1] is None:
  131. # The rule set for each version consists of the qname for this element
  132. # ('{namespace}tag'), a dictionary (elements) for looking up the
  133. # corresponding class member when given a child element's qname, and a
  134. # dictionary (attributes) for looking up the corresponding class member
  135. # when given an XML attribute's qname.
  136. elements = {}
  137. attributes = {}
  138. if ('_members' not in cls.__dict__ or cls._members is None):
  139. cls._members = tuple(cls._list_xml_members())
  140. for member_name, target in cls._members:
  141. if isinstance(target, list):
  142. # This member points to a repeating element.
  143. elements[_get_qname(target[0], version)] = (member_name, target[0],
  144. True)
  145. elif isinstance(target, tuple):
  146. # This member points to a versioned XML attribute.
  147. if version <= len(target):
  148. attributes[target[version-1]] = member_name
  149. else:
  150. attributes[target[-1]] = member_name
  151. elif isinstance(target, (str, unicode)):
  152. # This member points to an XML attribute.
  153. attributes[target] = member_name
  154. elif issubclass(target, XmlElement):
  155. # This member points to a single occurance element.
  156. elements[_get_qname(target, version)] = (member_name, target, False)
  157. version_rules = (_get_qname(cls, version), elements, attributes)
  158. cls._rule_set[version-1] = version_rules
  159. return version_rules
  160. else:
  161. return cls._rule_set[version-1]
  162. _get_rules = classmethod(_get_rules)
  163. def get_elements(self, tag=None, namespace=None, version=1):
  164. """Find all sub elements which match the tag and namespace.
  165. To find all elements in this object, call get_elements with the tag and
  166. namespace both set to None (the default). This method searches through
  167. the object's members and the elements stored in _other_elements which
  168. did not match any of the XML parsing rules for this class.
  169. Args:
  170. tag: str
  171. namespace: str
  172. version: int Specifies the version of the XML rules to be used when
  173. searching for matching elements.
  174. Returns:
  175. A list of the matching XmlElements.
  176. """
  177. matches = []
  178. ignored1, elements, ignored2 = self.__class__._get_rules(version)
  179. if elements:
  180. for qname, element_def in elements.iteritems():
  181. member = getattr(self, element_def[0])
  182. if member:
  183. if _qname_matches(tag, namespace, qname):
  184. if element_def[2]:
  185. # If this is a repeating element, copy all instances into the
  186. # result list.
  187. matches.extend(member)
  188. else:
  189. matches.append(member)
  190. for element in self._other_elements:
  191. if _qname_matches(tag, namespace, element._qname):
  192. matches.append(element)
  193. return matches
  194. GetElements = get_elements
  195. # FindExtensions and FindChildren are provided for backwards compatibility
  196. # to the atom.AtomBase class.
  197. # However, FindExtensions may return more results than the v1 atom.AtomBase
  198. # method does, because get_elements searches both the expected children
  199. # and the unexpected "other elements". The old AtomBase.FindExtensions
  200. # method searched only "other elements" AKA extension_elements.
  201. FindExtensions = get_elements
  202. FindChildren = get_elements
  203. def get_attributes(self, tag=None, namespace=None, version=1):
  204. """Find all attributes which match the tag and namespace.
  205. To find all attributes in this object, call get_attributes with the tag
  206. and namespace both set to None (the default). This method searches
  207. through the object's members and the attributes stored in
  208. _other_attributes which did not fit any of the XML parsing rules for this
  209. class.
  210. Args:
  211. tag: str
  212. namespace: str
  213. version: int Specifies the version of the XML rules to be used when
  214. searching for matching attributes.
  215. Returns:
  216. A list of XmlAttribute objects for the matching attributes.
  217. """
  218. matches = []
  219. ignored1, ignored2, attributes = self.__class__._get_rules(version)
  220. if attributes:
  221. for qname, attribute_def in attributes.iteritems():
  222. if isinstance(attribute_def, (list, tuple)):
  223. attribute_def = attribute_def[0]
  224. member = getattr(self, attribute_def)
  225. # TODO: ensure this hasn't broken existing behavior.
  226. #member = getattr(self, attribute_def[0])
  227. if member:
  228. if _qname_matches(tag, namespace, qname):
  229. matches.append(XmlAttribute(qname, member))
  230. for qname, value in self._other_attributes.iteritems():
  231. if _qname_matches(tag, namespace, qname):
  232. matches.append(XmlAttribute(qname, value))
  233. return matches
  234. GetAttributes = get_attributes
  235. def _harvest_tree(self, tree, version=1):
  236. """Populates object members from the data in the tree Element."""
  237. qname, elements, attributes = self.__class__._get_rules(version)
  238. for element in tree:
  239. if elements and element.tag in elements:
  240. definition = elements[element.tag]
  241. # If this is a repeating element, make sure the member is set to a
  242. # list.
  243. if definition[2]:
  244. if getattr(self, definition[0]) is None:
  245. setattr(self, definition[0], [])
  246. getattr(self, definition[0]).append(_xml_element_from_tree(element,
  247. definition[1], version))
  248. else:
  249. setattr(self, definition[0], _xml_element_from_tree(element,
  250. definition[1], version))
  251. else:
  252. self._other_elements.append(_xml_element_from_tree(element, XmlElement,
  253. version))
  254. for attrib, value in tree.attrib.iteritems():
  255. if attributes and attrib in attributes:
  256. setattr(self, attributes[attrib], value)
  257. else:
  258. self._other_attributes[attrib] = value
  259. if tree.text:
  260. self.text = tree.text
  261. def _to_tree(self, version=1, encoding=None):
  262. new_tree = ElementTree.Element(_get_qname(self, version))
  263. self._attach_members(new_tree, version, encoding)
  264. return new_tree
  265. def _attach_members(self, tree, version=1, encoding=None):
  266. """Convert members to XML elements/attributes and add them to the tree.
  267. Args:
  268. tree: An ElementTree.Element which will be modified. The members of
  269. this object will be added as child elements or attributes
  270. according to the rules described in _expected_elements and
  271. _expected_attributes. The elements and attributes stored in
  272. other_attributes and other_elements are also added a children
  273. of this tree.
  274. version: int Ingnored in this method but used by VersionedElement.
  275. encoding: str (optional)
  276. """
  277. qname, elements, attributes = self.__class__._get_rules(version)
  278. encoding = encoding or STRING_ENCODING
  279. # Add the expected elements and attributes to the tree.
  280. if elements:
  281. for tag, element_def in elements.iteritems():
  282. member = getattr(self, element_def[0])
  283. # If this is a repeating element and there are members in the list.
  284. if member and element_def[2]:
  285. for instance in member:
  286. instance._become_child(tree, version)
  287. elif member:
  288. member._become_child(tree, version)
  289. if attributes:
  290. for attribute_tag, member_name in attributes.iteritems():
  291. value = getattr(self, member_name)
  292. if value:
  293. tree.attrib[attribute_tag] = value
  294. # Add the unexpected (other) elements and attributes to the tree.
  295. for element in self._other_elements:
  296. element._become_child(tree, version)
  297. for key, value in self._other_attributes.iteritems():
  298. # I'm not sure if unicode can be used in the attribute name, so for now
  299. # we assume the encoding is correct for the attribute name.
  300. if not isinstance(value, unicode):
  301. value = value.decode(encoding)
  302. tree.attrib[key] = value
  303. if self.text:
  304. if isinstance(self.text, unicode):
  305. tree.text = self.text
  306. else:
  307. tree.text = self.text.decode(encoding)
  308. def to_string(self, version=1, encoding=None):
  309. """Converts this object to XML."""
  310. return ElementTree.tostring(self._to_tree(version, encoding))
  311. ToString = to_string
  312. def __str__(self):
  313. return self.to_string()
  314. def _become_child(self, tree, version=1):
  315. """Adds a child element to tree with the XML data in self."""
  316. new_child = ElementTree.Element('')
  317. tree.append(new_child)
  318. new_child.tag = _get_qname(self, version)
  319. self._attach_members(new_child, version)
  320. def __get_extension_elements(self):
  321. return self._other_elements
  322. def __set_extension_elements(self, elements):
  323. self._other_elements = elements
  324. extension_elements = property(__get_extension_elements,
  325. __set_extension_elements,
  326. """Provides backwards compatibility for v1 atom.AtomBase classes.""")
  327. def __get_extension_attributes(self):
  328. return self._other_attributes
  329. def __set_extension_attributes(self, attributes):
  330. self._other_attributes = attributes
  331. extension_attributes = property(__get_extension_attributes,
  332. __set_extension_attributes,
  333. """Provides backwards compatibility for v1 atom.AtomBase classes.""")
  334. def _get_tag(self, version=1):
  335. qname = _get_qname(self, version)
  336. return qname[qname.find('}')+1:]
  337. def _get_namespace(self, version=1):
  338. qname = _get_qname(self, version)
  339. if qname.startswith('{'):
  340. return qname[1:qname.find('}')]
  341. else:
  342. return None
  343. def _set_tag(self, tag):
  344. if isinstance(self._qname, tuple):
  345. self._qname = self._qname.copy()
  346. if self._qname[0].startswith('{'):
  347. self._qname[0] = '{%s}%s' % (self._get_namespace(1), tag)
  348. else:
  349. self._qname[0] = tag
  350. else:
  351. if self._qname.startswith('{'):
  352. self._qname = '{%s}%s' % (self._get_namespace(), tag)
  353. else:
  354. self._qname = tag
  355. def _set_namespace(self, namespace):
  356. if isinstance(self._qname, tuple):
  357. self._qname = self._qname.copy()
  358. if namespace:
  359. self._qname[0] = '{%s}%s' % (namespace, self._get_tag(1))
  360. else:
  361. self._qname[0] = self._get_tag(1)
  362. else:
  363. if namespace:
  364. self._qname = '{%s}%s' % (namespace, self._get_tag(1))
  365. else:
  366. self._qname = self._get_tag(1)
  367. tag = property(_get_tag, _set_tag,
  368. """Provides backwards compatibility for v1 atom.AtomBase classes.""")
  369. namespace = property(_get_namespace, _set_namespace,
  370. """Provides backwards compatibility for v1 atom.AtomBase classes.""")
  371. # Provided for backwards compatibility to atom.ExtensionElement
  372. children = extension_elements
  373. attributes = extension_attributes
  374. def _get_qname(element, version):
  375. if isinstance(element._qname, tuple):
  376. if version <= len(element._qname):
  377. return element._qname[version-1]
  378. else:
  379. return element._qname[-1]
  380. else:
  381. return element._qname
  382. def _qname_matches(tag, namespace, qname):
  383. """Logic determines if a QName matches the desired local tag and namespace.
  384. This is used in XmlElement.get_elements and XmlElement.get_attributes to
  385. find matches in the element's members (among all expected-and-unexpected
  386. elements-and-attributes).
  387. Args:
  388. expected_tag: string
  389. expected_namespace: string
  390. qname: string in the form '{xml_namespace}localtag' or 'tag' if there is
  391. no namespace.
  392. Returns:
  393. boolean True if the member's tag and namespace fit the expected tag and
  394. namespace.
  395. """
  396. # If there is no expected namespace or tag, then everything will match.
  397. if qname is None:
  398. member_tag = None
  399. member_namespace = None
  400. else:
  401. if qname.startswith('{'):
  402. member_namespace = qname[1:qname.index('}')]
  403. member_tag = qname[qname.index('}') + 1:]
  404. else:
  405. member_namespace = None
  406. member_tag = qname
  407. return ((tag is None and namespace is None)
  408. # If there is a tag, but no namespace, see if the local tag matches.
  409. or (namespace is None and member_tag == tag)
  410. # There was no tag, but there was a namespace so see if the namespaces
  411. # match.
  412. or (tag is None and member_namespace == namespace)
  413. # There was no tag, and the desired elements have no namespace, so check
  414. # to see that the member's namespace is None.
  415. or (tag is None and namespace == ''
  416. and member_namespace is None)
  417. # The tag and the namespace both match.
  418. or (tag == member_tag
  419. and namespace == member_namespace)
  420. # The tag matches, and the expected namespace is the empty namespace,
  421. # check to make sure the member's namespace is None.
  422. or (tag == member_tag and namespace == ''
  423. and member_namespace is None))
  424. def parse(xml_string, target_class=None, version=1, encoding=None):
  425. """Parses the XML string according to the rules for the target_class.
  426. Args:
  427. xml_string: str or unicode
  428. target_class: XmlElement or a subclass. If None is specified, the
  429. XmlElement class is used.
  430. version: int (optional) The version of the schema which should be used when
  431. converting the XML into an object. The default is 1.
  432. encoding: str (optional) The character encoding of the bytes in the
  433. xml_string. Default is 'UTF-8'.
  434. """
  435. if target_class is None:
  436. target_class = XmlElement
  437. if isinstance(xml_string, unicode):
  438. if encoding is None:
  439. xml_string = xml_string.encode(STRING_ENCODING)
  440. else:
  441. xml_string = xml_string.encode(encoding)
  442. tree = ElementTree.fromstring(xml_string)
  443. return _xml_element_from_tree(tree, target_class, version)
  444. Parse = parse
  445. xml_element_from_string = parse
  446. XmlElementFromString = xml_element_from_string
  447. def _xml_element_from_tree(tree, target_class, version=1):
  448. if target_class._qname is None:
  449. instance = target_class()
  450. instance._qname = tree.tag
  451. instance._harvest_tree(tree, version)
  452. return instance
  453. # TODO handle the namespace-only case
  454. # Namespace only will be used with Google Spreadsheets rows and
  455. # Google Base item attributes.
  456. elif tree.tag == _get_qname(target_class, version):
  457. instance = target_class()
  458. instance._harvest_tree(tree, version)
  459. return instance
  460. return None
  461. class XmlAttribute(object):
  462. def __init__(self, qname, value):
  463. self._qname = qname
  464. self.value = value