/gdata/books/__init__.py

http://radioappz.googlecode.com/ · Python · 473 lines · 409 code · 24 blank · 40 comment · 38 complexity · 335709a962c6fb38cdab9d2afc3eb8b6 MD5 · raw file

  1. #!/usr/bin/python
  2. """
  3. Data Models for books.service
  4. All classes can be instantiated from an xml string using their FromString
  5. class method.
  6. Notes:
  7. * Book.title displays the first dc:title because the returned XML
  8. repeats that datum as atom:title.
  9. There is an undocumented gbs:openAccess element that is not parsed.
  10. """
  11. __author__ = "James Sams <sams.james@gmail.com>"
  12. __copyright__ = "Apache License v2.0"
  13. import atom
  14. import gdata
# XML namespaces used as the _namespace of the element classes below:
# BOOK_SEARCH_NAMESPACE for the gbs: elements, DC_NAMESPACE for the dc: ones.
BOOK_SEARCH_NAMESPACE = 'http://schemas.google.com/books/2008'
DC_NAMESPACE = 'http://purl.org/dc/terms'

# Link rel values; Book.Get*Link() compares these against each link's rel.
ANNOTATION_REL = "http://schemas.google.com/books/2008/annotation"
INFO_REL = "http://schemas.google.com/books/2008/info"
# Category scheme that marks a category as a user label (see Book.get_label).
LABEL_SCHEME = "http://schemas.google.com/books/2008/labels"
PREVIEW_REL = "http://schemas.google.com/books/2008/preview"
THUMBNAIL_REL = "http://schemas.google.com/books/2008/thumbnail"

# URIs listed in the Viewability docstring as values of <gbs:viewability>.
FULL_VIEW = "http://schemas.google.com/books/2008#view_all_pages"
PARTIAL_VIEW = "http://schemas.google.com/books/2008#view_partial"
NO_VIEW = "http://schemas.google.com/books/2008#view_no_pages"
UNKNOWN_VIEW = "http://schemas.google.com/books/2008#view_unknown"

# URIs listed in the Embeddability docstring as values of <gbs:embeddability>.
EMBEDDABLE = "http://schemas.google.com/books/2008#embeddable"
NOT_EMBEDDABLE = "http://schemas.google.com/books/2008#not_embeddable"
class _AtomFromString(atom.AtomBase):
    """Base class that gives every data model a FromString class method."""

    # NOTE(review): the decorator form is left commented out and the method is
    # rebound with classmethod() below instead -- presumably to stay usable on
    # interpreters without decorator syntax; confirm before modernising.
    #@classmethod
    def FromString(cls, s):
        """Build an instance of cls from the XML string s.

        Delegates to atom.CreateClassFromXMLString(cls, s) and returns its
        result.
        """
        return atom.CreateClassFromXMLString(cls, s)
    FromString = classmethod(FromString)
  33. class Creator(_AtomFromString):
  34. """
  35. The <dc:creator> element identifies an author-or more generally, an entity
  36. responsible for creating the volume in question. Examples of a creator
  37. include a person, an organization, or a service. In the case of
  38. anthologies, proceedings, or other edited works, this field may be used to
  39. indicate editors or other entities responsible for collecting the volume's
  40. contents.
  41. This element appears as a child of <entry>. If there are multiple authors or
  42. contributors to the book, there may be multiple <dc:creator> elements in the
  43. volume entry (one for each creator or contributor).
  44. """
  45. _tag = 'creator'
  46. _namespace = DC_NAMESPACE
  47. class Date(_AtomFromString): #iso 8601 / W3CDTF profile
  48. """
  49. The <dc:date> element indicates the publication date of the specific volume
  50. in question. If the book is a reprint, this is the reprint date, not the
  51. original publication date. The date is encoded according to the ISO-8601
  52. standard (and more specifically, the W3CDTF profile).
  53. The <dc:date> element can appear only as a child of <entry>.
  54. Usually only the year or the year and the month are given.
  55. YYYY-MM-DDThh:mm:ssTZD TZD = -hh:mm or +hh:mm
  56. """
  57. _tag = 'date'
  58. _namespace = DC_NAMESPACE
  59. class Description(_AtomFromString):
  60. """
  61. The <dc:description> element includes text that describes a book or book
  62. result. In a search result feed, this may be a search result "snippet" that
  63. contains the words around the user's search term. For a single volume feed,
  64. this element may contain a synopsis of the book.
  65. The <dc:description> element can appear only as a child of <entry>
  66. """
  67. _tag = 'description'
  68. _namespace = DC_NAMESPACE
  69. class Format(_AtomFromString):
  70. """
  71. The <dc:format> element describes the physical properties of the volume.
  72. Currently, it indicates the number of pages in the book, but more
  73. information may be added to this field in the future.
  74. This element can appear only as a child of <entry>.
  75. """
  76. _tag = 'format'
  77. _namespace = DC_NAMESPACE
  78. class Identifier(_AtomFromString):
  79. """
  80. The <dc:identifier> element provides an unambiguous reference to a
  81. particular book.
  82. * Every <entry> contains at least one <dc:identifier> child.
  83. * The first identifier is always the unique string Book Search has assigned
  84. to the volume (such as s1gVAAAAYAAJ). This is the ID that appears in the
  85. book's URL in the Book Search GUI, as well as in the URL of that book's
  86. single item feed.
  87. * Many books contain additional <dc:identifier> elements. These provide
  88. alternate, external identifiers to the volume. Such identifiers may
  89. include the ISBNs, ISSNs, Library of Congress Control Numbers (LCCNs),
  90. and OCLC numbers; they are prepended with a corresponding namespace
  91. prefix (such as "ISBN:").
  92. * Any <dc:identifier> can be passed to the Dynamic Links, used to
  93. instantiate an Embedded Viewer, or even used to construct static links to
  94. Book Search.
  95. The <dc:identifier> element can appear only as a child of <entry>.
  96. """
  97. _tag = 'identifier'
  98. _namespace = DC_NAMESPACE
  99. class Publisher(_AtomFromString):
  100. """
  101. The <dc:publisher> element contains the name of the entity responsible for
  102. producing and distributing the volume (usually the specific edition of this
  103. book). Examples of a publisher include a person, an organization, or a
  104. service.
  105. This element can appear only as a child of <entry>. If there is more than
  106. one publisher, multiple <dc:publisher> elements may appear.
  107. """
  108. _tag = 'publisher'
  109. _namespace = DC_NAMESPACE
  110. class Subject(_AtomFromString):
  111. """
  112. The <dc:subject> element identifies the topic of the book. Usually this is
  113. a Library of Congress Subject Heading (LCSH) or Book Industry Standards
  114. and Communications Subject Heading (BISAC).
  115. The <dc:subject> element can appear only as a child of <entry>. There may
  116. be multiple <dc:subject> elements per entry.
  117. """
  118. _tag = 'subject'
  119. _namespace = DC_NAMESPACE
  120. class Title(_AtomFromString):
  121. """
  122. The <dc:title> element contains the title of a book as it was published. If
  123. a book has a subtitle, it appears as a second <dc:title> element in the book
  124. result's <entry>.
  125. """
  126. _tag = 'title'
  127. _namespace = DC_NAMESPACE
  128. class Viewability(_AtomFromString):
  129. """
  130. Google Book Search respects the user's local copyright restrictions. As a
  131. result, previews or full views of some books are not available in all
  132. locations. The <gbs:viewability> element indicates whether a book is fully
  133. viewable, can be previewed, or only has "about the book" information. These
  134. three "viewability modes" are the same ones returned by the Dynamic Links
  135. API.
  136. The <gbs:viewability> element can appear only as a child of <entry>.
  137. The value attribute will take the form of the following URIs to represent
  138. the relevant viewing capability:
  139. Full View: http://schemas.google.com/books/2008#view_all_pages
  140. Limited Preview: http://schemas.google.com/books/2008#view_partial
  141. Snippet View/No Preview: http://schemas.google.com/books/2008#view_no_pages
  142. Unknown view: http://schemas.google.com/books/2008#view_unknown
  143. """
  144. _tag = 'viewability'
  145. _namespace = BOOK_SEARCH_NAMESPACE
  146. _attributes = atom.AtomBase._attributes.copy()
  147. _attributes['value'] = 'value'
  148. def __init__(self, value=None, text=None,
  149. extension_elements=None, extension_attributes=None):
  150. self.value = value
  151. _AtomFromString.__init__(self, extension_elements=extension_elements,
  152. extension_attributes=extension_attributes, text=text)
  153. class Embeddability(_AtomFromString):
  154. """
  155. Many of the books found on Google Book Search can be embedded on third-party
  156. sites using the Embedded Viewer. The <gbs:embeddability> element indicates
  157. whether a particular book result is available for embedding. By definition,
  158. a book that cannot be previewed on Book Search cannot be embedded on third-
  159. party sites.
  160. The <gbs:embeddability> element can appear only as a child of <entry>.
  161. The value attribute will take on one of the following URIs:
  162. embeddable: http://schemas.google.com/books/2008#embeddable
  163. not embeddable: http://schemas.google.com/books/2008#not_embeddable
  164. """
  165. _tag = 'embeddability'
  166. _namespace = BOOK_SEARCH_NAMESPACE
  167. _attributes = atom.AtomBase._attributes.copy()
  168. _attributes['value'] = 'value'
  169. def __init__(self, value=None, text=None, extension_elements=None,
  170. extension_attributes=None):
  171. self.value = value
  172. _AtomFromString.__init__(self, extension_elements=extension_elements,
  173. extension_attributes=extension_attributes, text=text)
  174. class Review(_AtomFromString):
  175. """
  176. When present, the <gbs:review> element contains a user-generated review for
  177. a given book. This element currently appears only in the user library and
  178. user annotation feeds, as a child of <entry>.
  179. type: text, html, xhtml
  180. xml:lang: id of the language, a guess, (always two letters?)
  181. """
  182. _tag = 'review'
  183. _namespace = BOOK_SEARCH_NAMESPACE
  184. _attributes = atom.AtomBase._attributes.copy()
  185. _attributes['type'] = 'type'
  186. _attributes['{http://www.w3.org/XML/1998/namespace}lang'] = 'lang'
  187. def __init__(self, type=None, lang=None, text=None,
  188. extension_elements=None, extension_attributes=None):
  189. self.type = type
  190. self.lang = lang
  191. _AtomFromString.__init__(self, extension_elements=extension_elements,
  192. extension_attributes=extension_attributes, text=text)
  193. class Rating(_AtomFromString):
  194. """All attributes must take an integral string between 1 and 5.
  195. The min, max, and average attributes represent 'community' ratings. The
  196. value attribute is the user's (of the feed from which the item is fetched,
  197. not necessarily the authenticated user) rating of the book.
  198. """
  199. _tag = 'rating'
  200. _namespace = gdata.GDATA_NAMESPACE
  201. _attributes = atom.AtomBase._attributes.copy()
  202. _attributes['min'] = 'min'
  203. _attributes['max'] = 'max'
  204. _attributes['average'] = 'average'
  205. _attributes['value'] = 'value'
  206. def __init__(self, min=None, max=None, average=None, value=None, text=None,
  207. extension_elements=None, extension_attributes=None):
  208. self.min = min
  209. self.max = max
  210. self.average = average
  211. self.value = value
  212. _AtomFromString.__init__(self, extension_elements=extension_elements,
  213. extension_attributes=extension_attributes, text=text)
  214. class Book(_AtomFromString, gdata.GDataEntry):
  215. """
  216. Represents an <entry> from either a search, annotation, library, or single
  217. item feed. Note that dc_title attribute is the proper title of the volume,
  218. title is an atom element and may not represent the full title.
  219. """
  220. _tag = 'entry'
  221. _namespace = atom.ATOM_NAMESPACE
  222. _children = gdata.GDataEntry._children.copy()
  223. for i in (Creator, Identifier, Publisher, Subject,):
  224. _children['{%s}%s' % (i._namespace, i._tag)] = (i._tag, [i])
  225. for i in (Date, Description, Format, Viewability, Embeddability,
  226. Review, Rating): # Review, Rating maybe only in anno/lib entrys
  227. _children['{%s}%s' % (i._namespace, i._tag)] = (i._tag, i)
  228. # there is an atom title as well, should we clobber that?
  229. del(i)
  230. _children['{%s}%s' % (Title._namespace, Title._tag)] = ('dc_title', [Title])
  231. def to_dict(self):
  232. """Returns a dictionary of the book's available metadata. If the data
  233. cannot be discovered, it is not included as a key in the returned dict.
  234. The possible keys are: authors, embeddability, date, description,
  235. format, identifiers, publishers, rating, review, subjects, title, and
  236. viewability.
  237. Notes:
  238. * Plural keys will be lists
  239. * Singular keys will be strings
  240. * Title, despite usually being a list, joins the title and subtitle
  241. with a space as a single string.
  242. * embeddability and viewability only return the portion of the URI
  243. after #
  244. * identifiers is a list of tuples, where the first item of each tuple
  245. is the type of identifier and the second item is the identifying
  246. string. Note that while doing dict() on this tuple may be possible,
  247. some items may have multiple of the same identifier and converting
  248. to a dict may resulted in collisions/dropped data.
  249. * Rating returns only the user's rating. See Rating class for precise
  250. definition.
  251. """
  252. d = {}
  253. if self.GetAnnotationLink():
  254. d['annotation'] = self.GetAnnotationLink().href
  255. if self.creator:
  256. d['authors'] = [x.text for x in self.creator]
  257. if self.embeddability:
  258. d['embeddability'] = self.embeddability.value.split('#')[-1]
  259. if self.date:
  260. d['date'] = self.date.text
  261. if self.description:
  262. d['description'] = self.description.text
  263. if self.format:
  264. d['format'] = self.format.text
  265. if self.identifier:
  266. d['identifiers'] = [('google_id', self.identifier[0].text)]
  267. for x in self.identifier[1:]:
  268. l = x.text.split(':') # should we lower the case of the ids?
  269. d['identifiers'].append((l[0], ':'.join(l[1:])))
  270. if self.GetInfoLink():
  271. d['info'] = self.GetInfoLink().href
  272. if self.GetPreviewLink():
  273. d['preview'] = self.GetPreviewLink().href
  274. if self.publisher:
  275. d['publishers'] = [x.text for x in self.publisher]
  276. if self.rating:
  277. d['rating'] = self.rating.value
  278. if self.review:
  279. d['review'] = self.review.text
  280. if self.subject:
  281. d['subjects'] = [x.text for x in self.subject]
  282. if self.GetThumbnailLink():
  283. d['thumbnail'] = self.GetThumbnailLink().href
  284. if self.dc_title:
  285. d['title'] = ' '.join([x.text for x in self.dc_title])
  286. if self.viewability:
  287. d['viewability'] = self.viewability.value.split('#')[-1]
  288. return d
  289. def __init__(self, creator=None, date=None,
  290. description=None, format=None, author=None, identifier=None,
  291. publisher=None, subject=None, dc_title=None, viewability=None,
  292. embeddability=None, review=None, rating=None, category=None,
  293. content=None, contributor=None, atom_id=None, link=None,
  294. published=None, rights=None, source=None, summary=None,
  295. title=None, control=None, updated=None, text=None,
  296. extension_elements=None, extension_attributes=None):
  297. self.creator = creator
  298. self.date = date
  299. self.description = description
  300. self.format = format
  301. self.identifier = identifier
  302. self.publisher = publisher
  303. self.subject = subject
  304. self.dc_title = dc_title or []
  305. self.viewability = viewability
  306. self.embeddability = embeddability
  307. self.review = review
  308. self.rating = rating
  309. gdata.GDataEntry.__init__(self, author=author, category=category,
  310. content=content, contributor=contributor, atom_id=atom_id,
  311. link=link, published=published, rights=rights, source=source,
  312. summary=summary, title=title, control=control, updated=updated,
  313. text=text, extension_elements=extension_elements,
  314. extension_attributes=extension_attributes)
  315. def GetThumbnailLink(self):
  316. """Returns the atom.Link object representing the thumbnail URI."""
  317. for i in self.link:
  318. if i.rel == THUMBNAIL_REL:
  319. return i
  320. def GetInfoLink(self):
  321. """
  322. Returns the atom.Link object representing the human-readable info URI.
  323. """
  324. for i in self.link:
  325. if i.rel == INFO_REL:
  326. return i
  327. def GetPreviewLink(self):
  328. """Returns the atom.Link object representing the preview URI."""
  329. for i in self.link:
  330. if i.rel == PREVIEW_REL:
  331. return i
  332. def GetAnnotationLink(self):
  333. """
  334. Returns the atom.Link object representing the Annotation URI.
  335. Note that the use of www.books in the href of this link seems to make
  336. this information useless. Using books.service.ANNOTATION_FEED and
  337. BOOK_SERVER to construct your URI seems to work better.
  338. """
  339. for i in self.link:
  340. if i.rel == ANNOTATION_REL:
  341. return i
  342. def set_rating(self, value):
  343. """Set user's rating. Must be an integral string between 1 nad 5"""
  344. assert (value in ('1','2','3','4','5'))
  345. if not isinstance(self.rating, Rating):
  346. self.rating = Rating()
  347. self.rating.value = value
  348. def set_review(self, text, type='text', lang='en'):
  349. """Set user's review text"""
  350. self.review = Review(text=text, type=type, lang=lang)
  351. def get_label(self):
  352. """Get users label for the item as a string"""
  353. for i in self.category:
  354. if i.scheme == LABEL_SCHEME:
  355. return i.term
  356. def set_label(self, term):
  357. """Clear pre-existing label for the item and set term as the label."""
  358. self.remove_label()
  359. self.category.append(atom.Category(term=term, scheme=LABEL_SCHEME))
  360. def remove_label(self):
  361. """Clear the user's label for the item"""
  362. ln = len(self.category)
  363. for i, j in enumerate(self.category[::-1]):
  364. if j.scheme == LABEL_SCHEME:
  365. del(self.category[ln-1-i])
  366. def clean_annotations(self):
  367. """Clear all annotations from an item. Useful for taking an item from
  368. another user's library/annotation feed and adding it to the
  369. authenticated user's library without adopting annotations."""
  370. self.remove_label()
  371. self.review = None
  372. self.rating = None
  373. def get_google_id(self):
  374. """Get Google's ID of the item."""
  375. return self.id.text.split('/')[-1]
  376. class BookFeed(_AtomFromString, gdata.GDataFeed):
  377. """Represents a feed of entries from a search."""
  378. _tag = 'feed'
  379. _namespace = atom.ATOM_NAMESPACE
  380. _children = gdata.GDataFeed._children.copy()
  381. _children['{%s}%s' % (Book._namespace, Book._tag)] = (Book._tag, [Book])
if __name__ == '__main__':
    # When executed as a script, run the doctests kept in 'datamodels.txt'.
    import doctest
    doctest.testfile('datamodels.txt')