PageRenderTime 49ms CodeModel.GetById 17ms app.highlight 26ms RepoModel.GetById 1ms app.codeStats 0ms

/gdata/books/__init__.py

http://radioappz.googlecode.com/
Python | 473 lines | 409 code | 24 blank | 40 comment | 52 complexity | 335709a962c6fb38cdab9d2afc3eb8b6 MD5 | raw file
  1#!/usr/bin/python
  2
  3"""
  4    Data Models for books.service
  5
  6    All classes can be instantiated from an xml string using their FromString
  7    class method.
  8
    Notes:
        * Book.title displays the first dc:title because the returned XML
          repeats that datum as atom:title.
        * There is an undocumented gbs:openAccess element that is not parsed.
 13"""
 14
 15__author__ = "James Sams <sams.james@gmail.com>"
 16__copyright__ = "Apache License v2.0"
 17
 18import atom
 19import gdata
 20
 21
 22BOOK_SEARCH_NAMESPACE   = 'http://schemas.google.com/books/2008'
 23DC_NAMESPACE            = 'http://purl.org/dc/terms' 
 24ANNOTATION_REL          = "http://schemas.google.com/books/2008/annotation"
 25INFO_REL                = "http://schemas.google.com/books/2008/info"
 26LABEL_SCHEME            = "http://schemas.google.com/books/2008/labels"
 27PREVIEW_REL             = "http://schemas.google.com/books/2008/preview"
 28THUMBNAIL_REL           = "http://schemas.google.com/books/2008/thumbnail"
 29FULL_VIEW               = "http://schemas.google.com/books/2008#view_all_pages"
 30PARTIAL_VIEW            = "http://schemas.google.com/books/2008#view_partial"
 31NO_VIEW                 = "http://schemas.google.com/books/2008#view_no_pages"
 32UNKNOWN_VIEW            = "http://schemas.google.com/books/2008#view_unknown"
 33EMBEDDABLE              = "http://schemas.google.com/books/2008#embeddable"
 34NOT_EMBEDDABLE          = "http://schemas.google.com/books/2008#not_embeddable"
 35
 36
 37
 38class _AtomFromString(atom.AtomBase):
 39
 40    #@classmethod
 41    def FromString(cls, s):
 42        return atom.CreateClassFromXMLString(cls, s)
 43
 44    FromString = classmethod(FromString)
 45
 46
 47class Creator(_AtomFromString):
 48    """
 49    The <dc:creator> element identifies an author-or more generally, an entity
 50    responsible for creating the volume in question. Examples of a creator
 51    include a person, an organization, or a service. In the case of 
 52    anthologies, proceedings, or other edited works, this field may be used to 
 53    indicate editors or other entities responsible for collecting the volume's 
 54    contents.
 55    
 56    This element appears as a child of <entry>. If there are multiple authors or
 57    contributors to the book, there may be multiple <dc:creator> elements in the
 58    volume entry (one for each creator or contributor).
 59    """
 60
 61    _tag = 'creator'
 62    _namespace = DC_NAMESPACE
 63
 64
 65class Date(_AtomFromString): #iso 8601 / W3CDTF profile
 66    """
 67    The <dc:date> element indicates the publication date of the specific volume
 68    in question. If the book is a reprint, this is the reprint date, not the 
 69    original publication date. The date is encoded according to the ISO-8601 
 70    standard (and more specifically, the W3CDTF profile).
 71
 72    The <dc:date> element can appear only as a child of <entry>.
 73    
 74    Usually only the year or the year and the month are given.
 75
 76    YYYY-MM-DDThh:mm:ssTZD  TZD = -hh:mm or +hh:mm
 77    """
 78    
 79    _tag = 'date'     
 80    _namespace = DC_NAMESPACE
 81   
 82
 83class Description(_AtomFromString):
 84    """
 85    The <dc:description> element includes text that describes a book or book 
 86    result. In a search result feed, this may be a search result "snippet" that
 87    contains the words around the user's search term. For a single volume feed,
 88    this element may contain a synopsis of the book.
 89
 90    The <dc:description> element can appear only as a child of <entry>
 91    """
 92
 93    _tag = 'description'
 94    _namespace = DC_NAMESPACE
 95
 96
 97class Format(_AtomFromString):
 98    """
 99    The <dc:format> element describes the physical properties of the volume. 
100    Currently, it indicates the number of pages in the book, but more 
101    information may be added to this field in the future.
102
103    This element can appear only as a child of <entry>.
104    """
105
106    _tag = 'format'
107    _namespace = DC_NAMESPACE
108
109
110class Identifier(_AtomFromString):
111    """
112    The <dc:identifier> element provides an unambiguous reference to a 
113    particular book.
114    * Every <entry> contains at least one <dc:identifier> child.
115    * The first identifier is always the unique string Book Search has assigned
116      to the volume (such as s1gVAAAAYAAJ). This is the ID that appears in the 
117      book's URL in the Book Search GUI, as well as in the URL of that book's 
118      single item feed.
119    * Many books contain additional <dc:identifier> elements. These provide 
120      alternate, external identifiers to the volume. Such identifiers may 
121      include the ISBNs, ISSNs, Library of Congress Control Numbers (LCCNs), 
122      and OCLC numbers; they are prepended with a corresponding namespace 
123      prefix (such as "ISBN:").
124    * Any <dc:identifier> can be passed to the Dynamic Links, used to 
125      instantiate an Embedded Viewer, or even used to construct static links to
126      Book Search.
127    The <dc:identifier> element can appear only as a child of <entry>.
128    """
129
130    _tag = 'identifier'
131    _namespace = DC_NAMESPACE
132
133
134class Publisher(_AtomFromString):
135    """
136    The <dc:publisher> element contains the name of the entity responsible for 
137    producing and distributing the volume (usually the specific edition of this
138    book). Examples of a publisher include a person, an organization, or a 
139    service.
140
141    This element can appear only as a child of <entry>. If there is more than 
142    one publisher, multiple <dc:publisher> elements may appear.
143    """
144
145    _tag = 'publisher'
146    _namespace = DC_NAMESPACE
147
148
149class Subject(_AtomFromString):
150    """
151    The <dc:subject> element identifies the topic of the book. Usually this is 
152    a Library of Congress Subject Heading (LCSH) or  Book Industry Standards 
153    and Communications Subject Heading (BISAC).
154
155    The <dc:subject> element can appear only as a child of <entry>. There may 
156    be multiple <dc:subject> elements per entry.
157    """
158
159    _tag = 'subject'
160    _namespace = DC_NAMESPACE
161
162
163class Title(_AtomFromString):
164    """
165    The <dc:title> element contains the title of a book as it was published. If
166    a book has a subtitle, it appears as a second <dc:title> element in the book
167    result's <entry>.
168    """
169
170    _tag = 'title'
171    _namespace = DC_NAMESPACE
172
173
174class Viewability(_AtomFromString):
175    """
176    Google Book Search respects the user's local copyright restrictions. As a 
177    result, previews or full views of some books are not available in all 
178    locations. The <gbs:viewability> element indicates whether a book is fully 
179    viewable, can be previewed, or only has "about the book" information. These
180    three "viewability modes" are the same ones returned by the Dynamic Links 
181    API.
182
183    The <gbs:viewability> element can appear only as a child of <entry>.
184
185    The value attribute will take the form of the following URIs to represent
186    the relevant viewing capability:
187
188    Full View: http://schemas.google.com/books/2008#view_all_pages
189    Limited Preview: http://schemas.google.com/books/2008#view_partial
190    Snippet View/No Preview: http://schemas.google.com/books/2008#view_no_pages
191    Unknown view: http://schemas.google.com/books/2008#view_unknown
192    """
193
194    _tag = 'viewability'
195    _namespace = BOOK_SEARCH_NAMESPACE
196    _attributes = atom.AtomBase._attributes.copy()
197    _attributes['value'] = 'value'
198
199    def __init__(self, value=None, text=None, 
200                extension_elements=None, extension_attributes=None):
201        self.value = value
202        _AtomFromString.__init__(self, extension_elements=extension_elements,
203                    extension_attributes=extension_attributes, text=text)
204
205
206class Embeddability(_AtomFromString):
207    """
208    Many of the books found on Google Book Search can be embedded on third-party
209    sites using the Embedded Viewer. The <gbs:embeddability> element indicates 
210    whether a particular book result is available for embedding. By definition,
211    a book that cannot be previewed on Book Search cannot be embedded on third-
212    party sites.
213
214    The <gbs:embeddability> element can appear only as a child of <entry>.
215
216    The value attribute will take on one of the following URIs:
217    embeddable: http://schemas.google.com/books/2008#embeddable
218    not embeddable: http://schemas.google.com/books/2008#not_embeddable
219    """
220
221    _tag = 'embeddability'
222    _namespace = BOOK_SEARCH_NAMESPACE
223    _attributes = atom.AtomBase._attributes.copy()
224    _attributes['value'] = 'value'
225
226    def __init__(self, value=None, text=None, extension_elements=None, 
227                extension_attributes=None):
228        self.value = value
229        _AtomFromString.__init__(self, extension_elements=extension_elements,
230                    extension_attributes=extension_attributes, text=text)
231
232
233class Review(_AtomFromString):
234    """
235    When present, the <gbs:review> element contains a user-generated review for
236    a given book. This element currently appears only in the user library and 
237    user annotation feeds, as a child of <entry>.
238
239    type: text, html, xhtml
240    xml:lang: id of the language, a guess, (always two letters?)
241    """
242
243    _tag = 'review'
244    _namespace = BOOK_SEARCH_NAMESPACE
245    _attributes = atom.AtomBase._attributes.copy()
246    _attributes['type'] = 'type'
247    _attributes['{http://www.w3.org/XML/1998/namespace}lang'] = 'lang'
248    
249    def __init__(self, type=None, lang=None, text=None, 
250                extension_elements=None, extension_attributes=None):
251        self.type = type
252        self.lang = lang
253        _AtomFromString.__init__(self, extension_elements=extension_elements,
254                    extension_attributes=extension_attributes, text=text)
255
256
257class Rating(_AtomFromString):
258    """All attributes must take an integral string between 1 and 5.
259    The min, max, and average attributes represent 'community' ratings. The
260    value attribute is the user's (of the feed from which the item is fetched,
261    not necessarily the authenticated user) rating of the book.
262    """
263
264    _tag = 'rating'
265    _namespace = gdata.GDATA_NAMESPACE
266    _attributes = atom.AtomBase._attributes.copy()
267    _attributes['min'] = 'min'
268    _attributes['max'] = 'max'
269    _attributes['average'] = 'average'
270    _attributes['value'] = 'value'
271
272    def __init__(self, min=None, max=None, average=None, value=None, text=None,
273                extension_elements=None, extension_attributes=None):
274        self.min = min 
275        self.max = max 
276        self.average = average
277        self.value = value
278        _AtomFromString.__init__(self, extension_elements=extension_elements,
279                    extension_attributes=extension_attributes, text=text)
280
281
282class Book(_AtomFromString, gdata.GDataEntry):
283    """
284    Represents an <entry> from either a search, annotation, library, or single
285    item feed. Note that dc_title attribute is the proper title of the volume,
286    title is an atom element and may not represent the full title.
287    """
288
289    _tag = 'entry'
290    _namespace = atom.ATOM_NAMESPACE
291    _children = gdata.GDataEntry._children.copy()
292    for i in (Creator, Identifier, Publisher, Subject,):
293        _children['{%s}%s' % (i._namespace, i._tag)] = (i._tag, [i])
294    for i in (Date, Description, Format, Viewability, Embeddability, 
295                Review, Rating):  # Review, Rating maybe only in anno/lib entrys
296        _children['{%s}%s' % (i._namespace, i._tag)] = (i._tag, i)
297    # there is an atom title as well, should we clobber that?
298    del(i)
299    _children['{%s}%s' % (Title._namespace, Title._tag)] = ('dc_title', [Title])
300
301    def to_dict(self):
302        """Returns a dictionary of the book's available metadata. If the data
303        cannot be discovered, it is not included as a key in the returned dict.
304        The possible keys are: authors, embeddability, date, description, 
305        format, identifiers, publishers, rating, review, subjects, title, and
306        viewability.
307
308        Notes:
309          * Plural keys will be lists
310          * Singular keys will be strings
311          * Title, despite usually being a list, joins the title and subtitle
312            with a space as a single string.
313          * embeddability and viewability only return the portion of the URI 
314            after #
315          * identifiers is a list of tuples, where the first item of each tuple
316            is the type of identifier and the second item is the identifying
317            string. Note that while doing dict() on this tuple may be possible,
318            some items may have multiple of the same identifier and converting
319            to a dict may resulted in collisions/dropped data.
320          * Rating returns only the user's rating. See Rating class for precise
321            definition.
322        """
323        d = {}
324        if self.GetAnnotationLink():
325            d['annotation'] = self.GetAnnotationLink().href
326        if self.creator:
327            d['authors'] = [x.text for x in self.creator]
328        if self.embeddability:
329            d['embeddability'] = self.embeddability.value.split('#')[-1]
330        if self.date:
331            d['date'] = self.date.text
332        if self.description:
333            d['description'] = self.description.text
334        if self.format:
335            d['format'] = self.format.text
336        if self.identifier:
337            d['identifiers'] = [('google_id', self.identifier[0].text)]
338            for x in self.identifier[1:]:
339                l = x.text.split(':') # should we lower the case of the ids?
340                d['identifiers'].append((l[0], ':'.join(l[1:])))
341        if self.GetInfoLink():
342            d['info'] = self.GetInfoLink().href
343        if self.GetPreviewLink():
344            d['preview'] = self.GetPreviewLink().href
345        if self.publisher:
346            d['publishers'] = [x.text for x in self.publisher]
347        if self.rating:
348            d['rating'] = self.rating.value
349        if self.review:
350            d['review'] = self.review.text
351        if self.subject:
352            d['subjects'] = [x.text for x in self.subject]
353        if self.GetThumbnailLink():
354            d['thumbnail'] = self.GetThumbnailLink().href
355        if self.dc_title:
356            d['title'] = ' '.join([x.text for x in self.dc_title])
357        if self.viewability:
358            d['viewability'] = self.viewability.value.split('#')[-1]
359        return d
360
361    def __init__(self, creator=None, date=None, 
362                description=None, format=None, author=None, identifier=None, 
363                publisher=None, subject=None, dc_title=None, viewability=None, 
364                embeddability=None, review=None, rating=None, category=None, 
365                content=None, contributor=None, atom_id=None, link=None,
366                published=None, rights=None, source=None, summary=None, 
367                title=None, control=None, updated=None, text=None, 
368                extension_elements=None, extension_attributes=None):
369        self.creator = creator
370        self.date = date
371        self.description = description
372        self.format = format
373        self.identifier = identifier
374        self.publisher = publisher
375        self.subject = subject
376        self.dc_title = dc_title or []
377        self.viewability = viewability
378        self.embeddability = embeddability
379        self.review = review
380        self.rating = rating
381        gdata.GDataEntry.__init__(self, author=author, category=category, 
382                content=content, contributor=contributor, atom_id=atom_id,
383                link=link, published=published, rights=rights, source=source,
384                summary=summary, title=title, control=control, updated=updated, 
385                text=text, extension_elements=extension_elements, 
386                extension_attributes=extension_attributes)
387    
388    def GetThumbnailLink(self):
389        """Returns the atom.Link object representing the thumbnail URI."""
390        for i in self.link:
391            if i.rel == THUMBNAIL_REL:
392                return i
393    
394    def GetInfoLink(self):
395        """
396        Returns the atom.Link object representing the human-readable info URI.
397        """
398        for i in self.link:
399            if i.rel == INFO_REL:
400                return i
401    
402    def GetPreviewLink(self):
403        """Returns the atom.Link object representing the preview URI."""
404        for i in self.link:
405            if i.rel == PREVIEW_REL:
406                return i
407    
408    def GetAnnotationLink(self):
409        """
410        Returns the atom.Link object representing the Annotation URI.
411        Note that the use of www.books in the href of this link seems to make
412        this information useless. Using books.service.ANNOTATION_FEED and 
413        BOOK_SERVER to construct your URI seems to work better.
414        """
415        for i in self.link:
416            if i.rel == ANNOTATION_REL:
417                return i
418    
419    def set_rating(self, value):
420        """Set user's rating. Must be an integral string between 1 nad 5"""
421        assert (value in ('1','2','3','4','5'))
422        if not isinstance(self.rating, Rating):
423            self.rating = Rating()
424        self.rating.value = value
425    
426    def set_review(self, text, type='text', lang='en'):
427        """Set user's review text"""
428        self.review = Review(text=text, type=type, lang=lang)
429    
430    def get_label(self):
431        """Get users label for the item as a string"""
432        for i in self.category:
433            if i.scheme == LABEL_SCHEME:
434                return i.term
435    
436    def set_label(self, term):
437        """Clear pre-existing label for the item and set term as the label."""
438        self.remove_label()
439        self.category.append(atom.Category(term=term, scheme=LABEL_SCHEME))
440    
441    def remove_label(self):
442        """Clear the user's label for the item"""
443        ln = len(self.category)
444        for i, j in enumerate(self.category[::-1]):
445            if j.scheme == LABEL_SCHEME:
446                del(self.category[ln-1-i])
447
448    def clean_annotations(self):
449        """Clear all annotations from an item. Useful for taking an item from
450        another user's library/annotation feed and adding it to the 
451        authenticated user's library without adopting annotations."""
452        self.remove_label()
453        self.review = None
454        self.rating = None
455
456    
457    def get_google_id(self):
458        """Get Google's ID of the item."""
459        return self.id.text.split('/')[-1]
460
461
462class BookFeed(_AtomFromString, gdata.GDataFeed):
463    """Represents a feed of entries from a search."""
464
465    _tag = 'feed'
466    _namespace = atom.ATOM_NAMESPACE
467    _children = gdata.GDataFeed._children.copy()
468    _children['{%s}%s' % (Book._namespace, Book._tag)] = (Book._tag, [Book])
469
470
471if __name__ == '__main__':
472    import doctest
473    doctest.testfile('datamodels.txt')