/gdata/books/__init__.py
Python | 473 lines | 409 code | 24 blank | 40 comment | 52 complexity | 335709a962c6fb38cdab9d2afc3eb8b6 MD5 | raw file
#!/usr/bin/python

"""
    Data Models for books.service

    All classes can be instantiated from an xml string using their FromString
    class method.

    Notes:
        * Book.title displays the first dc:title because the returned XML
          repeats that datum as atom:title.
    There is an undocumented gbs:openAccess element that is not parsed.
"""

__author__ = "James Sams <sams.james@gmail.com>"
__copyright__ = "Apache License v2.0"

import atom
import gdata


BOOK_SEARCH_NAMESPACE = 'http://schemas.google.com/books/2008'
DC_NAMESPACE = 'http://purl.org/dc/terms'
ANNOTATION_REL = "http://schemas.google.com/books/2008/annotation"
INFO_REL = "http://schemas.google.com/books/2008/info"
LABEL_SCHEME = "http://schemas.google.com/books/2008/labels"
PREVIEW_REL = "http://schemas.google.com/books/2008/preview"
THUMBNAIL_REL = "http://schemas.google.com/books/2008/thumbnail"
FULL_VIEW = "http://schemas.google.com/books/2008#view_all_pages"
PARTIAL_VIEW = "http://schemas.google.com/books/2008#view_partial"
NO_VIEW = "http://schemas.google.com/books/2008#view_no_pages"
UNKNOWN_VIEW = "http://schemas.google.com/books/2008#view_unknown"
EMBEDDABLE = "http://schemas.google.com/books/2008#embeddable"
NOT_EMBEDDABLE = "http://schemas.google.com/books/2008#not_embeddable"


class _AtomFromString(atom.AtomBase):
    """Base class that gives every data model a FromString constructor."""

    # Wrapped with classmethod() below rather than with decorator syntax;
    # NOTE(review): presumably to stay importable on interpreters that lack
    # decorators -- confirm before switching to @classmethod.
    def FromString(cls, s):
        """Return an instance of cls built from the XML string s.

        The work is delegated to atom.CreateClassFromXMLString.
        """
        return atom.CreateClassFromXMLString(cls, s)

    FromString = classmethod(FromString)


class Creator(_AtomFromString):
    """
    The <dc:creator> element identifies an author--or, more generally, an
    entity responsible for creating the volume in question. Examples of a
    creator include a person, an organization, or a service. In the case of
    anthologies, proceedings, or other edited works, this field may be used to
    indicate editors or other entities responsible for collecting the volume's
    contents.

    This element appears as a child of <entry>. If there are multiple authors
    or contributors to the book, there may be multiple <dc:creator> elements
    in the volume entry (one for each creator or contributor).
    """

    _tag = 'creator'
    _namespace = DC_NAMESPACE


class Date(_AtomFromString):  # iso 8601 / W3CDTF profile
    """
    The <dc:date> element indicates the publication date of the specific volume
    in question. If the book is a reprint, this is the reprint date, not the
    original publication date. The date is encoded according to the ISO-8601
    standard (and more specifically, the W3CDTF profile).

    The <dc:date> element can appear only as a child of <entry>.

    Usually only the year or the year and the month are given.

    YYYY-MM-DDThh:mm:ssTZD  TZD = -hh:mm or +hh:mm
    """

    _tag = 'date'
    _namespace = DC_NAMESPACE


class Description(_AtomFromString):
    """
    The <dc:description> element includes text that describes a book or book
    result. In a search result feed, this may be a search result "snippet" that
    contains the words around the user's search term. For a single volume feed,
    this element may contain a synopsis of the book.

    The <dc:description> element can appear only as a child of <entry>
    """

    _tag = 'description'
    _namespace = DC_NAMESPACE


class Format(_AtomFromString):
    """
    The <dc:format> element describes the physical properties of the volume.
    Currently, it indicates the number of pages in the book, but more
    information may be added to this field in the future.

    This element can appear only as a child of <entry>.
    """

    _tag = 'format'
    _namespace = DC_NAMESPACE


class Identifier(_AtomFromString):
    """
    The <dc:identifier> element provides an unambiguous reference to a
    particular book.
    * Every <entry> contains at least one <dc:identifier> child.
    * The first identifier is always the unique string Book Search has assigned
      to the volume (such as s1gVAAAAYAAJ). This is the ID that appears in the
      book's URL in the Book Search GUI, as well as in the URL of that book's
      single item feed.
    * Many books contain additional <dc:identifier> elements. These provide
      alternate, external identifiers to the volume. Such identifiers may
      include the ISBNs, ISSNs, Library of Congress Control Numbers (LCCNs),
      and OCLC numbers; they are prepended with a corresponding namespace
      prefix (such as "ISBN:").
    * Any <dc:identifier> can be passed to the Dynamic Links, used to
      instantiate an Embedded Viewer, or even used to construct static links to
      Book Search.
    The <dc:identifier> element can appear only as a child of <entry>.
    """

    _tag = 'identifier'
    _namespace = DC_NAMESPACE


class Publisher(_AtomFromString):
    """
    The <dc:publisher> element contains the name of the entity responsible for
    producing and distributing the volume (usually the specific edition of this
    book). Examples of a publisher include a person, an organization, or a
    service.

    This element can appear only as a child of <entry>. If there is more than
    one publisher, multiple <dc:publisher> elements may appear.
    """

    _tag = 'publisher'
    _namespace = DC_NAMESPACE


class Subject(_AtomFromString):
    """
    The <dc:subject> element identifies the topic of the book. Usually this is
    a Library of Congress Subject Heading (LCSH) or Book Industry Standards
    and Communications Subject Heading (BISAC).

    The <dc:subject> element can appear only as a child of <entry>. There may
    be multiple <dc:subject> elements per entry.
    """

    _tag = 'subject'
    _namespace = DC_NAMESPACE


class Title(_AtomFromString):
    """
    The <dc:title> element contains the title of a book as it was published. If
    a book has a subtitle, it appears as a second <dc:title> element in the book
    result's <entry>.
    """

    _tag = 'title'
    _namespace = DC_NAMESPACE


class Viewability(_AtomFromString):
    """
    Google Book Search respects the user's local copyright restrictions. As a
    result, previews or full views of some books are not available in all
    locations. The <gbs:viewability> element indicates whether a book is fully
    viewable, can be previewed, or only has "about the book" information. These
    three "viewability modes" are the same ones returned by the Dynamic Links
    API.

    The <gbs:viewability> element can appear only as a child of <entry>.

    The value attribute will take the form of the following URIs to represent
    the relevant viewing capability:

    Full View: http://schemas.google.com/books/2008#view_all_pages
    Limited Preview: http://schemas.google.com/books/2008#view_partial
    Snippet View/No Preview: http://schemas.google.com/books/2008#view_no_pages
    Unknown view: http://schemas.google.com/books/2008#view_unknown
    """

    _tag = 'viewability'
    _namespace = BOOK_SEARCH_NAMESPACE
    _attributes = atom.AtomBase._attributes.copy()
    _attributes['value'] = 'value'

    def __init__(self, value=None, text=None,
                 extension_elements=None, extension_attributes=None):
        """Store the value attribute and initialise the atom base.

        value is kept on the instance as self.value; the remaining arguments
        are forwarded unchanged to _AtomFromString.__init__.
        """
        self.value = value
        _AtomFromString.__init__(self, extension_elements=extension_elements,
                                 extension_attributes=extension_attributes,
                                 text=text)


class Embeddability(_AtomFromString):
    """
    Many of the books found on Google Book Search can be embedded on third-party
    sites using the Embedded Viewer. The <gbs:embeddability> element indicates
    whether a particular book result is available for embedding. By definition,
    a book that cannot be previewed on Book Search cannot be embedded on third-
    party sites.

    The <gbs:embeddability> element can appear only as a child of <entry>.

    The value attribute will take on one of the following URIs:
    embeddable: http://schemas.google.com/books/2008#embeddable
    not embeddable: http://schemas.google.com/books/2008#not_embeddable
    """

    _tag = 'embeddability'
    _namespace = BOOK_SEARCH_NAMESPACE
    _attributes = atom.AtomBase._attributes.copy()
    _attributes['value'] = 'value'

    def __init__(self, value=None, text=None, extension_elements=None,
                 extension_attributes=None):
        """Store the value attribute and initialise the atom base.

        value is kept on the instance as self.value; the remaining arguments
        are forwarded unchanged to _AtomFromString.__init__.
        """
        self.value = value
        _AtomFromString.__init__(self, extension_elements=extension_elements,
                                 extension_attributes=extension_attributes,
                                 text=text)


class Review(_AtomFromString):
    """
    When present, the <gbs:review> element contains a user-generated review for
    a given book. This element currently appears only in the user library and
    user annotation feeds, as a child of <entry>.

    type: text, html, xhtml
    xml:lang: id of the language, a guess, (always two letters?)
    """

    _tag = 'review'
    _namespace = BOOK_SEARCH_NAMESPACE
    _attributes = atom.AtomBase._attributes.copy()
    _attributes['type'] = 'type'
    _attributes['{http://www.w3.org/XML/1998/namespace}lang'] = 'lang'

    def __init__(self, type=None, lang=None, text=None,
                 extension_elements=None, extension_attributes=None):
        """Store the type and xml:lang attributes and initialise the base.

        type and lang are kept on the instance as self.type and self.lang;
        the remaining arguments are forwarded to _AtomFromString.__init__.
        """
        self.type = type
        self.lang = lang
        _AtomFromString.__init__(self, extension_elements=extension_elements,
                                 extension_attributes=extension_attributes,
                                 text=text)


class Rating(_AtomFromString):
    """All attributes must take an integral string between 1 and 5.
    The min, max, and average attributes represent 'community' ratings. The
    value attribute is the user's (of the feed from which the item is fetched,
    not necessarily the authenticated user) rating of the book.
    """

    _tag = 'rating'
    _namespace = gdata.GDATA_NAMESPACE
    _attributes = atom.AtomBase._attributes.copy()
    _attributes['min'] = 'min'
    _attributes['max'] = 'max'
    _attributes['average'] = 'average'
    _attributes['value'] = 'value'

    def __init__(self, min=None, max=None, average=None, value=None, text=None,
                 extension_elements=None, extension_attributes=None):
        """Store the four rating attributes and initialise the atom base.

        min, max, average and value are kept on the instance under the same
        names; the remaining arguments are forwarded to
        _AtomFromString.__init__.
        """
        self.min = min
        self.max = max
        self.average = average
        self.value = value
        _AtomFromString.__init__(self, extension_elements=extension_elements,
                                 extension_attributes=extension_attributes,
                                 text=text)


class Book(_AtomFromString, gdata.GDataEntry):
    """
    Represents an <entry> from either a search, annotation, library, or single
    item feed. Note that dc_title attribute is the proper title of the volume,
    title is an atom element and may not represent the full title.
    """

    _tag = 'entry'
    _namespace = atom.ATOM_NAMESPACE
    _children = gdata.GDataEntry._children.copy()
    # Elements registered as lists (mapped to [class]).
    for i in (Creator, Identifier, Publisher, Subject,):
        _children['{%s}%s' % (i._namespace, i._tag)] = (i._tag, [i])
    # Elements registered as single members (mapped to the class itself).
    for i in (Date, Description, Format, Viewability, Embeddability,
              Review, Rating):  # Review, Rating maybe only in anno/lib entries
        _children['{%s}%s' % (i._namespace, i._tag)] = (i._tag, i)
    # there is an atom title as well, should we clobber that?
    del(i)  # keep the loop variable from becoming a class attribute
    _children['{%s}%s' % (Title._namespace, Title._tag)] = ('dc_title', [Title])

    def to_dict(self):
        """Returns a dictionary of the book's available metadata. If the data
        cannot be discovered, it is not included as a key in the returned dict.
        The possible keys are: annotation, authors, embeddability, date,
        description, format, identifiers, info, preview, publishers, rating,
        review, subjects, thumbnail, title, and viewability.

        Notes:
          * Plural keys will be lists
          * Singular keys will be strings
          * Title, despite usually being a list, joins the title and subtitle
            with a space as a single string.
          * embeddability and viewability only return the portion of the URI
            after #
          * identifiers is a list of tuples, where the first item of each tuple
            is the type of identifier and the second item is the identifying
            string. Note that while doing dict() on this tuple may be possible,
            some items may have multiple of the same identifier and converting
            to a dict may result in collisions/dropped data.
          * Rating returns only the user's rating. See Rating class for precise
            definition.
          * embeddability/viewability elements without a value attribute, and
            identifier/title elements without text, are skipped rather than
            raising AttributeError.
        """
        d = {}
        # Each link lookup scans self.link, so do it once per link kind.
        annotation_link = self.GetAnnotationLink()
        if annotation_link:
            d['annotation'] = annotation_link.href
        if self.creator:
            d['authors'] = [x.text for x in self.creator]
        if self.embeddability and self.embeddability.value is not None:
            d['embeddability'] = self.embeddability.value.split('#')[-1]
        if self.date:
            d['date'] = self.date.text
        if self.description:
            d['description'] = self.description.text
        if self.format:
            d['format'] = self.format.text
        if self.identifier:
            # The first identifier is reported under the 'google_id' type.
            d['identifiers'] = [('google_id', self.identifier[0].text)]
            for x in self.identifier[1:]:
                if x.text is None:
                    continue  # nothing to split; avoid AttributeError
                # should we lower the case of the ids?
                l = x.text.split(':')
                # Only the first colon separates type from value; any later
                # colons belong to the value and are restored here.
                d['identifiers'].append((l[0], ':'.join(l[1:])))
        info_link = self.GetInfoLink()
        if info_link:
            d['info'] = info_link.href
        preview_link = self.GetPreviewLink()
        if preview_link:
            d['preview'] = preview_link.href
        if self.publisher:
            d['publishers'] = [x.text for x in self.publisher]
        if self.rating:
            d['rating'] = self.rating.value
        if self.review:
            d['review'] = self.review.text
        if self.subject:
            d['subjects'] = [x.text for x in self.subject]
        thumbnail_link = self.GetThumbnailLink()
        if thumbnail_link:
            d['thumbnail'] = thumbnail_link.href
        if self.dc_title:
            # str.join rejects None, so drop title elements without text.
            titles = [x.text for x in self.dc_title if x.text is not None]
            if titles:
                d['title'] = ' '.join(titles)
        if self.viewability and self.viewability.value is not None:
            d['viewability'] = self.viewability.value.split('#')[-1]
        return d

    def __init__(self, creator=None, date=None,
                 description=None, format=None, author=None, identifier=None,
                 publisher=None, subject=None, dc_title=None, viewability=None,
                 embeddability=None, review=None, rating=None, category=None,
                 content=None, contributor=None, atom_id=None, link=None,
                 published=None, rights=None, source=None, summary=None,
                 title=None, control=None, updated=None, text=None,
                 extension_elements=None, extension_attributes=None):
        """Store the book-specific members and initialise the GData entry.

        creator, date, description, format, identifier, publisher, subject,
        viewability, embeddability, review and rating are stored as given;
        dc_title falls back to an empty list when false-y. All other
        arguments are forwarded to gdata.GDataEntry.__init__.
        """
        self.creator = creator
        self.date = date
        self.description = description
        self.format = format
        self.identifier = identifier
        self.publisher = publisher
        self.subject = subject
        self.dc_title = dc_title or []
        self.viewability = viewability
        self.embeddability = embeddability
        self.review = review
        self.rating = rating
        gdata.GDataEntry.__init__(self, author=author, category=category,
                content=content, contributor=contributor, atom_id=atom_id,
                link=link, published=published, rights=rights, source=source,
                summary=summary, title=title, control=control, updated=updated,
                text=text, extension_elements=extension_elements,
                extension_attributes=extension_attributes)

    def _find_link_by_rel(self, rel):
        """Return the first item of self.link whose rel equals rel, or None."""
        for candidate in self.link:
            if candidate.rel == rel:
                return candidate
        return None

    def GetThumbnailLink(self):
        """Returns the atom.Link object representing the thumbnail URI."""
        return self._find_link_by_rel(THUMBNAIL_REL)

    def GetInfoLink(self):
        """
        Returns the atom.Link object representing the human-readable info URI.
        """
        return self._find_link_by_rel(INFO_REL)

    def GetPreviewLink(self):
        """Returns the atom.Link object representing the preview URI."""
        return self._find_link_by_rel(PREVIEW_REL)

    def GetAnnotationLink(self):
        """
        Returns the atom.Link object representing the Annotation URI.
        Note that the use of www.books in the href of this link seems to make
        this information useless. Using books.service.ANNOTATION_FEED and
        BOOK_SERVER to construct your URI seems to work better.
        """
        return self._find_link_by_rel(ANNOTATION_REL)

    def set_rating(self, value):
        """Set user's rating. Must be an integral string between 1 and 5.

        Raises AssertionError when value is not one of '1' through '5'.
        """
        # Raised explicitly instead of through an `assert` statement so the
        # check is not stripped under `python -O`; AssertionError is kept so
        # existing callers see the same exception type.
        if value not in ('1', '2', '3', '4', '5'):
            raise AssertionError(
                "rating must be one of '1', '2', '3', '4', '5'; got %r"
                % (value,))
        if not isinstance(self.rating, Rating):
            self.rating = Rating()
        self.rating.value = value

    def set_review(self, text, type='text', lang='en'):
        """Set user's review text, replacing any existing Review object."""
        self.review = Review(text=text, type=type, lang=lang)

    def get_label(self):
        """Get users label for the item as a string.

        Returns the term of the first category whose scheme is LABEL_SCHEME,
        or None when there is no such category.
        """
        for i in self.category:
            if i.scheme == LABEL_SCHEME:
                return i.term
        return None

    def set_label(self, term):
        """Clear pre-existing label for the item and set term as the label."""
        self.remove_label()
        self.category.append(atom.Category(term=term, scheme=LABEL_SCHEME))

    def remove_label(self):
        """Clear the user's label for the item.

        Every category whose scheme is LABEL_SCHEME is removed.
        """
        # Slice assignment filters the existing list object in place, so
        # other references to self.category observe the removal too.
        self.category[:] = [c for c in self.category
                            if c.scheme != LABEL_SCHEME]

    def clean_annotations(self):
        """Clear all annotations from an item. Useful for taking an item from
        another user's library/annotation feed and adding it to the
        authenticated user's library without adopting annotations."""
        self.remove_label()
        self.review = None
        self.rating = None

    def get_google_id(self):
        """Get Google's ID of the item.

        This is the text after the last '/' in the entry's atom id.
        """
        return self.id.text.split('/')[-1]


class BookFeed(_AtomFromString, gdata.GDataFeed):
    """Represents a feed of entries from a search."""

    _tag = 'feed'
    _namespace = atom.ATOM_NAMESPACE
    _children = gdata.GDataFeed._children.copy()
    _children['{%s}%s' % (Book._namespace, Book._tag)] = (Book._tag, [Book])


if __name__ == '__main__':
    import doctest
    doctest.testfile('datamodels.txt')