PageRenderTime 42ms CodeModel.GetById 12ms app.highlight 23ms RepoModel.GetById 1ms app.codeStats 1ms

/gdata/spreadsheet/text_db.py

http://radioappz.googlecode.com/
Python | 559 lines | 508 code | 6 blank | 45 comment | 10 complexity | 5f6a4d003fc6297780997c2d9f7634c4 MD5 | raw file
  1#!/usr/bin/python
  2#
  3# Copyright Google 2007-2008, all rights reserved.
  4#
  5# Licensed under the Apache License, Version 2.0 (the "License");
  6# you may not use this file except in compliance with the License.
  7# You may obtain a copy of the License at
  8#
  9#      http://www.apache.org/licenses/LICENSE-2.0
 10#
 11# Unless required by applicable law or agreed to in writing, software
 12# distributed under the License is distributed on an "AS IS" BASIS,
 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 14# See the License for the specific language governing permissions and
 15# limitations under the License.
 16
 17
 18import StringIO
 19import gdata
 20import gdata.service
 21import gdata.spreadsheet
 22import gdata.spreadsheet.service
 23import gdata.docs
 24import gdata.docs.service
 25
 26
 27"""Make the Google Documents API feel more like using a database.
 28
 29This module contains a client and other classes which make working with the 
 30Google Documents List Data API and the Google Spreadsheets Data API look a
 31bit more like working with a heirarchical database. Using the DatabaseClient,
 32you can create or find spreadsheets and use them like a database, with 
 33worksheets representing tables and rows representing records.
 34
 35Example Usage:
 36# Create a new database, a new table, and add records.
 37client = gdata.spreadsheet.text_db.DatabaseClient(username='jo@example.com', 
 38    password='12345')
 39database = client.CreateDatabase('My Text Database')
 40table = database.CreateTable('addresses', ['name','email',
 41    'phonenumber', 'mailingaddress'])
 42record = table.AddRecord({'name':'Bob', 'email':'bob@example.com', 
 43    'phonenumber':'555-555-1234', 'mailingaddress':'900 Imaginary St.'})
 44
 45# Edit a record
 46record.content['email'] = 'bob2@example.com'
 47record.Push()
 48
 49# Delete a table
 50table.Delete
 51
 52Warnings: 
 53Care should be exercised when using this module on spreadsheets
 54which contain formulas. This module treats all rows as containing text and
 55updating a row will overwrite any formula with the output of the formula. 
 56The intended use case is to allow easy storage of text data in a spreadsheet.
 57
 58  Error: Domain specific extension of Exception.
 59  BadCredentials: Error raised is username or password was incorrect.
 60  CaptchaRequired: Raised if a login attempt failed and a CAPTCHA challenge 
 61      was issued.
 62  DatabaseClient: Communicates with Google Docs APIs servers.
 63  Database: Represents a spreadsheet and interacts with tables.
 64  Table: Represents a worksheet and interacts with records.
 65  RecordResultSet: A list of records in a table.
 66  Record: Represents a row in a worksheet allows manipulation of text data.
 67"""
 68
 69
 70__author__ = 'api.jscudder (Jeffrey Scudder)'
 71
 72
 73class Error(Exception):
 74  pass
 75
 76
 77class BadCredentials(Error):
 78  pass
 79
 80
 81class CaptchaRequired(Error):
 82  pass
 83
 84
 85class DatabaseClient(object):
 86  """Allows creation and finding of Google Spreadsheets databases.
 87
 88  The DatabaseClient simplifies the process of creating and finding Google 
 89  Spreadsheets and will talk to both the Google Spreadsheets API and the 
 90  Google Documents List API. 
 91  """
 92
 93  def __init__(self, username=None, password=None):
 94    """Constructor for a Database Client. 
 95  
 96    If the username and password are present, the constructor  will contact
 97    the Google servers to authenticate.
 98
 99    Args:
100      username: str (optional) Example: jo@example.com
101      password: str (optional)
102    """
103    self.__docs_client = gdata.docs.service.DocsService()
104    self.__spreadsheets_client = (
105        gdata.spreadsheet.service.SpreadsheetsService())
106    self.SetCredentials(username, password)
107
108  def SetCredentials(self, username, password):
109    """Attempts to log in to Google APIs using the provided credentials.
110
111    If the username or password are None, the client will not request auth 
112    tokens.
113
114    Args:
115      username: str (optional) Example: jo@example.com
116      password: str (optional)
117    """
118    self.__docs_client.email = username
119    self.__docs_client.password = password
120    self.__spreadsheets_client.email = username
121    self.__spreadsheets_client.password = password
122    if username and password:
123      try:
124        self.__docs_client.ProgrammaticLogin()
125        self.__spreadsheets_client.ProgrammaticLogin()
126      except gdata.service.CaptchaRequired:
127        raise CaptchaRequired('Please visit https://www.google.com/accounts/'
128                            'DisplayUnlockCaptcha to unlock your account.')
129      except gdata.service.BadAuthentication:
130        raise BadCredentials('Username or password incorrect.')
131    
132  def CreateDatabase(self, name):
133    """Creates a new Google Spreadsheet with the desired name.
134
135    Args:
136      name: str The title for the spreadsheet.
137
138    Returns:
139      A Database instance representing the new spreadsheet. 
140    """
141    # Create a Google Spreadsheet to form the foundation of this database.
142    # Spreadsheet is created by uploading a file to the Google Documents
143    # List API.
144    virtual_csv_file = StringIO.StringIO(',,,')
145    virtual_media_source = gdata.MediaSource(file_handle=virtual_csv_file, content_type='text/csv', content_length=3)
146    db_entry = self.__docs_client.UploadSpreadsheet(virtual_media_source, name)
147    return Database(spreadsheet_entry=db_entry, database_client=self)
148
149  def GetDatabases(self, spreadsheet_key=None, name=None):
150    """Finds spreadsheets which have the unique key or title.
151
152    If querying on the spreadsheet_key there will be at most one result, but
153    searching by name could yield multiple results.
154
155    Args:
156      spreadsheet_key: str The unique key for the spreadsheet, this 
157          usually in the the form 'pk23...We' or 'o23...423.12,,,3'.
158      name: str The title of the spreadsheets.
159
160    Returns:
161      A list of Database objects representing the desired spreadsheets.
162    """
163    if spreadsheet_key:
164      db_entry = self.__docs_client.GetDocumentListEntry(
165          r'/feeds/documents/private/full/spreadsheet%3A' + spreadsheet_key)
166      return [Database(spreadsheet_entry=db_entry, database_client=self)]
167    else:
168      title_query = gdata.docs.service.DocumentQuery()
169      title_query['title'] = name
170      db_feed = self.__docs_client.QueryDocumentListFeed(title_query.ToUri())
171      matching_databases = []
172      for entry in db_feed.entry:
173        matching_databases.append(Database(spreadsheet_entry=entry, 
174                                           database_client=self))
175      return matching_databases
176    
177  def _GetDocsClient(self):
178    return self.__docs_client
179
180  def _GetSpreadsheetsClient(self):
181    return self.__spreadsheets_client
182
183
184class Database(object):
185  """Provides interface to find and create tables.
186
187  The database represents a Google Spreadsheet.
188  """
189
190  def __init__(self, spreadsheet_entry=None, database_client=None):
191    """Constructor for a database object.
192
193    Args:
194      spreadsheet_entry: gdata.docs.DocumentListEntry The 
195          Atom entry which represents the Google Spreadsheet. The
196          spreadsheet's key is extracted from the entry and stored as a 
197          member.
198      database_client: DatabaseClient A client which can talk to the
199          Google Spreadsheets servers to perform operations on worksheets
200          within this spreadsheet.
201    """
202    self.entry = spreadsheet_entry
203    if self.entry:
204      id_parts = spreadsheet_entry.id.text.split('/')
205      self.spreadsheet_key = id_parts[-1].replace('spreadsheet%3A', '')
206    self.client = database_client
207
208  def CreateTable(self, name, fields=None):
209    """Add a new worksheet to this spreadsheet and fill in column names.
210
211    Args:
212      name: str The title of the new worksheet.
213      fields: list of strings The column names which are placed in the
214          first row of this worksheet. These names are converted into XML
215          tags by the server. To avoid changes during the translation
216          process I recommend using all lowercase alphabetic names. For
217          example ['somelongname', 'theothername']
218
219    Returns:
220      Table representing the newly created worksheet.
221    """
222    worksheet = self.client._GetSpreadsheetsClient().AddWorksheet(title=name,
223        row_count=1, col_count=len(fields), key=self.spreadsheet_key)
224    return Table(name=name, worksheet_entry=worksheet, 
225        database_client=self.client, 
226        spreadsheet_key=self.spreadsheet_key, fields=fields)
227
228  def GetTables(self, worksheet_id=None, name=None):
229    """Searches for a worksheet with the specified ID or name.
230
231    The list of results should have one table at most, or no results
232    if the id or name were not found.
233
234    Args:
235      worksheet_id: str The ID of the worksheet, example: 'od6'
236      name: str The title of the worksheet.
237
238    Returns:
239      A list of length 0 or 1 containing the desired Table. A list is returned
240      to make this method feel like GetDatabases and GetRecords.
241    """
242    if worksheet_id:
243      worksheet_entry = self.client._GetSpreadsheetsClient().GetWorksheetsFeed(
244          self.spreadsheet_key, wksht_id=worksheet_id)
245      return [Table(name=worksheet_entry.title.text, 
246          worksheet_entry=worksheet_entry, database_client=self.client, 
247          spreadsheet_key=self.spreadsheet_key)]
248    else:
249      matching_tables = []
250      query = None
251      if name:
252        query = gdata.spreadsheet.service.DocumentQuery()
253        query.title = name
254 
255      worksheet_feed = self.client._GetSpreadsheetsClient().GetWorksheetsFeed(
256          self.spreadsheet_key, query=query)
257      for entry in worksheet_feed.entry:
258        matching_tables.append(Table(name=entry.title.text, 
259            worksheet_entry=entry, database_client=self.client, 
260            spreadsheet_key=self.spreadsheet_key))
261      return matching_tables
262
263  def Delete(self):
264    """Deletes the entire database spreadsheet from Google Spreadsheets."""
265    entry = self.client._GetDocsClient().Get(
266        r'http://docs.google.com/feeds/documents/private/full/spreadsheet%3A' +
267        self.spreadsheet_key)
268    self.client._GetDocsClient().Delete(entry.GetEditLink().href)
269
270
271class Table(object):
272
273  def __init__(self, name=None, worksheet_entry=None, database_client=None, 
274      spreadsheet_key=None, fields=None):
275    self.name = name
276    self.entry = worksheet_entry
277    id_parts = worksheet_entry.id.text.split('/')
278    self.worksheet_id = id_parts[-1]
279    self.spreadsheet_key = spreadsheet_key
280    self.client = database_client
281    self.fields = fields or []
282    if fields:
283      self.SetFields(fields)
284
285  def LookupFields(self):
286    """Queries to find the column names in the first row of the worksheet.
287    
288    Useful when you have retrieved the table from the server and you don't 
289    know the column names.
290    """
291    if self.entry:
292      first_row_contents = []
293      query = gdata.spreadsheet.service.CellQuery()
294      query.max_row = '1'
295      query.min_row = '1'
296      feed = self.client._GetSpreadsheetsClient().GetCellsFeed(
297          self.spreadsheet_key, wksht_id=self.worksheet_id, query=query)
298      for entry in feed.entry:
299        first_row_contents.append(entry.content.text)
300      # Get the next set of cells if needed.
301      next_link = feed.GetNextLink()
302      while next_link:
303        feed = self.client._GetSpreadsheetsClient().Get(next_link.href, 
304            converter=gdata.spreadsheet.SpreadsheetsCellsFeedFromString)
305        for entry in feed.entry:
306          first_row_contents.append(entry.content.text)
307        next_link = feed.GetNextLink()
308      # Convert the contents of the cells to valid headers.
309      self.fields = ConvertStringsToColumnHeaders(first_row_contents)
310    
311  def SetFields(self, fields):
312    """Changes the contents of the cells in the first row of this worksheet.
313
314    Args:
315      fields: list of strings The names in the list comprise the
316          first row of the worksheet. These names are converted into XML
317          tags by the server. To avoid changes during the translation
318          process I recommend using all lowercase alphabetic names. For
319          example ['somelongname', 'theothername']
320    """
321    # TODO: If the table already had fields, we might want to clear out the,
322    # current column headers.
323    self.fields = fields
324    i = 0
325    for column_name in fields:
326      i = i + 1
327      # TODO: speed this up by using a batch request to update cells.
328      self.client._GetSpreadsheetsClient().UpdateCell(1, i, column_name, 
329          self.spreadsheet_key, self.worksheet_id)
330
331  def Delete(self):
332    """Deletes this worksheet from the spreadsheet."""
333    worksheet = self.client._GetSpreadsheetsClient().GetWorksheetsFeed(
334        self.spreadsheet_key, wksht_id=self.worksheet_id)
335    self.client._GetSpreadsheetsClient().DeleteWorksheet(
336        worksheet_entry=worksheet)
337
338  def AddRecord(self, data):
339    """Adds a new row to this worksheet.
340
341    Args:
342      data: dict of strings Mapping of string values to column names. 
343
344    Returns:
345      Record which represents this row of the spreadsheet.
346    """
347    new_row = self.client._GetSpreadsheetsClient().InsertRow(data, 
348        self.spreadsheet_key, wksht_id=self.worksheet_id)
349    return Record(content=data, row_entry=new_row, 
350        spreadsheet_key=self.spreadsheet_key, worksheet_id=self.worksheet_id,
351        database_client=self.client)
352
353  def GetRecord(self, row_id=None, row_number=None):
354    """Gets a single record from the worksheet based on row ID or number.
355    
356    Args:
357      row_id: The ID for the individual row.
358      row_number: str or int The position of the desired row. Numbering 
359          begins at 1, which refers to the second row in the worksheet since
360          the first row is used for column names.
361
362    Returns:
363      Record for the desired row.
364    """
365    if row_id:
366      row_entry = self.client._GetSpreadsheetsClient().GetListFeed(
367          self.spreadsheet_key, wksht_id=self.worksheet_id, row_id=row_id)
368      return Record(content=None, row_entry=row_entry, 
369           spreadsheet_key=self.spreadsheet_key, 
370           worksheet_id=self.worksheet_id, database_client=self.client)
371    else:
372      row_query = gdata.spreadsheet.service.ListQuery()
373      row_query.start_index = str(row_number)
374      row_query.max_results = '1'
375      row_feed = self.client._GetSpreadsheetsClient().GetListFeed(
376          self.spreadsheet_key, wksht_id=self.worksheet_id, query=row_query)
377      if len(row_feed.entry) >= 1:
378        return Record(content=None, row_entry=row_feed.entry[0],
379            spreadsheet_key=self.spreadsheet_key,
380            worksheet_id=self.worksheet_id, database_client=self.client)
381      else:
382        return None
383
384  def GetRecords(self, start_row, end_row):
385    """Gets all rows between the start and end row numbers inclusive.
386
387    Args:
388      start_row: str or int
389      end_row: str or int
390
391    Returns:
392      RecordResultSet for the desired rows.
393    """
394    start_row = int(start_row)
395    end_row = int(end_row)
396    max_rows = end_row - start_row + 1
397    row_query = gdata.spreadsheet.service.ListQuery()
398    row_query.start_index = str(start_row)
399    row_query.max_results = str(max_rows)
400    rows_feed = self.client._GetSpreadsheetsClient().GetListFeed(
401        self.spreadsheet_key, wksht_id=self.worksheet_id, query=row_query)
402    return RecordResultSet(rows_feed, self.client, self.spreadsheet_key,
403        self.worksheet_id)
404
405  def FindRecords(self, query_string):
406    """Performs a query against the worksheet to find rows which match.
407
408    For details on query string syntax see the section on sq under
409    http://code.google.com/apis/spreadsheets/reference.html#list_Parameters
410
411    Args:
412      query_string: str Examples: 'name == john' to find all rows with john
413          in the name column, '(cost < 19.50 and name != toy) or cost > 500'
414
415    Returns:
416      RecordResultSet with the first group of matches.
417    """
418    row_query = gdata.spreadsheet.service.ListQuery()
419    row_query.sq = query_string
420    matching_feed = self.client._GetSpreadsheetsClient().GetListFeed(
421        self.spreadsheet_key, wksht_id=self.worksheet_id, query=row_query)
422    return RecordResultSet(matching_feed, self.client, 
423        self.spreadsheet_key, self.worksheet_id)
424
425
426class RecordResultSet(list):
427  """A collection of rows which allows fetching of the next set of results.
428
429  The server may not send all rows in the requested range because there are
430  too many. Using this result set you can access the first set of results
431  as if it is a list, then get the next batch (if there are more results) by
432  calling GetNext().
433  """
434
435  def __init__(self, feed, client, spreadsheet_key, worksheet_id):
436    self.client = client
437    self.spreadsheet_key = spreadsheet_key
438    self.worksheet_id = worksheet_id
439    self.feed = feed
440    list(self)
441    for entry in self.feed.entry:
442      self.append(Record(content=None, row_entry=entry, 
443          spreadsheet_key=spreadsheet_key, worksheet_id=worksheet_id,
444          database_client=client))
445
446  def GetNext(self):
447    """Fetches the next batch of rows in the result set.
448
449    Returns:
450      A new RecordResultSet.
451    """
452    next_link = self.feed.GetNextLink()
453    if next_link and next_link.href:
454      new_feed = self.client._GetSpreadsheetsClient().Get(next_link.href, 
455          converter=gdata.spreadsheet.SpreadsheetsListFeedFromString)
456      return RecordResultSet(new_feed, self.client, self.spreadsheet_key,
457          self.worksheet_id)
458
459
460class Record(object):
461  """Represents one row in a worksheet and provides a dictionary of values.
462
463  Attributes:
464    custom: dict Represents the contents of the row with cell values mapped
465        to column headers.
466  """
467
468  def __init__(self, content=None, row_entry=None, spreadsheet_key=None, 
469       worksheet_id=None, database_client=None):
470    """Constructor for a record.
471    
472    Args:
473      content: dict of strings Mapping of string values to column names.
474      row_entry: gdata.spreadsheet.SpreadsheetsList The Atom entry 
475          representing this row in the worksheet.
476      spreadsheet_key: str The ID of the spreadsheet in which this row 
477          belongs.
478      worksheet_id: str The ID of the worksheet in which this row belongs.
479      database_client: DatabaseClient The client which can be used to talk
480          the Google Spreadsheets server to edit this row.
481    """
482    self.entry = row_entry
483    self.spreadsheet_key = spreadsheet_key
484    self.worksheet_id = worksheet_id
485    if row_entry:
486      self.row_id = row_entry.id.text.split('/')[-1]
487    else:
488      self.row_id = None
489    self.client = database_client
490    self.content = content or {}
491    if not content:
492      self.ExtractContentFromEntry(row_entry)
493
494  def ExtractContentFromEntry(self, entry):
495    """Populates the content and row_id based on content of the entry.
496
497    This method is used in the Record's contructor.
498
499    Args:
500      entry: gdata.spreadsheet.SpreadsheetsList The Atom entry 
501          representing this row in the worksheet.
502    """
503    self.content = {}
504    if entry:
505      self.row_id = entry.id.text.split('/')[-1]
506      for label, custom in entry.custom.iteritems():
507        self.content[label] = custom.text
508
509  def Push(self):
510    """Send the content of the record to spreadsheets to edit the row.
511
512    All items in the content dictionary will be sent. Items which have been
513    removed from the content may remain in the row. The content member
514    of the record will not be modified so additional fields in the row
515    might be absent from this local copy.
516    """
517    self.entry = self.client._GetSpreadsheetsClient().UpdateRow(self.entry, self.content)
518
519  def Pull(self):
520    """Query Google Spreadsheets to get the latest data from the server.
521
522    Fetches the entry for this row and repopulates the content dictionary 
523    with the data found in the row.
524    """
525    if self.row_id:
526      self.entry = self.client._GetSpreadsheetsClient().GetListFeed(
527          self.spreadsheet_key, wksht_id=self.worksheet_id, row_id=self.row_id)
528    self.ExtractContentFromEntry(self.entry)
529
530  def Delete(self):
531    self.client._GetSpreadsheetsClient().DeleteRow(self.entry)
532
533
534def ConvertStringsToColumnHeaders(proposed_headers):
535  """Converts a list of strings to column names which spreadsheets accepts.
536
537  When setting values in a record, the keys which represent column names must
538  fit certain rules. They are all lower case, contain no spaces or special
539  characters. If two columns have the same name after being sanitized, the 
540  columns further to the right have _2, _3 _4, etc. appended to them.
541
542  If there are column names which consist of all special characters, or if
543  the column header is blank, an obfuscated value will be used for a column
544  name. This method does not handle blank column names or column names with
545  only special characters.
546  """
547  headers = []
548  for input_string in proposed_headers:
549    # TODO: probably a more efficient way to do this. Perhaps regex.
550    sanitized = input_string.lower().replace('_', '').replace(
551        ':', '').replace(' ', '')
552    # When the same sanitized header appears multiple times in the first row
553    # of a spreadsheet, _n is appended to the name to make it unique.
554    header_count = headers.count(sanitized)
555    if header_count > 0:
556      headers.append('%s_%i' % (sanitized, header_count+1))
557    else:
558      headers.append(sanitized)
559  return headers