gdata_lib.py - Copyright (c) 2011 The Chromium OS Authors. …

/lib/gdata_lib.py

https://gitlab.com/github-cloud-corporation/chromite · Python · 680 lines · 537 code · 79 blank · 64 comment · 52 complexity · 3df56eb2ac240c347647008115e73265 MD5 · raw file

#!/usr/bin/python

# Copyright (c) 2011 The Chromium OS Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Library for interacting with gdata (i.e. Google Docs, Tracker, etc)."""

import functools
import getpass
import os
import pickle
import re
import urllib
import xml.dom.minidom

# pylint: disable=W0404
import gdata.projecthosting.client
import gdata.service
import gdata.spreadsheet.service

from chromite.lib import operation

# pylint: disable=W0201,E0203

# Per-user file paths for the saved auth token(s) and the saved login
# credentials (presumably handed to Creds.LoadAuthToken / Creds.LoadCreds by
# callers -- confirm against client code).
# os.path.expanduser is used instead of os.environ['HOME'] so that merely
# importing this module does not raise KeyError when HOME is unset.
TOKEN_FILE = os.path.join(os.path.expanduser('~'), '.gdata_token')
CRED_FILE = os.path.join(os.path.expanduser('~'), '.gdata_cred.txt')

# Module-wide Operation instance.  All status output in this module
# (Notice / Warning / Error / Die) goes through it.
oper = operation.Operation('gdata_lib')

# Characters removed from column names: spaces and forward slashes.
_BAD_COL_CHARS_REGEX = re.compile(r'[ /]')
def PrepColNameForSS(col):
  """Return |col| in the form the spreadsheet interface uses as a column key.

  The name is lowercased and every space and '/' character is dropped,
  e.g. "Foo Bar" becomes "foobar".
  """
  lowered = col.lower()
  return _BAD_COL_CHARS_REGEX.sub('', lowered)


# TODO(mtennant): Rename PrepRowValuesForSS
def PrepRowForSS(row):
  """Return a copy of |row| with every value passed through PrepValForSS.

  Keys are left untouched; only the values are prepared so that the
  spreadsheet treats them as strings.
  """
  prepped = {}
  for key, val in row.items():
    prepped[key] = PrepValForSS(val)
  return prepped


# Matches values made up solely of digits and dots, which the spreadsheet
# would otherwise auto-format as numbers.
_NUM_REGEX = re.compile(r'^[\d\.]+$')
def PrepValForSS(val):
  """Return |val| in a form the spreadsheet will keep as a string.

  Number-like values get a leading single quote; anything else (including
  empty or None values) is returned unchanged.
  """
  if not val:
    return val
  if _NUM_REGEX.match(val) is None:
    return val
  return "'" + val


def ScrubValFromSS(val):
  """Strip the leading single-quote string marker from |val|, if present.

  Empty/None values and values without the marker are returned unchanged.
  """
  if not val or val[0] != "'":
    return val
  return val[1:]


class Creds(object):
  """Class to manage user/password credentials and saved auth tokens.

  The two "dirty" flags record whether the in-memory tokens or credentials
  have changed since they were last stored, so that the Store*IfNeeded
  methods only write when there is something new to save.
  """

  __slots__ = (
    'docs_auth_token',    # Docs Client auth token string
    'creds_dirty',        # True if user/password set and not, yet, saved
    'password',           # User password
    'token_dirty',        # True if auth token(s) set and not, yet, saved
    'tracker_auth_token', # Tracker Client auth token string
    'user',               # User account (foo@chromium.org)
    )

  # Attributes written to / read from the auth token file.
  SAVED_TOKEN_ATTRS = ('docs_auth_token', 'tracker_auth_token', 'user')

  def __init__(self):
    """Start with no user, password or tokens, and nothing to save."""
    self.user = None
    self.password = None

    self.docs_auth_token = None
    self.tracker_auth_token = None

    self.token_dirty = False
    self.creds_dirty = False

  def SetDocsAuthToken(self, auth_token):
    """Set the Docs auth_token string."""
    self.docs_auth_token = auth_token
    self.token_dirty = True

  def SetTrackerAuthToken(self, auth_token):
    """Set the Tracker auth_token string."""
    self.tracker_auth_token = auth_token
    self.token_dirty = True

  def LoadAuthToken(self, filepath):
    """Load previously saved auth token(s) from |filepath|.

    This first clears both docs_auth_token and tracker_auth_token.  If the
    file cannot be read or unpickled an error is reported through |oper|
    and the tokens stay cleared.
    """
    self.docs_auth_token = None
    self.tracker_auth_token = None
    try:
      # NOTE(review): pickle.load can execute arbitrary code from a crafted
      # file; only point this at token files written by StoreAuthToken.
      with open(filepath, 'r') as f:
        obj = pickle.load(f)
    except (IOError, EOFError, pickle.UnpicklingError):
      # EOFError / UnpicklingError cover an empty or truncated token file.
      oper.Error('Unable to load auth token file at "%s"' % filepath)
      return

    if 'auth_token' in obj:
      # Backwards compatibility.  Default 'auth_token' is what
      # docs_auth_token used to be saved as.  Mark the token dirty so it
      # is written back under the current attribute name.
      self.docs_auth_token = obj['auth_token']
      self.token_dirty = True
    for attr in self.SAVED_TOKEN_ATTRS:
      if attr in obj:
        setattr(self, attr, obj[attr])
    oper.Notice('Loaded Docs/Tracker auth token(s) from "%s"' % filepath)

  def StoreAuthTokenIfNeeded(self, filepath):
    """Store auth token(s) to |filepath| if anything changed."""
    if self.token_dirty:
      self.StoreAuthToken(filepath)

  def StoreAuthToken(self, filepath):
    """Store auth token(s) to |filepath|.

    Only the SAVED_TOKEN_ATTRS that currently have a value are written.
    An IOError while writing is reported through |oper| and leaves
    token_dirty set.
    """
    obj = {}

    for attr in self.SAVED_TOKEN_ATTRS:
      val = getattr(self, attr)
      if val:
        obj[attr] = val

    try:
      oper.Notice('Storing Docs and/or Tracker auth token to "%s"' % filepath)
      with open(filepath, 'w') as f:
        pickle.dump(obj, f)

      self.token_dirty = False
    except IOError:
      oper.Error('Unable to store auth token to file at "%s"' % filepath)

  def SetCreds(self, user, password=None):
    """Set the login credentials.

    |user| gets '@chromium.org' appended unless it already ends with it.
    If |password| is empty or None the user is prompted for it via getpass.
    """
    if not user.endswith('@chromium.org'):
      user = '%s@chromium.org' % user

    if not password:
      password = getpass.getpass('Tracker password for %s:' % user)

    self.user = user
    self.password = password
    self.creds_dirty = True

  def LoadCreds(self, filepath):
    """Load email/password credentials from |filepath|.

    The email is read from the first line and the password from the second;
    both are stripped of surrounding whitespace.  Blank lines after the
    second line are ignored.

    Raises:
      ValueError: the file has fewer than two lines, or has non-blank
        content after the second line.
      IOError: the file cannot be opened.
    """
    with open(filepath, 'r') as f:
      lines = [line.strip() for line in f]

    if len(lines) < 2 or any(lines[2:]):
      raise ValueError('Expected exactly an email line and a password line '
                       'in "%s"' % filepath)

    (self.user, self.password) = lines[:2]
    oper.Notice('Loaded Docs/Tracker login credentials from "%s"' % filepath)

  def StoreCredsIfNeeded(self, filepath):
    """Store email/password credentials to |filepath| if anything changed."""
    if self.creds_dirty:
      self.StoreCreds(filepath)

  def StoreCreds(self, filepath):
    """Store email/password credentials to |filepath|."""
    oper.Notice('Storing Docs/Tracker login credentials to "%s"' % filepath)
    # Simply write email on first line and password on second.
    # NOTE(review): the password is written in clear text; consider
    # restricting the permissions of this file.
    with open(filepath, 'w') as f:
      f.write(self.user + '\n')
      f.write(self.password + '\n')

    self.creds_dirty = False


class IssueComment(object):
  """A single comment on a Tracker issue: a title plus the comment text."""

  __slots__ = ['title', 'text']

  def __init__(self, title, text):
    self.text = text
    self.title = title

  def __str__(self):
    """Render as the title followed by the indented comment body."""
    if self.text:
      # Indent continuation lines so they line up under the first one.
      body = self.text.replace('\n', '\n  ')
    else:
      body = '<no comment>'
    return '%s:\n  %s' % (self.title, body)


class Issue(object):
  """Represents one Tracker Issue."""

  SlotDefaults = {
    'comments': [], # List of IssueComment objects
    'id': 0,        # Issue id number (int)
    'labels': [],   # List of text labels
    'owner': None,  # Current owner (text, chromium.org account)
    'status': None, # Current issue status (text) (e.g. Assigned)
    'summary': None,# Issue summary (first comment)
    'title': None,  # Title text
    }

  __slots__ = tuple(SlotDefaults)

  def __init__(self, **kwargs):
    """Init for one Issue object.

    |kwargs| - key/value arguments to give initial values to
    any additional attributes on |self|.

    Raises:
      ValueError: |kwargs| contains a key that is not one of the slots.
    """
    # Use SlotDefaults overwritten by kwargs for starting slot values.
    # List defaults are copied so that instances never share (and end up
    # mutating) the list objects stored in the class-level SlotDefaults.
    slotvals = {}
    for slot, default in self.SlotDefaults.items():
      slotvals[slot] = list(default) if isinstance(default, list) else default
    slotvals.update(kwargs)
    for slot in self.__slots__:
      setattr(self, slot, slotvals.pop(slot))
    if slotvals:
      raise ValueError('I do not know what to do with %r' % slotvals)

  def __str__(self):
    """Pretty print of issue."""
    lines = ['Issue %d - %s' % (self.id, self.title),
             'Status: %s, Owner: %s' % (self.status, self.owner),
             'Labels: %s' % ', '.join(self.labels),
             ]

    if self.summary:
      lines.append('Summary: %s' % self.summary)

    if self.comments:
      # Comments are objects (IssueComment), so convert each one to text;
      # str.join only accepts strings.
      lines.extend(str(comment) for comment in self.comments)

    return '\n'.join(lines)

  def InitFromTracker(self, t_issue, project_name):
    """Initialize |self| from tracker issue |t_issue|.

    The issue id is taken from the last '/' component of |t_issue|'s id
    text.  Comments are fetched separately with GetTrackerIssueComments,
    using |project_name|.  The owner is only set when |t_issue| has one.
    """

    self.id = int(t_issue.id.text.split('/')[-1])
    self.labels = [label.text for label in t_issue.label]
    if t_issue.owner:
      self.owner = t_issue.owner.username.text
    self.status = t_issue.status.text
    self.summary = t_issue.content.text
    self.title = t_issue.title.text
    self.comments = self.GetTrackerIssueComments(self.id, project_name)

  def GetTrackerIssueComments(self, issue_id, project_name):
    """Retrieve comments for |issue_id| from comments URL.

    Returns a list of IssueComment objects, one per 'entry' element in the
    comments feed for |issue_id| in |project_name|.
    """
    comments = []

    feeds = 'http://code.google.com/feeds'
    url = '%s/issues/p/%s/issues/%d/comments/full' % (feeds, project_name,
                                                      issue_id)
    doc = xml.dom.minidom.parse(urllib.urlopen(url))
    entries = doc.getElementsByTagName('entry')
    for entry in entries:
      title_text_list = []
      for key in ('title', 'content'):
        # An element with no child node yields None for that field.
        child = entry.getElementsByTagName(key)[0].firstChild
        title_text_list.append(child.nodeValue if child else None)
      comments.append(IssueComment(*title_text_list))

    return comments

  def __eq__(self, other):
    """Compare every field except |comments|."""
    return (self.id == other.id and self.labels == other.labels and
            self.owner == other.owner and self.status == other.status and
            self.summary == other.summary and self.title == other.title)

  def __ne__(self, other):
    return not self == other

class TrackerError(RuntimeError):
  """Error class for tracker communication errors.

  Base class of TrackerInvalidUserError.
  """


class TrackerInvalidUserError(TrackerError):
  """Error class for when user not recognized by Tracker.

  Raised by TrackerComm.CreateTrackerIssue when the Tracker request fails
  with a 'user not found' response body.
  """


class TrackerComm(object):
  """Class to manage communication with Tracker."""

  __slots__ = (
    'author',       # Author when creating/editing Tracker issues
    'it_client',    # Issue Tracker client
    'project_name', # Tracker project name
    )

  def __init__(self):
    """Start unconnected; Connect fills in all attributes."""
    self.author = None
    self.it_client = None
    self.project_name = None

  def Connect(self, creds, project_name, source='chromiumos'):
    """Login to Tracker and remember |project_name| for later requests.

    |creds| Credentials object; its tracker_auth_token is used when set,
      otherwise its user/password are used to log in and the resulting
      token is saved back onto |creds|.
    |project_name| Tracker project name
    |source| Name to associate with connecting service
    """
    self.project_name = project_name

    it_client = gdata.projecthosting.client.ProjectHostingClient()
    it_client.source = source

    if creds.tracker_auth_token:
      oper.Notice('Logging into Tracker using previous auth token.')
      it_client.auth_token = gdata.gauth.ClientLoginToken(
        creds.tracker_auth_token)
    else:
      oper.Notice('Logging into Tracker as "%s".' % creds.user)
      it_client.ClientLogin(creds.user, creds.password,
                            source=source, service='code',
                            account_type='GOOGLE')
      creds.SetTrackerAuthToken(it_client.auth_token.token_string)

    self.author = creds.user
    self.it_client = it_client

  def _QueryTracker(self, query):
    """Query the tracker for a list of issues. Return |None| on failure."""
    try:
      return self.it_client.get_issues(self.project_name, query=query)
    except gdata.client.RequestError:
      return None

  def _CreateIssue(self, t_issue):
    """Create an Issue from a Tracker Issue."""
    issue = Issue()
    issue.InitFromTracker(t_issue, self.project_name)
    return issue

  # TODO(mtennant): This method works today, but is not being actively used.
  # Leaving it in, because a logical use of the method is to verify
  # that a Tracker issue in the package spreadsheet is open, and to add
  # comments to it when new upstream versions become available.
  def GetTrackerIssueById(self, tid):
    """Get tracker issue given |tid| number.  Return Issue object if found.

    Returns None when the query fails or yields no entries.
    """

    query = gdata.projecthosting.client.Query(issue_id=str(tid))
    feed = self._QueryTracker(query)

    # _QueryTracker returns None on a request error, so guard before
    # touching feed.entry.
    if feed is not None and feed.entry:
      return self._CreateIssue(feed.entry[0])
    return None

  def GetTrackerIssuesByText(self, search_text, full_text=True,
                             only_open=True):
    """Find all Tracker Issues that contain the text search_text.

    |full_text| When False, restrict the search to issue summaries.
    |only_open| When True, restrict the search to open issues.

    Returns a list of Issue objects; empty if the query fails.
    """
    if not full_text:
      search_text = 'summary:"%s"' % search_text
    if only_open:
      search_text += ' is:open'
    query = gdata.projecthosting.client.Query(text_query=search_text)
    feed = self._QueryTracker(query)
    if feed:
      return [self._CreateIssue(tissue) for tissue in feed.entry]
    else:
      return []

  def CreateTrackerIssue(self, issue):
    """Create a new issue in Tracker according to |issue|.

    On success |issue.id| is set to the id of the created issue, and that
    id is returned.

    Raises:
      TrackerInvalidUserError: Tracker answered 'user not found'.
      gdata.client.RequestError: any other request failure.
    """
    try:
      created = self.it_client.add_issue(project_name=self.project_name,
                                         title=issue.title,
                                         content=issue.summary,
                                         author=self.author,
                                         status=issue.status,
                                         owner=issue.owner,
                                         labels=issue.labels)
      issue.id = int(created.id.text.split('/')[-1])
      return issue.id
    except gdata.client.RequestError as ex:
      if ex.body and ex.body.lower() == 'user not found':
        raise TrackerInvalidUserError('Tracker user %s not found' % issue.owner)
      raise

  def AppendTrackerIssueById(self, issue_id, comment):
    """Append |comment| to issue |issue_id| in Tracker.  Return |issue_id|."""
    self.it_client.update_issue(project_name=self.project_name,
                                issue_id=issue_id,
                                author=self.author,
                                comment=comment)
    return issue_id


class SpreadsheetRow(dict):
  """A dict of row values that also remembers where the row came from.

  The original spreadsheet row object and the spreadsheet row number are
  kept as the attributes |ss_row_obj| and |ss_row_num|.  Item assignment
  and item deletion are rejected, which makes the row semi-immutable.

  Equality checking and everything else behave exactly as for a plain dict,
  so client code may treat a SpreadsheetRow as one.
  """

  def __init__(self, ss_row_obj, ss_row_num, mapping=None):
    # An absent or empty mapping leaves the dict empty.
    dict.__init__(self, mapping or {})

    self.ss_row_num = ss_row_num
    self.ss_row_obj = ss_row_obj

  def __setitem__(self, key, val):
    raise TypeError('setting item in SpreadsheetRow not supported')

  def __delitem__(self, key):
    raise TypeError('deleting item in SpreadsheetRow not supported')


class SpreadsheetError(RuntimeError):
  """Error class for spreadsheet communication errors.

  ReadWriteDecorator raises this in place of gdata.service.RequestError,
  carrying the original error's string form as the message.
  """

def ReadWriteDecorator(func):
  """Decorate method |func| to raise SpreadsheetError if appropriate.

  Any gdata.service.RequestError raised by |func| is re-raised as a
  SpreadsheetError whose message is the string form of the original error.
  All other exceptions and the return value pass through untouched.
  """
  # functools.wraps carries over __name__, __doc__ and __module__, so the
  # wrapped methods keep their docstrings.
  @functools.wraps(func)
  def f(self, *args, **kwargs):
    try:
      return func(self, *args, **kwargs)
    except gdata.service.RequestError as ex:
      raise SpreadsheetError(str(ex))

  return f

class SpreadsheetComm(object):
  """Class to manage communication with one Google Spreadsheet worksheet."""

  # Row numbering in spreadsheets effectively starts at 2 because row 1
  # has the column headers.
  ROW_NUMBER_OFFSET = 2

  # Spreadsheet column numbers start at 1.
  COLUMN_NUMBER_OFFSET = 1

  __slots__ = (
    '_columns',    # Tuple of translated column names, filled in as needed
    '_rows',       # Tuple of Row dicts in order, filled in as needed
    'gd_client',   # Google Data client
    'ss_key',      # Spreadsheet key
    'ws_name',     # Worksheet name
    'ws_key',      # Worksheet key
    )

  @property
  def columns(self):
    """The columns property is filled in on demand.

    It is a tuple of column names, each run through PrepColNameForSS.
    """
    if self._columns is None:
      query = gdata.spreadsheet.service.CellQuery()
      query['max-row'] = '1'
      feed = self.gd_client.GetCellsFeed(self.ss_key, self.ws_key, query=query)

      # The use of PrepColNameForSS here looks weird, but the values
      # in row 1 are the unaltered column names, rather than the restricted
      # column names used for interface purposes.  In other words, if the
      # spreadsheet looks like it has a column called "Foo Bar", then the
      # first row will have a value "Foo Bar" but all interaction with that
      # column for other rows will use column key "foobar".  Translate to
      # restricted names now with PrepColNameForSS.
      cols = [PrepColNameForSS(entry.content.text) for entry in feed.entry]

      self._columns = tuple(cols)

    return self._columns

  @property
  def rows(self):
    """The rows property is filled in on demand.

    It is a tuple of SpreadsheetRow objects.
    """
    if self._rows is None:
      rows = []

      feed = self.gd_client.GetListFeed(self.ss_key, self.ws_key)
      for rowIx, rowObj in enumerate(feed.entry, start=self.ROW_NUMBER_OFFSET):
        row_dict = dict((key, ScrubValFromSS(val.text))
                        for key, val in rowObj.custom.iteritems())
        rows.append(SpreadsheetRow(rowObj, rowIx, row_dict))

      self._rows = tuple(rows)

    return self._rows

  def __init__(self):
    """Start with every slot set to None; Connect fills them in."""
    for slot in self.__slots__:
      setattr(self, slot, None)

  def Connect(self, creds, ss_key, ws_name, source='chromiumos'):
    """Login to spreadsheet service and set current worksheet.

    |creds| Credentials object for Google Docs
    |ss_key| Spreadsheet key
    |ws_name| Worksheet name
    |source| Name to associate with connecting service
    """
    self._Login(creds, source)
    self.SetCurrentWorksheet(ws_name, ss_key=ss_key)

  def SetCurrentWorksheet(self, ws_name, ss_key=None):
    """Change the current worksheet.  This clears all caches.

    |ws_name| Worksheet name
    |ss_key| Spreadsheet key; when omitted the current spreadsheet is kept.

    The caches are only cleared when the spreadsheet key or the resolved
    worksheet key actually changes.
    """
    if ss_key and ss_key != self.ss_key:
      self.ss_key = ss_key
      self._ClearCache()

    self.ws_name = ws_name

    ws_key = self._GetWorksheetKey(self.ss_key, self.ws_name)
    if ws_key != self.ws_key:
      self.ws_key = ws_key
      self._ClearCache()

  def _ClearCache(self, keep_columns=False):
    """Called whenever column/row data might be stale."""
    self._rows = None
    if not keep_columns:
      self._columns = None

  def _Login(self, creds, source):
    """Login to Google doc client using given |creds|."""
    gd_client = RetrySpreadsheetsService()
    gd_client.source = source

    # Login using previous auth token if available, otherwise
    # use email/password from creds.
    if creds.docs_auth_token:
      oper.Notice('Logging into Docs using previous auth token.')
      gd_client.SetClientLoginToken(creds.docs_auth_token)
    else:
      oper.Notice('Logging into Docs as "%s".' % creds.user)
      gd_client.email = creds.user
      gd_client.password = creds.password
      gd_client.ProgrammaticLogin()
      creds.SetDocsAuthToken(gd_client.GetClientLoginToken())

    self.gd_client = gd_client

  def _GetWorksheetKey(self, ss_key, ws_name):
    """Get the worksheet key with name |ws_name| in spreadsheet |ss_key|.

    Calls oper.Die if no worksheet with that name is found.
    """
    feed = self.gd_client.GetWorksheetsFeed(ss_key)
    # The worksheet key is the last component in the URL (after last '/')
    for entry in feed.entry:
      if ws_name == entry.title.text:
        return entry.id.text.split('/')[-1]

    oper.Die('Unable to find worksheet "%s" in spreadsheet "%s"' %
             (ws_name, ss_key))

  @ReadWriteDecorator
  def GetColumns(self):
    """Return tuple of column names in worksheet.

    Note that each returned name has been run through PrepColNameForSS.
    """
    return self.columns

  @ReadWriteDecorator
  def GetColumnIndex(self, colName):
    """Get the column index (starting at 1) for column |colName|.

    Returns None if there is no such column.
    """
    try:
      # Spreadsheet column indices start at 1, so +1.
      return self.columns.index(colName) + self.COLUMN_NUMBER_OFFSET
    except ValueError:
      return None

  @ReadWriteDecorator
  def GetRows(self):
    """Return tuple of SpreadsheetRow objects in order."""
    return self.rows

  @ReadWriteDecorator
  def GetRowCacheByCol(self, column):
    """Return a dict for looking up rows by value in |column|.

    Each row value is a SpreadsheetRow object.
    If more than one row has the same value for |column|, then the
    row objects will be in a list in the returned dict.
    """
    row_cache = {}

    for row in self.GetRows():
      col_val = row[column]

      # Test against None rather than truthiness: a SpreadsheetRow is a
      # dict, so an empty one is falsy and would otherwise be overwritten.
      current_entry = row_cache.get(col_val)
      if current_entry is None:
        row_cache[col_val] = row
      elif isinstance(current_entry, list):
        current_entry.append(row)
      else:
        row_cache[col_val] = [current_entry, row]

    return row_cache

  @ReadWriteDecorator
  def InsertRow(self, row):
    """Insert |row| at end of spreadsheet."""
    self.gd_client.InsertRow(row, self.ss_key, self.ws_key)
    self._ClearCache(keep_columns=True)

  @ReadWriteDecorator
  def UpdateRowCellByCell(self, rowIx, row):
    """Replace cell values in row at |rowIx| with those in |row| dict.

    Keys of |row| that do not match a known column are skipped.
    """
    for colName in row:
      colIx = self.GetColumnIndex(colName)
      if colIx is not None:
        self.ReplaceCellValue(rowIx, colIx, row[colName])
    self._ClearCache(keep_columns=True)

  @ReadWriteDecorator
  def DeleteRow(self, ss_row):
    """Delete the given |ss_row| (must be original spreadsheet row object)."""
    self.gd_client.DeleteRow(ss_row)
    self._ClearCache(keep_columns=True)

  @ReadWriteDecorator
  def ReplaceCellValue(self, rowIx, colIx, val):
    """Replace cell value at |rowIx| and |colIx| with |val|"""
    self.gd_client.UpdateCell(rowIx, colIx, val, self.ss_key, self.ws_key)
    self._ClearCache(keep_columns=True)

  @ReadWriteDecorator
  def ClearCellValue(self, rowIx, colIx):
    """Clear cell value at |rowIx| and |colIx|"""
    self.ReplaceCellValue(rowIx, colIx, None)


class RetrySpreadsheetsService(gdata.spreadsheet.service.SpreadsheetsService):
  """Extend SpreadsheetsService to put retry logic around http request method.

  The entire purpose of this class is to remove some flakiness from
  interactions with Google Docs spreadsheet service, in the form of
  certain 40* http error responses to http requests.  This is documented in
  http://code.google.com/p/chromium-os/issues/detail?id=23819.
  There are two "request" methods that need to be wrapped in retry logic.
  1) The request method on self.  Original implementation is in
     base class atom.service.AtomService.
  2) The request method on self.http_client.  The class of self.http_client
     can actually vary, so the original implementation of the request
     method can also vary.
  """
  # pylint: disable=R0904

  # Maximum number of times a request is attempted before giving up.
  TRY_MAX = 5
  # HTTP status codes that trigger another attempt.
  RETRYABLE_STATUSES = (403,)

  def __init__(self, *args, **kwargs):
    """Init the base service, then wrap both request methods with retries."""
    gdata.spreadsheet.service.SpreadsheetsService.__init__(self, *args,
                                                           **kwargs)

    # Wrap self.http_client.request with retry wrapper.  This request method
    # is used by ProgrammaticLogin(), at least.
    if hasattr(self, 'http_client'):
      self.http_client.request = functools.partial(self._RetryRequest,
                                                   self.http_client.request)

    self.request = functools.partial(self._RetryRequest, self.request)

  def _RetryRequest(self, func, *args, **kwargs):
    """Retry wrapper for bound |func|, passing |args| and |kwargs|.

    This retry wrapper can be used for any http request |func| that provides
    an http status code via the .status attribute of the returned value.

    Retry when the status value on the return object is in RETRYABLE_STATUSES,
    and run up to TRY_MAX times.  If successful (whether or not retries
    were necessary) return the last return value returned from base method.
    If unsuccessful return the first return value returned from base method.
    """
    first_retval = None
    for try_ix in xrange(1, self.TRY_MAX + 1):
      retval = func(*args, **kwargs)
      if retval.status not in self.RETRYABLE_STATUSES:
        return retval
      else:
        oper.Warning('Retry-able HTTP request failure (status=%d), try %d/%d' %
                     (retval.status, try_ix, self.TRY_MAX))
        # Compare against None explicitly: the check means "not recorded
        # yet" and must not depend on the response object's truthiness.
        if first_retval is None:
          first_retval = retval

    oper.Warning('Giving up on HTTP request after %d tries' % self.TRY_MAX)
    return first_retval
Tech Fingerprint

Alerts (15)

'open(' Use 'with open()' to ensure Files are properly closed
103 134 256
'def' Ensure functions have docstrings for documentation
142 297 344
Complexity hotspot; lines 268 to 270 (total complexity: 3)
268 269 270
Complexity hotspot; lines 353 to 355 (total complexity: 3)
353 354 355
Complexity hotspot; lines 370 to 371 (total complexity: 3)
370 371
'type(' Use isinstance() for type checking instead of type()
582