PageRenderTime 487ms CodeModel.GetById 81ms app.highlight 323ms RepoModel.GetById 38ms app.codeStats 2ms

/upload-diffs.py

http://googlecl.googlecode.com/
Python | 1784 lines | 1695 code | 17 blank | 72 comment | 45 complexity | 1589180e0197a5f44945f980888474be MD5 | raw file
   1#!/usr/bin/env python
   2#
   3# Copyright 2007 Google Inc.
   4#
   5# Licensed under the Apache License, Version 2.0 (the "License");
   6# you may not use this file except in compliance with the License.
   7# You may obtain a copy of the License at
   8#
   9#     http://www.apache.org/licenses/LICENSE-2.0
  10#
  11# Unless required by applicable law or agreed to in writing, software
  12# distributed under the License is distributed on an "AS IS" BASIS,
  13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14# See the License for the specific language governing permissions and
  15# limitations under the License.
  16
  17"""Tool for uploading diffs from a version control system to the codereview app.
  18
  19Usage summary: upload.py [options] [-- diff_options] [path...]
  20
  21Diff options are passed to the diff command of the underlying system.
  22
  23Supported version control systems:
  24  Git
  25  Mercurial
  26  Subversion
  27
  28It is important for Git/Mercurial users to specify a tree/node/branch to diff
  29against by using the '--rev' option.
  30"""
  31# Taken from rietveld trunk (http://code.google.com/p/rietveld), r579
  32
  33# This code is derived from appcfg.py in the App Engine SDK (open source),
  34# and from ASPN recipe #146306.
  35
  36import ConfigParser
  37import cookielib
  38import fnmatch
  39import getpass
  40import logging
  41import mimetypes
  42import optparse
  43import os
  44import re
  45import socket
  46import subprocess
  47import sys
  48import urllib
  49import urllib2
  50import urlparse
  51
  52# The md5 module was deprecated in Python 2.5.
  53try:
  54  from hashlib import md5
  55except ImportError:
  56  from md5 import md5
  57
  58try:
  59  import readline
  60except ImportError:
  61  pass
  62
  63try:
  64  import keyring
  65except ImportError:
  66  keyring = None
  67
# Constants for GoogleCL.
# These are the defaults of the --reviewers and --cc command line options.
DEFAULT_REVIEWER = 'tom.h.miller@gmail.com'
DEFAULT_CC = 'googlecl-dev@googlegroups.com'

# The logging verbosity (read by StatusUpdate):
#  0: Errors only.
#  1: Status messages.
#  2: Info logs.
#  3: Debug logs.
verbosity = 1

# The account type used for authentication.
# This line could be changed by the review server (see handler for
# upload.py).
AUTH_ACCOUNT_TYPE = "GOOGLE"

# URL of the default review server. As for AUTH_ACCOUNT_TYPE, this line could be
# changed by the review server (see handler for upload.py).
DEFAULT_REVIEW_SERVER = "codereview.appspot.com"

# Max size of patch or base file; compared against len() of the file content,
# so files longer than 900 * 1024 are not uploaded.
MAX_UPLOAD_SIZE = 900 * 1024

# Constants for version control names.  Used by GuessVCSName.
VCS_GIT = "Git"
VCS_MERCURIAL = "Mercurial"
VCS_SUBVERSION = "Subversion"
VCS_UNKNOWN = "Unknown"

# Whitelist for non-binary filetypes which do not start with "text/".
# .mm (Objective-C) shows up as application/x-freemind on my Linux box.
TEXT_MIMETYPES = ['application/javascript', 'application/x-javascript',
                  'application/xml', 'application/x-freemind',
                  'application/x-sh']

# Maps lower-case VCS names and common abbreviations to the canonical VCS_*
# constant.
VCS_ABBREVIATIONS = {
  VCS_MERCURIAL.lower(): VCS_MERCURIAL,
  "hg": VCS_MERCURIAL,
  VCS_SUBVERSION.lower(): VCS_SUBVERSION,
  "svn": VCS_SUBVERSION,
  VCS_GIT.lower(): VCS_GIT,
}

# The result of parsing Subversion's [auto-props] setting.
svn_auto_props_map = None
 113
 114def GetEmail(prompt):
 115  """Prompts the user for their email address and returns it.
 116
 117  The last used email address is saved to a file and offered up as a suggestion
 118  to the user. If the user presses enter without typing in anything the last
 119  used email address is used. If the user enters a new address, it is saved
 120  for next time we prompt.
 121
 122  """
 123  last_email_file_name = os.path.expanduser("~/.last_codereview_email_address")
 124  last_email = ""
 125  if os.path.exists(last_email_file_name):
 126    try:
 127      last_email_file = open(last_email_file_name, "r")
 128      last_email = last_email_file.readline().strip("\n")
 129      last_email_file.close()
 130      prompt += " [%s]" % last_email
 131    except IOError, e:
 132      pass
 133  email = raw_input(prompt + ": ").strip()
 134  if email:
 135    try:
 136      last_email_file = open(last_email_file_name, "w")
 137      last_email_file.write(email)
 138      last_email_file.close()
 139    except IOError, e:
 140      pass
 141  else:
 142    email = last_email
 143  return email
 144
 145
 146def StatusUpdate(msg):
 147  """Print a status message to stdout.
 148
 149  If 'verbosity' is greater than 0, print the message.
 150
 151  Args:
 152    msg: The string to print.
 153  """
 154  if verbosity > 0:
 155    print msg
 156
 157
 158def ErrorExit(msg):
 159  """Print an error message to stderr and exit."""
 160  print >>sys.stderr, msg
 161  sys.exit(1)
 162
 163
 164class ClientLoginError(urllib2.HTTPError):
 165  """Raised to indicate there was an error authenticating with ClientLogin."""
 166
 167  def __init__(self, url, code, msg, headers, args):
 168    urllib2.HTTPError.__init__(self, url, code, msg, headers, None)
 169    self.args = args
 170    self.reason = args["Error"]
 171
 172
 173class AbstractRpcServer(object):
 174  """Provides a common interface for a simple RPC server."""
 175
 176  def __init__(self, host, auth_function, host_override=None, extra_headers={},
 177               save_cookies=False, account_type=AUTH_ACCOUNT_TYPE):
 178    """Creates a new HttpRpcServer.
 179
 180    Args:
 181      host: The host to send requests to.
 182      auth_function: A function that takes no arguments and returns an
 183        (email, password) tuple when called. Will be called if authentication
 184        is required.
 185      host_override: The host header to send to the server (defaults to host).
 186      extra_headers: A dict of extra headers to append to every request.
 187      save_cookies: If True, save the authentication cookies to local disk.
 188        If False, use an in-memory cookiejar instead.  Subclasses must
 189        implement this functionality.  Defaults to False.
 190      account_type: Account type used for authentication. Defaults to
 191        AUTH_ACCOUNT_TYPE.
 192    """
 193    self.host = host
 194    if (not self.host.startswith("http://") and
 195        not self.host.startswith("https://")):
 196      self.host = "http://" + self.host
 197    self.host_override = host_override
 198    self.auth_function = auth_function
 199    self.authenticated = False
 200    self.extra_headers = extra_headers
 201    self.save_cookies = save_cookies
 202    self.account_type = account_type
 203    self.opener = self._GetOpener()
 204    if self.host_override:
 205      logging.info("Server: %s; Host: %s", self.host, self.host_override)
 206    else:
 207      logging.info("Server: %s", self.host)
 208
 209  def _GetOpener(self):
 210    """Returns an OpenerDirector for making HTTP requests.
 211
 212    Returns:
 213      A urllib2.OpenerDirector object.
 214    """
 215    raise NotImplementedError()
 216
 217  def _CreateRequest(self, url, data=None):
 218    """Creates a new urllib request."""
 219    logging.debug("Creating request for: '%s' with payload:\n%s", url, data)
 220    req = urllib2.Request(url, data=data)
 221    if self.host_override:
 222      req.add_header("Host", self.host_override)
 223    for key, value in self.extra_headers.iteritems():
 224      req.add_header(key, value)
 225    return req
 226
 227  def _GetAuthToken(self, email, password):
 228    """Uses ClientLogin to authenticate the user, returning an auth token.
 229
 230    Args:
 231      email:    The user's email address
 232      password: The user's password
 233
 234    Raises:
 235      ClientLoginError: If there was an error authenticating with ClientLogin.
 236      HTTPError: If there was some other form of HTTP error.
 237
 238    Returns:
 239      The authentication token returned by ClientLogin.
 240    """
 241    account_type = self.account_type
 242    if self.host.endswith(".google.com"):
 243      # Needed for use inside Google.
 244      account_type = "HOSTED"
 245    req = self._CreateRequest(
 246        url="https://www.google.com/accounts/ClientLogin",
 247        data=urllib.urlencode({
 248            "Email": email,
 249            "Passwd": password,
 250            "service": "ah",
 251            "source": "rietveld-codereview-upload",
 252            "accountType": account_type,
 253        }),
 254    )
 255    try:
 256      response = self.opener.open(req)
 257      response_body = response.read()
 258      response_dict = dict(x.split("=")
 259                           for x in response_body.split("\n") if x)
 260      return response_dict["Auth"]
 261    except urllib2.HTTPError, e:
 262      if e.code == 403:
 263        body = e.read()
 264        response_dict = dict(x.split("=", 1) for x in body.split("\n") if x)
 265        raise ClientLoginError(req.get_full_url(), e.code, e.msg,
 266                               e.headers, response_dict)
 267      else:
 268        raise
 269
 270  def _GetAuthCookie(self, auth_token):
 271    """Fetches authentication cookies for an authentication token.
 272
 273    Args:
 274      auth_token: The authentication token returned by ClientLogin.
 275
 276    Raises:
 277      HTTPError: If there was an error fetching the authentication cookies.
 278    """
 279    # This is a dummy value to allow us to identify when we're successful.
 280    continue_location = "http://localhost/"
 281    args = {"continue": continue_location, "auth": auth_token}
 282    req = self._CreateRequest("%s/_ah/login?%s" %
 283                              (self.host, urllib.urlencode(args)))
 284    try:
 285      response = self.opener.open(req)
 286    except urllib2.HTTPError, e:
 287      response = e
 288    if (response.code != 302 or
 289        response.info()["location"] != continue_location):
 290      raise urllib2.HTTPError(req.get_full_url(), response.code, response.msg,
 291                              response.headers, response.fp)
 292    self.authenticated = True
 293
 294  def _Authenticate(self):
 295    """Authenticates the user.
 296
 297    The authentication process works as follows:
 298     1) We get a username and password from the user
 299     2) We use ClientLogin to obtain an AUTH token for the user
 300        (see http://code.google.com/apis/accounts/AuthForInstalledApps.html).
 301     3) We pass the auth token to /_ah/login on the server to obtain an
 302        authentication cookie. If login was successful, it tries to redirect
 303        us to the URL we provided.
 304
 305    If we attempt to access the upload API without first obtaining an
 306    authentication cookie, it returns a 401 response (or a 302) and
 307    directs us to authenticate ourselves with ClientLogin.
 308    """
 309    for i in range(3):
 310      credentials = self.auth_function()
 311      try:
 312        auth_token = self._GetAuthToken(credentials[0], credentials[1])
 313      except ClientLoginError, e:
 314        if e.reason == "BadAuthentication":
 315          print >>sys.stderr, "Invalid username or password."
 316          continue
 317        if e.reason == "CaptchaRequired":
 318          print >>sys.stderr, (
 319              "Please go to\n"
 320              "https://www.google.com/accounts/DisplayUnlockCaptcha\n"
 321              "and verify you are a human.  Then try again.\n"
 322              "If you are using a Google Apps account the URL is:\n"
 323              "https://www.google.com/a/yourdomain.com/UnlockCaptcha")
 324          break
 325        if e.reason == "NotVerified":
 326          print >>sys.stderr, "Account not verified."
 327          break
 328        if e.reason == "TermsNotAgreed":
 329          print >>sys.stderr, "User has not agreed to TOS."
 330          break
 331        if e.reason == "AccountDeleted":
 332          print >>sys.stderr, "The user account has been deleted."
 333          break
 334        if e.reason == "AccountDisabled":
 335          print >>sys.stderr, "The user account has been disabled."
 336          break
 337        if e.reason == "ServiceDisabled":
 338          print >>sys.stderr, ("The user's access to the service has been "
 339                               "disabled.")
 340          break
 341        if e.reason == "ServiceUnavailable":
 342          print >>sys.stderr, "The service is not available; try again later."
 343          break
 344        raise
 345      self._GetAuthCookie(auth_token)
 346      return
 347
 348  def Send(self, request_path, payload=None,
 349           content_type="application/octet-stream",
 350           timeout=None,
 351           extra_headers=None,
 352           **kwargs):
 353    """Sends an RPC and returns the response.
 354
 355    Args:
 356      request_path: The path to send the request to, eg /api/appversion/create.
 357      payload: The body of the request, or None to send an empty request.
 358      content_type: The Content-Type header to use.
 359      timeout: timeout in seconds; default None i.e. no timeout.
 360        (Note: for large requests on OS X, the timeout doesn't work right.)
 361      extra_headers: Dict containing additional HTTP headers that should be
 362        included in the request (string header names mapped to their values),
 363        or None to not include any additional headers.
 364      kwargs: Any keyword arguments are converted into query string parameters.
 365
 366    Returns:
 367      The response body, as a string.
 368    """
 369    # TODO: Don't require authentication.  Let the server say
 370    # whether it is necessary.
 371    if not self.authenticated:
 372      self._Authenticate()
 373
 374    old_timeout = socket.getdefaulttimeout()
 375    socket.setdefaulttimeout(timeout)
 376    try:
 377      tries = 0
 378      while True:
 379        tries += 1
 380        args = dict(kwargs)
 381        url = "%s%s" % (self.host, request_path)
 382        if args:
 383          url += "?" + urllib.urlencode(args)
 384        req = self._CreateRequest(url=url, data=payload)
 385        req.add_header("Content-Type", content_type)
 386        if extra_headers:
 387          for header, value in extra_headers.items():
 388            req.add_header(header, value)
 389        try:
 390          f = self.opener.open(req)
 391          response = f.read()
 392          f.close()
 393          return response
 394        except urllib2.HTTPError, e:
 395          if tries > 3:
 396            raise
 397          elif e.code == 401 or e.code == 302:
 398            self._Authenticate()
 399##           elif e.code >= 500 and e.code < 600:
 400##             # Server Error - try again.
 401##             continue
 402          else:
 403            raise
 404    finally:
 405      socket.setdefaulttimeout(old_timeout)
 406
 407
 408class HttpRpcServer(AbstractRpcServer):
 409  """Provides a simplified RPC-style interface for HTTP requests."""
 410
 411  def _Authenticate(self):
 412    """Save the cookie jar after authentication."""
 413    super(HttpRpcServer, self)._Authenticate()
 414    if self.save_cookies:
 415      StatusUpdate("Saving authentication cookies to %s" % self.cookie_file)
 416      self.cookie_jar.save()
 417
 418  def _GetOpener(self):
 419    """Returns an OpenerDirector that supports cookies and ignores redirects.
 420
 421    Returns:
 422      A urllib2.OpenerDirector object.
 423    """
 424    opener = urllib2.OpenerDirector()
 425    opener.add_handler(urllib2.ProxyHandler())
 426    opener.add_handler(urllib2.UnknownHandler())
 427    opener.add_handler(urllib2.HTTPHandler())
 428    opener.add_handler(urllib2.HTTPDefaultErrorHandler())
 429    opener.add_handler(urllib2.HTTPSHandler())
 430    opener.add_handler(urllib2.HTTPErrorProcessor())
 431    if self.save_cookies:
 432      self.cookie_file = os.path.expanduser("~/.codereview_upload_cookies")
 433      self.cookie_jar = cookielib.MozillaCookieJar(self.cookie_file)
 434      if os.path.exists(self.cookie_file):
 435        try:
 436          self.cookie_jar.load()
 437          self.authenticated = True
 438          StatusUpdate("Loaded authentication cookies from %s" %
 439                       self.cookie_file)
 440        except (cookielib.LoadError, IOError):
 441          # Failed to load cookies - just ignore them.
 442          pass
 443      else:
 444        # Create an empty cookie file with mode 600
 445        fd = os.open(self.cookie_file, os.O_CREAT, 0600)
 446        os.close(fd)
 447      # Always chmod the cookie file
 448      os.chmod(self.cookie_file, 0600)
 449    else:
 450      # Don't save cookies across runs of update.py.
 451      self.cookie_jar = cookielib.CookieJar()
 452    opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar))
 453    return opener
 454
 455
# Command line interface.  Everything after a literal "--" on the command
# line is meant for the diff command of the underlying VCS (see usage).
parser = optparse.OptionParser(
    usage="%prog [options] [-- diff_options] [path...]")
parser.add_option("-y", "--assume_yes", action="store_true",
                  dest="assume_yes", default=False,
                  help="Assume that the answer to yes/no questions is 'yes'.")
# Logging
# All three flags store into the same 'verbose' destination (default 1).
group = parser.add_option_group("Logging options")
group.add_option("-q", "--quiet", action="store_const", const=0,
                 dest="verbose", help="Print errors only.")
group.add_option("-v", "--verbose", action="store_const", const=2,
                 dest="verbose", default=1,
                 help="Print info level logs.")
group.add_option("--noisy", action="store_const", const=3,
                 dest="verbose", help="Print all logs.")
# Review server
group = parser.add_option_group("Review server options")
group.add_option("-s", "--server", action="store", dest="server",
                 default=DEFAULT_REVIEW_SERVER,
                 metavar="SERVER",
                 help=("The server to upload to. The format is host[:port]. "
                       "Defaults to '%default'."))
group.add_option("-e", "--email", action="store", dest="email",
                 metavar="EMAIL", default=None,
                 help="The username to use. Will prompt if omitted.")
group.add_option("-H", "--host", action="store", dest="host",
                 metavar="HOST", default=None,
                 help="Overrides the Host header sent with all RPCs.")
# Cookies are saved by default; --no_cookies turns 'save_cookies' off.
group.add_option("--no_cookies", action="store_false",
                 dest="save_cookies", default=True,
                 help="Do not save authentication cookies to local disk.")
group.add_option("--account_type", action="store", dest="account_type",
                 metavar="TYPE", default=AUTH_ACCOUNT_TYPE,
                 choices=["GOOGLE", "HOSTED"],
                 help=("Override the default account type "
                       "(defaults to '%default', "
                       "valid choices are 'GOOGLE' and 'HOSTED')."))
# Issue
group = parser.add_option_group("Issue options")
group.add_option("-d", "--description", action="store", dest="description",
                 metavar="DESCRIPTION", default=None,
                 help="Optional description when creating an issue.")
group.add_option("-f", "--description_file", action="store",
                 dest="description_file", metavar="DESCRIPTION_FILE",
                 default=None,
                 help="Optional path of a file that contains "
                      "the description when creating an issue.")
# Reviewers and CC default to the GoogleCL constants rather than to nobody.
group.add_option("-r", "--reviewers", action="store", dest="reviewers",
                 metavar="REVIEWERS", default=DEFAULT_REVIEWER,
                 help="Add reviewers (comma separated email addresses).")
group.add_option("--cc", action="store", dest="cc",
                 metavar="CC", default=DEFAULT_CC,
                 help="Add CC (comma separated email addresses).")
group.add_option("--private", action="store_true", dest="private",
                 default=False,
                 help="Make the issue restricted to reviewers and those CCed")
# Upload options
group = parser.add_option_group("Patch options")
group.add_option("-m", "--message", action="store", dest="message",
                 metavar="MESSAGE", default=None,
                 help="A message to identify the patch. "
                      "Will prompt if omitted.")
# No explicit dest is given here, so optparse derives 'issue' from the
# long option name.
group.add_option("-i", "--issue", type="int", action="store",
                 metavar="ISSUE", default=None,
                 help="Issue number to which to add. Defaults to new issue.")
group.add_option("--base_url", action="store", dest="base_url", default=None,
                 help="Base repository URL (listed as \"Base URL\" when "
                 "viewing issue).  If omitted, will be guessed automatically "
                 "for SVN repos and left blank for others.")
group.add_option("--download_base", action="store_true",
                 dest="download_base", default=False,
                 help="Base files will be downloaded by the server "
                 "(side-by-side diffs may not work on files with CRs).")
group.add_option("--rev", action="store", dest="revision",
                 metavar="REV", default=None,
                 help="Base revision/branch/tree to diff against. Use "
                      "rev1:rev2 range to review already committed changeset.")
group.add_option("--send_mail", action="store_true",
                 dest="send_mail", default=False,
                 help="Send notification email to reviewers.")
group.add_option("--vcs", action="store", dest="vcs",
                 metavar="VCS", default=None,
                 help=("Version control system (optional, usually upload.py "
                       "already guesses the right VCS)."))
group.add_option("--emulate_svn_auto_props", action="store_true",
                 dest="emulate_svn_auto_props", default=False,
                 help=("Emulate Subversion's auto properties feature."))
 542
 543
 544def GetRpcServer(server, email=None, host_override=None, save_cookies=True,
 545                 account_type=AUTH_ACCOUNT_TYPE):
 546  """Returns an instance of an AbstractRpcServer.
 547
 548  Args:
 549    server: String containing the review server URL.
 550    email: String containing user's email address.
 551    host_override: If not None, string containing an alternate hostname to use
 552      in the host header.
 553    save_cookies: Whether authentication cookies should be saved to disk.
 554    account_type: Account type for authentication, either 'GOOGLE'
 555      or 'HOSTED'. Defaults to AUTH_ACCOUNT_TYPE.
 556
 557  Returns:
 558    A new AbstractRpcServer, on which RPC calls can be made.
 559  """
 560
 561  rpc_server_class = HttpRpcServer
 562
 563  # If this is the dev_appserver, use fake authentication.
 564  host = (host_override or server).lower()
 565  if host == "localhost" or host.startswith("localhost:"):
 566    if email is None:
 567      email = "test@example.com"
 568      logging.info("Using debug user %s.  Override with --email" % email)
 569    server = rpc_server_class(
 570        server,
 571        lambda: (email, "password"),
 572        host_override=host_override,
 573        extra_headers={"Cookie":
 574                       'dev_appserver_login="%s:False"' % email},
 575        save_cookies=save_cookies,
 576        account_type=account_type)
 577    # Don't try to talk to ClientLogin.
 578    server.authenticated = True
 579    return server
 580
 581  def GetUserCredentials():
 582    """Prompts the user for a username and password."""
 583    # Create a local alias to the email variable to avoid Python's crazy
 584    # scoping rules.
 585    local_email = email
 586    if local_email is None:
 587      local_email = GetEmail("Email (login for uploading to %s)" % server)
 588    password = None
 589    if keyring:
 590      password = keyring.get_password(host, local_email)
 591    if password is not None:
 592      print "Using password from system keyring."
 593    else:
 594      password = getpass.getpass("Password for %s: " % local_email)
 595      if keyring:
 596        answer = raw_input("Store password in system keyring?(y/N) ").strip()
 597        if answer == "y":
 598          keyring.set_password(host, local_email, password)
 599    return (local_email, password)
 600
 601  return rpc_server_class(server,
 602                          GetUserCredentials,
 603                          host_override=host_override,
 604                          save_cookies=save_cookies)
 605
 606
 607def EncodeMultipartFormData(fields, files):
 608  """Encode form fields for multipart/form-data.
 609
 610  Args:
 611    fields: A sequence of (name, value) elements for regular form fields.
 612    files: A sequence of (name, filename, value) elements for data to be
 613           uploaded as files.
 614  Returns:
 615    (content_type, body) ready for httplib.HTTP instance.
 616
 617  Source:
 618    http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306
 619  """
 620  BOUNDARY = '-M-A-G-I-C---B-O-U-N-D-A-R-Y-'
 621  CRLF = '\r\n'
 622  lines = []
 623  for (key, value) in fields:
 624    lines.append('--' + BOUNDARY)
 625    lines.append('Content-Disposition: form-data; name="%s"' % key)
 626    lines.append('')
 627    if isinstance(value, unicode):
 628      value = value.encode('utf-8')
 629    lines.append(value)
 630  for (key, filename, value) in files:
 631    lines.append('--' + BOUNDARY)
 632    lines.append('Content-Disposition: form-data; name="%s"; filename="%s"' %
 633             (key, filename))
 634    lines.append('Content-Type: %s' % GetContentType(filename))
 635    lines.append('')
 636    if isinstance(value, unicode):
 637      value = value.encode('utf-8')
 638    lines.append(value)
 639  lines.append('--' + BOUNDARY + '--')
 640  lines.append('')
 641  body = CRLF.join(lines)
 642  content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
 643  return content_type, body
 644
 645
 646def GetContentType(filename):
 647  """Helper to guess the content-type from the filename."""
 648  return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
 649
 650
 651# Use a shell for subcommands on Windows to get a PATH search.
 652use_shell = sys.platform.startswith("win")
 653
 654def RunShellWithReturnCode(command, print_output=False,
 655                           universal_newlines=True,
 656                           env=os.environ):
 657  """Executes a command and returns the output from stdout and the return code.
 658
 659  Args:
 660    command: Command to execute.
 661    print_output: If True, the output is printed to stdout.
 662                  If False, both stdout and stderr are ignored.
 663    universal_newlines: Use universal_newlines flag (default: True).
 664
 665  Returns:
 666    Tuple (output, return code)
 667  """
 668  logging.info("Running %s", command)
 669  p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
 670                       shell=use_shell, universal_newlines=universal_newlines,
 671                       env=env)
 672  if print_output:
 673    output_array = []
 674    while True:
 675      line = p.stdout.readline()
 676      if not line:
 677        break
 678      print line.strip("\n")
 679      output_array.append(line)
 680    output = "".join(output_array)
 681  else:
 682    output = p.stdout.read()
 683  p.wait()
 684  errout = p.stderr.read()
 685  if print_output and errout:
 686    print >>sys.stderr, errout
 687  p.stdout.close()
 688  p.stderr.close()
 689  return output, p.returncode
 690
 691
 692def RunShell(command, silent_ok=False, universal_newlines=True,
 693             print_output=False, env=os.environ):
 694  data, retcode = RunShellWithReturnCode(command, print_output,
 695                                         universal_newlines, env)
 696  if retcode:
 697    ErrorExit("Got error status from %s:\n%s" % (command, data))
 698  if not silent_ok and not data:
 699    ErrorExit("No output from %s" % command)
 700  return data
 701
 702
 703class VersionControlSystem(object):
 704  """Abstract base class providing an interface to the VCS."""
 705
  def __init__(self, options):
    """Constructor.

    Args:
      options: Command line options; stored unchanged as self.options.
    """
    self.options = options
 713
  def PostProcessDiff(self, diff):
    """Return the diff with any special post processing this VCS needs, e.g.
    to include an svn-style "Index:".

    This base implementation returns the diff unchanged.
    """
    return diff
 718
 719  def GenerateDiff(self, args):
 720    """Return the current diff as a string.
 721
 722    Args:
 723      args: Extra arguments to pass to the diff command.
 724    """
 725    raise NotImplementedError(
 726        "abstract method -- subclass %s must override" % self.__class__)
 727
 728  def GetUnknownFiles(self):
 729    """Return a list of files unknown to the VCS."""
 730    raise NotImplementedError(
 731        "abstract method -- subclass %s must override" % self.__class__)
 732
 733  def CheckForUnknownFiles(self):
 734    """Show an "are you sure?" prompt if there are unknown files."""
 735    unknown_files = self.GetUnknownFiles()
 736    if unknown_files:
 737      print "The following files are not added to version control:"
 738      for line in unknown_files:
 739        print line
 740      prompt = "Are you sure to continue?(y/N) "
 741      answer = raw_input(prompt).strip()
 742      if answer != "y":
 743        ErrorExit("User aborted")
 744
 745  def GetBaseFile(self, filename):
 746    """Get the content of the upstream version of a file.
 747
 748    Returns:
 749      A tuple (base_content, new_content, is_binary, status)
 750        base_content: The contents of the base file.
 751        new_content: For text files, this is empty.  For binary files, this is
 752          the contents of the new file, since the diff output won't contain
 753          information to reconstruct the current file.
 754        is_binary: True iff the file is binary.
 755        status: The status of the file.
 756    """
 757
 758    raise NotImplementedError(
 759        "abstract method -- subclass %s must override" % self.__class__)
 760
 761
 762  def GetBaseFiles(self, diff):
 763    """Helper that calls GetBase file for each file in the patch.
 764
 765    Returns:
 766      A dictionary that maps from filename to GetBaseFile's tuple.  Filenames
 767      are retrieved based on lines that start with "Index:" or
 768      "Property changes on:".
 769    """
 770    files = {}
 771    for line in diff.splitlines(True):
 772      if line.startswith('Index:') or line.startswith('Property changes on:'):
 773        unused, filename = line.split(':', 1)
 774        # On Windows if a file has property changes its filename uses '\'
 775        # instead of '/'.
 776        filename = filename.strip().replace('\\', '/')
 777        files[filename] = self.GetBaseFile(filename)
 778    return files
 779
 780
 781  def UploadBaseFiles(self, issue, rpc_server, patch_list, patchset, options,
 782                      files):
 783    """Uploads the base files (and if necessary, the current ones as well)."""
 784
 785    def UploadFile(filename, file_id, content, is_binary, status, is_base):
 786      """Uploads a file to the server."""
 787      file_too_large = False
 788      if is_base:
 789        type = "base"
 790      else:
 791        type = "current"
 792      if len(content) > MAX_UPLOAD_SIZE:
 793        print ("Not uploading the %s file for %s because it's too large." %
 794               (type, filename))
 795        file_too_large = True
 796        content = ""
 797      checksum = md5(content).hexdigest()
 798      if options.verbose > 0 and not file_too_large:
 799        print "Uploading %s file for %s" % (type, filename)
 800      url = "/%d/upload_content/%d/%d" % (int(issue), int(patchset), file_id)
 801      form_fields = [("filename", filename),
 802                     ("status", status),
 803                     ("checksum", checksum),
 804                     ("is_binary", str(is_binary)),
 805                     ("is_current", str(not is_base)),
 806                    ]
 807      if file_too_large:
 808        form_fields.append(("file_too_large", "1"))
 809      if options.email:
 810        form_fields.append(("user", options.email))
 811      ctype, body = EncodeMultipartFormData(form_fields,
 812                                            [("data", filename, content)])
 813      response_body = rpc_server.Send(url, body,
 814                                      content_type=ctype)
 815      if not response_body.startswith("OK"):
 816        StatusUpdate("  --> %s" % response_body)
 817        sys.exit(1)
 818
 819    patches = dict()
 820    [patches.setdefault(v, k) for k, v in patch_list]
 821    for filename in patches.keys():
 822      base_content, new_content, is_binary, status = files[filename]
 823      file_id_str = patches.get(filename)
 824      if file_id_str.find("nobase") != -1:
 825        base_content = None
 826        file_id_str = file_id_str[file_id_str.rfind("_") + 1:]
 827      file_id = int(file_id_str)
 828      if base_content != None:
 829        UploadFile(filename, file_id, base_content, is_binary, status, True)
 830      if new_content != None:
 831        UploadFile(filename, file_id, new_content, is_binary, status, False)
 832
 833  def IsImage(self, filename):
 834    """Returns true if the filename has an image extension."""
 835    mimetype =  mimetypes.guess_type(filename)[0]
 836    if not mimetype:
 837      return False
 838    return mimetype.startswith("image/")
 839
 840  def IsBinary(self, filename):
 841    """Returns true if the guessed mimetyped isnt't in text group."""
 842    mimetype = mimetypes.guess_type(filename)[0]
 843    if not mimetype:
 844      return False  # e.g. README, "real" binaries usually have an extension
 845    # special case for text files which don't start with text/
 846    if mimetype in TEXT_MIMETYPES:
 847      return False
 848    return not mimetype.startswith("text/")
 849
 850
 851class SubversionVCS(VersionControlSystem):
 852  """Implementation of the VersionControlSystem interface for Subversion."""
 853
 854  def __init__(self, options):
 855    super(SubversionVCS, self).__init__(options)
 856    if self.options.revision:
 857      match = re.match(r"(\d+)(:(\d+))?", self.options.revision)
 858      if not match:
 859        ErrorExit("Invalid Subversion revision %s." % self.options.revision)
 860      self.rev_start = match.group(1)
 861      self.rev_end = match.group(3)
 862    else:
 863      self.rev_start = self.rev_end = None
 864    # Cache output from "svn list -r REVNO dirname".
 865    # Keys: dirname, Values: 2-tuple (ouput for start rev and end rev).
 866    self.svnls_cache = {}
 867    # Base URL is required to fetch files deleted in an older revision.
 868    # Result is cached to not guess it over and over again in GetBaseFile().
 869    required = self.options.download_base or self.options.revision is not None
 870    self.svn_base = self._GuessBase(required)
 871
 872  def GuessBase(self, required):
 873    """Wrapper for _GuessBase."""
 874    return self.svn_base
 875
 876  def _GuessBase(self, required):
 877    """Returns the SVN base URL.
 878
 879    Args:
 880      required: If true, exits if the url can't be guessed, otherwise None is
 881        returned.
 882    """
 883    info = RunShell(["svn", "info"])
 884    for line in info.splitlines():
 885      words = line.split()
 886      if len(words) == 2 and words[0] == "URL:":
 887        url = words[1]
 888        scheme, netloc, path, params, query, fragment = urlparse.urlparse(url)
 889        username, netloc = urllib.splituser(netloc)
 890        if username:
 891          logging.info("Removed username from base URL")
 892        if netloc.endswith("svn.python.org"):
 893          if netloc == "svn.python.org":
 894            if path.startswith("/projects/"):
 895              path = path[9:]
 896          elif netloc != "pythondev@svn.python.org":
 897            ErrorExit("Unrecognized Python URL: %s" % url)
 898          base = "http://svn.python.org/view/*checkout*%s/" % path
 899          logging.info("Guessed Python base = %s", base)
 900        elif netloc.endswith("svn.collab.net"):
 901          if path.startswith("/repos/"):
 902            path = path[6:]
 903          base = "http://svn.collab.net/viewvc/*checkout*%s/" % path
 904          logging.info("Guessed CollabNet base = %s", base)
 905        elif netloc.endswith(".googlecode.com"):
 906          path = path + "/"
 907          base = urlparse.urlunparse(("http", netloc, path, params,
 908                                      query, fragment))
 909          logging.info("Guessed Google Code base = %s", base)
 910        else:
 911          path = path + "/"
 912          base = urlparse.urlunparse((scheme, netloc, path, params,
 913                                      query, fragment))
 914          logging.info("Guessed base = %s", base)
 915        return base
 916    if required:
 917      ErrorExit("Can't find URL in output from svn info")
 918    return None
 919
 920  def GenerateDiff(self, args):
 921    cmd = ["svn", "diff"]
 922    if self.options.revision:
 923      cmd += ["-r", self.options.revision]
 924    cmd.extend(args)
 925    data = RunShell(cmd)
 926    count = 0
 927    for line in data.splitlines():
 928      if line.startswith("Index:") or line.startswith("Property changes on:"):
 929        count += 1
 930        logging.info(line)
 931    if not count:
 932      ErrorExit("No valid patches found in output from svn diff")
 933    return data
 934
 935  def _CollapseKeywords(self, content, keyword_str):
 936    """Collapses SVN keywords."""
 937    # svn cat translates keywords but svn diff doesn't. As a result of this
 938    # behavior patching.PatchChunks() fails with a chunk mismatch error.
 939    # This part was originally written by the Review Board development team
 940    # who had the same problem (http://reviews.review-board.org/r/276/).
 941    # Mapping of keywords to known aliases
 942    svn_keywords = {
 943      # Standard keywords
 944      'Date':                ['Date', 'LastChangedDate'],
 945      'Revision':            ['Revision', 'LastChangedRevision', 'Rev'],
 946      'Author':              ['Author', 'LastChangedBy'],
 947      'HeadURL':             ['HeadURL', 'URL'],
 948      'Id':                  ['Id'],
 949
 950      # Aliases
 951      'LastChangedDate':     ['LastChangedDate', 'Date'],
 952      'LastChangedRevision': ['LastChangedRevision', 'Rev', 'Revision'],
 953      'LastChangedBy':       ['LastChangedBy', 'Author'],
 954      'URL':                 ['URL', 'HeadURL'],
 955    }
 956
 957    def repl(m):
 958       if m.group(2):
 959         return "$%s::%s$" % (m.group(1), " " * len(m.group(3)))
 960       return "$%s$" % m.group(1)
 961    keywords = [keyword
 962                for name in keyword_str.split(" ")
 963                for keyword in svn_keywords.get(name, [])]
 964    return re.sub(r"\$(%s):(:?)([^\$]+)\$" % '|'.join(keywords), repl, content)
 965
 966  def GetUnknownFiles(self):
 967    status = RunShell(["svn", "status", "--ignore-externals"], silent_ok=True)
 968    unknown_files = []
 969    for line in status.split("\n"):
 970      if line and line[0] == "?":
 971        unknown_files.append(line)
 972    return unknown_files
 973
 974  def ReadFile(self, filename):
 975    """Returns the contents of a file."""
 976    file = open(filename, 'rb')
 977    result = ""
 978    try:
 979      result = file.read()
 980    finally:
 981      file.close()
 982    return result
 983
 984  def GetStatus(self, filename):
 985    """Returns the status of a file."""
 986    if not self.options.revision:
 987      status = RunShell(["svn", "status", "--ignore-externals", filename])
 988      if not status:
 989        ErrorExit("svn status returned no output for %s" % filename)
 990      status_lines = status.splitlines()
 991      # If file is in a cl, the output will begin with
 992      # "\n--- Changelist 'cl_name':\n".  See
 993      # http://svn.collab.net/repos/svn/trunk/notes/changelist-design.txt
 994      if (len(status_lines) == 3 and
 995          not status_lines[0] and
 996          status_lines[1].startswith("--- Changelist")):
 997        status = status_lines[2]
 998      else:
 999        status = status_lines[0]
1000    # If we have a revision to diff against we need to run "svn list"
1001    # for the old and the new revision and compare the results to get
1002    # the correct status for a file.
1003    else:
1004      dirname, relfilename = os.path.split(filename)
1005      if dirname not in self.svnls_cache:
1006        cmd = ["svn", "list", "-r", self.rev_start, dirname or "."]
1007        out, returncode = RunShellWithReturnCode(cmd)
1008        if returncode:
1009          ErrorExit("Failed to get status for %s." % filename)
1010        old_files = out.splitlines()
1011        args = ["svn", "list"]
1012        if self.rev_end:
1013          args += ["-r", self.rev_end]
1014        cmd = args + [dirname or "."]
1015        out, returncode = RunShellWithReturnCode(cmd)
1016        if returncode:
1017          ErrorExit("Failed to run command %s" % cmd)
1018        self.svnls_cache[dirname] = (old_files, out.splitlines())
1019      old_files, new_files = self.svnls_cache[dirname]
1020      if relfilename in old_files and relfilename not in new_files:
1021        status = "D   "
1022      elif relfilename in old_files and relfilename in new_files:
1023        status = "M   "
1024      else:
1025        status = "A   "
1026    return status
1027
1028  def GetBaseFile(self, filename):
1029    status = self.GetStatus(filename)
1030    base_content = None
1031    new_content = None
1032
1033    # If a file is copied its status will be "A  +", which signifies
1034    # "addition-with-history".  See "svn st" for more information.  We need to
1035    # upload the original file or else diff parsing will fail if the file was
1036    # edited.
1037    if status[0] == "A" and status[3] != "+":
1038      # We'll need to upload the new content if we're adding a binary file
1039      # since diff's output won't contain it.
1040      mimetype = RunShell(["svn", "propget", "svn:mime-type", filename],
1041                          silent_ok=True)
1042      base_content = ""
1043      is_binary = bool(mimetype) and not mimetype.startswith("text/")
1044      if is_binary and self.IsImage(filename):
1045        new_content = self.ReadFile(filename)
1046    elif (status[0] in ("M", "D", "R") or
1047          (status[0] == "A" and status[3] == "+") or  # Copied file.
1048          (status[0] == " " and status[1] == "M")):  # Property change.
1049      args = []
1050      if self.options.revision:
1051        url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
1052      else:
1053        # Don't change filename, it's needed later.
1054        url = filename
1055        args += ["-r", "BASE"]
1056      cmd = ["svn"] + args + ["propget", "svn:mime-type", url]
1057      mimetype, returncode = RunShellWithReturnCode(cmd)
1058      if returncode:
1059        # File does not exist in the requested revision.
1060        # Reset mimetype, it contains an error message.
1061        mimetype = ""
1062      get_base = False
1063      is_binary = bool(mimetype) and not mimetype.startswith("text/")
1064      if status[0] == " ":
1065        # Empty base content just to force an upload.
1066        base_content = ""
1067      elif is_binary:
1068        if self.IsImage(filename):
1069          get_base = True
1070          if status[0] == "M":
1071            if not self.rev_end:
1072              new_content = self.ReadFile(filename)
1073            else:
1074              url = "%s/%s@%s" % (self.svn_base, filename, self.rev_end)
1075              new_content = RunShell(["svn", "cat", url],
1076                                     universal_newlines=True, silent_ok=True)
1077        else:
1078          base_content = ""
1079      else:
1080        get_base = True
1081
1082      if get_base:
1083        if is_binary:
1084          universal_newlines = False
1085        else:
1086          universal_newlines = True
1087        if self.rev_start:
1088          # "svn cat -r REV delete_file.txt" doesn't work. cat requires
1089          # the full URL with "@REV" appended instead of using "-r" option.
1090          url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
1091          base_content = RunShell(["svn", "cat", url],
1092                                  universal_newlines=universal_newlines,
1093                                  silent_ok=True)
1094        else:
1095          base_content, ret_code = RunShellWithReturnCode(
1096            ["svn", "cat", filename], universal_newlines=universal_newlines)
1097          if ret_code and status[0] == "R":
1098            # It's a replaced file without local history (see issue208).
1099            # The base file needs to be fetched from the server.
1100            url = "%s/%s" % (self.svn_base, filename)
1101            base_content = RunShell(["svn", "cat", url],
1102                                    universal_newlines=universal_newlines,
1103                                    silent_ok=True)
1104          elif ret_code:
1105            ErrorExit("Got error status from 'svn cat %s'" % filename)
1106        if not is_binary:
1107          args = []
1108          if self.rev_start:
1109            url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start)
1110          else:
1111            url = filename
1112            args += ["-r", "BASE"]
1113          cmd = ["svn"] + args + ["propget", "svn:keywords", url]
1114          keywords, returncode = RunShellWithReturnCode(cmd)
1115          if keywords and not returncode:
1116            base_content = self._CollapseKeywords(base_content, keywords)
1117    else:
1118      StatusUpdate("svn status returned unexpected output: %s" % status)
1119      sys.exit(1)
1120    return base_content, new_content, is_binary, status[0:5]
1121
1122
class GitVCS(VersionControlSystem):
  """Implementation of the VersionControlSystem interface for Git."""

  def __init__(self, options):
    """Sets up the per-diff bookkeeping filled in by PostProcessDiff."""
    super(GitVCS, self).__init__(options)
    # New filename -> old filename, one entry per rename seen in the diff.
    self.renames = {}
    # Filename -> (hash before, hash after) of the file.  A hash of None
    # stands for "no such file".
    self.hashes = {}

  def PostProcessDiff(self, gitdiff):
    """Rewrites git diff output into the svn-flavoured form that is uploaded.

    An "Index: <file>" line is inserted before every "diff --git" header,
    and the before/after hashes and any renames are recorded on self so the
    file contents can be fetched later.

    Args:
      gitdiff: Output of "git diff".

    Returns:
      The converted diff text.  Exits if gitdiff has no "diff --git" header.
    """
    # Hash used by git to indicate "no such content".
    null_hash = "0"*40
    header_re = re.compile(r"diff --git a/(.*) b/(.*)$")
    # The "index" line in a git diff looks like this (long hashes elided):
    #   index 82c0d44..b2cee3f 100755
    # The left hash identifies the base file.
    index_re = re.compile(r"index (\w+)\.\.(\w+)")
    converted = []

    def _HashOrNone(value):
      """Maps git's all-zero hash to None and leaves other hashes alone."""
      if value == null_hash:
        return None
      return value

    def _AppendAutoProps(name):
      """Appends svn property-change text for name if it is a new file.

      The properties come from Subversion's [auto-props] setting, see
      http://svnbook.red-bean.com/en/1.1/ch07.html#svn-ch-7-sect-1.3.2
      """
      if not self.options.emulate_svn_auto_props:
        return
      if name not in self.hashes or self.hashes[name][0] is not None:
        return  # Unknown file, or it already existed before this diff.
      props = GetSubversionPropertyChanges(name)
      if props:
        converted.append("\n" + props + "\n")

    files_seen = 0
    current = None
    for raw in gitdiff.splitlines():
      header = header_re.match(raw)
      if header:
        # A new file starts: emit auto properties for the previous one.
        if current is not None:
          _AppendAutoProps(current)
        files_seen += 1
        # Intentionally use the "after" filename so we can show renames.
        old_name, current = header.groups()
        converted.append("Index: %s\n" % current)
        if old_name != current:
          self.renames[current] = old_name
      else:
        index = index_re.match(raw)
        if index:
          self.hashes[current] = (_HashOrNone(index.group(1)),
                                  _HashOrNone(index.group(2)))
      converted.append(raw + "\n")
    if not files_seen:
      ErrorExit("No valid patches found in output from git diff")
    # Emit auto properties for the last file in the diff.
    assert current is not None
    _AppendAutoProps(current)
    return "".join(converted)

  def GenerateDiff(self, extra_args):
    """Runs "git diff" and returns its output.

    Args:
      extra_args: Arguments appended after the revision(s), if any.  A
        revision option of the form "A:B" is passed to git as two arguments.
    """
    revision = self.options.revision
    if not revision:
      rev_args = []
    elif ":" in revision:
      rev_args = revision.split(":", 1)
    else:
      rev_args = [revision]

    # --no-ext-diff is broken in some versions of Git, so try to work around
    # this by overriding the environment (but there is still a problem if the
    # git config key "diff.external" is used).
    env = os.environ.copy()
    env.pop('GIT_EXTERNAL_DIFF', None)
    command = ["git", "diff", "--no-ext-diff", "--full-index", "-M"]
    return RunShell(command + rev_args + extra_args, env=env)

  def GetUnknownFiles(self):
    """Returns the untracked, non-ignored files, one list item per file."""
    command = ["git", "ls-files", "--exclude-standard", "--others"]
    listing = RunShell(command, silent_ok=True)
    return listing.splitlines()

  def GetFileContent(self, file_hash, is_binary):
    """Returns the content of a file identified by its git hash.

    Newlines are only translated for non-binary files.  Exits if
    "git show" fails.
    """
    content, exit_status = RunShellWithReturnCode(
        ["git", "show", file_hash], universal_newlines=not is_binary)
    if exit_status:
      ErrorExit("Got error status from 'git show %s'" % file_hash)
    return content

  def GetBaseFile(self, filename):
    """Gets the base content (and, for images, the new content) of a file.

    Relies on the hashes and renames recorded by PostProcessDiff.

    Returns:
      A tuple (base_content, new_content, is_binary, status) where status is
      "A +" for a rename, "A" for an added file, "D" for a deleted file and
      "M" otherwise.
    """
    is_binary = self.IsBinary(filename)
    hash_before, hash_after = self.hashes.get(filename, (None, None))
    base_content = new_content = None

    if filename in self.renames:
      status = "A +"  # Match svn attribute name for renames.
      if filename not in self.hashes:
        # If a rename doesn't change the content, we never get a hash.
        base_content = RunShell(["git", "show", "HEAD:" + filename])
    elif not hash_before:
      status = "A"
      base_content = ""
    elif not hash_after:
      status = "D"
    else:
      status = "M"

    is_image = self.IsImage(filename)
    # Contents are only uploaded for text files and for images.
    if is_binary and not is_image:
      return (base_content, new_content, is_binary, status)

    # Grab the base content if we don't have it already.
    if base_content is None and hash_before:
      base_content = self.GetFileContent(hash_before, is_binary)
    # Only images need the "after" file; text is reconstructed from the
    # diff.
    if is_image and hash_after:
      new_content = self.GetFileContent(hash_after, is_binary)
    return (base_content, new_content, is_binary, status)
1255
1256
class MercurialVCS(VersionControlSystem):
  """Implementation of the VersionControlSystem interface for Mercurial."""

  def __init__(self, options, repo_dir):
    """Records the repository root, the current subdir and the base revision.

    Args:
      options: Parsed command-line options; "revision" is read here.
      repo_dir: Path to the repository root.  The current directory must be
        that directory or one below it.
    """
    super(MercurialVCS, self).__init__(options)
    # Absolute path to repository (we can be in a subdir)
    self.repo_dir = os.path.normpath(repo_dir)
    # Compute the subdir
    cwd = os.path.normpath(os.getcwd())
    assert cwd.startswith(self.repo_dir)
    self.subdir = cwd[len(self.repo_dir):].lstrip(r"\/")
    if self.options.revision:
      self.base_rev = self.options.revision
    else:
      # "hg parent -q" prints "REV:NODE"; keep the part after the colon.
      self.base_rev = RunShell(["hg", "parent", "-q"]).split(':')[1].strip()

  def _GetRelPath(self, filename):
    """Get relative path of a file according to the current directory,
    given its logical path in the repo."""
    assert filename.startswith(self.subdir), (filename, self.subdir)
    return filename[len(self.subdir):].lstrip(r"\/")

  def GenerateDiff(self, extra_args):
    """Runs "hg diff --git" and returns it rewritten to look like svn diff.

    Args:
      extra_args: List of extra arguments appended to the hg command.

    Exits if the output contains no "diff --git" header.
    """
    cmd = ["hg", "diff", "--git", "-r", self.base_rev] + extra_args
    data = RunShell(cmd, silent_ok=True)
    svndiff = []
    filecount = 0
    for line in data.splitlines():
      m = re.match(r"diff --git a/(\S+) b/(\S+)", line)
      if m:
        # Modify line to make it look like as it comes from svn diff.
        # With this modification no changes on the server side are required
        # to make upload.py work with Mercurial repos.
        # NOTE: for proper handling of moved/copied files, we have to use
        # the second filename.
        filename = m.group(2)
        svndiff.append("Index: %s" % filename)
        svndiff.append("=" * 67)
        filecount += 1
        logging.info(line)
      else:
        svndiff.append(line)
    if not filecount:
      ErrorExit("No valid patches found in output from hg diff")
    return "\n".join(svndiff) + "\n"

  def GetUnknownFiles(self):
    """Return a list of files unknown to the VCS."""
    status = RunShell(["hg", "status", "--rev", self.base_rev, "-u", "."],
        silent_ok=True)
    unknown_files = []
    for line in status.splitlines():
      st, fn = line.split(" ", 1)
      if st == "?":
        unknown_files.append(fn)
    return unknown_files

  def GetBaseFile(self, filename):
    """Gets the base content (and, for images, the new content) of a file.

    Args:
      filename: Path of the file relative to the repository root, as given
        by "hg diff".

    Returns:
      A tuple (base_content, new_content, is_binary, status):
        base_content: Content at the base revision; "" for added files.
        new_content: Working-copy content for binary images, otherwise None.
        is_binary: True if either version contains a NUL byte.
        status: The status letter from "hg status"; moved or copied files
          are reported as "M".
    """
    # "hg status" and "hg cat" both take a path relative to the current subdir
    # rather than to the repo root, but "hg diff" has given us the full path
    # to the repo root.
    base_content = ""
    new_content = None
    is_binary = False
    oldrelpath = relpath = self._GetRelPath(filename)
    # "hg status -C" returns two lines for moved/copied files, one otherwise
    out = RunShell(["hg", "status", "-C", "--rev", self.base_rev, relpath])
    out = out.splitlines()
    # HACK: strip error message about missing file/directory if it isn't in
    # the working copy
    if out[0].startswith('%s: ' % relpath):
      out = out[1:]
    if len(out) > 1:
      # Moved/copied => considered as modified, use old filename to
      # retrieve base contents
      oldrelpath = out[1].strip()
      status = "M"
    else:
      status, _ = out[0].split(' ', 1)
    # For a "START:END" revision range the base is the START revision.
    if ":" in self.base_rev:
      base_rev = self.base_rev.split(":", 1)[0]
    else:
      base_rev = self.base_rev
    if status != "A":
      base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath],
        silent_ok=True)
      is_binary = "\0" in base_content  # Mercurial's heuristic
    if status != "R":
      # Close the handle explicitly instead of leaving it to the garbage
      # collector.
      new_file = open(relpath, "rb")
      try:
        new_content = new_file.read()
      finally:
        new_file.close()
      is_binary = is_binary or "\0" in new_content
    if is_binary and base_content:
      # Fetch again without converting newlines
      base_content = RunShell(["hg", "cat", "-r", base_rev, oldrelpath],
        silent_ok=True, universal_newlines=False)
    # The new content is only uploaded for binary images.
    if not is_binary or not self.IsImage(relpath):
      new_content = None
    return base_content, new_content, is_binary, status
1355
1356
1357# NOTE: The SplitPatch function is duplicated in engine.py, keep them in sync.
def SplitPatch(data):
  """Cuts a multi-file patch into one piece per file.

  A new piece starts at every "Index:" line, and at a
  "Property changes on:" line that names a file other than the current one.
  Anything before the first such line is dropped.

  Args:
    data: A string containing the output of svn diff.

  Returns:
    A list of 2-tuple (filename, text) where text is the svn diff output
      pertaining to filename.
  """
  pieces = []
  current_name = None
  chunk = []
  for raw in data.splitlines(True):
    candidate = None
    if raw.startswith('Property changes on:'):
      # When a file is modified, paths use '/' between directories, however
      # when a property is modified '\' is used on Windows.  Make them the
      # same, otherwise the file shows up twice.
      prop_target = raw.split(':', 1)[1].strip().replace('\\', '/')
      if prop_target != current_name:
        # File has property changes but no modifications: start a new piece.
        candidate = prop_target
    elif raw.startswith('Index:'):
      candidate = raw.split(':', 1)[1].strip()

    if not candidate:
      chunk.append(raw)
      continue

    # A new file begins: flush what was collected for the previous one.
    if current_name and chunk:
      pieces.append((current_name, ''.join(chunk)))
    current_name = candidate
    chunk = [raw]

  if current_name and chunk:
    pieces.append((current_name, ''.join(chunk)))
  return pieces
1396
1397
def UploadSeparatePatches(issue, rpc_server, patchset, data, options):
  """Uploads the diff output one file at a time.

  Patches larger than MAX_UPLOAD_SIZE are reported and skipped.  The process
  exits with status 1 if the first line of a server response isn't "OK".

  Args:
    issue: Issue number; converted with int() when building the URL.
    rpc_server: Object whose Send(url, body, content_type=...) posts the
      request and returns the response body.
    patchset: Patchset number; converted with int() when building the URL.
    data: The complete diff, split per file with SplitPatch.
    options: Parsed command-line options; "download_base" is read.

  Returns a list of [patch_key, filename] for each file.
  """
  uploaded = []
  for filename, text in SplitPatch(data):
    if len(text) > MAX_UPLOAD_SIZE:
      print("Not uploading the patch for " + filename +
            " because the file is too large.")
      continue
    fields = [("filename", filename)]
    if not options.download_base:
      fields.append(("content_upload", "1"))
    attachments = [("data", "data.diff", text)]
    ctype, body = EncodeMultipartFormData(fields, attachments)
    url = "/%d/upload_patch/%d" % (int(issue), int(patchset))
    print("Uploading patch for " + filename)
    response_body = rpc_server.Send(url, body, content_type=ctype)
    response_lines = response_body.splitlines()
    if not (response_lines and response_lines[0] == "OK"):
      StatusUpdate("  --> %s" % response_body)
      sys.exit(1)
    # The line after "OK" is the key the server assigned to this patch.
    uploaded.append([response_lines[1], filename])
  return uploaded
1424
1425
def GuessVCSName():
  """Helper to guess the version control system.

  Looks at the current directory and works out which version control system
  manages it.  Mercurial is probed first, then Subversion, then Git.

  Returns:
    A pair (vcs, output).  vcs is a string indicating which VCS was detected
    and is one of VCS_GIT, VCS_MERCURIAL, VCS_SUBVERSION, or VCS_UNKNOWN.
    output is a string containing any interesting output from the vcs
    detection routine, or None if there is nothing interesting.
  """

  def _Probe(command):
    """Runs command; returns (output, returncode), or None if not installed."""
    try:
      return RunShellWithReturnCode(command)
    except OSError:
      # errno 2 is ENOENT, i.e. the tool isn't installed: don't die on that.
      # Any other OS error is unexpected and propagates.
      if sys.exc_info()[1].errno != 2:
        raise
    return None

  # Mercurial has a command to get the base directory of a repository.
  # NOTE: we try Mercurial first as it can sit on top of an SVN working copy.
  hg_result = _Probe(["hg", "root"])
  if hg_result is not None:
    out, returncode = hg_result
    if returncode == 0:
      return (VCS_MERCURIAL, out.strip())

  # Subversion has a .svn in all working directories.
  if os.path.isdir('.svn'):
    logging.info("Guessed VCS = Subversion")
    return (VCS_SUBVERSION, None)

  # Git has a command to test if you're in a git tree.
  git_result = _Probe(["git", "rev-parse", "--is-inside-work-tree"])
  if git_result is not None and git_result[1] == 0:
    return (VCS_GIT, None)

  return (VCS_UNKNOWN, None)
1466
1467
def GuessVCS(options):
  """Helper to guess the version control system.

  A VCS named on the command line (options.vcs) or, failing that, in the
  CODEREVIEW_VCS environment variable is validated against
  VCS_ABBREVIATIONS.  If neither is given, the current directory is examined
  with GuessVCSName().  Exits with an error if the VCS is unknown or can't
  be figured out.

  Returns:
    A VersionControlSystem instance. Exits if the VCS can't be guessed.
  """
  requested = options.vcs or os.environ.get("CODEREVIEW_VCS")
  if requested:
    vcs = VCS_ABBREVIATIONS.get(requested.lower())
    if vcs is None:
      ErrorExit("Unknown version control system %r specified." % requested)
    extra_output = None
  else:
    vcs, extra_output = GuessVCSName()

  if vcs == VCS_MERCURIAL:
    # The repository root is only known already when GuessVCSName found it.
    if extra_output is None:
      extra_output = RunShell(["hg", "root"]).strip()
    return MercurialVCS(options, extra_output)
  if vcs == VCS_SUBVERSION:
    return SubversionVCS(options)
  if vcs == VCS_GIT:
    return GitVCS(options)

  ErrorExit(("Could not guess version control system. "
             "Are you in a working copy directory?"))
1502
1503
def CheckReviewer(reviewer):
  """Validate a reviewer -- either a nickname or an email address.

  Anything without an "@" is taken to be a nickname and accepted as is.  An
  email address must contain exactly one "@" and a "." after it.

  Args:
    reviewer: A nickname or an email address.

  Calls ErrorExit() if it is an invalid email address.
  """
  at_signs = reviewer.count("@")
  if not at_signs:
    return  # Assume nickname
  if at_signs > 1:
    ErrorExit("Invalid email address: %r" % reviewer)
  assert at_signs == 1
  domain = reviewer.split("@")[1]
  if "." not in domain:
    ErrorExit("Invalid email address: %r" % reviewer)
1520
1521
def LoadSubversionAutoProperties():
  """Returns the content of [auto-props] section of Subversion's config file as
  a dictionary.

  The config file is %APPDATA%\\Subversion\\config on Windows and
  ~/.subversion/config elsewhere.

  Returns:
    A dictionary whose key-value pair corresponds the [auto-props] section's
      key-value pair; each value is parsed with
      ParseSubversionPropertyValues.
    In following cases, returns empty dictionary:
      - config file doesn't exist (including APPDATA not being set on
        Windows), or
      - 'enable-auto-props' is not set to 'true-like-value' in [miscellany].
  """
  if os.name == 'nt':
    appdata = os.environ.get("APPDATA")
    if not appdata:
      # Without APPDATA there is no per-user config file to look for.
      return {}
    subversion_config = appdata + "\\Subversion\\config"
  else:
    subversion_config = os.path.expanduser("~/.subversion/config")
  if not os.path.exists(subversion_config):
    return {}
  config = ConfigParser.ConfigParser()
  config.read(subversion_config)
  auto_props_enabled = (
      config.has_section("miscellany") and
      config.has_option("miscellany", "enable-auto-props") and
      config.getboolean("miscellany", "enable-auto-props") and
      config.has_section("auto-props"))
  if not auto_props_enabled:
    return {}
  props = {}
  for file_pattern in config.options("auto-props"):
    props[file_pattern] = ParseSubversionPropertyValues(
      config.get("auto-props", file_pattern))
  return props
1552
def ParseSubversionPropertyValues(props):
  """Parse the given property value which comes from [auto-props] section and
  returns a list whose element is a (svn_prop_key, svn_prop_value) pair.

  Entries are separated by ';'. Each entry is either 'key=value' or a bare
  'key', in which case the value is '*'. Whitespace around keys and values is
  dropped, empty entries (e.g. after a trailing ';') are skipped, and only
  the first '=' of an entry separates the key from the value.

  Args:
    props: The right-hand side of an [auto-props] line, as a string.

  Returns:
    A list of (svn_prop_key, svn_prop_value) tuples, in input order.

  See the following doctest for example.

  >>> ParseSubversionPropertyValues('svn:eol-style=LF')
  [('svn:eol-style', 'LF')]
  >>> ParseSubversionPropertyValues('svn:mime-type=image/jpeg')
  [('svn:mime-type', 'image/jpeg')]
  >>> ParseSubversionPropertyValues('svn:eol-style=LF;svn:executable')
  [('svn:eol-style', 'LF'), ('svn:executable', '*')]
  >>> ParseSubversionPropertyValues('svn:eol-style = LF; svn:executable;')
  [('svn:eol-style', 'LF'), ('svn:executable', '*')]
  >>> ParseSubversionPropertyValues('custom:expr=a=b')
  [('custom:expr', 'a=b')]
  """
  key_value_pairs = []
  for prop in props.split(";"):
    prop = prop.strip()
    if not prop:
      # A trailing or doubled ';' (or an empty input) produces empty entries;
      # they do not name a property.
      continue
    if "=" in prop:
      # Split on the first '=' only, so the value may itself contain '='.
      key, value = prop.split("=", 1)
      key_value_pairs.append((key.strip(), value.strip()))
    else:
      # If value is not given, use '*' as a Subversion's convention.
      key_value_pairs.append((prop, "*"))
  return key_value_pairs
1576
1577
def GetSubversionPropertyChanges(filename):
  """Return a Subversion's 'Property changes on ...' string, which is used in
  the patch file.

  The [auto-props] configuration is loaded on first use and cached in the
  module-level svn_auto_props_map.

  Args:
    filename: filename whose property might be set by [auto-props] config.
      May include directory components; only the last component is matched
      against the patterns.

  Returns:
    A string like 'Property changes on |filename| ...' if given |filename|
      matches any entries in [auto-props] section. None, otherwise.
  """
  global svn_auto_props_map
  if svn_auto_props_map is None:
    svn_auto_props_map = LoadSubversionAutoProperties()

  # Patterns are compared case-insensitively against the basename:
  # - ConfigParser lower-cases option names, so a pattern written as
  #   "Makefile" or "*.C" arrives here lower-cased and could never match with
  #   a case-sensitive comparison.
  # - NOTE(review): Subversion itself is believed to match auto-props
  #   patterns case-blind against the file's basename -- confirm against the
  #   Subversion documentation.
  basename = os.path.basename(filename).lower()
  all_props = []
  for file_pattern, props in svn_auto_props_map.items():
    if fnmatch.fnmatchcase(basename, file_pattern.lower()):
      all_props.extend(props)
  if all_props:
    return FormatSubversionPropertyChanges(filename, all_props)
  return None
1600
1601
def FormatSubversionPropertyChanges(filename, props):
  """Build the 'Property changes on: ...' section of a Subversion patch.

  Every property in |props| is reported as an added property.

  Args:
    filename: Name of the file the properties belong to.
    props: A list whose element is a (svn_prop_key, svn_prop_value) pair.

  Returns:
    The section as a single newline-terminated string.

  Example:

  >>> print FormatSubversionPropertyChanges('foo.cc', [('svn:eol-style', 'LF')])
  Property changes on: foo.cc
  ___________________________________________________________________
  Added: svn:eol-style
     + LF
  <BLANKLINE>
  """
  heading = "Property changes on: %s" % filename
  rule = "___________________________________________________________________"
  # One "Added:" line followed by one indented value line per property.
  entries = "".join(["Added: " + name + "\n" + "   + " + text + "\n"
                     for name, text in props])
  return heading + "\n" + rule + "\n" + entries
1629
1630
def RealMain(argv, data=None):
  """The real main function.

  Parses the command line, obtains the diff (from |data| or from the version
  control system), posts it to the "/upload" handler of the review server
  and, unless --download_base is in effect, uploads the base files and
  optionally triggers the notification mail afterwards.

  Args:
    argv: Command line arguments.
    data: Diff contents. If None (default) the diff is generated by
      the VersionControlSystem implementation returned by GuessVCS().

  Returns:
    A 2-tuple (issue id, patchset id).
    The patchset id is None if the base files are not uploaded by this
    script (applies only to SVN checkouts).

  Calls ErrorExit() if no message is given, if both a description and a
  description file are specified, or if the server response lacks the patch
  list needed to upload base files. Calls sys.exit(0) if the server response
  does not start with "Issue created." or "Issue updated.".
  """
  global verbosity
  logging.basicConfig(format=("%(asctime).19s %(levelname)s %(filename)s:"
                              "%(lineno)s %(message)s "))
  # NOTE(review): presumably forces untranslated output from the VCS commands
  # run later -- confirm.
  os.environ['LC_ALL'] = 'C'
  options, args = parser.parse_args(argv[1:])
  verbosity = options.verbose
  if verbosity >= 3:
    logging.getLogger().setLevel(logging.DEBUG)
  elif verbosity >= 2:
    logging.getLogger().setLevel(logging.INFO)

  vcs = GuessVCS(options)

  base = options.base_url
  if isinstance(vcs, SubversionVCS):
    # Guessing the base field is only supported for Subversion.
    # Note: Fetching base files may become deprecated in future releases.
    guessed_base = vcs.GuessBase(options.download_base)
    if base:
      if guessed_base and base != guessed_base:
        print("Using base URL \"%s\" from --base_url instead of \"%s\"" %
              (base, guessed_base))
    else:
      base = guessed_base

  if not base and options.download_base:
    # NOTE(review): this assignment is a no-op, because the branch is only
    # taken when download_base is already set. The log message suggests the
    # intent was to switch to uploading base files -- confirm before changing.
    options.download_base = True
    logging.info("Enabled upload of base file")
  if not options.assume_yes:
    vcs.CheckForUnknownFiles()
  if data is None:
    data = vcs.GenerateDiff(args)
  data = vcs.PostProcessDiff(data)
  files = vcs.GetBaseFiles(data)
  if verbosity >= 1:
    print("Upload server: %s (change with -s/--server)" % (options.server,))
  if options.issue:
    prompt = "Message describing this patch set: "
  else:
    prompt = "New issue subject: "
  message = options.message or raw_input(prompt).strip()
  if not message:
    ErrorExit("A non-empty message is required")
  rpc_server = GetRpcServer(options.server,
                            options.email,
                            options.host,
                            options.save_cookies,
                            options.account_type)
  form_fields = [("subject", message)]
  if base:
    form_fields.append(("base", base))
  if options.issue:
    form_fields.append(("issue", str(options.issue)))
  if options.email:
    form_fields.append(("user", options.email))
  if options.reviewers:
    for reviewer in options.reviewers.split(','):
      CheckReviewer(reviewer)
    form_fields.append(("reviewers", options.reviewers))
  if options.cc:
    for cc in options.cc.split(','):
      CheckReviewer(cc)
    form_fields.append(("cc", options.cc))
  description = options.description
  if options.description_file:
    if options.description:
      ErrorExit("Can't specify description and description_file")
    # try/finally so the handle is released even if read() fails.
    description_file = open(options.description_file, 'r')
    try:
      description = description_file.read()
    finally:
      description_file.close()
  if description:
    form_fields.append(("description", description))
  # Send a hash of all the base file so the server can determine if a copy
  # already exists in an earlier patchset. The format is
  # "checksum:filename" entries joined by "|"; files without base content
  # (info[0] is None) are left out.
  base_hashes = "|".join([md5(info[0]).hexdigest() + ":" + filename
                          for filename, info in files.iteritems()
                          if info[0] is not None])
  form_fields.append(("base_hashes", base_hashes))
  if options.private:
    if options.issue:
      print("Warning: Private flag ignored when updating an existing issue.")
    else:
      form_fields.append(("private", "1"))
  # If we're uploading base files, don't send the email before the uploads, so
  # that it contains the file status.
  if options.send_mail and options.download_base:
    form_fields.append(("send_mail", "1"))
  if not options.download_base:
    form_fields.append(("content_upload", "1"))
  if len(data) > MAX_UPLOAD_SIZE:
    print("Patch is large, so uploading file patches separately.")
    uploaded_diff_file = []
    form_fields.append(("separate_patches", "1"))
  else:
    uploaded_diff_file = [("data", "data.diff", data)]
  ctype, body = EncodeMultipartFormData(form_fields, uploaded_diff_file)
  response_body = rpc_server.Send("/upload", body, content_type=ctype)
  patchset = None
  # Stays None when the response carries no patch list; checked before use.
  patches = None
  if not options.download_base or not uploaded_diff_file:
    # Expected response layout: status message, patchset id, then one
    # space-separated patch entry per line.
    lines = response_body.splitlines()
    if len(lines) >= 2:
      msg = lines[0]
      patchset = lines[1].strip()
      patches = [x.split(" ", 1) for x in lines[2:]]
    else:
      msg = response_body
  else:
    msg = response_body
  StatusUpdate(msg)
  if not (response_body.startswith("Issue created.") or
          response_body.startswith("Issue updated.")):
    # NOTE(review): exits with status 0 although the upload was not accepted;
    # confirm whether callers rely on this before changing it.
    sys.exit(0)
  # The issue id is whatever follows the last "/" of the status message.
  issue = msg[msg.rfind("/")+1:]

  if not uploaded_diff_file:
    result = UploadSeparatePatches(issue, rpc_server, patchset, data, options)
    if not options.download_base:
      patches = result

  if not options.download_base:
    if patches is None:
      # A success message without the patchset/patch lines; previously this
      # surfaced as an UnboundLocalError on 'patches'.
      ErrorExit("Server response did not include a patch list: %r" %
                response_body)
    vcs.UploadBaseFiles(issue, rpc_server, patches, patchset, options, files)
    if options.send_mail:
      rpc_server.Send("/" + issue + "/mail", payload="")
  return issue, patchset
1772
1773
def main():
  """Command line entry point: runs RealMain() on sys.argv.

  A KeyboardInterrupt is reported through StatusUpdate() and turned into
  exit status 1. The return value of RealMain() is discarded.
  """
  try:
    RealMain(sys.argv)
  except KeyboardInterrupt:
    # Bare print emits a newline, presumably so that the status message
    # starts on a fresh line after the interrupted output.
    print
    StatusUpdate("Interrupted.")
    sys.exit(1)
1781
1782
# Run the uploader only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
  main()