/upload-diffs.py
Python | 1784 lines | 1695 code | 17 blank | 72 comment | 45 complexity | 1589180e0197a5f44945f980888474be MD5 | raw file
Large files files are truncated, but you can click here to view the full file
1#!/usr/bin/env python 2# 3# Copyright 2007 Google Inc. 4# 5# Licensed under the Apache License, Version 2.0 (the "License"); 6# you may not use this file except in compliance with the License. 7# You may obtain a copy of the License at 8# 9# http://www.apache.org/licenses/LICENSE-2.0 10# 11# Unless required by applicable law or agreed to in writing, software 12# distributed under the License is distributed on an "AS IS" BASIS, 13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14# See the License for the specific language governing permissions and 15# limitations under the License. 16 17"""Tool for uploading diffs from a version control system to the codereview app. 18 19Usage summary: upload.py [options] [-- diff_options] [path...] 20 21Diff options are passed to the diff command of the underlying system. 22 23Supported version control systems: 24 Git 25 Mercurial 26 Subversion 27 28It is important for Git/Mercurial users to specify a tree/node/branch to diff 29against by using the '--rev' option. 30""" 31# Taken from rietveld trunk (http://code.google.com/p/rietveld), r579 32 33# This code is derived from appcfg.py in the App Engine SDK (open source), 34# and from ASPN recipe #146306. 35 36import ConfigParser 37import cookielib 38import fnmatch 39import getpass 40import logging 41import mimetypes 42import optparse 43import os 44import re 45import socket 46import subprocess 47import sys 48import urllib 49import urllib2 50import urlparse 51 52# The md5 module was deprecated in Python 2.5. 53try: 54 from hashlib import md5 55except ImportError: 56 from md5 import md5 57 58try: 59 import readline 60except ImportError: 61 pass 62 63try: 64 import keyring 65except ImportError: 66 keyring = None 67 68# Constants for GoogleCL. 69DEFAULT_REVIEWER = 'tom.h.miller@gmail.com' 70DEFAULT_CC = 'googlecl-dev@googlegroups.com' 71 72# The logging verbosity: 73# 0: Errors only. 74# 1: Status messages. 75# 2: Info logs. 76# 3: Debug logs. 77verbosity = 1 78 79# The account type used for authentication. 80# This line could be changed by the review server (see handler for 81# upload.py). 82AUTH_ACCOUNT_TYPE = "GOOGLE" 83 84# URL of the default review server. As for AUTH_ACCOUNT_TYPE, this line could be 85# changed by the review server (see handler for upload.py). 86DEFAULT_REVIEW_SERVER = "codereview.appspot.com" 87 88# Max size of patch or base file. 89MAX_UPLOAD_SIZE = 900 * 1024 90 91# Constants for version control names. Used by GuessVCSName. 92VCS_GIT = "Git" 93VCS_MERCURIAL = "Mercurial" 94VCS_SUBVERSION = "Subversion" 95VCS_UNKNOWN = "Unknown" 96 97# whitelist for non-binary filetypes which do not start with "text/" 98# .mm (Objective-C) shows up as application/x-freemind on my Linux box. 99TEXT_MIMETYPES = ['application/javascript', 'application/x-javascript', 100 'application/xml', 'application/x-freemind', 101 'application/x-sh'] 102 103VCS_ABBREVIATIONS = { 104 VCS_MERCURIAL.lower(): VCS_MERCURIAL, 105 "hg": VCS_MERCURIAL, 106 VCS_SUBVERSION.lower(): VCS_SUBVERSION, 107 "svn": VCS_SUBVERSION, 108 VCS_GIT.lower(): VCS_GIT, 109} 110 111# The result of parsing Subversion's [auto-props] setting. 112svn_auto_props_map = None 113 114def GetEmail(prompt): 115 """Prompts the user for their email address and returns it. 116 117 The last used email address is saved to a file and offered up as a suggestion 118 to the user. If the user presses enter without typing in anything the last 119 used email address is used. If the user enters a new address, it is saved 120 for next time we prompt. 121 122 """ 123 last_email_file_name = os.path.expanduser("~/.last_codereview_email_address") 124 last_email = "" 125 if os.path.exists(last_email_file_name): 126 try: 127 last_email_file = open(last_email_file_name, "r") 128 last_email = last_email_file.readline().strip("\n") 129 last_email_file.close() 130 prompt += " [%s]" % last_email 131 except IOError, e: 132 pass 133 email = raw_input(prompt + ": ").strip() 134 if email: 135 try: 136 last_email_file = open(last_email_file_name, "w") 137 last_email_file.write(email) 138 last_email_file.close() 139 except IOError, e: 140 pass 141 else: 142 email = last_email 143 return email 144 145 146def StatusUpdate(msg): 147 """Print a status message to stdout. 148 149 If 'verbosity' is greater than 0, print the message. 150 151 Args: 152 msg: The string to print. 153 """ 154 if verbosity > 0: 155 print msg 156 157 158def ErrorExit(msg): 159 """Print an error message to stderr and exit.""" 160 print >>sys.stderr, msg 161 sys.exit(1) 162 163 164class ClientLoginError(urllib2.HTTPError): 165 """Raised to indicate there was an error authenticating with ClientLogin.""" 166 167 def __init__(self, url, code, msg, headers, args): 168 urllib2.HTTPError.__init__(self, url, code, msg, headers, None) 169 self.args = args 170 self.reason = args["Error"] 171 172 173class AbstractRpcServer(object): 174 """Provides a common interface for a simple RPC server.""" 175 176 def __init__(self, host, auth_function, host_override=None, extra_headers={}, 177 save_cookies=False, account_type=AUTH_ACCOUNT_TYPE): 178 """Creates a new HttpRpcServer. 179 180 Args: 181 host: The host to send requests to. 182 auth_function: A function that takes no arguments and returns an 183 (email, password) tuple when called. Will be called if authentication 184 is required. 185 host_override: The host header to send to the server (defaults to host). 186 extra_headers: A dict of extra headers to append to every request. 187 save_cookies: If True, save the authentication cookies to local disk. 188 If False, use an in-memory cookiejar instead. Subclasses must 189 implement this functionality. Defaults to False. 190 account_type: Account type used for authentication. Defaults to 191 AUTH_ACCOUNT_TYPE. 192 """ 193 self.host = host 194 if (not self.host.startswith("http://") and 195 not self.host.startswith("https://")): 196 self.host = "http://" + self.host 197 self.host_override = host_override 198 self.auth_function = auth_function 199 self.authenticated = False 200 self.extra_headers = extra_headers 201 self.save_cookies = save_cookies 202 self.account_type = account_type 203 self.opener = self._GetOpener() 204 if self.host_override: 205 logging.info("Server: %s; Host: %s", self.host, self.host_override) 206 else: 207 logging.info("Server: %s", self.host) 208 209 def _GetOpener(self): 210 """Returns an OpenerDirector for making HTTP requests. 211 212 Returns: 213 A urllib2.OpenerDirector object. 214 """ 215 raise NotImplementedError() 216 217 def _CreateRequest(self, url, data=None): 218 """Creates a new urllib request.""" 219 logging.debug("Creating request for: '%s' with payload:\n%s", url, data) 220 req = urllib2.Request(url, data=data) 221 if self.host_override: 222 req.add_header("Host", self.host_override) 223 for key, value in self.extra_headers.iteritems(): 224 req.add_header(key, value) 225 return req 226 227 def _GetAuthToken(self, email, password): 228 """Uses ClientLogin to authenticate the user, returning an auth token. 229 230 Args: 231 email: The user's email address 232 password: The user's password 233 234 Raises: 235 ClientLoginError: If there was an error authenticating with ClientLogin. 236 HTTPError: If there was some other form of HTTP error. 237 238 Returns: 239 The authentication token returned by ClientLogin. 240 """ 241 account_type = self.account_type 242 if self.host.endswith(".google.com"): 243 # Needed for use inside Google. 244 account_type = "HOSTED" 245 req = self._CreateRequest( 246 url="https://www.google.com/accounts/ClientLogin", 247 data=urllib.urlencode({ 248 "Email": email, 249 "Passwd": password, 250 "service": "ah", 251 "source": "rietveld-codereview-upload", 252 "accountType": account_type, 253 }), 254 ) 255 try: 256 response = self.opener.open(req) 257 response_body = response.read() 258 response_dict = dict(x.split("=") 259 for x in response_body.split("\n") if x) 260 return response_dict["Auth"] 261 except urllib2.HTTPError, e: 262 if e.code == 403: 263 body = e.read() 264 response_dict = dict(x.split("=", 1) for x in body.split("\n") if x) 265 raise ClientLoginError(req.get_full_url(), e.code, e.msg, 266 e.headers, response_dict) 267 else: 268 raise 269 270 def _GetAuthCookie(self, auth_token): 271 """Fetches authentication cookies for an authentication token. 272 273 Args: 274 auth_token: The authentication token returned by ClientLogin. 275 276 Raises: 277 HTTPError: If there was an error fetching the authentication cookies. 278 """ 279 # This is a dummy value to allow us to identify when we're successful. 280 continue_location = "http://localhost/" 281 args = {"continue": continue_location, "auth": auth_token} 282 req = self._CreateRequest("%s/_ah/login?%s" % 283 (self.host, urllib.urlencode(args))) 284 try: 285 response = self.opener.open(req) 286 except urllib2.HTTPError, e: 287 response = e 288 if (response.code != 302 or 289 response.info()["location"] != continue_location): 290 raise urllib2.HTTPError(req.get_full_url(), response.code, response.msg, 291 response.headers, response.fp) 292 self.authenticated = True 293 294 def _Authenticate(self): 295 """Authenticates the user. 296 297 The authentication process works as follows: 298 1) We get a username and password from the user 299 2) We use ClientLogin to obtain an AUTH token for the user 300 (see http://code.google.com/apis/accounts/AuthForInstalledApps.html). 301 3) We pass the auth token to /_ah/login on the server to obtain an 302 authentication cookie. If login was successful, it tries to redirect 303 us to the URL we provided. 304 305 If we attempt to access the upload API without first obtaining an 306 authentication cookie, it returns a 401 response (or a 302) and 307 directs us to authenticate ourselves with ClientLogin. 308 """ 309 for i in range(3): 310 credentials = self.auth_function() 311 try: 312 auth_token = self._GetAuthToken(credentials[0], credentials[1]) 313 except ClientLoginError, e: 314 if e.reason == "BadAuthentication": 315 print >>sys.stderr, "Invalid username or password." 316 continue 317 if e.reason == "CaptchaRequired": 318 print >>sys.stderr, ( 319 "Please go to\n" 320 "https://www.google.com/accounts/DisplayUnlockCaptcha\n" 321 "and verify you are a human. Then try again.\n" 322 "If you are using a Google Apps account the URL is:\n" 323 "https://www.google.com/a/yourdomain.com/UnlockCaptcha") 324 break 325 if e.reason == "NotVerified": 326 print >>sys.stderr, "Account not verified." 327 break 328 if e.reason == "TermsNotAgreed": 329 print >>sys.stderr, "User has not agreed to TOS." 330 break 331 if e.reason == "AccountDeleted": 332 print >>sys.stderr, "The user account has been deleted." 333 break 334 if e.reason == "AccountDisabled": 335 print >>sys.stderr, "The user account has been disabled." 336 break 337 if e.reason == "ServiceDisabled": 338 print >>sys.stderr, ("The user's access to the service has been " 339 "disabled.") 340 break 341 if e.reason == "ServiceUnavailable": 342 print >>sys.stderr, "The service is not available; try again later." 343 break 344 raise 345 self._GetAuthCookie(auth_token) 346 return 347 348 def Send(self, request_path, payload=None, 349 content_type="application/octet-stream", 350 timeout=None, 351 extra_headers=None, 352 **kwargs): 353 """Sends an RPC and returns the response. 354 355 Args: 356 request_path: The path to send the request to, eg /api/appversion/create. 357 payload: The body of the request, or None to send an empty request. 358 content_type: The Content-Type header to use. 359 timeout: timeout in seconds; default None i.e. no timeout. 360 (Note: for large requests on OS X, the timeout doesn't work right.) 361 extra_headers: Dict containing additional HTTP headers that should be 362 included in the request (string header names mapped to their values), 363 or None to not include any additional headers. 364 kwargs: Any keyword arguments are converted into query string parameters. 365 366 Returns: 367 The response body, as a string. 368 """ 369 # TODO: Don't require authentication. Let the server say 370 # whether it is necessary. 371 if not self.authenticated: 372 self._Authenticate() 373 374 old_timeout = socket.getdefaulttimeout() 375 socket.setdefaulttimeout(timeout) 376 try: 377 tries = 0 378 while True: 379 tries += 1 380 args = dict(kwargs) 381 url = "%s%s" % (self.host, request_path) 382 if args: 383 url += "?" + urllib.urlencode(args) 384 req = self._CreateRequest(url=url, data=payload) 385 req.add_header("Content-Type", content_type) 386 if extra_headers: 387 for header, value in extra_headers.items(): 388 req.add_header(header, value) 389 try: 390 f = self.opener.open(req) 391 response = f.read() 392 f.close() 393 return response 394 except urllib2.HTTPError, e: 395 if tries > 3: 396 raise 397 elif e.code == 401 or e.code == 302: 398 self._Authenticate() 399## elif e.code >= 500 and e.code < 600: 400## # Server Error - try again. 401## continue 402 else: 403 raise 404 finally: 405 socket.setdefaulttimeout(old_timeout) 406 407 408class HttpRpcServer(AbstractRpcServer): 409 """Provides a simplified RPC-style interface for HTTP requests.""" 410 411 def _Authenticate(self): 412 """Save the cookie jar after authentication.""" 413 super(HttpRpcServer, self)._Authenticate() 414 if self.save_cookies: 415 StatusUpdate("Saving authentication cookies to %s" % self.cookie_file) 416 self.cookie_jar.save() 417 418 def _GetOpener(self): 419 """Returns an OpenerDirector that supports cookies and ignores redirects. 420 421 Returns: 422 A urllib2.OpenerDirector object. 423 """ 424 opener = urllib2.OpenerDirector() 425 opener.add_handler(urllib2.ProxyHandler()) 426 opener.add_handler(urllib2.UnknownHandler()) 427 opener.add_handler(urllib2.HTTPHandler()) 428 opener.add_handler(urllib2.HTTPDefaultErrorHandler()) 429 opener.add_handler(urllib2.HTTPSHandler()) 430 opener.add_handler(urllib2.HTTPErrorProcessor()) 431 if self.save_cookies: 432 self.cookie_file = os.path.expanduser("~/.codereview_upload_cookies") 433 self.cookie_jar = cookielib.MozillaCookieJar(self.cookie_file) 434 if os.path.exists(self.cookie_file): 435 try: 436 self.cookie_jar.load() 437 self.authenticated = True 438 StatusUpdate("Loaded authentication cookies from %s" % 439 self.cookie_file) 440 except (cookielib.LoadError, IOError): 441 # Failed to load cookies - just ignore them. 442 pass 443 else: 444 # Create an empty cookie file with mode 600 445 fd = os.open(self.cookie_file, os.O_CREAT, 0600) 446 os.close(fd) 447 # Always chmod the cookie file 448 os.chmod(self.cookie_file, 0600) 449 else: 450 # Don't save cookies across runs of update.py. 451 self.cookie_jar = cookielib.CookieJar() 452 opener.add_handler(urllib2.HTTPCookieProcessor(self.cookie_jar)) 453 return opener 454 455 456parser = optparse.OptionParser( 457 usage="%prog [options] [-- diff_options] [path...]") 458parser.add_option("-y", "--assume_yes", action="store_true", 459 dest="assume_yes", default=False, 460 help="Assume that the answer to yes/no questions is 'yes'.") 461# Logging 462group = parser.add_option_group("Logging options") 463group.add_option("-q", "--quiet", action="store_const", const=0, 464 dest="verbose", help="Print errors only.") 465group.add_option("-v", "--verbose", action="store_const", const=2, 466 dest="verbose", default=1, 467 help="Print info level logs.") 468group.add_option("--noisy", action="store_const", const=3, 469 dest="verbose", help="Print all logs.") 470# Review server 471group = parser.add_option_group("Review server options") 472group.add_option("-s", "--server", action="store", dest="server", 473 default=DEFAULT_REVIEW_SERVER, 474 metavar="SERVER", 475 help=("The server to upload to. The format is host[:port]. " 476 "Defaults to '%default'.")) 477group.add_option("-e", "--email", action="store", dest="email", 478 metavar="EMAIL", default=None, 479 help="The username to use. Will prompt if omitted.") 480group.add_option("-H", "--host", action="store", dest="host", 481 metavar="HOST", default=None, 482 help="Overrides the Host header sent with all RPCs.") 483group.add_option("--no_cookies", action="store_false", 484 dest="save_cookies", default=True, 485 help="Do not save authentication cookies to local disk.") 486group.add_option("--account_type", action="store", dest="account_type", 487 metavar="TYPE", default=AUTH_ACCOUNT_TYPE, 488 choices=["GOOGLE", "HOSTED"], 489 help=("Override the default account type " 490 "(defaults to '%default', " 491 "valid choices are 'GOOGLE' and 'HOSTED').")) 492# Issue 493group = parser.add_option_group("Issue options") 494group.add_option("-d", "--description", action="store", dest="description", 495 metavar="DESCRIPTION", default=None, 496 help="Optional description when creating an issue.") 497group.add_option("-f", "--description_file", action="store", 498 dest="description_file", metavar="DESCRIPTION_FILE", 499 default=None, 500 help="Optional path of a file that contains " 501 "the description when creating an issue.") 502group.add_option("-r", "--reviewers", action="store", dest="reviewers", 503 metavar="REVIEWERS", default=DEFAULT_REVIEWER, 504 help="Add reviewers (comma separated email addresses).") 505group.add_option("--cc", action="store", dest="cc", 506 metavar="CC", default=DEFAULT_CC, 507 help="Add CC (comma separated email addresses).") 508group.add_option("--private", action="store_true", dest="private", 509 default=False, 510 help="Make the issue restricted to reviewers and those CCed") 511# Upload options 512group = parser.add_option_group("Patch options") 513group.add_option("-m", "--message", action="store", dest="message", 514 metavar="MESSAGE", default=None, 515 help="A message to identify the patch. " 516 "Will prompt if omitted.") 517group.add_option("-i", "--issue", type="int", action="store", 518 metavar="ISSUE", default=None, 519 help="Issue number to which to add. Defaults to new issue.") 520group.add_option("--base_url", action="store", dest="base_url", default=None, 521 help="Base repository URL (listed as \"Base URL\" when " 522 "viewing issue). If omitted, will be guessed automatically " 523 "for SVN repos and left blank for others.") 524group.add_option("--download_base", action="store_true", 525 dest="download_base", default=False, 526 help="Base files will be downloaded by the server " 527 "(side-by-side diffs may not work on files with CRs).") 528group.add_option("--rev", action="store", dest="revision", 529 metavar="REV", default=None, 530 help="Base revision/branch/tree to diff against. Use " 531 "rev1:rev2 range to review already committed changeset.") 532group.add_option("--send_mail", action="store_true", 533 dest="send_mail", default=False, 534 help="Send notification email to reviewers.") 535group.add_option("--vcs", action="store", dest="vcs", 536 metavar="VCS", default=None, 537 help=("Version control system (optional, usually upload.py " 538 "already guesses the right VCS).")) 539group.add_option("--emulate_svn_auto_props", action="store_true", 540 dest="emulate_svn_auto_props", default=False, 541 help=("Emulate Subversion's auto properties feature.")) 542 543 544def GetRpcServer(server, email=None, host_override=None, save_cookies=True, 545 account_type=AUTH_ACCOUNT_TYPE): 546 """Returns an instance of an AbstractRpcServer. 547 548 Args: 549 server: String containing the review server URL. 550 email: String containing user's email address. 551 host_override: If not None, string containing an alternate hostname to use 552 in the host header. 553 save_cookies: Whether authentication cookies should be saved to disk. 554 account_type: Account type for authentication, either 'GOOGLE' 555 or 'HOSTED'. Defaults to AUTH_ACCOUNT_TYPE. 556 557 Returns: 558 A new AbstractRpcServer, on which RPC calls can be made. 559 """ 560 561 rpc_server_class = HttpRpcServer 562 563 # If this is the dev_appserver, use fake authentication. 564 host = (host_override or server).lower() 565 if host == "localhost" or host.startswith("localhost:"): 566 if email is None: 567 email = "test@example.com" 568 logging.info("Using debug user %s. Override with --email" % email) 569 server = rpc_server_class( 570 server, 571 lambda: (email, "password"), 572 host_override=host_override, 573 extra_headers={"Cookie": 574 'dev_appserver_login="%s:False"' % email}, 575 save_cookies=save_cookies, 576 account_type=account_type) 577 # Don't try to talk to ClientLogin. 578 server.authenticated = True 579 return server 580 581 def GetUserCredentials(): 582 """Prompts the user for a username and password.""" 583 # Create a local alias to the email variable to avoid Python's crazy 584 # scoping rules. 585 local_email = email 586 if local_email is None: 587 local_email = GetEmail("Email (login for uploading to %s)" % server) 588 password = None 589 if keyring: 590 password = keyring.get_password(host, local_email) 591 if password is not None: 592 print "Using password from system keyring." 593 else: 594 password = getpass.getpass("Password for %s: " % local_email) 595 if keyring: 596 answer = raw_input("Store password in system keyring?(y/N) ").strip() 597 if answer == "y": 598 keyring.set_password(host, local_email, password) 599 return (local_email, password) 600 601 return rpc_server_class(server, 602 GetUserCredentials, 603 host_override=host_override, 604 save_cookies=save_cookies) 605 606 607def EncodeMultipartFormData(fields, files): 608 """Encode form fields for multipart/form-data. 609 610 Args: 611 fields: A sequence of (name, value) elements for regular form fields. 612 files: A sequence of (name, filename, value) elements for data to be 613 uploaded as files. 614 Returns: 615 (content_type, body) ready for httplib.HTTP instance. 616 617 Source: 618 http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306 619 """ 620 BOUNDARY = '-M-A-G-I-C---B-O-U-N-D-A-R-Y-' 621 CRLF = '\r\n' 622 lines = [] 623 for (key, value) in fields: 624 lines.append('--' + BOUNDARY) 625 lines.append('Content-Disposition: form-data; name="%s"' % key) 626 lines.append('') 627 if isinstance(value, unicode): 628 value = value.encode('utf-8') 629 lines.append(value) 630 for (key, filename, value) in files: 631 lines.append('--' + BOUNDARY) 632 lines.append('Content-Disposition: form-data; name="%s"; filename="%s"' % 633 (key, filename)) 634 lines.append('Content-Type: %s' % GetContentType(filename)) 635 lines.append('') 636 if isinstance(value, unicode): 637 value = value.encode('utf-8') 638 lines.append(value) 639 lines.append('--' + BOUNDARY + '--') 640 lines.append('') 641 body = CRLF.join(lines) 642 content_type = 'multipart/form-data; boundary=%s' % BOUNDARY 643 return content_type, body 644 645 646def GetContentType(filename): 647 """Helper to guess the content-type from the filename.""" 648 return mimetypes.guess_type(filename)[0] or 'application/octet-stream' 649 650 651# Use a shell for subcommands on Windows to get a PATH search. 652use_shell = sys.platform.startswith("win") 653 654def RunShellWithReturnCode(command, print_output=False, 655 universal_newlines=True, 656 env=os.environ): 657 """Executes a command and returns the output from stdout and the return code. 658 659 Args: 660 command: Command to execute. 661 print_output: If True, the output is printed to stdout. 662 If False, both stdout and stderr are ignored. 663 universal_newlines: Use universal_newlines flag (default: True). 664 665 Returns: 666 Tuple (output, return code) 667 """ 668 logging.info("Running %s", command) 669 p = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, 670 shell=use_shell, universal_newlines=universal_newlines, 671 env=env) 672 if print_output: 673 output_array = [] 674 while True: 675 line = p.stdout.readline() 676 if not line: 677 break 678 print line.strip("\n") 679 output_array.append(line) 680 output = "".join(output_array) 681 else: 682 output = p.stdout.read() 683 p.wait() 684 errout = p.stderr.read() 685 if print_output and errout: 686 print >>sys.stderr, errout 687 p.stdout.close() 688 p.stderr.close() 689 return output, p.returncode 690 691 692def RunShell(command, silent_ok=False, universal_newlines=True, 693 print_output=False, env=os.environ): 694 data, retcode = RunShellWithReturnCode(command, print_output, 695 universal_newlines, env) 696 if retcode: 697 ErrorExit("Got error status from %s:\n%s" % (command, data)) 698 if not silent_ok and not data: 699 ErrorExit("No output from %s" % command) 700 return data 701 702 703class VersionControlSystem(object): 704 """Abstract base class providing an interface to the VCS.""" 705 706 def __init__(self, options): 707 """Constructor. 708 709 Args: 710 options: Command line options. 711 """ 712 self.options = options 713 714 def PostProcessDiff(self, diff): 715 """Return the diff with any special post processing this VCS needs, e.g. 716 to include an svn-style "Index:".""" 717 return diff 718 719 def GenerateDiff(self, args): 720 """Return the current diff as a string. 721 722 Args: 723 args: Extra arguments to pass to the diff command. 724 """ 725 raise NotImplementedError( 726 "abstract method -- subclass %s must override" % self.__class__) 727 728 def GetUnknownFiles(self): 729 """Return a list of files unknown to the VCS.""" 730 raise NotImplementedError( 731 "abstract method -- subclass %s must override" % self.__class__) 732 733 def CheckForUnknownFiles(self): 734 """Show an "are you sure?" prompt if there are unknown files.""" 735 unknown_files = self.GetUnknownFiles() 736 if unknown_files: 737 print "The following files are not added to version control:" 738 for line in unknown_files: 739 print line 740 prompt = "Are you sure to continue?(y/N) " 741 answer = raw_input(prompt).strip() 742 if answer != "y": 743 ErrorExit("User aborted") 744 745 def GetBaseFile(self, filename): 746 """Get the content of the upstream version of a file. 747 748 Returns: 749 A tuple (base_content, new_content, is_binary, status) 750 base_content: The contents of the base file. 751 new_content: For text files, this is empty. For binary files, this is 752 the contents of the new file, since the diff output won't contain 753 information to reconstruct the current file. 754 is_binary: True iff the file is binary. 755 status: The status of the file. 756 """ 757 758 raise NotImplementedError( 759 "abstract method -- subclass %s must override" % self.__class__) 760 761 762 def GetBaseFiles(self, diff): 763 """Helper that calls GetBase file for each file in the patch. 764 765 Returns: 766 A dictionary that maps from filename to GetBaseFile's tuple. Filenames 767 are retrieved based on lines that start with "Index:" or 768 "Property changes on:". 769 """ 770 files = {} 771 for line in diff.splitlines(True): 772 if line.startswith('Index:') or line.startswith('Property changes on:'): 773 unused, filename = line.split(':', 1) 774 # On Windows if a file has property changes its filename uses '\' 775 # instead of '/'. 776 filename = filename.strip().replace('\\', '/') 777 files[filename] = self.GetBaseFile(filename) 778 return files 779 780 781 def UploadBaseFiles(self, issue, rpc_server, patch_list, patchset, options, 782 files): 783 """Uploads the base files (and if necessary, the current ones as well).""" 784 785 def UploadFile(filename, file_id, content, is_binary, status, is_base): 786 """Uploads a file to the server.""" 787 file_too_large = False 788 if is_base: 789 type = "base" 790 else: 791 type = "current" 792 if len(content) > MAX_UPLOAD_SIZE: 793 print ("Not uploading the %s file for %s because it's too large." % 794 (type, filename)) 795 file_too_large = True 796 content = "" 797 checksum = md5(content).hexdigest() 798 if options.verbose > 0 and not file_too_large: 799 print "Uploading %s file for %s" % (type, filename) 800 url = "/%d/upload_content/%d/%d" % (int(issue), int(patchset), file_id) 801 form_fields = [("filename", filename), 802 ("status", status), 803 ("checksum", checksum), 804 ("is_binary", str(is_binary)), 805 ("is_current", str(not is_base)), 806 ] 807 if file_too_large: 808 form_fields.append(("file_too_large", "1")) 809 if options.email: 810 form_fields.append(("user", options.email)) 811 ctype, body = EncodeMultipartFormData(form_fields, 812 [("data", filename, content)]) 813 response_body = rpc_server.Send(url, body, 814 content_type=ctype) 815 if not response_body.startswith("OK"): 816 StatusUpdate(" --> %s" % response_body) 817 sys.exit(1) 818 819 patches = dict() 820 [patches.setdefault(v, k) for k, v in patch_list] 821 for filename in patches.keys(): 822 base_content, new_content, is_binary, status = files[filename] 823 file_id_str = patches.get(filename) 824 if file_id_str.find("nobase") != -1: 825 base_content = None 826 file_id_str = file_id_str[file_id_str.rfind("_") + 1:] 827 file_id = int(file_id_str) 828 if base_content != None: 829 UploadFile(filename, file_id, base_content, is_binary, status, True) 830 if new_content != None: 831 UploadFile(filename, file_id, new_content, is_binary, status, False) 832 833 def IsImage(self, filename): 834 """Returns true if the filename has an image extension.""" 835 mimetype = mimetypes.guess_type(filename)[0] 836 if not mimetype: 837 return False 838 return mimetype.startswith("image/") 839 840 def IsBinary(self, filename): 841 """Returns true if the guessed mimetyped isnt't in text group.""" 842 mimetype = mimetypes.guess_type(filename)[0] 843 if not mimetype: 844 return False # e.g. README, "real" binaries usually have an extension 845 # special case for text files which don't start with text/ 846 if mimetype in TEXT_MIMETYPES: 847 return False 848 return not mimetype.startswith("text/") 849 850 851class SubversionVCS(VersionControlSystem): 852 """Implementation of the VersionControlSystem interface for Subversion.""" 853 854 def __init__(self, options): 855 super(SubversionVCS, self).__init__(options) 856 if self.options.revision: 857 match = re.match(r"(\d+)(:(\d+))?", self.options.revision) 858 if not match: 859 ErrorExit("Invalid Subversion revision %s." % self.options.revision) 860 self.rev_start = match.group(1) 861 self.rev_end = match.group(3) 862 else: 863 self.rev_start = self.rev_end = None 864 # Cache output from "svn list -r REVNO dirname". 865 # Keys: dirname, Values: 2-tuple (ouput for start rev and end rev). 866 self.svnls_cache = {} 867 # Base URL is required to fetch files deleted in an older revision. 868 # Result is cached to not guess it over and over again in GetBaseFile(). 869 required = self.options.download_base or self.options.revision is not None 870 self.svn_base = self._GuessBase(required) 871 872 def GuessBase(self, required): 873 """Wrapper for _GuessBase.""" 874 return self.svn_base 875 876 def _GuessBase(self, required): 877 """Returns the SVN base URL. 878 879 Args: 880 required: If true, exits if the url can't be guessed, otherwise None is 881 returned. 882 """ 883 info = RunShell(["svn", "info"]) 884 for line in info.splitlines(): 885 words = line.split() 886 if len(words) == 2 and words[0] == "URL:": 887 url = words[1] 888 scheme, netloc, path, params, query, fragment = urlparse.urlparse(url) 889 username, netloc = urllib.splituser(netloc) 890 if username: 891 logging.info("Removed username from base URL") 892 if netloc.endswith("svn.python.org"): 893 if netloc == "svn.python.org": 894 if path.startswith("/projects/"): 895 path = path[9:] 896 elif netloc != "pythondev@svn.python.org": 897 ErrorExit("Unrecognized Python URL: %s" % url) 898 base = "http://svn.python.org/view/*checkout*%s/" % path 899 logging.info("Guessed Python base = %s", base) 900 elif netloc.endswith("svn.collab.net"): 901 if path.startswith("/repos/"): 902 path = path[6:] 903 base = "http://svn.collab.net/viewvc/*checkout*%s/" % path 904 logging.info("Guessed CollabNet base = %s", base) 905 elif netloc.endswith(".googlecode.com"): 906 path = path + "/" 907 base = urlparse.urlunparse(("http", netloc, path, params, 908 query, fragment)) 909 logging.info("Guessed Google Code base = %s", base) 910 else: 911 path = path + "/" 912 base = urlparse.urlunparse((scheme, netloc, path, params, 913 query, fragment)) 914 logging.info("Guessed base = %s", base) 915 return base 916 if required: 917 ErrorExit("Can't find URL in output from svn info") 918 return None 919 920 def GenerateDiff(self, args): 921 cmd = ["svn", "diff"] 922 if self.options.revision: 923 cmd += ["-r", self.options.revision] 924 cmd.extend(args) 925 data = RunShell(cmd) 926 count = 0 927 for line in data.splitlines(): 928 if line.startswith("Index:") or line.startswith("Property changes on:"): 929 count += 1 930 logging.info(line) 931 if not count: 932 ErrorExit("No valid patches found in output from svn diff") 933 return data 934 935 def _CollapseKeywords(self, content, keyword_str): 936 """Collapses SVN keywords.""" 937 # svn cat translates keywords but svn diff doesn't. As a result of this 938 # behavior patching.PatchChunks() fails with a chunk mismatch error. 939 # This part was originally written by the Review Board development team 940 # who had the same problem (http://reviews.review-board.org/r/276/). 941 # Mapping of keywords to known aliases 942 svn_keywords = { 943 # Standard keywords 944 'Date': ['Date', 'LastChangedDate'], 945 'Revision': ['Revision', 'LastChangedRevision', 'Rev'], 946 'Author': ['Author', 'LastChangedBy'], 947 'HeadURL': ['HeadURL', 'URL'], 948 'Id': ['Id'], 949 950 # Aliases 951 'LastChangedDate': ['LastChangedDate', 'Date'], 952 'LastChangedRevision': ['LastChangedRevision', 'Rev', 'Revision'], 953 'LastChangedBy': ['LastChangedBy', 'Author'], 954 'URL': ['URL', 'HeadURL'], 955 } 956 957 def repl(m): 958 if m.group(2): 959 return "$%s::%s$" % (m.group(1), " " * len(m.group(3))) 960 return "$%s$" % m.group(1) 961 keywords = [keyword 962 for name in keyword_str.split(" ") 963 for keyword in svn_keywords.get(name, [])] 964 return re.sub(r"\$(%s):(:?)([^\$]+)\$" % '|'.join(keywords), repl, content) 965 966 def GetUnknownFiles(self): 967 status = RunShell(["svn", "status", "--ignore-externals"], silent_ok=True) 968 unknown_files = [] 969 for line in status.split("\n"): 970 if line and line[0] == "?": 971 unknown_files.append(line) 972 return unknown_files 973 974 def ReadFile(self, filename): 975 """Returns the contents of a file.""" 976 file = open(filename, 'rb') 977 result = "" 978 try: 979 result = file.read() 980 finally: 981 file.close() 982 return result 983 984 def GetStatus(self, filename): 985 """Returns the status of a file.""" 986 if not self.options.revision: 987 status = RunShell(["svn", "status", "--ignore-externals", filename]) 988 if not status: 989 ErrorExit("svn status returned no output for %s" % filename) 990 status_lines = status.splitlines() 991 # If file is in a cl, the output will begin with 992 # "\n--- Changelist 'cl_name':\n". See 993 # http://svn.collab.net/repos/svn/trunk/notes/changelist-design.txt 994 if (len(status_lines) == 3 and 995 not status_lines[0] and 996 status_lines[1].startswith("--- Changelist")): 997 status = status_lines[2] 998 else: 999 status = status_lines[0] 1000 # If we have a revision to diff against we need to run "svn list" 1001 # for the old and the new revision and compare the results to get 1002 # the correct status for a file. 1003 else: 1004 dirname, relfilename = os.path.split(filename) 1005 if dirname not in self.svnls_cache: 1006 cmd = ["svn", "list", "-r", self.rev_start, dirname or "."] 1007 out, returncode = RunShellWithReturnCode(cmd) 1008 if returncode: 1009 ErrorExit("Failed to get status for %s." % filename) 1010 old_files = out.splitlines() 1011 args = ["svn", "list"] 1012 if self.rev_end: 1013 args += ["-r", self.rev_end] 1014 cmd = args + [dirname or "."] 1015 out, returncode = RunShellWithReturnCode(cmd) 1016 if returncode: 1017 ErrorExit("Failed to run command %s" % cmd) 1018 self.svnls_cache[dirname] = (old_files, out.splitlines()) 1019 old_files, new_files = self.svnls_cache[dirname] 1020 if relfilename in old_files and relfilename not in new_files: 1021 status = "D " 1022 elif relfilename in old_files and relfilename in new_files: 1023 status = "M " 1024 else: 1025 status = "A " 1026 return status 1027 1028 def GetBaseFile(self, filename): 1029 status = self.GetStatus(filename) 1030 base_content = None 1031 new_content = None 1032 1033 # If a file is copied its status will be "A +", which signifies 1034 # "addition-with-history". See "svn st" for more information. We need to 1035 # upload the original file or else diff parsing will fail if the file was 1036 # edited. 1037 if status[0] == "A" and status[3] != "+": 1038 # We'll need to upload the new content if we're adding a binary file 1039 # since diff's output won't contain it. 1040 mimetype = RunShell(["svn", "propget", "svn:mime-type", filename], 1041 silent_ok=True) 1042 base_content = "" 1043 is_binary = bool(mimetype) and not mimetype.startswith("text/") 1044 if is_binary and self.IsImage(filename): 1045 new_content = self.ReadFile(filename) 1046 elif (status[0] in ("M", "D", "R") or 1047 (status[0] == "A" and status[3] == "+") or # Copied file. 1048 (status[0] == " " and status[1] == "M")): # Property change. 1049 args = [] 1050 if self.options.revision: 1051 url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start) 1052 else: 1053 # Don't change filename, it's needed later. 1054 url = filename 1055 args += ["-r", "BASE"] 1056 cmd = ["svn"] + args + ["propget", "svn:mime-type", url] 1057 mimetype, returncode = RunShellWithReturnCode(cmd) 1058 if returncode: 1059 # File does not exist in the requested revision. 1060 # Reset mimetype, it contains an error message. 1061 mimetype = "" 1062 get_base = False 1063 is_binary = bool(mimetype) and not mimetype.startswith("text/") 1064 if status[0] == " ": 1065 # Empty base content just to force an upload. 1066 base_content = "" 1067 elif is_binary: 1068 if self.IsImage(filename): 1069 get_base = True 1070 if status[0] == "M": 1071 if not self.rev_end: 1072 new_content = self.ReadFile(filename) 1073 else: 1074 url = "%s/%s@%s" % (self.svn_base, filename, self.rev_end) 1075 new_content = RunShell(["svn", "cat", url], 1076 universal_newlines=True, silent_ok=True) 1077 else: 1078 base_content = "" 1079 else: 1080 get_base = True 1081 1082 if get_base: 1083 if is_binary: 1084 universal_newlines = False 1085 else: 1086 universal_newlines = True 1087 if self.rev_start: 1088 # "svn cat -r REV delete_file.txt" doesn't work. cat requires 1089 # the full URL with "@REV" appended instead of using "-r" option. 1090 url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start) 1091 base_content = RunShell(["svn", "cat", url], 1092 universal_newlines=universal_newlines, 1093 silent_ok=True) 1094 else: 1095 base_content, ret_code = RunShellWithReturnCode( 1096 ["svn", "cat", filename], universal_newlines=universal_newlines) 1097 if ret_code and status[0] == "R": 1098 # It's a replaced file without local history (see issue208). 1099 # The base file needs to be fetched from the server. 1100 url = "%s/%s" % (self.svn_base, filename) 1101 base_content = RunShell(["svn", "cat", url], 1102 universal_newlines=universal_newlines, 1103 silent_ok=True) 1104 elif ret_code: 1105 ErrorExit("Got error status from 'svn cat %s'" % filename) 1106 if not is_binary: 1107 args = [] 1108 if self.rev_start: 1109 url = "%s/%s@%s" % (self.svn_base, filename, self.rev_start) 1110 else: 1111 url = filename 1112 args += ["-r", "BASE"] 1113 cmd = ["svn"] + args + ["propget", "svn:keywords", url] 1114 keywords, returncode = RunShellWithReturnCode(cmd) 1115 if keywords and not returncode: 1116 base_content = self._CollapseKeywords(base_content, keywords) 1117 else: 1118 StatusUpdate("svn status returned unexpected output: %s" % status) 1119 sys.exit(1) 1120 return base_content, new_content, is_binary, status[0:5] 1121 1122 1123class GitVCS(VersionControlSystem): 1124 """Implementation of the VersionControlSystem interface for Git.""" 1125 1126 def __init__(self, options): 1127 super(GitVCS, self).__init__(options) 1128 # Map of filename -> (hash before, hash after) of base file. 1129 # Hashes for "no such file" are represented as None. 1130 self.hashes = {} 1131 # Map of new filename -> old filename for renames. 1132 self.renames = {} 1133 1134 def PostProcessDiff(self, gitdiff): 1135 """Converts the diff output to include an svn-style "Index:" line as well 1136 as record the hashes of the files, so we can upload them along with our 1137 diff.""" 1138 # Special used by git to indicate "no such content". 1139 NULL_HASH = "0"*40 1140 1141 def IsFileNew(filename): 1142 return filename in self.hashes and self.hashes[filename][0] is None 1143 1144 def AddSubversionPropertyChange(filename): 1145 """Add svn's property change information into the patch if given file is 1146 new file. 1147 1148 We use Subversion's auto-props setting to retrieve its property. 1149 See http://svnbook.red-bean.com/en/1.1/ch07.html#svn-ch-7-sect-1.3.2 for 1150 Subversion's [auto-props] setting. 1151 """ 1152 if self.options.emulate_svn_auto_props and IsFileNew(filename): 1153 svnprops = GetSubversionPropertyChanges(filename) 1154 if svnprops: 1155 svndiff.append("\n" + svnprops + "\n") 1156 1157 svndiff = [] 1158 filecount = 0 1159 filename = None 1160 for line in gitdiff.splitlines(): 1161 match = re.match(r"diff --git a/(.*) b/(.*)$", line) 1162 if match: 1163 # Add auto property here for previously seen file. 1164 if filename is not None: 1165 AddSubversionPropertyChange(filename) 1166 filecount += 1 1167 # Intentionally use the "after" filename so we can show renames. 1168 filename = match.group(2) 1169 svndiff.append("Index: %s\n" % filename) 1170 if match.group(1) != match.group(2): 1171 self.renames[match.group(2)] = match.group(1) 1172 else: 1173 # The "index" line in a git diff looks like this (long hashes elided): 1174 # index 82c0d44..b2cee3f 100755 1175 # We want to save the left hash, as that identifies the base file. 1176 match = re.match(r"index (\w+)\.\.(\w+)", line) 1177 if match: 1178 before, after = (match.group(1), match.group(2)) 1179 if before == NULL_HASH: 1180 before = None 1181 if after == NULL_HASH: 1182 after = None 1183 self.hashes[filename] = (before, after) 1184 svndiff.append(line + "\n") 1185 if not filecount: 1186 ErrorExit("No valid patches found in output from git diff") 1187 # Add auto property for the last seen file. 1188 assert filename is not None 1189 AddSubversionPropertyChange(filename) 1190 return "".join(svndiff) 1191 1192 def GenerateDiff(self, extra_args): 1193 extra_args = extra_args[:] 1194 if self.options.revision: 1195 if ":" in self.options.revision: 1196 extra_args = self.options.revision.split(":", 1) + extra_args 1197 else: 1198 extra_args = [self.options.revision] + extra_args 1199 1200 # --no-ext-diff is broken in some versions of Git, so try to work around 1201 # this by overriding the environment (but there is still a problem if the 1202 # git config key "diff.external" is used). 1203 env = os.environ.copy() 1204 if 'GIT_EXTERNAL_DIFF' in env: del env['GIT_EXTERNAL_DIFF'] 1205 return RunShell(["git", "diff", "--no-ext-diff", "--full-index", "-M"] 1206 + extra_args, env=env) 1207 1208 def GetUnknownFiles(self): 1209 status = RunShell(["git", "ls-files", "--exclude-standard", "--others"], 1210 silent_ok=True) 1211 return status.splitlines() 1212 1213 def GetFileContent(self, file_hash, is_binary): 1214 """Returns the content of a file identified by its git hash.""" 1215 data, retcode = RunShellWithReturnCode(["git", "show", file_hash], 1216 universal_newlines=not is_binary) 1217 if retcode: 1218 ErrorExit("Got error status from 'git show %s'" % file_hash) 1219 return data 1220 1221 def GetBaseFile(self, filename): 1222 hash_before, hash_after = self.hashes.get(filename, (None,None)) 1223 base_content = None 1224 new_content = None 1225 is_binary = self.IsBinary(filename) 1226 status = None 1227 1228 if filename in self.renames: 1229 status = "A +" # Match svn attribute name for renames. 1230 if filename not in self.hashes: 1231 # If a rename doesn't change the content, we never get a hash. 1232 base_content = RunShell(["git", "show", "HEAD:" + filename]) 1233 elif not hash_before: 1234 status = "A" 1235 base_content = "" 1236 elif not hash_after: 1237 status = "D" 1238 else: 1239 status = "M" 1240 1241 is_image = self.IsImage(filename) 1242 1243 # Grab the before/after content if we need it. 1244 # We should include file contents if it's text or it's an image. 1245 if not is_binary or is_image: 1246 # Grab the base content if we don't have it already. 1247 if base_content is None and hash_before: 1248 base_content = self.GetFileContent(hash_before, is_binary) 1249 # Only include the "after" file if it's an image; otherwise it 1250 # it is reconstructed from the diff. 1251 if is_image and hash_after: 1252 new_content = self.GetFileContent(hash_after, is_binary) 1253 1254 return (base_content, new_content, is_binary, status) 1255 1256 1257class MercurialVCS(VersionControlSystem): 1258 """Implementation of the VersionControlSystem interface for Mercurial.""" 1259 1260 def __init__(self, options, repo_dir): 1261 super(MercurialVCS, self).__init__(options) 1262 # Absolute path to repository (we can be in a subdir) 1263 self.repo_dir = os.path.normpath(repo_dir) 1264 # Compute the subdir 1265 cwd = os.path.normpath(os.getcwd()) 1266 assert cwd.startswith(self.repo_dir) 1267 self.subdir = cwd[len(self.repo_dir):].lstrip(r"\/") 1268 if self.options.revision: 1269 self.base_rev = self.options.revision 1270 else: 1271 self.base_rev = RunShell(["hg", "parent", "-q"]).split(':')[1].strip() 1272 1273 def _GetRelPath(self, filename): 1274 """Get relative path of a file according to the current directory, 1275 given its logical path in the repo.""" 1276 assert filename.startswith(self.subdir), (filename, self.subdir) 1277 return filename[len(self.subdir):].lstrip(r"\/") 1278 1279 def GenerateDiff(self, extra_args): 1280 cmd = ["hg", "diff", "--git", "-r", self.base_rev] + extra_args 1281 data = RunShell(cmd, silent_ok=True) 1282 svndiff = [] 1283 filecount = 0 1284 for line in data.splitlines(): 1285 m = re.match("diff --git a/(\S+) b/(\S+)", line) 1286 if m: 1287 # Modify line to make it look like as it comes from svn diff. 1288 # With this modification no changes on the server side are required 1289 # to make upload.py work with Mercurial repos. 1290 # NOTE: for proper handling of moved/copied files, we have to use 1291 # the second filename. 1292 filename = m.group(2) 1293 svndiff.append("Index: %s" % filename) 1294 svndiff.append("=" * 67) 1295 filecount += 1 1296 logging.info(line) 1297 else: 1298 svndiff.append(line) 1299 if not filecount: 1300 ErrorExit("No valid patches found in output from hg diff") 1301 return "\n".join(svndiff) + "\n" 1302 1303 def GetUnknownFiles(self): 1304 """Return a list of files unknown to the VCS.""" 1305 args = [] 1306 status = RunShell(["hg", "status", "--rev", self.base_rev, "-u", "."], 1307 silent_ok=True) 1308 unknown_files = [] 1309 for line in status.splitlines(): 1310 st, fn = line.split(" ", 1) 1311 if st == "?": 1312 unknown_files.append(fn) 1313 return unknown_files 1314 1315 def GetBaseFile(self, filename): 1316 # "hg status" and "hg cat" both take a path relative to the current subdir 1317 # rather than to the repo root, but "hg diff" has given us the full path 1318 # to the repo root. 1319 base_content = "" 1320 new_content = None 1321 is_binary = False 1322 oldrelpath = relpath = self._GetRelPath(filename) 1323 # "hg status -…
Large files files are truncated, but you can click here to view the full file