PageRenderTime 647ms CodeModel.GetById 211ms app.highlight 173ms RepoModel.GetById 256ms app.codeStats 0ms

/patch.py

https://bitbucket.org/jpellerin/nose/
Python | 639 lines | 547 code | 43 blank | 49 comment | 5 complexity | 2bb2d7fee63254d6bfb4e0d9d91720d2 MD5 | raw file
  1""" Patch utility to apply unified diffs
  2
  3    Brute-force line-by-line non-recursive parsing 
  4
  5    Copyright (c) 2008-2010 anatoly techtonik
  6    Available under the terms of MIT license
  7
  8    NOTE: This version has been patched by Alex Stewart <alex@foogod.com> for
  9    Python 3.x support and other misc fixups.
 10
 11    Project home: http://code.google.com/p/python-patch/
 12
 13
 14    $Id: patch.py 92 2010-07-02 06:04:57Z techtonik $
 15    $HeadURL: http://python-patch.googlecode.com/svn/trunk/patch.py $
 16"""
 17
 18__author__ = "techtonik.rainforce.org"
 19__version__ = "10.04-2.pAS1"
 20
 21import copy
 22import logging
 23import re
 24from logging import debug, info, warning
 25import sys
 26
 27try:
 28  # cStringIO doesn't support unicode in 2.5
 29  from StringIO import StringIO
 30except ImportError:
 31  # StringIO has been renamed to 'io' in 3.x
 32  from io import StringIO
 33
 34from os.path import exists, isfile, abspath
 35from os import unlink
 36
 37_open = open
 38
 39if sys.version_info >= (3,):
 40    # Open files with universal newline support but no newline translation (3.x)
 41    def open(filename, mode='r'):
 42        return _open(filename, mode, newline='')
 43else:
 44    # Open files with universal newline support but no newline translation (2.x)
 45    def open(filename, mode='r'):
 46        return _open(filename, mode + 'b')
 47
 48    # Python 3.x has changed iter.next() to be next(iter) instead, so for
 49    # backwards compatibility, we'll just define a next() function under 2.x
 50    def next(iter):
 51        return iter.next()
 52
 53
 54#------------------------------------------------
 55# Logging is controlled by "python_patch" logger
 56
 57debugmode = False
 58
 59logger = logging.getLogger("python_patch")
 60loghandler = logging.StreamHandler()
 61logger.addHandler(loghandler)
 62
 63debug = logger.debug
 64info = logger.info
 65warning = logger.warning
 66
 67# If called as a library, don't log info/debug messages by default.
 68logger.setLevel(logging.WARN)
 69
 70#------------------------------------------------
 71
 72# constants for patch types
 73
 74DIFF = PLAIN = "plain"
 75HG = MERCURIAL = "mercurial"
 76SVN = SUBVERSION = "svn"
 77
 78
 79def fromfile(filename):
 80  """ Parse patch file and return Patch() object
 81  """
 82  info("reading patch from file %s" % filename)
 83  fp = open(filename, "r")
 84  patch = Patch(fp)
 85  fp.close()
 86  return patch
 87
 88
 89def fromstring(s):
 90  """ Parse text string and return Patch() object
 91  """
 92  return Patch( StringIO(s) )
 93
 94
 95
 96class HunkInfo(object):
 97  """ Parsed hunk data container (hunk starts with @@ -R +R @@) """
 98
 99  def __init__(self):
100    self.startsrc=None #: line count starts with 1
101    self.linessrc=None
102    self.starttgt=None
103    self.linestgt=None
104    self.invalid=False
105    self.text=[]
106
107  def copy(self):
108    return copy.copy(self)
109
110#  def apply(self, estream):
111#    """ write hunk data into enumerable stream
112#        return strings one by one until hunk is
113#        over
114#
115#        enumerable stream are tuples (lineno, line)
116#        where lineno starts with 0
117#    """
118#    pass
119
120
121
class Patch(object):
  """ Unified diff parsed into per-file tables, plus the logic to apply it.

      After parse() the attributes `source`, `target`, `hunks` and
      `hunkends` are parallel lists with one entry per patched file.
  """

  # "@@ -start[,count] +start[,count]" - a missing count means 1
  _RE_HUNKHEAD = re.compile(r"^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")
  # file name lines; the name ends at the first tab
  _RE_SOURCE = re.compile(r"^--- ([^\t]+)")
  _RE_TARGET = re.compile(r"^\+\+\+ ([^\t]+)")
  # first character of any line that may appear inside a hunk body
  _RE_HUNKLINE = re.compile(r"^[- \+\\]")

  def __init__(self, stream=None):
    """ Create an empty patch, or parse `stream` right away if it is given

        :param stream: iterable of patch lines (e.g. an open file)
    """

    # define Patch data members
    # table with a row for every source file

    #: list of source filenames
    self.source=None
    self.target=None
    #: list of lists of hunks
    self.hunks=None
    #: file endings statistics for every hunk
    self.hunkends=None
    #: headers for each file
    self.header=None

    #: patch type - one of constants
    self.type = None

    if stream:
      self.parse(stream)

  def copy(self):
    """ Return a shallow copy (the per-file lists are shared with self) """
    return copy.copy(self)

  def parse(self, stream):
    """ parse unified diff

        Line-by-line state machine. Exactly one of the state flags below
        is active at a time; a line may be handed from one state to the
        next within the same iteration.

        :param stream: iterable yielding the lines of the patch
    """
    self.header = []

    self.source = []
    self.target = []
    self.hunks = []
    self.hunkends = []

    # define possible file regions that will direct the parser flow
    headscan  = True  # scanning header before the patch body
    filenames = False # lines starting with --- and +++

    hunkhead = False  # @@ -R +R @@ sequence
    hunkbody = False  #
    hunkskip = False  # skipping invalid hunk mode

    lineends = dict(lf=0, crlf=0, cr=0)
    nextfileno = 0    #: number of files with both names parsed so far
    nexthunkno = 0    #: even if index starts with 0 user messages number hunks from 1

    # hunkinfo holds parsed values, hunkactual - calculated
    hunkinfo = HunkInfo()
    hunkactual = dict(linessrc=None, linestgt=None)

    fe = enumerate(stream)
    for lineno, line in fe:

      # read out header
      if headscan:
        headerlines = []
        try:
          while not line.startswith("--- "):
            headerlines.append(line)
            lineno, line = next(fe)
        except StopIteration:
          # input ended while still inside a header - nothing left to parse
          break
        self.header.append("".join(headerlines))

        headscan = False
        # switch to filenames state
        filenames = True

      # hunkskip and hunkbody code skipped until definition of hunkhead is parsed
      if hunkbody:
        rejected = False
        # process line first
        if self._RE_HUNKLINE.match(line):
          # gather stats about line endings
          ends = self.hunkends[nextfileno-1]
          if line.endswith("\r\n"):
            ends["crlf"] += 1
          elif line.endswith("\n"):
            ends["lf"] += 1
          elif line.endswith("\r"):
            ends["cr"] += 1

          if line.startswith("-"):
            hunkactual["linessrc"] += 1
          elif line.startswith("+"):
            hunkactual["linestgt"] += 1
          elif not line.startswith("\\"):
            # context line counts for both sides
            hunkactual["linessrc"] += 1
            hunkactual["linestgt"] += 1
          hunkinfo.text.append(line)
          # todo: handle \ No newline cases

          if hunkactual["linessrc"] > hunkinfo.linessrc or hunkactual["linestgt"] > hunkinfo.linestgt:
            warning("extra hunk no.%d lines at %d for target %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
            rejected = True
        else:
          warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
          rejected = True

        # check exit conditions
        if rejected:
          # add hunk status node, flagged on the stored copy only
          badhunk = hunkinfo.copy()
          badhunk.invalid = True
          self.hunks[nextfileno-1].append(badhunk)
          # switch to hunkskip state (the current line is re-examined below)
          hunkbody = False
          hunkskip = True
        elif hunkinfo.linessrc == hunkactual["linessrc"] and hunkinfo.linestgt == hunkactual["linestgt"]:
          self.hunks[nextfileno-1].append(hunkinfo.copy())
          # switch to hunkskip state
          hunkbody = False
          hunkskip = True

          # detect mixed window/unix line ends
          ends = self.hunkends[nextfileno-1]
          if ((ends["cr"]!=0) + (ends["crlf"]!=0) + (ends["lf"]!=0)) > 1:
            warning("inconsistent line ends in patch hunks for %s" % self.source[nextfileno-1])
          if debugmode:
            debuglines = dict(ends)
            debuglines.update(file=self.target[nextfileno-1], hunk=nexthunkno)
            debug("crlf: %(crlf)d  lf: %(lf)d  cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines)

      if hunkskip:
        if self._RE_HUNKHEAD.match(line):
          # switch to hunkhead state
          hunkskip = False
          hunkhead = True
        elif line.startswith("--- "):
          # switch to filenames state
          hunkskip = False
          filenames = True
          if debugmode and len(self.source) > 0:
            debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))

      if filenames:
        # a source name without its target sits at index nextfileno
        dangling = nextfileno < len(self.source)
        if line.startswith("--- "):
          if dangling:
            # double source filename line is encountered
            # attempt to restart from this second line
            warning("skipping invalid patch for %s" % self.source[nextfileno])
            del self.source[nextfileno]
          match = self._RE_SOURCE.match(line)
          # todo: support spaces in filenames
          if match:
            self.source.append(match.group(1).strip())
          else:
            warning("skipping invalid filename at line %d" % (lineno+1))
            # switch back to headscan state
            filenames = False
            headscan = True
        elif not line.startswith("+++ "):
          if dangling:
            warning("skipping invalid patch with no target for %s" % self.source[nextfileno])
            del self.source[nextfileno]
          else:
            # this should be unreachable
            warning("skipping invalid target patch")
          filenames = False
          headscan = True
        else:
          match = self._RE_TARGET.match(line)
          if not match:
            warning("skipping invalid patch - no target filename at line %d" % (lineno+1))
            # keep source/target tables aligned
            if dangling:
              del self.source[nextfileno]
            # switch back to headscan state
            filenames = False
            headscan = True
          else:
            self.target.append(match.group(1).strip())
            nextfileno += 1
            # switch to hunkhead state
            filenames = False
            hunkhead = True
            nexthunkno = 0
            self.hunks.append([])
            self.hunkends.append(lineends.copy())
            continue

      if hunkhead:
        match = self._RE_HUNKHEAD.match(line)
        if not match:
          # only reachable on the line right after "+++", i.e. before any
          # hunk of this file has been read
          warning("skipping invalid patch with no hunks for file %s" % self.target[nextfileno-1])
          # switch to headscan state
          hunkhead = False
          headscan = True
          continue
        else:
          hunkinfo.startsrc = int(match.group(1))
          hunkinfo.linessrc = 1
          if match.group(3): hunkinfo.linessrc = int(match.group(3))
          hunkinfo.starttgt = int(match.group(4))
          hunkinfo.linestgt = 1
          if match.group(6): hunkinfo.linestgt = int(match.group(6))
          hunkinfo.invalid = False
          hunkinfo.text = []

          hunkactual["linessrc"] = hunkactual["linestgt"] = 0

          # switch to hunkbody state
          hunkhead = False
          hunkbody = True
          nexthunkno += 1
          continue

    if len(self.source) > nextfileno:
      # input ended right after a "---" line: drop the unpaired source
      # name so that source/target/hunks stay parallel
      del self.source[nextfileno:]

    if not hunkskip:
      # streams without a name (e.g. StringIO) get a placeholder
      warning("patch file incomplete - %s" % getattr(stream, "name", "<stream>"))
      # sys.exit(?)
    else:
      # duplicated message when an eof is reached
      if debugmode and len(self.source) > 0:
          debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))

    info("total files: %d  total hunks: %d" % (len(self.source), sum([len(hset) for hset in self.hunks])))


  def apply(self):
    """ apply parsed patch

        For every file: verify that the hunks match the file on disk,
        move the file to "<name>.orig", write the patched version under
        the original name and remove the backup on success. Problems are
        reported through the logger; nothing is returned.
    """
    import shutil

    total = len(self.source)
    for fileno, filename in enumerate(self.source):

      # fall back to the target name when the source name does not exist
      f2patch = filename
      if not exists(f2patch):
        f2patch = self.target[fileno]
        if not exists(f2patch):
          warning("source/target file does not exist\n--- %s\n+++ %s" % (filename, f2patch))
          continue
      if not isfile(f2patch):
        warning("not a file - %s" % f2patch)
        continue
      filename = f2patch

      info("processing %d/%d:\t %s" % (fileno+1, total, filename))

      hunks = self.hunks[fileno]
      if not hunks:
        # parse() keeps files whose hunks could not be read
        warning("no hunks to apply for %s" % filename)
        continue

      # validate before patching
      hunkno = 0
      hunk = hunks[hunkno]
      hunkfind = []
      hunklineno = 0
      validhunks = 0
      canpatch = False
      f2fp = open(filename)
      try:
        for lineno, line in enumerate(f2fp):
          if lineno+1 < hunk.startsrc:
            continue
          elif lineno+1 == hunk.startsrc:
            # lines the hunk expects to find in the source file
            hunkfind = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " -"]
            hunklineno = 0

            # todo \ No newline at end of file

          # check hunks in source file
          if lineno+1 < hunk.startsrc+len(hunkfind)-1:
            if line.rstrip("\r\n") == hunkfind[hunklineno]:
              hunklineno+=1
            else:
              debug("hunk no.%d doesn't match source file %s" % (hunkno+1, filename))
              # file may be already patched, but we will check other hunks anyway
              hunkno += 1
              if hunkno < len(hunks):
                hunk = hunks[hunkno]
                continue
              else:
                break

          # check if processed line is the last line
          if lineno+1 == hunk.startsrc+len(hunkfind)-1:
            debug("file %s hunk no.%d -- is ready to be patched" % (filename, hunkno+1))
            hunkno+=1
            validhunks+=1
            if hunkno < len(hunks):
              hunk = hunks[hunkno]
            else:
              if validhunks == len(hunks):
                # patch file
                canpatch = True
                break
        else:
          if hunkno < len(hunks):
            warning("premature end of source file %s at hunk %d" % (filename, hunkno+1))
      finally:
        f2fp.close()

      if validhunks < len(hunks):
        if self._match_file_hunks(filename, hunks):
          warning("already patched  %s" % filename)
        else:
          warning("source file is different - %s" % filename)
      if canpatch:
        backupname = filename+".orig"
        if exists(backupname):
          warning("can't backup original file to %s - aborting" % backupname)
        else:
          shutil.move(filename, backupname)
          if self.write_hunks(backupname, filename, hunks):
            info("successfully patched %s" % filename)
            unlink(backupname)
          else:
            warning("error patching file %s" % filename)
            shutil.copy(filename, filename+".invalid")
            warning("invalid version is saved to %s" % (filename+".invalid"))
            # todo: proper rejects
            shutil.move(backupname, filename)

    # todo: check for premature eof


  def can_patch(self, filename):
    """ Check if specified filename can be patched. Returns None if file can
    not be found among source filenames. False if patch can not be applied
    clearly. True otherwise.

    NOTE(review): the check is delegated to _match_file_hunks(), which
    compares the file with the target-side lines of the hunks - confirm
    this is the intended meaning of "can be patched".

    :returns: True, False or None
    """
    idx = self._get_file_idx(filename, source=True)
    if idx is None:
      return None
    return self._match_file_hunks(filename, self.hunks[idx])


  def _match_file_hunks(self, filepath, hunks):
    """ Compare a file with the target side of the given hunks.

        Each hunk is looked up at its `starttgt` line; "-" lines are
        ignored and every other hunk line must equal the file line,
        line endings aside.

        :returns: True if all hunks match, False otherwise
    """
    fp = open(abspath(filepath))
    try:
      lineno = 1
      line = fp.readline()
      for hno, h in enumerate(hunks):
        # skip to first line of the hunk
        while lineno < h.starttgt:
          if not line: # eof
            debug("check failed - premature eof before hunk: %d" % (hno+1))
            return False
          line = fp.readline()
          lineno += 1
        for hline in h.text:
          if hline.startswith("-"):
            continue
          if not line:
            debug("check failed - premature eof on hunk: %d" % (hno+1))
            # todo: \ No newline at the end of file
            return False
          if line.rstrip("\r\n") != hline[1:].rstrip("\r\n"):
            debug("file is not patched - failed hunk: %d" % (hno+1))
            # todo: display failed hunk, i.e. expected/found
            return False
          line = fp.readline()
          lineno += 1
      return True
    finally:
      fp.close()


  def patch_stream(self, instream, hunks):
    """ Generator that yields stream patched with hunks iterable

        Converts lineends in hunk lines to the best suitable format
        autodetected from input

        :param instream: source stream, read with readline()
        :param hunks: iterable of hunks ordered by `startsrc`
    """

    # todo: At the moment substituted lineends may not be the same
    #       at the start and at the end of patching. Also issue a
    #       warning/throw about mixed lineends (is it really needed?)

    hunks = iter(hunks)

    srclineno = 1

    lineends = {'\n':0, '\r\n':0, '\r':0}
    def get_line():
      """
      local utility function - return line from source stream
      collecting line end statistics on the way
      """
      line = instream.readline()
      # 'U' mode works only with text files
      if line.endswith("\r\n"):
        lineends["\r\n"] += 1
      elif line.endswith("\n"):
        lineends["\n"] += 1
      elif line.endswith("\r"):
        lineends["\r"] += 1
      return line

    for hno, h in enumerate(hunks):
      debug("hunk %d" % (hno+1))
      # copy source lines through until the hunk starts
      while srclineno < h.startsrc:
        yield get_line()
        srclineno += 1

      for hline in h.text:
        # todo: check \ No newline at the end of file
        if hline.startswith("-") or hline.startswith("\\"):
          # consume a source line without emitting anything
          get_line()
          srclineno += 1
          continue

        if not hline.startswith("+"):
          # context line: consume the source line it stands for
          get_line()
          srclineno += 1
        line2write = hline[1:]
        # detect if line ends are consistent in source file
        seen = [x for x in lineends if lineends[x] != 0]
        if len(seen) == 1:
          yield line2write.rstrip("\r\n")+seen[0]
        else: # newlines are mixed (or none were read yet)
          yield line2write

    # pass the rest of the source through unchanged
    for line in instream:
      yield line


  def write_hunks(self, srcname, tgtname, hunks):
    """ Write `srcname` patched with `hunks` into `tgtname`.

        Both files are closed even if patching raises.

        :returns: True
    """
    src = open(srcname, "r")
    try:
      tgt = open(tgtname, "w")
      try:
        debug("processing target file %s" % tgtname)
        tgt.writelines(self.patch_stream(src, hunks))
      finally:
        tgt.close()
    finally:
      src.close()
    return True


  def _get_file_idx(self, filename, source=None):
    """ Detect index of given filename within patch.

        :param filename:
        :param source: search filename among sources (True),
                       targets (False), or both (None)
        :returns: int or None
    """
    filename = abspath(filename)
    if source is None or source:
      for i,fnm in enumerate(self.source):
        if filename == abspath(fnm):
          return i
    if source is None or not source:
      for i,fnm in enumerate(self.target):
        if filename == abspath(fnm):
          return i
    return None
596
597
598
599
if __name__ == "__main__":
  from optparse import OptionParser
  from os.path import exists
  import sys

  # command line: patch.py [-d | -q] unipatch-file
  opt = OptionParser(usage="%prog [options] unipatch-file", version="python-patch %s" % __version__)
  opt.add_option("-d", "--debug", action="store_true", dest="debugmode", help="Print debugging messages")
  opt.add_option("-q", "--quiet", action="store_true", dest="quiet", help="Only print messages on warning/error")
  (options, args) = opt.parse_args()

  # without a patch file there is nothing to do - show usage and leave
  if not args:
    opt.print_version()
    opt.print_help()
    sys.exit()
  debugmode = options.debugmode
  patchfile = args[0]
  if not (exists(patchfile) and isfile(patchfile)):
    sys.exit("patch file does not exist - %s" % patchfile)

  # verbosity: --debug wins over --quiet, default is INFO
  if debugmode:
    loglevel, logformat = logging.DEBUG, "%(levelname)8s %(message)s"
  else:
    logformat = "%(message)s"
    if options.quiet:
      loglevel = logging.WARN
    else:
      loglevel = logging.INFO
  logger.setLevel(loglevel)
  loghandler.setFormatter(logging.Formatter(logformat))

  patch = fromfile(patchfile)
  #pprint(patch)
  patch.apply()

  # todo: document and test line ends handling logic - patch.py detects proper line-endings
  #       for inserted hunks and issues a warning if patched file has inconsistent line ends