/patch.py

https://bitbucket.org/jpellerin/nose/ · Python · 639 lines · 409 code · 104 blank · 126 comment · 130 complexity · 2bb2d7fee63254d6bfb4e0d9d91720d2 MD5 · raw file

  1. """ Patch utility to apply unified diffs
  2. Brute-force line-by-line non-recursive parsing
  3. Copyright (c) 2008-2010 anatoly techtonik
  4. Available under the terms of MIT license
  5. NOTE: This version has been patched by Alex Stewart <alex@foogod.com> for
  6. Python 3.x support and other misc fixups.
  7. Project home: http://code.google.com/p/python-patch/
  8. $Id: patch.py 92 2010-07-02 06:04:57Z techtonik $
  9. $HeadURL: http://python-patch.googlecode.com/svn/trunk/patch.py $
  10. """
  11. __author__ = "techtonik.rainforce.org"
  12. __version__ = "10.04-2.pAS1"
  13. import copy
  14. import logging
  15. import re
  16. from logging import debug, info, warning
  17. import sys
  18. try:
  19. # cStringIO doesn't support unicode in 2.5
  20. from StringIO import StringIO
  21. except ImportError:
  22. # StringIO has been renamed to 'io' in 3.x
  23. from io import StringIO
  24. from os.path import exists, isfile, abspath
  25. from os import unlink
  26. _open = open
  27. if sys.version_info >= (3,):
  28. # Open files with universal newline support but no newline translation (3.x)
  29. def open(filename, mode='r'):
  30. return _open(filename, mode, newline='')
  31. else:
  32. # Open files with universal newline support but no newline translation (2.x)
  33. def open(filename, mode='r'):
  34. return _open(filename, mode + 'b')
  35. # Python 3.x has changed iter.next() to be next(iter) instead, so for
  36. # backwards compatibility, we'll just define a next() function under 2.x
  37. def next(iter):
  38. return iter.next()
  39. #------------------------------------------------
  40. # Logging is controlled by "python_patch" logger
  41. debugmode = False
  42. logger = logging.getLogger("python_patch")
  43. loghandler = logging.StreamHandler()
  44. logger.addHandler(loghandler)
  45. debug = logger.debug
  46. info = logger.info
  47. warning = logger.warning
  48. # If called as a library, don't log info/debug messages by default.
  49. logger.setLevel(logging.WARN)
  50. #------------------------------------------------
  51. # constants for patch types
  52. DIFF = PLAIN = "plain"
  53. HG = MERCURIAL = "mercurial"
  54. SVN = SUBVERSION = "svn"
  55. def fromfile(filename):
  56. """ Parse patch file and return Patch() object
  57. """
  58. info("reading patch from file %s" % filename)
  59. fp = open(filename, "r")
  60. patch = Patch(fp)
  61. fp.close()
  62. return patch
  63. def fromstring(s):
  64. """ Parse text string and return Patch() object
  65. """
  66. return Patch( StringIO(s) )
  67. class HunkInfo(object):
  68. """ Parsed hunk data container (hunk starts with @@ -R +R @@) """
  69. def __init__(self):
  70. self.startsrc=None #: line count starts with 1
  71. self.linessrc=None
  72. self.starttgt=None
  73. self.linestgt=None
  74. self.invalid=False
  75. self.text=[]
  76. def copy(self):
  77. return copy.copy(self)
  78. # def apply(self, estream):
  79. # """ write hunk data into enumerable stream
  80. # return strings one by one until hunk is
  81. # over
  82. #
  83. # enumerable stream are tuples (lineno, line)
  84. # where lineno starts with 0
  85. # """
  86. # pass
  87. class Patch(object):
  88. def __init__(self, stream=None):
  89. # define Patch data members
  90. # table with a row for every source file
  91. #: list of source filenames
  92. self.source=None
  93. self.target=None
  94. #: list of lists of hunks
  95. self.hunks=None
  96. #: file endings statistics for every hunk
  97. self.hunkends=None
  98. #: headers for each file
  99. self.header=None
  100. #: patch type - one of constants
  101. self.type = None
  102. if stream:
  103. self.parse(stream)
  104. def copy(self):
  105. return copy.copy(self)
  106. def parse(self, stream):
  107. """ parse unified diff """
  108. self.header = []
  109. self.source = []
  110. self.target = []
  111. self.hunks = []
  112. self.hunkends = []
  113. # define possible file regions that will direct the parser flow
  114. headscan = False # scanning header before the patch body
  115. filenames = False # lines starting with --- and +++
  116. hunkhead = False # @@ -R +R @@ sequence
  117. hunkbody = False #
  118. hunkskip = False # skipping invalid hunk mode
  119. headscan = True
  120. lineends = dict(lf=0, crlf=0, cr=0)
  121. nextfileno = 0
  122. nexthunkno = 0 #: even if index starts with 0 user messages number hunks from 1
  123. # hunkinfo holds parsed values, hunkactual - calculated
  124. hunkinfo = HunkInfo()
  125. hunkactual = dict(linessrc=None, linestgt=None)
  126. fe = enumerate(stream)
  127. for lineno, line in fe:
  128. # read out header
  129. if headscan:
  130. header = ''
  131. try:
  132. while not line.startswith("--- "):
  133. header += line
  134. lineno, line = next(fe)
  135. except StopIteration:
  136. # this is actually a loop exit
  137. continue
  138. self.header.append(header)
  139. headscan = False
  140. # switch to filenames state
  141. filenames = True
  142. # hunkskip and hunkbody code skipped until definition of hunkhead is parsed
  143. if hunkbody:
  144. # process line first
  145. if re.match(r"^[- \+\\]", line):
  146. # gather stats about line endings
  147. if line.endswith("\r\n"):
  148. self.hunkends[nextfileno-1]["crlf"] += 1
  149. elif line.endswith("\n"):
  150. self.hunkends[nextfileno-1]["lf"] += 1
  151. elif line.endswith("\r"):
  152. self.hunkends[nextfileno-1]["cr"] += 1
  153. if line.startswith("-"):
  154. hunkactual["linessrc"] += 1
  155. elif line.startswith("+"):
  156. hunkactual["linestgt"] += 1
  157. elif not line.startswith("\\"):
  158. hunkactual["linessrc"] += 1
  159. hunkactual["linestgt"] += 1
  160. hunkinfo.text.append(line)
  161. # todo: handle \ No newline cases
  162. else:
  163. warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
  164. # add hunk status node
  165. self.hunks[nextfileno-1].append(hunkinfo.copy())
  166. self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True
  167. # switch to hunkskip state
  168. hunkbody = False
  169. hunkskip = True
  170. # check exit conditions
  171. if hunkactual["linessrc"] > hunkinfo.linessrc or hunkactual["linestgt"] > hunkinfo.linestgt:
  172. warning("extra hunk no.%d lines at %d for target %s" % (nexthunkno, lineno+1, self.target[nextfileno-1]))
  173. # add hunk status node
  174. self.hunks[nextfileno-1].append(hunkinfo.copy())
  175. self.hunks[nextfileno-1][nexthunkno-1]["invalid"] = True
  176. # switch to hunkskip state
  177. hunkbody = False
  178. hunkskip = True
  179. elif hunkinfo.linessrc == hunkactual["linessrc"] and hunkinfo.linestgt == hunkactual["linestgt"]:
  180. self.hunks[nextfileno-1].append(hunkinfo.copy())
  181. # switch to hunkskip state
  182. hunkbody = False
  183. hunkskip = True
  184. # detect mixed window/unix line ends
  185. ends = self.hunkends[nextfileno-1]
  186. if ((ends["cr"]!=0) + (ends["crlf"]!=0) + (ends["lf"]!=0)) > 1:
  187. warning("inconsistent line ends in patch hunks for %s" % self.source[nextfileno-1])
  188. if debugmode:
  189. debuglines = dict(ends)
  190. debuglines.update(file=self.target[nextfileno-1], hunk=nexthunkno)
  191. debug("crlf: %(crlf)d lf: %(lf)d cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines)
  192. if hunkskip:
  193. match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
  194. if match:
  195. # switch to hunkhead state
  196. hunkskip = False
  197. hunkhead = True
  198. elif line.startswith("--- "):
  199. # switch to filenames state
  200. hunkskip = False
  201. filenames = True
  202. if debugmode and len(self.source) > 0:
  203. debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))
  204. if filenames:
  205. if line.startswith("--- "):
  206. if nextfileno in self.source:
  207. warning("skipping invalid patch for %s" % self.source[nextfileno])
  208. del self.source[nextfileno]
  209. # double source filename line is encountered
  210. # attempt to restart from this second line
  211. re_filename = "^--- ([^\t]+)"
  212. match = re.match(re_filename, line)
  213. # todo: support spaces in filenames
  214. if match:
  215. self.source.append(match.group(1).strip())
  216. else:
  217. warning("skipping invalid filename at line %d" % lineno)
  218. # switch back to headscan state
  219. filenames = False
  220. headscan = True
  221. elif not line.startswith("+++ "):
  222. if nextfileno in self.source:
  223. warning("skipping invalid patch with no target for %s" % self.source[nextfileno])
  224. del self.source[nextfileno]
  225. else:
  226. # this should be unreachable
  227. warning("skipping invalid target patch")
  228. filenames = False
  229. headscan = True
  230. else:
  231. if nextfileno in self.target:
  232. warning("skipping invalid patch - double target at line %d" % lineno)
  233. del self.source[nextfileno]
  234. del self.target[nextfileno]
  235. nextfileno -= 1
  236. # double target filename line is encountered
  237. # switch back to headscan state
  238. filenames = False
  239. headscan = True
  240. else:
  241. re_filename = "^\+\+\+ ([^\t]+)"
  242. match = re.match(re_filename, line)
  243. if not match:
  244. warning("skipping invalid patch - no target filename at line %d" % lineno)
  245. # switch back to headscan state
  246. filenames = False
  247. headscan = True
  248. else:
  249. self.target.append(match.group(1).strip())
  250. nextfileno += 1
  251. # switch to hunkhead state
  252. filenames = False
  253. hunkhead = True
  254. nexthunkno = 0
  255. self.hunks.append([])
  256. self.hunkends.append(lineends.copy())
  257. continue
  258. if hunkhead:
  259. match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
  260. if not match:
  261. if nextfileno-1 not in self.hunks:
  262. warning("skipping invalid patch with no hunks for file %s" % self.target[nextfileno-1])
  263. # switch to headscan state
  264. hunkhead = False
  265. headscan = True
  266. continue
  267. else:
  268. # switch to headscan state
  269. hunkhead = False
  270. headscan = True
  271. else:
  272. hunkinfo.startsrc = int(match.group(1))
  273. hunkinfo.linessrc = 1
  274. if match.group(3): hunkinfo.linessrc = int(match.group(3))
  275. hunkinfo.starttgt = int(match.group(4))
  276. hunkinfo.linestgt = 1
  277. if match.group(6): hunkinfo.linestgt = int(match.group(6))
  278. hunkinfo.invalid = False
  279. hunkinfo.text = []
  280. hunkactual["linessrc"] = hunkactual["linestgt"] = 0
  281. # switch to hunkbody state
  282. hunkhead = False
  283. hunkbody = True
  284. nexthunkno += 1
  285. continue
  286. if not hunkskip:
  287. warning("patch file incomplete - %s" % filename)
  288. # sys.exit(?)
  289. else:
  290. # duplicated message when an eof is reached
  291. if debugmode and len(self.source) > 0:
  292. debug("- %2d hunks for %s" % (len(self.hunks[nextfileno-1]), self.source[nextfileno-1]))
  293. info("total files: %d total hunks: %d" % (len(self.source), sum([len(hset) for hset in self.hunks])))
  294. def apply(self):
  295. """ apply parsed patch """
  296. total = len(self.source)
  297. for fileno, filename in enumerate(self.source):
  298. f2patch = filename
  299. if not exists(f2patch):
  300. f2patch = self.target[fileno]
  301. if not exists(f2patch):
  302. warning("source/target file does not exist\n--- %s\n+++ %s" % (filename, f2patch))
  303. continue
  304. if not isfile(f2patch):
  305. warning("not a file - %s" % f2patch)
  306. continue
  307. filename = f2patch
  308. info("processing %d/%d:\t %s" % (fileno+1, total, filename))
  309. # validate before patching
  310. f2fp = open(filename)
  311. hunkno = 0
  312. hunk = self.hunks[fileno][hunkno]
  313. hunkfind = []
  314. hunkreplace = []
  315. validhunks = 0
  316. canpatch = False
  317. for lineno, line in enumerate(f2fp):
  318. if lineno+1 < hunk.startsrc:
  319. continue
  320. elif lineno+1 == hunk.startsrc:
  321. hunkfind = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " -"]
  322. hunkreplace = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " +"]
  323. #pprint(hunkreplace)
  324. hunklineno = 0
  325. # todo \ No newline at end of file
  326. # check hunks in source file
  327. if lineno+1 < hunk.startsrc+len(hunkfind)-1:
  328. if line.rstrip("\r\n") == hunkfind[hunklineno]:
  329. hunklineno+=1
  330. else:
  331. debug("hunk no.%d doesn't match source file %s" % (hunkno+1, filename))
  332. # file may be already patched, but we will check other hunks anyway
  333. hunkno += 1
  334. if hunkno < len(self.hunks[fileno]):
  335. hunk = self.hunks[fileno][hunkno]
  336. continue
  337. else:
  338. break
  339. # check if processed line is the last line
  340. if lineno+1 == hunk.startsrc+len(hunkfind)-1:
  341. debug("file %s hunk no.%d -- is ready to be patched" % (filename, hunkno+1))
  342. hunkno+=1
  343. validhunks+=1
  344. if hunkno < len(self.hunks[fileno]):
  345. hunk = self.hunks[fileno][hunkno]
  346. else:
  347. if validhunks == len(self.hunks[fileno]):
  348. # patch file
  349. canpatch = True
  350. break
  351. else:
  352. if hunkno < len(self.hunks[fileno]):
  353. warning("premature end of source file %s at hunk %d" % (filename, hunkno+1))
  354. f2fp.close()
  355. if validhunks < len(self.hunks[fileno]):
  356. if self._match_file_hunks(filename, self.hunks[fileno]):
  357. warning("already patched %s" % filename)
  358. else:
  359. warning("source file is different - %s" % filename)
  360. if canpatch:
  361. backupname = filename+".orig"
  362. if exists(backupname):
  363. warning("can't backup original file to %s - aborting" % backupname)
  364. else:
  365. import shutil
  366. shutil.move(filename, backupname)
  367. if self.write_hunks(backupname, filename, self.hunks[fileno]):
  368. info("successfully patched %s" % filename)
  369. unlink(backupname)
  370. else:
  371. warning("error patching file %s" % filename)
  372. shutil.copy(filename, filename+".invalid")
  373. warning("invalid version is saved to %s" % filename+".invalid")
  374. # todo: proper rejects
  375. shutil.move(backupname, filename)
  376. # todo: check for premature eof
  377. def can_patch(self, filename):
  378. """ Check if specified filename can be patched. Returns None if file can
  379. not be found among source filenames. False if patch can not be applied
  380. clearly. True otherwise.
  381. :returns: True, False or None
  382. """
  383. idx = self._get_file_idx(filename, source=True)
  384. if idx == None:
  385. return None
  386. return self._match_file_hunks(filename, self.hunks[idx])
  387. def _match_file_hunks(self, filepath, hunks):
  388. matched = True
  389. fp = open(abspath(filepath))
  390. class NoMatch(Exception):
  391. pass
  392. lineno = 1
  393. line = fp.readline()
  394. hno = None
  395. try:
  396. for hno, h in enumerate(hunks):
  397. # skip to first line of the hunk
  398. while lineno < h.starttgt:
  399. if not len(line): # eof
  400. debug("check failed - premature eof before hunk: %d" % (hno+1))
  401. raise NoMatch
  402. line = fp.readline()
  403. lineno += 1
  404. for hline in h.text:
  405. if hline.startswith("-"):
  406. continue
  407. if not len(line):
  408. debug("check failed - premature eof on hunk: %d" % (hno+1))
  409. # todo: \ No newline at the end of file
  410. raise NoMatch
  411. if line.rstrip("\r\n") != hline[1:].rstrip("\r\n"):
  412. debug("file is not patched - failed hunk: %d" % (hno+1))
  413. raise NoMatch
  414. line = fp.readline()
  415. lineno += 1
  416. except NoMatch:
  417. matched = False
  418. # todo: display failed hunk, i.e. expected/found
  419. fp.close()
  420. return matched
  421. def patch_stream(self, instream, hunks):
  422. """ Generator that yields stream patched with hunks iterable
  423. Converts lineends in hunk lines to the best suitable format
  424. autodetected from input
  425. """
  426. # todo: At the moment substituted lineends may not be the same
  427. # at the start and at the end of patching. Also issue a
  428. # warning/throw about mixed lineends (is it really needed?)
  429. hunks = iter(hunks)
  430. srclineno = 1
  431. lineends = {'\n':0, '\r\n':0, '\r':0}
  432. def get_line():
  433. """
  434. local utility function - return line from source stream
  435. collecting line end statistics on the way
  436. """
  437. line = instream.readline()
  438. # 'U' mode works only with text files
  439. if line.endswith("\r\n"):
  440. lineends["\r\n"] += 1
  441. elif line.endswith("\n"):
  442. lineends["\n"] += 1
  443. elif line.endswith("\r"):
  444. lineends["\r"] += 1
  445. return line
  446. for hno, h in enumerate(hunks):
  447. debug("hunk %d" % (hno+1))
  448. # skip to line just before hunk starts
  449. while srclineno < h.startsrc:
  450. yield get_line()
  451. srclineno += 1
  452. for hline in h.text:
  453. # todo: check \ No newline at the end of file
  454. if hline.startswith("-") or hline.startswith("\\"):
  455. get_line()
  456. srclineno += 1
  457. continue
  458. else:
  459. if not hline.startswith("+"):
  460. get_line()
  461. srclineno += 1
  462. line2write = hline[1:]
  463. # detect if line ends are consistent in source file
  464. if sum([bool(lineends[x]) for x in lineends]) == 1:
  465. newline = [x for x in lineends if lineends[x] != 0][0]
  466. yield line2write.rstrip("\r\n")+newline
  467. else: # newlines are mixed
  468. yield line2write
  469. for line in instream:
  470. yield line
  471. def write_hunks(self, srcname, tgtname, hunks):
  472. src = open(srcname, "r")
  473. tgt = open(tgtname, "w")
  474. debug("processing target file %s" % tgtname)
  475. tgt.writelines(self.patch_stream(src, hunks))
  476. tgt.close()
  477. src.close()
  478. return True
  479. def _get_file_idx(self, filename, source=None):
  480. """ Detect index of given filename within patch.
  481. :param filename:
  482. :param source: search filename among sources (True),
  483. targets (False), or both (None)
  484. :returns: int or None
  485. """
  486. filename = abspath(filename)
  487. if source == True or source == None:
  488. for i,fnm in enumerate(self.source):
  489. if filename == abspath(fnm):
  490. return i
  491. if source == False or source == None:
  492. for i,fnm in enumerate(self.target):
  493. if filename == abspath(fnm):
  494. return i
  495. if __name__ == "__main__":
  496. from optparse import OptionParser
  497. from os.path import exists
  498. import sys
  499. opt = OptionParser(usage="%prog [options] unipatch-file", version="python-patch %s" % __version__)
  500. opt.add_option("-d", "--debug", action="store_true", dest="debugmode", help="Print debugging messages")
  501. opt.add_option("-q", "--quiet", action="store_true", dest="quiet", help="Only print messages on warning/error")
  502. (options, args) = opt.parse_args()
  503. if not args:
  504. opt.print_version()
  505. opt.print_help()
  506. sys.exit()
  507. debugmode = options.debugmode
  508. patchfile = args[0]
  509. if not exists(patchfile) or not isfile(patchfile):
  510. sys.exit("patch file does not exist - %s" % patchfile)
  511. if debugmode:
  512. loglevel = logging.DEBUG
  513. logformat = "%(levelname)8s %(message)s"
  514. elif options.quiet:
  515. loglevel = logging.WARN
  516. logformat = "%(message)s"
  517. else:
  518. loglevel = logging.INFO
  519. logformat = "%(message)s"
  520. logger.setLevel(loglevel)
  521. loghandler.setFormatter(logging.Formatter(logformat))
  522. patch = fromfile(patchfile)
  523. #pprint(patch)
  524. patch.apply()
  525. # todo: document and test line ends handling logic - patch.py detects proper line-endings
  526. # for inserted hunks and issues a warning if patched file has inconsistent line ends