PageRenderTime 54ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/patch.py

http://python-patch.googlecode.com/
Python | 961 lines | 875 code | 33 blank | 53 comment | 17 complexity | 23535d0612daf2180d06d195a5da147c MD5 | raw file
Possible License(s): MIT
  1. #!/usr/bin/env python
  2. """ Patch utility to apply unified diffs
  3. Brute-force line-by-line non-recursive parsing
  4. Copyright (c) 2008-2011 anatoly techtonik
  5. Available under the terms of MIT license
  6. Project home: http://code.google.com/p/python-patch/
  7. $Id: patch.py 158 2012-02-13 13:52:11Z techtonik $
  8. $HeadURL: http://python-patch.googlecode.com/svn/trunk/patch.py $
  9. """
  10. __author__ = "techtonik.rainforce.org"
  11. __version__ = "1.11.11-dev"
  12. import copy
  13. import logging
  14. import re
  15. # cStringIO doesn't support unicode in 2.5
  16. from StringIO import StringIO
  17. import urllib2
  18. from os.path import exists, isabs, isfile, abspath, normpath
  19. import os
  20. import shutil
  21. #------------------------------------------------
  22. # Logging is controlled by logger named after the
  23. # module name (e.g. 'patch' for patch.py module)
  24. debugmode = False
  25. logger = logging.getLogger(__name__)
  26. debug = logger.debug
  27. info = logger.info
  28. warning = logger.warning
  29. #------------------------------------------------
  30. # constants for Patch/PatchSet types
  31. DIFF = PLAIN = "plain"
  32. GIT = "git"
  33. HG = MERCURIAL = "mercurial"
  34. SVN = SUBVERSION = "svn"
  35. # mixed type is only actual when PatchSet contains
  36. # Patches of different type
  37. MIXED = MIXED = "mixed"
  38. def fromfile(filename):
  39. """ Parse patch file and return PatchSet() object
  40. XXX error reporting
  41. """
  42. debug("reading %s" % filename)
  43. fp = open(filename, "rb")
  44. patchset = PatchSet(fp)
  45. fp.close()
  46. return patchset
  47. def fromstring(s):
  48. """ Parse text string and return PatchSet() object
  49. """
  50. return PatchSet( StringIO(s) )
  51. def fromurl(url):
  52. """ Read patch from URL
  53. """
  54. return PatchSet( urllib2.urlopen(url) )
  55. # --- Utility functions ---
  56. def pathstrip(path, n):
  57. """ Strip n leading components from the given path """
  58. pathlist = [path]
  59. while os.path.dirname(pathlist[0]) != '':
  60. pathlist[0:1] = os.path.split(pathlist[0])
  61. return os.path.join(*pathlist[n:])
  62. # --- /Utility function ---
  63. class Hunk(object):
  64. """ Parsed hunk data container (hunk starts with @@ -R +R @@) """
  65. def __init__(self):
  66. self.startsrc=None #: line count starts with 1
  67. self.linessrc=None
  68. self.starttgt=None
  69. self.linestgt=None
  70. self.invalid=False
  71. self.text=[]
  72. # def apply(self, estream):
  73. # """ write hunk data into enumerable stream
  74. # return strings one by one until hunk is
  75. # over
  76. #
  77. # enumerable stream are tuples (lineno, line)
  78. # where lineno starts with 0
  79. # """
  80. # pass
  81. class Patch(object):
  82. """ Patch for a single file """
  83. def __init__(self):
  84. self.source = None
  85. self.target = None
  86. self.hunks = []
  87. self.hunkends = []
  88. self.header = []
  89. self.type = None
  90. class PatchSet(object):
  91. def __init__(self, stream=None):
  92. self.name = None # descriptive name of the PatchSet
  93. # list of Patch objects
  94. self.items = []
  95. #: patch set type - one of constants
  96. self.type = None
  97. if stream:
  98. self.parse(stream)
  99. def __len__(self):
  100. return len(self.items)
    def parse(self, stream):
        """ parse unified diff from an iterable/stream of lines
            return True on success (no parse errors recorded)
        """
        # template of line-end counters, copied into every Patch
        lineends = dict(lf=0, crlf=0, cr=0)
        nexthunkno = 0    #: even if index starts with 0 user messages number hunks from 1

        p = None          # Patch currently being assembled
        hunk = None       # Hunk currently being assembled
        # hunkactual variable is used to calculate hunk lines for comparison
        # against the counts declared in the @@ header
        hunkactual = dict(linessrc=None, linestgt=None)

        class wrapumerate(enumerate):
            """Enumerate wrapper that uses boolean end of stream status instead of
               StopIteration exception, and properties to access line information.
            """

            def __init__(self, *args, **kwargs):
                # we don't call parent, it is magically created by __new__ method
                self._exhausted = False
                self._lineno = False    # after end of stream equal to the num of lines
                self._line = False      # will be reset to False after end of stream

            def next(self):
                """Try to read the next line and return True if it is available,
                   False if end of stream is reached."""
                if self._exhausted:
                    return False
                try:
                    self._lineno, self._line = super(wrapumerate, self).next()
                except StopIteration:
                    self._exhausted = True
                    self._line = False
                    return False
                return True

            @property
            def is_empty(self):
                return self._exhausted

            @property
            def line(self):
                return self._line

            @property
            def lineno(self):
                return self._lineno

        # define states (possible file regions) that direct parse flow;
        # exactly one of these drives each iteration of the main loop
        headscan = True    # start with scanning header
        filenames = False  # lines starting with --- and +++
        hunkhead = False   # @@ -R +R @@ sequence
        hunkbody = False   #
        hunkskip = False   # skipping invalid hunk mode
        hunkparsed = False # state after successfully parsed hunk

        # regexp to match start of hunk, used groups - 1,3,4,6
        re_hunk_start = re.compile("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?")

        errors = 0
        # temp buffers for header and filenames info
        header = []
        srcname = None
        tgtname = None

        # start of main cycle
        # each parsing block already has line available in fe.line
        fe = wrapumerate(stream)
        while fe.next():

            # -- deciders: these only switch state to decide who should process
            # -- line fetched at the start of this cycle
            if hunkparsed:
                hunkparsed = False
                if re_hunk_start.match(fe.line):
                    hunkhead = True
                elif fe.line.startswith("--- "):
                    filenames = True
                else:
                    headscan = True
            # -- ------------------------------------

            # read out header
            if headscan:
                # consume lines up to the next '--- ' marker (or EOF)
                while not fe.is_empty and not fe.line.startswith("--- "):
                    header.append(fe.line)
                    fe.next()
                if fe.is_empty:
                    if p == None:
                        errors += 1
                        warning("warning: no patch data is found")
                    else:
                        info("%d unparsed bytes left at the end of stream" % len(''.join(header)))
                        # TODO check for \No new line at the end..
                        # TODO test for unparsed bytes
                        # otherwise error += 1
                    # this is actually a loop exit
                    continue
                headscan = False
                # switch to filenames state
                filenames = True

            line = fe.line
            lineno = fe.lineno

            # hunkskip and hunkbody code skipped until definition of hunkhead is parsed
            if hunkbody:
                # process line first
                if re.match(r"^[- \+\\]", line):
                    # gather stats about line endings
                    if line.endswith("\r\n"):
                        p.hunkends["crlf"] += 1
                    elif line.endswith("\n"):
                        p.hunkends["lf"] += 1
                    elif line.endswith("\r"):
                        p.hunkends["cr"] += 1
                    if line.startswith("-"):
                        hunkactual["linessrc"] += 1
                    elif line.startswith("+"):
                        hunkactual["linestgt"] += 1
                    elif not line.startswith("\\"):
                        # context line counts against both sides
                        hunkactual["linessrc"] += 1
                        hunkactual["linestgt"] += 1
                    hunk.text.append(line)
                    # todo: handle \ No newline cases
                else:
                    warning("invalid hunk no.%d at %d for target file %s" % (nexthunkno, lineno+1, p.target))
                    # add hunk status node
                    hunk.invalid = True
                    p.hunks.append(hunk)
                    errors += 1
                    # switch to hunkskip state
                    hunkbody = False
                    hunkskip = True

                # check exit conditions
                if hunkactual["linessrc"] > hunk.linessrc or hunkactual["linestgt"] > hunk.linestgt:
                    warning("extra lines for hunk no.%d at %d for target %s" % (nexthunkno, lineno+1, p.target))
                    # add hunk status node
                    hunk.invalid = True
                    p.hunks.append(hunk)
                    errors += 1
                    # switch to hunkskip state
                    hunkbody = False
                    hunkskip = True
                elif hunk.linessrc == hunkactual["linessrc"] and hunk.linestgt == hunkactual["linestgt"]:
                    # hunk parsed successfully
                    p.hunks.append(hunk)
                    # switch to hunkparsed state
                    hunkbody = False
                    hunkparsed = True

                    # detect mixed window/unix line ends
                    ends = p.hunkends
                    if ((ends["cr"]!=0) + (ends["crlf"]!=0) + (ends["lf"]!=0)) > 1:
                        warning("inconsistent line ends in patch hunks for %s" % p.source)
                    if debugmode:
                        debuglines = dict(ends)
                        debuglines.update(file=p.target, hunk=nexthunkno)
                        debug("crlf: %(crlf)d lf: %(lf)d cr: %(cr)d\t - file: %(file)s hunk: %(hunk)d" % debuglines)
                    # fetch next line
                    continue

            if hunkskip:
                if re_hunk_start.match(line):
                    # switch to hunkhead state
                    hunkskip = False
                    hunkhead = True
                elif line.startswith("--- "):
                    # switch to filenames state
                    hunkskip = False
                    filenames = True
                    if debugmode and len(self.items) > 0:
                        debug("- %2d hunks for %s" % (len(p.hunks), p.source))

            if filenames:
                if line.startswith("--- "):
                    if srcname != None:
                        # XXX testcase
                        warning("skipping false patch for %s" % srcname)
                        srcname = None
                        # XXX header += srcname
                    # double source filename line is encountered
                    # attempt to restart from this second line
                    re_filename = "^--- ([^\t]+)"
                    match = re.match(re_filename, line)
                    # todo: support spaces in filenames
                    if match:
                        srcname = match.group(1).strip()
                    else:
                        warning("skipping invalid filename at line %d" % lineno)
                        errors += 1
                        # XXX p.header += line
                        # switch back to headscan state
                        filenames = False
                        headscan = True
                elif not line.startswith("+++ "):
                    if srcname != None:
                        warning("skipping invalid patch with no target for %s" % srcname)
                        errors += 1
                        srcname = None
                        # XXX header += srcname
                        # XXX header += line
                    else:
                        # this should be unreachable
                        warning("skipping invalid target patch")
                    filenames = False
                    headscan = True
                else:
                    if tgtname != None:
                        # XXX seems to be a dead branch
                        warning("skipping invalid patch - double target at line %d" % lineno)
                        errors += 1
                        srcname = None
                        tgtname = None
                        # XXX header += srcname
                        # XXX header += tgtname
                        # XXX header += line
                        # double target filename line is encountered
                        # switch back to headscan state
                        filenames = False
                        headscan = True
                    else:
                        re_filename = "^\+\+\+ ([^\t]+)"
                        match = re.match(re_filename, line)
                        if not match:
                            warning("skipping invalid patch - no target filename at line %d" % lineno)
                            errors += 1
                            srcname = None
                            # switch back to headscan state
                            filenames = False
                            headscan = True
                        else:
                            if p: # for the first run p is None
                                self.items.append(p)
                            p = Patch()
                            p.source = srcname
                            srcname = None
                            p.target = match.group(1).strip()
                            p.header = header
                            header = []
                            # switch to hunkhead state
                            filenames = False
                            hunkhead = True
                            nexthunkno = 0
                            p.hunkends = lineends.copy()
                            continue

            if hunkhead:
                match = re.match("^@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?", line)
                if not match:
                    if not p.hunks:
                        warning("skipping invalid patch with no hunks for file %s" % p.source)
                        errors += 1
                        # XXX review switch
                        # switch to headscan state
                        hunkhead = False
                        headscan = True
                        continue
                    else:
                        # TODO review condition case
                        # switch to headscan state
                        hunkhead = False
                        headscan = True
                else:
                    hunk = Hunk()
                    hunk.startsrc = int(match.group(1))
                    hunk.linessrc = 1
                    if match.group(3): hunk.linessrc = int(match.group(3))
                    hunk.starttgt = int(match.group(4))
                    hunk.linestgt = 1
                    if match.group(6): hunk.linestgt = int(match.group(6))
                    hunk.invalid = False
                    hunk.text = []
                    hunkactual["linessrc"] = hunkactual["linestgt"] = 0
                    # switch to hunkbody state
                    hunkhead = False
                    hunkbody = True
                    nexthunkno += 1
                    continue

        # NOTE(review): p is appended unconditionally, so a stream with no
        # patch data appears to leave a None entry in self.items - verify
        # whether the len(self.items) == 0 check below can ever trigger
        self.items.append(p)

        if not hunkparsed:
            if hunkskip:
                warning("warning: finished with warnings, some hunks may be invalid")
            elif headscan:
                if len(self.items) == 0:
                    warning("error: no patch data found!")
                    # ? sys.exit(-1)
                else: # extra data at the end of file
                    pass
            else:
                warning("error: patch stream is incomplete!")
                errors += 1

        if debugmode and len(self.items) > 0:
            debug("- %2d hunks for %s" % (len(p.hunks), p.source))

        # XXX fix total hunks calculation
        debug("total files: %d total hunks: %d" % (len(self.items),
            sum(len(p.hunks) for p in self.items)))

        # ---- detect patch and patchset types ----
        for idx, p in enumerate(self.items):
            self.items[idx].type = self._detect_type(p)

        types = set([p.type for p in self.items])
        if len(types) > 1:
            self.type = MIXED
        else:
            self.type = types.pop()
        # --------

        if not self._normalize_filenames():
            errors += 1

        return (errors == 0)
    def _detect_type(self, p):
        """ detect and return type for the specified Patch object
            analyzes header and filenames info

            NOTE: must be run before filenames are normalized
        """
        # check for SVN
        #  - header starts with Index:
        #  - next line is ===... delimiter
        #  - filename is followed by revision number
        # TODO add SVN revision
        if (len(p.header) > 1 and p.header[-2].startswith("Index: ")
              and p.header[-1].startswith("="*67)):
            return SVN

        # GIT type check
        #  - header[-2] is like "diff --git a/oldname b/newname"
        #  - header[-1] is like "index <hash>..<hash> <mode>"
        # TODO add git rename diffs and add/remove diffs
        #      add git diff with spaced filename
        # TODO http://www.kernel.org/pub/software/scm/git/docs/git-diff.html

        # detect the start of diff header - there might be some comments before
        for idx in reversed(range(len(p.header))):
            if p.header[idx].startswith("diff --git"):
                break
        # NOTE(review): when no header line starts with "diff --git", idx
        # is left at 0 (or stays unbound for an empty header, which the
        # len() check below short-circuits) - confirm this is intended
        if len(p.header) > 1 and re.match(r'diff --git a/[\w/.]+ b/[\w/.]+', p.header[idx]):
            # NOTE(review): the '..' here are unescaped regex dots, and
            # p.header[idx+1] may raise IndexError if the "diff --git"
            # line is the last header line - TODO confirm against real output
            if re.match(r'index \w{7}..\w{7} \d{6}', p.header[idx+1]):
                if p.source.startswith('a/') and p.target.startswith('b/'):
                    return GIT

        # HG check
        #  - Patch header is like "diff -r b2d9961ff1f5 filename"
        #  - filename starts with a/, b/ or is equal to /dev/null
        # TODO add MQ version
        if len(p.header) > 0 and re.match(r'diff -r \w{12} .*', p.header[-1]):
            if ((p.source.startswith('a/') or p.source == '/dev/null')
                  and (p.target.startswith('b/') or p.target == '/dev/null')):
                return HG

        return PLAIN
  427. def _normalize_filenames(self):
  428. """ sanitize filenames, normalizing paths, i.e.:
  429. 1. strip a/ and b/ prefixes from GIT and HG style patches
  430. 2. remove all references to parent directories (with warning)
  431. 3. translate any absolute paths to relative (with warning)
  432. [ ] think about using forward slashes for crossplatform issues
  433. (diff/patch were born as a unix utility after all)
  434. [ ] need to find diff/patch with forward slashes
  435. return True on success
  436. """
  437. errors = 0
  438. for i,p in enumerate(self.items):
  439. if p.type in (HG, GIT):
  440. # TODO: figure out how to deal with /dev/null entries
  441. debug("stripping a/ and b/ prefixes")
  442. if p.source != '/dev/null':
  443. if not p.source.startswith("a/"):
  444. warning("invalid source filename")
  445. else:
  446. p.source = p.source[2:]
  447. if p.target != '/dev/null':
  448. if not p.target.startswith("b/"):
  449. warning("invalid target filename")
  450. else:
  451. p.target = p.target[2:]
  452. # [ ] xnormpath, check if forward slash paths can be exploited
  453. p.source = normpath(p.source)
  454. p.target = normpath(p.target)
  455. # references to parent are not allowed
  456. if p.source.startswith(".." + os.sep):
  457. warning("error: stripping parent path for source file patch no.%d" % (i+1))
  458. errors += 1
  459. while p.source.startswith(".." + os.sep):
  460. p.source = p.source.partition(os.sep)[2]
  461. if p.target.startswith(".." + os.sep):
  462. warning("error: stripping parent path for target file patch no.%d" % (i+1))
  463. errors += 1
  464. while p.target.startswith(".." + os.sep):
  465. p.target = p.target.partition(os.sep)[2]
  466. # absolute paths are not allowed
  467. def xisabs(filename):
  468. """return True if `filename` is absolute on Linux/Unix/OS X or Windows"""
  469. if filename.startswith('/'):
  470. return True # Linux/Unix
  471. elif re.match('\w+:', filename):
  472. return True # Windows
  473. def xstrip(filename):
  474. """strip Linux/Unix/OS X and Windows absolute file prefixes from filename"""
  475. warning("stripping absolute path component from '%s'" % filename)
  476. while re.match('\w+:', filename) or filename.startswith('/'):
  477. filename = re.sub('^\w+:', '', filename)
  478. filename = filename.lstrip('/')
  479. if xisabs(p.source) or xisabs(p.target):
  480. errors += 1
  481. warning("error: absolute paths are not allowed for file patch no.%d" % (i+1))
  482. if xisabs(p.source):
  483. p.source = xstrip(p.source)
  484. if xisabs(p.target):
  485. p.target = xstrip(p.target)
  486. self.items[i].source = p.source
  487. self.items[i].target = p.target
  488. return (errors == 0)
  489. def diffstat(self):
  490. """ calculate diffstat and return as a string
  491. Notes:
  492. - original diffstat ouputs target filename
  493. - single + or - shouldn't escape histogram
  494. """
  495. names = []
  496. insert = []
  497. delete = []
  498. namelen = 0
  499. maxdiff = 0 # max number of changes for single file
  500. # (for histogram width calculation)
  501. for patch in self.items:
  502. i,d = 0,0
  503. for hunk in patch.hunks:
  504. for line in hunk.text:
  505. if line.startswith('+'):
  506. i += 1
  507. elif line.startswith('-'):
  508. d += 1
  509. names.append(patch.target)
  510. insert.append(i)
  511. delete.append(d)
  512. namelen = max(namelen, len(patch.target))
  513. maxdiff = max(maxdiff, i+d)
  514. output = ''
  515. statlen = len(str(maxdiff)) # stats column width
  516. for i,n in enumerate(names):
  517. # %-19s | %-4d %s
  518. format = " %-" + str(namelen) + "s | %" + str(statlen) + "s %s\n"
  519. hist = ''
  520. # -- calculating histogram --
  521. width = len(format % ('', '', ''))
  522. histwidth = max(2, 80 - width)
  523. if maxdiff < histwidth:
  524. hist = "+"*insert[i] + "-"*delete[i]
  525. else:
  526. iratio = (float(insert[i]) / maxdiff) * histwidth
  527. dratio = (float(delete[i]) / maxdiff) * histwidth
  528. # make sure every entry gets at least one + or -
  529. iwidth = 1 if 0 < iratio < 1 else int(iratio)
  530. dwidth = 1 if 0 < dratio < 1 else int(dratio)
  531. #print iratio, dratio, iwidth, dwidth, histwidth
  532. hist = "+"*int(iwidth) + "-"*int(dwidth)
  533. # -- /calculating +- histogram --
  534. output += (format % (names[i], insert[i] + delete[i], hist))
  535. output += (" %d files changed, %d insertions(+), %d deletions(-)"
  536. % (len(names), sum(insert), sum(delete)))
  537. return output
    def apply(self, strip=0):
        """ apply parsed patch
            strip - number of leading path components to strip from
                    patch filenames (like `patch -p`)
            return True on success
        """
        total = len(self.items)
        errors = 0
        if strip:
            # [ ] test strip level exceeds nesting level
            # [ ] test the same only for selected files
            # [ ] test if files end up being on the same level
            try:
                strip = int(strip)
            except ValueError:
                errors += 1
                warning("error: strip parameter '%s' must be an integer" % strip)
                strip = 0

        #for fileno, filename in enumerate(self.source):
        for i,p in enumerate(self.items):
            # pick the file to patch: try the source name first,
            # then fall back to the target name
            f2patch = p.source
            if strip:
                debug("stripping %s leading component from '%s'" % (strip, f2patch))
                f2patch = pathstrip(f2patch, strip)
            if not exists(f2patch):
                f2patch = p.target
                if strip:
                    debug("stripping %s leading component from '%s'" % (strip, f2patch))
                    f2patch = pathstrip(f2patch, strip)
                if not exists(f2patch):
                    warning("source/target file does not exist\n--- %s\n+++ %s" % (p.source, f2patch))
                    errors += 1
                    continue
            if not isfile(f2patch):
                warning("not a file - %s" % f2patch)
                errors += 1
                continue
            filename = f2patch

            debug("processing %d/%d:\t %s" % (i+1, total, filename))

            # validate before patching
            f2fp = open(filename)
            hunkno = 0
            hunk = p.hunks[hunkno]
            hunkfind = []
            hunkreplace = []
            validhunks = 0
            canpatch = False
            for lineno, line in enumerate(f2fp):
                if lineno+1 < hunk.startsrc:
                    continue
                elif lineno+1 == hunk.startsrc:
                    # expected source lines (context + removals) and
                    # replacement lines (context + additions) for this hunk
                    hunkfind = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " -"]
                    hunkreplace = [x[1:].rstrip("\r\n") for x in hunk.text if x[0] in " +"]
                    #pprint(hunkreplace)
                    hunklineno = 0
                    # todo \ No newline at end of file

                # check hunks in source file
                if lineno+1 < hunk.startsrc+len(hunkfind)-1:
                    if line.rstrip("\r\n") == hunkfind[hunklineno]:
                        hunklineno+=1
                    else:
                        info("file %d/%d:\t %s" % (i+1, total, filename))
                        info(" hunk no.%d doesn't match source file at line %d" % (hunkno+1, lineno))
                        info(" expected: %s" % hunkfind[hunklineno])
                        info(" actual : %s" % line.rstrip("\r\n"))
                        # not counting this as error, because file may already be patched.
                        # check if file is already patched is done after the number of
                        # invalid hunks if found
                        # TODO: check hunks against source/target file in one pass
                        # API - check(stream, srchunks, tgthunks)
                        # return tuple (srcerrs, tgterrs)

                        # continue to check other hunks for completeness
                        hunkno += 1
                        if hunkno < len(p.hunks):
                            hunk = p.hunks[hunkno]
                            continue
                        else:
                            break

                # check if processed line is the last line
                if lineno+1 == hunk.startsrc+len(hunkfind)-1:
                    debug(" hunk no.%d for file %s -- is ready to be patched" % (hunkno+1, filename))
                    hunkno+=1
                    validhunks+=1
                    if hunkno < len(p.hunks):
                        hunk = p.hunks[hunkno]
                    else:
                        if validhunks == len(p.hunks):
                            # patch file
                            canpatch = True
                            break
            else:
                # for/else: the file ran out before all hunks were matched
                if hunkno < len(p.hunks):
                    warning("premature end of source file %s at hunk %d" % (filename, hunkno+1))
                    errors += 1

            f2fp.close()

            if validhunks < len(p.hunks):
                # not all hunks matched - maybe the file is already patched
                if self._match_file_hunks(filename, p.hunks):
                    warning("already patched %s" % filename)
                else:
                    warning("source file is different - %s" % filename)
                    errors += 1
            if canpatch:
                # patch through a .orig backup so the original can be
                # restored if writing the patched version fails
                backupname = filename+".orig"
                if exists(backupname):
                    warning("can't backup original file to %s - aborting" % backupname)
                else:
                    import shutil
                    shutil.move(filename, backupname)
                    if self.write_hunks(backupname, filename, p.hunks):
                        info("successfully patched %d/%d:\t %s" % (i+1, total, filename))
                        os.unlink(backupname)
                    else:
                        errors += 1
                        warning("error patching file %s" % filename)
                        shutil.copy(filename, filename+".invalid")
                        warning("invalid version is saved to %s" % filename+".invalid")
                        # todo: proper rejects
                        shutil.move(backupname, filename)

        # todo: check for premature eof
        return (errors == 0)
  656. def can_patch(self, filename):
  657. """ Check if specified filename can be patched. Returns None if file can
  658. not be found among source filenames. False if patch can not be applied
  659. clearly. True otherwise.
  660. :returns: True, False or None
  661. """
  662. filename = abspath(filename)
  663. for p in self.items:
  664. if filename == abspath(p.source):
  665. return self._match_file_hunks(filename, p.hunks)
  666. return None
    def _match_file_hunks(self, filepath, hunks):
        """ return True if the file at filepath already matches the
            non-removed (context and '+') lines of every hunk,
            i.e. it appears to be already patched """
        matched = True
        fp = open(abspath(filepath))

        class NoMatch(Exception):
            # local control-flow exception: abort matching on first mismatch
            pass

        lineno = 1
        line = fp.readline()
        hno = None
        try:
            for hno, h in enumerate(hunks):
                # skip to first line of the hunk
                while lineno < h.starttgt:
                    if not len(line): # eof
                        debug("check failed - premature eof before hunk: %d" % (hno+1))
                        raise NoMatch
                    line = fp.readline()
                    lineno += 1
                for hline in h.text:
                    # '-' lines are absent from a patched file - skip them
                    if hline.startswith("-"):
                        continue
                    if not len(line):
                        debug("check failed - premature eof on hunk: %d" % (hno+1))
                        # todo: \ No newline at the end of file
                        raise NoMatch
                    if line.rstrip("\r\n") != hline[1:].rstrip("\r\n"):
                        debug("file is not patched - failed hunk: %d" % (hno+1))
                        raise NoMatch
                    line = fp.readline()
                    lineno += 1
        except NoMatch:
            matched = False
            # todo: display failed hunk, i.e. expected/found
        fp.close()
        return matched
    def patch_stream(self, instream, hunks):
        """ Generator that yields stream patched with hunks iterable

            Converts lineends in hunk lines to the best suitable format
            autodetected from input
        """
        # todo: At the moment substituted lineends may not be the same
        #       at the start and at the end of patching. Also issue a
        #       warning/throw about mixed lineends (is it really needed?)

        hunks = iter(hunks)

        srclineno = 1   # current line number in instream (1-based)

        # statistics of line endings seen so far in the source stream
        lineends = {'\n':0, '\r\n':0, '\r':0}
        def get_line():
            """
            local utility function - return line from source stream
            collecting line end statistics on the way
            """
            line = instream.readline()
            # 'U' mode works only with text files
            if line.endswith("\r\n"):
                lineends["\r\n"] += 1
            elif line.endswith("\n"):
                lineends["\n"] += 1
            elif line.endswith("\r"):
                lineends["\r"] += 1
            return line

        for hno, h in enumerate(hunks):
            debug("hunk %d" % (hno+1))
            # skip to line just before hunk starts
            while srclineno < h.startsrc:
                yield get_line()
                srclineno += 1

            for hline in h.text:
                # todo: check \ No newline at the end of file
                if hline.startswith("-") or hline.startswith("\\"):
                    # removed line (or '\ No newline' marker):
                    # consume from source without emitting
                    get_line()
                    srclineno += 1
                    continue
                else:
                    if not hline.startswith("+"):
                        # context line: consume the source copy,
                        # but emit the hunk's version below
                        get_line()
                        srclineno += 1
                    line2write = hline[1:]
                    # detect if line ends are consistent in source file
                    if sum([bool(lineends[x]) for x in lineends]) == 1:
                        newline = [x for x in lineends if lineends[x] != 0][0]
                        yield line2write.rstrip("\r\n")+newline
                    else: # newlines are mixed
                        yield line2write

        # pass the rest of the stream through unchanged
        for line in instream:
            yield line
  751. def write_hunks(self, srcname, tgtname, hunks):
  752. src = open(srcname, "rb")
  753. tgt = open(tgtname, "wb")
  754. debug("processing target file %s" % tgtname)
  755. tgt.writelines(self.patch_stream(src, hunks))
  756. tgt.close()
  757. src.close()
  758. # [ ] TODO: add test for permission copy
  759. shutil.copymode(srcname, tgtname)
  760. return True
if __name__ == "__main__":
    # command-line entry point (Python 2 script)
    from optparse import OptionParser
    from os.path import exists
    import sys

    opt = OptionParser(usage="1. %prog [options] unified.diff\n"
                       " 2. %prog [options] http://host/patch\n"
                       " 3. %prog [options] -- < unified.diff",
                       version="python-patch %s" % __version__)
    opt.add_option("-q", "--quiet", action="store_const", dest="verbosity",
                   const=0, help="print only warnings and errors", default=1)
    opt.add_option("-v", "--verbose", action="store_const", dest="verbosity",
                   const=2, help="be verbose")
    opt.add_option("--debug", action="store_true", dest="debugmode", help="debug mode")
    opt.add_option("--diffstat", action="store_true", dest="diffstat",
                   help="print diffstat and exit")
    opt.add_option("-p", "--strip", type="int", metavar='N', default=0,
                   help="strip N path components from filenames")
    (options, args) = opt.parse_args()

    # no file argument and no trailing "--": nothing to do, show help
    if not args and sys.argv[-1:] != ['--']:
        opt.print_version()
        opt.print_help()
        sys.exit()
    # trailing "--" with no file argument means read patch from stdin
    readstdin = (sys.argv[-1:] == ['--'] and not args)

    debugmode = options.debugmode

    # configure logging level/format from -q/-v/--debug
    verbosity_levels = {0:logging.WARNING, 1:logging.INFO, 2:logging.DEBUG}
    loglevel = verbosity_levels[options.verbosity]
    logformat = "%(message)s"
    if debugmode:
        loglevel = logging.DEBUG
        logformat = "%(levelname)8s %(message)s"
    logger.setLevel(loglevel)
    loghandler = logging.StreamHandler()
    loghandler.setFormatter(logging.Formatter(logformat))
    logger.addHandler(loghandler)

    # read the patch: stdin, URL, or local file
    if readstdin:
        patch = PatchSet(sys.stdin)
    else:
        patchfile = args[0]
        urltest = patchfile.split(':')[0]
        if (':' in patchfile and urltest.isalpha()
              and len(urltest) > 1): # one char before : is a windows drive letter
            patch = fromurl(patchfile)
        else:
            if not exists(patchfile) or not isfile(patchfile):
                sys.exit("patch file does not exist - %s" % patchfile)
            patch = fromfile(patchfile)

    if options.diffstat:
        print patch.diffstat()
        sys.exit(0)

    #pprint(patch)
    # exit with -1 when applying the patch failed
    patch.apply(options.strip) or sys.exit(-1)
  812. # todo: document and test line ends handling logic - patch.py detects proper line-endings
  813. # for inserted hunks and issues a warning if patched file has inconsistent line ends