PageRenderTime 41ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/hgext/convert/filemap.py

https://bitbucket.org/mirror/mercurial/
Python | 422 lines | 313 code | 37 blank | 72 comment | 71 complexity | 9b9322133739f7ea421b258f984f6f4f MD5 | raw file
Possible License(s): GPL-2.0
  1. # Copyright 2007 Bryan O'Sullivan <bos@serpentine.com>
  2. # Copyright 2007 Alexis S. L. Carvalho <alexis@cecm.usp.br>
  3. #
  4. # This software may be used and distributed according to the terms of the
  5. # GNU General Public License version 2 or any later version.
  6. import posixpath
  7. import shlex
  8. from mercurial.i18n import _
  9. from mercurial import util, error
  10. from common import SKIPREV, converter_source
  11. def rpairs(path):
  12. '''Yield tuples with path split at '/', starting with the full path.
  13. No leading, trailing or double '/', please.
  14. >>> for x in rpairs('foo/bar/baz'): print x
  15. ('foo/bar/baz', '')
  16. ('foo/bar', 'baz')
  17. ('foo', 'bar/baz')
  18. ('.', 'foo/bar/baz')
  19. '''
  20. i = len(path)
  21. while i != -1:
  22. yield path[:i], path[i + 1:]
  23. i = path.rfind('/', 0, i)
  24. yield '.', path
  25. def normalize(path):
  26. ''' We use posixpath.normpath to support cross-platform path format.
  27. However, it doesn't handle None input. So we wrap it up. '''
  28. if path is None:
  29. return None
  30. return posixpath.normpath(path)
  31. class filemapper(object):
  32. '''Map and filter filenames when importing.
  33. A name can be mapped to itself, a new name, or None (omit from new
  34. repository).'''
  35. def __init__(self, ui, path=None):
  36. self.ui = ui
  37. self.include = {}
  38. self.exclude = {}
  39. self.rename = {}
  40. if path:
  41. if self.parse(path):
  42. raise util.Abort(_('errors in filemap'))
  43. def parse(self, path):
  44. errs = 0
  45. def check(name, mapping, listname):
  46. if not name:
  47. self.ui.warn(_('%s:%d: path to %s is missing\n') %
  48. (lex.infile, lex.lineno, listname))
  49. return 1
  50. if name in mapping:
  51. self.ui.warn(_('%s:%d: %r already in %s list\n') %
  52. (lex.infile, lex.lineno, name, listname))
  53. return 1
  54. if (name.startswith('/') or
  55. name.endswith('/') or
  56. '//' in name):
  57. self.ui.warn(_('%s:%d: superfluous / in %s %r\n') %
  58. (lex.infile, lex.lineno, listname, name))
  59. return 1
  60. return 0
  61. lex = shlex.shlex(open(path), path, True)
  62. lex.wordchars += '!@#$%^&*()-=+[]{}|;:,./<>?'
  63. cmd = lex.get_token()
  64. while cmd:
  65. if cmd == 'include':
  66. name = normalize(lex.get_token())
  67. errs += check(name, self.exclude, 'exclude')
  68. self.include[name] = name
  69. elif cmd == 'exclude':
  70. name = normalize(lex.get_token())
  71. errs += check(name, self.include, 'include')
  72. errs += check(name, self.rename, 'rename')
  73. self.exclude[name] = name
  74. elif cmd == 'rename':
  75. src = normalize(lex.get_token())
  76. dest = normalize(lex.get_token())
  77. errs += check(src, self.exclude, 'exclude')
  78. self.rename[src] = dest
  79. elif cmd == 'source':
  80. errs += self.parse(normalize(lex.get_token()))
  81. else:
  82. self.ui.warn(_('%s:%d: unknown directive %r\n') %
  83. (lex.infile, lex.lineno, cmd))
  84. errs += 1
  85. cmd = lex.get_token()
  86. return errs
  87. def lookup(self, name, mapping):
  88. name = normalize(name)
  89. for pre, suf in rpairs(name):
  90. try:
  91. return mapping[pre], pre, suf
  92. except KeyError:
  93. pass
  94. return '', name, ''
  95. def __call__(self, name):
  96. if self.include:
  97. inc = self.lookup(name, self.include)[0]
  98. else:
  99. inc = name
  100. if self.exclude:
  101. exc = self.lookup(name, self.exclude)[0]
  102. else:
  103. exc = ''
  104. if (not self.include and exc) or (len(inc) <= len(exc)):
  105. return None
  106. newpre, pre, suf = self.lookup(name, self.rename)
  107. if newpre:
  108. if newpre == '.':
  109. return suf
  110. if suf:
  111. if newpre.endswith('/'):
  112. return newpre + suf
  113. return newpre + '/' + suf
  114. return newpre
  115. return name
  116. def active(self):
  117. return bool(self.include or self.exclude or self.rename)
  118. # This class does two additional things compared to a regular source:
  119. #
  120. # - Filter and rename files. This is mostly wrapped by the filemapper
  121. # class above. We hide the original filename in the revision that is
  122. # returned by getchanges to be able to find things later in getfile.
  123. #
  124. # - Return only revisions that matter for the files we're interested in.
  125. # This involves rewriting the parents of the original revision to
  126. # create a graph that is restricted to those revisions.
  127. #
  128. # This set of revisions includes not only revisions that directly
  129. # touch files we're interested in, but also merges that merge two
  130. # or more interesting revisions.
  131. class filemap_source(converter_source):
  132. def __init__(self, ui, baseconverter, filemap):
  133. super(filemap_source, self).__init__(ui)
  134. self.base = baseconverter
  135. self.filemapper = filemapper(ui, filemap)
  136. self.commits = {}
  137. # if a revision rev has parent p in the original revision graph, then
  138. # rev will have parent self.parentmap[p] in the restricted graph.
  139. self.parentmap = {}
  140. # self.wantedancestors[rev] is the set of all ancestors of rev that
  141. # are in the restricted graph.
  142. self.wantedancestors = {}
  143. self.convertedorder = None
  144. self._rebuilt = False
  145. self.origparents = {}
  146. self.children = {}
  147. self.seenchildren = {}
  148. def before(self):
  149. self.base.before()
  150. def after(self):
  151. self.base.after()
  152. def setrevmap(self, revmap):
  153. # rebuild our state to make things restartable
  154. #
  155. # To avoid calling getcommit for every revision that has already
  156. # been converted, we rebuild only the parentmap, delaying the
  157. # rebuild of wantedancestors until we need it (i.e. until a
  158. # merge).
  159. #
  160. # We assume the order argument lists the revisions in
  161. # topological order, so that we can infer which revisions were
  162. # wanted by previous runs.
  163. self._rebuilt = not revmap
  164. seen = {SKIPREV: SKIPREV}
  165. dummyset = set()
  166. converted = []
  167. for rev in revmap.order:
  168. mapped = revmap[rev]
  169. wanted = mapped not in seen
  170. if wanted:
  171. seen[mapped] = rev
  172. self.parentmap[rev] = rev
  173. else:
  174. self.parentmap[rev] = seen[mapped]
  175. self.wantedancestors[rev] = dummyset
  176. arg = seen[mapped]
  177. if arg == SKIPREV:
  178. arg = None
  179. converted.append((rev, wanted, arg))
  180. self.convertedorder = converted
  181. return self.base.setrevmap(revmap)
  182. def rebuild(self):
  183. if self._rebuilt:
  184. return True
  185. self._rebuilt = True
  186. self.parentmap.clear()
  187. self.wantedancestors.clear()
  188. self.seenchildren.clear()
  189. for rev, wanted, arg in self.convertedorder:
  190. if rev not in self.origparents:
  191. try:
  192. self.origparents[rev] = self.getcommit(rev).parents
  193. except error.RepoLookupError:
  194. self.ui.debug("unknown revmap source: %s\n" % rev)
  195. continue
  196. if arg is not None:
  197. self.children[arg] = self.children.get(arg, 0) + 1
  198. for rev, wanted, arg in self.convertedorder:
  199. try:
  200. parents = self.origparents[rev]
  201. except KeyError:
  202. continue # unknown revmap source
  203. if wanted:
  204. self.mark_wanted(rev, parents)
  205. else:
  206. self.mark_not_wanted(rev, arg)
  207. self._discard(arg, *parents)
  208. return True
  209. def getheads(self):
  210. return self.base.getheads()
  211. def getcommit(self, rev):
  212. # We want to save a reference to the commit objects to be able
  213. # to rewrite their parents later on.
  214. c = self.commits[rev] = self.base.getcommit(rev)
  215. for p in c.parents:
  216. self.children[p] = self.children.get(p, 0) + 1
  217. return c
  218. def _cachedcommit(self, rev):
  219. if rev in self.commits:
  220. return self.commits[rev]
  221. return self.base.getcommit(rev)
  222. def _discard(self, *revs):
  223. for r in revs:
  224. if r is None:
  225. continue
  226. self.seenchildren[r] = self.seenchildren.get(r, 0) + 1
  227. if self.seenchildren[r] == self.children[r]:
  228. self.wantedancestors.pop(r, None)
  229. self.parentmap.pop(r, None)
  230. del self.seenchildren[r]
  231. if self._rebuilt:
  232. del self.children[r]
  233. def wanted(self, rev, i):
  234. # Return True if we're directly interested in rev.
  235. #
  236. # i is an index selecting one of the parents of rev (if rev
  237. # has no parents, i is None). getchangedfiles will give us
  238. # the list of files that are different in rev and in the parent
  239. # indicated by i. If we're interested in any of these files,
  240. # we're interested in rev.
  241. try:
  242. files = self.base.getchangedfiles(rev, i)
  243. except NotImplementedError:
  244. raise util.Abort(_("source repository doesn't support --filemap"))
  245. for f in files:
  246. if self.filemapper(f):
  247. return True
  248. return False
  249. def mark_not_wanted(self, rev, p):
  250. # Mark rev as not interesting and update data structures.
  251. if p is None:
  252. # A root revision. Use SKIPREV to indicate that it doesn't
  253. # map to any revision in the restricted graph. Put SKIPREV
  254. # in the set of wanted ancestors to simplify code elsewhere
  255. self.parentmap[rev] = SKIPREV
  256. self.wantedancestors[rev] = set((SKIPREV,))
  257. return
  258. # Reuse the data from our parent.
  259. self.parentmap[rev] = self.parentmap[p]
  260. self.wantedancestors[rev] = self.wantedancestors[p]
  261. def mark_wanted(self, rev, parents):
  262. # Mark rev ss wanted and update data structures.
  263. # rev will be in the restricted graph, so children of rev in
  264. # the original graph should still have rev as a parent in the
  265. # restricted graph.
  266. self.parentmap[rev] = rev
  267. # The set of wanted ancestors of rev is the union of the sets
  268. # of wanted ancestors of its parents. Plus rev itself.
  269. wrev = set()
  270. for p in parents:
  271. if p in self.wantedancestors:
  272. wrev.update(self.wantedancestors[p])
  273. else:
  274. self.ui.warn(_('warning: %s parent %s is missing\n') %
  275. (rev, p))
  276. wrev.add(rev)
  277. self.wantedancestors[rev] = wrev
  278. def getchanges(self, rev):
  279. parents = self.commits[rev].parents
  280. if len(parents) > 1:
  281. self.rebuild()
  282. # To decide whether we're interested in rev we:
  283. #
  284. # - calculate what parents rev will have if it turns out we're
  285. # interested in it. If it's going to have more than 1 parent,
  286. # we're interested in it.
  287. #
  288. # - otherwise, we'll compare it with the single parent we found.
  289. # If any of the files we're interested in is different in the
  290. # the two revisions, we're interested in rev.
  291. # A parent p is interesting if its mapped version (self.parentmap[p]):
  292. # - is not SKIPREV
  293. # - is still not in the list of parents (we don't want duplicates)
  294. # - is not an ancestor of the mapped versions of the other parents or
  295. # there is no parent in the same branch than the current revision.
  296. mparents = []
  297. knownparents = set()
  298. branch = self.commits[rev].branch
  299. hasbranchparent = False
  300. for i, p1 in enumerate(parents):
  301. mp1 = self.parentmap[p1]
  302. if mp1 == SKIPREV or mp1 in knownparents:
  303. continue
  304. isancestor = util.any(p2 for p2 in parents
  305. if p1 != p2 and mp1 != self.parentmap[p2]
  306. and mp1 in self.wantedancestors[p2])
  307. if not isancestor and not hasbranchparent and len(parents) > 1:
  308. # This could be expensive, avoid unnecessary calls.
  309. if self._cachedcommit(p1).branch == branch:
  310. hasbranchparent = True
  311. mparents.append((p1, mp1, i, isancestor))
  312. knownparents.add(mp1)
  313. # Discard parents ancestors of other parents if there is a
  314. # non-ancestor one on the same branch than current revision.
  315. if hasbranchparent:
  316. mparents = [p for p in mparents if not p[3]]
  317. wp = None
  318. if mparents:
  319. wp = max(p[2] for p in mparents)
  320. mparents = [p[1] for p in mparents]
  321. elif parents:
  322. wp = 0
  323. self.origparents[rev] = parents
  324. closed = False
  325. if 'close' in self.commits[rev].extra:
  326. # A branch closing revision is only useful if one of its
  327. # parents belong to the branch being closed
  328. pbranches = [self._cachedcommit(p).branch for p in mparents]
  329. if branch in pbranches:
  330. closed = True
  331. if len(mparents) < 2 and not closed and not self.wanted(rev, wp):
  332. # We don't want this revision.
  333. # Update our state and tell the convert process to map this
  334. # revision to the same revision its parent as mapped to.
  335. p = None
  336. if parents:
  337. p = parents[wp]
  338. self.mark_not_wanted(rev, p)
  339. self.convertedorder.append((rev, False, p))
  340. self._discard(*parents)
  341. return self.parentmap[rev]
  342. # We want this revision.
  343. # Rewrite the parents of the commit object
  344. self.commits[rev].parents = mparents
  345. self.mark_wanted(rev, parents)
  346. self.convertedorder.append((rev, True, None))
  347. self._discard(*parents)
  348. # Get the real changes and do the filtering/mapping. To be
  349. # able to get the files later on in getfile, we hide the
  350. # original filename in the rev part of the return value.
  351. changes, copies = self.base.getchanges(rev)
  352. files = {}
  353. for f, r in changes:
  354. newf = self.filemapper(f)
  355. if newf and (newf != f or newf not in files):
  356. files[newf] = (f, r)
  357. files = sorted(files.items())
  358. ncopies = {}
  359. for c in copies:
  360. newc = self.filemapper(c)
  361. if newc:
  362. newsource = self.filemapper(copies[c])
  363. if newsource:
  364. ncopies[newc] = newsource
  365. return files, ncopies
  366. def getfile(self, name, rev):
  367. realname, realrev = rev
  368. return self.base.getfile(realname, realrev)
  369. def gettags(self):
  370. return self.base.gettags()
  371. def hasnativeorder(self):
  372. return self.base.hasnativeorder()
  373. def lookuprev(self, rev):
  374. return self.base.lookuprev(rev)
  375. def getbookmarks(self):
  376. return self.base.getbookmarks()
  377. def converted(self, rev, sinkrev):
  378. self.base.converted(rev, sinkrev)