/hgext/largefiles/lfcommands.py

https://bitbucket.org/mirror/mercurial/ · Python · 572 lines · 452 code · 49 blank · 71 comment · 86 complexity · c0c82aa035c53d024cf4a3fdb677eca7 MD5 · raw file

  1. # Copyright 2009-2010 Gregory P. Ward
  2. # Copyright 2009-2010 Intelerad Medical Systems Incorporated
  3. # Copyright 2010-2011 Fog Creek Software
  4. # Copyright 2010-2011 Unity Technologies
  5. #
  6. # This software may be used and distributed according to the terms of the
  7. # GNU General Public License version 2 or any later version.
  8. '''High-level command function for lfconvert, plus the cmdtable.'''
  9. import os, errno
  10. import shutil
  11. from mercurial import util, match as match_, hg, node, context, error, \
  12. cmdutil, scmutil, commands
  13. from mercurial.i18n import _
  14. from mercurial.lock import release
  15. import lfutil
  16. import basestore
  17. # -- Commands ----------------------------------------------------------
# Command table this extension exposes to Mercurial; filled in by the
# @command decorator applications below.
cmdtable = {}
# 'command' is a decorator factory that registers each decorated function
# (with its options and synopsis) into cmdtable.
command = cmdutil.command(cmdtable)
@command('lfconvert',
    [('s', 'size', '',
      _('minimum size (MB) for files to be converted as largefiles'), 'SIZE'),
    ('', 'to-normal', False,
     _('convert from a largefiles repo to a normal repo')),
    ],
    _('hg lfconvert SOURCE DEST [FILE ...]'),
    norepo=True,
    inferrepo=True)
def lfconvert(ui, src, dest, *pats, **opts):
    '''convert a normal repository to a largefiles repository

    Convert repository SOURCE to a new repository DEST, identical to
    SOURCE except that certain files will be converted as largefiles:
    specifically, any file that matches any PATTERN *or* whose size is
    above the minimum size threshold is converted as a largefile. The
    size used to determine whether or not to track a file as a
    largefile is the size of the first version of the file. The
    minimum size can be specified either with --size or in
    configuration as ``largefiles.size``.

    After running this command you will need to make sure that
    largefiles is enabled anywhere you intend to push the new
    repository.

    Use --to-normal to convert largefiles back to normal files; after
    this, the DEST repository can be used without largefiles at all.'''

    if opts['to_normal']:
        tolfile = False
    else:
        tolfile = True
        # the size threshold only matters when converting *to* largefiles
        size = lfutil.getminsize(ui, True, opts.get('size'), default=None)

    if not hg.islocal(src):
        raise util.Abort(_('%s is not a local Mercurial repo') % src)
    if not hg.islocal(dest):
        raise util.Abort(_('%s is not a local Mercurial repo') % dest)

    rsrc = hg.repository(ui, src)
    ui.status(_('initializing destination %s\n') % dest)
    rdst = hg.repository(ui, dest, create=True)

    success = False
    dstwlock = dstlock = None
    try:
        # Lock destination to prevent modification while it is converted to.
        # Don't need to lock src because we are just reading from its history
        # which can't change.
        dstwlock = rdst.wlock()
        dstlock = rdst.lock()

        # Get a list of all changesets in the source.  The easy way to do this
        # is to simply walk the changelog, using changelog.nodesbetween().
        # Take a look at mercurial/revlog.py:639 for more details.
        # Use a generator instead of a list to decrease memory usage
        ctxs = (rsrc[ctx] for ctx in rsrc.changelog.nodesbetween(None,
            rsrc.heads())[0])
        # revmap maps source nodes to the nodes created in the destination;
        # seeded with null -> null so root changesets map cleanly.
        revmap = {node.nullid: node.nullid}
        if tolfile:
            # classification of every path seen so far, so a file keeps its
            # (large|normal) status consistently across the whole history
            lfiles = set()
            normalfiles = set()
            if not pats:
                pats = ui.configlist(lfutil.longname, 'patterns', default=[])
            if pats:
                matcher = match_.match(rsrc.root, '', list(pats))
            else:
                matcher = None

            # latest known content hash per converted largefile
            lfiletohash = {}
            for ctx in ctxs:
                ui.progress(_('converting revisions'), ctx.rev(),
                    unit=_('revision'), total=rsrc['tip'].rev())
                _lfconvert_addchangeset(rsrc, rdst, ctx, revmap,
                    lfiles, normalfiles, matcher, size, lfiletohash)
            ui.progress(_('converting revisions'), None)

            # remove working-copy leftovers produced during the conversion
            if os.path.exists(rdst.wjoin(lfutil.shortname)):
                shutil.rmtree(rdst.wjoin(lfutil.shortname))

            for f in lfiletohash.keys():
                if os.path.isfile(rdst.wjoin(f)):
                    os.unlink(rdst.wjoin(f))
                try:
                    # prune now-empty parent directories; best-effort only
                    os.removedirs(os.path.dirname(rdst.wjoin(f)))
                except OSError:
                    pass

            # If there were any files converted to largefiles, add largefiles
            # to the destination repository's requirements.
            if lfiles:
                rdst.requirements.add('largefiles')
                rdst._writerequirements()
        else:
            # converting back to a normal repo: largefile content replaces
            # the standins revision by revision
            for ctx in ctxs:
                ui.progress(_('converting revisions'), ctx.rev(),
                    unit=_('revision'), total=rsrc['tip'].rev())
                _addchangeset(ui, rsrc, rdst, ctx, revmap)
            ui.progress(_('converting revisions'), None)
        success = True
    finally:
        rdst.dirstate.clear()
        release(dstlock, dstwlock)
        if not success:
            # we failed, remove the new directory
            shutil.rmtree(rdst.root)
  114. def _addchangeset(ui, rsrc, rdst, ctx, revmap):
  115. # Convert src parents to dst parents
  116. parents = _convertparents(ctx, revmap)
  117. # Generate list of changed files
  118. files = _getchangedfiles(ctx, parents)
  119. def getfilectx(repo, memctx, f):
  120. if lfutil.standin(f) in files:
  121. # if the file isn't in the manifest then it was removed
  122. # or renamed, raise IOError to indicate this
  123. try:
  124. fctx = ctx.filectx(lfutil.standin(f))
  125. except error.LookupError:
  126. raise IOError
  127. renamed = fctx.renamed()
  128. if renamed:
  129. renamed = lfutil.splitstandin(renamed[0])
  130. hash = fctx.data().strip()
  131. path = lfutil.findfile(rsrc, hash)
  132. # If one file is missing, likely all files from this rev are
  133. if path is None:
  134. cachelfiles(ui, rsrc, ctx.node())
  135. path = lfutil.findfile(rsrc, hash)
  136. if path is None:
  137. raise util.Abort(
  138. _("missing largefile \'%s\' from revision %s")
  139. % (f, node.hex(ctx.node())))
  140. data = ''
  141. fd = None
  142. try:
  143. fd = open(path, 'rb')
  144. data = fd.read()
  145. finally:
  146. if fd:
  147. fd.close()
  148. return context.memfilectx(repo, f, data, 'l' in fctx.flags(),
  149. 'x' in fctx.flags(), renamed)
  150. else:
  151. return _getnormalcontext(repo, ctx, f, revmap)
  152. dstfiles = []
  153. for file in files:
  154. if lfutil.isstandin(file):
  155. dstfiles.append(lfutil.splitstandin(file))
  156. else:
  157. dstfiles.append(file)
  158. # Commit
  159. _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
def _lfconvert_addchangeset(rsrc, rdst, ctx, revmap, lfiles, normalfiles,
        matcher, size, lfiletohash):
    """Convert changeset *ctx* of normal repo *rsrc* into largefiles repo
    *rdst*, replacing qualifying files (per *matcher*/*size*) with standins.

    lfiles and normalfiles are mutated to remember each path's
    classification so it stays stable across the whole history;
    lfiletohash caches the last written content hash per largefile;
    revmap receives the src-node -> dst-node mapping for the new commit.
    """
    # Convert src parents to dst parents
    parents = _convertparents(ctx, revmap)

    # Generate list of changed files
    files = _getchangedfiles(ctx, parents)

    dstfiles = []
    for f in files:
        if f not in lfiles and f not in normalfiles:
            islfile = _islfile(f, ctx, matcher, size)
            # If this file was renamed or copied then copy
            # the largefile-ness of its predecessor
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                renamed = fctx.renamed()
                renamedlfile = renamed and renamed[0] in lfiles
                islfile |= renamedlfile
                if 'l' in fctx.flags():
                    # symlinks cannot be largefiles; abort if a largefile
                    # was renamed/copied into a symlink
                    if renamedlfile:
                        raise util.Abort(
                            _('renamed/copied largefile %s becomes symlink')
                            % f)
                    islfile = False
            if islfile:
                lfiles.add(f)
            else:
                normalfiles.add(f)

        if f in lfiles:
            # destination commit tracks the standin, not the file itself
            dstfiles.append(lfutil.standin(f))
            # largefile in manifest if it has not been removed/renamed
            if f in ctx.manifest():
                fctx = ctx.filectx(f)
                if 'l' in fctx.flags():
                    renamed = fctx.renamed()
                    if renamed and renamed[0] in lfiles:
                        raise util.Abort(_('largefile %s becomes symlink') % f)

                # largefile was modified, update standins
                m = util.sha1('')
                m.update(ctx[f].data())
                hash = m.hexdigest()
                if f not in lfiletohash or lfiletohash[f] != hash:
                    # write the real content into the working copy and a
                    # matching standin into the destination repo
                    rdst.wwrite(f, ctx[f].data(), ctx[f].flags())
                    executable = 'x' in ctx[f].flags()
                    lfutil.writestandin(rdst, lfutil.standin(f), hash,
                        executable)
                    lfiletohash[f] = hash
        else:
            # normal file
            dstfiles.append(f)

    def getfilectx(repo, memctx, f):
        # supplies file content to memctx at commit time
        if lfutil.isstandin(f):
            # if the file isn't in the manifest then it was removed
            # or renamed, raise IOError to indicate this
            srcfname = lfutil.splitstandin(f)
            try:
                fctx = ctx.filectx(srcfname)
            except error.LookupError:
                raise IOError
            renamed = fctx.renamed()
            if renamed:
                # standin is always a largefile because largefile-ness
                # doesn't change after rename or copy
                renamed = lfutil.standin(renamed[0])

            return context.memfilectx(repo, f, lfiletohash[srcfname] + '\n',
                                      'l' in fctx.flags(), 'x' in fctx.flags(),
                                      renamed)
        else:
            return _getnormalcontext(repo, ctx, f, revmap)

    # Commit
    _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap)
  230. def _commitcontext(rdst, parents, ctx, dstfiles, getfilectx, revmap):
  231. mctx = context.memctx(rdst, parents, ctx.description(), dstfiles,
  232. getfilectx, ctx.user(), ctx.date(), ctx.extra())
  233. ret = rdst.commitctx(mctx)
  234. rdst.setparents(ret)
  235. revmap[ctx.node()] = rdst.changelog.tip()
  236. # Generate list of changed files
  237. def _getchangedfiles(ctx, parents):
  238. files = set(ctx.files())
  239. if node.nullid not in parents:
  240. mc = ctx.manifest()
  241. mp1 = ctx.parents()[0].manifest()
  242. mp2 = ctx.parents()[1].manifest()
  243. files |= (set(mp1) | set(mp2)) - set(mc)
  244. for f in mc:
  245. if mc[f] != mp1.get(f, None) or mc[f] != mp2.get(f, None):
  246. files.add(f)
  247. return files
  248. # Convert src parents to dst parents
  249. def _convertparents(ctx, revmap):
  250. parents = []
  251. for p in ctx.parents():
  252. parents.append(revmap[p.node()])
  253. while len(parents) < 2:
  254. parents.append(node.nullid)
  255. return parents
  256. # Get memfilectx for a normal file
  257. def _getnormalcontext(repo, ctx, f, revmap):
  258. try:
  259. fctx = ctx.filectx(f)
  260. except error.LookupError:
  261. raise IOError
  262. renamed = fctx.renamed()
  263. if renamed:
  264. renamed = renamed[0]
  265. data = fctx.data()
  266. if f == '.hgtags':
  267. data = _converttags (repo.ui, revmap, data)
  268. return context.memfilectx(repo, f, data, 'l' in fctx.flags(),
  269. 'x' in fctx.flags(), renamed)
  270. # Remap tag data using a revision map
  271. def _converttags(ui, revmap, data):
  272. newdata = []
  273. for line in data.splitlines():
  274. try:
  275. id, name = line.split(' ', 1)
  276. except ValueError:
  277. ui.warn(_('skipping incorrectly formatted tag %s\n')
  278. % line)
  279. continue
  280. try:
  281. newid = node.bin(id)
  282. except TypeError:
  283. ui.warn(_('skipping incorrectly formatted id %s\n')
  284. % id)
  285. continue
  286. try:
  287. newdata.append('%s %s\n' % (node.hex(revmap[newid]),
  288. name))
  289. except KeyError:
  290. ui.warn(_('no mapping for id %s\n') % id)
  291. continue
  292. return ''.join(newdata)
  293. def _islfile(file, ctx, matcher, size):
  294. '''Return true if file should be considered a largefile, i.e.
  295. matcher matches it or it is larger than size.'''
  296. # never store special .hg* files as largefiles
  297. if file == '.hgtags' or file == '.hgignore' or file == '.hgsigs':
  298. return False
  299. if matcher and matcher(file):
  300. return True
  301. try:
  302. return ctx.filectx(file).size() >= size * 1024 * 1024
  303. except error.LookupError:
  304. return False
  305. def uploadlfiles(ui, rsrc, rdst, files):
  306. '''upload largefiles to the central store'''
  307. if not files:
  308. return
  309. store = basestore._openstore(rsrc, rdst, put=True)
  310. at = 0
  311. ui.debug("sending statlfile command for %d largefiles\n" % len(files))
  312. retval = store.exists(files)
  313. files = filter(lambda h: not retval[h], files)
  314. ui.debug("%d largefiles need to be uploaded\n" % len(files))
  315. for hash in files:
  316. ui.progress(_('uploading largefiles'), at, unit='largefile',
  317. total=len(files))
  318. source = lfutil.findfile(rsrc, hash)
  319. if not source:
  320. raise util.Abort(_('largefile %s missing from store'
  321. ' (needs to be uploaded)') % hash)
  322. # XXX check for errors here
  323. store.put(source, hash)
  324. at += 1
  325. ui.progress(_('uploading largefiles'), None)
  326. def verifylfiles(ui, repo, all=False, contents=False):
  327. '''Verify that every largefile revision in the current changeset
  328. exists in the central store. With --contents, also verify that
  329. the contents of each local largefile file revision are correct (SHA-1 hash
  330. matches the revision ID). With --all, check every changeset in
  331. this repository.'''
  332. if all:
  333. # Pass a list to the function rather than an iterator because we know a
  334. # list will work.
  335. revs = range(len(repo))
  336. else:
  337. revs = ['.']
  338. store = basestore._openstore(repo)
  339. return store.verify(revs, contents=contents)
  340. def cachelfiles(ui, repo, node, filelist=None):
  341. '''cachelfiles ensures that all largefiles needed by the specified revision
  342. are present in the repository's largefile cache.
  343. returns a tuple (cached, missing). cached is the list of files downloaded
  344. by this operation; missing is the list of files that were needed but could
  345. not be found.'''
  346. lfiles = lfutil.listlfiles(repo, node)
  347. if filelist:
  348. lfiles = set(lfiles) & set(filelist)
  349. toget = []
  350. for lfile in lfiles:
  351. try:
  352. expectedhash = repo[node][lfutil.standin(lfile)].data().strip()
  353. except IOError, err:
  354. if err.errno == errno.ENOENT:
  355. continue # node must be None and standin wasn't found in wctx
  356. raise
  357. if not lfutil.findfile(repo, expectedhash):
  358. toget.append((lfile, expectedhash))
  359. if toget:
  360. store = basestore._openstore(repo)
  361. ret = store.get(toget)
  362. return ret
  363. return ([], [])
  364. def downloadlfiles(ui, repo, rev=None):
  365. matchfn = scmutil.match(repo[None],
  366. [repo.wjoin(lfutil.shortname)], {})
  367. def prepare(ctx, fns):
  368. pass
  369. totalsuccess = 0
  370. totalmissing = 0
  371. if rev != []: # walkchangerevs on empty list would return all revs
  372. for ctx in cmdutil.walkchangerevs(repo, matchfn, {'rev' : rev},
  373. prepare):
  374. success, missing = cachelfiles(ui, repo, ctx.node())
  375. totalsuccess += len(success)
  376. totalmissing += len(missing)
  377. ui.status(_("%d additional largefiles cached\n") % totalsuccess)
  378. if totalmissing > 0:
  379. ui.status(_("%d largefiles failed to download\n") % totalmissing)
  380. return totalsuccess, totalmissing
def updatelfiles(ui, repo, filelist=None, printmessage=True):
    """Synchronize working-directory largefiles with their standins.

    Copies changed largefiles out of the cache (downloading missing ones),
    removes largefiles whose standin disappeared, and brings the largefiles
    dirstate in line with the repository dirstate.  Restricted to
    *filelist* when given.
    """
    wlock = repo.wlock()
    try:
        lfdirstate = lfutil.openlfdirstate(ui, repo)
        # consider both tracked largefiles and ones known to the lfdirstate
        lfiles = set(lfutil.listlfiles(repo)) | set(lfdirstate)

        if filelist is not None:
            lfiles = [f for f in lfiles if f in filelist]

        # lfile -> expected hash, for files that must be fetched from cache
        update = {}
        updated, removed = 0, 0
        for lfile in lfiles:
            abslfile = repo.wjoin(lfile)
            absstandin = repo.wjoin(lfutil.standin(lfile))
            if os.path.exists(absstandin):
                # preserve the user's largefile as .orig when the standin
                # itself has an .orig (e.g. after a merge/revert)
                if (os.path.exists(absstandin + '.orig') and
                    os.path.exists(abslfile)):
                    shutil.copyfile(abslfile, abslfile + '.orig')
                    util.unlinkpath(absstandin + '.orig')
                expecthash = lfutil.readstandin(repo, lfile)
                if (expecthash != '' and
                    (not os.path.exists(abslfile) or
                     expecthash != lfutil.hashfile(abslfile))):
                    if lfile not in repo[None]: # not switched to normal file
                        util.unlinkpath(abslfile, ignoremissing=True)
                    # use normallookup() to allocate entry in largefiles
                    # dirstate, because lack of it misleads
                    # lfilesrepo.status() into recognition that such cache
                    # missing files are REMOVED.
                    lfdirstate.normallookup(lfile)
                    update[lfile] = expecthash
            else:
                # Remove lfiles for which the standin is deleted, unless the
                # lfile is added to the repository again. This happens when a
                # largefile is converted back to a normal file: the standin
                # disappears, but a new (normal) file appears as the lfile.
                if (os.path.exists(abslfile) and
                    repo.dirstate.normalize(lfile) not in repo[None]):
                    util.unlinkpath(abslfile)
                    removed += 1

        # largefile processing might be slow and be interrupted - be prepared
        lfdirstate.write()
        if lfiles:
            if printmessage:
                ui.status(_('getting changed largefiles\n'))
            # prefetch everything we are about to copy out of the cache
            cachelfiles(ui, repo, None, lfiles)

            for lfile in lfiles:
                update1 = 0

                expecthash = update.get(lfile)
                if expecthash:
                    if not lfutil.copyfromcache(repo, expecthash, lfile):
                        # failed ... but already removed and set to normallookup
                        continue
                    # Synchronize largefile dirstate to the last modified
                    # time of the file
                    lfdirstate.normal(lfile)
                    update1 = 1

                # copy the state of largefile standin from the repository's
                # dirstate to its state in the lfdirstate.
                abslfile = repo.wjoin(lfile)
                absstandin = repo.wjoin(lfutil.standin(lfile))
                if os.path.exists(absstandin):
                    # keep the largefile's mode in sync with its standin
                    mode = os.stat(absstandin).st_mode
                    if mode != os.stat(abslfile).st_mode:
                        os.chmod(abslfile, mode)
                        update1 = 1

                updated += update1

                state = repo.dirstate[lfutil.standin(lfile)]
                if state == 'n':
                    # When rebasing, we need to synchronize the standin and the
                    # largefile, because otherwise the largefile will get
                    # reverted.  But for commit's sake, we have to mark the
                    # file as unclean.
                    if getattr(repo, "_isrebasing", False):
                        lfdirstate.normallookup(lfile)
                    else:
                        lfdirstate.normal(lfile)
                elif state == 'r':
                    lfdirstate.remove(lfile)
                elif state == 'a':
                    lfdirstate.add(lfile)
                elif state == '?':
                    lfdirstate.drop(lfile)

        lfdirstate.write()
        if printmessage and lfiles:
            ui.status(_('%d largefiles updated, %d removed\n') % (updated,
                removed))
    finally:
        wlock.release()
  467. @command('lfpull',
  468. [('r', 'rev', [], _('pull largefiles for these revisions'))
  469. ] + commands.remoteopts,
  470. _('-r REV... [-e CMD] [--remotecmd CMD] [SOURCE]'))
  471. def lfpull(ui, repo, source="default", **opts):
  472. """pull largefiles for the specified revisions from the specified source
  473. Pull largefiles that are referenced from local changesets but missing
  474. locally, pulling from a remote repository to the local cache.
  475. If SOURCE is omitted, the 'default' path will be used.
  476. See :hg:`help urls` for more information.
  477. .. container:: verbose
  478. Some examples:
  479. - pull largefiles for all branch heads::
  480. hg lfpull -r "head() and not closed()"
  481. - pull largefiles on the default branch::
  482. hg lfpull -r "branch(default)"
  483. """
  484. repo.lfpullsource = source
  485. revs = opts.get('rev', [])
  486. if not revs:
  487. raise util.Abort(_('no revisions specified'))
  488. revs = scmutil.revrange(repo, revs)
  489. numcached = 0
  490. for rev in revs:
  491. ui.note(_('pulling largefiles for revision %s\n') % rev)
  492. (cached, missing) = cachelfiles(ui, repo, rev)
  493. numcached += len(cached)
  494. ui.status(_("%d largefiles cached\n") % numcached)