PageRenderTime 49ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/mercurial/verify.py

https://bitbucket.org/mirror/mercurial/
Python | 318 lines | 265 code | 38 blank | 15 comment | 110 complexity | 18308da2c21909797469e223161979bd MD5 | raw file
Possible License(s): GPL-2.0
  1. # verify.py - repository integrity checking for Mercurial
  2. #
  3. # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
  4. #
  5. # This software may be used and distributed according to the terms of the
  6. # GNU General Public License version 2 or any later version.
  7. from node import nullid, short
  8. from i18n import _
  9. import os
  10. import revlog, util, error
  def verify(repo):
      """Run the integrity check on ``repo`` while holding its lock.

      The lock obtained from ``repo.lock()`` is released whether the check
      returns normally or raises.  The value produced by ``_verify`` is
      handed back to the caller unchanged.
      """
      repolock = repo.lock()
      try:
          outcome = _verify(repo)
      finally:
          # always give the lock back, even if verification blew up
          repolock.release()
      return outcome
  17. def _normpath(f):
  18. # under hg < 2.4, convert didn't sanitize paths properly, so a
  19. # converted repo may contain repeated slashes
  20. while '//' in f:
  21. f = f.replace('//', '/')
  22. return f
  def _verify(repo):
      """Check the changelog, manifest and file revlogs of ``repo``.

      The check runs in four phases, each announced through ``ui.status``:
      changesets, manifests, a cross-check of the file names seen in both,
      and finally every file revlog.  Problems are printed through
      ``ui.warn`` and counted; nothing is repaired.

      Returns 1 when at least one integrity error was counted; otherwise the
      function falls off the end (returns None).

      Raises util.Abort when ``repo.url()`` does not start with ``file:``.

      NOTE(review): block nesting was reconstructed from a listing that had
      lost its indentation -- confirm against the canonical source.
      """
      repo = repo.unfiltered()
      # manifest node -> list of changelog revs that reference it
      mflinkrevs = {}
      # normalized file name -> list of changelog revs that list the file
      filelinkrevs = {}
      # normalized file name -> {file node: linkrev of the manifest entry}
      filenodes = {}
      # number of file revisions visited in the last phase
      revisions = 0
      # every non-None linkrev that was passed to err()
      badrevs = set()
      # counters are one-element lists so the nested helpers below can
      # update them in place
      errors = [0]
      warnings = [0]
      ui = repo.ui
      cl = repo.changelog
      mf = repo.manifest
      lrugetctx = util.lrucachefunc(repo.changectx)
      if not repo.url().startswith('file:'):
          raise util.Abort(_("cannot verify bundle or remote repos"))
      def err(linkrev, msg, filename=None):
          """Report one integrity error and count it.

          A ``linkrev`` that is not None is remembered in ``badrevs``; None
          is printed as '?'.  The message is printed as
          " [filename@]linkrev: msg".
          """
          if linkrev is not None:
              badrevs.add(linkrev)
          else:
              linkrev = '?'
          msg = "%s: %s" % (linkrev, msg)
          if filename:
              msg = "%s@%s" % (filename, msg)
          ui.warn(" " + msg + "\n")
          errors[0] += 1
      def exc(linkrev, msg, inst, filename=None):
          """Report the exception ``inst`` as an error via err().

          Every call site in this function is inside an ``except`` block;
          the bare ``raise`` below relies on that to re-raise a
          KeyboardInterrupt instead of recording it.
          """
          if isinstance(inst, KeyboardInterrupt):
              ui.warn(_("interrupted"))
              raise
          # fall back to repr() when the exception has an empty message
          if not str(inst):
              inst = repr(inst)
          err(linkrev, "%s: %s" % (msg, inst), filename)
      def warn(msg):
          """Print ``msg`` followed by a newline and count it as a warning."""
          ui.warn(msg + "\n")
          warnings[0] += 1
      def checklog(obj, name, linkrev):
          """Run whole-revlog sanity checks on ``obj``.

          ``name`` labels the messages; ``linkrev`` is only used for the
          "empty or missing" report.  ``havecl``, ``havemf`` and
          ``revlogv1`` are closure variables assigned further down, before
          the first call to this helper.
          """
          # an empty revlog is an error only if the changelog or the
          # manifest has content
          if not len(obj) and (havecl or havemf):
              err(linkrev, _("empty or missing %s") % name)
              return
          # d[0] is reported as a data length discrepancy, d[1] as extra
          # bytes in the index
          d = obj.checksize()
          if d[0]:
              err(None, _("data length off by %d bytes") % d[0], name)
          if d[1]:
              err(None, _("index contains %d extra bytes") % d[1], name)
          # warn when this revlog's format differs from the changelog's
          if obj.version != revlog.REVLOGV0:
              if not revlogv1:
                  warn(_("warning: `%s' uses revlog format 1") % name)
          elif revlogv1:
              warn(_("warning: `%s' uses revlog format 0") % name)
      def checkentry(obj, i, node, seen, linkrevs, f):
          """Check revision ``i`` (``node``) of revlog ``obj``.

          ``seen`` maps nodes already visited in ``obj`` to their revision
          number and is updated here.  ``linkrevs`` lists the changelog
          revisions this entry is expected to link to.  ``f`` is the name
          used in messages.

          Returns the entry's linkrev, or None when the linkrev failed the
          check and therefore cannot be trusted.
          """
          lr = obj.linkrev(obj.rev(node))
          if lr < 0 or (havecl and lr not in linkrevs):
              if lr < 0 or lr >= len(cl):
                  msg = _("rev %d points to nonexistent changeset %d")
              else:
                  msg = _("rev %d points to unexpected changeset %d")
              err(None, msg % (i, lr), f)
              if linkrevs:
                  if f and len(linkrevs) > 1:
                      try:
                          # attempt to filter down to real linkrevs
                          linkrevs = [l for l in linkrevs
                                      if lrugetctx(l)[f].filenode() == node]
                      except Exception:
                          # best effort: keep the unfiltered list
                          pass
                  warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
              lr = None # can't be trusted
          try:
              # both parents must be nullid or a node seen earlier
              p1, p2 = obj.parents(node)
              if p1 not in seen and p1 != nullid:
                  err(lr, _("unknown parent 1 %s of %s") %
                      (short(p1), short(node)), f)
              if p2 not in seen and p2 != nullid:
                  err(lr, _("unknown parent 2 %s of %s") %
                      (short(p2), short(node)), f)
          except Exception, inst:
              exc(lr, _("checking parents of %s") % short(node), inst, f)
          if node in seen:
              err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
          seen[node] = i
          return lr
      # a leftover "journal" file in the store is reported but not fatal
      if os.path.exists(repo.sjoin("journal")):
          ui.warn(_("abandoned transaction found - run hg recover\n"))
      # the changelog's format is the reference for all other revlogs
      revlogv1 = cl.version != revlog.REVLOGV0
      if ui.verbose or not revlogv1:
          ui.status(_("repository uses revlog format %d\n") %
                    (revlogv1 and 1 or 0))
      havecl = len(cl) > 0
      havemf = len(mf) > 0
      # phase 1: changelog
      ui.status(_("checking changesets\n"))
      # set as soon as a changeset names a non-null manifest or fails to
      # unpack
      refersmf = False
      seen = {}
      checklog(cl, "changelog", 0)
      total = len(repo)
      for i in repo:
          ui.progress(_('checking'), i, total=total, unit=_('changesets'))
          n = cl.node(i)
          checkentry(cl, i, n, seen, [i], "changelog")
          try:
              # changes[0] is used as the manifest node and changes[3] as
              # the list of file names
              changes = cl.read(n)
              if changes[0] != nullid:
                  mflinkrevs.setdefault(changes[0], []).append(i)
                  refersmf = True
              for f in changes[3]:
                  filelinkrevs.setdefault(_normpath(f), []).append(i)
          except Exception, inst:
              refersmf = True
              exc(i, _("unpacking changeset %s") % short(n), inst)
      ui.progress(_('checking'), None)
      # phase 2: manifest
      ui.status(_("checking manifests\n"))
      seen = {}
      if refersmf:
          # Do not check manifest if there are only changelog entries with
          # null manifests.
          checklog(mf, "manifest", 0)
      total = len(mf)
      for i in mf:
          ui.progress(_('checking'), i, total=total, unit=_('manifests'))
          n = mf.node(i)
          lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
          # consume the entry; whatever is left in mflinkrevs afterwards was
          # referenced by a changeset but never found in the manifest
          if n in mflinkrevs:
              del mflinkrevs[n]
          else:
              err(lr, _("%s not in changesets") % short(n), "manifest")
          try:
              for f, fn in mf.readdelta(n).iteritems():
                  if not f:
                      err(lr, _("file without name in manifest"))
                  elif f != "/dev/null":
                      # setdefault keeps the linkrev recorded first
                      filenodes.setdefault(_normpath(f), {}).setdefault(fn, lr)
          except Exception, inst:
              exc(lr, _("reading manifest delta %s") % short(n), inst)
      ui.progress(_('checking'), None)
      # phase 3: compare file names seen in changesets and in manifests
      ui.status(_("crosschecking files in changesets and manifests\n"))
      total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
      count = 0
      if havemf:
          for c, m in sorted([(c, m) for m in mflinkrevs
                              for c in mflinkrevs[m]]):
              count += 1
              if m == nullid:
                  continue
              ui.progress(_('crosschecking'), count, total=total)
              err(c, _("changeset refers to unknown manifest %s") % short(m))
          mflinkrevs = None # del is bad here due to scope issues
          # NOTE(review): this loop is placed under ``if havemf`` in the
          # reconstruction -- confirm the nesting
          for f in sorted(filelinkrevs):
              count += 1
              ui.progress(_('crosschecking'), count, total=total)
              if f not in filenodes:
                  lr = filelinkrevs[f][0]
                  err(lr, _("in changeset but not in manifest"), f)
      if havecl:
          for f in sorted(filenodes):
              count += 1
              ui.progress(_('crosschecking'), count, total=total)
              if f not in filelinkrevs:
                  try:
                      # blame the smallest linkrev among the file's nodes
                      fl = repo.file(f)
                      lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                  except Exception:
                      lr = None
                  err(lr, _("in manifest but not in changeset"), f)
      ui.progress(_('crosschecking'), None)
      # phase 4: individual file revlogs
      ui.status(_("checking files\n"))
      # store files not yet matched to a tracked file; leftovers are
      # reported as orphans at the end
      storefiles = set()
      for f, f2, size in repo.store.datafiles():
          if not f:
              err(None, _("cannot decode filename '%s'") % f2)
          elif size > 0 or not revlogv1:
              storefiles.add(_normpath(f))
      files = sorted(set(filenodes) | set(filelinkrevs))
      total = len(files)
      for i, f in enumerate(files):
          ui.progress(_('checking'), i, item=f, total=total)
          try:
              linkrevs = filelinkrevs[f]
          except KeyError:
              # in manifest but not in changelog
              linkrevs = []
          # linkrev used for reports made before any revision is checked
          if linkrevs:
              lr = linkrevs[0]
          else:
              lr = None
          try:
              fl = repo.file(f)
          except error.RevlogError, e:
              err(lr, _("broken revlog! (%s)") % e, f)
              continue
          for ff in fl.files():
              try:
                  storefiles.remove(ff)
              except KeyError:
                  err(lr, _("missing revlog!"), ff)
          checklog(fl, f, lr)
          seen = {}
          rp = None
          # NOTE: this loop rebinds ``i`` from the enclosing enumerate();
          # the outer loop only reads ``i`` at the top of each iteration
          for i in fl:
              revisions += 1
              n = fl.node(i)
              lr = checkentry(fl, i, n, seen, linkrevs, f)
              if f in filenodes:
                  if havemf and n not in filenodes[f]:
                      err(lr, _("%s not in manifests") % (short(n)), f)
                  else:
                      # consume the node; leftovers are reported by the
                      # cross-check below
                      del filenodes[f][n]
              # verify contents
              try:
                  l = len(fl.read(n))
                  rp = fl.renamed(n)
                  if l != fl.size(i):
                      # only report when the length of fl.revision(n)
                      # disagrees with the recorded size as well
                      if len(fl.revision(n)) != fl.size(i):
                          err(lr, _("unpacked size is %s, %s expected") %
                              (l, fl.size(i)), f)
              except Exception, inst:
                  exc(lr, _("unpacking %s") % short(n), inst, f)
              # check renames
              try:
                  # rp[0] is used as the copy source path, rp[1] as its node
                  if rp:
                      if lr is not None and ui.verbose:
                          ctx = lrugetctx(lr)
                          found = False
                          for pctx in ctx.parents():
                              if rp[0] in pctx:
                                  found = True
                                  break
                          if not found:
                              warn(_("warning: copy source of '%s' not"
                                     " in parents of %s") % (f, ctx))
                      fl2 = repo.file(rp[0])
                      if not len(fl2):
                          err(lr, _("empty or missing copy source revlog %s:%s")
                              % (rp[0], short(rp[1])), f)
                      elif rp[1] == nullid:
                          # printed through ui.note, so not counted in
                          # warnings[0]
                          ui.note(_("warning: %s@%s: copy source"
                                    " revision is nullid %s:%s\n")
                                  % (f, lr, rp[0], short(rp[1])))
                      else:
                          # a failing lookup raises and is reported by the
                          # handler below
                          fl2.rev(rp[1])
              except Exception, inst:
                  exc(lr, _("checking rename of %s") % short(n), inst, f)
          # cross-check
          if f in filenodes:
              # nodes listed in manifests but not found in the file revlog
              fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
              for lr, node in sorted(fns):
                  err(lr, _("%s in manifests not found") % short(node), f)
      ui.progress(_('checking'), None)
      # store files that no tracked file claimed
      for f in storefiles:
          warn(_("warning: orphan revlog '%s'") % f)
      ui.status(_("%d files, %d changesets, %d total revisions\n") %
                (len(files), len(cl), revisions))
      if warnings[0]:
          ui.warn(_("%d warnings encountered!\n") % warnings[0])
      if errors[0]:
          ui.warn(_("%d integrity errors encountered!\n") % errors[0])
          if badrevs:
              ui.warn(_("(first damaged changeset appears to be %d)\n")
                      % min(badrevs))
          return 1