PageRenderTime 46ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 0ms

/mercurial/fileset.py

https://bitbucket.org/mirror/mercurial/
Python | 507 lines | 496 code | 5 blank | 6 comment | 10 complexity | bcc9ae4f045467b4b77a017463f467c2 MD5 | raw file
Possible License(s): GPL-2.0
  1. # fileset.py - file set queries for mercurial
  2. #
  3. # Copyright 2010 Matt Mackall <mpm@selenic.com>
  4. #
  5. # This software may be used and distributed according to the terms of the
  6. # GNU General Public License version 2 or any later version.
  7. import re
  8. import parser, error, util, merge
  9. from i18n import _
# Token table handed to parser.parser() together with tokenize (see parse()).
# Each key is a token type produced by tokenize(); each value is a 3-tuple.
# NOTE(review): the tuple presumably means (binding strength, prefix action,
# infix action) -- confirm against the parser module.
# NOTE(review): "-" has a prefix action named "negate", but the `methods`
# dispatch table in this file has no 'negate' entry -- verify how a leading
# "-" is meant to be evaluated.
elements = {
    "(": (20, ("group", 1, ")"), ("func", 1, ")")),
    "-": (5, ("negate", 19), ("minus", 5)),
    "not": (10, ("not", 10)),
    "!": (10, ("not", 10)),
    "and": (5, None, ("and", 5)),
    "&": (5, None, ("and", 5)),
    "or": (4, None, ("or", 4)),
    "|": (4, None, ("or", 4)),
    "+": (4, None, ("or", 4)),
    ",": (2, None, ("list", 2)),
    ")": (0, None, None),
    "symbol": (0, ("symbol",), None),
    "string": (0, ("string",), None),
    "end": (0, None, None),
}

# Symbols that tokenize() emits as operator tokens instead of 'symbol' tokens.
keywords = set(['and', 'or', 'not'])

# Non-alphanumeric characters that tokenize() accepts inside a symbol.
globchars = ".*{}[]?/\\_"
  28. def tokenize(program):
  29. pos, l = 0, len(program)
  30. while pos < l:
  31. c = program[pos]
  32. if c.isspace(): # skip inter-token whitespace
  33. pass
  34. elif c in "(),-|&+!": # handle simple operators
  35. yield (c, None, pos)
  36. elif (c in '"\'' or c == 'r' and
  37. program[pos:pos + 2] in ("r'", 'r"')): # handle quoted strings
  38. if c == 'r':
  39. pos += 1
  40. c = program[pos]
  41. decode = lambda x: x
  42. else:
  43. decode = lambda x: x.decode('string-escape')
  44. pos += 1
  45. s = pos
  46. while pos < l: # find closing quote
  47. d = program[pos]
  48. if d == '\\': # skip over escaped characters
  49. pos += 2
  50. continue
  51. if d == c:
  52. yield ('string', decode(program[s:pos]), s)
  53. break
  54. pos += 1
  55. else:
  56. raise error.ParseError(_("unterminated string"), s)
  57. elif c.isalnum() or c in globchars or ord(c) > 127:
  58. # gather up a symbol/keyword
  59. s = pos
  60. pos += 1
  61. while pos < l: # find end of symbol
  62. d = program[pos]
  63. if not (d.isalnum() or d in globchars or ord(d) > 127):
  64. break
  65. pos += 1
  66. sym = program[s:pos]
  67. if sym in keywords: # operator keywords
  68. yield (sym, None, s)
  69. else:
  70. yield ('symbol', sym, s)
  71. pos -= 1
  72. else:
  73. raise error.ParseError(_("syntax error"), pos)
  74. pos += 1
  75. yield ('end', None, pos)
  76. def parse(expr):
  77. p = parser.parser(tokenize, elements)
  78. return p.parse(expr)
  79. def getstring(x, err):
  80. if x and (x[0] == 'string' or x[0] == 'symbol'):
  81. return x[1]
  82. raise error.ParseError(err)
  83. def getset(mctx, x):
  84. if not x:
  85. raise error.ParseError(_("missing argument"))
  86. return methods[x[0]](mctx, *x[1:])
  87. def stringset(mctx, x):
  88. m = mctx.matcher([x])
  89. return [f for f in mctx.subset if m(f)]
  90. def andset(mctx, x, y):
  91. return getset(mctx.narrow(getset(mctx, x)), y)
  92. def orset(mctx, x, y):
  93. # needs optimizing
  94. xl = getset(mctx, x)
  95. yl = getset(mctx, y)
  96. return xl + [f for f in yl if f not in xl]
  97. def notset(mctx, x):
  98. s = set(getset(mctx, x))
  99. return [r for r in mctx.subset if r not in s]
  100. def minusset(mctx, x, y):
  101. xl = getset(mctx, x)
  102. yl = set(getset(mctx, y))
  103. return [f for f in xl if f not in yl]
  104. def listset(mctx, a, b):
  105. raise error.ParseError(_("can't use a list in this context"))
  106. def modified(mctx, x):
  107. """``modified()``
  108. File that is modified according to status.
  109. """
  110. # i18n: "modified" is a keyword
  111. getargs(x, 0, 0, _("modified takes no arguments"))
  112. s = mctx.status()[0]
  113. return [f for f in mctx.subset if f in s]
  114. def added(mctx, x):
  115. """``added()``
  116. File that is added according to status.
  117. """
  118. # i18n: "added" is a keyword
  119. getargs(x, 0, 0, _("added takes no arguments"))
  120. s = mctx.status()[1]
  121. return [f for f in mctx.subset if f in s]
  122. def removed(mctx, x):
  123. """``removed()``
  124. File that is removed according to status.
  125. """
  126. # i18n: "removed" is a keyword
  127. getargs(x, 0, 0, _("removed takes no arguments"))
  128. s = mctx.status()[2]
  129. return [f for f in mctx.subset if f in s]
  130. def deleted(mctx, x):
  131. """``deleted()``
  132. File that is deleted according to status.
  133. """
  134. # i18n: "deleted" is a keyword
  135. getargs(x, 0, 0, _("deleted takes no arguments"))
  136. s = mctx.status()[3]
  137. return [f for f in mctx.subset if f in s]
  138. def unknown(mctx, x):
  139. """``unknown()``
  140. File that is unknown according to status. These files will only be
  141. considered if this predicate is used.
  142. """
  143. # i18n: "unknown" is a keyword
  144. getargs(x, 0, 0, _("unknown takes no arguments"))
  145. s = mctx.status()[4]
  146. return [f for f in mctx.subset if f in s]
  147. def ignored(mctx, x):
  148. """``ignored()``
  149. File that is ignored according to status. These files will only be
  150. considered if this predicate is used.
  151. """
  152. # i18n: "ignored" is a keyword
  153. getargs(x, 0, 0, _("ignored takes no arguments"))
  154. s = mctx.status()[5]
  155. return [f for f in mctx.subset if f in s]
  156. def clean(mctx, x):
  157. """``clean()``
  158. File that is clean according to status.
  159. """
  160. # i18n: "clean" is a keyword
  161. getargs(x, 0, 0, _("clean takes no arguments"))
  162. s = mctx.status()[6]
  163. return [f for f in mctx.subset if f in s]
  164. def func(mctx, a, b):
  165. if a[0] == 'symbol' and a[1] in symbols:
  166. return symbols[a[1]](mctx, b)
  167. raise error.ParseError(_("not a function: %s") % a[1])
  168. def getlist(x):
  169. if not x:
  170. return []
  171. if x[0] == 'list':
  172. return getlist(x[1]) + [x[2]]
  173. return [x]
  174. def getargs(x, min, max, err):
  175. l = getlist(x)
  176. if len(l) < min or len(l) > max:
  177. raise error.ParseError(err)
  178. return l
  179. def binary(mctx, x):
  180. """``binary()``
  181. File that appears to be binary (contains NUL bytes).
  182. """
  183. # i18n: "binary" is a keyword
  184. getargs(x, 0, 0, _("binary takes no arguments"))
  185. return [f for f in mctx.existing() if util.binary(mctx.ctx[f].data())]
  186. def exec_(mctx, x):
  187. """``exec()``
  188. File that is marked as executable.
  189. """
  190. # i18n: "exec" is a keyword
  191. getargs(x, 0, 0, _("exec takes no arguments"))
  192. return [f for f in mctx.existing() if mctx.ctx.flags(f) == 'x']
  193. def symlink(mctx, x):
  194. """``symlink()``
  195. File that is marked as a symlink.
  196. """
  197. # i18n: "symlink" is a keyword
  198. getargs(x, 0, 0, _("symlink takes no arguments"))
  199. return [f for f in mctx.existing() if mctx.ctx.flags(f) == 'l']
  200. def resolved(mctx, x):
  201. """``resolved()``
  202. File that is marked resolved according to the resolve state.
  203. """
  204. # i18n: "resolved" is a keyword
  205. getargs(x, 0, 0, _("resolved takes no arguments"))
  206. if mctx.ctx.rev() is not None:
  207. return []
  208. ms = merge.mergestate(mctx.ctx._repo)
  209. return [f for f in mctx.subset if f in ms and ms[f] == 'r']
  210. def unresolved(mctx, x):
  211. """``unresolved()``
  212. File that is marked unresolved according to the resolve state.
  213. """
  214. # i18n: "unresolved" is a keyword
  215. getargs(x, 0, 0, _("unresolved takes no arguments"))
  216. if mctx.ctx.rev() is not None:
  217. return []
  218. ms = merge.mergestate(mctx.ctx._repo)
  219. return [f for f in mctx.subset if f in ms and ms[f] == 'u']
  220. def hgignore(mctx, x):
  221. """``hgignore()``
  222. File that matches the active .hgignore pattern.
  223. """
  224. getargs(x, 0, 0, _("hgignore takes no arguments"))
  225. ignore = mctx.ctx._repo.dirstate._ignore
  226. return [f for f in mctx.subset if ignore(f)]
  227. def grep(mctx, x):
  228. """``grep(regex)``
  229. File contains the given regular expression.
  230. """
  231. try:
  232. # i18n: "grep" is a keyword
  233. r = re.compile(getstring(x, _("grep requires a pattern")))
  234. except re.error, e:
  235. raise error.ParseError(_('invalid match pattern: %s') % e)
  236. return [f for f in mctx.existing() if r.search(mctx.ctx[f].data())]
  237. def _sizetomax(s):
  238. try:
  239. s = s.strip()
  240. for k, v in util._sizeunits:
  241. if s.endswith(k):
  242. # max(4k) = 5k - 1, max(4.5k) = 4.6k - 1
  243. n = s[:-len(k)]
  244. inc = 1.0
  245. if "." in n:
  246. inc /= 10 ** len(n.split(".")[1])
  247. return int((float(n) + inc) * v) - 1
  248. # no extension, this is a precise value
  249. return int(s)
  250. except ValueError:
  251. raise error.ParseError(_("couldn't parse size: %s") % s)
  252. def size(mctx, x):
  253. """``size(expression)``
  254. File size matches the given expression. Examples:
  255. - 1k (files from 1024 to 2047 bytes)
  256. - < 20k (files less than 20480 bytes)
  257. - >= .5MB (files at least 524288 bytes)
  258. - 4k - 1MB (files from 4096 bytes to 1048576 bytes)
  259. """
  260. # i18n: "size" is a keyword
  261. expr = getstring(x, _("size requires an expression")).strip()
  262. if '-' in expr: # do we have a range?
  263. a, b = expr.split('-', 1)
  264. a = util.sizetoint(a)
  265. b = util.sizetoint(b)
  266. m = lambda x: x >= a and x <= b
  267. elif expr.startswith("<="):
  268. a = util.sizetoint(expr[2:])
  269. m = lambda x: x <= a
  270. elif expr.startswith("<"):
  271. a = util.sizetoint(expr[1:])
  272. m = lambda x: x < a
  273. elif expr.startswith(">="):
  274. a = util.sizetoint(expr[2:])
  275. m = lambda x: x >= a
  276. elif expr.startswith(">"):
  277. a = util.sizetoint(expr[1:])
  278. m = lambda x: x > a
  279. elif expr[0].isdigit or expr[0] == '.':
  280. a = util.sizetoint(expr)
  281. b = _sizetomax(expr)
  282. m = lambda x: x >= a and x <= b
  283. else:
  284. raise error.ParseError(_("couldn't parse size: %s") % expr)
  285. return [f for f in mctx.existing() if m(mctx.ctx[f].size())]
  286. def encoding(mctx, x):
  287. """``encoding(name)``
  288. File can be successfully decoded with the given character
  289. encoding. May not be useful for encodings other than ASCII and
  290. UTF-8.
  291. """
  292. # i18n: "encoding" is a keyword
  293. enc = getstring(x, _("encoding requires an encoding name"))
  294. s = []
  295. for f in mctx.existing():
  296. d = mctx.ctx[f].data()
  297. try:
  298. d.decode(enc)
  299. except LookupError:
  300. raise util.Abort(_("unknown encoding '%s'") % enc)
  301. except UnicodeDecodeError:
  302. continue
  303. s.append(f)
  304. return s
  305. def eol(mctx, x):
  306. """``eol(style)``
  307. File contains newlines of the given style (dos, unix, mac). Binary
  308. files are excluded, files with mixed line endings match multiple
  309. styles.
  310. """
  311. # i18n: "encoding" is a keyword
  312. enc = getstring(x, _("encoding requires an encoding name"))
  313. s = []
  314. for f in mctx.existing():
  315. d = mctx.ctx[f].data()
  316. if util.binary(d):
  317. continue
  318. if (enc == 'dos' or enc == 'win') and '\r\n' in d:
  319. s.append(f)
  320. elif enc == 'unix' and re.search('(?<!\r)\n', d):
  321. s.append(f)
  322. elif enc == 'mac' and re.search('\r(?!\n)', d):
  323. s.append(f)
  324. return s
  325. def copied(mctx, x):
  326. """``copied()``
  327. File that is recorded as being copied.
  328. """
  329. # i18n: "copied" is a keyword
  330. getargs(x, 0, 0, _("copied takes no arguments"))
  331. s = []
  332. for f in mctx.subset:
  333. p = mctx.ctx[f].parents()
  334. if p and p[0].path() != f:
  335. s.append(f)
  336. return s
  337. def subrepo(mctx, x):
  338. """``subrepo([pattern])``
  339. Subrepositories whose paths match the given pattern.
  340. """
  341. # i18n: "subrepo" is a keyword
  342. getargs(x, 0, 1, _("subrepo takes at most one argument"))
  343. ctx = mctx.ctx
  344. sstate = sorted(ctx.substate)
  345. if x:
  346. pat = getstring(x, _("subrepo requires a pattern or no arguments"))
  347. import match as matchmod # avoid circular import issues
  348. fast = not matchmod.patkind(pat)
  349. if fast:
  350. def m(s):
  351. return (s == pat)
  352. else:
  353. m = matchmod.match(ctx._repo.root, '', [pat], ctx=ctx)
  354. return [sub for sub in sstate if m(sub)]
  355. else:
  356. return [sub for sub in sstate]
# Predicate name (as written in a fileset expression) -> implementation.
# func() looks names up here and calls the entry as predicate(mctx, args);
# the values also feed i18nfunctions at the end of the file.
symbols = {
    'added': added,
    'binary': binary,
    'clean': clean,
    'copied': copied,
    'deleted': deleted,
    'encoding': encoding,
    'eol': eol,
    'exec': exec_,
    'grep': grep,
    'ignored': ignored,
    'hgignore': hgignore,
    'modified': modified,
    'removed': removed,
    'resolved': resolved,
    'size': size,
    'symlink': symlink,
    'unknown': unknown,
    'unresolved': unresolved,
    'subrepo': subrepo,
}
# Parse-tree node type -> evaluator. getset() dispatches on the first
# element of a node and passes the rest of the node as arguments.
# NOTE(review): `elements` defines a prefix action named "negate" for "-",
# but there is no 'negate' entry here -- confirm whether that is intended.
methods = {
    'string': stringset,
    'symbol': stringset,
    'and': andset,
    'or': orset,
    'minus': minusset,
    'list': listset,
    'group': getset,
    'not': notset,
    'func': func,
}
  389. class matchctx(object):
  390. def __init__(self, ctx, subset=None, status=None):
  391. self.ctx = ctx
  392. self.subset = subset
  393. self._status = status
  394. def status(self):
  395. return self._status
  396. def matcher(self, patterns):
  397. return self.ctx.match(patterns)
  398. def filter(self, files):
  399. return [f for f in files if f in self.subset]
  400. def existing(self):
  401. if self._status is not None:
  402. removed = set(self._status[3])
  403. unknown = set(self._status[4] + self._status[5])
  404. else:
  405. removed = set()
  406. unknown = set()
  407. return (f for f in self.subset
  408. if (f in self.ctx and f not in removed) or f in unknown)
  409. def narrow(self, files):
  410. return matchctx(self.ctx, self.filter(files), self._status)
  411. def _intree(funcs, tree):
  412. if isinstance(tree, tuple):
  413. if tree[0] == 'func' and tree[1][0] == 'symbol':
  414. if tree[1][1] in funcs:
  415. return True
  416. for s in tree[1:]:
  417. if _intree(funcs, s):
  418. return True
  419. return False
  420. # filesets using matchctx.existing()
  421. _existingcallers = [
  422. 'binary',
  423. 'exec',
  424. 'grep',
  425. 'size',
  426. 'symlink',
  427. ]
  428. def getfileset(ctx, expr):
  429. tree, pos = parse(expr)
  430. if (pos != len(expr)):
  431. raise error.ParseError(_("invalid token"), pos)
  432. # do we need status info?
  433. if (_intree(['modified', 'added', 'removed', 'deleted',
  434. 'unknown', 'ignored', 'clean'], tree) or
  435. # Using matchctx.existing() on a workingctx requires us to check
  436. # for deleted files.
  437. (ctx.rev() is None and _intree(_existingcallers, tree))):
  438. unknown = _intree(['unknown'], tree)
  439. ignored = _intree(['ignored'], tree)
  440. r = ctx._repo
  441. status = r.status(ctx.p1(), ctx,
  442. unknown=unknown, ignored=ignored, clean=True)
  443. subset = []
  444. for c in status:
  445. subset.extend(c)
  446. else:
  447. status = None
  448. subset = list(ctx.walk(ctx.match([])))
  449. return getset(matchctx(ctx, subset, status), tree)
# tell hggettext to extract docstrings from these functions:
# (the docstrings of every predicate registered in `symbols` are therefore
# translatable text, not just developer documentation)
i18nfunctions = symbols.values()