PageRenderTime 61ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/mercurial/match.py

https://bitbucket.org/mirror/mercurial/
Python | 416 lines | 397 code | 6 blank | 13 comment | 33 complexity | a34cdcad886d285b483b86431791f262 MD5 | raw file
Possible License(s): GPL-2.0
  1. # match.py - filename matching
  2. #
  3. # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
  4. #
  5. # This software may be used and distributed according to the terms of the
  6. # GNU General Public License version 2 or any later version.
  7. import re
  8. import util, pathutil
  9. from i18n import _
  10. def _rematcher(regex):
  11. '''compile the regexp with the best available regexp engine and return a
  12. matcher function'''
  13. m = util.compilere(regex)
  14. try:
  15. # slightly faster, provided by facebook's re2 bindings
  16. return m.test_match
  17. except AttributeError:
  18. return m.match
  19. def _expandsets(kindpats, ctx):
  20. '''Returns the kindpats list with the 'set' patterns expanded.'''
  21. fset = set()
  22. other = []
  23. for kind, pat in kindpats:
  24. if kind == 'set':
  25. if not ctx:
  26. raise util.Abort("fileset expression with no context")
  27. s = ctx.getfileset(pat)
  28. fset.update(s)
  29. continue
  30. other.append((kind, pat))
  31. return fset, other
  32. class match(object):
  33. def __init__(self, root, cwd, patterns, include=[], exclude=[],
  34. default='glob', exact=False, auditor=None, ctx=None):
  35. """build an object to match a set of file patterns
  36. arguments:
  37. root - the canonical root of the tree you're matching against
  38. cwd - the current working directory, if relevant
  39. patterns - patterns to find
  40. include - patterns to include (unless they are excluded)
  41. exclude - patterns to exclude (even if they are included)
  42. default - if a pattern in patterns has no explicit type, assume this one
  43. exact - patterns are actually filenames (include/exclude still apply)
  44. a pattern is one of:
  45. 'glob:<glob>' - a glob relative to cwd
  46. 're:<regexp>' - a regular expression
  47. 'path:<path>' - a path relative to repository root
  48. 'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
  49. 'relpath:<path>' - a path relative to cwd
  50. 'relre:<regexp>' - a regexp that needn't match the start of a name
  51. 'set:<fileset>' - a fileset expression
  52. '<something>' - a pattern of the specified default type
  53. """
  54. self._root = root
  55. self._cwd = cwd
  56. self._files = [] # exact files and roots of patterns
  57. self._anypats = bool(include or exclude)
  58. self._ctx = ctx
  59. self._always = False
  60. if include:
  61. kindpats = _normalize(include, 'glob', root, cwd, auditor)
  62. self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
  63. if exclude:
  64. kindpats = _normalize(exclude, 'glob', root, cwd, auditor)
  65. self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
  66. if exact:
  67. if isinstance(patterns, list):
  68. self._files = patterns
  69. else:
  70. self._files = list(patterns)
  71. pm = self.exact
  72. elif patterns:
  73. kindpats = _normalize(patterns, default, root, cwd, auditor)
  74. self._files = _roots(kindpats)
  75. self._anypats = self._anypats or _anypats(kindpats)
  76. self.patternspat, pm = _buildmatch(ctx, kindpats, '$')
  77. if patterns or exact:
  78. if include:
  79. if exclude:
  80. m = lambda f: im(f) and not em(f) and pm(f)
  81. else:
  82. m = lambda f: im(f) and pm(f)
  83. else:
  84. if exclude:
  85. m = lambda f: not em(f) and pm(f)
  86. else:
  87. m = pm
  88. else:
  89. if include:
  90. if exclude:
  91. m = lambda f: im(f) and not em(f)
  92. else:
  93. m = im
  94. else:
  95. if exclude:
  96. m = lambda f: not em(f)
  97. else:
  98. m = lambda f: True
  99. self._always = True
  100. self.matchfn = m
  101. self._fmap = set(self._files)
  102. def __call__(self, fn):
  103. return self.matchfn(fn)
  104. def __iter__(self):
  105. for f in self._files:
  106. yield f
  107. # Callbacks related to how the matcher is used by dirstate.walk.
  108. # Subscribers to these events must monkeypatch the matcher object.
  109. def bad(self, f, msg):
  110. '''Callback from dirstate.walk for each explicit file that can't be
  111. found/accessed, with an error message.'''
  112. pass
  113. # If an explicitdir is set, it will be called when an explicitly listed
  114. # directory is visited.
  115. explicitdir = None
  116. # If an traversedir is set, it will be called when a directory discovered
  117. # by recursive traversal is visited.
  118. traversedir = None
  119. def rel(self, f):
  120. '''Convert repo path back to path that is relative to cwd of matcher.'''
  121. return util.pathto(self._root, self._cwd, f)
  122. def files(self):
  123. '''Explicitly listed files or patterns or roots:
  124. if no patterns or .always(): empty list,
  125. if exact: list exact files,
  126. if not .anypats(): list all files and dirs,
  127. else: optimal roots'''
  128. return self._files
  129. def exact(self, f):
  130. '''Returns True if f is in .files().'''
  131. return f in self._fmap
  132. def anypats(self):
  133. '''Matcher uses patterns or include/exclude.'''
  134. return self._anypats
  135. def always(self):
  136. '''Matcher will match everything and .files() will be empty
  137. - optimization might be possible and necessary.'''
  138. return self._always
  139. class exact(match):
  140. def __init__(self, root, cwd, files):
  141. match.__init__(self, root, cwd, files, exact=True)
  142. class always(match):
  143. def __init__(self, root, cwd):
  144. match.__init__(self, root, cwd, [])
  145. self._always = True
  146. class narrowmatcher(match):
  147. """Adapt a matcher to work on a subdirectory only.
  148. The paths are remapped to remove/insert the path as needed:
  149. >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
  150. >>> m2 = narrowmatcher('sub', m1)
  151. >>> bool(m2('a.txt'))
  152. False
  153. >>> bool(m2('b.txt'))
  154. True
  155. >>> bool(m2.matchfn('a.txt'))
  156. False
  157. >>> bool(m2.matchfn('b.txt'))
  158. True
  159. >>> m2.files()
  160. ['b.txt']
  161. >>> m2.exact('b.txt')
  162. True
  163. >>> m2.rel('b.txt')
  164. 'b.txt'
  165. >>> def bad(f, msg):
  166. ... print "%s: %s" % (f, msg)
  167. >>> m1.bad = bad
  168. >>> m2.bad('x.txt', 'No such file')
  169. sub/x.txt: No such file
  170. """
  171. def __init__(self, path, matcher):
  172. self._root = matcher._root
  173. self._cwd = matcher._cwd
  174. self._path = path
  175. self._matcher = matcher
  176. self._always = matcher._always
  177. self._files = [f[len(path) + 1:] for f in matcher._files
  178. if f.startswith(path + "/")]
  179. self._anypats = matcher._anypats
  180. self.matchfn = lambda fn: matcher.matchfn(self._path + "/" + fn)
  181. self._fmap = set(self._files)
  182. def bad(self, f, msg):
  183. self._matcher.bad(self._path + "/" + f, msg)
  184. def patkind(pattern, default=None):
  185. '''If pattern is 'kind:pat' with a known kind, return kind.'''
  186. return _patsplit(pattern, default)[0]
  187. def _patsplit(pattern, default):
  188. """Split a string into the optional pattern kind prefix and the actual
  189. pattern."""
  190. if ':' in pattern:
  191. kind, pat = pattern.split(':', 1)
  192. if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
  193. 'listfile', 'listfile0', 'set'):
  194. return kind, pat
  195. return default, pattern
  196. def _globre(pat):
  197. r'''Convert an extended glob string to a regexp string.
  198. >>> print _globre(r'?')
  199. .
  200. >>> print _globre(r'*')
  201. [^/]*
  202. >>> print _globre(r'**')
  203. .*
  204. >>> print _globre(r'**/a')
  205. (?:.*/)?a
  206. >>> print _globre(r'a/**/b')
  207. a\/(?:.*/)?b
  208. >>> print _globre(r'[a*?!^][^b][!c]')
  209. [a*?!^][\^b][^c]
  210. >>> print _globre(r'{a,b}')
  211. (?:a|b)
  212. >>> print _globre(r'.\*\?')
  213. \.\*\?
  214. '''
  215. i, n = 0, len(pat)
  216. res = ''
  217. group = 0
  218. escape = re.escape
  219. def peek():
  220. return i < n and pat[i]
  221. while i < n:
  222. c = pat[i]
  223. i += 1
  224. if c not in '*?[{},\\':
  225. res += escape(c)
  226. elif c == '*':
  227. if peek() == '*':
  228. i += 1
  229. if peek() == '/':
  230. i += 1
  231. res += '(?:.*/)?'
  232. else:
  233. res += '.*'
  234. else:
  235. res += '[^/]*'
  236. elif c == '?':
  237. res += '.'
  238. elif c == '[':
  239. j = i
  240. if j < n and pat[j] in '!]':
  241. j += 1
  242. while j < n and pat[j] != ']':
  243. j += 1
  244. if j >= n:
  245. res += '\\['
  246. else:
  247. stuff = pat[i:j].replace('\\','\\\\')
  248. i = j + 1
  249. if stuff[0] == '!':
  250. stuff = '^' + stuff[1:]
  251. elif stuff[0] == '^':
  252. stuff = '\\' + stuff
  253. res = '%s[%s]' % (res, stuff)
  254. elif c == '{':
  255. group += 1
  256. res += '(?:'
  257. elif c == '}' and group:
  258. res += ')'
  259. group -= 1
  260. elif c == ',' and group:
  261. res += '|'
  262. elif c == '\\':
  263. p = peek()
  264. if p:
  265. i += 1
  266. res += escape(p)
  267. else:
  268. res += escape(c)
  269. else:
  270. res += escape(c)
  271. return res
  272. def _regex(kind, pat, globsuffix):
  273. '''Convert a (normalized) pattern of any kind into a regular expression.
  274. globsuffix is appended to the regexp of globs.'''
  275. if not pat:
  276. return ''
  277. if kind == 're':
  278. return pat
  279. if kind == 'path':
  280. return '^' + re.escape(pat) + '(?:/|$)'
  281. if kind == 'relglob':
  282. return '(?:|.*/)' + _globre(pat) + globsuffix
  283. if kind == 'relpath':
  284. return re.escape(pat) + '(?:/|$)'
  285. if kind == 'relre':
  286. if pat.startswith('^'):
  287. return pat
  288. return '.*' + pat
  289. return _globre(pat) + globsuffix
  290. def _buildmatch(ctx, kindpats, globsuffix):
  291. '''Return regexp string and a matcher function for kindpats.
  292. globsuffix is appended to the regexp of globs.'''
  293. fset, kindpats = _expandsets(kindpats, ctx)
  294. if not kindpats:
  295. return "", fset.__contains__
  296. regex, mf = _buildregexmatch(kindpats, globsuffix)
  297. if fset:
  298. return regex, lambda f: f in fset or mf(f)
  299. return regex, mf
  300. def _buildregexmatch(kindpats, globsuffix):
  301. """Build a match function from a list of kinds and kindpats,
  302. return regexp string and a matcher function."""
  303. try:
  304. regex = '(?:%s)' % '|'.join([_regex(k, p, globsuffix)
  305. for (k, p) in kindpats])
  306. if len(regex) > 20000:
  307. raise OverflowError
  308. return regex, _rematcher(regex)
  309. except OverflowError:
  310. # We're using a Python with a tiny regex engine and we
  311. # made it explode, so we'll divide the pattern list in two
  312. # until it works
  313. l = len(kindpats)
  314. if l < 2:
  315. raise
  316. regexa, a = _buildregexmatch(kindpats[:l//2], globsuffix)
  317. regexb, b = _buildregexmatch(kindpats[l//2:], globsuffix)
  318. return regex, lambda s: a(s) or b(s)
  319. except re.error:
  320. for k, p in kindpats:
  321. try:
  322. _rematcher('(?:%s)' % _regex(k, p, globsuffix))
  323. except re.error:
  324. raise util.Abort(_("invalid pattern (%s): %s") % (k, p))
  325. raise util.Abort(_("invalid pattern"))
  326. def _normalize(patterns, default, root, cwd, auditor):
  327. '''Convert 'kind:pat' from the patterns list to tuples with kind and
  328. normalized and rooted patterns and with listfiles expanded.'''
  329. kindpats = []
  330. for kind, pat in [_patsplit(p, default) for p in patterns]:
  331. if kind in ('glob', 'relpath'):
  332. pat = pathutil.canonpath(root, cwd, pat, auditor)
  333. elif kind in ('relglob', 'path'):
  334. pat = util.normpath(pat)
  335. elif kind in ('listfile', 'listfile0'):
  336. try:
  337. files = util.readfile(pat)
  338. if kind == 'listfile0':
  339. files = files.split('\0')
  340. else:
  341. files = files.splitlines()
  342. files = [f for f in files if f]
  343. except EnvironmentError:
  344. raise util.Abort(_("unable to read file list (%s)") % pat)
  345. kindpats += _normalize(files, default, root, cwd, auditor)
  346. continue
  347. # else: re or relre - which cannot be normalized
  348. kindpats.append((kind, pat))
  349. return kindpats
  350. def _roots(kindpats):
  351. '''return roots and exact explicitly listed files from patterns
  352. >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
  353. ['g', 'g', '.']
  354. >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
  355. ['r', 'p/p', '.']
  356. >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
  357. ['.', '.', '.']
  358. '''
  359. r = []
  360. for kind, pat in kindpats:
  361. if kind == 'glob': # find the non-glob prefix
  362. root = []
  363. for p in pat.split('/'):
  364. if '[' in p or '{' in p or '*' in p or '?' in p:
  365. break
  366. root.append(p)
  367. r.append('/'.join(root) or '.')
  368. elif kind in ('relpath', 'path'):
  369. r.append(pat or '.')
  370. else: # relglob, re, relre
  371. r.append('.')
  372. return r
  373. def _anypats(kindpats):
  374. for kind, pat in kindpats:
  375. if kind in ('glob', 're', 'relglob', 'relre', 'set'):
  376. return True