/mercurial/match.py
Python | 416 lines | 397 code | 6 blank | 13 comment | 33 complexity | a34cdcad886d285b483b86431791f262 MD5 | raw file
Possible License(s): GPL-2.0
- # match.py - filename matching
- #
- # Copyright 2008, 2009 Matt Mackall <mpm@selenic.com> and others
- #
- # This software may be used and distributed according to the terms of the
- # GNU General Public License version 2 or any later version.
- import re
- import util, pathutil
- from i18n import _
def _rematcher(regex):
    '''Compile regex via util.compilere and return a callable that matches
    a string against it.

    The compiled object's test_match attribute is preferred when present;
    otherwise its match method is returned.'''
    compiled = util.compilere(regex)
    try:
        # slightly faster, provided by facebook's re2 bindings
        matcher = compiled.test_match
    except AttributeError:
        matcher = compiled.match
    return matcher
- def _expandsets(kindpats, ctx):
- '''Returns the kindpats list with the 'set' patterns expanded.'''
- fset = set()
- other = []
- for kind, pat in kindpats:
- if kind == 'set':
- if not ctx:
- raise util.Abort("fileset expression with no context")
- s = ctx.getfileset(pat)
- fset.update(s)
- continue
- other.append((kind, pat))
- return fset, other
class match(object):
    '''Callable file matcher built from patterns and optional
    include/exclude filters.

    Calling an instance with a file name returns a true value when that
    name is selected by the matcher.'''

    def __init__(self, root, cwd, patterns, include=None, exclude=None,
                 default='glob', exact=False, auditor=None, ctx=None):
        """build an object to match a set of file patterns

        arguments:
        root - the canonical root of the tree you're matching against
        cwd - the current working directory, if relevant
        patterns - patterns to find
        include - patterns to include (unless they are excluded)
        exclude - patterns to exclude (even if they are included)
        default - if a pattern in patterns has no explicit type, assume this one
        exact - patterns are actually filenames (include/exclude still apply)
        auditor - passed through to pattern normalization
        ctx - context used to expand 'set:' patterns (may be None when no
              fileset pattern is given)

        a pattern is one of:
        'glob:<glob>' - a glob relative to cwd
        're:<regexp>' - a regular expression
        'path:<path>' - a path relative to repository root
        'relglob:<glob>' - an unrooted glob (*.c matches C files in all dirs)
        'relpath:<path>' - a path relative to cwd
        'relre:<regexp>' - a regexp that needn't match the start of a name
        'set:<fileset>' - a fileset expression
        '<something>' - a pattern of the specified default type
        """
        # include/exclude default to None rather than a shared mutable list;
        # both are only tested for truthiness, so None and [] are equivalent.

        self._root = root
        self._cwd = cwd
        self._files = [] # exact files and roots of patterns
        self._anypats = bool(include or exclude)
        self._ctx = ctx
        self._always = False

        # include/exclude patterns are always interpreted as globs and match
        # a whole path component ('(?:/|$)' suffix)
        if include:
            kindpats = _normalize(include, 'glob', root, cwd, auditor)
            self.includepat, im = _buildmatch(ctx, kindpats, '(?:/|$)')
        if exclude:
            kindpats = _normalize(exclude, 'glob', root, cwd, auditor)
            self.excludepat, em = _buildmatch(ctx, kindpats, '(?:/|$)')
        if exact:
            if isinstance(patterns, list):
                self._files = patterns
            else:
                self._files = list(patterns)
            pm = self.exact
        elif patterns:
            kindpats = _normalize(patterns, default, root, cwd, auditor)
            self._files = _roots(kindpats)
            self._anypats = self._anypats or _anypats(kindpats)
            self.patternspat, pm = _buildmatch(ctx, kindpats, '$')

        # combine the pattern (pm), include (im) and exclude (em) matchers
        # into a single match function, using only those that exist
        if patterns or exact:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f) and pm(f)
                else:
                    m = lambda f: im(f) and pm(f)
            else:
                if exclude:
                    m = lambda f: not em(f) and pm(f)
                else:
                    m = pm
        else:
            if include:
                if exclude:
                    m = lambda f: im(f) and not em(f)
                else:
                    m = im
            else:
                if exclude:
                    m = lambda f: not em(f)
                else:
                    # nothing restricts the match: everything is selected
                    m = lambda f: True
                    self._always = True

        self.matchfn = m
        self._fmap = set(self._files)

    def __call__(self, fn):
        '''Return the result of the combined match function for fn.'''
        return self.matchfn(fn)

    def __iter__(self):
        '''Iterate over the entries of .files().'''
        for f in self._files:
            yield f

    # Callbacks related to how the matcher is used by dirstate.walk.
    # Subscribers to these events must monkeypatch the matcher object.
    def bad(self, f, msg):
        '''Callback from dirstate.walk for each explicit file that can't be
        found/accessed, with an error message.'''
        pass

    # If an explicitdir is set, it will be called when an explicitly listed
    # directory is visited.
    explicitdir = None

    # If a traversedir is set, it will be called when a directory discovered
    # by recursive traversal is visited.
    traversedir = None

    def rel(self, f):
        '''Convert repo path back to path that is relative to cwd of matcher.'''
        return util.pathto(self._root, self._cwd, f)

    def files(self):
        '''Explicitly listed files or patterns or roots:
        if no patterns or .always(): empty list,
        if exact: list exact files,
        if not .anypats(): list all files and dirs,
        else: optimal roots'''
        return self._files

    def exact(self, f):
        '''Returns True if f is in .files().'''
        return f in self._fmap

    def anypats(self):
        '''Matcher uses patterns or include/exclude.'''
        return self._anypats

    def always(self):
        '''Matcher will match everything and .files() will be empty
        - optimization might be possible and necessary.'''
        return self._always
class exact(match):
    '''Matcher whose patterns are taken as literal file names.'''

    def __init__(self, root, cwd, files):
        match.__init__(self, root, cwd, patterns=files, exact=True)
class always(match):
    '''Matcher constructed with an empty pattern list and flagged as
    matching everything.'''

    def __init__(self, root, cwd):
        match.__init__(self, root, cwd, patterns=[])
        self._always = True
class narrowmatcher(match):
    """Adapt a matcher to work on a subdirectory only.

    The paths are remapped to remove/insert the path as needed:

    >>> m1 = match('root', '', ['a.txt', 'sub/b.txt'])
    >>> m2 = narrowmatcher('sub', m1)
    >>> bool(m2('a.txt'))
    False
    >>> bool(m2('b.txt'))
    True
    >>> bool(m2.matchfn('a.txt'))
    False
    >>> bool(m2.matchfn('b.txt'))
    True
    >>> m2.files()
    ['b.txt']
    >>> m2.exact('b.txt')
    True
    >>> m2.rel('b.txt')
    'b.txt'
    >>> def bad(f, msg):
    ...     print "%s: %s" % (f, msg)
    >>> m1.bad = bad
    >>> m2.bad('x.txt', 'No such file')
    sub/x.txt: No such file
    """

    def __init__(self, path, matcher):
        # remember what is being wrapped and where
        self._matcher = matcher
        self._path = path

        # state copied unchanged from the wrapped matcher
        self._root = matcher._root
        self._cwd = matcher._cwd
        self._always = matcher._always
        self._anypats = matcher._anypats

        # keep only the files below path, with the "path/" prefix removed
        prefix = path + "/"
        skip = len(prefix)
        self._files = [name[skip:] for name in matcher._files
                       if name.startswith(prefix)]
        self._fmap = set(self._files)

        def narrowed(fn):
            # re-insert the prefix before asking the wrapped matcher
            return matcher.matchfn(self._path + "/" + fn)
        self.matchfn = narrowed

    def bad(self, f, msg):
        '''Forward the callback to the wrapped matcher with the path prefix
        re-inserted in front of f.'''
        self._matcher.bad(self._path + "/" + f, msg)
def patkind(pattern, default=None):
    '''Return the kind of a 'kind:pat' pattern when the kind is a known one,
    otherwise return default.'''
    kind, _pat = _patsplit(pattern, default)
    return kind
- def _patsplit(pattern, default):
- """Split a string into the optional pattern kind prefix and the actual
- pattern."""
- if ':' in pattern:
- kind, pat = pattern.split(':', 1)
- if kind in ('re', 'glob', 'path', 'relglob', 'relpath', 'relre',
- 'listfile', 'listfile0', 'set'):
- return kind, pat
- return default, pattern
- def _globre(pat):
- r'''Convert an extended glob string to a regexp string.
- >>> print _globre(r'?')
- .
- >>> print _globre(r'*')
- [^/]*
- >>> print _globre(r'**')
- .*
- >>> print _globre(r'**/a')
- (?:.*/)?a
- >>> print _globre(r'a/**/b')
- a\/(?:.*/)?b
- >>> print _globre(r'[a*?!^][^b][!c]')
- [a*?!^][\^b][^c]
- >>> print _globre(r'{a,b}')
- (?:a|b)
- >>> print _globre(r'.\*\?')
- \.\*\?
- '''
- i, n = 0, len(pat)
- res = ''
- group = 0
- escape = re.escape
- def peek():
- return i < n and pat[i]
- while i < n:
- c = pat[i]
- i += 1
- if c not in '*?[{},\\':
- res += escape(c)
- elif c == '*':
- if peek() == '*':
- i += 1
- if peek() == '/':
- i += 1
- res += '(?:.*/)?'
- else:
- res += '.*'
- else:
- res += '[^/]*'
- elif c == '?':
- res += '.'
- elif c == '[':
- j = i
- if j < n and pat[j] in '!]':
- j += 1
- while j < n and pat[j] != ']':
- j += 1
- if j >= n:
- res += '\\['
- else:
- stuff = pat[i:j].replace('\\','\\\\')
- i = j + 1
- if stuff[0] == '!':
- stuff = '^' + stuff[1:]
- elif stuff[0] == '^':
- stuff = '\\' + stuff
- res = '%s[%s]' % (res, stuff)
- elif c == '{':
- group += 1
- res += '(?:'
- elif c == '}' and group:
- res += ')'
- group -= 1
- elif c == ',' and group:
- res += '|'
- elif c == '\\':
- p = peek()
- if p:
- i += 1
- res += escape(p)
- else:
- res += escape(c)
- else:
- res += escape(c)
- return res
- def _regex(kind, pat, globsuffix):
- '''Convert a (normalized) pattern of any kind into a regular expression.
- globsuffix is appended to the regexp of globs.'''
- if not pat:
- return ''
- if kind == 're':
- return pat
- if kind == 'path':
- return '^' + re.escape(pat) + '(?:/|$)'
- if kind == 'relglob':
- return '(?:|.*/)' + _globre(pat) + globsuffix
- if kind == 'relpath':
- return re.escape(pat) + '(?:/|$)'
- if kind == 'relre':
- if pat.startswith('^'):
- return pat
- return '.*' + pat
- return _globre(pat) + globsuffix
def _buildmatch(ctx, kindpats, globsuffix):
    '''Build a matcher for kindpats and return (regexp string, match function).

    'set' patterns are expanded through ctx into a set of file names; the
    other patterns are compiled into one regexp.  The returned function
    accepts a name found in either.  globsuffix is appended to the regexp
    of globs.'''
    fileset, rest = _expandsets(kindpats, ctx)
    if rest:
        regex, rematch = _buildregexmatch(rest, globsuffix)
        if not fileset:
            return regex, rematch

        def either(f):
            return f in fileset or rematch(f)
        return regex, either
    # only filesets (or nothing at all): plain set membership
    return "", fileset.__contains__
def _buildregexmatch(kindpats, globsuffix):
    """Compile kindpats into one alternation regexp.

    Returns (regexp string, match function).  When the regexp is longer than
    20000 characters or compiling it raises OverflowError, the pattern list
    is halved recursively and the two resulting matchers are or-ed together.
    An invalid pattern aborts, naming the offending pattern when it can be
    isolated."""
    try:
        alternatives = [_regex(kind, pat, globsuffix)
                        for (kind, pat) in kindpats]
        regex = '(?:%s)' % '|'.join(alternatives)
        if len(regex) > 20000:
            raise OverflowError
        return regex, _rematcher(regex)
    except OverflowError:
        # We're using a Python with a tiny regex engine and we
        # made it explode, so we'll divide the pattern list in two
        # until it works
        count = len(kindpats)
        if count < 2:
            raise
        half = count // 2
        # only the matchers of the halves are needed; the regexp string
        # returned is still the one for the full pattern list
        first = _buildregexmatch(kindpats[:half], globsuffix)[1]
        second = _buildregexmatch(kindpats[half:], globsuffix)[1]
        return regex, lambda s: first(s) or second(s)
    except re.error:
        # compile each pattern on its own to report the one that is broken
        for kind, pat in kindpats:
            try:
                _rematcher('(?:%s)' % _regex(kind, pat, globsuffix))
            except re.error:
                raise util.Abort(_("invalid pattern (%s): %s") % (kind, pat))
        raise util.Abort(_("invalid pattern"))
def _normalize(patterns, default, root, cwd, auditor):
    '''Turn a list of 'kind:pat' strings into a list of (kind, pat) tuples.

    glob and relpath patterns are canonicalized against root and cwd,
    relglob and path patterns are path-normalized, and listfile/listfile0
    patterns are replaced by the normalized patterns read from the named
    file.  re and relre patterns are passed through untouched.'''
    kindpats = []
    # split every pattern up front, before any of them is processed
    splitpats = [_patsplit(p, default) for p in patterns]
    for kind, pat in splitpats:
        if kind in ('listfile', 'listfile0'):
            try:
                content = util.readfile(pat)
                if kind == 'listfile0':
                    entries = content.split('\0')
                else:
                    entries = content.splitlines()
                # drop empty entries
                files = [entry for entry in entries if entry]
            except EnvironmentError:
                raise util.Abort(_("unable to read file list (%s)") % pat)
            kindpats.extend(_normalize(files, default, root, cwd, auditor))
            continue
        if kind in ('glob', 'relpath'):
            pat = pathutil.canonpath(root, cwd, pat, auditor)
        elif kind in ('relglob', 'path'):
            pat = util.normpath(pat)
        # else: re or relre - which cannot be normalized
        kindpats.append((kind, pat))
    return kindpats
- def _roots(kindpats):
- '''return roots and exact explicitly listed files from patterns
- >>> _roots([('glob', 'g/*'), ('glob', 'g'), ('glob', 'g*')])
- ['g', 'g', '.']
- >>> _roots([('relpath', 'r'), ('path', 'p/p'), ('path', '')])
- ['r', 'p/p', '.']
- >>> _roots([('relglob', 'rg*'), ('re', 're/'), ('relre', 'rr')])
- ['.', '.', '.']
- '''
- r = []
- for kind, pat in kindpats:
- if kind == 'glob': # find the non-glob prefix
- root = []
- for p in pat.split('/'):
- if '[' in p or '{' in p or '*' in p or '?' in p:
- break
- root.append(p)
- r.append('/'.join(root) or '.')
- elif kind in ('relpath', 'path'):
- r.append(pat or '.')
- else: # relglob, re, relre
- r.append('.')
- return r
- def _anypats(kindpats):
- for kind, pat in kindpats:
- if kind in ('glob', 're', 'relglob', 'relre', 'set'):
- return True