/Demo/comparisons/regextest.py
http://unladen-swallow.googlecode.com/ · Python · 47 lines · 25 code · 7 blank · 15 comment · 7 complexity · b7b6a789e0204425892f3f1be3676294 MD5 · raw file
- #! /usr/bin/env python
- # 1) Regular Expressions Test
- #
- # Read a file of (extended per egrep) regular expressions (one per line),
- # and apply those to all files whose names are listed on the command line.
- # Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns
- # against five /etc/termcap files. Tests using more elaborate patterns
- # would also be interesting. Your code should not break if given hundreds
- # of regular expressions or binary files to scan.
- # This implementation:
- # - combines all patterns into a single one using ( ... | ... | ... )
- # - reads patterns from stdin, scans files given as command line arguments
- # - produces output in the format <file>:<lineno>:<line>
- # - is only about 2.5 times as slow as egrep (though I couldn't run
- # Tom's test -- this system, a vanilla SGI, only has /etc/terminfo)
- import string
- import sys
- import re
def main():
    """Print every line of the named files that matches a pattern from stdin.

    Regular expressions are read from standard input, one per line, and
    combined into a single alternation ``(p1|p2|...)``.  Each file named in
    ``sys.argv[1:]`` is scanned line by line, and every matching line is
    written to standard output as ``<file>:<lineno>:<line>``.  A file that
    cannot be opened is reported on standard output as ``<file>: <error>``
    and skipped.
    """
    pats = [chomp(raw) for raw in sys.stdin.readlines()]
    if pats:
        bigpat = '(' + '|'.join(pats) + ')'
    else:
        # With no patterns at all, '()' would match every line.  Like
        # 'egrep -f' on an empty pattern file, match nothing instead:
        # an empty negative lookahead can never succeed.
        bigpat = '(?!)'
    prog = re.compile(bigpat)

    for filename in sys.argv[1:]:
        try:
            fp = open(filename, 'r')
        except IOError as msg:
            sys.stdout.write("%s: %s\n" % (filename, msg))
            continue
        # The context manager closes the file even if scanning raises, so
        # handles do not pile up when many files are given.
        with fp:
            for lineno, line in enumerate(fp, 1):
                if not prog.search(line):
                    continue
                # A final line without a terminator would otherwise run
                # into the next record; always end the record with one.
                if not line.endswith('\n'):
                    line += '\n'
                sys.stdout.write("%s:%s:%s" % (filename, lineno, line))
def chomp(s):
    """Return *s* with any trailing newline characters removed."""
    newline = '\n'
    trimmed = s.rstrip(newline)
    return trimmed
# Script entry point: run the scan only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()