/Demo/comparisons/regextest.py

http://unladen-swallow.googlecode.com/ · Python · 47 lines · 25 code · 7 blank · 15 comment · 7 complexity · b7b6a789e0204425892f3f1be3676294 MD5 · raw file

  1. #! /usr/bin/env python
  2. # 1) Regular Expressions Test
  3. #
  4. # Read a file of (extended per egrep) regular expressions (one per line),
  5. # and apply those to all files whose names are listed on the command line.
  6. # Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns
  7. # against five /etc/termcap files. Tests using more elaborate patterns
  8. # would also be interesting. Your code should not break if given hundreds
  9. # of regular expressions or binary files to scan.
  10. # This implementation:
  11. # - combines all patterns into a single one using ( ... | ... | ... )
  12. # - reads patterns from stdin, scans files given as command line arguments
  13. # - produces output in the format <file>:<lineno>:<line>
  14. # - is only about 2.5 times as slow as egrep (though I couldn't run
  15. # Tom's test -- this system, a vanilla SGI, only has /etc/terminfo)
  16. import string
  17. import sys
  18. import re
  19. def main():
  20. pats = map(chomp, sys.stdin.readlines())
  21. bigpat = '(' + '|'.join(pats) + ')'
  22. prog = re.compile(bigpat)
  23. for file in sys.argv[1:]:
  24. try:
  25. fp = open(file, 'r')
  26. except IOError, msg:
  27. print "%s: %s" % (file, msg)
  28. continue
  29. lineno = 0
  30. while 1:
  31. line = fp.readline()
  32. if not line:
  33. break
  34. lineno = lineno + 1
  35. if prog.search(line):
  36. print "%s:%s:%s" % (file, lineno, line),
  37. def chomp(s):
  38. return s.rstrip('\n')
  39. if __name__ == '__main__':
  40. main()