Guido van Rossum | f06ee5f | 1996-11-27 19:52:01 +0000 | [diff] [blame] | 1 | #! /usr/bin/env python |
Guido van Rossum | 21bc15b | 1995-04-10 11:40:26 +0000 | [diff] [blame] | 2 | |
| 3 | # 1) Regular Expressions Test |
Andrew M. Kuchling | 946c53e | 2003-04-24 17:13:18 +0000 | [diff] [blame] | 4 | # |
| 5 | # Read a file of (extended per egrep) regular expressions (one per line), |
Guido van Rossum | 21bc15b | 1995-04-10 11:40:26 +0000 | [diff] [blame] | 6 | # and apply those to all files whose names are listed on the command line. |
| 7 | # Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns |
# against five /etc/termcap files.  Tests using more elaborate patterns
| 9 | # would also be interesting. Your code should not break if given hundreds |
Andrew M. Kuchling | 946c53e | 2003-04-24 17:13:18 +0000 | [diff] [blame] | 10 | # of regular expressions or binary files to scan. |
Guido van Rossum | 21bc15b | 1995-04-10 11:40:26 +0000 | [diff] [blame] | 11 | |
| 12 | # This implementation: |
| 13 | # - combines all patterns into a single one using ( ... | ... | ... ) |
| 14 | # - reads patterns from stdin, scans files given as command line arguments |
| 15 | # - produces output in the format <file>:<lineno>:<line> |
| 16 | # - is only about 2.5 times as slow as egrep (though I couldn't run |
| 17 | # Tom's test -- this system, a vanilla SGI, only has /etc/terminfo) |
| 18 | |
| 19 | import string |
| 20 | import sys |
Andrew M. Kuchling | 9b12d9d | 2003-04-24 17:22:04 +0000 | [diff] [blame] | 21 | import re |
Guido van Rossum | 21bc15b | 1995-04-10 11:40:26 +0000 | [diff] [blame] | 22 | |
def main():
    """Simulate ``egrep -f``: print every line matching any given pattern.

    Patterns are read from standard input, one regular expression per
    line, and combined into a single alternation.  Each file named in
    ``sys.argv[1:]`` is then scanned line by line, and every matching
    line is written to standard output as ``<file>:<lineno>:<line>``.

    A file that cannot be opened is reported on standard output as
    ``<file>: <error>`` and skipped; the remaining files are still
    scanned.

    Limitation: because all patterns share one compiled expression,
    numbered backreferences in a pattern are offset by the capturing
    groups of the patterns that precede it.
    """
    pats = [chomp(raw) for raw in sys.stdin.readlines()]
    if pats:
        # Wrap in a non-capturing group: a capturing wrapper would claim
        # group number 1 and break backreferences in the user's patterns.
        prog = re.compile('(?:' + '|'.join(pats) + ')')
    else:
        # No patterns at all must match nothing (as egrep -f does with an
        # empty pattern file); an empty negative lookahead never succeeds.
        prog = re.compile('(?!)')

    for filename in sys.argv[1:]:
        try:
            fp = open(filename, 'r')
        except IOError as msg:
            sys.stdout.write("%s: %s\n" % (filename, msg))
            continue
        # Close each file as soon as it has been scanned instead of
        # leaving the handle open until the interpreter collects it.
        with fp:
            for lineno, line in enumerate(fp, 1):
                if prog.search(line):
                    # A final line without a newline must still end its
                    # output record, or the next match is glued onto it.
                    if not line.endswith('\n'):
                        line += '\n'
                    sys.stdout.write("%s:%s:%s" % (filename, lineno, line))
Guido van Rossum | 21bc15b | 1995-04-10 11:40:26 +0000 | [diff] [blame] | 42 | |
def chomp(s):
    """Return *s* with every trailing newline character removed.

    Only ``'\\n'`` is stripped; other trailing whitespace (spaces, tabs,
    carriage returns) is left untouched.
    """
    trimmed = s
    while trimmed.endswith('\n'):
        trimmed = trimmed[:-1]
    return trimmed
Guido van Rossum | 21bc15b | 1995-04-10 11:40:26 +0000 | [diff] [blame] | 45 | |
# Run the grep simulation only when executed as a script, not on import.
if __name__ == "__main__":
    main()