Guido van Rossum | f06ee5f | 1996-11-27 19:52:01 +0000 | [diff] [blame] | 1 | #! /usr/bin/env python |
Guido van Rossum | 21bc15b | 1995-04-10 11:40:26 +0000 | [diff] [blame] | 2 | |
| 3 | # 1) Regular Expressions Test |
| 4 | # |
| 5 | # Read a file of (extended per egrep) regular expressions (one per line), |
| 6 | # and apply those to all files whose names are listed on the command line. |
| 7 | # Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns |
| 8 | # against five /etc/termcap files. Tests using more elaborate patterns |
| 9 | # would also be interesting. Your code should not break if given hundreds |
| 10 | # of regular expressions or binary files to scan. |
| 11 | |
| 12 | # This implementation: |
| 13 | # - combines all patterns into a single one using ( ... | ... | ... ) |
| 14 | # - reads patterns from stdin, scans files given as command line arguments |
| 15 | # - produces output in the format <file>:<lineno>:<line> |
| 16 | # - is only about 2.5 times as slow as egrep (though I couldn't run |
| 17 | # Tom's test -- this system, a vanilla SGI, only has /etc/terminfo) |
| 18 | |
| 19 | import string |
| 20 | import sys |
| 21 | import regex |
| 22 | from regex_syntax import * |
| 23 | |
# Switch the regex module to egrep-style pattern syntax before any pattern
# is compiled (RE_SYNTAX_EGREP is presumably supplied by the star import
# from regex_syntax -- confirm).
regex.set_syntax(RE_SYNTAX_EGREP)
| 25 | |
| 26 | def main(): |
| 27 | pats = map(chomp, sys.stdin.readlines()) |
| 28 | bigpat = '(' + string.joinfields(pats, '|') + ')' |
| 29 | prog = regex.compile(bigpat) |
| 30 | |
| 31 | for file in sys.argv[1:]: |
| 32 | try: |
| 33 | fp = open(file, 'r') |
| 34 | except IOError, msg: |
| 35 | print "%s: %s" % (file, msg) |
| 36 | continue |
| 37 | lineno = 0 |
| 38 | while 1: |
| 39 | line = fp.readline() |
| 40 | if not line: |
| 41 | break |
| 42 | lineno = lineno + 1 |
| 43 | if prog.search(line) >= 0: |
| 44 | print "%s:%s:%s" % (file, lineno, line), |
| 45 | |
def chomp(s):
    """Return s with one trailing newline removed, if it ends with one.

    Only a single final '\\n' is dropped; any other string is returned
    unchanged.
    """
    # Slicing (rather than indexing) keeps this safe for the empty string.
    ends_with_newline = (s[-1:] == '\n')
    if not ends_with_newline:
        return s
    return s[:-1]
| 49 | |
# Script entry point: run the scan.  There is no __name__ guard, so
# importing this file also calls main().
main()