blob: fbc5f6c6ebecffff990d2d0c417897429a14d664 [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossum21bc15b1995-04-10 11:40:26 +00002
3# 1) Regular Expressions Test
Andrew M. Kuchling946c53e2003-04-24 17:13:18 +00004#
5# Read a file of (extended per egrep) regular expressions (one per line),
Guido van Rossum21bc15b1995-04-10 11:40:26 +00006# and apply those to all files whose names are listed on the command line.
7# Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns
8# against five /etc/termcap files. Tests using more elaborate patterns
9# would also be interesting. Your code should not break if given hundreds
Andrew M. Kuchling946c53e2003-04-24 17:13:18 +000010# of regular expressions or binary files to scan.
Guido van Rossum21bc15b1995-04-10 11:40:26 +000011
12# This implementation:
13# - combines all patterns into a single one using ( ... | ... | ... )
14# - reads patterns from stdin, scans files given as command line arguments
15# - produces output in the format <file>:<lineno>:<line>
16# - is only about 2.5 times as slow as egrep (though I couldn't run
17# Tom's test -- this system, a vanilla SGI, only has /etc/terminfo)
18
19import string
20import sys
Andrew M. Kuchling9b12d9d2003-04-24 17:22:04 +000021import re
Guido van Rossum21bc15b1995-04-10 11:40:26 +000022
def main():
    """Act as a small 'egrep -f': patterns on stdin, files on the command line.

    Every line of standard input is taken as one regular expression.  The
    expressions are joined into a single alternation, compiled once, and
    searched for in each line of every file named in sys.argv[1:].

    Each matching line is written to standard output as
    <file>:<lineno>:<line>.  A file that cannot be opened is reported on
    standard output as "<file>: <error>" and skipped.
    """
    pats = [chomp(raw) for raw in sys.stdin.readlines()]
    # One big ( ... | ... | ... ) lets a single search() try every pattern.
    prog = re.compile('(' + '|'.join(pats) + ')')

    for filename in sys.argv[1:]:
        try:
            fp = open(filename, 'r')
        except IOError:
            # sys.exc_info() is spelled the same on every Python version,
            # unlike the "except X, e" / "except X as e" forms.
            msg = sys.exc_info()[1]
            sys.stdout.write("%s: %s\n" % (filename, msg))
            continue
        try:
            lineno = 0
            for line in fp:
                lineno = lineno + 1
                if prog.search(line):
                    # The last line of a file may lack its newline; add one
                    # so the next record always starts on a fresh line.
                    if not line.endswith('\n'):
                        line = line + '\n'
                    sys.stdout.write("%s:%s:%s" % (filename, lineno, line))
        finally:
            # Release the handle even if scanning is interrupted; otherwise
            # one descriptor stays open per command-line argument.
            fp.close()
Guido van Rossum21bc15b1995-04-10 11:40:26 +000042
def chomp(s):
    """Return s with every trailing newline character removed."""
    end = len(s)
    # Walk back over the newlines at the tail; any other trailing
    # whitespace (spaces, tabs, carriage returns) is kept.
    while end > 0 and s[end - 1] == '\n':
        end -= 1
    return s[:end]
Guido van Rossum21bc15b1995-04-10 11:40:26 +000045
# Run the scan only when executed as a script, so that importing this
# module does not block waiting for patterns on standard input.
if __name__ == '__main__':
    main()