blob: e4e18d6d581203507e6d6ee0d2deb824c529e13a [file] [log] [blame]
Guido van Rossumf06ee5f1996-11-27 19:52:01 +00001#! /usr/bin/env python
Guido van Rossum21bc15b1995-04-10 11:40:26 +00002
3# 1) Regular Expressions Test
Andrew M. Kuchling946c53e2003-04-24 17:13:18 +00004#
5# Read a file of (extended per egrep) regular expressions (one per line),
Guido van Rossum21bc15b1995-04-10 11:40:26 +00006# and apply those to all files whose names are listed on the command line.
7# Basically, an 'egrep -f' simulator. Test it with 20 "vt100" patterns
8# against five /etc/termcap files. Tests using more elaborate patterns
9# would also be interesting. Your code should not break if given hundreds
Andrew M. Kuchling946c53e2003-04-24 17:13:18 +000010# of regular expressions or binary files to scan.
Guido van Rossum21bc15b1995-04-10 11:40:26 +000011
12# This implementation:
13# - combines all patterns into a single one using ( ... | ... | ... )
14# - reads patterns from stdin, scans files given as command line arguments
15# - produces output in the format <file>:<lineno>:<line>
16# - is only about 2.5 times as slow as egrep (though I couldn't run
17# Tom's test -- this system, a vanilla SGI, only has /etc/terminfo)
18
19import string
20import sys
21import regex
22from regex_syntax import *
23
# Switch the regex module to egrep-style pattern syntax before any pattern
# is compiled. RE_SYNTAX_EGREP is presumably supplied by the regex_syntax
# star-import -- it is not defined in this file.
regex.set_syntax(RE_SYNTAX_EGREP)
25
26def main():
Andrew M. Kuchling946c53e2003-04-24 17:13:18 +000027 pats = map(chomp, sys.stdin.readlines())
28 bigpat = '(' + string.joinfields(pats, '|') + ')'
29 prog = regex.compile(bigpat)
30
31 for file in sys.argv[1:]:
32 try:
33 fp = open(file, 'r')
34 except IOError, msg:
35 print "%s: %s" % (file, msg)
36 continue
37 lineno = 0
38 while 1:
39 line = fp.readline()
40 if not line:
41 break
42 lineno = lineno + 1
43 if prog.search(line) >= 0:
44 print "%s:%s:%s" % (file, lineno, line),
Guido van Rossum21bc15b1995-04-10 11:40:26 +000045
def chomp(s):
    """Return s with one trailing newline removed, if it ends with one."""
    # Slicing (rather than indexing) keeps the empty string safe.
    if s[-1:] != '\n':
        return s
    return s[:-1]
Guido van Rossum21bc15b1995-04-10 11:40:26 +000049
# Run the scan only when executed as a script, so that importing this
# module does not immediately start reading stdin.
if __name__ == '__main__':
    main()