blob: 87380614c81b4594206ff38b08cd8ef87df86c12 [file] [log] [blame]
Guido van Rossum7627c0d2000-03-31 14:58:54 +00001#
2# Secret Labs' Regular Expression Engine
3# $Id$
4#
5# convert template to internal format
6#
7# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
8#
9# This code can only be used for 1.6 alpha testing. All other use
10# require explicit permission from Secret Labs AB.
11#
12# Portions of this engine have been developed in cooperation with
13# CNRI. Hewlett-Packard provided funding for 1.6 integration and
14# other compatibility work.
15#
16
17# FIXME: <fl> formalize (objectify?) and document the compiler code
18# format, so that other frontends can use this compiler
19
20import array, string, sys
21
22import _sre
23
24from sre_constants import *
25
# Select the array typecode whose single-item byte length equals the
# code size reported by the engine.  WORDSIZE stays bound to the
# matching typecode once the loop exits via "break".
for WORDSIZE in "BHil":
    if len(array.array(WORDSIZE, [0]).tostring()) != _sre.getcodesize():
        continue
    break
else:
    # none of the candidate typecodes had the right size
    raise RuntimeError("cannot find a useable array type")
32
33# FIXME: <fl> should move some optimizations from the parser to here!
34
class Code:
    """List-like container of code words that can be packed to bytes.

    The words are kept in the plain list `self.data`; todata() packs
    them into an array of typecode WORDSIZE and returns its string form.
    """

    def __init__(self):
        # code words collected so far
        self.data = []

    def __len__(self):
        """Return the number of code words stored."""
        words = self.data
        return len(words)

    def __getitem__(self, index):
        """Return the code word at position `index`."""
        words = self.data
        return words[index]

    def __setitem__(self, index, code):
        """Overwrite the code word at position `index` with `code`."""
        words = self.data
        words[index] = code

    def append(self, code):
        """Add `code` to the end of the stored code words."""
        words = self.data
        words.append(code)

    def todata(self):
        """Return the code words packed as an array(WORDSIZE) string."""
        packed = array.array(WORDSIZE, self.data)
        return packed.tostring()
Guido van Rossum7627c0d2000-03-31 14:58:54 +000049
50def _lower(literal):
51 # return _sre._lower(literal) # FIXME
52 return string.lower(literal)
53
def _compile(code, pattern, flags):
    """Append the code words for `pattern` to `code`.

    `pattern` is iterated as a sequence of (op, av) pairs and `flags`
    is tested for membership of "s" and "i".  Nested items (branches,
    calls, repeats, subpatterns) are compiled by recursive calls, and
    their skip offsets are patched in once their length is known.

    Raises SyntaxError for a repeated item whose minimum width is zero,
    and ValueError for an operator this function does not handle.
    """
    emit = code.append
    for op, av in pattern:
        if op is ANY:
            if "s" in flags:
                emit(CODES[op])  # any character at all!
            else:
                # otherwise emit NOT_LITERAL with character code 10
                emit(CODES[NOT_LITERAL])
                emit(10)
        elif op in (SUCCESS, FAILURE):
            emit(CODES[op])
        elif op is AT:
            emit(CODES[op])
            emit(POSITIONS[av])
        elif op is BRANCH:
            emit(CODES[op])
            exits = []  # positions of the jump offsets to patch
            for alternative in av[1]:
                skip = len(code)
                emit(0)
                _compile(code, alternative, flags)
                emit(CODES[JUMP])
                exits.append(len(code))
                emit(0)
                # offset from this alternative's header to the next one
                code[skip] = len(code) - skip
            emit(0)  # end of branch
            for slot in exits:
                # every alternative jumps to the end of the branch
                code[slot] = len(code) - slot
        elif op is CALL:
            emit(CODES[op])
            skip = len(code)
            emit(0)
            _compile(code, av, flags)
            emit(CODES[SUCCESS])
            code[skip] = len(code) - skip
        elif op is CATEGORY:  # not used by current parser
            emit(CODES[op])
            emit(CATEGORIES[av])
        elif op is GROUP:
            opcode = op
            if "i" in flags:
                opcode = MAP_IGNORE[op]
            emit(CODES[opcode])
            emit(av)
        elif op is IN:
            if "i" in flags:
                emit(CODES[MAP_IGNORE[op]])
                # set members are lower-cased before conversion
                tocode = lambda literal: ord(_lower(literal))
            else:
                emit(CODES[op])
                tocode = ord
            skip = len(code)
            emit(0)
            for set_op, set_av in av:
                emit(CODES[set_op])
                if set_op is NEGATE:
                    continue  # no operand
                if set_op is LITERAL:
                    emit(tocode(set_av))
                elif set_op is RANGE:
                    emit(tocode(set_av[0]))
                    emit(tocode(set_av[1]))
                elif set_op is CATEGORY:
                    emit(CATEGORIES[set_av])
                else:
                    raise ValueError("unsupported set operator")
            emit(CODES[FAILURE])
            code[skip] = len(code) - skip
        elif op in (LITERAL, NOT_LITERAL):
            if "i" in flags:
                emit(CODES[MAP_IGNORE[op]])
                emit(ord(_lower(av)))
            else:
                emit(CODES[op])
                emit(ord(av))
        elif op is MARK:
            emit(CODES[op])
            emit(av)
        elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
            lo, hi = av[2].getwidth()
            if lo == 0:
                raise SyntaxError("cannot repeat zero-width items")
            # a MAX_REPEAT over an item of width exactly one gets the
            # dedicated MAX_REPEAT_ONE opcode, terminated by SUCCESS
            single = lo == hi == 1 and op is MAX_REPEAT
            if single:
                emit(CODES[MAX_REPEAT_ONE])
            else:
                emit(CODES[op])
            skip = len(code)
            emit(0)
            emit(av[0])
            emit(av[1])
            _compile(code, av[2], flags)
            if single:
                emit(CODES[SUCCESS])
            elif op is MIN_REPEAT:
                emit(CODES[MIN_UNTIL])
            else:
                # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
                emit(CODES[MAX_UNTIL])
            code[skip] = len(code) - skip
        elif op is SUBPATTERN:
            # Only the body (av[1]) is compiled.  Emitting MARK codes
            # (group-1)*2 before and (group-1)*2+1 after the body, for
            # a non-zero group av[0], is currently disabled.
            _compile(code, av[1], flags)
        else:
            raise ValueError("unsupported operand type", op)
Guido van Rossum7627c0d2000-03-31 14:58:54 +0000164
def compile(p, flags=()):
    """Convert a pattern to the engine's internal format.

    If `p` is a plain or unicode string it is first parsed with
    sre_parse.parse(); any other object is used as-is and must provide
    `.data` and `.pattern`.  The compiled code words are packed with
    Code.todata() and handed to _sre.compile(), whose result is
    returned.  The original pattern string (or None when `p` was not a
    string) is passed along as the first argument.

    NOTE(review): `flags` is accepted but never read in this function;
    the flags given to _compile() come from p.pattern.flags.
    """
    source = None
    if type(p) in (type(""), type(u"")):
        import sre_parse
        source = p
        p = sre_parse.parse(source)
    state = p.pattern
    program = Code()
    _compile(program, p.data, state.flags)
    program.append(CODES[SUCCESS])
    data = program.todata()
    if 0:  # debugging: flip to 1 to dump a disassembly
        import sre_disasm
        print("")
        print("-" * 68)
        sre_disasm.disasm(data)
        print("-" * 68)
    return _sre.compile(source, data, state.groups - 1, state.groupdict)