#
# Secret Labs' Regular Expression Engine
#
# convert re-style regular expression to sre pattern
#
# Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
#
# Portions of this engine have been developed in cooperation with
# CNRI.  Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work.
#
12
import string, sys

import _sre

from sre_constants import *

# upper bound used for the open-ended repeat operators ("*", "+", "{m,}")
# FIXME: should be 65535, but the arraymodule is still broken
MAXREPEAT = 32767

# a token whose first character is not in this string is taken as a
# plain literal by _parse
SPECIAL_CHARS = ".\\[{()*+?^$|"
# tokens that make _parse repeat the previous item
REPEAT_CHARS = "*+?{"

# digit tables are tuples of one-character strings, used with "in"
DIGITS = tuple(string.digits)

OCTDIGITS = tuple("01234567")
HEXDIGITS = tuple("0123456789abcdefABCDEF")

# characters skipped by _parse when SRE_FLAG_VERBOSE is set
WHITESPACE = string.whitespace

# escape token -> (LITERAL, character) for the control-character escapes
ESCAPES = {
    "\\a": (LITERAL, chr(7)),
    "\\b": (LITERAL, chr(8)),
    "\\f": (LITERAL, chr(12)),
    "\\n": (LITERAL, chr(10)),
    "\\r": (LITERAL, chr(13)),
    "\\t": (LITERAL, chr(9)),
    "\\v": (LITERAL, chr(11))
}

# escape token -> position assertion (AT) or character category (IN).
# note that "\\b" appears in both tables: _escape consults CATEGORIES
# first (word boundary), _class_escape consults ESCAPES first (backspace)
CATEGORIES = {
    "\\A": (AT, AT_BEGINNING), # start of string
    "\\b": (AT, AT_BOUNDARY),
    "\\B": (AT, AT_NON_BOUNDARY),
    "\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
    "\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
    "\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
    "\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
    "\\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
    "\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
    "\\Z": (AT, AT_END), # end of string
}

# inline flag letter, as written in "(?...)", -> flag bit
FLAGS = {
    # standard flags
    "i": SRE_FLAG_IGNORECASE,
    "L": SRE_FLAG_LOCALE,
    "m": SRE_FLAG_MULTILINE,
    "s": SRE_FLAG_DOTALL,
    "x": SRE_FLAG_VERBOSE,
    # extensions
    "t": SRE_FLAG_TEMPLATE,
    "u": SRE_FLAG_UNICODE,
}
66
class State:
    # per-pattern parser state: the flag bits collected so far, the
    # number of the next group to hand out, and the name -> number
    # mapping for named groups
    def __init__(self):
        self.groupdict = {}
        self.groups = 1
        self.flags = 0
    def getgroup(self, name=None):
        # hand out the next group number; if a non-empty name is
        # given, record the number under that name as well
        number = self.groups
        if name:
            self.groupdict[name] = number
        self.groups = number + 1
        return number
Guido van Rossumaad67612000-05-08 17:31:04 +000078
class SubPattern:
    # a subpattern, in intermediate form: a list-like container of
    # (opcode, argument) pairs plus a reference to the owning pattern
    # state.  most methods simply delegate to the underlying list.
    def __init__(self, pattern, data=None):
        self.pattern = pattern
        if not data:
            data = []
        self.data = data
        # cached result of getwidth(), filled in on first use
        self.width = None
    def __repr__(self):
        return repr(self.data)
    def __len__(self):
        return len(self.data)
    def __delitem__(self, index):
        del self.data[index]
    def __getitem__(self, index):
        return self.data[index]
    def __setitem__(self, index, code):
        self.data[index] = code
    def __getslice__(self, start, stop):
        # a slice is itself a SubPattern sharing the same pattern state
        return SubPattern(self.pattern, self.data[start:stop])
    def insert(self, index, code):
        self.data.insert(index, code)
    def append(self, code):
        self.data.append(code)
    def getwidth(self):
        # determine the width (min, max) for this subpattern.
        # returns a 2-tuple of ints, each clipped to sys.maxint; the
        # result is cached in self.width.
        if self.width:
            return self.width
        # accumulate in longs so that large repeat counts cannot overflow
        lo = hi = long(0)
        for op, av in self.data:
            if op is BRANCH:
                # a branch matches exactly one of its alternatives, so
                # it contributes the smallest minimum and the largest
                # maximum found among them
                i = sys.maxint
                j = 0
                for alt in av[1]:
                    l, h = alt.getwidth()
                    i = min(i, l)
                    j = max(j, h)
                lo = lo + i
                hi = hi + j
            elif op is CALL:
                i, j = av.getwidth()
                lo = lo + i
                hi = hi + j
            elif op is SUBPATTERN:
                i, j = av[1].getwidth()
                lo = lo + i
                hi = hi + j
            elif op in (MIN_REPEAT, MAX_REPEAT):
                # av is (min count, max count, repeated item)
                i, j = av[2].getwidth()
                lo = lo + long(i) * av[0]
                hi = hi + long(j) * av[1]
            elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
                # these all consume exactly one character
                lo = lo + 1
                hi = hi + 1
            elif op == SUCCESS:
                break
        self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
        return self.width
Guido van Rossumaad67612000-05-08 17:31:04 +0000137
class Tokenizer:
    # splits a pattern string into tokens.  a token is a single
    # character, or a backslash together with the character that
    # follows it.  the upcoming token is always available as
    # self.next, which is None once the string is exhausted.
    def __init__(self, string):
        self.string = string
        self.index = 0
        self.next = self.__next()
    def __next(self):
        # return the token at the current position and step past it
        pos = self.index
        text = self.string
        if pos >= len(text):
            return None
        token = text[pos]
        if token[0] == "\\":
            # a backslash must be followed by one more character
            try:
                follower = text[pos + 1]
            except IndexError:
                raise error("bogus escape")
            token = token + follower
        self.index = pos + len(token)
        return token
    def match(self, char):
        # consume the upcoming token if it equals char; returns 1/0
        if char != self.next:
            return 0
        self.next = self.__next()
        return 1
    def match_set(self, set):
        # consume the upcoming token if it is a member of set; returns 1/0
        upcoming = self.next
        if not upcoming or upcoming not in set:
            return 0
        self.next = self.__next()
        return 1
    def get(self):
        # return the upcoming token (or None) and advance
        current = self.next
        self.next = self.__next()
        return current
169
def isident(char):
    # true if char is an underscore or an ASCII letter
    return char == "_" or "A" <= char <= "Z" or "a" <= char <= "z"
172
def isdigit(char):
    # true if char lies in the range "0".."9"
    return char >= "0" and char <= "9"
175
def isname(name):
    # check that group name is a valid string: it must start with a
    # letter or underscore and contain only letters, digits and
    # underscores.  returns 1 if valid, 0 otherwise; an empty name
    # is reported as invalid rather than raising IndexError.
    # FIXME: <fl> this code is really lame.  should use a regular
    # expression instead, but I seem to have certain bootstrapping
    # problems here ;-)
    if not name or not isident(name[0]):
        return 0
    for char in name:
        if not isident(char) and not isdigit(char):
            return 0
    return 1
187
188def _group(escape, state):
189 # check if the escape string represents a valid group
190 try:
191 group = int(escape[1:])
192 if group and group < state.groups:
193 return group
194 except ValueError:
195 pass
196 return None # not a valid group
197
def _class_escape(source, escape):
    # handle escape code inside character class.  returns an
    # (opcode, argument) pair; raises error if the escape cannot
    # be interpreted.
    for table in (ESCAPES, CATEGORIES):
        code = table.get(escape)
        if code:
            return code
    try:
        lead = escape[1:2]
        if lead == "x":
            # hexadecimal escape: swallow every hex digit that follows
            while source.next in HEXDIGITS:
                escape = escape + source.get()
            escape = escape[2:]
            # FIXME: support unicode characters!
            return LITERAL, chr(int(escape[-4:], 16) & 0xff)
        elif str(lead) in OCTDIGITS:
            # octal escape: swallow every octal digit that follows
            while source.next in OCTDIGITS:
                escape = escape + source.get()
            escape = escape[1:]
            # FIXME: support unicode characters!
            return LITERAL, chr(int(escape[-6:], 8) & 0xff)
        if len(escape) == 2:
            # any other escaped character stands for itself
            return LITERAL, escape[1]
    except ValueError:
        # int() rejected the collected digits; report below
        pass
    raise error("bogus escape: %s" % repr(escape))
Guido van Rossumaad67612000-05-08 17:31:04 +0000224
def _escape(source, escape, state):
    # handle escape code in expression.  returns an
    # (opcode, argument) pair; raises error if the escape cannot
    # be interpreted.
    for table in (CATEGORIES, ESCAPES):
        code = table.get(escape)
        if code:
            return code
    try:
        lead = escape[1:2]
        if lead == "x":
            # hexadecimal escape: swallow every hex digit that follows
            while source.next in HEXDIGITS:
                escape = escape + source.get()
            escape = escape[2:]
            # FIXME: support unicode characters!
            return LITERAL, chr(int(escape[-4:], 16) & 0xff)
        elif lead in DIGITS:
            # either a group reference or an octal escape
            while 1:
                group = _group(escape, state)
                if group:
                    # stop here unless one more token would still
                    # name an existing group
                    upcoming = source.next
                    if not upcoming or not _group(escape + upcoming, state):
                        return GROUP, group
                    escape = escape + source.get()
                elif source.next in OCTDIGITS:
                    escape = escape + source.get()
                else:
                    break
            escape = escape[1:]
            # FIXME: support unicode characters!
            return LITERAL, chr(int(escape[-6:], 8) & 0xff)
        if len(escape) == 2:
            # any other escaped character stands for itself
            return LITERAL, escape[1]
    except ValueError:
        # int() rejected the collected digits; report below
        pass
    raise error("bogus escape: %s" % repr(escape))
Guido van Rossumaad67612000-05-08 17:31:04 +0000260
Guido van Rossum3e06ab12000-06-29 19:35:29 +0000261
def _branch(pattern, items):

    # form a branch operator from a set of items.  the items are
    # modified in place when a common prefix is moved out of them.

    result = SubPattern(pattern)

    # as long as every alternative is non-empty and they all start
    # with the same element, move that element out of the branch
    while 1:
        first = None
        shared = 1
        for alt in items:
            if not alt:
                shared = 0
                break
            if first is None:
                first = alt[0]
            elif alt[0] != first:
                shared = 0
                break
        if not shared:
            break
        for alt in items:
            del alt[0]
        result.append(first)

    # if every alternative is now a single literal, the branch can
    # be stored as a character set instead
    # (FIXME: use a range if possible)
    only_literals = 1
    for alt in items:
        if len(alt) != 1 or alt[0][0] != LITERAL:
            only_literals = 0
            break
    if only_literals:
        charset = []
        for alt in items:
            charset.append(alt[0])
        result.append((IN, charset))
        return result

    result.append((BRANCH, (None, items)))
    return result
Guido van Rossumaad67612000-05-08 17:31:04 +0000302
def _parse(source, state, flags=0):

    # parse regular expression pattern into an operator list.
    #
    # source is the Tokenizer to read from, state the shared State
    # (group numbering and flags); flags is only handed on to the
    # recursive calls.  parsing stops, without consuming it, at a
    # "|" or ")" token, or at the end of the pattern.  returns a
    # SubPattern; raises error on malformed input.

    subpattern = SubPattern(state)

    while 1:

        if source.next in ("|", ")"):
            break # end of subpattern
        this = source.get()
        if this is None:
            break # end of pattern

        if state.flags & SRE_FLAG_VERBOSE:
            # skip whitespace and comments
            if this in WHITESPACE:
                continue
            if this == "#":
                while 1:
                    this = source.get()
                    if this in (None, "\n"):
                        break
                continue

        if this and this[0] not in SPECIAL_CHARS:
            subpattern.append((LITERAL, this))

        elif this == "[":
            # character set
            set = []
##          if source.match(":"):
##              pass # handle character classes
            if source.match("^"):
                set.append((NEGATE, None))
            # check remaining characters.  "start" remembers the set
            # as it was before any member was added, so that a "]"
            # in first position is taken as a literal
            start = set[:]
            while 1:
                this = source.get()
                if this == "]" and set != start:
                    break
                elif this and this[0] == "\\":
                    code1 = _class_escape(source, this)
                elif this:
                    code1 = LITERAL, this
                else:
                    raise error("unexpected end of regular expression")
                if source.match("-"):
                    # potential range
                    this = source.get()
                    if this is None:
                        # pattern ended right after the "-"
                        raise error("unexpected end of regular expression")
                    if this == "]":
                        # trailing "-" is a literal
                        set.append(code1)
                        set.append((LITERAL, "-"))
                        break
                    else:
                        if this[0] == "\\":
                            code2 = _class_escape(source, this)
                        else:
                            code2 = LITERAL, this
                        if code1[0] != LITERAL or code2[0] != LITERAL:
                            raise error("illegal range")
                        if len(code1[1]) != 1 or len(code2[1]) != 1:
                            raise error("illegal range")
                        set.append((RANGE, (code1[1], code2[1])))
                else:
                    if code1[0] is IN:
                        # unwrap a category escape such as \d
                        code1 = code1[1][0]
                    set.append(code1)

            # FIXME: <fl> move set optimization to compiler!
            if len(set)==1 and set[0][0] is LITERAL:
                subpattern.append(set[0]) # optimization
            elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
                subpattern.append((NOT_LITERAL, set[1][1])) # optimization
            else:
                # FIXME: <fl> add charmap optimization
                subpattern.append((IN, set))

        elif this and this[0] in REPEAT_CHARS:
            # repeat previous item
            if this == "?":
                min, max = 0, 1
            elif this == "*":
                min, max = 0, MAXREPEAT
            elif this == "+":
                min, max = 1, MAXREPEAT
            elif this == "{":
                min, max = 0, MAXREPEAT
                lo = hi = ""
                while source.next in DIGITS:
                    lo = lo + source.get()
                if source.match(","):
                    while source.next in DIGITS:
                        hi = hi + source.get()
                else:
                    hi = lo
                if not source.match("}"):
                    raise error("bogus range")
                if lo:
                    min = int(lo)
                if hi:
                    max = int(hi)
                # an interval whose upper bound is below its lower
                # bound can never match; reject it
                if max < min:
                    raise error("bad repeat interval")
            else:
                raise error("not supported")
            # figure out which item to repeat
            if subpattern:
                item = subpattern[-1:]
            else:
                raise error("nothing to repeat")
            # a trailing "?" selects the minimizing variant
            if source.match("?"):
                subpattern[-1] = (MIN_REPEAT, (min, max, item))
            else:
                subpattern[-1] = (MAX_REPEAT, (min, max, item))

        elif this == ".":
            subpattern.append((ANY, None))

        elif this == "(":
            # group is 1 for a capturing group, 2 for a non-capturing
            # group and 0 for an extension that has no contents
            group = 1
            name = None
            if source.match("?"):
                group = 0
                # options
                if source.match("P"):
                    # python extensions
                    if source.match("<"):
                        # named group: skip forward to end of name
                        name = ""
                        while 1:
                            char = source.get()
                            if char is None:
                                raise error("unterminated name")
                            if char == ">":
                                break
                            name = name + char
                        group = 1
                        if not isname(name):
                            raise error("illegal character in group name")
                    elif source.match("="):
                        # named backreference
                        raise error("not yet implemented")
                    else:
                        char = source.get()
                        if char is None:
                            raise error("unexpected end of pattern")
                        raise error("unknown specifier: ?P%s" % char)
                elif source.match(":"):
                    # non-capturing group
                    group = 2
                elif source.match("#"):
                    # comment
                    while 1:
                        if source.next is None or source.next == ")":
                            break
                        source.get()
                else:
                    # flags
                    while FLAGS.has_key(source.next):
                        state.flags = state.flags | FLAGS[source.get()]
            if group:
                # parse group contents
                b = []
                if group == 2:
                    # anonymous group
                    group = None
                else:
                    group = state.getgroup(name)
                while 1:
                    p = _parse(source, state, flags)
                    if source.match(")"):
                        if b:
                            b.append(p)
                            p = _branch(state, b)
                        subpattern.append((SUBPATTERN, (group, p)))
                        break
                    elif source.match("|"):
                        b.append(p)
                    else:
                        raise error("group not properly closed")
            else:
                # nothing but the closing parenthesis may follow
                while 1:
                    char = source.get()
                    if char is None or char == ")":
                        break
                    raise error("unknown extension")

        elif this == "^":
            subpattern.append((AT, AT_BEGINNING))

        elif this == "$":
            subpattern.append((AT, AT_END))

        elif this and this[0] == "\\":
            code = _escape(source, this, state)
            subpattern.append(code)

        else:
            raise error("parser error")

    return subpattern
504
def parse(pattern, flags=0):
    # parse 're' pattern into list of (opcode, argument) tuples.
    #
    # pattern is the pattern string; flags is a combination of
    # SRE_FLAG_* bits that applies in addition to any inline "(?...)"
    # flags.  returns the top-level SubPattern, whose .pattern
    # attribute is the State used for the parse.  raises error on
    # malformed input.
    source = Tokenizer(pattern)
    state = State()
    # _parse consults state.flags (e.g. for SRE_FLAG_VERBOSE), so the
    # caller's flags have to be stored there to take effect
    state.flags = flags
    b = []
    while 1:
        p = _parse(source, state, flags)
        tail = source.get()
        if tail == "|":
            # one alternative done; collect it and parse the next
            b.append(p)
        elif tail == ")":
            raise error("unbalanced parenthesis")
        elif tail is None:
            if b:
                b.append(p)
                p = _branch(state, b)
            break
        else:
            raise error("bogus characters at end of regular expression")
    return p
525
def parse_template(source, pattern):
    # parse 're' replacement string into list of literals and
    # group references.  each entry is either (LITERAL, text) or
    # (MARK, group index); group names are looked up in
    # pattern.groupindex.
    tokens = Tokenizer(source)
    result = []
    emit = result.append
    while 1:
        this = tokens.get()
        if this is None:
            break # end of replacement string
        if not (this and this[0] == "\\"):
            # ordinary character
            emit((LITERAL, this))
            continue
        if this == "\\g":
            # \g<name> or \g<number>
            name = ""
            if tokens.match("<"):
                while 1:
                    char = tokens.get()
                    if char is None:
                        raise error("unterminated group name")
                    if char == ">":
                        break
                    name = name + char
            if not name:
                raise error("bad group name")
            try:
                index = int(name)
            except ValueError:
                # not a number, so it has to be a known group name
                if not isname(name):
                    raise error("illegal character in group name")
                try:
                    index = pattern.groupindex[name]
                except KeyError:
                    raise IndexError("unknown group name")
            emit((MARK, index))
        elif len(this) > 1 and this[1] in DIGITS:
            # numeric reference: collect all digits that follow
            while tokens.next in DIGITS:
                this = this + tokens.get()
            emit((MARK, int(this[1:])))
        else:
            try:
                code = ESCAPES[this]
            except KeyError:
                # unknown escape: keep both characters as literals
                for char in this:
                    emit((LITERAL, char))
            else:
                emit(code)
    return result
Guido van Rossumaad67612000-05-08 17:31:04 +0000572
def expand_template(template, match):
    # build the replacement text for one match from a list of
    # (LITERAL, text) / (MARK, group) entries.  raises error if a
    # referenced group is None.
    # FIXME: <fl> this is sooooo slow.  drop in the slicelist
    # code instead
    pieces = []
    for code, value in template:
        if code is LITERAL:
            pieces.append(value)
        elif code is MARK:
            text = match.group(value)
            if text is None:
                raise error("empty group")
            pieces.append(text)
    # join with an empty string of the same type as the subject string
    empty = match.string[:0]
    return empty.join(pieces)