| #!/usr/bin/env python | 
 | # -*- mode: python -*- | 
 | # $Id$ | 
 |  | 
 | import string | 
 | import reop | 
 |  | 
 | # reop.error and re.error should be the same, since exceptions can be | 
 | # raised from  either module. | 
 | error = reop.error # 're error' | 
 |  | 
 | from reop import NORMAL, CHARCLASS, REPLACEMENT | 
 | from reop import CHAR, MEMORY_REFERENCE, SYNTAX, NOT_SYNTAX, SET | 
 | from reop import WORD_BOUNDARY, NOT_WORD_BOUNDARY, BEGINNING_OF_BUFFER, END_OF_BUFFER | 
 |  | 
 | # compilation flags | 
 |  | 
 | IGNORECASE = I = 0x01 | 
 |  | 
 | MULTILINE = M = 0x02 | 
 | DOTALL = S = 0x04 | 
 | VERBOSE = X = 0x08 | 
 |  | 
 | repetition_operators = ['*', '*?', '+', '+?', '?', '??', '{n}', '{n}?', | 
 | 			'{n,}', '{n,}?', '{n,m}', '{n,m}?'] | 
 |  | 
 | # | 
 | # | 
 | # | 
 |  | 
 | def valid_identifier(id): | 
 |     if len(id) == 0: | 
 | 	return 0 | 
 |     if (not reop.syntax_table[id[0]] & reop.word) or \ | 
 |        (reop.syntax_table[id[0]] & reop.digit): | 
 | 	return 0 | 
 |     for char in id[1:]: | 
 | 	if not reop.syntax_table[char] & reop.word: | 
 | 	    return 0 | 
 |     return 1 | 
 |  | 
 | # | 
 | # | 
 | # | 
 |  | 
 | _cache = {} | 
 | _MAXCACHE = 20 | 
 |  | 
 | def _cachecompile(pattern, flags=0): | 
 |     key = (pattern, flags) | 
 |     try: | 
 | 	return _cache[key] | 
 |     except KeyError: | 
 | 	pass | 
 |     value = compile(pattern, flags) | 
 |     if len(_cache) >= _MAXCACHE: | 
 | 	_cache.clear() | 
 |     _cache[key] = value | 
 |     return value | 
 |  | 
 | def match(pattern, string, flags=0): | 
 |     return _cachecompile(pattern, flags).match(string) | 
 |    | 
 | def search(pattern, string, flags=0): | 
 |     return _cachecompile(pattern, flags).search(string) | 
 |    | 
 | def sub(pattern, repl, string, count=0): | 
 |     if type(pattern) == type(''): | 
 | 	pattern = _cachecompile(pattern) | 
 |     return pattern.sub(repl, string, count) | 
 |  | 
 | def subn(pattern, repl, string, count=0): | 
 |     if type(pattern) == type(''): | 
 | 	pattern = _cachecompile(pattern) | 
 |     return pattern.subn(repl, string, count) | 
 |    | 
 | def split(pattern, string, maxsplit=0): | 
 |     if type(pattern) == type(''): | 
 | 	pattern = _cachecompile(pattern) | 
 |     return pattern.split(string, maxsplit) | 
 |  | 
 | # | 
 | # | 
 | # | 
 |  | 
 | def _expand(m, repl): | 
 |     results = [] | 
 |     index = 0 | 
 |     size = len(repl) | 
 |     while index < size: | 
 | 	found = string.find(repl, '\\', index) | 
 | 	if found < 0: | 
 | 	    results.append(repl[index:]) | 
 | 	    break | 
 | 	if found > index: | 
 | 	    results.append(repl[index:found]) | 
 | 	escape_type, value, index = expand_escape(repl, found+1, REPLACEMENT) | 
 | 	if escape_type == CHAR: | 
 | 	    results.append(value) | 
 | 	elif escape_type == MEMORY_REFERENCE: | 
 | 	    r = m.group(value) | 
 | 	    if r is None: | 
 | 		raise error, ('group "' + str(value) + '" did not contribute ' | 
 | 			      'to the match') | 
 | 	    results.append(m.group(value)) | 
 | 	else: | 
 | 	    raise error, "bad escape in replacement" | 
 |     return string.join(results, '') | 
 |  | 
 | class RegexObject: | 
 |     def __init__(self, pattern, flags, code, num_regs, groupindex): | 
 | 	self.code = code | 
 | 	self.num_regs = num_regs | 
 | 	self.flags = flags | 
 | 	self.pattern = pattern | 
 | 	self.groupindex = groupindex | 
 | 	self.fastmap = build_fastmap(code) | 
 | 	 | 
 | 	if code[0].name == 'bol': | 
 | 	    self.anchor = 1 | 
 | 	     | 
 | 	elif code[0].name == 'begbuf': | 
 | 	    self.anchor = 2 | 
 | 	     | 
 | 	else: | 
 | 	    self.anchor = 0 | 
 | 	     | 
 | 	self.buffer = assemble(code) | 
 |     def search(self, string, pos=0): | 
 | 	regs = reop.search(self.buffer, | 
 | 			   self.num_regs, | 
 | 			   self.flags, | 
 | 			   self.fastmap.can_be_null, | 
 | 			   self.fastmap.fastmap(), | 
 | 			   self.anchor, | 
 | 			   string, | 
 | 			   pos) | 
 | 	if regs is None: | 
 | 	    return None | 
 | 	 | 
 | 	return MatchObject(self, | 
 | 			   string, | 
 | 			   pos, | 
 | 			   regs) | 
 |      | 
 |     def match(self, string, pos=0): | 
 | 	regs = reop.match(self.buffer, | 
 | 			  self.num_regs, | 
 | 			  self.flags, | 
 | 			  self.fastmap.can_be_null, | 
 | 			  self.fastmap.fastmap(), | 
 | 			  self.anchor, | 
 | 			  string, | 
 | 			  pos) | 
 | 	if regs is None: | 
 | 	    return None | 
 | 	 | 
 | 	return MatchObject(self, | 
 | 			   string, | 
 | 			   pos, | 
 | 			   regs) | 
 |      | 
 |     def sub(self, repl, string, count=0): | 
 |         return self.subn(repl, string, count)[0] | 
 |      | 
 |     def subn(self, repl, source, count=0): | 
 | 	if count < 0: | 
 | 	    raise ValueError, "negative substibution count" | 
 | 	if count == 0: | 
 | 	    import sys | 
 | 	    count = sys.maxint | 
 | 	if type(repl) == type(''): | 
 | 	    if '\\' in repl: | 
 | 		repl = lambda m, r=repl: _expand(m, r) | 
 | 	    else: | 
 | 		repl = lambda m, r=repl: r | 
 | 	n = 0           # Number of matches | 
 | 	pos = 0         # Where to start searching | 
 | 	lastmatch = -1  # End of last match | 
 | 	results = []    # Substrings making up the result | 
 | 	end = len(source) | 
 | 	while n < count and pos <= end: | 
 | 	    m = self.search(source, pos) | 
 | 	    if not m: | 
 | 		break | 
 | 	    i, j = m.span(0) | 
 | 	    if i == j == lastmatch: | 
 | 		# Empty match adjacent to previous match | 
 | 		pos = pos + 1 | 
 | 		results.append(source[lastmatch:pos]) | 
 | 		continue | 
 | 	    if pos < i: | 
 | 		results.append(source[pos:i]) | 
 | 	    results.append(repl(m)) | 
 | 	    pos = lastmatch = j | 
 | 	    if i == j: | 
 | 		# Last match was empty; don't try here again | 
 | 		pos = pos + 1 | 
 | 		results.append(source[lastmatch:pos]) | 
 | 	    n = n + 1 | 
 | 	results.append(source[pos:]) | 
 | 	return (string.join(results, ''), n) | 
 | 									     | 
 |     def split(self, source, maxsplit=0): | 
 | 	if maxsplit < 0: | 
 | 	    raise error, "negative split count" | 
 | 	if maxsplit == 0: | 
 | 	    import sys | 
 | 	    maxsplit = sys.maxint | 
 | 	n = 0 | 
 | 	pos = 0 | 
 | 	lastmatch = 0 | 
 | 	results = [] | 
 | 	end = len(source) | 
 | 	while n < maxsplit: | 
 | 	    m = self.search(source, pos) | 
 | 	    if not m: | 
 | 		break | 
 | 	    i, j = m.span(0) | 
 | 	    if i == j: | 
 | 		# Empty match | 
 | 		if pos >= end: | 
 | 		    break | 
 | 		pos = pos+1 | 
 | 		continue | 
 | 	    results.append(source[lastmatch:i]) | 
 | 	    g = m.group() | 
 | 	    if g: | 
 | 		results[len(results):] = list(g) | 
 | 	    pos = lastmatch = j | 
 | 	results.append(source[lastmatch:]) | 
 | 	return results | 
 |  | 
 | class MatchObject: | 
 |     def __init__(self, re, string, pos, regs): | 
 | 	self.re = re | 
 | 	self.string = string | 
 | 	self.pos = pos | 
 | 	self.regs = regs | 
 | 	 | 
 |     def start(self, g): | 
 | 	if type(g) == type(''): | 
 | 	    try: | 
 | 		g = self.re.groupindex[g] | 
 | 	    except (KeyError, TypeError): | 
 | 		raise IndexError, ('group "' + g + '" is undefined') | 
 | 	return self.regs[g][0] | 
 |      | 
 |     def end(self, g): | 
 | 	if type(g) == type(''): | 
 | 	    try: | 
 | 		g = self.re.groupindex[g] | 
 | 	    except (KeyError, TypeError): | 
 | 		raise IndexError, ('group "' + g + '" is undefined') | 
 | 	return self.regs[g][1] | 
 |      | 
 |     def span(self, g): | 
 | 	if type(g) == type(''): | 
 | 	    try: | 
 | 		g = self.re.groupindex[g] | 
 | 	    except (KeyError, TypeError): | 
 | 		raise IndexError, ('group "' + g + '" is undefined') | 
 | 	return self.regs[g] | 
 |      | 
 |     def group(self, *groups): | 
 | 	if len(groups) == 0: | 
 | 	    groups = range(1, self.re.num_regs) | 
 | 	    use_all = 1 | 
 | 	else: | 
 | 	    use_all = 0 | 
 | 	result = [] | 
 | 	for g in groups: | 
 | 	    if type(g) == type(''): | 
 | 		try: | 
 | 		    g = self.re.groupindex[g] | 
 | 		except (KeyError, TypeError): | 
 | 		    raise IndexError, ('group "' + g + '" is undefined') | 
 | 	    if (self.regs[g][0] == -1) or (self.regs[g][1] == -1): | 
 | 		result.append(None) | 
 | 	    else: | 
 | 		result.append(self.string[self.regs[g][0]:self.regs[g][1]]) | 
 | 	if use_all or len(result) > 1: | 
 | 	    return tuple(result) | 
 | 	elif len(result) == 1: | 
 | 	    return result[0] | 
 | 	else: | 
 | 	    return () | 
 |  | 
 | # | 
 | # A set of classes to make assembly a bit easier, if a bit verbose. | 
 | # | 
 |  | 
 | class Instruction: | 
 |     def __init__(self, opcode, size=1): | 
 | 	self.opcode = opcode | 
 | 	self.size = size | 
 |     def assemble(self, position, labels): | 
 | 	return self.opcode | 
 |     def __repr__(self): | 
 | 	return '%-15s' % (self.name) | 
 |  | 
 | class End(Instruction): | 
 |     name = 'end' | 
 |     def __init__(self): | 
 | 	Instruction.__init__(self, chr(0)) | 
 |  | 
 | class Bol(Instruction): | 
 |     name = 'bol' | 
 |     def __init__(self): | 
 | 	self.name = 'bol' | 
 | 	Instruction.__init__(self, chr(1)) | 
 |  | 
 | class Eol(Instruction): | 
 |     name = 'eol' | 
 |     def __init__(self): | 
 | 	Instruction.__init__(self, chr(2)) | 
 |  | 
 | class Set(Instruction): | 
 |     name = 'set' | 
 |     def __init__(self, set, flags=0): | 
 | 	self.set = set | 
 | 	if flags & IGNORECASE: self.set=map(string.lower, self.set) | 
 | 	if len(set)==1:  | 
 | 	    # If only one element, use the "exact" opcode (it'll be faster) | 
 | 	    Instruction.__init__(self, chr(4), 2) | 
 | 	else: | 
 | 	    # Use the "set" opcode | 
 | 	    Instruction.__init__(self, chr(3), 33) | 
 |     def assemble(self, position, labels): | 
 | 	if len(self.set)==1: | 
 | 	    # If only one character in set, generate an "exact" opcode | 
 | 	    return self.opcode + self.set[0] | 
 | 	result = self.opcode | 
 | 	temp = 0 | 
 | 	for i, c in map(lambda x: (x, chr(x)), range(256)): | 
 | 	    if c in self.set: | 
 | 		temp = temp | (1 << (i & 7)) | 
 | 	    if (i % 8) == 7: | 
 | 		result = result + chr(temp) | 
 | 		temp = 0 | 
 | 	return result | 
 |     def __repr__(self): | 
 | 	result = '%-15s' % (self.name) | 
 | 	self.set.sort() | 
 | 	# XXX this should print more intelligently | 
 | 	for char in self.set: | 
 | 	    result = result + char | 
 | 	return result | 
 |      | 
 | class Exact(Instruction): | 
 |     name = 'exact' | 
 |     def __init__(self, char, flags): | 
 | 	self.char = char | 
 | 	if flags & IGNORECASE: self.char=string.lower(self.char) | 
 | 	Instruction.__init__(self, chr(4), 2) | 
 |     def assemble(self, position, labels): | 
 | 	return self.opcode + self.char | 
 |     def __repr__(self): | 
 | 	return '%-15s %s' % (self.name, `self.char`) | 
 |      | 
 | class AnyChar(Instruction): | 
 |     name = 'anychar' | 
 |     def __init__(self): | 
 | 	Instruction.__init__(self, chr(5)) | 
 |     def assemble(self, position, labels): | 
 | 	return self.opcode | 
 |  | 
 | class MemoryInstruction(Instruction): | 
 |     def __init__(self, opcode, register): | 
 | 	self.register = register | 
 | 	Instruction.__init__(self, opcode, 2) | 
 |     def assemble(self, position, labels): | 
 | 	return self.opcode + chr(self.register) | 
 |     def __repr__(self): | 
 | 	return '%-15s %i' % (self.name, self.register) | 
 |  | 
 | class StartMemory(MemoryInstruction): | 
 |     name = 'start_memory' | 
 |     def __init__(self, register): | 
 | 	MemoryInstruction.__init__(self, chr(6), register) | 
 |  | 
 | class EndMemory(MemoryInstruction): | 
 |     name = 'end_memory' | 
 |     def __init__(self, register): | 
 | 	MemoryInstruction.__init__(self, chr(7), register) | 
 |  | 
 | class MatchMemory(MemoryInstruction): | 
 |     name = 'match_memory' | 
 |     def __init__(self, register): | 
 | 	MemoryInstruction.__init__(self, chr(8), register) | 
 |  | 
 | class JumpInstruction(Instruction): | 
 |     def __init__(self, opcode, label): | 
 | 	self.label = label | 
 | 	Instruction.__init__(self, opcode, 3) | 
 |     def compute_offset(self, start, dest): | 
 | 	return dest - (start + 3) | 
 |     def pack_offset(self, offset): | 
 | 	if offset > 32767: | 
 | 	    raise error, 'offset out of range (pos)' | 
 | 	elif offset < -32768: | 
 | 	    raise error, 'offset out of range (neg)' | 
 | 	elif offset < 0: | 
 | 	    offset = offset + 65536 | 
 | 	return chr(offset & 0xff) + chr((offset >> 8) & 0xff) | 
 |     def assemble(self, position, labels): | 
 | 	return self.opcode + \ | 
 | 	       self.pack_offset(self.compute_offset(position, | 
 | 						    labels[self.label])) | 
 |     def __repr__(self): | 
 | 	return '%-15s %i' % (self.name, self.label) | 
 |  | 
 | class Jump(JumpInstruction): | 
 |     name = 'jump' | 
 |     def __init__(self, label): | 
 | 	JumpInstruction.__init__(self, chr(9), label) | 
 |  | 
 | class StarJump(JumpInstruction): | 
 |     name = 'star_jump' | 
 |     def __init__(self, label): | 
 | 	JumpInstruction.__init__(self, chr(10), label) | 
 |  | 
 | class FailureJump(JumpInstruction): | 
 |     name = 'failure_jump' | 
 |     def __init__(self, label): | 
 | 	JumpInstruction.__init__(self, chr(11), label) | 
 |  | 
 | class UpdateFailureJump(JumpInstruction): | 
 |     name = 'update_failure_jump' | 
 |     def __init__(self, label): | 
 | 	JumpInstruction.__init__(self, chr(12), label) | 
 |  | 
 | class DummyFailureJump(JumpInstruction): | 
 |     name = 'dummy_failure_jump' | 
 |     def __init__(self, label): | 
 | 	JumpInstruction.__init__(self, chr(13), label) | 
 | 	 | 
 | class BegBuf(Instruction): | 
 |     name = 'begbuf' | 
 |     def __init__(self): | 
 | 	Instruction.__init__(self, chr(14)) | 
 |  | 
 | class EndBuf(Instruction): | 
 |     name = 'endbuf' | 
 |     def __init__(self): | 
 | 	Instruction.__init__(self, chr(15)) | 
 |  | 
 | class WordBeg(Instruction): | 
 |     name = 'wordbeg' | 
 |     def __init__(self): | 
 | 	Instruction.__init__(self, chr(16)) | 
 |  | 
 | class WordEnd(Instruction): | 
 |     name = 'wordend' | 
 |     def __init__(self): | 
 | 	Instruction.__init__(self, chr(17)) | 
 |  | 
 | class WordBound(Instruction): | 
 |     name = 'wordbound' | 
 |     def __init__(self): | 
 | 	Instruction.__init__(self, chr(18)) | 
 |  | 
 | class NotWordBound(Instruction): | 
 |     name = 'notwordbound' | 
 |     def __init__(self): | 
 | 	Instruction.__init__(self, chr(19)) | 
 |  | 
 | class SyntaxSpec(Instruction): | 
 |     name = 'syntaxspec' | 
 |     def __init__(self, syntax): | 
 | 	self.syntax = syntax | 
 | 	Instruction.__init__(self, chr(20), 2) | 
 |     def assemble(self, postition, labels): | 
 | 	return self.opcode + chr(self.syntax) | 
 |  | 
 | class NotSyntaxSpec(Instruction): | 
 |     name = 'notsyntaxspec' | 
 |     def __init__(self, syntax): | 
 | 	self.syntax = syntax | 
 | 	Instruction.__init__(self, chr(21), 2) | 
 |     def assemble(self, postition, labels): | 
 | 	return self.opcode + chr(self.syntax) | 
 |  | 
 | class Label(Instruction): | 
 |     name = 'label' | 
 |     def __init__(self, label): | 
 | 	self.label = label | 
 | 	Instruction.__init__(self, '', 0) | 
 |     def __repr__(self): | 
 | 	return '%-15s %i' % (self.name, self.label) | 
 |  | 
 | class OpenParen(Instruction): | 
 |     name = '(' | 
 |     def __init__(self, register): | 
 | 	self.register = register | 
 | 	Instruction.__init__(self, '', 0) | 
 |     def assemble(self, position, labels): | 
 | 	raise error, 'unmatched open parenthesis' | 
 |  | 
 | class Alternation(Instruction): | 
 |     name = '|' | 
 |     def __init__(self): | 
 | 	Instruction.__init__(self, '', 0) | 
 |     def assemble(self, position, labels): | 
 | 	raise error, 'an alternation was not taken care of' | 
 |  | 
 | # | 
 | # | 
 | # | 
 |  | 
 | def assemble(instructions): | 
 |     labels = {} | 
 |     position = 0 | 
 |     pass1 = [] | 
 |     for instruction in instructions: | 
 | 	if instruction.name == 'label': | 
 | 	    labels[instruction.label] = position | 
 | 	else: | 
 | 	    pass1.append((position, instruction)) | 
 | 	    position = position + instruction.size | 
 |     pass2 = '' | 
 |     for position, instruction in pass1: | 
 | 	pass2 = pass2 + instruction.assemble(position, labels) | 
 |     return pass2 | 
 |  | 
 | # | 
 | # | 
 | # | 
 |  | 
 | def escape(pattern): | 
 |     result = [] | 
 |     for char in pattern: | 
 | 	if not reop.syntax_table[char] & reop.word: | 
 | 	    result.append('\\') | 
 | 	result.append(char) | 
 |     return string.join(result, '') | 
 |  | 
 | # | 
 | # | 
 | # | 
 |  | 
 | def registers_used(instructions): | 
 |     result = [] | 
 |     for instruction in instructions: | 
 | 	if (instruction.name in ['set_memory', 'end_memory']) and \ | 
 | 	   (instruction.register not in result): | 
 | 	    result.append(instruction.register) | 
 |     return result | 
 |  | 
 | # | 
 | # | 
 | # | 
 |  | 
 | class Fastmap: | 
 |     def __init__(self): | 
 | 	self.map = ['\000']*256 | 
 | 	self.can_be_null = 0 | 
 |     def add(self, char): | 
 | 	self.map[ord(char)] = '\001' | 
 |     def fastmap(self): | 
 | 	return string.join(self.map, '') | 
 |     def __getitem__(self, char): | 
 | 	return ord(self.map[ord(char)]) | 
 |     def __repr__(self): | 
 | 	self.map.sort() | 
 | 	return 'Fastmap(' + `self.can_be_null` + ', ' + `self.map` + ')' | 
 |  | 
 | # | 
 | # | 
 | # | 
 |  | 
 | def find_label(code, label): | 
 |     line = 0 | 
 |     for instruction in code: | 
 | 	if (instruction.name == 'label') and (instruction.label == label): | 
 | 	    return line + 1 | 
 | 	line = line + 1 | 
 | 	 | 
 | def build_fastmap_aux(code, pos, visited, fastmap): | 
 |     if visited[pos]: | 
 | 	return | 
 |     while 1: | 
 | 	instruction = code[pos] | 
 | 	visited[pos] = 1 | 
 | 	pos = pos + 1 | 
 | 	if instruction.name == 'end': | 
 | 	    fastmap.can_be_null = 1 | 
 | 	    return | 
 | 	elif instruction.name == 'syntaxspec': | 
 | 	    for char in map(chr, range(256)): | 
 | 		if reop.syntax_table[char] & instruction.syntax: | 
 | 		    fastmap.add(char) | 
 | 	    return | 
 | 	elif instruction.name == 'notsyntaxspec': | 
 | 	    for char in map(chr, range(256)): | 
 | 		if not reop.syntax_table[char] & instruction.syntax: | 
 | 		    fastmap.add(char) | 
 | 	    return | 
 | 	elif instruction.name == 'eol': | 
 | 	    fastmap.add('\n') | 
 | 	    if fastmap.can_be_null == 0: | 
 | 		fastmap.can_be_null = 2 | 
 | 	    return | 
 | 	elif instruction.name == 'set': | 
 | 	    for char in instruction.set: | 
 | 		fastmap.add(char) | 
 | 	    return | 
 | 	elif instruction.name == 'exact': | 
 | 	    fastmap.add(instruction.char) | 
 | 	elif instruction.name == 'anychar': | 
 | 	    for char in map(chr, range(256)): | 
 | 		if char != '\n': | 
 | 		    fastmap.add(char) | 
 | 	    return | 
 | 	elif instruction.name == 'match_memory': | 
 | 	    for char in map(chr, range(256)): | 
 | 		fastmap.add(char) | 
 | 	    fastmap.can_be_null = 1 | 
 | 	    return | 
 | 	elif instruction.name in ['jump', 'dummy_failure_jump', \ | 
 | 				  'update_failure_jump', 'star_jump']: | 
 | 	    pos = find_label(code, instruction.label) | 
 | 	    if visited[pos]: | 
 | 		return | 
 | 	    visited[pos] = 1 | 
 | 	elif instruction.name  == 'failure_jump': | 
 | 	    build_fastmap_aux(code, | 
 | 			      find_label(code, instruction.label), | 
 | 			      visited, | 
 | 			      fastmap) | 
 | 	 | 
 | def build_fastmap(code, pos=0): | 
 |     visited = [0] * len(code) | 
 |     fastmap = Fastmap() | 
 |     build_fastmap_aux(code, pos, visited, fastmap) | 
 |     return fastmap | 
 |  | 
 | # | 
 | # | 
 | # | 
 |  | 
 | #[NORMAL, CHARCLASS, REPLACEMENT] = range(3) | 
 | #[CHAR, MEMORY_REFERENCE, SYNTAX, NOT_SYNTAX, SET, WORD_BOUNDARY, | 
 | # NOT_WORD_BOUNDARY, BEGINNING_OF_BUFFER, END_OF_BUFFER] = range(9) | 
 |  | 
 | def expand_escape(pattern, index, context=NORMAL): | 
 |     if index >= len(pattern): | 
 | 	raise error, 'escape ends too soon' | 
 |  | 
 |     elif pattern[index] == 't': | 
 | 	return CHAR, chr(9), index + 1 | 
 |      | 
 |     elif pattern[index] == 'n': | 
 | 	return CHAR, chr(10), index + 1 | 
 |      | 
 |     elif pattern[index] == 'v': | 
 | 	return CHAR, chr(11), index + 1 | 
 |      | 
 |     elif pattern[index] == 'r': | 
 | 	return CHAR, chr(13), index + 1 | 
 |      | 
 |     elif pattern[index] == 'f': | 
 | 	return CHAR, chr(12), index + 1 | 
 |      | 
 |     elif pattern[index] == 'a': | 
 | 	return CHAR, chr(7), index + 1 | 
 |      | 
 |     elif pattern[index] == 'x': | 
 | 	# CAUTION: this is the Python rule, not the Perl rule! | 
 | 	end = index + 1  # Skip over the 'x' character | 
 | 	while (end < len(pattern)) and (pattern[end] in string.hexdigits): | 
 | 	    end = end + 1 | 
 | 	if end == index: | 
 | 	    raise error, "\\x must be followed by hex digit(s)" | 
 | 	# let Python evaluate it, so we don't incorrectly 2nd-guess | 
 | 	# what it's doing (and Python in turn passes it on to sscanf, | 
 | 	# so that *it* doesn't incorrectly 2nd-guess what C does!) | 
 | 	char = eval ('"' + pattern[index-1:end] + '"') | 
 | 	assert len(char) == 1 | 
 | 	return CHAR, char, end | 
 |  | 
 |     elif pattern[index] == 'b': | 
 | 	if context != NORMAL: | 
 | 	    return CHAR, chr(8), index + 1 | 
 | 	else: | 
 | 	    return WORD_BOUNDARY, '', index + 1 | 
 | 	     | 
 |     elif pattern[index] == 'B': | 
 | 	if context != NORMAL: | 
 | 	    return CHAR, 'B', index + 1 | 
 | 	else: | 
 | 	    return NOT_WORD_BOUNDARY, '', index + 1 | 
 | 	     | 
 |     elif pattern[index] == 'A': | 
 | 	if context != NORMAL: | 
 | 	    return CHAR, 'A', index + 1 | 
 | 	else: | 
 | 	    return BEGINNING_OF_BUFFER, '', index + 1 | 
 | 	     | 
 |     elif pattern[index] == 'Z': | 
 | 	if context != NORMAL: | 
 | 	    return CHAR, 'Z', index + 1 | 
 | 	else: | 
 | 	    return END_OF_BUFFER, '', index + 1 | 
 | 	     | 
 |     elif pattern[index] in 'GluLUQE': | 
 | 	raise error, ('\\' + pattern[index] + ' is not allowed') | 
 |      | 
 |     elif pattern[index] == 'w': | 
 | 	if context == NORMAL: | 
 | 	    return SYNTAX, reop.word, index + 1 | 
 | 	elif context == CHARCLASS: | 
 | 	    set = [] | 
 | 	    for char in reop.syntax_table.keys(): | 
 | 		if reop.syntax_table[char] & reop.word: | 
 | 		    set.append(char) | 
 | 	    return SET, set, index + 1 | 
 | 	else: | 
 | 	    return CHAR, 'w', index + 1 | 
 | 	 | 
 |     elif pattern[index] == 'W': | 
 | 	if context == NORMAL: | 
 | 	    return NOT_SYNTAX, reop.word, index + 1 | 
 | 	elif context == CHARCLASS: | 
 | 	    set = [] | 
 | 	    for char in reop.syntax_table.keys(): | 
 | 		if not reop.syntax_table[char] & reop.word: | 
 | 		    set.append(char) | 
 | 	    return SET, set, index + 1 | 
 | 	else: | 
 | 	    return CHAR, 'W', index + 1 | 
 | 	 | 
 |     elif pattern[index] == 's': | 
 | 	if context == NORMAL: | 
 | 	    return SYNTAX, reop.whitespace, index + 1 | 
 | 	elif context == CHARCLASS: | 
 | 	    set = [] | 
 | 	    for char in reop.syntax_table.keys(): | 
 | 		if reop.syntax_table[char] & reop.whitespace: | 
 | 		    set.append(char) | 
 | 	    return SET, set, index + 1 | 
 | 	else: | 
 | 	    return CHAR, 's', index + 1 | 
 | 	 | 
 |     elif pattern[index] == 'S': | 
 | 	if context == NORMAL: | 
 | 	    return NOT_SYNTAX, reop.whitespace, index + 1 | 
 | 	elif context == CHARCLASS: | 
 | 	    set = [] | 
 | 	    for char in reop.syntax_table.keys(): | 
 | 		if not reop.syntax_table[char] & reop.whitespace: | 
 | 		    set.append(char) | 
 | 	    return SET, set, index + 1 | 
 | 	else: | 
 | 	    return CHAR, 'S', index + 1 | 
 | 	 | 
 |     elif pattern[index] == 'd': | 
 | 	if context == NORMAL: | 
 | 	    return SYNTAX, reop.digit, index + 1 | 
 | 	elif context == CHARCLASS: | 
 | 	    set = [] | 
 | 	    for char in reop.syntax_table.keys(): | 
 | 		if reop.syntax_table[char] & reop.digit: | 
 | 		    set.append(char) | 
 | 	    return SET, set, index + 1 | 
 | 	else: | 
 | 	    return CHAR, 'd', index + 1 | 
 | 	 | 
 |     elif pattern[index] == 'D': | 
 | 	if context == NORMAL: | 
 | 	    return NOT_SYNTAX, reop.digit, index + 1 | 
 | 	elif context == CHARCLASS: | 
 | 	    set = [] | 
 | 	    for char in reop.syntax_table.keys(): | 
 | 		if not reop.syntax_table[char] & reop.digit: | 
 | 		    set.append(char) | 
 | 	    return SET, set, index + 1 | 
 | 	else: | 
 | 	    return CHAR, 'D', index + 1 | 
 |  | 
 |     elif pattern[index] in '0123456789': | 
 |  | 
 | 	if pattern[index] == '0': | 
 | 	    if (index + 1 < len(pattern)) and \ | 
 | 	       (pattern[index + 1] in string.octdigits): | 
 | 		if (index + 2 < len(pattern)) and \ | 
 | 		   (pattern[index + 2] in string.octdigits): | 
 | 		    value = string.atoi(pattern[index:index + 3], 8) | 
 | 		    index = index + 3 | 
 |  | 
 | 		else: | 
 | 		    value = string.atoi(pattern[index:index + 2], 8) | 
 | 		    index = index + 2 | 
 |  | 
 | 	    else: | 
 | 		value = 0 | 
 | 		index = index + 1 | 
 |  | 
 | 	    if value > 255: | 
 | 		raise error, 'octal value out of range' | 
 |  | 
 | 	    return CHAR, chr(value), index | 
 | 	 | 
 | 	else: | 
 | 	    if (index + 1 < len(pattern)) and \ | 
 | 	       (pattern[index + 1] in string.digits): | 
 | 		if (index + 2 < len(pattern)) and \ | 
 | 		   (pattern[index + 2] in string.octdigits) and \ | 
 | 		   (pattern[index + 1] in string.octdigits) and \ | 
 | 		   (pattern[index] in string.octdigits): | 
 | 		    value = string.atoi(pattern[index:index + 3], 8) | 
 | 		    if value > 255: | 
 | 			raise error, 'octal value out of range' | 
 |  | 
 | 		    return CHAR, chr(value), index + 3 | 
 |  | 
 | 		else: | 
 | 		    value = string.atoi(pattern[index:index + 2]) | 
 | 		    if (value < 1) or (value > 99): | 
 | 			raise error, 'memory reference out of range' | 
 |  | 
 | 		    if context == CHARCLASS: | 
 | 			raise error, ('cannot reference a register from ' | 
 | 				      'inside a character class') | 
 | 		    return MEMORY_REFERENCE, value, index + 2 | 
 |  | 
 | 	    else: | 
 | 		if context == CHARCLASS: | 
 | 		    raise error, ('cannot reference a register from ' | 
 | 				  'inside a character class') | 
 |  | 
 | 		value = string.atoi(pattern[index]) | 
 | 		return MEMORY_REFERENCE, value, index + 1 | 
 | 	     | 
 |     elif pattern[index] == 'g': | 
 | 	if context != REPLACEMENT: | 
 | 	    return CHAR, 'g', index + 1 | 
 |  | 
 | 	index = index + 1 | 
 | 	if index >= len(pattern): | 
 | 	    raise error, 'unfinished symbolic reference' | 
 | 	if pattern[index] != '<': | 
 | 	    raise error, 'missing < in symbolic reference' | 
 |  | 
 | 	index = index + 1 | 
 | 	end = string.find(pattern, '>', index) | 
 | 	if end == -1: | 
 | 	    raise error, 'unfinished symbolic reference' | 
 | 	value = pattern[index:end] | 
 | 	if not valid_identifier(value): | 
 | 	    raise error, 'illegal symbolic reference' | 
 | 	return MEMORY_REFERENCE, value, end + 1 | 
 |      | 
 |     else: | 
 | 	return CHAR, pattern[index], index + 1 | 
 |  | 
 | def compile(pattern, flags=0): | 
 |     stack = [] | 
 |     label = 0 | 
 |     register = 1 | 
 |     groupindex = {} | 
 |     lastop = '' | 
 |  | 
 |     # look for embedded pattern modifiers at the beginning of the pattern | 
 |  | 
 |     index = 0 | 
 |  | 
 |     if len(pattern) >= 3 and \ | 
 |        (pattern[:2] == '(?') and \ | 
 |        (pattern[2] in 'iImMsSxX'): | 
 | 	index = 2 | 
 | 	while (index < len(pattern)) and (pattern[index] != ')'): | 
 | 	    if pattern[index] in 'iI': | 
 | 		flags = flags | IGNORECASE | 
 | 	    elif pattern[index] in 'mM': | 
 | 		flags = flags | MULTILINE | 
 | 	    elif pattern[index] in 'sS': | 
 | 		flags = flags | DOTALL | 
 | 	    elif pattern[index] in 'xX': | 
 | 		flags = flags | VERBOSE | 
 | 	    else: | 
 | 		raise error, 'unknown modifier' | 
 | 	    index = index + 1 | 
 | 	index = index + 1 | 
 |  | 
 |     # compile the rest of the pattern | 
 |      | 
 |     while (index < len(pattern)): | 
 | 	char = pattern[index] | 
 | 	index = index + 1 | 
 | 	if char == '\\': | 
 | 	    escape_type, value, index = expand_escape(pattern, index) | 
 |  | 
 | 	    if escape_type == CHAR: | 
 | 		stack.append([Exact(value, flags)]) | 
 | 		lastop = '\\' + value | 
 | 		 | 
 | 	    elif escape_type == MEMORY_REFERENCE: | 
 | 		if value >= register: | 
 | 		    raise error, ('cannot reference a register ' | 
 | 				  'not yet used') | 
 | 		stack.append([MatchMemory(value)]) | 
 | 		lastop = '\\1' | 
 | 		 | 
 | 	    elif escape_type == BEGINNING_OF_BUFFER: | 
 | 		stack.append([BegBuf()]) | 
 | 		lastop = '\\A' | 
 | 		 | 
 | 	    elif escape_type == END_OF_BUFFER: | 
 | 		stack.append([EndBuf()]) | 
 | 		lastop = '\\Z' | 
 | 		 | 
 | 	    elif escape_type == WORD_BOUNDARY: | 
 | 		stack.append([WordBound()]) | 
 | 		lastop = '\\b' | 
 | 		 | 
 | 	    elif escape_type == NOT_WORD_BOUNDARY: | 
 | 		stack.append([NotWordBound()]) | 
 | 		lastop = '\\B' | 
 | 		 | 
 | 	    elif escape_type == SYNTAX: | 
 | 		stack.append([SyntaxSpec(value)]) | 
 | 		if value == reop.word: | 
 | 		    lastop = '\\w' | 
 | 		elif value == reop.whitespace: | 
 | 		    lastop = '\\s' | 
 | 		elif value == reop.digit: | 
 | 		    lastop = '\\d' | 
 | 		else: | 
 | 		    lastop = '\\?' | 
 | 		     | 
 | 	    elif escape_type == NOT_SYNTAX: | 
 | 		stack.append([NotSyntaxSpec(value)]) | 
 | 		if value == reop.word: | 
 | 		    lastop = '\\W' | 
 | 		elif value == reop.whitespace: | 
 | 		    lastop = '\\S' | 
 | 		elif value == reop.digit: | 
 | 		    lastop = '\\D' | 
 | 		else: | 
 | 		    lastop = '\\?' | 
 | 		 | 
 | 	    elif escape_type == SET: | 
 | 		raise error, 'cannot use set escape type here' | 
 | 	     | 
 | 	    else: | 
 | 		raise error, 'unknown escape type' | 
 |  | 
 | 	elif char == '|': | 
 | 	    expr = [] | 
 | 	     | 
 | 	    while (len(stack) != 0) and \ | 
 | 		  (stack[-1][0].name != '(') and \ | 
 | 		  (stack[-1][0].name != '|'): | 
 | 		expr = stack[-1] + expr | 
 | 		del stack[-1] | 
 | 	    stack.append([FailureJump(label)] + \ | 
 | 			 expr + \ | 
 | 			 [Jump(-1), | 
 | 			  Label(label)]) | 
 | 	    stack.append([Alternation()]) | 
 | 	    label = label + 1 | 
 | 	    lastop = '|' | 
 | 	     | 
 | 	elif char == '(': | 
 | 	    if index >= len(pattern): | 
 | 		raise error, 'no matching close paren' | 
 |  | 
 | 	    elif pattern[index] == '?': | 
 | 		# Perl style (?...) extensions | 
 | 		index = index + 1 | 
 | 		if index >= len(pattern): | 
 | 		    raise error, 'extension ends prematurely' | 
 |  | 
 | 		elif pattern[index] == 'P': | 
 | 		    # Python extensions | 
 | 		    index = index + 1 | 
 | 		    if index >= len(pattern): | 
 | 			raise error, 'extension ends prematurely' | 
 |  | 
 | 		    elif pattern[index] == '<': | 
 | 			# Handle Python symbolic group names (?P<...>...) | 
 | 			index = index + 1 | 
 | 			end = string.find(pattern, '>', index) | 
 | 			if end == -1: | 
 | 			    raise error, 'no end to symbolic group name' | 
 | 			name = pattern[index:end] | 
 | 			if not valid_identifier(name): | 
 | 			    raise error, ('symbolic group name must be a ' | 
 | 					  'valid identifier') | 
 | 			index = end + 1 | 
 | 			groupindex[name] = register | 
 | 			stack.append([OpenParen(register)]) | 
 | 			register = register + 1 | 
 | 			lastop = '(' | 
 |  | 
 | 		    elif pattern[index] == '=': | 
 | 			# backreference to symbolic group name | 
 | 			if index >= len(pattern): | 
 | 			    raise error, '(?P= at the end of the pattern' | 
 | 			start = index + 1 | 
 | 			end = string.find(pattern, ')', start) | 
 | 			if end == -1: | 
 | 			    raise error, 'no ) to end symbolic group name' | 
 | 			name = pattern[start:end] | 
 | 			if name not in groupindex.keys(): | 
 | 			    raise error, ('symbolic group name ' + name + \ | 
 | 					  ' has not been used yet') | 
 | 			stack.append([MatchMemory(groupindex[name])]) | 
 | 			index = end + 1 | 
 | 			lastop = '(?P=)' | 
 | 			 | 
 | 		    else: | 
 | 			raise error, ('unknown Python extension: ' + \ | 
 | 				      pattern[index]) | 
 | 		     | 
 | 		elif pattern[index] == ':': | 
 | 		    # grouping, but no registers | 
 | 		    index = index + 1 | 
 | 		    stack.append([OpenParen(-1)]) | 
 | 		    lastop = '(' | 
 | 		     | 
 | 		elif pattern[index] == '#': | 
 | 		    # comment | 
 | 		    index = index + 1 | 
 | 		    end = string.find(pattern, ')', index) | 
 | 		    if end == -1: | 
 | 			raise error, 'no end to comment' | 
 | 		    index = end + 1 | 
 | 		    # do not change lastop | 
 | 		     | 
 | 		elif pattern[index] == '=': | 
 | 		    raise error, ('zero-width positive lookahead ' | 
 | 				  'assertion is unsupported') | 
 |  | 
 | 		elif pattern[index] == '!': | 
 | 		    raise error, ('zero-width negative lookahead ' | 
 | 				  'assertion is unsupported') | 
 |  | 
 | 		elif pattern[index] in 'iImMsSxX': | 
 | 		    raise error, ('embedded pattern modifiers are only ' | 
 | 				  'allowed at the beginning of the pattern') | 
 |  | 
 | 		else: | 
 | 		    raise error, 'unknown extension' | 
 |  | 
 | 	    else: | 
 | 		stack.append([OpenParen(register)]) | 
 | 		register = register + 1 | 
 | 		lastop = '(' | 
 |  | 
 | 	elif char == ')': | 
 | 	    # make one expression out of everything on the stack up to | 
 | 	    # the marker left by the last parenthesis | 
 | 	    expr = [] | 
 | 	    while (len(stack) > 0) and (stack[-1][0].name != '('): | 
 | 		expr = stack[-1] + expr | 
 | 		del stack[-1] | 
 |  | 
 | 	    if len(stack) == 0: | 
 | 		raise error, 'too many close parens' | 
 | 	     | 
 | 	    # remove markers left by alternation | 
 | 	    expr = filter(lambda x: x.name != '|', expr) | 
 |  | 
 | 	    # clean up jumps inserted by alternation | 
 | 	    need_label = 0 | 
 | 	    for i in range(len(expr)): | 
 | 		if (expr[i].name == 'jump') and (expr[i].label == -1): | 
 | 		    expr[i] = Jump(label) | 
 | 		    need_label = 1 | 
 | 	    if need_label: | 
 | 		expr.append(Label(label)) | 
 | 		label = label + 1 | 
 |  | 
 | 	    if stack[-1][0].register > 0: | 
 | 		expr = [StartMemory(stack[-1][0].register)] + \ | 
 | 		       expr + \ | 
 | 		       [EndMemory(stack[-1][0].register)] | 
 | 	    del stack[-1] | 
 | 	    stack.append(expr) | 
 | 	    lastop = ')' | 
 |  | 
 | 	elif char == '{': | 
 | 	    if len(stack) == 0: | 
 | 		raise error, 'no expression to repeat' | 
 | 	    end = string.find(pattern, '}', index) | 
 | 	    if end == -1: | 
 | 		raise error, ('no close curly bracket to match' | 
 | 			      ' open curly bracket') | 
 |  | 
 | 	    fields = map(string.strip, | 
 | 			 string.split(pattern[index:end], ',')) | 
 | 	    index = end + 1 | 
 |  | 
 | 	    minimal = 0 | 
 | 	    if (index < len(pattern)) and (pattern[index] == '?'): | 
 | 		minimal = 1 | 
 | 		index = index + 1 | 
 |  | 
 | 	    if len(fields) == 1: | 
 | 		# {n} or {n}? (there's really no difference) | 
 | 		try: | 
 | 		    count = string.atoi(fields[0]) | 
 | 		except ValueError: | 
 | 		    raise error, ('count must be an integer ' | 
 | 				  'inside curly braces') | 
 | 		if count > 65535: | 
 | 		    raise error, 'repeat count out of range' | 
 | 		expr = [] | 
 | 		while count > 0: | 
 | 		    expr = expr + stack[-1] | 
 | 		    count = count - 1 | 
 | 		del stack[-1] | 
 | 		stack.append(expr) | 
 | 		if minimal: | 
 | 		    lastop = '{n}?' | 
 | 		else: | 
 | 		    lastop = '{n}' | 
 |  | 
 | 	    elif len(fields) == 2: | 
 | 		# {n,} or {n,m} | 
 | 		if fields[1] == '': | 
 | 		    # {n,} | 
 | 		    try: | 
 | 			min = string.atoi(fields[0]) | 
 | 		    except ValueError: | 
 | 			raise error, ('minimum must be an integer ' | 
 | 				      'inside curly braces') | 
 | 		    if min > 65535: | 
 | 			raise error, 'minimum repeat count out of range' | 
 |  | 
 | 		    expr = [] | 
 | 		    while min > 0: | 
 | 			expr = expr + stack[-1] | 
 | 			min = min - 1 | 
 | 		    if minimal: | 
 | 			expr = expr + \ | 
 | 			       ([Jump(label + 1), | 
 | 				 Label(label)] + \ | 
 | 				stack[-1] + \ | 
 | 				[Label(label + 1), | 
 | 				 FailureJump(label)]) | 
 | 			lastop = '{n,}?' | 
 | 		    else: | 
 | 			expr = expr + \ | 
 | 			       ([Label(label), | 
 | 				 FailureJump(label + 1)] + | 
 | 				stack[-1] + | 
 | 				[StarJump(label), | 
 | 				 Label(label + 1)]) | 
 | 			lastop = '{n,}' | 
 |  | 
 | 		    del stack[-1] | 
 | 		    stack.append(expr) | 
 | 		    label = label + 2 | 
 |  | 
 | 		else: | 
 | 		    # {n,m} | 
 | 		    try: | 
 | 			min = string.atoi(fields[0]) | 
 | 		    except ValueError: | 
 | 			raise error, ('minimum must be an integer ' | 
 | 				      'inside curly braces') | 
 | 		    try: | 
 | 			max = string.atoi(fields[1]) | 
 | 		    except ValueError: | 
 | 			raise error, ('maximum must be an integer ' | 
 | 				      'inside curly braces') | 
 | 		    if min > 65535: | 
 | 			raise error, ('minumim repeat count out ' | 
 | 				      'of range') | 
 | 		    if max > 65535: | 
 | 			raise error, ('maximum repeat count out ' | 
 | 				      'of range') | 
 | 		    if min > max: | 
 | 			raise error, ('minimum repeat count must be ' | 
 | 				      'less than the maximum ' | 
 | 				      'repeat count') | 
 | 		    expr = [] | 
 | 		    while min > 0: | 
 | 			expr = expr + stack[-1] | 
 | 			min = min - 1 | 
 | 			max = max - 1 | 
 | 		    if minimal: | 
 | 			while max > 0: | 
 | 			    expr = expr + \ | 
 | 				   [FailureJump(label), | 
 | 				    Jump(label + 1), | 
 | 				    Label(label)] + \ | 
 | 				   stack[-1] + \ | 
 | 				   [Label(label + 1)] | 
 | 			    max = max - 1 | 
 | 			    label = label + 2 | 
 | 			del stack[-1] | 
 | 			stack.append(expr) | 
 | 			lastop = '{n,m}?' | 
 | 		    else: | 
 | 			while max > 0: | 
 | 			    expr = expr + \ | 
 | 				   [FailureJump(label)] + \ | 
 | 				   stack[-1] | 
 | 			    max = max - 1 | 
 | 			del stack[-1] | 
 | 			stack.append(expr + [Label(label)]) | 
 | 			label = label + 1 | 
 | 			lastop = '{n,m}' | 
 |  | 
 | 	    else: | 
 | 		raise error, ('there need to be one or two fields ' | 
 | 			      'in a {} expression') | 
 |  | 
 | 	elif char == '}': | 
 | 	    raise error, 'unbalanced close curly brace' | 
 |  | 
 | 	elif char == '*': | 
 | 	    # Kleene closure | 
 | 	    if len(stack) == 0: | 
 | 		raise error, '* needs something to repeat' | 
 |  | 
 | 	    if lastop in ['(', '|']: | 
 | 		raise error, '* needs something to repeat' | 
 |  | 
 | 	    if lastop in repetition_operators: | 
 | 		raise error, 'nested repetition operators' | 
 | 	     | 
 | 	    if (index < len(pattern)) and (pattern[index] == '?'): | 
 | 		# non-greedy matching | 
 | 		expr = [Jump(label + 1), | 
 | 			Label(label)] + \ | 
 | 		       stack[-1] + \ | 
 | 		       [Label(label + 1), | 
 | 			FailureJump(label)] | 
 | 		index = index + 1 | 
 | 		lastop = '*?' | 
 | 	    else: | 
 | 		# greedy matching | 
 | 		expr = [Label(label), | 
 | 			FailureJump(label + 1)] + \ | 
 | 		       stack[-1] + \ | 
 | 		       [StarJump(label), | 
 | 			Label(label + 1)] | 
 | 		lastop = '*' | 
 | 	    del stack[-1] | 
 | 	    stack.append(expr) | 
 | 	    label = label + 2 | 
 |  | 
 | 	elif char == '+': | 
 | 	    # positive closure | 
 | 	    if len(stack) == 0: | 
 | 		raise error, '+ needs something to repeat' | 
 | 	     | 
 | 	    if lastop in ['(', '|']: | 
 | 		raise error, '+ needs something to repeat' | 
 |  | 
 | 	    if lastop in repetition_operators: | 
 | 		raise error, 'nested repetition operators' | 
 |  | 
 | 	    if (index < len(pattern)) and (pattern[index] == '?'): | 
 | 		# non-greedy | 
 | 		expr = [Label(label)] + \ | 
 | 		       stack[-1] + \ | 
 | 		       [FailureJump(label)] | 
 | 		label = label + 1 | 
 | 		index = index + 1 | 
 | 		lastop = '+?' | 
 | 		 | 
 | 	    else: | 
 | 		# greedy | 
 | 		expr = [DummyFailureJump(label + 1), | 
 | 			Label(label), | 
 | 			FailureJump(label + 2), | 
 | 			Label(label + 1)] + \ | 
 | 		       stack[-1] + \ | 
 | 		       [StarJump(label), | 
 | 			Label(label + 2)] | 
 | 		label = label + 3 | 
 | 		lastop = '+' | 
 | 		 | 
 | 	    del stack[-1] | 
 | 	    stack.append(expr) | 
 |  | 
 | 	elif char == '?': | 
 | 	    if len(stack) == 0: | 
 | 		raise error, 'need something to be optional' | 
 | 	     | 
 | 	    if len(stack) == 0: | 
 | 		raise error, '? needs something to repeat' | 
 | 	     | 
 | 	    if lastop in ['(', '|']: | 
 | 		raise error, '? needs something to repeat' | 
 |  | 
 | 	    if lastop in repetition_operators: | 
 | 		raise error, 'nested repetition operators' | 
 |  | 
 | 	    if (index < len(pattern)) and (pattern[index] == '?'): | 
 | 		# non-greedy matching | 
 | 		expr = [FailureJump(label), | 
 | 			Jump(label + 1), | 
 | 			Label(label)] + \ | 
 | 		       stack[-1] + \ | 
 | 		       [Label(label + 1)] | 
 | 		label = label + 2 | 
 | 		index = index + 1 | 
 | 		lastop = '??' | 
 | 		 | 
 | 	    else: | 
 | 		# greedy matching | 
 | 		expr = [FailureJump(label)] + \ | 
 | 		       stack[-1] + \ | 
 | 		       [Label(label)] | 
 | 		label = label + 1 | 
 | 		lastop = '?' | 
 | 		 | 
 | 	    del stack[-1] | 
 | 	    stack.append(expr) | 
 |  | 
 | 	elif char == '.': | 
 | 	    if flags & DOTALL: | 
 | 		stack.append([Set(map(chr, range(256)), flags)]) | 
 | 	    else: | 
 | 		stack.append([AnyChar()]) | 
 | 	    lastop = '.' | 
 |  | 
 | 	elif char == '^': | 
 | 	    if flags & MULTILINE: | 
 | 		stack.append([Bol()]) | 
 | 	    else: | 
 | 		stack.append([BegBuf()]) | 
 | 	    lastop = '^' | 
 |  | 
 | 	elif char == '$': | 
 | 	    if flags & MULTILINE: | 
 | 		stack.append([Eol()]) | 
 | 	    else: | 
 | 		stack.append([EndBuf()]) | 
 | 	    lastop = '$' | 
 |  | 
 | 	elif char == '#': | 
 | 	    if flags & VERBOSE: | 
 | 		# comment | 
 | 		index = index + 1 | 
 | 		end = string.find(pattern, '\n', index) | 
 | 		if end == -1: | 
 | 		    index = len(pattern) | 
 | 		else: | 
 | 		    index = end + 1 | 
 | 		# do not change lastop | 
 | 	    else: | 
 | 		stack.append([Exact(char, flags)]) | 
 | 		lastop = '#' | 
 |  | 
 | 	elif char in string.whitespace: | 
 | 	    if not (flags & VERBOSE): | 
 | 		stack.append([Exact(char, flags)]) | 
 | 		lastop = char | 
 |  | 
 | 	elif char == '[': | 
 | 	    # compile character class | 
 | 	     | 
 | 	    if index >= len(pattern): | 
 | 		raise error, 'unclosed character class' | 
 | 	     | 
 | 	    negate = 0 | 
 | 	    last = '' | 
 | 	    set = [] | 
 |  | 
 | 	    if pattern[index] == '^': | 
 | 		negate = 1 | 
 | 		index = index + 1 | 
 | 		if index >= len(pattern): | 
 | 		    raise error, 'unclosed character class' | 
 |  | 
 | 	    if pattern[index] == ']': | 
 | 		set.append(']') | 
 | 		index = index + 1 | 
 | 		if index >= len(pattern): | 
 | 		    raise error, 'unclosed character class' | 
 | 		 | 
 | 	    elif pattern[index] == '-': | 
 | 		set.append('-') | 
 | 		index = index + 1 | 
 | 		if index >= len(pattern): | 
 | 		    raise error, 'unclosed character class' | 
 | 		 | 
 | 	    while (index < len(pattern)) and (pattern[index] != ']'): | 
 | 		next = pattern[index] | 
 | 		index = index + 1 | 
 | 		if next == '-': | 
 | 		    if index >= len(pattern): | 
 | 			raise error, 'incomplete range in character class' | 
 |  | 
 | 		    elif pattern[index] == ']': | 
 | 			set.append('-') | 
 | 			 | 
 | 		    else: | 
 | 			if last == '': | 
 | 			    raise error, ('improper use of range in ' | 
 | 					  'character class') | 
 |  | 
 | 			start = last | 
 | 			 | 
 | 			if pattern[index] == '\\': | 
 | 			    escape_type, | 
 | 			    value, | 
 | 			    index = expand_escape(pattern, | 
 | 						  index + 1, | 
 | 						  CHARCLASS) | 
 |  | 
 | 			    if escape_type == CHAR: | 
 | 				end = value | 
 | 				 | 
 | 			    else: | 
 | 				raise error, ('illegal escape in character ' | 
 | 					      'class range') | 
 | 			else: | 
 | 			    end = pattern[index] | 
 | 			    index = index + 1 | 
 |  | 
 | 			if start > end: | 
 | 			    raise error, ('range arguments out of order ' | 
 | 					  'in character class') | 
 | 			 | 
 | 			for char in map(chr, range(ord(start), ord(end) + 1)): | 
 | 			    if char not in set: | 
 | 				set.append(char) | 
 | 			     | 
 | 			last = '' | 
 |  | 
 | 		elif next == '\\': | 
 | 		    # expand syntax meta-characters and add to set | 
 | 		    if index >= len(pattern): | 
 | 			raise error, 'incomplete set' | 
 |  | 
 | 		    escape_type, value, index = expand_escape(pattern, | 
 | 							      index, | 
 | 							      CHARCLASS) | 
 |  | 
 | 		    if escape_type == CHAR: | 
 | 			set.append(value) | 
 | 			last = value | 
 |  | 
 | 		    elif escape_type == SET: | 
 | 			for char in value: | 
 | 			    if char not in set: | 
 | 				set.append(char) | 
 | 			last = '' | 
 |  | 
 | 		    else: | 
 | 			raise error, 'illegal escape type in character class' | 
 |  | 
 | 		else: | 
 | 		    if next not in set: | 
 | 			set.append(next) | 
 | 		    last = next | 
 | 		     | 
 | 	    if (index >= len(pattern)) or ( pattern[index] != ']'): | 
 | 		raise error, 'incomplete set' | 
 |  | 
 | 	    index = index + 1 | 
 |  | 
 | 	    if negate: | 
 | 		# If case is being ignored, then both upper- and lowercase | 
 | 		# versions of the letters must be excluded. | 
 | 		if flags & IGNORECASE: set=set+map(string.upper, set) | 
 | 		notset = [] | 
 | 		for char in map(chr, range(256)): | 
 | 		    if char not in set: | 
 | 			notset.append(char) | 
 | 		if len(notset) == 0: | 
 | 		    raise error, 'empty negated set' | 
 | 		stack.append([Set(notset, flags)]) | 
 | 	    else: | 
 | 		if len(set) == 0: | 
 | 		    raise error, 'empty set' | 
 | 		stack.append([Set(set, flags)]) | 
 |  | 
 | 	    lastop = '[]' | 
 |  | 
 | 	else: | 
 | 	    stack.append([Exact(char, flags)]) | 
 | 	    lastop = char | 
 |  | 
 |     code = [] | 
 |     while len(stack) > 0: | 
 | 	if stack[-1][0].name == '(': | 
 | 	    raise error, 'too many open parens' | 
 | 	code = stack[-1] + code | 
 | 	del stack[-1] | 
 |     if len(code) == 0: | 
 | 	raise error, 'no code generated' | 
 |     code = filter(lambda x: x.name != '|', code) | 
 |     need_label = 0 | 
 |     for i in range(len(code)): | 
 | 	if (code[i].name == 'jump') and (code[i].label == -1): | 
 | 	    code[i] = Jump(label) | 
 | 	    need_label = 1 | 
 |     if need_label: | 
 | 	code.append(Label(label)) | 
 | 	label = label + 1 | 
 |     code.append(End()) | 
 | #    print code | 
 |     return RegexObject(pattern, flags, code, register, groupindex) | 
 |  | 
 | # Replace expand_escape and _expand functions with their C equivalents. | 
 | # If you suspect bugs in the C versions, comment out the next two lines | 
 | expand_escape = reop.expand_escape | 
 | _expand  = reop._expand |