| # | 
 | # Secret Labs' Regular Expression Engine | 
 | # $Id$ | 
 | # | 
 | # convert template to internal format | 
 | # | 
 | # Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved. | 
 | # | 
 | # This code can only be used for 1.6 alpha testing.  All other use | 
 | # require explicit permission from Secret Labs AB. | 
 | # | 
 | # Portions of this engine have been developed in cooperation with | 
 | # CNRI.  Hewlett-Packard provided funding for 1.6 integration and | 
 | # other compatibility work. | 
 | # | 
 |  | 
 | import array, string, sys | 
 |  | 
 | import _sre | 
 |  | 
 | from sre_constants import * | 
 |  | 
 | # find an array type code that matches the engine's code size | 
 | for WORDSIZE in "BHil": | 
 |     if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize(): | 
 | 	break | 
 | else: | 
 |     raise RuntimeError, "cannot find a useable array type" | 
 |  | 
 | # FIXME: <fl> should move some optimizations from the parser to here! | 
 |  | 
 | class Code: | 
 |     def __init__(self): | 
 | 	self.data = [] | 
 |     def __len__(self): | 
 | 	return len(self.data) | 
 |     def __getitem__(self, index): | 
 | 	return self.data[index] | 
 |     def __setitem__(self, index, code): | 
 | 	self.data[index] = code | 
 |     def append(self, code): | 
 | 	self.data.append(code) | 
 |     def todata(self): | 
 | 	# print self.data | 
 | 	try: | 
 | 	    return array.array(WORDSIZE, self.data).tostring() | 
 | 	except OverflowError: | 
 | 	    print self.data | 
 | 	    raise | 
 |  | 
 | def _compile(code, pattern, flags, level=0): | 
 |     append = code.append | 
 |     for op, av in pattern: | 
 | 	if op is ANY: | 
 | 	    if flags & SRE_FLAG_DOTALL: | 
 | 		append(OPCODES[op]) # any character at all! | 
 | 	    else: | 
 | 		append(OPCODES[CATEGORY]) | 
 | 		append(CHCODES[CATEGORY_NOT_LINEBREAK]) | 
 | 	elif op in (SUCCESS, FAILURE): | 
 | 	    append(OPCODES[op]) | 
 | 	elif op is AT: | 
 | 	    append(OPCODES[op]) | 
 | 	    if flags & SRE_FLAG_MULTILINE: | 
 | 		append(ATCODES[AT_MULTILINE[av]]) | 
 | 	    else: | 
 | 		append(ATCODES[av]) | 
 | 	elif op is BRANCH: | 
 | 	    append(OPCODES[op]) | 
 | 	    tail = [] | 
 | 	    for av in av[1]: | 
 | 		skip = len(code); append(0) | 
 | 		_compile(code, av, flags, level) | 
 | 		append(OPCODES[JUMP]) | 
 | 		tail.append(len(code)); append(0) | 
 | 		code[skip] = len(code) - skip | 
 | 	    append(0) # end of branch | 
 | 	    for tail in tail: | 
 | 		code[tail] = len(code) - tail | 
 | 	elif op is CALL: | 
 | 	    append(OPCODES[op]) | 
 | 	    skip = len(code); append(0) | 
 | 	    _compile(code, av, flags, level+1) | 
 | 	    append(OPCODES[SUCCESS]) | 
 | 	    code[skip] = len(code) - skip | 
 | 	elif op is CATEGORY: # not used by current parser | 
 | 	    append(OPCODES[op]) | 
 | 	    if flags & SRE_FLAG_LOCALE: | 
 | 		append(CH_LOCALE[CHCODES[av]]) | 
 | 	    else: | 
 | 		append(CHCODES[av]) | 
 | 	elif op is GROUP: | 
 | 	    if flags & SRE_FLAG_IGNORECASE: | 
 | 		append(OPCODES[OP_IGNORE[op]]) | 
 | 	    else: | 
 | 		append(OPCODES[op]) | 
 | 	    append(av-1) | 
 | 	elif op is IN: | 
 | 	    if flags & SRE_FLAG_IGNORECASE: | 
 | 		append(OPCODES[OP_IGNORE[op]]) | 
 | 		def fixup(literal): | 
 | 		    return ord(literal.lower()) | 
 | 	    else: | 
 | 		append(OPCODES[op]) | 
 | 		fixup = ord | 
 | 	    skip = len(code); append(0) | 
 | 	    for op, av in av: | 
 | 		append(OPCODES[op]) | 
 | 		if op is NEGATE: | 
 | 		    pass | 
 | 		elif op is LITERAL: | 
 | 		    append(fixup(av)) | 
 | 		elif op is RANGE: | 
 | 		    append(fixup(av[0])) | 
 | 		    append(fixup(av[1])) | 
 | 		elif op is CATEGORY: | 
 | 		    if flags & SRE_FLAG_LOCALE: | 
 | 			append(CH_LOCALE[CHCODES[av]]) | 
 | 		    else: | 
 | 			append(CHCODES[av]) | 
 | 		else: | 
 | 		    raise ValueError, "unsupported set operator" | 
 | 	    append(OPCODES[FAILURE]) | 
 | 	    code[skip] = len(code) - skip | 
 | 	elif op in (LITERAL, NOT_LITERAL): | 
 | 	    if flags & SRE_FLAG_IGNORECASE: | 
 | 		append(OPCODES[OP_IGNORE[op]]) | 
 | 		append(ord(av.lower())) | 
 | 	    else: | 
 | 		append(OPCODES[op]) | 
 | 		append(ord(av)) | 
 | 	elif op is MARK: | 
 | 	    append(OPCODES[op]) | 
 | 	    append(av) | 
 |  	elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT): | 
 | 	    lo, hi = av[2].getwidth() | 
 |  	    if lo == 0: | 
 |  		raise SyntaxError, "cannot repeat zero-width items" | 
 | 	    if lo == hi == 1 and op is MAX_REPEAT: | 
 | 		append(OPCODES[MAX_REPEAT_ONE]) | 
 | 		skip = len(code); append(0) | 
 | 		append(av[0]) | 
 | 		append(av[1]) | 
 | 		_compile(code, av[2], flags, level+1) | 
 | 		append(OPCODES[SUCCESS]) | 
 | 		code[skip] = len(code) - skip | 
 | 	    else: | 
 | 		append(OPCODES[op]) | 
 | 		skip = len(code); append(0) | 
 | 		append(av[0]) | 
 | 		append(av[1]) | 
 | 		_compile(code, av[2], flags, level+1) | 
 | 		if op is MIN_REPEAT: | 
 | 		    append(OPCODES[MIN_UNTIL]) | 
 | 		else: | 
 | 		    append(OPCODES[MAX_UNTIL]) | 
 | 		code[skip] = len(code) - skip | 
 | 	elif op is SUBPATTERN: | 
 |  	    group = av[0] | 
 |  	    if group: | 
 |  		append(OPCODES[MARK]) | 
 |  		append((group-1)*2) | 
 | 	    _compile(code, av[1], flags, level+1) | 
 |  	    if group: | 
 |  		append(OPCODES[MARK]) | 
 |  		append((group-1)*2+1) | 
 | 	else: | 
 | 	    raise ValueError, ("unsupported operand type", op) | 
 |  | 
 | def compile(p, flags=0): | 
 |     # convert pattern list to internal format | 
 |     if type(p) in (type(""), type(u"")): | 
 | 	import sre_parse | 
 | 	pattern = p | 
 | 	p = sre_parse.parse(p) | 
 |     else: | 
 | 	pattern = None | 
 |     flags = p.pattern.flags | flags | 
 |     code = Code() | 
 |     _compile(code, p.data, flags) | 
 |     code.append(OPCODES[SUCCESS]) | 
 |     data = code.todata() | 
 |     if 0: # debugging | 
 | 	print | 
 | 	print "-" * 68 | 
 | 	import sre_disasm | 
 | 	sre_disasm.disasm(data) | 
 | 	print "-" * 68 | 
 |     return _sre.compile( | 
 | 	pattern, flags, | 
 | 	data, | 
 | 	p.pattern.groups-1, p.pattern.groupdict | 
 | 	) |