blob: a51531b4980d1e75f11fb03f5017770557659697 [file] [log] [blame]
#
# Secret Labs' Regular Expression Engine
# $Id$
#
# convert template to internal format
#
# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
#
# Portions of this engine have been developed in cooperation with
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work.
#
13
Fredrik Lundhbe2211e2000-06-29 16:57:40 +000014import array
Guido van Rossum7627c0d2000-03-31 14:58:54 +000015import _sre
16
17from sre_constants import *
18
# find an array type code that matches the engine's code size
#
# WORDSIZE is left bound to the first type code in "BHil" whose element
# width equals _sre.getcodesize(); the for/else raises RuntimeError when
# none of the candidates match.
for WORDSIZE in "BHil":
    # compare the element width directly instead of serializing a
    # one-element array and measuring the resulting string
    if array.array(WORDSIZE).itemsize == _sre.getcodesize():
        break
else:
    raise RuntimeError("cannot find a useable array type")
25
def _emit_category(emit, category, flags):
    # internal: emit the code word for a CATEGORY operand.  The plain
    # CHCODES entry is remapped through CH_LOCALE when SRE_FLAG_LOCALE
    # is set, otherwise through CH_UNICODE when SRE_FLAG_UNICODE is set
    # (LOCALE is tested first).
    chcode = CHCODES[category]
    if flags & SRE_FLAG_LOCALE:
        emit(CH_LOCALE[chcode])
    elif flags & SRE_FLAG_UNICODE:
        emit(CH_UNICODE[chcode])
    else:
        emit(chcode)

def _compile(code, pattern, flags):
    # internal: compile a (sub)pattern
    #
    # code    -- list of code words; output is appended to it in place
    # pattern -- sequence of (op, av) pairs (operator, operand)
    # flags   -- bitwise OR of SRE_FLAG_* values
    #
    # Returns nothing.  Raises `error` for a repeat whose body reports a
    # minimum width of zero (unless SRE_FLAG_TEMPLATE is set) and for an
    # unknown operator inside a set; raises ValueError for an unknown
    # top-level operator.
    #
    # Several constructs reserve a "skip" word that is filled in once
    # the construct has been emitted, with the distance from that word
    # to the current end of the code list.
    emit = code.append
    for op, av in pattern:
        if op is ANY:
            if flags & SRE_FLAG_DOTALL:
                emit(OPCODES[op])
            else:
                # without DOTALL, "any" is emitted as the
                # "not a linebreak" category
                emit(OPCODES[CATEGORY])
                emit(CHCODES[CATEGORY_NOT_LINEBREAK])
        elif op in (SUCCESS, FAILURE):
            emit(OPCODES[op])
        elif op is AT:
            emit(OPCODES[op])
            if flags & SRE_FLAG_MULTILINE:
                # map the position code to its multiline counterpart
                emit(ATCODES[AT_MULTILINE[av]])
            else:
                emit(ATCODES[av])
        elif op is BRANCH:
            emit(OPCODES[op])
            jumps = []
            for alternative in av[1]:
                skip = len(code); emit(0)
                _compile(code, alternative, flags)
                emit(OPCODES[JUMP])
                # each alternative ends in a JUMP whose operand is
                # patched below, after the whole branch is emitted
                jumps.append(len(code)); emit(0)
                code[skip] = len(code) - skip
            emit(0) # end of branch
            for jump in jumps:
                code[jump] = len(code) - jump
        elif op is CALL:
            emit(OPCODES[op])
            skip = len(code); emit(0)
            _compile(code, av, flags)
            emit(OPCODES[SUCCESS])
            code[skip] = len(code) - skip
        elif op is CATEGORY:
            emit(OPCODES[op])
            _emit_category(emit, av, flags)
        elif op is GROUP:
            if flags & SRE_FLAG_IGNORECASE:
                emit(OPCODES[OP_IGNORE[op]])
            else:
                emit(OPCODES[op])
            emit(av-1)
        elif op is IN:
            if flags & SRE_FLAG_IGNORECASE:
                emit(OPCODES[OP_IGNORE[op]])
                # set members are passed through _sre.getlower; flags
                # is handed to the helper as a default argument value
                def fixup(literal, flags=flags):
                    return _sre.getlower(ord(literal), flags)
            else:
                emit(OPCODES[op])
                fixup = ord
            skip = len(code); emit(0)
            for set_op, set_av in av:
                # the item's opcode is emitted before its operand is
                # inspected, so it is looked up even for operators
                # that are rejected below
                emit(OPCODES[set_op])
                if set_op is NEGATE:
                    pass
                elif set_op is LITERAL:
                    emit(fixup(set_av))
                elif set_op is RANGE:
                    emit(fixup(set_av[0]))
                    emit(fixup(set_av[1]))
                elif set_op is CATEGORY:
                    _emit_category(emit, set_av, flags)
                else:
                    raise error("internal: unsupported set operator")
            # the list of set items is terminated by a FAILURE opcode
            emit(OPCODES[FAILURE])
            code[skip] = len(code) - skip
        elif op in (LITERAL, NOT_LITERAL):
            if flags & SRE_FLAG_IGNORECASE:
                emit(OPCODES[OP_IGNORE[op]])
            else:
                emit(OPCODES[op])
            emit(ord(av))
        elif op is MARK:
            emit(OPCODES[op])
            emit(av)
        elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
            if flags & SRE_FLAG_TEMPLATE:
                # template mode always emits a plain REPEAT and skips
                # the width check
                repeat_op = REPEAT
            else:
                lo, hi = av[2].getwidth()
                if lo == 0:
                    raise error("nothing to repeat")
                # FIXME: <fl> a MAX_REPEAT_ONE variant for bodies of
                # width exactly one (lo == hi == 1) was hard-disabled
                # here; it needs a better way to figure out when it is
                # safe to use (in the parser, probably)
                repeat_op = op
            emit(OPCODES[repeat_op])
            skip = len(code); emit(0)
            emit(av[0])
            emit(av[1])
            _compile(code, av[2], flags)
            emit(OPCODES[SUCCESS])
            code[skip] = len(code) - skip
        elif op is SUBPATTERN:
            group = av[0]
            # a false group value emits no marks; otherwise the body is
            # bracketed by marks (group-1)*2 and (group-1)*2+1
            if group:
                emit(OPCODES[MARK])
                emit((group-1)*2)
            _compile(code, av[1], flags)
            if group:
                emit(OPCODES[MARK])
                emit((group-1)*2+1)
        else:
            raise ValueError("unsupported operand type", op)
Guido van Rossum7627c0d2000-03-31 14:58:54 +0000155
def compile(p, flags=0):
    # internal: convert pattern list to internal format
    #
    # p     -- either a pattern string (8-bit or unicode), which is
    #          first run through sre_parse.parse, or an already parsed
    #          pattern object providing .data and .pattern (with
    #          .flags, .groups and .groupdict)
    # flags -- extra SRE_FLAG_* bits; OR'ed with the flags recorded on
    #          the parsed pattern
    #
    # Returns the result of _sre.compile.  Raises AssertionError when
    # the parsed pattern reports a groups value above 100.
    if type(p) in (type(""), type(u"")):
        import sre_parse
        pattern = p
        p = sre_parse.parse(p)
    else:
        # pre-parsed input: there is no source string to hand on
        pattern = None
    flags = p.pattern.flags | flags
    code = []
    _compile(code, p.data, flags)
    code.append(OPCODES[SUCCESS])
    # FIXME: <fl> get rid of this limitation
    # raised explicitly instead of through an assert statement so the
    # limit is still enforced when Python runs with -O; the exception
    # type and message are unchanged
    if p.pattern.groups > 100:
        raise AssertionError(
            "sorry, but this version only supports 100 named groups"
            )
    return _sre.compile(
        pattern, flags,
        array.array(WORDSIZE, code).tostring(),
        p.pattern.groups-1, p.pattern.groupdict
        )