#
# Secret Labs' Regular Expression Engine
#
# convert template to internal format
#
# Copyright (c) 1997-2000 by Secret Labs AB.  All rights reserved.
#
# Portions of this engine have been developed in cooperation with
# CNRI.  Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work.
#

import array
import _sre

from sre_constants import *

# Find an array type code whose item size matches the engine's code
# size.  WORDSIZE is left bound to the first matching type code and is
# used by compile() to pack the code list into a string for _sre.
for WORDSIZE in "BHil":
    # itemsize is the size in bytes of one array element, i.e. exactly
    # the length of a one-element array's serialised form.
    if array.array(WORDSIZE).itemsize == _sre.getcodesize():
        break
else:
    # none of the candidate type codes has the engine's code size
    raise RuntimeError("cannot find a useable array type")

def _compile(code, pattern, flags):
    """Append the engine code for a parsed pattern to a code list.

    code is a list that receives the emitted code words; it is
    modified in place and nothing is returned.  pattern is an iterable
    of (op, av) pairs.  flags is the SRE_FLAG_* bit mask; it selects
    the DOTALL, MULTILINE, LOCALE, UNICODE, IGNORECASE and TEMPLATE
    variants of the emitted code.

    Variable-length constructs (branches, calls, sets, repeats) first
    emit a 0 placeholder and patch it afterwards with the distance
    from the placeholder to the end of the construct.

    Raises error when, outside TEMPLATE mode, the first value returned
    by a repeated item's getwidth() is 0 ("nothing to repeat"), or when
    a set contains an unsupported operator.  Raises ValueError for an
    unknown top-level operator.
    """
    emit = code.append
    for op, av in pattern:
        if op is ANY:
            if flags & SRE_FLAG_DOTALL:
                emit(OPCODES[op])
            else:
                # without DOTALL, "any" is emitted as the
                # not-a-linebreak category
                emit(OPCODES[CATEGORY])
                emit(CHCODES[CATEGORY_NOT_LINEBREAK])
        elif op in (SUCCESS, FAILURE):
            emit(OPCODES[op])
        elif op is AT:
            emit(OPCODES[op])
            if flags & SRE_FLAG_MULTILINE:
                emit(ATCODES[AT_MULTILINE[av]])
            else:
                emit(ATCODES[av])
        elif op is BRANCH:
            emit(OPCODES[op])
            jumps = []
            for alternative in av[1]:
                skip = len(code)
                emit(0)
                _compile(code, alternative, flags)
                emit(OPCODES[JUMP])
                # remember where this alternative's jump offset lives;
                # it is patched once the end of the branch is known
                jumps.append(len(code))
                emit(0)
                code[skip] = len(code) - skip
            emit(0) # end of branch
            for jump in jumps:
                code[jump] = len(code) - jump
        elif op is CALL:
            emit(OPCODES[op])
            skip = len(code)
            emit(0)
            _compile(code, av, flags)
            emit(OPCODES[SUCCESS])
            code[skip] = len(code) - skip
        elif op is CATEGORY:
            emit(OPCODES[op])
            emit(_category_code(av, flags))
        elif op is GROUP:
            if flags & SRE_FLAG_IGNORECASE:
                emit(OPCODES[OP_IGNORE[op]])
            else:
                emit(OPCODES[op])
            emit(av-1)
        elif op is IN:
            if flags & SRE_FLAG_IGNORECASE:
                emit(OPCODES[OP_IGNORE[op]])
                # flags is bound as a default argument so that fixup
                # carries its own copy of the value
                def fixup(literal, flags=flags):
                    return _sre.getlower(ord(literal), flags)
            else:
                emit(OPCODES[op])
                fixup = ord
            skip = len(code)
            emit(0)
            for set_op, set_av in av:
                emit(OPCODES[set_op])
                if set_op is NEGATE:
                    pass
                elif set_op is LITERAL:
                    emit(fixup(set_av))
                elif set_op is RANGE:
                    emit(fixup(set_av[0]))
                    emit(fixup(set_av[1]))
                elif set_op is CATEGORY:
                    emit(_category_code(set_av, flags))
                else:
                    raise error("internal: unsupported set operator")
            # a FAILURE opcode terminates the set
            emit(OPCODES[FAILURE])
            code[skip] = len(code) - skip
        elif op in (LITERAL, NOT_LITERAL):
            # NOTE(review): unlike the IN branch, the literal is not
            # passed through _sre.getlower() here -- confirm that the
            # engine folds case itself for the *_IGNORE opcodes.
            if flags & SRE_FLAG_IGNORECASE:
                emit(OPCODES[OP_IGNORE[op]])
            else:
                emit(OPCODES[op])
            emit(ord(av))
        elif op is MARK:
            emit(OPCODES[op])
            emit(av)
        elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
            if flags & SRE_FLAG_TEMPLATE:
                # template mode always emits a plain REPEAT
                repeat_op = REPEAT
            else:
                lo, hi = av[2].getwidth()
                if lo == 0:
                    raise error("nothing to repeat")
                if 0 and lo == hi == 1 and op is MAX_REPEAT:
                    # FIXME: <fl> need a better way to figure out when
                    # it's safe to use this one (in the parser, probably)
                    repeat_op = MAX_REPEAT_ONE
                else:
                    repeat_op = op
            # every repeat variant shares the same layout:
            # opcode, skip, av[0], av[1], body, SUCCESS
            emit(OPCODES[repeat_op])
            skip = len(code)
            emit(0)
            emit(av[0])
            emit(av[1])
            _compile(code, av[2], flags)
            emit(OPCODES[SUCCESS])
            code[skip] = len(code) - skip
        elif op is SUBPATTERN:
            group = av[0]
            if group:
                # opening mark for this group
                emit(OPCODES[MARK])
                emit((group-1)*2)
            _compile(code, av[1], flags)
            if group:
                # closing mark for this group
                emit(OPCODES[MARK])
                emit((group-1)*2+1)
        else:
            raise ValueError("unsupported operand type", op)


def _category_code(category, flags):
    # internal: return the character code for a category, mapped
    # through CH_LOCALE or CH_UNICODE when the matching flag is set
    # (LOCALE takes precedence over UNICODE)
    chcode = CHCODES[category]
    if flags & SRE_FLAG_LOCALE:
        return CH_LOCALE[chcode]
    if flags & SRE_FLAG_UNICODE:
        return CH_UNICODE[chcode]
    return chcode

def compile(p, flags=0):
    """Convert a pattern to the engine's internal format.

    p is either a pattern string (8-bit or unicode), which is parsed
    with sre_parse.parse(), or an already parsed pattern object that
    provides a ``data`` attribute and a ``pattern`` attribute carrying
    ``flags``, ``groups`` and ``groupdict``.  flags is OR-ed with the
    flags recorded on the parsed pattern.

    Returns the result of _sre.compile().  When p was not a string,
    None is passed to the engine as the pattern source.

    Raises AssertionError if the parsed pattern reports more than 100
    groups.
    """
    # internal: convert pattern list to internal format
    if type(p) in (type(""), type(u"")):
        import sre_parse
        pattern = p
        p = sre_parse.parse(p)
    else:
        pattern = None
    flags = p.pattern.flags | flags
    code = []
    _compile(code, p.data, flags)
    code.append(OPCODES[SUCCESS])
    # FIXME: <fl> get rid of this limitation
    # Raised explicitly (rather than via an assert statement) so that
    # the limit is still enforced when running with -O.
    if p.pattern.groups > 100:
        raise AssertionError(
            "sorry, but this version only supports 100 named groups"
            )
    return _sre.compile(
        pattern, flags,
        # pack the code words using the type code found at import time
        array.array(WORDSIZE, code).tostring(),
        p.pattern.groups-1, p.pattern.groupdict
        )