blob: 6dd1df9c2add716ed3ee03909ccfefdf3e26d18b [file] [log] [blame]
Guido van Rossum7627c0d2000-03-31 14:58:54 +00001#
2# Secret Labs' Regular Expression Engine
Guido van Rossum7627c0d2000-03-31 14:58:54 +00003#
4# re-compatible interface for the sre matching engine
5#
6# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
7#
Guido van Rossum7627c0d2000-03-31 14:58:54 +00008# Portions of this engine have been developed in cooperation with
Fredrik Lundh22d25462000-07-01 17:50:59 +00009# CNRI. Hewlett-Packard provided funding for 2.0 integration and
Guido van Rossum7627c0d2000-03-31 14:58:54 +000010# other compatibility work.
11#
12
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000013# FIXME: change all FIXME's to XXX ;-)
14
Guido van Rossum7627c0d2000-03-31 14:58:54 +000015import sre_compile
Fredrik Lundh436c3d582000-06-29 08:58:44 +000016import sre_parse
Guido van Rossum7627c0d2000-03-31 14:58:54 +000017
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000018import string
19
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000020# flags
# flag values are taken directly from sre_compile; each flag gets a
# long name and a one-letter alias bound to the same value
IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
LOCALE = sre_compile.SRE_FLAG_LOCALE
MULTILINE = sre_compile.SRE_FLAG_MULTILINE
DOTALL = sre_compile.SRE_FLAG_DOTALL
VERBOSE = sre_compile.SRE_FLAG_VERBOSE

I = IGNORECASE
L = LOCALE
M = MULTILINE
S = DOTALL
X = VERBOSE

# sre extensions (may or may not be in 2.0 final)
TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
UNICODE = sre_compile.SRE_FLAG_UNICODE

T = TEMPLATE
U = UNICODE

# sre exception (re-exported from sre_compile)
error = sre_compile.error
Fredrik Lundh436c3d582000-06-29 08:58:44 +000033
Guido van Rossum7627c0d2000-03-31 14:58:54 +000034# --------------------------------------------------------------------
35# public interface
36
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000037# FIXME: add docstrings
Guido van Rossum7627c0d2000-03-31 14:58:54 +000038
def match(pattern, string, flags=0):
    """Apply pattern's match() method to string.

    pattern is first handed to _compile() together with flags; the
    value returned by the resulting object's match(string) call is
    returned unchanged.
    """
    compiled = _compile(pattern, flags)
    return compiled.match(string)
Guido van Rossum7627c0d2000-03-31 14:58:54 +000041
def search(pattern, string, flags=0):
    """Apply pattern's search() method to string.

    pattern is first handed to _compile() together with flags; the
    value returned by the resulting object's search(string) call is
    returned unchanged.
    """
    compiled = _compile(pattern, flags)
    return compiled.search(string)
Guido van Rossum7627c0d2000-03-31 14:58:54 +000044
def sub(pattern, repl, string, count=0):
    """Return the result of pattern.sub(repl, string, count).

    This wrapper takes no flags argument: pattern is passed to
    _compile() on its own, so _compile()'s default flags (0) apply.
    """
    compiled = _compile(pattern)
    return compiled.sub(repl, string, count)
47
def subn(pattern, repl, string, count=0):
    """Return the result of pattern.subn(repl, string, count).

    This wrapper takes no flags argument: pattern is passed to
    _compile() on its own, so _compile()'s default flags (0) apply.
    """
    compiled = _compile(pattern)
    return compiled.subn(repl, string, count)
50
def split(pattern, string, maxsplit=0):
    """Return the result of pattern.split(string, maxsplit).

    This wrapper takes no flags argument: pattern is passed to
    _compile() on its own, so _compile()'s default flags (0) apply.
    """
    compiled = _compile(pattern)
    return compiled.split(string, maxsplit)
53
def findall(pattern, string, maxsplit=0):
    """Return the result of pattern.findall(string, maxsplit).

    This wrapper takes no flags argument: pattern is passed to
    _compile() on its own, so _compile()'s default flags (0) apply.
    """
    # NOTE(review): "maxsplit" is forwarded as findall()'s second
    # positional argument; the name looks copied from split() --
    # confirm what the compiled pattern's findall() does with it
    compiled = _compile(pattern)
    return compiled.findall(string, maxsplit)
56
def compile(pattern, flags=0):
    """Return the object produced by _compile(pattern, flags)."""
    compiled = _compile(pattern, flags)
    return compiled
59
def purge():
    """Empty the module-level compiled-pattern cache (_cache)."""
    _cache.clear()
62
def template(pattern, flags=0):
    """Compile pattern with the TEMPLATE flag (T) or-ed into flags."""
    template_flags = flags | T
    return _compile(pattern, template_flags)
65
def escape(pattern):
    """Return pattern with every non-alphanumeric character escaped.

    Characters in a-z, A-Z and 0-9 are copied as they are.  A NUL
    character becomes the four characters backslash-0-0-0; any other
    character is prefixed with a backslash.  The pieces are joined with
    _join(), so the result has the same string type as pattern.
    """
    escaped = []
    for c in pattern:
        if "a" <= c <= "z" or "A" <= c <= "Z" or "0" <= c <= "9":
            escaped.append(c)
        elif c == "\000":
            escaped.append("\\000")
        else:
            escaped.append("\\" + c)
    return _join(escaped, pattern)
Guido van Rossum7627c0d2000-03-31 14:58:54 +000076
77# --------------------------------------------------------------------
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000078# internals
Guido van Rossum7627c0d2000-03-31 14:58:54 +000079
# compiled patterns stored by _compile(), keyed on
# (type(pattern), pattern, flags)
_cache = {}
# when the cache already holds this many entries, _compile() empties
# it completely before storing the next pattern
_MAXCACHE = 100
Guido van Rossum7627c0d2000-03-31 14:58:54 +000082
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000083def _join(seq, sep):
84 # internal: join into string having the same type as sep
85 return string.join(seq, sep[:0])
86
def _compile(pattern, flags=0):
    # internal: compile pattern, going through the module cache
    kind = type(pattern)
    if kind not in sre_compile.STRING_TYPES:
        # not a string type: hand it back untouched (flags are not
        # looked at in this case)
        return pattern
    key = (kind, pattern, flags)
    compiled = _cache.get(key)
    if compiled is not None:
        return compiled
    compiled = sre_compile.compile(pattern, flags)
    if len(_cache) >= _MAXCACHE:
        # cache is full: drop everything rather than evicting one entry
        _cache.clear()
    _cache[key] = compiled
    return compiled
102
def _sub(pattern, template, string, count=0):
    # internal: pattern.sub implementation hook.  same work as _subn(),
    # but only the resulting string is returned (the count is dropped)
    result, replacements = _subn(pattern, template, string, count)
    return result
106
def _subn(pattern, template, string, count=0):
    # internal: pattern.subn implementation hook
    #
    # template is either a callable, which is called with each match
    # object, or something sre_parse.parse_template() accepts.  returns
    # (new string, number of replacements made); a count of 0 means
    # "no limit".
    if callable(template):
        expand = template
    else:
        parsed = sre_parse.parse_template(template, pattern)
        # bind the parsed template through a default argument, as the
        # original nested function did
        expand = lambda match, parsed=parsed: \
                 sre_parse.expand_template(parsed, match)
    pieces = []
    replaced = 0
    pos = 0  # end of the previous match (start of text not yet copied)
    scanner = pattern.scanner(string)
    while not count or replaced < count:
        m = scanner.search()
        if not m:
            break
        start, end = m.span()
        if pos < start:
            # copy the unmatched text between the two matches
            pieces.append(string[pos:start])
        pieces.append(expand(m))
        pos = end
        replaced = replaced + 1
    # whatever follows the last match is copied verbatim
    pieces.append(string[pos:])
    return _join(pieces, string[:0]), replaced
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000131
def _split(pattern, string, maxsplit=0):
    # internal: pattern.split implementation hook
    #
    # returns a list of the pieces of string found between non-empty
    # matches of pattern; if the pattern has groups, each match's
    # groups() are inserted after the piece that precedes it.  a
    # maxsplit of 0 means "no limit".
    parts = []
    splits = 0
    pos = 0  # end of the previous non-empty match
    scanner = pattern.scanner(string)
    has_groups = pattern.groups
    while not maxsplit or splits < maxsplit:
        m = scanner.search()
        if not m:
            break
        start, end = m.span()
        if start == end:
            # zero-width match: never split here; stop once everything
            # up to the end of the string has been consumed
            if pos >= len(string):
                break
            continue
        parts.append(string[pos:start])
        # the match is known to be non-empty at this point
        if has_groups:
            parts.extend(m.groups())
        pos = end
        splits = splits + 1
    parts.append(string[pos:])
    return parts
Fredrik Lundh0640e112000-06-30 13:55:15 +0000156
157# register myself for pickling
158
159import copy_reg
160
def _pickle(p):
    # internal: reduction hook for compiled patterns.  returns the
    # callable and argument tuple that rebuild p, i.e.
    # _compile(p.pattern, p.flags)
    args = (p.pattern, p.flags)
    return _compile, args
163
# type(_compile("")) is the type of whatever _compile() returns for a
# string pattern; register _pickle as its reduction function and
# _compile as the constructor.  note that this call runs at import time
# and leaves the compiled "" pattern in _cache.
copy_reg.pickle(type(_compile("")), _pickle, _compile)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000165
166# --------------------------------------------------------------------
167# experimental stuff (see python-dev discussions for details)
168
class Scanner:
    """Experimental scanner driven by a list of (phrase, action) pairs.

    lexicon is a sequence of (phrase, action) pairs.  All phrases are
    combined into a single compiled pattern; scan() repeatedly matches
    that pattern and dispatches to the action of the phrase that
    matched.

    Attributes:
      lexicon -- the sequence passed to the constructor
      scanner -- the compiled compound pattern
      match   -- while a callable action runs, the match object for the
                 text being handled (only set once scan() has called an
                 action)
    """
    def __init__(self, lexicon):
        from sre_constants import BRANCH, SUBPATTERN, INDEX
        self.lexicon = lexicon
        # combine phrases into a compound pattern
        p = []
        s = sre_parse.Pattern()
        for phrase, action in lexicon:
            # each phrase becomes a subpattern with no group id (None),
            # followed by an INDEX item carrying the phrase's position in
            # lexicon.  presumably INDEX is what makes m.lastindex (used
            # by scan()) report that position -- confirm in sre_compile
            p.append(sre_parse.SubPattern(s, [
                (SUBPATTERN, (None, sre_parse.parse(phrase))),
                (INDEX, len(p))
                ]))
        # one BRANCH with every phrase as an alternative
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        # NOTE(review): p has been rebound to the BRANCH wrapper by now,
        # so len(p) is the wrapper's length, not the number of phrases --
        # confirm that this is the intended group count
        s.groups = len(p)
        self.scanner = sre_compile.compile(p)
    def scan(self, string):
        """Scan string from the start and collect action results.

        At each position the compound pattern is matched.  The action
        of the phrase selected by m.lastindex is looked up in lexicon;
        if it is callable it is called as action(self, matched_text) and
        its return value is used, otherwise the action object itself is
        used.  Values that are None are skipped.

        Scanning stops at the first position where nothing matches or
        where the match is empty.  Returns (results, remainder), where
        remainder is the part of string that was not scanned.
        """
        result = []
        append = result.append
        match = self.scanner.match
        i = 0
        while 1:
            m = match(string, i)
            if not m:
                break
            j = m.end()
            if i == j:
                # empty match: stop instead of looping forever
                break
            action = self.lexicon[m.lastindex][1]
            if callable(action):
                # expose the current match object to the callback (the
                # pattern's match method, which used to be stored here,
                # tells the action nothing about the current token)
                self.match = m
                action = action(self, m.group())
            if action is not None:
                append(action)
            i = j
        return result, string[i:]