blob: 6dea5c40456f23a7dcd0e04eadfda16cad631630 [file] [log] [blame]
Guido van Rossum7627c0d2000-03-31 14:58:54 +00001#
2# Secret Labs' Regular Expression Engine
Guido van Rossum7627c0d2000-03-31 14:58:54 +00003#
4# re-compatible interface for the sre matching engine
5#
6# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
7#
Fredrik Lundh29c4ba92000-08-01 18:20:07 +00008# This version of the SRE library can be redistributed under CNRI's
9# Python 1.6 license. For any other use, please contact Secret Labs
10# AB (info@pythonware.com).
11#
Guido van Rossum7627c0d2000-03-31 14:58:54 +000012# Portions of this engine have been developed in cooperation with
Fredrik Lundh29c4ba92000-08-01 18:20:07 +000013# CNRI. Hewlett-Packard provided funding for 1.6 integration and
Guido van Rossum7627c0d2000-03-31 14:58:54 +000014# other compatibility work.
15#
16
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000017# FIXME: change all FIXME's to XXX ;-)
18
Guido van Rossum7627c0d2000-03-31 14:58:54 +000019import sre_compile
Fredrik Lundh436c3d582000-06-29 08:58:44 +000020import sre_parse
Guido van Rossum7627c0d2000-03-31 14:58:54 +000021
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000022import string
23
# flags
# Each flag is exposed under a one-letter alias and a long name; both
# names are bound to the same sre_compile.SRE_FLAG_* constant.
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
L = LOCALE = sre_compile.SRE_FLAG_LOCALE
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
S = DOTALL = sre_compile.SRE_FLAG_DOTALL
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE

# sre extensions (may or may not be in 1.6/2.0 final)
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
U = UNICODE = sre_compile.SRE_FLAG_UNICODE

# sre exception
# Alias of sre_compile.error; _compile() below catches and re-raises it.
error = sre_compile.error
Fredrik Lundh436c3d582000-06-29 08:58:44 +000037
Guido van Rossum7627c0d2000-03-31 14:58:54 +000038# --------------------------------------------------------------------
39# public interface
40
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000041# FIXME: add docstrings
Guido van Rossum7627c0d2000-03-31 14:58:54 +000042
def match(pattern, string, flags=0):
    """Compile *pattern* with *flags* and apply its match() to *string*.

    The pattern is obtained through _compile(), so repeated calls with
    the same (pattern, flags) pair reuse the cached compiled object.
    Returns whatever the compiled pattern's match() method returns.
    """
    compiled = _compile(pattern, flags)
    return compiled.match(string)
Guido van Rossum7627c0d2000-03-31 14:58:54 +000045
def search(pattern, string, flags=0):
    """Compile *pattern* with *flags* and apply its search() to *string*.

    The pattern is obtained through _compile(), so repeated calls with
    the same (pattern, flags) pair reuse the cached compiled object.
    Returns whatever the compiled pattern's search() method returns.
    """
    compiled = _compile(pattern, flags)
    return compiled.search(string)
Guido van Rossum7627c0d2000-03-31 14:58:54 +000048
def sub(pattern, repl, string, count=0):
    """Compile *pattern* (flags fixed at 0) and call its sub() method.

    *repl*, *string* and *count* are forwarded positionally, in that
    order; the return value is whatever the compiled pattern's sub()
    returns.
    """
    compiled = _compile(pattern, 0)
    return compiled.sub(repl, string, count)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000051
def subn(pattern, repl, string, count=0):
    """Compile *pattern* (flags fixed at 0) and call its subn() method.

    *repl*, *string* and *count* are forwarded positionally, in that
    order; the return value is whatever the compiled pattern's subn()
    returns.
    """
    compiled = _compile(pattern, 0)
    return compiled.subn(repl, string, count)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000054
def split(pattern, string, maxsplit=0):
    """Compile *pattern* (flags fixed at 0) and call its split() method.

    *string* and *maxsplit* are forwarded positionally; the return value
    is whatever the compiled pattern's split() returns.
    """
    compiled = _compile(pattern, 0)
    return compiled.split(string, maxsplit)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000057
def findall(pattern, string, maxsplit=0):
    """Compile *pattern* (flags fixed at 0) and call its findall() method.

    *string* and *maxsplit* are forwarded positionally; the return value
    is whatever the compiled pattern's findall() returns.
    """
    # NOTE(review): maxsplit is passed as the second positional argument
    # of the compiled pattern's findall().  Confirm that method really
    # treats it as a limit and not as something else (for example a
    # start position); the default of 0 is harmless either way.
    compiled = _compile(pattern, 0)
    return compiled.findall(string, maxsplit)
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000060
def compile(pattern, flags=0):
    """Return the compiled form of *pattern* for the given *flags*.

    This is a thin public wrapper around _compile(), so the result is
    taken from (and stored in) the module's pattern cache.
    """
    compiled = _compile(pattern, flags)
    return compiled
63
def purge():
    """Empty the module-level cache of compiled patterns."""
    _cache.clear()
66
def template(pattern, flags=0):
    """Compile *pattern* with the TEMPLATE flag added to *flags*.

    Equivalent to compile(pattern, flags | T); the result comes from the
    same cache that _compile() maintains.
    """
    template_flags = flags | T
    return _compile(pattern, template_flags)
69
def escape(pattern):
    """Return *pattern* with all non-alphanumeric characters escaped.

    Characters in a-z, A-Z and 0-9 are copied unchanged.  A NUL
    character is replaced by a backslash followed by "000"; every other
    character is prefixed with a single backslash.  The result is joined
    with _join(), so it has the same string type as *pattern*.
    """
    pieces = []
    for ch in pattern:
        if "a" <= ch <= "z" or "A" <= ch <= "Z" or "0" <= ch <= "9":
            pieces.append(ch)
        elif ch == "\000":
            pieces.append("\\000")
        else:
            pieces.append("\\" + ch)
    return _join(pieces, pattern)
Guido van Rossum7627c0d2000-03-31 14:58:54 +000080
81# --------------------------------------------------------------------
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000082# internals
Guido van Rossum7627c0d2000-03-31 14:58:54 +000083
# compiled patterns, keyed by the (pattern, flags) tuple given to _compile()
_cache = {}
# when the cache already holds this many entries, _compile() empties it
# completely before storing the next pattern
_MAXCACHE = 100
Guido van Rossum7627c0d2000-03-31 14:58:54 +000086
Fredrik Lundh8a3ebf82000-07-23 21:46:17 +000087def _join(seq, sep):
88 # internal: join into string having the same type as sep
89 return string.join(seq, sep[:0])
90
Fredrik Lundh7898c3e2000-08-07 20:59:04 +000091def _compile(*key):
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000092 # internal: compile pattern
Fredrik Lundh7898c3e2000-08-07 20:59:04 +000093 p = _cache.get(key)
94 if p is not None:
95 return p
96 pattern, flags = key
97 if type(pattern) not in sre_compile.STRING_TYPES:
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +000098 return pattern
Fredrik Lundhe1869832000-08-01 22:47:49 +000099 try:
100 p = sre_compile.compile(pattern, flags)
101 except error, v:
102 raise error, v # invalid expression
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000103 if len(_cache) >= _MAXCACHE:
104 _cache.clear()
105 _cache[key] = p
106 return p
107
Fredrik Lundh5644b7f2000-09-21 17:03:25 +0000108def _expand(pattern, match, template):
109 # internal: match.expand implementation hook
110 template = sre_parse.parse_template(template, pattern)
111 return sre_parse.expand_template(template, match)
112
def _sub(pattern, template, string, count=0):
    # internal: pattern.sub implementation hook
    #
    # Same work as _subn(); only the resulting string is returned and
    # the substitution count is discarded.
    result = _subn(pattern, template, string, count)
    return result[0]
116
def _subn(pattern, template, string, count=0):
    # internal: pattern.subn implementation hook
    #
    # Returns a (new_string, number_of_substitutions) tuple.  template
    # is either a callable, which is called with each match object, or
    # a template string, which is parsed once up front.  A count of 0
    # means "no limit".
    if callable(template):
        replace = template
    else:
        parsed = sre_parse.parse_template(template, pattern)
        # the parsed template is bound as a default argument so the
        # helper carries it along without relying on the enclosing scope
        def replace(match, template=parsed):
            return sre_parse.expand_template(template, match)
    pieces = []
    add = pieces.append
    done = 0
    pos = 0
    scanner = pattern.scanner(string)
    while not count or done < count:
        m = scanner.search()
        if not m:
            break
        start, end = m.span()
        if pos < start:
            # copy the unmatched text between the previous match and this one
            add(string[pos:start])
        add(replace(m))
        pos = end
        done = done + 1
    # copy whatever follows the last match
    add(string[pos:])
    return _join(pieces, string[:0]), done
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000141
142def _split(pattern, string, maxsplit=0):
143 # internal: pattern.split implementation hook
144 n = i = 0
145 s = []
146 append = s.append
Fredrik Lundhbe2211e2000-06-29 16:57:40 +0000147 extend = s.extend
148 c = pattern.scanner(string)
Fredrik Lundh01016fe2000-06-30 00:27:46 +0000149 g = pattern.groups
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000150 while not maxsplit or n < maxsplit:
151 m = c.search()
152 if not m:
153 break
Fredrik Lundh90a07912000-06-30 07:50:59 +0000154 b, e = m.span()
155 if b == e:
156 if i >= len(string):
157 break
158 continue
Fredrik Lundhbe2211e2000-06-29 16:57:40 +0000159 append(string[i:b])
Fredrik Lundh90a07912000-06-30 07:50:59 +0000160 if g and b != e:
161 extend(m.groups())
162 i = e
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000163 n = n + 1
Fredrik Lundh80946112000-06-29 18:03:25 +0000164 append(string[i:])
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000165 return s
Fredrik Lundh0640e112000-06-30 13:55:15 +0000166
167# register myself for pickling
168
169import copy_reg
170
def _pickle(p):
    # internal: reduce a compiled pattern to (constructor, arguments);
    # calling _compile(p.pattern, p.flags) rebuilds it
    args = (p.pattern, p.flags)
    return _compile, args
173
# The compiled-pattern type is obtained by compiling an empty pattern;
# _pickle is registered as its reduction function and _compile as the
# constructor.
copy_reg.pickle(type(_compile("", 0)), _pickle, _compile)
Fredrik Lundh7cafe4d2000-07-02 17:33:27 +0000175
176# --------------------------------------------------------------------
177# experimental stuff (see python-dev discussions for details)
178
class Scanner:
    """Experimental scanner driven by a list of (phrase, action) pairs.

    Every phrase is parsed as a regular expression; the parsed phrases
    are combined into one alternation which is compiled once in
    __init__() and then applied repeatedly by scan().
    """

    def __init__(self, lexicon):
        """Build the combined pattern for *lexicon*.

        *lexicon* is a sequence of (phrase, action) pairs.  It is stored
        unchanged as self.lexicon; scan() indexes it with the matching
        group's number to find the action to run.
        """
        from sre_constants import BRANCH, SUBPATTERN
        self.lexicon = lexicon
        # combine phrases into a compound pattern
        p = []
        s = sre_parse.Pattern()
        for phrase, action in lexicon:
            # each phrase becomes its own group, numbered by its
            # position in the lexicon (len(p) before the append)
            p.append(sre_parse.SubPattern(s, [
                (SUBPATTERN, (len(p), sre_parse.parse(phrase))),
                ]))
        p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
        # NOTE(review): p has already been rebound to the combined
        # SubPattern at this point, so len(p) measures that object and
        # not the list of phrases -- confirm this is the intended count.
        s.groups = len(p)
        self.scanner = sre_compile.compile(p)

    def scan(self, string):
        """Scan *string* from its start; return (results, remainder).

        The compiled pattern is matched repeatedly, each time at the
        position where the previous match ended.  For every match the
        action selected by m.lastindex is looked up in the lexicon:

        - a callable action is called as action(self, matched_text) and
          its return value is used in place of the action;
        - a value that is not None is appended to the result list;
        - None contributes nothing.

        While a callable action runs, self.match is the match object
        that triggered it.  Scanning stops at the first position where
        the pattern fails to match or matches the empty string; the
        unscanned tail of *string* is returned as the second item.
        """
        result = []
        append = result.append
        match = self.scanner.match
        i = 0
        while 1:
            m = match(string, i)
            if not m:
                break
            j = m.end()
            if i == j:
                # an empty match makes no progress: stop instead of
                # looping forever at the same position
                break
            action = self.lexicon[m.lastindex][1]
            if callable(action):
                # expose the current match object (not the bound match
                # method) so the action can inspect it
                self.match = m
                action = action(self, m.group())
            if action is not None:
                append(action)
            i = j
        return result, string[i:]