blob: 97a5140e91687c9598dd2bce6395bb16fd1cf4c4 [file] [log] [blame]
#
# Secret Labs' Regular Expression Engine
#
# re-compatible interface for the sre matching engine
#
# Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved.
#
# Portions of this engine have been developed in cooperation with
# CNRI.  Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work.
#
12
Guido van Rossum7627c0d2000-03-31 14:58:54 +000013import sre_compile
Fredrik Lundh436c3d582000-06-29 08:58:44 +000014import sre_parse
Guido van Rossum7627c0d2000-03-31 14:58:54 +000015
# flags
#
# Each flag is exported under a one-letter alias and a long name; both
# names are bound to the same constant taken from sre_compile.
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
L = LOCALE = sre_compile.SRE_FLAG_LOCALE
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
S = DOTALL = sre_compile.SRE_FLAG_DOTALL
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
22
# sre extensions (may or may not be in 2.0 final)
#
# Same aliasing scheme as the flags above: short name and long name
# refer to the same sre_compile constant.
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
U = UNICODE = sre_compile.SRE_FLAG_UNICODE
26
# sre exception
#
# Re-exported so callers can catch the engine's exception as
# <this module>.error without importing sre_compile themselves.
error = sre_compile.error
Fredrik Lundh436c3d582000-06-29 08:58:44 +000029
# --------------------------------------------------------------------
# public interface

# FIXME: add docstrings
Guido van Rossum7627c0d2000-03-31 14:58:54 +000034
def match(pattern, string, flags=0):
    """Apply pattern to string using the pattern object's match() method.

    pattern is either a pattern string, which is compiled with the
    given flags (compiled patterns are cached), or an object that is
    not a string, which is used as-is; flags is ignored in that case.
    Returns whatever the pattern object's match() method returns.
    """
    return _compile(pattern, flags).match(string)
Guido van Rossum7627c0d2000-03-31 14:58:54 +000037
def search(pattern, string, flags=0):
    """Apply pattern to string using the pattern object's search() method.

    pattern is either a pattern string, which is compiled with the
    given flags (compiled patterns are cached), or an object that is
    not a string, which is used as-is; flags is ignored in that case.
    Returns whatever the pattern object's search() method returns.
    """
    return _compile(pattern, flags).search(string)
Guido van Rossum7627c0d2000-03-31 14:58:54 +000040
def sub(pattern, repl, string, count=0, flags=0):
    """Replace occurrences of pattern in string by repl.

    The work is delegated to the pattern object's sub() method; count
    is passed through unchanged (0, the default, means no limit in the
    implementation hook in this module).

    flags is an optional trailing argument, accepted for consistency
    with match(), search() and compile().  It is only used when pattern
    is a string that has to be compiled; existing callers that do not
    pass it get the same behaviour as before (flags=0).
    """
    return _compile(pattern, flags).sub(repl, string, count)
43
def subn(pattern, repl, string, count=0, flags=0):
    """Like sub(), but delegate to the pattern object's subn() method.

    The implementation hook in this module returns a 2-tuple
    (new_string, number_of_substitutions_made).

    flags is an optional trailing argument, accepted for consistency
    with match(), search() and compile().  It is only used when pattern
    is a string that has to be compiled; existing callers that do not
    pass it get the same behaviour as before (flags=0).
    """
    return _compile(pattern, flags).subn(repl, string, count)
46
def split(pattern, string, maxsplit=0, flags=0):
    """Split string by the occurrences of pattern.

    The work is delegated to the pattern object's split() method;
    maxsplit is passed through unchanged (0, the default, means no
    limit in the implementation hook in this module).

    flags is an optional trailing argument, accepted for consistency
    with match(), search() and compile().  It is only used when pattern
    is a string that has to be compiled; existing callers that do not
    pass it get the same behaviour as before (flags=0).
    """
    return _compile(pattern, flags).split(string, maxsplit)
49
def findall(pattern, string, maxsplit=0):
    """Return the result of the pattern object's findall() method.

    pattern is compiled without flags if it is a pattern string.

    NOTE(review): despite its name, maxsplit is not a split limit.  It
    is forwarded unchanged as the second positional argument of the
    pattern object's findall() method -- presumably a start position;
    confirm against that method's signature.  The parameter name and
    the forwarding are kept as they are because existing callers may
    pass the argument positionally or by keyword.
    """
    return _compile(pattern).findall(string, maxsplit)
52
def compile(pattern, flags=0):
    """Compile a pattern string and return the resulting pattern object.

    Compiled patterns are cached, so compiling the same pattern string
    with the same flags again returns the cached object.  If pattern is
    not a string it is returned unchanged and flags is ignored.
    """
    return _compile(pattern, flags)
55
def template(pattern, flags=0):
    """Compile pattern with the TEMPLATE flag (T) added to flags.

    Otherwise identical to compile().
    """
    return _compile(pattern, flags | T)
58
def escape(pattern):
    """Return pattern with every non-alphanumeric character escaped.

    Characters in the ranges a-z, A-Z and 0-9 are copied unchanged.
    Every other character is prefixed with a backslash, except the NUL
    character, which is written as the four characters backslash, 0,
    0, 0.  The result has the same string type as pattern.
    """
    escaped = []
    append = escaped.append
    for char in pattern:
        if "a" <= char <= "z" or "A" <= char <= "Z" or "0" <= char <= "9":
            append(char)
        elif char == "\000":
            append("\\000")
        else:
            append("\\" + char)
    # join on an empty slice of the input so that the result keeps the
    # input's string type
    return pattern[:0].join(escaped)
Guido van Rossum7627c0d2000-03-31 14:58:54 +000069
# --------------------------------------------------------------------
# internals
Guido van Rossum7627c0d2000-03-31 14:58:54 +000072
# cache of compiled patterns, keyed on (string type, pattern, flags)
_cache = {}
# number of entries at which the whole cache is thrown away
_MAXCACHE = 100

def _compile(pattern, flags=0):
    # internal: compile pattern
    #
    # Returns a compiled pattern object for a pattern string, using
    # _cache to avoid recompiling.  Anything that is not exactly a
    # plain or unicode string is assumed to be a compiled pattern
    # already and is returned unchanged (flags is ignored then).
    tp = type(pattern)
    if tp not in (type(""), type(u"")):
        return pattern
    # the string type is part of the key, so equal plain and unicode
    # pattern strings get separate cache entries
    key = (tp, pattern, flags)
    try:
        return _cache[key]
    except KeyError:
        pass
    p = sre_compile.compile(pattern, flags)
    # simple eviction policy: flush everything once the cache is full
    if len(_cache) >= _MAXCACHE:
        _cache.clear()
    _cache[key] = p
    return p
91
def _sub(pattern, template, string, count=0):
    # internal: pattern.sub implementation hook
    #
    # Same as _subn, but returns only the new string and drops the
    # substitution count.
    return _subn(pattern, template, string, count)[0]
95
def _subn(pattern, template, string, count=0):
    # internal: pattern.subn implementation hook
    #
    # pattern is a compiled pattern object.  template is either a
    # callable, which is called with each match object and must return
    # the replacement text, or a template that sre_parse can parse.
    # At most count matches are replaced; 0 means no limit.  Returns
    # the tuple (new string, number of replacements made).
    if callable(template):
        expand = template
    else:
        parsed = sre_parse.parse_template(template, pattern)
        # the parsed template is bound through a default argument so
        # that this works without nested scopes
        def expand(match, parsed=parsed):
            return sre_parse.expand_template(parsed, match)
    pieces = []
    append = pieces.append
    scanner = pattern.scanner(string)
    n = pos = 0
    while not count or n < count:
        m = scanner.search()
        if not m:
            break
        start, end = m.span()
        # copy the unmatched text between the previous match and this one
        if pos < start:
            append(string[pos:start])
        append(expand(m))
        pos = end
        n = n + 1
    # copy whatever follows the last replaced match
    append(string[pos:])
    # join on an empty slice of the input so that the result keeps the
    # input's string type
    return string[:0].join(pieces), n
120
def _split(pattern, string, maxsplit=0):
    # internal: pattern.split implementation hook
    #
    # pattern is a compiled pattern object.  string is cut at every
    # non-empty match; at most maxsplit cuts are made, 0 means no
    # limit.  If the pattern contains groups, the groups of each
    # separator match are inserted into the result as well.  Returns a
    # list.
    pieces = []
    append = pieces.append
    extend = pieces.extend
    scanner = pattern.scanner(string)
    has_groups = pattern.groups
    n = pos = 0
    while not maxsplit or n < maxsplit:
        m = scanner.search()
        if not m:
            break
        start, end = m.span()
        if start == end:
            # empty matches never split; give up once everything up to
            # the end of the string has been consumed
            if pos >= len(string):
                break
            continue
        append(string[pos:start])
        # the match is known to be non-empty here, so only the
        # presence of groups needs to be checked
        if has_groups:
            extend(m.groups())
        pos = end
        n = n + 1
    # the remainder after the last separator (possibly empty)
    append(string[pos:])
    return pieces
Jeremy Hyltonb1aa1952000-06-01 17:39:12 +0000144 return s