| Guido van Rossum | 7627c0d | 2000-03-31 14:58:54 +0000 | [diff] [blame] | 1 | # | 
 | 2 | # Secret Labs' Regular Expression Engine | 
| Guido van Rossum | 7627c0d | 2000-03-31 14:58:54 +0000 | [diff] [blame] | 3 | # | 
 | 4 | # re-compatible interface for the sre matching engine | 
 | 5 | # | 
 | 6 | # Copyright (c) 1998-2000 by Secret Labs AB.  All rights reserved. | 
 | 7 | # | 
| Guido van Rossum | 7627c0d | 2000-03-31 14:58:54 +0000 | [diff] [blame] | 8 | # Portions of this engine have been developed in cooperation with | 
 | 9 | # CNRI.  Hewlett-Packard provided funding for 1.6 integration and | 
 | 10 | # other compatibility work. | 
 | 11 | # | 
 | 12 |  | 
| Guido van Rossum | 7627c0d | 2000-03-31 14:58:54 +0000 | [diff] [blame] | 13 | import sre_compile | 
| Fredrik Lundh | 436c3d58 | 2000-06-29 08:58:44 +0000 | [diff] [blame] | 14 | import sre_parse | 
| Guido van Rossum | 7627c0d | 2000-03-31 14:58:54 +0000 | [diff] [blame] | 15 |  | 
# flags
# Each flag is exposed under a one-letter alias and a long name; both
# names are bound to the same SRE_FLAG_* constant from sre_compile.
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
L = LOCALE = sre_compile.SRE_FLAG_LOCALE
M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
S = DOTALL = sre_compile.SRE_FLAG_DOTALL
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE

# sre extensions (may or may not be in 2.0 final)
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
U = UNICODE = sre_compile.SRE_FLAG_UNICODE

# sre exception
# Module-level alias of the exception class defined by sre_compile.
error = sre_compile.error
| Fredrik Lundh | 436c3d58 | 2000-06-29 08:58:44 +0000 | [diff] [blame] | 29 |  | 
| Guido van Rossum | 7627c0d | 2000-03-31 14:58:54 +0000 | [diff] [blame] | 30 | # -------------------------------------------------------------------- | 
 | 31 | # public interface | 
 | 32 |  | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 33 | # FIXME: add docstrings | 
| Guido van Rossum | 7627c0d | 2000-03-31 14:58:54 +0000 | [diff] [blame] | 34 |  | 
 | 35 | def match(pattern, string, flags=0): | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 36 |     return _compile(pattern, flags).match(string) | 
| Guido van Rossum | 7627c0d | 2000-03-31 14:58:54 +0000 | [diff] [blame] | 37 |  | 
 | 38 | def search(pattern, string, flags=0): | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 39 |     return _compile(pattern, flags).search(string) | 
| Guido van Rossum | 7627c0d | 2000-03-31 14:58:54 +0000 | [diff] [blame] | 40 |  | 
def sub(pattern, repl, string, count=0):
    """Return the result of the compiled pattern's sub(repl, string, count).

    `pattern` is compiled through _compile() with the default flags;
    this wrapper takes no flags argument of its own.
    """
    compiled = _compile(pattern)
    return compiled.sub(repl, string, count)
 | 43 |  | 
 | 44 | def subn(pattern, repl, string, count=0): | 
 | 45 |     return _compile(pattern).subn(repl, string, count) | 
 | 46 |  | 
 | 47 | def split(pattern, string, maxsplit=0): | 
 | 48 |     return _compile(pattern).split(string, maxsplit) | 
 | 49 |  | 
 | 50 | def findall(pattern, string, maxsplit=0): | 
 | 51 |     return _compile(pattern).findall(string, maxsplit) | 
 | 52 |  | 
 | 53 | def compile(pattern, flags=0): | 
 | 54 |     return _compile(pattern, flags) | 
 | 55 |  | 
def template(pattern, flags=0):
    """Return _compile(pattern, ...) with the T (TEMPLATE) bit added to flags."""
    template_flags = flags | T
    return _compile(pattern, template_flags)
 | 58 |  | 
def escape(pattern):
    """Return `pattern` with every non-alphanumeric character escaped.

    Characters in a-z, A-Z and 0-9 are copied as they are.  A NUL
    character is replaced by a backslash followed by "000"; any other
    character is prefixed with a single backslash.  The pieces are
    joined with an empty slice of `pattern` itself, so the result has
    the same string type as the input.
    """
    escaped = []
    for ch in pattern:
        if "a" <= ch <= "z" or "A" <= ch <= "Z" or "0" <= ch <= "9":
            escaped.append(ch)
        elif ch == "\000":
            escaped.append("\\000")
        else:
            escaped.append("\\" + ch)
    return pattern[:0].join(escaped)
| Guido van Rossum | 7627c0d | 2000-03-31 14:58:54 +0000 | [diff] [blame] | 69 |  | 
 | 70 | # -------------------------------------------------------------------- | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 71 | # internals | 
| Guido van Rossum | 7627c0d | 2000-03-31 14:58:54 +0000 | [diff] [blame] | 72 |  | 
# Compiled patterns keyed by (string type, pattern text, flags).
_cache = {}
# Once the cache holds this many entries it is emptied before the next
# insertion.
_MAXCACHE = 100

def _compile(pattern, flags=0):
    # internal: compile pattern
    #
    # Anything that is not a plain or unicode string is returned as it
    # is (flags are not looked at in that case).  Strings are looked up
    # in _cache first and compiled with sre_compile.compile() on a miss.
    kind = type(pattern)
    string_types = (type(""), type(u""))
    if kind not in string_types:
        return pattern
    key = (kind, pattern, flags)
    cached = _cache.get(key)
    if cached is not None:
        return cached
    compiled = sre_compile.compile(pattern, flags)
    # The cache is wiped wholesale when full; there is no per-entry
    # eviction.
    if len(_cache) >= _MAXCACHE:
        _cache.clear()
    _cache[key] = compiled
    return compiled
 | 91 |  | 
 | 92 | def _sub(pattern, template, string, count=0): | 
 | 93 |     # internal: pattern.sub implementation hook | 
 | 94 |     return _subn(pattern, template, string, count)[0] | 
 | 95 |  | 
def _subn(pattern, template, string, count=0):
    # internal: pattern.subn implementation hook
    #
    # Returns a (new_string, number_of_replacements) tuple.  A callable
    # `template` is invoked with each match object; any other template
    # is parsed once with sre_parse.parse_template() and expanded per
    # match with sre_parse.expand_template().  A `count` of 0 means no
    # limit on the number of replacements.
    if callable(template):
        expand = template
    else:
        parsed = sre_parse.parse_template(template, pattern)
        # the parsed template is bound as a default argument
        def expand(match, template=parsed):
            return sre_parse.expand_template(template, match)
    pieces = []
    replaced = 0
    pos = 0  # end of the previous match; start of the text not yet copied
    scanner = pattern.scanner(string)
    while not count or replaced < count:
        match = scanner.search()
        if not match:
            break
        start, end = match.span()
        # copy the unmatched text between the previous match and this one
        if pos < start:
            pieces.append(string[pos:start])
        pieces.append(expand(match))
        pos = end
        replaced = replaced + 1
    # whatever follows the last match is copied verbatim
    pieces.append(string[pos:])
    return string[:0].join(pieces), replaced
 | 120 |  | 
 | 121 | def _split(pattern, string, maxsplit=0): | 
 | 122 |     # internal: pattern.split implementation hook | 
 | 123 |     n = i = 0 | 
 | 124 |     s = [] | 
 | 125 |     append = s.append | 
| Fredrik Lundh | be2211e | 2000-06-29 16:57:40 +0000 | [diff] [blame] | 126 |     extend = s.extend | 
 | 127 |     c = pattern.scanner(string) | 
| Fredrik Lundh | 01016fe | 2000-06-30 00:27:46 +0000 | [diff] [blame] | 128 |     g = pattern.groups | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 129 |     while not maxsplit or n < maxsplit: | 
 | 130 |         m = c.search() | 
 | 131 |         if not m: | 
 | 132 |             break | 
| Fredrik Lundh | 90a0791 | 2000-06-30 07:50:59 +0000 | [diff] [blame^] | 133 |         b, e = m.span() | 
 | 134 |         if b == e: | 
 | 135 |             if i >= len(string): | 
 | 136 |                 break | 
 | 137 |             continue | 
| Fredrik Lundh | be2211e | 2000-06-29 16:57:40 +0000 | [diff] [blame] | 138 |         append(string[i:b]) | 
| Fredrik Lundh | 90a0791 | 2000-06-30 07:50:59 +0000 | [diff] [blame^] | 139 |         if g and b != e: | 
 | 140 |             extend(m.groups()) | 
 | 141 |         i = e | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 142 |         n = n + 1 | 
| Fredrik Lundh | 8094611 | 2000-06-29 18:03:25 +0000 | [diff] [blame] | 143 |     append(string[i:]) | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 144 |     return s |