SRE 0.9.8: passes the entire test suite
-- reverted REPEAT operator to use "repeat context" strategy
(from 0.8.X), but done right this time.
-- got rid of the backtracking stack; use nested SRE_MATCH calls
instead (the stack should probably be put back again in 0.9.9 ;-)
-- properly reset state in scanner mode
-- don't use aggressive inlining by default
diff --git a/Lib/sre.py b/Lib/sre.py
index 6dd1df9..3e125a7 100644
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -5,8 +5,12 @@
#
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
+# This version of the SRE library can be redistributed under CNRI's
+# Python 1.6 license. For any other use, please contact Secret Labs
+# AB (info@pythonware.com).
+#
# Portions of this engine have been developed in cooperation with
-# CNRI. Hewlett-Packard provided funding for 2.0 integration and
+# CNRI. Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work.
#
@@ -24,7 +28,7 @@
S = DOTALL = sre_compile.SRE_FLAG_DOTALL
X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
-# sre extensions (may or may not be in 2.0 final)
+# sre extensions (may or may not be in 1.6/2.0 final)
T = TEMPLATE = sre_compile.SRE_FLAG_TEMPLATE
U = UNICODE = sre_compile.SRE_FLAG_UNICODE
@@ -168,15 +172,14 @@
class Scanner:
def __init__(self, lexicon):
- from sre_constants import BRANCH, SUBPATTERN, INDEX
+ from sre_constants import BRANCH, SUBPATTERN
self.lexicon = lexicon
# combine phrases into a compound pattern
p = []
s = sre_parse.Pattern()
for phrase, action in lexicon:
p.append(sre_parse.SubPattern(s, [
- (SUBPATTERN, (None, sre_parse.parse(phrase))),
- (INDEX, len(p))
+ (SUBPATTERN, (len(p), sre_parse.parse(phrase))),
]))
p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
s.groups = len(p)
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index ef26e1c..2d1cbb1 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -5,9 +5,7 @@
#
# Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
#
-# Portions of this engine have been developed in cooperation with
-# CNRI. Hewlett-Packard provided funding for 2.0 integration and
-# other compatibility work.
+# See the sre.py file for information on usage and redistribution.
#
import _sre
@@ -124,6 +122,7 @@
emit(CHCODES[CATEGORY_NOT_LINEBREAK])
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
if flags & SRE_FLAG_TEMPLATE:
+ raise error, "internal: unsupported template operator"
emit(OPCODES[REPEAT])
skip = len(code); emit(0)
emit(av[0])
@@ -136,9 +135,8 @@
if lo == 0:
raise error, "nothing to repeat"
if 0 and lo == hi == 1 and op is MAX_REPEAT:
- # FIXME: <fl> need a better way to figure out when
- # it's safe to use this one (in the parser, probably)
- emit(OPCODES[MAX_REPEAT_ONE])
+ # FIXME: <fl> fast and wrong (but we'll fix that)
+ emit(OPCODES[REPEAT_ONE])
skip = len(code); emit(0)
emit(av[0])
emit(av[1])
@@ -146,29 +144,24 @@
emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip
else:
- emit(OPCODES[op])
+ emit(OPCODES[REPEAT])
skip = len(code); emit(0)
emit(av[0])
emit(av[1])
- mark = MAXCODE
- if av[2][0][0] == SUBPATTERN:
- # repeated subpattern
- gid, foo = av[2][0][1]
- if gid:
- mark = (gid-1)*2
- emit(mark)
_compile(code, av[2], flags)
- emit(OPCODES[SUCCESS])
code[skip] = len(code) - skip
+ if op == MAX_REPEAT:
+ emit(OPCODES[MAX_UNTIL])
+ else:
+ emit(OPCODES[MIN_UNTIL])
elif op is SUBPATTERN:
- gid = av[0]
- if gid:
+ if av[0]:
emit(OPCODES[MARK])
- emit((gid-1)*2)
+ emit((av[0]-1)*2)
_compile(code, av[1], flags)
- if gid:
+ if av[0]:
emit(OPCODES[MARK])
- emit((gid-1)*2+1)
+ emit((av[0]-1)*2+1)
elif op in (SUCCESS, FAILURE):
emit(OPCODES[op])
elif op in (ASSERT, ASSERT_NOT):
@@ -197,11 +190,10 @@
else:
emit(ATCODES[av])
elif op is BRANCH:
+ emit(OPCODES[op])
tail = []
for av in av[1]:
- emit(OPCODES[op])
skip = len(code); emit(0)
- emit(MAXCODE) # save mark
_compile(code, av, flags)
emit(OPCODES[JUMP])
tail.append(len(code)); emit(0)
@@ -223,9 +215,6 @@
else:
emit(OPCODES[op])
emit(av-1)
- elif op in (MARK, INDEX):
- emit(OPCODES[op])
- emit(av)
else:
raise ValueError, ("unsupported operand type", op)
@@ -294,16 +283,7 @@
except NameError:
pass
-def compile(p, flags=0):
- # internal: convert pattern list to internal format
-
- # compile, as necessary
- if type(p) in STRING_TYPES:
- import sre_parse
- pattern = p
- p = sre_parse.parse(p, flags)
- else:
- pattern = None
+def _compile1(p, flags):
flags = p.pattern.flags | flags
code = []
@@ -316,6 +296,20 @@
code.append(OPCODES[SUCCESS])
+ return code
+
+def compile(p, flags=0):
+ # internal: convert pattern list to internal format
+
+ if type(p) in STRING_TYPES:
+ import sre_parse
+ pattern = p
+ p = sre_parse.parse(p, flags)
+ else:
+ pattern = None
+
+ code = _compile1(p, flags)
+
# print code
# FIXME: <fl> get rid of this limitation!
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index ef32c32..e595915 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -6,9 +6,7 @@
#
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
-# Portions of this engine have been developed in cooperation with
-# CNRI. Hewlett-Packard provided funding for 2.0 integration and
-# other compatibility work.
+# See the sre.py file for information on usage and redistribution.
#
# should this really be here?
@@ -33,15 +31,15 @@
GROUPREF_IGNORE = "groupref_ignore"
IN = "in"
IN_IGNORE = "in_ignore"
-INDEX = "index"
INFO = "info"
JUMP = "jump"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
MARK = "mark"
MAX_REPEAT = "max_repeat"
-MAX_REPEAT_ONE = "max_repeat_one"
+MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
+MIN_UNTIL = "min_until"
NEGATE = "negate"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"
@@ -91,19 +89,19 @@
CATEGORY,
CHARSET,
GROUPREF, GROUPREF_IGNORE,
- INDEX,
IN, IN_IGNORE,
INFO,
JUMP,
LITERAL, LITERAL_IGNORE,
MARK,
- MAX_REPEAT,
- MAX_REPEAT_ONE,
- MIN_REPEAT,
+ MAX_UNTIL,
+ MIN_UNTIL,
NOT_LITERAL, NOT_LITERAL_IGNORE,
NEGATE,
RANGE,
- REPEAT
+ REPEAT,
+ REPEAT_ONE,
+ SUBPATTERN
]
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 1b56352..299aa0e 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -5,9 +5,7 @@
#
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
-# Portions of this engine have been developed in cooperation with
-# CNRI. Hewlett-Packard provided funding for 2.0 integration and
-# other compatibility work.
+# See the sre.py file for information on usage and redistribution.
#
import string, sys
@@ -536,8 +534,6 @@
group = state.getgroup(name)
p = _parse_sub(source, state)
subpattern.append((SUBPATTERN, (group, p)))
- if group is not None:
- p.append((INDEX, group))
else:
while 1:
char = source.get()