Issue #27030: Unknown escapes consisting of ``'\'`` and an ASCII letter in
regular expressions are now errors.
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
index 4ff50d1..521e379 100644
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -282,33 +282,6 @@
def error(self, msg, offset=0):
return error(msg, self.string, self.tell() - offset)
-# The following three functions are not used in this module anymore, but we keep
-# them here (with DeprecationWarnings) for backwards compatibility.
-
-def isident(char):
- import warnings
- warnings.warn('sre_parse.isident() will be removed in 3.5',
- DeprecationWarning, stacklevel=2)
- return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
-
-def isdigit(char):
- import warnings
- warnings.warn('sre_parse.isdigit() will be removed in 3.5',
- DeprecationWarning, stacklevel=2)
- return "0" <= char <= "9"
-
-def isname(name):
- import warnings
- warnings.warn('sre_parse.isname() will be removed in 3.5',
- DeprecationWarning, stacklevel=2)
- # check that group name is a valid string
- if not isident(name[0]):
- return False
- for char in name[1:]:
- if not isident(char) and not isdigit(char):
- return False
- return True
-
def _class_escape(source, escape):
# handle escape code inside character class
code = ESCAPES.get(escape)
@@ -351,9 +324,7 @@
raise ValueError
if len(escape) == 2:
if c in ASCIILETTERS:
- import warnings
- warnings.warn('bad escape %s' % escape,
- DeprecationWarning, stacklevel=8)
+ raise source.error('bad escape %s' % escape, len(escape))
return LITERAL, ord(escape[1])
except ValueError:
pass
@@ -418,9 +389,7 @@
raise source.error("invalid group reference", len(escape))
if len(escape) == 2:
if c in ASCIILETTERS:
- import warnings
- warnings.warn('bad escape %s' % escape,
- DeprecationWarning, stacklevel=8)
+ raise source.error("bad escape %s" % escape, len(escape))
return LITERAL, ord(escape[1])
except ValueError:
pass
@@ -798,10 +767,7 @@
# Check and fix flags according to the type of pattern (str or bytes)
if isinstance(src, str):
if flags & SRE_FLAG_LOCALE:
- import warnings
- warnings.warn("LOCALE flag with a str pattern is deprecated. "
- "Will be an error in 3.6",
- DeprecationWarning, stacklevel=6)
+ raise ValueError("cannot use LOCALE flag with a str pattern")
if not flags & SRE_FLAG_ASCII:
flags |= SRE_FLAG_UNICODE
elif flags & SRE_FLAG_UNICODE:
@@ -810,10 +776,7 @@
if flags & SRE_FLAG_UNICODE:
raise ValueError("cannot use UNICODE flag with a bytes pattern")
if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:
- import warnings
- warnings.warn("ASCII and LOCALE flags are incompatible. "
- "Will be an error in 3.6",
- DeprecationWarning, stacklevel=6)
+ raise ValueError("ASCII and LOCALE flags are incompatible")
return flags
def parse(str, flags=0, pattern=None):
@@ -914,9 +877,7 @@
this = chr(ESCAPES[this][1])
except KeyError:
if c in ASCIILETTERS:
- import warnings
- warnings.warn('bad escape %s' % this,
- DeprecationWarning, stacklevel=4)
+ raise s.error('bad escape %s' % this, len(this))
lappend(this)
else:
lappend(this)
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 7a74141..e27591c 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -124,7 +124,7 @@
(chr(9)+chr(10)+chr(11)+chr(13)+chr(12)+chr(7)+chr(8)))
for c in 'cdehijklmopqsuwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ':
with self.subTest(c):
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(re.error):
self.assertEqual(re.sub('a', '\\' + c, 'a'), '\\' + c)
self.assertEqual(re.sub('^\s*', 'X', 'test'), 'Xtest')
@@ -633,14 +633,10 @@
re.purge() # for warnings
for c in 'ceghijklmopqyzCEFGHIJKLMNOPQRTVXY':
with self.subTest(c):
- with self.assertWarns(DeprecationWarning):
- self.assertEqual(re.fullmatch('\\%c' % c, c).group(), c)
- self.assertIsNone(re.match('\\%c' % c, 'a'))
+ self.assertRaises(re.error, re.compile, '\\%c' % c)
for c in 'ceghijklmopqyzABCEFGHIJKLMNOPQRTVXYZ':
with self.subTest(c):
- with self.assertWarns(DeprecationWarning):
- self.assertEqual(re.fullmatch('[\\%c]' % c, c).group(), c)
- self.assertIsNone(re.match('[\\%c]' % c, 'a'))
+ self.assertRaises(re.error, re.compile, '[\\%c]' % c)
def test_string_boundaries(self):
# See http://bugs.python.org/issue10713
@@ -993,10 +989,8 @@
self.assertTrue(re.match((r"\x%02x" % i).encode(), bytes([i])))
self.assertTrue(re.match((r"\x%02x0" % i).encode(), bytes([i])+b"0"))
self.assertTrue(re.match((r"\x%02xz" % i).encode(), bytes([i])+b"z"))
- with self.assertWarns(DeprecationWarning):
- self.assertTrue(re.match(br"\u1234", b'u1234'))
- with self.assertWarns(DeprecationWarning):
- self.assertTrue(re.match(br"\U00012345", b'U00012345'))
+ self.assertRaises(re.error, re.compile, br"\u1234")
+ self.assertRaises(re.error, re.compile, br"\U00012345")
self.assertTrue(re.match(br"\0", b"\000"))
self.assertTrue(re.match(br"\08", b"\0008"))
self.assertTrue(re.match(br"\01", b"\001"))
@@ -1018,10 +1012,8 @@
self.assertTrue(re.match((r"[\x%02x]" % i).encode(), bytes([i])))
self.assertTrue(re.match((r"[\x%02x0]" % i).encode(), bytes([i])))
self.assertTrue(re.match((r"[\x%02xz]" % i).encode(), bytes([i])))
- with self.assertWarns(DeprecationWarning):
- self.assertTrue(re.match(br"[\u1234]", b'u'))
- with self.assertWarns(DeprecationWarning):
- self.assertTrue(re.match(br"[\U00012345]", b'U'))
+ self.assertRaises(re.error, re.compile, br"[\u1234]")
+ self.assertRaises(re.error, re.compile, br"[\U00012345]")
self.checkPatternError(br"[\567]",
r'octal escape value \567 outside of '
r'range 0-0o377', 1)
@@ -1363,12 +1355,12 @@
if bletter:
self.assertIsNone(pat.match(bletter))
# Incompatibilities
- self.assertWarns(DeprecationWarning, re.compile, '', re.LOCALE)
- self.assertWarns(DeprecationWarning, re.compile, '(?L)')
- self.assertWarns(DeprecationWarning, re.compile, b'', re.LOCALE | re.ASCII)
- self.assertWarns(DeprecationWarning, re.compile, b'(?L)', re.ASCII)
- self.assertWarns(DeprecationWarning, re.compile, b'(?a)', re.LOCALE)
- self.assertWarns(DeprecationWarning, re.compile, b'(?aL)')
+ self.assertRaises(ValueError, re.compile, '', re.LOCALE)
+ self.assertRaises(ValueError, re.compile, '(?L)')
+ self.assertRaises(ValueError, re.compile, b'', re.LOCALE | re.ASCII)
+ self.assertRaises(ValueError, re.compile, b'(?L)', re.ASCII)
+ self.assertRaises(ValueError, re.compile, b'(?a)', re.LOCALE)
+ self.assertRaises(ValueError, re.compile, b'(?aL)')
def test_bug_6509(self):
# Replacement strings of both types must parse properly.
@@ -1419,13 +1411,6 @@
# Test behaviour when not given a string or pattern as parameter
self.assertRaises(TypeError, re.compile, 0)
- def test_bug_13899(self):
- # Issue #13899: re pattern r"[\A]" should work like "A" but matches
- # nothing. Ditto B and Z.
- with self.assertWarns(DeprecationWarning):
- self.assertEqual(re.findall(r'[\A\B\b\C\Z]', 'AB\bCZ'),
- ['A', 'B', '\b', 'C', 'Z'])
-
@bigmemtest(size=_2G, memuse=1)
def test_large_search(self, size):
# Issue #10182: indices were 32-bit-truncated.