bpo-29995: re.escape() now escapes only special characters. (#1007)
diff --git a/Lib/idlelib/idle_test/test_replace.py b/Lib/idlelib/idle_test/test_replace.py
index 9913ed2..2ecbd34 100644
--- a/Lib/idlelib/idle_test/test_replace.py
+++ b/Lib/idlelib/idle_test/test_replace.py
@@ -221,8 +221,8 @@
self.assertIn('Invalid Replace Expression', showerror.message)
# test access method
- self.engine.setcookedpat("\'")
- equal(pv.get(), "\\'")
+ self.engine.setcookedpat("?")
+ equal(pv.get(), "\\?")
def test_replace_backwards(self):
equal = self.assertEqual
diff --git a/Lib/re.py b/Lib/re.py
index d321cff..7053edd 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -241,39 +241,21 @@
"Compile a template pattern, returning a pattern object"
return _compile(pattern, flags|T)
-_alphanum_str = frozenset(
- "_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
-_alphanum_bytes = frozenset(
- b"_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ01234567890")
+# Characters escaped by escape(): the regex SPECIAL_CHARS, plus
+# closing ')', '}' and ']',
+# '-' (denotes a range inside a character set),
+# '#' (starts a comment) and WHITESPACE (ignored) in verbose mode.
+_special_chars_map = {i: '\\' + chr(i) for i in b'()[]{}?*+-|^$\\.# \t\n\r\v\f'}
def escape(pattern):
"""
- Escape all the characters in pattern except ASCII letters, numbers and '_'.
+ Escape special characters in a string or bytes pattern.
"""
if isinstance(pattern, str):
- alphanum = _alphanum_str
- s = list(pattern)
- for i, c in enumerate(pattern):
- if c not in alphanum:
- if c == "\000":
- s[i] = "\\000"
- else:
- s[i] = "\\" + c
- return "".join(s)
+ return pattern.translate(_special_chars_map)
else:
- alphanum = _alphanum_bytes
- s = []
- esc = ord(b"\\")
- for c in pattern:
- if c in alphanum:
- s.append(c)
- else:
- if c == 0:
- s.extend(b"\\000")
- else:
- s.append(esc)
- s.append(c)
- return bytes(s)
+ pattern = str(pattern, 'latin1')
+ return pattern.translate(_special_chars_map).encode('latin1')
# --------------------------------------------------------------------
# internals
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index a1fddfb..b3b29f8 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -904,7 +904,7 @@
self.assertEqual(re.search(r"a\s", "a ").group(0), "a ")
def assertMatch(self, pattern, text, match=None, span=None,
- matcher=re.match):
+ matcher=re.fullmatch):
if match is None and span is None:
# the pattern matches the whole text
match = text
@@ -917,37 +917,38 @@
self.assertEqual(m.group(), match)
self.assertEqual(m.span(), span)
+ LITERAL_CHARS = string.ascii_letters + string.digits + '!"%&\',/:;<=>@_`~'
+
def test_re_escape(self):
- alnum_chars = string.ascii_letters + string.digits + '_'
p = ''.join(chr(i) for i in range(256))
for c in p:
- if c in alnum_chars:
- self.assertEqual(re.escape(c), c)
- elif c == '\x00':
- self.assertEqual(re.escape(c), '\\000')
- else:
- self.assertEqual(re.escape(c), '\\' + c)
self.assertMatch(re.escape(c), c)
+ self.assertMatch('[' + re.escape(c) + ']', c)
+ self.assertMatch('(?x)' + re.escape(c), c)
self.assertMatch(re.escape(p), p)
+ for c in '-.]{}':
+ self.assertEqual(re.escape(c)[:1], '\\')
+ literal_chars = self.LITERAL_CHARS
+ self.assertEqual(re.escape(literal_chars), literal_chars)
- def test_re_escape_byte(self):
- alnum_chars = (string.ascii_letters + string.digits + '_').encode('ascii')
+ def test_re_escape_bytes(self):
p = bytes(range(256))
for i in p:
b = bytes([i])
- if b in alnum_chars:
- self.assertEqual(re.escape(b), b)
- elif i == 0:
- self.assertEqual(re.escape(b), b'\\000')
- else:
- self.assertEqual(re.escape(b), b'\\' + b)
self.assertMatch(re.escape(b), b)
+ self.assertMatch(b'[' + re.escape(b) + b']', b)
+ self.assertMatch(b'(?x)' + re.escape(b), b)
self.assertMatch(re.escape(p), p)
+ for i in b'-.]{}':
+ b = bytes([i])
+ self.assertEqual(re.escape(b)[:1], b'\\')
+ literal_chars = self.LITERAL_CHARS.encode('ascii')
+ self.assertEqual(re.escape(literal_chars), literal_chars)
def test_re_escape_non_ascii(self):
s = 'xxx\u2620\u2620\u2620xxx'
s_escaped = re.escape(s)
- self.assertEqual(s_escaped, 'xxx\\\u2620\\\u2620\\\u2620xxx')
+ self.assertEqual(s_escaped, s)
self.assertMatch(s_escaped, s)
self.assertMatch('.%s+.' % re.escape('\u2620'), s,
'x\u2620\u2620\u2620x', (2, 7), re.search)
@@ -955,7 +956,7 @@
def test_re_escape_non_ascii_bytes(self):
b = 'y\u2620y\u2620y'.encode('utf-8')
b_escaped = re.escape(b)
- self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
+ self.assertEqual(b_escaped, b)
self.assertMatch(b_escaped, b)
res = re.findall(re.escape('\u2620'.encode('utf-8')), b)
self.assertEqual(len(res), 2)