bpo-30215: Make re.compile() locale agnostic. (#1361)
Compiled regular expression objects with the re.LOCALE flag no longer
depend on the locale at compile time. Only the locale at matching
time affects the result of matching.
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index da5c953..7601dc8 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1730,6 +1730,38 @@
self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
+ def test_locale_compiled(self):  # bpo-30215: re.L patterns must follow the match-time locale
+ oldlocale = locale.setlocale(locale.LC_CTYPE)  # remember current LC_CTYPE for restoration
+ self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
+ for loc in 'en_US.iso88591', 'en_US.utf8':  # probe both locales before asserting anything
+ try:
+ locale.setlocale(locale.LC_CTYPE, loc)
+ except locale.Error:
+ # Locale not available on this system; the test needs both, so skip it entirely.
+ self.skipTest('test needs %s locale' % loc)
+
+ locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')  # locale in effect at compile time
+ p1 = re.compile(b'\xc5\xe5', re.L|re.I)  # literal bytes
+ p2 = re.compile(b'[a\xc5][a\xe5]', re.L|re.I)  # small character sets
+ p3 = re.compile(b'[az\xc5][az\xe5]', re.L|re.I)  # slightly larger character sets
+ p4 = re.compile(b'[^\xc5][^\xe5]', re.L|re.I)  # negated character sets
+ for p in p1, p2, p3:  # Latin-1: 0xC5/0xE5 are an upper/lower case pair, so every mix matches
+ self.assertTrue(p.match(b'\xc5\xe5'))
+ self.assertTrue(p.match(b'\xe5\xe5'))
+ self.assertTrue(p.match(b'\xc5\xc5'))
+ self.assertIsNone(p4.match(b'\xe5\xc5'))  # negated sets exclude both cases of the byte
+ self.assertIsNone(p4.match(b'\xe5\xe5'))
+ self.assertIsNone(p4.match(b'\xc5\xc5'))
+
+ locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')  # switch locale; p1-p4 are reused, not recompiled
+ for p in p1, p2, p3:  # no case folding expected for these bytes now: only the exact bytes match
+ self.assertTrue(p.match(b'\xc5\xe5'))
+ self.assertIsNone(p.match(b'\xe5\xe5'))
+ self.assertIsNone(p.match(b'\xc5\xc5'))
+ self.assertTrue(p4.match(b'\xe5\xc5'))  # each byte differs exactly from the excluded one
+ self.assertIsNone(p4.match(b'\xe5\xe5'))  # second byte equals the excluded 0xE5
+ self.assertIsNone(p4.match(b'\xc5\xc5'))  # first byte equals the excluded 0xC5
+
def test_error(self):
with self.assertRaises(re.error) as cm:
re.compile('(\u20ac))')