bpo-30215: Make re.compile() locale agnostic. (#1361)

Compiled regular expression objects with the re.LOCALE flag no longer
depend on the locale at compile time.  Only the locale in effect at
matching time affects the result of matching.
diff --git a/Lib/re.py b/Lib/re.py
index 7053edd..d0ee5db 100644
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -268,9 +268,7 @@
 def _compile(pattern, flags):
     # internal: compile pattern
     try:
-        p, loc = _cache[type(pattern), pattern, flags]
-        if loc is None or loc == _locale.setlocale(_locale.LC_CTYPE):
-            return p
+        return _cache[type(pattern), pattern, flags]
     except KeyError:
         pass
     if isinstance(pattern, _pattern_type):
@@ -284,13 +282,7 @@
     if not (flags & DEBUG):
         if len(_cache) >= _MAXCACHE:
             _cache.clear()
-        if p.flags & LOCALE:
-            if not _locale:
-                return p
-            loc = _locale.setlocale(_locale.LC_CTYPE)
-        else:
-            loc = None
-        _cache[type(pattern), pattern, flags] = p, loc
+        _cache[type(pattern), pattern, flags] = p
     return p
 
 @functools.lru_cache(_MAXCACHE)
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
index 2cc3900..d7ee4e8 100644
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -78,7 +78,13 @@
         fixes = None
     for op, av in pattern:
         if op in LITERAL_CODES:
-            if flags & SRE_FLAG_IGNORECASE:
+            if not flags & SRE_FLAG_IGNORECASE:
+                emit(op)
+                emit(av)
+            elif flags & SRE_FLAG_LOCALE:
+                emit(OP_LOC_IGNORE[op])
+                emit(av)
+            else:
                 lo = _sre.getlower(av, flags)
                 if fixes and lo in fixes:
                     emit(IN_IGNORE)
@@ -93,17 +99,17 @@
                 else:
                     emit(OP_IGNORE[op])
                     emit(lo)
-            else:
-                emit(op)
-                emit(av)
         elif op is IN:
-            if flags & SRE_FLAG_IGNORECASE:
-                emit(OP_IGNORE[op])
-                def fixup(literal, flags=flags):
-                    return _sre.getlower(literal, flags)
-            else:
+            if not flags & SRE_FLAG_IGNORECASE:
                 emit(op)
                 fixup = None
+            elif flags & SRE_FLAG_LOCALE:
+                emit(IN_LOC_IGNORE)
+                fixup = None
+            else:
+                emit(IN_IGNORE)
+                def fixup(literal, flags=flags):
+                    return _sre.getlower(literal, flags)
             skip = _len(code); emit(0)
             _compile_charset(av, flags, code, fixup, fixes)
             code[skip] = _len(code) - skip
diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
index fc684ae..b016431 100644
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -13,7 +13,7 @@
 
 # update when constants are added or removed
 
-MAGIC = 20140917
+MAGIC = 20170530
 
 from _sre import MAXREPEAT, MAXGROUPS
 
@@ -87,6 +87,9 @@
     SUBPATTERN
     MIN_REPEAT_ONE
     RANGE_IGNORE
+    LITERAL_LOC_IGNORE
+    NOT_LITERAL_LOC_IGNORE
+    IN_LOC_IGNORE
 
     MIN_REPEAT MAX_REPEAT
 """)
@@ -124,6 +127,11 @@
     RANGE: RANGE_IGNORE,
 }
 
+OP_LOC_IGNORE = {
+    LITERAL: LITERAL_LOC_IGNORE,
+    NOT_LITERAL: NOT_LITERAL_LOC_IGNORE,
+}
+
 AT_MULTILINE = {
     AT_BEGINNING: AT_BEGINNING_LINE,
     AT_END: AT_END_LINE
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index da5c953..7601dc8 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1730,6 +1730,38 @@
         self.assertIsNone(re.match(b'(?Li)\xc5', b'\xe5'))
         self.assertIsNone(re.match(b'(?Li)\xe5', b'\xc5'))
 
+    def test_locale_compiled(self):
+        oldlocale = locale.setlocale(locale.LC_CTYPE)
+        self.addCleanup(locale.setlocale, locale.LC_CTYPE, oldlocale)
+        for loc in 'en_US.iso88591', 'en_US.utf8':
+            try:
+                locale.setlocale(locale.LC_CTYPE, loc)
+            except locale.Error:
+                # Unsupported locale on this system
+                self.skipTest('test needs %s locale' % loc)
+
+        locale.setlocale(locale.LC_CTYPE, 'en_US.iso88591')
+        p1 = re.compile(b'\xc5\xe5', re.L|re.I)
+        p2 = re.compile(b'[a\xc5][a\xe5]', re.L|re.I)
+        p3 = re.compile(b'[az\xc5][az\xe5]', re.L|re.I)
+        p4 = re.compile(b'[^\xc5][^\xe5]', re.L|re.I)
+        for p in p1, p2, p3:
+            self.assertTrue(p.match(b'\xc5\xe5'))
+            self.assertTrue(p.match(b'\xe5\xe5'))
+            self.assertTrue(p.match(b'\xc5\xc5'))
+        self.assertIsNone(p4.match(b'\xe5\xc5'))
+        self.assertIsNone(p4.match(b'\xe5\xe5'))
+        self.assertIsNone(p4.match(b'\xc5\xc5'))
+
+        locale.setlocale(locale.LC_CTYPE, 'en_US.utf8')
+        for p in p1, p2, p3:
+            self.assertTrue(p.match(b'\xc5\xe5'))
+            self.assertIsNone(p.match(b'\xe5\xe5'))
+            self.assertIsNone(p.match(b'\xc5\xc5'))
+        self.assertTrue(p4.match(b'\xe5\xc5'))
+        self.assertIsNone(p4.match(b'\xe5\xe5'))
+        self.assertIsNone(p4.match(b'\xc5\xc5'))
+
     def test_error(self):
         with self.assertRaises(re.error) as cm:
             re.compile('(\u20ac))')