bpo-31690: Allow the inline flags "a", "L", and "u" to be used as group flags for RE. (#3885)

diff --git a/Modules/_sre.c b/Modules/_sre.c
index c42ab26..a9b6b50 100644
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -97,12 +97,12 @@
 #define SRE_IS_WORD(ch)\
     ((ch) < 128 && (Py_ISALNUM(ch) || (ch) == '_'))
 
-static unsigned int sre_lower(unsigned int ch)
+static unsigned int sre_lower_ascii(unsigned int ch) /* lowercases via Py_TOLOWER only when ch < 128; larger values are returned unchanged */
 {
     return ((ch) < 128 ? Py_TOLOWER(ch) : ch);
 }
 
-static unsigned int sre_upper(unsigned int ch)
+static unsigned int sre_upper_ascii(unsigned int ch) /* uppercases via Py_TOUPPER only when ch < 128; larger values are returned unchanged */
 {
     return ((ch) < 128 ? Py_TOUPPER(ch) : ch);
 }
@@ -188,6 +188,15 @@
     return 0;
 }
 
+LOCAL(int)
+char_loc_ignore(SRE_CODE pattern, SRE_CODE ch)
+{   /* nonzero when ch equals pattern as-is, or after sre_lower_locale / sre_upper_locale conversion */
+    return ch == pattern
+        || (SRE_CODE) sre_lower_locale(ch) == pattern
+        || (SRE_CODE) sre_upper_locale(ch) == pattern;
+}
+
+
 /* helpers */
 
 static void
@@ -286,7 +295,7 @@
 /*[clinic end generated code: output=4f454b630fbd19a2 input=9f0bd952812c7ed3]*/
 {
     unsigned int ch = (unsigned int)character;
-    return ch != sre_lower(ch) || ch != sre_upper(ch);
+    return ch != sre_lower_ascii(ch) || ch != sre_upper_ascii(ch);
 }
 
 /*[clinic input]
@@ -317,7 +326,7 @@
 _sre_ascii_tolower_impl(PyObject *module, int character)
 /*[clinic end generated code: output=228294ed6ff2a612 input=272c609b5b61f136]*/
 {
-    return sre_lower(character);
+    return sre_lower_ascii(character);
 }
 
 /*[clinic input]
@@ -448,19 +457,6 @@
     state->pos = start;
     state->endpos = end;
 
-    if (pattern->flags & SRE_FLAG_LOCALE) {
-        state->lower = sre_lower_locale;
-        state->upper = sre_upper_locale;
-    }
-    else if (pattern->flags & SRE_FLAG_UNICODE) {
-        state->lower = sre_lower_unicode;
-        state->upper = sre_upper_unicode;
-    }
-    else {
-        state->lower = sre_lower;
-        state->upper = sre_upper;
-    }
-
     return string;
   err:
     PyMem_Del(state->mark);
@@ -1533,7 +1529,7 @@
             break;
 
         case SRE_OP_RANGE:
-        case SRE_OP_RANGE_IGNORE:
+        case SRE_OP_RANGE_UNI_IGNORE:
             GET_ARG;
             GET_ARG;
             break;
@@ -1630,6 +1626,8 @@
         case SRE_OP_NOT_LITERAL:
         case SRE_OP_LITERAL_IGNORE:
         case SRE_OP_NOT_LITERAL_IGNORE:
+        case SRE_OP_LITERAL_UNI_IGNORE:
+        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
         case SRE_OP_LITERAL_LOC_IGNORE:
         case SRE_OP_NOT_LITERAL_LOC_IGNORE:
             GET_ARG;
@@ -1669,6 +1667,7 @@
 
         case SRE_OP_IN:
         case SRE_OP_IN_IGNORE:
+        case SRE_OP_IN_UNI_IGNORE:
         case SRE_OP_IN_LOC_IGNORE:
             GET_SKIP;
             /* Stop 1 before the end; we check the FAILURE below */
@@ -1805,6 +1804,8 @@
 
         case SRE_OP_GROUPREF:
         case SRE_OP_GROUPREF_IGNORE:
+        case SRE_OP_GROUPREF_UNI_IGNORE:
+        case SRE_OP_GROUPREF_LOC_IGNORE:
             GET_ARG;
             if (arg >= (size_t)groups)
                 FAIL;
diff --git a/Modules/sre.h b/Modules/sre.h
index 9af5e40..585d284 100644
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -52,8 +52,6 @@
     Py_ssize_t mark[1];
 } MatchObject;
 
-typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch);
-
 typedef struct SRE_REPEAT_T {
     Py_ssize_t count;
     SRE_CODE* pattern; /* points to REPEAT operator arguments */
@@ -83,8 +81,6 @@
     Py_buffer buffer;
     /* current repeat context */
     SRE_REPEAT *repeat;
-    /* hooks */
-    SRE_TOLOWER_HOOK lower, upper;
 } SRE_STATE;
 
 typedef struct {
diff --git a/Modules/sre_constants.h b/Modules/sre_constants.h
index 6d6d21e..c8ccb32 100644
--- a/Modules/sre_constants.h
+++ b/Modules/sre_constants.h
@@ -11,7 +11,7 @@
  * See the _sre.c file for information on usage and redistribution.
  */
 
-#define SRE_MAGIC 20170530
+#define SRE_MAGIC 20171005
 #define SRE_OP_FAILURE 0
 #define SRE_OP_SUCCESS 1
 #define SRE_OP_ANY 2
@@ -26,28 +26,33 @@
 #define SRE_OP_BIGCHARSET 11
 #define SRE_OP_GROUPREF 12
 #define SRE_OP_GROUPREF_EXISTS 13
-#define SRE_OP_GROUPREF_IGNORE 14
-#define SRE_OP_IN 15
-#define SRE_OP_IN_IGNORE 16
-#define SRE_OP_INFO 17
-#define SRE_OP_JUMP 18
-#define SRE_OP_LITERAL 19
-#define SRE_OP_LITERAL_IGNORE 20
-#define SRE_OP_MARK 21
-#define SRE_OP_MAX_UNTIL 22
-#define SRE_OP_MIN_UNTIL 23
-#define SRE_OP_NOT_LITERAL 24
-#define SRE_OP_NOT_LITERAL_IGNORE 25
-#define SRE_OP_NEGATE 26
-#define SRE_OP_RANGE 27
-#define SRE_OP_REPEAT 28
-#define SRE_OP_REPEAT_ONE 29
-#define SRE_OP_SUBPATTERN 30
-#define SRE_OP_MIN_REPEAT_ONE 31
-#define SRE_OP_RANGE_IGNORE 32
-#define SRE_OP_LITERAL_LOC_IGNORE 33
-#define SRE_OP_NOT_LITERAL_LOC_IGNORE 34
-#define SRE_OP_IN_LOC_IGNORE 35
+#define SRE_OP_IN 14
+#define SRE_OP_INFO 15
+#define SRE_OP_JUMP 16
+#define SRE_OP_LITERAL 17
+#define SRE_OP_MARK 18
+#define SRE_OP_MAX_UNTIL 19
+#define SRE_OP_MIN_UNTIL 20
+#define SRE_OP_NOT_LITERAL 21
+#define SRE_OP_NEGATE 22
+#define SRE_OP_RANGE 23
+#define SRE_OP_REPEAT 24
+#define SRE_OP_REPEAT_ONE 25
+#define SRE_OP_SUBPATTERN 26
+#define SRE_OP_MIN_REPEAT_ONE 27
+#define SRE_OP_GROUPREF_IGNORE 28
+#define SRE_OP_IN_IGNORE 29
+#define SRE_OP_LITERAL_IGNORE 30
+#define SRE_OP_NOT_LITERAL_IGNORE 31
+#define SRE_OP_GROUPREF_LOC_IGNORE 32
+#define SRE_OP_IN_LOC_IGNORE 33
+#define SRE_OP_LITERAL_LOC_IGNORE 34
+#define SRE_OP_NOT_LITERAL_LOC_IGNORE 35
+#define SRE_OP_GROUPREF_UNI_IGNORE 36
+#define SRE_OP_IN_UNI_IGNORE 37
+#define SRE_OP_LITERAL_UNI_IGNORE 38
+#define SRE_OP_NOT_LITERAL_UNI_IGNORE 39
+#define SRE_OP_RANGE_UNI_IGNORE 40
 #define SRE_AT_BEGINNING 0
 #define SRE_AT_BEGINNING_LINE 1
 #define SRE_AT_BEGINNING_STRING 2
diff --git a/Modules/sre_lib.h b/Modules/sre_lib.h
index b540d21..e13b90e 100644
--- a/Modules/sre_lib.h
+++ b/Modules/sre_lib.h
@@ -101,14 +101,6 @@
 }
 
 LOCAL(int)
-SRE(char_loc_ignore)(SRE_STATE* state, SRE_CODE pattern, SRE_CODE ch)
-{
-    return ch == pattern
-        || (SRE_CODE) state->lower(ch) == pattern
-        || (SRE_CODE) state->upper(ch) == pattern;
-}
-
-LOCAL(int)
 SRE(charset)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
 {
     /* check if character is a member of the given set */
@@ -150,14 +142,14 @@
             set += 2;
             break;
 
-        case SRE_OP_RANGE_IGNORE:
-            /* <RANGE_IGNORE> <lower> <upper> */
+        case SRE_OP_RANGE_UNI_IGNORE:
+            /* <RANGE_UNI_IGNORE> <lower> <upper> */
         {
             SRE_CODE uch;
             /* ch is already lower cased */
             if (set[0] <= ch && ch <= set[1])
                 return ok;
-            uch = state->upper(ch);
+            uch = sre_upper_unicode(ch);
             if (set[0] <= uch && uch <= set[1])
                 return ok;
             set += 2;
@@ -199,11 +191,11 @@
 SRE(charset_loc_ignore)(SRE_STATE* state, SRE_CODE* set, SRE_CODE ch)
 {
     SRE_CODE lo, up;
-    lo = state->lower(ch);
+    lo = sre_lower_locale(ch);
     if (SRE(charset)(state, set, lo))
        return 1;
 
-    up = state->upper(ch);
+    up = sre_upper_locale(ch);
     return up != lo && SRE(charset)(state, set, up);
 }
 
@@ -263,7 +255,15 @@
         /* repeated literal */
         chr = pattern[1];
         TRACE(("|%p|%p|COUNT LITERAL_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) state->lower(*ptr) == chr)
+        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) == chr)
+            ptr++;
+        break;
+
+    case SRE_OP_LITERAL_UNI_IGNORE:
+        /* repeated literal */
+        chr = pattern[1];
+        TRACE(("|%p|%p|COUNT LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
+        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) == chr)
             ptr++;
         break;
 
@@ -271,7 +271,7 @@
         /* repeated literal */
         chr = pattern[1];
         TRACE(("|%p|%p|COUNT LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && SRE(char_loc_ignore)(state, chr, *ptr))
+        while (ptr < end && char_loc_ignore(chr, *ptr))
             ptr++;
         break;
 
@@ -293,7 +293,15 @@
         /* repeated non-literal */
         chr = pattern[1];
         TRACE(("|%p|%p|COUNT NOT_LITERAL_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && (SRE_CODE) state->lower(*ptr) != chr)
+        while (ptr < end && (SRE_CODE) sre_lower_ascii(*ptr) != chr)
+            ptr++;
+        break;
+
+    case SRE_OP_NOT_LITERAL_UNI_IGNORE:
+        /* repeated non-literal */
+        chr = pattern[1];
+        TRACE(("|%p|%p|COUNT NOT_LITERAL_UNI_IGNORE %d\n", pattern, ptr, chr));
+        while (ptr < end && (SRE_CODE) sre_lower_unicode(*ptr) != chr)
             ptr++;
         break;
 
@@ -301,7 +309,7 @@
         /* repeated non-literal */
         chr = pattern[1];
         TRACE(("|%p|%p|COUNT NOT_LITERAL_LOC_IGNORE %d\n", pattern, ptr, chr));
-        while (ptr < end && !SRE(char_loc_ignore)(state, chr, *ptr))
+        while (ptr < end && !char_loc_ignore(chr, *ptr))
             ptr++;
         break;
 
@@ -687,7 +695,17 @@
             TRACE(("|%p|%p|LITERAL_IGNORE %d\n",
                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
             if (ctx->ptr >= end ||
-                state->lower(*ctx->ptr) != *ctx->pattern)
+                sre_lower_ascii(*ctx->ptr) != *ctx->pattern)
+                RETURN_FAILURE;
+            ctx->pattern++;
+            ctx->ptr++;
+            break;
+
+        case SRE_OP_LITERAL_UNI_IGNORE:
+            TRACE(("|%p|%p|LITERAL_UNI_IGNORE %d\n",
+                   ctx->pattern, ctx->ptr, ctx->pattern[0]));
+            if (ctx->ptr >= end ||
+                sre_lower_unicode(*ctx->ptr) != *ctx->pattern)
                 RETURN_FAILURE;
             ctx->pattern++;
             ctx->ptr++;
@@ -697,7 +715,7 @@
             TRACE(("|%p|%p|LITERAL_LOC_IGNORE %d\n",
                    ctx->pattern, ctx->ptr, ctx->pattern[0]));
             if (ctx->ptr >= end
-                || !SRE(char_loc_ignore)(state, *ctx->pattern, *ctx->ptr))
+                || !char_loc_ignore(*ctx->pattern, *ctx->ptr))
                 RETURN_FAILURE;
             ctx->pattern++;
             ctx->ptr++;
@@ -707,7 +725,17 @@
             TRACE(("|%p|%p|NOT_LITERAL_IGNORE %d\n",
                    ctx->pattern, ctx->ptr, *ctx->pattern));
             if (ctx->ptr >= end ||
-                state->lower(*ctx->ptr) == *ctx->pattern)
+                sre_lower_ascii(*ctx->ptr) == *ctx->pattern)
+                RETURN_FAILURE;
+            ctx->pattern++;
+            ctx->ptr++;
+            break;
+
+        case SRE_OP_NOT_LITERAL_UNI_IGNORE:
+            TRACE(("|%p|%p|NOT_LITERAL_UNI_IGNORE %d\n",
+                   ctx->pattern, ctx->ptr, *ctx->pattern));
+            if (ctx->ptr >= end ||
+                sre_lower_unicode(*ctx->ptr) == *ctx->pattern)
                 RETURN_FAILURE;
             ctx->pattern++;
             ctx->ptr++;
@@ -717,7 +745,7 @@
             TRACE(("|%p|%p|NOT_LITERAL_LOC_IGNORE %d\n",
                    ctx->pattern, ctx->ptr, *ctx->pattern));
             if (ctx->ptr >= end
-                || SRE(char_loc_ignore)(state, *ctx->pattern, *ctx->ptr))
+                || char_loc_ignore(*ctx->pattern, *ctx->ptr))
                 RETURN_FAILURE;
             ctx->pattern++;
             ctx->ptr++;
@@ -727,7 +755,17 @@
             TRACE(("|%p|%p|IN_IGNORE\n", ctx->pattern, ctx->ptr));
             if (ctx->ptr >= end
                 || !SRE(charset)(state, ctx->pattern+1,
-                                 (SRE_CODE)state->lower(*ctx->ptr)))
+                                 (SRE_CODE)sre_lower_ascii(*ctx->ptr)))
+                RETURN_FAILURE;
+            ctx->pattern += ctx->pattern[0];
+            ctx->ptr++;
+            break;
+
+        case SRE_OP_IN_UNI_IGNORE:
+            TRACE(("|%p|%p|IN_UNI_IGNORE\n", ctx->pattern, ctx->ptr));
+            if (ctx->ptr >= end
+                || !SRE(charset)(state, ctx->pattern+1,
+                                 (SRE_CODE)sre_lower_unicode(*ctx->ptr)))
                 RETURN_FAILURE;
             ctx->pattern += ctx->pattern[0];
             ctx->ptr++;
@@ -1135,7 +1173,59 @@
                         RETURN_FAILURE;
                     while (p < e) {
                         if (ctx->ptr >= end ||
-                            state->lower(*ctx->ptr) != state->lower(*p))
+                            sre_lower_ascii(*ctx->ptr) != sre_lower_ascii(*p))
+                            RETURN_FAILURE;
+                        p++;
+                        ctx->ptr++;
+                    }
+                }
+            }
+            ctx->pattern++;
+            break;
+
+        case SRE_OP_GROUPREF_UNI_IGNORE:
+            /* match backreference */
+            TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", ctx->pattern,
+                   ctx->ptr, ctx->pattern[0]));
+            i = ctx->pattern[0];
+            {
+                Py_ssize_t groupref = i+i;
+                if (groupref >= state->lastmark) {
+                    RETURN_FAILURE;
+                } else {
+                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
+                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
+                    if (!p || !e || e < p)
+                        RETURN_FAILURE;
+                    while (p < e) {
+                        if (ctx->ptr >= end ||
+                            sre_lower_unicode(*ctx->ptr) != sre_lower_unicode(*p))
+                            RETURN_FAILURE;
+                        p++;
+                        ctx->ptr++;
+                    }
+                }
+            }
+            ctx->pattern++;
+            break;
+
+        case SRE_OP_GROUPREF_LOC_IGNORE:
+            /* match backreference */
+            TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", ctx->pattern,
+                   ctx->ptr, ctx->pattern[0]));
+            i = ctx->pattern[0];
+            {
+                Py_ssize_t groupref = i+i;
+                if (groupref >= state->lastmark) {
+                    RETURN_FAILURE;
+                } else {
+                    SRE_CHAR* p = (SRE_CHAR*) state->mark[groupref];
+                    SRE_CHAR* e = (SRE_CHAR*) state->mark[groupref+1];
+                    if (!p || !e || e < p)
+                        RETURN_FAILURE;
+                    while (p < e) {
+                        if (ctx->ptr >= end ||
+                            sre_lower_locale(*ctx->ptr) != sre_lower_locale(*p))
                             RETURN_FAILURE;
                         p++;
                         ctx->ptr++;