bpo-35766: Merge typed_ast back into CPython (GH-11645)

diff --git a/Parser/Python.asdl b/Parser/Python.asdl
index 7b2a873..85b686d 100644
--- a/Parser/Python.asdl
+++ b/Parser/Python.asdl
@@ -3,17 +3,20 @@
 
 module Python
 {
-    mod = Module(stmt* body)
+    mod = Module(stmt* body, type_ignore *type_ignores)
         | Interactive(stmt* body)
         | Expression(expr body)
+        | FunctionType(expr* argtypes, expr returns)
 
         -- not really an actual node but useful in Jython's typesystem.
         | Suite(stmt* body)
 
     stmt = FunctionDef(identifier name, arguments args,
-                       stmt* body, expr* decorator_list, expr? returns)
+                       stmt* body, expr* decorator_list, expr? returns,
+                       string? type_comment)
           | AsyncFunctionDef(identifier name, arguments args,
-                             stmt* body, expr* decorator_list, expr? returns)
+                             stmt* body, expr* decorator_list, expr? returns,
+                             string? type_comment)
 
           | ClassDef(identifier name,
              expr* bases,
@@ -23,18 +26,18 @@
           | Return(expr? value)
 
           | Delete(expr* targets)
-          | Assign(expr* targets, expr value)
+          | Assign(expr* targets, expr value, string? type_comment)
           | AugAssign(expr target, operator op, expr value)
           -- 'simple' indicates that we annotate simple name without parens
           | AnnAssign(expr target, expr annotation, expr? value, int simple)
 
           -- use 'orelse' because else is a keyword in target languages
-          | For(expr target, expr iter, stmt* body, stmt* orelse)
-          | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse)
+          | For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
+          | AsyncFor(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
           | While(expr test, stmt* body, stmt* orelse)
           | If(expr test, stmt* body, stmt* orelse)
-          | With(withitem* items, stmt* body)
-          | AsyncWith(withitem* items, stmt* body)
+          | With(withitem* items, stmt* body, string? type_comment)
+          | AsyncWith(withitem* items, stmt* body, string? type_comment)
 
           | Raise(expr? exc, expr? cause)
           | Try(stmt* body, excepthandler* handlers, stmt* orelse, stmt* finalbody)
@@ -111,7 +114,7 @@
     arguments = (arg* args, arg? vararg, arg* kwonlyargs, expr* kw_defaults,
                  arg? kwarg, expr* defaults)
 
-    arg = (identifier arg, expr? annotation)
+    arg = (identifier arg, expr? annotation, string? type_comment)
            attributes (int lineno, int col_offset, int? end_lineno, int? end_col_offset)
 
     -- keyword arguments supplied to call (NULL identifier for **kwargs)
@@ -121,5 +124,7 @@
     alias = (identifier name, identifier? asname)
 
     withitem = (expr context_expr, expr? optional_vars)
+
+    type_ignore = TypeIgnore(int lineno)
 }
 
diff --git a/Parser/asdl_c.py b/Parser/asdl_c.py
index 8640b29..a51a5db 100644
--- a/Parser/asdl_c.py
+++ b/Parser/asdl_c.py
@@ -890,6 +890,15 @@
     return obj2ast_object(obj, out, arena);
 }
 
+static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena)
+{
+    if (!PyUnicode_CheckExact(obj) && !PyBytes_CheckExact(obj)) {
+        PyErr_SetString(PyExc_TypeError, "AST string must be of type str");
+        return 1;
+    }
+    return obj2ast_object(obj, out, arena);
+}
+
 static int obj2ast_int(PyObject* obj, int* out, PyArena* arena)
 {
     int i;
@@ -993,6 +1002,8 @@
         self.emit('if (PyDict_SetItemString(d, "AST", (PyObject*)&AST_type) < 0) return NULL;', 1)
         self.emit('if (PyModule_AddIntMacro(m, PyCF_ONLY_AST) < 0)', 1)
         self.emit("return NULL;", 2)
+        self.emit('if (PyModule_AddIntMacro(m, PyCF_TYPE_COMMENTS) < 0)', 1)
+        self.emit("return NULL;", 2)
         for dfn in mod.dfns:
             self.visit(dfn)
         self.emit("return m;", 1)
@@ -1176,18 +1187,19 @@
 }
 
 /* mode is 0 for "exec", 1 for "eval" and 2 for "single" input */
+/* and 3 for "func_type" */
 mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode)
 {
     mod_ty res;
     PyObject *req_type[3];
-    char *req_name[] = {"Module", "Expression", "Interactive"};
+    char *req_name[] = {"Module", "Expression", "Interactive", "FunctionType"};
     int isinstance;
 
     req_type[0] = (PyObject*)Module_type;
     req_type[1] = (PyObject*)Expression_type;
     req_type[2] = (PyObject*)Interactive_type;
 
-    assert(0 <= mode && mode <= 2);
+    assert(0 <= mode && mode <= 3);
 
     if (!init_types())
         return NULL;
diff --git a/Parser/parser.c b/Parser/parser.c
index a9916d3..fa4a8f0 100644
--- a/Parser/parser.c
+++ b/Parser/parser.c
@@ -12,6 +12,7 @@
 #include "node.h"
 #include "parser.h"
 #include "errcode.h"
+#include "graminit.h"
 
 
 #ifdef Py_DEBUG
@@ -260,7 +261,15 @@
                     /* Push non-terminal */
                     int nt = (x >> 8) + NT_OFFSET;
                     int arrow = x & ((1<<7)-1);
-                    dfa *d1 = PyGrammar_FindDFA(
+                    dfa *d1;
+                    if (nt == func_body_suite && !(ps->p_flags & PyCF_TYPE_COMMENTS)) {
+                        /* When parsing type comments is not requested,
+                           we can provide better errors about bad indentation
+                           by using 'suite' for the body of a funcdef */
+                        D(printf(" [switch func_body_suite to suite]"));
+                        nt = suite;
+                    }
+                    d1 = PyGrammar_FindDFA(
                         ps->p_grammar, nt);
                     if ((err = push(&ps->p_stack, nt, d1,
                         arrow, lineno, col_offset,
@@ -268,7 +277,7 @@
                         D(printf(" MemError: push\n"));
                         return err;
                     }
-                    D(printf(" Push ...\n"));
+                    D(printf(" Push '%s'\n", d1->d_name));
                     continue;
                 }
 
diff --git a/Parser/parsetok.c b/Parser/parsetok.c
index 2b5254a..7fddc5a 100644
--- a/Parser/parsetok.c
+++ b/Parser/parsetok.c
@@ -15,6 +15,42 @@
 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
 static int initerr(perrdetail *err_ret, PyObject * filename);
 
+typedef struct {
+    int *items;
+    size_t size;
+    size_t num_items;
+} growable_int_array;
+
+static int
+growable_int_array_init(growable_int_array *arr, size_t initial_size) {
+    assert(initial_size > 0);
+    arr->items = malloc(initial_size * sizeof(*arr->items));
+    arr->size = initial_size;
+    arr->num_items = 0;
+
+    return arr->items != NULL;
+}
+
+static int
+growable_int_array_add(growable_int_array *arr, int item) {
+    if (arr->num_items >= arr->size) {
+        arr->size *= 2;
+        arr->items = realloc(arr->items, arr->size * sizeof(*arr->items));
+        if (!arr->items) {
+            return 0;
+        }
+    }
+
+    arr->items[arr->num_items] = item;
+    arr->num_items++;
+    return 1;
+}
+
+static void
+growable_int_array_deallocate(growable_int_array *arr) {
+    free(arr->items);
+}
+
 /* Parse input coming from a string.  Return error code, print some errors. */
 node *
 PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
@@ -59,6 +95,9 @@
         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
         return NULL;
     }
+    if (*flags & PyPARSE_TYPE_COMMENTS) {
+        tok->type_comments = 1;
+    }
 
 #ifndef PGEN
     Py_INCREF(err_ret->filename);
@@ -127,6 +166,9 @@
         err_ret->error = E_NOMEM;
         return NULL;
     }
+    if (*flags & PyPARSE_TYPE_COMMENTS) {
+        tok->type_comments = 1;
+    }
 #ifndef PGEN
     Py_INCREF(err_ret->filename);
     tok->filename = err_ret->filename;
@@ -188,6 +230,13 @@
     node *n;
     int started = 0;
     int col_offset, end_col_offset;
+    growable_int_array type_ignores;
+
+    if (!growable_int_array_init(&type_ignores, 10)) {
+        err_ret->error = E_NOMEM;
+        PyTokenizer_Free(tok);
+        return NULL;
+    }
 
     if ((ps = PyParser_New(g, start)) == NULL) {
         err_ret->error = E_NOMEM;
@@ -197,6 +246,8 @@
 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
     if (*flags & PyPARSE_BARRY_AS_BDFL)
         ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
+    if (*flags & PyPARSE_TYPE_COMMENTS)
+        ps->p_flags |= PyCF_TYPE_COMMENTS;
 #endif
 
     for (;;) {
@@ -277,6 +328,15 @@
         else {
             end_col_offset = -1;
         }
+
+        if (type == TYPE_IGNORE) {
+            if (!growable_int_array_add(&type_ignores, tok->lineno)) {
+                err_ret->error = E_NOMEM;
+                break;
+            }
+            continue;
+        }
+
         if ((err_ret->error =
              PyParser_AddToken(ps, (int)type, str,
                                lineno, col_offset, tok->lineno, end_col_offset,
@@ -293,6 +353,24 @@
         n = ps->p_tree;
         ps->p_tree = NULL;
 
+        if (n->n_type == file_input) {
+            /* Put type_ignore nodes in the ENDMARKER of file_input. */
+            int num;
+            node *ch;
+            size_t i;
+
+            num = NCH(n);
+            ch = CHILD(n, num - 1);
+            REQ(ch, ENDMARKER);
+
+            for (i = 0; i < type_ignores.num_items; i++) {
+                PyNode_AddChild(ch, TYPE_IGNORE, NULL,
+                                type_ignores.items[i], 0,
+                                type_ignores.items[i], 0);
+            }
+        }
+        growable_int_array_deallocate(&type_ignores);
+
 #ifndef PGEN
         /* Check that the source for a single input statement really
            is a single statement by looking at what is left in the
diff --git a/Parser/token.c b/Parser/token.c
index d27f98a..228ecff 100644
--- a/Parser/token.c
+++ b/Parser/token.c
@@ -61,6 +61,8 @@
     "ELLIPSIS",
     "COLONEQUAL",
     "OP",
+    "TYPE_IGNORE",
+    "TYPE_COMMENT",
     "<ERRORTOKEN>",
     "<COMMENT>",
     "<NL>",
diff --git a/Parser/tokenizer.c b/Parser/tokenizer.c
index 3e3cf2c..1ded9ad 100644
--- a/Parser/tokenizer.c
+++ b/Parser/tokenizer.c
@@ -48,6 +48,10 @@
 static void tok_backup(struct tok_state *tok, int c);
 
 
+/* Spaces in this constant are treated as "zero or more spaces or tabs" when
+   tokenizing. */
+static const char* type_comment_prefix = "# type: ";
+
 /* Create and initialize a new tok_state structure */
 
 static struct tok_state *
@@ -82,6 +86,7 @@
     tok->decoding_readline = NULL;
     tok->decoding_buffer = NULL;
 #endif
+    tok->type_comments = 0;
 
     return tok;
 }
@@ -1245,11 +1250,61 @@
     /* Set start of current token */
     tok->start = tok->cur - 1;
 
-    /* Skip comment */
+    /* Skip comment, unless it's a type comment */
     if (c == '#') {
+        const char *prefix, *p, *type_start;
+
         while (c != EOF && c != '\n') {
             c = tok_nextc(tok);
         }
+
+        if (tok->type_comments) {
+            p = tok->start;
+            prefix = type_comment_prefix;
+            while (*prefix && p < tok->cur) {
+                if (*prefix == ' ') {
+                    while (*p == ' ' || *p == '\t') {
+                        p++;
+                    }
+                } else if (*prefix == *p) {
+                    p++;
+                } else {
+                    break;
+                }
+
+                prefix++;
+            }
+
+            /* This is a type comment if we matched all of type_comment_prefix. */
+            if (!*prefix) {
+                int is_type_ignore = 1;
+                tok_backup(tok, c);  /* don't eat the newline or EOF */
+
+                type_start = p;
+
+                is_type_ignore = tok->cur >= p + 6 && memcmp(p, "ignore", 6) == 0;
+                p += 6;
+                while (is_type_ignore && p < tok->cur) {
+                    if (*p == '#')
+                        break;
+                    is_type_ignore = is_type_ignore && (*p == ' ' || *p == '\t');
+                    p++;
+                }
+
+                if (is_type_ignore) {
+                    /* If this type ignore is the only thing on the line, consume the newline also. */
+                    if (blankline) {
+                        tok_nextc(tok);
+                        tok->atbol = 1;
+                    }
+                    return TYPE_IGNORE;
+                } else {
+                    *p_start = (char *) type_start;  /* after type_comment_prefix */
+                    *p_end = tok->cur;
+                    return TYPE_COMMENT;
+                }
+            }
+        }
     }
 
     /* Check for EOF and errors now */
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index 096ce68..9639c65 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h
@@ -70,6 +70,8 @@
     const char* enc;        /* Encoding for the current str. */
     const char* str;
     const char* input; /* Tokenizer's newline translated copy of the string. */
+
+    int type_comments;      /* Whether to look for type comments */
 };
 
 extern struct tok_state *PyTokenizer_FromString(const char *, int);