Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 1 | #ifndef PEGEN_H |
| 2 | #define PEGEN_H |
| 3 | |
| 4 | #define PY_SSIZE_T_CLEAN |
| 5 | #include <Python.h> |
| 6 | #include <token.h> |
| 7 | #include <Python-ast.h> |
| 8 | #include <pyarena.h> |
| 9 | |
/*
 * PyPARSE_* flag constants.
 *
 * The definitions that are compiled in each use a distinct single bit.
 * Definitions wrapped in "#if 0" are compiled out; they are kept here
 * for reference only and are never visible to the preprocessor.
 */
#if 0
#define PyPARSE_YIELD_IS_KEYWORD    0x0001
#endif

#define PyPARSE_DONT_IMPLY_DEDENT   0x0002

#if 0
#define PyPARSE_WITH_IS_KEYWORD     0x0003
#define PyPARSE_PRINT_IS_FUNCTION   0x0004
#define PyPARSE_UNICODE_LITERALS    0x0008
#endif

#define PyPARSE_IGNORE_COOKIE       0x0010
#define PyPARSE_BARRY_AS_BDFL       0x0020
#define PyPARSE_TYPE_COMMENTS       0x0040
#define PyPARSE_ASYNC_HACKS         0x0080
| 26 | |
/*
 * Node of a singly linked list; Token.memo points at the head of such a list.
 * The fields line up with the (mark, type, node) arguments of
 * _PyPegen_insert_memo() / _PyPegen_update_memo().
 */
typedef struct _memo Memo;

struct _memo {
    int type;
    void *node;
    int mark;
    Memo *next;     // Next entry in the list, or NULL at the end
};
| 33 | |
| 34 | typedef struct { |
| 35 | int type; |
| 36 | PyObject *bytes; |
Lysandros Nikolaou | 861efc6 | 2020-06-20 15:57:27 +0300 | [diff] [blame] | 37 | int lineno, col_offset, end_lineno, end_col_offset; |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 38 | Memo *memo; |
| 39 | } Token; |
| 40 | |
/*
 * Associates a string with an integer token type.
 * Parser.keywords is an array of pointers to these entries.
 */
typedef struct {
    char *str;      // presumably the keyword's text -- confirm against the keyword tables
    int type;
} KeywordToken;
| 45 | |
Guido van Rossum | c001c09 | 2020-04-30 12:12:19 -0700 | [diff] [blame] | 46 | |
/*
 * Dynamically sized array of "# type: ignore" comment records
 * (stored in Parser.type_ignore_comments).
 */
typedef struct {
    struct {
        int lineno;
        char *comment;  // The " <tag>" in "# type: ignore <tag>"
    } *items;
    size_t size;        // NOTE(review): looks like the allocated capacity -- confirm
    size_t num_items;   // NOTE(review): looks like the number of used entries -- confirm
} growable_comment_array;
| 55 | |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 56 | typedef struct { |
| 57 | struct tok_state *tok; |
| 58 | Token **tokens; |
| 59 | int mark; |
| 60 | int fill, size; |
| 61 | PyArena *arena; |
| 62 | KeywordToken **keywords; |
| 63 | int n_keyword_lists; |
| 64 | int start_rule; |
| 65 | int *errcode; |
| 66 | int parsing_started; |
| 67 | PyObject* normalize; |
| 68 | int starting_lineno; |
| 69 | int starting_col_offset; |
| 70 | int error_indicator; |
Pablo Galindo | 2b74c83 | 2020-04-27 18:02:07 +0100 | [diff] [blame] | 71 | int flags; |
Lysandros Nikolaou | 3e0a6f3 | 2020-05-01 06:27:52 +0300 | [diff] [blame] | 72 | int feature_version; |
Guido van Rossum | c001c09 | 2020-04-30 12:12:19 -0700 | [diff] [blame] | 73 | growable_comment_array type_ignore_comments; |
Lysandros Nikolaou | 2f37c35 | 2020-05-07 13:37:51 +0300 | [diff] [blame] | 74 | Token *known_err_token; |
Pablo Galindo | 800a35c6 | 2020-05-25 18:38:45 +0100 | [diff] [blame] | 75 | int level; |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 76 | } Parser; |
| 77 | |
| 78 | typedef struct { |
| 79 | cmpop_ty cmpop; |
| 80 | expr_ty expr; |
| 81 | } CmpopExprPair; |
| 82 | |
| 83 | typedef struct { |
| 84 | expr_ty key; |
| 85 | expr_ty value; |
| 86 | } KeyValuePair; |
| 87 | |
| 88 | typedef struct { |
| 89 | arg_ty arg; |
| 90 | expr_ty value; |
| 91 | } NameDefaultPair; |
| 92 | |
| 93 | typedef struct { |
| 94 | asdl_seq *plain_names; |
| 95 | asdl_seq *names_with_defaults; // asdl_seq* of NameDefaultsPair's |
| 96 | } SlashWithDefault; |
| 97 | |
| 98 | typedef struct { |
| 99 | arg_ty vararg; |
| 100 | asdl_seq *kwonlyargs; // asdl_seq* of NameDefaultsPair's |
| 101 | arg_ty kwarg; |
| 102 | } StarEtc; |
| 103 | |
| 104 | typedef struct { |
| 105 | operator_ty kind; |
| 106 | } AugOperator; |
| 107 | |
/*
 * An untyped element tagged with an is_keyword flag
 * (see _PyPegen_keyword_or_starred).
 */
typedef struct {
    void *element;
    int is_keyword;     // presumably non-zero when element is a keyword argument -- confirm
} KeywordOrStarred;
| 112 | |
| 113 | void _PyPegen_clear_memo_statistics(void); |
| 114 | PyObject *_PyPegen_get_memo_statistics(void); |
| 115 | |
| 116 | int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node); |
| 117 | int _PyPegen_update_memo(Parser *p, int mark, int type, void *node); |
| 118 | int _PyPegen_is_memoized(Parser *p, int type, void *pres); |
| 119 | |
Pablo Galindo | 1df5a9e | 2020-04-23 12:42:13 +0100 | [diff] [blame] | 120 | int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 121 | int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int); |
Pablo Galindo | 404b23b | 2020-05-27 00:15:52 +0100 | [diff] [blame] | 122 | int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 123 | int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *); |
| 124 | |
| 125 | Token *_PyPegen_expect_token(Parser *p, int type); |
Guido van Rossum | b45af1a | 2020-05-26 10:58:44 -0700 | [diff] [blame] | 126 | expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 127 | Token *_PyPegen_get_last_nonnwhitespace_token(Parser *); |
| 128 | int _PyPegen_fill_token(Parser *p); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 129 | expr_ty _PyPegen_name_token(Parser *p); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 130 | expr_ty _PyPegen_number_token(Parser *p); |
| 131 | void *_PyPegen_string_token(Parser *p); |
| 132 | const char *_PyPegen_get_expr_name(expr_ty); |
Lysandros Nikolaou | a15c9b3 | 2020-05-13 22:36:27 +0300 | [diff] [blame] | 133 | void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...); |
| 134 | void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype, |
Pablo Galindo | 51c5896 | 2020-06-16 16:49:43 +0100 | [diff] [blame] | 135 | Py_ssize_t lineno, Py_ssize_t col_offset, |
Lysandros Nikolaou | a15c9b3 | 2020-05-13 22:36:27 +0300 | [diff] [blame] | 136 | const char *errmsg, va_list va); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 137 | void *_PyPegen_dummy_name(Parser *p, ...); |
| 138 | |
Lysandros Nikolaou | a15c9b3 | 2020-05-13 22:36:27 +0300 | [diff] [blame] | 139 | Py_LOCAL_INLINE(void *) |
| 140 | RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype, int lineno, |
| 141 | int col_offset, const char *errmsg, ...) |
| 142 | { |
| 143 | va_list va; |
| 144 | va_start(va, errmsg); |
| 145 | _PyPegen_raise_error_known_location(p, errtype, lineno, col_offset + 1, |
| 146 | errmsg, va); |
| 147 | va_end(va); |
| 148 | return NULL; |
| 149 | } |
| 150 | |
| 151 | |
/*
 * Helper macros for code that has a `Parser *p` in scope.
 *
 * UNUSED(expr)        evaluates expr and discards the value.
 * EXTRA_EXPR(h, t)    expands to the five trailing location/arena arguments,
 *                     taking the start from `h` and the end from `t`.
 *                     Both arguments are parenthesized so that any pointer
 *                     expression (e.g. a conditional) can be passed safely.
 * EXTRA               same argument list, but built from the local variables
 *                     _start_lineno, _start_col_offset, _end_lineno and
 *                     _end_col_offset, which must exist at the expansion site.
 * RAISE_*             forward to the error helpers with `p` and the matching
 *                     exception type; "##__VA_ARGS__" allows the format
 *                     arguments to be omitted entirely.
 */
#define UNUSED(expr) do { (void)(expr); } while (0)
#define EXTRA_EXPR(head, tail) \
    (head)->lineno, (head)->col_offset, (tail)->end_lineno, (tail)->end_col_offset, p->arena
#define EXTRA _start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena
#define RAISE_SYNTAX_ERROR(msg, ...) \
    _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
#define RAISE_INDENTATION_ERROR(msg, ...) \
    _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, msg, ##__VA_ARGS__)
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 159 | |
| 160 | Py_LOCAL_INLINE(void *) |
| 161 | CHECK_CALL(Parser *p, void *result) |
| 162 | { |
| 163 | if (result == NULL) { |
| 164 | assert(PyErr_Occurred()); |
| 165 | p->error_indicator = 1; |
| 166 | } |
| 167 | return result; |
| 168 | } |
| 169 | |
| 170 | /* This is needed for helper functions that are allowed to |
| 171 | return NULL without an error. Example: _PyPegen_seq_extract_starred_exprs */ |
| 172 | Py_LOCAL_INLINE(void *) |
| 173 | CHECK_CALL_NULL_ALLOWED(Parser *p, void *result) |
| 174 | { |
| 175 | if (result == NULL && PyErr_Occurred()) { |
| 176 | p->error_indicator = 1; |
| 177 | } |
| 178 | return result; |
| 179 | } |
| 180 | |
/* Shorthands that supply the `p` found in the expanding scope. */
#define CHECK(result)               CHECK_CALL(p, result)
#define CHECK_NULL_ALLOWED(result)  CHECK_CALL_NULL_ALLOWED(p, result)
| 183 | |
Guido van Rossum | c001c09 | 2020-04-30 12:12:19 -0700 | [diff] [blame] | 184 | PyObject *_PyPegen_new_type_comment(Parser *, char *); |
| 185 | |
| 186 | Py_LOCAL_INLINE(PyObject *) |
| 187 | NEW_TYPE_COMMENT(Parser *p, Token *tc) |
| 188 | { |
| 189 | if (tc == NULL) { |
| 190 | return NULL; |
| 191 | } |
| 192 | char *bytes = PyBytes_AsString(tc->bytes); |
| 193 | if (bytes == NULL) { |
| 194 | goto error; |
| 195 | } |
| 196 | PyObject *tco = _PyPegen_new_type_comment(p, bytes); |
| 197 | if (tco == NULL) { |
| 198 | goto error; |
| 199 | } |
| 200 | return tco; |
| 201 | error: |
| 202 | p->error_indicator = 1; // Inline CHECK_CALL |
| 203 | return NULL; |
| 204 | } |
| 205 | |
Lysandros Nikolaou | 3e0a6f3 | 2020-05-01 06:27:52 +0300 | [diff] [blame] | 206 | Py_LOCAL_INLINE(void *) |
| 207 | INVALID_VERSION_CHECK(Parser *p, int version, char *msg, void *node) |
| 208 | { |
| 209 | if (node == NULL) { |
| 210 | p->error_indicator = 1; // Inline CHECK_CALL |
| 211 | return NULL; |
| 212 | } |
| 213 | if (p->feature_version < version) { |
| 214 | p->error_indicator = 1; |
Batuhan Taskaya | 76c1b4d | 2020-05-01 16:13:43 +0300 | [diff] [blame] | 215 | return RAISE_SYNTAX_ERROR("%s only supported in Python 3.%i and greater", |
| 216 | msg, version); |
Lysandros Nikolaou | 3e0a6f3 | 2020-05-01 06:27:52 +0300 | [diff] [blame] | 217 | } |
| 218 | return node; |
| 219 | } |
| 220 | |
| 221 | #define CHECK_VERSION(version, msg, node) INVALID_VERSION_CHECK(p, version, msg, node) |
| 222 | |
Guido van Rossum | c001c09 | 2020-04-30 12:12:19 -0700 | [diff] [blame] | 223 | arg_ty _PyPegen_add_type_comment_to_arg(Parser *, arg_ty, Token *); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 224 | PyObject *_PyPegen_new_identifier(Parser *, char *); |
Lysandros Nikolaou | 3e0a6f3 | 2020-05-01 06:27:52 +0300 | [diff] [blame] | 225 | Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int, int *, PyArena *); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 226 | void _PyPegen_Parser_Free(Parser *); |
| 227 | mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *, |
Pablo Galindo | 2b74c83 | 2020-04-27 18:02:07 +0100 | [diff] [blame] | 228 | const char *, const char *, PyCompilerFlags *, int *, PyArena *); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 229 | void *_PyPegen_run_parser(Parser *); |
Pablo Galindo | 2b74c83 | 2020-04-27 18:02:07 +0100 | [diff] [blame] | 230 | mod_ty _PyPegen_run_parser_from_file(const char *, int, PyObject *, PyCompilerFlags *, PyArena *); |
| 231 | mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 232 | void *_PyPegen_interactive_exit(Parser *); |
| 233 | asdl_seq *_PyPegen_singleton_seq(Parser *, void *); |
| 234 | asdl_seq *_PyPegen_seq_insert_in_front(Parser *, void *, asdl_seq *); |
Guido van Rossum | c001c09 | 2020-04-30 12:12:19 -0700 | [diff] [blame] | 235 | asdl_seq *_PyPegen_seq_append_to_end(Parser *, asdl_seq *, void *); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 236 | asdl_seq *_PyPegen_seq_flatten(Parser *, asdl_seq *); |
| 237 | expr_ty _PyPegen_join_names_with_dot(Parser *, expr_ty, expr_ty); |
| 238 | int _PyPegen_seq_count_dots(asdl_seq *); |
| 239 | alias_ty _PyPegen_alias_for_star(Parser *); |
| 240 | asdl_seq *_PyPegen_map_names_to_ids(Parser *, asdl_seq *); |
| 241 | CmpopExprPair *_PyPegen_cmpop_expr_pair(Parser *, cmpop_ty, expr_ty); |
| 242 | asdl_int_seq *_PyPegen_get_cmpops(Parser *p, asdl_seq *); |
| 243 | asdl_seq *_PyPegen_get_exprs(Parser *, asdl_seq *); |
| 244 | expr_ty _PyPegen_set_expr_context(Parser *, expr_ty, expr_context_ty); |
| 245 | KeyValuePair *_PyPegen_key_value_pair(Parser *, expr_ty, expr_ty); |
| 246 | asdl_seq *_PyPegen_get_keys(Parser *, asdl_seq *); |
| 247 | asdl_seq *_PyPegen_get_values(Parser *, asdl_seq *); |
Guido van Rossum | c001c09 | 2020-04-30 12:12:19 -0700 | [diff] [blame] | 248 | NameDefaultPair *_PyPegen_name_default_pair(Parser *, arg_ty, expr_ty, Token *); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 249 | SlashWithDefault *_PyPegen_slash_with_default(Parser *, asdl_seq *, asdl_seq *); |
| 250 | StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty); |
| 251 | arguments_ty _PyPegen_make_arguments(Parser *, asdl_seq *, SlashWithDefault *, |
| 252 | asdl_seq *, asdl_seq *, StarEtc *); |
| 253 | arguments_ty _PyPegen_empty_arguments(Parser *); |
| 254 | AugOperator *_PyPegen_augoperator(Parser*, operator_ty type); |
| 255 | stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_seq *, stmt_ty); |
| 256 | stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_seq *, stmt_ty); |
| 257 | KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *, void *, int); |
| 258 | asdl_seq *_PyPegen_seq_extract_starred_exprs(Parser *, asdl_seq *); |
| 259 | asdl_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *); |
| 260 | expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *); |
| 261 | asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *); |
Pablo Galindo | 2b74c83 | 2020-04-27 18:02:07 +0100 | [diff] [blame] | 262 | int _PyPegen_check_barry_as_flufl(Parser *); |
Guido van Rossum | c001c09 | 2020-04-30 12:12:19 -0700 | [diff] [blame] | 263 | mod_ty _PyPegen_make_module(Parser *, asdl_seq *); |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 264 | |
// Error reporting helpers

/*
 * Kind of target list being validated.  The enumerator values are spelled
 * out; they are the same values the compiler assigns implicitly.
 */
typedef enum {
    STAR_TARGETS = 0,
    DEL_TARGETS  = 1,
    FOR_TARGETS  = 2
} TARGETS_TYPE;
| 271 | expr_ty _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type); |
Lysandros Nikolaou | 6c4e0bd | 2020-06-21 05:18:01 +0300 | [diff] [blame] | 272 | #define RAISE_SYNTAX_ERROR_INVALID_TARGET(type, e) _RAISE_SYNTAX_ERROR_INVALID_TARGET(p, type, e) |
| 273 | |
| 274 | Py_LOCAL_INLINE(void *) |
| 275 | _RAISE_SYNTAX_ERROR_INVALID_TARGET(Parser *p, TARGETS_TYPE type, void *e) |
| 276 | { |
| 277 | expr_ty invalid_target = CHECK_NULL_ALLOWED(_PyPegen_get_invalid_target(e, type)); |
| 278 | if (invalid_target != NULL) { |
| 279 | const char *msg; |
| 280 | if (type == STAR_TARGETS || type == FOR_TARGETS) { |
| 281 | msg = "cannot assign to %s"; |
| 282 | } |
| 283 | else { |
| 284 | msg = "cannot delete %s"; |
| 285 | } |
| 286 | return RAISE_SYNTAX_ERROR_KNOWN_LOCATION( |
| 287 | invalid_target, |
| 288 | msg, |
| 289 | _PyPegen_get_expr_name(invalid_target) |
| 290 | ); |
| 291 | } |
| 292 | return RAISE_SYNTAX_ERROR("invalid syntax"); |
| 293 | } |
Lysandros Nikolaou | 01ece63 | 2020-06-19 02:10:43 +0300 | [diff] [blame] | 294 | |
Lysandros Nikolaou | 75b863a | 2020-05-18 22:14:47 +0300 | [diff] [blame] | 295 | void *_PyPegen_arguments_parsing_error(Parser *, expr_ty); |
Lysandros Nikolaou | ae14583 | 2020-05-22 03:56:52 +0300 | [diff] [blame] | 296 | void *_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args); |
Lysandros Nikolaou | 75b863a | 2020-05-18 22:14:47 +0300 | [diff] [blame] | 297 | |
Pablo Galindo | 16ab070 | 2020-05-15 02:04:52 +0100 | [diff] [blame] | 298 | |
Lysandros Nikolaou | 01ece63 | 2020-06-19 02:10:43 +0300 | [diff] [blame] | 299 | // Generated function in parse.c - function definition in python.gram |
Pablo Galindo | c5fc156 | 2020-04-22 23:29:27 +0100 | [diff] [blame] | 300 | void *_PyPegen_parse(Parser *); |
| 301 | |
| 302 | #endif |