blob: 4db79237532d80c84451c21faf606bf92460cdea [file] [log] [blame]
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001#ifndef PEGEN_H
2#define PEGEN_H
3
4#define PY_SSIZE_T_CLEAN
5#include <Python.h>
6#include <token.h>
Victor Stinner94faa072021-03-23 20:47:40 +01007#include <pycore_ast.h>
Pablo Galindoc5fc1562020-04-22 23:29:27 +01008
/* Parser flag bits.  The values wrapped in `#if 0` are disabled and are
   kept only as a record of their numeric values. */
#if 0
#define PyPARSE_YIELD_IS_KEYWORD 0x0001
#endif

#define PyPARSE_DONT_IMPLY_DEDENT 0x0002

#if 0
#define PyPARSE_WITH_IS_KEYWORD 0x0003
#define PyPARSE_PRINT_IS_FUNCTION 0x0004
#define PyPARSE_UNICODE_LITERALS 0x0008
#endif

#define PyPARSE_IGNORE_COOKIE 0x0010
#define PyPARSE_BARRY_AS_BDFL 0x0020
#define PyPARSE_TYPE_COMMENTS 0x0040
#define PyPARSE_ASYNC_HACKS 0x0080

/* One memoization record; records are chained through `next`
   (a Token holds the head of such a chain in its `memo` field). */
typedef struct _memo {
    int type;
    void *node;
    int mark;
    struct _memo *next;
} Memo;

33typedef struct {
34 int type;
35 PyObject *bytes;
Lysandros Nikolaou861efc62020-06-20 15:57:27 +030036 int lineno, col_offset, end_lineno, end_col_offset;
Pablo Galindoc5fc1562020-04-22 23:29:27 +010037 Memo *memo;
38} Token;
39
/* Pairs a keyword spelling with an integer token type. */
typedef struct {
    char *str;
    int type;
} KeywordToken;


/* Array of (line number, comment) entries with separate `size` and
   `num_items` counters. */
typedef struct {
    struct {
        int lineno;
        char *comment;  // The " <tag>" in "# type: ignore <tag>"
    } *items;
    size_t size;
    size_t num_items;
} growable_comment_array;

Pablo Galindoc5fc1562020-04-22 23:29:27 +010055typedef struct {
56 struct tok_state *tok;
57 Token **tokens;
58 int mark;
59 int fill, size;
60 PyArena *arena;
61 KeywordToken **keywords;
Pablo Galindob2802482021-04-15 21:38:45 +010062 char **soft_keywords;
Pablo Galindoc5fc1562020-04-22 23:29:27 +010063 int n_keyword_lists;
64 int start_rule;
65 int *errcode;
66 int parsing_started;
67 PyObject* normalize;
68 int starting_lineno;
69 int starting_col_offset;
70 int error_indicator;
Pablo Galindo2b74c832020-04-27 18:02:07 +010071 int flags;
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +030072 int feature_version;
Guido van Rossumc001c092020-04-30 12:12:19 -070073 growable_comment_array type_ignore_comments;
Lysandros Nikolaou2f37c352020-05-07 13:37:51 +030074 Token *known_err_token;
Pablo Galindo800a35c62020-05-25 18:38:45 +010075 int level;
Lysandros Nikolaoubca70142020-10-27 00:42:04 +020076 int call_invalid_rules;
Miss Islington (bot)ae1732d2021-05-21 11:20:43 -070077 int in_raw_rule;
Pablo Galindoc5fc1562020-04-22 23:29:27 +010078} Parser;
79
80typedef struct {
81 cmpop_ty cmpop;
82 expr_ty expr;
83} CmpopExprPair;
84
85typedef struct {
86 expr_ty key;
87 expr_ty value;
88} KeyValuePair;
89
90typedef struct {
Nick Coghlan1e7b8582021-04-29 15:58:44 +100091 expr_ty key;
92 pattern_ty pattern;
93} KeyPatternPair;
94
95typedef struct {
Pablo Galindoc5fc1562020-04-22 23:29:27 +010096 arg_ty arg;
97 expr_ty value;
98} NameDefaultPair;
99
100typedef struct {
Pablo Galindoa5634c42020-09-16 19:42:00 +0100101 asdl_arg_seq *plain_names;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100102 asdl_seq *names_with_defaults; // asdl_seq* of NameDefaultsPair's
103} SlashWithDefault;
104
105typedef struct {
106 arg_ty vararg;
107 asdl_seq *kwonlyargs; // asdl_seq* of NameDefaultsPair's
108 arg_ty kwarg;
109} StarEtc;
110
Victor Stinner94faa072021-03-23 20:47:40 +0100111typedef struct { operator_ty kind; } AugOperator;
/* An untyped element tagged with an `is_keyword` flag. */
typedef struct {
    void *element;
    int is_keyword;
} KeywordOrStarred;

/* Memo statistics helpers; only declared when Py_DEBUG is defined. */
#if defined(Py_DEBUG)
void _PyPegen_clear_memo_statistics(void);
PyObject *_PyPegen_get_memo_statistics(void);
#endif

122int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node);
123int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
124int _PyPegen_is_memoized(Parser *p, int type, void *pres);
125
Pablo Galindo58fb1562021-02-02 19:54:22 +0000126
Pablo Galindo1df5a9e2020-04-23 12:42:13 +0100127int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100128int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
Pablo Galindo404b23b2020-05-27 00:15:52 +0100129int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100130int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
131
132Token *_PyPegen_expect_token(Parser *p, int type);
Pablo Galindo58fb1562021-02-02 19:54:22 +0000133Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
Guido van Rossumb45af1a2020-05-26 10:58:44 -0700134expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
Pablo Galindob2802482021-04-15 21:38:45 +0100135expr_ty _PyPegen_soft_keyword_token(Parser *p);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100136Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
137int _PyPegen_fill_token(Parser *p);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100138expr_ty _PyPegen_name_token(Parser *p);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100139expr_ty _PyPegen_number_token(Parser *p);
140void *_PyPegen_string_token(Parser *p);
141const char *_PyPegen_get_expr_name(expr_ty);
Lysandros Nikolaoua15c9b32020-05-13 22:36:27 +0300142void *_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...);
143void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
Pablo Galindo51c58962020-06-16 16:49:43 +0100144 Py_ssize_t lineno, Py_ssize_t col_offset,
Pablo Galindoa77aac42021-04-23 14:27:05 +0100145 Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
Lysandros Nikolaoua15c9b32020-05-13 22:36:27 +0300146 const char *errmsg, va_list va);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100147void *_PyPegen_dummy_name(Parser *p, ...);
148
Pablo Galindoa77aac42021-04-23 14:27:05 +0100149void * _PyPegen_seq_last_item(asdl_seq *seq);
150#define PyPegen_last_item(seq, type) ((type)_PyPegen_seq_last_item((asdl_seq*)seq))
151
Miss Islington (bot)11f1a302021-06-24 08:34:28 -0700152void * _PyPegen_seq_first_item(asdl_seq *seq);
153#define PyPegen_first_item(seq, type) ((type)_PyPegen_seq_first_item((asdl_seq*)seq))
154
Pablo Galindoa77aac42021-04-23 14:27:05 +0100155#define CURRENT_POS (-5)
156
Lysandros Nikolaoua15c9b32020-05-13 22:36:27 +0300157Py_LOCAL_INLINE(void *)
Pablo Galindo96eeff52021-03-22 17:28:11 +0000158RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
159 Py_ssize_t lineno, Py_ssize_t col_offset,
Brandt Bucherdbe60ee2021-04-29 17:19:28 -0700160 Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
Pablo Galindo96eeff52021-03-22 17:28:11 +0000161 const char *errmsg, ...)
Lysandros Nikolaoua15c9b32020-05-13 22:36:27 +0300162{
163 va_list va;
164 va_start(va, errmsg);
Pablo Galindoa77aac42021-04-23 14:27:05 +0100165 Py_ssize_t _col_offset = (col_offset == CURRENT_POS ? CURRENT_POS : col_offset + 1);
166 Py_ssize_t _end_col_offset = (end_col_offset == CURRENT_POS ? CURRENT_POS : end_col_offset + 1);
167 _PyPegen_raise_error_known_location(p, errtype, lineno, _col_offset, end_lineno, _end_col_offset, errmsg, va);
Lysandros Nikolaoua15c9b32020-05-13 22:36:27 +0300168 va_end(va);
169 return NULL;
170}
171
/* Evaluate `expr` and discard the result. */
#define UNUSED(expr) do { (void)(expr); } while (0)

/* Location argument lists.  Both expand to a comma-separated list ending in
   `p->arena`, so a variable named `p` must be in scope at the expansion
   site.  EXTRA additionally expects the locals _start_lineno,
   _start_col_offset, _end_lineno and _end_col_offset.  Every use of `head`
   and `tail` is parenthesized so arbitrary pointer expressions are safe. */
#define EXTRA_EXPR(head, tail) (head)->lineno, (head)->col_offset, (tail)->end_lineno, (tail)->end_col_offset, p->arena
#define EXTRA _start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena

/* Error-raising shorthands; all of them use the `p` in scope at the
   expansion site.  The *_KNOWN_RANGE / *_KNOWN_LOCATION / *_STARTING_FROM
   variants take the location from the lineno/col_offset fields of their
   node arguments (or CURRENT_POS for the end position). */
#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, msg, ##__VA_ARGS__)
#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (b)->end_lineno, (b)->end_col_offset, msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (a)->end_lineno, (a)->end_col_offset, msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR_STARTING_FROM(a, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, CURRENT_POS, CURRENT_POS, msg, ##__VA_ARGS__)

184Py_LOCAL_INLINE(void *)
185CHECK_CALL(Parser *p, void *result)
186{
187 if (result == NULL) {
188 assert(PyErr_Occurred());
189 p->error_indicator = 1;
190 }
191 return result;
192}
193
194/* This is needed for helper functions that are allowed to
195 return NULL without an error. Example: _PyPegen_seq_extract_starred_exprs */
196Py_LOCAL_INLINE(void *)
197CHECK_CALL_NULL_ALLOWED(Parser *p, void *result)
198{
199 if (result == NULL && PyErr_Occurred()) {
200 p->error_indicator = 1;
201 }
202 return result;
203}
204
/* Casting wrappers around CHECK_CALL / CHECK_CALL_NULL_ALLOWED; both use
   the `p` in scope at the expansion site. */
#define CHECK(type, result) ((type) CHECK_CALL(p, result))
#define CHECK_NULL_ALLOWED(type, result) ((type) CHECK_CALL_NULL_ALLOWED(p, result))

Serhiy Storchakac43317d2021-06-12 20:44:32 +0300208PyObject *_PyPegen_new_type_comment(Parser *, const char *);
Guido van Rossumc001c092020-04-30 12:12:19 -0700209
210Py_LOCAL_INLINE(PyObject *)
211NEW_TYPE_COMMENT(Parser *p, Token *tc)
212{
213 if (tc == NULL) {
214 return NULL;
215 }
Serhiy Storchakac43317d2021-06-12 20:44:32 +0300216 const char *bytes = PyBytes_AsString(tc->bytes);
Guido van Rossumc001c092020-04-30 12:12:19 -0700217 if (bytes == NULL) {
218 goto error;
219 }
220 PyObject *tco = _PyPegen_new_type_comment(p, bytes);
221 if (tco == NULL) {
222 goto error;
223 }
224 return tco;
225 error:
226 p->error_indicator = 1; // Inline CHECK_CALL
227 return NULL;
228}
229
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +0300230Py_LOCAL_INLINE(void *)
231INVALID_VERSION_CHECK(Parser *p, int version, char *msg, void *node)
232{
233 if (node == NULL) {
234 p->error_indicator = 1; // Inline CHECK_CALL
235 return NULL;
236 }
237 if (p->feature_version < version) {
238 p->error_indicator = 1;
Batuhan Taskaya76c1b4d2020-05-01 16:13:43 +0300239 return RAISE_SYNTAX_ERROR("%s only supported in Python 3.%i and greater",
240 msg, version);
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +0300241 }
242 return node;
243}
244
Lysandros Nikolaou2e5ca9e2020-10-21 22:53:14 +0300245#define CHECK_VERSION(type, version, msg, node) ((type) INVALID_VERSION_CHECK(p, version, msg, node))
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +0300246
Guido van Rossumc001c092020-04-30 12:12:19 -0700247arg_ty _PyPegen_add_type_comment_to_arg(Parser *, arg_ty, Token *);
Serhiy Storchakac43317d2021-06-12 20:44:32 +0300248PyObject *_PyPegen_new_identifier(Parser *, const char *);
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +0300249Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int, int *, PyArena *);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100250void _PyPegen_Parser_Free(Parser *);
251mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
Pablo Galindo2b74c832020-04-27 18:02:07 +0100252 const char *, const char *, PyCompilerFlags *, int *, PyArena *);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100253void *_PyPegen_run_parser(Parser *);
Pablo Galindo2b74c832020-04-27 18:02:07 +0100254mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
Pablo Galindoa5634c42020-09-16 19:42:00 +0100255asdl_stmt_seq *_PyPegen_interactive_exit(Parser *);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100256asdl_seq *_PyPegen_singleton_seq(Parser *, void *);
257asdl_seq *_PyPegen_seq_insert_in_front(Parser *, void *, asdl_seq *);
Guido van Rossumc001c092020-04-30 12:12:19 -0700258asdl_seq *_PyPegen_seq_append_to_end(Parser *, asdl_seq *, void *);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100259asdl_seq *_PyPegen_seq_flatten(Parser *, asdl_seq *);
260expr_ty _PyPegen_join_names_with_dot(Parser *, expr_ty, expr_ty);
261int _PyPegen_seq_count_dots(asdl_seq *);
Matthew Suozzo75a06f02021-04-10 16:56:28 -0400262alias_ty _PyPegen_alias_for_star(Parser *, int, int, int, int, PyArena *);
Pablo Galindoa5634c42020-09-16 19:42:00 +0100263asdl_identifier_seq *_PyPegen_map_names_to_ids(Parser *, asdl_expr_seq *);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100264CmpopExprPair *_PyPegen_cmpop_expr_pair(Parser *, cmpop_ty, expr_ty);
265asdl_int_seq *_PyPegen_get_cmpops(Parser *p, asdl_seq *);
Pablo Galindoa5634c42020-09-16 19:42:00 +0100266asdl_expr_seq *_PyPegen_get_exprs(Parser *, asdl_seq *);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100267expr_ty _PyPegen_set_expr_context(Parser *, expr_ty, expr_context_ty);
268KeyValuePair *_PyPegen_key_value_pair(Parser *, expr_ty, expr_ty);
Pablo Galindoa5634c42020-09-16 19:42:00 +0100269asdl_expr_seq *_PyPegen_get_keys(Parser *, asdl_seq *);
270asdl_expr_seq *_PyPegen_get_values(Parser *, asdl_seq *);
Nick Coghlan1e7b8582021-04-29 15:58:44 +1000271KeyPatternPair *_PyPegen_key_pattern_pair(Parser *, expr_ty, pattern_ty);
272asdl_expr_seq *_PyPegen_get_pattern_keys(Parser *, asdl_seq *);
273asdl_pattern_seq *_PyPegen_get_patterns(Parser *, asdl_seq *);
Guido van Rossumc001c092020-04-30 12:12:19 -0700274NameDefaultPair *_PyPegen_name_default_pair(Parser *, arg_ty, expr_ty, Token *);
Pablo Galindoa5634c42020-09-16 19:42:00 +0100275SlashWithDefault *_PyPegen_slash_with_default(Parser *, asdl_arg_seq *, asdl_seq *);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100276StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
Pablo Galindoa5634c42020-09-16 19:42:00 +0100277arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
278 asdl_arg_seq *, asdl_seq *, StarEtc *);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100279arguments_ty _PyPegen_empty_arguments(Parser *);
280AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
Pablo Galindoa5634c42020-09-16 19:42:00 +0100281stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
282stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100283KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *, void *, int);
Pablo Galindoa5634c42020-09-16 19:42:00 +0100284asdl_expr_seq *_PyPegen_seq_extract_starred_exprs(Parser *, asdl_seq *);
285asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
286expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *,
Pablo Galindo315a61f2020-09-03 15:29:32 +0100287 int lineno, int col_offset, int end_lineno,
288 int end_col_offset, PyArena *arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100289expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_seq *);
Nick Coghlan1e7b8582021-04-29 15:58:44 +1000290expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty);
Brandt Bucherdbe60ee2021-04-29 17:19:28 -0700291expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100292asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
Pablo Galindo06f8c332020-10-30 23:48:42 +0000293int _PyPegen_check_barry_as_flufl(Parser *, Token *);
Pablo Galindo Salgadob977f852021-07-27 18:52:32 +0100294int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
Pablo Galindoa5634c42020-09-16 19:42:00 +0100295mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100296
// Error reporting helpers

/* Kind of target list being checked; it selects the wording used by
   _RAISE_SYNTAX_ERROR_INVALID_TARGET ("cannot assign to" for STAR_TARGETS
   and FOR_TARGETS, "cannot delete" for DEL_TARGETS). */
typedef enum {
    STAR_TARGETS,
    DEL_TARGETS,
    FOR_TARGETS
} TARGETS_TYPE;
303expr_ty _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type);
Lysandros Nikolaou6c4e0bd2020-06-21 05:18:01 +0300304#define RAISE_SYNTAX_ERROR_INVALID_TARGET(type, e) _RAISE_SYNTAX_ERROR_INVALID_TARGET(p, type, e)
305
306Py_LOCAL_INLINE(void *)
307_RAISE_SYNTAX_ERROR_INVALID_TARGET(Parser *p, TARGETS_TYPE type, void *e)
308{
Lysandros Nikolaou2e5ca9e2020-10-21 22:53:14 +0300309 expr_ty invalid_target = CHECK_NULL_ALLOWED(expr_ty, _PyPegen_get_invalid_target(e, type));
Lysandros Nikolaou6c4e0bd2020-06-21 05:18:01 +0300310 if (invalid_target != NULL) {
311 const char *msg;
312 if (type == STAR_TARGETS || type == FOR_TARGETS) {
313 msg = "cannot assign to %s";
314 }
315 else {
316 msg = "cannot delete %s";
317 }
318 return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
319 invalid_target,
320 msg,
321 _PyPegen_get_expr_name(invalid_target)
322 );
323 }
324 return RAISE_SYNTAX_ERROR("invalid syntax");
325}
Lysandros Nikolaou01ece632020-06-19 02:10:43 +0300326
Lysandros Nikolaou75b863a2020-05-18 22:14:47 +0300327void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
Lysandros Nikolaouae145832020-05-22 03:56:52 +0300328void *_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args);
Lysandros Nikolaou75b863a2020-05-18 22:14:47 +0300329
Pablo Galindo16ab0702020-05-15 02:04:52 +0100330
Lysandros Nikolaou01ece632020-06-19 02:10:43 +0300331// Generated function in parse.c - function definition in python.gram
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100332void *_PyPegen_parse(Parser *);
333
334#endif