blob: ef95aacb7f0849fb2324c1bb822e2d8004b9138a [file] [log] [blame]
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001#include <Python.h>
2#include <errcode.h>
3#include "../tokenizer.h"
4
5#include "pegen.h"
6#include "parse_string.h"
7
8static int
9init_normalization(Parser *p)
10{
Lysandros Nikolaouebebb642020-04-23 18:36:06 +030011 if (p->normalize) {
12 return 1;
13 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +010014 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
15 if (!m)
16 {
17 return 0;
18 }
19 p->normalize = PyObject_GetAttrString(m, "normalize");
20 Py_DECREF(m);
21 if (!p->normalize)
22 {
23 return 0;
24 }
25 return 1;
26}
27
Pablo Galindo2b74c832020-04-27 18:02:07 +010028/* Checks if the NOTEQUAL token is valid given the current parser flags
290 indicates success and nonzero indicates failure (an exception may be set) */
30int
31_PyPegen_check_barry_as_flufl(Parser *p) {
32 Token *t = p->tokens[p->fill - 1];
33 assert(t->bytes != NULL);
34 assert(t->type == NOTEQUAL);
35
36 char* tok_str = PyBytes_AS_STRING(t->bytes);
37 if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>")){
38 RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
39 return -1;
40 } else if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
41 return strcmp(tok_str, "!=");
42 }
43 return 0;
44}
45
Pablo Galindoc5fc1562020-04-22 23:29:27 +010046PyObject *
47_PyPegen_new_identifier(Parser *p, char *n)
48{
49 PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
50 if (!id) {
51 goto error;
52 }
53 /* PyUnicode_DecodeUTF8 should always return a ready string. */
54 assert(PyUnicode_IS_READY(id));
55 /* Check whether there are non-ASCII characters in the
56 identifier; if so, normalize to NFKC. */
57 if (!PyUnicode_IS_ASCII(id))
58 {
59 PyObject *id2;
Lysandros Nikolaouebebb642020-04-23 18:36:06 +030060 if (!init_normalization(p))
Pablo Galindoc5fc1562020-04-22 23:29:27 +010061 {
62 Py_DECREF(id);
63 goto error;
64 }
65 PyObject *form = PyUnicode_InternFromString("NFKC");
66 if (form == NULL)
67 {
68 Py_DECREF(id);
69 goto error;
70 }
71 PyObject *args[2] = {form, id};
72 id2 = _PyObject_FastCall(p->normalize, args, 2);
73 Py_DECREF(id);
74 Py_DECREF(form);
75 if (!id2) {
76 goto error;
77 }
78 if (!PyUnicode_Check(id2))
79 {
80 PyErr_Format(PyExc_TypeError,
81 "unicodedata.normalize() must return a string, not "
82 "%.200s",
83 _PyType_Name(Py_TYPE(id2)));
84 Py_DECREF(id2);
85 goto error;
86 }
87 id = id2;
88 }
89 PyUnicode_InternInPlace(&id);
90 if (PyArena_AddPyObject(p->arena, id) < 0)
91 {
92 Py_DECREF(id);
93 goto error;
94 }
95 return id;
96
97error:
98 p->error_indicator = 1;
99 return NULL;
100}
101
102static PyObject *
103_create_dummy_identifier(Parser *p)
104{
105 return _PyPegen_new_identifier(p, "");
106}
107
108static inline Py_ssize_t
109byte_offset_to_character_offset(PyObject *line, int col_offset)
110{
111 const char *str = PyUnicode_AsUTF8(line);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300112 if (!str) {
113 return 0;
114 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100115 PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, NULL);
116 if (!text) {
117 return 0;
118 }
119 Py_ssize_t size = PyUnicode_GET_LENGTH(text);
120 Py_DECREF(text);
121 return size;
122}
123
124const char *
125_PyPegen_get_expr_name(expr_ty e)
126{
127 switch (e->kind) {
128 case Attribute_kind:
129 return "attribute";
130 case Subscript_kind:
131 return "subscript";
132 case Starred_kind:
133 return "starred";
134 case Name_kind:
135 return "name";
136 case List_kind:
137 return "list";
138 case Tuple_kind:
139 return "tuple";
140 case Lambda_kind:
141 return "lambda";
142 case Call_kind:
143 return "function call";
144 case BoolOp_kind:
145 case BinOp_kind:
146 case UnaryOp_kind:
147 return "operator";
148 case GeneratorExp_kind:
149 return "generator expression";
150 case Yield_kind:
151 case YieldFrom_kind:
152 return "yield expression";
153 case Await_kind:
154 return "await expression";
155 case ListComp_kind:
156 return "list comprehension";
157 case SetComp_kind:
158 return "set comprehension";
159 case DictComp_kind:
160 return "dict comprehension";
161 case Dict_kind:
162 return "dict display";
163 case Set_kind:
164 return "set display";
165 case JoinedStr_kind:
166 case FormattedValue_kind:
167 return "f-string expression";
168 case Constant_kind: {
169 PyObject *value = e->v.Constant.value;
170 if (value == Py_None) {
171 return "None";
172 }
173 if (value == Py_False) {
174 return "False";
175 }
176 if (value == Py_True) {
177 return "True";
178 }
179 if (value == Py_Ellipsis) {
180 return "Ellipsis";
181 }
182 return "literal";
183 }
184 case Compare_kind:
185 return "comparison";
186 case IfExp_kind:
187 return "conditional expression";
188 case NamedExpr_kind:
189 return "named expression";
190 default:
191 PyErr_Format(PyExc_SystemError,
192 "unexpected expression in assignment %d (line %d)",
193 e->kind, e->lineno);
194 return NULL;
195 }
196}
197
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300198static int
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100199raise_decode_error(Parser *p)
200{
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300201 assert(PyErr_Occurred());
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100202 const char *errtype = NULL;
203 if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
204 errtype = "unicode error";
205 }
206 else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
207 errtype = "value error";
208 }
209 if (errtype) {
210 PyObject *type, *value, *tback, *errstr;
211 PyErr_Fetch(&type, &value, &tback);
212 errstr = PyObject_Str(value);
213 if (errstr) {
214 RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
215 Py_DECREF(errstr);
216 }
217 else {
218 PyErr_Clear();
219 RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
220 }
221 Py_XDECREF(type);
222 Py_XDECREF(value);
223 Py_XDECREF(tback);
224 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300225
226 return -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100227}
228
229static void
230raise_tokenizer_init_error(PyObject *filename)
231{
232 if (!(PyErr_ExceptionMatches(PyExc_LookupError)
233 || PyErr_ExceptionMatches(PyExc_ValueError)
234 || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
235 return;
236 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300237 PyObject *errstr = NULL;
238 PyObject *tuple = NULL;
239 PyObject *type, *value, *tback;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100240 PyErr_Fetch(&type, &value, &tback);
241 errstr = PyObject_Str(value);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300242 if (!errstr) {
243 goto error;
244 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100245
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300246 PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100247 if (!tmp) {
248 goto error;
249 }
250
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300251 tuple = PyTuple_Pack(2, errstr, tmp);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100252 Py_DECREF(tmp);
253 if (!value) {
254 goto error;
255 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300256 PyErr_SetObject(PyExc_SyntaxError, tuple);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100257
258error:
259 Py_XDECREF(type);
260 Py_XDECREF(value);
261 Py_XDECREF(tback);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300262 Py_XDECREF(errstr);
263 Py_XDECREF(tuple);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100264}
265
266static inline PyObject *
267get_error_line(char *buffer)
268{
269 char *newline = strchr(buffer, '\n');
270 if (newline) {
271 return PyUnicode_FromStringAndSize(buffer, newline - buffer);
272 }
273 else {
274 return PyUnicode_FromString(buffer);
275 }
276}
277
278static int
279tokenizer_error_with_col_offset(Parser *p, PyObject *errtype, const char *errmsg)
280{
281 PyObject *errstr = NULL;
282 PyObject *value = NULL;
Pablo Galindoee40e4b2020-04-23 03:43:08 +0100283 size_t col_number = -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100284
285 errstr = PyUnicode_FromString(errmsg);
286 if (!errstr) {
287 return -1;
288 }
289
290 PyObject *loc = NULL;
291 if (p->start_rule == Py_file_input) {
292 loc = PyErr_ProgramTextObject(p->tok->filename, p->tok->lineno);
293 }
294 if (!loc) {
295 loc = get_error_line(p->tok->buf);
296 }
297
298 if (loc) {
299 col_number = p->tok->cur - p->tok->buf;
300 }
301 else {
302 Py_INCREF(Py_None);
303 loc = Py_None;
304 }
305
306 PyObject *tmp = Py_BuildValue("(OiiN)", p->tok->filename, p->tok->lineno,
307 col_number, loc);
308 if (!tmp) {
309 goto error;
310 }
311
312 value = PyTuple_Pack(2, errstr, tmp);
313 Py_DECREF(tmp);
314 if (!value) {
315 goto error;
316 }
317 PyErr_SetObject(errtype, value);
318
319 Py_XDECREF(value);
320 Py_XDECREF(errstr);
321 return -1;
322
323error:
324 Py_XDECREF(errstr);
325 Py_XDECREF(loc);
326 return -1;
327}
328
329static int
330tokenizer_error(Parser *p)
331{
332 if (PyErr_Occurred()) {
333 return -1;
334 }
335
336 const char *msg = NULL;
337 PyObject* errtype = PyExc_SyntaxError;
338 switch (p->tok->done) {
339 case E_TOKEN:
340 msg = "invalid token";
341 break;
342 case E_IDENTIFIER:
343 msg = "invalid character in identifier";
344 break;
345 case E_BADPREFIX:
346 return tokenizer_error_with_col_offset(p,
Lysandros Nikolaoud55133f2020-04-28 03:23:35 +0300347 errtype, "invalid string prefix");
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100348 case E_EOFS:
349 return tokenizer_error_with_col_offset(p,
Lysandros Nikolaoud55133f2020-04-28 03:23:35 +0300350 errtype, "EOF while scanning triple-quoted string literal");
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100351 case E_EOLS:
352 return tokenizer_error_with_col_offset(p,
Lysandros Nikolaoud55133f2020-04-28 03:23:35 +0300353 errtype, "EOL while scanning string literal");
354 case E_EOF:
355 return tokenizer_error_with_col_offset(p,
356 errtype, "unexpected EOF while parsing");
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100357 case E_DEDENT:
358 return tokenizer_error_with_col_offset(p,
359 PyExc_IndentationError, "unindent does not match any outer indentation level");
360 case E_INTR:
361 if (!PyErr_Occurred()) {
362 PyErr_SetNone(PyExc_KeyboardInterrupt);
363 }
364 return -1;
365 case E_NOMEM:
366 PyErr_NoMemory();
367 return -1;
368 case E_TABSPACE:
369 errtype = PyExc_TabError;
370 msg = "inconsistent use of tabs and spaces in indentation";
371 break;
372 case E_TOODEEP:
373 errtype = PyExc_IndentationError;
374 msg = "too many levels of indentation";
375 break;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100376 case E_LINECONT:
377 msg = "unexpected character after line continuation character";
378 break;
379 default:
380 msg = "unknown parsing error";
381 }
382
383 PyErr_Format(errtype, msg);
384 // There is no reliable column information for this error
385 PyErr_SyntaxLocationObject(p->tok->filename, p->tok->lineno, 0);
386
387 return -1;
388}
389
390void *
391_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
392{
393 PyObject *value = NULL;
394 PyObject *errstr = NULL;
395 PyObject *loc = NULL;
396 PyObject *tmp = NULL;
397 Token *t = p->tokens[p->fill - 1];
398 Py_ssize_t col_number = 0;
399 va_list va;
400
401 va_start(va, errmsg);
402 errstr = PyUnicode_FromFormatV(errmsg, va);
403 va_end(va);
404 if (!errstr) {
405 goto error;
406 }
407
408 if (p->start_rule == Py_file_input) {
409 loc = PyErr_ProgramTextObject(p->tok->filename, t->lineno);
410 }
411
412 if (!loc) {
413 loc = get_error_line(p->tok->buf);
414 }
415
416 if (loc) {
417 int col_offset = t->col_offset == -1 ? 0 : t->col_offset;
418 col_number = byte_offset_to_character_offset(loc, col_offset) + 1;
419 }
420 else {
421 Py_INCREF(Py_None);
422 loc = Py_None;
423 }
424
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100425 tmp = Py_BuildValue("(OiiN)", p->tok->filename, t->lineno, col_number, loc);
426 if (!tmp) {
427 goto error;
428 }
429 value = PyTuple_Pack(2, errstr, tmp);
430 Py_DECREF(tmp);
431 if (!value) {
432 goto error;
433 }
434 PyErr_SetObject(errtype, value);
435
436 Py_DECREF(errstr);
437 Py_DECREF(value);
438 return NULL;
439
440error:
441 Py_XDECREF(errstr);
442 Py_XDECREF(loc);
443 return NULL;
444}
445
446void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
447 int kwarg_unpacking = 0;
448 for (Py_ssize_t i = 0, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) {
449 keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i);
450 if (!keyword->arg) {
451 kwarg_unpacking = 1;
452 }
453 }
454
455 const char *msg = NULL;
456 if (kwarg_unpacking) {
457 msg = "positional argument follows keyword argument unpacking";
458 } else {
459 msg = "positional argument follows keyword argument";
460 }
461
462 return RAISE_SYNTAX_ERROR(msg);
463}
464
#if 0
/* Compiled out.  Maps a token type to its entry in _PyParser_TokenNames,
   or "<Huh?>" when the type lies outside 0..N_TOKENS. */
static const char *
token_name(int type)
{
    if (type < 0 || type > N_TOKENS) {
        return "<Huh?>";
    }
    return _PyParser_TokenNames[type];
}
#endif
475
476// Here, mark is the start of the node, while p->mark is the end.
477// If node==NULL, they should be the same.
478int
479_PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
480{
481 // Insert in front
482 Memo *m = PyArena_Malloc(p->arena, sizeof(Memo));
483 if (m == NULL) {
484 return -1;
485 }
486 m->type = type;
487 m->node = node;
488 m->mark = p->mark;
489 m->next = p->tokens[mark]->memo;
490 p->tokens[mark]->memo = m;
491 return 0;
492}
493
494// Like _PyPegen_insert_memo(), but updates an existing node if found.
495int
496_PyPegen_update_memo(Parser *p, int mark, int type, void *node)
497{
498 for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
499 if (m->type == type) {
500 // Update existing node.
501 m->node = node;
502 m->mark = p->mark;
503 return 0;
504 }
505 }
506 // Insert new node.
507 return _PyPegen_insert_memo(p, mark, type, node);
508}
509
510// Return dummy NAME.
511void *
512_PyPegen_dummy_name(Parser *p, ...)
513{
514 static void *cache = NULL;
515
516 if (cache != NULL) {
517 return cache;
518 }
519
520 PyObject *id = _create_dummy_identifier(p);
521 if (!id) {
522 return NULL;
523 }
524 cache = Name(id, Load, 1, 0, 1, 0, p->arena);
525 return cache;
526}
527
528static int
529_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
530{
531 if (name_len >= p->n_keyword_lists || p->keywords[name_len] == NULL) {
532 return NAME;
533 }
534 for (KeywordToken *k = p->keywords[name_len]; k->type != -1; k++) {
535 if (strncmp(k->str, name, name_len) == 0) {
536 return k->type;
537 }
538 }
539 return NAME;
540}
541
542int
543_PyPegen_fill_token(Parser *p)
544{
545 const char *start, *end;
546 int type = PyTokenizer_Get(p->tok, &start, &end);
547 if (type == ERRORTOKEN) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300548 if (p->tok->done == E_DECODE) {
549 return raise_decode_error(p);
550 }
551 else {
552 return tokenizer_error(p);
553 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100554 }
555 if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
556 type = NEWLINE; /* Add an extra newline */
557 p->parsing_started = 0;
558
Pablo Galindob94dbd72020-04-27 18:35:58 +0100559 if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100560 p->tok->pendin = -p->tok->indent;
561 p->tok->indent = 0;
562 }
563 }
564 else {
565 p->parsing_started = 1;
566 }
567
568 if (p->fill == p->size) {
569 int newsize = p->size * 2;
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300570 Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
571 if (new_tokens == NULL) {
572 PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100573 return -1;
574 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300575 else {
576 p->tokens = new_tokens;
577 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100578 for (int i = p->size; i < newsize; i++) {
579 p->tokens[i] = PyMem_Malloc(sizeof(Token));
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300580 if (p->tokens[i] == NULL) {
581 p->size = i; // Needed, in order to cleanup correctly after parser fails
582 PyErr_NoMemory();
583 return -1;
584 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100585 memset(p->tokens[i], '\0', sizeof(Token));
586 }
587 p->size = newsize;
588 }
589
590 Token *t = p->tokens[p->fill];
591 t->type = (type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : type;
592 t->bytes = PyBytes_FromStringAndSize(start, end - start);
593 if (t->bytes == NULL) {
594 return -1;
595 }
596 PyArena_AddPyObject(p->arena, t->bytes);
597
598 int lineno = type == STRING ? p->tok->first_lineno : p->tok->lineno;
599 const char *line_start = type == STRING ? p->tok->multi_line_start : p->tok->line_start;
Pablo Galindo22081342020-04-29 02:04:06 +0100600 int end_lineno = p->tok->lineno;
601 int col_offset = -1, end_col_offset = -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100602 if (start != NULL && start >= line_start) {
Pablo Galindo22081342020-04-29 02:04:06 +0100603 col_offset = (int)(start - line_start);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100604 }
605 if (end != NULL && end >= p->tok->line_start) {
Pablo Galindo22081342020-04-29 02:04:06 +0100606 end_col_offset = (int)(end - p->tok->line_start);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100607 }
608
609 t->lineno = p->starting_lineno + lineno;
610 t->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
611 t->end_lineno = p->starting_lineno + end_lineno;
612 t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
613
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100614 p->fill += 1;
615 return 0;
616}
617
// Instrumentation to count the effectiveness of memoization.
// The array counts the number of tokens skipped by memoization,
// indexed by type.

#define NSTATISTICS 2000
static long memo_statistics[NSTATISTICS];

// Reset every memoization counter to zero.
void
_PyPegen_clear_memo_statistics()
{
    memset(memo_statistics, 0, sizeof(memo_statistics));
}
632
633PyObject *
634_PyPegen_get_memo_statistics()
635{
636 PyObject *ret = PyList_New(NSTATISTICS);
637 if (ret == NULL) {
638 return NULL;
639 }
640 for (int i = 0; i < NSTATISTICS; i++) {
641 PyObject *value = PyLong_FromLong(memo_statistics[i]);
642 if (value == NULL) {
643 Py_DECREF(ret);
644 return NULL;
645 }
646 // PyList_SetItem borrows a reference to value.
647 if (PyList_SetItem(ret, i, value) < 0) {
648 Py_DECREF(ret);
649 return NULL;
650 }
651 }
652 return ret;
653}
654
655int // bool
656_PyPegen_is_memoized(Parser *p, int type, void *pres)
657{
658 if (p->mark == p->fill) {
659 if (_PyPegen_fill_token(p) < 0) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300660 p->error_indicator = 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100661 return -1;
662 }
663 }
664
665 Token *t = p->tokens[p->mark];
666
667 for (Memo *m = t->memo; m != NULL; m = m->next) {
668 if (m->type == type) {
669 if (0 <= type && type < NSTATISTICS) {
670 long count = m->mark - p->mark;
671 // A memoized negative result counts for one.
672 if (count <= 0) {
673 count = 1;
674 }
675 memo_statistics[type] += count;
676 }
677 p->mark = m->mark;
678 *(void **)(pres) = m->node;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100679 return 1;
680 }
681 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100682 return 0;
683}
684
Pablo Galindo1df5a9e2020-04-23 12:42:13 +0100685
686int
687_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
688{
689 int mark = p->mark;
690 void *res = func(p);
691 p->mark = mark;
692 return (res != NULL) == positive;
693}
694
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100695int
696_PyPegen_lookahead_with_string(int positive, void *(func)(Parser *, const char *), Parser *p,
697 const char *arg)
698{
699 int mark = p->mark;
700 void *res = func(p, arg);
701 p->mark = mark;
702 return (res != NULL) == positive;
703}
704
705int
706_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
707{
708 int mark = p->mark;
709 void *res = func(p, arg);
710 p->mark = mark;
711 return (res != NULL) == positive;
712}
713
714int
715_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
716{
717 int mark = p->mark;
Pablo Galindo1df5a9e2020-04-23 12:42:13 +0100718 void *res = (void*)func(p);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100719 p->mark = mark;
720 return (res != NULL) == positive;
721}
722
723Token *
724_PyPegen_expect_token(Parser *p, int type)
725{
726 if (p->mark == p->fill) {
727 if (_PyPegen_fill_token(p) < 0) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300728 p->error_indicator = 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100729 return NULL;
730 }
731 }
732 Token *t = p->tokens[p->mark];
733 if (t->type != type) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100734 return NULL;
735 }
736 p->mark += 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100737 return t;
738}
739
740Token *
741_PyPegen_get_last_nonnwhitespace_token(Parser *p)
742{
743 assert(p->mark >= 0);
744 Token *token = NULL;
745 for (int m = p->mark - 1; m >= 0; m--) {
746 token = p->tokens[m];
747 if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
748 break;
749 }
750 }
751 return token;
752}
753
754void *
755_PyPegen_async_token(Parser *p)
756{
757 return _PyPegen_expect_token(p, ASYNC);
758}
759
760void *
761_PyPegen_await_token(Parser *p)
762{
763 return _PyPegen_expect_token(p, AWAIT);
764}
765
766void *
767_PyPegen_endmarker_token(Parser *p)
768{
769 return _PyPegen_expect_token(p, ENDMARKER);
770}
771
772expr_ty
773_PyPegen_name_token(Parser *p)
774{
775 Token *t = _PyPegen_expect_token(p, NAME);
776 if (t == NULL) {
777 return NULL;
778 }
779 char* s = PyBytes_AsString(t->bytes);
780 if (!s) {
781 return NULL;
782 }
783 PyObject *id = _PyPegen_new_identifier(p, s);
784 if (id == NULL) {
785 return NULL;
786 }
787 return Name(id, Load, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
788 p->arena);
789}
790
791void *
792_PyPegen_string_token(Parser *p)
793{
794 return _PyPegen_expect_token(p, STRING);
795}
796
797void *
798_PyPegen_newline_token(Parser *p)
799{
800 return _PyPegen_expect_token(p, NEWLINE);
801}
802
803void *
804_PyPegen_indent_token(Parser *p)
805{
806 return _PyPegen_expect_token(p, INDENT);
807}
808
809void *
810_PyPegen_dedent_token(Parser *p)
811{
812 return _PyPegen_expect_token(p, DEDENT);
813}
814
815static PyObject *
816parsenumber_raw(const char *s)
817{
818 const char *end;
819 long x;
820 double dx;
821 Py_complex compl;
822 int imflag;
823
824 assert(s != NULL);
825 errno = 0;
826 end = s + strlen(s) - 1;
827 imflag = *end == 'j' || *end == 'J';
828 if (s[0] == '0') {
829 x = (long)PyOS_strtoul(s, (char **)&end, 0);
830 if (x < 0 && errno == 0) {
831 return PyLong_FromString(s, (char **)0, 0);
832 }
833 }
834 else
835 x = PyOS_strtol(s, (char **)&end, 0);
836 if (*end == '\0') {
837 if (errno != 0)
838 return PyLong_FromString(s, (char **)0, 0);
839 return PyLong_FromLong(x);
840 }
841 /* XXX Huge floats may silently fail */
842 if (imflag) {
843 compl.real = 0.;
844 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
845 if (compl.imag == -1.0 && PyErr_Occurred())
846 return NULL;
847 return PyComplex_FromCComplex(compl);
848 }
849 else {
850 dx = PyOS_string_to_double(s, NULL, NULL);
851 if (dx == -1.0 && PyErr_Occurred())
852 return NULL;
853 return PyFloat_FromDouble(dx);
854 }
855}
856
857static PyObject *
858parsenumber(const char *s)
859{
860 char *dup, *end;
861 PyObject *res = NULL;
862
863 assert(s != NULL);
864
865 if (strchr(s, '_') == NULL) {
866 return parsenumber_raw(s);
867 }
868 /* Create a duplicate without underscores. */
869 dup = PyMem_Malloc(strlen(s) + 1);
870 if (dup == NULL) {
871 return PyErr_NoMemory();
872 }
873 end = dup;
874 for (; *s; s++) {
875 if (*s != '_') {
876 *end++ = *s;
877 }
878 }
879 *end = '\0';
880 res = parsenumber_raw(dup);
881 PyMem_Free(dup);
882 return res;
883}
884
885expr_ty
886_PyPegen_number_token(Parser *p)
887{
888 Token *t = _PyPegen_expect_token(p, NUMBER);
889 if (t == NULL) {
890 return NULL;
891 }
892
893 char *num_raw = PyBytes_AsString(t->bytes);
894
895 if (num_raw == NULL) {
896 return NULL;
897 }
898
899 PyObject *c = parsenumber(num_raw);
900
901 if (c == NULL) {
902 return NULL;
903 }
904
905 if (PyArena_AddPyObject(p->arena, c) < 0) {
906 Py_DECREF(c);
907 return NULL;
908 }
909
910 return Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
911 p->arena);
912}
913
914void
915_PyPegen_Parser_Free(Parser *p)
916{
917 Py_XDECREF(p->normalize);
918 for (int i = 0; i < p->size; i++) {
919 PyMem_Free(p->tokens[i]);
920 }
921 PyMem_Free(p->tokens);
922 PyMem_Free(p);
923}
924
Pablo Galindo2b74c832020-04-27 18:02:07 +0100925static int
926compute_parser_flags(PyCompilerFlags *flags)
927{
928 int parser_flags = 0;
929 if (!flags) {
930 return 0;
931 }
932 if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) {
933 parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
934 }
935 if (flags->cf_flags & PyCF_IGNORE_COOKIE) {
936 parser_flags |= PyPARSE_IGNORE_COOKIE;
937 }
938 if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) {
939 parser_flags |= PyPARSE_BARRY_AS_BDFL;
940 }
941 if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
942 parser_flags |= PyPARSE_TYPE_COMMENTS;
943 }
944 return parser_flags;
945}
946
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100947Parser *
Pablo Galindo2b74c832020-04-27 18:02:07 +0100948_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
949 int *errcode, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100950{
951 Parser *p = PyMem_Malloc(sizeof(Parser));
952 if (p == NULL) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300953 return (Parser *) PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100954 }
955 assert(tok != NULL);
956 p->tok = tok;
957 p->keywords = NULL;
958 p->n_keyword_lists = -1;
959 p->tokens = PyMem_Malloc(sizeof(Token *));
960 if (!p->tokens) {
961 PyMem_Free(p);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300962 return (Parser *) PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100963 }
964 p->tokens[0] = PyMem_Malloc(sizeof(Token));
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300965 if (!p->tokens) {
966 PyMem_Free(p->tokens);
967 PyMem_Free(p);
968 return (Parser *) PyErr_NoMemory();
969 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100970 memset(p->tokens[0], '\0', sizeof(Token));
971 p->mark = 0;
972 p->fill = 0;
973 p->size = 1;
974
975 p->errcode = errcode;
976 p->arena = arena;
977 p->start_rule = start_rule;
978 p->parsing_started = 0;
979 p->normalize = NULL;
980 p->error_indicator = 0;
981
982 p->starting_lineno = 0;
983 p->starting_col_offset = 0;
Pablo Galindo2b74c832020-04-27 18:02:07 +0100984 p->flags = flags;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100985
986 return p;
987}
988
989void *
990_PyPegen_run_parser(Parser *p)
991{
992 void *res = _PyPegen_parse(p);
993 if (res == NULL) {
994 if (PyErr_Occurred()) {
995 return NULL;
996 }
997 if (p->fill == 0) {
998 RAISE_SYNTAX_ERROR("error at start before reading any input");
999 }
1000 else if (p->tok->done == E_EOF) {
1001 RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
1002 }
1003 else {
1004 if (p->tokens[p->fill-1]->type == INDENT) {
1005 RAISE_INDENTATION_ERROR("unexpected indent");
1006 }
1007 else if (p->tokens[p->fill-1]->type == DEDENT) {
1008 RAISE_INDENTATION_ERROR("unexpected unindent");
1009 }
1010 else {
1011 RAISE_SYNTAX_ERROR("invalid syntax");
1012 }
1013 }
1014 return NULL;
1015 }
1016
1017 return res;
1018}
1019
1020mod_ty
1021_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
1022 const char *enc, const char *ps1, const char *ps2,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001023 PyCompilerFlags *flags, int *errcode, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001024{
1025 struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
1026 if (tok == NULL) {
1027 if (PyErr_Occurred()) {
1028 raise_tokenizer_init_error(filename_ob);
1029 return NULL;
1030 }
1031 return NULL;
1032 }
1033 // This transfers the ownership to the tokenizer
1034 tok->filename = filename_ob;
1035 Py_INCREF(filename_ob);
1036
1037 // From here on we need to clean up even if there's an error
1038 mod_ty result = NULL;
1039
Pablo Galindo2b74c832020-04-27 18:02:07 +01001040 int parser_flags = compute_parser_flags(flags);
1041 Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, errcode, arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001042 if (p == NULL) {
1043 goto error;
1044 }
1045
1046 result = _PyPegen_run_parser(p);
1047 _PyPegen_Parser_Free(p);
1048
1049error:
1050 PyTokenizer_Free(tok);
1051 return result;
1052}
1053
1054mod_ty
1055_PyPegen_run_parser_from_file(const char *filename, int start_rule,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001056 PyObject *filename_ob, PyCompilerFlags *flags, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001057{
1058 FILE *fp = fopen(filename, "rb");
1059 if (fp == NULL) {
1060 PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
1061 return NULL;
1062 }
1063
1064 mod_ty result = _PyPegen_run_parser_from_file_pointer(fp, start_rule, filename_ob,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001065 NULL, NULL, NULL, flags, NULL, arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001066
1067 fclose(fp);
1068 return result;
1069}
1070
1071mod_ty
1072_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001073 PyCompilerFlags *flags, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001074{
1075 int exec_input = start_rule == Py_file_input;
1076
1077 struct tok_state *tok;
Pablo Galindo2b74c832020-04-27 18:02:07 +01001078 if (flags == NULL || flags->cf_flags & PyCF_IGNORE_COOKIE) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001079 tok = PyTokenizer_FromUTF8(str, exec_input);
1080 } else {
1081 tok = PyTokenizer_FromString(str, exec_input);
1082 }
1083 if (tok == NULL) {
1084 if (PyErr_Occurred()) {
1085 raise_tokenizer_init_error(filename_ob);
1086 }
1087 return NULL;
1088 }
1089 // This transfers the ownership to the tokenizer
1090 tok->filename = filename_ob;
1091 Py_INCREF(filename_ob);
1092
1093 // We need to clear up from here on
1094 mod_ty result = NULL;
1095
Pablo Galindo2b74c832020-04-27 18:02:07 +01001096 int parser_flags = compute_parser_flags(flags);
1097 Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, NULL, arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001098 if (p == NULL) {
1099 goto error;
1100 }
1101
1102 result = _PyPegen_run_parser(p);
1103 _PyPegen_Parser_Free(p);
1104
1105error:
1106 PyTokenizer_Free(tok);
1107 return result;
1108}
1109
1110void *
1111_PyPegen_interactive_exit(Parser *p)
1112{
1113 if (p->errcode) {
1114 *(p->errcode) = E_EOF;
1115 }
1116 return NULL;
1117}
1118
1119/* Creates a single-element asdl_seq* that contains a */
1120asdl_seq *
1121_PyPegen_singleton_seq(Parser *p, void *a)
1122{
1123 assert(a != NULL);
1124 asdl_seq *seq = _Py_asdl_seq_new(1, p->arena);
1125 if (!seq) {
1126 return NULL;
1127 }
1128 asdl_seq_SET(seq, 0, a);
1129 return seq;
1130}
1131
1132/* Creates a copy of seq and prepends a to it */
1133asdl_seq *
1134_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
1135{
1136 assert(a != NULL);
1137 if (!seq) {
1138 return _PyPegen_singleton_seq(p, a);
1139 }
1140
1141 asdl_seq *new_seq = _Py_asdl_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
1142 if (!new_seq) {
1143 return NULL;
1144 }
1145
1146 asdl_seq_SET(new_seq, 0, a);
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001147 for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001148 asdl_seq_SET(new_seq, i, asdl_seq_GET(seq, i - 1));
1149 }
1150 return new_seq;
1151}
1152
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001153static Py_ssize_t
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001154_get_flattened_seq_size(asdl_seq *seqs)
1155{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001156 Py_ssize_t size = 0;
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001157 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1158 asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
1159 size += asdl_seq_LEN(inner_seq);
1160 }
1161 return size;
1162}
1163
1164/* Flattens an asdl_seq* of asdl_seq*s */
1165asdl_seq *
1166_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
1167{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001168 Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001169 assert(flattened_seq_size > 0);
1170
1171 asdl_seq *flattened_seq = _Py_asdl_seq_new(flattened_seq_size, p->arena);
1172 if (!flattened_seq) {
1173 return NULL;
1174 }
1175
1176 int flattened_seq_idx = 0;
1177 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1178 asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001179 for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001180 asdl_seq_SET(flattened_seq, flattened_seq_idx++, asdl_seq_GET(inner_seq, j));
1181 }
1182 }
1183 assert(flattened_seq_idx == flattened_seq_size);
1184
1185 return flattened_seq;
1186}
1187
1188/* Creates a new name of the form <first_name>.<second_name> */
1189expr_ty
1190_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
1191{
1192 assert(first_name != NULL && second_name != NULL);
1193 PyObject *first_identifier = first_name->v.Name.id;
1194 PyObject *second_identifier = second_name->v.Name.id;
1195
1196 if (PyUnicode_READY(first_identifier) == -1) {
1197 return NULL;
1198 }
1199 if (PyUnicode_READY(second_identifier) == -1) {
1200 return NULL;
1201 }
1202 const char *first_str = PyUnicode_AsUTF8(first_identifier);
1203 if (!first_str) {
1204 return NULL;
1205 }
1206 const char *second_str = PyUnicode_AsUTF8(second_identifier);
1207 if (!second_str) {
1208 return NULL;
1209 }
Pablo Galindo9f27dd32020-04-24 01:13:33 +01001210 Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1; // +1 for the dot
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001211
1212 PyObject *str = PyBytes_FromStringAndSize(NULL, len);
1213 if (!str) {
1214 return NULL;
1215 }
1216
1217 char *s = PyBytes_AS_STRING(str);
1218 if (!s) {
1219 return NULL;
1220 }
1221
1222 strcpy(s, first_str);
1223 s += strlen(first_str);
1224 *s++ = '.';
1225 strcpy(s, second_str);
1226 s += strlen(second_str);
1227 *s = '\0';
1228
1229 PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL);
1230 Py_DECREF(str);
1231 if (!uni) {
1232 return NULL;
1233 }
1234 PyUnicode_InternInPlace(&uni);
1235 if (PyArena_AddPyObject(p->arena, uni) < 0) {
1236 Py_DECREF(uni);
1237 return NULL;
1238 }
1239
1240 return _Py_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
1241}
1242
1243/* Counts the total number of dots in seq's tokens */
1244int
1245_PyPegen_seq_count_dots(asdl_seq *seq)
1246{
1247 int number_of_dots = 0;
1248 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1249 Token *current_expr = asdl_seq_GET(seq, i);
1250 switch (current_expr->type) {
1251 case ELLIPSIS:
1252 number_of_dots += 3;
1253 break;
1254 case DOT:
1255 number_of_dots += 1;
1256 break;
1257 default:
Lysandros Nikolaouebebb642020-04-23 18:36:06 +03001258 Py_UNREACHABLE();
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001259 }
1260 }
1261
1262 return number_of_dots;
1263}
1264
1265/* Creates an alias with '*' as the identifier name */
1266alias_ty
1267_PyPegen_alias_for_star(Parser *p)
1268{
1269 PyObject *str = PyUnicode_InternFromString("*");
1270 if (!str) {
1271 return NULL;
1272 }
1273 if (PyArena_AddPyObject(p->arena, str) < 0) {
1274 Py_DECREF(str);
1275 return NULL;
1276 }
1277 return alias(str, NULL, p->arena);
1278}
1279
1280/* Creates a new asdl_seq* with the identifiers of all the names in seq */
1281asdl_seq *
1282_PyPegen_map_names_to_ids(Parser *p, asdl_seq *seq)
1283{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001284 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001285 assert(len > 0);
1286
1287 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1288 if (!new_seq) {
1289 return NULL;
1290 }
1291 for (Py_ssize_t i = 0; i < len; i++) {
1292 expr_ty e = asdl_seq_GET(seq, i);
1293 asdl_seq_SET(new_seq, i, e->v.Name.id);
1294 }
1295 return new_seq;
1296}
1297
1298/* Constructs a CmpopExprPair */
1299CmpopExprPair *
1300_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
1301{
1302 assert(expr != NULL);
1303 CmpopExprPair *a = PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
1304 if (!a) {
1305 return NULL;
1306 }
1307 a->cmpop = cmpop;
1308 a->expr = expr;
1309 return a;
1310}
1311
1312asdl_int_seq *
1313_PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
1314{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001315 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001316 assert(len > 0);
1317
1318 asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
1319 if (!new_seq) {
1320 return NULL;
1321 }
1322 for (Py_ssize_t i = 0; i < len; i++) {
1323 CmpopExprPair *pair = asdl_seq_GET(seq, i);
1324 asdl_seq_SET(new_seq, i, pair->cmpop);
1325 }
1326 return new_seq;
1327}
1328
1329asdl_seq *
1330_PyPegen_get_exprs(Parser *p, asdl_seq *seq)
1331{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001332 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001333 assert(len > 0);
1334
1335 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1336 if (!new_seq) {
1337 return NULL;
1338 }
1339 for (Py_ssize_t i = 0; i < len; i++) {
1340 CmpopExprPair *pair = asdl_seq_GET(seq, i);
1341 asdl_seq_SET(new_seq, i, pair->expr);
1342 }
1343 return new_seq;
1344}
1345
1346/* Creates an asdl_seq* where all the elements have been changed to have ctx as context */
1347static asdl_seq *
1348_set_seq_context(Parser *p, asdl_seq *seq, expr_context_ty ctx)
1349{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001350 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001351 if (len == 0) {
1352 return NULL;
1353 }
1354
1355 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1356 if (!new_seq) {
1357 return NULL;
1358 }
1359 for (Py_ssize_t i = 0; i < len; i++) {
1360 expr_ty e = asdl_seq_GET(seq, i);
1361 asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
1362 }
1363 return new_seq;
1364}
1365
1366static expr_ty
1367_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
1368{
1369 return _Py_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
1370}
1371
1372static expr_ty
1373_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
1374{
1375 return _Py_Tuple(_set_seq_context(p, e->v.Tuple.elts, ctx), ctx, EXTRA_EXPR(e, e));
1376}
1377
1378static expr_ty
1379_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
1380{
1381 return _Py_List(_set_seq_context(p, e->v.List.elts, ctx), ctx, EXTRA_EXPR(e, e));
1382}
1383
1384static expr_ty
1385_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
1386{
1387 return _Py_Subscript(e->v.Subscript.value, e->v.Subscript.slice, ctx, EXTRA_EXPR(e, e));
1388}
1389
1390static expr_ty
1391_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
1392{
1393 return _Py_Attribute(e->v.Attribute.value, e->v.Attribute.attr, ctx, EXTRA_EXPR(e, e));
1394}
1395
1396static expr_ty
1397_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
1398{
1399 return _Py_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx), ctx, EXTRA_EXPR(e, e));
1400}
1401
1402/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
1403expr_ty
1404_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
1405{
1406 assert(expr != NULL);
1407
1408 expr_ty new = NULL;
1409 switch (expr->kind) {
1410 case Name_kind:
1411 new = _set_name_context(p, expr, ctx);
1412 break;
1413 case Tuple_kind:
1414 new = _set_tuple_context(p, expr, ctx);
1415 break;
1416 case List_kind:
1417 new = _set_list_context(p, expr, ctx);
1418 break;
1419 case Subscript_kind:
1420 new = _set_subscript_context(p, expr, ctx);
1421 break;
1422 case Attribute_kind:
1423 new = _set_attribute_context(p, expr, ctx);
1424 break;
1425 case Starred_kind:
1426 new = _set_starred_context(p, expr, ctx);
1427 break;
1428 default:
1429 new = expr;
1430 }
1431 return new;
1432}
1433
1434/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
1435KeyValuePair *
1436_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
1437{
1438 KeyValuePair *a = PyArena_Malloc(p->arena, sizeof(KeyValuePair));
1439 if (!a) {
1440 return NULL;
1441 }
1442 a->key = key;
1443 a->value = value;
1444 return a;
1445}
1446
1447/* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
1448asdl_seq *
1449_PyPegen_get_keys(Parser *p, asdl_seq *seq)
1450{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001451 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001452 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1453 if (!new_seq) {
1454 return NULL;
1455 }
1456 for (Py_ssize_t i = 0; i < len; i++) {
1457 KeyValuePair *pair = asdl_seq_GET(seq, i);
1458 asdl_seq_SET(new_seq, i, pair->key);
1459 }
1460 return new_seq;
1461}
1462
1463/* Extracts all values from an asdl_seq* of KeyValuePair*'s */
1464asdl_seq *
1465_PyPegen_get_values(Parser *p, asdl_seq *seq)
1466{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001467 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001468 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1469 if (!new_seq) {
1470 return NULL;
1471 }
1472 for (Py_ssize_t i = 0; i < len; i++) {
1473 KeyValuePair *pair = asdl_seq_GET(seq, i);
1474 asdl_seq_SET(new_seq, i, pair->value);
1475 }
1476 return new_seq;
1477}
1478
1479/* Constructs a NameDefaultPair */
1480NameDefaultPair *
1481_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value)
1482{
1483 NameDefaultPair *a = PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
1484 if (!a) {
1485 return NULL;
1486 }
1487 a->arg = arg;
1488 a->value = value;
1489 return a;
1490}
1491
1492/* Constructs a SlashWithDefault */
1493SlashWithDefault *
1494_PyPegen_slash_with_default(Parser *p, asdl_seq *plain_names, asdl_seq *names_with_defaults)
1495{
1496 SlashWithDefault *a = PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
1497 if (!a) {
1498 return NULL;
1499 }
1500 a->plain_names = plain_names;
1501 a->names_with_defaults = names_with_defaults;
1502 return a;
1503}
1504
1505/* Constructs a StarEtc */
1506StarEtc *
1507_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
1508{
1509 StarEtc *a = PyArena_Malloc(p->arena, sizeof(StarEtc));
1510 if (!a) {
1511 return NULL;
1512 }
1513 a->vararg = vararg;
1514 a->kwonlyargs = kwonlyargs;
1515 a->kwarg = kwarg;
1516 return a;
1517}
1518
1519asdl_seq *
1520_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
1521{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001522 Py_ssize_t first_len = asdl_seq_LEN(a);
1523 Py_ssize_t second_len = asdl_seq_LEN(b);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001524 asdl_seq *new_seq = _Py_asdl_seq_new(first_len + second_len, p->arena);
1525 if (!new_seq) {
1526 return NULL;
1527 }
1528
1529 int k = 0;
1530 for (Py_ssize_t i = 0; i < first_len; i++) {
1531 asdl_seq_SET(new_seq, k++, asdl_seq_GET(a, i));
1532 }
1533 for (Py_ssize_t i = 0; i < second_len; i++) {
1534 asdl_seq_SET(new_seq, k++, asdl_seq_GET(b, i));
1535 }
1536
1537 return new_seq;
1538}
1539
1540static asdl_seq *
1541_get_names(Parser *p, asdl_seq *names_with_defaults)
1542{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001543 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001544 asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
1545 if (!seq) {
1546 return NULL;
1547 }
1548 for (Py_ssize_t i = 0; i < len; i++) {
1549 NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
1550 asdl_seq_SET(seq, i, pair->arg);
1551 }
1552 return seq;
1553}
1554
1555static asdl_seq *
1556_get_defaults(Parser *p, asdl_seq *names_with_defaults)
1557{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001558 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001559 asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
1560 if (!seq) {
1561 return NULL;
1562 }
1563 for (Py_ssize_t i = 0; i < len; i++) {
1564 NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
1565 asdl_seq_SET(seq, i, pair->value);
1566 }
1567 return seq;
1568}
1569
1570/* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
1571arguments_ty
1572_PyPegen_make_arguments(Parser *p, asdl_seq *slash_without_default,
1573 SlashWithDefault *slash_with_default, asdl_seq *plain_names,
1574 asdl_seq *names_with_default, StarEtc *star_etc)
1575{
1576 asdl_seq *posonlyargs;
1577 if (slash_without_default != NULL) {
1578 posonlyargs = slash_without_default;
1579 }
1580 else if (slash_with_default != NULL) {
1581 asdl_seq *slash_with_default_names =
1582 _get_names(p, slash_with_default->names_with_defaults);
1583 if (!slash_with_default_names) {
1584 return NULL;
1585 }
1586 posonlyargs = _PyPegen_join_sequences(p, slash_with_default->plain_names, slash_with_default_names);
1587 if (!posonlyargs) {
1588 return NULL;
1589 }
1590 }
1591 else {
1592 posonlyargs = _Py_asdl_seq_new(0, p->arena);
1593 if (!posonlyargs) {
1594 return NULL;
1595 }
1596 }
1597
1598 asdl_seq *posargs;
1599 if (plain_names != NULL && names_with_default != NULL) {
1600 asdl_seq *names_with_default_names = _get_names(p, names_with_default);
1601 if (!names_with_default_names) {
1602 return NULL;
1603 }
1604 posargs = _PyPegen_join_sequences(p, plain_names, names_with_default_names);
1605 if (!posargs) {
1606 return NULL;
1607 }
1608 }
1609 else if (plain_names == NULL && names_with_default != NULL) {
1610 posargs = _get_names(p, names_with_default);
1611 if (!posargs) {
1612 return NULL;
1613 }
1614 }
1615 else if (plain_names != NULL && names_with_default == NULL) {
1616 posargs = plain_names;
1617 }
1618 else {
1619 posargs = _Py_asdl_seq_new(0, p->arena);
1620 if (!posargs) {
1621 return NULL;
1622 }
1623 }
1624
1625 asdl_seq *posdefaults;
1626 if (slash_with_default != NULL && names_with_default != NULL) {
1627 asdl_seq *slash_with_default_values =
1628 _get_defaults(p, slash_with_default->names_with_defaults);
1629 if (!slash_with_default_values) {
1630 return NULL;
1631 }
1632 asdl_seq *names_with_default_values = _get_defaults(p, names_with_default);
1633 if (!names_with_default_values) {
1634 return NULL;
1635 }
1636 posdefaults = _PyPegen_join_sequences(p, slash_with_default_values, names_with_default_values);
1637 if (!posdefaults) {
1638 return NULL;
1639 }
1640 }
1641 else if (slash_with_default == NULL && names_with_default != NULL) {
1642 posdefaults = _get_defaults(p, names_with_default);
1643 if (!posdefaults) {
1644 return NULL;
1645 }
1646 }
1647 else if (slash_with_default != NULL && names_with_default == NULL) {
1648 posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
1649 if (!posdefaults) {
1650 return NULL;
1651 }
1652 }
1653 else {
1654 posdefaults = _Py_asdl_seq_new(0, p->arena);
1655 if (!posdefaults) {
1656 return NULL;
1657 }
1658 }
1659
1660 arg_ty vararg = NULL;
1661 if (star_etc != NULL && star_etc->vararg != NULL) {
1662 vararg = star_etc->vararg;
1663 }
1664
1665 asdl_seq *kwonlyargs;
1666 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
1667 kwonlyargs = _get_names(p, star_etc->kwonlyargs);
1668 if (!kwonlyargs) {
1669 return NULL;
1670 }
1671 }
1672 else {
1673 kwonlyargs = _Py_asdl_seq_new(0, p->arena);
1674 if (!kwonlyargs) {
1675 return NULL;
1676 }
1677 }
1678
1679 asdl_seq *kwdefaults;
1680 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
1681 kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
1682 if (!kwdefaults) {
1683 return NULL;
1684 }
1685 }
1686 else {
1687 kwdefaults = _Py_asdl_seq_new(0, p->arena);
1688 if (!kwdefaults) {
1689 return NULL;
1690 }
1691 }
1692
1693 arg_ty kwarg = NULL;
1694 if (star_etc != NULL && star_etc->kwarg != NULL) {
1695 kwarg = star_etc->kwarg;
1696 }
1697
1698 return _Py_arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg,
1699 posdefaults, p->arena);
1700}
1701
1702/* Constructs an empty arguments_ty object, that gets used when a function accepts no
1703 * arguments. */
1704arguments_ty
1705_PyPegen_empty_arguments(Parser *p)
1706{
1707 asdl_seq *posonlyargs = _Py_asdl_seq_new(0, p->arena);
1708 if (!posonlyargs) {
1709 return NULL;
1710 }
1711 asdl_seq *posargs = _Py_asdl_seq_new(0, p->arena);
1712 if (!posargs) {
1713 return NULL;
1714 }
1715 asdl_seq *posdefaults = _Py_asdl_seq_new(0, p->arena);
1716 if (!posdefaults) {
1717 return NULL;
1718 }
1719 asdl_seq *kwonlyargs = _Py_asdl_seq_new(0, p->arena);
1720 if (!kwonlyargs) {
1721 return NULL;
1722 }
1723 asdl_seq *kwdefaults = _Py_asdl_seq_new(0, p->arena);
1724 if (!kwdefaults) {
1725 return NULL;
1726 }
1727
1728 return _Py_arguments(posonlyargs, posargs, NULL, kwonlyargs, kwdefaults, NULL, kwdefaults,
1729 p->arena);
1730}
1731
1732/* Encapsulates the value of an operator_ty into an AugOperator struct */
1733AugOperator *
1734_PyPegen_augoperator(Parser *p, operator_ty kind)
1735{
1736 AugOperator *a = PyArena_Malloc(p->arena, sizeof(AugOperator));
1737 if (!a) {
1738 return NULL;
1739 }
1740 a->kind = kind;
1741 return a;
1742}
1743
1744/* Construct a FunctionDef equivalent to function_def, but with decorators */
1745stmt_ty
1746_PyPegen_function_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty function_def)
1747{
1748 assert(function_def != NULL);
1749 if (function_def->kind == AsyncFunctionDef_kind) {
1750 return _Py_AsyncFunctionDef(
1751 function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
1752 function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns,
1753 function_def->v.FunctionDef.type_comment, function_def->lineno,
1754 function_def->col_offset, function_def->end_lineno, function_def->end_col_offset,
1755 p->arena);
1756 }
1757
1758 return _Py_FunctionDef(function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
1759 function_def->v.FunctionDef.body, decorators,
1760 function_def->v.FunctionDef.returns,
1761 function_def->v.FunctionDef.type_comment, function_def->lineno,
1762 function_def->col_offset, function_def->end_lineno,
1763 function_def->end_col_offset, p->arena);
1764}
1765
1766/* Construct a ClassDef equivalent to class_def, but with decorators */
1767stmt_ty
1768_PyPegen_class_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty class_def)
1769{
1770 assert(class_def != NULL);
1771 return _Py_ClassDef(class_def->v.ClassDef.name, class_def->v.ClassDef.bases,
1772 class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators,
1773 class_def->lineno, class_def->col_offset, class_def->end_lineno,
1774 class_def->end_col_offset, p->arena);
1775}
1776
1777/* Construct a KeywordOrStarred */
1778KeywordOrStarred *
1779_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
1780{
1781 KeywordOrStarred *a = PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
1782 if (!a) {
1783 return NULL;
1784 }
1785 a->element = element;
1786 a->is_keyword = is_keyword;
1787 return a;
1788}
1789
1790/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
1791static int
1792_seq_number_of_starred_exprs(asdl_seq *seq)
1793{
1794 int n = 0;
1795 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1796 KeywordOrStarred *k = asdl_seq_GET(seq, i);
1797 if (!k->is_keyword) {
1798 n++;
1799 }
1800 }
1801 return n;
1802}
1803
1804/* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */
1805asdl_seq *
1806_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
1807{
1808 int new_len = _seq_number_of_starred_exprs(kwargs);
1809 if (new_len == 0) {
1810 return NULL;
1811 }
1812 asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
1813 if (!new_seq) {
1814 return NULL;
1815 }
1816
1817 int idx = 0;
1818 for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
1819 KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
1820 if (!k->is_keyword) {
1821 asdl_seq_SET(new_seq, idx++, k->element);
1822 }
1823 }
1824 return new_seq;
1825}
1826
1827/* Return a new asdl_seq* with only the keywords in kwargs */
1828asdl_seq *
1829_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
1830{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001831 Py_ssize_t len = asdl_seq_LEN(kwargs);
1832 Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001833 if (new_len == 0) {
1834 return NULL;
1835 }
1836 asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
1837 if (!new_seq) {
1838 return NULL;
1839 }
1840
1841 int idx = 0;
1842 for (Py_ssize_t i = 0; i < len; i++) {
1843 KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
1844 if (k->is_keyword) {
1845 asdl_seq_SET(new_seq, idx++, k->element);
1846 }
1847 }
1848 return new_seq;
1849}
1850
1851expr_ty
1852_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
1853{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001854 Py_ssize_t len = asdl_seq_LEN(strings);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001855 assert(len > 0);
1856
1857 Token *first = asdl_seq_GET(strings, 0);
1858 Token *last = asdl_seq_GET(strings, len - 1);
1859
1860 int bytesmode = 0;
1861 PyObject *bytes_str = NULL;
1862
1863 FstringParser state;
1864 _PyPegen_FstringParser_Init(&state);
1865
1866 for (Py_ssize_t i = 0; i < len; i++) {
1867 Token *t = asdl_seq_GET(strings, i);
1868
1869 int this_bytesmode;
1870 int this_rawmode;
1871 PyObject *s;
1872 const char *fstr;
1873 Py_ssize_t fstrlen = -1;
1874
1875 char *this_str = PyBytes_AsString(t->bytes);
1876 if (!this_str) {
1877 goto error;
1878 }
1879
1880 if (_PyPegen_parsestr(p, this_str, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen) != 0) {
1881 goto error;
1882 }
1883
1884 /* Check that we are not mixing bytes with unicode. */
1885 if (i != 0 && bytesmode != this_bytesmode) {
1886 RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
1887 Py_XDECREF(s);
1888 goto error;
1889 }
1890 bytesmode = this_bytesmode;
1891
1892 if (fstr != NULL) {
1893 assert(s == NULL && !bytesmode);
1894
1895 int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
1896 this_rawmode, 0, first, t, last);
1897 if (result < 0) {
1898 goto error;
1899 }
1900 }
1901 else {
1902 /* String or byte string. */
1903 assert(s != NULL && fstr == NULL);
1904 assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
1905
1906 if (bytesmode) {
1907 if (i == 0) {
1908 bytes_str = s;
1909 }
1910 else {
1911 PyBytes_ConcatAndDel(&bytes_str, s);
1912 if (!bytes_str) {
1913 goto error;
1914 }
1915 }
1916 }
1917 else {
1918 /* This is a regular string. Concatenate it. */
1919 if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
1920 goto error;
1921 }
1922 }
1923 }
1924 }
1925
1926 if (bytesmode) {
1927 if (PyArena_AddPyObject(p->arena, bytes_str) < 0) {
1928 goto error;
1929 }
1930 return Constant(bytes_str, NULL, first->lineno, first->col_offset, last->end_lineno,
1931 last->end_col_offset, p->arena);
1932 }
1933
1934 return _PyPegen_FstringParser_Finish(p, &state, first, last);
1935
1936error:
1937 Py_XDECREF(bytes_str);
1938 _PyPegen_FstringParser_Dealloc(&state);
1939 if (PyErr_Occurred()) {
1940 raise_decode_error(p);
1941 }
1942 return NULL;
1943}