blob: 39da2709991b91b62d48b56035c8db49a6d3f389 [file] [log] [blame]
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001#include <Python.h>
2#include <errcode.h>
3#include "../tokenizer.h"
4
5#include "pegen.h"
6#include "parse_string.h"
7
8static int
9init_normalization(Parser *p)
10{
Lysandros Nikolaouebebb642020-04-23 18:36:06 +030011 if (p->normalize) {
12 return 1;
13 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +010014 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
15 if (!m)
16 {
17 return 0;
18 }
19 p->normalize = PyObject_GetAttrString(m, "normalize");
20 Py_DECREF(m);
21 if (!p->normalize)
22 {
23 return 0;
24 }
25 return 1;
26}
27
Pablo Galindo2b74c832020-04-27 18:02:07 +010028/* Checks if the NOTEQUAL token is valid given the current parser flags
290 indicates success and nonzero indicates failure (an exception may be set) */
30int
31_PyPegen_check_barry_as_flufl(Parser *p) {
32 Token *t = p->tokens[p->fill - 1];
33 assert(t->bytes != NULL);
34 assert(t->type == NOTEQUAL);
35
36 char* tok_str = PyBytes_AS_STRING(t->bytes);
37 if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>")){
38 RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
39 return -1;
40 } else if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
41 return strcmp(tok_str, "!=");
42 }
43 return 0;
44}
45
Pablo Galindoc5fc1562020-04-22 23:29:27 +010046PyObject *
47_PyPegen_new_identifier(Parser *p, char *n)
48{
49 PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
50 if (!id) {
51 goto error;
52 }
53 /* PyUnicode_DecodeUTF8 should always return a ready string. */
54 assert(PyUnicode_IS_READY(id));
55 /* Check whether there are non-ASCII characters in the
56 identifier; if so, normalize to NFKC. */
57 if (!PyUnicode_IS_ASCII(id))
58 {
59 PyObject *id2;
Lysandros Nikolaouebebb642020-04-23 18:36:06 +030060 if (!init_normalization(p))
Pablo Galindoc5fc1562020-04-22 23:29:27 +010061 {
62 Py_DECREF(id);
63 goto error;
64 }
65 PyObject *form = PyUnicode_InternFromString("NFKC");
66 if (form == NULL)
67 {
68 Py_DECREF(id);
69 goto error;
70 }
71 PyObject *args[2] = {form, id};
72 id2 = _PyObject_FastCall(p->normalize, args, 2);
73 Py_DECREF(id);
74 Py_DECREF(form);
75 if (!id2) {
76 goto error;
77 }
78 if (!PyUnicode_Check(id2))
79 {
80 PyErr_Format(PyExc_TypeError,
81 "unicodedata.normalize() must return a string, not "
82 "%.200s",
83 _PyType_Name(Py_TYPE(id2)));
84 Py_DECREF(id2);
85 goto error;
86 }
87 id = id2;
88 }
89 PyUnicode_InternInPlace(&id);
90 if (PyArena_AddPyObject(p->arena, id) < 0)
91 {
92 Py_DECREF(id);
93 goto error;
94 }
95 return id;
96
97error:
98 p->error_indicator = 1;
99 return NULL;
100}
101
102static PyObject *
103_create_dummy_identifier(Parser *p)
104{
105 return _PyPegen_new_identifier(p, "");
106}
107
108static inline Py_ssize_t
109byte_offset_to_character_offset(PyObject *line, int col_offset)
110{
111 const char *str = PyUnicode_AsUTF8(line);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300112 if (!str) {
113 return 0;
114 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100115 PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, NULL);
116 if (!text) {
117 return 0;
118 }
119 Py_ssize_t size = PyUnicode_GET_LENGTH(text);
120 Py_DECREF(text);
121 return size;
122}
123
124const char *
125_PyPegen_get_expr_name(expr_ty e)
126{
127 switch (e->kind) {
128 case Attribute_kind:
129 return "attribute";
130 case Subscript_kind:
131 return "subscript";
132 case Starred_kind:
133 return "starred";
134 case Name_kind:
135 return "name";
136 case List_kind:
137 return "list";
138 case Tuple_kind:
139 return "tuple";
140 case Lambda_kind:
141 return "lambda";
142 case Call_kind:
143 return "function call";
144 case BoolOp_kind:
145 case BinOp_kind:
146 case UnaryOp_kind:
147 return "operator";
148 case GeneratorExp_kind:
149 return "generator expression";
150 case Yield_kind:
151 case YieldFrom_kind:
152 return "yield expression";
153 case Await_kind:
154 return "await expression";
155 case ListComp_kind:
156 return "list comprehension";
157 case SetComp_kind:
158 return "set comprehension";
159 case DictComp_kind:
160 return "dict comprehension";
161 case Dict_kind:
162 return "dict display";
163 case Set_kind:
164 return "set display";
165 case JoinedStr_kind:
166 case FormattedValue_kind:
167 return "f-string expression";
168 case Constant_kind: {
169 PyObject *value = e->v.Constant.value;
170 if (value == Py_None) {
171 return "None";
172 }
173 if (value == Py_False) {
174 return "False";
175 }
176 if (value == Py_True) {
177 return "True";
178 }
179 if (value == Py_Ellipsis) {
180 return "Ellipsis";
181 }
182 return "literal";
183 }
184 case Compare_kind:
185 return "comparison";
186 case IfExp_kind:
187 return "conditional expression";
188 case NamedExpr_kind:
189 return "named expression";
190 default:
191 PyErr_Format(PyExc_SystemError,
192 "unexpected expression in assignment %d (line %d)",
193 e->kind, e->lineno);
194 return NULL;
195 }
196}
197
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300198static int
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100199raise_decode_error(Parser *p)
200{
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300201 assert(PyErr_Occurred());
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100202 const char *errtype = NULL;
203 if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
204 errtype = "unicode error";
205 }
206 else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
207 errtype = "value error";
208 }
209 if (errtype) {
210 PyObject *type, *value, *tback, *errstr;
211 PyErr_Fetch(&type, &value, &tback);
212 errstr = PyObject_Str(value);
213 if (errstr) {
214 RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
215 Py_DECREF(errstr);
216 }
217 else {
218 PyErr_Clear();
219 RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
220 }
221 Py_XDECREF(type);
222 Py_XDECREF(value);
223 Py_XDECREF(tback);
224 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300225
226 return -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100227}
228
229static void
230raise_tokenizer_init_error(PyObject *filename)
231{
232 if (!(PyErr_ExceptionMatches(PyExc_LookupError)
233 || PyErr_ExceptionMatches(PyExc_ValueError)
234 || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
235 return;
236 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300237 PyObject *errstr = NULL;
238 PyObject *tuple = NULL;
239 PyObject *type, *value, *tback;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100240 PyErr_Fetch(&type, &value, &tback);
241 errstr = PyObject_Str(value);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300242 if (!errstr) {
243 goto error;
244 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100245
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300246 PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100247 if (!tmp) {
248 goto error;
249 }
250
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300251 tuple = PyTuple_Pack(2, errstr, tmp);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100252 Py_DECREF(tmp);
253 if (!value) {
254 goto error;
255 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300256 PyErr_SetObject(PyExc_SyntaxError, tuple);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100257
258error:
259 Py_XDECREF(type);
260 Py_XDECREF(value);
261 Py_XDECREF(tback);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300262 Py_XDECREF(errstr);
263 Py_XDECREF(tuple);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100264}
265
266static inline PyObject *
267get_error_line(char *buffer)
268{
269 char *newline = strchr(buffer, '\n');
270 if (newline) {
271 return PyUnicode_FromStringAndSize(buffer, newline - buffer);
272 }
273 else {
274 return PyUnicode_FromString(buffer);
275 }
276}
277
278static int
279tokenizer_error_with_col_offset(Parser *p, PyObject *errtype, const char *errmsg)
280{
281 PyObject *errstr = NULL;
282 PyObject *value = NULL;
Pablo Galindoee40e4b2020-04-23 03:43:08 +0100283 size_t col_number = -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100284
285 errstr = PyUnicode_FromString(errmsg);
286 if (!errstr) {
287 return -1;
288 }
289
290 PyObject *loc = NULL;
291 if (p->start_rule == Py_file_input) {
292 loc = PyErr_ProgramTextObject(p->tok->filename, p->tok->lineno);
293 }
294 if (!loc) {
295 loc = get_error_line(p->tok->buf);
296 }
297
298 if (loc) {
299 col_number = p->tok->cur - p->tok->buf;
300 }
301 else {
302 Py_INCREF(Py_None);
303 loc = Py_None;
304 }
305
306 PyObject *tmp = Py_BuildValue("(OiiN)", p->tok->filename, p->tok->lineno,
307 col_number, loc);
308 if (!tmp) {
309 goto error;
310 }
311
312 value = PyTuple_Pack(2, errstr, tmp);
313 Py_DECREF(tmp);
314 if (!value) {
315 goto error;
316 }
317 PyErr_SetObject(errtype, value);
318
319 Py_XDECREF(value);
320 Py_XDECREF(errstr);
321 return -1;
322
323error:
324 Py_XDECREF(errstr);
325 Py_XDECREF(loc);
326 return -1;
327}
328
329static int
330tokenizer_error(Parser *p)
331{
332 if (PyErr_Occurred()) {
333 return -1;
334 }
335
336 const char *msg = NULL;
337 PyObject* errtype = PyExc_SyntaxError;
338 switch (p->tok->done) {
339 case E_TOKEN:
340 msg = "invalid token";
341 break;
342 case E_IDENTIFIER:
343 msg = "invalid character in identifier";
344 break;
345 case E_BADPREFIX:
346 return tokenizer_error_with_col_offset(p,
Lysandros Nikolaoud55133f2020-04-28 03:23:35 +0300347 errtype, "invalid string prefix");
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100348 case E_EOFS:
349 return tokenizer_error_with_col_offset(p,
Lysandros Nikolaoud55133f2020-04-28 03:23:35 +0300350 errtype, "EOF while scanning triple-quoted string literal");
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100351 case E_EOLS:
352 return tokenizer_error_with_col_offset(p,
Lysandros Nikolaoud55133f2020-04-28 03:23:35 +0300353 errtype, "EOL while scanning string literal");
354 case E_EOF:
355 return tokenizer_error_with_col_offset(p,
356 errtype, "unexpected EOF while parsing");
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100357 case E_DEDENT:
358 return tokenizer_error_with_col_offset(p,
359 PyExc_IndentationError, "unindent does not match any outer indentation level");
360 case E_INTR:
361 if (!PyErr_Occurred()) {
362 PyErr_SetNone(PyExc_KeyboardInterrupt);
363 }
364 return -1;
365 case E_NOMEM:
366 PyErr_NoMemory();
367 return -1;
368 case E_TABSPACE:
369 errtype = PyExc_TabError;
370 msg = "inconsistent use of tabs and spaces in indentation";
371 break;
372 case E_TOODEEP:
373 errtype = PyExc_IndentationError;
374 msg = "too many levels of indentation";
375 break;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100376 case E_LINECONT:
377 msg = "unexpected character after line continuation character";
378 break;
379 default:
380 msg = "unknown parsing error";
381 }
382
383 PyErr_Format(errtype, msg);
384 // There is no reliable column information for this error
385 PyErr_SyntaxLocationObject(p->tok->filename, p->tok->lineno, 0);
386
387 return -1;
388}
389
390void *
391_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
392{
393 PyObject *value = NULL;
394 PyObject *errstr = NULL;
395 PyObject *loc = NULL;
396 PyObject *tmp = NULL;
397 Token *t = p->tokens[p->fill - 1];
398 Py_ssize_t col_number = 0;
399 va_list va;
400
401 va_start(va, errmsg);
402 errstr = PyUnicode_FromFormatV(errmsg, va);
403 va_end(va);
404 if (!errstr) {
405 goto error;
406 }
407
408 if (p->start_rule == Py_file_input) {
409 loc = PyErr_ProgramTextObject(p->tok->filename, t->lineno);
410 }
411
412 if (!loc) {
413 loc = get_error_line(p->tok->buf);
414 }
415
416 if (loc) {
417 int col_offset = t->col_offset == -1 ? 0 : t->col_offset;
418 col_number = byte_offset_to_character_offset(loc, col_offset) + 1;
419 }
420 else {
421 Py_INCREF(Py_None);
422 loc = Py_None;
423 }
424
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100425 tmp = Py_BuildValue("(OiiN)", p->tok->filename, t->lineno, col_number, loc);
426 if (!tmp) {
427 goto error;
428 }
429 value = PyTuple_Pack(2, errstr, tmp);
430 Py_DECREF(tmp);
431 if (!value) {
432 goto error;
433 }
434 PyErr_SetObject(errtype, value);
435
436 Py_DECREF(errstr);
437 Py_DECREF(value);
438 return NULL;
439
440error:
441 Py_XDECREF(errstr);
442 Py_XDECREF(loc);
443 return NULL;
444}
445
446void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
447 int kwarg_unpacking = 0;
448 for (Py_ssize_t i = 0, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) {
449 keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i);
450 if (!keyword->arg) {
451 kwarg_unpacking = 1;
452 }
453 }
454
455 const char *msg = NULL;
456 if (kwarg_unpacking) {
457 msg = "positional argument follows keyword argument unpacking";
458 } else {
459 msg = "positional argument follows keyword argument";
460 }
461
462 return RAISE_SYNTAX_ERROR(msg);
463}
464
#if 0
/* Currently compiled out. Returns the entry of _PyParser_TokenNames for a
   token type in [0, N_TOKENS], and "<Huh?>" for anything else. */
static const char *
token_name(int type)
{
    if (type < 0 || type > N_TOKENS) {
        return "<Huh?>";
    }
    return _PyParser_TokenNames[type];
}
#endif
475
476// Here, mark is the start of the node, while p->mark is the end.
477// If node==NULL, they should be the same.
478int
479_PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
480{
481 // Insert in front
482 Memo *m = PyArena_Malloc(p->arena, sizeof(Memo));
483 if (m == NULL) {
484 return -1;
485 }
486 m->type = type;
487 m->node = node;
488 m->mark = p->mark;
489 m->next = p->tokens[mark]->memo;
490 p->tokens[mark]->memo = m;
491 return 0;
492}
493
494// Like _PyPegen_insert_memo(), but updates an existing node if found.
495int
496_PyPegen_update_memo(Parser *p, int mark, int type, void *node)
497{
498 for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
499 if (m->type == type) {
500 // Update existing node.
501 m->node = node;
502 m->mark = p->mark;
503 return 0;
504 }
505 }
506 // Insert new node.
507 return _PyPegen_insert_memo(p, mark, type, node);
508}
509
510// Return dummy NAME.
511void *
512_PyPegen_dummy_name(Parser *p, ...)
513{
514 static void *cache = NULL;
515
516 if (cache != NULL) {
517 return cache;
518 }
519
520 PyObject *id = _create_dummy_identifier(p);
521 if (!id) {
522 return NULL;
523 }
524 cache = Name(id, Load, 1, 0, 1, 0, p->arena);
525 return cache;
526}
527
528static int
529_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
530{
531 if (name_len >= p->n_keyword_lists || p->keywords[name_len] == NULL) {
532 return NAME;
533 }
534 for (KeywordToken *k = p->keywords[name_len]; k->type != -1; k++) {
535 if (strncmp(k->str, name, name_len) == 0) {
536 return k->type;
537 }
538 }
539 return NAME;
540}
541
542int
543_PyPegen_fill_token(Parser *p)
544{
545 const char *start, *end;
546 int type = PyTokenizer_Get(p->tok, &start, &end);
547 if (type == ERRORTOKEN) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300548 if (p->tok->done == E_DECODE) {
549 return raise_decode_error(p);
550 }
551 else {
552 return tokenizer_error(p);
553 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100554 }
555 if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
556 type = NEWLINE; /* Add an extra newline */
557 p->parsing_started = 0;
558
Pablo Galindob94dbd72020-04-27 18:35:58 +0100559 if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100560 p->tok->pendin = -p->tok->indent;
561 p->tok->indent = 0;
562 }
563 }
564 else {
565 p->parsing_started = 1;
566 }
567
568 if (p->fill == p->size) {
569 int newsize = p->size * 2;
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300570 Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
571 if (new_tokens == NULL) {
572 PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100573 return -1;
574 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300575 else {
576 p->tokens = new_tokens;
577 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100578 for (int i = p->size; i < newsize; i++) {
579 p->tokens[i] = PyMem_Malloc(sizeof(Token));
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300580 if (p->tokens[i] == NULL) {
581 p->size = i; // Needed, in order to cleanup correctly after parser fails
582 PyErr_NoMemory();
583 return -1;
584 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100585 memset(p->tokens[i], '\0', sizeof(Token));
586 }
587 p->size = newsize;
588 }
589
590 Token *t = p->tokens[p->fill];
591 t->type = (type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : type;
592 t->bytes = PyBytes_FromStringAndSize(start, end - start);
593 if (t->bytes == NULL) {
594 return -1;
595 }
596 PyArena_AddPyObject(p->arena, t->bytes);
597
598 int lineno = type == STRING ? p->tok->first_lineno : p->tok->lineno;
599 const char *line_start = type == STRING ? p->tok->multi_line_start : p->tok->line_start;
Pablo Galindo22081342020-04-29 02:04:06 +0100600 int end_lineno = p->tok->lineno;
601 int col_offset = -1, end_col_offset = -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100602 if (start != NULL && start >= line_start) {
Pablo Galindo22081342020-04-29 02:04:06 +0100603 col_offset = (int)(start - line_start);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100604 }
605 if (end != NULL && end >= p->tok->line_start) {
Pablo Galindo22081342020-04-29 02:04:06 +0100606 end_col_offset = (int)(end - p->tok->line_start);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100607 }
608
609 t->lineno = p->starting_lineno + lineno;
610 t->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
611 t->end_lineno = p->starting_lineno + end_lineno;
612 t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
613
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100614 p->fill += 1;
615 return 0;
616}
617
// Instrumentation to count the effectiveness of memoization.
// The array counts the number of tokens skipped by memoization,
// indexed by type.

#define NSTATISTICS 2000
static long memo_statistics[NSTATISTICS];

// Reset every memoization counter to zero.
void
_PyPegen_clear_memo_statistics(void)
{
    for (int i = 0; i < NSTATISTICS; i++) {
        memo_statistics[i] = 0;
    }
}
632
633PyObject *
634_PyPegen_get_memo_statistics()
635{
636 PyObject *ret = PyList_New(NSTATISTICS);
637 if (ret == NULL) {
638 return NULL;
639 }
640 for (int i = 0; i < NSTATISTICS; i++) {
641 PyObject *value = PyLong_FromLong(memo_statistics[i]);
642 if (value == NULL) {
643 Py_DECREF(ret);
644 return NULL;
645 }
646 // PyList_SetItem borrows a reference to value.
647 if (PyList_SetItem(ret, i, value) < 0) {
648 Py_DECREF(ret);
649 return NULL;
650 }
651 }
652 return ret;
653}
654
655int // bool
656_PyPegen_is_memoized(Parser *p, int type, void *pres)
657{
658 if (p->mark == p->fill) {
659 if (_PyPegen_fill_token(p) < 0) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300660 p->error_indicator = 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100661 return -1;
662 }
663 }
664
665 Token *t = p->tokens[p->mark];
666
667 for (Memo *m = t->memo; m != NULL; m = m->next) {
668 if (m->type == type) {
669 if (0 <= type && type < NSTATISTICS) {
670 long count = m->mark - p->mark;
671 // A memoized negative result counts for one.
672 if (count <= 0) {
673 count = 1;
674 }
675 memo_statistics[type] += count;
676 }
677 p->mark = m->mark;
678 *(void **)(pres) = m->node;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100679 return 1;
680 }
681 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100682 return 0;
683}
684
Pablo Galindo1df5a9e2020-04-23 12:42:13 +0100685
686int
687_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
688{
689 int mark = p->mark;
690 void *res = func(p);
691 p->mark = mark;
692 return (res != NULL) == positive;
693}
694
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100695int
696_PyPegen_lookahead_with_string(int positive, void *(func)(Parser *, const char *), Parser *p,
697 const char *arg)
698{
699 int mark = p->mark;
700 void *res = func(p, arg);
701 p->mark = mark;
702 return (res != NULL) == positive;
703}
704
705int
706_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
707{
708 int mark = p->mark;
709 void *res = func(p, arg);
710 p->mark = mark;
711 return (res != NULL) == positive;
712}
713
714int
715_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
716{
717 int mark = p->mark;
Pablo Galindo1df5a9e2020-04-23 12:42:13 +0100718 void *res = (void*)func(p);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100719 p->mark = mark;
720 return (res != NULL) == positive;
721}
722
723Token *
724_PyPegen_expect_token(Parser *p, int type)
725{
726 if (p->mark == p->fill) {
727 if (_PyPegen_fill_token(p) < 0) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300728 p->error_indicator = 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100729 return NULL;
730 }
731 }
732 Token *t = p->tokens[p->mark];
733 if (t->type != type) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100734 return NULL;
735 }
736 p->mark += 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100737 return t;
738}
739
740Token *
741_PyPegen_get_last_nonnwhitespace_token(Parser *p)
742{
743 assert(p->mark >= 0);
744 Token *token = NULL;
745 for (int m = p->mark - 1; m >= 0; m--) {
746 token = p->tokens[m];
747 if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
748 break;
749 }
750 }
751 return token;
752}
753
754void *
755_PyPegen_async_token(Parser *p)
756{
757 return _PyPegen_expect_token(p, ASYNC);
758}
759
760void *
761_PyPegen_await_token(Parser *p)
762{
763 return _PyPegen_expect_token(p, AWAIT);
764}
765
766void *
767_PyPegen_endmarker_token(Parser *p)
768{
769 return _PyPegen_expect_token(p, ENDMARKER);
770}
771
772expr_ty
773_PyPegen_name_token(Parser *p)
774{
775 Token *t = _PyPegen_expect_token(p, NAME);
776 if (t == NULL) {
777 return NULL;
778 }
779 char* s = PyBytes_AsString(t->bytes);
780 if (!s) {
781 return NULL;
782 }
783 PyObject *id = _PyPegen_new_identifier(p, s);
784 if (id == NULL) {
785 return NULL;
786 }
787 return Name(id, Load, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
788 p->arena);
789}
790
791void *
792_PyPegen_string_token(Parser *p)
793{
794 return _PyPegen_expect_token(p, STRING);
795}
796
797void *
798_PyPegen_newline_token(Parser *p)
799{
800 return _PyPegen_expect_token(p, NEWLINE);
801}
802
803void *
804_PyPegen_indent_token(Parser *p)
805{
806 return _PyPegen_expect_token(p, INDENT);
807}
808
809void *
810_PyPegen_dedent_token(Parser *p)
811{
812 return _PyPegen_expect_token(p, DEDENT);
813}
814
815static PyObject *
816parsenumber_raw(const char *s)
817{
818 const char *end;
819 long x;
820 double dx;
821 Py_complex compl;
822 int imflag;
823
824 assert(s != NULL);
825 errno = 0;
826 end = s + strlen(s) - 1;
827 imflag = *end == 'j' || *end == 'J';
828 if (s[0] == '0') {
829 x = (long)PyOS_strtoul(s, (char **)&end, 0);
830 if (x < 0 && errno == 0) {
831 return PyLong_FromString(s, (char **)0, 0);
832 }
833 }
834 else
835 x = PyOS_strtol(s, (char **)&end, 0);
836 if (*end == '\0') {
837 if (errno != 0)
838 return PyLong_FromString(s, (char **)0, 0);
839 return PyLong_FromLong(x);
840 }
841 /* XXX Huge floats may silently fail */
842 if (imflag) {
843 compl.real = 0.;
844 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
845 if (compl.imag == -1.0 && PyErr_Occurred())
846 return NULL;
847 return PyComplex_FromCComplex(compl);
848 }
849 else {
850 dx = PyOS_string_to_double(s, NULL, NULL);
851 if (dx == -1.0 && PyErr_Occurred())
852 return NULL;
853 return PyFloat_FromDouble(dx);
854 }
855}
856
857static PyObject *
858parsenumber(const char *s)
859{
860 char *dup, *end;
861 PyObject *res = NULL;
862
863 assert(s != NULL);
864
865 if (strchr(s, '_') == NULL) {
866 return parsenumber_raw(s);
867 }
868 /* Create a duplicate without underscores. */
869 dup = PyMem_Malloc(strlen(s) + 1);
870 if (dup == NULL) {
871 return PyErr_NoMemory();
872 }
873 end = dup;
874 for (; *s; s++) {
875 if (*s != '_') {
876 *end++ = *s;
877 }
878 }
879 *end = '\0';
880 res = parsenumber_raw(dup);
881 PyMem_Free(dup);
882 return res;
883}
884
885expr_ty
886_PyPegen_number_token(Parser *p)
887{
888 Token *t = _PyPegen_expect_token(p, NUMBER);
889 if (t == NULL) {
890 return NULL;
891 }
892
893 char *num_raw = PyBytes_AsString(t->bytes);
894
895 if (num_raw == NULL) {
896 return NULL;
897 }
898
899 PyObject *c = parsenumber(num_raw);
900
901 if (c == NULL) {
902 return NULL;
903 }
904
905 if (PyArena_AddPyObject(p->arena, c) < 0) {
906 Py_DECREF(c);
907 return NULL;
908 }
909
910 return Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
911 p->arena);
912}
913
Lysandros Nikolaou6d650872020-04-29 04:42:27 +0300914static int // bool
915newline_in_string(Parser *p, const char *cur)
916{
917 for (char c = *cur; cur >= p->tok->buf; c = *--cur) {
918 if (c == '\'' || c == '"') {
919 return 1;
920 }
921 }
922 return 0;
923}
924
925/* Check that the source for a single input statement really is a single
926 statement by looking at what is left in the buffer after parsing.
927 Trailing whitespace and comments are OK. */
928static int // bool
929bad_single_statement(Parser *p)
930{
931 const char *cur = strchr(p->tok->buf, '\n');
932
933 /* Newlines are allowed if preceded by a line continuation character
934 or if they appear inside a string. */
935 if (!cur || *(cur - 1) == '\\' || newline_in_string(p, cur)) {
936 return 0;
937 }
938 char c = *cur;
939
940 for (;;) {
941 while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
942 c = *++cur;
943 }
944
945 if (!c) {
946 return 0;
947 }
948
949 if (c != '#') {
950 return 1;
951 }
952
953 /* Suck up comment. */
954 while (c && c != '\n') {
955 c = *++cur;
956 }
957 }
958}
959
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100960void
961_PyPegen_Parser_Free(Parser *p)
962{
963 Py_XDECREF(p->normalize);
964 for (int i = 0; i < p->size; i++) {
965 PyMem_Free(p->tokens[i]);
966 }
967 PyMem_Free(p->tokens);
968 PyMem_Free(p);
969}
970
Pablo Galindo2b74c832020-04-27 18:02:07 +0100971static int
972compute_parser_flags(PyCompilerFlags *flags)
973{
974 int parser_flags = 0;
975 if (!flags) {
976 return 0;
977 }
978 if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) {
979 parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
980 }
981 if (flags->cf_flags & PyCF_IGNORE_COOKIE) {
982 parser_flags |= PyPARSE_IGNORE_COOKIE;
983 }
984 if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) {
985 parser_flags |= PyPARSE_BARRY_AS_BDFL;
986 }
987 if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
988 parser_flags |= PyPARSE_TYPE_COMMENTS;
989 }
990 return parser_flags;
991}
992
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100993Parser *
Pablo Galindo2b74c832020-04-27 18:02:07 +0100994_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
995 int *errcode, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100996{
997 Parser *p = PyMem_Malloc(sizeof(Parser));
998 if (p == NULL) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300999 return (Parser *) PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001000 }
1001 assert(tok != NULL);
1002 p->tok = tok;
1003 p->keywords = NULL;
1004 p->n_keyword_lists = -1;
1005 p->tokens = PyMem_Malloc(sizeof(Token *));
1006 if (!p->tokens) {
1007 PyMem_Free(p);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +03001008 return (Parser *) PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001009 }
1010 p->tokens[0] = PyMem_Malloc(sizeof(Token));
Lysandros Nikolaouebebb642020-04-23 18:36:06 +03001011 if (!p->tokens) {
1012 PyMem_Free(p->tokens);
1013 PyMem_Free(p);
1014 return (Parser *) PyErr_NoMemory();
1015 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001016 memset(p->tokens[0], '\0', sizeof(Token));
1017 p->mark = 0;
1018 p->fill = 0;
1019 p->size = 1;
1020
1021 p->errcode = errcode;
1022 p->arena = arena;
1023 p->start_rule = start_rule;
1024 p->parsing_started = 0;
1025 p->normalize = NULL;
1026 p->error_indicator = 0;
1027
1028 p->starting_lineno = 0;
1029 p->starting_col_offset = 0;
Pablo Galindo2b74c832020-04-27 18:02:07 +01001030 p->flags = flags;
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001031
1032 return p;
1033}
1034
1035void *
1036_PyPegen_run_parser(Parser *p)
1037{
1038 void *res = _PyPegen_parse(p);
1039 if (res == NULL) {
1040 if (PyErr_Occurred()) {
1041 return NULL;
1042 }
1043 if (p->fill == 0) {
1044 RAISE_SYNTAX_ERROR("error at start before reading any input");
1045 }
1046 else if (p->tok->done == E_EOF) {
1047 RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
1048 }
1049 else {
1050 if (p->tokens[p->fill-1]->type == INDENT) {
1051 RAISE_INDENTATION_ERROR("unexpected indent");
1052 }
1053 else if (p->tokens[p->fill-1]->type == DEDENT) {
1054 RAISE_INDENTATION_ERROR("unexpected unindent");
1055 }
1056 else {
1057 RAISE_SYNTAX_ERROR("invalid syntax");
1058 }
1059 }
1060 return NULL;
1061 }
1062
Lysandros Nikolaou6d650872020-04-29 04:42:27 +03001063 if (p->start_rule == Py_single_input && bad_single_statement(p)) {
1064 p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
1065 return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
1066 }
1067
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001068 return res;
1069}
1070
1071mod_ty
1072_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
1073 const char *enc, const char *ps1, const char *ps2,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001074 PyCompilerFlags *flags, int *errcode, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001075{
1076 struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
1077 if (tok == NULL) {
1078 if (PyErr_Occurred()) {
1079 raise_tokenizer_init_error(filename_ob);
1080 return NULL;
1081 }
1082 return NULL;
1083 }
1084 // This transfers the ownership to the tokenizer
1085 tok->filename = filename_ob;
1086 Py_INCREF(filename_ob);
1087
1088 // From here on we need to clean up even if there's an error
1089 mod_ty result = NULL;
1090
Pablo Galindo2b74c832020-04-27 18:02:07 +01001091 int parser_flags = compute_parser_flags(flags);
1092 Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, errcode, arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001093 if (p == NULL) {
1094 goto error;
1095 }
1096
1097 result = _PyPegen_run_parser(p);
1098 _PyPegen_Parser_Free(p);
1099
1100error:
1101 PyTokenizer_Free(tok);
1102 return result;
1103}
1104
1105mod_ty
1106_PyPegen_run_parser_from_file(const char *filename, int start_rule,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001107 PyObject *filename_ob, PyCompilerFlags *flags, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001108{
1109 FILE *fp = fopen(filename, "rb");
1110 if (fp == NULL) {
1111 PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
1112 return NULL;
1113 }
1114
1115 mod_ty result = _PyPegen_run_parser_from_file_pointer(fp, start_rule, filename_ob,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001116 NULL, NULL, NULL, flags, NULL, arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001117
1118 fclose(fp);
1119 return result;
1120}
1121
1122mod_ty
1123_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001124 PyCompilerFlags *flags, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001125{
1126 int exec_input = start_rule == Py_file_input;
1127
1128 struct tok_state *tok;
Pablo Galindo2b74c832020-04-27 18:02:07 +01001129 if (flags == NULL || flags->cf_flags & PyCF_IGNORE_COOKIE) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001130 tok = PyTokenizer_FromUTF8(str, exec_input);
1131 } else {
1132 tok = PyTokenizer_FromString(str, exec_input);
1133 }
1134 if (tok == NULL) {
1135 if (PyErr_Occurred()) {
1136 raise_tokenizer_init_error(filename_ob);
1137 }
1138 return NULL;
1139 }
1140 // This transfers the ownership to the tokenizer
1141 tok->filename = filename_ob;
1142 Py_INCREF(filename_ob);
1143
1144 // We need to clear up from here on
1145 mod_ty result = NULL;
1146
Pablo Galindo2b74c832020-04-27 18:02:07 +01001147 int parser_flags = compute_parser_flags(flags);
1148 Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, NULL, arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001149 if (p == NULL) {
1150 goto error;
1151 }
1152
1153 result = _PyPegen_run_parser(p);
1154 _PyPegen_Parser_Free(p);
1155
1156error:
1157 PyTokenizer_Free(tok);
1158 return result;
1159}
1160
1161void *
1162_PyPegen_interactive_exit(Parser *p)
1163{
1164 if (p->errcode) {
1165 *(p->errcode) = E_EOF;
1166 }
1167 return NULL;
1168}
1169
1170/* Creates a single-element asdl_seq* that contains a */
1171asdl_seq *
1172_PyPegen_singleton_seq(Parser *p, void *a)
1173{
1174 assert(a != NULL);
1175 asdl_seq *seq = _Py_asdl_seq_new(1, p->arena);
1176 if (!seq) {
1177 return NULL;
1178 }
1179 asdl_seq_SET(seq, 0, a);
1180 return seq;
1181}
1182
1183/* Creates a copy of seq and prepends a to it */
1184asdl_seq *
1185_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
1186{
1187 assert(a != NULL);
1188 if (!seq) {
1189 return _PyPegen_singleton_seq(p, a);
1190 }
1191
1192 asdl_seq *new_seq = _Py_asdl_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
1193 if (!new_seq) {
1194 return NULL;
1195 }
1196
1197 asdl_seq_SET(new_seq, 0, a);
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001198 for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001199 asdl_seq_SET(new_seq, i, asdl_seq_GET(seq, i - 1));
1200 }
1201 return new_seq;
1202}
1203
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001204static Py_ssize_t
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001205_get_flattened_seq_size(asdl_seq *seqs)
1206{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001207 Py_ssize_t size = 0;
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001208 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1209 asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
1210 size += asdl_seq_LEN(inner_seq);
1211 }
1212 return size;
1213}
1214
1215/* Flattens an asdl_seq* of asdl_seq*s */
1216asdl_seq *
1217_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
1218{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001219 Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001220 assert(flattened_seq_size > 0);
1221
1222 asdl_seq *flattened_seq = _Py_asdl_seq_new(flattened_seq_size, p->arena);
1223 if (!flattened_seq) {
1224 return NULL;
1225 }
1226
1227 int flattened_seq_idx = 0;
1228 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1229 asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001230 for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001231 asdl_seq_SET(flattened_seq, flattened_seq_idx++, asdl_seq_GET(inner_seq, j));
1232 }
1233 }
1234 assert(flattened_seq_idx == flattened_seq_size);
1235
1236 return flattened_seq;
1237}
1238
1239/* Creates a new name of the form <first_name>.<second_name> */
1240expr_ty
1241_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
1242{
1243 assert(first_name != NULL && second_name != NULL);
1244 PyObject *first_identifier = first_name->v.Name.id;
1245 PyObject *second_identifier = second_name->v.Name.id;
1246
1247 if (PyUnicode_READY(first_identifier) == -1) {
1248 return NULL;
1249 }
1250 if (PyUnicode_READY(second_identifier) == -1) {
1251 return NULL;
1252 }
1253 const char *first_str = PyUnicode_AsUTF8(first_identifier);
1254 if (!first_str) {
1255 return NULL;
1256 }
1257 const char *second_str = PyUnicode_AsUTF8(second_identifier);
1258 if (!second_str) {
1259 return NULL;
1260 }
Pablo Galindo9f27dd32020-04-24 01:13:33 +01001261 Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1; // +1 for the dot
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001262
1263 PyObject *str = PyBytes_FromStringAndSize(NULL, len);
1264 if (!str) {
1265 return NULL;
1266 }
1267
1268 char *s = PyBytes_AS_STRING(str);
1269 if (!s) {
1270 return NULL;
1271 }
1272
1273 strcpy(s, first_str);
1274 s += strlen(first_str);
1275 *s++ = '.';
1276 strcpy(s, second_str);
1277 s += strlen(second_str);
1278 *s = '\0';
1279
1280 PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL);
1281 Py_DECREF(str);
1282 if (!uni) {
1283 return NULL;
1284 }
1285 PyUnicode_InternInPlace(&uni);
1286 if (PyArena_AddPyObject(p->arena, uni) < 0) {
1287 Py_DECREF(uni);
1288 return NULL;
1289 }
1290
1291 return _Py_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
1292}
1293
1294/* Counts the total number of dots in seq's tokens */
1295int
1296_PyPegen_seq_count_dots(asdl_seq *seq)
1297{
1298 int number_of_dots = 0;
1299 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1300 Token *current_expr = asdl_seq_GET(seq, i);
1301 switch (current_expr->type) {
1302 case ELLIPSIS:
1303 number_of_dots += 3;
1304 break;
1305 case DOT:
1306 number_of_dots += 1;
1307 break;
1308 default:
Lysandros Nikolaouebebb642020-04-23 18:36:06 +03001309 Py_UNREACHABLE();
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001310 }
1311 }
1312
1313 return number_of_dots;
1314}
1315
1316/* Creates an alias with '*' as the identifier name */
1317alias_ty
1318_PyPegen_alias_for_star(Parser *p)
1319{
1320 PyObject *str = PyUnicode_InternFromString("*");
1321 if (!str) {
1322 return NULL;
1323 }
1324 if (PyArena_AddPyObject(p->arena, str) < 0) {
1325 Py_DECREF(str);
1326 return NULL;
1327 }
1328 return alias(str, NULL, p->arena);
1329}
1330
1331/* Creates a new asdl_seq* with the identifiers of all the names in seq */
1332asdl_seq *
1333_PyPegen_map_names_to_ids(Parser *p, asdl_seq *seq)
1334{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001335 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001336 assert(len > 0);
1337
1338 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1339 if (!new_seq) {
1340 return NULL;
1341 }
1342 for (Py_ssize_t i = 0; i < len; i++) {
1343 expr_ty e = asdl_seq_GET(seq, i);
1344 asdl_seq_SET(new_seq, i, e->v.Name.id);
1345 }
1346 return new_seq;
1347}
1348
1349/* Constructs a CmpopExprPair */
1350CmpopExprPair *
1351_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
1352{
1353 assert(expr != NULL);
1354 CmpopExprPair *a = PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
1355 if (!a) {
1356 return NULL;
1357 }
1358 a->cmpop = cmpop;
1359 a->expr = expr;
1360 return a;
1361}
1362
1363asdl_int_seq *
1364_PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
1365{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001366 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001367 assert(len > 0);
1368
1369 asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
1370 if (!new_seq) {
1371 return NULL;
1372 }
1373 for (Py_ssize_t i = 0; i < len; i++) {
1374 CmpopExprPair *pair = asdl_seq_GET(seq, i);
1375 asdl_seq_SET(new_seq, i, pair->cmpop);
1376 }
1377 return new_seq;
1378}
1379
1380asdl_seq *
1381_PyPegen_get_exprs(Parser *p, asdl_seq *seq)
1382{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001383 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001384 assert(len > 0);
1385
1386 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1387 if (!new_seq) {
1388 return NULL;
1389 }
1390 for (Py_ssize_t i = 0; i < len; i++) {
1391 CmpopExprPair *pair = asdl_seq_GET(seq, i);
1392 asdl_seq_SET(new_seq, i, pair->expr);
1393 }
1394 return new_seq;
1395}
1396
1397/* Creates an asdl_seq* where all the elements have been changed to have ctx as context */
1398static asdl_seq *
1399_set_seq_context(Parser *p, asdl_seq *seq, expr_context_ty ctx)
1400{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001401 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001402 if (len == 0) {
1403 return NULL;
1404 }
1405
1406 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1407 if (!new_seq) {
1408 return NULL;
1409 }
1410 for (Py_ssize_t i = 0; i < len; i++) {
1411 expr_ty e = asdl_seq_GET(seq, i);
1412 asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
1413 }
1414 return new_seq;
1415}
1416
1417static expr_ty
1418_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
1419{
1420 return _Py_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
1421}
1422
1423static expr_ty
1424_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
1425{
1426 return _Py_Tuple(_set_seq_context(p, e->v.Tuple.elts, ctx), ctx, EXTRA_EXPR(e, e));
1427}
1428
1429static expr_ty
1430_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
1431{
1432 return _Py_List(_set_seq_context(p, e->v.List.elts, ctx), ctx, EXTRA_EXPR(e, e));
1433}
1434
1435static expr_ty
1436_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
1437{
1438 return _Py_Subscript(e->v.Subscript.value, e->v.Subscript.slice, ctx, EXTRA_EXPR(e, e));
1439}
1440
1441static expr_ty
1442_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
1443{
1444 return _Py_Attribute(e->v.Attribute.value, e->v.Attribute.attr, ctx, EXTRA_EXPR(e, e));
1445}
1446
1447static expr_ty
1448_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
1449{
1450 return _Py_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx), ctx, EXTRA_EXPR(e, e));
1451}
1452
1453/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
1454expr_ty
1455_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
1456{
1457 assert(expr != NULL);
1458
1459 expr_ty new = NULL;
1460 switch (expr->kind) {
1461 case Name_kind:
1462 new = _set_name_context(p, expr, ctx);
1463 break;
1464 case Tuple_kind:
1465 new = _set_tuple_context(p, expr, ctx);
1466 break;
1467 case List_kind:
1468 new = _set_list_context(p, expr, ctx);
1469 break;
1470 case Subscript_kind:
1471 new = _set_subscript_context(p, expr, ctx);
1472 break;
1473 case Attribute_kind:
1474 new = _set_attribute_context(p, expr, ctx);
1475 break;
1476 case Starred_kind:
1477 new = _set_starred_context(p, expr, ctx);
1478 break;
1479 default:
1480 new = expr;
1481 }
1482 return new;
1483}
1484
1485/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
1486KeyValuePair *
1487_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
1488{
1489 KeyValuePair *a = PyArena_Malloc(p->arena, sizeof(KeyValuePair));
1490 if (!a) {
1491 return NULL;
1492 }
1493 a->key = key;
1494 a->value = value;
1495 return a;
1496}
1497
1498/* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
1499asdl_seq *
1500_PyPegen_get_keys(Parser *p, asdl_seq *seq)
1501{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001502 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001503 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1504 if (!new_seq) {
1505 return NULL;
1506 }
1507 for (Py_ssize_t i = 0; i < len; i++) {
1508 KeyValuePair *pair = asdl_seq_GET(seq, i);
1509 asdl_seq_SET(new_seq, i, pair->key);
1510 }
1511 return new_seq;
1512}
1513
1514/* Extracts all values from an asdl_seq* of KeyValuePair*'s */
1515asdl_seq *
1516_PyPegen_get_values(Parser *p, asdl_seq *seq)
1517{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001518 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001519 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1520 if (!new_seq) {
1521 return NULL;
1522 }
1523 for (Py_ssize_t i = 0; i < len; i++) {
1524 KeyValuePair *pair = asdl_seq_GET(seq, i);
1525 asdl_seq_SET(new_seq, i, pair->value);
1526 }
1527 return new_seq;
1528}
1529
1530/* Constructs a NameDefaultPair */
1531NameDefaultPair *
1532_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value)
1533{
1534 NameDefaultPair *a = PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
1535 if (!a) {
1536 return NULL;
1537 }
1538 a->arg = arg;
1539 a->value = value;
1540 return a;
1541}
1542
1543/* Constructs a SlashWithDefault */
1544SlashWithDefault *
1545_PyPegen_slash_with_default(Parser *p, asdl_seq *plain_names, asdl_seq *names_with_defaults)
1546{
1547 SlashWithDefault *a = PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
1548 if (!a) {
1549 return NULL;
1550 }
1551 a->plain_names = plain_names;
1552 a->names_with_defaults = names_with_defaults;
1553 return a;
1554}
1555
1556/* Constructs a StarEtc */
1557StarEtc *
1558_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
1559{
1560 StarEtc *a = PyArena_Malloc(p->arena, sizeof(StarEtc));
1561 if (!a) {
1562 return NULL;
1563 }
1564 a->vararg = vararg;
1565 a->kwonlyargs = kwonlyargs;
1566 a->kwarg = kwarg;
1567 return a;
1568}
1569
1570asdl_seq *
1571_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
1572{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001573 Py_ssize_t first_len = asdl_seq_LEN(a);
1574 Py_ssize_t second_len = asdl_seq_LEN(b);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001575 asdl_seq *new_seq = _Py_asdl_seq_new(first_len + second_len, p->arena);
1576 if (!new_seq) {
1577 return NULL;
1578 }
1579
1580 int k = 0;
1581 for (Py_ssize_t i = 0; i < first_len; i++) {
1582 asdl_seq_SET(new_seq, k++, asdl_seq_GET(a, i));
1583 }
1584 for (Py_ssize_t i = 0; i < second_len; i++) {
1585 asdl_seq_SET(new_seq, k++, asdl_seq_GET(b, i));
1586 }
1587
1588 return new_seq;
1589}
1590
1591static asdl_seq *
1592_get_names(Parser *p, asdl_seq *names_with_defaults)
1593{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001594 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001595 asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
1596 if (!seq) {
1597 return NULL;
1598 }
1599 for (Py_ssize_t i = 0; i < len; i++) {
1600 NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
1601 asdl_seq_SET(seq, i, pair->arg);
1602 }
1603 return seq;
1604}
1605
1606static asdl_seq *
1607_get_defaults(Parser *p, asdl_seq *names_with_defaults)
1608{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001609 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001610 asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
1611 if (!seq) {
1612 return NULL;
1613 }
1614 for (Py_ssize_t i = 0; i < len; i++) {
1615 NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
1616 asdl_seq_SET(seq, i, pair->value);
1617 }
1618 return seq;
1619}
1620
1621/* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
1622arguments_ty
1623_PyPegen_make_arguments(Parser *p, asdl_seq *slash_without_default,
1624 SlashWithDefault *slash_with_default, asdl_seq *plain_names,
1625 asdl_seq *names_with_default, StarEtc *star_etc)
1626{
1627 asdl_seq *posonlyargs;
1628 if (slash_without_default != NULL) {
1629 posonlyargs = slash_without_default;
1630 }
1631 else if (slash_with_default != NULL) {
1632 asdl_seq *slash_with_default_names =
1633 _get_names(p, slash_with_default->names_with_defaults);
1634 if (!slash_with_default_names) {
1635 return NULL;
1636 }
1637 posonlyargs = _PyPegen_join_sequences(p, slash_with_default->plain_names, slash_with_default_names);
1638 if (!posonlyargs) {
1639 return NULL;
1640 }
1641 }
1642 else {
1643 posonlyargs = _Py_asdl_seq_new(0, p->arena);
1644 if (!posonlyargs) {
1645 return NULL;
1646 }
1647 }
1648
1649 asdl_seq *posargs;
1650 if (plain_names != NULL && names_with_default != NULL) {
1651 asdl_seq *names_with_default_names = _get_names(p, names_with_default);
1652 if (!names_with_default_names) {
1653 return NULL;
1654 }
1655 posargs = _PyPegen_join_sequences(p, plain_names, names_with_default_names);
1656 if (!posargs) {
1657 return NULL;
1658 }
1659 }
1660 else if (plain_names == NULL && names_with_default != NULL) {
1661 posargs = _get_names(p, names_with_default);
1662 if (!posargs) {
1663 return NULL;
1664 }
1665 }
1666 else if (plain_names != NULL && names_with_default == NULL) {
1667 posargs = plain_names;
1668 }
1669 else {
1670 posargs = _Py_asdl_seq_new(0, p->arena);
1671 if (!posargs) {
1672 return NULL;
1673 }
1674 }
1675
1676 asdl_seq *posdefaults;
1677 if (slash_with_default != NULL && names_with_default != NULL) {
1678 asdl_seq *slash_with_default_values =
1679 _get_defaults(p, slash_with_default->names_with_defaults);
1680 if (!slash_with_default_values) {
1681 return NULL;
1682 }
1683 asdl_seq *names_with_default_values = _get_defaults(p, names_with_default);
1684 if (!names_with_default_values) {
1685 return NULL;
1686 }
1687 posdefaults = _PyPegen_join_sequences(p, slash_with_default_values, names_with_default_values);
1688 if (!posdefaults) {
1689 return NULL;
1690 }
1691 }
1692 else if (slash_with_default == NULL && names_with_default != NULL) {
1693 posdefaults = _get_defaults(p, names_with_default);
1694 if (!posdefaults) {
1695 return NULL;
1696 }
1697 }
1698 else if (slash_with_default != NULL && names_with_default == NULL) {
1699 posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
1700 if (!posdefaults) {
1701 return NULL;
1702 }
1703 }
1704 else {
1705 posdefaults = _Py_asdl_seq_new(0, p->arena);
1706 if (!posdefaults) {
1707 return NULL;
1708 }
1709 }
1710
1711 arg_ty vararg = NULL;
1712 if (star_etc != NULL && star_etc->vararg != NULL) {
1713 vararg = star_etc->vararg;
1714 }
1715
1716 asdl_seq *kwonlyargs;
1717 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
1718 kwonlyargs = _get_names(p, star_etc->kwonlyargs);
1719 if (!kwonlyargs) {
1720 return NULL;
1721 }
1722 }
1723 else {
1724 kwonlyargs = _Py_asdl_seq_new(0, p->arena);
1725 if (!kwonlyargs) {
1726 return NULL;
1727 }
1728 }
1729
1730 asdl_seq *kwdefaults;
1731 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
1732 kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
1733 if (!kwdefaults) {
1734 return NULL;
1735 }
1736 }
1737 else {
1738 kwdefaults = _Py_asdl_seq_new(0, p->arena);
1739 if (!kwdefaults) {
1740 return NULL;
1741 }
1742 }
1743
1744 arg_ty kwarg = NULL;
1745 if (star_etc != NULL && star_etc->kwarg != NULL) {
1746 kwarg = star_etc->kwarg;
1747 }
1748
1749 return _Py_arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg,
1750 posdefaults, p->arena);
1751}
1752
1753/* Constructs an empty arguments_ty object, that gets used when a function accepts no
1754 * arguments. */
1755arguments_ty
1756_PyPegen_empty_arguments(Parser *p)
1757{
1758 asdl_seq *posonlyargs = _Py_asdl_seq_new(0, p->arena);
1759 if (!posonlyargs) {
1760 return NULL;
1761 }
1762 asdl_seq *posargs = _Py_asdl_seq_new(0, p->arena);
1763 if (!posargs) {
1764 return NULL;
1765 }
1766 asdl_seq *posdefaults = _Py_asdl_seq_new(0, p->arena);
1767 if (!posdefaults) {
1768 return NULL;
1769 }
1770 asdl_seq *kwonlyargs = _Py_asdl_seq_new(0, p->arena);
1771 if (!kwonlyargs) {
1772 return NULL;
1773 }
1774 asdl_seq *kwdefaults = _Py_asdl_seq_new(0, p->arena);
1775 if (!kwdefaults) {
1776 return NULL;
1777 }
1778
1779 return _Py_arguments(posonlyargs, posargs, NULL, kwonlyargs, kwdefaults, NULL, kwdefaults,
1780 p->arena);
1781}
1782
1783/* Encapsulates the value of an operator_ty into an AugOperator struct */
1784AugOperator *
1785_PyPegen_augoperator(Parser *p, operator_ty kind)
1786{
1787 AugOperator *a = PyArena_Malloc(p->arena, sizeof(AugOperator));
1788 if (!a) {
1789 return NULL;
1790 }
1791 a->kind = kind;
1792 return a;
1793}
1794
1795/* Construct a FunctionDef equivalent to function_def, but with decorators */
1796stmt_ty
1797_PyPegen_function_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty function_def)
1798{
1799 assert(function_def != NULL);
1800 if (function_def->kind == AsyncFunctionDef_kind) {
1801 return _Py_AsyncFunctionDef(
1802 function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
1803 function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns,
1804 function_def->v.FunctionDef.type_comment, function_def->lineno,
1805 function_def->col_offset, function_def->end_lineno, function_def->end_col_offset,
1806 p->arena);
1807 }
1808
1809 return _Py_FunctionDef(function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
1810 function_def->v.FunctionDef.body, decorators,
1811 function_def->v.FunctionDef.returns,
1812 function_def->v.FunctionDef.type_comment, function_def->lineno,
1813 function_def->col_offset, function_def->end_lineno,
1814 function_def->end_col_offset, p->arena);
1815}
1816
1817/* Construct a ClassDef equivalent to class_def, but with decorators */
1818stmt_ty
1819_PyPegen_class_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty class_def)
1820{
1821 assert(class_def != NULL);
1822 return _Py_ClassDef(class_def->v.ClassDef.name, class_def->v.ClassDef.bases,
1823 class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators,
1824 class_def->lineno, class_def->col_offset, class_def->end_lineno,
1825 class_def->end_col_offset, p->arena);
1826}
1827
1828/* Construct a KeywordOrStarred */
1829KeywordOrStarred *
1830_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
1831{
1832 KeywordOrStarred *a = PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
1833 if (!a) {
1834 return NULL;
1835 }
1836 a->element = element;
1837 a->is_keyword = is_keyword;
1838 return a;
1839}
1840
1841/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
1842static int
1843_seq_number_of_starred_exprs(asdl_seq *seq)
1844{
1845 int n = 0;
1846 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1847 KeywordOrStarred *k = asdl_seq_GET(seq, i);
1848 if (!k->is_keyword) {
1849 n++;
1850 }
1851 }
1852 return n;
1853}
1854
1855/* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */
1856asdl_seq *
1857_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
1858{
1859 int new_len = _seq_number_of_starred_exprs(kwargs);
1860 if (new_len == 0) {
1861 return NULL;
1862 }
1863 asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
1864 if (!new_seq) {
1865 return NULL;
1866 }
1867
1868 int idx = 0;
1869 for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
1870 KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
1871 if (!k->is_keyword) {
1872 asdl_seq_SET(new_seq, idx++, k->element);
1873 }
1874 }
1875 return new_seq;
1876}
1877
1878/* Return a new asdl_seq* with only the keywords in kwargs */
1879asdl_seq *
1880_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
1881{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001882 Py_ssize_t len = asdl_seq_LEN(kwargs);
1883 Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001884 if (new_len == 0) {
1885 return NULL;
1886 }
1887 asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
1888 if (!new_seq) {
1889 return NULL;
1890 }
1891
1892 int idx = 0;
1893 for (Py_ssize_t i = 0; i < len; i++) {
1894 KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
1895 if (k->is_keyword) {
1896 asdl_seq_SET(new_seq, idx++, k->element);
1897 }
1898 }
1899 return new_seq;
1900}
1901
1902expr_ty
1903_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
1904{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001905 Py_ssize_t len = asdl_seq_LEN(strings);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001906 assert(len > 0);
1907
1908 Token *first = asdl_seq_GET(strings, 0);
1909 Token *last = asdl_seq_GET(strings, len - 1);
1910
1911 int bytesmode = 0;
1912 PyObject *bytes_str = NULL;
1913
1914 FstringParser state;
1915 _PyPegen_FstringParser_Init(&state);
1916
1917 for (Py_ssize_t i = 0; i < len; i++) {
1918 Token *t = asdl_seq_GET(strings, i);
1919
1920 int this_bytesmode;
1921 int this_rawmode;
1922 PyObject *s;
1923 const char *fstr;
1924 Py_ssize_t fstrlen = -1;
1925
1926 char *this_str = PyBytes_AsString(t->bytes);
1927 if (!this_str) {
1928 goto error;
1929 }
1930
1931 if (_PyPegen_parsestr(p, this_str, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen) != 0) {
1932 goto error;
1933 }
1934
1935 /* Check that we are not mixing bytes with unicode. */
1936 if (i != 0 && bytesmode != this_bytesmode) {
1937 RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
1938 Py_XDECREF(s);
1939 goto error;
1940 }
1941 bytesmode = this_bytesmode;
1942
1943 if (fstr != NULL) {
1944 assert(s == NULL && !bytesmode);
1945
1946 int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
1947 this_rawmode, 0, first, t, last);
1948 if (result < 0) {
1949 goto error;
1950 }
1951 }
1952 else {
1953 /* String or byte string. */
1954 assert(s != NULL && fstr == NULL);
1955 assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
1956
1957 if (bytesmode) {
1958 if (i == 0) {
1959 bytes_str = s;
1960 }
1961 else {
1962 PyBytes_ConcatAndDel(&bytes_str, s);
1963 if (!bytes_str) {
1964 goto error;
1965 }
1966 }
1967 }
1968 else {
1969 /* This is a regular string. Concatenate it. */
1970 if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
1971 goto error;
1972 }
1973 }
1974 }
1975 }
1976
1977 if (bytesmode) {
1978 if (PyArena_AddPyObject(p->arena, bytes_str) < 0) {
1979 goto error;
1980 }
1981 return Constant(bytes_str, NULL, first->lineno, first->col_offset, last->end_lineno,
1982 last->end_col_offset, p->arena);
1983 }
1984
1985 return _PyPegen_FstringParser_Finish(p, &state, first, last);
1986
1987error:
1988 Py_XDECREF(bytes_str);
1989 _PyPegen_FstringParser_Dealloc(&state);
1990 if (PyErr_Occurred()) {
1991 raise_decode_error(p);
1992 }
1993 return NULL;
1994}