blob: d75267b2e2778a17adc005c0ab9fd6114f379340 [file] [log] [blame]
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001#include <Python.h>
2#include <errcode.h>
3#include "../tokenizer.h"
4
5#include "pegen.h"
6#include "parse_string.h"
7
8static int
9init_normalization(Parser *p)
10{
Lysandros Nikolaouebebb642020-04-23 18:36:06 +030011 if (p->normalize) {
12 return 1;
13 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +010014 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
15 if (!m)
16 {
17 return 0;
18 }
19 p->normalize = PyObject_GetAttrString(m, "normalize");
20 Py_DECREF(m);
21 if (!p->normalize)
22 {
23 return 0;
24 }
25 return 1;
26}
27
Pablo Galindo2b74c832020-04-27 18:02:07 +010028/* Checks if the NOTEQUAL token is valid given the current parser flags
290 indicates success and nonzero indicates failure (an exception may be set) */
30int
31_PyPegen_check_barry_as_flufl(Parser *p) {
32 Token *t = p->tokens[p->fill - 1];
33 assert(t->bytes != NULL);
34 assert(t->type == NOTEQUAL);
35
36 char* tok_str = PyBytes_AS_STRING(t->bytes);
37 if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>")){
38 RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
39 return -1;
40 } else if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
41 return strcmp(tok_str, "!=");
42 }
43 return 0;
44}
45
Pablo Galindoc5fc1562020-04-22 23:29:27 +010046PyObject *
47_PyPegen_new_identifier(Parser *p, char *n)
48{
49 PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
50 if (!id) {
51 goto error;
52 }
53 /* PyUnicode_DecodeUTF8 should always return a ready string. */
54 assert(PyUnicode_IS_READY(id));
55 /* Check whether there are non-ASCII characters in the
56 identifier; if so, normalize to NFKC. */
57 if (!PyUnicode_IS_ASCII(id))
58 {
59 PyObject *id2;
Lysandros Nikolaouebebb642020-04-23 18:36:06 +030060 if (!init_normalization(p))
Pablo Galindoc5fc1562020-04-22 23:29:27 +010061 {
62 Py_DECREF(id);
63 goto error;
64 }
65 PyObject *form = PyUnicode_InternFromString("NFKC");
66 if (form == NULL)
67 {
68 Py_DECREF(id);
69 goto error;
70 }
71 PyObject *args[2] = {form, id};
72 id2 = _PyObject_FastCall(p->normalize, args, 2);
73 Py_DECREF(id);
74 Py_DECREF(form);
75 if (!id2) {
76 goto error;
77 }
78 if (!PyUnicode_Check(id2))
79 {
80 PyErr_Format(PyExc_TypeError,
81 "unicodedata.normalize() must return a string, not "
82 "%.200s",
83 _PyType_Name(Py_TYPE(id2)));
84 Py_DECREF(id2);
85 goto error;
86 }
87 id = id2;
88 }
89 PyUnicode_InternInPlace(&id);
90 if (PyArena_AddPyObject(p->arena, id) < 0)
91 {
92 Py_DECREF(id);
93 goto error;
94 }
95 return id;
96
97error:
98 p->error_indicator = 1;
99 return NULL;
100}
101
102static PyObject *
103_create_dummy_identifier(Parser *p)
104{
105 return _PyPegen_new_identifier(p, "");
106}
107
108static inline Py_ssize_t
109byte_offset_to_character_offset(PyObject *line, int col_offset)
110{
111 const char *str = PyUnicode_AsUTF8(line);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300112 if (!str) {
113 return 0;
114 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100115 PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, NULL);
116 if (!text) {
117 return 0;
118 }
119 Py_ssize_t size = PyUnicode_GET_LENGTH(text);
120 Py_DECREF(text);
121 return size;
122}
123
124const char *
125_PyPegen_get_expr_name(expr_ty e)
126{
127 switch (e->kind) {
128 case Attribute_kind:
129 return "attribute";
130 case Subscript_kind:
131 return "subscript";
132 case Starred_kind:
133 return "starred";
134 case Name_kind:
135 return "name";
136 case List_kind:
137 return "list";
138 case Tuple_kind:
139 return "tuple";
140 case Lambda_kind:
141 return "lambda";
142 case Call_kind:
143 return "function call";
144 case BoolOp_kind:
145 case BinOp_kind:
146 case UnaryOp_kind:
147 return "operator";
148 case GeneratorExp_kind:
149 return "generator expression";
150 case Yield_kind:
151 case YieldFrom_kind:
152 return "yield expression";
153 case Await_kind:
154 return "await expression";
155 case ListComp_kind:
156 return "list comprehension";
157 case SetComp_kind:
158 return "set comprehension";
159 case DictComp_kind:
160 return "dict comprehension";
161 case Dict_kind:
162 return "dict display";
163 case Set_kind:
164 return "set display";
165 case JoinedStr_kind:
166 case FormattedValue_kind:
167 return "f-string expression";
168 case Constant_kind: {
169 PyObject *value = e->v.Constant.value;
170 if (value == Py_None) {
171 return "None";
172 }
173 if (value == Py_False) {
174 return "False";
175 }
176 if (value == Py_True) {
177 return "True";
178 }
179 if (value == Py_Ellipsis) {
180 return "Ellipsis";
181 }
182 return "literal";
183 }
184 case Compare_kind:
185 return "comparison";
186 case IfExp_kind:
187 return "conditional expression";
188 case NamedExpr_kind:
189 return "named expression";
190 default:
191 PyErr_Format(PyExc_SystemError,
192 "unexpected expression in assignment %d (line %d)",
193 e->kind, e->lineno);
194 return NULL;
195 }
196}
197
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300198static int
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100199raise_decode_error(Parser *p)
200{
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300201 assert(PyErr_Occurred());
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100202 const char *errtype = NULL;
203 if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
204 errtype = "unicode error";
205 }
206 else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
207 errtype = "value error";
208 }
209 if (errtype) {
210 PyObject *type, *value, *tback, *errstr;
211 PyErr_Fetch(&type, &value, &tback);
212 errstr = PyObject_Str(value);
213 if (errstr) {
214 RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
215 Py_DECREF(errstr);
216 }
217 else {
218 PyErr_Clear();
219 RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
220 }
221 Py_XDECREF(type);
222 Py_XDECREF(value);
223 Py_XDECREF(tback);
224 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300225
226 return -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100227}
228
229static void
230raise_tokenizer_init_error(PyObject *filename)
231{
232 if (!(PyErr_ExceptionMatches(PyExc_LookupError)
233 || PyErr_ExceptionMatches(PyExc_ValueError)
234 || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
235 return;
236 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300237 PyObject *errstr = NULL;
238 PyObject *tuple = NULL;
239 PyObject *type, *value, *tback;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100240 PyErr_Fetch(&type, &value, &tback);
241 errstr = PyObject_Str(value);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300242 if (!errstr) {
243 goto error;
244 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100245
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300246 PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100247 if (!tmp) {
248 goto error;
249 }
250
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300251 tuple = PyTuple_Pack(2, errstr, tmp);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100252 Py_DECREF(tmp);
253 if (!value) {
254 goto error;
255 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300256 PyErr_SetObject(PyExc_SyntaxError, tuple);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100257
258error:
259 Py_XDECREF(type);
260 Py_XDECREF(value);
261 Py_XDECREF(tback);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300262 Py_XDECREF(errstr);
263 Py_XDECREF(tuple);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100264}
265
266static inline PyObject *
267get_error_line(char *buffer)
268{
269 char *newline = strchr(buffer, '\n');
270 if (newline) {
271 return PyUnicode_FromStringAndSize(buffer, newline - buffer);
272 }
273 else {
274 return PyUnicode_FromString(buffer);
275 }
276}
277
278static int
279tokenizer_error_with_col_offset(Parser *p, PyObject *errtype, const char *errmsg)
280{
281 PyObject *errstr = NULL;
282 PyObject *value = NULL;
Pablo Galindoee40e4b2020-04-23 03:43:08 +0100283 size_t col_number = -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100284
285 errstr = PyUnicode_FromString(errmsg);
286 if (!errstr) {
287 return -1;
288 }
289
290 PyObject *loc = NULL;
291 if (p->start_rule == Py_file_input) {
292 loc = PyErr_ProgramTextObject(p->tok->filename, p->tok->lineno);
293 }
294 if (!loc) {
295 loc = get_error_line(p->tok->buf);
296 }
297
298 if (loc) {
299 col_number = p->tok->cur - p->tok->buf;
300 }
301 else {
302 Py_INCREF(Py_None);
303 loc = Py_None;
304 }
305
306 PyObject *tmp = Py_BuildValue("(OiiN)", p->tok->filename, p->tok->lineno,
307 col_number, loc);
308 if (!tmp) {
309 goto error;
310 }
311
312 value = PyTuple_Pack(2, errstr, tmp);
313 Py_DECREF(tmp);
314 if (!value) {
315 goto error;
316 }
317 PyErr_SetObject(errtype, value);
318
319 Py_XDECREF(value);
320 Py_XDECREF(errstr);
321 return -1;
322
323error:
324 Py_XDECREF(errstr);
325 Py_XDECREF(loc);
326 return -1;
327}
328
329static int
330tokenizer_error(Parser *p)
331{
332 if (PyErr_Occurred()) {
333 return -1;
334 }
335
336 const char *msg = NULL;
337 PyObject* errtype = PyExc_SyntaxError;
338 switch (p->tok->done) {
339 case E_TOKEN:
340 msg = "invalid token";
341 break;
342 case E_IDENTIFIER:
343 msg = "invalid character in identifier";
344 break;
345 case E_BADPREFIX:
346 return tokenizer_error_with_col_offset(p,
347 PyExc_SyntaxError, "invalid string prefix");
348 case E_EOFS:
349 return tokenizer_error_with_col_offset(p,
350 PyExc_SyntaxError, "EOF while scanning triple-quoted string literal");
351 case E_EOLS:
352 return tokenizer_error_with_col_offset(p,
353 PyExc_SyntaxError, "EOL while scanning string literal");
354 case E_DEDENT:
355 return tokenizer_error_with_col_offset(p,
356 PyExc_IndentationError, "unindent does not match any outer indentation level");
357 case E_INTR:
358 if (!PyErr_Occurred()) {
359 PyErr_SetNone(PyExc_KeyboardInterrupt);
360 }
361 return -1;
362 case E_NOMEM:
363 PyErr_NoMemory();
364 return -1;
365 case E_TABSPACE:
366 errtype = PyExc_TabError;
367 msg = "inconsistent use of tabs and spaces in indentation";
368 break;
369 case E_TOODEEP:
370 errtype = PyExc_IndentationError;
371 msg = "too many levels of indentation";
372 break;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100373 case E_LINECONT:
374 msg = "unexpected character after line continuation character";
375 break;
376 default:
377 msg = "unknown parsing error";
378 }
379
380 PyErr_Format(errtype, msg);
381 // There is no reliable column information for this error
382 PyErr_SyntaxLocationObject(p->tok->filename, p->tok->lineno, 0);
383
384 return -1;
385}
386
387void *
388_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
389{
390 PyObject *value = NULL;
391 PyObject *errstr = NULL;
392 PyObject *loc = NULL;
393 PyObject *tmp = NULL;
394 Token *t = p->tokens[p->fill - 1];
395 Py_ssize_t col_number = 0;
396 va_list va;
397
398 va_start(va, errmsg);
399 errstr = PyUnicode_FromFormatV(errmsg, va);
400 va_end(va);
401 if (!errstr) {
402 goto error;
403 }
404
405 if (p->start_rule == Py_file_input) {
406 loc = PyErr_ProgramTextObject(p->tok->filename, t->lineno);
407 }
408
409 if (!loc) {
410 loc = get_error_line(p->tok->buf);
411 }
412
413 if (loc) {
414 int col_offset = t->col_offset == -1 ? 0 : t->col_offset;
415 col_number = byte_offset_to_character_offset(loc, col_offset) + 1;
416 }
417 else {
418 Py_INCREF(Py_None);
419 loc = Py_None;
420 }
421
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100422 tmp = Py_BuildValue("(OiiN)", p->tok->filename, t->lineno, col_number, loc);
423 if (!tmp) {
424 goto error;
425 }
426 value = PyTuple_Pack(2, errstr, tmp);
427 Py_DECREF(tmp);
428 if (!value) {
429 goto error;
430 }
431 PyErr_SetObject(errtype, value);
432
433 Py_DECREF(errstr);
434 Py_DECREF(value);
435 return NULL;
436
437error:
438 Py_XDECREF(errstr);
439 Py_XDECREF(loc);
440 return NULL;
441}
442
443void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
444 int kwarg_unpacking = 0;
445 for (Py_ssize_t i = 0, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) {
446 keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i);
447 if (!keyword->arg) {
448 kwarg_unpacking = 1;
449 }
450 }
451
452 const char *msg = NULL;
453 if (kwarg_unpacking) {
454 msg = "positional argument follows keyword argument unpacking";
455 } else {
456 msg = "positional argument follows keyword argument";
457 }
458
459 return RAISE_SYNTAX_ERROR(msg);
460}
461
#if 0
/* Compiled-out helper: returns the entry of _PyParser_TokenNames for type,
   or "<Huh?>" when type lies outside 0..N_TOKENS. */
static const char *
token_name(int type)
{
    if (type < 0 || type > N_TOKENS) {
        return "<Huh?>";
    }
    return _PyParser_TokenNames[type];
}
#endif
472
473// Here, mark is the start of the node, while p->mark is the end.
474// If node==NULL, they should be the same.
475int
476_PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
477{
478 // Insert in front
479 Memo *m = PyArena_Malloc(p->arena, sizeof(Memo));
480 if (m == NULL) {
481 return -1;
482 }
483 m->type = type;
484 m->node = node;
485 m->mark = p->mark;
486 m->next = p->tokens[mark]->memo;
487 p->tokens[mark]->memo = m;
488 return 0;
489}
490
491// Like _PyPegen_insert_memo(), but updates an existing node if found.
492int
493_PyPegen_update_memo(Parser *p, int mark, int type, void *node)
494{
495 for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
496 if (m->type == type) {
497 // Update existing node.
498 m->node = node;
499 m->mark = p->mark;
500 return 0;
501 }
502 }
503 // Insert new node.
504 return _PyPegen_insert_memo(p, mark, type, node);
505}
506
507// Return dummy NAME.
508void *
509_PyPegen_dummy_name(Parser *p, ...)
510{
511 static void *cache = NULL;
512
513 if (cache != NULL) {
514 return cache;
515 }
516
517 PyObject *id = _create_dummy_identifier(p);
518 if (!id) {
519 return NULL;
520 }
521 cache = Name(id, Load, 1, 0, 1, 0, p->arena);
522 return cache;
523}
524
525static int
526_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
527{
528 if (name_len >= p->n_keyword_lists || p->keywords[name_len] == NULL) {
529 return NAME;
530 }
531 for (KeywordToken *k = p->keywords[name_len]; k->type != -1; k++) {
532 if (strncmp(k->str, name, name_len) == 0) {
533 return k->type;
534 }
535 }
536 return NAME;
537}
538
539int
540_PyPegen_fill_token(Parser *p)
541{
542 const char *start, *end;
543 int type = PyTokenizer_Get(p->tok, &start, &end);
544 if (type == ERRORTOKEN) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300545 if (p->tok->done == E_DECODE) {
546 return raise_decode_error(p);
547 }
548 else {
549 return tokenizer_error(p);
550 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100551 }
552 if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
553 type = NEWLINE; /* Add an extra newline */
554 p->parsing_started = 0;
555
Pablo Galindob94dbd72020-04-27 18:35:58 +0100556 if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100557 p->tok->pendin = -p->tok->indent;
558 p->tok->indent = 0;
559 }
560 }
561 else {
562 p->parsing_started = 1;
563 }
564
565 if (p->fill == p->size) {
566 int newsize = p->size * 2;
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300567 Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
568 if (new_tokens == NULL) {
569 PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100570 return -1;
571 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300572 else {
573 p->tokens = new_tokens;
574 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100575 for (int i = p->size; i < newsize; i++) {
576 p->tokens[i] = PyMem_Malloc(sizeof(Token));
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300577 if (p->tokens[i] == NULL) {
578 p->size = i; // Needed, in order to cleanup correctly after parser fails
579 PyErr_NoMemory();
580 return -1;
581 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100582 memset(p->tokens[i], '\0', sizeof(Token));
583 }
584 p->size = newsize;
585 }
586
587 Token *t = p->tokens[p->fill];
588 t->type = (type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : type;
589 t->bytes = PyBytes_FromStringAndSize(start, end - start);
590 if (t->bytes == NULL) {
591 return -1;
592 }
593 PyArena_AddPyObject(p->arena, t->bytes);
594
595 int lineno = type == STRING ? p->tok->first_lineno : p->tok->lineno;
596 const char *line_start = type == STRING ? p->tok->multi_line_start : p->tok->line_start;
Pablo Galindoee40e4b2020-04-23 03:43:08 +0100597 size_t end_lineno = p->tok->lineno;
598 size_t col_offset = -1, end_col_offset = -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100599 if (start != NULL && start >= line_start) {
600 col_offset = start - line_start;
601 }
602 if (end != NULL && end >= p->tok->line_start) {
603 end_col_offset = end - p->tok->line_start;
604 }
605
606 t->lineno = p->starting_lineno + lineno;
607 t->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
608 t->end_lineno = p->starting_lineno + end_lineno;
609 t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
610
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100611 p->fill += 1;
612 return 0;
613}
614
// Instrumentation to count the effectiveness of memoization.
// The array counts the number of tokens skipped by memoization,
// indexed by type.

#define NSTATISTICS 2000
static long memo_statistics[NSTATISTICS];

// Resets every memoization counter to zero.
void
_PyPegen_clear_memo_statistics()
{
    memset(memo_statistics, 0, sizeof(memo_statistics));
}
629
630PyObject *
631_PyPegen_get_memo_statistics()
632{
633 PyObject *ret = PyList_New(NSTATISTICS);
634 if (ret == NULL) {
635 return NULL;
636 }
637 for (int i = 0; i < NSTATISTICS; i++) {
638 PyObject *value = PyLong_FromLong(memo_statistics[i]);
639 if (value == NULL) {
640 Py_DECREF(ret);
641 return NULL;
642 }
643 // PyList_SetItem borrows a reference to value.
644 if (PyList_SetItem(ret, i, value) < 0) {
645 Py_DECREF(ret);
646 return NULL;
647 }
648 }
649 return ret;
650}
651
652int // bool
653_PyPegen_is_memoized(Parser *p, int type, void *pres)
654{
655 if (p->mark == p->fill) {
656 if (_PyPegen_fill_token(p) < 0) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300657 p->error_indicator = 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100658 return -1;
659 }
660 }
661
662 Token *t = p->tokens[p->mark];
663
664 for (Memo *m = t->memo; m != NULL; m = m->next) {
665 if (m->type == type) {
666 if (0 <= type && type < NSTATISTICS) {
667 long count = m->mark - p->mark;
668 // A memoized negative result counts for one.
669 if (count <= 0) {
670 count = 1;
671 }
672 memo_statistics[type] += count;
673 }
674 p->mark = m->mark;
675 *(void **)(pres) = m->node;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100676 return 1;
677 }
678 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100679 return 0;
680}
681
Pablo Galindo1df5a9e2020-04-23 12:42:13 +0100682
683int
684_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
685{
686 int mark = p->mark;
687 void *res = func(p);
688 p->mark = mark;
689 return (res != NULL) == positive;
690}
691
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100692int
693_PyPegen_lookahead_with_string(int positive, void *(func)(Parser *, const char *), Parser *p,
694 const char *arg)
695{
696 int mark = p->mark;
697 void *res = func(p, arg);
698 p->mark = mark;
699 return (res != NULL) == positive;
700}
701
702int
703_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
704{
705 int mark = p->mark;
706 void *res = func(p, arg);
707 p->mark = mark;
708 return (res != NULL) == positive;
709}
710
711int
712_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
713{
714 int mark = p->mark;
Pablo Galindo1df5a9e2020-04-23 12:42:13 +0100715 void *res = (void*)func(p);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100716 p->mark = mark;
717 return (res != NULL) == positive;
718}
719
720Token *
721_PyPegen_expect_token(Parser *p, int type)
722{
723 if (p->mark == p->fill) {
724 if (_PyPegen_fill_token(p) < 0) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300725 p->error_indicator = 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100726 return NULL;
727 }
728 }
729 Token *t = p->tokens[p->mark];
730 if (t->type != type) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100731 return NULL;
732 }
733 p->mark += 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100734 return t;
735}
736
737Token *
738_PyPegen_get_last_nonnwhitespace_token(Parser *p)
739{
740 assert(p->mark >= 0);
741 Token *token = NULL;
742 for (int m = p->mark - 1; m >= 0; m--) {
743 token = p->tokens[m];
744 if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
745 break;
746 }
747 }
748 return token;
749}
750
751void *
752_PyPegen_async_token(Parser *p)
753{
754 return _PyPegen_expect_token(p, ASYNC);
755}
756
757void *
758_PyPegen_await_token(Parser *p)
759{
760 return _PyPegen_expect_token(p, AWAIT);
761}
762
763void *
764_PyPegen_endmarker_token(Parser *p)
765{
766 return _PyPegen_expect_token(p, ENDMARKER);
767}
768
769expr_ty
770_PyPegen_name_token(Parser *p)
771{
772 Token *t = _PyPegen_expect_token(p, NAME);
773 if (t == NULL) {
774 return NULL;
775 }
776 char* s = PyBytes_AsString(t->bytes);
777 if (!s) {
778 return NULL;
779 }
780 PyObject *id = _PyPegen_new_identifier(p, s);
781 if (id == NULL) {
782 return NULL;
783 }
784 return Name(id, Load, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
785 p->arena);
786}
787
788void *
789_PyPegen_string_token(Parser *p)
790{
791 return _PyPegen_expect_token(p, STRING);
792}
793
794void *
795_PyPegen_newline_token(Parser *p)
796{
797 return _PyPegen_expect_token(p, NEWLINE);
798}
799
800void *
801_PyPegen_indent_token(Parser *p)
802{
803 return _PyPegen_expect_token(p, INDENT);
804}
805
806void *
807_PyPegen_dedent_token(Parser *p)
808{
809 return _PyPegen_expect_token(p, DEDENT);
810}
811
812static PyObject *
813parsenumber_raw(const char *s)
814{
815 const char *end;
816 long x;
817 double dx;
818 Py_complex compl;
819 int imflag;
820
821 assert(s != NULL);
822 errno = 0;
823 end = s + strlen(s) - 1;
824 imflag = *end == 'j' || *end == 'J';
825 if (s[0] == '0') {
826 x = (long)PyOS_strtoul(s, (char **)&end, 0);
827 if (x < 0 && errno == 0) {
828 return PyLong_FromString(s, (char **)0, 0);
829 }
830 }
831 else
832 x = PyOS_strtol(s, (char **)&end, 0);
833 if (*end == '\0') {
834 if (errno != 0)
835 return PyLong_FromString(s, (char **)0, 0);
836 return PyLong_FromLong(x);
837 }
838 /* XXX Huge floats may silently fail */
839 if (imflag) {
840 compl.real = 0.;
841 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
842 if (compl.imag == -1.0 && PyErr_Occurred())
843 return NULL;
844 return PyComplex_FromCComplex(compl);
845 }
846 else {
847 dx = PyOS_string_to_double(s, NULL, NULL);
848 if (dx == -1.0 && PyErr_Occurred())
849 return NULL;
850 return PyFloat_FromDouble(dx);
851 }
852}
853
854static PyObject *
855parsenumber(const char *s)
856{
857 char *dup, *end;
858 PyObject *res = NULL;
859
860 assert(s != NULL);
861
862 if (strchr(s, '_') == NULL) {
863 return parsenumber_raw(s);
864 }
865 /* Create a duplicate without underscores. */
866 dup = PyMem_Malloc(strlen(s) + 1);
867 if (dup == NULL) {
868 return PyErr_NoMemory();
869 }
870 end = dup;
871 for (; *s; s++) {
872 if (*s != '_') {
873 *end++ = *s;
874 }
875 }
876 *end = '\0';
877 res = parsenumber_raw(dup);
878 PyMem_Free(dup);
879 return res;
880}
881
882expr_ty
883_PyPegen_number_token(Parser *p)
884{
885 Token *t = _PyPegen_expect_token(p, NUMBER);
886 if (t == NULL) {
887 return NULL;
888 }
889
890 char *num_raw = PyBytes_AsString(t->bytes);
891
892 if (num_raw == NULL) {
893 return NULL;
894 }
895
896 PyObject *c = parsenumber(num_raw);
897
898 if (c == NULL) {
899 return NULL;
900 }
901
902 if (PyArena_AddPyObject(p->arena, c) < 0) {
903 Py_DECREF(c);
904 return NULL;
905 }
906
907 return Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
908 p->arena);
909}
910
911void
912_PyPegen_Parser_Free(Parser *p)
913{
914 Py_XDECREF(p->normalize);
915 for (int i = 0; i < p->size; i++) {
916 PyMem_Free(p->tokens[i]);
917 }
918 PyMem_Free(p->tokens);
919 PyMem_Free(p);
920}
921
Pablo Galindo2b74c832020-04-27 18:02:07 +0100922static int
923compute_parser_flags(PyCompilerFlags *flags)
924{
925 int parser_flags = 0;
926 if (!flags) {
927 return 0;
928 }
929 if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) {
930 parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
931 }
932 if (flags->cf_flags & PyCF_IGNORE_COOKIE) {
933 parser_flags |= PyPARSE_IGNORE_COOKIE;
934 }
935 if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) {
936 parser_flags |= PyPARSE_BARRY_AS_BDFL;
937 }
938 if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
939 parser_flags |= PyPARSE_TYPE_COMMENTS;
940 }
941 return parser_flags;
942}
943
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100944Parser *
Pablo Galindo2b74c832020-04-27 18:02:07 +0100945_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
946 int *errcode, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100947{
948 Parser *p = PyMem_Malloc(sizeof(Parser));
949 if (p == NULL) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300950 return (Parser *) PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100951 }
952 assert(tok != NULL);
953 p->tok = tok;
954 p->keywords = NULL;
955 p->n_keyword_lists = -1;
956 p->tokens = PyMem_Malloc(sizeof(Token *));
957 if (!p->tokens) {
958 PyMem_Free(p);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300959 return (Parser *) PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100960 }
961 p->tokens[0] = PyMem_Malloc(sizeof(Token));
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300962 if (!p->tokens) {
963 PyMem_Free(p->tokens);
964 PyMem_Free(p);
965 return (Parser *) PyErr_NoMemory();
966 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100967 memset(p->tokens[0], '\0', sizeof(Token));
968 p->mark = 0;
969 p->fill = 0;
970 p->size = 1;
971
972 p->errcode = errcode;
973 p->arena = arena;
974 p->start_rule = start_rule;
975 p->parsing_started = 0;
976 p->normalize = NULL;
977 p->error_indicator = 0;
978
979 p->starting_lineno = 0;
980 p->starting_col_offset = 0;
Pablo Galindo2b74c832020-04-27 18:02:07 +0100981 p->flags = flags;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100982
983 return p;
984}
985
986void *
987_PyPegen_run_parser(Parser *p)
988{
989 void *res = _PyPegen_parse(p);
990 if (res == NULL) {
991 if (PyErr_Occurred()) {
992 return NULL;
993 }
994 if (p->fill == 0) {
995 RAISE_SYNTAX_ERROR("error at start before reading any input");
996 }
997 else if (p->tok->done == E_EOF) {
998 RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
999 }
1000 else {
1001 if (p->tokens[p->fill-1]->type == INDENT) {
1002 RAISE_INDENTATION_ERROR("unexpected indent");
1003 }
1004 else if (p->tokens[p->fill-1]->type == DEDENT) {
1005 RAISE_INDENTATION_ERROR("unexpected unindent");
1006 }
1007 else {
1008 RAISE_SYNTAX_ERROR("invalid syntax");
1009 }
1010 }
1011 return NULL;
1012 }
1013
1014 return res;
1015}
1016
1017mod_ty
1018_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
1019 const char *enc, const char *ps1, const char *ps2,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001020 PyCompilerFlags *flags, int *errcode, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001021{
1022 struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
1023 if (tok == NULL) {
1024 if (PyErr_Occurred()) {
1025 raise_tokenizer_init_error(filename_ob);
1026 return NULL;
1027 }
1028 return NULL;
1029 }
1030 // This transfers the ownership to the tokenizer
1031 tok->filename = filename_ob;
1032 Py_INCREF(filename_ob);
1033
1034 // From here on we need to clean up even if there's an error
1035 mod_ty result = NULL;
1036
Pablo Galindo2b74c832020-04-27 18:02:07 +01001037 int parser_flags = compute_parser_flags(flags);
1038 Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, errcode, arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001039 if (p == NULL) {
1040 goto error;
1041 }
1042
1043 result = _PyPegen_run_parser(p);
1044 _PyPegen_Parser_Free(p);
1045
1046error:
1047 PyTokenizer_Free(tok);
1048 return result;
1049}
1050
1051mod_ty
1052_PyPegen_run_parser_from_file(const char *filename, int start_rule,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001053 PyObject *filename_ob, PyCompilerFlags *flags, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001054{
1055 FILE *fp = fopen(filename, "rb");
1056 if (fp == NULL) {
1057 PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
1058 return NULL;
1059 }
1060
1061 mod_ty result = _PyPegen_run_parser_from_file_pointer(fp, start_rule, filename_ob,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001062 NULL, NULL, NULL, flags, NULL, arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001063
1064 fclose(fp);
1065 return result;
1066}
1067
1068mod_ty
1069_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001070 PyCompilerFlags *flags, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001071{
1072 int exec_input = start_rule == Py_file_input;
1073
1074 struct tok_state *tok;
Pablo Galindo2b74c832020-04-27 18:02:07 +01001075 if (flags == NULL || flags->cf_flags & PyCF_IGNORE_COOKIE) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001076 tok = PyTokenizer_FromUTF8(str, exec_input);
1077 } else {
1078 tok = PyTokenizer_FromString(str, exec_input);
1079 }
1080 if (tok == NULL) {
1081 if (PyErr_Occurred()) {
1082 raise_tokenizer_init_error(filename_ob);
1083 }
1084 return NULL;
1085 }
1086 // This transfers the ownership to the tokenizer
1087 tok->filename = filename_ob;
1088 Py_INCREF(filename_ob);
1089
1090 // We need to clear up from here on
1091 mod_ty result = NULL;
1092
Pablo Galindo2b74c832020-04-27 18:02:07 +01001093 int parser_flags = compute_parser_flags(flags);
1094 Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, NULL, arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001095 if (p == NULL) {
1096 goto error;
1097 }
1098
1099 result = _PyPegen_run_parser(p);
1100 _PyPegen_Parser_Free(p);
1101
1102error:
1103 PyTokenizer_Free(tok);
1104 return result;
1105}
1106
1107void *
1108_PyPegen_interactive_exit(Parser *p)
1109{
1110 if (p->errcode) {
1111 *(p->errcode) = E_EOF;
1112 }
1113 return NULL;
1114}
1115
1116/* Creates a single-element asdl_seq* that contains a */
1117asdl_seq *
1118_PyPegen_singleton_seq(Parser *p, void *a)
1119{
1120 assert(a != NULL);
1121 asdl_seq *seq = _Py_asdl_seq_new(1, p->arena);
1122 if (!seq) {
1123 return NULL;
1124 }
1125 asdl_seq_SET(seq, 0, a);
1126 return seq;
1127}
1128
1129/* Creates a copy of seq and prepends a to it */
1130asdl_seq *
1131_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
1132{
1133 assert(a != NULL);
1134 if (!seq) {
1135 return _PyPegen_singleton_seq(p, a);
1136 }
1137
1138 asdl_seq *new_seq = _Py_asdl_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
1139 if (!new_seq) {
1140 return NULL;
1141 }
1142
1143 asdl_seq_SET(new_seq, 0, a);
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001144 for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001145 asdl_seq_SET(new_seq, i, asdl_seq_GET(seq, i - 1));
1146 }
1147 return new_seq;
1148}
1149
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001150static Py_ssize_t
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001151_get_flattened_seq_size(asdl_seq *seqs)
1152{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001153 Py_ssize_t size = 0;
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001154 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1155 asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
1156 size += asdl_seq_LEN(inner_seq);
1157 }
1158 return size;
1159}
1160
1161/* Flattens an asdl_seq* of asdl_seq*s */
1162asdl_seq *
1163_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
1164{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001165 Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001166 assert(flattened_seq_size > 0);
1167
1168 asdl_seq *flattened_seq = _Py_asdl_seq_new(flattened_seq_size, p->arena);
1169 if (!flattened_seq) {
1170 return NULL;
1171 }
1172
1173 int flattened_seq_idx = 0;
1174 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1175 asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001176 for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001177 asdl_seq_SET(flattened_seq, flattened_seq_idx++, asdl_seq_GET(inner_seq, j));
1178 }
1179 }
1180 assert(flattened_seq_idx == flattened_seq_size);
1181
1182 return flattened_seq;
1183}
1184
1185/* Creates a new name of the form <first_name>.<second_name> */
1186expr_ty
1187_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
1188{
1189 assert(first_name != NULL && second_name != NULL);
1190 PyObject *first_identifier = first_name->v.Name.id;
1191 PyObject *second_identifier = second_name->v.Name.id;
1192
1193 if (PyUnicode_READY(first_identifier) == -1) {
1194 return NULL;
1195 }
1196 if (PyUnicode_READY(second_identifier) == -1) {
1197 return NULL;
1198 }
1199 const char *first_str = PyUnicode_AsUTF8(first_identifier);
1200 if (!first_str) {
1201 return NULL;
1202 }
1203 const char *second_str = PyUnicode_AsUTF8(second_identifier);
1204 if (!second_str) {
1205 return NULL;
1206 }
Pablo Galindo9f27dd32020-04-24 01:13:33 +01001207 Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1; // +1 for the dot
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001208
1209 PyObject *str = PyBytes_FromStringAndSize(NULL, len);
1210 if (!str) {
1211 return NULL;
1212 }
1213
1214 char *s = PyBytes_AS_STRING(str);
1215 if (!s) {
1216 return NULL;
1217 }
1218
1219 strcpy(s, first_str);
1220 s += strlen(first_str);
1221 *s++ = '.';
1222 strcpy(s, second_str);
1223 s += strlen(second_str);
1224 *s = '\0';
1225
1226 PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL);
1227 Py_DECREF(str);
1228 if (!uni) {
1229 return NULL;
1230 }
1231 PyUnicode_InternInPlace(&uni);
1232 if (PyArena_AddPyObject(p->arena, uni) < 0) {
1233 Py_DECREF(uni);
1234 return NULL;
1235 }
1236
1237 return _Py_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
1238}
1239
1240/* Counts the total number of dots in seq's tokens */
1241int
1242_PyPegen_seq_count_dots(asdl_seq *seq)
1243{
1244 int number_of_dots = 0;
1245 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1246 Token *current_expr = asdl_seq_GET(seq, i);
1247 switch (current_expr->type) {
1248 case ELLIPSIS:
1249 number_of_dots += 3;
1250 break;
1251 case DOT:
1252 number_of_dots += 1;
1253 break;
1254 default:
Lysandros Nikolaouebebb642020-04-23 18:36:06 +03001255 Py_UNREACHABLE();
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001256 }
1257 }
1258
1259 return number_of_dots;
1260}
1261
1262/* Creates an alias with '*' as the identifier name */
1263alias_ty
1264_PyPegen_alias_for_star(Parser *p)
1265{
1266 PyObject *str = PyUnicode_InternFromString("*");
1267 if (!str) {
1268 return NULL;
1269 }
1270 if (PyArena_AddPyObject(p->arena, str) < 0) {
1271 Py_DECREF(str);
1272 return NULL;
1273 }
1274 return alias(str, NULL, p->arena);
1275}
1276
1277/* Creates a new asdl_seq* with the identifiers of all the names in seq */
1278asdl_seq *
1279_PyPegen_map_names_to_ids(Parser *p, asdl_seq *seq)
1280{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001281 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001282 assert(len > 0);
1283
1284 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1285 if (!new_seq) {
1286 return NULL;
1287 }
1288 for (Py_ssize_t i = 0; i < len; i++) {
1289 expr_ty e = asdl_seq_GET(seq, i);
1290 asdl_seq_SET(new_seq, i, e->v.Name.id);
1291 }
1292 return new_seq;
1293}
1294
1295/* Constructs a CmpopExprPair */
1296CmpopExprPair *
1297_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
1298{
1299 assert(expr != NULL);
1300 CmpopExprPair *a = PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
1301 if (!a) {
1302 return NULL;
1303 }
1304 a->cmpop = cmpop;
1305 a->expr = expr;
1306 return a;
1307}
1308
1309asdl_int_seq *
1310_PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
1311{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001312 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001313 assert(len > 0);
1314
1315 asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
1316 if (!new_seq) {
1317 return NULL;
1318 }
1319 for (Py_ssize_t i = 0; i < len; i++) {
1320 CmpopExprPair *pair = asdl_seq_GET(seq, i);
1321 asdl_seq_SET(new_seq, i, pair->cmpop);
1322 }
1323 return new_seq;
1324}
1325
1326asdl_seq *
1327_PyPegen_get_exprs(Parser *p, asdl_seq *seq)
1328{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001329 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001330 assert(len > 0);
1331
1332 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1333 if (!new_seq) {
1334 return NULL;
1335 }
1336 for (Py_ssize_t i = 0; i < len; i++) {
1337 CmpopExprPair *pair = asdl_seq_GET(seq, i);
1338 asdl_seq_SET(new_seq, i, pair->expr);
1339 }
1340 return new_seq;
1341}
1342
1343/* Creates an asdl_seq* where all the elements have been changed to have ctx as context */
1344static asdl_seq *
1345_set_seq_context(Parser *p, asdl_seq *seq, expr_context_ty ctx)
1346{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001347 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001348 if (len == 0) {
1349 return NULL;
1350 }
1351
1352 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1353 if (!new_seq) {
1354 return NULL;
1355 }
1356 for (Py_ssize_t i = 0; i < len; i++) {
1357 expr_ty e = asdl_seq_GET(seq, i);
1358 asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
1359 }
1360 return new_seq;
1361}
1362
1363static expr_ty
1364_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
1365{
1366 return _Py_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
1367}
1368
1369static expr_ty
1370_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
1371{
1372 return _Py_Tuple(_set_seq_context(p, e->v.Tuple.elts, ctx), ctx, EXTRA_EXPR(e, e));
1373}
1374
1375static expr_ty
1376_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
1377{
1378 return _Py_List(_set_seq_context(p, e->v.List.elts, ctx), ctx, EXTRA_EXPR(e, e));
1379}
1380
1381static expr_ty
1382_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
1383{
1384 return _Py_Subscript(e->v.Subscript.value, e->v.Subscript.slice, ctx, EXTRA_EXPR(e, e));
1385}
1386
1387static expr_ty
1388_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
1389{
1390 return _Py_Attribute(e->v.Attribute.value, e->v.Attribute.attr, ctx, EXTRA_EXPR(e, e));
1391}
1392
1393static expr_ty
1394_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
1395{
1396 return _Py_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx), ctx, EXTRA_EXPR(e, e));
1397}
1398
1399/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
1400expr_ty
1401_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
1402{
1403 assert(expr != NULL);
1404
1405 expr_ty new = NULL;
1406 switch (expr->kind) {
1407 case Name_kind:
1408 new = _set_name_context(p, expr, ctx);
1409 break;
1410 case Tuple_kind:
1411 new = _set_tuple_context(p, expr, ctx);
1412 break;
1413 case List_kind:
1414 new = _set_list_context(p, expr, ctx);
1415 break;
1416 case Subscript_kind:
1417 new = _set_subscript_context(p, expr, ctx);
1418 break;
1419 case Attribute_kind:
1420 new = _set_attribute_context(p, expr, ctx);
1421 break;
1422 case Starred_kind:
1423 new = _set_starred_context(p, expr, ctx);
1424 break;
1425 default:
1426 new = expr;
1427 }
1428 return new;
1429}
1430
1431/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
1432KeyValuePair *
1433_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
1434{
1435 KeyValuePair *a = PyArena_Malloc(p->arena, sizeof(KeyValuePair));
1436 if (!a) {
1437 return NULL;
1438 }
1439 a->key = key;
1440 a->value = value;
1441 return a;
1442}
1443
1444/* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
1445asdl_seq *
1446_PyPegen_get_keys(Parser *p, asdl_seq *seq)
1447{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001448 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001449 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1450 if (!new_seq) {
1451 return NULL;
1452 }
1453 for (Py_ssize_t i = 0; i < len; i++) {
1454 KeyValuePair *pair = asdl_seq_GET(seq, i);
1455 asdl_seq_SET(new_seq, i, pair->key);
1456 }
1457 return new_seq;
1458}
1459
1460/* Extracts all values from an asdl_seq* of KeyValuePair*'s */
1461asdl_seq *
1462_PyPegen_get_values(Parser *p, asdl_seq *seq)
1463{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001464 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001465 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1466 if (!new_seq) {
1467 return NULL;
1468 }
1469 for (Py_ssize_t i = 0; i < len; i++) {
1470 KeyValuePair *pair = asdl_seq_GET(seq, i);
1471 asdl_seq_SET(new_seq, i, pair->value);
1472 }
1473 return new_seq;
1474}
1475
1476/* Constructs a NameDefaultPair */
1477NameDefaultPair *
1478_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value)
1479{
1480 NameDefaultPair *a = PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
1481 if (!a) {
1482 return NULL;
1483 }
1484 a->arg = arg;
1485 a->value = value;
1486 return a;
1487}
1488
1489/* Constructs a SlashWithDefault */
1490SlashWithDefault *
1491_PyPegen_slash_with_default(Parser *p, asdl_seq *plain_names, asdl_seq *names_with_defaults)
1492{
1493 SlashWithDefault *a = PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
1494 if (!a) {
1495 return NULL;
1496 }
1497 a->plain_names = plain_names;
1498 a->names_with_defaults = names_with_defaults;
1499 return a;
1500}
1501
1502/* Constructs a StarEtc */
1503StarEtc *
1504_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
1505{
1506 StarEtc *a = PyArena_Malloc(p->arena, sizeof(StarEtc));
1507 if (!a) {
1508 return NULL;
1509 }
1510 a->vararg = vararg;
1511 a->kwonlyargs = kwonlyargs;
1512 a->kwarg = kwarg;
1513 return a;
1514}
1515
1516asdl_seq *
1517_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
1518{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001519 Py_ssize_t first_len = asdl_seq_LEN(a);
1520 Py_ssize_t second_len = asdl_seq_LEN(b);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001521 asdl_seq *new_seq = _Py_asdl_seq_new(first_len + second_len, p->arena);
1522 if (!new_seq) {
1523 return NULL;
1524 }
1525
1526 int k = 0;
1527 for (Py_ssize_t i = 0; i < first_len; i++) {
1528 asdl_seq_SET(new_seq, k++, asdl_seq_GET(a, i));
1529 }
1530 for (Py_ssize_t i = 0; i < second_len; i++) {
1531 asdl_seq_SET(new_seq, k++, asdl_seq_GET(b, i));
1532 }
1533
1534 return new_seq;
1535}
1536
1537static asdl_seq *
1538_get_names(Parser *p, asdl_seq *names_with_defaults)
1539{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001540 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001541 asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
1542 if (!seq) {
1543 return NULL;
1544 }
1545 for (Py_ssize_t i = 0; i < len; i++) {
1546 NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
1547 asdl_seq_SET(seq, i, pair->arg);
1548 }
1549 return seq;
1550}
1551
1552static asdl_seq *
1553_get_defaults(Parser *p, asdl_seq *names_with_defaults)
1554{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001555 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001556 asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
1557 if (!seq) {
1558 return NULL;
1559 }
1560 for (Py_ssize_t i = 0; i < len; i++) {
1561 NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
1562 asdl_seq_SET(seq, i, pair->value);
1563 }
1564 return seq;
1565}
1566
1567/* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
1568arguments_ty
1569_PyPegen_make_arguments(Parser *p, asdl_seq *slash_without_default,
1570 SlashWithDefault *slash_with_default, asdl_seq *plain_names,
1571 asdl_seq *names_with_default, StarEtc *star_etc)
1572{
1573 asdl_seq *posonlyargs;
1574 if (slash_without_default != NULL) {
1575 posonlyargs = slash_without_default;
1576 }
1577 else if (slash_with_default != NULL) {
1578 asdl_seq *slash_with_default_names =
1579 _get_names(p, slash_with_default->names_with_defaults);
1580 if (!slash_with_default_names) {
1581 return NULL;
1582 }
1583 posonlyargs = _PyPegen_join_sequences(p, slash_with_default->plain_names, slash_with_default_names);
1584 if (!posonlyargs) {
1585 return NULL;
1586 }
1587 }
1588 else {
1589 posonlyargs = _Py_asdl_seq_new(0, p->arena);
1590 if (!posonlyargs) {
1591 return NULL;
1592 }
1593 }
1594
1595 asdl_seq *posargs;
1596 if (plain_names != NULL && names_with_default != NULL) {
1597 asdl_seq *names_with_default_names = _get_names(p, names_with_default);
1598 if (!names_with_default_names) {
1599 return NULL;
1600 }
1601 posargs = _PyPegen_join_sequences(p, plain_names, names_with_default_names);
1602 if (!posargs) {
1603 return NULL;
1604 }
1605 }
1606 else if (plain_names == NULL && names_with_default != NULL) {
1607 posargs = _get_names(p, names_with_default);
1608 if (!posargs) {
1609 return NULL;
1610 }
1611 }
1612 else if (plain_names != NULL && names_with_default == NULL) {
1613 posargs = plain_names;
1614 }
1615 else {
1616 posargs = _Py_asdl_seq_new(0, p->arena);
1617 if (!posargs) {
1618 return NULL;
1619 }
1620 }
1621
1622 asdl_seq *posdefaults;
1623 if (slash_with_default != NULL && names_with_default != NULL) {
1624 asdl_seq *slash_with_default_values =
1625 _get_defaults(p, slash_with_default->names_with_defaults);
1626 if (!slash_with_default_values) {
1627 return NULL;
1628 }
1629 asdl_seq *names_with_default_values = _get_defaults(p, names_with_default);
1630 if (!names_with_default_values) {
1631 return NULL;
1632 }
1633 posdefaults = _PyPegen_join_sequences(p, slash_with_default_values, names_with_default_values);
1634 if (!posdefaults) {
1635 return NULL;
1636 }
1637 }
1638 else if (slash_with_default == NULL && names_with_default != NULL) {
1639 posdefaults = _get_defaults(p, names_with_default);
1640 if (!posdefaults) {
1641 return NULL;
1642 }
1643 }
1644 else if (slash_with_default != NULL && names_with_default == NULL) {
1645 posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
1646 if (!posdefaults) {
1647 return NULL;
1648 }
1649 }
1650 else {
1651 posdefaults = _Py_asdl_seq_new(0, p->arena);
1652 if (!posdefaults) {
1653 return NULL;
1654 }
1655 }
1656
1657 arg_ty vararg = NULL;
1658 if (star_etc != NULL && star_etc->vararg != NULL) {
1659 vararg = star_etc->vararg;
1660 }
1661
1662 asdl_seq *kwonlyargs;
1663 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
1664 kwonlyargs = _get_names(p, star_etc->kwonlyargs);
1665 if (!kwonlyargs) {
1666 return NULL;
1667 }
1668 }
1669 else {
1670 kwonlyargs = _Py_asdl_seq_new(0, p->arena);
1671 if (!kwonlyargs) {
1672 return NULL;
1673 }
1674 }
1675
1676 asdl_seq *kwdefaults;
1677 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
1678 kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
1679 if (!kwdefaults) {
1680 return NULL;
1681 }
1682 }
1683 else {
1684 kwdefaults = _Py_asdl_seq_new(0, p->arena);
1685 if (!kwdefaults) {
1686 return NULL;
1687 }
1688 }
1689
1690 arg_ty kwarg = NULL;
1691 if (star_etc != NULL && star_etc->kwarg != NULL) {
1692 kwarg = star_etc->kwarg;
1693 }
1694
1695 return _Py_arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg,
1696 posdefaults, p->arena);
1697}
1698
1699/* Constructs an empty arguments_ty object, that gets used when a function accepts no
1700 * arguments. */
1701arguments_ty
1702_PyPegen_empty_arguments(Parser *p)
1703{
1704 asdl_seq *posonlyargs = _Py_asdl_seq_new(0, p->arena);
1705 if (!posonlyargs) {
1706 return NULL;
1707 }
1708 asdl_seq *posargs = _Py_asdl_seq_new(0, p->arena);
1709 if (!posargs) {
1710 return NULL;
1711 }
1712 asdl_seq *posdefaults = _Py_asdl_seq_new(0, p->arena);
1713 if (!posdefaults) {
1714 return NULL;
1715 }
1716 asdl_seq *kwonlyargs = _Py_asdl_seq_new(0, p->arena);
1717 if (!kwonlyargs) {
1718 return NULL;
1719 }
1720 asdl_seq *kwdefaults = _Py_asdl_seq_new(0, p->arena);
1721 if (!kwdefaults) {
1722 return NULL;
1723 }
1724
1725 return _Py_arguments(posonlyargs, posargs, NULL, kwonlyargs, kwdefaults, NULL, kwdefaults,
1726 p->arena);
1727}
1728
1729/* Encapsulates the value of an operator_ty into an AugOperator struct */
1730AugOperator *
1731_PyPegen_augoperator(Parser *p, operator_ty kind)
1732{
1733 AugOperator *a = PyArena_Malloc(p->arena, sizeof(AugOperator));
1734 if (!a) {
1735 return NULL;
1736 }
1737 a->kind = kind;
1738 return a;
1739}
1740
1741/* Construct a FunctionDef equivalent to function_def, but with decorators */
1742stmt_ty
1743_PyPegen_function_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty function_def)
1744{
1745 assert(function_def != NULL);
1746 if (function_def->kind == AsyncFunctionDef_kind) {
1747 return _Py_AsyncFunctionDef(
1748 function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
1749 function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns,
1750 function_def->v.FunctionDef.type_comment, function_def->lineno,
1751 function_def->col_offset, function_def->end_lineno, function_def->end_col_offset,
1752 p->arena);
1753 }
1754
1755 return _Py_FunctionDef(function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
1756 function_def->v.FunctionDef.body, decorators,
1757 function_def->v.FunctionDef.returns,
1758 function_def->v.FunctionDef.type_comment, function_def->lineno,
1759 function_def->col_offset, function_def->end_lineno,
1760 function_def->end_col_offset, p->arena);
1761}
1762
1763/* Construct a ClassDef equivalent to class_def, but with decorators */
1764stmt_ty
1765_PyPegen_class_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty class_def)
1766{
1767 assert(class_def != NULL);
1768 return _Py_ClassDef(class_def->v.ClassDef.name, class_def->v.ClassDef.bases,
1769 class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators,
1770 class_def->lineno, class_def->col_offset, class_def->end_lineno,
1771 class_def->end_col_offset, p->arena);
1772}
1773
1774/* Construct a KeywordOrStarred */
1775KeywordOrStarred *
1776_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
1777{
1778 KeywordOrStarred *a = PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
1779 if (!a) {
1780 return NULL;
1781 }
1782 a->element = element;
1783 a->is_keyword = is_keyword;
1784 return a;
1785}
1786
1787/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
1788static int
1789_seq_number_of_starred_exprs(asdl_seq *seq)
1790{
1791 int n = 0;
1792 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1793 KeywordOrStarred *k = asdl_seq_GET(seq, i);
1794 if (!k->is_keyword) {
1795 n++;
1796 }
1797 }
1798 return n;
1799}
1800
1801/* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */
1802asdl_seq *
1803_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
1804{
1805 int new_len = _seq_number_of_starred_exprs(kwargs);
1806 if (new_len == 0) {
1807 return NULL;
1808 }
1809 asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
1810 if (!new_seq) {
1811 return NULL;
1812 }
1813
1814 int idx = 0;
1815 for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
1816 KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
1817 if (!k->is_keyword) {
1818 asdl_seq_SET(new_seq, idx++, k->element);
1819 }
1820 }
1821 return new_seq;
1822}
1823
1824/* Return a new asdl_seq* with only the keywords in kwargs */
1825asdl_seq *
1826_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
1827{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001828 Py_ssize_t len = asdl_seq_LEN(kwargs);
1829 Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001830 if (new_len == 0) {
1831 return NULL;
1832 }
1833 asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
1834 if (!new_seq) {
1835 return NULL;
1836 }
1837
1838 int idx = 0;
1839 for (Py_ssize_t i = 0; i < len; i++) {
1840 KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
1841 if (k->is_keyword) {
1842 asdl_seq_SET(new_seq, idx++, k->element);
1843 }
1844 }
1845 return new_seq;
1846}
1847
1848expr_ty
1849_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
1850{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001851 Py_ssize_t len = asdl_seq_LEN(strings);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001852 assert(len > 0);
1853
1854 Token *first = asdl_seq_GET(strings, 0);
1855 Token *last = asdl_seq_GET(strings, len - 1);
1856
1857 int bytesmode = 0;
1858 PyObject *bytes_str = NULL;
1859
1860 FstringParser state;
1861 _PyPegen_FstringParser_Init(&state);
1862
1863 for (Py_ssize_t i = 0; i < len; i++) {
1864 Token *t = asdl_seq_GET(strings, i);
1865
1866 int this_bytesmode;
1867 int this_rawmode;
1868 PyObject *s;
1869 const char *fstr;
1870 Py_ssize_t fstrlen = -1;
1871
1872 char *this_str = PyBytes_AsString(t->bytes);
1873 if (!this_str) {
1874 goto error;
1875 }
1876
1877 if (_PyPegen_parsestr(p, this_str, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen) != 0) {
1878 goto error;
1879 }
1880
1881 /* Check that we are not mixing bytes with unicode. */
1882 if (i != 0 && bytesmode != this_bytesmode) {
1883 RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
1884 Py_XDECREF(s);
1885 goto error;
1886 }
1887 bytesmode = this_bytesmode;
1888
1889 if (fstr != NULL) {
1890 assert(s == NULL && !bytesmode);
1891
1892 int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
1893 this_rawmode, 0, first, t, last);
1894 if (result < 0) {
1895 goto error;
1896 }
1897 }
1898 else {
1899 /* String or byte string. */
1900 assert(s != NULL && fstr == NULL);
1901 assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
1902
1903 if (bytesmode) {
1904 if (i == 0) {
1905 bytes_str = s;
1906 }
1907 else {
1908 PyBytes_ConcatAndDel(&bytes_str, s);
1909 if (!bytes_str) {
1910 goto error;
1911 }
1912 }
1913 }
1914 else {
1915 /* This is a regular string. Concatenate it. */
1916 if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
1917 goto error;
1918 }
1919 }
1920 }
1921 }
1922
1923 if (bytesmode) {
1924 if (PyArena_AddPyObject(p->arena, bytes_str) < 0) {
1925 goto error;
1926 }
1927 return Constant(bytes_str, NULL, first->lineno, first->col_offset, last->end_lineno,
1928 last->end_col_offset, p->arena);
1929 }
1930
1931 return _PyPegen_FstringParser_Finish(p, &state, first, last);
1932
1933error:
1934 Py_XDECREF(bytes_str);
1935 _PyPegen_FstringParser_Dealloc(&state);
1936 if (PyErr_Occurred()) {
1937 raise_decode_error(p);
1938 }
1939 return NULL;
1940}