blob: c8f5c95b473e29b4c42b89845f2b2fd22dec329b [file] [log] [blame]
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001#include <Python.h>
2#include <errcode.h>
3#include "../tokenizer.h"
4
5#include "pegen.h"
6#include "parse_string.h"
7
8static int
9init_normalization(Parser *p)
10{
Lysandros Nikolaouebebb642020-04-23 18:36:06 +030011 if (p->normalize) {
12 return 1;
13 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +010014 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
15 if (!m)
16 {
17 return 0;
18 }
19 p->normalize = PyObject_GetAttrString(m, "normalize");
20 Py_DECREF(m);
21 if (!p->normalize)
22 {
23 return 0;
24 }
25 return 1;
26}
27
28PyObject *
29_PyPegen_new_identifier(Parser *p, char *n)
30{
31 PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
32 if (!id) {
33 goto error;
34 }
35 /* PyUnicode_DecodeUTF8 should always return a ready string. */
36 assert(PyUnicode_IS_READY(id));
37 /* Check whether there are non-ASCII characters in the
38 identifier; if so, normalize to NFKC. */
39 if (!PyUnicode_IS_ASCII(id))
40 {
41 PyObject *id2;
Lysandros Nikolaouebebb642020-04-23 18:36:06 +030042 if (!init_normalization(p))
Pablo Galindoc5fc1562020-04-22 23:29:27 +010043 {
44 Py_DECREF(id);
45 goto error;
46 }
47 PyObject *form = PyUnicode_InternFromString("NFKC");
48 if (form == NULL)
49 {
50 Py_DECREF(id);
51 goto error;
52 }
53 PyObject *args[2] = {form, id};
54 id2 = _PyObject_FastCall(p->normalize, args, 2);
55 Py_DECREF(id);
56 Py_DECREF(form);
57 if (!id2) {
58 goto error;
59 }
60 if (!PyUnicode_Check(id2))
61 {
62 PyErr_Format(PyExc_TypeError,
63 "unicodedata.normalize() must return a string, not "
64 "%.200s",
65 _PyType_Name(Py_TYPE(id2)));
66 Py_DECREF(id2);
67 goto error;
68 }
69 id = id2;
70 }
71 PyUnicode_InternInPlace(&id);
72 if (PyArena_AddPyObject(p->arena, id) < 0)
73 {
74 Py_DECREF(id);
75 goto error;
76 }
77 return id;
78
79error:
80 p->error_indicator = 1;
81 return NULL;
82}
83
84static PyObject *
85_create_dummy_identifier(Parser *p)
86{
87 return _PyPegen_new_identifier(p, "");
88}
89
90static inline Py_ssize_t
91byte_offset_to_character_offset(PyObject *line, int col_offset)
92{
93 const char *str = PyUnicode_AsUTF8(line);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +030094 if (!str) {
95 return 0;
96 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +010097 PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, NULL);
98 if (!text) {
99 return 0;
100 }
101 Py_ssize_t size = PyUnicode_GET_LENGTH(text);
102 Py_DECREF(text);
103 return size;
104}
105
106const char *
107_PyPegen_get_expr_name(expr_ty e)
108{
109 switch (e->kind) {
110 case Attribute_kind:
111 return "attribute";
112 case Subscript_kind:
113 return "subscript";
114 case Starred_kind:
115 return "starred";
116 case Name_kind:
117 return "name";
118 case List_kind:
119 return "list";
120 case Tuple_kind:
121 return "tuple";
122 case Lambda_kind:
123 return "lambda";
124 case Call_kind:
125 return "function call";
126 case BoolOp_kind:
127 case BinOp_kind:
128 case UnaryOp_kind:
129 return "operator";
130 case GeneratorExp_kind:
131 return "generator expression";
132 case Yield_kind:
133 case YieldFrom_kind:
134 return "yield expression";
135 case Await_kind:
136 return "await expression";
137 case ListComp_kind:
138 return "list comprehension";
139 case SetComp_kind:
140 return "set comprehension";
141 case DictComp_kind:
142 return "dict comprehension";
143 case Dict_kind:
144 return "dict display";
145 case Set_kind:
146 return "set display";
147 case JoinedStr_kind:
148 case FormattedValue_kind:
149 return "f-string expression";
150 case Constant_kind: {
151 PyObject *value = e->v.Constant.value;
152 if (value == Py_None) {
153 return "None";
154 }
155 if (value == Py_False) {
156 return "False";
157 }
158 if (value == Py_True) {
159 return "True";
160 }
161 if (value == Py_Ellipsis) {
162 return "Ellipsis";
163 }
164 return "literal";
165 }
166 case Compare_kind:
167 return "comparison";
168 case IfExp_kind:
169 return "conditional expression";
170 case NamedExpr_kind:
171 return "named expression";
172 default:
173 PyErr_Format(PyExc_SystemError,
174 "unexpected expression in assignment %d (line %d)",
175 e->kind, e->lineno);
176 return NULL;
177 }
178}
179
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300180static int
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100181raise_decode_error(Parser *p)
182{
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300183 assert(PyErr_Occurred());
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100184 const char *errtype = NULL;
185 if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
186 errtype = "unicode error";
187 }
188 else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
189 errtype = "value error";
190 }
191 if (errtype) {
192 PyObject *type, *value, *tback, *errstr;
193 PyErr_Fetch(&type, &value, &tback);
194 errstr = PyObject_Str(value);
195 if (errstr) {
196 RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
197 Py_DECREF(errstr);
198 }
199 else {
200 PyErr_Clear();
201 RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
202 }
203 Py_XDECREF(type);
204 Py_XDECREF(value);
205 Py_XDECREF(tback);
206 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300207
208 return -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100209}
210
211static void
212raise_tokenizer_init_error(PyObject *filename)
213{
214 if (!(PyErr_ExceptionMatches(PyExc_LookupError)
215 || PyErr_ExceptionMatches(PyExc_ValueError)
216 || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
217 return;
218 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300219 PyObject *errstr = NULL;
220 PyObject *tuple = NULL;
221 PyObject *type, *value, *tback;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100222 PyErr_Fetch(&type, &value, &tback);
223 errstr = PyObject_Str(value);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300224 if (!errstr) {
225 goto error;
226 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100227
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300228 PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100229 if (!tmp) {
230 goto error;
231 }
232
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300233 tuple = PyTuple_Pack(2, errstr, tmp);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100234 Py_DECREF(tmp);
235 if (!value) {
236 goto error;
237 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300238 PyErr_SetObject(PyExc_SyntaxError, tuple);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100239
240error:
241 Py_XDECREF(type);
242 Py_XDECREF(value);
243 Py_XDECREF(tback);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300244 Py_XDECREF(errstr);
245 Py_XDECREF(tuple);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100246}
247
248static inline PyObject *
249get_error_line(char *buffer)
250{
251 char *newline = strchr(buffer, '\n');
252 if (newline) {
253 return PyUnicode_FromStringAndSize(buffer, newline - buffer);
254 }
255 else {
256 return PyUnicode_FromString(buffer);
257 }
258}
259
260static int
261tokenizer_error_with_col_offset(Parser *p, PyObject *errtype, const char *errmsg)
262{
263 PyObject *errstr = NULL;
264 PyObject *value = NULL;
Pablo Galindoee40e4b2020-04-23 03:43:08 +0100265 size_t col_number = -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100266
267 errstr = PyUnicode_FromString(errmsg);
268 if (!errstr) {
269 return -1;
270 }
271
272 PyObject *loc = NULL;
273 if (p->start_rule == Py_file_input) {
274 loc = PyErr_ProgramTextObject(p->tok->filename, p->tok->lineno);
275 }
276 if (!loc) {
277 loc = get_error_line(p->tok->buf);
278 }
279
280 if (loc) {
281 col_number = p->tok->cur - p->tok->buf;
282 }
283 else {
284 Py_INCREF(Py_None);
285 loc = Py_None;
286 }
287
288 PyObject *tmp = Py_BuildValue("(OiiN)", p->tok->filename, p->tok->lineno,
289 col_number, loc);
290 if (!tmp) {
291 goto error;
292 }
293
294 value = PyTuple_Pack(2, errstr, tmp);
295 Py_DECREF(tmp);
296 if (!value) {
297 goto error;
298 }
299 PyErr_SetObject(errtype, value);
300
301 Py_XDECREF(value);
302 Py_XDECREF(errstr);
303 return -1;
304
305error:
306 Py_XDECREF(errstr);
307 Py_XDECREF(loc);
308 return -1;
309}
310
311static int
312tokenizer_error(Parser *p)
313{
314 if (PyErr_Occurred()) {
315 return -1;
316 }
317
318 const char *msg = NULL;
319 PyObject* errtype = PyExc_SyntaxError;
320 switch (p->tok->done) {
321 case E_TOKEN:
322 msg = "invalid token";
323 break;
324 case E_IDENTIFIER:
325 msg = "invalid character in identifier";
326 break;
327 case E_BADPREFIX:
328 return tokenizer_error_with_col_offset(p,
329 PyExc_SyntaxError, "invalid string prefix");
330 case E_EOFS:
331 return tokenizer_error_with_col_offset(p,
332 PyExc_SyntaxError, "EOF while scanning triple-quoted string literal");
333 case E_EOLS:
334 return tokenizer_error_with_col_offset(p,
335 PyExc_SyntaxError, "EOL while scanning string literal");
336 case E_DEDENT:
337 return tokenizer_error_with_col_offset(p,
338 PyExc_IndentationError, "unindent does not match any outer indentation level");
339 case E_INTR:
340 if (!PyErr_Occurred()) {
341 PyErr_SetNone(PyExc_KeyboardInterrupt);
342 }
343 return -1;
344 case E_NOMEM:
345 PyErr_NoMemory();
346 return -1;
347 case E_TABSPACE:
348 errtype = PyExc_TabError;
349 msg = "inconsistent use of tabs and spaces in indentation";
350 break;
351 case E_TOODEEP:
352 errtype = PyExc_IndentationError;
353 msg = "too many levels of indentation";
354 break;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100355 case E_LINECONT:
356 msg = "unexpected character after line continuation character";
357 break;
358 default:
359 msg = "unknown parsing error";
360 }
361
362 PyErr_Format(errtype, msg);
363 // There is no reliable column information for this error
364 PyErr_SyntaxLocationObject(p->tok->filename, p->tok->lineno, 0);
365
366 return -1;
367}
368
369void *
370_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
371{
372 PyObject *value = NULL;
373 PyObject *errstr = NULL;
374 PyObject *loc = NULL;
375 PyObject *tmp = NULL;
376 Token *t = p->tokens[p->fill - 1];
377 Py_ssize_t col_number = 0;
378 va_list va;
379
380 va_start(va, errmsg);
381 errstr = PyUnicode_FromFormatV(errmsg, va);
382 va_end(va);
383 if (!errstr) {
384 goto error;
385 }
386
387 if (p->start_rule == Py_file_input) {
388 loc = PyErr_ProgramTextObject(p->tok->filename, t->lineno);
389 }
390
391 if (!loc) {
392 loc = get_error_line(p->tok->buf);
393 }
394
395 if (loc) {
396 int col_offset = t->col_offset == -1 ? 0 : t->col_offset;
397 col_number = byte_offset_to_character_offset(loc, col_offset) + 1;
398 }
399 else {
400 Py_INCREF(Py_None);
401 loc = Py_None;
402 }
403
404
405 tmp = Py_BuildValue("(OiiN)", p->tok->filename, t->lineno, col_number, loc);
406 if (!tmp) {
407 goto error;
408 }
409 value = PyTuple_Pack(2, errstr, tmp);
410 Py_DECREF(tmp);
411 if (!value) {
412 goto error;
413 }
414 PyErr_SetObject(errtype, value);
415
416 Py_DECREF(errstr);
417 Py_DECREF(value);
418 return NULL;
419
420error:
421 Py_XDECREF(errstr);
422 Py_XDECREF(loc);
423 return NULL;
424}
425
426void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
427 int kwarg_unpacking = 0;
428 for (Py_ssize_t i = 0, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) {
429 keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i);
430 if (!keyword->arg) {
431 kwarg_unpacking = 1;
432 }
433 }
434
435 const char *msg = NULL;
436 if (kwarg_unpacking) {
437 msg = "positional argument follows keyword argument unpacking";
438 } else {
439 msg = "positional argument follows keyword argument";
440 }
441
442 return RAISE_SYNTAX_ERROR(msg);
443}
444
#if 0
/* Debug helper (currently compiled out): printable name of a token type. */
static const char *
token_name(int type)
{
    if (type < 0 || type > N_TOKENS) {
        return "<Huh?>";
    }
    return _PyParser_TokenNames[type];
}
#endif
455
456// Here, mark is the start of the node, while p->mark is the end.
457// If node==NULL, they should be the same.
458int
459_PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
460{
461 // Insert in front
462 Memo *m = PyArena_Malloc(p->arena, sizeof(Memo));
463 if (m == NULL) {
464 return -1;
465 }
466 m->type = type;
467 m->node = node;
468 m->mark = p->mark;
469 m->next = p->tokens[mark]->memo;
470 p->tokens[mark]->memo = m;
471 return 0;
472}
473
474// Like _PyPegen_insert_memo(), but updates an existing node if found.
475int
476_PyPegen_update_memo(Parser *p, int mark, int type, void *node)
477{
478 for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
479 if (m->type == type) {
480 // Update existing node.
481 m->node = node;
482 m->mark = p->mark;
483 return 0;
484 }
485 }
486 // Insert new node.
487 return _PyPegen_insert_memo(p, mark, type, node);
488}
489
490// Return dummy NAME.
491void *
492_PyPegen_dummy_name(Parser *p, ...)
493{
494 static void *cache = NULL;
495
496 if (cache != NULL) {
497 return cache;
498 }
499
500 PyObject *id = _create_dummy_identifier(p);
501 if (!id) {
502 return NULL;
503 }
504 cache = Name(id, Load, 1, 0, 1, 0, p->arena);
505 return cache;
506}
507
508static int
509_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
510{
511 if (name_len >= p->n_keyword_lists || p->keywords[name_len] == NULL) {
512 return NAME;
513 }
514 for (KeywordToken *k = p->keywords[name_len]; k->type != -1; k++) {
515 if (strncmp(k->str, name, name_len) == 0) {
516 return k->type;
517 }
518 }
519 return NAME;
520}
521
522int
523_PyPegen_fill_token(Parser *p)
524{
525 const char *start, *end;
526 int type = PyTokenizer_Get(p->tok, &start, &end);
527 if (type == ERRORTOKEN) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300528 if (p->tok->done == E_DECODE) {
529 return raise_decode_error(p);
530 }
531 else {
532 return tokenizer_error(p);
533 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100534 }
535 if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
536 type = NEWLINE; /* Add an extra newline */
537 p->parsing_started = 0;
538
539 if (p->tok->indent) {
540 p->tok->pendin = -p->tok->indent;
541 p->tok->indent = 0;
542 }
543 }
544 else {
545 p->parsing_started = 1;
546 }
547
548 if (p->fill == p->size) {
549 int newsize = p->size * 2;
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300550 Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
551 if (new_tokens == NULL) {
552 PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100553 return -1;
554 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300555 else {
556 p->tokens = new_tokens;
557 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100558 for (int i = p->size; i < newsize; i++) {
559 p->tokens[i] = PyMem_Malloc(sizeof(Token));
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300560 if (p->tokens[i] == NULL) {
561 p->size = i; // Needed, in order to cleanup correctly after parser fails
562 PyErr_NoMemory();
563 return -1;
564 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100565 memset(p->tokens[i], '\0', sizeof(Token));
566 }
567 p->size = newsize;
568 }
569
570 Token *t = p->tokens[p->fill];
571 t->type = (type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : type;
572 t->bytes = PyBytes_FromStringAndSize(start, end - start);
573 if (t->bytes == NULL) {
574 return -1;
575 }
576 PyArena_AddPyObject(p->arena, t->bytes);
577
578 int lineno = type == STRING ? p->tok->first_lineno : p->tok->lineno;
579 const char *line_start = type == STRING ? p->tok->multi_line_start : p->tok->line_start;
Pablo Galindoee40e4b2020-04-23 03:43:08 +0100580 size_t end_lineno = p->tok->lineno;
581 size_t col_offset = -1, end_col_offset = -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100582 if (start != NULL && start >= line_start) {
583 col_offset = start - line_start;
584 }
585 if (end != NULL && end >= p->tok->line_start) {
586 end_col_offset = end - p->tok->line_start;
587 }
588
589 t->lineno = p->starting_lineno + lineno;
590 t->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
591 t->end_lineno = p->starting_lineno + end_lineno;
592 t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
593
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100594 p->fill += 1;
595 return 0;
596}
597
// Instrumentation to measure how effective memoization is.
// memo_statistics[type] accumulates the number of tokens skipped thanks to
// memoized results for rule `type`.

#define NSTATISTICS 2000
static long memo_statistics[NSTATISTICS];

// Reset every memoization counter to zero.
void
_PyPegen_clear_memo_statistics()
{
    memset(memo_statistics, 0, sizeof(memo_statistics));
}
612
613PyObject *
614_PyPegen_get_memo_statistics()
615{
616 PyObject *ret = PyList_New(NSTATISTICS);
617 if (ret == NULL) {
618 return NULL;
619 }
620 for (int i = 0; i < NSTATISTICS; i++) {
621 PyObject *value = PyLong_FromLong(memo_statistics[i]);
622 if (value == NULL) {
623 Py_DECREF(ret);
624 return NULL;
625 }
626 // PyList_SetItem borrows a reference to value.
627 if (PyList_SetItem(ret, i, value) < 0) {
628 Py_DECREF(ret);
629 return NULL;
630 }
631 }
632 return ret;
633}
634
635int // bool
636_PyPegen_is_memoized(Parser *p, int type, void *pres)
637{
638 if (p->mark == p->fill) {
639 if (_PyPegen_fill_token(p) < 0) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300640 p->error_indicator = 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100641 return -1;
642 }
643 }
644
645 Token *t = p->tokens[p->mark];
646
647 for (Memo *m = t->memo; m != NULL; m = m->next) {
648 if (m->type == type) {
649 if (0 <= type && type < NSTATISTICS) {
650 long count = m->mark - p->mark;
651 // A memoized negative result counts for one.
652 if (count <= 0) {
653 count = 1;
654 }
655 memo_statistics[type] += count;
656 }
657 p->mark = m->mark;
658 *(void **)(pres) = m->node;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100659 return 1;
660 }
661 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100662 return 0;
663}
664
Pablo Galindo1df5a9e2020-04-23 12:42:13 +0100665
666int
667_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
668{
669 int mark = p->mark;
670 void *res = func(p);
671 p->mark = mark;
672 return (res != NULL) == positive;
673}
674
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100675int
676_PyPegen_lookahead_with_string(int positive, void *(func)(Parser *, const char *), Parser *p,
677 const char *arg)
678{
679 int mark = p->mark;
680 void *res = func(p, arg);
681 p->mark = mark;
682 return (res != NULL) == positive;
683}
684
685int
686_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
687{
688 int mark = p->mark;
689 void *res = func(p, arg);
690 p->mark = mark;
691 return (res != NULL) == positive;
692}
693
694int
695_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
696{
697 int mark = p->mark;
Pablo Galindo1df5a9e2020-04-23 12:42:13 +0100698 void *res = (void*)func(p);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100699 p->mark = mark;
700 return (res != NULL) == positive;
701}
702
703Token *
704_PyPegen_expect_token(Parser *p, int type)
705{
706 if (p->mark == p->fill) {
707 if (_PyPegen_fill_token(p) < 0) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300708 p->error_indicator = 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100709 return NULL;
710 }
711 }
712 Token *t = p->tokens[p->mark];
713 if (t->type != type) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100714 return NULL;
715 }
716 p->mark += 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100717 return t;
718}
719
720Token *
721_PyPegen_get_last_nonnwhitespace_token(Parser *p)
722{
723 assert(p->mark >= 0);
724 Token *token = NULL;
725 for (int m = p->mark - 1; m >= 0; m--) {
726 token = p->tokens[m];
727 if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
728 break;
729 }
730 }
731 return token;
732}
733
734void *
735_PyPegen_async_token(Parser *p)
736{
737 return _PyPegen_expect_token(p, ASYNC);
738}
739
740void *
741_PyPegen_await_token(Parser *p)
742{
743 return _PyPegen_expect_token(p, AWAIT);
744}
745
746void *
747_PyPegen_endmarker_token(Parser *p)
748{
749 return _PyPegen_expect_token(p, ENDMARKER);
750}
751
752expr_ty
753_PyPegen_name_token(Parser *p)
754{
755 Token *t = _PyPegen_expect_token(p, NAME);
756 if (t == NULL) {
757 return NULL;
758 }
759 char* s = PyBytes_AsString(t->bytes);
760 if (!s) {
761 return NULL;
762 }
763 PyObject *id = _PyPegen_new_identifier(p, s);
764 if (id == NULL) {
765 return NULL;
766 }
767 return Name(id, Load, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
768 p->arena);
769}
770
771void *
772_PyPegen_string_token(Parser *p)
773{
774 return _PyPegen_expect_token(p, STRING);
775}
776
777void *
778_PyPegen_newline_token(Parser *p)
779{
780 return _PyPegen_expect_token(p, NEWLINE);
781}
782
783void *
784_PyPegen_indent_token(Parser *p)
785{
786 return _PyPegen_expect_token(p, INDENT);
787}
788
789void *
790_PyPegen_dedent_token(Parser *p)
791{
792 return _PyPegen_expect_token(p, DEDENT);
793}
794
795static PyObject *
796parsenumber_raw(const char *s)
797{
798 const char *end;
799 long x;
800 double dx;
801 Py_complex compl;
802 int imflag;
803
804 assert(s != NULL);
805 errno = 0;
806 end = s + strlen(s) - 1;
807 imflag = *end == 'j' || *end == 'J';
808 if (s[0] == '0') {
809 x = (long)PyOS_strtoul(s, (char **)&end, 0);
810 if (x < 0 && errno == 0) {
811 return PyLong_FromString(s, (char **)0, 0);
812 }
813 }
814 else
815 x = PyOS_strtol(s, (char **)&end, 0);
816 if (*end == '\0') {
817 if (errno != 0)
818 return PyLong_FromString(s, (char **)0, 0);
819 return PyLong_FromLong(x);
820 }
821 /* XXX Huge floats may silently fail */
822 if (imflag) {
823 compl.real = 0.;
824 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
825 if (compl.imag == -1.0 && PyErr_Occurred())
826 return NULL;
827 return PyComplex_FromCComplex(compl);
828 }
829 else {
830 dx = PyOS_string_to_double(s, NULL, NULL);
831 if (dx == -1.0 && PyErr_Occurred())
832 return NULL;
833 return PyFloat_FromDouble(dx);
834 }
835}
836
837static PyObject *
838parsenumber(const char *s)
839{
840 char *dup, *end;
841 PyObject *res = NULL;
842
843 assert(s != NULL);
844
845 if (strchr(s, '_') == NULL) {
846 return parsenumber_raw(s);
847 }
848 /* Create a duplicate without underscores. */
849 dup = PyMem_Malloc(strlen(s) + 1);
850 if (dup == NULL) {
851 return PyErr_NoMemory();
852 }
853 end = dup;
854 for (; *s; s++) {
855 if (*s != '_') {
856 *end++ = *s;
857 }
858 }
859 *end = '\0';
860 res = parsenumber_raw(dup);
861 PyMem_Free(dup);
862 return res;
863}
864
865expr_ty
866_PyPegen_number_token(Parser *p)
867{
868 Token *t = _PyPegen_expect_token(p, NUMBER);
869 if (t == NULL) {
870 return NULL;
871 }
872
873 char *num_raw = PyBytes_AsString(t->bytes);
874
875 if (num_raw == NULL) {
876 return NULL;
877 }
878
879 PyObject *c = parsenumber(num_raw);
880
881 if (c == NULL) {
882 return NULL;
883 }
884
885 if (PyArena_AddPyObject(p->arena, c) < 0) {
886 Py_DECREF(c);
887 return NULL;
888 }
889
890 return Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
891 p->arena);
892}
893
894void
895_PyPegen_Parser_Free(Parser *p)
896{
897 Py_XDECREF(p->normalize);
898 for (int i = 0; i < p->size; i++) {
899 PyMem_Free(p->tokens[i]);
900 }
901 PyMem_Free(p->tokens);
902 PyMem_Free(p);
903}
904
905Parser *
906_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int *errcode, PyArena *arena)
907{
908 Parser *p = PyMem_Malloc(sizeof(Parser));
909 if (p == NULL) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300910 return (Parser *) PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100911 }
912 assert(tok != NULL);
913 p->tok = tok;
914 p->keywords = NULL;
915 p->n_keyword_lists = -1;
916 p->tokens = PyMem_Malloc(sizeof(Token *));
917 if (!p->tokens) {
918 PyMem_Free(p);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300919 return (Parser *) PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100920 }
921 p->tokens[0] = PyMem_Malloc(sizeof(Token));
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300922 if (!p->tokens) {
923 PyMem_Free(p->tokens);
924 PyMem_Free(p);
925 return (Parser *) PyErr_NoMemory();
926 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100927 memset(p->tokens[0], '\0', sizeof(Token));
928 p->mark = 0;
929 p->fill = 0;
930 p->size = 1;
931
932 p->errcode = errcode;
933 p->arena = arena;
934 p->start_rule = start_rule;
935 p->parsing_started = 0;
936 p->normalize = NULL;
937 p->error_indicator = 0;
938
939 p->starting_lineno = 0;
940 p->starting_col_offset = 0;
941
942 return p;
943}
944
945void *
946_PyPegen_run_parser(Parser *p)
947{
948 void *res = _PyPegen_parse(p);
949 if (res == NULL) {
950 if (PyErr_Occurred()) {
951 return NULL;
952 }
953 if (p->fill == 0) {
954 RAISE_SYNTAX_ERROR("error at start before reading any input");
955 }
956 else if (p->tok->done == E_EOF) {
957 RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
958 }
959 else {
960 if (p->tokens[p->fill-1]->type == INDENT) {
961 RAISE_INDENTATION_ERROR("unexpected indent");
962 }
963 else if (p->tokens[p->fill-1]->type == DEDENT) {
964 RAISE_INDENTATION_ERROR("unexpected unindent");
965 }
966 else {
967 RAISE_SYNTAX_ERROR("invalid syntax");
968 }
969 }
970 return NULL;
971 }
972
973 return res;
974}
975
976mod_ty
977_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
978 const char *enc, const char *ps1, const char *ps2,
979 int *errcode, PyArena *arena)
980{
981 struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
982 if (tok == NULL) {
983 if (PyErr_Occurred()) {
984 raise_tokenizer_init_error(filename_ob);
985 return NULL;
986 }
987 return NULL;
988 }
989 // This transfers the ownership to the tokenizer
990 tok->filename = filename_ob;
991 Py_INCREF(filename_ob);
992
993 // From here on we need to clean up even if there's an error
994 mod_ty result = NULL;
995
996 Parser *p = _PyPegen_Parser_New(tok, start_rule, errcode, arena);
997 if (p == NULL) {
998 goto error;
999 }
1000
1001 result = _PyPegen_run_parser(p);
1002 _PyPegen_Parser_Free(p);
1003
1004error:
1005 PyTokenizer_Free(tok);
1006 return result;
1007}
1008
1009mod_ty
1010_PyPegen_run_parser_from_file(const char *filename, int start_rule,
1011 PyObject *filename_ob, PyArena *arena)
1012{
1013 FILE *fp = fopen(filename, "rb");
1014 if (fp == NULL) {
1015 PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
1016 return NULL;
1017 }
1018
1019 mod_ty result = _PyPegen_run_parser_from_file_pointer(fp, start_rule, filename_ob,
1020 NULL, NULL, NULL, NULL, arena);
1021
1022 fclose(fp);
1023 return result;
1024}
1025
1026mod_ty
1027_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
1028 int iflags, PyArena *arena)
1029{
1030 int exec_input = start_rule == Py_file_input;
1031
1032 struct tok_state *tok;
1033 if (iflags & PyCF_IGNORE_COOKIE) {
1034 tok = PyTokenizer_FromUTF8(str, exec_input);
1035 } else {
1036 tok = PyTokenizer_FromString(str, exec_input);
1037 }
1038 if (tok == NULL) {
1039 if (PyErr_Occurred()) {
1040 raise_tokenizer_init_error(filename_ob);
1041 }
1042 return NULL;
1043 }
1044 // This transfers the ownership to the tokenizer
1045 tok->filename = filename_ob;
1046 Py_INCREF(filename_ob);
1047
1048 // We need to clear up from here on
1049 mod_ty result = NULL;
1050
1051 Parser *p = _PyPegen_Parser_New(tok, start_rule, NULL, arena);
1052 if (p == NULL) {
1053 goto error;
1054 }
1055
1056 result = _PyPegen_run_parser(p);
1057 _PyPegen_Parser_Free(p);
1058
1059error:
1060 PyTokenizer_Free(tok);
1061 return result;
1062}
1063
1064void *
1065_PyPegen_interactive_exit(Parser *p)
1066{
1067 if (p->errcode) {
1068 *(p->errcode) = E_EOF;
1069 }
1070 return NULL;
1071}
1072
1073/* Creates a single-element asdl_seq* that contains a */
1074asdl_seq *
1075_PyPegen_singleton_seq(Parser *p, void *a)
1076{
1077 assert(a != NULL);
1078 asdl_seq *seq = _Py_asdl_seq_new(1, p->arena);
1079 if (!seq) {
1080 return NULL;
1081 }
1082 asdl_seq_SET(seq, 0, a);
1083 return seq;
1084}
1085
1086/* Creates a copy of seq and prepends a to it */
1087asdl_seq *
1088_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
1089{
1090 assert(a != NULL);
1091 if (!seq) {
1092 return _PyPegen_singleton_seq(p, a);
1093 }
1094
1095 asdl_seq *new_seq = _Py_asdl_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
1096 if (!new_seq) {
1097 return NULL;
1098 }
1099
1100 asdl_seq_SET(new_seq, 0, a);
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001101 for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001102 asdl_seq_SET(new_seq, i, asdl_seq_GET(seq, i - 1));
1103 }
1104 return new_seq;
1105}
1106
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001107static Py_ssize_t
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001108_get_flattened_seq_size(asdl_seq *seqs)
1109{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001110 Py_ssize_t size = 0;
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001111 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1112 asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
1113 size += asdl_seq_LEN(inner_seq);
1114 }
1115 return size;
1116}
1117
1118/* Flattens an asdl_seq* of asdl_seq*s */
1119asdl_seq *
1120_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
1121{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001122 Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001123 assert(flattened_seq_size > 0);
1124
1125 asdl_seq *flattened_seq = _Py_asdl_seq_new(flattened_seq_size, p->arena);
1126 if (!flattened_seq) {
1127 return NULL;
1128 }
1129
1130 int flattened_seq_idx = 0;
1131 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1132 asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001133 for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001134 asdl_seq_SET(flattened_seq, flattened_seq_idx++, asdl_seq_GET(inner_seq, j));
1135 }
1136 }
1137 assert(flattened_seq_idx == flattened_seq_size);
1138
1139 return flattened_seq;
1140}
1141
1142/* Creates a new name of the form <first_name>.<second_name> */
1143expr_ty
1144_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
1145{
1146 assert(first_name != NULL && second_name != NULL);
1147 PyObject *first_identifier = first_name->v.Name.id;
1148 PyObject *second_identifier = second_name->v.Name.id;
1149
1150 if (PyUnicode_READY(first_identifier) == -1) {
1151 return NULL;
1152 }
1153 if (PyUnicode_READY(second_identifier) == -1) {
1154 return NULL;
1155 }
1156 const char *first_str = PyUnicode_AsUTF8(first_identifier);
1157 if (!first_str) {
1158 return NULL;
1159 }
1160 const char *second_str = PyUnicode_AsUTF8(second_identifier);
1161 if (!second_str) {
1162 return NULL;
1163 }
Pablo Galindo9f27dd32020-04-24 01:13:33 +01001164 Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1; // +1 for the dot
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001165
1166 PyObject *str = PyBytes_FromStringAndSize(NULL, len);
1167 if (!str) {
1168 return NULL;
1169 }
1170
1171 char *s = PyBytes_AS_STRING(str);
1172 if (!s) {
1173 return NULL;
1174 }
1175
1176 strcpy(s, first_str);
1177 s += strlen(first_str);
1178 *s++ = '.';
1179 strcpy(s, second_str);
1180 s += strlen(second_str);
1181 *s = '\0';
1182
1183 PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL);
1184 Py_DECREF(str);
1185 if (!uni) {
1186 return NULL;
1187 }
1188 PyUnicode_InternInPlace(&uni);
1189 if (PyArena_AddPyObject(p->arena, uni) < 0) {
1190 Py_DECREF(uni);
1191 return NULL;
1192 }
1193
1194 return _Py_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
1195}
1196
1197/* Counts the total number of dots in seq's tokens */
1198int
1199_PyPegen_seq_count_dots(asdl_seq *seq)
1200{
1201 int number_of_dots = 0;
1202 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1203 Token *current_expr = asdl_seq_GET(seq, i);
1204 switch (current_expr->type) {
1205 case ELLIPSIS:
1206 number_of_dots += 3;
1207 break;
1208 case DOT:
1209 number_of_dots += 1;
1210 break;
1211 default:
Lysandros Nikolaouebebb642020-04-23 18:36:06 +03001212 Py_UNREACHABLE();
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001213 }
1214 }
1215
1216 return number_of_dots;
1217}
1218
1219/* Creates an alias with '*' as the identifier name */
1220alias_ty
1221_PyPegen_alias_for_star(Parser *p)
1222{
1223 PyObject *str = PyUnicode_InternFromString("*");
1224 if (!str) {
1225 return NULL;
1226 }
1227 if (PyArena_AddPyObject(p->arena, str) < 0) {
1228 Py_DECREF(str);
1229 return NULL;
1230 }
1231 return alias(str, NULL, p->arena);
1232}
1233
1234/* Creates a new asdl_seq* with the identifiers of all the names in seq */
1235asdl_seq *
1236_PyPegen_map_names_to_ids(Parser *p, asdl_seq *seq)
1237{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001238 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001239 assert(len > 0);
1240
1241 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1242 if (!new_seq) {
1243 return NULL;
1244 }
1245 for (Py_ssize_t i = 0; i < len; i++) {
1246 expr_ty e = asdl_seq_GET(seq, i);
1247 asdl_seq_SET(new_seq, i, e->v.Name.id);
1248 }
1249 return new_seq;
1250}
1251
1252/* Constructs a CmpopExprPair */
1253CmpopExprPair *
1254_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
1255{
1256 assert(expr != NULL);
1257 CmpopExprPair *a = PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
1258 if (!a) {
1259 return NULL;
1260 }
1261 a->cmpop = cmpop;
1262 a->expr = expr;
1263 return a;
1264}
1265
1266asdl_int_seq *
1267_PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
1268{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001269 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001270 assert(len > 0);
1271
1272 asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
1273 if (!new_seq) {
1274 return NULL;
1275 }
1276 for (Py_ssize_t i = 0; i < len; i++) {
1277 CmpopExprPair *pair = asdl_seq_GET(seq, i);
1278 asdl_seq_SET(new_seq, i, pair->cmpop);
1279 }
1280 return new_seq;
1281}
1282
1283asdl_seq *
1284_PyPegen_get_exprs(Parser *p, asdl_seq *seq)
1285{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001286 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001287 assert(len > 0);
1288
1289 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1290 if (!new_seq) {
1291 return NULL;
1292 }
1293 for (Py_ssize_t i = 0; i < len; i++) {
1294 CmpopExprPair *pair = asdl_seq_GET(seq, i);
1295 asdl_seq_SET(new_seq, i, pair->expr);
1296 }
1297 return new_seq;
1298}
1299
1300/* Creates an asdl_seq* where all the elements have been changed to have ctx as context */
1301static asdl_seq *
1302_set_seq_context(Parser *p, asdl_seq *seq, expr_context_ty ctx)
1303{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001304 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001305 if (len == 0) {
1306 return NULL;
1307 }
1308
1309 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1310 if (!new_seq) {
1311 return NULL;
1312 }
1313 for (Py_ssize_t i = 0; i < len; i++) {
1314 expr_ty e = asdl_seq_GET(seq, i);
1315 asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
1316 }
1317 return new_seq;
1318}
1319
1320static expr_ty
1321_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
1322{
1323 return _Py_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
1324}
1325
1326static expr_ty
1327_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
1328{
1329 return _Py_Tuple(_set_seq_context(p, e->v.Tuple.elts, ctx), ctx, EXTRA_EXPR(e, e));
1330}
1331
1332static expr_ty
1333_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
1334{
1335 return _Py_List(_set_seq_context(p, e->v.List.elts, ctx), ctx, EXTRA_EXPR(e, e));
1336}
1337
1338static expr_ty
1339_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
1340{
1341 return _Py_Subscript(e->v.Subscript.value, e->v.Subscript.slice, ctx, EXTRA_EXPR(e, e));
1342}
1343
1344static expr_ty
1345_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
1346{
1347 return _Py_Attribute(e->v.Attribute.value, e->v.Attribute.attr, ctx, EXTRA_EXPR(e, e));
1348}
1349
1350static expr_ty
1351_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
1352{
1353 return _Py_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx), ctx, EXTRA_EXPR(e, e));
1354}
1355
1356/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
1357expr_ty
1358_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
1359{
1360 assert(expr != NULL);
1361
1362 expr_ty new = NULL;
1363 switch (expr->kind) {
1364 case Name_kind:
1365 new = _set_name_context(p, expr, ctx);
1366 break;
1367 case Tuple_kind:
1368 new = _set_tuple_context(p, expr, ctx);
1369 break;
1370 case List_kind:
1371 new = _set_list_context(p, expr, ctx);
1372 break;
1373 case Subscript_kind:
1374 new = _set_subscript_context(p, expr, ctx);
1375 break;
1376 case Attribute_kind:
1377 new = _set_attribute_context(p, expr, ctx);
1378 break;
1379 case Starred_kind:
1380 new = _set_starred_context(p, expr, ctx);
1381 break;
1382 default:
1383 new = expr;
1384 }
1385 return new;
1386}
1387
1388/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
1389KeyValuePair *
1390_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
1391{
1392 KeyValuePair *a = PyArena_Malloc(p->arena, sizeof(KeyValuePair));
1393 if (!a) {
1394 return NULL;
1395 }
1396 a->key = key;
1397 a->value = value;
1398 return a;
1399}
1400
1401/* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
1402asdl_seq *
1403_PyPegen_get_keys(Parser *p, asdl_seq *seq)
1404{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001405 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001406 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1407 if (!new_seq) {
1408 return NULL;
1409 }
1410 for (Py_ssize_t i = 0; i < len; i++) {
1411 KeyValuePair *pair = asdl_seq_GET(seq, i);
1412 asdl_seq_SET(new_seq, i, pair->key);
1413 }
1414 return new_seq;
1415}
1416
1417/* Extracts all values from an asdl_seq* of KeyValuePair*'s */
1418asdl_seq *
1419_PyPegen_get_values(Parser *p, asdl_seq *seq)
1420{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001421 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001422 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1423 if (!new_seq) {
1424 return NULL;
1425 }
1426 for (Py_ssize_t i = 0; i < len; i++) {
1427 KeyValuePair *pair = asdl_seq_GET(seq, i);
1428 asdl_seq_SET(new_seq, i, pair->value);
1429 }
1430 return new_seq;
1431}
1432
1433/* Constructs a NameDefaultPair */
1434NameDefaultPair *
1435_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value)
1436{
1437 NameDefaultPair *a = PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
1438 if (!a) {
1439 return NULL;
1440 }
1441 a->arg = arg;
1442 a->value = value;
1443 return a;
1444}
1445
1446/* Constructs a SlashWithDefault */
1447SlashWithDefault *
1448_PyPegen_slash_with_default(Parser *p, asdl_seq *plain_names, asdl_seq *names_with_defaults)
1449{
1450 SlashWithDefault *a = PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
1451 if (!a) {
1452 return NULL;
1453 }
1454 a->plain_names = plain_names;
1455 a->names_with_defaults = names_with_defaults;
1456 return a;
1457}
1458
1459/* Constructs a StarEtc */
1460StarEtc *
1461_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
1462{
1463 StarEtc *a = PyArena_Malloc(p->arena, sizeof(StarEtc));
1464 if (!a) {
1465 return NULL;
1466 }
1467 a->vararg = vararg;
1468 a->kwonlyargs = kwonlyargs;
1469 a->kwarg = kwarg;
1470 return a;
1471}
1472
1473asdl_seq *
1474_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
1475{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001476 Py_ssize_t first_len = asdl_seq_LEN(a);
1477 Py_ssize_t second_len = asdl_seq_LEN(b);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001478 asdl_seq *new_seq = _Py_asdl_seq_new(first_len + second_len, p->arena);
1479 if (!new_seq) {
1480 return NULL;
1481 }
1482
1483 int k = 0;
1484 for (Py_ssize_t i = 0; i < first_len; i++) {
1485 asdl_seq_SET(new_seq, k++, asdl_seq_GET(a, i));
1486 }
1487 for (Py_ssize_t i = 0; i < second_len; i++) {
1488 asdl_seq_SET(new_seq, k++, asdl_seq_GET(b, i));
1489 }
1490
1491 return new_seq;
1492}
1493
1494static asdl_seq *
1495_get_names(Parser *p, asdl_seq *names_with_defaults)
1496{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001497 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001498 asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
1499 if (!seq) {
1500 return NULL;
1501 }
1502 for (Py_ssize_t i = 0; i < len; i++) {
1503 NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
1504 asdl_seq_SET(seq, i, pair->arg);
1505 }
1506 return seq;
1507}
1508
1509static asdl_seq *
1510_get_defaults(Parser *p, asdl_seq *names_with_defaults)
1511{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001512 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001513 asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
1514 if (!seq) {
1515 return NULL;
1516 }
1517 for (Py_ssize_t i = 0; i < len; i++) {
1518 NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
1519 asdl_seq_SET(seq, i, pair->value);
1520 }
1521 return seq;
1522}
1523
1524/* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
1525arguments_ty
1526_PyPegen_make_arguments(Parser *p, asdl_seq *slash_without_default,
1527 SlashWithDefault *slash_with_default, asdl_seq *plain_names,
1528 asdl_seq *names_with_default, StarEtc *star_etc)
1529{
1530 asdl_seq *posonlyargs;
1531 if (slash_without_default != NULL) {
1532 posonlyargs = slash_without_default;
1533 }
1534 else if (slash_with_default != NULL) {
1535 asdl_seq *slash_with_default_names =
1536 _get_names(p, slash_with_default->names_with_defaults);
1537 if (!slash_with_default_names) {
1538 return NULL;
1539 }
1540 posonlyargs = _PyPegen_join_sequences(p, slash_with_default->plain_names, slash_with_default_names);
1541 if (!posonlyargs) {
1542 return NULL;
1543 }
1544 }
1545 else {
1546 posonlyargs = _Py_asdl_seq_new(0, p->arena);
1547 if (!posonlyargs) {
1548 return NULL;
1549 }
1550 }
1551
1552 asdl_seq *posargs;
1553 if (plain_names != NULL && names_with_default != NULL) {
1554 asdl_seq *names_with_default_names = _get_names(p, names_with_default);
1555 if (!names_with_default_names) {
1556 return NULL;
1557 }
1558 posargs = _PyPegen_join_sequences(p, plain_names, names_with_default_names);
1559 if (!posargs) {
1560 return NULL;
1561 }
1562 }
1563 else if (plain_names == NULL && names_with_default != NULL) {
1564 posargs = _get_names(p, names_with_default);
1565 if (!posargs) {
1566 return NULL;
1567 }
1568 }
1569 else if (plain_names != NULL && names_with_default == NULL) {
1570 posargs = plain_names;
1571 }
1572 else {
1573 posargs = _Py_asdl_seq_new(0, p->arena);
1574 if (!posargs) {
1575 return NULL;
1576 }
1577 }
1578
1579 asdl_seq *posdefaults;
1580 if (slash_with_default != NULL && names_with_default != NULL) {
1581 asdl_seq *slash_with_default_values =
1582 _get_defaults(p, slash_with_default->names_with_defaults);
1583 if (!slash_with_default_values) {
1584 return NULL;
1585 }
1586 asdl_seq *names_with_default_values = _get_defaults(p, names_with_default);
1587 if (!names_with_default_values) {
1588 return NULL;
1589 }
1590 posdefaults = _PyPegen_join_sequences(p, slash_with_default_values, names_with_default_values);
1591 if (!posdefaults) {
1592 return NULL;
1593 }
1594 }
1595 else if (slash_with_default == NULL && names_with_default != NULL) {
1596 posdefaults = _get_defaults(p, names_with_default);
1597 if (!posdefaults) {
1598 return NULL;
1599 }
1600 }
1601 else if (slash_with_default != NULL && names_with_default == NULL) {
1602 posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
1603 if (!posdefaults) {
1604 return NULL;
1605 }
1606 }
1607 else {
1608 posdefaults = _Py_asdl_seq_new(0, p->arena);
1609 if (!posdefaults) {
1610 return NULL;
1611 }
1612 }
1613
1614 arg_ty vararg = NULL;
1615 if (star_etc != NULL && star_etc->vararg != NULL) {
1616 vararg = star_etc->vararg;
1617 }
1618
1619 asdl_seq *kwonlyargs;
1620 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
1621 kwonlyargs = _get_names(p, star_etc->kwonlyargs);
1622 if (!kwonlyargs) {
1623 return NULL;
1624 }
1625 }
1626 else {
1627 kwonlyargs = _Py_asdl_seq_new(0, p->arena);
1628 if (!kwonlyargs) {
1629 return NULL;
1630 }
1631 }
1632
1633 asdl_seq *kwdefaults;
1634 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
1635 kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
1636 if (!kwdefaults) {
1637 return NULL;
1638 }
1639 }
1640 else {
1641 kwdefaults = _Py_asdl_seq_new(0, p->arena);
1642 if (!kwdefaults) {
1643 return NULL;
1644 }
1645 }
1646
1647 arg_ty kwarg = NULL;
1648 if (star_etc != NULL && star_etc->kwarg != NULL) {
1649 kwarg = star_etc->kwarg;
1650 }
1651
1652 return _Py_arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg,
1653 posdefaults, p->arena);
1654}
1655
1656/* Constructs an empty arguments_ty object, that gets used when a function accepts no
1657 * arguments. */
1658arguments_ty
1659_PyPegen_empty_arguments(Parser *p)
1660{
1661 asdl_seq *posonlyargs = _Py_asdl_seq_new(0, p->arena);
1662 if (!posonlyargs) {
1663 return NULL;
1664 }
1665 asdl_seq *posargs = _Py_asdl_seq_new(0, p->arena);
1666 if (!posargs) {
1667 return NULL;
1668 }
1669 asdl_seq *posdefaults = _Py_asdl_seq_new(0, p->arena);
1670 if (!posdefaults) {
1671 return NULL;
1672 }
1673 asdl_seq *kwonlyargs = _Py_asdl_seq_new(0, p->arena);
1674 if (!kwonlyargs) {
1675 return NULL;
1676 }
1677 asdl_seq *kwdefaults = _Py_asdl_seq_new(0, p->arena);
1678 if (!kwdefaults) {
1679 return NULL;
1680 }
1681
1682 return _Py_arguments(posonlyargs, posargs, NULL, kwonlyargs, kwdefaults, NULL, kwdefaults,
1683 p->arena);
1684}
1685
1686/* Encapsulates the value of an operator_ty into an AugOperator struct */
1687AugOperator *
1688_PyPegen_augoperator(Parser *p, operator_ty kind)
1689{
1690 AugOperator *a = PyArena_Malloc(p->arena, sizeof(AugOperator));
1691 if (!a) {
1692 return NULL;
1693 }
1694 a->kind = kind;
1695 return a;
1696}
1697
1698/* Construct a FunctionDef equivalent to function_def, but with decorators */
1699stmt_ty
1700_PyPegen_function_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty function_def)
1701{
1702 assert(function_def != NULL);
1703 if (function_def->kind == AsyncFunctionDef_kind) {
1704 return _Py_AsyncFunctionDef(
1705 function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
1706 function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns,
1707 function_def->v.FunctionDef.type_comment, function_def->lineno,
1708 function_def->col_offset, function_def->end_lineno, function_def->end_col_offset,
1709 p->arena);
1710 }
1711
1712 return _Py_FunctionDef(function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
1713 function_def->v.FunctionDef.body, decorators,
1714 function_def->v.FunctionDef.returns,
1715 function_def->v.FunctionDef.type_comment, function_def->lineno,
1716 function_def->col_offset, function_def->end_lineno,
1717 function_def->end_col_offset, p->arena);
1718}
1719
1720/* Construct a ClassDef equivalent to class_def, but with decorators */
1721stmt_ty
1722_PyPegen_class_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty class_def)
1723{
1724 assert(class_def != NULL);
1725 return _Py_ClassDef(class_def->v.ClassDef.name, class_def->v.ClassDef.bases,
1726 class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators,
1727 class_def->lineno, class_def->col_offset, class_def->end_lineno,
1728 class_def->end_col_offset, p->arena);
1729}
1730
1731/* Construct a KeywordOrStarred */
1732KeywordOrStarred *
1733_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
1734{
1735 KeywordOrStarred *a = PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
1736 if (!a) {
1737 return NULL;
1738 }
1739 a->element = element;
1740 a->is_keyword = is_keyword;
1741 return a;
1742}
1743
1744/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
1745static int
1746_seq_number_of_starred_exprs(asdl_seq *seq)
1747{
1748 int n = 0;
1749 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1750 KeywordOrStarred *k = asdl_seq_GET(seq, i);
1751 if (!k->is_keyword) {
1752 n++;
1753 }
1754 }
1755 return n;
1756}
1757
1758/* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */
1759asdl_seq *
1760_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
1761{
1762 int new_len = _seq_number_of_starred_exprs(kwargs);
1763 if (new_len == 0) {
1764 return NULL;
1765 }
1766 asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
1767 if (!new_seq) {
1768 return NULL;
1769 }
1770
1771 int idx = 0;
1772 for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
1773 KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
1774 if (!k->is_keyword) {
1775 asdl_seq_SET(new_seq, idx++, k->element);
1776 }
1777 }
1778 return new_seq;
1779}
1780
1781/* Return a new asdl_seq* with only the keywords in kwargs */
1782asdl_seq *
1783_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
1784{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001785 Py_ssize_t len = asdl_seq_LEN(kwargs);
1786 Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001787 if (new_len == 0) {
1788 return NULL;
1789 }
1790 asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
1791 if (!new_seq) {
1792 return NULL;
1793 }
1794
1795 int idx = 0;
1796 for (Py_ssize_t i = 0; i < len; i++) {
1797 KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
1798 if (k->is_keyword) {
1799 asdl_seq_SET(new_seq, idx++, k->element);
1800 }
1801 }
1802 return new_seq;
1803}
1804
1805expr_ty
1806_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
1807{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001808 Py_ssize_t len = asdl_seq_LEN(strings);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001809 assert(len > 0);
1810
1811 Token *first = asdl_seq_GET(strings, 0);
1812 Token *last = asdl_seq_GET(strings, len - 1);
1813
1814 int bytesmode = 0;
1815 PyObject *bytes_str = NULL;
1816
1817 FstringParser state;
1818 _PyPegen_FstringParser_Init(&state);
1819
1820 for (Py_ssize_t i = 0; i < len; i++) {
1821 Token *t = asdl_seq_GET(strings, i);
1822
1823 int this_bytesmode;
1824 int this_rawmode;
1825 PyObject *s;
1826 const char *fstr;
1827 Py_ssize_t fstrlen = -1;
1828
1829 char *this_str = PyBytes_AsString(t->bytes);
1830 if (!this_str) {
1831 goto error;
1832 }
1833
1834 if (_PyPegen_parsestr(p, this_str, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen) != 0) {
1835 goto error;
1836 }
1837
1838 /* Check that we are not mixing bytes with unicode. */
1839 if (i != 0 && bytesmode != this_bytesmode) {
1840 RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
1841 Py_XDECREF(s);
1842 goto error;
1843 }
1844 bytesmode = this_bytesmode;
1845
1846 if (fstr != NULL) {
1847 assert(s == NULL && !bytesmode);
1848
1849 int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
1850 this_rawmode, 0, first, t, last);
1851 if (result < 0) {
1852 goto error;
1853 }
1854 }
1855 else {
1856 /* String or byte string. */
1857 assert(s != NULL && fstr == NULL);
1858 assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
1859
1860 if (bytesmode) {
1861 if (i == 0) {
1862 bytes_str = s;
1863 }
1864 else {
1865 PyBytes_ConcatAndDel(&bytes_str, s);
1866 if (!bytes_str) {
1867 goto error;
1868 }
1869 }
1870 }
1871 else {
1872 /* This is a regular string. Concatenate it. */
1873 if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
1874 goto error;
1875 }
1876 }
1877 }
1878 }
1879
1880 if (bytesmode) {
1881 if (PyArena_AddPyObject(p->arena, bytes_str) < 0) {
1882 goto error;
1883 }
1884 return Constant(bytes_str, NULL, first->lineno, first->col_offset, last->end_lineno,
1885 last->end_col_offset, p->arena);
1886 }
1887
1888 return _PyPegen_FstringParser_Finish(p, &state, first, last);
1889
1890error:
1891 Py_XDECREF(bytes_str);
1892 _PyPegen_FstringParser_Dealloc(&state);
1893 if (PyErr_Occurred()) {
1894 raise_decode_error(p);
1895 }
1896 return NULL;
1897}