blob: 44198ab67b8ee09ed7002e149328ab318f9e8e32 [file] [log] [blame]
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001#include <Python.h>
2#include <errcode.h>
3#include "../tokenizer.h"
4
5#include "pegen.h"
6#include "parse_string.h"
7
8static int
9init_normalization(Parser *p)
10{
11 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
12 if (!m)
13 {
14 return 0;
15 }
16 p->normalize = PyObject_GetAttrString(m, "normalize");
17 Py_DECREF(m);
18 if (!p->normalize)
19 {
20 return 0;
21 }
22 return 1;
23}
24
25PyObject *
26_PyPegen_new_identifier(Parser *p, char *n)
27{
28 PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
29 if (!id) {
30 goto error;
31 }
32 /* PyUnicode_DecodeUTF8 should always return a ready string. */
33 assert(PyUnicode_IS_READY(id));
34 /* Check whether there are non-ASCII characters in the
35 identifier; if so, normalize to NFKC. */
36 if (!PyUnicode_IS_ASCII(id))
37 {
38 PyObject *id2;
39 if (!p->normalize && !init_normalization(p))
40 {
41 Py_DECREF(id);
42 goto error;
43 }
44 PyObject *form = PyUnicode_InternFromString("NFKC");
45 if (form == NULL)
46 {
47 Py_DECREF(id);
48 goto error;
49 }
50 PyObject *args[2] = {form, id};
51 id2 = _PyObject_FastCall(p->normalize, args, 2);
52 Py_DECREF(id);
53 Py_DECREF(form);
54 if (!id2) {
55 goto error;
56 }
57 if (!PyUnicode_Check(id2))
58 {
59 PyErr_Format(PyExc_TypeError,
60 "unicodedata.normalize() must return a string, not "
61 "%.200s",
62 _PyType_Name(Py_TYPE(id2)));
63 Py_DECREF(id2);
64 goto error;
65 }
66 id = id2;
67 }
68 PyUnicode_InternInPlace(&id);
69 if (PyArena_AddPyObject(p->arena, id) < 0)
70 {
71 Py_DECREF(id);
72 goto error;
73 }
74 return id;
75
76error:
77 p->error_indicator = 1;
78 return NULL;
79}
80
81static PyObject *
82_create_dummy_identifier(Parser *p)
83{
84 return _PyPegen_new_identifier(p, "");
85}
86
87static inline Py_ssize_t
88byte_offset_to_character_offset(PyObject *line, int col_offset)
89{
90 const char *str = PyUnicode_AsUTF8(line);
91 PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, NULL);
92 if (!text) {
93 return 0;
94 }
95 Py_ssize_t size = PyUnicode_GET_LENGTH(text);
96 Py_DECREF(text);
97 return size;
98}
99
100const char *
101_PyPegen_get_expr_name(expr_ty e)
102{
103 switch (e->kind) {
104 case Attribute_kind:
105 return "attribute";
106 case Subscript_kind:
107 return "subscript";
108 case Starred_kind:
109 return "starred";
110 case Name_kind:
111 return "name";
112 case List_kind:
113 return "list";
114 case Tuple_kind:
115 return "tuple";
116 case Lambda_kind:
117 return "lambda";
118 case Call_kind:
119 return "function call";
120 case BoolOp_kind:
121 case BinOp_kind:
122 case UnaryOp_kind:
123 return "operator";
124 case GeneratorExp_kind:
125 return "generator expression";
126 case Yield_kind:
127 case YieldFrom_kind:
128 return "yield expression";
129 case Await_kind:
130 return "await expression";
131 case ListComp_kind:
132 return "list comprehension";
133 case SetComp_kind:
134 return "set comprehension";
135 case DictComp_kind:
136 return "dict comprehension";
137 case Dict_kind:
138 return "dict display";
139 case Set_kind:
140 return "set display";
141 case JoinedStr_kind:
142 case FormattedValue_kind:
143 return "f-string expression";
144 case Constant_kind: {
145 PyObject *value = e->v.Constant.value;
146 if (value == Py_None) {
147 return "None";
148 }
149 if (value == Py_False) {
150 return "False";
151 }
152 if (value == Py_True) {
153 return "True";
154 }
155 if (value == Py_Ellipsis) {
156 return "Ellipsis";
157 }
158 return "literal";
159 }
160 case Compare_kind:
161 return "comparison";
162 case IfExp_kind:
163 return "conditional expression";
164 case NamedExpr_kind:
165 return "named expression";
166 default:
167 PyErr_Format(PyExc_SystemError,
168 "unexpected expression in assignment %d (line %d)",
169 e->kind, e->lineno);
170 return NULL;
171 }
172}
173
174static void
175raise_decode_error(Parser *p)
176{
177 const char *errtype = NULL;
178 if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
179 errtype = "unicode error";
180 }
181 else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
182 errtype = "value error";
183 }
184 if (errtype) {
185 PyObject *type, *value, *tback, *errstr;
186 PyErr_Fetch(&type, &value, &tback);
187 errstr = PyObject_Str(value);
188 if (errstr) {
189 RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
190 Py_DECREF(errstr);
191 }
192 else {
193 PyErr_Clear();
194 RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
195 }
196 Py_XDECREF(type);
197 Py_XDECREF(value);
198 Py_XDECREF(tback);
199 }
200}
201
202static void
203raise_tokenizer_init_error(PyObject *filename)
204{
205 if (!(PyErr_ExceptionMatches(PyExc_LookupError)
206 || PyErr_ExceptionMatches(PyExc_ValueError)
207 || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
208 return;
209 }
210 PyObject *type, *value, *tback, *errstr;
211 PyErr_Fetch(&type, &value, &tback);
212 errstr = PyObject_Str(value);
213
214 Py_INCREF(Py_None);
215 PyObject *tmp = Py_BuildValue("(OiiN)", filename, 0, -1, Py_None);
216 if (!tmp) {
217 goto error;
218 }
219
220 value = PyTuple_Pack(2, errstr, tmp);
221 Py_DECREF(tmp);
222 if (!value) {
223 goto error;
224 }
225 PyErr_SetObject(PyExc_SyntaxError, value);
226
227error:
228 Py_XDECREF(type);
229 Py_XDECREF(value);
230 Py_XDECREF(tback);
231}
232
233static inline PyObject *
234get_error_line(char *buffer)
235{
236 char *newline = strchr(buffer, '\n');
237 if (newline) {
238 return PyUnicode_FromStringAndSize(buffer, newline - buffer);
239 }
240 else {
241 return PyUnicode_FromString(buffer);
242 }
243}
244
245static int
246tokenizer_error_with_col_offset(Parser *p, PyObject *errtype, const char *errmsg)
247{
248 PyObject *errstr = NULL;
249 PyObject *value = NULL;
Pablo Galindoee40e4b2020-04-23 03:43:08 +0100250 size_t col_number = -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100251
252 errstr = PyUnicode_FromString(errmsg);
253 if (!errstr) {
254 return -1;
255 }
256
257 PyObject *loc = NULL;
258 if (p->start_rule == Py_file_input) {
259 loc = PyErr_ProgramTextObject(p->tok->filename, p->tok->lineno);
260 }
261 if (!loc) {
262 loc = get_error_line(p->tok->buf);
263 }
264
265 if (loc) {
266 col_number = p->tok->cur - p->tok->buf;
267 }
268 else {
269 Py_INCREF(Py_None);
270 loc = Py_None;
271 }
272
273 PyObject *tmp = Py_BuildValue("(OiiN)", p->tok->filename, p->tok->lineno,
274 col_number, loc);
275 if (!tmp) {
276 goto error;
277 }
278
279 value = PyTuple_Pack(2, errstr, tmp);
280 Py_DECREF(tmp);
281 if (!value) {
282 goto error;
283 }
284 PyErr_SetObject(errtype, value);
285
286 Py_XDECREF(value);
287 Py_XDECREF(errstr);
288 return -1;
289
290error:
291 Py_XDECREF(errstr);
292 Py_XDECREF(loc);
293 return -1;
294}
295
296static int
297tokenizer_error(Parser *p)
298{
299 if (PyErr_Occurred()) {
300 return -1;
301 }
302
303 const char *msg = NULL;
304 PyObject* errtype = PyExc_SyntaxError;
305 switch (p->tok->done) {
306 case E_TOKEN:
307 msg = "invalid token";
308 break;
309 case E_IDENTIFIER:
310 msg = "invalid character in identifier";
311 break;
312 case E_BADPREFIX:
313 return tokenizer_error_with_col_offset(p,
314 PyExc_SyntaxError, "invalid string prefix");
315 case E_EOFS:
316 return tokenizer_error_with_col_offset(p,
317 PyExc_SyntaxError, "EOF while scanning triple-quoted string literal");
318 case E_EOLS:
319 return tokenizer_error_with_col_offset(p,
320 PyExc_SyntaxError, "EOL while scanning string literal");
321 case E_DEDENT:
322 return tokenizer_error_with_col_offset(p,
323 PyExc_IndentationError, "unindent does not match any outer indentation level");
324 case E_INTR:
325 if (!PyErr_Occurred()) {
326 PyErr_SetNone(PyExc_KeyboardInterrupt);
327 }
328 return -1;
329 case E_NOMEM:
330 PyErr_NoMemory();
331 return -1;
332 case E_TABSPACE:
333 errtype = PyExc_TabError;
334 msg = "inconsistent use of tabs and spaces in indentation";
335 break;
336 case E_TOODEEP:
337 errtype = PyExc_IndentationError;
338 msg = "too many levels of indentation";
339 break;
340 case E_DECODE:
341 raise_decode_error(p);
342 return -1;
343 case E_LINECONT:
344 msg = "unexpected character after line continuation character";
345 break;
346 default:
347 msg = "unknown parsing error";
348 }
349
350 PyErr_Format(errtype, msg);
351 // There is no reliable column information for this error
352 PyErr_SyntaxLocationObject(p->tok->filename, p->tok->lineno, 0);
353
354 return -1;
355}
356
357void *
358_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
359{
360 PyObject *value = NULL;
361 PyObject *errstr = NULL;
362 PyObject *loc = NULL;
363 PyObject *tmp = NULL;
364 Token *t = p->tokens[p->fill - 1];
365 Py_ssize_t col_number = 0;
366 va_list va;
367
368 va_start(va, errmsg);
369 errstr = PyUnicode_FromFormatV(errmsg, va);
370 va_end(va);
371 if (!errstr) {
372 goto error;
373 }
374
375 if (p->start_rule == Py_file_input) {
376 loc = PyErr_ProgramTextObject(p->tok->filename, t->lineno);
377 }
378
379 if (!loc) {
380 loc = get_error_line(p->tok->buf);
381 }
382
383 if (loc) {
384 int col_offset = t->col_offset == -1 ? 0 : t->col_offset;
385 col_number = byte_offset_to_character_offset(loc, col_offset) + 1;
386 }
387 else {
388 Py_INCREF(Py_None);
389 loc = Py_None;
390 }
391
392
393 tmp = Py_BuildValue("(OiiN)", p->tok->filename, t->lineno, col_number, loc);
394 if (!tmp) {
395 goto error;
396 }
397 value = PyTuple_Pack(2, errstr, tmp);
398 Py_DECREF(tmp);
399 if (!value) {
400 goto error;
401 }
402 PyErr_SetObject(errtype, value);
403
404 Py_DECREF(errstr);
405 Py_DECREF(value);
406 return NULL;
407
408error:
409 Py_XDECREF(errstr);
410 Py_XDECREF(loc);
411 return NULL;
412}
413
414void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
415 int kwarg_unpacking = 0;
416 for (Py_ssize_t i = 0, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) {
417 keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i);
418 if (!keyword->arg) {
419 kwarg_unpacking = 1;
420 }
421 }
422
423 const char *msg = NULL;
424 if (kwarg_unpacking) {
425 msg = "positional argument follows keyword argument unpacking";
426 } else {
427 msg = "positional argument follows keyword argument";
428 }
429
430 return RAISE_SYNTAX_ERROR(msg);
431}
432
#if 0
/* Debugging aid (compiled out): map a token type to its entry in
   _PyParser_TokenNames, or "<Huh?>" when the type is outside 0..N_TOKENS. */
static const char *
token_name(int type)
{
    if (type < 0 || type > N_TOKENS) {
        return "<Huh?>";
    }
    return _PyParser_TokenNames[type];
}
#endif
443
444// Here, mark is the start of the node, while p->mark is the end.
445// If node==NULL, they should be the same.
446int
447_PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
448{
449 // Insert in front
450 Memo *m = PyArena_Malloc(p->arena, sizeof(Memo));
451 if (m == NULL) {
452 return -1;
453 }
454 m->type = type;
455 m->node = node;
456 m->mark = p->mark;
457 m->next = p->tokens[mark]->memo;
458 p->tokens[mark]->memo = m;
459 return 0;
460}
461
462// Like _PyPegen_insert_memo(), but updates an existing node if found.
463int
464_PyPegen_update_memo(Parser *p, int mark, int type, void *node)
465{
466 for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
467 if (m->type == type) {
468 // Update existing node.
469 m->node = node;
470 m->mark = p->mark;
471 return 0;
472 }
473 }
474 // Insert new node.
475 return _PyPegen_insert_memo(p, mark, type, node);
476}
477
478// Return dummy NAME.
479void *
480_PyPegen_dummy_name(Parser *p, ...)
481{
482 static void *cache = NULL;
483
484 if (cache != NULL) {
485 return cache;
486 }
487
488 PyObject *id = _create_dummy_identifier(p);
489 if (!id) {
490 return NULL;
491 }
492 cache = Name(id, Load, 1, 0, 1, 0, p->arena);
493 return cache;
494}
495
496static int
497_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
498{
499 if (name_len >= p->n_keyword_lists || p->keywords[name_len] == NULL) {
500 return NAME;
501 }
502 for (KeywordToken *k = p->keywords[name_len]; k->type != -1; k++) {
503 if (strncmp(k->str, name, name_len) == 0) {
504 return k->type;
505 }
506 }
507 return NAME;
508}
509
510int
511_PyPegen_fill_token(Parser *p)
512{
513 const char *start, *end;
514 int type = PyTokenizer_Get(p->tok, &start, &end);
515 if (type == ERRORTOKEN) {
516 return tokenizer_error(p);
517 }
518 if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
519 type = NEWLINE; /* Add an extra newline */
520 p->parsing_started = 0;
521
522 if (p->tok->indent) {
523 p->tok->pendin = -p->tok->indent;
524 p->tok->indent = 0;
525 }
526 }
527 else {
528 p->parsing_started = 1;
529 }
530
531 if (p->fill == p->size) {
532 int newsize = p->size * 2;
533 p->tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
534 if (p->tokens == NULL) {
535 PyErr_Format(PyExc_MemoryError, "Realloc tokens failed");
536 return -1;
537 }
538 for (int i = p->size; i < newsize; i++) {
539 p->tokens[i] = PyMem_Malloc(sizeof(Token));
540 memset(p->tokens[i], '\0', sizeof(Token));
541 }
542 p->size = newsize;
543 }
544
545 Token *t = p->tokens[p->fill];
546 t->type = (type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : type;
547 t->bytes = PyBytes_FromStringAndSize(start, end - start);
548 if (t->bytes == NULL) {
549 return -1;
550 }
551 PyArena_AddPyObject(p->arena, t->bytes);
552
553 int lineno = type == STRING ? p->tok->first_lineno : p->tok->lineno;
554 const char *line_start = type == STRING ? p->tok->multi_line_start : p->tok->line_start;
Pablo Galindoee40e4b2020-04-23 03:43:08 +0100555 size_t end_lineno = p->tok->lineno;
556 size_t col_offset = -1, end_col_offset = -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100557 if (start != NULL && start >= line_start) {
558 col_offset = start - line_start;
559 }
560 if (end != NULL && end >= p->tok->line_start) {
561 end_col_offset = end - p->tok->line_start;
562 }
563
564 t->lineno = p->starting_lineno + lineno;
565 t->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
566 t->end_lineno = p->starting_lineno + end_lineno;
567 t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
568
569 // if (p->fill % 100 == 0) fprintf(stderr, "Filled at %d: %s \"%s\"\n", p->fill,
570 // token_name(type), PyBytes_AsString(t->bytes));
571 p->fill += 1;
572 return 0;
573}
574
// Instrumentation to count the effectiveness of memoization.
// The array counts the number of tokens skipped by memoization,
// indexed by type.

#define NSTATISTICS 2000
static long memo_statistics[NSTATISTICS];

// Reset every memoization counter to zero.
void
_PyPegen_clear_memo_statistics()
{
    memset(memo_statistics, 0, sizeof(memo_statistics));
}
589
590PyObject *
591_PyPegen_get_memo_statistics()
592{
593 PyObject *ret = PyList_New(NSTATISTICS);
594 if (ret == NULL) {
595 return NULL;
596 }
597 for (int i = 0; i < NSTATISTICS; i++) {
598 PyObject *value = PyLong_FromLong(memo_statistics[i]);
599 if (value == NULL) {
600 Py_DECREF(ret);
601 return NULL;
602 }
603 // PyList_SetItem borrows a reference to value.
604 if (PyList_SetItem(ret, i, value) < 0) {
605 Py_DECREF(ret);
606 return NULL;
607 }
608 }
609 return ret;
610}
611
612int // bool
613_PyPegen_is_memoized(Parser *p, int type, void *pres)
614{
615 if (p->mark == p->fill) {
616 if (_PyPegen_fill_token(p) < 0) {
617 return -1;
618 }
619 }
620
621 Token *t = p->tokens[p->mark];
622
623 for (Memo *m = t->memo; m != NULL; m = m->next) {
624 if (m->type == type) {
625 if (0 <= type && type < NSTATISTICS) {
626 long count = m->mark - p->mark;
627 // A memoized negative result counts for one.
628 if (count <= 0) {
629 count = 1;
630 }
631 memo_statistics[type] += count;
632 }
633 p->mark = m->mark;
634 *(void **)(pres) = m->node;
635 // fprintf(stderr, "%d < %d: memoized!\n", p->mark, p->fill);
636 return 1;
637 }
638 }
639 // fprintf(stderr, "%d < %d: not memoized\n", p->mark, p->fill);
640 return 0;
641}
642
643int
644_PyPegen_lookahead_with_string(int positive, void *(func)(Parser *, const char *), Parser *p,
645 const char *arg)
646{
647 int mark = p->mark;
648 void *res = func(p, arg);
649 p->mark = mark;
650 return (res != NULL) == positive;
651}
652
653int
654_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
655{
656 int mark = p->mark;
657 void *res = func(p, arg);
658 p->mark = mark;
659 return (res != NULL) == positive;
660}
661
662int
663_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
664{
665 int mark = p->mark;
666 void *res = func(p);
667 p->mark = mark;
668 return (res != NULL) == positive;
669}
670
671Token *
672_PyPegen_expect_token(Parser *p, int type)
673{
674 if (p->mark == p->fill) {
675 if (_PyPegen_fill_token(p) < 0) {
676 return NULL;
677 }
678 }
679 Token *t = p->tokens[p->mark];
680 if (t->type != type) {
681 // fprintf(stderr, "No %s at %d\n", token_name(type), p->mark);
682 return NULL;
683 }
684 p->mark += 1;
685 // fprintf(stderr, "Got %s at %d: %s\n", token_name(type), p->mark,
686 // PyBytes_AsString(t->bytes));
687
688 return t;
689}
690
691Token *
692_PyPegen_get_last_nonnwhitespace_token(Parser *p)
693{
694 assert(p->mark >= 0);
695 Token *token = NULL;
696 for (int m = p->mark - 1; m >= 0; m--) {
697 token = p->tokens[m];
698 if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
699 break;
700 }
701 }
702 return token;
703}
704
705void *
706_PyPegen_async_token(Parser *p)
707{
708 return _PyPegen_expect_token(p, ASYNC);
709}
710
711void *
712_PyPegen_await_token(Parser *p)
713{
714 return _PyPegen_expect_token(p, AWAIT);
715}
716
717void *
718_PyPegen_endmarker_token(Parser *p)
719{
720 return _PyPegen_expect_token(p, ENDMARKER);
721}
722
723expr_ty
724_PyPegen_name_token(Parser *p)
725{
726 Token *t = _PyPegen_expect_token(p, NAME);
727 if (t == NULL) {
728 return NULL;
729 }
730 char* s = PyBytes_AsString(t->bytes);
731 if (!s) {
732 return NULL;
733 }
734 PyObject *id = _PyPegen_new_identifier(p, s);
735 if (id == NULL) {
736 return NULL;
737 }
738 return Name(id, Load, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
739 p->arena);
740}
741
742void *
743_PyPegen_string_token(Parser *p)
744{
745 return _PyPegen_expect_token(p, STRING);
746}
747
748void *
749_PyPegen_newline_token(Parser *p)
750{
751 return _PyPegen_expect_token(p, NEWLINE);
752}
753
754void *
755_PyPegen_indent_token(Parser *p)
756{
757 return _PyPegen_expect_token(p, INDENT);
758}
759
760void *
761_PyPegen_dedent_token(Parser *p)
762{
763 return _PyPegen_expect_token(p, DEDENT);
764}
765
766static PyObject *
767parsenumber_raw(const char *s)
768{
769 const char *end;
770 long x;
771 double dx;
772 Py_complex compl;
773 int imflag;
774
775 assert(s != NULL);
776 errno = 0;
777 end = s + strlen(s) - 1;
778 imflag = *end == 'j' || *end == 'J';
779 if (s[0] == '0') {
780 x = (long)PyOS_strtoul(s, (char **)&end, 0);
781 if (x < 0 && errno == 0) {
782 return PyLong_FromString(s, (char **)0, 0);
783 }
784 }
785 else
786 x = PyOS_strtol(s, (char **)&end, 0);
787 if (*end == '\0') {
788 if (errno != 0)
789 return PyLong_FromString(s, (char **)0, 0);
790 return PyLong_FromLong(x);
791 }
792 /* XXX Huge floats may silently fail */
793 if (imflag) {
794 compl.real = 0.;
795 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
796 if (compl.imag == -1.0 && PyErr_Occurred())
797 return NULL;
798 return PyComplex_FromCComplex(compl);
799 }
800 else {
801 dx = PyOS_string_to_double(s, NULL, NULL);
802 if (dx == -1.0 && PyErr_Occurred())
803 return NULL;
804 return PyFloat_FromDouble(dx);
805 }
806}
807
808static PyObject *
809parsenumber(const char *s)
810{
811 char *dup, *end;
812 PyObject *res = NULL;
813
814 assert(s != NULL);
815
816 if (strchr(s, '_') == NULL) {
817 return parsenumber_raw(s);
818 }
819 /* Create a duplicate without underscores. */
820 dup = PyMem_Malloc(strlen(s) + 1);
821 if (dup == NULL) {
822 return PyErr_NoMemory();
823 }
824 end = dup;
825 for (; *s; s++) {
826 if (*s != '_') {
827 *end++ = *s;
828 }
829 }
830 *end = '\0';
831 res = parsenumber_raw(dup);
832 PyMem_Free(dup);
833 return res;
834}
835
836expr_ty
837_PyPegen_number_token(Parser *p)
838{
839 Token *t = _PyPegen_expect_token(p, NUMBER);
840 if (t == NULL) {
841 return NULL;
842 }
843
844 char *num_raw = PyBytes_AsString(t->bytes);
845
846 if (num_raw == NULL) {
847 return NULL;
848 }
849
850 PyObject *c = parsenumber(num_raw);
851
852 if (c == NULL) {
853 return NULL;
854 }
855
856 if (PyArena_AddPyObject(p->arena, c) < 0) {
857 Py_DECREF(c);
858 return NULL;
859 }
860
861 return Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
862 p->arena);
863}
864
865void
866_PyPegen_Parser_Free(Parser *p)
867{
868 Py_XDECREF(p->normalize);
869 for (int i = 0; i < p->size; i++) {
870 PyMem_Free(p->tokens[i]);
871 }
872 PyMem_Free(p->tokens);
873 PyMem_Free(p);
874}
875
876Parser *
877_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int *errcode, PyArena *arena)
878{
879 Parser *p = PyMem_Malloc(sizeof(Parser));
880 if (p == NULL) {
881 PyErr_Format(PyExc_MemoryError, "Out of memory for Parser");
882 return NULL;
883 }
884 assert(tok != NULL);
885 p->tok = tok;
886 p->keywords = NULL;
887 p->n_keyword_lists = -1;
888 p->tokens = PyMem_Malloc(sizeof(Token *));
889 if (!p->tokens) {
890 PyMem_Free(p);
891 PyErr_Format(PyExc_MemoryError, "Out of memory for tokens");
892 return NULL;
893 }
894 p->tokens[0] = PyMem_Malloc(sizeof(Token));
895 memset(p->tokens[0], '\0', sizeof(Token));
896 p->mark = 0;
897 p->fill = 0;
898 p->size = 1;
899
900 p->errcode = errcode;
901 p->arena = arena;
902 p->start_rule = start_rule;
903 p->parsing_started = 0;
904 p->normalize = NULL;
905 p->error_indicator = 0;
906
907 p->starting_lineno = 0;
908 p->starting_col_offset = 0;
909
910 return p;
911}
912
913void *
914_PyPegen_run_parser(Parser *p)
915{
916 void *res = _PyPegen_parse(p);
917 if (res == NULL) {
918 if (PyErr_Occurred()) {
919 return NULL;
920 }
921 if (p->fill == 0) {
922 RAISE_SYNTAX_ERROR("error at start before reading any input");
923 }
924 else if (p->tok->done == E_EOF) {
925 RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
926 }
927 else {
928 if (p->tokens[p->fill-1]->type == INDENT) {
929 RAISE_INDENTATION_ERROR("unexpected indent");
930 }
931 else if (p->tokens[p->fill-1]->type == DEDENT) {
932 RAISE_INDENTATION_ERROR("unexpected unindent");
933 }
934 else {
935 RAISE_SYNTAX_ERROR("invalid syntax");
936 }
937 }
938 return NULL;
939 }
940
941 return res;
942}
943
944mod_ty
945_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
946 const char *enc, const char *ps1, const char *ps2,
947 int *errcode, PyArena *arena)
948{
949 struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
950 if (tok == NULL) {
951 if (PyErr_Occurred()) {
952 raise_tokenizer_init_error(filename_ob);
953 return NULL;
954 }
955 return NULL;
956 }
957 // This transfers the ownership to the tokenizer
958 tok->filename = filename_ob;
959 Py_INCREF(filename_ob);
960
961 // From here on we need to clean up even if there's an error
962 mod_ty result = NULL;
963
964 Parser *p = _PyPegen_Parser_New(tok, start_rule, errcode, arena);
965 if (p == NULL) {
966 goto error;
967 }
968
969 result = _PyPegen_run_parser(p);
970 _PyPegen_Parser_Free(p);
971
972error:
973 PyTokenizer_Free(tok);
974 return result;
975}
976
977mod_ty
978_PyPegen_run_parser_from_file(const char *filename, int start_rule,
979 PyObject *filename_ob, PyArena *arena)
980{
981 FILE *fp = fopen(filename, "rb");
982 if (fp == NULL) {
983 PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
984 return NULL;
985 }
986
987 mod_ty result = _PyPegen_run_parser_from_file_pointer(fp, start_rule, filename_ob,
988 NULL, NULL, NULL, NULL, arena);
989
990 fclose(fp);
991 return result;
992}
993
994mod_ty
995_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
996 int iflags, PyArena *arena)
997{
998 int exec_input = start_rule == Py_file_input;
999
1000 struct tok_state *tok;
1001 if (iflags & PyCF_IGNORE_COOKIE) {
1002 tok = PyTokenizer_FromUTF8(str, exec_input);
1003 } else {
1004 tok = PyTokenizer_FromString(str, exec_input);
1005 }
1006 if (tok == NULL) {
1007 if (PyErr_Occurred()) {
1008 raise_tokenizer_init_error(filename_ob);
1009 }
1010 return NULL;
1011 }
1012 // This transfers the ownership to the tokenizer
1013 tok->filename = filename_ob;
1014 Py_INCREF(filename_ob);
1015
1016 // We need to clear up from here on
1017 mod_ty result = NULL;
1018
1019 Parser *p = _PyPegen_Parser_New(tok, start_rule, NULL, arena);
1020 if (p == NULL) {
1021 goto error;
1022 }
1023
1024 result = _PyPegen_run_parser(p);
1025 _PyPegen_Parser_Free(p);
1026
1027error:
1028 PyTokenizer_Free(tok);
1029 return result;
1030}
1031
1032void *
1033_PyPegen_interactive_exit(Parser *p)
1034{
1035 if (p->errcode) {
1036 *(p->errcode) = E_EOF;
1037 }
1038 return NULL;
1039}
1040
1041/* Creates a single-element asdl_seq* that contains a */
1042asdl_seq *
1043_PyPegen_singleton_seq(Parser *p, void *a)
1044{
1045 assert(a != NULL);
1046 asdl_seq *seq = _Py_asdl_seq_new(1, p->arena);
1047 if (!seq) {
1048 return NULL;
1049 }
1050 asdl_seq_SET(seq, 0, a);
1051 return seq;
1052}
1053
1054/* Creates a copy of seq and prepends a to it */
1055asdl_seq *
1056_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
1057{
1058 assert(a != NULL);
1059 if (!seq) {
1060 return _PyPegen_singleton_seq(p, a);
1061 }
1062
1063 asdl_seq *new_seq = _Py_asdl_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
1064 if (!new_seq) {
1065 return NULL;
1066 }
1067
1068 asdl_seq_SET(new_seq, 0, a);
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001069 for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001070 asdl_seq_SET(new_seq, i, asdl_seq_GET(seq, i - 1));
1071 }
1072 return new_seq;
1073}
1074
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001075static Py_ssize_t
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001076_get_flattened_seq_size(asdl_seq *seqs)
1077{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001078 Py_ssize_t size = 0;
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001079 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1080 asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
1081 size += asdl_seq_LEN(inner_seq);
1082 }
1083 return size;
1084}
1085
1086/* Flattens an asdl_seq* of asdl_seq*s */
1087asdl_seq *
1088_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
1089{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001090 Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001091 assert(flattened_seq_size > 0);
1092
1093 asdl_seq *flattened_seq = _Py_asdl_seq_new(flattened_seq_size, p->arena);
1094 if (!flattened_seq) {
1095 return NULL;
1096 }
1097
1098 int flattened_seq_idx = 0;
1099 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1100 asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001101 for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001102 asdl_seq_SET(flattened_seq, flattened_seq_idx++, asdl_seq_GET(inner_seq, j));
1103 }
1104 }
1105 assert(flattened_seq_idx == flattened_seq_size);
1106
1107 return flattened_seq;
1108}
1109
1110/* Creates a new name of the form <first_name>.<second_name> */
1111expr_ty
1112_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
1113{
1114 assert(first_name != NULL && second_name != NULL);
1115 PyObject *first_identifier = first_name->v.Name.id;
1116 PyObject *second_identifier = second_name->v.Name.id;
1117
1118 if (PyUnicode_READY(first_identifier) == -1) {
1119 return NULL;
1120 }
1121 if (PyUnicode_READY(second_identifier) == -1) {
1122 return NULL;
1123 }
1124 const char *first_str = PyUnicode_AsUTF8(first_identifier);
1125 if (!first_str) {
1126 return NULL;
1127 }
1128 const char *second_str = PyUnicode_AsUTF8(second_identifier);
1129 if (!second_str) {
1130 return NULL;
1131 }
1132 ssize_t len = strlen(first_str) + strlen(second_str) + 1; // +1 for the dot
1133
1134 PyObject *str = PyBytes_FromStringAndSize(NULL, len);
1135 if (!str) {
1136 return NULL;
1137 }
1138
1139 char *s = PyBytes_AS_STRING(str);
1140 if (!s) {
1141 return NULL;
1142 }
1143
1144 strcpy(s, first_str);
1145 s += strlen(first_str);
1146 *s++ = '.';
1147 strcpy(s, second_str);
1148 s += strlen(second_str);
1149 *s = '\0';
1150
1151 PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL);
1152 Py_DECREF(str);
1153 if (!uni) {
1154 return NULL;
1155 }
1156 PyUnicode_InternInPlace(&uni);
1157 if (PyArena_AddPyObject(p->arena, uni) < 0) {
1158 Py_DECREF(uni);
1159 return NULL;
1160 }
1161
1162 return _Py_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
1163}
1164
1165/* Counts the total number of dots in seq's tokens */
1166int
1167_PyPegen_seq_count_dots(asdl_seq *seq)
1168{
1169 int number_of_dots = 0;
1170 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1171 Token *current_expr = asdl_seq_GET(seq, i);
1172 switch (current_expr->type) {
1173 case ELLIPSIS:
1174 number_of_dots += 3;
1175 break;
1176 case DOT:
1177 number_of_dots += 1;
1178 break;
1179 default:
1180 assert(current_expr->type == ELLIPSIS || current_expr->type == DOT);
1181 }
1182 }
1183
1184 return number_of_dots;
1185}
1186
1187/* Creates an alias with '*' as the identifier name */
1188alias_ty
1189_PyPegen_alias_for_star(Parser *p)
1190{
1191 PyObject *str = PyUnicode_InternFromString("*");
1192 if (!str) {
1193 return NULL;
1194 }
1195 if (PyArena_AddPyObject(p->arena, str) < 0) {
1196 Py_DECREF(str);
1197 return NULL;
1198 }
1199 return alias(str, NULL, p->arena);
1200}
1201
1202/* Creates a new asdl_seq* with the identifiers of all the names in seq */
1203asdl_seq *
1204_PyPegen_map_names_to_ids(Parser *p, asdl_seq *seq)
1205{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001206 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001207 assert(len > 0);
1208
1209 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1210 if (!new_seq) {
1211 return NULL;
1212 }
1213 for (Py_ssize_t i = 0; i < len; i++) {
1214 expr_ty e = asdl_seq_GET(seq, i);
1215 asdl_seq_SET(new_seq, i, e->v.Name.id);
1216 }
1217 return new_seq;
1218}
1219
1220/* Constructs a CmpopExprPair */
1221CmpopExprPair *
1222_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
1223{
1224 assert(expr != NULL);
1225 CmpopExprPair *a = PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
1226 if (!a) {
1227 return NULL;
1228 }
1229 a->cmpop = cmpop;
1230 a->expr = expr;
1231 return a;
1232}
1233
1234asdl_int_seq *
1235_PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
1236{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001237 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001238 assert(len > 0);
1239
1240 asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
1241 if (!new_seq) {
1242 return NULL;
1243 }
1244 for (Py_ssize_t i = 0; i < len; i++) {
1245 CmpopExprPair *pair = asdl_seq_GET(seq, i);
1246 asdl_seq_SET(new_seq, i, pair->cmpop);
1247 }
1248 return new_seq;
1249}
1250
1251asdl_seq *
1252_PyPegen_get_exprs(Parser *p, asdl_seq *seq)
1253{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001254 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001255 assert(len > 0);
1256
1257 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1258 if (!new_seq) {
1259 return NULL;
1260 }
1261 for (Py_ssize_t i = 0; i < len; i++) {
1262 CmpopExprPair *pair = asdl_seq_GET(seq, i);
1263 asdl_seq_SET(new_seq, i, pair->expr);
1264 }
1265 return new_seq;
1266}
1267
1268/* Creates an asdl_seq* where all the elements have been changed to have ctx as context */
1269static asdl_seq *
1270_set_seq_context(Parser *p, asdl_seq *seq, expr_context_ty ctx)
1271{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001272 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001273 if (len == 0) {
1274 return NULL;
1275 }
1276
1277 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1278 if (!new_seq) {
1279 return NULL;
1280 }
1281 for (Py_ssize_t i = 0; i < len; i++) {
1282 expr_ty e = asdl_seq_GET(seq, i);
1283 asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
1284 }
1285 return new_seq;
1286}
1287
1288static expr_ty
1289_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
1290{
1291 return _Py_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
1292}
1293
1294static expr_ty
1295_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
1296{
1297 return _Py_Tuple(_set_seq_context(p, e->v.Tuple.elts, ctx), ctx, EXTRA_EXPR(e, e));
1298}
1299
1300static expr_ty
1301_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
1302{
1303 return _Py_List(_set_seq_context(p, e->v.List.elts, ctx), ctx, EXTRA_EXPR(e, e));
1304}
1305
1306static expr_ty
1307_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
1308{
1309 return _Py_Subscript(e->v.Subscript.value, e->v.Subscript.slice, ctx, EXTRA_EXPR(e, e));
1310}
1311
1312static expr_ty
1313_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
1314{
1315 return _Py_Attribute(e->v.Attribute.value, e->v.Attribute.attr, ctx, EXTRA_EXPR(e, e));
1316}
1317
1318static expr_ty
1319_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
1320{
1321 return _Py_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx), ctx, EXTRA_EXPR(e, e));
1322}
1323
1324/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
1325expr_ty
1326_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
1327{
1328 assert(expr != NULL);
1329
1330 expr_ty new = NULL;
1331 switch (expr->kind) {
1332 case Name_kind:
1333 new = _set_name_context(p, expr, ctx);
1334 break;
1335 case Tuple_kind:
1336 new = _set_tuple_context(p, expr, ctx);
1337 break;
1338 case List_kind:
1339 new = _set_list_context(p, expr, ctx);
1340 break;
1341 case Subscript_kind:
1342 new = _set_subscript_context(p, expr, ctx);
1343 break;
1344 case Attribute_kind:
1345 new = _set_attribute_context(p, expr, ctx);
1346 break;
1347 case Starred_kind:
1348 new = _set_starred_context(p, expr, ctx);
1349 break;
1350 default:
1351 new = expr;
1352 }
1353 return new;
1354}
1355
1356/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
1357KeyValuePair *
1358_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
1359{
1360 KeyValuePair *a = PyArena_Malloc(p->arena, sizeof(KeyValuePair));
1361 if (!a) {
1362 return NULL;
1363 }
1364 a->key = key;
1365 a->value = value;
1366 return a;
1367}
1368
1369/* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
1370asdl_seq *
1371_PyPegen_get_keys(Parser *p, asdl_seq *seq)
1372{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001373 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001374 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1375 if (!new_seq) {
1376 return NULL;
1377 }
1378 for (Py_ssize_t i = 0; i < len; i++) {
1379 KeyValuePair *pair = asdl_seq_GET(seq, i);
1380 asdl_seq_SET(new_seq, i, pair->key);
1381 }
1382 return new_seq;
1383}
1384
1385/* Extracts all values from an asdl_seq* of KeyValuePair*'s */
1386asdl_seq *
1387_PyPegen_get_values(Parser *p, asdl_seq *seq)
1388{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001389 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001390 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1391 if (!new_seq) {
1392 return NULL;
1393 }
1394 for (Py_ssize_t i = 0; i < len; i++) {
1395 KeyValuePair *pair = asdl_seq_GET(seq, i);
1396 asdl_seq_SET(new_seq, i, pair->value);
1397 }
1398 return new_seq;
1399}
1400
1401/* Constructs a NameDefaultPair */
1402NameDefaultPair *
1403_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value)
1404{
1405 NameDefaultPair *a = PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
1406 if (!a) {
1407 return NULL;
1408 }
1409 a->arg = arg;
1410 a->value = value;
1411 return a;
1412}
1413
1414/* Constructs a SlashWithDefault */
1415SlashWithDefault *
1416_PyPegen_slash_with_default(Parser *p, asdl_seq *plain_names, asdl_seq *names_with_defaults)
1417{
1418 SlashWithDefault *a = PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
1419 if (!a) {
1420 return NULL;
1421 }
1422 a->plain_names = plain_names;
1423 a->names_with_defaults = names_with_defaults;
1424 return a;
1425}
1426
1427/* Constructs a StarEtc */
1428StarEtc *
1429_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
1430{
1431 StarEtc *a = PyArena_Malloc(p->arena, sizeof(StarEtc));
1432 if (!a) {
1433 return NULL;
1434 }
1435 a->vararg = vararg;
1436 a->kwonlyargs = kwonlyargs;
1437 a->kwarg = kwarg;
1438 return a;
1439}
1440
1441asdl_seq *
1442_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
1443{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001444 Py_ssize_t first_len = asdl_seq_LEN(a);
1445 Py_ssize_t second_len = asdl_seq_LEN(b);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001446 asdl_seq *new_seq = _Py_asdl_seq_new(first_len + second_len, p->arena);
1447 if (!new_seq) {
1448 return NULL;
1449 }
1450
1451 int k = 0;
1452 for (Py_ssize_t i = 0; i < first_len; i++) {
1453 asdl_seq_SET(new_seq, k++, asdl_seq_GET(a, i));
1454 }
1455 for (Py_ssize_t i = 0; i < second_len; i++) {
1456 asdl_seq_SET(new_seq, k++, asdl_seq_GET(b, i));
1457 }
1458
1459 return new_seq;
1460}
1461
1462static asdl_seq *
1463_get_names(Parser *p, asdl_seq *names_with_defaults)
1464{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001465 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001466 asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
1467 if (!seq) {
1468 return NULL;
1469 }
1470 for (Py_ssize_t i = 0; i < len; i++) {
1471 NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
1472 asdl_seq_SET(seq, i, pair->arg);
1473 }
1474 return seq;
1475}
1476
1477static asdl_seq *
1478_get_defaults(Parser *p, asdl_seq *names_with_defaults)
1479{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001480 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001481 asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
1482 if (!seq) {
1483 return NULL;
1484 }
1485 for (Py_ssize_t i = 0; i < len; i++) {
1486 NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
1487 asdl_seq_SET(seq, i, pair->value);
1488 }
1489 return seq;
1490}
1491
1492/* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
1493arguments_ty
1494_PyPegen_make_arguments(Parser *p, asdl_seq *slash_without_default,
1495 SlashWithDefault *slash_with_default, asdl_seq *plain_names,
1496 asdl_seq *names_with_default, StarEtc *star_etc)
1497{
1498 asdl_seq *posonlyargs;
1499 if (slash_without_default != NULL) {
1500 posonlyargs = slash_without_default;
1501 }
1502 else if (slash_with_default != NULL) {
1503 asdl_seq *slash_with_default_names =
1504 _get_names(p, slash_with_default->names_with_defaults);
1505 if (!slash_with_default_names) {
1506 return NULL;
1507 }
1508 posonlyargs = _PyPegen_join_sequences(p, slash_with_default->plain_names, slash_with_default_names);
1509 if (!posonlyargs) {
1510 return NULL;
1511 }
1512 }
1513 else {
1514 posonlyargs = _Py_asdl_seq_new(0, p->arena);
1515 if (!posonlyargs) {
1516 return NULL;
1517 }
1518 }
1519
1520 asdl_seq *posargs;
1521 if (plain_names != NULL && names_with_default != NULL) {
1522 asdl_seq *names_with_default_names = _get_names(p, names_with_default);
1523 if (!names_with_default_names) {
1524 return NULL;
1525 }
1526 posargs = _PyPegen_join_sequences(p, plain_names, names_with_default_names);
1527 if (!posargs) {
1528 return NULL;
1529 }
1530 }
1531 else if (plain_names == NULL && names_with_default != NULL) {
1532 posargs = _get_names(p, names_with_default);
1533 if (!posargs) {
1534 return NULL;
1535 }
1536 }
1537 else if (plain_names != NULL && names_with_default == NULL) {
1538 posargs = plain_names;
1539 }
1540 else {
1541 posargs = _Py_asdl_seq_new(0, p->arena);
1542 if (!posargs) {
1543 return NULL;
1544 }
1545 }
1546
1547 asdl_seq *posdefaults;
1548 if (slash_with_default != NULL && names_with_default != NULL) {
1549 asdl_seq *slash_with_default_values =
1550 _get_defaults(p, slash_with_default->names_with_defaults);
1551 if (!slash_with_default_values) {
1552 return NULL;
1553 }
1554 asdl_seq *names_with_default_values = _get_defaults(p, names_with_default);
1555 if (!names_with_default_values) {
1556 return NULL;
1557 }
1558 posdefaults = _PyPegen_join_sequences(p, slash_with_default_values, names_with_default_values);
1559 if (!posdefaults) {
1560 return NULL;
1561 }
1562 }
1563 else if (slash_with_default == NULL && names_with_default != NULL) {
1564 posdefaults = _get_defaults(p, names_with_default);
1565 if (!posdefaults) {
1566 return NULL;
1567 }
1568 }
1569 else if (slash_with_default != NULL && names_with_default == NULL) {
1570 posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
1571 if (!posdefaults) {
1572 return NULL;
1573 }
1574 }
1575 else {
1576 posdefaults = _Py_asdl_seq_new(0, p->arena);
1577 if (!posdefaults) {
1578 return NULL;
1579 }
1580 }
1581
1582 arg_ty vararg = NULL;
1583 if (star_etc != NULL && star_etc->vararg != NULL) {
1584 vararg = star_etc->vararg;
1585 }
1586
1587 asdl_seq *kwonlyargs;
1588 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
1589 kwonlyargs = _get_names(p, star_etc->kwonlyargs);
1590 if (!kwonlyargs) {
1591 return NULL;
1592 }
1593 }
1594 else {
1595 kwonlyargs = _Py_asdl_seq_new(0, p->arena);
1596 if (!kwonlyargs) {
1597 return NULL;
1598 }
1599 }
1600
1601 asdl_seq *kwdefaults;
1602 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
1603 kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
1604 if (!kwdefaults) {
1605 return NULL;
1606 }
1607 }
1608 else {
1609 kwdefaults = _Py_asdl_seq_new(0, p->arena);
1610 if (!kwdefaults) {
1611 return NULL;
1612 }
1613 }
1614
1615 arg_ty kwarg = NULL;
1616 if (star_etc != NULL && star_etc->kwarg != NULL) {
1617 kwarg = star_etc->kwarg;
1618 }
1619
1620 return _Py_arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg,
1621 posdefaults, p->arena);
1622}
1623
1624/* Constructs an empty arguments_ty object, that gets used when a function accepts no
1625 * arguments. */
1626arguments_ty
1627_PyPegen_empty_arguments(Parser *p)
1628{
1629 asdl_seq *posonlyargs = _Py_asdl_seq_new(0, p->arena);
1630 if (!posonlyargs) {
1631 return NULL;
1632 }
1633 asdl_seq *posargs = _Py_asdl_seq_new(0, p->arena);
1634 if (!posargs) {
1635 return NULL;
1636 }
1637 asdl_seq *posdefaults = _Py_asdl_seq_new(0, p->arena);
1638 if (!posdefaults) {
1639 return NULL;
1640 }
1641 asdl_seq *kwonlyargs = _Py_asdl_seq_new(0, p->arena);
1642 if (!kwonlyargs) {
1643 return NULL;
1644 }
1645 asdl_seq *kwdefaults = _Py_asdl_seq_new(0, p->arena);
1646 if (!kwdefaults) {
1647 return NULL;
1648 }
1649
1650 return _Py_arguments(posonlyargs, posargs, NULL, kwonlyargs, kwdefaults, NULL, kwdefaults,
1651 p->arena);
1652}
1653
1654/* Encapsulates the value of an operator_ty into an AugOperator struct */
1655AugOperator *
1656_PyPegen_augoperator(Parser *p, operator_ty kind)
1657{
1658 AugOperator *a = PyArena_Malloc(p->arena, sizeof(AugOperator));
1659 if (!a) {
1660 return NULL;
1661 }
1662 a->kind = kind;
1663 return a;
1664}
1665
1666/* Construct a FunctionDef equivalent to function_def, but with decorators */
1667stmt_ty
1668_PyPegen_function_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty function_def)
1669{
1670 assert(function_def != NULL);
1671 if (function_def->kind == AsyncFunctionDef_kind) {
1672 return _Py_AsyncFunctionDef(
1673 function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
1674 function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns,
1675 function_def->v.FunctionDef.type_comment, function_def->lineno,
1676 function_def->col_offset, function_def->end_lineno, function_def->end_col_offset,
1677 p->arena);
1678 }
1679
1680 return _Py_FunctionDef(function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
1681 function_def->v.FunctionDef.body, decorators,
1682 function_def->v.FunctionDef.returns,
1683 function_def->v.FunctionDef.type_comment, function_def->lineno,
1684 function_def->col_offset, function_def->end_lineno,
1685 function_def->end_col_offset, p->arena);
1686}
1687
1688/* Construct a ClassDef equivalent to class_def, but with decorators */
1689stmt_ty
1690_PyPegen_class_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty class_def)
1691{
1692 assert(class_def != NULL);
1693 return _Py_ClassDef(class_def->v.ClassDef.name, class_def->v.ClassDef.bases,
1694 class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators,
1695 class_def->lineno, class_def->col_offset, class_def->end_lineno,
1696 class_def->end_col_offset, p->arena);
1697}
1698
1699/* Construct a KeywordOrStarred */
1700KeywordOrStarred *
1701_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
1702{
1703 KeywordOrStarred *a = PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
1704 if (!a) {
1705 return NULL;
1706 }
1707 a->element = element;
1708 a->is_keyword = is_keyword;
1709 return a;
1710}
1711
1712/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
1713static int
1714_seq_number_of_starred_exprs(asdl_seq *seq)
1715{
1716 int n = 0;
1717 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1718 KeywordOrStarred *k = asdl_seq_GET(seq, i);
1719 if (!k->is_keyword) {
1720 n++;
1721 }
1722 }
1723 return n;
1724}
1725
1726/* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */
1727asdl_seq *
1728_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
1729{
1730 int new_len = _seq_number_of_starred_exprs(kwargs);
1731 if (new_len == 0) {
1732 return NULL;
1733 }
1734 asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
1735 if (!new_seq) {
1736 return NULL;
1737 }
1738
1739 int idx = 0;
1740 for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
1741 KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
1742 if (!k->is_keyword) {
1743 asdl_seq_SET(new_seq, idx++, k->element);
1744 }
1745 }
1746 return new_seq;
1747}
1748
1749/* Return a new asdl_seq* with only the keywords in kwargs */
1750asdl_seq *
1751_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
1752{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001753 Py_ssize_t len = asdl_seq_LEN(kwargs);
1754 Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001755 if (new_len == 0) {
1756 return NULL;
1757 }
1758 asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
1759 if (!new_seq) {
1760 return NULL;
1761 }
1762
1763 int idx = 0;
1764 for (Py_ssize_t i = 0; i < len; i++) {
1765 KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
1766 if (k->is_keyword) {
1767 asdl_seq_SET(new_seq, idx++, k->element);
1768 }
1769 }
1770 return new_seq;
1771}
1772
1773expr_ty
1774_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
1775{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001776 Py_ssize_t len = asdl_seq_LEN(strings);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001777 assert(len > 0);
1778
1779 Token *first = asdl_seq_GET(strings, 0);
1780 Token *last = asdl_seq_GET(strings, len - 1);
1781
1782 int bytesmode = 0;
1783 PyObject *bytes_str = NULL;
1784
1785 FstringParser state;
1786 _PyPegen_FstringParser_Init(&state);
1787
1788 for (Py_ssize_t i = 0; i < len; i++) {
1789 Token *t = asdl_seq_GET(strings, i);
1790
1791 int this_bytesmode;
1792 int this_rawmode;
1793 PyObject *s;
1794 const char *fstr;
1795 Py_ssize_t fstrlen = -1;
1796
1797 char *this_str = PyBytes_AsString(t->bytes);
1798 if (!this_str) {
1799 goto error;
1800 }
1801
1802 if (_PyPegen_parsestr(p, this_str, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen) != 0) {
1803 goto error;
1804 }
1805
1806 /* Check that we are not mixing bytes with unicode. */
1807 if (i != 0 && bytesmode != this_bytesmode) {
1808 RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
1809 Py_XDECREF(s);
1810 goto error;
1811 }
1812 bytesmode = this_bytesmode;
1813
1814 if (fstr != NULL) {
1815 assert(s == NULL && !bytesmode);
1816
1817 int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
1818 this_rawmode, 0, first, t, last);
1819 if (result < 0) {
1820 goto error;
1821 }
1822 }
1823 else {
1824 /* String or byte string. */
1825 assert(s != NULL && fstr == NULL);
1826 assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
1827
1828 if (bytesmode) {
1829 if (i == 0) {
1830 bytes_str = s;
1831 }
1832 else {
1833 PyBytes_ConcatAndDel(&bytes_str, s);
1834 if (!bytes_str) {
1835 goto error;
1836 }
1837 }
1838 }
1839 else {
1840 /* This is a regular string. Concatenate it. */
1841 if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
1842 goto error;
1843 }
1844 }
1845 }
1846 }
1847
1848 if (bytesmode) {
1849 if (PyArena_AddPyObject(p->arena, bytes_str) < 0) {
1850 goto error;
1851 }
1852 return Constant(bytes_str, NULL, first->lineno, first->col_offset, last->end_lineno,
1853 last->end_col_offset, p->arena);
1854 }
1855
1856 return _PyPegen_FstringParser_Finish(p, &state, first, last);
1857
1858error:
1859 Py_XDECREF(bytes_str);
1860 _PyPegen_FstringParser_Dealloc(&state);
1861 if (PyErr_Occurred()) {
1862 raise_decode_error(p);
1863 }
1864 return NULL;
1865}