blob: 40c09ffcc3a649471fe41149ee4aa608d85539ab [file] [log] [blame]
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001#include <Python.h>
2#include <errcode.h>
3#include "../tokenizer.h"
4
5#include "pegen.h"
6#include "parse_string.h"
7
Guido van Rossumc001c092020-04-30 12:12:19 -07008PyObject *
9_PyPegen_new_type_comment(Parser *p, char *s)
10{
11 PyObject *res = PyUnicode_DecodeUTF8(s, strlen(s), NULL);
12 if (res == NULL) {
13 return NULL;
14 }
15 if (PyArena_AddPyObject(p->arena, res) < 0) {
16 Py_DECREF(res);
17 return NULL;
18 }
19 return res;
20}
21
22arg_ty
23_PyPegen_add_type_comment_to_arg(Parser *p, arg_ty a, Token *tc)
24{
25 if (tc == NULL) {
26 return a;
27 }
28 char *bytes = PyBytes_AsString(tc->bytes);
29 if (bytes == NULL) {
30 return NULL;
31 }
32 PyObject *tco = _PyPegen_new_type_comment(p, bytes);
33 if (tco == NULL) {
34 return NULL;
35 }
36 return arg(a->arg, a->annotation, tco,
37 a->lineno, a->col_offset, a->end_lineno, a->end_col_offset,
38 p->arena);
39}
40
Pablo Galindoc5fc1562020-04-22 23:29:27 +010041static int
42init_normalization(Parser *p)
43{
Lysandros Nikolaouebebb642020-04-23 18:36:06 +030044 if (p->normalize) {
45 return 1;
46 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +010047 PyObject *m = PyImport_ImportModuleNoBlock("unicodedata");
48 if (!m)
49 {
50 return 0;
51 }
52 p->normalize = PyObject_GetAttrString(m, "normalize");
53 Py_DECREF(m);
54 if (!p->normalize)
55 {
56 return 0;
57 }
58 return 1;
59}
60
Pablo Galindo2b74c832020-04-27 18:02:07 +010061/* Checks if the NOTEQUAL token is valid given the current parser flags
620 indicates success and nonzero indicates failure (an exception may be set) */
63int
64_PyPegen_check_barry_as_flufl(Parser *p) {
65 Token *t = p->tokens[p->fill - 1];
66 assert(t->bytes != NULL);
67 assert(t->type == NOTEQUAL);
68
69 char* tok_str = PyBytes_AS_STRING(t->bytes);
70 if (p->flags & PyPARSE_BARRY_AS_BDFL && strcmp(tok_str, "<>")){
71 RAISE_SYNTAX_ERROR("with Barry as BDFL, use '<>' instead of '!='");
72 return -1;
73 } else if (!(p->flags & PyPARSE_BARRY_AS_BDFL)) {
74 return strcmp(tok_str, "!=");
75 }
76 return 0;
77}
78
Pablo Galindoc5fc1562020-04-22 23:29:27 +010079PyObject *
80_PyPegen_new_identifier(Parser *p, char *n)
81{
82 PyObject *id = PyUnicode_DecodeUTF8(n, strlen(n), NULL);
83 if (!id) {
84 goto error;
85 }
86 /* PyUnicode_DecodeUTF8 should always return a ready string. */
87 assert(PyUnicode_IS_READY(id));
88 /* Check whether there are non-ASCII characters in the
89 identifier; if so, normalize to NFKC. */
90 if (!PyUnicode_IS_ASCII(id))
91 {
92 PyObject *id2;
Lysandros Nikolaouebebb642020-04-23 18:36:06 +030093 if (!init_normalization(p))
Pablo Galindoc5fc1562020-04-22 23:29:27 +010094 {
95 Py_DECREF(id);
96 goto error;
97 }
98 PyObject *form = PyUnicode_InternFromString("NFKC");
99 if (form == NULL)
100 {
101 Py_DECREF(id);
102 goto error;
103 }
104 PyObject *args[2] = {form, id};
105 id2 = _PyObject_FastCall(p->normalize, args, 2);
106 Py_DECREF(id);
107 Py_DECREF(form);
108 if (!id2) {
109 goto error;
110 }
111 if (!PyUnicode_Check(id2))
112 {
113 PyErr_Format(PyExc_TypeError,
114 "unicodedata.normalize() must return a string, not "
115 "%.200s",
116 _PyType_Name(Py_TYPE(id2)));
117 Py_DECREF(id2);
118 goto error;
119 }
120 id = id2;
121 }
122 PyUnicode_InternInPlace(&id);
123 if (PyArena_AddPyObject(p->arena, id) < 0)
124 {
125 Py_DECREF(id);
126 goto error;
127 }
128 return id;
129
130error:
131 p->error_indicator = 1;
132 return NULL;
133}
134
135static PyObject *
136_create_dummy_identifier(Parser *p)
137{
138 return _PyPegen_new_identifier(p, "");
139}
140
141static inline Py_ssize_t
142byte_offset_to_character_offset(PyObject *line, int col_offset)
143{
144 const char *str = PyUnicode_AsUTF8(line);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300145 if (!str) {
146 return 0;
147 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100148 PyObject *text = PyUnicode_DecodeUTF8(str, col_offset, NULL);
149 if (!text) {
150 return 0;
151 }
152 Py_ssize_t size = PyUnicode_GET_LENGTH(text);
153 Py_DECREF(text);
154 return size;
155}
156
157const char *
158_PyPegen_get_expr_name(expr_ty e)
159{
160 switch (e->kind) {
161 case Attribute_kind:
162 return "attribute";
163 case Subscript_kind:
164 return "subscript";
165 case Starred_kind:
166 return "starred";
167 case Name_kind:
168 return "name";
169 case List_kind:
170 return "list";
171 case Tuple_kind:
172 return "tuple";
173 case Lambda_kind:
174 return "lambda";
175 case Call_kind:
176 return "function call";
177 case BoolOp_kind:
178 case BinOp_kind:
179 case UnaryOp_kind:
180 return "operator";
181 case GeneratorExp_kind:
182 return "generator expression";
183 case Yield_kind:
184 case YieldFrom_kind:
185 return "yield expression";
186 case Await_kind:
187 return "await expression";
188 case ListComp_kind:
189 return "list comprehension";
190 case SetComp_kind:
191 return "set comprehension";
192 case DictComp_kind:
193 return "dict comprehension";
194 case Dict_kind:
195 return "dict display";
196 case Set_kind:
197 return "set display";
198 case JoinedStr_kind:
199 case FormattedValue_kind:
200 return "f-string expression";
201 case Constant_kind: {
202 PyObject *value = e->v.Constant.value;
203 if (value == Py_None) {
204 return "None";
205 }
206 if (value == Py_False) {
207 return "False";
208 }
209 if (value == Py_True) {
210 return "True";
211 }
212 if (value == Py_Ellipsis) {
213 return "Ellipsis";
214 }
215 return "literal";
216 }
217 case Compare_kind:
218 return "comparison";
219 case IfExp_kind:
220 return "conditional expression";
221 case NamedExpr_kind:
222 return "named expression";
223 default:
224 PyErr_Format(PyExc_SystemError,
225 "unexpected expression in assignment %d (line %d)",
226 e->kind, e->lineno);
227 return NULL;
228 }
229}
230
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300231static int
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100232raise_decode_error(Parser *p)
233{
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300234 assert(PyErr_Occurred());
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100235 const char *errtype = NULL;
236 if (PyErr_ExceptionMatches(PyExc_UnicodeError)) {
237 errtype = "unicode error";
238 }
239 else if (PyErr_ExceptionMatches(PyExc_ValueError)) {
240 errtype = "value error";
241 }
242 if (errtype) {
243 PyObject *type, *value, *tback, *errstr;
244 PyErr_Fetch(&type, &value, &tback);
245 errstr = PyObject_Str(value);
246 if (errstr) {
247 RAISE_SYNTAX_ERROR("(%s) %U", errtype, errstr);
248 Py_DECREF(errstr);
249 }
250 else {
251 PyErr_Clear();
252 RAISE_SYNTAX_ERROR("(%s) unknown error", errtype);
253 }
254 Py_XDECREF(type);
255 Py_XDECREF(value);
256 Py_XDECREF(tback);
257 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300258
259 return -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100260}
261
262static void
263raise_tokenizer_init_error(PyObject *filename)
264{
265 if (!(PyErr_ExceptionMatches(PyExc_LookupError)
266 || PyErr_ExceptionMatches(PyExc_ValueError)
267 || PyErr_ExceptionMatches(PyExc_UnicodeDecodeError))) {
268 return;
269 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300270 PyObject *errstr = NULL;
271 PyObject *tuple = NULL;
272 PyObject *type, *value, *tback;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100273 PyErr_Fetch(&type, &value, &tback);
274 errstr = PyObject_Str(value);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300275 if (!errstr) {
276 goto error;
277 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100278
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300279 PyObject *tmp = Py_BuildValue("(OiiO)", filename, 0, -1, Py_None);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100280 if (!tmp) {
281 goto error;
282 }
283
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300284 tuple = PyTuple_Pack(2, errstr, tmp);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100285 Py_DECREF(tmp);
286 if (!value) {
287 goto error;
288 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300289 PyErr_SetObject(PyExc_SyntaxError, tuple);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100290
291error:
292 Py_XDECREF(type);
293 Py_XDECREF(value);
294 Py_XDECREF(tback);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300295 Py_XDECREF(errstr);
296 Py_XDECREF(tuple);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100297}
298
299static inline PyObject *
300get_error_line(char *buffer)
301{
302 char *newline = strchr(buffer, '\n');
303 if (newline) {
304 return PyUnicode_FromStringAndSize(buffer, newline - buffer);
305 }
306 else {
307 return PyUnicode_FromString(buffer);
308 }
309}
310
311static int
312tokenizer_error_with_col_offset(Parser *p, PyObject *errtype, const char *errmsg)
313{
314 PyObject *errstr = NULL;
315 PyObject *value = NULL;
Pablo Galindoee40e4b2020-04-23 03:43:08 +0100316 size_t col_number = -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100317
318 errstr = PyUnicode_FromString(errmsg);
319 if (!errstr) {
320 return -1;
321 }
322
323 PyObject *loc = NULL;
324 if (p->start_rule == Py_file_input) {
325 loc = PyErr_ProgramTextObject(p->tok->filename, p->tok->lineno);
326 }
327 if (!loc) {
328 loc = get_error_line(p->tok->buf);
329 }
330
331 if (loc) {
332 col_number = p->tok->cur - p->tok->buf;
333 }
334 else {
335 Py_INCREF(Py_None);
336 loc = Py_None;
337 }
338
339 PyObject *tmp = Py_BuildValue("(OiiN)", p->tok->filename, p->tok->lineno,
340 col_number, loc);
341 if (!tmp) {
342 goto error;
343 }
344
345 value = PyTuple_Pack(2, errstr, tmp);
346 Py_DECREF(tmp);
347 if (!value) {
348 goto error;
349 }
350 PyErr_SetObject(errtype, value);
351
352 Py_XDECREF(value);
353 Py_XDECREF(errstr);
354 return -1;
355
356error:
357 Py_XDECREF(errstr);
358 Py_XDECREF(loc);
359 return -1;
360}
361
362static int
363tokenizer_error(Parser *p)
364{
365 if (PyErr_Occurred()) {
366 return -1;
367 }
368
369 const char *msg = NULL;
370 PyObject* errtype = PyExc_SyntaxError;
371 switch (p->tok->done) {
372 case E_TOKEN:
373 msg = "invalid token";
374 break;
375 case E_IDENTIFIER:
376 msg = "invalid character in identifier";
377 break;
378 case E_BADPREFIX:
379 return tokenizer_error_with_col_offset(p,
Lysandros Nikolaoud55133f2020-04-28 03:23:35 +0300380 errtype, "invalid string prefix");
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100381 case E_EOFS:
382 return tokenizer_error_with_col_offset(p,
Lysandros Nikolaoud55133f2020-04-28 03:23:35 +0300383 errtype, "EOF while scanning triple-quoted string literal");
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100384 case E_EOLS:
385 return tokenizer_error_with_col_offset(p,
Lysandros Nikolaoud55133f2020-04-28 03:23:35 +0300386 errtype, "EOL while scanning string literal");
387 case E_EOF:
388 return tokenizer_error_with_col_offset(p,
389 errtype, "unexpected EOF while parsing");
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100390 case E_DEDENT:
391 return tokenizer_error_with_col_offset(p,
392 PyExc_IndentationError, "unindent does not match any outer indentation level");
393 case E_INTR:
394 if (!PyErr_Occurred()) {
395 PyErr_SetNone(PyExc_KeyboardInterrupt);
396 }
397 return -1;
398 case E_NOMEM:
399 PyErr_NoMemory();
400 return -1;
401 case E_TABSPACE:
402 errtype = PyExc_TabError;
403 msg = "inconsistent use of tabs and spaces in indentation";
404 break;
405 case E_TOODEEP:
406 errtype = PyExc_IndentationError;
407 msg = "too many levels of indentation";
408 break;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100409 case E_LINECONT:
410 msg = "unexpected character after line continuation character";
411 break;
412 default:
413 msg = "unknown parsing error";
414 }
415
416 PyErr_Format(errtype, msg);
417 // There is no reliable column information for this error
418 PyErr_SyntaxLocationObject(p->tok->filename, p->tok->lineno, 0);
419
420 return -1;
421}
422
423void *
424_PyPegen_raise_error(Parser *p, PyObject *errtype, const char *errmsg, ...)
425{
426 PyObject *value = NULL;
427 PyObject *errstr = NULL;
428 PyObject *loc = NULL;
429 PyObject *tmp = NULL;
430 Token *t = p->tokens[p->fill - 1];
431 Py_ssize_t col_number = 0;
432 va_list va;
433
434 va_start(va, errmsg);
435 errstr = PyUnicode_FromFormatV(errmsg, va);
436 va_end(va);
437 if (!errstr) {
438 goto error;
439 }
440
441 if (p->start_rule == Py_file_input) {
442 loc = PyErr_ProgramTextObject(p->tok->filename, t->lineno);
443 }
444
445 if (!loc) {
446 loc = get_error_line(p->tok->buf);
447 }
448
449 if (loc) {
450 int col_offset = t->col_offset == -1 ? 0 : t->col_offset;
451 col_number = byte_offset_to_character_offset(loc, col_offset) + 1;
452 }
453 else {
454 Py_INCREF(Py_None);
455 loc = Py_None;
456 }
457
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100458 tmp = Py_BuildValue("(OiiN)", p->tok->filename, t->lineno, col_number, loc);
459 if (!tmp) {
460 goto error;
461 }
462 value = PyTuple_Pack(2, errstr, tmp);
463 Py_DECREF(tmp);
464 if (!value) {
465 goto error;
466 }
467 PyErr_SetObject(errtype, value);
468
469 Py_DECREF(errstr);
470 Py_DECREF(value);
471 return NULL;
472
473error:
474 Py_XDECREF(errstr);
475 Py_XDECREF(loc);
476 return NULL;
477}
478
479void *_PyPegen_arguments_parsing_error(Parser *p, expr_ty e) {
480 int kwarg_unpacking = 0;
481 for (Py_ssize_t i = 0, l = asdl_seq_LEN(e->v.Call.keywords); i < l; i++) {
482 keyword_ty keyword = asdl_seq_GET(e->v.Call.keywords, i);
483 if (!keyword->arg) {
484 kwarg_unpacking = 1;
485 }
486 }
487
488 const char *msg = NULL;
489 if (kwarg_unpacking) {
490 msg = "positional argument follows keyword argument unpacking";
491 } else {
492 msg = "positional argument follows keyword argument";
493 }
494
495 return RAISE_SYNTAX_ERROR(msg);
496}
497
#if 0
/* Compiled out. Maps a token type to its entry in _PyParser_TokenNames,
   or "<Huh?>" when the type is outside 0..N_TOKENS. */
static const char *
token_name(int type)
{
    if (type < 0 || type > N_TOKENS) {
        return "<Huh?>";
    }
    return _PyParser_TokenNames[type];
}
#endif
508
509// Here, mark is the start of the node, while p->mark is the end.
510// If node==NULL, they should be the same.
511int
512_PyPegen_insert_memo(Parser *p, int mark, int type, void *node)
513{
514 // Insert in front
515 Memo *m = PyArena_Malloc(p->arena, sizeof(Memo));
516 if (m == NULL) {
517 return -1;
518 }
519 m->type = type;
520 m->node = node;
521 m->mark = p->mark;
522 m->next = p->tokens[mark]->memo;
523 p->tokens[mark]->memo = m;
524 return 0;
525}
526
527// Like _PyPegen_insert_memo(), but updates an existing node if found.
528int
529_PyPegen_update_memo(Parser *p, int mark, int type, void *node)
530{
531 for (Memo *m = p->tokens[mark]->memo; m != NULL; m = m->next) {
532 if (m->type == type) {
533 // Update existing node.
534 m->node = node;
535 m->mark = p->mark;
536 return 0;
537 }
538 }
539 // Insert new node.
540 return _PyPegen_insert_memo(p, mark, type, node);
541}
542
543// Return dummy NAME.
544void *
545_PyPegen_dummy_name(Parser *p, ...)
546{
547 static void *cache = NULL;
548
549 if (cache != NULL) {
550 return cache;
551 }
552
553 PyObject *id = _create_dummy_identifier(p);
554 if (!id) {
555 return NULL;
556 }
557 cache = Name(id, Load, 1, 0, 1, 0, p->arena);
558 return cache;
559}
560
561static int
562_get_keyword_or_name_type(Parser *p, const char *name, int name_len)
563{
564 if (name_len >= p->n_keyword_lists || p->keywords[name_len] == NULL) {
565 return NAME;
566 }
567 for (KeywordToken *k = p->keywords[name_len]; k->type != -1; k++) {
568 if (strncmp(k->str, name, name_len) == 0) {
569 return k->type;
570 }
571 }
572 return NAME;
573}
574
Guido van Rossumc001c092020-04-30 12:12:19 -0700575static int
576growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
577 assert(initial_size > 0);
578 arr->items = PyMem_Malloc(initial_size * sizeof(*arr->items));
579 arr->size = initial_size;
580 arr->num_items = 0;
581
582 return arr->items != NULL;
583}
584
585static int
586growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
587 if (arr->num_items >= arr->size) {
588 size_t new_size = arr->size * 2;
589 void *new_items_array = PyMem_Realloc(arr->items, new_size * sizeof(*arr->items));
590 if (!new_items_array) {
591 return 0;
592 }
593 arr->items = new_items_array;
594 arr->size = new_size;
595 }
596
597 arr->items[arr->num_items].lineno = lineno;
598 arr->items[arr->num_items].comment = comment; // Take ownership
599 arr->num_items++;
600 return 1;
601}
602
603static void
604growable_comment_array_deallocate(growable_comment_array *arr) {
605 for (unsigned i = 0; i < arr->num_items; i++) {
606 PyMem_Free(arr->items[i].comment);
607 }
608 PyMem_Free(arr->items);
609}
610
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100611int
612_PyPegen_fill_token(Parser *p)
613{
614 const char *start, *end;
615 int type = PyTokenizer_Get(p->tok, &start, &end);
Guido van Rossumc001c092020-04-30 12:12:19 -0700616
617 // Record and skip '# type: ignore' comments
618 while (type == TYPE_IGNORE) {
619 Py_ssize_t len = end - start;
620 char *tag = PyMem_Malloc(len + 1);
621 if (tag == NULL) {
622 PyErr_NoMemory();
623 return -1;
624 }
625 strncpy(tag, start, len);
626 tag[len] = '\0';
627 // Ownership of tag passes to the growable array
628 if (!growable_comment_array_add(&p->type_ignore_comments, p->tok->lineno, tag)) {
629 PyErr_NoMemory();
630 return -1;
631 }
632 type = PyTokenizer_Get(p->tok, &start, &end);
633 }
634
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100635 if (type == ERRORTOKEN) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300636 if (p->tok->done == E_DECODE) {
637 return raise_decode_error(p);
638 }
639 else {
640 return tokenizer_error(p);
641 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100642 }
643 if (type == ENDMARKER && p->start_rule == Py_single_input && p->parsing_started) {
644 type = NEWLINE; /* Add an extra newline */
645 p->parsing_started = 0;
646
Pablo Galindob94dbd72020-04-27 18:35:58 +0100647 if (p->tok->indent && !(p->flags & PyPARSE_DONT_IMPLY_DEDENT)) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100648 p->tok->pendin = -p->tok->indent;
649 p->tok->indent = 0;
650 }
651 }
652 else {
653 p->parsing_started = 1;
654 }
655
656 if (p->fill == p->size) {
657 int newsize = p->size * 2;
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300658 Token **new_tokens = PyMem_Realloc(p->tokens, newsize * sizeof(Token *));
659 if (new_tokens == NULL) {
660 PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100661 return -1;
662 }
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300663 else {
664 p->tokens = new_tokens;
665 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100666 for (int i = p->size; i < newsize; i++) {
667 p->tokens[i] = PyMem_Malloc(sizeof(Token));
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300668 if (p->tokens[i] == NULL) {
669 p->size = i; // Needed, in order to cleanup correctly after parser fails
670 PyErr_NoMemory();
671 return -1;
672 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100673 memset(p->tokens[i], '\0', sizeof(Token));
674 }
675 p->size = newsize;
676 }
677
678 Token *t = p->tokens[p->fill];
679 t->type = (type == NAME) ? _get_keyword_or_name_type(p, start, (int)(end - start)) : type;
680 t->bytes = PyBytes_FromStringAndSize(start, end - start);
681 if (t->bytes == NULL) {
682 return -1;
683 }
684 PyArena_AddPyObject(p->arena, t->bytes);
685
686 int lineno = type == STRING ? p->tok->first_lineno : p->tok->lineno;
687 const char *line_start = type == STRING ? p->tok->multi_line_start : p->tok->line_start;
Pablo Galindo22081342020-04-29 02:04:06 +0100688 int end_lineno = p->tok->lineno;
689 int col_offset = -1, end_col_offset = -1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100690 if (start != NULL && start >= line_start) {
Pablo Galindo22081342020-04-29 02:04:06 +0100691 col_offset = (int)(start - line_start);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100692 }
693 if (end != NULL && end >= p->tok->line_start) {
Pablo Galindo22081342020-04-29 02:04:06 +0100694 end_col_offset = (int)(end - p->tok->line_start);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100695 }
696
697 t->lineno = p->starting_lineno + lineno;
698 t->col_offset = p->tok->lineno == 1 ? p->starting_col_offset + col_offset : col_offset;
699 t->end_lineno = p->starting_lineno + end_lineno;
700 t->end_col_offset = p->tok->lineno == 1 ? p->starting_col_offset + end_col_offset : end_col_offset;
701
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100702 p->fill += 1;
703 return 0;
704}
705
// Instrumentation to count the effectiveness of memoization.
// The array counts the number of tokens skipped by memoization,
// indexed by type.

#define NSTATISTICS 2000
static long memo_statistics[NSTATISTICS];

// Reset every memoization counter to zero.
void
_PyPegen_clear_memo_statistics()
{
    memset(memo_statistics, 0, sizeof(memo_statistics));
}
720
721PyObject *
722_PyPegen_get_memo_statistics()
723{
724 PyObject *ret = PyList_New(NSTATISTICS);
725 if (ret == NULL) {
726 return NULL;
727 }
728 for (int i = 0; i < NSTATISTICS; i++) {
729 PyObject *value = PyLong_FromLong(memo_statistics[i]);
730 if (value == NULL) {
731 Py_DECREF(ret);
732 return NULL;
733 }
734 // PyList_SetItem borrows a reference to value.
735 if (PyList_SetItem(ret, i, value) < 0) {
736 Py_DECREF(ret);
737 return NULL;
738 }
739 }
740 return ret;
741}
742
743int // bool
744_PyPegen_is_memoized(Parser *p, int type, void *pres)
745{
746 if (p->mark == p->fill) {
747 if (_PyPegen_fill_token(p) < 0) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300748 p->error_indicator = 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100749 return -1;
750 }
751 }
752
753 Token *t = p->tokens[p->mark];
754
755 for (Memo *m = t->memo; m != NULL; m = m->next) {
756 if (m->type == type) {
757 if (0 <= type && type < NSTATISTICS) {
758 long count = m->mark - p->mark;
759 // A memoized negative result counts for one.
760 if (count <= 0) {
761 count = 1;
762 }
763 memo_statistics[type] += count;
764 }
765 p->mark = m->mark;
766 *(void **)(pres) = m->node;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100767 return 1;
768 }
769 }
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100770 return 0;
771}
772
Pablo Galindo1df5a9e2020-04-23 12:42:13 +0100773
774int
775_PyPegen_lookahead_with_name(int positive, expr_ty (func)(Parser *), Parser *p)
776{
777 int mark = p->mark;
778 void *res = func(p);
779 p->mark = mark;
780 return (res != NULL) == positive;
781}
782
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100783int
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100784_PyPegen_lookahead_with_int(int positive, Token *(func)(Parser *, int), Parser *p, int arg)
785{
786 int mark = p->mark;
787 void *res = func(p, arg);
788 p->mark = mark;
789 return (res != NULL) == positive;
790}
791
792int
793_PyPegen_lookahead(int positive, void *(func)(Parser *), Parser *p)
794{
795 int mark = p->mark;
Pablo Galindo1df5a9e2020-04-23 12:42:13 +0100796 void *res = (void*)func(p);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100797 p->mark = mark;
798 return (res != NULL) == positive;
799}
800
801Token *
802_PyPegen_expect_token(Parser *p, int type)
803{
804 if (p->mark == p->fill) {
805 if (_PyPegen_fill_token(p) < 0) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +0300806 p->error_indicator = 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100807 return NULL;
808 }
809 }
810 Token *t = p->tokens[p->mark];
811 if (t->type != type) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100812 return NULL;
813 }
814 p->mark += 1;
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100815 return t;
816}
817
818Token *
819_PyPegen_get_last_nonnwhitespace_token(Parser *p)
820{
821 assert(p->mark >= 0);
822 Token *token = NULL;
823 for (int m = p->mark - 1; m >= 0; m--) {
824 token = p->tokens[m];
825 if (token->type != ENDMARKER && (token->type < NEWLINE || token->type > DEDENT)) {
826 break;
827 }
828 }
829 return token;
830}
831
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100832expr_ty
833_PyPegen_name_token(Parser *p)
834{
835 Token *t = _PyPegen_expect_token(p, NAME);
836 if (t == NULL) {
837 return NULL;
838 }
839 char* s = PyBytes_AsString(t->bytes);
840 if (!s) {
841 return NULL;
842 }
843 PyObject *id = _PyPegen_new_identifier(p, s);
844 if (id == NULL) {
845 return NULL;
846 }
847 return Name(id, Load, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
848 p->arena);
849}
850
851void *
852_PyPegen_string_token(Parser *p)
853{
854 return _PyPegen_expect_token(p, STRING);
855}
856
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100857static PyObject *
858parsenumber_raw(const char *s)
859{
860 const char *end;
861 long x;
862 double dx;
863 Py_complex compl;
864 int imflag;
865
866 assert(s != NULL);
867 errno = 0;
868 end = s + strlen(s) - 1;
869 imflag = *end == 'j' || *end == 'J';
870 if (s[0] == '0') {
871 x = (long)PyOS_strtoul(s, (char **)&end, 0);
872 if (x < 0 && errno == 0) {
873 return PyLong_FromString(s, (char **)0, 0);
874 }
875 }
876 else
877 x = PyOS_strtol(s, (char **)&end, 0);
878 if (*end == '\0') {
879 if (errno != 0)
880 return PyLong_FromString(s, (char **)0, 0);
881 return PyLong_FromLong(x);
882 }
883 /* XXX Huge floats may silently fail */
884 if (imflag) {
885 compl.real = 0.;
886 compl.imag = PyOS_string_to_double(s, (char **)&end, NULL);
887 if (compl.imag == -1.0 && PyErr_Occurred())
888 return NULL;
889 return PyComplex_FromCComplex(compl);
890 }
891 else {
892 dx = PyOS_string_to_double(s, NULL, NULL);
893 if (dx == -1.0 && PyErr_Occurred())
894 return NULL;
895 return PyFloat_FromDouble(dx);
896 }
897}
898
899static PyObject *
900parsenumber(const char *s)
901{
902 char *dup, *end;
903 PyObject *res = NULL;
904
905 assert(s != NULL);
906
907 if (strchr(s, '_') == NULL) {
908 return parsenumber_raw(s);
909 }
910 /* Create a duplicate without underscores. */
911 dup = PyMem_Malloc(strlen(s) + 1);
912 if (dup == NULL) {
913 return PyErr_NoMemory();
914 }
915 end = dup;
916 for (; *s; s++) {
917 if (*s != '_') {
918 *end++ = *s;
919 }
920 }
921 *end = '\0';
922 res = parsenumber_raw(dup);
923 PyMem_Free(dup);
924 return res;
925}
926
927expr_ty
928_PyPegen_number_token(Parser *p)
929{
930 Token *t = _PyPegen_expect_token(p, NUMBER);
931 if (t == NULL) {
932 return NULL;
933 }
934
935 char *num_raw = PyBytes_AsString(t->bytes);
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100936 if (num_raw == NULL) {
937 return NULL;
938 }
939
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +0300940 if (p->feature_version < 6 && strchr(num_raw, '_') != NULL) {
941 p->error_indicator = 1;
942 return RAISE_SYNTAX_ERROR("Underscores in numeric literals are only supported"
943 "in Python 3.6 and greater");
944 }
945
Pablo Galindoc5fc1562020-04-22 23:29:27 +0100946 PyObject *c = parsenumber(num_raw);
947
948 if (c == NULL) {
949 return NULL;
950 }
951
952 if (PyArena_AddPyObject(p->arena, c) < 0) {
953 Py_DECREF(c);
954 return NULL;
955 }
956
957 return Constant(c, NULL, t->lineno, t->col_offset, t->end_lineno, t->end_col_offset,
958 p->arena);
959}
960
Lysandros Nikolaou6d650872020-04-29 04:42:27 +0300961static int // bool
962newline_in_string(Parser *p, const char *cur)
963{
964 for (char c = *cur; cur >= p->tok->buf; c = *--cur) {
965 if (c == '\'' || c == '"') {
966 return 1;
967 }
968 }
969 return 0;
970}
971
972/* Check that the source for a single input statement really is a single
973 statement by looking at what is left in the buffer after parsing.
974 Trailing whitespace and comments are OK. */
975static int // bool
976bad_single_statement(Parser *p)
977{
978 const char *cur = strchr(p->tok->buf, '\n');
979
980 /* Newlines are allowed if preceded by a line continuation character
981 or if they appear inside a string. */
982 if (!cur || *(cur - 1) == '\\' || newline_in_string(p, cur)) {
983 return 0;
984 }
985 char c = *cur;
986
987 for (;;) {
988 while (c == ' ' || c == '\t' || c == '\n' || c == '\014') {
989 c = *++cur;
990 }
991
992 if (!c) {
993 return 0;
994 }
995
996 if (c != '#') {
997 return 1;
998 }
999
1000 /* Suck up comment. */
1001 while (c && c != '\n') {
1002 c = *++cur;
1003 }
1004 }
1005}
1006
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001007void
1008_PyPegen_Parser_Free(Parser *p)
1009{
1010 Py_XDECREF(p->normalize);
1011 for (int i = 0; i < p->size; i++) {
1012 PyMem_Free(p->tokens[i]);
1013 }
1014 PyMem_Free(p->tokens);
Guido van Rossumc001c092020-04-30 12:12:19 -07001015 growable_comment_array_deallocate(&p->type_ignore_comments);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001016 PyMem_Free(p);
1017}
1018
Pablo Galindo2b74c832020-04-27 18:02:07 +01001019static int
1020compute_parser_flags(PyCompilerFlags *flags)
1021{
1022 int parser_flags = 0;
1023 if (!flags) {
1024 return 0;
1025 }
1026 if (flags->cf_flags & PyCF_DONT_IMPLY_DEDENT) {
1027 parser_flags |= PyPARSE_DONT_IMPLY_DEDENT;
1028 }
1029 if (flags->cf_flags & PyCF_IGNORE_COOKIE) {
1030 parser_flags |= PyPARSE_IGNORE_COOKIE;
1031 }
1032 if (flags->cf_flags & CO_FUTURE_BARRY_AS_BDFL) {
1033 parser_flags |= PyPARSE_BARRY_AS_BDFL;
1034 }
1035 if (flags->cf_flags & PyCF_TYPE_COMMENTS) {
1036 parser_flags |= PyPARSE_TYPE_COMMENTS;
1037 }
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +03001038 if (flags->cf_feature_version < 7) {
1039 parser_flags |= PyPARSE_ASYNC_HACKS;
1040 }
Pablo Galindo2b74c832020-04-27 18:02:07 +01001041 return parser_flags;
1042}
1043
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001044Parser *
Pablo Galindo2b74c832020-04-27 18:02:07 +01001045_PyPegen_Parser_New(struct tok_state *tok, int start_rule, int flags,
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +03001046 int feature_version, int *errcode, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001047{
1048 Parser *p = PyMem_Malloc(sizeof(Parser));
1049 if (p == NULL) {
Lysandros Nikolaouebebb642020-04-23 18:36:06 +03001050 return (Parser *) PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001051 }
1052 assert(tok != NULL);
1053 p->tok = tok;
1054 p->keywords = NULL;
1055 p->n_keyword_lists = -1;
1056 p->tokens = PyMem_Malloc(sizeof(Token *));
1057 if (!p->tokens) {
1058 PyMem_Free(p);
Lysandros Nikolaouebebb642020-04-23 18:36:06 +03001059 return (Parser *) PyErr_NoMemory();
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001060 }
Guido van Rossumc001c092020-04-30 12:12:19 -07001061 p->tokens[0] = PyMem_Calloc(1, sizeof(Token));
Lysandros Nikolaouebebb642020-04-23 18:36:06 +03001062 if (!p->tokens) {
1063 PyMem_Free(p->tokens);
1064 PyMem_Free(p);
1065 return (Parser *) PyErr_NoMemory();
1066 }
Guido van Rossumc001c092020-04-30 12:12:19 -07001067 if (!growable_comment_array_init(&p->type_ignore_comments, 10)) {
1068 PyMem_Free(p->tokens[0]);
1069 PyMem_Free(p->tokens);
1070 PyMem_Free(p);
1071 return (Parser *) PyErr_NoMemory();
1072 }
1073
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001074 p->mark = 0;
1075 p->fill = 0;
1076 p->size = 1;
1077
1078 p->errcode = errcode;
1079 p->arena = arena;
1080 p->start_rule = start_rule;
1081 p->parsing_started = 0;
1082 p->normalize = NULL;
1083 p->error_indicator = 0;
1084
1085 p->starting_lineno = 0;
1086 p->starting_col_offset = 0;
Pablo Galindo2b74c832020-04-27 18:02:07 +01001087 p->flags = flags;
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +03001088 p->feature_version = feature_version;
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001089
1090 return p;
1091}
1092
1093void *
1094_PyPegen_run_parser(Parser *p)
1095{
1096 void *res = _PyPegen_parse(p);
1097 if (res == NULL) {
1098 if (PyErr_Occurred()) {
1099 return NULL;
1100 }
1101 if (p->fill == 0) {
1102 RAISE_SYNTAX_ERROR("error at start before reading any input");
1103 }
1104 else if (p->tok->done == E_EOF) {
1105 RAISE_SYNTAX_ERROR("unexpected EOF while parsing");
1106 }
1107 else {
1108 if (p->tokens[p->fill-1]->type == INDENT) {
1109 RAISE_INDENTATION_ERROR("unexpected indent");
1110 }
1111 else if (p->tokens[p->fill-1]->type == DEDENT) {
1112 RAISE_INDENTATION_ERROR("unexpected unindent");
1113 }
1114 else {
1115 RAISE_SYNTAX_ERROR("invalid syntax");
1116 }
1117 }
1118 return NULL;
1119 }
1120
Lysandros Nikolaou6d650872020-04-29 04:42:27 +03001121 if (p->start_rule == Py_single_input && bad_single_statement(p)) {
1122 p->tok->done = E_BADSINGLE; // This is not necessary for now, but might be in the future
1123 return RAISE_SYNTAX_ERROR("multiple statements found while compiling a single statement");
1124 }
1125
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001126 return res;
1127}
1128
1129mod_ty
1130_PyPegen_run_parser_from_file_pointer(FILE *fp, int start_rule, PyObject *filename_ob,
1131 const char *enc, const char *ps1, const char *ps2,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001132 PyCompilerFlags *flags, int *errcode, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001133{
1134 struct tok_state *tok = PyTokenizer_FromFile(fp, enc, ps1, ps2);
1135 if (tok == NULL) {
1136 if (PyErr_Occurred()) {
1137 raise_tokenizer_init_error(filename_ob);
1138 return NULL;
1139 }
1140 return NULL;
1141 }
1142 // This transfers the ownership to the tokenizer
1143 tok->filename = filename_ob;
1144 Py_INCREF(filename_ob);
1145
1146 // From here on we need to clean up even if there's an error
1147 mod_ty result = NULL;
1148
Pablo Galindo2b74c832020-04-27 18:02:07 +01001149 int parser_flags = compute_parser_flags(flags);
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +03001150 Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, PY_MINOR_VERSION,
1151 errcode, arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001152 if (p == NULL) {
1153 goto error;
1154 }
1155
1156 result = _PyPegen_run_parser(p);
1157 _PyPegen_Parser_Free(p);
1158
1159error:
1160 PyTokenizer_Free(tok);
1161 return result;
1162}
1163
1164mod_ty
1165_PyPegen_run_parser_from_file(const char *filename, int start_rule,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001166 PyObject *filename_ob, PyCompilerFlags *flags, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001167{
1168 FILE *fp = fopen(filename, "rb");
1169 if (fp == NULL) {
1170 PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
1171 return NULL;
1172 }
1173
1174 mod_ty result = _PyPegen_run_parser_from_file_pointer(fp, start_rule, filename_ob,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001175 NULL, NULL, NULL, flags, NULL, arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001176
1177 fclose(fp);
1178 return result;
1179}
1180
1181mod_ty
1182_PyPegen_run_parser_from_string(const char *str, int start_rule, PyObject *filename_ob,
Pablo Galindo2b74c832020-04-27 18:02:07 +01001183 PyCompilerFlags *flags, PyArena *arena)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001184{
1185 int exec_input = start_rule == Py_file_input;
1186
1187 struct tok_state *tok;
Pablo Galindo2b74c832020-04-27 18:02:07 +01001188 if (flags == NULL || flags->cf_flags & PyCF_IGNORE_COOKIE) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001189 tok = PyTokenizer_FromUTF8(str, exec_input);
1190 } else {
1191 tok = PyTokenizer_FromString(str, exec_input);
1192 }
1193 if (tok == NULL) {
1194 if (PyErr_Occurred()) {
1195 raise_tokenizer_init_error(filename_ob);
1196 }
1197 return NULL;
1198 }
1199 // This transfers the ownership to the tokenizer
1200 tok->filename = filename_ob;
1201 Py_INCREF(filename_ob);
1202
1203 // We need to clear up from here on
1204 mod_ty result = NULL;
1205
Pablo Galindo2b74c832020-04-27 18:02:07 +01001206 int parser_flags = compute_parser_flags(flags);
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +03001207 int feature_version = flags ? flags->cf_feature_version : PY_MINOR_VERSION;
Guido van Rossumc001c092020-04-30 12:12:19 -07001208 tok->type_comments = (parser_flags & PyPARSE_TYPE_COMMENTS) > 0;
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +03001209 tok->async_hacks = (parser_flags & PyPARSE_ASYNC_HACKS) > 0;
Guido van Rossumc001c092020-04-30 12:12:19 -07001210
Lysandros Nikolaou3e0a6f32020-05-01 06:27:52 +03001211 Parser *p = _PyPegen_Parser_New(tok, start_rule, parser_flags, feature_version,
1212 NULL, arena);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001213 if (p == NULL) {
1214 goto error;
1215 }
1216
1217 result = _PyPegen_run_parser(p);
1218 _PyPegen_Parser_Free(p);
1219
1220error:
1221 PyTokenizer_Free(tok);
1222 return result;
1223}
1224
1225void *
1226_PyPegen_interactive_exit(Parser *p)
1227{
1228 if (p->errcode) {
1229 *(p->errcode) = E_EOF;
1230 }
1231 return NULL;
1232}
1233
1234/* Creates a single-element asdl_seq* that contains a */
1235asdl_seq *
1236_PyPegen_singleton_seq(Parser *p, void *a)
1237{
1238 assert(a != NULL);
1239 asdl_seq *seq = _Py_asdl_seq_new(1, p->arena);
1240 if (!seq) {
1241 return NULL;
1242 }
1243 asdl_seq_SET(seq, 0, a);
1244 return seq;
1245}
1246
1247/* Creates a copy of seq and prepends a to it */
1248asdl_seq *
1249_PyPegen_seq_insert_in_front(Parser *p, void *a, asdl_seq *seq)
1250{
1251 assert(a != NULL);
1252 if (!seq) {
1253 return _PyPegen_singleton_seq(p, a);
1254 }
1255
1256 asdl_seq *new_seq = _Py_asdl_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
1257 if (!new_seq) {
1258 return NULL;
1259 }
1260
1261 asdl_seq_SET(new_seq, 0, a);
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001262 for (Py_ssize_t i = 1, l = asdl_seq_LEN(new_seq); i < l; i++) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001263 asdl_seq_SET(new_seq, i, asdl_seq_GET(seq, i - 1));
1264 }
1265 return new_seq;
1266}
1267
Guido van Rossumc001c092020-04-30 12:12:19 -07001268/* Creates a copy of seq and appends a to it */
1269asdl_seq *
1270_PyPegen_seq_append_to_end(Parser *p, asdl_seq *seq, void *a)
1271{
1272 assert(a != NULL);
1273 if (!seq) {
1274 return _PyPegen_singleton_seq(p, a);
1275 }
1276
1277 asdl_seq *new_seq = _Py_asdl_seq_new(asdl_seq_LEN(seq) + 1, p->arena);
1278 if (!new_seq) {
1279 return NULL;
1280 }
1281
1282 for (Py_ssize_t i = 0, l = asdl_seq_LEN(new_seq); i + 1 < l; i++) {
1283 asdl_seq_SET(new_seq, i, asdl_seq_GET(seq, i));
1284 }
1285 asdl_seq_SET(new_seq, asdl_seq_LEN(new_seq) - 1, a);
1286 return new_seq;
1287}
1288
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001289static Py_ssize_t
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001290_get_flattened_seq_size(asdl_seq *seqs)
1291{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001292 Py_ssize_t size = 0;
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001293 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1294 asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
1295 size += asdl_seq_LEN(inner_seq);
1296 }
1297 return size;
1298}
1299
1300/* Flattens an asdl_seq* of asdl_seq*s */
1301asdl_seq *
1302_PyPegen_seq_flatten(Parser *p, asdl_seq *seqs)
1303{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001304 Py_ssize_t flattened_seq_size = _get_flattened_seq_size(seqs);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001305 assert(flattened_seq_size > 0);
1306
1307 asdl_seq *flattened_seq = _Py_asdl_seq_new(flattened_seq_size, p->arena);
1308 if (!flattened_seq) {
1309 return NULL;
1310 }
1311
1312 int flattened_seq_idx = 0;
1313 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seqs); i < l; i++) {
1314 asdl_seq *inner_seq = asdl_seq_GET(seqs, i);
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001315 for (Py_ssize_t j = 0, li = asdl_seq_LEN(inner_seq); j < li; j++) {
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001316 asdl_seq_SET(flattened_seq, flattened_seq_idx++, asdl_seq_GET(inner_seq, j));
1317 }
1318 }
1319 assert(flattened_seq_idx == flattened_seq_size);
1320
1321 return flattened_seq;
1322}
1323
1324/* Creates a new name of the form <first_name>.<second_name> */
1325expr_ty
1326_PyPegen_join_names_with_dot(Parser *p, expr_ty first_name, expr_ty second_name)
1327{
1328 assert(first_name != NULL && second_name != NULL);
1329 PyObject *first_identifier = first_name->v.Name.id;
1330 PyObject *second_identifier = second_name->v.Name.id;
1331
1332 if (PyUnicode_READY(first_identifier) == -1) {
1333 return NULL;
1334 }
1335 if (PyUnicode_READY(second_identifier) == -1) {
1336 return NULL;
1337 }
1338 const char *first_str = PyUnicode_AsUTF8(first_identifier);
1339 if (!first_str) {
1340 return NULL;
1341 }
1342 const char *second_str = PyUnicode_AsUTF8(second_identifier);
1343 if (!second_str) {
1344 return NULL;
1345 }
Pablo Galindo9f27dd32020-04-24 01:13:33 +01001346 Py_ssize_t len = strlen(first_str) + strlen(second_str) + 1; // +1 for the dot
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001347
1348 PyObject *str = PyBytes_FromStringAndSize(NULL, len);
1349 if (!str) {
1350 return NULL;
1351 }
1352
1353 char *s = PyBytes_AS_STRING(str);
1354 if (!s) {
1355 return NULL;
1356 }
1357
1358 strcpy(s, first_str);
1359 s += strlen(first_str);
1360 *s++ = '.';
1361 strcpy(s, second_str);
1362 s += strlen(second_str);
1363 *s = '\0';
1364
1365 PyObject *uni = PyUnicode_DecodeUTF8(PyBytes_AS_STRING(str), PyBytes_GET_SIZE(str), NULL);
1366 Py_DECREF(str);
1367 if (!uni) {
1368 return NULL;
1369 }
1370 PyUnicode_InternInPlace(&uni);
1371 if (PyArena_AddPyObject(p->arena, uni) < 0) {
1372 Py_DECREF(uni);
1373 return NULL;
1374 }
1375
1376 return _Py_Name(uni, Load, EXTRA_EXPR(first_name, second_name));
1377}
1378
1379/* Counts the total number of dots in seq's tokens */
1380int
1381_PyPegen_seq_count_dots(asdl_seq *seq)
1382{
1383 int number_of_dots = 0;
1384 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1385 Token *current_expr = asdl_seq_GET(seq, i);
1386 switch (current_expr->type) {
1387 case ELLIPSIS:
1388 number_of_dots += 3;
1389 break;
1390 case DOT:
1391 number_of_dots += 1;
1392 break;
1393 default:
Lysandros Nikolaouebebb642020-04-23 18:36:06 +03001394 Py_UNREACHABLE();
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001395 }
1396 }
1397
1398 return number_of_dots;
1399}
1400
1401/* Creates an alias with '*' as the identifier name */
1402alias_ty
1403_PyPegen_alias_for_star(Parser *p)
1404{
1405 PyObject *str = PyUnicode_InternFromString("*");
1406 if (!str) {
1407 return NULL;
1408 }
1409 if (PyArena_AddPyObject(p->arena, str) < 0) {
1410 Py_DECREF(str);
1411 return NULL;
1412 }
1413 return alias(str, NULL, p->arena);
1414}
1415
1416/* Creates a new asdl_seq* with the identifiers of all the names in seq */
1417asdl_seq *
1418_PyPegen_map_names_to_ids(Parser *p, asdl_seq *seq)
1419{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001420 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001421 assert(len > 0);
1422
1423 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1424 if (!new_seq) {
1425 return NULL;
1426 }
1427 for (Py_ssize_t i = 0; i < len; i++) {
1428 expr_ty e = asdl_seq_GET(seq, i);
1429 asdl_seq_SET(new_seq, i, e->v.Name.id);
1430 }
1431 return new_seq;
1432}
1433
1434/* Constructs a CmpopExprPair */
1435CmpopExprPair *
1436_PyPegen_cmpop_expr_pair(Parser *p, cmpop_ty cmpop, expr_ty expr)
1437{
1438 assert(expr != NULL);
1439 CmpopExprPair *a = PyArena_Malloc(p->arena, sizeof(CmpopExprPair));
1440 if (!a) {
1441 return NULL;
1442 }
1443 a->cmpop = cmpop;
1444 a->expr = expr;
1445 return a;
1446}
1447
1448asdl_int_seq *
1449_PyPegen_get_cmpops(Parser *p, asdl_seq *seq)
1450{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001451 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001452 assert(len > 0);
1453
1454 asdl_int_seq *new_seq = _Py_asdl_int_seq_new(len, p->arena);
1455 if (!new_seq) {
1456 return NULL;
1457 }
1458 for (Py_ssize_t i = 0; i < len; i++) {
1459 CmpopExprPair *pair = asdl_seq_GET(seq, i);
1460 asdl_seq_SET(new_seq, i, pair->cmpop);
1461 }
1462 return new_seq;
1463}
1464
1465asdl_seq *
1466_PyPegen_get_exprs(Parser *p, asdl_seq *seq)
1467{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001468 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001469 assert(len > 0);
1470
1471 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1472 if (!new_seq) {
1473 return NULL;
1474 }
1475 for (Py_ssize_t i = 0; i < len; i++) {
1476 CmpopExprPair *pair = asdl_seq_GET(seq, i);
1477 asdl_seq_SET(new_seq, i, pair->expr);
1478 }
1479 return new_seq;
1480}
1481
1482/* Creates an asdl_seq* where all the elements have been changed to have ctx as context */
1483static asdl_seq *
1484_set_seq_context(Parser *p, asdl_seq *seq, expr_context_ty ctx)
1485{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001486 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001487 if (len == 0) {
1488 return NULL;
1489 }
1490
1491 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1492 if (!new_seq) {
1493 return NULL;
1494 }
1495 for (Py_ssize_t i = 0; i < len; i++) {
1496 expr_ty e = asdl_seq_GET(seq, i);
1497 asdl_seq_SET(new_seq, i, _PyPegen_set_expr_context(p, e, ctx));
1498 }
1499 return new_seq;
1500}
1501
1502static expr_ty
1503_set_name_context(Parser *p, expr_ty e, expr_context_ty ctx)
1504{
1505 return _Py_Name(e->v.Name.id, ctx, EXTRA_EXPR(e, e));
1506}
1507
1508static expr_ty
1509_set_tuple_context(Parser *p, expr_ty e, expr_context_ty ctx)
1510{
1511 return _Py_Tuple(_set_seq_context(p, e->v.Tuple.elts, ctx), ctx, EXTRA_EXPR(e, e));
1512}
1513
1514static expr_ty
1515_set_list_context(Parser *p, expr_ty e, expr_context_ty ctx)
1516{
1517 return _Py_List(_set_seq_context(p, e->v.List.elts, ctx), ctx, EXTRA_EXPR(e, e));
1518}
1519
1520static expr_ty
1521_set_subscript_context(Parser *p, expr_ty e, expr_context_ty ctx)
1522{
1523 return _Py_Subscript(e->v.Subscript.value, e->v.Subscript.slice, ctx, EXTRA_EXPR(e, e));
1524}
1525
1526static expr_ty
1527_set_attribute_context(Parser *p, expr_ty e, expr_context_ty ctx)
1528{
1529 return _Py_Attribute(e->v.Attribute.value, e->v.Attribute.attr, ctx, EXTRA_EXPR(e, e));
1530}
1531
1532static expr_ty
1533_set_starred_context(Parser *p, expr_ty e, expr_context_ty ctx)
1534{
1535 return _Py_Starred(_PyPegen_set_expr_context(p, e->v.Starred.value, ctx), ctx, EXTRA_EXPR(e, e));
1536}
1537
1538/* Creates an `expr_ty` equivalent to `expr` but with `ctx` as context */
1539expr_ty
1540_PyPegen_set_expr_context(Parser *p, expr_ty expr, expr_context_ty ctx)
1541{
1542 assert(expr != NULL);
1543
1544 expr_ty new = NULL;
1545 switch (expr->kind) {
1546 case Name_kind:
1547 new = _set_name_context(p, expr, ctx);
1548 break;
1549 case Tuple_kind:
1550 new = _set_tuple_context(p, expr, ctx);
1551 break;
1552 case List_kind:
1553 new = _set_list_context(p, expr, ctx);
1554 break;
1555 case Subscript_kind:
1556 new = _set_subscript_context(p, expr, ctx);
1557 break;
1558 case Attribute_kind:
1559 new = _set_attribute_context(p, expr, ctx);
1560 break;
1561 case Starred_kind:
1562 new = _set_starred_context(p, expr, ctx);
1563 break;
1564 default:
1565 new = expr;
1566 }
1567 return new;
1568}
1569
1570/* Constructs a KeyValuePair that is used when parsing a dict's key value pairs */
1571KeyValuePair *
1572_PyPegen_key_value_pair(Parser *p, expr_ty key, expr_ty value)
1573{
1574 KeyValuePair *a = PyArena_Malloc(p->arena, sizeof(KeyValuePair));
1575 if (!a) {
1576 return NULL;
1577 }
1578 a->key = key;
1579 a->value = value;
1580 return a;
1581}
1582
1583/* Extracts all keys from an asdl_seq* of KeyValuePair*'s */
1584asdl_seq *
1585_PyPegen_get_keys(Parser *p, asdl_seq *seq)
1586{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001587 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001588 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1589 if (!new_seq) {
1590 return NULL;
1591 }
1592 for (Py_ssize_t i = 0; i < len; i++) {
1593 KeyValuePair *pair = asdl_seq_GET(seq, i);
1594 asdl_seq_SET(new_seq, i, pair->key);
1595 }
1596 return new_seq;
1597}
1598
1599/* Extracts all values from an asdl_seq* of KeyValuePair*'s */
1600asdl_seq *
1601_PyPegen_get_values(Parser *p, asdl_seq *seq)
1602{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001603 Py_ssize_t len = asdl_seq_LEN(seq);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001604 asdl_seq *new_seq = _Py_asdl_seq_new(len, p->arena);
1605 if (!new_seq) {
1606 return NULL;
1607 }
1608 for (Py_ssize_t i = 0; i < len; i++) {
1609 KeyValuePair *pair = asdl_seq_GET(seq, i);
1610 asdl_seq_SET(new_seq, i, pair->value);
1611 }
1612 return new_seq;
1613}
1614
1615/* Constructs a NameDefaultPair */
1616NameDefaultPair *
Guido van Rossumc001c092020-04-30 12:12:19 -07001617_PyPegen_name_default_pair(Parser *p, arg_ty arg, expr_ty value, Token *tc)
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001618{
1619 NameDefaultPair *a = PyArena_Malloc(p->arena, sizeof(NameDefaultPair));
1620 if (!a) {
1621 return NULL;
1622 }
Guido van Rossumc001c092020-04-30 12:12:19 -07001623 a->arg = _PyPegen_add_type_comment_to_arg(p, arg, tc);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001624 a->value = value;
1625 return a;
1626}
1627
1628/* Constructs a SlashWithDefault */
1629SlashWithDefault *
1630_PyPegen_slash_with_default(Parser *p, asdl_seq *plain_names, asdl_seq *names_with_defaults)
1631{
1632 SlashWithDefault *a = PyArena_Malloc(p->arena, sizeof(SlashWithDefault));
1633 if (!a) {
1634 return NULL;
1635 }
1636 a->plain_names = plain_names;
1637 a->names_with_defaults = names_with_defaults;
1638 return a;
1639}
1640
1641/* Constructs a StarEtc */
1642StarEtc *
1643_PyPegen_star_etc(Parser *p, arg_ty vararg, asdl_seq *kwonlyargs, arg_ty kwarg)
1644{
1645 StarEtc *a = PyArena_Malloc(p->arena, sizeof(StarEtc));
1646 if (!a) {
1647 return NULL;
1648 }
1649 a->vararg = vararg;
1650 a->kwonlyargs = kwonlyargs;
1651 a->kwarg = kwarg;
1652 return a;
1653}
1654
1655asdl_seq *
1656_PyPegen_join_sequences(Parser *p, asdl_seq *a, asdl_seq *b)
1657{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001658 Py_ssize_t first_len = asdl_seq_LEN(a);
1659 Py_ssize_t second_len = asdl_seq_LEN(b);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001660 asdl_seq *new_seq = _Py_asdl_seq_new(first_len + second_len, p->arena);
1661 if (!new_seq) {
1662 return NULL;
1663 }
1664
1665 int k = 0;
1666 for (Py_ssize_t i = 0; i < first_len; i++) {
1667 asdl_seq_SET(new_seq, k++, asdl_seq_GET(a, i));
1668 }
1669 for (Py_ssize_t i = 0; i < second_len; i++) {
1670 asdl_seq_SET(new_seq, k++, asdl_seq_GET(b, i));
1671 }
1672
1673 return new_seq;
1674}
1675
1676static asdl_seq *
1677_get_names(Parser *p, asdl_seq *names_with_defaults)
1678{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001679 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001680 asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
1681 if (!seq) {
1682 return NULL;
1683 }
1684 for (Py_ssize_t i = 0; i < len; i++) {
1685 NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
1686 asdl_seq_SET(seq, i, pair->arg);
1687 }
1688 return seq;
1689}
1690
1691static asdl_seq *
1692_get_defaults(Parser *p, asdl_seq *names_with_defaults)
1693{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001694 Py_ssize_t len = asdl_seq_LEN(names_with_defaults);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001695 asdl_seq *seq = _Py_asdl_seq_new(len, p->arena);
1696 if (!seq) {
1697 return NULL;
1698 }
1699 for (Py_ssize_t i = 0; i < len; i++) {
1700 NameDefaultPair *pair = asdl_seq_GET(names_with_defaults, i);
1701 asdl_seq_SET(seq, i, pair->value);
1702 }
1703 return seq;
1704}
1705
1706/* Constructs an arguments_ty object out of all the parsed constructs in the parameters rule */
1707arguments_ty
1708_PyPegen_make_arguments(Parser *p, asdl_seq *slash_without_default,
1709 SlashWithDefault *slash_with_default, asdl_seq *plain_names,
1710 asdl_seq *names_with_default, StarEtc *star_etc)
1711{
1712 asdl_seq *posonlyargs;
1713 if (slash_without_default != NULL) {
1714 posonlyargs = slash_without_default;
1715 }
1716 else if (slash_with_default != NULL) {
1717 asdl_seq *slash_with_default_names =
1718 _get_names(p, slash_with_default->names_with_defaults);
1719 if (!slash_with_default_names) {
1720 return NULL;
1721 }
1722 posonlyargs = _PyPegen_join_sequences(p, slash_with_default->plain_names, slash_with_default_names);
1723 if (!posonlyargs) {
1724 return NULL;
1725 }
1726 }
1727 else {
1728 posonlyargs = _Py_asdl_seq_new(0, p->arena);
1729 if (!posonlyargs) {
1730 return NULL;
1731 }
1732 }
1733
1734 asdl_seq *posargs;
1735 if (plain_names != NULL && names_with_default != NULL) {
1736 asdl_seq *names_with_default_names = _get_names(p, names_with_default);
1737 if (!names_with_default_names) {
1738 return NULL;
1739 }
1740 posargs = _PyPegen_join_sequences(p, plain_names, names_with_default_names);
1741 if (!posargs) {
1742 return NULL;
1743 }
1744 }
1745 else if (plain_names == NULL && names_with_default != NULL) {
1746 posargs = _get_names(p, names_with_default);
1747 if (!posargs) {
1748 return NULL;
1749 }
1750 }
1751 else if (plain_names != NULL && names_with_default == NULL) {
1752 posargs = plain_names;
1753 }
1754 else {
1755 posargs = _Py_asdl_seq_new(0, p->arena);
1756 if (!posargs) {
1757 return NULL;
1758 }
1759 }
1760
1761 asdl_seq *posdefaults;
1762 if (slash_with_default != NULL && names_with_default != NULL) {
1763 asdl_seq *slash_with_default_values =
1764 _get_defaults(p, slash_with_default->names_with_defaults);
1765 if (!slash_with_default_values) {
1766 return NULL;
1767 }
1768 asdl_seq *names_with_default_values = _get_defaults(p, names_with_default);
1769 if (!names_with_default_values) {
1770 return NULL;
1771 }
1772 posdefaults = _PyPegen_join_sequences(p, slash_with_default_values, names_with_default_values);
1773 if (!posdefaults) {
1774 return NULL;
1775 }
1776 }
1777 else if (slash_with_default == NULL && names_with_default != NULL) {
1778 posdefaults = _get_defaults(p, names_with_default);
1779 if (!posdefaults) {
1780 return NULL;
1781 }
1782 }
1783 else if (slash_with_default != NULL && names_with_default == NULL) {
1784 posdefaults = _get_defaults(p, slash_with_default->names_with_defaults);
1785 if (!posdefaults) {
1786 return NULL;
1787 }
1788 }
1789 else {
1790 posdefaults = _Py_asdl_seq_new(0, p->arena);
1791 if (!posdefaults) {
1792 return NULL;
1793 }
1794 }
1795
1796 arg_ty vararg = NULL;
1797 if (star_etc != NULL && star_etc->vararg != NULL) {
1798 vararg = star_etc->vararg;
1799 }
1800
1801 asdl_seq *kwonlyargs;
1802 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
1803 kwonlyargs = _get_names(p, star_etc->kwonlyargs);
1804 if (!kwonlyargs) {
1805 return NULL;
1806 }
1807 }
1808 else {
1809 kwonlyargs = _Py_asdl_seq_new(0, p->arena);
1810 if (!kwonlyargs) {
1811 return NULL;
1812 }
1813 }
1814
1815 asdl_seq *kwdefaults;
1816 if (star_etc != NULL && star_etc->kwonlyargs != NULL) {
1817 kwdefaults = _get_defaults(p, star_etc->kwonlyargs);
1818 if (!kwdefaults) {
1819 return NULL;
1820 }
1821 }
1822 else {
1823 kwdefaults = _Py_asdl_seq_new(0, p->arena);
1824 if (!kwdefaults) {
1825 return NULL;
1826 }
1827 }
1828
1829 arg_ty kwarg = NULL;
1830 if (star_etc != NULL && star_etc->kwarg != NULL) {
1831 kwarg = star_etc->kwarg;
1832 }
1833
1834 return _Py_arguments(posonlyargs, posargs, vararg, kwonlyargs, kwdefaults, kwarg,
1835 posdefaults, p->arena);
1836}
1837
1838/* Constructs an empty arguments_ty object, that gets used when a function accepts no
1839 * arguments. */
1840arguments_ty
1841_PyPegen_empty_arguments(Parser *p)
1842{
1843 asdl_seq *posonlyargs = _Py_asdl_seq_new(0, p->arena);
1844 if (!posonlyargs) {
1845 return NULL;
1846 }
1847 asdl_seq *posargs = _Py_asdl_seq_new(0, p->arena);
1848 if (!posargs) {
1849 return NULL;
1850 }
1851 asdl_seq *posdefaults = _Py_asdl_seq_new(0, p->arena);
1852 if (!posdefaults) {
1853 return NULL;
1854 }
1855 asdl_seq *kwonlyargs = _Py_asdl_seq_new(0, p->arena);
1856 if (!kwonlyargs) {
1857 return NULL;
1858 }
1859 asdl_seq *kwdefaults = _Py_asdl_seq_new(0, p->arena);
1860 if (!kwdefaults) {
1861 return NULL;
1862 }
1863
1864 return _Py_arguments(posonlyargs, posargs, NULL, kwonlyargs, kwdefaults, NULL, kwdefaults,
1865 p->arena);
1866}
1867
1868/* Encapsulates the value of an operator_ty into an AugOperator struct */
1869AugOperator *
1870_PyPegen_augoperator(Parser *p, operator_ty kind)
1871{
1872 AugOperator *a = PyArena_Malloc(p->arena, sizeof(AugOperator));
1873 if (!a) {
1874 return NULL;
1875 }
1876 a->kind = kind;
1877 return a;
1878}
1879
1880/* Construct a FunctionDef equivalent to function_def, but with decorators */
1881stmt_ty
1882_PyPegen_function_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty function_def)
1883{
1884 assert(function_def != NULL);
1885 if (function_def->kind == AsyncFunctionDef_kind) {
1886 return _Py_AsyncFunctionDef(
1887 function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
1888 function_def->v.FunctionDef.body, decorators, function_def->v.FunctionDef.returns,
1889 function_def->v.FunctionDef.type_comment, function_def->lineno,
1890 function_def->col_offset, function_def->end_lineno, function_def->end_col_offset,
1891 p->arena);
1892 }
1893
1894 return _Py_FunctionDef(function_def->v.FunctionDef.name, function_def->v.FunctionDef.args,
1895 function_def->v.FunctionDef.body, decorators,
1896 function_def->v.FunctionDef.returns,
1897 function_def->v.FunctionDef.type_comment, function_def->lineno,
1898 function_def->col_offset, function_def->end_lineno,
1899 function_def->end_col_offset, p->arena);
1900}
1901
1902/* Construct a ClassDef equivalent to class_def, but with decorators */
1903stmt_ty
1904_PyPegen_class_def_decorators(Parser *p, asdl_seq *decorators, stmt_ty class_def)
1905{
1906 assert(class_def != NULL);
1907 return _Py_ClassDef(class_def->v.ClassDef.name, class_def->v.ClassDef.bases,
1908 class_def->v.ClassDef.keywords, class_def->v.ClassDef.body, decorators,
1909 class_def->lineno, class_def->col_offset, class_def->end_lineno,
1910 class_def->end_col_offset, p->arena);
1911}
1912
1913/* Construct a KeywordOrStarred */
1914KeywordOrStarred *
1915_PyPegen_keyword_or_starred(Parser *p, void *element, int is_keyword)
1916{
1917 KeywordOrStarred *a = PyArena_Malloc(p->arena, sizeof(KeywordOrStarred));
1918 if (!a) {
1919 return NULL;
1920 }
1921 a->element = element;
1922 a->is_keyword = is_keyword;
1923 return a;
1924}
1925
1926/* Get the number of starred expressions in an asdl_seq* of KeywordOrStarred*s */
1927static int
1928_seq_number_of_starred_exprs(asdl_seq *seq)
1929{
1930 int n = 0;
1931 for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
1932 KeywordOrStarred *k = asdl_seq_GET(seq, i);
1933 if (!k->is_keyword) {
1934 n++;
1935 }
1936 }
1937 return n;
1938}
1939
1940/* Extract the starred expressions of an asdl_seq* of KeywordOrStarred*s */
1941asdl_seq *
1942_PyPegen_seq_extract_starred_exprs(Parser *p, asdl_seq *kwargs)
1943{
1944 int new_len = _seq_number_of_starred_exprs(kwargs);
1945 if (new_len == 0) {
1946 return NULL;
1947 }
1948 asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
1949 if (!new_seq) {
1950 return NULL;
1951 }
1952
1953 int idx = 0;
1954 for (Py_ssize_t i = 0, len = asdl_seq_LEN(kwargs); i < len; i++) {
1955 KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
1956 if (!k->is_keyword) {
1957 asdl_seq_SET(new_seq, idx++, k->element);
1958 }
1959 }
1960 return new_seq;
1961}
1962
1963/* Return a new asdl_seq* with only the keywords in kwargs */
1964asdl_seq *
1965_PyPegen_seq_delete_starred_exprs(Parser *p, asdl_seq *kwargs)
1966{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001967 Py_ssize_t len = asdl_seq_LEN(kwargs);
1968 Py_ssize_t new_len = len - _seq_number_of_starred_exprs(kwargs);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001969 if (new_len == 0) {
1970 return NULL;
1971 }
1972 asdl_seq *new_seq = _Py_asdl_seq_new(new_len, p->arena);
1973 if (!new_seq) {
1974 return NULL;
1975 }
1976
1977 int idx = 0;
1978 for (Py_ssize_t i = 0; i < len; i++) {
1979 KeywordOrStarred *k = asdl_seq_GET(kwargs, i);
1980 if (k->is_keyword) {
1981 asdl_seq_SET(new_seq, idx++, k->element);
1982 }
1983 }
1984 return new_seq;
1985}
1986
1987expr_ty
1988_PyPegen_concatenate_strings(Parser *p, asdl_seq *strings)
1989{
Pablo Galindoee40e4b2020-04-23 03:43:08 +01001990 Py_ssize_t len = asdl_seq_LEN(strings);
Pablo Galindoc5fc1562020-04-22 23:29:27 +01001991 assert(len > 0);
1992
1993 Token *first = asdl_seq_GET(strings, 0);
1994 Token *last = asdl_seq_GET(strings, len - 1);
1995
1996 int bytesmode = 0;
1997 PyObject *bytes_str = NULL;
1998
1999 FstringParser state;
2000 _PyPegen_FstringParser_Init(&state);
2001
2002 for (Py_ssize_t i = 0; i < len; i++) {
2003 Token *t = asdl_seq_GET(strings, i);
2004
2005 int this_bytesmode;
2006 int this_rawmode;
2007 PyObject *s;
2008 const char *fstr;
2009 Py_ssize_t fstrlen = -1;
2010
2011 char *this_str = PyBytes_AsString(t->bytes);
2012 if (!this_str) {
2013 goto error;
2014 }
2015
2016 if (_PyPegen_parsestr(p, this_str, &this_bytesmode, &this_rawmode, &s, &fstr, &fstrlen) != 0) {
2017 goto error;
2018 }
2019
2020 /* Check that we are not mixing bytes with unicode. */
2021 if (i != 0 && bytesmode != this_bytesmode) {
2022 RAISE_SYNTAX_ERROR("cannot mix bytes and nonbytes literals");
2023 Py_XDECREF(s);
2024 goto error;
2025 }
2026 bytesmode = this_bytesmode;
2027
2028 if (fstr != NULL) {
2029 assert(s == NULL && !bytesmode);
2030
2031 int result = _PyPegen_FstringParser_ConcatFstring(p, &state, &fstr, fstr + fstrlen,
2032 this_rawmode, 0, first, t, last);
2033 if (result < 0) {
2034 goto error;
2035 }
2036 }
2037 else {
2038 /* String or byte string. */
2039 assert(s != NULL && fstr == NULL);
2040 assert(bytesmode ? PyBytes_CheckExact(s) : PyUnicode_CheckExact(s));
2041
2042 if (bytesmode) {
2043 if (i == 0) {
2044 bytes_str = s;
2045 }
2046 else {
2047 PyBytes_ConcatAndDel(&bytes_str, s);
2048 if (!bytes_str) {
2049 goto error;
2050 }
2051 }
2052 }
2053 else {
2054 /* This is a regular string. Concatenate it. */
2055 if (_PyPegen_FstringParser_ConcatAndDel(&state, s) < 0) {
2056 goto error;
2057 }
2058 }
2059 }
2060 }
2061
2062 if (bytesmode) {
2063 if (PyArena_AddPyObject(p->arena, bytes_str) < 0) {
2064 goto error;
2065 }
2066 return Constant(bytes_str, NULL, first->lineno, first->col_offset, last->end_lineno,
2067 last->end_col_offset, p->arena);
2068 }
2069
2070 return _PyPegen_FstringParser_Finish(p, &state, first, last);
2071
2072error:
2073 Py_XDECREF(bytes_str);
2074 _PyPegen_FstringParser_Dealloc(&state);
2075 if (PyErr_Occurred()) {
2076 raise_decode_error(p);
2077 }
2078 return NULL;
2079}
Guido van Rossumc001c092020-04-30 12:12:19 -07002080
2081mod_ty
2082_PyPegen_make_module(Parser *p, asdl_seq *a) {
2083 asdl_seq *type_ignores = NULL;
2084 Py_ssize_t num = p->type_ignore_comments.num_items;
2085 if (num > 0) {
2086 // Turn the raw (comment, lineno) pairs into TypeIgnore objects in the arena
2087 type_ignores = _Py_asdl_seq_new(num, p->arena);
2088 if (type_ignores == NULL) {
2089 return NULL;
2090 }
2091 for (int i = 0; i < num; i++) {
2092 PyObject *tag = _PyPegen_new_type_comment(p, p->type_ignore_comments.items[i].comment);
2093 if (tag == NULL) {
2094 return NULL;
2095 }
2096 type_ignore_ty ti = TypeIgnore(p->type_ignore_comments.items[i].lineno, tag, p->arena);
2097 if (ti == NULL) {
2098 return NULL;
2099 }
2100 asdl_seq_SET(type_ignores, i, ti);
2101 }
2102 }
2103 return Module(a, type_ignores, p->arena);
2104}