Blame - Parser/pegen/parse_string.c - platform/external/python/cpython3

blob: 94241e1965e9a8e01c2c33fe91efb5bdcf1fcaa2 [file] [log] [blame]

Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1	#include <Python.h>
				2
				3	#include "../tokenizer.h"
				4	#include "pegen.h"
				5	#include "parse_string.h"
				6
				7	//// STRING HANDLING FUNCTIONS ////
				8
// These functions are ported directly from Python/ast.c with some modifications
// to account for the use of "Parser *p", the fact that we don't have parser nodes
// to pass around, and the usage of some specialized APIs present only in this
// file (like "_PyPegen_raise_syntax_error").
				13
				14	static int
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	15	warn_invalid_escape_sequence(Parser p, unsigned char first_invalid_escape_char, Token t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	16	{
				17	PyObject *msg =
				18	PyUnicode_FromFormat("invalid escape sequence \\%c", first_invalid_escape_char);
				19	if (msg == NULL) {
				20	return -1;
				21	}
				22	if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	23	t->lineno, NULL, NULL) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	24	if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
				25	/* Replace the DeprecationWarning exception with a SyntaxError
				26	to get a more accurate error report */
				27	PyErr_Clear();
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	28
				29	/* This is needed, in order for the SyntaxError to point to the token t,
				30	since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
				31	error location, if p->known_err_token is not set. */
				32	p->known_err_token = t;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	33	RAISE_SYNTAX_ERROR("invalid escape sequence \\%c", first_invalid_escape_char);
				34	}
				35	Py_DECREF(msg);
				36	return -1;
				37	}
				38	Py_DECREF(msg);
				39	return 0;
				40	}
				41
				42	static PyObject *
				43	decode_utf8(const char *sPtr, const char end)
				44	{
				45	const char s, t;
				46	t = s = *sPtr;
				47	while (s < end && (*s & 0x80)) {
				48	s++;
				49	}
				50	*sPtr = s;
				51	return PyUnicode_DecodeUTF8(t, s - t, NULL);
				52	}
				53
				54	static PyObject *
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	55	decode_unicode_with_escapes(Parser parser, const char s, size_t len, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	56	{
				57	PyObject v, u;
				58	char *buf;
				59	char *p;
				60	const char *end;
				61
				62	/* check for integer overflow */
				63	if (len > SIZE_MAX / 6) {
				64	return NULL;
				65	}
				66	/* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
				67	"\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
				68	u = PyBytes_FromStringAndSize((char )NULL, len 6);
				69	if (u == NULL) {
				70	return NULL;
				71	}
				72	p = buf = PyBytes_AsString(u);
				73	end = s + len;
				74	while (s < end) {
				75	if (*s == '\\') {
				76	p++ = s++;
				77	if (s >= end \|\| *s & 0x80) {
				78	strcpy(p, "u005c");
				79	p += 5;
				80	if (s >= end) {
				81	break;
				82	}
				83	}
				84	}
				85	if (*s & 0x80) {
				86	PyObject *w;
				87	int kind;
				88	void *data;
				89	Py_ssize_t len, i;
				90	w = decode_utf8(&s, end);
				91	if (w == NULL) {
				92	Py_DECREF(u);
				93	return NULL;
				94	}
				95	kind = PyUnicode_KIND(w);
				96	data = PyUnicode_DATA(w);
				97	len = PyUnicode_GET_LENGTH(w);
				98	for (i = 0; i < len; i++) {
				99	Py_UCS4 chr = PyUnicode_READ(kind, data, i);
				100	sprintf(p, "\\U%08x", chr);
				101	p += 10;
				102	}
				103	/* Should be impossible to overflow */
				104	assert(p - buf <= PyBytes_GET_SIZE(u));
				105	Py_DECREF(w);
				106	}
				107	else {
				108	p++ = s++;
				109	}
				110	}
				111	len = p - buf;
				112	s = buf;
				113
				114	const char *first_invalid_escape;
				115	v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
				116
				117	if (v != NULL && first_invalid_escape != NULL) {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	118	if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	119	/* We have not decref u before because first_invalid_escape points
				120	inside u. */
				121	Py_XDECREF(u);
				122	Py_DECREF(v);
				123	return NULL;
				124	}
				125	}
				126	Py_XDECREF(u);
				127	return v;
				128	}
				129
				130	static PyObject *
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	131	decode_bytes_with_escapes(Parser p, const char s, Py_ssize_t len, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	132	{
				133	const char *first_invalid_escape;
				134	PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
				135	if (result == NULL) {
				136	return NULL;
				137	}
				138
				139	if (first_invalid_escape != NULL) {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	140	if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	141	Py_DECREF(result);
				142	return NULL;
				143	}
				144	}
				145	return result;
				146	}
				147
				148	/* s must include the bracketing quote characters, and r, b, u,
				149	&/or f prefixes (if any), and embedded escape sequences (if any).
				150	_PyPegen_parsestr parses it, and sets *result to decoded Python string object.
				151	If the string is an f-string, set fstr and fstrlen to the unparsed
				152	string object. Return 0 if no errors occurred. */
				153	int
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	154	_PyPegen_parsestr(Parser p, int bytesmode, int rawmode, PyObject *result,
				155	const char *fstr, Py_ssize_t fstrlen, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	156	{
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	157	const char *s = PyBytes_AsString(t->bytes);
				158	if (s == NULL) {
				159	return -1;
				160	}
				161
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	162	size_t len;
				163	int quote = Py_CHARMASK(*s);
				164	int fmode = 0;
				165	*bytesmode = 0;
				166	*rawmode = 0;
				167	*result = NULL;
				168	*fstr = NULL;
				169	if (Py_ISALPHA(quote)) {
				170	while (!bytesmode \|\| !rawmode) {
				171	if (quote == 'b' \|\| quote == 'B') {
				172	quote = *++s;
				173	*bytesmode = 1;
				174	}
				175	else if (quote == 'u' \|\| quote == 'U') {
				176	quote = *++s;
				177	}
				178	else if (quote == 'r' \|\| quote == 'R') {
				179	quote = *++s;
				180	*rawmode = 1;
				181	}
				182	else if (quote == 'f' \|\| quote == 'F') {
				183	quote = *++s;
				184	fmode = 1;
				185	}
				186	else {
				187	break;
				188	}
				189	}
				190	}
				191
Lysandros Nikolaou	3e0a6f3	2020-05-01 06:27:52 +0300	[diff] [blame]	192	/* fstrings are only allowed in Python 3.6 and greater */
				193	if (fmode && p->feature_version < 6) {
				194	p->error_indicator = 1;
				195	RAISE_SYNTAX_ERROR("Format strings are only supported in Python 3.6 and greater");
				196	return -1;
				197	}
				198
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	199	if (fmode && *bytesmode) {
				200	PyErr_BadInternalCall();
				201	return -1;
				202	}
				203	if (quote != '\'' && quote != '\"') {
				204	PyErr_BadInternalCall();
				205	return -1;
				206	}
				207	/* Skip the leading quote char. */
				208	s++;
				209	len = strlen(s);
				210	if (len > INT_MAX) {
				211	PyErr_SetString(PyExc_OverflowError, "string to parse is too long");
				212	return -1;
				213	}
				214	if (s[--len] != quote) {
				215	/* Last quote char must match the first. */
				216	PyErr_BadInternalCall();
				217	return -1;
				218	}
				219	if (len >= 4 && s[0] == quote && s[1] == quote) {
				220	/* A triple quoted string. We've already skipped one quote at
				221	the start and one at the end of the string. Now skip the
				222	two at the start. */
				223	s += 2;
				224	len -= 2;
				225	/* And check that the last two match. */
				226	if (s[--len] != quote \|\| s[--len] != quote) {
				227	PyErr_BadInternalCall();
				228	return -1;
				229	}
				230	}
				231
				232	if (fmode) {
				233	/* Just return the bytes. The caller will parse the resulting
				234	string. */
				235	*fstr = s;
				236	*fstrlen = len;
				237	return 0;
				238	}
				239
				240	/* Not an f-string. */
				241	/* Avoid invoking escape decoding routines if possible. */
				242	rawmode = rawmode \|\| strchr(s, '\\') == NULL;
				243	if (*bytesmode) {
				244	/* Disallow non-ASCII characters. */
				245	const char *ch;
				246	for (ch = s; *ch; ch++) {
				247	if (Py_CHARMASK(*ch) >= 0x80) {
				248	RAISE_SYNTAX_ERROR(
				249	"bytes can only contain ASCII "
				250	"literal characters.");
				251	return -1;
				252	}
				253	}
				254	if (*rawmode) {
				255	*result = PyBytes_FromStringAndSize(s, len);
				256	}
				257	else {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	258	*result = decode_bytes_with_escapes(p, s, len, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	259	}
				260	}
				261	else {
				262	if (*rawmode) {
				263	*result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
				264	}
				265	else {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	266	*result = decode_unicode_with_escapes(p, s, len, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	267	}
				268	}
				269	return *result == NULL ? -1 : 0;
				270	}
				271
				272
				273
				274	// FSTRING STUFF
				275
				276	static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset);
				277	static void fstring_shift_argument(expr_ty parent, arg_ty args, int lineno, int col_offset);
				278
				279
				280	static inline void shift_expr(expr_ty parent, expr_ty n, int line, int col) {
Miss Islington (bot)	6440911	2020-06-07 18:08:53 -0700	[diff] [blame^]	281	if (n == NULL) {
				282	return;
				283	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	284	if (parent->lineno < n->lineno) {
				285	col = 0;
				286	}
				287	fstring_shift_expr_locations(n, line, col);
				288	}
				289
				290	static inline void shift_arg(expr_ty parent, arg_ty n, int line, int col) {
				291	if (parent->lineno < n->lineno) {
				292	col = 0;
				293	}
				294	fstring_shift_argument(parent, n, line, col);
				295	}
				296
				297	static void fstring_shift_seq_locations(expr_ty parent, asdl_seq *seq, int lineno, int col_offset) {
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	298	for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	299	expr_ty expr = asdl_seq_GET(seq, i);
				300	if (expr == NULL){
				301	continue;
				302	}
				303	shift_expr(parent, expr, lineno, col_offset);
				304	}
				305	}
				306
				307	static void fstring_shift_slice_locations(expr_ty parent, expr_ty slice, int lineno, int col_offset) {
				308	switch (slice->kind) {
				309	case Slice_kind:
				310	if (slice->v.Slice.lower) {
				311	shift_expr(parent, slice->v.Slice.lower, lineno, col_offset);
				312	}
				313	if (slice->v.Slice.upper) {
				314	shift_expr(parent, slice->v.Slice.upper, lineno, col_offset);
				315	}
				316	if (slice->v.Slice.step) {
				317	shift_expr(parent, slice->v.Slice.step, lineno, col_offset);
				318	}
				319	break;
				320	case Tuple_kind:
				321	fstring_shift_seq_locations(parent, slice->v.Tuple.elts, lineno, col_offset);
				322	break;
				323	default:
				324	break;
				325	}
				326	}
				327
				328	static void fstring_shift_comprehension(expr_ty parent, comprehension_ty comp, int lineno, int col_offset) {
				329	shift_expr(parent, comp->target, lineno, col_offset);
				330	shift_expr(parent, comp->iter, lineno, col_offset);
				331	fstring_shift_seq_locations(parent, comp->ifs, lineno, col_offset);
				332	}
				333
				334	static void fstring_shift_argument(expr_ty parent, arg_ty arg, int lineno, int col_offset) {
				335	if (arg->annotation != NULL){
				336	shift_expr(parent, arg->annotation, lineno, col_offset);
				337	}
				338	arg->col_offset = arg->col_offset + col_offset;
				339	arg->end_col_offset = arg->end_col_offset + col_offset;
				340	arg->lineno = arg->lineno + lineno;
				341	arg->end_lineno = arg->end_lineno + lineno;
				342	}
				343
				344	static void fstring_shift_arguments(expr_ty parent, arguments_ty args, int lineno, int col_offset) {
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	345	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->posonlyargs); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	346	arg_ty arg = asdl_seq_GET(args->posonlyargs, i);
				347	shift_arg(parent, arg, lineno, col_offset);
				348	}
				349
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	350	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->args); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	351	arg_ty arg = asdl_seq_GET(args->args, i);
				352	shift_arg(parent, arg, lineno, col_offset);
				353	}
				354
				355	if (args->vararg != NULL) {
				356	shift_arg(parent, args->vararg, lineno, col_offset);
				357	}
				358
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	359	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->kwonlyargs); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	360	arg_ty arg = asdl_seq_GET(args->kwonlyargs, i);
				361	shift_arg(parent, arg, lineno, col_offset);
				362	}
				363
				364	fstring_shift_seq_locations(parent, args->kw_defaults, lineno, col_offset);
				365
				366	if (args->kwarg != NULL) {
				367	shift_arg(parent, args->kwarg, lineno, col_offset);
				368	}
				369
				370	fstring_shift_seq_locations(parent, args->defaults, lineno, col_offset);
				371	}
				372
				373	static void fstring_shift_children_locations(expr_ty n, int lineno, int col_offset) {
				374	switch (n->kind) {
				375	case BoolOp_kind:
				376	fstring_shift_seq_locations(n, n->v.BoolOp.values, lineno, col_offset);
				377	break;
				378	case NamedExpr_kind:
				379	shift_expr(n, n->v.NamedExpr.target, lineno, col_offset);
				380	shift_expr(n, n->v.NamedExpr.value, lineno, col_offset);
				381	break;
				382	case BinOp_kind:
				383	shift_expr(n, n->v.BinOp.left, lineno, col_offset);
				384	shift_expr(n, n->v.BinOp.right, lineno, col_offset);
				385	break;
				386	case UnaryOp_kind:
				387	shift_expr(n, n->v.UnaryOp.operand, lineno, col_offset);
				388	break;
				389	case Lambda_kind:
				390	fstring_shift_arguments(n, n->v.Lambda.args, lineno, col_offset);
				391	shift_expr(n, n->v.Lambda.body, lineno, col_offset);
				392	break;
				393	case IfExp_kind:
				394	shift_expr(n, n->v.IfExp.test, lineno, col_offset);
				395	shift_expr(n, n->v.IfExp.body, lineno, col_offset);
				396	shift_expr(n, n->v.IfExp.orelse, lineno, col_offset);
				397	break;
				398	case Dict_kind:
				399	fstring_shift_seq_locations(n, n->v.Dict.keys, lineno, col_offset);
				400	fstring_shift_seq_locations(n, n->v.Dict.values, lineno, col_offset);
				401	break;
				402	case Set_kind:
				403	fstring_shift_seq_locations(n, n->v.Set.elts, lineno, col_offset);
				404	break;
				405	case ListComp_kind:
				406	shift_expr(n, n->v.ListComp.elt, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	407	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.ListComp.generators); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	408	comprehension_ty comp = asdl_seq_GET(n->v.ListComp.generators, i);
				409	fstring_shift_comprehension(n, comp, lineno, col_offset);
				410	}
				411	break;
				412	case SetComp_kind:
				413	shift_expr(n, n->v.SetComp.elt, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	414	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.SetComp.generators); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	415	comprehension_ty comp = asdl_seq_GET(n->v.SetComp.generators, i);
				416	fstring_shift_comprehension(n, comp, lineno, col_offset);
				417	}
				418	break;
				419	case DictComp_kind:
				420	shift_expr(n, n->v.DictComp.key, lineno, col_offset);
				421	shift_expr(n, n->v.DictComp.value, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	422	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.DictComp.generators); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	423	comprehension_ty comp = asdl_seq_GET(n->v.DictComp.generators, i);
				424	fstring_shift_comprehension(n, comp, lineno, col_offset);
				425	}
				426	break;
				427	case GeneratorExp_kind:
				428	shift_expr(n, n->v.GeneratorExp.elt, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	429	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.GeneratorExp.generators); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	430	comprehension_ty comp = asdl_seq_GET(n->v.GeneratorExp.generators, i);
				431	fstring_shift_comprehension(n, comp, lineno, col_offset);
				432	}
				433	break;
				434	case Await_kind:
				435	shift_expr(n, n->v.Await.value, lineno, col_offset);
				436	break;
				437	case Yield_kind:
				438	shift_expr(n, n->v.Yield.value, lineno, col_offset);
				439	break;
				440	case YieldFrom_kind:
				441	shift_expr(n, n->v.YieldFrom.value, lineno, col_offset);
				442	break;
				443	case Compare_kind:
				444	shift_expr(n, n->v.Compare.left, lineno, col_offset);
				445	fstring_shift_seq_locations(n, n->v.Compare.comparators, lineno, col_offset);
				446	break;
				447	case Call_kind:
				448	shift_expr(n, n->v.Call.func, lineno, col_offset);
				449	fstring_shift_seq_locations(n, n->v.Call.args, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	450	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.Call.keywords); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	451	keyword_ty keyword = asdl_seq_GET(n->v.Call.keywords, i);
				452	shift_expr(n, keyword->value, lineno, col_offset);
				453	}
				454	break;
				455	case Attribute_kind:
				456	shift_expr(n, n->v.Attribute.value, lineno, col_offset);
				457	break;
				458	case Subscript_kind:
				459	shift_expr(n, n->v.Subscript.value, lineno, col_offset);
				460	fstring_shift_slice_locations(n, n->v.Subscript.slice, lineno, col_offset);
				461	shift_expr(n, n->v.Subscript.slice, lineno, col_offset);
				462	break;
				463	case Starred_kind:
				464	shift_expr(n, n->v.Starred.value, lineno, col_offset);
				465	break;
				466	case List_kind:
				467	fstring_shift_seq_locations(n, n->v.List.elts, lineno, col_offset);
				468	break;
				469	case Tuple_kind:
				470	fstring_shift_seq_locations(n, n->v.Tuple.elts, lineno, col_offset);
				471	break;
Lysandros Nikolaou	37af21b	2020-04-29 03:43:50 +0300	[diff] [blame]	472	case JoinedStr_kind:
				473	fstring_shift_seq_locations(n, n->v.JoinedStr.values, lineno, col_offset);
				474	break;
				475	case FormattedValue_kind:
				476	shift_expr(n, n->v.FormattedValue.value, lineno, col_offset);
				477	if (n->v.FormattedValue.format_spec) {
				478	shift_expr(n, n->v.FormattedValue.format_spec, lineno, col_offset);
				479	}
				480	break;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	481	default:
				482	return;
				483	}
				484	}
				485
				486	/* Shift locations for the given node and all its children by adding `lineno`
				487	and `col_offset` to existing locations. Note that n is the already parsed
				488	expression. */
				489	static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
				490	{
				491	n->col_offset = n->col_offset + col_offset;
				492
				493	// The following is needed, in order for nodes spanning across multiple lines
				494	// to be shifted correctly. An example of such a node is a Call node, the closing
				495	// parenthesis of which is not on the same line as its name.
				496	if (n->lineno == n->end_lineno) {
				497	n->end_col_offset = n->end_col_offset + col_offset;
				498	}
				499
				500	fstring_shift_children_locations(n, lineno, col_offset);
				501	n->lineno = n->lineno + lineno;
				502	n->end_lineno = n->end_lineno + lineno;
				503	}
				504
				505	/* Fix locations for the given node and its children.
				506
				507	`parent` is the enclosing node.
				508	`n` is the node which locations are going to be fixed relative to parent.
				509	`expr_str` is the child node's string representation, including braces.
				510	*/
				511	static void
				512	fstring_fix_expr_location(Token parent, expr_ty n, char expr_str)
				513	{
				514	char *substr = NULL;
				515	char *start;
				516	int lines = 0;
				517	int cols = 0;
				518
				519	if (parent && parent->bytes) {
				520	char *parent_str = PyBytes_AsString(parent->bytes);
				521	if (!parent_str) {
				522	return;
				523	}
				524	substr = strstr(parent_str, expr_str);
				525	if (substr) {
				526	// The following is needed, in order to correctly shift the column
				527	// offset, in the case that (disregarding any whitespace) a newline
				528	// immediately follows the opening curly brace of the fstring expression.
				529	int newline_after_brace = 1;
				530	start = substr + 1;
				531	while (start && start != '}' && start != '\n') {
				532	if (start != ' ' && start != '\t' && *start != '\f') {
				533	newline_after_brace = 0;
				534	break;
				535	}
				536	start++;
				537	}
				538
				539	// Account for the characters from the last newline character to our
				540	// left until the beginning of substr.
				541	if (!newline_after_brace) {
				542	start = substr;
				543	while (start > parent_str && *start != '\n') {
				544	start--;
				545	}
				546	cols += (int)(substr - start);
				547	}
				548	/* adjust the start based on the number of newlines encountered
				549	before the f-string expression */
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	550	for (char* p = parent_str; p < substr; p++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	551	if (*p == '\n') {
				552	lines++;
				553	}
				554	}
				555	}
				556	}
				557	fstring_shift_expr_locations(n, lines, cols);
				558	}
				559
				560
				561	/* Compile this expression in to an expr_ty. Add parens around the
				562	expression, in order to allow leading spaces in the expression. */
				563	static expr_ty
				564	fstring_compile_expr(Parser p, const char expr_start, const char *expr_end,
				565	Token *t)
				566	{
				567	expr_ty expr = NULL;
				568	char *str;
				569	Py_ssize_t len;
				570	const char *s;
				571	expr_ty result = NULL;
				572
				573	assert(expr_end >= expr_start);
				574	assert(*(expr_start-1) == '{');
				575	assert(expr_end == '}' \|\| expr_end == '!' \|\| *expr_end == ':' \|\|
				576	*expr_end == '=');
				577
				578	/* If the substring is all whitespace, it's an error. We need to catch this
				579	here, and not when we call PyParser_SimpleParseStringFlagsFilename,
				580	because turning the expression '' in to '()' would go from being invalid
				581	to valid. */
				582	for (s = expr_start; s != expr_end; s++) {
				583	char c = *s;
				584	/* The Python parser ignores only the following whitespace
				585	characters (\r already is converted to \n). */
				586	if (!(c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\f')) {
				587	break;
				588	}
				589	}
				590	if (s == expr_end) {
				591	RAISE_SYNTAX_ERROR("f-string: empty expression not allowed");
				592	return NULL;
				593	}
				594
				595	len = expr_end - expr_start;
				596	/* Allocate 3 extra bytes: open paren, close paren, null byte. */
				597	str = PyMem_RawMalloc(len + 3);
				598	if (str == NULL) {
				599	PyErr_NoMemory();
				600	return NULL;
				601	}
				602
				603	str[0] = '(';
				604	memcpy(str+1, expr_start, len);
				605	str[len+1] = ')';
				606	str[len+2] = 0;
				607
				608	struct tok_state* tok = PyTokenizer_FromString(str, 1);
				609	if (tok == NULL) {
Miss Islington (bot)	79e6c15	2020-06-05 17:10:57 -0700	[diff] [blame]	610	PyMem_RawFree(str);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	611	return NULL;
				612	}
Lysandros Nikolaou	791a46e	2020-05-26 04:24:31 +0300	[diff] [blame]	613	Py_INCREF(p->tok->filename);
				614	tok->filename = p->tok->filename;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	615
Lysandros Nikolaou	3e0a6f3	2020-05-01 06:27:52 +0300	[diff] [blame]	616	Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
				617	NULL, p->arena);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	618	p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
				619	p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
				620	? p->starting_col_offset + t->col_offset : 0;
				621
				622	expr = _PyPegen_run_parser(p2);
				623
				624	if (expr == NULL) {
				625	goto exit;
				626	}
				627
				628	/* Reuse str to find the correct column offset. */
				629	str[0] = '{';
				630	str[len+1] = '}';
				631	fstring_fix_expr_location(t, expr, str);
				632
				633	result = expr;
				634
				635	exit:
Miss Islington (bot)	79e6c15	2020-06-05 17:10:57 -0700	[diff] [blame]	636	PyMem_RawFree(str);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	637	_PyPegen_Parser_Free(p2);
				638	PyTokenizer_Free(tok);
				639	return result;
				640	}
				641
				642	/* Return -1 on error.
				643
				644	Return 0 if we reached the end of the literal.
				645
				646	Return 1 if we haven't reached the end of the literal, but we want
				647	the caller to process the literal up to this point. Used for
				648	doubled braces.
				649	*/
				650	static int
				651	fstring_find_literal(Parser p, const char str, const char end, int raw,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	652	PyObject *literal, int recurse_lvl, Token t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	653	{
				654	/* Get any literal string. It ends when we hit an un-doubled left
				655	brace (which isn't part of a unicode name escape such as
				656	"\N{EULER CONSTANT}"), or the end of the string. */
				657
				658	const char s = str;
				659	const char *literal_start = s;
				660	int result = 0;
				661
				662	assert(*literal == NULL);
				663	while (s < end) {
				664	char ch = *s++;
				665	if (!raw && ch == '\\' && s < end) {
				666	ch = *s++;
				667	if (ch == 'N') {
				668	if (s < end && *s++ == '{') {
				669	while (s < end && *s++ != '}') {
				670	}
				671	continue;
				672	}
				673	break;
				674	}
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	675	if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	676	return -1;
				677	}
				678	}
				679	if (ch == '{' \|\| ch == '}') {
				680	/* Check for doubled braces, but only at the top level. If
				681	we checked at every level, then f'{0:{3}}' would fail
				682	with the two closing braces. */
				683	if (recurse_lvl == 0) {
				684	if (s < end && *s == ch) {
				685	/* We're going to tell the caller that the literal ends
				686	here, but that they should continue scanning. But also
				687	skip over the second brace when we resume scanning. */
				688	*str = s + 1;
				689	result = 1;
				690	goto done;
				691	}
				692
				693	/* Where a single '{' is the start of a new expression, a
				694	single '}' is not allowed. */
				695	if (ch == '}') {
				696	*str = s - 1;
				697	RAISE_SYNTAX_ERROR("f-string: single '}' is not allowed");
				698	return -1;
				699	}
				700	}
				701	/* We're either at a '{', which means we're starting another
				702	expression; or a '}', which means we're at the end of this
				703	f-string (for a nested format_spec). */
				704	s--;
				705	break;
				706	}
				707	}
				708	*str = s;
				709	assert(s <= end);
				710	assert(s == end \|\| s == '{' \|\| s == '}');
				711	done:
				712	if (literal_start != s) {
				713	if (raw)
				714	*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
				715	s - literal_start,
				716	NULL, NULL);
				717	else
				718	*literal = decode_unicode_with_escapes(p, literal_start,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	719	s - literal_start, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	720	if (!*literal)
				721	return -1;
				722	}
				723	return result;
				724	}
				725
				726	/* Forward declaration because parsing is recursive. */
				727	static expr_ty
				728	fstring_parse(Parser p, const char str, const char end, int raw, int recurse_lvl,
				729	Token first_token, Token t, Token *last_token);
				730
/* Parse the f-string at *str, ending at end.  We know *str starts an
   expression (so it must be a '{'). Returns the FormattedValue node, which
   includes the expression, conversion character, format_spec expression, and
   optionally the text of the expression (if = is used).

   Note that I don't do a perfect job here: I don't make sure that a
   closing brace doesn't match an opening paren, for example. It
   doesn't need to error on all invalid expressions, just correctly
   find the end of all valid ones. Any errors inside the expression
   will be caught when we parse it later.

   *expression is set to the expression.  For an '=' "debug" expression,
   *expr_text is set to the debug text (the original text of the expression,
   including the '=' and any whitespace around it, as a string object).  If
   not a debug expression, *expr_text is set to NULL. */
				746	static int
				747	fstring_find_expr(Parser p, const char str, const char end, int raw, int recurse_lvl,
				748	PyObject *expr_text, expr_ty expression, Token *first_token,
				749	Token t, Token last_token)
				750	{
				751	/* Return -1 on error, else 0. */
				752
				753	const char *expr_start;
				754	const char *expr_end;
				755	expr_ty simple_expression;
				756	expr_ty format_spec = NULL; /* Optional format specifier. */
				757	int conversion = -1; /* The conversion char. Use default if not
				758	specified, or !r if using = and no format
				759	spec. */
				760
				761	/* 0 if we're not in a string, else the quote char we're trying to
				762	match (single or double quote). */
				763	char quote_char = 0;
				764
				765	/* If we're inside a string, 1=normal, 3=triple-quoted. */
				766	int string_type = 0;
				767
				768	/* Keep track of nesting level for braces/parens/brackets in
				769	expressions. */
				770	Py_ssize_t nested_depth = 0;
				771	char parenstack[MAXLEVEL];
				772
				773	*expr_text = NULL;
				774
				775	/* Can only nest one level deep. */
				776	if (recurse_lvl >= 2) {
				777	RAISE_SYNTAX_ERROR("f-string: expressions nested too deeply");
				778	goto error;
				779	}
				780
				781	/* The first char must be a left brace, or we wouldn't have gotten
				782	here. Skip over it. */
				783	assert(**str == '{');
				784	*str += 1;
				785
				786	expr_start = *str;
				787	for (; str < end; (str)++) {
				788	char ch;
				789
				790	/* Loop invariants. */
				791	assert(nested_depth >= 0);
				792	assert(str >= expr_start && str < end);
				793	if (quote_char)
				794	assert(string_type == 1 \|\| string_type == 3);
				795	else
				796	assert(string_type == 0);
				797
				798	ch = **str;
				799	/* Nowhere inside an expression is a backslash allowed. */
				800	if (ch == '\\') {
				801	/* Error: can't include a backslash character, inside
				802	parens or strings or not. */
				803	RAISE_SYNTAX_ERROR(
				804	"f-string expression part "
				805	"cannot include a backslash");
				806	goto error;
				807	}
				808	if (quote_char) {
				809	/* We're inside a string. See if we're at the end. */
				810	/* This code needs to implement the same non-error logic
				811	as tok_get from tokenizer.c, at the letter_quote
				812	label. To actually share that code would be a
				813	nightmare. But, it's unlikely to change and is small,
				814	so duplicate it here. Note we don't need to catch all
				815	of the errors, since they'll be caught when parsing the
				816	expression. We just need to match the non-error
				817	cases. Thus we can ignore \n in single-quoted strings,
				818	for example. Or non-terminated strings. */
				819	if (ch == quote_char) {
				820	/* Does this match the string_type (single or triple
				821	quoted)? */
				822	if (string_type == 3) {
				823	if (str+2 < end && (str+1) == ch && (*str+2) == ch) {
				824	/* We're at the end of a triple quoted string. */
				825	*str += 2;
				826	string_type = 0;
				827	quote_char = 0;
				828	continue;
				829	}
				830	} else {
				831	/* We're at the end of a normal string. */
				832	quote_char = 0;
				833	string_type = 0;
				834	continue;
				835	}
				836	}
				837	} else if (ch == '\'' \|\| ch == '"') {
				838	/* Is this a triple quoted string? */
				839	if (str+2 < end && (str+1) == ch && (*str+2) == ch) {
				840	string_type = 3;
				841	*str += 2;
				842	} else {
				843	/* Start of a normal string. */
				844	string_type = 1;
				845	}
				846	/* Start looking for the end of the string. */
				847	quote_char = ch;
				848	} else if (ch == '[' \|\| ch == '{' \|\| ch == '(') {
				849	if (nested_depth >= MAXLEVEL) {
				850	RAISE_SYNTAX_ERROR("f-string: too many nested parenthesis");
				851	goto error;
				852	}
				853	parenstack[nested_depth] = ch;
				854	nested_depth++;
				855	} else if (ch == '#') {
				856	/* Error: can't include a comment character, inside parens
				857	or not. */
				858	RAISE_SYNTAX_ERROR("f-string expression part cannot include '#'");
				859	goto error;
				860	} else if (nested_depth == 0 &&
				861	(ch == '!' \|\| ch == ':' \|\| ch == '}' \|\|
				862	ch == '=' \|\| ch == '>' \|\| ch == '<')) {
				863	/* See if there's a next character. */
				864	if (*str+1 < end) {
				865	char next = (str+1);
				866
				867	/* For "!=". since '=' is not an allowed conversion character,
				868	nothing is lost in this test. */
				869	if ((ch == '!' && next == '=') \|\| /* != */
				870	(ch == '=' && next == '=') \|\| /* == */
				871	(ch == '<' && next == '=') \|\| /* <= */
				872	(ch == '>' && next == '=') /* >= */
				873	) {
				874	*str += 1;
				875	continue;
				876	}
				877	/* Don't get out of the loop for these, if they're single
				878	chars (not part of 2-char tokens). If by themselves, they
				879	don't end an expression (unlike say '!'). */
				880	if (ch == '>' \|\| ch == '<') {
				881	continue;
				882	}
				883	}
				884
				885	/* Normal way out of this loop. */
				886	break;
				887	} else if (ch == ']' \|\| ch == '}' \|\| ch == ')') {
				888	if (!nested_depth) {
				889	RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", ch);
				890	goto error;
				891	}
				892	nested_depth--;
				893	int opening = parenstack[nested_depth];
				894	if (!((opening == '(' && ch == ')') \|\|
				895	(opening == '[' && ch == ']') \|\|
				896	(opening == '{' && ch == '}')))
				897	{
				898	RAISE_SYNTAX_ERROR(
				899	"f-string: closing parenthesis '%c' "
				900	"does not match opening parenthesis '%c'",
				901	ch, opening);
				902	goto error;
				903	}
				904	} else {
				905	/* Just consume this char and loop around. */
				906	}
				907	}
				908	expr_end = *str;
				909	/* If we leave this loop in a string or with mismatched parens, we
				910	don't care. We'll get a syntax error when compiling the
				911	expression. But, we can produce a better error message, so
				912	let's just do that.*/
				913	if (quote_char) {
				914	RAISE_SYNTAX_ERROR("f-string: unterminated string");
				915	goto error;
				916	}
				917	if (nested_depth) {
				918	int opening = parenstack[nested_depth - 1];
				919	RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", opening);
				920	goto error;
				921	}
				922
				923	if (*str >= end)
				924	goto unexpected_end_of_string;
				925
				926	/* Compile the expression as soon as possible, so we show errors
				927	related to the expression before errors related to the
				928	conversion or format_spec. */
				929	simple_expression = fstring_compile_expr(p, expr_start, expr_end, t);
				930	if (!simple_expression)
				931	goto error;
				932
				933	/* Check for =, which puts the text value of the expression in
				934	expr_text. */
				935	if (**str == '=') {
Pablo Galindo	9b83829	2020-05-27 22:01:11 +0100	[diff] [blame]	936	if (p->feature_version < 8) {
				937	RAISE_SYNTAX_ERROR("f-string: self documenting expressions are "
				938	"only supported in Python 3.8 and greater");
				939	goto error;
				940	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	941	*str += 1;
				942
				943	/* Skip over ASCII whitespace. No need to test for end of string
				944	here, since we know there's at least a trailing quote somewhere
				945	ahead. */
				946	while (Py_ISSPACE(**str)) {
				947	*str += 1;
				948	}
				949
				950	/* Set expr_text to the text of the expression. /
				951	expr_text = PyUnicode_FromStringAndSize(expr_start, str-expr_start);
				952	if (!*expr_text) {
				953	goto error;
				954	}
				955	}
				956
				957	/* Check for a conversion char, if present. */
				958	if (**str == '!') {
				959	*str += 1;
				960	if (*str >= end)
				961	goto unexpected_end_of_string;
				962
				963	conversion = **str;
				964	*str += 1;
				965
				966	/* Validate the conversion. */
				967	if (!(conversion == 's' \|\| conversion == 'r' \|\| conversion == 'a')) {
				968	RAISE_SYNTAX_ERROR(
				969	"f-string: invalid conversion character: "
				970	"expected 's', 'r', or 'a'");
				971	goto error;
				972	}
				973
				974	}
				975
				976	/* Check for the format spec, if present. */
				977	if (*str >= end)
				978	goto unexpected_end_of_string;
				979	if (**str == ':') {
				980	*str += 1;
				981	if (*str >= end)
				982	goto unexpected_end_of_string;
				983
				984	/* Parse the format spec. */
				985	format_spec = fstring_parse(p, str, end, raw, recurse_lvl+1,
				986	first_token, t, last_token);
				987	if (!format_spec)
				988	goto error;
				989	}
				990
				991	if (str >= end \|\| *str != '}')
				992	goto unexpected_end_of_string;
				993
				994	/* We're at a right brace. Consume it. */
				995	assert(*str < end);
				996	assert(**str == '}');
				997	*str += 1;
				998
				999	/* If we're in = mode (detected by non-NULL expr_text), and have no format
				1000	spec and no explicit conversion, set the conversion to 'r'. */
				1001	if (*expr_text && format_spec == NULL && conversion == -1) {
				1002	conversion = 'r';
				1003	}
				1004
				1005	/* And now create the FormattedValue node that represents this
				1006	entire expression with the conversion and format spec. */
				1007	//TODO: Fix this
				1008	*expression = FormattedValue(simple_expression, conversion,
				1009	format_spec, first_token->lineno,
				1010	first_token->col_offset, last_token->end_lineno,
				1011	last_token->end_col_offset, p->arena);
				1012	if (!*expression)
				1013	goto error;
				1014
				1015	return 0;
				1016
				1017	unexpected_end_of_string:
				1018	RAISE_SYNTAX_ERROR("f-string: expecting '}'");
				1019	/* Falls through to error. */
				1020
				1021	error:
				1022	Py_XDECREF(*expr_text);
				1023	return -1;
				1024
				1025	}
				1026
				1027	/* Return -1 on error.
				1028
				1029	Return 0 if we have a literal (possible zero length) and an
				1030	expression (zero length if at the end of the string.
				1031
				1032	Return 1 if we have a literal, but no expression, and we want the
				1033	caller to call us again. This is used to deal with doubled
				1034	braces.
				1035
				1036	When called multiple times on the string 'a{{b{0}c', this function
				1037	will return:
				1038
				1039	1. the literal 'a{' with no expression, and a return value
				1040	of 1. Despite the fact that there's no expression, the return
				1041	value of 1 means we're not finished yet.
				1042
				1043	2. the literal 'b' and the expression '0', with a return value of
				1044	0. The fact that there's an expression means we're not finished.
				1045
				1046	3. literal 'c' with no expression and a return value of 0. The
				1047	combination of the return value of 0 with no expression means
				1048	we're finished.
				1049	*/
				1050	static int
				1051	fstring_find_literal_and_expr(Parser p, const char str, const char end, int raw,
				1052	int recurse_lvl, PyObject **literal,
				1053	PyObject *expr_text, expr_ty expression,
				1054	Token first_token, Token t, Token *last_token)
				1055	{
				1056	int result;
				1057
				1058	assert(literal == NULL && expression == NULL);
				1059
				1060	/* Get any literal string. */
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	1061	result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1062	if (result < 0)
				1063	goto error;
				1064
				1065	assert(result == 0 \|\| result == 1);
				1066
				1067	if (result == 1)
				1068	/* We have a literal, but don't look at the expression. */
				1069	return 1;
				1070
				1071	if (str >= end \|\| *str == '}')
				1072	/* We're at the end of the string or the end of a nested
				1073	f-string: no expression. The top-level error case where we
				1074	expect to be at the end of the string but we're at a '}' is
				1075	handled later. */
				1076	return 0;
				1077
				1078	/* We must now be the start of an expression, on a '{'. */
				1079	assert(**str == '{');
				1080
				1081	if (fstring_find_expr(p, str, end, raw, recurse_lvl, expr_text,
				1082	expression, first_token, t, last_token) < 0)
				1083	goto error;
				1084
				1085	return 0;
				1086
				1087	error:
				1088	Py_CLEAR(*literal);
				1089	return -1;
				1090	}
				1091
				1092	#ifdef NDEBUG
				1093	#define ExprList_check_invariants(l)
				1094	#else
				1095	static void
				1096	ExprList_check_invariants(ExprList *l)
				1097	{
				1098	/* Check our invariants. Make sure this object is "live", and
				1099	hasn't been deallocated. */
				1100	assert(l->size >= 0);
				1101	assert(l->p != NULL);
				1102	if (l->size <= EXPRLIST_N_CACHED)
				1103	assert(l->data == l->p);
				1104	}
				1105	#endif
				1106
				1107	static void
				1108	ExprList_Init(ExprList *l)
				1109	{
				1110	l->allocated = EXPRLIST_N_CACHED;
				1111	l->size = 0;
				1112
				1113	/* Until we start allocating dynamically, p points to data. */
				1114	l->p = l->data;
				1115
				1116	ExprList_check_invariants(l);
				1117	}
				1118
				1119	static int
				1120	ExprList_Append(ExprList *l, expr_ty exp)
				1121	{
				1122	ExprList_check_invariants(l);
				1123	if (l->size >= l->allocated) {
				1124	/* We need to alloc (or realloc) the memory. */
				1125	Py_ssize_t new_size = l->allocated * 2;
				1126
				1127	/* See if we've ever allocated anything dynamically. */
				1128	if (l->p == l->data) {
				1129	Py_ssize_t i;
				1130	/* We're still using the cached data. Switch to
				1131	alloc-ing. */
				1132	l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size);
				1133	if (!l->p)
				1134	return -1;
				1135	/* Copy the cached data into the new buffer. */
				1136	for (i = 0; i < l->size; i++)
				1137	l->p[i] = l->data[i];
				1138	} else {
				1139	/* Just realloc. */
				1140	expr_ty tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) new_size);
				1141	if (!tmp) {
				1142	PyMem_RawFree(l->p);
				1143	l->p = NULL;
				1144	return -1;
				1145	}
				1146	l->p = tmp;
				1147	}
				1148
				1149	l->allocated = new_size;
				1150	assert(l->allocated == 2 * l->size);
				1151	}
				1152
				1153	l->p[l->size++] = exp;
				1154
				1155	ExprList_check_invariants(l);
				1156	return 0;
				1157	}
				1158
				1159	static void
				1160	ExprList_Dealloc(ExprList *l)
				1161	{
				1162	ExprList_check_invariants(l);
				1163
				1164	/* If there's been an error, or we've never dynamically allocated,
				1165	do nothing. */
				1166	if (!l->p \|\| l->p == l->data) {
				1167	/* Do nothing. */
				1168	} else {
				1169	/* We have dynamically allocated. Free the memory. */
				1170	PyMem_RawFree(l->p);
				1171	}
				1172	l->p = NULL;
				1173	l->size = -1;
				1174	}
				1175
				1176	static asdl_seq *
				1177	ExprList_Finish(ExprList l, PyArena arena)
				1178	{
				1179	asdl_seq *seq;
				1180
				1181	ExprList_check_invariants(l);
				1182
				1183	/* Allocate the asdl_seq and copy the expressions in to it. */
				1184	seq = _Py_asdl_seq_new(l->size, arena);
				1185	if (seq) {
				1186	Py_ssize_t i;
				1187	for (i = 0; i < l->size; i++)
				1188	asdl_seq_SET(seq, i, l->p[i]);
				1189	}
				1190	ExprList_Dealloc(l);
				1191	return seq;
				1192	}
				1193
				1194	#ifdef NDEBUG
				1195	#define FstringParser_check_invariants(state)
				1196	#else
				1197	static void
				1198	FstringParser_check_invariants(FstringParser *state)
				1199	{
				1200	if (state->last_str)
				1201	assert(PyUnicode_CheckExact(state->last_str));
				1202	ExprList_check_invariants(&state->expr_list);
				1203	}
				1204	#endif
				1205
				1206	void
				1207	_PyPegen_FstringParser_Init(FstringParser *state)
				1208	{
				1209	state->last_str = NULL;
				1210	state->fmode = 0;
				1211	ExprList_Init(&state->expr_list);
				1212	FstringParser_check_invariants(state);
				1213	}
				1214
				1215	void
				1216	_PyPegen_FstringParser_Dealloc(FstringParser *state)
				1217	{
				1218	FstringParser_check_invariants(state);
				1219
				1220	Py_XDECREF(state->last_str);
				1221	ExprList_Dealloc(&state->expr_list);
				1222	}
				1223
				1224	/* Make a Constant node, but decref the PyUnicode object being added. */
				1225	static expr_ty
				1226	make_str_node_and_del(Parser p, PyObject str, Token first_token, Token *last_token)
				1227	{
				1228	PyObject s = str;
				1229	PyObject *kind = NULL;
				1230	*str = NULL;
				1231	assert(PyUnicode_CheckExact(s));
				1232	if (PyArena_AddPyObject(p->arena, s) < 0) {
				1233	Py_DECREF(s);
				1234	return NULL;
				1235	}
				1236	const char* the_str = PyBytes_AsString(first_token->bytes);
				1237	if (the_str && the_str[0] == 'u') {
				1238	kind = _PyPegen_new_identifier(p, "u");
				1239	}
				1240
				1241	if (kind == NULL && PyErr_Occurred()) {
				1242	return NULL;
				1243	}
				1244
				1245	return Constant(s, kind, first_token->lineno, first_token->col_offset,
				1246	last_token->end_lineno, last_token->end_col_offset, p->arena);
				1247
				1248	}
				1249
				1250
				1251	/* Add a non-f-string (that is, a regular literal string). str is
				1252	decref'd. */
				1253	int
				1254	_PyPegen_FstringParser_ConcatAndDel(FstringParser state, PyObject str)
				1255	{
				1256	FstringParser_check_invariants(state);
				1257
				1258	assert(PyUnicode_CheckExact(str));
				1259
				1260	if (PyUnicode_GET_LENGTH(str) == 0) {
				1261	Py_DECREF(str);
				1262	return 0;
				1263	}
				1264
				1265	if (!state->last_str) {
				1266	/* We didn't have a string before, so just remember this one. */
				1267	state->last_str = str;
				1268	} else {
				1269	/* Concatenate this with the previous string. */
				1270	PyUnicode_AppendAndDel(&state->last_str, str);
				1271	if (!state->last_str)
				1272	return -1;
				1273	}
				1274	FstringParser_check_invariants(state);
				1275	return 0;
				1276	}
				1277
				1278	/* Parse an f-string. The f-string is in *str to end, with no
				1279	'f' or quotes. */
				1280	int
				1281	_PyPegen_FstringParser_ConcatFstring(Parser p, FstringParser state, const char **str,
				1282	const char *end, int raw, int recurse_lvl,
				1283	Token first_token, Token t, Token *last_token)
				1284	{
				1285	FstringParser_check_invariants(state);
				1286	state->fmode = 1;
				1287
				1288	/* Parse the f-string. */
				1289	while (1) {
				1290	PyObject *literal = NULL;
				1291	PyObject *expr_text = NULL;
				1292	expr_ty expression = NULL;
				1293
				1294	/* If there's a zero length literal in front of the
				1295	expression, literal will be NULL. If we're at the end of
				1296	the f-string, expression will be NULL (unless result == 1,
				1297	see below). */
				1298	int result = fstring_find_literal_and_expr(p, str, end, raw, recurse_lvl,
				1299	&literal, &expr_text,
				1300	&expression, first_token, t, last_token);
				1301	if (result < 0)
				1302	return -1;
				1303
				1304	/* Add the literal, if any. */
				1305	if (literal && _PyPegen_FstringParser_ConcatAndDel(state, literal) < 0) {
				1306	Py_XDECREF(expr_text);
				1307	return -1;
				1308	}
				1309	/* Add the expr_text, if any. */
				1310	if (expr_text && _PyPegen_FstringParser_ConcatAndDel(state, expr_text) < 0) {
				1311	return -1;
				1312	}
				1313
				1314	/* We've dealt with the literal and expr_text, their ownership has
				1315	been transferred to the state object. Don't look at them again. */
				1316
				1317	/* See if we should just loop around to get the next literal
				1318	and expression, while ignoring the expression this
				1319	time. This is used for un-doubling braces, as an
				1320	optimization. */
				1321	if (result == 1)
				1322	continue;
				1323
				1324	if (!expression)
				1325	/* We're done with this f-string. */
				1326	break;
				1327
				1328	/* We know we have an expression. Convert any existing string
				1329	to a Constant node. */
				1330	if (!state->last_str) {
				1331	/* Do nothing. No previous literal. */
				1332	} else {
				1333	/* Convert the existing last_str literal to a Constant node. */
				1334	expr_ty str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1335	if (!str \|\| ExprList_Append(&state->expr_list, str) < 0)
				1336	return -1;
				1337	}
				1338
				1339	if (ExprList_Append(&state->expr_list, expression) < 0)
				1340	return -1;
				1341	}
				1342
				1343	/* If recurse_lvl is zero, then we must be at the end of the
				1344	string. Otherwise, we must be at a right brace. */
				1345
				1346	if (recurse_lvl == 0 && *str < end-1) {
				1347	RAISE_SYNTAX_ERROR("f-string: unexpected end of string");
				1348	return -1;
				1349	}
				1350	if (recurse_lvl != 0 && **str != '}') {
				1351	RAISE_SYNTAX_ERROR("f-string: expecting '}'");
				1352	return -1;
				1353	}
				1354
				1355	FstringParser_check_invariants(state);
				1356	return 0;
				1357	}
				1358
				1359	/* Convert the partial state reflected in last_str and expr_list to an
				1360	expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
				1361	expr_ty
				1362	_PyPegen_FstringParser_Finish(Parser p, FstringParser state, Token* first_token,
				1363	Token *last_token)
				1364	{
				1365	asdl_seq *seq;
				1366
				1367	FstringParser_check_invariants(state);
				1368
				1369	/* If we're just a constant string with no expressions, return
				1370	that. */
				1371	if (!state->fmode) {
				1372	assert(!state->expr_list.size);
				1373	if (!state->last_str) {
				1374	/* Create a zero length string. */
				1375	state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
				1376	if (!state->last_str)
				1377	goto error;
				1378	}
				1379	return make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1380	}
				1381
				1382	/* Create a Constant node out of last_str, if needed. It will be the
				1383	last node in our expression list. */
				1384	if (state->last_str) {
				1385	expr_ty str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1386	if (!str \|\| ExprList_Append(&state->expr_list, str) < 0)
				1387	goto error;
				1388	}
				1389	/* This has already been freed. */
				1390	assert(state->last_str == NULL);
				1391
				1392	seq = ExprList_Finish(&state->expr_list, p->arena);
				1393	if (!seq)
				1394	goto error;
				1395
				1396	return _Py_JoinedStr(seq, first_token->lineno, first_token->col_offset,
				1397	last_token->end_lineno, last_token->end_col_offset, p->arena);
				1398
				1399	error:
				1400	_PyPegen_FstringParser_Dealloc(state);
				1401	return NULL;
				1402	}
				1403
				1404	/* Given an f-string (with no 'f' or quotes) that's in *str and ends
				1405	at end, parse it into an expr_ty. Return NULL on error. Adjust
				1406	str to point past the parsed portion. */
				1407	static expr_ty
				1408	fstring_parse(Parser p, const char str, const char end, int raw,
				1409	int recurse_lvl, Token first_token, Token t, Token *last_token)
				1410	{
				1411	FstringParser state;
				1412
				1413	_PyPegen_FstringParser_Init(&state);
				1414	if (_PyPegen_FstringParser_ConcatFstring(p, &state, str, end, raw, recurse_lvl,
				1415	first_token, t, last_token) < 0) {
				1416	_PyPegen_FstringParser_Dealloc(&state);
				1417	return NULL;
				1418	}
				1419
				1420	return _PyPegen_FstringParser_Finish(p, &state, t, t);
				1421	}