Blame - Parser/pegen/parse_string.c - platform/external/python/cpython3

blob: ca4b733c153b57b427cb73b2d3756725942d2dde [file] [log] [blame]

Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1	#include <Python.h>
				2
				3	#include "../tokenizer.h"
				4	#include "pegen.h"
				5	#include "parse_string.h"
				6
				7	//// STRING HANDLING FUNCTIONS ////
				8
// These functions are ported directly from Python/ast.c with some modifications
// to account for the use of "Parser *p", the fact that we don't have parser nodes
// to pass around and the usage of some specialized APIs present only in this
// file (like "_PyPegen_raise_syntax_error").
				13
				14	static int
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	15	warn_invalid_escape_sequence(Parser p, unsigned char first_invalid_escape_char, Token t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	16	{
				17	PyObject *msg =
				18	PyUnicode_FromFormat("invalid escape sequence \\%c", first_invalid_escape_char);
				19	if (msg == NULL) {
				20	return -1;
				21	}
				22	if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	23	t->lineno, NULL, NULL) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	24	if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
				25	/* Replace the DeprecationWarning exception with a SyntaxError
				26	to get a more accurate error report */
				27	PyErr_Clear();
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	28
				29	/* This is needed, in order for the SyntaxError to point to the token t,
				30	since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
				31	error location, if p->known_err_token is not set. */
				32	p->known_err_token = t;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	33	RAISE_SYNTAX_ERROR("invalid escape sequence \\%c", first_invalid_escape_char);
				34	}
				35	Py_DECREF(msg);
				36	return -1;
				37	}
				38	Py_DECREF(msg);
				39	return 0;
				40	}
				41
				42	static PyObject *
				43	decode_utf8(const char *sPtr, const char end)
				44	{
				45	const char s, t;
				46	t = s = *sPtr;
				47	while (s < end && (*s & 0x80)) {
				48	s++;
				49	}
				50	*sPtr = s;
				51	return PyUnicode_DecodeUTF8(t, s - t, NULL);
				52	}
				53
				54	static PyObject *
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	55	decode_unicode_with_escapes(Parser parser, const char s, size_t len, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	56	{
				57	PyObject v, u;
				58	char *buf;
				59	char *p;
				60	const char *end;
				61
				62	/* check for integer overflow */
				63	if (len > SIZE_MAX / 6) {
				64	return NULL;
				65	}
				66	/* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
				67	"\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
				68	u = PyBytes_FromStringAndSize((char )NULL, len 6);
				69	if (u == NULL) {
				70	return NULL;
				71	}
				72	p = buf = PyBytes_AsString(u);
				73	end = s + len;
				74	while (s < end) {
				75	if (*s == '\\') {
				76	p++ = s++;
				77	if (s >= end \|\| *s & 0x80) {
				78	strcpy(p, "u005c");
				79	p += 5;
				80	if (s >= end) {
				81	break;
				82	}
				83	}
				84	}
				85	if (*s & 0x80) {
				86	PyObject *w;
				87	int kind;
				88	void *data;
				89	Py_ssize_t len, i;
				90	w = decode_utf8(&s, end);
				91	if (w == NULL) {
				92	Py_DECREF(u);
				93	return NULL;
				94	}
				95	kind = PyUnicode_KIND(w);
				96	data = PyUnicode_DATA(w);
				97	len = PyUnicode_GET_LENGTH(w);
				98	for (i = 0; i < len; i++) {
				99	Py_UCS4 chr = PyUnicode_READ(kind, data, i);
				100	sprintf(p, "\\U%08x", chr);
				101	p += 10;
				102	}
				103	/* Should be impossible to overflow */
				104	assert(p - buf <= PyBytes_GET_SIZE(u));
				105	Py_DECREF(w);
				106	}
				107	else {
				108	p++ = s++;
				109	}
				110	}
				111	len = p - buf;
				112	s = buf;
				113
				114	const char *first_invalid_escape;
				115	v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
				116
				117	if (v != NULL && first_invalid_escape != NULL) {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	118	if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	119	/* We have not decref u before because first_invalid_escape points
				120	inside u. */
				121	Py_XDECREF(u);
				122	Py_DECREF(v);
				123	return NULL;
				124	}
				125	}
				126	Py_XDECREF(u);
				127	return v;
				128	}
				129
				130	static PyObject *
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	131	decode_bytes_with_escapes(Parser p, const char s, Py_ssize_t len, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	132	{
				133	const char *first_invalid_escape;
				134	PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
				135	if (result == NULL) {
				136	return NULL;
				137	}
				138
				139	if (first_invalid_escape != NULL) {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	140	if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	141	Py_DECREF(result);
				142	return NULL;
				143	}
				144	}
				145	return result;
				146	}
				147
				148	/* s must include the bracketing quote characters, and r, b, u,
				149	&/or f prefixes (if any), and embedded escape sequences (if any).
				150	_PyPegen_parsestr parses it, and sets *result to decoded Python string object.
				151	If the string is an f-string, set fstr and fstrlen to the unparsed
				152	string object. Return 0 if no errors occurred. */
				153	int
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	154	_PyPegen_parsestr(Parser p, int bytesmode, int rawmode, PyObject *result,
				155	const char *fstr, Py_ssize_t fstrlen, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	156	{
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	157	const char *s = PyBytes_AsString(t->bytes);
				158	if (s == NULL) {
				159	return -1;
				160	}
				161
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	162	size_t len;
				163	int quote = Py_CHARMASK(*s);
				164	int fmode = 0;
				165	*bytesmode = 0;
				166	*rawmode = 0;
				167	*result = NULL;
				168	*fstr = NULL;
				169	if (Py_ISALPHA(quote)) {
				170	while (!bytesmode \|\| !rawmode) {
				171	if (quote == 'b' \|\| quote == 'B') {
				172	quote = *++s;
				173	*bytesmode = 1;
				174	}
				175	else if (quote == 'u' \|\| quote == 'U') {
				176	quote = *++s;
				177	}
				178	else if (quote == 'r' \|\| quote == 'R') {
				179	quote = *++s;
				180	*rawmode = 1;
				181	}
				182	else if (quote == 'f' \|\| quote == 'F') {
				183	quote = *++s;
				184	fmode = 1;
				185	}
				186	else {
				187	break;
				188	}
				189	}
				190	}
				191
Lysandros Nikolaou	3e0a6f3	2020-05-01 06:27:52 +0300	[diff] [blame]	192	/* fstrings are only allowed in Python 3.6 and greater */
				193	if (fmode && p->feature_version < 6) {
				194	p->error_indicator = 1;
				195	RAISE_SYNTAX_ERROR("Format strings are only supported in Python 3.6 and greater");
				196	return -1;
				197	}
				198
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	199	if (fmode && *bytesmode) {
				200	PyErr_BadInternalCall();
				201	return -1;
				202	}
				203	if (quote != '\'' && quote != '\"') {
				204	PyErr_BadInternalCall();
				205	return -1;
				206	}
				207	/* Skip the leading quote char. */
				208	s++;
				209	len = strlen(s);
				210	if (len > INT_MAX) {
				211	PyErr_SetString(PyExc_OverflowError, "string to parse is too long");
				212	return -1;
				213	}
				214	if (s[--len] != quote) {
				215	/* Last quote char must match the first. */
				216	PyErr_BadInternalCall();
				217	return -1;
				218	}
				219	if (len >= 4 && s[0] == quote && s[1] == quote) {
				220	/* A triple quoted string. We've already skipped one quote at
				221	the start and one at the end of the string. Now skip the
				222	two at the start. */
				223	s += 2;
				224	len -= 2;
				225	/* And check that the last two match. */
				226	if (s[--len] != quote \|\| s[--len] != quote) {
				227	PyErr_BadInternalCall();
				228	return -1;
				229	}
				230	}
				231
				232	if (fmode) {
				233	/* Just return the bytes. The caller will parse the resulting
				234	string. */
				235	*fstr = s;
				236	*fstrlen = len;
				237	return 0;
				238	}
				239
				240	/* Not an f-string. */
				241	/* Avoid invoking escape decoding routines if possible. */
				242	rawmode = rawmode \|\| strchr(s, '\\') == NULL;
				243	if (*bytesmode) {
				244	/* Disallow non-ASCII characters. */
				245	const char *ch;
				246	for (ch = s; *ch; ch++) {
				247	if (Py_CHARMASK(*ch) >= 0x80) {
				248	RAISE_SYNTAX_ERROR(
				249	"bytes can only contain ASCII "
				250	"literal characters.");
				251	return -1;
				252	}
				253	}
				254	if (*rawmode) {
				255	*result = PyBytes_FromStringAndSize(s, len);
				256	}
				257	else {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	258	*result = decode_bytes_with_escapes(p, s, len, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	259	}
				260	}
				261	else {
				262	if (*rawmode) {
				263	*result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
				264	}
				265	else {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	266	*result = decode_unicode_with_escapes(p, s, len, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	267	}
				268	}
				269	return *result == NULL ? -1 : 0;
				270	}
				271
				272
				273
				274	// FSTRING STUFF
				275
				276	static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset);
				277	static void fstring_shift_argument(expr_ty parent, arg_ty args, int lineno, int col_offset);
				278
				279
				280	static inline void shift_expr(expr_ty parent, expr_ty n, int line, int col) {
				281	if (parent->lineno < n->lineno) {
				282	col = 0;
				283	}
				284	fstring_shift_expr_locations(n, line, col);
				285	}
				286
				287	static inline void shift_arg(expr_ty parent, arg_ty n, int line, int col) {
				288	if (parent->lineno < n->lineno) {
				289	col = 0;
				290	}
				291	fstring_shift_argument(parent, n, line, col);
				292	}
				293
				294	static void fstring_shift_seq_locations(expr_ty parent, asdl_seq *seq, int lineno, int col_offset) {
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	295	for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	296	expr_ty expr = asdl_seq_GET(seq, i);
				297	if (expr == NULL){
				298	continue;
				299	}
				300	shift_expr(parent, expr, lineno, col_offset);
				301	}
				302	}
				303
				304	static void fstring_shift_slice_locations(expr_ty parent, expr_ty slice, int lineno, int col_offset) {
				305	switch (slice->kind) {
				306	case Slice_kind:
				307	if (slice->v.Slice.lower) {
				308	shift_expr(parent, slice->v.Slice.lower, lineno, col_offset);
				309	}
				310	if (slice->v.Slice.upper) {
				311	shift_expr(parent, slice->v.Slice.upper, lineno, col_offset);
				312	}
				313	if (slice->v.Slice.step) {
				314	shift_expr(parent, slice->v.Slice.step, lineno, col_offset);
				315	}
				316	break;
				317	case Tuple_kind:
				318	fstring_shift_seq_locations(parent, slice->v.Tuple.elts, lineno, col_offset);
				319	break;
				320	default:
				321	break;
				322	}
				323	}
				324
				325	static void fstring_shift_comprehension(expr_ty parent, comprehension_ty comp, int lineno, int col_offset) {
				326	shift_expr(parent, comp->target, lineno, col_offset);
				327	shift_expr(parent, comp->iter, lineno, col_offset);
				328	fstring_shift_seq_locations(parent, comp->ifs, lineno, col_offset);
				329	}
				330
				331	static void fstring_shift_argument(expr_ty parent, arg_ty arg, int lineno, int col_offset) {
				332	if (arg->annotation != NULL){
				333	shift_expr(parent, arg->annotation, lineno, col_offset);
				334	}
				335	arg->col_offset = arg->col_offset + col_offset;
				336	arg->end_col_offset = arg->end_col_offset + col_offset;
				337	arg->lineno = arg->lineno + lineno;
				338	arg->end_lineno = arg->end_lineno + lineno;
				339	}
				340
				341	static void fstring_shift_arguments(expr_ty parent, arguments_ty args, int lineno, int col_offset) {
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	342	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->posonlyargs); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	343	arg_ty arg = asdl_seq_GET(args->posonlyargs, i);
				344	shift_arg(parent, arg, lineno, col_offset);
				345	}
				346
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	347	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->args); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	348	arg_ty arg = asdl_seq_GET(args->args, i);
				349	shift_arg(parent, arg, lineno, col_offset);
				350	}
				351
				352	if (args->vararg != NULL) {
				353	shift_arg(parent, args->vararg, lineno, col_offset);
				354	}
				355
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	356	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->kwonlyargs); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	357	arg_ty arg = asdl_seq_GET(args->kwonlyargs, i);
				358	shift_arg(parent, arg, lineno, col_offset);
				359	}
				360
				361	fstring_shift_seq_locations(parent, args->kw_defaults, lineno, col_offset);
				362
				363	if (args->kwarg != NULL) {
				364	shift_arg(parent, args->kwarg, lineno, col_offset);
				365	}
				366
				367	fstring_shift_seq_locations(parent, args->defaults, lineno, col_offset);
				368	}
				369
				370	static void fstring_shift_children_locations(expr_ty n, int lineno, int col_offset) {
				371	switch (n->kind) {
				372	case BoolOp_kind:
				373	fstring_shift_seq_locations(n, n->v.BoolOp.values, lineno, col_offset);
				374	break;
				375	case NamedExpr_kind:
				376	shift_expr(n, n->v.NamedExpr.target, lineno, col_offset);
				377	shift_expr(n, n->v.NamedExpr.value, lineno, col_offset);
				378	break;
				379	case BinOp_kind:
				380	shift_expr(n, n->v.BinOp.left, lineno, col_offset);
				381	shift_expr(n, n->v.BinOp.right, lineno, col_offset);
				382	break;
				383	case UnaryOp_kind:
				384	shift_expr(n, n->v.UnaryOp.operand, lineno, col_offset);
				385	break;
				386	case Lambda_kind:
				387	fstring_shift_arguments(n, n->v.Lambda.args, lineno, col_offset);
				388	shift_expr(n, n->v.Lambda.body, lineno, col_offset);
				389	break;
				390	case IfExp_kind:
				391	shift_expr(n, n->v.IfExp.test, lineno, col_offset);
				392	shift_expr(n, n->v.IfExp.body, lineno, col_offset);
				393	shift_expr(n, n->v.IfExp.orelse, lineno, col_offset);
				394	break;
				395	case Dict_kind:
				396	fstring_shift_seq_locations(n, n->v.Dict.keys, lineno, col_offset);
				397	fstring_shift_seq_locations(n, n->v.Dict.values, lineno, col_offset);
				398	break;
				399	case Set_kind:
				400	fstring_shift_seq_locations(n, n->v.Set.elts, lineno, col_offset);
				401	break;
				402	case ListComp_kind:
				403	shift_expr(n, n->v.ListComp.elt, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	404	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.ListComp.generators); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	405	comprehension_ty comp = asdl_seq_GET(n->v.ListComp.generators, i);
				406	fstring_shift_comprehension(n, comp, lineno, col_offset);
				407	}
				408	break;
				409	case SetComp_kind:
				410	shift_expr(n, n->v.SetComp.elt, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	411	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.SetComp.generators); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	412	comprehension_ty comp = asdl_seq_GET(n->v.SetComp.generators, i);
				413	fstring_shift_comprehension(n, comp, lineno, col_offset);
				414	}
				415	break;
				416	case DictComp_kind:
				417	shift_expr(n, n->v.DictComp.key, lineno, col_offset);
				418	shift_expr(n, n->v.DictComp.value, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	419	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.DictComp.generators); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	420	comprehension_ty comp = asdl_seq_GET(n->v.DictComp.generators, i);
				421	fstring_shift_comprehension(n, comp, lineno, col_offset);
				422	}
				423	break;
				424	case GeneratorExp_kind:
				425	shift_expr(n, n->v.GeneratorExp.elt, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	426	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.GeneratorExp.generators); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	427	comprehension_ty comp = asdl_seq_GET(n->v.GeneratorExp.generators, i);
				428	fstring_shift_comprehension(n, comp, lineno, col_offset);
				429	}
				430	break;
				431	case Await_kind:
				432	shift_expr(n, n->v.Await.value, lineno, col_offset);
				433	break;
				434	case Yield_kind:
				435	shift_expr(n, n->v.Yield.value, lineno, col_offset);
				436	break;
				437	case YieldFrom_kind:
				438	shift_expr(n, n->v.YieldFrom.value, lineno, col_offset);
				439	break;
				440	case Compare_kind:
				441	shift_expr(n, n->v.Compare.left, lineno, col_offset);
				442	fstring_shift_seq_locations(n, n->v.Compare.comparators, lineno, col_offset);
				443	break;
				444	case Call_kind:
				445	shift_expr(n, n->v.Call.func, lineno, col_offset);
				446	fstring_shift_seq_locations(n, n->v.Call.args, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	447	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.Call.keywords); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	448	keyword_ty keyword = asdl_seq_GET(n->v.Call.keywords, i);
				449	shift_expr(n, keyword->value, lineno, col_offset);
				450	}
				451	break;
				452	case Attribute_kind:
				453	shift_expr(n, n->v.Attribute.value, lineno, col_offset);
				454	break;
				455	case Subscript_kind:
				456	shift_expr(n, n->v.Subscript.value, lineno, col_offset);
				457	fstring_shift_slice_locations(n, n->v.Subscript.slice, lineno, col_offset);
				458	shift_expr(n, n->v.Subscript.slice, lineno, col_offset);
				459	break;
				460	case Starred_kind:
				461	shift_expr(n, n->v.Starred.value, lineno, col_offset);
				462	break;
				463	case List_kind:
				464	fstring_shift_seq_locations(n, n->v.List.elts, lineno, col_offset);
				465	break;
				466	case Tuple_kind:
				467	fstring_shift_seq_locations(n, n->v.Tuple.elts, lineno, col_offset);
				468	break;
Lysandros Nikolaou	37af21b	2020-04-29 03:43:50 +0300	[diff] [blame]	469	case JoinedStr_kind:
				470	fstring_shift_seq_locations(n, n->v.JoinedStr.values, lineno, col_offset);
				471	break;
				472	case FormattedValue_kind:
				473	shift_expr(n, n->v.FormattedValue.value, lineno, col_offset);
				474	if (n->v.FormattedValue.format_spec) {
				475	shift_expr(n, n->v.FormattedValue.format_spec, lineno, col_offset);
				476	}
				477	break;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	478	default:
				479	return;
				480	}
				481	}
				482
				483	/* Shift locations for the given node and all its children by adding `lineno`
				484	and `col_offset` to existing locations. Note that n is the already parsed
				485	expression. */
				486	static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
				487	{
				488	n->col_offset = n->col_offset + col_offset;
				489
				490	// The following is needed, in order for nodes spanning across multiple lines
				491	// to be shifted correctly. An example of such a node is a Call node, the closing
				492	// parenthesis of which is not on the same line as its name.
				493	if (n->lineno == n->end_lineno) {
				494	n->end_col_offset = n->end_col_offset + col_offset;
				495	}
				496
				497	fstring_shift_children_locations(n, lineno, col_offset);
				498	n->lineno = n->lineno + lineno;
				499	n->end_lineno = n->end_lineno + lineno;
				500	}
				501
				502	/* Fix locations for the given node and its children.
				503
				504	`parent` is the enclosing node.
				505	`n` is the node which locations are going to be fixed relative to parent.
				506	`expr_str` is the child node's string representation, including braces.
				507	*/
				508	static void
				509	fstring_fix_expr_location(Token parent, expr_ty n, char expr_str)
				510	{
				511	char *substr = NULL;
				512	char *start;
				513	int lines = 0;
				514	int cols = 0;
				515
				516	if (parent && parent->bytes) {
				517	char *parent_str = PyBytes_AsString(parent->bytes);
				518	if (!parent_str) {
				519	return;
				520	}
				521	substr = strstr(parent_str, expr_str);
				522	if (substr) {
				523	// The following is needed, in order to correctly shift the column
				524	// offset, in the case that (disregarding any whitespace) a newline
				525	// immediately follows the opening curly brace of the fstring expression.
				526	int newline_after_brace = 1;
				527	start = substr + 1;
				528	while (start && start != '}' && start != '\n') {
				529	if (start != ' ' && start != '\t' && *start != '\f') {
				530	newline_after_brace = 0;
				531	break;
				532	}
				533	start++;
				534	}
				535
				536	// Account for the characters from the last newline character to our
				537	// left until the beginning of substr.
				538	if (!newline_after_brace) {
				539	start = substr;
				540	while (start > parent_str && *start != '\n') {
				541	start--;
				542	}
				543	cols += (int)(substr - start);
				544	}
				545	/* adjust the start based on the number of newlines encountered
				546	before the f-string expression */
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	547	for (char* p = parent_str; p < substr; p++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	548	if (*p == '\n') {
				549	lines++;
				550	}
				551	}
				552	}
				553	}
				554	fstring_shift_expr_locations(n, lines, cols);
				555	}
				556
				557
				558	/* Compile this expression in to an expr_ty. Add parens around the
				559	expression, in order to allow leading spaces in the expression. */
				560	static expr_ty
				561	fstring_compile_expr(Parser p, const char expr_start, const char *expr_end,
				562	Token *t)
				563	{
				564	expr_ty expr = NULL;
				565	char *str;
				566	Py_ssize_t len;
				567	const char *s;
				568	expr_ty result = NULL;
				569
				570	assert(expr_end >= expr_start);
				571	assert(*(expr_start-1) == '{');
				572	assert(expr_end == '}' \|\| expr_end == '!' \|\| *expr_end == ':' \|\|
				573	*expr_end == '=');
				574
				575	/* If the substring is all whitespace, it's an error. We need to catch this
				576	here, and not when we call PyParser_SimpleParseStringFlagsFilename,
				577	because turning the expression '' in to '()' would go from being invalid
				578	to valid. */
				579	for (s = expr_start; s != expr_end; s++) {
				580	char c = *s;
				581	/* The Python parser ignores only the following whitespace
				582	characters (\r already is converted to \n). */
				583	if (!(c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\f')) {
				584	break;
				585	}
				586	}
				587	if (s == expr_end) {
				588	RAISE_SYNTAX_ERROR("f-string: empty expression not allowed");
				589	return NULL;
				590	}
				591
				592	len = expr_end - expr_start;
				593	/* Allocate 3 extra bytes: open paren, close paren, null byte. */
				594	str = PyMem_RawMalloc(len + 3);
				595	if (str == NULL) {
				596	PyErr_NoMemory();
				597	return NULL;
				598	}
				599
				600	str[0] = '(';
				601	memcpy(str+1, expr_start, len);
				602	str[len+1] = ')';
				603	str[len+2] = 0;
				604
				605	struct tok_state* tok = PyTokenizer_FromString(str, 1);
				606	if (tok == NULL) {
				607	return NULL;
				608	}
				609	tok->filename = PyUnicode_FromString("<fstring>");
				610	if (!tok->filename) {
				611	PyTokenizer_Free(tok);
				612	return NULL;
				613	}
				614
Lysandros Nikolaou	3e0a6f3	2020-05-01 06:27:52 +0300	[diff] [blame]	615	Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
				616	NULL, p->arena);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	617	p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
				618	p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
				619	? p->starting_col_offset + t->col_offset : 0;
				620
				621	expr = _PyPegen_run_parser(p2);
				622
				623	if (expr == NULL) {
				624	goto exit;
				625	}
				626
				627	/* Reuse str to find the correct column offset. */
				628	str[0] = '{';
				629	str[len+1] = '}';
				630	fstring_fix_expr_location(t, expr, str);
				631
				632	result = expr;
				633
				634	exit:
				635	_PyPegen_Parser_Free(p2);
				636	PyTokenizer_Free(tok);
				637	return result;
				638	}
				639
				640	/* Return -1 on error.
				641
				642	Return 0 if we reached the end of the literal.
				643
				644	Return 1 if we haven't reached the end of the literal, but we want
				645	the caller to process the literal up to this point. Used for
				646	doubled braces.
				647	*/
				648	static int
				649	fstring_find_literal(Parser p, const char str, const char end, int raw,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	650	PyObject *literal, int recurse_lvl, Token t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	651	{
				652	/* Get any literal string. It ends when we hit an un-doubled left
				653	brace (which isn't part of a unicode name escape such as
				654	"\N{EULER CONSTANT}"), or the end of the string. */
				655
				656	const char s = str;
				657	const char *literal_start = s;
				658	int result = 0;
				659
				660	assert(*literal == NULL);
				661	while (s < end) {
				662	char ch = *s++;
				663	if (!raw && ch == '\\' && s < end) {
				664	ch = *s++;
				665	if (ch == 'N') {
				666	if (s < end && *s++ == '{') {
				667	while (s < end && *s++ != '}') {
				668	}
				669	continue;
				670	}
				671	break;
				672	}
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	673	if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	674	return -1;
				675	}
				676	}
				677	if (ch == '{' \|\| ch == '}') {
				678	/* Check for doubled braces, but only at the top level. If
				679	we checked at every level, then f'{0:{3}}' would fail
				680	with the two closing braces. */
				681	if (recurse_lvl == 0) {
				682	if (s < end && *s == ch) {
				683	/* We're going to tell the caller that the literal ends
				684	here, but that they should continue scanning. But also
				685	skip over the second brace when we resume scanning. */
				686	*str = s + 1;
				687	result = 1;
				688	goto done;
				689	}
				690
				691	/* Where a single '{' is the start of a new expression, a
				692	single '}' is not allowed. */
				693	if (ch == '}') {
				694	*str = s - 1;
				695	RAISE_SYNTAX_ERROR("f-string: single '}' is not allowed");
				696	return -1;
				697	}
				698	}
				699	/* We're either at a '{', which means we're starting another
				700	expression; or a '}', which means we're at the end of this
				701	f-string (for a nested format_spec). */
				702	s--;
				703	break;
				704	}
				705	}
				706	*str = s;
				707	assert(s <= end);
				708	assert(s == end \|\| s == '{' \|\| s == '}');
				709	done:
				710	if (literal_start != s) {
				711	if (raw)
				712	*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
				713	s - literal_start,
				714	NULL, NULL);
				715	else
				716	*literal = decode_unicode_with_escapes(p, literal_start,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	717	s - literal_start, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	718	if (!*literal)
				719	return -1;
				720	}
				721	return result;
				722	}
				723
				724	/* Forward declaration because parsing is recursive. */
				725	static expr_ty
				726	fstring_parse(Parser p, const char str, const char end, int raw, int recurse_lvl,
				727	Token first_token, Token t, Token *last_token);
				728
				729	/* Parse the f-string at str, ending at end. We know str starts an
				730	expression (so it must be a '{'). Returns the FormattedValue node, which
				731	includes the expression, conversion character, format_spec expression, and
				732	optionally the text of the expression (if = is used).
				733
				734	Note that I don't do a perfect job here: I don't make sure that a
				735	closing brace doesn't match an opening paren, for example. It
				736	doesn't need to error on all invalid expressions, just correctly
				737	find the end of all valid ones. Any errors inside the expression
				738	will be caught when we parse it later.
				739
				740	*expression is set to the expression. For an '=' "debug" expression,
				741	*expr_text is set to the debug text (the original text of the expression,
				742	including the '=' and any whitespace around it, as a string object). If
   not a debug expression, *expr_text set to NULL. */
				744	static int
				745	fstring_find_expr(Parser p, const char str, const char end, int raw, int recurse_lvl,
				746	PyObject *expr_text, expr_ty expression, Token *first_token,
				747	Token t, Token last_token)
				748	{
				749	/* Return -1 on error, else 0. */
				750
				751	const char *expr_start;
				752	const char *expr_end;
				753	expr_ty simple_expression;
				754	expr_ty format_spec = NULL; /* Optional format specifier. */
				755	int conversion = -1; /* The conversion char. Use default if not
				756	specified, or !r if using = and no format
				757	spec. */
				758
				759	/* 0 if we're not in a string, else the quote char we're trying to
				760	match (single or double quote). */
				761	char quote_char = 0;
				762
				763	/* If we're inside a string, 1=normal, 3=triple-quoted. */
				764	int string_type = 0;
				765
				766	/* Keep track of nesting level for braces/parens/brackets in
				767	expressions. */
				768	Py_ssize_t nested_depth = 0;
				769	char parenstack[MAXLEVEL];
				770
				771	*expr_text = NULL;
				772
				773	/* Can only nest one level deep. */
				774	if (recurse_lvl >= 2) {
				775	RAISE_SYNTAX_ERROR("f-string: expressions nested too deeply");
				776	goto error;
				777	}
				778
				779	/* The first char must be a left brace, or we wouldn't have gotten
				780	here. Skip over it. */
				781	assert(**str == '{');
				782	*str += 1;
				783
				784	expr_start = *str;
				785	for (; str < end; (str)++) {
				786	char ch;
				787
				788	/* Loop invariants. */
				789	assert(nested_depth >= 0);
				790	assert(str >= expr_start && str < end);
				791	if (quote_char)
				792	assert(string_type == 1 \|\| string_type == 3);
				793	else
				794	assert(string_type == 0);
				795
				796	ch = **str;
				797	/* Nowhere inside an expression is a backslash allowed. */
				798	if (ch == '\\') {
				799	/* Error: can't include a backslash character, inside
				800	parens or strings or not. */
				801	RAISE_SYNTAX_ERROR(
				802	"f-string expression part "
				803	"cannot include a backslash");
				804	goto error;
				805	}
				806	if (quote_char) {
				807	/* We're inside a string. See if we're at the end. */
				808	/* This code needs to implement the same non-error logic
				809	as tok_get from tokenizer.c, at the letter_quote
				810	label. To actually share that code would be a
				811	nightmare. But, it's unlikely to change and is small,
				812	so duplicate it here. Note we don't need to catch all
				813	of the errors, since they'll be caught when parsing the
				814	expression. We just need to match the non-error
				815	cases. Thus we can ignore \n in single-quoted strings,
				816	for example. Or non-terminated strings. */
				817	if (ch == quote_char) {
				818	/* Does this match the string_type (single or triple
				819	quoted)? */
				820	if (string_type == 3) {
				821	if (str+2 < end && (str+1) == ch && (*str+2) == ch) {
				822	/* We're at the end of a triple quoted string. */
				823	*str += 2;
				824	string_type = 0;
				825	quote_char = 0;
				826	continue;
				827	}
				828	} else {
				829	/* We're at the end of a normal string. */
				830	quote_char = 0;
				831	string_type = 0;
				832	continue;
				833	}
				834	}
				835	} else if (ch == '\'' \|\| ch == '"') {
				836	/* Is this a triple quoted string? */
				837	if (str+2 < end && (str+1) == ch && (*str+2) == ch) {
				838	string_type = 3;
				839	*str += 2;
				840	} else {
				841	/* Start of a normal string. */
				842	string_type = 1;
				843	}
				844	/* Start looking for the end of the string. */
				845	quote_char = ch;
				846	} else if (ch == '[' \|\| ch == '{' \|\| ch == '(') {
				847	if (nested_depth >= MAXLEVEL) {
				848	RAISE_SYNTAX_ERROR("f-string: too many nested parenthesis");
				849	goto error;
				850	}
				851	parenstack[nested_depth] = ch;
				852	nested_depth++;
				853	} else if (ch == '#') {
				854	/* Error: can't include a comment character, inside parens
				855	or not. */
				856	RAISE_SYNTAX_ERROR("f-string expression part cannot include '#'");
				857	goto error;
				858	} else if (nested_depth == 0 &&
				859	(ch == '!' \|\| ch == ':' \|\| ch == '}' \|\|
				860	ch == '=' \|\| ch == '>' \|\| ch == '<')) {
				861	/* See if there's a next character. */
				862	if (*str+1 < end) {
				863	char next = (str+1);
				864
				865	/* For "!=". since '=' is not an allowed conversion character,
				866	nothing is lost in this test. */
				867	if ((ch == '!' && next == '=') \|\| /* != */
				868	(ch == '=' && next == '=') \|\| /* == */
				869	(ch == '<' && next == '=') \|\| /* <= */
				870	(ch == '>' && next == '=') /* >= */
				871	) {
				872	*str += 1;
				873	continue;
				874	}
				875	/* Don't get out of the loop for these, if they're single
				876	chars (not part of 2-char tokens). If by themselves, they
				877	don't end an expression (unlike say '!'). */
				878	if (ch == '>' \|\| ch == '<') {
				879	continue;
				880	}
				881	}
				882
				883	/* Normal way out of this loop. */
				884	break;
				885	} else if (ch == ']' \|\| ch == '}' \|\| ch == ')') {
				886	if (!nested_depth) {
				887	RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", ch);
				888	goto error;
				889	}
				890	nested_depth--;
				891	int opening = parenstack[nested_depth];
				892	if (!((opening == '(' && ch == ')') \|\|
				893	(opening == '[' && ch == ']') \|\|
				894	(opening == '{' && ch == '}')))
				895	{
				896	RAISE_SYNTAX_ERROR(
				897	"f-string: closing parenthesis '%c' "
				898	"does not match opening parenthesis '%c'",
				899	ch, opening);
				900	goto error;
				901	}
				902	} else {
				903	/* Just consume this char and loop around. */
				904	}
				905	}
				906	expr_end = *str;
				907	/* If we leave this loop in a string or with mismatched parens, we
				908	don't care. We'll get a syntax error when compiling the
				909	expression. But, we can produce a better error message, so
				910	let's just do that.*/
				911	if (quote_char) {
				912	RAISE_SYNTAX_ERROR("f-string: unterminated string");
				913	goto error;
				914	}
				915	if (nested_depth) {
				916	int opening = parenstack[nested_depth - 1];
				917	RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", opening);
				918	goto error;
				919	}
				920
				921	if (*str >= end)
				922	goto unexpected_end_of_string;
				923
				924	/* Compile the expression as soon as possible, so we show errors
				925	related to the expression before errors related to the
				926	conversion or format_spec. */
				927	simple_expression = fstring_compile_expr(p, expr_start, expr_end, t);
				928	if (!simple_expression)
				929	goto error;
				930
				931	/* Check for =, which puts the text value of the expression in
				932	expr_text. */
				933	if (**str == '=') {
				934	*str += 1;
				935
				936	/* Skip over ASCII whitespace. No need to test for end of string
				937	here, since we know there's at least a trailing quote somewhere
				938	ahead. */
				939	while (Py_ISSPACE(**str)) {
				940	*str += 1;
				941	}
				942
				943	/* Set expr_text to the text of the expression. /
				944	expr_text = PyUnicode_FromStringAndSize(expr_start, str-expr_start);
				945	if (!*expr_text) {
				946	goto error;
				947	}
				948	}
				949
				950	/* Check for a conversion char, if present. */
				951	if (**str == '!') {
				952	*str += 1;
				953	if (*str >= end)
				954	goto unexpected_end_of_string;
				955
				956	conversion = **str;
				957	*str += 1;
				958
				959	/* Validate the conversion. */
				960	if (!(conversion == 's' \|\| conversion == 'r' \|\| conversion == 'a')) {
				961	RAISE_SYNTAX_ERROR(
				962	"f-string: invalid conversion character: "
				963	"expected 's', 'r', or 'a'");
				964	goto error;
				965	}
				966
				967	}
				968
				969	/* Check for the format spec, if present. */
				970	if (*str >= end)
				971	goto unexpected_end_of_string;
				972	if (**str == ':') {
				973	*str += 1;
				974	if (*str >= end)
				975	goto unexpected_end_of_string;
				976
				977	/* Parse the format spec. */
				978	format_spec = fstring_parse(p, str, end, raw, recurse_lvl+1,
				979	first_token, t, last_token);
				980	if (!format_spec)
				981	goto error;
				982	}
				983
				984	if (str >= end \|\| *str != '}')
				985	goto unexpected_end_of_string;
				986
				987	/* We're at a right brace. Consume it. */
				988	assert(*str < end);
				989	assert(**str == '}');
				990	*str += 1;
				991
				992	/* If we're in = mode (detected by non-NULL expr_text), and have no format
				993	spec and no explicit conversion, set the conversion to 'r'. */
				994	if (*expr_text && format_spec == NULL && conversion == -1) {
				995	conversion = 'r';
				996	}
				997
				998	/* And now create the FormattedValue node that represents this
				999	entire expression with the conversion and format spec. */
				1000	//TODO: Fix this
				1001	*expression = FormattedValue(simple_expression, conversion,
				1002	format_spec, first_token->lineno,
				1003	first_token->col_offset, last_token->end_lineno,
				1004	last_token->end_col_offset, p->arena);
				1005	if (!*expression)
				1006	goto error;
				1007
				1008	return 0;
				1009
				1010	unexpected_end_of_string:
				1011	RAISE_SYNTAX_ERROR("f-string: expecting '}'");
				1012	/* Falls through to error. */
				1013
				1014	error:
				1015	Py_XDECREF(*expr_text);
				1016	return -1;
				1017
				1018	}
				1019
				1020	/* Return -1 on error.
				1021
				1022	Return 0 if we have a literal (possible zero length) and an
				1023	expression (zero length if at the end of the string.
				1024
				1025	Return 1 if we have a literal, but no expression, and we want the
				1026	caller to call us again. This is used to deal with doubled
				1027	braces.
				1028
				1029	When called multiple times on the string 'a{{b{0}c', this function
				1030	will return:
				1031
				1032	1. the literal 'a{' with no expression, and a return value
				1033	of 1. Despite the fact that there's no expression, the return
				1034	value of 1 means we're not finished yet.
				1035
				1036	2. the literal 'b' and the expression '0', with a return value of
				1037	0. The fact that there's an expression means we're not finished.
				1038
				1039	3. literal 'c' with no expression and a return value of 0. The
				1040	combination of the return value of 0 with no expression means
				1041	we're finished.
				1042	*/
				1043	static int
				1044	fstring_find_literal_and_expr(Parser p, const char str, const char end, int raw,
				1045	int recurse_lvl, PyObject **literal,
				1046	PyObject *expr_text, expr_ty expression,
				1047	Token first_token, Token t, Token *last_token)
				1048	{
				1049	int result;
				1050
				1051	assert(literal == NULL && expression == NULL);
				1052
				1053	/* Get any literal string. */
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	1054	result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1055	if (result < 0)
				1056	goto error;
				1057
				1058	assert(result == 0 \|\| result == 1);
				1059
				1060	if (result == 1)
				1061	/* We have a literal, but don't look at the expression. */
				1062	return 1;
				1063
				1064	if (str >= end \|\| *str == '}')
				1065	/* We're at the end of the string or the end of a nested
				1066	f-string: no expression. The top-level error case where we
				1067	expect to be at the end of the string but we're at a '}' is
				1068	handled later. */
				1069	return 0;
				1070
				1071	/* We must now be the start of an expression, on a '{'. */
				1072	assert(**str == '{');
				1073
				1074	if (fstring_find_expr(p, str, end, raw, recurse_lvl, expr_text,
				1075	expression, first_token, t, last_token) < 0)
				1076	goto error;
				1077
				1078	return 0;
				1079
				1080	error:
				1081	Py_CLEAR(*literal);
				1082	return -1;
				1083	}
				1084
				1085	#ifdef NDEBUG
				1086	#define ExprList_check_invariants(l)
				1087	#else
				1088	static void
				1089	ExprList_check_invariants(ExprList *l)
				1090	{
				1091	/* Check our invariants. Make sure this object is "live", and
				1092	hasn't been deallocated. */
				1093	assert(l->size >= 0);
				1094	assert(l->p != NULL);
				1095	if (l->size <= EXPRLIST_N_CACHED)
				1096	assert(l->data == l->p);
				1097	}
				1098	#endif
				1099
				1100	static void
				1101	ExprList_Init(ExprList *l)
				1102	{
				1103	l->allocated = EXPRLIST_N_CACHED;
				1104	l->size = 0;
				1105
				1106	/* Until we start allocating dynamically, p points to data. */
				1107	l->p = l->data;
				1108
				1109	ExprList_check_invariants(l);
				1110	}
				1111
				1112	static int
				1113	ExprList_Append(ExprList *l, expr_ty exp)
				1114	{
				1115	ExprList_check_invariants(l);
				1116	if (l->size >= l->allocated) {
				1117	/* We need to alloc (or realloc) the memory. */
				1118	Py_ssize_t new_size = l->allocated * 2;
				1119
				1120	/* See if we've ever allocated anything dynamically. */
				1121	if (l->p == l->data) {
				1122	Py_ssize_t i;
				1123	/* We're still using the cached data. Switch to
				1124	alloc-ing. */
				1125	l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size);
				1126	if (!l->p)
				1127	return -1;
				1128	/* Copy the cached data into the new buffer. */
				1129	for (i = 0; i < l->size; i++)
				1130	l->p[i] = l->data[i];
				1131	} else {
				1132	/* Just realloc. */
				1133	expr_ty tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) new_size);
				1134	if (!tmp) {
				1135	PyMem_RawFree(l->p);
				1136	l->p = NULL;
				1137	return -1;
				1138	}
				1139	l->p = tmp;
				1140	}
				1141
				1142	l->allocated = new_size;
				1143	assert(l->allocated == 2 * l->size);
				1144	}
				1145
				1146	l->p[l->size++] = exp;
				1147
				1148	ExprList_check_invariants(l);
				1149	return 0;
				1150	}
				1151
				1152	static void
				1153	ExprList_Dealloc(ExprList *l)
				1154	{
				1155	ExprList_check_invariants(l);
				1156
				1157	/* If there's been an error, or we've never dynamically allocated,
				1158	do nothing. */
				1159	if (!l->p \|\| l->p == l->data) {
				1160	/* Do nothing. */
				1161	} else {
				1162	/* We have dynamically allocated. Free the memory. */
				1163	PyMem_RawFree(l->p);
				1164	}
				1165	l->p = NULL;
				1166	l->size = -1;
				1167	}
				1168
				1169	static asdl_seq *
				1170	ExprList_Finish(ExprList l, PyArena arena)
				1171	{
				1172	asdl_seq *seq;
				1173
				1174	ExprList_check_invariants(l);
				1175
				1176	/* Allocate the asdl_seq and copy the expressions in to it. */
				1177	seq = _Py_asdl_seq_new(l->size, arena);
				1178	if (seq) {
				1179	Py_ssize_t i;
				1180	for (i = 0; i < l->size; i++)
				1181	asdl_seq_SET(seq, i, l->p[i]);
				1182	}
				1183	ExprList_Dealloc(l);
				1184	return seq;
				1185	}
				1186
				1187	#ifdef NDEBUG
				1188	#define FstringParser_check_invariants(state)
				1189	#else
				1190	static void
				1191	FstringParser_check_invariants(FstringParser *state)
				1192	{
				1193	if (state->last_str)
				1194	assert(PyUnicode_CheckExact(state->last_str));
				1195	ExprList_check_invariants(&state->expr_list);
				1196	}
				1197	#endif
				1198
				1199	void
				1200	_PyPegen_FstringParser_Init(FstringParser *state)
				1201	{
				1202	state->last_str = NULL;
				1203	state->fmode = 0;
				1204	ExprList_Init(&state->expr_list);
				1205	FstringParser_check_invariants(state);
				1206	}
				1207
				1208	void
				1209	_PyPegen_FstringParser_Dealloc(FstringParser *state)
				1210	{
				1211	FstringParser_check_invariants(state);
				1212
				1213	Py_XDECREF(state->last_str);
				1214	ExprList_Dealloc(&state->expr_list);
				1215	}
				1216
				1217	/* Make a Constant node, but decref the PyUnicode object being added. */
				1218	static expr_ty
				1219	make_str_node_and_del(Parser p, PyObject str, Token first_token, Token *last_token)
				1220	{
				1221	PyObject s = str;
				1222	PyObject *kind = NULL;
				1223	*str = NULL;
				1224	assert(PyUnicode_CheckExact(s));
				1225	if (PyArena_AddPyObject(p->arena, s) < 0) {
				1226	Py_DECREF(s);
				1227	return NULL;
				1228	}
				1229	const char* the_str = PyBytes_AsString(first_token->bytes);
				1230	if (the_str && the_str[0] == 'u') {
				1231	kind = _PyPegen_new_identifier(p, "u");
				1232	}
				1233
				1234	if (kind == NULL && PyErr_Occurred()) {
				1235	return NULL;
				1236	}
				1237
				1238	return Constant(s, kind, first_token->lineno, first_token->col_offset,
				1239	last_token->end_lineno, last_token->end_col_offset, p->arena);
				1240
				1241	}
				1242
				1243
				1244	/* Add a non-f-string (that is, a regular literal string). str is
				1245	decref'd. */
				1246	int
				1247	_PyPegen_FstringParser_ConcatAndDel(FstringParser state, PyObject str)
				1248	{
				1249	FstringParser_check_invariants(state);
				1250
				1251	assert(PyUnicode_CheckExact(str));
				1252
				1253	if (PyUnicode_GET_LENGTH(str) == 0) {
				1254	Py_DECREF(str);
				1255	return 0;
				1256	}
				1257
				1258	if (!state->last_str) {
				1259	/* We didn't have a string before, so just remember this one. */
				1260	state->last_str = str;
				1261	} else {
				1262	/* Concatenate this with the previous string. */
				1263	PyUnicode_AppendAndDel(&state->last_str, str);
				1264	if (!state->last_str)
				1265	return -1;
				1266	}
				1267	FstringParser_check_invariants(state);
				1268	return 0;
				1269	}
				1270
				1271	/* Parse an f-string. The f-string is in *str to end, with no
				1272	'f' or quotes. */
				1273	int
				1274	_PyPegen_FstringParser_ConcatFstring(Parser p, FstringParser state, const char **str,
				1275	const char *end, int raw, int recurse_lvl,
				1276	Token first_token, Token t, Token *last_token)
				1277	{
				1278	FstringParser_check_invariants(state);
				1279	state->fmode = 1;
				1280
				1281	/* Parse the f-string. */
				1282	while (1) {
				1283	PyObject *literal = NULL;
				1284	PyObject *expr_text = NULL;
				1285	expr_ty expression = NULL;
				1286
				1287	/* If there's a zero length literal in front of the
				1288	expression, literal will be NULL. If we're at the end of
				1289	the f-string, expression will be NULL (unless result == 1,
				1290	see below). */
				1291	int result = fstring_find_literal_and_expr(p, str, end, raw, recurse_lvl,
				1292	&literal, &expr_text,
				1293	&expression, first_token, t, last_token);
				1294	if (result < 0)
				1295	return -1;
				1296
				1297	/* Add the literal, if any. */
				1298	if (literal && _PyPegen_FstringParser_ConcatAndDel(state, literal) < 0) {
				1299	Py_XDECREF(expr_text);
				1300	return -1;
				1301	}
				1302	/* Add the expr_text, if any. */
				1303	if (expr_text && _PyPegen_FstringParser_ConcatAndDel(state, expr_text) < 0) {
				1304	return -1;
				1305	}
				1306
				1307	/* We've dealt with the literal and expr_text, their ownership has
				1308	been transferred to the state object. Don't look at them again. */
				1309
				1310	/* See if we should just loop around to get the next literal
				1311	and expression, while ignoring the expression this
				1312	time. This is used for un-doubling braces, as an
				1313	optimization. */
				1314	if (result == 1)
				1315	continue;
				1316
				1317	if (!expression)
				1318	/* We're done with this f-string. */
				1319	break;
				1320
				1321	/* We know we have an expression. Convert any existing string
				1322	to a Constant node. */
				1323	if (!state->last_str) {
				1324	/* Do nothing. No previous literal. */
				1325	} else {
				1326	/* Convert the existing last_str literal to a Constant node. */
				1327	expr_ty str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1328	if (!str \|\| ExprList_Append(&state->expr_list, str) < 0)
				1329	return -1;
				1330	}
				1331
				1332	if (ExprList_Append(&state->expr_list, expression) < 0)
				1333	return -1;
				1334	}
				1335
				1336	/* If recurse_lvl is zero, then we must be at the end of the
				1337	string. Otherwise, we must be at a right brace. */
				1338
				1339	if (recurse_lvl == 0 && *str < end-1) {
				1340	RAISE_SYNTAX_ERROR("f-string: unexpected end of string");
				1341	return -1;
				1342	}
				1343	if (recurse_lvl != 0 && **str != '}') {
				1344	RAISE_SYNTAX_ERROR("f-string: expecting '}'");
				1345	return -1;
				1346	}
				1347
				1348	FstringParser_check_invariants(state);
				1349	return 0;
				1350	}
				1351
				1352	/* Convert the partial state reflected in last_str and expr_list to an
				1353	expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
				1354	expr_ty
				1355	_PyPegen_FstringParser_Finish(Parser p, FstringParser state, Token* first_token,
				1356	Token *last_token)
				1357	{
				1358	asdl_seq *seq;
				1359
				1360	FstringParser_check_invariants(state);
				1361
				1362	/* If we're just a constant string with no expressions, return
				1363	that. */
				1364	if (!state->fmode) {
				1365	assert(!state->expr_list.size);
				1366	if (!state->last_str) {
				1367	/* Create a zero length string. */
				1368	state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
				1369	if (!state->last_str)
				1370	goto error;
				1371	}
				1372	return make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1373	}
				1374
				1375	/* Create a Constant node out of last_str, if needed. It will be the
				1376	last node in our expression list. */
				1377	if (state->last_str) {
				1378	expr_ty str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1379	if (!str \|\| ExprList_Append(&state->expr_list, str) < 0)
				1380	goto error;
				1381	}
				1382	/* This has already been freed. */
				1383	assert(state->last_str == NULL);
				1384
				1385	seq = ExprList_Finish(&state->expr_list, p->arena);
				1386	if (!seq)
				1387	goto error;
				1388
				1389	return _Py_JoinedStr(seq, first_token->lineno, first_token->col_offset,
				1390	last_token->end_lineno, last_token->end_col_offset, p->arena);
				1391
				1392	error:
				1393	_PyPegen_FstringParser_Dealloc(state);
				1394	return NULL;
				1395	}
				1396
				1397	/* Given an f-string (with no 'f' or quotes) that's in *str and ends
				1398	at end, parse it into an expr_ty. Return NULL on error. Adjust
				1399	str to point past the parsed portion. */
				1400	static expr_ty
				1401	fstring_parse(Parser p, const char str, const char end, int raw,
				1402	int recurse_lvl, Token first_token, Token t, Token *last_token)
				1403	{
				1404	FstringParser state;
				1405
				1406	_PyPegen_FstringParser_Init(&state);
				1407	if (_PyPegen_FstringParser_ConcatFstring(p, &state, str, end, raw, recurse_lvl,
				1408	first_token, t, last_token) < 0) {
				1409	_PyPegen_FstringParser_Dealloc(&state);
				1410	return NULL;
				1411	}
				1412
				1413	return _PyPegen_FstringParser_Finish(p, &state, t, t);
				1414	}