Blame - Parser/pegen/parse_string.c - platform/external/python/cpython3

blob: e24ecc58d3aa1dc3acf7f05423ad3dd1720604ff [file] [log] [blame]

Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1	#include <Python.h>
				2
				3	#include "../tokenizer.h"
				4	#include "pegen.h"
				5	#include "parse_string.h"
				6
				7	//// STRING HANDLING FUNCTIONS ////
				8
// These functions are ported directly from Python/ast.c with some modifications
// to account for the use of "Parser *p", the fact that we don't have parser nodes
// to pass around, and the usage of some specialized APIs present only in this
// file (like "_PyPegen_raise_syntax_error").
				13
				14	static int
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	15	warn_invalid_escape_sequence(Parser p, unsigned char first_invalid_escape_char, Token t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	16	{
				17	PyObject *msg =
				18	PyUnicode_FromFormat("invalid escape sequence \\%c", first_invalid_escape_char);
				19	if (msg == NULL) {
				20	return -1;
				21	}
				22	if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	23	t->lineno, NULL, NULL) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	24	if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
				25	/* Replace the DeprecationWarning exception with a SyntaxError
				26	to get a more accurate error report */
				27	PyErr_Clear();
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	28
				29	/* This is needed, in order for the SyntaxError to point to the token t,
				30	since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
				31	error location, if p->known_err_token is not set. */
				32	p->known_err_token = t;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	33	RAISE_SYNTAX_ERROR("invalid escape sequence \\%c", first_invalid_escape_char);
				34	}
				35	Py_DECREF(msg);
				36	return -1;
				37	}
				38	Py_DECREF(msg);
				39	return 0;
				40	}
				41
				42	static PyObject *
				43	decode_utf8(const char *sPtr, const char end)
				44	{
				45	const char s, t;
				46	t = s = *sPtr;
				47	while (s < end && (*s & 0x80)) {
				48	s++;
				49	}
				50	*sPtr = s;
				51	return PyUnicode_DecodeUTF8(t, s - t, NULL);
				52	}
				53
				54	static PyObject *
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	55	decode_unicode_with_escapes(Parser parser, const char s, size_t len, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	56	{
				57	PyObject v, u;
				58	char *buf;
				59	char *p;
				60	const char *end;
				61
				62	/* check for integer overflow */
				63	if (len > SIZE_MAX / 6) {
				64	return NULL;
				65	}
				66	/* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
				67	"\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
				68	u = PyBytes_FromStringAndSize((char )NULL, len 6);
				69	if (u == NULL) {
				70	return NULL;
				71	}
				72	p = buf = PyBytes_AsString(u);
				73	end = s + len;
				74	while (s < end) {
				75	if (*s == '\\') {
				76	p++ = s++;
				77	if (s >= end \|\| *s & 0x80) {
				78	strcpy(p, "u005c");
				79	p += 5;
				80	if (s >= end) {
				81	break;
				82	}
				83	}
				84	}
				85	if (*s & 0x80) {
				86	PyObject *w;
				87	int kind;
				88	void *data;
				89	Py_ssize_t len, i;
				90	w = decode_utf8(&s, end);
				91	if (w == NULL) {
				92	Py_DECREF(u);
				93	return NULL;
				94	}
				95	kind = PyUnicode_KIND(w);
				96	data = PyUnicode_DATA(w);
				97	len = PyUnicode_GET_LENGTH(w);
				98	for (i = 0; i < len; i++) {
				99	Py_UCS4 chr = PyUnicode_READ(kind, data, i);
				100	sprintf(p, "\\U%08x", chr);
				101	p += 10;
				102	}
				103	/* Should be impossible to overflow */
				104	assert(p - buf <= PyBytes_GET_SIZE(u));
				105	Py_DECREF(w);
				106	}
				107	else {
				108	p++ = s++;
				109	}
				110	}
				111	len = p - buf;
				112	s = buf;
				113
				114	const char *first_invalid_escape;
				115	v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
				116
				117	if (v != NULL && first_invalid_escape != NULL) {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	118	if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	119	/* We have not decref u before because first_invalid_escape points
				120	inside u. */
				121	Py_XDECREF(u);
				122	Py_DECREF(v);
				123	return NULL;
				124	}
				125	}
				126	Py_XDECREF(u);
				127	return v;
				128	}
				129
				130	static PyObject *
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	131	decode_bytes_with_escapes(Parser p, const char s, Py_ssize_t len, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	132	{
				133	const char *first_invalid_escape;
				134	PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
				135	if (result == NULL) {
				136	return NULL;
				137	}
				138
				139	if (first_invalid_escape != NULL) {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	140	if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	141	Py_DECREF(result);
				142	return NULL;
				143	}
				144	}
				145	return result;
				146	}
				147
				148	/* s must include the bracketing quote characters, and r, b, u,
				149	&/or f prefixes (if any), and embedded escape sequences (if any).
				150	_PyPegen_parsestr parses it, and sets *result to decoded Python string object.
				151	If the string is an f-string, set fstr and fstrlen to the unparsed
				152	string object. Return 0 if no errors occurred. */
				153	int
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	154	_PyPegen_parsestr(Parser p, int bytesmode, int rawmode, PyObject *result,
				155	const char *fstr, Py_ssize_t fstrlen, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	156	{
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	157	const char *s = PyBytes_AsString(t->bytes);
				158	if (s == NULL) {
				159	return -1;
				160	}
				161
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	162	size_t len;
				163	int quote = Py_CHARMASK(*s);
				164	int fmode = 0;
				165	*bytesmode = 0;
				166	*rawmode = 0;
				167	*result = NULL;
				168	*fstr = NULL;
				169	if (Py_ISALPHA(quote)) {
				170	while (!bytesmode \|\| !rawmode) {
				171	if (quote == 'b' \|\| quote == 'B') {
				172	quote = *++s;
				173	*bytesmode = 1;
				174	}
				175	else if (quote == 'u' \|\| quote == 'U') {
				176	quote = *++s;
				177	}
				178	else if (quote == 'r' \|\| quote == 'R') {
				179	quote = *++s;
				180	*rawmode = 1;
				181	}
				182	else if (quote == 'f' \|\| quote == 'F') {
				183	quote = *++s;
				184	fmode = 1;
				185	}
				186	else {
				187	break;
				188	}
				189	}
				190	}
				191
Lysandros Nikolaou	3e0a6f3	2020-05-01 06:27:52 +0300	[diff] [blame]	192	/* fstrings are only allowed in Python 3.6 and greater */
				193	if (fmode && p->feature_version < 6) {
				194	p->error_indicator = 1;
				195	RAISE_SYNTAX_ERROR("Format strings are only supported in Python 3.6 and greater");
				196	return -1;
				197	}
				198
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	199	if (fmode && *bytesmode) {
				200	PyErr_BadInternalCall();
				201	return -1;
				202	}
				203	if (quote != '\'' && quote != '\"') {
				204	PyErr_BadInternalCall();
				205	return -1;
				206	}
				207	/* Skip the leading quote char. */
				208	s++;
				209	len = strlen(s);
				210	if (len > INT_MAX) {
				211	PyErr_SetString(PyExc_OverflowError, "string to parse is too long");
				212	return -1;
				213	}
				214	if (s[--len] != quote) {
				215	/* Last quote char must match the first. */
				216	PyErr_BadInternalCall();
				217	return -1;
				218	}
				219	if (len >= 4 && s[0] == quote && s[1] == quote) {
				220	/* A triple quoted string. We've already skipped one quote at
				221	the start and one at the end of the string. Now skip the
				222	two at the start. */
				223	s += 2;
				224	len -= 2;
				225	/* And check that the last two match. */
				226	if (s[--len] != quote \|\| s[--len] != quote) {
				227	PyErr_BadInternalCall();
				228	return -1;
				229	}
				230	}
				231
				232	if (fmode) {
				233	/* Just return the bytes. The caller will parse the resulting
				234	string. */
				235	*fstr = s;
				236	*fstrlen = len;
				237	return 0;
				238	}
				239
				240	/* Not an f-string. */
				241	/* Avoid invoking escape decoding routines if possible. */
				242	rawmode = rawmode \|\| strchr(s, '\\') == NULL;
				243	if (*bytesmode) {
				244	/* Disallow non-ASCII characters. */
				245	const char *ch;
				246	for (ch = s; *ch; ch++) {
				247	if (Py_CHARMASK(*ch) >= 0x80) {
				248	RAISE_SYNTAX_ERROR(
				249	"bytes can only contain ASCII "
				250	"literal characters.");
				251	return -1;
				252	}
				253	}
				254	if (*rawmode) {
				255	*result = PyBytes_FromStringAndSize(s, len);
				256	}
				257	else {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	258	*result = decode_bytes_with_escapes(p, s, len, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	259	}
				260	}
				261	else {
				262	if (*rawmode) {
				263	*result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
				264	}
				265	else {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	266	*result = decode_unicode_with_escapes(p, s, len, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	267	}
				268	}
				269	return *result == NULL ? -1 : 0;
				270	}
				271
				272
				273
				274	// FSTRING STUFF
				275
				276	static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset);
				277	static void fstring_shift_argument(expr_ty parent, arg_ty args, int lineno, int col_offset);
				278
				279
				280	static inline void shift_expr(expr_ty parent, expr_ty n, int line, int col) {
				281	if (parent->lineno < n->lineno) {
				282	col = 0;
				283	}
				284	fstring_shift_expr_locations(n, line, col);
				285	}
				286
				287	static inline void shift_arg(expr_ty parent, arg_ty n, int line, int col) {
				288	if (parent->lineno < n->lineno) {
				289	col = 0;
				290	}
				291	fstring_shift_argument(parent, n, line, col);
				292	}
				293
				294	static void fstring_shift_seq_locations(expr_ty parent, asdl_seq *seq, int lineno, int col_offset) {
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	295	for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	296	expr_ty expr = asdl_seq_GET(seq, i);
				297	if (expr == NULL){
				298	continue;
				299	}
				300	shift_expr(parent, expr, lineno, col_offset);
				301	}
				302	}
				303
				304	static void fstring_shift_slice_locations(expr_ty parent, expr_ty slice, int lineno, int col_offset) {
				305	switch (slice->kind) {
				306	case Slice_kind:
				307	if (slice->v.Slice.lower) {
				308	shift_expr(parent, slice->v.Slice.lower, lineno, col_offset);
				309	}
				310	if (slice->v.Slice.upper) {
				311	shift_expr(parent, slice->v.Slice.upper, lineno, col_offset);
				312	}
				313	if (slice->v.Slice.step) {
				314	shift_expr(parent, slice->v.Slice.step, lineno, col_offset);
				315	}
				316	break;
				317	case Tuple_kind:
				318	fstring_shift_seq_locations(parent, slice->v.Tuple.elts, lineno, col_offset);
				319	break;
				320	default:
				321	break;
				322	}
				323	}
				324
				325	static void fstring_shift_comprehension(expr_ty parent, comprehension_ty comp, int lineno, int col_offset) {
				326	shift_expr(parent, comp->target, lineno, col_offset);
				327	shift_expr(parent, comp->iter, lineno, col_offset);
				328	fstring_shift_seq_locations(parent, comp->ifs, lineno, col_offset);
				329	}
				330
				331	static void fstring_shift_argument(expr_ty parent, arg_ty arg, int lineno, int col_offset) {
				332	if (arg->annotation != NULL){
				333	shift_expr(parent, arg->annotation, lineno, col_offset);
				334	}
				335	arg->col_offset = arg->col_offset + col_offset;
				336	arg->end_col_offset = arg->end_col_offset + col_offset;
				337	arg->lineno = arg->lineno + lineno;
				338	arg->end_lineno = arg->end_lineno + lineno;
				339	}
				340
				341	static void fstring_shift_arguments(expr_ty parent, arguments_ty args, int lineno, int col_offset) {
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	342	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->posonlyargs); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	343	arg_ty arg = asdl_seq_GET(args->posonlyargs, i);
				344	shift_arg(parent, arg, lineno, col_offset);
				345	}
				346
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	347	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->args); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	348	arg_ty arg = asdl_seq_GET(args->args, i);
				349	shift_arg(parent, arg, lineno, col_offset);
				350	}
				351
				352	if (args->vararg != NULL) {
				353	shift_arg(parent, args->vararg, lineno, col_offset);
				354	}
				355
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	356	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->kwonlyargs); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	357	arg_ty arg = asdl_seq_GET(args->kwonlyargs, i);
				358	shift_arg(parent, arg, lineno, col_offset);
				359	}
				360
				361	fstring_shift_seq_locations(parent, args->kw_defaults, lineno, col_offset);
				362
				363	if (args->kwarg != NULL) {
				364	shift_arg(parent, args->kwarg, lineno, col_offset);
				365	}
				366
				367	fstring_shift_seq_locations(parent, args->defaults, lineno, col_offset);
				368	}
				369
				370	static void fstring_shift_children_locations(expr_ty n, int lineno, int col_offset) {
				371	switch (n->kind) {
				372	case BoolOp_kind:
				373	fstring_shift_seq_locations(n, n->v.BoolOp.values, lineno, col_offset);
				374	break;
				375	case NamedExpr_kind:
				376	shift_expr(n, n->v.NamedExpr.target, lineno, col_offset);
				377	shift_expr(n, n->v.NamedExpr.value, lineno, col_offset);
				378	break;
				379	case BinOp_kind:
				380	shift_expr(n, n->v.BinOp.left, lineno, col_offset);
				381	shift_expr(n, n->v.BinOp.right, lineno, col_offset);
				382	break;
				383	case UnaryOp_kind:
				384	shift_expr(n, n->v.UnaryOp.operand, lineno, col_offset);
				385	break;
				386	case Lambda_kind:
				387	fstring_shift_arguments(n, n->v.Lambda.args, lineno, col_offset);
				388	shift_expr(n, n->v.Lambda.body, lineno, col_offset);
				389	break;
				390	case IfExp_kind:
				391	shift_expr(n, n->v.IfExp.test, lineno, col_offset);
				392	shift_expr(n, n->v.IfExp.body, lineno, col_offset);
				393	shift_expr(n, n->v.IfExp.orelse, lineno, col_offset);
				394	break;
				395	case Dict_kind:
				396	fstring_shift_seq_locations(n, n->v.Dict.keys, lineno, col_offset);
				397	fstring_shift_seq_locations(n, n->v.Dict.values, lineno, col_offset);
				398	break;
				399	case Set_kind:
				400	fstring_shift_seq_locations(n, n->v.Set.elts, lineno, col_offset);
				401	break;
				402	case ListComp_kind:
				403	shift_expr(n, n->v.ListComp.elt, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	404	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.ListComp.generators); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	405	comprehension_ty comp = asdl_seq_GET(n->v.ListComp.generators, i);
				406	fstring_shift_comprehension(n, comp, lineno, col_offset);
				407	}
				408	break;
				409	case SetComp_kind:
				410	shift_expr(n, n->v.SetComp.elt, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	411	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.SetComp.generators); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	412	comprehension_ty comp = asdl_seq_GET(n->v.SetComp.generators, i);
				413	fstring_shift_comprehension(n, comp, lineno, col_offset);
				414	}
				415	break;
				416	case DictComp_kind:
				417	shift_expr(n, n->v.DictComp.key, lineno, col_offset);
				418	shift_expr(n, n->v.DictComp.value, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	419	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.DictComp.generators); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	420	comprehension_ty comp = asdl_seq_GET(n->v.DictComp.generators, i);
				421	fstring_shift_comprehension(n, comp, lineno, col_offset);
				422	}
				423	break;
				424	case GeneratorExp_kind:
				425	shift_expr(n, n->v.GeneratorExp.elt, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	426	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.GeneratorExp.generators); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	427	comprehension_ty comp = asdl_seq_GET(n->v.GeneratorExp.generators, i);
				428	fstring_shift_comprehension(n, comp, lineno, col_offset);
				429	}
				430	break;
				431	case Await_kind:
				432	shift_expr(n, n->v.Await.value, lineno, col_offset);
				433	break;
				434	case Yield_kind:
				435	shift_expr(n, n->v.Yield.value, lineno, col_offset);
				436	break;
				437	case YieldFrom_kind:
				438	shift_expr(n, n->v.YieldFrom.value, lineno, col_offset);
				439	break;
				440	case Compare_kind:
				441	shift_expr(n, n->v.Compare.left, lineno, col_offset);
				442	fstring_shift_seq_locations(n, n->v.Compare.comparators, lineno, col_offset);
				443	break;
				444	case Call_kind:
				445	shift_expr(n, n->v.Call.func, lineno, col_offset);
				446	fstring_shift_seq_locations(n, n->v.Call.args, lineno, col_offset);
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	447	for (Py_ssize_t i = 0, l = asdl_seq_LEN(n->v.Call.keywords); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	448	keyword_ty keyword = asdl_seq_GET(n->v.Call.keywords, i);
				449	shift_expr(n, keyword->value, lineno, col_offset);
				450	}
				451	break;
				452	case Attribute_kind:
				453	shift_expr(n, n->v.Attribute.value, lineno, col_offset);
				454	break;
				455	case Subscript_kind:
				456	shift_expr(n, n->v.Subscript.value, lineno, col_offset);
				457	fstring_shift_slice_locations(n, n->v.Subscript.slice, lineno, col_offset);
				458	shift_expr(n, n->v.Subscript.slice, lineno, col_offset);
				459	break;
				460	case Starred_kind:
				461	shift_expr(n, n->v.Starred.value, lineno, col_offset);
				462	break;
				463	case List_kind:
				464	fstring_shift_seq_locations(n, n->v.List.elts, lineno, col_offset);
				465	break;
				466	case Tuple_kind:
				467	fstring_shift_seq_locations(n, n->v.Tuple.elts, lineno, col_offset);
				468	break;
Lysandros Nikolaou	37af21b	2020-04-29 03:43:50 +0300	[diff] [blame]	469	case JoinedStr_kind:
				470	fstring_shift_seq_locations(n, n->v.JoinedStr.values, lineno, col_offset);
				471	break;
				472	case FormattedValue_kind:
				473	shift_expr(n, n->v.FormattedValue.value, lineno, col_offset);
				474	if (n->v.FormattedValue.format_spec) {
				475	shift_expr(n, n->v.FormattedValue.format_spec, lineno, col_offset);
				476	}
				477	break;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	478	default:
				479	return;
				480	}
				481	}
				482
				483	/* Shift locations for the given node and all its children by adding `lineno`
				484	and `col_offset` to existing locations. Note that n is the already parsed
				485	expression. */
				486	static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
				487	{
				488	n->col_offset = n->col_offset + col_offset;
				489
				490	// The following is needed, in order for nodes spanning across multiple lines
				491	// to be shifted correctly. An example of such a node is a Call node, the closing
				492	// parenthesis of which is not on the same line as its name.
				493	if (n->lineno == n->end_lineno) {
				494	n->end_col_offset = n->end_col_offset + col_offset;
				495	}
				496
				497	fstring_shift_children_locations(n, lineno, col_offset);
				498	n->lineno = n->lineno + lineno;
				499	n->end_lineno = n->end_lineno + lineno;
				500	}
				501
				502	/* Fix locations for the given node and its children.
				503
				504	`parent` is the enclosing node.
				505	`n` is the node which locations are going to be fixed relative to parent.
				506	`expr_str` is the child node's string representation, including braces.
				507	*/
				508	static void
				509	fstring_fix_expr_location(Token parent, expr_ty n, char expr_str)
				510	{
				511	char *substr = NULL;
				512	char *start;
				513	int lines = 0;
				514	int cols = 0;
				515
				516	if (parent && parent->bytes) {
				517	char *parent_str = PyBytes_AsString(parent->bytes);
				518	if (!parent_str) {
				519	return;
				520	}
				521	substr = strstr(parent_str, expr_str);
				522	if (substr) {
				523	// The following is needed, in order to correctly shift the column
				524	// offset, in the case that (disregarding any whitespace) a newline
				525	// immediately follows the opening curly brace of the fstring expression.
				526	int newline_after_brace = 1;
				527	start = substr + 1;
				528	while (start && start != '}' && start != '\n') {
				529	if (start != ' ' && start != '\t' && *start != '\f') {
				530	newline_after_brace = 0;
				531	break;
				532	}
				533	start++;
				534	}
				535
				536	// Account for the characters from the last newline character to our
				537	// left until the beginning of substr.
				538	if (!newline_after_brace) {
				539	start = substr;
				540	while (start > parent_str && *start != '\n') {
				541	start--;
				542	}
				543	cols += (int)(substr - start);
				544	}
				545	/* adjust the start based on the number of newlines encountered
				546	before the f-string expression */
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	547	for (char* p = parent_str; p < substr; p++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	548	if (*p == '\n') {
				549	lines++;
				550	}
				551	}
				552	}
				553	}
				554	fstring_shift_expr_locations(n, lines, cols);
				555	}
				556
				557
				558	/* Compile this expression in to an expr_ty. Add parens around the
				559	expression, in order to allow leading spaces in the expression. */
				560	static expr_ty
				561	fstring_compile_expr(Parser p, const char expr_start, const char *expr_end,
				562	Token *t)
				563	{
				564	expr_ty expr = NULL;
				565	char *str;
				566	Py_ssize_t len;
				567	const char *s;
				568	expr_ty result = NULL;
				569
				570	assert(expr_end >= expr_start);
				571	assert(*(expr_start-1) == '{');
				572	assert(expr_end == '}' \|\| expr_end == '!' \|\| *expr_end == ':' \|\|
				573	*expr_end == '=');
				574
				575	/* If the substring is all whitespace, it's an error. We need to catch this
				576	here, and not when we call PyParser_SimpleParseStringFlagsFilename,
				577	because turning the expression '' in to '()' would go from being invalid
				578	to valid. */
				579	for (s = expr_start; s != expr_end; s++) {
				580	char c = *s;
				581	/* The Python parser ignores only the following whitespace
				582	characters (\r already is converted to \n). */
				583	if (!(c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\f')) {
				584	break;
				585	}
				586	}
				587	if (s == expr_end) {
				588	RAISE_SYNTAX_ERROR("f-string: empty expression not allowed");
				589	return NULL;
				590	}
				591
				592	len = expr_end - expr_start;
				593	/* Allocate 3 extra bytes: open paren, close paren, null byte. */
				594	str = PyMem_RawMalloc(len + 3);
				595	if (str == NULL) {
				596	PyErr_NoMemory();
				597	return NULL;
				598	}
				599
				600	str[0] = '(';
				601	memcpy(str+1, expr_start, len);
				602	str[len+1] = ')';
				603	str[len+2] = 0;
				604
				605	struct tok_state* tok = PyTokenizer_FromString(str, 1);
				606	if (tok == NULL) {
				607	return NULL;
				608	}
Lysandros Nikolaou	791a46e	2020-05-26 04:24:31 +0300	[diff] [blame]	609	Py_INCREF(p->tok->filename);
				610	tok->filename = p->tok->filename;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	611
Lysandros Nikolaou	3e0a6f3	2020-05-01 06:27:52 +0300	[diff] [blame]	612	Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
				613	NULL, p->arena);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	614	p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
				615	p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
				616	? p->starting_col_offset + t->col_offset : 0;
				617
				618	expr = _PyPegen_run_parser(p2);
				619
				620	if (expr == NULL) {
				621	goto exit;
				622	}
				623
				624	/* Reuse str to find the correct column offset. */
				625	str[0] = '{';
				626	str[len+1] = '}';
				627	fstring_fix_expr_location(t, expr, str);
				628
				629	result = expr;
				630
				631	exit:
				632	_PyPegen_Parser_Free(p2);
				633	PyTokenizer_Free(tok);
				634	return result;
				635	}
				636
				637	/* Return -1 on error.
				638
				639	Return 0 if we reached the end of the literal.
				640
				641	Return 1 if we haven't reached the end of the literal, but we want
				642	the caller to process the literal up to this point. Used for
				643	doubled braces.
				644	*/
				645	static int
				646	fstring_find_literal(Parser p, const char str, const char end, int raw,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	647	PyObject *literal, int recurse_lvl, Token t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	648	{
				649	/* Get any literal string. It ends when we hit an un-doubled left
				650	brace (which isn't part of a unicode name escape such as
				651	"\N{EULER CONSTANT}"), or the end of the string. */
				652
				653	const char s = str;
				654	const char *literal_start = s;
				655	int result = 0;
				656
				657	assert(*literal == NULL);
				658	while (s < end) {
				659	char ch = *s++;
				660	if (!raw && ch == '\\' && s < end) {
				661	ch = *s++;
				662	if (ch == 'N') {
				663	if (s < end && *s++ == '{') {
				664	while (s < end && *s++ != '}') {
				665	}
				666	continue;
				667	}
				668	break;
				669	}
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	670	if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	671	return -1;
				672	}
				673	}
				674	if (ch == '{' \|\| ch == '}') {
				675	/* Check for doubled braces, but only at the top level. If
				676	we checked at every level, then f'{0:{3}}' would fail
				677	with the two closing braces. */
				678	if (recurse_lvl == 0) {
				679	if (s < end && *s == ch) {
				680	/* We're going to tell the caller that the literal ends
				681	here, but that they should continue scanning. But also
				682	skip over the second brace when we resume scanning. */
				683	*str = s + 1;
				684	result = 1;
				685	goto done;
				686	}
				687
				688	/* Where a single '{' is the start of a new expression, a
				689	single '}' is not allowed. */
				690	if (ch == '}') {
				691	*str = s - 1;
				692	RAISE_SYNTAX_ERROR("f-string: single '}' is not allowed");
				693	return -1;
				694	}
				695	}
				696	/* We're either at a '{', which means we're starting another
				697	expression; or a '}', which means we're at the end of this
				698	f-string (for a nested format_spec). */
				699	s--;
				700	break;
				701	}
				702	}
				703	*str = s;
				704	assert(s <= end);
				705	assert(s == end \|\| s == '{' \|\| s == '}');
				706	done:
				707	if (literal_start != s) {
				708	if (raw)
				709	*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
				710	s - literal_start,
				711	NULL, NULL);
				712	else
				713	*literal = decode_unicode_with_escapes(p, literal_start,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	714	s - literal_start, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	715	if (!*literal)
				716	return -1;
				717	}
				718	return result;
				719	}
				720
				721	/* Forward declaration because parsing is recursive. */
				722	static expr_ty
				723	fstring_parse(Parser p, const char str, const char end, int raw, int recurse_lvl,
				724	Token first_token, Token t, Token *last_token);
				725
/* Parse the f-string at *str, ending at end.  We know *str starts an
   expression (so it must be a '{'). Returns the FormattedValue node, which
   includes the expression, conversion character, format_spec expression, and
   optionally the text of the expression (if = is used).

   Note that I don't do a perfect job here: I don't make sure that a
   closing brace doesn't match an opening paren, for example. It
   doesn't need to error on all invalid expressions, just correctly
   find the end of all valid ones. Any errors inside the expression
   will be caught when we parse it later.

   *expression is set to the expression.  For an '=' "debug" expression,
   *expr_text is set to the debug text (the original text of the expression,
   including the '=' and any whitespace around it, as a string object).  If
   not a debug expression, *expr_text is set to NULL. */
				741	static int
				742	fstring_find_expr(Parser p, const char str, const char end, int raw, int recurse_lvl,
				743	PyObject *expr_text, expr_ty expression, Token *first_token,
				744	Token t, Token last_token)
				745	{
				746	/* Return -1 on error, else 0. */
				747
				748	const char *expr_start;
				749	const char *expr_end;
				750	expr_ty simple_expression;
				751	expr_ty format_spec = NULL; /* Optional format specifier. */
				752	int conversion = -1; /* The conversion char. Use default if not
				753	specified, or !r if using = and no format
				754	spec. */
				755
				756	/* 0 if we're not in a string, else the quote char we're trying to
				757	match (single or double quote). */
				758	char quote_char = 0;
				759
				760	/* If we're inside a string, 1=normal, 3=triple-quoted. */
				761	int string_type = 0;
				762
				763	/* Keep track of nesting level for braces/parens/brackets in
				764	expressions. */
				765	Py_ssize_t nested_depth = 0;
				766	char parenstack[MAXLEVEL];
				767
				768	*expr_text = NULL;
				769
				770	/* Can only nest one level deep. */
				771	if (recurse_lvl >= 2) {
				772	RAISE_SYNTAX_ERROR("f-string: expressions nested too deeply");
				773	goto error;
				774	}
				775
				776	/* The first char must be a left brace, or we wouldn't have gotten
				777	here. Skip over it. */
				778	assert(**str == '{');
				779	*str += 1;
				780
				781	expr_start = *str;
				782	for (; str < end; (str)++) {
				783	char ch;
				784
				785	/* Loop invariants. */
				786	assert(nested_depth >= 0);
				787	assert(str >= expr_start && str < end);
				788	if (quote_char)
				789	assert(string_type == 1 \|\| string_type == 3);
				790	else
				791	assert(string_type == 0);
				792
				793	ch = **str;
				794	/* Nowhere inside an expression is a backslash allowed. */
				795	if (ch == '\\') {
				796	/* Error: can't include a backslash character, inside
				797	parens or strings or not. */
				798	RAISE_SYNTAX_ERROR(
				799	"f-string expression part "
				800	"cannot include a backslash");
				801	goto error;
				802	}
				803	if (quote_char) {
				804	/* We're inside a string. See if we're at the end. */
				805	/* This code needs to implement the same non-error logic
				806	as tok_get from tokenizer.c, at the letter_quote
				807	label. To actually share that code would be a
				808	nightmare. But, it's unlikely to change and is small,
				809	so duplicate it here. Note we don't need to catch all
				810	of the errors, since they'll be caught when parsing the
				811	expression. We just need to match the non-error
				812	cases. Thus we can ignore \n in single-quoted strings,
				813	for example. Or non-terminated strings. */
				814	if (ch == quote_char) {
				815	/* Does this match the string_type (single or triple
				816	quoted)? */
				817	if (string_type == 3) {
				818	if (str+2 < end && (str+1) == ch && (*str+2) == ch) {
				819	/* We're at the end of a triple quoted string. */
				820	*str += 2;
				821	string_type = 0;
				822	quote_char = 0;
				823	continue;
				824	}
				825	} else {
				826	/* We're at the end of a normal string. */
				827	quote_char = 0;
				828	string_type = 0;
				829	continue;
				830	}
				831	}
				832	} else if (ch == '\'' \|\| ch == '"') {
				833	/* Is this a triple quoted string? */
				834	if (str+2 < end && (str+1) == ch && (*str+2) == ch) {
				835	string_type = 3;
				836	*str += 2;
				837	} else {
				838	/* Start of a normal string. */
				839	string_type = 1;
				840	}
				841	/* Start looking for the end of the string. */
				842	quote_char = ch;
				843	} else if (ch == '[' \|\| ch == '{' \|\| ch == '(') {
				844	if (nested_depth >= MAXLEVEL) {
				845	RAISE_SYNTAX_ERROR("f-string: too many nested parenthesis");
				846	goto error;
				847	}
				848	parenstack[nested_depth] = ch;
				849	nested_depth++;
				850	} else if (ch == '#') {
				851	/* Error: can't include a comment character, inside parens
				852	or not. */
				853	RAISE_SYNTAX_ERROR("f-string expression part cannot include '#'");
				854	goto error;
				855	} else if (nested_depth == 0 &&
				856	(ch == '!' \|\| ch == ':' \|\| ch == '}' \|\|
				857	ch == '=' \|\| ch == '>' \|\| ch == '<')) {
				858	/* See if there's a next character. */
				859	if (*str+1 < end) {
				860	char next = (str+1);
				861
				862	/* For "!=". since '=' is not an allowed conversion character,
				863	nothing is lost in this test. */
				864	if ((ch == '!' && next == '=') \|\| /* != */
				865	(ch == '=' && next == '=') \|\| /* == */
				866	(ch == '<' && next == '=') \|\| /* <= */
				867	(ch == '>' && next == '=') /* >= */
				868	) {
				869	*str += 1;
				870	continue;
				871	}
				872	/* Don't get out of the loop for these, if they're single
				873	chars (not part of 2-char tokens). If by themselves, they
				874	don't end an expression (unlike say '!'). */
				875	if (ch == '>' \|\| ch == '<') {
				876	continue;
				877	}
				878	}
				879
				880	/* Normal way out of this loop. */
				881	break;
				882	} else if (ch == ']' \|\| ch == '}' \|\| ch == ')') {
				883	if (!nested_depth) {
				884	RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", ch);
				885	goto error;
				886	}
				887	nested_depth--;
				888	int opening = parenstack[nested_depth];
				889	if (!((opening == '(' && ch == ')') \|\|
				890	(opening == '[' && ch == ']') \|\|
				891	(opening == '{' && ch == '}')))
				892	{
				893	RAISE_SYNTAX_ERROR(
				894	"f-string: closing parenthesis '%c' "
				895	"does not match opening parenthesis '%c'",
				896	ch, opening);
				897	goto error;
				898	}
				899	} else {
				900	/* Just consume this char and loop around. */
				901	}
				902	}
				903	expr_end = *str;
				904	/* If we leave this loop in a string or with mismatched parens, we
				905	don't care. We'll get a syntax error when compiling the
				906	expression. But, we can produce a better error message, so
				907	let's just do that.*/
				908	if (quote_char) {
				909	RAISE_SYNTAX_ERROR("f-string: unterminated string");
				910	goto error;
				911	}
				912	if (nested_depth) {
				913	int opening = parenstack[nested_depth - 1];
				914	RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", opening);
				915	goto error;
				916	}
				917
				918	if (*str >= end)
				919	goto unexpected_end_of_string;
				920
				921	/* Compile the expression as soon as possible, so we show errors
				922	related to the expression before errors related to the
				923	conversion or format_spec. */
				924	simple_expression = fstring_compile_expr(p, expr_start, expr_end, t);
				925	if (!simple_expression)
				926	goto error;
				927
				928	/* Check for =, which puts the text value of the expression in
				929	expr_text. */
				930	if (**str == '=') {
Pablo Galindo	9b83829	2020-05-27 22:01:11 +0100	[diff] [blame^]	931	if (p->feature_version < 8) {
				932	RAISE_SYNTAX_ERROR("f-string: self documenting expressions are "
				933	"only supported in Python 3.8 and greater");
				934	goto error;
				935	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	936	*str += 1;
				937
				938	/* Skip over ASCII whitespace. No need to test for end of string
				939	here, since we know there's at least a trailing quote somewhere
				940	ahead. */
				941	while (Py_ISSPACE(**str)) {
				942	*str += 1;
				943	}
				944
				945	/* Set expr_text to the text of the expression. /
				946	expr_text = PyUnicode_FromStringAndSize(expr_start, str-expr_start);
				947	if (!*expr_text) {
				948	goto error;
				949	}
				950	}
				951
				952	/* Check for a conversion char, if present. */
				953	if (**str == '!') {
				954	*str += 1;
				955	if (*str >= end)
				956	goto unexpected_end_of_string;
				957
				958	conversion = **str;
				959	*str += 1;
				960
				961	/* Validate the conversion. */
				962	if (!(conversion == 's' \|\| conversion == 'r' \|\| conversion == 'a')) {
				963	RAISE_SYNTAX_ERROR(
				964	"f-string: invalid conversion character: "
				965	"expected 's', 'r', or 'a'");
				966	goto error;
				967	}
				968
				969	}
				970
				971	/* Check for the format spec, if present. */
				972	if (*str >= end)
				973	goto unexpected_end_of_string;
				974	if (**str == ':') {
				975	*str += 1;
				976	if (*str >= end)
				977	goto unexpected_end_of_string;
				978
				979	/* Parse the format spec. */
				980	format_spec = fstring_parse(p, str, end, raw, recurse_lvl+1,
				981	first_token, t, last_token);
				982	if (!format_spec)
				983	goto error;
				984	}
				985
				986	if (str >= end \|\| *str != '}')
				987	goto unexpected_end_of_string;
				988
				989	/* We're at a right brace. Consume it. */
				990	assert(*str < end);
				991	assert(**str == '}');
				992	*str += 1;
				993
				994	/* If we're in = mode (detected by non-NULL expr_text), and have no format
				995	spec and no explicit conversion, set the conversion to 'r'. */
				996	if (*expr_text && format_spec == NULL && conversion == -1) {
				997	conversion = 'r';
				998	}
				999
				1000	/* And now create the FormattedValue node that represents this
				1001	entire expression with the conversion and format spec. */
				1002	//TODO: Fix this
				1003	*expression = FormattedValue(simple_expression, conversion,
				1004	format_spec, first_token->lineno,
				1005	first_token->col_offset, last_token->end_lineno,
				1006	last_token->end_col_offset, p->arena);
				1007	if (!*expression)
				1008	goto error;
				1009
				1010	return 0;
				1011
				1012	unexpected_end_of_string:
				1013	RAISE_SYNTAX_ERROR("f-string: expecting '}'");
				1014	/* Falls through to error. */
				1015
				1016	error:
				1017	Py_XDECREF(*expr_text);
				1018	return -1;
				1019
				1020	}
				1021
				1022	/* Return -1 on error.
				1023
				1024	Return 0 if we have a literal (possibly zero length) and an
				1025	expression (zero length if at the end of the string).
				1026
				1027	Return 1 if we have a literal, but no expression, and we want the
				1028	caller to call us again. This is used to deal with doubled
				1029	braces.
				1030
				1031	When called multiple times on the string 'a{{b{0}c', this function
				1032	will return:
				1033
				1034	1. the literal 'a{' with no expression, and a return value
				1035	of 1. Despite the fact that there's no expression, the return
				1036	value of 1 means we're not finished yet.
				1037
				1038	2. the literal 'b' and the expression '0', with a return value of
				1039	0. The fact that there's an expression means we're not finished.
				1040
				1041	3. literal 'c' with no expression and a return value of 0. The
				1042	combination of the return value of 0 with no expression means
				1043	we're finished.
				1044	*/
				1045	static int
				1046	fstring_find_literal_and_expr(Parser p, const char str, const char end, int raw,
				1047	int recurse_lvl, PyObject **literal,
				1048	PyObject *expr_text, expr_ty expression,
				1049	Token first_token, Token t, Token *last_token)
				1050	{
				1051	int result;
				1052
				1053	assert(literal == NULL && expression == NULL);
				1054
				1055	/* Get any literal string. */
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	1056	result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1057	if (result < 0)
				1058	goto error;
				1059
				1060	assert(result == 0 \|\| result == 1);
				1061
				1062	if (result == 1)
				1063	/* We have a literal, but don't look at the expression. */
				1064	return 1;
				1065
				1066	if (str >= end \|\| *str == '}')
				1067	/* We're at the end of the string or the end of a nested
				1068	f-string: no expression. The top-level error case where we
				1069	expect to be at the end of the string but we're at a '}' is
				1070	handled later. */
				1071	return 0;
				1072
				1073	/* We must now be the start of an expression, on a '{'. */
				1074	assert(**str == '{');
				1075
				1076	if (fstring_find_expr(p, str, end, raw, recurse_lvl, expr_text,
				1077	expression, first_token, t, last_token) < 0)
				1078	goto error;
				1079
				1080	return 0;
				1081
				1082	error:
				1083	Py_CLEAR(*literal);
				1084	return -1;
				1085	}
				1086
				1087	#ifdef NDEBUG
				1088	#define ExprList_check_invariants(l)
				1089	#else
				1090	static void
				1091	ExprList_check_invariants(ExprList *l)
				1092	{
				1093	/* Check our invariants. Make sure this object is "live", and
				1094	hasn't been deallocated. */
				1095	assert(l->size >= 0);
				1096	assert(l->p != NULL);
				1097	if (l->size <= EXPRLIST_N_CACHED)
				1098	assert(l->data == l->p);
				1099	}
				1100	#endif
				1101
				1102	static void
				1103	ExprList_Init(ExprList *l)
				1104	{
				1105	l->allocated = EXPRLIST_N_CACHED;
				1106	l->size = 0;
				1107
				1108	/* Until we start allocating dynamically, p points to data. */
				1109	l->p = l->data;
				1110
				1111	ExprList_check_invariants(l);
				1112	}
				1113
				1114	static int
				1115	ExprList_Append(ExprList *l, expr_ty exp)
				1116	{
				1117	ExprList_check_invariants(l);
				1118	if (l->size >= l->allocated) {
				1119	/* We need to alloc (or realloc) the memory. */
				1120	Py_ssize_t new_size = l->allocated * 2;
				1121
				1122	/* See if we've ever allocated anything dynamically. */
				1123	if (l->p == l->data) {
				1124	Py_ssize_t i;
				1125	/* We're still using the cached data. Switch to
				1126	alloc-ing. */
				1127	l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size);
				1128	if (!l->p)
				1129	return -1;
				1130	/* Copy the cached data into the new buffer. */
				1131	for (i = 0; i < l->size; i++)
				1132	l->p[i] = l->data[i];
				1133	} else {
				1134	/* Just realloc. */
				1135	expr_ty tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) new_size);
				1136	if (!tmp) {
				1137	PyMem_RawFree(l->p);
				1138	l->p = NULL;
				1139	return -1;
				1140	}
				1141	l->p = tmp;
				1142	}
				1143
				1144	l->allocated = new_size;
				1145	assert(l->allocated == 2 * l->size);
				1146	}
				1147
				1148	l->p[l->size++] = exp;
				1149
				1150	ExprList_check_invariants(l);
				1151	return 0;
				1152	}
				1153
				1154	static void
				1155	ExprList_Dealloc(ExprList *l)
				1156	{
				1157	ExprList_check_invariants(l);
				1158
				1159	/* If there's been an error, or we've never dynamically allocated,
				1160	do nothing. */
				1161	if (!l->p \|\| l->p == l->data) {
				1162	/* Do nothing. */
				1163	} else {
				1164	/* We have dynamically allocated. Free the memory. */
				1165	PyMem_RawFree(l->p);
				1166	}
				1167	l->p = NULL;
				1168	l->size = -1;
				1169	}
				1170
				1171	static asdl_seq *
				1172	ExprList_Finish(ExprList l, PyArena arena)
				1173	{
				1174	asdl_seq *seq;
				1175
				1176	ExprList_check_invariants(l);
				1177
				1178	/* Allocate the asdl_seq and copy the expressions in to it. */
				1179	seq = _Py_asdl_seq_new(l->size, arena);
				1180	if (seq) {
				1181	Py_ssize_t i;
				1182	for (i = 0; i < l->size; i++)
				1183	asdl_seq_SET(seq, i, l->p[i]);
				1184	}
				1185	ExprList_Dealloc(l);
				1186	return seq;
				1187	}
				1188
				1189	#ifdef NDEBUG
				1190	#define FstringParser_check_invariants(state)
				1191	#else
				1192	static void
				1193	FstringParser_check_invariants(FstringParser *state)
				1194	{
				1195	if (state->last_str)
				1196	assert(PyUnicode_CheckExact(state->last_str));
				1197	ExprList_check_invariants(&state->expr_list);
				1198	}
				1199	#endif
				1200
				1201	void
				1202	_PyPegen_FstringParser_Init(FstringParser *state)
				1203	{
				1204	state->last_str = NULL;
				1205	state->fmode = 0;
				1206	ExprList_Init(&state->expr_list);
				1207	FstringParser_check_invariants(state);
				1208	}
				1209
				1210	void
				1211	_PyPegen_FstringParser_Dealloc(FstringParser *state)
				1212	{
				1213	FstringParser_check_invariants(state);
				1214
				1215	Py_XDECREF(state->last_str);
				1216	ExprList_Dealloc(&state->expr_list);
				1217	}
				1218
				1219	/* Make a Constant node, but decref the PyUnicode object being added. */
				1220	static expr_ty
				1221	make_str_node_and_del(Parser p, PyObject str, Token first_token, Token *last_token)
				1222	{
				1223	PyObject s = str;
				1224	PyObject *kind = NULL;
				1225	*str = NULL;
				1226	assert(PyUnicode_CheckExact(s));
				1227	if (PyArena_AddPyObject(p->arena, s) < 0) {
				1228	Py_DECREF(s);
				1229	return NULL;
				1230	}
				1231	const char* the_str = PyBytes_AsString(first_token->bytes);
				1232	if (the_str && the_str[0] == 'u') {
				1233	kind = _PyPegen_new_identifier(p, "u");
				1234	}
				1235
				1236	if (kind == NULL && PyErr_Occurred()) {
				1237	return NULL;
				1238	}
				1239
				1240	return Constant(s, kind, first_token->lineno, first_token->col_offset,
				1241	last_token->end_lineno, last_token->end_col_offset, p->arena);
				1242
				1243	}
				1244
				1245
				1246	/* Add a non-f-string (that is, a regular literal string). str is
				1247	decref'd. */
				1248	int
				1249	_PyPegen_FstringParser_ConcatAndDel(FstringParser state, PyObject str)
				1250	{
				1251	FstringParser_check_invariants(state);
				1252
				1253	assert(PyUnicode_CheckExact(str));
				1254
				1255	if (PyUnicode_GET_LENGTH(str) == 0) {
				1256	Py_DECREF(str);
				1257	return 0;
				1258	}
				1259
				1260	if (!state->last_str) {
				1261	/* We didn't have a string before, so just remember this one. */
				1262	state->last_str = str;
				1263	} else {
				1264	/* Concatenate this with the previous string. */
				1265	PyUnicode_AppendAndDel(&state->last_str, str);
				1266	if (!state->last_str)
				1267	return -1;
				1268	}
				1269	FstringParser_check_invariants(state);
				1270	return 0;
				1271	}
				1272
				1273	/* Parse an f-string. The f-string is in *str to end, with no
				1274	'f' or quotes. */
				1275	int
				1276	_PyPegen_FstringParser_ConcatFstring(Parser p, FstringParser state, const char **str,
				1277	const char *end, int raw, int recurse_lvl,
				1278	Token first_token, Token t, Token *last_token)
				1279	{
				1280	FstringParser_check_invariants(state);
				1281	state->fmode = 1;
				1282
				1283	/* Parse the f-string. */
				1284	while (1) {
				1285	PyObject *literal = NULL;
				1286	PyObject *expr_text = NULL;
				1287	expr_ty expression = NULL;
				1288
				1289	/* If there's a zero length literal in front of the
				1290	expression, literal will be NULL. If we're at the end of
				1291	the f-string, expression will be NULL (unless result == 1,
				1292	see below). */
				1293	int result = fstring_find_literal_and_expr(p, str, end, raw, recurse_lvl,
				1294	&literal, &expr_text,
				1295	&expression, first_token, t, last_token);
				1296	if (result < 0)
				1297	return -1;
				1298
				1299	/* Add the literal, if any. */
				1300	if (literal && _PyPegen_FstringParser_ConcatAndDel(state, literal) < 0) {
				1301	Py_XDECREF(expr_text);
				1302	return -1;
				1303	}
				1304	/* Add the expr_text, if any. */
				1305	if (expr_text && _PyPegen_FstringParser_ConcatAndDel(state, expr_text) < 0) {
				1306	return -1;
				1307	}
				1308
				1309	/* We've dealt with the literal and expr_text, their ownership has
				1310	been transferred to the state object. Don't look at them again. */
				1311
				1312	/* See if we should just loop around to get the next literal
				1313	and expression, while ignoring the expression this
				1314	time. This is used for un-doubling braces, as an
				1315	optimization. */
				1316	if (result == 1)
				1317	continue;
				1318
				1319	if (!expression)
				1320	/* We're done with this f-string. */
				1321	break;
				1322
				1323	/* We know we have an expression. Convert any existing string
				1324	to a Constant node. */
				1325	if (!state->last_str) {
				1326	/* Do nothing. No previous literal. */
				1327	} else {
				1328	/* Convert the existing last_str literal to a Constant node. */
				1329	expr_ty str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1330	if (!str \|\| ExprList_Append(&state->expr_list, str) < 0)
				1331	return -1;
				1332	}
				1333
				1334	if (ExprList_Append(&state->expr_list, expression) < 0)
				1335	return -1;
				1336	}
				1337
				1338	/* If recurse_lvl is zero, then we must be at the end of the
				1339	string. Otherwise, we must be at a right brace. */
				1340
				1341	if (recurse_lvl == 0 && *str < end-1) {
				1342	RAISE_SYNTAX_ERROR("f-string: unexpected end of string");
				1343	return -1;
				1344	}
				1345	if (recurse_lvl != 0 && **str != '}') {
				1346	RAISE_SYNTAX_ERROR("f-string: expecting '}'");
				1347	return -1;
				1348	}
				1349
				1350	FstringParser_check_invariants(state);
				1351	return 0;
				1352	}
				1353
				1354	/* Convert the partial state reflected in last_str and expr_list to an
				1355	expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
				1356	expr_ty
				1357	_PyPegen_FstringParser_Finish(Parser p, FstringParser state, Token* first_token,
				1358	Token *last_token)
				1359	{
				1360	asdl_seq *seq;
				1361
				1362	FstringParser_check_invariants(state);
				1363
				1364	/* If we're just a constant string with no expressions, return
				1365	that. */
				1366	if (!state->fmode) {
				1367	assert(!state->expr_list.size);
				1368	if (!state->last_str) {
				1369	/* Create a zero length string. */
				1370	state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
				1371	if (!state->last_str)
				1372	goto error;
				1373	}
				1374	return make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1375	}
				1376
				1377	/* Create a Constant node out of last_str, if needed. It will be the
				1378	last node in our expression list. */
				1379	if (state->last_str) {
				1380	expr_ty str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1381	if (!str \|\| ExprList_Append(&state->expr_list, str) < 0)
				1382	goto error;
				1383	}
				1384	/* This has already been freed. */
				1385	assert(state->last_str == NULL);
				1386
				1387	seq = ExprList_Finish(&state->expr_list, p->arena);
				1388	if (!seq)
				1389	goto error;
				1390
				1391	return _Py_JoinedStr(seq, first_token->lineno, first_token->col_offset,
				1392	last_token->end_lineno, last_token->end_col_offset, p->arena);
				1393
				1394	error:
				1395	_PyPegen_FstringParser_Dealloc(state);
				1396	return NULL;
				1397	}
				1398
				1399	/* Given an f-string (with no 'f' or quotes) that's in *str and ends
				1400	at end, parse it into an expr_ty. Return NULL on error. Adjust
				1401	str to point past the parsed portion. */
				1402	static expr_ty
				1403	fstring_parse(Parser p, const char str, const char end, int raw,
				1404	int recurse_lvl, Token first_token, Token t, Token *last_token)
				1405	{
				1406	FstringParser state;
				1407
				1408	_PyPegen_FstringParser_Init(&state);
				1409	if (_PyPegen_FstringParser_ConcatFstring(p, &state, str, end, raw, recurse_lvl,
				1410	first_token, t, last_token) < 0) {
				1411	_PyPegen_FstringParser_Dealloc(&state);
				1412	return NULL;
				1413	}
				1414
				1415	return _PyPegen_FstringParser_Finish(p, &state, t, t);
				1416	}