Blame - Parser/string_parser.c - platform/external/python/cpython3

blob: f8e2427276cd3c593b67a22260cb2565ece497ff [file] [log] [blame]

Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1	#include <Python.h>
				2
Pablo Galindo	1ed83ad	2020-06-11 17:30:46 +0100	[diff] [blame]	3	#include "tokenizer.h"
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	4	#include "pegen.h"
Pablo Galindo	1ed83ad	2020-06-11 17:30:46 +0100	[diff] [blame]	5	#include "string_parser.h"
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	6
				7	//// STRING HANDLING FUNCTIONS ////
				8
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	9	static int
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	10	warn_invalid_escape_sequence(Parser p, unsigned char first_invalid_escape_char, Token t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	11	{
				12	PyObject *msg =
				13	PyUnicode_FromFormat("invalid escape sequence \\%c", first_invalid_escape_char);
				14	if (msg == NULL) {
				15	return -1;
				16	}
				17	if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	18	t->lineno, NULL, NULL) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	19	if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
				20	/* Replace the DeprecationWarning exception with a SyntaxError
				21	to get a more accurate error report */
				22	PyErr_Clear();
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	23
				24	/* This is needed, in order for the SyntaxError to point to the token t,
				25	since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
				26	error location, if p->known_err_token is not set. */
				27	p->known_err_token = t;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	28	RAISE_SYNTAX_ERROR("invalid escape sequence \\%c", first_invalid_escape_char);
				29	}
				30	Py_DECREF(msg);
				31	return -1;
				32	}
				33	Py_DECREF(msg);
				34	return 0;
				35	}
				36
				37	static PyObject *
				38	decode_utf8(const char *sPtr, const char end)
				39	{
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	40	const char *s;
				41	const char *t;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	42	t = s = *sPtr;
				43	while (s < end && (*s & 0x80)) {
				44	s++;
				45	}
				46	*sPtr = s;
				47	return PyUnicode_DecodeUTF8(t, s - t, NULL);
				48	}
				49
				50	static PyObject *
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	51	decode_unicode_with_escapes(Parser parser, const char s, size_t len, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	52	{
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	53	PyObject *v;
				54	PyObject *u;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	55	char *buf;
				56	char *p;
				57	const char *end;
				58
				59	/* check for integer overflow */
				60	if (len > SIZE_MAX / 6) {
				61	return NULL;
				62	}
				63	/* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
				64	"\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
				65	u = PyBytes_FromStringAndSize((char )NULL, len 6);
				66	if (u == NULL) {
				67	return NULL;
				68	}
				69	p = buf = PyBytes_AsString(u);
				70	end = s + len;
				71	while (s < end) {
				72	if (*s == '\\') {
				73	p++ = s++;
				74	if (s >= end \|\| *s & 0x80) {
				75	strcpy(p, "u005c");
				76	p += 5;
				77	if (s >= end) {
				78	break;
				79	}
				80	}
				81	}
				82	if (*s & 0x80) {
				83	PyObject *w;
				84	int kind;
				85	void *data;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	86	Py_ssize_t w_len;
				87	Py_ssize_t i;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	88	w = decode_utf8(&s, end);
				89	if (w == NULL) {
				90	Py_DECREF(u);
				91	return NULL;
				92	}
				93	kind = PyUnicode_KIND(w);
				94	data = PyUnicode_DATA(w);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	95	w_len = PyUnicode_GET_LENGTH(w);
				96	for (i = 0; i < w_len; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	97	Py_UCS4 chr = PyUnicode_READ(kind, data, i);
				98	sprintf(p, "\\U%08x", chr);
				99	p += 10;
				100	}
				101	/* Should be impossible to overflow */
				102	assert(p - buf <= PyBytes_GET_SIZE(u));
				103	Py_DECREF(w);
				104	}
				105	else {
				106	p++ = s++;
				107	}
				108	}
				109	len = p - buf;
				110	s = buf;
				111
				112	const char *first_invalid_escape;
				113	v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
				114
				115	if (v != NULL && first_invalid_escape != NULL) {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	116	if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	117	/* We have not decref u before because first_invalid_escape points
				118	inside u. */
				119	Py_XDECREF(u);
				120	Py_DECREF(v);
				121	return NULL;
				122	}
				123	}
				124	Py_XDECREF(u);
				125	return v;
				126	}
				127
				128	static PyObject *
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	129	decode_bytes_with_escapes(Parser p, const char s, Py_ssize_t len, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	130	{
				131	const char *first_invalid_escape;
				132	PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
				133	if (result == NULL) {
				134	return NULL;
				135	}
				136
				137	if (first_invalid_escape != NULL) {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	138	if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	139	Py_DECREF(result);
				140	return NULL;
				141	}
				142	}
				143	return result;
				144	}
				145
				146	/* s must include the bracketing quote characters, and r, b, u,
				147	&/or f prefixes (if any), and embedded escape sequences (if any).
				148	_PyPegen_parsestr parses it, and sets *result to decoded Python string object.
				149	If the string is an f-string, set fstr and fstrlen to the unparsed
				150	string object. Return 0 if no errors occurred. */
				151	int
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	152	_PyPegen_parsestr(Parser p, int bytesmode, int rawmode, PyObject *result,
				153	const char *fstr, Py_ssize_t fstrlen, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	154	{
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	155	const char *s = PyBytes_AsString(t->bytes);
				156	if (s == NULL) {
				157	return -1;
				158	}
				159
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	160	size_t len;
				161	int quote = Py_CHARMASK(*s);
				162	int fmode = 0;
				163	*bytesmode = 0;
				164	*rawmode = 0;
				165	*result = NULL;
				166	*fstr = NULL;
				167	if (Py_ISALPHA(quote)) {
				168	while (!bytesmode \|\| !rawmode) {
				169	if (quote == 'b' \|\| quote == 'B') {
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	170	quote =(unsigned char)*++s;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	171	*bytesmode = 1;
				172	}
				173	else if (quote == 'u' \|\| quote == 'U') {
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	174	quote = (unsigned char)*++s;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	175	}
				176	else if (quote == 'r' \|\| quote == 'R') {
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	177	quote = (unsigned char)*++s;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	178	*rawmode = 1;
				179	}
				180	else if (quote == 'f' \|\| quote == 'F') {
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	181	quote = (unsigned char)*++s;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	182	fmode = 1;
				183	}
				184	else {
				185	break;
				186	}
				187	}
				188	}
				189
Lysandros Nikolaou	3e0a6f3	2020-05-01 06:27:52 +0300	[diff] [blame]	190	/* fstrings are only allowed in Python 3.6 and greater */
				191	if (fmode && p->feature_version < 6) {
				192	p->error_indicator = 1;
				193	RAISE_SYNTAX_ERROR("Format strings are only supported in Python 3.6 and greater");
				194	return -1;
				195	}
				196
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	197	if (fmode && *bytesmode) {
				198	PyErr_BadInternalCall();
				199	return -1;
				200	}
				201	if (quote != '\'' && quote != '\"') {
				202	PyErr_BadInternalCall();
				203	return -1;
				204	}
				205	/* Skip the leading quote char. */
				206	s++;
				207	len = strlen(s);
				208	if (len > INT_MAX) {
				209	PyErr_SetString(PyExc_OverflowError, "string to parse is too long");
				210	return -1;
				211	}
				212	if (s[--len] != quote) {
				213	/* Last quote char must match the first. */
				214	PyErr_BadInternalCall();
				215	return -1;
				216	}
				217	if (len >= 4 && s[0] == quote && s[1] == quote) {
				218	/* A triple quoted string. We've already skipped one quote at
				219	the start and one at the end of the string. Now skip the
				220	two at the start. */
				221	s += 2;
				222	len -= 2;
				223	/* And check that the last two match. */
				224	if (s[--len] != quote \|\| s[--len] != quote) {
				225	PyErr_BadInternalCall();
				226	return -1;
				227	}
				228	}
				229
				230	if (fmode) {
				231	/* Just return the bytes. The caller will parse the resulting
				232	string. */
				233	*fstr = s;
				234	*fstrlen = len;
				235	return 0;
				236	}
				237
				238	/* Not an f-string. */
				239	/* Avoid invoking escape decoding routines if possible. */
				240	rawmode = rawmode \|\| strchr(s, '\\') == NULL;
				241	if (*bytesmode) {
				242	/* Disallow non-ASCII characters. */
				243	const char *ch;
				244	for (ch = s; *ch; ch++) {
				245	if (Py_CHARMASK(*ch) >= 0x80) {
				246	RAISE_SYNTAX_ERROR(
				247	"bytes can only contain ASCII "
				248	"literal characters.");
				249	return -1;
				250	}
				251	}
				252	if (*rawmode) {
				253	*result = PyBytes_FromStringAndSize(s, len);
				254	}
				255	else {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	256	*result = decode_bytes_with_escapes(p, s, len, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	257	}
				258	}
				259	else {
				260	if (*rawmode) {
				261	*result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
				262	}
				263	else {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	264	*result = decode_unicode_with_escapes(p, s, len, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	265	}
				266	}
				267	return *result == NULL ? -1 : 0;
				268	}
				269
				270
				271
				272	// FSTRING STUFF
				273
				274	static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset);
				275	static void fstring_shift_argument(expr_ty parent, arg_ty args, int lineno, int col_offset);
				276
				277
				278	static inline void shift_expr(expr_ty parent, expr_ty n, int line, int col) {
Pablo Galindo	972ab03	2020-06-08 01:47:37 +0100	[diff] [blame]	279	if (n == NULL) {
				280	return;
				281	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	282	if (parent->lineno < n->lineno) {
				283	col = 0;
				284	}
				285	fstring_shift_expr_locations(n, line, col);
				286	}
				287
				288	static inline void shift_arg(expr_ty parent, arg_ty n, int line, int col) {
				289	if (parent->lineno < n->lineno) {
				290	col = 0;
				291	}
				292	fstring_shift_argument(parent, n, line, col);
				293	}
				294
				295	static void fstring_shift_seq_locations(expr_ty parent, asdl_seq *seq, int lineno, int col_offset) {
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	296	for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	297	expr_ty expr = asdl_seq_GET(seq, i);
				298	if (expr == NULL){
				299	continue;
				300	}
				301	shift_expr(parent, expr, lineno, col_offset);
				302	}
				303	}
				304
				305	static void fstring_shift_slice_locations(expr_ty parent, expr_ty slice, int lineno, int col_offset) {
				306	switch (slice->kind) {
				307	case Slice_kind:
				308	if (slice->v.Slice.lower) {
				309	shift_expr(parent, slice->v.Slice.lower, lineno, col_offset);
				310	}
				311	if (slice->v.Slice.upper) {
				312	shift_expr(parent, slice->v.Slice.upper, lineno, col_offset);
				313	}
				314	if (slice->v.Slice.step) {
				315	shift_expr(parent, slice->v.Slice.step, lineno, col_offset);
				316	}
				317	break;
				318	case Tuple_kind:
				319	fstring_shift_seq_locations(parent, slice->v.Tuple.elts, lineno, col_offset);
				320	break;
				321	default:
				322	break;
				323	}
				324	}
				325
				326	static void fstring_shift_comprehension(expr_ty parent, comprehension_ty comp, int lineno, int col_offset) {
				327	shift_expr(parent, comp->target, lineno, col_offset);
				328	shift_expr(parent, comp->iter, lineno, col_offset);
				329	fstring_shift_seq_locations(parent, comp->ifs, lineno, col_offset);
				330	}
				331
				332	static void fstring_shift_argument(expr_ty parent, arg_ty arg, int lineno, int col_offset) {
				333	if (arg->annotation != NULL){
				334	shift_expr(parent, arg->annotation, lineno, col_offset);
				335	}
				336	arg->col_offset = arg->col_offset + col_offset;
				337	arg->end_col_offset = arg->end_col_offset + col_offset;
				338	arg->lineno = arg->lineno + lineno;
				339	arg->end_lineno = arg->end_lineno + lineno;
				340	}
				341
				342	static void fstring_shift_arguments(expr_ty parent, arguments_ty args, int lineno, int col_offset) {
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	343	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->posonlyargs); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	344	arg_ty arg = asdl_seq_GET(args->posonlyargs, i);
				345	shift_arg(parent, arg, lineno, col_offset);
				346	}
				347
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	348	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->args); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	349	arg_ty arg = asdl_seq_GET(args->args, i);
				350	shift_arg(parent, arg, lineno, col_offset);
				351	}
				352
				353	if (args->vararg != NULL) {
				354	shift_arg(parent, args->vararg, lineno, col_offset);
				355	}
				356
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	357	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->kwonlyargs); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	358	arg_ty arg = asdl_seq_GET(args->kwonlyargs, i);
				359	shift_arg(parent, arg, lineno, col_offset);
				360	}
				361
				362	fstring_shift_seq_locations(parent, args->kw_defaults, lineno, col_offset);
				363
				364	if (args->kwarg != NULL) {
				365	shift_arg(parent, args->kwarg, lineno, col_offset);
				366	}
				367
				368	fstring_shift_seq_locations(parent, args->defaults, lineno, col_offset);
				369	}
				370
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	371	static void fstring_shift_children_locations(expr_ty node, int lineno, int col_offset) {
				372	switch (node->kind) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	373	case BoolOp_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	374	fstring_shift_seq_locations(node, node->v.BoolOp.values, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	375	break;
				376	case NamedExpr_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	377	shift_expr(node, node->v.NamedExpr.target, lineno, col_offset);
				378	shift_expr(node, node->v.NamedExpr.value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	379	break;
				380	case BinOp_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	381	shift_expr(node, node->v.BinOp.left, lineno, col_offset);
				382	shift_expr(node, node->v.BinOp.right, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	383	break;
				384	case UnaryOp_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	385	shift_expr(node, node->v.UnaryOp.operand, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	386	break;
				387	case Lambda_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	388	fstring_shift_arguments(node, node->v.Lambda.args, lineno, col_offset);
				389	shift_expr(node, node->v.Lambda.body, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	390	break;
				391	case IfExp_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	392	shift_expr(node, node->v.IfExp.test, lineno, col_offset);
				393	shift_expr(node, node->v.IfExp.body, lineno, col_offset);
				394	shift_expr(node, node->v.IfExp.orelse, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	395	break;
				396	case Dict_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	397	fstring_shift_seq_locations(node, node->v.Dict.keys, lineno, col_offset);
				398	fstring_shift_seq_locations(node, node->v.Dict.values, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	399	break;
				400	case Set_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	401	fstring_shift_seq_locations(node, node->v.Set.elts, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	402	break;
				403	case ListComp_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	404	shift_expr(node, node->v.ListComp.elt, lineno, col_offset);
				405	for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.ListComp.generators); i < l; i++) {
				406	comprehension_ty comp = asdl_seq_GET(node->v.ListComp.generators, i);
				407	fstring_shift_comprehension(node, comp, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	408	}
				409	break;
				410	case SetComp_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	411	shift_expr(node, node->v.SetComp.elt, lineno, col_offset);
				412	for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.SetComp.generators); i < l; i++) {
				413	comprehension_ty comp = asdl_seq_GET(node->v.SetComp.generators, i);
				414	fstring_shift_comprehension(node, comp, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	415	}
				416	break;
				417	case DictComp_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	418	shift_expr(node, node->v.DictComp.key, lineno, col_offset);
				419	shift_expr(node, node->v.DictComp.value, lineno, col_offset);
				420	for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.DictComp.generators); i < l; i++) {
				421	comprehension_ty comp = asdl_seq_GET(node->v.DictComp.generators, i);
				422	fstring_shift_comprehension(node, comp, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	423	}
				424	break;
				425	case GeneratorExp_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	426	shift_expr(node, node->v.GeneratorExp.elt, lineno, col_offset);
				427	for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.GeneratorExp.generators); i < l; i++) {
				428	comprehension_ty comp = asdl_seq_GET(node->v.GeneratorExp.generators, i);
				429	fstring_shift_comprehension(node, comp, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	430	}
				431	break;
				432	case Await_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	433	shift_expr(node, node->v.Await.value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	434	break;
				435	case Yield_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	436	shift_expr(node, node->v.Yield.value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	437	break;
				438	case YieldFrom_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	439	shift_expr(node, node->v.YieldFrom.value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	440	break;
				441	case Compare_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	442	shift_expr(node, node->v.Compare.left, lineno, col_offset);
				443	fstring_shift_seq_locations(node, node->v.Compare.comparators, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	444	break;
				445	case Call_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	446	shift_expr(node, node->v.Call.func, lineno, col_offset);
				447	fstring_shift_seq_locations(node, node->v.Call.args, lineno, col_offset);
				448	for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.Call.keywords); i < l; i++) {
				449	keyword_ty keyword = asdl_seq_GET(node->v.Call.keywords, i);
				450	shift_expr(node, keyword->value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	451	}
				452	break;
				453	case Attribute_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	454	shift_expr(node, node->v.Attribute.value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	455	break;
				456	case Subscript_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	457	shift_expr(node, node->v.Subscript.value, lineno, col_offset);
				458	fstring_shift_slice_locations(node, node->v.Subscript.slice, lineno, col_offset);
				459	shift_expr(node, node->v.Subscript.slice, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	460	break;
				461	case Starred_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	462	shift_expr(node, node->v.Starred.value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	463	break;
				464	case List_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	465	fstring_shift_seq_locations(node, node->v.List.elts, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	466	break;
				467	case Tuple_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	468	fstring_shift_seq_locations(node, node->v.Tuple.elts, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	469	break;
Lysandros Nikolaou	37af21b	2020-04-29 03:43:50 +0300	[diff] [blame]	470	case JoinedStr_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	471	fstring_shift_seq_locations(node, node->v.JoinedStr.values, lineno, col_offset);
Lysandros Nikolaou	37af21b	2020-04-29 03:43:50 +0300	[diff] [blame]	472	break;
				473	case FormattedValue_kind:
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	474	shift_expr(node, node->v.FormattedValue.value, lineno, col_offset);
				475	if (node->v.FormattedValue.format_spec) {
				476	shift_expr(node, node->v.FormattedValue.format_spec, lineno, col_offset);
Lysandros Nikolaou	37af21b	2020-04-29 03:43:50 +0300	[diff] [blame]	477	}
				478	break;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	479	default:
				480	return;
				481	}
				482	}
				483
				484	/* Shift locations for the given node and all its children by adding `lineno`
				485	and `col_offset` to existing locations. Note that n is the already parsed
				486	expression. */
				487	static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
				488	{
				489	n->col_offset = n->col_offset + col_offset;
				490
				491	// The following is needed, in order for nodes spanning across multiple lines
				492	// to be shifted correctly. An example of such a node is a Call node, the closing
				493	// parenthesis of which is not on the same line as its name.
				494	if (n->lineno == n->end_lineno) {
				495	n->end_col_offset = n->end_col_offset + col_offset;
				496	}
				497
				498	fstring_shift_children_locations(n, lineno, col_offset);
				499	n->lineno = n->lineno + lineno;
				500	n->end_lineno = n->end_lineno + lineno;
				501	}
				502
				503	/* Fix locations for the given node and its children.
				504
				505	`parent` is the enclosing node.
				506	`n` is the node which locations are going to be fixed relative to parent.
				507	`expr_str` is the child node's string representation, including braces.
				508	*/
				509	static void
				510	fstring_fix_expr_location(Token parent, expr_ty n, char expr_str)
				511	{
				512	char *substr = NULL;
				513	char *start;
				514	int lines = 0;
				515	int cols = 0;
				516
				517	if (parent && parent->bytes) {
				518	char *parent_str = PyBytes_AsString(parent->bytes);
				519	if (!parent_str) {
				520	return;
				521	}
				522	substr = strstr(parent_str, expr_str);
				523	if (substr) {
				524	// The following is needed, in order to correctly shift the column
				525	// offset, in the case that (disregarding any whitespace) a newline
				526	// immediately follows the opening curly brace of the fstring expression.
				527	int newline_after_brace = 1;
				528	start = substr + 1;
				529	while (start && start != '}' && start != '\n') {
				530	if (start != ' ' && start != '\t' && *start != '\f') {
				531	newline_after_brace = 0;
				532	break;
				533	}
				534	start++;
				535	}
				536
				537	// Account for the characters from the last newline character to our
				538	// left until the beginning of substr.
				539	if (!newline_after_brace) {
				540	start = substr;
				541	while (start > parent_str && *start != '\n') {
				542	start--;
				543	}
				544	cols += (int)(substr - start);
				545	}
				546	/* adjust the start based on the number of newlines encountered
				547	before the f-string expression */
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	548	for (char* p = parent_str; p < substr; p++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	549	if (*p == '\n') {
				550	lines++;
				551	}
				552	}
				553	}
				554	}
				555	fstring_shift_expr_locations(n, lines, cols);
				556	}
				557
				558
				559	/* Compile this expression in to an expr_ty. Add parens around the
				560	expression, in order to allow leading spaces in the expression. */
				561	static expr_ty
				562	fstring_compile_expr(Parser p, const char expr_start, const char *expr_end,
				563	Token *t)
				564	{
				565	expr_ty expr = NULL;
				566	char *str;
				567	Py_ssize_t len;
				568	const char *s;
				569	expr_ty result = NULL;
				570
				571	assert(expr_end >= expr_start);
				572	assert(*(expr_start-1) == '{');
				573	assert(expr_end == '}' \|\| expr_end == '!' \|\| *expr_end == ':' \|\|
				574	*expr_end == '=');
				575
				576	/* If the substring is all whitespace, it's an error. We need to catch this
				577	here, and not when we call PyParser_SimpleParseStringFlagsFilename,
				578	because turning the expression '' in to '()' would go from being invalid
				579	to valid. */
				580	for (s = expr_start; s != expr_end; s++) {
				581	char c = *s;
				582	/* The Python parser ignores only the following whitespace
				583	characters (\r already is converted to \n). */
				584	if (!(c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\f')) {
				585	break;
				586	}
				587	}
				588	if (s == expr_end) {
				589	RAISE_SYNTAX_ERROR("f-string: empty expression not allowed");
				590	return NULL;
				591	}
				592
				593	len = expr_end - expr_start;
				594	/* Allocate 3 extra bytes: open paren, close paren, null byte. */
				595	str = PyMem_RawMalloc(len + 3);
				596	if (str == NULL) {
				597	PyErr_NoMemory();
				598	return NULL;
				599	}
				600
				601	str[0] = '(';
				602	memcpy(str+1, expr_start, len);
				603	str[len+1] = ')';
				604	str[len+2] = 0;
				605
				606	struct tok_state* tok = PyTokenizer_FromString(str, 1);
				607	if (tok == NULL) {
Pablo Galindo	a54096e	2020-06-06 00:52:15 +0100	[diff] [blame]	608	PyMem_RawFree(str);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	609	return NULL;
				610	}
Lysandros Nikolaou	f7b1e46	2020-05-26 03:32:18 +0300	[diff] [blame]	611	Py_INCREF(p->tok->filename);
				612	tok->filename = p->tok->filename;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	613
Lysandros Nikolaou	3e0a6f3	2020-05-01 06:27:52 +0300	[diff] [blame]	614	Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
				615	NULL, p->arena);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	616	p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
				617	p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
				618	? p->starting_col_offset + t->col_offset : 0;
				619
				620	expr = _PyPegen_run_parser(p2);
				621
				622	if (expr == NULL) {
				623	goto exit;
				624	}
				625
				626	/* Reuse str to find the correct column offset. */
				627	str[0] = '{';
				628	str[len+1] = '}';
				629	fstring_fix_expr_location(t, expr, str);
				630
				631	result = expr;
				632
				633	exit:
Pablo Galindo	a54096e	2020-06-06 00:52:15 +0100	[diff] [blame]	634	PyMem_RawFree(str);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	635	_PyPegen_Parser_Free(p2);
				636	PyTokenizer_Free(tok);
				637	return result;
				638	}
				639
				640	/* Return -1 on error.
				641
				642	Return 0 if we reached the end of the literal.
				643
				644	Return 1 if we haven't reached the end of the literal, but we want
				645	the caller to process the literal up to this point. Used for
				646	doubled braces.
				647	*/
				648	static int
				649	fstring_find_literal(Parser p, const char str, const char end, int raw,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	650	PyObject *literal, int recurse_lvl, Token t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	651	{
				652	/* Get any literal string. It ends when we hit an un-doubled left
				653	brace (which isn't part of a unicode name escape such as
				654	"\N{EULER CONSTANT}"), or the end of the string. */
				655
				656	const char s = str;
				657	const char *literal_start = s;
				658	int result = 0;
				659
				660	assert(*literal == NULL);
				661	while (s < end) {
				662	char ch = *s++;
				663	if (!raw && ch == '\\' && s < end) {
				664	ch = *s++;
				665	if (ch == 'N') {
				666	if (s < end && *s++ == '{') {
				667	while (s < end && *s++ != '}') {
				668	}
				669	continue;
				670	}
				671	break;
				672	}
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	673	if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	674	return -1;
				675	}
				676	}
				677	if (ch == '{' \|\| ch == '}') {
				678	/* Check for doubled braces, but only at the top level. If
				679	we checked at every level, then f'{0:{3}}' would fail
				680	with the two closing braces. */
				681	if (recurse_lvl == 0) {
				682	if (s < end && *s == ch) {
				683	/* We're going to tell the caller that the literal ends
				684	here, but that they should continue scanning. But also
				685	skip over the second brace when we resume scanning. */
				686	*str = s + 1;
				687	result = 1;
				688	goto done;
				689	}
				690
				691	/* Where a single '{' is the start of a new expression, a
				692	single '}' is not allowed. */
				693	if (ch == '}') {
				694	*str = s - 1;
				695	RAISE_SYNTAX_ERROR("f-string: single '}' is not allowed");
				696	return -1;
				697	}
				698	}
				699	/* We're either at a '{', which means we're starting another
				700	expression; or a '}', which means we're at the end of this
				701	f-string (for a nested format_spec). */
				702	s--;
				703	break;
				704	}
				705	}
				706	*str = s;
				707	assert(s <= end);
				708	assert(s == end \|\| s == '{' \|\| s == '}');
				709	done:
				710	if (literal_start != s) {
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	711	if (raw) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	712	*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
				713	s - literal_start,
				714	NULL, NULL);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	715	} else {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	716	*literal = decode_unicode_with_escapes(p, literal_start,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	717	s - literal_start, t);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	718	}
				719	if (!*literal) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	720	return -1;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	721	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	722	}
				723	return result;
				724	}
				725
				726	/* Forward declaration because parsing is recursive. */
				727	static expr_ty
				728	fstring_parse(Parser p, const char str, const char end, int raw, int recurse_lvl,
				729	Token first_token, Token t, Token *last_token);
				730
				731	/* Parse the f-string at str, ending at end. We know str starts an
				732	expression (so it must be a '{'). Returns the FormattedValue node, which
				733	includes the expression, conversion character, format_spec expression, and
				734	optionally the text of the expression (if = is used).
				735
				736	Note that I don't do a perfect job here: I don't make sure that a
				737	closing brace doesn't match an opening paren, for example. It
				738	doesn't need to error on all invalid expressions, just correctly
				739	find the end of all valid ones. Any errors inside the expression
				740	will be caught when we parse it later.
				741
				742	*expression is set to the expression. For an '=' "debug" expression,
				743	*expr_text is set to the debug text (the original text of the expression,
				744	including the '=' and any whitespace around it, as a string object). If
				745	not a debug expression, expr_text set to NULL. /
				746	static int
				747	fstring_find_expr(Parser p, const char str, const char end, int raw, int recurse_lvl,
				748	PyObject *expr_text, expr_ty expression, Token *first_token,
				749	Token t, Token last_token)
				750	{
				751	/* Return -1 on error, else 0. */
				752
				753	const char *expr_start;
				754	const char *expr_end;
				755	expr_ty simple_expression;
				756	expr_ty format_spec = NULL; /* Optional format specifier. */
				757	int conversion = -1; /* The conversion char. Use default if not
				758	specified, or !r if using = and no format
				759	spec. */
				760
				761	/* 0 if we're not in a string, else the quote char we're trying to
				762	match (single or double quote). */
				763	char quote_char = 0;
				764
				765	/* If we're inside a string, 1=normal, 3=triple-quoted. */
				766	int string_type = 0;
				767
				768	/* Keep track of nesting level for braces/parens/brackets in
				769	expressions. */
				770	Py_ssize_t nested_depth = 0;
				771	char parenstack[MAXLEVEL];
				772
				773	*expr_text = NULL;
				774
				775	/* Can only nest one level deep. */
				776	if (recurse_lvl >= 2) {
				777	RAISE_SYNTAX_ERROR("f-string: expressions nested too deeply");
				778	goto error;
				779	}
				780
				781	/* The first char must be a left brace, or we wouldn't have gotten
				782	here. Skip over it. */
				783	assert(**str == '{');
				784	*str += 1;
				785
				786	expr_start = *str;
				787	for (; str < end; (str)++) {
				788	char ch;
				789
				790	/* Loop invariants. */
				791	assert(nested_depth >= 0);
				792	assert(str >= expr_start && str < end);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	793	if (quote_char) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	794	assert(string_type == 1 \|\| string_type == 3);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	795	} else {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	796	assert(string_type == 0);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	797	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	798
				799	ch = **str;
				800	/* Nowhere inside an expression is a backslash allowed. */
				801	if (ch == '\\') {
				802	/* Error: can't include a backslash character, inside
				803	parens or strings or not. */
				804	RAISE_SYNTAX_ERROR(
				805	"f-string expression part "
				806	"cannot include a backslash");
				807	goto error;
				808	}
				809	if (quote_char) {
				810	/* We're inside a string. See if we're at the end. */
				811	/* This code needs to implement the same non-error logic
				812	as tok_get from tokenizer.c, at the letter_quote
				813	label. To actually share that code would be a
				814	nightmare. But, it's unlikely to change and is small,
				815	so duplicate it here. Note we don't need to catch all
				816	of the errors, since they'll be caught when parsing the
				817	expression. We just need to match the non-error
				818	cases. Thus we can ignore \n in single-quoted strings,
				819	for example. Or non-terminated strings. */
				820	if (ch == quote_char) {
				821	/* Does this match the string_type (single or triple
				822	quoted)? */
				823	if (string_type == 3) {
				824	if (str+2 < end && (str+1) == ch && (*str+2) == ch) {
				825	/* We're at the end of a triple quoted string. */
				826	*str += 2;
				827	string_type = 0;
				828	quote_char = 0;
				829	continue;
				830	}
				831	} else {
				832	/* We're at the end of a normal string. */
				833	quote_char = 0;
				834	string_type = 0;
				835	continue;
				836	}
				837	}
				838	} else if (ch == '\'' \|\| ch == '"') {
				839	/* Is this a triple quoted string? */
				840	if (str+2 < end && (str+1) == ch && (*str+2) == ch) {
				841	string_type = 3;
				842	*str += 2;
				843	} else {
				844	/* Start of a normal string. */
				845	string_type = 1;
				846	}
				847	/* Start looking for the end of the string. */
				848	quote_char = ch;
				849	} else if (ch == '[' \|\| ch == '{' \|\| ch == '(') {
				850	if (nested_depth >= MAXLEVEL) {
				851	RAISE_SYNTAX_ERROR("f-string: too many nested parenthesis");
				852	goto error;
				853	}
				854	parenstack[nested_depth] = ch;
				855	nested_depth++;
				856	} else if (ch == '#') {
				857	/* Error: can't include a comment character, inside parens
				858	or not. */
				859	RAISE_SYNTAX_ERROR("f-string expression part cannot include '#'");
				860	goto error;
				861	} else if (nested_depth == 0 &&
				862	(ch == '!' \|\| ch == ':' \|\| ch == '}' \|\|
				863	ch == '=' \|\| ch == '>' \|\| ch == '<')) {
				864	/* See if there's a next character. */
				865	if (*str+1 < end) {
				866	char next = (str+1);
				867
				868	/* For "!=". since '=' is not an allowed conversion character,
				869	nothing is lost in this test. */
				870	if ((ch == '!' && next == '=') \|\| /* != */
				871	(ch == '=' && next == '=') \|\| /* == */
				872	(ch == '<' && next == '=') \|\| /* <= */
				873	(ch == '>' && next == '=') /* >= */
				874	) {
				875	*str += 1;
				876	continue;
				877	}
				878	/* Don't get out of the loop for these, if they're single
				879	chars (not part of 2-char tokens). If by themselves, they
				880	don't end an expression (unlike say '!'). */
				881	if (ch == '>' \|\| ch == '<') {
				882	continue;
				883	}
				884	}
				885
				886	/* Normal way out of this loop. */
				887	break;
				888	} else if (ch == ']' \|\| ch == '}' \|\| ch == ')') {
				889	if (!nested_depth) {
				890	RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", ch);
				891	goto error;
				892	}
				893	nested_depth--;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	894	int opening = (unsigned char)parenstack[nested_depth];
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	895	if (!((opening == '(' && ch == ')') \|\|
				896	(opening == '[' && ch == ']') \|\|
				897	(opening == '{' && ch == '}')))
				898	{
				899	RAISE_SYNTAX_ERROR(
				900	"f-string: closing parenthesis '%c' "
				901	"does not match opening parenthesis '%c'",
				902	ch, opening);
				903	goto error;
				904	}
				905	} else {
				906	/* Just consume this char and loop around. */
				907	}
				908	}
				909	expr_end = *str;
				910	/* If we leave this loop in a string or with mismatched parens, we
				911	don't care. We'll get a syntax error when compiling the
				912	expression. But, we can produce a better error message, so
				913	let's just do that.*/
				914	if (quote_char) {
				915	RAISE_SYNTAX_ERROR("f-string: unterminated string");
				916	goto error;
				917	}
				918	if (nested_depth) {
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	919	int opening = (unsigned char)parenstack[nested_depth - 1];
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	920	RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", opening);
				921	goto error;
				922	}
				923
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	924	if (*str >= end) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	925	goto unexpected_end_of_string;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	926	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	927
				928	/* Compile the expression as soon as possible, so we show errors
				929	related to the expression before errors related to the
				930	conversion or format_spec. */
				931	simple_expression = fstring_compile_expr(p, expr_start, expr_end, t);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	932	if (!simple_expression) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	933	goto error;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	934	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	935
				936	/* Check for =, which puts the text value of the expression in
				937	expr_text. */
				938	if (**str == '=') {
Shantanu	c116c94	2020-05-27 13:30:38 -0700	[diff] [blame]	939	if (p->feature_version < 8) {
				940	RAISE_SYNTAX_ERROR("f-string: self documenting expressions are "
				941	"only supported in Python 3.8 and greater");
				942	goto error;
				943	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	944	*str += 1;
				945
				946	/* Skip over ASCII whitespace. No need to test for end of string
				947	here, since we know there's at least a trailing quote somewhere
				948	ahead. */
				949	while (Py_ISSPACE(**str)) {
				950	*str += 1;
				951	}
				952
				953	/* Set expr_text to the text of the expression. /
				954	expr_text = PyUnicode_FromStringAndSize(expr_start, str-expr_start);
				955	if (!*expr_text) {
				956	goto error;
				957	}
				958	}
				959
				960	/* Check for a conversion char, if present. */
				961	if (**str == '!') {
				962	*str += 1;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	963	if (*str >= end) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	964	goto unexpected_end_of_string;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	965	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	966
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	967	conversion = (unsigned char)**str;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	968	*str += 1;
				969
				970	/* Validate the conversion. */
				971	if (!(conversion == 's' \|\| conversion == 'r' \|\| conversion == 'a')) {
				972	RAISE_SYNTAX_ERROR(
				973	"f-string: invalid conversion character: "
				974	"expected 's', 'r', or 'a'");
				975	goto error;
				976	}
				977
				978	}
				979
				980	/* Check for the format spec, if present. */
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	981	if (*str >= end) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	982	goto unexpected_end_of_string;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	983	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	984	if (**str == ':') {
				985	*str += 1;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	986	if (*str >= end) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	987	goto unexpected_end_of_string;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	988	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	989
				990	/* Parse the format spec. */
				991	format_spec = fstring_parse(p, str, end, raw, recurse_lvl+1,
				992	first_token, t, last_token);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	993	if (!format_spec) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	994	goto error;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	995	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	996	}
				997
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	998	if (str >= end \|\| *str != '}') {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	999	goto unexpected_end_of_string;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1000	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1001
				1002	/* We're at a right brace. Consume it. */
				1003	assert(*str < end);
				1004	assert(**str == '}');
				1005	*str += 1;
				1006
				1007	/* If we're in = mode (detected by non-NULL expr_text), and have no format
				1008	spec and no explicit conversion, set the conversion to 'r'. */
				1009	if (*expr_text && format_spec == NULL && conversion == -1) {
				1010	conversion = 'r';
				1011	}
				1012
				1013	/* And now create the FormattedValue node that represents this
				1014	entire expression with the conversion and format spec. */
				1015	//TODO: Fix this
				1016	*expression = FormattedValue(simple_expression, conversion,
				1017	format_spec, first_token->lineno,
				1018	first_token->col_offset, last_token->end_lineno,
				1019	last_token->end_col_offset, p->arena);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1020	if (!*expression) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1021	goto error;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1022	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1023
				1024	return 0;
				1025
				1026	unexpected_end_of_string:
				1027	RAISE_SYNTAX_ERROR("f-string: expecting '}'");
				1028	/* Falls through to error. */
				1029
				1030	error:
				1031	Py_XDECREF(*expr_text);
				1032	return -1;
				1033
				1034	}
				1035
				1036	/* Return -1 on error.
				1037
				1038	Return 0 if we have a literal (possible zero length) and an
				1039	expression (zero length if at the end of the string.
				1040
				1041	Return 1 if we have a literal, but no expression, and we want the
				1042	caller to call us again. This is used to deal with doubled
				1043	braces.
				1044
				1045	When called multiple times on the string 'a{{b{0}c', this function
				1046	will return:
				1047
				1048	1. the literal 'a{' with no expression, and a return value
				1049	of 1. Despite the fact that there's no expression, the return
				1050	value of 1 means we're not finished yet.
				1051
				1052	2. the literal 'b' and the expression '0', with a return value of
				1053	0. The fact that there's an expression means we're not finished.
				1054
				1055	3. literal 'c' with no expression and a return value of 0. The
				1056	combination of the return value of 0 with no expression means
				1057	we're finished.
				1058	*/
				1059	static int
				1060	fstring_find_literal_and_expr(Parser p, const char str, const char end, int raw,
				1061	int recurse_lvl, PyObject **literal,
				1062	PyObject *expr_text, expr_ty expression,
				1063	Token first_token, Token t, Token *last_token)
				1064	{
				1065	int result;
				1066
				1067	assert(literal == NULL && expression == NULL);
				1068
				1069	/* Get any literal string. */
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	1070	result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl, t);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1071	if (result < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1072	goto error;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1073	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1074
				1075	assert(result == 0 \|\| result == 1);
				1076
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1077	if (result == 1) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1078	/* We have a literal, but don't look at the expression. */
				1079	return 1;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1080	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1081
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1082	if (str >= end \|\| *str == '}') {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1083	/* We're at the end of the string or the end of a nested
				1084	f-string: no expression. The top-level error case where we
				1085	expect to be at the end of the string but we're at a '}' is
				1086	handled later. */
				1087	return 0;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1088	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1089
				1090	/* We must now be the start of an expression, on a '{'. */
				1091	assert(**str == '{');
				1092
				1093	if (fstring_find_expr(p, str, end, raw, recurse_lvl, expr_text,
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1094	expression, first_token, t, last_token) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1095	goto error;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1096	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1097
				1098	return 0;
				1099
				1100	error:
				1101	Py_CLEAR(*literal);
				1102	return -1;
				1103	}
				1104
				1105	#ifdef NDEBUG
				1106	#define ExprList_check_invariants(l)
				1107	#else
				1108	static void
				1109	ExprList_check_invariants(ExprList *l)
				1110	{
				1111	/* Check our invariants. Make sure this object is "live", and
				1112	hasn't been deallocated. */
				1113	assert(l->size >= 0);
				1114	assert(l->p != NULL);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1115	if (l->size <= EXPRLIST_N_CACHED) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1116	assert(l->data == l->p);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1117	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1118	}
				1119	#endif
				1120
				1121	static void
				1122	ExprList_Init(ExprList *l)
				1123	{
				1124	l->allocated = EXPRLIST_N_CACHED;
				1125	l->size = 0;
				1126
				1127	/* Until we start allocating dynamically, p points to data. */
				1128	l->p = l->data;
				1129
				1130	ExprList_check_invariants(l);
				1131	}
				1132
				1133	static int
				1134	ExprList_Append(ExprList *l, expr_ty exp)
				1135	{
				1136	ExprList_check_invariants(l);
				1137	if (l->size >= l->allocated) {
				1138	/* We need to alloc (or realloc) the memory. */
				1139	Py_ssize_t new_size = l->allocated * 2;
				1140
				1141	/* See if we've ever allocated anything dynamically. */
				1142	if (l->p == l->data) {
				1143	Py_ssize_t i;
				1144	/* We're still using the cached data. Switch to
				1145	alloc-ing. */
				1146	l->p = PyMem_RawMalloc(sizeof(expr_ty) * new_size);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1147	if (!l->p) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1148	return -1;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1149	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1150	/* Copy the cached data into the new buffer. */
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1151	for (i = 0; i < l->size; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1152	l->p[i] = l->data[i];
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1153	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1154	} else {
				1155	/* Just realloc. */
				1156	expr_ty tmp = PyMem_RawRealloc(l->p, sizeof(expr_ty) new_size);
				1157	if (!tmp) {
				1158	PyMem_RawFree(l->p);
				1159	l->p = NULL;
				1160	return -1;
				1161	}
				1162	l->p = tmp;
				1163	}
				1164
				1165	l->allocated = new_size;
				1166	assert(l->allocated == 2 * l->size);
				1167	}
				1168
				1169	l->p[l->size++] = exp;
				1170
				1171	ExprList_check_invariants(l);
				1172	return 0;
				1173	}
				1174
				1175	static void
				1176	ExprList_Dealloc(ExprList *l)
				1177	{
				1178	ExprList_check_invariants(l);
				1179
				1180	/* If there's been an error, or we've never dynamically allocated,
				1181	do nothing. */
				1182	if (!l->p \|\| l->p == l->data) {
				1183	/* Do nothing. */
				1184	} else {
				1185	/* We have dynamically allocated. Free the memory. */
				1186	PyMem_RawFree(l->p);
				1187	}
				1188	l->p = NULL;
				1189	l->size = -1;
				1190	}
				1191
				1192	static asdl_seq *
				1193	ExprList_Finish(ExprList l, PyArena arena)
				1194	{
				1195	asdl_seq *seq;
				1196
				1197	ExprList_check_invariants(l);
				1198
				1199	/* Allocate the asdl_seq and copy the expressions in to it. */
				1200	seq = _Py_asdl_seq_new(l->size, arena);
				1201	if (seq) {
				1202	Py_ssize_t i;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1203	for (i = 0; i < l->size; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1204	asdl_seq_SET(seq, i, l->p[i]);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1205	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1206	}
				1207	ExprList_Dealloc(l);
				1208	return seq;
				1209	}
				1210
				1211	#ifdef NDEBUG
				1212	#define FstringParser_check_invariants(state)
				1213	#else
				1214	static void
				1215	FstringParser_check_invariants(FstringParser *state)
				1216	{
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1217	if (state->last_str) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1218	assert(PyUnicode_CheckExact(state->last_str));
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1219	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1220	ExprList_check_invariants(&state->expr_list);
				1221	}
				1222	#endif
				1223
				1224	void
				1225	_PyPegen_FstringParser_Init(FstringParser *state)
				1226	{
				1227	state->last_str = NULL;
				1228	state->fmode = 0;
				1229	ExprList_Init(&state->expr_list);
				1230	FstringParser_check_invariants(state);
				1231	}
				1232
				1233	void
				1234	_PyPegen_FstringParser_Dealloc(FstringParser *state)
				1235	{
				1236	FstringParser_check_invariants(state);
				1237
				1238	Py_XDECREF(state->last_str);
				1239	ExprList_Dealloc(&state->expr_list);
				1240	}
				1241
				1242	/* Make a Constant node, but decref the PyUnicode object being added. */
				1243	static expr_ty
				1244	make_str_node_and_del(Parser p, PyObject str, Token first_token, Token *last_token)
				1245	{
				1246	PyObject s = str;
				1247	PyObject *kind = NULL;
				1248	*str = NULL;
				1249	assert(PyUnicode_CheckExact(s));
				1250	if (PyArena_AddPyObject(p->arena, s) < 0) {
				1251	Py_DECREF(s);
				1252	return NULL;
				1253	}
				1254	const char* the_str = PyBytes_AsString(first_token->bytes);
				1255	if (the_str && the_str[0] == 'u') {
				1256	kind = _PyPegen_new_identifier(p, "u");
				1257	}
				1258
				1259	if (kind == NULL && PyErr_Occurred()) {
				1260	return NULL;
				1261	}
				1262
				1263	return Constant(s, kind, first_token->lineno, first_token->col_offset,
				1264	last_token->end_lineno, last_token->end_col_offset, p->arena);
				1265
				1266	}
				1267
				1268
				1269	/* Add a non-f-string (that is, a regular literal string). str is
				1270	decref'd. */
				1271	int
				1272	_PyPegen_FstringParser_ConcatAndDel(FstringParser state, PyObject str)
				1273	{
				1274	FstringParser_check_invariants(state);
				1275
				1276	assert(PyUnicode_CheckExact(str));
				1277
				1278	if (PyUnicode_GET_LENGTH(str) == 0) {
				1279	Py_DECREF(str);
				1280	return 0;
				1281	}
				1282
				1283	if (!state->last_str) {
				1284	/* We didn't have a string before, so just remember this one. */
				1285	state->last_str = str;
				1286	} else {
				1287	/* Concatenate this with the previous string. */
				1288	PyUnicode_AppendAndDel(&state->last_str, str);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1289	if (!state->last_str) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1290	return -1;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1291	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1292	}
				1293	FstringParser_check_invariants(state);
				1294	return 0;
				1295	}
				1296
				1297	/* Parse an f-string. The f-string is in *str to end, with no
				1298	'f' or quotes. */
				1299	int
				1300	_PyPegen_FstringParser_ConcatFstring(Parser p, FstringParser state, const char **str,
				1301	const char *end, int raw, int recurse_lvl,
				1302	Token first_token, Token t, Token *last_token)
				1303	{
				1304	FstringParser_check_invariants(state);
				1305	state->fmode = 1;
				1306
				1307	/* Parse the f-string. */
				1308	while (1) {
				1309	PyObject *literal = NULL;
				1310	PyObject *expr_text = NULL;
				1311	expr_ty expression = NULL;
				1312
				1313	/* If there's a zero length literal in front of the
				1314	expression, literal will be NULL. If we're at the end of
				1315	the f-string, expression will be NULL (unless result == 1,
				1316	see below). */
				1317	int result = fstring_find_literal_and_expr(p, str, end, raw, recurse_lvl,
				1318	&literal, &expr_text,
				1319	&expression, first_token, t, last_token);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1320	if (result < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1321	return -1;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1322	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1323
				1324	/* Add the literal, if any. */
				1325	if (literal && _PyPegen_FstringParser_ConcatAndDel(state, literal) < 0) {
				1326	Py_XDECREF(expr_text);
				1327	return -1;
				1328	}
				1329	/* Add the expr_text, if any. */
				1330	if (expr_text && _PyPegen_FstringParser_ConcatAndDel(state, expr_text) < 0) {
				1331	return -1;
				1332	}
				1333
				1334	/* We've dealt with the literal and expr_text, their ownership has
				1335	been transferred to the state object. Don't look at them again. */
				1336
				1337	/* See if we should just loop around to get the next literal
				1338	and expression, while ignoring the expression this
				1339	time. This is used for un-doubling braces, as an
				1340	optimization. */
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1341	if (result == 1) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1342	continue;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1343	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1344
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1345	if (!expression) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1346	/* We're done with this f-string. */
				1347	break;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1348	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1349
				1350	/* We know we have an expression. Convert any existing string
				1351	to a Constant node. */
				1352	if (!state->last_str) {
				1353	/* Do nothing. No previous literal. */
				1354	} else {
				1355	/* Convert the existing last_str literal to a Constant node. */
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1356	expr_ty last_str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1357	if (!last_str \|\| ExprList_Append(&state->expr_list, last_str) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1358	return -1;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1359	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1360	}
				1361
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1362	if (ExprList_Append(&state->expr_list, expression) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1363	return -1;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1364	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1365	}
				1366
				1367	/* If recurse_lvl is zero, then we must be at the end of the
				1368	string. Otherwise, we must be at a right brace. */
				1369
				1370	if (recurse_lvl == 0 && *str < end-1) {
				1371	RAISE_SYNTAX_ERROR("f-string: unexpected end of string");
				1372	return -1;
				1373	}
				1374	if (recurse_lvl != 0 && **str != '}') {
				1375	RAISE_SYNTAX_ERROR("f-string: expecting '}'");
				1376	return -1;
				1377	}
				1378
				1379	FstringParser_check_invariants(state);
				1380	return 0;
				1381	}
				1382
				1383	/* Convert the partial state reflected in last_str and expr_list to an
				1384	expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
				1385	expr_ty
				1386	_PyPegen_FstringParser_Finish(Parser p, FstringParser state, Token* first_token,
				1387	Token *last_token)
				1388	{
				1389	asdl_seq *seq;
				1390
				1391	FstringParser_check_invariants(state);
				1392
				1393	/* If we're just a constant string with no expressions, return
				1394	that. */
				1395	if (!state->fmode) {
				1396	assert(!state->expr_list.size);
				1397	if (!state->last_str) {
				1398	/* Create a zero length string. */
				1399	state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1400	if (!state->last_str) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1401	goto error;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1402	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1403	}
				1404	return make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1405	}
				1406
				1407	/* Create a Constant node out of last_str, if needed. It will be the
				1408	last node in our expression list. */
				1409	if (state->last_str) {
				1410	expr_ty str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1411	if (!str \|\| ExprList_Append(&state->expr_list, str) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1412	goto error;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1413	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1414	}
				1415	/* This has already been freed. */
				1416	assert(state->last_str == NULL);
				1417
				1418	seq = ExprList_Finish(&state->expr_list, p->arena);
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1419	if (!seq) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1420	goto error;
Pablo Galindo	fb61c42	2020-06-15 14:23:43 +0100	[diff] [blame]	1421	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1422
				1423	return _Py_JoinedStr(seq, first_token->lineno, first_token->col_offset,
				1424	last_token->end_lineno, last_token->end_col_offset, p->arena);
				1425
				1426	error:
				1427	_PyPegen_FstringParser_Dealloc(state);
				1428	return NULL;
				1429	}
				1430
				1431	/* Given an f-string (with no 'f' or quotes) that's in *str and ends
				1432	at end, parse it into an expr_ty. Return NULL on error. Adjust
				1433	str to point past the parsed portion. */
				1434	static expr_ty
				1435	fstring_parse(Parser p, const char str, const char end, int raw,
				1436	int recurse_lvl, Token first_token, Token t, Token *last_token)
				1437	{
				1438	FstringParser state;
				1439
				1440	_PyPegen_FstringParser_Init(&state);
				1441	if (_PyPegen_FstringParser_ConcatFstring(p, &state, str, end, raw, recurse_lvl,
				1442	first_token, t, last_token) < 0) {
				1443	_PyPegen_FstringParser_Dealloc(&state);
				1444	return NULL;
				1445	}
				1446
				1447	return _PyPegen_FstringParser_Finish(p, &state, t, t);
				1448	}