Blame - Parser/pegen/parse_string.c - platform/external/python/cpython3

blob: 61e60446e26fa9ecb167e66bcd1c2a0dda4ac633 [file] [log] [blame]

Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1	#include <Python.h>
				2
				3	#include "../tokenizer.h"
				4	#include "pegen.h"
				5	#include "parse_string.h"
				6
				7	//// STRING HANDLING FUNCTIONS ////
				8
// These functions are ported directly from Python/ast.c with some modifications
// to account for the use of "Parser *p", the fact that we don't have parser nodes
// to pass around and the usage of some specialized APIs present only in this
// file (like "_PyPegen_raise_syntax_error").
				13
				14	static int
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	15	warn_invalid_escape_sequence(Parser p, unsigned char first_invalid_escape_char, Token t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	16	{
				17	PyObject *msg =
				18	PyUnicode_FromFormat("invalid escape sequence \\%c", first_invalid_escape_char);
				19	if (msg == NULL) {
				20	return -1;
				21	}
				22	if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	23	t->lineno, NULL, NULL) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	24	if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
				25	/* Replace the DeprecationWarning exception with a SyntaxError
				26	to get a more accurate error report */
				27	PyErr_Clear();
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	28
				29	/* This is needed, in order for the SyntaxError to point to the token t,
				30	since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
				31	error location, if p->known_err_token is not set. */
				32	p->known_err_token = t;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	33	RAISE_SYNTAX_ERROR("invalid escape sequence \\%c", first_invalid_escape_char);
				34	}
				35	Py_DECREF(msg);
				36	return -1;
				37	}
				38	Py_DECREF(msg);
				39	return 0;
				40	}
				41
				42	static PyObject *
				43	decode_utf8(const char *sPtr, const char end)
				44	{
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	45	const char *s;
				46	const char *t;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	47	t = s = *sPtr;
				48	while (s < end && (*s & 0x80)) {
				49	s++;
				50	}
				51	*sPtr = s;
				52	return PyUnicode_DecodeUTF8(t, s - t, NULL);
				53	}
				54
				55	static PyObject *
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	56	decode_unicode_with_escapes(Parser parser, const char s, size_t len, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	57	{
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	58	PyObject *v;
				59	PyObject *u;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	60	char *buf;
				61	char *p;
				62	const char *end;
				63
				64	/* check for integer overflow */
				65	if (len > SIZE_MAX / 6) {
				66	return NULL;
				67	}
				68	/* "ä" (2 bytes) may become "\U000000E4" (10 bytes), or 1:5
				69	"\ä" (3 bytes) may become "\u005c\U000000E4" (16 bytes), or ~1:6 */
				70	u = PyBytes_FromStringAndSize((char )NULL, len 6);
				71	if (u == NULL) {
				72	return NULL;
				73	}
				74	p = buf = PyBytes_AsString(u);
				75	end = s + len;
				76	while (s < end) {
				77	if (*s == '\\') {
				78	p++ = s++;
				79	if (s >= end \|\| *s & 0x80) {
				80	strcpy(p, "u005c");
				81	p += 5;
				82	if (s >= end) {
				83	break;
				84	}
				85	}
				86	}
				87	if (*s & 0x80) {
				88	PyObject *w;
				89	int kind;
				90	void *data;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	91	Py_ssize_t w_len;
				92	Py_ssize_t i;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	93	w = decode_utf8(&s, end);
				94	if (w == NULL) {
				95	Py_DECREF(u);
				96	return NULL;
				97	}
				98	kind = PyUnicode_KIND(w);
				99	data = PyUnicode_DATA(w);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	100	w_len = PyUnicode_GET_LENGTH(w);
				101	for (i = 0; i < w_len; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	102	Py_UCS4 chr = PyUnicode_READ(kind, data, i);
				103	sprintf(p, "\\U%08x", chr);
				104	p += 10;
				105	}
				106	/* Should be impossible to overflow */
				107	assert(p - buf <= PyBytes_GET_SIZE(u));
				108	Py_DECREF(w);
				109	}
				110	else {
				111	p++ = s++;
				112	}
				113	}
				114	len = p - buf;
				115	s = buf;
				116
				117	const char *first_invalid_escape;
				118	v = _PyUnicode_DecodeUnicodeEscape(s, len, NULL, &first_invalid_escape);
				119
				120	if (v != NULL && first_invalid_escape != NULL) {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	121	if (warn_invalid_escape_sequence(parser, *first_invalid_escape, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	122	/* We have not decref u before because first_invalid_escape points
				123	inside u. */
				124	Py_XDECREF(u);
				125	Py_DECREF(v);
				126	return NULL;
				127	}
				128	}
				129	Py_XDECREF(u);
				130	return v;
				131	}
				132
				133	static PyObject *
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	134	decode_bytes_with_escapes(Parser p, const char s, Py_ssize_t len, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	135	{
				136	const char *first_invalid_escape;
				137	PyObject *result = _PyBytes_DecodeEscape(s, len, NULL, &first_invalid_escape);
				138	if (result == NULL) {
				139	return NULL;
				140	}
				141
				142	if (first_invalid_escape != NULL) {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	143	if (warn_invalid_escape_sequence(p, *first_invalid_escape, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	144	Py_DECREF(result);
				145	return NULL;
				146	}
				147	}
				148	return result;
				149	}
				150
				151	/* s must include the bracketing quote characters, and r, b, u,
				152	&/or f prefixes (if any), and embedded escape sequences (if any).
				153	_PyPegen_parsestr parses it, and sets *result to decoded Python string object.
				154	If the string is an f-string, set fstr and fstrlen to the unparsed
				155	string object. Return 0 if no errors occurred. */
				156	int
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	157	_PyPegen_parsestr(Parser p, int bytesmode, int rawmode, PyObject *result,
				158	const char *fstr, Py_ssize_t fstrlen, Token *t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	159	{
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	160	const char *s = PyBytes_AsString(t->bytes);
				161	if (s == NULL) {
				162	return -1;
				163	}
				164
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	165	size_t len;
				166	int quote = Py_CHARMASK(*s);
				167	int fmode = 0;
				168	*bytesmode = 0;
				169	*rawmode = 0;
				170	*result = NULL;
				171	*fstr = NULL;
				172	if (Py_ISALPHA(quote)) {
				173	while (!bytesmode \|\| !rawmode) {
				174	if (quote == 'b' \|\| quote == 'B') {
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	175	quote =(unsigned char)*++s;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	176	*bytesmode = 1;
				177	}
				178	else if (quote == 'u' \|\| quote == 'U') {
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	179	quote = (unsigned char)*++s;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	180	}
				181	else if (quote == 'r' \|\| quote == 'R') {
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	182	quote = (unsigned char)*++s;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	183	*rawmode = 1;
				184	}
				185	else if (quote == 'f' \|\| quote == 'F') {
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	186	quote = (unsigned char)*++s;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	187	fmode = 1;
				188	}
				189	else {
				190	break;
				191	}
				192	}
				193	}
				194
Lysandros Nikolaou	3e0a6f3	2020-05-01 06:27:52 +0300	[diff] [blame]	195	/* fstrings are only allowed in Python 3.6 and greater */
				196	if (fmode && p->feature_version < 6) {
				197	p->error_indicator = 1;
				198	RAISE_SYNTAX_ERROR("Format strings are only supported in Python 3.6 and greater");
				199	return -1;
				200	}
				201
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	202	if (fmode && *bytesmode) {
				203	PyErr_BadInternalCall();
				204	return -1;
				205	}
				206	if (quote != '\'' && quote != '\"') {
				207	PyErr_BadInternalCall();
				208	return -1;
				209	}
				210	/* Skip the leading quote char. */
				211	s++;
				212	len = strlen(s);
				213	if (len > INT_MAX) {
				214	PyErr_SetString(PyExc_OverflowError, "string to parse is too long");
				215	return -1;
				216	}
				217	if (s[--len] != quote) {
				218	/* Last quote char must match the first. */
				219	PyErr_BadInternalCall();
				220	return -1;
				221	}
				222	if (len >= 4 && s[0] == quote && s[1] == quote) {
				223	/* A triple quoted string. We've already skipped one quote at
				224	the start and one at the end of the string. Now skip the
				225	two at the start. */
				226	s += 2;
				227	len -= 2;
				228	/* And check that the last two match. */
				229	if (s[--len] != quote \|\| s[--len] != quote) {
				230	PyErr_BadInternalCall();
				231	return -1;
				232	}
				233	}
				234
				235	if (fmode) {
				236	/* Just return the bytes. The caller will parse the resulting
				237	string. */
				238	*fstr = s;
				239	*fstrlen = len;
				240	return 0;
				241	}
				242
				243	/* Not an f-string. */
				244	/* Avoid invoking escape decoding routines if possible. */
				245	rawmode = rawmode \|\| strchr(s, '\\') == NULL;
				246	if (*bytesmode) {
				247	/* Disallow non-ASCII characters. */
				248	const char *ch;
				249	for (ch = s; *ch; ch++) {
				250	if (Py_CHARMASK(*ch) >= 0x80) {
				251	RAISE_SYNTAX_ERROR(
				252	"bytes can only contain ASCII "
				253	"literal characters.");
				254	return -1;
				255	}
				256	}
				257	if (*rawmode) {
				258	*result = PyBytes_FromStringAndSize(s, len);
				259	}
				260	else {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	261	*result = decode_bytes_with_escapes(p, s, len, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	262	}
				263	}
				264	else {
				265	if (*rawmode) {
				266	*result = PyUnicode_DecodeUTF8Stateful(s, len, NULL, NULL);
				267	}
				268	else {
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	269	*result = decode_unicode_with_escapes(p, s, len, t);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	270	}
				271	}
				272	return *result == NULL ? -1 : 0;
				273	}
				274
				275
				276
				277	// FSTRING STUFF
				278
				279	static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset);
				280	static void fstring_shift_argument(expr_ty parent, arg_ty args, int lineno, int col_offset);
				281
				282
				283	static inline void shift_expr(expr_ty parent, expr_ty n, int line, int col) {
Miss Islington (bot)	6440911	2020-06-07 18:08:53 -0700	[diff] [blame]	284	if (n == NULL) {
				285	return;
				286	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	287	if (parent->lineno < n->lineno) {
				288	col = 0;
				289	}
				290	fstring_shift_expr_locations(n, line, col);
				291	}
				292
				293	static inline void shift_arg(expr_ty parent, arg_ty n, int line, int col) {
				294	if (parent->lineno < n->lineno) {
				295	col = 0;
				296	}
				297	fstring_shift_argument(parent, n, line, col);
				298	}
				299
				300	static void fstring_shift_seq_locations(expr_ty parent, asdl_seq *seq, int lineno, int col_offset) {
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	301	for (Py_ssize_t i = 0, l = asdl_seq_LEN(seq); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	302	expr_ty expr = asdl_seq_GET(seq, i);
				303	if (expr == NULL){
				304	continue;
				305	}
				306	shift_expr(parent, expr, lineno, col_offset);
				307	}
				308	}
				309
				310	static void fstring_shift_slice_locations(expr_ty parent, expr_ty slice, int lineno, int col_offset) {
				311	switch (slice->kind) {
				312	case Slice_kind:
				313	if (slice->v.Slice.lower) {
				314	shift_expr(parent, slice->v.Slice.lower, lineno, col_offset);
				315	}
				316	if (slice->v.Slice.upper) {
				317	shift_expr(parent, slice->v.Slice.upper, lineno, col_offset);
				318	}
				319	if (slice->v.Slice.step) {
				320	shift_expr(parent, slice->v.Slice.step, lineno, col_offset);
				321	}
				322	break;
				323	case Tuple_kind:
				324	fstring_shift_seq_locations(parent, slice->v.Tuple.elts, lineno, col_offset);
				325	break;
				326	default:
				327	break;
				328	}
				329	}
				330
				331	static void fstring_shift_comprehension(expr_ty parent, comprehension_ty comp, int lineno, int col_offset) {
				332	shift_expr(parent, comp->target, lineno, col_offset);
				333	shift_expr(parent, comp->iter, lineno, col_offset);
				334	fstring_shift_seq_locations(parent, comp->ifs, lineno, col_offset);
				335	}
				336
				337	static void fstring_shift_argument(expr_ty parent, arg_ty arg, int lineno, int col_offset) {
				338	if (arg->annotation != NULL){
				339	shift_expr(parent, arg->annotation, lineno, col_offset);
				340	}
				341	arg->col_offset = arg->col_offset + col_offset;
				342	arg->end_col_offset = arg->end_col_offset + col_offset;
				343	arg->lineno = arg->lineno + lineno;
				344	arg->end_lineno = arg->end_lineno + lineno;
				345	}
				346
				347	static void fstring_shift_arguments(expr_ty parent, arguments_ty args, int lineno, int col_offset) {
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	348	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->posonlyargs); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	349	arg_ty arg = asdl_seq_GET(args->posonlyargs, i);
				350	shift_arg(parent, arg, lineno, col_offset);
				351	}
				352
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	353	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->args); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	354	arg_ty arg = asdl_seq_GET(args->args, i);
				355	shift_arg(parent, arg, lineno, col_offset);
				356	}
				357
				358	if (args->vararg != NULL) {
				359	shift_arg(parent, args->vararg, lineno, col_offset);
				360	}
				361
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	362	for (Py_ssize_t i = 0, l = asdl_seq_LEN(args->kwonlyargs); i < l; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	363	arg_ty arg = asdl_seq_GET(args->kwonlyargs, i);
				364	shift_arg(parent, arg, lineno, col_offset);
				365	}
				366
				367	fstring_shift_seq_locations(parent, args->kw_defaults, lineno, col_offset);
				368
				369	if (args->kwarg != NULL) {
				370	shift_arg(parent, args->kwarg, lineno, col_offset);
				371	}
				372
				373	fstring_shift_seq_locations(parent, args->defaults, lineno, col_offset);
				374	}
				375
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	376	static void fstring_shift_children_locations(expr_ty node, int lineno, int col_offset) {
				377	switch (node->kind) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	378	case BoolOp_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	379	fstring_shift_seq_locations(node, node->v.BoolOp.values, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	380	break;
				381	case NamedExpr_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	382	shift_expr(node, node->v.NamedExpr.target, lineno, col_offset);
				383	shift_expr(node, node->v.NamedExpr.value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	384	break;
				385	case BinOp_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	386	shift_expr(node, node->v.BinOp.left, lineno, col_offset);
				387	shift_expr(node, node->v.BinOp.right, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	388	break;
				389	case UnaryOp_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	390	shift_expr(node, node->v.UnaryOp.operand, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	391	break;
				392	case Lambda_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	393	fstring_shift_arguments(node, node->v.Lambda.args, lineno, col_offset);
				394	shift_expr(node, node->v.Lambda.body, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	395	break;
				396	case IfExp_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	397	shift_expr(node, node->v.IfExp.test, lineno, col_offset);
				398	shift_expr(node, node->v.IfExp.body, lineno, col_offset);
				399	shift_expr(node, node->v.IfExp.orelse, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	400	break;
				401	case Dict_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	402	fstring_shift_seq_locations(node, node->v.Dict.keys, lineno, col_offset);
				403	fstring_shift_seq_locations(node, node->v.Dict.values, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	404	break;
				405	case Set_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	406	fstring_shift_seq_locations(node, node->v.Set.elts, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	407	break;
				408	case ListComp_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	409	shift_expr(node, node->v.ListComp.elt, lineno, col_offset);
				410	for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.ListComp.generators); i < l; i++) {
				411	comprehension_ty comp = asdl_seq_GET(node->v.ListComp.generators, i);
				412	fstring_shift_comprehension(node, comp, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	413	}
				414	break;
				415	case SetComp_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	416	shift_expr(node, node->v.SetComp.elt, lineno, col_offset);
				417	for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.SetComp.generators); i < l; i++) {
				418	comprehension_ty comp = asdl_seq_GET(node->v.SetComp.generators, i);
				419	fstring_shift_comprehension(node, comp, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	420	}
				421	break;
				422	case DictComp_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	423	shift_expr(node, node->v.DictComp.key, lineno, col_offset);
				424	shift_expr(node, node->v.DictComp.value, lineno, col_offset);
				425	for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.DictComp.generators); i < l; i++) {
				426	comprehension_ty comp = asdl_seq_GET(node->v.DictComp.generators, i);
				427	fstring_shift_comprehension(node, comp, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	428	}
				429	break;
				430	case GeneratorExp_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	431	shift_expr(node, node->v.GeneratorExp.elt, lineno, col_offset);
				432	for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.GeneratorExp.generators); i < l; i++) {
				433	comprehension_ty comp = asdl_seq_GET(node->v.GeneratorExp.generators, i);
				434	fstring_shift_comprehension(node, comp, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	435	}
				436	break;
				437	case Await_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	438	shift_expr(node, node->v.Await.value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	439	break;
				440	case Yield_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	441	shift_expr(node, node->v.Yield.value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	442	break;
				443	case YieldFrom_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	444	shift_expr(node, node->v.YieldFrom.value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	445	break;
				446	case Compare_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	447	shift_expr(node, node->v.Compare.left, lineno, col_offset);
				448	fstring_shift_seq_locations(node, node->v.Compare.comparators, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	449	break;
				450	case Call_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	451	shift_expr(node, node->v.Call.func, lineno, col_offset);
				452	fstring_shift_seq_locations(node, node->v.Call.args, lineno, col_offset);
				453	for (Py_ssize_t i = 0, l = asdl_seq_LEN(node->v.Call.keywords); i < l; i++) {
				454	keyword_ty keyword = asdl_seq_GET(node->v.Call.keywords, i);
				455	shift_expr(node, keyword->value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	456	}
				457	break;
				458	case Attribute_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	459	shift_expr(node, node->v.Attribute.value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	460	break;
				461	case Subscript_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	462	shift_expr(node, node->v.Subscript.value, lineno, col_offset);
				463	fstring_shift_slice_locations(node, node->v.Subscript.slice, lineno, col_offset);
				464	shift_expr(node, node->v.Subscript.slice, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	465	break;
				466	case Starred_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	467	shift_expr(node, node->v.Starred.value, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	468	break;
				469	case List_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	470	fstring_shift_seq_locations(node, node->v.List.elts, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	471	break;
				472	case Tuple_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	473	fstring_shift_seq_locations(node, node->v.Tuple.elts, lineno, col_offset);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	474	break;
Lysandros Nikolaou	37af21b	2020-04-29 03:43:50 +0300	[diff] [blame]	475	case JoinedStr_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	476	fstring_shift_seq_locations(node, node->v.JoinedStr.values, lineno, col_offset);
Lysandros Nikolaou	37af21b	2020-04-29 03:43:50 +0300	[diff] [blame]	477	break;
				478	case FormattedValue_kind:
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	479	shift_expr(node, node->v.FormattedValue.value, lineno, col_offset);
				480	if (node->v.FormattedValue.format_spec) {
				481	shift_expr(node, node->v.FormattedValue.format_spec, lineno, col_offset);
Lysandros Nikolaou	37af21b	2020-04-29 03:43:50 +0300	[diff] [blame]	482	}
				483	break;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	484	default:
				485	return;
				486	}
				487	}
				488
				489	/* Shift locations for the given node and all its children by adding `lineno`
				490	and `col_offset` to existing locations. Note that n is the already parsed
				491	expression. */
				492	static void fstring_shift_expr_locations(expr_ty n, int lineno, int col_offset)
				493	{
				494	n->col_offset = n->col_offset + col_offset;
				495
				496	// The following is needed, in order for nodes spanning across multiple lines
				497	// to be shifted correctly. An example of such a node is a Call node, the closing
				498	// parenthesis of which is not on the same line as its name.
				499	if (n->lineno == n->end_lineno) {
				500	n->end_col_offset = n->end_col_offset + col_offset;
				501	}
				502
				503	fstring_shift_children_locations(n, lineno, col_offset);
				504	n->lineno = n->lineno + lineno;
				505	n->end_lineno = n->end_lineno + lineno;
				506	}
				507
				508	/* Fix locations for the given node and its children.
				509
				510	`parent` is the enclosing node.
				511	`n` is the node which locations are going to be fixed relative to parent.
				512	`expr_str` is the child node's string representation, including braces.
				513	*/
				514	static void
				515	fstring_fix_expr_location(Token parent, expr_ty n, char expr_str)
				516	{
				517	char *substr = NULL;
				518	char *start;
				519	int lines = 0;
				520	int cols = 0;
				521
				522	if (parent && parent->bytes) {
				523	char *parent_str = PyBytes_AsString(parent->bytes);
				524	if (!parent_str) {
				525	return;
				526	}
				527	substr = strstr(parent_str, expr_str);
				528	if (substr) {
				529	// The following is needed, in order to correctly shift the column
				530	// offset, in the case that (disregarding any whitespace) a newline
				531	// immediately follows the opening curly brace of the fstring expression.
				532	int newline_after_brace = 1;
				533	start = substr + 1;
				534	while (start && start != '}' && start != '\n') {
				535	if (start != ' ' && start != '\t' && *start != '\f') {
				536	newline_after_brace = 0;
				537	break;
				538	}
				539	start++;
				540	}
				541
				542	// Account for the characters from the last newline character to our
				543	// left until the beginning of substr.
				544	if (!newline_after_brace) {
				545	start = substr;
				546	while (start > parent_str && *start != '\n') {
				547	start--;
				548	}
				549	cols += (int)(substr - start);
				550	}
				551	/* adjust the start based on the number of newlines encountered
				552	before the f-string expression */
Pablo Galindo	0b7829e	2020-04-23 03:24:25 +0100	[diff] [blame]	553	for (char* p = parent_str; p < substr; p++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	554	if (*p == '\n') {
				555	lines++;
				556	}
				557	}
				558	}
				559	}
				560	fstring_shift_expr_locations(n, lines, cols);
				561	}
				562
				563
				564	/* Compile this expression in to an expr_ty. Add parens around the
				565	expression, in order to allow leading spaces in the expression. */
				566	static expr_ty
				567	fstring_compile_expr(Parser p, const char expr_start, const char *expr_end,
				568	Token *t)
				569	{
				570	expr_ty expr = NULL;
				571	char *str;
				572	Py_ssize_t len;
				573	const char *s;
				574	expr_ty result = NULL;
				575
				576	assert(expr_end >= expr_start);
				577	assert(*(expr_start-1) == '{');
				578	assert(expr_end == '}' \|\| expr_end == '!' \|\| *expr_end == ':' \|\|
				579	*expr_end == '=');
				580
				581	/* If the substring is all whitespace, it's an error. We need to catch this
				582	here, and not when we call PyParser_SimpleParseStringFlagsFilename,
				583	because turning the expression '' in to '()' would go from being invalid
				584	to valid. */
				585	for (s = expr_start; s != expr_end; s++) {
				586	char c = *s;
				587	/* The Python parser ignores only the following whitespace
				588	characters (\r already is converted to \n). */
				589	if (!(c == ' ' \|\| c == '\t' \|\| c == '\n' \|\| c == '\f')) {
				590	break;
				591	}
				592	}
				593	if (s == expr_end) {
				594	RAISE_SYNTAX_ERROR("f-string: empty expression not allowed");
				595	return NULL;
				596	}
				597
				598	len = expr_end - expr_start;
				599	/* Allocate 3 extra bytes: open paren, close paren, null byte. */
Lysandros Nikolaou	5193d0a	2020-06-27 21:35:18 +0300	[diff] [blame^]	600	str = PyMem_Malloc(len + 3);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	601	if (str == NULL) {
				602	PyErr_NoMemory();
				603	return NULL;
				604	}
				605
				606	str[0] = '(';
				607	memcpy(str+1, expr_start, len);
				608	str[len+1] = ')';
				609	str[len+2] = 0;
				610
				611	struct tok_state* tok = PyTokenizer_FromString(str, 1);
				612	if (tok == NULL) {
Lysandros Nikolaou	5193d0a	2020-06-27 21:35:18 +0300	[diff] [blame^]	613	PyMem_Free(str);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	614	return NULL;
				615	}
Lysandros Nikolaou	791a46e	2020-05-26 04:24:31 +0300	[diff] [blame]	616	Py_INCREF(p->tok->filename);
				617	tok->filename = p->tok->filename;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	618
Lysandros Nikolaou	3e0a6f3	2020-05-01 06:27:52 +0300	[diff] [blame]	619	Parser *p2 = _PyPegen_Parser_New(tok, Py_fstring_input, p->flags, p->feature_version,
				620	NULL, p->arena);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	621	p2->starting_lineno = p->starting_lineno + p->tok->first_lineno - 1;
				622	p2->starting_col_offset = p->tok->first_lineno == p->tok->lineno
				623	? p->starting_col_offset + t->col_offset : 0;
				624
				625	expr = _PyPegen_run_parser(p2);
				626
				627	if (expr == NULL) {
				628	goto exit;
				629	}
				630
				631	/* Reuse str to find the correct column offset. */
				632	str[0] = '{';
				633	str[len+1] = '}';
				634	fstring_fix_expr_location(t, expr, str);
				635
				636	result = expr;
				637
				638	exit:
Lysandros Nikolaou	5193d0a	2020-06-27 21:35:18 +0300	[diff] [blame^]	639	PyMem_Free(str);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	640	_PyPegen_Parser_Free(p2);
				641	PyTokenizer_Free(tok);
				642	return result;
				643	}
				644
				645	/* Return -1 on error.
				646
				647	Return 0 if we reached the end of the literal.
				648
				649	Return 1 if we haven't reached the end of the literal, but we want
				650	the caller to process the literal up to this point. Used for
				651	doubled braces.
				652	*/
				653	static int
				654	fstring_find_literal(Parser p, const char str, const char end, int raw,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	655	PyObject *literal, int recurse_lvl, Token t)
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	656	{
				657	/* Get any literal string. It ends when we hit an un-doubled left
				658	brace (which isn't part of a unicode name escape such as
				659	"\N{EULER CONSTANT}"), or the end of the string. */
				660
				661	const char s = str;
				662	const char *literal_start = s;
				663	int result = 0;
				664
				665	assert(*literal == NULL);
				666	while (s < end) {
				667	char ch = *s++;
				668	if (!raw && ch == '\\' && s < end) {
				669	ch = *s++;
				670	if (ch == 'N') {
				671	if (s < end && *s++ == '{') {
				672	while (s < end && *s++ != '}') {
				673	}
				674	continue;
				675	}
				676	break;
				677	}
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	678	if (ch == '{' && warn_invalid_escape_sequence(p, ch, t) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	679	return -1;
				680	}
				681	}
				682	if (ch == '{' \|\| ch == '}') {
				683	/* Check for doubled braces, but only at the top level. If
				684	we checked at every level, then f'{0:{3}}' would fail
				685	with the two closing braces. */
				686	if (recurse_lvl == 0) {
				687	if (s < end && *s == ch) {
				688	/* We're going to tell the caller that the literal ends
				689	here, but that they should continue scanning. But also
				690	skip over the second brace when we resume scanning. */
				691	*str = s + 1;
				692	result = 1;
				693	goto done;
				694	}
				695
				696	/* Where a single '{' is the start of a new expression, a
				697	single '}' is not allowed. */
				698	if (ch == '}') {
				699	*str = s - 1;
				700	RAISE_SYNTAX_ERROR("f-string: single '}' is not allowed");
				701	return -1;
				702	}
				703	}
				704	/* We're either at a '{', which means we're starting another
				705	expression; or a '}', which means we're at the end of this
				706	f-string (for a nested format_spec). */
				707	s--;
				708	break;
				709	}
				710	}
				711	*str = s;
				712	assert(s <= end);
				713	assert(s == end \|\| s == '{' \|\| s == '}');
				714	done:
				715	if (literal_start != s) {
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	716	if (raw) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	717	*literal = PyUnicode_DecodeUTF8Stateful(literal_start,
				718	s - literal_start,
				719	NULL, NULL);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	720	} else {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	721	*literal = decode_unicode_with_escapes(p, literal_start,
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	722	s - literal_start, t);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	723	}
				724	if (!*literal) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	725	return -1;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	726	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	727	}
				728	return result;
				729	}
				730
				731	/* Forward declaration because parsing is recursive. */
				732	static expr_ty
				733	fstring_parse(Parser p, const char str, const char end, int raw, int recurse_lvl,
				734	Token first_token, Token t, Token *last_token);
				735
				736	/* Parse the f-string at str, ending at end. We know str starts an
				737	expression (so it must be a '{'). Returns the FormattedValue node, which
				738	includes the expression, conversion character, format_spec expression, and
				739	optionally the text of the expression (if = is used).
				740
				741	Note that I don't do a perfect job here: I don't make sure that a
				742	closing brace doesn't match an opening paren, for example. It
				743	doesn't need to error on all invalid expressions, just correctly
				744	find the end of all valid ones. Any errors inside the expression
				745	will be caught when we parse it later.
				746
				747	*expression is set to the expression. For an '=' "debug" expression,
				748	*expr_text is set to the debug text (the original text of the expression,
				749	including the '=' and any whitespace around it, as a string object). If
				750	not a debug expression, expr_text set to NULL. /
				751	static int
				752	fstring_find_expr(Parser p, const char str, const char end, int raw, int recurse_lvl,
				753	PyObject *expr_text, expr_ty expression, Token *first_token,
				754	Token t, Token last_token)
				755	{
				756	/* Return -1 on error, else 0. */
				757
				758	const char *expr_start;
				759	const char *expr_end;
				760	expr_ty simple_expression;
				761	expr_ty format_spec = NULL; /* Optional format specifier. */
				762	int conversion = -1; /* The conversion char. Use default if not
				763	specified, or !r if using = and no format
				764	spec. */
				765
				766	/* 0 if we're not in a string, else the quote char we're trying to
				767	match (single or double quote). */
				768	char quote_char = 0;
				769
				770	/* If we're inside a string, 1=normal, 3=triple-quoted. */
				771	int string_type = 0;
				772
				773	/* Keep track of nesting level for braces/parens/brackets in
				774	expressions. */
				775	Py_ssize_t nested_depth = 0;
				776	char parenstack[MAXLEVEL];
				777
				778	*expr_text = NULL;
				779
				780	/* Can only nest one level deep. */
				781	if (recurse_lvl >= 2) {
				782	RAISE_SYNTAX_ERROR("f-string: expressions nested too deeply");
				783	goto error;
				784	}
				785
				786	/* The first char must be a left brace, or we wouldn't have gotten
				787	here. Skip over it. */
				788	assert(**str == '{');
				789	*str += 1;
				790
				791	expr_start = *str;
				792	for (; str < end; (str)++) {
				793	char ch;
				794
				795	/* Loop invariants. */
				796	assert(nested_depth >= 0);
				797	assert(str >= expr_start && str < end);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	798	if (quote_char) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	799	assert(string_type == 1 \|\| string_type == 3);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	800	} else {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	801	assert(string_type == 0);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	802	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	803
				804	ch = **str;
				805	/* Nowhere inside an expression is a backslash allowed. */
				806	if (ch == '\\') {
				807	/* Error: can't include a backslash character, inside
				808	parens or strings or not. */
				809	RAISE_SYNTAX_ERROR(
				810	"f-string expression part "
				811	"cannot include a backslash");
				812	goto error;
				813	}
				814	if (quote_char) {
				815	/* We're inside a string. See if we're at the end. */
				816	/* This code needs to implement the same non-error logic
				817	as tok_get from tokenizer.c, at the letter_quote
				818	label. To actually share that code would be a
				819	nightmare. But, it's unlikely to change and is small,
				820	so duplicate it here. Note we don't need to catch all
				821	of the errors, since they'll be caught when parsing the
				822	expression. We just need to match the non-error
				823	cases. Thus we can ignore \n in single-quoted strings,
				824	for example. Or non-terminated strings. */
				825	if (ch == quote_char) {
				826	/* Does this match the string_type (single or triple
				827	quoted)? */
				828	if (string_type == 3) {
				829	if (str+2 < end && (str+1) == ch && (*str+2) == ch) {
				830	/* We're at the end of a triple quoted string. */
				831	*str += 2;
				832	string_type = 0;
				833	quote_char = 0;
				834	continue;
				835	}
				836	} else {
				837	/* We're at the end of a normal string. */
				838	quote_char = 0;
				839	string_type = 0;
				840	continue;
				841	}
				842	}
				843	} else if (ch == '\'' \|\| ch == '"') {
				844	/* Is this a triple quoted string? */
				845	if (str+2 < end && (str+1) == ch && (*str+2) == ch) {
				846	string_type = 3;
				847	*str += 2;
				848	} else {
				849	/* Start of a normal string. */
				850	string_type = 1;
				851	}
				852	/* Start looking for the end of the string. */
				853	quote_char = ch;
				854	} else if (ch == '[' \|\| ch == '{' \|\| ch == '(') {
				855	if (nested_depth >= MAXLEVEL) {
				856	RAISE_SYNTAX_ERROR("f-string: too many nested parenthesis");
				857	goto error;
				858	}
				859	parenstack[nested_depth] = ch;
				860	nested_depth++;
				861	} else if (ch == '#') {
				862	/* Error: can't include a comment character, inside parens
				863	or not. */
				864	RAISE_SYNTAX_ERROR("f-string expression part cannot include '#'");
				865	goto error;
				866	} else if (nested_depth == 0 &&
				867	(ch == '!' \|\| ch == ':' \|\| ch == '}' \|\|
				868	ch == '=' \|\| ch == '>' \|\| ch == '<')) {
				869	/* See if there's a next character. */
				870	if (*str+1 < end) {
				871	char next = (str+1);
				872
				873	/* For "!=". since '=' is not an allowed conversion character,
				874	nothing is lost in this test. */
				875	if ((ch == '!' && next == '=') \|\| /* != */
				876	(ch == '=' && next == '=') \|\| /* == */
				877	(ch == '<' && next == '=') \|\| /* <= */
				878	(ch == '>' && next == '=') /* >= */
				879	) {
				880	*str += 1;
				881	continue;
				882	}
				883	/* Don't get out of the loop for these, if they're single
				884	chars (not part of 2-char tokens). If by themselves, they
				885	don't end an expression (unlike say '!'). */
				886	if (ch == '>' \|\| ch == '<') {
				887	continue;
				888	}
				889	}
				890
				891	/* Normal way out of this loop. */
				892	break;
				893	} else if (ch == ']' \|\| ch == '}' \|\| ch == ')') {
				894	if (!nested_depth) {
				895	RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", ch);
				896	goto error;
				897	}
				898	nested_depth--;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	899	int opening = (unsigned char)parenstack[nested_depth];
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	900	if (!((opening == '(' && ch == ')') \|\|
				901	(opening == '[' && ch == ']') \|\|
				902	(opening == '{' && ch == '}')))
				903	{
				904	RAISE_SYNTAX_ERROR(
				905	"f-string: closing parenthesis '%c' "
				906	"does not match opening parenthesis '%c'",
				907	ch, opening);
				908	goto error;
				909	}
				910	} else {
				911	/* Just consume this char and loop around. */
				912	}
				913	}
				914	expr_end = *str;
				915	/* If we leave this loop in a string or with mismatched parens, we
				916	don't care. We'll get a syntax error when compiling the
				917	expression. But, we can produce a better error message, so
				918	let's just do that.*/
				919	if (quote_char) {
				920	RAISE_SYNTAX_ERROR("f-string: unterminated string");
				921	goto error;
				922	}
				923	if (nested_depth) {
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	924	int opening = (unsigned char)parenstack[nested_depth - 1];
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	925	RAISE_SYNTAX_ERROR("f-string: unmatched '%c'", opening);
				926	goto error;
				927	}
				928
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	929	if (*str >= end) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	930	goto unexpected_end_of_string;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	931	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	932
				933	/* Compile the expression as soon as possible, so we show errors
				934	related to the expression before errors related to the
				935	conversion or format_spec. */
				936	simple_expression = fstring_compile_expr(p, expr_start, expr_end, t);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	937	if (!simple_expression) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	938	goto error;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	939	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	940
				941	/* Check for =, which puts the text value of the expression in
				942	expr_text. */
				943	if (**str == '=') {
Pablo Galindo	9b83829	2020-05-27 22:01:11 +0100	[diff] [blame]	944	if (p->feature_version < 8) {
				945	RAISE_SYNTAX_ERROR("f-string: self documenting expressions are "
				946	"only supported in Python 3.8 and greater");
				947	goto error;
				948	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	949	*str += 1;
				950
				951	/* Skip over ASCII whitespace. No need to test for end of string
				952	here, since we know there's at least a trailing quote somewhere
				953	ahead. */
				954	while (Py_ISSPACE(**str)) {
				955	*str += 1;
				956	}
				957
				958	/* Set expr_text to the text of the expression. /
				959	expr_text = PyUnicode_FromStringAndSize(expr_start, str-expr_start);
				960	if (!*expr_text) {
				961	goto error;
				962	}
				963	}
				964
				965	/* Check for a conversion char, if present. */
				966	if (**str == '!') {
				967	*str += 1;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	968	if (*str >= end) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	969	goto unexpected_end_of_string;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	970	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	971
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	972	conversion = (unsigned char)**str;
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	973	*str += 1;
				974
				975	/* Validate the conversion. */
				976	if (!(conversion == 's' \|\| conversion == 'r' \|\| conversion == 'a')) {
				977	RAISE_SYNTAX_ERROR(
				978	"f-string: invalid conversion character: "
				979	"expected 's', 'r', or 'a'");
				980	goto error;
				981	}
				982
				983	}
				984
				985	/* Check for the format spec, if present. */
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	986	if (*str >= end) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	987	goto unexpected_end_of_string;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	988	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	989	if (**str == ':') {
				990	*str += 1;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	991	if (*str >= end) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	992	goto unexpected_end_of_string;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	993	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	994
				995	/* Parse the format spec. */
				996	format_spec = fstring_parse(p, str, end, raw, recurse_lvl+1,
				997	first_token, t, last_token);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	998	if (!format_spec) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	999	goto error;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1000	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1001	}
				1002
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1003	if (str >= end \|\| *str != '}') {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1004	goto unexpected_end_of_string;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1005	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1006
				1007	/* We're at a right brace. Consume it. */
				1008	assert(*str < end);
				1009	assert(**str == '}');
				1010	*str += 1;
				1011
				1012	/* If we're in = mode (detected by non-NULL expr_text), and have no format
				1013	spec and no explicit conversion, set the conversion to 'r'. */
				1014	if (*expr_text && format_spec == NULL && conversion == -1) {
				1015	conversion = 'r';
				1016	}
				1017
				1018	/* And now create the FormattedValue node that represents this
				1019	entire expression with the conversion and format spec. */
				1020	//TODO: Fix this
				1021	*expression = FormattedValue(simple_expression, conversion,
				1022	format_spec, first_token->lineno,
				1023	first_token->col_offset, last_token->end_lineno,
				1024	last_token->end_col_offset, p->arena);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1025	if (!*expression) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1026	goto error;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1027	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1028
				1029	return 0;
				1030
				1031	unexpected_end_of_string:
				1032	RAISE_SYNTAX_ERROR("f-string: expecting '}'");
				1033	/* Falls through to error. */
				1034
				1035	error:
				1036	Py_XDECREF(*expr_text);
				1037	return -1;
				1038
				1039	}
				1040
				1041	/* Return -1 on error.
				1042
				1043	Return 0 if we have a literal (possible zero length) and an
				1044	expression (zero length if at the end of the string.
				1045
				1046	Return 1 if we have a literal, but no expression, and we want the
				1047	caller to call us again. This is used to deal with doubled
				1048	braces.
				1049
				1050	When called multiple times on the string 'a{{b{0}c', this function
				1051	will return:
				1052
				1053	1. the literal 'a{' with no expression, and a return value
				1054	of 1. Despite the fact that there's no expression, the return
				1055	value of 1 means we're not finished yet.
				1056
				1057	2. the literal 'b' and the expression '0', with a return value of
				1058	0. The fact that there's an expression means we're not finished.
				1059
				1060	3. literal 'c' with no expression and a return value of 0. The
				1061	combination of the return value of 0 with no expression means
				1062	we're finished.
				1063	*/
				1064	static int
				1065	fstring_find_literal_and_expr(Parser p, const char str, const char end, int raw,
				1066	int recurse_lvl, PyObject **literal,
				1067	PyObject *expr_text, expr_ty expression,
				1068	Token first_token, Token t, Token *last_token)
				1069	{
				1070	int result;
				1071
				1072	assert(literal == NULL && expression == NULL);
				1073
				1074	/* Get any literal string. */
Lysandros Nikolaou	2f37c35	2020-05-07 13:37:51 +0300	[diff] [blame]	1075	result = fstring_find_literal(p, str, end, raw, literal, recurse_lvl, t);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1076	if (result < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1077	goto error;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1078	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1079
				1080	assert(result == 0 \|\| result == 1);
				1081
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1082	if (result == 1) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1083	/* We have a literal, but don't look at the expression. */
				1084	return 1;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1085	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1086
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1087	if (str >= end \|\| *str == '}') {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1088	/* We're at the end of the string or the end of a nested
				1089	f-string: no expression. The top-level error case where we
				1090	expect to be at the end of the string but we're at a '}' is
				1091	handled later. */
				1092	return 0;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1093	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1094
				1095	/* We must now be the start of an expression, on a '{'. */
				1096	assert(**str == '{');
				1097
				1098	if (fstring_find_expr(p, str, end, raw, recurse_lvl, expr_text,
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1099	expression, first_token, t, last_token) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1100	goto error;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1101	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1102
				1103	return 0;
				1104
				1105	error:
				1106	Py_CLEAR(*literal);
				1107	return -1;
				1108	}
				1109
				1110	#ifdef NDEBUG
				1111	#define ExprList_check_invariants(l)
				1112	#else
				1113	static void
				1114	ExprList_check_invariants(ExprList *l)
				1115	{
				1116	/* Check our invariants. Make sure this object is "live", and
				1117	hasn't been deallocated. */
				1118	assert(l->size >= 0);
				1119	assert(l->p != NULL);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1120	if (l->size <= EXPRLIST_N_CACHED) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1121	assert(l->data == l->p);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1122	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1123	}
				1124	#endif
				1125
				1126	static void
				1127	ExprList_Init(ExprList *l)
				1128	{
				1129	l->allocated = EXPRLIST_N_CACHED;
				1130	l->size = 0;
				1131
				1132	/* Until we start allocating dynamically, p points to data. */
				1133	l->p = l->data;
				1134
				1135	ExprList_check_invariants(l);
				1136	}
				1137
				1138	static int
				1139	ExprList_Append(ExprList *l, expr_ty exp)
				1140	{
				1141	ExprList_check_invariants(l);
				1142	if (l->size >= l->allocated) {
				1143	/* We need to alloc (or realloc) the memory. */
				1144	Py_ssize_t new_size = l->allocated * 2;
				1145
				1146	/* See if we've ever allocated anything dynamically. */
				1147	if (l->p == l->data) {
				1148	Py_ssize_t i;
				1149	/* We're still using the cached data. Switch to
				1150	alloc-ing. */
Lysandros Nikolaou	5193d0a	2020-06-27 21:35:18 +0300	[diff] [blame^]	1151	l->p = PyMem_Malloc(sizeof(expr_ty) * new_size);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1152	if (!l->p) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1153	return -1;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1154	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1155	/* Copy the cached data into the new buffer. */
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1156	for (i = 0; i < l->size; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1157	l->p[i] = l->data[i];
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1158	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1159	} else {
				1160	/* Just realloc. */
Lysandros Nikolaou	5193d0a	2020-06-27 21:35:18 +0300	[diff] [blame^]	1161	expr_ty tmp = PyMem_Realloc(l->p, sizeof(expr_ty) new_size);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1162	if (!tmp) {
Lysandros Nikolaou	5193d0a	2020-06-27 21:35:18 +0300	[diff] [blame^]	1163	PyMem_Free(l->p);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1164	l->p = NULL;
				1165	return -1;
				1166	}
				1167	l->p = tmp;
				1168	}
				1169
				1170	l->allocated = new_size;
				1171	assert(l->allocated == 2 * l->size);
				1172	}
				1173
				1174	l->p[l->size++] = exp;
				1175
				1176	ExprList_check_invariants(l);
				1177	return 0;
				1178	}
				1179
				1180	static void
				1181	ExprList_Dealloc(ExprList *l)
				1182	{
				1183	ExprList_check_invariants(l);
				1184
				1185	/* If there's been an error, or we've never dynamically allocated,
				1186	do nothing. */
				1187	if (!l->p \|\| l->p == l->data) {
				1188	/* Do nothing. */
				1189	} else {
				1190	/* We have dynamically allocated. Free the memory. */
Lysandros Nikolaou	5193d0a	2020-06-27 21:35:18 +0300	[diff] [blame^]	1191	PyMem_Free(l->p);
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1192	}
				1193	l->p = NULL;
				1194	l->size = -1;
				1195	}
				1196
				1197	static asdl_seq *
				1198	ExprList_Finish(ExprList l, PyArena arena)
				1199	{
				1200	asdl_seq *seq;
				1201
				1202	ExprList_check_invariants(l);
				1203
				1204	/* Allocate the asdl_seq and copy the expressions in to it. */
				1205	seq = _Py_asdl_seq_new(l->size, arena);
				1206	if (seq) {
				1207	Py_ssize_t i;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1208	for (i = 0; i < l->size; i++) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1209	asdl_seq_SET(seq, i, l->p[i]);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1210	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1211	}
				1212	ExprList_Dealloc(l);
				1213	return seq;
				1214	}
				1215
				1216	#ifdef NDEBUG
				1217	#define FstringParser_check_invariants(state)
				1218	#else
				1219	static void
				1220	FstringParser_check_invariants(FstringParser *state)
				1221	{
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1222	if (state->last_str) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1223	assert(PyUnicode_CheckExact(state->last_str));
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1224	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1225	ExprList_check_invariants(&state->expr_list);
				1226	}
				1227	#endif
				1228
				1229	void
				1230	_PyPegen_FstringParser_Init(FstringParser *state)
				1231	{
				1232	state->last_str = NULL;
				1233	state->fmode = 0;
				1234	ExprList_Init(&state->expr_list);
				1235	FstringParser_check_invariants(state);
				1236	}
				1237
				1238	void
				1239	_PyPegen_FstringParser_Dealloc(FstringParser *state)
				1240	{
				1241	FstringParser_check_invariants(state);
				1242
				1243	Py_XDECREF(state->last_str);
				1244	ExprList_Dealloc(&state->expr_list);
				1245	}
				1246
				1247	/* Make a Constant node, but decref the PyUnicode object being added. */
				1248	static expr_ty
				1249	make_str_node_and_del(Parser p, PyObject str, Token first_token, Token *last_token)
				1250	{
				1251	PyObject s = str;
				1252	PyObject *kind = NULL;
				1253	*str = NULL;
				1254	assert(PyUnicode_CheckExact(s));
				1255	if (PyArena_AddPyObject(p->arena, s) < 0) {
				1256	Py_DECREF(s);
				1257	return NULL;
				1258	}
				1259	const char* the_str = PyBytes_AsString(first_token->bytes);
				1260	if (the_str && the_str[0] == 'u') {
				1261	kind = _PyPegen_new_identifier(p, "u");
				1262	}
				1263
				1264	if (kind == NULL && PyErr_Occurred()) {
				1265	return NULL;
				1266	}
				1267
				1268	return Constant(s, kind, first_token->lineno, first_token->col_offset,
				1269	last_token->end_lineno, last_token->end_col_offset, p->arena);
				1270
				1271	}
				1272
				1273
				1274	/* Add a non-f-string (that is, a regular literal string). str is
				1275	decref'd. */
				1276	int
				1277	_PyPegen_FstringParser_ConcatAndDel(FstringParser state, PyObject str)
				1278	{
				1279	FstringParser_check_invariants(state);
				1280
				1281	assert(PyUnicode_CheckExact(str));
				1282
				1283	if (PyUnicode_GET_LENGTH(str) == 0) {
				1284	Py_DECREF(str);
				1285	return 0;
				1286	}
				1287
				1288	if (!state->last_str) {
				1289	/* We didn't have a string before, so just remember this one. */
				1290	state->last_str = str;
				1291	} else {
				1292	/* Concatenate this with the previous string. */
				1293	PyUnicode_AppendAndDel(&state->last_str, str);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1294	if (!state->last_str) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1295	return -1;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1296	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1297	}
				1298	FstringParser_check_invariants(state);
				1299	return 0;
				1300	}
				1301
				1302	/* Parse an f-string. The f-string is in *str to end, with no
				1303	'f' or quotes. */
				1304	int
				1305	_PyPegen_FstringParser_ConcatFstring(Parser p, FstringParser state, const char **str,
				1306	const char *end, int raw, int recurse_lvl,
				1307	Token first_token, Token t, Token *last_token)
				1308	{
				1309	FstringParser_check_invariants(state);
				1310	state->fmode = 1;
				1311
				1312	/* Parse the f-string. */
				1313	while (1) {
				1314	PyObject *literal = NULL;
				1315	PyObject *expr_text = NULL;
				1316	expr_ty expression = NULL;
				1317
				1318	/* If there's a zero length literal in front of the
				1319	expression, literal will be NULL. If we're at the end of
				1320	the f-string, expression will be NULL (unless result == 1,
				1321	see below). */
				1322	int result = fstring_find_literal_and_expr(p, str, end, raw, recurse_lvl,
				1323	&literal, &expr_text,
				1324	&expression, first_token, t, last_token);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1325	if (result < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1326	return -1;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1327	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1328
				1329	/* Add the literal, if any. */
				1330	if (literal && _PyPegen_FstringParser_ConcatAndDel(state, literal) < 0) {
				1331	Py_XDECREF(expr_text);
				1332	return -1;
				1333	}
				1334	/* Add the expr_text, if any. */
				1335	if (expr_text && _PyPegen_FstringParser_ConcatAndDel(state, expr_text) < 0) {
				1336	return -1;
				1337	}
				1338
				1339	/* We've dealt with the literal and expr_text, their ownership has
				1340	been transferred to the state object. Don't look at them again. */
				1341
				1342	/* See if we should just loop around to get the next literal
				1343	and expression, while ignoring the expression this
				1344	time. This is used for un-doubling braces, as an
				1345	optimization. */
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1346	if (result == 1) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1347	continue;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1348	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1349
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1350	if (!expression) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1351	/* We're done with this f-string. */
				1352	break;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1353	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1354
				1355	/* We know we have an expression. Convert any existing string
				1356	to a Constant node. */
				1357	if (!state->last_str) {
				1358	/* Do nothing. No previous literal. */
				1359	} else {
				1360	/* Convert the existing last_str literal to a Constant node. */
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1361	expr_ty last_str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1362	if (!last_str \|\| ExprList_Append(&state->expr_list, last_str) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1363	return -1;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1364	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1365	}
				1366
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1367	if (ExprList_Append(&state->expr_list, expression) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1368	return -1;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1369	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1370	}
				1371
				1372	/* If recurse_lvl is zero, then we must be at the end of the
				1373	string. Otherwise, we must be at a right brace. */
				1374
				1375	if (recurse_lvl == 0 && *str < end-1) {
				1376	RAISE_SYNTAX_ERROR("f-string: unexpected end of string");
				1377	return -1;
				1378	}
				1379	if (recurse_lvl != 0 && **str != '}') {
				1380	RAISE_SYNTAX_ERROR("f-string: expecting '}'");
				1381	return -1;
				1382	}
				1383
				1384	FstringParser_check_invariants(state);
				1385	return 0;
				1386	}
				1387
				1388	/* Convert the partial state reflected in last_str and expr_list to an
				1389	expr_ty. The expr_ty can be a Constant, or a JoinedStr. */
				1390	expr_ty
				1391	_PyPegen_FstringParser_Finish(Parser p, FstringParser state, Token* first_token,
				1392	Token *last_token)
				1393	{
				1394	asdl_seq *seq;
				1395
				1396	FstringParser_check_invariants(state);
				1397
				1398	/* If we're just a constant string with no expressions, return
				1399	that. */
				1400	if (!state->fmode) {
				1401	assert(!state->expr_list.size);
				1402	if (!state->last_str) {
				1403	/* Create a zero length string. */
				1404	state->last_str = PyUnicode_FromStringAndSize(NULL, 0);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1405	if (!state->last_str) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1406	goto error;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1407	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1408	}
				1409	return make_str_node_and_del(p, &state->last_str, first_token, last_token);
				1410	}
				1411
				1412	/* Create a Constant node out of last_str, if needed. It will be the
				1413	last node in our expression list. */
				1414	if (state->last_str) {
				1415	expr_ty str = make_str_node_and_del(p, &state->last_str, first_token, last_token);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1416	if (!str \|\| ExprList_Append(&state->expr_list, str) < 0) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1417	goto error;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1418	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1419	}
				1420	/* This has already been freed. */
				1421	assert(state->last_str == NULL);
				1422
				1423	seq = ExprList_Finish(&state->expr_list, p->arena);
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1424	if (!seq) {
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1425	goto error;
Pablo Galindo	30b59fd	2020-06-15 15:08:00 +0100	[diff] [blame]	1426	}
Pablo Galindo	c5fc156	2020-04-22 23:29:27 +0100	[diff] [blame]	1427
				1428	return _Py_JoinedStr(seq, first_token->lineno, first_token->col_offset,
				1429	last_token->end_lineno, last_token->end_col_offset, p->arena);
				1430
				1431	error:
				1432	_PyPegen_FstringParser_Dealloc(state);
				1433	return NULL;
				1434	}
				1435
				1436	/* Given an f-string (with no 'f' or quotes) that's in *str and ends
				1437	at end, parse it into an expr_ty. Return NULL on error. Adjust
				1438	str to point past the parsed portion. */
				1439	static expr_ty
				1440	fstring_parse(Parser p, const char str, const char end, int raw,
				1441	int recurse_lvl, Token first_token, Token t, Token *last_token)
				1442	{
				1443	FstringParser state;
				1444
				1445	_PyPegen_FstringParser_Init(&state);
				1446	if (_PyPegen_FstringParser_ConcatFstring(p, &state, str, end, raw, recurse_lvl,
				1447	first_token, t, last_token) < 0) {
				1448	_PyPegen_FstringParser_Dealloc(&state);
				1449	return NULL;
				1450	}
				1451
				1452	return _PyPegen_FstringParser_Finish(p, &state, t, t);
				1453	}