bpo-37050: Remove expr_text from the FormattedValue AST node; use a Constant node instead (GH-13597)
When using the "=" debug functionality of f-strings, use another Constant node (or a merged constant node) instead of adding expr_text to the FormattedValue node.
diff --git a/Python/Python-ast.c b/Python/Python-ast.c
index 39a40ee..7c8e438 100644
--- a/Python/Python-ast.c
+++ b/Python/Python-ast.c
@@ -314,12 +314,10 @@
static PyTypeObject *FormattedValue_type;
_Py_IDENTIFIER(conversion);
_Py_IDENTIFIER(format_spec);
-_Py_IDENTIFIER(expr_text);
static char *FormattedValue_fields[]={
"value",
"conversion",
"format_spec",
- "expr_text",
};
static PyTypeObject *JoinedStr_type;
static char *JoinedStr_fields[]={
@@ -954,7 +952,7 @@
Call_type = make_type("Call", expr_type, Call_fields, 3);
if (!Call_type) return 0;
FormattedValue_type = make_type("FormattedValue", expr_type,
- FormattedValue_fields, 4);
+ FormattedValue_fields, 3);
if (!FormattedValue_type) return 0;
JoinedStr_type = make_type("JoinedStr", expr_type, JoinedStr_fields, 1);
if (!JoinedStr_type) return 0;
@@ -2253,9 +2251,9 @@
}
expr_ty
-FormattedValue(expr_ty value, int conversion, expr_ty format_spec, string
- expr_text, int lineno, int col_offset, int end_lineno, int
- end_col_offset, PyArena *arena)
+FormattedValue(expr_ty value, int conversion, expr_ty format_spec, int lineno,
+ int col_offset, int end_lineno, int end_col_offset, PyArena
+ *arena)
{
expr_ty p;
if (!value) {
@@ -2270,7 +2268,6 @@
p->v.FormattedValue.value = value;
p->v.FormattedValue.conversion = conversion;
p->v.FormattedValue.format_spec = format_spec;
- p->v.FormattedValue.expr_text = expr_text;
p->lineno = lineno;
p->col_offset = col_offset;
p->end_lineno = end_lineno;
@@ -3507,11 +3504,6 @@
if (_PyObject_SetAttrId(result, &PyId_format_spec, value) == -1)
goto failed;
Py_DECREF(value);
- value = ast2obj_string(o->v.FormattedValue.expr_text);
- if (!value) goto failed;
- if (_PyObject_SetAttrId(result, &PyId_expr_text, value) == -1)
- goto failed;
- Py_DECREF(value);
break;
case JoinedStr_kind:
result = PyType_GenericNew(JoinedStr_type, NULL, NULL);
@@ -7169,7 +7161,6 @@
expr_ty value;
int conversion;
expr_ty format_spec;
- string expr_text;
if (_PyObject_LookupAttrId(obj, &PyId_value, &tmp) < 0) {
return 1;
@@ -7210,22 +7201,8 @@
if (res != 0) goto failed;
Py_CLEAR(tmp);
}
- if (_PyObject_LookupAttrId(obj, &PyId_expr_text, &tmp) < 0) {
- return 1;
- }
- if (tmp == NULL || tmp == Py_None) {
- Py_CLEAR(tmp);
- expr_text = NULL;
- }
- else {
- int res;
- res = obj2ast_string(tmp, &expr_text, arena);
- if (res != 0) goto failed;
- Py_CLEAR(tmp);
- }
- *out = FormattedValue(value, conversion, format_spec, expr_text,
- lineno, col_offset, end_lineno, end_col_offset,
- arena);
+ *out = FormattedValue(value, conversion, format_spec, lineno,
+ col_offset, end_lineno, end_col_offset, arena);
if (*out == NULL) goto failed;
return 0;
}
diff --git a/Python/ast.c b/Python/ast.c
index 6259827..7ffdf4a 100644
--- a/Python/ast.c
+++ b/Python/ast.c
@@ -5006,10 +5006,16 @@
closing brace doesn't match an opening paren, for example. It
doesn't need to error on all invalid expressions, just correctly
find the end of all valid ones. Any errors inside the expression
- will be caught when we parse it later. */
+ will be caught when we parse it later.
+
+ *expression is set to the expression. For an '=' "debug" expression,
+ *expr_text is set to the debug text (the original text of the expression,
+ including the '=' and any whitespace around it, as a string object). If
+ this is not a debug expression, *expr_text is set to NULL. */
static int
fstring_find_expr(const char **str, const char *end, int raw, int recurse_lvl,
- expr_ty *expression, struct compiling *c, const node *n)
+ PyObject **expr_text, expr_ty *expression,
+ struct compiling *c, const node *n)
{
/* Return -1 on error, else 0. */
@@ -5020,9 +5026,6 @@
int conversion = -1; /* The conversion char. Use default if not
specified, or !r if using = and no format
spec. */
- int equal_flag = 0; /* Are we using the = feature? */
- PyObject *expr_text = NULL; /* The text of the expression, used for =. */
- const char *expr_text_end;
/* 0 if we're not in a string, else the quote char we're trying to
match (single or double quote). */
@@ -5198,7 +5201,6 @@
expr_text. */
if (**str == '=') {
*str += 1;
- equal_flag = 1;
/* Skip over ASCII whitespace. No need to test for end of string
here, since we know there's at least a trailing quote somewhere
@@ -5206,7 +5208,14 @@
while (Py_ISSPACE(**str)) {
*str += 1;
}
- expr_text_end = *str;
+
+ /* Set *expr_text to the text of the expression. */
+ *expr_text = PyUnicode_FromStringAndSize(expr_start, *str-expr_start);
+ if (!*expr_text) {
+ goto error;
+ }
+ } else {
+ *expr_text = NULL;
}
/* Check for a conversion char, if present. */
@@ -5227,17 +5236,6 @@
}
}
- if (equal_flag) {
- Py_ssize_t len = expr_text_end - expr_start;
- expr_text = PyUnicode_FromStringAndSize(expr_start, len);
- if (!expr_text) {
- goto error;
- }
- if (PyArena_AddPyObject(c->c_arena, expr_text) < 0) {
- Py_DECREF(expr_text);
- goto error;
- }
- }
/* Check for the format spec, if present. */
if (*str >= end)
@@ -5261,16 +5259,16 @@
assert(**str == '}');
*str += 1;
- /* If we're in = mode, and have no format spec and no explict conversion,
- set the conversion to 'r'. */
- if (equal_flag && format_spec == NULL && conversion == -1) {
+ /* If we're in = mode (detected by non-NULL *expr_text), and have no format
+ spec and no explicit conversion, set the conversion to 'r'. */
+ if (*expr_text && format_spec == NULL && conversion == -1) {
conversion = 'r';
}
/* And now create the FormattedValue node that represents this
entire expression with the conversion and format spec. */
*expression = FormattedValue(simple_expression, conversion,
- format_spec, expr_text, LINENO(n),
+ format_spec, LINENO(n),
n->n_col_offset, n->n_end_lineno,
n->n_end_col_offset, c->c_arena);
if (!*expression)
@@ -5313,7 +5311,7 @@
static int
fstring_find_literal_and_expr(const char **str, const char *end, int raw,
int recurse_lvl, PyObject **literal,
- expr_ty *expression,
+ PyObject **expr_text, expr_ty *expression,
struct compiling *c, const node *n)
{
int result;
@@ -5341,7 +5339,8 @@
/* We must now be the start of an expression, on a '{'. */
assert(**str == '{');
- if (fstring_find_expr(str, end, raw, recurse_lvl, expression, c, n) < 0)
+ if (fstring_find_expr(str, end, raw, recurse_lvl, expr_text,
+ expression, c, n) < 0)
goto error;
return 0;
@@ -5604,7 +5603,7 @@
/* Parse the f-string. */
while (1) {
- PyObject *literal = NULL;
+ PyObject *literal[2] = {NULL, NULL};
expr_ty expression = NULL;
/* If there's a zero length literal in front of the
@@ -5612,31 +5611,34 @@
the f-string, expression will be NULL (unless result == 1,
see below). */
int result = fstring_find_literal_and_expr(str, end, raw, recurse_lvl,
- &literal, &expression,
- c, n);
+ &literal[0], &literal[1],
+ &expression, c, n);
if (result < 0)
return -1;
- /* Add the literal, if any. */
- if (!literal) {
- /* Do nothing. Just leave last_str alone (and possibly
- NULL). */
- } else if (!state->last_str) {
- /* Note that the literal can be zero length, if the
- input string is "\\\n" or "\\\r", among others. */
- state->last_str = literal;
- literal = NULL;
- } else {
- /* We have a literal, concatenate it. */
- assert(PyUnicode_GET_LENGTH(literal) != 0);
- if (FstringParser_ConcatAndDel(state, literal) < 0)
- return -1;
- literal = NULL;
+ /* Add the literals, if any. */
+ for (int i = 0; i < 2; i++) {
+ if (!literal[i]) {
+ /* Do nothing. Just leave last_str alone (and possibly
+ NULL). */
+ } else if (!state->last_str) {
+ /* Note that the literal can be zero length, if the
+ input string is "\\\n" or "\\\r", among others. */
+ state->last_str = literal[i];
+ literal[i] = NULL;
+ } else {
+ /* We have a literal, concatenate it. */
+ assert(PyUnicode_GET_LENGTH(literal[i]) != 0);
+ if (FstringParser_ConcatAndDel(state, literal[i]) < 0)
+ return -1;
+ literal[i] = NULL;
+ }
}
- /* We've dealt with the literal now. It can't be leaked on further
+ /* We've dealt with the literals now. They can't be leaked on further
errors. */
- assert(literal == NULL);
+ assert(literal[0] == NULL);
+ assert(literal[1] == NULL);
/* See if we should just loop around to get the next literal
and expression, while ignoring the expression this
diff --git a/Python/ast_unparse.c b/Python/ast_unparse.c
index f1b991a..f376e86 100644
--- a/Python/ast_unparse.c
+++ b/Python/ast_unparse.c
@@ -665,11 +665,6 @@
}
Py_DECREF(temp_fv_str);
- if (e->v.FormattedValue.expr_text) {
- /* Use the = for debug text expansion. */
- APPEND_STR("=");
- }
-
if (e->v.FormattedValue.conversion > 0) {
switch (e->v.FormattedValue.conversion) {
case 'a':
diff --git a/Python/compile.c b/Python/compile.c
index 425d0d6..f1c97bd 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -3963,12 +3963,6 @@
int conversion = e->v.FormattedValue.conversion;
int oparg;
- if (e->v.FormattedValue.expr_text) {
- /* Push the text of the expression (which already has the '=' in
- it. */
- ADDOP_LOAD_CONST(c, e->v.FormattedValue.expr_text);
- }
-
/* The expression to be formatted. */
VISIT(c, expr, e->v.FormattedValue.value);
@@ -3991,11 +3985,6 @@
/* And push our opcode and oparg */
ADDOP_I(c, FORMAT_VALUE, oparg);
- /* If we have expr_text, join the 2 strings on the stack. */
- if (e->v.FormattedValue.expr_text) {
- ADDOP_I(c, BUILD_STRING, 2);
- }
-
return 1;
}