bpo-42246: Partial implementation of PEP 626. (GH-23113)
* Implement new line number table format, as defined in PEP 626.
diff --git a/Objects/clinic/codeobject.c.h b/Objects/clinic/codeobject.c.h
index c739537..bae2ab0 100644
--- a/Objects/clinic/codeobject.c.h
+++ b/Objects/clinic/codeobject.c.h
@@ -5,7 +5,7 @@ preserve
PyDoc_STRVAR(code_new__doc__,
"code(argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize,\n"
" flags, codestring, constants, names, varnames, filename, name,\n"
-" firstlineno, lnotab, freevars=(), cellvars=(), /)\n"
+" firstlineno, linetable, freevars=(), cellvars=(), /)\n"
"--\n"
"\n"
"Create a code object. Not for the faint of heart.");
@@ -15,7 +15,7 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount,
int kwonlyargcount, int nlocals, int stacksize, int flags,
PyObject *code, PyObject *consts, PyObject *names,
PyObject *varnames, PyObject *filename, PyObject *name,
- int firstlineno, PyObject *lnotab, PyObject *freevars,
+ int firstlineno, PyObject *linetable, PyObject *freevars,
PyObject *cellvars);
static PyObject *
@@ -35,7 +35,7 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
PyObject *filename;
PyObject *name;
int firstlineno;
- PyObject *lnotab;
+ PyObject *linetable;
PyObject *freevars = NULL;
PyObject *cellvars = NULL;
@@ -114,7 +114,7 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
_PyArg_BadArgument("code", "argument 14", "bytes", PyTuple_GET_ITEM(args, 13));
goto exit;
}
- lnotab = PyTuple_GET_ITEM(args, 13);
+ linetable = PyTuple_GET_ITEM(args, 13);
if (PyTuple_GET_SIZE(args) < 15) {
goto skip_optional;
}
@@ -132,7 +132,7 @@ code_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
}
cellvars = PyTuple_GET_ITEM(args, 15);
skip_optional:
- return_value = code_new_impl(type, argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize, flags, code, consts, names, varnames, filename, name, firstlineno, lnotab, freevars, cellvars);
+ return_value = code_new_impl(type, argcount, posonlyargcount, kwonlyargcount, nlocals, stacksize, flags, code, consts, names, varnames, filename, name, firstlineno, linetable, freevars, cellvars);
exit:
return return_value;
@@ -144,7 +144,7 @@ PyDoc_STRVAR(code_replace__doc__,
" co_flags=-1, co_firstlineno=-1, co_code=None, co_consts=None,\n"
" co_names=None, co_varnames=None, co_freevars=None,\n"
" co_cellvars=None, co_filename=None, co_name=None,\n"
-" co_lnotab=None)\n"
+" co_linetable=None)\n"
"--\n"
"\n"
"Return a copy of the code object with new values for the specified fields.");
@@ -160,13 +160,13 @@ code_replace_impl(PyCodeObject *self, int co_argcount,
PyObject *co_consts, PyObject *co_names,
PyObject *co_varnames, PyObject *co_freevars,
PyObject *co_cellvars, PyObject *co_filename,
- PyObject *co_name, PyBytesObject *co_lnotab);
+ PyObject *co_name, PyBytesObject *co_linetable);
static PyObject *
code_replace(PyCodeObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames)
{
PyObject *return_value = NULL;
- static const char * const _keywords[] = {"co_argcount", "co_posonlyargcount", "co_kwonlyargcount", "co_nlocals", "co_stacksize", "co_flags", "co_firstlineno", "co_code", "co_consts", "co_names", "co_varnames", "co_freevars", "co_cellvars", "co_filename", "co_name", "co_lnotab", NULL};
+ static const char * const _keywords[] = {"co_argcount", "co_posonlyargcount", "co_kwonlyargcount", "co_nlocals", "co_stacksize", "co_flags", "co_firstlineno", "co_code", "co_consts", "co_names", "co_varnames", "co_freevars", "co_cellvars", "co_filename", "co_name", "co_linetable", NULL};
static _PyArg_Parser _parser = {NULL, _keywords, "replace", 0};
PyObject *argsbuf[16];
Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 0;
@@ -185,7 +185,7 @@ code_replace(PyCodeObject *self, PyObject *const *args, Py_ssize_t nargs, PyObje
PyObject *co_cellvars = self->co_cellvars;
PyObject *co_filename = self->co_filename;
PyObject *co_name = self->co_name;
- PyBytesObject *co_lnotab = (PyBytesObject *)self->co_lnotab;
+ PyBytesObject *co_linetable = (PyBytesObject *)self->co_linetable;
args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 0, 0, 0, argsbuf);
if (!args) {
@@ -344,14 +344,14 @@ code_replace(PyCodeObject *self, PyObject *const *args, Py_ssize_t nargs, PyObje
}
}
if (!PyBytes_Check(args[15])) {
- _PyArg_BadArgument("replace", "argument 'co_lnotab'", "bytes", args[15]);
+ _PyArg_BadArgument("replace", "argument 'co_linetable'", "bytes", args[15]);
goto exit;
}
- co_lnotab = (PyBytesObject *)args[15];
+ co_linetable = (PyBytesObject *)args[15];
skip_optional_kwonly:
- return_value = code_replace_impl(self, co_argcount, co_posonlyargcount, co_kwonlyargcount, co_nlocals, co_stacksize, co_flags, co_firstlineno, co_code, co_consts, co_names, co_varnames, co_freevars, co_cellvars, co_filename, co_name, co_lnotab);
+ return_value = code_replace_impl(self, co_argcount, co_posonlyargcount, co_kwonlyargcount, co_nlocals, co_stacksize, co_flags, co_firstlineno, co_code, co_consts, co_names, co_varnames, co_freevars, co_cellvars, co_filename, co_name, co_linetable);
exit:
return return_value;
}
-/*[clinic end generated code: output=18c31941ec09e9ca input=a9049054013a1b77]*/
+/*[clinic end generated code: output=e3091c7baaaaa420 input=a9049054013a1b77]*/
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index c86d0e1..7b224cc 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -119,7 +119,7 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
PyObject *code, PyObject *consts, PyObject *names,
PyObject *varnames, PyObject *freevars, PyObject *cellvars,
PyObject *filename, PyObject *name, int firstlineno,
- PyObject *lnotab)
+ PyObject *linetable)
{
PyCodeObject *co;
Py_ssize_t *cell2arg = NULL;
@@ -137,7 +137,7 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
cellvars == NULL || !PyTuple_Check(cellvars) ||
name == NULL || !PyUnicode_Check(name) ||
filename == NULL || !PyUnicode_Check(filename) ||
- lnotab == NULL || !PyBytes_Check(lnotab)) {
+ linetable == NULL || !PyBytes_Check(linetable)) {
PyErr_BadInternalCall();
return NULL;
}
@@ -258,8 +258,8 @@ PyCode_NewWithPosOnlyArgs(int argcount, int posonlyargcount, int kwonlyargcount,
Py_INCREF(name);
co->co_name = name;
co->co_firstlineno = firstlineno;
- Py_INCREF(lnotab);
- co->co_lnotab = lnotab;
+ Py_INCREF(linetable);
+ co->co_linetable = linetable;
co->co_zombieframe = NULL;
co->co_weakreflist = NULL;
co->co_extra = NULL;
@@ -277,12 +277,12 @@ PyCode_New(int argcount, int kwonlyargcount,
PyObject *code, PyObject *consts, PyObject *names,
PyObject *varnames, PyObject *freevars, PyObject *cellvars,
PyObject *filename, PyObject *name, int firstlineno,
- PyObject *lnotab)
+ PyObject *linetable)
{
return PyCode_NewWithPosOnlyArgs(argcount, 0, kwonlyargcount, nlocals,
stacksize, flags, code, consts, names,
varnames, freevars, cellvars, filename,
- name, firstlineno, lnotab);
+ name, firstlineno, linetable);
}
int
@@ -369,7 +369,7 @@ PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno)
filename_ob, /* filename */
funcname_ob, /* name */
firstlineno, /* firstlineno */
- emptystring /* lnotab */
+ emptystring /* linetable */
);
failed:
@@ -395,11 +395,89 @@ static PyMemberDef code_memberlist[] = {
{"co_cellvars", T_OBJECT, OFF(co_cellvars), READONLY},
{"co_filename", T_OBJECT, OFF(co_filename), READONLY},
{"co_name", T_OBJECT, OFF(co_name), READONLY},
- {"co_firstlineno", T_INT, OFF(co_firstlineno), READONLY},
- {"co_lnotab", T_OBJECT, OFF(co_lnotab), READONLY},
+ {"co_firstlineno", T_INT, OFF(co_firstlineno), READONLY},
+ {"co_linetable", T_OBJECT, OFF(co_linetable), READONLY},
{NULL} /* Sentinel */
};
+static int
+emit_pair(PyObject **bytes, int *offset, int a, int b)
+{
+ Py_ssize_t len = PyBytes_GET_SIZE(*bytes);
+ if (*offset + 2 >= len) {
+ if (_PyBytes_Resize(bytes, len * 2) < 0)
+ return 0;
+ }
+ unsigned char *lnotab = (unsigned char *)
+ PyBytes_AS_STRING(*bytes) + *offset;
+ *lnotab++ = a;
+ *lnotab++ = b;
+ *offset += 2;
+ return 1;
+}
+
+static int
+emit_delta(PyObject **bytes, int bdelta, int ldelta, int *offset)
+{
+ while (bdelta > 255) {
+ if (!emit_pair(bytes, offset, 255, 0)) {
+ return 0;
+ }
+ bdelta -= 255;
+ }
+ while (ldelta > 127) {
+ if (!emit_pair(bytes, offset, bdelta, 127)) {
+ return 0;
+ }
+ bdelta = 0;
+ ldelta -= 127;
+ }
+ while (ldelta < -128) {
+ if (!emit_pair(bytes, offset, bdelta, -128)) {
+ return 0;
+ }
+ bdelta = 0;
+ ldelta += 128;
+ }
+ return emit_pair(bytes, offset, bdelta, ldelta);
+}
+
+static PyObject *
+code_getlnotab(PyCodeObject *code, void *closure)
+{
+ PyCodeAddressRange bounds;
+ PyObject *bytes;
+ int table_offset = 0;
+ int code_offset = 0;
+ int line = code->co_firstlineno;
+ bytes = PyBytes_FromStringAndSize(NULL, 64);
+ if (bytes == NULL) {
+ return NULL;
+ }
+ _PyCode_InitAddressRange(code, &bounds);
+ while (PyLineTable_NextAddressRange(&bounds)) {
+ if (bounds.ar_computed_line != line) {
+ int bdelta = bounds.ar_start - code_offset;
+ int ldelta = bounds.ar_computed_line - line;
+ if (!emit_delta(&bytes, bdelta, ldelta, &table_offset)) {
+ Py_DECREF(bytes);
+ return NULL;
+ }
+ code_offset = bounds.ar_start;
+ line = bounds.ar_computed_line;
+ }
+ }
+ _PyBytes_Resize(&bytes, table_offset);
+ return bytes;
+}
+
+
+static PyGetSetDef code_getsetlist[] = {
+ {"co_lnotab", (getter)code_getlnotab, NULL, NULL},
+ {0}
+};
+
+
/* Helper for code_new: return a shallow copy of a tuple that is
guaranteed to contain exact strings, by converting string subclasses
to exact strings and complaining if a non-string is found. */
@@ -459,7 +537,7 @@ code.__new__ as code_new
filename: unicode
name: unicode
firstlineno: int
- lnotab: object(subclass_of="&PyBytes_Type")
+ linetable: object(subclass_of="&PyBytes_Type")
freevars: object(subclass_of="&PyTuple_Type", c_default="NULL") = ()
cellvars: object(subclass_of="&PyTuple_Type", c_default="NULL") = ()
/
@@ -472,9 +550,9 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount,
int kwonlyargcount, int nlocals, int stacksize, int flags,
PyObject *code, PyObject *consts, PyObject *names,
PyObject *varnames, PyObject *filename, PyObject *name,
- int firstlineno, PyObject *lnotab, PyObject *freevars,
+ int firstlineno, PyObject *linetable, PyObject *freevars,
PyObject *cellvars)
-/*[clinic end generated code: output=612aac5395830184 input=85e678ea4178f234]*/
+/*[clinic end generated code: output=42c1839b082ba293 input=0ec80da632b99f57]*/
{
PyObject *co = NULL;
PyObject *ournames = NULL;
@@ -540,7 +618,7 @@ code_new_impl(PyTypeObject *type, int argcount, int posonlyargcount,
code, consts, ournames,
ourvarnames, ourfreevars,
ourcellvars, filename,
- name, firstlineno, lnotab);
+ name, firstlineno, linetable);
cleanup:
Py_XDECREF(ournames);
Py_XDECREF(ourvarnames);
@@ -584,7 +662,7 @@ code_dealloc(PyCodeObject *co)
Py_XDECREF(co->co_cellvars);
Py_XDECREF(co->co_filename);
Py_XDECREF(co->co_name);
- Py_XDECREF(co->co_lnotab);
+ Py_XDECREF(co->co_linetable);
if (co->co_cell2arg != NULL)
PyMem_FREE(co->co_cell2arg);
if (co->co_zombieframe != NULL)
@@ -636,7 +714,7 @@ code.replace
co_cellvars: object(subclass_of="&PyTuple_Type", c_default="self->co_cellvars") = None
co_filename: unicode(c_default="self->co_filename") = None
co_name: unicode(c_default="self->co_name") = None
- co_lnotab: PyBytesObject(c_default="(PyBytesObject *)self->co_lnotab") = None
+ co_linetable: PyBytesObject(c_default="(PyBytesObject *)self->co_linetable") = None
Return a copy of the code object with new values for the specified fields.
[clinic start generated code]*/
@@ -649,8 +727,8 @@ code_replace_impl(PyCodeObject *self, int co_argcount,
PyObject *co_consts, PyObject *co_names,
PyObject *co_varnames, PyObject *co_freevars,
PyObject *co_cellvars, PyObject *co_filename,
- PyObject *co_name, PyBytesObject *co_lnotab)
-/*[clinic end generated code: output=25c8e303913bcace input=d9051bc8f24e6b28]*/
+ PyObject *co_name, PyBytesObject *co_linetable)
+/*[clinic end generated code: output=50d77e668d3b449b input=a5f997b173d7f636]*/
{
#define CHECK_INT_ARG(ARG) \
if (ARG < 0) { \
@@ -680,7 +758,7 @@ code_replace_impl(PyCodeObject *self, int co_argcount,
co_argcount, co_posonlyargcount, co_kwonlyargcount, co_nlocals,
co_stacksize, co_flags, (PyObject*)co_code, co_consts, co_names,
co_varnames, co_freevars, co_cellvars, co_filename, co_name,
- co_firstlineno, (PyObject*)co_lnotab);
+ co_firstlineno, (PyObject*)co_linetable);
}
static PyObject *
@@ -933,10 +1011,189 @@ code_hash(PyCodeObject *co)
return h;
}
+typedef struct {
+ PyObject_HEAD
+ PyCodeObject *li_code;
+ PyCodeAddressRange li_line;
+ char *li_end;
+} lineiterator;
+
+
+static void
+lineiter_dealloc(lineiterator *li)
+{
+ Py_DECREF(li->li_code);
+ Py_TYPE(li)->tp_free(li);
+}
+
+static PyObject *
+lineiter_next(lineiterator *li)
+{
+ PyCodeAddressRange *bounds = &li->li_line;
+ if (!PyLineTable_NextAddressRange(bounds)) {
+ return NULL;
+ }
+ PyObject *start = NULL;
+ PyObject *end = NULL;
+ PyObject *line = NULL;
+ PyObject *result = PyTuple_New(3);
+ start = PyLong_FromLong(bounds->ar_start);
+ end = PyLong_FromLong(bounds->ar_end);
+ if (bounds->ar_line < 0) {
+ Py_INCREF(Py_None);
+ line = Py_None;
+ }
+ else {
+ line = PyLong_FromLong(bounds->ar_line);
+ }
+ if (result == NULL || start == NULL || end == NULL || line == NULL) {
+ goto error;
+ }
+ PyTuple_SET_ITEM(result, 0, start);
+ PyTuple_SET_ITEM(result, 1, end);
+ PyTuple_SET_ITEM(result, 2, line);
+ return result;
+error:
+ Py_XDECREF(start);
+ Py_XDECREF(end);
+ Py_XDECREF(line);
+ Py_XDECREF(result);
+ return result;
+}
+
+static PyTypeObject LineIterator = {
+ PyVarObject_HEAD_INIT(&PyType_Type, 0)
+ "line_iterator", /* tp_name */
+ sizeof(lineiterator), /* tp_basicsize */
+ 0, /* tp_itemsize */
+ /* methods */
+ (destructor)lineiter_dealloc, /* tp_dealloc */
+ 0, /* tp_vectorcall_offset */
+ 0, /* tp_getattr */
+ 0, /* tp_setattr */
+ 0, /* tp_as_async */
+ 0, /* tp_repr */
+ 0, /* tp_as_number */
+ 0, /* tp_as_sequence */
+ 0, /* tp_as_mapping */
+ 0, /* tp_hash */
+ 0, /* tp_call */
+ 0, /* tp_str */
+ 0, /* tp_getattro */
+ 0, /* tp_setattro */
+ 0, /* tp_as_buffer */
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
+ 0, /* tp_doc */
+ 0, /* tp_traverse */
+ 0, /* tp_clear */
+ 0, /* tp_richcompare */
+ 0, /* tp_weaklistoffset */
+ PyObject_SelfIter, /* tp_iter */
+ (iternextfunc)lineiter_next, /* tp_iternext */
+ 0, /* tp_methods */
+ 0, /* tp_members */
+ 0, /* tp_getset */
+ 0, /* tp_base */
+ 0, /* tp_dict */
+ 0, /* tp_descr_get */
+ 0, /* tp_descr_set */
+ 0, /* tp_dictoffset */
+ 0, /* tp_init */
+ 0, /* tp_alloc */
+ 0, /* tp_new */
+ PyObject_Del, /* tp_free */
+};
+
+static PyObject *
+code_linesiterator(PyCodeObject *code, PyObject *Py_UNUSED(args))
+{
+ lineiterator *li = (lineiterator *)PyType_GenericAlloc(&LineIterator, 0);
+ if (li == NULL) {
+ return NULL;
+ }
+ Py_INCREF(code);
+ li->li_code = code;
+ _PyCode_InitAddressRange(code, &li->li_line);
+ return (PyObject *)li;
+}
+
+static void
+retreat(PyCodeAddressRange *bounds)
+{
+ int ldelta = ((signed char *)bounds->lo_next)[-1];
+ if (ldelta == -128) {
+ ldelta = 0;
+ }
+ bounds->ar_computed_line -= ldelta;
+ bounds->lo_next -= 2;
+ bounds->ar_end = bounds->ar_start;
+ bounds->ar_start -= ((unsigned char *)bounds->lo_next)[-2];
+ ldelta = ((signed char *)bounds->lo_next)[-1];
+ if (ldelta == -128) {
+ bounds->ar_line = -1;
+ }
+ else {
+ bounds->ar_line = bounds->ar_computed_line;
+ }
+}
+
+static void
+advance(PyCodeAddressRange *bounds)
+{
+ bounds->ar_start = bounds->ar_end;
+ int delta = ((unsigned char *)bounds->lo_next)[0];
+ assert (delta < 255);
+ bounds->ar_end += delta;
+ int ldelta = ((signed char *)bounds->lo_next)[1];
+ bounds->lo_next += 2;
+ if (ldelta == -128) {
+ bounds->ar_line = -1;
+ }
+ else {
+ bounds->ar_computed_line += ldelta;
+ bounds->ar_line = bounds->ar_computed_line;
+ }
+}
+
+static inline int
+at_end(PyCodeAddressRange *bounds) {
+ return ((unsigned char *)bounds->lo_next)[0] == 255;
+}
+
+int
+PyLineTable_PreviousAddressRange(PyCodeAddressRange *range)
+{
+ if (range->ar_start <= 0) {
+ return 0;
+ }
+ retreat(range);
+ while (range->ar_start == range->ar_end) {
+ assert(range->ar_start > 0);
+ retreat(range);
+ }
+ return 1;
+}
+
+int
+PyLineTable_NextAddressRange(PyCodeAddressRange *range)
+{
+ if (at_end(range)) {
+ return 0;
+ }
+ advance(range);
+ while (range->ar_start == range->ar_end) {
+ assert(!at_end(range));
+ advance(range);
+ }
+ return 1;
+}
+
+
/* XXX code objects need to participate in GC? */
static struct PyMethodDef code_methods[] = {
{"__sizeof__", (PyCFunction)code_sizeof, METH_NOARGS},
+ {"co_lines", (PyCFunction)code_linesiterator, METH_NOARGS},
CODE_REPLACE_METHODDEF
{NULL, NULL} /* sentinel */
};
@@ -971,7 +1228,7 @@ PyTypeObject PyCode_Type = {
0, /* tp_iternext */
code_methods, /* tp_methods */
code_memberlist, /* tp_members */
- 0, /* tp_getset */
+ code_getsetlist, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
@@ -982,78 +1239,55 @@ PyTypeObject PyCode_Type = {
code_new, /* tp_new */
};
-/* Use co_lnotab to compute the line number from a bytecode index, addrq. See
+/* Use co_linetable to compute the line number from a bytecode index, addrq. See
lnotab_notes.txt for the details of the lnotab representation.
*/
int
PyCode_Addr2Line(PyCodeObject *co, int addrq)
{
- Py_ssize_t size = PyBytes_Size(co->co_lnotab) / 2;
- unsigned char *p = (unsigned char*)PyBytes_AsString(co->co_lnotab);
- int line = co->co_firstlineno;
- int addr = 0;
- while (--size >= 0) {
- addr += *p++;
- if (addr > addrq)
- break;
- line += (signed char)*p;
- p++;
+ if (addrq == -1) {
+ return co->co_firstlineno;
}
- return line;
+ assert(addrq >= 0 && addrq < PyBytes_GET_SIZE(co->co_code));
+ PyCodeAddressRange bounds;
+ _PyCode_InitAddressRange(co, &bounds);
+ return _PyCode_CheckLineNumber(addrq, &bounds);
+}
+
+void
+PyLineTable_InitAddressRange(char *linetable, int firstlineno, PyCodeAddressRange *range)
+{
+ range->lo_next = linetable;
+ range->ar_start = -1;
+ range->ar_end = 0;
+ range->ar_computed_line = range->ar_line = firstlineno;
+}
+
+int
+_PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds)
+{
+ char *linetable = PyBytes_AS_STRING(co->co_linetable);
+ PyLineTable_InitAddressRange(linetable, co->co_firstlineno, bounds);
+ return bounds->ar_line;
}
/* Update *bounds to describe the first and one-past-the-last instructions in
- the same line as lasti. Return the number of that line. */
+ the same line as lasti. Return the number of that line, or -1 if lasti is out of bounds. */
int
-_PyCode_CheckLineNumber(PyCodeObject* co, int lasti, PyAddrPair *bounds)
+_PyCode_CheckLineNumber(int lasti, PyCodeAddressRange *bounds)
{
- Py_ssize_t size;
- int addr, line;
- unsigned char* p;
-
- p = (unsigned char*)PyBytes_AS_STRING(co->co_lnotab);
- size = PyBytes_GET_SIZE(co->co_lnotab) / 2;
-
- addr = 0;
- line = co->co_firstlineno;
- assert(line > 0);
-
- /* possible optimization: if f->f_lasti == instr_ub
- (likely to be a common case) then we already know
- instr_lb -- if we stored the matching value of p
- somewhere we could skip the first while loop. */
-
- /* See lnotab_notes.txt for the description of
- co_lnotab. A point to remember: increments to p
- come in (addr, line) pairs. */
-
- bounds->ap_lower = 0;
- while (size > 0) {
- if (addr + *p > lasti)
- break;
- addr += *p++;
- if ((signed char)*p)
- bounds->ap_lower = addr;
- line += (signed char)*p;
- p++;
- --size;
- }
-
- if (size > 0) {
- while (--size >= 0) {
- addr += *p++;
- if ((signed char)*p)
- break;
- p++;
+ while (bounds->ar_end <= lasti) {
+ if (!PyLineTable_NextAddressRange(bounds)) {
+ return -1;
}
- bounds->ap_upper = addr;
}
- else {
- bounds->ap_upper = INT_MAX;
+ while (bounds->ar_start > lasti) {
+ if (!PyLineTable_PreviousAddressRange(bounds)) {
+ return -1;
+ }
}
-
- return line;
+ return bounds->ar_line;
}
diff --git a/Objects/frameobject.c b/Objects/frameobject.c
index 8838b80..787cd8b 100644
--- a/Objects/frameobject.c
+++ b/Objects/frameobject.c
@@ -249,36 +249,22 @@ explain_incompatible_block_stack(int64_t to_stack)
static int *
marklines(PyCodeObject *code, int len)
{
+ PyCodeAddressRange bounds;
+ _PyCode_InitAddressRange(code, &bounds);
+ assert (bounds.ar_end == 0);
+
int *linestarts = PyMem_New(int, len);
if (linestarts == NULL) {
return NULL;
}
- Py_ssize_t size = PyBytes_GET_SIZE(code->co_lnotab) / 2;
- unsigned char *p = (unsigned char*)PyBytes_AS_STRING(code->co_lnotab);
- int line = code->co_firstlineno;
- int addr = 0;
- int index = 0;
- while (--size >= 0) {
- addr += *p++;
- if (index*2 < addr) {
- linestarts[index++] = line;
- }
- while (index*2 < addr) {
- linestarts[index++] = -1;
- if (index >= len) {
- break;
- }
- }
- line += (signed char)*p;
- p++;
+ for (int i = 0; i < len; i++) {
+ linestarts[i] = -1;
}
- if (index < len) {
- linestarts[index++] = line;
+
+ while (PyLineTable_NextAddressRange(&bounds)) {
+ assert(bounds.ar_start/2 < len);
+ linestarts[bounds.ar_start/2] = bounds.ar_line;
}
- while (index < len) {
- linestarts[index++] = -1;
- }
- assert(index == len);
return linestarts;
}
@@ -925,7 +911,7 @@ _PyFrame_New_NoTrack(PyThreadState *tstate, PyCodeObject *code,
}
f->f_lasti = -1;
- f->f_lineno = code->co_firstlineno;
+ f->f_lineno = 0;
f->f_iblock = 0;
f->f_state = FRAME_CREATED;
f->f_gen = NULL;
diff --git a/Objects/lnotab_notes.txt b/Objects/lnotab_notes.txt
index 71a2979..046f753 100644
--- a/Objects/lnotab_notes.txt
+++ b/Objects/lnotab_notes.txt
@@ -1,11 +1,103 @@
-All about co_lnotab, the line number table.
+Description of the internal format of the line number table
-Code objects store a field named co_lnotab. This is an array of unsigned bytes
-disguised as a Python bytes object. It is used to map bytecode offsets to
-source code line #s for tracebacks and to identify line number boundaries for
-line tracing. Because of internals of the peephole optimizer, it's possible
-for lnotab to contain bytecode offsets that are no longer valid (for example
-if the optimizer removed the last line in a function).
+Conceptually, the line number table consists of a sequence of triples:
+ start-offset (inclusive), end-offset (exclusive), line-number.
+
+Note that note all byte codes have a line number so we need handle `None` for the line-number.
+
+However, storing the above sequence directly would be very inefficient as we would need 12 bytes per entry.
+
+First of all, we can note that the end of one entry is the same as the start of the next, so we can overlap entries.
+Secondly we also note that we don't really need arbitrary access to the sequence, so we can store deltas.
+
+We just need to store (end - start, line delta) pairs. The start offset of the first entry is always zero.
+
+Thirdly, most deltas are small, so we can use a single byte for each value, as long we allow several entries for the same line.
+
+Consider the following table
+ Start End Line
+ 0 6 1
+ 6 50 2
+ 50 350 7
+ 350 360 No line number
+ 360 376 8
+ 376 380 208
+
+Stripping the redundant ends gives:
+
+ End-Start Line-delta
+ 6 +1
+ 44 +1
+ 300 +5
+ 10 No line number
+ 16 +1
+ 4 +200
+
+
+Note that the end - start value is always positive.
+
+Finally in order, to fit into a single byte we need to convert start deltas to the range 0 <= delta <= 254,
+and line deltas to the range -127 <= delta <= 127.
+A line delta of -128 is used to indicate no line number.
+A start delta of 255 is used as a sentinel to mark the end of the table.
+Also note that a delta of zero indicates that there are no bytecodes in the given range,
+which means can use an invalidate line number for that range.
+
+Final form:
+
+ Start delta Line delta
+ 6 +1
+ 44 +1
+ 254 +5
+ 46 0
+ 10 -128 (No line number, treated as a delta of zero)
+ 16 +1
+ 0 +127 (line 135, but the range is empty as no bytecodes are at line 135)
+ 4 +73
+ 255 (end mark) ---
+
+Iterating over the table.
+-------------------------
+
+For the `co_lines` attribute we want to emit the full form, omitting the (350, 360, No line number) and empty entries.
+
+The code is as follows:
+
+def co_lines(code):
+ line = code.co_firstlineno
+ end = 0
+ table_iter = iter(code.internal_line_table):
+ for sdelta, ldelta in table_iter:
+ if sdelta == 255:
+ break
+ if ldelta == 0: # No change to line number, just accumulate changes to end
+ end += odelta
+ continue
+ start = end
+ end = start + sdelta
+ if ldelta == -128: # No valid line number -- skip entry
+ continue
+ line += ldelta
+ if end == start: # Empty range, omit.
+ continue
+ yield start, end, line
+
+
+
+
+The historical co_lnotab format
+-------------------------------
+
+prior to 3.10 code objects stored a field named co_lnotab.
+This was an array of unsigned bytes disguised as a Python bytes object.
+
+The old co_lnotab did not account for the presence of bytecodes without a line number,
+nor was it well suited to tracing as a number of workarounds were required.
+
+The old format can still be accessed via `code.co_lnotab`, which is lazily computed from the new format.
+
+Below is the description of the old co_lnotab format:
+
The array is conceptually a compressed list of
(bytecode offset increment, line number increment)