bpo-26219: per opcode cache for LOAD_GLOBAL (GH-12884)
This patch implements a per-opcode cache mechanism and uses it
only for the LOAD_GLOBAL opcode.
Based on Yury's opcache3.patch in bpo-26219.
diff --git a/Objects/codeobject.c b/Objects/codeobject.c
index 2333075..0d9e5d1 100644
--- a/Objects/codeobject.c
+++ b/Objects/codeobject.c
@@ -2,7 +2,9 @@
#include "Python.h"
#include "code.h"
+#include "opcode.h"
#include "structmember.h"
+#include "pycore_code.h"
#include "pycore_pystate.h"
#include "pycore_tupleobject.h"
#include "clinic/codeobject.c.h"
@@ -233,9 +235,56 @@
co->co_zombieframe = NULL;
co->co_weakreflist = NULL;
co->co_extra = NULL;
+
+ co->co_opcache_map = NULL;
+ co->co_opcache = NULL;
+ co->co_opcache_flag = 0;
+ co->co_opcache_size = 0;
return co;
}
+/* Build the per-opcode cache tables of a code object.
+ *
+ * Scans co->co_code and gives every LOAD_GLOBAL instruction a 1-based slot
+ * number.  The slot number is stored in co->co_opcache_map at the index of
+ * the instruction *following* the LOAD_GLOBAL; a 0 entry means "no cache
+ * slot".  Map entries are unsigned char, so scanning stops once 255 slots
+ * have been handed out.  co->co_opcache is then allocated zero-filled with
+ * one _PyOpcache entry per slot, and co->co_opcache_size is set to the
+ * number of slots.
+ *
+ * If the code contains no LOAD_GLOBAL, both tables are left NULL and
+ * co_opcache_size is 0.
+ *
+ * Returns 0 on success and -1 if a memory allocation fails.  On failure
+ * both co_opcache_map and co_opcache are NULL, so no freed pointer is left
+ * behind in the code object.  This function does not set an exception
+ * itself when an allocation fails.
+ */
+int
+_PyCode_InitOpcache(PyCodeObject *co)
+{
+    Py_ssize_t co_size = PyBytes_Size(co->co_code) / sizeof(_Py_CODEUNIT);
+
+    // The map is indexed by the offset of the *next* instruction, i.e. by
+    // values in 1..co_size, so it needs co_size + 1 entries.  Allocating
+    // only co_size would write past the end for a trailing LOAD_GLOBAL.
+    co->co_opcache_map = (unsigned char *)PyMem_Calloc(co_size + 1, 1);
+    if (co->co_opcache_map == NULL) {
+        return -1;
+    }
+
+    _Py_CODEUNIT *opcodes = (_Py_CODEUNIT*)PyBytes_AS_STRING(co->co_code);
+    Py_ssize_t opts = 0;
+
+    for (Py_ssize_t i = 0; i < co_size;) {
+        unsigned char opcode = _Py_OPCODE(opcodes[i]);
+        i++;  // 'i' is now aligned to (next_instr - first_instr)
+
+        // TODO: LOAD_METHOD, LOAD_ATTR
+        if (opcode == LOAD_GLOBAL) {
+            co->co_opcache_map[i] = ++opts;
+            if (opts > 254) {
+                // 255 is the largest slot number an unsigned char can hold.
+                break;
+            }
+        }
+    }
+
+    if (opts) {
+        co->co_opcache = (_PyOpcache *)PyMem_Calloc(opts, sizeof(_PyOpcache));
+        if (co->co_opcache == NULL) {
+            PyMem_FREE(co->co_opcache_map);
+            // Clear the pointer: a stale non-NULL value would be freed a
+            // second time when the code object is deallocated.
+            co->co_opcache_map = NULL;
+            return -1;
+        }
+    }
+    else {
+        // Nothing to cache: drop the map so both tables are consistently NULL.
+        PyMem_FREE(co->co_opcache_map);
+        co->co_opcache_map = NULL;
+        co->co_opcache = NULL;
+    }
+
+    co->co_opcache_size = opts;
+    return 0;
+}
+
+
PyCodeObject *
PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno)
{
@@ -458,6 +507,15 @@
static void
code_dealloc(PyCodeObject *co)
{
+ if (co->co_opcache != NULL) {
+ PyMem_FREE(co->co_opcache);
+ }
+ if (co->co_opcache_map != NULL) {
+ PyMem_FREE(co->co_opcache_map);
+ }
+ co->co_opcache_flag = 0;
+ co->co_opcache_size = 0;
+
if (co->co_extra != NULL) {
PyInterpreterState *interp = _PyInterpreterState_GET_UNSAFE();
_PyCodeObjectExtra *co_extra = co->co_extra;
@@ -504,6 +562,13 @@
res += sizeof(_PyCodeObjectExtra) +
(co_extra->ce_size-1) * sizeof(co_extra->ce_extras[0]);
}
+ if (co->co_opcache != NULL) {
+ assert(co->co_opcache_map != NULL);
+ // co_opcache_map
+ res += PyBytes_GET_SIZE(co->co_code) / sizeof(_Py_CODEUNIT);
+ // co_opcache
+ res += co->co_opcache_size * sizeof(_PyOpcache);
+ }
return PyLong_FromSsize_t(res);
}
diff --git a/Objects/dictobject.c b/Objects/dictobject.c
index 2b04b0b..0cc1443 100644
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@@ -1080,20 +1080,21 @@
return 0;
}
- if (_PyDict_HasSplitTable(mp)) {
- mp->ma_values[ix] = value;
- if (old_value == NULL) {
- /* pending state */
- assert(ix == mp->ma_used);
- mp->ma_used++;
+ if (old_value != value) {
+ if (_PyDict_HasSplitTable(mp)) {
+ mp->ma_values[ix] = value;
+ if (old_value == NULL) {
+ /* pending state */
+ assert(ix == mp->ma_used);
+ mp->ma_used++;
+ }
}
+ else {
+ assert(old_value != NULL);
+ DK_ENTRIES(mp->ma_keys)[ix].me_value = value;
+ }
+ mp->ma_version_tag = DICT_NEXT_VERSION();
}
- else {
- assert(old_value != NULL);
- DK_ENTRIES(mp->ma_keys)[ix].me_value = value;
- }
-
- mp->ma_version_tag = DICT_NEXT_VERSION();
Py_XDECREF(old_value); /* which **CAN** re-enter (see issue #22653) */
ASSERT_CONSISTENT(mp);
Py_DECREF(key);