Issue #7451: Improve decoding performance of JSON objects, and reduce
the memory consumption of said decoded objects when they use the same
strings as keys.
diff --git a/Modules/_json.c b/Modules/_json.c
index 2e39525..bbaf7dd 100644
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -36,6 +36,7 @@
     PyObject *parse_float;
     PyObject *parse_int;
     PyObject *parse_constant;
+    PyObject *memo;
 } PyScannerObject;
 
 static PyMemberDef scanner_members[] = {
@@ -305,6 +306,21 @@
     return tpl;
 }
 
+#define APPEND_OLD_CHUNK \
+    if (chunk != NULL) { \
+        if (chunks == NULL) { \
+            chunks = PyList_New(0); \
+            if (chunks == NULL) { \
+                goto bail; \
+            } \
+        } \
+        if (PyList_Append(chunks, chunk)) { \
+            Py_DECREF(chunk); \
+            goto bail; \
+        } \
+        Py_CLEAR(chunk); \
+    }
+
 static PyObject *
 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
 {
@@ -316,15 +332,14 @@
 
     Return value is a new PyUnicode
     */
-    PyObject *rval;
+    PyObject *rval = NULL;
     Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
     Py_ssize_t begin = end - 1;
     Py_ssize_t next = begin;
     const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
-    PyObject *chunks = PyList_New(0);
-    if (chunks == NULL) {
-        goto bail;
-    }
+    PyObject *chunks = NULL;
+    PyObject *chunk = NULL;
+
     if (end < 0 || len <= end) {
         PyErr_SetString(PyExc_ValueError, "end is out of bounds");
         goto bail;
@@ -332,7 +347,6 @@
     while (1) {
         /* Find the end of the string or the next escape */
         Py_UNICODE c = 0;
-        PyObject *chunk = NULL;
         for (next = end; next < len; next++) {
             c = buf[next];
             if (c == '"' || c == '\\') {
@@ -349,15 +363,11 @@
         }
         /* Pick up this chunk if it's not zero length */
         if (next != end) {
+            APPEND_OLD_CHUNK
             chunk = PyUnicode_FromUnicode(&buf[end], next - end);
             if (chunk == NULL) {
                 goto bail;
             }
-            if (PyList_Append(chunks, chunk)) {
-                Py_DECREF(chunk);
-                goto bail;
-            }
-            Py_DECREF(chunk);
         }
         next++;
         if (c == '"') {
@@ -459,27 +469,34 @@
             }
 #endif
         }
+        APPEND_OLD_CHUNK
         chunk = PyUnicode_FromUnicode(&c, 1);
         if (chunk == NULL) {
             goto bail;
         }
-        if (PyList_Append(chunks, chunk)) {
-            Py_DECREF(chunk);
-            goto bail;
-        }
-        Py_DECREF(chunk);
     }
 
-    rval = join_list_unicode(chunks);
-    if (rval == NULL) {
-        goto bail;
+    if (chunks == NULL) {
+        if (chunk != NULL)
+            rval = chunk;
+        else
+            rval = PyUnicode_FromStringAndSize("", 0);
     }
-    Py_DECREF(chunks);
+    else {
+        APPEND_OLD_CHUNK
+        rval = join_list_unicode(chunks);
+        if (rval == NULL) {
+            goto bail;
+        }
+        Py_CLEAR(chunks);
+    }
+
     *next_end_ptr = end;
     return rval;
 bail:
     *next_end_ptr = -1;
     Py_XDECREF(chunks);
+    Py_XDECREF(chunk);
     return NULL;
 }
 
@@ -578,6 +595,7 @@
     Py_CLEAR(s->parse_float);
     Py_CLEAR(s->parse_int);
     Py_CLEAR(s->parse_constant);
+    Py_CLEAR(s->memo);
     return 0;
 }
 
@@ -593,10 +611,16 @@
     Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
     Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
     PyObject *val = NULL;
-    PyObject *rval = PyList_New(0);
+    PyObject *rval = NULL;
     PyObject *key = NULL;
     int strict = PyObject_IsTrue(s->strict);
+    int has_pairs_hook = (s->object_pairs_hook != Py_None);
     Py_ssize_t next_idx;
+
+    if (has_pairs_hook)
+        rval = PyList_New(0);
+    else
+        rval = PyDict_New();
     if (rval == NULL)
         return NULL;
 
@@ -606,6 +630,8 @@
     /* only loop if the object is non-empty */
     if (idx <= end_idx && str[idx] != '}') {
         while (idx <= end_idx) {
+            PyObject *memokey;
+
             /* read key */
             if (str[idx] != '"') {
                 raise_errmsg("Expecting property name", pystr, idx);
@@ -614,6 +640,16 @@
             key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
             if (key == NULL)
                 goto bail;
+            memokey = PyDict_GetItem(s->memo, key);
+            if (memokey != NULL) {
+                Py_INCREF(memokey);
+                Py_DECREF(key);
+                key = memokey;
+            }
+            else {
+                if (PyDict_SetItem(s->memo, key, key) < 0)
+                    goto bail;
+            }
             idx = next_idx;
 
             /* skip whitespace between key and : delimiter, read :, skip whitespace */
@@ -630,19 +666,24 @@
             if (val == NULL)
                 goto bail;
 
-            {
-                PyObject *tuple = PyTuple_Pack(2, key, val);
-                if (tuple == NULL)
+            if (has_pairs_hook) {
+                PyObject *item = PyTuple_Pack(2, key, val);
+                if (item == NULL)
                     goto bail;
-                if (PyList_Append(rval, tuple) == -1) {
-                    Py_DECREF(tuple);
+                Py_CLEAR(key);
+                Py_CLEAR(val);
+                if (PyList_Append(rval, item) == -1) {
+                    Py_DECREF(item);
                     goto bail;
                 }
-                Py_DECREF(tuple);
+                Py_DECREF(item);
             }
-
-            Py_CLEAR(key);
-            Py_CLEAR(val);
+            else {
+                if (PyDict_SetItem(rval, key, val) < 0)
+                    goto bail;
+                Py_CLEAR(key);
+                Py_CLEAR(val);
+            }
             idx = next_idx;
 
             /* skip whitespace before } or , */
@@ -672,36 +713,23 @@
 
     *next_idx_ptr = idx + 1;
 
-    if (s->object_pairs_hook != Py_None) {
+    if (has_pairs_hook) {
         val = PyObject_CallFunctionObjArgs(s->object_pairs_hook, rval, NULL);
-        if (val == NULL)
-            goto bail;
         Py_DECREF(rval);
         return val;
     }
 
-    val = PyDict_New();
-    if (val == NULL)
-        goto bail;
-    if (PyDict_MergeFromSeq2(val, rval, 1) == -1)
-        goto bail;
-    Py_DECREF(rval);
-    rval = val;
-
     /* if object_hook is not None: rval = object_hook(rval) */
     if (s->object_hook != Py_None) {
         val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
-        if (val == NULL)
-            goto bail;
         Py_DECREF(rval);
-        rval = val;
-        val = NULL;
+        return val;
     }
     return rval;
 bail:
     Py_XDECREF(key);
     Py_XDECREF(val);
-    Py_DECREF(rval);
+    Py_XDECREF(rval);
     return NULL;
 }
 
@@ -988,6 +1016,9 @@
                  Py_TYPE(pystr)->tp_name);
         return NULL;
     }
+    PyDict_Clear(s->memo);
+    if (rval == NULL)
+        return NULL;
     return _build_rval_index_tuple(rval, next_idx);
 }
 
@@ -1021,6 +1052,12 @@
     if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
         return -1;
 
+    if (s->memo == NULL) {
+        s->memo = PyDict_New();
+        if (s->memo == NULL)
+            goto bail;
+    }
+
     /* All of these will fail "gracefully" so we don't need to verify them */
     s->strict = PyObject_GetAttrString(ctx, "strict");
     if (s->strict == NULL)