bpo-29505: Fuzz json module, enforce size limit on int(x) fuzz (GH-13991)

* bpo-29505: Enable fuzz testing of the json module, enforce size limit on int(x) fuzz and json input size to avoid timeouts.

Contributed by by Ammar Askar for Google.
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index 54f816e..e862a99 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -28,8 +28,15 @@
     return 0;
 }
 
+#define MAX_INT_TEST_SIZE 0x10000
+
 /* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
 static int fuzz_builtin_int(const char* data, size_t size) {
+    /* Ignore test cases with very long ints to avoid timeouts
+       int("9" * 1000000) is not a very interesting test caase */
+    if (size > MAX_INT_TEST_SIZE) {
+        return 0;
+    }
     /* Pick a random valid base. (When the fuzzed function takes extra
        parameters, it's somewhat normal to hash the input to generate those
        parameters. We want to exercise all code paths, so we do so here.) */
@@ -72,6 +79,42 @@
     return 0;
 }
 
+#define MAX_JSON_TEST_SIZE 0x10000
+
+/* Initialized in LLVMFuzzerTestOneInput */
+PyObject* json_loads_method = NULL;
+/* Fuzz json.loads(x) */
+static int fuzz_json_loads(const char* data, size_t size) {
+    /* Since python supports arbitrarily large ints in JSON,
+       long inputs can lead to timeouts on boring inputs like
+       `json.loads("9" * 100000)` */
+    if (size > MAX_JSON_TEST_SIZE) {
+        return 0;
+    }
+    PyObject* input_bytes = PyBytes_FromStringAndSize(data, size);
+    if (input_bytes == NULL) {
+        return 0;
+    }
+    PyObject* parsed = PyObject_CallFunctionObjArgs(json_loads_method, input_bytes, NULL);
+    /* Ignore ValueError as the fuzzer will more than likely
+       generate some invalid json and values */
+    if (parsed == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
+        PyErr_Clear();
+    }
+    /* Ignore RecursionError as the fuzzer generates long sequences of
+       arrays such as `[[[...` */
+    if (parsed == NULL && PyErr_ExceptionMatches(PyExc_RecursionError)) {
+        PyErr_Clear();
+    }
+    /* Ignore unicode errors, invalid byte sequences are common */
+    if (parsed == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+        PyErr_Clear();
+    }
+    Py_DECREF(input_bytes);
+    Py_XDECREF(parsed);
+    return 0;
+}
+
 /* Run fuzzer and abort on failure. */
 static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
     int rv = fuzzer((const char*) data, size);
@@ -88,7 +131,6 @@
 /* CPython generates a lot of leak warnings for whatever reason. */
 int __lsan_is_turned_off(void) { return 1; }
 
-wchar_t wide_program_name[NAME_MAX];
 
 int LLVMFuzzerInitialize(int *argc, char ***argv) {
     wchar_t* wide_program_name = Py_DecodeLocale(*argv[0], NULL);
@@ -110,6 +152,12 @@
            initialize CPython ourselves on the first run. */
         Py_InitializeEx(0);
     }
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
+    if (json_loads_method == NULL) {
+        PyObject* json_module = PyImport_ImportModule("json");
+        json_loads_method = PyObject_GetAttrString(json_module, "loads");
+    }
+#endif
 
     int rv = 0;
 
@@ -122,5 +170,8 @@
 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
     rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
 #endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
+    rv |= _run_fuzz(data, size, fuzz_json_loads);
+#endif
   return rv;
 }