blob: e862a99cfb34cada436949e38818ccba9293c034 [file] [log] [blame]
Devin Jeanpierrec5bace22017-09-06 11:15:35 -07001/* A fuzz test for CPython.
2
3 The only exposed function is LLVMFuzzerTestOneInput, which is called by
4 fuzzers and by the _fuzz module for smoke tests.
5
6 To build exactly one fuzz test, as when running in oss-fuzz etc.,
7 build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
8 LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
9 -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.
10
11 See the source code for LLVMFuzzerTestOneInput for details. */
12
13#include <Python.h>
14#include <stdlib.h>
15#include <inttypes.h>
16
17/* Fuzz PyFloat_FromString as a proxy for float(str). */
18static int fuzz_builtin_float(const char* data, size_t size) {
19 PyObject* s = PyBytes_FromStringAndSize(data, size);
20 if (s == NULL) return 0;
21 PyObject* f = PyFloat_FromString(s);
22 if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {
23 PyErr_Clear();
24 }
25
26 Py_XDECREF(f);
27 Py_DECREF(s);
28 return 0;
29}
30
Ammar Askara6e190e2019-06-11 21:30:35 -070031#define MAX_INT_TEST_SIZE 0x10000
32
Devin Jeanpierrec5bace22017-09-06 11:15:35 -070033/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
34static int fuzz_builtin_int(const char* data, size_t size) {
Ammar Askara6e190e2019-06-11 21:30:35 -070035 /* Ignore test cases with very long ints to avoid timeouts
36 int("9" * 1000000) is not a very interesting test caase */
37 if (size > MAX_INT_TEST_SIZE) {
38 return 0;
39 }
Devin Jeanpierrec5bace22017-09-06 11:15:35 -070040 /* Pick a random valid base. (When the fuzzed function takes extra
41 parameters, it's somewhat normal to hash the input to generate those
42 parameters. We want to exercise all code paths, so we do so here.) */
43 int base = _Py_HashBytes(data, size) % 37;
44 if (base == 1) {
45 // 1 is the only number between 0 and 36 that is not a valid base.
46 base = 0;
47 }
48 if (base == -1) {
49 return 0; // An error occurred, bail early.
50 }
51 if (base < 0) {
52 base = -base;
53 }
54
55 PyObject* s = PyUnicode_FromStringAndSize(data, size);
56 if (s == NULL) {
57 if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
58 PyErr_Clear();
59 }
60 return 0;
61 }
62 PyObject* l = PyLong_FromUnicodeObject(s, base);
63 if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
64 PyErr_Clear();
65 }
66 PyErr_Clear();
67 Py_XDECREF(l);
68 Py_DECREF(s);
69 return 0;
70}
71
72/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */
73static int fuzz_builtin_unicode(const char* data, size_t size) {
74 PyObject* s = PyUnicode_FromStringAndSize(data, size);
75 if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
76 PyErr_Clear();
77 }
78 Py_XDECREF(s);
79 return 0;
80}
81
Ammar Askara6e190e2019-06-11 21:30:35 -070082#define MAX_JSON_TEST_SIZE 0x10000
83
84/* Initialized in LLVMFuzzerTestOneInput */
85PyObject* json_loads_method = NULL;
86/* Fuzz json.loads(x) */
87static int fuzz_json_loads(const char* data, size_t size) {
88 /* Since python supports arbitrarily large ints in JSON,
89 long inputs can lead to timeouts on boring inputs like
90 `json.loads("9" * 100000)` */
91 if (size > MAX_JSON_TEST_SIZE) {
92 return 0;
93 }
94 PyObject* input_bytes = PyBytes_FromStringAndSize(data, size);
95 if (input_bytes == NULL) {
96 return 0;
97 }
98 PyObject* parsed = PyObject_CallFunctionObjArgs(json_loads_method, input_bytes, NULL);
99 /* Ignore ValueError as the fuzzer will more than likely
100 generate some invalid json and values */
101 if (parsed == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
102 PyErr_Clear();
103 }
104 /* Ignore RecursionError as the fuzzer generates long sequences of
105 arrays such as `[[[...` */
106 if (parsed == NULL && PyErr_ExceptionMatches(PyExc_RecursionError)) {
107 PyErr_Clear();
108 }
109 /* Ignore unicode errors, invalid byte sequences are common */
110 if (parsed == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
111 PyErr_Clear();
112 }
113 Py_DECREF(input_bytes);
114 Py_XDECREF(parsed);
115 return 0;
116}
117
/* Run one fuzz target and abort the process if it leaves an exception set.

   data/size: the raw fuzzer input, forwarded to the target as char data.
   fuzzer:    the fuzz target to invoke.
   Returns whatever the target returned (someday the value might mean
   something, so it is propagated). */
static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
    const int result = fuzzer((const char*) data, size);
    if (!PyErr_Occurred()) {
        return result;
    }
    /* Fuzz tests should handle expected errors for themselves.
       This is a last-ditch check in case they didn't. */
    PyErr_Print();
    abort();
}
130
/* CPython generates a lot of leak warnings for whatever reason, so this
   always reports 1 ("turned off"). */
int __lsan_is_turned_off(void)
{
    return 1;
}
133
Ammar Askara15a7bc2019-06-08 07:43:16 -0700134
/* One-time fuzzer setup hook: record the program name for the interpreter.

   argc: unused.
   argv: pointer to the argument vector; only (*argv)[0] is read.
   Always returns 0. */
int LLVMFuzzerInitialize(int *argc, char ***argv) {
    (void)argc;

    wchar_t* wide_program_name = Py_DecodeLocale((*argv)[0], NULL);
    if (wide_program_name == NULL) {
        /* Decoding failed (bad encoding or out of memory).  Don't hand a
           NULL pointer on; the interpreter keeps its default program name. */
        return 0;
    }
    /* The decoded buffer is deliberately never freed: it is handed to
       Py_SetProgramName and is not released anywhere in this file. */
    Py_SetProgramName(wide_program_name);
    return 0;
}
140
Devin Jeanpierrec5bace22017-09-06 11:15:35 -0700141/* Fuzz test interface.
142 This returns the bitwise or of all fuzz test's return values.
143
144 All fuzz tests must return 0, as all nonzero return codes are reserved for
145 future use -- we propagate the return values for that future case.
146 (And we bitwise or when running multiple tests to verify that normally we
147 only return 0.) */
148int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
149 if (!Py_IsInitialized()) {
150 /* LLVMFuzzerTestOneInput is called repeatedly from the same process,
151 with no separate initialization phase, sadly, so we need to
152 initialize CPython ourselves on the first run. */
153 Py_InitializeEx(0);
154 }
Ammar Askara6e190e2019-06-11 21:30:35 -0700155#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
156 if (json_loads_method == NULL) {
157 PyObject* json_module = PyImport_ImportModule("json");
158 json_loads_method = PyObject_GetAttrString(json_module, "loads");
159 }
160#endif
Devin Jeanpierrec5bace22017-09-06 11:15:35 -0700161
162 int rv = 0;
163
Devin Jeanpierre78ebc732017-09-06 18:00:47 -0700164#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
Devin Jeanpierrec5bace22017-09-06 11:15:35 -0700165 rv |= _run_fuzz(data, size, fuzz_builtin_float);
166#endif
Devin Jeanpierre78ebc732017-09-06 18:00:47 -0700167#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_int)
Devin Jeanpierrec5bace22017-09-06 11:15:35 -0700168 rv |= _run_fuzz(data, size, fuzz_builtin_int);
169#endif
Devin Jeanpierre78ebc732017-09-06 18:00:47 -0700170#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
Devin Jeanpierrec5bace22017-09-06 11:15:35 -0700171 rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
172#endif
Ammar Askara6e190e2019-06-11 21:30:35 -0700173#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
174 rv |= _run_fuzz(data, size, fuzz_json_loads);
175#endif
Devin Jeanpierrec5bace22017-09-06 11:15:35 -0700176 return rv;
177}