bpo-29505: Add fuzz tests for float(str), int(str), unicode(str) (#2878) · python/cpython@c5bace2 · GitHub
[go: up one dir, main page]

Skip to content

Commit c5bace2

Browse files
ssbr authored and gpshead committed
bpo-29505: Add fuzz tests for float(str), int(str), unicode(str) (#2878)
Add basic fuzz tests for a few common builtin functions. This is an easy place to start, and these functions are probably safe. We'll want to add more fuzz tests later. Let's bootstrap using these. While the fuzz tests are included in CPython and compiled / tested on a very basic level inside CPython itself, the actual fuzzing happens as part of oss-fuzz (https://github.com/google/oss-fuzz). The reason to include the tests in CPython is to make sure that they're maintained as part of the CPython project, especially when (as some eventually will) they use internal implementation details in the test. (This will be necessary sometimes because e.g. the fuzz test should never enter Python's interpreter loop, whereas some APIs only expose themselves publicly as Python functions.) This particular set of changes is part of testing Python's builtins, tracked internally at Google by b/37562550. The _xxtestfuzz module that this change adds need not be shipped with binary distributions of Python.
1 parent 5fcd5e6 commit c5bace2

File tree

7 files changed

+252
-0
lines changed

7 files changed

+252
-0
lines changed

Lib/test/test_xxtestfuzz.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import faulthandler
2+
import test.support
3+
import unittest
4+
5+
_xxtestfuzz = test.support.import_module('_xxtestfuzz')
6+
7+
8+
class TestFuzzer(unittest.TestCase):
    """Smoke tests that keep the oss-fuzz entry point usable.

    See https://github.com/google/oss-fuzz.
    """

    def test_sample_input_smoke_test(self):
        """Regression check only: feeding sample inputs must not crash."""
        sample_inputs = (b"", b"\0", b"{", b" ", b"x", b"1")
        for payload in sample_inputs:
            _xxtestfuzz.run(payload)
19+
20+
21+
if __name__ == "__main__":
    # Turn on faulthandler before handing control to the unittest runner.
    faulthandler.enable()
    unittest.main()
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Add fuzz tests for float(str), int(str), unicode(str); for oss-fuzz.

Modules/_xxtestfuzz/README.rst

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
Fuzz Tests for CPython
2+
======================
3+
4+
These fuzz tests are designed to be included in Google's `oss-fuzz`_ project.
5+
6+
oss-fuzz works against a library exposing a function of the form
7+
``int LLVMFuzzerTestOneInput(const uint8_t* data, size_t length)``. We provide
8+
that library (``fuzzer.c``), and include a ``_xxtestfuzz`` module for testing with
9+
some toy values -- no fuzzing occurs in Python's test suite.
10+
11+
oss-fuzz will regularly pull from CPython, discover all the tests in
12+
``fuzz_tests.txt``, and run them -- so adding a new test here means it will
13+
automatically be run in oss-fuzz, while also being smoke-tested as part of
14+
CPython's test suite.
15+
16+
Adding a new fuzz test
17+
----------------------
18+
19+
Add the test name on a new line in ``fuzz_tests.txt``.
20+
21+
In ``fuzzer.c``, add a function to be run::
22+
23+
int $test_name (const char* data, size_t size) {
24+
...
25+
return 0;
26+
}
27+
28+
29+
And invoke it from ``LLVMFuzzerTestOneInput``::
30+
31+
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_float)
32+
rv |= _run_fuzz(data, size, fuzz_builtin_float);
33+
#endif
34+
35+
``LLVMFuzzerTestOneInput`` will run in oss-fuzz, with each test in
36+
``fuzz_tests.txt`` run separately.
37+
38+
What makes a good fuzz test
39+
---------------------------
40+
41+
Libraries written in C that might handle untrusted data are worthwhile. The
42+
more complex the logic (e.g. parsing), the more likely this is to be a useful
43+
fuzz test. See the existing examples for reference, and refer to the
44+
`oss-fuzz`_ docs.
45+
46+
.. _oss-fuzz: https://github.com/google/oss-fuzz

Modules/_xxtestfuzz/_xxtestfuzz.c

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
#define PY_SSIZE_T_CLEAN
2+
#include <Python.h>
3+
#include <stdlib.h>
4+
#include <inttypes.h>
5+
6+
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
7+
8+
/* Python-callable wrapper around LLVMFuzzerTestOneInput.

   args: a tuple holding one argument parsed with the "s#" format, i.e. a
   buffer pointer plus its length.

   Returns None on success.  Returns NULL with an exception set when
   argument parsing fails, when the fuzz target leaves an exception pending,
   or (RuntimeError) when the fuzz target returns a nonzero code. */
static PyObject* _fuzz_run(PyObject* self, PyObject* args) {
    const char* buf;
    Py_ssize_t size;
    if (!PyArg_ParseTuple(args, "s#", &buf, &size)) {
        return NULL;
    }
    /* The fuzzer interface takes an unsigned length; convert explicitly
       instead of relying on an implicit signed-to-unsigned conversion. */
    int rv = LLVMFuzzerTestOneInput((const uint8_t*)buf, (size_t)size);
    if (PyErr_Occurred()) {
        return NULL;
    }
    if (rv != 0) {
        // Nonzero return codes are reserved for future use.
        PyErr_Format(
            PyExc_RuntimeError, "Nonzero return code from fuzzer: %d", rv);
        return NULL;
    }
    Py_RETURN_NONE;
}
26+
27+
static PyMethodDef module_methods[] = {
28+
{"run", (PyCFunction)_fuzz_run, METH_VARARGS, ""},
29+
{NULL},
30+
};
31+
32+
/* Module definition.  m_name is "_xxtestfuzz" so that it agrees with the
   init function PyInit__xxtestfuzz; the previous value "_fuzz" did not
   match the name the extension is initialized under. */
static struct PyModuleDef _fuzzmodule = {
    PyModuleDef_HEAD_INIT,
    "_xxtestfuzz",   /* m_name */
    NULL,            /* m_doc */
    0,               /* m_size */
    module_methods,  /* m_methods */
    NULL,            /* m_slots */
    NULL,            /* m_traverse */
    NULL,            /* m_clear */
    NULL             /* m_free */
};
43+
44+
PyMODINIT_FUNC
45+
PyInit__xxtestfuzz(void)
46+
{
47+
PyObject *m = NULL;
48+
49+
if ((m = PyModule_Create(&_fuzzmodule)) == NULL) {
50+
return NULL;
51+
}
52+
return m;
53+
}

Modules/_xxtestfuzz/fuzz_tests.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
fuzz_builtin_float
2+
fuzz_builtin_int
3+
fuzz_builtin_unicode

Modules/_xxtestfuzz/fuzzer.c

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/* A fuzz test for CPython.
2+
3+
The only exposed function is LLVMFuzzerTestOneInput, which is called by
4+
fuzzers and by the _xxtestfuzz module for smoke tests.
5+
6+
To build exactly one fuzz test, as when running in oss-fuzz etc.,
7+
build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
8+
LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
9+
-D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.
10+
11+
See the source code for LLVMFuzzerTestOneInput for details. */
12+
13+
#include <Python.h>
14+
#include <stdlib.h>
15+
#include <inttypes.h>
16+
17+
/* Fuzz PyFloat_FromString as a proxy for float(str). */
18+
static int fuzz_builtin_float(const char* data, size_t size) {
19+
PyObject* s = PyBytes_FromStringAndSize(data, size);
20+
if (s == NULL) return 0;
21+
PyObject* f = PyFloat_FromString(s);
22+
if (PyErr_Occurred() && PyErr_ExceptionMatches(PyExc_ValueError)) {
23+
PyErr_Clear();
24+
}
25+
26+
Py_XDECREF(f);
27+
Py_DECREF(s);
28+
return 0;
29+
}
30+
31+
/* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
32+
static int fuzz_builtin_int(const char* data, size_t size) {
33+
/* Pick a random valid base. (When the fuzzed function takes extra
34+
parameters, it's somewhat normal to hash the input to generate those
35+
parameters. We want to exercise all code paths, so we do so here.) */
36+
int base = _Py_HashBytes(data, size) % 37;
37+
if (base == 1) {
38+
// 1 is the only number between 0 and 36 that is not a valid base.
39+
base = 0;
40+
}
41+
if (base == -1) {
42+
return 0; // An error occurred, bail early.
43+
}
44+
if (base < 0) {
45+
base = -base;
46+
}
47+
48+
PyObject* s = PyUnicode_FromStringAndSize(data, size);
49+
if (s == NULL) {
50+
if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
51+
PyErr_Clear();
52+
}
53+
return 0;
54+
}
55+
PyObject* l = PyLong_FromUnicodeObject(s, base);
56+
if (l == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
57+
PyErr_Clear();
58+
}
59+
PyErr_Clear();
60+
Py_XDECREF(l);
61+
Py_DECREF(s);
62+
return 0;
63+
}
64+
65+
/* Fuzz PyUnicode_FromStringAndSize as a proxy for unicode(str). */
66+
static int fuzz_builtin_unicode(const char* data, size_t size) {
67+
PyObject* s = PyUnicode_FromStringAndSize(data, size);
68+
if (s == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
69+
PyErr_Clear();
70+
}
71+
Py_XDECREF(s);
72+
return 0;
73+
}
74+
75+
/* Run one fuzz test and abort the process if it leaves an exception set.

   data/size: the fuzzer input, forwarded to `fuzzer` as a char buffer.
   fuzzer:    the fuzz test to invoke.

   Returns whatever `fuzzer` returned when no exception is pending. */
static int _run_fuzz(const uint8_t *data, size_t size,
                     int (*fuzzer)(const char *, size_t)) {
    const int status = fuzzer((const char*) data, size);
    if (!PyErr_Occurred()) {
        /* Someday the return value might mean something, propagate it. */
        return status;
    }
    /* Fuzz tests should handle expected errors for themselves.
       This is a last-ditch check in case they didn't. */
    PyErr_Print();
    abort();
}
87+
88+
/* CPython generates a lot of leak warnings for whatever reason.
   This function unconditionally returns 1. */
int __lsan_is_turned_off(void)
{
    return 1;
}
90+
91+
/* Fuzz test interface.
92+
This returns the bitwise or of all fuzz test's return values.
93+
94+
All fuzz tests must return 0, as all nonzero return codes are reserved for
95+
future use -- we propagate the return values for that future case.
96+
(And we bitwise or when running multiple tests to verify that normally we
97+
only return 0.) */
98+
int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
99+
if (!Py_IsInitialized()) {
100+
/* LLVMFuzzerTestOneInput is called repeatedly from the same process,
101+
with no separate initialization phase, sadly, so we need to
102+
initialize CPython ourselves on the first run. */
103+
Py_InitializeEx(0);
104+
}
105+
106+
int rv = 0;
107+
108+
#define _Py_FUZZ_YES(test_name) (defined(_Py_FUZZ_##test_name) || !defined(_Py_FUZZ_ONE))
109+
#if _Py_FUZZ_YES(fuzz_builtin_float)
110+
rv |= _run_fuzz(data, size, fuzz_builtin_float);
111+
#endif
112+
#if _Py_FUZZ_YES(fuzz_builtin_int)
113+
rv |= _run_fuzz(data, size, fuzz_builtin_int);
114+
#endif
115+
#if _Py_FUZZ_YES(fuzz_builtin_unicode)
116+
rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
117+
#endif
118+
#undef _Py_FUZZ_YES
119+
return rv;
120+
}

setup.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -715,6 +715,12 @@ def detect_modules(self):
715715
# syslog daemon interface
716716
exts.append( Extension('syslog', ['syslogmodule.c']) )
717717

718+
# Fuzz tests.
719+
exts.append( Extension(
720+
'_xxtestfuzz',
721+
['_xxtestfuzz/_xxtestfuzz.c', '_xxtestfuzz/fuzzer.c'])
722+
)
723+
718724
#
719725
# Here ends the simple stuff. From here on, modules need certain
720726
# libraries, are platform-specific, or present other surprises.

0 commit comments

Comments
 (0)
0