python · gpshead · Sep 6, 2017 · Jul 21, 2017 · Jul 25, 2017 · Jul 25, 2017
diff --git a/Lib/test/test_fuzz.py b/Lib/test/test_fuzz.py
@@ -0,0 +1,20 @@
+import unittest
+from test import support
+import _fuzz
+
+class TestFuzz(unittest.TestCase):
+
+    def test_fuzz(self):
+        """Run the fuzz tests on sample input.
+
+        This isn't meaningful and only checks it doesn't crash.
+        """
+        _fuzz.run(b"")
+        _fuzz.run(b"\0")
+        _fuzz.run(b"{")
+        _fuzz.run(b" ")
+        _fuzz.run(b"x")
+        _fuzz.run(b"1")
+
+if __name__ == "__main__":
+    support.run_unittest(TestFuzz)
diff --git a/Modules/_fuzz/README.rst b/Modules/_fuzz/README.rst
@@ -0,0 +1,36 @@
+Fuzz Tests for CPython
+======================
+
+These fuzz tests are designed to be included in Google's `oss-fuzz`_ project.
+
+Adding a new fuzz test
+----------------------
+
+Add the test name on a new line in ``fuzz_tests.txt``.
+
+In ``fuzzer.cpp``, add a function to be run::
+
+    int $test_name (const char* data, size_t size) {
+        ...
+        return 0;
+    }
+
+
+And invoke it from ``LLVMFuzzerTestOneInput``::
+
+    #if _Py_FUZZ_YES(fuzz_builtin_float)
+        rv |= _run_fuzz(data, size, fuzz_builtin_float);
+    #endif
+
+In oss-fuzz, ``LLVMFuzzerTestOneInput`` is built and run separately for each
+test listed in ``fuzz_tests.txt``.
+
+What makes a good fuzz test
+---------------------------
+
+Libraries written in C that might handle untrusted data are worthwhile. The
+more complex the logic (e.g. parsing), the more likely this is to be a useful
+fuzz test. See the existing examples for reference, and refer to the
+`oss-fuzz`_ docs.
+
+.. _oss-fuzz: https://github.com/google/oss-fuzz
diff --git a/Modules/_fuzz/_fuzzmodule.c b/Modules/_fuzz/_fuzzmodule.c
@@ -0,0 +1,52 @@
+#include <Python.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+// Fuzz entry point, defined in fuzzer.cpp with C linkage (extern "C").
+// Declared here so that this C module can call it.
+int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size);
+
+// Python-callable wrapper: _fuzz.run(data).
+//
+// Passes the bytes of `data` (a str or bytes-like object) to
+// LLVMFuzzerTestOneInput. Returns None on success. Returns NULL with an
+// exception set if argument parsing fails, if the fuzzer left an exception
+// pending, or (RuntimeError) if the fuzzer returned a nonzero code.
+static PyObject* _fuzz_run(PyObject* self, PyObject* args) {
+    // "s*" is used rather than "s#": "s#" stores its length as an int or a
+    // Py_ssize_t (depending on PY_SSIZE_T_CLEAN), never as a size_t, whereas
+    // a Py_buffer always carries a Py_ssize_t length.
+    Py_buffer view;
+    if (!PyArg_ParseTuple(args, "s*", &view)) {
+        return NULL;
+    }
+    int rv = LLVMFuzzerTestOneInput(
+        (const uint8_t*)view.buf, (size_t)view.len);
+    // A buffer filled by "s*" must be released once we are done with it.
+    PyBuffer_Release(&view);
+    if (PyErr_Occurred()) {
+        return NULL;
+    }
+    if (rv != 0) {
+        // Nonzero return codes are reserved for future use.
+        PyErr_Format(
+            PyExc_RuntimeError, "Nonzero return code from fuzzer: %d", rv);
+        return NULL;
+    }
+    Py_RETURN_NONE;
+}
+
+// Method table for the _fuzz module: a single function, run(), with an empty
+// docstring. The {NULL} entry is the sentinel that terminates the table.
+static PyMethodDef module_methods[] = {
+    {"run", (PyCFunction)_fuzz_run, METH_VARARGS, ""},
+    {NULL},
+};
+
+// Module definition for _fuzz. The initializers are positional, following
+// the field order of struct PyModuleDef.
+static struct PyModuleDef _fuzzmodule = {
+        PyModuleDef_HEAD_INIT,
+        "_fuzz",         // m_name
+        NULL,            // m_doc
+        0,               // m_size
+        module_methods,  // m_methods
+        NULL,            // m_slots
+        NULL,            // m_traverse
+        NULL,            // m_clear
+        NULL             // m_free
+};
+
+// Initialization function for the _fuzz extension module.
+//
+// Returns the newly created module object, or NULL if creation failed.
+PyMODINIT_FUNC
+PyInit__fuzz(void)
+{
+    // PyModule_Create() already yields NULL on failure, so its result can be
+    // handed straight back to the caller.
+    return PyModule_Create(&_fuzzmodule);
+}
diff --git a/Modules/_fuzz/fuzz_tests.txt b/Modules/_fuzz/fuzz_tests.txt
@@ -0,0 +1,3 @@
+fuzz_builtin_float
+fuzz_builtin_int
+fuzz_builtin_unicode
diff --git a/Modules/_fuzz/fuzzer.cpp b/Modules/_fuzz/fuzzer.cpp
@@ -0,0 +1,118 @@
+// A fuzz test for CPython.
+//
+// Unusually for CPython, this is written in C++ for the benefit of linking with
+// libFuzzer.
+//
+// The only exposed function is LLVMFuzzerTestOneInput, which is called by
+// fuzzers and by the _fuzz module for smoke tests.
+//
+// To build exactly one fuzz test, as when running in oss-fuzz etc.,
+// build with -D _Py_FUZZ_ONE and -D _Py_FUZZ_<test_name>. e.g. to build
+// LLVMFuzzerTestOneInput to only run "fuzz_builtin_float", build this file with
+// -D _Py_FUZZ_ONE -D _Py_FUZZ_fuzz_builtin_float.
+//
+// See the source code for LLVMFuzzerTestOneInput for details.
+
+#include <Python.h>
+#include <stdlib.h>
+#include <inttypes.h>
+
+// Fuzz test: wrap the input in a bytes object and hand it to
+// PyFloat_FromString, as a proxy for float(str).
+//
+// Always returns 0. A ValueError from the conversion is cleared; any other
+// exception is left pending for the caller to deal with.
+static int fuzz_builtin_float(const char* data, size_t size) {
+    PyObject* input = PyBytes_FromStringAndSize(data, size);
+    if (input == NULL) {
+        // Could not even build the input object; nothing to fuzz.
+        return 0;
+    }
+
+    PyObject* parsed = PyFloat_FromString(input);
+    if (PyErr_Occurred()) {
+        if (PyErr_ExceptionMatches(PyExc_ValueError)) {
+            PyErr_Clear();
+        }
+    }
+
+    // `parsed` is NULL when the conversion failed, hence the X variant.
+    Py_XDECREF(parsed);
+    Py_DECREF(input);
+    return 0;
+}
+
+// Fuzz test: decode the input to a str and hand it to
+// PyLong_FromUnicodeObject, as a proxy for int(str, base).
+//
+// The base is derived from a hash of the input bytes. Always returns 0.
+// A UnicodeDecodeError while building the str is cleared; any other failure
+// at that step is left pending for the caller. Every exception raised by the
+// integer conversion itself is cleared.
+static int fuzz_builtin_int(const char* data, size_t size) {
+    // Reduce the hash as an unsigned value so the remainder is never
+    // negative, and use modulus 37 so that base 36 is reachable too.
+    int base = (int)((size_t)_Py_HashBytes(data, (Py_ssize_t)size) % 37);
+    if (base == 1) {
+        // 1 is the only value in 0..36 that is not an accepted base.
+        base = 0;
+    }
+
+    PyObject* s = PyUnicode_FromStringAndSize(data, size);
+    if (s == NULL) {
+        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+            PyErr_Clear();
+        }
+        return 0;
+    }
+    PyObject* l = PyLong_FromUnicodeObject(s, base);
+    // ValueError is the expected failure for text that is not an integer
+    // literal. Clearing unconditionally keeps the existing behaviour.
+    // NOTE(review): this also hides unexpected exceptions from _run_fuzz --
+    // confirm that is intended.
+    PyErr_Clear();
+    Py_XDECREF(l);
+    Py_DECREF(s);
+    return 0;
+}
+
+// Fuzz test: build a str from the raw input with
+// PyUnicode_FromStringAndSize, as a proxy for unicode(str).
+//
+// Always returns 0. A UnicodeDecodeError is cleared; any other exception is
+// left pending for the caller to deal with.
+static int fuzz_builtin_unicode(const char* data, size_t size) {
+    PyObject* decoded = PyUnicode_FromStringAndSize(data, size);
+    if (PyErr_Occurred()) {
+        if (PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+            PyErr_Clear();
+        }
+    }
+    // `decoded` is NULL when decoding failed, hence the X variant.
+    Py_XDECREF(decoded);
+    return 0;
+}
+
+// Run one fuzz test on the given input and abort on failure.
+//
+// `fuzzer` is called with the `size` bytes at `data`, viewed as const char*.
+// If it returns with a Python exception still pending, the exception is
+// printed and the process aborts. Otherwise its return value is passed
+// through unchanged.
+static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
+    // Forward the real input; the fuzz test must see the bytes under test.
+    int rv = fuzzer((const char*) data, size);
+    if (PyErr_Occurred()) {
+        // Fuzz tests should handle expected errors for themselves.
+        PyErr_Print();
+        abort();
+    }
+    // Someday the return value might mean something, propagate it.
+    return rv;
+}
+
+// CPython generates a lot of leak warnings for whatever reason.
+// Defining this LeakSanitizer hook to return 1 turns leak checking off.
+// extern "C" keeps the symbol name unmangled so the sanitizer can find it.
+extern "C" int __lsan_is_turned_off(void) { return 1; }
+
+// Fuzz test interface.
+// This returns the bitwise or of all fuzz test's return values.
+//
+// All fuzz tests must return 0, as all nonzero return codes are reserved for
+// future use -- we propagate the return values for that future case.
+// (And we bitwise or when running multiple tests to verify that normally we
+// only return 0.)
+//
+// `data` / `size` describe the input buffer; the same buffer is passed to
+// every enabled fuzz test via _run_fuzz.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
+    if (!Py_IsInitialized()) {
+        // LLVMFuzzerTestOneInput is called repeatedly from the same process, with
+        // no separate initialization phase, sadly, so we need to initialize CPython
+        // ourselves on the first run.
+        Py_InitializeEx(0);
+    }
+
+    int rv = 0;
+
+// A test is enabled when its own _Py_FUZZ_<test_name> macro is defined, or
+// when _Py_FUZZ_ONE is not defined (in which case every test is enabled).
+// NOTE(review): `defined` produced by macro expansion inside #if is undefined
+// behavior per the C and C++ standards (clang warns with
+// -Wexpansion-to-defined) -- confirm this is acceptable for the supported
+// compilers.
+#define _Py_FUZZ_YES(test_name) (defined(_Py_FUZZ_##test_name) || !defined(_Py_FUZZ_ONE))
+#if _Py_FUZZ_YES(fuzz_builtin_float)
+    rv |= _run_fuzz(data, size, fuzz_builtin_float);
+#endif
+#if _Py_FUZZ_YES(fuzz_builtin_int)
+    rv |= _run_fuzz(data, size, fuzz_builtin_int);
+#endif
+#if _Py_FUZZ_YES(fuzz_builtin_unicode)
+    rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
+#endif
+// The helper macro is only meant for the block above.
+#undef _Py_FUZZ_YES
+  return rv;
+}
diff --git a/setup.py b/setup.py
@@ -715,6 +715,13 @@ def detect_modules(self):
         # syslog daemon interface
         exts.append( Extension('syslog', ['syslogmodule.c']) )
 
+        # Fuzz tests.
+        exts.append( Extension(
+            '_fuzz',
+            ['_fuzz/_fuzzmodule.c', '_fuzz/fuzzer.cpp'],
+            optional=True)
+        )
+
         #
         # Here ends the simple stuff.  From here on, modules need certain
         # libraries, are platform-specific, or present other surprises.