Add a fuzzer for `Py_CompileStringExFlags` by bradlarsen · Pull Request #111721 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

Add a fuzzer for Py_CompileStringExFlags #111721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Dec 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions Modules/_xxtestfuzz/dictionaries/fuzz_pycompile.dict
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# bits of syntax
"( "
") "
"[ "
"] "
": "
", "
"; "
"{ "
"} "

# operators
"+ "
"- "
"* "
"** "
"/ "
"// "
"| "
"& "
"< "
"> "
"= "
". "
"% "
"` "
"^ "
"~ "
"@ "
"== "
"!= "
"<> "
"<< "
"<= "
">= "
">> "
"+= "
"-= "
"*= "
"** "
"/= "
"//= "
"|= "
"%= "
"&= "
"^= "
"<<= "
">>= "
"**= "
":= "
"@= "

# whitespace
" "
":\\n "

# type signatures and functions
"-> "
": List[int]"
": Dict[int, str]"

"# type:"
"# type: List[int]"
"# type: Dict[int, str]"

", *"
", /"
", *args"
", **kwargs"
", x=42"


# literals
"0x0a"
"0b0000"
"42"
"0o70"
"42j"
"42.01"
"-5"
"+42e-3"
"0_0_0"
"1e1_0"
".1_4"

"{}"

# variable names
"x"
"y"

# strings
"r'x'"

"b'x'"

"rb\"x\""

"br\"x\""

"f'{x + 5}'"
"f\"{x + 5}\""

"'''"
"\"\"\""

"\\u"
"\\x"

# keywords
"def "
"del "
"pass "
"break "
"continue "
"return "
"raise "
"from "
"import "
".. "
"... "
"__future__ "
"as "
"global "
"nonlocal "
"assert "
"print "
"if "
"elif "
"else: "
"while "
"try: "
"except "
"finally: "
"with "
"lambda "
"or "
"and "
"not "
"None "
"__peg_parser__"
"True "
"False "
"yield "
"async "
"await "
"for "
"in "
"is "
"class "

# shebangs and encodings
"#!"
"# coding:"
"# coding="
"# coding: latin-1"
"# coding=latin-1"
"# coding: utf-8"
"# coding=utf-8"
"# coding: ascii"
"# coding=ascii"
"# coding: cp860"
"# coding=cp860"
"# coding: gbk"
"# coding=gbk"
7 changes: 7 additions & 0 deletions Modules/_xxtestfuzz/fuzz_pycompile_corpus/input1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from __future__ import annotations

def test() -> None:
x: list[int] = []
x: dict[int, str] = {}
x: set[bytes] = {}
print(5 + 42 * 3, x)
5 changes: 5 additions & 0 deletions Modules/_xxtestfuzz/fuzz_pycompile_corpus/input2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
class Foo(metaclass=42):
__slots__ = ['x']
pass

foo = Foo()
6 changes: 6 additions & 0 deletions Modules/_xxtestfuzz/fuzz_pycompile_corpus/input3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
def evens():
i = 0
while True:
i += 1
if i % 2 == 0:
yield i
3 changes: 3 additions & 0 deletions Modules/_xxtestfuzz/fuzz_pycompile_corpus/input4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
async def hello(name: str):
await name
print(name)
7 changes: 7 additions & 0 deletions Modules/_xxtestfuzz/fuzz_pycompile_corpus/input5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
try:
eval('importer exporter... really long matches')
except SyntaxError:
print("nothing to see here")
finally:
print("all done here")
raise
8 changes: 8 additions & 0 deletions Modules/_xxtestfuzz/fuzz_pycompile_corpus/input6.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
"""Some module docstring"""
import sys

def main():
print("Hello world!", file=sys.stderr)

if __name__ == '__main__':
main()
1 change: 1 addition & 0 deletions Modules/_xxtestfuzz/fuzz_tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ fuzz_csv_reader
fuzz_struct_unpack
fuzz_ast_literal_eval
fuzz_elementtree_parsewhole
fuzz_pycompile
60 changes: 60 additions & 0 deletions Modules/_xxtestfuzz/fuzzer.c
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,63 @@ static int fuzz_elementtree_parsewhole(const char* data, size_t size) {
return 0;
}

/* Upper bound, in bytes, on the input accepted by fuzz_pycompile; it is
 * also the capacity of the scratch buffer below. */
#define MAX_PYCOMPILE_TEST_SIZE 16384
/* Scratch buffer in which fuzz_pycompile builds a NUL-terminated copy of
 * the fuzz input before handing it to the compiler. */
static char pycompile_scratch[MAX_PYCOMPILE_TEST_SIZE];

/* Candidate `start` arguments; fuzz_pycompile picks one using the first
 * input byte. */
static const int start_vals[] = {Py_eval_input, Py_single_input, Py_file_input};
/* `static` keeps the element counts private to this translation unit, in
 * line with the tables they describe. */
static const size_t NUM_START_VALS = sizeof(start_vals) / sizeof(start_vals[0]);

/* Candidate `optimize` arguments; fuzz_pycompile picks one using the second
 * input byte. */
static const int optimize_vals[] = {-1, 0, 1, 2};
static const size_t NUM_OPTIMIZE_VALS = sizeof(optimize_vals) / sizeof(optimize_vals[0]);

/* Fuzz `PyCompileStringExFlag F438 s` using a variety of input parameters.
* That function is essentially behind the `compile` builtin */
static int fuzz_pycompile(const char* data, size_t size) {
// Ignore overly-large inputs, and account for a NUL terminator
if (size > MAX_PYCOMPILE_TEST_SIZE - 1) {
return 0;
}

// Need 2 bytes for parameter selection
if (size < 2) {
return 0;
}

// Use first byte to determine element of `start_vals` to use
unsigned char start_idx = (unsigned char) data[0];
int start = start_vals[start_idx % NUM_START_VALS];

// Use second byte to determine element of `optimize_vals` to use
unsigned char optimize_idx = (unsigned char) data[1];
int optimize = optimize_vals[optimize_idx % NUM_OPTIMIZE_VALS];

// Create a NUL-terminated C string from the remaining input
memcpy(pycompile_scratch, data + 2, size - 2);
// Put a NUL terminator just after the copied data. (Space was reserved already.)
pycompile_scratch[size - 2] = '\0';

// XXX: instead of always using NULL for the `flags` value to
// `Py_CompileStringExFlags`, there are many flags that conditionally
// change parser behavior:
//
// #define PyCF_TYPE_COMMENTS 0x1000
// #define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000
// #define PyCF_ONLY_AST 0x0400
//
// It would be good to test various combinations of these, too.
PyCompilerFlags *flags = NULL;

PyObject *result = Py_CompileStringExFlags(pycompile_scratch, "<fuzz input>", start, flags, optimize);
if (result == NULL) {
/* compilation failed, most likely from a syntax error */
PyErr_Clear();
} else {
Py_DECREF(result);
}

return 0;
}

/* Run fuzzer and abort on failure. */
static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
int rv = fuzzer((const char*) data, size);
Expand Down Expand Up @@ -642,6 +699,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
}

rv |= _run_fuzz(data, size, fuzz_elementtree_parsewhole);
#endif
#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_pycompile)
rv |= _run_fuzz(data, size, fuzz_pycompile);
#endif
return rv;
}
3 changes: 3 additions & 0 deletions Tools/c-analyzer/cpython/ignored.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,9 @@ Modules/_xxtestfuzz/fuzzer.c - re_error_exception -
Modules/_xxtestfuzz/fuzzer.c - struct_error -
Modules/_xxtestfuzz/fuzzer.c - struct_unpack_method -
Modules/_xxtestfuzz/fuzzer.c - xmlparser_type -
Modules/_xxtestfuzz/fuzzer.c - pycompile_scratch -
Modules/_xxtestfuzz/fuzzer.c - start_vals -
Modules/_xxtestfuzz/fuzzer.c - optimize_vals -
Modules/_xxtestfuzz/fuzzer.c LLVMFuzzerTestOneInput CSV_READER_INITIALIZED -
Modules/_xxtestfuzz/fuzzer.c LLVMFuzzerTestOneInput JSON_LOADS_INITIALIZED -
Modules/_xxtestfuzz/fuzzer.c LLVMFuzzerTestOneInput SRE_COMPILE_INITIALIZED -
Expand Down
0