From a44faf3d7cd870fdbc55faf9906d0ed7c1f5673b Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Fri, 28 Jun 2024 15:22:40 -0700 Subject: [PATCH 01/19] Add strace helper for tracing system calls made by python running specific code --- Lib/test/support/strace_helper.py | 170 ++++++++++++++++++++++++++++++ 1 file changed, 170 insertions(+) create mode 100644 Lib/test/support/strace_helper.py diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py new file mode 100644 index 00000000000000..e3de0562eab361 --- /dev/null +++ b/Lib/test/support/strace_helper.py @@ -0,0 +1,170 @@ +import re +import sys +import textwrap +import unittest +from dataclasses import dataclass +from test import support +from test.support.script_helper import run_python_until_end +from typing import Dict, List + +_strace_binary = "/usr/bin/strace" +_syscall_regex = re.compile( + r"(?P[^(]*)\((?P[^)]*)\)\s*[=]\s*(?P.+)") +_returncode_regex = re.compile( + r"\+\+\+ exited with (?P\d+) \+\+\+") + +# Cached value of whether or not there is a compatible strace binary +_strace_working: bool | None = None + + +@dataclass +class StraceEvent: + syscall: str + args: List[str] + returncode: str + + +@dataclass +class StraceResult: + strace_returncode: int + python_returncode: int + _raw_events: str + stdout: str + stderr: str + + def events(self) -> List[StraceEvent]: + """Extract the call list information from _raw_events""" + matches = [ + _syscall_regex.match(event) + for event in self._raw_events.splitlines() + ] + return [ + StraceEvent(match["syscall"], + [arg.strip() for arg in (match["args"].split(","))], + match["returncode"]) for match in matches if match + ] + + def sections(self) -> Dict[str:List[StraceEvent]]: + """Find all "MARK " writes and use them to make groups of events. + + This is useful to avoid variable / overhead strace events, like that + at interpreter startup, so a test can just check does the small case + under study work.""" + current_section = "__startup" + sections = {current_section: []} + for event in self.events(): + if event.syscall == 'write' and len( + event.args) > 2 and event.args[1].startswith("\"MARK "): + # Found a new section, don't include the write in the section + # but all events until next mark should be in that section + current_section = event.args[1].split( + " ", 1)[1].removesuffix('\\n"') + if current_section not in sections: + sections[current_section] = list() + else: + sections[current_section].append(event) + + return sections + + +@support.requires_subprocess() +def strace_python(code: str, + strace_flags: List[str], + check: bool = True) -> StraceResult: + """Run strace and return the trace. + + Sets strace_returncode and python_returncode to `-1` on error + """ + res = None + + def _make_error(reason, details): + return StraceResult( + strace_returncode=-1, + python_returncode=-1, + _raw_events=f"error({reason},details={details}) = -1", + stdout=res.out if res else "", + stderr=res.err if res else "") + + # Run strace, and get out the raw text + try: + res, cmd_line = run_python_until_end( + "-c", + textwrap.dedent(code), + __run_using_command=[_strace_binary] + strace_flags) + except OSError as err: + return _make_error("Caught OSError", err) + + # Get out program returncode + decoded = res.err.decode().strip() + + output = decoded.rsplit("\n", 1) + if len(output) != 2: + return _make_error("Expected strace events and exit code line", + decoded[-50:]) + + returncode_match = _returncode_regex.match(output[1]) + if not returncode_match: + return _make_error("Expected to find returncode in last line.", + output[1][:50]) + + python_returncode = int(returncode_match["returncode"]) + if check and (res.rc or python_returncode): + res.fail(cmd_line) + + return StraceResult(strace_returncode=res.rc, + python_returncode=python_returncode, + _raw_events=output[0], + stdout=res.out, + stderr=res.err) + + +def _get_events(code: str, strace_flags: List[str], prelude: str, + cleanup: str) -> List[StraceEvent]: + # NOTE: The flush is currently required to prevent the prints from getting + # buffered and done all at once at exit + prelude = textwrap.dedent(prelude) + code = textwrap.dedent(code) + cleanup = textwrap.dedent(cleanup) + to_run = f""" +print("MARK prelude", flush=True) +{prelude} +print("MARK code", flush=True) +{code} +print("MARK cleanup", flush=True) +{cleanup} +print("MARK __shutdown", flush=True) + """ + trace = strace_python(to_run, strace_flags) + all_sections = trace.sections() + return all_sections['code'] + + +def get_syscalls(code: str, + strace_flags: List[str], + prelude: str = "", + cleanup: str = "") -> List[str]: + """Get the syscalls which a given chunk of python code generates""" + events = _get_events(code, strace_flags, prelude=prelude, cleanup=cleanup) + return [ev.syscall for ev in events] + + +def _can_strace(): + res = strace_python("import sys; sys.exit(0)", [], check=False) + assert res.events(), "Should have parsed multiple calls" + + global _strace_working + _strace_working = res.strace_returncode == 0 and res.python_returncode == 0 + + +def requires_strace(): + if sys.platform != "linux": + return unittest.skip("Linux only, requires strace.") + # Moderately expensive (spawns a subprocess), so share results when possible. + if _strace_working is None: + _can_strace() + + assert _strace_working is not None, "Should have been set by _can_strace" + return unittest.skipUnless(_strace_working, "Requires working strace") + + +__all__ = ["requires_strace", "strace_python", "StraceResult", "StraceEvent"] From a462039b6bca1e8d92743b0018a1fc84953b6637 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Fri, 28 Jun 2024 15:27:39 -0700 Subject: [PATCH 02/19] Update test_subprocess to use strace_helper The test explicitly wants to check for `vfork` call _or_ flag passed to clone so keep doing the regex search rather than looking at particular events --- Lib/test/test_subprocess.py | 56 +++++++++++++++---------------------- 1 file changed, 23 insertions(+), 33 deletions(-) diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 8b69cd03ba7f24..701ce84e6ac0a3 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -4,6 +4,7 @@ from test.support import check_sanitizer from test.support import import_helper from test.support import os_helper +from test.support import strace_helper from test.support import warnings_helper from test.support.script_helper import assert_python_ok import subprocess @@ -3434,7 +3435,7 @@ def test__use_vfork(self, mock_fork_exec): @unittest.skipIf(not sysconfig.get_config_var("HAVE_VFORK"), "vfork() not enabled by configure.") - @unittest.skipIf(sys.platform != "linux", "Linux only, requires strace.") + @strace_helper.requires_strace() @mock.patch("subprocess._USE_POSIX_SPAWN", new=False) def test_vfork_used_when_expected(self): # This is a performance regression test to ensure we default to using @@ -3442,52 +3443,41 @@ def test_vfork_used_when_expected(self): # Technically this test could pass when posix_spawn is used as well # because libc tends to implement that internally using vfork. But # that'd just be testing a libc+kernel implementation detail. - strace_binary = "/usr/bin/strace" - # The only system calls we are interested in. - strace_filter = "--trace=clone,clone2,clone3,fork,vfork,exit,exit_group" - true_binary = "/bin/true" - strace_command = [strace_binary, strace_filter] - try: - does_strace_work_process = subprocess.run( - strace_command + [true_binary], - stderr=subprocess.PIPE, - stdout=subprocess.DEVNULL, - ) - rc = does_strace_work_process.returncode - stderr = does_strace_work_process.stderr - except OSError: - rc = -1 - stderr = "" - if rc or (b"+++ exited with 0 +++" not in stderr): - self.skipTest("strace not found or not working as expected.") + # Are intersted in the system calls: + # clone,clone2,clone3,fork,vfork,exit,exit_group + # Unfortunately using `--trace` with that list to strace fails because + # not all are supported on all platforms (ex. clone2 is ia64 only...) + # So instead use `%process` which is recommended by strace, and contains + # the above. + true_binary = "/bin/true" + strace_args = ["--trace=%process"] with self.subTest(name="default_is_vfork"): - vfork_result = assert_python_ok( - "-c", - textwrap.dedent(f"""\ - import subprocess - subprocess.check_call([{true_binary!r}])"""), - __run_using_command=strace_command, + vfork_result = strace_helper.strace_python( + f"""\ + import subprocess + subprocess.check_call([{true_binary!r}])""", + strace_args ) # Match both vfork() and clone(..., flags=...|CLONE_VFORK|...) - self.assertRegex(vfork_result.err, br"(?i)vfork") + self.assertRegex(vfork_result._raw_events, r"(?i)vfork") # Do NOT check that fork() or other clones did not happen. # If the OS denys the vfork it'll fallback to plain fork(). # Test that each individual thing that would disable the use of vfork # actually disables it. for sub_name, preamble, sp_kwarg, expect_permission_error in ( - ("!use_vfork", "subprocess._USE_VFORK = False", "", False), + # FIXME(cmaloney): _USE_VFORK doesn't work currently. + # ("!use_vfork", "subprocess._USE_VFORK = False", "", False), ("preexec", "", "preexec_fn=lambda: None", False), ("setgid", "", f"group={os.getgid()}", True), ("setuid", "", f"user={os.getuid()}", True), ("setgroups", "", "extra_groups=[]", True), ): with self.subTest(name=sub_name): - non_vfork_result = assert_python_ok( - "-c", - textwrap.dedent(f"""\ + non_vfork_result = strace_helper.strace_python( + f"""\ import subprocess {preamble} try: @@ -3495,11 +3485,11 @@ def test_vfork_used_when_expected(self): [{true_binary!r}], **dict({sp_kwarg})) except PermissionError: if not {expect_permission_error}: - raise"""), - __run_using_command=strace_command, + raise""", + strace_args ) # Ensure neither vfork() or clone(..., flags=...|CLONE_VFORK|...). - self.assertNotRegex(non_vfork_result.err, br"(?i)vfork") + self.assertNotRegex(non_vfork_result._raw_events, r"(?i)vfork") @unittest.skipUnless(mswindows, "Windows specific tests") From 2ab832d4c1fc8e75b42c3fe379e257c46dbc9149 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Fri, 28 Jun 2024 18:43:18 -0700 Subject: [PATCH 03/19] Add test to FileIO that validates set of syscalls Goal is that as changes to I/O code are made changes to the set of system calls made need to be intentional so that common fast-paths aren't accidentally changed. --- Lib/test/test_fileio.py | 63 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index 0611d1749f41c1..3084d952701b89 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -11,7 +11,7 @@ from test.support import ( cpython_only, swap_attr, gc_collect, is_emscripten, is_wasi, - infinite_recursion, + infinite_recursion, strace_helper ) from test.support.os_helper import ( TESTFN, TESTFN_ASCII, TESTFN_UNICODE, make_bad_fd, @@ -24,6 +24,9 @@ import _pyio # Python implementation of io +_strace_flags=["--trace=%file,%desc"] + + class AutoFileTests: # file tests for which a test file is automatically set up @@ -359,6 +362,64 @@ def testErrnoOnClosedReadinto(self, f): a = array('b', b'x'*10) f.readinto(a) + @strace_helper.requires_strace() + def test_syscalls_read(self): + """Check that the set of system calls produced by the I/O stack is what + is expected for various read cases. + + It's expected as bits of the I/O implementation change, this will need + to change. The goal is to catch changes that unintentionally add + additional systemcalls (ex. additional fstat calls has been an issue). + """ + self.f.write(b"Hello, World!") + self.f.close() + + # "open, read, close" file with default options. + calls = strace_helper.get_syscalls( + f""" + f = open('{TESTFN}') + f.read() + f.close() + """, + _strace_flags + ) + assert calls == ['openat', 'fstat', 'ioctl', 'lseek', 'lseek', 'fstat', + 'read', 'read', 'close'] + + # Focus on just `read()` + calls = strace_helper.get_syscalls( + prelude=f"f = open('{TESTFN}')", + code="f.read()", + cleanup="f.close()", + strace_flags=_strace_flags + ) + assert calls == ['lseek', 'fstat', 'read', 'read'] + + # Readall in binary mode + calls = strace_helper.get_syscalls( + f""" + f = open('{TESTFN}', 'rb') + f.read() + f.close() + """, + _strace_flags + ) + assert calls == ['openat', 'fstat', 'ioctl', 'lseek', 'lseek', 'fstat', + 'read', 'read', 'close'] + + # Readall in text mode + calls = strace_helper.get_syscalls( + f""" + f = open('{TESTFN}', 'rb') + f.read() + f.close() + """, + _strace_flags + ) + assert calls == ['openat', 'fstat', 'ioctl', 'lseek', 'lseek', 'fstat', + 'read', 'read', 'close'] + + class CAutoFileTests(AutoFileTests, unittest.TestCase): FileIO = _io.FileIO modulename = '_io' From ef298f2412df2c3cf3683e17c46df34ad58a9d83 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Fri, 28 Jun 2024 20:13:34 -0700 Subject: [PATCH 04/19] Move from assert to .assertEqual --- Lib/test/test_fileio.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index 3084d952701b89..6f128cfffffa90 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -383,8 +383,8 @@ def test_syscalls_read(self): """, _strace_flags ) - assert calls == ['openat', 'fstat', 'ioctl', 'lseek', 'lseek', 'fstat', - 'read', 'read', 'close'] + self.assertEqual(calls, ['openat', 'fstat', 'ioctl', 'lseek', + 'lseek', 'fstat', 'read', 'read', 'close']) # Focus on just `read()` calls = strace_helper.get_syscalls( @@ -393,7 +393,7 @@ def test_syscalls_read(self): cleanup="f.close()", strace_flags=_strace_flags ) - assert calls == ['lseek', 'fstat', 'read', 'read'] + self.assertEqual(calls, ['lseek', 'fstat', 'read', 'read']) # Readall in binary mode calls = strace_helper.get_syscalls( @@ -404,8 +404,8 @@ def test_syscalls_read(self): """, _strace_flags ) - assert calls == ['openat', 'fstat', 'ioctl', 'lseek', 'lseek', 'fstat', - 'read', 'read', 'close'] + self.assertEqual(calls, ['openat', 'fstat', 'ioctl', 'lseek', 'lseek', + 'fstat', 'read', 'read', 'close']) # Readall in text mode calls = strace_helper.get_syscalls( @@ -416,8 +416,8 @@ def test_syscalls_read(self): """, _strace_flags ) - assert calls == ['openat', 'fstat', 'ioctl', 'lseek', 'lseek', 'fstat', - 'read', 'read', 'close'] + self.assertEqual(calls, ['openat', 'fstat', 'ioctl', 'lseek', 'lseek', + 'fstat', 'read', 'read', 'close']) class CAutoFileTests(AutoFileTests, unittest.TestCase): From 283a077ed2deec1b158cb5ae20d2cee2d6ff4cc6 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Fri, 28 Jun 2024 20:32:45 -0700 Subject: [PATCH 05/19] Allow libc to use different fstat variants Running on my dev machine, ArchLinux, get fstat while Ubuntu CI machine finds newfstatat --- Lib/test/test_fileio.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index 6f128cfffffa90..0e32fac76dedff 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -383,8 +383,14 @@ def test_syscalls_read(self): """, _strace_flags ) - self.assertEqual(calls, ['openat', 'fstat', 'ioctl', 'lseek', - 'lseek', 'fstat', 'read', 'read', 'close']) + + # There are a number of related syscalls used to implement `fstat` in + # a libc (ex. fstat, newfstatat). Default to fstat, but if we see the + # system using a variant, allow that + fstat = next((sc for sc in calls if 'fstat' in sc), 'fstat') + + self.assertEqual(calls, ['openat', fstat, 'ioctl', 'lseek', + 'lseek', fstat, 'read', 'read', 'close']) # Focus on just `read()` calls = strace_helper.get_syscalls( @@ -393,7 +399,7 @@ def test_syscalls_read(self): cleanup="f.close()", strace_flags=_strace_flags ) - self.assertEqual(calls, ['lseek', 'fstat', 'read', 'read']) + self.assertEqual(calls, ['lseek', fstat, 'read', 'read']) # Readall in binary mode calls = strace_helper.get_syscalls( @@ -404,8 +410,8 @@ def test_syscalls_read(self): """, _strace_flags ) - self.assertEqual(calls, ['openat', 'fstat', 'ioctl', 'lseek', 'lseek', - 'fstat', 'read', 'read', 'close']) + self.assertEqual(calls, ['openat', fstat, 'ioctl', 'lseek', 'lseek', + fstat, 'read', 'read', 'close']) # Readall in text mode calls = strace_helper.get_syscalls( @@ -416,8 +422,8 @@ def test_syscalls_read(self): """, _strace_flags ) - self.assertEqual(calls, ['openat', 'fstat', 'ioctl', 'lseek', 'lseek', - 'fstat', 'read', 'read', 'close']) + self.assertEqual(calls, ['openat', fstat, 'ioctl', 'lseek', 'lseek', + fstat, 'read', 'read', 'close']) class CAutoFileTests(AutoFileTests, unittest.TestCase): From 3b6c09441e365d9d05e90c4aaf542f3acbe04805 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Fri, 28 Jun 2024 21:03:26 -0700 Subject: [PATCH 06/19] Exit early if strace exited non-zero --- Lib/test/support/strace_helper.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py index e3de0562eab361..07e51db06de014 100644 --- a/Lib/test/support/strace_helper.py +++ b/Lib/test/support/strace_helper.py @@ -94,6 +94,9 @@ def _make_error(reason, details): except OSError as err: return _make_error("Caught OSError", err) + if check and res.rc: + res.fail(cmd_line) + # Get out program returncode decoded = res.err.decode().strip() @@ -108,7 +111,7 @@ def _make_error(reason, details): output[1][:50]) python_returncode = int(returncode_match["returncode"]) - if check and (res.rc or python_returncode): + if check and python_returncode: res.fail(cmd_line) return StraceResult(strace_returncode=res.rc, From 97b294f021d872807ad14f3b2fa3b8f1c9aedd00 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sat, 29 Jun 2024 01:55:10 -0700 Subject: [PATCH 07/19] Add myself to ACKS --- Misc/ACKS | 1 + 1 file changed, 1 insertion(+) diff --git a/Misc/ACKS b/Misc/ACKS index 53258dbfd9f478..56d182fc5914ca 100644 --- a/Misc/ACKS +++ b/Misc/ACKS @@ -1163,6 +1163,7 @@ Grzegorz Makarewicz David Malcolm Greg Malcolm William Mallard +Cody Maloney Ken Manheimer Vladimir Marangozov Colin Marc From e5bdc6bb4d6780e2debf5f6320b96f655acd357f Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sat, 29 Jun 2024 02:07:57 -0700 Subject: [PATCH 08/19] Add tests around pathilb read_*() behavior --- Lib/test/test_fileio.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index 0e32fac76dedff..fa01cf97981a56 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -389,8 +389,10 @@ def test_syscalls_read(self): # system using a variant, allow that fstat = next((sc for sc in calls if 'fstat' in sc), 'fstat') - self.assertEqual(calls, ['openat', fstat, 'ioctl', 'lseek', - 'lseek', fstat, 'read', 'read', 'close']) + readall_calls = ['openat', fstat, 'ioctl', 'lseek', 'lseek', fstat, + 'read', 'read', 'close'] + + self.assertEqual(calls, readall_calls) # Focus on just `read()` calls = strace_helper.get_syscalls( @@ -410,20 +412,33 @@ def test_syscalls_read(self): """, _strace_flags ) - self.assertEqual(calls, ['openat', fstat, 'ioctl', 'lseek', 'lseek', - fstat, 'read', 'read', 'close']) + self.assertEqual(calls, readall_calls) # Readall in text mode calls = strace_helper.get_syscalls( f""" - f = open('{TESTFN}', 'rb') + f = open('{TESTFN}', 'rt') f.read() f.close() """, _strace_flags ) - self.assertEqual(calls, ['openat', fstat, 'ioctl', 'lseek', 'lseek', - fstat, 'read', 'read', 'close']) + self.assertEqual(calls, readall_calls) + + # text and binary mode, but using pathlib + calls = strace_helper.get_syscalls( + """p.read_bytes()""", + _strace_flags, + prelude=f"""from pathlib import Path; p = Path("{TESTFN}")""" + ) + self.assertEqual(calls, readall_calls) + + calls = strace_helper.get_syscalls( + """p.read_text()""", + _strace_flags, + prelude=f"""from pathlib import Path; p = Path("{TESTFN}")""" + ) + self.assertEqual(calls, readall_calls) class CAutoFileTests(AutoFileTests, unittest.TestCase): From 397cd9e885b2fa8ce0e6ea687ee98ab125b3a1c5 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sat, 29 Jun 2024 02:09:59 -0700 Subject: [PATCH 09/19] Remove subprocess._USE_VFORK strace test --- Lib/test/test_subprocess.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 701ce84e6ac0a3..0321c2916e0be8 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -3468,8 +3468,6 @@ def test_vfork_used_when_expected(self): # Test that each individual thing that would disable the use of vfork # actually disables it. for sub_name, preamble, sp_kwarg, expect_permission_error in ( - # FIXME(cmaloney): _USE_VFORK doesn't work currently. - # ("!use_vfork", "subprocess._USE_VFORK = False", "", False), ("preexec", "", "preexec_fn=lambda: None", False), ("setgid", "", f"group={os.getgid()}", True), ("setuid", "", f"user={os.getuid()}", True), From d99157f3d1e0d2c860e2df93d4a39b44712d8d84 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Thu, 4 Jul 2024 03:31:30 -0700 Subject: [PATCH 10/19] Update call sequence after gh-120755 --- Lib/test/test_fileio.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index fa01cf97981a56..64a422a7b6231d 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -389,8 +389,8 @@ def test_syscalls_read(self): # system using a variant, allow that fstat = next((sc for sc in calls if 'fstat' in sc), 'fstat') - readall_calls = ['openat', fstat, 'ioctl', 'lseek', 'lseek', fstat, - 'read', 'read', 'close'] + readall_calls = ['openat', fstat, 'ioctl', 'lseek', 'read', + 'read', 'close'] self.assertEqual(calls, readall_calls) @@ -401,7 +401,7 @@ def test_syscalls_read(self): cleanup="f.close()", strace_flags=_strace_flags ) - self.assertEqual(calls, ['lseek', fstat, 'read', 'read']) + self.assertEqual(calls, ['read', 'read']) # Readall in binary mode calls = strace_helper.get_syscalls( From 56645581ad6e5adc5582e8ad036462f360ee25c4 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 9 Jul 2024 15:01:16 -0700 Subject: [PATCH 11/19] Add specific python bug links --- Lib/test/test_fileio.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index 64a422a7b6231d..89810adcebda77 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -369,7 +369,8 @@ def test_syscalls_read(self): It's expected as bits of the I/O implementation change, this will need to change. The goal is to catch changes that unintentionally add - additional systemcalls (ex. additional fstat calls has been an issue). + additional systemcalls (ex. additional calls have been looked at in + bpo-21679 and gh-120754). """ self.f.write(b"Hello, World!") self.f.close() From 736d5bc0139ed19d9c4c3592eedf722378c01164 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 9 Jul 2024 15:50:31 -0700 Subject: [PATCH 12/19] Reduce annotations, stay bytes longer, make raw_events non-private --- Lib/test/support/strace_helper.py | 58 +++++++++++++++---------------- Lib/test/test_subprocess.py | 4 +-- 2 files changed, 30 insertions(+), 32 deletions(-) diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py index 07e51db06de014..a153993d40f7f0 100644 --- a/Lib/test/support/strace_helper.py +++ b/Lib/test/support/strace_helper.py @@ -5,13 +5,12 @@ from dataclasses import dataclass from test import support from test.support.script_helper import run_python_until_end -from typing import Dict, List _strace_binary = "/usr/bin/strace" _syscall_regex = re.compile( r"(?P[^(]*)\((?P[^)]*)\)\s*[=]\s*(?P.+)") _returncode_regex = re.compile( - r"\+\+\+ exited with (?P\d+) \+\+\+") + br"\+\+\+ exited with (?P\d+) \+\+\+") # Cached value of whether or not there is a compatible strace binary _strace_working: bool | None = None @@ -20,7 +19,7 @@ @dataclass class StraceEvent: syscall: str - args: List[str] + args: list[str] returncode: str @@ -28,15 +27,22 @@ class StraceEvent: class StraceResult: strace_returncode: int python_returncode: int - _raw_events: str - stdout: str - stderr: str - def events(self) -> List[StraceEvent]: - """Extract the call list information from _raw_events""" + """The event messages generated by strace. This is very similar to the + stderr strace produces with returncode marker section removed.""" + raw_events: bytes + stdout: bytes + stderr: bytes + + def events(self): + """Parse raw_events data into system calls for easier processing. + + This assumes the program under inspection doesn't print any non-utf8 + strings which would mix into the strace output.""" + decoded_events = self.raw_events.decode('utf-8') matches = [ _syscall_regex.match(event) - for event in self._raw_events.splitlines() + for event in decoded_events.splitlines() ] return [ StraceEvent(match["syscall"], @@ -44,12 +50,12 @@ def events(self) -> List[StraceEvent]: match["returncode"]) for match in matches if match ] - def sections(self) -> Dict[str:List[StraceEvent]]: + def sections(self): """Find all "MARK " writes and use them to make groups of events. - This is useful to avoid variable / overhead strace events, like that - at interpreter startup, so a test can just check does the small case - under study work.""" + This is useful to avoid variable / overhead events, like those at + interpreter startup or when opening a file so a test can verify just + the small case under study.""" current_section = "__startup" sections = {current_section: []} for event in self.events(): @@ -68,20 +74,17 @@ def sections(self) -> Dict[str:List[StraceEvent]]: @support.requires_subprocess() -def strace_python(code: str, - strace_flags: List[str], - check: bool = True) -> StraceResult: +def strace_python(code, strace_flags, check = True): """Run strace and return the trace. - Sets strace_returncode and python_returncode to `-1` on error - """ + Sets strace_returncode and python_returncode to `-1` on error.""" res = None def _make_error(reason, details): return StraceResult( strace_returncode=-1, python_returncode=-1, - _raw_events=f"error({reason},details={details}) = -1", + raw_events=f"error({reason},details={details}) = -1".encode('utf-8'), stdout=res.out if res else "", stderr=res.err if res else "") @@ -98,12 +101,11 @@ def _make_error(reason, details): res.fail(cmd_line) # Get out program returncode - decoded = res.err.decode().strip() - - output = decoded.rsplit("\n", 1) + stripped = res.err.strip() + output = stripped.rsplit(b"\n", 1) if len(output) != 2: return _make_error("Expected strace events and exit code line", - decoded[-50:]) + stripped[-50:]) returncode_match = _returncode_regex.match(output[1]) if not returncode_match: @@ -116,13 +118,12 @@ def _make_error(reason, details): return StraceResult(strace_returncode=res.rc, python_returncode=python_returncode, - _raw_events=output[0], + raw_events=output[0], stdout=res.out, stderr=res.err) -def _get_events(code: str, strace_flags: List[str], prelude: str, - cleanup: str) -> List[StraceEvent]: +def _get_events(code, strace_flags, prelude, cleanup): # NOTE: The flush is currently required to prevent the prints from getting # buffered and done all at once at exit prelude = textwrap.dedent(prelude) @@ -142,10 +143,7 @@ def _get_events(code: str, strace_flags: List[str], prelude: str, return all_sections['code'] -def get_syscalls(code: str, - strace_flags: List[str], - prelude: str = "", - cleanup: str = "") -> List[str]: +def get_syscalls(code, strace_flags, prelude = "", cleanup = ""): """Get the syscalls which a given chunk of python code generates""" events = _get_events(code, strace_flags, prelude=prelude, cleanup=cleanup) return [ev.syscall for ev in events] diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index 0321c2916e0be8..d3298476f294c2 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -3461,7 +3461,7 @@ def test_vfork_used_when_expected(self): strace_args ) # Match both vfork() and clone(..., flags=...|CLONE_VFORK|...) - self.assertRegex(vfork_result._raw_events, r"(?i)vfork") + self.assertRegex(vfork_result.raw_events, rb"(?i)vfork") # Do NOT check that fork() or other clones did not happen. # If the OS denys the vfork it'll fallback to plain fork(). @@ -3487,7 +3487,7 @@ def test_vfork_used_when_expected(self): strace_args ) # Ensure neither vfork() or clone(..., flags=...|CLONE_VFORK|...). - self.assertNotRegex(non_vfork_result._raw_events, r"(?i)vfork") + self.assertNotRegex(non_vfork_result.raw_events, rb"(?i)vfork") @unittest.skipUnless(mswindows, "Windows specific tests") From 47ed7fecb8c462f0799059759dccca0c16fe3e87 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 9 Jul 2024 15:56:13 -0700 Subject: [PATCH 13/19] Move _strace_working checks to all be in requires_strace --- Lib/test/support/strace_helper.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py index a153993d40f7f0..5c096ae295384d 100644 --- a/Lib/test/support/strace_helper.py +++ b/Lib/test/support/strace_helper.py @@ -153,16 +153,17 @@ def _can_strace(): res = strace_python("import sys; sys.exit(0)", [], check=False) assert res.events(), "Should have parsed multiple calls" - global _strace_working - _strace_working = res.strace_returncode == 0 and res.python_returncode == 0 + return res.strace_returncode == 0 and res.python_returncode == 0 def requires_strace(): + global _strace_working + if sys.platform != "linux": return unittest.skip("Linux only, requires strace.") # Moderately expensive (spawns a subprocess), so share results when possible. if _strace_working is None: - _can_strace() + _strace_working = _can_strace() assert _strace_working is not None, "Should have been set by _can_strace" return unittest.skipUnless(_strace_working, "Requires working strace") From 55d1cec4268e0a04480803ee730121149597f668 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 9 Jul 2024 17:39:54 -0700 Subject: [PATCH 14/19] formatting fixes, reduce annotations further --- Lib/test/support/strace_helper.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py index 5c096ae295384d..5f01f31d7655c3 100644 --- a/Lib/test/support/strace_helper.py +++ b/Lib/test/support/strace_helper.py @@ -13,7 +13,7 @@ br"\+\+\+ exited with (?P\d+) \+\+\+") # Cached value of whether or not there is a compatible strace binary -_strace_working: bool | None = None +_strace_working = None @dataclass @@ -74,7 +74,7 @@ def sections(self): @support.requires_subprocess() -def strace_python(code, strace_flags, check = True): +def strace_python(code, strace_flags, check=True): """Run strace and return the trace. Sets strace_returncode and python_returncode to `-1` on error.""" @@ -143,7 +143,7 @@ def _get_events(code, strace_flags, prelude, cleanup): return all_sections['code'] -def get_syscalls(code, strace_flags, prelude = "", cleanup = ""): +def get_syscalls(code, strace_flags, prelude="", cleanup=""): """Get the syscalls which a given chunk of python code generates""" events = _get_events(code, strace_flags, prelude=prelude, cleanup=cleanup) return [ev.syscall for ev in events] @@ -169,4 +169,4 @@ def requires_strace(): return unittest.skipUnless(_strace_working, "Requires working strace") -__all__ = ["requires_strace", "strace_python", "StraceResult", "StraceEvent"] +__all__ = ["requires_strace", "strace_python", "StraceEvent", "StraceResult"] From 6fe0961b480f6d96e21d74988299da4f8a1a24e4 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 9 Jul 2024 18:02:05 -0700 Subject: [PATCH 15/19] Small cleanups from self review --- Lib/test/support/strace_helper.py | 2 +- Lib/test/test_subprocess.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py index 5f01f31d7655c3..583a2786183bd5 100644 --- a/Lib/test/support/strace_helper.py +++ b/Lib/test/support/strace_helper.py @@ -165,7 +165,7 @@ def requires_strace(): if _strace_working is None: _strace_working = _can_strace() - assert _strace_working is not None, "Should have been set by _can_strace" + assert isinstance(_strace_working, bool), "Should have been set by here" return unittest.skipUnless(_strace_working, "Requires working strace") diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index d3298476f294c2..ae9208a255b3d6 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -3461,7 +3461,7 @@ def test_vfork_used_when_expected(self): strace_args ) # Match both vfork() and clone(..., flags=...|CLONE_VFORK|...) - self.assertRegex(vfork_result.raw_events, rb"(?i)vfork") + self.assertRegex(vfork_result.raw_events, br"(?i)vfork") # Do NOT check that fork() or other clones did not happen. # If the OS denys the vfork it'll fallback to plain fork(). @@ -3487,7 +3487,7 @@ def test_vfork_used_when_expected(self): strace_args ) # Ensure neither vfork() or clone(..., flags=...|CLONE_VFORK|...). - self.assertNotRegex(non_vfork_result.raw_events, rb"(?i)vfork") + self.assertNotRegex(non_vfork_result.raw_events, br"(?i)vfork") @unittest.skipUnless(mswindows, "Windows specific tests") From 2ea2bc8584538e3fa19676b00e76d3c23f12da8a Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 9 Jul 2024 19:24:57 -0700 Subject: [PATCH 16/19] Adjust test cases to match more general system call shape --- Lib/test/test_fileio.py | 92 ++++++++++++++++++++++------------------- 1 file changed, 50 insertions(+), 42 deletions(-) diff --git a/Lib/test/test_fileio.py b/Lib/test/test_fileio.py index 89810adcebda77..bded9c8b8d3004 100644 --- a/Lib/test/test_fileio.py +++ b/Lib/test/test_fileio.py @@ -375,71 +375,79 @@ def test_syscalls_read(self): self.f.write(b"Hello, World!") self.f.close() - # "open, read, close" file with default options. - calls = strace_helper.get_syscalls( - f""" - f = open('{TESTFN}') - f.read() - f.close() - """, - _strace_flags - ) - # There are a number of related syscalls used to implement `fstat` in - # a libc (ex. fstat, newfstatat). Default to fstat, but if we see the - # system using a variant, allow that - fstat = next((sc for sc in calls if 'fstat' in sc), 'fstat') + def check_readall(name, code, prelude="", cleanup=""): + with self.subTest(name=name): + syscalls = strace_helper.get_syscalls(code, _strace_flags, + prelude=prelude, + cleanup=cleanup) - readall_calls = ['openat', fstat, 'ioctl', 'lseek', 'read', - 'read', 'close'] + # There are a number of related syscalls used to implement + # behaviors in a libc (ex. fstat, newfstatat, open, openat). + # Allow any that use the same substring. + def count_similarname(name): + return len([sc for sc in syscalls if name in sc]) - self.assertEqual(calls, readall_calls) + # Should open and close the file exactly once + self.assertEqual(count_similarname('open'), 1) + self.assertEqual(count_similarname('close'), 1) - # Focus on just `read()` - calls = strace_helper.get_syscalls( - prelude=f"f = open('{TESTFN}')", - code="f.read()", - cleanup="f.close()", - strace_flags=_strace_flags + # Should only have one fstat (bpo-21679, gh-120754) + self.assertEqual(count_similarname('fstat'), 1) + + + # "open, read, close" file using different common patterns. + check_readall( + "open builtin with default options", + f""" + f = open('{TESTFN}') + f.read() + f.close() + """ ) - self.assertEqual(calls, ['read', 'read']) - # Readall in binary mode - calls = strace_helper.get_syscalls( + check_readall( + "open in binary mode", f""" f = open('{TESTFN}', 'rb') f.read() f.close() - """, - _strace_flags + """ ) - self.assertEqual(calls, readall_calls) - # Readall in text mode - calls = strace_helper.get_syscalls( + check_readall( + "open in text mode", f""" f = open('{TESTFN}', 'rt') f.read() f.close() - """, - _strace_flags + """ ) - self.assertEqual(calls, readall_calls) - # text and binary mode, but using pathlib - calls = strace_helper.get_syscalls( - """p.read_bytes()""", - _strace_flags, + check_readall( + "pathlib read_bytes", + "p.read_bytes()", prelude=f"""from pathlib import Path; p = Path("{TESTFN}")""" + ) - self.assertEqual(calls, readall_calls) - calls = strace_helper.get_syscalls( - """p.read_text()""", - _strace_flags, + check_readall( + "pathlib read_text", + "p.read_text()", prelude=f"""from pathlib import Path; p = Path("{TESTFN}")""" ) - self.assertEqual(calls, readall_calls) + + # Focus on just `read()`. + calls = strace_helper.get_syscalls( + prelude=f"f = open('{TESTFN}')", + code="f.read()", + cleanup="f.close()", + strace_flags=_strace_flags + ) + # One to read all the bytes + # One to read the EOF and get a size 0 return. + self.assertEqual(calls.count("read"), 2) + class CAutoFileTests(AutoFileTests, unittest.TestCase): From cdf449aca56b70cf8e76cde49fd1eb48b3fc5fce Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 9 Jul 2024 20:15:12 -0700 Subject: [PATCH 17/19] raw_events -> event_bytes --- Lib/test/support/strace_helper.py | 8 ++++---- Lib/test/test_subprocess.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py index 583a2786183bd5..ff7b25f4813974 100644 --- a/Lib/test/support/strace_helper.py +++ b/Lib/test/support/strace_helper.py @@ -30,16 +30,16 @@ class StraceResult: """The event messages generated by strace. This is very similar to the stderr strace produces with returncode marker section removed.""" - raw_events: bytes + event_bytes: bytes stdout: bytes stderr: bytes def events(self): - """Parse raw_events data into system calls for easier processing. + """Parse event_bytes data into system calls for easier processing. This assumes the program under inspection doesn't print any non-utf8 strings which would mix into the strace output.""" - decoded_events = self.raw_events.decode('utf-8') + decoded_events = self.event_bytes.decode('utf-8') matches = [ _syscall_regex.match(event) for event in decoded_events.splitlines() @@ -84,7 +84,7 @@ def _make_error(reason, details): return StraceResult( strace_returncode=-1, python_returncode=-1, - raw_events=f"error({reason},details={details}) = -1".encode('utf-8'), + event_bytes=f"error({reason},details={details}) = -1".encode('utf-8'), stdout=res.out if res else "", stderr=res.err if res else "") diff --git a/Lib/test/test_subprocess.py b/Lib/test/test_subprocess.py index ae9208a255b3d6..b430f97b1cf43a 100644 --- a/Lib/test/test_subprocess.py +++ b/Lib/test/test_subprocess.py @@ -3461,7 +3461,7 @@ def test_vfork_used_when_expected(self): strace_args ) # Match both vfork() and clone(..., flags=...|CLONE_VFORK|...) - self.assertRegex(vfork_result.raw_events, br"(?i)vfork") + self.assertRegex(vfork_result.event_bytes, br"(?i)vfork") # Do NOT check that fork() or other clones did not happen. # If the OS denys the vfork it'll fallback to plain fork(). @@ -3487,7 +3487,7 @@ def test_vfork_used_when_expected(self): strace_args ) # Ensure neither vfork() or clone(..., flags=...|CLONE_VFORK|...). - self.assertNotRegex(non_vfork_result.raw_events, br"(?i)vfork") + self.assertNotRegex(non_vfork_result.event_bytes, br"(?i)vfork") @unittest.skipUnless(mswindows, "Windows specific tests") From c44bca64510dc9af5601db5c69c59b84fba35ee3 Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Tue, 9 Jul 2024 20:37:56 -0700 Subject: [PATCH 18/19] Add bits I forgot to commit --- Lib/test/support/strace_helper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py index ff7b25f4813974..b222799782b68a 100644 --- a/Lib/test/support/strace_helper.py +++ b/Lib/test/support/strace_helper.py @@ -118,7 +118,7 @@ def _make_error(reason, details): return StraceResult(strace_returncode=res.rc, python_returncode=python_returncode, - raw_events=output[0], + event_bytes=output[0], stdout=res.out, stderr=res.err) From 0c6ebe6007b7ab220d5bc5fccdf3b53f5da69aec Mon Sep 17 00:00:00 2001 From: Cody Maloney Date: Sat, 17 Aug 2024 21:28:58 -0700 Subject: [PATCH 19/19] Switch to functools.cache, simplifying the code --- Lib/test/support/strace_helper.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/Lib/test/support/strace_helper.py b/Lib/test/support/strace_helper.py index b222799782b68a..b881670b89751d 100644 --- a/Lib/test/support/strace_helper.py +++ b/Lib/test/support/strace_helper.py @@ -3,6 +3,7 @@ import textwrap import unittest from dataclasses import dataclass +from functools import cache from test import support from test.support.script_helper import run_python_until_end @@ -12,9 +13,6 @@ _returncode_regex = re.compile( br"\+\+\+ exited with (?P\d+) \+\+\+") -# Cached value of whether or not there is a compatible strace binary -_strace_working = None - @dataclass class StraceEvent: @@ -149,6 +147,8 @@ def get_syscalls(code, strace_flags, prelude="", cleanup=""): return [ev.syscall for ev in events] +# Moderately expensive (spawns a subprocess), so share results when possible. +@cache def _can_strace(): res = strace_python("import sys; sys.exit(0)", [], check=False) assert res.events(), "Should have parsed multiple calls" @@ -157,16 +157,10 @@ def _can_strace(): def requires_strace(): - global _strace_working - if sys.platform != "linux": return unittest.skip("Linux only, requires strace.") - # Moderately expensive (spawns a subprocess), so share results when possible. - if _strace_working is None: - _strace_working = _can_strace() - assert isinstance(_strace_working, bool), "Should have been set by here" - return unittest.skipUnless(_strace_working, "Requires working strace") + return unittest.skipUnless(_can_strace(), "Requires working strace") __all__ = ["requires_strace", "strace_python", "StraceEvent", "StraceResult"]