gh-120754: Add a strace helper and test set of syscalls for open().read(), Take 2 by cmaloney · Pull Request #123413 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-120754: Add a strace helper and test set of syscalls for open().read(), Take 2 #123413

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Nov 3, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Adjustments after revert
NOTE: This needs a full buildbot test pass before merge, see: #121143 (comment).

1. Added `statx` to the set of allowed syscall forms (should make the Raspbian bot pass).
2. Check that the `fd` returned from `open` is passed to all future calls. This helps ensure things like the `stat` call use the file descriptor rather than the `filename` to avoid TOCTOU issues.
3. Update the `Path().read_bytes()` test case to additionally validate the reduction in `isatty`/`ioctl` + `seek` calls from #122111
4. Better diagnostic assertion messages from @gpshead, so that when the test fails the key information is immediately available. Makes remote CI debugging much simpler.
  • Loading branch information
cmaloney committed Aug 28, 2024
commit d3ecdbaabc56bfe020459f447e7d2f0146185a14
4 changes: 2 additions & 2 deletions Lib/test/support/strace_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def _make_error(reason, details):
stderr=res.err)


def _get_events(code, strace_flags, prelude, cleanup):
def get_events(code, strace_flags, prelude, cleanup):
# NOTE: The flush is currently required to prevent the prints from getting
# buffered and done all at once at exit
prelude = textwrap.dedent(prelude)
Expand All @@ -143,7 +143,7 @@ def _get_events(code, strace_flags, prelude, cleanup):

def get_syscalls(code, strace_flags, prelude="", cleanup=""):
"""Get the syscalls which a given chunk of python code generates"""
events = _get_events(code, strace_flags, prelude=prelude, cleanup=cleanup)
events = get_events(code, strace_flags, prelude=prelude, cleanup=cleanup)
return [ev.syscall for ev in events]


Expand Down
78 changes: 63 additions & 15 deletions Lib/test/test_fileio.py
A62B
Original file line number Diff line number Diff line change
Expand Up @@ -376,25 +376,62 @@ def test_syscalls_read(self):
self.f.close()


def check_readall(name, code, prelude="", cleanup=""):
def check_readall(name, code, prelude="", cleanup="",
extra_checks=None):
with self.subTest(name=name):
syscalls = strace_helper.get_syscalls(code, _strace_flags,
syscalls = strace_helper.get_events(code, _strace_flags,
prelude=prelude,
cleanup=cleanup)

# The first call should be an open that returns a
# file descriptor (fd). After that calls may vary. Once the file
# is opened, check calls refer to it by fd as the filename
# could be removed from the filesystem, renamed, etc. See:
# Time-of-check time-of-use (TOCTOU) software bug class.
#
# There are a number of related but distinct open system calls
# so not checking precise name here.
self.assertGreater(
len(syscalls),
1,
f"Should have had at least an open call|calls={syscalls}")
fd_str = syscalls[0].returncode

# All other calls should contain the fd in their argument set.
for ev in syscalls[1:]:
self.assertIn(
fd_str,
ev.args,
f"Looking for file descriptor in arguments|ev={ev}"
)

# There are a number of related syscalls used to implement
# behaviors in a libc (ex. fstat, newfstatat, open, openat).
# behaviors in a libc (ex. fstat, newfstatat, statx, open, openat).
# Allow any that use the same substring.
def count_similarname(name):
return len([sc for sc in syscalls if name in sc])

# Should open and close the file exactly once
self.assertEqual(count_similarname('open'), 1)
self.assertEqual(count_similarname('close'), 1)

# Should only have one fstat (bpo-21679, gh-120754)
self.assertEqual(count_similarname('fstat'), 1)

return len([ev for ev in syscalls if name in ev.syscall])

checks = [
# Should open and close the file exactly once
("open", 1),
("close", 1),
# Should only have one fstat (bpo-21679, gh-120754)
# note: It's important this uses a fd rather than filename,
# That is validated by the `fd` check above.
# note: fstat, newfstatat, and statx have all been observed
# here in the underlying C library implementations.
("stat", 1)
]

if extra_checks:
checks += extra_checks

for call, count in checks:
self.assertEqual(
count_similarname(call),
count,
msg=f"call={call}|count={count}|syscalls={syscalls}"
)

# "open, read, close" file using different common patterns.
check_readall(
Expand All @@ -421,14 +458,25 @@ def count_similarname(name):
f = open('{TESTFN}', 'rt')
f.read()
f.close()
"""
""",
# GH-122111: read_text uses BufferedIO which requires looking up
# position in file. `read_bytes` disables that buffering (checked
# next) and avoids these calls.
extra_checks=[
("ioctl", 1),
("seek", 1)
]
)

check_readall(
"pathlib read_bytes",
"p.read_bytes()",
prelude=f"""from pathlib import Path; p = Path("{TESTFN}")"""

prelude=f"""from pathlib import Path; p = Path("{TESTFN}")""",
# GH-122111: Buffering is disabled so these calls are avoided.
extra_checks=[
("ioctl", 0),
("seek", 0)
]
)

check_readall(
Expand Down
Loading
0