From 9f1b78b149874a3909ab21f306c670f71ddc7cf8 Mon Sep 17 00:00:00 2001 From: Sebastian Rittau Date: Mon, 21 Apr 2025 23:15:05 +0200 Subject: [PATCH] gh-122179: Fix hashlib.file_digest and non-blocking I/O (GH-122183) * Fix hashlib.file_digest and non-blocking I/O * Add documentation around this behavior * Add versionchanged (cherry picked from commit 2b47f46d7dc30d27b2486991fea4acd83553294b) --- Doc/library/hashlib.rst | 9 ++++++++- Lib/hashlib.py | 2 ++ Lib/test/test_hashlib.py | 9 +++++++++ .../2024-07-23-17-08-41.gh-issue-122179.0jZm9h.rst | 3 +++ 4 files changed, 22 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2024-07-23-17-08-41.gh-issue-122179.0jZm9h.rst diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst index 7bf6152311f058..ff15a08a792ed2 100644 --- a/Doc/library/hashlib.rst +++ b/Doc/library/hashlib.rst @@ -270,7 +270,10 @@ a file or file-like object. *fileobj* must be a file-like object opened for reading in binary mode. It accepts file objects from builtin :func:`open`, :class:`~io.BytesIO` instances, SocketIO objects from :meth:`socket.socket.makefile`, and - similar. The function may bypass Python's I/O and use the file descriptor + similar. *fileobj* must be opened in blocking mode, otherwise a + :exc:`BlockingIOError` may be raised. + + The function may bypass Python's I/O and use the file descriptor from :meth:`~io.IOBase.fileno` directly. *fileobj* must be assumed to be in an unknown state after this function returns or raises. It is up to the caller to close *fileobj*. @@ -299,6 +302,10 @@ a file or file-like object. .. versionadded:: 3.11 + .. versionchanged:: next + Now raises a :exc:`BlockingIOError` if the file is opened in non-blocking + mode. Previously, spurious null bytes were added to the digest. 
+ Key derivation -------------- diff --git a/Lib/hashlib.py b/Lib/hashlib.py index 1b16441cb60ba7..296210e5d30d1d 100644 --- a/Lib/hashlib.py +++ b/Lib/hashlib.py @@ -231,6 +231,8 @@ def file_digest(fileobj, digest, /, *, _bufsize=2**18): view = memoryview(buf) while True: size = fileobj.readinto(buf) + if size is None: + raise BlockingIOError("I/O operation would block.") if size == 0: break # EOF digestobj.update(view[:size]) diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py index a3693f5b8934f7..48621f47af038b 100644 --- a/Lib/test/test_hashlib.py +++ b/Lib/test/test_hashlib.py @@ -1190,6 +1190,15 @@ def test_file_digest(self): with open(os_helper.TESTFN, "wb") as f: hashlib.file_digest(f, "sha256") + class NonBlocking: + def readinto(self, buf): + return None + def readable(self): + return True + + with self.assertRaises(BlockingIOError): + hashlib.file_digest(NonBlocking(), hashlib.sha256) + if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2024-07-23-17-08-41.gh-issue-122179.0jZm9h.rst b/Misc/NEWS.d/next/Library/2024-07-23-17-08-41.gh-issue-122179.0jZm9h.rst new file mode 100644 index 00000000000000..2b0678f31e8ef6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-07-23-17-08-41.gh-issue-122179.0jZm9h.rst @@ -0,0 +1,3 @@ +:func:`hashlib.file_digest` now raises :exc:`BlockingIOError` when no data +is available during non-blocking I/O. Before, it added spurious null bytes +to the digest.