diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 62d22150f50da5..2ea47978ff6f1c 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -1231,6 +1231,8 @@ def _proc_pax(self, tarfile): length, keyword = match.groups() length = int(length) + if length == 0: + raise InvalidHeaderError("invalid header") value = buf[match.end(2) + 1:match.start(1) + length - 1] # Normally, we could just use "utf-8" as the encoding and "strict" diff --git a/Lib/test/recursion.tar b/Lib/test/recursion.tar new file mode 100644 index 00000000000000..b8237251964983 Binary files /dev/null and b/Lib/test/recursion.tar differ diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index 4cd7d5370f58d6..573be812eaa179 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -395,6 +395,13 @@ def test_premature_end_of_archive(self): with self.assertRaisesRegex(tarfile.ReadError, "unexpected end of data"): tar.extractfile(t).read() + def test_length_zero_header(self): + # bpo-39017 (CVE-2019-20907): reading a zero-length header should fail + # with an exception + with self.assertRaisesRegex(tarfile.ReadError, "file could not be opened successfully"): + with tarfile.open(support.findfile('recursion.tar')) as tar: + pass + class MiscReadTestBase(CommonReadTest): def requires_name_attribute(self): pass diff --git a/Misc/NEWS.d/next/Library/2020-07-12-22-16-58.bpo-39017.x3Cg-9.rst b/Misc/NEWS.d/next/Library/2020-07-12-22-16-58.bpo-39017.x3Cg-9.rst new file mode 100644 index 00000000000000..ad26676f8b8563 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2020-07-12-22-16-58.bpo-39017.x3Cg-9.rst @@ -0,0 +1 @@ +Avoid infinite loop when reading specially crafted TAR files using the tarfile module (CVE-2019-20907).