[3.13] gh-112346: Always set OS byte to 255, simpler gzip.compress function. (GH-120486) (#120563) · python/cpython@a19bb26 · GitHub
[go: up one dir, main page]

Skip to content

Commit a19bb26

Browse files
[3.13] gh-112346: Always set OS byte to 255, simpler gzip.compress function. (GH-120486) (#120563)
gh-112346: Always set OS byte to 255, simpler gzip.compress function. (GH-120486) This matches the output behavior in 3.10 and earlier; the optimization in 3.11 allowed the zlib library's "os" value to be filled in instead when mtime was 0. This keeps things consistent. (cherry picked from commit 08d09cf) Co-authored-by: Ruben Vorderman <r.h.p.vorderman@lumc.nl>
1 parent 3a9f438 commit a19bb26

File tree

4 files changed

+26
-34
lines changed

4 files changed

+26
-34
lines changed

Doc/library/gzip.rst

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,7 @@ The module defines the following items:
188188

189189
Compress the *data*, returning a :class:`bytes` object containing
190190
the compressed data. *compresslevel* and *mtime* have the same meaning as in
191-
the :class:`GzipFile` constructor above. When *mtime* is set to ``0``, this
192-
function is equivalent to :func:`zlib.compress` with *wbits* set to ``31``.
193-
The zlib function is faster.
191+
the :class:`GzipFile` constructor above.
194192

195193
.. versionadded:: 3.2
196194
.. versionchanged:: 3.8
@@ -200,6 +198,10 @@ The module defines the following items:
200198
streamed fashion. Calls with *mtime* set to ``0`` are delegated to
201199
:func:`zlib.compress` for better speed.
202200

201+
.. versionchanged:: 3.13
202+
The gzip header OS byte is guaranteed to be set to 255 when this function
203+
is used, as was the case in 3.10 and earlier.
204+
203205
.. function:: decompress(data)
204206

205207
Decompress the *data*, returning a :class:`bytes` object containing the

Lib/gzip.py

Lines changed: 8 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -580,43 +580,21 @@ def _rewind(self):
580580
self._new_member = True
581581

582582

583-
def _create_simple_gzip_header(compresslevel: int,
584-
mtime = None) -> bytes:
585-
"""
586-
Write a simple gzip header with no extra fields.
587-
:param compresslevel: Compresslevel used to determine the xfl bytes.
588-
:param mtime: The mtime (must support conversion to a 32-bit integer).
589-
:return: A bytes object representing the gzip header.
590-
"""
591-
if mtime is None:
592-
mtime = time.time()
593-
if compresslevel == _COMPRESS_LEVEL_BEST:
594-
xfl = 2
595-
elif compresslevel == _COMPRESS_LEVEL_FAST:
596-
xfl = 4
597-
else:
598-
xfl = 0
599-
# Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra
600-
# fields added to header), mtime, xfl and os (255 for unknown OS).
601-
return struct.pack("<BBBBLBB", 0x1f, 0x8b, 8, 0, int(mtime), xfl, 255)
602-
603-
604583
def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
605584
"""Compress data in one shot and return the compressed string.
606585
607586
compresslevel sets the compression level in range of 0-9.
608587
mtime can be used to set the modification time. The modification time is
609588
set to the current time by default.
610589
"""
611-
if mtime == 0:
612-
# Use zlib as it creates the header with 0 mtime by default.
613-
# This is faster and with less overhead.
614-
return zlib.compress(data, level=compresslevel, wbits=31)
615-
header = _create_simple_gzip_header(compresslevel, mtime)
616-
trailer = struct.pack("<LL", zlib.crc32(data), (len(data) & 0xffffffff))
617-
# Wbits=-15 creates a raw deflate block.
618-
return (header + zlib.compress(data, level=compresslevel, wbits=-15) +
619-
trailer)
590+
# Wbits=31 automatically includes a gzip header and trailer.
591+
gzip_data = zlib.compress(data, level=compresslevel, wbits=31)
592+
if mtime is None:
593+
mtime = time.time()
594+
# Reuse gzip header created by zlib, replace mtime and OS byte for
595+
# consistency.
596+
header = struct.pack("<4sLBB", gzip_data, int(mtime), gzip_data[8], 255)
597+
return header + gzip_data[10:]
620598

621599

622600
def decompress(data):

Lib/test/test_gzip.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -714,14 +714,24 @@ def test_compress_mtime(self):
714714
self.assertEqual(f.mtime, mtime)
715715

716716
def test_compress_correct_level(self):
717-
# gzip.compress calls with mtime == 0 take a different code path.
718717
for mtime in (0, 42):
719718
with self.subTest(mtime=mtime):
720719
nocompress = gzip.compress(data1, compresslevel=0, mtime=mtime)
721720
yescompress = gzip.compress(data1, compresslevel=1, mtime=mtime)
722721
self.assertIn(data1, nocompress)
723722
self.assertNotIn(data1, yescompress)
724723

724+
def test_issue112346(self):
725+
# The OS byte should be 255; this should not change between Python versions.
726+
for mtime in (0, 42):
727+
with self.subTest(mtime=mtime):
728+
compress = gzip.compress(data1, compresslevel=1, mtime=mtime)
729+
self.assertEqual(
730+
struct.unpack("<IxB", compress[4:10]),
731+
(mtime, 255),
732+
"Gzip header does not properly set either mtime or OS byte."
733+
)
734+
725735
def test_decompress(self):
726736
for data in (data1, data2):
727737
buf = io.BytesIO()
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
The OS byte in gzip headers is now always set to 255 when using
2+
:func:`gzip.compress`.

0 commit comments

Comments
 (0)
0