From 608876cfb70be5315c279f40410b2544647237d9 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Mar 2021 07:40:53 +0100 Subject: [PATCH 01/22] Add test for zlib.compress wbits --- Lib/test/test_zlib.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index cb0610837baafc..c7ce4ad1547069 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -831,6 +831,12 @@ def test_wbits(self): dco = zlib.decompressobj(32 + 15) self.assertEqual(dco.decompress(gzip), HAMLET_SCENE) + for wbits in (-15, 15, 31): + self.assertEqual( + zlib.decompress( + zlib.compress(HAMLET_SCENE, wbits=wbits), wbits=wbits + ), HAMLET_SCENE) + def choose_lines(source, number, seed=None, generator=random): """Return a list of number lines randomly chosen from the source""" From 12fb5a545725355abf0a2b1868a4a23863d64790 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Mar 2021 08:07:36 +0100 Subject: [PATCH 02/22] Add wbits argument to zlib.compress --- Modules/clinic/zlibmodule.c.h | 30 ++++++++++++++++++++++++------ Modules/zlibmodule.c | 6 ++++-- 2 files changed, 28 insertions(+), 8 deletions(-) diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h index 14e955db64e729..a4da381e65a5d5 100644 --- a/Modules/clinic/zlibmodule.c.h +++ b/Modules/clinic/zlibmodule.c.h @@ -17,18 +17,19 @@ PyDoc_STRVAR(zlib_compress__doc__, {"compress", (PyCFunction)(void(*)(void))zlib_compress, METH_FASTCALL|METH_KEYWORDS, zlib_compress__doc__}, static PyObject * -zlib_compress_impl(PyObject *module, Py_buffer *data, int level); +zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits); static PyObject * zlib_compress(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; - static const char * const _keywords[] = {"", "level", NULL}; + static const char * const _keywords[] = {"", "level", "wbits", NULL}; static _PyArg_Parser _parser = {NULL, 
_keywords, "compress", 0}; PyObject *argsbuf[2]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int level = Z_DEFAULT_COMPRESSION; + int wbits = MAX_WBITS; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); if (!args) { @@ -44,12 +45,29 @@ zlib_compress(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec if (!noptargs) { goto skip_optional_pos; } - level = _PyLong_AsInt(args[1]); - if (level == -1 && PyErr_Occurred()) { - goto exit; + if (args[1]) { + level = _PyLong_AsInt(args[1]); + if (level == -1 && PyErr_Occurred()) { + goto exit; + } + if (!--noptargs) { + goto skip_optional_pos; + } + } + { + int ival = -1; + PyObject *iobj = _PyNumber_Index(args[2]); + if (iobj != NULL) { + ival = _PyLong_AsInt(iobj); + Py_DECREF(iobj); + } + if (ival == -1 && PyErr_Occurred()) { + goto exit; + } + wbits = ival; } skip_optional_pos: - return_value = zlib_compress_impl(module, &data, level); + return_value = zlib_compress_impl(module, &data, level, wbits); exit: /* Cleanup for data */ diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 3efb24a679caa2..374b019991b713 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -310,12 +310,14 @@ zlib.compress / level: int(c_default="Z_DEFAULT_COMPRESSION") = Z_DEFAULT_COMPRESSION Compression level, in 0-9 or -1. + wbits: int(c_default="MAX_WBITS") = MAX_WBITS + The window buffer size and container format. Returns a bytes object containing compressed data. 
[clinic start generated code]*/ static PyObject * -zlib_compress_impl(PyObject *module, Py_buffer *data, int level) +zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits) /*[clinic end generated code: output=d80906d73f6294c8 input=638d54b6315dbed3]*/ { PyObject *RetVal; @@ -336,7 +338,7 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level) zst.zalloc = PyZlib_Malloc; zst.zfree = PyZlib_Free; zst.next_in = ibuf; - int err = deflateInit(&zst, level); + int err = deflateInit2(&zst, level, DEFLATED, wbits, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); switch (err) { case Z_OK: From 1b345105cee55dbbc9aedbd876f8f48fd67bbbcf Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Mar 2021 09:26:06 +0100 Subject: [PATCH 03/22] Use clinic to generate input --- Modules/clinic/zlibmodule.c.h | 26 ++++++++++---------------- Modules/zlibmodule.c | 2 +- 2 files changed, 11 insertions(+), 17 deletions(-) diff --git a/Modules/clinic/zlibmodule.c.h b/Modules/clinic/zlibmodule.c.h index a4da381e65a5d5..e2a5fccd36c54f 100644 --- a/Modules/clinic/zlibmodule.c.h +++ b/Modules/clinic/zlibmodule.c.h @@ -3,7 +3,7 @@ preserve [clinic start generated code]*/ PyDoc_STRVAR(zlib_compress__doc__, -"compress($module, data, /, level=Z_DEFAULT_COMPRESSION)\n" +"compress($module, data, /, level=Z_DEFAULT_COMPRESSION, wbits=MAX_WBITS)\n" "--\n" "\n" "Returns a bytes object containing compressed data.\n" @@ -11,7 +11,9 @@ PyDoc_STRVAR(zlib_compress__doc__, " data\n" " Binary data to be compressed.\n" " level\n" -" Compression level, in 0-9 or -1."); +" Compression level, in 0-9 or -1.\n" +" wbits\n" +" The window buffer size and container format."); #define ZLIB_COMPRESS_METHODDEF \ {"compress", (PyCFunction)(void(*)(void))zlib_compress, METH_FASTCALL|METH_KEYWORDS, zlib_compress__doc__}, @@ -25,13 +27,13 @@ zlib_compress(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec PyObject *return_value = NULL; static const char * const _keywords[] = {"", 
"level", "wbits", NULL}; static _PyArg_Parser _parser = {NULL, _keywords, "compress", 0}; - PyObject *argsbuf[2]; + PyObject *argsbuf[3]; Py_ssize_t noptargs = nargs + (kwnames ? PyTuple_GET_SIZE(kwnames) : 0) - 1; Py_buffer data = {NULL, NULL}; int level = Z_DEFAULT_COMPRESSION; int wbits = MAX_WBITS; - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 2, 0, argsbuf); + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 3, 0, argsbuf); if (!args) { goto exit; } @@ -54,17 +56,9 @@ zlib_compress(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec goto skip_optional_pos; } } - { - int ival = -1; - PyObject *iobj = _PyNumber_Index(args[2]); - if (iobj != NULL) { - ival = _PyLong_AsInt(iobj); - Py_DECREF(iobj); - } - if (ival == -1 && PyErr_Occurred()) { - goto exit; - } - wbits = ival; + wbits = _PyLong_AsInt(args[2]); + if (wbits == -1 && PyErr_Occurred()) { + goto exit; } skip_optional_pos: return_value = zlib_compress_impl(module, &data, level, wbits); @@ -821,4 +815,4 @@ zlib_crc32(PyObject *module, PyObject *const *args, Py_ssize_t nargs) #ifndef ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #define ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF #endif /* !defined(ZLIB_DECOMPRESS___DEEPCOPY___METHODDEF) */ -/*[clinic end generated code: output=6736bae59fab268b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=e3e8a6142ea045a7 input=a9049054013a1b77]*/ diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index 374b019991b713..b52c5e4a3491ca 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -318,7 +318,7 @@ Returns a bytes object containing compressed data. 
static PyObject * zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits) -/*[clinic end generated code: output=d80906d73f6294c8 input=638d54b6315dbed3]*/ +/*[clinic end generated code: output=46bd152fadd66df2 input=c4d06ee5782a7e3f]*/ { PyObject *RetVal; int flush; From 4e90311b315c91fa810b4a8b4af25ed5d7d6e759 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Mar 2021 09:35:07 +0100 Subject: [PATCH 04/22] Update documentation for zlib --- Doc/library/zlib.rst | 48 ++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 19 deletions(-) diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index ec60ea24db6627..1d2fa1bb34e9f9 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -47,19 +47,43 @@ The available exception and functions in this module are: platforms, use ``adler32(data) & 0xffffffff``. -.. function:: compress(data, /, level=-1) +.. function:: compress(data, /, level=-1, wbits=MAX_WBITS) Compresses the bytes in *data*, returning a bytes object containing compressed data. - *level* is an integer from ``0`` to ``9`` or ``-1`` controlling the level of compression; + + *level* is an integer from ``0`` to ``9`` or ``-1`` controlling the level of compression; ``1`` (Z_BEST_SPEED) is fastest and produces the least compression, ``9`` (Z_BEST_COMPRESSION) is slowest and produces the most. ``0`` (Z_NO_COMPRESSION) is no compression. The default value is ``-1`` (Z_DEFAULT_COMPRESSION). Z_DEFAULT_COMPRESSION represents a default compromise between speed and compression (currently equivalent to level 6). + + .. _compress-wbits: + + The *wbits* argument controls the size of the history buffer (or the + "window size") used when compressing data, and whether a header and + trailer is included in the output. It can take several ranges of values, + defaulting to ``15`` (MAX_WBITS): + + * +9 to +15: The base-two logarithm of the window size, which + therefore ranges between 512 and 32768. 
Larger values produce + better compression at the expense of greater memory usage. The + resulting output will include a zlib-specific header and trailer. + + * −9 to −15: Uses the absolute value of *wbits* as the + window size logarithm, while producing a raw output stream with no + header or trailing checksum. + + * +25 to +31 = 16 + (9 to 15): Uses the low 4 bits of the value as the + window size logarithm, while including a basic :program:`gzip` header + and trailing checksum in the output. + Raises the :exc:`error` exception if any error occurs. .. versionchanged:: 3.6 *level* can now be used as a keyword parameter. + .. versionchanged:: 3.10 + *wbits* parameter added. .. function:: compressobj(level=-1, method=DEFLATED, wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL, strategy=Z_DEFAULT_STRATEGY[, zdict]) @@ -76,23 +100,9 @@ The available exception and functions in this module are: *method* is the compression algorithm. Currently, the only supported value is :const:`DEFLATED`. - The *wbits* argument controls the size of the history buffer (or the - "window size") used when compressing data, and whether a header and - trailer is included in the output. It can take several ranges of values, - defaulting to ``15`` (MAX_WBITS): - - * +9 to +15: The base-two logarithm of the window size, which - therefore ranges between 512 and 32768. Larger values produce - better compression at the expense of greater memory usage. The - resulting output will include a zlib-specific header and trailer. - - * −9 to −15: Uses the absolute value of *wbits* as the - window size logarithm, while producing a raw output stream with no - header or trailing checksum. - - * +25 to +31 = 16 + (9 to 15): Uses the low 4 bits of the value as the - window size logarithm, while including a basic :program:`gzip` header - and trailing checksum in the output. 
+ The *wbits* parameter controls the size of the history buffer (or the + "window size"), and what header and trailer format will be used It has + the same meaning as `described for compress() <#compress-wbits>`__. The *memLevel* argument controls the amount of memory used for the internal compression state. Valid values range from ``1`` to ``9``. From 971725222fe9156494c85776cf3a0cb338c3d7e8 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Mar 2021 09:41:09 +0100 Subject: [PATCH 05/22] Add blurb news entry for zlib.compress wbits parameter --- .../next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst diff --git a/Misc/NEWS.d/next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst b/Misc/NEWS.d/next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst new file mode 100644 index 00000000000000..fb8878e1d302cb --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst @@ -0,0 +1,5 @@ +``zlib.compress`` now accepts a wbits parameter which allows users to +compress data as a raw deflate block without zlib headers and trailers in +one go. Previously this required instantiating a ``zlib.compressobj``. It +also provides a faster alternative to ``gzip.compress`` when wbits=31 is +used. From d70390ebbf71e063291846d95570e05d85d05a2a Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 24 Mar 2021 09:58:40 +0100 Subject: [PATCH 06/22] Fix doc typo --- Doc/library/zlib.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index 1d2fa1bb34e9f9..c0a139b9941365 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -51,7 +51,7 @@ The available exception and functions in this module are: Compresses the bytes in *data*, returning a bytes object containing compressed data. 
- *level* is an integer from ``0`` to ``9`` or ``-1`` controlling the level of compression; + *level* is an integer from ``0`` to ``9`` or ``-1`` controlling the level of compression; ``1`` (Z_BEST_SPEED) is fastest and produces the least compression, ``9`` (Z_BEST_COMPRESSION) is slowest and produces the most. ``0`` (Z_NO_COMPRESSION) is no compression. The default value is ``-1`` (Z_DEFAULT_COMPRESSION). Z_DEFAULT_COMPRESSION represents a default From c819e3ef61e0f43223b08514240c95a8879e2dc0 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 1 Jun 2021 08:19:32 +0200 Subject: [PATCH 07/22] Remove unnecessary whitespace, add punctuation and complete sentences. --- Doc/library/zlib.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Doc/library/zlib.rst b/Doc/library/zlib.rst index c0a139b9941365..793c90f3c4e7a4 100644 --- a/Doc/library/zlib.rst +++ b/Doc/library/zlib.rst @@ -50,7 +50,6 @@ The available exception and functions in this module are: .. function:: compress(data, /, level=-1, wbits=MAX_WBITS) Compresses the bytes in *data*, returning a bytes object containing compressed data. - *level* is an integer from ``0`` to ``9`` or ``-1`` controlling the level of compression; ``1`` (Z_BEST_SPEED) is fastest and produces the least compression, ``9`` (Z_BEST_COMPRESSION) is slowest and produces the most. ``0`` (Z_NO_COMPRESSION) is no compression. @@ -82,8 +81,9 @@ The available exception and functions in this module are: .. versionchanged:: 3.6 *level* can now be used as a keyword parameter. - .. versionchanged:: 3.10 - *wbits* parameter added. + .. versionchanged:: 3.11 + The *wbits* parameter is now available to set window bits and + compression type. .. function:: compressobj(level=-1, method=DEFLATED, wbits=MAX_WBITS, memLevel=DEF_MEM_LEVEL, strategy=Z_DEFAULT_STRATEGY[, zdict]) @@ -101,7 +101,7 @@ The available exception and functions in this module are: :const:`DEFLATED`. 
The *wbits* parameter controls the size of the history buffer (or the - "window size"), and what header and trailer format will be used It has + "window size"), and what header and trailer format will be used. It has the same meaning as `described for compress() <#compress-wbits>`__. The *memLevel* argument controls the amount of memory used for the From 1f3481f60bcbd8673d96b6d6f822677c44aa4474 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 1 Jun 2021 08:22:32 +0200 Subject: [PATCH 08/22] Break line to comply with PEP-7 --- Modules/zlibmodule.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c index b52c5e4a3491ca..27a6d9a9366bf7 100644 --- a/Modules/zlibmodule.c +++ b/Modules/zlibmodule.c @@ -338,7 +338,8 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level, int wbits) zst.zalloc = PyZlib_Malloc; zst.zfree = PyZlib_Free; zst.next_in = ibuf; - int err = deflateInit2(&zst, level, DEFLATED, wbits, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY); + int err = deflateInit2(&zst, level, DEFLATED, wbits, DEF_MEM_LEVEL, + Z_DEFAULT_STRATEGY); switch (err) { case Z_OK: From 8019932e5f0e77272b0900b330c131f17f29fd58 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 1 Jun 2021 08:30:01 +0200 Subject: [PATCH 09/22] Update blurb to include :func: reference --- .../next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst b/Misc/NEWS.d/next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst index fb8878e1d302cb..3f0063962d6ce4 100644 --- a/Misc/NEWS.d/next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst +++ b/Misc/NEWS.d/next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst @@ -1,4 +1,4 @@ -``zlib.compress`` now accepts a wbits parameter which allows users to +:func:``zlib.compress`` now accepts a wbits parameter which allows users to compress data as a raw deflate block 
without zlib headers and trailers in one go. Previously this required instantiating a ``zlib.compressobj``. It also provides a faster alternative to ``gzip.compress`` when wbits=31 is From 0ea98cfde71873109b9c99e2e65392d145329baa Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 1 Jun 2021 09:10:55 +0200 Subject: [PATCH 10/22] Remove erroneous double backticks --- .../next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst b/Misc/NEWS.d/next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst index 3f0063962d6ce4..e6fc88f45eea5e 100644 --- a/Misc/NEWS.d/next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst +++ b/Misc/NEWS.d/next/Library/2021-03-24-09-40-02.bpo-43612.vMGZ4y.rst @@ -1,4 +1,4 @@ -:func:``zlib.compress`` now accepts a wbits parameter which allows users to +:func:`zlib.compress` now accepts a wbits parameter which allows users to compress data as a raw deflate block without zlib headers and trailers in one go. Previously this required instantiating a ``zlib.compressobj``. It also provides a faster alternative to ``gzip.compress`` when wbits=31 is From d1c86dcf1dde1fb8442c93d999d2058c0cc7a4b1 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Tue, 24 Aug 2021 10:29:41 +0200 Subject: [PATCH 11/22] Faster gzip.compress implementation --- Lib/gzip.py | 39 ++++++++++++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 5 deletions(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index 3d837b744800ed..c827e89123447b 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -544,14 +544,43 @@ def _rewind(self): super()._rewind() self._new_member = True -def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None): + +def _create_simple_gzip_header(compresslevel: int, + mtime = None) -> bytes: + """ + Write a simple gzip header with no extra fields. + :param compresslevel: Compresslevel used to determine the xfl bytes. 
+ :param mtime: The mtime (must support conversion to a 32-bit integer). + :return: A bytes object representing the gzip header. + """ + if mtime is None: + mtime = time.time() + if compresslevel == _COMPRESS_LEVEL_BEST: + xfl = 2 + elif compresslevel == _COMPRESS_LEVEL_FAST: + xfl = 4 + else: + xfl = 0 + # Pack ID1 and ID2 magic bytes, method (8=deflate), header flags (no extra + # fields added to header), mtime, xfl and os (255 for unknown OS). + return struct.pack(" Date: Tue, 24 Aug 2021 11:02:16 +0200 Subject: [PATCH 12/22] More efficiently decompress gzip files in memory --- Lib/gzip.py | 121 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 73 insertions(+), 48 deletions(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index c827e89123447b..8436c3a7f7c801 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -403,6 +403,58 @@ def __iter__(self): return self._buffer.__iter__() +def _read_exact(fp, n): + '''Read exactly *n* bytes from `fp` + + This method is required because fp may be unbuffered, + i.e. return short reads. + ''' + data = fp.read(n) + while len(data) < n: + b = self._fp.read(n - len(data)) + if not b: + raise EOFError("Compressed file ended before the " + "end-of-stream marker was reached") + data += b + return data + + +def _read_gzip_header(fp): + '''Read a gzip header from a filestream and progresses the stream to + the end of the header. 
Returns last mtime if header was present or None + if no header was present''' + magic = fp.read(2) + if magic == b'': + return None + + if magic != b'\037\213': + raise BadGzipFile('Not a gzipped file (%r)' % magic) + + (method, flag, last_mtime) = struct.unpack(" Date: Tue, 24 Aug 2021 11:02:28 +0200 Subject: [PATCH 13/22] Ensure correct endianness --- Lib/gzip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index 8436c3a7f7c801..b9e9bc3f4f894b 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -587,7 +587,7 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=0): # This is faster and with less overhead. return zlib.compress(data, level=compresslevel, wbits=31) header = _create_simple_gzip_header(compresslevel, mtime) - trailer = struct.pack("LL", zlib.crc32(data), (len(data) & 0xffffffff)) + trailer = struct.pack(" Date: Tue, 24 Aug 2021 11:06:19 +0200 Subject: [PATCH 14/22] Remove redundant line --- Lib/gzip.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index b9e9bc3f4f894b..3e902a685ebf94 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -605,9 +605,8 @@ def decompress(data): do = zlib.decompressobj(wbits=-zlib.MAX_WBITS) # Read all the data except the header decompressed = do.decompress(data[fp.tell():]) - checksum, length = struct.unpack(" Date: Tue, 24 Aug 2021 11:37:36 +0200 Subject: [PATCH 15/22] Fix typos and test errors --- Lib/gzip.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index 3e902a685ebf94..be13a8f514245d 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -411,7 +411,7 @@ def _read_exact(fp, n): ''' data = fp.read(n) while len(data) < n: - b = self._fp.read(n - len(data)) + b = fp.read(n - len(data)) if not b: raise EOFError("Compressed file ended before the " "end-of-stream marker was reached") @@ -436,22 +436,22 @@ def _read_gzip_header(fp): if flag & FEXTRA: # Read & discard 
the extra field, if present - extra_len, = struct.unpack(" Date: Wed, 25 Aug 2021 10:25:09 +0200 Subject: [PATCH 16/22] Revert changing default on compress for backwards compatibility --- Lib/gzip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index be13a8f514245d..10c9abce552f2d 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -576,7 +576,7 @@ def _create_simple_gzip_header(compresslevel: int, return struct.pack(" Date: Wed, 25 Aug 2021 10:51:29 +0200 Subject: [PATCH 17/22] Update documentation with gzip speed improvements --- Doc/library/gzip.rst | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/Doc/library/gzip.rst b/Doc/library/gzip.rst index 33c40676f747c5..8cea2649ee6cb6 100644 --- a/Doc/library/gzip.rst +++ b/Doc/library/gzip.rst @@ -174,19 +174,30 @@ The module defines the following items: Compress the *data*, returning a :class:`bytes` object containing the compressed data. *compresslevel* and *mtime* have the same meaning as in - the :class:`GzipFile` constructor above. + the :class:`GzipFile` constructor above. When *mtime* is set to ``0``, this + function is equivalent to :func:`zlib.compress` with *wbits* set to ``31``. + The zlib function is faster. .. versionadded:: 3.2 .. versionchanged:: 3.8 Added the *mtime* parameter for reproducible output. + .. versionchanged:: 3.11 + Speed is improved by compressing all data at once instead of in a + streamed fashion. Calls with *mtime* set to ``0`` are delegated to + :func:`zlib.compress` for better speed. .. function:: decompress(data) Decompress the *data*, returning a :class:`bytes` object containing the - uncompressed data. + uncompressed data. This function is capable of decompressing multi-member + gzip data (multiple gzip blocks concatenated together). When the data is + certain to contain only one member the :func:`zlib.decompress` function with + *wbits* set to 31 is faster. .. versionadded:: 3.2 - + .. 
versionchanged:: 3.11 + Speed is improved by decompressing members at once in memory instead of in + a streamed fashion. .. _gzip-usage-examples: From 77f79fda6e4160dddf62b85df8df2dddac82e930 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 25 Aug 2021 10:51:47 +0200 Subject: [PATCH 18/22] Add a blurb for gzip speed improvements --- .../next/Library/2021-08-25-10-28-49.bpo-43613.WkYmI0.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2021-08-25-10-28-49.bpo-43613.WkYmI0.rst diff --git a/Misc/NEWS.d/next/Library/2021-08-25-10-28-49.bpo-43613.WkYmI0.rst b/Misc/NEWS.d/next/Library/2021-08-25-10-28-49.bpo-43613.WkYmI0.rst new file mode 100644 index 00000000000000..d6af35c12b3c70 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2021-08-25-10-28-49.bpo-43613.WkYmI0.rst @@ -0,0 +1,3 @@ +Improve the speed of :func:`gzip.compress` and :func:`gzip.decompress` by +compressing and decompressing at once in memory instead of in a streamed +fashion. From ca3e5432d3d8a31191c0df87c8aa37c312ba4999 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Wed, 25 Aug 2021 11:33:03 +0200 Subject: [PATCH 19/22] Use + instead of bytes.join() method --- Lib/gzip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index 10c9abce552f2d..2e296a1c21bd8b 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -589,7 +589,7 @@ def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None): header = _create_simple_gzip_header(compresslevel, mtime) trailer = struct.pack(" Date: Mon, 30 Aug 2021 14:23:34 +0200 Subject: [PATCH 20/22] Reword docstring for read_gzip_header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Langa --- Lib/gzip.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index 2e296a1c21bd8b..c08a4864f82b46 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -420,9 +420,10 @@ def _read_exact(fp, 
n): def _read_gzip_header(fp): - '''Read a gzip header from a filestream and progresses the stream to - the end of the header. Returns last mtime if header was present or None - if no header was present''' + '''Read a gzip header from `fp` and progress to the end of the header. + + Returns last mtime if header was present or None otherwise. + ''' magic = fp.read(2) if magic == b'': return None From 97a8100b81d0bd8fa7ee2b58e41d18764f6a4438 Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 30 Aug 2021 14:26:07 +0200 Subject: [PATCH 21/22] Update docstring for gzip.compress --- Lib/gzip.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/gzip.py b/Lib/gzip.py index c08a4864f82b46..0dddb51553fabd 100644 --- a/Lib/gzip.py +++ b/Lib/gzip.py @@ -421,7 +421,7 @@ def _read_exact(fp, n): def _read_gzip_header(fp): '''Read a gzip header from `fp` and progress to the end of the header. - + Returns last mtime if header was present or None otherwise. ''' magic = fp.read(2) @@ -579,9 +579,10 @@ def _create_simple_gzip_header(compresslevel: int, def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None): """Compress data in one shot and return the compressed string. - Optional argument is the compression level, in range of 0-9. - mtime can be used to set the modification time. 0 is default, - use 'None' for current time. + + compresslevel sets the compression level in range of 0-9. + mtime can be used to set the modification time. The modification time is + set to the current time by default. """ if mtime == 0: # Use zlib as it creates the header with 0 mtime by default. From eeb7766c01f34c3326b23d0d67c162324889566e Mon Sep 17 00:00:00 2001 From: Ruben Vorderman Date: Mon, 30 Aug 2021 14:29:13 +0200 Subject: [PATCH 22/22] Use subtest for zlib.compress/decompress test. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Łukasz Langa --- Lib/test/test_zlib.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/test/test_zlib.py b/Lib/test/test_zlib.py index c7ce4ad1547069..04fb4d93de45e1 100644 --- a/Lib/test/test_zlib.py +++ b/Lib/test/test_zlib.py @@ -832,11 +832,12 @@ def test_wbits(self): self.assertEqual(dco.decompress(gzip), HAMLET_SCENE) for wbits in (-15, 15, 31): - self.assertEqual( - zlib.decompress( + with self.subTest(wbits=wbits): + expected = HAMLET_SCENE + actual = zlib.decompress( zlib.compress(HAMLET_SCENE, wbits=wbits), wbits=wbits - ), HAMLET_SCENE) - + ) + self.assertEqual(expected, actual) def choose_lines(source, number, seed=None, generator=random): """Return a list of number lines randomly chosen from the source"""