gh-95534: Improve gzip reading speed by 10% by rhpvorderman · Pull Request #97664 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-95534: Improve gzip reading speed by 10% #97664

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 33 commits into from
Oct 17, 2022
Merged
Changes from 1 commit
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
1e13a89
Add code from python-isal project
rhpvorderman Sep 28, 2022
6a5cdfd
Reorder code
rhpvorderman Sep 28, 2022
809ad5f
Add ZlibDecompressor
rhpvorderman Sep 28, 2022
03254b8
Add zlibdecompressor object
rhpvorderman Sep 28, 2022
669848a
Fix compile warnings
rhpvorderman Sep 28, 2022
69ff613
Do not use class input
rhpvorderman Sep 28, 2022
6fa43ae
Fix lock stuff
rhpvorderman Sep 28, 2022
cdc5972
Fix incorrect error handling
rhpvorderman Sep 28, 2022
7820627
Rework _GzipReader to be more efficient
rhpvorderman Sep 28, 2022
6f8b64a
Properly initialize zstate
rhpvorderman Sep 30, 2022
3e2a4f5
Add blurb for increased gzip read speed
rhpvorderman Sep 30, 2022
070df1c
Make sure self->initialised is set to 0. Reword some comments.
rhpvorderman Sep 30, 2022
70b7d4d
Add appropriate doctype in blurb
rhpvorderman Sep 30, 2022
22d3893
Merge branch 'main' into gh-95534
rhpvorderman Sep 30, 2022
18a7692
Add missing NULL member to ZlibDecompressor_Members
rhpvorderman Sep 30, 2022
d54c8b5
Merge branch 'gh-95534' of github.com:rhpvorderman/cpython into gh-95534
rhpvorderman Sep 30, 2022
c90096f
Remove double comment
rhpvorderman Sep 30, 2022
1c15839
Use READ_BUFFER_SIZE in python -m gzip command line application
rhpvorderman Sep 30, 2022
d0ff4f0
Fix error in news entry
rhpvorderman Sep 30, 2022
afd92ab
minor edit, use +=
gpshead Sep 30, 2022
922ac5c
Throw compile warning on zlib versions that are too old
rhpvorderman Oct 2, 2022
dc7de61
Use bool instead of int
rhpvorderman Oct 2, 2022
ca12c1f
Correct spelling of insufficient
rhpvorderman Oct 2, 2022
1ce342b
Put brackets around if statement
rhpvorderman Oct 2, 2022
0b7735e
Remove strange default case
rhpvorderman Oct 2, 2022
043a376
Remove unnecessary zero op
rhpvorderman Oct 2, 2022
2a653a9
Change RetVal to return_value
rhpvorderman Oct 2, 2022
475aef6
Change char to bool
rhpvorderman Oct 2, 2022
41ba076
Properly bracketify if-else clause
rhpvorderman Oct 2, 2022
5f1901d
Prefix underscore to _ZlibDecompressor name
rhpvorderman Oct 2, 2022
c5d6888
Copy explanation about zdict from python docs into function docstring
rhpvorderman Oct 2, 2022
9d60339
Merge branch 'gh-95534' of github.com:rhpvorderman/cpython into gh-95534
rhpvorderman Oct 2, 2022
e3da415
Add tests for _ZlibDecompressor
rhpvorderman Oct 3, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Fix compile warnings
  • Loading branch information
rhpvorderman committed Sep 28, 2022
commit 669848a6b0fe6fe52254f2001c45dd88f2b84a0f
46 changes: 29 additions & 17 deletions Modules/zlibmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -1359,7 +1359,6 @@ typedef struct {
PyObject *zdict;
PyThread_type_lock lock;
PyObject *unused_data;
PyObject *zdict;
uint8_t *input_buffer;
Py_ssize_t input_buffer_size;
/* zst->avail_in is only 32 bit, so we store the true length
Expand All @@ -1372,11 +1371,9 @@ typedef struct {
} ZlibDecompressor;

/*[clinic input]
module zlib
class zlib.Compress "compobject *" "&Comptype"
class zlib.ZlibDecompressor "ZlibDecompressor *" "&ZlibDecompressorType"
[clinic start generated code]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=fc826e280aec6432]*/
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=0658178ab94645df]*/

static void
ZlibDecompressor_dealloc(ZlibDecompressor *self)
Expand All @@ -1386,19 +1383,35 @@ ZlibDecompressor_dealloc(ZlibDecompressor *self)
if (self->is_initialised) {
inflateEnd(&self->zst);
}
Dealloc(self);
PyMem_Free(self->input_buffer);
Py_CLEAR(self->unused_data);
Py_CLEAR(self->zdict);
PyObject_Free(self);
Py_DECREF(type);
}

static inline void
arrange_input_buffer(uint32_t *avail_in, Py_ssize_t *remains)
static int
set_inflate_zdict_ZlibDecompressor(zlibstate *state, ZlibDecompressor *self)
{
*avail_in = (uint32_t)Py_MIN((size_t)*remains, UINT32_MAX);
*remains -= *avail_in;
Py_buffer zdict_buf;
if (PyObject_GetBuffer(self->zdict, &zdict_buf, PyBUF_SIMPLE) == -1) {
return -1;
}
if ((size_t)zdict_buf.len > UINT_MAX) {
PyErr_SetString(PyExc_OverflowError,
"zdict length does not fit in an unsigned int");
PyBuffer_Release(&zdict_buf);
return -1;
}
int err;
err = inflateSetDictionary(&self->zst,
zdict_buf.buf, (unsigned int)zdict_buf.len);
PyBuffer_Release(&zdict_buf);
if (err != Z_OK) {
zlib_error(state, self->zst, err, "while setting zdict");
return -1;
}
return 0;
}

static Py_ssize_t
Expand Down Expand Up @@ -1470,8 +1483,8 @@ decompress_buf(ZlibDecompressor *self, Py_ssize_t max_length)
Py_ssize_t hard_limit;
Py_ssize_t obuflen;
zlibstate *state = PyType_GetModuleState(Py_TYPE(self));

int err;
int err = Z_OK;

/* In Python 3.10 sometimes sys.maxsize is passed by default. In those cases
we do want to use DEF_BUF_SIZE as start buffer. */
Expand All @@ -1492,7 +1505,7 @@ decompress_buf(ZlibDecompressor *self, Py_ssize_t max_length)
}

do {
arrange_input_buffer(&(self->zst.avail_in), &(self->avail_in_real));
arrange_input_buffer(&(self->zst), &(self->avail_in_real));

do {
obuflen = arrange_output_buffer_with_maximum(&(self->zst.avail_out),
Expand Down Expand Up @@ -1620,7 +1633,7 @@ decompress(ZlibDecompressor *self, PyTypeObject *cls, uint8_t *data,

if (self->avail_in_real > 0) {
PyObject *unused_data = PyBytes_FromStringAndSize(
self->zst.next_in, self->avail_in_real);
(char *)self->zst.next_in, self->avail_in_real);
if (unused_data == NULL) {
goto error;
}
Expand Down Expand Up @@ -1738,7 +1751,6 @@ ZlibDecompressor__new__(PyTypeObject *cls,
return NULL;
}
ZlibDecompressor *self = PyObject_New(ZlibDecompressor, cls);
int err;
self->eof = 0;
self->needs_input = 1;
self->avail_in_real = 0;
Expand All @@ -1759,7 +1771,7 @@ ZlibDecompressor__new__(PyTypeObject *cls,
self->is_initialised = 1;
if (self->zdict != NULL && wbits < 0) {
#ifdef AT_LEAST_ZLIB_1_2_2_1
if (set_inflate_zdict(state, self) < 0) {
if (set_inflate_zdict_ZlibDecompressor(state, self) < 0) {
Py_DECREF(self);
return NULL;
}
Expand Down Expand Up @@ -1963,7 +1975,7 @@ static PyType_Slot ZlibDecompressor_type_slots[] = {
{Py_tp_dealloc, ZlibDecompressor_dealloc},
{Py_tp_members, ZlibDecompressor_members},
{Py_tp_new, ZlibDecompressor__new__},
{Py_tp_doc, ZlibDecompressor__new____doc__},
{Py_tp_doc, (char *)ZlibDecompressor__new____doc__},
{Py_tp_methods, ZlibDecompressor_methods},
{0, 0},
};
Expand Down Expand Up @@ -2056,7 +2068,7 @@ zlib_exec(PyObject *mod)
}
Py_INCREF(state->ZlibDecompressorType);
if (PyModule_AddObject(mod, "_ZlibDecompressor",
state->ZlibDecompressorType) < 0) {
(PyObject *)state->ZlibDecompressorType) < 0) {
Py_DECREF(state->ZlibDecompressorType);
return -1;
}
Expand Down
0