gh-132983: Remove leftovers from EndlessZstdDecompressor (#133856) · python/cpython@878e0fb · GitHub
[go: up one dir, main page]

Skip to content

Commit 878e0fb

Browse files
Rogdham, AA-Turner, and ZeroIntensity
authored
gh-132983: Remove leftovers from EndlessZstdDecompressor (#133856)
Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Co-authored-by: Peter Bierma <zintensitydev@gmail.com>
1 parent 1a87b6e commit 878e0fb

File tree

1 file changed

+30
-150
lines changed

1 file changed

+30
-150
lines changed

Modules/_zstd/decompressor.c

Lines changed: 30 additions & 150 deletions
Original file line numberDiff line numberDiff line change
@@ -43,20 +43,11 @@ typedef struct {
4343
PyObject *unused_data;
4444

4545
/* 0 if decompressor has (or may have) unconsumed input data, 0 or 1. */
46-
char needs_input;
47-
48-
/* For decompress(), 0 or 1.
49-
1 when both input and output streams are at a frame edge, means a
50-
frame is completely decoded and fully flushed, or the decompressor
51-
just be initialized. */
52-
char at_frame_edge;
46+
bool needs_input;
5347

5448
/* For ZstdDecompressor, 0 or 1.
5549
1 means the end of the first frame has been reached. */
56-
char eof;
57-
58-
/* Used for fast reset above three variables */
59-
char _unused_char_for_align;
50+
bool eof;
6051

6152
/* __init__ has been called, 0 or 1. */
6253
bool initialized;
@@ -258,19 +249,13 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
258249
return 0;
259250
}
260251

261-
typedef enum {
262-
TYPE_DECOMPRESSOR, // <D>, ZstdDecompressor class
263-
TYPE_ENDLESS_DECOMPRESSOR, // <E>, decompress() function
264-
} decompress_type;
265-
266252
/*
267-
Given the two types of decompressors (defined above),
268-
decompress implementation for <D>, <E>, pseudo code:
253+
Decompress implementation in pseudo code:
269254
270255
initialize_output_buffer
271256
while True:
272257
decompress_data
273-
set_object_flag # .eof for <D>, .at_frame_edge for <E>.
258+
set_object_flag # .eof
274259
275260
if output_buffer_exhausted:
276261
if output_buffer_reached_max_length:
@@ -287,63 +272,19 @@ typedef enum {
287272
flushing to do to complete current frame.
288273
289274
Note, decompressing "an empty input" in any case will make it > 0.
290-
291-
<E> supports multiple frames, has an .at_frame_edge flag, it means both the
292-
input and output streams are at a frame edge. The flag can be set by this
293-
statement:
294-
295-
.at_frame_edge = (zstd_ret == 0) ? 1 : 0
296-
297-
But if decompressing "an empty input" at "a frame edge", zstd_ret will be
298-
non-zero, then .at_frame_edge will be wrongly set to false. To solve this
299-
problem, two AFE checks are needed to ensure that: when at "a frame edge",
300-
empty input will not be decompressed.
301-
302-
// AFE check
303-
if (self->at_frame_edge && in->pos == in->size) {
304-
finish
305-
}
306-
307-
In <E>, if .at_frame_edge is eventually set to true, but input stream has
308-
unconsumed data (in->pos < in->size), then the outer function
309-
stream_decompress() will set .at_frame_edge to false. In this case,
310-
although the output stream is at a frame edge, for the caller, the input
311-
stream is not at a frame edge, see below diagram. This behavior does not
312-
affect the next AFE check, since (in->pos < in->size).
313-
314-
input stream: --------------|---
315-
^
316-
output stream: ====================|
317-
^
318275
*/
319276
static PyObject *
320277
decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
321-
Py_ssize_t max_length,
322-
Py_ssize_t initial_size,
323-
decompress_type type)
278+
Py_ssize_t max_length)
324279
{
325280
size_t zstd_ret;
326281
ZSTD_outBuffer out;
327282
_BlocksOutputBuffer buffer = {.list = NULL};
328283
PyObject *ret;
329284

330-
/* The first AFE check for setting .at_frame_edge flag */
331-
if (type == TYPE_ENDLESS_DECOMPRESSOR) {
332-
if (self->at_frame_edge && in->pos == in->size) {
333-
return Py_GetConstant(Py_CONSTANT_EMPTY_BYTES);
334-
}
335-
}
336-
337285
/* Initialize the output buffer */
338-
if (initial_size >= 0) {
339-
if (_OutputBuffer_InitWithSize(&buffer, &out, max_length, initial_size) < 0) {
340-
goto error;
341-
}
342-
}
343-
else {
344-
if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) {
345-
goto error;
346-
}
286+
if (_OutputBuffer_InitAndGrow(&buffer, &out, max_length) < 0) {
287+
goto error;
347288
}
348289
assert(out.pos == 0);
349290

@@ -362,22 +303,11 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
362303
goto error;
363304
}
364305

365-
/* Set .eof/.af_frame_edge flag */
366-
if (type == TYPE_DECOMPRESSOR) {
367-
/* ZstdDecompressor class stops when a frame is decompressed */
368-
if (zstd_ret == 0) {
369-
self->eof = 1;
370-
break;
371-
}
372-
}
373-
else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
374-
/* decompress() function supports multiple frames */
375-
self->at_frame_edge = (zstd_ret == 0) ? 1 : 0;
376-
377-
/* The second AFE check for setting .at_frame_edge flag */
378-
if (self->at_frame_edge && in->pos == in->size) {
379-
break;
380-
}
306+
/* Set .eof flag */
307+
if (zstd_ret == 0) {
308+
/* Stop when a frame is decompressed */
309+
self->eof = 1;
310+
break;
381311
}
382312

383313
/* Need to check out before in. Maybe zstd's internal buffer still has
@@ -415,8 +345,7 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
415345
}
416346

417347
static void
418-
decompressor_reset_session(ZstdDecompressor *self,
419-
decompress_type type)
348+
decompressor_reset_session(ZstdDecompressor *self)
420349
{
421350
// TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here
422351
// and ensure lock is always held
@@ -425,56 +354,28 @@ decompressor_reset_session(ZstdDecompressor *self,
425354
self->in_begin = 0;
426355
self->in_end = 0;
427356

428-
if (type == TYPE_DECOMPRESSOR) {
429-
Py_CLEAR(self->unused_data);
430-
}
357+
Py_CLEAR(self->unused_data);
431358

432359
/* Reset the state flags */
433360
self->needs_input = 1;
434-
self->at_frame_edge = 1;
435361
self->eof = 0;
436-
self->_unused_char_for_align = 0;
437362

438-
/* Resetting session never fail */
363+
/* Resetting session is guaranteed to never fail */
439364
ZSTD_DCtx_reset(self->dctx, ZSTD_reset_session_only);
440365
}
441366

442367
static PyObject *
443-
stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length,
444-
decompress_type type)
368+
stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length)
445369
{
446-
Py_ssize_t initial_buffer_size = -1;
447370
ZSTD_inBuffer in;
448371
PyObject *ret = NULL;
449372
int use_input_buffer;
450373

451-
if (type == TYPE_DECOMPRESSOR) {
452-
/* Check .eof flag */
453-
if (self->eof) {
454-
PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame.");
455-
assert(ret == NULL);
456-
goto success;
457-
}
458-
}
459-
else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
460-
/* Fast path for the first frame */
461-
if (self->at_frame_edge && self->in_begin == self->in_end) {
462-
/* Read decompressed size */
463-
uint64_t decompressed_size = ZSTD_getFrameContentSize(data->buf, data->len);
464-
465-
/* These two zstd constants always > PY_SSIZE_T_MAX:
466-
ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1)
467-
ZSTD_CONTENTSIZE_ERROR is (0ULL - 2)
468-
469-
Use ZSTD_findFrameCompressedSize() to check complete frame,
470-
prevent allocating too much memory for small input chunk. */
471-
472-
if (decompressed_size <= (uint64_t) PY_SSIZE_T_MAX &&
473-
!ZSTD_isError(ZSTD_findFrameCompressedSize(data->buf, data->len)) )
474-
{
475-
initial_buffer_size = (Py_ssize_t) decompressed_size;
476-
}
477-
}
374+
/* Check .eof flag */
375+
if (self->eof) {
376+
PyErr_SetString(PyExc_EOFError, "Already at the end of a zstd frame.");
377+
assert(ret == NULL);
378+
return NULL;
478379
}
479380

480381
/* Prepare input buffer with or without unconsumed data */
@@ -561,30 +462,18 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
561462
assert(in.pos == 0);
562463

563464
/* Decompress */
564-
ret = decompress_impl(self, &in,
565-
max_length, initial_buffer_size,
566-
type);
465+
ret = decompress_impl(self, &in, max_length);
567466
if (ret == NULL) {
568467
goto error;
569468
}
570469

571470
/* Unconsumed input data */
572471
if (in.pos == in.size) {
573-
if (type == TYPE_DECOMPRESSOR) {
574-
if (Py_SIZE(ret) == max_length || self->eof) {
575-
self->needs_input = 0;
576-
}
577-
else {
578-
self->needs_input = 1;
579-
}
472+
if (Py_SIZE(ret) == max_length || self->eof) {
473+
self->needs_input = 0;
580474
}
581-
else if (type == TYPE_ENDLESS_DECOMPRESSOR) {
582-
if (Py_SIZE(ret) == max_length && !self->at_frame_edge) {
583-
self->needs_input = 0;
584-
}
585-
else {
586-
self->needs_input = 1;
587-
}
475+
else {
476+
self->needs_input = 1;
588477
}
589478

590479
if (use_input_buffer) {
@@ -598,10 +487,6 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
598487

599488
self->needs_input = 0;
600489

601-
if (type == TYPE_ENDLESS_DECOMPRESSOR) {
602-
self->at_frame_edge = 0;
603-
}
604-
605490
if (!use_input_buffer) {
606491
/* Discard buffer if it's too small
607492
(resizing it may needlessly copy the current contents) */
@@ -634,16 +519,14 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
634519
}
635520
}
636521

637-
goto success;
522+
return ret;
638523

639524
error:
640525
/* Reset decompressor's states/session */
641-
decompressor_reset_session(self, type);
526+
decompressor_reset_session(self);
642527

643528
Py_CLEAR(ret);
644-
success:
645-
646-
return ret;
529+
return NULL;
647530
}
648531

649532

@@ -668,9 +551,6 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
668551
/* needs_input flag */
669552
self->needs_input = 1;
670553

671-
/* at_frame_edge flag */
672-
self->at_frame_edge = 1;
673-
674554
/* Decompression context */
675555
self->dctx = ZSTD_createDCtx();
676556
if (self->dctx == NULL) {
@@ -837,7 +717,7 @@ _zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
837717
/* Thread-safe code */
838718
Py_BEGIN_CRITICAL_SECTION(self);
839719

840-
ret = stream_decompress(self, data, max_length, TYPE_DECOMPRESSOR);
720+
ret = stream_decompress(self, data, max_length);
841721
Py_END_CRITICAL_SECTION();
842722
return ret;
843723
}

0 commit comments

Comments
 (0)
0