@@ -43,20 +43,11 @@ typedef struct {
43
43
PyObject * unused_data ;
44
44
45
45
/* 0 if decompressor has (or may have) unconsumed input data, 0 or 1. */
46
- char needs_input ;
47
-
48
- /* For decompress(), 0 or 1.
49
- 1 when both input and output streams are at a frame edge, means a
50
- frame is completely decoded and fully flushed, or the decompressor
51
- just be initialized. */
52
- char at_frame_edge ;
46
+ bool needs_input ;
53
47
54
48
/* For ZstdDecompressor, 0 or 1.
55
49
1 means the end of the first frame has been reached. */
56
- char eof ;
57
-
58
- /* Used for fast reset above three variables */
59
- char _unused_char_for_align ;
50
+ bool eof ;
60
51
61
52
/* __init__ has been called, 0 or 1. */
62
53
bool initialized ;
@@ -258,19 +249,13 @@ _zstd_load_d_dict(ZstdDecompressor *self, PyObject *dict)
258
249
return 0 ;
259
250
}
260
251
261
- typedef enum {
262
- TYPE_DECOMPRESSOR , // <D>, ZstdDecompressor class
263
- TYPE_ENDLESS_DECOMPRESSOR , // <E>, decompress() function
264
- } decompress_type ;
265
-
266
252
/*
267
- Given the two types of decompressors (defined above),
268
- decompress implementation for <D>, <E>, pseudo code:
253
+ Decompress implementation in pseudo code:
269
254
270
255
initialize_output_buffer
271
256
while True:
272
257
decompress_data
273
- set_object_flag # .eof for <D>, .at_frame_edge for <E>.
258
+ set_object_flag # .eof
274
259
275
260
if output_buffer_exhausted:
276
261
if output_buffer_reached_max_length:
@@ -287,63 +272,19 @@ typedef enum {
287
272
flushing to do to complete current frame.
288
273
289
274
Note, decompressing "an empty input" in any case will make it > 0.
290
-
291
- <E> supports multiple frames, has an .at_frame_edge flag, it means both the
292
- input and output streams are at a frame edge. The flag can be set by this
293
- statement:
294
-
295
- .at_frame_edge = (zstd_ret == 0) ? 1 : 0
296
-
297
- But if decompressing "an empty input" at "a frame edge", zstd_ret will be
298
- non-zero, then .at_frame_edge will be wrongly set to false. To solve this
299
- problem, two AFE checks are needed to ensure that: when at "a frame edge",
300
- empty input will not be decompressed.
301
-
302
- // AFE check
303
- if (self->at_frame_edge && in->pos == in->size) {
304
- finish
305
- }
306
-
307
- In <E>, if .at_frame_edge is eventually set to true, but input stream has
308
- unconsumed data (in->pos < in->size), then the outer function
309
- stream_decompress() will set .at_frame_edge to false. In this case,
310
- although the output stream is at a frame edge, for the caller, the input
311
- stream is not at a frame edge, see below diagram. This behavior does not
312
- affect the next AFE check, since (in->pos < in->size).
313
-
314
- input stream: --------------|---
315
- ^
316
- output stream: ====================|
317
- ^
318
275
*/
319
276
static PyObject *
320
277
decompress_impl (ZstdDecompressor * self , ZSTD_inBuffer * in ,
321
- Py_ssize_t max_length ,
322
- Py_ssize_t initial_size ,
323
- decompress_type type )
278
+ Py_ssize_t max_length )
324
279
{
325
280
size_t zstd_ret ;
326
281
ZSTD_outBuffer out ;
327
282
_BlocksOutputBuffer buffer = {.list = NULL };
328
283
PyObject * ret ;
329
284
330
- /* The first AFE check for setting .at_frame_edge flag */
331
- if (type == TYPE_ENDLESS_DECOMPRESSOR ) {
332
- if (self -> at_frame_edge && in -> pos == in -> size ) {
333
- return Py_GetConstant (Py_CONSTANT_EMPTY_BYTES );
334
- }
335
- }
336
-
337
285
/* Initialize the output buffer */
338
- if (initial_size >= 0 ) {
339
- if (_OutputBuffer_InitWithSize (& buffer , & out , max_length , initial_size ) < 0 ) {
340
- goto error ;
341
- }
342
- }
343
- else {
344
- if (_OutputBuffer_InitAndGrow (& buffer , & out , max_length ) < 0 ) {
345
- goto error ;
346
- }
286
+ if (_OutputBuffer_InitAndGrow (& buffer , & out , max_length ) < 0 ) {
287
+ goto error ;
347
288
}
348
289
assert (out .pos == 0 );
349
290
@@ -362,22 +303,11 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
362
303
goto error ;
363
304
}
364
305
365
- /* Set .eof/.af_frame_edge flag */
366
- if (type == TYPE_DECOMPRESSOR ) {
367
- /* ZstdDecompressor class stops when a frame is decompressed */
368
- if (zstd_ret == 0 ) {
369
- self -> eof = 1 ;
370
- break ;
371
- }
372
- }
373
- else if (type == TYPE_ENDLESS_DECOMPRESSOR ) {
374
- /* decompress() function supports multiple frames */
375
- self -> at_frame_edge = (zstd_ret == 0 ) ? 1 : 0 ;
376
-
377
- /* The second AFE check for setting .at_frame_edge flag */
378
- if (self -> at_frame_edge && in -> pos == in -> size ) {
379
- break ;
380
- }
306
+ /* Set .eof flag */
307
+ if (zstd_ret == 0 ) {
308
+ /* Stop when a frame is decompressed */
309
+ self -> eof = 1 ;
310
+ break ;
381
311
}
382
312
383
313
/* Need to check out before in. Maybe zstd's internal buffer still has
@@ -415,8 +345,7 @@ decompress_impl(ZstdDecompressor *self, ZSTD_inBuffer *in,
415
345
}
416
346
417
347
static void
418
- decompressor_reset_session (ZstdDecompressor * self ,
419
- decompress_type type )
348
+ decompressor_reset_session (ZstdDecompressor * self )
420
349
{
421
350
// TODO(emmatyping): use _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED here
422
351
// and ensure lock is always held
@@ -425,56 +354,28 @@ decompressor_reset_session(ZstdDecompressor *self,
425
354
self -> in_begin = 0 ;
426
355
self -> in_end = 0 ;
427
356
428
- if (type == TYPE_DECOMPRESSOR ) {
429
- Py_CLEAR (self -> unused_data );
430
- }
357
+ Py_CLEAR (self -> unused_data );
431
358
432
359
/* Reset the state flags */
433
360
self -> needs_input = 1 ;
434
- self -> at_frame_edge = 1 ;
435
361
self -> eof = 0 ;
436
- self -> _unused_char_for_align = 0 ;
437
362
438
- /* Resetting session never fail */
363
+ /* Resetting session is guaranteed to never fail */
439
364
ZSTD_DCtx_reset (self -> dctx , ZSTD_reset_session_only );
440
365
}
441
366
442
367
static PyObject *
443
- stream_decompress (ZstdDecompressor * self , Py_buffer * data , Py_ssize_t max_length ,
444
- decompress_type type )
368
+ stream_decompress (ZstdDecompressor * self , Py_buffer * data , Py_ssize_t max_length )
445
369
{
446
- Py_ssize_t initial_buffer_size = -1 ;
447
370
ZSTD_inBuffer in ;
448
371
PyObject * ret = NULL ;
449
372
int use_input_buffer ;
450
373
451
- if (type == TYPE_DECOMPRESSOR ) {
452
- /* Check .eof flag */
453
- if (self -> eof ) {
454
- PyErr_SetString (PyExc_EOFError , "Already at the end of a zstd frame." );
455
- assert (ret == NULL );
456
- goto success ;
457
- }
458
- }
459
- else if (type == TYPE_ENDLESS_DECOMPRESSOR ) {
460
- /* Fast path for the first frame */
461
- if (self -> at_frame_edge && self -> in_begin == self -> in_end ) {
462
- /* Read decompressed size */
463
- uint64_t decompressed_size = ZSTD_getFrameContentSize (data -> buf , data -> len );
464
-
465
- /* These two zstd constants always > PY_SSIZE_T_MAX:
466
- ZSTD_CONTENTSIZE_UNKNOWN is (0ULL - 1)
467
- ZSTD_CONTENTSIZE_ERROR is (0ULL - 2)
468
-
469
- Use ZSTD_findFrameCompressedSize() to check complete frame,
470
- prevent allocating too much memory for small input chunk. */
471
-
472
- if (decompressed_size <= (uint64_t ) PY_SSIZE_T_MAX &&
473
- !ZSTD_isError (ZSTD_findFrameCompressedSize (data -> buf , data -> len )) )
474
- {
475
- initial_buffer_size = (Py_ssize_t ) decompressed_size ;
476
- }
477
- }
374
+ /* Check .eof flag */
375
+ if (self -> eof ) {
376
+ PyErr_SetString (PyExc_EOFError , "Already at the end of a zstd frame." );
377
+ assert (ret == NULL );
378
+ return NULL ;
478
379
}
479
380
480
381
/* Prepare input buffer, with or without unconsumed data */
@@ -561,30 +462,18 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
561
462
assert (in .pos == 0 );
562
463
563
464
/* Decompress */
564
- ret = decompress_impl (self , & in ,
565
- max_length , initial_buffer_size ,
566
- type );
465
+ ret = decompress_impl (self , & in , max_length );
567
466
if (ret == NULL ) {
568
467
goto error ;
569
468
}
570
469
571
470
/* Unconsumed input data */
572
471
if (in .pos == in .size ) {
573
- if (type == TYPE_DECOMPRESSOR ) {
574
- if (Py_SIZE (ret ) == max_length || self -> eof ) {
575
- self -> needs_input = 0 ;
576
- }
577
- else {
578
- self -> needs_input = 1 ;
579
- }
472
+ if (Py_SIZE (ret ) == max_length || self -> eof ) {
473
+ self -> needs_input = 0 ;
580
474
}
581
- else if (type == TYPE_ENDLESS_DECOMPRESSOR ) {
582
- if (Py_SIZE (ret ) == max_length && !self -> at_frame_edge ) {
583
- self -> needs_input = 0 ;
584
- }
585
- else {
586
- self -> needs_input = 1 ;
587
- }
475
+ else {
476
+ self -> needs_input = 1 ;
588
477
}
589
478
590
479
if (use_input_buffer ) {
@@ -598,10 +487,6 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
598
487
599
488
self -> needs_input = 0 ;
600
489
601
- if (type == TYPE_ENDLESS_DECOMPRESSOR ) {
602
- self -> at_frame_edge = 0 ;
603
- }
604
-
605
490
if (!use_input_buffer ) {
606
491
/* Discard buffer if it's too small
607
492
(resizing it may needlessly copy the current contents) */
@@ -634,16 +519,14 @@ stream_decompress(ZstdDecompressor *self, Py_buffer *data, Py_ssize_t max_length
634
519
}
635
520
}
636
521
637
- goto success ;
522
+ return ret ;
638
523
639
524
error :
640
525
/* Reset decompressor's states/session */
641
- decompressor_reset_session (self , type );
526
+ decompressor_reset_session (self );
642
527
643
528
Py_CLEAR (ret );
644
- success :
645
-
646
- return ret ;
529
+ return NULL ;
647
530
}
648
531
649
532
@@ -668,9 +551,6 @@ _zstd_ZstdDecompressor_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
668
551
/* needs_input flag */
669
552
self -> needs_input = 1 ;
670
553
671
- /* at_frame_edge flag */
672
- self -> at_frame_edge = 1 ;
673
-
674
554
/* Decompression context */
675
555
self -> dctx = ZSTD_createDCtx ();
676
556
if (self -> dctx == NULL ) {
@@ -837,7 +717,7 @@ _zstd_ZstdDecompressor_decompress_impl(ZstdDecompressor *self,
837
717
/* Thread-safe code */
838
718
Py_BEGIN_CRITICAL_SECTION (self );
839
719
840
- ret = stream_decompress (self , data , max_length , TYPE_DECOMPRESSOR );
720
+ ret = stream_decompress (self , data , max_length );
841
721
Py_END_CRITICAL_SECTION ();
842
722
return ret ;
843
723
}
0 commit comments