1
+ /*
2
+ _BlocksOutputBuffer is used to maintain an output buffer
3
+ that has unpredictable size. Suitable for compression/decompression
4
+ API (bz2/lzma/zlib) that has stream->next_out and stream->avail_out:
5
+
6
+ stream->next_out: point to the next output position.
7
+ stream->avail_out: the number of available bytes left in the buffer.
8
+
9
+ It maintains a list of bytes object, so there is no overhead of resizing
10
+ the buffer.
11
+
12
+ Usage:
13
+
14
+ 1, Initialize the struct instance like this:
15
+ _BlocksOutputBuffer buffer = {.list = NULL};
16
+ Set .list to NULL for _BlocksOutputBuffer_OnError()
17
+
18
+ 2, Initialize the buffer use one of these functions:
19
+ _BlocksOutputBuffer_InitAndGrow()
20
+ _BlocksOutputBuffer_InitWithSize()
21
+
22
+ 3, If (avail_out == 0), grow the buffer:
23
+ _BlocksOutputBuffer_Grow()
24
+
25
+ 4, Get the current outputted data size:
26
+ _BlocksOutputBuffer_GetDataSize()
27
+
28
+ 5, Finish the buffer, and return a bytes object:
29
+ _BlocksOutputBuffer_Finish()
30
+
31
+ 6, Clean up the buffer when an error occurred:
32
+ _BlocksOutputBuffer_OnError()
33
+ */
34
+
35
+ #ifndef Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H
36
+ #define Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H
37
+ #ifdef __cplusplus
38
+ extern "C" {
39
+ #endif
40
+
41
+ #include "Python.h"
42
+
43
+ typedef struct {
44
+ // List of bytes objects
45
+ PyObject * list ;
46
+ // Number of whole allocated size
47
+ Py_ssize_t allocated ;
48
+ // Max length of the buffer, negative number means unlimited length.
49
+ Py_ssize_t max_length ;
50
+ } _BlocksOutputBuffer ;
51
+
52
+ static const char unable_allocate_msg [] = "Unable to allocate output buffer." ;
53
+
54
+ /* In 32-bit build, the max block size should <= INT32_MAX. */
55
+ #define OUTPUT_BUFFER_MAX_BLOCK_SIZE (256*1024*1024)
56
+
57
+ /* Block size sequence */
58
+ #define KB (1024)
59
+ #define MB (1024*1024)
60
+ const Py_ssize_t BUFFER_BLOCK_SIZE [] =
61
+ { 32 * KB , 64 * KB , 256 * KB , 1 * MB , 4 * MB , 8 * MB , 16 * MB , 16 * MB ,
62
+ 32 * MB , 32 * MB , 32 * MB , 32 * MB , 64 * MB , 64 * MB , 128 * MB , 128 * MB ,
63
+ OUTPUT_BUFFER_MAX_BLOCK_SIZE };
64
+ #undef KB
65
+ #undef MB
66
+
67
+ /* According to the block sizes defined by BUFFER_BLOCK_SIZE, the whole
68
+ allocated size growth step is:
69
+ 1 32 KB +32 KB
70
+ 2 96 KB +64 KB
71
+ 3 352 KB +256 KB
72
+ 4 1.34 MB +1 MB
73
+ 5 5.34 MB +4 MB
74
+ 6 13.34 MB +8 MB
75
+ 7 29.34 MB +16 MB
76
+ 8 45.34 MB +16 MB
77
+ 9 77.34 MB +32 MB
78
+ 10 109.34 MB +32 MB
79
+ 11 141.34 MB +32 MB
80
+ 12 173.34 MB +32 MB
81
+ 13 237.34 MB +64 MB
82
+ 14 301.34 MB +64 MB
83
+ 15 429.34 MB +128 MB
84
+ 16 557.34 MB +128 MB
85
+ 17 813.34 MB +256 MB
86
+ 18 1069.34 MB +256 MB
87
+ 19 1325.34 MB +256 MB
88
+ 20 1581.34 MB +256 MB
89
+ 21 1837.34 MB +256 MB
90
+ 22 2093.34 MB +256 MB
91
+ ...
92
+ */
93
+
94
+ /* Initialize the buffer, and grow the buffer.
95
+
96
+ max_length: Max length of the buffer, -1 for unlimited length.
97
+
98
+ On success, return allocated size (>=0)
99
+ On failure, return -1
100
+ */
101
+ static inline Py_ssize_t
102
+ _BlocksOutputBuffer_InitAndGrow (_BlocksOutputBuffer * buffer ,
103
+ const Py_ssize_t max_length ,
104
+ void * * next_out )
105
+ {
106
+ PyObject * b ;
107
+ Py_ssize_t block_size ;
108
+
109
+ // ensure .list was set to NULL
110
+ assert (buffer -> list == NULL );
111
+
112
+ // get block size
113
+ if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE [0 ]) {
114
+ block_size = max_length ;
115
+ } else {
116
+ block_size = BUFFER_BLOCK_SIZE [0 ];
117
+ }
118
+
119
+ // the first block
120
+ b = PyBytes_FromStringAndSize (NULL , block_size );
121
+ if (b == NULL ) {
122
+ return -1 ;
123
+ }
124
+
125
+ // create the list
126
+ buffer -> list = PyList_New (1 );
127
+ if (buffer -> list == NULL ) {
128
+ Py_DECREF (b );
129
+ return -1 ;
130
+ }
131
+ PyList_SET_ITEM (buffer -> list , 0 , b );
132
+
133
+ // set variables
134
+ buffer -> allocated = block_size ;
135
+ buffer -> max_length = max_length ;
136
+
137
+ * next_out = PyBytes_AS_STRING (b );
138
+ return block_size ;
139
+ }
140
+
141
+ /* Initialize the buffer, with an initial size.
142
+
143
+ Check block size limit in the outer wrapper function. For example, some libs
144
+ accept UINT32_MAX as the maximum block size, then init_size should <= it.
145
+
146
+ On success, return allocated size (>=0)
147
+ On failure, return -1
148
+ */
149
+ static inline Py_ssize_t
150
+ _BlocksOutputBuffer_InitWithSize (_BlocksOutputBuffer * buffer ,
151
+ const Py_ssize_t init_size ,
152
+ void * * next_out )
153
+ {
154
+ PyObject * b ;
155
+
156
+ // ensure .list was set to NULL
157
+ assert (buffer -> list == NULL );
158
+
159
+ // the first block
160
+ b = PyBytes_FromStringAndSize (NULL , init_size );
161
+ if (b == NULL ) {
162
+ PyErr_SetString (PyExc_MemoryError , unable_allocate_msg );
163
+ return -1 ;
164
+ }
165
+
166
+ // create the list
167
+ buffer -> list = PyList_New (1 );
168
+ if (buffer -> list == NULL ) {
169
+ Py_DECREF (b );
170
+ return -1 ;
171
+ }
172
+ PyList_SET_ITEM (buffer -> list , 0 , b );
173
+
174
+ // set variables
175
+ buffer -> allocated = init_size ;
176
+ buffer -> max_length = -1 ;
177
+
178
+ * next_out = PyBytes_AS_STRING (b );
179
+ return init_size ;
180
+ }
181
+
182
+ /* Grow the buffer. The avail_out must be 0, please check it before calling.
183
+
184
+ On success, return allocated size (>=0)
185
+ On failure, return -1
186
+ */
187
+ static inline Py_ssize_t
188
+ _BlocksOutputBuffer_Grow (_BlocksOutputBuffer * buffer ,
189
+ void * * next_out ,
190
+ const Py_ssize_t avail_out )
191
+ {
192
+ PyObject * b ;
193
+ const Py_ssize_t list_len = Py_SIZE (buffer -> list );
194
+ Py_ssize_t block_size ;
195
+
196
+ // ensure no gaps in the data
197
+ if (avail_out != 0 ) {
198
+ PyErr_SetString (PyExc_SystemError ,
199
+ "avail_out is non-zero in _BlocksOutputBuffer_Grow()." );
200
+ return -1 ;
201
+ }
202
+
203
+ // get block size
204
+ if (list_len < (Py_ssize_t ) Py_ARRAY_LENGTH (BUFFER_BLOCK_SIZE )) {
205
+ block_size = BUFFER_BLOCK_SIZE [list_len ];
206
+ } else {
207
+ block_size = BUFFER_BLOCK_SIZE [Py_ARRAY_LENGTH (BUFFER_BLOCK_SIZE ) - 1 ];
208
+ }
209
+
210
+ // check max_length
211
+ if (buffer -> max_length >= 0 ) {
212
+ // if (rest == 0), should not grow the buffer.
213
+ Py_ssize_t rest = buffer -> max_length - buffer -> allocated ;
214
+ assert (rest > 0 );
215
+
216
+ // block_size of the last block
217
+ if (block_size > rest ) {
218
+ block_size = rest ;
219
+ }
220
+ }
221
+
222
+ // check buffer->allocated overflow
223
+ if (block_size > PY_SSIZE_T_MAX - buffer -> allocated ) {
224
+ PyErr_SetString (PyExc_MemoryError , unable_allocate_msg );
225
+ return -1 ;
226
+ }
227
+
228
+ // create the block
229
+ b = PyBytes_FromStringAndSize (NULL , block_size );
230
+ if (b == NULL ) {
231
+ PyErr_SetString (PyExc_MemoryError , unable_allocate_msg );
232
+ return -1 ;
233
+ }
234
+ if (PyList_Append (buffer -> list , b ) < 0 ) {
235
+ Py_DECREF (b );
236
+ return -1 ;
237
+ }
238
+ Py_DECREF (b );
239
+
240
+ // set variables
241
+ buffer -> allocated += block_size ;
242
+
243
+ * next_out = PyBytes_AS_STRING (b );
244
+ return block_size ;
245
+ }
246
+
247
+ /* Return the current outputted data size. */
248
+ static inline Py_ssize_t
249
+ _BlocksOutputBuffer_GetDataSize (_BlocksOutputBuffer * buffer ,
250
+ const Py_ssize_t avail_out )
251
+ {
252
+ return buffer -> allocated - avail_out ;
253
+ }
254
+
255
+ /* Finish the buffer.
256
+
257
+ Return a bytes object on success
258
+ Return NULL on failure
259
+ */
260
+ static inline PyObject *
261
+ _BlocksOutputBuffer_Finish (_BlocksOutputBuffer * buffer ,
262
+ const Py_ssize_t avail_out )
263
+ {
264
+ PyObject * result , * block ;
265
+ const Py_ssize_t list_len = Py_SIZE (buffer -> list );
266
+
267
+ // fast path for single block
268
+ if ((list_len == 1 && avail_out == 0 ) ||
269
+ (list_len == 2 && Py_SIZE (PyList_GET_ITEM (buffer -> list , 1 )) == avail_out ))
270
+ {
271
+ block = PyList_GET_ITEM (buffer -> list , 0 );
272
+ Py_INCREF (block );
273
+
274
+ Py_CLEAR (buffer -> list );
275
+ return block ;
276
+ }
277
+
278
+ // final bytes object
279
+ result = PyBytes_FromStringAndSize (NULL , buffer -> allocated - avail_out );
280
+ if (result == NULL ) {
281
+ PyErr_SetString (PyExc_MemoryError , unable_allocate_msg );
282
+ return NULL ;
283
+ }
284
+
285
+ // memory copy
286
+ if (list_len > 0 ) {
287
+ char * posi = PyBytes_AS_STRING (result );
288
+
289
+ // blocks except the last one
290
+ Py_ssize_t i = 0 ;
291
+ for (; i < list_len - 1 ; i ++ ) {
292
+ block = PyList_GET_ITEM (buffer -> list , i );
293
+ memcpy (posi , PyBytes_AS_STRING (block ), Py_SIZE (block ));
294
+ posi += Py_SIZE (block );
295
+ }
296
+ // the last block
297
+ block = PyList_GET_ITEM (buffer -> list , i );
298
+ memcpy (posi , PyBytes_AS_STRING (block ), Py_SIZE (block ) - avail_out );
299
+ } else {
300
+ assert (Py_SIZE (result ) == 0 );
301
+ }
302
+
303
+ Py_CLEAR (buffer -> list );
304
+ return result ;
305
+ }
306
+
307
+ /* Clean up the buffer when an error occurred. */
308
+ static inline void
309
+ _BlocksOutputBuffer_OnError (_BlocksOutputBuffer * buffer )
310
+ {
311
+ Py_CLEAR (buffer -> list );
312
+ }
313
+
314
+ #ifdef __cplusplus
315
+ }
316
+ #endif
317
+ #endif /* Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H */
0 commit comments