bpo-34749: Improved performance of binascii.a2b_base64(). (GH-9444) · python/cpython@1c5e68e · GitHub
[go: up one dir, main page]

Skip to content

Commit 1c5e68e

Browse files
sir-sigurd authored and miss-islington committed
bpo-34749: Improved performance of binascii.a2b_base64(). (GH-9444)
https://bugs.python.org/issue34749
1 parent 0d4f435 commit 1c5e68e

File tree

2 files changed

+58
-86
lines changed

2 files changed

+58
-86
lines changed
Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,2 @@
1+
:func:`binascii.a2b_base64` is now up to 2 times faster. Patch by Sergey
2+
Fedoseev.

Modules/binascii.c

Lines changed: 56 additions & 86 deletions
Original file line number | Diff line number | Diff line change
@@ -130,15 +130,24 @@ static const unsigned char table_a2b_hqx[256] = {
130130
static const unsigned char table_b2a_hqx[] =
131131
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
132132

133-
static const char table_a2b_base64[] = {
133+
static const unsigned char table_a2b_base64[] = {
134134
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
135135
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
136136
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
137137
52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
138138
-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14,
139139
15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
140140
-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
141-
41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
141+
41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,
142+
143+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
144+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
145+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
146+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
147+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
148+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
149+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
150+
-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
142151
};
143152

144153
#define BASE64_PAD '='
@@ -413,32 +422,6 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
413422
return _PyBytesWriter_Finish(&writer, ascii_data);
414423
}
415424

416-
417-
static int
418-
binascii_find_valid(const unsigned char *s, Py_ssize_t slen, int num)
419-
{
420-
/* Finds & returns the (num+1)th
421-
** valid character for base64, or -1 if none.
422-
*/
423-
424-
int ret = -1;
425-
unsigned char c, b64val;
426-
427-
while ((slen > 0) && (ret == -1)) {
428-
c = *s;
429-
b64val = table_a2b_base64[c & 0x7f];
430-
if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
431-
if (num == 0)
432-
ret = *s;
433-
num--;
434-
}
435-
436-
s++;
437-
slen--;
438-
}
439-
return ret;
440-
}
441-
442425
/*[clinic input]
443426
binascii.a2b_base64
444427
@@ -452,88 +435,74 @@ static PyObject *
452435
binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
453436
/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
454437
{
455-
const unsigned char *ascii_data;
456-
unsigned char *bin_data;
457-
unsigned char *bin_data_start;
458-
int leftbits = 0;
459-
unsigned char this_ch;
460-
unsigned int leftchar = 0;
461-
Py_ssize_t ascii_len, bin_len;
462-
int quad_pos = 0;
463-
_PyBytesWriter writer;
464-
binascii_state *state;
465-
466-
ascii_data = data->buf;
467-
ascii_len = data->len;
438+
assert(data->len >= 0);
468439

469-
assert(ascii_len >= 0);
470-
471-
if (ascii_len > PY_SSIZE_T_MAX - 3)
472-
return PyErr_NoMemory();
473-
474-
bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
475-
476-
_PyBytesWriter_Init(&writer);
440+
const unsigned char *ascii_data = data->buf;
441+
size_t ascii_len = data->len;
477442

478443
/* Allocate the buffer */
479-
bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
444+
Py_ssize_t bin_len = ((ascii_len+3)/4)*3; /* Upper bound, corrected later */
445+
_PyBytesWriter writer;
446+
_PyBytesWriter_Init(&writer);
447+
unsigned char *bin_data = _PyBytesWriter_Alloc(&writer, bin_len);
480448
if (bin_data == NULL)
481449
return NULL;
482-
bin_data_start = bin_data;
483-
484-
for( ; ascii_len > 0; ascii_len--, ascii_data++) {
485-
this_ch = *ascii_data;
450+
unsigned char *bin_data_start = bin_data;
486451

487-
if (this_ch > 0x7f ||
488-
this_ch == '\r' || this_ch == '\n' || this_ch == ' ')
489-
continue;
452+
int quad_pos = 0;
453+
unsigned char leftchar = 0;
454+
int pads = 0;
455+
for (size_t i = 0; i < ascii_len; i++) {
456+
unsigned char this_ch = ascii_data[i];
490457

491458
/* Check for pad sequences and ignore
492459
** the invalid ones.
493460
*/
494461
if (this_ch == BASE64_PAD) {
495-
if ( (quad_pos < 2) ||
496-
((quad_pos == 2) &&
497-
(binascii_find_valid(ascii_data, ascii_len, 1)
498-
!= BASE64_PAD)) )
499-
{
500-
continue;
501-
}
502-
else {
462+
if (quad_pos >= 2 && quad_pos + ++pads >= 4) {
503463
/* A pad sequence means no more input.
504464
** We've already interpreted the data
505465
** from the quad at this point.
506466
*/
507-
leftbits = 0;
508-
break;
467+
goto done;
509468
}
469+
continue;
510470
}
511471

512-
this_ch = table_a2b_base64[*ascii_data];
513-
if ( this_ch == (unsigned char) -1 )
472+
this_ch = table_a2b_base64[this_ch];
473+
if (this_ch >= 64) {
514474
continue;
475+
}
476+
pads = 0;
515477

516-
/*
517-
** Shift it in on the low end, and see if there's
518-
** a byte ready for output.
519-
*/
520-
quad_pos = (quad_pos + 1) & 0x03;
521-
leftchar = (leftchar << 6) | (this_ch);
522-
leftbits += 6;
523-
524-
if ( leftbits >= 8 ) {
525-
leftbits -= 8;
526-
*bin_data++ = (leftchar >> leftbits) & 0xff;
527-
leftchar &= ((1 << leftbits) - 1);
478+
switch (quad_pos) {
479+
case 0:
480+
quad_pos = 1;
481+
leftchar = this_ch;
482+
break;
483+
case 1:
484+
quad_pos = 2;
485+
*bin_data++ = (leftchar << 2) | (this_ch >> 4);
486+
leftchar = this_ch & 0x0f;
487+
break;
488+
case 2:
489+
quad_pos = 3;
490+
*bin_data++ = (leftchar << 4) | (this_ch >> 2);
491+
leftchar = this_ch & 0x03;
492+
break;
493+
case 3:
494+
quad_pos = 0;
495+
*bin_data++ = (leftchar << 6) | (this_ch);
496+
leftchar = 0;
497+
break;
528498
}
529499
}
530500

531-
if (leftbits != 0) {
532-
state = PyModule_GetState(module);
501+
if (quad_pos != 0) {
502+
binascii_state *state = PyModule_GetState(module);
533503
if (state == NULL) {
534-
return NULL;
535-
}
536-
if (leftbits == 6) {
504+
/* error already set, from PyModule_GetState */
505+
} else if (quad_pos == 1) {
537506
/*
538507
** There is exactly one extra valid, non-padding, base64 character.
539508
** This is an invalid length, as there is no possible input that
@@ -551,6 +520,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
551520
return NULL;
552521
}
553522

523+
done:
554524
return _PyBytesWriter_Finish(&writer, bin_data);
555525
}
556526

0 commit comments

Comments
 (0)
0