@@ -5036,70 +5036,15 @@ PyUnicode_DecodeUTF8(const char *s,
5036
5036
# error C 'size_t' size should be either 4 or 8!
5037
5037
#endif
5038
5038
5039
- static Py_ssize_t
5040
- ascii_decode (const char * start , const char * end , Py_UCS1 * dest )
5041
- {
5042
- const char * p = start ;
5043
-
5044
- #if SIZEOF_SIZE_T <= SIZEOF_VOID_P
5045
- if (_Py_IS_ALIGNED (p , ALIGNOF_SIZE_T )
5046
- && _Py_IS_ALIGNED (dest , ALIGNOF_SIZE_T ))
5047
- {
5048
- /* Fast path, see in STRINGLIB(utf8_decode) for
5049
- an explanation. */
5050
- /* Help allocation */
5051
- const char * _p = p ;
5052
- Py_UCS1 * q = dest ;
5053
- while (_p + SIZEOF_SIZE_T <= end ) {
5054
- size_t value = * (const size_t * ) _p ;
5055
- if (value & ASCII_CHAR_MASK )
5056
- break ;
5057
- * ((size_t * )q ) = value ;
5058
- _p += SIZEOF_SIZE_T ;
5059
- q += SIZEOF_SIZE_T ;
5060
- }
5061
- p = _p ;
5062
- while (p < end ) {
5063
- if ((unsigned char )* p & 0x80 )
5064
- break ;
5065
- * q ++ = * p ++ ;
5066
- }
5067
- return p - start ;
5068
- }
5069
- #endif
5070
- while (p < end ) {
5071
- /* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
5072
- for an explanation. */
5073
- if (_Py_IS_ALIGNED (p , ALIGNOF_SIZE_T )) {
5074
- /* Help allocation */
5075
- const char * _p = p ;
5076
- while (_p + SIZEOF_SIZE_T <= end ) {
5077
- size_t value = * (const size_t * ) _p ;
5078
- if (value & ASCII_CHAR_MASK )
5079
- break ;
5080
- _p += SIZEOF_SIZE_T ;
5081
- }
5082
- p = _p ;
5083
- if (_p == end )
5084
- break ;
5085
- }
5086
- if ((unsigned char )* p & 0x80 )
5087
- break ;
5088
- ++ p ;
5089
- }
5090
- memcpy (dest , start , p - start );
5091
- return p - start ;
5092
- }
5093
-
5094
5039
#if (defined(__clang__ ) || defined(__GNUC__ ))
5095
- #define HAS_CTZ 1
5040
+ #define HAVE_CTZ 1
5096
5041
static inline unsigned int
5097
5042
ctz (size_t v )
5098
5043
{
5099
5044
return __builtin_ctzll ((unsigned long long )v );
5100
5045
}
5101
5046
#elif defined(_MSC_VER )
5102
- #define HAS_CTZ 1
5047
+ #define HAVE_CTZ 1
5103
5048
static inline unsigned int
5104
5049
ctz (size_t v )
5105
5050
{
@@ -5113,24 +5058,79 @@ ctz(size_t v)
5113
5058
}
5114
5059
#endif
5115
5060
5061
+ #if HAVE_CTZ
5062
// Load p[0]..p[size-1] into a size_t with p[i] placed in bits 8*i..8*i+7,
// i.e. in little-endian order regardless of the host byte order.  Bytes of
// the result at index >= size are zero.
//
// Exactly `size` bytes are read, one at a time, so there is neither an
// unaligned word access nor a read past p[size-1].  `size` must not exceed
// sizeof(size_t).
//
// Callers mask the result with ASCII_CHAR_MASK and use (ctz(u) - 7) / 8 to
// turn the lowest set bit into a byte offset, which is only correct when
// p[0] occupies the least significant byte.
static size_t
load_unaligned(const unsigned char *p, size_t size)
{
    // size is unsigned, so only the upper bound needs checking.
    assert(size <= sizeof(size_t));

    size_t value = 0;
    // Shifting each byte into place (instead of storing through a union)
    // keeps the layout of the result independent of host endianness, and
    // bounding the loop by `size` keeps every shift count below the width
    // of size_t on both 32-bit and 64-bit builds.
    for (size_t i = 0; i < size; i++) {
        value |= (size_t)p[i] << (8 * i);
    }
    return value;
}
5105
+ #endif
5106
+
5116
5107
static Py_ssize_t
5117
5108
find_first_nonascii (const unsigned char * start , const unsigned char * end )
5118
5109
{
5119
5110
const unsigned char * p = start ;
5120
5111
5121
- if (end - start > SIZEOF_SIZE_T + ALIGNOF_SIZE_T ) {
5122
- while (!_Py_IS_ALIGNED (p , ALIGNOF_SIZE_T )) {
5123
- if ((unsigned char )* p & 0x80 ) {
5112
+ if (end - start >= SIZEOF_SIZE_T ) {
5113
+ const unsigned char * p2 = _Py_ALIGN_UP (p , SIZEOF_SIZE_T );
5114
+ #ifdef HAVE_CTZ
5115
+ size_t u = load_unaligned (p , p2 - p ) & ASCII_CHAR_MASK ;
5116
+ if (u ) {
5117
+ return p - start + (ctz (u ) - 7 ) / 8 ;
5118
+ }
5119
+ p = p2 ;
5120
+ #else
5121
+ while (p < p2 ) {
5122
+ if (* p & 0x80 ) {
5124
5123
return p - start ;
5125
5124
}
5126
5125
p ++ ;
5127
5126
}
5127
+ #endif
5128
5128
const unsigned char * e = end - SIZEOF_SIZE_T ;
5129
5129
while (p <= e ) {
5130
- size_t value = (* (const size_t * )p ) & ASCII_CHAR_MASK ;
5131
- if (value ) {
5132
- #if PY_LITTLE_ENDIAN && HAS_CTZ
5133
- return p - start + (ctz (value ) - 7 ) / 8 ;
5130
+ size_t u = (* (const size_t * )p ) & ASCII_CHAR_MASK ;
5131
+ if (u ) {
5132
+ #if PY_LITTLE_ENDIAN && HAVE_CTZ
5133
+ return p - start + (ctz (u ) - 7 ) / 8 ;
5134
5134
#else
5135
5135
// big endian and minor compilers are difficult to test.
5136
5136
// fallback to per byte check.
@@ -5140,47 +5140,15 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end)
5140
5140
p += SIZEOF_SIZE_T ;
5141
5141
}
5142
5142
}
5143
- #if HAS_CTZ
5144
- // This part looks bit tricky, but decoding short ASCII is super important.
5145
- // Since we copy from p to size_t manually, this part works fine with big endian.
5146
- while (p < end ) {
5147
- size_t u = (size_t )(p [0 ]);
5148
- switch (end - p ) {
5149
- default :
5150
- #if SIZEOF_SIZE_T == 8
5151
- u |= (size_t )(p [7 ]) << 56ull ;
5152
- // fall through
5153
- case 7 :
5154
- u |= (size_t )(p [6 ]) << 48ull ;
5155
- // fall through
5156
- case 6 :
5157
- u |= (size_t )(p [5 ]) << 40ull ;
5158
- // fall through
5159
- case 5 :
5160
- u |= (size_t )(p [4 ]) << 32ull ;
5161
- // fall through
5162
- case 4 :
5163
- #endif
5164
- u |= (size_t )(p [3 ]) << 24 ;
5165
- // fall through
5166
- case 3 :
5167
- u |= (size_t )(p [2 ]) << 16 ;
5168
- // fall through
5169
- case 2 :
5170
- u |= (size_t )(p [1 ]) << 8 ;
5171
- break ;
5172
- case 1 :
5173
- break ;
5174
- }
5175
- if (u & ASCII_CHAR_MASK ) {
5176
- return p - start + (ctz (u & ASCII_CHAR_MASK ) - 7 ) / 8 ;
5177
- }
5178
- p += SIZEOF_SIZE_T ;
5143
+ #if HAVE_CTZ
5144
+ size_t u = load_unaligned (p , end - p ) & ASCII_CHAR_MASK ;
5145
+ if (u ) {
5146
+ return p - start + (ctz (u ) - 7 ) / 8 ;
5179
5147
}
5180
5148
return end - start ;
5181
5149
#else
5182
5150
while (p < end ) {
5183
- if (( unsigned char ) * p & 0x80 ) {
5151
+ if (* p & 0x80 ) {
5184
5152
break ;
5185
5153
}
5186
5154
p ++ ;
@@ -5204,7 +5172,7 @@ static Py_ssize_t utf8_count_codepoints(const unsigned char *s, const unsigned c
5204
5172
{
5205
5173
Py_ssize_t len = 0 ;
5206
5174
5207
- if (end - s > SIZEOF_SIZE_T + ALIGNOF_SIZE_T ) {
5175
+ if (end - s >= SIZEOF_SIZE_T ) {
5208
5176
while (!_Py_IS_ALIGNED (s , ALIGNOF_SIZE_T )) {
5209
5177
len += scalar_utf8_start_char (* s ++ );
5210
5178
}
@@ -5235,6 +5203,39 @@ static Py_ssize_t utf8_count_codepoints(const unsigned char *s, const unsigned c
5235
5203
return len ;
5236
5204
}
5237
5205
5206
+ static Py_ssize_t
5207
+ ascii_decode (const char * start , const char * end , Py_UCS1 * dest )
5208
+ {
5209
+ #if SIZEOF_SIZE_T <= SIZEOF_VOID_P
5210
+ if (_Py_IS_ALIGNED (start , ALIGNOF_SIZE_T )
5211
+ && _Py_IS_ALIGNED (dest , ALIGNOF_SIZE_T ))
5212
+ {
5213
+ /* Fast path, see in STRINGLIB(utf8_decode) for
5214
+ an explanation. */
5215
+ const char * p = start ;
5216
+ Py_UCS1 * q = dest ;
5217
+ while (p + SIZEOF_SIZE_T <= end ) {
5218
+ size_t value = * (const size_t * ) p ;
5219
+ if (value & ASCII_CHAR_MASK )
5220
+ break ;
5221
+ * ((size_t * )q ) = value ;
5222
+ p += SIZEOF_SIZE_T ;
5223
+ q += SIZEOF_SIZE_T ;
5224
+ }
5225
+ while (p < end ) {
5226
+ if ((unsigned char )* p & 0x80 )
5227
+ break ;
5228
+ * q ++ = * p ++ ;
5229
+ }
5230
+ return p - start ;
5231
+ }
5232
+ #endif
5233
+ Py_ssize_t pos = find_first_nonascii ((const unsigned char * )start ,
5234
+ (const unsigned char * )end );
5235
+ memcpy (dest , start , pos );
5236
+ return pos ;
5237
+ }
5238
+
5238
5239
static int
5239
5240
unicode_decode_utf8_impl (_PyUnicodeWriter * writer ,
5240
5241
const char * starts , const char * s , const char * end ,
0 commit comments