gh-126024: unicodeobject: optimize find_first_nonascii (GH-127790) · python/cpython@5dd775b · GitHub
[go: up one dir, main page]

Skip to content

Commit 5dd775b

Browse files
authored
gh-126024: unicodeobject: optimize find_first_nonascii (GH-127790)
Remove 1 branch.
1 parent 8bc1818 commit 5dd775b

File tree

1 file changed

+14
-9
lines changed

1 file changed

+14
-9
lines changed

Objects/unicodeobject.c

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5077,28 +5077,32 @@ load_unaligned(const unsigned char *p, size_t size)
50775077
static Py_ssize_t
50785078
find_first_nonascii(const unsigned char *start, const unsigned char *end)
50795079
{
5080+
// The search is done in `size_t` chunks.
5081+
// The start and end might not be aligned at `size_t` boundaries,
5082+
// so they're handled specially.
5083+
50805084
const unsigned char *p = start;
50815085

50825086
if (end - start >= SIZEOF_SIZE_T) {
5083-
const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T);
5087+
// Avoid unaligned read.
50845088
#if PY_LITTLE_ENDIAN && HAVE_CTZ
5085-
if (p < p2) {
5086-
size_t u;
5087-
memcpy(&u, p, sizeof(size_t));
5088-
u &= ASCII_CHAR_MASK;
5089-
if (u) {
5090-
return (ctz(u) - 7) / 8;
5091-
}
5092-
p = p2;
5089+
size_t u;
5090+
memcpy(&u, p, sizeof(size_t));
5091+
u &= ASCII_CHAR_MASK;
5092+
if (u) {
5093+
return (ctz(u) - 7) / 8;
50935094
}
5095+
p = _Py_ALIGN_DOWN(p + SIZEOF_SIZE_T, SIZEOF_SIZE_T);
50945096
#else /* PY_LITTLE_ENDIAN && HAVE_CTZ */
5097+
const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T);
50955098
while (p < p2) {
50965099
if (*p & 0x80) {
50975100
return p - start;
50985101
}
50995102
p++;
51005103
}
51015104
#endif
5105+
51025106
const unsigned char *e = end - SIZEOF_SIZE_T;
51035107
while (p <= e) {
51045108
size_t u = (*(const size_t *)p) & ASCII_CHAR_MASK;
@@ -5115,6 +5119,7 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end)
51155119
}
51165120
}
51175121
#if PY_LITTLE_ENDIAN && HAVE_CTZ
5122+
assert((end - p) < SIZEOF_SIZE_T);
51185123
// We cannot use *(const size_t*)p here: fewer than SIZEOF_SIZE_T bytes remain, so a full-width read would overrun the buffer.
51195124
size_t u = load_unaligned(p, end - p) & ASCII_CHAR_MASK;
51205125
if (u) {

0 commit comments

Comments
 (0)
0