8000 gh-126024: Use only memcpy for unaligned loads in find_first_nonascii by encukou · Pull Request #127769 · python/cpython · GitHub
[go: up one dir, main page]

Skip to content

gh-126024: Use only memcpy for unaligned loads in find_first_nonascii #127769

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 12 additions & 49 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -5018,53 +5018,6 @@ ctz(size_t v)
#define HAVE_CTZ 0
#endif

#if HAVE_CTZ && PY_LITTLE_ENDIAN
// load p[0]..p[size-1] as a size_t without unaligned access nor read ahead.
static size_t
load_unaligned(const unsigned char *p, size_t size)
{
union {
size_t s;
unsigned char b[SIZEOF_SIZE_T];
8000 } u;
u.s = 0;
// This switch statement assumes little endian because:
// * union is faster than bitwise or and shift.
// * big endian machine is rare and hard to maintain.
switch (size) {
default:
#if SIZEOF_SIZE_T == 8
case 8:
u.b[7] = p[7];
_Py_FALLTHROUGH;
case 7:
u.b[6] = p[6];
_Py_FALLTHROUGH;
case 6:
u.b[5] = p[5];
_Py_FALLTHROUGH;
case 5:
u.b[4] = p[4];
_Py_FALLTHROUGH;
#endif
case 4:
u.b[3] = p[3];
_Py_FALLTHROUGH;
case 3:
u.b[2] = p[2];
_Py_FALLTHROUGH;
case 2:
u.b[1] = p[1];
_Py_FALLTHROUGH;
case 1:
u.b[0] = p[0];
break;
case 0:
break;
}
return u.s;
}
#endif

/*
* Find the first non-ASCII character in a byte sequence.
Expand All @@ -5077,12 +5030,17 @@ load_unaligned(const unsigned char *p, size_t size)
static Py_ssize_t
find_first_nonascii(const unsigned char *start, const unsigned char *end)
{
// The search is done in `size_t` chunks.
// The start and end might not be aligned at `size_t` boundaries,
// so they're handled specially.

const unsigned char *p = start;

if (end - start >= SIZEOF_SIZE_T) {
const unsigned char *p2 = _Py_ALIGN_UP(p, SIZEOF_SIZE_T);
#if PY_LITTLE_ENDIAN && HAVE_CTZ
if (p < p2) {
// Avoid unaligned read.
size_t u;
memcpy(&u, p, sizeof(size_t));
u &= ASCII_CHAR_MASK;
Expand Down Expand Up @@ -5114,9 +5072,14 @@ find_first_nonascii(const unsigned char *start, const unsigned char *end)
p += SIZEOF_SIZE_T;
}
}

// less than size_t bytes left.
assert(end - p <= SIZEOF_SIZE_T);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This assertion would fail when not PY_LITTLE_ENDIAN && HAVE_CTZ case.

                // big endian and minor compilers are difficult to test.
                // fallback to per byte check.
                break;

#if PY_LITTLE_ENDIAN && HAVE_CTZ
// we can not use *(const size_t*)p to avoid buffer overrun.
size_t u = load_unaligned(p, end - p) & ASCII_CHAR_MASK;
// Avoid unaligned read and read ahead.
size_t u = 0;
memcpy(&u, p, end - p);
u &= ASCII_CHAR_MASK;
if (u) {
return p - start + (ctz(u) - 7) / 8;
}
Expand Down
Loading
0