bpo-43179: Generalise alignment for optimised string routines (GH-24624) · python/cpython@dec0757 · GitHub
[go: up one dir, main page]

Skip to content

Commit dec0757

Browse files
authored
bpo-43179: Generalise alignment for optimised string routines (GH-24624)
* Remove m68k-specific hack from ascii_decode

  On m68k, the alignment of primitive types is more relaxed, with 4-byte and 8-byte types only requiring 2-byte alignment, so using sizeof(size_t) as the alignment does not work. Instead, use the portable alternative.

  Note that this is a minimal fix that only relaxes the assertion; the condition for when to use the optimised version remains overly strict. Such issues will be fixed tree-wide in the next commit.

  NB: In C11 we could use _Alignof(size_t) instead, but for compatibility we use autoconf.

* Optimise string routines for architectures with non-natural alignment

  C only requires that sizeof(x) is a multiple of alignof(x), not that the two are equal. Thus, anywhere we optimise based on alignment, we should be using alignof(x), not sizeof(x). This is more annoying than it would be in C11, where we could just use _Alignof(x) (and alignof(x) in C++11), but since we still require only C99 we must plumb the information all the way from autoconf through the various typedefs and defines.
1 parent cfa1766 commit dec0757

File tree

8 files changed

+97
-32
lines changed

8 files changed

+97
-32
lines changed

Objects/bytes_methods.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,15 +115,14 @@ _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
115115
{
116116
const char *p = cptr;
117117
const char *end = p + len;
118-
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
119118

120119
while (p < end) {
121120
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
122121
for an explanation. */
123-
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
122+
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
124123
/* Help allocation */
125124
const char *_p = p;
126-
while (_p < aligned_end) {
125+
while (_p + SIZEOF_SIZE_T <= end) {
127126
size_t value = *(const size_t *) _p;
128127
if (value & ASCII_CHAR_MASK) {
129128
Py_RETURN_FALSE;

Objects/stringlib/codecs.h

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
2626
{
2727
Py_UCS4 ch;
2828
const char *s = *inptr;
29-
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
3029
STRINGLIB_CHAR *p = dest + *outpos;
3130

3231
while (s < end) {
@@ -40,11 +39,11 @@ STRINGLIB(utf8_decode)(const char **inptr, const char *end,
4039
First, check if we can do an aligned read, as most CPUs have
4140
a penalty for unaligned reads.
4241
*/
43-
if (_Py_IS_ALIGNED(s, SIZEOF_SIZE_T)) {
42+
if (_Py_IS_ALIGNED(s, ALIGNOF_SIZE_T)) {
4443
/* Help register allocation */
4544
const char *_s = s;
4645
STRINGLIB_CHAR *_p = p;
47-
while (_s < aligned_end) {
46+
while (_s + SIZEOF_SIZE_T <= end) {
4847
/* Read a whole size_t at a time (either 4 or 8 bytes),
4948
and do a fast unrolled copy if it only contains ASCII
5049
characters. */
@@ -496,8 +495,6 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
496495
int native_ordering)
497496
{
498497
Py_UCS4 ch;
499-
const unsigned char *aligned_end =
500-
(const unsigned char *) _Py_ALIGN_DOWN(e, SIZEOF_LONG);
501498
const unsigned char *q = *inptr;
502499
STRINGLIB_CHAR *p = dest + *outpos;
503500
/* Offsets from q for retrieving byte pairs in the right order. */
@@ -512,10 +509,10 @@ STRINGLIB(utf16_decode)(const unsigned char **inptr, const unsigned char *e,
512509
Py_UCS4 ch2;
513510
/* First check for possible aligned read of a C 'long'. Unaligned
514511
reads are more expensive, better to defer to another iteration. */
515-
if (_Py_IS_ALIGNED(q, SIZEOF_LONG)) {
512+
if (_Py_IS_ALIGNED(q, ALIGNOF_LONG)) {
516513
/* Fast path for runs of in-range non-surrogate chars. */
517514
const unsigned char *_q = q;
518-
while (_q < aligned_end) {
515+
while (_q + SIZEOF_LONG <= e) {
519516
unsigned long block = * (const unsigned long *) _q;
520517
if (native_ordering) {
521518
/* Can use buffer directly */

Objects/stringlib/find_max_char.h

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,12 @@ Py_LOCAL_INLINE(Py_UCS4)
2020
STRINGLIB(find_max_char)(const STRINGLIB_CHAR *begin, const STRINGLIB_CHAR *end)
2121
{
2222
const unsigned char *p = (const unsigned char *) begin;
23-
const unsigned char *aligned_end =
24-
(const unsigned char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
2523

2624
while (p < end) {
27-
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
25+
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
2826
/* Help register allocation */
2927
const unsigned char *_p = p;
30-
while (_p < aligned_end) {
28+
while (_p + SIZEOF_SIZE_T <= end) {
3129
size_t value = *(const size_t *) _p;
3230
if (value & UCS1_ASCII_CHAR_MASK)
3331
return 255;

Objects/unicodeobject.c

Lines changed: 6 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -5070,25 +5070,16 @@ static Py_ssize_t
50705070
ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
50715071
{
50725072
const char *p = start;
5073-
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
5074-
5075-
/*
5076-
* Issue #17237: m68k is a bit different from most architectures in
5077-
* that objects do not use "natural alignment" - for example, int and
5078-
* long are only aligned at 2-byte boundaries. Therefore the assert()
5079-
* won't work; also, tests have shown that skipping the "optimised
5080-
* version" will even speed up m68k.
5081-
*/
5082-
#if !defined(__m68k__)
5073+
50835074
#if SIZEOF_SIZE_T <= SIZEOF_VOID_P
5084-
assert(_Py_IS_ALIGNED(dest, SIZEOF_SIZE_T));
5085-
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
5075+
assert(_Py_IS_ALIGNED(dest, ALIGNOF_SIZE_T));
5076+
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
50865077
/* Fast path, see in STRINGLIB(utf8_decode) for
50875078
an explanation. */
50885079
/* Help allocation */
50895080
const char *_p = p;
50905081
Py_UCS1 * q = dest;
5091-
while (_p < aligned_end) {
5082+
while (_p + SIZEOF_SIZE_T <= end) {
50925083
size_t value = *(const size_t *) _p;
50935084
if (value & ASCII_CHAR_MASK)
50945085
break;
@@ -5104,15 +5095,14 @@ ascii_decode(const char *start, const char *end, Py_UCS1 *dest)
51045095
}
51055096
return p - start;
51065097
}
5107-
#endif
51085098
#endif
51095099
while (p < end) {
51105100
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
51115101
for an explanation. */
5112-
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
5102+
if (_Py_IS_ALIGNED(p, ALIGNOF_SIZE_T)) {
51135103
/* Help allocation */
51145104
const char *_p = p;
5115-
while (_p < aligned_end) {
5105+
while (_p + SIZEOF_SIZE_T <= end) {
51165106
size_t value = *(const size_t *) _p;
51175107
if (value & ASCII_CHAR_MASK)
51185108
break;

PC/pyconfig.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,7 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */
289289
# define SIZEOF_FPOS_T 8
290290
# define SIZEOF_HKEY 8
291291
# define SIZEOF_SIZE_T 8
292+
# define ALIGNOF_SIZE_T 8
292293
/* configure.ac defines HAVE_LARGEFILE_SUPPORT iff
293294
sizeof(off_t) > sizeof(long), and sizeof(long long) >= sizeof(off_t).
294295
On Win64 the second condition is not true, but if fpos_t replaces off_t
@@ -303,6 +304,7 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */
303304
# define SIZEOF_FPOS_T 8
304305
# define SIZEOF_HKEY 4
305306
# define SIZEOF_SIZE_T 4
307+
# define ALIGNOF_SIZE_T 4
306308
/* MS VS2005 changes time_t to a 64-bit type on all platforms */
307309
# if defined(_MSC_VER) && _MSC_VER >= 1400
308310
# define SIZEOF_TIME_T 8
@@ -321,6 +323,7 @@ Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */
321323
#define SIZEOF_SHORT 2
322324
#define SIZEOF_INT 4
323325
#define SIZEOF_LONG 4
326+
#define ALIGNOF_LONG 4
324327
#define SIZEOF_LONG_LONG 8
325328
#define SIZEOF_DOUBLE 8
326329
#define SIZEOF_FLOAT 4

configure

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8644,7 +8644,7 @@ $as_echo "#define HAVE_GCC_UINT128_T 1" >>confdefs.h
86448644
fi
86458645

86468646

8647-
# Sizes of various common basic types
8647+
# Sizes and alignments of various common basic types
86488648
# ANSI C requires sizeof(char) == 1, so no need to check it
86498649
# The cast to long int works around a bug in the HP C Compiler
86508650
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
@@ -8712,6 +8712,41 @@ cat >>confdefs.h <<_ACEOF
87128712
_ACEOF
87138713

87148714

8715+
# The cast to long int works around a bug in the HP C Compiler,
8716+
# see AC_CHECK_SIZEOF for more information.
8717+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking alignment of long" >&5
8718+
$as_echo_n "checking alignment of long... " >&6; }
8719+
if ${ac_cv_alignof_long+:} false; then :
8720+
$as_echo_n "(cached) " >&6
8721+
else
8722+
if ac_fn_c_compute_int "$LINENO" "(long int) offsetof (ac__type_alignof_, y)" "ac_cv_alignof_long" "$ac_includes_default
8723+
#ifndef offsetof
8724+
# define offsetof(type, member) ((char *) &((type *) 0)->member - (char *) 0)
8725+
#endif
8726+
typedef struct { char x; long y; } ac__type_alignof_;"; then :
8727+
8728+
else
8729+
if test "$ac_cv_type_long" = yes; then
8730+
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
8731+
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
8732+
as_fn_error 77 "cannot compute alignment of long
8733+
See \`config.log' for more details" "$LINENO" 5; }
8734+
else
8735+
ac_cv_alignof_long=0
8736+
fi
8737+
fi
8738+
8739+
fi
8740+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_alignof_long" >&5
8741+
$as_echo "$ac_cv_alignof_long" >&6; }
8742+
8743+
8744+
8745+
cat >>confdefs.h <<_ACEOF
8746+
#define ALIGNOF_LONG $ac_cv_alignof_long
8747+
_ACEOF
8748+
8749+
87158750
# The cast to long int works around a bug in the HP C Compiler
87168751
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
87178752
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.
@@ -8943,6 +8978,41 @@ cat >>confdefs.h <<_ACEOF
89438978
_ACEOF
89448979

89458980

8981+
# The cast to long int works around a bug in the HP C Compiler,
8982+
# see AC_CHECK_SIZEOF for more information.
8983+
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking alignment of size_t" >&5
8984+
$as_echo_n "checking alignment of size_t... " >&6; }
8985+
if ${ac_cv_alignof_size_t+:} false; then :
8986+
$as_echo_n "(cached) " >&6
8987+
else
8988+
if ac_fn_c_compute_int "$LINENO" "(long int) offsetof (ac__type_alignof_, y)" "ac_cv_alignof_size_t" "$ac_includes_default
8989+
#ifndef offsetof
8990+
# define offsetof(type, member) ((char *) &((type *) 0)->member - (char *) 0)
8991+
#endif
8992+
typedef struct { char x; size_t y; } ac__type_alignof_;"; then :
8993+
8994+
else
8995+
if test "$ac_cv_type_size_t" = yes; then
8996+
{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
8997+
$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
8998+
as_fn_error 77 "cannot compute alignment of size_t
8999+
See \`config.log' for more details" "$LINENO" 5; }
9000+
else
9001+
ac_cv_alignof_size_t=0
9002+
fi
9003+
fi
9004+
9005+
fi
9006+
{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_alignof_size_t" >&5
9007+
$as_echo "$ac_cv_alignof_size_t" >&6; }
9008+
9009+
9010+
9011+
cat >>confdefs.h <<_ACEOF
9012+
#define ALIGNOF_SIZE_T $ac_cv_alignof_size_t
9013+
_ACEOF
9014+
9015+
89469016
# The cast to long int works around a bug in the HP C Compiler
89479017
# version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
89489018
# declarations like `int a3[[(sizeof (unsigned char)) >= 0]];'.

configure.ac

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2354,17 +2354,19 @@ AC_CHECK_TYPE(ssize_t,
23542354
AC_CHECK_TYPE(__uint128_t,
23552355
AC_DEFINE(HAVE_GCC_UINT128_T, 1, [Define if your compiler provides __uint128_t]),,)
23562356

2357-
# Sizes of various common basic types
2357+
# Sizes and alignments of various common basic types
23582358
# ANSI C requires sizeof(char) == 1, so no need to check it
23592359
AC_CHECK_SIZEOF(int, 4)
23602360
AC_CHECK_SIZEOF(long, 4)
2361+
AC_CHECK_ALIGNOF(long)
23612362
AC_CHECK_SIZEOF(long long, 8)
23622363
AC_CHECK_SIZEOF(void *, 4)
23632364
AC_CHECK_SIZEOF(short, 2)
23642365
AC_CHECK_SIZEOF(float, 4)
23652366
AC_CHECK_SIZEOF(double, 8)
23662367
AC_CHECK_SIZEOF(fpos_t, 4)
23672368
AC_CHECK_SIZEOF(size_t, 4)
2369+
AC_CHECK_ALIGNOF(size_t)
23682370
AC_CHECK_SIZEOF(pid_t, 4)
23692371
AC_CHECK_SIZEOF(uintptr_t)
23702372

pyconfig.h.in

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@
1616
support for AIX C++ shared extension modules. */
1717
#undef AIX_GENUINE_CPLUSPLUS
1818

19+
/* The normal alignment of `long', in bytes. */
20+
#undef ALIGNOF_LONG
21+
22+
/* The normal alignment of `size_t', in bytes. */
23+
#undef ALIGNOF_SIZE_T
24+
1925
/* Alternative SOABI used in debug build to load C extensions built in release
2026
mode */
2127
#undef ALT_SOABI

0 commit comments

Comments
 (0)
0