8000 py/objint: Fix int.to_bytes() buffer size checks. · micropython/micropython@908ab1c · GitHub
[go: up one dir, main page]

Skip to content

Commit 908ab1c

Browse files
projectgusdpgeorge
authored andcommitted
py/objint: Fix int.to_bytes() buffer size checks.
Fixes and improvements to `int.to_bytes()` are: - No longer overflows if byte size is 0 (closes #13041). - Raises OverflowError in any case where number won't fit into byte length (now matches CPython, previously MicroPython would return a truncated bytes object). - Document that `micropython int.to_bytes()` doesn't implement the optional signed kwarg, but will behave as if `signed=True` when the integer is negative (this is the current behaviour). Add tests for this also. Requires changes for small ints, MPZ large ints, and "long long" large ints. Adds a new set of unit tests for ints between 32 and 64 bits to increase coverage of "long long" large ints, which are otherwise untested. Tested on unix port (64 bit small ints, MPZ long ints) and Zephyr STM32WB board (32 bit small ints, long long large ints). This work was funded through GitHub Sponsors. Signed-off-by: Angus Gratton <angus@redyak.com.au>
1 parent d933210 commit 908ab1c

12 files changed

+302
-29
lines changed

docs/library/builtins.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,10 @@ Functions and types
8282
In MicroPython, `byteorder` parameter must be positional (this is
8383
compatible with CPython).
8484

85+
.. note:: The optional ``signed`` kwarg from CPython is not supported.
86+
MicroPython currently converts negative integers as signed,
87+
and positive as unsigned. (:ref:`Details <cpydiff_types_int_to_bytes>`.)
88+
8589
.. function:: isinstance()
8690

8791
.. function:: issubclass()

py/misc.h

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -343,13 +343,46 @@ static uint32_t mp_clz(uint32_t x) {
343343
return _BitScanReverse(&lz, x) ? (sizeof(x) * 8 - 1) - lz : 0;
344344
}
345345

346+
static uint32_t mp_clzl(unsigned long x) {
347+
unsigned long lz = 0;
348+
return _BitScanReverse(&lz, x) ? (sizeof(x) * 8 - 1) - lz : 0;
349+
}
350+
351+
#ifdef _WIN64
352+
static uint32_t mp_clzll(unsigned long long x) {
353+
unsigned long lz = 0;
354+
return _BitScanReverse64(&lz, x) ? (sizeof(x) * 8 - 1) - lz : 0;
355+
}
356+
#else
357+
// Microsoft don't ship _BitScanReverse64 on Win32, so emulate it
358+
static uint32_t mp_clzll(unsigned long long x) {
359+
unsigned long h = x >> 32;
360+
return h ? mp_clzl(h) : (mp_clzl(x) + 32);
361+
}
362+
#endif
363+
346364
static uint32_t mp_ctz(uint32_t x) {
347365
unsigned long tz = 0;
348366
return _BitScanForward(&tz, x) ? tz : 0;
349367
}
350368
#else
351369
#define mp_clz(x) __builtin_clz(x)
370+
#define mp_clzl(x) __builtin_clzl(x)
371+
#define mp_clzll(x) __builtin_clzll(x)
352372
#define mp_ctz(x) __builtin_ctz(x)
353373
#endif
354374

375+
// mp_int_t can be larger than long, i.e. Windows 64-bit, nan-box variants
376+
static inline uint32_t mp_clz_mpi(mp_int_t x) {
377+
MP_STATIC_ASSERT(sizeof(mp_int_t) == sizeof(long long)
378+
|| sizeof(mp_int_t) == sizeof(long));
379+
380+
// ugly, but should compile to single intrinsic unless O0 is set
381+
if (sizeof(mp_int_t) == sizeof(long)) {
382+
return mp_clzl(x);
383+
} else {
384+
return mp_clzll(x);
385+
}
386+
}
387+
355388
#endif // MICROPY_INCLUDED_PY_MISC_H

py/mpz.c

Lines changed: 19 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1589,7 +1589,7 @@ bool mpz_as_uint_checked(const mpz_t *i, mp_uint_t *value) {
15891589
return true;
15901590
}
15911591

1592-
void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
1592+
bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf) {
15931593
byte *b = buf;
15941594
if (big_endian) {
15951595
b += len;
@@ -1598,6 +1598,8 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
15981598
int bits = 0;
15991599
mpz_dbl_dig_t d = 0;
16001600
mpz_dbl_dig_t carry = 1;
1601+
size_t olen = len; // bytes in output buffer
1602+
bool ok = true;
16011603
for (size_t zlen = z->len; zlen > 0; --zlen) {
16021604
bits += DIG_SIZE;
16031605
d = (d << DIG_SIZE) | *zdig++;
@@ -1607,28 +1609,32 @@ void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf) {
16071609
val = (~val & 0xff) + carry;
16081610
carry = val >> 8;
16091611
}
1612+
1613+
if (!olen) {
1614+
// Buffer is full, only OK if all remaining bytes are zeroes
1615+
ok = ok && ((byte)val == 0);
1616+
continue;
1617+
}
1618+
16101619
if (big_endian) {
16111620
*--b = val;
1612-
if (b == buf) {
1613-
return;
1614-
}
16151621
} else {
16161622
*b++ = val;
1617-
if (b == buf + len) {
1618-
return;
1619-
}
16201623
}
1624+
olen--;
16211625
}
16221626
}
16231627

1624-
// fill remainder of buf with zero/sign extension of the integer
1625-
if (big_endian) {
1626-
len = b - buf;
1628+
if (as_signed && olen == 0 && len > 0) {
1629+
// If output exhausted then ensure there was enough space for the sign bit
1630+
byte most_sig = big_endian ? buf[0] : buf[len - 1];
1631+
ok = ok && (bool)(most_sig & 0x80) == (bool)z->neg;
16271632
} else {
1628-
len = buf + len - b;
1629-
buf = b;
1633+
// fill remainder of buf with zero/sign extension of the integer
1634+
memset(big_endian ? buf : b, z->neg ? 0xff : 0x00, olen);
16301635
}
1631-
memset(buf, z->neg ? 0xff : 0x00, len);
1636+
1637+
return ok;
16321638
}
16331639

16341640
#if MICROPY_PY_BUILTINS_FLOAT

py/mpz.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -93,9 +93,9 @@ typedef int8_t mpz_dbl_dig_signed_t;
9393
typedef struct _mpz_t {
9494
// Zero has neg=0, len=0. Negative zero is not allowed.
9595
size_t neg : 1;
96-
size_t fixed_dig : 1;
97-
size_t alloc : (8 * sizeof(size_t) - 2);
98-
size_t len;
96+
size_t fixed_dig : 1; // flag, 'dig' buffer cannot be reallocated
97+
size_t alloc : (8 * sizeof(size_t) - 2); // number of entries allocated in 'dig'
98+
size_t len; // number of entries used in 'dig'
9999
mpz_dig_t *dig;
100100
} mpz_t;
101101

@@ -145,7 +145,8 @@ static inline size_t mpz_max_num_bits(const mpz_t *z) {
145145
mp_int_t mpz_hash(const mpz_t *z);
146146
bool mpz_as_int_checked(const mpz_t *z, mp_int_t *value);
147147
bool mpz_as_uint_checked(const mpz_t *z, mp_uint_t *value);
148-
void mpz_as_bytes(const mpz_t *z, bool big_endian, size_t len, byte *buf);
148+
// Returns true if 'z' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
149+
bool mpz_as_bytes(const mpz_t *z, bool big_endian, bool as_signed, size_t len, byte *buf);
149150
#if MICROPY_PY_BUILTINS_FLOAT
150151
mp_float_t mpz_as_float(const mpz_t *z);
151152
#endif

py/objint.c

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -421,29 +421,50 @@ static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 3, 4, int_fro
421421
static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj));
422422

423423
static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) {
424-
// TODO: Support signed param (assumes signed=False)
424+
// TODO: Support signed (currently behaves as if signed=(val < 0))
425425
(void)n_args;
426+
bool overflow;
426427

427-
mp_int_t len = mp_obj_get_int(args[1]);
428-
if (len < 0) {
428+
mp_int_t dlen = mp_obj_get_int(args[1]);
429+
if (dlen < 0) {
429430
mp_raise_ValueError(NULL);
430431
}
431432
bool big_endian = args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);
432433

433434
vstr_t vstr;
434-
vstr_init_len(&vstr, len);
435+
vstr_init_len(&vstr, dlen);
435436
byte *data = (byte *)vstr.buf;
436-
memset(data, 0, len);
437437

438438
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
439439
if (!mp_obj_is_small_int(args[0])) {
440-
mp_obj_int_to_bytes_impl(args[0], big_endian, len, data);
440+
overflow = !mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data);
441441
} else
442442
#endif
443443
{
444444
mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]);
445-
size_t l = MIN((size_t)len, sizeof(val));
446-
mp_binary_set_int(l, big_endian, data + (big_endian ? (len - l) : 0), val);
445+
int slen = 0; // Number of bytes to represent val
446+
447+
// This logic has a twin in objint_longlong.c
448+
if (val > 0) {
449+
slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(val) + 7) / 8;
450+
} else if (val < -1) {
451+
slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(~val) + 8) / 8;
452+
} else {
453+
// clz of 0 is defined, so 0 and -1 map to 0 and 1
454+
slen = -val;
455+
}
456+
457+
if (slen <= dlen) {
458+
memset(data, val < 0 ? 0xFF : 0x00, dlen);
459+
mp_binary_set_int(slen, big_endian, data + (big_endian ? (dlen - slen) : 0), val);
460+
overflow = false;
461+
} else {
462+
overflow = true;
463+
}
464+
}
465+
466+
if (overflow) {
467+
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("buffer too small"));
447468
}
448469

449470
return mp_obj_new_bytes_from_vstr(&vstr);

py/objint.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,
5555
int base, const char *prefix, char base_char, char comma);
5656
mp_int_t mp_obj_int_hash(mp_obj_t self_in);
5757
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
58-
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
58+
// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
59+
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
5960
int mp_obj_int_sign(mp_obj_t self_in);
6061
mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
6162
mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);

py/objint_longlong.c

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,10 +57,27 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
5757
return mp_obj_new_int_from_ll(value);
5858
}
5959

60-
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
60+
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
6161
assert(mp_obj_is_exact_type(self_in, &mp_type_int));
6262
mp_obj_int_t *self = self_in;
6363
long long val = self->val;
64+
size_t slen; // Number of bytes to represent val
65+
66+
// This logic has a twin in objint.c
67+
if (val > 0) {
68+
slen = (sizeof(long long) * 8 - mp_clzll(val) + 7) / 8;
69+
} else if (val < -1) {
70+
slen = (sizeof(long long) * 8 - mp_clzll(~val) + 8) / 8;
71+
} else {
72+
// clz of 0 is defined, so 0 and -1 map to 0 and 1
73+
slen = -val;
74+
}
75+
76+
if (slen > len) {
77+
return false; // Would overflow
78+
// TODO: Determine whether to copy and truncate, as some callers probably expect this...?
79+
}
80+
6481
if (big_endian) {
6582
byte *b = buf + len;
6683
while (b > buf) {
@@ -73,6 +90,7 @@ void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byt
7390
val >>= 8;
7491
}
7592
}
93+
return true;
7694
}
7795

7896
int mp_obj_int_sign(mp_obj_t self_in) {

py/objint_mpz.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,10 +112,10 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
112112
return MP_OBJ_FROM_PTR(o);
113113
}
114114

115-
void mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
115+
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
116116
assert(mp_obj_is_exact_type(self_in, &mp_type_int));
117117
mp_obj_int_t *self = MP_OBJ_TO_PTR(self_in);
118-
mpz_as_bytes(&self->mpz, big_endian, len, buf);
118+
return mpz_as_bytes(&self->mpz, big_endian, self->mpz.neg, len, buf);
119119
}
120120

121121
int mp_obj_int_sign(mp_obj_t self_in) {

tests/basics/int_bytes.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import sys
2+
13
print((10).to_bytes(1, "little"))
24
print((111111).to_bytes(4, "little"))
35
print((100).to_bytes(10, "little"))
@@ -20,3 +22,74 @@
2022
(1).to_bytes(-1, "little")
2123
except ValueError:
2224
print("ValueError")
25+
26+
# zero byte destination should also raise an error
27+
try:
28+
(1).to_bytes(0, "little")
29+
except OverflowError:
30+
print("OverflowError")
31+
32+
# except for converting 0 to a zero-length byte array
33+
print((0).to_bytes(0, "big"))
34+
35+
# byte length can fit the integer directly
36+
print((0xFF).to_bytes(1, "little"))
37+
print((0xFF).to_bytes(1, "big"))
38+
print((0xEFF).to_bytes(2, "little"))
39+
print((0xEFF).to_bytes(2, "big"))
40+
print((0xCDEFF).to_bytes(3, "little"))
41+
print((0xCDEFF).to_bytes(3, "big"))
42+
43+
# OverFlowError if not big enough
44+
45+
try:
46+
(0x123).to_bytes(1, "big")
47+
except OverflowError:
48+
print("OverflowError")
49+
50+
try:
51+
(0x12345).to_bytes(2, "big")
52+
except OverflowError:
53+
print("OverflowError")
54+
55+
try:
56+
(0x1234567).to_bytes(3, "big")
57+
except OverflowError:
58+
print("OverflowError")
59+
60+
61+
# negative representations
62+
63+
# MicroPython int.to_bytes() behaves as if signed=True for negative numbers
64+
if "micropython" in repr(sys.implementation):
65+
66+
def to_bytes_compat(i, l, e):
67+
return i.to_bytes(l, e)
68+
else:
69+
# Implement MicroPython compatible behaviour for CPython
70+
def to_bytes_compat(i, l, e):
71+
return i.to_bytes(l, e, signed=i < 0)
72+
73+
74+
print(to_bytes_compat(-1, 1, "little"))
75+
print(to_bytes_compat(-1, 3, "little"))
76+
print(to_bytes_compat(-1, 1, "big"))
77+
print(to_bytes_compat(-1, 3, "big"))
78+
print(to_bytes_compat(-128, 1, "big"))
79+
print(to_bytes_compat(-32768, 2, "big"))
80+
print(to_bytes_compat(-(1 << 23), 3, "big"))
81+
82+
try:
83+
print(to_bytes_compat(-129, 1, "big"))
84+
except OverflowError:
85+
print("OverflowError")
86+
87+
try:
88+
print(to_bytes_compat(-32769, 2, "big"))
89+
except OverflowError:
90+
print("OverflowError")
91+
92+
try:
93+
print(to_bytes_compat(-(1 << 23) - 1, 2, "big"))
94+
except OverflowError:
95+
print("OverflowError")

tests/basics/int_bytes_int64.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import sys
2+
3+
# Depending on the port, the numbers in this test may be implemented as "small"
4+
# native 64 bit ints, arbitrary precision large ints, or large integers using 64-bit
5+
# long longs.
6+
7+
try:
8+
x = int.from_bytes(b"\x6F\xAB\xCD\x12\x34\x56\x78\xFB", "big")
9+
except OverflowError:
10+
print("SKIP") # Port can't represent this size of integer at all
11+
raise SystemExit
12+
13+
print(hex(x))
14+
b = x.to_bytes(8, "little")
15+
print(b)
16+
print(x.to_bytes(8, "big"))
17+
18+
# padding in output
19+
print(x.to_bytes(20, "little"))
20+
print(x.to_bytes(20, "big"))
21+
22+
# check that extra zero bytes don't change the internal int value
23+
print(int.from_bytes(b + bytes(10), "little") == x)
24+
25+
# can't write to a zero-length bytes object
26+
try:
27+
x.to_bytes(0, "little")
28+
except OverflowError:
29+
print("OverflowError")
30+
31+
# or one that it too short
32+
try:
33+
x.to_bytes(7, "big")
34+
except OverflowError:
35+
print("OverflowError")
36+
37+
# negative representations
38+
39+
# MicroPython int.to_bytes() behaves as if signed=True for negative numbers
40+
if "micropython" in repr(sys.implementation):
41+
42+
def to_bytes_compat(i, l, e):
43+
return i.to_bytes(l, e)
44+
else:
45+
# Implement MicroPython compatible behaviour for CPython
46+
def to_bytes_compat(i, l, e):
47+
return i.to_bytes(l, e, signed=i < 0)
48+
49+
50+
print(to_bytes_compat(-x, 8, "little"))
51+
print(to_bytes_compat(-x, 20, "big"))
52+
print(to_bytes_compat(-x, 20, "little"))

0 commit comments

Comments
 (0)
0