From abc4e5da646cbf502012d24da7f9264085296efc Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Thu, 13 Sep 2018 09:47:22 -0700 Subject: [PATCH 1/5] bpo-34656: Avoid relying on signed overflow in _pickle memos. --- Modules/_pickle.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 2de70f5d9405dc..d731b22e4c8659 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -650,8 +650,8 @@ typedef struct UnpicklerObject { /* The unpickler memo is just an array of PyObject *s. Using a dict is unnecessary, since the keys are contiguous ints. */ PyObject **memo; - Py_ssize_t memo_size; /* Capacity of the memo array */ - Py_ssize_t memo_len; /* Number of objects in the memo */ + size_t memo_size; /* Capacity of the memo array */ + size_t memo_len; /* Number of objects in the memo */ PyObject *pers_func; /* persistent_load() method, can be NULL. */ PyObject *pers_func_self; /* borrowed reference to self if pers_func @@ -821,22 +821,18 @@ _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key) /* Returns -1 on failure, 0 on success. */ static int -_PyMemoTable_ResizeTable(PyMemoTable *self, Py_ssize_t min_size) +_PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size) { PyMemoEntry *oldtable = NULL; PyMemoEntry *oldentry, *newentry; - Py_ssize_t new_size = MT_MINSIZE; + size_t new_size = MT_MINSIZE; Py_ssize_t to_process; assert(min_size > 0); /* Find the smallest valid table size >= min_size. */ - while (new_size < min_size && new_size > 0) + while (new_size < min_size) new_size <<= 1; - if (new_size <= 0) { - PyErr_NoMemory(); - return -1; - } /* new_size needs to be a power of two. */ assert((new_size & (new_size - 1)) == 0); @@ -1376,9 +1372,9 @@ _Unpickler_Readline(UnpicklerObject *self, char **result) /* Returns -1 (with an exception set) on failure, 0 on success. The memo array will be modified in place. */ static int -_Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size) +_Unpickler_ResizeMemoList(UnpicklerObject *self, size_t new_size) { - Py_ssize_t i; + size_t i; assert(new_size > self->memo_size); @@ -1397,9 +1393,9 @@ _Unpickler_ResizeMemoList(UnpicklerObject *self, Py_ssize_t new_size) /* Returns NULL if idx is out of bounds. */ static PyObject * -_Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx) +_Unpickler_MemoGet(UnpicklerObject *self, size_t idx) { - if (idx < 0 || idx >= self->memo_size) + if (idx >= self->memo_size) return NULL; return self->memo[idx]; @@ -1408,7 +1404,7 @@ _Unpickler_MemoGet(UnpicklerObject *self, Py_ssize_t idx) /* Returns -1 (with an exception set) on failure, 0 on success. This takes its own reference to `value`. */ static int -_Unpickler_MemoPut(UnpicklerObject *self, Py_ssize_t idx, PyObject *value) +_Unpickler_MemoPut(UnpicklerObject *self, size_t idx, PyObject *value) { PyObject *old_item; @@ -6843,7 +6839,7 @@ static PyObject * _pickle_UnpicklerMemoProxy_copy_impl(UnpicklerMemoProxyObject *self) /*[clinic end generated code: output=e12af7e9bc1e4c77 input=97769247ce032c1d]*/ { - Py_ssize_t i; + size_t i; PyObject *new_memo = PyDict_New(); if (new_memo == NULL) return NULL; @@ -6994,8 +6990,8 @@ static int Unpickler_set_memo(UnpicklerObject *self, PyObject *obj) { PyObject **new_memo; - Py_ssize_t new_memo_size = 0; - Py_ssize_t i; + size_t new_memo_size = 0; + size_t i; if (obj == NULL) { PyErr_SetString(PyExc_TypeError, @@ -7061,7 +7057,7 @@ Unpickler_set_memo(UnpicklerObject *self, PyObject *obj) error: if (new_memo_size) { i = new_memo_size; - while (--i >= 0) { + for (i = new_memo_size - 1; i != SIZE_MAX; i--) { Py_XDECREF(new_memo[i]); } PyMem_FREE(new_memo); From c7a73fc0d611c6bf2ce13317c0d1dd470f12f699 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Thu, 13 Sep 2018 21:11:24 -0700 Subject: [PATCH 2/5] delete redundant line --- Modules/_pickle.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index d731b22e4c8659..febd7e8da4e966 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -6991,7 +6991,6 @@ Unpickler_set_memo(UnpicklerObject *self, PyObject *obj) { PyObject **new_memo; size_t new_memo_size = 0; - size_t i; if (obj == NULL) { PyErr_SetString(PyExc_TypeError, @@ -7008,7 +7007,7 @@ Unpickler_set_memo(UnpicklerObject *self, PyObject *obj) if (new_memo == NULL) return -1; - for (i = 0; i < new_memo_size; i++) { + for (size_t i = 0; i < new_memo_size; i++) { Py_XINCREF(unpickler->memo[i]); new_memo[i] = unpickler->memo[i]; } @@ -7056,8 +7055,7 @@ Unpickler_set_memo(UnpicklerObject *self, PyObject *obj) error: if (new_memo_size) { - i = new_memo_size; - for (i = new_memo_size - 1; i != SIZE_MAX; i--) { + for (size_t i = new_memo_size - 1; i != SIZE_MAX; i--) { Py_XDECREF(new_memo[i]); } PyMem_FREE(new_memo); From d8a3e613d217a2cc4426317db54dcf4385e78bb4 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Sat, 15 Sep 2018 09:41:53 -0700 Subject: [PATCH 3/5] do more arithmetic in size_t --- Modules/_pickle.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index febd7e8da4e966..9197db28f1d749 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -602,9 +602,9 @@ typedef struct { } PyMemoEntry; typedef struct { - Py_ssize_t mt_mask; - Py_ssize_t mt_used; - Py_ssize_t mt_allocated; + size_t mt_mask; + size_t mt_used; + size_t mt_allocated; PyMemoEntry *mt_table; } PyMemoTable; @@ -737,7 +737,6 @@ PyMemoTable_New(void) static PyMemoTable * PyMemoTable_Copy(PyMemoTable *self) { - Py_ssize_t i; PyMemoTable *new = PyMemoTable_New(); if (new == NULL) return NULL; @@ -754,7 +753,7 @@ PyMemoTable_Copy(PyMemoTable *self) PyErr_NoMemory(); return NULL; } - for (i = 0; i < self->mt_allocated; i++) { + for (size_t i = 0; i < self->mt_allocated; i++) { Py_XINCREF(self->mt_table[i].me_key); } memcpy(new->mt_table, self->mt_table, @@ -800,7 +799,7 @@ _PyMemoTable_Lookup(PyMemoTable *self, PyObject *key) { size_t i; size_t perturb; - size_t mask = (size_t)self->mt_mask; + size_t mask = self->mt_mask; PyMemoEntry *table = self->mt_table; PyMemoEntry *entry; Py_hash_t hash = (Py_hash_t)key >> 3; @@ -826,10 +825,15 @@ _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size) PyMemoEntry *oldtable = NULL; PyMemoEntry *oldentry, *newentry; size_t new_size = MT_MINSIZE; - Py_ssize_t to_process; + size_t to_process; assert(min_size > 0); + if (min_size > PY_SSIZE_T_MAX) { + PyErr_NoMemory(); + return -1; + } + /* Find the smallest valid table size >= min_size. */ while (new_size < min_size) new_size <<= 1; @@ -905,7 +909,8 @@ PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value) * Very large memo tables (over 50K items) use doubling instead. * This may help applications with severe memory constraints. */ - if (!(self->mt_used * 3 >= (self->mt_mask + 1) * 2)) + size_t triple_used = self->mt_used * 3; + if (triple_used > self->mt_used && triple_used < self->mt_allocated * 2) return 0; return _PyMemoTable_ResizeTable(self, (self->mt_used > 50000 ? 2 : 4) * self->mt_used); @@ -4409,14 +4414,13 @@ static PyObject * _pickle_PicklerMemoProxy_copy_impl(PicklerMemoProxyObject *self) /*[clinic end generated code: output=bb83a919d29225ef input=b73043485ac30b36]*/ { - Py_ssize_t i; PyMemoTable *memo; PyObject *new_memo = PyDict_New(); if (new_memo == NULL) return NULL; memo = self->pickler->memo; - for (i = 0; i < memo->mt_allocated; ++i) { + for (size_t i = 0; i < memo->mt_allocated; ++i) { PyMemoEntry entry = memo->mt_table[i]; if (entry.me_key != NULL) { int status; From 853afa2d4cf9792f7e4ce23fce361a1f8763be65 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Sat, 15 Sep 2018 10:41:01 -0700 Subject: [PATCH 4/5] add braces --- Modules/_pickle.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 9197db28f1d749..4cb26b1ec0eb7d 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -835,8 +835,9 @@ _PyMemoTable_ResizeTable(PyMemoTable *self, size_t min_size) } /* Find the smallest valid table size >= min_size. */ - while (new_size < min_size) + while (new_size < min_size) { new_size <<= 1; + } /* new_size needs to be a power of two. */ assert((new_size & (new_size - 1)) == 0); @@ -910,8 +911,9 @@ PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value) * This may help applications with severe memory constraints. */ size_t triple_used = self->mt_used * 3; - if (triple_used > self->mt_used && triple_used < self->mt_allocated * 2) + if (triple_used > self->mt_used && triple_used < self->mt_allocated * 2) { return 0; + } return _PyMemoTable_ResizeTable(self, (self->mt_used > 50000 ? 2 : 4) * self->mt_used); } From d8b389bdac725fc1f040848a32e9cf6dabcf6f85 Mon Sep 17 00:00:00 2001 From: Benjamin Peterson Date: Wed, 19 Sep 2018 23:04:31 -0700 Subject: [PATCH 5/5] simplify & add a comment --- Modules/_pickle.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Modules/_pickle.c b/Modules/_pickle.c index 4cb26b1ec0eb7d..3588e33f097165 100644 --- a/Modules/_pickle.c +++ b/Modules/_pickle.c @@ -910,12 +910,12 @@ PyMemoTable_Set(PyMemoTable *self, PyObject *key, Py_ssize_t value) * Very large memo tables (over 50K items) use doubling instead. * This may help applications with severe memory constraints. */ - size_t triple_used = self->mt_used * 3; - if (triple_used > self->mt_used && triple_used < self->mt_allocated * 2) { + if (SIZE_MAX / 3 >= self->mt_used && self->mt_used * 3 < self->mt_allocated * 2) { return 0; } - return _PyMemoTable_ResizeTable(self, - (self->mt_used > 50000 ? 2 : 4) * self->mt_used); + // self->mt_used is always < PY_SSIZE_T_MAX, so this can't overflow. + size_t desired_size = (self->mt_used > 50000 ? 2 : 4) * self->mt_used; + return _PyMemoTable_ResizeTable(self, desired_size); } #undef MT_MINSIZE