From 5b66b83668453d0a4808d02c6590e9e45cc53eb7 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Sun, 31 Jul 2022 03:05:07 +0900 Subject: [PATCH 1/2] gh-91146: Reduce allocation size of list from str.split() --- ...2-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst | 1 + Objects/unicodeobject.c | 20 +++++++++---------- 2 files changed, 11 insertions(+), 10 deletions(-) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst new file mode 100644 index 00000000000000..fa36b2d0b5fafb --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst @@ -0,0 +1 @@ +Reduce allocation size of list from :meth:`str.split`. Patch by Dong-hee Na. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ad16ada16fe3bd..ab2cd440670bac 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9696,40 +9696,40 @@ split(PyObject *self, const void *buf1, *buf2; Py_ssize_t len1, len2; PyObject* out; - - if (maxcount < 0) - maxcount = PY_SSIZE_T_MAX; + len1 = PyUnicode_GET_LENGTH(self); + kind1 = PyUnicode_KIND(self); + if (maxcount < 0) { + maxcount = len1; + } if (substring == NULL) - switch (PyUnicode_KIND(self)) { + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(self)) return asciilib_split_whitespace( self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); else return ucs1lib_split_whitespace( self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); case PyUnicode_2BYTE_KIND: return ucs2lib_split_whitespace( self, PyUnicode_2BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); case PyUnicode_4BYTE_KIND: return ucs4lib_split_whitespace( self, PyUnicode_4BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); default: Py_UNREACHABLE(); } - kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); - len1 = PyUnicode_GET_LENGTH(self); len2 = PyUnicode_GET_LENGTH(substring); if (kind1 < kind2 || len1 < len2) { out = PyList_New(1); From ba39640b05bbc32050c4bff38df165542dd49669 Mon Sep 17 00:00:00 2001 From: Dong-hee Na Date: Sun, 31 Jul 2022 09:29:57 +0900 Subject: [PATCH 2/2] Apply to str.rsplit too --- ...2-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst | 3 ++- Objects/unicodeobject.c | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst index fa36b2d0b5fafb..52568dbedd1308 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2022-07-31-03-22-58.gh-issue-91146.Y2Hziy.rst @@ -1 +1,2 @@ -Reduce allocation size of list from :meth:`str.split`. Patch by Dong-hee Na. +Reduce allocation size of :class:`list` from :meth:`str.split` +and :meth:`str.rsplit`. Patch by Dong-hee Na. diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ab2cd440670bac..355d74fe3bbda7 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -9783,39 +9783,40 @@ rsplit(PyObject *self, Py_ssize_t len1, len2; PyObject* out; - if (maxcount < 0) - maxcount = PY_SSIZE_T_MAX; + len1 = PyUnicode_GET_LENGTH(self); + kind1 = PyUnicode_KIND(self); + if (maxcount < 0) { + maxcount = len1; + } if (substring == NULL) - switch (PyUnicode_KIND(self)) { + switch (kind1) { case PyUnicode_1BYTE_KIND: if (PyUnicode_IS_ASCII(self)) return asciilib_rsplit_whitespace( self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); else return ucs1lib_rsplit_whitespace( self, PyUnicode_1BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); case PyUnicode_2BYTE_KIND: return ucs2lib_rsplit_whitespace( self, PyUnicode_2BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); case PyUnicode_4BYTE_KIND: return ucs4lib_rsplit_whitespace( self, PyUnicode_4BYTE_DATA(self), - PyUnicode_GET_LENGTH(self), maxcount + len1, maxcount ); default: Py_UNREACHABLE(); } - kind1 = PyUnicode_KIND(self); kind2 = PyUnicode_KIND(substring); - len1 = PyUnicode_GET_LENGTH(self); len2 = PyUnicode_GET_LENGTH(substring); if (kind1 < kind2 || len1 < len2) { out = PyList_New(1);