8000 [3.12] gh-118164: Break a loop between _pydecimal and _pylong and opt… · python/cpython@a4812fd · GitHub
[go: up one dir, main page]

Skip to content

Commit a4812fd

Browse files
miss-islingtonserhiy-storchakatim-one
authored
[3.12] gh-118164: Break a loop between _pydecimal and _pylong and optimize int to str conversion (GH-118483) (GH-118590)
For converting large ints to strings, CPython invokes a function in _pylong.py, which uses the decimal module to implement an asymptotically waaaaay sub-quadratic algorithm. But if the C decimal module isn't available, CPython uses _pydecimal.py instead. Which in turn frequently does str(int). If the int is very large, _pylong ends up doing the work, which in turn asks decimal to do "big" arithmetic, which in turn calls str(big_int), which in turn ... it can become infinite mutual recursion. This change introduces a different int->str function that doesn't use decimal. It's asymptotically worse, "Karatsuba time" instead of quadratic time, so still a huge improvement. _pylong switches to that when the C decimal isn't available. It is also used for not too large integers (less than 450_000 bits), where it is faster (up to 2 times for 30_000 bits) than the asymptotically better implementation that uses the C decimal. (cherry picked from commit 711c80b) Co-authored-by: Serhiy Storchaka <storchaka@gmail.com> Co-authored-by: Tim Peters <tim.peters@gmail.com>
1 parent a81fe2a commit a4812fd

File tree

3 files changed

+70
-11
lines changed

3 files changed

+70
-11
lines changed

Lib/_pylong.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@
1414

1515
import re
1616
import decimal
17+
try:
18+
import _decimal
19+
except ImportError:
20+
_decimal = None
1721

1822

1923
def int_to_decimal(n):
@@ -82,7 +86,47 @@ def inner(n, w):
8286

8387
def int_to_decimal_string(n):
8488
"""Asymptotically fast conversion of an 'int' to a decimal string."""
85-
return str(int_to_decimal(n))
89+
w = n.bit_length()
90+
if w > 450_000 and _decimal is not None:
91+
# It is only usable with the C decimal implementation.
92+
# _pydecimal.py calls str() on very large integers, which in its
93+
# turn calls int_to_decimal_string(), causing very deep recursion.
94+
return str(int_to_decimal(n))
95+
96+
# Fallback algorithm for the case when the C decimal module isn't
97+
# available. This algorithm is asymptotically worse than the algorithm
98+
# using the decimal module, but better than the quadratic time
99+
# implementation in longobject.c.
100+
def inner(n, w):
101+
if w <= 1000:
102+
return str(n)
103+
w2 = w >> 1
104+
d = pow10_cache.get(w2)
105+
if d is None:
106+
d = pow10_cache[w2] = 5**w2 << w2 # 10**i = (5*2)**i = 5**i * 2**i
107+
hi, lo = divmod(n, d)
108+
return inner(hi, w - w2) + inner(lo, w2).zfill(w2)
109+
110+
# The estimation of the number of decimal digits.
111+
# There is no harm in small error. If we guess too large, there may
112+
# be leading 0's that need to be stripped. If we guess too small, we
113+
# may need to call str() recursively for the remaining highest digits,
114+
# which can still potentially be a large integer. This is manifested
115+
# only if the number has way more than 10**15 digits, that exceeds
116+
# the 52-bit physical address limit in both Intel64 and AMD64.
117+
w = int(w * 0.3010299956639812 + 1) # log10(2)
118+
pow10_cache = {}
119+
if n < 0:
120+
n = -n
121+
sign = '-'
122+
else:
123+
sign = ''
124+
s = inner(n, w)
125+
if s[0] == '0' and n:
126+
# If our guess of w is too large, there may be leading 0's that
127+
# need to be stripped.
128+
s = s.lstrip('0')
129+
return sign + s
86130

87131

88132
def _str_to_int_inner(s):

Lib/test/test_int.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -834,17 +834,28 @@ def tearDown(self):
834834
sys.set_int_max_str_digits(self._previous_limit)
835835
super().tearDown()
836836

837-
def test_pylong_int_to_decimal(self):
838-
n = (1 << 100_000) - 1
839-
suffix = '9883109375'
837+
def _test_pylong_int_to_decimal(self, n, suffix):
840838
s = str(n)
841-
assert s[-10:] == suffix
842-
s = str(-n)
843-
assert s[-10:] == suffix
844-
s = '%d' % n
845-
assert s[-10:] == suffix
846-
s = b'%d' % n
847-
assert s[-10:] == suffix.encode('ascii')
839+
self.assertEqual(s[-10:], suffix)
840+
s2 = str(-n)
841+
self.assertEqual(s2, '-' + s)
842+
s3 = '%d' % n
843+
self.assertEqual(s3, s)
844+
s4 = b'%d' % n
845+
self.assertEqual(s4, s.encode('ascii'))
846+
847+
def test_pylong_int_to_decimal(self):
848+
self._test_pylong_int_to_decimal((1 << 100_000), '9883109376')
849+
self._test_pylong_int_to_decimal((1 << 100_000) - 1, '9883109375')
850+
self._test_pylong_int_to_decimal(10**30_000, '0000000000')
851+
self._test_pylong_int_to_decimal(10**30_000 - 1, '9999999999')
852+
self._test_pylong_int_to_decimal(3**60_000, '9313200001')
853+
854+
@support.requires_resource('cpu')
855+
def test_pylong_int_to_decimal_2(self):
856+
self._test_pylong_int_to_decimal(2**1_000_000, '2747109376')
857+
self._test_pylong_int_to_decimal(10**300_000, '0000000000')
858+
self._test_pylong_int_to_decimal(3**600_000, '3132000001')
848859

849860
def test_pylong_int_divmod(self):
850861
n = (1 << 100_000)
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Break a loop between the Python implementation of the :mod:`decimal` module
2+
and the Python code for integer to string conversion. Also optimize integer
3+
to string conversion for values in the range from 9_000 to 135_000 decimal
4+
digits.

0 commit comments

Comments
 (0)
0