@@ -158,31 +158,47 @@ def _repr_pretty_(self, p: "RepresentationPrinter", cycle: bool) -> None:
158
158
# The one case where this may be detrimental is fuzzing, where the throughput of
159
159
# examples is so high that it really may saturate important nodes. We'll cross
160
160
# that bridge when we come to it.
161
- MAX_CHILDREN_EFFECTIVELY_INFINITE : Final [int ] = 100_000
161
+ MAX_CHILDREN_EFFECTIVELY_INFINITE : Final [int ] = 10_000_000
162
162
163
163
164
164
def _count_distinct_strings (* , alphabet_size : int , min_size : int , max_size : int ) -> int :
165
165
# We want to estimate if we're going to have more children than
166
166
# MAX_CHILDREN_EFFECTIVELY_INFINITE, without computing a potentially
167
- # extremely expensive pow. We'll check if the number of strings in
168
- # the largest string size alone is enough to put us over this limit.
169
- # We'll also employ a trick of estimating against log, which is cheaper
170
- # than computing a pow.
171
- #
172
- # x = max_size
173
- # y = alphabet_size
174
- # n = MAX_CHILDREN_EFFECTIVELY_INFINITE
175
- #
176
- # x**y > n
177
- # <=> log(x**y) > log(n)
178
- # <=> y * log(x) > log(n)
179
- definitely_too_large = max_size * math .log (alphabet_size ) > math .log (
180
- MAX_CHILDREN_EFFECTIVELY_INFINITE
181
- )
182
- if definitely_too_large :
183
- return MAX_CHILDREN_EFFECTIVELY_INFINITE
167
+ # extremely expensive pow. We'll check the two extreme cases - if the
168
+ # number of strings in the largest string size alone is enough to put us
169
+ # over this limit (at alphabet_size >= 2), and if the variation in sizes
170
+ # (at alphabet_size == 1) is enough. If neither result in an early return,
171
+ # the exact result should be reasonably cheap to compute.
172
+ if alphabet_size == 0 :
173
+ # Special-case the empty string, avoid error in math.log(0).
174
+ return 1
175
+ elif alphabet_size == 1 :
176
+ # Special-case the constant alphabet, invalid in the geom-series sum.
177
+ return max_size - min_size + 1
178
+ else :
179
+ # Estimate against log, which is cheaper than computing a pow.
180
+ #
181
+ # m = max_size
182
+ # a = alphabet_size
183
+ # N = MAX_CHILDREN_EFFECTIVELY_INFINITE
184
+ #
185
+ # a**m > N
186
+ # <=> m * log(a) > log(N)
187
+ log_max_sized_children = max_size * math .log (alphabet_size )
188
+ if log_max_sized_children > math .log (MAX_CHILDREN_EFFECTIVELY_INFINITE ):
189
+ return MAX_CHILDREN_EFFECTIVELY_INFINITE
184
190
185
- return sum (alphabet_size ** k for k in range (min_size , max_size + 1 ))
191
+ # The sum of a geometric series is given by (ref: wikipedia):
192
+ # ᵐ∑ₖ₌₀ aᵏ = (aᵐ⁺¹ - 1) / (a - 1)
193
+ # = S(m) / S(0)
194
+ # assuming a != 1 and using the definition
195
+ # S(m) := aᵐ⁺¹ - 1.
196
+ # The sum we want, starting from a number n [0 <= n <= m] rather than zero, is
197
+ # ᵐ∑ₖ₌ₙ aᵏ = ᵐ∑ₖ₌₀ aᵏ - ⁿ⁻¹∑ₖ₌₀ aᵏ = S(m) / S(0) - S(n - 1) / S(0)
198
+ def S (n ):
199
+ return alphabet_size ** (n + 1 ) - 1
200
+
201
+ return (S (max_size ) - S (min_size - 1 )) // S (0 )
186
202
187
203
188
204
def compute_max_children (
@@ -226,16 +242,6 @@ def compute_max_children(
226
242
max_size = constraints ["max_size" ]
227
243
intervals = constraints ["intervals" ]
228
244
229
- if len (intervals ) == 0 :
230
- # Special-case the empty alphabet to avoid an error in math.log(0).
231
- # Only possibility is the empty string.
232
- return 1
233
-
234
- # avoid math.log(1) == 0 and incorrectly failing our effectively_infinite
235
- # estimate, even when we definitely are too large.
236
- if len (intervals ) == 1 and max_size > MAX_CHILDREN_EFFECTIVELY_INFINITE :
237
- return MAX_CHILDREN_EFFECTIVELY_INFINITE
238
-
239
245
return _count_distinct_strings (
240
246
alphabet_size = len (intervals ), min_size = min_size , max_size = max_size
241
247
)
0 commit comments