8000 Gave this a facelift: "/" vs "//", whrandom vs random, etc. Boosted · python/cpython@8b6ec79 · GitHub
[go: up one dir, main page]

Skip to content

Commit 8b6ec79

Browse files
committed
Gave this a facelift: "/" vs "//", whrandom vs random, etc. Boosted
the default range to end at 2**20 (machines are much faster now). Fixed what was quite a arguably a bug, explaining an old mystery: the "!sort" case here contructs what *was* a quadratic-time disaster for the old quicksort implementation. But under the current samplesort, it always ran much faster than *sort (the random case). This never made sense. Turns out it was because !sort was sorting an integer array, while all the other cases sort floats; and comparing ints goes much quicker than comparing floats in Python. After changing !sort to chew on floats instead, it's now slower than the random sort case, which makes more sense (but is just a few percent slower; samplesort is massively less sensitive to "bad patterns" than quicksort).
1 parent 30d4896 commit 8b6ec79

File tree

1 file changed

+46
-35
lines changed

1 file changed

+46
-35
lines changed

Lib/test/sortperf.py

Lines changed: 46 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -10,20 +10,21 @@
1010
import random
1111
import marshal
1212
import tempfile
13-
import operator
1413
import os
1514

1615
td = tempfile.gettempdir()
1716

18-
def randrange(n):
19-
"""Return a random shuffle of range(n)."""
17+
def randfloats(n):
18+
"""Return a list of n random floats in [0, 1)."""
19+
# Generating floats is expensive, so this writes them out to a file in
20+
# a temp directory. If the file already exists, it just reads them
21+
# back in and shuffles them a bit.
2022
fn = os.path.join(td, "rr%06d" % n)
2123
try:
2224
fp = open(fn, "rb")
2325
except IOError:
24-
result = []
25-
for i in range(n):
26-
result.append(random.random())
26+
r = random.random
27+
result = [r() for i in xrange(n)]
2728
try:
2829
try:
2930
fp = open(fn, "wb")
@@ -41,26 +42,26 @@ def randrange(n):
4142
else:
4243
result = marshal.load(fp)
4344
fp.close()
44-
##assert len(result) == n
4545
# Shuffle it a bit...
4646
for i in range(10):
47-
i = random.randrange(0, n)
47+
i = random.randrange(n)
4848
temp = result[:i]
4949
del result[:i]
5050
temp.reverse()
51-
result[len(result):] = temp
51+
result.extend(temp)
5252
del temp
53+
assert len(result) == n
5354
return result
5455

55-
def fl():
56+
def flush():
5657
sys.stdout.flush()
5758

5859
def doit(L):
5960
t0 = time.clock()
6061
L.sort()
6162
t1 = time.clock()
6263
print "%6.2f" % (t1-t0),
63-
fl()
64+
flush()
6465

6566
def tabulate(r):
6667
"""Tabulate sort speed for lists of various sizes.
@@ -74,33 +75,50 @@ def tabulate(r):
7475
\sort: descending data
7576
/sort: ascending data
7677
~sort: many duplicates
77-
-sort: all equal
78+
=sort: all equal
7879
!sort: worst case scenario
7980
8081
"""
81-
cases = ("*sort", "\\sort", "/sort", "~sort", "-sort", "!sort")
82-
fmt = ("%2s %6s" + " %6s"*len(cases))
82+
cases = ("*sort", "\\sort", "/sort", "~sort", "=sort", "!sort")
83+
fmt = ("%2s %7s" + " %6s"*len(cases))
8384
print fmt % (("i", "2**i") + cases)
8485
for i in r:
85-
n = 1<<i
86-
L = randrange(n)
87-
##assert len(L) == n
88-
print "%2d %6d" % (i, n),
89-
fl()
86+
n = 1 << i
87+
L = randfloats(n)
88+
print "%2d %7d" % (i, n),
89+
flush()
9090
doit(L) # *sort
9191
L.reverse()
9292
doit(L) # \sort
9393
doit(L) # /sort
94+
95+
# Arrange for lots of duplicates.
9496
if n > 4:
9597
del L[4:]
96-
L = L*(n/4)
98+
L = L * (n // 4)
99+
# Force the elements to be distinct objects, else timings can be
100+
# artificially low.
97101
L = map(lambda x: --x, L)
98102
doit(L) # ~sort
99103
del L
100-
L = map(abs, [-0.5]*n)
101-
doit(L) # -sort
102-
L = range(n/2-1, -1, -1)
103-
L[len(L):] = range(n/2)
104+
105+
# All equal. Again, force the elements to be distinct objects.
106+
L = map(abs, [-0.5] * n)
107+
doit(L) # =sort
108+
del L
109+
110+
# This one looks like [3, 2, 1, 0, 0, 1, 2, 3]. It was a bad case
111+
# for an older implementation of quicksort, which used the median
112+
# of the first, last and middle elements as the pivot. It's still
113+
# a worse-than-average case for samplesort, but on the order of a
114+
# measly 5% worse, not a quadratic-time disaster as it was with
115+
# quicksort.
116+
half = n // 2
117+
L = range(half - 1, -1, -1)
118+
L.extend(range(half))
119+
# Force to float, so that the timings are comparable. This is
120+
# significantly faster if we leave tham as ints.
121+
L = map(float, L)
104122
doit(L) # !sort
105123
print
106124

@@ -114,7 +132,7 @@ def main():
114132
"""
115133
# default range (inclusive)
116134
k1 = 15
117-
k2 = 19
135+
k2 = 20
118136
if sys.argv[1:]:
119137
# one argument: single point
120138
k1 = k2 = int(sys.argv[1])
@@ -123,17 +141,10 @@ def main():
123141
k2 = int(sys.argv[2])
124142
if sys.argv[3:]:
125143
# derive random seed from remaining arguments
126-
x, y, z = 0, 0, 0
144+
x = 1
127145
for a in sys.argv[3:]:
128-
h = hash(a)
129-
h, d = divmod(h, 256)
130-
h = h & 0xffffff
131-
x = (x^h^d) & 255
132-
h = h>>8
133-
y = (y^h^d) & 255
134-
h = h>>8
135-
z = (z^h^d) & 255
136-
whrandom.seed(x, y, z)
146+
x = 69069 * x + hash(a)
147+
random.seed(x)
137148
r = range(k1, k2+1) # include the end point
138149
tabulate(r)
139150

0 commit comments

Comments
 (0)
0