8000 Add `list.sort` benchmarks by sobolevn · Pull Request #328 · python/pyperformance · GitHub
[go: up one dir, main page]

Skip to content

Add list.sort benchmarks #328

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Georg Brandl <georg@python.org>
James Abbatiello <abbeyj@gmail.com>
Jeffrey Yasskin <jyasskin@gmail.com>
Maciej Fijalkowski
Nikita Sobolev <mail@sobolevn.me>
Reid Klecker <mk@mit.edu>
Robert Grimm
Skip Montanaro
Expand Down
1 change: 1 addition & 0 deletions doc/benchmarks.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Available benchmark groups:
* ``startup``: Collection of microbenchmarks focused on Python interpreter
start-up time.
* ``template``: Templating libraries
* ``sorting``: Different sorting algorithms

Use the ``python3 -m pyperformance list_groups`` command to list groups and their
benchmarks.
Expand Down
10 changes: 10 additions & 0 deletions pyperformance/data-files/benchmarks/MANIFEST
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,15 @@ hexiom <local>
html5lib <local>
json_dumps <local>
json_loads <local>
list_sort <local>
list_sort_descending <local:list_sort>
list_sort_ascending <local:list_sort>
list_sort_ascending_exchanged <local:list_sort>
list_sort_ascending_random <local:list_sort>
list_sort_ascending_one_percent <local:list_sort>
list_sort_duplicates <local:list_sort>
list_sort_equal <local:list_sort>
list_sort_worst_case <local:list_sort>
logging <local>
mako <local>
mdp <local>
Expand Down Expand Up @@ -98,6 +107,7 @@ xml_etree <local>
#apps
#math
#template
#sorting


[group default]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[tool.pyperformance]
name = "list_sort_ascending"
tags = "sorting"
extra_opts = ["list_sort_ascending"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[tool.pyperformance]
name = "list_sort_ascending_exchanged"
tags = "sorting"
extra_opts = ["list_sort_ascending_exchanged"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[tool.pyperformance]
name = "list_sort_ascending_one_percent"
tags = "sorting"
extra_opts = ["list_sort_ascending_one_percent"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[tool.pyperformance]
name = "list_sort_ascending_random"
tags = "sorting"
extra_opts = ["list_sort_ascending_random"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[tool.pyperformance]
name = "list_sort_descending"
tags = "sorting"
extra_opts = ["list_sort_descending"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[tool.pyperformance]
name = "list_sort_duplicates"
tags = "sorting"
extra_opts = ["list_sort_duplicates"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[tool.pyperformance]
name = "list_sort_equal"
tags = "sorting"
extra_opts = ["list_sort_equal"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[tool.pyperformance]
name = "list_sort_worst_case"
tags = "sorting"
extra_opts = ["list_sort_worst_case"]
11 changes: 11 additions & 0 deletions pyperformance/data-files/benchmarks/bm_list_sort/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[project]
name = "pyperformance_bm_list_sort"
requires-python = ">=3.8"
dependencies = ["pyperf"]
urls = {repository = "https://github.com/python/pyperformance"}
dynamic = ["version"]

[tool.pyperformance]
name = "list_sort"
tags = "sorting"
extra_opts = ["list_sort"]
177 changes: 177 additions & 0 deletions pyperformance/data-files/benchmarks/bm_list_sort/run_benchmark.py
< F438 td class="blob-code blob-code-addition js-file-line">
< 71E8 td class="blob-code blob-code-addition js-file-line">
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
"""
List sort performance test.

Based on https://github.com/python/cpython/blob/963904335e579bfe39101adf3fd6a0cf705975ff/Lib/test/sortperf.py
"""

from __future__ import annotations

import time
import random

import pyperf


# ===============
# Data generation
# ===============

def _random_data(size: int, rand: random.Random) -> list[float]:
result = [rand.random() for _ in range(size)]
# Shuffle it a bit...
for i in range(10):
i = rand.randrange(size)
temp = result[:i]
del result[:i]
temp.reverse()
result.extend(temp)
del temp
assert len(result) == size
return result


def list_sort(size: int, rand: random.Random) -> list[float]:
return _random_data(size, rand)


def list_sort_descending(size: int, rand: random.Random) -> list[float]:
return list(reversed(list_sort_ascending(size, rand)))

def list_sort_ascending(size: int, rand: random.Random) -> list[float]:
return sorted(_random_data(size, rand))


def list_sort_ascending_exchanged(size: int, rand: random.Random) -> list[float]:
result = list_sort_ascending(size, rand)
# Do 3 random exchanges.
for _ in range(3):
i1 = rand.randrange(size)
i2 = rand.randrange(size)
result[i1], result[i2] = result[i2], result[i1]
return result


def list_sort_ascending_random(size: int, rand: random.Random) -> list[float]:
assert size >= 10, "This benchmark requires size to be >= 10"
result = list_sort_ascending(size, rand)
# Replace the last 10 with random floats.
result[-10:] = [rand.random() for _ in range(10)]
return result


def list_sort_ascending_one_percent(size: int, rand: random.Random) -> list[float]:
result = list_sort_ascending(size, rand)
# Replace 1% of the elements at random.
for _ in range(size // 100):
result[rand.randrange(size)] = rand.random()
return result


def list_sort_duplicates(size: int, rand: random.Random) -> list[float]:
assert size >= 4
result = list_sort_ascending(4, rand)
# Arrange for lots of duplicates.
result = result * (size // 4)
# Force the elements to be distinct objects, else timings can be
# artificially low.
return list(map(abs, result))


def list_sort_equal(size: int, rand: random.Random) -> list[float]:
# All equal. Again, force the elements to be distinct objects.
return list(map(abs, [-0.519012] * size))


def list_sort_worst_case(size: int, rand: random.Random) -> list[float]:
# This one looks like [3, 2, 1, 0, 0, 1, 2, 3]. It was a bad case
# for an older implementation of quicksort, which used the median
# of the first, last and middle elements as the pivot.
half = size // 2
result = list(range(half - 1, -1, -1))
result.extend(range(half))
# Force to float, so that the timings are comparable. This is
# significantly faster if we leave them as ints.
return list(map(float, result))


# =========
# Benchmark
# =========

class Benchmark:
def __init__(self, name: str, size: int, seed: int) -> None:
self._name = name
self._size = size
self._seed = seed
self._random = random.Random(self._seed)

def run(self, loops: int) -> float:
all_data = self._prepare_data(loops)
start = time.perf_counter()

for data in all_data:
data.sort() # Benching this method!

return time.perf_counter() - start

def _prepare_data(self, loops: int) -> list[float]:
bench = BENCHMARKS[self._name]
return [
bench(self._size, self._random)
for _ in range(loops)
]


def add_cmdline_args(cmd: list[str], args) -> None:
cmd.append(args.benchmark)
cmd.append(f"--size={args.size}")
cmd.append(f"--rng-seed={args.rng_seed}")


def add_parser_args(parser) -> None:
parser.add_argument(
"benchmark",
choices=BENCHMARKS,
default="list_sort",
help="Can be any of: {0}".format(", ".join(BENCHMARKS)),
)
parser.add_argument(
"--size",
type=int,
default=DEFAULT_SIZE,
help=f"Size of the lists to sort (default: {DEFAULT_SIZE})",
)
parser.add_argument(
"--rng-seed",
type=int,
default=DEFAULT_RANDOM_SEED,
help=f"Random number generator seed (default: {DEFAULT_RANDOM_SEED})",
)

DEFAULT_SIZE = 262144 # 1 << 18
DEFAULT_RANDOM_SEED = 0
BENCHMARKS = {
"list_sort": list_sort,
"list_sort_descending": list_sort_descending,
"list_sort_ascending": list_sort_ascending,
"list_sort_ascending_exchanged": list_sort_ascending_exchanged,
"list_sort_ascending_random": list_sort_ascending_random,
"list_sort_ascending_one_percent": list_sort_ascending_one_percent,
"list_sort_duplicates": list_sort_duplicates,
"list_sort_equal": list_sort_equal,
"list_sort_worst_case": list_sort_worst_case,
}

if __name__ == "__main__":
runner = pyperf.Runner(add_cmdline_args=add_cmdline_args)
add_parser_args(runner.argparser)
args = runner.parse_args()

runner.metadata["description"] = "Test `list.sort()` with different data"
runner.metadata["list_sort_size"] = args.size
runner.metadata["list_sort_random_seed"] = args.rng_seed

benchmark = Benchmark(args.benchmark, args.size, args.rng_seed)
runner.bench_time_func(args.benchmark, benchmark.run)
0