numpy · charris · Oct 10, 2020 · Sep 10, 2020
diff --git a/.gitignore b/.gitignore
@@ -182,6 +182,7 @@ benchmarks/results
 benchmarks/html
 benchmarks/env
 benchmarks/numpy
+benchmarks/_asv_compare.conf.json
 # cythonized files
 cythonize.dat
 numpy/random/_mtrand/_mtrand.c

diff --git a/benchmarks/asv_compare.conf.json.tpl b/benchmarks/asv_compare.conf.json.tpl
 @@ -0,0 +1,95 @@
+// This config file is almost identical to 'asv.conf.json' except it contains
+// custom tokens that can be substituted by 'runtests.py' and ASV,
+// due to the necessity to add custom build options when `--bench-compare`
+// is used.
+{
+    // The version of the config file format.  Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+
+    // The name of the project being benchmarked
+    "project": "numpy",
+
+    // The project's homepage
+    "project_url": "https://www.numpy.org/",
+
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": "..",
+
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "tip" (for mercurial).
+    "branches": ["HEAD"],
+
+    // The DVCS being used.  If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    "dvcs": "git",
+
+    // The tool to use to create environments.  May be "conda",
+    // "virtualenv" or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "virtualenv",
+
+    // The base URL to show a commit for the project.
+    "show_commit_url": "https://github.com/numpy/numpy/commit/",
+
+    // The Pythons you'd like to test against.  If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    "pythons": ["3.7"],
+
+    // The matrix of dependencies to test.  Each key is the name of a
+    // package (in PyPI) and the values are version numbers.  An empty
+    // list indicates to just test against the default (latest)
+    // version.
+    "matrix": {
+        "Cython": [],
+    },
+
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in.  If not provided, defaults to "benchmarks"
+    "benchmark_dir": "benchmarks",
+
+    // The directory (relative to the current directory) to cache the Python
+    // environments in.  If not provided, defaults to "env"
+    // NOTE: changing this directory name requires updating the `env_dir`
+    // default of `asv_clear_cache()` in runtests.py
+    "env_dir": "env",
+
+
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in.  If not provided, defaults to "results".
+    "results_dir": "results",
+
+    // The directory (relative to the current directory) that the html tree
+    // should be written to.  If not provided, defaults to "html".
+    "html_dir": "html",
+
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+
+    // `asv` will cache wheels of the recent builds in each
+    // environment, making them faster to install next time.  This is
+    // the number of builds to keep, per environment.
+    "build_cache_size": 8,
+
+    "build_command" : [
+        "python setup.py build {numpy_build_options}",
+        "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
+    ],
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions.  The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
+    //    "another_benchmark": null,   // Skip regression detection altogether
+    // }
+}
diff --git a/runtests.py b/runtests.py
@@ -52,7 +52,7 @@
 
 
 import sys
-import os
+import os, glob
 
 # In case we are run from the source directory, we don't want to import the
 # project from there:
@@ -310,8 +310,16 @@ def main(argv):
             out = subprocess.check_output(['git', 'rev-parse', commit_a])
             commit_a = out.strip().decode('ascii')
 
+            # generate config file with the required build options
+            asv_cfpath = [
+                '--config', asv_compare_config(
+                    os.path.join(ROOT_DIR, 'benchmarks'), args,
+                    # to clear the cache if the user changed build options
+                    (commit_a, commit_b)
+                )
+            ]
             cmd = ['asv', 'continuous', '-e', '-f', '1.05',
-                   commit_a, commit_b] + bench_args
+                   commit_a, commit_b] + asv_cfpath + bench_args
             ret = subprocess.call(cmd, cwd=os.path.join(ROOT_DIR, 'benchmarks'))
             sys.exit(ret)
 
@@ -361,7 +369,6 @@ def main(argv):
     else:
         sys.exit(1)
 
-
 def build_project(args):
     """
     Build a dev version of the project.
@@ -491,6 +498,98 @@ def build_project(args):
 
     return site_dir, site_dir_noarch
 
+def asv_compare_config(bench_path, args, h_commits):
+    """
+    Generate the ASV config used by `--bench-compare`, passing the build
+    options found in 'args' through the custom token 'numpy_build_options'.
+    The ASV build cache of 'h_commits' is cleared whenever the config had
+    to be regenerated. Returns the path of the generated config.
+    """
+    template = os.path.join(bench_path, "asv_compare.conf.json.tpl")
+    output = os.path.join(bench_path, "_asv_compare.conf.json")
+
+    # collect the custom build options
+    options = []
+    if args.parallel > 1:
+        options.extend(["-j", str(args.parallel)])
+    if args.cpu_baseline:
+        options.extend(["--cpu-baseline", args.cpu_baseline])
+    if args.cpu_dispatch:
+        options.extend(["--cpu-dispatch", args.cpu_dispatch])
+    if args.disable_optimization:
+        options.append("--disable-optimization")
+
+    quoted = ' '.join(f'\\"{opt}\\"' for opt in options)
+    if not asv_substitute_config(template, output, numpy_build_options=quoted):
+        asv_clear_cache(bench_path, h_commits)
+    return output
+
+def asv_clear_cache(bench_path, h_commits, env_dir="env"):
+    """Remove the ASV build-cache entries of the given commit hashes."""
+    # FIXME: only clear the cache from the current environment dir
+    pattern = os.path.join(bench_path, env_dir, "*", "asv-build-cache")
+    for cache_dir in glob.glob(pattern, recursive=True):
+        for commit in h_commits:
+            try:
+                shutil.rmtree(os.path.join(cache_dir, commit))
+            except OSError:
+                pass  # best effort: this commit may not be cached here
+
+def asv_substitute_config(in_config, out_config, **custom_vars):
+    """
+    Generate an ASV configuration file from a template by replacing custom
+    '{name}' tokens, since ASV has no official way to add custom variables
+    (e.g. env vars).
+
+    The first line of the generated file records a hash of the custom
+    variables and of the template's modification time; the file is
+    rewritten unless that recorded hash still matches.
+
+    Parameters
+    ----------
+    in_config : str
+        The path of the ASV configuration template.
+    out_config : str
+        The path of the generated configuration file.
+    **custom_vars : str
+        Token names and their replacement text; at least one is required.
+
+    Returns
+    -------
+    bool
+        True (is cached) if 'out_config' was already generated with the
+        same '**custom_vars' from the current 'in_config', False if it
+        had to be (re)generated.
+    """
+    assert in_config != out_config
+    assert len(custom_vars) > 0
+
+    def sdbm_hash(*factors):
+        chash = 0
+        for f in factors:
+            for char in str(f):
+                chash = ord(char) + (chash << 6) + (chash << 16) - chash
+                chash &= 0xFFFFFFFF
+        return chash
+
+    vars_hash = sdbm_hash(custom_vars, os.path.getmtime(in_config))
+    try:
+        with open(out_config, "r") as rfd:
+            hash_line = rfd.readline().split('hash:')
+        if len(hash_line) > 1 and int(hash_line[1]) == vars_hash:
+            return True
+    except (IOError, ValueError):
+        pass  # missing, unreadable or corrupted hash header: regenerate
+
+    tokens = {f'{{{k}}}': v for k, v in custom_vars.items()}
+    with open(in_config, "r") as rfd, open(out_config, "w") as wfd:
+        wfd.write(f"// hash:{vars_hash}\n")
+        wfd.write("// This file is automatically generated by runtests.py\n")
+        for line in rfd:
+            for key, val in tokens.items():
+                line = line.replace(key, val)
+            wfd.write(line)
+    return False
 
 #
 # GCOV support