diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..e354eaae --- /dev/null +++ b/.flake8 @@ -0,0 +1,2 @@ +[flake8] +select = F diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 00000000..a832af83 --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +array_api_tests/_version.py} export-subst diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 00000000..9b49c09b --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,16 @@ +name: Linting + +on: [push, pull_request] + +jobs: + build: + + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: "3.10" + - name: Run pre-commit hook + uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index fc9a37a8..00000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: Tests - -on: [push, pull_request] - -jobs: - build: - - runs-on: ubuntu-latest - strategy: - matrix: - python-version: [3.8, 3.9] - - steps: - - uses: actions/checkout@v1 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - # - name: Install dependencies - # run: | - # python -m pip install --upgrade pip - # pip install ... - - name: Lint with pyfalkes - run: | - pip install pyflakes - pyflakes . - # - name: Test with pytest - # run: | - # pip install pytest - # pytest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..2fab2072 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,34 @@ +name: Test Array API Strict + +on: [push, pull_request] + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.10", "3.11"] + + steps: + - name: Checkout array-api-tests + uses: actions/checkout@v1 + with: + submodules: 'true' + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install array-api-strict + python -m pip install -r requirements.txt + - name: Run the test suite + env: + ARRAY_API_TESTS_MODULE: array_api_strict + ARRAY_API_STRICT_API_VERSION: 2024.12 + run: | + pytest -v -rxXfE --skips-file array-api-strict-skips.txt array_api_tests/ + # We also have internal tests that isn't really necessary for adopters + pytest -v -rxXfE meta_tests/ diff --git a/.gitignore b/.gitignore index b6e47617..fc5b8b8a 100644 --- a/.gitignore +++ b/.gitignore @@ -117,6 +117,10 @@ venv.bak/ # Rope project settings .ropeproject +# IDE +.idea/ +.vscode/ + # mkdocs documentation /site @@ -127,3 +131,6 @@ dmypy.json # Pyre type checker .pyre/ + +# pytest-json-report +.report.json diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 00000000..c225c24e --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "array_api_tests/array-api"] + path = array-api + url = https://github.com/data-apis/array-api/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..a2ee60df --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,6 @@ +repos: +- repo: https://github.com/pycqa/flake8 + rev: '4.0.1' + hooks: + - id: flake8 + args: [--select, F] diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 00000000..f8c5a8c3 --- 
/dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include versioneer.py +include array_api_tests/_version.py} +include array_api_tests/_version.py diff --git a/README.md b/README.md index 0ff0bce0..fa17b763 100644 --- a/README.md +++ b/README.md @@ -1,132 +1,414 @@ -# Array API Standard Test Suite +# Test Suite for Array API Compliance -This is the test suite for the PyData Array APIs standard. +This is the test suite for array libraries adopting the [Python Array API +standard](https://data-apis.org/array-api/latest). -**NOTE: This test suite is still a work in progress.** +Keeping full coverage of the spec is an on-going priority as the Array API evolves. +Feedback and contributions are welcome! -Feedback and contributions are welcome, but be aware that this suite is not -yet completed. In particular, there are still many parts of the array API -specification that are not yet tested here. +## Quickstart -## Running the tests +### Setup -To run the tests, first install the testing dependencies +Currently we pin the Array API specification repo [`array-api`](https://github.com/data-apis/array-api/) +as a git submodule. This might change in the future to better support vendoring +use cases (see [#107](https://github.com/data-apis/array-api-tests/issues/107)), +but for now be sure submodules are pulled too, e.g. - pip install pytest hypothesis +```bash +$ git submodule update --init +``` -or +To run the tests, install the testing dependencies. - conda install pytest hypothesis +```bash +$ pip install -r requirements.txt +``` -as well as the array libraries that you want to test. To run the tests, you -need to set the array library that is to be tested. There are two ways to do -this. One way is to set the `ARRAY_API_TESTS_MODULE` environment variable. For -example +Ensure you have the array library that you want to test installed. - ARRAY_API_TESTS_MODULE=numpy pytest +### Specifying the array module -Alternately, edit the `array_api_tests/_array_module.py` file and change the -line +You need to specify the array library to test. It can be specified via the +`ARRAY_API_TESTS_MODULE` environment variable, e.g. -```py -array_module = None +```bash +$ export ARRAY_API_TESTS_MODULE=array_api_strict ``` -to +To specify a runtime-defined module, define `xp` using the `exec('...')` syntax: +```bash +$ export ARRAY_API_TESTS_MODULE="exec('import quantity_array, numpy; xp = quantity_array.quantity_namespace(numpy)')" ``` -import numpy as array_module + +Alternately, import/define the `xp` variable in `array_api_tests/__init__.py`. + +### Specifying the API version + +You can specify the API version to use when testing via the +`ARRAY_API_TESTS_VERSION` environment variable, e.g. + +```bash +$ export ARRAY_API_TESTS_VERSION="2023.12" ``` -(replacing `numpy` with the array module namespace to be tested). +Currently this defaults to the array module's `__array_api_version__` value, and +if that attribute doesn't exist then we fallback to `"2021.12"`. -## Notes on Interpreting Errors +### Run the suite -- Some tests cannot be run unless other tests pass first. This is because very - basic APIs such as certain array creation APIs are required for a large - fraction of the tests to run. TODO: Write which tests are required to pass - first here. +Simply run `pytest` against the `array_api_tests/` folder to run the full suite. -- If an error message involves `_UndefinedStub`, it means some name that is - required for the test to run is not defined in the array library. 
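Putting the setup steps together: the module selection, API version, and run command above can be combined in a single invocation. This is a minimal sketch that uses `array_api_strict` and `2023.12` purely because they are the examples given above; the basic run command itself follows below.

```bash
# One-shot run: pick the array module and spec version inline, then run the
# full suite. array_api_strict and 2023.12 are just the examples used above;
# substitute your own namespace and a version it supports.
ARRAY_API_TESTS_MODULE=array_api_strict ARRAY_API_TESTS_VERSION=2023.12 pytest array_api_tests/
```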
+```bash +$ pytest array_api_tests/ +``` -- Due to the nature of the array api spec, virtually every array library will - produce a large number of errors from nonconformance. It is still a work in - progress to enable reporting the errors in a way that makes them easy to - understand, even if there are a large number of them. +The suite tries to logically organise its tests. `pytest` allows you to only run +a specific test case, which is useful when developing functions. -- The spec documents are the ground source of truth. If the test suite appears - to be testing something that is different from the spec, or something that - isn't actually mentioned in the spec, this is a bug. [Please report - it](https://github.com/data-apis/array-api-tests/issues/new). Furthermore, - be aware that some aspects of the spec are either impossible or extremely - difficult to actually test, so they are not covered in the test suite (TODO: - list what these are). +```bash +$ pytest array_api_tests/test_creation_functions.py::test_zeros +``` -## Contributing +## What the test suite covers + +We are interested in array libraries conforming to the +[spec](https://data-apis.org/array-api/latest/API_specification/index.html). +Ideally this means that if a library has fully adopted the Array API, the test +suite passes. We take great care to _not_ test things which are out-of-scope, +so as to not unexpectedly fail the suite. + +### Primary tests + +Every function—including array object methods—has a respective test +method1. We use +[Hypothesis](https://hypothesis.readthedocs.io/en/latest/) +to generate a diverse set of valid inputs. This means array inputs will cover +different dtypes and shapes, as well as contain interesting elements. These +examples generate with interesting arrangements of non-array positional +arguments and keyword arguments. + +Each test case will cover the following areas if relevant: + +* **Smoking**: We pass our generated examples to all functions. As these + examples solely consist of *valid* inputs, we are testing that functions can + be called using their documented inputs without raising errors. + +* **Data type**: For functions returning/modifying arrays, we assert that output + arrays have the correct data types. Most functions + [type-promote](https://data-apis.org/array-api/latest/API_specification/type_promotion.html) + input arrays and some functions have bespoke rules—in both cases we simulate + the correct behaviour to find the expected data types. + +* **Shape**: For functions returning/modifying arrays, we assert that output + arrays have the correct shape. Most functions + [broadcast](https://data-apis.org/array-api/latest/API_specification/broadcasting.html) + input arrays and some functions have bespoke rules—in both cases we simulate + the correct behaviour to find the expected shapes. + +* **Values**: We assert output values (including the elements of + returned/modified arrays) are as expected. Except for manipulation functions + or special cases, the spec allows floating-point inputs to have inexact + outputs, so with such examples we only assert values are roughly as expected. + +### Additional tests -### Adding Tests +In addition to having one test case for each function, we test other properties +of the functions and some miscellaneous things. -It is important that every test in the test suite only uses APIs that are part -of the standard. 
This means that, for instance, when creating test arrays, you -should only use array creation functions that are part of the spec, such as -`ones` or `full`. It also means that many array testing functions that are -built-in to libraries like numpy are reimplemented in the test suite (see -`array_api_tests/pytest_helpers.py`, `array_api_tests/array_helpers.py`, and -`array_api_tests/hypothesis_helpers.py`). +* **Special cases**: For functions with special case behaviour, we assert that + these functions return the correct values. -In order to enforce this, the `array_api_tests._array_module` should be used -everywhere in place of the actual array module that is being tested. +* **Signatures**: We assert functions have the correct signatures. -### Hypothesis +* **Constants**: We assert that + [constants](https://data-apis.org/array-api/latest/API_specification/constants.html) + behave as expected, are roughly the expected value, and that any related + functions interact with them correctly. -The test suite uses [Hypothesis](https://hypothesis.readthedocs.io/en/latest/) -to generate random input data. Any test that should be applied over all -possible array inputs should use hypothesis tests. Custom Hypothesis -strategies are in the `array_api_tests/hypothesis_helpers.py` file. +Be aware that some aspects of the spec are impractical or impossible to actually +test, so they are not covered in the suite. -### Parameterization +## Interpreting errors -Any test that applies over all functions in a module should use -`pytest.mark.parametrize` to parameterize over them. For example, +First and foremost, note that most tests have to assume that certain aspects of +the Array API have been correctly adopted, as fundamental APIs such as array +creation and equalities are hard requirements for many assertions. This means a +test case for one function might fail because another function has bugs or even +no implementation. -```py -from . import function_stubs +This means libraries adopting the standard will at first see a vast number of errors +due to cascading failures. Generally the nature of the spec means many granular +details such as type promotion are likely to trip up even nearly-conforming +functions. + +We hope to improve user experience with regard to "noisy" errors in +[#51](https://github.com/data-apis/array-api-tests/issues/51). For now, if an +error message involves `_UndefinedStub`, it means an attribute of the array +library (including functions) or its objects (e.g. the array) is missing. + +The spec is the suite's source of truth. If the suite appears to assume +behaviour different from the spec, or test something that is not documented, +this is a bug—please [report such +issues](https://github.com/data-apis/array-api-tests/issues/) to us. + + +## Running on CI + +See our existing [GitHub Actions workflow for `array-api-strict`](https://github.com/data-apis/array-api-tests/blob/master/.github/workflows/test.yml) +for an example of using the test suite on CI. Note [`array-api-strict`](https://github.com/data-apis/array-api-strict) +is an implementation of the array API that uses NumPy under the hood. + +### Releases + +We recommend pinning against a [release tag](https://github.com/data-apis/array-api-tests/releases) +when running on CI. + +We use [calendar versioning](https://calver.org/) for the releases. You should +expect that any version may be "breaking" compared to the previous one, in that +new tests (or improvements to existing tests) may cause a previously passing +library to fail. 
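For a local or CI checkout, pinning can look like the following sketch; the release tag below is a placeholder, so substitute a real tag from the releases page linked above.

```bash
# Clone the suite at a pinned release tag (placeholder name), pull the
# array-api submodule it references, and install the test dependencies.
git clone --depth 1 --branch <release-tag> https://github.com/data-apis/array-api-tests.git
cd array-api-tests
git submodule update --init
pip install -r requirements.txt
```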
+ +### Configuration + +#### Data-dependent shapes + +Use the `--disable-data-dependent-shapes` flag to skip testing functions which have +[data-dependent shapes](https://data-apis.org/array-api/latest/design_topics/data_dependent_output_shapes.html). + +#### Extensions + +By default, tests for the optional Array API extensions such as +[`linalg`](https://data-apis.org/array-api/latest/extensions/linear_algebra_functions.html) +will be skipped if not present in the specified array module. You can purposely +skip testing extension(s) via the `--disable-extension` option. + +#### Skip or XFAIL test cases + +Test cases you want to skip can be specified in a skips or XFAILS file. The +difference between skip and XFAIL is that XFAIL tests are still run and +reported as XPASS if they pass. + +By default, the skips and xfails files are `skips.txt` and `fails.txt` in the root +of this repository, but any file can be specified with the `--skips-file` and +`--xfails-file` command line flags. + +The files should list the test ids to be skipped/xfailed. Empty lines and +lines starting with `#` are ignored. The test id can be any substring of the +test ids to skip/xfail. -@pytest.mark.parametrize('name', function_stubs.__all__) -def test_whatever(name): - ... ``` +# skips.txt or xfails.txt +# Line comments can be denoted with the hash symbol (#) -will parameterize `test_whatever` over all the functions stubs generated from -the spec. Parameterization should be preferred over using Hypothesis whenever -there are a finite number of input possibilities, as this will cause pytest to -report failures for all input values separately, as opposed to Hypothesis -which will only report one failure. +# Skip specific test case, e.g. when argsort() does not respect relative order +# https://github.com/numpy/numpy/issues/20778 +array_api_tests/test_sorting_functions.py::test_argsort -### Error Strings +# Skip specific test case parameter, e.g. you forgot to implement in-place adds +array_api_tests/test_add[__iadd__(x1, x2)] +array_api_tests/test_add[__iadd__(x, s)] -Any assertion or exception should be accompanied with a useful error message. -The test suite is designed to be ran by people who are not familiar with the -test suite code, so the error messages should be self explanatory as to why -the module fails a given test. +# Skip module, e.g. when your set functions treat NaNs as non-distinct +# https://github.com/numpy/numpy/issues/20326 +array_api_tests/test_set_functions.py +``` -### Meta-errors +Here is an example GitHub Actions workflow file, where the xfails are stored +in `array-api-tests.xfails.txt` in the base of the `your-array-library` repo. + +If you want, you can use `-o xfail_strict=True`, which causes XPASS tests (XFAIL +tests that actually pass) to fail the test suite. However, be aware that +XFAILures can be flaky (see below, so this may not be a good idea unless you +use some other mitigation of such flakyness). + +If you don't want this behavior, you can remove it, or use `--skips-file` +instead of `--xfails-file`. 
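For reference, a hypothetical local invocation combining the flags from this section might look like the sketch below (the xfails file name and the `linalg` extension are placeholders, and it assumes `--disable-extension` takes the extension name as its argument); the CI workflow example mentioned above then follows.

```bash
# Hypothetical local run: xfail the cases listed in a project-specific file,
# skip an optional extension, and skip tests of data-dependent shapes.
pytest array_api_tests/ -v -rxXfE \
    --xfails-file my-xfails.txt \
    --disable-extension linalg \
    --disable-data-dependent-shapes
```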
+ +```yaml +# ./.github/workflows/array_api.yml +jobs: + tests: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.8', '3.9', '3.10', '3.11'] + + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + path: your-array-library + + - name: Checkout array-api-tests + uses: actions/checkout@v3 + with: + repository: data-apis/array-api-tests + submodules: 'true' + path: array-api-tests + + - name: Run the array API test suite + env: + ARRAY_API_TESTS_MODULE: your.array.api.namespace + run: | + export PYTHONPATH="${GITHUB_WORKSPACE}/your-array-library" + cd ${GITHUB_WORKSPACE}/array-api-tests + pytest -v -rxXfE --ci --xfails-file ${GITHUB_WORKSPACE}/your-array-library/array-api-tests-xfails.txt array_api_tests/ +``` + +> **Warning** +> +> XFAIL tests that use Hypothesis (basically every test in the test suite except +> those in test_has_names.py) can be flaky, due to the fact that Hypothesis +> might not always run the test with an input that causes the test to fail. +> There are several ways to avoid this problem: +> +> - Increase the maximum number of examples, e.g., by adding `--max-examples +> 200` to the test command (the default is `20`, see below). This will +> make it more likely that the failing case will be found, but it will also +> make the tests take longer to run. +> - Don't use `-o xfail_strict=True`. This will make it so that if an XFAIL +> test passes, it will alert you in the test summary but will not cause the +> test run to register as failed. +> - Use skips instead of XFAILS. The difference between XFAIL and skip is that +> a skipped test is never run at all, whereas an XFAIL test is always run +> but ignored if it fails. +> - Save the [Hypothesis examples +> database](https://hypothesis.readthedocs.io/en/latest/database.html) +> persistently on CI. That way as soon as a run finds one failing example, +> it will always re-run future runs with that example. But note that the +> Hypothesis examples database may be cleared when a new version of +> Hypothesis or the test suite is released. + +#### Max examples + +The tests make heavy use of +[Hypothesis](https://hypothesis.readthedocs.io/en/latest/). You can configure +how many examples are generated using the `--max-examples` flag, which +defaults to `20`. Lower values can be useful for quick checks, and larger +values should result in more rigorous runs. For example, `--max-examples +10_000` may find bugs where default runs don't but will take much longer to +run. + +#### Skipping Dtypes + +The test suite will automatically skip testing of inessential dtypes if they +are not present on the array module namespace, but dtypes can also be skipped +manually by setting the environment variable `ARRAY_API_TESTS_SKIP_DTYPES` to +a comma-separated list of dtypes to skip. For example + +``` +ARRAY_API_TESTS_SKIP_DTYPES=uint16,uint32,uint64 pytest array_api_tests/ +``` + +Note that skipping certain essential dtypes such as `bool` and the default +floating-point dtype is not supported. + +#### Turning xfails into skips + +Keeping a large number of ``xfails`` can have drastic effects on the run time. This is due +to the way `hypothesis` works: when it detects a failure, it does a large amount +of work to simplify the failing example. +If the run time of the test suite becomes a problem, you can use the +``ARRAY_API_TESTS_XFAIL_MARK`` environment variable: setting it to ``skip`` skips the +entries from the xfails file instead of xfailing them. 
Anecdotally, we saw +speed-ups by a factor of 4-5, which allowed us to use 4-5 times larger values of +``--max-examples`` within the same time budget. + +#### Limiting the array sizes + +The test suite generates random arrays as inputs to functions it tests. "Unvectorized" +tests iterate over elements of arrays, which might be slow. If the run time becomes +a problem, you can limit the maximum number of elements in generated arrays by +setting the environment variable ``ARRAY_API_TESTS_MAX_ARRAY_SIZE`` to the +desired value. By default, it is set to 1024. + + +## Contributing + +### Remain in-scope + +It is important that every test only uses APIs that are part of the standard. +For instance, when creating input arrays you should only use the [array creation +functions](https://data-apis.org/array-api/latest/API_specification/creation_functions.html) +that are documented in the spec. The same goes for testing arrays—you'll find +many utilities that parallel NumPy's own test utils in the `*_helpers.py` files. + +### Tools + +Hypothesis should almost always be used for the primary tests, and can be useful +elsewhere. Effort should be made so drawn arguments are labeled with their +respective names. For +[`st.data()`](https://hypothesis.readthedocs.io/en/latest/data.html#hypothesis.strategies.data), +draws should be accompanied with the `label` kwarg i.e. `data.draw(, label=