From 955019b55a31b4a03c76f62edfde5df2301282c4 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 15 Dec 2022 23:46:41 -0800 Subject: [PATCH 01/34] Cleanup ``import``s in ``adhoc/blosc_memleak_check.py`` (#408) * Include space for new release notes * Tidy `import`s in memleak script --- adhoc/blosc_memleak_check.py | 6 ++---- docs/release.rst | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/adhoc/blosc_memleak_check.py b/adhoc/blosc_memleak_check.py index 6f38967f..3a875449 100644 --- a/adhoc/blosc_memleak_check.py +++ b/adhoc/blosc_memleak_check.py @@ -1,13 +1,11 @@ import sys - -import numcodecs as codecs -from numcodecs import blosc +import numcodecs import numpy as np from numpy.testing import assert_array_equal -codec = codecs.Blosc() +codec = numcodecs.Blosc() data = np.arange(int(sys.argv[1])) for i in range(int(sys.argv[2])): enc = codec.encode(data) diff --git a/docs/release.rst b/docs/release.rst index 60159c7c..6f176b8c 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -7,6 +7,27 @@ Release notes # re-indented so that it does not show up in the notes. +.. _unreleased: + +Unreleased +---------- + +Enhancements +~~~~~~~~~~~~ + +* + +Fix +~~~ + +* + +Maintenance +~~~~~~~~~~~ + +* Cleanup ``import``s in ``adhoc/blosc_memleak_check.py`` + By :user:`John Kirkham `, :issue:`408`. + .. 
_release_0.11.0: 0.11.0 From 4f2a2e326244d69719d3d8bb7640ca36b48d3f19 Mon Sep 17 00:00:00 2001 From: Ben Greiner Date: Fri, 13 Jan 2023 20:42:32 +0100 Subject: [PATCH 02/34] Enforce dtype=object for incompatible numpy array conversion (#417) * use dtype=object for sequences first not accepted by np.asarray * parameterize test_non_numpy_inputs --- numcodecs/json.py | 5 +++- numcodecs/msgpacks.py | 5 +++- numcodecs/tests/test_json.py | 36 +++++++++++++------------ numcodecs/tests/test_msgpacks.py | 45 +++++++++++++++++--------------- 4 files changed, 51 insertions(+), 40 deletions(-) diff --git a/numcodecs/json.py b/numcodecs/json.py index 670f2235..b803a77b 100644 --- a/numcodecs/json.py +++ b/numcodecs/json.py @@ -54,7 +54,10 @@ def __init__(self, encoding='utf-8', skipkeys=False, ensure_ascii=True, self._decoder = _json.JSONDecoder(**self._decoder_config) def encode(self, buf): - buf = np.asarray(buf) + try: + buf = np.asarray(buf) + except ValueError: + buf = np.asarray(buf, dtype=object) items = buf.tolist() items.extend((buf.dtype.str, buf.shape)) return self._encoder.encode(items).encode(self._text_encoding) diff --git a/numcodecs/msgpacks.py b/numcodecs/msgpacks.py index 026f583a..65564984 100644 --- a/numcodecs/msgpacks.py +++ b/numcodecs/msgpacks.py @@ -52,7 +52,10 @@ def __init__(self, use_single_float=False, use_bin_type=True, raw=False): self.raw = raw def encode(self, buf): - buf = np.asarray(buf) + try: + buf = np.asarray(buf) + except ValueError: + buf = np.asarray(buf, dtype=object) items = buf.tolist() items.extend((buf.dtype.str, buf.shape)) return msgpack.packb(items, use_bin_type=self.use_bin_type, diff --git a/numcodecs/tests/test_json.py b/numcodecs/tests/test_json.py index 7e8fcd64..8dac2b41 100644 --- a/numcodecs/tests/test_json.py +++ b/numcodecs/tests/test_json.py @@ -2,7 +2,7 @@ import numpy as np - +import pytest from numcodecs.json import JSON from numcodecs.tests.common import (check_config, check_repr, check_encode_decode_array, @@ 
-53,21 +53,23 @@ def test_backwards_compatibility(): check_backwards_compatibility(JSON.codec_id, arrays, codecs) -def test_non_numpy_inputs(): +@pytest.mark.parametrize( + "input_data, dtype", + [ + ([0, 1], None), + ([[0, 1], [2, 3]], None), + ([[0], [1], [2, 3]], object), + ([[[0, 0]], [[1, 1]], [[2, 3]]], None), + (["1"], None), + (["11", "11"], None), + (["11", "1", "1"], None), + ([{}], None), + ([{"key": "value"}, ["list", "of", "strings"]], object), + ] +) +def test_non_numpy_inputs(input_data, dtype): # numpy will infer a range of different shapes and dtypes for these inputs. # Make sure that round-tripping through encode preserves this. - data = [ - [0, 1], - [[0, 1], [2, 3]], - [[0], [1], [2, 3]], - [[[0, 0]], [[1, 1]], [[2, 3]]], - ["1"], - ["11", "11"], - ["11", "1", "1"], - [{}], - [{"key": "value"}, ["list", "of", "strings"]], - ] - for input_data in data: - for codec in codecs: - output_data = codec.decode(codec.encode(input_data)) - assert np.array_equal(np.array(input_data), output_data) + for codec in codecs: + output_data = codec.decode(codec.encode(input_data)) + assert np.array_equal(np.array(input_data, dtype=dtype), output_data) diff --git a/numcodecs/tests/test_msgpacks.py b/numcodecs/tests/test_msgpacks.py index 6aeadcf0..d76aa125 100644 --- a/numcodecs/tests/test_msgpacks.py +++ b/numcodecs/tests/test_msgpacks.py @@ -2,6 +2,7 @@ import numpy as np +import pytest try: @@ -52,30 +53,32 @@ def test_backwards_compatibility(): check_backwards_compatibility(codec.codec_id, arrays, [codec]) -def test_non_numpy_inputs(): +@pytest.mark.parametrize( + "input_data, dtype", + [ + ([0, 1], None), + ([[0, 1], [2, 3]], None), + ([[0], [1], [2, 3]], object), + ([[[0, 0]], [[1, 1]], [[2, 3]]], None), + (["1"], None), + (["11", "11"], None), + (["11", "1", "1"], None), + ([{}], None), + ([{"key": "value"}, ["list", "of", "strings"]], object), + ([b"1"], None), + ([b"11", b"11"], None), + ([b"11", b"1", b"1"], None), + ([{b"key": b"value"}, [b"list", b"of", 
b"strings"]], object), + ] +) +def test_non_numpy_inputs(input_data, dtype): codec = MsgPack() # numpy will infer a range of different shapes and dtypes for these inputs. # Make sure that round-tripping through encode preserves this. - data = [ - [0, 1], - [[0, 1], [2, 3]], - [[0], [1], [2, 3]], - [[[0, 0]], [[1, 1]], [[2, 3]]], - ["1"], - ["11", "11"], - ["11", "1", "1"], - [{}], - [{"key": "value"}, ["list", "of", "strings"]], - [b"1"], - [b"11", b"11"], - [b"11", b"1", b"1"], - [{b"key": b"value"}, [b"list", b"of", b"strings"]], - ] - for input_data in data: - actual = codec.decode(codec.encode(input_data)) - expect = np.array(input_data) - assert expect.shape == actual.shape - assert np.array_equal(expect, actual) + actual = codec.decode(codec.encode(input_data)) + expect = np.array(input_data, dtype=dtype) + assert expect.shape == actual.shape + assert np.array_equal(expect, actual) def test_encode_decode_shape_dtype_preserved(): From 67ede4c6b4f1707f1da18351945e10904a5572de Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Sun, 15 Jan 2023 13:02:57 -0500 Subject: [PATCH 03/34] implement fletcher32 (#412) * implement fletcher32 * Update numcodecs/fletcher32.pyx Co-authored-by: Ryan Abernathey * Add docstring and erorr test * Use HDF C impl * Remove unused, add docstrings * to runtime and int test * to cython * Update numcodecs/fletcher32.pyx Co-authored-by: Ryan Abernathey * Add docs Co-authored-by: Ryan Abernathey --- docs/checksum32.rst | 11 ++++ docs/release.rst | 3 +- numcodecs/__init__.py | 3 ++ numcodecs/fletcher32.pyx | 85 ++++++++++++++++++++++++++++++ numcodecs/tests/test_fletcher32.py | 42 +++++++++++++++ setup.py | 28 +++++++++- 6 files changed, 170 insertions(+), 2 deletions(-) create mode 100644 numcodecs/fletcher32.pyx create mode 100644 numcodecs/tests/test_fletcher32.py diff --git a/docs/checksum32.rst b/docs/checksum32.rst index 1d5522e2..5e682afc 100644 --- a/docs/checksum32.rst +++ b/docs/checksum32.rst @@ -22,3 +22,14 @@ Adler32 .. 
automethod:: decode .. automethod:: get_config .. automethod:: from_config + + +Fletcher32 +---------- + +.. autoclass:: numcodecs.fletcher32.Fletcher32 + + .. autoattribute:: codec_id + .. automethod:: encode + .. automethod:: decode + diff --git a/docs/release.rst b/docs/release.rst index 6f176b8c..90d62750 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -15,7 +15,8 @@ Unreleased Enhancements ~~~~~~~~~~~~ -* +* Add ``fletcher32`` checksum codec + By :user:`Martin Durant `, :issue:`410`. Fix ~~~ diff --git a/numcodecs/__init__.py b/numcodecs/__init__.py index 53f3e795..1e3c8536 100644 --- a/numcodecs/__init__.py +++ b/numcodecs/__init__.py @@ -111,3 +111,6 @@ register_codec(VLenUTF8) register_codec(VLenBytes) register_codec(VLenArray) + +from numcodecs.fletcher32 import Fletcher32 +register_codec(Fletcher32) diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx new file mode 100644 index 00000000..02f9319c --- /dev/null +++ b/numcodecs/fletcher32.pyx @@ -0,0 +1,85 @@ +# cython: language_level=3 +# cython: overflowcheck=False +# cython: cdivision=True +import struct + +from numcodecs.abc import Codec +from numcodecs.compat import ensure_contiguous_ndarray + +from libc.stdint cimport uint8_t, uint16_t, uint32_t + + +cdef uint32_t _fletcher32(const uint8_t[::1] _data): + # converted from + # https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L109 + cdef: + const uint8_t *data = &_data[0] + size_t _len = _data.shape[0] + size_t len = _len / 2 + size_t tlen + uint32_t sum1 = 0, sum2 = 0; + + + while len: + tlen = 360 if len > 360 else len + len -= tlen + while True: + sum1 += ((data[0]) << 8) | (data[1]) + data += 2 + sum2 += sum1 + tlen -= 1 + if tlen < 1: + break + sum1 = (sum1 & 0xffff) + (sum1 >> 16) + sum2 = (sum2 & 0xffff) + (sum2 >> 16) + + if _len % 2: + sum1 += (((data[0])) << 8) + sum2 += sum1 + sum1 = (sum1 & 0xffff) + (sum1 >> 16) + sum2 = (sum2 & 0xffff) + (sum2 >> 16) + + sum1 = (sum1 & 0xffff) + (sum1 >> 16) + sum2 = 
(sum2 & 0xffff) + (sum2 >> 16) + + return (sum2 << 16) | sum1 + + +class Fletcher32(Codec): + """The fletcher checksum with 16-bit words and 32-bit output + + This is the netCDF4/HED5 implementation, which is not equivalent + to the one in wikipedia + https://github.com/Unidata/netcdf-c/blob/main/plugins/H5checksum.c#L95 + + With this codec, the checksum is concatenated on the end of the data + bytes when encoded. At decode time, the checksum is performed on + the data portion and compared with the four-byte checksum, raising + RuntimeError if inconsistent. + """ + + codec_id = "fletcher32" + + def encode(self, buf): + """Return buffer plus 4-byte fletcher checksum""" + buf = ensure_contiguous_ndarray(buf).ravel().view('uint8') + cdef const uint8_t[::1] b_ptr = buf + val = _fletcher32(b_ptr) + return buf.tobytes() + struct.pack(" Date: Tue, 24 Jan 2023 10:41:34 +0100 Subject: [PATCH 04/34] Bump pypa/cibuildwheel from 2.11.3 to 2.12.0 (#419) Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.11.3 to 2.12.0. - [Release notes](https://github.com/pypa/cibuildwheel/releases) - [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md) - [Commits](https://github.com/pypa/cibuildwheel/compare/v2.11.3...v2.12.0) --- updated-dependencies: - dependency-name: pypa/cibuildwheel dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/wheel.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheel.yaml b/.github/workflows/wheel.yaml index 39ed5353..a3f25b21 100644 --- a/.github/workflows/wheel.yaml +++ b/.github/workflows/wheel.yaml @@ -22,7 +22,7 @@ jobs: with: submodules: true - - uses: pypa/cibuildwheel@v2.11.3 + - uses: pypa/cibuildwheel@v2.12.0 - uses: actions/upload-artifact@v3 with: From 2ad7a129e196b40bcbb389d6ec2b06bb5f169a81 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 2 Feb 2023 16:43:29 +0100 Subject: [PATCH 05/34] =?UTF-8?q?http://=20=E2=86=92=20https://=20(#415)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update links in the process. --- .github/CONTRIBUTING.md | 2 +- CODE_OF_CONDUCT.md | 6 +++--- README.rst | 2 +- docs/index.rst | 9 ++++----- docs/release.rst | 2 +- 5 files changed, 10 insertions(+), 11 deletions(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index bccd9160..9c117cc3 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -1,4 +1,4 @@ Contributing ============ -Please see the [project documentation](http://numcodecs.readthedocs.io/en/stable/contributing.html) for information about contributing to NumCodecs. +Please see the [project documentation](https://numcodecs.readthedocs.io/en/stable/contributing.html) for information about contributing to NumCodecs. 
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md index 93175dd6..b7395b1c 100644 --- a/CODE_OF_CONDUCT.md +++ b/CODE_OF_CONDUCT.md @@ -40,7 +40,7 @@ Project maintainers who do not follow or enforce the Code of Conduct in good fai ## Attribution -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] +This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [https://www.contributor-covenant.org/version/1/4/code-of-conduct/][version] -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ +[homepage]: https://www.contributor-covenant.org/ +[version]: https://www.contributor-covenant.org/version/1/4/code-of-conduct/ diff --git a/README.rst b/README.rst index 9850c2e7..c9e58f52 100644 --- a/README.rst +++ b/README.rst @@ -5,7 +5,7 @@ Numcodecs is a Python package providing buffer compression and transformation codecs for use in data storage and communication applications. .. image:: https://readthedocs.org/projects/numcodecs/badge/?version=latest - :target: http://numcodecs.readthedocs.io/en/latest/?badge=latest + :target: https://numcodecs.readthedocs.io/en/latest/?badge=latest .. image:: https://github.com/zarr-developers/numcodecs/workflows/Linux%20CI/badge.svg?branch=main :target: https://github.com/zarr-developers/numcodecs/actions?query=workflow%3A%22Linux+CI%22 diff --git a/docs/index.rst b/docs/index.rst index db9f935d..6b2169d3 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,9 +11,8 @@ Installation ------------ Numcodecs depends on NumPy. It is generally best to `install NumPy -`_ first using -whatever method is most appropriate for you operating system and -Python distribution. +`_ first using whatever method is most +appropriate for you operating system and Python distribution. 
Install from PyPI:: @@ -105,7 +104,7 @@ documentation, code reviews, comments and/or ideas: Numcodecs bundles the `c-blosc `_ library. Development of this package is supported by the -`MRC Centre for Genomics and Global Health `_. +`MRC Centre for Genomics and Global Health `_. Indices and tables ------------------ @@ -114,4 +113,4 @@ Indices and tables * :ref:`modindex` * :ref:`search` -.. _Blosc: http://www.blosc.org/ +.. _Blosc: https://www.blosc.org/ diff --git a/docs/release.rst b/docs/release.rst index 90d62750..3f2394b5 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -691,7 +691,7 @@ Fixed project description in setup.py. ----- First release. This version is a port of the ``codecs`` module from `Zarr -`_ 2.1.0. The following changes have been made from +`_ 2.1.0. The following changes have been made from the original Zarr module: * Codec classes have been re-organized into separate modules, mostly one per From 8d2bac09ccb4b75111db18d76c03f33812203f9c Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Sun, 7 May 2023 12:41:59 +0200 Subject: [PATCH 06/34] Temporarily pin down Sphinx to work around RTD bug (#434) * https://github.com/readthedocs/readthedocs.org/issues/10279 * https://pullanswer.com/questions/project-fails-to-build-with-sphinx-7-0-0-jinja2-exceptions-undefinederror-style-is-undefined --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 21beb3f7..5d6f18fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ Homepage = "https://github.com/zarr-developers/numcodecs" [project.optional-dependencies] docs = [ - "sphinx", + "sphinx<7.0.0", "sphinx-issues", "numpydoc", "mock", From 35b6631a3699977d83b81be9d0186a945bbe8ba5 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Sun, 7 May 2023 14:46:34 +0200 Subject: [PATCH 07/34] 
Merge comparisons with `in` (#432) Co-authored-by: Josh Moore --- numcodecs/tests/test_zfpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numcodecs/tests/test_zfpy.py b/numcodecs/tests/test_zfpy.py index 7766b059..c72b7e27 100644 --- a/numcodecs/tests/test_zfpy.py +++ b/numcodecs/tests/test_zfpy.py @@ -49,7 +49,7 @@ def test_encode_decode(): for arr in arrays: - if arr.dtype == np.int32 or arr.dtype == np.int64: + if arr.dtype in (np.int32, np.int64): codec = [codecs[-1]] else: codec = codecs From 4f194b661faf1f945cb21b1ac1f19c4314ff8f05 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 31 May 2023 10:11:37 +0200 Subject: [PATCH 08/34] Remove Python 2 leftover (#438) UTF-8 is the default encoding of sources in Python 3. --- docs/conf.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index e2922fef..fba84bc7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # numcodecs documentation build configuration file, created by # sphinx-quickstart on Mon May 2 21:40:09 2016. From 774673763a0676a39ea985566fdd8c79f9d97f51 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Tue, 18 Jul 2023 19:43:18 -0400 Subject: [PATCH 09/34] Fix Makefile when sphinx is not available (#451) * Fix Makefile when sphinx is not available * Add release notes --- docs/Makefile | 63 +++++++++++++++++++++++++----------------------- docs/release.rst | 2 +- 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index fe6a0bc4..19bcf031 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -7,10 +7,12 @@ SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build -# User-friendly check for sphinx-build -ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) - $(error The '$(SPHINXBUILD)' command was not found. 
Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) -endif +.PHONY: sphinx +sphinx: + # User-friendly check for sphinx-build + ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) + $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) + endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 @@ -54,44 +56,44 @@ clean: rm -rf $(BUILDDIR)/* .PHONY: html -html: +html: sphinx $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." .PHONY: dirhtml -dirhtml: +dirhtml: sphinx $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." .PHONY: singlehtml -singlehtml: +singlehtml: sphinx $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." .PHONY: pickle -pickle: +pickle: sphinx $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." .PHONY: json -json: +json: sphinx $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." .PHONY: htmlhelp -htmlhelp: +htmlhelp: sphinx $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." 
.PHONY: qthelp -qthelp: +qthelp: sphinx $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ @@ -101,7 +103,7 @@ qthelp: @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/zarr.qhc" .PHONY: applehelp -applehelp: +applehelp: sphinx $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp @echo @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." @@ -110,7 +112,7 @@ applehelp: "bundle." .PHONY: devhelp -devhelp: +devhelp: sphinx $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @@ -120,19 +122,19 @@ devhelp: @echo "# devhelp" .PHONY: epub -epub: +epub: sphinx $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." .PHONY: epub3 -epub3: +epub3: sphinx $(SPHINXBUILD) -b epub3 $(ALLSPHINXOPTS) $(BUILDDIR)/epub3 @echo @echo "Build finished. The epub3 file is in $(BUILDDIR)/epub3." .PHONY: latex -latex: +latex: sphinx $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @@ -140,33 +142,33 @@ latex: "(use \`make latexpdf' here to do that automatically)." .PHONY: latexpdf -latexpdf: +latexpdf: sphinx $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." .PHONY: latexpdfja -latexpdfja: +latexpdfja: sphinx $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." .PHONY: text -text: +text: sphinx $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." 
.PHONY: man -man: +man: sphinx $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." .PHONY: texinfo -texinfo: +texinfo: sphinx $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @@ -174,57 +176,58 @@ texinfo: "(use \`make info' here to do that automatically)." .PHONY: info -info: +info: sphinx $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." .PHONY: gettext -gettext: +gettext: sphinx $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." .PHONY: changes -changes: +changes: sphinx $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." .PHONY: linkcheck -linkcheck: +linkcheck: sphinx $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." .PHONY: doctest -doctest: +doctest: sphinx $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." .PHONY: coverage -coverage: +coverage: sphinx $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage @echo "Testing of coverage in the sources finished, look at the " \ "results in $(BUILDDIR)/coverage/python.txt." .PHONY: xml -xml: +xml: sphinx $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." .PHONY: pseudoxml -pseudoxml: +pseudoxml: sphinx $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. 
The pseudo-XML files are in $(BUILDDIR)/pseudoxml." .PHONY: dummy -dummy: +dummy: sphinx $(SPHINXBUILD) -b dummy $(ALLSPHINXOPTS) $(BUILDDIR)/dummy @echo @echo "Build finished. Dummy builder generates no files." + diff --git a/docs/release.rst b/docs/release.rst index 3f2394b5..f9a280bf 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -21,7 +21,7 @@ Enhancements Fix ~~~ -* +* Fixed docs/Makefile error message when sphinx is not present Maintenance ~~~~~~~~~~~ From 948da6da82a5e1119efde342d405f29acd075ce6 Mon Sep 17 00:00:00 2001 From: Peter Sobolewski <76622105+psobolewskiPhD@users.noreply.github.com> Date: Tue, 18 Jul 2023 22:55:15 -0400 Subject: [PATCH 10/34] Use concurrency to cancel-in-progress workflows on new commit (#427) --- .github/workflows/ci-linux.yaml | 4 ++++ .github/workflows/ci-osx.yaml | 4 ++++ .github/workflows/ci-windows.yaml | 4 ++++ .github/workflows/wheel.yaml | 4 ++++ 4 files changed, 16 insertions(+) diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index bd617ba6..a46ad395 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -2,6 +2,10 @@ name: Linux CI on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: build: runs-on: ubuntu-latest diff --git a/.github/workflows/ci-osx.yaml b/.github/workflows/ci-osx.yaml index d5fd3c78..1f340bc0 100644 --- a/.github/workflows/ci-osx.yaml +++ b/.github/workflows/ci-osx.yaml @@ -2,6 +2,10 @@ name: OSX CI on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: build: runs-on: macos-latest diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index 64b70938..154ed20b 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -2,6 +2,10 @@ name: Windows CI on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + 
cancel-in-progress: true + jobs: build: runs-on: windows-latest diff --git a/.github/workflows/wheel.yaml b/.github/workflows/wheel.yaml index a3f25b21..b4445e3f 100644 --- a/.github/workflows/wheel.yaml +++ b/.github/workflows/wheel.yaml @@ -2,6 +2,10 @@ name: Wheels on: [push, pull_request] +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + jobs: build_wheels: name: Build wheel on ${{ matrix.os }} From 63e820e76c4bc52bee06eaeebe769cc299bf7f37 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 19 Jul 2023 12:54:57 -0400 Subject: [PATCH 11/34] Add jenkins lookup3 32-bit checksum (#446) * Add initial version of Cython jenkins lookup3 32-bit checksum * Add release notes and docs --- docs/checksum32.rst | 10 + docs/release.rst | 2 + numcodecs/__init__.py | 3 +- numcodecs/checksum32.py | 57 ++++++ numcodecs/jenkins.pyx | 327 ++++++++++++++++++++++++++++++++ numcodecs/tests/test_jenkins.py | 151 +++++++++++++++ setup.py | 27 ++- 7 files changed, 575 insertions(+), 2 deletions(-) create mode 100644 numcodecs/jenkins.pyx create mode 100644 numcodecs/tests/test_jenkins.py diff --git a/docs/checksum32.rst b/docs/checksum32.rst index 5e682afc..5b2013f8 100644 --- a/docs/checksum32.rst +++ b/docs/checksum32.rst @@ -33,3 +33,13 @@ Fletcher32 .. automethod:: encode .. automethod:: decode +JenkinsLookup3 +-------------- + +.. autoclass:: JenkinsLookup3 + + .. autoattribute:: codec_id + .. autoattribute:: initval + .. autoattribute:: prefix + .. automethod:: encode + .. automethod:: decode diff --git a/docs/release.rst b/docs/release.rst index f9a280bf..d7211436 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -17,6 +17,8 @@ Enhancements * Add ``fletcher32`` checksum codec By :user:`Martin Durant `, :issue:`410`. +* Add ``jenkins_lookup3`` checksum codec + By :user:`Mark Kittisopikul `, :issue:`445`.
Fix ~~~ diff --git a/numcodecs/__init__.py b/numcodecs/__init__.py index 1e3c8536..3d7befe2 100644 --- a/numcodecs/__init__.py +++ b/numcodecs/__init__.py @@ -98,9 +98,10 @@ from numcodecs.msgpacks import MsgPack register_codec(MsgPack) -from numcodecs.checksum32 import CRC32, Adler32 +from numcodecs.checksum32 import CRC32, Adler32, JenkinsLookup3 register_codec(CRC32) register_codec(Adler32) +register_codec(JenkinsLookup3) from numcodecs.json import JSON register_codec(JSON) diff --git a/numcodecs/checksum32.py b/numcodecs/checksum32.py index 06dfbdb4..35a5ab99 100644 --- a/numcodecs/checksum32.py +++ b/numcodecs/checksum32.py @@ -2,10 +2,12 @@ import numpy as np +import struct from .abc import Codec from .compat import ensure_contiguous_ndarray, ndarray_copy +from .jenkins import jenkins_lookup3 class Checksum32(Codec): @@ -40,3 +42,58 @@ class Adler32(Checksum32): codec_id = 'adler32' checksum = zlib.adler32 + + +class JenkinsLookup3(Checksum32): + """Bob Jenkins' lookup3 checksum with 32-bit output + + This is the HDF5 implementation. + https://github.com/HDFGroup/hdf5/blob/577c192518598c7e2945683655feffcdbdf5a91b/src/H5checksum.c#L378-L472 + + With this codec, the checksum is concatenated on the end of the data + bytes when encoded. At decode time, the checksum is performed on + the data portion and compared with the four-byte checksum, raising + RuntimeError if inconsistent.
+ + Attributes: + initval: initial seed passed to the hash algorithm, default: 0 + prefix: bytes prepended to the buffer before evaluating the hash, default: None + """ + + checksum = jenkins_lookup3 + codec_id = "jenkins_lookup3" + + def __init__(self, initval: int = 0, prefix=None): + self.initval = initval + if prefix is None: + self.prefix = None + else: + self.prefix = np.frombuffer(prefix, dtype='uint8') + + def encode(self, buf): + """Return buffer plus 4-byte Bob Jenkin's lookup3 checksum""" + buf = ensure_contiguous_ndarray(buf).ravel().view('uint8') + if self.prefix is None: + val = jenkins_lookup3(buf, self.initval) + else: + val = jenkins_lookup3(np.hstack((self.prefix, buf)), self.initval) + return buf.tobytes() + struct.pack("0xdeadbeef) + (length) + initval + + # Return immediately for empty bytes + if length == 0: + return c + + cdef: + const uint8_t *k = &_data[0] + + # We are adding uint32_t values (words) byte by byte so we do not assume endianness or alignment + # lookup3.c hashlittle checks for alignment + + # all but the last block: affect some 32 bits of (a,b,c) + while length > 12: + a += k[0] + a += (k[1]) << 8 + a += (k[2]) << 16 + a += (k[3]) << 24 + b += k[4] + b += (k[5]) << 8 + b += (k[6]) << 16 + b += (k[7]) << 24 + c += k[8] + c += (k[9]) << 8 + c += (k[10]) << 16 + c += (k[11]) << 24 + a, b, c = _jenkins_lookup3_mix(a, b, c) + length -= 12 + k += 12 + + # -------------------------------- last block: affect all 32 bits of (c) + if length == 12: + c += (k[11]) << 24 + length -= 1 + + if length == 11: + c += (k[10]) << 16 + length -= 1 + + if length == 10: + c += (k[9]) << 8 + length -= 1 + + if length == 9: + c += k[8] + length -= 1 + + if length == 8: + b += (k[7]) << 24 + length -= 1 + + if length == 7: + b += (k[6]) << 16 + length -= 1 + + if length == 6: + b += (k[5]) << 8 + length -= 1 + + if length == 5: + b += k[4] + length -= 1 + + if length == 4: + a += (k[3]) << 24 + length -= 1 + + if length == 3: + a += (k[2]) << 16 + 
length -= 1 + + if length == 2: + a += (k[1]) << 8 + length -= 1 + + if length == 1: + a += k[0] + length -= 1 + + if length == 0: + pass + + return _jenkins_lookup3_final(a, b, c) + +cdef inline uint32_t _jenkins_lookup3_final(uint32_t a, uint32_t b, uint32_t c): + """ + _jenkins_lookup3_final -- final mixing of 3 32-bit values (a,b,c) into c + + Pairs of (a,b,c) values differing in only a few bits will usually + produce values of c that look totally different. This was tested for + * pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). + * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. + * the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + + These constants passed: + 14 11 25 16 4 14 24 + 12 14 25 16 4 14 24 + and these came close: + 4 8 15 26 3 22 24 + 10 8 15 26 3 22 24 + 11 8 15 26 3 22 24 + """ + c ^= b + c -= _jenkins_lookup3_rot(b,14) + a ^= c + a -= _jenkins_lookup3_rot(c,11) + b ^= a + b -= _jenkins_lookup3_rot(a,25) + c ^= b + c -= _jenkins_lookup3_rot(b,16) + a ^= c + a -= _jenkins_lookup3_rot(c,4) + b ^= a + b -= _jenkins_lookup3_rot(a,14) + c ^= b + c -= _jenkins_lookup3_rot(b,24) + return c + +cdef inline uint32_t _jenkins_lookup3_rot(uint32_t x, uint8_t k): + return (((x) << (k)) ^ ((x) >> (32 - (k)))) + +cdef inline (uint32_t, uint32_t, uint32_t) _jenkins_lookup3_mix(uint32_t a, uint32_t b, uint32_t c): + """ + _jenkins_lookup3_mix -- mix 3 32-bit values reversibly. + + This is reversible, so any information in (a,b,c) before mix() is + still in (a,b,c) after mix(). 
+ + If four pairs of (a,b,c) inputs are run through mix(), or through + mix() in reverse, there are at least 32 bits of the output that + are sometimes the same for one pair and different for another pair. + This was tested for: + * pairs that differed by one bit, by two bits, in any combination + of top bits of (a,b,c), or in any combination of bottom bits of + (a,b,c). + * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed + the output delta to a Gray code (a^(a>>1)) so a string of 1's (as + is commonly produced by subtraction) look like a single 1-bit + difference. + * the base values were pseudorandom, all zero but one bit set, or + all zero plus a counter that starts at zero. + + Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that + satisfy this are + 4 6 8 16 19 4 + 9 15 3 18 27 15 + 14 9 3 7 17 3 + Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing + for "differ" defined as + with a one-bit base and a two-bit delta. I + used http://burtleburtle.net/bob/hash/avalanche.html to choose + the operations, constants, and arrangements of the variables. + + This does not achieve avalanche. There are input bits of (a,b,c) + that fail to affect some output bits of (a,b,c), especially of a. The + most thoroughly mixed value is c, but it doesn't really even achieve + avalanche in c. + + This allows some parallelism. Read-after-writes are good at doubling + the number of bits affected, so the goal of mixing pulls in the opposite + direction as the goal of parallelism. I did what I could. Rotates + seem to cost as much as shifts on every machine I could lay my hands + on, and rotates are much kinder to the top and bottom bits, so I used + rotates. 
+ """ + a -= c + a ^= _jenkins_lookup3_rot(c, 4) + c += b + b -= a + b ^= _jenkins_lookup3_rot(a, 6) + a += c + c -= b + c ^= _jenkins_lookup3_rot(b, 8) + b += a + a -= c + a ^= _jenkins_lookup3_rot(c, 16) + c += b + b -= a + b ^= _jenkins_lookup3_rot(a, 19) + a += c + c -= b + c ^= _jenkins_lookup3_rot(b, 4) + b += a + return a, b, c + + diff --git a/numcodecs/tests/test_jenkins.py b/numcodecs/tests/test_jenkins.py new file mode 100644 index 00000000..4873e44f --- /dev/null +++ b/numcodecs/tests/test_jenkins.py @@ -0,0 +1,151 @@ +import numpy as np +import pytest + +from numcodecs.jenkins import jenkins_lookup3 +from numcodecs.checksum32 import JenkinsLookup3 + + +def test_jenkins_lookup3(): + h = jenkins_lookup3(b"", 0) + assert h == 0xdeadbeef + h = jenkins_lookup3(b"", 0xdeadbeef) + assert h == 0xbd5b7dde + h = jenkins_lookup3(b"Four score and seven years ago", 0) + assert h == 0x17770551 + h = jenkins_lookup3(b"Four score and seven years ago", 1) + assert h == 0xcd628161 + + # jenkins-cffi example + h = jenkins_lookup3(b"jenkins", 0) + assert h == 202276345 + + h_last = [0] + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + h = jenkins_lookup3(b"", h_last[-1]) + assert h not in h_last + h_last.append(h) + + a = np.frombuffer(b"Four score and seven years ago", dtype="uint8") + h = jenkins_lookup3(a, 0) + assert h == 0x17770551 + + +def 
test_jenkins_lookup3_codec(): + s = b"Four score and seven years ago" + j = JenkinsLookup3() + result = j.encode(s) + assert result[-4:] == b'\x51\x05\x77\x17' + assert bytes(j.decode(result)) == s + + j = JenkinsLookup3(initval=0xdeadbeef) + result = j.encode(s) + assert bytes(j.decode(result)) == s + + j = JenkinsLookup3(initval=1230) + result = j.encode(s) + assert result[-4:] == b'\xd7Z\xe2\x0e' + assert bytes(j.decode(result)) == s + + j = JenkinsLookup3(initval=1230, prefix=b"Hello world") + result = j.encode(s) + assert bytes(j.decode(result)) == s + + chunk_index = b"\x00\x08\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x17\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\xee'\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\xe57\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\xdcG\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\xd3W\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\xcag\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\xc1w\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\xb8\x87\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\xaf\x97\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\xa6\xa7\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\x9d\xb7\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\x94\xc7\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\x8b\xd7\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"\x82\xe7\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"y\xf7\x00\x00\x00\x00\x00\x00" + \ + b"\xf7\x0f\x00\x00\x00\x00\x00\x00" + \ + b"n\x96\x07\x85" + hdf5_fadb_prefix = b'FADB\x00\x01\xcf\x01\x00\x00\x00\x00\x00\x00' + j = 
JenkinsLookup3(prefix=hdf5_fadb_prefix) + result = j.encode(chunk_index[:-4]) + j.decode(result) + assert result == chunk_index + + +@pytest.mark.parametrize( + "dtype", + ["uint8", "int32", "float32"] +) +def test_with_data(dtype): + data = np.arange(100, dtype=dtype) + j = JenkinsLookup3() + arr = np.frombuffer(j.decode(j.encode(data)), dtype=dtype) + assert (arr == data).all() + + +def test_error(): + data = np.arange(100) + j = JenkinsLookup3() + enc = j.encode(data) + enc2 = bytearray(enc) + enc2[0] += 1 + with pytest.raises(RuntimeError) as e: + j.decode(enc2) + assert "Bob Jenkin's lookup3 checksum" in str(e.value) + + +def test_out(): + data = np.frombuffer(bytearray(b"Hello World"), dtype="uint8") + j = JenkinsLookup3() + result = j.encode(data) + j.decode(result, out=data) diff --git a/setup.py b/setup.py index 14072445..f07cf8d4 100644 --- a/setup.py +++ b/setup.py @@ -223,6 +223,31 @@ def fletcher_extension(): return extensions +def jenkins_extension(): + info('setting up jenkins extension') + + extra_compile_args = base_compile_args.copy() + define_macros = [] + + # setup sources + include_dirs = ['numcodecs'] + define_macros += [('CYTHON_TRACE', '1')] + + sources = ['numcodecs/jenkins.pyx'] + + # define extension module + extensions = [ + Extension('numcodecs.jenkins', + sources=sources, + include_dirs=include_dirs, + define_macros=define_macros, + extra_compile_args=extra_compile_args, + ), + ] + + return extensions + + def compat_extension(): info('setting up compat extension') @@ -291,7 +316,7 @@ def run_setup(with_extensions): if with_extensions: ext_modules = (blosc_extension() + zstd_extension() + lz4_extension() + compat_extension() + shuffle_extension() + vlen_extension() + - fletcher_extension()) + fletcher_extension() + jenkins_extension()) cmdclass = dict(build_ext=ve_build_ext) else: From f1d7c0e282ac9193a60cafa00a15ed92073484c9 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Thu, 20 Jul 2023 08:52:23 -0400 Subject: [PATCH 12/34] 
Fix Fletcher32 out and test (#449) --- numcodecs/fletcher32.pyx | 2 +- numcodecs/tests/test_fletcher32.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/numcodecs/fletcher32.pyx b/numcodecs/fletcher32.pyx index 02f9319c..7c7b159f 100644 --- a/numcodecs/fletcher32.pyx +++ b/numcodecs/fletcher32.pyx @@ -79,7 +79,7 @@ class Fletcher32(Codec): f" match the expected checksum ({found}).\n" "This could be a sign that the data has been corrupted." ) - if out: + if out is not None: out.view("uint8")[:] = b[:-4] return out return memoryview(b[:-4]) diff --git a/numcodecs/tests/test_fletcher32.py b/numcodecs/tests/test_fletcher32.py index 76564e95..aa4ca1ab 100644 --- a/numcodecs/tests/test_fletcher32.py +++ b/numcodecs/tests/test_fletcher32.py @@ -40,3 +40,10 @@ def test_known(): 1911, -2427, 1897, -2412, 2440, 873, -621, -829, 551, -2118, ] assert outarr.tolist() == expected + + +def test_out(): + data = np.frombuffer(bytearray(b"Hello World"), dtype="uint8") + f = Fletcher32() + result = f.encode(data) + f.decode(result, out=data) From 2d7264cacd0c07db68d26ef545c95fa1457595ac Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 27 Jul 2023 18:22:41 +0000 Subject: [PATCH 13/34] Bump pypa/cibuildwheel from 2.12.0 to 2.13.0 (#440) --- .github/workflows/wheel.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheel.yaml b/.github/workflows/wheel.yaml index b4445e3f..7847e866 100644 --- a/.github/workflows/wheel.yaml +++ b/.github/workflows/wheel.yaml @@ -26,7 +26,7 @@ jobs: with: submodules: true - - uses: pypa/cibuildwheel@v2.12.0 + - uses: pypa/cibuildwheel@v2.13.0 - uses: actions/upload-artifact@v3 with: From 6adfa933fadca2eeb227e0cbe5e309bc71bdba31 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 2 Aug 2023 10:49:16 +0200 Subject: [PATCH 14/34] Bump pypa/cibuildwheel from 2.13.0 to 2.14.1 
(#454) Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.13.0 to 2.14.1. - [Release notes](https://github.com/pypa/cibuildwheel/releases) - [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md) - [Commits](https://github.com/pypa/cibuildwheel/compare/v2.13.0...v2.14.1) --- updated-dependencies: - dependency-name: pypa/cibuildwheel dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/wheel.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheel.yaml b/.github/workflows/wheel.yaml index 7847e866..7e97f014 100644 --- a/.github/workflows/wheel.yaml +++ b/.github/workflows/wheel.yaml @@ -26,7 +26,7 @@ jobs: with: submodules: true - - uses: pypa/cibuildwheel@v2.13.0 + - uses: pypa/cibuildwheel@v2.14.1 - uses: actions/upload-artifact@v3 with: From e0cc459a8db42f6b6c09c3ed12bf0d1e430d2a78 Mon Sep 17 00:00:00 2001 From: Peter Sobolewski <76622105+psobolewskiPhD@users.noreply.github.com> Date: Wed, 2 Aug 2023 09:22:10 -0400 Subject: [PATCH 15/34] use CIBW overrides to build arm64 macOS wheels (#428) Co-authored-by: Josh Moore --- .github/workflows/wheel.yaml | 7 ++++--- pyproject.toml | 8 ++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/.github/workflows/wheel.yaml b/.github/workflows/wheel.yaml index 7e97f014..36a0d450 100644 --- a/.github/workflows/wheel.yaml +++ b/.github/workflows/wheel.yaml @@ -18,9 +18,10 @@ jobs: CIBW_TEST_COMMAND: pytest --pyargs numcodecs CIBW_TEST_REQUIRES: pytest CIBW_SKIP: "pp* cp36-* *-musllinux_* *win32 *_i686 *_s390x" - CIBW_ENVIRONMENT: "DISABLE_NUMCODECS_AVX2=1" - CIBW_ENVIRONMENT_MACOS: 'MACOSX_DEPLOYMENT_TARGET=10.9 DISABLE_NUMCODECS_AVX2=1 CFLAGS="$CFLAGS -Wno-implicit-function-declaration"' - + CIBW_ARCHS_MACOS: 'x86_64 arm64' + CIBW_TEST_SKIP: '*-macosx_arm64' + # note: 
CIBW_ENVIRONMENT is now set in pyproject.toml + steps: - uses: actions/checkout@v3 with: diff --git a/pyproject.toml b/pyproject.toml index 5d6f18fc..a2b50e32 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -108,3 +108,11 @@ norecursedirs = [ "notebooks", "numcodecs.egg-info", ] +[tool.cibuildwheel] + environment = { DISABLE_NUMCODECS_AVX2=1 } + [tool.cibuildwheel.macos] + environment = { MACOSX_DEPLOYMENT_TARGET=10.9, CFLAGS="$CFLAGS -Wno-implicit-function-declaration" } + [[tool.cibuildwheel.overrides]] + select = "*-macosx_arm64" + environment = { DISABLE_NUMCODECS_SSE2=1 } + \ No newline at end of file From 7cbf40d9fa13e5a8349646421b8c0773f01fab78 Mon Sep 17 00:00:00 2001 From: Peter Sobolewski <76622105+psobolewskiPhD@users.noreply.github.com> Date: Thu, 3 Aug 2023 04:26:27 -0400 Subject: [PATCH 16/34] [CI] Just test import for wheels (#453) * Just test import for wheels * Use double quotes --------- Co-authored-by: Josh Moore --- .github/workflows/wheel.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/wheel.yaml b/.github/workflows/wheel.yaml index 36a0d450..99a26a7a 100644 --- a/.github/workflows/wheel.yaml +++ b/.github/workflows/wheel.yaml @@ -15,8 +15,7 @@ jobs: matrix: os: [ubuntu-latest, windows-latest, macos-latest] env: - CIBW_TEST_COMMAND: pytest --pyargs numcodecs - CIBW_TEST_REQUIRES: pytest + CIBW_TEST_COMMAND: python -c "import numcodecs" CIBW_SKIP: "pp* cp36-* *-musllinux_* *win32 *_i686 *_s390x" CIBW_ARCHS_MACOS: 'x86_64 arm64' CIBW_TEST_SKIP: '*-macosx_arm64' From f93cebd4f7f6f5023c574a444c97194701c31162 Mon Sep 17 00:00:00 2001 From: Peter Sobolewski <76622105+psobolewskiPhD@users.noreply.github.com> Date: Thu, 3 Aug 2023 04:29:15 -0400 Subject: [PATCH 17/34] Update index.rst for installation and test changes (#426) --- docs/index.rst | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/docs/index.rst b/docs/index.rst index 6b2169d3..86a185f1 100644 --- 
a/docs/index.rst +++ b/docs/index.rst @@ -28,28 +28,35 @@ library. Wheels are available for most platforms. Installing a wheel or via conda will install a pre-compiled binary distribution. However, if you have a newer CPU that supports the AVX2 instruction set (e.g., Intel Haswell, Broadwell or Skylake) then installing via pip is preferable, -because this will compile the Blosc library from source with optimisations -for AVX2. +because you can compile the Blosc library from source with optimisations +for AVX2.:: + + $ pip install -v --no-cache-dir --no-binary numcodecs numcodecs Note that if you compile the C extensions on a machine with AVX2 support you probably then cannot use the same binaries on a machine without AVX2. -To disable compilation with AVX2 support regardless of the machine -architecture:: - $ export DISABLE_NUMCODECS_AVX2= - $ pip install -v --no-cache-dir --no-binary numcodecs numcodecs +If you specifically want to disable AVX2 or SSE2 when compiling, you can use +the following environment variables:: -To work with Numcodecs source code in development, install from GitHub:: + $ export DISABLE_NUMCODECS_AVX2=1 + $ export DISABLE_NUMCODECS_SSE2=1 + + +To work with Numcodecs source code in development, clone the repository from GitHub +and then install in editable mode using `pip`.:: $ git clone --recursive https://github.com/zarr-developers/numcodecs.git $ cd numcodecs - $ python setup.py install + $ pip install -e .[test,msgpack,zfpy] + +Note: if you prefer to use the GitHub CLI ``gh`` you will need to append ``-- --recurse-submodules`` +to the clone command so everything works properly.
To verify that Numcodecs has been fully installed (including the Blosc extension) run the test suite:: - $ pip install nose - $ python -m nose -v numcodecs + $ pytest -v Contents -------- From d667b31ebe7e8a49f8be6adb15e32c30e957487a Mon Sep 17 00:00:00 2001 From: jakirkham Date: Tue, 8 Aug 2023 22:02:50 -0700 Subject: [PATCH 18/34] Add missing RTD requirements (#455) --- .readthedocs.yaml | 2 ++ docs/release.rst | 3 +++ 2 files changed, 5 insertions(+) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 984ffc46..1405493c 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -17,3 +17,5 @@ python: path: . extra_requirements: - docs + - msgpack + - zfpy diff --git a/docs/release.rst b/docs/release.rst index d7211436..1c64cee1 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -24,6 +24,9 @@ Fix ~~~ * Fixed docs/Makefile error message when sphinx is not present + By :user:`Mark Kittisopikul `, :issue:`451`. +* Add missing RTD requirements + By :user:`John Kirkham `, :issue:`455`. Maintenance ~~~~~~~~~~~ From cb155432e36536e17a2d054c8c24b7bf6f4a7347 Mon Sep 17 00:00:00 2001 From: Steve Kowalik Date: Fri, 11 Aug 2023 19:59:13 +1000 Subject: [PATCH 19/34] Remove use of entrypoints (#442) Since Python 3.8, the standard library has included functionality to query entry points directly using importlib.metadata. Since the API has changed for the better with Python 3.10, we need to support both ways of using it. 
--- numcodecs/registry.py | 14 +++++++++----- numcodecs/tests/test_entrypoints.py | 2 -- pyproject.toml | 7 ++++++- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/numcodecs/registry.py b/numcodecs/registry.py index 532e9967..a4dff6d7 100644 --- a/numcodecs/registry.py +++ b/numcodecs/registry.py @@ -1,7 +1,7 @@ """The registry module provides some simple convenience functions to enable applications to dynamically register and look-up codec classes.""" +from importlib.metadata import entry_points import logging -from contextlib import suppress logger = logging.getLogger("numcodecs") codec_registry = {} @@ -9,13 +9,17 @@ def run_entrypoints(): - import entrypoints entries.clear() - entries.update(entrypoints.get_group_named("numcodecs.codecs")) + eps = entry_points() + if hasattr(eps, 'select'): + # If entry_points() has a select method, use that. Python 3.10+ + entries.update(eps.select(group="numcodecs.codecs")) + else: + # Otherwise, fallback to using get + entries.update(eps.get("numcodecs.codecs", [])) -with suppress(ImportError): - run_entrypoints() +run_entrypoints() def get_codec(config): diff --git a/numcodecs/tests/test_entrypoints.py b/numcodecs/tests/test_entrypoints.py index 81af635d..0d017f2d 100644 --- a/numcodecs/tests/test_entrypoints.py +++ b/numcodecs/tests/test_entrypoints.py @@ -7,7 +7,6 @@ here = os.path.abspath(os.path.dirname(__file__)) -pytest.importorskip("entrypoints") @pytest.fixture() @@ -20,7 +19,6 @@ def set_path(): numcodecs.registry.codec_registry.pop("test") -@pytest.mark.xfail(reason="FIXME: not working in wheels build") def test_entrypoint_codec(set_path): cls = numcodecs.registry.get_codec({"id": "test"}) assert cls.codec_id == "test" diff --git a/pyproject.toml b/pyproject.toml index a2b50e32..147f9b54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,7 +15,6 @@ in data storage and communication applications. 
""" readme = "README.rst" dependencies = [ - "entrypoints", "numpy>=1.7", ] requires-python = ">=3.8" @@ -71,6 +70,12 @@ package-dir = {"" = "."} packages = ["numcodecs", "numcodecs.tests"] zip-safe = false +[tool.setuptools.package-data] +numcodecs = [ + "tests/package_with_entrypoint/__init__.py", + "tests/package_with_entrypoint-0.1.dist-info/entry_points.txt" +] + [tool.setuptools_scm] version_scheme = "guess-next-dev" local_scheme = "dirty-tag" From e474389707258d5a56e7c1bae6a0f595a14f152f Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 4 Oct 2023 20:04:24 +0200 Subject: [PATCH 20/34] Fix typo found by codespell (#466) --- numcodecs/jenkins.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/numcodecs/jenkins.pyx b/numcodecs/jenkins.pyx index 39cd0793..24392238 100644 --- a/numcodecs/jenkins.pyx +++ b/numcodecs/jenkins.pyx @@ -96,7 +96,7 @@ cpdef uint32_t jenkins_lookup3(const uint8_t[::1] _data, uint32_t initval=0): hash a variable-length key into a 32-bit value data : the key (unaligned variable-length array of bytes) - initval : can be any 4-byte value, defualts to 0 + initval : can be any 4-byte value, defaults to 0 Returns a 32-bit value. Every bit of the key affects every bit of the return value. Two keys differing by one or two bits will have From 515d097da67d70cc866d0fe197de3ef57ecde673 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Wed, 4 Oct 2023 20:05:17 +0200 Subject: [PATCH 21/34] Fix warning in CI jobs (#467) !! ******************************************************************************** newlines are not allowed in `summary` and will break in the future ******************************************************************************** !! 
--- pyproject.toml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 147f9b54..e68dabd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,9 +10,8 @@ build-backend = "setuptools.build_meta" [project] name = "numcodecs" description = """ -A Python package providing buffer compression and transformation codecs for use -in data storage and communication applications. -""" +A Python package providing buffer compression and transformation codecs \ +for use in data storage and communication applications.""" readme = "README.rst" dependencies = [ "numpy>=1.7", @@ -120,4 +119,4 @@ norecursedirs = [ [[tool.cibuildwheel.overrides]] select = "*-macosx_arm64" environment = { DISABLE_NUMCODECS_SSE2=1 } - \ No newline at end of file + From 680d0205ab9e32440c4c1d5f3601fd9fbfdf6bf1 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Fri, 6 Oct 2023 11:06:37 -0700 Subject: [PATCH 22/34] Support Python 3.12 (#471) * Support Python 3.12 * Ensure `entries.update(...)` receives a `dict` On Python 3.12, the existing code did not produce a `dict` that could be used to update `entries` whereas previous Python versions had. To work around this issue, manually create a `dict` that can be used to update `entries` with a format consistent with what had been seen in Python versions before 3.12.
--- .github/workflows/ci-linux.yaml | 2 +- .github/workflows/ci-osx.yaml | 2 +- .github/workflows/ci-windows.yaml | 2 +- docs/release.rst | 2 ++ numcodecs/registry.py | 2 +- 5 files changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index a46ad395..665f6233 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout source diff --git a/.github/workflows/ci-osx.yaml b/.github/workflows/ci-osx.yaml index 1f340bc0..e76fe050 100644 --- a/.github/workflows/ci-osx.yaml +++ b/.github/workflows/ci-osx.yaml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout source diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index 154ed20b..c2847b5c 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -12,7 +12,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - name: Checkout source diff --git a/docs/release.rst b/docs/release.rst index 1c64cee1..5b9d493d 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -19,6 +19,8 @@ Enhancements By :user:`Martin Durant `, :issue:`410`. * Add ``jenkins_lookup3`` checksum codec By :user:`Mark Kittisopkul `, :issue:`445`. +* Support Python 3.12. + By :user:`John Kirkham `, :issue:`471`. 
Fix ~~~ diff --git a/numcodecs/registry.py b/numcodecs/registry.py index a4dff6d7..24186fa5 100644 --- a/numcodecs/registry.py +++ b/numcodecs/registry.py @@ -13,7 +13,7 @@ def run_entrypoints(): eps = entry_points() if hasattr(eps, 'select'): # If entry_points() has a select method, use that. Python 3.10+ - entries.update(eps.select(group="numcodecs.codecs")) + entries.update({e.name: e for e in eps.select(group="numcodecs.codecs")}) else: # Otherwise, fallback to using get entries.update(eps.get("numcodecs.codecs", [])) From 9e0d2d22ee4eb89005adf57a03922327b0f49b85 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Mon, 9 Oct 2023 15:16:54 -0700 Subject: [PATCH 23/34] Add 0.12.0 to release notes --- docs/release.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 5b9d493d..ef83441e 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -9,8 +9,8 @@ Release notes .. _unreleased: -Unreleased ----------- +0.12.0 +------ Enhancements ~~~~~~~~~~~~ From 3f36d47001f12cad598e78a8eedd88964de7cef1 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Mon, 9 Oct 2023 15:19:01 -0700 Subject: [PATCH 24/34] Add 0.12.0 release note anchor --- docs/release.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/release.rst b/docs/release.rst index ef83441e..bd6caf37 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -7,7 +7,7 @@ Release notes # re-indented so that it does not show up in the notes. -.. _unreleased: +.. 
_release_0.12.0: 0.12.0 ------ From a5539b5876fb76f014ac74c17fa98d805d5bbfda Mon Sep 17 00:00:00 2001 From: jakirkham Date: Mon, 9 Oct 2023 15:20:55 -0700 Subject: [PATCH 25/34] Fix Rst syntax in release notes --- docs/release.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/release.rst b/docs/release.rst index bd6caf37..680dd186 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -33,7 +33,7 @@ Fix Maintenance ~~~~~~~~~~~ -* Cleanup ``import``s in ``adhoc/blosc_memleak_check.py`` +* Cleanup ``import``\ s in ``adhoc/blosc_memleak_check.py`` By :user:`John Kirkham `, :issue:`408`. .. _release_0.11.0: From 0e23e79192563bb9e74141e2ecff350826d45264 Mon Sep 17 00:00:00 2001 From: Peter Sobolewski <76622105+psobolewskiPhD@users.noreply.github.com> Date: Tue, 10 Oct 2023 23:59:27 -0700 Subject: [PATCH 26/34] ensure cibuildwheel.macos also has DISABLE_NUMCODECS_AVX2=1 (#479) --- pyproject.toml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e68dabd7..873057c4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -113,10 +113,9 @@ norecursedirs = [ "numcodecs.egg-info", ] [tool.cibuildwheel] - environment = { DISABLE_NUMCODECS_AVX2=1 } - [tool.cibuildwheel.macos] - environment = { MACOSX_DEPLOYMENT_TARGET=10.9, CFLAGS="$CFLAGS -Wno-implicit-function-declaration" } - [[tool.cibuildwheel.overrides]] - select = "*-macosx_arm64" - environment = { DISABLE_NUMCODECS_SSE2=1 } - +environment = { DISABLE_NUMCODECS_AVX2=1 } +[tool.cibuildwheel.macos] +environment = { MACOSX_DEPLOYMENT_TARGET=10.9, DISABLE_NUMCODECS_AVX2=1, CFLAGS="$CFLAGS -Wno-implicit-function-declaration" } +[[tool.cibuildwheel.overrides]] +select = "*-macosx_arm64" +environment = { DISABLE_NUMCODECS_SSE2=1 } From 6f819a96cc959a6ccde733a4b3de8cb3d7333946 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Thu, 12 Oct 2023 20:52:40 +0200 Subject: 
[PATCH 27/34] Fix error in CI jobs (#468) --- .flake8 | 1 + 1 file changed, 1 insertion(+) diff --git a/.flake8 b/.flake8 index e6b17013..27acc6a7 100644 --- a/.flake8 +++ b/.flake8 @@ -12,3 +12,4 @@ exclude = fixture, notebooks, numcodecs.egg-info, + numcodecs/version.py, From 28d57330cff2a5fefb2b1bed073e8b253237142a Mon Sep 17 00:00:00 2001 From: jakirkham Date: Mon, 16 Oct 2023 13:26:56 -0700 Subject: [PATCH 28/34] Fix intermittent macOS GHA build issues w/AVX2 (#482) * Try dropping `CC` on macOS GHA builds Ideally this specification is not needed to pick up the right compiler on macOS. So try dropping it to see if things still work. * Disable AVX2 on macOS Lately there have been intermittent issues on macOS GHA building with AVX2 support on. So just turn it off. Hopefully this clears those errors. --- .github/workflows/ci-osx.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci-osx.yaml b/.github/workflows/ci-osx.yaml index e76fe050..3de464a0 100644 --- a/.github/workflows/ci-osx.yaml +++ b/.github/workflows/ci-osx.yaml @@ -49,7 +49,7 @@ jobs: shell: "bash -l {0}" run: | conda activate env - export CC=clang + export DISABLE_NUMCODECS_AVX2="" python -m pip install -v -e .[test,msgpack,zfpy] - name: List installed packages From 05a54d93f516241f9edaf70646372afc74169cd6 Mon Sep 17 00:00:00 2001 From: jakirkham Date: Mon, 16 Oct 2023 17:45:32 -0700 Subject: [PATCH 29/34] Disable AVX2 in macOS ARM wheels (#483) As AVX2 is an `x86_64` feature, ARM understandably doesn't have it. However for some reason macOS ARM wheels are building with this functionality enabled. So disable it in `pyproject.toml`. Hopefully this should fix that issue. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 873057c4..6167147c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -118,4 +118,4 @@ environment = { DISABLE_NUMCODECS_AVX2=1 } environment = { MACOSX_DEPLOYMENT_TARGET=10.9, DISABLE_NUMCODECS_AVX2=1, CFLAGS="$CFLAGS -Wno-implicit-function-declaration" } [[tool.cibuildwheel.overrides]] select = "*-macosx_arm64" -environment = { DISABLE_NUMCODECS_SSE2=1 } +environment = { DISABLE_NUMCODECS_AVX2=1, DISABLE_NUMCODECS_SSE2=1 } From 27a8cd8cacf4ea8b021794bddb219f3fa61f8bd1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Oct 2023 02:53:37 +0000 Subject: [PATCH 30/34] Bump actions/checkout from 3 to 4 (#462) Bumps [actions/checkout](https://github.com/actions/checkout) from 3 to 4. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: jakirkham --- .github/workflows/ci-linux.yaml | 2 +- .github/workflows/ci-osx.yaml | 2 +- .github/workflows/ci-windows.yaml | 2 +- .github/workflows/codeql-analysis.yml | 2 +- .github/workflows/wheel.yaml | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index 665f6233..20e79eed 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -16,7 +16,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive diff --git a/.github/workflows/ci-osx.yaml b/.github/workflows/ci-osx.yaml index 3de464a0..7757bc02 100644 --- a/.github/workflows/ci-osx.yaml +++ b/.github/workflows/ci-osx.yaml @@ -16,7 +16,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index c2847b5c..bc62f301 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -16,7 +16,7 @@ jobs: steps: - name: Checkout source - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 2d5a1fdf..7ae21d34 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -32,7 +32,7 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL diff --git a/.github/workflows/wheel.yaml b/.github/workflows/wheel.yaml index 99a26a7a..6474a669 100644 --- a/.github/workflows/wheel.yaml +++ b/.github/workflows/wheel.yaml @@ -22,7 +22,7 @@ jobs: # note: CIBW_ENVIRONMENT is now set in pyproject.toml steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true @@ -37,7 +37,7 @@ jobs: name: Build source distribution runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: true From 98a14f3c730fd020107885f2cfa104cd57b2e163 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 17 Oct 2023 03:37:57 +0000 Subject: [PATCH 31/34] Bump pypa/cibuildwheel from 2.14.1 to 2.16.2 (#474) Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.14.1 to 2.16.2. - [Release notes](https://github.com/pypa/cibuildwheel/releases) - [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md) - [Commits](https://github.com/pypa/cibuildwheel/compare/v2.14.1...v2.16.2) --- updated-dependencies: - dependency-name: pypa/cibuildwheel dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: jakirkham --- .github/workflows/wheel.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/wheel.yaml b/.github/workflows/wheel.yaml index 6474a669..69dd297b 100644 --- a/.github/workflows/wheel.yaml +++ b/.github/workflows/wheel.yaml @@ -26,7 +26,7 @@ jobs: with: submodules: true - - uses: pypa/cibuildwheel@v2.14.1 + - uses: pypa/cibuildwheel@v2.16.2 - uses: actions/upload-artifact@v3 with: From 1bc1e4433317f4697fc6e5b7292f51de47915c81 Mon Sep 17 00:00:00 2001 From: "Mads R. B. 
Kristensen" Date: Tue, 17 Oct 2023 06:26:44 +0200 Subject: [PATCH 32/34] `Codec` is now derived from `abc.ABC` (#472) * Codec is now derived from abc.ABC * Add release note for `Codec(ABC)` --------- Co-authored-by: jakirkham --- docs/release.rst | 10 ++++++++++ numcodecs/abc.py | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index 680dd186..f19f2c2f 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -6,6 +6,16 @@ Release notes # to document your changes. On releases it will be # re-indented so that it does not show up in the notes. +.. _release_0.12.1: + +0.12.1 +------ + +Fix +~~~ + +* `Codec` is now derived from `abc.ABC` + By :user:`Mads R. B. Kristensen `, :issue:`472`. .. _release_0.12.0: diff --git a/numcodecs/abc.py b/numcodecs/abc.py index 4aa9c1a4..703ba037 100644 --- a/numcodecs/abc.py +++ b/numcodecs/abc.py @@ -29,10 +29,10 @@ """ -from abc import abstractmethod +from abc import ABC, abstractmethod -class Codec: +class Codec(ABC): """Codec abstract base class.""" # override in sub-class From c28f5c912f1e0e4e9cc3357e04a357d40f2b439d Mon Sep 17 00:00:00 2001 From: Martin Durant Date: Tue, 17 Oct 2023 16:15:14 -0400 Subject: [PATCH 33/34] Make shuffle pyx functions `noexcept` (#477) * Make shuffle pyx functions return int * Update numcodecs/_shuffle.pyx Co-authored-by: jakirkham * Update numcodecs/_shuffle.pyx Co-authored-by: jakirkham * Note shuffle pyx functions are `noexcept` --------- Co-authored-by: jakirkham --- docs/release.rst | 4 +++- numcodecs/_shuffle.pyx | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index f19f2c2f..c1504201 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -14,8 +14,10 @@ Release notes Fix ~~~ -* `Codec` is now derived from `abc.ABC` +* ``Codec`` is now derived from ``abc.ABC`` By :user:`Mads R. B. Kristensen `, :issue:`472`. 
+* Make shuffle pyx functions ``noexcept`` + By :user:`Martin Durant `, :issue:`477`. .. _release_0.12.0: diff --git a/numcodecs/_shuffle.pyx b/numcodecs/_shuffle.pyx index 308ea491..0f0dafeb 100644 --- a/numcodecs/_shuffle.pyx +++ b/numcodecs/_shuffle.pyx @@ -8,7 +8,7 @@ cimport cython @cython.boundscheck(False) @cython.wraparound(False) -cpdef void _doShuffle(const unsigned char[::1] src, unsigned char[::1] des, Py_ssize_t element_size) nogil: +cpdef void _doShuffle(const unsigned char[::1] src, unsigned char[::1] des, Py_ssize_t element_size) noexcept nogil: cdef Py_ssize_t count, i, j, offset, byte_index count = len(src) // element_size for i in range(count): @@ -20,7 +20,7 @@ cpdef void _doShuffle(const unsigned char[::1] src, unsigned char[::1] des, Py_s @cython.boundscheck(False) @cython.wraparound(False) -cpdef void _doUnshuffle(const unsigned char[::1] src, unsigned char[::1] des, Py_ssize_t element_size) nogil: +cpdef void _doUnshuffle(const unsigned char[::1] src, unsigned char[::1] des, Py_ssize_t element_size) noexcept nogil: cdef Py_ssize_t count, i, j, offset, byte_index count = len(src) // element_size for i in range(element_size): From c58f210a84c2bd628efe7c84016e446797f5ea83 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 17 Oct 2023 16:10:41 -0700 Subject: [PATCH 34/34] Fix default entrypoint handling on older Pythons (#475) --- .github/workflows/ci-linux.yaml | 2 +- .github/workflows/ci-osx.yaml | 2 +- .github/workflows/ci-windows.yaml | 2 +- docs/release.rst | 2 ++ numcodecs/registry.py | 2 +- numcodecs/tests/test_entrypoints.py | 3 +- numcodecs/tests/test_entrypoints_backport.py | 32 ++++++++++++++++++++ pyproject.toml | 3 ++ 8 files changed, 43 insertions(+), 5 deletions(-) create mode 100644 numcodecs/tests/test_entrypoints_backport.py diff --git a/.github/workflows/ci-linux.yaml b/.github/workflows/ci-linux.yaml index 20e79eed..b5d1571a 100644 --- a/.github/workflows/ci-linux.yaml +++ b/.github/workflows/ci-linux.yaml @@ -50,7 
+50,7 @@ jobs: run: | conda activate env export DISABLE_NUMCODECS_AVX2="" - python -m pip install -v -e .[test,msgpack,zfpy] + python -m pip install -v -e .[test,test_extras,msgpack,zfpy] - name: List installed packages shell: "bash -l {0}" diff --git a/.github/workflows/ci-osx.yaml b/.github/workflows/ci-osx.yaml index 7757bc02..db09a12e 100644 --- a/.github/workflows/ci-osx.yaml +++ b/.github/workflows/ci-osx.yaml @@ -50,7 +50,7 @@ jobs: run: | conda activate env export DISABLE_NUMCODECS_AVX2="" - python -m pip install -v -e .[test,msgpack,zfpy] + python -m pip install -v -e .[test,test_extras,msgpack,zfpy] - name: List installed packages shell: "bash -l {0}" diff --git a/.github/workflows/ci-windows.yaml b/.github/workflows/ci-windows.yaml index bc62f301..201a2ac4 100644 --- a/.github/workflows/ci-windows.yaml +++ b/.github/workflows/ci-windows.yaml @@ -42,7 +42,7 @@ jobs: shell: "bash -l {0}" run: | conda activate env - python -m pip install -v -e .[test,msgpack,zfpy] + python -m pip install -v -e .[test,test_extras,msgpack,zfpy] - name: List installed packages shell: "bash -l {0}" diff --git a/docs/release.rst b/docs/release.rst index c1504201..2ec8e1b9 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -16,6 +16,8 @@ Fix * ``Codec`` is now derived from ``abc.ABC`` By :user:`Mads R. B. Kristensen `, :issue:`472`. +* Fix handling of entry points on older Python versions where ``importlib_metadata`` compatibility is concerned + By :user:`Vyas Ramasubramani `, :issue:`478`. * Make shuffle pyx functions ``noexcept`` By :user:`Martin Durant `, :issue:`477`. 
diff --git a/numcodecs/registry.py b/numcodecs/registry.py index 24186fa5..d0cd0748 100644 --- a/numcodecs/registry.py +++ b/numcodecs/registry.py @@ -16,7 +16,7 @@ def run_entrypoints(): entries.update({e.name: e for e in eps.select(group="numcodecs.codecs")}) else: # Otherwise, fallback to using get - entries.update(eps.get("numcodecs.codecs", [])) + entries.update({e.name: e for e in eps.get("numcodecs.codecs", [])}) run_entrypoints() diff --git a/numcodecs/tests/test_entrypoints.py b/numcodecs/tests/test_entrypoints.py index 0d017f2d..2923ac22 100644 --- a/numcodecs/tests/test_entrypoints.py +++ b/numcodecs/tests/test_entrypoints.py @@ -19,6 +19,7 @@ def set_path(): numcodecs.registry.codec_registry.pop("test") -def test_entrypoint_codec(set_path): +@pytest.mark.usefixtures("set_path") +def test_entrypoint_codec(): cls = numcodecs.registry.get_codec({"id": "test"}) assert cls.codec_id == "test" diff --git a/numcodecs/tests/test_entrypoints_backport.py b/numcodecs/tests/test_entrypoints_backport.py new file mode 100644 index 00000000..4e0459e5 --- /dev/null +++ b/numcodecs/tests/test_entrypoints_backport.py @@ -0,0 +1,32 @@ +import os.path +import pkgutil +import sys + +import pytest + +from multiprocessing import Process + +import numcodecs.registry + +if not pkgutil.find_loader("importlib_metadata"): # pragma: no cover + pytest.skip("This test module requires importlib_metadata to be installed") + +here = os.path.abspath(os.path.dirname(__file__)) + + +def get_entrypoints_with_importlib_metadata_loaded(): + # importlib_metadata patches importlib.metadata, which can lead to breaking changes + # to the APIs of EntryPoint objects used when registering entrypoints. Attempt to + # isolate those changes to just this test. 
+ import importlib_metadata # noqa: F401 + sys.path.append(here) + numcodecs.registry.run_entrypoints() + cls = numcodecs.registry.get_codec({"id": "test"}) + assert cls.codec_id == "test" + + +def test_entrypoint_codec_with_importlib_metadata(): + p = Process(target=get_entrypoints_with_importlib_metadata_loaded) + p.start() + p.join() + assert p.exitcode == 0 diff --git a/pyproject.toml b/pyproject.toml index 6167147c..31268274 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,6 +56,9 @@ test = [ "pytest", "pytest-cov", ] +test_extras = [ + "importlib_metadata", +] msgpack = [ "msgpack", ]