diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..7818bef34 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,15 @@ +# See https://docs.github.com/en/code-security/supply-chain-security/keeping-your-dependencies-updated-automatically/keeping-your-actions-up-to-date-with-dependabot + +version: 2 +updates: + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "daily" + labels: + - "Bot" + groups: + github-actions: + patterns: + - '*' diff --git a/.github/stubtest-allowlist b/.github/stubtest-allowlist new file mode 100644 index 000000000..0352fdcf7 --- /dev/null +++ b/.github/stubtest-allowlist @@ -0,0 +1,42 @@ +netCDF4.RealTypeLiteral +netCDF4.ComplexTypeLiteral +netCDF4.NumericTypeLiteral +netCDF4.CharTypeLiteral +netCDF4.TypeLiteral +netCDF4.NumPyRealType +netCDF4.NumPyComplexType +netCDF4.NumPyNumericType +netCDF4.NetCDFUDTClass +netCDF4.AccessMode +netCDF4.CompressionLevel +netCDF4.CompressionType +netCDF4.DatatypeType +netCDF4.DimensionsType +netCDF4.DiskFormat +netCDF4.EndianType +netCDF4.Format +netCDF4.QuantizeMode +netCDF4.CalendarType +netCDF4.DateTimeArray +netCDF4.FiltersDict +netCDF4.SzipInfo +netCDF4.BloscInfo +netCDF4.BoolInt +netCDF4.VarT +netCDF4.RealVarT +netCDF4.ComplexVarT +netCDF4.NumericVarT +netCDF4.Dimension.__reduce_cython__ +netCDF4.Dimension.__setstate_cython__ +netCDF4.Variable.auto_complex +netCDF4.Variable.__iter__ +netCDF4._netCDF4.Dimension.__reduce_cython__ +netCDF4._netCDF4.Dimension.__setstate_cython__ +netCDF4._netCDF4.NC_DISKLESS +netCDF4._netCDF4.NC_PERSIST +netCDF4._netCDF4.Variable.auto_complex +netCDF4._netCDF4.Variable.__iter__ +netCDF4._netCDF4.__reduce_cython__ +netCDF4._netCDF4.__setstate_cython__ +netCDF4._netCDF4.__test__ +netCDF4.utils \ No newline at end of file diff --git a/.github/workflows/build_latest.yml b/.github/workflows/build_latest.yml new file mode 100644 index 000000000..0d2f29121 --- /dev/null +++ b/.github/workflows/build_latest.yml @@ -0,0 +1,115 @@ +name: Build and Test Linux with latest netcdf-c +on: [push, pull_request] +jobs: + build-linux: + name: Python (${{ matrix.python-version }}) + runs-on: ubuntu-latest + env: + PNETCDF_VERSION: 1.14.1 + NETCDF_VERSION: 4.9.3 + NETCDF_DIR: ${{ github.workspace }}/.. 
+ NETCDF_EXTRA_CONFIG: --enable-pnetcdf + #CC: mpicc.mpich + CC: mpicc + #NO_NET: 1 + strategy: + matrix: + python-version: ["3.14"] + steps: + + - uses: actions/checkout@v6 + with: + submodules: true + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Ubuntu Dependencies + run: | + sudo apt-get update + #sudo apt-get install mpich libmpich-dev libhdf5-mpich-dev openmpi-bin openmpi-common libopenmpi-dev libhdf5-openmpi-dev libcurl4-openssl-dev bzip2 libsnappy-dev libblosc-dev libzstd-dev + sudo apt-get install openmpi-common libopenmpi-dev openmpi-bin libhdf5-openmpi-dev libcurl4-openssl-dev bzip2 libsnappy-dev libblosc-dev libzstd-dev + echo "Download and build PnetCDF version ${PNETCDF_VERSION}" + wget https://parallel-netcdf.github.io/Release/pnetcdf-${PNETCDF_VERSION}.tar.gz + tar -xzf pnetcdf-${PNETCDF_VERSION}.tar.gz + pushd pnetcdf-${PNETCDF_VERSION} + ./configure --prefix $NETCDF_DIR --enable-shared --disable-fortran --disable-cxx + make -j 2 + sudo make install + popd + echo "Download and build netCDF version ${NETCDF_VERSION}" + wget https://downloads.unidata.ucar.edu/netcdf-c/${NETCDF_VERSION}/netcdf-c-${NETCDF_VERSION}.tar.gz + tar -xzf netcdf-c-${NETCDF_VERSION}.tar.gz + pushd netcdf-c-${NETCDF_VERSION} + #export CPPFLAGS="-I/usr/include/hdf5/mpich -I${NETCDF_DIR}/include" + export CPPFLAGS="-I/usr/include/hdf5/openmpi -I${NETCDF_DIR}/include" + export LDFLAGS="-L${NETCDF_DIR}/lib" + #export LIBS="-lhdf5_mpich_hl -lhdf5_mpich -lm -lz" + export LIBS="-lhdf5_openmpi_hl -lhdf5_openmpi -lm -lz" + which $CC + ./configure --prefix $NETCDF_DIR --enable-netcdf-4 --enable-shared --enable-dap --enable-parallel4 $NETCDF_EXTRA_CONFIG + make -j 2 + sudo make install + popd + +# - name: The job has failed +# if: ${{ failure() }} +# run: | +# cd netcdf-c-${NETCDF_VERSION} +# cat config.log + + - name: Install python dependencies via pip + run: | + python -m pip install --upgrade pip + python -m pip install numpy cython cftime pytest twine wheel check-manifest mpi4py typing-extensions + + - name: Install netcdf4-python + run: | + export PATH=${NETCDF_DIR}/bin:${PATH} + export NETCDF_PLUGIN_DIR=${{ github.workspace }}/netcdf-c-${NETCDF_VERSION}/plugins/plugindir + python -m pip install . --no-build-isolation + + - name: Test + run: | + export PATH=${NETCDF_DIR}/bin:${PATH} + python checkversion.py + # serial + cd test + python run_all.py + # parallel (hdf5 for netcdf4, pnetcdf for netcdf3) + cd ../examples + #mpirun.mpich -np 4 python mpi_example.py + mpirun -np 4 --oversubscribe python mpi_example.py + if [ $? -ne 0 ] ; then + echo "hdf5 mpi test failed!" + exit 1 + else + echo "hdf5 mpi test passed!" + fi + #mpirun.mpich -np 4 python mpi_example_compressed.py + mpirun -np 4 --oversubscribe python mpi_example_compressed.py + if [ $? -ne 0 ] ; then + echo "hdf5 compressed mpi test failed!" + exit 1 + else + echo "hdf5 compressed mpi test passed!" + fi + #mpirun.mpich -np 4 python mpi_example.py NETCDF3_64BIT_DATA + mpirun -np 4 --oversubscribe python mpi_example.py NETCDF3_64BIT_DATA + if [ $? -ne 0 ] ; then + echo "pnetcdf mpi test failed!" + exit 1 + else + echo "pnetcdf mpi test passed!" + fi + +# - name: Tarball +# run: | +# export PATH=${NETCDF_DIR}/bin:${PATH} +# python setup.py --version +# check-manifest --version +# check-manifest --verbose +# pip wheel . 
-w dist --no-deps +# twine check dist/* diff --git a/.github/workflows/build_master.yml b/.github/workflows/build_master.yml new file mode 100644 index 000000000..5459dd52f --- /dev/null +++ b/.github/workflows/build_master.yml @@ -0,0 +1,93 @@ +name: Build and Test on Linux with netcdf-c github master +on: [push, pull_request] +jobs: + build-linux: + name: Python (${{ matrix.python-version }}) + runs-on: ubuntu-latest + env: + NETCDF_DIR: ${{ github.workspace }}/.. + #CC: mpicc.mpich + CC: mpicc + #NO_NET: 1 + strategy: + matrix: + python-version: ["3.14"] + steps: + + - uses: actions/checkout@v6 + with: + submodules: true + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Ubuntu Dependencies + run: | + sudo apt-get update + #sudo apt-get install mpich libmpich-dev libhdf5-mpich-dev libcurl4-openssl-dev bzip2 libsnappy-dev libblosc-dev libzstd-dev + sudo apt-get install openmpi-common libopenmpi-dev openmpi-bin libhdf5-openmpi-dev libcurl4-openssl-dev bzip2 libsnappy-dev libblosc-dev libzstd-dev + echo "Download and build netCDF github master" + git clone https://github.com/Unidata/netcdf-c + pushd netcdf-c + #export CPPFLAGS="-I/usr/include/hdf5/mpich -I${NETCDF_DIR}/include" + export CPPFLAGS="-I/usr/include/hdf5/openmpi -I${NETCDF_DIR}/include" + export LDFLAGS="-L${NETCDF_DIR}/lib" + #export LIBS="-lhdf5_mpich_hl -lhdf5_mpich -lm -lz" + export LIBS="-lhdf5_openmpi_hl -lhdf5_openmpi -lm -lz" + autoreconf -i + ./configure --prefix $NETCDF_DIR --enable-netcdf-4 --enable-shared --enable-dap --enable-parallel4 + make -j 2 + sudo make install + popd + +# - name: The job has failed +# if: ${{ failure() }} +# run: | +# cd netcdf-c-${NETCDF_VERSION} +# cat config.log + + - name: Install python dependencies via pip + run: | + python -m pip install --upgrade pip + python -m pip install numpy cython cftime pytest twine wheel check-manifest mpi4py mypy types-setuptools typing-extensions + + - name: Install netcdf4-python + run: | + export PATH=${NETCDF_DIR}/bin:${PATH} + export NETCDF_PLUGIN_DIR=${{ github.workspace }}/netcdf-c/plugins/plugindir + python -m pip install . --no-build-isolation + + - name: Test + run: | + export PATH=${NETCDF_DIR}/bin:${PATH} + #export HDF5_PLUGIN_PATH=${NETCDF_DIR}/plugins/plugindir + python checkversion.py + # serial + cd test + python run_all.py + # parallel + cd ../examples + #mpirun.mpich -np 4 python mpi_example.py + mpirun -np 4 --oversubscribe python mpi_example.py + if [ $? -ne 0 ] ; then + echo "hdf5 mpi test failed!" + exit 1 + else + echo "hdf5 mpi test passed!" + fi + #mpirun.mpich -np 4 python mpi_example_compressed.py + mpirun -np 4 --oversubscribe python mpi_example_compressed.py + if [ $? -ne 0 ] ; then + echo "hdf5 compressed mpi test failed!" + exit 1 + else + echo "hdf5 compressed mpi test passed!" + fi + + - name: Stubtest + run: | + stubtest netCDF4 --allowlist .github/stubtest-allowlist --mypy-config-file=pyproject.toml + mypy test + mypy examples diff --git a/.github/workflows/build_old.yml b/.github/workflows/build_old.yml new file mode 100644 index 000000000..dba7cc57e --- /dev/null +++ b/.github/workflows/build_old.yml @@ -0,0 +1,115 @@ +name: Build and Test Linux with older netcdf-c +on: [push, pull_request] +jobs: + build-linux: + name: Python (${{ matrix.python-version }}) + runs-on: ubuntu-latest + env: + PNETCDF_VERSION: 1.12.1 + NETCDF_VERSION: 4.7.4 + NETCDF_DIR: ${{ github.workspace }}/.. 
+ NETCDF_EXTRA_CONFIG: --enable-pnetcdf + #CC: mpicc.mpich + CC: mpicc + #NO_NET: 1 + strategy: + matrix: + python-version: ["3.14"] + steps: + + - uses: actions/checkout@v6 + with: + submodules: true + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v6 + with: + python-version: ${{ matrix.python-version }} + + - name: Install Ubuntu Dependencies + run: | + sudo apt-get update + #sudo apt-get install mpich libmpich-dev libhdf5-mpich-dev libcurl4-openssl-dev bzip2 libsnappy-dev libblosc-dev libzstd-dev + sudo apt-get install openmpi-common libopenmpi-dev openmpi-bin libhdf5-openmpi-dev libcurl4-openssl-dev bzip2 libsnappy-dev libblosc-dev libzstd-dev + echo "Download and build PnetCDF version ${PNETCDF_VERSION}" + wget https://parallel-netcdf.github.io/Release/pnetcdf-${PNETCDF_VERSION}.tar.gz + tar -xzf pnetcdf-${PNETCDF_VERSION}.tar.gz + pushd pnetcdf-${PNETCDF_VERSION} + ./configure --prefix $NETCDF_DIR --enable-shared --disable-fortran --disable-cxx + make -j 2 + sudo make install + popd + echo "Download and build netCDF version ${NETCDF_VERSION}" + #wget https://downloads.unidata.ucar.edu/netcdf-c/${NETCDF_VERSION}/netcdf-c-${NETCDF_VERSION}.tar.gz + wget https://www.gfd-dennou.org/arch/netcdf/unidata-mirror/netcdf-c-${NETCDF_VERSION}.tar.gz + tar -xzf netcdf-c-${NETCDF_VERSION}.tar.gz + pushd netcdf-c-${NETCDF_VERSION} + #export CPPFLAGS="-I/usr/include/hdf5/mpich -I${NETCDF_DIR}/include" + export CPPFLAGS="-I/usr/include/hdf5/openmpi -I${NETCDF_DIR}/include" + export LDFLAGS="-L${NETCDF_DIR}/lib" + #export LIBS="-lhdf5_mpich_hl -lhdf5_mpich -lm -lz" + export LIBS="-lhdf5_openmpi_hl -lhdf5_openmpi -lm -lz" + ./configure --prefix $NETCDF_DIR --enable-netcdf-4 --enable-shared --enable-dap --enable-parallel4 $NETCDF_EXTRA_CONFIG + make -j 2 + sudo make install + popd + +# - name: The job has failed +# if: ${{ failure() }} +# run: | +# cd netcdf-c-${NETCDF_VERSION} +# cat config.log + + - name: Install python dependencies via pip + run: | + python -m pip install --upgrade pip + python -m pip install numpy cython cftime pytest twine wheel check-manifest mpi4py typing-extensions + + - name: Install netcdf4-python + run: | + export PATH=${NETCDF_DIR}/bin:${PATH} + export NETCDF_PLUGIN_DIR=${{ github.workspace }}/netcdf-c-${NETCDF_VERSION}/plugins/plugindir + python -m pip install . --no-build-isolation + + - name: Test + run: | + export PATH=${NETCDF_DIR}/bin:${PATH} + python checkversion.py + # serial + cd test + python run_all.py + # parallel (hdf5 for netcdf4, pnetcdf for netcdf3) + cd ../examples + #mpirun.mpich -np 4 python mpi_example.py + mpirun -np 4 --oversubscribe python mpi_example.py + if [ $? -ne 0 ] ; then + echo "hdf5 mpi test failed!" + exit 1 + else + echo "hdf5 mpi test passed!" + fi + #mpirun.mpich -np 4 python mpi_example_compressed.py + mpirun -np 4 --oversubscribe python mpi_example_compressed.py + if [ $? -ne 0 ] ; then + echo "hdf5 compressed mpi test failed!" + exit 1 + else + echo "hdf5 compressed mpi test passed!" + fi + #mpirun.mpich -np 4 python mpi_example.py NETCDF3_64BIT_DATA + mpirun -np 4 --oversubscribe python mpi_example.py NETCDF3_64BIT_DATA + if [ $? -ne 0 ] ; then + echo "pnetcdf mpi test failed!" + exit 1 + else + echo "pnetcdf mpi test passed!" + fi + +# - name: Tarball +# run: | +# export PATH=${NETCDF_DIR}/bin:${PATH} +# python setup.py --version +# check-manifest --version +# check-manifest --verbose +# pip wheel . 
-w dist --no-deps +# twine check dist/* diff --git a/.github/workflows/cibuildwheel.yml b/.github/workflows/cibuildwheel.yml new file mode 100644 index 000000000..517d19e27 --- /dev/null +++ b/.github/workflows/cibuildwheel.yml @@ -0,0 +1,214 @@ +name: Wheels + +on: + pull_request: + push: + tags: + - "v*" + release: + types: + - published + +permissions: + contents: read + +jobs: + + build_sdist: + name: Build source distribution + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v6 + name: Install Python + with: + python-version: 3.x + + - name: Install APT packages + if: contains(${{ matrix.os }}, 'ubuntu') + run: | + sudo apt update + sudo apt install libhdf5-dev libnetcdf-dev + + - name: Build sdist + run: > + pip install build + && python -m build --sdist . --outdir dist + + - uses: actions/upload-artifact@v6 + with: + name: pypi-artifacts + path: ${{ github.workspace }}/dist/*.tar.gz + + + build_bdist: + name: "Build ${{ matrix.os }} (${{ matrix.arch }}) wheels" + runs-on: ${{ matrix.os }} + # Prevent hanging when building from emulation like aarch64. + timeout-minutes: 300 + strategy: + fail-fast: false + matrix: + include: + - os: ubuntu-22.04 + arch: x86_64 + - os: ubuntu-24.04-arm + arch: aarch64 + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + # For aarch64 support + # https://cibuildwheel.pypa.io/en/stable/faq/#emulation + - uses: docker/setup-qemu-action@v3 + with: + platforms: all + if: runner.os == 'Linux' && matrix.arch == 'aarch64' + + - name: Build oldest and newest Python + shell: bash + # On PRs we run only oldest and newest Python versions to reduce CI load. + # Skips pypy and musllinux everywhere. + # We are building 310, 311 and 314 for now. + # (3.11 is the oldest version for which we support abi3 wheels) + # These needs to rotate every new Python release. 
+ run: | + set -x + echo "CIBW_BUILD=cp310-* cp311-* cp314-* cp314t-*" >> $GITHUB_ENV + set +x + + if: ${{ github.event_name }} == "pull_request" + + - name: "Building ${{ matrix.os }} (${{ matrix.arch }}) wheels" + uses: pypa/cibuildwheel@v3.3.1 + env: + CIBW_ARCHS: ${{ matrix.arch }} + + - uses: actions/upload-artifact@v6 + with: + name: pypi-artifacts-${{ matrix.os }}-${{ matrix.arch }} + path: ${{ github.workspace }}/wheelhouse/*.whl + + + build_wheels_winmac: + name: Build wheels for ${{matrix.arch}} on ${{ matrix.os }} + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - os: windows-latest + arch: AMD64 + - os: macos-14 + arch: arm64 + - os: macos-15-intel + arch: x86_64 + + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v6 + name: Install Python + with: + python-version: 3.x + + - name: Setup Micromamba Python ${{ matrix.python-version }} + uses: mamba-org/setup-micromamba@v2 + with: + environment-name: build + init-shell: bash + create-args: >- + python=${{ matrix.python-version }} libnetcdf=4.9.3 --channel conda-forge + + - name: Build wheels for Windows/Mac + uses: pypa/cibuildwheel@v3.3.1 + env: + CIBW_ARCHS: ${{ matrix.arch }} + + - uses: actions/upload-artifact@v6 + with: + name: pypi-artifacts-${{ matrix.os }}-${{ matrix.arch }} + path: ${{ github.workspace }}/wheelhouse/*.whl + + + build_wheels_windows_arm: + name: Build wheels for ARM64 on Windows + runs-on: windows-11-arm + steps: + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - uses: actions/setup-python@v6 + name: Install Python + with: + python-version: 3.x + + - name: Install vcpkg dependencies + shell: pwsh + run: | + # Install vcpkg + git clone https://github.com/Microsoft/vcpkg.git C:\vcpkg + cd C:\vcpkg + .\bootstrap-vcpkg.bat + + # Install netcdf and dependencies + .\vcpkg.exe install hdf5:arm64-windows netcdf-c:arm64-windows zlib:arm64-windows + + # Set environment variables for build + echo "HDF5_DIR=C:\vcpkg\installed\arm64-windows" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + echo "NETCDF4_DIR=C:\vcpkg\installed\arm64-windows" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append + + - name: Build wheels for Windows ARM64 + uses: pypa/cibuildwheel@v3.3.1 + env: + CIBW_ARCHS: ARM64 + CIBW_SKIP: "cp310-*" + + - uses: actions/upload-artifact@v6 + with: + name: pypi-artifacts-windows-11-arm-ARM64 + path: ${{ github.workspace }}/wheelhouse/*.whl + + + show-artifacts: + needs: [build_bdist, build_sdist, build_wheels_winmac, build_wheels_windows_arm] + name: "Show artifacts" + runs-on: ubuntu-22.04 + steps: + - uses: actions/download-artifact@v7 + with: + pattern: pypi-artifacts* + path: ${{ github.workspace }}/dist + merge-multiple: true + + - shell: bash + run: | + ls -lh ${{ github.workspace }}/dist + + + publish-artifacts-pypi: + needs: [build_bdist, build_sdist, build_wheels_winmac, build_wheels_windows_arm] + name: "Publish to PyPI" + runs-on: ubuntu-22.04 + # upload to PyPI for every tag starting with 'v' + if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v') + steps: + - uses: actions/download-artifact@v7 + with: + pattern: pypi-artifacts* + path: ${{ github.workspace }}/dist + merge-multiple: true + + - uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_PASSWORD }} + print_hash: true diff --git a/.github/workflows/miniconda.yml b/.github/workflows/miniconda.yml new file mode 100644 index 000000000..220fdf57c --- /dev/null 
+++ b/.github/workflows/miniconda.yml @@ -0,0 +1,107 @@ +name: Build and Test + +on: + pull_request: + push: + branches: [master] + +jobs: + run-serial: + runs-on: ${{ matrix.os }} + #env: + # NO_NET: 1 + strategy: + matrix: + python-version: [ + ["python", "3.10"], + ["python", "3.11"], + ["python", "3.12"], + ["python", "3.13"], + ["python", "3.14"], + ["python-freethreading", "3.14" ], + ] + os: [windows-latest, ubuntu-latest, macos-latest] + platform: [x64, x32] + exclude: + - os: macos-latest + platform: x32 + fail-fast: false + defaults: + run: + shell: bash -l {0} + + steps: + - uses: actions/checkout@v6 + with: + submodules: true + + - name: Setup Micromamba + uses: mamba-org/setup-micromamba@v2 + with: + environment-name: TEST + init-shell: bash + create-args: >- + ${{ matrix.python-version[0] }}=${{ matrix.python-version[1] }} + numpy cython pip setuptools pytest hdf5 libnetcdf cftime zlib certifi typing-extensions + --channel conda-forge + + - name: Install netcdf4-python + run: | + export PATH="${CONDA_PREFIX}/bin:${CONDA_PREFIX}/Library/bin:$PATH" # so setup.py finds nc-config + python -m pip install -v -e . --no-deps --no-build-isolation --force-reinstall + + - name: Tests + run: | + if [ "$RUNNER_OS" == "Windows" ]; then + export HDF5_PLUGIN_PATH="${CONDA_PREFIX}\\Library\\hdf5\\lib\\plugin" + else + export HDF5_PLUGIN_PATH="${CONDA_PREFIX}/hdf5/lib/plugin/" + fi + pytest -s -rxs -v test + + run-mpi: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [ "3.12" ] + os: [ubuntu-latest] + platform: [x64] + defaults: + run: + shell: bash -l {0} + steps: + - uses: actions/checkout@v6 + with: + submodules: true + + - name: Setup Micromamba + uses: mamba-org/setup-micromamba@v2 + with: + environment-name: TEST + init-shell: bash + create-args: >- + python=${{ matrix.python-version }} + numpy cython pip pytest openmpi mpi4py hdf5=*=mpi* libnetcdf=*=mpi* cftime zlib certifi typing-extensions + --channel conda-forge + + - name: Install netcdf4-python with mpi + run: | + export PATH="${CONDA_PREFIX}/bin:${CONDA_PREFIX}/Library/bin:$PATH" # so setup.py finds nc-config + nc-config --all + python -m pip install -v -e . --no-build-isolation --no-deps --force-reinstall + + - name: Tests + run: | + cd test && python run_all.py + cd ../examples + export PATH="${CONDA_PREFIX}/bin:${CONDA_PREFIX}/Library/bin:$PATH" + which mpirun + mpirun --version + mpirun -np 4 --oversubscribe python mpi_example.py # for openmpi + #mpirun -np 4 python mpi_example.py + if [ $? -ne 0 ] ; then + echo "hdf5 mpi test failed!" + exit 1 + else + echo "hdf5 mpi test passed!" 
+ fi diff --git a/.gitignore b/.gitignore index 89fdd2a41..219570adf 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,15 @@ build/ *.pyc dist/ *.egg-info/ -netCDF4/_netCDF4.c +__pycache__ +.mypy_cache +src/netCDF4/*.c +src/netCDF4/*.so +src/netCDF4/*.pyd include/constants.pyx +include/parallel_support_imports.pxi netcdftime/_netcdftime.c +venv/ +.eggs/ +.idea/ +.vscode/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 000000000..e69de29bb diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 8c7aa5af4..000000000 --- a/.travis.yml +++ /dev/null @@ -1,66 +0,0 @@ -language: python - -sudo: false - -addons: - apt: - packages: - - libhdf5-serial-dev - - netcdf-bin - - libnetcdf-dev - -env: - global: - - DEPENDS="numpy>=1.9.0 cython>=0.21 setuptools>=18.0" - - NO_NET=1 - - MPI=0 - -python: - - "2.7" - - "3.5" - - "3.6" - -matrix: - allow_failures: - - python: "3.7-dev" - include: - # Absolute minimum dependencies. - - python: 2.7 - env: - - DEPENDS="numpy==1.9.0 cython==0.21 ordereddict==1.1 setuptools==18.0" - # test MPI - - python: 2.7 - env: - - MPI=1 - - CC=mpicc - - DEPENDS="numpy>=1.9.0 cython>=0.21 setuptools>=18.0 mpi4py>=1.3.1" - - NETCDF_VERSION=4.4.1.1 - - NETCDF_DIR=$HOME - - PATH=${NETCDF_DIR}/bin:${PATH} # pick up nc-config here - addons: - apt: - packages: - - openmpi-bin - - libopenmpi-dev - - libhdf5-openmpi-dev - -notifications: - email: false - -before_install: - - pip install $DEPENDS - -install: - - if [ $MPI -eq 1 ] ; then ci/travis/build-parallel-netcdf.sh; fi - - python setup.py build - - python setup.py install - -script: - - | - if [ $MPI -eq 1 ] ; then - cd examples - mpirun -np 4 python mpi_example.py - cd .. - fi - - cd test - - python run_all.py diff --git a/COPYING b/COPYING deleted file mode 100644 index 2cc338aca..000000000 --- a/COPYING +++ /dev/null @@ -1,39 +0,0 @@ -copyright: 2008 by Jeffrey Whitaker. - -Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby granted, -provided that the above copyright notice appear in all copies and that -both the copyright notice and this permission notice appear in -supporting documentation. - -THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO -EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF -USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR -OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. - - -parts of pyiso8601 are included in netcdftime under the following license: - -Copyright (c) 2007 Michael Twomey - -Permission is hereby granted, free of charge, to any person obtaining a -copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be included -in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Changelog b/Changelog index 89f8a9c38..55bc6b0db 100644 --- a/Changelog +++ b/Changelog @@ -1,9 +1,321 @@ - version 1.3.2 (not yet released) + version 1.7.4.1 (tag v1.7.4.1rel) + ================================= + * Change default encoding for stringtochar/chartostring functions from 'utf-8' to 'utf-8'/'ascii' for dtype.kind='U'/'S' + (issue #1464). + + version 1.7.4 (tag v1.7.4rel) + ================================ + * Make sure automatic conversion of character arrays <--> string arrays works for Unicode strings (issue #1440). + (previously only worked correctly for encoding="ascii"). + * Add netcdf plugins (blosc, zstd, bzip2) in wheels. Blosc plugin doesn't work in Windows wheels. + Macos wheels now use conda provided libs. (PR #1450) + * Add windows/arm (PR #1453) and free-threaded python wheels (issue #1454). Windows wheels now use netcdf-c 4.9.3. + WARNING: netcdf-c is not thread-safe and netcdf4-python does not have internal locking, so expect segfaults if you + use netcdf4-python on multiple threads with free-threaded python. Users must exercise care to only call netcdf from + a single thread. + + version 1.7.3 (tag v1.7.3rel) + ============================= + * Python 3.14 wheels (issue #1432) + * support os.PathLike arguments for `Dataset.fromcdl` and raise a `FileNotFoundError` + if the cdl is missing and a `FileExistsError` if the nc file already exists (PR #1387) + * raise more informative error when trying to iterate or + perform a membership operation on a Dataset (issue #1383) + * fix type hint for createEnumType (issue #1378) + * add python 3.13 to windows wheel builds (PR #1377) + * allow slicing of vlen and string variables with non-unitary strides (issue #1408). + + version 1.7.2 (tag v1.7.2rel) + ============================= + * add static type hints (PRs #1302, #1349) + * Expose nc_rc_set, nc_rc_get (via rc_set, rc_get module functions). (PR #1348) + * Add Variable.get_fill_value and allow `fill_value='default'` to + set `_FillValue` attribute using default fill values. (issue #1374, PR #1375; see the usage sketch below). + * Fix NETCDF3 endian error (issue #1373, PR #1355). + + version 1.7.1 (tag v1.7.1rel) +=============================== + * include nc_complex source code from v0.2.0 tag (instead of using submodule). + * add aarch64 wheels. + + version 1.7.0 (tag v1.7.0rel) +=============================== + * add support for complex numbers via `auto_complex` keyword to `Dataset` (PR #1295) + * fix for deprecated Cython `DEF` and `IF` statements using compatibility header + with shims for unavailable functionality (PR #1277) + * use `szip` as the library name on Windows (PR #1304) + * add support for MS-MPI `MPI_Message` detection (PR #1305) + * fix for issue #1306 - surprising result when indexing vlen str with non-contiguous + indices. + * Fix bug in set_collective introduced in PR #1277 (collective mode was + always set). + + version 1.6.5 (tag v1.6.5rel) +=============================== + * fix for issue #1271 (mask ignored if bool MA assigned to uint8 var) + * include information on specific object when reporting errors from netcdf-c + * python 3.12 wheels added, support for python 3.7 removed.
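The 1.7.2 entry above introduces `Variable.get_fill_value` and the `fill_value='default'` option to `createVariable`. A minimal sketch of that usage follows, assuming netCDF4 >= 1.7.2; the file, dimension, and variable names are made up for illustration:

```python
import netCDF4

# Hypothetical file/variable names, purely illustrative.
with netCDF4.Dataset("fill_example.nc", "w") as nc:
    nc.createDimension("x", 10)
    # fill_value='default' (1.7.2+) writes the library's default fill value
    # for this dtype into the _FillValue attribute.
    v = nc.createVariable("v", "f4", ("x",), fill_value="default")
    # get_fill_value (1.7.2+) returns the fill value in use for the variable.
    print(v.get_fill_value())  # default float32 fill value, roughly 9.97e+36
```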
+ + version 1.6.4 (tag v1.6.4rel) +=============================== + * set path to SSL certificates internally, so https DAP URLs work with wheels + (issue #1246, requires nc_rc_set function available starting with netcdf-c + 4.9.1, plus bugfix in netcdf-c PR #2690). + Added certifi as a dependency. + * Added `isopen` method to `MFDataset` object to check if underlying files are open. + + version 1.6.3 (tag v1.6.3rel) +============================== + * Use ``nc_put_vars`` for strided writes for netcdf-c >= 4.6.2 (issue #1222). + * _Unsigned="false" should be same as not having _Unsigned set (issue #1232). + _Unsigned now must be set to "true" or "True" for variable to be interpreted + as unsigned, instead of just having _Unsigned be set (to anything). + * pypi wheels built with netcdf-c 4.9.1. + + version 1.6.2 (tag v1.6.2rel) +============================== + * Added ``netCDF4.__has_set_alignment__`` property to help identify if the + underlying netcdf4 supports setting the HDF5 alignment. + * Slicing multi-dimensional variables with an all False boolean index array + now returns an empty numpy array (instead of raising an exception - issue #1197). + Behavior now consistent with numpy slicing. + * fix problem with compiling using netcdf-c < 4.9.0 (issue #1209) + * pypi wheels built with netcdf-c 4.9.0. + + version 1.6.1 (tag v1.6.1rel) +============================== + * add Dataset methods has_<name>_filter (where <name>=zstd,blosc,bzip2,szip) + to check for availability of extra compression filters. + * release GIL for all C-lib calls (issue #1180). + * Add support for nc_set_alignment and nc_get_alignment to control alignment + of data within HDF5 files. + + version 1.6.0 (tag v1.6.0rel) +============================== + * add support for new quantization functionality in netcdf-c 4.9.0 via "significant_digits" + and "quantize_mode" kwargs in Dataset.createVariable. Default quantize_mode is "BitGroom", + but alternate methods "BitRound" and "GranularBitRound" also supported. + * opening a Dataset in append mode (mode = 'a' or 'r+') creates a Dataset + if one does not already exist (similar to python open builtin). Issue #1144. + Added a mode='x' option (as in python open) which is the same as mode='w' with + clobber=False. + * allow createVariable to accept either Dimension instances or Dimension + names in "dimensions" tuple kwarg (issue #1145). + * remove all vestiges of python 2 in _netCDF4.pyx and set cython language_level + directive to 3 in setup.py. + * add 'compression' kwarg to createVariable to enable new compression + functionality in netcdf-c 4.9.0. 'None','zlib','szip','zstd','bzip2', + 'blosc_lz','blosc_lz4','blosc_lz4hc','blosc_zlib' and 'blosc_zstd' + are currently supported. 'blosc_shuffle', + 'szip_mask' and 'szip_pixels_per_block' kwargs also added. + compression='zlib' is equivalent to (the now deprecated) zlib=True. + If the environment variable NETCDF_PLUGIN_DIR is set to point to the + directory with the compression plugin lib__nc* files, then the compression plugins will + be installed within the package and be automatically available (the binary + wheels have this). Otherwise, the environment variable HDF5_PLUGIN_PATH + needs to be set at runtime to point to plugins in order to use the new compression + options. + * MFDataset did not aggregate 'name' variable attribute (issue #1153). + * issue warning instead of raising an exception if missing_value or + _FillValue can't be cast to the variable type when creating a + masked array (issue #1152).
+ * Define MPI_Session for compatibility with current mpi4py (PR #1156). + + version 1.5.8 (tag v1.5.8rel) +============================== + * Fix Enum bug (issue #1128): the enum_dict member of an EnumType read from a file + contains invalid values when the enum is large enough (more than 127 or 255 + members). + * Binary wheels for aarch64 and python 3.10. + + version 1.5.7 (tag v1.5.7rel) +============================== + * don't try to mask vlens with default _FillValue, since vlens don't have a default _FillValue. + This gets rid of numpy DeprecationWarning (issue #1099). + * update docs to reflect the fact that a variable must be in collective mode before writing + compressed data to it in parallel. Added a test for this (examples/mpi_example_compressed.py). + Issue #1108. + * Fix OverflowError when dimension sizes become greater than 2**32-1 elements on Windows (Issue #1112). + * Don't return masked arrays for vlens (only for primitive and enum types - issue #1115). + + version 1.5.6 (tag v1.5.6rel) +============================== + * move CI/CD tests from travis/appveyor to Github Actions (PR #1061). + * move netCDF4 dir under src so module can be imported in source directory (PR #1062). + * change numpy.bool to numpy.bool_ and numpy.float to numpy.float_ (float and + bool are deprecated in numpy 1.20, issue #1065) + * clean up docstrings so that they work with latest pdoc. + * update cython numpy API to remove deprecation warnings. + * Add "fromcdl" and "tocdl" Dataset methods for import/export of CDL + via ncdump/ncgen called externally via the subprocess module (issue #1078). + * remove python 2.7 support. + * broadcast data (if possible)to conform to variable shape when writing to a slice + (issue #1083). + + version 1.5.5.1 (tag v1.5.5.1rel) +================================== + * rebuild binary wheels for linux and OSX to link netcdf-c 4.7.4 and hdf5 1.12.0. + + version 1.5.5 (tag v1.5.5rel) +============================== + * have setup.py always try use nc-config first to find paths to netcdf and + hdf5 libraries and headers. Don't use pkg-config to find HDF5 if HDF5 env + vars are set (or read from setup.cfg). + * Change MIT license text to standard OSI wording (PR #1046). + + version 1.5.4 (tag v1.5.4rel) +============================== + * fix printing of variable objects for variables that end with the letter 'u' + (issue #983). + * make sure root group has 'name' attribute (issue #988). + * add the ability to pack vlen floats to integers using + scale_factor/add_offset (issue #1003) + * use len instead of deprecated numpy.alen (issue #1008) + * check size on valid_range instead of using len (issue #1013). + * add `set_chunk_cache/get_chunk_cache` module functions to reset the + default chunk cache sizes before opening a Dataset (issue #1018). + * replace use of numpy's deprecated tostring() method with tobytes() + (issue #1023). + * bump minimal numpy version to 1.9 (first version to have tobytes()). + + version 1.5.3 (tag v1.5.3rel) +============================== + * make sure arrays are masked that are not filled when auto_fill is off + (issue #972). + * python 3.8 binary wheels. + + version 1.5.2 (tag v1.5.2rel) +============================== + * fix for scaling bug when _Unsigned attribute is set and byteorder of data + does not match native byteorder (issue #930). + * revise documentation for Python 3 (issue #946). + * establish support for Python 2.7, 3.5, 3.6 and 3.7 (issue #948). 
+ * use dict built-in instead of OrderedDict for Python 3.7+ + (pull request #955). + * remove underline ANSI in Dataset string representation (pull request #956). + * remove newlines from string representation (pull request #960). + * fix for issue #957 (size of scalar var is a float since numpy.prod(())=1.0). + * make sure Variable.setncattr fails to set _FillValue (issue #959). + * fix detection of parallel HDF5 support with netcdf-c 4.6.1 (issue #964). + + version 1.5.1.2 (tag v1.5.1.2rel) +================================== + * fix another slicing bug introduced by the fix to issue #906 (issue #922). + + version 1.5.1.1 (tag v1.5.1.1rel) +================================== + * fixed __version__ attribute (was set incorrectly in 1.5.1 release). + * fix for issue #919 (assigning 2d array to 3d variable with singleton + first dimension with v[:] = a). + * minimum numpy changed from 1.9.0 to 1.10.0. + + version 1.5.1 (tag v1.5.1rel) +============================== + * fix issue #908 by adding workaround for incorrect value returned + by nc_inq_var_fill for netcdf-c < 4.5.1. + * fix bug writing slice to unlimited dimension that is not the first + (leftmost). Issue #906. + * make sure data gets converted to type of scale_factor when add_offset=0 + and scale_factor=1 (issue #913). + * fix for reading empty (NIL) string attributes (issue #915). + + version 1.5.0.1 (tag v1.5.0.1rel) +================================== + * binary wheels for linux and macosx rebuilt against netcdf-c 4.6.3 (instead + of 4.4.1.1). + * add read-shared mode (mode='rs'). Significantly speeds up reads of NETCDF3 + files (pull request #902). + + version 1.5.0 (tag v1.5.0rel) +=============================== + * added support for parallel IO in the classic netcdf-3 formats through the + pnetcdf library (pull request #897). + + version 1.4.3.2 (tag v1.4.3.2) +=============================== + * include missing membuf.pyx file in release source tarball. + + version 1.4.3.1 (tag v1.4.3.1) +=============================== + * fix bug in implementation of NETCDF4_CLASSIC support for parallel IO + in v1.4.3 release. + + version 1.4.3 (tag v1.4.3rel) +============================= + * make set_always_mask work in MFDataset. + * fix saving diskless files to disk with netcdf-c >= 4.6.2. + * write to an in-memory Dataset, memoryview buffer returned by Dataset.close() + (issue #865, requires netcdf-c >= 4.6.2) + * fix performance regression when using large sequences of consecutive + integers for indexing with netcdf-c >= 4.6.2 (issue #870). + * improved error messages for ncinfo and other utilities (issue #873). + * fix for int64 attributes not being created for NETCDF3_64BIT_DATA (CDF5) + files (issue #878). + * fix for MPI parallel error ("NetCDF: Attempt to use feature that was not + turned on when netCDF was built") using netcdf-c 4.6.2 (issue #883). + * Added methods `set_ncstring_attrs()` to Dataset, Group and Variable that + forces all text attributes to be written as variable length strings (netCDF + type NC_STRING - issue #882). + * Allow parallel mode with NETCDF4_CLASSIC files (issue #890). + + version 1.4.2 (tag v1.4.2rel) +============================= + * add get_dims Variable method (issue #824) + * make sure format keyword not ignored when mode is 'ws' (issue #827) + * fix numpy FutureWarning (non-tuple sequence for + multidimensional indexing is deprecated), issue #833. + * add 'master_file' kwarg to MFDataset.__init__ (issue #835). + * always use nc_get_vars for strided access over OpenDAP (issue #838). 
+ * raise FutureWarning when trying to set multi-dimensional array attribute + while still silently flattening the array (issue #841). Will change + to ValueError in next release (1.4.3). + * fix parallel writes when both nc4 parallel and pnetcdf parallel options + enabled in the netcdf-c library (issue #820). + * fix for writing masked scalar character variable (issue #850). + + version 1.4.1 (tag v1.4.1rel) +============================= + * disable workaround for slow nc_get_vars for __netcdflibversion__ >= 4.6.2, + since a fix was added to speed up nc_get_vars in the C library. Issue 680. + * new Dataset and Variable methods (set_always_mask) to optionally + re-enable old behaviour (return masked arrays + only if selected slice contains missing values) (issue #809). + + version 1.4.0 (tag v1.4.0rel) ============================= * fixed bug in detection of CDF5 library support in setup.py (pull request #736, issue #713). * fixed reading of variables with zero-length dimensions in NETCDF3_CLASSIC files (issue #743). + * allow integer-like objects in VLEN slices (not just python ints, issue + #526, pull request #757). + * treating _FillValue as a valid_min/valid_max was too surprising, despite + the fact the thet netcdf docs 'attribute best practices' suggests that + clients should to this. Revert this change from issue #576 (issue #761). + * remove netcdftime, since it is now a separate package. date2num, num2date + and date2index still importable from netCDF4. + * fix 'Unreachable code' cython warning (issue #767). + * Change behavior of string attributes so that nc.stringatt = ['foo','bar'] + produces an vlen string array attribute in NETCDF4, instead of concatenating + into a single string ('foobar'). In NETCDF3/NETCDF4_CLASSIC, an IOError + is now raised, instead of writing 'foobar'. Issue #770. + * fix loading of enum type names (issue #775). + * make sure missing_value applies only to scaled short integers if + auto-scaling is on (issue #777). + * automatically create views of compound types with character arrays as + numpy strings (issue #773). Can be disabled using + 'set_auto_chartostring(False)'. Numpy structured + array dtypes with 'SN' string subtypes can now be used to + define netcdf compound types (they get converted to ('S1',N) + character array types automatically). + * always return masked array by default, even if there are no + masked values (too surprising to get ndarray or MaskedArray depending + on slice, issue #785). + * treat valid_min/valid_max/_FillValue/missing_value as unsigned + integers if _Unsigned is set (to mimic behaviour of netcdf-java). + Conversion to unsigned type now occurs before masking and scale/offset + operation. Issue #794. + version 1.3.1 (tag v1.3.1rel) ============================= @@ -22,20 +334,20 @@ version 1.3.0 (tag v1.3.0rel) ============================== - * always search for HDF5 headers when building, even when nc-config is used + * always search for HDF5 headers when building, even when nc-config is used (since nc-config does not always include the path to the HDF5 headers). Also use H5get_libversion to obtain HDF5 version info instead of H5public.h. Fixes issue #677. * encoding kwarg added to Dataset.__init__ and Dataset.filepath (default is to use sys.getfilesystemencoding()) so that oddball - encodings (such as cp1252 on windows) can be handled in Dataset + encodings (such as cp1252 on windows) can be handled in Dataset filepaths (issue #686). * Calls to nc_get_vars are avoided, since nc_get_vars is very slow (issue #680). 
Strided slices are now converted to multiple calls to nc_get_vara. This speeds up strided slice reads by a factor of 10-100 (especially for NETCDF4/HDF5 files) in most cases. In some cases, strided reads using nc_get_vars are faster (e.g. strided reads over many dimensions - such as var[:,::2,::2,::2])), so a variable method use_nc_get_vars was added. + such as var[:,::2,::2,::2])), so a variable method use_nc_get_vars was added. var.use_nc_get_vars(True) will tell the library to use nc_get_vars instead of multiple calls to nc_get_vara, which was the default behaviour previous to this change. @@ -43,10 +355,10 @@ exactly backwards (issue #685 - thanks to @pgamez and @mdecker). * Fix error message for illegal ellipsis slicing, add test (issue #701). * Improve timezone format parsing in netcdftime - (https://github.com/Unidata/netcdftime/issues/17). + (https://github.com/Unidata/netcdftime/issues/17). * make sure numpy datatypes used to define CompoundTypes have isalignedstruct flag set to True (issue #705), otherwise. - segfaults can occur. Fix required raising them minimum numpy requirement + segfaults can occur. Fix required raising them minimum numpy requirement from 1.7.0 to 1.9.0. * ignore missing_value, _FillValue, valid_range, valid_min and valid_max when creating masked arrays if attribute cannot be safely @@ -59,8 +371,8 @@ * Fix for auto scaling and masking when _Unsigned attribute set (create view as unsigned type after scaling and masking). Issue #671. * Always mask values outside valid_min, valid_max (not just when - missing_value attribue present). Issue #672. - * Fix setup.py so pip install doesn't fail if cython not installed. + missing_value attribute present). Issue #672. + * Fix setup.py so pip install doesn't fail if cython not installed. setuptools >= 18.0 now required for installation (Issue #666). version 1.2.8 (tag v1.2.8rel) @@ -79,7 +391,7 @@ the chartostring utility function is used to convert the array of characters to an array of strings with one less dimension (the last dimension is interpreted as the length of each string) when reading the - data. When writing the data, stringtochar is used to convert a numpy + data. When writing the data, stringtochar is used to convert a numpy array of fixed length strings to an array of characters with one more dimension. chartostring and stringtochar now also have an 'encoding' kwarg. Automatic conversion to/from character to string arrays can be turned off @@ -99,7 +411,7 @@ version 1.2.6 (tag v1.2.6rel) ============================== - * fix some test failures on big endian PPC64 that were due to + * fix some test failures on big endian PPC64 that were due to errors in byte-swapping logic. Also fixed bug in enum code exposed on PPC64 (issue #608). * remove support for python 2.6 (it probably still will work for a while @@ -133,7 +445,7 @@ reading, a vlen string array attribute is returned as a list of strings. To write, use var.setncattr_string("name", ["two", "strings"]).) * Fix for issue #596 - julian day calculations wrong for negative years, - caused incorrect rountrip num2date(date2num(date)) roundtrip for dates with year + caused incorrect roundtrip num2date(date2num(date)) roundtrip for dates with year < 0. * Make sure negative years work in utime.num2date (issue #596). * raise NotImplementedError when trying to pickle Dataset, Variable, @@ -146,13 +458,13 @@ byte order before passing to netcdf-c library. 
Data read from variable with non-native byte order is also byte-swapped, so that dtype remains consistent with netcdf variable. Behavior now consistent with h5py. - * raise warning for HDF5 1.10.x (issue #549), since backwards + * raise warning for HDF5 1.10.x (issue #549), since backwards incompatible files may be created. * raise AttributeError instead of RuntimeError when attribute operation - fails. raise IOError instead of RuntimeError when nc_create or + fails. raise IOError instead of RuntimeError when nc_create or nc_open fails (issue #546). * Use NamedTemporaryFile instead of deprecated mktemp in tests - (pull request #543). + (pull request #543). * add AppVeyor automated windows tests (pull request #540). version 1.2.3.1 (tag v1.2.3.1rel) @@ -177,17 +489,17 @@ NETCDF3_64BIT_DATA format and filepath Dataset method). * expose netcdftime.__version__ (issue #504). * fix potential memory leak in Dataset.filepath in attempt to fix - mysterious segfaults on CentOS6 (issue #506). Segfaults + mysterious segfaults on CentOS6 (issue #506). Segfaults can apparently still occur on systems like CentOS6 with old versions of glibc. version 1.2.2 (tag v1.2.2rel) ============================= * fix failing tests on python 2.6 (issue #497). Change minimum required python from 2.5 to 2.6. - * Potential memory leaks fixed by freeing string pointers internally allocated + * Potential memory leaks fixed by freeing string pointers internally allocated in netcdf-c using nc_free_string. Also use nc_free_vlens to free space allocated for vlens inside netcdf-c (issue #495). - * invoke str on filename argument to Dataset constructor, so pathlib + * invoke str on filename argument to Dataset constructor, so pathlib instances can be used (issue #489). * don't use hardwired NC_MAX_DIMS or NC_MAX_VARS internally to allocate space for dimension or variable ids. Instead, find out the number of dims @@ -206,7 +518,7 @@ * add 'size' attribute to Dimension (same as len(d), where d is a Dimension instance, issue #477). * fix bug in nc3tonc4 with --unpackshort=1 (issue #474). - * dates do not have to be contiguous, i.e. can be before and after the + * dates do not have to be contiguous, i.e. can be before and after the missing dates in Gregorian calendar (pull request #476). version 1.2.1 (tag v1.2.1rel) @@ -254,9 +566,9 @@ packing (set_auto_scale and set_auto_maskandscale) when writing. Pull request #435. * use USE_SETUPCFG env var to over-ride use of setup.cfg. If USE_SETUPCFG - evaluates to false, setup.cfg will not be used and all configuration + evaluates to false, setup.cfg will not be used and all configuration variables can be set from environment variables. Useful when using 'pip - install' and nc-config is broken (issue #438). + install' and nc-config is broken (issue #438). * fix for integer overflow in date2index (issue #444). version 1.1.8 (tag v1.1.8rel) @@ -274,18 +586,18 @@ will now return the existing group instance. Dataset.__getitem__ also added. nc['/path/to'] returns a Group instance, and nc['/path/to/var1'] returns a Variable - instance. + instance. * change minimum required numpy to 1.7.0, fix so all tests pass with 1.7.0. Added travis tests for minimum required cython, numpy (issue #404). * enable abbreviations to time units specification, as allowed in CF (issue #402). Now, instead of just 'seconds' and 'seconds', 'secs', 'sec' and 's' are also allowed (similar to minutes, days and hours). 
- * install utility scripts in utils directory with setuptools entry points + * install utility scripts in utils directory with setuptools entry points (pull request #392 from @mindw). Code for utilities moved to netCDF4_utils.py - makes utilities more windows-friendly. * make sure booleans are treated correctly in setup.cfg. Add use_cython (default True) to setup.cfg. If set to False, then - cython will not be used to compile netCDF4.pyx (existing netCDF4.c + cython will not be used to compile netCDF4.pyx (existing netCDF4.c will be used instead). * use "from Cython.Build import cythonize" instead of "from Cython.Distutils import build_ext" in setup.py (issue #393) @@ -293,9 +605,9 @@ https://github.com/cython/cython/wiki/enhancements-distutils_preprocessing). * unicode attributes now written as strings, not bytes (using nc_put_att_string instead of nc_put_att_text, issue #388). - * add __orthogonal_indexing__ attribute to Variable, Dataset and Group (issue #385) to + * add __orthogonal_indexing__ attribute to Variable, Dataset and Group (issue #385) to denote that Variable objects do not follow numpy indexing semantics for integer and - boolean array indices. + boolean array indices. * make sure application of scale_factor and add_offset works correctly when scale_factor not given (issue #381). * add man pages for nc3tonc4, nc4tonc3, ncinfo in man directory. @@ -313,14 +625,14 @@ to use it (otherwise compilation with fail). Issue 367. * add ipython notebooks from Unidata workshop in examples directory. * fix ellipsis variable slicing regression (issue 371). - * release the Global Interpreter Lock (GIL) when calling the C + * release the Global Interpreter Lock (GIL) when calling the C library for read operations. Speeds up multi-threaded reads - (issue 369). Caution - the HDF5 library may need to be compiled + (issue 369). Caution - the HDF5 library may need to be compiled with the threadsafe option to ensure that global data structures are not corrupted by simultaneous manipulation by different threads. * Make sure USE_NCCONFIG environment variable takes precedence over value of use_ncconfig in setup.cfg. With this change, 'pip install netCDF4' - with USE_NCCONFIG=1 will use environment variables to find paths to + with USE_NCCONFIG=1 will use environment variables to find paths to libraries and include files, instead of relying on nc-config (issue #341). version 1.1.6 (tag v1.1.6rel) @@ -333,13 +645,13 @@ * make calendar name keyword for num2date/date2num case insensitive (issue 362). * make sure units parser returns time-zone naive datetime instance that - includes UTC offset (issue 357). UTC offset was applied incorrectly in + includes UTC offset (issue 357). UTC offset was applied incorrectly in netcdftime.date2num and num2date. No longer need to depend on python-dateutil. version 1.1.5 (tag v1.1.5rel) ============================= - + * add dependency on python-dateutil in setup.py and install docs. * use python datetime in num2date and date2num whenever possible. Remove duplicate num2date and date2num functions from netcdftime. Addresses issue @@ -359,7 +671,7 @@ * speedup conversion of array indices to slices (issue #325). * fix for issue #330 (incorrect values for seconds returned by netcdftime). * fix reading of scalar vlen variables (issue #333). 
- * setting fill_value=False in createVariable for vlen and compound variables + * setting fill_value=False in createVariable for vlen and compound variables now does nothing, instead of causing an error when the Dataset is closed (issue #331). * cython will regenerate netCDF4.c when install is run, not just build. @@ -676,7 +988,7 @@ lib after the 4.2 release). Controlled by kwarg 'diskless' to netCDF4.Dataset (default False). diskless=True when creating a file results in a file that exists only in memory, closing the file - makes the data disapper, except if persist=True keyword given in + makes the data disappear, except if persist=True keyword given in which case it is persisted to a disk file on close. diskless=True when opening a file creates an in-memory copy of the file for faster access. @@ -914,7 +1226,7 @@ version 0.8.1 (svn revision 744) * Experimental variable-length (vlen) data type support added. - * changes to accomodate compound types in netcdf-4.1-beta snapshots. + * changes to accommodate compound types in netcdf-4.1-beta snapshots. Compound types now work correctly for snapshots >= 20090603. * Added __len__ method and 'size' property to Variable class. @@ -925,7 +1237,7 @@ version 0.8.1 (svn revision 744) * Fixed bug occurring when indexing with a numpy array of length 1. - * Fixed bug that occured when -1 was used as a variable index. + * Fixed bug that occurred when -1 was used as a variable index. * enabled 'shared access' mode for NETCDF3 formatted files (mode='ws', 'r+s' or 'as'). Writes in shared mode are unbuffered, which can @@ -1094,7 +1406,7 @@ version 0.7.3 (svn revision 501) to work as slice indices. * (netCDF4_classic only) try to make sure file is not left in 'define mode' - when execption is raised. + when exception is raised. * if slicing a variable results in a array with shape (1,), just return a scalar (except for compound types). diff --git a/LICENSE b/LICENSE new file mode 100644 index 000000000..cca96131a --- /dev/null +++ b/LICENSE @@ -0,0 +1,7 @@ +Copyright 2008 Jeffrey Whitaker + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/MANIFEST.in b/MANIFEST.in index f6f156aee..38a3c86a5 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,22 +1,30 @@ -recursive-include docs * +include docs/index.html recursive-include man * -recursive-include conda.recipe * +recursive-include external * include MANIFEST.in -include README.md -include COPYING +include README.htmldocs include Changelog -include appveyor.yml -include .travis.yml include setup.cfg include examples/*py -include examples/*ipynb include examples/README.md +exclude examples/data include test/*py include test/*nc -include netcdftime/__init__.py -include netcdftime/_netcdftime.pyx -include netCDF4/__init__.py -include netCDF4/_netCDF4.pyx -include netCDF4/utils.py +include src/netCDF4/__init__.py +include src/netCDF4/_netCDF4.pyx +exclude src/netCDF4/_netCDF4.c +include src/netCDF4/utils.py +include src/netCDF4/plugins/empty.txt +include src/netCDF4/py.typed +include src/netCDF4/*.pyi include include/netCDF4.pxi include include/mpi-compat.h +include include/membuf.pyx +include include/netcdf-compat.h +include include/no_parallel_support_imports.pxi.in +include include/parallel_support_imports.pxi.in +include *.md +include *.py +include *.release +include *.sh +include _build/*.py diff --git a/PKG-INFO b/PKG-INFO deleted file mode 100644 index ab725c698..000000000 --- a/PKG-INFO +++ /dev/null @@ -1,47 +0,0 @@ -Metadata-Version: 1.1 -Name: netCDF4 -Version: 1.3.2 -Author: Jeff Whitaker -Author-email: jeffrey s whitaker at noaa gov -Home-page: https://github.com/Unidata/netcdf4-python -Summary: python/numpy interface to netCDF library (versions 3 and 4) -License: OSI Approved -Description: netCDF version 4 has many features not found in earlier versions of the library - and is implemented on - top of HDF5. This module can read and write files in both the new netCDF 4 and - the old netCDF 3 - format, and can create files that are readable by HDF5 clients. The API modelled - after - Scientific.IO.NetCDF, and should be familiar to users of that module. - - Most new features of netCDF 4 are implemented, such as multiple unlimited - dimensions, groups and zlib data compression. All the new numeric data types - (such as 64 bit and unsigned integer types) are implemented. Compound, - variable length (vlen), and enumerated (enum) data types are supported, but - the opaque type is not. Mixtures of compound, vlen and/or enum data types are not supported. - - This project has a `Github repository - `_ where you may access - the most - up-to-date source. - - `Documentation - `_ - - `Changelog - `_ - - Also available in the `Anaconda scientific python distribution `_ - - Download source tarball and binary wheels below... -Keywords: numpy netcdf data science network oceanography meteorology climate -Platform: any -Classifier: Intended Audience :: Science/Research -Classifier: Programming Language :: Python :: 2 -Classifier: Programming Language :: Python :: 2.7 -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.3 -Classifier: Programming Language :: Python :: 3.4 -Classifier: Programming Language :: Python :: 3.5 -Classifier: Topic :: Scientific/Engineering - diff --git a/README.gh-pages b/README.gh-pages deleted file mode 100644 index be44b3997..000000000 --- a/README.gh-pages +++ /dev/null @@ -1,15 +0,0 @@ -To update web docs at http://github.unidata.io/netcdf4-python: - -First install fork of pdoc from https://github.com/jswhit/pdoc (requires mako, -markdown, pygments and future). 
- -Then in netcdf4-python github clone directory (after building and -installing github master), - -* generate docs (sh create_docs.sh) -* copy docs/netCDF4/index.html up one level (cp docs/netCDF4/index.html ..) -* git checkout gh-pages -* cp ../index.html . -* git commit index.html -* git push origin gh-pages -* git checkout master diff --git a/README.htmldocs b/README.htmldocs new file mode 100644 index 000000000..a4ba23ee0 --- /dev/null +++ b/README.htmldocs @@ -0,0 +1,11 @@ +To update web docs at http://github.unidata.io/netcdf4-python: + +First install pdoc (https://github.com/pdoc3/pdoc) + +Then in netcdf4-python github clone directory (after building and +installing github master), generate docs by running create_docs.sh. + +Docs are put in docs/index.html. + +Github pages (https://unidata.github.io/netcdf4-python/) points to docs/index.html +in master branch. diff --git a/README.md b/README.md index e3401baf7..e7cea8c4a 100644 --- a/README.md +++ b/README.md @@ -1,13 +1,136 @@ -# netcdf4-python +# [netcdf4-python](http://unidata.github.io/netcdf4-python) [Python](http://python.org)/[numpy](http://numpy.org) interface to the netCDF [C library](https://github.com/Unidata/netcdf-c). -[![Linux Build Status](https://travis-ci.org/Unidata/netcdf4-python.svg?branch=master)](https://travis-ci.org/Unidata/netcdf4-python) -[![Windows Build Status](https://ci.appveyor.com/api/projects/status/fl9taa9je4e6wi7n/branch/master?svg=true)](https://ci.appveyor.com/project/jswhit/netcdf4-python/branch/master) -[![PyPI package](https://badge.fury.io/py/netCDF4.svg)](http://python.org/pypi/netCDF4) +[![CodeQL](https://github.com/Unidata/netcdf4-python/actions/workflows/github-code-scanning/codeql/badge.svg)](https://github.com/Unidata/netcdf4-python/actions/workflows/github-code-scanning/codeql) +[![PyPI package](https://img.shields.io/pypi/v/netCDF4.svg)](http://python.org/pypi/netCDF4) +[![Anaconda-Server Badge](https://anaconda.org/conda-forge/netCDF4/badges/version.svg)](https://anaconda.org/conda-forge/netCDF4) +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2592291.svg)](https://doi.org/10.5281/zenodo.2592290) + ## News For details on the latest updates, see the [Changelog](https://github.com/Unidata/netcdf4-python/blob/master/Changelog). +1/5/2026: Version [1.7.4](https://pypi.python.org/pypi/netCDF4/1.7.4) released. Compression plugins now included in wheels, windows/arm64 and +free-threaded python wheels provided. Automatic conversion of character arrays <--> string arrays works for Unicode (not just ascii) strings. +WARNING: netcdf-c is not thread-safe and netcdf4-python does have internal locking so expect segfaults if you +use netcdf4-python on multiple threads with free-threaded python. Users must exercise care to only call netcdf from +a single thread. + +10/13/2025: Version [1.7.3](https://pypi.python.org/pypi/netCDF4/1.7.3) released. Minor updates/bugfixes and python 3.14 wheels, see Changelog for details. + +10/22/2024: Version [1.7.2](https://pypi.python.org/pypi/netCDF4/1.7.2) released. Minor updates/bugfixes and python 3.13 wheels, see Changelog for details. + +06/17/2024: Version [1.7.1](https://pypi.python.org/pypi/netCDF4/1.7.1) released. Fixes for wheels, no code changes. + +06/13/2024: Version [1.7.0](https://pypi.python.org/pypi/netCDF4/1.7.0) released. Add support for complex numbers via `auto_complex` keyword to `Dataset` ([PR #1295](https://github.com/Unidata/netcdf4-python/pull/1295)) + +10/20/2023: Version [1.6.5](https://pypi.python.org/pypi/netCDF4/1.6.5) released. 
+Fix for issue #1271 (mask ignored if bool MA assigned to uint8 var), +support for python 3.12 (removal of python 3.7 support), more +informative error messages. + +6/4/2023: Version [1.6.4](https://pypi.python.org/pypi/netCDF4/1.6.4) released. Now requires +[certifi](https://github.com/certifi/python-certifi) to locate SSL certificates - this allows +OpenDAP https URLs to work with linux wheels (issue [#1246](https://github.com/Unidata/netcdf4-python/issues/1246)). + +3/3/2023: Version [1.6.3](https://pypi.python.org/pypi/netCDF4/1.6.3) released. + +11/15/2022: Version [1.6.2](https://pypi.python.org/pypi/netCDF4/1.6.2) released. Fix for +compilation with netcdf-c < 4.9.0 (issue [#1209](https://github.com/Unidata/netcdf4-python/issues/1209)). +Slicing multi-dimensional variables with an all False boolean index array +now returns an empty numpy array (instead of raising an exception - issue [#1197](https://github.com/Unidata/netcdf4-python/issues/1197)). + +09/18/2022: Version [1.6.1](https://pypi.python.org/pypi/netCDF4/1.6.1) released. GIL now +released for all C lib calls, `set_alignment` and `get_alignment` module functions +added to modify/retrieve HDF5 data alignment properties. Added `Dataset` methods to +query availability of optional compression filters. + +06/24/2022: Version [1.6.0](https://pypi.python.org/pypi/netCDF4/1.6.0) released. Support +for quantization (bit-grooming and bit-rounding) functionality in netcdf-c 4.9.0 which can +dramatically improve compression. Dataset.createVariable now accepts dimension instances (instead +of just dimension names). 'compression' kwarg added to Dataset.createVariable to support szip as +well as new compression algorithms available in netcdf-c 4.9.0 through compression plugins (such +as zstd, bzip2 and blosc). Working arm64 wheels for Apple M1 Silicon now available on pypi. + +10/31/2021: Version [1.5.8](https://pypi.python.org/pypi/netCDF4/1.5.8) released. Fix Enum bug, add binary wheels for aarch64 and python 3.10. + +6/22/2021: Version [1.5.7](https://pypi.python.org/pypi/netCDF4/1.5.7) released. +Fixed OverflowError on Windows when reading data with dimension sizes greater than 2**32-1. +Masked arrays no longer returned for vlens. + +2/15/2021: Version [1.5.6](https://pypi.python.org/pypi/netCDF4/1.5.6) released. Added `Dataset.fromcdl` and `Dataset.tocdl`, which require `ncdump` and `ncgen` utilities to be in `$PATH`. Removed python 2.7 support. + +12/20/2020: Version [1.5.5.1](https://pypi.python.org/pypi/netCDF4/1.5.5.1) released. +Updated binary wheels for OSX and linux that link latest netcdf-c and hdf5 libs. + +12/01/2020: Version [1.5.5](https://pypi.python.org/pypi/netCDF4/1.5.5) released. +Update license wording to be consistent with MIT license. + +07/23/2020: Version [1.5.4](https://pypi.python.org/pypi/netCDF4/1.5.4) released. +Now requires numpy >= 1.9. + +10/27/2019: Version [1.5.3](https://pypi.python.org/pypi/netCDF4/1.5.3) released. Fix for +[issue #972](https://github.com/Unidata/netcdf4-python/issues/972), plus binary wheels for +python 3.8. + +09/03/2019: Version [1.5.2](https://pypi.python.org/pypi/netCDF4/1.5.2) released. Bugfixes, no new features. + +05/06/2019: Version [1.5.1.2](https://pypi.python.org/pypi/netCDF4/1.5.1.2) released. Fixes another slicing +regression ([issue #922)](https://github.com/Unidata/netcdf4-python/issues/922)) introduced in the 1.5.1 release. + +05/02/2019: Version [1.5.1.1](https://pypi.python.org/pypi/netCDF4/1.5.1.1) released. 
Fixes incorrect `__version__` +module variable in 1.5.1 release, plus a slicing bug ([issue #919)](https://github.com/Unidata/netcdf4-python/issues/919)). + +04/30/2019: Version [1.5.1](https://pypi.python.org/pypi/netCDF4/1.5.1) released. Bugfixes, no new features. + +04/02/2019: Version [1.5.0.1](https://pypi.python.org/pypi/netCDF4/1.5.0.1) released. Binary wheels for macos x +and linux rebuilt with netcdf-c 4.6.3 (instead of 4.4.1.1). Added read-shared capability for faster reads +of NETCDF3 files (mode='rs'). + +03/24/2019: Version [1.5.0](https://pypi.python.org/pypi/netCDF4/1.5.0) released. Parallel IO support for classic +file formats added using the pnetcdf library (contribution from Lars Pastewka, [pull request #897](https://github.com/Unidata/netcdf4-python/pull/897)). + +03/08/2019: Version [1.4.3.2](https://pypi.python.org/pypi/netCDF4/1.4.3.2) released. +Include missing membuf.pyx file in source tarball. No need to update if you installed +1.4.3.1 from a binary wheel. + +03/07/2019: Version [1.4.3.1](https://pypi.python.org/pypi/netCDF4/1.4.3.1) released. +Fixes bug in implementation of NETCDF4_CLASSIC parallel IO support in 1.4.3. + +03/05/2019: Version [1.4.3](https://pypi.python.org/pypi/netCDF4/1.4.3) released. Issues with netcdf-c 4.6.2 fixed (including broken parallel IO). `set_ncstring_attrs()` method added, memoryview buffer now returned when an in-memory Dataset is closed. + +10/26/2018: Version [1.4.2](https://pypi.python.org/pypi/netCDF4/1.4.2) released. Minor bugfixes, added `Variable.get_dims()` method and `master_file` kwarg for `MFDataset.__init__`. + +08/10/2018: Version [1.4.1](https://pypi.python.org/pypi/netCDF4/1.4.1) released. The old slicing behavior +(numpy array returned unless missing values are present, otherwise masked array returned) is re-enabled +via `set_always_mask(False)`. + +05/11/2018: Version [1.4.0](https://pypi.python.org/pypi/netCDF4/1.4.0) released. The netcdftime package is no longer +included, it is now a separate [package](https://pypi.python.org/pypi/cftime) dependency. In addition to several +bug fixes, there are a few important changes to the default behaviour to note: + * Slicing a netCDF variable will now always return masked array by default, even if there are no + masked values. The result depended on the slice before, which was too surprising. + If auto-masking is turned off (with `set_auto_mask(False)`) a numpy array will always + be returned. + * `_FillValue` is no longer treated as a valid_min/valid_max. This was too surprising, despite + the fact the thet netcdf docs [attribute best practices](https://www.unidata.ucar.edu/software/netcdf/docs/attribute_conventions.html) suggests that + clients should to this if `valid_min`, `valid_max` and `valid_range` are not set. + * Changed behavior of string attributes so that `nc.stringatt = ['foo','bar']` + produces an vlen string array attribute in NETCDF4, instead of concatenating + into a single string (`foobar`). In NETCDF3/NETCDF4_CLASSIC, an IOError + is now raised, instead of writing `foobar`. + * Retrieved compound-type variable data now returned with character array elements converted to + numpy strings ([issue #773](https://github.com/Unidata/netcdf4-python/issues/773)). + Works for assignment also. Can be disabled using + `set_auto_chartostring(False)`. Numpy structured + array dtypes with `'SN'` string subtypes can now be used to + define netcdf compound types in `createCompoundType` (they get converted to `('S1',N)` + character array types automatically). 
+ * `valid_min`, `valid_max`, `_FillValue` and `missing_value` are now treated as unsigned + integers if `_Unsigned` variable attribute is set (to mimic behaviour of netcdf-java). + Conversion to unsigned type now occurs before masking and scale/offset + operation ([issue #794](https://github.com/Unidata/netcdf4-python/issues/794)) + 11/01/2017: Version [1.3.1](https://pypi.python.org/pypi/netCDF4/1.3.1) released. Parallel IO support with MPI! Requires that netcdf-c and hdf5 be built with MPI support, and [mpi4py](http://mpi4py.readthedocs.io/en/stable). To open a file for parallel access in a program running in an MPI environment @@ -132,16 +255,29 @@ regarding thread-safety in the HDF5 C library. Fixes to `setup.py` now ensure t with `export USE_NCCONFIG=0` will use environment variables to find paths to libraries and include files, instead of relying exclusively on the nc-config utility. -## Quick Start -* Clone GitHub repository (`git clone https://github.com/Unidata/netcdf4-python.git`), or get source tarball from [PyPI](https://pypi.python.org/pypi/netCDF4). Links to Windows and OS X precompiled binary packages are also available on [PyPI](https://pypi.python.org/pypi/netCDF4). +## Installation +The easiest way to install is through pip: + +```shell +pip install netCDF4 +``` + +or, if you are a user of the Conda package manager, + +```shell +conda install -c conda-forge netCDF4 +``` + +## Development installation +* Clone GitHub repository (`git clone https://github.com/Unidata/netcdf4-python.git`) * Make sure [numpy](http://www.numpy.org/) and [Cython](http://cython.org/) are - installed and you have [Python](https://www.python.org) 2.7 or newer. + installed and you have [Python](https://www.python.org) 3.8 or newer. -* Make sure [HDF5](http://www.h5py.org/) and netcdf-4 are installed, and the `nc-config` utility - is in your Unix PATH. +* Make sure [HDF5](http://www.h5py.org/) and netcdf-4 are installed, + and the `nc-config` utility is in your Unix PATH. -* Run `python setup.py build`, then `python setup.py install` (with `sudo` if necessary). +* Run `python setup.py build`, then `pip install -e .`. * To run all the tests, execute `cd test && python run_all.py`. diff --git a/README.release b/README.release index 5f8b8c32f..89cb406e0 100644 --- a/README.release +++ b/README.release @@ -1,8 +1,7 @@ * create a release branch ('vX.Y.Zrel'). In the release branch... * make sure version number in PKG-INFO, setup.py and netCDF4/_netCDF4.pyx are up to date (in _netCDF4.pyx, change 'Version' in first line of docstring at top of file, - and __version__ variable). If netcdftime module has any updates, - increment __version__ in netcdftime/_netcdftime.pyx. + and __version__ variable). * update Changelog and README.md as needed. * commit and push all of the above changes. * install the module (python setup.py install), then run 'sh create_docs.sh' diff --git a/README.wheels.md b/README.wheels.md deleted file mode 100644 index d3a7fdd2e..000000000 --- a/README.wheels.md +++ /dev/null @@ -1,100 +0,0 @@ -# Building and uploading wheels - -## For OSX - -We automate OSX wheel building using a custom github repository that builds on -the travis-ci OSX machines. 
- -The travis-ci interface for the builds is : -https://travis-ci.org/MacPython/netcdf4-python-wheels - -The driving github repository is : -https://github.com/MacPython/netcdf4-python-wheels - -### How it works - -The wheel-building repository: - -* does a fresh build of the required C / C++ libraries; -* builds a netcdf4-python wheel, linking against these fresh builds; -* processes the wheel using [delocate](https://pypi.python.org/pypi/delocate). - `delocate` copies the required dynamic libraries into the wheel and relinks - the extension modules against the copied libraries; -* uploads the built wheel to http://wheels.scipy.org (a Rackspace container - kindly donated by Rackspace to scikit-learn). - -The resulting wheel is therefore self-contained and does not need any external -dynamic libraries apart from those provided as standard by OSX. - -### Triggering a build - -You will need write permision to the github repository to trigger new builds -on the travis-ci interface. Contact us on the mailing list if you need this. - -You can trigger a build by: - -* making a commit to the `netcdf4-python-wheels` repository (e.g. with `git - commit --allow-empty`); or -* clicking on the circular arrow icon towards the top right of the travis-ci - page, to rerun the previous build. - -In general, it is better to trigger a build with a commit, because this makes -a new set of build products and logs, keeping the old ones for reference. -Keeping the old build logs helps us keep track of previous problems and -successful builds. - -### Which netcdf4-python commit does the repository build? - -By default, the `netcd4-python-wheels` repository is usually set up to build -the latest git tag. To check whether this is so have a look around line 5 of -`.travis.yml` in the `netcdf4-python-wheels` repository. You should see -something like: - -``` -- BUILD_COMMIT='latest-tag' -``` - -If this is commented out, then the repository is set up to build the current -commit in the `netcdf4-python` submodule of the repository. If it is set to -another value then it will be specifying a commit to build. - -You can therefore build any arbitrary commit by specificying the commit hash -or branch name or tag name in this line of the `.travis.yml` file. - -### Uploading the built wheels to pypi - -Be careful, http://wheels.scipy.org points to a container on a distributed -content delivery network. It can take up to 15 minutes for the new wheel file -to get updated into the container at http://wheels.scipy.org. - -When the wheels are updated, you can of course just download them to your -machine manually, and then upload them manually to pypi, or by using -[twine][twine]. You can also use a script for doing this, housed at : -https://github.com/MacPython/terryfy/blob/master/wheel-uploader - -You'll need [twine][twine] and [beautiful soup 4][bs4]. - -You will typically have a directory on your machine where you store wheels, -called a `wheelhouse`. The typical call for `wheel-uploader` would then -be something like: - -``` -wheel-uploader -v -w ~/wheelhouse netCDF4 1.1.8 -``` - -where: - -* `-v` means give verbose messages; -* `-w ~/wheelhouse` means download the wheels from https://wheels.scipy.org to - the directory `~/wheelhouse`; -* `netCDF4` is the root name of the wheel(s) to download / upload; -* `1.1.8` is the version to download / upload. - -So, in this case, `wheel-uploader` will download all wheels starting with -`netCDF4-1.1.8-` from http://wheels.scipy.org to `~/wheelhouse`, then upload -them to pypi. 
- -Of course, you will need permissions to upload to pypi, for this to work. - -[twine]: https://pypi.python.org/pypi/twine -[bs4]: https://pypi.python.org/pypi/beautifulsoup4 diff --git a/_build/backend.py b/_build/backend.py new file mode 100644 index 000000000..11764a849 --- /dev/null +++ b/_build/backend.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +""" +In-tree build backend that programmatically adds mpi4py to the list of build dependencies if the +underlying netCDF4 C library has parallel support enabled. +""" + +from setuptools.build_meta import * + +import utils + + +def get_requires_for_build_editable(config_settings=None): + return ["mpi4py>=3.1"] if utils.netcdf4_has_parallel_support() else [] + + +def get_requires_for_build_sdist(config_settings=None): + return ["mpi4py>=3.1"] if utils.netcdf4_has_parallel_support() else [] + + +def get_requires_for_build_wheel(config_settings=None): + return ["mpi4py>=3.1"] if utils.netcdf4_has_parallel_support() else [] diff --git a/_build/utils.py b/_build/utils.py new file mode 100644 index 000000000..ce7938144 --- /dev/null +++ b/_build/utils.py @@ -0,0 +1,127 @@ +# -*- coding: utf-8 -*- +""" +This module contains a streamlined version of some utilities defined in `setup.py`, to be at +disposal of in-tree build backends. +""" + +import configparser +import os +import subprocess + + +PROJECT_ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) +OPEN_KWARGS = {"encoding": "utf-8"} + + +def get_config_flags(command: list[str], flag: str) -> list[str]: + """Pull out specific flags from a config command (pkg-config or nc-config)""" + flags = subprocess.run(command, capture_output=True, text=True) + return [arg[2:] for arg in flags.stdout.split() if arg.startswith(flag)] + + +def is_netcdf4_include_dir(inc_dir: str) -> bool: + try: + f = open(os.path.join(inc_dir, "netcdf.h"), **OPEN_KWARGS) + except OSError: + return False + + for line in f: + if line.startswith("nc_inq_compound"): + return True + return False + + +def get_netcdf4_include_dir(): + netcdf4_dir = os.environ.get("NETCDF4_DIR") + netcdf4_incdir = os.environ.get("NETCDF4_INCDIR") + + if bool(int(os.environ.get("USE_SETUPCFG", 1))) and os.path.exists( + setup_cfg := os.path.join(PROJECT_ROOT_DIR, "setup.cfg") + ): + config = configparser.ConfigParser() + config.read(setup_cfg) + + netcdf4_dir = config.get("directories", "NETCDF4_DIR", fallback=netcdf4_dir) + netcdf4_incdir = config.get( + "directories", "NETCDF4_INCDIR", fallback=netcdf4_incdir + ) + + # make sure USE_NCCONFIG from environment takes precedence over use_ncconfig from setup.cfg + # (issue #341) + if "USE_NCCONFIG" in os.environ: + use_ncconfig = bool(int(os.environ.get("USE_NCCONFIG", 0))) + else: + use_ncconfig = config.getboolean("options", "use_ncconfig", fallback=None) + + ncconfig = config.get("options", "ncconfig", fallback=None) + else: + use_ncconfig = None + ncconfig = None + + try: + if ncconfig is None: + if netcdf4_dir is not None: + ncconfig = os.path.join(netcdf4_dir, "bin", "nc-config") + else: # otherwise, just hope it's in the users PATH + ncconfig = "nc-config" + has_ncconfig = subprocess.call([ncconfig, "--libs"]) == 0 + except OSError: + has_ncconfig = False + + # if nc-config exists, and use_ncconfig not set, try to use it + if use_ncconfig is None and has_ncconfig: + use_ncconfig = has_ncconfig + + dirs_to_search = [] + if os.environ.get("CONDA_PREFIX"): + dirs_to_search.append(os.environ["CONDA_PREFIX"]) # linux,macosx + dirs_to_search.append( + 
os.path.join(os.environ["CONDA_PREFIX"], "Library") + ) # windows + dirs_to_search += [ + os.path.expanduser("~"), + "/usr/local", + "/sw", + "/opt", + "/opt/local", + "/opt/homebrew", + "/usr", + ] + + if netcdf4_incdir is None and netcdf4_dir is None: + if use_ncconfig and has_ncconfig: + inc_dirs = get_config_flags([ncconfig, "--cflags"], "-I") + else: + inc_dirs = [os.path.join(dir_, "include") for dir_ in dirs_to_search] + + for inc_dir in inc_dirs: + if is_netcdf4_include_dir(inc_dir): + netcdf4_incdir = inc_dir + break + + if netcdf4_incdir is None: + raise ValueError("Did not find netCDF version 4 headers.") + else: + if netcdf4_incdir is None: + netcdf4_incdir = os.path.join(netcdf4_dir, "include") + if not is_netcdf4_include_dir(netcdf4_incdir): + raise ValueError( + f"Did not find netCDF version 4 headers under `{netcdf4_incdir}`." + ) + + return netcdf4_incdir + + +def netcdf4_has_parallel_support() -> bool: + netcdf4_incdir = get_netcdf4_include_dir() + if os.path.exists(ncmetapath := os.path.join(netcdf4_incdir, "netcdf_meta.h")): + with open(ncmetapath) as f: + for line in f: + if line.startswith("#define NC_HAS_PARALLEL"): + try: + return bool(int(line.split()[2])) + except ValueError: + pass + return False + else: + return False diff --git a/appveyor.yml b/appveyor.yml deleted file mode 100644 index 9d5057f58..000000000 --- a/appveyor.yml +++ /dev/null @@ -1,56 +0,0 @@ -environment: - - # SDK v7.0 MSVC Express 2008's SetEnv.cmd script will fail if the - # /E:ON and /V:ON options are not enabled in the batch script interpreter - # See: http://stackoverflow.com/a/13751649/163740 - CMD_IN_ENV: "cmd /E:ON /V:ON /C obvci_appveyor_python_build_env.cmd" - - matrix: - - TARGET_ARCH: x64 - CONDA_NPY: 111 - CONDA_PY: 27 - CONDA_INSTALL_LOCN: C:\\Miniconda-x64 - - - TARGET_ARCH: x64 - CONDA_NPY: 111 - CONDA_PY: 36 - CONDA_INSTALL_LOCN: C:\\Miniconda35-x64 - -# We always use a 64-bit machine, but can build x86 distributions -# with the TARGET_ARCH variable. -platform: - - x64 - -install: - # If there is a newer build queued for the same PR, cancel this one. - # The AppVeyor 'rollout builds' option is supposed to serve the same - # purpose but it is problematic because it tends to cancel builds pushed - # directly to master instead of just PR builds (or the converse). - # credits: JuliaLang developers. - - ps: if ($env:APPVEYOR_PULL_REQUEST_NUMBER -and $env:APPVEYOR_BUILD_NUMBER -ne ((Invoke-RestMethod ` - https://ci.appveyor.com/api/projects/$env:APPVEYOR_ACCOUNT_NAME/$env:APPVEYOR_PROJECT_SLUG/history?recordsNumber=50).builds | ` - Where-Object pullRequestId -eq $env:APPVEYOR_PULL_REQUEST_NUMBER)[0].buildNumber) { ` - throw "There are newer queued builds for this pull request, failing early." } - - # Add path, activate `conda` and update conda. - - cmd: set "PATH=%CONDA_INSTALL_LOCN%\\Scripts;%CONDA_INSTALL_LOCN%\\Library\\bin;%PATH%" - - cmd: set PYTHONUNBUFFERED=1 - - cmd: call %CONDA_INSTALL_LOCN%\Scripts\activate.bat - # for obvci_appveyor_python_build_env.cmd - - cmd: conda update --all --yes - - cmd: conda install anaconda-client=1.6.3 --yes - - cmd: conda install -c conda-forge --yes obvious-ci - # for msinttypes and newer stuff - - cmd: conda config --prepend channels conda-forge - - cmd: conda config --set show_channel_urls yes - - cmd: conda config --set always_yes true - # For building conda packages - - cmd: conda install --yes conda-build jinja2 anaconda-client - # this is now the downloaded conda... - - cmd: conda info -a - -# Skip .NET project specific build phase. 
-build: off - -test_script: - - "%CMD_IN_ENV% conda build conda.recipe --quiet" diff --git a/checkversion.py b/checkversion.py index 0df6827a1..cb39ee982 100644 --- a/checkversion.py +++ b/checkversion.py @@ -1,5 +1,5 @@ -import netCDF4, sys, numpy -sys.stdout.write('netcdf4-python version: %s\n'%netCDF4.__version__) -sys.stdout.write('HDF5 lib version: %s\n'%netCDF4.__hdf5libversion__) -sys.stdout.write('netcdf lib version: %s\n'%netCDF4.__netcdf4libversion__) -sys.stdout.write('numpy version %s\n' % numpy.__version__) +import netCDF4, numpy +print('netcdf4-python version: %s'%netCDF4.__version__) +print('HDF5 lib version: %s'%netCDF4.__hdf5libversion__) +print('netcdf lib version: %s'%netCDF4.__netcdf4libversion__) +print('numpy version %s' % numpy.__version__) diff --git a/ci/travis/build-parallel-netcdf.sh b/ci/travis/build-parallel-netcdf.sh deleted file mode 100755 index 1c38eec2c..000000000 --- a/ci/travis/build-parallel-netcdf.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -set -e - -echo "Using downloaded netCDF version ${NETCDF_VERSION} with parallel capabilities enabled" -pushd /tmp -wget ftp://ftp.unidata.ucar.edu/pub/netcdf/netcdf-${NETCDF_VERSION}.tar.gz -tar -xzvf netcdf-${NETCDF_VERSION}.tar.gz -pushd netcdf-${NETCDF_VERSION} -./configure --prefix $NETCDF_DIR --enable-netcdf-4 --enable-shared --disable-dap --enable-parallel -make -j 2 -make install -popd diff --git a/conda.recipe/bld.bat b/conda.recipe/bld.bat deleted file mode 100644 index a022b9e0c..000000000 --- a/conda.recipe/bld.bat +++ /dev/null @@ -1,11 +0,0 @@ -set SITECFG=%SRC_DIR%/setup.cfg - -echo [options] > %SITECFG% -echo [directories] >> %SITECFG% -echo HDF5_libdir = %LIBRARY_LIB% >> %SITECFG% -echo HDF5_incdir = %LIBRARY_INC% >> %SITECFG% -echo netCDF4_libdir = %LIBRARY_LIB% >> %SITECFG% -echo netCDF4_incdir = %LIBRARY_INC% >> %SITECFG% - -"%PYTHON%" setup.py install --single-version-externally-managed --record record.txt -if errorlevel 1 exit 1 diff --git a/conda.recipe/build.sh b/conda.recipe/build.sh deleted file mode 100644 index 79fc65e21..000000000 --- a/conda.recipe/build.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -SETUPCFG=$SRC_DIR\setup.cfg - -echo "[options]" > $SETUPCFG -echo "[directories]" >> $SETUPCFG -echo "netCDF4_dir = $PREFIX" >> $SETUPCFG - -${PYTHON} setup.py install --single-version-externally-managed --record record.txt diff --git a/conda.recipe/meta.yaml b/conda.recipe/meta.yaml deleted file mode 100644 index b5d35108d..000000000 --- a/conda.recipe/meta.yaml +++ /dev/null @@ -1,47 +0,0 @@ -{% set version = "dev" %} - -package: - name: netcdf4 - version: {{ version }} - -source: - path: ../ - -build: - number: 0 - entry_points: - - ncinfo = netCDF4.utils:ncinfo - - nc4tonc3 = netCDF4.utils:nc4tonc3 - - nc3tonc4 = netCDF4.utils:nc3tonc4 - -requirements: - build: - - python - - setuptools - - cython - - numpy x.x - - msinttypes # [win and py<35] - - hdf5 - - libnetcdf - run: - - python - - setuptools - - numpy x.x - - hdf5 - - libnetcdf - -test: - source_files: - - test - imports: - - netCDF4 - - netcdftime - commands: - - ncinfo -h - - nc4tonc3 -h - - nc3tonc4 -h - -about: - home: http://github.com/Unidata/netcdf4-python - license: OSI Approved - summary: 'Provides an object-oriented python interface to the netCDF version 4 library..' 
diff --git a/conda.recipe/run_test.py b/conda.recipe/run_test.py deleted file mode 100644 index a3a4a524c..000000000 --- a/conda.recipe/run_test.py +++ /dev/null @@ -1,8 +0,0 @@ -import os -import netCDF4 - -# Run the unittests, skipping the opendap test. -test_dir = os.path.join('test') -os.chdir(test_dir) -os.environ['NO_NET']='1' -os.system('python run_all.py') diff --git a/create_docs.sh b/create_docs.sh index 45d94e111..5f4148ba6 100644 --- a/create_docs.sh +++ b/create_docs.sh @@ -1,5 +1,3 @@ -# Uses pdoc (https://github.com/BurntSushi/pdoc) -# to create html docs from docstrings in Cython source. -# Use hacked version at https://github.com/jswhit/pdoc -# which extracts cython method docstrings and function signatures. -pdoc --html --html-no-source --overwrite --html-dir 'docs' netCDF4 +# use pdoc (https://pdoc3.github.io/pdoc/) to generate API docs +pdoc3 --html --config show_source_code=False --force -o 'docs' netCDF4 +/bin/cp -f docs/netCDF4/index.html docs/index.html diff --git a/docs/_config.yml b/docs/_config.yml new file mode 100644 index 000000000..fff4ab923 --- /dev/null +++ b/docs/_config.yml @@ -0,0 +1 @@ +theme: jekyll-theme-minimal diff --git a/docs/index.html b/docs/index.html new file mode 100644 index 000000000..a416acf07 --- /dev/null +++ b/docs/index.html @@ -0,0 +1,3369 @@ + + + + + + +netCDF4 API documentation + + + + + + + + + + + +
+
+
+

Package netCDF4

+
+
+

Version 1.7.4

+

Introduction

+

netcdf4-python is a Python interface to the netCDF C library.

+

netCDF version 4 has many features +not found in earlier versions of the library and is implemented on top of +HDF5. This module can read and write +files in both the new netCDF 4 and the old netCDF 3 format, and can create +files that are readable by HDF5 clients. The API is modelled after +Scientific.IO.NetCDF, +and should be familiar to users of that module.

+

Most new features of netCDF 4 are implemented, such as multiple +unlimited dimensions, groups and data compression. +All the new +numeric data types (such as 64 bit and unsigned integer types) are +implemented. Compound (struct), variable length (vlen) and +enumerated (enum) data types are supported, but not the opaque data type. +Mixtures of compound, vlen and enum data types (such as +compound types containing enums, or vlens containing compound +types) are not supported.

+

Quick Install

+
    +
  • the easiest way to get going is to install via pip install netCDF4 +(or, if you use the conda package manager, conda install -c conda-forge netCDF4).
  • +
+

Developer Install

+
    +
  • Clone the github repository. Make +sure you either clone recursively, or run git submodule update --init to +ensure all the submodules are also checked out.
  • +
  • Make sure the dependencies are satisfied (Python 3.8 or later, +numpy, +Cython, +cftime, +setuptools, +the HDF5 C library, +and the netCDF C library). +For MPI parallel IO support, an MPI-enabled version of the netcdf library +is required, as is mpi4py. +Parallel IO further depends on the existence of MPI-enabled HDF5 or the +PnetCDF library.
  • +
  • By default, the utility nc-config (installed with netcdf-c) +will be run to determine where all the dependencies live.
  • +
  • If nc-config is not in your default PATH, you can set the NETCDF4_DIR +environment variable and setup.py will look in $NETCDF4_DIR/bin. +You can also use the file setup.cfg to set the path to nc-config, or +enter the paths to the libraries and include files manually. Just +edit the setup.cfg file +in a text editor and follow the instructions in the comments. +To disable the use of nc-config, set the env var USE_NCCONFIG to 0. +To disable the use of setup.cfg, set USE_SETUPCFG to 0. +As a last resort, the library and include paths can be set via environment variables. +If you go this route, set USE_NCCONFIG and USE_SETUPCFG to 0, and specify +NETCDF4_LIBDIR, NETCDF4_INCDIR, HDF5_LIBDIR and HDF5_INCDIR. +If the dependencies are not found +in any of the paths specified by environment variables, then standard locations +(such as /usr and /usr/local) are searched.
  • +
  • if the env var NETCDF_PLUGIN_DIR is set to point to the location of the netcdf-c compression +plugins built by netcdf >= 4.9.0, they will be installed inside the package. +In this +case HDF5_PLUGIN_PATH will be set to the package installation path on import, +so the extra compression algorithms available in netcdf-c >= 4.9.0 will automatically +be available. +Otherwise, the user will have to set HDF5_PLUGIN_PATH explicitly +to have access to the extra compression plugins.
  • +
  • run pip install -v . (as root if necessary)
  • +
  • run the tests in the 'test' directory by running python run_all.py.
  • +
+

Tutorial

+ +

All of the code in this tutorial is available in examples/tutorial.py, except +the parallel IO example, which is in examples/mpi_example.py. +Unit tests are in the test directory.

+

Creating/Opening/Closing a netCDF file

+

To create a netCDF file from python, you simply call the Dataset +constructor. This is also the method used to open an existing netCDF +file. +If the file is open for write access (mode='w', 'r+' or 'a'), you may +write any type of data including new dimensions, groups, variables and +attributes. +netCDF files come in five flavors (NETCDF3_CLASSIC, +NETCDF3_64BIT_OFFSET, NETCDF3_64BIT_DATA, NETCDF4_CLASSIC, and NETCDF4). +NETCDF3_CLASSIC was the original netcdf binary format, and was limited +to file sizes less than 2 Gb. NETCDF3_64BIT_OFFSET was introduced +in version 3.6.0 of the library, and extended the original binary format +to allow for file sizes greater than 2 Gb. +NETCDF3_64BIT_DATA is a new format that requires version 4.4.0 of +the C library - it extends the NETCDF3_64BIT_OFFSET binary format to +allow for unsigned/64 bit integer data types and 64-bit dimension sizes. +NETCDF3_64BIT is an alias for NETCDF3_64BIT_OFFSET. +NETCDF4_CLASSIC files use the version 4 disk format (HDF5), but omit features +not found in the version 3 API. They can be read by netCDF 3 clients +only if they have been relinked against the netCDF 4 library. They can +also be read by HDF5 clients. NETCDF4 files use the version 4 disk +format (HDF5) and use the new features of the version 4 API. +The +netCDF4 module can read and write files in any of these formats. When +creating a new file, the format may be specified using the format +keyword in the Dataset constructor. +The default format is +NETCDF4. To see how a given file is formatted, you can examine the +data_model attribute. +Closing the netCDF file is +accomplished via the Dataset.close() method of the Dataset +instance.

+

Here's an example:

+
>>> from netCDF4 import Dataset
+>>> rootgrp = Dataset("test.nc", "w", format="NETCDF4")
+>>> print(rootgrp.data_model)
+NETCDF4
+>>> rootgrp.close()
+
+

Remote OPeNDAP-hosted datasets can be accessed for +reading over http if a URL is provided to the Dataset constructor instead of a +filename. +However, this requires that the netCDF library be built with +OPeNDAP support, via the --enable-dap configure option (added in +version 4.0.1).

+
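For example, opening a remote dataset looks just like opening a local file. A minimal sketch follows; the URL is a placeholder, not a real endpoint, and the call only works against a reachable OPeNDAP server with a DAP-enabled netCDF build:

>>> # remote = Dataset("http://hostname/opendap/path/to/dataset")  # placeholder URL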

Groups in a netCDF file

+

netCDF version 4 added support for organizing data in hierarchical +groups, which are analogous to directories in a filesystem. Groups serve +as containers for variables, dimensions and attributes, as well as other +groups. +A Dataset creates a special group, called +the 'root group', which is similar to the root directory in a unix +filesystem. +To create Group instances, use the +Dataset.createGroup() method of a Dataset or Group +instance. Dataset.createGroup() takes a single argument, a +python string containing the name of the new group. The new Group +instances contained within the root group can be accessed by name using +the groups dictionary attribute of the Dataset instance. +Only +NETCDF4 formatted files support Groups; if you try to create a Group +in a netCDF 3 file you will get an error message.

+
>>> rootgrp = Dataset("test.nc", "a")
+>>> fcstgrp = rootgrp.createGroup("forecasts")
+>>> analgrp = rootgrp.createGroup("analyses")
+>>> print(rootgrp.groups)
+{'forecasts': <class 'netCDF4._netCDF4.Group'>
+group /forecasts:
+    dimensions(sizes):
+    variables(dimensions):
+    groups: , 'analyses': <class 'netCDF4._netCDF4.Group'>
+group /analyses:
+    dimensions(sizes):
+    variables(dimensions):
+    groups: }
+>>>
+
+

Groups can exist within groups in a Dataset, just as directories +exist within directories in a unix filesystem. Each Group instance +has a groups attribute dictionary containing all of the group +instances contained within that group. Each Group instance also has a +path attribute that contains a simulated unix directory path to +that group. +To simplify the creation of nested groups, you can +use a unix-like path as an argument to Dataset.createGroup().

+
>>> fcstgrp1 = rootgrp.createGroup("/forecasts/model1")
+>>> fcstgrp2 = rootgrp.createGroup("/forecasts/model2")
+
+

If any of the intermediate elements of the path do not exist, they are created, +just as with the unix command 'mkdir -p'. If you try to create a group +that already exists, no error will be raised, and the existing group will be +returned.

+

Here's an example that shows how to navigate all the groups in a +Dataset. The function walktree is a Python generator that is used +to walk the directory tree. Note that printing the Dataset or Group +object yields summary information about its contents.

+
>>> def walktree(top):
+...     yield top.groups.values()
+...     for value in top.groups.values():
+...         yield from walktree(value)
+>>> print(rootgrp)
+<class 'netCDF4._netCDF4.Dataset'>
+root group (NETCDF4 data model, file format HDF5):
+    dimensions(sizes):
+    variables(dimensions):
+    groups: forecasts, analyses
+>>> for children in walktree(rootgrp):
+...     for child in children:
+...         print(child)
+<class 'netCDF4._netCDF4.Group'>
+group /forecasts:
+    dimensions(sizes):
+    variables(dimensions):
+    groups: model1, model2
+<class 'netCDF4._netCDF4.Group'>
+group /analyses:
+    dimensions(sizes):
+    variables(dimensions):
+    groups:
+<class 'netCDF4._netCDF4.Group'>
+group /forecasts/model1:
+    dimensions(sizes):
+    variables(dimensions):
+    groups:
+<class 'netCDF4._netCDF4.Group'>
+group /forecasts/model2:
+    dimensions(sizes):
+    variables(dimensions):
+    groups:
+
+

Dimensions in a netCDF file

+

netCDF defines the sizes of all variables in terms of dimensions, so +before any variables can be created the dimensions they use must be +created first. A special case, not often used in practice, is that of a +scalar variable, which has no dimensions. A dimension is created using +the Dataset.createDimension() method of a Dataset +or Group instance. A Python string is used to set the name of the +dimension, and an integer value is used to set the size. To create an +unlimited dimension (a dimension that can be appended to), the size +value is set to None or 0. In this example, both the time and +level dimensions are unlimited. +Having more than one unlimited +dimension is a new netCDF 4 feature; in netCDF 3 files there may be only +one, and it must be the first (leftmost) dimension of the variable.

+
>>> level = rootgrp.createDimension("level", None)
+>>> time = rootgrp.createDimension("time", None)
+>>> lat = rootgrp.createDimension("lat", 73)
+>>> lon = rootgrp.createDimension("lon", 144)
+
+

All of the Dimension instances are stored in a python dictionary.

+
>>> print(rootgrp.dimensions)
+{'level': <class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'level', size = 0, 'time': <class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'time', size = 0, 'lat': <class 'netCDF4._netCDF4.Dimension'>: name = 'lat', size = 73, 'lon': <class 'netCDF4._netCDF4.Dimension'>: name = 'lon', size = 144}
+
+

Using the python len function with a Dimension instance returns the +current size of that dimension. +The Dimension.isunlimited() method of a Dimension instance can +be used to determine if the dimension is unlimited, or appendable.

+
>>> print(len(lon))
+144
+>>> print(lon.isunlimited())
+False
+>>> print(time.isunlimited())
+True
+
+

Printing the Dimension object +provides useful summary info, including the name and length of the dimension, +and whether it is unlimited.

+
>>> for dimobj in rootgrp.dimensions.values():
+...     print(dimobj)
+<class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'level', size = 0
+<class 'netCDF4._netCDF4.Dimension'> (unlimited): name = 'time', size = 0
+<class 'netCDF4._netCDF4.Dimension'>: name = 'lat', size = 73
+<class 'netCDF4._netCDF4.Dimension'>: name = 'lon', size = 144
+
+

Dimension names can be changed using the +Dataset.renameDimension() method of a Dataset or +Group instance.

+
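For example, a minimal sketch that renames the lon dimension created above and then renames it back, so the rest of this tutorial is unaffected:

>>> rootgrp.renameDimension("lon", "longitude")
>>> rootgrp.renameDimension("longitude", "lon")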

Variables in a netCDF file

+

netCDF variables behave much like python multidimensional array objects +supplied by the numpy module. However, +unlike numpy arrays, netCDF4 variables can be appended to along one or +more 'unlimited' dimensions. To create a netCDF variable, use the +Dataset.createVariable() method of a Dataset or +Group instance. The Dataset.createVariable() method +has two mandatory arguments, the variable name (a Python string), and +the variable datatype. The variable's dimensions are given by a tuple +containing the dimension names (defined previously with +Dataset.createDimension()). To create a scalar +variable, simply leave out the dimensions keyword. The variable +primitive datatypes correspond to the dtype attribute of a numpy array. +You can specify the datatype as a numpy dtype object, or anything that +can be converted to a numpy dtype object. Valid datatype specifiers +include:

Specifier   Datatype                   Old typecodes
'f4'        32-bit floating point      'f'
'f8'        64-bit floating point      'd'
'i4'        32-bit signed integer      'i' 'l'
'i2'        16-bit signed integer      'h' 's'
'i8'        64-bit signed integer
'i1'        8-bit signed integer       'b' 'B'
'u1'        8-bit unsigned integer
'u2'        16-bit unsigned integer
'u4'        32-bit unsigned integer
'u8'        64-bit unsigned integer
'S1'        single-character string    'c'
+

The unsigned integer types and the 64-bit integer type +can only be used if the file format is NETCDF4.

+

The dimensions themselves are usually also defined as variables, called +coordinate variables. The Dataset.createVariable() +method returns an instance of the Variable class whose methods can be +used later to access and set variable data and attributes.

+
>>> times = rootgrp.createVariable("time","f8",("time",))
+>>> levels = rootgrp.createVariable("level","i4",("level",))
+>>> latitudes = rootgrp.createVariable("lat","f4",("lat",))
+>>> longitudes = rootgrp.createVariable("lon","f4",("lon",))
+>>> # two dimensions unlimited
+>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",))
+>>> temp.units = "K"
+
+

To get summary info on a Variable instance in an interactive session, +just print it.

+
>>> print(temp)
+<class 'netCDF4._netCDF4.Variable'>
+float32 temp(time, level, lat, lon)
+    units: K
+unlimited dimensions: time, level
+current shape = (0, 0, 73, 144)
+filling on, default _FillValue of 9.969209968386869e+36 used
+
+

You can use a path to create a Variable inside a hierarchy of groups.

+
>>> ftemp = rootgrp.createVariable("/forecasts/model1/temp","f4",("time","level","lat","lon",))
+
+

If the intermediate groups do not yet exist, they will be created.

+

You can also query a Dataset or Group instance directly to obtain Group or +Variable instances using paths.

+
>>> print(rootgrp["/forecasts/model1"])  # a Group instance
+<class 'netCDF4._netCDF4.Group'>
+group /forecasts/model1:
+    dimensions(sizes):
+    variables(dimensions): float32 temp(time,level,lat,lon)
+    groups:
+>>> print(rootgrp["/forecasts/model1/temp"])  # a Variable instance
+<class 'netCDF4._netCDF4.Variable'>
+float32 temp(time, level, lat, lon)
+path = /forecasts/model1
+unlimited dimensions: time, level
+current shape = (0, 0, 73, 144)
+filling on, default _FillValue of 9.969209968386869e+36 used
+
+

All of the variables in the Dataset or Group are stored in a +Python dictionary, in the same way as the dimensions:

+
>>> print(rootgrp.variables)
+{'time': <class 'netCDF4._netCDF4.Variable'>
+float64 time(time)
+unlimited dimensions: time
+current shape = (0,)
+filling on, default _FillValue of 9.969209968386869e+36 used, 'level': <class 'netCDF4._netCDF4.Variable'>
+int32 level(level)
+unlimited dimensions: level
+current shape = (0,)
+filling on, default _FillValue of -2147483647 used, 'lat': <class 'netCDF4._netCDF4.Variable'>
+float32 lat(lat)
+unlimited dimensions:
+current shape = (73,)
+filling on, default _FillValue of 9.969209968386869e+36 used, 'lon': <class 'netCDF4._netCDF4.Variable'>
+float32 lon(lon)
+unlimited dimensions:
+current shape = (144,)
+filling on, default _FillValue of 9.969209968386869e+36 used, 'temp': <class 'netCDF4._netCDF4.Variable'>
+float32 temp(time, level, lat, lon)
+    units: K
+unlimited dimensions: time, level
+current shape = (0, 0, 73, 144)
+filling on, default _FillValue of 9.969209968386869e+36 used}
+
+

Variable names can be changed using the +Dataset.renameVariable() method of a Dataset +instance.

+
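For example, a minimal sketch that renames the temp variable created above and then restores the original name:

>>> rootgrp.renameVariable("temp", "temperature")
>>> rootgrp.renameVariable("temperature", "temp")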

Variables can be sliced similar to numpy arrays, but there are some differences. +See +Writing data to and retrieving data from a netCDF variable for more details.

+

Attributes in a netCDF file

+

There are two types of attributes in a netCDF file, global and variable. +Global attributes provide information about a group, or the entire +dataset, as a whole. Variable attributes provide information about +one of the variables in a group. Global attributes are set by assigning +values to Dataset or Group instance variables. Variable +attributes are set by assigning values to Variable instance +variables. Attributes can be strings, numbers or sequences. Returning to +our example,

+
>>> import time
+>>> rootgrp.description = "bogus example script"
+>>> rootgrp.history = "Created " + time.ctime(time.time())
+>>> rootgrp.source = "netCDF4 python module tutorial"
+>>> latitudes.units = "degrees north"
+>>> longitudes.units = "degrees east"
+>>> levels.units = "hPa"
+>>> temp.units = "K"
+>>> times.units = "hours since 0001-01-01 00:00:00.0"
+>>> times.calendar = "gregorian"
+
+

The Dataset.ncattrs() method of a Dataset, Group or +Variable instance can be used to retrieve the names of all the netCDF +attributes. This method is provided as a convenience, since using the +built-in dir Python function will return a bunch of private methods +and attributes that cannot (or should not) be modified by the user.

+
>>> for name in rootgrp.ncattrs():
+...     print("Global attr {} = {}".format(name, getattr(rootgrp, name)))
+Global attr description = bogus example script
+Global attr history = Created Mon Jul  8 14:19:41 2019
+Global attr source = netCDF4 python module tutorial
+
+

The __dict__ attribute of a Dataset, Group or Variable +instance provides all the netCDF attribute name/value pairs in a python +dictionary:

+
>>> print(rootgrp.__dict__)
+{'description': 'bogus example script', 'history': 'Created Mon Jul  8 14:19:41 2019', 'source': 'netCDF4 python module tutorial'}
+
+

Attributes can be deleted from a netCDF Dataset, Group or +Variable using the python del statement (i.e. del grp.foo +removes the attribute foo from the group grp).

+
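For example, a minimal sketch that deletes and then restores one of the global attributes set above:

>>> del rootgrp.source
>>> "source" in rootgrp.ncattrs()
False
>>> rootgrp.source = "netCDF4 python module tutorial"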

Writing data to and retrieving data from a netCDF variable

+

Now that you have a netCDF Variable instance, how do you put data +into it? You can just treat it like an array and assign data to a slice.

+
>>> import numpy as np
+>>> lats =  np.arange(-90,91,2.5)
+>>> lons =  np.arange(-180,180,2.5)
+>>> latitudes[:] = lats
+>>> longitudes[:] = lons
+>>> print("latitudes =\n{}".format(latitudes[:]))
+latitudes =
+[-90.  -87.5 -85.  -82.5 -80.  -77.5 -75.  -72.5 -70.  -67.5 -65.  -62.5
+ -60.  -57.5 -55.  -52.5 -50.  -47.5 -45.  -42.5 -40.  -37.5 -35.  -32.5
+ -30.  -27.5 -25.  -22.5 -20.  -17.5 -15.  -12.5 -10.   -7.5  -5.   -2.5
+   0.    2.5   5.    7.5  10.   12.5  15.   17.5  20.   22.5  25.   27.5
+  30.   32.5  35.   37.5  40.   42.5  45.   47.5  50.   52.5  55.   57.5
+  60.   62.5  65.   67.5  70.   72.5  75.   77.5  80.   82.5  85.   87.5
+  90. ]
+
+

Unlike NumPy's array objects, netCDF Variable +objects with unlimited dimensions will grow along those dimensions if you +assign data outside the currently defined range of indices.

+
>>> # append along two unlimited dimensions by assigning to slice.
+>>> nlats = len(rootgrp.dimensions["lat"])
+>>> nlons = len(rootgrp.dimensions["lon"])
+>>> print("temp shape before adding data = {}".format(temp.shape))
+temp shape before adding data = (0, 0, 73, 144)
+>>>
+>>> from numpy.random import uniform
+>>> temp[0:5, 0:10, :, :] = uniform(size=(5, 10, nlats, nlons))
+>>> print("temp shape after adding data = {}".format(temp.shape))
+temp shape after adding data = (5, 10, 73, 144)
+>>>
+>>> # levels have grown, but no values yet assigned.
+>>> print("levels shape after adding pressure data = {}".format(levels.shape))
+levels shape after adding pressure data = (10,)
+
+

Note that the size of the levels variable grows when data is appended +along the level dimension of the variable temp, even though no +data has yet been assigned to levels.

+
>>> # now, assign data to levels dimension variable.
+>>> levels[:] =  [1000.,850.,700.,500.,300.,250.,200.,150.,100.,50.]
+
+

However, there are some differences between NumPy and netCDF +variable slicing rules. Slices behave as usual, being specified as a +start:stop:step triplet. Using a scalar integer index i takes the ith +element and reduces the rank of the output array by one. Boolean array and +integer sequence indexing behaves differently for netCDF variables +than for numpy arrays. +Only 1-d boolean arrays and integer sequences are +allowed, and these indices work independently along each dimension (similar +to the way vector subscripts work in fortran). +This means that

+
>>> temp[0, 0, [0,1,2,3], [0,1,2,3]].shape
+(4, 4)
+
+

returns an array of shape (4,4) when slicing a netCDF variable, but for a +numpy array it returns an array of shape (4,). +Similarly, a netCDF variable of shape (2,3,4,5) indexed +with [0, array([True, False, True]), array([False, True, True, True]), :] +would return a (2, 3, 5) array. In NumPy, this would raise an error since +it would be equivalent to [0, [0,1], [1,2,3], :]. When slicing with integer +sequences, the indices need not be sorted and may contain +duplicates (both of these are new features in version 1.2.1). +While this behaviour may cause some confusion for those used to NumPy's 'fancy indexing' rules, +it provides a very powerful way to extract data from multidimensional netCDF +variables by using logical operations on the dimension arrays to create slices.

+

For example,

+
>>> tempdat = temp[::2, [1,3,6], lats>0, lons>0]
+
+

will extract time indices 0,2 and 4, pressure levels +850, 500 and 200 hPa, all Northern Hemisphere latitudes and Eastern +Hemisphere longitudes, resulting in a numpy array of shape +(3, 3, 36, 71).

+
>>> print("shape of fancy temp slice = {}".format(tempdat.shape))
+shape of fancy temp slice = (3, 3, 36, 71)
+
+

Special note for scalar variables: To extract data from a scalar variable +v with no associated dimensions, use numpy.asarray(v) or v[...]. +The result will be a numpy scalar array.

+
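For example, a minimal sketch (the variable name scalar_var is just illustrative):

>>> sv = rootgrp.createVariable("scalar_var", "f4")  # no dimensions -> scalar variable
>>> sv.assignValue(42.0)
>>> print(np.asarray(sv))
42.0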

By default, netcdf4-python returns numpy masked arrays with values equal to the +missing_value or _FillValue variable attributes masked for primitive and +enum data types. +The Dataset.set_auto_mask() Dataset and Variable methods +can be used to disable this feature so that +numpy arrays are always returned, with the missing values included. Prior to +version 1.4.0 the default behavior was to only return masked arrays when the +requested slice contained missing values. +This behavior can be recovered +using the Dataset.set_always_mask() method. If a masked array is +written to a netCDF variable, the masked elements are filled with the +value specified by the missing_value attribute. +If the variable has +no missing_value, the _FillValue is used instead.

+
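For example, a minimal sketch (set_auto_mask can be called on a Dataset, Group or Variable instance):

>>> temp.set_auto_mask(False)  # slices of temp now return plain numpy arrays
>>> temp.set_auto_mask(True)   # restore the default behaviour of returning masked arrays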

Dealing with time coordinates

+

Time coordinate values pose a special challenge to netCDF users. +Most +metadata standards (such as CF) specify that time should be +measured relative to a fixed date using a certain calendar, with units +specified like hours since YY-MM-DD hh:mm:ss. +These units can be +awkward to deal with, without a utility to convert the values to and +from calendar dates. +The functions num2date +and date2num are +provided by cftime to do just that. +Here's an example of how they can be used:

+
>>> # fill in times.
+>>> from datetime import datetime, timedelta
+>>> from cftime import num2date, date2num
+>>> dates = [datetime(2001,3,1)+n*timedelta(hours=12) for n in range(temp.shape[0])]
+>>> times[:] = date2num(dates,units=times.units,calendar=times.calendar)
+>>> print("time values (in units {}):\n{}".format(times.units, times[:]))
+time values (in units hours since 0001-01-01 00:00:00.0):
+[17533104. 17533116. 17533128. 17533140. 17533152.]
+>>> dates = num2date(times[:],units=times.units,calendar=times.calendar)
+>>> print("dates corresponding to time values:\n{}".format(dates))
+ [cftime.DatetimeGregorian(2001, 3, 1, 0, 0, 0, 0, has_year_zero=False)
+  cftime.DatetimeGregorian(2001, 3, 1, 12, 0, 0, 0, has_year_zero=False)
+  cftime.DatetimeGregorian(2001, 3, 2, 0, 0, 0, 0, has_year_zero=False)
+  cftime.DatetimeGregorian(2001, 3, 2, 12, 0, 0, 0, has_year_zero=False)
+  cftime.DatetimeGregorian(2001, 3, 3, 0, 0, 0, 0, has_year_zero=False)]
+
+

num2date() converts numeric values of time in the specified units +and calendar to datetime objects, and date2num() does the reverse. +All the calendars currently defined in the +CF metadata convention are supported. +A function called date2index() is also provided which returns the indices +of a netCDF time variable corresponding to a sequence of datetime instances.

+
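For example, a minimal sketch of date2index using the times variable and dates sequence created above:

>>> from netCDF4 import date2index
>>> idx = date2index(dates[:2], times)  # indices of the first two dates in the times variable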

Reading data from a multi-file netCDF dataset

+

If you want to read data from a variable that spans multiple netCDF files, +you can use the MFDataset class to read the data as if it were +contained in a single file. Instead of using a single filename to create +a Dataset instance, create an MFDataset instance with either a list +of filenames, or a string with a wildcard (which is then converted to +a sorted list of files using the python glob module). +Variables in the list of files that share the same unlimited +dimension are aggregated together, and can be sliced across multiple +files. +To illustrate this, let's first create a bunch of netCDF files with +the same variable (with the same unlimited dimension). +The files +must be in NETCDF3_64BIT_OFFSET, NETCDF3_64BIT_DATA, NETCDF3_CLASSIC or +NETCDF4_CLASSIC format (NETCDF4 formatted multi-file +datasets are not supported).

+
>>> for nf in range(10):
+...     with Dataset("mftest%s.nc" % nf, "w", format="NETCDF4_CLASSIC") as f:
+...         _ = f.createDimension("x",None)
+...         x = f.createVariable("x","i",("x",))
+...         x[0:10] = np.arange(nf*10,10*(nf+1))
+
+

Now read all the files back in at once with MFDataset

+
>>> from netCDF4 import MFDataset
+>>> f = MFDataset("mftest*nc")
+>>> print(f.variables["x"][:])
+[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
+ 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
+ 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
+ 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
+ 96 97 98 99]
+
+

Note that MFDataset can only be used to read, not write, multi-file +datasets.

+

Efficient compression of netCDF variables

+

Data stored in netCDF Variable objects can be compressed and +decompressed on the fly. The compression algorithm used is determined +by the compression keyword argument to the Dataset.createVariable() method. +zlib compression is always available, szip is available if the linked HDF5 +library supports it, and zstd, bzip2, blosc_lz, blosc_lz4, blosc_lz4hc, +blosc_zlib and blosc_zstd are available via optional external plugins. +The complevel keyword regulates the +speed and efficiency of the compression for zlib, bzip2 and zstd (1 being fastest, but lowest +compression ratio, 9 being slowest but best compression ratio). The +default value of complevel is 4. Setting shuffle=False will turn +off the HDF5 shuffle filter, which de-interlaces a block of data before +zlib compression by reordering the bytes. +The shuffle filter can +significantly improve compression ratios, and is on by default if compression=zlib. +Setting the +fletcher32 keyword argument to +Dataset.createVariable() to True (it's False by +default) enables the Fletcher32 checksum algorithm for error detection. +It's also possible to set the HDF5 chunking parameters and endian-ness +of the binary data stored in the HDF5 file with the chunksizes +and endian keyword arguments to +Dataset.createVariable(). +These keyword arguments +are only relevant for NETCDF4 and NETCDF4_CLASSIC files (where the +underlying file format is HDF5) and are silently ignored if the file +format is NETCDF3_CLASSIC, NETCDF3_64BIT_OFFSET or NETCDF3_64BIT_DATA. +If the HDF5 library is built with szip support, compression=szip can also +be used (in conjunction with the szip_coding and szip_pixels_per_block keyword +arguments).

+

If your data only has a certain number of digits of precision (say for example, it is temperature data that was measured with a precision of 0.1 degrees), you can dramatically improve compression by quantizing (or truncating) the data. There are two methods supplied for doing this. You can use the least_significant_digit keyword argument to Dataset.createVariable() to specify the power of ten of the smallest decimal place in the data that is a reliable value. For example if the data has a precision of 0.1, then setting least_significant_digit=1 will cause the data to be quantized using numpy.around(scale*data)/scale, where scale = 2**bits, and bits is determined so that a precision of 0.1 is retained (in this case bits=4). This is done at the python level and is not a part of the underlying C library. Starting with netcdf-c version 4.9.0, a quantization capability is provided in the library. This can be used via the significant_digits Dataset.createVariable() kwarg (new in version 1.6.0). The interpretation of significant_digits is different than least_significant_digit in that it specifies the absolute number of significant digits independent of the magnitude of the variable (the floating point exponent). Either of these approaches makes the compression 'lossy' instead of 'lossless', that is some precision in the data is sacrificed for the sake of disk space.

+

In our example, try replacing the line

+
>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",))
+
+

with

+
>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",),compression='zlib')
+
+

and then

+
>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",),compression='zlib',least_significant_digit=3)
+
+

or with netcdf-c >= 4.9.0

+
>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",),compression='zlib',significant_digits=4)
+
+

and see how much smaller the resulting files are.

+

Beyond homogeneous arrays of a fixed type - compound data types

+

Compound data types map directly to numpy structured (a.k.a 'record') +arrays. +Structured arrays are akin to C structs, or derived types +in Fortran. They allow for the construction of table-like structures +composed of combinations of other data types, including other +compound types. Compound types might be useful for representing multiple +parameter values at each point on a grid, or at each time and space +location for scattered (point) data. You can then access all the +information for a point by reading one variable, instead of reading +different parameters from different variables. +Compound data types +are created from the corresponding numpy data type using the +Dataset.createCompoundType() method of a Dataset or Group instance. +Since there is no native complex data type in netcdf (but see +Support for complex numbers), compound +types are handy for storing numpy complex arrays. Here's an example:

+
>>> f = Dataset("complex.nc","w")
+>>> size = 3 # length of 1-d complex array
+>>> # create sample complex data.
+>>> datac = np.exp(1j*(1.+np.linspace(0, np.pi, size)))
+>>> # create complex128 compound data type.
+>>> complex128 = np.dtype([("real",np.float64),("imag",np.float64)])
+>>> complex128_t = f.createCompoundType(complex128,"complex128")
+>>> # create a variable with this data type, write some data to it.
+>>> x_dim = f.createDimension("x_dim",None)
+>>> v = f.createVariable("cmplx_var",complex128_t,"x_dim")
+>>> data = np.empty(size,complex128) # numpy structured array
+>>> data["real"] = datac.real; data["imag"] = datac.imag
+>>> v[:] = data # write numpy structured array to netcdf compound var
+>>> # close and reopen the file, check the contents.
+>>> f.close(); f = Dataset("complex.nc")
+>>> v = f.variables["cmplx_var"]
+>>> datain = v[:] # read in all the data into a numpy structured array
+>>> # create an empty numpy complex array
+>>> datac2 = np.empty(datain.shape,np.complex128)
+>>> # .. fill it with contents of structured array.
+>>> datac2.real = datain["real"]; datac2.imag = datain["imag"]
+>>> print('{}: {}'.format(datac.dtype, datac)) # original data
+complex128: [ 0.54030231+0.84147098j -0.84147098+0.54030231j -0.54030231-0.84147098j]
+>>>
+>>> print('{}: {}'.format(datac2.dtype, datac2)) # data from file
+complex128: [ 0.54030231+0.84147098j -0.84147098+0.54030231j -0.54030231-0.84147098j]
+
+

Compound types can be nested, but you must create the 'inner' ones first (a short sketch of a nested compound type follows the example below). Not all numpy structured arrays can be represented as compound variables - an error will be raised if you try to create one that is not supported. All of the compound types defined for a Dataset or Group are stored in a Python dictionary, just like variables and dimensions. As always, printing objects gives useful summary information in an interactive session:

+
>>> print(f)
+<class 'netCDF4._netCDF4.Dataset'>
+root group (NETCDF4 data model, file format HDF5):
+    dimensions(sizes): x_dim(3)
+    variables(dimensions): {'names':['real','imag'], 'formats':['<f8','<f8'], 'offsets':[0,8], 'itemsize':16, 'aligned':True} cmplx_var(x_dim)
+    groups:
+>>> print(f.variables["cmplx_var"])
+<class 'netCDF4._netCDF4.Variable'>
+compound cmplx_var(x_dim)
+compound data type: {'names':['real','imag'], 'formats':['<f8','<f8'], 'offsets':[0,8], 'itemsize':16, 'aligned':True}
+unlimited dimensions: x_dim
+current shape = (3,)
+>>> print(f.cmptypes)
+{'complex128': <class 'netCDF4._netCDF4.CompoundType'>: name = 'complex128', numpy dtype = {'names':['real','imag'], 'formats':['<f8','<f8'], 'offsets':[0,8], 'itemsize':16, 'aligned':True}}
+>>> print(f.cmptypes["complex128"])
+<class 'netCDF4._netCDF4.CompoundType'>: name = 'complex128', numpy dtype = {'names':['real','imag'], 'formats':['<f8','<f8'], 'offsets':[0,8], 'itemsize':16, 'aligned':True}
+
+
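
A minimal sketch of a nested compound type (hypothetical names, assuming a Dataset f open for writing) - note that the inner type is created first:

>>> # inner compound type must be registered before the outer one that contains it
>>> wind_dtype = np.dtype([("speed", np.float32), ("direction", np.float32)])
>>> wind_t = f.createCompoundType(wind_dtype, "wind_t")
>>> obs_dtype = np.dtype([("pressure", np.float32), ("wind", wind_dtype)])
>>> obs_t = f.createCompoundType(obs_dtype, "obs_t")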

Variable-length (vlen) data types

+

NetCDF 4 has support for variable-length or "ragged" arrays. These are arrays of variable length sequences having the same type. To create a variable-length data type, use the Dataset.createVLType() method of a Dataset or Group instance.

+
>>> f = Dataset("tst_vlen.nc","w")
+>>> vlen_t = f.createVLType(np.int32, "phony_vlen")
+
+

The numpy datatype of the variable-length sequences and the name of the +new datatype must be specified. Any of the primitive datatypes can be +used (signed and unsigned integers, 32 and 64 bit floats, and characters), +but compound data types cannot. +A new variable can then be created using this datatype.

+
>>> x = f.createDimension("x",3)
+>>> y = f.createDimension("y",4)
+>>> vlvar = f.createVariable("phony_vlen_var", vlen_t, ("y","x"))
+
+

Since there is no native vlen datatype in numpy, vlen arrays are represented +in python as object arrays (arrays of dtype object). These are arrays whose +elements are Python object pointers, and can contain any type of python object. +For this application, they must contain 1-D numpy arrays all of the same type +but of varying length. +In this case, they contain 1-D numpy int32 arrays of random length between +1 and 10.

+
>>> import random
+>>> random.seed(54321)
+>>> data = np.empty(len(y)*len(x),object)
+>>> for n in range(len(y)*len(x)):
+...     data[n] = np.arange(random.randint(1,10),dtype="int32")+1
+>>> data = np.reshape(data,(len(y),len(x)))
+>>> vlvar[:] = data
+>>> print("vlen variable =\n{}".format(vlvar[:]))
+vlen variable =
+[[array([1, 2, 3, 4, 5, 6, 7, 8], dtype=int32) array([1, 2], dtype=int32)
+  array([1, 2, 3, 4], dtype=int32)]
+ [array([1, 2, 3], dtype=int32)
+  array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)
+  array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)]
+ [array([1, 2, 3, 4, 5, 6, 7], dtype=int32) array([1, 2, 3], dtype=int32)
+  array([1, 2, 3, 4, 5, 6], dtype=int32)]
+ [array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)
+  array([1, 2, 3, 4, 5], dtype=int32) array([1, 2], dtype=int32)]]
+>>> print(f)
+<class 'netCDF4._netCDF4.Dataset'>
+root group (NETCDF4 data model, file format HDF5):
+    dimensions(sizes): x(3), y(4)
+    variables(dimensions): int32 phony_vlen_var(y,x)
+    groups:
+>>> print(f.variables["phony_vlen_var"])
+<class 'netCDF4._netCDF4.Variable'>
+vlen phony_vlen_var(y, x)
+vlen data type: int32
+unlimited dimensions:
+current shape = (4, 3)
+>>> print(f.vltypes["phony_vlen"])
+<class 'netCDF4._netCDF4.VLType'>: name = 'phony_vlen', numpy dtype = int32
+
+

Numpy object arrays containing python strings can also be written as vlen variables. For vlen strings, you don't need to create a vlen data type. Instead, simply use the python str builtin (or a numpy string datatype with fixed length greater than 1) when calling the Dataset.createVariable() method.

+
>>> z = f.createDimension("z",10)
+>>> strvar = f.createVariable("strvar", str, "z")
+
+

In this example, an object array is filled with random python strings with +random lengths between 2 and 12 characters, and the data in the object +array is assigned to the vlen string variable.

+
>>> chars = "1234567890aabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+>>> data = np.empty(10,"O")
+>>> for n in range(10):
+...     stringlen = random.randint(2,12)
+...     data[n] = "".join([random.choice(chars) for i in range(stringlen)])
+>>> strvar[:] = data
+>>> print("variable-length string variable:\n{}".format(strvar[:]))
+variable-length string variable:
+['Lh' '25F8wBbMI' '53rmM' 'vvjnb3t63ao' 'qjRBQk6w' 'aJh' 'QF'
+ 'jtIJbJACaQk4' '3Z5' 'bftIIq']
+>>> print(f)
+<class 'netCDF4._netCDF4.Dataset'>
+root group (NETCDF4 data model, file format HDF5):
+    dimensions(sizes): x(3), y(4), z(10)
+    variables(dimensions): int32 phony_vlen_var(y,x), <class 'str'> strvar(z)
+    groups:
+>>> print(f.variables["strvar"])
+<class 'netCDF4._netCDF4.Variable'>
+vlen strvar(z)
+vlen data type: <class 'str'>
+unlimited dimensions:
+current shape = (10,)
+
+

It is also possible to set contents of vlen string variables with numpy arrays +of any string or unicode data type. Note, however, that accessing the contents +of such variables will always return numpy arrays with dtype object.
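
For example (a sketch reusing the strvar variable created above):

>>> strdata = np.array(["foo", "barbar", "baz", "quux"], dtype="U6")
>>> strvar[0:4] = strdata          # fixed-width unicode array written as vlen strings
>>> print(strvar[0:4].dtype)       # contents always come back as dtype object
object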

+

Enum data type

+

netCDF4 has an enumerated data type, which is an integer datatype that is +restricted to certain named values. Since Enums don't map directly to +a numpy data type, they are read and written as integer arrays.

+

Here's an example of using an Enum type to hold cloud type data. +The base integer data type and a python dictionary describing the allowed +values and their names are used to define an Enum data type using +Dataset.createEnumType().

+
>>> nc = Dataset('clouds.nc','w')
+>>> # python dict with allowed values and their names.
+>>> enum_dict = {'Altocumulus': 7, 'Missing': 255,
+... 'Stratus': 2, 'Clear': 0,
+... 'Nimbostratus': 6, 'Cumulus': 4, 'Altostratus': 5,
+... 'Cumulonimbus': 1, 'Stratocumulus': 3}
+>>> # create the Enum type called 'cloud_t'.
+>>> cloud_type = nc.createEnumType(np.uint8,'cloud_t',enum_dict)
+>>> print(cloud_type)
+<class 'netCDF4._netCDF4.EnumType'>: name = 'cloud_t', numpy dtype = uint8, fields/values ={'Altocumulus': 7, 'Missing': 255, 'Stratus': 2, 'Clear': 0, 'Nimbostratus': 6, 'Cumulus': 4, 'Altostratus': 5, 'Cumulonimbus': 1, 'Stratocumulus': 3}
+
+

A new variable can be created in the usual way using this data type. +Integer data is written to the variable that represents the named +cloud types in enum_dict. A ValueError will be raised if an attempt +is made to write an integer value not associated with one of the +specified names.

+
>>> time = nc.createDimension('time',None)
+>>> # create a 1d variable of type 'cloud_type'.
+>>> # The fill_value is set to the 'Missing' named value.
+>>> cloud_var = nc.createVariable('primary_cloud',cloud_type,'time',
+...                               fill_value=enum_dict['Missing'])
+>>> # write some data to the variable.
+>>> cloud_var[:] = [enum_dict[k] for k in ['Clear', 'Stratus', 'Cumulus',
+...                                        'Missing', 'Cumulonimbus']]
+>>> nc.close()
+>>> # reopen the file, read the data.
+>>> nc = Dataset('clouds.nc')
+>>> cloud_var = nc.variables['primary_cloud']
+>>> print(cloud_var)
+<class 'netCDF4._netCDF4.Variable'>
+enum primary_cloud(time)
+    _FillValue: 255
+enum data type: uint8
+unlimited dimensions: time
+current shape = (5,)
+>>> print(cloud_var.datatype.enum_dict)
+{'Altocumulus': 7, 'Missing': 255, 'Stratus': 2, 'Clear': 0, 'Nimbostratus': 6, 'Cumulus': 4, 'Altostratus': 5, 'Cumulonimbus': 1, 'Stratocumulus': 3}
+>>> print(cloud_var[:])
+[0 2 4 -- 1]
+>>> nc.close()
+
+

Parallel IO

+

If MPI parallel enabled versions of netcdf and hdf5 or pnetcdf are detected, +and mpi4py is installed, netcdf4-python will +be built with parallel IO capabilities enabled. Parallel IO of NETCDF4 or +NETCDF4_CLASSIC formatted files is only available if the MPI parallel HDF5 +library is available. Parallel IO of classic netcdf-3 file formats is only +available if the PnetCDF library is +available. To use parallel IO, your program must be running in an MPI +environment using mpi4py.

+
>>> from mpi4py import MPI
+>>> import numpy as np
+>>> from netCDF4 import Dataset
+>>> rank = MPI.COMM_WORLD.rank  # The process ID (integer 0-3 for 4-process run)
+
+

To run an MPI-based parallel program like this, you must use mpiexec to launch several +parallel instances of Python (for example, using mpiexec -np 4 python mpi_example.py). +The parallel features of netcdf4-python are mostly transparent - +when a new dataset is created or an existing dataset is opened, +use the parallel keyword to enable parallel access.

+
>>> nc = Dataset('parallel_test.nc','w',parallel=True)
+
+

The optional comm keyword may be used to specify a particular +MPI communicator (MPI_COMM_WORLD is used by default). +Each process (or rank) +can now write to the file independently. +In this example the process rank is +written to a different variable index on each task

+
>>> d = nc.createDimension('dim',4)
+>>> v = nc.createVariable('var', np.int64, 'dim')
+>>> v[rank] = rank
+>>> nc.close()
+
+% ncdump parallel_test.nc
+netcdf parallel_test {
+dimensions:
+    dim = 4 ;
+variables:
+    int64 var(dim) ;
+data:
+
+    var = 0, 1, 2, 3 ;
+}
+
+

There are two types of parallel IO, independent (the default) and collective. Independent IO means that each process can do IO independently. It should not depend on or be affected by other processes. Collective IO is a way of doing IO defined in the MPI-IO standard; unlike independent IO, all processes must participate in doing IO. To toggle back and forth between the two types of IO, use the Variable.set_collective() method (a short sketch follows the list below). All metadata operations (such as creation of groups, types, variables, dimensions, or attributes) are collective. There are a couple of important limitations of parallel IO:

+
    +
  • parallel IO for NETCDF4 or NETCDF4_CLASSIC formatted files is only available +if the netcdf library was compiled with MPI enabled HDF5.
  • +
  • parallel IO for all classic netcdf-3 file formats is only available if the +netcdf library was compiled with PnetCDF.
  • +
  • If a variable has an unlimited dimension, appending data must be done in collective mode. If the write is done in independent mode, the operation will fail with a generic "HDF Error".
  • +
  • You can write compressed data in parallel only with netcdf-c >= 4.7.4 and hdf5 >= 1.10.3 (although you can read in parallel with earlier versions). To write compressed data in parallel, the variable must be in 'collective IO mode'. This is done automatically on variable creation if compression is turned on, but if you are appending to a variable in an existing file, you must use Variable.set_collective(True) before attempting to write to it.
  • +
  • You cannot use variable-length (VLEN) data types.
  • +
+
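
A minimal sketch of toggling collective mode on a variable (reopening the parallel file from the example above; run under mpiexec as before):

>>> nc = Dataset('parallel_test.nc', 'a', parallel=True)
>>> v = nc['var']
>>> v.set_collective(True)    # switch this variable to collective IO
>>> v[rank] = rank
>>> v.set_collective(False)   # back to independent IO (the default)
>>> nc.close()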

Important warning regarding threads: +The underlying netcdf-c library is not thread-safe, so netcdf4-python cannot perform parallel +IO in a multi-threaded environment. +Users should expect segfaults if a netcdf file is opened on multiple threads - care should +be taken to restrict netcdf4-python usage to a single thread, even when using free-threaded python.

+

Dealing with strings

+

The most flexible way to store arrays of strings is with the Variable-length (vlen) string data type. However, this requires the use of the NETCDF4 data model, and the vlen type does not map very well to numpy arrays (you have to use numpy arrays of dtype=object, which are arrays of arbitrary python objects). numpy does have a fixed-width string array data type, but unfortunately the netCDF data model does not. Instead, fixed-width byte strings are typically stored as arrays of 8-bit characters. To perform the conversion between character arrays and fixed-width numpy string arrays, the following convention is followed by the python interface. If the _Encoding special attribute is set for a character array (dtype S1) variable, the chartostring() utility function is used to convert the array of characters to an array of strings with one less dimension (the last dimension is interpreted as the length of each string) when reading the data. The character set is specified by the _Encoding attribute. If _Encoding is 'none' or 'bytes', then the character array is converted to a numpy fixed-width byte string array (dtype S#), otherwise a numpy unicode (dtype U#) array is created. When writing the data, stringtochar() is used to convert the numpy string array to an array of characters with one more dimension. For example,

+
>>> from netCDF4 import stringtochar
+>>> nc = Dataset('stringtest.nc','w',format='NETCDF4_CLASSIC')
+>>> _ = nc.createDimension('nchars',3)
+>>> _ = nc.createDimension('nstrings',None)
+>>> v = nc.createVariable('strings','S1',('nstrings','nchars'))
+>>> datain = np.array(['foo','bar'],dtype='S3')
+>>> v[:] = stringtochar(datain) # manual conversion to char array
+>>> print(v[:]) # data returned as char array
+[[b'f' b'o' b'o']
+ [b'b' b'a' b'r']]
+>>> v._Encoding = 'ascii' # this enables automatic conversion
+>>> v[:] = datain # conversion to char array done internally
+>>> print(v[:])  # data returned in numpy string array
+['foo' 'bar']
+>>> nc.close()
+
+

Even if the _Encoding attribute is set, the automatic conversion of char +arrays to/from string arrays can be disabled with +Variable.set_auto_chartostring().
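
For example (a sketch reopening the file from the example above):

>>> nc = Dataset('stringtest.nc', 'a')
>>> v = nc['strings']
>>> v.set_auto_chartostring(False)   # disable automatic conversion
>>> print(v[:].dtype)                # data now returned as a character array
|S1
>>> nc.close()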

+

A similar situation is often encountered with numpy structured arrays with subdtypes containing fixed-width byte strings (dtype=S#). Since there is no native fixed-length string netCDF datatype, these numpy structured arrays are mapped onto netCDF compound types with character array elements. In this case the string <-> char array conversion is handled automatically (without the need to set the _Encoding attribute) using numpy views. The structured array dtype (including the string elements) can even be used to define the compound data type - the string dtype will be converted to character array dtype under the hood when creating the netcdf compound type. Here's an example:

+
>>> nc = Dataset('compoundstring_example.nc','w')
+>>> dtype = np.dtype([('observation', 'f4'),
+...                      ('station_name','S10')])
+>>> station_data_t = nc.createCompoundType(dtype,'station_data')
+>>> _ = nc.createDimension('station',None)
+>>> statdat = nc.createVariable('station_obs', station_data_t, ('station',))
+>>> data = np.empty(2,dtype)
+>>> data['observation'][:] = (123.,3.14)
+>>> data['station_name'][:] = ('Boulder','New York')
+>>> print(statdat.dtype) # strings actually stored as character arrays
+{'names':['observation','station_name'], 'formats':['<f4',('S1', (10,))], 'offsets':[0,4], 'itemsize':16, 'aligned':True}
+>>> statdat[:] = data # strings converted to character arrays internally
+>>> print(statdat[:])  # character arrays converted back to strings
+[(123.  , b'Boulder') (  3.14, b'New York')]
+>>> print(statdat[:].dtype)
+{'names':['observation','station_name'], 'formats':['<f4','S10'], 'offsets':[0,4], 'itemsize':16, 'aligned':True}
+>>> statdat.set_auto_chartostring(False) # turn off auto-conversion
+>>> statdat[:] = data.view(dtype=[('observation', 'f4'),('station_name','S1',10)])
+>>> print(statdat[:])  # now structured array with char array subtype is returned
+[(123.  , [b'B', b'o', b'u', b'l', b'd', b'e', b'r', b'', b'', b''])
+ (  3.14, [b'N', b'e', b'w', b' ', b'Y', b'o', b'r', b'k', b'', b''])]
+>>> nc.close()
+
+

Note that there is currently no support for mapping numpy structured arrays with +unicode elements (dtype U#) onto netCDF compound types, nor is there support +for netCDF compound types with vlen string components.

+

In-memory (diskless) Datasets

+

You can create netCDF Datasets whose content is held in memory +instead of in a disk file. +There are two ways to do this. +If you +don't need access to the memory buffer containing the Dataset from +within python, the best way is to use the diskless=True keyword +argument when creating the Dataset. +If you want to save the Dataset +to disk when you close it, also set persist=True. +If you want to +create a new read-only Dataset from an existing python memory buffer, use the +memory keyword argument to pass the memory buffer when creating the Dataset. +If you want to create a new in-memory Dataset, and then access the memory buffer +directly from Python, use the memory keyword argument to specify the +estimated size of the Dataset in bytes when creating the Dataset with +mode='w'. +Then, the Dataset.close() method will return a python memoryview +object representing the Dataset. Below are examples illustrating both +approaches.

+
>>> # create a diskless (in-memory) Dataset,
+>>> # and persist the file to disk when it is closed.
+>>> nc = Dataset('diskless_example.nc','w',diskless=True,persist=True)
+>>> d = nc.createDimension('x',None)
+>>> v = nc.createVariable('v',np.int32,'x')
+>>> v[0:5] = np.arange(5)
+>>> print(nc)
+<class 'netCDF4._netCDF4.Dataset'>
+root group (NETCDF4 data model, file format HDF5):
+    dimensions(sizes): x(5)
+    variables(dimensions): int32 v(x)
+    groups:
+>>> print(nc['v'][:])
+[0 1 2 3 4]
+>>> nc.close() # file saved to disk
+>>> # create an in-memory dataset from an existing python
+>>> # python memory buffer.
+>>> # read the newly created netcdf file into a python
+>>> # bytes object.
+>>> with open('diskless_example.nc', 'rb') as f:
+...     nc_bytes = f.read()
+>>> # create a netCDF in-memory dataset from the bytes object.
+>>> nc = Dataset('inmemory.nc', memory=nc_bytes)
+>>> print(nc)
+<class 'netCDF4._netCDF4.Dataset'>
+root group (NETCDF4 data model, file format HDF5):
+    dimensions(sizes): x(5)
+    variables(dimensions): int32 v(x)
+    groups:
+>>> print(nc['v'][:])
+[0 1 2 3 4]
+>>> nc.close()
+>>> # create an in-memory Dataset and retrieve memory buffer
+>>> # estimated size is 1028 bytes - this is actually only
+>>> # used if format is NETCDF3
+>>> # (ignored for NETCDF4/HDF5 files).
+>>> nc = Dataset('inmemory.nc', mode='w',memory=1028)
+>>> d = nc.createDimension('x',None)
+>>> v = nc.createVariable('v',np.int32,'x')
+>>> v[0:5] = np.arange(5)
+>>> nc_buf = nc.close() # close returns memoryview
+>>> print(type(nc_buf))
+<class 'memoryview'>
+>>> # save nc_buf to disk, read it back in and check.
+>>> with open('inmemory.nc', 'wb') as f:
+...     f.write(nc_buf)
+>>> nc = Dataset('inmemory.nc')
+>>> print(nc)
+<class 'netCDF4._netCDF4.Dataset'>
+root group (NETCDF4 data model, file format HDF5):
+    dimensions(sizes): x(5)
+    variables(dimensions): int32 v(x)
+    groups:
+>>> print(nc['v'][:])
+[0 1 2 3 4]
+>>> nc.close()
+
+

Support for complex numbers

+

Although there is no native support for complex numbers in netCDF, there are +some common conventions for storing them. Two of the most common are to either +use a compound datatype for the real and imaginary components, or a separate +dimension. netCDF4 supports reading several of these conventions, as well as +writing using one of two conventions (depending on file format). This support +for complex numbers is enabled by setting auto_complex=True when opening a +Dataset:

+
>>> complex_array = np.array([0 + 0j, 1 + 0j, 0 + 1j, 1 + 1j, 0.25 + 0.75j])
+>>> with netCDF4.Dataset("complex.nc", "w", auto_complex=True) as nc:
+...     nc.createDimension("x", size=len(complex_array))
+...     var = nc.createVariable("data", "c16", ("x",))
+...     var[:] = complex_array
+...     print(var)
+<class 'netCDF4._netCDF4.Variable'>
+compound data(x)
+compound data type: complex128
+unlimited dimensions:
+current shape = (5,)
+
+

When reading files using auto_complex=True, netCDF4 will interpret variables +stored using the following conventions as complex numbers:

+
    +
  • compound datatypes with two float or double members whose names begin with r and i (case insensitive)
  • +
  • a dimension of length 2 named complex or ri
  • +
+

When writing files using auto_complex=True, netCDF4 will use:

+
    +
  • a compound datatype named _PFNC_DOUBLE_COMPLEX_TYPE (or *FLOAT* as +appropriate) with members r and i for netCDF4 formats;
  • +
  • or a dimension of length 2 named _pfnc_complex for netCDF3 or classic +formats.
  • +
+
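
For example, the file written above can be read back the same way (a short sketch):

>>> with netCDF4.Dataset("complex.nc", "r", auto_complex=True) as nc:
...     data = nc["data"][:]
>>> print(data.dtype)
complex128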

Support for complex numbers is handled via the +nc-complex library. See there for +further details.

+

contact: Jeffrey Whitaker whitaker.jeffrey@gmail.com

+

copyright: 2008 by Jeffrey Whitaker.

+

license: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

+

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

+

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

+
+
+
+
+
+
+

Functions

+
+
+def chartostring(b, encoding=None) +
+
+

chartostring(b,encoding='utf-8')

+

convert a character array to a string array with one less dimension.

+

b: Input character array (numpy datatype 'S1' or 'U1'). Will be converted to an array of strings, where each string has a fixed length of b.shape[-1] characters.

+

optional kwarg encoding can be used to specify character encoding (default +utf-8 for dtype='UN' or ascii for dtype='SN'). If encoding is 'none' or 'bytes', +a numpy.string_ byte array is returned.

+

returns a numpy string array with datatype 'UN' (or 'SN') and shape b.shape[:-1] where N=b.shape[-1].
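
A quick sketch:

>>> import numpy as np
>>> from netCDF4 import chartostring
>>> b = np.array([list("foo"), list("bar")], dtype="S1")
>>> print(chartostring(b))
['foo' 'bar']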

+
+
+def date2index(dates, nctime, calendar=None, select='exact', has_year_zero=None) +
+
+

date2index(dates, nctime, calendar=None, select='exact', has_year_zero=None)

+

Return indices of a netCDF time variable corresponding to the given dates.

+

dates: A datetime object or a sequence of datetime objects. +The datetime objects should not include a time-zone offset.

+

nctime: A netCDF time variable object. The nctime object must have a +units attribute.

+

calendar: describes the calendar to be used in the time calculations. All the values currently defined in the CF metadata convention (http://cfconventions.org/cf-conventions/cf-conventions#calendar) are supported. Valid calendars are 'standard', 'gregorian', 'proleptic_gregorian', 'tai', 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'. Default is None, which means the calendar associated with the first input datetime instance will be used.

+

select: 'exact', 'before', 'after', 'nearest' +The index selection method. exact will return the indices perfectly +matching the dates given. before and after will return the indices +corresponding to the dates just before or just after the given dates if +an exact match cannot be found. nearest will return the indices that +correspond to the closest dates.

+

has_year_zero: if set to True, astronomical year numbering is used and the year zero exists. If set to False for real-world calendars, then historical year numbering is used and the year 1 is preceded by year -1 and no year zero exists. The defaults are set to conform with CF version 1.9 conventions (False for 'julian', 'gregorian'/'standard', True for 'proleptic_gregorian' (ISO 8601) and True for the idealized calendars 'noleap'/'365_day', '360_day', '366_day'/'all_leap'). The defaults can only be overridden for the real-world calendars; for the idealized calendars the year zero always exists and the has_year_zero kwarg is ignored. This kwarg is not needed to define calendar systems allowed by CF (the calendar-specific defaults do this).

+

returns an index (indices) of the netCDF time variable corresponding +to the given datetime object(s).
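
A short sketch (assuming nc is an open Dataset with a CF-style time variable):

>>> from datetime import datetime
>>> from netCDF4 import date2index
>>> times = nc.variables["time"]   # hypothetical time variable with a units attribute
>>> i = date2index(datetime(2001, 3, 1, 12), times, select="nearest")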

+
+
+def date2num(dates, units, calendar=None, has_year_zero=None, longdouble=False) +
+
+

date2num(dates, units, calendar=None, has_year_zero=None, longdouble=False)

+

Return numeric time values given datetime objects. The units +of the numeric time values are described by the units argument +and the calendar keyword. The datetime objects must +be in UTC with no time-zone offset. +If there is a +time-zone offset in units, it will be applied to the +returned numeric values.

+

dates: A datetime object or a sequence of datetime objects. +The datetime objects should not include a time-zone offset. They +can be either native python datetime instances (which use +the proleptic gregorian calendar) or cftime.datetime instances.

+

units: a string of the form '<time units> since <reference time>' describing the time units. <time units> can be days, hours, minutes, seconds, milliseconds or microseconds. <reference time> is the time origin. 'months since' is allowed only for the 360_day calendar and 'common_years since' is allowed only for the 365_day calendar.

+

calendar: describes the calendar to be used in the time calculations. All the values currently defined in the CF metadata convention (http://cfconventions.org/cf-conventions/cf-conventions#calendar) are supported. Valid calendars are 'standard', 'gregorian', 'proleptic_gregorian', 'tai', 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'. Default is None, which means the calendar associated with the first input datetime instance will be used.

+

has_year_zero: If set to True, astronomical year numbering is used and the year zero exists. If set to False for real-world calendars, then historical year numbering is used and the year 1 is preceded by year -1 and no year zero exists. The defaults are set to conform with CF version 1.9 conventions (False for 'julian', 'gregorian'/'standard', True for 'proleptic_gregorian' (ISO 8601) and True for the idealized calendars 'noleap'/'365_day', '360_day', '366_day'/'all_leap'). Note that CF v1.9 does not specifically mention whether year zero is allowed in the proleptic_gregorian calendar, but ISO-8601 has a year zero so we have adopted this as the default. The defaults can only be overridden for the real-world calendars; for the idealized calendars the year zero always exists and the has_year_zero kwarg is ignored. This kwarg is not needed to define calendar systems allowed by CF (the calendar-specific defaults do this).

+

longdouble: If set True, output is in the long double float type +(numpy.float128) instead of float (numpy.float64), allowing microsecond +accuracy when converting a time value to a date and back again. Otherwise +this is only possible if the discretization of the time variable is an +integer multiple of the units.

+

returns a numeric time value, or an array of numeric time values +with approximately 1 microsecond accuracy.
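
A short sketch:

>>> from datetime import datetime
>>> from netCDF4 import date2num
>>> dates = [datetime(2001, 3, 1), datetime(2001, 3, 2)]
>>> t = date2num(dates, units="hours since 2001-03-01 00:00:00", calendar="standard")
>>> print(t)   # numeric hours relative to 2001-03-01: 0 and 24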

+
+
+def get_alignment() +
+
+

get_alignment()

+

return current netCDF alignment within HDF5 files in a tuple +(threshold,alignment). See netcdf C library documentation for +nc_get_alignment for details. Values can be reset with +set_alignment().

+

This function was added in netcdf 4.9.0.

+
+
+def get_chunk_cache() +
+
+

get_chunk_cache()

+

return current netCDF chunk cache information in a tuple (size,nelems,preemption). +See netcdf C library documentation for nc_get_chunk_cache for +details. Values can be reset with set_chunk_cache().

+
+
+def getlibversion() +
+
+

getlibversion()

+

returns a string describing the version of the netcdf library +used to build the module, and when it was built.

+
+
+def num2date(times,
units,
calendar='standard',
only_use_cftime_datetimes=True,
only_use_python_datetimes=False,
has_year_zero=None)
+
+
+

num2date(times, units, calendar='standard', only_use_cftime_datetimes=True, only_use_python_datetimes=False, has_year_zero=None)

+

Return datetime objects given numeric time values. The units +of the numeric time values are described by the units argument +and the calendar keyword. The returned datetime objects represent +UTC with no time-zone offset, even if the specified +units contain a time-zone offset.

+

times: numeric time values.

+

units: a string of the form '<time units> since <reference time>' describing the time units. <time units> can be days, hours, minutes, seconds, milliseconds or microseconds. <reference time> is the time origin. 'months since' is allowed only for the 360_day calendar and 'common_years since' is allowed only for the 365_day calendar.

+

calendar: describes the calendar used in the time calculations. All the values currently defined in the CF metadata convention (http://cfconventions.org/cf-conventions/cf-conventions#calendar) are supported. Valid calendars are 'standard', 'gregorian', 'proleptic_gregorian', 'tai', 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'. Default is 'standard', which is a mixed Julian/Gregorian calendar.

+

only_use_cftime_datetimes: if False, python datetime.datetime +objects are returned from num2date where possible; if True dates which +subclass cftime.datetime are returned for all calendars. Default True.

+

only_use_python_datetimes: always return python datetime.datetime +objects and raise an error if this is not possible. Ignored unless +only_use_cftime_datetimes=False. Default False.

+

has_year_zero: if set to True, astronomical year numbering is used and the year zero exists. If set to False for real-world calendars, then historical year numbering is used and the year 1 is preceded by year -1 and no year zero exists. The defaults are set to conform with CF version 1.9 conventions (False for 'julian', 'gregorian'/'standard', True for 'proleptic_gregorian' (ISO 8601) and True for the idealized calendars 'noleap'/'365_day', '360_day', '366_day'/'all_leap'). The defaults can only be overridden for the real-world calendars; for the idealized calendars the year zero always exists and the has_year_zero kwarg is ignored. This kwarg is not needed to define calendar systems allowed by CF (the calendar-specific defaults do this).

+

returns a datetime instance, or an array of datetime instances with +microsecond accuracy, if possible.

+

Note: If only_use_cftime_datetimes=False and only_use_python_datetimes=False, the datetime instances returned are 'real' python datetime objects if calendar='proleptic_gregorian', or calendar='standard' or 'gregorian' and the date is after the breakpoint between the Julian and Gregorian calendars (1582-10-15). Otherwise, they are cftime.datetime objects which support some but not all the methods of native python datetime objects. The datetime instances do not contain a time-zone offset, even if the specified units contains one.
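
A short sketch:

>>> from netCDF4 import num2date
>>> d = num2date([0, 12, 24], units="hours since 2001-03-01 00:00:00", calendar="standard")
>>> print(d[-1])
2001-03-02 00:00:00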

+
+
+def rc_get(key) +
+
+

rc_get(key)

+

Returns the internal netcdf-c rc table value corresponding to key. +See https://docs.unidata.ucar.edu/netcdf-c/current/md_auth.html +for more information on rc files and values.

+
+
+def rc_set(key, value) +
+
+

rc_set(key, value)

+

Sets the internal netcdf-c rc table value corresponding to key. +See https://docs.unidata.ucar.edu/netcdf-c/current/md_auth.html +for more information on rc files and values.

+
+
+def set_alignment(threshold, alignment) +
+
+

set_alignment(threshold,alignment)

+

Change the HDF5 file alignment. +See netcdf C library documentation for nc_set_alignment for +details.

+

This function was added in netcdf 4.9.0.

+
+
+def set_chunk_cache(size=None, nelems=None, preemption=None) +
+
+

set_chunk_cache(size=None,nelems=None,preemption=None)

+

change netCDF4 chunk cache settings. +See netcdf C library documentation for nc_set_chunk_cache for +details.
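
A quick sketch of querying and adjusting the cache:

>>> from netCDF4 import get_chunk_cache, set_chunk_cache
>>> size, nelems, preemption = get_chunk_cache()
>>> set_chunk_cache(size=2*size)   # double the cache size, keep nelems/preemption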

+
+
+def stringtoarr(string, NUMCHARS, dtype='S') +
+
+

stringtoarr(a, NUMCHARS,dtype='S')

+

convert a string to a character array of length NUMCHARS

+

a: +Input python string.

+

NUMCHARS: +number of characters used to represent string +(if len(a) < NUMCHARS, it will be padded on the right with blanks).

+

dtype: +type of numpy array to return. +Default is 'S', which +means an array of dtype 'S1' will be returned. +If dtype='U', a +unicode array (dtype = 'U1') will be returned.

+

returns a rank 1 numpy character array of length NUMCHARS with datatype 'S1' +(default) or 'U1' (if dtype='U')
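
A quick sketch:

>>> from netCDF4 import stringtoarr
>>> a = stringtoarr("foo", 5)      # padded out to 5 characters
>>> print(a.shape, a.dtype)
(5,) |S1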

+
+
+def stringtochar(a, encoding=None, n_strlen=None) +
+
+

stringtochar(a,encoding='utf-8',n_strlen=None)

+

convert a string array to a character array with one extra dimension

+

a: +Input numpy string array with numpy datatype 'SN' or 'UN', where N +is the number of characters in each string. +Will be converted to +an array of characters (datatype 'S1' or 'U1') of shape a.shape + (N,).

+

optional kwarg encoding can be used to specify character encoding (default utf-8 for dtype='UN' or ascii for dtype='SN'). If encoding is 'none' or 'bytes', the input array is treated as raw byte strings (numpy.string_).

+

optional kwarg n_strlen is the number of characters in each string. +Default +is None, which means n_strlen will be set to a.itemsize (the number of bytes +used to represent each string in the input array).

+

returns a numpy character array with datatype 'S1' or 'U1' +and shape a.shape + (N,), where N is the length of each string in a.

+
+
+
+
+

Classes

+
+
+class CompoundType +(...) +
+
+

A CompoundType instance is used to describe a compound data type, and can be passed to the Dataset.createVariable() method of a Dataset or Group instance. Compound data types map to numpy structured arrays. See CompoundType for more details.

+

The instance variables dtype and name should not be modified by +the user.

+

__init__(group, datatype, datatype_name)

+

CompoundType constructor.

+

grp: Group instance to associate with the compound datatype.

+

dt: A numpy dtype object describing a structured (a.k.a record) +array. +Can be composed of homogeneous numeric or character data types, or +other structured array data types.

+

dtype_name: a Python string containing a description of the +compound data type.

+

Note 1: When creating nested compound data types, +the inner compound data types must already be associated with CompoundType +instances (so create CompoundType instances for the innermost structures +first).

+

Note 2: CompoundType instances should be created using the +Dataset.createCompoundType() method of a Dataset or +Group instance, not using this class directly.

+

Instance variables

+
+
var dtype
+
+
+
+
var dtype_view
+
+
+
+
var name
+
+
+
+
+
+
+class Dataset +(...) +
+
+

A netCDF Dataset is a collection of dimensions, groups, variables and +attributes. Together they describe the meaning of data and relations among +data fields stored in a netCDF file. See Dataset for more +details.

+

A list of attribute names corresponding to global netCDF attributes +defined for the Dataset can be obtained with the +Dataset.ncattrs() method. +These attributes can be created by assigning to an attribute of the +Dataset instance. A dictionary containing all the netCDF attribute +name/value pairs is provided by the __dict__ attribute of a +Dataset instance.

+

The following class variables are read-only and should not be +modified by the user.

+

dimensions: The dimensions dictionary maps the names of +dimensions defined for the Group or Dataset to instances of the +Dimension class.

+

variables: The variables dictionary maps the names of variables +defined for this Dataset or Group to instances of the +Variable class.

+

groups: The groups dictionary maps the names of groups created for +this Dataset or Group to instances of the Group class (the +Dataset class is simply a special case of the Group class which +describes the root group in the netCDF4 file).

+

cmptypes: The cmptypes dictionary maps the names of +compound types defined for the Group or Dataset to instances of the +CompoundType class.

+

vltypes: The vltypes dictionary maps the names of +variable-length types defined for the Group or Dataset to instances +of the VLType class.

+

enumtypes: The enumtypes dictionary maps the names of +Enum types defined for the Group or Dataset to instances +of the EnumType class.

+

data_model: data_model describes the netCDF +data model version, one of NETCDF3_CLASSIC, NETCDF4, +NETCDF4_CLASSIC, NETCDF3_64BIT_OFFSET or NETCDF3_64BIT_DATA.

+

file_format: same as data_model, retained for backwards compatibility.

+

disk_format: disk_format describes the underlying +file format, one of NETCDF3, HDF5, HDF4, +PNETCDF, DAP2, DAP4 or UNDEFINED. Only available if using +netcdf C library version >= 4.3.1, otherwise will always return +UNDEFINED.

+

parent: parent is a reference to the parent +Group instance. None for the root group or Dataset +instance.

+

path: path shows the location of the Group in the Dataset in a unix directory format (the names of groups in the hierarchy separated by forward slashes). A Dataset instance is the root group, so the path is simply '/'.

+

keepweakref: If True, child Dimension and Variables objects only keep weak +references to the parent Dataset or Group.

+

_ncstring_attrs__: If True, all text attributes will be written as variable-length +strings.

+

__init__(self, filename, mode="r", clobber=True, diskless=False, +persist=False, keepweakref=False, memory=None, encoding=None, +parallel=False, comm=None, info=None, format='NETCDF4')

+

Dataset constructor.

+

filename: Name of netCDF file to hold dataset. Can also +be a python 3 pathlib instance or the URL of an OpenDAP dataset. +When memory is +set this is just used to set the filepath().

+

mode: access mode. r means read-only; no data can be +modified. w means write; a new file is created, an existing file with +the same name is deleted. x means write, but fail if an existing +file with the same name already exists. a and r+ mean append; +an existing file is opened for reading and writing, if +file does not exist already, one is created. +Appending s to modes r, w, r+ or a will enable unbuffered shared +access to NETCDF3_CLASSIC, NETCDF3_64BIT_OFFSET or +NETCDF3_64BIT_DATA formatted files. +Unbuffered access may be useful even if you don't need shared +access, since it may be faster for programs that don't access data +sequentially. This option is ignored for NETCDF4 and NETCDF4_CLASSIC +formatted files.

+

clobber: if True (default), opening a file with mode='w' +will clobber an existing file with the same name. +if False, an +exception will be raised if a file with the same name already exists. +mode=x is identical to mode=w with clobber=False.

+

format: underlying file format (one of 'NETCDF4', +'NETCDF4_CLASSIC', 'NETCDF3_CLASSIC', 'NETCDF3_64BIT_OFFSET' or +'NETCDF3_64BIT_DATA'. +Only relevant if mode = 'w' (if mode = 'r','a' or 'r+' the file format +is automatically detected). Default 'NETCDF4', which means the data is +stored in an HDF5 file, using netCDF 4 API features. +Setting +format='NETCDF4_CLASSIC' will create an HDF5 file, using only netCDF 3 +compatible API features. netCDF 3 clients must be recompiled and linked +against the netCDF 4 library to read files in NETCDF4_CLASSIC format. +'NETCDF3_CLASSIC' is the classic netCDF 3 file format that does not +handle 2+ Gb files. 'NETCDF3_64BIT_OFFSET' is the 64-bit offset +version of the netCDF 3 file format, which fully supports 2+ GB files, but +is only compatible with clients linked against netCDF version 3.6.0 or +later. 'NETCDF3_64BIT_DATA' is the 64-bit data version of the netCDF 3 +file format, which supports 64-bit dimension sizes plus unsigned and +64 bit integer data types, but is only compatible with clients linked against +netCDF version 4.4.0 or later.

+

diskless: If True, create diskless (in-core) file. +This is a feature added to the C library after the +netcdf-4.2 release. If you need to access the memory buffer directly, +use the in-memory feature instead (see memory kwarg).

+

persist: if diskless=True, persist file to disk when closed +(default False).

+

keepweakref: if True, child Dimension and Variable instances will keep weak references to the parent Dataset or Group object. Default is False, which means strong references will be kept. Having Dimension and Variable instances keep a strong reference to the parent Dataset instance, which in turn keeps a reference to child Dimension and Variable instances, creates circular references. Circular references complicate garbage collection, which may mean increased memory usage for programs that create many Dataset instances with lots of Variables. It also will result in the Dataset object never being deleted, which means it may keep open files alive as well. Setting keepweakref=True allows Dataset instances to be garbage collected as soon as they go out of scope, potentially reducing memory usage and open file handles. However, in many cases this is not desirable, since the associated Variable instances may still be needed, but are rendered unusable when the parent Dataset instance is garbage collected.

+

memory: if not None, create or open an in-memory Dataset. If mode = r, the memory kwarg must contain a memory buffer object (an object that supports the python buffer interface). The Dataset will then be created with contents taken from this block of memory. If mode = w, the memory kwarg should contain the anticipated size of the Dataset in bytes (used only for NETCDF3 files). A memory buffer containing a copy of the Dataset is returned by the Dataset.close() method. Requires netcdf-c version 4.4.1 for mode=r, netcdf-c 4.6.2 for mode=w. To persist the file to disk, the raw bytes from the returned buffer can be written into a binary file. The Dataset can also be re-opened using this memory buffer.

+

encoding: encoding used to encode filename string into bytes. +Default is None (sys.getdefaultfileencoding() is used).

+

parallel: open for parallel access using MPI (requires mpi4py and +parallel-enabled netcdf-c and hdf5 libraries). +Default is False. If +True, comm and info kwargs may also be specified.

+

comm: MPI_Comm object for parallel access. Default None, which +means MPI_COMM_WORLD will be used. +Ignored if parallel=False.

+

info: MPI_Info object for parallel access. Default None, which +means MPI_INFO_NULL will be used. +Ignored if parallel=False.

+

auto_complex: if True, then automatically convert complex number types

+

Subclasses

+
    +
  • netCDF4._netCDF4.Group
  • +
  • netCDF4._netCDF4.MFDataset
  • +
+

Static methods

+
+
+def fromcdl(cdlfilename, ncfilename=None, mode='a', format='NETCDF4') +
+
+

fromcdl(cdlfilename, ncfilename=None, mode='a',format='NETCDF4')

+

call ncgen via subprocess to create Dataset from CDL +text representation. Requires ncgen to be installed and in $PATH.

+

cdlfilename: +CDL file.

+

ncfilename: netCDF file to create. If not given, CDL filename with suffix replaced by .nc is used.

+

mode: +Access mode to open Dataset (Default 'a').

+

format: underlying file format to use (one of 'NETCDF4', +'NETCDF4_CLASSIC', 'NETCDF3_CLASSIC', 'NETCDF3_64BIT_OFFSET' or +'NETCDF3_64BIT_DATA'. Default 'NETCDF4'.

+

Dataset instance for ncfilename is returned.
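
A short sketch (the CDL filename is hypothetical; ncgen must be on $PATH):

>>> nc = Dataset.fromcdl("example.cdl", ncfilename="example.nc")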

+
+
+

Instance variables

+
+
var auto_complex
+
+
+
+
var cmptypes
+
+
+
+
var data_model
+
+
+
+
var dimensions
+
+
+
+
var disk_format
+
+
+
+
var enumtypes
+
+
+
+
var file_format
+
+
+
+
var groups
+
+
+
+
var keepweakref
+
+
+
+
var name
+
+

string name of Group instance

+
+
var parent
+
+
+
+
var path
+
+
+
+
var variables
+
+
+
+
var vltypes
+
+
+
+
+

Methods

+
+
+def close(self) +
+
+

close(self)

+

Close the Dataset.

+
+
+def createCompoundType(self, datatype, datatype_name) +
+
+

createCompoundType(self, datatype, datatype_name)

+

Creates a new compound data type named datatype_name from the numpy +dtype object datatype.

+

Note: If the new compound data type contains other compound data types +(i.e. it is a 'nested' compound type, where not all of the elements +are homogeneous numeric data types), then the 'inner' compound types must be +created first.

+

The return value is the CompoundType class instance describing the new +datatype.

+
+
+def createDimension(self, dimname, size=None) +
+
+

createDimension(self, dimname, size=None)

+

Creates a new dimension with the given dimname and size.

+

size must be a positive integer or None, which stands for +"unlimited" (default is None). Specifying a size of 0 also +results in an unlimited dimension. The return value is the Dimension +class instance describing the new dimension. +To determine the current +maximum size of the dimension, use the len function on the Dimension +instance. To determine if a dimension is 'unlimited', use the +Dimension.isunlimited() method of the Dimension instance.

+
+
+def createEnumType(self, datatype, datatype_name, enum_dict) +
+
+

createEnumType(self, datatype, datatype_name, enum_dict)

+

Creates a new Enum data type named datatype_name from a numpy +integer dtype object datatype, and a python dictionary +defining the enum fields and values.

+

The return value is the EnumType class instance describing the new +datatype.

+
+
+def createGroup(self, groupname) +
+
+

createGroup(self, groupname)

+

Creates a new Group with the given groupname.

+

If groupname is specified as a path, using forward slashes as in unix to +separate components, then intermediate groups will be created as necessary +(analogous to mkdir -p in unix). +For example, +createGroup('/GroupA/GroupB/GroupC') will create GroupA, +GroupA/GroupB, and GroupA/GroupB/GroupC, if they don't already exist. +If the specified path describes a group that already exists, no error is +raised.

+

The return value is a Group class instance.
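
For example (hypothetical group names):

>>> fcstgrp = nc.createGroup("/forecasts/model1")   # 'forecasts' is created automatically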

+
+
+def createVLType(self, datatype, datatype_name) +
+
+

createVLType(self, datatype, datatype_name)

+

Creates a new VLEN data type named datatype_name from a numpy +dtype object datatype.

+

The return value is the VLType class instance describing the new +datatype.

+
+
+def createVariable(self,
varname,
datatype,
dimensions=(),
compression=None,
zlib=False,
complevel=4,
shuffle=True,
szip_coding='nn',
szip_pixels_per_block=8,
blosc_shuffle=1,
fletcher32=False,
contiguous=False,
chunksizes=None,
endian='native',
least_significant_digit=None,
significant_digits=None,
quantize_mode='BitGroom',
fill_value=None,
chunk_cache=None)
+
+
+

createVariable(self, varname, datatype, dimensions=(), compression=None, zlib=False, +complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, +szip_coding='nn', szip_pixels_per_block=8, blosc_shuffle=1, +endian='native', least_significant_digit=None, significant_digits=None, quantize_mode='BitGroom', +fill_value=None, chunk_cache=None)

+

Creates a new variable with the given varname, datatype, and +dimensions. If dimensions are not given, the variable is assumed to be +a scalar.

+

If varname is specified as a path, using forward slashes as in unix to separate components, then intermediate groups will be created as necessary. For example, createVariable('/GroupA/GroupB/VarC', float, ('x','y')) will create groups GroupA and GroupA/GroupB, plus the variable GroupA/GroupB/VarC, if the preceding groups don't already exist.

+

The datatype can be a numpy datatype object, or a string that describes +a numpy dtype object (like the dtype.str attribute of a numpy array). +Supported specifiers include: 'S1' or 'c' (NC_CHAR), 'i1' or 'b' or 'B' +(NC_BYTE), 'u1' (NC_UBYTE), 'i2' or 'h' or 's' (NC_SHORT), 'u2' +(NC_USHORT), 'i4' or 'i' or 'l' (NC_INT), 'u4' (NC_UINT), 'i8' (NC_INT64), +'u8' (NC_UINT64), 'f4' or 'f' (NC_FLOAT), 'f8' or 'd' (NC_DOUBLE). +datatype can also be a CompoundType instance +(for a structured, or compound array), a VLType instance +(for a variable-length array), or the python str builtin +(for a variable-length string array). Numpy string and unicode datatypes with +length greater than one are aliases for str.

+

Data from netCDF variables is presented to python as numpy arrays with +the corresponding data type.

+

dimensions must be a tuple containing Dimension instances and/or +dimension names (strings) that have been defined +previously using Dataset.createDimension(). The default value +is an empty tuple, which means the variable is a scalar.

+

If the optional keyword argument compression is set, the data will be compressed in the netCDF file using the specified compression algorithm. Currently zlib, szip, zstd, bzip2, blosc_lz, blosc_lz4, blosc_lz4hc, blosc_zlib and blosc_zstd are supported. Default is None (no compression). All of the compressors except zlib and szip use the HDF5 plugin architecture.

+

If the optional keyword zlib is True, the data will be compressed in +the netCDF file using zlib compression (default False). +The use of this option is +deprecated in favor of compression='zlib'.

+

The optional keyword complevel is an integer between 0 and 9 describing +the level of compression desired (default 4). Ignored if compression=None. +A value of zero disables compression.

+

If the optional keyword shuffle is True, the HDF5 shuffle filter will be applied before compressing the data with zlib (default True). This significantly improves compression. Ignored if zlib=False.

+

The optional kwarg blosc_shuffle is ignored unless the blosc compressor is used. blosc_shuffle can be 0 (no shuffle), 1 (byte-wise shuffle) or 2 (bit-wise shuffle). Default is 1.

+

The optional kwargs szip_coding and szip_pixels_per_block are ignored +unless the szip compressor is used. szip_coding can be ec (entropy coding) +or nn (nearest neighbor coding). Default is nn. szip_pixels_per_block +can be 4, 8, 16 or 32 (default 8).

+

If the optional keyword fletcher32 is True, the Fletcher32 HDF5 +checksum algorithm is activated to detect errors. Default False.

+

If the optional keyword contiguous is True, the variable data is +stored contiguously on disk. +Default False. Setting to True for +a variable with an unlimited dimension will trigger an error. +Fixed size variables (with no unlimited dimension) with no compression filters +are contiguous by default.

+

The optional keyword chunksizes can be used to manually specify the +HDF5 chunksizes for each dimension of the variable. +A detailed discussion of HDF chunking and I/O performance is available +here. +The default chunking scheme in the netcdf-c library is discussed +here. +Basically, you want the chunk size for each dimension to match as +closely as possible the size of the data block that users will read +from the file. chunksizes cannot be set if contiguous=True.

+

The optional keyword endian can be used to control whether the +data is stored in little or big endian format on disk. Possible +values are little, big or native (default). The library +will automatically handle endian conversions when the data is read, +but if the data is always going to be read on a computer with the +opposite format as the one used to create the file, there may be +some performance advantage to be gained by setting the endian-ness.

+

The optional keyword fill_value can be used to override the default +netCDF _FillValue (the value that the variable gets filled with before +any data is written to it, defaults given in the dict netCDF4.default_fillvals). +If fill_value is set to False, then the variable is not pre-filled.

+

If the optional keyword parameters least_significant_digit or significant_digits are +specified, variable data will be truncated (quantized). In conjunction +with compression='zlib' this produces 'lossy', but significantly more +efficient compression. For example, if least_significant_digit=1, +data will be quantized using numpy.around(scale*data)/scale, where +scale = 2**bits, and bits is determined so that a precision of 0.1 is +retained (in this case bits=4). From the +PSL metadata conventions: +"least_significant_digit – power of ten of the smallest decimal place +in unpacked data that is a reliable value." Default is None, or no +quantization, or 'lossless' compression. +If significant_digits=3 +then the data will be quantized so that three significant digits are retained, independent +of the floating point exponent. The keyword argument quantize_mode controls +the quantization algorithm (default 'BitGroom', 'BitRound' and +'GranularBitRound' also available). +The 'GranularBitRound' +algorithm may result in better compression for typical geophysical datasets. +This significant_digits kwarg is only available +with netcdf-c >= 4.9.0, and +only works with NETCDF4 or NETCDF4_CLASSIC formatted files.

+

When creating variables in a NETCDF4 or NETCDF4_CLASSIC formatted file, +HDF5 creates something called a 'chunk cache' for each variable. +The +default size of the chunk cache may be large enough to completely fill +available memory when creating thousands of variables. +The optional +keyword chunk_cache allows you to reduce (or increase) the size of +the default chunk cache when creating a variable. +The setting only +persists as long as the Dataset is open - you can use the set_var_chunk_cache +method to change it the next time the Dataset is opened. +Warning - messing with this parameter can seriously degrade performance.
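A sketch (names and sizes illustrative) of setting a small per-variable chunk cache at creation time and reading the setting back:

>>> from netCDF4 import Dataset
>>> nc = Dataset("cache_demo.nc", "w", format="NETCDF4")
>>> y = nc.createDimension("y", 100000)
>>> # roughly 1 MiB chunk cache for this variable, for the life of this Dataset
>>> v = nc.createVariable("big", "f4", ("y",), chunk_cache=2**20)
>>> size, nelems, preemption = v.get_var_chunk_cache()
>>> nc.close()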

+

The return value is the Variable class instance describing the new +variable.

+

A list of names corresponding to netCDF variable attributes can be +obtained with the Variable method Variable.ncattrs(). A dictionary +containing all the netCDF attribute name/value pairs is provided by +the __dict__ attribute of a Variable instance.

+

Variable instances behave much like array objects. Data can be assigned to or retrieved from a variable with indexing and slicing operations on the Variable instance. A Variable instance has six Dataset standard attributes: dimensions, dtype, shape, ndim, name and least_significant_digit. Application programs should never modify these attributes. The dimensions attribute is a tuple containing the names of the dimensions associated with this variable. The dtype attribute is a string describing the variable's data type (i4, f8, S1, etc). The shape attribute is a tuple describing the current sizes of all the variable's dimensions. The name attribute is a string containing the name of the Variable instance. The least_significant_digit attribute describes the power of ten of the smallest decimal place in the data that contains a reliable value; data is truncated to this precision when assigned to the Variable instance. The ndim attribute is the number of variable dimensions.
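For example (illustrative file, dimension and variable names):

>>> from netCDF4 import Dataset
>>> nc = Dataset("var_demo.nc", "w", format="NETCDF4")
>>> time = nc.createDimension("time", None)
>>> temp = nc.createVariable("temp", "f8", ("time",))
>>> temp.dimensions
('time',)
>>> temp.ndim, temp.name
(1, 'temp')
>>> temp.shape          # no data written yet along the unlimited dimension
(0,)
>>> nc.close()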

+
+
+def delncattr(self, name) +
+
+

delncattr(self,name)

+

delete a netCDF dataset or group attribute. +Use if you need to delete a +netCDF attribute with the same name as one of the reserved python +attributes.

+
+
+def filepath(self, encoding=None) +
+
+

filepath(self,encoding=None)

+

Get the file system path (or the opendap URL) which was used to +open/create the Dataset. Requires netcdf >= 4.1.2. +The path +is decoded into a string using sys.getfilesystemencoding() by default, this can be +changed using the encoding kwarg.
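For example (filename illustrative):

>>> from netCDF4 import Dataset
>>> nc = Dataset("path_demo.nc", "w")
>>> nc.filepath()
'path_demo.nc'
>>> nc.close()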

+
+
+def get_variables_by_attributes(self, **kwargs) +
+
+

get_variables_by_attributes(self, **kwargs)

+

Returns a list of variables that match specific conditions.

+

Can pass in key=value parameters and variables are returned that +contain all of the matches. For example,

+
>>> # Get variables with x-axis attribute.
+>>> vs = nc.get_variables_by_attributes(axis='X')
+>>> # Get variables with matching "standard_name" attribute
+>>> vs = nc.get_variables_by_attributes(standard_name='northward_sea_water_velocity')
+
+

Can pass in key=callable parameter and variables are returned if the +callable returns True. +The callable should accept a single parameter, +the attribute value. +None is given as the attribute value when the +attribute does not exist on the variable. For example,

+
>>> # Get Axis variables
+>>> vs = nc.get_variables_by_attributes(axis=lambda v: v in ['X', 'Y', 'Z', 'T'])
+>>> # Get variables that don't have an "axis" attribute
+>>> vs = nc.get_variables_by_attributes(axis=lambda v: v is None)
+>>> # Get variables that have a "grid_mapping" attribute
+>>> vs = nc.get_variables_by_attributes(grid_mapping=lambda v: v is not None)
+
+
+
+def getncattr(self, name, encoding='utf-8') +
+
+

getncattr(self,name)

+

retrieve a netCDF dataset or group attribute. +Use if you need to get a netCDF attribute with the same +name as one of the reserved python attributes.

+

optional kwarg encoding can be used to specify the character encoding of a string attribute (default is utf-8).
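A minimal illustration of the setncattr/getncattr/delncattr round trip (the attribute name and value are arbitrary):

>>> from netCDF4 import Dataset
>>> nc = Dataset("attr_demo.nc", "w")
>>> nc.setncattr("history", "created for the example")
>>> nc.getncattr("history")
'created for the example'
>>> nc.delncattr("history")
>>> "history" in nc.ncattrs()
False
>>> nc.close()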

+
+
+def has_blosc_filter(self) +
+
+

has_blosc_filter(self) +returns True if blosc compression filter is available

+
+
+def has_bzip2_filter(self) +
+
+

has_bzip2_filter(self) +returns True if bzip2 compression filter is available

+
+
+def has_szip_filter(self) +
+
+

has_szip_filter(self) +returns True if szip compression filter is available

+
+
+def has_zstd_filter(self) +
+
+

has_zstd_filter(self) +returns True if zstd compression filter is available

+
+
+def isopen(self) +
+
+

isopen(self)

+

Is the Dataset open or closed?

+
+
+def ncattrs(self) +
+
+

ncattrs(self)

+

return netCDF global attribute names for this Dataset or Group in a list.

+
+
+def renameAttribute(self, oldname, newname) +
+
+

renameAttribute(self, oldname, newname)

+

rename a Dataset or Group attribute named oldname to newname.

+
+
+def renameDimension(self, oldname, newname) +
+
+

renameDimension(self, oldname, newname)

+

rename a Dimension named oldname to newname.

+
+
+def renameGroup(self, oldname, newname) +
+
+

renameGroup(self, oldname, newname)

+

rename a Group named oldname to newname (requires netcdf >= 4.3.1).

+
+
+def renameVariable(self, oldname, newname) +
+
+

renameVariable(self, oldname, newname)

+

rename a Variable named oldname to newname

+
+
+def set_always_mask(self, value) +
+
+

set_always_mask(self, True_or_False)

+

Call Variable.set_always_mask() for all variables contained in +this Dataset or Group, as well as for all +variables in all its subgroups.

+

True_or_False: Boolean determining if automatic conversion of +masked arrays with no missing values to regular numpy arrays shall be +applied for all variables. Default True. Set to False to restore the default behaviour +in versions prior to 1.4.1 (numpy array returned unless missing values are present, +otherwise masked array returned).

+

Note: Calling this function only affects existing +variables. Variables created after calling this function will follow +the default behaviour.

+
+
+def set_auto_chartostring(self, value) +
+
+

set_auto_chartostring(self, True_or_False)

+

Call Variable.set_auto_chartostring() for all variables contained in this Dataset or +Group, as well as for all variables in all its subgroups.

+

True_or_False: Boolean determining if automatic conversion of +all character arrays <–> string arrays should be performed for +character variables (variables of type NC_CHAR or S1) with the +_Encoding attribute set.

+

Note: Calling this function only affects existing variables. Variables created +after calling this function will follow the default behaviour.

+
+
+def set_auto_mask(self, value) +
+
+

set_auto_mask(self, True_or_False)

+

Call Variable.set_auto_mask() for all variables contained in this Dataset or +Group, as well as for all variables in all its subgroups. Only affects +Variables with primitive or enum types (not compound or vlen Variables).

+

True_or_False: Boolean determining if automatic conversion to masked arrays +shall be applied for all variables.

+

Note: Calling this function only affects existing variables. Variables created +after calling this function will follow the default behaviour.

+
+
+def set_auto_maskandscale(self, value) +
+
+

set_auto_maskandscale(self, True_or_False)

+

Call Variable.set_auto_maskandscale() for all variables contained in this Dataset or +Group, as well as for all variables in all its subgroups.

+

True_or_False: Boolean determining if automatic conversion to masked arrays +and variable scaling shall be applied for all variables.

+

Note: Calling this function only affects existing variables. Variables created +after calling this function will follow the default behaviour.

+
+
+def set_auto_scale(self, value) +
+
+

set_auto_scale(self, True_or_False)

+

Call Variable.set_auto_scale() for all variables contained in this Dataset or +Group, as well as for all variables in all its subgroups.

+

True_or_False: Boolean determining if automatic variable scaling +shall be applied for all variables.

+

Note: Calling this function only affects existing variables. Variables created +after calling this function will follow the default behaviour.

+
+
+def set_fill_off(self) +
+
+

set_fill_off(self)

+

Sets the fill mode for a Dataset open for writing to off.

+

This will prevent the data from being pre-filled with fill values, which +may result in some performance improvements. However, you must then make +sure the data is actually written before being read.

+
+
+def set_fill_on(self) +
+
+

set_fill_on(self)

+

Sets the fill mode for a Dataset open for writing to on.

+

This causes data to be pre-filled with fill values. The fill values can be controlled by the variable's _FillValue attribute, but it is usually sufficient to use the netCDF default _FillValue (defined separately for each variable type). The default behavior of the netCDF library corresponds to set_fill_on. Data which are equal to the _FillValue indicate that the variable was created, but never written to.

+
+
+def set_ncstring_attrs(self, value) +
+
+

set_ncstring_attrs(self, True_or_False)

+

Call Variable.set_ncstring_attrs() for all variables contained in +this Dataset or Group, as well as for all its +subgroups and their variables.

+

True_or_False: Boolean determining if all string attributes are created as variable-length NC_STRINGs (if True), or if ascii text attributes are stored as NC_CHARs (if False; default)

+

Note: Calling this function only affects newly created attributes +of existing (sub-) groups and their variables.

+
+
+def setncattr(self, name, value) +
+
+

setncattr(self,name,value)

+

set a netCDF dataset or group attribute using name,value pair. Use if you need to set a netCDF attribute with the same name as one of the reserved python attributes.

+
+
+def setncattr_string(self, name, value) +
+
+

setncattr_string(self,name,value)

+

set a netCDF dataset or group string attribute using name,value pair. +Use if you need to ensure that a netCDF attribute is created with type +NC_STRING if the file format is NETCDF4.

+
+
+def setncatts(self, attdict) +
+
+

setncatts(self,attdict)

+

set a bunch of netCDF dataset or group attributes at once using a python dictionary. +This may be faster when setting a lot of attributes for a NETCDF3 +formatted file, since nc_redef/nc_enddef is not called in between setting +each attribute
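For example (attribute names and values are arbitrary):

>>> from netCDF4 import Dataset
>>> nc = Dataset("meta_demo.nc", "w")
>>> nc.setncatts({"title": "demo", "institution": "example org", "Conventions": "CF-1.8"})
>>> sorted(nc.ncattrs())
['Conventions', 'institution', 'title']
>>> nc.close()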

+
+
+def sync(self) +
+
+

sync(self)

+

Writes all buffered data in the Dataset to the disk file.

+
+
+def tocdl(self, coordvars=False, data=False, outfile=None) +
+
+

tocdl(self, coordvars=False, data=False, outfile=None)

+

call ncdump via subprocess to create CDL +text representation of Dataset. Requires ncdump +to be installed and in $PATH.

+

coordvars: include coordinate variable data (via ncdump -c). Default False

+

data: if True, write out variable data (Default False).

+

outfile: If not None, file to output ncdump to. Default is to return a string.
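Usage sketch (assumes the ncdump utility is installed and on $PATH; filenames illustrative):

>>> from netCDF4 import Dataset
>>> with Dataset("cdl_demo.nc", "w") as nc:
...     x = nc.createDimension("x", 3)
...     v = nc.createVariable("v", "i4", ("x",))
>>> nc = Dataset("cdl_demo.nc")                   # reopen read-only
>>> cdl = nc.tocdl()                              # CDL returned as a string
>>> nc.tocdl(data=True, outfile="cdl_demo.cdl")   # or written to a file
>>> nc.close()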

+
+
+
+
+class Dimension +(...) +
+
+

A netCDF Dimension is used to describe the coordinates of a Variable. +See Dimension for more details.

+

The current maximum size of a Dimension instance can be obtained by +calling the python len function on the Dimension instance. The +Dimension.isunlimited() method of a Dimension instance can be used to +determine if the dimension is unlimited.
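For example (names illustrative):

>>> from netCDF4 import Dataset
>>> nc = Dataset("dim_demo.nc", "w")
>>> time = nc.createDimension("time", None)   # unlimited
>>> lat = nc.createDimension("lat", 73)
>>> len(lat), lat.isunlimited()
(73, False)
>>> time.isunlimited()
True
>>> nc.close()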

+

Read-only class variables:

+

name: String name, used when creating a Variable with +Dataset.createVariable().

+

size: Current Dimension size (same as len(d), where d is a +Dimension instance).

+

__init__(self, group, name, size=None)

+

Dimension constructor.

+

group: Group instance to associate with dimension.

+

name: Name of the dimension.

+

size: Size of the dimension. None or 0 means unlimited. (Default None).

+

Note: Dimension instances should be created using the +Dataset.createDimension() method of a Group or +Dataset instance, not using Dimension directly.

+

Instance variables

+
+
var name
+
+

string name of Dimension instance

+
+
var size
+
+

current size of Dimension (calls len on Dimension instance)

+
+
+

Methods

+
+
+def group(self) +
+
+

group(self)

+

return the group that this Dimension is a member of.

+
+
+def isunlimited(self) +
+
+

isunlimited(self)

+

returns True if the Dimension instance is unlimited, False otherwise.

+
+
+
+
+class EnumType +(...) +
+
+

An EnumType instance is used to describe an Enum data type, and can be passed to the Dataset.createVariable() method of a Dataset or Group instance. See EnumType for more details.

+

The instance variables dtype, name and enum_dict should not be modified by +the user.

+

__init__(group, datatype, datatype_name, enum_dict)

+

EnumType constructor.

+

group: Group instance to associate with the Enum datatype.

+

datatype: A numpy integer dtype object describing the base type for the Enum.

+

datatype_name: a Python string containing a description of the +Enum data type.

+

enum_dict: a Python dictionary containing the Enum field/value +pairs.

+

Note: EnumType instances should be created using the +Dataset.createEnumType() method of a Dataset or +Group instance, not using this class directly.
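A sketch (type, member and variable names are illustrative) of creating an Enum type and a variable that uses it:

>>> import numpy as np
>>> from netCDF4 import Dataset
>>> nc = Dataset("enum_demo.nc", "w", format="NETCDF4")
>>> cloud_t = nc.createEnumType(np.uint8, "cloud_t",
...     {"Clear": 0, "Cumulus": 1, "Stratus": 2, "Missing": 255})
>>> time = nc.createDimension("time", None)
>>> cloud = nc.createVariable("primary_cloud", cloud_t, ("time",), fill_value=255)
>>> cloud[0] = 1          # values must be members of the enum
>>> nc.close()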

+

Instance variables

+
+
var dtype
+
+
+
+
var enum_dict
+
+
+
+
var name
+
+
+
+
+
+
+class Group +(...) +
+
+

Groups define a hierarchical namespace within a netCDF file. They are analogous to directories in a unix filesystem. Each Group behaves like a Dataset within a Dataset, and can contain its own variables, dimensions and attributes (and other Groups). See Group for more details.

+

Group inherits from Dataset, so all the +Dataset class methods and variables are available +to a Group instance (except the close method).

+

Additional read-only class variables:

+

name: String describing the group name.

+

__init__(self, parent, name) +Group constructor.

+

parent: Group instance for the parent group. +If being created +in the root group, use a Dataset instance.

+

name: Name of the group.

+

Note: Group instances should be created using the +Dataset.createGroup() method of a Dataset instance, or +another Group instance, not using this class directly.
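For example (group names illustrative):

>>> from netCDF4 import Dataset
>>> nc = Dataset("group_demo.nc", "w", format="NETCDF4")
>>> fcst = nc.createGroup("forecasts")
>>> model1 = nc.createGroup("/forecasts/model1")   # intermediate groups created as needed
>>> list(nc.groups), list(fcst.groups)
(['forecasts'], ['model1'])
>>> nc.close()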

+

Ancestors

+
    +
  • netCDF4._netCDF4.Dataset
  • +
+

Methods

+
+
+def close(self) +
+
+

close(self)

+

overrides Dataset close method which does not apply to Group +instances, raises OSError.

+
+
+
+
+class MFDataset +(files, check=False, aggdim=None, exclude=[], master_file=None) +
+
+

Class for reading multi-file netCDF Datasets, making variables +spanning multiple files appear as if they were in one file. +Datasets must be in NETCDF4_CLASSIC, NETCDF3_CLASSIC, NETCDF3_64BIT_OFFSET +or NETCDF3_64BIT_DATA format (NETCDF4 Datasets won't work).

+

Adapted from pycdf by Andre Gosselin.

+

Example usage (See MFDataset for more details):

+
>>> import numpy as np
+>>> # create a series of netCDF files with a variable sharing
+>>> # the same unlimited dimension.
+>>> for nf in range(10):
+...     with Dataset("mftest%s.nc" % nf, "w", format='NETCDF4_CLASSIC') as f:
+...         f.createDimension("x",None)
+...         x = f.createVariable("x","i",("x",))
+...         x[0:10] = np.arange(nf*10,10*(nf+1))
+>>> # now read all those files in at once, in one Dataset.
+>>> f = MFDataset("mftest*nc")
+>>> print(f.variables["x"][:])
+[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
+ 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
+ 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
+ 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
+ 96 97 98 99]
+
+

__init__(self, files, check=False, aggdim=None, exclude=[], +master_file=None)

+

Open a Dataset spanning multiple files, making it look as if it was a single file. Variables in the list of files that share the same dimension (specified with the keyword aggdim) are aggregated. If aggdim is not specified, the unlimited dimension is aggregated. Currently, aggdim must be the leftmost (slowest varying) dimension of each of the variables to be aggregated.

+

files: either a sequence of netCDF files or a string with a wildcard (converted to a sorted list of files using glob). If the master_file kwarg is not specified, the first file in the list will become the "master" file, defining all the variables with an aggregation dimension which may span subsequent files. Attribute access returns attributes only from the "master" file. The files are always opened in read-only mode.

+

check: True if you want to do consistency checking to ensure the +correct variables structure for all of the netcdf files. +Checking makes +the initialization of the MFDataset instance much slower. Default is +False.

+

aggdim: The name of the dimension to aggregate over (must +be the leftmost dimension of each of the variables to be aggregated). +If None (default), aggregate over the unlimited dimension.

+

exclude: A list of variable names to exclude from aggregation. +Default is an empty list.

+

master_file: file to use as "master file", defining all the +variables with an aggregation dimension and all global attributes.

+

Ancestors

+
    +
  • netCDF4._netCDF4.Dataset
  • +
+

Methods

+
+
+def close(self) +
+
+

close(self)

+

close all the open files.

+
+
+def isopen(self) +
+
+

isopen(self)

+

True if all files are open, False otherwise.

+
+
+def ncattrs(self) +
+
+

ncattrs(self)

+

return the netcdf attribute names from the master file.

+
+
+
+
+class MFTime +(time, units=None, calendar=None) +
+
+

Class providing an interface to a MFDataset time Variable by imposing a unique common +time unit and/or calendar to all files.

+

Example usage (See MFTime for more details):

+
>>> import numpy as np
+>>> f1 = Dataset("mftest_1.nc","w", format="NETCDF4_CLASSIC")
+>>> f2 = Dataset("mftest_2.nc","w", format="NETCDF4_CLASSIC")
+>>> f1.createDimension("time",None)
+>>> f2.createDimension("time",None)
+>>> t1 = f1.createVariable("time","i",("time",))
+>>> t2 = f2.createVariable("time","i",("time",))
+>>> t1.units = "days since 2000-01-01"
+>>> t2.units = "days since 2000-02-01"
+>>> t1.calendar = "standard"
+>>> t2.calendar = "standard"
+>>> t1[:] = np.arange(31)
+>>> t2[:] = np.arange(30)
+>>> f1.close()
+>>> f2.close()
+>>> # Read the two files in at once, in one Dataset.
+>>> f = MFDataset("mftest_*nc")
+>>> t = f.variables["time"]
+>>> print(t.units)
+days since 2000-01-01
+>>> print(t[32])  # The value written in the file, inconsistent with the MF time units.
+1
+>>> T = MFTime(t)
+>>> print(T[32])
+32
+
+

__init__(self, time, units=None, calendar=None)

+

Create a time Variable with units consistent across a multifile +dataset.

+

time: Time variable from a MFDataset.

+

units: Time units, for example, 'days since 1979-01-01'. If None, +use the units from the master variable.

+

calendar: Calendar overload to use across all files, for example, +'standard' or 'gregorian'. If None, check that the calendar attribute +is present on each variable and values are unique across files raising a +ValueError otherwise.

+

Ancestors

+
    +
  • netCDF4._netCDF4._Variable
  • +
+
+
+class VLType +(...) +
+
+

A VLType instance is used to describe a variable length (VLEN) data type, and can be passed to the Dataset.createVariable() method of a Dataset or Group instance. See VLType for more details.

+

The instance variables dtype and name should not be modified by +the user.

+

__init__(group, datatype, datatype_name)

+

VLType constructor.

+

group: Group instance to associate with the VLEN datatype.

+

datatype: A numpy dtype object describing the component type for the variable length array.

+

datatype_name: a Python string containing a description of the +VLEN data type.

+

Note: VLType instances should be created using the +Dataset.createVLType() method of a Dataset or +Group instance, not using this class directly.
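A sketch (type, dimension and variable names illustrative) of a variable-length integer type holding ragged rows:

>>> import numpy as np
>>> from netCDF4 import Dataset
>>> nc = Dataset("vlen_demo.nc", "w", format="NETCDF4")
>>> vl_t = nc.createVLType(np.int32, "ragged_int")
>>> row = nc.createDimension("row", 2)
>>> v = nc.createVariable("ragged", vl_t, ("row",))
>>> v[0] = np.array([1, 2, 3], dtype=np.int32)   # rows may have different lengths
>>> v[1] = np.array([4], dtype=np.int32)
>>> nc.close()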

+

Instance variables

+
+
var dtype
+
+
+
+
var name
+
+
+
+
+
+
+class Variable +(...) +
+
+

A netCDF Variable is used to read and write netCDF data. +They are +analogous to numpy array objects. See Variable for more +details.

+

A list of attribute names corresponding to netCDF attributes defined for +the variable can be obtained with the Variable.ncattrs() method. These +attributes can be created by assigning to an attribute of the +Variable instance. A dictionary containing all the netCDF attribute +name/value pairs is provided by the __dict__ attribute of a +Variable instance.

+

The following class variables are read-only:

+

dimensions: A tuple containing the names of the +dimensions associated with this variable.

+

dtype: A numpy dtype object describing the +variable's data type.

+

ndim: The number of variable dimensions.

+

shape: A tuple with the current shape (length of all dimensions).

+

scale: If True, scale_factor and add_offset are +applied, and signed integer data is automatically converted to +unsigned integer data if the _Unsigned attribute is set to "true" or "True". +Default is True, can be reset using Variable.set_auto_scale() and +Variable.set_auto_maskandscale() methods.

+

mask: If True, data is automatically converted to/from masked +arrays when missing values or fill values are present. Default is True, can be +reset using Variable.set_auto_mask() and Variable.set_auto_maskandscale() +methods. Only relevant for Variables with primitive or enum types (ignored +for compound and vlen Variables).

+

chartostring(): If True, data is automatically converted to/from character +arrays to string arrays when the _Encoding variable attribute is set. +Default is True, can be reset using +Variable.set_auto_chartostring() method.

+

least_significant_digit: Describes the power of ten of the smallest decimal place in the data that contains a reliable value. Data is truncated to this decimal place when it is assigned to the Variable instance. If None, the data is not truncated.

+

significant_digits: New in version 1.6.0. Describes the number of significant digits in the data that contains a reliable value. Data is truncated to retain this number of significant digits when it is assigned to the Variable instance. If None, the data is not truncated. Only available with netcdf-c >= 4.9.0, and only works with NETCDF4 or NETCDF4_CLASSIC formatted files. The number of significant digits used in the quantization of variable data can be obtained using the Variable.significant_digits method. Default None - no quantization done.

+

quantize_mode: New in version 1.6.0. Controls +the quantization algorithm (default 'BitGroom', 'BitRound' and +'GranularBitRound' also available). +The 'GranularBitRound' +algorithm may result in better compression for typical geophysical datasets. +Ignored if significant_digits not specified. If 'BitRound' is used, then +significant_digits is interpreted as binary (not decimal) digits.

+

__orthogonal_indexing__: Always True. +Indicates to client code +that the object supports 'orthogonal indexing', which means that slices +that are 1d arrays or lists slice along each dimension independently. +This +behavior is similar to Fortran or Matlab, but different than numpy.
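For example (illustrative names), integer-sequence indices select along each axis independently:

>>> import numpy as np
>>> from netCDF4 import Dataset
>>> nc = Dataset("index_demo.nc", "w", format="NETCDF4")
>>> y = nc.createDimension("y", 4)
>>> x = nc.createDimension("x", 4)
>>> v = nc.createVariable("v", "f4", ("y", "x"))
>>> v[:] = np.arange(16.0).reshape(4, 4)
>>> v[[0, 2], [1, 3]].shape    # a 2x2 block (numpy fancy indexing would give shape (2,))
(2, 2)
>>> nc.close()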

+

datatype: numpy data type (for primitive data types) or VLType/CompoundType +instance (for compound or vlen data types).

+

name: String name.

+

size: The number of stored elements.

+

__init__(self, group, name, datatype, dimensions=(), compression=None, zlib=False, +complevel=4, shuffle=True, szip_coding='nn', szip_pixels_per_block=8, +blosc_shuffle=1, fletcher32=False, contiguous=False, +chunksizes=None, endian='native', +least_significant_digit=None,fill_value=None,chunk_cache=None)

+

Variable constructor.

+

group: Group or Dataset instance to associate with variable.

+

name: Name of the variable.

+

datatype: Variable data type. Can be specified by providing a numpy dtype object, or a string that describes a numpy dtype object. Supported values, corresponding to str attribute of numpy dtype objects, include 'f4' (32-bit floating point), 'f8' (64-bit floating point), 'i4' (32-bit signed integer), 'i2' (16-bit signed integer), 'i8' (64-bit signed integer), 'i1' (8-bit signed integer), 'u1' (8-bit unsigned integer), 'u2' (16-bit unsigned integer), 'u4' (32-bit unsigned integer), 'u8' (64-bit unsigned integer), or 'S1' (single-character string). For compatibility with Scientific.IO.NetCDF, the old Numeric single character typecodes can also be used ('f' instead of 'f4', 'd' instead of 'f8', 'h' or 's' instead of 'i2', 'b' or 'B' instead of 'i1', 'c' instead of 'S1', and 'i' or 'l' instead of 'i4'). datatype can also be a CompoundType instance (for a structured, or compound array), a VLType instance (for a variable-length array), or the python str builtin (for a variable-length string array). Numpy string and unicode datatypes with length greater than one are aliases for str.

+

dimensions: a tuple containing the variable's Dimension instances +(defined previously with createDimension). Default is an empty tuple +which means the variable is a scalar (and therefore has no dimensions).

+

compression: compression algorithm to use. +Currently zlib,szip,zstd,bzip2,blosc_lz,blosc_lz4,blosc_lz4hc, +blosc_zlib and blosc_zstd are supported. +Default is None (no compression). +All of the compressors except +zlib and szip use the HDF5 plugin architecture.

+

zlib: if True, data assigned to the Variable +instance is compressed on disk. Default False. Deprecated - use +compression='zlib' instead.

+

complevel: the level of compression to use (1 is the fastest, +but poorest compression, 9 is the slowest but best compression). Default 4. +Ignored if compression=None or szip. A value of 0 disables compression.

+

shuffle: if True, the HDF5 shuffle filter is applied +to improve zlib compression. Default True. Ignored unless compression = 'zlib'.

+

blosc_shuffle: shuffle filter inside blosc compressor (only relevant if compression kwarg set to one of the blosc compressors). Can be 0 (no blosc shuffle), 1 (bytewise shuffle) or 2 (bitwise shuffle). Default is 1. Ignored if blosc compressor not used.

+

szip_coding: szip coding method. Can be ec (entropy coding) +or nn (nearest neighbor coding). Default is nn. +Ignored if szip compressor not used.

+

szip_pixels_per_block: Can be 4,8,16 or 32 (Default 8). +Ignored if szip compressor not used.

+

fletcher32: if True (default False), the Fletcher32 checksum +algorithm is used for error detection.

+

contiguous: if True (default False), the variable data is +stored contiguously on disk. +Default False. Setting to True for +a variable with an unlimited dimension will trigger an error. Fixed +size variables (with no unlimited dimension) with no compression +filters are contiguous by default.

+

chunksizes: Can be used to specify the HDF5 chunksizes for each +dimension of the variable. A detailed discussion of HDF chunking and I/O +performance is available +here. +The default chunking scheme in the netcdf-c library is discussed +here. +Basically, you want the chunk size for each dimension to match as +closely as possible the size of the data block that users will read +from the file. chunksizes cannot be set if contiguous=True.

+

endian: Can be used to control whether the +data is stored in little or big endian format on disk. Possible +values are little, big or native (default). The library +will automatically handle endian conversions when the data is read, +but if the data is always going to be read on a computer with the +opposite format as the one used to create the file, there may be +some performance advantage to be gained by setting the endian-ness. +For netCDF 3 files (that don't use HDF5), only endian='native' is allowed.

+

The compression, zlib, complevel, shuffle, fletcher32, contiguous and chunksizes +keywords are silently ignored for netCDF 3 files that do not use HDF5.

+

least_significant_digit: If this or significant_digits are specified, variable data will be truncated (quantized). In conjunction with compression='zlib' this produces 'lossy', but significantly more efficient compression. For example, if least_significant_digit=1, data will be quantized using numpy.around(scale*data)/scale, where scale = 2**bits, and bits is determined so that a precision of 0.1 is retained (in this case bits=4). Default is None, or no quantization.

+

significant_digits: New in version 1.6.0. +As described for least_significant_digit +except the number of significant digits retained is prescribed independent +of the floating point exponent. Default None - no quantization done.

+

quantize_mode: New in version 1.6.0. Controls the quantization algorithm (default 'BitGroom', 'BitRound' and 'GranularBitRound' also available). The 'GranularBitRound' algorithm may result in better compression for typical geophysical datasets. Ignored if significant_digits not specified. If 'BitRound' is used, then significant_digits is interpreted as binary (not decimal) digits.

+

fill_value: +If specified, the default netCDF fill value (the +value that the variable gets filled with before any data is written to it) +is replaced with this value, and the _FillValue attribute is set. +If fill_value is set to False, then the variable is not pre-filled. +The default netCDF fill values can be found in the dictionary netCDF4.default_fillvals. +If not set, the default fill value will be used but no _FillValue attribute will be created +(this is the default behavior of the netcdf-c library). If you want to use the +default fill value, but have the _FillValue attribute set, use +fill_value='default' (note - this only works for primitive data types). Variable.get_fill_value() +can be used to retrieve the fill value, even if the _FillValue attribute is not set.

+

chunk_cache: If specified, sets the chunk cache size for this variable. +Persists as long as Dataset is open. Use set_var_chunk_cache to +change it when Dataset is re-opened.

+

Note: Variable instances should be created using the +Dataset.createVariable() method of a Dataset or +Group instance, not using this class directly.

+

Instance variables

+
+
var always_mask
+
+
+
+
var auto_complex
+
+
+
+
var chartostring
+
+
+
+
var datatype
+
+

numpy data type (for primitive data types) or +VLType/CompoundType/EnumType instance +(for compound, vlen +or enum data types)

+
+
var dimensions
+
+

get the variable's dimension names

+
+
var dtype
+
+
+
+
var mask
+
+
+
+
var name
+
+

string name of Variable instance

+
+
var ndim
+
+
+
+
var scale
+
+
+
+
var shape
+
+

find current sizes of all variable dimensions

+
+
var size
+
+

Return the number of stored elements.

+
+
+

Methods

+
+
+def assignValue(self, val) +
+
+

assignValue(self, val)

+

assign a value to a scalar variable. +Provided for compatibility with +Scientific.IO.NetCDF, can also be done by assigning to an Ellipsis slice ([…]).

+
+
+def chunking(self) +
+
+

chunking(self)

+

return variable chunking information. +If the dataset is +defined to be contiguous (and hence there is no chunking) the word 'contiguous' +is returned. +Otherwise, a sequence with the chunksize for +each dimension is returned.

+
+
+def delncattr(self, name) +
+
+

delncattr(self,name)

+

delete a netCDF variable attribute. +Use if you need to delete a +netCDF attribute with the same name as one of the reserved python +attributes.

+
+
+def endian(self) +
+
+

endian(self)

+

return endian-ness (little,big,native) of variable (as stored in HDF5 file).

+
+
+def filters(self) +
+
+

filters(self)

+

return dictionary containing HDF5 filter parameters.

+
+
+def getValue(self) +
+
+

getValue(self)

+

get the value of a scalar variable. +Provided for compatibility with +Scientific.IO.NetCDF, can also be done by slicing with an Ellipsis ([…]).

+
+
+def get_dims(self) +
+
+

get_dims(self)

+

return a tuple of Dimension instances associated with this +Variable.

+
+
+def get_fill_value(self) +
+
+

get_fill_value(self)

+

return the fill value associated with this Variable (returns None if data is not +pre-filled). Works even if default fill value was used, and _FillValue attribute +does not exist.

+
+
+def get_var_chunk_cache(self) +
+
+

get_var_chunk_cache(self)

+

return variable chunk cache information in a tuple (size,nelems,preemption). +See netcdf C library documentation for nc_get_var_chunk_cache for +details.

+
+
+def getncattr(self, name, encoding='utf-8') +
+
+

getncattr(self,name)

+

retrieve a netCDF variable attribute. Use if you need to get a netCDF attribute with the same name as one of the reserved python attributes.

+

optional kwarg encoding can be used to specify the character encoding of a string attribute (default is utf-8).

+
+
+def group(self) +
+
+

group(self)

+

return the group that this Variable is a member of.

+
+
+def ncattrs(self) +
+
+

ncattrs(self)

+

return netCDF attribute names for this Variable in a list.

+
+
+def quantization(self) +
+
+

quantization(self)

+

return number of significant digits and the algorithm used in quantization. +Returns None if quantization not active.

+
+
+def renameAttribute(self, oldname, newname) +
+
+

renameAttribute(self, oldname, newname)

+

rename a Variable attribute named oldname to newname.

+
+
+def set_always_mask(self, always_mask) +
+
+

set_always_mask(self,always_mask)

+

turn on or off conversion of data without missing values to regular +numpy arrays.

+

always_mask is a Boolean determining if automatic conversion of +masked arrays with no missing values to regular numpy arrays shall be +applied. Default is True. Set to False to restore the default behaviour +in versions prior to 1.4.1 (numpy array returned unless missing values are present, +otherwise masked array returned).

+
+
+def set_auto_chartostring(self, chartostring) +
+
+

set_auto_chartostring(self,chartostring)

+

turn on or off automatic conversion of character variable data to and +from numpy fixed length string arrays when the _Encoding variable attribute +is set.

+

If chartostring() is set to True, when data is read from a character variable (dtype = S1) that has an _Encoding attribute, it is converted to a numpy fixed length unicode string array (dtype = UN, where N is the length of the rightmost dimension of the variable). The value of _Encoding is the unicode encoding that is used to decode the bytes into strings.

+

When numpy string data is written to a variable it is converted back to +individual bytes, with the number of bytes in each string equalling the +rightmost dimension of the variable.

+

The default value of chartostring() is True +(automatic conversions are performed).
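A sketch (names illustrative) of the automatic character/string conversion driven by the _Encoding attribute:

>>> import numpy as np
>>> from netCDF4 import Dataset
>>> nc = Dataset("char_demo.nc", "w", format="NETCDF4")
>>> n = nc.createDimension("n", 2)
>>> nchar = nc.createDimension("nchar", 3)
>>> v = nc.createVariable("names", "S1", ("n", "nchar"))
>>> v._Encoding = "ascii"                        # enables the conversion
>>> v[:] = np.array(["foo", "bar"], dtype="S3")  # strings written as individual bytes
>>> print(v[:])                                  # read back as fixed-length strings
['foo' 'bar']
>>> nc.close()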

+
+
+def set_auto_mask(self, mask) +
+
+

set_auto_mask(self,mask)

+

turn on or off automatic conversion of variable data to and from masked arrays.

+

If mask is set to True, when data is read from a variable it is converted to a masked array if any of the values are exactly equal to either the netCDF _FillValue or the value specified by the missing_value variable attribute. The fill_value of the masked array is set to the missing_value attribute (if it exists), otherwise the netCDF _FillValue attribute (which has a default value for each data type). If the variable has no missing_value attribute, the _FillValue is used instead. If the variable has valid_min/valid_max and missing_value attributes, data outside the specified range will be masked. When data is written to a variable, the masked array is converted back to a regular numpy array by replacing all the masked values by the missing_value attribute of the variable (if it exists). If the variable has no missing_value attribute, the _FillValue is used instead.

+

The default value of mask is True +(automatic conversions are performed).

+
+
+def set_auto_maskandscale(self, maskandscale) +
+
+

set_auto_maskandscale(self,maskandscale)

+

turn on or off automatic conversion of variable data to and +from masked arrays, automatic packing/unpacking of variable +data using scale_factor and add_offset attributes and +automatic conversion of signed integer data to unsigned integer +data if the _Unsigned attribute exists and is set to "true" (or "True").

+

If maskandscale is set to True, when data is read from a variable it is converted to a masked array if any of the values are exactly equal to either the netCDF _FillValue or the value specified by the missing_value variable attribute. The fill_value of the masked array is set to the missing_value attribute (if it exists), otherwise the netCDF _FillValue attribute (which has a default value for each data type). If the variable has no missing_value attribute, the _FillValue is used instead. If the variable has valid_min/valid_max and missing_value attributes, data outside the specified range will be masked. When data is written to a variable, the masked array is converted back to a regular numpy array by replacing all the masked values by the missing_value attribute of the variable (if it exists). If the variable has no missing_value attribute, the _FillValue is used instead.

+

If maskandscale is set to True, and the variable has a +scale_factor or an add_offset attribute, then data read +from that variable is unpacked using::

+
data = self.scale_factor*data + self.add_offset
+
+

When data is written to a variable it is packed using::

+
data = (data - self.add_offset)/self.scale_factor
+
+

If scale_factor is present but add_offset is missing, add_offset is assumed to be zero. If add_offset is present but scale_factor is missing, scale_factor is assumed to be one. For more information on how scale_factor and add_offset can be used to provide simple compression, see the PSL metadata conventions.

+

In addition, if maskandscale is set to True, and if the variable has an +attribute _Unsigned set to "true", and the variable has a signed integer data type, +a view to the data is returned with the corresponding unsigned integer data type. +This convention is used by the netcdf-java library to save unsigned integer +data in NETCDF3 or NETCDF4_CLASSIC files (since the NETCDF3 +data model does not have unsigned integer data types).

+

The default value of maskandscale is True +(automatic conversions are performed).
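A sketch (names and values illustrative) of automatic packing into 16-bit integers via scale_factor/add_offset with maskandscale left at its default of True:

>>> import numpy as np
>>> from netCDF4 import Dataset
>>> nc = Dataset("pack_demo.nc", "w", format="NETCDF4")
>>> x = nc.createDimension("x", 3)
>>> t = nc.createVariable("t", "i2", ("x",))
>>> t.scale_factor = 0.1
>>> t.add_offset = 273.15
>>> t[:] = [271.15, 273.15, 280.15]   # packed to 16-bit integers on write
>>> unpacked = t[:]                    # automatically unpacked to float on read
>>> nc.close()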

+
+
+def set_auto_scale(self, scale) +
+
+

set_auto_scale(self,scale)

+

turn on or off automatic packing/unpacking of variable +data using scale_factor and add_offset attributes. +Also turns on and off automatic conversion of signed integer data +to unsigned integer data if the variable has an _Unsigned +attribute set to "true" or "True".

+

If scale is set to True, and the variable has a +scale_factor or an add_offset attribute, then data read +from that variable is unpacked using::

+
data = self.scale_factor*data + self.add_offset
+
+

When data is written to a variable it is packed using::

+
data = (data - self.add_offset)/self.scale_factor
+
+

If scale_factor is present but add_offset is missing, add_offset is assumed to be zero. If add_offset is present but scale_factor is missing, scale_factor is assumed to be one. For more information on how scale_factor and add_offset can be used to provide simple compression, see the PSL metadata conventions.

+

In addition, if scale is set to True, and if the variable has an +attribute _Unsigned set to "true", and the variable has a signed integer data type, +a view to the data is returned with the corresponding unsigned integer datatype. +This convention is used by the netcdf-java library to save unsigned integer +data in NETCDF3 or NETCDF4_CLASSIC files (since the NETCDF3 +data model does not have unsigned integer data types).

+

The default value of scale is True +(automatic conversions are performed).

+
+
+def set_collective(self, value) +
+
+

set_collective(self,True_or_False)

+

turn on or off collective parallel IO access. Ignored if file is not +open for parallel access.

+
+
+def set_ncstring_attrs(self, ncstring_attrs) +
+
+

set_ncstring_attrs(self,ncstring_attrs)

+

turn on or off creating NC_STRING string attributes.

+

If ncstring_attrs is set to True then text attributes will be variable-length +NC_STRINGs.

+

The default value of ncstring_attrs is False (writing ascii text attributes as +NC_CHAR).

+
+
+def set_var_chunk_cache(self, size=None, nelems=None, preemption=None) +
+
+

set_var_chunk_cache(self,size=None,nelems=None,preemption=None)

+

change variable chunk cache settings. +See netcdf C library documentation for nc_set_var_chunk_cache for +details.

+
+
+def setncattr(self, name, value) +
+
+

setncattr(self,name,value)

+

set a netCDF variable attribute using name,value pair. +Use if you need to set a +netCDF attribute with the same name as one of the reserved python +attributes.

+
+
+def setncattr_string(self, name, value) +
+
+

setncattr_string(self,name,value)

+

set a netCDF variable string attribute using name,value pair. +Use if you need to ensure that a netCDF attribute is created with type +NC_STRING if the file format is NETCDF4. +Use if you need to set an attribute to an array of variable-length strings.

+
+
+def setncatts(self, attdict) +
+
+

setncatts(self,attdict)

+

set a bunch of netCDF variable attributes at once using a python dictionary. +This may be faster when setting a lot of attributes for a NETCDF3 +formatted file, since nc_redef/nc_enddef is not called in between setting +each attribute

+
+
+def use_nc_get_vars(self, use_nc_get_vars) +
+
+

use_nc_get_vars(self,_use_get_vars)

+

enable the use of netcdf library routine nc_get_vars to retrieve strided variable slices. By default, nc_get_vars may not be used (depending on the version of the netcdf-c library being used) since it may be slower than multiple calls to the unstrided read routine nc_get_vara.

+
+
+
+
+
+
+ +
+ + + diff --git a/docs/netCDF4/index.html b/docs/netCDF4/index.html deleted file mode 100644 index bdf78cba9..000000000 --- a/docs/netCDF4/index.html +++ /dev/null @@ -1,6496 +0,0 @@ - - - - - - netCDF4 API documentation - - - - - - - - - - - - - - - -Top - -
- - - - -
- - - - - - -
-

netCDF4 module

-

Version 1.3.1

-
-

Introduction

-

netcdf4-python is a Python interface to the netCDF C library.

-

netCDF version 4 has many features not found in earlier versions of the library and is implemented on top of HDF5. This module can read and write files in both the new netCDF 4 and the old netCDF 3 format, and can create files that are readable by HDF5 clients. The API is modelled after Scientific.IO.NetCDF, and should be familiar to users of that module.

-

Most new features of netCDF 4 are implemented, such as multiple -unlimited dimensions, groups and zlib data compression. All the new -numeric data types (such as 64 bit and unsigned integer types) are -implemented. Compound (struct), variable length (vlen) and -enumerated (enum) data types are supported, but not the opaque data type. -Mixtures of compound, vlen and enum data types (such as -compound types containing enums, or vlens containing compound -types) are not supported.

-

Download

- -

Requires

-
    -
  • Python 2.7 or later (python 3 works too).
  • -
  • numpy array module, version 1.9.0 or later.
  • -
  • Cython, version 0.21 or later.
  • -
  • setuptools, version 18.0 or - later.
  • -
  • The HDF5 C library version 1.8.4-patch1 or higher (1.8.x recommended) - from . - netCDF version 4.4.1 or higher is recommended if using HDF5 1.10.x - - otherwise resulting files may be unreadable by clients using earlier - versions of HDF5. For netCDF < 4.4.1, HDF5 version 1.8.x is recommended. - Be sure to build with --enable-hl --enable-shared.
  • -
  • Libcurl, if you want - OPeNDAP support.
  • -
  • HDF4, if you want - to be able to read HDF4 "Scientific Dataset" (SD) files.
  • -
  • The netCDF-4 C library from the github releases - page. - Version 4.1.1 or higher is required (4.2 or higher recommended). - Be sure to build with --enable-netcdf-4 --enable-shared, and set - CPPFLAGS="-I $HDF5_DIR/include" and LDFLAGS="-L $HDF5_DIR/lib", - where $HDF5_DIR is the directory where HDF5 was installed. - If you want OPeNDAP support, add --enable-dap. - If you want HDF4 SD support, add --enable-hdf4 and add - the location of the HDF4 headers and library to $CPPFLAGS and $LDFLAGS.
  • -
  • for MPI parallel IO support, MPI-enabled versions of the HDF5 and netcdf - libraries are required, as is the mpi4py python - module.
  • -
-

Install

-
    -
  • install the requisite python modules and C libraries (see above). It's - easiest if all the C libs are built as shared libraries.
  • -
  • By default, the utility nc-config, installed with netcdf 4.1.2 or higher, will be used to determine where all the dependencies live.
  • -
  • If nc-config is not in your default $PATH - edit the setup.cfg file - in a text editor and follow the instructions in the comments. - In addition to specifying the path to nc-config, - you can manually set the paths to all the libraries and their include files - (in case nc-config does not do the right thing).
  • -
  • run python setup.py build, then python setup.py install (as root if - necessary).
  • -
  • pip install can - also be used, with library paths set with environment variables. To make - this work, the USE_SETUPCFG environment variable must be used to tell - setup.py not to use setup.cfg. - For example, USE_SETUPCFG=0 HDF5_INCDIR=/usr/include/hdf5/serial - HDF5_LIBDIR=/usr/lib/x86_64-linux-gnu/hdf5/serial pip install has been - shown to work on an Ubuntu/Debian linux system. Similarly, environment variables - (all capitalized) can be used to set the include and library paths for - hdf5, netCDF4, hdf4, szip, jpeg, curl and zlib. If the - libraries are installed in standard places (e.g. /usr or /usr/local), - the environment variables do not need to be set.
  • -
  • run the tests in the 'test' directory by running python run_all.py.
  • -
-

Tutorial

-
    -
  1. Creating/Opening/Closing a netCDF file.
  2. -
  3. Groups in a netCDF file.
  4. -
  5. Dimensions in a netCDF file.
  6. -
  7. Variables in a netCDF file.
  8. -
  9. Attributes in a netCDF file.
  10. -
  11. Writing data to and retrieving data from a netCDF variable.
  12. -
  13. Dealing with time coordinates.
  14. -
  15. Reading data from a multi-file netCDF dataset.
  16. -
  17. Efficient compression of netCDF variables.
  18. -
  19. Beyond homogeneous arrays of a fixed type - compound data types.
  20. -
  21. Variable-length (vlen) data types.
  22. -
  23. Enum data type.
  24. -
  25. Parallel IO.
  26. -
-

1) Creating/Opening/Closing a netCDF file.

-

To create a netCDF file from python, you simply call the Dataset -constructor. This is also the method used to open an existing netCDF -file. If the file is open for write access (mode='w', 'r+' or 'a'), you may -write any type of data including new dimensions, groups, variables and -attributes. netCDF files come in five flavors (NETCDF3_CLASSIC, -NETCDF3_64BIT_OFFSET, NETCDF3_64BIT_DATA, NETCDF4_CLASSIC, and NETCDF4). -NETCDF3_CLASSIC was the original netcdf binary format, and was limited -to file sizes less than 2 Gb. NETCDF3_64BIT_OFFSET was introduced -in version 3.6.0 of the library, and extended the original binary format -to allow for file sizes greater than 2 Gb. -NETCDF3_64BIT_DATA is a new format that requires version 4.4.0 of -the C library - it extends the NETCDF3_64BIT_OFFSET binary format to -allow for unsigned/64 bit integer data types and 64-bit dimension sizes. -NETCDF3_64BIT is an alias for NETCDF3_64BIT_OFFSET. -NETCDF4_CLASSIC files use the version 4 disk format (HDF5), but omits features -not found in the version 3 API. They can be read by netCDF 3 clients -only if they have been relinked against the netCDF 4 library. They can -also be read by HDF5 clients. NETCDF4 files use the version 4 disk -format (HDF5) and use the new features of the version 4 API. The -netCDF4 module can read and write files in any of these formats. When -creating a new file, the format may be specified using the format -keyword in the Dataset constructor. The default format is -NETCDF4. To see how a given file is formatted, you can examine the -data_model attribute. Closing the netCDF file is -accomplished via the close method of the Dataset -instance.

-

Here's an example:

-
>>> from netCDF4 import Dataset
->>> rootgrp = Dataset("test.nc", "w", format="NETCDF4")
->>> print rootgrp.data_model
-NETCDF4
->>> rootgrp.close()
-
- - -

Remote OPeNDAP-hosted datasets can be accessed for -reading over http if a URL is provided to the Dataset constructor instead of a -filename. However, this requires that the netCDF library be built with -OPenDAP support, via the --enable-dap configure option (added in -version 4.0.1).

-

2) Groups in a netCDF file.

-

netCDF version 4 added support for organizing data in hierarchical -groups, which are analogous to directories in a filesystem. Groups serve -as containers for variables, dimensions and attributes, as well as other -groups. A Dataset creates a special group, called -the 'root group', which is similar to the root directory in a unix -filesystem. To create Group instances, use the -createGroup method of a Dataset or Group -instance. createGroup takes a single argument, a -python string containing the name of the new group. The new Group -instances contained within the root group can be accessed by name using -the groups dictionary attribute of the Dataset instance. Only -NETCDF4 formatted files support Groups, if you try to create a Group -in a netCDF 3 file you will get an error message.

-
>>> rootgrp = Dataset("test.nc", "a")
->>> fcstgrp = rootgrp.createGroup("forecasts")
->>> analgrp = rootgrp.createGroup("analyses")
->>> print rootgrp.groups
-OrderedDict([("forecasts", 
-              <netCDF4._netCDF4.Group object at 0x1b4b7b0>),
-             ("analyses", 
-              <netCDF4._netCDF4.Group object at 0x1b4b970>)])
-
- - -

Groups can exist within groups in a Dataset, just as directories -exist within directories in a unix filesystem. Each Group instance -has a groups attribute dictionary containing all of the group -instances contained within that group. Each Group instance also has a -path attribute that contains a simulated unix directory path to -that group. To simplify the creation of nested groups, you can -use a unix-like path as an argument to createGroup.

-
>>> fcstgrp1 = rootgrp.createGroup("/forecasts/model1")
->>> fcstgrp2 = rootgrp.createGroup("/forecasts/model2")
-
- - -

If any of the intermediate elements of the path do not exist, they are created, -just as with the unix command 'mkdir -p'. If you try to create a group -that already exists, no error will be raised, and the existing group will be -returned.

-

Here's an example that shows how to navigate all the groups in a Dataset. The function walktree is a Python generator that is used to walk the directory tree. Note that printing the Dataset or Group object yields summary information about its contents.

-
>>> def walktree(top):
->>>     values = top.groups.values()
->>>     yield values
->>>     for value in top.groups.values():
->>>         for children in walktree(value):
->>>             yield children
->>> print rootgrp
->>> for children in walktree(rootgrp):
->>>      for child in children:
->>>          print child
-<type "netCDF4._netCDF4.Dataset">
-root group (NETCDF4 file format):
-    dimensions:
-    variables:
-    groups: forecasts, analyses
-<type "netCDF4._netCDF4.Group">
-group /forecasts:
-    dimensions:
-    variables:
-    groups: model1, model2
-<type "netCDF4._netCDF4.Group">
-group /analyses:
-    dimensions:
-    variables:
-    groups:
-<type "netCDF4._netCDF4.Group">
-group /forecasts/model1:
-    dimensions:
-    variables:
-    groups:
-<type "netCDF4._netCDF4.Group">
-group /forecasts/model2:
-    dimensions:
-    variables:
-    groups:
-
- - -

3) Dimensions in a netCDF file.

-

netCDF defines the sizes of all variables in terms of dimensions, so before any variables can be created the dimensions they use must be created first. A special case, not often used in practice, is that of a scalar variable, which has no dimensions. A dimension is created using the createDimension method of a Dataset or Group instance. A Python string is used to set the name of the dimension, and an integer value is used to set the size. To create an unlimited dimension (a dimension that can be appended to), the size value is set to None or 0. In this example, both the time and level dimensions are unlimited. Having more than one unlimited dimension is a new netCDF 4 feature; in netCDF 3 files there may be only one, and it must be the first (leftmost) dimension of the variable.

-
>>> level = rootgrp.createDimension("level", None)
->>> time = rootgrp.createDimension("time", None)
->>> lat = rootgrp.createDimension("lat", 73)
->>> lon = rootgrp.createDimension("lon", 144)
-
- - -

All of the Dimension instances are stored in a python dictionary.

-
>>> print rootgrp.dimensions
-OrderedDict([("level", <netCDF4._netCDF4.Dimension object at 0x1b48030>),
-             ("time", <netCDF4._netCDF4.Dimension object at 0x1b481c0>),
-             ("lat", <netCDF4._netCDF4.Dimension object at 0x1b480f8>),
-             ("lon", <netCDF4._netCDF4.Dimension object at 0x1b48a08>)])
-
- - -

Calling the python len function with a Dimension instance returns the current size of that dimension. The isunlimited method of a Dimension instance can be used to determine if the dimension is unlimited, or appendable.

-
>>> print len(lon)
-144
->>> print lon.isunlimited()
-False
->>> print time.isunlimited()
-True
-
- - -

Printing the Dimension object -provides useful summary info, including the name and length of the dimension, -and whether it is unlimited.

-
>>> for dimobj in rootgrp.dimensions.values():
->>>    print dimobj
-<type "netCDF4._netCDF4.Dimension"> (unlimited): name = "level", size = 0
-<type "netCDF4._netCDF4.Dimension"> (unlimited): name = "time", size = 0
-<type "netCDF4._netCDF4.Dimension">: name = "lat", size = 73
-<type "netCDF4._netCDF4.Dimension">: name = "lon", size = 144
-<type "netCDF4._netCDF4.Dimension"> (unlimited): name = "time", size = 0
-
- - -

Dimension names can be changed using the -netCDF4.Datatset.renameDimension method of a Dataset or -Group instance.

-

4) Variables in a netCDF file.

-

netCDF variables behave much like python multidimensional array objects -supplied by the numpy module. However, -unlike numpy arrays, netCDF4 variables can be appended to along one or -more 'unlimited' dimensions. To create a netCDF variable, use the -createVariable method of a Dataset or -Group instance. The createVariable method -has two mandatory arguments, the variable name (a Python string), and -the variable datatype. The variable's dimensions are given by a tuple -containing the dimension names (defined previously with -createDimension). To create a scalar -variable, simply leave out the dimensions keyword. The variable -primitive datatypes correspond to the dtype attribute of a numpy array. -You can specify the datatype as a numpy dtype object, or anything that -can be converted to a numpy dtype object. Valid datatype specifiers -include: 'f4' (32-bit floating point), 'f8' (64-bit floating -point), 'i4' (32-bit signed integer), 'i2' (16-bit signed -integer), 'i8' (64-bit signed integer), 'i1' (8-bit signed -integer), 'u1' (8-bit unsigned integer), 'u2' (16-bit unsigned -integer), 'u4' (32-bit unsigned integer), 'u8' (64-bit unsigned -integer), or 'S1' (single-character string). The old Numeric -single-character typecodes ('f','d','h', -'s','b','B','c','i','l'), corresponding to -('f4','f8','i2','i2','i1','i1','S1','i4','i4'), -will also work. The unsigned integer types and the 64-bit integer type -can only be used if the file format is NETCDF4.

-

The dimensions themselves are usually also defined as variables, called -coordinate variables. The createVariable -method returns an instance of the Variable class whose methods can be -used later to access and set variable data and attributes.

-
>>> times = rootgrp.createVariable("time","f8",("time",))
->>> levels = rootgrp.createVariable("level","i4",("level",))
->>> latitudes = rootgrp.createVariable("lat","f4",("lat",))
->>> longitudes = rootgrp.createVariable("lon","f4",("lon",))
->>> # two dimensions unlimited
->>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",))
-
- - -

To get summary info on a Variable instance in an interactive session, just print it.

-
>>> print temp
-<type "netCDF4._netCDF4.Variable">
-float32 temp(time, level, lat, lon)
-    least_significant_digit: 3
-    units: K
-unlimited dimensions: time, level
-current shape = (0, 0, 73, 144)
-
- - -

You can use a path to create a Variable inside a hierarchy of groups.

-
>>> ftemp = rootgrp.createVariable("/forecasts/model1/temp","f4",("time","level","lat","lon",))
-
- - -

If the intermediate groups do not yet exist, they will be created.

-

You can also query a Dataset or Group instance directly to obtain Group or -Variable instances using paths.

-
>>> print rootgrp["/forecasts/model1"] # a Group instance
-<type "netCDF4._netCDF4.Group">
-group /forecasts/model1:
-    dimensions(sizes):
-    variables(dimensions): float32 temp(time,level,lat,lon)
-    groups:
->>> print rootgrp["/forecasts/model1/temp"] # a Variable instance
-<type "netCDF4._netCDF4.Variable">
-float32 temp(time, level, lat, lon)
-path = /forecasts/model1
-unlimited dimensions: time, level
-current shape = (0, 0, 73, 144)
-filling on, default _FillValue of 9.96920996839e+36 used
-
- - -

All of the variables in the Dataset or Group are stored in a -Python dictionary, in the same way as the dimensions:

-
>>> print rootgrp.variables
-OrderedDict([("time", <netCDF4.Variable object at 0x1b4ba70>),
-             ("level", <netCDF4.Variable object at 0x1b4bab0>),
-             ("lat", <netCDF4.Variable object at 0x1b4baf0>),
-             ("lon", <netCDF4.Variable object at 0x1b4bb30>),
-             ("temp", <netCDF4.Variable object at 0x1b4bb70>)])
-
- - -

Variable names can be changed using the -renameVariable method of a Dataset -instance.

-

5) Attributes in a netCDF file.

-

There are two types of attributes in a netCDF file, global and variable. Global attributes provide information about a group, or the entire dataset, as a whole. Variable attributes provide information about one of the variables in a group. Global attributes are set by assigning values to Dataset or Group instance variables. Variable attributes are set by assigning values to Variable instance variables. Attributes can be strings, numbers or sequences. Returning to our example,

-
>>> import time
->>> rootgrp.description = "bogus example script"
->>> rootgrp.history = "Created " + time.ctime(time.time())
->>> rootgrp.source = "netCDF4 python module tutorial"
->>> latitudes.units = "degrees north"
->>> longitudes.units = "degrees east"
->>> levels.units = "hPa"
->>> temp.units = "K"
->>> times.units = "hours since 0001-01-01 00:00:00.0"
->>> times.calendar = "gregorian"
-
- - -

The ncattrs method of a Dataset, Group or -Variable instance can be used to retrieve the names of all the netCDF -attributes. This method is provided as a convenience, since using the -built-in dir Python function will return a bunch of private methods -and attributes that cannot (or should not) be modified by the user.

-
>>> for name in rootgrp.ncattrs():
->>>     print "Global attr", name, "=", getattr(rootgrp,name)
-Global attr description = bogus example script
-Global attr history = Created Mon Nov  7 10:30:56 2005
-Global attr source = netCDF4 python module tutorial
-
- - -

The __dict__ attribute of a Dataset, Group or Variable -instance provides all the netCDF attribute name/value pairs in a python -dictionary:

-
>>> print rootgrp.__dict__
-OrderedDict([(u"description", u"bogus example script"),
-             (u"history", u"Created Thu Mar  3 19:30:33 2011"),
-             (u"source", u"netCDF4 python module tutorial")])
-
- - -

Attributes can be deleted from a netCDF Dataset, Group or Variable using the python del statement (i.e. del grp.foo removes the attribute foo from the group grp).
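For example, a minimal sketch using the global attribute created above:

>>> del rootgrp.source    # remove the 'source' global attribute set earlier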

-

6) Writing data to and retrieving data from a netCDF variable.

-

Now that you have a netCDF Variable instance, how do you put data -into it? You can just treat it like an array and assign data to a slice.

-
>>> import numpy
->>> lats =  numpy.arange(-90,91,2.5)
->>> lons =  numpy.arange(-180,180,2.5)
->>> latitudes[:] = lats
->>> longitudes[:] = lons
->>> print "latitudes =\n",latitudes[:]
-latitudes =
-[-90.  -87.5 -85.  -82.5 -80.  -77.5 -75.  -72.5 -70.  -67.5 -65.  -62.5
- -60.  -57.5 -55.  -52.5 -50.  -47.5 -45.  -42.5 -40.  -37.5 -35.  -32.5
- -30.  -27.5 -25.  -22.5 -20.  -17.5 -15.  -12.5 -10.   -7.5  -5.   -2.5
-   0.    2.5   5.    7.5  10.   12.5  15.   17.5  20.   22.5  25.   27.5
-  30.   32.5  35.   37.5  40.   42.5  45.   47.5  50.   52.5  55.   57.5
-  60.   62.5  65.   67.5  70.   72.5  75.   77.5  80.   82.5  85.   87.5
-  90. ]
-
- - -

Unlike NumPy's array objects, netCDF Variable -objects with unlimited dimensions will grow along those dimensions if you -assign data outside the currently defined range of indices.

-
>>> # append along two unlimited dimensions by assigning to slice.
->>> nlats = len(rootgrp.dimensions["lat"])
->>> nlons = len(rootgrp.dimensions["lon"])
->>> print "temp shape before adding data = ",temp.shape
-temp shape before adding data =  (0, 0, 73, 144)
->>>
->>> from numpy.random import uniform
->>> temp[0:5,0:10,:,:] = uniform(size=(5,10,nlats,nlons))
->>> print "temp shape after adding data = ",temp.shape
-temp shape after adding data =  (5, 10, 73, 144)
->>>
->>> # levels have grown, but no values yet assigned.
->>> print "levels shape after adding pressure data = ",levels.shape
-levels shape after adding pressure data =  (10,)
-
- - -

Note that the size of the levels variable grows when data is appended -along the level dimension of the variable temp, even though no -data has yet been assigned to levels.

-
>>> # now, assign data to levels dimension variable.
->>> levels[:] =  [1000.,850.,700.,500.,300.,250.,200.,150.,100.,50.]
-
- - -

Note, however, that there are some differences between NumPy and netCDF variable slicing rules. Slices behave as usual, being specified as a start:stop:step triplet. Using a scalar integer index i takes the ith element and reduces the rank of the output array by one. Boolean array and integer sequence indexing behaves differently for netCDF variables than for numpy arrays. Only 1-d boolean arrays and integer sequences are allowed, and these indices work independently along each dimension (similar to the way vector subscripts work in fortran). This means that

-
>>> temp[0, 0, [0,1,2,3], [0,1,2,3]]
-
- - -

returns an array of shape (4,4) when slicing a netCDF variable, but for a -numpy array it returns an array of shape (4,). -Similarly, a netCDF variable of shape (2,3,4,5) indexed -with [0, array([True, False, True]), array([False, True, True, True]), :] -would return a (2, 3, 5) array. In NumPy, this would raise an error since -it would be equivalent to [0, [0,1], [1,2,3], :]. When slicing with integer -sequences, the indices need not be sorted and may contain -duplicates (both of these are new features in version 1.2.1). -While this behaviour may cause some confusion for those used to NumPy's 'fancy indexing' rules, -it provides a very powerful way to extract data from multidimensional netCDF -variables by using logical operations on the dimension arrays to create slices.

-

For example,

-
>>> tempdat = temp[::2, [1,3,6], lats>0, lons>0]
-
- - -

will extract time indices 0,2 and 4, pressure levels -850, 500 and 200 hPa, all Northern Hemisphere latitudes and Eastern -Hemisphere longitudes, resulting in a numpy array of shape (3, 3, 36, 71).

-
>>> print "shape of fancy temp slice = ",tempdat.shape
-shape of fancy temp slice =  (3, 3, 36, 71)
-
- - -

Special note for scalar variables: To extract data from a scalar variable -v with no associated dimensions, use np.asarray(v) or v[...]. The result -will be a numpy scalar array.
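For example, a minimal sketch (the variable name scalar_var is hypothetical):

>>> sv = rootgrp.createVariable("scalar_var","f4")   # no dimensions keyword, so a scalar variable
>>> sv[...] = 3.14                                   # assign with Ellipsis indexing
>>> data = sv[...]                                   # read back as a numpy scalar array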

-

7) Dealing with time coordinates.

-

Time coordinate values pose a special challenge to netCDF users. Most metadata standards (such as CF) specify that time should be measured relative to a fixed date using a certain calendar, with units specified like hours since YY-MM-DD hh:mm:ss. These units can be awkward to deal with, without a utility to convert the values to and from calendar dates. The functions num2date and date2num are provided with this package to do just that. Here's an example of how they can be used:

-
>>> # fill in times.
->>> from datetime import datetime, timedelta
->>> from netCDF4 import num2date, date2num
->>> dates = [datetime(2001,3,1)+n*timedelta(hours=12) for n in range(temp.shape[0])]
->>> times[:] = date2num(dates,units=times.units,calendar=times.calendar)
->>> print "time values (in units %s): " % times.units+"\n",times[:]
-time values (in units hours since 0001-01-01 00:00:00.0):
-[ 17533056.  17533068.  17533080.  17533092.  17533104.]
->>> dates = num2date(times[:],units=times.units,calendar=times.calendar)
->>> print "dates corresponding to time values:\n",dates
-dates corresponding to time values:
-[2001-03-01 00:00:00 2001-03-01 12:00:00 2001-03-02 00:00:00
- 2001-03-02 12:00:00 2001-03-03 00:00:00]
-
- - -

num2date converts numeric values of time in the specified units -and calendar to datetime objects, and date2num does the reverse. -All the calendars currently defined in the -CF metadata convention are supported. -A function called date2index is also provided which returns the indices -of a netCDF time variable corresponding to a sequence of datetime instances.
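For instance, a minimal sketch using the times variable filled above (select="nearest" is one of the documented options):

>>> from netCDF4 import date2index
>>> ind = date2index(datetime(2001,3,1,12), times, select="nearest")
>>> print ind
1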

-

8) Reading data from a multi-file netCDF dataset.

-

If you want to read data from a variable that spans multiple netCDF files, you can use the MFDataset class to read the data as if it were contained in a single file. Instead of using a single filename to create a Dataset instance, create a MFDataset instance with either a list of filenames, or a string with a wildcard (which is then converted to a sorted list of files using the python glob module). Variables in the list of files that share the same unlimited dimension are aggregated together, and can be sliced across multiple files. To illustrate this, let's first create a bunch of netCDF files with the same variable (with the same unlimited dimension). The files must be in NETCDF3_64BIT_OFFSET, NETCDF3_64BIT_DATA, NETCDF3_CLASSIC or NETCDF4_CLASSIC format (NETCDF4 formatted multi-file datasets are not supported).

-
>>> for nf in range(10):
->>>     f = Dataset("mftest%s.nc" % nf,"w")
->>>     f.createDimension("x",None)
->>>     x = f.createVariable("x","i",("x",))
->>>     x[0:10] = numpy.arange(nf*10,10*(nf+1))
->>>     f.close()
-
- - -

Now read all the files back in at once with MFDataset

-
>>> from netCDF4 import MFDataset
->>> f = MFDataset("mftest*nc")
->>> print f.variables["x"][:]
-[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
- 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
- 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
- 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99]
-
- - -

Note that MFDataset can only be used to read, not write, multi-file -datasets.

-

9) Efficient compression of netCDF variables.

-

Data stored in netCDF4 Variable objects can be compressed and decompressed on the fly. The parameters for the compression are determined by the zlib, complevel and shuffle keyword arguments to the createVariable method. To turn on compression, set zlib=True. The complevel keyword regulates the speed and efficiency of the compression (1 being fastest, but lowest compression ratio, 9 being slowest but best compression ratio). The default value of complevel is 4. Setting shuffle=False will turn off the HDF5 shuffle filter, which de-interlaces a block of data before compression by reordering the bytes. The shuffle filter can significantly improve compression ratios, and is on by default. Setting the fletcher32 keyword argument to createVariable to True (it's False by default) enables the Fletcher32 checksum algorithm for error detection. It's also possible to set the HDF5 chunking parameters and endian-ness of the binary data stored in the HDF5 file with the chunksizes and endian keyword arguments to createVariable. These keyword arguments are only relevant for NETCDF4 and NETCDF4_CLASSIC files (where the underlying file format is HDF5) and are silently ignored if the file format is NETCDF3_CLASSIC, NETCDF3_64BIT_OFFSET or NETCDF3_64BIT_DATA.

-

If your data only has a certain number of digits of precision (say for example, it is temperature data that was measured with a precision of 0.1 degrees), you can dramatically improve zlib compression by quantizing (or truncating) the data using the least_significant_digit keyword argument to createVariable. The least significant digit is the power of ten of the smallest decimal place in the data that is a reliable value. For example if the data has a precision of 0.1, then setting least_significant_digit=1 will cause the data to be quantized using numpy.around(scale*data)/scale, where scale = 2**bits, and bits is determined so that a precision of 0.1 is retained (in this case bits=4). Effectively, this makes the compression 'lossy' instead of 'lossless', that is, some precision in the data is sacrificed for the sake of disk space.

-

In our example, try replacing the line

-
>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",))
-
- - -

with

-
>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",),zlib=True)
-
- - -

and then

-
>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",),zlib=True,least_significant_digit=3)
-
- - -

and see how much smaller the resulting files are.

-

10) Beyond homogeneous arrays of a fixed type - compound data types.

-

Compound data types map directly to numpy structured (a.k.a 'record') -arrays. Structured arrays are akin to C structs, or derived types -in Fortran. They allow for the construction of table-like structures -composed of combinations of other data types, including other -compound types. Compound types might be useful for representing multiple -parameter values at each point on a grid, or at each time and space -location for scattered (point) data. You can then access all the -information for a point by reading one variable, instead of reading -different parameters from different variables. Compound data types -are created from the corresponding numpy data type using the -createCompoundType method of a Dataset or Group instance. -Since there is no native complex data type in netcdf, compound types are handy -for storing numpy complex arrays. Here's an example:

-
>>> f = Dataset("complex.nc","w")
->>> size = 3 # length of 1-d complex array
->>> # create sample complex data.
->>> datac = numpy.exp(1j*(1.+numpy.linspace(0, numpy.pi, size)))
->>> # create complex128 compound data type.
->>> complex128 = numpy.dtype([("real",numpy.float64),("imag",numpy.float64)])
->>> complex128_t = f.createCompoundType(complex128,"complex128")
->>> # create a variable with this data type, write some data to it.
->>> f.createDimension("x_dim",None)
->>> v = f.createVariable("cmplx_var",complex128_t,"x_dim")
->>> data = numpy.empty(size,complex128) # numpy structured array
->>> data["real"] = datac.real; data["imag"] = datac.imag
->>> v[:] = data # write numpy structured array to netcdf compound var
->>> # close and reopen the file, check the contents.
->>> f.close(); f = Dataset("complex.nc")
->>> v = f.variables["cmplx_var"]
->>> datain = v[:] # read in all the data into a numpy structured array
->>> # create an empty numpy complex array
->>> datac2 = numpy.empty(datain.shape,numpy.complex128)
->>> # .. fill it with contents of structured array.
->>> datac2.real = datain["real"]; datac2.imag = datain["imag"]
->>> print datac.dtype,datac # original data
-complex128 [ 0.54030231+0.84147098j -0.84147098+0.54030231j  -0.54030231-0.84147098j]
->>>
->>> print datac2.dtype,datac2 # data from file
-complex128 [ 0.54030231+0.84147098j -0.84147098+0.54030231j  -0.54030231-0.84147098j]
-
- - -

Compound types can be nested, but you must create the 'inner' ones first. Not all numpy structured arrays can be represented as Compound variables - an error message will be raised if you try to create one that is not supported. All of the compound types defined for a Dataset or Group are stored in a Python dictionary, just like variables and dimensions. As always, printing objects gives useful summary information in an interactive session:

-
>>> print f
-<type "netCDF4._netCDF4.Dataset">
-root group (NETCDF4 file format):
-    dimensions: x_dim
-    variables: cmplx_var
-    groups:
-<type "netCDF4._netCDF4.Variable">
->>> print f.variables["cmplx_var"]
-compound cmplx_var(x_dim)
-compound data type: [("real", "<f8"), ("imag", "<f8")]
-unlimited dimensions: x_dim
-current shape = (3,)
->>> print f.cmptypes
-OrderedDict([("complex128", <netCDF4.CompoundType object at 0x1029eb7e8>)])
->>> print f.cmptypes["complex128"]
-<type "netCDF4._netCDF4.CompoundType">: name = "complex128", numpy dtype = [(u"real","<f8"), (u"imag", "<f8")]
-
- - -

11) Variable-length (vlen) data types.

-

NetCDF 4 has support for variable-length or "ragged" arrays. These are arrays of variable length sequences having the same type. To create a variable-length data type, use the createVLType method of a Dataset or Group instance.

-
>>> f = Dataset("tst_vlen.nc","w")
->>> vlen_t = f.createVLType(numpy.int32, "phony_vlen")
-
- - -

The numpy datatype of the variable-length sequences and the name of the -new datatype must be specified. Any of the primitive datatypes can be -used (signed and unsigned integers, 32 and 64 bit floats, and characters), -but compound data types cannot. -A new variable can then be created using this datatype.

-
>>> x = f.createDimension("x",3)
->>> y = f.createDimension("y",4)
->>> vlvar = f.createVariable("phony_vlen_var", vlen_t, ("y","x"))
-
- - -

Since there is no native vlen datatype in numpy, vlen arrays are represented -in python as object arrays (arrays of dtype object). These are arrays whose -elements are Python object pointers, and can contain any type of python object. -For this application, they must contain 1-D numpy arrays all of the same type -but of varying length. -In this case, they contain 1-D numpy int32 arrays of random length between -1 and 10.

-
>>> import random
->>> data = numpy.empty(len(y)*len(x),object)
->>> for n in range(len(y)*len(x)):
->>>    data[n] = numpy.arange(random.randint(1,10),dtype="int32")+1
->>> data = numpy.reshape(data,(len(y),len(x)))
->>> vlvar[:] = data
->>> print "vlen variable =\n",vlvar[:]
-vlen variable =
-[[[ 1  2  3  4  5  6  7  8  9 10] [1 2 3 4 5] [1 2 3 4 5 6 7 8]]
- [[1 2 3 4 5 6 7] [1 2 3 4 5 6] [1 2 3 4 5]]
- [[1 2 3 4 5] [1 2 3 4] [1]]
- [[ 1  2  3  4  5  6  7  8  9 10] [ 1  2  3  4  5  6  7  8  9 10]
-  [1 2 3 4 5 6 7 8]]]
->>> print f
-<type "netCDF4._netCDF4.Dataset">
-root group (NETCDF4 file format):
-    dimensions: x, y
-    variables: phony_vlen_var
-    groups:
->>> print f.variables["phony_vlen_var"]
-<type "netCDF4._netCDF4.Variable">
-vlen phony_vlen_var(y, x)
-vlen data type: int32
-unlimited dimensions:
-current shape = (4, 3)
->>> print f.VLtypes["phony_vlen"]
-<type "netCDF4._netCDF4.VLType">: name = "phony_vlen", numpy dtype = int32
-
- - -

Numpy object arrays containing python strings can also be written as vlen variables. For vlen strings, you don't need to create a vlen data type. Instead, simply use the python str builtin (or a numpy string datatype with fixed length greater than 1) when calling the createVariable method.

-
>>> z = f.createDimension("z",10)
->>> strvar = f.createVariable("strvar", str, "z")
-
- - -

In this example, an object array is filled with random python strings with -random lengths between 2 and 12 characters, and the data in the object -array is assigned to the vlen string variable.

-
>>> chars = "1234567890aabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
->>> data = numpy.empty(10,"O")
->>> for n in range(10):
->>>     stringlen = random.randint(2,12)
->>>     data[n] = "".join([random.choice(chars) for i in range(stringlen)])
->>> strvar[:] = data
->>> print "variable-length string variable:\n",strvar[:]
-variable-length string variable:
-[aDy29jPt 5DS9X8 jd7aplD b8t4RM jHh8hq KtaPWF9cQj Q1hHN5WoXSiT MMxsVeq tdLUzvVTzj]
->>> print f
-<type "netCDF4._netCDF4.Dataset">
-root group (NETCDF4 file format):
-    dimensions: x, y, z
-    variables: phony_vlen_var, strvar
-    groups:
->>> print f.variables["strvar"]
-<type "netCDF4._netCDF4.Variable">
-vlen strvar(z)
-vlen data type: <type "str">
-unlimited dimensions:
-current shape = (10,)
-
- - -

It is also possible to set contents of vlen string variables with numpy arrays -of any string or unicode data type. Note, however, that accessing the contents -of such variables will always return numpy arrays with dtype object.
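For example, a minimal sketch using the strvar variable created above (the values are hypothetical):

>>> data = numpy.array(["foo","bar","spam"], dtype="S4")
>>> strvar[0:3] = data            # stored as variable-length strings
>>> print strvar[0:3].dtype       # reading back always gives an object array
object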

-

12) Enum data type.

-

netCDF4 has an enumerated data type, which is an integer datatype that is -restricted to certain named values. Since Enums don't map directly to -a numpy data type, they are read and written as integer arrays.

-

Here's an example of using an Enum type to hold cloud type data. -The base integer data type and a python dictionary describing the allowed -values and their names are used to define an Enum data type using -createEnumType.

-
>>> nc = Dataset('clouds.nc','w')
->>> # python dict with allowed values and their names.
->>> enum_dict = {u'Altocumulus': 7, u'Missing': 255, 
->>> u'Stratus': 2, u'Clear': 0,
->>> u'Nimbostratus': 6, u'Cumulus': 4, u'Altostratus': 5,
->>> u'Cumulonimbus': 1, u'Stratocumulus': 3}
->>> # create the Enum type called 'cloud_t'.
->>> cloud_type = nc.createEnumType(numpy.uint8,'cloud_t',enum_dict)
->>> print cloud_type
-<type 'netCDF4._netCDF4.EnumType'>: name = 'cloud_t',
-numpy dtype = uint8, fields/values ={u'Cumulus': 4,
-u'Altocumulus': 7, u'Missing': 255,
-u'Stratus': 2, u'Clear': 0,
-u'Cumulonimbus': 1, u'Stratocumulus': 3,
-u'Nimbostratus': 6, u'Altostratus': 5}
-
- - -

A new variable can be created in the usual way using this data type. -Integer data is written to the variable that represents the named -cloud types in enum_dict. A ValueError will be raised if an attempt -is made to write an integer value not associated with one of the -specified names.

-
>>> time = nc.createDimension('time',None)
->>> # create a 1d variable of type 'cloud_type'.
->>> # The fill_value is set to the 'Missing' named value.
->>> cloud_var = nc.createVariable('primary_cloud',cloud_type,'time',
->>>                               fill_value=enum_dict['Missing'])
->>> # write some data to the variable.
->>> cloud_var[:] = [enum_dict['Clear'],enum_dict['Stratus'],
->>> enum_dict['Cumulus'],enum_dict['Missing'],
->>> enum_dict['Cumulonimbus']]
->>> nc.close()
->>> # reopen the file, read the data.
->>> nc = Dataset('clouds.nc')
->>> cloud_var = nc.variables['primary_cloud']
->>> print cloud_var
-<type 'netCDF4._netCDF4.Variable'>
-enum primary_cloud(time)
-    _FillValue: 255
-enum data type: uint8
-unlimited dimensions: time
-current shape = (5,)
->>> print cloud_var.datatype.enum_dict
-{u'Altocumulus': 7, u'Missing': 255, u'Stratus': 2,
-u'Clear': 0, u'Nimbostratus': 6, u'Cumulus': 4,
-u'Altostratus': 5, u'Cumulonimbus': 1,
-u'Stratocumulus': 3}
->>> print cloud_var[:]
-[0 2 4 -- 1]
->>> nc.close()
-
- - -

13) Parallel IO.

-

If MPI parallel enabled versions of netcdf and hdf5 are detected, and -mpi4py is installed, netcdf4-python will -be built with parallel IO capabilities enabled. To use parallel IO, -your program must be running in an MPI environment using -mpi4py.

-
>>> from mpi4py import MPI
->>> import numpy as np
->>> from netCDF4 import Dataset
->>> rank = MPI.COMM_WORLD.rank  # The process ID (integer 0-3 for 4-process run)
-
- - -

To run an MPI-based parallel program like this, you must use mpiexec to launch several -parallel instances of Python (for example, using mpiexec -np 4 python mpi_example.py). -The parallel features of netcdf4-python are mostly transparent - -when a new dataset is created or an existing dataset is opened, -use the parallel keyword to enable parallel access.

-
>>> nc = Dataset('parallel_test.nc','w',parallel=True)
-
- - -

The optional comm keyword may be used to specify a particular MPI communicator (MPI_COMM_WORLD is used by default). Each process (or rank) can now write to the file independently. In this example the process rank is written to a different variable index on each task.

-
>>> d = nc.createDimension('dim',4)
->>> v = nc.createVariable('var', np.int64, 'dim')
->>> v[rank] = rank
->>> nc.close()
-
-% ncdump parallel_test.nc
-netcdf parallel_test {
-dimensions:
-    dim = 4 ;
-    variables:
-    int64 var(dim) ;
-    data:
-
-    var = 0, 1, 2, 3 ;
-}
-
- - -

There are two types of parallel IO, independent (the default) and collective. Independent IO means that each process can do IO independently. It should not depend on or be affected by other processes. Collective IO is a way of doing IO defined in the MPI-IO standard; unlike independent IO, all processes must participate in doing IO. To toggle back and forth between the two types of IO, use the set_collective Variable method. All metadata operations (such as creation of groups, types, variables, dimensions, or attributes) are collective. There are a couple of important limitations of parallel IO:

-
    -
  • If a variable has an unlimited dimension, appending data must be done in collective mode. If the write is done in independent mode, the operation will fail with a generic "HDF Error".
  • -
  • You cannot write compressed data in parallel (although - you can read it).
  • -
  • You cannot use variable-length (VLEN) data types.
  • -
-

All of the code in this tutorial is available in examples/tutorial.py, except -the parallel IO example, which is in examples/mpi_example.py. -Unit tests are in the test directory.

-

contact: Jeffrey Whitaker jeffrey.s.whitaker@noaa.gov

-

copyright: 2008 by Jeffrey Whitaker.

-

license: Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby granted, -provided that the above copyright notice appear in all copies and that -both the copyright notice and this permission notice appear in -supporting documentation. -THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO -EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF -USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR -OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE.

-
- - -
- -
- -

Functions

- -
-
-

def chartostring(

b,encoding='utf-8')

-
- - - - -

convert a character array to a string array with one less dimension.

-

b: Input character array (numpy datatype 'S1' or 'U1'). Will be converted to an array of strings, where each string has a fixed length of b.shape[-1] characters.

-

optional kwarg encoding can be used to specify character encoding (default -utf-8).

-

returns a numpy string array with datatype 'UN' and shape b.shape[:-1], where N=b.shape[-1].
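A minimal sketch (the input array values are hypothetical):

>>> import numpy
>>> from netCDF4 import chartostring
>>> carr = numpy.array([list("foo"), list("bar")], dtype="S1")   # character array of shape (2, 3)
>>> sarr = chartostring(carr)                                    # string array of shape (2,)
>>> print sarr.shape
(2,)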

-
-
- -
- - -
-
-

def date2index(

dates, nctime, calendar=None, select='exact')

-
- - - - -

Return indices of a netCDF time variable corresponding to the given dates.

-

dates: A datetime object or a sequence of datetime objects. -The datetime objects should not include a time-zone offset.

-

nctime: A netCDF time variable object. The nctime object must have a -units attribute.

-

calendar: describes the calendar used in the time calculations. All the values currently defined in the CF metadata convention are supported. Valid calendars are 'standard', 'gregorian', 'proleptic_gregorian', 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'. Default is 'standard', which is a mixed Julian/Gregorian calendar. If calendar is None, its value is given by nctime.calendar or standard if no such attribute exists.

-

select: 'exact', 'before', 'after', 'nearest' -The index selection method. exact will return the indices perfectly -matching the dates given. before and after will return the indices -corresponding to the dates just before or just after the given dates if -an exact match cannot be found. nearest will return the indices that -correspond to the closest dates.

-

returns an index (indices) of the netCDF time variable corresponding -to the given datetime object(s).

-
-
- -
- - -
-
-

def date2num(

dates,units,calendar='standard')

-
- - - - -

Return numeric time values given datetime objects. The units of the numeric time values are described by the units argument and the calendar keyword. The datetime objects must be in UTC with no time-zone offset. If there is a time-zone offset in units, it will be applied to the returned numeric values.

-

dates: A datetime object or a sequence of datetime objects. -The datetime objects should not include a time-zone offset.

-

units: a string of the form <time units> since <reference time> -describing the time units. <time units> can be days, hours, minutes, -seconds, milliseconds or microseconds. <reference time> is the time -origin.

-

calendar: describes the calendar used in the time calculations. All the values currently defined in the CF metadata convention are supported. Valid calendars are 'standard', 'gregorian', 'proleptic_gregorian', 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'. Default is 'standard', which is a mixed Julian/Gregorian calendar.

-

returns a numeric time value, or an array of numeric time values -with approximately millisecond accuracy.

-
-
- -
- - -
-
-

def getlibversion(

)

-
- - - - -

returns a string describing the version of the netcdf library -used to build the module, and when it was built.

-
-
- -
- - -
-
-

def num2date(

times,units,calendar='standard')

-
- - - - -

Return datetime objects given numeric time values. The units -of the numeric time values are described by the units argument -and the calendar keyword. The returned datetime objects represent -UTC with no time-zone offset, even if the specified -units contain a time-zone offset.

-

times: numeric time values.

-

units: a string of the form <time units> since <reference time> -describing the time units. <time units> can be days, hours, minutes, -seconds, milliseconds or microseconds. <reference time> is the time -origin.

-

calendar: describes the calendar used in the time calculations. All the values currently defined in the CF metadata convention are supported. Valid calendars are 'standard', 'gregorian', 'proleptic_gregorian', 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'. Default is 'standard', which is a mixed Julian/Gregorian calendar.

-

returns a datetime instance, or an array of datetime instances with -approximately millisecond accuracy.

-

Note: The datetime instances returned are 'real' python datetime -objects if calendar='proleptic_gregorian', or -calendar='standard' or 'gregorian' -and the date is after the breakpoint between the Julian and -Gregorian calendars (1582-10-15). Otherwise, they are 'phony' datetime -objects which support some but not all the methods of 'real' python -datetime objects. The datetime instances -do not contain a time-zone offset, even if the specified units -contains one.

-
-
- -
- - -
-
-

def stringtoarr(

a, NUMCHARS,dtype='S')

-
- - - - -

convert a string to a character array of length NUMCHARS

-

a: Input python string.

-

NUMCHARS: number of characters used to represent string -(if len(a) < NUMCHARS, it will be padded on the right with blanks).

-

dtype: type of numpy array to return. Default is 'S', which -means an array of dtype 'S1' will be returned. If dtype='U', a -unicode array (dtype = 'U1') will be returned.

-

returns a rank 1 numpy character array of length NUMCHARS with datatype 'S1' -(default) or 'U1' (if dtype='U')
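A minimal sketch (the input string is hypothetical):

>>> from netCDF4 import stringtoarr
>>> ca = stringtoarr("foo", 5)   # 'foo' padded on the right with two blanks
>>> print ca.shape
(5,)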

-
-
- -
- - -
-
-

def stringtochar(

a,encoding='utf-8')

-
- - - - -

convert a string array to a character array with one extra dimension

-

a: Input numpy string array with numpy datatype 'SN' or 'UN', where N -is the number of characters in each string. Will be converted to -an array of characters (datatype 'S1' or 'U1') of shape a.shape + (N,).

-

optional kwarg encoding can be used to specify character encoding (default -utf-8).

-

returns a numpy character array with datatype 'S1' or 'U1' -and shape a.shape + (N,), where N is the length of each string in a.
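A minimal sketch (the input array values are hypothetical):

>>> import numpy
>>> from netCDF4 import stringtochar
>>> sarr = numpy.array(["foo","bar"], dtype="S3")
>>> carr = stringtochar(sarr)    # character array with one extra trailing dimension
>>> print carr.shape
(2, 3)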

-
-
- -
- - -

Classes

- -
-

class CompoundType

- - -

A CompoundType instance is used to describe a compound data type, and can be passed to the createVariable method of a Dataset or Group instance. Compound data types map to numpy structured arrays. See __init__ for more details.

-

The instance variables dtype and name should not be modified by -the user.

-
-
- - -
-

Ancestors (in MRO)

- -

Class variables

-
-

var dtype

- - - - -

A numpy dtype object describing the compound data type.

-
-
- -
-
-

var name

- - - - -

String name.

-
-
- -
-

Static methods

- -
-
-

def __init__(

group, datatype, datatype_name)

-
- - - - -

CompoundType constructor.

-

group: Group instance to associate with the compound datatype.

-

datatype: A numpy dtype object describing a structured (a.k.a record) -array. Can be composed of homogeneous numeric or character data types, or -other structured array data types.

-

datatype_name: a Python string containing a description of the -compound data type.

-

Note 1: When creating nested compound data types, -the inner compound data types must already be associated with CompoundType -instances (so create CompoundType instances for the innermost structures -first).

-

Note 2: CompoundType instances should be created using the -createCompoundType -method of a Dataset or Group instance, not using this class directly.

-
-
- -
- -
-
- -
-

class Dataset

- - -

A netCDF Dataset is a collection of dimensions, groups, variables and -attributes. Together they describe the meaning of data and relations among -data fields stored in a netCDF file. See __init__ for more -details.

-

A list of attribute names corresponding to global netCDF attributes -defined for the Dataset can be obtained with the -ncattrs method. -These attributes can be created by assigning to an attribute of the -Dataset instance. A dictionary containing all the netCDF attribute -name/value pairs is provided by the __dict__ attribute of a -Dataset instance.

-

The following class variables are read-only and should not be -modified by the user.

-

dimensions: The dimensions dictionary maps the names of -dimensions defined for the Group or Dataset to instances of the -Dimension class.

-

variables: The variables dictionary maps the names of variables -defined for this Dataset or Group to instances of the -Variable class.

-

groups: The groups dictionary maps the names of groups created for -this Dataset or Group to instances of the Group class (the -Dataset class is simply a special case of the Group class which -describes the root group in the netCDF4 file).

-

cmptypes: The cmptypes dictionary maps the names of -compound types defined for the Group or Dataset to instances of the -CompoundType class.

-

vltypes: The vltypes dictionary maps the names of -variable-length types defined for the Group or Dataset to instances -of the VLType class.

-

enumtypes: The enumtypes dictionary maps the names of -Enum types defined for the Group or Dataset to instances -of the EnumType class.

-

data_model: data_model describes the netCDF -data model version, one of NETCDF3_CLASSIC, NETCDF4, -NETCDF4_CLASSIC, NETCDF3_64BIT_OFFSET or NETCDF3_64BIT_DATA.

-

file_format: same as data_model, retained for backwards compatibility.

-

disk_format: disk_format describes the underlying -file format, one of NETCDF3, HDF5, HDF4, -PNETCDF, DAP2, DAP4 or UNDEFINED. Only available if using -netcdf C library version >= 4.3.1, otherwise will always return -UNDEFINED.

-

parent: parent is a reference to the parent -Group instance. None for the root group or Dataset -instance.

-

path: path shows the location of the Group in the Dataset in a unix directory format (the names of groups in the hierarchy separated by forward slashes). A Dataset instance is the root group, so the path is simply '/'.

-

keepweakref: If True, child Dimension and Variables objects only keep weak -references to the parent Dataset or Group.

-
-
- - -
-

Ancestors (in MRO)

- -

Class variables

-
-

var cmptypes

- - - - -

The cmptypes dictionary maps the names of -compound types defined for the Group or Dataset to instances of the -CompoundType class.

-
-
- -
-
-

var data_model

- - - - -

data_model describes the netCDF -data model version, one of NETCDF3_CLASSIC, NETCDF4, -NETCDF4_CLASSIC, NETCDF3_64BIT_OFFSET or NETCDF3_64BIT_DATA.

-
-
- -
-
-

var dimensions

- - - - -

The dimensions dictionary maps the names of -dimensions defined for the Group or Dataset to instances of the -Dimension class.

-
-
- -
-
-

var disk_format

- - - - -

disk_format describes the underlying -file format, one of NETCDF3, HDF5, HDF4, -PNETCDF, DAP2, DAP4 or UNDEFINED. Only available if using -netcdf C library version >= 4.3.1, otherwise will always return -UNDEFINED.

-
-
- -
-
-

var enumtypes

- - - - -

The enumtypes dictionary maps the names of -Enum types defined for the Group or Dataset to instances of the -EnumType class.

-
-
- -
-
-

var file_format

- - - - -

same as data_model, retained for backwards compatibility.

-
-
- -
-
-

var groups

- - - - -

The groups dictionary maps the names of groups created for -this Dataset or Group to instances of the Group class (the -Dataset class is simply a special case of the Group class which -describes the root group in the netCDF4 file).

-
-
- -
-
-

var keepweakref

- - - - -

If True, child Dimension and Variables objects only keep weak references to -the parent Dataset or Group.

-
-
- -
-
-

var parent

- - - - -

parent is a reference to the parent -Group instance. None for the root group or Dataset instance

-
-
- -
-
-

var path

- - - - -

path shows the location of the Group in the Dataset in a unix directory format (the names of groups in the hierarchy separated by forward slashes). A Dataset instance is the root group, so the path is simply '/'.

-
-
- -
-
-

var variables

- - - - -

The variables dictionary maps the names of variables -defined for this Dataset or Group to instances of the Variable -class.

-
-
- -
-
-

var vltypes

- - - - -

The vltypes dictionary maps the names of -variable-length types defined for the Group or Dataset to instances of the -VLType class.

-
-
- -
-

Static methods

- -
-
-

def __init__(

self, filename, mode="r", clobber=True, diskless=False, persist=False, keepweakref=False, format='NETCDF4')

-
- - - - -

Dataset constructor.

-

filename: Name of netCDF file to hold dataset. Can also -be a python 3 pathlib instance or the URL of an OpenDAP dataset. When memory is -set this is just used to set the filepath().

-

mode: access mode. r means read-only; no data can be -modified. w means write; a new file is created, an existing file with -the same name is deleted. a and r+ mean append (in analogy with -serial files); an existing file is opened for reading and writing. -Appending s to modes w, r+ or a will enable unbuffered shared -access to NETCDF3_CLASSIC, NETCDF3_64BIT_OFFSET or -NETCDF3_64BIT_DATA formatted files. -Unbuffered access may be useful even if you don't need shared -access, since it may be faster for programs that don't access data -sequentially. This option is ignored for NETCDF4 and NETCDF4_CLASSIC -formatted files.

-

clobber: if True (default), opening a file with mode='w' -will clobber an existing file with the same name. if False, an -exception will be raised if a file with the same name already exists.

-

format: underlying file format (one of 'NETCDF4', 'NETCDF4_CLASSIC', 'NETCDF3_CLASSIC', 'NETCDF3_64BIT_OFFSET' or 'NETCDF3_64BIT_DATA'). Only relevant if mode = 'w' (if mode = 'r','a' or 'r+' the file format is automatically detected). Default 'NETCDF4', which means the data is stored in an HDF5 file, using netCDF 4 API features. Setting format='NETCDF4_CLASSIC' will create an HDF5 file, using only netCDF 3 compatible API features. netCDF 3 clients must be recompiled and linked against the netCDF 4 library to read files in NETCDF4_CLASSIC format. 'NETCDF3_CLASSIC' is the classic netCDF 3 file format that does not handle 2+ GB files. 'NETCDF3_64BIT_OFFSET' is the 64-bit offset version of the netCDF 3 file format, which fully supports 2+ GB files, but is only compatible with clients linked against netCDF version 3.6.0 or later. 'NETCDF3_64BIT_DATA' is the 64-bit data version of the netCDF 3 file format, which supports 64-bit dimension sizes plus unsigned and 64 bit integer data types, but is only compatible with clients linked against netCDF version 4.4.0 or later.

-

diskless: If True, create diskless (in memory) file.
-This is an experimental feature added to the C library after the -netcdf-4.2 release.

-

persist: if diskless=True, persist file to disk when closed -(default False).

-

keepweakref: if True, child Dimension and Variable instances will keep weak references to the parent Dataset or Group object. Default is False, which means strong references will be kept. Having Dimension and Variable instances keep a strong reference to the parent Dataset instance, which in turn keeps a reference to child Dimension and Variable instances, creates circular references. Circular references complicate garbage collection, which may mean increased memory usage for programs that create many Dataset instances with lots of Variables. It also will result in the Dataset object never being deleted, which means it may keep open files alive as well. Setting keepweakref=True allows Dataset instances to be garbage collected as soon as they go out of scope, potentially reducing memory usage and open file handles. However, in many cases this is not desirable, since the associated Variable instances may still be needed, but are rendered unusable when the parent Dataset instance is garbage collected.

-

memory: if not None, open file with contents taken from this block of memory. -Must be a sequence of bytes. Note this only works with "r" mode.

-

encoding: encoding used to encode filename string into bytes. Default is None (sys.getfilesystemencoding() is used).

-

parallel: open for parallel access using MPI (requires mpi4py and -parallel-enabled netcdf-c and hdf5 libraries). Default is False. If -True, comm and info kwargs may also be specified.

-

comm: MPI_Comm object for parallel access. Default None, which -means MPI_COMM_WORLD will be used. Ignored if parallel=False.

-

info: MPI_Info object for parallel access. Default None, which -means MPI_INFO_NULL will be used. Ignored if parallel=False.

-
-
- -
- - -
-
-

def close(

self)

-
- - - - -

Close the Dataset.

-
-
- -
- - -
-
-

def createCompoundType(

self, datatype, datatype_name)

-
- - - - -

Creates a new compound data type named datatype_name from the numpy -dtype object datatype.

-

Note: If the new compound data type contains other compound data types -(i.e. it is a 'nested' compound type, where not all of the elements -are homogeneous numeric data types), then the 'inner' compound types must be -created first.

-

The return value is the CompoundType class instance describing the new -datatype.

-
-
- -
- - -
-
-

def createDimension(

self, dimname, size=None)

-
- - - - -

Creates a new dimension with the given dimname and size.

-

size must be a positive integer or None, which stands for -"unlimited" (default is None). Specifying a size of 0 also -results in an unlimited dimension. The return value is the Dimension -class instance describing the new dimension. To determine the current -maximum size of the dimension, use the len function on the Dimension -instance. To determine if a dimension is 'unlimited', use the -isunlimited method of the Dimension instance.
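For example, a minimal sketch (assuming ds is a Dataset open for writing):

>>> lat = ds.createDimension("lat", 73)
>>> t = ds.createDimension("time", None)   # None (or 0) means unlimited
>>> print len(lat), t.isunlimited()
73 True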

-
-
- -
- - -
-
-

def createEnumType(

self, datatype, datatype_name, enum_dict)

-
- - - - -

Creates a new Enum data type named datatype_name from a numpy -integer dtype object datatype, and a python dictionary -defining the enum fields and values.

-

The return value is the EnumType class instance describing the new -datatype.

-
-
- -
- - -
-
-

def createGroup(

self, groupname)

-
- - - - -

Creates a new Group with the given groupname.

-

If groupname is specified as a path, using forward slashes as in unix to -separate components, then intermediate groups will be created as necessary -(analogous to mkdir -p in unix). For example, -createGroup('/GroupA/GroupB/GroupC') will create GroupA, -GroupA/GroupB, and GroupA/GroupB/GroupC, if they don't already exist. -If the specified path describes a group that already exists, no error is -raised.

-

The return value is a Group class instance.
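For example, a minimal sketch (assuming ds is a Dataset open for writing):

>>> grpC = ds.createGroup("/GroupA/GroupB/GroupC")   # intermediate groups are created as needed
>>> print grpC.path
/GroupA/GroupB/GroupC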

-
-
- -
- - -
-
-

def createVLType(

self, datatype, datatype_name)

-
- - - - -

Creates a new VLEN data type named datatype_name from a numpy -dtype object datatype.

-

The return value is the VLType class instance describing the new -datatype.

-
-
- -
- - -
-
-

def createVariable(

self, varname, datatype, dimensions=(), zlib=False, complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, endian='native', least_significant_digit=None, fill_value=None)

-
- - - - -

Creates a new variable with the given varname, datatype, and -dimensions. If dimensions are not given, the variable is assumed to be -a scalar.

-

If varname is specified as a path, using forward slashes as in unix to separate components, then intermediate groups will be created as necessary. For example, createVariable('/GroupA/GroupB/VarC', float, ('x','y')) will create groups GroupA and GroupA/GroupB, plus the variable GroupA/GroupB/VarC, if the preceding groups don't already exist.

-

The datatype can be a numpy datatype object, or a string that describes -a numpy dtype object (like the dtype.str attribute of a numpy array). -Supported specifiers include: 'S1' or 'c' (NC_CHAR), 'i1' or 'b' or 'B' -(NC_BYTE), 'u1' (NC_UBYTE), 'i2' or 'h' or 's' (NC_SHORT), 'u2' -(NC_USHORT), 'i4' or 'i' or 'l' (NC_INT), 'u4' (NC_UINT), 'i8' (NC_INT64), -'u8' (NC_UINT64), 'f4' or 'f' (NC_FLOAT), 'f8' or 'd' (NC_DOUBLE). -datatype can also be a CompoundType instance -(for a structured, or compound array), a VLType instance -(for a variable-length array), or the python str builtin -(for a variable-length string array). Numpy string and unicode datatypes with -length greater than one are aliases for str.

-

Data from netCDF variables is presented to python as numpy arrays with -the corresponding data type.

-

dimensions must be a tuple containing dimension names (strings) that -have been defined previously using createDimension. The default value -is an empty tuple, which means the variable is a scalar.

-

If the optional keyword zlib is True, the data will be compressed in -the netCDF file using gzip compression (default False).

-

The optional keyword complevel is an integer between 1 and 9 describing -the level of compression desired (default 4). Ignored if zlib=False.

-

If the optional keyword shuffle is True, the HDF5 shuffle filter will be applied before compressing the data (default True). This significantly improves compression. Ignored if zlib=False.

-

If the optional keyword fletcher32 is True, the Fletcher32 HDF5 -checksum algorithm is activated to detect errors. Default False.

-

If the optional keyword contiguous is True, the variable data is -stored contiguously on disk. Default False. Setting to True for -a variable with an unlimited dimension will trigger an error.

-

The optional keyword chunksizes can be used to manually specify the -HDF5 chunksizes for each dimension of the variable. A detailed -discussion of HDF chunking and I/O performance is available -here. -Basically, you want the chunk size for each dimension to match as -closely as possible the size of the data block that users will read -from the file. chunksizes cannot be set if contiguous=True.
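For example, a sketch (the ds handle and the dimension and variable names are hypothetical): a variable dimensioned (time, lon), where lon has length 144, could be chunked so that each chunk holds one full row along lon:

>>> v = ds.createVariable("precip", "f4", ("time","lon"), zlib=True, chunksizes=(1, 144))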

-

The optional keyword endian can be used to control whether the -data is stored in little or big endian format on disk. Possible -values are little, big or native (default). The library -will automatically handle endian conversions when the data is read, -but if the data is always going to be read on a computer with the -opposite format as the one used to create the file, there may be -some performance advantage to be gained by setting the endian-ness.

-

The zlib, complevel, shuffle, fletcher32, contiguous, chunksizes and endian -keywords are silently ignored for netCDF 3 files that do not use HDF5.

-

The optional keyword fill_value can be used to override the default -netCDF _FillValue (the value that the variable gets filled with before -any data is written to it, defaults given in netCDF4.default_fillvals). -If fill_value is set to False, then the variable is not pre-filled.
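For example, a sketch (the ds handle and the variable and dimension names are hypothetical):

>>> v1 = ds.createVariable("t2m", "f4", ("time","lat","lon"), fill_value=-9999.)  # custom fill value
>>> v2 = ds.createVariable("counter", "i4", ("time",), fill_value=False)          # no pre-filling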

-

If the optional keyword parameter least_significant_digit is -specified, variable data will be truncated (quantized). In conjunction -with zlib=True this produces 'lossy', but significantly more -efficient compression. For example, if least_significant_digit=1, -data will be quantized using numpy.around(scale*data)/scale, where -scale = 2**bits, and bits is determined so that a precision of 0.1 is -retained (in this case bits=4). From the -PSD metadata conventions: -"least_significant_digit -- power of ten of the smallest decimal place -in unpacked data that is a reliable value." Default is None, or no -quantization, or 'lossless' compression.

-

When creating variables in a NETCDF4 or NETCDF4_CLASSIC formatted file, -HDF5 creates something called a 'chunk cache' for each variable. The -default size of the chunk cache may be large enough to completely fill -available memory when creating thousands of variables. The optional -keyword chunk_cache allows you to reduce (or increase) the size of -the default chunk cache when creating a variable. The setting only -persists as long as the Dataset is open - you can use the set_var_chunk_cache -method to change it the next time the Dataset is opened. -Warning - messing with this parameter can seriously degrade performance.

-

The return value is the Variable class instance describing the new -variable.

-

A list of names corresponding to netCDF variable attributes can be -obtained with the Variable method ncattrs. A dictionary -containing all the netCDF attribute name/value pairs is provided by -the __dict__ attribute of a Variable instance.

-

Variable instances behave much like array objects. Data can be assigned to or retrieved from a variable with indexing and slicing operations on the Variable instance. A Variable instance has six standard attributes: dimensions, dtype, shape, ndim, name and least_significant_digit. Application programs should never modify these attributes. The dimensions attribute is a tuple containing the names of the dimensions associated with this variable. The dtype attribute is a string describing the variable's data type (i4, f8, S1, etc). The shape attribute is a tuple describing the current sizes of all the variable's dimensions. The name attribute is a string containing the name of the Variable instance. The least_significant_digit attribute describes the power of ten of the smallest decimal place in the data that contains a reliable value; if None, the data is not truncated. The ndim attribute is the number of variable dimensions.

-
-
- -
- - -
-
-

def delncattr(

self,name,value)

-
- - - - -

delete a netCDF dataset or group attribute. Use if you need to delete a -netCDF attribute with the same name as one of the reserved python -attributes.

-
-
- -
- - -
-
-

def filepath(

self,encoding=None)

-
- - - - -

Get the file system path (or the opendap URL) which was used to -open/create the Dataset. Requires netcdf >= 4.1.2. The path -is decoded into a string using sys.getfilesystemencoding() by default, this can be -changed using the encoding kwarg.

-
-
- -
- - -
-
-

def get_variables_by_attributes(

...)

-
- - - - -

Returns a list of variables that match specific conditions.

-

Can pass in key=value parameters and variables are returned that -contain all of the matches. For example,

-
>>> # Get variables with x-axis attribute.
->>> vs = nc.get_variables_by_attributes(axis='X')
->>> # Get variables with matching "standard_name" attribute
->>> vs = nc.get_variables_by_attributes(standard_name='northward_sea_water_velocity')
-
- - -

Can pass in key=callable parameter and variables are returned if the -callable returns True. The callable should accept a single parameter, -the attribute value. None is given as the attribute value when the -attribute does not exist on the variable. For example,

-
>>> # Get Axis variables
->>> vs = nc.get_variables_by_attributes(axis=lambda v: v in ['X', 'Y', 'Z', 'T'])
->>> # Get variables that don't have an "axis" attribute
->>> vs = nc.get_variables_by_attributes(axis=lambda v: v is None)
->>> # Get variables that have a "grid_mapping" attribute
->>> vs = nc.get_variables_by_attributes(grid_mapping=lambda v: v is not None)
-
-
-
- -
- - -
-
-

def getncattr(

self,name)

-
- - - - -

retrieve a netCDF dataset or group attribute. -Use if you need to get a netCDF attribute with the same -name as one of the reserved python attributes.

-

optional kwarg encoding can be used to specify the character encoding of a string attribute (default is utf-8).

-
-
- -
- - -
-
-

def isopen(

...)

-
- - - - -

is the Dataset open or closed?

-
-
- -
- - -
-
-

def ncattrs(

self)

-
- - - - -

return netCDF global attribute names for this Dataset or Group in a list.

-
-
- -
- - -
-
-

def renameAttribute(

self, oldname, newname)

-
- - - - -

rename a Dataset or Group attribute named oldname to newname.

-
-
- -
- - -
-
-

def renameDimension(

self, oldname, newname)

-
- - - - -

rename a Dimension named oldname to newname.

-
-
- -
- - -
-
-

def renameGroup(

self, oldname, newname)

-
- - - - -

rename a Group named oldname to newname (requires netcdf >= 4.3.1).

-
-
- -
- - -
-
-

def renameVariable(

self, oldname, newname)

-
- - - - -

rename a Variable named oldname to newname

-
-
- -
- - -
-
-

def set_auto_chartostring(

self, True_or_False)

-
- - - - -

Call set_auto_chartostring for all variables contained in this Dataset or -Group, as well as for all variables in all its subgroups.

-

True_or_False: Boolean determining if automatic conversion of -all character arrays <--> string arrays should be performed for -character variables (variables of type NC_CHAR or S1) with the -_Encoding attribute set.

-

Note: Calling this function only affects existing variables. Variables created -after calling this function will follow the default behaviour.

-
-
- -
- - -
-
-

def set_auto_mask(

self, True_or_False)

-
- - - - -

Call set_auto_mask for all variables contained in this Dataset or -Group, as well as for all variables in all its subgroups.

-

True_or_False: Boolean determining if automatic conversion to masked arrays -shall be applied for all variables.

-

Note: Calling this function only affects existing variables. Variables created -after calling this function will follow the default behaviour.

-
-
- -
- - -
-
-

def set_auto_maskandscale(

self, True_or_False)

-
- - - - -

Call set_auto_maskandscale for all variables contained in this Dataset or -Group, as well as for all variables in all its subgroups.

-

True_or_False: Boolean determining if automatic conversion to masked arrays -and variable scaling shall be applied for all variables.

-

Note: Calling this function only affects existing variables. Variables created -after calling this function will follow the default behaviour.

-
-
- -
- - -
-
-

def set_auto_scale(

self, True_or_False)

-
- - - - -

Call set_auto_scale for all variables contained in this Dataset or -Group, as well as for all variables in all its subgroups.

-

True_or_False: Boolean determining if automatic variable scaling -shall be applied for all variables.

-

Note: Calling this function only affects existing variables. Variables created -after calling this function will follow the default behaviour.

-
-
- -
- - -
-
-

def set_fill_off(

self)

-
- - - - -

Sets the fill mode for a Dataset open for writing to off.

-

This will prevent the data from being pre-filled with fill values, which -may result in some performance improvements. However, you must then make -sure the data is actually written before being read.

-
-
- -
- - -
-
-

def set_fill_on(

self)

-
- - - - -

Sets the fill mode for a Dataset open for writing to on.

-

This causes data to be pre-filled with fill values. The fill values can be controlled by the variable's _FillValue attribute, but it is usually sufficient to use the netCDF default _FillValue (defined separately for each variable type). The default behavior of the netCDF library corresponds to set_fill_on. Data which are equal to the _FillValue indicate that the variable was created, but never written to.

-
-
- -
- - -
-
-

def setncattr(

self,name,value)

-
- - - - -

set a netCDF dataset or group attribute using name,value pair. Use if you need to set a netCDF attribute with the same name as one of the reserved python attributes.

-
-
- -
- - -
-
-

def setncattr_string(

self,name,value)

-
- - - - -

set a netCDF dataset or group string attribute using name,value pair. -Use if you need to ensure that a netCDF attribute is created with type -NC_STRING if the file format is NETCDF4. -Use if you need to set an attribute to an array of variable-length strings.

-
-
- -
- - -
-
-

def setncatts(

self,attdict)

-
- - - - -

set a bunch of netCDF dataset or group attributes at once using a python dictionary. This may be faster when setting a lot of attributes for a NETCDF3 formatted file, since nc_redef/nc_enddef is not called in between setting each attribute.
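
For example (assuming nc is a writable Dataset; attribute names and values are illustrative):

>>> nc.setncatts({"title": "ocean model output",
...               "institution": "example institute",
...               "Conventions": "CF-1.8"})
>>> nc.ncattrs()   # the three attributes are now defined on the Dataset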

-
-
- -
- - -
-
-

def sync(

self)

-
- - - - -

Writes all buffered data in the Dataset to the disk file.

-
-
- -
- -
-
- -
-

class Dimension

- - -

A netCDF Dimension is used to describe the coordinates of a Variable. -See __init__ for more details.

-

The current maximum size of a Dimension instance can be obtained by -calling the python len function on the Dimension instance. The -isunlimited method of a Dimension instance can be used to -determine if the dimension is unlimited.

-

Read-only class variables:

-

name: String name, used when creating a Variable with -createVariable.

-

size: Current Dimension size (same as len(d), where d is a -Dimension instance).
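
For example, assuming nc is a writable Dataset (dimension names are illustrative):

>>> time = nc.createDimension("time", None)   # unlimited dimension
>>> lat = nc.createDimension("lat", 73)
>>> len(lat)
73
>>> time.isunlimited()
True
>>> lat.isunlimited()
False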

-
-
- - -
-

Ancestors (in MRO)

- -

Class variables

-
-

var name

- - - - -

A string describing the name of the Dimension - used when creating a -Variable instance with createVariable.

-
-
- -
-
-

var size

- - - - -
-
- -
-

Static methods

- -
-
-

def __init__(

self, group, name, size=None)

-
- - - - -

Dimension constructor.

-

group: Group instance to associate with dimension.

-

name: Name of the dimension.

-

size: Size of the dimension. None or 0 means unlimited. (Default None).

-

Note: Dimension instances should be created using the -createDimension method of a Group or -Dataset instance, not using __init__ directly.

-
-
- -
- - -
-
-

def group(

self)

-
- - - - -

return the group that this Dimension is a member of.

-
-
- -
- - -
-
-

def isunlimited(

self)

-
- - - - -

returns True if the Dimension instance is unlimited, False otherwise.

-
-
- -
- -
-
- -
-

class EnumType

- - -

An EnumType instance is used to describe an Enum data type, and can be passed to the createVariable method of a Dataset or Group instance. See __init__ for more details.

-

The instance variables dtype, name and enum_dict should not be modified by -the user.

-
-
- - -
-

Ancestors (in MRO)

- -

Class variables

-
-

var dtype

- - - - -

A numpy integer dtype object describing the base type for the Enum.

-
-
- -
-
-

var enum_dict

- - - - -

A python dictionary describing the enum fields and values.

-
-
- -
-
-

var name

- - - - -

String name.

-
-
- -
-

Static methods

- -
-
-

def __init__(

group, datatype, datatype_name, enum_dict)

-
- - - - -

EnumType constructor.

-

group: Group instance to associate with the VLEN datatype.

-

datatype: a numpy integer dtype object describing the base type for the Enum.

-

datatype_name: a Python string containing a description of the -Enum data type.

-

enum_dict: a Python dictionary containing the Enum field/value -pairs.

-

Note: EnumType instances should be created using the -createEnumType -method of a Dataset or Group instance, not using this class directly.
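
As an illustrative sketch (assuming nc is a NETCDF4 Dataset with an existing unlimited "time" dimension; the type and field names are hypothetical):

>>> import numpy
>>> enum_dict = {"clear": 0, "cumulus": 1, "stratus": 2, "missing": 255}
>>> cloud_t = nc.createEnumType(numpy.uint8, "cloud_t", enum_dict)
>>> cloud = nc.createVariable("cloud_type", cloud_t, ("time",),
...                           fill_value=enum_dict["missing"])
>>> cloud[0] = enum_dict["stratus"]   # only values present in enum_dict may be written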

-
-
- -
- -
-
- -
-

class Group

- - -

Groups define a hierarchical namespace within a netCDF file. They are analogous to directories in a unix filesystem. Each Group behaves like a Dataset within a Dataset, and can contain its own variables, dimensions and attributes (and other Groups). See __init__ for more details.

-

Group inherits from Dataset, so all the -Dataset class methods and variables are available -to a Group instance (except the close method).

-

Additional read-only class variables:

-

name: String describing the group name.

-
-
- - -
-

Ancestors (in MRO)

- -

Class variables

-
-

var cmptypes

- -

- Inheritance: - Dataset.cmptypes -

- - - -

The cmptypes dictionary maps the names of -compound types defined for the Group or Dataset to instances of the -CompoundType class.

-
-
- -
-
-

var data_model

- -

- Inheritance: - Dataset.data_model -

- - - -

data_model describes the netCDF -data model version, one of NETCDF3_CLASSIC, NETCDF4, -NETCDF4_CLASSIC, NETCDF3_64BIT_OFFSET or NETCDF3_64BIT_DATA.

-
-
- -
-
-

var dimensions

- -

- Inheritance: - Dataset.dimensions -

- - - -

The dimensions dictionary maps the names of -dimensions defined for the Group or Dataset to instances of the -Dimension class.

-
-
- -
-
-

var disk_format

- -

- Inheritance: - Dataset.disk_format -

- - - -

disk_format describes the underlying -file format, one of NETCDF3, HDF5, HDF4, -PNETCDF, DAP2, DAP4 or UNDEFINED. Only available if using -netcdf C library version >= 4.3.1, otherwise will always return -UNDEFINED.

-
-
- -
-
-

var enumtypes

- -

- Inheritance: - Dataset.enumtypes -

- - - -

The enumtypes dictionary maps the names of -Enum types defined for the Group or Dataset to instances of the -EnumType class.

-
-
- -
-
-

var file_format

- -

- Inheritance: - Dataset.file_format -

- - - -

same as data_model, retained for backwards compatibility.

-
-
- -
-
-

var groups

- -

- Inheritance: - Dataset.groups -

- - - -

The groups dictionary maps the names of groups created for -this Dataset or Group to instances of the Group class (the -Dataset class is simply a special case of the Group class which -describes the root group in the netCDF4 file).

-
-
- -
-
-

var keepweakref

- -

- Inheritance: - Dataset.keepweakref -

- - - -

If True, child Dimension and Variables objects only keep weak references to -the parent Dataset or Group.

-
-
- -
-
-

var name

- - - - -

A string describing the name of the Group.

-
-
- -
-
-

var parent

- -

- Inheritance: - Dataset.parent -

- - - -

parent is a reference to the parent -Group instance. None for the root group or Dataset instance

-
-
- -
-
-

var path

- -

- Inheritance: - Dataset.path -

- - - -

path shows the location of the Group in the Dataset in a unix directory format (the names of groups in the hierarchy separated by forward slashes). A Dataset instance is the root group, so the path is simply '/'.

-
-
- -
-
-

var variables

- -

- Inheritance: - Dataset.variables -

- - - -

The variables dictionary maps the names of variables -defined for this Dataset or Group to instances of the Variable -class.

-
-
- -
-
-

var vltypes

- -

- Inheritance: - Dataset.vltypes -

- - - -

The vltypes dictionary maps the names of -variable-length types defined for the Group or Dataset to instances of the -VLType class.

-
-
- -
-

Static methods

- -
-
-

def __init__(

self, parent, name)

-
- -

- Inheritance: - Dataset.__init__ -

- - - -

Group constructor.

-

parent: Group instance for the parent group. If being created -in the root group, use a Dataset instance.

-

name: - Name of the group.

-

Note: Group instances should be created using the -createGroup method of a Dataset instance, or -another Group instance, not using this class directly.

-
-
- -
- - -
-
-

def close(

self)

-
- -

- Inheritance: - Dataset.close -

- - - -

overrides Dataset close method which does not apply to Group -instances, raises IOError.

-
-
- -
- - -
-
-

def createCompoundType(

self, datatype, datatype_name)

-
- -

- Inheritance: - Dataset.createCompoundType -

- - - -

Creates a new compound data type named datatype_name from the numpy -dtype object datatype.

-

Note: If the new compound data type contains other compound data types -(i.e. it is a 'nested' compound type, where not all of the elements -are homogeneous numeric data types), then the 'inner' compound types must be -created first.

-

The return value is the CompoundType class instance describing the new -datatype.
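
A sketch of the 'inner first' rule for nested compound types (assuming nc is a NETCDF4 Dataset; type names are illustrative):

>>> import numpy
>>> # inner compound type, created first
>>> complex128 = numpy.dtype([("real", numpy.float64), ("imag", numpy.float64)])
>>> complex128_t = nc.createCompoundType(complex128, "complex128_t")
>>> # outer compound type that embeds the inner one
>>> wind = numpy.dtype([("speed", numpy.float32), ("gust", complex128)])
>>> wind_t = nc.createCompoundType(wind, "wind_t")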

-
-
- -
- - -
-
-

def createDimension(

self, dimname, size=None)

-
- -

- Inheritance: - Dataset.createDimension -

- - - -

Creates a new dimension with the given dimname and size.

-

size must be a positive integer or None, which stands for -"unlimited" (default is None). Specifying a size of 0 also -results in an unlimited dimension. The return value is the Dimension -class instance describing the new dimension. To determine the current -maximum size of the dimension, use the len function on the Dimension -instance. To determine if a dimension is 'unlimited', use the -isunlimited method of the Dimension instance.

-
-
- -
- - -
-
-

def createEnumType(

self, datatype, datatype_name, enum_dict)

-
- -

- Inheritance: - Dataset.createEnumType -

- - - -

Creates a new Enum data type named datatype_name from a numpy -integer dtype object datatype, and a python dictionary -defining the enum fields and values.

-

The return value is the EnumType class instance describing the new -datatype.

-
-
- -
- - -
-
-

def createGroup(

self, groupname)

-
- -

- Inheritance: - Dataset.createGroup -

- - - -

Creates a new Group with the given groupname.

-

If groupname is specified as a path, using forward slashes as in unix to -separate components, then intermediate groups will be created as necessary -(analogous to mkdir -p in unix). For example, -createGroup('/GroupA/GroupB/GroupC') will create GroupA, -GroupA/GroupB, and GroupA/GroupB/GroupC, if they don't already exist. -If the specified path describes a group that already exists, no error is -raised.

-

The return value is a Group class instance.
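
For example (group names are illustrative):

>>> fcst = nc.createGroup("forecasts")
>>> analysis = nc.createGroup("/forecasts/model1/analysis")   # creates "model1" on the way, like mkdir -p
>>> list(nc.groups["forecasts"].groups["model1"].groups)
['analysis']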

-
-
- -
- - -
-
-

def createVLType(

self, datatype, datatype_name)

-
- -

- Inheritance: - Dataset.createVLType -

- - - -

Creates a new VLEN data type named datatype_name from a numpy -dtype object datatype.

-

The return value is the VLType class instance describing the new -datatype.
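
A minimal sketch (assuming an existing unlimited "time" dimension; type and variable names are illustrative):

>>> import numpy
>>> vlen_t = nc.createVLType(numpy.int32, "phony_vlen")
>>> ragged = nc.createVariable("ragged", vlen_t, ("time",))
>>> ragged[0] = numpy.array([1, 2, 3], dtype=numpy.int32)   # rows may have different lengths
>>> ragged[1] = numpy.array([7], dtype=numpy.int32)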

-
-
- -
- - -
-
-

def createVariable(

self, varname, datatype, dimensions=(), zlib=False, complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, endian='native', least_significant_digit=None, fill_value=None)

-
- -

- Inheritance: - Dataset.createVariable -

- - - -

Creates a new variable with the given varname, datatype, and -dimensions. If dimensions are not given, the variable is assumed to be -a scalar.

-

If varname is specified as a path, using forward slashes as in unix to -separate components, then intermediate groups will be created as necessary -For example, createVariable('/GroupA/GroupB/VarC', float, ('x','y')) will create groups GroupA -and GroupA/GroupB, plus the variable GroupA/GroupB/VarC, if the preceding -groups don't already exist.

-

The datatype can be a numpy datatype object, or a string that describes -a numpy dtype object (like the dtype.str attribute of a numpy array). -Supported specifiers include: 'S1' or 'c' (NC_CHAR), 'i1' or 'b' or 'B' -(NC_BYTE), 'u1' (NC_UBYTE), 'i2' or 'h' or 's' (NC_SHORT), 'u2' -(NC_USHORT), 'i4' or 'i' or 'l' (NC_INT), 'u4' (NC_UINT), 'i8' (NC_INT64), -'u8' (NC_UINT64), 'f4' or 'f' (NC_FLOAT), 'f8' or 'd' (NC_DOUBLE). -datatype can also be a CompoundType instance -(for a structured, or compound array), a VLType instance -(for a variable-length array), or the python str builtin -(for a variable-length string array). Numpy string and unicode datatypes with -length greater than one are aliases for str.

-

Data from netCDF variables is presented to python as numpy arrays with -the corresponding data type.

-

dimensions must be a tuple containing dimension names (strings) that -have been defined previously using createDimension. The default value -is an empty tuple, which means the variable is a scalar.

-

If the optional keyword zlib is True, the data will be compressed in -the netCDF file using gzip compression (default False).

-

The optional keyword complevel is an integer between 1 and 9 describing -the level of compression desired (default 4). Ignored if zlib=False.

-

If the optional keyword shuffle is True, the HDF5 shuffle filter -will be applied before compressing the data (default True). This -significantly improves compression. Default is True. Ignored if -zlib=False.

-

If the optional keyword fletcher32 is True, the Fletcher32 HDF5 -checksum algorithm is activated to detect errors. Default False.

-

If the optional keyword contiguous is True, the variable data is -stored contiguously on disk. Default False. Setting to True for -a variable with an unlimited dimension will trigger an error.

-

The optional keyword chunksizes can be used to manually specify the -HDF5 chunksizes for each dimension of the variable. A detailed -discussion of HDF chunking and I/O performance is available -here. -Basically, you want the chunk size for each dimension to match as -closely as possible the size of the data block that users will read -from the file. chunksizes cannot be set if contiguous=True.

-

The optional keyword endian can be used to control whether the -data is stored in little or big endian format on disk. Possible -values are little, big or native (default). The library -will automatically handle endian conversions when the data is read, -but if the data is always going to be read on a computer with the -opposite format as the one used to create the file, there may be -some performance advantage to be gained by setting the endian-ness.

-

The zlib, complevel, shuffle, fletcher32, contiguous, chunksizes and endian -keywords are silently ignored for netCDF 3 files that do not use HDF5.

-

The optional keyword fill_value can be used to override the default -netCDF _FillValue (the value that the variable gets filled with before -any data is written to it, defaults given in netCDF4.default_fillvals). -If fill_value is set to False, then the variable is not pre-filled.

-

If the optional keyword parameter least_significant_digit is -specified, variable data will be truncated (quantized). In conjunction -with zlib=True this produces 'lossy', but significantly more -efficient compression. For example, if least_significant_digit=1, -data will be quantized using numpy.around(scale*data)/scale, where -scale = 2**bits, and bits is determined so that a precision of 0.1 is -retained (in this case bits=4). From the -PSD metadata conventions: -"least_significant_digit -- power of ten of the smallest decimal place -in unpacked data that is a reliable value." Default is None, or no -quantization, or 'lossless' compression.

-

When creating variables in a NETCDF4 or NETCDF4_CLASSIC formatted file, -HDF5 creates something called a 'chunk cache' for each variable. The -default size of the chunk cache may be large enough to completely fill -available memory when creating thousands of variables. The optional -keyword chunk_cache allows you to reduce (or increase) the size of -the default chunk cache when creating a variable. The setting only -persists as long as the Dataset is open - you can use the set_var_chunk_cache -method to change it the next time the Dataset is opened. -Warning - messing with this parameter can seriously degrade performance.

-

The return value is the Variable class instance describing the new -variable.

-

A list of names corresponding to netCDF variable attributes can be -obtained with the Variable method ncattrs. A dictionary -containing all the netCDF attribute name/value pairs is provided by -the __dict__ attribute of a Variable instance.

-

Variable instances behave much like array objects. Data can be assigned to or retrieved from a variable with indexing and slicing operations on the Variable instance. A Variable instance has six Dataset standard attributes: dimensions, dtype, shape, ndim, name and least_significant_digit. Application programs should never modify these attributes. The dimensions attribute is a tuple containing the names of the dimensions associated with this variable. The dtype attribute is a string describing the variable's data type (i4, f8, S1, etc). The shape attribute is a tuple describing the current sizes of all the variable's dimensions. The name attribute is a string containing the name of the Variable instance. The least_significant_digit attribute describes the power of ten of the smallest decimal place in the data that contains a reliable value; data is truncated to this decimal place when it is assigned to the Variable instance. If None, the data is not truncated. The ndim attribute is the number of variable dimensions.
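
For example, a compressed, quantized variable might be created like this (dimension and variable names are illustrative):

>>> nc.createDimension("time", None)
>>> nc.createDimension("lat", 73)
>>> nc.createDimension("lon", 144)
>>> temp = nc.createVariable("temp", "f4", ("time", "lat", "lon"),
...                          zlib=True, complevel=4, shuffle=True,
...                          least_significant_digit=3, fill_value=-9999.0)
>>> temp.units = "K"
>>> temp[0] = 280.0   # broadcast a constant into the first time slice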

-
-
- -
- - -
-
-

def delncattr(

self,name,value)

-
- -

- Inheritance: - Dataset.delncattr -

- - - -

delete a netCDF dataset or group attribute. Use if you need to delete a -netCDF attribute with the same name as one of the reserved python -attributes.

-
-
- -
- - -
-
-

def filepath(

self,encoding=None)

-
- -

- Inheritance: - Dataset.filepath -

- - - -

Get the file system path (or the opendap URL) which was used to -open/create the Dataset. Requires netcdf >= 4.1.2. The path -is decoded into a string using sys.getfilesystemencoding() by default, this can be -changed using the encoding kwarg.

-
-
- -
- - -
-
-

def get_variables_by_attributes(

...)

-
- -

- Inheritance: - Dataset.get_variables_by_attributes -

- - - -

Returns a list of variables that match specific conditions.

-

Can pass in key=value parameters and variables are returned that -contain all of the matches. For example,

-
>>> # Get variables with x-axis attribute.
->>> vs = nc.get_variables_by_attributes(axis='X')
->>> # Get variables with matching "standard_name" attribute
->>> vs = nc.get_variables_by_attributes(standard_name='northward_sea_water_velocity')
-
- - -

Can pass in key=callable parameter and variables are returned if the -callable returns True. The callable should accept a single parameter, -the attribute value. None is given as the attribute value when the -attribute does not exist on the variable. For example,

-
>>> # Get Axis variables
->>> vs = nc.get_variables_by_attributes(axis=lambda v: v in ['X', 'Y', 'Z', 'T'])
->>> # Get variables that don't have an "axis" attribute
->>> vs = nc.get_variables_by_attributes(axis=lambda v: v is None)
->>> # Get variables that have a "grid_mapping" attribute
->>> vs = nc.get_variables_by_attributes(grid_mapping=lambda v: v is not None)
-
-
-
- -
- - -
-
-

def getncattr(

self,name)

-
- -

- Inheritance: - Dataset.getncattr -

- - - -

retrieve a netCDF dataset or group attribute. -Use if you need to get a netCDF attribute with the same -name as one of the reserved python attributes.

-

option kwarg encoding can be used to specify the -character encoding of a string attribute (default is utf-8).

-
-
- -
- - -
-
-

def isopen(

...)

-
- -

- Inheritance: - Dataset.isopen -

- - - -

is the Dataset open or closed?

-
-
- -
- - -
-
-

def ncattrs(

self)

-
- -

- Inheritance: - Dataset.ncattrs -

- - - -

return netCDF global attribute names for this Dataset or Group in a list.

-
-
- -
- - -
-
-

def renameAttribute(

self, oldname, newname)

-
- -

- Inheritance: - Dataset.renameAttribute -

- - - -

rename a Dataset or Group attribute named oldname to newname.

-
-
- -
- - -
-
-

def renameDimension(

self, oldname, newname)

-
- -

- Inheritance: - Dataset.renameDimension -

- - - -

rename a Dimension named oldname to newname.

-
-
- -
- - -
-
-

def renameGroup(

self, oldname, newname)

-
- -

- Inheritance: - Dataset.renameGroup -

- - - -

rename a Group named oldname to newname (requires netcdf >= 4.3.1).

-
-
- -
- - -
-
-

def renameVariable(

self, oldname, newname)

-
- -

- Inheritance: - Dataset.renameVariable -

- - - -

rename a Variable named oldname to newname

-
-
- -
- - -
-
-

def set_auto_chartostring(

self, True_or_False)

-
- -

- Inheritance: - Dataset.set_auto_chartostring -

- - - -

Call set_auto_chartostring for all variables contained in this Dataset or -Group, as well as for all variables in all its subgroups.

-

True_or_False: Boolean determining if automatic conversion of -all character arrays <--> string arrays should be performed for -character variables (variables of type NC_CHAR or S1) with the -_Encoding attribute set.

-

Note: Calling this function only affects existing variables. Variables created -after calling this function will follow the default behaviour.

-
-
- -
- - -
-
-

def set_auto_mask(

self, True_or_False)

-
- -

- Inheritance: - Dataset.set_auto_mask -

- - - -

Call set_auto_mask for all variables contained in this Dataset or -Group, as well as for all variables in all its subgroups.

-

True_or_False: Boolean determining if automatic conversion to masked arrays -shall be applied for all variables.

-

Note: Calling this function only affects existing variables. Variables created -after calling this function will follow the default behaviour.

-
-
- -
- - -
-
-

def set_auto_maskandscale(

self, True_or_False)

-
- -

- Inheritance: - Dataset.set_auto_maskandscale -

- - - -

Call set_auto_maskandscale for all variables contained in this Dataset or -Group, as well as for all variables in all its subgroups.

-

True_or_False: Boolean determining if automatic conversion to masked arrays -and variable scaling shall be applied for all variables.

-

Note: Calling this function only affects existing variables. Variables created -after calling this function will follow the default behaviour.

-
-
- -
- - -
-
-

def set_auto_scale(

self, True_or_False)

-
- -

- Inheritance: - Dataset.set_auto_scale -

- - - -

Call set_auto_scale for all variables contained in this Dataset or -Group, as well as for all variables in all its subgroups.

-

True_or_False: Boolean determining if automatic variable scaling -shall be applied for all variables.

-

Note: Calling this function only affects existing variables. Variables created -after calling this function will follow the default behaviour.

-
-
- -
- - -
-
-

def set_fill_off(

self)

-
- -

- Inheritance: - Dataset.set_fill_off -

- - - -

Sets the fill mode for a Dataset open for writing to off.

-

This will prevent the data from being pre-filled with fill values, which -may result in some performance improvements. However, you must then make -sure the data is actually written before being read.

-
-
- -
- - -
-
-

def set_fill_on(

self)

-
- -

- Inheritance: - Dataset.set_fill_on -

- - - -

Sets the fill mode for a Dataset open for writing to on.

-

This causes data to be pre-filled with fill values. The fill values can be controlled by the variable's _FillValue attribute, but it is usually sufficient to use the netCDF default _FillValue (defined separately for each variable type). The default behavior of the netCDF library corresponds to set_fill_on. Data which are equal to the _FillValue indicate that the variable was created, but never written to.

-
-
- -
- - -
-
-

def setncattr(

self,name,value)

-
- -

- Inheritance: - Dataset.setncattr -

- - - -

set a netCDF dataset or group attribute using name,value pair. Use if you need to set a netCDF attribute with the same name as one of the reserved python attributes.

-
-
- -
- - -
-
-

def setncattr_string(

self,name,value)

-
- -

- Inheritance: - Dataset.setncattr_string -

- - - -

set a netCDF dataset or group string attribute using name,value pair. -Use if you need to ensure that a netCDF attribute is created with type -NC_STRING if the file format is NETCDF4. -Use if you need to set an attribute to an array of variable-length strings.

-
-
- -
- - -
-
-

def setncatts(

self,attdict)

-
- -

- Inheritance: - Dataset.setncatts -

- - - -

set a bunch of netCDF dataset or group attributes at once using a python dictionary. -This may be faster when setting a lot of attributes for a NETCDF3 -formatted file, since nc_redef/nc_enddef is not called in between setting -each attribute

-
-
- -
- - -
-
-

def sync(

self)

-
- -

- Inheritance: - Dataset.sync -

- - - -

Writes all buffered data in the Dataset to the disk file.

-
-
- -
- -
-
- -
-

class MFDataset

- - -

Class for reading multi-file netCDF Datasets, making variables -spanning multiple files appear as if they were in one file. -Datasets must be in NETCDF4_CLASSIC, NETCDF3_CLASSIC, NETCDF3_64BIT_OFFSET -or NETCDF3_64BIT_DATA format (NETCDF4 Datasets won't work).

-

Adapted from pycdf by Andre Gosselin.

-

Example usage (See __init__ for more details):

-
>>> import numpy
>>> from netCDF4 import Dataset, MFDataset
>>> # create a series of netCDF files with a variable sharing
>>> # the same unlimited dimension.
>>> for nf in range(10):
...     f = Dataset("mftest%s.nc" % nf, "w")
...     f.createDimension("x", None)
...     x = f.createVariable("x", "i", ("x",))
...     x[0:10] = numpy.arange(nf*10, 10*(nf+1))
...     f.close()
>>> # now read all those files in at once, in one Dataset.
>>> f = MFDataset("mftest*nc")
>>> print(f.variables["x"][:])
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99]
-
-
-
- - -
-

Ancestors (in MRO)

- -

Class variables

-
-

var cmptypes

- -

- Inheritance: - Dataset.cmptypes -

- - - -

The cmptypes dictionary maps the names of -compound types defined for the Group or Dataset to instances of the -CompoundType class.

-
-
- -
-
-

var data_model

- -

- Inheritance: - Dataset.data_model -

- - - -

data_model describes the netCDF -data model version, one of NETCDF3_CLASSIC, NETCDF4, -NETCDF4_CLASSIC, NETCDF3_64BIT_OFFSET or NETCDF3_64BIT_DATA.

-
-
- -
-
-

var dimensions

- -

- Inheritance: - Dataset.dimensions -

- - - -

The dimensions dictionary maps the names of -dimensions defined for the Group or Dataset to instances of the -Dimension class.

-
-
- -
-
-

var disk_format

- -

- Inheritance: - Dataset.disk_format -

- - - -

disk_format describes the underlying -file format, one of NETCDF3, HDF5, HDF4, -PNETCDF, DAP2, DAP4 or UNDEFINED. Only available if using -netcdf C library version >= 4.3.1, otherwise will always return -UNDEFINED.

-
-
- -
-
-

var enumtypes

- -

- Inheritance: - Dataset.enumtypes -

- - - -

The enumtypes dictionary maps the names of -Enum types defined for the Group or Dataset to instances of the -EnumType class.

-
-
- -
-
-

var file_format

- -

- Inheritance: - Dataset.file_format -

- - - -

same as data_model, retained for backwards compatibility.

-
-
- -
-
-

var groups

- -

- Inheritance: - Dataset.groups -

- - - -

The groups dictionary maps the names of groups created for -this Dataset or Group to instances of the Group class (the -Dataset class is simply a special case of the Group class which -describes the root group in the netCDF4 file).

-
-
- -
-
-

var keepweakref

- -

- Inheritance: - Dataset.keepweakref -

- - - -

If True, child Dimension and Variables objects only keep weak references to -the parent Dataset or Group.

-
-
- -
-
-

var parent

- -

- Inheritance: - Dataset.parent -

- - - -

parent is a reference to the parent -Group instance. None for the root group or Dataset instance

-
-
- -
-
-

var path

- -

- Inheritance: - Dataset.path -

- - - -

path shows the location of the Group in the Dataset in a unix directory format (the names of groups in the hierarchy separated by forward slashes). A Dataset instance is the root group, so the path is simply '/'.

-
-
- -
-
-

var variables

- -

- Inheritance: - Dataset.variables -

- - - -

The variables dictionary maps the names of variables -defined for this Dataset or Group to instances of the Variable -class.

-
-
- -
-
-

var vltypes

- -

- Inheritance: - Dataset.vltypes -

- - - -

The vltypes dictionary maps the names of -variable-length types defined for the Group or Dataset to instances of the -VLType class.

-
-
- -
-

Static methods

- -
-
-

def createCompoundType(

self, datatype, datatype_name)

-
- -

- Inheritance: - Dataset.createCompoundType -

- - - -

Creates a new compound data type named datatype_name from the numpy -dtype object datatype.

-

Note: If the new compound data type contains other compound data types -(i.e. it is a 'nested' compound type, where not all of the elements -are homogeneous numeric data types), then the 'inner' compound types must be -created first.

-

The return value is the CompoundType class instance describing the new -datatype.

-
-
- -
- - -
-
-

def createDimension(

self, dimname, size=None)

-
- -

- Inheritance: - Dataset.createDimension -

- - - -

Creates a new dimension with the given dimname and size.

-

size must be a positive integer or None, which stands for -"unlimited" (default is None). Specifying a size of 0 also -results in an unlimited dimension. The return value is the Dimension -class instance describing the new dimension. To determine the current -maximum size of the dimension, use the len function on the Dimension -instance. To determine if a dimension is 'unlimited', use the -isunlimited method of the Dimension instance.

-
-
- -
- - -
-
-

def createEnumType(

self, datatype, datatype_name, enum_dict)

-
- -

- Inheritance: - Dataset.createEnumType -

- - - -

Creates a new Enum data type named datatype_name from a numpy -integer dtype object datatype, and a python dictionary -defining the enum fields and values.

-

The return value is the EnumType class instance describing the new -datatype.

-
-
- -
- - -
-
-

def createGroup(

self, groupname)

-
- -

- Inheritance: - Dataset.createGroup -

- - - -

Creates a new Group with the given groupname.

-

If groupname is specified as a path, using forward slashes as in unix to -separate components, then intermediate groups will be created as necessary -(analogous to mkdir -p in unix). For example, -createGroup('/GroupA/GroupB/GroupC') will create GroupA, -GroupA/GroupB, and GroupA/GroupB/GroupC, if they don't already exist. -If the specified path describes a group that already exists, no error is -raised.

-

The return value is a Group class instance.

-
-
- -
- - -
-
-

def createVLType(

self, datatype, datatype_name)

-
- -

- Inheritance: - Dataset.createVLType -

- - - -

Creates a new VLEN data type named datatype_name from a numpy -dtype object datatype.

-

The return value is the VLType class instance describing the new -datatype.

-
-
- -
- - -
-
-

def createVariable(

self, varname, datatype, dimensions=(), zlib=False, complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, endian='native', least_significant_digit=None, fill_value=None)

-
- -

- Inheritance: - Dataset.createVariable -

- - - -

Creates a new variable with the given varname, datatype, and -dimensions. If dimensions are not given, the variable is assumed to be -a scalar.

-

If varname is specified as a path, using forward slashes as in unix to -separate components, then intermediate groups will be created as necessary -For example, createVariable('/GroupA/GroupB/VarC', float, ('x','y')) will create groups GroupA -and GroupA/GroupB, plus the variable GroupA/GroupB/VarC, if the preceding -groups don't already exist.

-

The datatype can be a numpy datatype object, or a string that describes -a numpy dtype object (like the dtype.str attribute of a numpy array). -Supported specifiers include: 'S1' or 'c' (NC_CHAR), 'i1' or 'b' or 'B' -(NC_BYTE), 'u1' (NC_UBYTE), 'i2' or 'h' or 's' (NC_SHORT), 'u2' -(NC_USHORT), 'i4' or 'i' or 'l' (NC_INT), 'u4' (NC_UINT), 'i8' (NC_INT64), -'u8' (NC_UINT64), 'f4' or 'f' (NC_FLOAT), 'f8' or 'd' (NC_DOUBLE). -datatype can also be a CompoundType instance -(for a structured, or compound array), a VLType instance -(for a variable-length array), or the python str builtin -(for a variable-length string array). Numpy string and unicode datatypes with -length greater than one are aliases for str.

-

Data from netCDF variables is presented to python as numpy arrays with -the corresponding data type.

-

dimensions must be a tuple containing dimension names (strings) that -have been defined previously using createDimension. The default value -is an empty tuple, which means the variable is a scalar.

-

If the optional keyword zlib is True, the data will be compressed in -the netCDF file using gzip compression (default False).

-

The optional keyword complevel is an integer between 1 and 9 describing -the level of compression desired (default 4). Ignored if zlib=False.

-

If the optional keyword shuffle is True, the HDF5 shuffle filter -will be applied before compressing the data (default True). This -significantly improves compression. Default is True. Ignored if -zlib=False.

-

If the optional keyword fletcher32 is True, the Fletcher32 HDF5 -checksum algorithm is activated to detect errors. Default False.

-

If the optional keyword contiguous is True, the variable data is -stored contiguously on disk. Default False. Setting to True for -a variable with an unlimited dimension will trigger an error.

-

The optional keyword chunksizes can be used to manually specify the -HDF5 chunksizes for each dimension of the variable. A detailed -discussion of HDF chunking and I/O performance is available -here. -Basically, you want the chunk size for each dimension to match as -closely as possible the size of the data block that users will read -from the file. chunksizes cannot be set if contiguous=True.

-

The optional keyword endian can be used to control whether the -data is stored in little or big endian format on disk. Possible -values are little, big or native (default). The library -will automatically handle endian conversions when the data is read, -but if the data is always going to be read on a computer with the -opposite format as the one used to create the file, there may be -some performance advantage to be gained by setting the endian-ness.

-

The zlib, complevel, shuffle, fletcher32, contiguous, chunksizes and endian -keywords are silently ignored for netCDF 3 files that do not use HDF5.

-

The optional keyword fill_value can be used to override the default -netCDF _FillValue (the value that the variable gets filled with before -any data is written to it, defaults given in netCDF4.default_fillvals). -If fill_value is set to False, then the variable is not pre-filled.

-

If the optional keyword parameter least_significant_digit is -specified, variable data will be truncated (quantized). In conjunction -with zlib=True this produces 'lossy', but significantly more -efficient compression. For example, if least_significant_digit=1, -data will be quantized using numpy.around(scale*data)/scale, where -scale = 2**bits, and bits is determined so that a precision of 0.1 is -retained (in this case bits=4). From the -PSD metadata conventions: -"least_significant_digit -- power of ten of the smallest decimal place -in unpacked data that is a reliable value." Default is None, or no -quantization, or 'lossless' compression.

-

When creating variables in a NETCDF4 or NETCDF4_CLASSIC formatted file, -HDF5 creates something called a 'chunk cache' for each variable. The -default size of the chunk cache may be large enough to completely fill -available memory when creating thousands of variables. The optional -keyword chunk_cache allows you to reduce (or increase) the size of -the default chunk cache when creating a variable. The setting only -persists as long as the Dataset is open - you can use the set_var_chunk_cache -method to change it the next time the Dataset is opened. -Warning - messing with this parameter can seriously degrade performance.

-

The return value is the Variable class instance describing the new -variable.

-

A list of names corresponding to netCDF variable attributes can be -obtained with the Variable method ncattrs. A dictionary -containing all the netCDF attribute name/value pairs is provided by -the __dict__ attribute of a Variable instance.

-

Variable instances behave much like array objects. Data can be assigned to or retrieved from a variable with indexing and slicing operations on the Variable instance. A Variable instance has six Dataset standard attributes: dimensions, dtype, shape, ndim, name and least_significant_digit. Application programs should never modify these attributes. The dimensions attribute is a tuple containing the names of the dimensions associated with this variable. The dtype attribute is a string describing the variable's data type (i4, f8, S1, etc). The shape attribute is a tuple describing the current sizes of all the variable's dimensions. The name attribute is a string containing the name of the Variable instance. The least_significant_digit attribute describes the power of ten of the smallest decimal place in the data that contains a reliable value; data is truncated to this decimal place when it is assigned to the Variable instance. If None, the data is not truncated. The ndim attribute is the number of variable dimensions.

-
-
- -
- - -
-
-

def delncattr(

self,name,value)

-
- -

- Inheritance: - Dataset.delncattr -

- - - -

delete a netCDF dataset or group attribute. Use if you need to delete a -netCDF attribute with the same name as one of the reserved python -attributes.

-
-
- -
- - -
-
-

def filepath(

self,encoding=None)

-
- -

- Inheritance: - Dataset.filepath -

- - - -

Get the file system path (or the opendap URL) which was used to -open/create the Dataset. Requires netcdf >= 4.1.2. The path -is decoded into a string using sys.getfilesystemencoding() by default, this can be -changed using the encoding kwarg.

-
-
- -
- - -
-
-

def get_variables_by_attributes(

...)

-
- -

- Inheritance: - Dataset.get_variables_by_attributes -

- - - -

Returns a list of variables that match specific conditions.

-

Can pass in key=value parameters and variables are returned that -contain all of the matches. For example,

-
>>> # Get variables with x-axis attribute.
->>> vs = nc.get_variables_by_attributes(axis='X')
->>> # Get variables with matching "standard_name" attribute
->>> vs = nc.get_variables_by_attributes(standard_name='northward_sea_water_velocity')
-
- - -

Can pass in key=callable parameter and variables are returned if the -callable returns True. The callable should accept a single parameter, -the attribute value. None is given as the attribute value when the -attribute does not exist on the variable. For example,

-
>>> # Get Axis variables
->>> vs = nc.get_variables_by_attributes(axis=lambda v: v in ['X', 'Y', 'Z', 'T'])
->>> # Get variables that don't have an "axis" attribute
->>> vs = nc.get_variables_by_attributes(axis=lambda v: v is None)
->>> # Get variables that have a "grid_mapping" attribute
->>> vs = nc.get_variables_by_attributes(grid_mapping=lambda v: v is not None)
-
-
-
- -
- - -
-
-

def getncattr(

self,name)

-
- -

- Inheritance: - Dataset.getncattr -

- - - -

retrieve a netCDF dataset or group attribute. -Use if you need to get a netCDF attribute with the same -name as one of the reserved python attributes.

-

option kwarg encoding can be used to specify the -character encoding of a string attribute (default is utf-8).

-
-
- -
- - -
-
-

def isopen(

...)

-
- -

- Inheritance: - Dataset.isopen -

- - - -

is the Dataset open or closed?

-
-
- -
- - -
-
-

def renameAttribute(

self, oldname, newname)

-
- -

- Inheritance: - Dataset.renameAttribute -

- - - -

rename a Dataset or Group attribute named oldname to newname.

-
-
- -
- - -
-
-

def renameDimension(

self, oldname, newname)

-
- -

- Inheritance: - Dataset.renameDimension -

- - - -

rename a Dimension named oldname to newname.

-
-
- -
- - -
-
-

def renameGroup(

self, oldname, newname)

-
- -

- Inheritance: - Dataset.renameGroup -

- - - -

rename a Group named oldname to newname (requires netcdf >= 4.3.1).

-
-
- -
- - -
-
-

def renameVariable(

self, oldname, newname)

-
- -

- Inheritance: - Dataset.renameVariable -

- - - -

rename a Variable named oldname to newname

-
-
- -
- - -
-
-

def set_auto_chartostring(

self, True_or_False)

-
- -

- Inheritance: - Dataset.set_auto_chartostring -

- - - -

Call set_auto_chartostring for all variables contained in this Dataset or -Group, as well as for all variables in all its subgroups.

-

True_or_False: Boolean determining if automatic conversion of -all character arrays <--> string arrays should be performed for -character variables (variables of type NC_CHAR or S1) with the -_Encoding attribute set.

-

Note: Calling this function only affects existing variables. Variables created -after calling this function will follow the default behaviour.

-
-
- -
- - -
-
-

def set_auto_mask(

self, True_or_False)

-
- -

- Inheritance: - Dataset.set_auto_mask -

- - - -

Call set_auto_mask for all variables contained in this Dataset or -Group, as well as for all variables in all its subgroups.

-

True_or_False: Boolean determining if automatic conversion to masked arrays -shall be applied for all variables.

-

Note: Calling this function only affects existing variables. Variables created -after calling this function will follow the default behaviour.

-
-
- -
- - -
-
-

def set_auto_maskandscale(

self, True_or_False)

-
- -

- Inheritance: - Dataset.set_auto_maskandscale -

- - - -

Call set_auto_maskandscale for all variables contained in this Dataset or -Group, as well as for all variables in all its subgroups.

-

True_or_False: Boolean determining if automatic conversion to masked arrays -and variable scaling shall be applied for all variables.

-

Note: Calling this function only affects existing variables. Variables created -after calling this function will follow the default behaviour.

-
-
- -
- - -
-
-

def set_auto_scale(

self, True_or_False)

-
- -

- Inheritance: - Dataset.set_auto_scale -

- - - -

Call set_auto_scale for all variables contained in this Dataset or -Group, as well as for all variables in all its subgroups.

-

True_or_False: Boolean determining if automatic variable scaling -shall be applied for all variables.

-

Note: Calling this function only affects existing variables. Variables created -after calling this function will follow the default behaviour.

-
-
- -
- - -
-
-

def set_fill_off(

self)

-
- -

- Inheritance: - Dataset.set_fill_off -

- - - -

Sets the fill mode for a Dataset open for writing to off.

-

This will prevent the data from being pre-filled with fill values, which -may result in some performance improvements. However, you must then make -sure the data is actually written before being read.

-
-
- -
- - -
-
-

def set_fill_on(

self)

-
- -

- Inheritance: - Dataset.set_fill_on -

- - - -

Sets the fill mode for a Dataset open for writing to on.

-

This causes data to be pre-filled with fill values. The fill values can be controlled by the variable's _FillValue attribute, but it is usually sufficient to use the netCDF default _FillValue (defined separately for each variable type). The default behavior of the netCDF library corresponds to set_fill_on. Data which are equal to the _FillValue indicate that the variable was created, but never written to.

-
-
- -
- - -
-
-

def setncattr(

self,name,value)

-
- -

- Inheritance: - Dataset.setncattr -

- - - -

set a netCDF dataset or group attribute using name,value pair. Use if you need to set a netCDF attribute with the same name as one of the reserved python attributes.

-
-
- -
- - -
-
-

def setncattr_string(

self,name,value)

-
- -

- Inheritance: - Dataset.setncattr_string -

- - - -

set a netCDF dataset or group string attribute using name,value pair. -Use if you need to ensure that a netCDF attribute is created with type -NC_STRING if the file format is NETCDF4. -Use if you need to set an attribute to an array of variable-length strings.

-
-
- -
- - -
-
-

def setncatts(

self,attdict)

-
- -

- Inheritance: - Dataset.setncatts -

- - - -

set a bunch of netCDF dataset or group attributes at once using a python dictionary. -This may be faster when setting a lot of attributes for a NETCDF3 -formatted file, since nc_redef/nc_enddef is not called in between setting -each attribute

-
-
- -
- - -
-
-

def sync(

self)

-
- -

- Inheritance: - Dataset.sync -

- - - -

Writes all buffered data in the Dataset to the disk file.

-
-
- -
- -

Methods

- -
-
-

def __init__(

self, files, check=False, aggdim=None, exclude=[])

-
- -

- Inheritance: - Dataset.__init__ -

- - - -

Open a Dataset spanning multiple files, making it look as if it was a single file. Variables in the list of files that share the same dimension (specified with the keyword aggdim) are aggregated. If aggdim is not specified, the unlimited dimension is aggregated. Currently, aggdim must be the leftmost (slowest varying) dimension of each of the variables to be aggregated.

-

files: either a sequence of netCDF files or a string with a wildcard (converted to a sorted list of files using glob). The first file in the list will become the "master" file, defining all the variables with an aggregation dimension which may span subsequent files. Attribute access returns attributes only from the "master" file. The files are always opened in read-only mode.

-

check: True if you want to do consistency checking to ensure the -correct variables structure for all of the netcdf files. Checking makes -the initialization of the MFDataset instance much slower. Default is -False.

-

aggdim: The name of the dimension to aggregate over (must -be the leftmost dimension of each of the variables to be aggregated). -If None (default), aggregate over the unlimited dimension.

-

exclude: A list of variable names to exclude from aggregation. -Default is an empty list.
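
For example (file, dimension and variable names are hypothetical):

>>> f = MFDataset(["fcst_000.nc", "fcst_001.nc", "fcst_002.nc"],
...               aggdim="time", exclude=["orography"])
>>> f.variables["temp"].shape   # the "time" dimension now spans all three files
>>> f.close()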

-
-
- -
- - -
-
-

def close(

self)

-
- -

- Inheritance: - Dataset.close -

- - - -

close all the open files.

-
-
- -
- - -
-
-

def ncattrs(

self)

-
- -

- Inheritance: - Dataset.ncattrs -

- - - -

return the netcdf attribute names from the master file.

-
-
- -
- -
-
- -
-

class MFTime

- - -

Class providing an interface to a MFDataset time Variable by imposing a unique common -time unit to all files.

-

Example usage (See __init__ for more details):

-
>>> import numpy
>>> from netCDF4 import Dataset, MFDataset, MFTime
>>> f1 = Dataset("mftest_1.nc", "w", format="NETCDF4_CLASSIC")
>>> f2 = Dataset("mftest_2.nc", "w", format="NETCDF4_CLASSIC")
>>> f1.createDimension("time", None)
>>> f2.createDimension("time", None)
>>> t1 = f1.createVariable("time", "i", ("time",))
>>> t2 = f2.createVariable("time", "i", ("time",))
>>> t1.units = "days since 2000-01-01"
>>> t2.units = "days since 2000-02-01"
>>> t1.calendar = "standard"
>>> t2.calendar = "standard"
>>> t1[:] = numpy.arange(31)
>>> t2[:] = numpy.arange(30)
>>> f1.close()
>>> f2.close()
>>> # Read the two files in at once, in one Dataset.
>>> f = MFDataset("mftest*nc")
>>> t = f.variables["time"]
>>> print(t.units)
days since 2000-01-01
>>> print(t[32])  # The value written in the file, inconsistent with the MF time units.
1
>>> T = MFTime(t)
>>> print(T[32])
32
-
-
-
- - -
-

Ancestors (in MRO)

-
    -
  • MFTime
  • -
  • netCDF4._netCDF4._Variable
  • -
  • __builtin__.object
  • -
-

Methods

- -
-
-

def __init__(

self, time, units=None)

-
- - - - -

Create a time Variable with units consistent across a multifile -dataset.

-

time: Time variable from a MFDataset.

-

units: Time units, for example, days since 1979-01-01. If None, use -the units from the master variable.

-
-
- -
- - -
-
-

def ncattrs(

...)

-
- - - - -
-
- -
- - -
-
-

def set_auto_chartostring(

...)

-
- - - - -
-
- -
- - -
-
-

def set_auto_mask(

...)

-
- - - - -
-
- -
- - -
-
-

def set_auto_maskandscale(

...)

-
- - - - -
-
- -
- - -
-
-

def set_auto_scale(

...)

-
- - - - -
-
- -
- - -
-
-

def typecode(

...)

-
- - - - -
-
- -
- -
-
- -
-

class VLType

- - -

A VLType instance is used to describe a variable length (VLEN) data type, and can be passed to the createVariable method of a Dataset or Group instance. See __init__ for more details.

-

The instance variables dtype and name should not be modified by -the user.

-
-
- - -
-

Ancestors (in MRO)

-
    -
  • VLType
  • -
  • __builtin__.object
  • -
-

Class variables

-
-

var dtype

- - - - -

A numpy dtype object describing the component type for the VLEN.

-
-
- -
-
-

var name

- - - - -

String name.

-
-
- -
-

Static methods

- -
-
-

def __init__(

group, datatype, datatype_name)

-
- - - - -

VLType constructor.

-

group: Group instance to associate with the VLEN datatype.

-

datatype: a numpy dtype object describing the component type for the variable length array.

-

datatype_name: a Python string containing a description of the -VLEN data type.

-

Note: VLType instances should be created using the -createVLType -method of a Dataset or Group instance, not using this class directly.

-
-
- -
- -
-
- -
-

class Variable

- - -

A netCDF Variable is used to read and write netCDF data. Variables are analogous to numpy array objects. See __init__ for more details.

-

A list of attribute names corresponding to netCDF attributes defined for -the variable can be obtained with the ncattrs method. These -attributes can be created by assigning to an attribute of the -Variable instance. A dictionary containing all the netCDF attribute -name/value pairs is provided by the __dict__ attribute of a -Variable instance.

-

The following class variables are read-only:

-

dimensions: A tuple containing the names of the -dimensions associated with this variable.

-

dtype: A numpy dtype object describing the -variable's data type.

-

ndim: The number of variable dimensions.

-

shape: A tuple with the current shape (length of all dimensions).

-

scale: If True, scale_factor and add_offset are -applied, and signed integer data is automatically converted to -unsigned integer data if the _Unsigned attribute is set. -Default is True, can be reset using set_auto_scale and -set_auto_maskandscale methods.

-

mask: If True, data is automatically converted to/from masked -arrays when missing values or fill values are present. Default is True, can be -reset using set_auto_mask and set_auto_maskandscale -methods.

-

chartostring: If True, data is automatically converted to/from character -arrays to string arrays when the _Encoding variable attribute is set. -Default is True, can be reset using -set_auto_chartostring method.

-

least_significant_digit: Describes the power of ten of the -smallest decimal place in the data the contains a reliable value. Data is -truncated to this decimal place when it is assigned to the Variable -instance. If None, the data is not truncated.

-

__orthogonal_indexing__: Always True. Indicates to client code that the object supports 'orthogonal indexing', which means that slices that are 1d arrays or lists slice along each dimension independently. This behavior is similar to Fortran or Matlab, but different from numpy (see the example after this list of attributes).

-

datatype: numpy data type (for primitive data types) or VLType/CompoundType - instance (for compound or vlen data types).

-

name: String name.

-

size: The number of stored elements.
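
The following sketch illustrates the orthogonal indexing behaviour described above (the variable is hypothetical, with shape (10, 73, 144)):

>>> v = nc.variables["temp"]
>>> v[0, [0, 1, 2], [0, 10, 20]].shape    # each list slices its own dimension independently
(3, 3)
>>> # the same index applied to a plain numpy array would return shape (3,)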

-
-
- - -
-

Ancestors (in MRO)

- -

Class variables

-
-

var chartostring

- - - - -

If True, data is automatically converted to/from character -arrays to string arrays when _Encoding variable attribute is set. -Default is True, can be reset using -set_auto_chartostring method.

-
-
- -
-
-

var datatype

- - - - -

numpy data type (for primitive data types) or -VLType/CompoundType/EnumType instance (for compound, vlen or enum -data types).

-
-
- -
-
-

var dimensions

- - - - -

A tuple containing the names of the -dimensions associated with this variable.

-
-
- -
-
-

var dtype

- - - - -

A numpy dtype object describing the -variable's data type.

-
-
- -
-
-

var mask

- - - - -

If True, data is automatically converted to/from masked -arrays when missing values or fill values are present. Default is True, can be -reset using set_auto_mask and set_auto_maskandscale -methods.

-
-
- -
-
-

var name

- - - - -

String name.

-
-
- -
-
-

var ndim

- - - - -

The number of variable dimensions.

-
-
- -
-
-

var scale

- - - - -

if True, scale_factor and add_offset are -applied, and signed integer data is converted to unsigned -integer data if the _Unsigned attribute is set. -Default is True, can be reset using set_auto_scale and -set_auto_maskandscale methods.

-
-
- -
-
-

var shape

- - - - -

A tuple with the current shape (length of all dimensions).

-
-
- -
-
-

var size

- - - - -

The number of stored elements.

-
-
- -
-

Static methods

- -
-
-

def __init__(

self, group, name, datatype, dimensions=(), zlib=False, complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, endian='native', least_significant_digit=None,fill_value=None)

-
- - - - -

Variable constructor.

-

group: Group or Dataset instance to associate with variable.

-

name: Name of the variable.

-

datatype: Variable data type. Can be specified by providing a numpy dtype object, or a string that describes a numpy dtype object. Supported values, corresponding to the str attribute of numpy dtype objects, include 'f4' (32-bit floating point), 'f8' (64-bit floating point), 'i4' (32-bit signed integer), 'i2' (16-bit signed integer), 'i8' (64-bit signed integer), 'i1' (8-bit signed integer), 'u1' (8-bit unsigned integer), 'u2' (16-bit unsigned integer), 'u4' (32-bit unsigned integer), 'u8' (64-bit unsigned integer), or 'S1' (single-character string). For compatibility with Scientific.IO.NetCDF, the old Numeric single character typecodes can also be used ('f' instead of 'f4', 'd' instead of 'f8', 'h' or 's' instead of 'i2', 'b' or 'B' instead of 'i1', 'c' instead of 'S1', and 'i' or 'l' instead of 'i4'). datatype can also be a CompoundType instance (for a structured, or compound array), a VLType instance (for a variable-length array), or the python str builtin (for a variable-length string array). Numpy string and unicode datatypes with length greater than one are aliases for str.

-

dimensions: a tuple containing the variable's dimension names -(defined previously with createDimension). Default is an empty tuple -which means the variable is a scalar (and therefore has no dimensions).

-

zlib: if True, data assigned to the Variable -instance is compressed on disk. Default False.

-

complevel: the level of zlib compression to use (1 is the fastest, -but poorest compression, 9 is the slowest but best compression). Default 4. -Ignored if zlib=False.

-

shuffle: if True, the HDF5 shuffle filter is applied -to improve compression. Default True. Ignored if zlib=False.

-

fletcher32: if True (default False), the Fletcher32 checksum -algorithm is used for error detection.

-

contiguous: if True (default False), the variable data is -stored contiguously on disk. Default False. Setting to True for -a variable with an unlimited dimension will trigger an error.

-

chunksizes: Can be used to specify the HDF5 chunksizes for each -dimension of the variable. A detailed discussion of HDF chunking and I/O -performance is available -here. -Basically, you want the chunk size for each dimension to match as -closely as possible the size of the data block that users will read -from the file. chunksizes cannot be set if contiguous=True.

-

endian: Can be used to control whether the -data is stored in little or big endian format on disk. Possible -values are little, big or native (default). The library -will automatically handle endian conversions when the data is read, -but if the data is always going to be read on a computer with the -opposite format as the one used to create the file, there may be -some performance advantage to be gained by setting the endian-ness. -For netCDF 3 files (that don't use HDF5), only endian='native' is allowed.

-

The zlib, complevel, shuffle, fletcher32, contiguous and chunksizes -keywords are silently ignored for netCDF 3 files that do not use HDF5.

-

least_significant_digit: If specified, variable data will be truncated (quantized). In conjunction with zlib=True this produces 'lossy', but significantly more efficient compression. For example, if least_significant_digit=1, data will be quantized using numpy.around(scale*data)/scale, where scale = 2**bits, and bits is determined so that a precision of 0.1 is retained (in this case bits=4). Default is None, or no quantization.

-

fill_value: If specified, the default netCDF _FillValue (the -value that the variable gets filled with before any data is written to it) -is replaced with this value. If fill_value is set to False, then -the variable is not pre-filled. The default netCDF fill values can be found -in netCDF4.default_fillvals.

-

Note: Variable instances should be created using the -createVariable method of a Dataset or -Group instance, not using this class directly.
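
As a rough illustration of how several of these keywords combine, here is a minimal sketch (the file name, dimension sizes and keyword values are invented for the example, not prescribed by the text above)::

import netCDF4
import numpy as np

nc = netCDF4.Dataset("example_createvariable.nc", "w", format="NETCDF4")
nc.createDimension("time", None)   # unlimited dimension
nc.createDimension("lat", 73)
nc.createDimension("lon", 144)
# compressed, chunked, quantized variable with an explicit fill value
temp = nc.createVariable("temp", "f4", ("time", "lat", "lon"),
                         zlib=True, complevel=4, shuffle=True,
                         chunksizes=(1, 73, 144),
                         least_significant_digit=1,
                         fill_value=netCDF4.default_fillvals["f4"])
temp[0] = np.random.uniform(size=(73, 144))
nc.close()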

def assignValue(self, val)

assign a value to a scalar variable. Provided for compatibility with Scientific.IO.NetCDF, can also be done by assigning to an Ellipsis slice ([...]).
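
A short sketch showing both spellings for a scalar variable (the file and variable names are invented)::

import netCDF4

nc = netCDF4.Dataset("scalar_example.nc", "w")
sc = nc.createVariable("scalar", "f8")   # no dimensions, so a scalar variable
sc.assignValue(3.14)                     # Scientific.IO.NetCDF-style assignment
sc[...] = 3.14                           # equivalent Ellipsis-slice assignment
print(sc.getValue())
nc.close()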

def chunking(self)

return variable chunking information. If the dataset is defined to be contiguous (and hence there is no chunking) the word 'contiguous' is returned. Otherwise, a sequence with the chunksize for each dimension is returned.

def delncattr(self,name,value)

delete a netCDF variable attribute. Use if you need to delete a netCDF attribute with the same name as one of the reserved python attributes.

def endian(self)

return endian-ness (little,big,native) of variable (as stored in HDF5 file).

def filters(self)

return dictionary containing HDF5 filter parameters.

def getValue(self)

get the value of a scalar variable. Provided for compatibility with Scientific.IO.NetCDF, can also be done by slicing with an Ellipsis ([...]).

def get_var_chunk_cache(self)

return variable chunk cache information in a tuple (size,nelems,preemption). See netcdf C library documentation for nc_get_var_chunk_cache for details.

def getncattr(self,name)

retrieve a netCDF variable attribute. Use if you need to get a netCDF attribute with the same name as one of the reserved python attributes.

The optional kwarg encoding can be used to specify the character encoding of a string attribute (default is utf-8).
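
For instance, an attribute literally called 'name' collides with the reserved python attribute Variable.name, so it has to go through setncattr/getncattr; a minimal sketch (the file name and attribute value are invented)::

import netCDF4

nc = netCDF4.Dataset("attr_example.nc", "w")
nc.createDimension("time", None)
v = nc.createVariable("data", "f4", ("time",))
# cannot use v.name = ... because 'name' is a reserved python attribute
v.setncattr("name", "surface temperature")
print(v.getncattr("name"))
nc.close()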

def group(self)

return the group that this Variable is a member of.

def ncattrs(self)

return netCDF attribute names for this Variable in a list.

def renameAttribute(self, oldname, newname)

rename a Variable attribute named oldname to newname.

def set_auto_chartostring(self,chartostring)

turn on or off automatic conversion of character variable data to and from numpy fixed-length string arrays when the _Encoding variable attribute is set.

If chartostring is set to True, when data is read from a character variable (dtype = S1) that has an _Encoding attribute, it is converted to a numpy fixed-length unicode string array (dtype = UN, where N is the length of the rightmost dimension of the variable). The value of _Encoding is the unicode encoding that is used to decode the bytes into strings.

When numpy string data is written to a variable it is converted back to individual bytes, with the number of bytes in each string equalling the rightmost dimension of the variable.

The default value of chartostring is True (automatic conversions are performed).
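
A sketch of the conversion, using netCDF4.stringtochar to build the underlying character array (the dimension names, sizes and the encoding are invented for the example)::

import netCDF4
import numpy as np

nc = netCDF4.Dataset("chars_example.nc", "w")
nc.createDimension("nstrings", 2)
nc.createDimension("nchars", 8)
cvar = nc.createVariable("labels", "S1", ("nstrings", "nchars"))
cvar._Encoding = "ascii"                  # enables the automatic conversion
strings = np.array(["foo", "bar"], dtype="S8")
cvar[:] = netCDF4.stringtochar(strings)   # stored as individual bytes
print(cvar[:])                            # read back as fixed-length strings
cvar.set_auto_chartostring(False)
print(cvar[:])                            # raw 'S1' character array instead
nc.close()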

def set_auto_mask(self,mask)

turn on or off automatic conversion of variable data to and from masked arrays.

If mask is set to True, when data is read from a variable it is converted to a masked array if any of the values are exactly equal to either the netCDF _FillValue or the value specified by the missing_value variable attribute. The fill_value of the masked array is set to the missing_value attribute (if it exists), otherwise the netCDF _FillValue attribute (which has a default value for each data type). When data is written to a variable, the masked array is converted back to a regular numpy array by replacing all the masked values by the missing_value attribute of the variable (if it exists). If the variable has no missing_value attribute, the _FillValue is used instead. If the variable has valid_min/valid_max and missing_value attributes, data outside the specified range will be set to missing_value.

The default value of mask is True (automatic conversions are performed).
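
A minimal sketch of the effect (the file name and the missing value are invented)::

import netCDF4
import numpy as np

nc = netCDF4.Dataset("mask_example.nc", "w")
nc.createDimension("x", 3)
v = nc.createVariable("v", "f4", ("x",), fill_value=-9999.0)
v.missing_value = np.float32(-9999.0)
v[:] = np.array([1.0, -9999.0, 3.0])
print(v[:])               # masked array, second element masked
v.set_auto_mask(False)
print(v[:])               # plain array still containing -9999.0
nc.close()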

def set_auto_maskandscale(self,maskandscale)

turn on or off automatic conversion of variable data to and from masked arrays, automatic packing/unpacking of variable data using scale_factor and add_offset attributes, and automatic conversion of signed integer data to unsigned integer data if the _Unsigned attribute exists.

If maskandscale is set to True, when data is read from a variable it is converted to a masked array if any of the values are exactly equal to either the netCDF _FillValue or the value specified by the missing_value variable attribute. The fill_value of the masked array is set to the missing_value attribute (if it exists), otherwise the netCDF _FillValue attribute (which has a default value for each data type). When data is written to a variable, the masked array is converted back to a regular numpy array by replacing all the masked values by the missing_value attribute of the variable (if it exists). If the variable has no missing_value attribute, the _FillValue is used instead. If the variable has valid_min/valid_max and missing_value attributes, data outside the specified range will be set to missing_value.

If maskandscale is set to True, and the variable has a scale_factor or an add_offset attribute, then data read from that variable is unpacked using::

data = self.scale_factor*data + self.add_offset

When data is written to a variable it is packed using::

data = (data - self.add_offset)/self.scale_factor

If scale_factor is present but add_offset is missing, add_offset is assumed to be zero. If add_offset is present but scale_factor is missing, scale_factor is assumed to be one. For more information on how scale_factor and add_offset can be used to provide simple compression, see the PSD metadata conventions.

In addition, if maskandscale is set to True, and if the variable has an attribute _Unsigned set, and the variable has a signed integer data type, a view to the data is returned with the corresponding unsigned integer data type. This convention is used by the netcdf-java library to save unsigned integer data in NETCDF3 or NETCDF4_CLASSIC files (since the NETCDF3 data model does not have unsigned integer data types).

The default value of maskandscale is True (automatic conversions are performed).
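
A sketch of the packing/unpacking round trip (the file name and the scale_factor/add_offset values are invented)::

import netCDF4
import numpy as np

nc = netCDF4.Dataset("packed_example.nc", "w")
nc.createDimension("x", 3)
p = nc.createVariable("p", "i2", ("x",))
p.scale_factor = np.float32(0.01)
p.add_offset = np.float32(273.15)
p[:] = np.array([273.15, 280.0, 290.5])   # packed to 16-bit ints on write
print(p[:])                               # unpacked back to floats on read
p.set_auto_maskandscale(False)
print(p[:])                               # the raw packed integers
nc.close()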

def set_auto_scale(self,scale)

turn on or off automatic packing/unpacking of variable data using scale_factor and add_offset attributes. Also turns on and off automatic conversion of signed integer data to unsigned integer data if the variable has an _Unsigned attribute.

If scale is set to True, and the variable has a scale_factor or an add_offset attribute, then data read from that variable is unpacked using::

data = self.scale_factor*data + self.add_offset

When data is written to a variable it is packed using::

data = (data - self.add_offset)/self.scale_factor

If scale_factor is present but add_offset is missing, add_offset is assumed to be zero. If add_offset is present but scale_factor is missing, scale_factor is assumed to be one. For more information on how scale_factor and add_offset can be used to provide simple compression, see the PSD metadata conventions.

In addition, if scale is set to True, and if the variable has an attribute _Unsigned set, and the variable has a signed integer data type, a view to the data is returned with the corresponding unsigned integer data type. This convention is used by the netcdf-java library to save unsigned integer data in NETCDF3 or NETCDF4_CLASSIC files (since the NETCDF3 data model does not have unsigned integer data types).

The default value of scale is True (automatic conversions are performed).
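
set_auto_scale only controls the packing/unpacking step; a small sketch of switching it off to look at the stored integers (this reuses the packed file written in the earlier sketch, so the file and variable names are assumptions)::

import netCDF4

nc = netCDF4.Dataset("packed_example.nc", "r")
p = nc["p"]
p.set_auto_scale(False)    # disable unpacking only; masking is unaffected
print(p[:])                # raw i2 values as stored on disk
nc.close()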

def set_collective(self,True_or_False)

turn on or off collective parallel IO access. Ignored if file is not open for parallel access.
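
In a parallel run (this needs mpi4py and a parallel-enabled build, as in examples/mpi_example.py), the switch looks roughly like this sketch::

from mpi4py import MPI
from netCDF4 import Dataset

rank = MPI.COMM_WORLD.rank
nc = Dataset("parallel_demo.nc", "w", parallel=True)
nc.createDimension("dim", 4)
v = nc.createVariable("var", "i4", ("dim",))
v[rank] = rank             # independent access (the default)
v.set_collective(True)     # every rank must now take part in each operation
v[rank] = rank
nc.close()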

def set_var_chunk_cache(self,size=None,nelems=None,preemption=None)

change variable chunk cache settings. See netcdf C library documentation for nc_set_var_chunk_cache for details.
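
A sketch of inspecting and enlarging the per-variable chunk cache (any HDF5-backed netCDF4 file will do; the file and variable names and the factor of two are invented)::

import netCDF4

nc = netCDF4.Dataset("example_createvariable.nc", "r")
v = nc["temp"]
size, nelems, preemption = v.get_var_chunk_cache()
print(size, nelems, preemption)
# double the cache size, keep the other settings unchanged
v.set_var_chunk_cache(size=2*size, nelems=nelems, preemption=preemption)
nc.close()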

def setncattr(self,name,value)

set a netCDF variable attribute using a name,value pair. Use if you need to set a netCDF attribute with the same name as one of the reserved python attributes.

def setncattr_string(self,name,value)

set a netCDF variable string attribute using a name,value pair. Use if you need to ensure that a netCDF attribute is created with type NC_STRING if the file format is NETCDF4. Use if you need to set an attribute to an array of variable-length strings.

def setncatts(self,attdict)

set a bunch of netCDF variable attributes at once using a python dictionary. This may be faster when setting a lot of attributes for a NETCDF3 formatted file, since nc_redef/nc_enddef is not called in between setting each attribute.
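
A sketch with a small attribute dictionary (the file name and the attribute names/values are invented)::

import netCDF4

nc = netCDF4.Dataset("attrs_example.nc", "w", format="NETCDF3_CLASSIC")
nc.createDimension("t", None)
v = nc.createVariable("v", "f4", ("t",))
v.setncatts({"units": "m/s", "long_name": "wind speed", "valid_min": 0.0})
print(v.ncattrs())
nc.close()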

def use_nc_get_vars(self,_no_get_vars)

enable the use of the netcdf library routine nc_get_vars to retrieve strided variable slices. By default, nc_get_vars is not used since it is slower than multiple calls to the unstrided read routine nc_get_vara in most cases.

- - diff --git a/examples/bench.py b/examples/bench.py index f3ad75246..08f95d48f 100644 --- a/examples/bench.py +++ b/examples/bench.py @@ -1,9 +1,14 @@ # benchmark reads and writes, with and without compression. # tests all four supported file formats. +from typing import TYPE_CHECKING, Any from numpy.random.mtrand import uniform import netCDF4 from timeit import Timer import os, sys +if TYPE_CHECKING: + from netCDF4 import Format as NCFormat +else: + NCFormat = Any # create an n1dim by n2dim by n3dim random array. n1dim = 30 @@ -14,7 +19,7 @@ sys.stdout.write('reading and writing a %s by %s by %s by %s random array ..\n'%(n1dim,n2dim,n3dim,n4dim)) array = uniform(size=(n1dim,n2dim,n3dim,n4dim)) -def write_netcdf(filename,zlib=False,least_significant_digit=None,format='NETCDF4'): +def write_netcdf(filename,zlib=False,least_significant_digit=None,format: NCFormat='NETCDF4'): file = netCDF4.Dataset(filename,'w',format=format) file.createDimension('n1', n1dim) file.createDimension('n2', n2dim) diff --git a/examples/bench_compress.py b/examples/bench_compress.py index 2b4680c55..f094a6ffa 100644 --- a/examples/bench_compress.py +++ b/examples/bench_compress.py @@ -1,9 +1,15 @@ # benchmark reads and writes, with and without compression. # tests all four supported file formats. +from typing import TYPE_CHECKING, Any from numpy.random.mtrand import uniform import netCDF4 +import netCDF4.utils from timeit import Timer import os, sys +if TYPE_CHECKING: + from netCDF4 import CompressionLevel +else: + CompressionLevel = Any # create an n1dim by n2dim by n3dim random array. n1dim = 30 @@ -15,8 +21,7 @@ sys.stdout.write('(average of %s trials)\n' % ntrials) array = netCDF4.utils._quantize(uniform(size=(n1dim,n2dim,n3dim,n4dim)),4) - -def write_netcdf(filename,zlib=False,shuffle=False,complevel=6): +def write_netcdf(filename,zlib=False,shuffle=False,complevel: CompressionLevel = 6): file = netCDF4.Dataset(filename,'w',format='NETCDF4') file.createDimension('n1', n1dim) file.createDimension('n2', n2dim) diff --git a/examples/bench_compress3.py b/examples/bench_compress3.py index c503cefee..78ff89e8c 100644 --- a/examples/bench_compress3.py +++ b/examples/bench_compress3.py @@ -1,4 +1,3 @@ -from __future__ import print_function # benchmark reads and writes, with and without compression. # tests all four supported file formats. from numpy.random.mtrand import uniform diff --git a/examples/bench_compress4.py b/examples/bench_compress4.py new file mode 100644 index 000000000..d8f643935 --- /dev/null +++ b/examples/bench_compress4.py @@ -0,0 +1,57 @@ +# benchmark reads and writes, with and without compression. +# tests all four supported file formats. +from typing import Literal +from numpy.random.mtrand import uniform +import netCDF4 +from timeit import Timer +import os, sys + +# use real data. +URL="http://www.esrl.noaa.gov/psd/thredds/dodsC/Datasets/ncep.reanalysis/pressure/hgt.1990.nc" +nc = netCDF4.Dataset(URL) + +# use real 500 hPa geopotential height data. 
+n1dim = 100 +n3dim = 73 +n4dim = 144 +ntrials = 10 +sys.stdout.write('reading and writing a %s by %s by %s random array ..\n'%(n1dim,n3dim,n4dim)) +sys.stdout.write('(average of %s trials)\n\n' % ntrials) +array = nc.variables['hgt'][0:n1dim,5,:,:] + + +def write_netcdf( + filename, + nsd, + quantize_mode: Literal["BitGroom", "BitRound", "GranularBitRound"] = "BitGroom" + ): + file = netCDF4.Dataset(filename,'w',format='NETCDF4') + file.createDimension('n1', None) + file.createDimension('n3', n3dim) + file.createDimension('n4', n4dim) + foo = file.createVariable('data',\ + 'f4',('n1','n3','n4'),\ + zlib=True,shuffle=True,\ + quantize_mode=quantize_mode,\ + significant_digits=nsd) + foo[:] = array + file.close() + +def read_netcdf(filename): + file = netCDF4.Dataset(filename) + data = file.variables['data'][:] + file.close() + +for sigdigits in range(1,5,1): + sys.stdout.write('testing compression with significant_digits=%s...\n' %\ + sigdigits) + write_netcdf('test.nc',sigdigits) + read_netcdf('test.nc') + # print out size of resulting files with standard quantization. + sys.stdout.write('size of test.nc = %s\n'%repr(os.stat('test.nc').st_size)) + sys.stdout.write("testing compression with significant_digits=%s and 'GranularBitRound'...\n" %\ + sigdigits) + write_netcdf('test.nc',sigdigits,quantize_mode='GranularBitRound') + read_netcdf('test.nc') + # print out size of resulting files with alternate quantization. + sys.stdout.write('size of test.nc = %s\n'%repr(os.stat('test.nc').st_size)) diff --git a/examples/bench_diskless.py b/examples/bench_diskless.py index dd7a78315..076f446b4 100644 --- a/examples/bench_diskless.py +++ b/examples/bench_diskless.py @@ -1,9 +1,14 @@ # benchmark reads and writes, with and without compression. # tests all four supported file formats. +from typing import TYPE_CHECKING, Any, Literal from numpy.random.mtrand import uniform import netCDF4 from timeit import Timer import os, sys +if TYPE_CHECKING: + from netCDF4 import Format as NCFormat +else: + NCFormat = Any # create an n1dim by n2dim by n3dim random array. n1dim = 30 @@ -14,7 +19,7 @@ sys.stdout.write('reading and writing a %s by %s by %s by %s random array ..\n'%(n1dim,n2dim,n3dim,n4dim)) array = uniform(size=(n1dim,n2dim,n3dim,n4dim)) -def write_netcdf(filename,zlib=False,least_significant_digit=None,format='NETCDF4',closeit=False): +def write_netcdf(filename, zlib=False, least_significant_digit=None, format: NCFormat='NETCDF4',closeit=False): file = netCDF4.Dataset(filename,'w',format=format,diskless=True,persist=True) file.createDimension('n1', n1dim) file.createDimension('n2', n2dim) @@ -42,13 +47,13 @@ def read_netcdf(ncfile): sys.stdout.write('writing took %s seconds\n' %\ repr(sum(t.repeat(ntrials,1))/ntrials)) # test reading. - ncfile = write_netcdf('test1.nc',format=format) + ncfile = write_netcdf('test1.nc',format=format) # type: ignore t = Timer("read_netcdf(ncfile)","from __main__ import read_netcdf,ncfile") sys.stdout.write('reading took %s seconds\n' % repr(sum(t.repeat(ntrials,1))/ntrials)) # test diskless=True in nc_open -format='NETCDF3_CLASSIC' +format: Literal["NETCDF3_CLASSIC"] = 'NETCDF3_CLASSIC' # mypy should know this but it needs help... 
trials=50 sys.stdout.write('test caching of file in memory on open for %s\n' % format) sys.stdout.write('testing file format %s ...\n' % format) diff --git a/examples/complex_numbers.py b/examples/complex_numbers.py new file mode 100644 index 000000000..51d7a61f1 --- /dev/null +++ b/examples/complex_numbers.py @@ -0,0 +1,51 @@ +import netCDF4 +import numpy as np + +complex_array = np.array([0 + 0j, 1 + 0j, 0 + 1j, 1 + 1j, 0.25 + 0.75j], dtype="c16") +np_dt = np.dtype([("r", np.float64), ("i", np.float64)]) +complex_struct_array = np.array( + [(r, i) for r, i in zip(complex_array.real, complex_array.imag)], + dtype=np_dt, +) + +print("\n**********") +print("Reading a file that uses a dimension for complex numbers") +filename = "complex_numbers_as_dimension.nc" + +with netCDF4.Dataset(filename, "w") as f: + f.createDimension("x", size=len(complex_array)) + f.createDimension("complex", size=2) + c_ri = f.createVariable("data_dim", np.float64, ("x", "complex")) + as_dim_array = np.vstack((complex_array.real, complex_array.imag)).T + c_ri[:] = as_dim_array + +with netCDF4.Dataset(filename, "r", auto_complex=True) as f: + print(f["data_dim"]) + + +print("\n**********") +print("Reading a file that uses a compound datatype for complex numbers") +filename = "complex_numbers_as_datatype.nc" + +with netCDF4.Dataset(filename, "w") as f: + f.createDimension("x", size=len(complex_array)) + nc_dt = f.createCompoundType(np_dt, "nc_complex") + breakpoint() + + c_struct = f.createVariable("data_struct", nc_dt, ("x",)) + c_struct[:] = complex_struct_array + +with netCDF4.Dataset(filename, "r", auto_complex=True) as f: + print(f["data_struct"]) + +print("\n**********") +print("Writing complex numbers to a file") +filename = "writing_complex_numbers.nc" +with netCDF4.Dataset(filename, "w", auto_complex=True) as f: + f.createDimension("x", size=len(complex_array)) + c_var = f.createVariable("data", np.complex128, ("x",)) + c_var[:] = complex_array + print(c_var) + +with netCDF4.Dataset(filename, "r", auto_complex=True) as f: + print(f["data"]) diff --git a/examples/json_att.py b/examples/json_att.py index 7d95a7e89..59e5c1241 100644 --- a/examples/json_att.py +++ b/examples/json_att.py @@ -4,7 +4,7 @@ # can be serialized as strings, saved as netCDF attributes, # and then converted back to python objects using json. ds = Dataset('json.nc', 'w') -ds.pythonatt1 = json.dumps([u'foo', {u'bar': [u'baz', None, 1.0, 2]}]) +ds.pythonatt1 = json.dumps(['foo', {'bar': ['baz', None, 1.0, 2]}]) ds.pythonatt2 = "true" # converted to bool ds.pythonatt3 = "null" # converted to None print(ds) diff --git a/examples/mpi_example.py b/examples/mpi_example.py index afac52ddb..93ca57bc7 100644 --- a/examples/mpi_example.py +++ b/examples/mpi_example.py @@ -1,29 +1,46 @@ # to run: mpirun -np 4 python mpi_example.py +import sys from mpi4py import MPI import numpy as np from netCDF4 import Dataset + + +nc_format = 'NETCDF4_CLASSIC' if len(sys.argv) < 2 else sys.argv[1] + rank = MPI.COMM_WORLD.rank # The process ID (integer 0-3 for 4-process run) -nc = Dataset('parallel_test.nc', 'w', parallel=True, comm=MPI.COMM_WORLD, - info=MPI.Info()) +if rank == 0: + print('Creating file with format {}'.format(nc_format)) +nc = Dataset( + "parallel_test.nc", + "w", + parallel=True, + comm=MPI.COMM_WORLD, + info=MPI.Info(), + format=nc_format, # type: ignore # we'll assume it's OK +) # below should work also - MPI_COMM_WORLD and MPI_INFO_NULL will be used. 
#nc = Dataset('parallel_test.nc', 'w', parallel=True) d = nc.createDimension('dim',4) -v = nc.createVariable('var', np.int, 'dim') +v = nc.createVariable('var', np.int32, 'dim') v[rank] = rank + # switch to collective mode, rewrite the data. v.set_collective(True) v[rank] = rank nc.close() + # reopen the file read-only, check the data nc = Dataset('parallel_test.nc', parallel=True, comm=MPI.COMM_WORLD, - info=MPI.Info()) + info=MPI.Info()) assert rank==nc['var'][rank] nc.close() + # reopen the file in append mode, modify the data on the last rank. nc = Dataset('parallel_test.nc', 'a',parallel=True, comm=MPI.COMM_WORLD, - info=MPI.Info()) + info=MPI.Info()) if rank == 3: v[rank] = 2*rank nc.close() + # reopen the file read-only again, check the data. # leave out the comm and info kwargs to check that the defaults # (MPI_COMM_WORLD and MPI_INFO_NULL) work. diff --git a/examples/mpi_example_compressed.py b/examples/mpi_example_compressed.py new file mode 100644 index 000000000..ece1d1ee2 --- /dev/null +++ b/examples/mpi_example_compressed.py @@ -0,0 +1,18 @@ +# to run: mpirun -np 4 python mpi_example_compressed.py +import sys +from mpi4py import MPI +import numpy as np +from netCDF4 import Dataset +rank = MPI.COMM_WORLD.rank # The process ID (integer 0-3 for 4-process run) +nc = Dataset('parallel_test_compressed.nc', 'w', parallel=True) +d = nc.createDimension('dim',4) +v = nc.createVariable('var', np.int32, 'dim', zlib=True) +v[:] = np.arange(4) +nc.close() +# read compressed files in parallel, check the data, try to rewrite some data +nc = Dataset('parallel_test_compressed.nc', 'a', parallel=True) +v = nc['var'] +assert rank==v[rank] +v.set_collective(True) # issue #1108 (var must be in collective mode or write will fail) +v[rank]=2*rank +nc.close() diff --git a/examples/reading_netCDF.ipynb b/examples/reading_netCDF.ipynb index 9e8070a6f..95d33957d 100644 --- a/examples/reading_netCDF.ipynb +++ b/examples/reading_netCDF.ipynb @@ -41,9 +41,8 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": { - "collapsed": false, "internals": { "frag_number": 2, "slide_helper": "subslide_end" @@ -79,9 +78,8 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 2, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 4, @@ -97,7 +95,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "root group (NETCDF4_CLASSIC data model, file format HDF5):\n", " Conventions: CF-1.0\n", " title: HYCOM ATLb2.00\n", @@ -106,9 +104,8 @@ " experiment: 90.9\n", " history: archv2ncdf3z\n", " dimensions(sizes): MT(1), Y(850), X(712), Depth(10)\n", - " variables(dimensions): float64 \u001b[4mMT\u001b[0m(MT), float64 \u001b[4mDate\u001b[0m(MT), float32 \u001b[4mDepth\u001b[0m(Depth), int32 \u001b[4mY\u001b[0m(Y), int32 \u001b[4mX\u001b[0m(X), float32 \u001b[4mLatitude\u001b[0m(Y,X), float32 \u001b[4mLongitude\u001b[0m(Y,X), float32 \u001b[4mu\u001b[0m(MT,Depth,Y,X), float32 \u001b[4mv\u001b[0m(MT,Depth,Y,X), float32 \u001b[4mtemperature\u001b[0m(MT,Depth,Y,X), float32 \u001b[4msalinity\u001b[0m(MT,Depth,Y,X)\n", - " groups: \n", - "\n" + " variables(dimensions): float64 MT(MT), float64 Date(MT), float32 Depth(Depth), int32 Y(Y), int32 X(X), float32 Latitude(Y, X), float32 Longitude(Y, X), float32 u(MT, Depth, Y, X), float32 v(MT, Depth, Y, X), float32 temperature(MT, Depth, Y, X), float32 salinity(MT, Depth, Y, X)\n", + " groups: \n" ] } ], @@ -138,9 +135,8 @@ }, { "cell_type": "code", - "execution_count": 7, 
+ "execution_count": 3, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 6, @@ -156,14 +152,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "[u'MT', u'Date', u'Depth', u'Y', u'X', u'Latitude', u'Longitude', u'u', u'v', u'temperature', u'salinity']\n", - "\n", + "dict_keys(['MT', 'Date', 'Depth', 'Y', 'X', 'Latitude', 'Longitude', 'u', 'v', 'temperature', 'salinity'])\n", + "\n", "float32 temperature(MT, Depth, Y, X)\n", " coordinates: Longitude Latitude Date\n", " standard_name: sea_water_potential_temperature\n", " units: degC\n", - " _FillValue: 1.26765e+30\n", - " valid_range: [ -5.07860279 11.14989948]\n", + " _FillValue: 1.2676506e+30\n", + " valid_range: [-5.078603 11.1498995]\n", " long_name: temp [90.9H]\n", "unlimited dimensions: MT\n", "current shape = (1, 10, 850, 712)\n", @@ -199,9 +195,8 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 4, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 8 @@ -215,14 +210,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "(u'MT', (unlimited): name = 'MT', size = 1\n", - ")\n", - "(u'Y', : name = 'Y', size = 850\n", - ")\n", - "(u'X', : name = 'X', size = 712\n", - ")\n", - "(u'Depth', : name = 'Depth', size = 10\n", - ")\n" + "('MT', (unlimited): name = 'MT', size = 1)\n", + "('Y', : name = 'Y', size = 850)\n", + "('X', : name = 'X', size = 712)\n", + "('Depth', : name = 'Depth', size = 10)\n" ] } ], @@ -248,9 +239,8 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 5, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 10 @@ -263,10 +253,10 @@ { "data": { "text/plain": [ - "(u'MT', u'Depth', u'Y', u'X')" + "('MT', 'Depth', 'Y', 'X')" ] }, - "execution_count": 9, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -277,9 +267,8 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 6, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 11, @@ -297,7 +286,7 @@ "(1, 10, 850, 712)" ] }, - "execution_count": 10, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -326,9 +315,8 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 7, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 13, @@ -344,7 +332,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "float64 MT(MT)\n", " long_name: time\n", " units: days since 1900-12-31 00:00:00\n", @@ -352,16 +340,14 @@ " axis: T\n", "unlimited dimensions: MT\n", "current shape = (1,)\n", - "filling on, default _FillValue of 9.96920996839e+36 used\n", - "\n", - "\n", + "filling on, default _FillValue of 9.969209968386869e+36 used\n", + "\n", "int32 X(X)\n", " point_spacing: even\n", " axis: X\n", "unlimited dimensions: \n", "current shape = (712,)\n", - "filling on, default _FillValue of -2147483647 used\n", - "\n" + "filling on, default _FillValue of -2147483647 used\n" ] } ], @@ -395,9 +381,8 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 8, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 15 @@ -411,7 +396,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[ 41023.25]\n" + "[41023.25]\n" ] } ], @@ -422,9 +407,8 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 9, "metadata": { - 
"collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 16 @@ -438,7 +422,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "[ 0. 100. 200. 400. 700. 1000. 2000. 3000. 4000. 5000.]\n" + "[ 0. 100. 200. 400. 700. 1000. 2000. 3000. 4000. 5000.]\n" ] } ], @@ -449,9 +433,8 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 10, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 17, @@ -496,14 +479,13 @@ "### Finding the latitude and longitude indices of 50N, 140W\n", "\n", "- The `X` and `Y` dimensions don't look like longitudes and latitudes\n", - "- Use the auxilary coordinate variables named in the `coordinates` variable attribute, `Latitude` and `Longitude`" + "- Use the auxiliary coordinate variables named in the `coordinates` variable attribute, `Latitude` and `Longitude`" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 11, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 19 @@ -517,14 +499,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", + "\n", "float32 Latitude(Y, X)\n", " standard_name: latitude\n", " units: degrees_north\n", "unlimited dimensions: \n", "current shape = (850, 712)\n", - "filling on, default _FillValue of 9.96920996839e+36 used\n", - "\n" + "filling on, default _FillValue of 9.969209968386869e+36 used\n" ] } ], @@ -552,9 +533,8 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 12, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 20, @@ -636,9 +616,8 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 13, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 25, @@ -707,9 +686,8 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 14, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 28, @@ -725,7 +703,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20150711_0600.grib2/GC\n" + "https://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_20230525_1200.grib2/GC\n" ] } ], @@ -733,7 +711,7 @@ "import datetime\n", "date = datetime.datetime.now()\n", "# build URL for latest synoptic analysis time\n", - "URL = 'http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_%04i%02i%02i_%02i%02i.grib2/GC' %\\\n", + "URL = 'https://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p5deg/GFS_Global_0p5deg_%04i%02i%02i_%02i%02i.grib2/GC' %\\\n", "(date.year,date.month,date.day,6*(date.hour//6),0)\n", "# keep moving back 6 hours until a valid URL found\n", "validURL = False; ncount = 0\n", @@ -749,9 +727,8 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 15, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 28, @@ -768,52 +745,50 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "float32 Temperature_surface(time2, lat, lon)\n", + "\n", + "float32 Temperature_surface(time1, lat, lon)\n", " long_name: Temperature @ Ground or water surface\n", " units: K\n", " abbreviation: TMP\n", " missing_value: nan\n", " grid_mapping: LatLon_Projection\n", - " coordinates: reftime time2 lat lon \n", + " coordinates: reftime time1 lat lon \n", " Grib_Variable_Id: 
VAR_0-0-0_L1\n", " Grib2_Parameter: [0 0 0]\n", " Grib2_Parameter_Discipline: Meteorological products\n", " Grib2_Parameter_Category: Temperature\n", " Grib2_Parameter_Name: Temperature\n", - " Grib2_Level_Type: Ground or water surface\n", + " Grib2_Level_Type: 1\n", + " Grib2_Level_Desc: Ground or water surface\n", " Grib2_Generating_Process_Type: Forecast\n", + " Grib2_Statistical_Process_Type: UnknownStatType--1\n", "unlimited dimensions: \n", - "current shape = (93, 361, 720)\n", + "current shape = (129, 361, 720)\n", "filling off\n", - "\n", - "\n", - "float64 time2(time2)\n", - " units: Hour since 2015-07-11T06:00:00Z\n", + "\n", + "float64 time1(time1)\n", + " units: Hour since 2023-05-25T12:00:00Z\n", " standard_name: time\n", " long_name: GRIB forecast or observation time\n", " calendar: proleptic_gregorian\n", " _CoordinateAxisType: Time\n", "unlimited dimensions: \n", - "current shape = (93,)\n", + "current shape = (129,)\n", "filling off\n", - "\n", - "\n", + "\n", "float32 lat(lat)\n", " units: degrees_north\n", " _CoordinateAxisType: Lat\n", "unlimited dimensions: \n", "current shape = (361,)\n", "filling off\n", - "\n", - "\n", + "\n", "float32 lon(lon)\n", " units: degrees_east\n", " _CoordinateAxisType: Lon\n", "unlimited dimensions: \n", "current shape = (720,)\n", - "filling off\n", - "\n" + "filling off\n" ] } ], @@ -849,9 +824,8 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 16, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 31 @@ -870,9 +844,9 @@ }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXIAAAD7CAYAAAB37B+tAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnX+MJkeZ378Ptmf5dbPLgM8/N7dWwCc23NhGwr6EO7Fc\nwNhSgi9ShNlVyAlOEQpZQAQns2sHZ+OLHQb5yEm3AkXHDzmEmeCYHwIFDtvEm3CKzE/bY1gc2zpW\nYn322vHCDhd0s177yR/dNVNvvVXV1d3V3VX9Ph9pNO/bb/+o7q769tNPPfUUMTMEQRCEfHnR0AUQ\nBEEQ2iFCLgiCkDki5IIgCJkjQi4IgpA5IuSCIAiZI0IuCIKQOWcPcVAikphHQRCEBjAz2RY6/wC8\nGMB3ADwI4EcADpXLDwE4DuCB8u9abZuDAB4D8AiAqx37Zd9xU/1T55/jn5Rdyj0LZc+13KFld2mn\n1yJn5r8hojcz86+I6GwAf0FE3wDAAD7OzB/X1yei3QCuB7AbwEUA7iWiS5n5hZAnjSAIglCfSh85\nM/+q/DgH4BwUIg4A0+Y9cB2AVWZ+jpmPAXgcwJURyikIgiA4qBRyInoRET0I4ASAu5n5u+VP7yei\nh4jo00S0o1x2IQqXi+I4Cst8LBwZugAtODJ0AVpwZOgCNOTI0AVowZGhC9CQI0MXoAVHmm4YYpG/\nwMyXA7gYwFVE9HcAfBLAJQAuB/AkgD/27aJp4VKDmY8MXYamSNn7J9dyA/mWPddyA+3KHhy1wsyn\niOg+ANcw86ZwE9GnAHyt/PoEgJ3aZheXy6YgokPa1yM53wBBEIQuIKI9APZUrlf2hLp28ioAZ5j5\nF0T0EgDfBPBRAD9k5qfKdT4E4A3MvK/s7FxB4Re/CMC9AF7NxkGIiNkWQiMIgpAg206ub2rYxsI8\nuZZ1jUs7qyzyCwDcQURnoXDDfIGZv05E/5mILkfhNvkpgPcCADMfJaI7ARwFcAbA+0wRFwRBAADa\nB547vN56PzFEtEqozXX1Yy4tLOPm0oW8fHKpN1HX8VrknR1ULHJBmBlo31Y/WQzhNtGF03UsU1xN\n4XaJtsnSwjJuueE24PYt/VrTBjgudqxrLu0UIR8ZekXmFWuIqCB0gl73gK36p0Ty9P75aEKuhLnN\nQ+KZ7ec6f7v9rBs2P9/w/O0Tvx07+3TlvheZ6WbcxDc8fzvmz9rYbIdt3TEi5DOCaVkM8ZonzB5K\nUE0xNeufWs80MlS9fWb7uZhfKoRyfXkOAHDuqWec+9O3A7C5rULtw0aIIMckhrU+GiE3X2PWLHlb\nun69SRnXK6IIutA1Pp/36f3zU8ts637vldsnvle15ZtxEwPAP6bbwgs6ALE0KVshV0KtLoQp5Eq4\nRKgmMS2k0688Vvywd1FcLkJtaB84pN64LHObkAPAR1ZuAjApxKboTblsLiSs+UauJMjMC7nCaXnf\nUC6/fXatcEGIgcvtUWf7qm1NUX5odXp1m5BPuWxu256FmMf2DoxOyGfZfdIUsxFhdQ3Mi3IdhU6x\nWemq47OOK8UUdHPbFHnDs6eieguaxpEngwh3GKZVNSXelvXnDq9PvfqK+2WchLpIYhwHcNejucPr\neGb7uTimLatq47wC2nZyqz7fxTfihudv773TMpTFD/d3rGws8tQZstO1SqxD2PSla4LeVYPvS0yE\nMFyiW3WffGJt1km9foW4U9pia49D8IZnTwGI14eXvWslZYZw+8QQ7ypEbMcD7QP36Urz1c9Tn9s2\nFSYYu4/rZtzEt9yw1YE6pD89ph6IkHdEn6O6dGILuYj2OKmqJzHvu2tA0JCoqLah/Om7zsxNDAhq\nS/Y+8uS4gVh/yncl4n1Y3nWPozfQtpEOQsesrgF7F50/V90/30jhuiGJLqoGEeWGPr5l+7s2wCvd\nH1M
s8oasP7+Nj519uujQqPlaaLNc9AbVl3h3QR1BDzlPeUA0x3V9m9Yxm5Db9ml70NdBRbNEGQlZ\nduYvLSwD6H7g0K4zc9j+rg3MHV7v5IHk0s7KiSUEO2rYsG8IsA2fiNt+19erX8r+Mcsfcm5CN/AK\nyKw3Vd+b7N8n8E1QogsUrpHQhFYuTu+fxy24lZZPLuEuvhF38Y1tdudkkZmULrgGQHXFaCzyIQRw\n/fltfPtZN+AW3Bp0jK6FzBZGOAuk+pDr0z9dlzZvgCFRKm3EXYUmzp+1semiWPwwQH/FzuOHsjkS\nvIMBReoNokt346g7O32VJKVGbitnDFeKHtplG3RhivsYBT/0PvcR+tjqfpaRJURrXCfCpGlHo2sc\ngY8mQu5b14USdKDIRrh8cgmn9883atP6fe9SzHedmcO5p57pzM8/qs7OOg1lCJGv45uMaaWHpvEc\nm4gD9fztXdWJKPdy72KxH/VfZ3Vtcx2FyxBwlUUXbfNcQwTdZWmb1zZU2H2W++n989iOjYnyNWWy\nfPPgC7t5luvpb/skK4u8S9dEVQVVy2wDZ0LwNZKYVrkqW9Ny9kFObwR1LNvOClEReVIH/dqrOuKy\nHl1vkFXr2HA9RNtY7rU61mmt2NfeRXxk5aZonZ6LHwZuvv3GYPdqW7J3rYRUmBgCoVc4s/KFZnTz\nlc01HL5LIUhZ1HOgSzEf+t70+VC11fNQC99Ev261QiBX1/AQLmtS/E2GTBeSrWslpsBVCaarUrle\n6WzLbbOgmBaQuY8if0R3jUkEvB0hVmTM8FFXveqCPutG3XDT0OtYq99j7yJ2fW6udn6WXWfqRaf1\nTfJCXgeXYKrlVbkhYuYsUcedO7zuFHDz+9gF13WOuZ97mxBLX0d0zPkt28Z2d1GONmxO9Yb653Lu\nqWfwPYSP9FTzHtTtwPQZgk07bZ3HSt210mTQiC3uVG8cthviO05Ig9L3rzdOU9irlgvD4xO9kIiN\ntuhuOGA89aOucMXulDaF1TdsP4b7ZNvJdT69f6tjVUXIqMgWoP4o1iwHBOk30mXZ6uvqVrVvail9\nPdoH74CDkMbks7yrGmGXjbSLGcubklJZXLgG0FSJhj4wJmR9E73u6P9P759PR8RX16b/GmC2vabF\nafMGra5pVwODFBsL8zR3eH0qxFElDIuap9xnkRPRiwH8TwDbULhh7mLmQ0S0AOALAH4DwDEA72Dm\nX5TbHATwHgDPA/gAM99t2a/TInf5Gn1pVpt2Mrlm9TY7Jc1j2ywml+D36e80yd1l0Qf6/YnZsEJn\njq+qO643vSa06awHYBdvFU1jiayJ5TqIaZnrkWdLC8vO6JWYHZoqnUeMBFqNLHJm/hsAb2bmywFc\nDuAaIroKwAEA9zDzpQC+VX4HEe0GcD2A3QCuAfAJIgqy+s0nrHnzXBMftH0q+ypzEyu7dWNx7KcJ\nqYt41VtWH8ePhRpKHjqk3OYH93WS20Q+5Pp1fn1bWOcmLis7pi+5al+LH44flTJ/1gYtMlPMLIgm\nlZ2dzPyr8uMcgHNQdC68HcCbyuV3ADiCQsyvA7DKzM8BOEZEjwO4EsD9vmOEirFrIEMTfBXcF6Wi\nW+uhQlnVeVV1vLHieuPpki4y7RVW3vTyKmvchms8gHmN6u67ahwDENAOTevbI+AxB1Y1cVf52FiY\nJ9fDdn15rsP4se6o7OwsLeofAvjbAA4z80Ei+jkzv6L8nQCcZOZXENGfArifmT9f/vYpAN9g5i8a\n+5x4PWib68HXuamvH2optbViXA0mNFwxdH8x6KvTtYuHktkpGSJIVYNgmuKqg1VuFeVCLKYxW596\nC43xhucrw8bCfJwxDJqoN528ou/cNHpmROViuYv7G9zThMadncz8QulauRjAVUT0OuN3hj8EKHpY\nTN1IljoZ1Lp4FW1jcbYRv7qv3V27OroKp1Pf1X02j6Ove3r//KD5rtWbnBJpvV6qcsXs5Ky65m0z\nC26ydxHYu5jVZN68AtpYmKdbcCtdtpdx2V5OWsR9BMeRM/MpIroPwNsAnCCi85n5KSK6AMDT5WpP\nANipbXZxuWwKIjqkfX0z9vJ9QeWwPJX1hqkqZtGgQ/YYH1fcuC+Spsl+ba4J/bPveC6hSM2VY7Oy\nbXVgShCNZP5dJ8qy1UHbOtsOb8Uj07754i1iZXKdybJPjoFQhHSQtiGHvPgxswymlFxPh4j2ANhT\nuV5F1MqrAJxh5l8Q0UsAfBPAR8sdP8vMy0R0AMAOZj5QdnauoPCLXwTgXgCvZuMgtteDqjzdQPVr\ncV3rwvfq28WgjLq4ImNCtnMxpBvHLEOsPg89EgEYfoYZXz1sMxDEPE+1Px9D3e82cd4h+26a7TEn\nNrM0avW56RD9CwDcQURnoXDDfIGZv05E9wO4k4j+EGX4IQAw81EiuhPAUQBnALzPFHEXthth3rDN\nSmdYW1Wjrlwi2KTHP4YPvQ5NHiipDjbyhZA2IVWL0ayLurC3ncihSOfQjBh1V2+TsTupQ98CXNke\nxyjmQNi5eYWcmR8G8HrL8pMA3uLY5jYArVKL+V6ZXB2dIYN6Qmlj0aaALv5mpE0qjLXRuYj1ljD1\nkDi8Fa9uw1YX1PcYuI5b5QoT/Oh6Vtwrf9tNbmTnUFaW2QmVyvDoJg3O1olZtS/z3Ju8wVQRsxNP\n79RM9eG67eQ60z7UztFRB3PfVR3W5u91r531DTlgm1TfnlKlbp1JTshduGJJ9cpkVixdnH0i4sqB\nYvs+BE3FvOp8becdep2aEjMm2IzpHzoixWRjYT5q/LMNZbn5xiK0iWWPSdeCPuaHRVX/X5JJs1QS\neFsoU9XNcg2oCInpNbcfgjox6G2OYRsY00dDiCVsesWOOVAsZ2ydoaF0Ff1SRdsR2iH7zgkz+mpq\nbMErtyP5iSVi53H2WdopEtM378sj49pfVw03ZFRhXcwKnpIl3id6ZIPKtgf4O+pjDTSKQZdCru8/\nB1x1Ws/bg1WyCnkyrpXYN7JuxfS5aOruw+bSGaKh1HGTdFG+0Nd65UtuepxZFXGgOPfQ8+96dG0K\n+xkb6uFcdd+SsMiJ1jjWnIQumligoVZLnY7Rqrj1utuF7qfPeOKQa21bp64gN0n2P3ZsGUN1unhg\ntx081JVVPnfYPvdAqkxY3nBcV4dFPqiQ225cH765VCJSTJqO/jRdSGbfgG9fXVnivjLq6+h+wCYz\nsIglN4kpBn260JrQdVscQ/2Y0MnUhBx73ce1+fFSqnxdYqvYIZ20IftRy6t88XWvdZP+B3V/2wi5\nYCdUzLtoX6HC3KVBoSfJG1udymry5Sr/bRIWtcr21rFLCKjuuFQ0DSF0hVw2cdOEjh7cfGCsVK4q\n1MTWflydnF0fuyuqLO1oycAyITmL3BSRkERRg6DnYu5BzIHq1+SmbimbZWbLh+KaTLaJO0ihLPKx\nWU4poI+QtoVrAsOmc2hybFtKalvdGWNUE+0DJx+1Akze2CYD
GTon0kwoMbFZ0+afa11FiPWuRlJW\nNb6QdYR+sHUi6g/qLowhV7u1DQSrGx48d3h9KiV1iIjnjDkXsXO9VCzytlEZvWIKus0ij+R6qftK\n3LaTK3QAlbk/vZyhDXQMVlLqmHOHVrkp24q7+RDXJ85Qy0NT8vr2raiqQ2Ppf9m8ZjlY5FU3MmTA\njEs4msyObtsHr4CYF0kl0rcJNa+AXL/59u0qq0uUQyxiff0qzAbl26aOD95G7g0rR0JCQOsIeEgn\nvBIg3bI03xLNuqzXYf3NssnDpU6cfcq0yn7YF3Vvkm193YI0f1cXwZfjvOr4XYcxFf6vwoonLHpf\nk0JFtO51DWnoIdtWvUWMoWHliK8+NHVjmvsMsex1S91X50IHk+n7bHAK2cArIFq1u1iSda2YQhwy\nWMBVecxp34Dp4a+uY5vb+8oRul4IbV43Q7YLPX7d/fpe3wG3T1PEvRtc/uJQ11nbDnRfO7a1F1+9\nM+uR2YE7diEvclBdln4cucvPanba1PHjhdxcr5ivrk0k7woR8tg5Y2yEhiTGILTz0vSv28ro65wS\nMY9PlaFi3lvbd58VXyfaJGQWMNf+9bqlt7Oxi7eOL2olKSH3WbT6zYst5EB1ha8l0qtrhX88pFNU\nI1aUTmwxb/t2oJiVMLGUsA3Zd4Ws1nkLcz3cQ+qKWkfPuBly7FkQ7coHW2rZD5WQ+26OLtpmAw8R\n1Lo33vcaau7Le3xXmKIm5K7oEFW5QwfddBnFExIvbltXjz0POY5Y5N2h6pKtztSJSqqD603RjGcP\nmbBa37ZRYTKjUtdyiFox4RWQ6tE2YyltkR2+ikn7wCoG1YxFdVG7Iq+u+a1wi8CbZdZnvnGVp7GI\n14iDt4m4q1y2WPY6DW8skQUpYovmCnXdNcUWUaViwM11q9piEuHGQ1BzzMpgUSuuhm5OKmG6NHS/\nWIjFqCySucOWYxluGn0dMwSvsT/OEU9edzBESEhgbHyRQDEQS3xYTCvdFhYY0j/S1CVYZUzNkiXe\nljQtcldstgc1rZYeH+57TXGFRbnis6dQAt1gtKcZJ2uWRy9HcHksx5iiweAk2/F9I/TEuk4LVz03\nqeNG6wsR8XCSiCOfQHUSrjhGSzrEyBfC5qq4TUXSWq4AbC4IszzW/aprYjl3n5slZlSL6fc2O6Y3\n9ytJsJKi8EsX98fmXtHddXXfEhUhoYx1mNW6pLwRAGobXV6LnIh2EtF9RPRjIvoREX2gXH6IiI4T\n0QPl37XaNgeJ6DEieoSIrq53KuXoSctcnUDhbtEFZGNhnkJcDjYBnVhf+bZX1+xWgMX3zbxIdYfh\nVz04JsqpH8+w/qv6A2JgK2eVheTygwrDYrYRV3toWq+6qItijdfDG7VCROcDOJ+ZHySilwP4AYDf\nB/AOAL9k5o8b6+9G8Rx9A4CLANwL4FJmfsFYzzv5srUsDh+13itvDvyJ+npoxJOrY1dts4kh9iHD\nm13bh4RqBe2/Aa57oB8zxLUi/vF+8WWuDBXi0Le+tsxynZiwygFLsIR9QJDXtcLMTwF4qvz810T0\nExQCDcD6xLwOwCozPwfgGBE9DuBKAPdPFThS+GAxqGB+SuQ3FuaJsCXyXmxuC8sy2/B+575t7haP\na8jsWAQAHN6lLa/XYKoaaNPOSz01qlq2lVq0+M3n5hKLfVhsYYjmwJ++BpoJ0yhjcUrQS0OSyK6b\nwT5yItoF4AoUovxGAO8non8K4PsAPszMvwBwISZF+zi2hL8VPlF3/VYsn7f62rayiRmdluYT0OGX\n9lZsVwiixwUTGtrnI6Z15POZVllMIuLpEyLMrqil2GMYmvrmx8yUe9nWZ6gRNCCodKscAfDvmfkr\nRPTrAJ4pf/4jABcw8x8S0Z8CuJ+ZP19u9ykAX2fmLxn7Y+ChSt9yl36yKSH3UTfaI2DSidAKu7Sw\njOWTS/WOH0DdRhiSLyX0d996QneE5iuqGnDWScemhvjHHffK41qpDD8konMAfBHAf2HmrwAAMz/N\nJQA+hcJ9AgBPANipbX5xuczCJ4HVfw48fAg4cWS6wH1M4hB6DK0z1LptwEhOk9CImToibu6vTeii\nKxzNtT8VdugaraeW29YT0qDJ4KG2A4dMRMQNThwpNPLhQ8DrvuRcraqzkwDcAeBZZv6QtvwCZn6y\n/PwhAG9g5n1aZ+eV2OrsfDUbB9m0yG0YOUpcESxtmLDGldjWfXDYRmzqYYKeiBbXIIy21N2fz9ry\nve7KQI28qcrnYcOXT6VtndORejWdH2cy+KFB9kMi+h0A/wvAGrC58xsB7AVwebnspwDey8wnym1u\nBPAeAGcAfJCZv2nZr1vIHbgEXe8UCBV9Z0WusqxdfnS1zLa9R9Rto+dcVK3X9qFQZ9SeCPk4qCvo\nsUZ2uvYz63VqU8uMCLXT++c1bUktjW1NIcfexenEVWbPLsLEvDMh9+3D3Beqc3cPRZWlJW6RceBq\nB02jVprmcJl1AVcQrbHSB/cAwQbhh6mhz6ITYqF70YW3qjMzxOq2LXOIuqsjKYZlHgvfMWQiiJFh\nGCM2N0qMXDsSwljB3kV3u6swENMXciWISnDLyhYs2KGE+MirQhWrsDQUnRCRHlrEhfGgT+zAvEim\nb1YRo0NT6lQYzjQdFaQr5LqAd0FI1InGZqC+/jpqE/aWybRyYGlhGcCtQxdDiACvgKrmiK0TQ25G\nLW25TSbHc7SdRWtsTOmK543eRprZDwH3yMiU8ZXP5n5pcT5tsyM2pRBxYVSsrgXHmPsw66HP/SZ+\n8S3cMeMGHhdwuha5i5ZiXgxzremW2bsIojWe8su7OkGryhjhgTSU9a7i2m9ZGOTwQodUzaHpMxhE\nmJuhd3Bu4rLGPbqRjkXeIFd2E6yCHEJZPj3fuflbG3JxqwjjZajJjGfezRLBhZxe+GHTATo1aGqV\n2zIgKir3V5GUKychl4iVcaInRDNTKlTlFmpiHPmmbpwFmgVsNByi3zlaJEpfVjkQf8To5v7U+diE\nWxAywXxYe3OVN2y3syjeithRd8MLucKM+OhQ1BtdxLJs5rZEa6zPM6pPN8e8OPE3sb+K80s15nZj\nYX7KWhPGievNyxT1NoJsdVUKtRnWtRI5kqMOzry/LfdXB1t+85RRDVsGBM0Oroe2bmiIENejneak\n6loZmAmXSAt0y7xy3X1gWwePaYWnFOonwj2b+CZv7mPKQSGM4SzyveVxB/Qdx7bKzf36sI2iazoK\nrkt3hwi4YE4TZ45AbltHZmnav/Zak6JFnlIHYCSffKiLxZXv24VtsEWsvN7mPiRnuKDDK6A2MwS5\n3kAVUtfak59rJWIn6MTTcYCHSsxp3No2BH3ihzb7EcaHbpHbEmr53gbrxogr0Z/52PKaDCfkDWbV\nmdguoqDH8pPX6fBUVkjosGd9OH5MsdX3JSIuVNG1T3zsPvc2gxF9pGeR15l+LSKxxLwuSjyrJo1Q\n+CIE6gqxvt9tJ9d
ZwgoFG7wCqnKj2OpOk/q0sTBPekiiqpdSP/2kJ+R9snfRns2wAW0HGOkNpa9Z\nxZvuWxrV7BESYugT3aYhiq52MTMEZkGcbSHHpJg1tcpjjBKtFNXVtaDGYLPKzcZgWuJqO3GtCD6a\ndIK3DVHkFWweU+LV3aSXa6UPLEKtKslENrIaecrbsO3kOsealLbKUraFONYV8FkKFxPc1Hkrk7qy\nxeZMZ74BkS5jcpUyn7MzBo4Z7fXJhEOFPGauFr1BtJ3T0BTZ0MYW2tBExAUTVSdUO1LflxaWN9Me\nm8xq/WkcjVMx+fJwrhVdVG1JpuruoyWhF9iaN6UlMRPwu159Y01CMasNUHDjcn24RBxo1hE601Ro\n5LA+8hABd/1uy5jo25fxm813R/swmavc2Ca2gJt01ZkT0nkqDUuIhTzs+8cr5ES0k4juI6IfE9GP\niOgD5fIFIrqHiB4loruJaIe2zUEieoyIHiGiq2MUspaAOn1LbjdJVWdMF1Z4k3LExHxo1HmISEMV\nqlBW+kxGmgxA1VRvzwH4EDM/SEQvB/ADIroHwLsB3MPMHyOiJQAHABwgot0ArgewG8BFAO4lokuZ\n+QXnEQKmRZuYHDZ0wJBnXZ9gbnZEoHyArPQbVx6bjYV52nZ4ujN17AMvhDQo3C1F3aN94Fmvd7Z8\nNbY35roPQK+QM/NTAJ4qP/81Ef0EhUC/HcCbytXuAHAEhZhfB2CVmZ8DcIyIHgdwJYD7nQep4+f2\nuVkCp0uyVSTzojlnAdJmUKk8UE1U4it9GHTM42wNsLD3BRTXRawnoR9C3+oaT82YKLYEdy5BryPm\nwT5yItoF4AoA3wFwHjOfKH86AeC88vOFAI5rmx1HIfztadKxGTjAZ8pXbk4eMaLcD64GJO4SoWvq\nGibbTq6zGrQ3phwsrtQc+rK6FnmQkJdulS8C+CAz/1L/jYv4Rd/F7efC61OsuaZbq7EvZ8XpMLlW\nV2Jqy26oVxQRcaFvqhJtmalz9f9q5Cjtw+b/Pso8FCHuqCofOYjoHBQi/jlm/kq5+AQRnc/MTxHR\nBQCeLpc/AWCntvnF5bJpHj609fnX9wDn7aksbFt8FyT0adjXa14st4oabGTuj1dAtG9+kFnThdkl\nJHe+q536ltO+eZ47vJ6NUVJ1HZRr5ex3/necWf4L4OFt3v15BwQREaHwgT/LzB/Sln+sXLZMRAcA\n7GBm1dm5gsIvfhGAewG8mo2DTEwsgTAHf9NOANs+XEzs2zbyquFs4XXQ5/+M4ZPvegCPXhlzaUTC\n8Oj1Uk0d6JtWztV2zd9sRkvKKEPL1Xc3pXuOkZ1VrpU3AvgnAN5MRA+Uf9cA+CiAtxLRowB+r/wO\nZj4K4E4ARwF8A8D7TBE30U/A5Tdqkrc7yjRU+mTQPYg4MBnqGKOHX8RVSBF94FpVHa37Jp2bq6Xu\nG4iNwad6M29ELatZI2S70FSxmxj+8L57z3MYDp9DGYU8iDkoLaf6WOe8T79ye2JD9EvqukraWKk5\nDk6IVeY2qWclba3QBzHFN6f6GuO8B7PI9adKrSdSgNXu2842MMYplppFnnssa1NftnlvcrJ0hPzo\nUoBTr7vmudu0LlmLHJiebqyPnCO+ZWPATPDf5DxzsmoEAfDX89Trs6mDei72qjzwSQg5UG9ig6bi\nW9kBasaI9zztW0z0a9Sk88dW6VO3aIRxsbSwXHubqrf11DtCm07wUhlHPgRFTuNELvjexazCmRT6\nNVTxtbRvvrimK/5tRcSFITDrnS8NblPGmooiGYu8DjFiyq14Zg7KEV18Qy0R1+tn6q+lwjhpYpXP\nIkl0dtrwDQ4wCe38dGUcs5GzgJuYMxDZzs0cEm3DnHXIZqWrwR0Rii3MEH0bCrnWUZd2JulaCRFx\n20CAWCkyxyTiodTNtiYIuZKriPvI0rViI2RAkC7QUUZ+ZkLIDEFVYZ1mj3poFkVxyQhV9F1Hxjgu\nIkmL3EabGW1s27ks0DFa42bnsc39EdrBXCceXa0r7hYhRcY0KjlJi7yJSIdMLKx3+M2aG8E836rc\nzjEmarZtb8a3C8LQjKE+JtvZaU4LtZX9q9lUbiGiNEZrXMfs9AQmz9kUdnUdzU5OfVkVIdN7jcEi\nEtqRipDqOpGiHri0M0mLHCguorIKJ0RcF28jO6GJ2k5EvCAkUkf/c60fW3jHYBEJzaF9mJpTdmiK\nHOeJjGUccxLpAAARaUlEQVQJIFmLHHDEPttm6FFzdqrPLjzrzIKQA2GhiF0f14dY57OFemNrmkMp\nNrZ5M1PShuwscsAirqYQq+nc6k6/1uF0banTViibWimhQ49tgj+WuRoFO6lY467orhzqXtJC3giX\nSFuWq5uV0hO3D1RisiHOOyQpmi7m206uy3R0I0bd26Gs8Sn3rbZcJ3VjIn0h91nPum+8QYKrXGcU\niYFqQOYM5VUVNoao1tmHsuJFzIVQQi38qvVsKa9TJX0hB6bF3JWl0CXmeiepcsf49jej9FVRVedW\njBBHIX/0wIa+aBrinCrJDwhiXiSiNQ4SWyXYtomTAbfQZ5yuti2T4Yfzg76Z6BPwyiCi2SRWp6cr\nbUebFNgFaYp5Hha5z5oGqvOIh0SyCBPhh30eR7fGmmRsFPInZp3T02/k5B5pQ/IW+SZKjH3Ca4sx\nn2Fruw9oH1p1Rtq21S1zQWhKLNE2B8alSB4WuaKJ9RzokiFaE+FoQFfWu5rqqot9C4kS4e3Y5ToJ\n7pPJ9A29UsiJ6DNEdIKIHtaWHSKi40T0QPl3rfbbQSJ6jIgeIaKro5fY5V5pup4gCEmgJjiP2fFZ\naz9G35pu0adsjQNhFvlnAVxjLGMAH2fmK8q/bwAAEe0GcD2A3eU2nyCieFZ/E193JDEXf209iNaY\naK0IZ1T/HddQrdt3GYUE0dpxXUG3peQIdq849ENNkxhciIGoFFlm/jaAn1t+sp3cdQBWmfk5Zj4G\n4HEAV7Yp4EQDV3HjNQb9VFJD6EXMBaFblFVeB9eAHpegh8xFkINfXKeNtfx+InqIiD5NRDvKZRcC\nOK6tcxzARS2OUeATb9e6dbapEPOQDH7CFlbrus79EGYbo574kt/pYYamQKtcKepPj2Q5vX9+OpNq\nxq7YpkL+SQCXALgcwJMA/tiz7rBWbEQBETG3s+lGsblIQq592YjEvSK4aDOAxzlPb8bCbdJIyJn5\naS4B8ClsuU+eALBTW/XictkUZYep+tvTpBxebKM3Q0MXhTjINRUawLxIrrbqiguPPTI0lVGcRLRH\n10rXeo2EnIgu0L7+IwAqouWrAN5JRHNEdAmA1wD4rm0fzHxI+zvSpBxBBIqJGeq22UlHa5ybv6xP\n6lrR5voT/Q4i/EJJEzG3/eZaVkUqk5Ez8xFdK13rVQ4IIqJVAG8C8Coi+hmAfwtgDxFdjsJt8lMA\n7y0PepSI7gRwFMAZAO/jIRKeA/aZhBwVwxmvvCks4lIx2RRkVxphV/4bLWbf
1rElQ/MFhaofRGts\n1iPbUH7faM6mYp7qkHyTpCeWKNaN4Dd1WHq+ASe6pSgW+TSVQq5wpVTwTNMn11kwcUWM6eJdJ69K\nqLinNigty4kloiERE91RdV1Dk52VpPA6KySIx81iRrX4IlnU8pDkXKmJuI98cq00pYE1Lvip/ZZk\nm9nJtk65vG3+FmGEBPafhGZPNMMUzW1zMyiSt8ibDBBwUgpKHZGQkMMtWo/A9I2+NRKeyWhPwUbs\n9ujaX27tPnkhb4XD8qsSCP13W3rVWWJi1iAluF1GlyjLPPMBGkJkSiNgaWG5schWhSrmlFvFJAsh\nb2SVRxKB3J7MMfE+8ERkhYxQ7djlN9fJza0CZCLkQqI0EfOanc6S30bQWT65BKBbAyvH/pn8hDxU\nPOrmW/GQ4xO6LZVD7dtc25rhiiLmgs7SwnL0fW4szJP6i77zHshPyOuixzk3dAfk+IRuTdV0eVXT\n70VGxFz4yMpNmyIeS8xzFm+dvIS8iXhYrL8qn/vm8ODVtZm0xoHAh5d+D0TMhS4x6pdysQgFecWR\ne0YEOlEWuQo9DOw4jRr2OAv4JsCWwVhCW1bXgJW4uxyDJa7IxiLfFFZbDhUbutUuoWzdEeIr18MJ\nG96LWY4eEgp0K3zqTXnGjYW8LPJQRLT7QX9DsvVDtOyb0JlVF5cwzR/tuxVAOhkKUyAbixwIdHd4\nREPcJQ2oM8uS7drLQ1VoCdEazz27C0CFVY7ZfXPLSsitiPtkOKqud8P7YcuDIQiA582srGuhFvqY\n/ONAhkJutaoDBGMmQwhbEu0Npqb/0pa8KGTCXGEGiDg+ZExkJ+RCz8R4y4lgmQszTM3645useSxx\n4yZZCnld61qs8bywWePA+F6HhQYEhrXOWifoOKNWNETE+yF0tqUQZq0RChWsrgGHd7XezZgNgSwt\n8lBExPuh6jq3vQ+n98+PuhEKflTEygSO4AZXR/nY68/ohJxXQOpv6LKMFf0ah15ntZ5rffGHC0EE\ndHLWmbtzLCQ/+bIwPMo1EuvhaHO1uPzip/fPy5vVjLPt5PrkRC+e0d1jrzcu7RQhFwZBF3PdgrLN\nuTj212LBjS7igGFdV+RemiUhr3StENFniOgEET2sLVsgonuI6FEiupuIdmi/HSSix4joESK6Ot4p\nCGPCbGRq+i1xsQhebHnxHcxStswQH/lnAVxjLDsA4B5mvhTAt8rvIKLdAK4HsLvc5hNENDo/vBAH\nU7Rtvs0u/ZvbTq6zafEJ6aFyj5/eP18rdfIYLXIXleGHzPxtItplLH47gDeVn+8AcASFmF8HYJWZ\nnwNwjIgeB3AlgPsjlVcYIfoADj0RUtcNUR2HELcPQBD6pmkc+XnMfKL8fALAeeXnCzEp2scBXNTw\nGMLI2ViYp+kc0/PR8067MDtYhUzRsmzO6sO49YAgZmYi8r2eyqurkDRzh9elQzVBtp1c59Ap3WY9\ns2lTIT9BROcz81NEdAGAp8vlTwDYqa13cblsCiI6pH09wsxHGpZFEBqxsTBP4iNPm+WTS51MtpwL\nRLQHwJ7K9ULCD0sf+deY+bfK7x8D8CwzLxPRAQA7mPlA2dm5gsIvfhGAewG8mo2DSPih4EIJa5dx\nwOoYSsjFGk8T10PWNjvQrFjkbcIPVwH8bwC/SUQ/I6J3A/gogLcS0aMAfq/8DmY+CuBOAEcBfAPA\n+0wRF4QhMcVBRDxd1L3xWeS8ApoVEfcRErWy1/HTWxzr3wbgtjaFEmYXFbnShTWui7gIeJ5Ix7Qd\nifEWkiN2Y9XjxWfZ3zoGZMCYndGnsRXyouvwseWTS2KNC6NDcq0Io0dcKnni6+yc1Xjxxp2dgiAI\nfSOusHqIa0UYPWKF54cvxr/wk0unp45Y5IIgJItrQJAM5JpELHJBEJJCF2mXa0XesiYRi1wQhKSo\nEmkR8WlEyAVBEDJHwg8FQUgOlw981q1xCT8UBCEbNhbmaWNhnpYWlmuFIM7S9G46YpELgpAsNst8\nlq1yl3ZK1IogCMmzsTBPN+OmUtRvHbYwCSIWuSAIySK+8knERy4IQnYoX/nQ5UgdscgFQRAyQSxy\nQRCEkSJCLgiCkDki5IIgCJkjQi4IgpA5IuSCIAiZI0IuCIKQOa1GdhLRMQDrAJ4H8BwzX0lECwC+\nAOA3ABwD8A5m/kXLcgqCIAgO2lrkDGAPM1/BzFeWyw4AuIeZLwXwrfK7IAiC0BExXCtmcPrbAdxR\nfr4DwO9HOIYgCILgoNXITiL6SwCnULhW/hMz/xkR/ZyZX1H+TgBOqu/adjKyUxCEqKwRTYjZ4gg1\npqvsh29k5ieJ6FwA9xDRI/qPzMxkXFxBEISYmAIOjFPEfbQScmZ+svz/DBF9GcCVAE4Q0fnM/BQR\nXQDgadu2RHRI+3qEmY+0KYuwhV6xVYVeI+JFZrL9Jgi5MnYRJ6I9APZUrtfUtUJELwVwFjP/kohe\nBuBuAP8OwFsAPMvMy0R0AMAOZj5gbJula8VWaRRdVp66x/Wt79uHa7s6xzAfFlXHFISmjF3Ebbi0\ns42QXwLgy+XXswF8npn/Qxl+eCeAvwVH+GFOQl5XFHWUqLWpXG2Onytjb4xCc6raw9jrTnQh76Iw\nqdCHeIZUuFkUcZOxN0yhPkO9GaeACDm2KkAMd0Rs9DINXZYcGHuDFbYw2+0sW+VZC7lPgEO3tSHi\nOR7G3HhnGbNzfpZFHEhQyB/q/ajCrDD2xjwrNDGuxnDvfed9GYAu4sgFITnqRN8IaTIrb8ixzlOE\nXJgZ2kYQCQXyoIxDzIeVCLkgCMH4xCcFKzrFh3Uf10WEXBg9qTXsronlW7aJYuiAr6FI7V73da1E\nyIXRklqj7oJYQuFKOCUpHeozxINOolaEUZKr6KRs7eZG33Wgj3snUSvCzJCjiIuAx2eW3iZEyAWh\nY2wD2kS4+yV2rvLU7p+4VgTBgiQ6E/SO3VQ6eV2uFRFyQRCETHAJeYw5OwVBEIQBESEXBEHIHBFy\nQRCEzBEhFwRByBwRckEQhMwRIRcEQcgcEXJBEITMESEXBEHIHBFyQRCEzOlEyInoGiJ6hIgeI6Kl\nLo4hCIIgFEQXciI6C8BhANcA2A1gLxG9NvZxhuB7QxegBVL2/sm13EC+Zc+13EC7sndhkV8J4HFm\nPsbMzwH4rwCu6+A4vfP9oQvQAil7/+RabiDfsudabqBd2bsQ8osA/Ez7frxcJgiCIHRAF0I+eKpH\nQRCEWSJ6Glsi+m0Ah5j5mvL7QQAvMPOyto6IvSAIQgN6yUdORGcD+D8A/j6AvwLwXQB7mfknUQ8k\nCIIgAOhgqjdmPkNE+wF8E8BZAD4tIi4IgtAdg8wQJAiCIMSj15GdqQ8UIqLPENEJInpYW7ZARPcQ\n0aNEdDcR7dB+O1ieyyNEdPUwpQaIaCcR3UdEPyaiHxHRBzIq+4uJ6DtE9GBZ9kO5lL0sy1lE9AAR\nfa38nku5jxHRWln275bLki87Ee0
goruI6CdEdJSIrsqk3L9ZXmv1d4qIPhCt7Mzcyx8KN8vjAHYB\nOAfAgwBe29fxA8v4uwCuAPCwtuxjAP51+XkJwEfLz7vLczinPKfHAbxooHKfD+Dy8vPLUfRRvDaH\nspfleWn5/2wA9wO4KqOy/0sAnwfw1VzqS1menwJYMJYlX3YAdwB4j1ZftudQbuMcXgTgSQA7Y5W9\nz8L/XQB/rn0/AODA0BfVUs5dmBTyRwCcV34+H8Aj5eeDAJa09f4cwG8PXf6yLF8B8Jbcyg7gpQB+\ngGJQWfJlB3AxgHsBvBnA13KqL6WQv9JYlnTZS9H+S8vypMttKe/VAL4ds+x9ulZyHSh0HjOfKD+f\nAHBe+flCFOegSOJ8iGgXireK7yCTshPRi4joQRRlvJuZv4s8yv4fAfwrAC9oy3IoN1CM97iXiL5P\nRP+sXJZ62S8B8AwRfZaIfkhEf0ZEL0P65TZ5J4DV8nOUsvcp5Nn3qnLxaPSdx6DnSEQvB/BFAB9k\n5l/qv6VcdmZ+gZkvR2HhXkVErzN+T67sRPQPADzNzA8AmIrrBdIst8YbmfkKANcC+BdE9Lv6j4mW\n/WwArwfwCWZ+PYD/h+LNfqtQaZZ7EyKaA/APAfw387c2Ze9TyJ9A4RNS7MTkEydVThDR+QBARBcA\neLpcbp7PxeWyQSCic1CI+OeY+Svl4izKrmDmUwDuA/A2pF/2vwfg7UT0UxTW1e8R0eeQfrkBAMz8\nZPn/GQBfRuHOSr3sxwEcZ2aVX+ouFML+VOLl1rkWwA/K6w5EuuZ9Cvn3AbyGiHaVT6XrAXy1x+M3\n5asA/qD8/Aco/M9q+TuJaI6ILgHwGhSDn3qHiAjApwEcZeY/0X7KoeyvUj31RPQSAG8F8BMkXnZm\nvpGZdzLzJShelf8HM78r9XIDABG9lIh+rfz8MhQ+24eReNmZ+SkAPyOiS8tFbwHwYwBfQ8LlNtiL\nLbcKEOua9+zkvxZFRMXjAA4O3elgKd8qitGop1H4898NYAFFh9ajAO4GsENb/8byXB4B8LYBy/07\nKPy0DwJ4oPy7JpOy/xaAHwJ4CIWY/JtyefJl18rzJmxFrSRfbhS+5gfLvx+ptphJ2S9DkfH1IQBf\nQtEBmny5y7K8DMD/BfBr2rIoZZcBQYIgCJkjU70JgiBkjgi5IAhC5oiQC4IgZI4IuSAIQuaIkAuC\nIGSOCLkgCELmiJALgiBkjgi5IAhC5vx/oWJ9OHx0YTwAAAAASUVORK5CYII=\n", + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAigAAAGdCAYAAAA44ojeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjcuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/bCgiHAAAACXBIWXMAAA9hAAAPYQGoP6dpAABmwElEQVR4nO29f3RV1Z33/45AIkLIN0jJDwlMpJhWA5kOqECdGhWQtOIoXaOdzlCtTpe2CERgyaDrabGrA+KMRKgtz7SPBcE6+MwSHJ0CEsYGx4fHpxo1As7KWMXyQ9KsakjAoYnA+f4R9mXfk/Njn3P2+XHveb/Wugtyz7nnnLvv/vHen/35fHaBYRgGCCGEEEISxAVxPwAhhBBCiBkKFEIIIYQkDgoUQgghhCQOChRCCCGEJA4KFEIIIYQkDgoUQgghhCQOChRCCCGEJA4KFEIIIYQkjsFxP4Afzp49i48++gjFxcUoKCiI+3EIIYQQooBhGDhx4gQqKytxwQXONpKcFCgfffQRqqqq4n4MQgghhPjg8OHDGDNmjOM5OSlQiouLAfR/wREjRsT8NIQQQghRoaenB1VVVZlx3ImcFChiWWfEiBEUKIQQQkiOoeKeQSdZQgghhCQOChRCCCGEJA4KFEIIIYQkDk8CZf369Zg0aVLG92PatGnYsWNH5vidd96JgoKCrNfUqVOzrtHb24sFCxZg1KhRGDZsGG6++WYcOXJEz7chhBBCSF7gSaCMGTMGjzzyCN544w288cYbuP766/EXf/EXOHDgQOac2bNn49ixY5nX9u3bs67R2NiIbdu2YcuWLXj11Vdx8uRJ3HTTTThz5oyeb0QIIYSQnKfAMAwjyAVGjhyJf/iHf8Ddd9+NO++8E8ePH8fzzz9veW53dzc+97nPYfPmzbj99tsBnM9psn37dtx4441K9+zp6UFJSQm6u7sZxUMIIYTkCF7Gb98+KGfOnMGWLVvw6aefYtq0aZn3W1paMHr0aFx22WX4zne+g87Ozsyx1tZWfPbZZ5g1a1bmvcrKStTW1mLv3r229+rt7UVPT0/WixBCCCH5i2eBsm/fPgwfPhxFRUW49957sW3bNlx++eUAgIaGBvzyl7/Eyy+/jMceewyvv/46rr/+evT29gIAOjo6UFhYiNLS0qxrlpWVoaOjw/aeq1atQklJSebFLLKEEEJIfuM5UVtNTQ3efvttHD9+HM899xzuuOMO7NmzB5dffnlm2QYAamtrMWXKFIwbNw6/+tWvMHfuXNtrGobhmLRl+fLlWLx4ceZvkYmOEEIIIfmJZ4FSWFiIz3/+8wCAKVOm4PXXX8fatWvxT//0TwPOraiowLhx4/Dee+8BAMrLy9HX14eurq4sK0pnZyemT59ue8+ioiIUFRV5fVRCCCGE5CiBU90bhpFZwjHz8ccf4/Dhw6ioqAAATJ48GUOGDEFzczNuu+02AMCxY8ewf/9+PProo0EfhRBCCMlb2g5lrxzUjT1s+b44Jt4X5+UangTKgw8+iIaGBlRVVeHEiRPYsmULWlpasHPnTpw8eRIrVqzA17/+dVRUVODDDz/Egw8+iFGjRuHWW28FAJSUlODuu+/GkiVLcPHFF2PkyJFYunQpJk6ciBkzZoTyBQkhhKQLeWC2Grw3d03H1j1XYe61v8G80r2xDuBWzwc4iw+v181VoeJJoPz+97/HvHnzcOzYMZSUlGDSpEnYuXMnZs6ciVOnTmHfvn3YtGkTjh8/joqKClx33XV49tlns3YtbGpqwuDBg3Hbbbfh1KlTuOGGG7Bx40YMGjRI+5cjhBCSX3gZsJ3OHV7dPeC8zV3TMa/0fETprc834oOFS3w8pT11i5rO33ft/bbnqXzPtkNVOSc6vBA4D0ocMA8KIYTkH36tBV7Y3NXv7yiEyLy2b58/Vrch61wrgTLpxe8POE9c9x/rnrW856QXv4+TB0tQ/H524OymJWu8fwETdgLFbtknbryM3xQoKSRXzX2EkPwmLIFy6/ONAJBZ0rHCzWfDz7OZxVBS8CJq3D7jFQoUkhrsnMYIIclDbq/z2r6Ngt2lA5Y5xLLFpeseAwBsu+VxpWubl2e8IvcdspUk6HWTiB//FgoURShQ0o1bo6JIISSZmNuusDDsOlRjKVaAfp+NE+PP2vqCBLG6OFkS5GezWtLJZ8LsQylQArC0rT/ZnHktUaURcGCMBtkMa7ceLOBvQkj8THrx+5g1tt3REiEEwdY9V2Hs9tNo2bkMAFA/e
zW6agqzzjVmdHkSDZu7pmPXoZrM3+/M+aHj+VH4wsSJm1UoKQIlcB6UXEWugKJhNP9iKmbe1b9eaF6LjPMHJdnYlbXVb5TvXu6E5ALvzPmh66Av2m7z+1PRVVOI+tmrMyKltL0vS6TMGttueQ1Z5Mh8sHAJUKf2rPkuToDzZW3uM5PWV6bagmKuiN96bDFm3vWapRBJ2g9HshEmWTm3AdD/uwmrmGBe6V7Ma/u26yyKEOKMHDILOIfNCoRvyfDqbksryK3PN6L4/QuyRElpex8AoKumECfGn3X1SxH9dd2iJsdnEs/i5Dyb7wiREtUYxyUeDwiR8q3Hzu/1YzYfmh2KKFZyC7O1TBYvhJDzeBUc8vknxp/F8OruTDit+Kzdsrl5eVYswxTszt5M1pjRlRWiqyJQAOv2bZ6sAMmLsIkaMbHTne/FDi7xeEB4i4tUcifGn8Xcse1ZpsIPFvYfE+9967H+RqkyWyDxI3dUdWNjfBBCEkzboSrMvGs6mn8xVf0zpj5wadvt2Howe3nFTgD0WzDPWzHrxqJ/GWZO9nlL227HLtQA7/cLF3MuESvswoTnlVqcnHKiFCdeSb0FRSBmAuYlHnNFv3TdY5kGQoGSe5hniAB/R5IuRBsw13u5bxPoahterM/mpfd5bd/GyYMl+GDhEkx68fso2F1qm+BM1SoqnHaB85YdN4f7fMUu3DssuMQTgEvXPYbh1d30T8gzrISJDEUKyVeEn0VY4kMFVYHilKUV6A/5PXmwZMASj9fl2jQ4wrohltRmjW1H8y+mJlKgpH6Jx0xSTV1xkhbfGycRQwFDcpFJL34fwMAU66r12cnJ1MoSI/oKMSsXx4WzurzEKp8rJoTCqmHOwCqECXDOuVZTUrY0CxVRfv9Y9yywNuaHsYEWFIKlbbcPaOyiAfd3cO55A+LAzSqimySIFCsnxrZDVaFsakZyH2E9MdcNVTHutiQqCxhhfQaQ5ehq126sBEpcqKYjyEdBI5xkZcfmMOESD3Elij0XdBK1GLEiiQJFrMWLdfqoOhmS2wRpT071y+66cuSNLKYvXfdY5n2rnBxJTpCZL2Il6r6DSzzEklzKuhqFIBG5FQTmbJVJx5jRlfn/O3N+iEkvfh9tC5Nn6SLJQwxE5mWa+tmrAfhvC21r789qu+Lv4dXdmYFQpvj9C7IiJndVn08rny8CIOlsrtuAb+1enMiJDS0oKcK8AdauQzWxm1bt0CVQ5EYnOl9V5FTb4v+EJB25nnupt+JzSanrk178fmL7J0E+RP5s7pqeWCdZCpQUIA/2Ytad9IbvV6CYrSJ+ETPIJM4qZOxCRkn6cBLgZotI2PVF+LV967HsmfnSttuz0tCLjLFJEUV+yDVLj5icCmEVtf8al3iILRnntTnO54WN7iWcoMJE7iDrZ6/OXM+L1cWqkzV/Ppc7YpK7yO2jq6YQdYuaPAsD1bpcP3s1Pplfg12HajBSuq9IlDbvlvP7wDS/n50UTmR6lZPFmZefZNb+7Imsv+NwuFX1lYmLeW3fzmzUKMqn7hdNuPVgIwC1xHdxQQtKHhN2UjK3fS7cPhsUHdYS814fQXDqsM3n+TXDE2KH1yVMGbc6KLdXs7CRl4bMPixy/yAiAk8eLMmkxB+7/XTW/Ze23d4vbH4yDF01hZl72X23T+Z/OmCX5M1d0wek1Q+bukVNtsnjkoJYypGJw/JKCwqxRFdllDsrs0OcFWGsbetaytF1HcDfABGkTFTuR/GTDvyKEyEkRDsWgkClXasgW2qE9XZsex+AYXhn5xJg4flz62evxqGvXnVuRt/nKk6A7KVq4Q9itRt92LStvR+XrlPbIygu5pXuxa4Z/X6HVlmDkwgtKHlKmNYTO+uH7MFv9ua3Ord+9mrf0QI6hUWchC1QdN6PJBNVoapaX+Q26dTOnJY0zddQsRoGsQABwJTVb8a6E32Sl3msMPsHRQWdZImjiAjrulbhhVHh1JGqdrgqWC0JWW0J7/VediZz+W+d5IJQoQOwd6zqilXd0oFTHbKymrr5sAR9tkNfHYzh1d2YNbY9k8Jd7I58YvzZ0PN8yEktkyhW5rV9GwCyyoUCJQQoUNTxun266nWs8CNKwhQyZsHgdI7bear3M19DVaSEKUZU7itIilOvriUGv5j9KnJB0KkQlUBRuaeKwPGL3A7FBrDm9PthIXzzkiBS5OR3IlpHtC1jRlcs0ZwUKBFgNle6zVzyAZUN95KQ8TVpJH05yosFKN/qtEBuv1blYWUhjFO4hHVvv8LA7Vm8ihQdQurQVwdj7rW/yfwtb4wn0B3iXLeoCSfGnwWAzO7LceVJkTcDBJDlOCyeM47tMegkqxErM7O58UQ96yX6sbJ+6ELVguN0vqrIMS9nqSxvebXy+EH4GyVpuSZLTJ+LGJHLS/6/WXjXLWoCzoXrqqDje5uj5tz+drqOmaAiWrdgsrLsOfnR2B2TfVK27rkKu1CDAul4V02htme3Ktf+DM+IRaSI7y7+lS06mzK6JNn7d9GC4oC5woUZiho35jBCYOCz5qIFJUzhYUbMnLx6x6um3A+Smt/ps17Ej7AcWCHqi9VAKTtEOz1LVAJGZVIRZr1x+56iDOV/BWZLl5tzuvleOgWKCAUGgvmUeFnuUbXWdNUUYuZdr2XeFxviAef3BtKdRVVYJuZe+5uMxSIJSz1m4tzWhEs8ARGKOgyBYkUSRIvVd7UaOFR3QXU7N5cQDnZ2f5vPBbyJFK9+MF59a6ywEyjygGPGbpaqsiRkN1g5fYewxIrTd1BBpfy9XkfeERhAxqFThOm6OV6bfzeVcpVDi92Q+yg7gaPiz6RiAVEROn6WlOQyFMs/YW/3UbeoKeMDkwSSsN8aBYpP7KJQovIhUG10KoJGxUdGHFMVEiodcxKtKG44CY4wPmdGt6OuCmFHNql+TuW7hx0e74RqdFiQ64lyCLK0Z4fKM5qtYk4hwF6chr0IDjNukUde+0ArumoKQ4nukeuZvItzXCRBlMjQB0UDUYsTQH2TL7sOwq5BOjXU+tmrAWmAybUdfb1gJyi8iAwra4rXa5jRWeZOFhAZnfXayY/GToCUtvcpWyLiDDfu0tQ2nMpbHFP1GQp6b6t7WPUnDWMWYseRdVnvib5E/Dt0/9EB55hx8x/Ridd7FL9/ATYtWYO2Q2syUT46HWejTIaWNCGiAwqUhOI0S9Gd10DuwFRmtHbnJMkB0gq3ZRm387xeT+U6TgOf2bSvOkCqipQwcFo6sjtPlaBCJUhyQC9Lb0HQ/btZXc/8ntyHDN1/NOuYlUgxn9cwpj8d7KnaSwI9qxVOky4VEeHUV5rb1Oa6DUBdfzjupese0xbhIof6hkU+ihOASzwA7E2/ujsLr7MIJ9No2DMSL+ZmeQ0915Z3BLqWa4Jey2q262ZpCGt5wCtOOWCshEsQi4TZgdSPaPFSV1X8QPIFs0gBzosPq2Pmc1SdvIPi1cph119aZaC99flG30s/VvUqrH16clGYcInHA1GJE0BvOHJY4sRLojHz7ETnLDPqpSadTq06hI5duKvduVYiJeoB
[... base64-encoded PNG data for the notebook's matplotlib figure output omitted ...]", "text/plain": [ - "" + "
" ] }, "metadata": {}, @@ -931,9 +905,8 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 17, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 34 @@ -947,15 +920,17 @@ "name": "stdout", "output_type": "stream", "text": [ - "name of time dimension = time2\n", - "units = Hour since 2015-07-11T06:00:00Z, values = [ 0. 3. 6. 9. 12. 15. 18. 21. 24. 27. 30. 33.\n", - " 36. 39. 42. 45. 48. 51. 54. 57. 60. 63. 66. 69.\n", - " 72. 75. 78. 81. 84. 87. 90. 93. 96. 99. 102. 105.\n", - " 108. 111. 114. 117. 120. 123. 126. 129. 132. 135. 138. 141.\n", - " 144. 147. 150. 153. 156. 159. 162. 165. 168. 171. 174. 177.\n", - " 180. 183. 186. 189. 192. 195. 198. 201. 204. 207. 210. 213.\n", - " 216. 219. 222. 225. 228. 231. 234. 237. 240. 252. 264. 276.\n", - " 288. 300. 312. 324. 336. 348. 360. 372. 384.]\n" + "name of time dimension = time1\n", + "units = Hour since 2023-05-25T12:00:00Z, values = [ 0. 3. 6. 9. 12. 15. 18. 21. 24. 27. 30. 33. 36. 39.\n", + " 42. 45. 48. 51. 54. 57. 60. 63. 66. 69. 72. 75. 78. 81.\n", + " 84. 87. 90. 93. 96. 99. 102. 105. 108. 111. 114. 117. 120. 123.\n", + " 126. 129. 132. 135. 138. 141. 144. 147. 150. 153. 156. 159. 162. 165.\n", + " 168. 171. 174. 177. 180. 183. 186. 189. 192. 195. 198. 201. 204. 207.\n", + " 210. 213. 216. 219. 222. 225. 228. 231. 234. 237. 240. 243. 246. 249.\n", + " 252. 255. 258. 261. 264. 267. 270. 273. 276. 279. 282. 285. 288. 291.\n", + " 294. 297. 300. 303. 306. 309. 312. 315. 318. 321. 324. 327. 330. 333.\n", + " 336. 339. 342. 345. 348. 351. 354. 357. 360. 363. 366. 369. 372. 375.\n", + " 378. 381. 384.]\n" ] } ], @@ -969,9 +944,8 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 18, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 35, @@ -987,7 +961,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "['2015-07-11 06:00:00', '2015-07-11 09:00:00', '2015-07-11 12:00:00', '2015-07-11 15:00:00', '2015-07-11 18:00:00', '2015-07-11 21:00:00', '2015-07-12 00:00:00', '2015-07-12 03:00:00', '2015-07-12 06:00:00', '2015-07-12 09:00:00']\n" + "['2023-05-25 12:00:00', '2023-05-25 15:00:00', '2023-05-25 18:00:00', '2023-05-25 21:00:00', '2023-05-26 00:00:00', '2023-05-26 03:00:00', '2023-05-26 06:00:00', '2023-05-26 09:00:00', '2023-05-26 12:00:00', '2023-05-26 15:00:00']\n" ] } ], @@ -1014,9 +988,8 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 19, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 37 @@ -1030,8 +1003,8 @@ "name": "stdout", "output_type": "stream", "text": [ - "2015-07-14 07:22:39.579246\n", - "index = 24, date = 2015-07-14 06:00:00\n" + "2023-05-28 15:57:27.760935\n", + "index = 25, date = 2023-05-28 15:00:00\n" ] } ], @@ -1061,9 +1034,8 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 20, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 39, @@ -1079,7 +1051,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Boulder forecast valid at 2015-07-14 06:00:00 UTC = 296.8 K\n" + "Boulder forecast valid at 2023-05-28 15:00:00 UTC = 297.6 K\n" ] } ], @@ -1113,9 +1085,8 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 21, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 41 @@ -1129,23 +1100,23 @@ "name": "stdout", "output_type": "stream", "text": [ - "-rw-r--r-- 1 
jwhitaker staff 8985332 Jul 10 06:43 data/prmsl.2000.nc\r\n", - "-rw-r--r-- 1 jwhitaker staff 8968789 Jul 10 06:43 data/prmsl.2001.nc\r\n", - "-rw-r--r-- 1 jwhitaker staff 8972796 Jul 10 06:43 data/prmsl.2002.nc\r\n", - "-rw-r--r-- 1 jwhitaker staff 8974435 Jul 10 06:43 data/prmsl.2003.nc\r\n", - "-rw-r--r-- 1 jwhitaker staff 8997438 Jul 10 06:43 data/prmsl.2004.nc\r\n", - "-rw-r--r-- 1 jwhitaker staff 8976678 Jul 10 06:43 data/prmsl.2005.nc\r\n", - "-rw-r--r-- 1 jwhitaker staff 8969714 Jul 10 06:43 data/prmsl.2006.nc\r\n", - "-rw-r--r-- 1 jwhitaker staff 8974360 Jul 10 06:43 data/prmsl.2007.nc\r\n", - "-rw-r--r-- 1 jwhitaker staff 8994260 Jul 10 06:43 data/prmsl.2008.nc\r\n", - "-rw-r--r-- 1 jwhitaker staff 8974678 Jul 10 06:43 data/prmsl.2009.nc\r\n", - "-rw-r--r-- 1 jwhitaker staff 8970732 Jul 10 06:43 data/prmsl.2010.nc\r\n", - "-rw-r--r-- 1 jwhitaker staff 8976285 Jul 10 06:43 data/prmsl.2011.nc\r\n" + "-rw-rw-r-- 1 8985332 May 17 15:27 data/prmsl.2000.nc\r\n", + "-rw-rw-r-- 1 8968789 May 17 15:27 data/prmsl.2001.nc\r\n", + "-rw-rw-r-- 1 8972796 May 17 15:27 data/prmsl.2002.nc\r\n", + "-rw-rw-r-- 1 8974435 May 17 15:27 data/prmsl.2003.nc\r\n", + "-rw-rw-r-- 1 8997438 May 17 15:27 data/prmsl.2004.nc\r\n", + "-rw-rw-r-- 1 8976678 May 17 15:27 data/prmsl.2005.nc\r\n", + "-rw-rw-r-- 1 8969714 May 17 15:27 data/prmsl.2006.nc\r\n", + "-rw-rw-r-- 1 8974360 May 17 15:27 data/prmsl.2007.nc\r\n", + "-rw-rw-r-- 1 8994260 May 17 15:27 data/prmsl.2008.nc\r\n", + "-rw-rw-r-- 1 8974678 May 17 15:27 data/prmsl.2009.nc\r\n", + "-rw-rw-r-- 1 8970732 May 17 15:27 data/prmsl.2010.nc\r\n", + "-rw-rw-r-- 1 8976285 May 17 15:27 data/prmsl.2011.nc\r\n" ] } ], "source": [ - "!ls -l data/prmsl*nc" + "!ls -ldgG data/prmsl*nc" ] }, { @@ -1172,9 +1143,8 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 22, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 43, @@ -1193,7 +1163,7 @@ "starting date = 2000-01-01 00:00:00\n", "ending date = 2011-12-31 00:00:00\n", "times shape = 4383\n", - "prmsl dimensions = (u'time', u'lat', u'lon'), prmsl shape = (4383, 91, 180)\n" + "prmsl dimensions = ('time', 'lat', 'lon'), prmsl shape = (4383, 91, 180)\n" ] } ], @@ -1229,9 +1199,8 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 23, "metadata": { - "collapsed": false, "internals": { "frag_helper": "fragment_end", "frag_number": 45 @@ -1271,23 +1240,23 @@ "metadata": { "celltoolbar": "Raw Cell Format", "kernelspec": { - "display_name": "Python 2", + "display_name": "Python 3 (ipykernel)", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.9" + "pygments_lexer": "ipython3", + "version": "3.9.16" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } diff --git a/examples/subset.py b/examples/subset.py index a8bd2463b..a356453e2 100644 --- a/examples/subset.py +++ b/examples/subset.py @@ -3,7 +3,7 @@ import numpy as np import matplotlib.pyplot as plt -# use real data from CFS reanlysis. +# use real data from CFS reanalysis. # note: we're reading GRIB2 data! 
URL="http://nomads.ncdc.noaa.gov/thredds/dodsC/modeldata/cmd_flxf/2010/201007/20100701/flxf00.gdas.2010070100.grb2" nc = netCDF4.Dataset(URL) diff --git a/examples/test_stringarr.py b/examples/test_stringarr.py index 758c4a749..7644cd59a 100644 --- a/examples/test_stringarr.py +++ b/examples/test_stringarr.py @@ -1,5 +1,6 @@ from netCDF4 import Dataset, stringtochar, chartostring import random, numpy +from typing import Final # test utilities for converting arrays of fixed-length strings # to arrays of characters (with an extra dimension), and vice-versa. @@ -16,7 +17,7 @@ FILE_NAME = 'tst_stringarr.nc' -FILE_FORMAT = 'NETCDF4_CLASSIC' +FILE_FORMAT: Final = 'NETCDF4_CLASSIC' chars = '1234567890aabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' nc = Dataset(FILE_NAME,'w',format=FILE_FORMAT) @@ -26,7 +27,6 @@ nc.createDimension('nchar',nchar) v = nc.createVariable('strings','S1',('n1','n2','nchar')) for nrec in range(nrecs): - data = [] data = numpy.empty((n2,),'S'+repr(nchar)) # fill data with random nchar character strings for n in range(n2): diff --git a/examples/threaded_read.py b/examples/threaded_read.py index 1f1d3b7d3..229f0379a 100644 --- a/examples/threaded_read.py +++ b/examples/threaded_read.py @@ -1,4 +1,3 @@ -from __future__ import print_function from netCDF4 import Dataset from numpy.testing import assert_array_equal, assert_array_almost_equal import numpy as np @@ -29,7 +28,7 @@ nc.close() # Queue them up -items = queue.Queue() +items: queue.Queue = queue.Queue() for data,fname in zip(datal,fnames): items.put(fname) diff --git a/examples/tutorial.py b/examples/tutorial.py index 78eb27fdf..3134881ca 100644 --- a/examples/tutorial.py +++ b/examples/tutorial.py @@ -1,3 +1,4 @@ +from typing import Literal from netCDF4 import Dataset # code from tutorial. @@ -16,32 +17,31 @@ # walk the group tree using a Python generator. def walktree(top): - values = top.groups.values() - yield values + yield top.groups.values() for value in top.groups.values(): - for children in walktree(value): - yield children + yield from walktree(value) + print(rootgrp) for children in walktree(rootgrp): for child in children: print(child) # dimensions. -level = rootgrp.createDimension('level', None) -time = rootgrp.createDimension('time', None) -lat = rootgrp.createDimension('lat', 73) -lon = rootgrp.createDimension('lon', 144) +level_dim = rootgrp.createDimension('level', None) +time_dim = rootgrp.createDimension('time', None) +lat_dim = rootgrp.createDimension('lat', 73) +lon_dim = rootgrp.createDimension('lon', 144) print(rootgrp.dimensions) -print(len(lon)) -print(lon.isunlimited()) -print(time.isunlimited()) +print(len(lon_dim)) +print(lon_dim.isunlimited()) +print(time_dim.isunlimited()) for dimobj in rootgrp.dimensions.values(): print(dimobj) -print(time) +print(time_dim) # variables. times = rootgrp.createVariable('time','f8',('time',)) @@ -69,7 +69,8 @@ def walktree(top): levels.units = 'hPa' temp.units = 'K' times.units = 'hours since 0001-01-01 00:00:00.0' -times.calendar = 'gregorian' +calendar: Literal['gregorian'] = 'gregorian' +times.calendar = calendar for name in rootgrp.ncattrs(): print('Global attr', name, '=', getattr(rootgrp,name)) @@ -80,10 +81,10 @@ def walktree(top): print(rootgrp.variables) -import numpy +import numpy as np # no unlimited dimension, just assign to slice. 
-lats = numpy.arange(-90,91,2.5) -lons = numpy.arange(-180,180,2.5) +lats = np.arange(-90,91,2.5) +lons = np.arange(-180,180,2.5) latitudes[:] = lats longitudes[:] = lons print('latitudes =\n',latitudes[:]) @@ -111,198 +112,257 @@ def walktree(top): from netCDF4 import num2date, date2num, date2index dates = [datetime(2001,3,1)+n*timedelta(hours=12) for n in range(temp.shape[0])] times[:] = date2num(dates,units=times.units,calendar=times.calendar) -print('time values (in units %s): ' % times.units+'\\n',times[:]) -dates = num2date(times[:],units=times.units,calendar=times.calendar) -print('dates corresponding to time values:\\n',dates) +print("time values (in units {}):\n{}".format(times.units, times[:])) +dates_array = num2date(times[:],units=times.units,calendar=times.calendar) +print("dates corresponding to time values:\n{}".format(dates_array)) rootgrp.close() # create a series of netCDF files with a variable sharing # the same unlimited dimension. for nfile in range(10): - f = Dataset('mftest'+repr(nfile)+'.nc','w',format='NETCDF4_CLASSIC') - f.createDimension('x',None) - x = f.createVariable('x','i',('x',)) - x[0:10] = numpy.arange(nfile*10,10*(nfile+1)) - f.close() + nc = Dataset('mftest'+repr(nfile)+'.nc','w',format='NETCDF4_CLASSIC') + nc.createDimension('x',None) + x_var = nc.createVariable('x','i',('x',)) + x_var[0:10] = np.arange(nfile*10,10*(nfile+1)) + nc.close() # now read all those files in at once, in one Dataset. from netCDF4 import MFDataset -f = MFDataset('mftest*nc') -print(f.variables['x'][:]) +nc = MFDataset('mftest*nc') +print(nc.variables['x'][:]) # example showing how to save numpy complex arrays using compound types. -f = Dataset('complex.nc','w') +nc = Dataset('complex.nc','w') size = 3 # length of 1-d complex array # create sample complex data. -datac = numpy.exp(1j*(1.+numpy.linspace(0, numpy.pi, size))) +datac = np.exp(1j*(1.+np.linspace(0, np.pi, size))) print(datac.dtype) # create complex128 compound data type. -complex128 = numpy.dtype([('real',numpy.float64),('imag',numpy.float64)]) -complex128_t = f.createCompoundType(complex128,'complex128') +complex128 = np.dtype([('real',np.float64),('imag',np.float64)]) +complex128_t = nc.createCompoundType(complex128,'complex128') # create a variable with this data type, write some data to it. -f.createDimension('x_dim',None) -v = f.createVariable('cmplx_var',complex128_t,'x_dim') -data = numpy.empty(size,complex128) # numpy structured array +nc.createDimension('x_dim',None) +var_complex = nc.createVariable('cmplx_var',complex128_t,'x_dim') +data = np.empty(size,complex128) # numpy structured array data['real'] = datac.real; data['imag'] = datac.imag -v[:] = data +var_complex[:] = data # close and reopen the file, check the contents. -f.close() -f = Dataset('complex.nc') -print(f) -print(f.variables['cmplx_var']) -print(f.cmptypes) -print(f.cmptypes['complex128']) -v = f.variables['cmplx_var'] -print(v.shape) -datain = v[:] # read in all the data into a numpy structured array +nc.close() +nc = Dataset('complex.nc') +print(nc) +print(nc.variables['cmplx_var']) +print(nc.cmptypes) +print(nc.cmptypes['complex128']) +var_complex = nc.variables['cmplx_var'] +print(var_complex.shape) +datain = var_complex[:] # read in all the data into a numpy structured array # create an empty numpy complex array -datac2 = numpy.empty(datain.shape,numpy.complex128) +datac2 = np.empty(datain.shape,np.complex128) # .. fill it with contents of structured array. 
datac2.real = datain['real'] datac2.imag = datain['imag'] print(datac.dtype,datac) print(datac2.dtype,datac2) +nc.close() + # more complex compound type example. -from netCDF4 import chartostring, stringtoarr -f = Dataset('compound_example.nc','w') # create a new dataset. +nc = Dataset('compound_example.nc','w') # create a new dataset. # create an unlimited dimension call 'station' -f.createDimension('station',None) +nc.createDimension('station',None) # define a compound data type (can contain arrays, or nested compound types). -NUMCHARS = 80 # number of characters to use in fixed-length strings. -winddtype = numpy.dtype([('speed','f4'),('direction','i4')]) -statdtype = numpy.dtype([('latitude', 'f4'), ('longitude', 'f4'), - ('surface_wind',winddtype), - ('temp_sounding','f4',10),('press_sounding','i4',10), - ('location_name','S1',NUMCHARS)]) +winddtype = np.dtype([('speed','f4'),('direction','i4')]) +statdtype = np.dtype([('latitude', 'f4'), ('longitude', 'f4'), + ('surface_wind',winddtype), + ('temp_sounding','f4',10),('press_sounding','i4',10), + ('location_name','S12')]) # use this data type definitions to create a compound data types # called using the createCompoundType Dataset method. # create a compound type for vector wind which will be nested inside # the station data type. This must be done first! -wind_data_t = f.createCompoundType(winddtype,'wind_data') +wind_data_t = nc.createCompoundType(winddtype,'wind_data') # now that wind_data_t is defined, create the station data type. -station_data_t = f.createCompoundType(statdtype,'station_data') +station_data_t = nc.createCompoundType(statdtype,'station_data') # create nested compound data types to hold the units variable attribute. -winddtype_units = numpy.dtype([('speed','S1',NUMCHARS),('direction','S1',NUMCHARS)]) -statdtype_units = numpy.dtype([('latitude', 'S1',NUMCHARS), ('longitude', 'S1',NUMCHARS), - ('surface_wind',winddtype_units), - ('temp_sounding','S1',NUMCHARS), - ('location_name','S1',NUMCHARS), - ('press_sounding','S1',NUMCHARS)]) +winddtype_units = np.dtype([('speed','S12'),('direction','S12')]) +statdtype_units = np.dtype([('latitude', 'S12'), ('longitude', 'S12'), + ('surface_wind',winddtype_units), + ('temp_sounding','S12'), + ('location_name','S12'), + ('press_sounding','S12')]) # create the wind_data_units type first, since it will nested inside # the station_data_units data type. -wind_data_units_t = f.createCompoundType(winddtype_units,'wind_data_units') +wind_data_units_t = nc.createCompoundType(winddtype_units,'wind_data_units') station_data_units_t =\ -f.createCompoundType(statdtype_units,'station_data_units') +nc.createCompoundType(statdtype_units,'station_data_units') # create a variable of of type 'station_data_t' -statdat = f.createVariable('station_obs', station_data_t, ('station',)) +statdat = nc.createVariable('station_obs', station_data_t, ('station',)) # create a numpy structured array, assign data to it. -data = numpy.empty(1,station_data_t) +data = np.empty(1,statdtype) data['latitude'] = 40. data['longitude'] = -105. data['surface_wind']['speed'] = 12.5 data['surface_wind']['direction'] = 270 data['temp_sounding'] = (280.3,272.,270.,269.,266.,258.,254.1,250.,245.5,240.) data['press_sounding'] = range(800,300,-50) -# variable-length string datatypes are not supported inside compound types, so -# to store strings in a compound data type, each string must be -# stored as fixed-size (in this case 80) array of characters. 
-data['location_name'] = stringtoarr('Boulder, Colorado, USA',NUMCHARS) +data['location_name'] = 'Boulder, CO' # assign structured array to variable slice. statdat[0] = data # or just assign a tuple of values to variable slice # (will automatically be converted to a structured array). -statdat[1] = (40.78,-73.99,(-12.5,90), +statdat[1] = np.array((40.78,-73.99,(-12.5,90), (290.2,282.5,279.,277.9,276.,266.,264.1,260.,255.5,243.), - range(900,400,-50),stringtoarr('New York, New York, USA',NUMCHARS)) -print(f.cmptypes) -windunits = numpy.empty(1,winddtype_units) -stationobs_units = numpy.empty(1,statdtype_units) -windunits['speed'] = stringtoarr('m/s',NUMCHARS) -windunits['direction'] = stringtoarr('degrees',NUMCHARS) -stationobs_units['latitude'] = stringtoarr('degrees north',NUMCHARS) -stationobs_units['longitude'] = stringtoarr('degrees west',NUMCHARS) + range(900,400,-50),'New York, NY'),data.dtype) +print(nc.cmptypes) +windunits = np.empty(1,winddtype_units) +stationobs_units = np.empty(1,statdtype_units) +windunits['speed'] = 'm/s' +windunits['direction'] = 'degrees' +stationobs_units['latitude'] = 'degrees N' +stationobs_units['longitude'] = 'degrees W' stationobs_units['surface_wind'] = windunits -stationobs_units['location_name'] = stringtoarr('None', NUMCHARS) -stationobs_units['temp_sounding'] = stringtoarr('Kelvin',NUMCHARS) -stationobs_units['press_sounding'] = stringtoarr('hPa',NUMCHARS) +stationobs_units['location_name'] = 'None' +stationobs_units['temp_sounding'] = 'Kelvin' +stationobs_units['press_sounding'] = 'hPa' +print(stationobs_units.dtype) statdat.units = stationobs_units # close and reopen the file. -f.close() -f = Dataset('compound_example.nc') -print(f) -statdat = f.variables['station_obs'] +nc.close() +nc = Dataset('compound_example.nc') +print(nc) +statdat = nc.variables['station_obs'] print(statdat) # print out data in variable. print('data in a variable of compound type:') -print('----') -for data in statdat[:]: - for name in statdat.dtype.names: - if data[name].dtype.kind == 'S': # a string - # convert array of characters back to a string for display. - units = chartostring(statdat.units[name]) - print(name,': value =',chartostring(data[name]),\ - ': units=',units) - elif data[name].dtype.kind == 'V': # a nested compound type - units_list = [chartostring(s) for s in tuple(statdat.units[name])] - print(name,data[name].dtype.names,': value=',data[name],': units=',\ - units_list) - else: # a numeric type. 
- units = chartostring(statdat.units[name]) - print(name,': value=',data[name],': units=',units) - print('----') -f.close() +print(statdat[:]) +nc.close() -f = Dataset('tst_vlen.nc','w') -vlen_t = f.createVLType(numpy.int32, 'phony_vlen') -x = f.createDimension('x',3) -y = f.createDimension('y',4) -vlvar = f.createVariable('phony_vlen_var', vlen_t, ('y','x')) +nc = Dataset('tst_vlen.nc','w') +vlen_t = nc.createVLType(np.int32, 'phony_vlen') +x = nc.createDimension('x',3) +y = nc.createDimension('y',4) +vlvar = nc.createVariable('phony_vlen_var', vlen_t, ('y','x')) import random -data = numpy.empty(len(y)*len(x),object) +data = np.empty(len(y)*len(x),object) for n in range(len(y)*len(x)): - data[n] = numpy.arange(random.randint(1,10),dtype='int32')+1 -data = numpy.reshape(data,(len(y),len(x))) + data[n] = np.arange(random.randint(1,10),dtype='int32')+1 +data = np.reshape(data,(len(y),len(x))) vlvar[:] = data print(vlvar) print('vlen variable =\n',vlvar[:]) -print(f) -print(f.variables['phony_vlen_var']) -print(f.vltypes['phony_vlen']) -z = f.createDimension('z', 10) -strvar = f.createVariable('strvar',str,'z') +print(nc) +print(nc.variables['phony_vlen_var']) +print(nc.vltypes['phony_vlen']) +z = nc.createDimension('z', 10) +strvar = nc.createVariable('strvar',str,'z') chars = '1234567890aabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' -data = numpy.empty(10,object) +data = np.empty(10,object) for n in range(10): stringlen = random.randint(2,12) data[n] = ''.join([random.choice(chars) for i in range(stringlen)]) strvar[:] = data print('variable-length string variable:\n',strvar[:]) -print(f) -print(f.variables['strvar']) -f.close() +print(nc) +print(nc.variables['strvar']) +nc.close() # Enum type example. -f = Dataset('clouds.nc','w') +nc = Dataset('clouds.nc','w') # python dict describing the allowed values and their names. -enum_dict = {u'Altocumulus': 7, u'Missing': 255, u'Stratus': 2, u'Clear': 0, -u'Nimbostratus': 6, u'Cumulus': 4, u'Altostratus': 5, u'Cumulonimbus': 1, -u'Stratocumulus': 3} +enum_dict = {'Altocumulus': 7, 'Missing': 255, 'Stratus': 2, 'Clear': 0, +'Nimbostratus': 6, 'Cumulus': 4, 'Altostratus': 5, 'Cumulonimbus': 1, +'Stratocumulus': 3} # create the Enum type called 'cloud_t'. -cloud_type = f.createEnumType(numpy.uint8,'cloud_t',enum_dict) +cloud_type = nc.createEnumType(np.uint8,'cloud_t',enum_dict) print(cloud_type) -time = f.createDimension('time',None) +time_dim = nc.createDimension('time',None) # create a 1d variable of type 'cloud_type' called 'primary_clouds'. # The fill_value is set to the 'Missing' named value. -cloud_var = f.createVariable('primary_cloud',cloud_type,'time',\ +cloud_var = nc.createVariable('primary_cloud',cloud_type,'time',\ fill_value=enum_dict['Missing']) # write some data to the variable. cloud_var[:] = [enum_dict['Clear'],enum_dict['Stratus'],enum_dict['Cumulus'],\ enum_dict['Missing'],enum_dict['Cumulonimbus']] # close file, reopen it. 
-f.close() -f = Dataset('clouds.nc') -cloud_var = f.variables['primary_cloud'] +nc.close() +nc = Dataset('clouds.nc') +cloud_var = nc.variables['primary_cloud'] print(cloud_var) print(cloud_var.datatype.enum_dict) print(cloud_var[:]) -f.close() +nc.close() + +# dealing with strings +from netCDF4 import stringtochar +nc = Dataset('stringtest.nc','w',format='NETCDF4_CLASSIC') +nc.createDimension('nchars',3) +nc.createDimension('nstrings',None) +var = nc.createVariable('strings','S1',('nstrings','nchars')) +datain = np.array(['foo','bar'],dtype='S3') +var[:] = stringtochar(datain) # manual conversion to char array +print(var[:]) # data returned as char array +var._Encoding = 'ascii' # this enables automatic conversion +var[:] = datain # conversion to char array done internally +print(var[:]) # data returned in numpy string array +nc.close() +# strings in compound types +nc = Dataset('compoundstring_example.nc','w') +dtype = np.dtype([('observation', 'f4'), + ('station_name','S12')]) +station_data_t = nc.createCompoundType(dtype,'station_data') +nc.createDimension('station',None) +statdat = nc.createVariable('station_obs', station_data_t, ('station',)) +data = np.empty(2,station_data_t.dtype_view) +data['observation'][:] = (123.,3.14) +data['station_name'][:] = ('Boulder','New York') +print(statdat.dtype) # strings actually stored as character arrays +statdat[:] = data # strings converted to character arrays internally +print(statdat[:]) # character arrays converted back to strings +print(statdat[:].dtype) +statdat.set_auto_chartostring(False) # turn off auto-conversion +statdat[:] = data.view(station_data_t.dtype) +print(statdat[:]) # now structured array with char array subtype is returned +nc.close() + +# create a diskless (in-memory) Dataset, and persist the file +# to disk when it is closed. +nc = Dataset('diskless_example.nc','w',diskless=True,persist=True) +d = nc.createDimension('x',None) +v = nc.createVariable('v',np.int32,'x') +v[0:5] = np.arange(5) +print(nc) +print(nc['v'][:]) +nc.close() # file saved to disk +# create an in-memory dataset from an existing python memory +# buffer. +# read the newly created netcdf file into a python bytes object. +f = open('diskless_example.nc', 'rb') +nc_bytes = f.read(); f.close() +# create a netCDF in-memory dataset from the bytes object. +nc = Dataset('inmemory.nc', memory=nc_bytes) +print(nc) +print(nc['v'][:]) +nc.close() +# create an in-memory Dataset and retrieve memory buffer +# estimated size is 1028 bytes - this is actually only +# used if format is NETCDF3 (ignored for NETCDF4/HDF5 files). +nc = Dataset('inmemory.nc', mode='w',memory=1028) +d = nc.createDimension('x',None) +v = nc.createVariable('v',np.int32,'x') +v[0:5] = np.arange(5) +nc_buf = nc.close() # close returns memoryview +print(type(nc_buf)) +# save nc_buf to disk, read it back in and check. 
+f2 = open('inmemory.nc', 'wb') +f2.write(nc_buf); f2.close() +nc = Dataset('inmemory.nc') +print(nc) +print(nc['v'][:]) +nc.close() + +# Write complex numbers to file +complex_array = np.array([0 + 0j, 1 + 0j, 0 + 1j, 1 + 1j, 0.25 + 0.75j]) +with Dataset("complex.nc", "w", auto_complex=True) as nc: + nc.createDimension("x", size=len(complex_array)) + var = nc.createVariable("data", "c16", ("x",)) + var[:] = complex_array + print(var) diff --git a/examples/writing_netCDF.ipynb b/examples/writing_netCDF.ipynb index 4f2d7dd1d..61927929f 100644 --- a/examples/writing_netCDF.ipynb +++ b/examples/writing_netCDF.ipynb @@ -374,7 +374,7 @@ "source": [ "## Writing data\n", "\n", - "To write data a netCDF variable object, just treat it like a numpy array and assign values to a slice." + "To write data to a netCDF variable object, just treat it like a numpy array and assign values to a slice." ] }, { @@ -710,7 +710,7 @@ "\n", "netCDF version 4 added support for organizing data in hierarchical groups.\n", "\n", - "- analagous to directories in a filesystem. \n", + "- analogous to directories in a filesystem. \n", "- Groups serve as containers for variables, dimensions and attributes, as well as other groups. \n", "- A `netCDF4.Dataset` creates a special group, called the 'root group', which is similar to the root directory in a unix filesystem. \n", "\n", @@ -1175,10 +1175,10 @@ "source": [ "##Other interesting and useful projects using netcdf4-python\n", "\n", - "- [Xray](http://xray.readthedocs.org/en/stable/): N-dimensional variant of the core [pandas](http://pandas.pydata.org) data structure that can operate on netcdf variables.\n", - "- [Iris](http://scitools.org.uk/iris/): a data model to create a data abstraction layer which isolates analysis and visualisation code from data format specifics. Uses netcdf4-python to access netcdf data (can also handle GRIB).\n", - "- [Biggus](https://github.com/SciTools/biggus): Virtual large arrays (from netcdf variables) with lazy evaluation.\n", - "- [cf-python](http://cfpython.bitbucket.org/): Implements the [CF](http://cfconventions.org) data model for the reading, writing and processing of data and metadata. " + "- [xarray](https://xarray.pydata.org/en/stable/): N-dimensional variant of the core [pandas](https://pandas.pydata.org) data structure that can operate on netcdf variables.\n", + "- [Iris](https://scitools.org.uk/iris/docs/latest/): a data model to create a data abstraction layer which isolates analysis and visualisation code from data format specifics. Uses netcdf4-python to access netcdf data (can also handle GRIB).\n", + "- [Dask](https://dask.org/): Virtual large arrays (from netcdf variables) with lazy evaluation.\n", + "- [cf-python](https://cfpython.bitbucket.io/): Implements the [CF](http://cfconventions.org) data model for the reading, writing and processing of data and metadata. " ] } ], diff --git a/external/README b/external/README new file mode 100644 index 000000000..900a8c32f --- /dev/null +++ b/external/README @@ -0,0 +1 @@ +* 20240616: remove submodule, include v0.2.0 tag source files (https://github.com/PlasmaFAIR/nc-complex/releases/tag/v0.2.0). 
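The auto_complex example added to examples/tutorial.py above is backed by the vendored nc-complex sources that follow. A minimal round-trip sketch of the same feature (assuming the auto_complex keyword is also honoured when opening the file for reading, which the diff above does not show):

import numpy as np
from netCDF4 import Dataset

complex_array = np.array([0 + 0j, 1 + 0j, 0 + 1j, 1 + 1j, 0.25 + 0.75j])

# write, exactly as in the tutorial.py addition above
with Dataset("complex.nc", "w", auto_complex=True) as nc:
    nc.createDimension("x", size=len(complex_array))
    var = nc.createVariable("data", "c16", ("x",))
    var[:] = complex_array

# read back; assumes auto_complex=True also maps the on-disk compound/
# dimension representation back to a numpy complex128 array on read
with Dataset("complex.nc", "r", auto_complex=True) as nc:
    np.testing.assert_array_equal(nc["data"][:], complex_array)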
diff --git a/external/nc_complex/include/generated_fallbacks/nc_complex_version.h b/external/nc_complex/include/generated_fallbacks/nc_complex_version.h
new file mode 100644
index 000000000..1dc040843
--- /dev/null
+++ b/external/nc_complex/include/generated_fallbacks/nc_complex_version.h
@@ -0,0 +1,4 @@
+#define NC_COMPLEX_GIT_SHA1 "37310ed00f3910974bdefefcdfa4787588651f59"
+#define NC_COMPLEX_GIT_VERSION "v0.2.0"
+#define NC_COMPLEX_GIT_STATE "clean"
+#define NC_COMPLEX_GIT_DATE "2023-12-08"
diff --git a/external/nc_complex/include/nc_complex/nc_complex.h b/external/nc_complex/include/nc_complex/nc_complex.h
new file mode 100644
index 000000000..a31841de3
--- /dev/null
+++ b/external/nc_complex/include/nc_complex/nc_complex.h
@@ -0,0 +1,291 @@
+/// nc-complex: A lightweight, drop-in extension for complex number support in
+/// netCDF
+///
+/// Copyright (C) 2023 Peter Hill
+///
+/// SPDX-License-Identifier: MIT
+
+#ifndef PLASMA_FAIR_NC_COMPLEX
+#define PLASMA_FAIR_NC_COMPLEX
+
+// This header is required when building as a DLL on Windows and is
+// automatically generated by CMake. If you're not using CMake (and
+// not on Windows) for some reason, then define `NC_COMPLEX_NO_EXPORT`
+// to skip this.
+#ifndef NC_COMPLEX_NO_EXPORT
+#include "nc_complex/nc_complex_export.h"
+#else
+#define NC_COMPLEX_EXPORT
+#endif
+
+#include <complex.h>
+#include <netcdf.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#ifdef __cplusplus
+#include <complex>
+#endif
+
+//@{
+/// Portable typedefs for complex numbers
+///
+/// These become aliases for `std::complex` with C++.
+#ifdef _MSC_VER
+typedef _Dcomplex double_complex;
+typedef _Fcomplex float_complex;
+#else
+#if defined(__cplusplus) && defined(__clang__)
+using double_complex = std::complex<double>;
+using float_complex = std::complex<float>;
+#else
+typedef double _Complex double_complex;
+typedef float _Complex float_complex;
+#endif
+#endif
+//@}
+
+#ifdef __cplusplus
+/// @name Helper functions
+///@{
+/// Helper functions for converting between (pointers to) C++ and C complex types
+NC_COMPLEX_EXPORT inline double_complex* cpp_to_c_complex(std::complex<double>* data) {
+    return reinterpret_cast<double_complex*>(data);
+}
+
+NC_COMPLEX_EXPORT inline std::complex<double>* c_to_cpp_complex(double_complex* data) {
+    return reinterpret_cast<std::complex<double>*>(data);
+}
+
+NC_COMPLEX_EXPORT inline float_complex* cpp_to_c_complex(std::complex<float>* data) {
+    return reinterpret_cast<float_complex*>(data);
+}
+
+NC_COMPLEX_EXPORT inline std::complex<float>* c_to_cpp_complex(float_complex* data) {
+    return reinterpret_cast<std::complex<float>*>(data);
+}
+///@}
+extern "C" {
+#endif
+
+/// @name Complex datatype defines
+/// Datatype for complex numbers, for use with \rstref{pfnc_def_var}
+///
+/// @note
+/// These *only* work when defining a variable with \rstref{pfnc_def_var}. To
+/// check the type of an existing variable use \rstref{pfnc_var_is_complex}, and
+/// to check if it is specifically using a compound datatype or a dimension use
+/// \rstref{pfnc_var_is_complex_type} or \rstref{pfnc_var_has_complex_dimension}
+/// respectively.
+/// @endnote +///@{ + +/// Uses complex compound datatype with netCDF4 format, and complex dimension otherwise +#define PFNC_FLOAT_COMPLEX (NC_FIRSTUSERTYPEID - 4) +/// Always use a complex dimension, regardless of file format +#define PFNC_FLOAT_COMPLEX_DIM (NC_FIRSTUSERTYPEID - 3) +/// Uses complex compound datatype with netCDF4 format, and complex dimension otherwise +#define PFNC_DOUBLE_COMPLEX (NC_FIRSTUSERTYPEID - 2) +/// Always use a complex dimension, regardless of file format +#define PFNC_DOUBLE_COMPLEX_DIM (NC_FIRSTUSERTYPEID - 1) +///@} + +/// Return true if variable is complex +NC_COMPLEX_EXPORT bool pfnc_var_is_complex(int ncid, int varid); +/// Return true if variable is complex and uses a compound datatype +NC_COMPLEX_EXPORT bool pfnc_var_is_complex_type(int ncid, int varid); +/// Return true if variable is complex and has a complex dimension +/// (assumed to be the last dimension) +NC_COMPLEX_EXPORT bool pfnc_var_has_complex_dimension(int ncid, int varid); + +/// Return true if dimension is complex +NC_COMPLEX_EXPORT bool pfnc_is_complex_dim(int ncid, int dim_id); + +/// Get the ID for the complex datatype with `double` elements, creating it if it doesn't already exist +NC_COMPLEX_EXPORT int pfnc_get_double_complex_typeid(int ncid, nc_type* nc_typeid); +/// Get the ID for the complex datatype with `float` elements, creating it if it doesn't already exist +NC_COMPLEX_EXPORT int pfnc_get_float_complex_typeid(int ncid, nc_type* nc_typeid); + +/// Get complex dimension, creating one if it doesn't already exist +NC_COMPLEX_EXPORT int pfnc_get_complex_dim(int ncid, int* nc_dim); + +/// Get the base numerical type of a complex type +/// +/// Returns the type of the components for a compound type, or the +/// type of an element for a dimension type. +NC_COMPLEX_EXPORT int pfnc_complex_base_type( + int ncid, nc_type nc_typeid, nc_type* base_type_id +); + +/// Get the base numerical type of a complex variable +NC_COMPLEX_EXPORT int pfnc_inq_var_complex_base_type( + int ncid, int varid, nc_type* nc_typeid +); + +/// Return some information about the `nc-complex` library +NC_COMPLEX_EXPORT const char* pfnc_inq_libvers(void); + +/// @name Wrappers +/// Wrappers for the equivalent `nc_*` functions that correctly handle +/// the start/count/stride arrays for complex dimensions. +/// +/// When the variable is stored using a complex dimension, the file +/// representation has one more dimension than the user-visible +/// in-memory representation. For example, a 1D array: +/// +/// ```c +/// double_complex data[5]; +/// ``` +/// +/// would be represented in the file with two dimensions (when not +/// using a compound datatype!), and so if we use the standard netCDF +/// API we would need to use `{5, 2}` for the `countp` arguments, for +/// example, while using nc-complex, we only need `{5}`. 
+/// +/// NOTE: The `pfnc_put/get*` functions do *not* currently handle +/// conversion between `float/double` base types +///@{ + +/// Extension to `nc_def_var` that also accepts +/// \rstref{PFNC_FLOAT_COMPLEX}, \rstref{PFNC_FLOAT_COMPLEX_DIM}, +/// \rstref{PFNC_DOUBLE_COMPLEX}, and \rstref{PFNC_DOUBLE_COMPLEX_DIM} +NC_COMPLEX_EXPORT int pfnc_def_var( + int ncid, + const char* name, + nc_type xtype, + int ndims, + const int* dimidsp, + int* varidp +); + +NC_COMPLEX_EXPORT int pfnc_put_vara_double_complex( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const double_complex* op +); + +NC_COMPLEX_EXPORT int pfnc_get_vara_double_complex( + int ncid, int varid, const size_t* startp, const size_t* countp, double_complex* ip +); + +NC_COMPLEX_EXPORT int pfnc_put_vars_double_complex( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const ptrdiff_t* stridep, + const double_complex* op +); + +NC_COMPLEX_EXPORT int pfnc_get_vars_double_complex( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const ptrdiff_t* stridep, + double_complex* ip +); + +NC_COMPLEX_EXPORT int pfnc_put_var1_double_complex( + int ncid, int varid, const size_t* indexp, const double_complex* data +); +NC_COMPLEX_EXPORT int pfnc_get_var1_double_complex( + int ncid, int varid, const size_t* indexp, double_complex* data +); + +NC_COMPLEX_EXPORT int pfnc_put_vara_float_complex( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const float_complex* op +); + +NC_COMPLEX_EXPORT int pfnc_get_vara_float_complex( + int ncid, int varid, const size_t* startp, const size_t* countp, float_complex* ip +); + +NC_COMPLEX_EXPORT int pfnc_put_vars_float_complex( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const ptrdiff_t* stridep, + const float_complex* op +); + +NC_COMPLEX_EXPORT int pfnc_get_vars_float_complex( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const ptrdiff_t* stridep, + float_complex* ip +); + +NC_COMPLEX_EXPORT int pfnc_put_var1_float_complex( + int ncid, int varid, const size_t* indexp, const float_complex* data +); +NC_COMPLEX_EXPORT int pfnc_get_var1_float_complex( + int ncid, int varid, const size_t* indexp, float_complex* data +); + +NC_COMPLEX_EXPORT int pfnc_inq_var( + int ncid, + int varid, + char* name, + nc_type* xtypep, + int* ndimsp, + int* dimidsp, + int* nattsp +); + +// NOLINTBEGIN(modernize-use-nullptr) +NC_COMPLEX_EXPORT inline int pfnc_inq_varndims(int ncid, int varid, int* ndimsp) { + return pfnc_inq_var(ncid, varid, NULL, NULL, ndimsp, NULL, NULL); +} +NC_COMPLEX_EXPORT inline int pfnc_inq_vardimid(int ncid, int varid, int* dimidsp) { + return pfnc_inq_var(ncid, varid, NULL, NULL, NULL, dimidsp, NULL); +} +// NOLINTEND(modernize-use-nullptr) + +NC_COMPLEX_EXPORT int pfnc_def_var_chunking( + int ncid, int varid, int storage, const size_t* chunksizesp +); +NC_COMPLEX_EXPORT int pfnc_inq_var_chunking( + int ncid, int varid, int* storagep, size_t* chunksizesp +); + +NC_COMPLEX_EXPORT int pfnc_get_vara( + int ncid, int varid, const size_t* startp, const size_t* countp, void* ip +); +NC_COMPLEX_EXPORT int pfnc_get_vars( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const ptrdiff_t* stridep, + void* ip +); + +NC_COMPLEX_EXPORT int pfnc_put_vara( + int ncid, int varid, const size_t* startp, const size_t* countp, const void* op +); + +NC_COMPLEX_EXPORT int pfnc_put_vars( + int ncid, + int varid, + const size_t* startp, + const 
size_t* countp, + const ptrdiff_t* stridep, + const void* op +); +///@} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/external/nc_complex/src/nc_complex.c b/external/nc_complex/src/nc_complex.c new file mode 100644 index 000000000..4063d4fdc --- /dev/null +++ b/external/nc_complex/src/nc_complex.c @@ -0,0 +1,872 @@ +#include "nc_complex/nc_complex.h" + +#include +#include +#include +#include +#include +#include + +#include "nc_complex_version.h" + +// to enable compilation with older versions of netcdf-c +#ifndef NC_FORMATX_NCZARR +#define NC_FORMATX_NCZARR (10) +#endif + +// NOLINTBEGIN(bugprone-assignment-in-if-condition) +#define CHECK(func) \ + do { \ + int res; \ + if ((res = (func))) { \ + return res; \ + } \ + } while (0) +// NOLINTEND(bugprone-assignment-in-if-condition) + +// Vector of ones for get/put_var1 functions +static const size_t coord_one[NC_MAX_VAR_DIMS] = {1}; + +static const char* double_complex_struct_name = "_PFNC_DOUBLE_COMPLEX_TYPE"; +static const char* float_complex_struct_name = "_PFNC_FLOAT_COMPLEX_TYPE"; + +#define COMPLEX_DIM_NAME "_pfnc_complex" +static const char* complex_dim_name = COMPLEX_DIM_NAME; + +static const char* known_dim_names[] = {COMPLEX_DIM_NAME, "complex", "ri"}; +static const size_t num_known_dim_names = + sizeof(known_dim_names) / sizeof(known_dim_names[0]); + +static const char pfnc_libvers[] = NC_COMPLEX_GIT_VERSION; + +const char* pfnc_inq_libvers(void) { + return pfnc_libvers; +} + +bool pfnc_var_is_complex(int ncid, int varid) { + return pfnc_var_is_complex_type(ncid, varid) + || pfnc_var_has_complex_dimension(ncid, varid); +} + +int pfnc_complex_base_type(int ncid, nc_type nc_typeid, nc_type* base_type_id) { + if (nc_typeid < NC_MAX_ATOMIC_TYPE) { + *base_type_id = nc_typeid; + return NC_NOERR; + } + + // TODO: This should probably handle vlens too + + return nc_inq_compound_field( + ncid, nc_typeid, 0, NULL, NULL, base_type_id, NULL, NULL + ); +} + +int pfnc_inq_var_complex_base_type(int ncid, int varid, nc_type* nc_typeid) { + nc_type var_type_id; + CHECK(nc_inq_vartype(ncid, varid, &var_type_id)); + return pfnc_complex_base_type(ncid, var_type_id, nc_typeid); +} + +/// Return true if a compound type is compatible with a known convention +bool compound_type_is_compatible(int ncid, nc_type nc_typeid) { + // Does the name matching a known convention? + char name[NC_MAX_NAME + 1]; + nc_inq_compound_name(ncid, nc_typeid, name); + if (name == double_complex_struct_name) { + return true; + } + + // Does it have exactly two fields? + size_t num_fields; + nc_inq_compound_nfields(ncid, nc_typeid, &num_fields); + if (num_fields != 2) { + return false; + } + + // As far as I can tell, all conventions put the real part first and + // the imaginary part second. I'm pretty sure all native language + // types are also this way round. That means we don't have to worry + // about trying both combinations! + char real_name[NC_MAX_NAME + 1]; + size_t real_offset; + nc_type real_field_type; + int real_rank; + nc_inq_compound_field( + ncid, nc_typeid, 0, real_name, &real_offset, &real_field_type, &real_rank, NULL + ); + + // If it's not a floating type, we're not interested + if (!(real_field_type == NC_FLOAT || real_field_type == NC_DOUBLE)) { + return false; + } + // Also needs to be scalar + if (real_rank != 0) { + return false; + } + + // Now check names. 
For now, just check it starts with "r", in any case + if (tolower(real_name[0]) != 'r') { + return false; + } + + char imag_name[NC_MAX_NAME + 1]; + size_t imag_offset; + nc_type imag_field_type; + int imag_rank; + nc_inq_compound_field( + ncid, nc_typeid, 1, imag_name, &imag_offset, &imag_field_type, &imag_rank, NULL + ); + + // Both component types better match + if (imag_field_type != real_field_type) { + return false; + } + if (imag_rank != 0) { + return false; + } + if (tolower(imag_name[0]) != 'i') { + return false; + } + + return true; +} + +/// Return true if file already has a complex type with the given base type +bool file_has_complex_struct(int ncid, nc_type* typeidp, nc_type base_type) { + // Simplest case, check for our type name + int err = nc_inq_typeid(ncid, double_complex_struct_name, typeidp); + if (err == NC_NOERR) { + return true; + } + + int ntypes; + err = nc_inq_typeids(ncid, &ntypes, NULL); + if (err != NC_NOERR) { + return false; + } + + bool result = false; + + nc_type* typeids = malloc((size_t)ntypes * sizeof(nc_type)); + err = nc_inq_typeids(ncid, NULL, typeids); + if (err != NC_NOERR) { + goto cleanup; + } + + for (size_t i = 0; i < (size_t)ntypes; i++) { + if (compound_type_is_compatible(ncid, typeids[i])) { + nc_type base_type_id; + err = pfnc_complex_base_type(ncid, typeids[i], &base_type_id); + if (err != NC_NOERR) { + goto cleanup; + } + if (base_type_id == base_type) { + *typeidp = typeids[i]; + result = true; + goto cleanup; + } + } + } +cleanup: + free(typeids); + return result; +} + +/// Return true if a given dimension matches a known convention +bool pfnc_is_complex_dim(int ncid, int dim_id) { + size_t length; + nc_inq_dimlen(ncid, dim_id, &length); + + // Definitely can only be exactly two. Note that we can't catch + // unlimited dimensions that only have two records so far. + if (length != 2) { + return false; + } + + // Not sure if this is the best way, but here we are. + char name[NC_MAX_NAME + 1]; + nc_inq_dimname(ncid, dim_id, name); + + const size_t name_length = strlen(name); + + // Check against known names of complex dimensions + for (size_t i = 0; i < num_known_dim_names; i++) { + if (strncmp(name, known_dim_names[i], name_length) == 0) { + return true; + } + } + + return false; +} + +/// Return true if a variable uses the dimension-convention +bool pfnc_var_has_complex_dimension(int ncid, int nc_varid) { + int num_dims; + nc_inq_varndims(ncid, nc_varid, &num_dims); + + int* dim_ids = (int*)malloc((size_t)num_dims * sizeof(int)); + nc_inq_vardimid(ncid, nc_varid, dim_ids); + + // Now we check if any of the dimensions match one of our known + // conventions. Do we need to check all of them, or just the + // first/last? 
+ for (int i = 0; i < num_dims; i++) { + if (pfnc_is_complex_dim(ncid, dim_ids[i])) { + free(dim_ids); + return true; + } + } + + free(dim_ids); + return false; +} + +/// Return true if a netCDF datatype is a compound type +bool is_compound_type(int ncid, int type_id) { + // There appears to be no API for detecting whether a type ID is a + // primitive type, so we have to check ourselves + if (type_id <= NC_MAX_ATOMIC_TYPE) { + return false; + } + + int class_type; + nc_inq_user_type(ncid, type_id, NULL, NULL, NULL, NULL, &class_type); + return class_type == NC_COMPOUND; +} + +/// Copy an array meant for a complex-dimensioned variable +size_t* copy_complex_dim_size_t_array( + const size_t* old_array, int numdims, size_t complex_dim_value +) { + size_t* new_buffer = NULL; + + if (old_array != NULL) { + new_buffer = (size_t*)malloc(sizeof(size_t) * (size_t)numdims); + + size_t last_dim = (size_t)(numdims - 1); + for (size_t i = 0; i < last_dim; i++) { + new_buffer[i] = old_array[i]; + } + + new_buffer[last_dim] = complex_dim_value; + } + return new_buffer; +} + +ptrdiff_t* copy_complex_dim_ptrdiff_t_array( + const ptrdiff_t* old_array, int numdims, ptrdiff_t complex_dim_value +) { + ptrdiff_t* new_buffer = NULL; + + if (old_array != NULL) { + new_buffer = (ptrdiff_t*)malloc(sizeof(ptrdiff_t) * (size_t)numdims); + + size_t last_dim = (size_t)(numdims - 1); + for (size_t i = 0; i < last_dim; i++) { + new_buffer[i] = old_array[i]; + } + + new_buffer[last_dim] = complex_dim_value; + } + return new_buffer; +} + +bool pfnc_var_is_complex_type(int ncid, int varid) { + nc_type var_type_id; + if (nc_inq_vartype(ncid, varid, &var_type_id)) { + return false; + } + + if (is_compound_type(ncid, var_type_id)) { + return compound_type_is_compatible(ncid, var_type_id); + } + return false; +} + +size_t complex_type_size(nc_type base_type) { + switch (base_type) { + case NC_FLOAT: + return sizeof(float_complex); + case NC_DOUBLE: + return sizeof(double_complex); + default: + return 0; + } +} + +size_t base_type_size(nc_type base_type) { + switch (base_type) { + case NC_FLOAT: + return sizeof(float); + case NC_DOUBLE: + return sizeof(double); + default: + return 0; + } +} + +int get_or_make_complex_struct( + int ncid, nc_type* nc_typeid, nc_type base_type, const char* struct_name +) { + // TODO: Error if not netCDF4 + + if (file_has_complex_struct(ncid, nc_typeid, base_type)) { + return NC_NOERR; + } + + const size_t complex_size = complex_type_size(base_type); + if (complex_size == 0) { + return NC_EBADTYPE; + } + const size_t base_size = base_type_size(base_type); + if (base_size == 0) { + return NC_EBADTYPE; + } + + CHECK(nc_def_compound(ncid, complex_size, struct_name, nc_typeid)); + CHECK(nc_insert_compound(ncid, *nc_typeid, "r", 0, base_type)); + CHECK(nc_insert_compound(ncid, *nc_typeid, "i", base_size, base_type)); + + return NC_NOERR; +} + +int pfnc_get_double_complex_typeid(int ncid, nc_type* nc_typeid) { + return get_or_make_complex_struct( + ncid, nc_typeid, NC_DOUBLE, double_complex_struct_name + ); +} + +int pfnc_get_float_complex_typeid(int ncid, nc_type* nc_typeid) { + return get_or_make_complex_struct( + ncid, nc_typeid, NC_FLOAT, float_complex_struct_name + ); +} + +int pfnc_get_complex_dim(int ncid, int* nc_dim) { + int num_dims; + CHECK(nc_inq_ndims(ncid, &num_dims)); + + int* dim_ids = (int*)malloc((size_t)num_dims * sizeof(int)); + int ierr = nc_inq_dimids(ncid, NULL, dim_ids, true); + if (ierr != NC_NOERR) { + goto cleanup; + } + + // Now we check if any of the dimensions match one of 
our known + // conventions. Do we need to check all of them, or just the + // first/last? + for (int i = 0; i < num_dims; i++) { + if (pfnc_is_complex_dim(ncid, dim_ids[i])) { + *nc_dim = dim_ids[i]; + goto cleanup; + } + } + + ierr = nc_def_dim(ncid, complex_dim_name, 2, nc_dim); + +cleanup: + free(dim_ids); + return ierr; +} + +int pfnc_put_vara_double_complex( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const double_complex* op +) { + return pfnc_put_vars_double_complex(ncid, varid, startp, countp, NULL, op); +} + +int pfnc_get_vara_double_complex( + int ncid, int varid, const size_t* startp, const size_t* countp, double_complex* ip +) { + return pfnc_get_vars_double_complex(ncid, varid, startp, countp, NULL, ip); +} + +int pfnc_put_vars_double_complex( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const ptrdiff_t* stridep, + const double_complex* op +) { + if (!pfnc_var_is_complex(ncid, varid)) { + return NC_EBADTYPE; + } + + // TODO: handle converting different float sizes + + // Check if we can get away without fudging count/start sizes + if (((startp == NULL) && (countp == NULL) && (stridep == NULL)) + || !pfnc_var_has_complex_dimension(ncid, varid)) { + return nc_put_vars(ncid, varid, startp, countp, stridep, op); + } + + // The real variable has a complex dimension, but we're pretending + // it doesn't, so now we need start/count arrays of the real size + + int numdims = 0; + CHECK(nc_inq_varndims(ncid, varid, &numdims)); + + // Copy start/count buffers, appending an extra element for the + // complex dimension. This dimension starts at 0 and has 2 elements + size_t* start_buffer = copy_complex_dim_size_t_array(startp, numdims, 0); + size_t* count_buffer = copy_complex_dim_size_t_array(countp, numdims, 2); + ptrdiff_t* stride_buffer = copy_complex_dim_ptrdiff_t_array(stridep, numdims, 1); + + const int ierr = + nc_put_vars(ncid, varid, start_buffer, count_buffer, stride_buffer, op); + + if (start_buffer != NULL) { + free(start_buffer); + } + if (count_buffer != NULL) { + free(count_buffer); + } + if (stride_buffer != NULL) { + free(stride_buffer); + } + return ierr; +} + +int pfnc_get_vars_double_complex( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const ptrdiff_t* stridep, + double_complex* ip +) { + if (!pfnc_var_is_complex(ncid, varid)) { + return NC_EBADTYPE; + } + + // TODO: handle converting different float sizes + + // Check if we can get away without fudging count/start sizes + if (((startp == NULL) && (countp == NULL) && (stridep == NULL)) + || !pfnc_var_has_complex_dimension(ncid, varid)) { + return nc_get_vars(ncid, varid, startp, countp, stridep, ip); + } + + // The real variable has a complex dimension, but we're pretending + // it doesn't, so now we need start/count arrays of the real size + + int numdims = 0; + CHECK(nc_inq_varndims(ncid, varid, &numdims)); + + // Copy start/count buffers, appending an extra element for the + // complex dimension. 
This dimension starts at 0 and has 2 elements + size_t* start_buffer = copy_complex_dim_size_t_array(startp, numdims, 0); + size_t* count_buffer = copy_complex_dim_size_t_array(countp, numdims, 2); + ptrdiff_t* stride_buffer = copy_complex_dim_ptrdiff_t_array(stridep, numdims, 1); + + const int ierr = + nc_get_vars(ncid, varid, start_buffer, count_buffer, stride_buffer, ip); + + if (start_buffer != NULL) { + free(start_buffer); + } + if (count_buffer != NULL) { + free(count_buffer); + } + if (stride_buffer != NULL) { + free(stride_buffer); + } + return ierr; +} + +int pfnc_put_var1_double_complex( + int ncid, int varid, const size_t* indexp, const double_complex* data +) { + return pfnc_put_vara_double_complex(ncid, varid, indexp, coord_one, data); +} + +int pfnc_get_var1_double_complex( + int ncid, int varid, const size_t* indexp, double_complex* data +) { + return pfnc_get_vara_double_complex(ncid, varid, indexp, coord_one, data); +} + +int pfnc_put_vara_float_complex( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const float_complex* op +) { + return pfnc_put_vars_float_complex(ncid, varid, startp, countp, NULL, op); +} + +int pfnc_get_vara_float_complex( + int ncid, int varid, const size_t* startp, const size_t* countp, float_complex* ip +) { + return pfnc_get_vars_float_complex(ncid, varid, startp, countp, NULL, ip); +} + +int pfnc_put_vars_float_complex( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const ptrdiff_t* stridep, + const float_complex* op +) { + if (!pfnc_var_is_complex(ncid, varid)) { + return NC_EBADTYPE; + } + + // TODO: handle converting different float sizes + + // Check if we can get away without fudging count/start sizes + if (((startp == NULL) && (countp == NULL) && (stridep == NULL)) + || !pfnc_var_has_complex_dimension(ncid, varid)) { + return nc_put_vars(ncid, varid, startp, countp, stridep, op); + } + + // The real variable has a complex dimension, but we're pretending + // it doesn't, so now we need start/count arrays of the real size + + int numdims = 0; + CHECK(nc_inq_varndims(ncid, varid, &numdims)); + + // Copy start/count buffers, appending an extra element for the + // complex dimension. 
This dimension starts at 0 and has 2 elements + size_t* start_buffer = copy_complex_dim_size_t_array(startp, numdims, 0); + size_t* count_buffer = copy_complex_dim_size_t_array(countp, numdims, 2); + ptrdiff_t* stride_buffer = copy_complex_dim_ptrdiff_t_array(stridep, numdims, 1); + + const int ierr = + nc_put_vars(ncid, varid, start_buffer, count_buffer, stride_buffer, op); + + if (start_buffer != NULL) { + free(start_buffer); + } + if (count_buffer != NULL) { + free(count_buffer); + } + if (stride_buffer != NULL) { + free(stride_buffer); + } + return ierr; +} + +int pfnc_get_vars_float_complex( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const ptrdiff_t* stridep, + float_complex* ip +) { + if (!pfnc_var_is_complex(ncid, varid)) { + return NC_EBADTYPE; + } + + // TODO: handle converting different float sizes + + // Check if we can get away without fudging count/start sizes + if (((startp == NULL) && (countp == NULL) && (stridep == NULL)) + || !pfnc_var_has_complex_dimension(ncid, varid)) { + return nc_get_vars(ncid, varid, startp, countp, stridep, ip); + } + + // The real variable has a complex dimension, but we're pretending + // it doesn't, so now we need start/count arrays of the real size + + int numdims = 0; + CHECK(nc_inq_varndims(ncid, varid, &numdims)); + + // Copy start/count buffers, appending an extra element for the + // complex dimension. This dimension starts at 0 and has 2 elements + size_t* start_buffer = copy_complex_dim_size_t_array(startp, numdims, 0); + size_t* count_buffer = copy_complex_dim_size_t_array(countp, numdims, 2); + ptrdiff_t* stride_buffer = copy_complex_dim_ptrdiff_t_array(stridep, numdims, 1); + + const int ierr = + nc_get_vars(ncid, varid, start_buffer, count_buffer, stride_buffer, ip); + + if (start_buffer != NULL) { + free(start_buffer); + } + if (count_buffer != NULL) { + free(count_buffer); + } + if (stride_buffer != NULL) { + free(stride_buffer); + } + return ierr; +} + +int pfnc_put_var1_float_complex( + int ncid, int varid, const size_t* indexp, const float_complex* data +) { + return pfnc_put_vara_float_complex(ncid, varid, indexp, coord_one, data); +} + +int pfnc_get_var1_float_complex( + int ncid, int varid, const size_t* indexp, float_complex* data +) { + return pfnc_get_vara_float_complex(ncid, varid, indexp, coord_one, data); +} + +int pfnc_def_var( + int ncid, + const char* name, + nc_type xtype, + int ndims, + const int* dimidsp, + int* varidp +) { + // If it's not a complex number, we don't need to do anything + if (!(xtype == PFNC_DOUBLE_COMPLEX || xtype == PFNC_DOUBLE_COMPLEX_DIM + || xtype == PFNC_FLOAT_COMPLEX || xtype == PFNC_FLOAT_COMPLEX_DIM)) { + return nc_def_var(ncid, name, xtype, ndims, dimidsp, varidp); + } + + const bool base_is_double = + (xtype == PFNC_DOUBLE_COMPLEX || xtype == PFNC_DOUBLE_COMPLEX_DIM); + + // Check the format used by this file. If it's some variation on the + // classic model, then we have to use a complex dimension. Also, + // NcZarr, for some reason doesn't support compound types (yet?). + // I _think_ DAP supports compound types + int format = 0; + int mode = 0; + CHECK(nc_inq_format_extended(ncid, &format, &mode)); + + if ((format == NC_FORMAT_CLASSIC || format == NC_FORMAT_NETCDF4_CLASSIC) + || (mode == NC_FORMATX_NCZARR)) { + xtype = base_is_double ? PFNC_DOUBLE_COMPLEX_DIM : PFNC_FLOAT_COMPLEX_DIM; + } + + if (xtype == PFNC_DOUBLE_COMPLEX_DIM || xtype == PFNC_FLOAT_COMPLEX_DIM) { + // Using a complex dimension. 
We need to get the complex dimension + // used in this file and append it to the list of dimensions + // passed in by the user + + int complex_dim = 0; + CHECK(pfnc_get_complex_dim(ncid, &complex_dim)); + + int new_dims = ndims + 1; + int* dim_ids_buffer = (int*)malloc((size_t)new_dims * sizeof(int)); + for (size_t i = 0; i < (size_t)ndims; i++) { + dim_ids_buffer[i] = dimidsp[i]; + } + dim_ids_buffer[ndims] = complex_dim; + + const nc_type base_type = base_is_double ? NC_DOUBLE : NC_FLOAT; + + const int ierr = + nc_def_var(ncid, name, base_type, new_dims, dim_ids_buffer, varidp); + free(dim_ids_buffer); + return ierr; + } + + // Using a complex type. We need to get the complex type used in + // this file and pass that as `xtype` + nc_type complex_type = 0; + if (base_is_double) { + CHECK(pfnc_get_double_complex_typeid(ncid, &complex_type)); + } else { + CHECK(pfnc_get_float_complex_typeid(ncid, &complex_type)); + } + + return nc_def_var(ncid, name, complex_type, ndims, dimidsp, varidp); +} + +int pfnc_inq_var( + int ncid, + int varid, + char* name, + nc_type* xtypep, + int* ndimsp, + int* dimidsp, + int* nattsp +) { + if (!pfnc_var_has_complex_dimension(ncid, varid)) { + return nc_inq_var(ncid, varid, name, xtypep, ndimsp, dimidsp, nattsp); + } + + // Tricky bit: if variable has complex dimension, and user used + // pfnc_inq_varndims, then dimidsp is one smaller than netCDF thinks + // it should be. So we'll have to allocate our own array of the + // correct size and copy out of that. + + // This buffer will point to either the user's array, or our own one + int* buffer = dimidsp; + int numdims = 0; + + if (dimidsp != NULL) { + CHECK(nc_inq_varndims(ncid, varid, &numdims)); + buffer = (int*)malloc(sizeof(int) * (size_t)numdims); + } + + int ierr = nc_inq_var(ncid, varid, name, xtypep, &numdims, buffer, nattsp); + + if (ierr != NC_NOERR) { + goto cleanup; + } + + if (dimidsp != NULL) { + if (numdims <= 0) { + // This should never happen + goto cleanup; + } + const size_t other_dims = (size_t)(numdims - 1); + for (size_t i = 0; i < other_dims; i++) { + dimidsp[i] = buffer[i]; + } + } + + if (ndimsp != NULL) { + *ndimsp = numdims - 1; + } + +cleanup: + free(buffer); + return ierr; +} + +int pfnc_def_var_chunking(int ncid, int varid, int storage, const size_t* chunksizesp) { + if (chunksizesp == NULL || !pfnc_var_has_complex_dimension(ncid, varid)) { + return nc_def_var_chunking(ncid, varid, storage, chunksizesp); + } + + // The real variable has a complex dimension, but we're pretending + // it doesn't, so now we need start/count arrays of the real size + + int numdims = 0; + CHECK(nc_inq_varndims(ncid, varid, &numdims)); + + // Copy chunksize buffer, appending an extra element for the + // complex dimension + size_t* chunk_buffer = copy_complex_dim_size_t_array(chunksizesp, numdims, 2); + + const int ierr = nc_def_var_chunking(ncid, varid, storage, chunk_buffer); + free(chunk_buffer); + return ierr; +} + +int pfnc_inq_var_chunking(int ncid, int varid, int* storagep, size_t* chunksizesp) { + if (chunksizesp == NULL || !pfnc_var_has_complex_dimension(ncid, varid)) { + return nc_inq_var_chunking(ncid, varid, storagep, chunksizesp); + } + + int numdims = 0; + + CHECK(nc_inq_varndims(ncid, varid, &numdims)); + + // Copy chunksize buffer, appending an extra element for the + // complex dimension + size_t* chunk_buffer = copy_complex_dim_size_t_array(chunksizesp, numdims, 2); + + const int ierr = nc_inq_var_chunking(ncid, varid, storagep, chunk_buffer); + + if (ierr != NC_NOERR) { + goto cleanup; 
+ } + + const size_t other_dims = (size_t)(numdims - 1); + for (size_t i = 0; i < other_dims; i++) { + chunksizesp[i] = chunk_buffer[i]; + } + +cleanup: + free(chunk_buffer); + return ierr; +} + +int pfnc_get_vara( + int ncid, int varid, const size_t* startp, const size_t* countp, void* ip +) { + if (pfnc_var_is_complex(ncid, varid)) { + nc_type base_type; + CHECK(pfnc_inq_var_complex_base_type(ncid, varid, &base_type)); + switch (base_type) { + case NC_DOUBLE: + return pfnc_get_vara_double_complex(ncid, varid, startp, countp, ip); + case NC_FLOAT: + return pfnc_get_vara_float_complex(ncid, varid, startp, countp, ip); + default: + return NC_EBADTYPE; + } + } + + return nc_get_vara(ncid, varid, startp, countp, ip); +} + +int pfnc_put_vara( + int ncid, int varid, const size_t* startp, const size_t* countp, const void* op +) { + if (pfnc_var_is_complex(ncid, varid)) { + nc_type base_type; + CHECK(pfnc_inq_var_complex_base_type(ncid, varid, &base_type)); + switch (base_type) { + case NC_DOUBLE: + return pfnc_put_vara_double_complex(ncid, varid, startp, countp, op); + case NC_FLOAT: + return pfnc_put_vara_float_complex(ncid, varid, startp, countp, op); + default: + return NC_EBADTYPE; + } + } + return nc_put_vara(ncid, varid, startp, countp, op); +} + +int pfnc_put_vars( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const ptrdiff_t* stridep, + const void* op +) { + if (pfnc_var_is_complex(ncid, varid)) { + nc_type base_type; + CHECK(pfnc_inq_var_complex_base_type(ncid, varid, &base_type)); + switch (base_type) { + case NC_DOUBLE: + return pfnc_put_vars_double_complex( + ncid, varid, startp, countp, stridep, op + ); + case NC_FLOAT: + return pfnc_put_vars_float_complex( + ncid, varid, startp, countp, stridep, op + ); + default: + return NC_EBADTYPE; + } + } + return nc_put_vars(ncid, varid, startp, countp, stridep, op); +} + +int pfnc_get_vars( + int ncid, + int varid, + const size_t* startp, + const size_t* countp, + const ptrdiff_t* stridep, + void* ip +) { + if (pfnc_var_is_complex(ncid, varid)) { + nc_type base_type; + CHECK(pfnc_inq_var_complex_base_type(ncid, varid, &base_type)); + switch (base_type) { + case NC_DOUBLE: + return pfnc_get_vars_double_complex( + ncid, varid, startp, countp, stridep, ip + ); + case NC_FLOAT: + return pfnc_get_vars_float_complex( + ncid, varid, startp, countp, stridep, ip + ); + default: + return NC_EBADTYPE; + } + } + return nc_get_vars(ncid, varid, startp, countp, stridep, ip); +} diff --git a/include/membuf.pyx b/include/membuf.pyx new file mode 100644 index 000000000..a236a1043 --- /dev/null +++ b/include/membuf.pyx @@ -0,0 +1,25 @@ +# Creates a memoryview from a malloced C pointer, +# which will be freed when the python object is garbage collected. +# Code found here is derived from +# http://stackoverflow.com/a/28166272/428751 +from cpython.buffer cimport PyBuffer_FillInfo +from libc.stdlib cimport free + +# create a python memoryview object from a raw pointer. +cdef memview_fromptr(void *memory, size_t size): + cdef _MemBuf buf = _MemBuf() + buf.memory = memory # malloced void pointer + buf.size = size # size of pointer in bytes + return memoryview(buf) + +# private extension type that implements buffer protocol. +cdef class _MemBuf: + cdef void *memory + cdef size_t size + def __getbuffer__(self, Py_buffer *buf, int flags): + PyBuffer_FillInfo(buf, self, self.memory, self.size, 1, flags) + def __releasebuffer__(self, Py_buffer *buf): + # why doesn't this do anything?? 
+ pass + def __dealloc__(self): + free(self.memory) diff --git a/include/mpi-compat.h b/include/mpi-compat.h index 367c58a7d..adf6219b7 100644 --- a/include/mpi-compat.h +++ b/include/mpi-compat.h @@ -4,11 +4,26 @@ #ifndef MPI_COMPAT_H #define MPI_COMPAT_H +#include "netcdf-compat.h" + +#if HAS_PARALLEL_SUPPORT + #include +#ifdef MSMPI_VER +#define PyMPI_HAVE_MPI_Message 1 +#endif + #if (MPI_VERSION < 3) && !defined(PyMPI_HAVE_MPI_Message) typedef void *PyMPI_MPI_Message; #define MPI_Message PyMPI_MPI_Message #endif +#if (MPI_VERSION < 4) && !defined(PyMPI_HAVE_MPI_Session) +typedef void *PyMPI_MPI_Session; +#define MPI_Session PyMPI_MPI_Session +#endif + +#endif /* HAS_PARALLEL_SUPPORT */ + #endif/*MPI_COMPAT_H*/ diff --git a/include/netCDF4.pxi b/include/netCDF4.pxi index 3a8c837c7..62b9be609 100644 --- a/include/netCDF4.pxi +++ b/include/netCDF4.pxi @@ -6,7 +6,7 @@ cdef extern from "stdlib.h": # hdf5 version info. cdef extern from "H5public.h": ctypedef int herr_t - int H5get_libversion( unsigned int *majnum, unsigned int *minnum, unsigned int *relnum ) + int H5get_libversion( unsigned int *majnum, unsigned int *minnum, unsigned int *relnum ) nogil cdef extern from *: ctypedef char* const_char_ptr "const char*" @@ -51,12 +51,11 @@ cdef extern from "netcdf.h": NC_CLOBBER NC_NOCLOBBER # Don't destroy existing file on create NC_64BIT_OFFSET # Use large (64-bit) file offsets + NC_64BIT_DATA # Use cdf-5 format NC_NETCDF4 # Use netCDF-4/HDF5 format NC_CLASSIC_MODEL # Enforce strict netcdf-3 rules. # Use these 'mode' flags for both nc_create and nc_open. - NC_SHARE # Share updates, limit cacheing - NC_MPIIO - NC_MPIPOSIX + NC_SHARE # Share updates, limit caching # The following flag currently is ignored, but use in # nc_open() or nc_create() may someday support use of advisory # locking to prevent multiple writers from clobbering a file @@ -112,7 +111,7 @@ cdef extern from "netcdf.h": NC_FILL NC_NOFILL # Starting with version 3.6, there are different format netCDF - # files. 4.0 instroduces the third one. These defines are only for + # files. 4.0 introduces the third one. These defines are only for # the nc_set_default_format function. NC_FORMAT_CLASSIC NC_FORMAT_64BIT @@ -127,10 +126,7 @@ cdef extern from "netcdf.h": NC_FORMAT_DAP4 NC_FORMAT_PNETCDF NC_FORMAT_UNDEFINED - # Let nc__create() or nc__open() figure out - # as suitable chunk size. NC_SIZEHINT_DEFAULT - # In nc__enddef(), align to the chunk size. 
NC_ALIGN_CHUNK # 'size' argument to ncdimdef for an unlimited dimension NC_UNLIMITED @@ -217,14 +213,10 @@ cdef extern from "netcdf.h": NC_ENDIAN_NATIVE NC_ENDIAN_LITTLE NC_ENDIAN_BIG - NC_SZIP_EC_OPTION_MASK # entropy encoding - NC_SZIP_NN_OPTION_MASK # nearest neighbor encoding const_char_ptr *nc_inq_libvers() nogil const_char_ptr *nc_strerror(int ncerr) - int nc_create(char *path, int cmode, int *ncidp) - int nc__create(char *path, int cmode, size_t initialsz, size_t *chunksizehintp, int *ncidp) - int nc_open(char *path, int mode, int *ncidp) - int nc__open(char *path, int mode, size_t *chunksizehintp, int *ncidp) + int nc_create(char *path, int cmode, int *ncidp) nogil + int nc_open(char *path, int mode, int *ncidp) nogil int nc_inq_path(int ncid, size_t *pathlen, char *path) nogil int nc_inq_format_extended(int ncid, int *formatp, int* modep) nogil int nc_inq_ncid(int ncid, char *name, int *grp_ncid) nogil @@ -233,13 +225,13 @@ cdef extern from "netcdf.h": int nc_inq_grp_parent(int ncid, int *parent_ncid) nogil int nc_inq_varids(int ncid, int *nvars, int *varids) nogil int nc_inq_dimids(int ncid, int *ndims, int *dimids, int include_parents) nogil - int nc_def_grp(int parent_ncid, char *name, int *new_ncid) - int nc_def_compound(int ncid, size_t size, char *name, nc_type *typeidp) + int nc_def_grp(int parent_ncid, char *name, int *new_ncid) nogil + int nc_def_compound(int ncid, size_t size, char *name, nc_type *typeidp) nogil int nc_insert_compound(int ncid, nc_type xtype, char *name, - size_t offset, nc_type field_typeid) + size_t offset, nc_type field_typeid) nogil int nc_insert_array_compound(int ncid, nc_type xtype, char *name, size_t offset, nc_type field_typeid, - int ndims, int *dim_sizes) + int ndims, int *dim_sizes) nogil int nc_inq_type(int ncid, nc_type xtype, char *name, size_t *size) nogil int nc_inq_compound(int ncid, nc_type xtype, char *name, size_t *size, size_t *nfieldsp) nogil @@ -261,83 +253,81 @@ cdef extern from "netcdf.h": int *ndimsp) nogil int nc_inq_compound_fielddim_sizes(int ncid, nc_type xtype, int fieldid, int *dim_sizes) nogil - int nc_def_vlen(int ncid, char *name, nc_type base_typeid, nc_type *xtypep) + int nc_def_vlen(int ncid, char *name, nc_type base_typeid, nc_type *xtypep) nogil int nc_inq_vlen(int ncid, nc_type xtype, char *name, size_t *datum_sizep, nc_type *base_nc_typep) nogil int nc_inq_user_type(int ncid, nc_type xtype, char *name, size_t *size, nc_type *base_nc_typep, size_t *nfieldsp, int *classp) nogil int nc_inq_typeids(int ncid, int *ntypes, int *typeids) nogil int nc_put_att(int ncid, int varid, char *name, nc_type xtype, - size_t len, void *op) + size_t len, void *op) nogil int nc_get_att(int ncid, int varid, char *name, void *ip) nogil int nc_get_att_string(int ncid, int varid, char *name, char **ip) nogil - int nc_put_att_string(int ncid, int varid, char *name, size_t len, char **op) nogil - int nc_def_opaque(int ncid, size_t size, char *name, nc_type *xtypep) - int nc_inq_opaque(int ncid, nc_type xtype, char *name, size_t *sizep) + int nc_put_att_string(int ncid, int varid, char *name, size_t len, const char **op) nogil + int nc_def_opaque(int ncid, size_t size, char *name, nc_type *xtypep) nogil + int nc_inq_opaque(int ncid, nc_type xtype, char *name, size_t *sizep) nogil int nc_put_att_opaque(int ncid, int varid, char *name, - size_t len, void *op) + size_t len, void *op) nogil int nc_get_att_opaque(int ncid, int varid, char *name, - void *ip) + void *ip) nogil int nc_put_cmp_att_opaque(int ncid, nc_type xtype, int fieldid, - 
char *name, size_t len, void *op) + char *name, size_t len, void *op) nogil int nc_get_cmp_att_opaque(int ncid, nc_type xtype, int fieldid, - char *name, void *ip) + char *name, void *ip) nogil int nc_put_var1(int ncid, int varid, size_t *indexp, - void *op) + void *op) nogil int nc_get_var1(int ncid, int varid, size_t *indexp, - void *ip) + void *ip) nogil int nc_put_vara(int ncid, int varid, size_t *startp, - size_t *countp, void *op) + size_t *countp, void *op) nogil int nc_get_vara(int ncid, int varid, size_t *startp, size_t *countp, void *ip) nogil int nc_put_vars(int ncid, int varid, size_t *startp, size_t *countp, ptrdiff_t *stridep, - void *op) + void *op) nogil int nc_get_vars(int ncid, int varid, size_t *startp, size_t *countp, ptrdiff_t *stridep, void *ip) nogil int nc_put_varm(int ncid, int varid, size_t *startp, size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, void *op) + ptrdiff_t *imapp, void *op) nogil int nc_get_varm(int ncid, int varid, size_t *startp, size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, void *ip) - int nc_put_var(int ncid, int varid, void *op) - int nc_get_var(int ncid, int varid, void *ip) + ptrdiff_t *imapp, void *ip) nogil + int nc_put_var(int ncid, int varid, void *op) nogil + int nc_get_var(int ncid, int varid, void *ip) nogil int nc_def_var_deflate(int ncid, int varid, int shuffle, int deflate, - int deflate_level) - int nc_def_var_fletcher32(int ncid, int varid, int fletcher32) + int deflate_level) nogil + int nc_def_var_fletcher32(int ncid, int varid, int fletcher32) nogil int nc_inq_var_fletcher32(int ncid, int varid, int *fletcher32p) nogil - int nc_def_var_chunking(int ncid, int varid, int contiguous, size_t *chunksizesp) - int nc_def_var_fill(int ncid, int varid, int no_fill, void *fill_value) - int nc_def_var_endian(int ncid, int varid, int endian) + int nc_def_var_chunking(int ncid, int varid, int contiguous, size_t *chunksizesp) nogil + int nc_def_var_fill(int ncid, int varid, int no_fill, void *fill_value) nogil + int nc_def_var_endian(int ncid, int varid, int endian) nogil int nc_inq_var_chunking(int ncid, int varid, int *contiguousp, size_t *chunksizesp) nogil int nc_inq_var_deflate(int ncid, int varid, int *shufflep, int *deflatep, int *deflate_levelp) nogil int nc_inq_var_fill(int ncid, int varid, int *no_fill, void *fill_value) nogil int nc_inq_var_endian(int ncid, int varid, int *endianp) nogil - int nc_set_fill(int ncid, int fillmode, int *old_modep) - int nc_set_default_format(int format, int *old_formatp) - int nc_redef(int ncid) - int nc__enddef(int ncid, size_t h_minfree, size_t v_align, - size_t v_minfree, size_t r_align) - int nc_enddef(int ncid) - int nc_sync(int ncid) - int nc_abort(int ncid) - int nc_close(int ncid) + int nc_set_fill(int ncid, int fillmode, int *old_modep) nogil + int nc_set_default_format(int format, int *old_formatp) nogil + int nc_redef(int ncid) nogil + int nc_enddef(int ncid) nogil + int nc_sync(int ncid) nogil + int nc_abort(int ncid) nogil + int nc_close(int ncid) nogil int nc_inq(int ncid, int *ndimsp, int *nvarsp, int *nattsp, int *unlimdimidp) nogil - int nc_inq_ndims(int ncid, int *ndimsp) nogil + int nc_inq_ndims(int ncid, int *ndimsp) nogil int nc_inq_nvars(int ncid, int *nvarsp) nogil - int nc_inq_natts(int ncid, int *nattsp) nogil + int nc_inq_natts(int ncid, int *nattsp) nogil int nc_inq_unlimdim(int ncid, int *unlimdimidp) nogil int nc_inq_unlimdims(int ncid, int *nunlimdimsp, int *unlimdimidsp) nogil int nc_inq_format(int ncid, int *formatp) nogil - int nc_def_dim(int ncid, char 
*name, size_t len, int *idp) + int nc_def_dim(int ncid, char *name, size_t len, int *idp) nogil int nc_inq_dimid(int ncid, char *name, int *idp) nogil int nc_inq_dim(int ncid, int dimid, char *name, size_t *lenp) nogil int nc_inq_dimname(int ncid, int dimid, char *name) nogil int nc_inq_dimlen(int ncid, int dimid, size_t *lenp) nogil - int nc_rename_dim(int ncid, int dimid, char *name) + int nc_rename_dim(int ncid, int dimid, char *name) nogil int nc_inq_att(int ncid, int varid, char *name, nc_type *xtypep, size_t *lenp) nogil int nc_inq_attid(int ncid, int varid, char *name, int *idp) nogil @@ -345,47 +335,13 @@ cdef extern from "netcdf.h": int nc_inq_attlen(int ncid, int varid, char *name, size_t *lenp) nogil int nc_inq_attname(int ncid, int varid, int attnum, char *name) nogil int nc_copy_att(int ncid_in, int varid_in, char *name, int ncid_out, int varid_out) - int nc_rename_att(int ncid, int varid, char *name, char *newname) - int nc_del_att(int ncid, int varid, char *name) + int nc_rename_att(int ncid, int varid, char *name, char *newname) nogil + int nc_del_att(int ncid, int varid, char *name) nogil int nc_put_att_text(int ncid, int varid, char *name, - size_t len, char *op) + size_t len, char *op) nogil int nc_get_att_text(int ncid, int varid, char *name, char *ip) nogil - int nc_put_att_uchar(int ncid, int varid, char *name, nc_type xtype, - size_t len, unsigned char *op) - int nc_get_att_uchar(int ncid, int varid, char *name, unsigned char *ip) - int nc_put_att_schar(int ncid, int varid, char *name, nc_type xtype, - size_t len, signed char *op) - int nc_get_att_schar(int ncid, int varid, char *name, signed char *ip) - int nc_put_att_short(int ncid, int varid, char *name, nc_type xtype, - size_t len, short *op) - int nc_get_att_short(int ncid, int varid, char *name, short *ip) - int nc_put_att_int(int ncid, int varid, char *name, nc_type xtype, - size_t len, int *op) - int nc_get_att_int(int ncid, int varid, char *name, int *ip) - int nc_put_att_long(int ncid, int varid, char *name, nc_type xtype, - size_t len, long *op) - int nc_get_att_long(int ncid, int varid, char *name, long *ip) - int nc_put_att_float(int ncid, int varid, char *name, nc_type xtype, - size_t len, float *op) - int nc_get_att_float(int ncid, int varid, char *name, float *ip) - int nc_put_att_double(int ncid, int varid, char *name, nc_type xtype, - size_t len, double *op) - int nc_get_att_double(int ncid, int varid, char *name, double *ip) - int nc_put_att_ushort(int ncid, int varid, char *name, nc_type xtype, - size_t len, unsigned short *op) - int nc_get_att_ushort(int ncid, int varid, char *name, unsigned short *ip) - int nc_put_att_uint(int ncid, int varid, char *name, nc_type xtype, - size_t len, unsigned int *op) - int nc_get_att_uint(int ncid, int varid, char *name, unsigned int *ip) - int nc_put_att_longlong(int ncid, int varid, char *name, nc_type xtype, - size_t len, long long *op) - int nc_get_att_longlong(int ncid, int varid, char *name, long long *ip) - int nc_put_att_ulonglong(int ncid, int varid, char *name, nc_type xtype, - size_t len, unsigned long long *op) - int nc_get_att_ulonglong(int ncid, int varid, char *name, - unsigned long long *ip) int nc_def_var(int ncid, char *name, nc_type xtype, int ndims, - int *dimidsp, int *varidp) + int *dimidsp, int *varidp) nogil int nc_inq_var(int ncid, int varid, char *name, nc_type *xtypep, int *ndimsp, int *dimidsp, int *nattsp) nogil int nc_inq_varid(int ncid, char *name, int *varidp) nogil @@ -394,336 +350,148 @@ cdef extern from "netcdf.h": int 
nc_inq_varndims(int ncid, int varid, int *ndimsp) nogil int nc_inq_vardimid(int ncid, int varid, int *dimidsp) nogil int nc_inq_varnatts(int ncid, int varid, int *nattsp) nogil - int nc_rename_var(int ncid, int varid, char *name) - int nc_copy_var(int ncid_in, int varid, int ncid_out) - int nc_put_var1_text(int ncid, int varid, size_t *indexp, char *op) - int nc_get_var1_text(int ncid, int varid, size_t *indexp, char *ip) - int nc_put_var1_uchar(int ncid, int varid, size_t *indexp, - unsigned char *op) - int nc_get_var1_uchar(int ncid, int varid, size_t *indexp, - unsigned char *ip) - int nc_put_var1_schar(int ncid, int varid, size_t *indexp, - signed char *op) - int nc_get_var1_schar(int ncid, int varid, size_t *indexp, - signed char *ip) - int nc_put_var1_short(int ncid, int varid, size_t *indexp, - short *op) - int nc_get_var1_short(int ncid, int varid, size_t *indexp, - short *ip) - int nc_put_var1_int(int ncid, int varid, size_t *indexp, int *op) - int nc_get_var1_int(int ncid, int varid, size_t *indexp, int *ip) - int nc_put_var1_long(int ncid, int varid, size_t *indexp, long *op) - int nc_get_var1_long(int ncid, int varid, size_t *indexp, long *ip) - int nc_put_var1_float(int ncid, int varid, size_t *indexp, float *op) - int nc_get_var1_float(int ncid, int varid, size_t *indexp, float *ip) - int nc_put_var1_double(int ncid, int varid, size_t *indexp, double *op) - int nc_get_var1_double(int ncid, int varid, size_t *indexp, double *ip) - int nc_put_var1_ubyte(int ncid, int varid, size_t *indexp, - unsigned char *op) - int nc_get_var1_ubyte(int ncid, int varid, size_t *indexp, - unsigned char *ip) - int nc_put_var1_ushort(int ncid, int varid, size_t *indexp, - unsigned short *op) - int nc_get_var1_ushort(int ncid, int varid, size_t *indexp, - unsigned short *ip) - int nc_put_var1_uint(int ncid, int varid, size_t *indexp, - unsigned int *op) - int nc_get_var1_uint(int ncid, int varid, size_t *indexp, - unsigned int *ip) - int nc_put_var1_longlong(int ncid, int varid, size_t *indexp, - long long *op) - int nc_get_var1_longlong(int ncid, int varid, size_t *indexp, - long long *ip) - int nc_put_var1_ulonglong(int ncid, int varid, size_t *indexp, - unsigned long long *op) - int nc_get_var1_ulonglong(int ncid, int varid, size_t *indexp, - unsigned long long *ip) - int nc_put_vara_text(int ncid, int varid, - size_t *startp, size_t *countp, char *op) - int nc_get_vara_text(int ncid, int varid, - size_t *startp, size_t *countp, char *ip) - int nc_put_vara_uchar(int ncid, int varid, - size_t *startp, size_t *countp, unsigned char *op) - int nc_get_vara_uchar(int ncid, int varid, size_t *startp, - size_t *countp, unsigned char *ip) - int nc_put_vara_schar(int ncid, int varid, size_t *startp, - size_t *countp, signed char *op) - int nc_get_vara_schar(int ncid, int varid, size_t *startp, - size_t *countp, signed char *ip) - int nc_put_vara_short(int ncid, int varid, size_t *startp, - size_t *countp, short *op) - int nc_get_vara_short(int ncid, int varid, size_t *startp, - size_t *countp, short *ip) - int nc_put_vara_int(int ncid, int varid, size_t *startp, - size_t *countp, int *op) - int nc_get_vara_int(int ncid, int varid, size_t *startp, - size_t *countp, int *ip) - int nc_put_vara_long(int ncid, int varid, size_t *startp, - size_t *countp, long *op) - int nc_get_vara_long(int ncid, int varid, - size_t *startp, size_t *countp, long *ip) - int nc_put_vara_float(int ncid, int varid, - size_t *startp, size_t *countp, float *op) - int nc_get_vara_float(int ncid, int varid, - size_t *startp, size_t 
*countp, float *ip) - int nc_put_vara_double(int ncid, int varid, size_t *startp, - size_t *countp, double *op) - int nc_get_vara_double(int ncid, int varid, size_t *startp, - size_t *countp, double *ip) - int nc_put_vara_ubyte(int ncid, int varid, size_t *startp, - size_t *countp, unsigned char *op) - int nc_get_vara_ubyte(int ncid, int varid, size_t *startp, - size_t *countp, unsigned char *ip) - int nc_put_vara_ushort(int ncid, int varid, size_t *startp, - size_t *countp, unsigned short *op) - int nc_get_vara_ushort(int ncid, int varid, size_t *startp, - size_t *countp, unsigned short *ip) - int nc_put_vara_uint(int ncid, int varid, size_t *startp, - size_t *countp, unsigned int *op) - int nc_get_vara_uint(int ncid, int varid, size_t *startp, - size_t *countp, unsigned int *ip) - int nc_put_vara_longlong(int ncid, int varid, size_t *startp, - size_t *countp, long long *op) - int nc_get_vara_longlong(int ncid, int varid, size_t *startp, - size_t *countp, long long *ip) - int nc_put_vara_ulonglong(int ncid, int varid, size_t *startp, - size_t *countp, unsigned long long *op) - int nc_get_vara_ulonglong(int ncid, int varid, size_t *startp, - size_t *countp, unsigned long long *ip) - int nc_put_vars_text(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - char *op) - int nc_get_vars_text(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - char *ip) - int nc_put_vars_uchar(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - unsigned char *op) - int nc_get_vars_uchar(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - unsigned char *ip) - int nc_put_vars_schar(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - signed char *op) - int nc_get_vars_schar(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - signed char *ip) - int nc_put_vars_short(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - short *op) - int nc_get_vars_short(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - short *ip) - int nc_put_vars_int(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - int *op) - int nc_get_vars_int(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - int *ip) - int nc_put_vars_long(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - long *op) - int nc_get_vars_long(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - long *ip) - int nc_put_vars_float(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - float *op) - int nc_get_vars_float(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - float *ip) - int nc_put_vars_double(int ncid, int varid, - size_t *startp, size_t *countp, ptrdiff_t *stridep, - double *op) - int nc_get_vars_double(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - double *ip) - int nc_put_vars_ubyte(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - unsigned char *op) - int nc_get_vars_ubyte(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - unsigned char *ip) - int nc_put_vars_ushort(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - unsigned short *op) - int nc_get_vars_ushort(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - unsigned short *ip) - int nc_put_vars_uint(int ncid, int varid, size_t *startp, - 
size_t *countp, ptrdiff_t *stridep, - unsigned int *op) - int nc_get_vars_uint(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - unsigned int *ip) - int nc_put_vars_longlong(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - long long *op) - int nc_get_vars_longlong(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - long long *ip) - int nc_put_vars_ulonglong(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - unsigned long long *op) - int nc_get_vars_ulonglong(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - unsigned long long *ip) - int nc_put_varm_text(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, char *op) - int nc_get_varm_text(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, char *ip) - int nc_put_varm_uchar(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, unsigned char *op) - int nc_get_varm_uchar(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, unsigned char *ip) - int nc_put_varm_schar(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, signed char *op) - int nc_get_varm_schar(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, signed char *ip) - int nc_put_varm_short(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, short *op) - int nc_get_varm_short(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, short *ip) - int nc_put_varm_int(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, int *op) - int nc_get_varm_int(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, int *ip) - int nc_put_varm_long(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, long *op) - int nc_get_varm_long(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, long *ip) - int nc_put_varm_float(int ncid, int varid,size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, float *op) - int nc_get_varm_float(int ncid, int varid,size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, float *ip) - int nc_put_varm_double(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t *imapp, double *op) - int nc_get_varm_double(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t * imapp, double *ip) - int nc_put_varm_ubyte(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t * imapp, unsigned char *op) - int nc_get_varm_ubyte(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t * imapp, unsigned char *ip) - int nc_put_varm_ushort(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t * imapp, unsigned short *op) - int nc_get_varm_ushort(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t * imapp, unsigned short *ip) - int nc_put_varm_uint(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t * imapp, unsigned int *op) - int nc_get_varm_uint(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t 
*stridep, - ptrdiff_t * imapp, unsigned int *ip) - int nc_put_varm_longlong(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t * imapp, long long *op) - int nc_get_varm_longlong(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t * imapp, long long *ip) - int nc_put_varm_ulonglong(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t * imapp, unsigned long long *op) - int nc_get_varm_ulonglong(int ncid, int varid, size_t *startp, - size_t *countp, ptrdiff_t *stridep, - ptrdiff_t * imapp, unsigned long long *ip) - int nc_put_var_text(int ncid, int varid, char *op) - int nc_get_var_text(int ncid, int varid, char *ip) - int nc_put_var_uchar(int ncid, int varid, unsigned char *op) - int nc_get_var_uchar(int ncid, int varid, unsigned char *ip) - int nc_put_var_schar(int ncid, int varid, signed char *op) - int nc_get_var_schar(int ncid, int varid, signed char *ip) - int nc_put_var_short(int ncid, int varid, short *op) - int nc_get_var_short(int ncid, int varid, short *ip) - int nc_put_var_int(int ncid, int varid, int *op) - int nc_get_var_int(int ncid, int varid, int *ip) - int nc_put_var_long(int ncid, int varid, long *op) - int nc_get_var_long(int ncid, int varid, long *ip) - int nc_put_var_float(int ncid, int varid, float *op) - int nc_get_var_float(int ncid, int varid, float *ip) - int nc_put_var_double(int ncid, int varid, double *op) - int nc_get_var_double(int ncid, int varid, double *ip) - int nc_put_var_ubyte(int ncid, int varid, unsigned char *op) - int nc_get_var_ubyte(int ncid, int varid, unsigned char *ip) - int nc_put_var_ushort(int ncid, int varid, unsigned short *op) - int nc_get_var_ushort(int ncid, int varid, unsigned short *ip) - int nc_put_var_uint(int ncid, int varid, unsigned int *op) - int nc_get_var_uint(int ncid, int varid, unsigned int *ip) - int nc_put_var_longlong(int ncid, int varid, long long *op) - int nc_get_var_longlong(int ncid, int varid, long long *ip) - int nc_put_var_ulonglong(int ncid, int varid, unsigned long long *op) - int nc_get_var_ulonglong(int ncid, int varid, unsigned long long *ip) - # set logging verbosity level. 
- void nc_set_log_level(int new_level) - int nc_show_metadata(int ncid) - int nc_free_vlen(nc_vlen_t *vl) - int nc_free_vlens(size_t len, nc_vlen_t *vl) - int nc_free_string(size_t len, char **data) - int nc_set_chunk_cache(size_t size, size_t nelems, float preemption) - int nc_get_chunk_cache(size_t *sizep, size_t *nelemsp, float *preemptionp) - int nc_set_var_chunk_cache(int ncid, int varid, size_t size, size_t nelems, float preemption) + int nc_rename_var(int ncid, int varid, char *name) nogil + int nc_free_vlen(nc_vlen_t *vl) nogil + int nc_free_vlens(size_t len, nc_vlen_t *vl) nogil + int nc_free_string(size_t len, char **data) nogil + int nc_get_chunk_cache(size_t *sizep, size_t *nelemsp, float *preemptionp) nogil + int nc_set_chunk_cache(size_t size, size_t nelems, float preemption) nogil + int nc_set_var_chunk_cache(int ncid, int varid, size_t size, size_t nelems, float preemption) nogil int nc_get_var_chunk_cache(int ncid, int varid, size_t *sizep, size_t *nelemsp, float *preemptionp) nogil - int nc_rename_grp(int grpid, char *name) - int nc_def_enum(int ncid, nc_type base_typeid, char *name, nc_type *typeidp) - int nc_insert_enum(int ncid, nc_type xtype, char *name, void *value) + int nc_def_enum(int ncid, nc_type base_typeid, char *name, nc_type *typeidp) nogil + int nc_insert_enum(int ncid, nc_type xtype, char *name, void *value) nogil int nc_inq_enum(int ncid, nc_type xtype, char *name, nc_type *base_nc_typep,\ size_t *base_sizep, size_t *num_membersp) nogil int nc_inq_enum_member(int ncid, nc_type xtype, int idx, char *name, void *value) nogil - int nc_inq_enum_ident(int ncid, nc_type xtype, long long value, char *identifier) nogil -IF HAS_NC_OPEN_MEM: - cdef extern from "netcdf_mem.h": - int nc_open_mem(const char *path, int mode, size_t size, void* memory, int *ncidp) +cdef extern from "mpi-compat.h": + pass -IF HAS_NC_PAR: - cdef extern from "mpi-compat.h": pass - cdef extern from "netcdf_par.h": - ctypedef int MPI_Comm - ctypedef int MPI_Info - int nc_create_par(char *path, int cmode, MPI_Comm comm, MPI_Info info, int *ncidp); - int nc_open_par(char *path, int mode, MPI_Comm comm, MPI_Info info, int *ncidp); - int nc_var_par_access(int ncid, int varid, int par_access); - cdef enum: - NC_COLLECTIVE - NC_INDEPENDENT - cdef extern from "netcdf.h": - cdef enum: - NC_MPIIO - NC_PNETCDF # taken from numpy.pxi in numpy 1.0rc2. 
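The compile-time IF HAS_NC_OPEN_MEM / IF HAS_NC_PAR blocks removed above give way to the unconditional netcdf-compat.h declarations below, which expose HAS_* feature flags instead. A sketch of how such build-time flags are typically consumed from Python, assuming they remain re-exported as __has_*__ module attributes in the style of the deleted netCDF4/__init__.py later in this patch; the exact attribute name used here is an assumption:

import numpy as np
import netCDF4

# Attribute name is an assumption; fall back to zlib if it is absent or False.
use_zstd = getattr(netCDF4, "__has_zstandard_support__", False)
compression = "zstd" if use_zstd else "zlib"

with netCDF4.Dataset("scratch.nc", "w") as ds:   # hypothetical output file
    ds.createDimension("x", 100)
    v = ds.createVariable("v", "f4", ("x",), compression=compression)
    v[:] = np.arange(100, dtype="f4")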
cdef extern from "numpy/arrayobject.h": ctypedef int npy_intp ctypedef extern class numpy.ndarray [object PyArrayObject]: - cdef char *data - cdef int nd - cdef npy_intp *dimensions - cdef npy_intp *strides - cdef object base -# cdef dtype descr - cdef int flags - npy_intp PyArray_SIZE(ndarray arr) - npy_intp PyArray_ISCONTIGUOUS(ndarray arr) - npy_intp PyArray_ISALIGNED(ndarray arr) + pass + npy_intp PyArray_SIZE(ndarray arr) nogil + npy_intp PyArray_ISCONTIGUOUS(ndarray arr) nogil + npy_intp PyArray_ISALIGNED(ndarray arr) nogil + void* PyArray_DATA(ndarray) nogil + char* PyArray_BYTES(ndarray) nogil + npy_intp* PyArray_STRIDES(ndarray) nogil void import_array() + + +include "parallel_support_imports.pxi" + +# Compatibility shims +cdef extern from "netcdf-compat.h": + int nc_rename_grp(int grpid, char *name) nogil + int nc_set_alignment(int threshold, int alignment) + int nc_get_alignment(int *threshold, int *alignment) + int nc_rc_set(char* key, char* value) nogil + const_char_ptr *nc_rc_get(char* key) + + int nc_open_mem(const char *path, int mode, size_t size, void* memory, int *ncidp) nogil + int nc_create_mem(const char *path, int mode, size_t initialize, int *ncidp) nogil + ctypedef struct NC_memio: + size_t size + void* memory + int flags + int nc_close_memio(int ncid, NC_memio* info) nogil + + # Quantize shims + int nc_def_var_quantize(int ncid, int varid, int quantize_mode, int nsd) nogil + int nc_inq_var_quantize(int ncid, int varid, int *quantize_modep, int *nsdp) nogil + + # Filter shims + int nc_inq_filter_avail(int ncid, unsigned filterid) nogil + + int nc_def_var_szip(int ncid, int varid, int options_mask, + int pixels_per_bloc) nogil + int nc_inq_var_szip(int ncid, int varid, int *options_maskp, + int *pixels_per_blockp) nogil + + int nc_def_var_zstandard(int ncid, int varid, int level) nogil + int nc_inq_var_zstandard(int ncid, int varid, int* hasfilterp, int *levelp) nogil + + int nc_def_var_bzip2(int ncid, int varid, int level) nogil + int nc_inq_var_bzip2(int ncid, int varid, int* hasfilterp, int *levelp) nogil + + int nc_def_var_blosc(int ncid, int varid, unsigned subcompressor, unsigned level, + unsigned blocksize, unsigned addshuffle) nogil + int nc_inq_var_blosc(int ncid, int varid, int* hasfilterp, unsigned* subcompressorp, + unsigned* levelp, unsigned* blocksizep, + unsigned* addshufflep) nogil + + # Parallel shims + int nc_create_par(char *path, int cmode, MPI_Comm comm, MPI_Info info, int *ncidp) nogil + int nc_open_par(char *path, int mode, MPI_Comm comm, MPI_Info info, int *ncidp) nogil + int nc_var_par_access(int ncid, int varid, int par_access) nogil + + cdef enum: + HAS_RENAME_GRP + HAS_NC_INQ_PATH + HAS_NC_INQ_FORMAT_EXTENDED + HAS_NC_OPEN_MEM + HAS_NC_CREATE_MEM + HAS_CDF5_FORMAT + HAS_PARALLEL_SUPPORT + HAS_PARALLEL4_SUPPORT + HAS_PNETCDF_SUPPORT + HAS_SZIP_SUPPORT + HAS_QUANTIZATION_SUPPORT + HAS_ZSTANDARD_SUPPORT + HAS_BZIP2_SUPPORT + HAS_BLOSC_SUPPORT + HAS_SET_ALIGNMENT + HAS_NCFILTER + HAS_NCRCSET + + NC_NOQUANTIZE + NC_QUANTIZE_BITGROOM + NC_QUANTIZE_GRANULARBR + NC_QUANTIZE_BITROUND + + H5Z_FILTER_SZIP + H5Z_FILTER_ZSTD + H5Z_FILTER_BZIP2 + H5Z_FILTER_BLOSC + + NC_COLLECTIVE + NC_INDEPENDENT + + NC_MPIIO + NC_MPIPOSIX + NC_PNETCDF + + +# Declarations for handling complex numbers +cdef extern from "nc_complex/nc_complex.h": + bint pfnc_var_is_complex(int ncid, int varid) nogil + bint pfnc_var_is_complex_type(int ncid, int varid) nogil + + int pfnc_get_complex_dim(int ncid, int* nc_dim) nogil + int pfnc_inq_var_complex_base_type(int ncid, int 
varid, int* nc_typeid) nogil + + int pfnc_inq_varndims (int ncid, int varid, int *ndimsp) nogil + int pfnc_inq_vardimid (int ncid, int varid, int *dimidsp) nogil + + int pfnc_def_var(int ncid, char *name, nc_type xtype, int ndims, + int *dimidsp, int *varidp) nogil + + int pfnc_get_vars(int ncid, int varid, size_t *startp, + size_t *countp, ptrdiff_t *stridep, + void *ip) nogil + + int pfnc_put_vars(int ncid, int varid, size_t *startp, + size_t *countp, ptrdiff_t *stridep, + void *op) nogil + + cdef enum: + PFNC_DOUBLE_COMPLEX + PFNC_DOUBLE_COMPLEX_DIM + PFNC_FLOAT_COMPLEX + PFNC_FLOAT_COMPLEX_DIM diff --git a/include/netcdf-compat.h b/include/netcdf-compat.h new file mode 100644 index 000000000..ccfb8322e --- /dev/null +++ b/include/netcdf-compat.h @@ -0,0 +1,206 @@ +#ifndef NETCDF_COMPAT_H +#define NETCDF_COMPAT_H + +#include <netcdf.h> +#include <netcdf_meta.h> + +#define NC_VERSION_EQ(MAJOR, MINOR, PATCH) \ + ((NC_VERSION_MAJOR == (MAJOR)) && \ + (NC_VERSION_MINOR == (MINOR)) && \ + (NC_VERSION_PATCH == (PATCH))) + +#define NC_VERSION_GT(MAJOR, MINOR, PATCH) \ + (NC_VERSION_MAJOR > (MAJOR) || \ + (NC_VERSION_MAJOR == (MAJOR) && \ + (NC_VERSION_MINOR > (MINOR) || \ + (NC_VERSION_MINOR == (MINOR) && \ + (NC_VERSION_PATCH > (PATCH)))))) + +#define NC_VERSION_GE(MAJOR, MINOR, PATCH) \ + (NC_VERSION_GT(MAJOR, MINOR, PATCH) || \ + NC_VERSION_EQ(MAJOR, MINOR, PATCH)) + +#if NC_VERSION_GE(4, 3, 0) +#define HAS_RENAME_GRP 1 +#else +#define HAS_RENAME_GRP 0 +static inline int nc_rename_grp(int grpid, const char* name) { return NC_EINVAL; } +#endif + +#if NC_VERSION_GE(4, 1, 2) +#define HAS_NC_INQ_PATH 1 +#else +#define HAS_NC_INQ_PATH 0 +static inline int nc_inq_path(int ncid, size_t *pathlen, char *path) { + *pathlen = 0; *path = '\0'; return NC_EINVAL; +} +#endif + +#if NC_VERSION_GE(4, 3, 1) +#define HAS_NC_INQ_FORMAT_EXTENDED 1 +#else +#define HAS_NC_INQ_FORMAT_EXTENDED 0 +static inline int nc_inq_format_extended(int ncid, int *formatp, int* modep) { + *formatp = 0; *modep = 0; return NC_EINVAL; +} +#endif + +#if NC_VERSION_GE(4, 9, 0) +#define HAS_SET_ALIGNMENT 1 +#else +#define HAS_SET_ALIGNMENT 0 +static inline int nc_set_alignment(int threshold, int alignment) { return NC_EINVAL; } +static inline int nc_get_alignment(int* thresholdp, int* alignmentp) { + *thresholdp = 0; *alignmentp = 0; return NC_EINVAL; +} +#endif + +#if NC_VERSION_GE(4, 9, 0) +#define HAS_NCRCSET 1 +#else +#define HAS_NCRCSET 0 +static inline int nc_rc_set(const char* key, const char* value) { return NC_EINVAL; } +static inline const char *nc_rc_get(const char* key) { return NULL; } +#endif + +#if NC_VERSION_GE(4, 4, 0) +#include <netcdf_mem.h> +#define HAS_NC_OPEN_MEM 1 +#else +#define HAS_NC_OPEN_MEM 0 +static inline int nc_open_mem(const char *path, int mode, size_t size, void* memory, int *ncidp) { return NC_EINVAL; } +#endif + +#if NC_VERSION_GE(4, 6, 2) +#define HAS_NC_CREATE_MEM 1 +#else +#define HAS_NC_CREATE_MEM 0 +static inline int nc_create_mem(const char *path, int mode, size_t initialize, int *ncidp) { return NC_EINVAL; } +typedef struct NC_memio { + size_t size; + void* memory; + int flags; +} NC_memio; +static inline int nc_close_memio(int ncid, NC_memio* info) { return NC_EINVAL; } +#endif + +#if defined(NC_HAS_CDF5) && NC_HAS_CDF5 +#define HAS_CDF5_FORMAT 1 +#else +# ifndef NC_HAS_CDF5 +# define NC_64BIT_DATA 0x0020 +# define NC_CDF5 NC_64BIT_DATA +# define NC_FORMAT_64BIT_OFFSET (2) +# define NC_FORMAT_64BIT_DATA (5) +# endif +#define HAS_CDF5_FORMAT 0 +#endif + +#if defined(NC_HAS_PARALLEL) && NC_HAS_PARALLEL +#include <netcdf_par.h> +#define
HAS_PARALLEL_SUPPORT 1 +#else +#define HAS_PARALLEL_SUPPORT 0 +typedef int MPI_Comm; +typedef int MPI_Info; +static inline int nc_create_par(const char *path, int cmode, MPI_Comm comm, MPI_Info info, int *ncidp) { return NC_EINVAL; } +static inline int nc_open_par(const char *path, int mode, MPI_Comm comm, MPI_Info info, int *ncidp) { return NC_EINVAL; } +static inline int nc_var_par_access(int ncid, int varid, int par_access) { return NC_EINVAL; } +# ifndef NC_INDEPENDENT +# define NC_INDEPENDENT 0 +# define NC_COLLECTIVE 1 +# endif +# ifndef NC_MPIIO +# define NC_MPIIO 0x2000 +# define NC_MPIPOSIX NC_MPIIO +# define NC_PNETCDF (NC_MPIIO) +# endif +#endif + +#if defined(NC_HAS_PARALLEL4) && NC_HAS_PARALLEL4 +#define HAS_PARALLEL4_SUPPORT 1 +#else +#define HAS_PARALLEL4_SUPPORT 0 +#endif + +#if defined(NC_HAS_PNETCDF) && NC_HAS_PNETCDF +#define HAS_PNETCDF_SUPPORT 1 +#else +#define HAS_PNETCDF_SUPPORT 0 +#endif + +#if NC_VERSION_GE(4, 7, 0) +#include +#endif + +#if NC_VERSION_GE(4, 9, 0) +#define HAS_NCFILTER 1 +#else +#define HAS_NCFILTER 0 +static inline int nc_inq_filter_avail(int ncid, unsigned filterid) { return -136; } +#endif + +#if defined(NC_HAS_SZIP) && NC_HAS_SZIP +#define HAS_SZIP_SUPPORT 1 +#else +#define HAS_SZIP_SUPPORT 0 +# ifndef NC_HAS_SZIP +static inline int nc_def_var_szip(int ncid, int varid, int options_mask, int pixels_per_bloc) { return NC_EINVAL; } +# endif +# ifndef H5Z_FILTER_SZIP +# define H5Z_FILTER_SZIP 4 +# endif +#endif + +#if defined(NC_HAS_QUANTIZE) && NC_HAS_QUANTIZE +#define HAS_QUANTIZATION_SUPPORT 1 +#else +#define HAS_QUANTIZATION_SUPPORT 0 +# ifndef NC_HAS_QUANTIZE +static inline int nc_def_var_quantize(int ncid, int varid, int quantize_mode, int nsd) { return NC_EINVAL; } +static inline int nc_inq_var_quantize(int ncid, int varid, int *quantize_modep, int *nsdp) { return NC_EINVAL; } +# define NC_NOQUANTIZE 0 +# define NC_QUANTIZE_BITGROOM 1 +# define NC_QUANTIZE_GRANULARBR 2 +# define NC_QUANTIZE_BITROUND 3 +# endif +#endif + +#if defined(NC_HAS_ZSTD) && NC_HAS_ZSTD +#define HAS_ZSTANDARD_SUPPORT 1 +#else +# ifndef NC_HAS_ZSTD +static inline int nc_def_var_zstandard(int ncid, int varid, int level) { return NC_EINVAL; } +static inline int nc_inq_var_zstandard(int ncid, int varid, int* hasfilterp, int *levelp) { return NC_EINVAL; } +# define H5Z_FILTER_ZSTD 32015 +# endif +#define HAS_ZSTANDARD_SUPPORT 0 +#endif + +#if defined(NC_HAS_BZ2) && NC_HAS_BZ2 +#define HAS_BZIP2_SUPPORT 1 +#else +# ifndef NC_HAS_BZ2 +static inline int nc_def_var_bzip2(int ncid, int varid, int level) { return NC_EINVAL; } +static inline int nc_inq_var_bzip2(int ncid, int varid, int* hasfilterp, int *levelp) { return NC_EINVAL; } +# define H5Z_FILTER_BZIP2 307 +# endif +#define HAS_BZIP2_SUPPORT 0 +#endif + +#if defined(NC_HAS_BLOSC) && NC_HAS_BLOSC +#define HAS_BLOSC_SUPPORT 1 +#else +# ifndef NC_HAS_BLOSC +static inline int nc_def_var_blosc(int ncid, int varid, unsigned subcompressor, unsigned level, unsigned blocksize, unsigned addshuffle) { + return NC_EINVAL; +} +static inline int nc_inq_var_blosc(int ncid, int varid, int* hasfilterp, unsigned* subcompressorp, unsigned* levelp, unsigned* blocksizep, unsigned* addshufflep) { + return NC_EINVAL; +} +# define H5Z_FILTER_BLOSC 32001 +# endif +#define HAS_BLOSC_SUPPORT 0 +#endif + +#endif /* NETCDF_COMPAT_H */ diff --git a/include/no_parallel_support_imports.pxi.in b/include/no_parallel_support_imports.pxi.in new file mode 100644 index 000000000..dd22f1968 --- /dev/null +++ b/include/no_parallel_support_imports.pxi.in @@ -0,0 
+1,10 @@ +# Stubs for when parallel support is not enabled + +ctypedef int MPI_Comm +ctypedef int MPI_Info +ctypedef int Comm +ctypedef int Info +cdef MPI_Comm MPI_COMM_WORLD +cdef MPI_Info MPI_INFO_NULL +MPI_COMM_WORLD = 0 +MPI_INFO_NULL = 0 diff --git a/include/parallel_support_imports.pxi.in b/include/parallel_support_imports.pxi.in new file mode 100644 index 000000000..5379bedc4 --- /dev/null +++ b/include/parallel_support_imports.pxi.in @@ -0,0 +1,16 @@ +# Imports and typedefs required at compile time for enabling parallel support + +cimport mpi4py.MPI as MPI +from mpi4py.libmpi cimport ( + MPI_Comm, + MPI_Info, + MPI_Comm_dup, + MPI_Info_dup, + MPI_Comm_free, + MPI_Info_free, + MPI_INFO_NULL, + MPI_COMM_WORLD, +) + +ctypedef MPI.Comm Comm +ctypedef MPI.Info Info diff --git a/netCDF4/__init__.py b/netCDF4/__init__.py deleted file mode 100644 index 42e311374..000000000 --- a/netCDF4/__init__.py +++ /dev/null @@ -1,11 +0,0 @@ -# init for netCDF4. package -# Docstring comes from extension module _netCDF4. -from ._netCDF4 import * -# Need explicit imports for names beginning with underscores -from ._netCDF4 import __doc__, __pdoc__ -from ._netCDF4 import (__version__, __netcdf4libversion__, __hdf5libversion__, - __has_rename_grp__, __has_nc_inq_path__, - __has_nc_inq_format_extended__, __has_nc_open_mem__, - __has_cdf5_format__,__has_nc_par__) -__all__ =\ -['Dataset','Variable','Dimension','Group','MFDataset','MFTime','CompoundType','VLType','date2num','num2date','date2index','stringtochar','chartostring','stringtoarr','getlibversion','EnumType'] diff --git a/netcdftime/__init__.py b/netcdftime/__init__.py deleted file mode 100644 index 8693bb6a2..000000000 --- a/netcdftime/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from ._netcdftime import utime, JulianDayFromDate, DateFromJulianDay -from ._netcdftime import _parse_date, date2index, time2index -from ._netcdftime import DatetimeProlepticGregorian as datetime -from ._netcdftime import DatetimeNoLeap, DatetimeAllLeap, Datetime360Day, DatetimeJulian, \ - DatetimeGregorian, DatetimeProlepticGregorian -from ._netcdftime import microsec_units, millisec_units, \ - sec_units, hr_units, day_units, min_units -from ._netcdftime import __version__ diff --git a/netcdftime/_netcdftime.pyx b/netcdftime/_netcdftime.pyx deleted file mode 100644 index 5302684a8..000000000 --- a/netcdftime/_netcdftime.pyx +++ /dev/null @@ -1,1705 +0,0 @@ -""" -Performs conversions of netCDF time coordinate data to/from datetime objects. 
-""" - -from cpython.object cimport PyObject_RichCompare - -import numpy as np -import math -import numpy -import re - -from datetime import datetime as real_datetime -from datetime import timedelta -import time # strftime - -try: - from itertools import izip as zip -except ImportError: # python 3.x - pass - -microsec_units = ['microseconds','microsecond', 'microsec', 'microsecs'] -millisec_units = ['milliseconds', 'millisecond', 'millisec', 'millisecs'] -sec_units = ['second', 'seconds', 'sec', 'secs', 's'] -min_units = ['minute', 'minutes', 'min', 'mins'] -hr_units = ['hour', 'hours', 'hr', 'hrs', 'h'] -day_units = ['day', 'days', 'd'] -_units = microsec_units+millisec_units+sec_units+min_units+hr_units+day_units -_calendars = ['standard', 'gregorian', 'proleptic_gregorian', - 'noleap', 'julian', 'all_leap', '365_day', '366_day', '360_day'] - -__version__ = '1.4.1' - -# Adapted from http://delete.me.uk/2005/03/iso8601.html -# Note: This regex ensures that all ISO8601 timezone formats are accepted - -# but, due to legacy support for other timestrings, not all incorrect formats can be rejected. -# For example, the TZ spec "+01:0" will still work even though the minutes value is only one character long. -ISO8601_REGEX = re.compile(r"(?P[+-]?[0-9]{1,4})(-(?P[0-9]{1,2})(-(?P[0-9]{1,2})" - r"(((?P.)(?P[0-9]{1,2}):(?P[0-9]{1,2})(:(?P[0-9]{1,2})(\.(?P[0-9]+))?)?)?" - r"((?P.?)(?PZ|(([-+])([0-9]{2})((:([0-9]{2}))|([0-9]{2}))?)))?)?)?)?" - ) -# Note: The re module apparently does not support branch reset groups that allow -# redifinition of the same group name in alternative branches as PCRE does. -# Using two different group names is also somewhat ugly, but other solutions might -# hugely inflate the expression. feel free to contribute a better solution. -TIMEZONE_REGEX = re.compile( - "(?P[+-])(?P[0-9]{2})(?:(?::(?P[0-9]{2}))|(?P[0-9]{2}))?") - -def JulianDayFromDate(date, calendar='standard'): - """ - - creates a Julian Day from a 'datetime-like' object. Returns the fractional - Julian Day (approximately millisecond accuracy). - - if calendar='standard' or 'gregorian' (default), Julian day follows Julian - Calendar on and before 1582-10-5, Gregorian calendar after 1582-10-15. - - if calendar='proleptic_gregorian', Julian Day follows gregorian calendar. - - if calendar='julian', Julian Day follows julian calendar. - - Algorithm: - - Meeus, Jean (1998) Astronomical Algorithms (2nd Edition). Willmann-Bell, - Virginia. p. 63 - - """ - - # based on redate.py by David Finlayson. 
- - # check if input was scalar and change return accordingly - isscalar = False - try: - date[0] - except: - isscalar = True - - date = np.atleast_1d(np.array(date)) - year = np.empty(len(date), dtype=np.int32) - month = year.copy() - day = year.copy() - hour = year.copy() - minute = year.copy() - second = year.copy() - microsecond = year.copy() - for i, d in enumerate(date): - year[i] = d.year - month[i] = d.month - day[i] = d.day - hour[i] = d.hour - minute[i] = d.minute - second[i] = d.second - microsecond[i] = d.microsecond - # convert years in BC era to astronomical years (so that 1 BC is year zero) - # (fixes issue #596) - year[year < 0] = year[year < 0] + 1 - # Convert time to fractions of a day - day = day + hour / 24.0 + minute / 1440.0 + (second + microsecond/1.e6) / 86400.0 - - # Start Meeus algorithm (variables are in his notation) - month_lt_3 = month < 3 - month[month_lt_3] = month[month_lt_3] + 12 - year[month_lt_3] = year[month_lt_3] - 1 - - # MC - assure array - # A = np.int64(year / 100) - A = (year / 100).astype(np.int64) - - # MC - # jd = int(365.25 * (year + 4716)) + int(30.6001 * (month + 1)) + \ - # day - 1524.5 - jd = 365. * year + np.int32(0.25 * year + 2000.) + np.int32(30.6001 * (month + 1)) + \ - day + 1718994.5 - - # optionally adjust the jd for the switch from - # the Julian to Gregorian Calendar - # here assumed to have occurred the day after 1582 October 4 - if calendar in ['standard', 'gregorian']: - # MC - do not have to be contiguous dates - # if np.min(jd) >= 2299170.5: - # # 1582 October 15 (Gregorian Calendar) - # B = 2 - A + np.int32(A / 4) - # elif np.max(jd) < 2299160.5: - # # 1582 October 5 (Julian Calendar) - # B = np.zeros(len(jd)) - # else: - # print(date, calendar, jd) - # raise ValueError( - # 'impossible date (falls in gap between end of Julian calendar and beginning of Gregorian calendar') - if np.any((jd >= 2299160.5) & (jd < 2299170.5)): # missing days in Gregorian calendar - raise ValueError( - 'impossible date (falls in gap between end of Julian calendar and beginning of Gregorian calendar') - B = np.zeros(len(jd)) # 1582 October 5 (Julian Calendar) - ii = np.where(jd >= 2299170.5)[0] # 1582 October 15 (Gregorian Calendar) - if ii.size>0: - B[ii] = 2 - A[ii] + np.int32(A[ii] / 4) - elif calendar == 'proleptic_gregorian': - B = 2 - A + np.int32(A / 4) - elif calendar == 'julian': - B = np.zeros(len(jd)) - else: - raise ValueError( - 'unknown calendar, must be one of julian,standard,gregorian,proleptic_gregorian, got %s' % calendar) - - # adjust for Julian calendar if necessary - jd = jd + B - - # Add a small offset (proportional to Julian date) for correct re-conversion. - # This is about 45 microseconds in 2000 for Julian date starting -4712. - # (pull request #433). - eps = np.finfo(float).eps - eps = np.maximum(eps*jd, eps) - jd += eps - - if isscalar: - return jd[0] - else: - return jd - - -cdef _NoLeapDayFromDate(date): - """ - -creates a Julian Day for a calendar with no leap years from a datetime -instance. Returns the fractional Julian Day (approximately millisecond accuracy). - - """ - - year = date.year - month = date.month - day = date.day - hour = date.hour - minute = date.minute - second = date.second - microsecond = date.microsecond - # Convert time to fractions of a day - day = day + hour / 24.0 + minute / 1440.0 + (second + microsecond/1.e6) / 86400.0 - - # Start Meeus algorithm (variables are in his notation) - if (month < 3): - month = month + 12 - year = year - 1 - - jd = int(365. 
* (year + 4716)) + int(30.6001 * (month + 1)) + \ - day - 1524.5 - - return jd - - -cdef _AllLeapFromDate(date): - """ - -creates a Julian Day for a calendar where all years have 366 days from -a 'datetime-like' object. -Returns the fractional Julian Day (approximately millisecond accuracy). - - """ - - year = date.year - month = date.month - day = date.day - hour = date.hour - minute = date.minute - second = date.second - microsecond = date.microsecond - # Convert time to fractions of a day - day = day + hour / 24.0 + minute / 1440.0 + (second + microsecond/1.e6) / 86400.0 - - # Start Meeus algorithm (variables are in his notation) - if (month < 3): - month = month + 12 - year = year - 1 - - jd = int(366. * (year + 4716)) + int(30.6001 * (month + 1)) + \ - day - 1524.5 - - return jd - - -cdef _360DayFromDate(date): - """ - -creates a Julian Day for a calendar where all months have 30 days from -a 'datetime-like' object. -Returns the fractional Julian Day (approximately millisecond accuracy). - - """ - - year = date.year - month = date.month - day = date.day - hour = date.hour - minute = date.minute - second = date.second - microsecond = date.microsecond - # Convert time to fractions of a day - day = day + hour / 24.0 + minute / 1440.0 + (second + microsecond/1.e6) / 86400.0 - - jd = int(360. * (year + 4716)) + int(30. * (month - 1)) + day - - return jd - - -def DateFromJulianDay(JD, calendar='standard'): - """ - - returns a 'datetime-like' object given Julian Day. Julian Day is a - fractional day with approximately millisecond accuracy. - - if calendar='standard' or 'gregorian' (default), Julian day follows Julian - Calendar on and before 1582-10-5, Gregorian calendar after 1582-10-15. - - if calendar='proleptic_gregorian', Julian Day follows gregorian calendar. - - if calendar='julian', Julian Day follows julian calendar. - - The datetime object is a 'real' datetime object if the date falls in - the Gregorian calendar (i.e. calendar='proleptic_gregorian', or - calendar = 'standard'/'gregorian' and the date is after 1582-10-15). - Otherwise, it's a 'phony' datetime object which is actually an instance - of netcdftime.datetime. - - - Algorithm: - - Meeus, Jean (1998) Astronomical Algorithms (2nd Edition). Willmann-Bell, - Virginia. p. 63 - """ - - # based on redate.py by David Finlayson. - - julian = np.array(JD, dtype=float) - - if np.min(julian) < 0: - raise ValueError('Julian Day must be positive') - - dayofwk = np.atleast_1d(np.int32(np.fmod(np.int32(julian + 1.5), 7))) - # get the day (Z) and the fraction of the day (F) - # add 0.000005 which is 452 ms in case of jd being after - # second 23:59:59 of a day we want to round to the next day see issue #75 - Z = np.atleast_1d(np.int32(np.round(julian))) - F = np.atleast_1d(julian + 0.5 - Z).astype(np.float64) - if calendar in ['standard', 'gregorian']: - # MC - # alpha = int((Z - 1867216.25)/36524.25) - # A = Z + 1 + alpha - int(alpha/4) - alpha = np.int32(((Z - 1867216.) - 0.25) / 36524.25) - A = Z + 1 + alpha - np.int32(0.25 * alpha) - # check if dates before oct 5th 1582 are in the array - ind_before = np.where(julian < 2299160.5)[0] - if len(ind_before) > 0: - A[ind_before] = Z[ind_before] - - elif calendar == 'proleptic_gregorian': - # MC - # alpha = int((Z - 1867216.25)/36524.25) - # A = Z + 1 + alpha - int(alpha/4) - alpha = np.int32(((Z - 1867216.) 
- 0.25) / 36524.25) - A = Z + 1 + alpha - np.int32(0.25 * alpha) - elif calendar == 'julian': - A = Z - else: - raise ValueError( - 'unknown calendar, must be one of julian,standard,gregorian,proleptic_gregorian, got %s' % calendar) - - B = A + 1524 - # MC - # C = int((B - 122.1)/365.25) - # D = int(365.25 * C) - C = np.atleast_1d(np.int32(6680. + ((B - 2439870.) - 122.1) / 365.25)) - D = np.atleast_1d(np.int32(365 * C + np.int32(0.25 * C))) - E = np.atleast_1d(np.int32((B - D) / 30.6001)) - - # Convert to date - day = np.clip(B - D - np.int64(30.6001 * E) + F, 1, None) - nday = B - D - 123 - dayofyr = nday - 305 - ind_nday_before = np.where(nday <= 305)[0] - if len(ind_nday_before) > 0: - dayofyr[ind_nday_before] = nday[ind_nday_before] + 60 - # MC - # if E < 14: - # month = E - 1 - # else: - # month = E - 13 - - # if month > 2: - # year = C - 4716 - # else: - # year = C - 4715 - month = E - 1 - month[month > 12] = month[month > 12] - 12 - year = C - 4715 - year[month > 2] = year[month > 2] - 1 - year[year <= 0] = year[year <= 0] - 1 - - # a leap year? - leap = np.zeros(len(year),dtype=dayofyr.dtype) - leap[year % 4 == 0] = 1 - if calendar == 'proleptic_gregorian': - leap[(year % 100 == 0) & (year % 400 != 0)] = 0 - elif calendar in ['standard', 'gregorian']: - leap[(year % 100 == 0) & (year % 400 != 0) & (julian < 2299160.5)] = 0 - - inc_idx = np.where((leap == 1) & (month > 2))[0] - dayofyr[inc_idx] = dayofyr[inc_idx] + leap[inc_idx] - - # Subtract the offset from JulianDayFromDate from the microseconds (pull - # request #433). - eps = np.finfo(float).eps - eps = np.maximum(eps*julian, eps) - hour = np.clip((F * 24.).astype(np.int64), 0, 23) - F -= hour / 24. - minute = np.clip((F * 1440.).astype(np.int64), 0, 59) - # this is an overestimation due to added offset in JulianDayFromDate - second = np.clip((F - minute / 1440.) * 86400., 0, None) - microsecond = (second % 1)*1.e6 - # remove the offset from the microsecond calculation. - microsecond = np.clip(microsecond - eps*86400.*1e6, 0, 999999) - - # convert year, month, day, hour, minute, second to int32 - year = year.astype(np.int32) - month = month.astype(np.int32) - day = day.astype(np.int32) - hour = hour.astype(np.int32) - minute = minute.astype(np.int32) - second = second.astype(np.int32) - microsecond = microsecond.astype(np.int32) - - # check if input was scalar and change return accordingly - isscalar = False - try: - JD[0] - except: - isscalar = True - - if calendar in 'proleptic_gregorian': - # FIXME: datetime.datetime does not support years < 1 - if (year < 0).any(): - datetime_type = DatetimeProlepticGregorian - else: - datetime_type = real_datetime - elif calendar in ('standard', 'gregorian'): - # return a 'real' datetime instance if calendar is proleptic - # Gregorian or Gregorian and all dates are after the - # Julian/Gregorian transition - if len(ind_before) == 0: - datetime_type = real_datetime - else: - datetime_type = DatetimeGregorian - elif calendar == "julian": - datetime_type = DatetimeJulian - else: - raise ValueError("unsupported calendar: {0}".format(calendar)) - - if not isscalar: - return np.array([datetime_type(*args) - for args in - zip(year, month, day, hour, minute, second, - microsecond)]) - - else: - return datetime_type(year[0], month[0], day[0], hour[0], - minute[0], second[0], microsecond[0]) - - -cdef _DateFromNoLeapDay(JD): - """ - -returns a 'datetime-like' object given Julian Day for a calendar with no leap -days. Julian Day is a fractional day with approximately millisecond accuracy. 
- - """ - - # based on redate.py by David Finlayson. - - if JD < 0: - year_offset = int(-JD) // 365 + 1 - JD += year_offset * 365 - else: - year_offset = 0 - - dayofwk = int(math.fmod(int(JD + 1.5), 7)) - (F, Z) = math.modf(JD + 0.5) - Z = int(Z) - A = Z - B = A + 1524 - C = int((B - 122.1) / 365.) - D = int(365. * C) - E = int((B - D) / 30.6001) - - # Convert to date - day = B - D - int(30.6001 * E) + F - nday = B - D - 123 - if nday <= 305: - dayofyr = nday + 60 - else: - dayofyr = nday - 305 - if E < 14: - month = E - 1 - else: - month = E - 13 - - if month > 2: - year = C - 4716 - else: - year = C - 4715 - - # Convert fractions of a day to time - (dfrac, days) = math.modf(day / 1.0) - (hfrac, hours) = math.modf(dfrac * 24.0) - (mfrac, minutes) = math.modf(hfrac * 60.0) - (sfrac, seconds) = math.modf(mfrac * 60.0) - microseconds = sfrac*1.e6 - - if year_offset > 0: - # correct dayofwk - - # 365 mod 7 = 1, so the day of the week changes by one day for - # every year in year_offset - dayofwk -= int(math.fmod(year_offset, 7)) - - if dayofwk < 0: - dayofwk += 7 - - return DatetimeNoLeap(year - year_offset, month, int(days), int(hours), int(minutes), - int(seconds), int(microseconds),dayofwk, dayofyr) - - -cdef _DateFromAllLeap(JD): - """ - -returns a 'datetime-like' object given Julian Day for a calendar where all -years have 366 days. -Julian Day is a fractional day with approximately millisecond accuracy. - - """ - - # based on redate.py by David Finlayson. - - if JD < 0: - raise ValueError('Julian Day must be positive') - - dayofwk = int(math.fmod(int(JD + 1.5), 7)) - (F, Z) = math.modf(JD + 0.5) - Z = int(Z) - A = Z - B = A + 1524 - C = int((B - 122.1) / 366.) - D = int(366. * C) - E = int((B - D) / 30.6001) - - # Convert to date - day = B - D - int(30.6001 * E) + F - nday = B - D - 123 - if nday <= 305: - dayofyr = nday + 60 - else: - dayofyr = nday - 305 - if E < 14: - month = E - 1 - else: - month = E - 13 - if month > 2: - dayofyr = dayofyr + 1 - - if month > 2: - year = C - 4716 - else: - year = C - 4715 - - # Convert fractions of a day to time - (dfrac, days) = math.modf(day / 1.0) - (hfrac, hours) = math.modf(dfrac * 24.0) - (mfrac, minutes) = math.modf(hfrac * 60.0) - (sfrac, seconds) = math.modf(mfrac * 60.0) - microseconds = sfrac*1.e6 - - return DatetimeAllLeap(year, month, int(days), int(hours), int(minutes), - int(seconds), int(microseconds),dayofwk, dayofyr) - - -cdef _DateFrom360Day(JD): - """ - -returns a 'datetime-like' object given Julian Day for a calendar where all -months have 30 days. -Julian Day is a fractional day with approximately millisecond accuracy. - - """ - - if JD < 0: - year_offset = int(-JD) // 360 + 1 - JD += year_offset * 360 - else: - year_offset = 0 - - #jd = int(360. * (year + 4716)) + int(30. * (month - 1)) + day - (F, Z) = math.modf(JD) - year = int((Z - 0.5) / 360.) 
- 4716 - dayofyr = Z - (year + 4716) * 360 - month = int((dayofyr - 0.5) / 30) + 1 - day = dayofyr - (month - 1) * 30 + F - - # Convert fractions of a day to time - (dfrac, days) = math.modf(day / 1.0) - (hfrac, hours) = math.modf(dfrac * 24.0) - (mfrac, minutes) = math.modf(hfrac * 60.0) - (sfrac, seconds) = math.modf(mfrac * 60.0) - microseconds = sfrac*1.e6 - - return Datetime360Day(year - year_offset, month, int(days), int(hours), int(minutes), - int(seconds), int(microseconds), -1, dayofyr) - - -cdef _dateparse(timestr): - """parse a string of the form time-units since yyyy-mm-dd hh:mm:ss - return a tuple (units,utc_offset, datetimeinstance)""" - timestr_split = timestr.split() - units = timestr_split[0].lower() - if units not in _units: - raise ValueError( - "units must be one of 'seconds', 'minutes', 'hours' or 'days' (or singular version of these), got '%s'" % units) - if timestr_split[1].lower() != 'since': - raise ValueError("no 'since' in unit_string") - # parse the date string. - n = timestr.find('since') + 6 - year, month, day, hour, minute, second, utc_offset = _parse_date( - timestr[n:].strip()) - return units, utc_offset, datetime(year, month, day, hour, minute, second) - - -class utime: - - """ -Performs conversions of netCDF time coordinate -data to/from datetime objects. - -To initialize: C{t = utime(unit_string,calendar='standard')} - -where - -B{C{unit_string}} is a string of the form -C{'time-units since '} defining the time units. - -Valid time-units are days, hours, minutes and seconds (the singular forms -are also accepted). An example unit_string would be C{'hours -since 0001-01-01 00:00:00'}. - -The B{C{calendar}} keyword describes the calendar used in the time calculations. -All the values currently defined in the U{CF metadata convention -} -are accepted. The default is C{'standard'}, which corresponds to the mixed -Gregorian/Julian calendar used by the C{udunits library}. Valid calendars -are: - -C{'gregorian'} or C{'standard'} (default): - -Mixed Gregorian/Julian calendar as defined by udunits. - -C{'proleptic_gregorian'}: - -A Gregorian calendar extended to dates before 1582-10-15. That is, a year -is a leap year if either (i) it is divisible by 4 but not by 100 or (ii) -it is divisible by 400. - -C{'noleap'} or C{'365_day'}: - -Gregorian calendar without leap years, i.e., all years are 365 days long. -all_leap or 366_day Gregorian calendar with every year being a leap year, -i.e., all years are 366 days long. - -C{'360_day'}: - -All years are 360 days divided into 30 day months. - -C{'julian'}: - -Proleptic Julian calendar, extended to dates after 1582-10-5. A year is a -leap year if it is divisible by 4. - -The C{L{num2date}} and C{L{date2num}} class methods can used to convert datetime -instances to/from the specified time units using the specified calendar. - -The datetime instances returned by C{num2date} are 'real' python datetime -objects if the date falls in the Gregorian calendar (i.e. -C{calendar='proleptic_gregorian', 'standard'} or C{'gregorian'} and -the date is after 1582-10-15). Otherwise, they are 'phony' datetime -objects which are actually instances of C{L{netcdftime.datetime}}. This is -because the python datetime module cannot handle the weird dates in some -calendars (such as C{'360_day'} and C{'all_leap'}) which don't exist in any real -world calendar. 
- - -Example usage: - ->>> from netcdftime import utime ->>> from datetime import datetime ->>> cdftime = utime('hours since 0001-01-01 00:00:00') ->>> date = datetime.now() ->>> print date -2016-10-05 08:46:27.245015 ->>> ->>> t = cdftime.date2num(date) ->>> print t -17669840.7742 ->>> ->>> date = cdftime.num2date(t) ->>> print date -2016-10-05 08:46:27.244996 ->>> - -The resolution of the transformation operation is approximately a millisecond. - -Warning: Dates between 1582-10-5 and 1582-10-15 do not exist in the -C{'standard'} or C{'gregorian'} calendars. An exception will be raised if you pass -a 'datetime-like' object in that range to the C{L{date2num}} class method. - -Words of Wisdom from the British MetOffice concerning reference dates: - -"udunits implements the mixed Gregorian/Julian calendar system, as -followed in England, in which dates prior to 1582-10-15 are assumed to use -the Julian calendar. Other software cannot be relied upon to handle the -change of calendar in the same way, so for robustness it is recommended -that the reference date be later than 1582. If earlier dates must be used, -it should be noted that udunits treats 0 AD as identical to 1 AD." - -@ivar origin: datetime instance defining the origin of the netCDF time variable. -@ivar calendar: the calendar used (as specified by the C{calendar} keyword). -@ivar unit_string: a string defining the the netCDF time variable. -@ivar units: the units part of C{unit_string} (i.e. 'days', 'hours', 'seconds'). - """ - - def __init__(self, unit_string, calendar='standard'): - """ -@param unit_string: a string of the form -C{'time-units since '} defining the time units. - -Valid time-units are days, hours, minutes and seconds (the singular forms -are also accepted). An example unit_string would be C{'hours -since 0001-01-01 00:00:00'}. - -@keyword calendar: describes the calendar used in the time calculations. -All the values currently defined in the U{CF metadata convention -} -are accepted. The default is C{'standard'}, which corresponds to the mixed -Gregorian/Julian calendar used by the C{udunits library}. Valid calendars -are: - - C{'gregorian'} or C{'standard'} (default): - Mixed Gregorian/Julian calendar as defined by udunits. - - C{'proleptic_gregorian'}: - A Gregorian calendar extended to dates before 1582-10-15. That is, a year - is a leap year if either (i) it is divisible by 4 but not by 100 or (ii) - it is divisible by 400. - - C{'noleap'} or C{'365_day'}: - Gregorian calendar without leap years, i.e., all years are 365 days long. - - C{'all_leap'} or C{'366_day'}: - Gregorian calendar with every year being a leap year, i.e., - all years are 366 days long. - -C{'360_day'}: - All years are 360 days divided into 30 day months. - -C{'julian'}: - Proleptic Julian calendar, extended to dates after 1582-10-5. A year is a - leap year if it is divisible by 4. - -@returns: A class instance which may be used for converting times from netCDF -units to datetime objects. - """ - calendar = calendar.lower() - if calendar in _calendars: - self.calendar = calendar - else: - raise ValueError( - "calendar must be one of %s, got '%s'" % (str(_calendars), calendar)) - units, tzoffset, self.origin = _dateparse(unit_string) - # real-world calendars limited to positive reference years. 
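A short usage sketch of the utime API documented above, this time on the 360_day calendar; the expected values follow from the _360DayFromDate/_DateFrom360Day code in this file:

from netcdftime import utime

# Round trip on a "phony" calendar in which every month has 30 days.
cdftime = utime("days since 2000-01-01 00:00:00", calendar="360_day")
d = cdftime.num2date(45.5)             # 2000-02-16 12:00 in the 360_day calendar
print(d.year, d.month, d.day, d.hour)  # 2000 2 16 12
print(cdftime.date2num(d))             # 45.5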
- if self.calendar in ['julian', 'standard', 'gregorian', 'proleptic_gregorian']: - if self.origin.year == 0: - msg='zero not allowed as a reference year, does not exist in Julian or Gregorian calendars' - raise ValueError(msg) - elif self.origin.year < 0: - msg='negative reference year in time units, must be >= 1' - raise ValueError(msg) - self.tzoffset = tzoffset # time zone offset in minutes - self.units = units - self.unit_string = unit_string - if self.calendar in ['noleap', '365_day'] and self.origin.month == 2 and self.origin.day == 29: - raise ValueError( - 'cannot specify a leap day as the reference time with the noleap calendar') - if self.calendar == '360_day' and self.origin.day > 30: - raise ValueError( - 'there are only 30 days in every month with the 360_day calendar') - if self.calendar in ['noleap', '365_day']: - self._jd0 = _NoLeapDayFromDate(self.origin) - elif self.calendar in ['all_leap', '366_day']: - self._jd0 = _AllLeapFromDate(self.origin) - elif self.calendar == '360_day': - self._jd0 = _360DayFromDate(self.origin) - else: - self._jd0 = JulianDayFromDate(self.origin, calendar=self.calendar) - - def date2num(self, date): - """ - Returns C{time_value} in units described by L{unit_string}, using - the specified L{calendar}, given a 'datetime-like' object. - - The datetime object must represent UTC with no time-zone offset. - If there is a time-zone offset implied by L{unit_string}, it will - be applied to the returned numeric values. - - Resolution is approximately a millisecond. - - If C{calendar = 'standard'} or C{'gregorian'} (indicating - that the mixed Julian/Gregorian calendar is to be used), an - exception will be raised if the 'datetime-like' object describes - a date between 1582-10-5 and 1582-10-15. - - Works for scalars, sequences and numpy arrays. - Returns a scalar if input is a scalar, else returns a numpy array. - """ - isscalar = False - try: - date[0] - except: - isscalar = True - if not isscalar: - date = numpy.array(date) - shape = date.shape - if self.calendar in ['julian', 'standard', 'gregorian', 'proleptic_gregorian']: - if isscalar: - jdelta = JulianDayFromDate(date, self.calendar) - self._jd0 - else: - jdelta = JulianDayFromDate( - date.flat, self.calendar) - self._jd0 - elif self.calendar in ['noleap', '365_day']: - if isscalar: - if date.month == 2 and date.day == 29: - raise ValueError( - 'there is no leap day in the noleap calendar') - jdelta = _NoLeapDayFromDate(date) - self._jd0 - else: - jdelta = [] - for d in date.flat: - if d.month == 2 and d.day == 29: - raise ValueError( - 'there is no leap day in the noleap calendar') - jdelta.append(_NoLeapDayFromDate(d) - self._jd0) - elif self.calendar in ['all_leap', '366_day']: - if isscalar: - jdelta = _AllLeapFromDate(date) - self._jd0 - else: - jdelta = [_AllLeapFromDate(d) - self._jd0 for d in date.flat] - elif self.calendar == '360_day': - if isscalar: - if date.day > 30: - raise ValueError( - 'there are only 30 days in every month with the 360_day calendar') - jdelta = _360DayFromDate(date) - self._jd0 - else: - jdelta = [] - for d in date.flat: - if d.day > 30: - raise ValueError( - 'there are only 30 days in every month with the 360_day calendar') - jdelta.append(_360DayFromDate(d) - self._jd0) - if not isscalar: - jdelta = numpy.array(jdelta) - # convert to desired units, add time zone offset. - if self.units in microsec_units: - jdelta = jdelta * 86400. * 1.e6 + self.tzoffset * 60. * 1.e6 - elif self.units in millisec_units: - jdelta = jdelta * 86400. * 1.e3 + self.tzoffset * 60. 
* 1.e3 - elif self.units in sec_units: - jdelta = jdelta * 86400. + self.tzoffset * 60. - elif self.units in min_units: - jdelta = jdelta * 1440. + self.tzoffset - elif self.units in hr_units: - jdelta = jdelta * 24. + self.tzoffset / 60. - elif self.units in day_units: - jdelta = jdelta + self.tzoffset / 1440. - else: - raise ValueError('unsupported time units') - if isscalar: - return jdelta - else: - return numpy.reshape(jdelta, shape) - - def num2date(self, time_value): - """ - Return a 'datetime-like' object given a C{time_value} in units - described by L{unit_string}, using L{calendar}. - - dates are in UTC with no offset, even if L{unit_string} contains - a time zone offset from UTC. - - Resolution is approximately a millisecond. - - Works for scalars, sequences and numpy arrays. - Returns a scalar if input is a scalar, else returns a numpy array. - - The datetime instances returned by C{num2date} are 'real' python datetime - objects if the date falls in the Gregorian calendar (i.e. - C{calendar='proleptic_gregorian'}, or C{calendar = 'standard'/'gregorian'} and - the date is after 1582-10-15). Otherwise, they are 'phony' datetime - objects which are actually instances of netcdftime.datetime. This is - because the python datetime module cannot handle the weird dates in some - calendars (such as C{'360_day'} and C{'all_leap'}) which - do not exist in any real world calendar. - """ - isscalar = False - try: - time_value[0] - except: - isscalar = True - ismasked = False - if hasattr(time_value, 'mask'): - mask = time_value.mask - ismasked = True - if not isscalar: - time_value = numpy.array(time_value, dtype='d') - shape = time_value.shape - # convert to desired units, subtract time zone offset. - if self.units in microsec_units: - jdelta = time_value / 86400000000. - self.tzoffset / 1440. - elif self.units in millisec_units: - jdelta = time_value / 86400000. - self.tzoffset / 1440. - elif self.units in sec_units: - jdelta = time_value / 86400. - self.tzoffset / 1440. - elif self.units in min_units: - jdelta = time_value / 1440. - self.tzoffset / 1440. - elif self.units in hr_units: - jdelta = time_value / 24. - self.tzoffset / 1440. - elif self.units in day_units: - jdelta = time_value - self.tzoffset / 1440. 
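The unit conversions above fold the time-zone offset from the unit string into the numeric values on encode, and decoding always returns plain UTC; a small sketch:

from datetime import datetime
from netcdftime import utime

# The reference time is 2000-01-01 00:00 at UTC-06:00, i.e. 06:00 UTC.
cdftime = utime("hours since 2000-01-01 00:00:00 -06:00")
t = cdftime.date2num(datetime(2000, 1, 2))   # UTC input, no tzinfo
print(t)                                     # ~18.0 hours after the reference
print(cdftime.num2date(t))                   # 2000-01-02 00:00:00 (UTC)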
- else: - raise ValueError('unsupported time units') - jd = self._jd0 + jdelta - if self.calendar in ['julian', 'standard', 'gregorian', 'proleptic_gregorian']: - if not isscalar: - if ismasked: - date = [] - for j, m in zip(jd.flat, mask.flat): - if not m: - date.append(DateFromJulianDay(j, self.calendar)) - else: - date.append(None) - else: - date = DateFromJulianDay(jd.flat, self.calendar) - else: - if ismasked and mask.item(): - date = None - else: - date = DateFromJulianDay(jd, self.calendar) - elif self.calendar in ['noleap', '365_day']: - if not isscalar: - date = [_DateFromNoLeapDay(j) for j in jd.flat] - else: - date = _DateFromNoLeapDay(jd) - elif self.calendar in ['all_leap', '366_day']: - if not isscalar: - date = [_DateFromAllLeap(j) for j in jd.flat] - else: - date = _DateFromAllLeap(jd) - elif self.calendar == '360_day': - if not isscalar: - date = [_DateFrom360Day(j) for j in jd.flat] - else: - date = _DateFrom360Day(jd) - if isscalar: - return date - else: - return numpy.reshape(numpy.array(date), shape) - - -cdef _parse_timezone(tzstring): - """Parses ISO 8601 time zone specs into tzinfo offsets - - Adapted from pyiso8601 (http://code.google.com/p/pyiso8601/) - """ - if tzstring == "Z": - return 0 - # This isn't strictly correct, but it's common to encounter dates without - # time zones so I'll assume the default (which defaults to UTC). - if tzstring is None: - return 0 - m = TIMEZONE_REGEX.match(tzstring) - prefix, hours, minutes1, minutes2 = m.groups() - hours = int(hours) -# Note: Minutes don't have to be specified in tzstring, -# so if the group is not found it means minutes is 0. -# Also, due to the timezone regex definition, there are two mutually -# exclusive groups that might hold the minutes value, so check both. - minutes = int(minutes1) if minutes1 is not None else int(minutes2) if minutes2 is not None else 0 - if prefix == "-": - hours = -hours - minutes = -minutes - return minutes + hours * 60. - - -cpdef _parse_date(datestring): - """Parses ISO 8601 dates into datetime objects - - The timezone is parsed from the date string, assuming UTC - by default. - - Adapted from pyiso8601 (http://code.google.com/p/pyiso8601/) - """ - if not isinstance(datestring, str) and not isinstance(datestring, unicode): - raise ValueError("Expecting a string %r" % datestring) - m = ISO8601_REGEX.match(datestring.strip()) - if not m: - raise ValueError("Unable to parse date string %r" % datestring) - groups = m.groupdict() - tzoffset_mins = _parse_timezone(groups["timezone"]) - if groups["hour"] is None: - groups["hour"] = 0 - if groups["minute"] is None: - groups["minute"] = 0 - if groups["second"] is None: - groups["second"] = 0 - # if groups["fraction"] is None: - # groups["fraction"] = 0 - # else: - # groups["fraction"] = int(float("0.%s" % groups["fraction"]) * 1e6) - iyear = int(groups["year"]) - return iyear, int(groups["month"]), int(groups["day"]),\ - int(groups["hour"]), int(groups["minute"]), int(groups["second"]),\ - tzoffset_mins - -cdef _check_index(indices, times, nctime, calendar, select): - """Return True if the time indices given correspond to the given times, - False otherwise. - - Parameters: - - indices : sequence of integers - Positive integers indexing the time variable. - - times : sequence of times. - Reference times. - - nctime : netCDF Variable object - NetCDF time object. - - calendar : string - Calendar of nctime. - - select : string - Index selection method. 
- """ - N = nctime.shape[0] - if (indices < 0).any(): - return False - - if (indices >= N).any(): - return False - - try: - t = nctime[indices] - nctime = nctime - # WORKAROUND TO CHANGES IN SLICING BEHAVIOUR in 1.1.2 - # this may be unacceptably slow... - # if indices are unsorted, or there are duplicate - # values in indices, read entire time variable into numpy - # array so numpy slicing rules can be used. - except IndexError: - nctime = nctime[:] - t = nctime[indices] -# if fancy indexing not available, fall back on this. -# t=[] -# for ind in indices: -# t.append(nctime[ind]) - - if select == 'exact': - return numpy.all(t == times) - - elif select == 'before': - ta = nctime[numpy.clip(indices + 1, 0, N - 1)] - return numpy.all(t <= times) and numpy.all(ta > times) - - elif select == 'after': - tb = nctime[numpy.clip(indices - 1, 0, N - 1)] - return numpy.all(t >= times) and numpy.all(tb < times) - - elif select == 'nearest': - ta = nctime[numpy.clip(indices + 1, 0, N - 1)] - tb = nctime[numpy.clip(indices - 1, 0, N - 1)] - delta_after = ta - t - delta_before = t - tb - delta_check = numpy.abs(times - t) - return numpy.all(delta_check <= delta_after) and numpy.all(delta_check <= delta_before) - - -def date2index(dates, nctime, calendar=None, select='exact'): - """ - date2index(dates, nctime, calendar=None, select='exact') - - Return indices of a netCDF time variable corresponding to the given dates. - - @param dates: A datetime object or a sequence of datetime objects. - The datetime objects should not include a time-zone offset. - - @param nctime: A netCDF time variable object. The nctime object must have a - C{units} attribute. The entries are assumed to be stored in increasing - order. - - @param calendar: Describes the calendar used in the time calculation. - Valid calendars C{'standard', 'gregorian', 'proleptic_gregorian' - 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'}. - Default is C{'standard'}, which is a mixed Julian/Gregorian calendar - If C{calendar} is None, its value is given by C{nctime.calendar} or - C{standard} if no such attribute exists. - - @param select: C{'exact', 'before', 'after', 'nearest'} - The index selection method. C{exact} will return the indices perfectly - matching the dates given. C{before} and C{after} will return the indices - corresponding to the dates just before or just after the given dates if - an exact match cannot be found. C{nearest} will return the indices that - correspond to the closest dates. - """ - try: - nctime.units - except AttributeError: - raise AttributeError("netcdf time variable is missing a 'units' attribute") - # Setting the calendar. - if calendar == None: - calendar = getattr(nctime, 'calendar', 'standard') - cdftime = utime(nctime.units,calendar=calendar) - times = cdftime.date2num(dates) - return time2index(times, nctime, calendar=calendar, select=select) - - -def time2index(times, nctime, calendar=None, select='exact'): - """ - time2index(times, nctime, calendar=None, select='exact') - - Return indices of a netCDF time variable corresponding to the given times. - - @param times: A numeric time or a sequence of numeric times. - - @param nctime: A netCDF time variable object. The nctime object must have a - C{units} attribute. The entries are assumed to be stored in increasing - order. - - @param calendar: Describes the calendar used in the time calculation. - Valid calendars C{'standard', 'gregorian', 'proleptic_gregorian' - 'noleap', '365_day', '360_day', 'julian', 'all_leap', '366_day'}. 
- Default is C{'standard'}, which is a mixed Julian/Gregorian calendar - If C{calendar} is None, its value is given by C{nctime.calendar} or - C{standard} if no such attribute exists. - - @param select: C{'exact', 'before', 'after', 'nearest'} - The index selection method. C{exact} will return the indices perfectly - matching the times given. C{before} and C{after} will return the indices - corresponding to the times just before or just after the given times if - an exact match cannot be found. C{nearest} will return the indices that - correspond to the closest times. - """ - try: - nctime.units - except AttributeError: - raise AttributeError("netcdf time variable is missing a 'units' attribute") - # Setting the calendar. - if calendar == None: - calendar = getattr(nctime, 'calendar', 'standard') - - num = numpy.atleast_1d(times) - N = len(nctime) - - # Trying to infer the correct index from the starting time and the stride. - # This assumes that the times are increasing uniformly. - if len(nctime) >= 2: - t0, t1 = nctime[:2] - dt = t1 - t0 - else: - t0 = nctime[0] - dt = 1. - if select in ['exact', 'before']: - index = numpy.array((num - t0) / dt, int) - elif select == 'after': - index = numpy.array(numpy.ceil((num - t0) / dt), int) - else: - index = numpy.array(numpy.around((num - t0) / dt), int) - - # Checking that the index really corresponds to the given time. - # If the times do not correspond, then it means that the times - # are not increasing uniformly and we try the bisection method. - if not _check_index(index, times, nctime, calendar, select): - - # Use the bisection method. Assumes nctime is ordered. - import bisect - index = numpy.array([bisect.bisect_right(nctime, n) for n in num], int) - before = index == 0 - - index = numpy.array([bisect.bisect_left(nctime, n) for n in num], int) - after = index == N - - if select in ['before', 'exact'] and numpy.any(before): - raise ValueError( - 'Some of the times given are before the first time in `nctime`.') - - if select in ['after', 'exact'] and numpy.any(after): - raise ValueError( - 'Some of the times given are after the last time in `nctime`.') - - # Find the times for which the match is not perfect. - # Use list comprehension instead of the simpler `nctime[index]` since - # not all time objects support numpy integer indexing (eg dap). - index[after] = N - 1 - ncnum = numpy.squeeze([nctime[i] for i in index]) - mismatch = numpy.nonzero(ncnum != num)[0] - - if select == 'exact': - if len(mismatch) > 0: - raise ValueError( - 'Some of the times specified were not found in the `nctime` variable.') - - elif select == 'before': - index[after] = N - index[mismatch] -= 1 - - elif select == 'after': - pass - - elif select == 'nearest': - nearest_to_left = num[mismatch] < numpy.array( - [float(nctime[i - 1]) + float(nctime[i]) for i in index[mismatch]]) / 2. - index[mismatch] = index[mismatch] - 1 * nearest_to_left - - else: - raise ValueError( - "%s is not an option for the `select` argument." % select) - - # Correct for indices equal to -1 - index[before] = 0 - - # convert numpy scalars or single element arrays to python ints. - return _toscalar(index) - - -cdef _toscalar(a): - if a.shape in [(), (1,)]: - return a.item() - else: - return a - -cdef to_tuple(dt): - """Turn a datetime.datetime instance into a tuple of integers. Elements go - in the order of decreasing significance, making it easy to compare - datetime instances. Parts of the state that don't affect ordering - are omitted. 
Compare to datetime.timetuple().""" - return (dt.year, dt.month, dt.day, dt.hour, dt.minute, - dt.second, dt.microsecond) - -# a cache of converters (utime instances) for different calendars -cdef dict _converters -_converters = {} -for calendar in _calendars: - _converters[calendar] = utime("seconds since 1-1-1", calendar) - -cdef class datetime(object): - """ -The base class implementing most methods of datetime classes that -mimic datetime.datetime but support calendars other than the proleptic -Gregorial calendar. - """ - cdef readonly int year, month, day, hour, minute, dayofwk, dayofyr - cdef readonly int second, microsecond - cdef readonly str calendar - - # Python's datetime.datetime uses the proleptic Gregorian - # calendar. This boolean is used to decide whether a - # netcdftime.datetime instance can be converted to - # datetime.datetime. - cdef readonly bint datetime_compatible - - def __init__(self, int year, int month, int day, int hour=0, int minute=0, int second=0, - int microsecond=0, int dayofwk=-1, int dayofyr=1): - """dayofyr set to 1 by default - otherwise time.strftime will complain""" - - self.year = year - self.month = month - self.day = day - self.hour = hour - self.minute = minute - self.dayofwk = dayofwk - self.dayofyr = dayofyr - self.second = second - self.microsecond = microsecond - self.calendar = "" - - self.datetime_compatible = True - - @property - def format(self): - return '%Y-%m-%d %H:%M:%S' - - def strftime(self, format=None): - if format is None: - format = self.format - return _strftime(self, format) - - def replace(self, **kwargs): - "Return datetime with new specified fields." - args = {"year": self.year, - "month": self.month, - "day": self.day, - "hour": self.hour, - "minute": self.minute, - "second": self.second, - "microsecond": self.microsecond, - "dayofwk": self.dayofwk, - "dayofyr": self.dayofyr} - - for name, value in kwargs.items(): - args[name] = value - - return self.__class__(**args) - - def timetuple(self): - return (self.year, self.month, self.day, self.hour, - self.minute, self.second, self.dayofwk, self.dayofyr, -1) - - cpdef _to_real_datetime(self): - return real_datetime(self.year, self.month, self.day, - self.hour, self.minute, self.second, - self.microsecond) - - def __repr__(self): - return "{0}.{1}{2}".format(self.__class__.__module__, - self.__class__.__name__, - self._getstate()) - - def __str__(self): - return self.strftime(self.format) - - def __hash__(self): - try: - d = self._to_real_datetime() - except ValueError: - return hash(self.timetuple()) - return hash(d) - - cdef to_tuple(self): - return (self.year, self.month, self.day, self.hour, self.minute, - self.second, self.microsecond) - - def __richcmp__(self, other, int op): - cdef datetime dt, dt_other - dt = self - if isinstance(other, datetime): - dt_other = other - # comparing two datetime instances - if dt.calendar == dt_other.calendar: - return PyObject_RichCompare(dt.to_tuple(), dt_other.to_tuple(), op) - else: - # Note: it *is* possible to compare datetime - # instances that use difference calendars by using - # utime.date2num(), but this implementation does - # not attempt it. 
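Arithmetic and comparison behave as the code above implements: timedelta arithmetic stays within a calendar, while mixing calendars raises. A brief sketch using the phony datetime classes defined below:

from datetime import timedelta
from netcdftime import DatetimeNoLeap, Datetime360Day

d = DatetimeNoLeap(2000, 2, 28)
print(d + timedelta(days=1))   # 2000-03-01 00:00:00, since noleap has no Feb 29

try:
    DatetimeNoLeap(2000, 3, 1) - Datetime360Day(2000, 3, 1)
except ValueError as err:
    print(err)                 # different calendars cannot be subtracted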
- raise TypeError("cannot compare {0!r} and {1!r} (different calendars)".format(dt, dt_other)) - elif isinstance(other, real_datetime): - # comparing datetime and real_datetime - if not dt.datetime_compatible: - raise TypeError("cannot compare {0!r} and {1!r} (different calendars)".format(self, other)) - return PyObject_RichCompare(dt.to_tuple(), to_tuple(other), op) - else: - raise TypeError("cannot compare {0!r} and {1!r}".format(self, other)) - - cdef _getstate(self): - return (self.year, self.month, self.day, self.hour, - self.minute, self.second, self.microsecond, - self.dayofwk, self.dayofyr) - - def __reduce__(self): - """special method that allows instance to be pickled""" - return (self.__class__, self._getstate()) - - cdef _add_timedelta(self, other): - return NotImplemented - - def __add__(self, other): - cdef datetime dt - if isinstance(self, datetime) and isinstance(other, timedelta): - dt = self - delta = other - elif isinstance(self, timedelta) and isinstance(other, datetime): - dt = other - delta = self - else: - return NotImplemented - return dt._add_timedelta(delta) - - def __sub__(self, other): - cdef datetime dt - if isinstance(self, datetime): # left arg is a datetime instance - dt = self - if isinstance(other, datetime): - # datetime - datetime - if dt.calendar != other.calendar: - raise ValueError("cannot compute the time difference between dates with different calendars") - if dt.calendar == "": - raise ValueError("cannot compute the time difference between dates that are not calendar-aware") - converter = _converters[dt.calendar] - return timedelta(seconds=converter.date2num(dt) - converter.date2num(other)) - elif isinstance(other, real_datetime): - # datetime - real_datetime - if not dt.datetime_compatible: - raise ValueError("cannot compute the time difference between dates with different calendars") - return dt._to_real_datetime() - other - elif isinstance(other, timedelta): - # datetime - timedelta - return dt._add_timedelta(-other) - else: - return NotImplemented - else: - if isinstance(self, real_datetime): - # real_datetime - datetime - if not other.datetime_compatible: - raise ValueError("cannot compute the time difference between dates with different calendars") - return self - other._to_real_datetime() - else: - return NotImplemented - -cdef class DatetimeNoLeap(datetime): - """ -Phony datetime object which mimics the python datetime object, -but uses the "noleap" ("365_day") calendar. - """ - def __init__(self, *args, **kwargs): - datetime.__init__(self, *args, **kwargs) - self.calendar = "noleap" - self.datetime_compatible = False - - cdef _add_timedelta(self, delta): - return DatetimeNoLeap(*add_timedelta(self, delta, no_leap, False)) - -cdef class DatetimeAllLeap(datetime): - """ -Phony datetime object which mimics the python datetime object, -but uses the "all_leap" ("366_day") calendar. - """ - def __init__(self, *args, **kwargs): - datetime.__init__(self, *args, **kwargs) - self.calendar = "all_leap" - self.datetime_compatible = False - - cdef _add_timedelta(self, delta): - return DatetimeAllLeap(*add_timedelta(self, delta, all_leap, False)) - -cdef class Datetime360Day(datetime): - """ -Phony datetime object which mimics the python datetime object, -but uses the "360_day" calendar. 
- """ - def __init__(self, *args, **kwargs): - datetime.__init__(self, *args, **kwargs) - self.calendar = "360_day" - self.datetime_compatible = False - - cdef _add_timedelta(self, delta): - return Datetime360Day(*add_timedelta_360_day(self, delta)) - -cdef class DatetimeJulian(datetime): - """ -Phony datetime object which mimics the python datetime object, -but uses the "julian" calendar. - """ - def __init__(self, *args, **kwargs): - datetime.__init__(self, *args, **kwargs) - self.calendar = "julian" - self.datetime_compatible = False - - cdef _add_timedelta(self, delta): - return DatetimeJulian(*add_timedelta(self, delta, is_leap_julian, False)) - -cdef class DatetimeGregorian(datetime): - """ -Phony datetime object which mimics the python datetime object, -but uses the mixed Julian-Gregorian ("standard", "gregorian") calendar. - -The last date of the Julian calendar is 1582-10-4, which is followed -by 1582-10-15, using the Gregorian calendar. - -Instances using the date after 1582-10-15 can be compared to -datetime.datetime instances and used to compute time differences -(datetime.timedelta) by subtracting a DatetimeGregorian instance from -a datetime.datetime instance or vice versa. - """ - def __init__(self, *args, **kwargs): - datetime.__init__(self, *args, **kwargs) - self.calendar = "gregorian" - - # dates after 1582-10-15 can be converted to and compared to - # proleptic Gregorian dates - if self.to_tuple() >= (1582, 10, 15, 0, 0, 0, 0): - self.datetime_compatible = True - else: - self.datetime_compatible = False - - cdef _add_timedelta(self, delta): - return DatetimeGregorian(*add_timedelta(self, delta, is_leap_gregorian, True)) - -cdef class DatetimeProlepticGregorian(datetime): - """ -Phony datetime object which mimics the python datetime object, -but allows for dates that don't exist in the proleptic gregorian calendar. - -Supports timedelta operations by overloading + and -. - -Has strftime, timetuple, replace, __repr__, and __str__ methods. The -format of the string produced by __str__ is controlled by self.format -(default %Y-%m-%d %H:%M:%S). Supports comparisons with other phony -datetime instances using the same calendar; comparison with -datetime.datetime instances is possible for netcdftime.datetime -instances using 'gregorian' and 'proleptic_gregorian' calendars. - -Instance variables are year,month,day,hour,minute,second,microsecond,dayofwk,dayofyr, -format, and calendar. - """ - def __init__(self, *args, **kwargs): - datetime.__init__(self, *args, **kwargs) - self.calendar = "proleptic_gregorian" - self.datetime_compatible = True - - cdef _add_timedelta(self, delta): - return DatetimeProlepticGregorian(*add_timedelta(self, delta, - is_leap_proleptic_gregorian, False)) - -_illegal_s = re.compile(r"((^|[^%])(%%)*%s)") - - -cdef _findall(text, substr): - # Also finds overlaps - sites = [] - i = 0 - while 1: - j = text.find(substr, i) - if j == -1: - break - sites.append(j) - i = j + 1 - return sites - -# Every 28 years the calendar repeats, except through century leap -# years where it's 6 years. But only if you're using the Gregorian -# calendar. ;) - - -cdef _strftime(datetime dt, fmt): - if _illegal_s.search(fmt): - raise TypeError("This strftime implementation does not handle %s") - # don't use strftime method at all. 
- # if dt.year > 1900: - # return dt.strftime(fmt) - - year = dt.year - # For every non-leap year century, advance by - # 6 years to get into the 28-year repeat cycle - delta = 2000 - year - off = 6 * (delta // 100 + delta // 400) - year = year + off - - # Move to around the year 2000 - year = year + ((2000 - year) // 28) * 28 - timetuple = dt.timetuple() - s1 = time.strftime(fmt, (year,) + timetuple[1:]) - sites1 = _findall(s1, str(year)) - - s2 = time.strftime(fmt, (year + 28,) + timetuple[1:]) - sites2 = _findall(s2, str(year + 28)) - - sites = [] - for site in sites1: - if site in sites2: - sites.append(site) - - s = s1 - syear = "%4d" % (dt.year,) - for site in sites: - s = s[:site] + syear + s[site + 4:] - return s - -cdef bint is_leap_julian(int year): - "Return 1 if year is a leap year in the Julian calendar, 0 otherwise." - cdef int y - y = year if year > 0 else year + 1 - return (y % 4) == 0 - -cdef bint is_leap_proleptic_gregorian(int year): - "Return 1 if year is a leap year in the Gregorian calendar, 0 otherwise." - cdef int y - y = year if year > 0 else year + 1 - return (((y % 4) == 0) and ((y % 100) != 0)) or ((y % 400) == 0) - -cdef bint is_leap_gregorian(int year): - return (year > 1582 and is_leap_proleptic_gregorian(year)) or (year < 1582 and is_leap_julian(year)) - -cdef bint all_leap(int year): - "Return True for all years." - return True - -cdef bint no_leap(int year): - "Return False for all years." - return False - -# numbers of days per month for calendars supported by add_timedelta(...) -cdef int[13] month_lengths_365_day, month_lengths_366_day -# Dummy Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec -for j,N in enumerate([-1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]): - month_lengths_365_day[j] = N - -# Dummy Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec -for j,N in enumerate([-1, 31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]): - month_lengths_366_day[j] = N - -cdef int* month_lengths(bint (*is_leap)(int), int year): - if is_leap(year): - return month_lengths_366_day - else: - return month_lengths_365_day - -# Add a datetime.timedelta to a netcdftime.datetime instance. Uses -# integer arithmetic to avoid rounding errors and preserve -# microsecond accuracy. -# -# The argument is_leap is the pointer to a function returning 1 for leap years and 0 otherwise. -# -# This implementation supports 365_day (no_leap), 366_day (all_leap), -# julian, proleptic_gregorian, and the mixed Julian/Gregorian -# (standard, gregorian) calendars by using different is_leap and -# julian_gregorian_mixed arguments. -# -# The date of the transition from the Julian to Gregorian calendar and -# the number of invalid dates are hard-wired (1582-10-4 is the last day -# of the Julian calendar, after which follows 1582-10-15). 
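As a rough illustration of the carry/normalisation scheme described in the comment above, here is a plain-Python sketch restricted to the 365_day ("noleap") calendar and to non-negative timedeltas; the mixed Julian/Gregorian date skipping and the negative-delta branch are omitted, so this is a simplified model of the approach, not the implementation that follows.

from datetime import timedelta

# illustrative sketch only: noleap calendar, non-negative deltas
_DAYS_IN_MONTH = [-1, 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]  # index 0 unused

def add_timedelta_noleap(year, month, day, hour, minute, second, microsecond, delta):
    # work entirely in integers so microsecond accuracy is preserved
    microsecond += delta.microseconds
    second += delta.seconds
    days_left = delta.days
    # carry microseconds -> seconds -> minutes -> hours -> days
    second += microsecond // 1000000
    microsecond %= 1000000
    minute += second // 60
    second %= 60
    hour += minute // 60
    minute %= 60
    days_left += hour // 24
    hour %= 24
    # walk forward one month at a time until the remaining days fit
    while days_left > 0:
        if day + days_left > _DAYS_IN_MONTH[month]:
            days_left -= _DAYS_IN_MONTH[month] - (day - 1)
            day = 1
            month += 1
            if month > 12:
                month = 1
                year += 1
        else:
            day += days_left
            days_left = 0
    return (year, month, day, hour, minute, second, microsecond)

# Feb 28 + 1 day never lands on Feb 29 in the noleap calendar:
# add_timedelta_noleap(2000, 2, 28, 0, 0, 0, 0, timedelta(days=1)) -> (2000, 3, 1, 0, 0, 0, 0)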
-cdef tuple add_timedelta(datetime dt, delta, bint (*is_leap)(int), bint julian_gregorian_mixed): - cdef int microsecond, second, minute, hour, day, month, year - cdef int delta_microseconds, delta_seconds, delta_days - cdef int* month_length - cdef int extra_days, n_invalid_dates - - # extract these inputs here to avoid type conversion in the code below - delta_microseconds = delta.microseconds - delta_seconds = delta.seconds - delta_days = delta.days - - # shift microseconds, seconds, days - microsecond = dt.microsecond + delta_microseconds - second = dt.second + delta_seconds - minute = dt.minute - hour = dt.hour - day = dt.day - month = dt.month - year = dt.year - - # validate inputs: - if year == 0: - raise ValueError("invalid year in {0!r}".format(dt)) - - month_length = month_lengths(is_leap, year) - - if month < 1 or month > 12: - raise ValueError("invalid month in {0!r}".format(dt)) - - if day < 1 or day > month_length[month]: - raise ValueError("invalid day number in {0!r}".format(dt)) - - if julian_gregorian_mixed and year == 1582 and month == 10 and day > 4 and day < 15: - raise ValueError("{0!r} is not present in the mixed Julian/Gregorian calendar".format(dt)) - - n_invalid_dates = 10 if julian_gregorian_mixed else 0 - - # Normalize microseconds, seconds, minutes, hours. - second += microsecond // 1000000 - microsecond = microsecond % 1000000 - minute += second // 60 - second = second % 60 - hour += minute // 60 - minute = minute % 60 - extra_days = hour // 24 - hour = hour % 24 - - delta_days += extra_days - - while delta_days < 0: - if year == 1582 and month == 10 and day > 14 and day + delta_days < 15: - delta_days -= n_invalid_dates # skip over invalid dates - if day + delta_days < 1: - delta_days += day - # decrement month - month -= 1 - if month < 1: - month = 12 - year -= 1 - if year == 0: - year = -1 - month_length = month_lengths(is_leap, year) - day = month_length[month] - else: - day += delta_days - delta_days = 0 - - while delta_days > 0: - if year == 1582 and month == 10 and day < 5 and day + delta_days > 4: - delta_days += n_invalid_dates # skip over invalid dates - if day + delta_days > month_length[month]: - delta_days -= month_length[month] - (day - 1) - # increment month - month += 1 - if month > 12: - month = 1 - year += 1 - if year == 0: - year = 1 - month_length = month_lengths(is_leap, year) - day = 1 - else: - day += delta_days - delta_days = 0 - - return (year, month, day, hour, minute, second, microsecond, -1, 1) - -# Add a datetime.timedelta to a netcdftime.datetime instance with the 360_day calendar. -# -# Assumes that the 360_day calendar (unlike the rest of supported -# calendars) has the year 0. Also, there are no leap years and all -# months are 30 days long, so we can compute month and year by using -# "//" and "%". -cdef tuple add_timedelta_360_day(datetime dt, delta): - cdef int microsecond, second, minute, hour, day, month, year - cdef int delta_microseconds, delta_seconds, delta_days - - assert dt.month >= 1 and dt.month <= 12 - - # extract these inputs here to avoid type conversion in the code below - delta_microseconds = delta.microseconds - delta_seconds = delta.seconds - delta_days = delta.days - - # shift microseconds, seconds, days - microsecond = dt.microsecond + delta_microseconds - second = dt.second + delta_seconds - minute = dt.minute - hour = dt.hour - day = dt.day + delta_days - month = dt.month - year = dt.year - - # Normalize microseconds, seconds, minutes, hours, days, and months. 
- second += microsecond // 1000000 - microsecond = microsecond % 1000000 - minute += second // 60 - second = second % 60 - hour += minute // 60 - minute = minute % 60 - day += hour // 24 - hour = hour % 24 - # day and month are counted from 1; all months have 30 days - month += (day - 1) // 30 - day = (day - 1) % 30 + 1 - # all years have 12 months - year += (month - 1) // 12 - month = (month - 1) % 12 + 1 - - return (year, month, day, hour, minute, second, microsecond, -1, 1) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 000000000..fc2d049b0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,171 @@ +[build-system] +requires = [ + "Cython>=0.29", + "numpy>=2.0.0", + "setuptools>=77.0.1", + "setuptools_scm[toml]>=3.4", +] +build-backend = "backend" +backend-path = ["_build"] + +[project] +name = "netCDF4" +description = "Provides an object-oriented python interface to the netCDF version 4 library" +authors = [ + {name = "Jeff Whitaker", email = "whitaker.jeffrey@gmail.com"}, +] +requires-python = ">=3.10" +keywords = [ + "numpy", "netcdf", "data", "science", "network", "oceanography", + "meteorology", "climate", +] +license = "MIT" +license-files = ["LICENSE"] +classifiers = [ + "Development Status :: 3 - Alpha", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", + "Programming Language :: Python :: 3.14", + "Intended Audience :: Science/Research", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: System :: Archiving :: Compression", + "Operating System :: OS Independent", +] +dependencies = [ + "cftime", + "certifi", + "numpy>=2.3.0; platform_system == 'Windows' and platform_machine == 'ARM64'", + "numpy>=1.21.2; platform_system != 'Windows' or platform_machine != 'ARM64'", +] +dynamic = ["version"] + +[project.optional-dependencies] +tests = [ + "Cython", + "packaging", + "pytest", + "typing-extensions>=4.15.0", +] +parallel = [ + "mpi4py", +] + +[project.readme] +text = """\ +netCDF version 4 has many features not found in earlier versions of the library, +such as hierarchical groups, zlib compression, multiple unlimited dimensions, +and new data types. It is implemented on top of HDF5. This module implements +most of the new features, and can read and write netCDF files compatible with +older versions of the library. The API is modelled after Scientific.IO.NetCDF, +and should be familiar to users of that module. 
+""" +content-type = "text/x-rst" + +[project.scripts] +nc3tonc4 = "netCDF4.utils:nc3tonc4" +nc4tonc3 = "netCDF4.utils:nc4tonc3" +ncinfo = "netCDF4.utils:ncinfo" + +[project.urls] +Documentation = "https://unidata.github.io/netcdf4-python/" +Repository = "https://github.com/Unidata/netcdf4-python" + +[tool.setuptools.packages.find] +where = ["src"] + +[tool.setuptools.package-data] +"netCDF4.plugins" = ["*__nc*"] + +[tool.setuptools_scm] + +[tool.pytest.ini_options] +pythonpath = ["test"] +filterwarnings = [ + "error", + "ignore::UserWarning", + "ignore::RuntimeWarning", +] + +[tool.mypy] +files = ["src/netCDF4"] +exclude = "utils.py" +check_untyped_defs = true +allow_redefinition = true +# next 2 lines workarounds for mypy dealing with type_guards.py +mypy_path = "test" +explicit_package_bases = true + +[[tool.mypy.overrides]] +ignore_missing_imports = true +module = [ + "cftime.*", + "cython.*", + "filter_availability", + "matplotlib.*" +] + +[tool.cibuildwheel] +build-verbosity = 1 +build-frontend = "build" +skip = [ + "*-musllinux*", +] +test-extras = "tests" +test-sources = [ + "test", + "pyproject.toml" +] +test-command = [ + '''python -c "import netCDF4; print(f'netCDF4 v{netCDF4.__version__}')"''', + "pytest -s -rxs -v test", +] +manylinux-x86_64-image = "ghcr.io/ocefpaf/manylinux_2_28_x86_64-netcdf" +manylinux-aarch64-image = "ghcr.io/ocefpaf/manylinux_2_28_aarch64-netcdf" +environment = {NETCDF4_LIMITED_API="1"} + +[tool.cibuildwheel.macos] +# https://cibuildwheel.pypa.io/en/stable/faq/#macos-passing-dyld_library_path-to-delocate +repair-wheel-command = """\ +DYLD_FALLBACK_LIBRARY_PATH=/Users/runner/micromamba/envs/build/lib \ +delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} \ +""" + +[tool.cibuildwheel.windows] +before-build = "python -m pip install delvewheel" +environment = "PATH=$PATH:$CONDA_PREFIX/bin" +repair-wheel-command = [ + "delvewheel show --include blosc.dll;zstd.dll;lz4.dll {wheel}", + "delvewheel repair --include blosc.dll;zstd.dll;lz4.dll -w {dest_dir} {wheel}", +] + +[[tool.cibuildwheel.overrides]] +select = "*linux*" +inherit.environment = "append" +environment = {NETCDF_PLUGIN_DIR="/usr/local/hdf5/lib/plugin/"} + +[[tool.cibuildwheel.overrides]] +select = "*-macosx_x86_64" +inherit.environment = "append" +environment = {MACOSX_DEPLOYMENT_TARGET="13.0",HDF5_DIR="/Users/runner/micromamba/envs/build",netCDF4_DIR="/Users/runner/micromamba/envs/build",PATH="${PATH}:/Users/runner/micromamba/envs/build/bin",NETCDF_PLUGIN_DIR="/Users/runner/micromamba/envs/build/hdf5/lib/plugin"} + +[[tool.cibuildwheel.overrides]] +select = "*-macosx_arm64" +inherit.environment = "append" +environment = {MACOSX_DEPLOYMENT_TARGET="14.0",HDF5_DIR="/Users/runner/micromambe/envs/build",netCDF4_DIR="/Users/runner/micromambe/envs/build",PATH="${PATH}:/Users/runner/micromamba/envs/build/bin",NETCDF_PLUGIN_DIR="/Users/runner/micromamba/envs/build/hdf5/lib/plugin"} + +[[tool.cibuildwheel.overrides]] +select = "*-win_*" +inherit.environment = "append" +environment = {HDF5_DIR='C:\\\\Users\\runneradmin\\micromamba\\envs\\build\\Library',netCDF4_DIR='C:\\\\Users\\runneradmin\\micromamba\\envs\\build\\Library',PATH='C:\\\\Users\\runneradmin\\micromamba\\envs\\build\\Library\\bin;${PATH}',NETCDF_PLUGIN_DIR='C:\\\\Users\\runneradmin\\micromamba\\envs\\build\\Library\\hdf5\\lib\\plugin'} + +[[tool.cibuildwheel.overrides]] +select = "*-win_arm64" +inherit.environment = "append" +environment = { HDF5_DIR = 'C:\\\\vcpkg\\\\installed\\\\arm64-windows', netCDF4_DIR = 
'C:\\\\vcpkg\\\\installed\\\\arm64-windows', PATH = 'C:\\\\vcpkg\\\\installed\\\\arm64-windows\\\\bin;${PATH}', NO_CDL = '1' } +repair-wheel-command = [ + "delvewheel show {wheel}", + "delvewheel repair -w {dest_dir} {wheel}", +] diff --git a/setup.cfg b/setup.cfg index 9b0094d68..c0afd570c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -32,7 +32,7 @@ use_ncconfig=True # use szip_libdir and szip_incdir. #szip_dir = /usr/local # if netcdf lib was build statically with HDF4 support, -# uncomment and set to hdf4 lib (libmfhdf and libdf) nstall location. +# uncomment and set to hdf4 lib (libmfhdf and libdf) install location. # If the libraries and include files are installed in separate locations, # use hdf4_libdir and hdf4_incdir. #hdf4_dir = /usr/local @@ -48,3 +48,10 @@ use_ncconfig=True #curl_dir = /usr/local # location of mpi.h (needed for parallel support) #mpi_incdir=/opt/local/include/mpich-mp +[check-manifest] +ignore = + .gitignore + README.gh-pages + README.release + examples/data/*nc + examples/*ipynb diff --git a/setup.py b/setup.py index 339432c2a..c3b934606 100644 --- a/setup.py +++ b/setup.py @@ -1,34 +1,37 @@ -import os, sys, subprocess +import os, sys, subprocess, glob import os.path as osp +import pathlib +import shutil +import configparser +import sysconfig from setuptools import setup, Extension -from distutils.dist import Distribution - -setuptools_extra_kwargs = { - "install_requires": ["numpy>=1.7"], - "setup_requires": ['setuptools>=18.0', "cython>=0.19"], - "entry_points": { - 'console_scripts': [ - 'ncinfo = netCDF4.utils:ncinfo', - 'nc4tonc3 = netCDF4.utils:nc4tonc3', - 'nc3tonc4 = netCDF4.utils:nc3tonc4', - ] - }, -} - -if sys.version_info[0] < 3: - import ConfigParser as configparser - - open_kwargs = {} +from setuptools.dist import Distribution +from typing import List + + +USE_PY_LIMITED_API = ( + # require opt-in (builds are specialized by default) + os.getenv('NETCDF4_LIMITED_API', '0') == '1' + # Cython + numpy + limited API de facto requires Python >=3.11 + and sys.version_info >= (3, 11) + # as of Python 3.14t, free-threaded builds don't support the limited API + and not sysconfig.get_config_var("Py_GIL_DISABLED") +) +ABI3_TARGET_VERSION = "".join(str(_) for _ in sys.version_info[:2]) +ABI3_TARGET_HEX = hex(sys.hexversion & 0xFFFF00F0) + +if USE_PY_LIMITED_API: + SETUP_OPTIONS = {"bdist_wheel": {"py_limited_api": f"cp{ABI3_TARGET_VERSION}"}} else: - import configparser + SETUP_OPTIONS = {} - open_kwargs = {'encoding': 'utf-8'} +open_kwargs = {'encoding': 'utf-8'} def check_hdf5version(hdf5_includedir): try: f = open(os.path.join(hdf5_includedir, 'H5public.h'), **open_kwargs) - except IOError: + except OSError: return None hdf5_version = None for line in f: @@ -36,11 +39,22 @@ def check_hdf5version(hdf5_includedir): hdf5_version = line.split('"')[1] return hdf5_version +def get_hdf5_version(direc): + # check to see if hdf5 headers in direc, return version number or None + hdf5_version = None + print(f"checking {direc}...") + hdf5_version = check_hdf5version(direc) + if hdf5_version is None: + print(f'hdf5 headers not found in {direc}') + return None + else: + print(f'{hdf5_version} headers found in {direc}') + return hdf5_version def check_ifnetcdf4(netcdf4_includedir): try: f = open(os.path.join(netcdf4_includedir, 'netcdf.h'), **open_kwargs) - except IOError: + except OSError: return False isnetcdf4 = False for line in f: @@ -49,42 +63,23 @@ def check_ifnetcdf4(netcdf4_includedir): return isnetcdf4 -def check_api(inc_dirs): - has_rename_grp = False - 
has_nc_inq_path = False - has_nc_inq_format_extended = False - has_cdf5_format = False - has_nc_open_mem = False - has_nc_par = False +def check_has_parallel_support(inc_dirs: list) -> bool: + has_parallel_support = False for d in inc_dirs: - try: - f = open(os.path.join(d, 'netcdf.h'), **open_kwargs) - except IOError: + ncmetapath = os.path.join(d,'netcdf_meta.h') + if not os.path.exists(ncmetapath): continue - has_nc_open_mem = os.path.exists(os.path.join(d, 'netcdf_mem.h')) - has_nc_par = os.path.exists(os.path.join(d, 'netcdf_par.h')) + with open(ncmetapath) as f: + for line in f: + if line.startswith('#define NC_HAS_PARALLEL'): + try: + has_parallel_support = bool(int(line.split()[2])) + except ValueError: + pass - for line in f: - if line.startswith('nc_rename_grp'): - has_rename_grp = True - if line.startswith('nc_inq_path'): - has_nc_inq_path = True - if line.startswith('nc_inq_format_extended'): - has_nc_inq_format_extended = True - if line.startswith('#define NC_FORMAT_64BIT_DATA'): - has_cdf5_format = True - - ncmetapath = os.path.join(d,'netcdf_meta.h') - if os.path.exists(ncmetapath): - for line in open(ncmetapath): - if line.startswith('#define NC_HAS_CDF5'): - has_cdf5_format = bool(int(line.split()[2])) - break - - return has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \ - has_cdf5_format, has_nc_open_mem, has_nc_par + return has_parallel_support def getnetcdfvers(libdirs): @@ -128,6 +123,16 @@ def getnetcdfvers(libdirs): return None +def extract_version(CYTHON_FNAME): + version = None + with open(CYTHON_FNAME) as fi: + for line in fi: + if (line.startswith('__version__')): + _, version = line.split('=') + version = version.strip()[1:-1] # Remove quotation characters. + break + return version + HDF5_dir = os.environ.get('HDF5_DIR') HDF5_incdir = os.environ.get('HDF5_INCDIR') @@ -149,183 +154,101 @@ def getnetcdfvers(libdirs): curl_incdir = os.environ.get('CURL_INCDIR') mpi_incdir = os.environ.get('MPI_INCDIR') -USE_NCCONFIG = os.environ.get('USE_NCCONFIG') -if USE_NCCONFIG is not None: - USE_NCCONFIG = bool(int(USE_NCCONFIG)) -USE_SETUPCFG = os.environ.get('USE_SETUPCFG') +USE_NCCONFIG = bool(int(os.environ.get('USE_NCCONFIG', 0))) # override use of setup.cfg with env var. -if USE_SETUPCFG is not None: - USE_SETUPCFG = bool(int(USE_SETUPCFG)) -else: - USE_SETUPCFG = True +USE_SETUPCFG = bool(int(os.environ.get('USE_SETUPCFG', 1))) setup_cfg = 'setup.cfg' # contents of setup.cfg will override env vars, unless -# USE_SETUPCFG evaluates to True. Exception is use_ncconfig, -# which does not take precedence ofver USE_NCCONFIG env var. +# USE_SETUPCFG evaluates to False. 
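The new check_has_parallel_support helper above keys off the NC_HAS_PARALLEL macro in netcdf_meta.h. A small sketch of that parsing step, using a hypothetical header line (real headers may format the define slightly differently):

line = "#define NC_HAS_PARALLEL 1 /*!< parallel IO support */"  # hypothetical example line
if line.startswith("#define NC_HAS_PARALLEL"):
    has_parallel_support = bool(int(line.split()[2]))  # -> True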
ncconfig = None use_ncconfig = None if USE_SETUPCFG and os.path.exists(setup_cfg): - sys.stdout.write('reading from setup.cfg...\n') - config = configparser.SafeConfigParser() + print('reading from setup.cfg...') + config = configparser.ConfigParser() config.read(setup_cfg) - try: - HDF5_dir = config.get("directories", "HDF5_dir") - except: - pass - try: - HDF5_libdir = config.get("directories", "HDF5_libdir") - except: - pass - try: - HDF5_incdir = config.get("directories", "HDF5_incdir") - except: - pass - try: - netCDF4_dir = config.get("directories", "netCDF4_dir") - except: - pass - try: - netCDF4_libdir = config.get("directories", "netCDF4_libdir") - except: - pass - try: - netCDF4_incdir = config.get("directories", "netCDF4_incdir") - except: - pass - try: - szip_dir = config.get("directories", "szip_dir") - except: - pass - try: - szip_libdir = config.get("directories", "szip_libdir") - except: - pass - try: - szip_incdir = config.get("directories", "szip_incdir") - except: - pass - try: - hdf4_dir = config.get("directories", "hdf4_dir") - except: - pass - try: - hdf4_libdir = config.get("directories", "hdf4_libdir") - except: - pass - try: - hdf4_incdir = config.get("directories", "hdf4_incdir") - except: - pass - try: - jpeg_dir = config.get("directories", "jpeg_dir") - except: - pass - try: - jpeg_libdir = config.get("directories", "jpeg_libdir") - except: - pass - try: - jpeg_incdir = config.get("directories", "jpeg_incdir") - except: - pass - try: - curl_dir = config.get("directories", "curl_dir") - except: - pass - try: - curl_libdir = config.get("directories", "curl_libdir") - except: - pass - try: - curl_incdir = config.get("directories", "curl_incdir") - except: - pass - try: - mpi_incdir = config.get("directories","mpi_incdir") - except: - pass - try: - use_ncconfig = config.getboolean("options", "use_ncconfig") - except: - pass - try: - ncconfig = config.get("options", "ncconfig") - except: - pass - -# make sure USE_NCCONFIG from environment takes -# precendence over use_ncconfig from setup.cfg (issue #341). 
-if USE_NCCONFIG is None and use_ncconfig is not None: - USE_NCCONFIG = use_ncconfig -elif USE_NCCONFIG is None: - USE_NCCONFIG = False + HDF5_dir = config.get("directories", "HDF5_dir", fallback=HDF5_dir) + HDF5_libdir = config.get("directories", "HDF5_libdir", fallback=HDF5_libdir) + HDF5_incdir = config.get("directories", "HDF5_incdir", fallback=HDF5_incdir) + netCDF4_dir = config.get("directories", "netCDF4_dir", fallback=netCDF4_dir) + netCDF4_libdir = config.get("directories", "netCDF4_libdir", fallback=netCDF4_libdir) + netCDF4_incdir = config.get("directories", "netCDF4_incdir", fallback=netCDF4_incdir) + szip_dir = config.get("directories", "szip_dir", fallback=szip_dir) + szip_libdir = config.get("directories", "szip_libdir", fallback=szip_libdir) + szip_incdir = config.get("directories", "szip_incdir", fallback=szip_incdir) + hdf4_dir = config.get("directories", "hdf4_dir", fallback=hdf4_dir) + hdf4_libdir = config.get("directories", "hdf4_libdir", fallback=hdf4_libdir) + hdf4_incdir = config.get("directories", "hdf4_incdir", fallback=hdf4_incdir) + jpeg_dir = config.get("directories", "jpeg_dir", fallback=jpeg_dir) + jpeg_libdir = config.get("directories", "jpeg_libdir", fallback=jpeg_libdir) + jpeg_incdir = config.get("directories", "jpeg_incdir", fallback=jpeg_incdir) + curl_dir = config.get("directories", "curl_dir", fallback=curl_dir) + curl_libdir = config.get("directories", "curl_libdir", fallback=curl_libdir) + curl_incdir = config.get("directories", "curl_incdir", fallback=curl_incdir) + mpi_incdir = config.get("directories","mpi_incdir", fallback=mpi_incdir) + use_ncconfig = config.getboolean("options", "use_ncconfig", fallback=use_ncconfig) + ncconfig = config.get("options", "ncconfig", fallback=ncconfig) -# if USE_NCCONFIG set, and nc-config works, use it. -if USE_NCCONFIG: - # if NETCDF4_DIR env var is set, look for nc-config in NETCDF4_DIR/bin. +try: if ncconfig is None: if netCDF4_dir is not None: ncconfig = os.path.join(netCDF4_dir, 'bin/nc-config') else: # otherwise, just hope it's in the users PATH. ncconfig = 'nc-config' - try: - retcode = subprocess.call([ncconfig, '--libs'], stdout=subprocess.PIPE) - except: - retcode = 1 -else: - retcode = 1 + HAS_NCCONFIG = subprocess.call([ncconfig, '--libs']) == 0 +except OSError: + HAS_NCCONFIG = False + +# make sure USE_NCCONFIG from environment takes +# precedence over use_ncconfig from setup.cfg (issue #341). +if use_ncconfig and not USE_NCCONFIG: + USE_NCCONFIG = use_ncconfig +elif not USE_NCCONFIG: + # if nc-config exists, and USE_NCCONFIG not set, try to use it. 
+ USE_NCCONFIG = HAS_NCCONFIG try: - HAS_PKG_CONFIG = subprocess.call(['pkg-config', '--libs', 'hdf5'], - stdout=subprocess.PIPE) == 0 + HAS_PKG_CONFIG = subprocess.call(['pkg-config', '--libs', 'hdf5']) == 0 except OSError: HAS_PKG_CONFIG = False + +def config_flags(command: List[str], flag: str) -> list: + """Pull out specific flags from a config command (pkg-config or nc-config)""" + flags = subprocess.run(command, capture_output=True, text=True) + return [arg[2:] for arg in flags.stdout.split() if arg.startswith(flag)] + + def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs): global HDF5_incdir, HDF5_dir, HDF5_libdir - if HAS_PKG_CONFIG: - dep = subprocess.Popen(['pkg-config', '--cflags', 'hdf5'], - stdout=subprocess.PIPE).communicate()[0] - inc_dirs.extend([str(i[2:].decode()) for i in dep.split() if - i[0:2].decode() == '-I']) - dep = subprocess.Popen(['pkg-config', '--libs', 'hdf5'], - stdout=subprocess.PIPE).communicate()[0] - libs.extend( - [str(l[2:].decode()) for l in dep.split() if l[0:2].decode() == '-l']) - lib_dirs.extend( - [str(l[2:].decode()) for l in dep.split() if l[0:2].decode() == '-L']) - dep = subprocess.Popen(['pkg-config', '--cflags', 'hdf5'], - stdout=subprocess.PIPE).communicate()[0] - inc_dirs.extend( - [str(i[2:].decode()) for i in dep.split() if i[0:2].decode() == '-I']) + nohdf5dirs = HDF5_incdir is None and HDF5_libdir is None and HDF5_dir is None + if HAS_PKG_CONFIG and nohdf5dirs: + # if HDF5 dirs not specified, and pkg-config available, use it + inc_dirs.extend(config_flags(["pkg-config", "--cflags", "hdf5"], "-I")) + libs.extend(config_flags(["pkg-config", "--libs", "hdf5"], "-l")) + lib_dirs.extend(config_flags(["pkg-config", "--libs", "hdf5"], "-L")) else: if HDF5_incdir is None and HDF5_dir is None: - sys.stdout.write(""" - HDF5_DIR environment variable not set, checking some standard locations ..\n""") + print(" HDF5_DIR environment variable not set, checking some standard locations ..") for direc in dirstosearch: - sys.stdout.write('checking %s ...\n' % direc) - hdf5_version = check_hdf5version(os.path.join(direc, 'include')) + hdf5_version = get_hdf5_version(os.path.join(direc, 'include')) if hdf5_version is None: continue else: HDF5_dir = direc HDF5_incdir = os.path.join(direc, 'include') - sys.stdout.write('%s found in %s\n' % - (hdf5_version,HDF5_dir)) + print(f'{hdf5_version} found in {HDF5_dir}') break if HDF5_dir is None: raise ValueError('did not find HDF5 headers') else: if HDF5_incdir is None: HDF5_incdir = os.path.join(HDF5_dir, 'include') - hdf5_version = check_hdf5version(HDF5_incdir) + hdf5_version = get_hdf5_version(HDF5_incdir) if hdf5_version is None: - raise ValueError('did not find HDF5 headers in %s' % HDF5_incdir) - else: - sys.stdout.write('%s found in %s\n' % - (hdf5_version,HDF5_dir)) + raise ValueError(f'did not find HDF5 headers in {HDF5_incdir}') + print(f'{hdf5_version} found in {HDF5_dir}') if HDF5_libdir is None and HDF5_dir is not None: HDF5_libdir = os.path.join(HDF5_dir, 'lib') @@ -336,52 +259,54 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs): libs.extend(['hdf5_hl', 'hdf5']) -dirstosearch = [os.path.expanduser('~'), '/usr/local', '/sw', '/opt', - '/opt/local', '/usr'] +dirstosearch = [] +if os.environ.get("CONDA_PREFIX"): + dirstosearch.append(os.environ["CONDA_PREFIX"]) # linux,macosx + dirstosearch.append(os.path.join(os.environ["CONDA_PREFIX"],'Library')) # windows +dirstosearch += [os.path.expanduser('~'), '/usr/local', '/sw', '/opt', + '/opt/local', '/opt/homebrew', '/usr'] -if 
not retcode: # Try nc-config. - sys.stdout.write('using nc-config ...\n') - dep = subprocess.Popen([ncconfig, '--libs'], - stdout=subprocess.PIPE).communicate()[0] - libs = [str(l[2:].decode()) for l in dep.split() if l[0:2].decode() == '-l'] - lib_dirs = [str(l[2:].decode()) for l in dep.split() if - l[0:2].decode() == '-L'] - dep = subprocess.Popen([ncconfig, '--cflags'], - stdout=subprocess.PIPE).communicate()[0] - inc_dirs = [str(i[2:].decode()) for i in dep.split() if - i[0:2].decode() == '-I'] - - _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs) -elif HAS_PKG_CONFIG: # Try pkg-config. - sys.stdout.write('using pkg-config ...\n') - dep = subprocess.Popen(['pkg-config', '--libs', 'netcdf'], - stdout=subprocess.PIPE).communicate()[0] - libs = [str(l[2:].decode()) for l in dep.split() if l[0:2].decode() == '-l'] - lib_dirs = [str(l[2:].decode()) for l in dep.split() if - l[0:2].decode() == '-L'] +# try nc-config first +if USE_NCCONFIG and HAS_NCCONFIG and ncconfig is not None: + print(f'using {ncconfig}...') + libs = config_flags([ncconfig, "--libs"], "-l") + lib_dirs = config_flags([ncconfig, "--libs"], "-L") + inc_dirs = config_flags([ncconfig, '--cflags'], "-I") - inc_dirs = [] - _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs) -# If nc-config and pkg-config both didn't work (it won't on Windows), fall back on brute force method. + # check to see if hdf5 found in directories returned by nc-config + hdf5_version = None + for direc in inc_dirs: + hdf5_version = get_hdf5_version(direc) + if hdf5_version is not None: + if sys.platform == "cygwin": + _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs) + break + # if hdf5 not found, search other standard locations (including those specified in env vars). + if hdf5_version is None: + print('nc-config did provide path to HDF5 headers, search standard locations...') + _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs) + +# If nc-config doesn't work, fall back on brute force method. else: lib_dirs = [] inc_dirs = [] libs = [] + # _populate_hdf5_info will use HDF5_dir, HDF5_libdir and HDF5_incdir if they are set. + # otherwise pkg-config will be tried, and if that fails, dirstosearch will be searched. _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs) if netCDF4_incdir is None and netCDF4_dir is None: - sys.stdout.write(""" -NETCDF4_DIR environment variable not set, checking standard locations.. \n""") + print("NETCDF4_DIR environment variable not set, checking standard locations..") for direc in dirstosearch: - sys.stdout.write('checking %s ...\n' % direc) + print(f'checking {direc}...') isnetcdf4 = check_ifnetcdf4(os.path.join(direc, 'include')) if not isnetcdf4: continue else: netCDF4_dir = direc netCDF4_incdir = os.path.join(direc, 'include') - sys.stdout.write('netCDF4 found in %s\n' % netCDF4_dir) + print(f'netCDF4 found in {netCDF4_dir}') break if netCDF4_dir is None: raise ValueError('did not find netCDF version 4 headers') @@ -410,7 +335,11 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs): if szip_incdir is None and szip_dir is not None: szip_incdir = os.path.join(szip_dir, 'include') if szip_incdir is not None and szip_libdir is not None: - libs.append('sz') + if sys.platform == 'win32': + libs.append('szip') + else: + libs.append('sz') + lib_dirs.append(szip_libdir) inc_dirs.append(szip_incdir) # add hdf4 to link if desired. 
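The new config_flags helper introduced above simply pulls -I/-L/-l style arguments out of pkg-config or nc-config output; the same extraction is sketched below on a made-up sample string (not real output from any particular installation):

sample = "-L/usr/local/lib -lnetcdf -lhdf5_hl -lhdf5 -lm -lz"  # hypothetical `nc-config --libs` output
libs = [arg[2:] for arg in sample.split() if arg.startswith("-l")]      # ['netcdf', 'hdf5_hl', 'hdf5', 'm', 'z']
lib_dirs = [arg[2:] for arg in sample.split() if arg.startswith("-L")]  # ['/usr/local/lib']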
@@ -442,7 +371,7 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs): lib_dirs.append(curl_libdir) inc_dirs.append(curl_incdir) -if sys.platform == 'win32': +if sys.platform == 'win32' or sys.platform == 'cygwin': runtime_lib_dirs = [] else: runtime_lib_dirs = lib_dirs @@ -460,130 +389,122 @@ def _populate_hdf5_info(dirstosearch, inc_dirs, libs, lib_dirs): # get netcdf library version. netcdf_lib_version = getnetcdfvers(lib_dirs) if netcdf_lib_version is None: - sys.stdout.write('unable to detect netcdf library version\n') + print('unable to detect netcdf library version') else: netcdf_lib_version = str(netcdf_lib_version) - sys.stdout.write('using netcdf library version %s\n' % netcdf_lib_version) + print(f'using netcdf library version {netcdf_lib_version}') -cmdclass = {} -netcdf4_src_root = osp.join('netCDF4', '_netCDF4') +DEFINE_MACROS = [("NPY_NO_DEPRECATED_API", "NPY_1_7_API_VERSION")] +netcdf4_src_root = osp.join(osp.join('src','netCDF4'), '_netCDF4') netcdf4_src_c = netcdf4_src_root + '.c' -netcdftime_src_root = osp.join('netcdftime', '_netcdftime') -netcdftime_src_c = netcdftime_src_root + '.c' -if 'sdist' not in sys.argv[1:] and 'clean' not in sys.argv[1:]: - sys.stdout.write('using Cython to compile netCDF4.pyx...\n') +netcdf4_src_pyx = netcdf4_src_root + '.pyx' +if 'sdist' not in sys.argv[1:] and 'clean' not in sys.argv[1:] and '--version' not in sys.argv[1:]: + print('using Cython to compile netCDF4.pyx...') # remove _netCDF4.c file if it exists, so cython will recompile _netCDF4.pyx. # run for build *and* install (issue #263). Otherwise 'pip install' will # not regenerate _netCDF4.c, even if the C lib supports the new features. if len(sys.argv) >= 2: if os.path.exists(netcdf4_src_c): os.remove(netcdf4_src_c) - # same for _netcdftime.c - if os.path.exists(netcdftime_src_c): - os.remove(netcdftime_src_c) - # this determines whether renameGroup and filepath methods will work. - has_rename_grp, has_nc_inq_path, has_nc_inq_format_extended, \ - has_cdf5_format, has_nc_open_mem, has_nc_par = check_api(inc_dirs) + # for netcdf 4.4.x CDF5 format is always enabled. if netcdf_lib_version is not None and\ (netcdf_lib_version > "4.4" and netcdf_lib_version < "4.5"): has_cdf5_format = True - # disable parallel support if mpi4py not available. 
- try: - import mpi4py - except ImportError: - has_nc_par = False - - f = open(osp.join('include', 'constants.pyx'), 'w') - if has_rename_grp: - sys.stdout.write('netcdf lib has group rename capability\n') - f.write('DEF HAS_RENAME_GRP = 1\n') - else: - sys.stdout.write('netcdf lib does not have group rename capability\n') - f.write('DEF HAS_RENAME_GRP = 0\n') + has_parallel_support = check_has_parallel_support(inc_dirs) + has_has_not = "has" if has_parallel_support else "does not have" + print(f"netcdf lib {has_has_not} parallel functions") - if has_nc_inq_path: - sys.stdout.write('netcdf lib has nc_inq_path function\n') - f.write('DEF HAS_NC_INQ_PATH = 1\n') - else: - sys.stdout.write('netcdf lib does not have nc_inq_path function\n') - f.write('DEF HAS_NC_INQ_PATH = 0\n') - - if has_nc_inq_format_extended: - sys.stdout.write('netcdf lib has nc_inq_format_extended function\n') - f.write('DEF HAS_NC_INQ_FORMAT_EXTENDED = 1\n') - else: - sys.stdout.write( - 'netcdf lib does not have nc_inq_format_extended function\n') - f.write('DEF HAS_NC_INQ_FORMAT_EXTENDED = 0\n') - - if has_nc_open_mem: - sys.stdout.write('netcdf lib has nc_open_mem function\n') - f.write('DEF HAS_NC_OPEN_MEM = 1\n') - else: - sys.stdout.write('netcdf lib does not have nc_open_mem function\n') - f.write('DEF HAS_NC_OPEN_MEM = 0\n') - - if has_cdf5_format: - sys.stdout.write('netcdf lib has cdf-5 format capability\n') - f.write('DEF HAS_CDF5_FORMAT = 1\n') - else: - sys.stdout.write('netcdf lib does not have cdf-5 format capability\n') - f.write('DEF HAS_CDF5_FORMAT = 0\n') - - if has_nc_par: - sys.stdout.write('netcdf lib has netcdf4 parallel functions\n') - f.write('DEF HAS_NC_PAR = 1\n') + if has_parallel_support: + # note(stubbiali): mpi4py is not available when using the in-tree build backend + try: + import mpi4py + except ImportError: + mpi4py = None + + if mpi4py is not None: + inc_dirs.append(mpi4py.get_include()) + # mpi_incdir should not be needed if using nc-config + # (should be included in nc-config --cflags) + if mpi_incdir is not None: + inc_dirs.append(mpi_incdir) + + # Name of file containing imports required for parallel support + parallel_support_imports = "parallel_support_imports.pxi.in" else: - sys.stdout.write('netcdf lib does not have netcdf4 parallel functions\n') - f.write('DEF HAS_NC_PAR = 0\n') - - f.close() - - if has_nc_par: - inc_dirs.append(mpi4py.get_include()) - # mpi_incdir should not be needed if using nc-config - # (should be included in nc-config --cflags) - if mpi_incdir is not None: inc_dirs.append(mpi_incdir) + parallel_support_imports = "no_parallel_support_imports.pxi.in" + + # Copy the specific version of the file containing parallel + # support imports + shutil.copyfile( + osp.join("include", parallel_support_imports), + osp.join("include", "parallel_support_imports.pxi") + ) + + nc_complex_dir = pathlib.Path("./external/nc_complex") + source_files = [ + netcdf4_src_pyx, + str(nc_complex_dir / "src/nc_complex.c"), + ] + include_dirs = inc_dirs + [ + "include", + str(nc_complex_dir / "include"), + str(nc_complex_dir / "include/generated_fallbacks"), + ] + DEFINE_MACROS += [("NC_COMPLEX_NO_EXPORT", "1")] + if USE_PY_LIMITED_API: + DEFINE_MACROS.append(("Py_LIMITED_API", ABI3_TARGET_HEX)) ext_modules = [Extension("netCDF4._netCDF4", - [netcdf4_src_root + '.pyx'], + source_files, + define_macros=DEFINE_MACROS, libraries=libs, library_dirs=lib_dirs, - include_dirs=inc_dirs + ['include'], - runtime_library_dirs=runtime_lib_dirs), - Extension('netcdftime._netcdftime', - 
[netcdftime_src_root + '.pyx'])] + include_dirs=include_dirs, + runtime_library_dirs=runtime_lib_dirs, + py_limited_api=USE_PY_LIMITED_API)] + # set language_level directive to 3 + for e in ext_modules: + e.cython_directives = {'language_level': "3"} # else: ext_modules = None -setup(name="netCDF4", - cmdclass=cmdclass, - version="1.3.2", - long_description="netCDF version 4 has many features not found in earlier versions of the library, such as hierarchical groups, zlib compression, multiple unlimited dimensions, and new data types. It is implemented on top of HDF5. This module implements most of the new features, and can read and write netCDF files compatible with older versions of the library. The API is modelled after Scientific.IO.NetCDF, and should be familiar to users of that module.\n\nThis project is hosted on a `GitHub repository `_ where you may access the most up-to-date source.", - author="Jeff Whitaker", - author_email="jeffrey.s.whitaker@noaa.gov", - url="http://github.com/Unidata/netcdf4-python", - download_url="http://python.org/pypi/netCDF4", - platforms=["any"], - license="OSI Approved", - description="Provides an object-oriented python interface to the netCDF version 4 library.", - keywords=['numpy', 'netcdf', 'data', 'science', 'network', 'oceanography', - 'meteorology', 'climate'], - classifiers=["Development Status :: 3 - Alpha", - "Programming Language :: Python :: 2", - "Programming Language :: Python :: 2.6", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.3", - "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5", - "Intended Audience :: Science/Research", - "License :: OSI Approved", - "Topic :: Software Development :: Libraries :: Python Modules", - "Topic :: System :: Archiving :: Compression", - "Operating System :: OS Independent"], - packages=['netcdftime', 'netCDF4'], - ext_modules=ext_modules, - **setuptools_extra_kwargs) +# if NETCDF_PLUGIN_DIR set, install netcdf-c compression plugins inside package +# (should point to location of lib__nc* files built by netcdf-c) +copied_plugins=False +if os.environ.get("NETCDF_PLUGIN_DIR"): + plugin_dir = os.environ.get("NETCDF_PLUGIN_DIR") + plugins = glob.glob(os.path.join(plugin_dir, "*__nc*")) + if not plugins: + print('no plugin files in %s, not installing...' % plugin_dir) + if not os.path.exists(plugin_dir): + print('directory %s does not exist!' 
% plugin_dir) + data_files = [] + else: + data_files = plugins + print(f'installing netcdf compression plugins from {plugin_dir} ...') + sofiles = [os.path.basename(sofilepath) for sofilepath in data_files] + print(repr(sofiles)) + if 'sdist' not in sys.argv[1:] and 'clean' not in sys.argv[1:] and '--version' not in sys.argv[1:]: + for f in data_files: + shutil.copy(f, osp.join(os.getcwd(),osp.join(osp.join('src','netCDF4'),'plugins'))) + copied_plugins=True +else: + print('NETCDF_PLUGIN_DIR not set, no netcdf compression plugins installed') + data_files = [] + +# See pyproject.toml for project metadata +setup( + name="netCDF4", # need by GitHub dependency graph + version=extract_version(netcdf4_src_pyx), + ext_modules=ext_modules, + options=SETUP_OPTIONS, +) + +# remove plugin files copied from outside source tree +if copied_plugins: + for f in sofiles: + filepath = osp.join(osp.join(osp.join('src','netCDF4'),'plugins'),f) + if os.path.exists(filepath): + os.remove(filepath) diff --git a/src/netCDF4/__init__.py b/src/netCDF4/__init__.py new file mode 100644 index 000000000..9049f7a9a --- /dev/null +++ b/src/netCDF4/__init__.py @@ -0,0 +1,28 @@ +# init for netCDF4. package +# if HDF5_PLUGIN_PATH not set, point to package path if plugins live there +import os +pluginpath = os.path.join(__path__[0],'plugins') +if 'HDF5_PLUGIN_PATH' not in os.environ and\ + (os.path.exists(os.path.join(pluginpath,'lib__nczhdf5filters.so')) or\ + os.path.exists(os.path.join(pluginpath,'__nczhdf5filters.dll')) or\ + os.path.exists(os.path.join(pluginpath,'lib__nczhdf5filters.dylib'))): + os.environ['HDF5_PLUGIN_PATH']=pluginpath +# Docstring comes from extension module _netCDF4. +from ._netCDF4 import * +# Need explicit imports for names beginning with underscores +from ._netCDF4 import __doc__ +from ._netCDF4 import (__version__, __netcdf4libversion__, __hdf5libversion__, + __has_rename_grp__, __has_nc_inq_path__, + __has_nc_inq_format_extended__, __has_nc_open_mem__, + __has_nc_create_mem__, __has_cdf5_format__, + __has_parallel4_support__, __has_pnetcdf_support__, + __has_quantization_support__, __has_zstandard_support__, + __has_bzip2_support__, __has_blosc_support__, __has_szip_support__, + __has_set_alignment__, __has_parallel_support__, __has_ncfilter__, __has_nc_rc_set__) +__all__ = [ + 'Dataset', 'Variable', 'Dimension', 'Group', 'MFDataset', 'MFTime', 'CompoundType', + 'VLType', 'date2num', 'num2date', 'date2index', 'stringtochar', 'chartostring', + 'stringtoarr', 'getlibversion', 'EnumType', 'get_chunk_cache', 'set_chunk_cache', + 'set_alignment', 'get_alignment', 'rc_get', 'rc_set', +] +__pdoc__ = {'utils': False} diff --git a/src/netCDF4/__init__.pyi b/src/netCDF4/__init__.pyi new file mode 100644 index 000000000..4b11688b1 --- /dev/null +++ b/src/netCDF4/__init__.pyi @@ -0,0 +1,726 @@ +"""__init__.pyi - Type stubs for the netCDF4 Python package""" +# Notes: +# +# - The stubs in this file are manually-generated and must be updated if and when the API is changed. 
+# - The following **ruff** commands may be used to format this file according to +# https://typing.readthedocs.io/en/latest/source/stubs.html +# +# ruff format --line-length 130 src/netCDF4/__init__.pyi # format code +# ruff check --line-length 130 --select I --fix src/netCDF4/__init__.pyi # sort imports +# +# - The Variable class is a generic and may thus be statically typed, but this has limited utility for the following reasons: +# - The return type of `Variable.__getitem__()` (and `Variable.getValue()`) depends on a number of factors (e.g. variable +# shape, key shape, whether masking is enabled) that cannot be easily determined statically. +# - Similarly, the types and shapes of data that `Variable.__setitem__()` may accept varies widely depending on many factors +# and is intractable to determine statically. +# - Some facility for automatically typing a Variable on creation has been provided, however it is not exhaustive as a variable +# may created with a string literal indicating its type and it would require an excessive number of overloads to enumerate +# each of these cases. +# - It is not possible to statically type a Variable of any user-defined type (CompoundType, EnumType, VLType) as these types +# are created dynamically. +# Thus it is most often best for the user to implement TypeGuards and/or perform other mixed static/runtime type-checking to +# ensure the type and shape of data retrieved from this library. +# - The return type of some functions or properties (such as `Dataset.__getitem__()`) may one of a number of types. Where it is +# not possible to narrow the type with overloads, the authors of these stubs have generally chosen to use `Any` as the return +# type rather than a union of the possible types. +# - `MFDataset.dimensions` returns `dict[str, Dimension]` and `MFDataset.variables` returns `dict[str, Variable]` even though the +# dict value types may actually be `_Dimension` and `_Variable`, respectively. The original authors of this stubfile have +# elected to do this for simplicity's sake, but it may make sense to change this in the future, or just return `dict[str, Any]`. 
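Since the notes above recommend user-side TypeGuards for narrowing Variable types, here is a hedged sketch of one such guard; the guard itself, the file name and the variable name are illustrative and not part of the package:

from __future__ import annotations
from typing import Any
import numpy as np
import numpy.typing as npt
from typing_extensions import TypeGuard
import netCDF4

def is_float64_var(var: netCDF4.Variable[Any]) -> TypeGuard[netCDF4.Variable[np.float64]]:
    # dtype is a np.dtype for numeric variables and `str` for VLEN string variables
    return isinstance(var.dtype, np.dtype) and var.dtype == np.dtype(np.float64)

with netCDF4.Dataset("example.nc") as ds:        # hypothetical file
    var = ds.variables["temperature"]            # hypothetical variable
    if is_float64_var(var):
        data: npt.NDArray[np.float64] = var[:]   # may still be a masked array at runtime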
+ +import datetime as dt +import os +import sys +from typing import ( + TYPE_CHECKING, + Any, + Callable, + final, + Final, + Generic, + Iterable, + Iterator, + Literal, + Mapping, + NoReturn, + Sequence, + TypedDict, + TypeVar, + Union, + overload, +) + +import cftime +import numpy as np +import numpy.typing as npt +from typing_extensions import Buffer, Self, TypeAlias, disjoint_base + +__all__ = [ + "Dataset", + "Variable", + "Dimension", + "Group", + "MFDataset", + "MFTime", + "CompoundType", + "VLType", + "date2num", + "num2date", + "rc_get", + "rc_set", + "date2index", + "stringtochar", + "chartostring", + "stringtoarr", + "getlibversion", + "EnumType", + "get_chunk_cache", + "set_chunk_cache", + "set_alignment", + "get_alignment", +] +__pdoc__ = {"utils": False} + +# string type specifiers +# fmt: off +RealTypeLiteral: TypeAlias = Literal[ + "i1", "b", "B", "int8", # NC_BYTE + "u1", "uint8", # NC_UBYTE + "i2", "h", "s", "int16", # NC_SHORT + "u2", "uint16", # NC_USHORT + "i4", "i", "l", "int32", # NC_INT + "u4", "uint32", # NC_UINT + "i8", "int64", "int", # NC_INT64 + "u8", "uint64", # NC_UINT64 + "f4", "f", "float32", # NC_FLOAT + "f8", "d", "float64", "float" # NC_DOUBLE +] +# fmt: on +ComplexTypeLiteral: TypeAlias = Literal["c8", "c16", "complex64", "complex128"] +NumericTypeLiteral: TypeAlias = RealTypeLiteral | ComplexTypeLiteral +CharTypeLiteral: TypeAlias = Literal["S1", "c"] # NC_CHAR +TypeLiteral: TypeAlias = NumericTypeLiteral | CharTypeLiteral + +# Numpy types +NumPyRealType: TypeAlias = ( + np.int8 | np.uint8 | np.int16 | np.uint16 | np.int32 | np.uint32 | np.int64 | np.uint64 | np.float16 | np.float32 | np.float64 +) +NumPyComplexType: TypeAlias = np.complex64 | np.complex128 +NumPyNumericType: TypeAlias = NumPyRealType | NumPyComplexType +# Classes that can create instances of NetCDF user-defined types +NetCDFUDTClass: TypeAlias = CompoundType | VLType | EnumType +# Possible argument types for the datatype argument used in Variable creation. At this time, it is not possible to allow unknown +# strings arguments in the datatype field but exclude and string literals that are not one of `TypeLiteral`, so really +# `TypeLiteral` is made irrelevant, except for anyone who looks at this file. 
+DatatypeType: TypeAlias = ( + TypeLiteral + | str + | np.dtype[NumPyNumericType | np.str_] + | type[int | float | NumPyNumericType | str | np.str_] + | NetCDFUDTClass +) + +VarT = TypeVar("VarT") +RealVarT = TypeVar("RealVarT", bound=NumPyRealType) +ComplexVarT = TypeVar("ComplexVarT", bound=NumPyComplexType) +NumericVarT = TypeVar("NumericVarT", bound=NumPyNumericType) + +DimensionsType: TypeAlias = Union[str, bytes, Dimension, Iterable[Union[str, bytes, Dimension]]] +CompressionType: TypeAlias = Literal[ + "zlib", "szip", "zstd", "bzip2", "blosc_lz", "blosc_lz4", "blosc_lz4hc", "blosc_zlib", "blosc_zstd" +] +CompressionLevel: TypeAlias = Literal[0, 1, 2, 3, 4, 5, 6, 7, 8, 9] +AccessMode: TypeAlias = Literal["r", "w", "r+", "a", "x", "rs", "ws", "r+s", "as"] +Format: TypeAlias = Literal["NETCDF4", "NETCDF4_CLASSIC", "NETCDF3_CLASSIC", "NETCDF3_64BIT_OFFSET", "NETCDF3_64BIT_DATA"] +DiskFormat: TypeAlias = Literal["NETCDF3", "HDF5", "HDF4", "PNETCDF", "DAP2", "DAP4", "UNDEFINED"] +QuantizeMode: TypeAlias = Literal["BitGroom", "BitRound", "GranularBitRound"] +EndianType: TypeAlias = Literal["native", "little", "big"] +CalendarType: TypeAlias = Literal[ + "standard", "gregorian", "proleptic_gregorian", "noleap", "365_day", "360_day", "julian", "all_leap", "366_day" +] +BoolInt: TypeAlias = Literal[0, 1] + +DateTimeArray: TypeAlias = npt.NDArray[np.object_] +"""numpy array of datetime.datetime or cftime.datetime""" + +class BloscInfo(TypedDict): + compressor: Literal["blosc_lz", "blosc_lz4", "blosc_lz4hc", "blosc_zlib", "blosc_zstd"] + shuffle: Literal[0, 1, 2] + +class SzipInfo(TypedDict): + coding: Literal["nn", "ec"] + pixels_per_block: Literal[4, 8, 16, 32] + +class FiltersDict(TypedDict): + """Dict returned from netCDF4.Variable.filters()""" + + zlib: bool + szip: Literal[False] | SzipInfo + zstd: bool + bzip2: bool + blosc: Literal[False] | BloscInfo + shuffle: bool + complevel: int + fletcher32: bool + +__version__: str +__netcdf4libversion__: str +__hdf5libversion__: str +__has_rename_grp__: BoolInt +__has_nc_inq_path__: BoolInt +__has_nc_inq_format_extended__: BoolInt +__has_nc_open_mem__: BoolInt +__has_nc_create_mem__: BoolInt +__has_cdf5_format__: BoolInt +__has_parallel4_support__: BoolInt +__has_pnetcdf_support__: BoolInt +__has_parallel_support__: BoolInt +__has_quantization_support__: BoolInt +__has_zstandard_support__: BoolInt +__has_bzip2_support__: BoolInt +__has_blosc_support__: BoolInt +__has_szip_support__: BoolInt +__has_set_alignment__: BoolInt +__has_ncfilter__: BoolInt +__has_nc_rc_set__: BoolInt +is_native_little: bool +is_native_big: bool +default_encoding: Final = "utf-8" +unicode_error: Final = "replace" +default_fillvals: dict[str, int | float | str] + +# date2index, date2num, and num2date are actually provided by cftime and if stubs for +# cftime are completed these should be removed. +def date2index( + dates: dt.datetime | cftime.datetime | Sequence[dt.datetime | cftime.datetime] | DateTimeArray, + nctime: Variable, + calendar: CalendarType | str | None = None, + select: Literal["exact", "before", "after", "nearest"] = "exact", + has_year_zero: bool | None = None, +) -> int | npt.NDArray[np.int_]: ... +def date2num( + dates: dt.datetime | cftime.datetime | Sequence[dt.datetime | cftime.datetime] | DateTimeArray, + units: str, + calendar: CalendarType | str | None = None, + has_year_zero: bool | None = None, + longdouble: bool = False, +) -> np.number | npt.NDArray[np.number]: ... 
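A short usage sketch for the date2num/date2index signatures above; the file and variable names are hypothetical, and the exact numeric return types depend on the installed cftime version:

from datetime import datetime
from netCDF4 import Dataset, date2num, date2index

nc = Dataset("example.nc", "w", diskless=True)   # in-memory file, never written to disk
nc.createDimension("time", None)
times = nc.createVariable("time", "f8", ("time",))
times.units = "hours since 2000-01-01 00:00:00"
times.calendar = "standard"
times[:] = range(24)                             # one value per hour

offset = date2num(datetime(2000, 1, 1, 6), times.units, calendar=times.calendar)
idx = date2index(datetime(2000, 1, 1, 6, 20), times, select="nearest")
# under these assumptions, offset should be 6.0 and idx should be 6
nc.close()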
+def num2date( + times: Sequence[int | float | np.number] | npt.NDArray[np.number], + units: str, + calendar: CalendarType | str = "standard", + only_use_cftime_datetimes: bool = True, + only_use_python_datetimes: bool = False, + has_year_zero: bool | None = None, +) -> dt.datetime | DateTimeArray: ... + +class NetCDF4MissingFeatureException(Exception): + def __init__(self, feature: str, version: str): ... + +def dtype_is_complex(dtype: str) -> bool: ... + +@disjoint_base +class Dataset: + def __init__( + self, + filename: str | os.PathLike, + mode: AccessMode = "r", + clobber: bool = True, + format: Format = "NETCDF4", + diskless: bool = False, + persist: bool = False, + keepweakref: bool = False, + memory: Buffer | int | None = None, + encoding: str | None = None, + parallel: bool = False, + comm: Any = None, + info: Any = None, + auto_complex: bool = False, + **kwargs: Any, + ): ... + @property + def name(self) -> str: ... + @property + def groups(self) -> dict[str, Group]: ... + @property + def dimensions(self) -> dict[str, Dimension]: ... + @property + def variables(self) -> dict[str, Variable[Any]]: ... + @property + def cmptypes(self) -> dict[str, CompoundType]: ... + @property + def vltypes(self) -> dict[str, VLType]: ... + @property + def enumtypes(self) -> dict[str, EnumType]: ... + @property + def data_model(self) -> Format: ... + @property + def file_format(self) -> Format: ... + @property + def disk_format(self) -> DiskFormat: ... + @property + def parent(self) -> Dataset | None: ... + @property + def path(self) -> str: ... + @property + def keepweakref(self) -> bool: ... + @property + def auto_complex(self) -> bool: ... + @property + def _ncstring_attrs__(self) -> bool: ... + @property + def __orthogonal_indexing__(self) -> bool: ... + def filepath(self, encoding: str | None = None) -> str: ... + def isopen(self) -> bool: ... + def close(self) -> memoryview: ... # only if writing and memory != None, but otherwise people ignore the return None anyway + def sync(self) -> None: ... + def set_fill_on(self) -> None: ... + def set_fill_off(self) -> None: ... + def createDimension(self, dimname: str, size: int | None = None) -> Dimension: ... + def renameDimension(self, oldname: str, newname: str) -> None: ... + @overload + def createVariable( + self, + varname: str, + datatype: np.dtype[NumericVarT] | type[NumericVarT], + dimensions: DimensionsType = (), + compression: CompressionType | None = None, + zlib: bool = False, + complevel: CompressionLevel | None = 4, + shuffle: bool = True, + szip_coding: Literal["nn", "ec"] = "nn", + szip_pixels_per_block: Literal[4, 8, 16, 32] = 8, + blosc_shuffle: Literal[0, 1, 2] = 1, + fletcher32: bool = False, + contiguous: bool = False, + chunksizes: Sequence[int] | None = None, + endian: EndianType = "native", + least_significant_digit: int | None = None, + significant_digits: int | None = None, + quantize_mode: QuantizeMode = "BitGroom", + fill_value: int | float | np.generic | str | bytes | Literal[False] | np.ndarray | None = None, + chunk_cache: int | None = None, + ) -> Variable[NumericVarT]: ... 
+ @overload + def createVariable( + self, + varname: str, + datatype: np.dtype[np.str_] | type[str | np.str_], + dimensions: DimensionsType = (), + compression: CompressionType | None = None, + zlib: bool = False, + complevel: CompressionLevel | None = 4, + shuffle: bool = True, + szip_coding: Literal["nn", "ec"] = "nn", + szip_pixels_per_block: Literal[4, 8, 16, 32] = 8, + blosc_shuffle: Literal[0, 1, 2] = 1, + fletcher32: bool = False, + contiguous: bool = False, + chunksizes: Sequence[int] | None = None, + endian: EndianType = "native", + least_significant_digit: int | None = None, + significant_digits: int | None = None, + quantize_mode: QuantizeMode = "BitGroom", + fill_value: int | float | np.generic | str | bytes | Literal[False] | np.ndarray | None = None, + chunk_cache: int | None = None, + ) -> Variable[str]: ... + @overload + def createVariable( + self, + varname: str, + datatype: DatatypeType, + dimensions: DimensionsType = (), + compression: CompressionType | None = None, + zlib: bool = False, + complevel: CompressionLevel | None = 4, + shuffle: bool = True, + szip_coding: Literal["nn", "ec"] = "nn", + szip_pixels_per_block: Literal[4, 8, 16, 32] = 8, + blosc_shuffle: Literal[0, 1, 2] = 1, + fletcher32: bool = False, + contiguous: bool = False, + chunksizes: Sequence[int] | None = None, + endian: EndianType = "native", + least_significant_digit: int | None = None, + significant_digits: int | None = None, + quantize_mode: QuantizeMode = "BitGroom", + fill_value: int | float | np.generic | str | bytes | Literal[False] | np.ndarray | None = None, + chunk_cache: int | None = None, + ) -> Variable: ... + def renameVariable(self, oldname: str, newname: str) -> None: ... + def createGroup(self, groupname: str) -> Group: ... + def renameGroup(self, oldname: str, newname: str) -> None: ... + def renameAttribute(self, oldname: str, newname: str) -> None: ... + def createCompoundType( + self, datatype: npt.DTypeLike | Sequence[tuple[str, npt.DTypeLike]], datatype_name: str + ) -> CompoundType: ... + def createVLType(self, datatype: npt.DTypeLike, datatype_name: str) -> VLType: ... + def createEnumType( + self, + datatype: np.dtype[np.integer] | type[np.integer] | type[int] | str, + datatype_name: str, + enum_dict: Mapping[str, int | np.integer], + ) -> EnumType: ... + def ncattrs(self) -> list[str]: ... + def setncattr_string(self, name: str, value: Any) -> None: ... + def setncattr(self, name: str, value: Any) -> None: ... + def setncatts(self, attdict: Mapping[str, Any]) -> None: ... + def getncattr(self, name: str, encoding: str = "utf-8") -> Any: ... + def delncattr(self, name: str) -> None: ... + def set_auto_chartostring(self, value: bool) -> None: ... + def set_auto_maskandscale(self, value: bool) -> None: ... + def set_auto_mask(self, value: bool) -> None: ... + def set_auto_scale(self, value: bool) -> None: ... + def set_always_mask(self, value: bool) -> None: ... + def set_ncstring_attrs(self, value: bool) -> None: ... + def get_variables_by_attributes(self, **kwargs: Callable[[Any], bool] | Any) -> list[Variable]: ... + @staticmethod + def fromcdl( + cdlfilename: str | os.PathLike, ncfilename: str | os.PathLike | None = None, mode: AccessMode = "a", format: Format = "NETCDF4" + ) -> Dataset: ... + @overload + def tocdl(self, coordvars: bool = False, data: bool = False, outfile: None = None) -> str: ... + @overload + def tocdl(self, coordvars: bool = False, data: bool = False, *, outfile: str | os.PathLike) -> None: ... + def has_blosc_filter(self) -> bool: ... 
+ def has_zstd_filter(self) -> bool: ... + def has_bzip2_filter(self) -> bool: ... + def has_szip_filter(self) -> bool: ... + def __getitem__(self, elem: str) -> Any: ... # should be Group | Variable, but this causes too many problems + # __iter__ and __contains__ always error because iteration and membership ops are not allowed + def __iter__(self) -> NoReturn: ... + def __contains__(self, key) -> NoReturn: ... + def __setattr__(self, name: str, value: Any) -> None: ... + def __getattr__(self, name: str) -> Any: ... + def __delattr__(self, name: str): ... + def __reduce__(self) -> NoReturn: ... + def __enter__(self) -> Self: ... + def __exit__(self, atype, value, traceback) -> None: ... + +class Group(Dataset): + def __init__(self, parent: Dataset, name: str, **kwargs: Any) -> None: ... + def close(self) -> NoReturn: ... + +@final +class Dimension: + def __init__(self, grp: Dataset, name: str, size: int | None = None, **kwargs: Any) -> None: ... + @property + def name(self) -> str: ... + @property + def size(self) -> int: ... + def group(self) -> Dataset: ... + def isunlimited(self) -> bool: ... + def __len__(self) -> int: ... + +class _VarDatatypeProperty: + # A faux descriptor definition of the property to allow overloads + @overload + def __get__(self, instance: Variable[RealVarT], owner: Any) -> np.dtype[RealVarT]: ... + @overload + def __get__(self, instance: Variable[ComplexVarT], owner: Any) -> CompoundType: ... + @overload + def __get__(self, instance: Variable[str], owner: Any) -> VLType: ... + @overload + def __get__( + self, instance: Variable, owner: Any + ) -> Any: ... # actual return type np.dtype | CompoundType | VLType | EnumType + +class _VarDtypeProperty: + # A faux descriptor definition of the property to allow overloads + @overload + def __get__(self, instance: Variable[NumericVarT], owner: Any) -> np.dtype[NumericVarT]: ... + @overload + def __get__(self, instance: Variable[str], owner: Any) -> type[str]: ... + @overload + def __get__(self, instance: Variable, owner: Any) -> Any: ... # actual return type np.dtype | Type[str] + +@final +class Variable(Generic[VarT]): + # Overloads of __new__ are provided for some cases where the Variable's type may be statically inferred from the datatype arg + @overload + def __new__( + cls, + grp: Dataset, + name: str, + datatype: np.dtype[NumericVarT] | type[NumericVarT], + dimensions: DimensionsType = (), + compression: CompressionType | None = None, + zlib: bool = False, + complevel: CompressionLevel | None = 4, + shuffle: bool = True, + szip_coding: Literal["nn", "ec"] = "nn", + szip_pixels_per_block: Literal[4, 8, 16, 32] = 8, + blosc_shuffle: Literal[0, 1, 2] = 1, + fletcher32: bool = False, + contiguous: bool = False, + chunksizes: Sequence[int] | None = None, + endian: EndianType = "native", + least_significant_digit: int | None = None, + significant_digits: int | None = None, + quantize_mode: QuantizeMode = "BitGroom", + fill_value: int | float | np.generic | str | bytes | Literal[False] | np.ndarray | None = None, + chunk_cache: int | None = None, + **kwargs: Any, + ) -> Variable[NumericVarT]: ... 
+ @overload + def __new__( + cls, + grp: Dataset, + name: str, + datatype: np.dtype[np.str_] | type[str | np.str_], + dimensions: DimensionsType = (), + compression: CompressionType | None = None, + zlib: bool = False, + complevel: CompressionLevel | None = 4, + shuffle: bool = True, + szip_coding: Literal["nn", "ec"] = "nn", + szip_pixels_per_block: Literal[4, 8, 16, 32] = 8, + blosc_shuffle: Literal[0, 1, 2] = 1, + fletcher32: bool = False, + contiguous: bool = False, + chunksizes: Sequence[int] | None = None, + endian: EndianType = "native", + least_significant_digit: int | None = None, + significant_digits: int | None = None, + quantize_mode: QuantizeMode = "BitGroom", + fill_value: int | float | np.generic | str | bytes | Literal[False] | np.ndarray | None = None, + chunk_cache: int | None = None, + **kwargs: Any, + ) -> Variable[str]: ... + @overload + def __new__( + cls, + grp: Dataset, + name: str, + datatype: DatatypeType, + dimensions: DimensionsType = (), + compression: CompressionType | None = None, + zlib: bool = False, + complevel: CompressionLevel | None = 4, + shuffle: bool = True, + szip_coding: Literal["nn", "ec"] = "nn", + szip_pixels_per_block: Literal[4, 8, 16, 32] = 8, + blosc_shuffle: Literal[0, 1, 2] = 1, + fletcher32: bool = False, + contiguous: bool = False, + chunksizes: Sequence[int] | None = None, + endian: EndianType = "native", + least_significant_digit: int | None = None, + significant_digits: int | None = None, + quantize_mode: QuantizeMode = "BitGroom", + fill_value: int | float | np.generic | str | bytes | Literal[False] | np.ndarray | None = None, + chunk_cache: int | None = None, + **kwargs: Any, + ) -> Variable: ... + def __init__( + self, + grp: Dataset, + name: str, + datatype: DatatypeType, + dimensions: DimensionsType = (), + compression: CompressionType | None = None, + zlib: bool = False, + complevel: CompressionLevel | None = 4, + shuffle: bool = True, + szip_coding: Literal["nn", "ec"] = "nn", + szip_pixels_per_block: Literal[4, 8, 16, 32] = 8, + blosc_shuffle: Literal[0, 1, 2] = 1, + fletcher32: bool = False, + contiguous: bool = False, + chunksizes: Sequence[int] | None = None, + endian: EndianType = "native", + least_significant_digit: int | None = None, + significant_digits: int | None = None, + quantize_mode: QuantizeMode = "BitGroom", + fill_value: int | float | np.generic | str | bytes | Literal[False] | np.ndarray | None = None, + chunk_cache: int | None = None, + **kwargs: Any, + ) -> None: ... + datatype: _VarDatatypeProperty + dtype: _VarDtypeProperty + @property + def name(self) -> str: ... + @property + def shape(self) -> tuple[int, ...]: ... + @property + def size(self) -> int: ... + @property + def dimensions(self) -> tuple[str, ...]: ... + @property + def ndim(self) -> int: ... + @property + def scale(self) -> bool: ... + @property + def mask(self) -> bool: ... + @property + def chartostring(self) -> bool: ... + @property + def always_mask(self) -> bool: ... + @property + def __orthogonal_indexing__(self) -> bool: ... + def group(self) -> Dataset: ... + def ncattrs(self) -> list[str]: ... + def setncattr(self, name: str, value: Any) -> None: ... + def setncattr_string(self, name: str, value: Any) -> None: ... + def setncatts(self, attdict: Mapping[str, Any]) -> None: ... + def getncattr(self, name: str, encoding="utf-8"): ... + def delncattr(self, name: str) -> None: ... + def filters(self) -> FiltersDict: ... + def quantization(self) -> tuple[int, QuantizeMode] | None: ... + def endian(self) -> EndianType: ... 
+ def chunking(self) -> Literal["contiguous"] | list[int]: ... + def get_var_chunk_cache(self) -> tuple[int, int, float]: ... + def set_var_chunk_cache( + self, size: int | None = None, nelems: int | None = None, preemption: float | None = None + ) -> None: ... + def renameAttribute(self, oldname: str, newname: str) -> None: ... + def assignValue(self, val: Any) -> None: ... + def getValue(self) -> Any: ... + def get_fill_value(self) -> Any: ... + def set_auto_chartostring(self, chartostring: bool) -> None: ... + def use_nc_get_vars(self, use_nc_get_vars: bool) -> None: ... + def set_auto_maskandscale(self, maskandscale: bool) -> None: ... + def set_auto_scale(self, scale: bool) -> None: ... + def set_auto_mask(self, mask: bool) -> None: ... + def set_always_mask(self, always_mask: bool) -> None: ... + def set_ncstring_attrs(self, ncstring_attrs: bool) -> None: ... + def set_collective(self, value: bool) -> None: ... + def get_dims(self) -> tuple[Dimension, ...]: ... + def __delattr__(self, name: str) -> None: ... + def __setattr__(self, name: str, value: Any) -> None: ... + def __getattr__(self, name: str) -> Any: ... + def __getitem__(self, elem: Any) -> Any: ... + def __setitem__(self, elem: Any, data: npt.ArrayLike) -> None: ... + def __array__(self) -> np.ndarray: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[Any]: ... # faux method so mypy believes Variable is iterable + +@final +class CompoundType: + dtype: np.dtype + dtype_view: np.dtype + name: str + + def __init__( + self, grp: Dataset, dt: npt.DTypeLike | Sequence[tuple[str, npt.DTypeLike]], dtype_name: str, **kwargs: Any + ) -> None: ... + def __reduce__(self) -> NoReturn: ... + +@final +class VLType: + dtype: np.dtype + name: str | None + + def __init__(self, grp: Dataset, dt: npt.DTypeLike, dtype_name: str, **kwargs: Any) -> None: ... + def __reduce__(self) -> NoReturn: ... + +@final +class EnumType: + dtype: np.dtype[np.integer] + name: str + enum_dict: Mapping[str, int] + + def __init__( + self, + grp: Dataset, + dt: np.dtype[np.integer] | type[np.integer] | type[int] | str, + dtype_name: str, + enum_dict: Mapping[str, int | np.integer], + **kwargs: Any, + ) -> None: ... + def __reduce__(self) -> NoReturn: ... + +class MFDataset(Dataset): + def __init__( + self, + files: str | Sequence[str | os.PathLike], + check: bool = False, + aggdim: str | None = None, + exclude: Sequence[str] = [], + master_file: str | os.PathLike | None = None, + ) -> None: ... + @property + def dimensions(self) -> dict[str, Dimension]: ... # this should be: dict[str, Dimension | _Dimension] + @property + def variables(self) -> dict[str, Variable[Any]]: ... # this should be: dict[str, _Variable[Any] | _Variable] + +class _Dimension: + dimlens: list[int] + dimtolen: int + + def __init__(self, dimname: str, dim: Dimension, dimlens: list[int], dimtotlen: int) -> None: ... + def __len__(self) -> int: ... + def isunlimited(self) -> Literal[True]: ... + +class _Variable: + dimensions: tuple[str, ...] + dtype: np.dtype | type[str] + + def __init__(self, dset: Dataset, varname: str, var: Variable[Any], recdimname: str) -> None: ... + + # shape, ndim, and name actually come from __getattr__ + @property + def shape(self) -> tuple[int, ...]: ... + @property + def ndim(self) -> int: ... + @property + def name(self) -> str: ... + def typecode(self) -> np.dtype | type[str]: ... + def ncattrs(self) -> list[str]: ... + def _shape(self) -> tuple[int, ...]: ... + def set_auto_chartostring(self, val: bool) -> None: ... 
+ def set_auto_maskandscale(self, val: bool) -> None: ... + def set_auto_mask(self, val: bool) -> None: ... + def set_auto_scale(self, val: bool) -> None: ... + def set_always_mask(self, val: bool) -> None: ... + def __getattr__(self, name: str) -> Any: ... + def __getitem__(self, elem: Any) -> Any: ... + def __len__(self) -> int: ... + +class MFTime(_Variable): + calendar: CalendarType | None + units: str | None + + def __init__(self, time: Variable, units: str | None = None, calendar: CalendarType | str | None = None): ... + def __getitem__(self, elem: Any) -> np.ndarray: ... + +@overload +def stringtoarr( + string: str, + NUMCHARS: int, + dtype: Literal["S"] | np.dtype[np.bytes_] = "S", +) -> npt.NDArray[np.bytes_]: ... +@overload +def stringtoarr( + string: str, + NUMCHARS: int, + dtype: Literal["U"] | np.dtype[np.str_], +) -> npt.NDArray[np.str_]: ... +@overload +def stringtochar( + a: npt.NDArray[np.character], + encoding: Literal["none", "None", "bytes"], + n_strlen: int | None = None, +) -> npt.NDArray[np.bytes_]: ... +@overload +def stringtochar( + a: npt.NDArray[np.character], + encoding: str | None = None, + n_strlen: int | None = None, +) -> npt.NDArray[np.str_] | npt.NDArray[np.bytes_]: ... +@overload +def chartostring( + b: npt.NDArray[np.character], + encoding: Literal["none", "None", "bytes"] = ..., +) -> npt.NDArray[np.bytes_]: ... +@overload +def chartostring( + b: npt.NDArray[np.character], + encoding: str | None = None, +) -> npt.NDArray[np.str_] | npt.NDArray[np.bytes_]: ... +def getlibversion() -> str: ... +def rc_get(key: str) -> str | None: ... +def rc_set(key: str, value: str) -> None: ... +def set_alignment(threshold: int, alignment: int): ... +def get_alignment() -> tuple[int, int]: ... +def set_chunk_cache(size: int | None = None, nelems: int | None = None, preemption: float | None = None) -> None: ... +def get_chunk_cache() -> tuple[int, int, float]: ... diff --git a/src/netCDF4/_netCDF4.pyi b/src/netCDF4/_netCDF4.pyi new file mode 100644 index 000000000..d0bdb389f --- /dev/null +++ b/src/netCDF4/_netCDF4.pyi @@ -0,0 +1,53 @@ +# The definitions are intentionally done in the __init__. +# This file only exists in case someone imports from netCDF4._netCDF4 +from . 
import ( + CompoundType, + Dataset, + Dimension, + EnumType, + Group, + MFDataset, + MFTime, + NetCDF4MissingFeatureException, + Variable, + VLType, + __has_blosc_support__, + __has_bzip2_support__, + __has_cdf5_format__, + __has_nc_create_mem__, + __has_nc_inq_format_extended__, + __has_nc_inq_path__, + __has_nc_open_mem__, + __has_nc_rc_set__, + __has_ncfilter__, + __has_parallel4_support__, + __has_parallel_support__, + __has_pnetcdf_support__, + __has_quantization_support__, + __has_rename_grp__, + __has_set_alignment__, + __has_szip_support__, + __has_zstandard_support__, + __hdf5libversion__, + __netcdf4libversion__, + __version__, + chartostring, + date2index, + date2num, + default_encoding, + default_fillvals, + dtype_is_complex, + get_alignment, + get_chunk_cache, + getlibversion, + is_native_big, + is_native_little, + num2date, + rc_get, + rc_set, + set_alignment, + set_chunk_cache, + stringtoarr, + stringtochar, + unicode_error, +) diff --git a/netCDF4/_netCDF4.pyx b/src/netCDF4/_netCDF4.pyx similarity index 54% rename from netCDF4/_netCDF4.pyx rename to src/netCDF4/_netCDF4.pyx index b04354f9e..2f5a133c9 100644 --- a/netCDF4/_netCDF4.pyx +++ b/src/netCDF4/_netCDF4.pyx @@ -1,12 +1,9 @@ -""" -Version 1.3.2 +"""Version 1.7.4.1 ------------- -- - - -Introduction -============ +# Introduction -netcdf4-python is a Python interface to the netCDF C library. +netcdf4-python is a Python interface to the netCDF C library. [netCDF](http://www.unidata.ucar.edu/software/netcdf/) version 4 has many features not found in earlier versions of the library and is implemented on top of @@ -17,7 +14,7 @@ files that are readable by HDF5 clients. The API modelled after and should be familiar to users of that module. Most new features of netCDF 4 are implemented, such as multiple -unlimited dimensions, groups and zlib data compression. All the new +unlimited dimensions, groups and data compression. All the new numeric data types (such as 64 bit and unsigned integer types) are implemented. Compound (struct), variable length (vlen) and enumerated (enum) data types are supported, but not the opaque data type. @@ -25,104 +22,85 @@ Mixtures of compound, vlen and enum data types (such as compound types containing enums, or vlens containing compound types) are not supported. -Download -======== - - - Latest bleeding-edge code from the - [github repository](http://github.com/Unidata/netcdf4-python). - - Latest [releases](https://pypi.python.org/pypi/netCDF4) - (source code and windows installers). - -Requires -======== - - - Python 2.7 or later (python 3 works too). - - [numpy array module](http://numpy.scipy.org), version 1.9.0 or later. - - [Cython](http://cython.org), version 0.21 or later. - - [setuptools](https://pypi.python.org/pypi/setuptools), version 18.0 or - later. - - The HDF5 C library version 1.8.4-patch1 or higher (1.8.x recommended) - from [](ftp://ftp.hdfgroup.org/HDF5/current/src). - ***netCDF version 4.4.1 or higher is recommended if using HDF5 1.10.x - - otherwise resulting files may be unreadable by clients using earlier - versions of HDF5. For netCDF < 4.4.1, HDF5 version 1.8.x is recommended.*** - Be sure to build with `--enable-hl --enable-shared`. - - [Libcurl](http://curl.haxx.se/libcurl), if you want - [OPeNDAP](http://opendap.org) support. - - [HDF4](http://www.hdfgroup.org/products/hdf4), if you want - to be able to read HDF4 "Scientific Dataset" (SD) files. - - The netCDF-4 C library from the [github releases - page](https://github.com/Unidata/netcdf-c/releases). 
- Version 4.1.1 or higher is required (4.2 or higher recommended). - Be sure to build with `--enable-netcdf-4 --enable-shared`, and set - `CPPFLAGS="-I $HDF5_DIR/include"` and `LDFLAGS="-L $HDF5_DIR/lib"`, - where `$HDF5_DIR` is the directory where HDF5 was installed. - If you want [OPeNDAP](http://opendap.org) support, add `--enable-dap`. - If you want HDF4 SD support, add `--enable-hdf4` and add - the location of the HDF4 headers and library to `$CPPFLAGS` and `$LDFLAGS`. - - for MPI parallel IO support, MPI-enabled versions of the HDF5 and netcdf - libraries are required, as is the [mpi4py](http://mpi4py.scipy.org) python - module. - - -Install -======= - - - install the requisite python modules and C libraries (see above). It's - easiest if all the C libs are built as shared libraries. - - By default, the utility `nc-config`, installed with netcdf 4.1.2 or higher, - will be run used to determine where all the dependencies live. - - If `nc-config` is not in your default `$PATH` - edit the `setup.cfg` file - in a text editor and follow the instructions in the comments. - In addition to specifying the path to `nc-config`, - you can manually set the paths to all the libraries and their include files - (in case `nc-config` does not do the right thing). - - run `python setup.py build`, then `python setup.py install` (as root if - necessary). - - [`pip install`](https://pip.pypa.io/en/latest/reference/pip_install.html) can - also be used, with library paths set with environment variables. To make - this work, the `USE_SETUPCFG` environment variable must be used to tell - setup.py not to use `setup.cfg`. - For example, `USE_SETUPCFG=0 HDF5_INCDIR=/usr/include/hdf5/serial - HDF5_LIBDIR=/usr/lib/x86_64-linux-gnu/hdf5/serial pip install` has been - shown to work on an Ubuntu/Debian linux system. Similarly, environment variables - (all capitalized) can be used to set the include and library paths for - `hdf5`, `netCDF4`, `hdf4`, `szip`, `jpeg`, `curl` and `zlib`. If the - libraries are installed in standard places (e.g. `/usr` or `/usr/local`), - the environment variables do not need to be set. +## Quick Install + + - the easiest way to get going is to install via `pip install netCDF4`. + (or if you use the [conda](http://conda.io) package manager `conda install -c conda-forge netCDF4`). + +## Developer Install + + - Clone the [github repository](http://github.com/Unidata/netcdf4-python). Make + sure you either clone recursively, or run `git submodule update --init` to + ensure all the submodules are also checked out. + - Make sure the dependencies are satisfied (Python 3.8 or later, + [numpy](http://numpy.scipy.org), + [Cython](http://cython.org), + [cftime](https://github.com/Unidata/cftime), + [setuptools](https://pypi.python.org/pypi/setuptools), + the [HDF5 C library](https://www.hdfgroup.org/solutions/hdf5/), + and the [netCDF C library](https://www.unidata.ucar.edu/software/netcdf/)). + For MPI parallel IO support, an MPI-enabled versions of the netcdf library + is required, as is [mpi4py](http://mpi4py.scipy.org). + Parallel IO further depends on the existence of MPI-enabled HDF5 or the + [PnetCDF](https://parallel-netcdf.github.io/) library. + - By default, the utility `nc-config` (installed with netcdf-c) + will be run used to determine where all the dependencies live. + - If `nc-config` is not in your default `PATH`, you can set the `NETCDF4_DIR` + environment variable and `setup.py` will look in `$NETCDF4_DIR/bin`. 
+ You can also use the file `setup.cfg` to set the path to `nc-config`, or + enter the paths to the libraries and include files manually. Just edit the `setup.cfg` file + in a text editor and follow the instructions in the comments. + To disable the use of `nc-config`, set the env var `USE_NCCONFIG` to 0. + To disable the use of `setup.cfg`, set `USE_SETUPCFG` to 0. + As a last resort, the library and include paths can be set via environment variables. + If you go this route, set `USE_NCCONFIG` and `USE_SETUPCFG` to 0, and specify + `NETCDF4_LIBDIR`, `NETCDF4_INCDIR`, `HDF5_LIBDIR` and `HDF5_INCDIR`. + If the dependencies are not found + in any of the paths specified by environment variables, then standard locations + (such as `/usr` and `/usr/local`) are searched. + - if the env var `NETCDF_PLUGIN_DIR` is set to point to the location of the netcdf-c compression + plugins built by netcdf >= 4.9.0, they will be installed inside the package. In this + case `HDF5_PLUGIN_PATH` will be set to the package installation path on import, + so the extra compression algorithms available in netcdf-c >= 4.9.0 will automatically + be available. Otherwise, the user will have to set `HDF5_PLUGIN_PATH` explicitly + to have access to the extra compression plugins. + - run `pip install -v .` (as root if necessary) - run the tests in the 'test' directory by running `python run_all.py`. -Tutorial -======== - -1. [Creating/Opening/Closing a netCDF file.](#section1) -2. [Groups in a netCDF file.](#section2) -3. [Dimensions in a netCDF file.](#section3) -4. [Variables in a netCDF file.](#section4) -5. [Attributes in a netCDF file.](#section5) -6. [Writing data to and retrieving data from a netCDF variable.](#section6) -7. [Dealing with time coordinates.](#section7) -8. [Reading data from a multi-file netCDF dataset.](#section8) -9. [Efficient compression of netCDF variables.](#section9) -10. [Beyond homogeneous arrays of a fixed type - compound data types.](#section10) -11. [Variable-length (vlen) data types.](#section11) -12. [Enum data type.](#section12) -13. [Parallel IO.](#section13) +# Tutorial + +- [Creating/Opening/Closing a netCDF file](#creatingopeningclosing-a-netcdf-file) +- [Groups in a netCDF file](#groups-in-a-netcdf-file) +- [Dimensions in a netCDF file](#dimensions-in-a-netcdf-file) +- [Variables in a netCDF file](#variables-in-a-netcdf-file) +- [Attributes in a netCDF file](#attributes-in-a-netcdf-file) +- [Dealing with time coordinates](#dealing-with-time-coordinates) +- [Writing data to and retrieving data from a netCDF variable](#writing-data-to-and-retrieving-data-from-a-netcdf-variable) +- [Reading data from a multi-file netCDF dataset](#reading-data-from-a-multi-file-netcdf-dataset) +- [Efficient compression of netCDF variables](#efficient-compression-of-netcdf-variables) +- [Beyond homogeneous arrays of a fixed type - compound data types](#beyond-homogeneous-arrays-of-a-fixed-type-compound-data-types) +- [Variable-length (vlen) data types](#variable-length-vlen-data-types) +- [Enum data type](#enum-data-type) +- [Parallel IO](#parallel-io) +- [Dealing with strings](#dealing-with-strings) +- [In-memory (diskless) Datasets](#in-memory-diskless-datasets) +All of the code in this tutorial is available in `examples/tutorial.py`, except +the parallel IO example, which is in `examples/mpi_example.py`. +Unit tests are in the `test` directory. -##
1) Creating/Opening/Closing a netCDF file. +## Creating/Opening/Closing a netCDF file -To create a netCDF file from python, you simply call the `netCDF4.Dataset` +To create a netCDF file from python, you simply call the `Dataset` constructor. This is also the method used to open an existing netCDF file. If the file is open for write access (`mode='w', 'r+'` or `'a'`), you may write any type of data including new dimensions, groups, variables and -attributes. netCDF files come in five flavors (`NETCDF3_CLASSIC, -NETCDF3_64BIT_OFFSET, NETCDF3_64BIT_DATA, NETCDF4_CLASSIC`, and `NETCDF4`). -`NETCDF3_CLASSIC` was the original netcdf binary format, and was limited +attributes. netCDF files come in five flavors (`NETCDF3_CLASSIC`, +`NETCDF3_64BIT_OFFSET`, `NETCDF3_64BIT_DATA`, `NETCDF4_CLASSIC`, and `NETCDF4`). +`NETCDF3_CLASSIC` was the original netcdf binary format, and was limited to file sizes less than 2 Gb. `NETCDF3_64BIT_OFFSET` was introduced in version 3.6.0 of the library, and extended the original binary format -to allow for file sizes greater than 2 Gb. +to allow for file sizes greater than 2 Gb. `NETCDF3_64BIT_DATA` is a new format that requires version 4.4.0 of the C library - it extends the `NETCDF3_64BIT_OFFSET` binary format to allow for unsigned/64 bit integer data types and 64-bit dimension sizes. @@ -132,123 +110,131 @@ not found in the version 3 API. They can be read by netCDF 3 clients only if they have been relinked against the netCDF 4 library. They can also be read by HDF5 clients. `NETCDF4` files use the version 4 disk format (HDF5) and use the new features of the version 4 API. The -`netCDF4` module can read and write files in any of these formats. When +netCDF4 module can read and write files in any of these formats. When creating a new file, the format may be specified using the `format` keyword in the `Dataset` constructor. The default format is `NETCDF4`. To see how a given file is formatted, you can examine the `data_model` attribute. Closing the netCDF file is -accomplished via the `netCDF4.Dataset.close` method of the `netCDF4.Dataset` +accomplished via the `Dataset.close` method of the `Dataset` instance. Here's an example: - :::python - >>> from netCDF4 import Dataset - >>> rootgrp = Dataset("test.nc", "w", format="NETCDF4") - >>> print rootgrp.data_model - NETCDF4 - >>> rootgrp.close() +```python +>>> from netCDF4 import Dataset +>>> rootgrp = Dataset("test.nc", "w", format="NETCDF4") +>>> print(rootgrp.data_model) +NETCDF4 +>>> rootgrp.close() +``` Remote [OPeNDAP](http://opendap.org)-hosted datasets can be accessed for -reading over http if a URL is provided to the `netCDF4.Dataset` constructor instead of a +reading over http if a URL is provided to the `Dataset` constructor instead of a filename. However, this requires that the netCDF library be built with OPenDAP support, via the `--enable-dap` configure option (added in version 4.0.1). -##
2) Groups in a netCDF file. +## Groups in a netCDF file netCDF version 4 added support for organizing data in hierarchical groups, which are analogous to directories in a filesystem. Groups serve as containers for variables, dimensions and attributes, as well as other -groups. A `netCDF4.Dataset` creates a special group, called +groups. A `Dataset` creates a special group, called the 'root group', which is similar to the root directory in a unix -filesystem. To create `netCDF4.Group` instances, use the -`netCDF4.Dataset.createGroup` method of a `netCDF4.Dataset` or `netCDF4.Group` -instance. `netCDF4.Dataset.createGroup` takes a single argument, a -python string containing the name of the new group. The new `netCDF4.Group` +filesystem. To create `Group` instances, use the +`Dataset.createGroup` method of a `Dataset` or `Group` +instance. `Dataset.createGroup` takes a single argument, a +python string containing the name of the new group. The new `Group` instances contained within the root group can be accessed by name using -the `groups` dictionary attribute of the `netCDF4.Dataset` instance. Only +the `groups` dictionary attribute of the `Dataset` instance. Only `NETCDF4` formatted files support Groups, if you try to create a Group in a netCDF 3 file you will get an error message. - :::python - >>> rootgrp = Dataset("test.nc", "a") - >>> fcstgrp = rootgrp.createGroup("forecasts") - >>> analgrp = rootgrp.createGroup("analyses") - >>> print rootgrp.groups - OrderedDict([("forecasts", - ), - ("analyses", - )]) - -Groups can exist within groups in a `netCDF4.Dataset`, just as directories -exist within directories in a unix filesystem. Each `netCDF4.Group` instance +```python +>>> rootgrp = Dataset("test.nc", "a") +>>> fcstgrp = rootgrp.createGroup("forecasts") +>>> analgrp = rootgrp.createGroup("analyses") +>>> print(rootgrp.groups) +{'forecasts': +group /forecasts: + dimensions(sizes): + variables(dimensions): + groups: , 'analyses': +group /analyses: + dimensions(sizes): + variables(dimensions): + groups: } +>>> +``` + +Groups can exist within groups in a `Dataset`, just as directories +exist within directories in a unix filesystem. Each `Group` instance has a `groups` attribute dictionary containing all of the group -instances contained within that group. Each `netCDF4.Group` instance also has a +instances contained within that group. Each `Group` instance also has a `path` attribute that contains a simulated unix directory path to that group. To simplify the creation of nested groups, you can -use a unix-like path as an argument to `netCDF4.Dataset.createGroup`. +use a unix-like path as an argument to `Dataset.createGroup`. - :::python - >>> fcstgrp1 = rootgrp.createGroup("/forecasts/model1") - >>> fcstgrp2 = rootgrp.createGroup("/forecasts/model2") +```python +>>> fcstgrp1 = rootgrp.createGroup("/forecasts/model1") +>>> fcstgrp2 = rootgrp.createGroup("/forecasts/model2") +``` If any of the intermediate elements of the path do not exist, they are created, just as with the unix command `'mkdir -p'`. If you try to create a group -that already exists, no error will be raised, and the existing group will be +that already exists, no error will be raised, and the existing group will be returned. Here's an example that shows how to navigate all the groups in a -`netCDF4.Dataset`. The function `walktree` is a Python generator that is used -to walk the directory tree. Note that printing the `netCDF4.Dataset` or `netCDF4.Group` +`Dataset`. 
The function `walktree` is a Python generator that is used +to walk the directory tree. Note that printing the `Dataset` or `Group` object yields summary information about it's contents. - :::python - >>> def walktree(top): - >>> values = top.groups.values() - >>> yield values - >>> for value in top.groups.values(): - >>> for children in walktree(value): - >>> yield children - >>> print rootgrp - >>> for children in walktree(rootgrp): - >>> for child in children: - >>> print child - - root group (NETCDF4 file format): - dimensions: - variables: - groups: forecasts, analyses - - group /forecasts: - dimensions: - variables: - groups: model1, model2 - - group /analyses: - dimensions: - variables: - groups: - - group /forecasts/model1: - dimensions: - variables: - groups: - - group /forecasts/model2: - dimensions: - variables: - groups: - -##
3) Dimensions in a netCDF file. +```python +>>> def walktree(top): +... yield top.groups.values() +... for value in top.groups.values(): +... yield from walktree(value) +>>> print(rootgrp) + +root group (NETCDF4 data model, file format HDF5): + dimensions(sizes): + variables(dimensions): + groups: forecasts, analyses +>>> for children in walktree(rootgrp): +... for child in children: +... print(child) + +group /forecasts: + dimensions(sizes): + variables(dimensions): + groups: model1, model2 + +group /analyses: + dimensions(sizes): + variables(dimensions): + groups: + +group /forecasts/model1: + dimensions(sizes): + variables(dimensions): + groups: + +group /forecasts/model2: + dimensions(sizes): + variables(dimensions): + groups: +``` + +## Dimensions in a netCDF file netCDF defines the sizes of all variables in terms of dimensions, so before any variables can be created the dimensions they use must be created first. A special case, not often used in practice, is that of a scalar variable, which has no dimensions. A dimension is created using -the `netCDF4.Dataset.createDimension` method of a `netCDF4.Dataset` -or `netCDF4.Group` instance. A Python string is used to set the name of the +the `Dataset.createDimension` method of a `Dataset` +or `Group` instance. A Python string is used to set the name of the dimension, and an integer value is used to set the size. To create an unlimited dimension (a dimension that can be appended to), the size value is set to `None` or 0. In this example, there both the `time` and @@ -256,243 +242,282 @@ value is set to `None` or 0. In this example, there both the `time` and dimension is a new netCDF 4 feature, in netCDF 3 files there may be only one, and it must be the first (leftmost) dimension of the variable. - :::python - >>> level = rootgrp.createDimension("level", None) - >>> time = rootgrp.createDimension("time", None) - >>> lat = rootgrp.createDimension("lat", 73) - >>> lon = rootgrp.createDimension("lon", 144) +```python +>>> level = rootgrp.createDimension("level", None) +>>> time = rootgrp.createDimension("time", None) +>>> lat = rootgrp.createDimension("lat", 73) +>>> lon = rootgrp.createDimension("lon", 144) +``` -All of the `netCDF4.Dimension` instances are stored in a python dictionary. +All of the `Dimension` instances are stored in a python dictionary. - :::python - >>> print rootgrp.dimensions - OrderedDict([("level", ), - ("time", ), - ("lat", ), - ("lon", )]) +```python +>>> print(rootgrp.dimensions) +{'level': (unlimited): name = 'level', size = 0, 'time': (unlimited): name = 'time', size = 0, 'lat': : name = 'lat', size = 73, 'lon': : name = 'lon', size = 144} +``` -Calling the python `len` function with a `netCDF4.Dimension` instance returns -the current size of that dimension. -The `netCDF4.Dimension.isunlimited` method of a `netCDF4.Dimension` instance -can be used to determine if the dimensions is unlimited, or appendable. +Using the python `len` function with a `Dimension` instance returns +current size of that dimension. +`Dimension.isunlimited` method of a `Dimension` instance +be used to determine if the dimensions is unlimited, or appendable. 
- :::python - >>> print len(lon) - 144 - >>> print lon.isunlimited() - False - >>> print time.isunlimited() - True +```python +>>> print(len(lon)) +144 +>>> print(lon.isunlimited()) +False +>>> print(time.isunlimited()) +True +``` -Printing the `netCDF4.Dimension` object +Printing the `Dimension` object provides useful summary info, including the name and length of the dimension, and whether it is unlimited. - :::python - >>> for dimobj in rootgrp.dimensions.values(): - >>> print dimobj - (unlimited): name = "level", size = 0 - (unlimited): name = "time", size = 0 - : name = "lat", size = 73 - : name = "lon", size = 144 - (unlimited): name = "time", size = 0 +```python +>>> for dimobj in rootgrp.dimensions.values(): +... print(dimobj) + (unlimited): name = 'level', size = 0 + (unlimited): name = 'time', size = 0 +: name = 'lat', size = 73 +: name = 'lon', size = 144 +``` -`netCDF4.Dimension` names can be changed using the -`netCDF4.Datatset.renameDimension` method of a `netCDF4.Dataset` or -`netCDF4.Group` instance. +`Dimension` names can be changed using the +`Dataset.renameDimension` method of a `Dataset` or +`Group` instance. -##
4) Variables in a netCDF file. +## Variables in a netCDF file netCDF variables behave much like python multidimensional array objects supplied by the [numpy module](http://numpy.scipy.org). However, unlike numpy arrays, netCDF4 variables can be appended to along one or more 'unlimited' dimensions. To create a netCDF variable, use the -`netCDF4.Dataset.createVariable` method of a `netCDF4.Dataset` or -`netCDF4.Group` instance. The `netCDF4.Dataset.createVariable` method +`Dataset.createVariable` method of a `Dataset` or +`Group` instance. The `Dataset.createVariable` method has two mandatory arguments, the variable name (a Python string), and the variable datatype. The variable's dimensions are given by a tuple containing the dimension names (defined previously with -`netCDF4.Dataset.createDimension`). To create a scalar +`Dataset.createDimension`). To create a scalar variable, simply leave out the dimensions keyword. The variable primitive datatypes correspond to the dtype attribute of a numpy array. You can specify the datatype as a numpy dtype object, or anything that -can be converted to a numpy dtype object. Valid datatype specifiers -include: `'f4'` (32-bit floating point), `'f8'` (64-bit floating -point), `'i4'` (32-bit signed integer), `'i2'` (16-bit signed -integer), `'i8'` (64-bit signed integer), `'i1'` (8-bit signed -integer), `'u1'` (8-bit unsigned integer), `'u2'` (16-bit unsigned -integer), `'u4'` (32-bit unsigned integer), `'u8'` (64-bit unsigned -integer), or `'S1'` (single-character string). The old Numeric -single-character typecodes (`'f'`,`'d'`,`'h'`, -`'s'`,`'b'`,`'B'`,`'c'`,`'i'`,`'l'`), corresponding to -(`'f4'`,`'f8'`,`'i2'`,`'i2'`,`'i1'`,`'i1'`,`'S1'`,`'i4'`,`'i4'`), -will also work. The unsigned integer types and the 64-bit integer type +can be converted to a numpy dtype object. Valid datatype specifiers +include: + +| Specifier | Datatype | Old typecodes | +|-----------|-------------------------|---------------| +| `'f4'` | 32-bit floating point | `'f'` | +| `'f8'` | 64-bit floating point | `'d'` | +| `'i4'` | 32-bit signed integer | `'i'` `'l'` | +| `'i2'` | 16-bit signed integer | `'h'` `'s'` | +| `'i8'` | 64-bit signed integer | | +| `'i1'` | 8-bit signed integer | `'b'` `'B'` | +| `'u1'` | 8-bit unsigned integer | | +| `'u2'` | 16-bit unsigned integer | | +| `'u4'` | 32-bit unsigned integer | | +| `'u8'` | 64-bit unsigned integer | | +| `'S1'` | single-character string | `'c'` | + +The unsigned integer types and the 64-bit integer type can only be used if the file format is `NETCDF4`. The dimensions themselves are usually also defined as variables, called -coordinate variables. The `netCDF4.Dataset.createVariable` -method returns an instance of the `netCDF4.Variable` class whose methods can be +coordinate variables. The `Dataset.createVariable` +method returns an instance of the `Variable` class whose methods can be used later to access and set variable data and attributes. - :::python - >>> times = rootgrp.createVariable("time","f8",("time",)) - >>> levels = rootgrp.createVariable("level","i4",("level",)) - >>> latitudes = rootgrp.createVariable("lat","f4",("lat",)) - >>> longitudes = rootgrp.createVariable("lon","f4",("lon",)) - >>> # two dimensions unlimited - >>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",)) - -To get summary info on a `netCDF4.Variable` instance in an interactive session, just print it. 
- - :::python - >>> print temp - - float32 temp(time, level, lat, lon) - least_significant_digit: 3 - units: K - unlimited dimensions: time, level - current shape = (0, 0, 73, 144) +```python +>>> times = rootgrp.createVariable("time","f8",("time",)) +>>> levels = rootgrp.createVariable("level","i4",("level",)) +>>> latitudes = rootgrp.createVariable("lat","f4",("lat",)) +>>> longitudes = rootgrp.createVariable("lon","f4",("lon",)) +>>> # two dimensions unlimited +>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",)) +>>> temp.units = "K" +``` + +To get summary info on a `Variable` instance in an interactive session, +just print it. + +```python +>>> print(temp) + +float32 temp(time, level, lat, lon) + units: K +unlimited dimensions: time, level +current shape = (0, 0, 73, 144) +filling on, default _FillValue of 9.969209968386869e+36 used +``` You can use a path to create a Variable inside a hierarchy of groups. - :::python - >>> ftemp = rootgrp.createVariable("/forecasts/model1/temp","f4",("time","level","lat","lon",)) +```python +>>> ftemp = rootgrp.createVariable("/forecasts/model1/temp","f4",("time","level","lat","lon",)) +``` If the intermediate groups do not yet exist, they will be created. -You can also query a `netCDF4.Dataset` or `netCDF4.Group` instance directly to obtain `netCDF4.Group` or -`netCDF4.Variable` instances using paths. - - :::python - >>> print rootgrp["/forecasts/model1"] # a Group instance - - group /forecasts/model1: - dimensions(sizes): - variables(dimensions): float32 temp(time,level,lat,lon) - groups: - >>> print rootgrp["/forecasts/model1/temp"] # a Variable instance - - float32 temp(time, level, lat, lon) - path = /forecasts/model1 - unlimited dimensions: time, level - current shape = (0, 0, 73, 144) - filling on, default _FillValue of 9.96920996839e+36 used - -All of the variables in the `netCDF4.Dataset` or `netCDF4.Group` are stored in a +You can also query a `Dataset` or `Group` instance directly to obtain `Group` or +`Variable` instances using paths. 
+ +```python +>>> print(rootgrp["/forecasts/model1"]) # a Group instance + +group /forecasts/model1: + dimensions(sizes): + variables(dimensions): float32 temp(time,level,lat,lon) + groups: +>>> print(rootgrp["/forecasts/model1/temp"]) # a Variable instance + +float32 temp(time, level, lat, lon) +path = /forecasts/model1 +unlimited dimensions: time, level +current shape = (0, 0, 73, 144) +filling on, default _FillValue of 9.969209968386869e+36 used +``` + + +All of the variables in the `Dataset` or `Group` are stored in a Python dictionary, in the same way as the dimensions: - :::python - >>> print rootgrp.variables - OrderedDict([("time", ), - ("level", ), - ("lat", ), - ("lon", ), - ("temp", )]) - -`netCDF4.Variable` names can be changed using the -`netCDF4.Dataset.renameVariable` method of a `netCDF4.Dataset` +```python +>>> print(rootgrp.variables) +{'time': +float64 time(time) +unlimited dimensions: time +current shape = (0,) +filling on, default _FillValue of 9.969209968386869e+36 used, 'level': +int32 level(level) +unlimited dimensions: level +current shape = (0,) +filling on, default _FillValue of -2147483647 used, 'lat': +float32 lat(lat) +unlimited dimensions: +current shape = (73,) +filling on, default _FillValue of 9.969209968386869e+36 used, 'lon': +float32 lon(lon) +unlimited dimensions: +current shape = (144,) +filling on, default _FillValue of 9.969209968386869e+36 used, 'temp': +float32 temp(time, level, lat, lon) + units: K +unlimited dimensions: time, level +current shape = (0, 0, 73, 144) +filling on, default _FillValue of 9.969209968386869e+36 used} +``` + +`Variable` names can be changed using the +`Dataset.renameVariable` method of a `Dataset` instance. +Variables can be sliced similar to numpy arrays, but there are some differences. See +[Writing data to and retrieving data from a netCDF variable](#writing-data-to-and-retrieving-data-from-a-netcdf-variable) for more details. -##
5) Attributes in a netCDF file. + +## Attributes in a netCDF file There are two types of attributes in a netCDF file, global and variable. Global attributes provide information about a group, or the entire -dataset, as a whole. `netCDF4.Variable` attributes provide information about +dataset, as a whole. `Variable` attributes provide information about one of the variables in a group. Global attributes are set by assigning -values to `netCDF4.Dataset` or `netCDF4.Group` instance variables. `netCDF4.Variable` -attributes are set by assigning values to `netCDF4.Variable` instances +values to `Dataset` or `Group` instance variables. `Variable` +attributes are set by assigning values to `Variable` instances variables. Attributes can be strings, numbers or sequences. Returning to our example, - :::python - >>> import time - >>> rootgrp.description = "bogus example script" - >>> rootgrp.history = "Created " + time.ctime(time.time()) - >>> rootgrp.source = "netCDF4 python module tutorial" - >>> latitudes.units = "degrees north" - >>> longitudes.units = "degrees east" - >>> levels.units = "hPa" - >>> temp.units = "K" - >>> times.units = "hours since 0001-01-01 00:00:00.0" - >>> times.calendar = "gregorian" - -The `netCDF4.Dataset.ncattrs` method of a `netCDF4.Dataset`, `netCDF4.Group` or -`netCDF4.Variable` instance can be used to retrieve the names of all the netCDF +```python +>>> import time +>>> rootgrp.description = "bogus example script" +>>> rootgrp.history = "Created " + time.ctime(time.time()) +>>> rootgrp.source = "netCDF4 python module tutorial" +>>> latitudes.units = "degrees north" +>>> longitudes.units = "degrees east" +>>> levels.units = "hPa" +>>> temp.units = "K" +>>> times.units = "hours since 0001-01-01 00:00:00.0" +>>> times.calendar = "gregorian" +``` + +The `Dataset.ncattrs` method of a `Dataset`, `Group` or +`Variable` instance can be used to retrieve the names of all the netCDF attributes. This method is provided as a convenience, since using the built-in `dir` Python function will return a bunch of private methods and attributes that cannot (or should not) be modified by the user. - :::python - >>> for name in rootgrp.ncattrs(): - >>> print "Global attr", name, "=", getattr(rootgrp,name) - Global attr description = bogus example script - Global attr history = Created Mon Nov 7 10.30:56 2005 - Global attr source = netCDF4 python module tutorial +```python +>>> for name in rootgrp.ncattrs(): +... print("Global attr {} = {}".format(name, getattr(rootgrp, name))) +Global attr description = bogus example script +Global attr history = Created Mon Jul 8 14:19:41 2019 +Global attr source = netCDF4 python module tutorial +``` -The `__dict__` attribute of a `netCDF4.Dataset`, `netCDF4.Group` or `netCDF4.Variable` +The `__dict__` attribute of a `Dataset`, `Group` or `Variable` instance provides all the netCDF attribute name/value pairs in a python dictionary: - :::python - >>> print rootgrp.__dict__ - OrderedDict([(u"description", u"bogus example script"), - (u"history", u"Created Thu Mar 3 19:30:33 2011"), - (u"source", u"netCDF4 python module tutorial")]) +```python +>>> print(rootgrp.__dict__) +{'description': 'bogus example script', 'history': 'Created Mon Jul 8 14:19:41 2019', 'source': 'netCDF4 python module tutorial'} +``` -Attributes can be deleted from a netCDF `netCDF4.Dataset`, `netCDF4.Group` or -`netCDF4.Variable` using the python `del` statement (i.e. 
`del grp.foo` +removes the attribute `foo` from the group `grp`). -##
6) Writing data to and retrieving data from a netCDF variable. +## Writing data to and retrieving data from a netCDF variable -Now that you have a netCDF `netCDF4.Variable` instance, how do you put data +Now that you have a netCDF `Variable` instance, how do you put data into it? You can just treat it like an array and assign data to a slice. - :::python - >>> import numpy - >>> lats = numpy.arange(-90,91,2.5) - >>> lons = numpy.arange(-180,180,2.5) - >>> latitudes[:] = lats - >>> longitudes[:] = lons - >>> print "latitudes =\\n",latitudes[:] - latitudes = - [-90. -87.5 -85. -82.5 -80. -77.5 -75. -72.5 -70. -67.5 -65. -62.5 - -60. -57.5 -55. -52.5 -50. -47.5 -45. -42.5 -40. -37.5 -35. -32.5 - -30. -27.5 -25. -22.5 -20. -17.5 -15. -12.5 -10. -7.5 -5. -2.5 - 0. 2.5 5. 7.5 10. 12.5 15. 17.5 20. 22.5 25. 27.5 - 30. 32.5 35. 37.5 40. 42.5 45. 47.5 50. 52.5 55. 57.5 - 60. 62.5 65. 67.5 70. 72.5 75. 77.5 80. 82.5 85. 87.5 - 90. ] - -Unlike NumPy's array objects, netCDF `netCDF4.Variable` +```python +>>> import numpy as np +>>> lats = np.arange(-90,91,2.5) +>>> lons = np.arange(-180,180,2.5) +>>> latitudes[:] = lats +>>> longitudes[:] = lons +>>> print("latitudes =\\n{}".format(latitudes[:])) +latitudes = +[-90. -87.5 -85. -82.5 -80. -77.5 -75. -72.5 -70. -67.5 -65. -62.5 + -60. -57.5 -55. -52.5 -50. -47.5 -45. -42.5 -40. -37.5 -35. -32.5 + -30. -27.5 -25. -22.5 -20. -17.5 -15. -12.5 -10. -7.5 -5. -2.5 + 0. 2.5 5. 7.5 10. 12.5 15. 17.5 20. 22.5 25. 27.5 + 30. 32.5 35. 37.5 40. 42.5 45. 47.5 50. 52.5 55. 57.5 + 60. 62.5 65. 67.5 70. 72.5 75. 77.5 80. 82.5 85. 87.5 + 90. ] +``` + +Unlike NumPy's array objects, netCDF `Variable` objects with unlimited dimensions will grow along those dimensions if you assign data outside the currently defined range of indices. - :::python - >>> # append along two unlimited dimensions by assigning to slice. - >>> nlats = len(rootgrp.dimensions["lat"]) - >>> nlons = len(rootgrp.dimensions["lon"]) - >>> print "temp shape before adding data = ",temp.shape - temp shape before adding data = (0, 0, 73, 144) - >>> - >>> from numpy.random import uniform - >>> temp[0:5,0:10,:,:] = uniform(size=(5,10,nlats,nlons)) - >>> print "temp shape after adding data = ",temp.shape - temp shape after adding data = (6, 10, 73, 144) - >>> - >>> # levels have grown, but no values yet assigned. - >>> print "levels shape after adding pressure data = ",levels.shape - levels shape after adding pressure data = (10,) +```python +>>> # append along two unlimited dimensions by assigning to slice. +>>> nlats = len(rootgrp.dimensions["lat"]) +>>> nlons = len(rootgrp.dimensions["lon"]) +>>> print("temp shape before adding data = {}".format(temp.shape)) +temp shape before adding data = (0, 0, 73, 144) +>>> +>>> from numpy.random import uniform +>>> temp[0:5, 0:10, :, :] = uniform(size=(5, 10, nlats, nlons)) +>>> print("temp shape after adding data = {}".format(temp.shape)) +temp shape after adding data = (5, 10, 73, 144) +>>> +>>> # levels have grown, but no values yet assigned. +>>> print("levels shape after adding pressure data = {}".format(levels.shape)) +levels shape after adding pressure data = (10,) +``` Note that the size of the levels variable grows when data is appended along the `level` dimension of the variable `temp`, even though no data has yet been assigned to levels. - :::python - >>> # now, assign data to levels dimension variable. - >>> levels[:] = [1000.,850.,700.,500.,300.,250.,200.,150.,100.,50.] +```python +>>> # now, assign data to levels dimension variable. 
+>>> levels[:] = [1000.,850.,700.,500.,300.,250.,200.,150.,100.,50.] +``` However, that there are some differences between NumPy and netCDF variable slicing rules. Slices behave as usual, being specified as a @@ -503,8 +528,10 @@ than for numpy arrays. Only 1-d boolean arrays and integer sequences are allowed, and these indices work independently along each dimension (similar to the way vector subscripts work in fortran). This means that - :::python - >>> temp[0, 0, [0,1,2,3], [0,1,2,3]] +```python +>>> temp[0, 0, [0,1,2,3], [0,1,2,3]].shape +(4, 4) +``` returns an array of shape (4,4) when slicing a netCDF variable, but for a numpy array it returns an array of shape (4,). @@ -520,61 +547,80 @@ variables by using logical operations on the dimension arrays to create slices. For example, - :::python - >>> tempdat = temp[::2, [1,3,6], lats>0, lons>0] +```python +>>> tempdat = temp[::2, [1,3,6], lats>0, lons>0] +``` will extract time indices 0,2 and 4, pressure levels 850, 500 and 200 hPa, all Northern Hemisphere latitudes and Eastern Hemisphere longitudes, resulting in a numpy array of shape (3, 3, 36, 71). - :::python - >>> print "shape of fancy temp slice = ",tempdat.shape - shape of fancy temp slice = (3, 3, 36, 71) +```python +>>> print("shape of fancy temp slice = {}".format(tempdat.shape)) +shape of fancy temp slice = (3, 3, 36, 71) +``` ***Special note for scalar variables***: To extract data from a scalar variable -`v` with no associated dimensions, use `np.asarray(v)` or `v[...]`. The result -will be a numpy scalar array. - -##
7) Dealing with time coordinates. +`v` with no associated dimensions, use `numpy.asarray(v)` or `v[...]`. +The result will be a numpy scalar array. + +By default, netcdf4-python returns numpy masked arrays with values equal to the +`missing_value` or `_FillValue` variable attributes masked for primitive and +enum data types. +The `Dataset.set_auto_mask` `Dataset` and `Variable` methods +can be used to disable this feature so that +numpy arrays are always returned, with the missing values included. Prior to +version 1.4.0 the default behavior was to only return masked arrays when the +requested slice contained missing values. This behavior can be recovered +using the `Dataset.set_always_mask` method. If a masked array is +written to a netCDF variable, the masked elements are filled with the +value specified by the `missing_value` attribute. If the variable has +no `missing_value`, the `_FillValue` is used instead. + +## Dealing with time coordinates Time coordinate values pose a special challenge to netCDF users. Most metadata standards (such as CF) specify that time should be measure relative to a fixed date using a certain calendar, with units specified like `hours since YY-MM-DD hh:mm:ss`. These units can be awkward to deal with, without a utility to convert the values to and -from calendar dates. The function called `netCDF4.num2date` and `netCDF4.date2num` are -provided with this package to do just that. Here's an example of how they -can be used: - - :::python - >>> # fill in times. - >>> from datetime import datetime, timedelta - >>> from netCDF4 import num2date, date2num - >>> dates = [datetime(2001,3,1)+n*timedelta(hours=12) for n in range(temp.shape[0])] - >>> times[:] = date2num(dates,units=times.units,calendar=times.calendar) - >>> print "time values (in units %s): " % times.units+"\\n",times[:] - time values (in units hours since January 1, 0001): - [ 17533056. 17533068. 17533080. 17533092. 17533104.] - >>> dates = num2date(times[:],units=times.units,calendar=times.calendar) - >>> print "dates corresponding to time values:\\n",dates - dates corresponding to time values: - [2001-03-01 00:00:00 2001-03-01 12:00:00 2001-03-02 00:00:00 - 2001-03-02 12:00:00 2001-03-03 00:00:00] - -`netCDF4.num2date` converts numeric values of time in the specified `units` -and `calendar` to datetime objects, and `netCDF4.date2num` does the reverse. +from calendar dates. The functions [num2date](https://unidata.github.io/cftime/api.html) +and [date2num](https://unidata.github.io/cftime/api.html) are +provided by [cftime](https://unidata.github.io/cftime) to do just that. +Here's an example of how they can be used: + +```python +>>> # fill in times. +>>> from datetime import datetime, timedelta +>>> from cftime import num2date, date2num +>>> dates = [datetime(2001,3,1)+n*timedelta(hours=12) for n in range(temp.shape[0])] +>>> times[:] = date2num(dates,units=times.units,calendar=times.calendar) +>>> print("time values (in units {}):\\n{}".format(times.units, times[:])) +time values (in units hours since 0001-01-01 00:00:00.0): +[17533104. 17533116. 17533128. 17533140. 17533152.] 
+>>> dates = num2date(times[:],units=times.units,calendar=times.calendar) +>>> print("dates corresponding to time values:\\n{}".format(dates)) + [cftime.DatetimeGregorian(2001, 3, 1, 0, 0, 0, 0, has_year_zero=False) + cftime.DatetimeGregorian(2001, 3, 1, 12, 0, 0, 0, has_year_zero=False) + cftime.DatetimeGregorian(2001, 3, 2, 0, 0, 0, 0, has_year_zero=False) + cftime.DatetimeGregorian(2001, 3, 2, 12, 0, 0, 0, has_year_zero=False) + cftime.DatetimeGregorian(2001, 3, 3, 0, 0, 0, 0, has_year_zero=False)] +``` + +`num2date` converts numeric values of time in the specified `units` +and `calendar` to datetime objects, and `date2num` does the reverse. All the calendars currently defined in the [CF metadata convention](http://cfconventions.org) are supported. -A function called `netCDF4.date2index` is also provided which returns the indices +A function called `date2index` is also provided which returns the indices of a netCDF time variable corresponding to a sequence of datetime instances. -##
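+For example, to look up the index of a particular date in the `times`
+variable written above (a minimal sketch, assuming `times` and the
+`datetime` import from the previous example are still in scope; `select`
+may also be set to 'before', 'after' or 'nearest' for inexact matches):
+
+```python
+>>> # minimal sketch: find the index of a calendar date in the time variable
+>>> from netCDF4 import date2index
+>>> print(date2index(datetime(2001,3,2), times, select="exact"))
+2
+```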
8) Reading data from a multi-file netCDF dataset. +## Reading data from a multi-file netCDF dataset If you want to read data from a variable that spans multiple netCDF files, -you can use the `netCDF4.MFDataset` class to read the data as if it were +you can use the `MFDataset` class to read the data as if it were contained in a single file. Instead of using a single filename to create -a `netCDF4.Dataset` instance, create a `netCDF4.MFDataset` instance with either a list +a `Dataset` instance, create a `MFDataset` instance with either a list of filenames, or a string with a wildcard (which is then converted to a sorted list of files using the python glob module). Variables in the list of files that share the same unlimited @@ -585,84 +631,109 @@ must in be in `NETCDF3_64BIT_OFFSET`, `NETCDF3_64BIT_DATA`, `NETCDF3_CLASSIC` or `NETCDF4_CLASSIC` format (`NETCDF4` formatted multi-file datasets are not supported). - :::python - >>> for nf in range(10): - >>> f = Dataset("mftest%s.nc" % nf,"w") - >>> f.createDimension("x",None) - >>> x = f.createVariable("x","i",("x",)) - >>> x[0:10] = numpy.arange(nf*10,10*(nf+1)) - >>> f.close() - -Now read all the files back in at once with `netCDF4.MFDataset` - - :::python - >>> from netCDF4 import MFDataset - >>> f = MFDataset("mftest*nc") - >>> print f.variables["x"][:] - [ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 - 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 - 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 - 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99] - -Note that `netCDF4.MFDataset` can only be used to read, not write, multi-file +```python +>>> for nf in range(10): +... with Dataset("mftest%s.nc" % nf, "w", format="NETCDF4_CLASSIC") as f: +... _ = f.createDimension("x",None) +... x = f.createVariable("x","i",("x",)) +... x[0:10] = np.arange(nf*10,10*(nf+1)) +``` + +Now read all the files back in at once with `MFDataset` + +```python +>>> from netCDF4 import MFDataset +>>> f = MFDataset("mftest*nc") +>>> print(f.variables["x"][:]) +[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 + 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 + 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 + 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 + 96 97 98 99] +``` + +Note that `MFDataset` can only be used to read, not write, multi-file datasets. -##
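+A slice that spans several of the underlying files is handled transparently
+(a minimal sketch, reusing the aggregated dataset `f` from above):
+
+```python
+>>> # indices 45-54 come from two different mftest files
+>>> print(f.variables["x"][45:55])
+[45 46 47 48 49 50 51 52 53 54]
+>>> f.close()
+```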
9) Efficient compression of netCDF variables. +## Efficient compression of netCDF variables -Data stored in netCDF 4 `netCDF4.Variable` objects can be compressed and -decompressed on the fly. The parameters for the compression are -determined by the `zlib`, `complevel` and `shuffle` keyword arguments -to the `netCDF4.Dataset.createVariable` method. To turn on -compression, set `zlib=True`. The `complevel` keyword regulates the -speed and efficiency of the compression (1 being fastest, but lowest +Data stored in netCDF `Variable` objects can be compressed and +decompressed on the fly. The compression algorithm used is determined +by the `compression` keyword argument to the `Dataset.createVariable` method. +`zlib` compression is always available, `szip` is available if the linked HDF5 +library supports it, and `zstd`, `bzip2`, `blosc_lz`,`blosc_lz4`,`blosc_lz4hc`, +`blosc_zlib` and `blosc_zstd` are available via optional external plugins. +The `complevel` keyword regulates the +speed and efficiency of the compression for `zlib`, `bzip` and `zstd` (1 being fastest, but lowest compression ratio, 9 being slowest but best compression ratio). The default value of `complevel` is 4. Setting `shuffle=False` will turn off the HDF5 shuffle filter, which de-interlaces a block of data before -compression by reordering the bytes. The shuffle filter can -significantly improve compression ratios, and is on by default. Setting +`zlib` compression by reordering the bytes. The shuffle filter can +significantly improve compression ratios, and is on by default if `compression=zlib`. Setting `fletcher32` keyword argument to -`netCDF4.Dataset.createVariable` to `True` (it's `False` by +`Dataset.createVariable` to `True` (it's `False` by default) enables the Fletcher32 checksum algorithm for error detection. It's also possible to set the HDF5 chunking parameters and endian-ness of the binary data stored in the HDF5 file with the `chunksizes` and `endian` keyword arguments to -`netCDF4.Dataset.createVariable`. These keyword arguments only +`Dataset.createVariable`. These keyword arguments only are relevant for `NETCDF4` and `NETCDF4_CLASSIC` files (where the underlying file format is HDF5) and are silently ignored if the file format is `NETCDF3_CLASSIC`, `NETCDF3_64BIT_OFFSET` or `NETCDF3_64BIT_DATA`. +If the HDF5 library is built with szip support, compression=`szip` can also +be used (in conjunction with the `szip_coding` and `szip_pixels_per_block` keyword +arguments). If your data only has a certain number of digits of precision (say for example, it is temperature data that was measured with a precision of -0.1 degrees), you can dramatically improve zlib compression by -quantizing (or truncating) the data using the `least_significant_digit` -keyword argument to `netCDF4.Dataset.createVariable`. The least -significant digit is the power of ten of the smallest decimal place in +0.1 degrees), you can dramatically improve compression by +quantizing (or truncating) the data. There are two methods supplied for +doing this. You can use the `least_significant_digit` +keyword argument to `Dataset.createVariable` to specify +the power of ten of the smallest decimal place in the data that is a reliable value. For example if the data has a precision of 0.1, then setting `least_significant_digit=1` will cause data the data to be quantized using `numpy.around(scale*data)/scale`, where scale = 2**bits, and bits is determined so that a precision of 0.1 is -retained (in this case bits=4). 
Effectively, this makes the compression +retained (in this case bits=4). This is done at the python level and is +not a part of the underlying C library. Starting with netcdf-c version 4.9.0, +a quantization capability is provided in the library. This can be +used via the `significant_digits` `Dataset.createVariable` kwarg (new in +version 1.6.0). +The interpretation of `significant_digits` is different than `least_signficant_digit` +in that it specifies the absolute number of significant digits independent +of the magnitude of the variable (the floating point exponent). +Either of these approaches makes the compression 'lossy' instead of 'lossless', that is some precision in the data is sacrificed for the sake of disk space. In our example, try replacing the line - :::python - >>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",)) +```python +>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",)) +``` with - :::python - >>> temp = dataset.createVariable("temp","f4",("time","level","lat","lon",),zlib=True) +```python +>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",),compression='zlib') +``` and then - :::python - >>> temp = dataset.createVariable("temp","f4",("time","level","lat","lon",),zlib=True,least_significant_digit=3) +```python +>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",),compression='zlib',least_significant_digit=3) +``` + +or with netcdf-c >= 4.9.0 + +```python +>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",),compression='zlib',significant_digits=4) +``` and see how much smaller the resulting files are. -##
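+If the linked HDF5 library has szip support, a szip-compressed variable can be
+created in the same way (a minimal sketch; the `szip_coding` and
+`szip_pixels_per_block` values shown are just illustrative choices):
+
+```python
+>>> # requires HDF5 built with szip support
+>>> temp = rootgrp.createVariable("temp","f4",("time","level","lat","lon",),
+...     compression='szip',szip_coding='nn',szip_pixels_per_block=16)
+```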
10) Beyond homogeneous arrays of a fixed type - compound data types. +## Beyond homogeneous arrays of a fixed type - compound data types Compound data types map directly to numpy structured (a.k.a 'record') arrays. Structured arrays are akin to C structs, or derived types @@ -674,74 +745,78 @@ location for scattered (point) data. You can then access all the information for a point by reading one variable, instead of reading different parameters from different variables. Compound data types are created from the corresponding numpy data type using the -`netCDF4.Dataset.createCompoundType` method of a `netCDF4.Dataset` or `netCDF4.Group` instance. -Since there is no native complex data type in netcdf, compound types are handy -for storing numpy complex arrays. Here's an example: - - :::python - >>> f = Dataset("complex.nc","w") - >>> size = 3 # length of 1-d complex array - >>> # create sample complex data. - >>> datac = numpy.exp(1j*(1.+numpy.linspace(0, numpy.pi, size))) - >>> # create complex128 compound data type. - >>> complex128 = numpy.dtype([("real",numpy.float64),("imag",numpy.float64)]) - >>> complex128_t = f.createCompoundType(complex128,"complex128") - >>> # create a variable with this data type, write some data to it. - >>> f.createDimension("x_dim",None) - >>> v = f.createVariable("cmplx_var",complex128_t,"x_dim") - >>> data = numpy.empty(size,complex128) # numpy structured array - >>> data["real"] = datac.real; data["imag"] = datac.imag - >>> v[:] = data # write numpy structured array to netcdf compound var - >>> # close and reopen the file, check the contents. - >>> f.close(); f = Dataset("complex.nc") - >>> v = f.variables["cmplx_var"] - >>> datain = v[:] # read in all the data into a numpy structured array - >>> # create an empty numpy complex array - >>> datac2 = numpy.empty(datain.shape,numpy.complex128) - >>> # .. fill it with contents of structured array. - >>> datac2.real = datain["real"]; datac2.imag = datain["imag"] - >>> print datac.dtype,datac # original data - complex128 [ 0.54030231+0.84147098j -0.84147098+0.54030231j -0.54030231-0.84147098j] - >>> - >>> print datac2.dtype,datac2 # data from file - complex128 [ 0.54030231+0.84147098j -0.84147098+0.54030231j -0.54030231-0.84147098j] +`Dataset.createCompoundType` method of a `Dataset` or `Group` instance. +Since there is no native complex data type in netcdf (but see +[Support for complex numbers](#support-for-complex-numbers)), compound +types are handy for storing numpy complex arrays. Here's an example: + +```python +>>> f = Dataset("complex.nc","w") +>>> size = 3 # length of 1-d complex array +>>> # create sample complex data. +>>> datac = np.exp(1j*(1.+np.linspace(0, np.pi, size))) +>>> # create complex128 compound data type. +>>> complex128 = np.dtype([("real",np.float64),("imag",np.float64)]) +>>> complex128_t = f.createCompoundType(complex128,"complex128") +>>> # create a variable with this data type, write some data to it. +>>> x_dim = f.createDimension("x_dim",None) +>>> v = f.createVariable("cmplx_var",complex128_t,"x_dim") +>>> data = np.empty(size,complex128) # numpy structured array +>>> data["real"] = datac.real; data["imag"] = datac.imag +>>> v[:] = data # write numpy structured array to netcdf compound var +>>> # close and reopen the file, check the contents. 
+>>> f.close(); f = Dataset("complex.nc") +>>> v = f.variables["cmplx_var"] +>>> datain = v[:] # read in all the data into a numpy structured array +>>> # create an empty numpy complex array +>>> datac2 = np.empty(datain.shape,np.complex128) +>>> # .. fill it with contents of structured array. +>>> datac2.real = datain["real"]; datac2.imag = datain["imag"] +>>> print('{}: {}'.format(datac.dtype, datac)) # original data +complex128: [ 0.54030231+0.84147098j -0.84147098+0.54030231j -0.54030231-0.84147098j] +>>> +>>> print('{}: {}'.format(datac2.dtype, datac2)) # data from file +complex128: [ 0.54030231+0.84147098j -0.84147098+0.54030231j -0.54030231-0.84147098j] +``` Compound types can be nested, but you must create the 'inner' ones first. All possible numpy structured arrays cannot be represented as Compound variables - an error message will be raise if you try to create one that is not supported. -All of the compound types defined for a `netCDF4.Dataset` or `netCDF4.Group` are stored +All of the compound types defined for a `Dataset` or `Group` are stored in a Python dictionary, just like variables and dimensions. As always, printing objects gives useful summary information in an interactive session: - :::python - >>> print f - - root group (NETCDF4 file format): - dimensions: x_dim - variables: cmplx_var - groups: - - >>> print f.variables["cmplx_var"] - compound cmplx_var(x_dim) - compound data type: [("real", ">> print f.cmptypes - OrderedDict([("complex128", )]) - >>> print f.cmptypes["complex128"] - : name = "complex128", numpy dtype = [(u"real","11) Variable-length (vlen) data types. +```python +>>> print(f) + +root group (NETCDF4 data model, file format HDF5): + dimensions(sizes): x_dim(3) + variables(dimensions): {'names':['real','imag'], 'formats':['>> print(f.variables["cmplx_var"]) + +compound cmplx_var(x_dim) +compound data type: {'names':['real','imag'], 'formats':['>> print(f.cmptypes) +{'complex128': : name = 'complex128', numpy dtype = {'names':['real','imag'], 'formats':['>> print(f.cmptypes["complex128"]) +: name = 'complex128', numpy dtype = {'names':['real','imag'], 'formats':['>> f = Dataset("tst_vlen.nc","w") - >>> vlen_t = f.createVLType(numpy.int32, "phony_vlen") +```python +>>> f = Dataset("tst_vlen.nc","w") +>>> vlen_t = f.createVLType(np.int32, "phony_vlen") +``` The numpy datatype of the variable-length sequences and the name of the new datatype must be specified. Any of the primitive datatypes can be @@ -749,10 +824,11 @@ used (signed and unsigned integers, 32 and 64 bit floats, and characters), but compound data types cannot. A new variable can then be created using this datatype. - :::python - >>> x = f.createDimension("x",3) - >>> y = f.createDimension("y",4) - >>> vlvar = f.createVariable("phony_vlen_var", vlen_t, ("y","x")) +```python +>>> x = f.createDimension("x",3) +>>> y = f.createDimension("y",4) +>>> vlvar = f.createVariable("phony_vlen_var", vlen_t, ("y","x")) +``` Since there is no native vlen datatype in numpy, vlen arrays are represented in python as object arrays (arrays of dtype `object`). These are arrays whose @@ -762,103 +838,108 @@ but of varying length. In this case, they contain 1-D numpy `int32` arrays of random length between 1 and 10. 
- :::python - >>> import random - >>> data = numpy.empty(len(y)*len(x),object) - >>> for n in range(len(y)*len(x)): - >>> data[n] = numpy.arange(random.randint(1,10),dtype="int32")+1 - >>> data = numpy.reshape(data,(len(y),len(x))) - >>> vlvar[:] = data - >>> print "vlen variable =\\n",vlvar[:] - vlen variable = - [[[ 1 2 3 4 5 6 7 8 9 10] [1 2 3 4 5] [1 2 3 4 5 6 7 8]] - [[1 2 3 4 5 6 7] [1 2 3 4 5 6] [1 2 3 4 5]] - [[1 2 3 4 5] [1 2 3 4] [1]] - [[ 1 2 3 4 5 6 7 8 9 10] [ 1 2 3 4 5 6 7 8 9 10] - [1 2 3 4 5 6 7 8]]] - >>> print f - - root group (NETCDF4 file format): - dimensions: x, y - variables: phony_vlen_var - groups: - >>> print f.variables["phony_vlen_var"] - - vlen phony_vlen_var(y, x) - vlen data type: int32 - unlimited dimensions: - current shape = (4, 3) - >>> print f.VLtypes["phony_vlen"] - : name = "phony_vlen", numpy dtype = int32 +```python +>>> import random +>>> random.seed(54321) +>>> data = np.empty(len(y)*len(x),object) +>>> for n in range(len(y)*len(x)): +... data[n] = np.arange(random.randint(1,10),dtype="int32")+1 +>>> data = np.reshape(data,(len(y),len(x))) +>>> vlvar[:] = data +>>> print("vlen variable =\\n{}".format(vlvar[:])) +vlen variable = +[[array([1, 2, 3, 4, 5, 6, 7, 8], dtype=int32) array([1, 2], dtype=int32) + array([1, 2, 3, 4], dtype=int32)] + [array([1, 2, 3], dtype=int32) + array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32) + array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)] + [array([1, 2, 3, 4, 5, 6, 7], dtype=int32) array([1, 2, 3], dtype=int32) + array([1, 2, 3, 4, 5, 6], dtype=int32)] + [array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32) + array([1, 2, 3, 4, 5], dtype=int32) array([1, 2], dtype=int32)]] +>>> print(f) + +root group (NETCDF4 data model, file format HDF5): + dimensions(sizes): x(3), y(4) + variables(dimensions): int32 phony_vlen_var(y,x) + groups: +>>> print(f.variables["phony_vlen_var"]) + +vlen phony_vlen_var(y, x) +vlen data type: int32 +unlimited dimensions: +current shape = (4, 3) +>>> print(f.vltypes["phony_vlen"]) +: name = 'phony_vlen', numpy dtype = int32 +``` Numpy object arrays containing python strings can also be written as vlen variables, For vlen strings, you don't need to create a vlen data type. Instead, simply use the python `str` builtin (or a numpy string datatype with fixed length greater than 1) when calling the -`netCDF4.Dataset.createVariable` method. +`Dataset.createVariable` method. - :::python - >>> z = f.createDimension("z",10) - >>> strvar = rootgrp.createVariable("strvar", str, "z") +```python +>>> z = f.createDimension("z",10) +>>> strvar = f.createVariable("strvar", str, "z") +``` In this example, an object array is filled with random python strings with random lengths between 2 and 12 characters, and the data in the object array is assigned to the vlen string variable. 
- :::python - >>> chars = "1234567890aabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" - >>> data = numpy.empty(10,"O") - >>> for n in range(10): - >>> stringlen = random.randint(2,12) - >>> data[n] = "".join([random.choice(chars) for i in range(stringlen)]) - >>> strvar[:] = data - >>> print "variable-length string variable:\\n",strvar[:] - variable-length string variable: - [aDy29jPt 5DS9X8 jd7aplD b8t4RM jHh8hq KtaPWF9cQj Q1hHN5WoXSiT MMxsVeq tdLUzvVTzj] - >>> print f - - root group (NETCDF4 file format): - dimensions: x, y, z - variables: phony_vlen_var, strvar - groups: - >>> print f.variables["strvar"] - - vlen strvar(z) - vlen data type: - unlimited dimensions: - current size = (10,) +```python +>>> chars = "1234567890aabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" +>>> data = np.empty(10,"O") +>>> for n in range(10): +... stringlen = random.randint(2,12) +... data[n] = "".join([random.choice(chars) for i in range(stringlen)]) +>>> strvar[:] = data +>>> print("variable-length string variable:\\n{}".format(strvar[:])) +variable-length string variable: +['Lh' '25F8wBbMI' '53rmM' 'vvjnb3t63ao' 'qjRBQk6w' 'aJh' 'QF' + 'jtIJbJACaQk4' '3Z5' 'bftIIq'] +>>> print(f) + +root group (NETCDF4 data model, file format HDF5): + dimensions(sizes): x(3), y(4), z(10) + variables(dimensions): int32 phony_vlen_var(y,x), strvar(z) + groups: +>>> print(f.variables["strvar"]) + +vlen strvar(z) +vlen data type: +unlimited dimensions: +current shape = (10,) +``` It is also possible to set contents of vlen string variables with numpy arrays of any string or unicode data type. Note, however, that accessing the contents of such variables will always return numpy arrays with dtype `object`. -##
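+For instance (a minimal sketch, reusing the `strvar` variable created above):
+
+```python
+>>> # assign a fixed-width unicode array; data is still read back as dtype object
+>>> strvar[:] = np.array(['foo','bar','baz','qux','one','two','abc','de','f','ghij'], dtype='U4')
+>>> print(strvar[:].dtype)
+object
+```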
12) Enum data type. +## Enum data type netCDF4 has an enumerated data type, which is an integer datatype that is restricted to certain named values. Since Enums don't map directly to a numpy data type, they are read and written as integer arrays. -Here's an example of using an Enum type to hold cloud type data. +Here's an example of using an Enum type to hold cloud type data. The base integer data type and a python dictionary describing the allowed values and their names are used to define an Enum data type using -`netCDF4.Dataset.createEnumType`. - - :::python - >>> nc = Dataset('clouds.nc','w') - >>> # python dict with allowed values and their names. - >>> enum_dict = {u'Altocumulus': 7, u'Missing': 255, - >>> u'Stratus': 2, u'Clear': 0, - >>> u'Nimbostratus': 6, u'Cumulus': 4, u'Altostratus': 5, - >>> u'Cumulonimbus': 1, u'Stratocumulus': 3} - >>> # create the Enum type called 'cloud_t'. - >>> cloud_type = nc.createEnumType(numpy.uint8,'cloud_t',enum_dict) - >>> print cloud_type - : name = 'cloud_t', - numpy dtype = uint8, fields/values ={u'Cumulus': 4, - u'Altocumulus': 7, u'Missing': 255, - u'Stratus': 2, u'Clear': 0, - u'Cumulonimbus': 1, u'Stratocumulus': 3, - u'Nimbostratus': 6, u'Altostratus': 5} +`Dataset.createEnumType`. + +```python +>>> nc = Dataset('clouds.nc','w') +>>> # python dict with allowed values and their names. +>>> enum_dict = {'Altocumulus': 7, 'Missing': 255, +... 'Stratus': 2, 'Clear': 0, +... 'Nimbostratus': 6, 'Cumulus': 4, 'Altostratus': 5, +... 'Cumulonimbus': 1, 'Stratocumulus': 3} +>>> # create the Enum type called 'cloud_t'. +>>> cloud_type = nc.createEnumType(np.uint8,'cloud_t',enum_dict) +>>> print(cloud_type) +: name = 'cloud_t', numpy dtype = uint8, fields/values ={'Altocumulus': 7, 'Missing': 255, 'Stratus': 2, 'Clear': 0, 'Nimbostratus': 6, 'Cumulus': 4, 'Altostratus': 5, 'Cumulonimbus': 1, 'Stratocumulus': 3} +``` A new variable can be created in the usual way using this data type. Integer data is written to the variable that represents the named @@ -866,50 +947,50 @@ cloud types in enum_dict. A `ValueError` will be raised if an attempt is made to write an integer value not associated with one of the specified names. - :::python - >>> time = nc.createDimension('time',None) - >>> # create a 1d variable of type 'cloud_type'. - >>> # The fill_value is set to the 'Missing' named value. - >>> cloud_var = - >>> nc.createVariable('primary_cloud',cloud_type,'time', - >>> fill_value=enum_dict['Missing']) - >>> # write some data to the variable. - >>> cloud_var[:] = [enum_dict['Clear'],enum_dict['Stratus'], - >>> enum_dict['Cumulus'],enum_dict['Missing'], - >>> enum_dict['Cumulonimbus']] - >>> nc.close() - >>> # reopen the file, read the data. - >>> nc = Dataset('clouds.nc') - >>> cloud_var = nc.variables['primary_cloud'] - >>> print cloud_var - - enum primary_cloud(time) - _FillValue: 255 - enum data type: uint8 - unlimited dimensions: time - current shape = (5,) - >>> print cloud_var.datatype.enum_dict - {u'Altocumulus': 7, u'Missing': 255, u'Stratus': 2, - u'Clear': 0, u'Nimbostratus': 6, u'Cumulus': 4, - u'Altostratus': 5, u'Cumulonimbus': 1, - u'Stratocumulus': 3} - >>> print cloud_var[:] - [0 2 4 -- 1] - >>> nc.close() - -##
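+Since the values are returned as plain integers, the names can be recovered by
+inverting `enum_dict` (a minimal sketch, reusing the `clouds.nc` file written
+above and filling the masked 'Missing' entry back in):
+
+```python
+>>> # minimal sketch: map stored integers back to their enum names
+>>> nc = Dataset('clouds.nc')
+>>> cloud_var = nc.variables['primary_cloud']
+>>> value_to_name = {v: k for k, v in cloud_var.datatype.enum_dict.items()}
+>>> print([value_to_name[int(v)] for v in cloud_var[:].filled(255)])
+['Clear', 'Stratus', 'Cumulus', 'Missing', 'Cumulonimbus']
+>>> nc.close()
+```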
13) Parallel IO. - -If MPI parallel enabled versions of netcdf and hdf5 are detected, and -[mpi4py](https://mpi4py.scipy.org) is installed, netcdf4-python will -be built with parallel IO capabilities enabled. To use parallel IO, -your program must be running in an MPI environment using -[mpi4py](https://mpi4py.scipy.org). - - :::python - >>> from mpi4py import MPI - >>> import numpy as np - >>> from netCDF4 import Dataset - >>> rank = MPI.COMM_WORLD.rank # The process ID (integer 0-3 for 4-process run) +```python +>>> time = nc.createDimension('time',None) +>>> # create a 1d variable of type 'cloud_type'. +>>> # The fill_value is set to the 'Missing' named value. +>>> cloud_var = nc.createVariable('primary_cloud',cloud_type,'time', +... fill_value=enum_dict['Missing']) +>>> # write some data to the variable. +>>> cloud_var[:] = [enum_dict[k] for k in ['Clear', 'Stratus', 'Cumulus', +... 'Missing', 'Cumulonimbus']] +>>> nc.close() +>>> # reopen the file, read the data. +>>> nc = Dataset('clouds.nc') +>>> cloud_var = nc.variables['primary_cloud'] +>>> print(cloud_var) + +enum primary_cloud(time) + _FillValue: 255 +enum data type: uint8 +unlimited dimensions: time +current shape = (5,) +>>> print(cloud_var.datatype.enum_dict) +{'Altocumulus': 7, 'Missing': 255, 'Stratus': 2, 'Clear': 0, 'Nimbostratus': 6, 'Cumulus': 4, 'Altostratus': 5, 'Cumulonimbus': 1, 'Stratocumulus': 3} +>>> print(cloud_var[:]) +[0 2 4 -- 1] +>>> nc.close() +``` + +## Parallel IO + +If MPI parallel enabled versions of netcdf and hdf5 or pnetcdf are detected, +and [mpi4py](https://mpi4py.scipy.org) is installed, netcdf4-python will +be built with parallel IO capabilities enabled. Parallel IO of NETCDF4 or +NETCDF4_CLASSIC formatted files is only available if the MPI parallel HDF5 +library is available. Parallel IO of classic netcdf-3 file formats is only +available if the [PnetCDF](https://parallel-netcdf.github.io/) library is +available. To use parallel IO, your program must be running in an MPI +environment using [mpi4py](https://mpi4py.scipy.org). + +```python +>>> from mpi4py import MPI +>>> import numpy as np +>>> from netCDF4 import Dataset +>>> rank = MPI.COMM_WORLD.rank # The process ID (integer 0-3 for 4-process run) +``` To run an MPI-based parallel program like this, you must use `mpiexec` to launch several parallel instances of Python (for example, using `mpiexec -np 4 python mpi_example.py`). @@ -917,159 +998,513 @@ The parallel features of netcdf4-python are mostly transparent - when a new dataset is created or an existing dataset is opened, use the `parallel` keyword to enable parallel access. - :::python - >>> nc = Dataset('parallel_tst.nc','w',parallel=True) +```python +>>> nc = Dataset('parallel_test.nc','w',parallel=True) +``` The optional `comm` keyword may be used to specify a particular MPI communicator (`MPI_COMM_WORLD` is used by default). Each process (or rank) -can now write to the file indepedently. In this example the process rank is +can now write to the file independently. 
In this example the process rank is written to a different variable index on each task - :::python - >>> d = nc.createDimension('dim',4) - >>> v = nc.createVariable('var', np.int, 'dim') - >>> v[rank] = rank - >>> nc.close() +```python +>>> d = nc.createDimension('dim',4) +>>> v = nc.createVariable('var', np.int64, 'dim') +>>> v[rank] = rank +>>> nc.close() - % ncdump parallel_test.nc - netcdf parallel_test { - dimensions: - dim = 4 ; - variables: - int64 var(dim) ; - data: +% ncdump parallel_test.nc +netcdf parallel_test { +dimensions: + dim = 4 ; +variables: + int64 var(dim) ; +data: - var = 0, 1, 2, 3 ; - } + var = 0, 1, 2, 3 ; +} +``` There are two types of parallel IO, independent (the default) and collective. Independent IO means that each process can do IO independently. It should not depend on or be affected by other processes. Collective IO is a way of doing IO defined in the MPI-IO standard; unlike independent IO, all processes must participate in doing IO. To toggle back and forth between -the two types of IO, use the `netCDF4.Variable.set_collective` -`netCDF4.Variable`method. All metadata +the two types of IO, use the `Variable.set_collective` +`Variable` method. All metadata operations (such as creation of groups, types, variables, dimensions, or attributes) -are collective. There are a couple of important limitatons of parallel IO: +are collective. There are a couple of important limitations of parallel IO: + - parallel IO for NETCDF4 or NETCDF4_CLASSIC formatted files is only available + if the netcdf library was compiled with MPI enabled HDF5. + - parallel IO for all classic netcdf-3 file formats is only available if the + netcdf library was compiled with [PnetCDF](https://parallel-netcdf.github.io). - If a variable has an unlimited dimension, appending data must be done in collective mode. If the write is done in independent mode, the operation will fail with a a generic "HDF Error". - - You cannot write compressed data in parallel (although - you can read it). - - You cannot use variable-length (VLEN) data types. - -All of the code in this tutorial is available in `examples/tutorial.py`, except -the parallel IO example, which is in `examples/mpi_example.py`. -Unit tests are in the `test` directory. + - You can write compressed data in parallel only with netcdf-c >= 4.7.4 + and hdf5 >= 1.10.3 (although you can read in parallel with earlier versions). To write + compressed data in parallel, the variable must be in 'collective IO mode'. This is done + automatically on variable creation if compression is turned on, but if you are appending + to a variable in an existing file, you must use `Variable.set_collective(True)` before attempting + to write to it. + - You cannot use variable-length (VLEN) data types. + +***Important warning regarding threads:*** The underlying netcdf-c library is not thread-safe, so netcdf4-python cannot perform parallel +IO in a multi-threaded environment. Users should expect segfaults if a netcdf file is opened on multiple threads - care should +be taken to restrict netcdf4-python usage to a single thread, even when using free-threaded python. + +## Dealing with strings + +The most flexible way to store arrays of strings is with the +[Variable-length (vlen) string data type](#variable-length-vlen-data-type). However, this requires +the use of the NETCDF4 data model, and the vlen type does not map very well +numpy arrays (you have to use numpy arrays of dtype=`object`, which are arrays of +arbitrary python objects). 
numpy does have a fixed-width string array +data type, but unfortunately the netCDF data model does not. +Instead fixed-width byte strings are typically stored as [arrays of 8-bit +characters](https://www.unidata.ucar.edu/software/netcdf/docs/BestPractices.html#bp_Strings-and-Variables-of-type-char). +To perform the conversion to and from character arrays to fixed-width numpy string arrays, the +following convention is followed by the python interface. +If the `_Encoding` special attribute is set for a character array +(dtype `S1`) variable, the `chartostring` utility function is used to convert the array of +characters to an array of strings with one less dimension (the last dimension is +interpreted as the length of each string) when reading the data. The character +set is specified by the `_Encoding` attribute. If `_Encoding` +is 'none' or 'bytes', then the character array is converted to a numpy +fixed-width byte string array (dtype `S#`), otherwise a numpy unicode (dtype +`U#`) array is created. When writing the data, +`stringtochar` is used to convert the numpy string array to an array of +characters with one more dimension. For example, + +```python +>>> from netCDF4 import stringtochar +>>> nc = Dataset('stringtest.nc','w',format='NETCDF4_CLASSIC') +>>> _ = nc.createDimension('nchars',3) +>>> _ = nc.createDimension('nstrings',None) +>>> v = nc.createVariable('strings','S1',('nstrings','nchars')) +>>> datain = np.array(['foo','bar'],dtype='S3') +>>> v[:] = stringtochar(datain) # manual conversion to char array +>>> print(v[:]) # data returned as char array +[[b'f' b'o' b'o'] + [b'b' b'a' b'r']] +>>> v._Encoding = 'ascii' # this enables automatic conversion +>>> v[:] = datain # conversion to char array done internally +>>> print(v[:]) # data returned in numpy string array +['foo' 'bar'] +>>> nc.close() +``` + +Even if the `_Encoding` attribute is set, the automatic conversion of char +arrays to/from string arrays can be disabled with +`Variable.set_auto_chartostring`. + +A similar situation is often encountered with numpy structured arrays with subdtypes +containing fixed-wdith byte strings (dtype=`S#`). Since there is no native fixed-length string +netCDF datatype, these numpy structure arrays are mapped onto netCDF compound +types with character array elements. In this case the string <-> char array +conversion is handled automatically (without the need to set the `_Encoding` +attribute) using [numpy +views](https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.view.html). +The structured array dtype (including the string elements) can even be used to +define the compound data type - the string dtype will be converted to +character array dtype under the hood when creating the netcdf compound type. +Here's an example: -**contact**: Jeffrey Whitaker +```python +>>> nc = Dataset('compoundstring_example.nc','w') +>>> dtype = np.dtype([('observation', 'f4'), +... ('station_name','S10')]) +>>> station_data_t = nc.createCompoundType(dtype,'station_data') +>>> _ = nc.createDimension('station',None) +>>> statdat = nc.createVariable('station_obs', station_data_t, ('station',)) +>>> data = np.empty(2,dtype) +>>> data['observation'][:] = (123.,3.14) +>>> data['station_name'][:] = ('Boulder','New York') +>>> print(statdat.dtype) # strings actually stored as character arrays +{'names':['observation','station_name'], 'formats':['>> statdat[:] = data # strings converted to character arrays internally +>>> print(statdat[:]) # character arrays converted back to strings +[(123. 
, b'Boulder') ( 3.14, b'New York')] +>>> print(statdat[:].dtype) +{'names':['observation','station_name'], 'formats':['>> statdat.set_auto_chartostring(False) # turn off auto-conversion +>>> statdat[:] = data.view(dtype=[('observation', 'f4'),('station_name','S1',10)]) +>>> print(statdat[:]) # now structured array with char array subtype is returned +[(123. , [b'B', b'o', b'u', b'l', b'd', b'e', b'r', b'', b'', b'']) + ( 3.14, [b'N', b'e', b'w', b' ', b'Y', b'o', b'r', b'k', b'', b''])] +>>> nc.close() +``` + +Note that there is currently no support for mapping numpy structured arrays with +unicode elements (dtype `U#`) onto netCDF compound types, nor is there support +for netCDF compound types with vlen string components. + +## In-memory (diskless) Datasets + +You can create netCDF Datasets whose content is held in memory +instead of in a disk file. There are two ways to do this. If you +don't need access to the memory buffer containing the Dataset from +within python, the best way is to use the `diskless=True` keyword +argument when creating the Dataset. If you want to save the Dataset +to disk when you close it, also set `persist=True`. If you want to +create a new read-only Dataset from an existing python memory buffer, use the +`memory` keyword argument to pass the memory buffer when creating the Dataset. +If you want to create a new in-memory Dataset, and then access the memory buffer +directly from Python, use the `memory` keyword argument to specify the +estimated size of the Dataset in bytes when creating the Dataset with +`mode='w'`. Then, the `Dataset.close` method will return a python memoryview +object representing the Dataset. Below are examples illustrating both +approaches. + +```python +>>> # create a diskless (in-memory) Dataset, +>>> # and persist the file to disk when it is closed. +>>> nc = Dataset('diskless_example.nc','w',diskless=True,persist=True) +>>> d = nc.createDimension('x',None) +>>> v = nc.createVariable('v',np.int32,'x') +>>> v[0:5] = np.arange(5) +>>> print(nc) + +root group (NETCDF4 data model, file format HDF5): + dimensions(sizes): x(5) + variables(dimensions): int32 v(x) + groups: +>>> print(nc['v'][:]) +[0 1 2 3 4] +>>> nc.close() # file saved to disk +>>> # create an in-memory dataset from an existing python +>>> # python memory buffer. +>>> # read the newly created netcdf file into a python +>>> # bytes object. +>>> with open('diskless_example.nc', 'rb') as f: +... nc_bytes = f.read() +>>> # create a netCDF in-memory dataset from the bytes object. +>>> nc = Dataset('inmemory.nc', memory=nc_bytes) +>>> print(nc) + +root group (NETCDF4 data model, file format HDF5): + dimensions(sizes): x(5) + variables(dimensions): int32 v(x) + groups: +>>> print(nc['v'][:]) +[0 1 2 3 4] +>>> nc.close() +>>> # create an in-memory Dataset and retrieve memory buffer +>>> # estimated size is 1028 bytes - this is actually only +>>> # used if format is NETCDF3 +>>> # (ignored for NETCDF4/HDF5 files). +>>> nc = Dataset('inmemory.nc', mode='w',memory=1028) +>>> d = nc.createDimension('x',None) +>>> v = nc.createVariable('v',np.int32,'x') +>>> v[0:5] = np.arange(5) +>>> nc_buf = nc.close() # close returns memoryview +>>> print(type(nc_buf)) + +>>> # save nc_buf to disk, read it back in and check. +>>> with open('inmemory.nc', 'wb') as f: +... 
f.write(nc_buf) +>>> nc = Dataset('inmemory.nc') +>>> print(nc) + +root group (NETCDF4 data model, file format HDF5): + dimensions(sizes): x(5) + variables(dimensions): int32 v(x) + groups: +>>> print(nc['v'][:]) +[0 1 2 3 4] +>>> nc.close() +``` + +## Support for complex numbers + +Although there is no native support for complex numbers in netCDF, there are +some common conventions for storing them. Two of the most common are to either +use a compound datatype for the real and imaginary components, or a separate +dimension. `netCDF4` supports reading several of these conventions, as well as +writing using one of two conventions (depending on file format). This support +for complex numbers is enabled by setting `auto_complex=True` when opening a +`Dataset`: + +```python +>>> complex_array = np.array([0 + 0j, 1 + 0j, 0 + 1j, 1 + 1j, 0.25 + 0.75j]) +>>> with netCDF4.Dataset("complex.nc", "w", auto_complex=True) as nc: +... nc.createDimension("x", size=len(complex_array)) +... var = nc.createVariable("data", "c16", ("x",)) +... var[:] = complex_array +... print(var) + +compound data(x) +compound data type: complex128 +unlimited dimensions: +current shape = (5,) +``` + +When reading files using `auto_complex=True`, `netCDF4` will interpret variables +stored using the following conventions as complex numbers: + +- compound datatypes with two `float` or `double` members who names begin with + `r` and `i` (case insensitive) +- a dimension of length 2 named `complex` or `ri` + +When writing files using `auto_complex=True`, `netCDF4` will use: + +- a compound datatype named `_PFNC_DOUBLE_COMPLEX_TYPE` (or `*FLOAT*` as + appropriate) with members `r` and `i` for netCDF4 formats; +- or a dimension of length 2 named `_pfnc_complex` for netCDF3 or classic + formats. + +Support for complex numbers is handled via the +[`nc-complex`](https://github.com/PlasmaFAIR/nc-complex) library. See there for +further details. + + +**contact**: Jeffrey Whitaker **copyright**: 2008 by Jeffrey Whitaker. -**license**: Permission to use, copy, modify, and distribute this software and -its documentation for any purpose and without fee is hereby granted, -provided that the above copyright notice appear in all copies and that -both the copyright notice and this permission notice appear in -supporting documentation. -THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, -INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO -EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, INDIRECT OR -CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF -USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR -OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR -PERFORMANCE OF THIS SOFTWARE. -- - - +**license**: Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + """ # Make changes to this file, not the c-wrappers that Cython generates. from cpython.mem cimport PyMem_Malloc, PyMem_Free from cpython.buffer cimport PyObject_GetBuffer, PyBuffer_Release, PyBUF_SIMPLE, PyBUF_ANY_CONTIGUOUS +from cpython.bytes cimport PyBytes_FromStringAndSize # pure python utilities from .utils import (_StartCountStride, _quantize, _find_dim, _walk_grps, - _out_array_shape, _sortbylist, _tostr, _safecast) -# try to use built-in ordered dict in python >= 2.7 -try: - from collections import OrderedDict -except ImportError: # or else use drop-in substitute - try: - from ordereddict import OrderedDict - except ImportError: - raise ImportError('please install ordereddict (https://pypi.python.org/pypi/ordereddict)') -try: - from itertools import izip as zip -except ImportError: - # python3: zip is already python2's itertools.izip - pass + _out_array_shape, _sortbylist, _tostr, _safecast, _is_int) +import sys +import functools +from typing import Union -__version__ = "1.3.2" +__version__ = "1.7.4.1" # Initialize numpy import posixpath -import netcdftime +from cftime import date2num, num2date, date2index import numpy +cimport numpy import weakref -import sys import warnings +import subprocess +import pathlib +import os from glob import glob from numpy import ma from libc.string cimport memcpy, memset from libc.stdlib cimport malloc, free -import_array() -include "constants.pyx" +numpy.import_array() +include "membuf.pyx" include "netCDF4.pxi" -IF HAS_NC_PAR: - cimport mpi4py.MPI as MPI - from mpi4py.libmpi cimport MPI_Comm, MPI_Info, MPI_Comm_dup, MPI_Info_dup, \ - MPI_Comm_free, MPI_Info_free, MPI_INFO_NULL,\ - MPI_COMM_WORLD - ctypedef MPI.Comm Comm - ctypedef MPI.Info Info -ELSE: - ctypedef object Comm - ctypedef object Info -# check for required version of netcdf-4 and hdf5. +__has_rename_grp__ = HAS_RENAME_GRP +__has_nc_inq_path__ = HAS_NC_INQ_PATH +__has_nc_inq_format_extended__ = HAS_NC_INQ_FORMAT_EXTENDED +__has_cdf5_format__ = HAS_CDF5_FORMAT +__has_nc_open_mem__ = HAS_NC_OPEN_MEM +__has_nc_create_mem__ = HAS_NC_CREATE_MEM +__has_parallel4_support__ = HAS_PARALLEL4_SUPPORT +__has_pnetcdf_support__ = HAS_PNETCDF_SUPPORT +__has_parallel_support__ = HAS_PARALLEL_SUPPORT +__has_quantization_support__ = HAS_QUANTIZATION_SUPPORT +__has_zstandard_support__ = HAS_ZSTANDARD_SUPPORT +__has_bzip2_support__ = HAS_BZIP2_SUPPORT +__has_blosc_support__ = HAS_BLOSC_SUPPORT +__has_szip_support__ = HAS_SZIP_SUPPORT +__has_set_alignment__ = HAS_SET_ALIGNMENT +__has_ncfilter__ = HAS_NCFILTER +__has_nc_rc_set__ = HAS_NCRCSET + + +# set path to SSL certificates (issue #1246) +# available starting in version 4.9.1 +if __has_nc_rc_set__: + import certifi + if nc_rc_set("HTTP.SSL.CAINFO", _strencode(certifi.where())) != 0: + raise RuntimeError('error setting path to SSL certificates') + +def rc_get(key): + """ +**```rc_get(key)```** + +Returns the internal netcdf-c rc table value corresponding to key. +See +for more information on rc files and values. 
+ """ + cdef int ierr + cdef char *keyc + cdef char *valc + if __has_nc_rc_set__: + bytestr = _strencode(_tostr(key)) + keyc = bytestr + valc = nc_rc_get(keyc) + if valc is NULL: + return None + else: + return valc.decode('utf-8') + else: + raise RuntimeError( + "This function requires netcdf-c 4.9.0+ to be used at compile time" + ) + +def rc_set(key, value): + """ +**```rc_set(key, value)```** +Sets the internal netcdf-c rc table value corresponding to key. +See +for more information on rc files and values. + """ + cdef int ierr + cdef char *keyc + cdef char *valuec + if __has_nc_rc_set__: + key_bytestr = _strencode(_tostr(key)) + keyc = key_bytestr + val_bytestr = _strencode(_tostr(value)) + valuec = val_bytestr + with nogil: + ierr = nc_rc_set(keyc,valuec) + _ensure_nc_success(ierr) + else: + raise RuntimeError( + "This function requires netcdf-c 4.9.0+ to be used at compile time" + ) + + + +# check for required version of netcdf-4 and hdf5. def _gethdf5libversion(): cdef unsigned int majorvers, minorvers, releasevers cdef herr_t ierr - ierr = H5get_libversion( &majorvers, &minorvers, &releasevers) - if ierr < 0: + with nogil: + ierr = H5get_libversion( &majorvers, &minorvers, &releasevers) + if ierr != 0: raise RuntimeError('error getting HDF5 library version info') return '%d.%d.%d' % (majorvers,minorvers,releasevers) def getlibversion(): """ -**`getlibversion()`** +**```getlibversion()```** returns a string describing the version of the netcdf library used to build the module, and when it was built. """ return (nc_inq_libvers()).decode('ascii') +def get_chunk_cache(): + """ +**```get_chunk_cache()```** + +return current netCDF chunk cache information in a tuple (size,nelems,preemption). +See netcdf C library documentation for `nc_get_chunk_cache` for +details. Values can be reset with `set_chunk_cache`.""" + cdef int ierr + cdef size_t sizep, nelemsp + cdef float preemptionp + with nogil: + ierr = nc_get_chunk_cache(&sizep, &nelemsp, &preemptionp) + _ensure_nc_success(ierr) + size = sizep; nelems = nelemsp; preemption = preemptionp + return (size,nelems,preemption) + +def set_chunk_cache(size=None,nelems=None,preemption=None): + """ +**```set_chunk_cache(size=None,nelems=None,preemption=None)```** + +change netCDF4 chunk cache settings. +See netcdf C library documentation for `nc_set_chunk_cache` for +details.""" + cdef int ierr + cdef size_t sizep, nelemsp + cdef float preemptionp + # reset chunk cache size, leave other parameters unchanged. + size_orig, nelems_orig, preemption_orig = get_chunk_cache() + if size is not None: + sizep = size + else: + sizep = size_orig + if nelems is not None: + nelemsp = nelems + else: + nelemsp = nelems_orig + if preemption is not None: + preemptionp = preemption + else: + preemptionp = preemption_orig + with nogil: + ierr = nc_set_chunk_cache(sizep,nelemsp, preemptionp) + _ensure_nc_success(ierr) + + +def get_alignment(): + """**```get_alignment()```** + + return current netCDF alignment within HDF5 files in a tuple + (threshold,alignment). See netcdf C library documentation for + `nc_get_alignment` for details. Values can be reset with + `set_alignment`. 
+ + This function was added in netcdf 4.9.0.""" + + if not __has_set_alignment__: + raise RuntimeError( + "This function requires netcdf-c 4.9.0+ to be used at compile time" + ) + + cdef int ierr + cdef int thresholdp, alignmentp + ierr = nc_get_alignment(&thresholdp, &alignmentp) + _ensure_nc_success(ierr) + threshold = thresholdp + alignment = alignmentp + return (threshold, alignment) + + +def set_alignment(threshold, alignment): + """**```set_alignment(threshold,alignment)```** + + Change the HDF5 file alignment. + See netcdf C library documentation for `nc_set_alignment` for + details. + + This function was added in netcdf 4.9.0.""" + + if not __has_set_alignment__: + raise RuntimeError( + "This function requires netcdf-c 4.9.0+ to be used at compile time" + ) + + cdef int ierr + cdef int thresholdp, alignmentp + thresholdp = threshold + alignmentp = alignment + + ierr = nc_set_alignment(thresholdp, alignmentp) + _ensure_nc_success(ierr) + + __netcdf4libversion__ = getlibversion().split()[0] __hdf5libversion__ = _gethdf5libversion() -__has_rename_grp__ = HAS_RENAME_GRP -__has_nc_inq_path__ = HAS_NC_INQ_PATH -__has_nc_inq_format_extended__ = HAS_NC_INQ_FORMAT_EXTENDED -__has_cdf5_format__ = HAS_CDF5_FORMAT -__has_nc_open_mem__ = HAS_NC_OPEN_MEM -__has_nc_par__ = HAS_NC_PAR _needsworkaround_issue485 = __netcdf4libversion__ < "4.4.0" or \ (__netcdf4libversion__.startswith("4.4.0") and \ "-development" in __netcdf4libversion__) -# issue warning for hdf5 1.10 (issue #549) -if __netcdf4libversion__[0:5] < "4.4.1" and\ - __hdf5libversion__.startswith("1.10"): - msg = """ -WARNING: Backwards incompatible files will be created with HDF5 1.10.x -and netCDF < 4.4.1. Upgrading to netCDF4 >= 4.4.1 or downgrading to -to HDF5 version 1.8.x is highly recommended -(see https://github.com/Unidata/netcdf-c/issues/250).""" - warnings.warn(msg) +class NetCDF4MissingFeatureException(Exception): + """Custom exception when trying to use features missing from the linked netCDF library""" + def __init__(self, feature: str, version: str): + super().__init__( + f"{feature} requires netCDF lib >= {version} (using {__netcdf4libversion__}). " + f"To enable, rebuild netcdf4-python using netCDF {version} or higher " + f"(and possibly enable {feature})" + ) + # numpy data type <--> netCDF 4 data type mapping. _nptonctype = {'S1' : NC_CHAR, @@ -1094,26 +1529,57 @@ _intnptonctype = {'i1' : NC_BYTE, 'i8' : NC_INT64, 'u8' : NC_UINT64} +_complex_types = { + "c16": PFNC_DOUBLE_COMPLEX, + "c8": PFNC_FLOAT_COMPLEX, +} + # create dictionary mapping string identifiers to netcdf format codes _format_dict = {'NETCDF3_CLASSIC' : NC_FORMAT_CLASSIC, 'NETCDF4_CLASSIC' : NC_FORMAT_NETCDF4_CLASSIC, 'NETCDF4' : NC_FORMAT_NETCDF4} -IF HAS_CDF5_FORMAT: +# create dictionary mapping string identifiers to netcdf create format codes +_cmode_dict = {'NETCDF3_CLASSIC' : NC_CLASSIC_MODEL, + 'NETCDF4_CLASSIC' : NC_CLASSIC_MODEL | NC_NETCDF4, + 'NETCDF4' : NC_NETCDF4} +# dicts for blosc, szip compressors. +_blosc_dict={'blosc_lz':0,'blosc_lz4':1,'blosc_lz4hc':2,'blosc_snappy':3,'blosc_zlib':4,'blosc_zstd':5} +_blosc_dict_inv = {v: k for k, v in _blosc_dict.items()} +_szip_dict = {'ec': 4, 'nn': 32} +_szip_dict_inv = {v: k for k, v in _szip_dict.items()} +if __has_cdf5_format__: # NETCDF3_64BIT deprecated, saved for compatibility. # use NETCDF3_64BIT_OFFSET instead. 
_format_dict['NETCDF3_64BIT_OFFSET'] = NC_FORMAT_64BIT_OFFSET _format_dict['NETCDF3_64BIT_DATA'] = NC_FORMAT_64BIT_DATA -ELSE: + _cmode_dict['NETCDF3_64BIT_OFFSET'] = NC_64BIT_OFFSET + _cmode_dict['NETCDF3_64BIT_DATA'] = NC_64BIT_DATA +else: _format_dict['NETCDF3_64BIT'] = NC_FORMAT_64BIT + _cmode_dict['NETCDF3_64BIT'] = NC_64BIT_OFFSET # invert dictionary mapping _reverse_format_dict = dict((v, k) for k, v in _format_dict.iteritems()) # add duplicate entry (NETCDF3_64BIT == NETCDF3_64BIT_OFFSET) -IF HAS_CDF5_FORMAT: +if __has_cdf5_format__: _format_dict['NETCDF3_64BIT'] = NC_FORMAT_64BIT_OFFSET -ELSE: + _cmode_dict['NETCDF3_64BIT'] = NC_64BIT_OFFSET +else: _format_dict['NETCDF3_64BIT_OFFSET'] = NC_FORMAT_64BIT - -# default fill_value to numpy datatype mapping. + _cmode_dict['NETCDF3_64BIT_OFFSET'] = NC_64BIT_OFFSET + +_parallel_formats = [] +if __has_parallel4_support__: + _parallel_formats += ['NETCDF4', 'NETCDF4_CLASSIC'] +if __has_pnetcdf_support__: + _parallel_formats += [ + 'NETCDF3_CLASSIC', + 'NETCDF3_64BIT_OFFSET', + 'NETCDF3_64BIT_DATA', + 'NETCDF3_64BIT' + ] + +# Default fill_value to numpy datatype mapping. Last two for complex +# numbers only applies to complex dimensions default_fillvals = {#'S1':NC_FILL_CHAR, 'S1':'\0', 'i1':NC_FILL_BYTE, @@ -1125,24 +1591,28 @@ default_fillvals = {#'S1':NC_FILL_CHAR, 'i8':NC_FILL_INT64, 'u8':NC_FILL_UINT64, 'f4':NC_FILL_FLOAT, - 'f8':NC_FILL_DOUBLE} + 'f8':NC_FILL_DOUBLE, + 'c8':NC_FILL_FLOAT, + 'c16':NC_FILL_DOUBLE, +} # logical for native endian type. -is_native_little = numpy.dtype('f4').byteorder == '=' +is_native_little = numpy.dtype('f4').byteorder == c'=' # hard code these here, instead of importing from netcdf.h # so it will compile with versions <= 4.2. NC_DISKLESS = 0x0008 +# introduced in 4.6.2 +if __netcdf4libversion__[0:5] >= "4.6.2": + NC_PERSIST = 0x4000 +else: # prior to 4.6.2 this flag doesn't work, so make the same as NC_DISKLESS + NC_PERSIST = NC_DISKLESS # next two lines do nothing, preserved for backwards compatibility. -default_encoding = 'utf-8' +default_encoding = 'utf-8' unicode_error = 'replace' -python3 = sys.version_info[0] > 2 -if python3: - buffer = memoryview - _nctonptype = {} for _key,_value in _nptonctype.items(): _nctonptype[_value] = _key @@ -1150,6 +1620,15 @@ _supportedtypes = _nptonctype.keys() # make sure NC_CHAR points to S1 _nctonptype[NC_CHAR]='S1' +# Mapping from numpy dtype endian format to what we expect +_dtype_endian_lookup = { + "=": "native", + ">": "big", + "<": "little", + "|": None, + None: None, +} + # internal C functions. cdef _get_att_names(int grpid, int varid): @@ -1164,7 +1643,7 @@ cdef _get_att_names(int grpid, int varid): ierr = nc_inq_varnatts(grpid, varid, &numatts) _ensure_nc_success(ierr, err_cls=AttributeError) attslist = [] - for n from 0 <= n < numatts: + for n in range(numatts): with nogil: ierr = nc_inq_attname(grpid, varid, n, namstring) _ensure_nc_success(ierr, err_cls=AttributeError) @@ -1190,15 +1669,16 @@ cdef _get_att(grp, int varid, name, encoding='utf-8'): if att_type == NC_CHAR: value_arr = numpy.empty(att_len,'S1') with nogil: - ierr = nc_get_att_text(_grpid, varid, attname, value_arr.data) + ierr = nc_get_att_text(_grpid, varid, attname, + PyArray_BYTES(value_arr)) _ensure_nc_success(ierr, err_cls=AttributeError) - if name == '_FillValue' and python3: + if name == '_FillValue': # make sure _FillValue for character arrays is a byte on python 3 # (issue 271). 
- pstring = value_arr.tostring() + pstring = value_arr.tobytes() else: pstring =\ - value_arr.tostring().decode(encoding,errors='replace').replace('\x00','') + value_arr.tobytes().decode(encoding,errors='replace').replace('\x00','') return pstring elif att_type == NC_STRING: values = PyMem_Malloc(sizeof(char*) * att_len) @@ -1210,9 +1690,10 @@ cdef _get_att(grp, int varid, name, encoding='utf-8'): _ensure_nc_success(ierr, err_cls=AttributeError) try: result = [values[j].decode(encoding,errors='replace').replace('\x00','') - for j in range(att_len)] + if values[j] else "" for j in range(att_len)] finally: - ierr = nc_free_string(att_len, values) # free memory in netcdf C lib + with nogil: + ierr = nc_free_string(att_len, values) # free memory in netcdf C lib finally: PyMem_Free(values) @@ -1231,7 +1712,7 @@ cdef _get_att(grp, int varid, name, encoding='utf-8'): # check if it's a compound try: type_att = _read_compound(grp, att_type) - value_arr = numpy.empty(att_len,type_att) + value_arr = numpy.empty(att_len,type_att.dtype_view) except: # check if it's an enum try: @@ -1240,7 +1721,7 @@ cdef _get_att(grp, int varid, name, encoding='utf-8'): except: raise KeyError('attribute %s has unsupported datatype' % attname) with nogil: - ierr = nc_get_att(_grpid, varid, attname, value_arr.data) + ierr = nc_get_att(_grpid, varid, attname, PyArray_BYTES(value_arr)) _ensure_nc_success(ierr, err_cls=AttributeError) if value_arr.shape == (): # return a scalar for a scalar array @@ -1253,9 +1734,13 @@ cdef _get_att(grp, int varid, name, encoding='utf-8'): def _set_default_format(object format='NETCDF4'): # Private function to set the netCDF file format + cdef int ierr, formatid if format not in _format_dict: raise ValueError("unrecognized format requested") - nc_set_default_format(_format_dict[format], NULL) + formatid = _format_dict[format] + with nogil: + ierr = nc_set_default_format(formatid, NULL) + _ensure_nc_success(ierr) cdef _get_format(int grpid): # Private function to get the netCDF file format @@ -1267,30 +1752,33 @@ cdef _get_format(int grpid): raise ValueError('format not supported by python interface') return _reverse_format_dict[formatp] + cdef _get_full_format(int grpid): - # Private function to get the underlying disk format + """Private function to get the underlying disk format""" + + if not __has_nc_inq_format_extended__: + return "UNDEFINED" + cdef int ierr, formatp, modep - IF HAS_NC_INQ_FORMAT_EXTENDED: - with nogil: - ierr = nc_inq_format_extended(grpid, &formatp, &modep) - _ensure_nc_success(ierr) - if formatp == NC_FORMAT_NC3: - return 'NETCDF3' - elif formatp == NC_FORMAT_NC_HDF5: - return 'HDF5' - elif formatp == NC_FORMAT_NC_HDF4: - return 'HDF4' - elif formatp == NC_FORMAT_PNETCDF: - return 'PNETCDF' - elif formatp == NC_FORMAT_DAP2: - return 'DAP2' - elif formatp == NC_FORMAT_DAP4: - return 'DAP4' - elif formatp == NC_FORMAT_UNDEFINED: - return 'UNDEFINED' - ELSE: + with nogil: + ierr = nc_inq_format_extended(grpid, &formatp, &modep) + _ensure_nc_success(ierr) + if formatp == NC_FORMAT_NC3: + return 'NETCDF3' + if formatp == NC_FORMAT_NC_HDF5: + return 'HDF5' + if formatp == NC_FORMAT_NC_HDF4: + return 'HDF4' + if formatp == NC_FORMAT_PNETCDF: + return 'PNETCDF' + if formatp == NC_FORMAT_DAP2: + return 'DAP2' + if formatp == NC_FORMAT_DAP4: + return 'DAP4' + if formatp == NC_FORMAT_UNDEFINED: return 'UNDEFINED' + cdef issue485_workaround(int grpid, int varid, char* attname): # check to see if attribute already exists # and is NC_CHAR, if so delete it and re-create it @@ 
-1302,37 +1790,51 @@ cdef issue485_workaround(int grpid, int varid, char* attname): if not _needsworkaround_issue485: return - ierr = nc_inq_att(grpid, varid, attname, &att_type, &att_len) + with nogil: + ierr = nc_inq_att(grpid, varid, attname, &att_type, &att_len) if ierr == NC_NOERR and att_type == NC_CHAR: - ierr = nc_del_att(grpid, varid, attname) + with nogil: + ierr = nc_del_att(grpid, varid, attname) _ensure_nc_success(ierr) cdef _set_att(grp, int varid, name, value,\ nc_type xtype=-99, force_ncstring=False): # Private function to set an attribute name/value pair - cdef int ierr, lenarr + cdef int ierr, lenarr, N, grpid cdef char *attname cdef char *datstring cdef char **string_ptrs cdef ndarray value_arr bytestr = _strencode(name) attname = bytestr + grpid = grp._grpid # put attribute value into a numpy array. value_arr = numpy.array(value) + if value_arr.ndim > 1: # issue #841 + if __version__ > "1.4.2": + raise ValueError('multi-dimensional array attributes not supported') + else: + msg = """ +Multi-dimensional array attributes are now deprecated. +Instead of silently flattening the array, an error will +be raised in the next release.""" + warnings.warn(msg,FutureWarning) # if array is 64 bit integers or # if 64-bit datatype not supported, cast to 32 bit integers. - fmt = _get_format(grp._grpid) + fmt = _get_format(grpid) is_netcdf3 = fmt.startswith('NETCDF3') or fmt == 'NETCDF4_CLASSIC' if value_arr.dtype.str[1:] == 'i8' and ('i8' not in _supportedtypes or\ - is_netcdf3): + (is_netcdf3 and fmt != 'NETCDF3_64BIT_DATA')): value_arr = value_arr.astype('i4') # if array contains ascii strings, write a text attribute (stored as bytes). - # if array contains unicode strings, and data model is NETCDF4, + # if array contains unicode strings, and data model is NETCDF4, # write as a string. if value_arr.dtype.char in ['S','U']: - if not is_netcdf3 and force_ncstring and value_arr.size > 1: - N = value_arr.size + # force array of strings if array has multiple elements (issue #770) + N = value_arr.size + if N > 1: force_ncstring=True + if not is_netcdf3 and force_ncstring and N > 1: string_ptrs = PyMem_Malloc(N * sizeof(char*)) if not string_ptrs: raise MemoryError() @@ -1342,11 +1844,16 @@ cdef _set_att(grp, int varid, name, value,\ if len(strings[j]) == 0: strings[j] = _strencode('\x00') string_ptrs[j] = strings[j] - issue485_workaround(grp._grpid, varid, attname) - ierr = nc_put_att_string(grp._grpid, varid, attname, N, string_ptrs) + issue485_workaround(grpid, varid, attname) + with nogil: + ierr = nc_put_att_string(grpid, varid, attname, N, string_ptrs) finally: PyMem_Free(string_ptrs) else: + # don't allow string array attributes in NETCDF3 files. 
+ if is_netcdf3 and N > 1: + msg='array string attributes can only be written with NETCDF4' + raise OSError(msg) if not value_arr.shape: dats = _strencode(value_arr.item()) else: @@ -1363,29 +1870,34 @@ cdef _set_att(grp, int varid, name, value,\ try: if force_ncstring: raise UnicodeError dats_ascii = _to_ascii(dats) # try to encode bytes as ascii string - ierr = nc_put_att_text(grp._grpid, varid, attname, lenarr, datstring) + with nogil: + ierr = nc_put_att_text(grpid, varid, attname, lenarr, datstring) except UnicodeError: - issue485_workaround(grp._grpid, varid, attname) - ierr = nc_put_att_string(grp._grpid, varid, attname, 1, &datstring) + issue485_workaround(grpid, varid, attname) + with nogil: + ierr = nc_put_att_string(grpid, varid, attname, 1, &datstring) else: - ierr = nc_put_att_text(grp._grpid, varid, attname, lenarr, datstring) + with nogil: + ierr = nc_put_att_text(grpid, varid, attname, lenarr, datstring) _ensure_nc_success(ierr, err_cls=AttributeError) # a 'regular' array type ('f4','i4','f8' etc) else: if value_arr.dtype.kind == 'V': # compound attribute. xtype = _find_cmptype(grp,value_arr.dtype) elif value_arr.dtype.str[1:] not in _supportedtypes: - raise TypeError, 'illegal data type for attribute, must be one of %s, got %s' % (_supportedtypes, value_arr.dtype.str[1:]) + raise TypeError, 'illegal data type for attribute %r, must be one of %s, got %s' % (attname, _supportedtypes, value_arr.dtype.str[1:]) elif xtype == -99: # if xtype is not passed in as kwarg. xtype = _nptonctype[value_arr.dtype.str[1:]] lenarr = PyArray_SIZE(value_arr) - ierr = nc_put_att(grp._grpid, varid, attname, xtype, lenarr, value_arr.data) + with nogil: + ierr = nc_put_att(grpid, varid, attname, xtype, lenarr, + PyArray_DATA(value_arr)) _ensure_nc_success(ierr, err_cls=AttributeError) cdef _get_types(group): - # Private function to create `netCDF4.CompoundType`, - # `netCDF4.VLType` or `netCDF4.EnumType` instances for all the - # compound, VLEN or Enum types in a `netCDF4.Group` or `netCDF4.Dataset`. + # Private function to create `CompoundType`, + # `VLType` or `EnumType` instances for all the + # compound, VLEN or Enum types in a `Group` or `Dataset`. cdef int ierr, ntypes, classp, n, _grpid cdef nc_type xtype cdef nc_type *typeids @@ -1401,11 +1913,12 @@ cdef _get_types(group): ierr = nc_inq_typeids(_grpid, &ntypes, typeids) _ensure_nc_success(ierr) # create empty dictionary for CompoundType instances. - cmptypes = OrderedDict() - vltypes = OrderedDict() - enumtypes = OrderedDict() + cmptypes = dict() + vltypes = dict() + enumtypes = dict() + if ntypes > 0: - for n from 0 <= n < ntypes: + for n in range(ntypes): xtype = typeids[n] with nogil: ierr = nc_inq_user_type(_grpid, xtype, namstring, @@ -1448,8 +1961,8 @@ cdef _get_types(group): return cmptypes, vltypes, enumtypes cdef _get_dims(group): - # Private function to create `netCDF4.Dimension` instances for all the - # dimensions in a `netCDF4.Group` or Dataset + # Private function to create `Dimension` instances for all the + # dimensions in a `Group` or Dataset cdef int ierr, numdims, n, _grpid cdef int *dimids cdef char namstring[NC_MAX_NAME+1] @@ -1459,7 +1972,7 @@ cdef _get_dims(group): ierr = nc_inq_ndims(_grpid, &numdims) _ensure_nc_success(ierr) # create empty dictionary for dimensions. 
- dimensions = OrderedDict() + dimensions = dict() if numdims > 0: dimids = malloc(sizeof(int) * numdims) if group.data_model == 'NETCDF4': @@ -1467,9 +1980,9 @@ cdef _get_dims(group): ierr = nc_inq_dimids(_grpid, &numdims, dimids, 0) _ensure_nc_success(ierr) else: - for n from 0 <= n < numdims: + for n in range(numdims): dimids[n] = n - for n from 0 <= n < numdims: + for n in range(numdims): with nogil: ierr = nc_inq_dimname(_grpid, dimids[n], namstring) _ensure_nc_success(ierr) @@ -1479,8 +1992,8 @@ cdef _get_dims(group): return dimensions cdef _get_grps(group): - # Private function to create `netCDF4.Group` instances for all the - # groups in a `netCDF4.Group` or Dataset + # Private function to create `Group` instances for all the + # groups in a `Group` or Dataset cdef int ierr, numgrps, n, _grpid cdef int *grpids cdef char namstring[NC_MAX_NAME+1] @@ -1489,14 +2002,14 @@ cdef _get_grps(group): with nogil: ierr = nc_inq_grps(_grpid, &numgrps, NULL) _ensure_nc_success(ierr) - # create dictionary containing `netCDF4.Group` instances for groups in this group - groups = OrderedDict() + # create dictionary containing `Group` instances for groups in this group + groups = dict() if numgrps > 0: grpids = malloc(sizeof(int) * numgrps) with nogil: ierr = nc_inq_grps(_grpid, NULL, grpids) _ensure_nc_success(ierr) - for n from 0 <= n < numgrps: + for n in range(numgrps): with nogil: ierr = nc_inq_grpname(grpids[n], namstring) _ensure_nc_success(ierr) @@ -1505,22 +2018,24 @@ cdef _get_grps(group): free(grpids) return groups -cdef _get_vars(group): - # Private function to create `netCDF4.Variable` instances for all the - # variables in a `netCDF4.Group` or Dataset +cdef _get_vars(group, bint auto_complex=False): + # Private function to create `Variable` instances for all the + # variables in a `Group` or Dataset cdef int ierr, numvars, n, nn, numdims, varid, classp, iendian, _grpid cdef int *varids - cdef int *dimids cdef nc_type xtype cdef char namstring[NC_MAX_NAME+1] cdef char namstring_cmp[NC_MAX_NAME+1] + cdef bint is_complex + cdef nc_type complex_nc_type + # get number of variables in this Group. _grpid = group._grpid with nogil: ierr = nc_inq_nvars(_grpid, &numvars) _ensure_nc_success(ierr, err_cls=AttributeError) # create empty dictionary for variables. - variables = OrderedDict() + variables = dict() if numvars > 0: # get variable ids. varids = malloc(sizeof(int) * numvars) @@ -1529,10 +2044,10 @@ cdef _get_vars(group): ierr = nc_inq_varids(_grpid, &numvars, varids) _ensure_nc_success(ierr) else: - for n from 0 <= n < numvars: + for n in range(numvars): varids[n] = n # loop over variables. - for n from 0 <= n < numvars: + for n in range(numvars): varid = varids[n] # get variable name. with nogil: @@ -1547,10 +2062,11 @@ cdef _get_vars(group): endianness = None with nogil: ierr = nc_inq_var_endian(_grpid, varid, &iendian) - if ierr == NC_NOERR and iendian == NC_ENDIAN_LITTLE: - endianness = '<' - elif iendian == NC_ENDIAN_BIG: - endianness = '>' + if ierr == NC_NOERR: + if iendian == NC_ENDIAN_LITTLE: + endianness = '<' + elif iendian == NC_ENDIAN_BIG: + endianness = '>' # check to see if it is a supported user-defined type. try: datatype = _nctonptype[xtype] @@ -1592,104 +2108,159 @@ cdef _get_vars(group): msg="WARNING: variable '%s' has unsupported datatype, skipping .." % name warnings.warn(msg) continue + # get number of dimensions. - with nogil: - ierr = nc_inq_varndims(_grpid, varid, &numdims) - _ensure_nc_success(ierr) - dimids = malloc(sizeof(int) * numdims) - # get dimension ids. 
- with nogil: - ierr = nc_inq_vardimid(_grpid, varid, dimids) - _ensure_nc_success(ierr) + dimids = _inq_vardimid(_grpid, varid, auto_complex) + # loop over dimensions, retrieve names. # if not found in current group, look in parents. # QUESTION: what if grp1 has a dimension named 'foo' # and so does it's parent - can a variable in grp1 # use the 'foo' dimension from the parent? dimensions = [] - for nn from 0 <= nn < numdims: + for dimid in dimids: grp = group found = False while not found: for key, value in grp.dimensions.items(): - if value._dimid == dimids[nn]: + if value._dimid == dimid: dimensions.append(key) found = True break grp = grp.parent - free(dimids) + # create new variable instance. - if endianness == '>': - variables[name] = Variable(group, name, datatype, dimensions, id=varid, endian='big') - elif endianness == '<': - variables[name] = Variable(group, name, datatype, dimensions, id=varid, endian='little') - else: - variables[name] = Variable(group, name, datatype, dimensions, id=varid) + dimensions = tuple(_find_dim(group, d) for d in dimensions) + + if auto_complex and pfnc_var_is_complex(_grpid, varid): + with nogil: + ierr = pfnc_inq_var_complex_base_type(_grpid, varid, &complex_nc_type) + _ensure_nc_success(ierr) + # TODO: proper lookup + datatype = "c16" if complex_nc_type == NC_DOUBLE else "c8" + + endian = _dtype_endian_lookup[endianness] or "native" + variables[name] = Variable(group, name, datatype, dimensions, id=varid, endian=endian) + free(varids) # free pointer holding variable ids. return variables -cdef _ensure_nc_success(ierr, err_cls=RuntimeError, filename=None): + +def _ensure_nc_success(ierr, err_cls=RuntimeError, filename=None, extra_msg=None): # print netcdf error message, raise error. - if ierr != NC_NOERR: - err_str = (nc_strerror(ierr)).decode('ascii') - if issubclass(err_cls, EnvironmentError): - raise err_cls(ierr, err_str, filename) - else: - raise err_cls(err_str) + if ierr == NC_NOERR: + return + + err_str = (nc_strerror(ierr)).decode('ascii') + if issubclass(err_cls, OSError): + if isinstance(filename, bytes): + filename = filename.decode() + raise err_cls(ierr, err_str, filename) + + if extra_msg: + if isinstance(extra_msg, bytes): + extra_msg = extra_msg.decode() + err_str = f"{err_str}: {extra_msg}" + raise err_cls(err_str) + + +def dtype_is_complex(dtype): + """Return True if dtype is a complex number""" + return dtype in ("c8", "c16") + + +cdef int _inq_varndims(int ncid, int varid, bint auto_complex): + """Wrapper around `nc_inq_varndims`/`pfnc_inq_varndims` for complex numbers""" + + cdef int ierr = NC_NOERR + cdef int ndims + + if auto_complex: + with nogil: + ierr = pfnc_inq_varndims(ncid, varid, &ndims) + else: + with nogil: + ierr = nc_inq_varndims(ncid, varid, &ndims) + + _ensure_nc_success(ierr) + return ndims + + +cdef _inq_vardimid(int ncid, int varid, bint auto_complex): + """Wrapper around `nc_inq_vardimid`/`pfnc_inq_vardimid` for complex numbers""" + + cdef int ierr = NC_NOERR + cdef int ndims = _inq_varndims(ncid, varid, auto_complex) + cdef int* dimids = malloc(sizeof(int) * ndims) + + if auto_complex: + with nogil: + ierr = pfnc_inq_vardimid(ncid, varid, dimids) + else: + with nogil: + ierr = nc_inq_vardimid(ncid, varid, dimids) + + _ensure_nc_success(ierr) + + result = [dimids[n] for n in range(ndims)] + free(dimids) + return result + # these are class attributes that # only exist at the python level (not in the netCDF file). 
_private_atts = \ -['_grpid','_grp','_varid','groups','dimensions','variables','dtype','data_model','disk_format', +('_grpid','_grp','_varid','groups','dimensions','variables','dtype','data_model','disk_format', '_nunlimdim','path','parent','ndim','mask','scale','cmptypes','vltypes','enumtypes','_isprimitive', 'file_format','_isvlen','_isenum','_iscompound','_cmptype','_vltype','_enumtype','name', - '__orthogoral_indexing__','keepweakref','_has_lsd', - '_buffer','chartostring','_no_get_vars'] -__pdoc__ = {} + '__orthogoral_indexing__','keepweakref','_has_lsd','always_mask', + '_buffer','chartostring','_use_get_vars','_ncstring_attrs__', + 'auto_complex' +) cdef class Dataset: """ -A netCDF `netCDF4.Dataset` is a collection of dimensions, groups, variables and +A netCDF `Dataset` is a collection of dimensions, groups, variables and attributes. Together they describe the meaning of data and relations among -data fields stored in a netCDF file. See `netCDF4.Dataset.__init__` for more +data fields stored in a netCDF file. See `Dataset.__init__` for more details. A list of attribute names corresponding to global netCDF attributes -defined for the `netCDF4.Dataset` can be obtained with the -`netCDF4.Dataset.ncattrs` method. +defined for the `Dataset` can be obtained with the +`Dataset.ncattrs` method. These attributes can be created by assigning to an attribute of the -`netCDF4.Dataset` instance. A dictionary containing all the netCDF attribute +`Dataset` instance. A dictionary containing all the netCDF attribute name/value pairs is provided by the `__dict__` attribute of a -`netCDF4.Dataset` instance. +`Dataset` instance. The following class variables are read-only and should not be modified by the user. **`dimensions`**: The `dimensions` dictionary maps the names of -dimensions defined for the `netCDF4.Group` or `netCDF4.Dataset` to instances of the -`netCDF4.Dimension` class. +dimensions defined for the `Group` or `Dataset` to instances of the +`Dimension` class. **`variables`**: The `variables` dictionary maps the names of variables -defined for this `netCDF4.Dataset` or `netCDF4.Group` to instances of the -`netCDF4.Variable` class. +defined for this `Dataset` or `Group` to instances of the +`Variable` class. **`groups`**: The groups dictionary maps the names of groups created for -this `netCDF4.Dataset` or `netCDF4.Group` to instances of the `netCDF4.Group` class (the -`netCDF4.Dataset` class is simply a special case of the `netCDF4.Group` class which +this `Dataset` or `Group` to instances of the `Group` class (the +`Dataset` class is simply a special case of the `Group` class which describes the root group in the netCDF4 file). **`cmptypes`**: The `cmptypes` dictionary maps the names of -compound types defined for the `netCDF4.Group` or `netCDF4.Dataset` to instances of the -`netCDF4.CompoundType` class. +compound types defined for the `Group` or `Dataset` to instances of the +`CompoundType` class. **`vltypes`**: The `vltypes` dictionary maps the names of -variable-length types defined for the `netCDF4.Group` or `netCDF4.Dataset` to instances -of the `netCDF4.VLType` class. +variable-length types defined for the `Group` or `Dataset` to instances +of the `VLType` class. **`enumtypes`**: The `enumtypes` dictionary maps the names of -Enum types defined for the `netCDF4.Group` or `netCDF4.Dataset` to instances -of the `netCDF4.EnumType` class. +Enum types defined for the `Group` or `Dataset` to instances +of the `EnumType` class. 
**`data_model`**: `data_model` describes the netCDF data model version, one of `NETCDF3_CLASSIC`, `NETCDF4`, @@ -1704,106 +2275,64 @@ netcdf C library version >= 4.3.1, otherwise will always return `UNDEFINED`. **`parent`**: `parent` is a reference to the parent -`netCDF4.Group` instance. `None` for the root group or `netCDF4.Dataset` +`Group` instance. `None` for the root group or `Dataset` instance. -**`path`**: `path` shows the location of the `netCDF4.Group` in -the `netCDF4.Dataset` in a unix directory format (the names of groups in the -hierarchy separated by backslashes). A `netCDF4.Dataset` instance is the root +**`path`**: `path` shows the location of the `Group` in +the `Dataset` in a unix directory format (the names of groups in the +hierarchy separated by backslashes). A `Dataset` instance is the root group, so the path is simply `'/'`. -**`keepweakref`**: If `True`, child Dimension and Variables objects only keep weak +**`keepweakref`**: If `True`, child Dimension and Variables objects only keep weak references to the parent Dataset or Group. + +**`_ncstring_attrs__`**: If `True`, all text attributes will be written as variable-length +strings. """ - cdef object __weakref__ + cdef object __weakref__, _inmemory cdef public int _grpid cdef public int _isopen cdef Py_buffer _buffer cdef public groups, dimensions, variables, disk_format, path, parent,\ file_format, data_model, cmptypes, vltypes, enumtypes, __orthogonal_indexing__, \ - keepweakref - # Docstrings for class variables (used by pdoc). - __pdoc__['Dataset.dimensions']=\ - """The `dimensions` dictionary maps the names of - dimensions defined for the `netCDF4.Group` or `netCDF4.Dataset` to instances of the - `netCDF4.Dimension` class.""" - __pdoc__['Dataset.variables']=\ - """The `variables` dictionary maps the names of variables - defined for this `netCDF4.Dataset` or `netCDF4.Group` to instances of the `netCDF4.Variable` - class.""" - __pdoc__['Dataset.groups']=\ - """The groups dictionary maps the names of groups created for - this `netCDF4.Dataset` or `netCDF4.Group` to instances of the `netCDF4.Group` class (the - `netCDF4.Dataset` class is simply a special case of the `netCDF4.Group` class which - describes the root group in the netCDF4 file).""" - __pdoc__['Dataset.cmptypes']=\ - """The `cmptypes` dictionary maps the names of - compound types defined for the `netCDF4.Group` or `netCDF4.Dataset` to instances of the - `netCDF4.CompoundType` class.""" - __pdoc__['Dataset.vltypes']=\ - """The `vltypes` dictionary maps the names of - variable-length types defined for the `netCDF4.Group` or `netCDF4.Dataset` to instances of the - `netCDF4.VLType` class.""" - __pdoc__['Dataset.enumtypes']=\ - """The `enumtypes` dictionary maps the names of - Enum types defined for the `netCDF4.Group` or `netCDF4.Dataset` to instances of the - `netCDF4.EnumType` class.""" - __pdoc__['Dataset.data_model']=\ - """`data_model` describes the netCDF - data model version, one of `NETCDF3_CLASSIC`, `NETCDF4`, - `NETCDF4_CLASSIC`, `NETCDF3_64BIT_OFFSET` or `NETCDF3_64BIT_DATA`.""" - __pdoc__['Dataset.file_format']=\ - """same as `data_model`, retained for backwards compatibility.""" - __pdoc__['Dataset.disk_format']=\ - """`disk_format` describes the underlying - file format, one of `NETCDF3`, `HDF5`, `HDF4`, - `PNETCDF`, `DAP2`, `DAP4` or `UNDEFINED`. 
Only available if using - netcdf C library version >= 4.3.1, otherwise will always return - `UNDEFINED`.""" - __pdoc__['Dataset.parent']=\ - """`parent` is a reference to the parent - `netCDF4.Group` instance. `None` for the root group or `netCDF4.Dataset` instance""" - __pdoc__['Dataset.path']=\ - """`path` shows the location of the `netCDF4.Group` in - the `netCDF4.Dataset` in a unix directory format (the names of groups in the - hierarchy separated by backslashes). A `netCDF4.Dataset` instance is the root - group, so the path is simply `'/'`.""" - __pdoc__['Dataset.keepweakref']=\ - """If `True`, child Dimension and Variables objects only keep weak references to - the parent Dataset or Group.""" + keepweakref, _ncstring_attrs__, auto_complex def __init__(self, filename, mode='r', clobber=True, format='NETCDF4', diskless=False, persist=False, keepweakref=False, memory=None, encoding=None, parallel=False, - Comm comm=None, Info info=None, **kwargs): + comm=None, info=None, auto_complex=False, **kwargs): """ **`__init__(self, filename, mode="r", clobber=True, diskless=False, - persist=False, keepweakref=False, format='NETCDF4')`** + persist=False, keepweakref=False, memory=None, encoding=None, + parallel=False, comm=None, info=None, format='NETCDF4')`** - `netCDF4.Dataset` constructor. + `Dataset` constructor. **`filename`**: Name of netCDF file to hold dataset. Can also - be a python 3 pathlib instance or the URL of an OpenDAP dataset. When memory is - set this is just used to set the `filepath()`. - + be a python 3 pathlib instance or the URL of an OpenDAP dataset. When memory is + set this is just used to set the `filepath()`. + **`mode`**: access mode. `r` means read-only; no data can be modified. `w` means write; a new file is created, an existing file with - the same name is deleted. `a` and `r+` mean append (in analogy with - serial files); an existing file is opened for reading and writing. - Appending `s` to modes `w`, `r+` or `a` will enable unbuffered shared + the same name is deleted. `x` means write, but fail if an existing + file with the same name already exists. `a` and `r+` mean append; + an existing file is opened for reading and writing, if + file does not exist already, one is created. + Appending `s` to modes `r`, `w`, `r+` or `a` will enable unbuffered shared access to `NETCDF3_CLASSIC`, `NETCDF3_64BIT_OFFSET` or `NETCDF3_64BIT_DATA` formatted files. Unbuffered access may be useful even if you don't need shared access, since it may be faster for programs that don't access data sequentially. This option is ignored for `NETCDF4` and `NETCDF4_CLASSIC` formatted files. - + **`clobber`**: if `True` (default), opening a file with `mode='w'` will clobber an existing file with the same name. if `False`, an exception will be raised if a file with the same name already exists. - - **`format`**: underlying file format (one of `'NETCDF4', - 'NETCDF4_CLASSIC', 'NETCDF3_CLASSIC'`, `'NETCDF3_64BIT_OFFSET'` or + mode=`x` is identical to mode=`w` with clobber=False. + + **`format`**: underlying file format (one of `'NETCDF4'`, + `'NETCDF4_CLASSIC'`, `'NETCDF3_CLASSIC'`, `'NETCDF3_64BIT_OFFSET'` or `'NETCDF3_64BIT_DATA'`. Only relevant if `mode = 'w'` (if `mode = 'r','a'` or `'r+'` the file format is automatically detected). Default `'NETCDF4'`, which means the data is @@ -1819,11 +2348,12 @@ references to the parent Dataset or Group. 
file format, which supports 64-bit dimension sizes plus unsigned and 64 bit integer data types, but is only compatible with clients linked against netCDF version 4.4.0 or later. - - **`diskless`**: If `True`, create diskless (in memory) file. - This is an experimental feature added to the C library after the - netcdf-4.2 release. - + + **`diskless`**: If `True`, create diskless (in-core) file. + This is a feature added to the C library after the + netcdf-4.2 release. If you need to access the memory buffer directly, + use the in-memory feature instead (see `memory` kwarg). + **`persist`**: if `diskless=True`, persist file to disk when closed (default `False`). @@ -1840,9 +2370,18 @@ references to the parent Dataset or Group. reducing memory usage and open file handles. However, in many cases this is not desirable, since the associated Variable instances may still be needed, but are rendered unusable when the parent Dataset instance is garbage collected. - - **`memory`**: if not `None`, open file with contents taken from this block of memory. - Must be a sequence of bytes. Note this only works with "r" mode. + + **`memory`**: if not `None`, create or open an in-memory Dataset. + If mode = `r`, the memory kwarg must contain a memory buffer object + (an object that supports the python buffer interface). + The Dataset will then be created with contents taken from this block of memory. + If mode = `w`, the memory kwarg should contain the anticipated size + of the Dataset in bytes (used only for NETCDF3 files). A memory + buffer containing a copy of the Dataset is returned by the + `Dataset.close` method. Requires netcdf-c version 4.4.1 for mode=`r` + netcdf-c 4.6.2 for mode=`w`. To persist the file to disk, the raw + bytes from the returned buffer can be written into a binary file. + The Dataset can also be re-opened using this memory buffer. **`encoding`**: encoding used to encode filename string into bytes. Default is None (`sys.getdefaultfileencoding()` is used). @@ -1856,13 +2395,16 @@ references to the parent Dataset or Group. **`info`**: MPI_Info object for parallel access. Default `None`, which means MPI_INFO_NULL will be used. Ignored if `parallel=False`. + + **`auto_complex`**: if `True`, then automatically convert complex number types """ - cdef int grpid, ierr, numgrps, numdims, numvars + cdef int grpid, ierr, numgrps, numdims, numvars, + cdef size_t initialsize cdef char *path cdef char namstring[NC_MAX_NAME+1] - IF HAS_NC_PAR: - cdef MPI_Comm mpicomm - cdef MPI_Info mpiinfo + cdef int cmode, parmode + cdef MPI_Comm mpicomm + cdef MPI_Info mpiinfo memset(&self._buffer, 0, sizeof(self._buffer)) @@ -1878,141 +2420,105 @@ references to the parent Dataset or Group. 
bytestr = _strencode(_tostr(filename), encoding=encoding) path = bytestr - if memory is not None and (mode != 'r' or type(memory) != bytes): - raise ValueError('memory mode only works with \'r\' modes and must be `bytes`') + if memory is not None and mode not in ('r', 'w'): + raise ValueError("if memory kwarg specified, mode must be 'r' or 'w'") + if parallel: - IF HAS_NC_PAR != 1: - msg='parallel mode requires MPI enabled netcdf-c' - raise ValueError(msg) - if format != 'NETCDF4': - msg='parallel mode only works with format=NETCDF4' - raise ValueError(msg) - if comm is not None: - mpicomm = comm.ob_mpi - else: - mpicomm = MPI_COMM_WORLD - if info is not None: - mpiinfo = info.ob_mpi - else: - mpiinfo = MPI_INFO_NULL + if not __has_parallel_support__: + raise ValueError("parallel mode requires MPI enabled netcdf-c") + + if format not in _parallel_formats: + raise ValueError( + f"parallel mode only works with the following formats: {' '.join(_parallel_formats)}" + ) + + mpicomm = (comm).ob_mpi if comm is not None else MPI_COMM_WORLD + mpiinfo = (info).ob_mpi if info is not None else MPI_INFO_NULL + parmode = NC_MPIIO | _cmode_dict[format] + + self._inmemory = False + self.auto_complex = auto_complex + + # mode='x' is the same as mode='w' with clobber=False + if mode == "x": + mode = "w" + clobber = False + + # r+ is synonym for append + if "r+" in mode: + mode = mode.replace("r+", "a") + + # If appending and the file doesn't exist, we need to create it + if mode in ("a", "as") and not os.path.exists(filename): + mode = mode.replace("a", "w") - if mode == 'w': + read_mode = mode in ("r", "rs") + write_mode = mode in ("w", "ws") + append_mode = mode in ("a", "as") + + if not (read_mode or write_mode or append_mode): + raise ValueError(f"mode must be 'w', 'x', 'r', 'a' or 'r+', got '{mode}'") + + # Initial value for cmode + if write_mode: + cmode = NC_CLOBBER if clobber else NC_NOCLOBBER + else: + cmode = NC_WRITE if append_mode else NC_NOWRITE + if mode.endswith("s") and not parallel: + cmode |= NC_SHARE + + if diskless: + cmode |= NC_DISKLESS + if write_mode and persist: + cmode |= NC_WRITE | NC_PERSIST + + if write_mode: _set_default_format(format=format) - if clobber: - if parallel: - IF HAS_NC_PAR: - ierr = nc_create_par(path, NC_CLOBBER | NC_MPIIO, \ - mpicomm, mpiinfo, &grpid) - ELSE: - pass - elif diskless: - if persist: - ierr = nc_create(path, NC_WRITE | NC_CLOBBER | NC_DISKLESS , &grpid) - else: - ierr = nc_create(path, NC_CLOBBER | NC_DISKLESS , &grpid) - else: - ierr = nc_create(path, NC_CLOBBER, &grpid) - else: - if parallel: - IF HAS_NC_PAR: - ierr = nc_create_par(path, NC_NOCLOBBER | NC_MPIIO, \ - mpicomm, mpiinfo, &grpid) - ELSE: - pass - elif diskless: - if persist: - ierr = nc_create(path, NC_WRITE | NC_NOCLOBBER | NC_DISKLESS , &grpid) - else: - ierr = nc_create(path, NC_NOCLOBBER | NC_DISKLESS , &grpid) - else: - ierr = nc_create(path, NC_NOCLOBBER, &grpid) - # reset default format to netcdf3 - this is a workaround - # for issue 170 (nc_open'ing a DAP dataset after switching - # format to NETCDF4). This bug should be fixed in version - # 4.3.0 of the netcdf library (add a version check here?). 
- _set_default_format(format='NETCDF3_64BIT_OFFSET') - elif mode == 'r': if memory is not None: - IF HAS_NC_OPEN_MEM: - # Store reference to memory - result = PyObject_GetBuffer(memory, &self._buffer, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS) - if result != 0: - raise ValueError("Unable to retrieve Buffer from %s" % (memory,)) - - ierr = nc_open_mem(path, 0, self._buffer.len, self._buffer.buf, &grpid) - ELSE: - msg = """ - nc_open_mem method not enabled. To enable, install Cython, make sure you have - version 4.4.1 or higher of the netcdf C lib, and rebuild netcdf4-python.""" - raise ValueError(msg) - elif parallel: - IF HAS_NC_PAR: - ierr = nc_open_par(path, NC_NOWRITE | NC_MPIIO, \ - mpicomm, mpiinfo, &grpid) - ELSE: - pass - elif diskless: - ierr = nc_open(path, NC_NOWRITE | NC_DISKLESS, &grpid) - else: - ierr = nc_open(path, NC_NOWRITE, &grpid) - elif mode == 'r+' or mode == 'a': - if parallel: - IF HAS_NC_PAR: - ierr = nc_open_par(path, NC_WRITE | NC_MPIIO, \ - mpicomm, mpiinfo, &grpid) - ELSE: - pass - elif diskless: - ierr = nc_open(path, NC_WRITE | NC_DISKLESS, &grpid) - else: - ierr = nc_open(path, NC_WRITE, &grpid) - elif mode == 'as' or mode == 'r+s': - if parallel: - # NC_SHARE ignored - IF HAS_NC_PAR: - ierr = nc_open_par(path, NC_WRITE | NC_MPIIO, \ - mpicomm, mpiinfo, &grpid) - ELSE: - pass - elif diskless: - ierr = nc_open(path, NC_SHARE | NC_DISKLESS, &grpid) - else: - ierr = nc_open(path, NC_SHARE, &grpid) - elif mode == 'ws': - if clobber: - if parallel: - # NC_SHARE ignored - IF HAS_NC_PAR: - ierr = nc_create_par(path, NC_CLOBBER | NC_MPIIO, \ - mpicomm, mpiinfo, &grpid) - ELSE: - pass - elif diskless: - if persist: - ierr = nc_create(path, NC_WRITE | NC_SHARE | NC_CLOBBER | NC_DISKLESS , &grpid) - else: - ierr = nc_create(path, NC_SHARE | NC_CLOBBER | NC_DISKLESS , &grpid) - else: - ierr = nc_create(path, NC_SHARE | NC_CLOBBER, &grpid) + if not __has_nc_create_mem__: + raise NetCDF4MissingFeatureException("nc_create_mem", "4.6.2") + + # if memory is not None and mode='w', memory + # kwarg is interpreted as advisory size. 
+ initialsize = memory + with nogil: + ierr = nc_create_mem(path, 0, initialsize, &grpid) + self._inmemory = True # checked in close method + else: if parallel: - # NC_SHARE ignored - IF HAS_NC_PAR: - ierr = nc_create_par(path, NC_NOCLOBBER | NC_MPIIO, \ - mpicomm, mpiinfo, &grpid) - ELSE: - pass - elif diskless: - if persist: - ierr = nc_create(path, NC_WRITE | NC_SHARE | NC_NOCLOBBER | NC_DISKLESS , &grpid) - else: - ierr = nc_create(path, NC_SHARE | NC_NOCLOBBER | NC_DISKLESS , &grpid) + with nogil: + ierr = nc_create_par(path, cmode | parmode, mpicomm, mpiinfo, &grpid) else: - ierr = nc_create(path, NC_SHARE | NC_NOCLOBBER, &grpid) + with nogil: + ierr = nc_create(path, cmode, &grpid) + + elif read_mode and memory is not None: + if not __has_nc_open_mem__: + raise NetCDF4MissingFeatureException("nc_open_mem", "4.4.1") + + # Store reference to memory + result = PyObject_GetBuffer( + memory, &self._buffer, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS + ) + if result != 0: + raise ValueError(f"Unable to retrieve Buffer from {memory}") + + with nogil: + ierr = nc_open_mem( + path, 0, self._buffer.len, self._buffer.buf, &grpid + ) + else: - raise ValueError("mode must be 'w', 'r', 'a' or 'r+', got '%s'" % mode) + # Read or append mode, flags already all set in cmode + if parallel: + with nogil: + ierr = nc_open_par(path, cmode | NC_MPIIO, mpicomm, mpiinfo, &grpid) + else: + with nogil: + ierr = nc_open(path, cmode, &grpid) - _ensure_nc_success(ierr, err_cls=IOError, filename=path) + _ensure_nc_success(ierr, err_cls=OSError, filename=path) # data model and file format attributes self.data_model = _get_format(grpid) @@ -2020,26 +2526,23 @@ references to the parent Dataset or Group. # file_format for backwards compatibility. self.file_format = self.data_model self.disk_format = _get_full_format(grpid) - # diskless read access only works with NETCDF_CLASSIC (for now) - #ncopen = mode.startswith('a') or mode.startswith('r') - #if diskless and self.data_model != 'NETCDF3_CLASSIC' and ncopen: - # raise ValueError("diskless access only supported for NETCDF3_CLASSIC format") self._grpid = grpid self._isopen = 1 self.path = '/' self.parent = None self.keepweakref = keepweakref + self._ncstring_attrs__ = False # get compound, vlen and enum types in the root Group. self.cmptypes, self.vltypes, self.enumtypes = _get_types(self) # get dimensions in the root group. self.dimensions = _get_dims(self) # get variables in the root Group. - self.variables = _get_vars(self) + self.variables = _get_vars(self, self.auto_complex) # get groups in the root Group. if self.data_model == 'NETCDF4': self.groups = _get_grps(self) else: - self.groups = OrderedDict() + self.groups = dict() # these allow Dataset objects to be used via a "with" statement. def __enter__(self): @@ -2066,75 +2569,79 @@ references to the parent Dataset or Group. else: raise IndexError('%s not found in %s' % (lastname,group.path)) + def __iter__(self): + raise TypeError( + "Dataset is not iterable. Consider iterating on Dataset.variables." + ) + + def __contains__(self, key): + raise TypeError( + "Dataset does not support membership operations. Perhaps try 'varname in" + " dataset.variables' or 'dimname in dataset.dimensions'." + ) + def filepath(self,encoding=None): + """**`filepath(self,encoding=None)`** + + Get the file system path (or the opendap URL) which was used to + open/create the Dataset. Requires netcdf >= 4.1.2. The path + is decoded into a string using `sys.getfilesystemencoding()` by default, this can be + changed using the `encoding` kwarg. 
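A minimal sketch of the in-memory workflow that the rewritten constructor logic above enables (`memory` kwarg routed through `nc_create_mem` for writing and `nc_open_mem` for reading). It assumes a netcdf-c build with in-memory support (>= 4.6.2 for writing); the filename is only a label later reported by `filepath()`:

```python
import numpy as np
from netCDF4 import Dataset

# Create a diskless, in-memory Dataset; the size hint is advisory
# (only meaningful for NETCDF3 formats).
nc = Dataset("inmemory.nc", mode="w", memory=1024)
nc.createDimension("x", 3)
v = nc.createVariable("v", "f4", ("x",))
v[:] = np.arange(3, dtype="f4")
buf = nc.close()          # close() returns a memoryview of the file image

# Re-open the same bytes read-only via the memory kwarg.
nc2 = Dataset("inmemory.nc", mode="r", memory=buf)
print(nc2.variables["v"][:])
nc2.close()
```

As the updated docstring notes, the raw bytes of the returned buffer can also be written to a binary file to persist the dataset.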
""" -**`filepath(self,encoding=None)`** + if not __has_nc_inq_path__: + raise NetCDF4MissingFeatureException("filepath method", "4.1.2") -Get the file system path (or the opendap URL) which was used to -open/create the Dataset. Requires netcdf >= 4.1.2. The path -is decoded into a string using `sys.getfilesystemencoding()` by default, this can be -changed using the `encoding` kwarg.""" cdef int ierr cdef size_t pathlen cdef char *c_path if encoding is None: encoding = sys.getfilesystemencoding() - IF HAS_NC_INQ_PATH: + + with nogil: + ierr = nc_inq_path(self._grpid, &pathlen, NULL) + _ensure_nc_success(ierr) + + c_path = malloc(sizeof(char) * (pathlen + 1)) + if not c_path: + raise MemoryError() + try: with nogil: - ierr = nc_inq_path(self._grpid, &pathlen, NULL) + ierr = nc_inq_path(self._grpid, &pathlen, c_path) _ensure_nc_success(ierr) - c_path = malloc(sizeof(char) * (pathlen + 1)) - if not c_path: - raise MemoryError() - try: - with nogil: - ierr = nc_inq_path(self._grpid, &pathlen, c_path) - _ensure_nc_success(ierr) - - py_path = c_path[:pathlen] # makes a copy of pathlen bytes from c_string - finally: - free(c_path) - return py_path.decode(encoding) - ELSE: - msg = """ -filepath method not enabled. To enable, install Cython, make sure you have -version 4.1.2 or higher of the netcdf C lib, and rebuild netcdf4-python.""" - raise ValueError(msg) + py_path = c_path[:pathlen] # makes a copy of pathlen bytes from c_string + finally: + free(c_path) + return py_path.decode(encoding) def __repr__(self): - if python3: - return self.__unicode__() - else: - return unicode(self).encode('utf-8') + return self.__str__() - def __unicode__(self): - ncdump = ['%r\n' % type(self)] - dimnames = tuple([_tostr(dimname)+'(%s)'%len(self.dimensions[dimname])\ - for dimname in self.dimensions.keys()]) + def __str__(self): + ncdump = [repr(type(self)).replace("._netCDF4", "")] + dimnames = tuple(_tostr(dimname)+'(%s)'%len(self.dimensions[dimname])\ + for dimname in self.dimensions.keys()) varnames = tuple(\ - [_tostr(self.variables[varname].dtype)+' \033[4m'+_tostr(varname)+'\033[0m'+ - (((_tostr(self.variables[varname].dimensions) - .replace("u'",""))\ - .replace("'",""))\ - .replace(", ",","))\ - .replace(",)",")") for varname in self.variables.keys()]) - grpnames = tuple([_tostr(grpname) for grpname in self.groups.keys()]) + [_tostr(self.variables[varname].dtype)+' '+_tostr(varname)+ + ((_tostr(self.variables[varname].dimensions)).replace(",)",")")).replace("'","") + for varname in self.variables.keys()]) + grpnames = tuple(_tostr(grpname) for grpname in self.groups.keys()) if self.path == '/': - ncdump.append('root group (%s data model, file format %s):\n' % + ncdump.append('root group (%s data model, file format %s):' % (self.data_model, self.disk_format)) else: - ncdump.append('group %s:\n' % self.path) - attrs = [' %s: %s\n' % (name,self.getncattr(name)) for name in\ - self.ncattrs()] - ncdump = ncdump + attrs - ncdump.append(' dimensions(sizes): %s\n' % ', '.join(dimnames)) - ncdump.append(' variables(dimensions): %s\n' % ', '.join(varnames)) - ncdump.append(' groups: %s\n' % ', '.join(grpnames)) - return ''.join(ncdump) + ncdump.append('group %s:' % self.path) + for name in self.ncattrs(): + ncdump.append(' %s: %s' % (name, self.getncattr(name))) + ncdump.append(' dimensions(sizes): %s' % ', '.join(dimnames)) + ncdump.append(' variables(dimensions): %s' % ', '.join(varnames)) + ncdump.append(' groups: %s' % ', '.join(grpnames)) + return '\n'.join(ncdump) def _close(self, check_err): - cdef int ierr = 
nc_close(self._grpid) + cdef int ierr + with nogil: + ierr = nc_close(self._grpid) if check_err: _ensure_nc_success(ierr) @@ -2146,25 +2653,44 @@ version 4.1.2 or higher of the netcdf C lib, and rebuild netcdf4-python.""" # view.obj is checked, ref on obj is decremented and obj will be null'd out PyBuffer_Release(&self._buffer) + def _close_mem(self, check_err): + cdef int ierr + cdef NC_memio memio + with nogil: + ierr = nc_close_memio(self._grpid, &memio) + + if check_err: + _ensure_nc_success(ierr) + + self._isopen = 0 + PyBuffer_Release(&self._buffer) + + # membuf_fromptr from membuf.pyx - creates a python memoryview + # from a raw pointer without making a copy. + return memview_fromptr(memio.memory, memio.size) def close(self): - """ -**`close(self)`** + """**`close(self)`** -Close the Dataset. + Close the Dataset. """ + if __has_nc_create_mem__ and self._inmemory: + return self._close_mem(True) + self._close(True) def isopen(self): """ -**`close(self)`** +**`isopen(self)`** -is the Dataset open or closed? +Is the Dataset open or closed? """ return bool(self._isopen) def __dealloc__(self): - # close file when there are no references to object left + # close file when there are no references to object left and clear the cache. + if self.get_variables_by_attributes: + self.get_variables_by_attributes.cache_clear() if self._isopen: self._close(False) @@ -2176,22 +2702,27 @@ is the Dataset open or closed? """ **`sync(self)`** -Writes all buffered data in the `netCDF4.Dataset` to the disk file.""" - _ensure_nc_success(nc_sync(self._grpid)) +Writes all buffered data in the `Dataset` to the disk file.""" + cdef int ierr + with nogil: + ierr = nc_sync(self._grpid) + _ensure_nc_success(ierr) def _redef(self): cdef int ierr - ierr = nc_redef(self._grpid) + with nogil: + ierr = nc_redef(self._grpid) def _enddef(self): cdef int ierr - ierr = nc_enddef(self._grpid) + with nogil: + ierr = nc_enddef(self._grpid) def set_fill_on(self): """ **`set_fill_on(self)`** -Sets the fill mode for a `netCDF4.Dataset` open for writing to `on`. +Sets the fill mode for a `Dataset` open for writing to `on`. This causes data to be pre-filled with fill values. The fill values can be controlled by the variable's `_Fill_Value` attribute, but is usually @@ -2200,20 +2731,24 @@ separately for each variable type). The default behavior of the netCDF library corresponds to `set_fill_on`. Data which are equal to the `_Fill_Value` indicate that the variable was created, but never written to.""" - cdef int oldmode - _ensure_nc_success(nc_set_fill(self._grpid, NC_FILL, &oldmode)) + cdef int oldmode, ierr + with nogil: + ierr = nc_set_fill(self._grpid, NC_FILL, &oldmode) + _ensure_nc_success(ierr) def set_fill_off(self): """ **`set_fill_off(self)`** -Sets the fill mode for a `netCDF4.Dataset` open for writing to `off`. +Sets the fill mode for a `Dataset` open for writing to `off`. This will prevent the data from being pre-filled with fill values, which may result in some performance improvements. However, you must then make sure the data is actually written before being read.""" - cdef int oldmode - _ensure_nc_success(nc_set_fill(self._grpid, NC_NOFILL, &oldmode)) + cdef int oldmode, ierr + with nogil: + ierr = nc_set_fill(self._grpid, NC_NOFILL, &oldmode) + _ensure_nc_success(ierr) def createDimension(self, dimname, size=None): """ @@ -2223,11 +2758,11 @@ Creates a new dimension with the given `dimname` and `size`. `size` must be a positive integer or `None`, which stands for "unlimited" (default is `None`). 
Specifying a size of 0 also -results in an unlimited dimension. The return value is the `netCDF4.Dimension` +results in an unlimited dimension. The return value is the `Dimension` class instance describing the new dimension. To determine the current -maximum size of the dimension, use the `len` function on the `netCDF4.Dimension` +maximum size of the dimension, use the `len` function on the `Dimension` instance. To determine if a dimension is 'unlimited', use the -`netCDF4.Dimension.isunlimited` method of the `netCDF4.Dimension` instance.""" +`Dimension.isunlimited` method of the `Dimension` instance.""" self.dimensions[dimname] = Dimension(self, dimname, size=size) return self.dimensions[dimname] @@ -2235,8 +2770,9 @@ instance. To determine if a dimension is 'unlimited', use the """ **`renameDimension(self, oldname, newname)`** -rename a `netCDF4.Dimension` named `oldname` to `newname`.""" +rename a `Dimension` named `oldname` to `newname`.""" cdef char *namstring + cdef Dimension dim bytestr = _strencode(newname) namstring = bytestr if self.data_model != 'NETCDF4': self._redef() @@ -2244,7 +2780,8 @@ rename a `netCDF4.Dimension` named `oldname` to `newname`.""" dim = self.dimensions[oldname] except KeyError: raise KeyError('%s not a valid dimension name' % oldname) - ierr = nc_rename_dim(self._grpid, dim._dimid, namstring) + with nogil: + ierr = nc_rename_dim(self._grpid, dim._dimid, namstring) if self.data_model != 'NETCDF4': self._enddef() _ensure_nc_success(ierr) @@ -2267,7 +2804,7 @@ dtype object `datatype`. are homogeneous numeric data types), then the 'inner' compound types **must** be created first. -The return value is the `netCDF4.CompoundType` class instance describing the new +The return value is the `CompoundType` class instance describing the new datatype.""" self.cmptypes[datatype_name] = CompoundType(self, datatype,\ datatype_name) @@ -2280,7 +2817,7 @@ datatype.""" Creates a new VLEN data type named `datatype_name` from a numpy dtype object `datatype`. -The return value is the `netCDF4.VLType` class instance describing the new +The return value is the `VLType` class instance describing the new datatype.""" self.vltypes[datatype_name] = VLType(self, datatype, datatype_name) return self.vltypes[datatype_name] @@ -2293,27 +2830,32 @@ Creates a new Enum data type named `datatype_name` from a numpy integer dtype object `datatype`, and a python dictionary defining the enum fields and values. 
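An illustrative sketch of the `createEnumType` call just documented; the enum name and field values here are hypothetical (loosely following the cloud-type example in the library documentation):

```python
import numpy as np
from netCDF4 import Dataset

with Dataset("enum_example.nc", "w", format="NETCDF4") as nc:
    # hypothetical enum fields/values mapping names to u1 codes
    cloud_vals = {"Clear": 0, "Cumulus": 1, "Stratus": 2, "Missing": 255}
    cloud_t = nc.createEnumType(np.uint8, "cloud_t", cloud_vals)
    nc.createDimension("time", None)
    cv = nc.createVariable("cloud", cloud_t, ("time",),
                           fill_value=cloud_vals["Missing"])
    cv[0:3] = [0, 1, 2]   # stored as u1 codes in the file
```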
-The return value is the `netCDF4.EnumType` class instance describing the new +The return value is the `EnumType` class instance describing the new datatype.""" self.enumtypes[datatype_name] = EnumType(self, datatype, datatype_name, enum_dict) return self.enumtypes[datatype_name] - def createVariable(self, varname, datatype, dimensions=(), zlib=False, - complevel=4, shuffle=True, fletcher32=False, contiguous=False, + def createVariable(self, varname, datatype, dimensions=(), + compression=None, zlib=False, + complevel=4, shuffle=True, + szip_coding='nn',szip_pixels_per_block=8, + blosc_shuffle=1,fletcher32=False, contiguous=False, chunksizes=None, endian='native', least_significant_digit=None, - fill_value=None, chunk_cache=None): + significant_digits=None,quantize_mode='BitGroom',fill_value=None, chunk_cache=None): """ -**`createVariable(self, varname, datatype, dimensions=(), zlib=False, +**`createVariable(self, varname, datatype, dimensions=(), compression=None, zlib=False, complevel=4, shuffle=True, fletcher32=False, contiguous=False, chunksizes=None, -endian='native', least_significant_digit=None, fill_value=None)`** +szip_coding='nn', szip_pixels_per_block=8, blosc_shuffle=1, +endian='native', least_significant_digit=None, significant_digits=None, quantize_mode='BitGroom', +fill_value=None, chunk_cache=None)`** Creates a new variable with the given `varname`, `datatype`, and `dimensions`. If dimensions are not given, the variable is assumed to be a scalar. If `varname` is specified as a path, using forward slashes as in unix to -separate components, then intermediate groups will be created as necessary +separate components, then intermediate groups will be created as necessary For example, `createVariable('/GroupA/GroupB/VarC', float, ('x','y'))` will create groups `GroupA` and `GroupA/GroupB`, plus the variable `GroupA/GroupB/VarC`, if the preceding groups don't already exist. @@ -2324,8 +2866,8 @@ Supported specifiers include: `'S1' or 'c' (NC_CHAR), 'i1' or 'b' or 'B' (NC_BYTE), 'u1' (NC_UBYTE), 'i2' or 'h' or 's' (NC_SHORT), 'u2' (NC_USHORT), 'i4' or 'i' or 'l' (NC_INT), 'u4' (NC_UINT), 'i8' (NC_INT64), 'u8' (NC_UINT64), 'f4' or 'f' (NC_FLOAT), 'f8' or 'd' (NC_DOUBLE)`. -`datatype` can also be a `netCDF4.CompoundType` instance -(for a structured, or compound array), a `netCDF4.VLType` instance +`datatype` can also be a `CompoundType` instance +(for a structured, or compound array), a `VLType` instance (for a variable-length array), or the python `str` builtin (for a variable-length string array). Numpy string and unicode datatypes with length greater than one are aliases for `str`. @@ -2333,35 +2875,58 @@ length greater than one are aliases for `str`. Data from netCDF variables is presented to python as numpy arrays with the corresponding data type. -`dimensions` must be a tuple containing dimension names (strings) that -have been defined previously using `netCDF4.Dataset.createDimension`. The default value +`dimensions` must be a tuple containing `Dimension` instances and/or +dimension names (strings) that have been defined +previously using `Dataset.createDimension`. The default value is an empty tuple, which means the variable is a scalar. +If the optional keyword argument `compression` is set, the data will be +compressed in the netCDF file using the specified compression algorithm. +Currently `zlib`,`szip`,`zstd`,`bzip2`,`blosc_lz`,`blosc_lz4`,`blosc_lz4hc`, +`blosc_zlib` and `blosc_zstd` are supported. +Default is `None` (no compression). 
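For example, a sketch combining the new `compression` kwarg with quantization (assumes a netcdf-c build with zlib support, and >= 4.9.0 for `significant_digits`; file, dimension, and variable names are illustrative):

```python
import numpy as np
from netCDF4 import Dataset

with Dataset("compressed.nc", "w", format="NETCDF4") as nc:
    nc.createDimension("t", None)        # unlimited
    nc.createDimension("x", 100)
    # zlib compression plus BitGroom quantization to 3 significant digits
    temp = nc.createVariable("temp", "f4", ("t", "x"),
                             compression="zlib", complevel=4, shuffle=True,
                             significant_digits=3, quantize_mode="BitGroom",
                             chunksizes=(1, 100))
    temp[0, :] = np.random.rand(100)
```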
All of the compressors except +`zlib` and `szip` use the HDF5 plugin architecture. + If the optional keyword `zlib` is `True`, the data will be compressed in -the netCDF file using gzip compression (default `False`). +the netCDF file using zlib compression (default `False`). The use of this option is +deprecated in favor of `compression='zlib'`. -The optional keyword `complevel` is an integer between 1 and 9 describing -the level of compression desired (default 4). Ignored if `zlib=False`. +The optional keyword `complevel` is an integer between 0 and 9 describing +the level of compression desired (default 4). Ignored if `compression=None`. +A value of zero disables compression. If the optional keyword `shuffle` is `True`, the HDF5 shuffle filter -will be applied before compressing the data (default `True`). This +will be applied before compressing the data with zlib (default `True`). This significantly improves compression. Default is `True`. Ignored if `zlib=False`. +The optional kwarg `blosc_shuffle`is ignored +unless the blosc compressor is used. `blosc_shuffle` can be 0 (no shuffle), +1 (byte-wise shuffle) or 2 (bit-wise shuffle). Default is 1. + +The optional kwargs `szip_coding` and `szip_pixels_per_block` are ignored +unless the szip compressor is used. `szip_coding` can be `ec` (entropy coding) +or `nn` (nearest neighbor coding). Default is `nn`. `szip_pixels_per_block` +can be 4, 8, 16 or 32 (default 8). + If the optional keyword `fletcher32` is `True`, the Fletcher32 HDF5 checksum algorithm is activated to detect errors. Default `False`. If the optional keyword `contiguous` is `True`, the variable data is stored contiguously on disk. Default `False`. Setting to `True` for a variable with an unlimited dimension will trigger an error. +Fixed size variables (with no unlimited dimension) with no compression filters +are contiguous by default. The optional keyword `chunksizes` can be used to manually specify the -HDF5 chunksizes for each dimension of the variable. A detailed -discussion of HDF chunking and I/O performance is available -[here](http://www.hdfgroup.org/HDF5/doc/H5.user/Chunking.html). +HDF5 chunksizes for each dimension of the variable. +A detailed discussion of HDF chunking and I/O performance is available +[here](https://support.hdfgroup.org/HDF5/doc/Advanced/Chunking). +The default chunking scheme in the netcdf-c library is discussed +[here](https://www.unidata.ucar.edu/software/netcdf/documentation/NUG/netcdf_perf_chunking.html). Basically, you want the chunk size for each dimension to match as closely as possible the size of the data block that users will read -from the file. `chunksizes` cannot be set if `contiguous=True`. +from the file. `chunksizes` cannot be set if `contiguous=True`. The optional keyword `endian` can be used to control whether the data is stored in little or big endian format on disk. Possible @@ -2371,25 +2936,29 @@ but if the data is always going to be read on a computer with the opposite format as the one used to create the file, there may be some performance advantage to be gained by setting the endian-ness. -The `zlib, complevel, shuffle, fletcher32, contiguous, chunksizes` and `endian` -keywords are silently ignored for netCDF 3 files that do not use HDF5. - The optional keyword `fill_value` can be used to override the default netCDF `_FillValue` (the value that the variable gets filled with before -any data is written to it, defaults given in `netCDF4.default_fillvals`). 
+any data is written to it, defaults given in the dict `netCDF4.default_fillvals`). If fill_value is set to `False`, then the variable is not pre-filled. -If the optional keyword parameter `least_significant_digit` is +If the optional keyword parameters `least_significant_digit` or `significant_digits` are specified, variable data will be truncated (quantized). In conjunction -with `zlib=True` this produces 'lossy', but significantly more +with `compression='zlib'` this produces 'lossy', but significantly more efficient compression. For example, if `least_significant_digit=1`, data will be quantized using `numpy.around(scale*data)/scale`, where scale = 2**bits, and bits is determined so that a precision of 0.1 is -retained (in this case bits=4). From the -[PSD metadata conventions](http://www.esrl.noaa.gov/psd/data/gridded/conventions/cdc_netcdf_standard.shtml): +retained (in this case bits=4). From the +[PSL metadata conventions](http://www.esrl.noaa.gov/psl/data/gridded/conventions/cdc_netcdf_standard.shtml): "least_significant_digit -- power of ten of the smallest decimal place in unpacked data that is a reliable value." Default is `None`, or no -quantization, or 'lossless' compression. +quantization, or 'lossless' compression. If `significant_digits=3` +then the data will be quantized so that three significant digits are retained, independent +of the floating point exponent. The keyword argument `quantize_mode` controls +the quantization algorithm (default 'BitGroom', 'BitRound' and +'GranularBitRound' also available). The 'GranularBitRound' +algorithm may result in better compression for typical geophysical datasets. +This `significant_digits` kwarg is only available with netcdf-c >= 4.9.0, and +only works with `NETCDF4` or `NETCDF4_CLASSIC` formatted files. When creating variables in a `NETCDF4` or `NETCDF4_CLASSIC` formatted file, HDF5 creates something called a 'chunk cache' for each variable. The @@ -2401,29 +2970,29 @@ persists as long as the Dataset is open - you can use the set_var_chunk_cache method to change it the next time the Dataset is opened. Warning - messing with this parameter can seriously degrade performance. -The return value is the `netCDF4.Variable` class instance describing the new +The return value is the `Variable` class instance describing the new variable. A list of names corresponding to netCDF variable attributes can be -obtained with the `netCDF4.Variable` method `netCDF4.Variable.ncattrs`. A dictionary +obtained with the `Variable` method `Variable.ncattrs`. A dictionary containing all the netCDF attribute name/value pairs is provided by -the `__dict__` attribute of a `netCDF4.Variable` instance. +the `__dict__` attribute of a `Variable` instance. -`netCDF4.Variable` instances behave much like array objects. Data can be +`Variable` instances behave much like array objects. Data can be assigned to or retrieved from a variable with indexing and slicing -operations on the `netCDF4.Variable` instance. A `netCDF4.Variable` instance has six +operations on the `Variable` instance. A `Variable` instance has six Dataset standard attributes: `dimensions, dtype, shape, ndim, name` and `least_significant_digit`. Application programs should never modify these attributes. The `dimensions` attribute is a tuple containing the names of the dimensions associated with this variable. The `dtype` -attribute is a string describing the variable's data type (`i4, f8, -S1,` etc). 
The `shape` attribute is a tuple describing the current +attribute is a string describing the variable's data type (`i4`, `f8`, +`S1`, etc). The `shape` attribute is a tuple describing the current sizes of all the variable's dimensions. The `name` attribute is a string containing the name of the Variable instance. The `least_significant_digit` attributes describes the power of ten of the smallest decimal place in -the data the contains a reliable value. assigned to the `netCDF4.Variable` -instance. If `None`, the data is not truncated. The `ndim` attribute +the data the contains a reliable value. assigned to the `Variable` +instance. The `ndim` attribute is the number of variable dimensions.""" # if varname specified as a path, split out group names. varname = posixpath.normpath(varname) @@ -2433,20 +3002,35 @@ is the number of variable dimensions.""" group = self else: group = self.createGroup(dirname) + # if dimensions is a single string or Dimension instance, + # convert to a tuple. + # This prevents a common error that occurs when + # dimensions = 'lat' instead of ('lat',) + if isinstance(dimensions, (str, bytes, Dimension)): + dimensions = dimensions, + # convert elements of dimensions tuple to Dimension + # instances if they are strings. + # _find_dim looks for dimension in this group, and if not + # found there, looks in parent (and it's parent, etc, back to root). + dimensions =\ + tuple(_find_dim(group,d) if isinstance(d,(str,bytes)) else d for d in dimensions) # create variable. group.variables[varname] = Variable(group, varname, datatype, - dimensions=dimensions, zlib=zlib, complevel=complevel, shuffle=shuffle, + dimensions=dimensions, compression=compression, zlib=zlib, complevel=complevel, shuffle=shuffle, + szip_coding=szip_coding, szip_pixels_per_block=szip_pixels_per_block, + blosc_shuffle=blosc_shuffle, fletcher32=fletcher32, contiguous=contiguous, chunksizes=chunksizes, endian=endian, least_significant_digit=least_significant_digit, - fill_value=fill_value, chunk_cache=chunk_cache) + significant_digits=significant_digits,quantize_mode=quantize_mode,fill_value=fill_value, chunk_cache=chunk_cache) return group.variables[varname] def renameVariable(self, oldname, newname): """ **`renameVariable(self, oldname, newname)`** -rename a `netCDF4.Variable` named `oldname` to `newname`""" +rename a `Variable` named `oldname` to `newname`""" cdef char *namstring + cdef Variable var try: var = self.variables[oldname] except KeyError: @@ -2454,7 +3038,8 @@ rename a `netCDF4.Variable` named `oldname` to `newname`""" bytestr = _strencode(newname) namstring = bytestr if self.data_model != 'NETCDF4': self._redef() - ierr = nc_rename_var(self._grpid, var._varid, namstring) + with nogil: + ierr = nc_rename_var(self._grpid, var._varid, namstring) if self.data_model != 'NETCDF4': self._enddef() _ensure_nc_success(ierr) @@ -2467,17 +3052,17 @@ rename a `netCDF4.Variable` named `oldname` to `newname`""" """ **`createGroup(self, groupname)`** -Creates a new `netCDF4.Group` with the given `groupname`. +Creates a new `Group` with the given `groupname`. If `groupname` is specified as a path, using forward slashes as in unix to -separate components, then intermediate groups will be created as necessary +separate components, then intermediate groups will be created as necessary (analogous to `mkdir -p` in unix). For example, `createGroup('/GroupA/GroupB/GroupC')` will create `GroupA`, `GroupA/GroupB`, and `GroupA/GroupB/GroupC`, if they don't already exist. 
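A short sketch of the path behaviour described for `createVariable` and `createGroup` above, using the same group and variable names as the docstring examples:

```python
from netCDF4 import Dataset

with Dataset("groups.nc", "w", format="NETCDF4") as nc:
    # intermediate groups are created on demand, like `mkdir -p`
    nc.createGroup("/GroupA/GroupB/GroupC")
    nc.createDimension("x", 4)
    # a variable created by path; the 'x' dimension is found by searching
    # parent groups back to the root
    nc.createVariable("/GroupA/GroupB/VarC", "f8", ("x",))
    print(list(nc.groups["GroupA"].groups["GroupB"].variables))   # ['VarC']
```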
If the specified path describes a group that already exists, no error is raised. -The return value is a `netCDF4.Group` class instance.""" +The return value is a `Group` class instance.""" # if group specified as a path, split out group names groupname = posixpath.normpath(groupname) nestedgroups = groupname.split('/') @@ -2497,7 +3082,7 @@ The return value is a `netCDF4.Group` class instance.""" """ **`ncattrs(self)`** -return netCDF global attribute names for this `netCDF4.Dataset` or `netCDF4.Group` in a list.""" +return netCDF global attribute names for this `Dataset` or `Group` in a list.""" return _get_att_names(self._grpid, NC_GLOBAL) def setncattr(self,name,value): @@ -2507,8 +3092,10 @@ return netCDF global attribute names for this `netCDF4.Dataset` or `netCDF4.Grou set a netCDF dataset or group attribute using name,value pair. Use if you need to set a netCDF attribute with the with the same name as one of the reserved python attributes.""" + cdef nc_type xtype + xtype=-99 if self.data_model != 'NETCDF4': self._redef() - _set_att(self, NC_GLOBAL, name, value) + _set_att(self, NC_GLOBAL, name, value, xtype=xtype, force_ncstring=self._ncstring_attrs__) if self.data_model != 'NETCDF4': self._enddef() def setncattr_string(self,name,value): @@ -2517,13 +3104,12 @@ with the same name as one of the reserved python attributes.""" set a netCDF dataset or group string attribute using name,value pair. Use if you need to ensure that a netCDF attribute is created with type -`NC_STRING` if the file format is `NETCDF4`. -Use if you need to set an attribute to an array of variable-length strings.""" +`NC_STRING` if the file format is `NETCDF4`.""" cdef nc_type xtype xtype=-99 if self.data_model != 'NETCDF4': msg='file format does not support NC_STRING attributes' - raise IOError(msg) + raise OSError(msg) _set_att(self, NC_GLOBAL, name, value, xtype=xtype, force_ncstring=True) def setncatts(self,attdict): @@ -2571,7 +3157,8 @@ attributes.""" bytestr = _strencode(name) attname = bytestr if self.data_model != 'NETCDF4': self._redef() - ierr = nc_del_att(self._grpid, NC_GLOBAL, attname) + with nogil: + ierr = nc_del_att(self._grpid, NC_GLOBAL, attname) if self.data_model != 'NETCDF4': self._enddef() _ensure_nc_success(ierr) @@ -2597,7 +3184,7 @@ attributes.""" values = [] for name in names: values.append(_get_att(self, NC_GLOBAL, name)) - return OrderedDict(zip(names,values)) + return dict(zip(names, values)) else: raise AttributeError elif name in _private_atts: @@ -2609,48 +3196,56 @@ attributes.""" """ **`renameAttribute(self, oldname, newname)`** -rename a `netCDF4.Dataset` or `netCDF4.Group` attribute named `oldname` to `newname`.""" +rename a `Dataset` or `Group` attribute named `oldname` to `newname`.""" cdef char *oldnamec cdef char *newnamec + cdef int ierr bytestr = _strencode(oldname) oldnamec = bytestr bytestr = _strencode(newname) newnamec = bytestr - _ensure_nc_success(nc_rename_att(self._grpid, NC_GLOBAL, oldnamec, newnamec)) + with nogil: + ierr = nc_rename_att(self._grpid, NC_GLOBAL, oldnamec, newnamec) + _ensure_nc_success(ierr) def renameGroup(self, oldname, newname): """ **`renameGroup(self, oldname, newname)`** -rename a `netCDF4.Group` named `oldname` to `newname` (requires netcdf >= 4.3.1).""" +rename a `Group` named `oldname` to `newname` (requires netcdf >= 4.3.1).""" cdef char *newnamec - IF HAS_RENAME_GRP: - bytestr = _strencode(newname) - newnamec = bytestr - try: - grp = self.groups[oldname] - except KeyError: - raise KeyError('%s not a valid group name' % oldname) - 
_ensure_nc_success(nc_rename_grp(grp._grpid, newnamec)) - # remove old key from groups dict. - self.groups.pop(oldname) - # add new key. - self.groups[newname] = grp - ELSE: - msg = """ -renameGroup method not enabled. To enable, install Cython, make sure you have -version 4.3.1 or higher of the netcdf C lib, and rebuild netcdf4-python.""" - raise ValueError(msg) + cdef int grpid + cdef int ierr + if not __has_rename_grp__: + raise ValueError( + "renameGroup method not enabled. To enable, install Cython, make sure you have" + "version 4.3.1 or higher of the netcdf C lib, and rebuild netcdf4-python." + ) + + bytestr = _strencode(newname) + newnamec = bytestr + try: + grp = self.groups[oldname] + grpid = grp._grpid + except KeyError: + raise KeyError('%s not a valid group name' % oldname) + with nogil: + ierr = nc_rename_grp(grpid, newnamec) + _ensure_nc_success(ierr) + # remove old key from groups dict. + self.groups.pop(oldname) + # add new key. + self.groups[newname] = grp def set_auto_chartostring(self, value): """ **`set_auto_chartostring(self, True_or_False)`** -Call `netCDF4.Variable.set_auto_chartostring` for all variables contained in this `netCDF4.Dataset` or -`netCDF4.Group`, as well as for all variables in all its subgroups. +Call `Variable.set_auto_chartostring` for all variables contained in this `Dataset` or +`Group`, as well as for all variables in all its subgroups. **`True_or_False`**: Boolean determining if automatic conversion of -all character arrays <--> string arrays should be performed for +all character arrays <--> string arrays should be performed for character variables (variables of type `NC_CHAR` or `S1`) with the `_Encoding` attribute set. @@ -2674,8 +3269,8 @@ after calling this function will follow the default behaviour. """ **`set_auto_maskandscale(self, True_or_False)`** -Call `netCDF4.Variable.set_auto_maskandscale` for all variables contained in this `netCDF4.Dataset` or -`netCDF4.Group`, as well as for all variables in all its subgroups. +Call `Variable.set_auto_maskandscale` for all variables contained in this `Dataset` or +`Group`, as well as for all variables in all its subgroups. **`True_or_False`**: Boolean determining if automatic conversion to masked arrays and variable scaling shall be applied for all variables. @@ -2701,8 +3296,9 @@ after calling this function will follow the default behaviour. """ **`set_auto_mask(self, True_or_False)`** -Call `netCDF4.Variable.set_auto_mask` for all variables contained in this `netCDF4.Dataset` or -`netCDF4.Group`, as well as for all variables in all its subgroups. +Call `Variable.set_auto_mask` for all variables contained in this `Dataset` or +`Group`, as well as for all variables in all its subgroups. Only affects +Variables with primitive or enum types (not compound or vlen Variables). **`True_or_False`**: Boolean determining if automatic conversion to masked arrays shall be applied for all variables. @@ -2711,7 +3307,11 @@ shall be applied for all variables. after calling this function will follow the default behaviour. """ - for var in self.variables.values(): + # this is a hack to make inheritance work in MFDataset + # (which stores variables in _vars) + _vars = self.variables + if _vars is None: _vars = self._vars + for var in _vars.values(): var.set_auto_mask(value) for groups in _walk_grps(self): @@ -2723,8 +3323,8 @@ after calling this function will follow the default behaviour. 
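The `set_auto_*` methods above apply a setting to every variable already present in the Dataset (and its subgroups). A short hedged sketch of how that is typically used when reading; the filename is hypothetical and the file is assumed to already exist.

```python
import netCDF4

nc = netCDF4.Dataset("example_data.nc", "r")
nc.set_auto_maskandscale(False)   # raw values: no masked arrays, no scale/offset
nc.set_auto_mask(True)            # masked arrays again, scaling still disabled
# variables created *after* these calls follow the default behaviour instead
nc.close()
```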
""" **`set_auto_scale(self, True_or_False)`** -Call `netCDF4.Variable.set_auto_scale` for all variables contained in this `netCDF4.Dataset` or -`netCDF4.Group`, as well as for all variables in all its subgroups. +Call `Variable.set_auto_scale` for all variables contained in this `Dataset` or +`Group`, as well as for all variables in all its subgroups. **`True_or_False`**: Boolean determining if automatic variable scaling shall be applied for all variables. @@ -2745,33 +3345,97 @@ after calling this function will follow the default behaviour. for var in group.variables.values(): var.set_auto_scale(value) + def set_always_mask(self, value): + """ +**`set_always_mask(self, True_or_False)`** + +Call `Variable.set_always_mask` for all variables contained in +this `Dataset` or `Group`, as well as for all +variables in all its subgroups. + +**`True_or_False`**: Boolean determining if automatic conversion of +masked arrays with no missing values to regular numpy arrays shall be +applied for all variables. Default True. Set to False to restore the default behaviour +in versions prior to 1.4.1 (numpy array returned unless missing values are present, +otherwise masked array returned). + +***Note***: Calling this function only affects existing +variables. Variables created after calling this function will follow +the default behaviour. + """ + + # this is a hack to make inheritance work in MFDataset + # (which stores variables in _vars) + _vars = self.variables + if _vars is None: _vars = self._vars + for var in _vars.values(): + var.set_always_mask(value) + + for groups in _walk_grps(self): + for group in groups: + for var in group.variables.values(): + var.set_always_mask(value) + + def set_ncstring_attrs(self, value): + """ +**`set_ncstring_attrs(self, True_or_False)`** + +Call `Variable.set_ncstring_attrs` for all variables contained in +this `Dataset` or `Group`, as well as for all its +subgroups and their variables. + +**`True_or_False`**: Boolean determining if all string attributes are +created as variable-length NC_STRINGs, (if True), or if ascii text +attributes are stored as NC_CHARs (if False; default) + +***Note***: Calling this function only affects newly created attributes +of existing (sub-) groups and their variables. + """ + + self._ncstring_attrs__ = bool(value) + + # this is a hack to make inheritance work in MFDataset + # (which stores variables in _vars) + _vars = self.variables + if _vars is None: + _vars = self._vars + for var in _vars.values(): + var.set_ncstring_attrs(value) + + for groups in _walk_grps(self): + for group in groups: + group.set_ncstring_attrs(value) # recurse into subgroups... + + @functools.lru_cache(maxsize=128) def get_variables_by_attributes(self, **kwargs): """ -**`get_variables_by_attribute(self, **kwargs)`** +**`get_variables_by_attributes(self, **kwargs)`** Returns a list of variables that match specific conditions. Can pass in key=value parameters and variables are returned that -contain all of the matches. For example, +contain all of the matches. For example, - :::python - >>> # Get variables with x-axis attribute. - >>> vs = nc.get_variables_by_attributes(axis='X') - >>> # Get variables with matching "standard_name" attribute - >>> vs = nc.get_variables_by_attributes(standard_name='northward_sea_water_velocity') +```python +>>> # Get variables with x-axis attribute. 
+>>> vs = nc.get_variables_by_attributes(axis='X') +>>> # Get variables with matching "standard_name" attribute +>>> vs = nc.get_variables_by_attributes(standard_name='northward_sea_water_velocity') +``` Can pass in key=callable parameter and variables are returned if the callable returns True. The callable should accept a single parameter, the attribute value. None is given as the attribute value when the attribute does not exist on the variable. For example, - :::python - >>> # Get Axis variables - >>> vs = nc.get_variables_by_attributes(axis=lambda v: v in ['X', 'Y', 'Z', 'T']) - >>> # Get variables that don't have an "axis" attribute - >>> vs = nc.get_variables_by_attributes(axis=lambda v: v is None) - >>> # Get variables that have a "grid_mapping" attribute - >>> vs = nc.get_variables_by_attributes(grid_mapping=lambda v: v is not None) +```python +>>> # Get Axis variables +>>> vs = nc.get_variables_by_attributes(axis=lambda v: v in ['X', 'Y', 'Z', 'T']) +>>> # Get variables that don't have an "axis" attribute +>>> vs = nc.get_variables_by_attributes(axis=lambda v: v is None) +>>> # Get variables that have a "grid_mapping" attribute +>>> vs = nc.get_variables_by_attributes(grid_mapping=lambda v: v is not None) +``` """ vs = [] @@ -2798,41 +3462,198 @@ attribute does not exist on the variable. For example, return vs + def _getname(self): + # private method to get name associated with instance. + cdef int ierr + cdef char namstring[NC_MAX_NAME+1] + with nogil: + ierr = nc_inq_grpname(self._grpid, namstring) + _ensure_nc_success(ierr) + return namstring.decode('utf-8') + + property name: + """string name of Group instance""" + def __get__(self): + return self._getname() + def __set__(self,value): + raise AttributeError("name cannot be altered") + + @staticmethod + def fromcdl(cdlfilename,ncfilename=None,mode='a',format='NETCDF4'): + """ +**`fromcdl(cdlfilename, ncfilename=None, mode='a',format='NETCDF4')`** + +call [ncgen][ncgen] via subprocess to create Dataset from [CDL][cdl] +text representation. Requires [ncgen][ncgen] to be installed and in `$PATH`. + +**`cdlfilename`**: CDL file. + +**`ncfilename`**: netCDF file to create. If not given, CDL filename with +suffix replaced by `.nc` is used.. + +**`mode`**: Access mode to open Dataset (Default `'a'`). + +**`format`**: underlying file format to use (one of `'NETCDF4'`, +`'NETCDF4_CLASSIC'`, `'NETCDF3_CLASSIC'`, `'NETCDF3_64BIT_OFFSET'` or +`'NETCDF3_64BIT_DATA'`. Default `'NETCDF4'`. + +Dataset instance for `ncfilename` is returned. 
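A hedged usage sketch for the `fromcdl`/`tocdl` pair introduced here: both shell out to the `ncgen`/`ncdump` utilities, which must be on `$PATH`. The CDL input file name is hypothetical.

```python
import netCDF4

# build a netCDF file from a CDL description, then dump its header back to CDL
nc = netCDF4.Dataset.fromcdl("example.cdl", ncfilename="example.nc",
                             mode="a", format="NETCDF4")
print(nc.tocdl(coordvars=True))   # header plus coordinate data, as a string
nc.close()
```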
+ +[ncgen]: https://www.unidata.ucar.edu/software/netcdf/docs/netcdf_utilities_guide.html#ncgen_guide +[cdl]: https://www.unidata.ucar.edu/software/netcdf/docs/netcdf_utilities_guide.html#cdl_guide + """ + filepath = pathlib.Path(cdlfilename) + if ncfilename is None: + ncfilename = filepath.with_suffix('.nc') + else: + ncfilename = pathlib.Path(ncfilename) + formatcodes = {'NETCDF4': 4, + 'NETCDF4_CLASSIC': 7, + 'NETCDF3_CLASSIC': 3, + 'NETCDF3_64BIT': 6, # legacy + 'NETCDF3_64BIT_OFFSET': 6, + 'NETCDF3_64BIT_DATA': 5} + + if format not in formatcodes: + raise ValueError('illegal format requested') + if not filepath.exists(): + raise FileNotFoundError(filepath) + if ncfilename.exists(): + raise FileExistsError(ncfilename) + + ncgenargs="-knc%s" % formatcodes[format] + subprocess.run(["ncgen", ncgenargs, "-o", str(ncfilename), str(filepath)], check=True) + return Dataset(ncfilename, mode=mode) + + def tocdl(self,coordvars=False,data=False,outfile=None): + """ +**`tocdl(self, coordvars=False, data=False, outfile=None)`** + +call [ncdump][ncdump] via subprocess to create [CDL][cdl] +text representation of Dataset. Requires [ncdump][ncdump] +to be installed and in `$PATH`. + +**`coordvars`**: include coordinate variable data (via `ncdump -c`). Default False + +**`data`**: if True, write out variable data (Default False). + +**`outfile`**: If not None, file to output ncdump to. Default is to return a string. + +[ncdump]: https://www.unidata.ucar.edu/software/netcdf/docs/netcdf_utilities_guide.html#ncdump_guide +[cdl]: https://www.unidata.ucar.edu/software/netcdf/docs/netcdf_utilities_guide.html#cdl_guide + """ + self.sync() + if coordvars: + ncdumpargs = "-cs" + else: + ncdumpargs = "-s" + if not data: ncdumpargs += "h" + result=subprocess.run(["ncdump", ncdumpargs, self.filepath()], + check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, + encoding='utf-8') + if outfile is None: + return result.stdout + else: + f = open(outfile,'w') + f.write(result.stdout) + f.close() + + def has_blosc_filter(self): + """**`has_blosc_filter(self)`** + returns True if blosc compression filter is available + """ + + #if __has_blosc_support__: + # return True + #else: + # return False + + cdef int ierr + with nogil: + ierr = nc_inq_filter_avail(self._grpid, H5Z_FILTER_BLOSC) + return ierr == 0 + + def has_zstd_filter(self): + """**`has_zstd_filter(self)`** + returns True if zstd compression filter is available + """ + + #if __has_zstandard_support__: + # return True + #else: + # return False + + cdef int ierr + with nogil: + ierr = nc_inq_filter_avail(self._grpid, H5Z_FILTER_ZSTD) + return ierr == 0 + + def has_bzip2_filter(self): + """**`has_bzip2_filter(self)`** + returns True if bzip2 compression filter is available + """ + + #if __has_bzip2_support__: + # return True + #else: + # return False + + cdef int ierr + with nogil: + ierr = nc_inq_filter_avail(self._grpid, H5Z_FILTER_BZIP2) + return ierr == 0 + + def has_szip_filter(self): + """**`has_szip_filter(self)`** + returns True if szip compression filter is available + """ + + #if not __has_ncfilter__: + # return __has_szip_support__ + + #if __has_szip_support__: + # return True + #else: + # return False + + cdef int ierr + with nogil: + ierr = nc_inq_filter_avail(self._grpid, H5Z_FILTER_SZIP) + return ierr == 0 + cdef class Group(Dataset): """ Groups define a hierarchical namespace within a netCDF file. They are -analogous to directories in a unix filesystem. 
Each `netCDF4.Group` behaves like -a `netCDF4.Dataset` within a Dataset, and can contain it's own variables, -dimensions and attributes (and other Groups). See `netCDF4.Group.__init__` +analogous to directories in a unix filesystem. Each `Group` behaves like +a `Dataset` within a Dataset, and can contain it's own variables, +dimensions and attributes (and other Groups). See `Group.__init__` for more details. -`netCDF4.Group` inherits from `netCDF4.Dataset`, so all the -`netCDF4.Dataset` class methods and variables are available -to a `netCDF4.Group` instance (except the `close` method). +`Group` inherits from `Dataset`, so all the +`Dataset` class methods and variables are available +to a `Group` instance (except the `close` method). Additional read-only class variables: **`name`**: String describing the group name. """ - # Docstrings for class variables (used by pdoc). - __pdoc__['Group.name']=\ - """A string describing the name of the `netCDF4.Group`.""" def __init__(self, parent, name, **kwargs): """ **`__init__(self, parent, name)`** - `netCDF4.Group` constructor. + `Group` constructor. - **`parent`**: `netCDF4.Group` instance for the parent group. If being created - in the root group, use a `netCDF4.Dataset` instance. + **`parent`**: `Group` instance for the parent group. If being created + in the root group, use a `Dataset` instance. **`name`**: - Name of the group. - ***Note***: `netCDF4.Group` instances should be created using the - `netCDF4.Dataset.createGroup` method of a `netCDF4.Dataset` instance, or - another `netCDF4.Group` instance, not using this class directly. + ***Note***: `Group` instances should be created using the + `Dataset.createGroup` method of a `Dataset` instance, or + another `Group` instance, not using this class directly. """ cdef char *groupname + cdef int ierr, grpid # flag to indicate that Variables in this Group support orthogonal indexing. self.__orthogonal_indexing__ = True # set data_model and file_format attributes. @@ -2844,6 +3665,10 @@ Additional read-only class variables: self.parent = parent # propagate weak reference setting from parent. self.keepweakref = parent.keepweakref + # propagate _ncstring_attrs__ setting from parent. + self._ncstring_attrs__ = parent._ncstring_attrs__ + self.auto_complex = parent.auto_complex + if 'id' in kwargs: self._grpid = kwargs['id'] # get compound, vlen and enum types in this Group. @@ -2851,85 +3676,69 @@ Additional read-only class variables: # get dimensions in this Group. self.dimensions = _get_dims(self) # get variables in this Group. - self.variables = _get_vars(self) + self.variables = _get_vars(self, self.auto_complex) # get groups in this Group. 
self.groups = _get_grps(self) else: bytestr = _strencode(name) groupname = bytestr - _ensure_nc_success(nc_def_grp(parent._grpid, groupname, &self._grpid)) - self.cmptypes = OrderedDict() - self.vltypes = OrderedDict() - self.enumtypes = OrderedDict() - self.dimensions = OrderedDict() - self.variables = OrderedDict() - self.groups = OrderedDict() + grpid = parent._grpid + with nogil: + ierr = nc_def_grp(grpid, groupname, &self._grpid) + _ensure_nc_success(ierr) + self.cmptypes = dict() + self.vltypes = dict() + self.enumtypes = dict() + self.dimensions = dict() + self.variables = dict() + self.groups = dict() + def close(self): """ **`close(self)`** -overrides `netCDF4.Dataset` close method which does not apply to `netCDF4.Group` -instances, raises IOError.""" - raise IOError('cannot close a `netCDF4.Group` (only applies to Dataset)') - - def _getname(self): - # private method to get name associated with instance. - cdef int ierr - cdef char namstring[NC_MAX_NAME+1] - with nogil: - ierr = nc_inq_grpname(self._grpid, namstring) - _ensure_nc_success(ierr) - return namstring.decode('utf-8') - - property name: - """string name of Group instance""" - def __get__(self): - return self._getname() - def __set__(self,value): - raise AttributeError("name cannot be altered") +overrides `Dataset` close method which does not apply to `Group` +instances, raises OSError.""" + raise OSError('cannot close a `Group` (only applies to Dataset)') cdef class Dimension: """ -A netCDF `netCDF4.Dimension` is used to describe the coordinates of a `netCDF4.Variable`. -See `netCDF4.Dimension.__init__` for more details. +A netCDF `Dimension` is used to describe the coordinates of a `Variable`. +See `Dimension.__init__` for more details. -The current maximum size of a `netCDF4.Dimension` instance can be obtained by -calling the python `len` function on the `netCDF4.Dimension` instance. The -`netCDF4.Dimension.isunlimited` method of a `netCDF4.Dimension` instance can be used to +The current maximum size of a `Dimension` instance can be obtained by +calling the python `len` function on the `Dimension` instance. The +`Dimension.isunlimited` method of a `Dimension` instance can be used to determine if the dimension is unlimited. Read-only class variables: -**`name`**: String name, used when creating a `netCDF4.Variable` with -`netCDF4.Dataset.createVariable`. +**`name`**: String name, used when creating a `Variable` with +`Dataset.createVariable`. -**`size`**: Current `netCDF4.Dimension` size (same as `len(d)`, where `d` is a -`netCDF4.Dimension` instance). +**`size`**: Current `Dimension` size (same as `len(d)`, where `d` is a +`Dimension` instance). """ cdef public int _dimid, _grpid cdef public _data_model, _name, _grp - # Docstrings for class variables (used by pdoc). - __pdoc__['Dimension.name']=\ - """A string describing the name of the `netCDF4.Dimension` - used when creating a - `netCDF4.Variable` instance with `netCDF4.Dataset.createVariable`.""" def __init__(self, grp, name, size=None, **kwargs): """ **`__init__(self, group, name, size=None)`** - `netCDF4.Dimension` constructor. + `Dimension` constructor. - **`group`**: `netCDF4.Group` instance to associate with dimension. + **`group`**: `Group` instance to associate with dimension. **`name`**: Name of the dimension. **`size`**: Size of the dimension. `None` or 0 means unlimited. (Default `None`). 
- ***Note***: `netCDF4.Dimension` instances should be created using the - `netCDF4.Dataset.createDimension` method of a `netCDF4.Group` or - `netCDF4.Dataset` instance, not using `netCDF4.Dimension.__init__` directly. + ***Note***: `Dimension` instances should be created using the + `Dataset.createDimension` method of a `Group` or + `Dataset` instance, not using `Dimension.__init__` directly. """ cdef int ierr cdef char *dimname @@ -2953,7 +3762,8 @@ Read-only class variables: else: lendim = NC_UNLIMITED if grp.data_model != 'NETCDF4': grp._redef() - ierr = nc_def_dim(self._grpid, dimname, lendim, &self._dimid) + with nogil: + ierr = nc_def_dim(self._grpid, dimname, lendim, &self._dimid) if grp.data_model != 'NETCDF4': grp._enddef() _ensure_nc_success(ierr) @@ -2982,21 +3792,21 @@ Read-only class variables: raise AttributeError("size cannot be altered") def __repr__(self): - if python3: - return self.__unicode__() - else: - return unicode(self).encode('utf-8') + return self.__str__() - def __unicode__(self): + def __str__(self): if not dir(self._grp): return 'Dimension object no longer valid' + typ = repr(type(self)).replace("._netCDF4", "") if self.isunlimited(): - return repr(type(self))+" (unlimited): name = '%s', size = %s\n" % (self._name,len(self)) + return "%r (unlimited): name = '%s', size = %s" %\ + (typ, self._name, len(self)) else: - return repr(type(self))+": name = '%s', size = %s\n" % (self._name,len(self)) + return "%r: name = '%s', size = %s" %\ + (typ, self._name, len(self)) def __len__(self): - # len(`netCDF4.Dimension` instance) returns current size of dimension + # len(`Dimension` instance) returns current size of dimension cdef int ierr cdef size_t lengthp with nogil: @@ -3008,18 +3818,19 @@ Read-only class variables: """ **`group(self)`** -return the group that this `netCDF4.Dimension` is a member of.""" +return the group that this `Dimension` is a member of.""" return self._grp def isunlimited(self): """ **`isunlimited(self)`** -returns `True` if the `netCDF4.Dimension` instance is unlimited, `False` otherwise.""" +returns `True` if the `Dimension` instance is unlimited, `False` otherwise.""" cdef int ierr, n, numunlimdims, ndims, nvars, ngatts, xdimid cdef int *unlimdimids if self._data_model == 'NETCDF4': - ierr = nc_inq_unlimdims(self._grpid, &numunlimdims, NULL) + with nogil: + ierr = nc_inq_unlimdims(self._grpid, &numunlimdims, NULL) _ensure_nc_success(ierr) if numunlimdims == 0: return False @@ -3030,7 +3841,7 @@ returns `True` if the `netCDF4.Dimension` instance is unlimited, `False` otherwi ierr = nc_inq_unlimdims(self._grpid, &numunlimdims, unlimdimids) _ensure_nc_success(ierr) unlimdim_ids = [] - for n from 0 <= n < numunlimdims: + for n in range(numunlimdims): unlimdim_ids.append(unlimdimids[n]) free(unlimdimids) if dimid in unlimdim_ids: @@ -3048,16 +3859,16 @@ returns `True` if the `netCDF4.Dimension` instance is unlimited, `False` otherwi cdef class Variable: """ -A netCDF `netCDF4.Variable` is used to read and write netCDF data. They are -analogous to numpy array objects. See `netCDF4.Variable.__init__` for more +A netCDF `Variable` is used to read and write netCDF data. They are +analogous to numpy array objects. See `Variable.__init__` for more details. A list of attribute names corresponding to netCDF attributes defined for -the variable can be obtained with the `netCDF4.Variable.ncattrs` method. These +the variable can be obtained with the `Variable.ncattrs` method. 
These attributes can be created by assigning to an attribute of the -`netCDF4.Variable` instance. A dictionary containing all the netCDF attribute +`Variable` instance. A dictionary containing all the netCDF attribute name/value pairs is provided by the `__dict__` attribute of a -`netCDF4.Variable` instance. +`Variable` instance. The following class variables are read-only: @@ -3073,25 +3884,43 @@ variable's data type. **`scale`**: If True, `scale_factor` and `add_offset` are applied, and signed integer data is automatically converted to -unsigned integer data if the `_Unsigned` attribute is set. -Default is `True`, can be reset using `netCDF4.Variable.set_auto_scale` and -`netCDF4.Variable.set_auto_maskandscale` methods. +unsigned integer data if the `_Unsigned` attribute is set to "true" or "True". +Default is `True`, can be reset using `Variable.set_auto_scale` and +`Variable.set_auto_maskandscale` methods. -**`mask`**: If True, data is automatically converted to/from masked +**`mask`**: If True, data is automatically converted to/from masked arrays when missing values or fill values are present. Default is `True`, can be -reset using `netCDF4.Variable.set_auto_mask` and `netCDF4.Variable.set_auto_maskandscale` -methods. +reset using `Variable.set_auto_mask` and `Variable.set_auto_maskandscale` +methods. Only relevant for Variables with primitive or enum types (ignored +for compound and vlen Variables). -**`chartostring`**: If True, data is automatically converted to/from character -arrays to string arrays when the `_Encoding` variable attribute is set. +**`chartostring`**: If True, data is automatically converted to/from character +arrays to string arrays when the `_Encoding` variable attribute is set. Default is `True`, can be reset using -`netCDF4.Variable.set_auto_chartostring` method. +`Variable.set_auto_chartostring` method. -**`least_significant_digit`**: Describes the power of ten of the +**`least_significant_digit`**: Describes the power of ten of the smallest decimal place in the data the contains a reliable value. Data is -truncated to this decimal place when it is assigned to the `netCDF4.Variable` +truncated to this decimal place when it is assigned to the `Variable` instance. If `None`, the data is not truncated. +**`significant_digits`**: New in version 1.6.0. Describes the number of significant +digits in the data the contains a reliable value. Data is +truncated to retain this number of significant digits when it is assigned to the +`Variable` instance. If `None`, the data is not truncated. +Only available with netcdf-c >= 4.9.0, +and only works with `NETCDF4` or `NETCDF4_CLASSIC` formatted files. +The number of significant digits used in the quantization of variable data can be +obtained using the `Variable.significant_digits` method. Default `None` - +no quantization done. + +**`quantize_mode`**: New in version 1.6.0. Controls +the quantization algorithm (default 'BitGroom', 'BitRound' and +'GranularBitRound' also available). The 'GranularBitRound' +algorithm may result in better compression for typical geophysical datasets. +Ignored if `significant_digits` not specified. If 'BitRound' is used, then +`significant_digits` is interpreted as binary (not decimal) digits. + **`__orthogonal_indexing__`**: Always `True`. Indicates to client code that the object supports 'orthogonal indexing', which means that slices that are 1d arrays or lists slice along each dimension independently. 
This @@ -3105,75 +3934,32 @@ behavior is similar to Fortran or Matlab, but different than numpy. **`size`**: The number of stored elements. """ cdef public int _varid, _grpid, _nunlimdim - cdef public _name, ndim, dtype, mask, scale, chartostring, _isprimitive, _iscompound,\ - _isvlen, _isenum, _grp, _cmptype, _vltype, _enumtype,\ - __orthogonal_indexing__, _has_lsd, _no_get_vars - # Docstrings for class variables (used by pdoc). - __pdoc__['Variable.dimensions'] = \ - """A tuple containing the names of the - dimensions associated with this variable.""" - __pdoc__['Variable.dtype'] = \ - """A numpy dtype object describing the - variable's data type.""" - __pdoc__['Variable.ndim'] = \ - """The number of variable dimensions.""" - __pdoc__['Variable.scale'] = \ - """if True, `scale_factor` and `add_offset` are - applied, and signed integer data is converted to unsigned - integer data if the `_Unsigned` attribute is set. - Default is `True`, can be reset using `netCDF4.Variable.set_auto_scale` and - `netCDF4.Variable.set_auto_maskandscale` methods.""" - __pdoc__['Variable.mask'] = \ - """If True, data is automatically converted to/from masked - arrays when missing values or fill values are present. Default is `True`, can be - reset using `netCDF4.Variable.set_auto_mask` and `netCDF4.Variable.set_auto_maskandscale` - methods.""" - __pdoc__['Variable.chartostring'] = \ - """If True, data is automatically converted to/from character - arrays to string arrays when `_Encoding` variable attribute is set. - Default is `True`, can be reset using - `netCDF4.Variable.set_auto_chartostring` method.""" - __pdoc__['Variable._no_get_vars'] = \ - """If True (default), netcdf routine `nc_get_vars` is not used for strided slicing - slicing. Can be re-set using `netCDF4.Variable.use_nc_get_vars` method.""" - __pdoc__['Variable.least_significant_digit'] = \ - """Describes the power of ten of the - smallest decimal place in the data the contains a reliable value. Data is - truncated to this decimal place when it is assigned to the `netCDF4.Variable` - instance. If `None`, the data is not truncated.""" - __pdoc__['Variable.__orthogonal_indexing__'] = \ - """Always `True`. Indicates to client code - that the object supports 'orthogonal indexing', which means that slices - that are 1d arrays or lists slice along each dimension independently. 
This - behavior is similar to Fortran or Matlab, but different than numpy.""" - __pdoc__['Variable.datatype'] = \ - """numpy data type (for primitive data types) or - VLType/CompoundType/EnumType instance (for compound, vlen or enum - data types).""" - __pdoc__['Variable.name'] = \ - """String name.""" - __pdoc__['Variable.shape'] = \ - """A tuple with the current shape (length of all dimensions).""" - __pdoc__['Variable.size'] = \ - """The number of stored elements.""" - - def __init__(self, grp, name, datatype, dimensions=(), zlib=False, - complevel=4, shuffle=True, fletcher32=False, contiguous=False, + cdef public _name, ndim, dtype, mask, scale, always_mask, chartostring, _isprimitive, \ + _iscompound, _isvlen, _isenum, _grp, _cmptype, _vltype, _enumtype,\ + __orthogonal_indexing__, _has_lsd, _use_get_vars, _ncstring_attrs__, auto_complex + + def __init__(self, grp, name, datatype, dimensions=(), + compression=None, zlib=False, + complevel=4, shuffle=True, szip_coding='nn', szip_pixels_per_block=8, + blosc_shuffle=1, + fletcher32=False, contiguous=False, chunksizes=None, endian='native', least_significant_digit=None, - fill_value=None, chunk_cache=None, **kwargs): + significant_digits=None,quantize_mode='BitGroom',fill_value=None, chunk_cache=None, + **kwargs): """ - **`__init__(self, group, name, datatype, dimensions=(), zlib=False, - complevel=4, shuffle=True, fletcher32=False, contiguous=False, + **`__init__(self, group, name, datatype, dimensions=(), compression=None, zlib=False, + complevel=4, shuffle=True, szip_coding='nn', szip_pixels_per_block=8, + blosc_shuffle=1, fletcher32=False, contiguous=False, chunksizes=None, endian='native', - least_significant_digit=None,fill_value=None)`** + least_significant_digit=None,fill_value=None,chunk_cache=None)`** - `netCDF4.Variable` constructor. + `Variable` constructor. - **`group`**: `netCDF4.Group` or `netCDF4.Dataset` instance to associate with variable. + **`group`**: `Group` or `Dataset` instance to associate with variable. **`name`**: Name of the variable. - **`datatype`**: `netCDF4.Variable` data type. Can be specified by providing a + **`datatype`**: `Variable` data type. Can be specified by providing a numpy dtype object, or a string that describes a numpy dtype object. Supported values, corresponding to `str` attribute of numpy dtype objects, include `'f4'` (32-bit floating point), `'f8'` (64-bit floating @@ -3186,41 +3972,64 @@ behavior is similar to Fortran or Matlab, but different than numpy. typecodes can also be used (`'f'` instead of `'f4'`, `'d'` instead of `'f8'`, `'h'` or `'s'` instead of `'i2'`, `'b'` or `'B'` instead of `'i1'`, `'c'` instead of `'S1'`, and `'i'` or `'l'` instead of - `'i4'`). `datatype` can also be a `netCDF4.CompoundType` instance - (for a structured, or compound array), a `netCDF4.VLType` instance + `'i4'`). `datatype` can also be a `CompoundType` instance + (for a structured, or compound array), a `VLType` instance (for a variable-length array), or the python `str` builtin (for a variable-length string array). Numpy string and unicode datatypes with length greater than one are aliases for `str`. - - **`dimensions`**: a tuple containing the variable's dimension names + + **`dimensions`**: a tuple containing the variable's Dimension instances (defined previously with `createDimension`). Default is an empty tuple which means the variable is a scalar (and therefore has no dimensions). - - **`zlib`**: if `True`, data assigned to the `netCDF4.Variable` - instance is compressed on disk. Default `False`. 
- - **`complevel`**: the level of zlib compression to use (1 is the fastest, + + **`compression`**: compression algorithm to use. + Currently `zlib`,`szip`,`zstd`,`bzip2`,`blosc_lz`,`blosc_lz4`,`blosc_lz4hc`, + `blosc_zlib` and `blosc_zstd` are supported. + Default is `None` (no compression). All of the compressors except + `zlib` and `szip` use the HDF5 plugin architecture. + + **`zlib`**: if `True`, data assigned to the `Variable` + instance is compressed on disk. Default `False`. Deprecated - use + `compression='zlib'` instead. + + **`complevel`**: the level of compression to use (1 is the fastest, but poorest compression, 9 is the slowest but best compression). Default 4. - Ignored if `zlib=False`. - + Ignored if `compression=None` or `szip`. A value of 0 disables compression. + **`shuffle`**: if `True`, the HDF5 shuffle filter is applied - to improve compression. Default `True`. Ignored if `zlib=False`. - + to improve zlib compression. Default `True`. Ignored unless `compression = 'zlib'`. + + **`blosc_shuffle`**: shuffle filter inside blosc compressor (only + relevant if compression kwarg set to one of the blosc compressors). + Can be 0 (no blosc shuffle), 1 (bytewise shuffle) or 2 (bitwise + shuffle)). Default is 1. Ignored if blosc compressor not used. + + **`szip_coding`**: szip coding method. Can be `ec` (entropy coding) + or `nn` (nearest neighbor coding). Default is `nn`. + Ignored if szip compressor not used. + + **`szip_pixels_per_block`**: Can be 4,8,16 or 32 (Default 8). + Ignored if szip compressor not used. + **`fletcher32`**: if `True` (default `False`), the Fletcher32 checksum algorithm is used for error detection. - + **`contiguous`**: if `True` (default `False`), the variable data is stored contiguously on disk. Default `False`. Setting to `True` for - a variable with an unlimited dimension will trigger an error. - + a variable with an unlimited dimension will trigger an error. Fixed + size variables (with no unlimited dimension) with no compression + filters are contiguous by default. + **`chunksizes`**: Can be used to specify the HDF5 chunksizes for each dimension of the variable. A detailed discussion of HDF chunking and I/O performance is available - [here](http://www.hdfgroup.org/HDF5/doc/H5.user/Chunking.html). + [here](https://support.hdfgroup.org/HDF5/doc/Advanced/Chunking). + The default chunking scheme in the netcdf-c library is discussed + [here](https://www.unidata.ucar.edu/software/netcdf/documentation/NUG/netcdf_perf_chunking.html). Basically, you want the chunk size for each dimension to match as closely as possible the size of the data block that users will read from the file. `chunksizes` cannot be set if `contiguous=True`. - + **`endian`**: Can be used to control whether the data is stored in little or big endian format on disk. Possible values are `little, big` or `native` (default). The library @@ -3229,45 +4038,122 @@ behavior is similar to Fortran or Matlab, but different than numpy. opposite format as the one used to create the file, there may be some performance advantage to be gained by setting the endian-ness. For netCDF 3 files (that don't use HDF5), only `endian='native'` is allowed. - - The `zlib, complevel, shuffle, fletcher32, contiguous` and `chunksizes` + + The `compression, zlib, complevel, shuffle, fletcher32, contiguous` and `chunksizes` keywords are silently ignored for netCDF 3 files that do not use HDF5. - - **`least_significant_digit`**: If specified, variable data will be - truncated (quantized). 
In conjunction with `zlib=True` this produces + + **`least_significant_digit`**: If this or `significant_digits` are specified, + variable data will be truncated (quantized). + In conjunction with `compression='zlib'` this produces 'lossy', but significantly more efficient compression. For example, if `least_significant_digit=1`, data will be quantized using around(scale*data)/scale, where scale = 2**bits, and bits is determined so that a precision of 0.1 is retained (in this case bits=4). Default is `None`, or no quantization. - - **`fill_value`**: If specified, the default netCDF `_FillValue` (the - value that the variable gets filled with before any data is written to it) - is replaced with this value. If fill_value is set to `False`, then - the variable is not pre-filled. The default netCDF fill values can be found - in `netCDF4.default_fillvals`. - ***Note***: `netCDF4.Variable` instances should be created using the - `netCDF4.Dataset.createVariable` method of a `netCDF4.Dataset` or - `netCDF4.Group` instance, not using this class directly. + **`significant_digits`**: New in version 1.6.0. + As described for `least_significant_digit` + except the number of significant digits retained is prescribed independent + of the floating point exponent. Default `None` - no quantization done. + + **`quantize_mode`**: New in version 1.6.0. Controls + the quantization algorithm (default 'BitGroom', 'BitRound' and + 'GranularBitRound' also available). The 'GranularBitRound' + algorithm may result in better compression for typical geophysical datasets. + Ignored if `significant_digts` not specified. If 'BitRound' is used, then + `significant_digits` is interpreted as binary (not decimal) digits. + + **`fill_value`**: If specified, the default netCDF fill value (the + value that the variable gets filled with before any data is written to it) + is replaced with this value, and the `_FillValue` attribute is set. + If fill_value is set to `False`, then the variable is not pre-filled. + The default netCDF fill values can be found in the dictionary `netCDF4.default_fillvals`. + If not set, the default fill value will be used but no `_FillValue` attribute will be created + (this is the default behavior of the netcdf-c library). If you want to use the + default fill value, but have the `_FillValue` attribute set, use + `fill_value='default'` (note - this only works for primitive data types). `Variable.get_fill_value` + can be used to retrieve the fill value, even if the `_FillValue` attribute is not set. + + **`chunk_cache`**: If specified, sets the chunk cache size for this variable. + Persists as long as Dataset is open. Use `set_var_chunk_cache` to + change it when Dataset is re-opened. + + ***Note***: `Variable` instances should be created using the + `Dataset.createVariable` method of a `Dataset` or + `Group` instance, not using this class directly. 
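An illustrative sketch (editor's addition, not part of the patch) pulling together the keywords documented above: zlib compression, quantization via `significant_digits`/`quantize_mode` (requires netcdf-c >= 4.9.0), explicit chunk sizes, and `fill_value='default'`. The filename and chunk layout are hypothetical choices.

```python
import numpy as np
import netCDF4

nc = netCDF4.Dataset("compression_demo.nc", "w", format="NETCDF4")
nc.createDimension("time", None)           # unlimited
nc.createDimension("lat", 73)
nc.createDimension("lon", 144)
temp = nc.createVariable(
    "temp", "f4", ("time", "lat", "lon"),
    compression="zlib", complevel=4,        # lossless deflate
    significant_digits=3,                   # lossy quantization before compression
    quantize_mode="GranularBitRound",
    chunksizes=(1, 73, 144),                # one chunk per time step
    fill_value="default",                   # default fill value plus _FillValue attribute
)
temp[0] = 15.0 + 8.0 * np.random.random_sample((73, 144))
print(temp.filters())                       # report the filters actually applied
nc.close()
```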
""" - cdef int ierr, ndims, icontiguous, ideflate_level, numdims, _grpid + cdef int ierr, ndims, icontiguous, icomplevel, numdims, _grpid, nsd, + cdef unsigned int iblosc_complevel,iblosc_blocksize,iblosc_compressor,iblosc_shuffle + cdef int iszip_coding, iszip_pixels_per_block + cdef char namstring[NC_MAX_NAME+1] cdef char *varname cdef nc_type xtype - cdef int *dimids + cdef int *dimids = NULL cdef size_t sizep, nelemsp cdef size_t *chunksizesp cdef float preemptionp + cdef int nc_complex_typeid, complex_base_type_id, complex_dim_id + cdef int _nc_endian + + # Extra information for more helpful error messages + error_info = f"(variable '{name}', group '{grp.name}')" + # flag to indicate that orthogonal indexing is supported self.__orthogonal_indexing__ = True - # if complevel is set to zero, set zlib to False. + # For backwards compatibility, deprecated zlib kwarg takes + # precedence if compression kwarg not set. + if zlib and not compression: + compression = 'zlib' + # if complevel is set to zero, turn off compression if not complevel: - zlib = False - # if dimensions is a string, convert to a tuple - # this prevents a common error that occurs when - # dimensions = 'lat' instead of ('lat',) - if type(dimensions) == str or type(dimensions) == bytes or type(dimensions) == unicode: - dimensions = dimensions, + compression = None + zlib = False + szip = False + zstd = False + bzip2 = False + blosc_lz = False + blosc_lz4 = False + blosc_lz4hc = False + #blosc_snappy = False + blosc_zlib = False + blosc_zstd = False + if compression == 'zlib': + zlib = True + elif compression == 'szip': + szip = True + elif compression == 'zstd': + zstd = True + elif compression == 'bzip2': + bzip2 = True + elif compression == 'blosc_lz': + blosc_lz = True + elif compression == 'blosc_lz4': + blosc_lz4 = True + elif compression == 'blosc_lz4hc': + blosc_lz4hc = True + #elif compression == 'blosc_snappy': + # blosc_snappy = True + elif compression == 'blosc_zlib': + blosc_zlib = True + elif compression == 'blosc_zstd': + blosc_zstd = True + elif not compression: + compression = None # if compression evaluates to False, set to None. + pass + else: + raise ValueError(f"Unsupported value for compression kwarg {error_info}") + + if grp.data_model.startswith("NETCDF3") and endian != 'native': + raise RuntimeError( + f"only endian='native' allowed for NETCDF3 files, got '{endian}' {error_info}" + ) + + if endian not in ("little", "big", "native"): + raise ValueError( + f"'endian' keyword argument must be 'little','big' or 'native', got '{endian}' " + f"{error_info}" + ) + self._grpid = grp._grpid # make a weakref to group to avoid circular ref (issue 218) # keep strong reference the default behaviour (issue 251) @@ -3275,10 +4161,13 @@ behavior is similar to Fortran or Matlab, but different than numpy. self._grp = weakref.proxy(grp) else: self._grp = grp - user_type = isinstance(datatype, CompoundType) or \ - isinstance(datatype, VLType) or \ - isinstance(datatype, EnumType) or \ - datatype == str + + self._iscompound = isinstance(datatype, CompoundType) + self._isvlen = isinstance(datatype, VLType) or datatype==str + self._isenum = isinstance(datatype, EnumType) + + user_type = self._iscompound or self._isvlen or self._isenum + # convert to a real numpy datatype object if necessary. if not user_type and type(datatype) != numpy.dtype: datatype = numpy.dtype(datatype) @@ -3289,35 +4178,29 @@ behavior is similar to Fortran or Matlab, but different than numpy. 
datatype.kind == 'U')): datatype = str user_type = True + self._isvlen = True + + # If datatype is complex, convert to compoundtype + is_complex = dtype_is_complex(datatype) + if is_complex and not self._grp.auto_complex: + raise ValueError( + f"complex datatypes ({datatype}) are only supported with `auto_complex=True`" + ) + # check if endian keyword consistent with datatype specification. - dtype_endian = getattr(datatype,'byteorder',None) - if dtype_endian == '=': dtype_endian='native' - if dtype_endian == '>': dtype_endian='big' - if dtype_endian == '<': dtype_endian='little' - if dtype_endian == '|': dtype_endian=None + dtype_endian = _dtype_endian_lookup[getattr(datatype, "byteorder", None)] if dtype_endian is not None and dtype_endian != endian: - if dtype_endian == 'native' and endian == sys.byteorder: - pass - else: - # endian keyword prevails, issue warning - msg = 'endian-ness of dtype and endian kwarg do not match, using endian kwarg' - #msg = 'endian-ness of dtype and endian kwarg do not match, dtype over-riding endian kwarg' - warnings.warn(msg) - #endian = dtype_endian # dtype prevails + if not (dtype_endian == 'native' and endian == sys.byteorder): + warnings.warn('endian-ness of dtype and endian kwarg do not match, using endian kwarg') + # check validity of datatype. - self._isprimitive = False - self._iscompound = False - self._isvlen = False - self._isenum = False + self._isprimitive = not user_type if user_type: - if isinstance(datatype, CompoundType): - self._iscompound = True + if self._iscompound: self._cmptype = datatype - if isinstance(datatype, VLType) or datatype==str: - self._isvlen = True + if self._isvlen: self._vltype = datatype - if isinstance(datatype, EnumType): - self._isenum = True + if self._isenum: self._enumtype = datatype if datatype==str: if grp.data_model != 'NETCDF4': @@ -3325,21 +4208,29 @@ behavior is similar to Fortran or Matlab, but different than numpy. 'Variable length strings are only supported for the ' 'NETCDF4 format. For other formats, consider using ' 'netCDF4.stringtochar to convert string arrays into ' - 'character arrays with an additional dimension.') + 'character arrays with an additional dimension.' + f' {error_info}') datatype = VLType(self._grp, str, None) self._vltype = datatype xtype = datatype._nc_type + # make sure this a valid user defined datatype defined in this Group + with nogil: + ierr = nc_inq_type(self._grpid, xtype, namstring, NULL) + _ensure_nc_success(ierr, extra_msg=error_info) # dtype variable attribute is a numpy datatype object. self.dtype = datatype.dtype elif datatype.str[1:] in _supportedtypes: - self._isprimitive = True # find netCDF primitive data type corresponding to # specified numpy data type. xtype = _nptonctype[datatype.str[1:]] # dtype variable attribute is a numpy datatype object. self.dtype = datatype + elif is_complex: + xtype = _complex_types[datatype.str[1:]] + self.dtype = datatype else: - raise TypeError('illegal primitive data type, must be one of %s, got %s' % (_supportedtypes,datatype)) + raise TypeError(f'Illegal primitive data type, must be one of {_supportedtypes}, got {datatype} {error_info}') + if 'id' in kwargs: self._varid = kwargs['id'] else: @@ -3348,68 +4239,126 @@ behavior is similar to Fortran or Matlab, but different than numpy. ndims = len(dimensions) # find dimension ids. 
if ndims: - dims = [] dimids = malloc(sizeof(int) * ndims) - for n from 0 <= n < ndims: - dimname = dimensions[n] - # look for dimension in this group, and if not - # found there, look in parent (and it's parent, etc, back to root). - dim = _find_dim(grp, dimname) - if dim is None: - raise KeyError("dimension %s not defined in group %s or any group in it's family tree" % (dimname, grp.path)) - dimids[n] = dim._dimid - dims.append(dim) + for n in range(ndims): + dimids[n] = dimensions[n]._dimid # go into define mode if it's a netCDF 3 compatible # file format. Be careful to exit define mode before # any exceptions are raised. if grp.data_model != 'NETCDF4': grp._redef() # define variable. - if ndims: - ierr = nc_def_var(self._grpid, varname, xtype, ndims, + with nogil: + ierr = pfnc_def_var(self._grpid, varname, xtype, ndims, dimids, &self._varid) + if ndims: free(dimids) - else: # a scalar variable. - ierr = nc_def_var(self._grpid, varname, xtype, ndims, - NULL, &self._varid) + + if ierr != NC_NOERR: + if grp.data_model != 'NETCDF4': + grp._enddef() + _ensure_nc_success(ierr, extra_msg=error_info) + # set chunk cache size if desired # default is 1mb per var, can cause problems when many (1000's) # of vars are created. This change only lasts as long as file is # open. if grp.data_model.startswith('NETCDF4') and chunk_cache is not None: - ierr = nc_get_var_chunk_cache(self._grpid, self._varid, &sizep, - &nelemsp, &preemptionp) - _ensure_nc_success(ierr) + with nogil: + ierr = nc_get_var_chunk_cache(self._grpid, self._varid, &sizep, + &nelemsp, &preemptionp) + _ensure_nc_success(ierr, extra_msg=error_info) # reset chunk cache size, leave other parameters unchanged. sizep = chunk_cache - ierr = nc_set_var_chunk_cache(self._grpid, self._varid, sizep, - nelemsp, preemptionp) - _ensure_nc_success(ierr) - if ierr != NC_NOERR: - if grp.data_model != 'NETCDF4': grp._enddef() - _ensure_nc_success(ierr) - # set zlib, shuffle, chunking, fletcher32 and endian + with nogil: + ierr = nc_set_var_chunk_cache(self._grpid, self._varid, sizep, + nelemsp, preemptionp) + _ensure_nc_success(ierr, extra_msg=error_info) + + # set compression, shuffle, chunking, fletcher32 and endian # variable settings. # don't bother for NETCDF3* formats. - # for NETCDF3* formats, the zlib,shuffle,chunking, - # and fletcher32 are silently ignored. Only + # for NETCDF3* formats, the comopression,zlib,shuffle,chunking, + # and fletcher32 flags are silently ignored. Only # endian='native' allowed for NETCDF3. if grp.data_model in ['NETCDF4','NETCDF4_CLASSIC']: - # set zlib and shuffle parameters. - if zlib and ndims: # don't bother for scalar variable - ideflate_level = complevel - if shuffle: - ierr = nc_def_var_deflate(self._grpid, self._varid, 1, 1, ideflate_level) - else: - ierr = nc_def_var_deflate(self._grpid, self._varid, 0, 1, ideflate_level) - if ierr != NC_NOERR: - if grp.data_model != 'NETCDF4': grp._enddef() - _ensure_nc_success(ierr) + # set compression and shuffle parameters. 
+ if compression is not None and ndims: # don't bother for scalar variable + if zlib: + icomplevel = complevel + if shuffle: + with nogil: + ierr = nc_def_var_deflate(self._grpid, self._varid, 1, 1, icomplevel) + else: + with nogil: + ierr = nc_def_var_deflate(self._grpid, self._varid, 0, 1, icomplevel) + if ierr != NC_NOERR: + if grp.data_model != 'NETCDF4': grp._enddef() + _ensure_nc_success(ierr, extra_msg=error_info) + + if szip: + if not __has_szip_support__: + raise ValueError("compression='szip' only works if linked version of hdf5 has szip functionality enabled") + try: + iszip_coding = _szip_dict[szip_coding] + except KeyError: + raise ValueError("unknown szip coding ('ec' or 'nn' supported)") + iszip_pixels_per_block = szip_pixels_per_block + with nogil: + ierr = nc_def_var_szip(self._grpid, self._varid, iszip_coding, iszip_pixels_per_block) + if ierr != NC_NOERR: + if grp.data_model != 'NETCDF4': + grp._enddef() + _ensure_nc_success(ierr, extra_msg=error_info) + + if zstd: + if not __has_zstandard_support__: + raise NetCDF4MissingFeatureException("compression='zstd'", "4.9.0") + + icomplevel = complevel + with nogil: + ierr = nc_def_var_zstandard(self._grpid, self._varid, icomplevel) + if ierr != NC_NOERR: + if grp.data_model != 'NETCDF4': + grp._enddef() + _ensure_nc_success(ierr, extra_msg=error_info) + + if bzip2: + if not __has_bzip2_support__: + raise NetCDF4MissingFeatureException("compression='bzip2'", "4.9.0") + + icomplevel = complevel + with nogil: + ierr = nc_def_var_bzip2(self._grpid, self._varid, icomplevel) + if ierr != NC_NOERR: + if grp.data_model != 'NETCDF4': + grp._enddef() + _ensure_nc_success(ierr, extra_msg=error_info) + + if blosc_zstd or blosc_lz or blosc_lz4 or blosc_lz4hc or blosc_zlib: + if not __has_blosc_support__: + raise NetCDF4MissingFeatureException("compression='blosc_*'", "4.9.0") + + iblosc_compressor = _blosc_dict[compression] + iblosc_shuffle = blosc_shuffle + iblosc_blocksize = 0 # not currently used by c lib + iblosc_complevel = complevel + with nogil: + ierr = nc_def_var_blosc(self._grpid, self._varid, + iblosc_compressor, + iblosc_complevel,iblosc_blocksize, + iblosc_shuffle) + if ierr != NC_NOERR: + if grp.data_model != 'NETCDF4': + grp._enddef() + _ensure_nc_success(ierr, extra_msg=error_info) + # set checksum. if fletcher32 and ndims: # don't bother for scalar variable - ierr = nc_def_var_fletcher32(self._grpid, self._varid, 1) + with nogil: + ierr = nc_def_var_fletcher32(self._grpid, self._varid, 1) if ierr != NC_NOERR: if grp.data_model != 'NETCDF4': grp._enddef() - _ensure_nc_success(ierr) + _ensure_nc_success(ierr, extra_msg=error_info) # set chunking stuff. if ndims: # don't bother for scalar variable. if contiguous: @@ -3425,49 +4374,79 @@ behavior is similar to Fortran or Matlab, but different than numpy. 
if grp.data_model != 'NETCDF4': grp._enddef() raise ValueError('chunksizes must be a sequence with the same length as dimensions') chunksizesp = malloc(sizeof(size_t) * ndims) - for n from 0 <= n < ndims: - if not dims[n].isunlimited() and \ - chunksizes[n] > dims[n].size: + for n in range(ndims): + if not dimensions[n].isunlimited() and \ + chunksizes[n] > dimensions[n].size: msg = 'chunksize cannot exceed dimension size' raise ValueError(msg) chunksizesp[n] = chunksizes[n] if chunksizes is not None or contiguous: - ierr = nc_def_var_chunking(self._grpid, self._varid, icontiguous, chunksizesp) + with nogil: + ierr = nc_def_var_chunking(self._grpid, self._varid, icontiguous, chunksizesp) free(chunksizesp) if ierr != NC_NOERR: if grp.data_model != 'NETCDF4': grp._enddef() - _ensure_nc_success(ierr) + _ensure_nc_success(ierr, extra_msg=error_info) + # set endian-ness of variable - if endian == 'little': - ierr = nc_def_var_endian(self._grpid, self._varid, NC_ENDIAN_LITTLE) - elif endian == 'big': - ierr = nc_def_var_endian(self._grpid, self._varid, NC_ENDIAN_BIG) - elif endian == 'native': - pass # this is the default format. - else: - raise ValueError("'endian' keyword argument must be 'little','big' or 'native', got '%s'" % endian) + if endian != 'native': + _nc_endian = NC_ENDIAN_LITTLE if endian == "little" else NC_ENDIAN_BIG + with nogil: + ierr = nc_def_var_endian(self._grpid, self._varid, _nc_endian) + _ensure_nc_success(ierr, extra_msg=error_info) + + # set quantization + if significant_digits is not None: + if not __has_quantization_support__: + raise ValueError( + "significant_digits kwarg only works with netcdf-c >= 4.9.0. " + "To enable, install Cython, make sure you have version 4.9.0 " + "or higher netcdf-c, and rebuild netcdf4-python. Otherwise, " + f"use least_significant_digit kwarg for quantization. {error_info}" + ) + + nsd = significant_digits + if quantize_mode == 'BitGroom': + with nogil: + ierr = nc_def_var_quantize(self._grpid, + self._varid, NC_QUANTIZE_BITGROOM, nsd) + elif quantize_mode == 'GranularBitRound': + with nogil: + ierr = nc_def_var_quantize(self._grpid, + self._varid, NC_QUANTIZE_GRANULARBR, nsd) + elif quantize_mode == 'BitRound': + ierr = nc_def_var_quantize(self._grpid, + self._varid, NC_QUANTIZE_BITROUND, nsd) + else: + raise ValueError("'quantize_mode' keyword argument must be 'BitGroom','GranularBitRound' or 'BitRound', got '%s'" % quantize_mode) + if ierr != NC_NOERR: if grp.data_model != 'NETCDF4': grp._enddef() - _ensure_nc_success(ierr) - else: - if endian != 'native': - msg="only endian='native' allowed for NETCDF3 files" - raise RuntimeError(msg) + _ensure_nc_success(ierr, extra_msg=error_info) + # set a fill value for this variable if fill_value keyword # given. This avoids the HDF5 overhead of deleting and # recreating the dataset if it is set later (after the enddef). if fill_value is not None: - if not fill_value and isinstance(fill_value,bool): + if fill_value is False: # no filling for this variable if fill_value==False. - if not self._isprimitive: - # no fill values for VLEN and compound variables - # anyway. 
- ierr = 0 - else: - ierr = nc_def_var_fill(self._grpid, self._varid, 1, NULL) + if self._isprimitive: + with nogil: + ierr = nc_def_var_fill(self._grpid, self._varid, 1, NULL) if ierr != NC_NOERR: if grp.data_model != 'NETCDF4': grp._enddef() - _ensure_nc_success(ierr) + _ensure_nc_success(ierr, extra_msg=error_info) + elif fill_value == 'default': + if self._isprimitive: + fillval = numpy.array(default_fillvals[self.dtype.str[1:]]) + if not fillval.dtype.isnative: fillval.byteswap(True) + _set_att(self._grp, self._varid, '_FillValue',\ + fillval, xtype=xtype) + else: + msg = """ +WARNING: there is no default fill value for this data type, so fill_value='default' +does not do anything.""" + warnings.warn(msg) else: if self._isprimitive or self._isenum or \ (self._isvlen and self.dtype == str): @@ -3485,29 +4464,74 @@ behavior is similar to Fortran or Matlab, but different than numpy. self.least_significant_digit = least_significant_digit # leave define mode if not a NETCDF4 format file. if grp.data_model != 'NETCDF4': grp._enddef() + + # If the variable is a complex number, we need to check if + # it was created using a compound type or a complex + # dimension, and then make the equivalent class in Python + if is_complex: + self._fix_complex_numbers() + # count how many unlimited dimensions there are. self._nunlimdim = 0 - for dimname in dimensions: - # look in current group, and parents for dim. - dim = _find_dim(self._grp, dimname) + for dim in dimensions: if dim.isunlimited(): self._nunlimdim = self._nunlimdim + 1 + # set ndim attribute (number of dimensions). - with nogil: - ierr = nc_inq_varndims(self._grpid, self._varid, &numdims) - _ensure_nc_success(ierr) - self.ndim = numdims + self.ndim = _inq_varndims(self._grpid, self._varid, self._grp.auto_complex) self._name = name # default for automatically applying scale_factor and # add_offset, and converting to/from masked arrays is True. self.scale = True self.mask = True + # issue 809: default for converting arrays with no missing values to + # regular numpy arrays + self.always_mask = True # default is to automatically convert to/from character # to string arrays when _Encoding variable attribute is set. self.chartostring = True + # propagate _ncstring_attrs__ setting from parent group. + self._ncstring_attrs__ = grp._ncstring_attrs__ if 'least_significant_digit' in self.ncattrs(): self._has_lsd = True # avoid calling nc_get_vars for strided slices by default. - self._no_get_vars = True + # a fix for strided slice access using HDF5 was added + # in 4.6.2. + # always use nc_get_vars for strided access with OpenDAP (issue #838). 
+ if __netcdf4libversion__ >= "4.6.2" or\ + self._grp.filepath().startswith('http'): + self._use_get_vars = True + else: + self._use_get_vars = False + + def _fix_complex_numbers(self): + cdef char name[NC_MAX_NAME + 1] + cdef int complex_typeid, complex_dim_id + + error_info = f"(variable '{name}', group '{self._grp.name}')" + + if pfnc_var_is_complex_type(self._grpid, self._varid): + self._isprimitive = False + self._iscompound = True + with nogil: + ierr = pfnc_inq_var_complex_base_type(self._grpid, self._varid, &complex_typeid) + _ensure_nc_success(ierr, extra_msg=error_info) + + np_complex_type = _nctonptype[complex_typeid] + compound_complex_type = f"{np_complex_type}, {np_complex_type}" + + self._cmptype = CompoundType( + self._grp, compound_complex_type, "complex", typeid=complex_typeid + ) + else: + with nogil: + ierr = pfnc_get_complex_dim(self._grpid, &complex_dim_id) + _ensure_nc_success(ierr, extra_msg=error_info) + with nogil: + ierr = nc_inq_dimname(self._grpid, complex_dim_id, name) + _ensure_nc_success(ierr, extra_msg=error_info) + self._grp.dimensions[name.decode("utf-8")] = Dimension( + self._grp, name, size=2, id=complex_dim_id + ) def __array__(self): # numpy special method that returns a numpy array. @@ -3516,49 +4540,48 @@ behavior is similar to Fortran or Matlab, but different than numpy. return self[...] def __repr__(self): - if python3: - return self.__unicode__() - else: - return unicode(self).encode('utf-8') + return self.__str__() - def __unicode__(self): + def __str__(self): cdef int ierr, no_fill if not dir(self._grp): return 'Variable object no longer valid' - ncdump_var = ['%r\n' % type(self)] - dimnames = tuple([_tostr(dimname) for dimname in self.dimensions]) - attrs = [' %s: %s\n' % (name,self.getncattr(name)) for name in\ - self.ncattrs()] + ncdump = [repr(type(self)).replace("._netCDF4", "")] + show_more_dtype = True if self._iscompound: - ncdump_var.append('%s %s(%s)\n' %\ - ('compound',self._name,', '.join(dimnames))) + kind = 'compound' elif self._isvlen: - ncdump_var.append('%s %s(%s)\n' %\ - ('vlen',self._name,', '.join(dimnames))) + kind = 'vlen' elif self._isenum: - ncdump_var.append('%s %s(%s)\n' %\ - ('enum',self._name,', '.join(dimnames))) + kind = 'enum' else: - ncdump_var.append('%s %s(%s)\n' %\ - (self.dtype,self._name,', '.join(dimnames))) - ncdump_var = ncdump_var + attrs - if self._iscompound: - ncdump_var.append('compound data type: %s\n' % self.dtype) - elif self._isvlen: - ncdump_var.append('vlen data type: %s\n' % self.dtype) - elif self._isenum: - ncdump_var.append('enum data type: %s\n' % self.dtype) + show_more_dtype = False + kind = str(self.dtype) + dimnames = tuple(_tostr(dimname) for dimname in self.dimensions) + ncdump.append('%s %s(%s)' %\ + (kind, self._name, ', '.join(dimnames))) + for name in self.ncattrs(): + ncdump.append(' %s: %s' % (name, self.getncattr(name))) + if show_more_dtype: + ncdump.append('%s data type: %s' % (kind, self.dtype)) unlimdims = [] for dimname in self.dimensions: dim = _find_dim(self._grp, dimname) if dim.isunlimited(): unlimdims.append(dimname) - if (self._grp.path != '/'): ncdump_var.append('path = %s\n' % self._grp.path) - ncdump_var.append('unlimited dimensions: %s\n' % ', '.join(unlimdims)) - ncdump_var.append('current shape = %s\n' % repr(self.shape)) - with nogil: - ierr = nc_inq_var_fill(self._grpid,self._varid,&no_fill,NULL) - _ensure_nc_success(ierr) + if (self._grp.path != '/'): ncdump.append('path = %s' % self._grp.path) + ncdump.append('unlimited dimensions: %s' % ', 
'.join(unlimdims)) + ncdump.append('current shape = %r' % (self.shape,)) + if __netcdf4libversion__ < '4.5.1' and\ + self._grp.file_format.startswith('NETCDF3'): + # issue #908: no_fill not correct for NETCDF3 files before 4.5.1 + # before 4.5.1 there was no way to turn off filling on a + # per-variable basis for classic files. + no_fill=0 + else: + with nogil: + ierr = nc_inq_var_fill(self._grpid,self._varid,&no_fill,NULL) + _ensure_nc_success(ierr) if self._isprimitive: if no_fill != 1: try: @@ -3567,39 +4590,32 @@ behavior is similar to Fortran or Matlab, but different than numpy. except AttributeError: fillval = default_fillvals[self.dtype.str[1:]] if self.dtype.str[1:] in ['u1','i1']: - msg = 'filling on, default _FillValue of %s ignored\n' % fillval + msg = 'filling on, default _FillValue of %s ignored' % fillval else: - msg = 'filling on, default _FillValue of %s used\n' % fillval - ncdump_var.append(msg) + msg = 'filling on, default _FillValue of %s used' % fillval + ncdump.append(msg) else: - ncdump_var.append('filling off\n') + ncdump.append('filling off') - return ''.join(ncdump_var) + return '\n'.join(ncdump) def _getdims(self): # Private method to get variables's dimension names - cdef int ierr, numdims, n, nn + cdef int ierr, dimid cdef char namstring[NC_MAX_NAME+1] - cdef int *dimids - # get number of dimensions for this variable. - with nogil: - ierr = nc_inq_varndims(self._grpid, self._varid, &numdims) - _ensure_nc_success(ierr) - dimids = malloc(sizeof(int) * numdims) - # get dimension ids. - with nogil: - ierr = nc_inq_vardimid(self._grpid, self._varid, dimids) - _ensure_nc_success(ierr) + + dimids = _inq_vardimid(self._grpid, self._varid, self._grp.auto_complex) + # loop over dimensions, retrieve names. dimensions = () - for nn from 0 <= nn < numdims: + for dimid in dimids: with nogil: - ierr = nc_inq_dimname(self._grpid, dimids[nn], namstring) + ierr = nc_inq_dimname(self._grpid, dimid, namstring) _ensure_nc_success(ierr) name = namstring.decode('utf-8') dimensions = dimensions + (name,) - free(dimids) + return dimensions def _getname(self): @@ -3621,7 +4637,7 @@ behavior is similar to Fortran or Matlab, but different than numpy. property datatype: """numpy data type (for primitive data types) or - VLType/CompoundType/EnumType instance + VLType/CompoundType/EnumType instance (for compound, vlen or enum data types)""" def __get__(self): if self._iscompound: @@ -3648,7 +4664,8 @@ behavior is similar to Fortran or Matlab, but different than numpy. property size: """Return the number of stored elements.""" def __get__(self): - return numpy.prod(self.shape) + # issue #957: add int since prod(())=1.0 + return int(numpy.prod(self.shape)) property dimensions: """get variables's dimension names""" @@ -3662,14 +4679,44 @@ behavior is similar to Fortran or Matlab, but different than numpy. """ **`group(self)`** -return the group that this `netCDF4.Variable` is a member of.""" +return the group that this `Variable` is a member of.""" return self._grp + def get_fill_value(self): + """ +**`get_fill_value(self)`** + +return the fill value associated with this `Variable` (returns `None` if data is not +pre-filled). 
Works even if default fill value was used, and `_FillValue` attribute +does not exist.""" + cdef int ierr, no_fill + with nogil: + ierr = nc_inq_var_fill(self._grpid,self._varid,&no_fill,NULL) + _ensure_nc_success(ierr) + if no_fill == 1: # no filling for this variable + return None + else: + try: + fillval = self._FillValue + return fillval + except AttributeError: + # _FillValue attribute not set, see if we can retrieve _FillValue. + # for primitive data types. + if self._isprimitive: + #return numpy.array(default_fillvals[self.dtype.str[1:]],self.dtype) + fillval = numpy.empty((),self.dtype) + ierr=nc_inq_var_fill(self._grpid,self._varid,&no_fill,PyArray_DATA(fillval)) + _ensure_nc_success(ierr) + return fillval + else: + # no default filling for non-primitive data types. + return None + def ncattrs(self): """ **`ncattrs(self)`** -return netCDF attribute names for this `netCDF4.Variable` in a list.""" +return netCDF attribute names for this `Variable` in a list.""" return _get_att_names(self._grpid, self._varid) def setncattr(self,name,value): @@ -3679,8 +4726,17 @@ return netCDF attribute names for this `netCDF4.Variable` in a list.""" set a netCDF variable attribute using name,value pair. Use if you need to set a netCDF attribute with the same name as one of the reserved python attributes.""" + cdef nc_type xtype + xtype=-99 + # issue #959 - trying to set _FillValue results in mysterious + # error when close method is called so catch it here. It is + # already caught in __setattr__. + if name == '_FillValue': + msg='_FillValue attribute must be set when variable is '+\ + 'created (using fill_value keyword to createVariable)' + raise AttributeError(msg) if self._grp.data_model != 'NETCDF4': self._grp._redef() - _set_att(self._grp, self._varid, name, value) + _set_att(self._grp, self._varid, name, value, xtype=xtype, force_ncstring=self._ncstring_attrs__) if self._grp.data_model != 'NETCDF4': self._grp._enddef() def setncattr_string(self,name,value): @@ -3695,7 +4751,7 @@ Use if you need to set an attribute to an array of variable-length strings.""" xtype=-99 if self._grp.data_model != 'NETCDF4': msg='file format does not support NC_STRING attributes' - raise IOError(msg) + raise OSError(msg) _set_att(self._grp, self._varid, name, value, xtype=xtype, force_ncstring=True) def setncatts(self,attdict): @@ -3734,7 +4790,8 @@ attributes.""" bytestr = _strencode(name) attname = bytestr if self._grp.data_model != 'NETCDF4': self._grp._redef() - ierr = nc_del_att(self._grpid, self._varid, attname) + with nogil: + ierr = nc_del_att(self._grpid, self._varid, attname) if self._grp.data_model != 'NETCDF4': self._grp._enddef() _ensure_nc_success(ierr) @@ -3743,24 +4800,110 @@ attributes.""" **`filters(self)`** return dictionary containing HDF5 filter parameters.""" - cdef int ierr,ideflate,ishuffle,ideflate_level,ifletcher32 - filtdict = {'zlib':False,'shuffle':False,'complevel':0,'fletcher32':False} + cdef int ierr,ideflate,ishuffle,icomplevel,ifletcher32 + cdef int izstd=0 + cdef int ibzip2=0 + cdef int iblosc=0 + cdef int iszip=0 + cdef int iszip_coding=0 + cdef int iszip_pixels_per_block=0 + cdef int icomplevel_zstd=0 + cdef int icomplevel_bzip2=0 + cdef unsigned int iblosc_shuffle=0 + cdef unsigned int iblosc_compressor=0 + cdef unsigned int iblosc_blocksize=0 + cdef unsigned int iblosc_complevel=0 + filtdict = {'zlib':False,'szip':False,'zstd':False,'bzip2':False,'blosc':False,'shuffle':False,'complevel':0,'fletcher32':False} if self._grp.data_model not in ['NETCDF4_CLASSIC','NETCDF4']: return 
with nogil: - ierr = nc_inq_var_deflate(self._grpid, self._varid, &ishuffle, &ideflate, &ideflate_level) + ierr = nc_inq_var_deflate(self._grpid, self._varid, &ishuffle, &ideflate, &icomplevel) _ensure_nc_success(ierr) with nogil: ierr = nc_inq_var_fletcher32(self._grpid, self._varid, &ifletcher32) _ensure_nc_success(ierr) + if __has_zstandard_support__: + with nogil: + ierr = nc_inq_var_zstandard(self._grpid, self._varid, &izstd,\ + &icomplevel_zstd) + if ierr != 0: izstd=0 + # _ensure_nc_success(ierr) + if __has_bzip2_support__: + with nogil: + ierr = nc_inq_var_bzip2(self._grpid, self._varid, &ibzip2,\ + &icomplevel_bzip2) + if ierr != 0: ibzip2=0 + #_ensure_nc_success(ierr) + if __has_blosc_support__: + with nogil: + ierr = nc_inq_var_blosc(self._grpid, self._varid, &iblosc,\ + &iblosc_compressor,&iblosc_complevel,&iblosc_blocksize,&iblosc_shuffle) + if ierr != 0: iblosc=0 + #_ensure_nc_success(ierr) + if __has_szip_support__: + with nogil: + ierr = nc_inq_var_szip(self._grpid, self._varid, &iszip_coding,\ + &iszip_pixels_per_block) + if ierr != 0: + iszip=0 + else: + if iszip_coding: + iszip=1 + else: + iszip=0 + #_ensure_nc_success(ierr) if ideflate: filtdict['zlib']=True - filtdict['complevel']=ideflate_level + filtdict['complevel']=icomplevel + if izstd: + filtdict['zstd']=True + filtdict['complevel']=icomplevel_zstd + if ibzip2: + filtdict['bzip2']=True + filtdict['complevel']=icomplevel_bzip2 + if iblosc: + blosc_compressor = iblosc_compressor + filtdict['blosc']={'compressor':_blosc_dict_inv[blosc_compressor],'shuffle':iblosc_shuffle} + filtdict['complevel']=iblosc_complevel + if iszip: + szip_coding = iszip_coding + filtdict['szip']={'coding':_szip_dict_inv[szip_coding],'pixels_per_block':iszip_pixels_per_block} if ishuffle: filtdict['shuffle']=True if ifletcher32: filtdict['fletcher32']=True return filtdict + def quantization(self): + """ +**`quantization(self)`** + +return number of significant digits and the algorithm used in quantization. +Returns None if quantization not active. 
+""" + if not __has_quantization_support__: + return None + + cdef int ierr, nsd, quantize_mode + if self._grp.data_model not in ['NETCDF4_CLASSIC','NETCDF4']: + return None + + with nogil: + ierr = nc_inq_var_quantize(self._grpid, self._varid, &quantize_mode, &nsd) + _ensure_nc_success(ierr) + if quantize_mode == NC_NOQUANTIZE: + return None + + if quantize_mode == NC_QUANTIZE_GRANULARBR: + sig_digits = nsd + quant_mode = 'GranularBitRound' + elif quantize_mode == NC_QUANTIZE_BITROUND: + sig_digits = nsd # interpreted as bits, not decimal + quant_mode = 'BitRound' + else: + sig_digits = nsd + quant_mode = 'BitGroom' + return sig_digits, quant_mode + def endian(self): """ **`endian(self)`** @@ -3796,7 +4939,7 @@ each dimension is returned.""" ierr = nc_inq_var_chunking(self._grpid, self._varid, &icontiguous, chunksizesp) _ensure_nc_success(ierr) chunksizes=[] - for n from 0 <= n < ndims: + for n in range(ndims): chunksizes.append(chunksizesp[n]) free(chunksizesp) if icontiguous: @@ -3845,8 +4988,9 @@ details.""" preemptionp = preemption else: preemptionp = preemption_orig - ierr = nc_set_var_chunk_cache(self._grpid, self._varid, sizep, - nelemsp, preemptionp) + with nogil: + ierr = nc_set_var_chunk_cache(self._grpid, self._varid, sizep, + nelemsp, preemptionp) _ensure_nc_success(ierr) def __delattr__(self,name): @@ -3904,7 +5048,8 @@ details.""" values = [] for name in names: values.append(_get_att(self._grp, self._varid, name)) - return OrderedDict(zip(names,values)) + return dict(zip(names, values)) + else: raise AttributeError elif name in _private_atts: @@ -3916,7 +5061,7 @@ details.""" """ **`renameAttribute(self, oldname, newname)`** -rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" +rename a `Variable` attribute named `oldname` to `newname`.""" cdef int ierr cdef char *oldnamec cdef char *newnamec @@ -3924,7 +5069,8 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" oldnamec = bytestr bytestr = _strencode(newname) newnamec = bytestr - ierr = nc_rename_att(self._grpid, self._varid, oldnamec, newnamec) + with nogil: + ierr = nc_rename_att(self._grpid, self._varid, oldnamec, newnamec) _ensure_nc_success(ierr) def __getitem__(self, elem): @@ -3934,7 +5080,7 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" # arguments to the nc_get_var() function, and is much more easy # to use. start, count, stride, put_ind =\ - _StartCountStride(elem,self.shape,dimensions=self.dimensions,grp=self._grp,no_get_vars=self._no_get_vars) + _StartCountStride(elem,self.shape,dimensions=self.dimensions,grp=self._grp,use_get_vars=self._use_get_vars) datashape = _out_array_shape(count) if self._isvlen: data = numpy.empty(datashape, dtype='O') @@ -3947,7 +5093,7 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" # put_ind for this dimension is set to -1 by _StartCountStride. squeeze = data.ndim * [slice(None),] for i,n in enumerate(put_ind.shape[:-1]): - if n == 1 and put_ind[...,i].ravel()[0] == -1: + if n == 1 and put_ind.size > 0 and put_ind[...,i].ravel()[0] == -1: squeeze[i] = 0 # Reshape the arrays so we can iterate over them. @@ -3967,7 +5113,11 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" # special case of scalar VLEN data[0] = datout else: - data[tuple(i)] = datout.reshape(shape) + if self._isvlen and not shape: + # issue #1306 - convert length 1 object array to string + data[tuple(i)] = datout.item() + else: + data[tuple(i)] = datout.reshape(shape) # Remove extra singleton dimensions. 
if hasattr(data,'shape'): @@ -3984,7 +5134,8 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" # set_auto_mask or set_auto_maskandscale), perform # automatic conversion to masked array using # missing_value/_Fill_Value. - # ignore for compound, vlen or enum datatypes. + # applied for primitive and (non-string) vlen, + # ignored for compound and enum datatypes. try: # check to see if scale_factor and add_offset is valid (issue 176). if hasattr(self,'scale_factor'): float(self.scale_factor) if hasattr(self,'add_offset'): float(self.add_offset) @@ -3995,29 +5146,34 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" msg = 'invalid scale_factor or add_offset attribute, no unpacking done...' warnings.warn(msg) - if self.mask and (self._isprimitive or self._isenum): + if self.mask and (self._isprimitive or self._isenum):\ data = self._toma(data) - - # if attribute _Unsigned is True, and variable has signed integer - # dtype, return view with corresponding unsigned dtype (issue #656) - if self.scale: # only do this if autoscale option is on. - is_unsigned = getattr(self, '_Unsigned', False) - if is_unsigned and data.dtype.kind == 'i': - data = data.view('u%s' % data.dtype.itemsize) - - if self.scale and self._isprimitive and valid_scaleoffset: - # if variable has scale_factor and add_offset attributes, rescale. - if hasattr(self, 'scale_factor') and hasattr(self, 'add_offset') and\ - (self.add_offset != 0.0 or self.scale_factor != 1.0): - data = data*self.scale_factor + self.add_offset - # else if variable has only scale_factor attributes, rescale. + else: + # if attribute _Unsigned is "true", and variable has signed integer + # dtype, return view with corresponding unsigned dtype (issue #656) + if self.scale: # only do this if autoscale option is on. + is_unsigned = getattr(self, '_Unsigned', False) in ["true","True"] + if is_unsigned and data.dtype.kind == 'i': + data=data.view('%su%s'%(data.dtype.byteorder,data.dtype.itemsize)) + + if self.scale and\ + (self._isprimitive or (self._isvlen and self.dtype != str)) and\ + valid_scaleoffset: + # if variable has scale_factor and add_offset attributes, apply + # them. + if hasattr(self, 'scale_factor') and hasattr(self, 'add_offset'): + if self.add_offset != 0.0 or self.scale_factor != 1.0: + data = data*self.scale_factor + self.add_offset + else: + data = data.astype(self.scale_factor.dtype) # issue 913 + # else if variable has only scale_factor attribute, rescale. elif hasattr(self, 'scale_factor') and self.scale_factor != 1.0: data = data*self.scale_factor - # else if variable has only add_offset attributes, rescale. + # else if variable has only add_offset attribute, add offset. elif hasattr(self, 'add_offset') and self.add_offset != 0.0: data = data + self.add_offset - # if _Encoding is specified for a character variable, return + # if _Encoding is specified for a character variable, return # a numpy array of strings with one less dimension. 
if self.chartostring and getattr(self.dtype,'kind',None) == 'S' and\ getattr(self.dtype,'itemsize',None) == 1: @@ -4031,25 +5187,43 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" # also make sure slice is along last dimension matchdim = True for cnt in count: - if cnt[-1] != self.shape[-1]: + if cnt[-1] != self.shape[-1]: matchdim = False break if matchdim: data = chartostring(data, encoding=encoding) + # if structure array contains char arrays, return view as strings + # if _Encoding att set (issue #773) + if self._iscompound and \ + self._cmptype.dtype != self._cmptype.dtype_view and \ + self.chartostring: +# self.chartostring and getattr(self,'_Encoding',None) is not None: + data = data.view(self._cmptype.dtype_view) return data def _toma(self,data): cdef int ierr, no_fill + # if attribute _Unsigned is "true", and variable has signed integer + # dtype, return view with corresponding unsigned dtype (issues #656, + # #794) + # _Unsigned attribute must be "true" or "True" (string). Issue #1232. + is_unsigned = getattr(self, '_Unsigned', False) in ["True","true"] + is_unsigned_int = is_unsigned and data.dtype.kind == 'i' + if self.scale and is_unsigned_int: # only do this if autoscale option is on. + dtype_unsigned_int='%su%s' % (data.dtype.byteorder,data.dtype.itemsize) + data = data.view(dtype_unsigned_int) # private function for creating a masked array, masking missing_values # and/or _FillValues. - totalmask = numpy.zeros(data.shape, numpy.bool) + totalmask = numpy.zeros(data.shape, numpy.bool_) fill_value = None safe_missval = self._check_safecast('missing_value') if safe_missval: mval = numpy.array(self.missing_value, self.dtype) - # create mask from missing values. - mvalmask = numpy.zeros(data.shape, numpy.bool) + if self.scale and is_unsigned_int: + mval = mval.view(dtype_unsigned_int) + # create mask from missing values. + mvalmask = numpy.zeros(data.shape, numpy.bool_) if mval.shape == (): # mval a scalar. mval = [mval] # make into iterable. for m in mval: @@ -4058,12 +5232,12 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" mvalisnan = numpy.isnan(m) except TypeError: # isnan fails on some dtypes (issue 206) mvalisnan = False - if mvalisnan: + if mvalisnan: mvalmask += numpy.isnan(data) else: mvalmask += data==m if mvalmask.any(): - # set fill_value for masked array + # set fill_value for masked array # to missing_value (or 1st element # if missing_value is a vector). fill_value = mval[0] @@ -4072,6 +5246,8 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" safe_fillval = self._check_safecast('_FillValue') if safe_fillval: fval = numpy.array(self._FillValue, self.dtype) + if self.scale and is_unsigned_int: + fval = fval.view(dtype_unsigned_int) # is _FillValue a NaN? try: fvalisnan = numpy.isnan(fval) @@ -4090,9 +5266,16 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" # issue 209: don't return masked array if variable filling # is disabled. else: - with nogil: - ierr = nc_inq_var_fill(self._grpid,self._varid,&no_fill,NULL) - _ensure_nc_success(ierr) + if __netcdf4libversion__ < '4.5.1' and\ + self._grp.file_format.startswith('NETCDF3'): + # issue #908: no_fill not correct for NETCDF3 files before 4.5.1 + # before 4.5.1 there was no way to turn off filling on a + # per-variable basis for classic files. 
+ no_fill=0 + else: + with nogil: + ierr = nc_inq_var_fill(self._grpid,self._varid,&no_fill,NULL) + _ensure_nc_success(ierr) # if no_fill is not 1, and not a byte variable, then use default fill value. # from http://www.unidata.ucar.edu/software/netcdf/docs/netcdf-c/Fill-Values.html#Fill-Values # "If you need a fill value for a byte variable, it is recommended @@ -4105,7 +5288,8 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" # type, signed or unsigned, because the byte ranges are too # small to assume one of the values should appear as a missing # value unless a _FillValue attribute is set explicitly." - if no_fill != 1 and self.dtype.str[1:] not in ['u1','i1']: + # (do this only for non-vlens, since vlens don't have a default _FillValue) + if not self._isvlen and (no_fill != 1 or self.dtype.str[1:] not in ['u1','i1']): fillval = numpy.array(default_fillvals[self.dtype.str[1:]],self.dtype) has_fillval = data == fillval # if data is an array scalar, has_fillval will be a boolean. @@ -4126,7 +5310,7 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" safe_validrange = self._check_safecast('valid_range') safe_validmin = self._check_safecast('valid_min') safe_validmax = self._check_safecast('valid_max') - if safe_validrange and len(self.valid_range) == 2: + if safe_validrange and self.valid_range.size == 2: validmin = numpy.array(self.valid_range[0], self.dtype) validmax = numpy.array(self.valid_range[1], self.dtype) else: @@ -4134,12 +5318,16 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" validmin = numpy.array(self.valid_min, self.dtype) if safe_validmax: validmax = numpy.array(self.valid_max, self.dtype) + if validmin is not None and self.scale and is_unsigned_int: + validmin = validmin.view(dtype_unsigned_int) + if validmax is not None and self.scale and is_unsigned_int: + validmax = validmax.view(dtype_unsigned_int) # http://www.unidata.ucar.edu/software/netcdf/docs/attribute_conventions.html). - # "If the data type is byte and _FillValue + # "If the data type is byte and _FillValue # is not explicitly defined, # then the valid range should include all possible values. # Otherwise, the valid range should exclude the _FillValue - # (whether defined explicitly or by default) as follows. + # (whether defined explicitly or by default) as follows. # If the _FillValue is positive then it defines a valid maximum, # otherwise it defines a valid minimum." byte_type = self.dtype.str[1:] in ['u1','i1'] @@ -4149,26 +5337,88 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" fval = numpy.array(default_fillvals[self.dtype.str[1:]],self.dtype) if byte_type: fval = None if self.dtype.kind != 'S': # don't set mask for character data - if validmin is None and (fval is not None and fval <= 0): - validmin = fval - if validmax is None and (fval is not None and fval > 0): - validmax = fval + # issues #761 and #748: setting valid_min/valid_max to the + # _FillVaue is too surprising for many users (despite the + # netcdf docs attribute best practices suggesting clients + # should do this). + #if validmin is None and (fval is not None and fval <= 0): + # validmin = fval + #if validmax is None and (fval is not None and fval > 0): + # validmax = fval if validmin is not None: totalmask += data < validmin if validmax is not None: totalmask += data > validmax if fill_value is None and fval is not None: fill_value = fval + # if all else fails, use default _FillValue as fill_value + # for masked array. 
+ if fill_value is None: + fill_value = default_fillvals[self.dtype.str[1:]] # create masked array with computed mask - if totalmask.any() and fill_value is not None: + masked_values = bool(totalmask.any()) + if masked_values: data = ma.masked_array(data,mask=totalmask,fill_value=fill_value) - # issue 515 scalar array with mask=True should be converted - # to numpy.ma.MaskedConstant to be consistent with slicing - # behavior of masked arrays. - if data.shape == () and data.mask.all(): - # return a scalar numpy masked constant not a 0-d masked array, - # so that data == numpy.ma.masked. - data = data[()] # changed from [...] (issue #662) + else: + # issue #785: always return masked array, if no values masked + data = ma.masked_array(data) + # issue 515 scalar array with mask=True should be converted + # to numpy.ma.MaskedConstant to be consistent with slicing + # behavior of masked arrays. + if data.shape == () and data.mask.all(): + # return a scalar numpy masked constant not a 0-d masked array, + # so that data == numpy.ma.masked. + data = data[()] # changed from [...] (issue #662) + elif not self.always_mask and not masked_values: + # issue #809: return a regular numpy array if requested + # and there are no missing values + data = numpy.array(data, copy=False) + + return data + + def _pack(self,data): + # pack non-masked values using scale_factor and add_offset + if hasattr(self, 'scale_factor') and hasattr(self, 'add_offset'): + data = (data - self.add_offset)/self.scale_factor + if self.dtype.kind in 'iu': data = numpy.around(data) + elif hasattr(self, 'scale_factor'): + data = data/self.scale_factor + if self.dtype.kind in 'iu': data = numpy.around(data) + elif hasattr(self, 'add_offset'): + data = data - self.add_offset + if self.dtype.kind in 'iu': data = numpy.around(data) + if self.dtype != data.dtype: + data = data.astype(self.dtype) # cast data to var type, if necessary. + if ma.isMA(data): + # if underlying data in masked regions of masked array + # corresponds to missing values, don't fill masked array - + # just use underlying data instead + if hasattr(self, 'missing_value') and \ + numpy.all(numpy.isin(data.data[data.mask],self.missing_value)): + data = data.data + else: + if hasattr(self, 'missing_value'): + # if missing value is a scalar, use it as fill_value. + # if missing value is a vector, raise an exception + # since we then don't know how to fill in masked values. + if numpy.array(self.missing_value).shape == (): + fillval = self.missing_value + else: + msg="cannot assign fill_value for masked array when missing_value attribute is not a scalar" + raise RuntimeError(msg) + if numpy.array(fillval).shape != (): + fillval = fillval[0] + elif hasattr(self, '_FillValue'): + fillval = self._FillValue + else: + fillval = default_fillvals[self.dtype.str[1:]] + # some versions of numpy have trouble handling + # MaskedConstants when filling - this is is + # a workaround (issue #850) + if data.shape == (1,) and data.mask.all(): + data = numpy.array([fillval],self.dtype) + else: + data = data.filled(fill_value=fillval) return data def _assign_vlen(self, elem, data): @@ -4185,7 +5435,7 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" msg="data can only be assigned to VLEN variables using integer indices" # check to see that elem is a tuple of integers. # handle negative integers. 
- if isinstance(elem, int): + if _is_int(elem): if ndims > 1: raise IndexError(msg) if elem < 0: @@ -4198,7 +5448,7 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" raise IndexError("Illegal index") elemnew = [] for n,e in enumerate(elem): - if not isinstance(e, int): + if not _is_int(e): raise IndexError(msg) elif e < 0: enew = self.shape[n]+e @@ -4219,7 +5469,7 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" count = [1]*ndims startp = malloc(sizeof(size_t) * ndims) countp = malloc(sizeof(size_t) * ndims) - for n from 0 <= n < ndims: + for n in range(ndims): startp[n] = start[n] countp[n] = count[n] if self.dtype == str: # VLEN string @@ -4229,8 +5479,9 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" encoding = getattr(self,'_Encoding','utf-8') bytestr = _strencode(data,encoding=encoding) strdata[0] = bytestr - ierr = nc_put_vara(self._grpid, self._varid, - startp, countp, strdata) + with nogil: + ierr = nc_put_vara(self._grpid, self._varid, + startp, countp, strdata) _ensure_nc_success(ierr) free(strdata) else: # regular VLEN @@ -4239,9 +5490,10 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" data2 = data vldata = malloc(sizeof(nc_vlen_t)) vldata[0].len = PyArray_SIZE(data2) - vldata[0].p = data2.data - ierr = nc_put_vara(self._grpid, self._varid, - startp, countp, vldata) + vldata[0].p = PyArray_DATA(data2) + with nogil: + ierr = nc_put_vara(self._grpid, self._varid, + startp, countp, vldata) _ensure_nc_success(ierr) free(vldata) free(startp) @@ -4250,15 +5502,20 @@ rename a `netCDF4.Variable` attribute named `oldname` to `newname`.""" def _check_safecast(self, attname): # check to see that variable attribute exists # can can be safely cast to variable data type. + msg="""WARNING: %s not used since it +cannot be safely cast to variable data type""" % attname if hasattr(self, attname): att = numpy.array(self.getncattr(attname)) else: return False - atta = numpy.array(att, self.dtype) + try: + atta = numpy.array(att, self.dtype) + except ValueError: + is_safe = False + warnings.warn(msg) + return is_safe is_safe = _safecast(att,atta) if not is_safe: - msg="""WARNING: %s not used since it -cannot be safely cast to variable data type""" % attname warnings.warn(msg) return is_safe @@ -4281,11 +5538,25 @@ cannot be safely cast to variable data type""" % attname # if data is a string or a bytes object, convert to a numpy string array # whose length is equal to the rightmost dimension of the # variable. - if type(data) in [str,bytes]: data = numpy.asarray(data,dtype='S'+repr(self.shape[-1])) + if type(data) in [str,bytes]: + if encoding == 'ascii': + data = numpy.asarray(data,dtype='S'+repr(self.shape[-1])) + else: + data = numpy.asarray(data,dtype='U'+repr(self.shape[-1])) if data.dtype.kind in ['S','U'] and data.dtype.itemsize > 1: # if data is a numpy string array, convert it to an array # of characters with one more dimension. 
- data = stringtochar(data, encoding=encoding) + data = stringtochar(data, encoding=encoding,n_strlen=self.shape[-1]) + + # if structured data has strings (and _Encoding att set), create view as char arrays + # (issue #773) + if self._iscompound and \ + self._cmptype.dtype != self._cmptype.dtype_view and \ + _set_viewdtype(data.dtype) == self._cmptype.dtype_view and \ + self.chartostring: +# self.chartostring and getattr(self,'_Encoding',None) is not None: + # may need to cast input data to aligned type + data = data.astype(self._cmptype.dtype_view).view(self._cmptype.dtype) if self._isvlen: # if vlen, should be object array (don't try casting) if self.dtype == str: @@ -4315,22 +5586,25 @@ cannot be safely cast to variable data type""" % attname # issue 458, allow Ellipsis to be used for scalar var if type(elem) == type(Ellipsis) and not\ len(self.dimensions): elem = 0 + # pack as integers if desired. + if self.scale: + data = self._pack(data) self._assign_vlen(elem, data) return # A numpy or masked array (or an object supporting the buffer interface) is needed. # Convert if necessary. - if not ma.isMA(data) and not (hasattr(data,'data') and isinstance(data.data,buffer)): + if not ma.isMA(data) and not (hasattr(data,'data') and isinstance(data.data,memoryview)): # if auto scaling is to be done, don't cast to an integer yet. if self.scale and self.dtype.kind in 'iu' and \ hasattr(self, 'scale_factor') or hasattr(self, 'add_offset'): - data = numpy.array(data,numpy.float) + data = numpy.array(data,numpy.float64) else: data = numpy.array(data,self.dtype) # for Enum variable, make sure data is valid. if self._isenum: - test = numpy.zeros(data.shape,numpy.bool) + test = numpy.zeros(data.shape,numpy.bool_) if ma.isMA(data): # fix for new behaviour in numpy.ma in 1.13 (issue #662) for val in self.datatype.enum_dict.values(): @@ -4343,19 +5617,22 @@ cannot be safely cast to variable data type""" % attname raise ValueError(msg) start, count, stride, put_ind =\ - _StartCountStride(elem,self.shape,self.dimensions,self._grp,datashape=data.shape,put=True) + _StartCountStride(elem,self.shape,self.dimensions,self._grp,datashape=data.shape,put=True,use_get_vars=self._use_get_vars) datashape = _out_array_shape(count) # if a numpy scalar, create an array of the right size # and fill with scalar values. if data.shape == (): data = numpy.tile(data,datashape) - # reshape data array by adding extra singleton dimensions - # if needed to conform with start,count,stride. - if len(data.shape) != len(datashape): + # reshape data array if needed to conform with start,count,stride. + if data.ndim != len(datashape) or\ + (data.shape != datashape and data.ndim > 1): # issue #1083 # create a view so shape in caller is not modified (issue 90) - data = data.view() - data.shape = tuple(datashape) + try: # if extra singleton dims, just reshape + data = data.view() + data.shape = tuple(datashape) + except ValueError: # otherwise broadcast + data = numpy.broadcast_to(data, datashape) # Reshape these arrays so we can iterate over them. start = start.reshape((-1, self.ndim or 1)) @@ -4367,55 +5644,10 @@ cannot be safely cast to variable data type""" % attname # exists (improves compression). if self._has_lsd: data = _quantize(data,self.least_significant_digit) - # if auto_scale mode set to True, (through - # a call to set_auto_scale or set_auto_maskandscale), - # perform automatic unpacking using scale_factor/add_offset. 
- # if auto_mask mode is set to True (through a call to - # set_auto_mask or set_auto_maskandscale), perform - # automatic conversion to masked array using - # valid_min,validmax,missing_value,_Fill_Value. - # ignore if not a primitive or enum data type (not compound or vlen). - if self.mask and (self._isprimitive or self._isenum): - # use missing_value as fill value. - # if no missing value set, use _FillValue. - if hasattr(self, 'scale_factor') or hasattr(self, 'add_offset'): - # if not masked, create a masked array. - if not ma.isMA(data): data = self._toma(data) + if self.scale and self._isprimitive: # pack non-masked values using scale_factor and add_offset - if hasattr(self, 'scale_factor') and hasattr(self, 'add_offset'): - data = (data - self.add_offset)/self.scale_factor - if self.dtype.kind in 'iu': data = numpy.around(data) - elif hasattr(self, 'scale_factor'): - data = data/self.scale_factor - if self.dtype.kind in 'iu': data = numpy.around(data) - elif hasattr(self, 'add_offset'): - data = data - self.add_offset - if self.dtype.kind in 'iu': data = numpy.around(data) - if ma.isMA(data): - # if underlying data in masked regions of masked array - # corresponds to missing values, don't fill masked array - - # just use underlying data instead - if hasattr(self, 'missing_value') and \ - numpy.all(numpy.in1d(data.data[data.mask],self.missing_value)): - data = data.data - else: - if hasattr(self, 'missing_value'): - # if missing value is a scalar, use it as fill_value. - # if missing value is a vector, raise an exception - # since we then don't know how to fill in masked values. - if numpy.array(self.missing_value).shape == (): - fillval = self.missing_value - else: - msg="cannot assign fill_value for masked array when missing_value attribute is not a scalar" - raise RuntimeError(msg) - if numpy.array(fillval).shape != (): - fillval = fillval[0] - elif hasattr(self, '_FillValue'): - fillval = self._FillValue - else: - fillval = default_fillvals[self.dtype.str[1:]] - data = data.filled(fill_value=fillval) + data = self._pack(data) # Fill output array with data chunks. for (a,b,c,i) in zip(start, count, stride, put_ind): @@ -4469,10 +5701,10 @@ If `chartostring` is set to `True`, when data is read from a character variable (dtype = `S1`) that has an `_Encoding` attribute, it is converted to a numpy fixed length unicode string array (dtype = `UN`, where `N` is the length of the the rightmost dimension of the variable). The value of `_Encoding` -is the unicode encoding that is used to decode the bytes into strings. +is the unicode encoding that is used to decode the bytes into strings. When numpy string data is written to a variable it is converted back to -indiviual bytes, with the number of bytes in each string equalling the +individual bytes, with the number of bytes in each string equalling the rightmost dimension of the variable. The default value of `chartostring` is `True` @@ -4482,24 +5714,25 @@ The default value of `chartostring` is `True` def use_nc_get_vars(self,use_nc_get_vars): """ -**`use_nc_get_vars(self,_no_get_vars)`** +**`use_nc_get_vars(self,_use_get_vars)`** enable the use of netcdf library routine `nc_get_vars` to retrieve strided variable slices. By default, -`nc_get_vars` not used since it slower than multiple calls -to the unstrided read routine `nc_get_vara` in most cases. +`nc_get_vars` may not used by default (depending on the +version of the netcdf-c library being used) since it may be +slower than multiple calls to the unstrided read routine `nc_get_vara`. 
""" - self._no_get_vars = not bool(use_nc_get_vars) - + self._use_get_vars = bool(use_nc_get_vars) + def set_auto_maskandscale(self,maskandscale): """ **`set_auto_maskandscale(self,maskandscale)`** turn on or off automatic conversion of variable data to and from masked arrays, automatic packing/unpacking of variable -data using `scale_factor` and `add_offset` attributes and +data using `scale_factor` and `add_offset` attributes and automatic conversion of signed integer data to unsigned integer -data if the `_Unsigned` attribute exists. +data if the `_Unsigned` attribute exists and is set to "true" (or "True"). If `maskandscale` is set to `True`, when data is read from a variable it is converted to a masked array if any of the values are exactly @@ -4507,13 +5740,14 @@ equal to the either the netCDF _FillValue or the value specified by the missing_value variable attribute. The fill_value of the masked array is set to the missing_value attribute (if it exists), otherwise the netCDF _FillValue attribute (which has a default value -for each data type). When data is written to a variable, the masked +for each data type). If the variable has no missing_value attribute, the +_FillValue is used instead. If the variable has valid_min/valid_max and +missing_value attributes, data outside the specified range will be masked. +When data is written to a variable, the masked array is converted back to a regular numpy array by replacing all the masked values by the missing_value attribute of the variable (if it exists). If the variable has no missing_value attribute, the _FillValue -is used instead. If the variable has valid_min/valid_max and -missing_value attributes, data outside the specified range will be -set to missing_value. +is used instead. If `maskandscale` is set to `True`, and the variable has a `scale_factor` or an `add_offset` attribute, then data read @@ -4530,13 +5764,13 @@ is assumed zero. If add_offset is present, but scale_factor is missing, scale_factor is assumed to be one. For more information on how `scale_factor` and `add_offset` can be used to provide simple compression, see the -[PSD metadata conventions](http://www.esrl.noaa.gov/psd/data/gridded/conventions/cdc_netcdf_standard.shtml). +[PSL metadata conventions](http://www.esrl.noaa.gov/psl/data/gridded/conventions/cdc_netcdf_standard.shtml). -In addition, if `maskandscale` is set to `True`, and if the variable has an -attribute `_Unsigned` set, and the variable has a signed integer data type, +In addition, if `maskandscale` is set to `True`, and if the variable has an +attribute `_Unsigned` set to "true", and the variable has a signed integer data type, a view to the data is returned with the corresponding unsigned integer data type. This convention is used by the netcdf-java library to save unsigned integer -data in `NETCDF3` or `NETCDF4_CLASSIC` files (since the `NETCDF3` +data in `NETCDF3` or `NETCDF4_CLASSIC` files (since the `NETCDF3` data model does not have unsigned integer data types). The default value of `maskandscale` is `True` @@ -4552,7 +5786,7 @@ turn on or off automatic packing/unpacking of variable data using `scale_factor` and `add_offset` attributes. Also turns on and off automatic conversion of signed integer data to unsigned integer data if the variable has an `_Unsigned` -attribute. +attribute set to "true" or "True". If `scale` is set to `True`, and the variable has a `scale_factor` or an `add_offset` attribute, then data read @@ -4569,20 +5803,20 @@ is assumed zero. 
If add_offset is present, but scale_factor is missing, scale_factor is assumed to be one. For more information on how `scale_factor` and `add_offset` can be used to provide simple compression, see the -[PSD metadata conventions](http://www.esrl.noaa.gov/psd/data/gridded/conventions/cdc_netcdf_standard.shtml). +[PSL metadata conventions](http://www.esrl.noaa.gov/psl/data/gridded/conventions/cdc_netcdf_standard.shtml). -In addition, if `scale` is set to `True`, and if the variable has an -attribute `_Unsigned` set, and the variable has a signed integer data type, +In addition, if `scale` is set to `True`, and if the variable has an +attribute `_Unsigned` set to "true", and the variable has a signed integer data type, a view to the data is returned with the corresponding unsigned integer datatype. This convention is used by the netcdf-java library to save unsigned integer -data in `NETCDF3` or `NETCDF4_CLASSIC` files (since the `NETCDF3` +data in `NETCDF3` or `NETCDF4_CLASSIC` files (since the `NETCDF3` data model does not have unsigned integer data types). The default value of `scale` is `True` (automatic conversions are performed). """ self.scale = bool(scale) - + def set_auto_mask(self,mask): """ **`set_auto_mask(self,mask)`** @@ -4596,24 +5830,54 @@ equal to the either the netCDF _FillValue or the value specified by the missing_value variable attribute. The fill_value of the masked array is set to the missing_value attribute (if it exists), otherwise the netCDF _FillValue attribute (which has a default value -for each data type). When data is written to a variable, the masked +for each data type). If the variable has no missing_value attribute, the +_FillValue is used instead. If the variable has valid_min/valid_max and +missing_value attributes, data outside the specified range will be masked. +When data is written to a variable, the masked array is converted back to a regular numpy array by replacing all the masked values by the missing_value attribute of the variable (if it exists). If the variable has no missing_value attribute, the _FillValue -is used instead. If the variable has valid_min/valid_max and -missing_value attributes, data outside the specified range will be -set to missing_value. +is used instead. The default value of `mask` is `True` (automatic conversions are performed). """ self.mask = bool(mask) - + + def set_always_mask(self,always_mask): + """ +**`set_always_mask(self,always_mask)`** + +turn on or off conversion of data without missing values to regular +numpy arrays. + +`always_mask` is a Boolean determining if automatic conversion of +masked arrays with no missing values to regular numpy arrays shall be +applied. Default is True. Set to False to restore the default behaviour +in versions prior to 1.4.1 (numpy array returned unless missing values are present, +otherwise masked array returned). + """ + self.always_mask = bool(always_mask) + + def set_ncstring_attrs(self,ncstring_attrs): + """ +**`set_always_mask(self,ncstring_attrs)`** + +turn on or off creating NC_STRING string attributes. + +If `ncstring_attrs` is set to `True` then text attributes will be variable-length +NC_STRINGs. + +The default value of `ncstring_attrs` is `False` (writing ascii text attributes as +NC_CHAR). 
+ + """ + self._ncstring_attrs__ = bool(ncstring_attrs) def _put(self,ndarray data,start,count,stride): """Private method to put data into a netCDF variable""" cdef int ierr, ndims - cdef npy_intp totelem + cdef npy_intp totelem, dataelem, i cdef size_t *startp cdef size_t *countp cdef ptrdiff_t *stridep @@ -4635,7 +5899,7 @@ The default value of `mask` is `True` startp = malloc(sizeof(size_t) * ndims) countp = malloc(sizeof(size_t) * ndims) stridep = malloc(sizeof(ptrdiff_t) * ndims) - for n from 0 <= n < ndims: + for n in range(ndims): count[n] = abs(count[n]) # make -1 into +1 countp[n] = count[n] # for neg strides, reverse order (then flip that axis after data read in) @@ -4657,7 +5921,7 @@ The default value of `mask` is `True` raise IndexError('size of data array does not conform to slice') if negstride: # reverse data along axes with negative strides. - data = data[sl].copy() # make sure a copy is made. + data = data[tuple(sl)].copy() # make sure a copy is made. if self._isprimitive or self._iscompound or self._isenum: # primitive, enum or compound data type. # if data type of array doesn't match variable, @@ -4665,17 +5929,23 @@ The default value of `mask` is `True` if self.dtype != data.dtype: data = data.astype(self.dtype) # cast data, if necessary. # byte-swap data in numpy array so that is has native - # endian byte order (this is what netcdf-c expects - + # endian byte order (this is what netcdf-c expects - # issue #554, pull request #555) if not data.dtype.isnative: data = data.byteswap() # strides all 1 or scalar variable, use put_vara (faster) - if sum(stride) == ndims or ndims == 0: - ierr = nc_put_vara(self._grpid, self._varid, - startp, countp, data.data) + if self._grp.auto_complex: + with nogil: + ierr = pfnc_put_vars(self._grpid, self._varid, + startp, countp, stridep, PyArray_DATA(data)) + elif sum(stride) == ndims or ndims == 0: + with nogil: + ierr = nc_put_vara(self._grpid, self._varid, + startp, countp, PyArray_DATA(data)) else: - ierr = nc_put_vars(self._grpid, self._varid, - startp, countp, stridep, data.data) + with nogil: + ierr = nc_put_vars(self._grpid, self._varid, + startp, countp, stridep, PyArray_DATA(data)) _ensure_nc_success(ierr) elif self._isvlen: if data.dtype.char !='O': @@ -4694,26 +5964,27 @@ The default value of `mask` is `True` # each element in struct. # allocate struct array to hold vlen data. strdata = malloc(sizeof(char *)*totelem) - for i from 0<=idata) # allocate struct array to hold vlen data. vldata = malloc(totelem*sizeof(nc_vlen_t)) - for i from 0<=idatabuff)[0] dataarr = elptr if self.dtype != dataarr.dtype.str[1:]: @@ -4721,16 +5992,17 @@ The default value of `mask` is `True` # casting doesn't work ?? 
just raise TypeError raise TypeError("wrong data type in object array: should be %s, got %s" % (self.dtype,dataarr.dtype)) vldata[i].len = PyArray_SIZE(dataarr) - vldata[i].p = dataarr.data - databuff = databuff + data.strides[0] + vldata[i].p = PyArray_DATA(dataarr) + databuff = databuff + PyArray_STRIDES(data)[0] # strides all 1 or scalar variable, use put_vara (faster) if sum(stride) == ndims or ndims == 0: - ierr = nc_put_vara(self._grpid, self._varid, - startp, countp, vldata) + with nogil: + ierr = nc_put_vara(self._grpid, self._varid, + startp, countp, vldata) else: - raise IndexError('strides must all be 1 for vlen variables') - #ierr = nc_put_vars(self._grpid, self._varid, - # startp, countp, stridep, vldata) + with nogil: + ierr = nc_put_vars(self._grpid, self._varid, + startp, countp, stridep, vldata) _ensure_nc_success(ierr) # free the pointer array. free(vldata) @@ -4741,6 +6013,7 @@ The default value of `mask` is `True` def _get(self,start,count,stride): """Private method to retrieve data from a netCDF variable""" cdef int ierr, ndims + cdef npy_intp totelem, i cdef size_t *startp cdef size_t *countp cdef ptrdiff_t *stridep @@ -4767,7 +6040,7 @@ The default value of `mask` is `True` startp = malloc(sizeof(size_t) * ndims) countp = malloc(sizeof(size_t) * ndims) stridep = malloc(sizeof(ptrdiff_t) * ndims) - for n from 0 <= n < ndims: + for n in range(ndims): count[n] = abs(count[n]) # make -1 into +1 countp[n] = count[n] # for neg strides, reverse order (then flip that axis after data read in) @@ -4786,14 +6059,20 @@ The default value of `mask` is `True` # strides all 1 or scalar variable, use get_vara (faster) # if count contains a zero element, no data is being read if 0 not in count: - if sum(stride) == ndims or ndims == 0: + if self._grp.auto_complex: + with nogil: + ierr = pfnc_get_vars(self._grpid, self._varid, + startp, countp, stridep, + PyArray_DATA(data)) + elif sum(stride) == ndims or ndims == 0: with nogil: ierr = nc_get_vara(self._grpid, self._varid, - startp, countp, data.data) + startp, countp, PyArray_DATA(data)) else: with nogil: ierr = nc_get_vars(self._grpid, self._varid, - startp, countp, stridep, data.data) + startp, countp, stridep, + PyArray_DATA(data)) else: ierr = 0 if ierr == NC_EINVALCOORDS: @@ -4816,10 +6095,9 @@ The default value of `mask` is `True` ierr = nc_get_vara(self._grpid, self._varid, startp, countp, strdata) else: - # FIXME: is this a bug in netCDF4? - raise IndexError('strides must all be 1 for string variables') - #ierr = nc_get_vars(self._grpid, self._varid, - # startp, countp, stridep, strdata) + with nogil: + ierr = nc_get_vars(self._grpid, self._varid, + startp, countp, stridep, strdata) if ierr == NC_EINVALCOORDS: raise IndexError elif ierr != NC_NOERR: @@ -4829,12 +6107,16 @@ The default value of `mask` is `True` # use _Encoding attribute to decode string to bytes - if # not given, use 'utf-8'. encoding = getattr(self,'_Encoding','utf-8') - for i from 0<=ivldata[i].p - memcpy(dataarr.data, vldata[i].p, dataarr.nbytes) + memcpy(PyArray_DATA(dataarr), vldata[i].p, dataarr.nbytes) data[i] = dataarr # reshape the output array data = numpy.reshape(data, shapeout) # free vlen data internally allocated in netcdf C lib - ierr = nc_free_vlens(totelem, vldata) + with nogil: + ierr = nc_free_vlens(totelem, vldata) # free the pointer array free(vldata) free(startp) @@ -4876,9 +6159,9 @@ The default value of `mask` is `True` free(stridep) if negstride: # reverse data along axes with negative strides. 
- data = data[sl].copy() # make a copy so data is contiguous. + data = data[tuple(sl)].copy() # make a copy so data is contiguous. # netcdf-c always returns data in native byte order, - # regardless of variable endian-ness. Here we swap the + # regardless of variable endian-ness. Here we swap the # bytes if the variable dtype is not native endian, so the # dtype of the returned numpy array matches the variable dtype. # (pull request #555, issue #554). @@ -4892,23 +6175,29 @@ The default value of `mask` is `True` return data def set_collective(self, value): + """**`set_collective(self,True_or_False)`** + + turn on or off collective parallel IO access. Ignored if file is not + open for parallel access. + """ + if not __has_parallel_support__: + return + + mode = NC_COLLECTIVE if value else NC_INDEPENDENT + with nogil: + ierr = nc_var_par_access(self._grpid, self._varid, + mode) + _ensure_nc_success(ierr) + + + def get_dims(self): """ -**`set_collective(self,True_or_False)`** +**`get_dims(self)`** -turn on or off collective parallel IO access. Ignored if file is not -open for parallel access. +return a tuple of `Dimension` instances associated with this +`Variable`. """ - IF HAS_NC_PAR: - # set collective MPI IO mode on or off - if value: - ierr = nc_var_par_access(self._grpid, self._varid, - NC_COLLECTIVE) - else: - ierr = nc_var_par_access(self._grpid, self._varid, - NC_INDEPENDENT) - _ensure_nc_success(ierr) - ELSE: - pass # does nothing + return tuple(_find_dim(self._grp, dim) for dim in self.dimensions) def __reduce__(self): # raise error is user tries to pickle a Variable object. @@ -4918,34 +6207,30 @@ open for parallel access. cdef class CompoundType: """ -A `netCDF4.CompoundType` instance is used to describe a compound data -type, and can be passed to the the `netCDF4.Dataset.createVariable` method of -a `netCDF4.Dataset` or `netCDF4.Group` instance. +A `CompoundType` instance is used to describe a compound data +type, and can be passed to the the `Dataset.createVariable` method of +a `Dataset` or `Group` instance. Compound data types map to numpy structured arrays. -See `netCDF4.CompoundType.__init__` for more details. +See `CompoundType.__init__` for more details. The instance variables `dtype` and `name` should not be modified by the user. """ cdef public nc_type _nc_type - cdef public dtype, name - __pdoc__['CompoundType.name'] = \ - """String name.""" - __pdoc__['CompoundType.dtype'] = \ - """A numpy dtype object describing the compound data type.""" + cdef public dtype, dtype_view, name def __init__(self, grp, object dt, object dtype_name, **kwargs): """ ***`__init__(group, datatype, datatype_name)`*** CompoundType constructor. - **`group`**: `netCDF4.Group` instance to associate with the compound datatype. + **`grp`**: `Group` instance to associate with the compound datatype. - **`datatype`**: A numpy dtype object describing a structured (a.k.a record) + **`dt`**: A numpy dtype object describing a structured (a.k.a record) array. Can be composed of homogeneous numeric or character data types, or other structured array data types. - **`datatype_name`**: a Python string containing a description of the + **`dtype_name`**: a Python string containing a description of the compound data type. ***Note 1***: When creating nested compound data types, @@ -4953,9 +6238,9 @@ the user. instances (so create CompoundType instances for the innermost structures first). 
- ***Note 2***: `netCDF4.CompoundType` instances should be created using the - `netCDF4.Dataset.createCompoundType` - method of a `netCDF4.Dataset` or `netCDF4.Group` instance, not using this class directly. + ***Note 2***: `CompoundType` instances should be created using the + `Dataset.createCompoundType` method of a `Dataset` or + `Group` instance, not using this class directly. """ cdef nc_type xtype # convert dt to a numpy datatype object @@ -4968,24 +6253,30 @@ the user. # (this may or may not be still true, but empirical # evidence suggests that segfaults occur if this # alignment step is skipped - see issue #705). + # numpy string subdtypes (i.e. 'S80') are + # automatically converted to character array + # subtypes (i.e. ('S1',80)). If '_Encoding' + # variable attribute is set, data will be converted + # to and from the string array representation with views. dt = _set_alignment(numpy.dtype(dt)) + # create a view datatype for converting char arrays to/from strings + dtview = _set_viewdtype(numpy.dtype(dt)) if 'typeid' in kwargs: xtype = kwargs['typeid'] else: xtype = _def_compound(grp, dt, dtype_name) self._nc_type = xtype self.dtype = dt + self.dtype_view = dtview self.name = dtype_name def __repr__(self): - if python3: - return self.__unicode__() - else: - return unicode(self).encode('utf-8') + return self.__str__() - def __unicode__(self): - return repr(type(self))+": name = '%s', numpy dtype = %s\n" %\ - (self.name,self.dtype) + def __str__(self): + typ = repr(type(self)).replace("._netCDF4", "") + return "%s: name = '%s', numpy dtype = %s" %\ + (typ, self.name, self.dtype) def __reduce__(self): # raise error is user tries to pickle a CompoundType object. @@ -5004,11 +6295,38 @@ def _set_alignment(dt): raise TypeError('nested structured dtype arrays not supported') else: dtx = dt.fields[name][0] + else: + # convert character string elements to char arrays + if fmt.kind == 'S' and fmt.itemsize != 1: + dtx = numpy.dtype('(%s,)S1' % fmt.itemsize) + else: + # primitive data type + dtx = dt.fields[name][0] + formats.append(dtx) + # leave out offsets, they will be re-computed to preserve alignment. + dtype_dict = {'names':names,'formats':formats} + return numpy.dtype(dtype_dict, align=True) + +def _set_viewdtype(dt): + # recursively change character array dtypes to string dtypes + names = dt.names; formats = [] + for name in names: + fmt = dt.fields[name][0] + if fmt.kind == 'V': + if fmt.shape == (): + dtx = _set_viewdtype(dt.fields[name][0]) + else: + if fmt.subdtype[0].kind == 'V': # structured dtype + raise TypeError('nested structured dtype arrays not supported') + elif fmt.subdtype[0].kind == 'S' and len(dt.fields[name][0].shape) == 1: + lenchar = dt.fields[name][0].shape[0] + dtx = numpy.dtype('S%s' % lenchar) + else: + dtx = dt.fields[name][0] else: # primitive data type dtx = dt.fields[name][0] formats.append(dtx) - # leave out offsets, they will be re-computed to preserve alignment. dtype_dict = {'names':names,'formats':formats} return numpy.dtype(dtype_dict, align=True) @@ -5016,7 +6334,7 @@ cdef _def_compound(grp, object dt, object dtype_name): # private function used to construct a netcdf compound data type # from a numpy dtype object by CompoundType.__init__. 
cdef nc_type xtype, xtype_tmp - cdef int ierr, ndims + cdef int ierr, ndims, grpid cdef size_t offset, size cdef char *namstring cdef char *nested_namstring @@ -5024,7 +6342,9 @@ cdef _def_compound(grp, object dt, object dtype_name): bytestr = _strencode(dtype_name) namstring = bytestr size = dt.itemsize - ierr = nc_def_compound(grp._grpid, size, namstring, &xtype) + grpid = grp._grpid + with nogil: + ierr = nc_def_compound(grpid, size, namstring, &xtype) _ensure_nc_success(ierr) names = list(dt.fields.keys()) formats = [v[0] for v in dt.fields.values()] @@ -5042,8 +6362,9 @@ cdef _def_compound(grp, object dt, object dtype_name): xtype_tmp = _nptonctype[format.str[1:]] except KeyError: raise ValueError('Unsupported compound type element') - ierr = nc_insert_compound(grp._grpid, xtype, namstring, - offset, xtype_tmp) + with nogil: + ierr = nc_insert_compound(grpid, xtype, namstring, + offset, xtype_tmp) _ensure_nc_success(ierr) else: if format.shape == (): # nested scalar compound type @@ -5051,21 +6372,23 @@ cdef _def_compound(grp, object dt, object dtype_name): xtype_tmp = _find_cmptype(grp, format) bytestr = _strencode(name) nested_namstring = bytestr - ierr = nc_insert_compound(grp._grpid, xtype,\ - nested_namstring,\ - offset, xtype_tmp) + with nogil: + ierr = nc_insert_compound(grpid, xtype,\ + nested_namstring,\ + offset, xtype_tmp) _ensure_nc_success(ierr) else: # nested array compound element ndims = len(format.shape) dim_sizes = malloc(sizeof(int) * ndims) - for n from 0 <= n < ndims: + for n in range(ndims): dim_sizes[n] = format.shape[n] if format.subdtype[0].kind != 'V': # primitive type. try: xtype_tmp = _nptonctype[format.subdtype[0].str[1:]] except KeyError: raise ValueError('Unsupported compound type element') - ierr = nc_insert_array_compound(grp._grpid,xtype,namstring, + with nogil: + ierr = nc_insert_array_compound(grpid,xtype,namstring, offset,xtype_tmp,ndims,dim_sizes) _ensure_nc_success(ierr) else: # nested array compound type. @@ -5076,10 +6399,11 @@ cdef _def_compound(grp, object dt, object dtype_name): # xtype_tmp = _find_cmptype(grp, format.subdtype[0]) # bytestr = _strencode(name) # nested_namstring = bytestr - # ierr = nc_insert_array_compound(grp._grpid,xtype,\ - # nested_namstring,\ - # offset,xtype_tmp,\ - # ndims,dim_sizes) + # with nogil: + # ierr = nc_insert_array_compound(grpid,xtype,\ + # nested_namstring,\ + # offset,xtype_tmp,\ + # ndims,dim_sizes) # _ensure_nc_success(ierr) free(dim_sizes) return xtype @@ -5095,9 +6419,10 @@ cdef _find_cmptype(grp, dtype): names2 = cmpdt.dtype.fields.keys() formats1 = [v[0] for v in dtype.fields.values()] formats2 = [v[0] for v in cmpdt.dtype.fields.values()] + formats2v = [v[0] for v in cmpdt.dtype_view.fields.values()] # match names, formats, but not offsets (they may be changed # by netcdf lib). - if names1==names2 and formats1==formats2: + if names1==names2 and formats1==formats2 or (formats1 == formats2v): match = True break if not match: @@ -5133,7 +6458,7 @@ cdef _read_compound(group, nc_type xtype, endian=None): names = [] formats = [] offsets = [] - for nf from 0 <= nf < nfields: + for nf in range(nfields): with nogil: ierr = nc_inq_compound_field(_grpid, xtype, @@ -5161,7 +6486,7 @@ cdef _read_compound(group, nc_type xtype, endian=None): # if numdims=0, not an array. field_shape = () if numdims != 0: - for ndim from 0 <= ndim < numdims: + for ndim in range(numdims): field_shape = field_shape + (dim_sizes[ndim],) free(dim_sizes) # check to see if this field is a nested compound type. 
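The compound-type changes above (the new `dtype_view` attribute on `CompoundType`, the `_set_viewdtype` helper, and the char-array/string view handling from issue #773) are easiest to see through the public API. The following is a minimal sketch and is not part of the patch: the file name, dimension, and field names are illustrative, and it assumes a NETCDF4-format file so compound types are available.

import numpy as np
from netCDF4 import Dataset

nc = Dataset("compound_example.nc", "w")  # hypothetical file name
nc.createDimension("obs", 3)
# A structured dtype with a fixed-length string field. Per the changes above,
# an 'S10' field is stored as an ('S1', 10) character array, while dtype_view
# keeps the string form so reads and writes can use plain byte strings.
dt = np.dtype([("station", "S10"), ("temp", "f4")])
station_t = nc.createCompoundType(dt, "station_t")
v = nc.createVariable("stations", station_t, ("obs",))
data = np.empty(3, dt)
data["station"] = [b"alpha", b"bravo", b"charlie"]
data["temp"] = [280.1, 281.5, 279.9]
v[:] = data
print(v[:]["station"])  # should read back as fixed-length strings via dtype_view
nc.close()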
@@ -5194,27 +6519,23 @@ cdef _read_compound(group, nc_type xtype, endian=None): cdef class VLType: """ -A `netCDF4.VLType` instance is used to describe a variable length (VLEN) data -type, and can be passed to the the `netCDF4.Dataset.createVariable` method of -a `netCDF4.Dataset` or `netCDF4.Group` instance. See -`netCDF4.VLType.__init__` for more details. +A `VLType` instance is used to describe a variable length (VLEN) data +type, and can be passed to the the `Dataset.createVariable` method of +a `Dataset` or `Group` instance. See +`VLType.__init__` for more details. The instance variables `dtype` and `name` should not be modified by the user. """ cdef public nc_type _nc_type cdef public dtype, name - __pdoc__['VLType.name'] = \ - """String name.""" - __pdoc__['VLType.dtype'] = \ - """A numpy dtype object describing the component type for the VLEN.""" def __init__(self, grp, object dt, object dtype_name, **kwargs): """ **`__init__(group, datatype, datatype_name)`** VLType constructor. - **`group`**: `netCDF4.Group` instance to associate with the VLEN datatype. + **`group`**: `Group` instance to associate with the VLEN datatype. **`datatype`**: An numpy dtype object describing the component type for the variable length array. @@ -5222,9 +6543,9 @@ the user. **`datatype_name`**: a Python string containing a description of the VLEN data type. - ***`Note`***: `netCDF4.VLType` instances should be created using the - `netCDF4.Dataset.createVLType` - method of a `netCDF4.Dataset` or `netCDF4.Group` instance, not using this class directly. + ***`Note`***: `VLType` instances should be created using the + `Dataset.createVLType` method of a `Dataset` or + `Group` instance, not using this class directly. """ cdef nc_type xtype if 'typeid' in kwargs: @@ -5239,17 +6560,15 @@ the user. self.name = dtype_name def __repr__(self): - if python3: - return self.__unicode__() - else: - return unicode(self).encode('utf-8') + return self.__str__() - def __unicode__(self): + def __str__(self): + typ = repr(type(self)).replace("._netCDF4", "") if self.dtype == str: - return repr(type(self))+': string type' + return '%r: string type' % (typ,) else: - return repr(type(self))+": name = '%s', numpy dtype = %s\n" %\ - (self.name, self.dtype) + return "%r: name = '%s', numpy dtype = %s" %\ + (typ, self.name, self.dtype) def __reduce__(self): # raise error is user tries to pickle a VLType object. @@ -5259,10 +6578,11 @@ cdef _def_vlen(grp, object dt, object dtype_name): # private function used to construct a netcdf VLEN data type # from a numpy dtype object or python str object by VLType.__init__. cdef nc_type xtype, xtype_tmp - cdef int ierr, ndims + cdef int ierr, ndims, grpid cdef size_t offset, size cdef char *namstring cdef char *nested_namstring + grpid = grp._grpid if dt == str: # python string, use NC_STRING xtype = NC_STRING # dtype_name ignored @@ -5274,7 +6594,8 @@ cdef _def_vlen(grp, object dt, object dtype_name): # find netCDF primitive data type corresponding to # specified numpy data type. xtype_tmp = _nptonctype[dt.str[1:]] - ierr = nc_def_vlen(grp._grpid, namstring, xtype_tmp, &xtype); + with nogil: + ierr = nc_def_vlen(grpid, namstring, xtype_tmp, &xtype); _ensure_nc_success(ierr) else: raise KeyError("unsupported datatype specified for VLEN") @@ -5285,17 +6606,17 @@ cdef _read_vlen(group, nc_type xtype, endian=None): # construct a corresponding numpy dtype instance, # then use that to create a VLType instance. # called by _get_types, _get_vars. 
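# --- Illustrative sketch (not part of the patch) ----------------------------
# Typical use of the VLType machinery above: a variable-length (ragged)
# integer type created through Dataset.createVLType(), with an object array
# of differing-length rows written to a VLEN variable.  File and variable
# names are hypothetical.
import numpy as np
from netCDF4 import Dataset

with Dataset('vlen_sketch.nc', 'w') as ds:
    vlen_t = ds.createVLType(np.int32, 'ragged_int')
    ds.createDimension('row', 3)
    v = ds.createVariable('ragged', vlen_t, ('row',))
    data = np.empty(3, dtype=object)       # each row may differ in length
    data[0] = np.array([1, 2, 3], dtype=np.int32)
    data[1] = np.array([4], dtype=np.int32)
    data[2] = np.array([5, 6], dtype=np.int32)
    v[:] = data
# -----------------------------------------------------------------------------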
- cdef int ierr, _grpid + cdef int ierr, grpid cdef size_t vlsize cdef nc_type base_xtype cdef char vl_namstring[NC_MAX_NAME+1] - _grpid = group._grpid + grpid = group._grpid if xtype == NC_STRING: dt = str name = None else: with nogil: - ierr = nc_inq_vlen(_grpid, xtype, vl_namstring, &vlsize, &base_xtype) + ierr = nc_inq_vlen(grpid, xtype, vl_namstring, &vlsize, &base_xtype) _ensure_nc_success(ierr) name = vl_namstring.decode('utf-8') try: @@ -5310,29 +6631,23 @@ cdef _read_vlen(group, nc_type xtype, endian=None): cdef class EnumType: """ -A `netCDF4.EnumType` instance is used to describe an Enum data -type, and can be passed to the the `netCDF4.Dataset.createVariable` method of -a `netCDF4.Dataset` or `netCDF4.Group` instance. See -`netCDF4.EnumType.__init__` for more details. +A `EnumType` instance is used to describe an Enum data +type, and can be passed to the the `Dataset.createVariable` method of +a `Dataset` or `Group` instance. See +`EnumType.__init__` for more details. The instance variables `dtype`, `name` and `enum_dict` should not be modified by the user. """ cdef public nc_type _nc_type cdef public dtype, name, enum_dict - __pdoc__['EnumType.name'] = \ - """String name.""" - __pdoc__['EnumType.dtype'] = \ - """A numpy integer dtype object describing the base type for the Enum.""" - __pdoc__['EnumType.enum_dict'] = \ - """A python dictionary describing the enum fields and values.""" def __init__(self, grp, object dt, object dtype_name, object enum_dict, **kwargs): """ **`__init__(group, datatype, datatype_name, enum_dict)`** EnumType constructor. - **`group`**: `netCDF4.Group` instance to associate with the VLEN datatype. + **`group`**: `Group` instance to associate with the VLEN datatype. **`datatype`**: An numpy integer dtype object describing the base type for the Enum. @@ -5343,9 +6658,9 @@ the user. **`enum_dict`**: a Python dictionary containing the Enum field/value pairs. - ***`Note`***: `netCDF4.EnumType` instances should be created using the - `netCDF4.Dataset.createEnumType` - method of a `netCDF4.Dataset` or `netCDF4.Group` instance, not using this class directly. + ***`Note`***: `EnumType` instances should be created using the + `Dataset.createEnumType` method of a `Dataset` or + `Group` instance, not using this class directly. """ cdef nc_type xtype if 'typeid' in kwargs: @@ -5358,15 +6673,12 @@ the user. self.enum_dict = enum_dict def __repr__(self): - if python3: - return self.__unicode__() - else: - return unicode(self).encode('utf-8') + return self.__str__() - def __unicode__(self): - return repr(type(self))+\ - ": name = '%s', numpy dtype = %s, fields/values =%s\n" %\ - (self.name, self.dtype, self.enum_dict) + def __str__(self): + typ = repr(type(self)).replace("._netCDF4", "") + return "%r: name = '%s', numpy dtype = %s, fields/values =%s" %\ + (typ, self.name, self.dtype, self.enum_dict) def __reduce__(self): # raise error is user tries to pickle a EnumType object. @@ -5376,27 +6688,31 @@ cdef _def_enum(grp, object dt, object dtype_name, object enum_dict): # private function used to construct a netCDF Enum data type # from a numpy dtype object or python str object by EnumType.__init__. cdef nc_type xtype, xtype_tmp - cdef int ierr + cdef int ierr, grpid cdef char *namstring cdef ndarray value_arr bytestr = _strencode(dtype_name) namstring = bytestr + grpid = grp._grpid dt = numpy.dtype(dt) # convert to numpy datatype. if dt.str[1:] in _intnptonctype.keys(): # find netCDF primitive data type corresponding to # specified numpy data type. 
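# --- Illustrative sketch (not part of the patch) ----------------------------
# Typical use of the EnumType machinery above: an enum defined through
# Dataset.createEnumType() from an integer base dtype and a field/value
# mapping, then used as the datatype of a variable.  Names and values are
# hypothetical.
import numpy as np
from netCDF4 import Dataset

cloud_values = {'Clear': 0, 'Cumulonimbus': 1, 'Stratus': 2, 'Missing': 255}

with Dataset('enum_sketch.nc', 'w') as ds:
    cloud_t = ds.createEnumType(np.uint8, 'cloud_t', cloud_values)
    ds.createDimension('time', None)
    cv = ds.createVariable('primary_cloud', cloud_t, ('time',),
                           fill_value=cloud_values['Missing'])
    cv[0:2] = [cloud_values['Clear'], cloud_values['Stratus']]
# -----------------------------------------------------------------------------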
xtype_tmp = _intnptonctype[dt.str[1:]] - ierr = nc_def_enum(grp._grpid, xtype_tmp, namstring, &xtype); + with nogil: + ierr = nc_def_enum(grpid, xtype_tmp, namstring, &xtype) _ensure_nc_success(ierr) else: - msg="unsupported datatype specified for Enum (must be integer)" + msg="unsupported datatype specified for ENUM (must be integer)" raise KeyError(msg) # insert named members into enum type. for field in enum_dict: value_arr = numpy.array(enum_dict[field],dt) bytestr = _strencode(field) namstring = bytestr - ierr = nc_insert_enum(grp._grpid, xtype, namstring, value_arr.data) + with nogil: + ierr = nc_insert_enum(grpid, xtype, namstring, + PyArray_DATA(value_arr)) _ensure_nc_success(ierr) return xtype, dt @@ -5405,34 +6721,35 @@ cdef _read_enum(group, nc_type xtype, endian=None): # construct a corresponding numpy dtype instance, # then use that to create a EnumType instance. # called by _get_types, _get_vars. - cdef int ierr, _grpid, nmem - cdef char enum_val + cdef int ierr, grpid, nmem + cdef ndarray enum_val cdef nc_type base_xtype cdef char enum_namstring[NC_MAX_NAME+1] cdef size_t nmembers - _grpid = group._grpid + grpid = group._grpid # get name, datatype, and number of members. with nogil: - ierr = nc_inq_enum(_grpid, xtype, enum_namstring, &base_xtype, NULL,\ + ierr = nc_inq_enum(grpid, xtype, enum_namstring, &base_xtype, NULL,\ &nmembers) _ensure_nc_success(ierr) - name = enum_namstring.decode('utf-8') + enum_name = enum_namstring.decode('utf-8') try: datatype = _nctonptype[base_xtype] if endian is not None: datatype = endian + datatype dt = numpy.dtype(datatype) # see if it is a primitive type except KeyError: - raise KeyError("unsupported component type for VLEN") + raise KeyError("unsupported component type for ENUM") # loop over members, build dict. enum_dict = {} - for nmem from 0 <= nmem < nmembers: + enum_val = numpy.empty(1,dt) + for nmem in range(nmembers): with nogil: - ierr = nc_inq_enum_member(_grpid, xtype, nmem, \ - enum_namstring, &enum_val) + ierr = nc_inq_enum_member(grpid, xtype, nmem, \ + enum_namstring,PyArray_DATA(enum_val)) _ensure_nc_success(ierr) name = enum_namstring.decode('utf-8') - enum_dict[name] = int(enum_val) - return EnumType(group, dt, name, enum_dict, typeid=xtype) + enum_dict[name] = enum_val.item() + return EnumType(group, dt, enum_name, enum_dict, typeid=xtype) cdef _strencode(pystr,encoding=None): # encode a string into bytes. If already bytes, do nothing. @@ -5446,45 +6763,7 @@ cdef _strencode(pystr,encoding=None): def _to_ascii(bytestr): # encode a byte string to an ascii encoded string. - if python3: - return str(bytestr,encoding='ascii') - else: - return bytestr.encode('ascii') - -#---------------------------------------- -# extra utilities (formerly in utils.pyx) -#---------------------------------------- -from datetime import timedelta, datetime, MINYEAR -from netcdftime import _parse_date, microsec_units, millisec_units,\ - sec_units, min_units, hr_units, day_units - -# start of the gregorian calendar -gregorian = datetime(1582,10,15) - -def _dateparse(timestr): - """parse a string of the form time-units since yyyy-mm-dd hh:mm:ss, - return a datetime instance""" - # same as version in netcdftime, but returns a timezone naive - # python datetime instance with the utc_offset included. - timestr_split = timestr.split() - units = timestr_split[0].lower() - if timestr_split[1].lower() != 'since': - raise ValueError("no 'since' in unit_string") - # parse the date string. 
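# --- Illustrative sketch (not part of the patch) ----------------------------
# _read_enum() above rebuilds the Python-side EnumType when a file is
# reopened; the reconstructed type is exposed through the group's enumtypes
# dict.  This assumes the file written in the previous sketch exists.
from netCDF4 import Dataset

with Dataset('enum_sketch.nc') as ds:
    cloud_t = ds.enumtypes['cloud_t']
    print(cloud_t.dtype)      # integer base type recovered via nc_inq_enum
    print(cloud_t.enum_dict)  # field/value pairs recovered via nc_inq_enum_member
# -----------------------------------------------------------------------------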
- n = timestr.find('since')+6 - isostring = timestr[n:] - year, month, day, hour, minute, second, utc_offset =\ - _parse_date( isostring.strip() ) - if year >= MINYEAR: - basedate = datetime(year, month, day, hour, minute, second) - # subtract utc_offset from basedate time instance (which is timezone naive) - basedate -= timedelta(days=utc_offset/1440.) - else: - if not utc_offset: - basedate = netcdftime.datetime(year, month, day, hour, minute, second) - else: - raise ValueError('cannot use utc_offset for reference years <= 0') - return basedate + return str(bytestr,encoding='ascii') def stringtoarr(string,NUMCHARS,dtype='S'): """ @@ -5509,9 +6788,9 @@ returns a rank 1 numpy character array of length NUMCHARS with datatype `'S1'` arr[0:len(string)] = tuple(string) return arr -def stringtochar(a,encoding='utf-8'): +def stringtochar(a,encoding=None,n_strlen=None): """ -**`stringtochar(a,encoding='utf-8')`** +**`stringtochar(a,encoding='utf-8',n_strlen=None)`** convert a string array to a character array with one extra dimension @@ -5520,18 +6799,40 @@ is the number of characters in each string. Will be converted to an array of characters (datatype `'S1'` or `'U1'`) of shape `a.shape + (N,)`. optional kwarg `encoding` can be used to specify character encoding (default -`utf-8`). +`utf-8` for dtype=`'UN'` or `ascii` for dtype=`'SN'`). If `encoding` is 'none' or 'bytes', +a `numpy.string_` the input array is treated a raw byte strings (`numpy.string_`). + +optional kwarg `n_strlen` is the number of characters in each string. Default +is None, which means `n_strlen` will be set to a.itemsize (the number of bytes +used to represent each string in the input array). returns a numpy character array with datatype `'S1'` or `'U1'` and shape `a.shape + (N,)`, where N is the length of each string in a.""" dtype = a.dtype.kind if dtype not in ["S","U"]: raise ValueError("type must string or unicode ('S' or 'U')") - b = numpy.array(tuple(a.tostring().decode(encoding)),dtype+'1') - b.shape = a.shape + (a.itemsize,) + if encoding is None: + if dtype == 'S': + encoding = 'ascii' + else: + encoding = 'utf-8' + if n_strlen is None: + n_strlen = a.dtype.itemsize + if encoding in ['none','None','bytes']: + b = numpy.array(tuple(a.tobytes()),'S1') + elif encoding == 'ascii': + b = numpy.array(tuple(a.tobytes().decode(encoding)),dtype+'1') + b.shape = a.shape + (n_strlen,) + else: + if not a.ndim: + a = numpy.array([a]) + bbytes = [text.encode(encoding) for text in a] + pad = b'\0' * n_strlen + bbytes = [(x + pad)[:n_strlen] for x in bbytes] + b = numpy.array([[bb[i:i+1] for i in range(n_strlen)] for bb in bbytes]) return b -def chartostring(b,encoding='utf-8'): +def chartostring(b,encoding=None): """ **`chartostring(b,encoding='utf-8')`** @@ -5542,300 +6843,64 @@ Will be converted to a array of strings, where each string has a fixed length of `b.shape[-1]` characters. optional kwarg `encoding` can be used to specify character encoding (default -`utf-8`). +`utf-8` for dtype=`'UN'` or `ascii` for dtype=`'SN'`). If `encoding` is 'none' or 'bytes', +a `numpy.string_` byte array is returned. 
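# --- Illustrative sketch (not part of the patch) ----------------------------
# Round trip through the reworked stringtochar()/chartostring() helpers
# above: a fixed-width byte-string array gains a trailing character
# dimension, and chartostring() reverses it.  The n_strlen keyword follows
# the updated signature introduced in this patch; input values are invented.
import numpy as np
from netCDF4 import stringtochar, chartostring

strings = np.array([b'ab', b'cdef'], dtype='S4')
chars = stringtochar(strings)      # character array, dtype 'S1', shape (2, 4)
back = chartostring(chars)         # fixed-width strings of length 4 again
print(chars.shape, back)

# pad/truncate to an explicit byte length via the new n_strlen keyword
chars5 = stringtochar(np.array(['abc'], dtype='U4'), encoding='utf-8', n_strlen=5)
print(chars5.shape)                # (1, 5)
# -----------------------------------------------------------------------------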
-returns a numpy string array with datatype `'UN'` and shape +returns a numpy string array with datatype `'UN'` (or `'SN'`) and shape `b.shape[:-1]` where where `N=b.shape[-1]`.""" dtype = b.dtype.kind if dtype not in ["S","U"]: raise ValueError("type must be string or unicode ('S' or 'U')") - bs = b.tostring().decode(encoding) + if encoding is None: + if dtype == 'S': + encoding = 'ascii' + else: + encoding = 'utf-8' + bs = b.tobytes() slen = int(b.shape[-1]) - a = numpy.array([bs[n1:n1+slen] for n1 in range(0,len(bs),slen)],'U'+repr(slen)) + if encoding in ['none','None','bytes']: + a = numpy.array([bs[n1:n1+slen] for n1 in range(0,len(bs),slen)],'S'+repr(slen)) + else: + a = numpy.array([bs[n1:n1+slen].decode(encoding) for n1 in range(0,len(bs),slen)],'U'+repr(slen)) a.shape = b.shape[:-1] return a -def date2num(dates,units,calendar='standard'): - """ -**`date2num(dates,units,calendar='standard')`** - -Return numeric time values given datetime objects. The units -of the numeric time values are described by the `netCDF4.units` argument -and the `netCDF4.calendar` keyword. The datetime objects must -be in UTC with no time-zone offset. If there is a -time-zone offset in `units`, it will be applied to the -returned numeric values. - -**`dates`**: A datetime object or a sequence of datetime objects. -The datetime objects should not include a time-zone offset. - -**`units`**: a string of the form `