diff --git a/.circleci/config.yml b/.circleci/config.yml index 741e44e024..39aea3517a 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -32,12 +32,12 @@ docs_deploy: &docs name: Deploy docs to gh-pages branch command: gh-pages --no-history --dotfiles --message "doc(update) [skip ci]" --dist docs/_build/html -version: 2 +version: 2.1 jobs: build_docs: docker: - - image: python:3.7.4 + - image: cimg/python:3.11 working_directory: /tmp/gh-pages environment: - FSLOUTPUTTYPE: NIFTI diff --git a/.codespell-ignorewords b/.codespell-ignorewords deleted file mode 100644 index 0e50db9118..0000000000 --- a/.codespell-ignorewords +++ /dev/null @@ -1 +0,0 @@ -nd diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000000..ca79ca5b4d --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + schedule: + interval: weekly diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 0f71d0ad2e..b515c076c1 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -12,9 +12,9 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: '3.x' - name: Install dependencies diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d64aec9365..4319b65649 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -6,16 +6,12 @@ on: branches: - master -env: - # Pin to v10.28.0, which (as of 2021-05-25) is the latest version with assets - AUTO_VERSION: v10.29.3 - jobs: auto-release: runs-on: ubuntu-latest if: "!contains(github.event.head_commit.message, 'ci skip') && !contains(github.event.head_commit.message, 'skip ci')" steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Prepare repository # Fetch full git history and tags @@ -27,13 +23,13 @@ jobs: run: git config --local --unset http.https://github.com/.extraheader - name: Set up Python - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: - python-version: 3.7 + python-version: 3 - name: Download auto run: | - auto_download_url="$(curl -fsSL https://api.github.com/repos/intuit/auto/releases/tags/$AUTO_VERSION | jq -r '.assets[] | select(.name == "auto-linux.gz") | .browser_download_url')" + auto_download_url="$(curl -fsSL https://api.github.com/repos/intuit/auto/releases/latest | jq -r '.assets[] | select(.name == "auto-linux.gz") | .browser_download_url')" wget -O- "$auto_download_url" | gunzip > ~/auto chmod a+x ~/auto @@ -41,4 +37,4 @@ jobs: run: | ~/auto shipit -vv env: - GH_TOKEN: ${{ secrets.AUTO_USER_TOKEN }} + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/testdask.yml b/.github/workflows/testdask.yml new file mode 100644 index 0000000000..7ca8a29f51 --- /dev/null +++ b/.github/workflows/testdask.yml @@ -0,0 +1,45 @@ +name: Dask + +on: + push: + branches: + - master + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + test: + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + python-version: ['3.9', '3.10', '3.11', '3.12'] + fail-fast: false + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + repository: ${{ github.repository }} + + - name: Setup Python version ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies for Dask + run: | + pip install -e ".[test,dask]" + + - name: Run tests for Dask + run: | + pytest -v --dask pydra/engine --cov pydra --cov-config .coveragerc --cov-report xml:cov.xml + + - name: Upload to codecov + run: codecov -f cov.xml -F unittests -e GITHUB_WORKFLOW diff --git a/.github/workflows/testpsijlocal.yml b/.github/workflows/testpsijlocal.yml new file mode 100644 index 0000000000..2e1a752ed2 --- /dev/null +++ b/.github/workflows/testpsijlocal.yml @@ -0,0 +1,45 @@ +name: PSI/J-Local + +on: + push: + branches: + - master + pull_request: + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + test: + strategy: + matrix: + os: [ubuntu-latest, macos-latest] + python-version: ['3.11'] + fail-fast: false + runs-on: ${{ matrix.os }} + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + repository: ${{ github.repository }} + + - name: Setup Python version ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install dependencies for PSI/J + run: | + pip install -e ".[test, psij]" + + - name: Run tests for PSI/J + run: | + pytest --color=yes -vs --psij=local -n auto pydra/engine --cov pydra --cov-config .coveragerc --cov-report xml:cov.xml + + - name: Upload to codecov + run: codecov -f cov.xml -F unittests -e GITHUB_WORKFLOW diff --git a/.github/workflows/testpsijslurm.yml b/.github/workflows/testpsijslurm.yml new file mode 100644 index 0000000000..eb33eca612 --- /dev/null +++ b/.github/workflows/testpsijslurm.yml @@ -0,0 +1,54 @@ +name: PSI/J-SLURM + +on: + push: + branches: + - master + pull_request: + +jobs: + build: + strategy: + matrix: + python-version: [3.11.5] + fail-fast: false + runs-on: ubuntu-latest + env: + DOCKER_IMAGE: adi611/docker-centos7-slurm:23.02.1 + + steps: + - name: Disable etelemetry + run: echo "NO_ET=TRUE" >> $GITHUB_ENV + - uses: actions/checkout@v4 + - name: Pull docker image + run: | + docker pull $DOCKER_IMAGE + # Have image running in the background + docker run `bash <(curl -s https://codecov.io/env)` -itd -h slurmctl --cap-add sys_admin -d --name slurm -v `pwd`:/pydra -e NO_ET=$NO_ET $DOCKER_IMAGE + - name: Display previous jobs with sacct + run: | + echo "Allowing ports/daemons time to start" && sleep 10 + docker exec slurm bash -c "sacctmgr -i add account none,test Cluster=linux Description='none' Organization='none'" + docker exec slurm bash -c "sacct && sinfo && squeue" 2&> /dev/null + if [ $? -ne 0 ]; then + echo "Slurm docker image error" + exit 1 + fi + - name: Setup Python + run: | + docker exec slurm bash -c "echo $NO_ET" + docker exec slurm bash -c "ls -la && echo list top level dir" + docker exec slurm bash -c "ls -la /pydra && echo list pydra dir" + if [[ "${{ matrix.python-version }}" == "3.11.5" ]]; then + docker exec slurm bash -c "CONFIGURE_OPTS=\"-with-openssl=/opt/openssl\" pyenv install -v 3.11.5" + fi + docker exec slurm bash -c "pyenv global ${{ matrix.python-version }}" + docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test,psij] && python -c 'import pydra; print(pydra.__version__)'" + - name: Run pytest + run: | + docker exec slurm bash -c "pytest --color=yes -vs -n auto --psij=slurm --cov pydra --cov-config /pydra/.coveragerc --cov-report xml:/pydra/cov.xml --doctest-modules /pydra/pydra/ -k 'not test_audit_prov and not test_audit_prov_messdir_1 and not test_audit_prov_messdir_2 and not test_audit_prov_wf and not test_audit_all'" + - name: Upload to codecov + run: | + docker exec slurm bash -c "pip install urllib3==1.26.6" + docker exec slurm bash -c "codecov --root /pydra -f /pydra/cov.xml -F unittests" + docker rm -f slurm diff --git a/.github/workflows/testpydra.yml b/.github/workflows/testpydra.yml index 93c722e6b4..3ead2e3a6b 100644 --- a/.github/workflows/testpydra.yml +++ b/.github/workflows/testpydra.yml @@ -22,16 +22,16 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: fetch-depth: 0 - - uses: actions/setup-python@v4 + - uses: actions/setup-python@v5 with: python-version: 3 - run: pip install --upgrade build twine - run: python -m build - run: twine check dist/* - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: dist path: dist/ @@ -40,7 +40,7 @@ jobs: git clean -fxd mkdir archive git archive -o archive/pydra.zip HEAD - - uses: actions/upload-artifact@v3 + - uses: actions/upload-artifact@v4 with: name: archive path: archive/ @@ -50,7 +50,7 @@ jobs: strategy: matrix: os: [macos-latest, ubuntu-latest, windows-latest] - python-version: ['3.7', '3.8', '3.9', '3.10', '3.11'] + python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] install: ['wheel'] include: - os: 'ubuntu-latest' @@ -68,23 +68,23 @@ jobs: steps: - name: Fetch sdist/wheel - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 if: matrix.install == 'sdist' || matrix.install == 'wheel' with: name: dist path: dist/ - name: Fetch git archive - uses: actions/download-artifact@v3 + uses: actions/download-artifact@v4 if: matrix.install == 'archive' with: name: archive path: archive/ - name: Fetch repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 if: matrix.install == 'repo' - name: Set up Python ${{ matrix.python-version }} on ${{ matrix.os }} - uses: actions/setup-python@v4 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Update pip diff --git a/.github/workflows/testsingularity.yml b/.github/workflows/testsingularity.yml index 5e2d5362ac..3d3a384583 100644 --- a/.github/workflows/testsingularity.yml +++ b/.github/workflows/testsingularity.yml @@ -12,7 +12,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [3.7, 3.8, 3.9, "3.10"] + python-version: [3.8, 3.9, "3.10", "3.11"] fail-fast: False steps: @@ -21,13 +21,13 @@ jobs: echo "RELEASE_VERSION=v3.7.1" >> $GITHUB_ENV echo "NO_ET=TRUE" >> $GITHUB_ENV - name: Setup Singularity - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: repository: hpcng/singularity ref: 'v3.7.1' path: 'singularity' - name: Setup GO - uses: actions/setup-go@v2 + uses: actions/setup-go@v5 with: go-version: '^1.13' - name: Install OS deps @@ -49,7 +49,7 @@ jobs: - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v2 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Update build tools @@ -57,7 +57,7 @@ jobs: - name: Checkout Pydra repo - uses: actions/checkout@v2 + uses: actions/checkout@v4 with: repository: ${{ github.repository }} - name: Install pydra (test) diff --git a/.github/workflows/testslurm.yml b/.github/workflows/testslurm.yml index a0bb516607..e4f4bddec2 100644 --- a/.github/workflows/testslurm.yml +++ b/.github/workflows/testslurm.yml @@ -8,26 +8,27 @@ on: jobs: build: + strategy: + matrix: + python-version: [3.8.16, 3.9.16, 3.10.9, 3.11.5] + fail-fast: false runs-on: ubuntu-latest env: - DOCKER_IMAGE: mgxd/slurm:19.05.1 + DOCKER_IMAGE: adi611/docker-centos7-slurm:23.02.1 steps: - name: Disable etelemetry run: echo "NO_ET=TRUE" >> $GITHUB_ENV - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Pull docker image run: | docker pull $DOCKER_IMAGE - # Have image running in background - docker run `bash <(curl -s https://codecov.io/env)` -itd -h ernie --name slurm -v `pwd`:/pydra -e NO_ET=$NO_ET $DOCKER_IMAGE + # Have image running in the background + docker run `bash <(curl -s https://codecov.io/env)` -itd -h slurmctl --cap-add sys_admin -d --name slurm -v `pwd`:/pydra -e NO_ET=$NO_ET $DOCKER_IMAGE - name: Display previous jobs with sacct run: | echo "Allowing ports/daemons time to start" && sleep 10 - docker exec slurm bash -c "sacctmgr -i add cluster name=linux \ - && supervisorctl restart slurmdbd \ - && supervisorctl restart slurmctld \ - && sacctmgr -i add account none,test Cluster=linux Description='none' Organization='none'" + docker exec slurm bash -c "sacctmgr -i add account none,test Cluster=linux Description='none' Organization='none'" docker exec slurm bash -c "sacct && sinfo && squeue" 2&> /dev/null if [ $? -ne 0 ]; then echo "Slurm docker image error" @@ -38,10 +39,16 @@ jobs: docker exec slurm bash -c "echo $NO_ET" docker exec slurm bash -c "ls -la && echo list top level dir" docker exec slurm bash -c "ls -la /pydra && echo list pydra dir" + if [[ "${{ matrix.python-version }}" == "3.11.5" ]]; then + docker exec slurm bash -c "CONFIGURE_OPTS=\"-with-openssl=/opt/openssl\" pyenv install -v 3.11.5" + fi + docker exec slurm bash -c "pyenv global ${{ matrix.python-version }}" docker exec slurm bash -c "pip install --upgrade pip && pip install -e /pydra[test] && python -c 'import pydra; print(pydra.__version__)'" - name: Run pytest - run: docker exec slurm bash -c "pytest --color=yes -vs -n auto --cov pydra --cov-config /pydra/.coveragerc --cov-report xml:/pydra/cov.xml --doctest-modules /pydra/pydra" + run: | + docker exec slurm bash -c "pytest --color=yes -vs --cov pydra --cov-config /pydra/.coveragerc --cov-report xml:/pydra/cov.xml --doctest-modules /pydra/pydra/ -k 'not test_audit_prov and not test_audit_prov_messdir_1 and not test_audit_prov_messdir_2 and not test_audit_prov_wf and not test_audit_all'" - name: Upload to codecov run: | + docker exec slurm bash -c "pip install urllib3==1.26.6" docker exec slurm bash -c "codecov --root /pydra -f /pydra/cov.xml -F unittests" docker rm -f slurm diff --git a/.gitignore b/.gitignore index 1263cb93e9..da16b937b9 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,7 @@ cov.xml .*.swp *~ .idea +*.venv .DS_Store diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d4499feb2c..8406de86b5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,23 +2,23 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: trailing-whitespace - id: end-of-file-fixer - id: check-yaml - id: check-added-large-files - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 24.2.0 hooks: - id: black - repo: https://github.com/codespell-project/codespell - rev: v2.2.2 + rev: v2.2.6 hooks: - id: codespell additional_dependencies: - tomli - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 7.0.0 hooks: - id: flake8 diff --git a/.zenodo.json b/.zenodo.json index 7d81b12ac7..90806af15a 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -75,6 +75,11 @@ "name": "Vaillant, Ghislain", "orcid": "0000-0003-0267-3033" }, + { + "affiliation": "Indian Institute of Information Technology Kalyani", + "name": "Agarwal, Aditya", + "orcid": "0009-0008-2639-5334" + }, { "affiliation": "MIT, HMS", "name": "Ghosh, Satrajit", diff --git a/docs/changes.rst b/docs/changes.rst index 0fb4187e33..4e23840e90 100644 --- a/docs/changes.rst +++ b/docs/changes.rst @@ -9,7 +9,7 @@ Release Notes * adding simple validators to input spec (using ``attr.validator``) * adding ``create_dotfile`` for workflows, that creates graphs as dotfiles (can convert to other formats if dot available) * adding a simple user guide with ``input_spec`` description -* expanding docstrings for ``State``, ``audit`` and ``messanger`` +* expanding docstrings for ``State``, ``audit`` and ``messenger`` * updating syntax to newer python 0.7.0 diff --git a/docs/components.rst b/docs/components.rst index 7872fec87c..d4928e82c6 100644 --- a/docs/components.rst +++ b/docs/components.rst @@ -169,15 +169,17 @@ the Task execution, the user can set splitter and combiner attributes of the Sta .. code-block:: python task_with_state = - add2(x=[1, 5]).split("x").combine("x") + add2().split(x=[1, 5]).combine("x") In this example, the ``State`` class is responsible for creating a list of two separate inputs, *[{x: 1}, {x:5}]*, each run of the *Task* should get one -element from the list. -The results are grouped back when returning the result from the *Task*. -While this example -illustrates mapping and grouping of results over a single parameter, *Pydra* -extends this to arbitrary combinations of input fields and downstream grouping +element from the list. Note that in this case the value for `x` is set in the `split()` +method, not at the task's initialisation. +The `combine()` method, specifies that the results are grouped back when returning the +result from the *Task*. + +While this example illustrates mapping and grouping of results over a single parameter, +*Pydra* extends this to arbitrary combinations of input fields and downstream grouping over nested dataflows. Details of how splitters and combiners power *Pydra*'s scalable dataflows are described in the next section. diff --git a/docs/conf.py b/docs/conf.py index b3a1d23acc..fd0b69ca43 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -89,7 +89,7 @@ napoleon_custom_sections = [("Inputs", "Parameters"), ("Outputs", "Parameters")] # Intersphinx -intersphinx_mapping = {"https://docs.python.org/3/": None} +intersphinx_mapping = {"python": ("https://docs.python.org/3", None)} # Linkcode # The following is used by sphinx.ext.linkcode to provide links to github diff --git a/docs/sphinxext/github_link.py b/docs/sphinxext/github_link.py index d30186c70c..b9a5684c20 100644 --- a/docs/sphinxext/github_link.py +++ b/docs/sphinxext/github_link.py @@ -2,12 +2,13 @@ This script comes from scikit-learn: https://github.com/scikit-learn/scikit-learn/blob/master/doc/sphinxext/github_link.py """ -from operator import attrgetter + import inspect -import subprocess import os +import subprocess import sys from functools import partial +from operator import attrgetter REVISION_CMD = "git rev-parse --short HEAD" @@ -44,15 +45,18 @@ def _linkcode_resolve(domain, info, package, url_fmt, revision): return class_name = info["fullname"].split(".")[0] - if type(class_name) != str: - # Python 2 only - class_name = class_name.encode("utf-8") module = __import__(info["module"], fromlist=[class_name]) + + # FIXME: Bypass resolving for attrs-defined classes. try: obj = attrgetter(info["fullname"])(module) except AttributeError: return + # Unwrap the object to get the correct source + # file in case that is wrapped by a decorator + obj = inspect.unwrap(obj) + try: fn = inspect.getsourcefile(obj) except Exception: diff --git a/pydra/__init__.py b/pydra/__init__.py index 0416c9be75..f704d670a5 100644 --- a/pydra/__init__.py +++ b/pydra/__init__.py @@ -5,21 +5,34 @@ first-class operations. It forms the core of the Nipype 2.0 ecosystem. """ + # This call enables pydra.tasks to be used as a namespace package when installed # in editable mode. In normal installations it has no effect. __path__ = __import__("pkgutil").extend_path(__path__, __name__) import logging -logger = logging.getLogger("pydra") +import __main__ +import attr + +from . import mark +from .engine import AuditFlag, ShellCommandTask, Submitter, Workflow, specs + +__all__ = ( + "Submitter", + "Workflow", + "AuditFlag", + "ShellCommandTask", + "specs", + "mark", +) try: from ._version import __version__ except ImportError: pass -from .engine import Submitter, Workflow, AuditFlag, ShellCommandTask, DockerTask, specs -from . import mark +logger = logging.getLogger("pydra") def check_latest_version(): @@ -29,20 +42,8 @@ def check_latest_version(): # Run telemetry on import for interactive sessions, such as IPython, Jupyter notebooks, Python REPL -import __main__ - if not hasattr(__main__, "__file__"): from .engine.core import TaskBase if TaskBase._etelemetry_version_data is None: TaskBase._etelemetry_version_data = check_latest_version() - - -# attr run_validators is set to False, but could be changed using use_validator -import attr - -attr.set_run_validators(False) - - -def set_input_validator(flag=False): - attr.set_run_validators(flag) diff --git a/pydra/conftest.py b/pydra/conftest.py index 4404e06f71..66a1d200fc 100644 --- a/pydra/conftest.py +++ b/pydra/conftest.py @@ -7,6 +7,12 @@ def pytest_addoption(parser): parser.addoption("--dask", action="store_true", help="run all combinations") + parser.addoption( + "--psij", + action="store", + help="run with psij subtype plugin", + choices=["local", "slurm"], + ) def pytest_generate_tests(metafunc): @@ -21,6 +27,16 @@ def pytest_generate_tests(metafunc): except ValueError: # Called as --pyargs, so --dask isn't available pass + try: + if metafunc.config.getoption("psij"): + Plugins.append("psij-" + metafunc.config.getoption("psij")) + if ( + bool(shutil.which("sbatch")) + and metafunc.config.getoption("psij") == "slurm" + ): + Plugins.remove("slurm") + except ValueError: + pass metafunc.parametrize("plugin_dask_opt", Plugins) if "plugin" in metafunc.fixturenames: @@ -35,6 +51,16 @@ def pytest_generate_tests(metafunc): Plugins = ["slurm"] else: Plugins = ["cf"] + try: + if metafunc.config.getoption("psij"): + Plugins.append("psij-" + metafunc.config.getoption("psij")) + if ( + bool(shutil.which("sbatch")) + and metafunc.config.getoption("psij") == "slurm" + ): + Plugins.remove("slurm") + except ValueError: + pass metafunc.parametrize("plugin", Plugins) diff --git a/pydra/engine/__init__.py b/pydra/engine/__init__.py index 411af8a1d9..2eca36ba28 100644 --- a/pydra/engine/__init__.py +++ b/pydra/engine/__init__.py @@ -1,12 +1,12 @@ """The core of the workflow engine.""" + from .submitter import Submitter from .core import Workflow -from .task import AuditFlag, ShellCommandTask, DockerTask +from .task import AuditFlag, ShellCommandTask from . import specs __all__ = [ "AuditFlag", - "DockerTask", "ShellCommandTask", "Submitter", "Workflow", diff --git a/pydra/engine/audit.py b/pydra/engine/audit.py index ece23239e6..7397fad6e6 100644 --- a/pydra/engine/audit.py +++ b/pydra/engine/audit.py @@ -1,11 +1,18 @@ """Module to keep track of provenance information.""" + import os -from pathlib import Path import json import attr from ..utils.messenger import send_message, make_message, gen_uuid, now, AuditFlag -from .helpers import ensure_list, gather_runtime_info, hash_file -from .specs import attr_fields, File, Directory +from ..utils.hash import hash_function +from .helpers import ensure_list, gather_runtime_info +from .specs import attr_fields +from fileformats.core import FileSet + +try: + import importlib_resources +except ImportError: + import importlib.resources as importlib_resources # type: ignore class Audit: @@ -133,9 +140,8 @@ def audit_message(self, message, flags=None): """ if self.develop: - with open( - Path(os.path.dirname(__file__)) / ".." / "schema/context.jsonld" - ) as fp: + context_file = importlib_resources.files("pydra") / "schema/context.jsonld" + with context_file.open() as fp: context = json.load(fp) else: context = { @@ -178,10 +184,11 @@ def audit_task(self, task): command = task.cmdline if hasattr(task.inputs, "executable") else None attr_list = attr_fields(task.inputs) for attrs in attr_list: - if attrs.type in [File, Directory]: - input_name = attrs.name - input_path = os.path.abspath(getattr(task.inputs, input_name)) - file_hash = hash_file(input_path) + input_name = attrs.name + value = getattr(task.inputs, input_name) + if isinstance(value, FileSet): + input_path = os.path.abspath(value) + file_hash = hash_function(value) entity_id = f"uid:{gen_uuid()}" entity_message = { "@id": entity_id, diff --git a/pydra/engine/core.py b/pydra/engine/core.py index 31127cac99..a523c24525 100644 --- a/pydra/engine/core.py +++ b/pydra/engine/core.py @@ -1,21 +1,22 @@ """Basic processing graph elements.""" + import abc -import attr import json import logging +import itertools +from functools import cached_property import os import sys from pathlib import Path import typing as ty -from copy import deepcopy +from copy import deepcopy, copy from uuid import uuid4 - -import cloudpickle as cp from filelock import SoftFileLock import shutil from tempfile import mkdtemp from traceback import format_exception - +import attr +import cloudpickle as cp from . import state from . import helpers_state as hlpst from .specs import ( @@ -24,9 +25,12 @@ RuntimeSpec, Result, SpecInfo, + LazyIn, + LazyOut, LazyField, TaskHook, attr_fields, + StateArray, ) from .helpers import ( make_klass, @@ -36,13 +40,16 @@ save, ensure_list, record_error, - hash_function, PydraFileLock, + parse_copyfile, ) -from .helpers_file import copyfile_input, template_update +from ..utils.hash import hash_function +from .helpers_file import copy_nested_files, template_update from .graph import DiGraph from .audit import Audit from ..utils.messenger import AuditFlag +from ..utils.typing import TypeParser +from fileformats.core import FileSet logger = logging.getLogger("pydra") @@ -53,7 +60,7 @@ class TaskBase: """ A base structure for the nodes in the processing graph. - Tasks are a generic compute step from which both elemntary tasks and + Tasks are a generic compute step from which both elementary tasks and :class:`Workflow` instances inherit. """ @@ -201,6 +208,7 @@ def __init__( self.plugin = None self.hooks = TaskHook() self._errored = False + self._lzout = None def __str__(self): return self.name @@ -223,10 +231,9 @@ def __setstate__(self, state): state["inputs"] = make_klass(state["input_spec"])(**state["inputs"]) self.__dict__.update(state) - def __getattr__(self, name): - if name == "lzout": # lazy output - return LazyField(self, "output") - return self.__getattribute__(name) + @cached_property + def lzout(self): + return LazyOut(self) def help(self, returnhelp=False): """Print class help.""" @@ -274,25 +281,21 @@ def checksum_states(self, state_index=None): """ if is_workflow(self) and self.inputs._graph_checksums is attr.NOTHING: - self.inputs._graph_checksums = [nd.checksum for nd in self.graph_sorted] + self.inputs._graph_checksums = { + nd.name: nd.checksum for nd in self.graph_sorted + } if state_index is not None: - inputs_copy = deepcopy(self.inputs) + inputs_copy = copy(self.inputs) for key, ind in self.state.inputs_ind[state_index].items(): val = self._extract_input_el( - inputs=inputs_copy, inp_nm=key.split(".")[1], ind=ind + inputs=self.inputs, inp_nm=key.split(".")[1], ind=ind ) setattr(inputs_copy, key.split(".")[1], val) # setting files_hash again in case it was cleaned by setting specific element # that might be important for outer splitter of input variable with big files # the file can be changed with every single index even if there are only two files - inputs_copy.files_hash = self.inputs.files_hash input_hash = inputs_copy.hash - # updating self.inputs.files_hash, so big files hashes - # doesn't have to be recompute for the next element - for key, val in inputs_copy.files_hash.items(): - if val: - self.inputs.files_hash[key].update(val) if is_workflow(self): con_hash = hash_function(self._connections) # TODO: hash list is not used @@ -356,7 +359,9 @@ def generated_output_names(self): """ output_klass = make_klass(self.output_spec) if hasattr(output_klass, "generated_output_names"): - output = output_klass(**{f.name: None for f in attr.fields(output_klass)}) + output = output_klass( + **{f.name: attr.NOTHING for f in attr.fields(output_klass)} + ) # using updated input (after filing the templates) _inputs = deepcopy(self.inputs) modified_inputs = template_update(_inputs, self.output_dir) @@ -427,7 +432,13 @@ def cont_dim(self, cont_dim): self._cont_dim = cont_dim def __call__( - self, submitter=None, plugin=None, plugin_kwargs=None, rerun=False, **kwargs + self, + submitter=None, + plugin=None, + plugin_kwargs=None, + rerun=False, + environment=None, + **kwargs, ): """Make tasks callable themselves.""" from .submitter import Submitter @@ -447,22 +458,61 @@ def __call__( if submitter: with submitter as sub: self.inputs = attr.evolve(self.inputs, **kwargs) - res = sub(self) + res = sub(self, environment=environment) else: # tasks without state could be run without a submitter - res = self._run(rerun=rerun, **kwargs) + res = self._run(rerun=rerun, environment=environment, **kwargs) return res def _modify_inputs(self): - """Update and preserve a Task's original inputs""" + """This method modifies the inputs of the task ahead of its execution: + - links/copies upstream files and directories into the destination tasks + working directory as required select state array values corresponding to + state index (it will try to leave them where they are unless specified or + they are on different file systems) + - resolve template values (e.g. output_file_template) + - deepcopy all inputs to guard against in-place changes during the task's + execution (they will be replaced after the task's execution with the + original inputs to ensure the tasks checksums are consistent) + """ orig_inputs = { - k: deepcopy(v) for k, v in attr.asdict(self.inputs, recurse=False).items() + k: v + for k, v in attr.asdict(self.inputs, recurse=False).items() + if not k.startswith("_") } - map_copyfiles = copyfile_input(self.inputs, self.output_dir) + map_copyfiles = {} + input_fields = attr.fields(type(self.inputs)) + for name, value in orig_inputs.items(): + fld = getattr(input_fields, name) + copy_mode, copy_collation = parse_copyfile( + fld, default_collation=self.DEFAULT_COPY_COLLATION + ) + if value is not attr.NOTHING and TypeParser.contains_type( + FileSet, fld.type + ): + copied_value = copy_nested_files( + value=value, + dest_dir=self.output_dir, + mode=copy_mode, + collation=copy_collation, + supported_modes=self.SUPPORTED_COPY_MODES, + ) + if value is not copied_value: + map_copyfiles[name] = copied_value modified_inputs = template_update( self.inputs, self.output_dir, map_copyfiles=map_copyfiles ) - if modified_inputs: - self.inputs = attr.evolve(self.inputs, **modified_inputs) + assert all(m in orig_inputs for m in modified_inputs), ( + "Modified inputs contain fields not present in original inputs. " + "This is likely a bug." + ) + for name, orig_value in orig_inputs.items(): + try: + value = modified_inputs[name] + except KeyError: + # Ensure we pass a copy not the original just in case inner + # attributes are modified during execution + value = deepcopy(orig_value) + setattr(self.inputs, name, value) return orig_inputs def _populate_filesystem(self, checksum, output_dir): @@ -482,7 +532,7 @@ def _populate_filesystem(self, checksum, output_dir): shutil.rmtree(output_dir) output_dir.mkdir(parents=False, exist_ok=self.can_resume) - def _run(self, rerun=False, **kwargs): + def _run(self, rerun=False, environment=None, **kwargs): self.inputs = attr.evolve(self.inputs, **kwargs) self.inputs.check_fields_input_spec() @@ -491,7 +541,7 @@ def _run(self, rerun=False, **kwargs): lockfile = self.cache_dir / (checksum + ".lock") # Eagerly retrieve cached - see scenarios in __init__() self.hooks.pre_run(self) - logger.debug(f"'%s' is attempting to acquire lock on %s", self.name, lockfile) + logger.debug("'%s' is attempting to acquire lock on %s", self.name, lockfile) with SoftFileLock(lockfile): if not (rerun or self.task_rerun): result = self.result() @@ -499,6 +549,7 @@ def _run(self, rerun=False, **kwargs): return result cwd = os.getcwd() self._populate_filesystem(checksum, output_dir) + os.chdir(output_dir) orig_inputs = self._modify_inputs() result = Result(output=None, runtime=None, errored=False) self.hooks.pre_run_task(self) @@ -507,7 +558,7 @@ def _run(self, rerun=False, **kwargs): self.audit.audit_task(task=self) try: self.audit.monitor() - self._run_task() + self._run_task(environment=environment) result.output = self._collect_outputs(output_dir=output_dir) except Exception: etype, eval, etr = sys.exc_info() @@ -519,43 +570,72 @@ def _run(self, rerun=False, **kwargs): self.hooks.post_run_task(self, result) self.audit.finalize_audit(result) save(output_dir, result=result, task=self) - self.output_ = None - # removing the additional file with the chcksum + # removing the additional file with the checksum (self.cache_dir / f"{self.uid}_info.json").unlink() - # # function etc. shouldn't change anyway, so removing - orig_inputs = { - k: v for k, v in orig_inputs.items() if not k.startswith("_") - } - self.inputs = attr.evolve(self.inputs, **orig_inputs) + # Restore original values to inputs + for field_name, field_value in orig_inputs.items(): + setattr(self.inputs, field_name, field_value) os.chdir(cwd) self.hooks.post_run(self, result) + # Check for any changes to the input hashes that have occurred during the execution + # of the task + self._check_for_hash_changes() return result def _collect_outputs(self, output_dir): - run_output = self.output_ output_klass = make_klass(self.output_spec) - output = output_klass(**{f.name: None for f in attr.fields(output_klass)}) + output = output_klass( + **{f.name: attr.NOTHING for f in attr.fields(output_klass)} + ) other_output = output.collect_additional_outputs( - self.inputs, output_dir, run_output + self.inputs, output_dir, self.output_ ) - return attr.evolve(output, **run_output, **other_output) + return attr.evolve(output, **self.output_, **other_output) - def split(self, splitter, overwrite=False, cont_dim=None, **kwargs): + def split( + self, + splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None] = None, + overwrite: bool = False, + cont_dim: ty.Optional[dict] = None, + **inputs, + ): """ Run this task parametrically over lists of split inputs. Parameters ---------- - splitter : - TODO - overwrite : :obj:`bool` - TODO - cont_dim : :obj:`dict` + splitter : str or list[str] or tuple[str] or None + the fields which to split over. If splitting over multiple fields, lists of + fields are interpreted as outer-products and tuples inner-products. If None, + then the fields to split are taken from the keyword-arg names. + overwrite : bool, optional + whether to overwrite an existing split on the node, by default False + cont_dim : dict, optional Container dimensions for specific inputs, used in the splitter. If input name is not in cont_dim, it is assumed that the input values has a container dimension of 1, so only the most outer dim will be used for splitting. + **split_inputs + fields to split over, will automatically be wrapped in a StateArray object + and passed to the node inputs + Returns + ------- + self : TaskBase + a reference to the task """ + if self._lzout: + raise RuntimeError( + f"Cannot split {self} as its output interface has already been accessed" + ) + if splitter is None and inputs: + splitter = list(inputs) + elif splitter: + missing = set(hlpst.unwrap_splitter(splitter)) - set(inputs) + missing = [m for m in missing if not m.startswith("_")] + if missing: + raise ValueError( + f"Split is missing values for the following fields {list(missing)}" + ) splitter = hlpst.add_name_splitter(splitter, self.name) # if user want to update the splitter, overwrite has to be True if self.state and not overwrite and self.state.splitter != splitter: @@ -566,24 +646,62 @@ def split(self, splitter, overwrite=False, cont_dim=None, **kwargs): if cont_dim: for key, vel in cont_dim.items(): self._cont_dim[f"{self.name}.{key}"] = vel - if kwargs: - self.inputs = attr.evolve(self.inputs, **kwargs) + if inputs: + new_inputs = {} + split_inputs = set( + f"{self.name}.{n}" if "." not in n else n + for n in hlpst.unwrap_splitter(splitter) + if not n.startswith("_") + ) + for inpt_name, inpt_val in inputs.items(): + new_val: ty.Any + if f"{self.name}.{inpt_name}" in split_inputs: # type: ignore + if isinstance(inpt_val, LazyField): + new_val = inpt_val.split(splitter) + elif isinstance(inpt_val, ty.Iterable) and not isinstance( + inpt_val, (ty.Mapping, str) + ): + new_val = StateArray(inpt_val) + else: + raise TypeError( + f"Could not split {inpt_val} as it is not a sequence type" + ) + else: + new_val = inpt_val + new_inputs[inpt_name] = new_val + self.inputs = attr.evolve(self.inputs, **new_inputs) if not self.state or splitter != self.state.splitter: self.set_state(splitter) return self - def combine(self, combiner, overwrite=False): + def combine( + self, + combiner: ty.Union[ty.List[str], str], + overwrite: bool = False, # **kwargs + ): """ Combine inputs parameterized by one or more previous tasks. Parameters ---------- - combiner : - TODO - overwrite : :obj:`bool` - TODO + combiner : list[str] or str + the + overwrite : bool + whether to overwrite an existing combiner on the node + **kwargs : dict[str, Any] + values for the task that will be "combined" before they are provided to the + node + Returns + ------- + self : TaskBase + a reference to the task """ + if self._lzout: + raise RuntimeError( + f"Cannot combine {self} as its output interface has already been " + "accessed" + ) if not isinstance(combiner, (str, list)): raise Exception("combiner has to be a string or a list") combiner = hlpst.add_name_combiner(ensure_list(combiner), self.name) @@ -603,11 +721,10 @@ def combine(self, combiner, overwrite=False): # if is connected to one with a splitter; # self.fut_combiner will be used later as a combiner self.fut_combiner = combiner - return self else: # self.state and not self.state.combiner self.combiner = combiner self.set_state(splitter=self.state.splitter, combiner=self.combiner) - return self + return self def _extract_input_el(self, inputs, inp_nm, ind): """ @@ -629,26 +746,22 @@ def _extract_input_el(self, inputs, inp_nm, ind): def get_input_el(self, ind): """Collect all inputs required to run the node (for specific state element).""" - if ind is not None: - # TODO: doesn't work properly for more cmplicated wf (check if still an issue) - state_dict = self.state.states_val[ind] - input_ind = self.state.inputs_ind[ind] - inputs_dict = {} - for inp in set(self.input_names): - if f"{self.name}.{inp}" in input_ind: - inputs_dict[inp] = self._extract_input_el( - inputs=self.inputs, - inp_nm=inp, - ind=input_ind[f"{self.name}.{inp}"], - ) - else: - inputs_dict[inp] = getattr(self.inputs, inp) - return state_dict, inputs_dict - else: - # todo it never gets here - breakpoint() - inputs_dict = {inp: getattr(self.inputs, inp) for inp in self.input_names} - return None, inputs_dict + # TODO: doesn't work properly for more cmplicated wf (check if still an issue) + input_ind = self.state.inputs_ind[ind] + inputs_dict = {} + for inp in set(self.input_names): + if f"{self.name}.{inp}" in input_ind: + inputs_dict[inp] = self._extract_input_el( + inputs=self.inputs, + inp_nm=inp, + ind=input_ind[f"{self.name}.{inp}"], + ) + return inputs_dict + # else: + # # todo it never gets here + # breakpoint() + # inputs_dict = {inp: getattr(self.inputs, inp) for inp in self.input_names} + # return None, inputs_dict def pickle_task(self): """Pickling the tasks with full inputs""" @@ -728,8 +841,8 @@ def result(self, state_index=None, return_inputs=False): Returns ------- - result : - + result : Result + the result of the task """ # TODO: check if result is available in load_result and # return a future if not @@ -796,10 +909,58 @@ def _reset(self): for task in self.graph.nodes: task._reset() + def _check_for_hash_changes(self): + hash_changes = self.inputs.hash_changes() + details = "" + for changed in hash_changes: + field = getattr(attr.fields(type(self.inputs)), changed) + val = getattr(self.inputs, changed) + field_type = type(val) + if issubclass(field.type, FileSet): + details += ( + f"- {changed}: value passed to the {field.type} field is of type " + f"{field_type} ('{val}'). If it is intended to contain output data " + "then the type of the field in the interface class should be changed " + "to `pathlib.Path`. Otherwise, if the field is intended to be an " + "input field but it gets altered by the task in some way, then the " + "'copyfile' flag should be set to 'copy' in the field metadata of " + "the task interface class so copies of the files/directories in it " + "are passed to the task instead.\n" + ) + else: + details += ( + f"- {changed}: the {field_type} object passed to the {field.type}" + f"field appears to have an unstable hash. This could be due to " + "a stochastic/non-thread-safe attribute(s) of the object\n\n" + f"The {field.type}.__bytes_repr__() method can be implemented to " + "bespoke hashing methods based only on the stable attributes for " + f"the `{field_type.__module__}.{field_type.__name__}` type. " + f"See pydra/utils/hash.py for examples. Value: {val}\n" + ) + if hash_changes: + raise RuntimeError( + f"Input field hashes have changed during the execution of the " + f"'{self.name}' {type(self).__name__}.\n\n{details}" + ) + logger.debug( + "Input values and hashes for '%s' %s node:\n%s\n%s", + self.name, + type(self).__name__, + self.inputs, + self.inputs._hashes, + ) -def _sanitize_input_spec( - input_spec: ty.Union[SpecInfo, ty.List[str]], + SUPPORTED_COPY_MODES = FileSet.CopyMode.any + DEFAULT_COPY_COLLATION = FileSet.CopyCollation.any + + +def _sanitize_spec( + spec: ty.Union[ + SpecInfo, ty.List[str], ty.Dict[str, ty.Type[ty.Any]], BaseSpec, None + ], wf_name: str, + spec_name: str, + allow_empty: bool = False, ) -> SpecInfo: """Makes sure the provided input specifications are valid. @@ -808,51 +969,65 @@ def _sanitize_input_spec( Parameters ---------- - input_spec : SpecInfo or List[str] - Input specification to be sanitized. - + spec : SpecInfo or List[str] or Dict[str, type] + Specification to be sanitized. wf_name : str The name of the workflow for which the input specifications - are sanitized. + spec_name : str + name given to generated SpecInfo object Returns ------- - input_spec : SpecInfo - Sanitized input specifications. + spec : SpecInfo + Sanitized specification. Raises ------ ValueError - If provided `input_spec` is None. + If provided `spec` is None. """ graph_checksum_input = ("_graph_checksums", ty.Any) - if input_spec: - if isinstance(input_spec, SpecInfo): - if not any([x == BaseSpec for x in input_spec.bases]): - raise ValueError("Provided SpecInfo must have BaseSpec as it's base.") - if "_graph_checksums" not in {f[0] for f in input_spec.fields}: - input_spec.fields.insert(0, graph_checksum_input) - return input_spec + if spec: + if isinstance(spec, SpecInfo): + if BaseSpec not in spec.bases: + raise ValueError("Provided SpecInfo must have BaseSpec as its base.") + if "_graph_checksums" not in {f[0] for f in spec.fields}: + spec.fields.insert(0, graph_checksum_input) + return spec else: + base = BaseSpec + if isinstance(spec, list): + typed_spec = zip(spec, itertools.repeat(ty.Any)) + elif isinstance(spec, dict): + typed_spec = spec.items() # type: ignore + elif isinstance(spec, BaseSpec): + base = spec + typed_spec = [] + else: + raise TypeError( + f"Unrecognised spec type, {spec}, should be SpecInfo, list or dict" + ) return SpecInfo( - name="Inputs", + name=spec_name, fields=[graph_checksum_input] + [ ( nm, attr.ib( - type=ty.Any, + type=tp, metadata={ "help_string": f"{nm} input from {wf_name} workflow" }, ), ) - for nm in input_spec + for nm, tp in typed_spec ], - bases=(BaseSpec,), + bases=(base,), ) + elif allow_empty: + return None else: - raise ValueError(f"Empty input_spec provided to Workflow {wf_name}.") + raise ValueError(f'Empty "{spec_name}" spec provided to Workflow {wf_name}.') class Workflow(TaskBase): @@ -864,11 +1039,15 @@ def __init__( audit_flags: AuditFlag = AuditFlag.NONE, cache_dir=None, cache_locations=None, - input_spec: ty.Optional[ty.Union[ty.List[ty.Text], SpecInfo]] = None, + input_spec: ty.Optional[ + ty.Union[ty.List[ty.Text], ty.Dict[ty.Text, ty.Type[ty.Any]], SpecInfo] + ] = None, cont_dim=None, messenger_args=None, messengers=None, - output_spec: ty.Optional[ty.Union[SpecInfo, BaseSpec]] = None, + output_spec: ty.Optional[ + ty.Union[ty.List[str], ty.Dict[str, type], SpecInfo, BaseSpec] + ] = None, rerun=False, propagate_rerun=True, **kwargs, @@ -900,9 +1079,10 @@ def __init__( TODO """ - self.input_spec = _sanitize_input_spec(input_spec, name) - - self.output_spec = output_spec + self.input_spec = _sanitize_spec(input_spec, name, "Inputs") + self.output_spec = _sanitize_spec( + output_spec, name, "Outputs", allow_empty=True + ) if name in dir(self): raise ValueError( @@ -924,17 +1104,21 @@ def __init__( self.graph = DiGraph(name=name) self.name2obj = {} + self._lzin = None + self._pre_split = ( + False # To signify if the workflow has been split on task load or not + ) # store output connections self._connections = None # propagating rerun if task_rerun=True self.propagate_rerun = propagate_rerun + @cached_property + def lzin(self): + return LazyIn(self) + def __getattr__(self, name): - if name == "lzin": - return LazyField(self, "input") - if name == "lzout": - return super().__getattr__(name) if name in self.name2obj: return self.name2obj[name] return self.__getattribute__(name) @@ -958,7 +1142,9 @@ def checksum(self): """ # if checksum is called before run the _graph_checksums is not ready if is_workflow(self) and self.inputs._graph_checksums is attr.NOTHING: - self.inputs._graph_checksums = [nd.checksum for nd in self.graph_sorted] + self.inputs._graph_checksums = { + nd.name: nd.checksum for nd in self.graph_sorted + } input_hash = self.inputs.hash if not self.state: @@ -1105,7 +1291,7 @@ async def _run(self, submitter=None, rerun=False, **kwargs): lockfile = self.cache_dir / (checksum + ".lock") self.hooks.pre_run(self) logger.debug( - f"'%s' is attempting to acquire lock on %s with Pydra lock", + "'%s' is attempting to acquire lock on %s with Pydra lock", self.name, lockfile, ) @@ -1134,12 +1320,13 @@ async def _run(self, submitter=None, rerun=False, **kwargs): self.hooks.post_run_task(self, result) self.audit.finalize_audit(result=result) save(output_dir, result=result, task=self) - # removing the additional file with the chcksum + # removing the additional file with the checksum (self.cache_dir / f"{self.uid}_info.json").unlink() os.chdir(cwd) self.hooks.post_run(self, result) - if result is None: - raise Exception("This should never happen, please open new issue") + # Check for any changes to the input hashes that have occurred during the execution + # of the task + self._check_for_hash_changes() return result async def _run_task(self, submitter, rerun=False): @@ -1151,16 +1338,23 @@ async def _run_task(self, submitter, rerun=False): # at this point Workflow is stateless so this should be fine await submitter.expand_workflow(self, rerun=rerun) - def set_output(self, connections): + def set_output( + self, + connections: ty.Union[ + ty.Tuple[str, LazyField], ty.List[ty.Tuple[str, LazyField]] + ], + ): """ - Write outputs. + Set outputs of the workflow by linking them with lazy outputs of tasks Parameters ---------- - connections : - TODO - + connections : tuple[str, LazyField] or list[tuple[str, LazyField]] or None + single or list of tuples linking the name of the output to a lazy output + of a task in the workflow. """ + from ..utils.typing import TypeParser + if self._connections is None: self._connections = [] if isinstance(connections, tuple) and len(connections) == 2: @@ -1172,17 +1366,40 @@ def set_output(self, connections): elif isinstance(connections, dict): new_connections = list(connections.items()) else: - raise Exception( + raise TypeError( "Connections can be a 2-elements tuple, a list of these tuples, or dictionary" ) # checking if a new output name is already in the connections connection_names = [name for name, _ in self._connections] - new_names = [name for name, _ in new_connections] - if set(connection_names).intersection(new_names): - raise Exception( - f"output name {set(connection_names).intersection(new_names)} is already set" + if self.output_spec: + output_types = { + a.name: a.type for a in attr.fields(make_klass(self.output_spec)) + } + else: + output_types = {} + # Check for type matches with explicitly defined outputs + conflicting = [] + type_mismatches = [] + for conn_name, lazy_field in new_connections: + if conn_name in connection_names: + conflicting.append(conn_name) + try: + output_type = output_types[conn_name] + except KeyError: + pass + else: + if not TypeParser.matches_type(lazy_field.type, output_type): + type_mismatches.append((conn_name, output_type, lazy_field.type)) + if conflicting: + raise ValueError(f"the output names {conflicting} are already set") + if type_mismatches: + raise TypeError( + f"the types of the following outputs of {self} don't match their declared types: " + + ", ".join( + f"{n} (expected: {ex}, provided: {p})" + for n, ex, p in type_mismatches + ) ) - self._connections += new_connections fields = [] for con in self._connections: @@ -1192,6 +1409,8 @@ def set_output(self, connections): help_string = f"all outputs from {task_nm}" fields.append((wf_out_nm, dict, {"help_string": help_string})) else: + from ..utils.typing import TypeParser + # getting information about the output field from the task output_spec # providing proper type and some help string task_output_spec = getattr(self, task_nm).output_spec @@ -1199,13 +1418,19 @@ def set_output(self, connections): help_string = ( f"{out_fld.metadata.get('help_string', '')} (from {task_nm})" ) - fields.append((wf_out_nm, out_fld.type, {"help_string": help_string})) + if TypeParser.get_origin(lf.type) is StateArray: + type_ = TypeParser.get_item_type(lf.type) + else: + type_ = lf.type + fields.append((wf_out_nm, type_, {"help_string": help_string})) self.output_spec = SpecInfo(name="Output", fields=fields, bases=(BaseSpec,)) logger.info("Added %s to %s", self.output_spec, self) def _collect_outputs(self): output_klass = make_klass(self.output_spec) - output = output_klass(**{f.name: None for f in attr.fields(output_klass)}) + output = output_klass( + **{f.name: attr.NOTHING for f in attr.fields(output_klass)} + ) # collecting outputs from tasks output_wf = {} for name, val in self._connections: @@ -1214,7 +1439,7 @@ def _collect_outputs(self): try: val_out = val.get_value(self) output_wf[name] = val_out - except (ValueError, AttributeError): + except (ValueError, AttributeError) as e: output_wf[name] = None # checking if the tasks has predecessors that raises error if isinstance(getattr(self, val.name)._errored, list): @@ -1227,11 +1452,15 @@ def _collect_outputs(self): el / "_error.pklz" for el in getattr(self, val.name).output_dir ] + if not all(e.exists() for e in err_file): + raise e else: err_file = getattr(self, val.name).output_dir / "_error.pklz" + if not Path(err_file).exists(): + raise e raise ValueError( f"Task {val.name} raised an error, full crash report is here: " - f"{str(err_file)}" + f"{err_file}" ) return attr.evolve(output, **output_wf) diff --git a/pydra/engine/environments.py b/pydra/engine/environments.py new file mode 100644 index 0000000000..0c57008058 --- /dev/null +++ b/pydra/engine/environments.py @@ -0,0 +1,157 @@ +from .helpers import execute + +from pathlib import Path + + +class Environment: + """ + Base class for environments that are used to execute tasks. + Right now it is assumed that the environment, including container images, + are available and are not removed at the end + TODO: add setup and teardown methods + """ + + def setup(self): + pass + + def execute(self, task): + """ + Execute the task in the environment. + + Parameters + ---------- + task : TaskBase + the task to execute + + Returns + ------- + output + Output of the task. + """ + raise NotImplementedError + + def teardown(self): + pass + + +class Native(Environment): + """ + Native environment, i.e. the tasks are executed in the current python environment. + """ + + def execute(self, task): + keys = ["return_code", "stdout", "stderr"] + values = execute(task.command_args(), strip=task.strip) + output = dict(zip(keys, values)) + if output["return_code"]: + msg = f"Error running '{task.name}' task with {task.command_args()}:" + if output["stderr"]: + msg += "\n\nstderr:\n" + output["stderr"] + if output["stdout"]: + msg += "\n\nstdout:\n" + output["stdout"] + raise RuntimeError(msg) + return output + + +class Container(Environment): + """ + Base class for container environments used by Docker and Singularity. + + Parameters + ---------- + image : str + Name of the container image + tag : str + Tag of the container image + root : str + Base path for mounting host directories into the container + xargs : Union[str, List[str]] + Extra arguments to be passed to the container + """ + + def __init__(self, image, tag="latest", root="/mnt/pydra", xargs=None): + self.image = image + self.tag = tag + if xargs is None: + xargs = [] + elif isinstance(xargs, str): + xargs = xargs.split() + self.xargs = xargs + self.root = root + + def bind(self, loc, mode="ro"): + loc_abs = Path(loc).absolute() + return f"{loc_abs}:{self.root}{loc_abs}:{mode}" + + +class Docker(Container): + """Docker environment.""" + + def execute(self, task): + docker_img = f"{self.image}:{self.tag}" + # mounting all input locations + mounts = task.get_bindings(root=self.root) + + docker_args = [ + "docker", + "run", + "-v", + self.bind(task.cache_dir, "rw"), + *self.xargs, + ] + docker_args.extend( + " ".join( + [f"-v {key}:{val[0]}:{val[1]}" for (key, val) in mounts.items()] + ).split() + ) + docker_args.extend(["-w", f"{self.root}{task.output_dir}"]) + keys = ["return_code", "stdout", "stderr"] + + values = execute( + docker_args + [docker_img] + task.command_args(root=self.root), + strip=task.strip, + ) + output = dict(zip(keys, values)) + if output["return_code"]: + if output["stderr"]: + raise RuntimeError(output["stderr"]) + else: + raise RuntimeError(output["stdout"]) + return output + + +class Singularity(Container): + """Singularity environment.""" + + def execute(self, task): + singularity_img = f"{self.image}:{self.tag}" + # mounting all input locations + mounts = task.get_bindings(root=self.root) + + # todo adding xargsy etc + singularity_args = [ + "singularity", + "exec", + "-B", + self.bind(task.cache_dir, "rw"), + *self.xargs, + ] + singularity_args.extend( + " ".join( + [f"-B {key}:{val[0]}:{val[1]}" for (key, val) in mounts.items()] + ).split() + ) + singularity_args.extend(["--pwd", f"{self.root}{task.output_dir}"]) + keys = ["return_code", "stdout", "stderr"] + + values = execute( + singularity_args + [singularity_img] + task.command_args(root=self.root), + strip=task.strip, + ) + output = dict(zip(keys, values)) + if output["return_code"]: + if output["stderr"]: + raise RuntimeError(output["stderr"]) + else: + raise RuntimeError(output["stdout"]) + return output diff --git a/pydra/engine/graph.py b/pydra/engine/graph.py index 7b2724e4eb..bfa62e0764 100644 --- a/pydra/engine/graph.py +++ b/pydra/engine/graph.py @@ -1,4 +1,5 @@ """Data structure to support :class:`~pydra.engine.core.Workflow` tasks.""" + from copy import copy from pathlib import Path import subprocess as sp @@ -498,7 +499,7 @@ def _create_dotfile_single_graph(self, nodes, edges): return dotstr def export_graph(self, dotfile, ext="png"): - """exporting dotfile to other format, equires the dot command""" + """exporting dotfile to other formats requires the dot command""" available_ext = [ "bmp", "canon", diff --git a/pydra/engine/helpers.py b/pydra/engine/helpers.py index d455a2115a..4d8e84132b 100644 --- a/pydra/engine/helpers.py +++ b/pydra/engine/helpers.py @@ -1,37 +1,32 @@ """Administrative support for the engine framework.""" + import asyncio import asyncio.subprocess as asp -import attr -import cloudpickle as cp from pathlib import Path -from filelock import SoftFileLock, Timeout import os import sys -from hashlib import sha256 from uuid import uuid4 -import subprocess as sp import getpass +import typing as ty +import subprocess as sp import re from time import strftime from traceback import format_exception -import typing as ty -import inspect -import warnings - - +import attr +import attrs # New defaults +from filelock import SoftFileLock, Timeout +import cloudpickle as cp from .specs import ( Runtime, - File, - Directory, attr_fields, Result, LazyField, - MultiOutputObj, - MultiInputObj, - MultiInputFile, - MultiOutputFile, + File, ) -from .helpers_file import hash_file, hash_dir, copyfile, is_existing_file +from .helpers_file import copy_nested_files +from ..utils.typing import TypeParser +from fileformats.core import FileSet +from .specs import MultiInputFile, MultiInputObj, MultiOutputObj, MultiOutputFile def ensure_list(obj, tuple2list=False): @@ -54,6 +49,8 @@ def ensure_list(obj, tuple2list=False): [5.0] """ + if obj is attr.NOTHING: + return attr.NOTHING if obj is None: return [] # list or numpy.array (this might need some extra flag in case an array has to be converted) @@ -61,13 +58,23 @@ def ensure_list(obj, tuple2list=False): return obj elif tuple2list and isinstance(obj, tuple): return list(obj) - elif isinstance(obj, list): - return obj elif isinstance(obj, LazyField): return obj return [obj] +def from_list_if_single(obj): + """Converts a list to a single item if it is of length == 1""" + if obj is attr.NOTHING: + return obj + if isinstance(obj, LazyField): + return obj + obj = list(obj) + if len(obj) == 1: + return obj[0] + return obj + + def print_help(obj): """Visit a task object and print its input/output interface.""" lines = [f"Help for {obj.__class__.__name__}"] @@ -158,53 +165,17 @@ def save(task_path: Path, result=None, task=None, name_prefix=None): cp.dump(task, fp) -def copyfile_workflow(wf_path, result): +def copyfile_workflow(wf_path: os.PathLike, result): """if file in the wf results, the file will be copied to the workflow directory""" for field in attr_fields(result.output): value = getattr(result.output, field.name) # if the field is a path or it can contain a path _copyfile_single_value is run # to move all files and directories to the workflow directory - if field.type in [File, Directory, MultiOutputObj] or type(value) in [ - list, - tuple, - dict, - ]: - new_value = _copyfile_single_value(wf_path=wf_path, value=value) - setattr(result.output, field.name, new_value) + new_value = copy_nested_files(value, wf_path, mode=FileSet.CopyMode.hardlink) + setattr(result.output, field.name, new_value) return result -def _copyfile_single_value(wf_path, value): - """checking a single value for files that need to be copied to the wf dir""" - if isinstance(value, (tuple, list)): - return [_copyfile_single_value(wf_path, val) for val in value] - elif isinstance(value, dict): - return { - key: _copyfile_single_value(wf_path, val) for (key, val) in value.items() - } - elif is_existing_file(value): - new_path = wf_path / Path(value).name - copyfile(originalfile=value, newfile=new_path, copy=True, use_hardlink=True) - return new_path - else: - return value - - -def task_hash(task): - """ - Calculate the checksum of a task. - - input hash, output hash, environment hash - - Parameters - ---------- - task : :class:`~pydra.engine.core.TaskBase` - The input task. - - """ - return NotImplementedError - - def gather_runtime_info(fname): """ Extract runtime information from a file. @@ -257,15 +228,14 @@ def make_klass(spec): return None fields = spec.fields if fields: - newfields = dict() + newfields = {} for item in fields: if len(item) == 2: name = item[0] if isinstance(item[1], attr._make._CountingAttr): - newfields[name] = item[1] - newfields[name].validator(custom_validator) + newfield = item[1] else: - newfields[name] = attr.ib(type=item[1], validator=custom_validator) + newfield = attr.ib(type=item[1]) else: if ( any([isinstance(ii, attr._make._CountingAttr) for ii in item]) @@ -276,210 +246,53 @@ def make_klass(spec): "(name, type, default), (name, type, default, metadata)" "or (name, type, metadata)" ) - else: - if len(item) == 3: - name, tp = item[:2] - if isinstance(item[-1], dict) and "help_string" in item[-1]: - mdata = item[-1] - newfields[name] = attr.ib( - type=tp, metadata=mdata, validator=custom_validator - ) - else: - dflt = item[-1] - newfields[name] = attr.ib( - type=tp, default=dflt, validator=custom_validator - ) - elif len(item) == 4: - name, tp, dflt, mdata = item - newfields[name] = attr.ib( - type=tp, - default=dflt, - metadata=mdata, - validator=custom_validator, - ) - # if type has converter, e.g. MultiInputObj - if hasattr(newfields[name].type, "converter"): - newfields[name].converter = newfields[name].type.converter - fields = newfields - return attr.make_class(spec.name, fields, bases=spec.bases, kw_only=True) - - -def custom_validator(instance, attribute, value): - """simple custom validation - take into account ty.Union, ty.List, ty.Dict (but only one level depth) - adding an additional validator, if allowe_values provided - """ - validators = [] - tp_attr = attribute.type - # a flag that could be changed to False, if the type is not recognized - check_type = True - if ( - value is attr.NOTHING - or value is None - or attribute.name.startswith("_") # e.g. _func - or isinstance(value, LazyField) - or tp_attr - in [ - ty.Any, - inspect._empty, - MultiOutputObj, - MultiInputObj, - MultiOutputFile, - MultiInputFile, - ] - ): - check_type = False # no checking of the type - elif isinstance(tp_attr, type) or tp_attr in [File, Directory]: - tp = _single_type_update(tp_attr, name=attribute.name) - cont_type = None - else: # more complex types - cont_type, tp_attr_list = _check_special_type(tp_attr, name=attribute.name) - if cont_type is ty.Union: - tp, check_type = _types_updates(tp_attr_list, name=attribute.name) - elif cont_type is list: - tp, check_type = _types_updates(tp_attr_list, name=attribute.name) - elif cont_type is dict: - # assuming that it should have length of 2 for keys and values - if len(tp_attr_list) != 2: - check_type = False - else: - tp_attr_key, tp_attr_val = tp_attr_list - # updating types separately for keys and values - tp_k, check_k = _types_updates([tp_attr_key], name=attribute.name) - tp_v, check_v = _types_updates([tp_attr_val], name=attribute.name) - # assuming that I have to be able to check keys and values - if not (check_k and check_v): - check_type = False - else: - tp = {"key": tp_k, "val": tp_v} - else: - warnings.warn( - f"no type check for {attribute.name} field, " - f"no type check implemented for value {value} and type {tp_attr}" - ) - check_type = False - - if check_type: - validators.append(_type_validator(instance, attribute, value, tp, cont_type)) - - # checking additional requirements for values (e.g. allowed_values) - meta_attr = attribute.metadata - if "allowed_values" in meta_attr: - validators.append(_allowed_values_validator(isinstance, attribute, value)) - return validators - - -def _type_validator(instance, attribute, value, tp, cont_type): - """creating a customized type validator, - uses validator.deep_iterable/mapping if the field is a container - (i.e. ty.List or ty.Dict), - it also tries to guess when the value is a list due to the splitter - and validates the elements - """ - if cont_type is None or cont_type is ty.Union: - # if tp is not (list,), we are assuming that the value is a list - # due to the splitter, so checking the member types - if isinstance(value, list) and tp != (list,): - return attr.validators.deep_iterable( - member_validator=attr.validators.instance_of( - tp + (attr._make._Nothing,) + kwargs = {} + if len(item) == 3: + name, tp = item[:2] + if isinstance(item[-1], dict) and "help_string" in item[-1]: + mdata = item[-1] + kwargs["metadata"] = mdata + else: + kwargs["default"] = item[-1] + elif len(item) == 4: + name, tp, dflt, mdata = item + kwargs["default"] = dflt + kwargs["metadata"] = mdata + newfield = attr.ib( + type=tp, + **kwargs, ) - )(instance, attribute, value) - else: - return attr.validators.instance_of(tp + (attr._make._Nothing,))( - instance, attribute, value + checker_label = f"'{name}' field of {spec.name}" + type_checker = TypeParser[newfield.type]( + newfield.type, label=checker_label, superclass_auto_cast=True ) - elif cont_type is list: - return attr.validators.deep_iterable( - member_validator=attr.validators.instance_of(tp + (attr._make._Nothing,)) - )(instance, attribute, value) - elif cont_type is dict: - return attr.validators.deep_mapping( - key_validator=attr.validators.instance_of(tp["key"]), - value_validator=attr.validators.instance_of( - tp["val"] + (attr._make._Nothing,) - ), - )(instance, attribute, value) - else: - raise Exception( - f"container type of {attribute.name} should be None, list, dict or ty.Union, " - f"and not {cont_type}" - ) - - -def _types_updates(tp_list, name): - """updating the type's tuple with possible additional types""" - tp_upd_list = [] - check = True - for tp_el in tp_list: - tp_upd = _single_type_update(tp_el, name, simplify=True) - if tp_upd is None: - check = False - break - else: - tp_upd_list += list(tp_upd) - tp_upd = tuple(set(tp_upd_list)) - return tp_upd, check - - -def _single_type_update(tp, name, simplify=False): - """updating a single type with other related types - e.g. adding bytes for str - if simplify is True, than changing typing.List to list etc. - (assuming that I validate only one depth, so have to simplify at some point) - """ - if isinstance(tp, type) or tp in [File, Directory]: - if tp is str: - return (str, bytes) - elif tp in [File, Directory, os.PathLike]: - return (os.PathLike, str) - elif tp is float: - return (float, int) - else: - return (tp,) - elif simplify is True: - warnings.warn(f"simplify validator for {name} field, checking only one depth") - cont_tp, types_list = _check_special_type(tp, name=name) - if cont_tp is list: - return (list,) - elif cont_tp is dict: - return (dict,) - elif cont_tp is ty.Union: - return types_list - else: - warnings.warn( - f"no type check for {name} field, type check not implemented for type of {tp}" - ) - return None - else: - warnings.warn( - f"no type check for {name} field, type check not implemented for type - {tp}, " - f"consider using simplify=True" - ) - return None - - -def _check_special_type(tp, name): - """checking if the type is a container: ty.List, ty.Dict or ty.Union""" - if sys.version_info.minor >= 8: - return ty.get_origin(tp), ty.get_args(tp) - else: - if isinstance(tp, type): # simple type - return None, () - else: - if tp._name == "List": - return list, tp.__args__ - elif tp._name == "Dict": - return dict, tp.__args__ - elif tp.__origin__ is ty.Union: - return ty.Union, tp.__args__ + if newfield.type in (MultiInputObj, MultiInputFile): + converter = attr.converters.pipe(ensure_list, type_checker) + elif newfield.type in (MultiOutputObj, MultiOutputFile): + converter = attr.converters.pipe(from_list_if_single, type_checker) else: - warnings.warn( - f"not type check for {name} field, type check not implemented for type {tp}" - ) - return None, () + converter = type_checker + newfield.converter = converter + newfield.on_setattr = attr.setters.convert + if "allowed_values" in newfield.metadata: + if newfield._validator is None: + newfield._validator = allowed_values_validator + elif isinstance(newfield._validator, ty.Iterable): + if allowed_values_validator not in newfield._validator: + newfield._validator.append(allowed_values_validator) + elif newfield._validator is not allowed_values_validator: + newfield._validator = [ + newfield._validator, + allowed_values_validator, + ] + newfields[name] = newfield + fields = newfields + return attrs.make_class( + spec.name, fields, bases=spec.bases, kw_only=True, on_setattr=None + ) -def _allowed_values_validator(instance, attribute, value): +def allowed_values_validator(_, attribute, value): """checking if the values is in allowed_values""" allowed = attribute.metadata["allowed_values"] if value is attr.NOTHING or isinstance(value, LazyField): @@ -669,45 +482,6 @@ def get_open_loop(): return loop -def hash_function(obj): - """Generate hash of object.""" - return sha256(str(obj).encode()).hexdigest() - - -def hash_value(value, tp=None, metadata=None, precalculated=None): - """calculating hash or returning values recursively""" - if metadata is None: - metadata = {} - if isinstance(value, (tuple, list, set)): - return [hash_value(el, tp, metadata, precalculated) for el in value] - elif isinstance(value, dict): - dict_hash = { - k: hash_value(v, tp, metadata, precalculated) for (k, v) in value.items() - } - # returning a sorted object - return [list(el) for el in sorted(dict_hash.items(), key=lambda x: x[0])] - else: # not a container - if ( - (tp is File or "pydra.engine.specs.File" in str(tp)) - and is_existing_file(value) - and "container_path" not in metadata - ): - return hash_file(value, precalculated=precalculated) - elif ( - (tp is File or "pydra.engine.specs.Directory" in str(tp)) - and is_existing_file(value) - and "container_path" not in metadata - ): - return hash_dir(value, precalculated=precalculated) - elif type(value).__module__ == "numpy": # numpy objects - return [ - hash_value(el, tp, metadata, precalculated) - for el in ensure_list(value.tolist()) - ] - else: - return value - - def output_from_inputfields(output_spec, input_spec): """ Collect values from output from input fields. @@ -821,8 +595,9 @@ def load_task(task_pkl, ind=None): task_pkl = Path(task_pkl) task = cp.loads(task_pkl.read_bytes()) if ind is not None: - _, inputs_dict = task.get_input_el(ind) - task.inputs = attr.evolve(task.inputs, **inputs_dict) + ind_inputs = task.get_input_el(ind) + task.inputs = attr.evolve(task.inputs, **ind_inputs) + task._pre_split = True task.state = None # resetting uid for task task._uid = uuid4().hex @@ -881,7 +656,11 @@ def argstr_formatting(argstr, inputs, value_updates=None): for fld in inp_fields: fld_name = fld[1:-1] # extracting the name form {field_name} fld_value = inputs_dict[fld_name] - if fld_value is attr.NOTHING: + fld_attr = getattr(attrs.fields(type(inputs)), fld_name) + if fld_value is attr.NOTHING or ( + fld_value is False + and TypeParser.matches_type(fld_attr.type, ty.Union[Path, bool]) + ): # if value is NOTHING, nothing should be added to the command val_dict[fld_name] = "" else: @@ -924,3 +703,38 @@ async def __aenter__(self): async def __aexit__(self, exc_type, exc_value, traceback): self.lock.release() return None + + +def parse_copyfile(fld: attr.Attribute, default_collation=FileSet.CopyCollation.any): + """Gets the copy mode from the 'copyfile' value from a field attribute""" + copyfile = fld.metadata.get("copyfile", FileSet.CopyMode.any) + if isinstance(copyfile, tuple): + mode, collation = copyfile + elif isinstance(copyfile, str): + try: + mode, collation = copyfile.split(",") + except ValueError: + mode = copyfile + collation = default_collation + else: + collation = FileSet.CopyCollation[collation] + mode = FileSet.CopyMode[mode] + else: + if copyfile is True: + mode = FileSet.CopyMode.copy + elif copyfile is False: + mode = FileSet.CopyMode.link + elif copyfile is None: + mode = FileSet.CopyMode.any + else: + mode = copyfile + collation = default_collation + if not isinstance(mode, FileSet.CopyMode): + raise TypeError( + f"Unrecognised type for mode copyfile metadata of {fld}, {mode}" + ) + if not isinstance(collation, FileSet.CopyCollation): + raise TypeError( + f"Unrecognised type for collation copyfile metadata of {fld}, {collation}" + ) + return mode, collation diff --git a/pydra/engine/helpers_file.py b/pydra/engine/helpers_file.py index 73d59b718f..f194533ac7 100644 --- a/pydra/engine/helpers_file.py +++ b/pydra/engine/helpers_file.py @@ -1,513 +1,20 @@ """Functions ported from Nipype 1, after removing parts that were related to py2.""" -import attr -import subprocess as sp -from hashlib import sha256 + import os -import os.path as op import re -import shutil -import posixpath import logging from pathlib import Path import typing as ty from copy import copy +import subprocess as sp +from contextlib import contextmanager +import attr +from fileformats.core import FileSet -related_filetype_sets = [(".hdr", ".img", ".mat"), (".nii", ".mat"), (".BRIK", ".HEAD")] -"""List of neuroimaging file types that are to be interpreted together.""" logger = logging.getLogger("pydra") -def split_filename(fname): - """ - Split a filename into parts: path, base filename and extension. - - Parameters - ---------- - fname : :obj:`str` - file or path name - - Returns - ------- - pth : :obj:`str` - base path from fname - fname : :obj:`str` - filename from fname, without extension - ext : :obj:`str` - file extension from fname - - Examples - -------- - >>> pth, fname, ext = split_filename('/home/data/subject.nii.gz') - >>> pth - '/home/data' - - >>> fname - 'subject' - - >>> ext - '.nii.gz' - - """ - special_extensions = [".nii.gz", ".tar.gz", ".niml.dset"] - - pth = op.dirname(fname) - fname = op.basename(fname) - - ext = None - for special_ext in special_extensions: - ext_len = len(special_ext) - if (len(fname) > ext_len) and (fname[-ext_len:].lower() == special_ext.lower()): - ext = fname[-ext_len:] - fname = fname[:-ext_len] - break - if not ext: - fname, ext = op.splitext(fname) - - return pth, fname, ext - - -def hash_file( - afile, chunk_len=8192, crypto=sha256, raise_notfound=True, precalculated=None -): - """Compute hash of a file using 'crypto' module.""" - from .specs import LazyField - - if afile is None or isinstance(afile, LazyField) or isinstance(afile, list): - return None - if not Path(afile).is_file(): - if raise_notfound: - raise RuntimeError('File "%s" not found.' % afile) - return None - - # if the path exists already in precalculated - # the time of the last modification will be compared - # and the precalculated hash value will be used if the file has not change - if precalculated and str(Path(afile)) in precalculated: - pre_mtime, pre_cont_hash = precalculated[str(Path(afile))] - if Path(afile).stat().st_mtime == pre_mtime: - return pre_cont_hash - - crypto_obj = crypto() - with open(afile, "rb") as fp: - while True: - data = fp.read(chunk_len) - if not data: - break - crypto_obj.update(data) - - cont_hash = crypto_obj.hexdigest() - if precalculated is not None: - precalculated[str(Path(afile))] = (Path(afile).stat().st_mtime, cont_hash) - return cont_hash - - -def hash_dir( - dirpath, - crypto=sha256, - ignore_hidden_files=False, - ignore_hidden_dirs=False, - raise_notfound=True, - precalculated=None, -): - """Compute hash of directory contents. - - This function computes the hash of every file in directory `dirpath` and then - computes the hash of that list of hashes to return a single hash value. The - directory is traversed recursively. - - Parameters - ---------- - dirpath : :obj:`str` - Path to directory. - crypto : :obj: `function` - cryptographic hash functions - ignore_hidden_files : :obj:`bool` - If `True`, ignore filenames that begin with `.`. - ignore_hidden_dirs : :obj:`bool` - If `True`, ignore files in directories that begin with `.`. - raise_notfound : :obj:`bool` - If `True` and `dirpath` does not exist, raise `FileNotFound` exception. If - `False` and `dirpath` does not exist, return `None`. - - Returns - ------- - hash : :obj:`str` - Hash of the directory contents. - """ - from .specs import LazyField - - if dirpath is None or isinstance(dirpath, LazyField) or isinstance(dirpath, list): - return None - if not Path(dirpath).is_dir(): - if raise_notfound: - raise FileNotFoundError(f"Directory {dirpath} not found.") - return None - - file_hashes = [] - for dpath, dirnames, filenames in os.walk(dirpath): - # Sort in-place to guarantee order. - dirnames.sort() - filenames.sort() - dpath = Path(dpath) - if ignore_hidden_dirs and dpath.name.startswith(".") and str(dpath) != dirpath: - continue - for filename in filenames: - if ignore_hidden_files and filename.startswith("."): - continue - if not is_existing_file(dpath / filename): - file_hashes.append(str(dpath / filename)) - else: - this_hash = hash_file(dpath / filename, precalculated=precalculated) - file_hashes.append(this_hash) - - crypto_obj = crypto() - for h in file_hashes: - crypto_obj.update(h.encode()) - - return crypto_obj.hexdigest() - - -def _parse_mount_table(exit_code, output): - """ - Parse the output of ``mount`` to produce (path, fs_type) pairs. - - Separated from _generate_cifs_table to enable testing logic with real - outputs - - """ - # Not POSIX - if exit_code != 0: - return [] - - # Linux mount example: sysfs on /sys type sysfs (rw,nosuid,nodev,noexec) - # ^^^^ ^^^^^ - # OSX mount example: /dev/disk2 on / (hfs, local, journaled) - # ^ ^^^ - pattern = re.compile(r".*? on (/.*?) (?:type |\()([^\s,\)]+)") - - # Keep line and match for error reporting (match == None on failure) - # Ignore empty lines - matches = [(ll, pattern.match(ll)) for ll in output.strip().splitlines() if ll] - - # (path, fstype) tuples, sorted by path length (longest first) - mount_info = sorted( - (match.groups() for _, match in matches if match is not None), - key=lambda x: len(x[0]), - reverse=True, - ) - cifs_paths = [path for path, fstype in mount_info if fstype.lower() == "cifs"] - - # Report failures as warnings - for line, match in matches: - if match is None: - logger.debug("Cannot parse mount line: '%s'", line) - - return [ - mount - for mount in mount_info - if any(mount[0].startswith(path) for path in cifs_paths) - ] - - -def _generate_cifs_table(): - """ - Construct a reverse-length-ordered list of mount points that fall under a CIFS mount. - - This precomputation allows efficient checking for whether a given path - would be on a CIFS filesystem. - On systems without a ``mount`` command, or with no CIFS mounts, returns an - empty list. - - """ - exit_code, output = sp.getstatusoutput("mount") - return _parse_mount_table(exit_code, output) - - -_cifs_table = _generate_cifs_table() - - -def on_cifs(fname): - """ - Check whether a file path is on a CIFS filesystem mounted in a POSIX host. - - POSIX hosts are assumed to have the ``mount`` command. - - On Windows, Docker mounts host directories into containers through CIFS - shares, which has support for Minshall+French symlinks, or text files that - the CIFS driver exposes to the OS as symlinks. - We have found that under concurrent access to the filesystem, this feature - can result in failures to create or read recently-created symlinks, - leading to inconsistent behavior and ``FileNotFoundError`` errors. - - This check is written to support disabling symlinks on CIFS shares. - - """ - # Only the first match (most recent parent) counts - for fspath, fstype in _cifs_table: - if fname.startswith(fspath): - return fstype == "cifs" - return False - - -def copyfile( - originalfile, - newfile, - copy=False, - create_new=False, - use_hardlink=True, - copy_related_files=True, -): - """ - Copy or link files. - - If ``use_hardlink`` is True, and the file can be hard-linked, then a - link is created, instead of copying the file. - - If a hard link is not created and ``copy`` is False, then a symbolic - link is created. - - .. admonition:: Copy options for existing files - - * symlink - - * to regular file originalfile (keep if symlinking) - * to same dest as symlink originalfile (keep if symlinking) - * to other file (unlink) - - * regular file - - * hard link to originalfile (keep) - * copy of file (same hash) (keep) - * different file (diff hash) (unlink) - - .. admonition:: Copy options for new files - - * ``use_hardlink`` & ``can_hardlink`` => hardlink - * ``~hardlink`` & ``~copy`` & ``can_symlink`` => symlink - * ``~hardlink`` & ``~symlink`` => copy - - Parameters - ---------- - originalfile : :obj:`str` - full path to original file - newfile : :obj:`str` - full path to new file - copy : Bool - specifies whether to copy or symlink files - (default=False) but only for POSIX systems - use_hardlink : Bool - specifies whether to hard-link files, when able - (Default=False), taking precedence over copy - copy_related_files : Bool - specifies whether to also operate on related files, as defined in - ``related_filetype_sets`` - - Returns - ------- - None - - """ - newhash = None - orighash = None - logger.debug(newfile) - - if create_new: - while op.exists(newfile): - base, fname, ext = split_filename(newfile) - s = re.search("_c[0-9]{4,4}$", fname) - i = 0 - if s: - i = int(s.group()[2:]) + 1 - fname = fname[:-6] + "_c%04d" % i - else: - fname += "_c%04d" % i - newfile = base + os.sep + fname + ext - - # Don't try creating symlinks on CIFS - if copy is False and on_cifs(newfile): - copy = True - - keep = False - if op.lexists(newfile): - if op.islink(newfile): - if all( - ( - os.readlink(newfile) == op.realpath(originalfile), - not use_hardlink, - not copy, - ) - ): - keep = True - elif posixpath.samefile(newfile, originalfile): - keep = True - else: - newhash = hash_file(newfile) - logger.debug("File: %s already exists,%s, copy:%d", newfile, newhash, copy) - orighash = hash_file(originalfile) - keep = newhash == orighash - if keep: - logger.debug( - "File: %s already exists, not overwriting, copy:%d", newfile, copy - ) - else: - os.unlink(newfile) - - if not keep and use_hardlink: - try: - logger.debug("Linking File: %s->%s", newfile, originalfile) - # Use realpath to avoid hardlinking symlinks - os.link(op.realpath(originalfile), newfile) - except OSError: - use_hardlink = False # Disable hardlink for associated files - else: - keep = True - - if not keep and not copy and os.name == "posix": - try: - logger.debug("Symlinking File: %s->%s", newfile, originalfile) - os.symlink(originalfile, newfile) - except OSError: - copy = True # Disable symlink for associated files - else: - keep = True - - if not keep: - try: - logger.debug("Copying File: %s->%s", newfile, originalfile) - shutil.copyfile(originalfile, newfile) - except shutil.Error as e: - logger.warning(e.message) - - # Associated files - if copy_related_files: - related_file_pairs = ( - get_related_files(f, include_this_file=False) - for f in (originalfile, newfile) - ) - for alt_ofile, alt_nfile in zip(*related_file_pairs): - if op.exists(alt_ofile): - copyfile( - alt_ofile, - alt_nfile, - copy, - use_hardlink=use_hardlink, - copy_related_files=False, - ) - - return newfile - - -def get_related_files(filename, include_this_file=True): - """ - Return a list of related files. - - As defined in :attr:`related_filetype_sets`, for a filename - (e.g., Nifti-Pair, Analyze (SPM), and AFNI files). - - Parameters - ---------- - filename : :obj:`str` - File name to find related filetypes of. - include_this_file : bool - If true, output includes the input filename. - - """ - related_files = [] - path, name, this_type = split_filename(filename) - for type_set in related_filetype_sets: - if this_type in type_set: - for related_type in type_set: - if include_this_file or related_type != this_type: - related_files.append(Path(path) / (name + related_type)) - if not len(related_files): - related_files = [filename] - return related_files - - -def copyfiles(filelist, dest, copy=False, create_new=False): - """ - Copy or symlink files in ``filelist`` to ``dest`` directory. - - Parameters - ---------- - filelist : list - List of files to copy. - dest : path/files - full path to destination. If it is a list of length greater - than 1, then it assumes that these are the names of the new - files. - copy : Bool - specifies whether to copy or symlink files - (default=False) but only for posix systems - - Returns - ------- - None - - """ - # checking if dest is a single dir or filepath/filepaths - if not isinstance(dest, list) and Path(dest).is_dir(): - dest_dir = True - out_path = str(Path(dest).resolve()) - else: - dest_dir = False - out_path = ensure_list(dest) - newfiles = [] - for i, f in enumerate(ensure_list(filelist)): - # Todo: this part is not tested - if isinstance(f, list): - newfiles.insert(i, copyfiles(f, dest, copy=copy, create_new=create_new)) - else: - if dest_dir: - destfile = fname_presuffix(f, newpath=out_path) - else: - destfile = out_path[i] - destfile = copyfile(f, destfile, copy, create_new=create_new) - newfiles.insert(i, destfile) - return newfiles - - -def fname_presuffix(fname, prefix="", suffix="", newpath=None, use_ext=True): - """ - Manipulate path and name of input filename. - - Parameters - ---------- - fname : :obj:`str` - A filename (may or may not include path) - prefix : :obj:`str` - Characters to prepend to the filename - suffix : :obj:`str` - Characters to append to the filename - newpath : :obj:`str` - Path to replace the path of the input fname - use_ext : :obj:`bool` - If True (default), appends the extension of the original file - to the output name. - Return - ------ - path : :obj:`str` - Absolute path of the modified filename - Examples - -------- - >>> import pytest, sys - >>> if sys.platform.startswith('win'): pytest.skip() - >>> from pydra.engine.helpers_file import fname_presuffix - >>> fname = 'foo.nii.gz' - >>> fname_presuffix(fname,'pre','post','/tmp') - '/tmp/prefoopost.nii.gz' - """ - pth, fname, ext = split_filename(fname) - if not use_ext: - ext = "" - - # No need for isdefined: bool(Undefined) evaluates to False - if newpath: - pth = op.abspath(newpath) - return str(Path(pth) / (prefix + fname + suffix + ext)) - - # dj: copied from misc def is_container(item): """ @@ -544,31 +51,48 @@ def ensure_list(filename): return None -# not sure if this might be useful for Function Task -def copyfile_input(inputs, output_dir): - """Implement the base class method.""" - from .specs import attr_fields, File, MultiInputFile - - map_copyfiles = {} - for fld in attr_fields(inputs): - copy = fld.metadata.get("copyfile") - if copy is not None and fld.type not in [File, MultiInputFile]: - raise Exception( - f"if copyfile set, field has to be a File " f"but {fld.type} provided" - ) - file = getattr(inputs, fld.name) - if copy in [True, False] and file != attr.NOTHING: - if isinstance(file, list): - map_copyfiles[fld.name] = [] - for el in file: - newfile = output_dir.joinpath(Path(el).name) - copyfile(el, newfile, copy=copy) - map_copyfiles[fld.name].append(str(newfile)) - else: - newfile = output_dir.joinpath(Path(file).name) - copyfile(file, newfile, copy=copy) - map_copyfiles[fld.name] = str(newfile) - return map_copyfiles or None +def copy_nested_files( + value: ty.Any, + dest_dir: os.PathLike, + supported_modes: FileSet.CopyMode = FileSet.CopyMode.any, + **kwargs, +) -> ty.Any: + """Copies all "file-sets" found within the nested value (e.g. dict, list,...) into the + destination directory. If no nested file-sets are found then the original value is + returned. Note that multiple nested file-sets (e.g. a list) will to have unique names + names (i.e. not differentiated by parent directories) otherwise there will be a path + clash in the destination directory. + + Parameters + ---------- + value : Any + the value to copy files from (if required) + dest_dir : os.PathLike + the destination directory to copy the files to + **kwargs + passed directly onto FileSet.copy() + """ + from ..utils.typing import TypeParser # noqa + + cache: ty.Dict[FileSet, FileSet] = {} + + def copy_fileset(fileset: FileSet): + try: + return cache[fileset] + except KeyError: + pass + supported = supported_modes + if any(MountIndentifier.on_cifs(p) for p in fileset.fspaths): + supported -= FileSet.CopyMode.symlink + if not all( + MountIndentifier.on_same_mount(p, dest_dir) for p in fileset.fspaths + ): + supported -= FileSet.CopyMode.hardlink + copied = fileset.copy(dest_dir=dest_dir, supported_modes=supported, **kwargs) + cache[fileset] = copied + return copied + + return TypeParser.apply_to_instances(FileSet, copy_fileset, value) # not sure if this might be useful for Function Task @@ -591,16 +115,20 @@ def template_update(inputs, output_dir, state_ind=None, map_copyfiles=None): from .specs import attr_fields + # Collect templated inputs for which all requirements are satisfied. fields_templ = [ - fld for fld in attr_fields(inputs) if fld.metadata.get("output_file_template") + field + for field in attr_fields(inputs) + if field.metadata.get("output_file_template") + and getattr(inputs, field.name) is not False + and all( + getattr(inputs, required_field) is not attr.NOTHING + for required_field in field.metadata.get("requires", ()) + ) ] + dict_mod = {} for fld in fields_templ: - if fld.type not in [str, ty.Union[str, bool]]: - raise Exception( - f"fields with output_file_template" - " has to be a string or Union[str, bool]" - ) dict_mod[fld.name] = template_update_single( field=fld, inputs=inputs, @@ -620,52 +148,48 @@ def template_update_single( based on the value from inputs_dict (checking the types of the fields, that have "output_file_template)" """ - from .specs import File, MultiOutputFile, Directory - # if input_dict_st with state specific value is not available, # the dictionary will be created from inputs object + from ..utils.typing import TypeParser # noqa + from pydra.engine.specs import LazyField, OUTPUT_TEMPLATE_TYPES + if inputs_dict_st is None: inputs_dict_st = attr.asdict(inputs, recurse=False) if spec_type == "input": - if field.type not in [str, ty.Union[str, bool]]: - raise Exception( - f"fields with output_file_template" - "has to be a string or Union[str, bool]" - ) inp_val_set = inputs_dict_st[field.name] - if inp_val_set is not attr.NOTHING and not isinstance(inp_val_set, (str, bool)): - raise Exception( - f"{field.name} has to be str or bool, but {inp_val_set} set" - ) - if isinstance(inp_val_set, bool) and field.type is str: - raise Exception( - f"type of {field.name} is str, consider using Union[str, bool]" + if isinstance(inp_val_set, bool) and field.type in (Path, str): + raise TypeError( + f"type of '{field.name}' is Path, consider using Union[Path, bool]" ) + if inp_val_set is not attr.NOTHING and not isinstance(inp_val_set, LazyField): + inp_val_set = TypeParser(ty.Union[OUTPUT_TEMPLATE_TYPES])(inp_val_set) elif spec_type == "output": - if field.type not in [File, MultiOutputFile, Directory]: - raise Exception( - f"output {field.name} should be a File, but {field.type} set as the type" + if not TypeParser.contains_type(FileSet, field.type): + raise TypeError( + f"output {field.name} should be file-system object, but {field.type} " + "set as the type" ) else: - raise Exception(f"spec_type can be input or output, but {spec_type} provided") + raise TypeError(f"spec_type can be input or output, but {spec_type} provided") # for inputs that the value is set (so the template is ignored) - if spec_type == "input" and isinstance(inputs_dict_st[field.name], str): - return inputs_dict_st[field.name] - elif spec_type == "input" and inputs_dict_st[field.name] is False: - # if input fld is set to False, the fld shouldn't be used (setting NOTHING) - return attr.NOTHING - else: # inputs_dict[field.name] is True or spec_type is output - value = _template_formatting(field, inputs, inputs_dict_st) - # changing path so it is in the output_dir - if output_dir and value is not attr.NOTHING: - # should be converted to str, it is also used for input fields that should be str - if type(value) is list: - return [str(output_dir / Path(val).name) for val in value] - else: - return str(output_dir / Path(value).name) - else: + if spec_type == "input": + if isinstance(inp_val_set, (Path, list)): + return inp_val_set + if inp_val_set is False: + # if input fld is set to False, the fld shouldn't be used (setting NOTHING) return attr.NOTHING + # inputs_dict[field.name] is True or spec_type is output + value = _template_formatting(field, inputs, inputs_dict_st) + # changing path so it is in the output_dir + if output_dir and value is not attr.NOTHING: + # should be converted to str, it is also used for input fields that should be str + if type(value) is list: + return [str(output_dir / Path(val).name) for val in value] + else: + return str(output_dir / Path(value).name) + else: + return attr.NOTHING def _template_formatting(field, inputs, inputs_dict_st): @@ -676,16 +200,27 @@ def _template_formatting(field, inputs, inputs_dict_st): Allowing for multiple input values used in the template as longs as there is no more than one file (i.e. File, PathLike or string with extensions) """ - from .specs import MultiOutputFile - # if a template is a function it has to be run first with the inputs as the only arg template = field.metadata["output_file_template"] if callable(template): template = template(inputs) # as default, we assume that keep_extension is True - keep_extension = field.metadata.get("keep_extension", True) + if isinstance(template, (tuple, list)): + formatted = [ + _string_template_formatting(field, t, inputs, inputs_dict_st) + for t in template + ] + else: + assert isinstance(template, str) + formatted = _string_template_formatting(field, template, inputs, inputs_dict_st) + return formatted + + +def _string_template_formatting(field, template, inputs, inputs_dict_st): + from .specs import MultiInputObj, MultiOutputFile + keep_extension = field.metadata.get("keep_extension", True) inp_fields = re.findall(r"{\w+}", template) inp_fields_fl = re.findall(r"{\w+:[0-9.]+f}", template) inp_fields += [re.sub(":[0-9.]+f", "", el) for el in inp_fields_fl] @@ -694,7 +229,6 @@ def _template_formatting(field, inputs, inputs_dict_st): val_dict = {} file_template = None - from .specs import attr_fields_dict, File for fld in inp_fields: fld_name = fld[1:-1] # extracting the name form {field_name} @@ -707,10 +241,8 @@ def _template_formatting(field, inputs, inputs_dict_st): else: # checking for fields that can be treated as a file: # have type File, or value that is path like (including str with extensions) - if ( - attr_fields_dict(inputs)[fld_name].type is File - or isinstance(fld_value, os.PathLike) - or (isinstance(fld_value, str) and "." in fld_value) + if isinstance(fld_value, os.PathLike) or ( + isinstance(fld_value, str) and "." in fld_value ): if file_template: raise Exception( @@ -726,10 +258,12 @@ def _template_formatting(field, inputs, inputs_dict_st): # each element of the list should be used separately in the template # and return a list with formatted values if field.type is MultiOutputFile and any( - [isinstance(el, list) for el in val_dict.values()] + [isinstance(el, (list, MultiInputObj)) for el in val_dict.values()] ): # all fields that are lists - keys_list = [k for k, el in val_dict.items() if isinstance(el, list)] + keys_list = [ + k for k, el in val_dict.items() if isinstance(el, (list, MultiInputObj)) + ] if any( [len(val_dict[key]) != len(val_dict[keys_list[0]]) for key in keys_list[1:]] ): @@ -795,23 +329,144 @@ def _element_formatting(template, values_template_dict, file_template, keep_exte def is_local_file(f): - from .specs import File, Directory, MultiInputFile + from ..utils.typing import TypeParser - if "container_path" not in f.metadata and ( - f.type in [File, Directory, MultiInputFile] - or "pydra.engine.specs.File" in str(f.type) - or "pydra.engine.specs.Directory" in str(f.type) - ): - return True - else: - return False + return "container_path" not in f.metadata and TypeParser.contains_type( + FileSet, f.type + ) -def is_existing_file(value): - """checking if an object is an existing file""" - if isinstance(value, str) and value == "": - return False - try: - return Path(value).exists() - except TypeError: - return False +class MountIndentifier: + """Used to check the mount type that given file paths reside on in order to determine + features that can be used (e.g. symlinks)""" + + @classmethod + def on_cifs(cls, path: os.PathLike) -> bool: + """ + Check whether a file path is on a CIFS filesystem mounted in a POSIX host. + + POSIX hosts are assumed to have the ``mount`` command. + + On Windows, Docker mounts host directories into containers through CIFS + shares, which has support for Minshall+French symlinks, or text files that + the CIFS driver exposes to the OS as symlinks. + We have found that under concurrent access to the filesystem, this feature + can result in failures to create or read recently-created symlinks, + leading to inconsistent behavior and ``FileNotFoundError`` errors. + + This check is written to support disabling symlinks on CIFS shares. + + NB: This function and sub-functions are copied from the nipype.utils.filemanip module + + + NB: Adapted from https://github.com/nipy/nipype + """ + return cls.get_mount(path)[1] == "cifs" + + @classmethod + def on_same_mount(cls, path1: os.PathLike, path2: os.PathLike) -> bool: + """Checks whether two or paths are on the same logical file system""" + return cls.get_mount(path1)[0] == cls.get_mount(path2)[0] + + @classmethod + def get_mount(cls, path: os.PathLike) -> ty.Tuple[Path, str]: + """Get the mount point for a given file-system path + + Parameters + ---------- + path: os.PathLike + the file-system path to identify the mount of + + Returns + ------- + mount_point: os.PathLike + the root of the mount the path sits on + fstype : str + the type of the file-system (e.g. ext4 or cifs)""" + try: + # Only the first match (most recent parent) counts, mount table sorted longest + # to shortest + return next( + (Path(p), t) + for p, t in cls.get_mount_table() + if str(path).startswith(p) + ) + except StopIteration: + return (Path("/"), "ext4") + + @classmethod + def generate_cifs_table(cls) -> ty.List[ty.Tuple[str, str]]: + """ + Construct a reverse-length-ordered list of mount points that fall under a CIFS mount. + + This precomputation allows efficient checking for whether a given path + would be on a CIFS filesystem. + On systems without a ``mount`` command, or with no CIFS mounts, returns an + empty list. + + """ + exit_code, output = sp.getstatusoutput("mount") + return cls.parse_mount_table(exit_code, output) + + @classmethod + def parse_mount_table( + cls, exit_code: int, output: str + ) -> ty.List[ty.Tuple[str, str]]: + """ + Parse the output of ``mount`` to produce (path, fs_type) pairs. + + Separated from _generate_cifs_table to enable testing logic with real + outputs + + """ + # Not POSIX + if exit_code != 0: + return [] + + # Linux mount example: sysfs on /sys type sysfs (rw,nosuid,nodev,noexec) + # ^^^^ ^^^^^ + # OSX mount example: /dev/disk2 on / (hfs, local, journaled) + # ^ ^^^ + pattern = re.compile(r".*? on (/.*?) (?:type |\()([^\s,\)]+)") + + # Keep line and match for error reporting (match == None on failure) + # Ignore empty lines + matches = [(ll, pattern.match(ll)) for ll in output.strip().splitlines() if ll] + + # (path, fstype) tuples, sorted by path length (longest first) + mount_info = sorted( + (match.groups() for _, match in matches if match is not None), + key=lambda x: len(x[0]), + reverse=True, + ) + cifs_paths = [path for path, fstype in mount_info if fstype.lower() == "cifs"] + + # Report failures as warnings + for line, match in matches: + if match is None: + logger.debug("Cannot parse mount line: '%s'", line) + + return [ + mount + for mount in mount_info + if any(mount[0].startswith(path) for path in cifs_paths) + ] + + @classmethod + def get_mount_table(cls) -> ty.List[ty.Tuple[str, str]]: + if cls._mount_table is None: + cls._mount_table = cls.generate_cifs_table() + return cls._mount_table + + @classmethod + @contextmanager + def patch_table(cls, mount_table: ty.List[ty.Tuple[str, str]]): + """Patch the mount table with new values. Used in test routines""" + orig_table = cls._mount_table + cls._mount_table = list(mount_table) + try: + yield + finally: + cls._mount_table = orig_table + + _mount_table: ty.Optional[ty.List[ty.Tuple[str, str]]] = None diff --git a/pydra/engine/helpers_state.py b/pydra/engine/helpers_state.py index 4eb9248f85..866d408a46 100644 --- a/pydra/engine/helpers_state.py +++ b/pydra/engine/helpers_state.py @@ -4,6 +4,7 @@ import itertools from copy import deepcopy import logging +import typing as ty from .helpers import ensure_list logger = logging.getLogger("pydra") @@ -326,15 +327,18 @@ def add_name_combiner(combiner, name): return combiner_changed -def add_name_splitter(splitter, name): +def add_name_splitter( + splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...], None], name: str +) -> ty.Optional[ty.List[str]]: """adding a node's name to each field from the splitter""" if isinstance(splitter, str): return _add_name([splitter], name)[0] elif isinstance(splitter, list): - return _add_name(splitter, name) + return _add_name(list(splitter), name) elif isinstance(splitter, tuple): - splitter_l = list(splitter) - return tuple(_add_name(splitter_l, name)) + return tuple(_add_name(list(splitter), name)) + else: + return None def _add_name(mlist, name): @@ -344,7 +348,7 @@ def _add_name(mlist, name): if "." in elem or elem.startswith("_"): pass else: - mlist[i] = "{}.{}".format(name, mlist[i]) + mlist[i] = f"{name}.{mlist[i]}" elif isinstance(elem, list): mlist[i] = _add_name(elem, name) elif isinstance(elem, tuple): @@ -625,3 +629,25 @@ def inputs_types_to_dict(name, inputs): for field in input_names: inputs_dict[f"{name}.{field}"] = getattr(inputs, field) return inputs_dict + + +def unwrap_splitter( + splitter: ty.Union[str, ty.List[str], ty.Tuple[str, ...]] +) -> ty.Iterable[str]: + """Unwraps a splitter into a flat list of fields that are split over, i.e. + [("a", "b"), "c"] -> ["a", "b", "c"] + + Parameters + ---------- + splitter: str or list[str] or tuple[str, ...] + the splitter spec to unwrap + + Returns + ------- + unwrapped : ty.Iterable[str] + the field names listed in the splitter + """ + if isinstance(splitter, str): + return [splitter] + else: + return itertools.chain(*(unwrap_splitter(s) for s in splitter)) diff --git a/pydra/engine/run_pickled.py b/pydra/engine/run_pickled.py new file mode 100644 index 0000000000..902b243242 --- /dev/null +++ b/pydra/engine/run_pickled.py @@ -0,0 +1,31 @@ +import pickle +import sys +from pydra.engine.helpers import load_and_run + + +def run_pickled(*file_paths, rerun=False): + loaded_objects = [] + + for file_path in file_paths: + with open(file_path, "rb") as file: + loaded_objects.append(pickle.load(file)) + + if len(loaded_objects) == 1: + result = loaded_objects[0](rerun=rerun) + elif len(loaded_objects) == 2: + result = load_and_run(loaded_objects[0], loaded_objects[1], rerun=rerun) + else: + raise ValueError("Unsupported number of loaded objects") + + print(f"Result: {result}") + + +if __name__ == "__main__": + rerun = False # Default value for rerun + file_paths = sys.argv[1:] + + if "--rerun" in file_paths: + rerun = True + file_paths.remove("--rerun") + + run_pickled(*file_paths, rerun=rerun) diff --git a/pydra/engine/specs.py b/pydra/engine/specs.py index 20d4661bda..cccd272a9a 100644 --- a/pydra/engine/specs.py +++ b/pydra/engine/specs.py @@ -1,64 +1,59 @@ """Task I/O specifications.""" -import attr + from pathlib import Path import typing as ty import inspect import re +import os +from copy import copy from glob import glob - +import attr +from fileformats.core import FileSet +from fileformats.generic import ( + File, + Directory, +) +import pydra from .helpers_file import template_update_single +from ..utils.hash import hash_function, Cache +# from ..utils.misc import add_exc_note -def attr_fields(spec, exclude_names=()): - return [field for field in spec.__attrs_attrs__ if field.name not in exclude_names] - - -def attr_fields_dict(spec, exclude_names=()): - return { - field.name: field - for field in spec.__attrs_attrs__ - if field.name not in exclude_names - } +T = ty.TypeVar("T") -class File: - """An :obj:`os.pathlike` object, designating a file.""" - -class Directory: - """An :obj:`os.pathlike` object, designating a folder.""" +def attr_fields(spec, exclude_names=()): + return [field for field in spec.__attrs_attrs__ if field.name not in exclude_names] -class MultiInputObj: - """A ty.List[ty.Any] object, converter changes a single values to a list""" +# These are special types that are checked for in the construction of input/output specs +# and special converters inserted into the attrs fields. - @classmethod - def converter(cls, value): - from .helpers import ensure_list - if value == attr.NOTHING: - return value - else: - return ensure_list(value) +class MultiInputObj(list, ty.Generic[T]): + pass -class MultiOutputObj: - """A ty.List[ty.Any] object, converter changes an 1-el list to the single value""" +MultiInputFile = MultiInputObj[File] - @classmethod - def converter(cls, value): - if isinstance(value, list) and len(value) == 1: - return value[0] - else: - return value +# Since we can't create a NewType from a type union, we add a dummy type to the union +# so we can detect the MultiOutput in the input/output spec creation +class MultiOutputType: + pass -class MultiInputFile(MultiInputObj): - """A ty.List[File] object, converter changes a single file path to a list""" +MultiOutputObj = ty.Union[list, object, MultiOutputType] +MultiOutputFile = ty.Union[File, ty.List[File], MultiOutputType] -class MultiOutputFile(MultiOutputObj): - """A ty.List[File] object, converter changes an 1-el list to the single value""" +OUTPUT_TEMPLATE_TYPES = ( + Path, + ty.List[Path], + ty.Union[Path, bool], + ty.Union[ty.List[Path], bool], + ty.List[ty.List[Path]], +) @attr.s(auto_attribs=True, kw_only=True) @@ -69,7 +64,7 @@ class SpecInfo: """A name for the specification.""" fields: ty.List[ty.Tuple] = attr.ib(factory=list) """List of names of fields (can be inputs or outputs).""" - bases: ty.Tuple[ty.Type] = attr.ib(factory=tuple) + bases: ty.Sequence[ty.Type["BaseSpec"]] = attr.ib(factory=tuple) """Keeps track of specification inheritance. Should be a tuple containing at least one BaseSpec """ @@ -78,44 +73,23 @@ class SpecInfo: class BaseSpec: """The base dataclass specs for all inputs and outputs.""" - def __attrs_post_init__(self): - self.files_hash = { - field.name: {} - for field in attr_fields( - self, exclude_names=("_graph_checksums", "bindings", "files_hash") - ) - if field.metadata.get("output_file_template") is None - } - - def __setattr__(self, name, value): - """changing settatr, so the converter and validator is run - if input is set after __init__ - """ - if inspect.stack()[1][3] == "__init__" or name in [ - "inp_hash", - "changed", - "files_hash", - ]: - super().__setattr__(name, value) - else: - tp = attr.fields_dict(self.__class__)[name].type - # if the type has a converter, e.g., MultiInputObj - if hasattr(tp, "converter"): - value = tp.converter(value) - self.files_hash[name] = {} - super().__setattr__(name, value) - # validate all fields that have set a validator - attr.validate(self) - def collect_additional_outputs(self, inputs, output_dir, outputs): """Get additional outputs.""" return {} @property def hash(self): - """Compute a basic hash for any given set of fields.""" - from .helpers import hash_value, hash_function + hsh, self._hashes = self._compute_hashes() + return hsh + + def hash_changes(self): + """Detects any changes in the hashed values between the current inputs and the + previously calculated values""" + _, new_hashes = self._compute_hashes() + return [k for k, v in new_hashes.items() if v != self._hashes[k]] + def _compute_hashes(self) -> ty.Tuple[bytes, ty.Dict[str, bytes]]: + """Compute a basic hash for any given set of fields.""" inp_dict = {} for field in attr_fields( self, exclude_names=("_graph_checksums", "bindings", "files_hash") @@ -125,28 +99,28 @@ def hash(self): # removing values that are not set from hash calculation if getattr(self, field.name) is attr.NOTHING: continue - value = getattr(self, field.name) - inp_dict[field.name] = hash_value( - value=value, - tp=field.type, - metadata=field.metadata, - precalculated=self.files_hash[field.name], - ) - inp_hash = hash_function(inp_dict) + if "container_path" in field.metadata: + continue + inp_dict[field.name] = getattr(self, field.name) + hash_cache = Cache() + field_hashes = { + k: hash_function(v, cache=hash_cache) for k, v in inp_dict.items() + } if hasattr(self, "_graph_checksums"): - inp_hash = hash_function((inp_hash, self._graph_checksums)) - return inp_hash + field_hashes["_graph_checksums"] = self._graph_checksums + return hash_function(sorted(field_hashes.items())), field_hashes - def retrieve_values(self, wf, state_index=None): + def retrieve_values(self, wf, state_index: ty.Optional[int] = None): """Get values contained by this spec.""" - temp_values = {} + retrieved_values = {} for field in attr_fields(self): value = getattr(self, field.name) if isinstance(value, LazyField): - value = value.get_value(wf, state_index=state_index) - temp_values[field.name] = value - for field, value in temp_values.items(): - setattr(self, field, value) + retrieved_values[field.name] = value.get_value( + wf, state_index=state_index + ) + for field, val in retrieved_values.items(): + setattr(self, field, val) def check_fields_input_spec(self): """ @@ -156,75 +130,58 @@ def check_fields_input_spec(self): """ fields = attr_fields(self) - names = [] - require_to_check = {} - for fld in fields: - mdata = fld.metadata - # checking if the mandatory field is provided - if getattr(self, fld.name) is attr.NOTHING: - if mdata.get("mandatory"): - # checking if the mandatory field is provided elsewhere in the xor list - in_exclusion_list = mdata.get("xor") is not None - alreday_populated = in_exclusion_list and [ - getattr(self, el) - for el in mdata["xor"] - if (getattr(self, el) is not attr.NOTHING) - ] - if ( - alreday_populated - ): # another input satisfies mandatory attribute via xor condition - continue - else: - raise AttributeError( - f"{fld.name} is mandatory, but no value provided" - ) - else: - continue - names.append(fld.name) - # checking if fields meet the xor and requires are - if "xor" in mdata: - if [el for el in mdata["xor"] if (el in names and el != fld.name)]: + for field in fields: + field_is_mandatory = bool(field.metadata.get("mandatory")) + field_is_unset = getattr(self, field.name) is attr.NOTHING + + if field_is_unset and not field_is_mandatory: + continue + + # Collect alternative fields associated with this field. + alternative_fields = { + name: getattr(self, name) is not attr.NOTHING + for name in field.metadata.get("xor", []) + if name != field.name + } + alternatives_are_set = any(alternative_fields.values()) + + # Raise error if no field in mandatory alternative group is set. + if field_is_unset: + if alternatives_are_set: + continue + message = f"{field.name} is mandatory and unset." + if alternative_fields: raise AttributeError( - f"{fld.name} is mutually exclusive with {mdata['xor']}" + message[:-1] + + f", but no alternative provided by {list(alternative_fields)}." ) + else: + raise AttributeError(message) - if "requires" in mdata: - if [el for el in mdata["requires"] if el not in names]: - # will check after adding all fields to names - require_to_check[fld.name] = mdata["requires"] - - if ( - fld.type in [File, Directory] - or "pydra.engine.specs.File" in str(fld.type) - or "pydra.engine.specs.Directory" in str(fld.type) - ): - self._file_check(fld) - - for nm, required in require_to_check.items(): - required_notfound = [el for el in required if el not in names] - if required_notfound: - raise AttributeError(f"{nm} requires {required_notfound}") - - def _file_check(self, field): - """checking if the file exists""" - if isinstance(getattr(self, field.name), list): - # if value is a list and type is a list of Files/Directory, checking all elements - if field.type in [ty.List[File], ty.List[Directory]]: - for el in getattr(self, field.name): - file = Path(el) - if not file.exists() and field.type in [File, Directory]: - raise FileNotFoundError( - f"the file {file} from the {field.name} input does not exist" - ) - else: - file = Path(getattr(self, field.name)) - # error should be raised only if the type is strictly File or Directory - if not file.exists() and field.type in [File, Directory]: - raise FileNotFoundError( - f"the file {file} from the {field.name} input does not exist" + # Raise error if multiple alternatives are set. + elif alternatives_are_set: + set_alternative_fields = [ + name for name, is_set in alternative_fields.items() if is_set + ] + raise AttributeError( + f"{field.name} is mutually exclusive with {set_alternative_fields}" ) + # Collect required fields associated with this field. + required_fields = { + name: getattr(self, name) is not attr.NOTHING + for name in field.metadata.get("requires", []) + if name != field.name + } + + # Raise error if any required field is unset. + if not all(required_fields.values()): + unset_required_fields = [ + name for name, is_set in required_fields.items() if not is_set + ] + raise AttributeError(f"{field.name} requires {unset_required_fields}") + def check_metadata(self): """Check contained metadata.""" @@ -350,7 +307,8 @@ def check_metadata(self): # not allowing for default if the field is mandatory if not fld.default == attr.NOTHING and mdata.get("mandatory"): raise AttributeError( - "default value should not be set when the field is mandatory" + f"default value ({fld.default!r}) should not be set when the field " + f"('{fld.name}') in {self}) is mandatory" ) # setting default if value not provided and default is available if getattr(self, fld.name) is None: @@ -384,11 +342,12 @@ def retrieve_values(self, wf, state_index=None): if not field.metadata.get("output_file_template"): value = getattr(self, field.name) if isinstance(value, LazyField): - value = value.get_value(wf, state_index=state_index) - temp_values[field.name] = value - for field, value in temp_values.items(): + temp_values[field.name] = value.get_value( + wf, state_index=state_index + ) + for field, val in temp_values.items(): value = path_to_string(value) - setattr(self, field, value) + setattr(self, field, val) def check_metadata(self): """ @@ -397,6 +356,8 @@ def check_metadata(self): Also sets the default values when available and needed. """ + from ..utils.typing import TypeParser + supported_keys = { "allowed_values", "argstr", @@ -413,29 +374,44 @@ def check_metadata(self): "xor", "sep", "formatter", + "_output_type", } + for fld in attr_fields(self, exclude_names=("_func", "_graph_checksums")): mdata = fld.metadata # checking keys from metadata if set(mdata.keys()) - supported_keys: raise AttributeError( f"only these keys are supported {supported_keys}, but " - f"{set(mdata.keys()) - supported_keys} provided" + f"{set(mdata.keys()) - supported_keys} provided for '{fld.name}' " + f"field in {self}" ) # checking if the help string is provided (required field) if "help_string" not in mdata: - raise AttributeError(f"{fld.name} doesn't have help_string field") - # assuming that fields with output_file_template shouldn't have default - if fld.default not in [attr.NOTHING, True, False] and mdata.get( - "output_file_template" - ): raise AttributeError( - "default value should not be set together with output_file_template" + f"{fld.name} doesn't have help_string field in {self}" ) + # assuming that fields with output_file_template shouldn't have default + if mdata.get("output_file_template"): + if not any( + TypeParser.matches_type(fld.type, t) for t in OUTPUT_TEMPLATE_TYPES + ): + raise TypeError( + f"Type of '{fld.name}' should be one of {OUTPUT_TEMPLATE_TYPES} " + f"(not {fld.type}) because it has a value for output_file_template " + f"({mdata['output_file_template']!r})" + ) + if fld.default not in [attr.NOTHING, True, False]: + raise AttributeError( + f"default value ({fld.default!r}) should not be set together with " + f"output_file_template ({mdata['output_file_template']!r}) for " + f"'{fld.name}' field in {self}" + ) # not allowing for default if the field is mandatory if not fld.default == attr.NOTHING and mdata.get("mandatory"): raise AttributeError( - "default value should not be set when the field is mandatory" + f"default value ({fld.default!r}) should not be set when the field " + f"('{fld.name}') in {self}) is mandatory" ) # setting default if value not provided and default is available if getattr(self, fld.name) is None: @@ -449,36 +425,43 @@ class ShellOutSpec: return_code: int """The process' exit code.""" - stdout: ty.Union[File, str] + stdout: str """The process' standard output.""" - stderr: ty.Union[File, str] + stderr: str """The process' standard input.""" def collect_additional_outputs(self, inputs, output_dir, outputs): + from ..utils.typing import TypeParser + """Collect additional outputs from shelltask output_spec.""" additional_out = {} for fld in attr_fields(self, exclude_names=("return_code", "stdout", "stderr")): - if fld.type not in [ - File, - MultiOutputFile, - Directory, - Path, - int, - float, - bool, - str, - list, - ]: - raise Exception( - f"Support for {fld.type} type, required for {fld.name} in {self}, " + if not TypeParser.is_subclass( + fld.type, + ( + os.PathLike, + MultiOutputObj, + int, + float, + bool, + str, + list, + ), + ): + raise TypeError( + f"Support for {fld.type} type, required for '{fld.name}' in {self}, " "has not been implemented in collect_additional_output" ) # assuming that field should have either default or metadata, but not both input_value = getattr(inputs, fld.name, attr.NOTHING) if input_value is not attr.NOTHING: - if fld.type in (File, MultiOutputFile, Directory, Path): - input_value = Path(input_value).absolute() - additional_out[fld.name] = input_value + if TypeParser.contains_type(FileSet, fld.type): + if input_value is not False: + label = f"output field '{fld.name}' of {self}" + input_value = TypeParser(fld.type, label=label).coerce( + input_value + ) + additional_out[fld.name] = input_value elif ( fld.default is None or fld.default == attr.NOTHING ) and not fld.metadata: # TODO: is it right? @@ -593,7 +576,12 @@ def _field_metadata( return attr.NOTHING return val elif "callable" in fld.metadata: - call_args = inspect.getfullargspec(fld.metadata["callable"]) + callable_ = fld.metadata["callable"] + if isinstance(callable_, staticmethod): + # In case callable is defined as a static method, + # retrieve the function wrapped in the descriptor. + callable_ = callable_.__func__ + call_args = inspect.getfullargspec(callable_) call_args_val = {} for argnm in call_args.args: if argnm == "field": @@ -615,9 +603,12 @@ def _field_metadata( f"has to be in inputs or be field or output_dir, " f"but {argnm} is used" ) - return fld.metadata["callable"](**call_args_val) + return callable_(**call_args_val) else: - raise Exception("(_field_metadata) is not a current valid metadata key.") + raise Exception( + f"Metadata for '{fld.name}', does not not contain any of the required fields " + f'("callable", "output_file_template" or "value"): {fld.metadata}.' + ) def _check_requires(self, fld, inputs): """checking if all fields from the requires and template are set in the input @@ -627,11 +618,12 @@ def _check_requires(self, fld, inputs): if "requires" in fld.metadata: # if requires is a list of list it is treated as el[0] OR el[1] OR... - if all([isinstance(el, list) for el in fld.metadata["requires"]]): - field_required_OR = fld.metadata["requires"] + required_fields = ensure_list(fld.metadata["requires"]) + if all([isinstance(el, list) for el in required_fields]): + field_required_OR = required_fields # if requires is a list of tuples/strings - I'm creating a 1-el nested list - elif all([isinstance(el, (str, tuple)) for el in fld.metadata["requires"]]): - field_required_OR = [fld.metadata["requires"]] + elif all([isinstance(el, (str, tuple)) for el in required_fields]): + field_required_OR = [required_fields] else: raise Exception( f"requires field can be a list of list, or a list " @@ -692,106 +684,361 @@ def _check_requires(self, fld, inputs): return False -@attr.s(auto_attribs=True, kw_only=True) -class ContainerSpec(ShellSpec): - """Refine the generic command-line specification to container execution.""" +@attr.s +class LazyInterface: + _task: "core.TaskBase" = attr.ib() + _attr_type: str - image: ty.Union[File, str] = attr.ib( - metadata={"help_string": "image", "mandatory": True} - ) - """The image to be containerized.""" - container: ty.Union[File, str, None] = attr.ib( - metadata={"help_string": "container"} - ) - """The container.""" - container_xargs: ty.Optional[ty.List[str]] = attr.ib( - default=None, metadata={"help_string": "todo"} - ) + def __getattr__(self, name): + if name in ("_task", "_attr_type", "_field_names"): + raise AttributeError(f"{name} hasn't been set yet") + if name not in self._field_names: + raise AttributeError( + f"Task {self._task.name} has no {self._attr_type} attribute {name}" + ) + type_ = self._get_type(name) + splits = self._get_task_splits() + combines = self._get_task_combines() + if combines and self._attr_type == "output": + # Add in any scalar splits referencing upstream splits, i.e. "_myupstreamtask", + # "_myarbitrarytask" + combined_upstreams = set() + if self._task.state: + for scalar in LazyField.sanitize_splitter( + self._task.state.splitter, strip_previous=False + ): + for field in scalar: + if field.startswith("_"): + node_name = field[1:] + if any(c.split(".")[0] == node_name for c in combines): + combines.update( + f for f in scalar if not f.startswith("_") + ) + combined_upstreams.update( + f[1:] for f in scalar if f.startswith("_") + ) + if combines: + # Wrap type in list which holds the combined items + type_ = ty.List[type_] + # Iterate through splits to remove any splits which are removed by the + # combiner + for splitter in copy(splits): + remaining = tuple( + s + for s in splitter + if not any( + (x in combines or x.split(".")[0] in combined_upstreams) + for x in s + ) + ) + if remaining != splitter: + splits.remove(splitter) + if remaining: + splits.add(remaining) + # Wrap the type in a nested StateArray type + if splits: + type_ = StateArray[type_] + lf_klass = LazyInField if self._attr_type == "input" else LazyOutField + return lf_klass[type_]( + name=self._task.name, + field=name, + type=type_, + splits=splits, + ) + def _get_task_splits(self) -> ty.Set[ty.Tuple[ty.Tuple[str, ...], ...]]: + """Returns the states over which the inputs of the task are split""" + splitter = self._task.state.splitter if self._task.state else None + splits = set() + if splitter: + # Ensure that splits is of tuple[tuple[str, ...], ...] form + splitter = LazyField.sanitize_splitter(splitter) + if splitter: + splits.add(splitter) + for inpt in attr.asdict(self._task.inputs, recurse=False).values(): + if isinstance(inpt, LazyField): + splits.update(inpt.splits) + return splits + + def _get_task_combines(self) -> ty.Set[ty.Union[str, ty.Tuple[str, ...]]]: + """Returns the states over which the outputs of the task are combined""" + combiner = ( + self._task.state.combiner + if self._task.state is not None + else getattr(self._task, "fut_combiner", None) + ) + return set(combiner) if combiner else set() -@attr.s(auto_attribs=True, kw_only=True) -class DockerSpec(ContainerSpec): - """Particularize container specifications to the Docker engine.""" - container: str = attr.ib("docker", metadata={"help_string": "container"}) +class LazyIn(LazyInterface): + _attr_type = "input" + def _get_type(self, name): + attr = next(t for n, t in self._task.input_spec.fields if n == name) + if attr is None: + return ty.Any + elif inspect.isclass(attr): + return attr + else: + return attr.type -@attr.s(auto_attribs=True, kw_only=True) -class SingularitySpec(ContainerSpec): - """Particularize container specifications to Singularity.""" + @property + def _field_names(self): + return [field[0] for field in self._task.input_spec.fields] - container: str = attr.ib("singularity", metadata={"help_string": "container type"}) +class LazyOut(LazyInterface): + _attr_type = "output" -class LazyField: + def _get_type(self, name): + try: + type_ = next(f[1] for f in self._task.output_spec.fields if f[0] == name) + except StopIteration: + type_ = ty.Any + else: + if not inspect.isclass(type_): + try: + type_ = type_.type # attrs _CountingAttribute + except AttributeError: + pass # typing._SpecialForm + return type_ + + @property + def _field_names(self): + return self._task.output_names + ["all_"] + + +TypeOrAny = ty.Union[ty.Type[T], ty.Any] +Splitter = ty.Union[str, ty.Tuple[str, ...]] + + +@attr.s(auto_attribs=True, kw_only=True) +class LazyField(ty.Generic[T]): """Lazy fields implement promises.""" - def __init__(self, node, attr_type): - """Initialize a lazy field.""" - self.name = node.name - if attr_type == "input": - self.fields = [field[0] for field in node.input_spec.fields] - elif attr_type == "output": - self.fields = node.output_names - else: - raise ValueError(f"LazyField: Unknown attr_type: {attr_type}") - self.attr_type = attr_type - self.field = None + name: str + field: str + type: TypeOrAny + # Set of splitters that have been applied to the lazy field. Note that the splitter + # specifications are transformed to a tuple[tuple[str, ...], ...] form where the + # outer tuple is the outer product, the inner tuple are inner products (where either + # product can be of length==1) + splits: ty.FrozenSet[ty.Tuple[ty.Tuple[str, ...], ...]] = attr.field( + factory=frozenset, converter=frozenset + ) + cast_from: ty.Optional[ty.Type[ty.Any]] = None - def __getattr__(self, name): - if name in self.fields or name == "all_": - self.field = name + def __bytes_repr__(self, cache): + yield type(self).__name__.encode() + yield self.name.encode() + yield self.field.encode() + + def cast(self, new_type: TypeOrAny) -> "LazyField": + """ "casts" the lazy field to a new type + + Parameters + ---------- + new_type : type + the type to cast the lazy-field to + + Returns + ------- + cast_field : LazyField + a copy of the lazy field with the new type + """ + return type(self)[new_type]( + name=self.name, + field=self.field, + type=new_type, + splits=self.splits, + cast_from=self.cast_from if self.cast_from else self.type, + ) + + def split(self, splitter: Splitter) -> "LazyField": + """ "Splits" the lazy field over an array of nodes by replacing the sequence type + of the lazy field with StateArray to signify that it will be "split" across + + Parameters + ---------- + splitter : str or ty.Tuple[str, ...] or ty.List[str] + the splitter to append to the list of splitters + """ + from ..utils.typing import TypeParser # pylint: disable=import-outside-toplevel + + splits = self.splits | set([LazyField.sanitize_splitter(splitter)]) + # Check to see whether the field has already been split over the given splitter + if splits == self.splits: return self - if name in dir(self): - return self.__getattribute__(name) - raise AttributeError( - f"Task {self.name} has no {self.attr_type} attribute {name}" + + # Modify the type of the lazy field to include the split across a state-array + inner_type, prev_split_depth = TypeParser.strip_splits(self.type) + assert prev_split_depth <= 1 + if inner_type is ty.Any: + type_ = StateArray[ty.Any] + elif TypeParser.matches_type(inner_type, list): + item_type = TypeParser.get_item_type(inner_type) + type_ = StateArray[item_type] + else: + raise TypeError( + f"Cannot split non-sequence field {self} of type {inner_type}" + ) + if prev_split_depth: + type_ = StateArray[type_] + return type(self)[type_]( + name=self.name, + field=self.field, + type=type_, + splits=splits, ) - def __getstate__(self): - state = self.__dict__.copy() - state["name"] = self.name - state["fields"] = self.fields - state["field"] = self.field - return state + @classmethod + def sanitize_splitter( + cls, splitter: Splitter, strip_previous: bool = True + ) -> ty.Tuple[ty.Tuple[str, ...], ...]: + """Converts the splitter spec into a consistent tuple[tuple[str, ...], ...] form + used in LazyFields""" + if isinstance(splitter, str): + splitter = (splitter,) + if isinstance(splitter, tuple): + splitter = (splitter,) # type: ignore + else: + assert isinstance(splitter, list) + # convert to frozenset to differentiate from tuple, yet still be hashable + # (NB: order of fields in list splitters aren't relevant) + splitter = tuple((s,) if isinstance(s, str) else s for s in splitter) + # Strip out fields starting with "_" designating splits in upstream nodes + if strip_previous: + stripped = tuple( + tuple(f for f in i if not f.startswith("_")) for i in splitter + ) + splitter = tuple(s for s in stripped if s) # type: ignore + return splitter # type: ignore - def __setstate__(self, state): - self.__dict__.update(state) + def _apply_cast(self, value): + """\"Casts\" the value from the retrieved type if a cast has been applied to + the lazy-field""" + from pydra.utils.typing import TypeParser - def __repr__(self): - return f"LF('{self.name}', '{self.field}')" - - def get_value(self, wf, state_index=None): - """Return the value of a lazy field.""" - if self.attr_type == "input": - return getattr(wf.inputs, self.field) - elif self.attr_type == "output": - node = getattr(wf, self.name) - result = node.result(state_index=state_index) - if isinstance(result, list): - if len(result) and isinstance(result[0], list): - results_new = [] - for res_l in result: - res_l_new = [] - for res in res_l: - if res.errored: - raise ValueError("Error from get_value") - else: - res_l_new.append(res.get_output_field(self.field)) - results_new.append(res_l_new) - return results_new + if self.cast_from: + assert TypeParser.matches(value, self.cast_from) + value = self.type(value) + return value + + +class LazyInField(LazyField[T]): + attr_type = "input" + + def get_value( + self, wf: "pydra.Workflow", state_index: ty.Optional[int] = None + ) -> ty.Any: + """Return the value of a lazy field. + + Parameters + ---------- + wf : Workflow + the workflow the lazy field references + state_index : int, optional + the state index of the field to access + + Returns + ------- + value : Any + the resolved value of the lazy-field + """ + from ..utils.typing import TypeParser # pylint: disable=import-outside-toplevel + + value = getattr(wf.inputs, self.field) + if TypeParser.is_subclass(self.type, StateArray) and not wf._pre_split: + _, split_depth = TypeParser.strip_splits(self.type) + + def apply_splits(obj, depth): + if depth < 1: + return obj + return StateArray[self.type](apply_splits(i, depth - 1) for i in obj) + + value = apply_splits(value, split_depth) + value = self._apply_cast(value) + return value + + +class LazyOutField(LazyField[T]): + attr_type = "output" + + def get_value( + self, wf: "pydra.Workflow", state_index: ty.Optional[int] = None + ) -> ty.Any: + """Return the value of a lazy field. + + Parameters + ---------- + wf : Workflow + the workflow the lazy field references + state_index : int, optional + the state index of the field to access + + Returns + ------- + value : Any + the resolved value of the lazy-field + """ + from ..utils.typing import TypeParser # pylint: disable=import-outside-toplevel + + node = getattr(wf, self.name) + result = node.result(state_index=state_index) + if result is None: + raise RuntimeError( + f"Could not find results of '{node.name}' node in a sub-directory " + f"named '{node.checksum}' in any of the cache locations.\n" + + "\n".join(str(p) for p in set(node.cache_locations)) + + f"\n\nThis is likely due to hash changes in '{self.name}' node inputs. " + f"Current values and hashes: {self.inputs}, " + f"{self.inputs._hashes}\n\n" + "Set loglevel to 'debug' in order to track hash changes " + "throughout the execution of the workflow.\n\n " + "These issues may have been caused by `bytes_repr()` methods " + "that don't return stable hash values for specific object " + "types across multiple processes (see bytes_repr() " + '"singledispatch "function in pydra/utils/hash.py).' + "You may need to write specific `bytes_repr()` " + "implementations (see `pydra.utils.hash.register_serializer`) or a " + "`__bytes_repr__()` dunder methods to handle one or more types in " + "your interface inputs." + ) + _, split_depth = TypeParser.strip_splits(self.type) + + def get_nested_results(res, depth: int): + if isinstance(res, list): + if not depth: + val = [r.get_output_field(self.field) for r in res] else: - results_new = [] - for res in result: - if res.errored: - raise ValueError("Error from get_value") - else: - results_new.append(res.get_output_field(self.field)) - return results_new + val = StateArray[self.type]( + get_nested_results(res=r, depth=depth - 1) for r in res + ) else: - if result.errored: - raise ValueError("Error from get_value") - return result.get_output_field(self.field) + if res.errored: + raise ValueError( + f"Cannot retrieve value for {self.field} from {self.name} as " + "the node errored" + ) + val = res.get_output_field(self.field) + if depth and not wf._pre_split: + assert isinstance(val, ty.Sequence) and not isinstance(val, str) + val = StateArray[self.type](val) + return val + + value = get_nested_results(result, depth=split_depth) + value = self._apply_cast(value) + return value + + +class StateArray(ty.List[T]): + """an array of values from, or to be split over in an array of nodes (see TaskBase.split()), + multiple nodes of the same task. Used in type-checking to differentiate between list + types and values for multiple nodes + """ + + def __repr__(self): + return f"{type(self).__name__}(" + ", ".join(repr(i) for i in self) + ")" def donothing(*args, **kwargs): @@ -807,7 +1054,7 @@ class TaskHook: pre_run: ty.Callable = donothing post_run: ty.Callable = donothing - def __setattr__(cls, attr, val): + def __setattr__(self, attr, val): if attr not in ["pre_run_task", "post_run_task", "pre_run", "post_run"]: raise AttributeError("Cannot set unknown hook") super().__setattr__(attr, val) @@ -824,3 +1071,6 @@ def path_to_string(value): elif isinstance(value, list) and len(value) and isinstance(value[0], Path): value = [str(val) for val in value] return value + + +from . import core # noqa diff --git a/pydra/engine/state.py b/pydra/engine/state.py index 4e532775ae..befbf86b9d 100644 --- a/pydra/engine/state.py +++ b/pydra/engine/state.py @@ -1,4 +1,5 @@ """Keeping track of mapping and reduce operations over tasks.""" + from copy import deepcopy import itertools from functools import reduce @@ -454,13 +455,11 @@ def _remove_repeated(self, previous_splitters): f"{self.other_states}" ) - repeated = set( - [ - (el, previous_splitters.count(el)) - for el in previous_splitters - if previous_splitters.count(el) > 1 - ] - ) + repeated = { + (el, previous_splitters.count(el)) + for el in previous_splitters + if previous_splitters.count(el) > 1 + } if repeated: # assuming that I want to remove from right previous_splitters.reverse() diff --git a/pydra/engine/submitter.py b/pydra/engine/submitter.py index 0f1e33ef59..cbb4064e7a 100644 --- a/pydra/engine/submitter.py +++ b/pydra/engine/submitter.py @@ -1,10 +1,13 @@ """Handle execution backends.""" + import asyncio +import typing as ty import pickle from uuid import uuid4 -from .workers import WORKERS +from .workers import Worker, WORKERS from .core import is_workflow from .helpers import get_open_loop, load_and_run_async +from ..utils.hash import PersistentCache import logging @@ -15,34 +18,47 @@ class Submitter: """Send a task to the execution backend.""" - def __init__(self, plugin="cf", **kwargs): + def __init__(self, plugin: ty.Union[str, ty.Type[Worker]] = "cf", **kwargs): """ Initialize task submission. Parameters ---------- - plugin : :obj:`str` - The identifier of the execution backend. + plugin : :obj:`str` or :obj:`ty.Type[pydra.engine.core.Worker]` + Either the identifier of the execution backend or the worker class itself. Default is ``cf`` (Concurrent Futures). + **kwargs + Additional keyword arguments to pass to the worker. """ self.loop = get_open_loop() self._own_loop = not self.loop.is_running() - self.plugin = plugin - try: - self.worker = WORKERS[self.plugin](**kwargs) - except KeyError: - raise NotImplementedError(f"No worker for {self.plugin}") + if isinstance(plugin, str): + self.plugin = plugin + try: + worker_cls = WORKERS[self.plugin] + except KeyError: + raise NotImplementedError(f"No worker for '{self.plugin}' plugin") + else: + try: + self.plugin = plugin.plugin_name + except AttributeError: + raise ValueError("Worker class must have a 'plugin_name' str attribute") + worker_cls = plugin + self.worker = worker_cls(**kwargs) self.worker.loop = self.loop - def __call__(self, runnable, cache_locations=None, rerun=False): + def __call__(self, runnable, cache_locations=None, rerun=False, environment=None): """Submitter run function.""" if cache_locations is not None: runnable.cache_locations = cache_locations - self.loop.run_until_complete(self.submit_from_call(runnable, rerun)) + self.loop.run_until_complete( + self.submit_from_call(runnable, rerun, environment) + ) + PersistentCache().clean_up() return runnable.result() - async def submit_from_call(self, runnable, rerun): + async def submit_from_call(self, runnable, rerun, environment): """ This coroutine should only be called once per Submitter call, and serves as the bridge between sync/async lands. @@ -56,7 +72,7 @@ async def submit_from_call(self, runnable, rerun): Once Python 3.10 is the minimum, this should probably be refactored into using structural pattern matching. """ - if is_workflow(runnable): + if is_workflow(runnable): # TODO: env to wf # connect and calculate the checksum of the graph before running runnable._connect_and_propagate_to_tasks(override_task_caches=True) # 0 @@ -74,10 +90,11 @@ async def submit_from_call(self, runnable, rerun): # 2 if runnable.state is None: # run_el should always return a coroutine - await self.worker.run_el(runnable, rerun=rerun) + print("in SUBM", environment) + await self.worker.run_el(runnable, rerun=rerun, environment=environment) # 3 else: - await self.expand_runnable(runnable, wait=True, rerun=rerun) + await self.expand_runnable(runnable, wait=True, rerun=rerun) # TODO return True async def expand_runnable(self, runnable, wait=False, rerun=False): @@ -168,15 +185,55 @@ async def expand_workflow(self, wf, rerun=False): # don't block the event loop! await asyncio.sleep(1) if ii > 60: - blocked = _list_blocked_tasks(graph_copy) - get_runnable_tasks(graph_copy) - raise Exception( - "graph is not empty, but not able to get more tasks " - "- something may have gone wrong when retrieving the results " - "of predecessor tasks caused by a file-system error or a bug " - "in the internal workflow logic.\n\nBlocked tasks\n-------------\n" - + "\n".join(blocked) + msg = ( + f"Graph of '{wf}' workflow is not empty, but not able to get " + "more tasks - something has gone wrong when retrieving the " + "results predecessors:\n\n" ) + # Get blocked tasks and the predecessors they are waiting on + outstanding = { + t: [ + p for p in graph_copy.predecessors[t.name] if not p.done + ] + for t in graph_copy.sorted_nodes + } + + hashes_have_changed = False + for task, waiting_on in outstanding.items(): + if not waiting_on: + continue + msg += f"- '{task.name}' node blocked due to\n" + for pred in waiting_on: + if ( + pred.checksum + != wf.inputs._graph_checksums[pred.name] + ): + msg += ( + f" - hash changes in '{pred.name}' node inputs. " + f"Current values and hashes: {pred.inputs}, " + f"{pred.inputs._hashes}\n" + ) + hashes_have_changed = True + elif pred not in outstanding: + msg += ( + f" - undiagnosed issues in '{pred.name}' node, " + "potentially related to file-system access issues " + ) + msg += "\n" + if hashes_have_changed: + msg += ( + "Set loglevel to 'debug' in order to track hash changes " + "throughout the execution of the workflow.\n\n " + "These issues may have been caused by `bytes_repr()` methods " + "that don't return stable hash values for specific object " + "types across multiple processes (see bytes_repr() " + '"singledispatch "function in pydra/utils/hash.py).' + "You may need to write specific `bytes_repr()` " + "implementations (see `pydra.utils.hash.register_serializer`) " + "or `__bytes_repr__()` dunder methods to handle one " + "or more types in your interface inputs." + ) + raise RuntimeError(msg) for task in tasks: # grab inputs if needed logger.debug(f"Retrieving inputs for {task}") @@ -307,7 +364,7 @@ def _list_blocked_tasks(graph): matching_name.append( f"{saved_tsk.name} ({tsk_work_dir.name})" ) - blocking.append(pred, ", ".join(matching_name)) + blocking.append((pred, ", ".join(matching_name))) if blocking: blocked.append( f"\n{tsk.name} ({tsk.checksum}) is blocked by " diff --git a/pydra/engine/task.py b/pydra/engine/task.py index c6125fbadd..cb55d9e390 100644 --- a/pydra/engine/task.py +++ b/pydra/engine/task.py @@ -38,16 +38,19 @@ `__ """ + +from __future__ import annotations + import platform import re import attr -import cloudpickle as cp import inspect import typing as ty import shlex from pathlib import Path import warnings - +import cloudpickle as cp +from fileformats.core import FileSet, DataType from .core import TaskBase, is_lazy from ..utils.messenger import AuditFlag from .specs import ( @@ -55,21 +58,18 @@ SpecInfo, ShellSpec, ShellOutSpec, - ContainerSpec, - DockerSpec, - SingularitySpec, attr_fields, - File, - Directory, ) from .helpers import ( ensure_list, - execute, position_sort, argstr_formatting, output_from_inputfields, + parse_copyfile, ) -from .helpers_file import template_update, is_local_file +from .helpers_file import template_update +from ..utils.typing import TypeParser +from .environments import Native class FunctionTask(TaskBase): @@ -125,6 +125,12 @@ def __init__( val_dflt = val.default else: val_dflt = attr.NOTHING + if isinstance(val.annotation, ty.TypeVar): + raise NotImplementedError( + "Template types are not currently supported in task signatures " + f"(found in '{val.name}' field of '{name}' task), " + "see https://github.com/nipype/pydra/issues/672" + ) fields.append( ( val.name, @@ -137,11 +143,11 @@ def __init__( ), ) ) - fields.append(("_func", attr.ib(default=cp.dumps(func), type=str))) + fields.append(("_func", attr.ib(default=cp.dumps(func), type=bytes))) input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,)) else: input_spec.fields.append( - ("_func", attr.ib(default=cp.dumps(func), type=str)) + ("_func", attr.ib(default=cp.dumps(func), type=bytes)) ) self.input_spec = input_spec if name is None: @@ -162,10 +168,14 @@ def __init__( fields = [("out", ty.Any)] if "return" in func.__annotations__: return_info = func.__annotations__["return"] - # e.g. python annotation: fun() -> ty.NamedTuple("Output", [("out", float)]) - # or pydra decorator: @pydra.mark.annotate({"return": ty.NamedTuple(...)}) - if hasattr(return_info, "__name__") and getattr( - return_info, "__annotations__", None + # # e.g. python annotation: fun() -> ty.NamedTuple("Output", [("out", float)]) + # # or pydra decorator: @pydra.mark.annotate({"return": ty.NamedTuple(...)}) + # + + if ( + hasattr(return_info, "__name__") + and getattr(return_info, "__annotations__", None) + and not issubclass(return_info, DataType) ): name = return_info.__name__ fields = list(return_info.__annotations__.items()) @@ -185,7 +195,7 @@ def __init__( self.output_spec = output_spec - def _run_task(self): + def _run_task(self, environment=None): inputs = attr.asdict(self.inputs, recurse=False) del inputs["_func"] self.output_ = None @@ -213,33 +223,6 @@ class ShellCommandTask(TaskBase): input_spec = None output_spec = None - def __new__(cls, container_info=None, *args, **kwargs): - if not container_info: - return super().__new__(cls) - - if len(container_info) == 2: - type_cont, image = container_info - else: - raise Exception( - f"container_info has to have 2 elements, but {container_info} provided" - ) - - if type_cont == "docker": - # changing base class of spec if user defined - if "input_spec" in kwargs: - kwargs["input_spec"].bases = (DockerSpec,) - return DockerTask(image=image, *args, **kwargs) - elif type_cont == "singularity": - # changing base class of spec if user defined - if "input_spec" in kwargs: - kwargs["input_spec"].bases = (SingularitySpec,) - return SingularityTask(image=image, *args, **kwargs) - else: - raise Exception( - f"first element of container_info has to be " - f"docker or singularity, but {container_info[0]} provided" - ) - def __init__( self, audit_flags: AuditFlag = AuditFlag.NONE, @@ -252,6 +235,7 @@ def __init__( output_spec: ty.Optional[SpecInfo] = None, rerun=False, strip=False, + environment=Native(), **kwargs, ): """ @@ -319,9 +303,33 @@ def __init__( rerun=rerun, ) self.strip = strip + self.environment = environment + self.bindings = {} + self.inputs_mod_root = {} - @property - def command_args(self): + def get_bindings(self, root: str | None = None) -> dict[str, tuple[str, str]]: + """Return bindings necessary to run task in an alternative root. + + This is primarily intended for contexts when a task is going + to be run in a container with mounted volumes. + + Arguments + --------- + root: str + + Returns + ------- + bindings: dict + Mapping from paths in the host environment to the target environment + """ + + if root is None: + return {} + else: + self._prepare_bindings(root=root) + return self.bindings + + def command_args(self, root=None): """Get command line arguments""" if is_lazy(self.inputs): raise Exception("can't return cmdline, self.inputs has LazyFields") @@ -329,15 +337,12 @@ def command_args(self): raise NotImplementedError modified_inputs = template_update(self.inputs, output_dir=self.output_dir) - if modified_inputs is not None: - self.inputs = attr.evolve(self.inputs, **modified_inputs) + for field_name, field_value in modified_inputs.items(): + setattr(self.inputs, field_name, field_value) pos_args = [] # list for (position, command arg) self._positions_provided = [] - for field in attr_fields( - self.inputs, - exclude_names=("container", "image", "container_xargs"), - ): + for field in attr_fields(self.inputs): name, meta = field.name, field.metadata if ( getattr(self.inputs, name) is attr.NOTHING @@ -352,7 +357,10 @@ def command_args(self): if pos_val: pos_args.append(pos_val) else: - pos_val = self._command_pos_args(field) + if name in modified_inputs: + pos_val = self._command_pos_args(field, root=root) + else: + pos_val = self._command_pos_args(field) if pos_val: pos_args.append(pos_val) @@ -389,7 +397,7 @@ def _command_shelltask_args(self, field): else: return pos, ensure_list(value, tuple2list=True) - def _command_pos_args(self, field): + def _command_pos_args(self, field, root=None): """ Checking all additional input fields, setting pos to None, if position not set. Creating a list with additional parts of the command that comes from @@ -418,6 +426,13 @@ def _command_pos_args(self, field): pos += 1 if pos >= 0 else -1 value = self._field_value(field, check_file=True) + + if value: + if field.name in self.inputs_mod_root: + value = self.inputs_mod_root[field.name] + elif root: # values from templates + value = value.replace(str(self.output_dir), f"{root}{self.output_dir}") + if field.metadata.get("readonly", False) and value is not None: raise Exception(f"{field.name} is read only, the value can't be provided") elif ( @@ -509,13 +524,9 @@ def cmdline(self): self.inputs.check_fields_input_spec() if self.state: raise NotImplementedError - if isinstance(self, ContainerTask): - command_args = self.container_args + self.command_args - else: - command_args = self.command_args - # Skip the executable, which can be a multipart command, e.g. 'docker run'. - cmdline = command_args[0] - for arg in command_args[1:]: + # Skip the executable, which can be a multi-part command, e.g. 'docker run'. + cmdline = self.command_args()[0] + for arg in self.command_args()[1:]: # If there are spaces in the arg, and it is not enclosed by matching # quotes, add quotes to escape the space. Not sure if this should # be expanded to include other special characters apart from spaces @@ -525,328 +536,34 @@ def cmdline(self): cmdline += " " + arg return cmdline - def _run_task(self): - self.output_ = None - if isinstance(self, ContainerTask): - args = self.container_args + self.command_args - else: - args = self.command_args - if args: - # removing empty strings - args = [str(el) for el in args if el not in ["", " "]] - keys = ["return_code", "stdout", "stderr"] - values = execute(args, strip=self.strip) - self.output_ = dict(zip(keys, values)) - if self.output_["return_code"]: - msg = f"Error running '{self.name}' task with {args}:" - if self.output_["stderr"]: - msg += "\n\nstderr:\n" + self.output_["stderr"] - if self.output_["stdout"]: - msg += "\n\nstdout:\n" + self.output_["stdout"] - raise RuntimeError(msg) - - -class ContainerTask(ShellCommandTask): - """Extend shell command task for containerized execution.""" - - def __init__( - self, - name, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - input_spec: ty.Optional[SpecInfo] = None, - messenger_args=None, - messengers=None, - output_cpath="/output_pydra", - output_spec: ty.Optional[SpecInfo] = None, - rerun=False, - strip=False, - **kwargs, - ): - """ - Initialize this task. - - Parameters - ---------- - name : :obj:`str` - Name of this task. - audit_flags : :obj:`pydra.utils.messenger.AuditFlag` - Auditing configuration - cache_dir : :obj:`os.pathlike` - Cache directory - input_spec : :obj:`pydra.engine.specs.SpecInfo` - Specification of inputs. - messenger_args : - TODO - messengers : - TODO - output_cpath : :obj:`str` - Output path within the container filesystem. - output_spec : :obj:`pydra.engine.specs.BaseSpec` - Specification of inputs. - strip : :obj:`bool` - TODO - - """ - if input_spec is None: - input_spec = SpecInfo(name="Inputs", fields=[], bases=(ContainerSpec,)) - self.output_cpath = Path(output_cpath) - self.bindings = {} - super().__init__( - name=name, - input_spec=input_spec, - output_spec=output_spec, - audit_flags=audit_flags, - messengers=messengers, - messenger_args=messenger_args, - cache_dir=cache_dir, - strip=strip, - rerun=rerun, - **kwargs, - ) - - def _field_value(self, field, check_file=False): - """ - Checking value of the specific field, if value is not set, None is returned. - If check_file is True, checking if field is a local file - and settings bindings if needed. - """ - value = super()._field_value(field) - if value and check_file and is_local_file(field): - # changing path to the cpath (the directory should be mounted) - lpath = Path(str(value)) - cdir = self.bind_paths()[lpath.parent][0] - cpath = cdir.joinpath(lpath.name) - value = str(cpath) - return value - - def container_check(self, container_type): - """Get container-specific CLI arguments.""" - if self.inputs.container is None: - raise AttributeError("Container software is not specified") - elif self.inputs.container != container_type: - raise AttributeError( - f"Container type should be {container_type}, but {self.inputs.container} given" - ) - if self.inputs.image is attr.NOTHING: - raise AttributeError("Container image is not specified") - - def bind_paths(self): - """Get bound mount points - - Returns - ------- - mount points: dict - mapping from local path to tuple of container path + mode - """ - self._check_inputs() - return {**self.bindings, **{self.output_dir: (self.output_cpath, "rw")}} - - def binds(self, opt): - """ - Specify mounts to bind from local filesystems to container and working directory. - - Uses py:meth:`bind_paths` - - """ - bargs = [] - for lpath, (cpath, mode) in self.bind_paths().items(): - bargs.extend([opt, f"{lpath}:{cpath}:{mode}"]) - return bargs - - def _check_inputs(self): - fields = attr_fields(self.inputs) - for fld in fields: - if ( - fld.type in [File, Directory] - or "pydra.engine.specs.File" in str(fld.type) - or "pydra.engine.specs.Directory" in str(fld.type) - ): - if fld.name == "image": - continue - file = Path(getattr(self.inputs, fld.name)) - if fld.metadata.get("container_path"): - # if the path is in a container the input should be treated as a str (hash as a str) - # field.type = "str" - # setattr(self, field.name, str(file)) - pass - # if this is a local path, checking if the path exists - # TODO: if copyfile, ro -> rw - elif file.exists(): # is it ok if two inputs have the same parent? - self.bindings[Path(file.parent)] = ( - Path(f"/pydra_inp_{fld.name}"), - "ro", - ) - # error should be raised only if the type is strictly File or Directory - elif fld.type in [File, Directory]: - raise FileNotFoundError( - f"the file {file} from {fld.name} input does not exist, " - f"if the file comes from the container, " - f"use field.metadata['container_path']=True" - ) - - -class DockerTask(ContainerTask): - """Extend shell command task for containerized execution with the Docker Engine.""" - - init = False - - def __init__( - self, - name=None, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - input_spec: ty.Optional[SpecInfo] = None, - messenger_args=None, - messengers=None, - output_cpath="/output_pydra", - output_spec: ty.Optional[SpecInfo] = None, - rerun=False, - strip=False, - **kwargs, - ): - """ - Initialize this task. + def _run_task(self, environment=None): + if environment is None: + environment = self.environment + self.output_ = environment.execute(self) - Parameters - ---------- - name : :obj:`str` - Name of this task. - audit_flags : :obj:`pydra.utils.messenger.AuditFlag` - Auditing configuration - cache_dir : :obj:`os.pathlike` - Cache directory - input_spec : :obj:`pydra.engine.specs.SpecInfo` - Specification of inputs. - messenger_args : - TODO - messengers : - TODO - output_cpath : :obj:`str` - Output path within the container filesystem. - output_spec : :obj:`pydra.engine.specs.BaseSpec` - Specification of inputs. - strip : :obj:`bool` - TODO + def _prepare_bindings(self, root: str): + """Prepare input files to be passed to the task + This updates the ``bindings`` attribute of the current task to make files available + in an ``Environment``-defined ``root``. """ - if not self.init: - if input_spec is None: - input_spec = SpecInfo(name="Inputs", fields=[], bases=(DockerSpec,)) - super().__init__( - name=name, - input_spec=input_spec, - output_spec=output_spec, - audit_flags=audit_flags, - messengers=messengers, - messenger_args=messenger_args, - cache_dir=cache_dir, - strip=strip, - output_cpath=output_cpath, - rerun=rerun, - **kwargs, - ) - self.inputs.container_xargs = ["--rm"] - self.init = True - - @property - def container_args(self): - """Get container-specific CLI arguments, returns a list if the task has a state""" - if is_lazy(self.inputs): - raise Exception("can't return container_args, self.inputs has LazyFields") - self.container_check("docker") - if self.state: - raise NotImplementedError - - cargs = ["docker", "run"] - if self.inputs.container_xargs is not None: - cargs.extend(self.inputs.container_xargs) + for fld in attr_fields(self.inputs): + if TypeParser.contains_type(FileSet, fld.type): + fileset = getattr(self.inputs, fld.name) + copy = parse_copyfile(fld)[0] == FileSet.CopyMode.copy - cargs.extend(self.binds("-v")) - cargs.extend(["-w", str(self.output_cpath)]) - cargs.append(self.inputs.image) + host_path, env_path = fileset.parent, Path(f"{root}{fileset.parent}") - return cargs + # Default to mounting paths as read-only, but respect existing modes + old_mode = self.bindings.get(host_path, ("", "ro"))[1] + self.bindings[host_path] = (env_path, "rw" if copy else old_mode) - -class SingularityTask(ContainerTask): - """Extend shell command task for containerized execution with Singularity.""" - - init = False - - def __init__( - self, - name=None, - audit_flags: AuditFlag = AuditFlag.NONE, - cache_dir=None, - input_spec: ty.Optional[SpecInfo] = None, - messenger_args=None, - messengers=None, - output_spec: ty.Optional[SpecInfo] = None, - rerun=False, - strip=False, - **kwargs, - ): - """ - Initialize this task. - - Parameters - ---------- - name : :obj:`str` - Name of this task. - audit_flags : :obj:`pydra.utils.messenger.AuditFlag` - Auditing configuration - cache_dir : :obj:`os.pathlike` - Cache directory - input_spec : :obj:`pydra.engine.specs.SpecInfo` - Specification of inputs. - messenger_args : - TODO - messengers : - TODO - output_spec : :obj:`pydra.engine.specs.BaseSpec` - Specification of inputs. - strip : :obj:`bool` - TODO - - """ - if not self.init: - if input_spec is None: - input_spec = SpecInfo( - name="Inputs", fields=[], bases=(SingularitySpec,) + # Provide in-container paths without type-checking + self.inputs_mod_root[fld.name] = tuple( + env_path / rel for rel in fileset.relative_fspaths ) - super().__init__( - name=name, - input_spec=input_spec, - output_spec=output_spec, - audit_flags=audit_flags, - messengers=messengers, - messenger_args=messenger_args, - cache_dir=cache_dir, - strip=strip, - rerun=rerun, - **kwargs, - ) - self.init = True - - @property - def container_args(self): - """Get container-specific CLI arguments.""" - if is_lazy(self.inputs): - raise Exception("can't return container_args, self.inputs has LazyFields") - self.container_check("singularity") - if self.state: - raise NotImplementedError - - cargs = ["singularity", "exec"] - - if self.inputs.container_xargs is not None: - cargs.extend(self.inputs.container_xargs) - cargs.extend(self.binds("-B")) - cargs.extend(["--pwd", str(self.output_cpath)]) - cargs.append(self.inputs.image) - return cargs + DEFAULT_COPY_COLLATION = FileSet.CopyCollation.adjacent def split_cmd(cmd: str): diff --git a/pydra/engine/tests/conftest.py b/pydra/engine/tests/conftest.py new file mode 100644 index 0000000000..b7ecfbb8e9 --- /dev/null +++ b/pydra/engine/tests/conftest.py @@ -0,0 +1,16 @@ +import pytest + + +try: + import importlib_resources +except ImportError: + import importlib.resources as importlib_resources + + +@pytest.fixture(scope="package") +def data_tests_dir(): + test_nii = importlib_resources.files("pydra").joinpath( + "engine", "tests", "data_tests" + ) + with importlib_resources.as_file(test_nii) as path: + yield path diff --git a/pydra/engine/tests/data_tests/loading.py b/pydra/engine/tests/data_tests/loading.py index 0fe80e1b2b..8240b20798 100644 --- a/pydra/engine/tests/data_tests/loading.py +++ b/pydra/engine/tests/data_tests/loading.py @@ -1,6 +1,3 @@ -import os - - def loading(filename): with open(filename) as f: txt = f.read() diff --git a/pydra/engine/tests/data_tests/saving.py b/pydra/engine/tests/data_tests/saving.py index 1d5b67257b..f50aa82a45 100644 --- a/pydra/engine/tests/data_tests/saving.py +++ b/pydra/engine/tests/data_tests/saving.py @@ -1,6 +1,3 @@ -import os - - def saving(filename): with open(filename, "w") as f: f.write("Hello!") diff --git a/pydra/engine/tests/test_boutiques.py b/pydra/engine/tests/test_boutiques.py index 0008b78873..48f484b687 100644 --- a/pydra/engine/tests/test_boutiques.py +++ b/pydra/engine/tests/test_boutiques.py @@ -1,6 +1,5 @@ -import os, shutil +import shutil import subprocess as sp -from pathlib import Path import attr import pytest @@ -9,7 +8,6 @@ from ..submitter import Submitter from ..boutiques import BoshTask from .utils import result_no_submitter, result_submitter, no_win -from ...engine.specs import File need_bosh_docker = pytest.mark.skipif( shutil.which("docker") is None @@ -18,8 +16,6 @@ reason="requires docker and bosh", ) -Infile = Path(__file__).resolve().parent / "data_tests" / "test.nii.gz" - pytestmark = pytest.mark.skip() @@ -30,10 +26,10 @@ "maskfile", ["test_brain.nii.gz", "test_brain", "test_brain.nii"] ) @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_boutiques_1(maskfile, plugin, results_function, tmpdir): +def test_boutiques_1(maskfile, plugin, results_function, tmpdir, data_tests_dir): """simple task to run fsl.bet using BoshTask""" btask = BoshTask(name="NA", zenodo_id="1482743") - btask.inputs.infile = Infile + btask.inputs.infile = data_tests_dir / "test.nii.gz" btask.inputs.maskfile = maskfile btask.cache_dir = tmpdir res = results_function(btask, plugin) @@ -50,12 +46,12 @@ def test_boutiques_1(maskfile, plugin, results_function, tmpdir): @no_win @need_bosh_docker @pytest.mark.flaky(reruns=3) -def test_boutiques_spec_1(): +def test_boutiques_spec_1(data_tests_dir): """testing spec: providing input/output fields names""" btask = BoshTask( name="NA", zenodo_id="1482743", - infile=Infile, + infile=data_tests_dir / "test.nii.gz", maskfile="test_brain.nii.gz", input_spec_names=["infile", "maskfile"], output_spec_names=["outfile", "out_outskin_off"], @@ -75,12 +71,12 @@ def test_boutiques_spec_1(): @no_win @need_bosh_docker @pytest.mark.flaky(reruns=3) -def test_boutiques_spec_2(): +def test_boutiques_spec_2(data_tests_dir): """testing spec: providing partial input/output fields names""" btask = BoshTask( name="NA", zenodo_id="1482743", - infile=Infile, + infile=data_tests_dir / "test.nii.gz", maskfile="test_brain.nii.gz", input_spec_names=["infile"], output_spec_names=[], @@ -101,11 +97,11 @@ def test_boutiques_spec_2(): @pytest.mark.parametrize( "maskfile", ["test_brain.nii.gz", "test_brain", "test_brain.nii"] ) -def test_boutiques_wf_1(maskfile, plugin, tmpdir): +def test_boutiques_wf_1(maskfile, plugin, tmpdir, infile): """wf with one task that runs fsl.bet using BoshTask""" wf = Workflow(name="wf", input_spec=["maskfile", "infile"]) wf.inputs.maskfile = maskfile - wf.inputs.infile = Infile + wf.inputs.infile = infile wf.cache_dir = tmpdir wf.add( @@ -134,11 +130,11 @@ def test_boutiques_wf_1(maskfile, plugin, tmpdir): @pytest.mark.parametrize( "maskfile", ["test_brain.nii.gz", "test_brain", "test_brain.nii"] ) -def test_boutiques_wf_2(maskfile, plugin, tmpdir): +def test_boutiques_wf_2(maskfile, plugin, tmpdir, infile): """wf with two BoshTasks (fsl.bet and fsl.stats) and one ShellTask""" wf = Workflow(name="wf", input_spec=["maskfile", "infile"]) wf.inputs.maskfile = maskfile - wf.inputs.infile = Infile + wf.inputs.infile = infile wf.cache_dir = tmpdir wf.add( diff --git a/pydra/engine/tests/test_dockertask.py b/pydra/engine/tests/test_dockertask.py index 117d35d658..5ccf37e292 100644 --- a/pydra/engine/tests/test_dockertask.py +++ b/pydra/engine/tests/test_dockertask.py @@ -1,12 +1,13 @@ -import os +import typing as ty import pytest import attr -from ..task import DockerTask, ShellCommandTask +from ..task import ShellCommandTask from ..submitter import Submitter from ..core import Workflow -from ..specs import ShellOutSpec, SpecInfo, File, DockerSpec, ShellSpec -from .utils import no_win, need_docker +from ..specs import ShellOutSpec, SpecInfo, File, ShellSpec +from ..environments import Docker +from .utils import no_win, need_docker, result_submitter, result_no_submitter @no_win @@ -16,13 +17,13 @@ def test_docker_1_nosubm(): no submitter """ cmd = "whoami" - docky = DockerTask(name="docky", executable=cmd, image="busybox") - assert docky.inputs.image == "busybox" - assert docky.inputs.container == "docker" - assert ( - docky.cmdline - == f"docker run --rm -v {docky.output_dir}:/output_pydra:rw -w /output_pydra {docky.inputs.image} {cmd}" + docky = ShellCommandTask( + name="docky", executable=cmd, environment=Docker(image="busybox") ) + assert docky.environment.image == "busybox" + assert docky.environment.tag == "latest" + assert isinstance(docky.environment, Docker) + assert docky.cmdline == cmd res = docky() assert res.output.stdout == "root\n" @@ -36,7 +37,9 @@ def test_docker_1(plugin): using submitter """ cmd = "whoami" - docky = DockerTask(name="docky", executable=cmd, image="busybox") + docky = ShellCommandTask( + name="docky", executable=cmd, environment=Docker(image="busybox") + ) with Submitter(plugin=plugin) as sub: docky(submitter=sub) @@ -48,574 +51,74 @@ def test_docker_1(plugin): @no_win @need_docker -def test_docker_1_dockerflag(plugin): - """simple command in a container, a default bindings and working directory is added - using ShellComandTask with container_info=("docker", image) - """ - cmd = "whoami" - shocky = ShellCommandTask( - name="shocky", executable=cmd, container_info=("docker", "busybox") - ) - - with Submitter(plugin=plugin) as sub: - shocky(submitter=sub) - - res = shocky.result() - assert res.output.stdout == "root\n" - assert res.output.return_code == 0 - - -@no_win -@need_docker -def test_docker_1_dockerflag_exception(plugin): - """using ShellComandTask with container_info=("docker"), no image provided""" - cmd = "whoami" - with pytest.raises(Exception) as excinfo: - shocky = ShellCommandTask( - name="shocky", executable=cmd, container_info=("docker") - ) - assert "container_info has to have 2 elements" in str(excinfo.value) - - -@no_win -@need_docker -def test_docker_2_nosubm(): - """a command with arguments, cmd and args given as executable - no submitter - """ - cmd = ["echo", "hail", "pydra"] - docky = DockerTask(name="docky", executable=cmd, image="busybox") - assert ( - docky.cmdline - == f"docker run --rm -v {docky.output_dir}:/output_pydra:rw -w /output_pydra {docky.inputs.image} {' '.join(cmd)}" - ) - - res = docky() - assert res.output.stdout.strip() == " ".join(cmd[1:]) - assert res.output.return_code == 0 - - -@no_win -@need_docker -def test_docker_2(plugin): - """a command with arguments, cmd and args given as executable - using submitter - """ - cmd = ["echo", "hail", "pydra"] - docky = DockerTask(name="docky", executable=cmd, image="busybox") - assert ( - docky.cmdline - == f"docker run --rm -v {docky.output_dir}:/output_pydra:rw -w /output_pydra {docky.inputs.image} {' '.join(cmd)}" - ) - - with Submitter(plugin=plugin) as sub: - docky(submitter=sub) - res = docky.result() - assert res.output.stdout.strip() == " ".join(cmd[1:]) - assert res.output.return_code == 0 - - -@no_win -@need_docker -def test_docker_2_dockerflag(plugin): +@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +def test_docker_2(results_function, plugin): """a command with arguments, cmd and args given as executable - using ShellComandTask with container_info=("docker", image) + with and without submitter """ cmd = ["echo", "hail", "pydra"] - shocky = ShellCommandTask( - name="shocky", executable=cmd, container_info=("docker", "busybox") - ) - assert ( - shocky.cmdline - == f"docker run --rm -v {shocky.output_dir}:/output_pydra:rw -w /output_pydra {shocky.inputs.image} {' '.join(cmd)}" + docky = ShellCommandTask( + name="docky", executable=cmd, environment=Docker(image="busybox") ) - - with Submitter(plugin=plugin) as sub: - shocky(submitter=sub) - res = shocky.result() + # cmdline doesn't know anything about docker + assert docky.cmdline == " ".join(cmd) + res = results_function(docky, plugin) assert res.output.stdout.strip() == " ".join(cmd[1:]) assert res.output.return_code == 0 @no_win @need_docker -def test_docker_2a_nosubm(): - """a command with arguments, using executable and args - no submitter - """ - cmd_exec = "echo" - cmd_args = ["hail", "pydra"] - # separate command into exec + args - docky = DockerTask( - name="docky", executable=cmd_exec, args=cmd_args, image="busybox" - ) - assert docky.inputs.executable == "echo" - assert ( - docky.cmdline - == f"docker run --rm -v {docky.output_dir}:/output_pydra:rw -w /output_pydra {docky.inputs.image} {cmd_exec} {' '.join(cmd_args)}" - ) - - res = docky() - assert res.output.stdout.strip() == " ".join(cmd_args) - assert res.output.return_code == 0 - - -@no_win -@need_docker -def test_docker_2a(plugin): +@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +def test_docker_2a(results_function, plugin): """a command with arguments, using executable and args using submitter """ cmd_exec = "echo" cmd_args = ["hail", "pydra"] # separate command into exec + args - docky = DockerTask( - name="docky", executable=cmd_exec, args=cmd_args, image="busybox" + docky = ShellCommandTask( + name="docky", + executable=cmd_exec, + args=cmd_args, + environment=Docker(image="busybox"), ) assert docky.inputs.executable == "echo" - assert ( - docky.cmdline - == f"docker run --rm -v {docky.output_dir}:/output_pydra:rw -w /output_pydra {docky.inputs.image} {cmd_exec} {' '.join(cmd_args)}" - ) + assert docky.cmdline == f"{cmd_exec} {' '.join(cmd_args)}" - with Submitter(plugin=plugin) as sub: - docky(submitter=sub) - res = docky.result() + res = results_function(docky, plugin) assert res.output.stdout.strip() == " ".join(cmd_args) assert res.output.return_code == 0 -@no_win -@need_docker -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_docker_3(plugin, tmpdir): - """a simple command in container with bindings, - creating directory in tmp dir and checking if it is in the container - """ - # creating a new directory - tmpdir.mkdir("new_dir") - cmd = ["ls", "/tmp_dir"] - docky = DockerTask(name="docky", executable=cmd, image="busybox") - # binding tmp directory to the container - docky.inputs.bindings = [(str(tmpdir), "/tmp_dir", "ro")] - - with Submitter(plugin=plugin) as sub: - docky(submitter=sub) - - res = docky.result() - assert res.output.stdout == "new_dir\n" - assert res.output.return_code == 0 - - -@no_win -@need_docker -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_docker_3_dockerflag(plugin, tmpdir): - """a simple command in container with bindings, - creating directory in tmp dir and checking if it is in the container - using ShellComandTask with container_info=("docker", image) - """ - # creating a new directory - tmpdir.mkdir("new_dir") - cmd = ["ls", "/tmp_dir"] - shocky = ShellCommandTask( - name="shocky", container_info=("docker", "busybox"), executable=cmd - ) - # binding tmp directory to the container - shocky.inputs.bindings = [(str(tmpdir), "/tmp_dir", "ro")] - - with Submitter(plugin=plugin) as sub: - shocky(submitter=sub) - - res = shocky.result() - assert res.output.stdout == "new_dir\n" - assert res.output.return_code == 0 - - -@no_win -@need_docker -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_docker_3_dockerflagbind(plugin, tmpdir): - """a simple command in container with bindings, - creating directory in tmp dir and checking if it is in the container - using ShellComandTask with container_info=("docker", image) - """ - # creating a new directory - tmpdir.mkdir("new_dir") - cmd = ["ls", "/tmp_dir"] - shocky = ShellCommandTask( - name="shocky", - container_info=("docker", "busybox", [(str(tmpdir), "/tmp_dir", "ro")]), - executable=cmd, - ) - - with Submitter(plugin=plugin) as sub: - shocky(submitter=sub) - - res = shocky.result() - assert res.output.stdout == "new_dir\n" - assert res.output.return_code == 0 - - -@no_win -@need_docker -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_docker_4(plugin, tmpdir): - """task reads the file that is bounded to the container - specifying bindings, - """ - with open(tmpdir.join("file_pydra.txt"), "w") as f: - f.write("hello from pydra") - - cmd = ["cat", "/tmp_dir/file_pydra.txt"] - docky = DockerTask( - name="docky_cat", - image="busybox", - executable=cmd, - bindings=[(str(tmpdir), "/tmp_dir", "ro")], - strip=True, - ) - - with Submitter(plugin=plugin) as sub: - docky(submitter=sub) - - res = docky.result() - assert res.output.stdout == "hello from pydra" - assert res.output.return_code == 0 - - -@no_win -@need_docker -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_docker_4_dockerflag(plugin, tmpdir): - """task reads the file that is bounded to the container - specifying bindings, - using ShellComandTask with container_info=("docker", image, bindings) - """ - with open(tmpdir.join("file_pydra.txt"), "w") as f: - f.write("hello from pydra") - - cmd = ["cat", "/tmp_dir/file_pydra.txt"] - shocky = ShellCommandTask( - name="shocky", - container_info=("docker", "busybox", [(str(tmpdir), "/tmp_dir", "ro")]), - executable=cmd, - strip=True, - ) - - with Submitter(plugin=plugin) as sub: - shocky(submitter=sub) - - res = shocky.result() - assert res.output.stdout == "hello from pydra" - assert res.output.return_code == 0 - - # tests with State @no_win @need_docker -def test_docker_st_1(plugin): +@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +def test_docker_st_1(results_function, plugin): """commands without arguments in container splitter = executable """ cmd = ["pwd", "whoami"] - docky = DockerTask(name="docky", executable=cmd, image="busybox").split( - "executable" + docky = ShellCommandTask(name="docky", environment=Docker(image="busybox")).split( + "executable", executable=cmd ) assert docky.state.splitter == "docky.executable" - # for ii, el in enumerate(docky.cmdline): - # assert ( - # el - # == f"docker run --rm -v {docky.output_dir[ii]}:/output_pydra:rw -w /output_pydra {docky.inputs.image} {cmd[ii]}" - # ) - - res = docky(plugin=plugin) - assert res[0].output.stdout == "/output_pydra\n" + res = results_function(docky, plugin) + assert res[0].output.stdout == f"/mnt/pydra{docky.output_dir[0]}\n" assert res[1].output.stdout == "root\n" assert res[0].output.return_code == res[1].output.return_code == 0 -@no_win -@need_docker -def test_docker_st_2(plugin): - """command with arguments in docker, checking the distribution - splitter = image - """ - cmd = ["cat", "/etc/issue"] - docky = DockerTask(name="docky", executable=cmd, image=["debian", "ubuntu"]).split( - "image" - ) - assert docky.state.splitter == "docky.image" - - # for ii, el in enumerate(docky.cmdline): - # assert ( - # el - # == f"docker run --rm -v {docky.output_dir[ii]}:/output_pydra:rw -w /output_pydra {docky.inputs.image[ii]} {' '.join(cmd)}" - # ) - - res = docky(plugin=plugin) - assert "Debian" in res[0].output.stdout - assert "Ubuntu" in res[1].output.stdout - assert res[0].output.return_code == res[1].output.return_code == 0 - - -@no_win -@need_docker -def test_docker_st_3(plugin): - """outer splitter image and executable""" - cmd = ["whoami", ["cat", "/etc/issue"]] - docky = DockerTask(name="docky", executable=cmd, image=["debian", "ubuntu"]).split( - ["image", "executable"] - ) - assert docky.state.splitter == ["docky.image", "docky.executable"] - res = docky(plugin=plugin) - - assert res[0].output.stdout == "root\n" - assert "Debian" in res[1].output.stdout - assert res[2].output.stdout == "root\n" - assert "Ubuntu" in res[3].output.stdout - - -@no_win -@need_docker -def test_docker_st_4(plugin): - """outer splitter image and executable, combining with images""" - cmd = ["whoami", ["cat", "/etc/issue"]] - docky = ( - DockerTask(name="docky", executable=cmd, image=["debian", "ubuntu"]) - .split(["image", "executable"]) - .combine("image") - ) - assert docky.state.splitter == ["docky.image", "docky.executable"] - assert docky.state.combiner == ["docky.image"] - assert docky.state.splitter_final == "docky.executable" - - # for ii, el in enumerate(docky.cmdline): - # i, j = ii // 2, ii % 2 - # if j == 0: - # cmd_str = "whoami" - # else: - # cmd_str = " ".join(["cat", "/etc/issue"]) - # assert ( - # el - # == f"docker run --rm -v {docky.output_dir[ii]}:/output_pydra:rw -w /output_pydra {docky.inputs.image[i]} {cmd_str}" - # ) - - res = docky(plugin=plugin) - - # checking the first command - res_cmd1 = res[0] - assert res_cmd1[0].output.stdout == "root\n" - assert res_cmd1[1].output.stdout == "root\n" - - # checking the second command - res_cmd2 = res[1] - assert "Debian" in res_cmd2[0].output.stdout - assert "Ubuntu" in res_cmd2[1].output.stdout - - -# tests with workflows - - -@no_win -@need_docker -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_wf_docker_1(plugin, tmpdir): - """a workflow with two connected task - the first one read the file that is bounded to the container, - the second uses echo - """ - with open(tmpdir.join("file_pydra.txt"), "w") as f: - f.write("hello from pydra") - - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2"]) - wf.inputs.cmd1 = ["cat", "/tmp_dir/file_pydra.txt"] - wf.inputs.cmd2 = ["echo", "message from the previous task:"] - wf.add( - DockerTask( - name="docky_cat", - image="busybox", - executable=wf.lzin.cmd1, - bindings=[(str(tmpdir), "/tmp_dir", "ro")], - strip=True, - ) - ) - wf.add( - DockerTask( - name="docky_echo", - image="ubuntu", - executable=wf.lzin.cmd2, - args=wf.docky_cat.lzout.stdout, - strip=True, - ) - ) - wf.set_output([("out", wf.docky_echo.lzout.stdout)]) - - with pytest.raises(Exception) as excinfo: - wf.docky_echo.cmdline - assert "can't return cmdline" in str(excinfo.value) - - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) - - res = wf.result() - assert res.output.out == "message from the previous task: hello from pydra" - - -@no_win -@need_docker -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_wf_docker_1_dockerflag(plugin, tmpdir): - """a workflow with two connected task - the first one read the file that is bounded to the container, - the second uses echo - using ShellComandTask with container_info - """ - with open(tmpdir.join("file_pydra.txt"), "w") as f: - f.write("hello from pydra") - - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2"]) - wf.inputs.cmd1 = ["cat", "/tmp_dir/file_pydra.txt"] - wf.inputs.cmd2 = ["echo", "message from the previous task:"] - wf.add( - ShellCommandTask( - name="shocky_cat", - container_info=("docker", "busybox", [(str(tmpdir), "/tmp_dir", "ro")]), - executable=wf.lzin.cmd1, - strip=True, - ) - ) - wf.add( - ShellCommandTask( - name="shocky_echo", - executable=wf.lzin.cmd2, - args=wf.shocky_cat.lzout.stdout, - strip=True, - container_info=("docker", "ubuntu"), - ) - ) - wf.set_output([("out", wf.shocky_echo.lzout.stdout)]) - - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) - - res = wf.result() - assert res.output.out == "message from the previous task: hello from pydra" - - -@no_win -@need_docker -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_wf_docker_2pre(plugin, tmpdir): - """a workflow with two connected task that run python scripts - the first one creates a text file and the second one reads the file - """ - - scripts_dir = os.path.join(os.path.dirname(__file__), "data_tests") - - cmd1 = ["python", "/scripts/saving.py", "-f", "/outputs/tmp.txt"] - dt = DockerTask( - name="save", - image="python:3.7-alpine", - executable=cmd1, - bindings=[(str(tmpdir), "/outputs"), (scripts_dir, "/scripts", "ro")], - strip=True, - ) - res = dt(plugin=plugin) - assert res.output.stdout == "/outputs/tmp.txt" - - -@no_win -@need_docker -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_wf_docker_2(plugin, tmpdir): - """a workflow with two connected task that run python scripts - the first one creates a text file and the second one reads the file - """ - - scripts_dir = os.path.join(os.path.dirname(__file__), "data_tests") - - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2"]) - wf.inputs.cmd1 = ["python", "/scripts/saving.py", "-f", "/outputs/tmp.txt"] - wf.inputs.cmd2 = ["python", "/scripts/loading.py", "-f"] - wf.add( - DockerTask( - name="save", - image="python:3.7-alpine", - executable=wf.lzin.cmd1, - bindings=[(str(tmpdir), "/outputs"), (scripts_dir, "/scripts", "ro")], - strip=True, - ) - ) - wf.add( - DockerTask( - name="load", - image="python:3.7-alpine", - executable=wf.lzin.cmd2, - args=wf.save.lzout.stdout, - bindings=[(str(tmpdir), "/outputs"), (scripts_dir, "/scripts", "ro")], - strip=True, - ) - ) - wf.set_output([("out", wf.load.lzout.stdout)]) - - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) - - res = wf.result() - assert res.output.out == "Hello!" - - -@no_win -@need_docker -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_wf_docker_3(plugin, tmpdir): - """a workflow with two connected task - the first one read the file that contains the name of the image, - the output is passed to the second task as the image used to run the task - """ - with open(tmpdir.join("image.txt"), "w") as f: - f.write("ubuntu") - - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2"]) - wf.inputs.cmd1 = ["cat", "/tmp_dir/image.txt"] - wf.inputs.cmd2 = ["echo", "image passed to the second task:"] - wf.add( - DockerTask( - name="docky_cat", - image="busybox", - executable=wf.lzin.cmd1, - bindings=[(str(tmpdir), "/tmp_dir", "ro")], - strip=True, - ) - ) - wf.add( - DockerTask( - name="docky_echo", - image=wf.docky_cat.lzout.stdout, - executable=wf.lzin.cmd2, - args=wf.docky_cat.lzout.stdout, - strip=True, - ) - ) - wf.set_output([("out", wf.docky_echo.lzout.stdout)]) - - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) - - res = wf.result() - assert res.output.out == "image passed to the second task: ubuntu" - - # tests with customized output_spec @no_win @need_docker -def test_docker_outputspec_1(plugin, tmpdir): +def test_docker_outputspec_1(plugin, tmp_path): """ customised output_spec, adding files to the output, providing specific pathname output_path is automatically added to the bindings @@ -626,8 +129,11 @@ def test_docker_outputspec_1(plugin, tmpdir): fields=[("newfile", File, "newfile_tmp.txt")], bases=(ShellOutSpec,), ) - docky = DockerTask( - name="docky", image="ubuntu", executable=cmd, output_spec=my_output_spec + docky = ShellCommandTask( + name="docky", + environment=Docker(image="ubuntu"), + executable=cmd, + output_spec=my_output_spec, ) with Submitter(plugin=plugin) as sub: @@ -635,7 +141,6 @@ def test_docker_outputspec_1(plugin, tmpdir): res = docky.result() assert res.output.stdout == "" - assert res.output.newfile.exists() # tests with customised input_spec @@ -643,9 +148,9 @@ def test_docker_outputspec_1(plugin, tmpdir): @no_win @need_docker -def test_docker_inputspec_1(tmpdir): +def test_docker_inputspec_1(tmp_path): """a simple customized input spec for docker task""" - filename = str(tmpdir.join("file_pydra.txt")) + filename = str(tmp_path / "file_pydra.txt") with open(filename, "w") as f: f.write("hello from pydra") @@ -667,12 +172,12 @@ def test_docker_inputspec_1(tmpdir): ), ) ], - bases=(DockerSpec,), + bases=(ShellSpec,), ) - docky = DockerTask( + docky = ShellCommandTask( name="docky", - image="busybox", + environment=Docker(image="busybox"), executable=cmd, file=filename, input_spec=my_input_spec, @@ -685,11 +190,11 @@ def test_docker_inputspec_1(tmpdir): @no_win @need_docker -def test_docker_inputspec_1a(tmpdir): +def test_docker_inputspec_1a(tmp_path): """a simple customized input spec for docker task a default value is used """ - filename = str(tmpdir.join("file_pydra.txt")) + filename = str(tmp_path / "file_pydra.txt") with open(filename, "w") as f: f.write("hello from pydra") @@ -707,107 +212,14 @@ def test_docker_inputspec_1a(tmpdir): ), ) ], - bases=(DockerSpec,), - ) - - docky = DockerTask( - name="docky", - image="busybox", - executable=cmd, - input_spec=my_input_spec, - strip=True, - ) - - res = docky() - assert res.output.stdout == "hello from pydra" - - -@no_win -@need_docker -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_docker_inputspec_1b(tmpdir): - """a simple customized input spec for docker task - instead of using automatic binding I provide the bindings - and name of the file inside the container - """ - filename = str(tmpdir.join("file_pydra.txt")) - with open(filename, "w") as f: - f.write("hello from pydra") - - cmd = "cat" - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - "container_path": True, - }, - ), - ) - ], - bases=(DockerSpec,), - ) - - docky = DockerTask( - name="docky", - image="busybox", - executable=cmd, - # container_path is set to True, so providing the filename inside the container - file="/in_container/file_pydra.txt", - bindings=[(str(tmpdir), "/in_container")], - input_spec=my_input_spec, - strip=True, - ) - - res = docky() - assert res.output.stdout == "hello from pydra" - - -@no_win -@need_docker -def test_docker_inputspec_1_dockerflag(tmpdir): - """a simple customized input spec for docker task - using ShellTask with container_info - """ - filename = str(tmpdir.join("file_pydra.txt")) - with open(filename, "w") as f: - f.write("hello from pydra") - - cmd = "cat" - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), - ) - ], bases=(ShellSpec,), ) docky = ShellCommandTask( name="docky", + environment=Docker(image="busybox"), executable=cmd, - file=filename, input_spec=my_input_spec, - container_info=("docker", "busybox"), strip=True, ) @@ -817,13 +229,13 @@ def test_docker_inputspec_1_dockerflag(tmpdir): @no_win @need_docker -def test_docker_inputspec_2(plugin, tmpdir): +def test_docker_inputspec_2(plugin, tmp_path): """a customized input spec with two fields for docker task""" - filename_1 = tmpdir.join("file_pydra.txt") + filename_1 = tmp_path / "file_pydra.txt" with open(filename_1, "w") as f: f.write("hello from pydra\n") - filename_2 = tmpdir.join("file_nice.txt") + filename_2 = tmp_path / "file_nice.txt" with open(filename_2, "w") as f: f.write("have a nice one") @@ -856,12 +268,12 @@ def test_docker_inputspec_2(plugin, tmpdir): ), ), ], - bases=(DockerSpec,), + bases=(ShellSpec,), ) - docky = DockerTask( + docky = ShellCommandTask( name="docky", - image="busybox", + environment=Docker(image="busybox"), executable=cmd, file1=filename_1, input_spec=my_input_spec, @@ -874,14 +286,14 @@ def test_docker_inputspec_2(plugin, tmpdir): @no_win @need_docker -def test_docker_inputspec_2a_except(plugin, tmpdir): +def test_docker_inputspec_2a_except(plugin, tmp_path): """a customized input spec with two fields first one uses a default, and second doesn't - raises a dataclass exception """ - filename_1 = tmpdir.join("file_pydra.txt") + filename_1 = tmp_path / "file_pydra.txt" with open(filename_1, "w") as f: f.write("hello from pydra\n") - filename_2 = tmpdir.join("file_nice.txt") + filename_2 = tmp_path / "file_nice.txt" with open(filename_2, "w") as f: f.write("have a nice one") @@ -915,18 +327,18 @@ def test_docker_inputspec_2a_except(plugin, tmpdir): ), ), ], - bases=(DockerSpec,), + bases=(ShellSpec,), ) - docky = DockerTask( + docky = ShellCommandTask( name="docky", - image="busybox", + environment=Docker(image="busybox"), executable=cmd, file2=filename_2, input_spec=my_input_spec, strip=True, ) - assert docky.inputs.file2 == filename_2 + assert docky.inputs.file2.fspath == filename_2 res = docky() assert res.output.stdout == "hello from pydra\nhave a nice one" @@ -934,15 +346,15 @@ def test_docker_inputspec_2a_except(plugin, tmpdir): @no_win @need_docker -def test_docker_inputspec_2a(plugin, tmpdir): +def test_docker_inputspec_2a(plugin, tmp_path): """a customized input spec with two fields first one uses a default value this is fine even if the second field is not using any defaults """ - filename_1 = tmpdir.join("file_pydra.txt") + filename_1 = tmp_path / "file_pydra.txt" with open(filename_1, "w") as f: f.write("hello from pydra\n") - filename_2 = tmpdir.join("file_nice.txt") + filename_2 = tmp_path / "file_nice.txt" with open(filename_2, "w") as f: f.write("have a nice one") @@ -976,12 +388,12 @@ def test_docker_inputspec_2a(plugin, tmpdir): ), ), ], - bases=(DockerSpec,), + bases=(ShellSpec,), ) - docky = DockerTask( + docky = ShellCommandTask( name="docky", - image="busybox", + environment=Docker(image="busybox"), executable=cmd, file2=filename_2, input_spec=my_input_spec, @@ -995,7 +407,7 @@ def test_docker_inputspec_2a(plugin, tmpdir): @no_win @need_docker @pytest.mark.xfail(reason="'docker' not in /proc/1/cgroup on ubuntu; TODO") -def test_docker_inputspec_3(plugin, tmpdir): +def test_docker_inputspec_3(plugin, tmp_path): """input file is in the container, so metadata["container_path"]: True, the input will be treated as a str""" filename = "/proc/1/cgroup" @@ -1019,12 +431,12 @@ def test_docker_inputspec_3(plugin, tmpdir): ), ) ], - bases=(DockerSpec,), + bases=(ShellSpec,), ) - docky = DockerTask( + docky = ShellCommandTask( name="docky", - image="busybox", + environment=Docker(image="busybox"), executable=cmd, file=filename, input_spec=my_input_spec, @@ -1039,57 +451,12 @@ def test_docker_inputspec_3(plugin, tmpdir): @no_win @need_docker -@pytest.mark.skip(reason="we probably don't want to support container_path") -def test_docker_inputspec_3a(plugin, tmpdir): - """input file does not exist in the local file system, - but metadata["container_path"] is not used, - so exception is raised - """ - filename = "/_proc/1/cgroup" - - cmd = "cat" - - my_input_spec = SpecInfo( - name="Input", - fields=[ - ( - "file", - attr.ib( - type=File, - metadata={ - "mandatory": True, - "position": 1, - "argstr": "", - "help_string": "input file", - }, - ), - ) - ], - bases=(DockerSpec,), - ) - - docky = DockerTask( - name="docky", - image="busybox", - executable=cmd, - file=filename, - input_spec=my_input_spec, - strip=True, - ) - - with pytest.raises(Exception) as excinfo: - res = docky() - assert "use field.metadata['container_path']=True" in str(excinfo.value) - - -@no_win -@need_docker -def test_docker_cmd_inputspec_copyfile_1(plugin, tmpdir): +def test_docker_cmd_inputspec_copyfile_1(plugin, tmp_path): """shelltask changes a file in place, adding copyfile=True to the file-input from input_spec hardlink or copy in the output_dir should be created """ - file = tmpdir.join("file_pydra.txt") + file = tmp_path / "file_pydra.txt" with open(file, "w") as f: f.write("hello from pydra\n") @@ -1107,7 +474,7 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmpdir): "argstr": "", "help_string": "orig file", "mandatory": True, - "copyfile": True, + "copyfile": "copy", }, ), ), @@ -1122,12 +489,12 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmpdir): ), ), ], - bases=(DockerSpec,), + bases=(ShellSpec,), ) - docky = DockerTask( + docky = ShellCommandTask( name="docky", - image="busybox", + environment=Docker(image="busybox"), executable=cmd, input_spec=my_input_spec, orig_file=str(file), @@ -1135,10 +502,11 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmpdir): res = docky() assert res.output.stdout == "" - assert res.output.out_file.exists() + out_file = res.output.out_file.fspath + assert out_file.exists() # the file is copied, and than it is changed in place - assert res.output.out_file.parent == docky.output_dir - with open(res.output.out_file) as f: + assert out_file.parent == docky.output_dir + with open(out_file) as f: assert "hi from pydra\n" == f.read() # the original file is unchanged with open(file) as f: @@ -1147,19 +515,18 @@ def test_docker_cmd_inputspec_copyfile_1(plugin, tmpdir): @no_win @need_docker -def test_docker_inputspec_state_1(plugin, tmpdir): +def test_docker_inputspec_state_1(plugin, tmp_path): """a customised input spec for a docker file with a splitter, splitter is on files """ - filename_1 = tmpdir.join("file_pydra.txt") + filename_1 = tmp_path / "file_pydra.txt" with open(filename_1, "w") as f: f.write("hello from pydra\n") - filename_2 = tmpdir.join("file_nice.txt") + filename_2 = tmp_path / "file_nice.txt" with open(filename_2, "w") as f: f.write("have a nice one") cmd = "cat" - filename = [str(filename_1), str(filename_2)] my_input_spec = SpecInfo( name="Input", @@ -1177,17 +544,16 @@ def test_docker_inputspec_state_1(plugin, tmpdir): ), ) ], - bases=(DockerSpec,), + bases=(ShellSpec,), ) - docky = DockerTask( + docky = ShellCommandTask( name="docky", - image="busybox", + environment=Docker(image="busybox"), executable=cmd, - file=filename, input_spec=my_input_spec, strip=True, - ).split("file") + ).split("file", file=[str(filename_1), str(filename_2)]) res = docky() assert res[0].output.stdout == "hello from pydra" @@ -1196,20 +562,20 @@ def test_docker_inputspec_state_1(plugin, tmpdir): @no_win @need_docker -def test_docker_inputspec_state_1b(plugin, tmpdir): +def test_docker_inputspec_state_1b(plugin, tmp_path): """a customised input spec for a docker file with a splitter, files from the input spec have the same path in the local os and the container, so hash is calculated and the test works fine """ - file_1 = tmpdir.join("file_pydra.txt") - file_2 = tmpdir.join("file_nice.txt") + file_1 = tmp_path / "file_pydra.txt" + file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: f.write("hello from pydra") with open(file_2, "w") as f: f.write("have a nice one") cmd = "cat" - filename = [str(file_1), str(file_2)] + filename = [] my_input_spec = SpecInfo( name="Input", @@ -1227,17 +593,16 @@ def test_docker_inputspec_state_1b(plugin, tmpdir): ), ) ], - bases=(DockerSpec,), + bases=(ShellSpec,), ) - docky = DockerTask( + docky = ShellCommandTask( name="docky", - image="busybox", + environment=Docker(image="busybox"), executable=cmd, - file=filename, input_spec=my_input_spec, strip=True, - ).split("file") + ).split("file", file=[str(file_1), str(file_2)]) res = docky() assert res[0].output.stdout == "hello from pydra" @@ -1246,9 +611,9 @@ def test_docker_inputspec_state_1b(plugin, tmpdir): @no_win @need_docker -def test_docker_wf_inputspec_1(plugin, tmpdir): +def test_docker_wf_inputspec_1(plugin, tmp_path): """a customized input spec for workflow with docker tasks""" - filename = tmpdir.join("file_pydra.txt") + filename = tmp_path / "file_pydra.txt" with open(filename, "w") as f: f.write("hello from pydra") @@ -1270,16 +635,16 @@ def test_docker_wf_inputspec_1(plugin, tmpdir): ), ) ], - bases=(DockerSpec,), + bases=(ShellSpec,), ) wf = Workflow(name="wf", input_spec=["cmd", "file"]) wf.inputs.cmd = cmd wf.inputs.file = filename - docky = DockerTask( + docky = ShellCommandTask( name="docky", - image="busybox", + environment=Docker(image="busybox"), executable=wf.lzin.cmd, file=wf.lzin.file, input_spec=my_input_spec, @@ -1298,17 +663,16 @@ def test_docker_wf_inputspec_1(plugin, tmpdir): @no_win @need_docker -def test_docker_wf_state_inputspec_1(plugin, tmpdir): +def test_docker_wf_state_inputspec_1(plugin, tmp_path): """a customized input spec for workflow with docker tasks that has a state""" - file_1 = tmpdir.join("file_pydra.txt") - file_2 = tmpdir.join("file_nice.txt") + file_1 = tmp_path / "file_pydra.txt" + file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: f.write("hello from pydra") with open(file_2, "w") as f: f.write("have a nice one") cmd = "cat" - filename = [str(file_1), str(file_2)] my_input_spec = SpecInfo( name="Input", @@ -1326,23 +690,22 @@ def test_docker_wf_state_inputspec_1(plugin, tmpdir): ), ) ], - bases=(DockerSpec,), + bases=(ShellSpec,), ) wf = Workflow(name="wf", input_spec=["cmd", "file"]) + wf.split(file=[str(file_1), str(file_2)]) wf.inputs.cmd = cmd - wf.inputs.file = filename - docky = DockerTask( + docky = ShellCommandTask( name="docky", - image="busybox", + environment=Docker(image="busybox"), executable=wf.lzin.cmd, file=wf.lzin.file, input_spec=my_input_spec, strip=True, ) wf.add(docky) - wf.split("file") wf.set_output([("out", wf.docky.lzout.stdout)]) @@ -1356,17 +719,16 @@ def test_docker_wf_state_inputspec_1(plugin, tmpdir): @no_win @need_docker -def test_docker_wf_ndst_inputspec_1(plugin, tmpdir): +def test_docker_wf_ndst_inputspec_1(plugin, tmp_path): """a customized input spec for workflow with docker tasks with states""" - file_1 = tmpdir.join("file_pydra.txt") - file_2 = tmpdir.join("file_nice.txt") + file_1 = tmp_path / "file_pydra.txt" + file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: f.write("hello from pydra") with open(file_2, "w") as f: f.write("have a nice one") cmd = "cat" - filename = [str(file_1), str(file_2)] my_input_spec = SpecInfo( name="Input", @@ -1384,21 +746,20 @@ def test_docker_wf_ndst_inputspec_1(plugin, tmpdir): ), ) ], - bases=(DockerSpec,), + bases=(ShellSpec,), ) wf = Workflow(name="wf", input_spec=["cmd", "file"]) wf.inputs.cmd = cmd - wf.inputs.file = filename - docky = DockerTask( + docky = ShellCommandTask( name="docky", - image="busybox", + environment=Docker(image="busybox"), executable=wf.lzin.cmd, file=wf.lzin.file, input_spec=my_input_spec, strip=True, - ).split("file") + ).split("file", file=[str(file_1), str(file_2)]) wf.add(docky) wf.set_output([("out", wf.docky.lzout.stdout)]) diff --git a/pydra/engine/tests/test_environments.py b/pydra/engine/tests/test_environments.py new file mode 100644 index 0000000000..bd05d9daed --- /dev/null +++ b/pydra/engine/tests/test_environments.py @@ -0,0 +1,539 @@ +from pathlib import Path + +from ..environments import Native, Docker, Singularity +from ..task import ShellCommandTask +from ..submitter import Submitter +from ..specs import ( + ShellSpec, + SpecInfo, + File, +) +from .utils import no_win, need_docker, need_singularity + +import attr +import pytest + + +def makedir(path, name): + newdir = path / name + newdir.mkdir() + return newdir + + +def test_native_1(tmp_path): + """simple command, no arguments""" + newcache = lambda x: makedir(tmp_path, x) + + cmd = ["whoami"] + shelly = ShellCommandTask( + name="shelly", executable=cmd, cache_dir=newcache("shelly") + ) + assert shelly.cmdline == " ".join(cmd) + + env_res = Native().execute(shelly) + shelly() + assert env_res == shelly.output_ + + shelly_call = ShellCommandTask( + name="shelly_call", executable=cmd, cache_dir=newcache("shelly_call") + ) + shelly_call(environment=Native()) + assert env_res == shelly_call.output_ + + shelly_subm = ShellCommandTask( + name="shelly_subm", executable=cmd, cache_dir=newcache("shelly_subm") + ) + with Submitter(plugin="cf") as sub: + shelly_subm(submitter=sub, environment=Native()) + assert env_res == shelly_subm.result().output.__dict__ + + +@no_win +@need_docker +def test_docker_1(tmp_path): + """docker env: simple command, no arguments""" + newcache = lambda x: makedir(tmp_path, x) + + cmd = ["whoami"] + docker = Docker(image="busybox") + shelly = ShellCommandTask( + name="shelly", executable=cmd, cache_dir=newcache("shelly") + ) + assert shelly.cmdline == " ".join(cmd) + env_res = docker.execute(shelly) + + shelly_env = ShellCommandTask( + name="shelly", + executable=cmd, + cache_dir=newcache("shelly_env"), + environment=docker, + ) + shelly_env() + assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ + + shelly_call = ShellCommandTask( + name="shelly", executable=cmd, cache_dir=newcache("shelly_call") + ) + shelly_call(environment=docker) + assert env_res == shelly_call.output_ == shelly_call.result().output.__dict__ + + +@no_win +@need_docker +@pytest.mark.parametrize( + "docker", + [ + Docker(image="busybox"), + Docker(image="busybox", tag="latest", xargs="--rm"), + Docker(image="busybox", xargs=["--rm"]), + ], +) +def test_docker_1_subm(tmp_path, docker): + """docker env with submitter: simple command, no arguments""" + newcache = lambda x: makedir(tmp_path, x) + + cmd = ["whoami"] + docker = Docker(image="busybox") + shelly = ShellCommandTask( + name="shelly", executable=cmd, cache_dir=newcache("shelly") + ) + assert shelly.cmdline == " ".join(cmd) + env_res = docker.execute(shelly) + + shelly_env = ShellCommandTask( + name="shelly", + executable=cmd, + cache_dir=newcache("shelly_env"), + environment=docker, + ) + with Submitter(plugin="cf") as sub: + shelly_env(submitter=sub) + assert env_res == shelly_env.result().output.__dict__ + + shelly_call = ShellCommandTask( + name="shelly", executable=cmd, cache_dir=newcache("shelly_call") + ) + with Submitter(plugin="cf") as sub: + shelly_call(submitter=sub, environment=docker) + assert env_res == shelly_call.result().output.__dict__ + + +@no_win +@need_singularity +def test_singularity_1(tmp_path): + """singularity env: simple command, no arguments""" + newcache = lambda x: makedir(tmp_path, x) + + cmd = ["whoami"] + sing = Singularity(image="docker://alpine") + shelly = ShellCommandTask( + name="shelly", executable=cmd, cache_dir=newcache("shelly") + ) + assert shelly.cmdline == " ".join(cmd) + env_res = sing.execute(shelly) + + shelly_env = ShellCommandTask( + name="shelly", + executable=cmd, + cache_dir=newcache("shelly_env"), + environment=sing, + ) + shelly_env() + assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ + + shelly_call = ShellCommandTask( + name="shelly", executable=cmd, cache_dir=newcache("shelly_call") + ) + shelly_call(environment=sing) + assert env_res == shelly_call.output_ == shelly_call.result().output.__dict__ + + +@no_win +@need_singularity +def test_singularity_1_subm(tmp_path, plugin): + """docker env with submitter: simple command, no arguments""" + newcache = lambda x: makedir(tmp_path, x) + + cmd = ["whoami"] + sing = Singularity(image="docker://alpine") + shelly = ShellCommandTask( + name="shelly", executable=cmd, cache_dir=newcache("shelly") + ) + assert shelly.cmdline == " ".join(cmd) + env_res = sing.execute(shelly) + + shelly_env = ShellCommandTask( + name="shelly", + executable=cmd, + cache_dir=newcache("shelly_env"), + environment=sing, + ) + with Submitter(plugin=plugin) as sub: + shelly_env(submitter=sub) + assert env_res == shelly_env.result().output.__dict__ + + shelly_call = ShellCommandTask( + name="shelly", executable=cmd, cache_dir=newcache("shelly_call") + ) + with Submitter(plugin=plugin) as sub: + shelly_call(submitter=sub, environment=sing) + for key in [ + "stdout", + "return_code", + ]: # singularity gives info about cashed image in stderr + assert env_res[key] == shelly_call.result().output.__dict__[key] + + +def create_shelly_inputfile(tempdir, filename, name, executable): + """creating a task with a simple input_spec""" + my_input_spec = SpecInfo( + name="Input", + fields=[ + ( + "file", + attr.ib( + type=File, + metadata={ + "position": 1, + "help_string": "files", + "mandatory": True, + "argstr": "", + }, + ), + ) + ], + bases=(ShellSpec,), + ) + + kwargs = {} if filename is None else {"file": filename} + shelly = ShellCommandTask( + name=name, + executable=executable, + cache_dir=makedir(tempdir, name), + input_spec=my_input_spec, + **kwargs, + ) + return shelly + + +def test_shell_fileinp(tmp_path): + """task with a file in the command/input""" + input_dir = makedir(tmp_path, "inputs") + filename = input_dir / "file.txt" + with open(filename, "w") as f: + f.write("hello ") + + shelly = create_shelly_inputfile( + tempdir=tmp_path, filename=filename, name="shelly", executable=["cat"] + ) + env_res = Native().execute(shelly) + + shelly_env = create_shelly_inputfile( + tempdir=tmp_path, filename=filename, name="shelly_env", executable=["cat"] + ) + shelly_env.environment = Native() + shelly_env() + assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ + + shelly_call = create_shelly_inputfile( + tempdir=tmp_path, filename=filename, name="shelly_call", executable=["cat"] + ) + shelly_call(environment=Native()) + assert env_res == shelly_call.output_ == shelly_call.result().output.__dict__ + + +def test_shell_fileinp_st(tmp_path): + """task (with a splitter) with a file in the command/input""" + input_dir = makedir(tmp_path, "inputs") + filename_1 = input_dir / "file_1.txt" + with open(filename_1, "w") as f: + f.write("hello ") + + filename_2 = input_dir / "file_2.txt" + with open(filename_2, "w") as f: + f.write("hi ") + + filename = [filename_1, filename_2] + + shelly_env = create_shelly_inputfile( + tempdir=tmp_path, filename=None, name="shelly_env", executable=["cat"] + ) + shelly_env.environment = Native() + shelly_env.split(file=filename) + shelly_env() + assert shelly_env.result()[0].output.stdout.strip() == "hello" + assert shelly_env.result()[1].output.stdout.strip() == "hi" + + shelly_call = create_shelly_inputfile( + tempdir=tmp_path, filename=None, name="shelly_call", executable=["cat"] + ) + shelly_call.split(file=filename) + shelly_call(environment=Native()) + assert shelly_call.result()[0].output.stdout.strip() == "hello" + assert shelly_call.result()[1].output.stdout.strip() == "hi" + + +@no_win +@need_docker +def test_docker_fileinp(tmp_path): + """docker env: task with a file in the command/input""" + docker = Docker(image="busybox") + + input_dir = makedir(tmp_path, "inputs") + filename = input_dir / "file.txt" + with open(filename, "w") as f: + f.write("hello ") + + shelly = create_shelly_inputfile( + tempdir=tmp_path, filename=filename, name="shelly", executable=["cat"] + ) + env_res = docker.execute(shelly) + + shelly_env = create_shelly_inputfile( + tempdir=tmp_path, filename=filename, name="shelly_env", executable=["cat"] + ) + shelly_env.environment = docker + shelly_env() + + assert env_res == shelly_env.output_ == shelly_env.result().output.__dict__ + + shelly_call = create_shelly_inputfile( + tempdir=tmp_path, filename=filename, name="shelly_call", executable=["cat"] + ) + shelly_call(environment=docker) + assert env_res == shelly_call.output_ == shelly_call.result().output.__dict__ + + +@no_win +@need_docker +def test_docker_fileinp_subm(tmp_path, plugin): + """docker env with a submitter: task with a file in the command/input""" + docker = Docker(image="busybox") + + input_dir = makedir(tmp_path, "inputs") + filename = input_dir / "file.txt" + with open(filename, "w") as f: + f.write("hello ") + + shelly = create_shelly_inputfile( + tempdir=tmp_path, filename=filename, name="shelly", executable=["cat"] + ) + env_res = docker.execute(shelly) + + shelly_env = create_shelly_inputfile( + tempdir=tmp_path, filename=filename, name="shelly_env", executable=["cat"] + ) + shelly_env.environment = docker + with Submitter(plugin=plugin) as sub: + shelly_env(submitter=sub) + assert env_res == shelly_env.result().output.__dict__ + + shelly_call = create_shelly_inputfile( + tempdir=tmp_path, filename=filename, name="shelly_call", executable=["cat"] + ) + with Submitter(plugin=plugin) as sub: + shelly_call(submitter=sub, environment=docker) + assert env_res == shelly_call.result().output.__dict__ + + +@no_win +@need_docker +def test_docker_fileinp_st(tmp_path): + """docker env: task (with a splitter) with a file in the command/input""" + docker = Docker(image="busybox") + + input_dir = makedir(tmp_path, "inputs") + filename_1 = input_dir / "file_1.txt" + with open(filename_1, "w") as f: + f.write("hello ") + + filename_2 = input_dir / "file_2.txt" + with open(filename_2, "w") as f: + f.write("hi ") + + filename = [filename_1, filename_2] + + shelly_env = create_shelly_inputfile( + tempdir=tmp_path, filename=None, name="shelly_env", executable=["cat"] + ) + shelly_env.environment = docker + shelly_env.split(file=filename) + shelly_env() + assert shelly_env.result()[0].output.stdout.strip() == "hello" + assert shelly_env.result()[1].output.stdout.strip() == "hi" + + shelly_call = create_shelly_inputfile( + tempdir=tmp_path, filename=None, name="shelly_call", executable=["cat"] + ) + shelly_call.split(file=filename) + shelly_call(environment=docker) + assert shelly_call.result()[0].output.stdout.strip() == "hello" + assert shelly_call.result()[1].output.stdout.strip() == "hi" + + +def create_shelly_outputfile(tempdir, filename, name, executable="cp"): + """creating a task with an input_spec that contains a template""" + my_input_spec = SpecInfo( + name="Input", + fields=[ + ( + "file_orig", + attr.ib( + type=File, + metadata={"position": 2, "help_string": "new file", "argstr": ""}, + ), + ), + ( + "file_copy", + attr.ib( + type=str, + metadata={ + "output_file_template": "{file_orig}_copy", + "help_string": "output file", + "argstr": "", + }, + ), + ), + ], + bases=(ShellSpec,), + ) + + kwargs = {} if filename is None else {"file_orig": filename} + shelly = ShellCommandTask( + name=name, + executable=executable, + cache_dir=makedir(tempdir, name), + input_spec=my_input_spec, + **kwargs, + ) + return shelly + + +def test_shell_fileout(tmp_path): + """task with a file in the output""" + input_dir = makedir(tmp_path, "inputs") + filename = input_dir / "file.txt" + with open(filename, "w") as f: + f.write("hello ") + + # execute does not create the cashedir, so this part will fail, + # but I guess we don't want to use it this way anyway + # shelly = create_shelly_outputfile(tempdir=tmp_path, filename=filename, name="shelly") + # env_res = Native().execute(shelly) + + shelly_env = create_shelly_outputfile( + tempdir=tmp_path, filename=filename, name="shelly_env" + ) + shelly_env.environment = Native() + shelly_env() + assert ( + Path(shelly_env.result().output.file_copy) + == shelly_env.output_dir / "file_copy.txt" + ) + + shelly_call = create_shelly_outputfile( + tempdir=tmp_path, filename=filename, name="shelly_call" + ) + shelly_call(environment=Native()) + assert ( + Path(shelly_call.result().output.file_copy) + == shelly_call.output_dir / "file_copy.txt" + ) + + +def test_shell_fileout_st(tmp_path): + """task (with a splitter) with a file in the output""" + input_dir = makedir(tmp_path, "inputs") + filename_1 = input_dir / "file_1.txt" + with open(filename_1, "w") as f: + f.write("hello ") + + filename_2 = input_dir / "file_2.txt" + with open(filename_2, "w") as f: + f.write("hi ") + + filename = [filename_1, filename_2] + + shelly_env = create_shelly_outputfile( + tempdir=tmp_path, filename=None, name="shelly_env" + ) + shelly_env.environment = Native() + shelly_env.split(file_orig=filename) + shelly_env() + assert ( + Path(shelly_env.result()[0].output.file_copy) + == shelly_env.output_dir[0] / "file_1_copy.txt" + ) + assert ( + Path(shelly_env.result()[1].output.file_copy) + == shelly_env.output_dir[1] / "file_2_copy.txt" + ) + + shelly_call = create_shelly_outputfile( + tempdir=tmp_path, filename=None, name="shelly_call" + ) + shelly_call.split(file_orig=filename) + shelly_call(environment=Native()) + assert ( + Path(shelly_call.result()[0].output.file_copy) + == shelly_call.output_dir[0] / "file_1_copy.txt" + ) + assert ( + Path(shelly_call.result()[1].output.file_copy) + == shelly_call.output_dir[1] / "file_2_copy.txt" + ) + + +@no_win +@need_docker +def test_docker_fileout(tmp_path): + """docker env: task with a file in the output""" + docker_env = Docker(image="busybox") + + input_dir = makedir(tmp_path, "inputs") + filename = input_dir / "file.txt" + with open(filename, "w") as f: + f.write("hello ") + + shelly_env = create_shelly_outputfile( + tempdir=tmp_path, filename=filename, name="shelly_env" + ) + shelly_env.environment = docker_env + shelly_env() + assert ( + Path(shelly_env.result().output.file_copy) + == shelly_env.output_dir / "file_copy.txt" + ) + + +@no_win +@need_docker +def test_docker_fileout_st(tmp_path): + """docker env: task (with a splitter) with a file in the output""" + docker_env = Docker(image="busybox") + + input_dir = makedir(tmp_path, "inputs") + filename_1 = input_dir / "file_1.txt" + with open(filename_1, "w") as f: + f.write("hello ") + + filename_2 = input_dir / "file_2.txt" + with open(filename_2, "w") as f: + f.write("hi ") + + filename = [filename_1, filename_2] + + shelly_env = create_shelly_outputfile( + tempdir=tmp_path, filename=None, name="shelly_env" + ) + shelly_env.environment = docker_env + shelly_env.split(file_orig=filename) + shelly_env() + assert ( + Path(shelly_env.result()[0].output.file_copy) + == shelly_env.output_dir[0] / "file_1_copy.txt" + ) + assert ( + Path(shelly_env.result()[1].output.file_copy) + == shelly_env.output_dir[1] / "file_2_copy.txt" + ) diff --git a/pydra/engine/tests/test_graph.py b/pydra/engine/tests/test_graph.py index 4d8a58a29d..403b9e6ef9 100644 --- a/pydra/engine/tests/test_graph.py +++ b/pydra/engine/tests/test_graph.py @@ -65,13 +65,13 @@ def test_edges_3(): def test_edges_ecxeption_1(): with pytest.raises(Exception) as excinfo: - graph = DiGraph(nodes=[A, B, A], edges=[(A, B)]) + DiGraph(nodes=[A, B, A], edges=[(A, B)]) assert "repeated elements" in str(excinfo.value) def test_edges_ecxeption_2(): with pytest.raises(Exception) as excinfo: - graph = DiGraph(nodes=[A, B], edges=[(A, C)]) + DiGraph(nodes=[A, B], edges=[(A, C)]) assert "can't be added" in str(excinfo.value) diff --git a/pydra/engine/tests/test_helpers.py b/pydra/engine/tests/test_helpers.py index efceeb6753..06ce39220d 100644 --- a/pydra/engine/tests/test_helpers.py +++ b/pydra/engine/tests/test_helpers.py @@ -1,23 +1,23 @@ import os -import hashlib +import shutil from pathlib import Path import random import platform - import pytest import cloudpickle as cp - +from unittest.mock import Mock +from fileformats.generic import Directory, File +from fileformats.core import FileSet from .utils import multiply, raise_xeq1 from ..helpers import ( - hash_value, - hash_function, get_available_cpus, save, load_and_run, position_sort, + parse_copyfile, ) +from ...utils.hash import hash_function from .. import helpers_file -from ..specs import File, Directory from ..core import Workflow @@ -47,11 +47,10 @@ def test_save(tmpdir): def test_hash_file(tmpdir): outdir = Path(tmpdir) - with open(outdir / "test.file", "wt") as fp: + with open(outdir / "test.file", "w") as fp: fp.write("test") assert ( - helpers_file.hash_file(outdir / "test.file") - == "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08" + hash_function(File(outdir / "test.file")) == "37fcc546dce7e59585f3217bb4c30299" ) @@ -73,118 +72,87 @@ def test_hashfun_float(): assert hash_function(math.pi) != hash_function(pi_10) -def test_hash_value_dict(): +def test_hash_function_dict(): dict1 = {"a": 10, "b": 5} dict2 = {"b": 5, "a": 10} - assert ( - hash_value(dict1) - == hash_value(dict2) - == [["a", hash_value(10)], ["b", hash_value(5)]] - == [["a", 10], ["b", 5]] - ) + assert hash_function(dict1) == hash_function(dict2) -def test_hash_value_list_tpl(): +def test_hash_function_list_tpl(): lst = [2, 5.6, "ala"] tpl = (2, 5.6, "ala") - assert hash_value(lst) == [hash_value(2), hash_value(5.6), hash_value("ala")] == lst - assert hash_value(lst) == hash_value(tpl) + assert hash_function(lst) != hash_function(tpl) -def test_hash_value_list_dict(): +def test_hash_function_list_dict(): lst = [2, {"a": "ala", "b": 1}] - hash_value(lst) - assert ( - hash_value(lst) - == [hash_value(2), hash_value([["a", "ala"], ["b", 1]])] - == [2, [["a", "ala"], ["b", 1]]] - ) - - -def test_hash_value_files(tmpdir): - file_1 = tmpdir.join("file_1.txt") - file_2 = tmpdir.join("file_2.txt") - with open(file_1, "w") as f: - f.write("hello") - with open(file_2, "w") as f: - f.write("hello") + hash_function(lst) - assert hash_value(file_1, tp=File) == hash_value(file_2, tp=File) - assert hash_value(file_1, tp=str) != hash_value(file_2, tp=str) - assert hash_value(file_1) != hash_value(file_2) - assert hash_value(file_1, tp=File) == helpers_file.hash_file(file_1) +def test_hash_function_files(tmp_path: Path): + file_1 = tmp_path / "file_1.txt" + file_2 = tmp_path / "file_2.txt" + file_1.write_text("hello") + file_2.write_text("hello") -def test_hash_value_files_list(tmpdir): - file_1 = tmpdir.join("file_1.txt") - file_2 = tmpdir.join("file_2.txt") - with open(file_1, "w") as f: - f.write("hello") - with open(file_2, "w") as f: - f.write("hi") + assert hash_function(File(file_1)) == hash_function(File(file_2)) - assert hash_value([file_1, file_2], tp=File) == [ - hash_value(file_1, tp=File), - hash_value(file_2, tp=File), - ] +def test_hash_function_dir_and_files_list(tmp_path: Path): + dir1 = tmp_path / "foo" + dir2 = tmp_path / "bar" + for d in (dir1, dir2): + d.mkdir() + for i in range(3): + f = d / f"{i}.txt" + f.write_text(str(i)) -def test_hash_value_dir(tmpdir): - file_1 = tmpdir.join("file_1.txt") - file_2 = tmpdir.join("file_2.txt") - with open(file_1, "w") as f: - f.write("hello") - with open(file_2, "w") as f: - f.write("hi") + assert hash_function(Directory(dir1)) == hash_function(Directory(dir2)) + file_list1: ty.List[File] = [File(f) for f in dir1.iterdir()] + file_list2: ty.List[File] = [File(f) for f in dir2.iterdir()] + assert hash_function(file_list1) == hash_function(file_list2) - test_sha = hashlib.sha256() - for fx in [file_1, file_2]: - test_sha.update(helpers_file.hash_file(fx).encode()) - bad_sha = hashlib.sha256() - for fx in [file_2, file_1]: - bad_sha.update(helpers_file.hash_file(fx).encode()) +def test_hash_function_files_mismatch(tmp_path: Path): + file_1 = tmp_path / "file_1.txt" + file_2 = tmp_path / "file_2.txt" + file_1.write_text("hello") + file_2.write_text("hi") - orig_hash = helpers_file.hash_dir(tmpdir) + assert hash_function(File(file_1)) != hash_function(File(file_2)) - assert orig_hash == test_sha.hexdigest() - assert orig_hash != bad_sha.hexdigest() - assert orig_hash == hash_value(tmpdir, tp=Directory) +def test_hash_function_nested(tmp_path: Path): + dpath = tmp_path / "dir" + dpath.mkdir() + hidden = dpath / ".hidden" + nested = dpath / "nested" + hidden.mkdir() + nested.mkdir() + file_1 = dpath / "file_1.txt" + file_2 = hidden / "file_2.txt" + file_3 = nested / ".file_3.txt" + file_4 = nested / "file_4.txt" -def test_hash_value_nested(tmpdir): - hidden = tmpdir.mkdir(".hidden") - nested = tmpdir.mkdir("nested") - file_1 = tmpdir.join("file_1.txt") - file_2 = hidden.join("file_2.txt") - file_3 = nested.join(".file_3.txt") - file_4 = nested.join("file_4.txt") - - test_sha = hashlib.sha256() for fx in [file_1, file_2, file_3, file_4]: - with open(fx, "w") as f: - f.write(str(random.randint(0, 1000))) - test_sha.update(helpers_file.hash_file(fx).encode()) + fx.write_text(str(random.randint(0, 1000))) - orig_hash = helpers_file.hash_dir(tmpdir) + nested_dir = Directory(dpath) - assert orig_hash == test_sha.hexdigest() - assert orig_hash == hash_value(tmpdir, tp=Directory) + orig_hash = nested_dir.hash() - nohidden_hash = helpers_file.hash_dir( - tmpdir, ignore_hidden_dirs=True, ignore_hidden_files=True - ) - nohiddendirs_hash = helpers_file.hash_dir(tmpdir, ignore_hidden_dirs=True) - nohiddenfiles_hash = helpers_file.hash_dir(tmpdir, ignore_hidden_files=True) + nohidden_hash = nested_dir.hash(ignore_hidden_dirs=True, ignore_hidden_files=True) + nohiddendirs_hash = nested_dir.hash(ignore_hidden_dirs=True) + nohiddenfiles_hash = nested_dir.hash(ignore_hidden_files=True) assert orig_hash != nohidden_hash assert orig_hash != nohiddendirs_hash assert orig_hash != nohiddenfiles_hash - file_3.remove() - assert helpers_file.hash_dir(tmpdir) == nohiddenfiles_hash - hidden.remove() - assert helpers_file.hash_dir(tmpdir) == nohidden_hash + os.remove(file_3) + assert nested_dir.hash() == nohiddenfiles_hash + shutil.rmtree(hidden) + assert nested_dir.hash() == nohidden_hash def test_get_available_cpus(): @@ -210,7 +178,7 @@ def test_load_and_run(tmpdir): """testing load_and_run for pickled task""" task_pkl = Path(tmpdir.join("task_main.pkl")) - task = multiply(name="mult", x=[1, 2], y=10).split("x") + task = multiply(name="mult", y=10).split(x=[1, 2]) task.state.prepare_states(inputs=task.inputs) task.state.prepare_inputs() with task_pkl.open("wb") as fp: @@ -228,16 +196,16 @@ def test_load_and_run(tmpdir): def test_load_and_run_exception_load(tmpdir): """testing raising exception and saving info in crashfile when when load_and_run""" task_pkl = Path(tmpdir.join("task_main.pkl")) - task = raise_xeq1(name="raise", x=[1, 2]).split("x") - with pytest.raises(FileNotFoundError) as excinfo: - task_0 = load_and_run(task_pkl=task_pkl, ind=0) + raise_xeq1(name="raise").split("x", x=[1, 2]) + with pytest.raises(FileNotFoundError): + load_and_run(task_pkl=task_pkl, ind=0) def test_load_and_run_exception_run(tmpdir): """testing raising exception and saving info in crashfile when when load_and_run""" task_pkl = Path(tmpdir.join("task_main.pkl")) - task = raise_xeq1(name="raise", x=[1, 2]).split("x") + task = raise_xeq1(name="raise").split("x", x=[1, 2]) task.state.prepare_states(inputs=task.inputs) task.state.prepare_inputs() @@ -245,7 +213,7 @@ def test_load_and_run_exception_run(tmpdir): cp.dump(task, fp) with pytest.raises(Exception) as excinfo: - task_0 = load_and_run(task_pkl=task_pkl, ind=0) + load_and_run(task_pkl=task_pkl, ind=0) assert "i'm raising an exception!" in str(excinfo.value) # checking if the crashfile has been created assert "crash" in str(excinfo.value) @@ -268,11 +236,9 @@ def test_load_and_run_wf(tmpdir): """testing load_and_run for pickled task""" wf_pkl = Path(tmpdir.join("wf_main.pkl")) - wf = Workflow(name="wf", input_spec=["x", "y"]) + wf = Workflow(name="wf", input_spec=["x", "y"], y=10) wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.split("x") - wf.inputs.x = [1, 2] - wf.inputs.y = 10 + wf.split("x", x=[1, 2]) wf.set_output([("out", wf.mult.lzout.out)]) @@ -306,3 +272,42 @@ def test_load_and_run_wf(tmpdir): def test_position_sort(pos_args): final_args = position_sort(pos_args) assert final_args == ["a", "b", "c"] + + +def test_parse_copyfile(): + Mode = FileSet.CopyMode + Collation = FileSet.CopyCollation + + def mock_field(copyfile): + mock = Mock(["metadata"]) + mock.metadata = {"copyfile": copyfile} + return mock + + assert parse_copyfile(mock_field((Mode.any, Collation.any))) == ( + Mode.any, + Collation.any, + ) + assert parse_copyfile(mock_field("copy"), default_collation=Collation.siblings) == ( + Mode.copy, + Collation.siblings, + ) + assert parse_copyfile(mock_field("link,adjacent")) == ( + Mode.link, + Collation.adjacent, + ) + assert parse_copyfile(mock_field(True)) == ( + Mode.copy, + Collation.any, + ) + assert parse_copyfile(mock_field(False)) == ( + Mode.link, + Collation.any, + ) + assert parse_copyfile(mock_field(None)) == ( + Mode.any, + Collation.any, + ) + with pytest.raises(TypeError, match="Unrecognised type for mode copyfile"): + parse_copyfile(mock_field((1, 2))) + with pytest.raises(TypeError, match="Unrecognised type for collation copyfile"): + parse_copyfile(mock_field((Mode.copy, 2))) diff --git a/pydra/engine/tests/test_helpers_file.py b/pydra/engine/tests/test_helpers_file.py index 5c2e560a7c..ea5dd2afdc 100644 --- a/pydra/engine/tests/test_helpers_file.py +++ b/pydra/engine/tests/test_helpers_file.py @@ -1,18 +1,17 @@ -import os +import typing as ty import sys -import pytest from pathlib import Path - +import attr +from unittest.mock import Mock +import pytest +from fileformats.generic import File +from ..specs import SpecInfo, ShellSpec +from ..task import ShellCommandTask from ..helpers_file import ( - split_filename, - fname_presuffix, - copyfile, - copyfiles, - on_cifs, - get_related_files, ensure_list, - _cifs_table, - _parse_mount_table, + MountIndentifier, + copy_nested_files, + template_update_single, ) @@ -20,38 +19,6 @@ def _ignore_atime(stat): return stat[:7] + stat[8:] -@pytest.mark.parametrize( - "filename, split", - [ - ("foo.nii", ("", "foo", ".nii")), - ("foo.nii.gz", ("", "foo", ".nii.gz")), - ("foo.niml.dset", ("", "foo", ".niml.dset")), - ("/usr/local/foo.nii.gz", ("/usr/local", "foo", ".nii.gz")), - ("../usr/local/foo.nii", ("../usr/local", "foo", ".nii")), - ("/usr/local/foo.a.b.c.d", ("/usr/local", "foo.a.b.c", ".d")), - ("/usr/local/", ("/usr/local", "", "")), - ], -) -def test_split_filename(filename, split): - res = split_filename(filename) - assert res == split - - -@pytest.mark.skipif( - sys.platform.startswith("win"), - reason="windows drive not known in advance", -) -def test_fname_presuffix(): - fname = "foo.nii" - pth = fname_presuffix(fname, "pre_", "_post", "/tmp") - assert pth == str(Path("/tmp/pre_foo_post.nii")) - fname += ".gz" - pth = fname_presuffix(fname, "pre_", "_post", "/tmp") - assert pth == str(Path("/tmp/pre_foo_post.nii.gz")) - pth = fname_presuffix(fname, "pre_", "_post", "/tmp", use_ext=False) - assert pth == str(Path("/tmp/pre_foo_post")) - - @pytest.fixture() def _temp_analyze_files(tmpdir): """Generate temporary analyze file pair.""" @@ -72,148 +39,6 @@ def _temp_analyze_files_prime(tmpdir): return Path(orig_img.strpath), Path(orig_hdr.strpath) -def test_copyfile(_temp_analyze_files): - orig_img, orig_hdr = _temp_analyze_files - pth, fname = os.path.split(orig_img) - new_img = os.path.join(pth, "newfile.img") - new_hdr = os.path.join(pth, "newfile.hdr") - copyfile(orig_img, new_img) - assert os.path.exists(new_img) - assert os.path.exists(new_hdr) - - -def test_copyfile_true(_temp_analyze_files): - orig_img, orig_hdr = _temp_analyze_files - pth, fname = os.path.split(orig_img) - new_img = os.path.join(pth, "newfile.img") - new_hdr = os.path.join(pth, "newfile.hdr") - # Test with copy=True - copyfile(orig_img, new_img, copy=True) - assert os.path.exists(new_img) - assert os.path.exists(new_hdr) - - -def test_copyfiles(_temp_analyze_files, _temp_analyze_files_prime): - orig_img1, orig_hdr1 = _temp_analyze_files - orig_img2, orig_hdr2 = _temp_analyze_files_prime - pth, fname = os.path.split(orig_img1) - new_img1 = os.path.join(pth, "newfile.img") - new_hdr1 = os.path.join(pth, "newfile.hdr") - pth, fname = os.path.split(orig_img2) - new_img2 = os.path.join(pth, "secondfile.img") - new_hdr2 = os.path.join(pth, "secondfile.hdr") - # providing specific filenames for a new destinations - copyfiles([orig_img1, orig_img2], [new_img1, new_img2]) - # checking if the new files exist (together with hdr files) - assert os.path.exists(new_img1) - assert os.path.exists(new_hdr1) - assert os.path.exists(new_img2) - assert os.path.exists(new_hdr2) - - -def test_copyfiles_destdir(_temp_analyze_files, _temp_analyze_files_prime, tmpdir): - orig_img1, _ = _temp_analyze_files - orig_img2, _ = _temp_analyze_files_prime - _, fname = os.path.split(orig_img1) - new_img1 = tmpdir.join(fname) - _, fname = os.path.split(orig_img2) - new_img2 = tmpdir.join(fname) - # providing directory as a new destination - copyfiles([orig_img1, orig_img2], tmpdir) - assert os.path.exists(new_img1) - assert os.path.exists(new_img2) - - -def test_linkchain(_temp_analyze_files): - if os.name != "posix": - return - orig_img, orig_hdr = _temp_analyze_files - pth, fname = os.path.split(orig_img) - new_img1 = os.path.join(pth, "newfile1.img") - new_hdr1 = os.path.join(pth, "newfile1.hdr") - new_img2 = os.path.join(pth, "newfile2.img") - new_hdr2 = os.path.join(pth, "newfile2.hdr") - new_img3 = os.path.join(pth, "newfile3.img") - new_hdr3 = os.path.join(pth, "newfile3.hdr") - copyfile(orig_img, new_img1, use_hardlink=False) - assert os.path.islink(new_img1) - assert os.path.islink(new_hdr1) - copyfile(new_img1, new_img2, copy=True, use_hardlink=False) - assert not os.path.islink(new_img2) - assert not os.path.islink(new_hdr2) - assert not os.path.samefile(orig_img, new_img2) - assert not os.path.samefile(orig_hdr, new_hdr2) - copyfile(new_img1, new_img3, copy=True, use_hardlink=True) - assert not os.path.islink(new_img3) - assert not os.path.islink(new_hdr3) - assert os.path.samefile(orig_img, new_img3) - assert os.path.samefile(orig_hdr, new_hdr3) - - -def test_recopy(_temp_analyze_files): - # Re-copying with the same parameters on an unchanged file should be - # idempotent - # - # Test for copying from regular files and symlinks - orig_img, orig_hdr = _temp_analyze_files - pth, fname = os.path.split(orig_img) - img_link = os.path.join(pth, "imglink.img") - new_img = os.path.join(pth, "newfile.img") - new_hdr = os.path.join(pth, "newfile.hdr") - copyfile(orig_img, img_link) - for copy in (True, False): - for use_hardlink in (True, False): - kwargs = {"copy": copy, "use_hardlink": use_hardlink} - - copyfile(orig_img, new_img, **kwargs) - img_stat = _ignore_atime(os.stat(new_img)) - hdr_stat = _ignore_atime(os.stat(new_hdr)) - copyfile(orig_img, new_img, **kwargs) - err_msg = "Regular - OS: {}; Copy: {}; Hardlink: {}".format( - os.name, copy, use_hardlink - ) - assert img_stat == _ignore_atime(os.stat(new_img)), err_msg - assert hdr_stat == _ignore_atime(os.stat(new_hdr)), err_msg - os.unlink(new_img) - os.unlink(new_hdr) - - copyfile(img_link, new_img, **kwargs) - img_stat = _ignore_atime(os.stat(new_img)) - hdr_stat = _ignore_atime(os.stat(new_hdr)) - copyfile(img_link, new_img, **kwargs) - err_msg = "Symlink - OS: {}; Copy: {}; Hardlink: {}".format( - os.name, copy, use_hardlink - ) - assert img_stat == _ignore_atime(os.stat(new_img)), err_msg - assert hdr_stat == _ignore_atime(os.stat(new_hdr)), err_msg - os.unlink(new_img) - os.unlink(new_hdr) - - -def test_get_related_files(_temp_analyze_files): - orig_img, orig_hdr = _temp_analyze_files - - related_files = get_related_files(orig_img) - assert orig_img in related_files - assert orig_hdr in related_files - - related_files = get_related_files(orig_hdr) - assert orig_img in related_files - assert orig_hdr in related_files - - -def test_get_related_files_noninclusive(_temp_analyze_files): - orig_img, orig_hdr = _temp_analyze_files - - related_files = get_related_files(orig_img, include_this_file=False) - assert orig_img not in related_files - assert orig_hdr in related_files - - related_files = get_related_files(orig_hdr, include_this_file=False) - assert orig_img in related_files - assert orig_hdr not in related_files - - @pytest.mark.parametrize( "filename, expected", [ @@ -228,31 +53,119 @@ def test_ensure_list(filename, expected): assert x == expected -@pytest.mark.parametrize( - "file, length, expected_files", - [ - ( - "/path/test.img", - 3, - [Path("/path/test.hdr"), Path("/path/test.img"), Path("/path/test.mat")], - ), - ( - "/path/test.hdr", - 3, - [Path("/path/test.hdr"), Path("/path/test.img"), Path("/path/test.mat")], - ), - ("/path/test.BRIK", 2, [Path("/path/test.BRIK"), Path("/path/test.HEAD")]), - ("/path/test.HEAD", 2, [Path("/path/test.BRIK"), Path("/path/test.HEAD")]), - ("/path/foo.nii", 2, [Path("/path/foo.nii"), Path("/path/foo.mat")]), - ], +def test_copy_nested_files_copy(tmp_path: Path): + # Test copying files from within nested data structures + src_dir = tmp_path / "src" + + src_dir.mkdir() + + # Create temporary files + files = [] + for x in "abcde": + p = src_dir / (x + ".txt") + p.write_text(x) + files.append(File(p)) + a, b, c, d, e = files + + nested_files = [{"a": a}, b, [(c, a), (d, e)]] + + dest_dir = tmp_path / "dest" + nested_files_copy = copy_nested_files( + nested_files, dest_dir, mode=File.CopyMode.copy + ) + assert sorted(p.relative_to(src_dir) for p in src_dir.glob("**/*.txt")) == sorted( + p.relative_to(dest_dir) for p in dest_dir.glob("**/*.txt") + ) + copied_files = [] + for x in "abcde": + copied_files.append(File(dest_dir / (x + ".txt"))) + a, b, c, d, e = copied_files + assert nested_files_copy == [{"a": a}, b, [(c, a), (d, e)]] + + +def test_copy_nested_files_hardlink(tmp_path: Path): + src_dir = tmp_path / "src" + + src_dir.mkdir() + + # Create temporary files + files = [] + for x in "abcde": + p = src_dir / (x + ".txt") + p.write_text(x) + files.append(File(p)) + a, b, c, d, e = files + + nested_files = [{"a": a}, b, [(c, a), (d, e)]] + + dest_dir = tmp_path / "dest" + nested_files_copy = copy_nested_files( + nested_files, dest_dir, mode=File.CopyMode.hardlink + ) + assert sorted(p.relative_to(src_dir) for p in src_dir.glob("**/*.txt")) == sorted( + p.relative_to(dest_dir) for p in dest_dir.glob("**/*.txt") + ) + copied_files = [] + for x in "abcde": + copied_files.append(File(dest_dir / (x + ".txt"))) + a, b, c, d, e = copied_files + assert nested_files_copy == [{"a": a}, b, [(c, a), (d, e)]] + + +@pytest.mark.skipif( + sys.platform.startswith("win"), reason="symlinks not supported on Windows" ) -def test_related_files(file, length, expected_files): - related_files = get_related_files(file) +def test_copy_nested_files_symlink(tmp_path: Path): + src_dir = tmp_path / "src" + + src_dir.mkdir() + + # Create temporary files + files = [] + for x in "abcde": + p = src_dir / (x + ".txt") + p.write_text(x) + files.append(File(p)) + a, b, c, d, e = files + + nested_files = [{"a": a}, b, [(c, a), (d, e)]] + + dest_dir = tmp_path / "dest" + nested_files_copy = copy_nested_files( + nested_files, dest_dir, mode=File.CopyMode.symlink + ) + assert sorted(p.relative_to(src_dir) for p in src_dir.glob("**/*.txt")) == sorted( + p.relative_to(dest_dir) for p in dest_dir.glob("**/*.txt") + ) + copied_files: ty.List[File] = [] + for x in "abcde": + copied_files.append(File(dest_dir / (x + ".txt"))) + assert all(f.fspath.is_symlink() for f in copied_files) + a, b, c, d, e = copied_files + assert nested_files_copy == [{"a": a}, b, [(c, a), (d, e)]] - assert len(related_files) == length - for ef in expected_files: - assert ef in related_files +def test_copy_nested_files_leave(tmp_path: Path): + src_dir = tmp_path / "src" + + src_dir.mkdir() + + # Create temporary files + files = [] + for x in "abcde": + p = src_dir / (x + ".txt") + p.write_text(x) + files.append(File(p)) + a, b, c, d, e = files + + nested_files = [{"a": a}, b, [(c, a), (d, e)]] + + dest_dir = tmp_path / "dest" # not used + + nested_files_copy = copy_nested_files( + nested_files, dest_dir, mode=File.CopyMode.leave + ) + assert nested_files_copy == nested_files MOUNT_OUTPUTS = ( @@ -411,12 +324,12 @@ def test_related_files(file, length, expected_files): @pytest.mark.parametrize("output, exit_code, expected", MOUNT_OUTPUTS) def test_parse_mount_table(output, exit_code, expected): - assert _parse_mount_table(exit_code, output) == expected + assert MountIndentifier.parse_mount_table(exit_code, output) == expected def test_cifs_check(): - assert isinstance(_cifs_table, list) - assert isinstance(on_cifs("/"), bool) + assert isinstance(MountIndentifier.get_mount_table(), list) + assert isinstance(MountIndentifier.on_cifs("/"), bool) fake_table = [("/scratch/tmp", "ext4"), ("/scratch", "cifs")] cifs_targets = [ ("/scratch/tmp/x/y", False), @@ -428,15 +341,79 @@ def test_cifs_check(): ("/", False), ] - orig_table = _cifs_table[:] - _cifs_table[:] = [] - - for target, _ in cifs_targets: - assert on_cifs(target) is False - - _cifs_table.extend(fake_table) - for target, expected in cifs_targets: - assert on_cifs(target) is expected - - _cifs_table[:] = [] - _cifs_table.extend(orig_table) + with MountIndentifier.patch_table([]): + for target, _ in cifs_targets: + assert MountIndentifier.on_cifs(target) is False + + with MountIndentifier.patch_table(fake_table): + for target, expected in cifs_targets: + assert MountIndentifier.on_cifs(target) is expected + + +def test_output_template(tmp_path): + filename = str(tmp_path / "file.txt") + with open(filename, "w") as f: + f.write("hello from pydra") + in_file = File(filename) + + my_input_spec = SpecInfo( + name="Input", + fields=[ + ( + "in_file", + attr.ib( + type=File, + metadata={ + "mandatory": True, + "position": 1, + "argstr": "", + "help_string": "input file", + }, + ), + ), + ( + "optional", + attr.ib( + type=ty.Union[Path, bool], + default=False, + metadata={ + "position": 2, + "argstr": "--opt", + "output_file_template": "{in_file}.out", + "help_string": "optional file output", + }, + ), + ), + ], + bases=(ShellSpec,), + ) + + class MyCommand(ShellCommandTask): + executable = "my" + input_spec = my_input_spec + + task = MyCommand(in_file=filename) + assert task.cmdline == f"my {filename}" + task.inputs.optional = True + assert task.cmdline == f"my {filename} --opt {task.output_dir / 'file.out'}" + task.inputs.optional = False + assert task.cmdline == f"my {filename}" + task.inputs.optional = "custom-file-out.txt" + assert task.cmdline == f"my {filename} --opt custom-file-out.txt" + + +def test_template_formatting(tmp_path): + field = Mock() + field.name = "grad" + field.argstr = "--grad" + field.metadata = {"output_file_template": ("{in_file}.bvec", "{in_file}.bval")} + inputs = Mock() + inputs_dict = {"in_file": "/a/b/c/file.txt", "grad": True} + + assert template_update_single( + field, + inputs, + inputs_dict_st=inputs_dict, + output_dir=tmp_path, + spec_type="input", + ) == [str(tmp_path / "file.bvec"), str(tmp_path / "file.bval")] diff --git a/pydra/engine/tests/test_nipype1_convert.py b/pydra/engine/tests/test_nipype1_convert.py index fd60f30da4..8408fddb6c 100644 --- a/pydra/engine/tests/test_nipype1_convert.py +++ b/pydra/engine/tests/test_nipype1_convert.py @@ -1,15 +1,9 @@ -import attr import typing as ty -import os, sys import pytest -from pathlib import Path from ..task import ShellCommandTask -from ..submitter import Submitter -from ..core import Workflow from ..specs import ShellOutSpec, ShellSpec, SpecInfo, File -from .utils import result_no_submitter, result_submitter, use_validator interf_input_spec = SpecInfo( name="Input", fields=[("test", ty.Any, {"help_string": "test"})], bases=(ShellSpec,) @@ -125,4 +119,4 @@ def test_interface_run_1(): task = TouchInterf(new_file="hello.txt") assert task.cmdline == "touch hello.txt" res = task() - assert res.output.new_file.exists() + assert res.output.new_file.fspath.exists() diff --git a/pydra/engine/tests/test_node_task.py b/pydra/engine/tests/test_node_task.py index 95ba9c7eac..bceaf97402 100644 --- a/pydra/engine/tests/test_node_task.py +++ b/pydra/engine/tests/test_node_task.py @@ -1,8 +1,15 @@ import os import shutil import attr +import typing as ty import numpy as np +import time +from unittest import mock +from pathlib import Path import pytest +import time +from fileformats.generic import File +import pydra.mark from .utils import ( fun_addtwo, @@ -18,6 +25,7 @@ ) from ..core import TaskBase +from ..specs import StateArray from ..submitter import Submitter @@ -69,7 +77,7 @@ def test_task_init_3( if input_type == "array": a_in = np.array(a_in) - nn = fun_addtwo(name="NA", a=a_in).split(splitter=splitter) + nn = fun_addtwo(name="NA").split(splitter=splitter, a=a_in) assert np.allclose(nn.inputs.a, [3, 5]) assert nn.state.splitter == state_splitter @@ -119,7 +127,7 @@ def test_task_init_3a( a_in, b_in = np.array(a_in), np.array(b_in) elif input_type == "mixed": a_in = np.array(a_in) - nn = fun_addvar(name="NA", a=a_in, b=b_in).split(splitter=splitter) + nn = fun_addvar(name="NA").split(splitter=splitter, a=a_in, b=b_in) assert np.allclose(nn.inputs.a, [3, 5]) assert np.allclose(nn.inputs.b, [10, 20]) @@ -132,21 +140,7 @@ def test_task_init_3a( def test_task_init_4(): - """task with interface and inputs. splitter set using split method""" - nn = fun_addtwo(name="NA", a=[3, 5]) - nn.split(splitter="a") - assert np.allclose(nn.inputs.a, [3, 5]) - - assert nn.state.splitter == "NA.a" - assert nn.state.splitter_rpn == ["NA.a"] - - nn.state.prepare_states(nn.inputs) - assert nn.state.states_ind == [{"NA.a": 0}, {"NA.a": 1}] - assert nn.state.states_val == [{"NA.a": 3}, {"NA.a": 5}] - - -def test_task_init_4a(): - """task with a splitter and inputs set in the split method""" + """task with interface splitter and inputs set in the split method""" nn = fun_addtwo(name="NA") nn.split(splitter="a", a=[3, 5]) assert np.allclose(nn.inputs.a, [3, 5]) @@ -162,8 +156,8 @@ def test_task_init_4a(): def test_task_init_4b(): """updating splitter using overwrite=True""" nn = fun_addtwo(name="NA") - nn.split(splitter="b", a=[3, 5]) - nn.split(splitter="a", overwrite=True) + nn.split(splitter="a", a=[1, 2]) + nn.split(splitter="a", a=[3, 5], overwrite=True) assert np.allclose(nn.inputs.a, [3, 5]) assert nn.state.splitter == "NA.a" @@ -176,9 +170,9 @@ def test_task_init_4b(): def test_task_init_4c(): """trying to set splitter twice without using overwrite""" - nn = fun_addtwo(name="NA").split(splitter="b", a=[3, 5]) + nn = fun_addvar(name="NA").split(splitter="b", b=[1, 2]) with pytest.raises(Exception) as excinfo: - nn.split(splitter="a") + nn.split(splitter="a", a=[3, 5]) assert "splitter has been already set" in str(excinfo.value) assert nn.state.splitter == "NA.b" @@ -189,7 +183,7 @@ def test_task_init_4d(): if the splitter is the same, the exception shouldn't be raised """ nn = fun_addtwo(name="NA").split(splitter="a", a=[3, 5]) - nn.split(splitter="a") + nn.split(splitter="a", a=[3, 5]) assert nn.state.splitter == "NA.a" @@ -293,8 +287,8 @@ def test_task_init_5c(): def test_task_init_6(): """task with splitter, but the input is an empty list""" - nn = fun_addtwo(name="NA", a=[]) - nn.split(splitter="a") + nn = fun_addtwo(name="NA") + nn.split(splitter="a", a=[]) assert nn.inputs.a == [] assert nn.state.splitter == "NA.a" @@ -305,13 +299,13 @@ def test_task_init_6(): assert nn.state.states_val == [] -def test_task_init_7(tmpdir): +def test_task_init_7(tmp_path): """task with a dictionary of files as an input, checking checksum""" - file1 = tmpdir.join("file1.txt") + file1 = tmp_path / "file1.txt" with open(file1, "w") as f: f.write("hello") - file2 = tmpdir.join("file2.txt") + file2 = tmp_path / "file2.txt" with open(file2, "w") as f: f.write("from pydra\n") @@ -319,7 +313,8 @@ def test_task_init_7(tmpdir): output_dir1 = nn1.output_dir # changing the content of the file - file2 = tmpdir.join("file2.txt") + time.sleep(2) # need the mtime to be different + file2 = tmp_path / "file2.txt" with open(file2, "w") as f: f.write("from pydra") @@ -366,10 +361,10 @@ def test_odir_init(): @pytest.mark.flaky(reruns=2) # when dask -def test_task_nostate_1(plugin_dask_opt, tmpdir): +def test_task_nostate_1(plugin_dask_opt, tmp_path): """task without splitter""" nn = fun_addtwo(name="NA", a=3) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -407,10 +402,10 @@ def test_task_nostate_1_call(): @pytest.mark.flaky(reruns=2) # when dask -def test_task_nostate_1_call_subm(plugin_dask_opt, tmpdir): +def test_task_nostate_1_call_subm(plugin_dask_opt, tmp_path): """task without splitter""" nn = fun_addtwo(name="NA", a=3) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -425,10 +420,10 @@ def test_task_nostate_1_call_subm(plugin_dask_opt, tmpdir): @pytest.mark.flaky(reruns=2) # when dask -def test_task_nostate_1_call_plug(plugin_dask_opt, tmpdir): +def test_task_nostate_1_call_plug(plugin_dask_opt, tmp_path): """task without splitter""" nn = fun_addtwo(name="NA", a=3) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -454,10 +449,10 @@ def test_task_nostate_1_call_updateinp(): assert nn.output_dir.exists() -def test_task_nostate_2(plugin, tmpdir): +def test_task_nostate_2(plugin, tmp_path): """task with a list as an input, but no splitter""" nn = moment(name="NA", n=3, lst=[2, 3, 4]) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, [3]) assert np.allclose(nn.inputs.lst, [2, 3, 4]) assert nn.state is None @@ -472,10 +467,10 @@ def test_task_nostate_2(plugin, tmpdir): assert nn.output_dir.exists() -def test_task_nostate_3(plugin, tmpdir): +def test_task_nostate_3(plugin, tmp_path): """task with a dictionary as an input""" nn = fun_dict(name="NA", d={"a": "ala", "b": "bala"}) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert nn.inputs.d == {"a": "ala", "b": "bala"} with Submitter(plugin=plugin) as sub: @@ -488,14 +483,14 @@ def test_task_nostate_3(plugin, tmpdir): assert nn.output_dir.exists() -def test_task_nostate_4(plugin, tmpdir): +def test_task_nostate_4(plugin, tmp_path): """task with a dictionary as an input""" - file1 = tmpdir.join("file.txt") + file1 = tmp_path / "file.txt" with open(file1, "w") as f: f.write("hello from pydra\n") nn = fun_file(name="NA", filename=file1) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path with Submitter(plugin) as sub: sub(nn) @@ -507,13 +502,13 @@ def test_task_nostate_4(plugin, tmpdir): assert nn.output_dir.exists() -def test_task_nostate_5(tmpdir): +def test_task_nostate_5(tmp_path): """task with a dictionary of files as an input""" - file1 = tmpdir.join("file1.txt") + file1 = tmp_path / "file1.txt" with open(file1, "w") as f: f.write("hello") - file2 = tmpdir.join("file2.txt") + file2 = tmp_path / "file2.txt" with open(file2, "w") as f: f.write("from pydra\n") @@ -557,9 +552,10 @@ def test_task_nostate_7(): @pytest.mark.flaky(reruns=2) # when dask -def test_task_nostate_cachedir(plugin_dask_opt, tmpdir): - """task with provided cache_dir using pytest tmpdir""" - cache_dir = tmpdir.mkdir("test_task_nostate") +def test_task_nostate_cachedir(plugin_dask_opt, tmp_path): + """task with provided cache_dir using pytest tmp_path""" + cache_dir = tmp_path / "test_task_nostate" + cache_dir.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) assert np.allclose(nn.inputs.a, [3]) assert nn.state is None @@ -573,11 +569,11 @@ def test_task_nostate_cachedir(plugin_dask_opt, tmpdir): @pytest.mark.flaky(reruns=2) # when dask -def test_task_nostate_cachedir_relativepath(tmpdir, plugin_dask_opt): +def test_task_nostate_cachedir_relativepath(tmp_path, plugin_dask_opt): """task with provided cache_dir as relative path""" - cwd = tmpdir.chdir() + os.chdir(tmp_path) cache_dir = "test_task_nostate" - tmpdir.mkdir(cache_dir) + (tmp_path / cache_dir).mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) assert np.allclose(nn.inputs.a, [3]) @@ -594,13 +590,15 @@ def test_task_nostate_cachedir_relativepath(tmpdir, plugin_dask_opt): @pytest.mark.flaky(reruns=2) # when dask -def test_task_nostate_cachelocations(plugin_dask_opt, tmpdir): +def test_task_nostate_cachelocations(plugin_dask_opt, tmp_path): """ Two identical tasks with provided cache_dir; the second task has cache_locations and should not recompute the results """ - cache_dir = tmpdir.mkdir("test_task_nostate") - cache_dir2 = tmpdir.mkdir("test_task_nostate2") + cache_dir = tmp_path / "test_task_nostate" + cache_dir.mkdir() + cache_dir2 = tmp_path / "test_task_nostate2" + cache_dir2.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) with Submitter(plugin=plugin_dask_opt) as sub: @@ -619,14 +617,16 @@ def test_task_nostate_cachelocations(plugin_dask_opt, tmpdir): assert not nn2.output_dir.exists() -def test_task_nostate_cachelocations_forcererun(plugin, tmpdir): +def test_task_nostate_cachelocations_forcererun(plugin, tmp_path): """ Two identical tasks with provided cache_dir; the second task has cache_locations, but submitter is called with rerun=True, so should recompute """ - cache_dir = tmpdir.mkdir("test_task_nostate") - cache_dir2 = tmpdir.mkdir("test_task_nostate2") + cache_dir = tmp_path / "test_task_nostate" + cache_dir.mkdir() + cache_dir2 = tmp_path / "test_task_nostate2" + cache_dir2.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) with Submitter(plugin=plugin) as sub: @@ -645,13 +645,15 @@ def test_task_nostate_cachelocations_forcererun(plugin, tmpdir): assert nn2.output_dir.exists() -def test_task_nostate_cachelocations_nosubmitter(tmpdir): +def test_task_nostate_cachelocations_nosubmitter(tmp_path): """ Two identical tasks (that are run without submitter!) with provided cache_dir; the second task has cache_locations and should not recompute the results """ - cache_dir = tmpdir.mkdir("test_task_nostate") - cache_dir2 = tmpdir.mkdir("test_task_nostate2") + cache_dir = tmp_path / "test_task_nostate" + cache_dir.mkdir() + cache_dir2 = tmp_path / "test_task_nostate2" + cache_dir2.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) nn() @@ -668,14 +670,16 @@ def test_task_nostate_cachelocations_nosubmitter(tmpdir): assert not nn2.output_dir.exists() -def test_task_nostate_cachelocations_nosubmitter_forcererun(tmpdir): +def test_task_nostate_cachelocations_nosubmitter_forcererun(tmp_path): """ Two identical tasks (that are run without submitter!) with provided cache_dir; the second task has cache_locations, but submitter is called with rerun=True, so should recompute """ - cache_dir = tmpdir.mkdir("test_task_nostate") - cache_dir2 = tmpdir.mkdir("test_task_nostate2") + cache_dir = tmp_path / "test_task_nostate" + cache_dir.mkdir() + cache_dir2 = tmp_path / "test_task_nostate2" + cache_dir2.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) nn() @@ -692,16 +696,19 @@ def test_task_nostate_cachelocations_nosubmitter_forcererun(tmpdir): assert nn2.output_dir.exists() -def test_task_nostate_cachelocations_updated(plugin, tmpdir): +def test_task_nostate_cachelocations_updated(plugin, tmp_path): """ Two identical tasks with provided cache_dir; the second task has cache_locations in init, that is later overwritten in Submitter.__call__; the cache_locations passed to call doesn't exist so the second task should run again """ - cache_dir = tmpdir.mkdir("test_task_nostate") - cache_dir1 = tmpdir.mkdir("test_task_nostate1") - cache_dir2 = tmpdir.mkdir("test_task_nostate2") + cache_dir = tmp_path / "test_task_nostate" + cache_dir.mkdir() + cache_dir1 = tmp_path / "test_task_nostate1" + cache_dir1.mkdir() + cache_dir2 = tmp_path / "test_task_nostate2" + cache_dir2.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir) with Submitter(plugin=plugin) as sub: @@ -726,14 +733,14 @@ def test_task_nostate_cachelocations_updated(plugin, tmpdir): @pytest.mark.flaky(reruns=2) # when dask @pytest.mark.parametrize("input_type", ["list", "array"]) -def test_task_state_1(plugin_dask_opt, input_type, tmpdir): +def test_task_state_1(plugin_dask_opt, input_type, tmp_path): """task with the simplest splitter""" a_in = [3, 5] if input_type == "array": a_in = np.array(a_in) nn = fun_addtwo(name="NA").split(splitter="a", a=a_in) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] @@ -769,12 +776,12 @@ def test_task_state_1(plugin_dask_opt, input_type, tmpdir): assert odir.exists() -def test_task_state_1a(plugin, tmpdir): +def test_task_state_1a(plugin, tmp_path): """task with the simplest splitter (inputs set separately)""" nn = fun_addtwo(name="NA") - nn.split(splitter="a") - nn.inputs.a = [3, 5] - nn.cache_dir = tmpdir + nn.split(splitter="a", a=[1, 2]) + nn.inputs.a = StateArray([3, 5]) + nn.cache_dir = tmp_path assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] @@ -790,12 +797,12 @@ def test_task_state_1a(plugin, tmpdir): assert results[i].output.out == res[1] -def test_task_state_singl_1(plugin, tmpdir): +def test_task_state_singl_1(plugin, tmp_path): """Tasks with two inputs and a splitter (no combiner) one input is a single value, the other is in the splitter and combiner """ nn = fun_addvar(name="NA").split(splitter="a", a=[3, 5], b=10) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert nn.inputs.a == [3, 5] assert nn.inputs.b == 10 @@ -856,7 +863,7 @@ def test_task_state_2( expected, expected_ind, input_type, - tmpdir, + tmp_path, ): """Tasks with two inputs and a splitter (no combiner)""" a_in, b_in = [3, 5], [10, 20] @@ -865,7 +872,7 @@ def test_task_state_2( elif input_type == "mixed": a_in = np.array(a_in) nn = fun_addvar(name="NA").split(splitter=splitter, a=a_in, b=b_in) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert (nn.inputs.a == np.array([3, 5])).all() assert (nn.inputs.b == np.array([10, 20])).all() @@ -902,10 +909,10 @@ def test_task_state_2( assert odir.exists() -def test_task_state_3(plugin, tmpdir): +def test_task_state_3(plugin, tmp_path): """task with the simplest splitter, the input is an empty list""" nn = fun_addtwo(name="NA").split(splitter="a", a=[]) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] @@ -924,13 +931,13 @@ def test_task_state_3(plugin, tmpdir): @pytest.mark.parametrize("input_type", ["list", "array"]) -def test_task_state_4(plugin, input_type, tmpdir): +def test_task_state_4(plugin, input_type, tmp_path): """task with a list as an input, and a simple splitter""" lst_in = [[2, 3, 4], [1, 2, 3]] if input_type == "array": - lst_in = np.array(lst_in) - nn = moment(name="NA", n=3, lst=lst_in).split(splitter="lst") - nn.cache_dir = tmpdir + lst_in = np.array(lst_in, dtype=int) + nn = moment(name="NA", n=3).split(splitter="lst", lst=lst_in) + nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, 3) assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) @@ -944,8 +951,7 @@ def test_task_state_4(plugin, input_type, tmpdir): if input_type == "list": assert el_0 == [2, 3, 4] elif input_type == "array": - assert isinstance(el_0, np.ndarray) - assert (el_0 == [2, 3, 4]).all() + assert el_0 == [2, 3, 4] # checking the results results = nn.result() @@ -957,10 +963,10 @@ def test_task_state_4(plugin, input_type, tmpdir): assert odir.exists() -def test_task_state_4a(plugin, tmpdir): +def test_task_state_4a(plugin, tmp_path): """task with a tuple as an input, and a simple splitter""" - nn = moment(name="NA", n=3, lst=[(2, 3, 4), (1, 2, 3)]).split(splitter="lst") - nn.cache_dir = tmpdir + nn = moment(name="NA", n=3).split(splitter="lst", lst=[(2, 3, 4), (1, 2, 3)]) + nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, 3) assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) @@ -979,12 +985,12 @@ def test_task_state_4a(plugin, tmpdir): assert odir.exists() -def test_task_state_5(plugin, tmpdir): +def test_task_state_5(plugin, tmp_path): """task with a list as an input, and the variable is part of the scalar splitter""" - nn = moment(name="NA", n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]]).split( - splitter=("n", "lst") + nn = moment(name="NA").split( + splitter=("n", "lst"), n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]] ) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, [1, 3]) assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) @@ -1003,14 +1009,14 @@ def test_task_state_5(plugin, tmpdir): assert odir.exists() -def test_task_state_5_exception(plugin, tmpdir): +def test_task_state_5_exception(plugin, tmp_path): """task with a list as an input, and the variable is part of the scalar splitter the shapes are not matching, so exception should be raised """ - nn = moment(name="NA", n=[1, 3, 3], lst=[[2, 3, 4], [1, 2, 3]]).split( - splitter=("n", "lst") + nn = moment(name="NA").split( + splitter=("n", "lst"), n=[1, 3, 3], lst=[[2, 3, 4], [1, 2, 3]] ) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, [1, 3, 3]) assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) @@ -1022,12 +1028,12 @@ def test_task_state_5_exception(plugin, tmpdir): assert "shape" in str(excinfo.value) -def test_task_state_6(plugin, tmpdir): +def test_task_state_6(plugin, tmp_path): """ask with a list as an input, and the variable is part of the outer splitter""" - nn = moment(name="NA", n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]]).split( - splitter=["n", "lst"] + nn = moment(name="NA").split( + splitter=["n", "lst"], n=[1, 3], lst=[[2, 3, 4], [1, 2, 3]] ) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, [1, 3]) assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) @@ -1046,12 +1052,12 @@ def test_task_state_6(plugin, tmpdir): assert odir.exists() -def test_task_state_6a(plugin, tmpdir): +def test_task_state_6a(plugin, tmp_path): """ask with a tuple as an input, and the variable is part of the outer splitter""" - nn = moment(name="NA", n=[1, 3], lst=[(2, 3, 4), (1, 2, 3)]).split( - splitter=["n", "lst"] + nn = moment(name="NA").split( + splitter=["n", "lst"], n=[1, 3], lst=[(2, 3, 4), (1, 2, 3)] ) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert np.allclose(nn.inputs.n, [1, 3]) assert np.allclose(nn.inputs.lst, [[2, 3, 4], [1, 2, 3]]) @@ -1071,10 +1077,10 @@ def test_task_state_6a(plugin, tmpdir): @pytest.mark.flaky(reruns=2) # when dask -def test_task_state_comb_1(plugin_dask_opt, tmpdir): +def test_task_state_comb_1(plugin_dask_opt, tmp_path): """task with the simplest splitter and combiner""" nn = fun_addtwo(name="NA").split(a=[3, 5], splitter="a").combine(combiner="a") - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert (nn.inputs.a == np.array([3, 5])).all() @@ -1206,7 +1212,7 @@ def test_task_state_comb_2( state_rpn_final, expected, expected_val, - tmpdir, + tmp_path, ): """Tasks with scalar and outer splitters and partial or full combiners""" nn = ( @@ -1214,7 +1220,7 @@ def test_task_state_comb_2( .split(a=[3, 5], b=[10, 20], splitter=splitter) .combine(combiner=combiner) ) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert (nn.inputs.a == np.array([3, 5])).all() @@ -1254,19 +1260,19 @@ def test_task_state_comb_2( assert odir.exists() -def test_task_state_comb_singl_1(plugin, tmpdir): +def test_task_state_comb_singl_1(plugin, tmp_path): """Tasks with two inputs; one input is a single value, the other is in the splitter and combiner """ nn = fun_addvar(name="NA").split(splitter="a", a=[3, 5], b=10).combine(combiner="a") - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert nn.inputs.a == [3, 5] assert nn.inputs.b == 10 assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] assert nn.state.combiner == ["NA.a"] - assert nn.state.splitter_final == None + assert nn.state.splitter_final is None assert nn.state.splitter_rpn_final == [] with Submitter(plugin=plugin) as sub: @@ -1284,10 +1290,10 @@ def test_task_state_comb_singl_1(plugin, tmpdir): assert odir.exists() -def test_task_state_comb_3(plugin, tmpdir): +def test_task_state_comb_3(plugin, tmp_path): """task with the simplest splitter, the input is an empty list""" nn = fun_addtwo(name="NA").split(splitter="a", a=[]).combine(combiner=["a"]) - nn.cache_dir = tmpdir + nn.cache_dir = tmp_path assert nn.state.splitter == "NA.a" assert nn.state.splitter_rpn == ["NA.a"] @@ -1364,68 +1370,80 @@ def test_task_state_comb_order(): # Testing with container dimensions for the input -def test_task_state_contdim_1(tmpdir): +def test_task_state_contdim_1(tmp_path): """task with a spliter and container dimension for one of the value""" task_4var = op_4var( name="op_4var", a="a1", + cache_dir=tmp_path, + ) + task_4var.split( + ("b", ["c", "d"]), b=[["b1", "b2"], ["b3", "b4"]], c=["c1", "c2"], d=["d1", "d2"], - cache_dir=tmpdir, + cont_dim={"b": 2}, ) - task_4var.split(("b", ["c", "d"]), cont_dim={"b": 2}) task_4var() res = task_4var.result() assert len(res) == 4 assert res[3].output.out == "a1 b4 c2 d2" -def test_task_state_contdim_2(tmpdir): +def test_task_state_contdim_2(tmp_path): """task with a splitter and container dimension for one of the value""" task_4var = op_4var( name="op_4var", + cache_dir=tmp_path, + ) + task_4var.split( + ["a", ("b", ["c", "d"])], + cont_dim={"b": 2}, a=["a1", "a2"], b=[["b1", "b2"], ["b3", "b4"]], c=["c1", "c2"], d=["d1", "d2"], - cache_dir=tmpdir, ) - task_4var.split(["a", ("b", ["c", "d"])], cont_dim={"b": 2}) task_4var() res = task_4var.result() assert len(res) == 8 assert res[7].output.out == "a2 b4 c2 d2" -def test_task_state_comb_contdim_1(tmpdir): +def test_task_state_comb_contdim_1(tmp_path): """task with a splitter-combiner, and container dimension for one of the value""" task_4var = op_4var( name="op_4var", a="a1", + cache_dir=tmp_path, + ) + task_4var.split( + ("b", ["c", "d"]), + cont_dim={"b": 2}, b=[["b1", "b2"], ["b3", "b4"]], c=["c1", "c2"], d=["d1", "d2"], - cache_dir=tmpdir, - ) - task_4var.split(("b", ["c", "d"]), cont_dim={"b": 2}).combine("b") + ).combine("b") task_4var() res = task_4var.result() assert len(res) == 4 assert res[3].output.out == "a1 b4 c2 d2" -def test_task_state_comb_contdim_2(tmpdir): +def test_task_state_comb_contdim_2(tmp_path): """task with a splitter-combiner, and container dimension for one of the value""" task_4var = op_4var( name="op_4var", + cache_dir=tmp_path, + ) + task_4var.split( + ["a", ("b", ["c", "d"])], a=["a1", "a2"], b=[["b1", "b2"], ["b3", "b4"]], c=["c1", "c2"], d=["d1", "d2"], - cache_dir=tmpdir, - ) - task_4var.split(["a", ("b", ["c", "d"])], cont_dim={"b": 2}).combine("a") + cont_dim={"b": 2}, + ).combine("a") task_4var() res = task_4var.result() assert len(res) == 4 @@ -1436,9 +1454,10 @@ def test_task_state_comb_contdim_2(tmpdir): @pytest.mark.flaky(reruns=2) # when dask -def test_task_state_cachedir(plugin_dask_opt, tmpdir): - """task with a state and provided cache_dir using pytest tmpdir""" - cache_dir = tmpdir.mkdir("test_task_nostate") +def test_task_state_cachedir(plugin_dask_opt, tmp_path): + """task with a state and provided cache_dir using pytest tmp_path""" + cache_dir = tmp_path / "test_task_nostate" + cache_dir.mkdir() nn = fun_addtwo(name="NA", cache_dir=cache_dir).split(splitter="a", a=[3, 5]) assert nn.state.splitter == "NA.a" @@ -1454,13 +1473,15 @@ def test_task_state_cachedir(plugin_dask_opt, tmpdir): assert results[i].output.out == res[1] -def test_task_state_cachelocations(plugin, tmpdir): +def test_task_state_cachelocations(plugin, tmp_path): """ Two identical tasks with a state and cache_dir; the second task has cache_locations and should not recompute the results """ - cache_dir = tmpdir.mkdir("test_task_nostate") - cache_dir2 = tmpdir.mkdir("test_task_nostate2") + cache_dir = tmp_path / "test_task_nostate" + cache_dir.mkdir() + cache_dir2 = tmp_path / "test_task_nostate2" + cache_dir2.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir).split(splitter="a", a=[3, 5]) with Submitter(plugin=plugin) as sub: @@ -1482,14 +1503,16 @@ def test_task_state_cachelocations(plugin, tmpdir): assert not any([dir.exists() for dir in nn2.output_dir]) -def test_task_state_cachelocations_forcererun(plugin, tmpdir): +def test_task_state_cachelocations_forcererun(plugin, tmp_path): """ Two identical tasks with a state and cache_dir; the second task has cache_locations, but submitter is called with rerun=True, so should recompute """ - cache_dir = tmpdir.mkdir("test_task_nostate") - cache_dir2 = tmpdir.mkdir("test_task_nostate2") + cache_dir = tmp_path / "test_task_nostate" + cache_dir.mkdir() + cache_dir2 = tmp_path / "test_task_nostate2" + cache_dir2.mkdir() nn = fun_addtwo(name="NA", a=3, cache_dir=cache_dir).split(splitter="a", a=[3, 5]) with Submitter(plugin=plugin) as sub: @@ -1512,16 +1535,19 @@ def test_task_state_cachelocations_forcererun(plugin, tmpdir): assert all([dir.exists() for dir in nn2.output_dir]) -def test_task_state_cachelocations_updated(plugin, tmpdir): +def test_task_state_cachelocations_updated(plugin, tmp_path): """ Two identical tasks with states and cache_dir; the second task has cache_locations in init, that is later overwritten in Submitter.__call__; the cache_locations from call doesn't exist so the second task should run again """ - cache_dir = tmpdir.mkdir("test_task_nostate") - cache_dir1 = tmpdir.mkdir("test_task_nostate1") - cache_dir2 = tmpdir.mkdir("test_task_nostate2") + cache_dir = tmp_path / "test_task_nostate" + cache_dir.mkdir() + cache_dir1 = tmp_path / "test_task_nostate1" + cache_dir1.mkdir() + cache_dir2 = tmp_path / "test_task_nostate2" + cache_dir2.mkdir() nn = fun_addtwo(name="NA", cache_dir=cache_dir).split(splitter="a", a=[3, 5]) with Submitter(plugin=plugin) as sub: @@ -1542,3 +1568,98 @@ def test_task_state_cachelocations_updated(plugin, tmpdir): # both workflows should be run assert all([dir.exists() for dir in nn.output_dir]) assert all([dir.exists() for dir in nn2.output_dir]) + + +def test_task_files_cachelocations(plugin_dask_opt, tmp_path): + """ + Two identical tasks with provided cache_dir that use file as an input; + the second task has cache_locations and should not recompute the results + """ + cache_dir = tmp_path / "test_task_nostate" + cache_dir.mkdir() + cache_dir2 = tmp_path / "test_task_nostate2" + cache_dir2.mkdir() + input_dir = tmp_path / "input" + input_dir.mkdir() + + input1 = input_dir / "input1.txt" + input1.write_text("test") + input2 = input_dir / "input2.txt" + input2.write_text("test") + + nn = fun_file(name="NA", filename=input1, cache_dir=cache_dir) + with Submitter(plugin=plugin_dask_opt) as sub: + sub(nn) + + nn2 = fun_file( + name="NA", filename=input2, cache_dir=cache_dir2, cache_locations=cache_dir + ) + with Submitter(plugin=plugin_dask_opt) as sub: + sub(nn2) + + # checking the results + results2 = nn2.result() + assert results2.output.out == "test" + + # checking if the second task didn't run the interface again + assert nn.output_dir.exists() + assert not nn2.output_dir.exists() + + +class OverriddenContentsFile(File): + """A class for testing purposes, to that enables you to override the contents + of the file to allow you to check whether the persistent cache is used.""" + + def __init__( + self, + fspaths: ty.Iterator[Path], + contents: ty.Optional[bytes] = None, + metadata: ty.Dict[str, ty.Any] = None, + ): + super().__init__(fspaths, metadata=metadata) + self._contents = contents + + def byte_chunks(self, **kwargs) -> ty.Generator[ty.Tuple[str, bytes], None, None]: + if self._contents is not None: + yield (str(self.fspath), iter([self._contents])) + else: + yield from super().byte_chunks(**kwargs) + + @property + def contents(self): + if self._contents is not None: + return self._contents + return super().contents + + +def test_task_files_persistentcache(tmp_path): + """ + Two identical tasks with provided cache_dir that use file as an input; + the second task has cache_locations and should not recompute the results + """ + test_file_path = tmp_path / "test_file.txt" + test_file_path.write_bytes(b"foo") + cache_dir = tmp_path / "cache-dir" + cache_dir.mkdir() + test_file = OverriddenContentsFile(test_file_path) + + @pydra.mark.task + def read_contents(x: OverriddenContentsFile) -> bytes: + return x.contents + + assert ( + read_contents(x=test_file, cache_dir=cache_dir)(plugin="serial").output.out + == b"foo" + ) + test_file._contents = b"bar" + # should return result from the first run using the persistent cache + assert ( + read_contents(x=test_file, cache_dir=cache_dir)(plugin="serial").output.out + == b"foo" + ) + time.sleep(2) # Windows has a 2-second resolution for mtime + test_file_path.touch() # update the mtime to invalidate the persistent cache value + assert ( + read_contents(x=test_file, cache_dir=cache_dir)(plugin="serial").output.out + == b"bar" + ) # returns the overridden value diff --git a/pydra/engine/tests/test_numpy_examples.py b/pydra/engine/tests/test_numpy_examples.py index 59e9629729..defdad7a2b 100644 --- a/pydra/engine/tests/test_numpy_examples.py +++ b/pydra/engine/tests/test_numpy_examples.py @@ -1,14 +1,16 @@ -import numpy as np import typing as ty import importlib -import pytest +from pathlib import Path import pickle as pk +import numpy as np +import pytest + from ..submitter import Submitter from ..core import Workflow from ...mark import task, annotate from .utils import identity -from ..helpers import hash_value +from ...utils.hash import hash_function, Cache if importlib.util.find_spec("numpy") is None: pytest.skip("can't find numpy library", allow_module_level=True) @@ -40,8 +42,8 @@ def test_multiout(tmpdir): def test_multiout_st(tmpdir): """testing a simple function that returns a numpy array, adding splitter""" wf = Workflow("wf", input_spec=["val"], val=[0, 1, 2]) - wf.add(arrayout(name="mo", val=wf.lzin.val)) - wf.mo.split("val").combine("val") + wf.add(arrayout(name="mo")) + wf.mo.split("val", val=wf.lzin.val).combine("val") wf.set_output([("array", wf.mo.lzout.b)]) wf.cache_dir = tmpdir @@ -61,7 +63,7 @@ def test_numpy_hash_1(): A = np.array([1, 2]) A_pk = pk.loads(pk.dumps(A)) assert (A == A_pk).all() - assert hash_value(A) == hash_value(A_pk) + assert hash_function(A) == hash_function(A_pk) def test_numpy_hash_2(): @@ -69,28 +71,32 @@ def test_numpy_hash_2(): A = np.array([["NDAR"]], dtype=object) A_pk = pk.loads(pk.dumps(A)) assert (A == A_pk).all() - assert hash_value(A) == hash_value(A_pk) + assert hash_function(A) == hash_function(A_pk) + + +def test_numpy_hash_3(): + """hashing check for numeric numpy array""" + A = np.array([1, 2]) + B = np.array([3, 4]) + assert hash_function(A) != hash_function(B) -def test_task_numpyinput_1(tmpdir): +def test_task_numpyinput_1(tmp_path: Path): """task with numeric numpy array as an input""" - nn = identity(name="NA", x=[np.array([1, 2]), np.array([3, 4])]) - nn.cache_dir = tmpdir - nn.split("x") + nn = identity(name="NA") + nn.cache_dir = tmp_path + nn.split(x=[np.array([1, 2]), np.array([3, 4])]) # checking the results results = nn() assert (results[0].output.out == np.array([1, 2])).all() assert (results[1].output.out == np.array([3, 4])).all() -def test_task_numpyinput_2(tmpdir): +def test_task_numpyinput_2(tmp_path: Path): """task with numpy array of type object as an input""" - nn = identity( - name="NA", - x=[np.array(["VAL1"], dtype=object), np.array(["VAL2"], dtype=object)], - ) - nn.cache_dir = tmpdir - nn.split("x") + nn = identity(name="NA") + nn.cache_dir = tmp_path + nn.split(x=[np.array(["VAL1"], dtype=object), np.array(["VAL2"], dtype=object)]) # checking the results results = nn() assert (results[0].output.out == np.array(["VAL1"], dtype=object)).all() diff --git a/pydra/engine/tests/test_shelltask.py b/pydra/engine/tests/test_shelltask.py index f77dd06484..4857db094f 100644 --- a/pydra/engine/tests/test_shelltask.py +++ b/pydra/engine/tests/test_shelltask.py @@ -1,6 +1,7 @@ import attr import typing as ty import os, sys +import subprocess as sp import pytest from pathlib import Path import re @@ -19,7 +20,7 @@ MultiOutputFile, MultiInputObj, ) -from .utils import result_no_submitter, result_submitter, use_validator, no_win +from .utils import result_no_submitter, result_submitter, no_win if sys.platform.startswith("win"): pytest.skip("SLURM not available in windows", allow_module_level=True) @@ -27,10 +28,10 @@ @pytest.mark.flaky(reruns=2) # when dask @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_1(plugin_dask_opt, results_function, tmpdir): +def test_shell_cmd_1(plugin_dask_opt, results_function, tmp_path): """simple command, no arguments""" cmd = ["pwd"] - shelly = ShellCommandTask(name="shelly", executable=cmd, cache_dir=tmpdir) + shelly = ShellCommandTask(name="shelly", executable=cmd, cache_dir=tmp_path) assert shelly.cmdline == " ".join(cmd) res = results_function(shelly, plugin=plugin_dask_opt) @@ -40,13 +41,13 @@ def test_shell_cmd_1(plugin_dask_opt, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_1_strip(plugin, results_function, tmpdir): +def test_shell_cmd_1_strip(plugin, results_function, tmp_path): """simple command, no arguments strip option to remove \n at the end os stdout """ cmd = ["pwd"] shelly = ShellCommandTask(name="shelly", executable=cmd, strip=True) - shelly.cache_dir = tmpdir + shelly.cache_dir = tmp_path assert shelly.cmdline == " ".join(cmd) res = results_function(shelly, plugin) @@ -56,11 +57,11 @@ def test_shell_cmd_1_strip(plugin, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_2(plugin, results_function, tmpdir): +def test_shell_cmd_2(plugin, results_function, tmp_path): """a command with arguments, cmd and args given as executable""" cmd = ["echo", "hail", "pydra"] shelly = ShellCommandTask(name="shelly", executable=cmd) - shelly.cache_dir = tmpdir + shelly.cache_dir = tmp_path assert shelly.cmdline == " ".join(cmd) res = results_function(shelly, plugin) @@ -70,13 +71,13 @@ def test_shell_cmd_2(plugin, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_2a(plugin, results_function, tmpdir): +def test_shell_cmd_2a(plugin, results_function, tmp_path): """a command with arguments, using executable and args""" cmd_exec = "echo" cmd_args = ["hail", "pydra"] # separate command into exec + args shelly = ShellCommandTask(name="shelly", executable=cmd_exec, args=cmd_args) - shelly.cache_dir = tmpdir + shelly.cache_dir = tmp_path assert shelly.inputs.executable == "echo" assert shelly.cmdline == "echo " + " ".join(cmd_args) @@ -87,13 +88,13 @@ def test_shell_cmd_2a(plugin, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_2b(plugin, results_function, tmpdir): +def test_shell_cmd_2b(plugin, results_function, tmp_path): """a command with arguments, using strings executable and args""" cmd_exec = "echo" cmd_args = "pydra" # separate command into exec + args shelly = ShellCommandTask(name="shelly", executable=cmd_exec, args=cmd_args) - shelly.cache_dir = tmpdir + shelly.cache_dir = tmp_path assert shelly.inputs.executable == "echo" assert shelly.cmdline == "echo pydra" @@ -107,15 +108,15 @@ def test_shell_cmd_2b(plugin, results_function, tmpdir): @pytest.mark.flaky(reruns=2) -def test_shell_cmd_3(plugin_dask_opt, tmpdir): +def test_shell_cmd_3(plugin_dask_opt, tmp_path): """commands without arguments splitter = executable """ cmd = ["pwd", "whoami"] # all args given as executable - shelly = ShellCommandTask(name="shelly", executable=cmd).split("executable") - shelly.cache_dir = tmpdir + shelly = ShellCommandTask(name="shelly").split("executable", executable=cmd) + shelly.cache_dir = tmp_path # assert shelly.cmdline == ["pwd", "whoami"] res = shelly(plugin=plugin_dask_opt) @@ -129,17 +130,17 @@ def test_shell_cmd_3(plugin_dask_opt, tmpdir): assert res[0].output.stderr == res[1].output.stderr == "" -def test_shell_cmd_4(plugin, tmpdir): +def test_shell_cmd_4(plugin, tmp_path): """a command with arguments, using executable and args splitter=args """ cmd_exec = "echo" cmd_args = ["nipype", "pydra"] # separate command into exec + args - shelly = ShellCommandTask(name="shelly", executable=cmd_exec, args=cmd_args).split( - splitter="args" + shelly = ShellCommandTask(name="shelly", executable=cmd_exec).split( + splitter="args", args=cmd_args ) - shelly.cache_dir = tmpdir + shelly.cache_dir = tmp_path assert shelly.inputs.executable == "echo" assert shelly.inputs.args == ["nipype", "pydra"] @@ -153,7 +154,7 @@ def test_shell_cmd_4(plugin, tmpdir): assert res[0].output.stderr == res[1].output.stderr == "" -def test_shell_cmd_5(plugin, tmpdir): +def test_shell_cmd_5(plugin, tmp_path): """a command with arguments using splitter and combiner for args """ @@ -161,11 +162,11 @@ def test_shell_cmd_5(plugin, tmpdir): cmd_args = ["nipype", "pydra"] # separate command into exec + args shelly = ( - ShellCommandTask(name="shelly", executable=cmd_exec, args=cmd_args) - .split(splitter="args") + ShellCommandTask(name="shelly", executable=cmd_exec) + .split(splitter="args", args=cmd_args) .combine("args") ) - shelly.cache_dir = tmpdir + shelly.cache_dir = tmp_path assert shelly.inputs.executable == "echo" assert shelly.inputs.args == ["nipype", "pydra"] @@ -176,17 +177,17 @@ def test_shell_cmd_5(plugin, tmpdir): assert res[1].output.stdout == "pydra\n" -def test_shell_cmd_6(plugin, tmpdir): +def test_shell_cmd_6(plugin, tmp_path): """a command with arguments, outer splitter for executable and args """ cmd_exec = ["echo", ["echo", "-n"]] cmd_args = ["nipype", "pydra"] # separate command into exec + args - shelly = ShellCommandTask(name="shelly", executable=cmd_exec, args=cmd_args).split( - splitter=["executable", "args"] + shelly = ShellCommandTask(name="shelly").split( + splitter=["executable", "args"], executable=cmd_exec, args=cmd_args ) - shelly.cache_dir = tmpdir + shelly.cache_dir = tmp_path assert shelly.inputs.executable == ["echo", ["echo", "-n"]] assert shelly.inputs.args == ["nipype", "pydra"] @@ -219,7 +220,7 @@ def test_shell_cmd_6(plugin, tmpdir): ) -def test_shell_cmd_7(plugin, tmpdir): +def test_shell_cmd_7(plugin, tmp_path): """a command with arguments, outer splitter for executable and args, and combiner=args """ @@ -227,11 +228,11 @@ def test_shell_cmd_7(plugin, tmpdir): cmd_args = ["nipype", "pydra"] # separate command into exec + args shelly = ( - ShellCommandTask(name="shelly", executable=cmd_exec, args=cmd_args) - .split(splitter=["executable", "args"]) + ShellCommandTask(name="shelly") + .split(splitter=["executable", "args"], executable=cmd_exec, args=cmd_args) .combine("args") ) - shelly.cache_dir = tmpdir + shelly.cache_dir = tmp_path assert shelly.inputs.executable == ["echo", ["echo", "-n"]] assert shelly.inputs.args == ["nipype", "pydra"] @@ -248,7 +249,7 @@ def test_shell_cmd_7(plugin, tmpdir): # tests with workflows -def test_wf_shell_cmd_1(plugin, tmpdir): +def test_wf_shell_cmd_1(plugin, tmp_path): """a workflow with two connected commands""" wf = Workflow(name="wf", input_spec=["cmd1", "cmd2"]) wf.inputs.cmd1 = "pwd" @@ -261,7 +262,7 @@ def test_wf_shell_cmd_1(plugin, tmpdir): ) wf.set_output([("out", wf.shelly_ls.lzout.stdout)]) - wf.cache_dir = tmpdir + wf.cache_dir = tmp_path with Submitter(plugin=plugin) as sub: wf(submitter=sub) @@ -275,7 +276,7 @@ def test_wf_shell_cmd_1(plugin, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_1(plugin, results_function, use_validator, tmpdir): +def test_shell_cmd_inputspec_1(plugin, results_function, tmp_path): """a command with executable, args and one command opt, using a customized input_spec to add the opt to the command in the right place that is specified in metadata["cmd_pos"] @@ -304,7 +305,7 @@ def test_shell_cmd_inputspec_1(plugin, results_function, use_validator, tmpdir): args=cmd_args, opt_n=cmd_opt, input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) assert shelly.inputs.executable == cmd_exec assert shelly.inputs.args == cmd_args @@ -315,7 +316,7 @@ def test_shell_cmd_inputspec_1(plugin, results_function, use_validator, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_2(plugin, results_function, use_validator, tmpdir): +def test_shell_cmd_inputspec_2(plugin, results_function, tmp_path): """a command with executable, args and two command options, using a customized input_spec to add the opt to the command in the right place that is specified in metadata["cmd_pos"] @@ -353,7 +354,7 @@ def test_shell_cmd_inputspec_2(plugin, results_function, use_validator, tmpdir): opt_n=cmd_opt, opt_hello=cmd_opt_hello, input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) assert shelly.inputs.executable == cmd_exec assert shelly.inputs.args == cmd_args @@ -363,7 +364,7 @@ def test_shell_cmd_inputspec_2(plugin, results_function, use_validator, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_3(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_3(plugin, results_function, tmp_path): """mandatory field added to fields, value provided""" cmd_exec = "echo" hello = "HELLO" @@ -392,7 +393,7 @@ def test_shell_cmd_inputspec_3(plugin, results_function, tmpdir): executable=cmd_exec, text=hello, input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) assert shelly.inputs.executable == cmd_exec assert shelly.cmdline == "echo HELLO" @@ -401,7 +402,7 @@ def test_shell_cmd_inputspec_3(plugin, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_3a(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_3a(plugin, results_function, tmp_path): """mandatory field added to fields, value provided using shorter syntax for input spec (no attr.ib) """ @@ -425,7 +426,7 @@ def test_shell_cmd_inputspec_3a(plugin, results_function, tmpdir): executable=cmd_exec, text=hello, input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) assert shelly.inputs.executable == cmd_exec assert shelly.cmdline == "echo HELLO" @@ -434,7 +435,7 @@ def test_shell_cmd_inputspec_3a(plugin, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_3b(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_3b(plugin, results_function, tmp_path): """mandatory field added to fields, value provided after init""" cmd_exec = "echo" hello = "HELLO" @@ -459,7 +460,7 @@ def test_shell_cmd_inputspec_3b(plugin, results_function, tmpdir): # separate command into exec + args shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmpdir + name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) shelly.inputs.text = hello @@ -469,7 +470,7 @@ def test_shell_cmd_inputspec_3b(plugin, results_function, tmpdir): assert res.output.stdout == "HELLO\n" -def test_shell_cmd_inputspec_3c_exception(plugin, tmpdir): +def test_shell_cmd_inputspec_3c_exception(plugin, tmp_path): """mandatory field added to fields, value is not provided, so exception is raised""" cmd_exec = "echo" my_input_spec = SpecInfo( @@ -492,7 +493,7 @@ def test_shell_cmd_inputspec_3c_exception(plugin, tmpdir): ) shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmpdir + name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) with pytest.raises(Exception) as excinfo: @@ -501,7 +502,7 @@ def test_shell_cmd_inputspec_3c_exception(plugin, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_3c(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_3c(plugin, results_function, tmp_path): """mandatory=False, so tasks runs fine even without the value""" cmd_exec = "echo" my_input_spec = SpecInfo( @@ -510,7 +511,7 @@ def test_shell_cmd_inputspec_3c(plugin, results_function, tmpdir): ( "text", attr.ib( - type=str, + type=ty.Optional[str], default=None, metadata={ "position": 1, @@ -526,7 +527,7 @@ def test_shell_cmd_inputspec_3c(plugin, results_function, tmpdir): # separate command into exec + args shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmpdir + name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) assert shelly.inputs.executable == cmd_exec @@ -536,7 +537,7 @@ def test_shell_cmd_inputspec_3c(plugin, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_4(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_4(plugin, results_function, tmp_path): """mandatory field added to fields, value provided""" cmd_exec = "echo" my_input_spec = SpecInfo( @@ -556,7 +557,7 @@ def test_shell_cmd_inputspec_4(plugin, results_function, tmpdir): # separate command into exec + args shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmpdir + name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) assert shelly.inputs.executable == cmd_exec @@ -567,7 +568,7 @@ def test_shell_cmd_inputspec_4(plugin, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_4a(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_4a(plugin, results_function, tmp_path): """mandatory field added to fields, value provided using shorter syntax for input spec (no attr.ib) """ @@ -582,7 +583,7 @@ def test_shell_cmd_inputspec_4a(plugin, results_function, tmpdir): # separate command into exec + args shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmpdir + name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) assert shelly.inputs.executable == cmd_exec @@ -593,7 +594,7 @@ def test_shell_cmd_inputspec_4a(plugin, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_4b(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_4b(plugin, results_function, tmp_path): """mandatory field added to fields, value provided""" cmd_exec = "echo" my_input_spec = SpecInfo( @@ -613,7 +614,7 @@ def test_shell_cmd_inputspec_4b(plugin, results_function, tmpdir): # separate command into exec + args shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmpdir + name="shelly", executable=cmd_exec, input_spec=my_input_spec, cache_dir=tmp_path ) assert shelly.inputs.executable == cmd_exec @@ -647,14 +648,10 @@ def test_shell_cmd_inputspec_4c_exception(plugin): ) # separate command into exec + args - with pytest.raises(Exception) as excinfo: - shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec - ) - assert ( - str(excinfo.value) - == "default value should not be set when the field is mandatory" - ) + with pytest.raises( + Exception, match=r"default value \('Hello'\) should not be set when the field" + ): + ShellCommandTask(name="shelly", executable=cmd_exec, input_spec=my_input_spec) def test_shell_cmd_inputspec_4d_exception(plugin): @@ -681,18 +678,14 @@ def test_shell_cmd_inputspec_4d_exception(plugin): ) # separate command into exec + args - with pytest.raises(Exception) as excinfo: - shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, input_spec=my_input_spec - ) - assert ( - str(excinfo.value) - == "default value should not be set together with output_file_template" - ) + with pytest.raises( + Exception, match=r"default value \('Hello'\) should not be set together" + ) as excinfo: + ShellCommandTask(name="shelly", executable=cmd_exec, input_spec=my_input_spec) @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmp_path): """checking xor in metadata: task should work fine, since only one option is True""" cmd_exec = "ls" cmd_t = True @@ -733,14 +726,14 @@ def test_shell_cmd_inputspec_5_nosubm(plugin, results_function, tmpdir): executable=cmd_exec, opt_t=cmd_t, input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) assert shelly.inputs.executable == cmd_exec assert shelly.cmdline == "ls -t" - res = results_function(shelly, plugin) + results_function(shelly, plugin) -def test_shell_cmd_inputspec_5a_exception(plugin, tmpdir): +def test_shell_cmd_inputspec_5a_exception(plugin, tmp_path): """checking xor in metadata: both options are True, so the task raises exception""" cmd_exec = "ls" cmd_t = True @@ -782,7 +775,7 @@ def test_shell_cmd_inputspec_5a_exception(plugin, tmpdir): opt_t=cmd_t, opt_S=cmd_S, input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) with pytest.raises(Exception) as excinfo: shelly() @@ -790,7 +783,7 @@ def test_shell_cmd_inputspec_5a_exception(plugin, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_6(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_6(plugin, results_function, tmp_path): """checking requires in metadata: the required field is set in the init, so the task works fine """ @@ -830,11 +823,11 @@ def test_shell_cmd_inputspec_6(plugin, results_function, tmpdir): opt_t=cmd_t, opt_l=cmd_l, input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) assert shelly.inputs.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - res = results_function(shelly, plugin) + results_function(shelly, plugin) def test_shell_cmd_inputspec_6a_exception(plugin): @@ -878,7 +871,7 @@ def test_shell_cmd_inputspec_6a_exception(plugin): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_6b(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_6b(plugin, results_function, tmp_path): """checking requires in metadata: the required field set after the init """ @@ -918,16 +911,16 @@ def test_shell_cmd_inputspec_6b(plugin, results_function, tmpdir): opt_t=cmd_t, # opt_l=cmd_l, input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) shelly.inputs.opt_l = cmd_l assert shelly.inputs.executable == cmd_exec assert shelly.cmdline == "ls -l -t" - res = results_function(shelly, plugin) + results_function(shelly, plugin) @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_7(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_7(plugin, results_function, tmp_path): """ providing output name using input_spec, using name_tamplate in metadata @@ -957,19 +950,20 @@ def test_shell_cmd_inputspec_7(plugin, results_function, tmpdir): executable=cmd, args=args, input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.out1.exists() + out1 = res.output.out1.fspath + assert out1.exists() # checking if the file is created in a good place - assert shelly.output_dir == res.output.out1.parent - assert res.output.out1.name == "newfile_tmp.txt" + assert shelly.output_dir == out1.parent + assert out1.name == "newfile_tmp.txt" @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_7a(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_7a(plugin, results_function, tmp_path): """ providing output name using input_spec, using name_tamplate in metadata @@ -1001,19 +995,18 @@ def test_shell_cmd_inputspec_7a(plugin, results_function, tmpdir): executable=cmd, args=args, input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.out1_changed.exists() # checking if the file is created in a good place - assert shelly.output_dir == res.output.out1_changed.parent - assert res.output.out1_changed.name == "newfile_tmp.txt" + assert shelly.output_dir == res.output.out1_changed.fspath.parent + assert res.output.out1_changed.fspath.name == "newfile_tmp.txt" @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_7b(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_7b(plugin, results_function, tmp_path): """ providing new file and output name using input_spec, using name_template in metadata @@ -1049,16 +1042,16 @@ def test_shell_cmd_inputspec_7b(plugin, results_function, tmpdir): executable=cmd, newfile="newfile_tmp.txt", input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.out1.exists() + assert res.output.out1.fspath.exists() @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_7c(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_7c(plugin, results_function, tmp_path): """ providing output name using input_spec, using name_tamplate with txt extension (extension from args should be removed @@ -1088,19 +1081,18 @@ def test_shell_cmd_inputspec_7c(plugin, results_function, tmpdir): executable=cmd, args=args, input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.out1.exists() # checking if the file is created in a good place - assert shelly.output_dir == res.output.out1.parent - assert res.output.out1.name == "newfile_tmp.txt" + assert shelly.output_dir == res.output.out1.fspath.parent + assert res.output.out1.fspath.name == "newfile_tmp.txt" @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_8(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_8(plugin, results_function, tmp_path): """ providing new file and output name using input_spec, adding additional string input field with argstr @@ -1148,16 +1140,16 @@ def test_shell_cmd_inputspec_8(plugin, results_function, tmpdir): newfile="newfile_tmp.txt", time="02121010", input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.out1.exists() + assert res.output.out1.fspath.exists() @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_8a(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_8a(plugin, results_function, tmp_path): """ providing new file and output name using input_spec, adding additional string input field with argstr (argstr uses string formatting) @@ -1205,23 +1197,25 @@ def test_shell_cmd_inputspec_8a(plugin, results_function, tmpdir): newfile="newfile_tmp.txt", time="02121010", input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.out1.exists() + assert res.output.out1.fspath.exists() @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_9(tmpdir, plugin, results_function): +def test_shell_cmd_inputspec_9(tmp_path, plugin, results_function): """ providing output name using input_spec (output_file_template in metadata), the template has a suffix, the extension of the file will be moved to the end """ cmd = "cp" - file = tmpdir.mkdir("data_inp").join("file.txt") - file.write("content\n") + ddir = tmp_path / "data_inp" + ddir.mkdir() + file = ddir / ("file.txt") + file.write_text("content\n") my_input_spec = SpecInfo( name="Input", @@ -1253,27 +1247,28 @@ def test_shell_cmd_inputspec_9(tmpdir, plugin, results_function): executable=cmd, input_spec=my_input_spec, file_orig=file, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.file_copy.exists() - assert res.output.file_copy.name == "file_copy.txt" + assert res.output.file_copy.fspath.exists() + assert res.output.file_copy.fspath.name == "file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == res.output.file_copy.parent + assert shelly.output_dir == res.output.file_copy.fspath.parent @pytest.mark.parametrize("results_function", [result_no_submitter]) -def test_shell_cmd_inputspec_9a(tmpdir, plugin, results_function): +def test_shell_cmd_inputspec_9a(tmp_path, plugin, results_function): """ providing output name using input_spec (output_file_template in metadata), the template has a suffix, the extension of the file will be moved to the end the change: input file has directory with a dot """ cmd = "cp" - file = tmpdir.mkdir("data.inp").join("file.txt") - file.write("content\n") + file = tmp_path / "data.inp" / "file.txt" + file.parent.mkdir() + file.write_text("content\n") my_input_spec = SpecInfo( name="Input", @@ -1306,21 +1301,21 @@ def test_shell_cmd_inputspec_9a(tmpdir, plugin, results_function): res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.file_copy.exists() - assert res.output.file_copy.name == "file_copy.txt" + assert res.output.file_copy.fspath.exists() + assert res.output.file_copy.fspath.name == "file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == res.output.file_copy.parent + assert shelly.output_dir == res.output.file_copy.fspath.parent @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_9b(tmpdir, plugin, results_function): +def test_shell_cmd_inputspec_9b(tmp_path, plugin, results_function): """ providing output name using input_spec (output_file_template in metadata) and the keep_extension is set to False, so the extension is removed completely. """ cmd = "cp" - file = tmpdir.join("file.txt") - file.write("content\n") + file = tmp_path / "file.txt" + file.write_text("content\n") my_input_spec = SpecInfo( name="Input", @@ -1353,25 +1348,25 @@ def test_shell_cmd_inputspec_9b(tmpdir, plugin, results_function): executable=cmd, input_spec=my_input_spec, file_orig=file, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.file_copy.exists() - assert res.output.file_copy.name == "file_copy" + assert res.output.file_copy.fspath.exists() + assert res.output.file_copy.fspath.name == "file_copy" @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_9c(tmpdir, plugin, results_function): +def test_shell_cmd_inputspec_9c(tmp_path, plugin, results_function): """ providing output name using input_spec (output_file_template in metadata) and the keep_extension is set to False, so the extension is removed completely, no suffix in the template. """ cmd = "cp" - file = tmpdir.join("file.txt") - file.write("content\n") + file = tmp_path / "file.txt" + file.write_text("content\n") my_input_spec = SpecInfo( name="Input", @@ -1404,25 +1399,27 @@ def test_shell_cmd_inputspec_9c(tmpdir, plugin, results_function): executable=cmd, input_spec=my_input_spec, file_orig=file, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.file_copy.exists() - assert res.output.file_copy.name == "file" - assert res.output.file_copy.parent == shelly.output_dir + assert res.output.file_copy.fspath.exists() + assert res.output.file_copy.fspath.name == "file" + assert res.output.file_copy.fspath.parent == shelly.output_dir @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_9d(tmpdir, plugin, results_function): +def test_shell_cmd_inputspec_9d(tmp_path, plugin, results_function): """ providing output name explicitly by manually setting value in input_spec (instead of using default provided byoutput_file_template in metadata) """ cmd = "cp" - file = tmpdir.mkdir("data_inp").join("file.txt") - file.write("content\n") + ddir = tmp_path / "data_inp" + ddir.mkdir() + file = ddir / ("file.txt") + file.write_text("content\n") my_input_spec = SpecInfo( name="Input", @@ -1455,23 +1452,23 @@ def test_shell_cmd_inputspec_9d(tmpdir, plugin, results_function): input_spec=my_input_spec, file_orig=file, file_copy="my_file_copy.txt", - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.file_copy.exists() - assert res.output.file_copy.name == "my_file_copy.txt" + assert res.output.file_copy.fspath.exists() + assert res.output.file_copy.fspath.name == "my_file_copy.txt" # checking if it's created in a good place - assert shelly.output_dir == res.output.file_copy.parent + assert shelly.output_dir == res.output.file_copy.fspath.parent @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_10(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_10(plugin, results_function, tmp_path): """using input_spec, providing list of files as an input""" - file_1 = tmpdir.join("file_1.txt") - file_2 = tmpdir.join("file_2.txt") + file_1 = tmp_path / "file_1.txt" + file_2 = tmp_path / "file_2.txt" with open(file_1, "w") as f: f.write("hello ") with open(file_2, "w") as f: @@ -1505,7 +1502,7 @@ def test_shell_cmd_inputspec_10(plugin, results_function, tmpdir): executable=cmd_exec, files=files_list, input_spec=my_input_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) assert shelly.inputs.executable == cmd_exec @@ -1513,15 +1510,15 @@ def test_shell_cmd_inputspec_10(plugin, results_function, tmpdir): assert res.output.stdout == "hello from boston" -def test_shell_cmd_inputspec_10_err(tmpdir): +def test_shell_cmd_inputspec_10_err(tmp_path): """checking if the proper error is raised when broken symlink is provided as a input field with File as a type """ - file_1 = tmpdir.join("file_1.txt") + file_1 = tmp_path / "file_1.txt" with open(file_1, "w") as f: f.write("hello") - file_2 = tmpdir.join("file_2.txt") + file_2 = tmp_path / "file_2.txt" # creating symlink and removing the original file os.symlink(file_1, file_2) @@ -1548,21 +1545,18 @@ def test_shell_cmd_inputspec_10_err(tmpdir): bases=(ShellSpec,), ) - shelly = ShellCommandTask( - name="shelly", executable=cmd_exec, files=file_2, input_spec=my_input_spec - ) - shelly.cache_dir = tmpdir - with pytest.raises(FileNotFoundError): - res = shelly() + shelly = ShellCommandTask( + name="shelly", executable=cmd_exec, files=file_2, input_spec=my_input_spec + ) -def test_shell_cmd_inputsspec_11(): +def test_shell_cmd_inputspec_11(tmp_path): input_fields = [ ( "inputFiles", attr.ib( - type=MultiInputFile, + type=MultiInputObj[str], metadata={ "argstr": "...", "help_string": "The list of input image files to be segmented.", @@ -1593,6 +1587,7 @@ def test_shell_cmd_inputsspec_11(): input_spec=input_spec, output_spec=output_spec, ) + wf = Workflow(name="wf", input_spec=["inputFiles"], inputFiles=["test1", "test2"]) task.inputs.inputFiles = wf.lzin.inputFiles @@ -1600,24 +1595,29 @@ def test_shell_cmd_inputsspec_11(): wf.add(task) wf.set_output([("out", wf.echoMultiple.lzout.outputFiles)]) - with Submitter(plugin="cf") as sub: + # XXX: Figure out why this fails with "cf". Occurs in CI when using Ubuntu + Python >= 3.10 + # (but not when using macOS + Python >= 3.10). Same error occurs in test_shell_cmd_outputspec_7a + # see https://github.com/nipype/pydra/issues/671 + with Submitter(plugin="serial") as sub: sub(wf) result = wf.result() for out_file in result.output.out: - assert out_file.name == "test1" or out_file.name == "test2" + assert out_file.fspath.name == "test1" or out_file.fspath.name == "test2" @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_12(tmpdir, plugin, results_function): +def test_shell_cmd_inputspec_12(tmp_path: Path, plugin, results_function): """ providing output name using input_spec output_file_template is provided as a function that returns various templates depending on the values of inputs fields """ cmd = "cp" - file = tmpdir.mkdir("data_inp").join("file.txt") - file.write("content\n") + ddir = tmp_path / "data_inp" + ddir.mkdir() + file = ddir / "file.txt" + file.write_text("content\n") def template_function(inputs): if inputs.number % 2 == 0: @@ -1663,15 +1663,16 @@ def template_function(inputs): input_spec=my_input_spec, file_orig=file, number=2, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.file_copy.exists() - assert res.output.file_copy.name == "file_even.txt" + fspath = res.output.file_copy.fspath + assert fspath.exists() + assert fspath.name == "file_even.txt" # checking if it's created in a good place - assert shelly.output_dir == res.output.file_copy.parent + assert shelly.output_dir == fspath.parent def test_shell_cmd_inputspec_with_iterable(): @@ -1709,12 +1710,12 @@ def test_shell_cmd_inputspec_with_iterable(): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmp_path): """shelltask changes a file in place, adding copyfile=True to the file-input from input_spec hardlink or copy in the output_dir should be created """ - file = tmpdir.join("file_pydra.txt") + file = tmp_path / "file_pydra.txt" with open(file, "w") as f: f.write("hello from pydra\n") @@ -1755,14 +1756,14 @@ def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmpdir): executable=cmd, input_spec=my_input_spec, orig_file=str(file), - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.out_file.exists() + assert res.output.out_file.fspath.exists() # the file is copied, and than it is changed in place - assert res.output.out_file.parent == shelly.output_dir + assert res.output.out_file.fspath.parent == shelly.output_dir with open(res.output.out_file) as f: assert "hi from pydra\n" == f.read() # the original file is unchanged @@ -1771,12 +1772,12 @@ def test_shell_cmd_inputspec_copyfile_1(plugin, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmp_path): """shelltask changes a file in place, adding copyfile=False to the File-input from input_spec hardlink or softlink in the output_dir is created """ - file = tmpdir.join("file_pydra.txt") + file = tmp_path / "file_pydra.txt" with open(file, "w") as f: f.write("hello from pydra\n") @@ -1794,7 +1795,7 @@ def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmpdir): "argstr": "", "help_string": "orig file", "mandatory": True, - "copyfile": False, + "copyfile": "hardlink", }, ), ), @@ -1817,22 +1818,24 @@ def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmpdir): executable=cmd, input_spec=my_input_spec, orig_file=str(file), - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.out_file.exists() + assert res.output.out_file.fspath.exists() # the file is uses a soft link, but it creates and an extra copy before modifying - assert res.output.out_file.parent == shelly.output_dir + assert res.output.out_file.fspath.parent == shelly.output_dir - assert res.output.out_file.parent.joinpath(res.output.out_file.name + "s").exists() + assert res.output.out_file.fspath.parent.joinpath( + res.output.out_file.fspath.name + "s" + ).exists() with open(res.output.out_file) as f: assert "hi from pydra\n" == f.read() # the file is uses a soft link, but it creates and an extra copy # it might depend on the OS - linked_file_copy = res.output.out_file.parent.joinpath( - res.output.out_file.name + "s" + linked_file_copy = res.output.out_file.fspath.parent.joinpath( + res.output.out_file.fspath.name + "s" ) if linked_file_copy.exists(): with open(linked_file_copy) as f: @@ -1849,11 +1852,11 @@ def test_shell_cmd_inputspec_copyfile_1a(plugin, results_function, tmpdir): " and the results can't be found" ) @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmp_path): """shelltask changes a file in place, copyfile is None for the file-input, so original filed is changed """ - file = tmpdir.join("file_pydra.txt") + file = tmp_path / "file_pydra.txt" with open(file, "w") as f: f.write("hello from pydra\n") @@ -1893,12 +1896,12 @@ def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmpdir): executable=cmd, input_spec=my_input_spec, orig_file=str(file), - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.out_file.exists() + assert res.output.out_file.fspath.exists() # the file is not copied, it is changed in place assert res.output.out_file == file with open(res.output.out_file) as f: @@ -1906,7 +1909,7 @@ def test_shell_cmd_inputspec_copyfile_1b(plugin, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_state_1(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_state_1(plugin, results_function, tmp_path): """adding state to the input from input_spec""" cmd_exec = "echo" hello = ["HELLO", "hi"] @@ -1933,10 +1936,9 @@ def test_shell_cmd_inputspec_state_1(plugin, results_function, tmpdir): shelly = ShellCommandTask( name="shelly", executable=cmd_exec, - text=hello, input_spec=my_input_spec, - cache_dir=tmpdir, - ).split("text") + cache_dir=tmp_path, + ).split("text", text=hello) assert shelly.inputs.executable == cmd_exec # todo: this doesn't work when state # assert shelly.cmdline == "echo HELLO" @@ -1945,7 +1947,7 @@ def test_shell_cmd_inputspec_state_1(plugin, results_function, tmpdir): assert res[1].output.stdout == "hi\n" -def test_shell_cmd_inputspec_typeval_1(use_validator): +def test_shell_cmd_inputspec_typeval_1(): """customized input_spec with a type that doesn't match the value - raise an exception """ @@ -1966,12 +1968,10 @@ def test_shell_cmd_inputspec_typeval_1(use_validator): ) with pytest.raises(TypeError): - shelly = ShellCommandTask( - executable=cmd_exec, text="hello", input_spec=my_input_spec - ) + ShellCommandTask(executable=cmd_exec, text="hello", input_spec=my_input_spec) -def test_shell_cmd_inputspec_typeval_2(use_validator): +def test_shell_cmd_inputspec_typeval_2(): """customized input_spec (shorter syntax) with a type that doesn't match the value - raise an exception """ @@ -1984,18 +1984,15 @@ def test_shell_cmd_inputspec_typeval_2(use_validator): ) with pytest.raises(TypeError): - shelly = ShellCommandTask( - executable=cmd_exec, text="hello", input_spec=my_input_spec - ) + ShellCommandTask(executable=cmd_exec, text="hello", input_spec=my_input_spec) @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_state_1a(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_state_1a(plugin, results_function, tmp_path): """adding state to the input from input_spec using shorter syntax for input_spec (without default) """ cmd_exec = "echo" - hello = ["HELLO", "hi"] my_input_spec = SpecInfo( name="Input", fields=[ @@ -2012,10 +2009,9 @@ def test_shell_cmd_inputspec_state_1a(plugin, results_function, tmpdir): shelly = ShellCommandTask( name="shelly", executable=cmd_exec, - text=hello, input_spec=my_input_spec, - cache_dir=tmpdir, - ).split("text") + cache_dir=tmp_path, + ).split(text=["HELLO", "hi"]) assert shelly.inputs.executable == cmd_exec res = results_function(shelly, plugin) @@ -2024,7 +2020,7 @@ def test_shell_cmd_inputspec_state_1a(plugin, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_state_2(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_state_2(plugin, results_function, tmp_path): """ adding splitter to input that is used in the output_file_tamplate """ @@ -2051,31 +2047,29 @@ def test_shell_cmd_inputspec_state_2(plugin, results_function, tmpdir): shelly = ShellCommandTask( name="shelly", executable=cmd, - args=args, input_spec=my_input_spec, - cache_dir=tmpdir, - ).split("args") + cache_dir=tmp_path, + ).split(args=args) res = results_function(shelly, plugin) for i in range(len(args)): assert res[i].output.stdout == "" - assert res[i].output.out1.exists() - assert res[i].output.out1.parent == shelly.output_dir[i] + assert res[i].output.out1.fspath.exists() + assert res[i].output.out1.fspath.parent == shelly.output_dir[i] @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_state_3(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_state_3(plugin, results_function, tmp_path): """adding state to the File-input from input_spec""" - file_1 = tmpdir.join("file_pydra.txt") - file_2 = tmpdir.join("file_nice.txt") + file_1 = tmp_path / "file_pydra.txt" + file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: f.write("hello from pydra") with open(file_2, "w") as f: f.write("have a nice one") cmd_exec = "cat" - files = [file_1, file_2] my_input_spec = SpecInfo( name="Input", @@ -2099,10 +2093,9 @@ def test_shell_cmd_inputspec_state_3(plugin, results_function, tmpdir): shelly = ShellCommandTask( name="shelly", executable=cmd_exec, - file=files, input_spec=my_input_spec, - cache_dir=tmpdir, - ).split("file") + cache_dir=tmp_path, + ).split(file=[file_1, file_2]) assert shelly.inputs.executable == cmd_exec # todo: this doesn't work when state @@ -2113,14 +2106,14 @@ def test_shell_cmd_inputspec_state_3(plugin, results_function, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmpdir): +def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmp_path): """adding state to the File-input from input_spec""" - file1 = tmpdir.join("file1.txt") + file1 = tmp_path / "file1.txt" with open(file1, "w") as f: f.write("hello from pydra\n") - file2 = tmpdir.join("file2.txt") + file2 = tmp_path / "file2.txt" with open(file2, "w") as f: f.write("hello world\n") @@ -2139,7 +2132,7 @@ def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmpdir): "argstr": "", "help_string": "orig file", "mandatory": True, - "copyfile": True, + "copyfile": "copy", }, ), ), @@ -2161,17 +2154,16 @@ def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmpdir): name="shelly", executable=cmd, input_spec=my_input_spec, - orig_file=files, - cache_dir=tmpdir, - ).split("orig_file") + cache_dir=tmp_path, + ).split("orig_file", orig_file=files) txt_l = ["from pydra", "world"] res_l = results_function(shelly, plugin) for i, res in enumerate(res_l): assert res.output.stdout == "" - assert res.output.out_file.exists() + assert res.output.out_file.fspath.exists() # the file is copied, and than it is changed in place - assert res.output.out_file.parent == shelly.output_dir[i] + assert res.output.out_file.fspath.parent == shelly.output_dir[i] with open(res.output.out_file) as f: assert f"hi {txt_l[i]}\n" == f.read() # the original file is unchanged @@ -2183,7 +2175,7 @@ def test_shell_cmd_inputspec_copyfile_state_1(plugin, results_function, tmpdir): @pytest.mark.flaky(reruns=2) # when dask -def test_wf_shell_cmd_2(plugin_dask_opt, tmpdir): +def test_wf_shell_cmd_2(plugin_dask_opt, tmp_path): """a workflow with input with defined output_file_template (str) that requires wf.lzin """ @@ -2191,7 +2183,7 @@ def test_wf_shell_cmd_2(plugin_dask_opt, tmpdir): wf.inputs.cmd = "touch" wf.inputs.args = "newfile.txt" - wf.cache_dir = tmpdir + wf.cache_dir = tmp_path my_input_spec = SpecInfo( name="Input", @@ -2226,11 +2218,11 @@ def test_wf_shell_cmd_2(plugin_dask_opt, tmpdir): res = wf.result() assert res.output.out == "" - assert res.output.out_f.exists() - assert res.output.out_f.parent == wf.output_dir + assert res.output.out_f.fspath.exists() + assert res.output.out_f.fspath.parent == wf.output_dir -def test_wf_shell_cmd_2a(plugin, tmpdir): +def test_wf_shell_cmd_2a(plugin, tmp_path): """a workflow with input with defined output_file_template (tuple) that requires wf.lzin """ @@ -2238,7 +2230,7 @@ def test_wf_shell_cmd_2a(plugin, tmpdir): wf.inputs.cmd = "touch" wf.inputs.args = "newfile.txt" - wf.cache_dir = tmpdir + wf.cache_dir = tmp_path my_input_spec = SpecInfo( name="Input", @@ -2273,10 +2265,10 @@ def test_wf_shell_cmd_2a(plugin, tmpdir): res = wf.result() assert res.output.out == "" - assert res.output.out_f.exists() + assert res.output.out_f.fspath.exists() -def test_wf_shell_cmd_3(plugin, tmpdir): +def test_wf_shell_cmd_3(plugin, tmp_path): """a workflow with 2 tasks, first one has input with output_file_template (str, uses wf.lzin), that is passed to the second task @@ -2286,7 +2278,7 @@ def test_wf_shell_cmd_3(plugin, tmpdir): wf.inputs.cmd1 = "touch" wf.inputs.cmd2 = "cp" wf.inputs.args = "newfile.txt" - wf.cache_dir = tmpdir + wf.cache_dir = tmp_path my_input_spec1 = SpecInfo( name="Input", @@ -2366,14 +2358,14 @@ def test_wf_shell_cmd_3(plugin, tmpdir): res = wf.result() assert res.output.out1 == "" - assert res.output.touch_file.exists() - assert res.output.touch_file.parent == wf.output_dir + assert res.output.touch_file.fspath.exists() + assert res.output.touch_file.fspath.parent == wf.output_dir assert res.output.out2 == "" - assert res.output.cp_file.exists() - assert res.output.cp_file.parent == wf.output_dir + assert res.output.cp_file.fspath.exists() + assert res.output.cp_file.fspath.parent == wf.output_dir -def test_wf_shell_cmd_3a(plugin, tmpdir): +def test_wf_shell_cmd_3a(plugin, tmp_path): """a workflow with 2 tasks, first one has input with output_file_template (str, uses wf.lzin), that is passed to the second task @@ -2383,7 +2375,7 @@ def test_wf_shell_cmd_3a(plugin, tmpdir): wf.inputs.cmd1 = "touch" wf.inputs.cmd2 = "cp" wf.inputs.args = "newfile.txt" - wf.cache_dir = tmpdir + wf.cache_dir = tmp_path my_input_spec1 = SpecInfo( name="Input", @@ -2463,21 +2455,22 @@ def test_wf_shell_cmd_3a(plugin, tmpdir): res = wf.result() assert res.output.out1 == "" - assert res.output.touch_file.exists() + assert res.output.touch_file.fspath.exists() assert res.output.out2 == "" - assert res.output.cp_file.exists() + assert res.output.cp_file.fspath.exists() -def test_wf_shell_cmd_state_1(plugin): +def test_wf_shell_cmd_state_1(plugin, tmp_path): """a workflow with 2 tasks and splitter on the wf level, first one has input with output_file_template (str, uses wf.lzin), that is passed to the second task """ - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2", "args"]).split("args") + wf = Workflow( + name="wf", input_spec=["cmd1", "cmd2", "args"], cache_dir=tmp_path + ).split("args", args=["newfile_1.txt", "newfile_2.txt"]) wf.inputs.cmd1 = "touch" wf.inputs.cmd2 = "cp" - wf.inputs.args = ["newfile_1.txt", "newfile_2.txt"] my_input_spec1 = SpecInfo( name="Input", @@ -2558,14 +2551,14 @@ def test_wf_shell_cmd_state_1(plugin): res_l = wf.result() for i, res in enumerate(res_l): assert res.output.out1 == "" - assert res.output.touch_file.exists() - assert res.output.touch_file.parent == wf.output_dir[i] + assert res.output.touch_file.fspath.exists() + assert res.output.touch_file.fspath.parent == wf.output_dir[i] assert res.output.out2 == "" - assert res.output.cp_file.exists() - assert res.output.cp_file.parent == wf.output_dir[i] + assert res.output.cp_file.fspath.exists() + assert res.output.cp_file.fspath.parent == wf.output_dir[i] -def test_wf_shell_cmd_ndst_1(plugin, tmpdir): +def test_wf_shell_cmd_ndst_1(plugin, tmp_path): """a workflow with 2 tasks and a splitter on the node level, first one has input with output_file_template (str, uses wf.lzin), that is passed to the second task @@ -2575,7 +2568,7 @@ def test_wf_shell_cmd_ndst_1(plugin, tmpdir): wf.inputs.cmd1 = "touch" wf.inputs.cmd2 = "cp" wf.inputs.args = ["newfile_1.txt", "newfile_2.txt"] - wf.cache_dir = tmpdir + wf.cache_dir = tmp_path my_input_spec1 = SpecInfo( name="Input", @@ -2629,8 +2622,7 @@ def test_wf_shell_cmd_ndst_1(plugin, tmpdir): name="shelly1", input_spec=my_input_spec1, executable=wf.lzin.cmd1, - args=wf.lzin.args, - ).split("args") + ).split("args", args=wf.lzin.args) ) wf.add( ShellCommandTask( @@ -2655,16 +2647,16 @@ def test_wf_shell_cmd_ndst_1(plugin, tmpdir): res = wf.result() assert res.output.out1 == ["", ""] - assert all([file.exists() for file in res.output.touch_file]) + assert all([file.fspath.exists() for file in res.output.touch_file]) assert res.output.out2 == ["", ""] - assert all([file.exists() for file in res.output.cp_file]) + assert all([file.fspath.exists() for file in res.output.cp_file]) # customised output spec @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_1(plugin, results_function, tmpdir): +def test_shell_cmd_outputspec_1(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, providing specific pathname """ @@ -2675,16 +2667,16 @@ def test_shell_cmd_outputspec_1(plugin, results_function, tmpdir): bases=(ShellOutSpec,), ) shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmpdir + name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.newfile.exists() + assert res.output.newfile.fspath.exists() @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_1a(plugin, results_function, tmpdir): +def test_shell_cmd_outputspec_1a(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, providing specific pathname """ @@ -2695,15 +2687,15 @@ def test_shell_cmd_outputspec_1a(plugin, results_function, tmpdir): bases=(ShellOutSpec,), ) shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmpdir + name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.newfile.exists() + assert res.output.newfile.fspath.exists() -def test_shell_cmd_outputspec_1b_exception(plugin, tmpdir): +def test_shell_cmd_outputspec_1b_exception(plugin, tmp_path): """ customised output_spec, adding files to the output, providing specific pathname """ @@ -2714,7 +2706,7 @@ def test_shell_cmd_outputspec_1b_exception(plugin, tmpdir): bases=(ShellOutSpec,), ) shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmpdir + name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) with pytest.raises(Exception) as exinfo: @@ -2724,7 +2716,7 @@ def test_shell_cmd_outputspec_1b_exception(plugin, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_2(plugin, results_function, tmpdir): +def test_shell_cmd_outputspec_2(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, using a wildcard in default @@ -2736,15 +2728,15 @@ def test_shell_cmd_outputspec_2(plugin, results_function, tmpdir): bases=(ShellOutSpec,), ) shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmpdir + name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.newfile.exists() + assert res.output.newfile.fspath.exists() -def test_shell_cmd_outputspec_2a_exception(plugin, tmpdir): +def test_shell_cmd_outputspec_2a_exception(plugin, tmp_path): """ customised output_spec, adding files to the output, using a wildcard in default @@ -2756,7 +2748,7 @@ def test_shell_cmd_outputspec_2a_exception(plugin, tmpdir): bases=(ShellOutSpec,), ) shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmpdir + name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) with pytest.raises(Exception) as excinfo: @@ -2766,7 +2758,7 @@ def test_shell_cmd_outputspec_2a_exception(plugin, tmpdir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_3(plugin, results_function, tmpdir): +def test_shell_cmd_outputspec_3(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, using a wildcard in default, should collect two files @@ -2774,74 +2766,22 @@ def test_shell_cmd_outputspec_3(plugin, results_function, tmpdir): cmd = ["touch", "newfile_tmp1.txt", "newfile_tmp2.txt"] my_output_spec = SpecInfo( name="Output", - fields=[("newfile", File, "newfile_*.txt")], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmpdir - ) - - res = results_function(shelly, plugin) - assert res.output.stdout == "" - # newfile is a list - assert len(res.output.newfile) == 2 - assert all([file.exists for file in res.output.newfile]) - - -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_4(plugin, results_function, tmpdir): - """ - customised output_spec, adding files to the output, - using a wildcard in default (in the directory name) - """ - cmd = ["mkdir", "tmp1", ";", "touch", "tmp1/newfile.txt"] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "tmp*/newfile.txt")], + fields=[("newfile", MultiOutputFile, "newfile_*.txt")], bases=(ShellOutSpec,), ) shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmpdir - ) - - res = results_function(shelly, plugin) - assert res.output.stdout == "" - assert res.output.newfile.exists() - - -@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_4a(plugin, results_function, tmpdir): - """ - customised output_spec, adding files to the output, - using a wildcard in default (in the directory name), should collect two files - """ - cmd = [ - "mkdir", - "tmp1", - "tmp2", - ";", - "touch", - "tmp1/newfile.txt", - "tmp2/newfile.txt", - ] - my_output_spec = SpecInfo( - name="Output", - fields=[("newfile", File, "tmp*/newfile.txt")], - bases=(ShellOutSpec,), - ) - shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmpdir + name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) res = results_function(shelly, plugin) assert res.output.stdout == "" # newfile is a list assert len(res.output.newfile) == 2 - assert all([file.exists for file in res.output.newfile]) + assert all([file.fspath.exists() for file in res.output.newfile]) @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_5(plugin, results_function, tmpdir): +def test_shell_cmd_outputspec_5(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, using a function to collect output, the function is saved in the field metadata @@ -2864,14 +2804,14 @@ def gather_output(field, output_dir): bases=(ShellOutSpec,), ) shelly = ShellCommandTask( - name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmpdir + name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path ) res = results_function(shelly, plugin) assert res.output.stdout == "" # newfile is a list assert len(res.output.newfile) == 2 - assert all([file.exists for file in res.output.newfile]) + assert all([file.fspath.exists() for file in res.output.newfile]) assert ( shelly.output_names == shelly.generated_output_names @@ -2880,7 +2820,7 @@ def gather_output(field, output_dir): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_5a(plugin, results_function): +def test_shell_cmd_outputspec_5a(plugin, results_function, tmp_path): """ customised output_spec, adding files to the output, using a function to collect output, the function is saved in the field metadata @@ -2894,16 +2834,23 @@ def gather_output(executable, output_dir): my_output_spec = SpecInfo( name="Output", - fields=[("newfile", attr.ib(type=File, metadata={"callable": gather_output}))], + fields=[ + ( + "newfile", + attr.ib(type=MultiOutputFile, metadata={"callable": gather_output}), + ) + ], bases=(ShellOutSpec,), ) - shelly = ShellCommandTask(name="shelly", executable=cmd, output_spec=my_output_spec) + shelly = ShellCommandTask( + name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path + ) res = results_function(shelly, plugin) assert res.output.stdout == "" # newfile is a list assert len(res.output.newfile) == 2 - assert all([file.exists for file in res.output.newfile]) + assert all([file.fspath.exists() for file in res.output.newfile]) def test_shell_cmd_outputspec_5b_error(): @@ -2929,7 +2876,37 @@ def gather_output(executable, output_dir, ble): @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_6(plugin, results_function, tmpdir): +def test_shell_cmd_outputspec_5c(plugin, results_function, tmp_path): + """ + Customised output spec defined as a class, + using a static function to collect output files. + """ + + @attr.s(kw_only=True) + class MyOutputSpec(ShellOutSpec): + @staticmethod + def gather_output(executable, output_dir): + files = executable[1:] + return [Path(output_dir) / file for file in files] + + newfile: MultiOutputFile = attr.ib(metadata={"callable": gather_output}) + + shelly = ShellCommandTask( + name="shelly", + executable=["touch", "newfile_tmp1.txt", "newfile_tmp2.txt"], + output_spec=SpecInfo(name="Output", bases=(MyOutputSpec,)), + cache_dir=tmp_path, + ) + + res = results_function(shelly, plugin) + assert res.output.stdout == "" + # newfile is a list + assert len(res.output.newfile) == 2 + assert all([file.exists() for file in res.output.newfile]) + + +@pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) +def test_shell_cmd_outputspec_6(plugin, results_function, tmp_path): """ providing output name by providing output_file_template (similar to the previous example, but not touching input_spec) @@ -2959,12 +2936,12 @@ def test_shell_cmd_outputspec_6(plugin, results_function, tmpdir): executable=cmd, args=args, output_spec=my_output_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = results_function(shelly, plugin) assert res.output.stdout == "" - assert res.output.out1.exists() + assert res.output.out1.fspath.exists() def test_shell_cmd_outputspec_6a(): @@ -2993,17 +2970,17 @@ def test_shell_cmd_outputspec_6a(): res = shelly() assert res.output.stdout == "" - assert res.output.out1.exists() + assert res.output.out1.fspath.exists() @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_7(tmpdir, plugin, results_function): +def test_shell_cmd_outputspec_7(tmp_path, plugin, results_function): """ providing output with output_file_name and using MultiOutputFile as a type. the input field used in the template is a MultiInputObj, so it can be and is a list """ - file = tmpdir.join("script.sh") - file.write(f'for var in "$@"; do touch file"$var".txt; done') + file = tmp_path / "script.sh" + file.write_text('for var in "$@"; do touch file"$var".txt; done') cmd = "bash" new_files_id = ["1", "2", "3"] @@ -3066,20 +3043,20 @@ def test_shell_cmd_outputspec_7(tmpdir, plugin, results_function): files_id=new_files_id, ) - res = results_function(shelly, plugin) + res = results_function(shelly, "serial") assert res.output.stdout == "" for file in res.output.new_files: - assert file.exists() + assert file.fspath.exists() @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_7a(tmpdir, plugin, results_function): +def test_shell_cmd_outputspec_7a(tmp_path, plugin, results_function): """ providing output with output_file_name and using MultiOutputFile as a type. the input field used in the template is a MultiInputObj, but a single element is used """ - file = tmpdir.join("script.sh") - file.write(f'for var in "$@"; do touch file"$var".txt; done') + file = tmp_path / "script.sh" + file.write_text('for var in "$@"; do touch file"$var".txt; done') cmd = "bash" new_files_id = "1" @@ -3142,13 +3119,16 @@ def test_shell_cmd_outputspec_7a(tmpdir, plugin, results_function): files_id=new_files_id, ) - res = results_function(shelly, plugin) + # XXX: Figure out why this fails with "cf". Occurs in CI when using Ubuntu + Python >= 3.10 + # (but not when using macOS + Python >= 3.10). Same error occurs in test_shell_cmd_inputspec_11 + # see https://github.com/nipype/pydra/issues/671 + res = results_function(shelly, "serial") assert res.output.stdout == "" - assert res.output.new_files.exists() + assert res.output.new_files.fspath.exists() @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_8a(tmpdir, plugin, results_function): +def test_shell_cmd_outputspec_8a(tmp_path, plugin, results_function): """ customised output_spec, adding int and str to the output, requiring two callables with parameters stdout and stderr @@ -3200,8 +3180,8 @@ def get_stderr(stderr): ) shelly = ShellCommandTask( - name="shelly", executable=cmd, args=args, output_spec=my_output_spec - ).split("args") + name="shelly", executable=cmd, output_spec=my_output_spec, cache_dir=tmp_path + ).split("args", args=args) results = results_function(shelly, plugin) for index, res in enumerate(results): @@ -3230,15 +3210,15 @@ def test_shell_cmd_outputspec_8b_error(): bases=(ShellOutSpec,), ) shelly = ShellCommandTask( - name="shelly", executable=cmd, args=args, output_spec=my_output_spec - ).split("args") + name="shelly", executable=cmd, output_spec=my_output_spec + ).split("args", args=args) with pytest.raises(Exception) as e: shelly() assert "has to have a callable" in str(e.value) @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_8c(tmpdir, plugin, results_function): +def test_shell_cmd_outputspec_8c(tmp_path, plugin, results_function): """ customised output_spec, adding Directory to the output named by args """ @@ -3247,7 +3227,7 @@ def get_lowest_directory(directory_path): return str(directory_path).replace(str(Path(directory_path).parents[0]), "") cmd = "mkdir" - args = [f"{tmpdir}/dir1", f"{tmpdir}/dir2"] + args = [f"{tmp_path}/dir1", f"{tmp_path}/dir2"] my_output_spec = SpecInfo( name="Output", @@ -3269,19 +3249,19 @@ def get_lowest_directory(directory_path): shelly = ShellCommandTask( name="shelly", executable=cmd, - args=args, output_spec=my_output_spec, resultsDir="outdir", - ).split("args") + cache_dir=tmp_path, + ).split("args", args=args) - res = results_function(shelly, plugin) + results_function(shelly, plugin) for index, arg_dir in enumerate(args): - assert Path(Path(tmpdir) / Path(arg_dir)).exists() == True + assert Path(Path(tmp_path) / Path(arg_dir)).exists() assert get_lowest_directory(arg_dir) == f"/dir{index+1}" @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_outputspec_8d(tmpdir, plugin, results_function): +def test_shell_cmd_outputspec_8d(tmp_path, plugin, results_function): """ customised output_spec, adding Directory to the output named by input spec """ @@ -3332,8 +3312,8 @@ def get_lowest_directory(directory_path): executable=cmd, input_spec=my_input_spec, output_spec=my_output_spec, - cache_dir=tmpdir, - resultsDir="test", # Path(tmpdir) / "test" TODO: Not working without absolute path support + cache_dir=tmp_path, + resultsDir="test", # Path(tmp_path) / "test" TODO: Not working without absolute path support ) assert ( shelly.output_names @@ -3342,14 +3322,14 @@ def get_lowest_directory(directory_path): ) res = results_function(shelly, plugin) print("Cache_dirr:", shelly.cache_dir) - assert (shelly.output_dir / Path("test")).exists() == True + assert (shelly.output_dir / Path("test")).exists() assert get_lowest_directory(res.output.resultsDir) == get_lowest_directory( shelly.output_dir / Path("test") ) @pytest.mark.parametrize("results_function", [result_no_submitter, result_submitter]) -def test_shell_cmd_state_outputspec_1(plugin, results_function, tmpdir): +def test_shell_cmd_state_outputspec_1(plugin, results_function, tmp_path): """ providing output name by providing output_file_template splitter for a field that is used in the template @@ -3377,21 +3357,20 @@ def test_shell_cmd_state_outputspec_1(plugin, results_function, tmpdir): shelly = ShellCommandTask( name="shelly", executable=cmd, - args=args, output_spec=my_output_spec, - cache_dir=tmpdir, - ).split("args") + cache_dir=tmp_path, + ).split("args", args=args) res = results_function(shelly, plugin) for i in range(len(args)): assert res[i].output.stdout == "" - assert res[i].output.out1.exists() + assert res[i].output.out1.fspath.exists() # customised output_spec for tasks in workflows -def test_shell_cmd_outputspec_wf_1(plugin, tmpdir): +def test_shell_cmd_outputspec_wf_1(plugin, tmp_path): """ customised output_spec for tasks within a Workflow, adding files to the output, providing specific pathname @@ -3400,7 +3379,7 @@ def test_shell_cmd_outputspec_wf_1(plugin, tmpdir): cmd = ["touch", "newfile_tmp.txt"] wf = Workflow(name="wf", input_spec=["cmd"]) wf.inputs.cmd = cmd - wf.cache_dir = tmpdir + wf.cache_dir = tmp_path my_output_spec = SpecInfo( name="Output", @@ -3421,9 +3400,9 @@ def test_shell_cmd_outputspec_wf_1(plugin, tmpdir): res = wf.result() assert res.output.stdout == "" - assert res.output.newfile.exists() + assert res.output.newfile.fspath.exists() # checking if the file was copied to the wf dir - assert res.output.newfile.parent == wf.output_dir + assert res.output.newfile.fspath.parent == wf.output_dir def test_shell_cmd_inputspec_outputspec_1(): @@ -3475,8 +3454,8 @@ def test_shell_cmd_inputspec_outputspec_1(): res = shelly() assert res.output.stdout == "" - assert res.output.newfile1.exists() - assert res.output.newfile2.exists() + assert res.output.newfile1.fspath.exists() + assert res.output.newfile2.fspath.exists() def test_shell_cmd_inputspec_outputspec_1a(): @@ -3528,7 +3507,7 @@ def test_shell_cmd_inputspec_outputspec_1a(): res = shelly() assert res.output.stdout == "" - assert res.output.newfile1.exists() + assert res.output.newfile1.fspath.exists() # newfile2 is not created, since file2 is not provided assert res.output.newfile2 is attr.NOTHING @@ -3596,8 +3575,8 @@ def test_shell_cmd_inputspec_outputspec_2(): res = shelly() assert res.output.stdout == "" - assert res.output.newfile1.exists() - assert res.output.newfile2.exists() + assert res.output.newfile1.fspath.exists() + assert res.output.newfile2.fspath.exists() def test_shell_cmd_inputspec_outputspec_2a(): @@ -3670,7 +3649,7 @@ def test_shell_cmd_inputspec_outputspec_2a(): res = shelly() assert res.output.stdout == "" - assert res.output.newfile1.exists() + assert res.output.newfile1.fspath.exists() assert res.output.newfile2 is attr.NOTHING @@ -3693,7 +3672,7 @@ def test_shell_cmd_inputspec_outputspec_3(): str, {"help_string": "2nd creadted file", "argstr": "", "position": 2}, ), - ("additional_inp", str, {"help_string": "additional inp"}), + ("additional_inp", int, {"help_string": "additional inp"}), ], bases=(ShellSpec,), ) @@ -3730,8 +3709,8 @@ def test_shell_cmd_inputspec_outputspec_3(): res = shelly() assert res.output.stdout == "" - assert res.output.newfile1.exists() - assert res.output.newfile2.exists() + assert res.output.newfile1.fspath.exists() + assert res.output.newfile2.fspath.exists() def test_shell_cmd_inputspec_outputspec_3a(): @@ -3804,7 +3783,7 @@ def test_shell_cmd_inputspec_outputspec_3a(): res = shelly() assert res.output.stdout == "" - assert res.output.newfile1.exists() + assert res.output.newfile1.fspath.exists() # additional input not provided so no newfile2 set (even if the file was created) assert res.output.newfile2 is attr.NOTHING @@ -3823,7 +3802,7 @@ def test_shell_cmd_inputspec_outputspec_4(): str, {"help_string": "1st creadted file", "argstr": "", "position": 1}, ), - ("additional_inp", str, {"help_string": "additional inp"}), + ("additional_inp", int, {"help_string": "additional inp"}), ], bases=(ShellSpec,), ) @@ -3860,7 +3839,7 @@ def test_shell_cmd_inputspec_outputspec_4(): res = shelly() assert res.output.stdout == "" - assert res.output.newfile1.exists() + assert res.output.newfile1.fspath.exists() def test_shell_cmd_inputspec_outputspec_4a(): @@ -3878,7 +3857,7 @@ def test_shell_cmd_inputspec_outputspec_4a(): str, {"help_string": "1st creadted file", "argstr": "", "position": 1}, ), - ("additional_inp", str, {"help_string": "additional inp"}), + ("additional_inp", int, {"help_string": "additional inp"}), ], bases=(ShellSpec,), ) @@ -3928,7 +3907,7 @@ def test_shell_cmd_inputspec_outputspec_5(): str, {"help_string": "1st creadted file", "argstr": "", "position": 1}, ), - ("additional_inp_A", str, {"help_string": "additional inp A"}), + ("additional_inp_A", int, {"help_string": "additional inp A"}), ("additional_inp_B", str, {"help_string": "additional inp B"}), ], bases=(ShellSpec,), @@ -3964,7 +3943,7 @@ def test_shell_cmd_inputspec_outputspec_5(): res = shelly() assert res.output.stdout == "" - assert res.output.newfile1.exists() + assert res.output.newfile1.fspath.exists() def test_shell_cmd_inputspec_outputspec_5a(): @@ -3983,7 +3962,7 @@ def test_shell_cmd_inputspec_outputspec_5a(): {"help_string": "1st creadted file", "argstr": "", "position": 1}, ), ("additional_inp_A", str, {"help_string": "additional inp A"}), - ("additional_inp_B", str, {"help_string": "additional inp B"}), + ("additional_inp_B", int, {"help_string": "additional inp B"}), ], bases=(ShellSpec,), ) @@ -4018,7 +3997,7 @@ def test_shell_cmd_inputspec_outputspec_5a(): res = shelly() assert res.output.stdout == "" - assert res.output.newfile1.exists() + assert res.output.newfile1.fspath.exists() def test_shell_cmd_inputspec_outputspec_5b(): @@ -4119,7 +4098,7 @@ def test_shell_cmd_inputspec_outputspec_6_except(): shelly.inputs.file1 = "new_file_1.txt" with pytest.raises(Exception, match="requires field can be"): - res = shelly() + shelly() def no_fsl(): @@ -4128,7 +4107,7 @@ def no_fsl(): @pytest.mark.skipif(no_fsl(), reason="fsl is not installed") -def test_fsl(): +def test_fsl(data_tests_dir): """mandatory field added to fields, value provided""" _xor_inputs = [ @@ -4344,7 +4323,7 @@ def change_name(file): "help_string": "bias field and neck cleanup", }, ), - ) + ), # ("number_classes", int, attr.ib(metadata={"help_string": 'number of tissue-type classes', "argstr": '-n', # "allowed_values": {"min_val": 1, "max_val": 10}})), # ("output_biasfield", bool, @@ -4356,7 +4335,7 @@ def change_name(file): ) # TODO: not sure why this has to be string - in_file = Path(__file__).parent / "data_tests" / "test.nii.gz" + in_file = data_tests_dir / "test.nii.gz" # separate command into exec + args shelly = ShellCommandTask( @@ -4368,7 +4347,103 @@ def change_name(file): # res = shelly(plugin="cf") -def test_shell_cmd_non_existing_outputs_1(tmpdir): +def test_shell_cmd_optional_output_file1(tmp_path): + """ + Test to see that 'unused' doesn't complain about not having an output passed to it + """ + my_cp_spec = SpecInfo( + name="Input", + fields=[ + ( + "input", + attr.ib( + type=File, metadata={"argstr": "", "help_string": "input file"} + ), + ), + ( + "output", + attr.ib( + type=Path, + metadata={ + "argstr": "", + "output_file_template": "out.txt", + "help_string": "output file", + }, + ), + ), + ( + "unused", + attr.ib( + type=ty.Union[Path, bool], + default=False, + metadata={ + "argstr": "--not-used", + "output_file_template": "out.txt", + "help_string": "dummy output", + }, + ), + ), + ], + bases=(ShellSpec,), + ) + + my_cp = ShellCommandTask( + name="my_cp", + executable="cp", + input_spec=my_cp_spec, + ) + file1 = tmp_path / "file1.txt" + file1.write_text("foo") + result = my_cp(input=file1, unused=False) + assert result.output.output.fspath.read_text() == "foo" + + +def test_shell_cmd_optional_output_file2(tmp_path): + """ + Test to see that 'unused' doesn't complain about not having an output passed to it + """ + my_cp_spec = SpecInfo( + name="Input", + fields=[ + ( + "input", + attr.ib( + type=File, metadata={"argstr": "", "help_string": "input file"} + ), + ), + ( + "output", + attr.ib( + type=ty.Union[Path, bool], + default=False, + metadata={ + "argstr": "", + "output_file_template": "out.txt", + "help_string": "dummy output", + }, + ), + ), + ], + bases=(ShellSpec,), + ) + + my_cp = ShellCommandTask( + name="my_cp", + executable="cp", + input_spec=my_cp_spec, + ) + file1 = tmp_path / "file1.txt" + file1.write_text("foo") + result = my_cp(input=file1, output=True) + assert result.output.output.fspath.read_text() == "foo" + + file2 = tmp_path / "file2.txt" + file2.write_text("bar") + with pytest.raises(RuntimeError): + my_cp(input=file2, output=False) + + +def test_shell_cmd_non_existing_outputs_1(tmp_path): """Checking that non existing output files do not return a phantom path, but return NOTHING instead""" input_spec = SpecInfo( @@ -4417,7 +4492,7 @@ def test_shell_cmd_non_existing_outputs_1(tmpdir): ) shelly = ShellCommandTask( - cache_dir=tmpdir, + cache_dir=tmp_path, executable="echo", input_spec=input_spec, output_spec=out_spec, @@ -4428,7 +4503,7 @@ def test_shell_cmd_non_existing_outputs_1(tmpdir): assert res.output.out_1 == attr.NOTHING and res.output.out_2 == attr.NOTHING -def test_shell_cmd_non_existing_outputs_2(tmpdir): +def test_shell_cmd_non_existing_outputs_2(tmp_path): """Checking that non existing output files do not return a phantom path, but return NOTHING instead. This test has one existing and one non existing output file. """ @@ -4479,7 +4554,7 @@ def test_shell_cmd_non_existing_outputs_2(tmpdir): ) shelly = ShellCommandTask( - cache_dir=tmpdir, + cache_dir=tmp_path, executable="touch", input_spec=input_spec, output_spec=out_spec, @@ -4488,13 +4563,13 @@ def test_shell_cmd_non_existing_outputs_2(tmpdir): shelly() res = shelly.result() # the first output file is created - assert res.output.out_1 == Path(shelly.output_dir) / Path("test_1.nii") - assert res.output.out_1.exists() + assert res.output.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") + assert res.output.out_1.fspath.exists() # the second output file is not created assert res.output.out_2 == attr.NOTHING -def test_shell_cmd_non_existing_outputs_3(tmpdir): +def test_shell_cmd_non_existing_outputs_3(tmp_path): """Checking that non existing output files do not return a phantom path, but return NOTHING instead. This test has an existing mandatory output and another non existing output file. """ @@ -4546,7 +4621,7 @@ def test_shell_cmd_non_existing_outputs_3(tmpdir): ) shelly = ShellCommandTask( - cache_dir=tmpdir, + cache_dir=tmp_path, executable="touch", input_spec=input_spec, output_spec=out_spec, @@ -4555,13 +4630,13 @@ def test_shell_cmd_non_existing_outputs_3(tmpdir): shelly() res = shelly.result() # the first output file is created - assert res.output.out_1 == Path(shelly.output_dir) / Path("test_1.nii") - assert res.output.out_1.exists() + assert res.output.out_1.fspath == Path(shelly.output_dir) / Path("test_1.nii") + assert res.output.out_1.fspath.exists() # the second output file is not created assert res.output.out_2 == attr.NOTHING -def test_shell_cmd_non_existing_outputs_4(tmpdir): +def test_shell_cmd_non_existing_outputs_4(tmp_path): """Checking that non existing output files do not return a phantom path, but return NOTHING instead. This test has an existing mandatory output and another non existing mandatory output file.""" @@ -4614,7 +4689,7 @@ def test_shell_cmd_non_existing_outputs_4(tmpdir): ) shelly = ShellCommandTask( - cache_dir=tmpdir, + cache_dir=tmp_path, executable="touch", input_spec=input_spec, output_spec=out_spec, @@ -4628,7 +4703,7 @@ def test_shell_cmd_non_existing_outputs_4(tmpdir): assert (Path(shelly.output_dir) / Path("test_1.nii")).exists() -def test_shell_cmd_non_existing_outputs_multi_1(tmpdir): +def test_shell_cmd_non_existing_outputs_multi_1(tmp_path): """This test looks if non existing files of an multiOuputFile are also set to NOTHING""" input_spec = SpecInfo( name="Input", @@ -4667,7 +4742,7 @@ def test_shell_cmd_non_existing_outputs_multi_1(tmpdir): ) shelly = ShellCommandTask( - cache_dir=tmpdir, + cache_dir=tmp_path, executable="echo", input_spec=input_spec, output_spec=out_spec, @@ -4680,7 +4755,7 @@ def test_shell_cmd_non_existing_outputs_multi_1(tmpdir): assert res.output.out_list[1] == attr.NOTHING -def test_shell_cmd_non_existing_outputs_multi_2(tmpdir): +def test_shell_cmd_non_existing_outputs_multi_2(tmp_path): """This test looks if non existing files of an multiOutputFile are also set to NOTHING. It checks that it also works if one file of the multiOutputFile actually exists.""" input_spec = SpecInfo( @@ -4721,7 +4796,7 @@ def test_shell_cmd_non_existing_outputs_multi_2(tmpdir): ) shelly = ShellCommandTask( - cache_dir=tmpdir, + cache_dir=tmp_path, executable="touch", input_spec=input_spec, output_spec=out_spec, @@ -4730,11 +4805,17 @@ def test_shell_cmd_non_existing_outputs_multi_2(tmpdir): shelly() res = shelly.result() # checking if the outputs are Nothing - assert res.output.out_list[0] == Path(shelly.output_dir) / "test_1_real.nii" + assert res.output.out_list[0] == File(Path(shelly.output_dir) / "test_1_real.nii") assert res.output.out_list[1] == attr.NOTHING -def test_shellspec_formatter_1(tmpdir): +@pytest.mark.xfail( + reason=( + "Not sure what the desired behaviour for formatter 5 is. Field is declared as a list " + "but a string containing the formatted arg is passed instead." + ) +) +def test_shellspec_formatter_1(tmp_path): """test the input callable 'formatter'.""" def spec_info(formatter): @@ -4830,7 +4911,7 @@ def formatter_3(in1, in3): == str(excinfo.value) ) - # chcking if field value is accessible when None + # checking if field value is accessible when None def formatter_5(field): assert field == "-t test" # formatter must return a string @@ -4843,13 +4924,13 @@ def formatter_5(field): input_spec=input_spec, in1="i1", in2="i2", - together="-t test", + # together="-t test", ) assert shelly.cmdline == "exec -t test" - # chcking if field value is accessible when None + # checking if field value is accessible when None def formatter_4(field): - assert field == None + assert field is None # formatter must return a string return "" @@ -4861,7 +4942,7 @@ def formatter_4(field): assert shelly.cmdline == "exec" -def test_shellspec_formatter_splitter_2(tmpdir): +def test_shellspec_formatter_splitter_2(tmp_path): """test the input callable 'formatter' when a splitter is used on an argument of the formatter.""" def spec_info(formatter): @@ -4910,9 +4991,9 @@ def formatter_1(in1, in2): input_spec = spec_info(formatter_1) in1 = ["in11", "in12"] shelly = ShellCommandTask( - name="f", executable="executable", input_spec=input_spec, in1=in1, in2="in2" - ).split("in1") - assert shelly != None + name="f", executable="executable", input_spec=input_spec, in2="in2" + ).split("in1", in1=in1) + assert shelly is not None # results = shelly.cmdline # assert len(results) == 2 @@ -4922,8 +5003,8 @@ def formatter_1(in1, in2): @no_win -def test_shellcommand_error_msg(tmpdir): - script_path = Path(tmpdir) / "script.sh" +def test_shellcommand_error_msg(tmp_path): + script_path = Path(tmp_path) / "script.sh" with open(script_path, "w") as f: f.write( diff --git a/pydra/engine/tests/test_shelltask_inputspec.py b/pydra/engine/tests/test_shelltask_inputspec.py index cbb9a28e4b..9bc7f7a232 100644 --- a/pydra/engine/tests/test_shelltask_inputspec.py +++ b/pydra/engine/tests/test_shelltask_inputspec.py @@ -1,6 +1,6 @@ -import attr import typing as ty from pathlib import Path +import attr import pytest from ..task import ShellCommandTask @@ -10,12 +10,7 @@ SpecInfo, File, MultiInputObj, - MultiInputFile, - MultiOutputFile, ) -from .utils import use_validator -from ..core import Workflow -from ..submitter import Submitter def test_shell_cmd_execargs_1(): @@ -110,13 +105,12 @@ def test_shell_cmd_inputs_1_st(): bases=(ShellSpec,), ) - shelly = ShellCommandTask( + ShellCommandTask( name="shelly", executable="executable", args="arg", - inpA=["inp1", "inp2"], input_spec=my_input_spec, - ).split("inpA") + ).split("inpA", inpA=["inp1", "inp2"]) # cmdline should be a list # assert shelly.cmdline[0] == "executable inp1 arg" # assert shelly.cmdline[1] == "executable inp2 arg" @@ -395,7 +389,7 @@ def test_shell_cmd_inputs_list_sep_1(): ( "inpA", attr.ib( - type=str, + type=MultiInputObj[str], metadata={ "position": 1, "help_string": "inpA", @@ -409,7 +403,9 @@ def test_shell_cmd_inputs_list_sep_1(): ) shelly = ShellCommandTask( - executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec + executable="executable", + inpA=["aaa", "bbb", "ccc"], + input_spec=my_input_spec, ) # separated by commas assert shelly.cmdline == "executable aaa,bbb,ccc" @@ -423,7 +419,7 @@ def test_shell_cmd_inputs_list_sep_2(): ( "inpA", attr.ib( - type=str, + type=MultiInputObj[str], metadata={ "position": 1, "help_string": "inpA", @@ -437,7 +433,9 @@ def test_shell_cmd_inputs_list_sep_2(): ) shelly = ShellCommandTask( - executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec + executable="executable", + inpA=["aaa", "bbb", "ccc"], + input_spec=my_input_spec, ) # a flag is used once assert shelly.cmdline == "executable -v aaa,bbb,ccc" @@ -451,7 +449,7 @@ def test_shell_cmd_inputs_list_sep_2a(): ( "inpA", attr.ib( - type=str, + type=MultiInputObj[str], metadata={ "position": 1, "help_string": "inpA", @@ -465,7 +463,9 @@ def test_shell_cmd_inputs_list_sep_2a(): ) shelly = ShellCommandTask( - executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec + executable="executable", + inpA=["aaa", "bbb", "ccc"], + input_spec=my_input_spec, ) # a flag is used once assert shelly.cmdline == "executable -v aaa,bbb,ccc" @@ -479,7 +479,7 @@ def test_shell_cmd_inputs_list_sep_3(): ( "inpA", attr.ib( - type=str, + type=MultiInputObj[str], metadata={ "position": 1, "help_string": "inpA", @@ -493,7 +493,9 @@ def test_shell_cmd_inputs_list_sep_3(): ) shelly = ShellCommandTask( - executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec + executable="executable", + inpA=["aaa", "bbb", "ccc"], + input_spec=my_input_spec, ) # a flag is repeated assert shelly.cmdline == "executable -v aaa, -v bbb, -v ccc" @@ -507,7 +509,7 @@ def test_shell_cmd_inputs_list_sep_3a(): ( "inpA", attr.ib( - type=str, + type=MultiInputObj[str], metadata={ "position": 1, "help_string": "inpA", @@ -521,7 +523,9 @@ def test_shell_cmd_inputs_list_sep_3a(): ) shelly = ShellCommandTask( - executable="executable", inpA=["aaa", "bbb", "ccc"], input_spec=my_input_spec + executable="executable", + inpA=["aaa", "bbb", "ccc"], + input_spec=my_input_spec, ) # a flag is repeated assert shelly.cmdline == "executable -v aaa, -v bbb, -v ccc" @@ -535,7 +539,7 @@ def test_shell_cmd_inputs_sep_4(): ( "inpA", attr.ib( - type=str, + type=MultiInputObj[str], metadata={ "position": 1, "help_string": "inpA", @@ -615,7 +619,7 @@ def test_shell_cmd_inputs_format_2(): ( "inpA", attr.ib( - type=str, + type=MultiInputObj[str], metadata={ "position": 1, "help_string": "inpA", @@ -628,7 +632,9 @@ def test_shell_cmd_inputs_format_2(): ) shelly = ShellCommandTask( - executable="executable", inpA=["el_1", "el_2"], input_spec=my_input_spec + executable="executable", + inpA=["el_1", "el_2"], + input_spec=my_input_spec, ) assert shelly.cmdline == "executable -v el_1 -v el_2" @@ -729,7 +735,7 @@ def test_shell_cmd_inputs_not_given_1(): shelly.inputs.arg2 = "argument2" - assert shelly.cmdline == f"executable --arg2 argument2" + assert shelly.cmdline == "executable --arg2 argument2" def test_shell_cmd_inputs_template_1(): @@ -770,7 +776,7 @@ def test_shell_cmd_inputs_template_1(): ) # outA has argstr in the metadata fields, so it's a part of the command line # the full path will be use din the command line - assert shelly.cmdline == f"executable inpA -o {str(shelly.output_dir / 'inpA_out')}" + assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # checking if outA in the output fields assert shelly.output_names == ["return_code", "stdout", "stderr", "outA"] @@ -810,7 +816,7 @@ def test_shell_cmd_inputs_template_1a(): executable="executable", input_spec=my_input_spec, inpA="inpA" ) # outA has no argstr in metadata, so it's not a part of the command line - assert shelly.cmdline == f"executable inpA" + assert shelly.cmdline == "executable inpA" # TODO: after deciding how we use requires/templates @@ -849,10 +855,14 @@ def test_shell_cmd_inputs_template_2(): assert shelly.output_names == ["return_code", "stdout", "stderr", "outB"] -def test_shell_cmd_inputs_template_3(): +def test_shell_cmd_inputs_template_3(tmp_path): """additional inputs with output_file_template and an additional read-only fields that combine two outputs together in the command line """ + inpA = tmp_path / "inpA" + inpB = tmp_path / "inpB" + Path.touch(inpA) + Path.touch(inpB) my_input_spec = SpecInfo( name="Input", fields=[ @@ -917,12 +927,12 @@ def test_shell_cmd_inputs_template_3(): ) shelly = ShellCommandTask( - executable="executable", input_spec=my_input_spec, inpA="inpA", inpB="inpB" + executable="executable", input_spec=my_input_spec, inpA=inpA, inpB=inpB ) # using syntax from the outAB field assert ( shelly.cmdline - == f"executable inpA inpB -o {str(shelly.output_dir / 'inpA_out')} {str(shelly.output_dir / 'inpB_out')}" + == f"executable {tmp_path / 'inpA'} {tmp_path / 'inpB'} -o {shelly.output_dir / 'inpA_out'} {str(shelly.output_dir / 'inpB_out')}" ) # checking if outA and outB in the output fields (outAB should not be) assert shelly.output_names == ["return_code", "stdout", "stderr", "outA", "outB"] @@ -1002,7 +1012,7 @@ def test_shell_cmd_inputs_template_3a(): # using syntax from the outAB field assert ( shelly.cmdline - == f"executable inpA inpB -o {str(shelly.output_dir / 'inpA_out')} {str(shelly.output_dir / 'inpB_out')}" + == f"executable inpA inpB -o {shelly.output_dir / 'inpA_out'} {str(shelly.output_dir / 'inpB_out')}" ) # checking if outA and outB in the output fields (outAB should not be) assert shelly.output_names == ["return_code", "stdout", "stderr", "outA", "outB"] @@ -1076,7 +1086,7 @@ def test_shell_cmd_inputs_template_4(): executable="executable", input_spec=my_input_spec, inpA="inpA" ) # inpB is not provided so outB not in the command line - assert shelly.cmdline == f"executable inpA -o {str(shelly.output_dir / 'inpA_out')}" + assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" assert shelly.output_names == ["return_code", "stdout", "stderr", "outA", "outB"] @@ -1151,7 +1161,7 @@ def test_shell_cmd_inputs_template_6(): shelly = ShellCommandTask( executable="executable", input_spec=my_input_spec, inpA="inpA" ) - assert shelly.cmdline == f"executable inpA -o {str(shelly.output_dir / 'inpA_out')}" + assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # a string is provided for outA, so this should be used as the outA value shelly = ShellCommandTask( @@ -1163,7 +1173,7 @@ def test_shell_cmd_inputs_template_6(): shelly = ShellCommandTask( executable="executable", input_spec=my_input_spec, inpA="inpA", outA=True ) - assert shelly.cmdline == f"executable inpA -o {str(shelly.output_dir / 'inpA_out')}" + assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # False is provided for outA, so the outA shouldn't be used shelly = ShellCommandTask( @@ -1225,7 +1235,7 @@ def test_shell_cmd_inputs_template_6a(): shelly = ShellCommandTask( executable="executable", input_spec=my_input_spec, inpA="inpA", outA=True ) - assert shelly.cmdline == f"executable inpA -o {str(shelly.output_dir / 'inpA_out')}" + assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" # False is provided for outA, so the outA shouldn't be used shelly = ShellCommandTask( @@ -1234,7 +1244,7 @@ def test_shell_cmd_inputs_template_6a(): assert shelly.cmdline == "executable inpA" -def test_shell_cmd_inputs_template_7(tmpdir): +def test_shell_cmd_inputs_template_7(tmp_path: Path): """additional inputs uses output_file_template with a suffix (no extension) no keep_extension is used """ @@ -1269,8 +1279,8 @@ def test_shell_cmd_inputs_template_7(tmpdir): bases=(ShellSpec,), ) - inpA_file = tmpdir.join("a_file.txt") - inpA_file.write("content") + inpA_file = tmp_path / "a_file.txt" + inpA_file.write_text("content") shelly = ShellCommandTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file ) @@ -1278,11 +1288,11 @@ def test_shell_cmd_inputs_template_7(tmpdir): # outA should be formatted in a way that that .txt goes to the end assert ( shelly.cmdline - == f"executable {tmpdir.join('a_file.txt')} {str(shelly.output_dir / 'a_file_out.txt')}" + == f"executable {tmp_path / 'a_file.txt'} {shelly.output_dir / 'a_file_out.txt'}" ) -def test_shell_cmd_inputs_template_7a(tmpdir): +def test_shell_cmd_inputs_template_7a(tmp_path: Path): """additional inputs uses output_file_template with a suffix (no extension) keep_extension is True (as default) """ @@ -1318,8 +1328,8 @@ def test_shell_cmd_inputs_template_7a(tmpdir): bases=(ShellSpec,), ) - inpA_file = tmpdir.join("a_file.txt") - inpA_file.write("content") + inpA_file = tmp_path / "a_file.txt" + inpA_file.write_text("content") shelly = ShellCommandTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file ) @@ -1327,11 +1337,11 @@ def test_shell_cmd_inputs_template_7a(tmpdir): # outA should be formatted in a way that that .txt goes to the end assert ( shelly.cmdline - == f"executable {tmpdir.join('a_file.txt')} {str(shelly.output_dir / 'a_file_out.txt')}" + == f"executable {tmp_path / 'a_file.txt'} {shelly.output_dir / 'a_file_out.txt'}" ) -def test_shell_cmd_inputs_template_7b(tmpdir): +def test_shell_cmd_inputs_template_7b(tmp_path: Path): """additional inputs uses output_file_template with a suffix (no extension) keep extension is False (so the extension is removed when creating the output) """ @@ -1367,8 +1377,8 @@ def test_shell_cmd_inputs_template_7b(tmpdir): bases=(ShellSpec,), ) - inpA_file = tmpdir.join("a_file.txt") - inpA_file.write("content") + inpA_file = tmp_path / "a_file.txt" + inpA_file.write_text("content") shelly = ShellCommandTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file ) @@ -1376,11 +1386,11 @@ def test_shell_cmd_inputs_template_7b(tmpdir): # outA should be formatted in a way that that .txt goes to the end assert ( shelly.cmdline - == f"executable {tmpdir.join('a_file.txt')} {str(shelly.output_dir / 'a_file_out')}" + == f"executable {tmp_path / 'a_file.txt'} {shelly.output_dir / 'a_file_out'}" ) -def test_shell_cmd_inputs_template_8(tmpdir): +def test_shell_cmd_inputs_template_8(tmp_path: Path): """additional inputs uses output_file_template with a suffix and an extension""" my_input_spec = SpecInfo( name="Input", @@ -1413,8 +1423,8 @@ def test_shell_cmd_inputs_template_8(tmpdir): bases=(ShellSpec,), ) - inpA_file = tmpdir.join("a_file.t") - inpA_file.write("content") + inpA_file = tmp_path / "a_file.t" + inpA_file.write_text("content") shelly = ShellCommandTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file ) @@ -1422,11 +1432,11 @@ def test_shell_cmd_inputs_template_8(tmpdir): # outA should be formatted in a way that inpA extension is removed and the template extension is used assert ( shelly.cmdline - == f"executable {tmpdir.join('a_file.t')} {str(shelly.output_dir / 'a_file_out.txt')}" + == f"executable {tmp_path / 'a_file.t'} {shelly.output_dir / 'a_file_out.txt'}" ) -def test_shell_cmd_inputs_template_9(tmpdir): +def test_shell_cmd_inputs_template_9(tmp_path: Path): """additional inputs, one uses output_file_template with two fields: one File and one ints - the output should be recreated from the template """ @@ -1473,8 +1483,8 @@ def test_shell_cmd_inputs_template_9(tmpdir): bases=(ShellSpec,), ) - inpA_file = tmpdir.join("inpA.t") - inpA_file.write("content") + inpA_file = tmp_path / "inpA.t" + inpA_file.write_text("content") shelly = ShellCommandTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file, inpInt=3 @@ -1482,13 +1492,13 @@ def test_shell_cmd_inputs_template_9(tmpdir): assert ( shelly.cmdline - == f"executable {tmpdir.join('inpA.t')} -i 3 -o {str(shelly.output_dir / 'inpA_3_out.txt')}" + == f"executable {tmp_path / 'inpA.t'} -i 3 -o {shelly.output_dir / 'inpA_3_out.txt'}" ) # checking if outA in the output fields assert shelly.output_names == ["return_code", "stdout", "stderr", "outA"] -def test_shell_cmd_inputs_template_9a(tmpdir): +def test_shell_cmd_inputs_template_9a(tmp_path: Path): """additional inputs, one uses output_file_template with two fields: one file and one string without extension - should be fine """ @@ -1535,8 +1545,8 @@ def test_shell_cmd_inputs_template_9a(tmpdir): bases=(ShellSpec,), ) - inpA_file = tmpdir.join("inpA.t") - inpA_file.write("content") + inpA_file = tmp_path / "inpA.t" + inpA_file.write_text("content") shelly = ShellCommandTask( executable="executable", input_spec=my_input_spec, inpA=inpA_file, inpStr="hola" @@ -1544,13 +1554,13 @@ def test_shell_cmd_inputs_template_9a(tmpdir): assert ( shelly.cmdline - == f"executable {tmpdir.join('inpA.t')} -i hola -o {str(shelly.output_dir / 'inpA_hola_out.txt')}" + == f"executable {tmp_path / 'inpA.t'} -i hola -o {shelly.output_dir / 'inpA_hola_out.txt'}" ) # checking if outA in the output fields assert shelly.output_names == ["return_code", "stdout", "stderr", "outA"] -def test_shell_cmd_inputs_template_9b_err(tmpdir): +def test_shell_cmd_inputs_template_9b_err(tmp_path: Path): """output_file_template with two fields that are both Files, an exception should be raised """ @@ -1597,11 +1607,11 @@ def test_shell_cmd_inputs_template_9b_err(tmpdir): bases=(ShellSpec,), ) - inpA_file = tmpdir.join("inpA.t") - inpA_file.write("content") + inpA_file = tmp_path / "inpA.t" + inpA_file.write_text("content") - inpFile_file = tmpdir.join("inpFile.t") - inpFile_file.write("content") + inpFile_file = tmp_path / "inpFile.t" + inpFile_file.write_text("content") shelly = ShellCommandTask( executable="executable", @@ -1614,7 +1624,7 @@ def test_shell_cmd_inputs_template_9b_err(tmpdir): shelly.cmdline -def test_shell_cmd_inputs_template_9c_err(tmpdir): +def test_shell_cmd_inputs_template_9c_err(tmp_path: Path): """output_file_template with two fields: a file and a string with extension, that should be used as an additional file and the exception should be raised """ @@ -1661,8 +1671,8 @@ def test_shell_cmd_inputs_template_9c_err(tmpdir): bases=(ShellSpec,), ) - inpA_file = tmpdir.join("inpA.t") - inpA_file.write("content") + inpA_file = tmp_path / "inpA.t" + inpA_file.write_text("content") shelly = ShellCommandTask( executable="executable", @@ -1714,13 +1724,63 @@ def test_shell_cmd_inputs_template_10(): ) # outA has argstr in the metadata fields, so it's a part of the command line # the full path will be use din the command line - assert ( - shelly.cmdline == f"executable 3.3 -o {str(shelly.output_dir / 'file_3.3_out')}" - ) + assert shelly.cmdline == f"executable 3.3 -o {shelly.output_dir / 'file_3.3_out'}" # checking if outA in the output fields assert shelly.output_names == ["return_code", "stdout", "stderr", "outA"] +def test_shell_cmd_inputs_template_requires_1(): + """Given an input specification with a templated output file subject to required fields, + ensure the field is set only when all requirements are met.""" + + my_input_spec = SpecInfo( + name="Input", + fields=[ + ( + "in_file", + attr.ib( + type=str, + metadata={ + "help_string": "input file", + "mandatory": True, + "argstr": "", + }, + ), + ), + ( + "with_tpl", + attr.ib( + type=bool, + metadata={"help_string": "enable template"}, + ), + ), + ( + "out_file", + attr.ib( + type=str, + metadata={ + "help_string": "output file", + "argstr": "--tpl", + "output_file_template": "tpl.{in_file}", + "requires": {"with_tpl"}, + }, + ), + ), + ], + bases=(ShellSpec,), + ) + + # When requirements are not met. + shelly = ShellCommandTask( + executable="cmd", input_spec=my_input_spec, in_file="in.file" + ) + assert "--tpl" not in shelly.cmdline + + # When requirements are met. + shelly.inputs.with_tpl = True + assert "tpl.in.file" in shelly.cmdline + + def test_shell_cmd_inputs_template_function_1(): """one input field uses output_file_template that is a simple function this can be easily done by simple template as in test_shell_cmd_inputs_template_1 @@ -1765,7 +1825,7 @@ def template_fun(inputs): executable="executable", input_spec=my_input_spec, inpA="inpA" ) - assert shelly.cmdline == f"executable inpA -o {str(shelly.output_dir / 'inpA_out')}" + assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_out'}" def test_shell_cmd_inputs_template_function_2(): @@ -1828,7 +1888,7 @@ def template_fun(inputs): inpB=1, ) - assert shelly.cmdline == f"executable inpA -o {str(shelly.output_dir / 'inpA_odd')}" + assert shelly.cmdline == f"executable inpA -o {shelly.output_dir / 'inpA_odd'}" def test_shell_cmd_inputs_template_1_st(): @@ -1867,22 +1927,23 @@ def test_shell_cmd_inputs_template_1_st(): ) inpA = ["inpA_1", "inpA_2"] - shelly = ShellCommandTask( + ShellCommandTask( name="f", executable="executable", input_spec=my_input_spec, - inpA=inpA, - ).split("inpA") + ).split("inpA", inpA=inpA) # cmdline_list = shelly.cmdline # assert len(cmdline_list) == 2 # for i in range(2): # path_out = Path(shelly.output_dir[i]) / f"{inpA[i]}_out" - # assert cmdline_list[i] == f"executable {inpA[i]} -o {str(path_out)}" + # assert cmdline_list[i] == f"executable {inpA[i]} -o {path_out}" # TODO: after deciding how we use requires/templates -def test_shell_cmd_inputs_di(tmpdir, use_validator): +def test_shell_cmd_inputs_denoise_image( + tmp_path, +): """example from #279""" my_input_spec = SpecInfo( name="Input", @@ -2060,8 +2121,8 @@ def test_shell_cmd_inputs_di(tmpdir, use_validator): bases=(ShellSpec,), ) - my_input_file = tmpdir.join("a_file.ext") - my_input_file.write("content") + my_input_file = tmp_path / "a_file.ext" + my_input_file.write_text("content") # no input provided shelly = ShellCommandTask(executable="DenoiseImage", input_spec=my_input_spec) @@ -2077,7 +2138,7 @@ def test_shell_cmd_inputs_di(tmpdir, use_validator): ) assert ( shelly.cmdline - == f"DenoiseImage -i {tmpdir.join('a_file.ext')} -s 1 -p 1 -r 2 -o [{str(shelly.output_dir / 'a_file_out.ext')}]" + == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -o [{shelly.output_dir / 'a_file_out.ext'}]" ) # input file name, noiseImage is set to True, so template is used in the output @@ -2088,8 +2149,8 @@ def test_shell_cmd_inputs_di(tmpdir, use_validator): noiseImage=True, ) assert ( - shelly.cmdline == f"DenoiseImage -i {tmpdir.join('a_file.ext')} -s 1 -p 1 -r 2 " - f"-o [{str(shelly.output_dir / 'a_file_out.ext')}, {str(shelly.output_dir / 'a_file_noise.ext')}]" + shelly.cmdline == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 " + f"-o [{shelly.output_dir / 'a_file_out.ext'}, {str(shelly.output_dir / 'a_file_noise.ext')}]" ) # input file name and help_short @@ -2101,7 +2162,7 @@ def test_shell_cmd_inputs_di(tmpdir, use_validator): ) assert ( shelly.cmdline - == f"DenoiseImage -i {tmpdir.join('a_file.ext')} -s 1 -p 1 -r 2 -h -o [{str(shelly.output_dir / 'a_file_out.ext')}]" + == f"DenoiseImage -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -h -o [{shelly.output_dir / 'a_file_out.ext'}]" ) assert shelly.output_names == [ @@ -2121,7 +2182,7 @@ def test_shell_cmd_inputs_di(tmpdir, use_validator): ) assert ( shelly.cmdline - == f"DenoiseImage -d 2 -i {tmpdir.join('a_file.ext')} -s 1 -p 1 -r 2 -o [{str(shelly.output_dir / 'a_file_out.ext')}]" + == f"DenoiseImage -d 2 -i {tmp_path / 'a_file.ext'} -s 1 -p 1 -r 2 -o [{shelly.output_dir / 'a_file_out.ext'}]" ) # adding image_dimensionality that has allowed_values [2, 3, 4] and providing 5 - exception should be raised @@ -2204,7 +2265,8 @@ def test_task_inputs_mandatory_with_xOR_zero_mandatory_raises_error(): task.inputs.input_2 = attr.NOTHING with pytest.raises(Exception) as excinfo: task.inputs.check_fields_input_spec() - assert "input_1 is mandatory, but no value provided" in str(excinfo.value) + assert "input_1 is mandatory" in str(excinfo.value) + assert "no alternative provided by ['input_2', 'input_3']" in str(excinfo.value) assert excinfo.type is AttributeError @@ -2216,9 +2278,7 @@ def test_task_inputs_mandatory_with_xOR_two_mandatories_raises_error(): with pytest.raises(Exception) as excinfo: task.inputs.check_fields_input_spec() - assert "input_2 is mutually exclusive with ('input_1', 'input_2'" in str( - excinfo.value - ) + assert "input_1 is mutually exclusive with ['input_2']" in str(excinfo.value) assert excinfo.type is AttributeError @@ -2231,7 +2291,7 @@ def test_task_inputs_mandatory_with_xOR_3_mandatories_raises_error(): with pytest.raises(Exception) as excinfo: task.inputs.check_fields_input_spec() - assert "input_2 is mutually exclusive with ('input_1', 'input_2', 'input_3'" in str( + assert "input_1 is mutually exclusive with ['input_2', 'input_3']" in str( excinfo.value ) assert excinfo.type is AttributeError diff --git a/pydra/engine/tests/test_singularity.py b/pydra/engine/tests/test_singularity.py index 2beb7218f5..791575adc1 100644 --- a/pydra/engine/tests/test_singularity.py +++ b/pydra/engine/tests/test_singularity.py @@ -1,12 +1,13 @@ -import os, shutil +import shutil import subprocess as sp import pytest import attr -from ..task import SingularityTask, DockerTask, ShellCommandTask +from ..task import ShellCommandTask from ..submitter import Submitter from ..core import Workflow -from ..specs import ShellOutSpec, SpecInfo, File, SingularitySpec +from ..specs import ShellOutSpec, SpecInfo, File, ShellSpec +from ..environments import Singularity need_docker = pytest.mark.skipif( @@ -23,37 +24,41 @@ @need_singularity -def test_singularity_1_nosubm(tmpdir): +def test_singularity_1_nosubm(tmp_path): """simple command in a container, a default bindings and working directory is added no submitter """ cmd = "pwd" image = "docker://alpine" - singu = SingularityTask(name="singu", executable=cmd, image=image, cache_dir=tmpdir) - assert singu.inputs.image == "docker://alpine" - assert singu.inputs.container == "singularity" - assert ( - singu.cmdline - == f"singularity exec -B {singu.output_dir}:/output_pydra:rw --pwd /output_pydra {image} {cmd}" + singu = ShellCommandTask( + name="singu", + executable=cmd, + environment=Singularity(image=image), + cache_dir=tmp_path, ) + assert singu.environment.image == "docker://alpine" + assert isinstance(singu.environment, Singularity) + assert singu.cmdline == cmd res = singu() - assert "output_pydra" in res.output.stdout + assert "/mnt/pydra" in res.output.stdout assert res.output.return_code == 0 @need_singularity -def test_singularity_2_nosubm(tmpdir): +def test_singularity_2_nosubm(tmp_path): """a command with arguments, cmd and args given as executable no submitter """ cmd = ["echo", "hail", "pydra"] image = "docker://alpine" - singu = SingularityTask(name="singu", executable=cmd, image=image, cache_dir=tmpdir) - assert ( - singu.cmdline - == f"singularity exec -B {singu.output_dir}:/output_pydra:rw --pwd /output_pydra {image} {' '.join(cmd)}" + singu = ShellCommandTask( + name="singu", + executable=cmd, + environment=Singularity(image=image), + cache_dir=tmp_path, ) + assert singu.cmdline == " ".join(cmd) res = singu() assert res.output.stdout.strip() == " ".join(cmd[1:]) @@ -61,52 +66,30 @@ def test_singularity_2_nosubm(tmpdir): @need_singularity -def test_singularity_2(plugin, tmpdir): +def test_singularity_2(plugin, tmp_path): """a command with arguments, cmd and args given as executable using submitter """ cmd = ["echo", "hail", "pydra"] image = "docker://alpine" - singu = SingularityTask(name="singu", executable=cmd, image=image, cache_dir=tmpdir) - assert ( - singu.cmdline - == f"singularity exec -B {singu.output_dir}:/output_pydra:rw --pwd /output_pydra {image} {' '.join(cmd)}" - ) - with Submitter(plugin=plugin) as sub: - singu(submitter=sub) - res = singu.result() - assert res.output.stdout.strip() == " ".join(cmd[1:]) - assert res.output.return_code == 0 - - -@need_singularity -def test_singularity_2_singuflag(plugin, tmpdir): - """a command with arguments, cmd and args given as executable - using ShellComandTask with container_info=("singularity", image) - """ - cmd = ["echo", "hail", "pydra"] - image = "docker://alpine" - shingu = ShellCommandTask( - name="shingu", + singu = ShellCommandTask( + name="singu", executable=cmd, - container_info=("singularity", image), - cache_dir=tmpdir, - ) - assert ( - shingu.cmdline - == f"singularity exec -B {shingu.output_dir}:/output_pydra:rw --pwd /output_pydra {image} {' '.join(cmd)}" + environment=Singularity(image=image), + cache_dir=tmp_path, ) + assert singu.cmdline == " ".join(cmd) with Submitter(plugin=plugin) as sub: - shingu(submitter=sub) - res = shingu.result() + singu(submitter=sub) + res = singu.result() assert res.output.stdout.strip() == " ".join(cmd[1:]) assert res.output.return_code == 0 @need_singularity -def test_singularity_2a(plugin, tmpdir): +def test_singularity_2a(plugin, tmp_path): """a command with arguments, using executable and args using submitter """ @@ -114,13 +97,14 @@ def test_singularity_2a(plugin, tmpdir): cmd_args = ["hail", "pydra"] # separate command into exec + args image = "docker://alpine" - singu = SingularityTask( - name="singu", executable=cmd_exec, args=cmd_args, image=image, cache_dir=tmpdir - ) - assert ( - singu.cmdline - == f"singularity exec -B {singu.output_dir}:/output_pydra:rw --pwd /output_pydra {image} {cmd_exec} {' '.join(cmd_args)}" + singu = ShellCommandTask( + name="singu", + executable=cmd_exec, + args=cmd_args, + environment=Singularity(image=image), + cache_dir=tmp_path, ) + assert singu.cmdline == f"{cmd_exec} {' '.join(cmd_args)}" with Submitter(plugin=plugin) as sub: singu(submitter=sub) @@ -129,151 +113,44 @@ def test_singularity_2a(plugin, tmpdir): assert res.output.return_code == 0 -@need_singularity -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_singularity_3(plugin, tmpdir): - """a simple command in container with bindings, - creating directory in tmp dir and checking if it is in the container - """ - # creating a new directory - tmpdir.mkdir("new_dir") - cmd = ["ls", "/tmp_dir"] - image = "docker://alpine" - singu = SingularityTask(name="singu", executable=cmd, image=image, cache_dir=tmpdir) - # binding tmp directory to the container - singu.inputs.bindings = [(str(tmpdir), "/tmp_dir", "ro")] - - with Submitter(plugin=plugin) as sub: - singu(submitter=sub) - - res = singu.result() - assert "new_dir\n" in res.output.stdout - assert res.output.return_code == 0 - - -@need_singularity -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_singularity_3_singuflag(plugin, tmpdir): - """a simple command in container with bindings, - creating directory in tmp dir and checking if it is in the container - using ShellComandTask with container_info=("singularity", image) - """ - # creating a new directory - tmpdir.mkdir("new_dir") - cmd = ["ls", "/tmp_dir"] - image = "docker://alpine" - shingu = SingularityTask( - name="singu", - executable=cmd, - container_info=("singularity", image), - cache_dir=tmpdir, - ) - # binding tmp directory to the container - shingu.inputs.bindings = [(str(tmpdir), "/tmp_dir", "ro")] - - with Submitter(plugin=plugin) as sub: - shingu(submitter=sub) - - res = shingu.result() - assert "new_dir\n" in res.output.stdout - assert res.output.return_code == 0 - - -@need_singularity -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_singularity_3_singuflagbind(plugin, tmpdir): - """a simple command in container with bindings, - creating directory in tmp dir and checking if it is in the container - using ShellComandTask with container_info=("singularity", image, bindings) - """ - # creating a new directory - tmpdir.mkdir("new_dir") - cmd = ["ls", "/tmp_dir"] - image = "docker://alpine" - shingu = SingularityTask( - name="singu", - executable=cmd, - container_info=("singularity", image, [(str(tmpdir), "/tmp_dir", "ro")]), - cache_dir=tmpdir, - ) - - with Submitter(plugin=plugin) as sub: - shingu(submitter=sub) - - res = shingu.result() - assert "new_dir\n" in res.output.stdout - assert res.output.return_code == 0 - - # tests with State @need_singularity -def test_singularity_st_1(plugin, tmpdir): +def test_singularity_st_1(plugin, tmp_path): """commands without arguments in container splitter = executable """ cmd = ["pwd", "ls"] image = "docker://alpine" - singu = SingularityTask( - name="singu", executable=cmd, image=image, cache_dir=tmpdir - ).split("executable") + singu = ShellCommandTask( + name="singu", environment=Singularity(image=image), cache_dir=tmp_path + ).split("executable", executable=cmd) assert singu.state.splitter == "singu.executable" res = singu(plugin=plugin) - assert "/output_pydra" in res[0].output.stdout + assert "/mnt/pydra" in res[0].output.stdout assert res[1].output.stdout == "" assert res[0].output.return_code == res[1].output.return_code == 0 -@need_singularity -def test_singularity_st_2(plugin, tmpdir): - """command with arguments in docker, checking the distribution - splitter = image - """ - cmd = ["cat", "/etc/issue"] - image = ["docker://alpine", "docker://ubuntu"] - singu = SingularityTask( - name="singu", executable=cmd, image=image, cache_dir=tmpdir - ).split("image") - assert singu.state.splitter == "singu.image" - - res = singu(plugin=plugin) - assert "Alpine" in res[0].output.stdout - assert "Ubuntu" in res[1].output.stdout - assert res[0].output.return_code == res[1].output.return_code == 0 - - -@need_singularity -def test_singularity_st_3(plugin, tmpdir): - """outer splitter image and executable""" - cmd = ["pwd", ["cat", "/etc/issue"]] - image = ["docker://alpine", "docker://ubuntu"] - singu = SingularityTask( - name="singu", executable=cmd, image=image, cache_dir=tmpdir - ).split(["image", "executable"]) - assert singu.state.splitter == ["singu.image", "singu.executable"] - res = singu(plugin=plugin) - - assert "/output_pydra" in res[0].output.stdout - assert "Alpine" in res[1].output.stdout - assert "/output_pydra" in res[2].output.stdout - assert "Ubuntu" in res[3].output.stdout - - @need_singularity @need_slurm +@pytest.mark.skip(reason="TODO, xfail incorrect") @pytest.mark.xfail( reason="slurm can complain if the number of submitted jobs exceeds the limit" ) @pytest.mark.parametrize("n", [10, 50, 100]) -def test_singularity_st_4(tmpdir, n): +def test_singularity_st_2(tmp_path, n): """splitter over args (checking bigger splitters if slurm available)""" args_n = list(range(n)) image = "docker://alpine" - singu = SingularityTask( - name="singu", executable="echo", image=image, cache_dir=tmpdir, args=args_n - ).split("args") + singu = ShellCommandTask( + name="singu", + executable="echo", + environment=Singularity(image=image), + cache_dir=tmp_path, + ).split("args", args=args_n) assert singu.state.splitter == "singu.args" res = singu(plugin="slurm") assert "1" in res[1].output.stdout @@ -281,95 +158,11 @@ def test_singularity_st_4(tmpdir, n): assert res[0].output.return_code == res[1].output.return_code == 0 -@need_singularity -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_wf_singularity_1(plugin, tmpdir): - """a workflow with two connected task - the first one read the file that is bounded to the container, - the second uses echo - """ - with open(tmpdir.join("file_pydra.txt"), "w") as f: - f.write("hello from pydra") - - image = "docker://alpine" - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2"], cache_dir=tmpdir) - wf.inputs.cmd1 = ["cat", "/tmp_dir/file_pydra.txt"] - wf.inputs.cmd2 = ["echo", "message from the previous task:"] - wf.add( - SingularityTask( - name="singu_cat", - image=image, - executable=wf.lzin.cmd1, - bindings=[(str(tmpdir), "/tmp_dir", "ro")], - strip=True, - ) - ) - wf.add( - SingularityTask( - name="singu_echo", - image=image, - executable=wf.lzin.cmd2, - args=wf.singu_cat.lzout.stdout, - strip=True, - ) - ) - wf.set_output([("out", wf.singu_echo.lzout.stdout)]) - - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) - - res = wf.result() - assert res.output.out == "message from the previous task: hello from pydra" - - -@need_docker -@need_singularity -@pytest.mark.skip(reason="we probably don't want to support bindings as an input") -def test_wf_singularity_1a(plugin, tmpdir): - """a workflow with two connected task - using both containers: Docker and Singul. - the first one read the file that is bounded to the container, - the second uses echo - """ - with open(tmpdir.join("file_pydra.txt"), "w") as f: - f.write("hello from pydra") - - image_sing = "docker://alpine" - image_doc = "ubuntu" - wf = Workflow(name="wf", input_spec=["cmd1", "cmd2"], cache_dir=tmpdir) - wf.inputs.cmd1 = ["cat", "/tmp_dir/file_pydra.txt"] - wf.inputs.cmd2 = ["echo", "message from the previous task:"] - wf.add( - SingularityTask( - name="singu_cat", - image=image_sing, - executable=wf.lzin.cmd1, - bindings=[(str(tmpdir), "/tmp_dir", "ro")], - strip=True, - ) - ) - wf.add( - DockerTask( - name="singu_echo", - image=image_doc, - executable=wf.lzin.cmd2, - args=wf.singu_cat.lzout.stdout, - strip=True, - ) - ) - wf.set_output([("out", wf.singu_echo.lzout.stdout)]) - - with Submitter(plugin=plugin) as sub: - wf(submitter=sub) - - res = wf.result() - assert res.output.out == "message from the previous task: hello from pydra" - - # tests with customized output_spec @need_singularity -def test_singularity_outputspec_1(plugin, tmpdir): +def test_singularity_outputspec_1(plugin, tmp_path): """ customised output_spec, adding files to the output, providing specific pathname output_path is automatically added to the bindings @@ -382,12 +175,12 @@ def test_singularity_outputspec_1(plugin, tmpdir): fields=[("newfile", File, "newfile_tmp.txt")], bases=(ShellOutSpec,), ) - singu = SingularityTask( + singu = ShellCommandTask( name="singu", - image=image, + environment=Singularity(image=image), executable=cmd, output_spec=my_output_spec, - cache_dir=tmpdir, + cache_dir=tmp_path, ) with Submitter(plugin=plugin) as sub: @@ -395,16 +188,16 @@ def test_singularity_outputspec_1(plugin, tmpdir): res = singu.result() assert res.output.stdout == "" - assert res.output.newfile.exists() + assert res.output.newfile.fspath.exists() # tests with customised input_spec @need_singularity -def test_singularity_inputspec_1(plugin, tmpdir): +def test_singularity_inputspec_1(plugin, tmp_path): """a simple customized input spec for singularity task""" - filename = str(tmpdir.join("file_pydra.txt")) + filename = str((tmp_path / "file_pydra.txt")) with open(filename, "w") as f: f.write("hello from pydra") @@ -427,17 +220,17 @@ def test_singularity_inputspec_1(plugin, tmpdir): ), ) ], - bases=(SingularitySpec,), + bases=(ShellSpec,), ) - singu = SingularityTask( + singu = ShellCommandTask( name="singu", - image=image, + environment=Singularity(image=image), executable=cmd, file=filename, input_spec=my_input_spec, strip=True, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = singu() @@ -445,11 +238,11 @@ def test_singularity_inputspec_1(plugin, tmpdir): @need_singularity -def test_singularity_inputspec_1a(plugin, tmpdir): +def test_singularity_inputspec_1a(plugin, tmp_path): """a simple customized input spec for singularity task a default value is used """ - filename = str(tmpdir.join("file_pydra.txt")) + filename = str((tmp_path / "file_pydra.txt")) with open(filename, "w") as f: f.write("hello from pydra") @@ -468,16 +261,16 @@ def test_singularity_inputspec_1a(plugin, tmpdir): ), ) ], - bases=(SingularitySpec,), + bases=(ShellSpec,), ) - singu = SingularityTask( + singu = ShellCommandTask( name="singu", - image=image, + environment=Singularity(image=image), executable=cmd, input_spec=my_input_spec, strip=True, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = singu() @@ -485,13 +278,13 @@ def test_singularity_inputspec_1a(plugin, tmpdir): @need_singularity -def test_singularity_inputspec_2(plugin, tmpdir): +def test_singularity_inputspec_2(plugin, tmp_path): """a customized input spec with two fields for singularity task""" - filename_1 = tmpdir.join("file_pydra.txt") + filename_1 = tmp_path / "file_pydra.txt" with open(filename_1, "w") as f: f.write("hello from pydra\n") - filename_2 = tmpdir.join("file_nice.txt") + filename_2 = tmp_path / "file_nice.txt" with open(filename_2, "w") as f: f.write("have a nice one") @@ -525,17 +318,17 @@ def test_singularity_inputspec_2(plugin, tmpdir): ), ), ], - bases=(SingularitySpec,), + bases=(ShellSpec,), ) - singu = SingularityTask( + singu = ShellCommandTask( name="singu", - image=image, + environment=Singularity(image=image), executable=cmd, file1=filename_1, input_spec=my_input_spec, strip=True, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = singu() @@ -543,14 +336,14 @@ def test_singularity_inputspec_2(plugin, tmpdir): @need_singularity -def test_singularity_inputspec_2a_except(plugin, tmpdir): +def test_singularity_inputspec_2a_except(plugin, tmp_path): """a customized input spec with two fields first one uses a default, and second doesn't - raises a dataclass exception """ - filename_1 = tmpdir.join("file_pydra.txt") + filename_1 = tmp_path / "file_pydra.txt" with open(filename_1, "w") as f: f.write("hello from pydra\n") - filename_2 = tmpdir.join("file_nice.txt") + filename_2 = tmp_path / "file_nice.txt" with open(filename_2, "w") as f: f.write("have a nice one") @@ -585,32 +378,32 @@ def test_singularity_inputspec_2a_except(plugin, tmpdir): ), ), ], - bases=(SingularitySpec,), + bases=(ShellSpec,), ) - singu = SingularityTask( + singu = ShellCommandTask( name="singu", - image=image, + environment=Singularity(image=image), executable=cmd, file2=filename_2, input_spec=my_input_spec, strip=True, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = singu() assert res.output.stdout == "hello from pydra\nhave a nice one" @need_singularity -def test_singularity_inputspec_2a(plugin, tmpdir): +def test_singularity_inputspec_2a(plugin, tmp_path): """a customized input spec with two fields first one uses a default value, this is fine even if the second field is not using any defaults """ - filename_1 = tmpdir.join("file_pydra.txt") + filename_1 = tmp_path / "file_pydra.txt" with open(filename_1, "w") as f: f.write("hello from pydra\n") - filename_2 = tmpdir.join("file_nice.txt") + filename_2 = tmp_path / "file_nice.txt" with open(filename_2, "w") as f: f.write("have a nice one") @@ -645,17 +438,17 @@ def test_singularity_inputspec_2a(plugin, tmpdir): ), ), ], - bases=(SingularitySpec,), + bases=(ShellSpec,), ) - singu = SingularityTask( + singu = ShellCommandTask( name="singu", - image=image, + environment=Singularity(image=image), executable=cmd, file2=filename_2, input_spec=my_input_spec, strip=True, - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = singu() @@ -663,12 +456,12 @@ def test_singularity_inputspec_2a(plugin, tmpdir): @need_singularity -def test_singularity_cmd_inputspec_copyfile_1(plugin, tmpdir): +def test_singularity_cmd_inputspec_copyfile_1(plugin, tmp_path): """shelltask changes a file in place, adding copyfile=True to the file-input from input_spec hardlink or copy in the output_dir should be created """ - file = tmpdir.join("file_pydra.txt") + file = tmp_path / "file_pydra.txt" with open(file, "w") as f: f.write("hello from pydra\n") @@ -702,23 +495,23 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmpdir): ), ), ], - bases=(SingularitySpec,), + bases=(ShellSpec,), ) - singu = SingularityTask( + singu = ShellCommandTask( name="singu", - image=image, + environment=Singularity(image=image), executable=cmd, input_spec=my_input_spec, orig_file=str(file), - cache_dir=tmpdir, + cache_dir=tmp_path, ) res = singu() assert res.output.stdout == "" - assert res.output.out_file.exists() + assert res.output.out_file.fspath.exists() # the file is copied, and than it is changed in place - assert res.output.out_file.parent == singu.output_dir + assert res.output.out_file.fspath.parent == singu.output_dir with open(res.output.out_file) as f: assert "hi from pydra\n" == f.read() # the original file is unchanged @@ -727,14 +520,14 @@ def test_singularity_cmd_inputspec_copyfile_1(plugin, tmpdir): @need_singularity -def test_singularity_inputspec_state_1(plugin, tmpdir): +def test_singularity_inputspec_state_1(tmp_path): """a customised input spec for a singularity file with a splitter, splitter is on files """ - filename_1 = tmpdir.join("file_pydra.txt") + filename_1 = tmp_path / "file_pydra.txt" with open(filename_1, "w") as f: f.write("hello from pydra\n") - filename_2 = tmpdir.join("file_nice.txt") + filename_2 = tmp_path / "file_nice.txt" with open(filename_2, "w") as f: f.write("have a nice one") @@ -758,18 +551,17 @@ def test_singularity_inputspec_state_1(plugin, tmpdir): ), ) ], - bases=(SingularitySpec,), + bases=(ShellSpec,), ) - singu = SingularityTask( + singu = ShellCommandTask( name="singu", - image=image, + environment=Singularity(image=image), executable=cmd, - file=filename, input_spec=my_input_spec, strip=True, - cache_dir=tmpdir, - ).split("file") + cache_dir=tmp_path, + ).split("file", file=filename) res = singu() assert res[0].output.stdout == "hello from pydra" @@ -777,13 +569,13 @@ def test_singularity_inputspec_state_1(plugin, tmpdir): @need_singularity -def test_singularity_inputspec_state_1b(plugin, tmpdir): +def test_singularity_inputspec_state_1b(plugin, tmp_path): """a customised input spec for a singularity file with a splitter, files from the input spec have the same path in the local os and the container, so hash is calculated and the test works fine """ - file_1 = tmpdir.join("file_pydra.txt") - file_2 = tmpdir.join("file_nice.txt") + file_1 = tmp_path / "file_pydra.txt" + file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: f.write("hello from pydra") with open(file_2, "w") as f: @@ -809,18 +601,17 @@ def test_singularity_inputspec_state_1b(plugin, tmpdir): ), ) ], - bases=(SingularitySpec,), + bases=(ShellSpec,), ) - singu = SingularityTask( + singu = ShellCommandTask( name="singu", - image=image, + environment=Singularity(image=image), executable=cmd, - file=filename, input_spec=my_input_spec, strip=True, - cache_dir=tmpdir, - ).split("file") + cache_dir=tmp_path, + ).split("file", file=filename) res = singu() assert res[0].output.stdout == "hello from pydra" @@ -828,9 +619,9 @@ def test_singularity_inputspec_state_1b(plugin, tmpdir): @need_singularity -def test_singularity_wf_inputspec_1(plugin, tmpdir): +def test_singularity_wf_inputspec_1(plugin, tmp_path): """a customized input spec for workflow with singularity tasks""" - filename = tmpdir.join("file_pydra.txt") + filename = tmp_path / "file_pydra.txt" with open(filename, "w") as f: f.write("hello from pydra") @@ -853,16 +644,16 @@ def test_singularity_wf_inputspec_1(plugin, tmpdir): ), ) ], - bases=(SingularitySpec,), + bases=(ShellSpec,), ) - wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmpdir) + wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) wf.inputs.cmd = cmd wf.inputs.file = filename - singu = SingularityTask( + singu = ShellCommandTask( name="singu", - image=image, + environment=Singularity(image=image), executable=wf.lzin.cmd, file=wf.lzin.file, input_spec=my_input_spec, @@ -872,7 +663,7 @@ def test_singularity_wf_inputspec_1(plugin, tmpdir): wf.set_output([("out", wf.singu.lzout.stdout)]) - with Submitter(plugin=plugin) as sub: + with Submitter(plugin="serial") as sub: wf(submitter=sub) res = wf.result() @@ -880,10 +671,10 @@ def test_singularity_wf_inputspec_1(plugin, tmpdir): @need_singularity -def test_singularity_wf_state_inputspec_1(plugin, tmpdir): +def test_singularity_wf_state_inputspec_1(plugin, tmp_path): """a customized input spec for workflow with singularity tasks that has a state""" - file_1 = tmpdir.join("file_pydra.txt") - file_2 = tmpdir.join("file_nice.txt") + file_1 = tmp_path / "file_pydra.txt" + file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: f.write("hello from pydra") with open(file_2, "w") as f: @@ -909,23 +700,22 @@ def test_singularity_wf_state_inputspec_1(plugin, tmpdir): ), ) ], - bases=(SingularitySpec,), + bases=(ShellSpec,), ) - wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmpdir) + wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) wf.inputs.cmd = cmd - wf.inputs.file = filename - singu = SingularityTask( + singu = ShellCommandTask( name="singu", - image=image, + environment=Singularity(image=image), executable=wf.lzin.cmd, file=wf.lzin.file, input_spec=my_input_spec, strip=True, ) wf.add(singu) - wf.split("file") + wf.split("file", file=filename) wf.set_output([("out", wf.singu.lzout.stdout)]) @@ -938,10 +728,10 @@ def test_singularity_wf_state_inputspec_1(plugin, tmpdir): @need_singularity -def test_singularity_wf_ndst_inputspec_1(plugin, tmpdir): +def test_singularity_wf_ndst_inputspec_1(plugin, tmp_path): """a customized input spec for workflow with singularity tasks with states""" - file_1 = tmpdir.join("file_pydra.txt") - file_2 = tmpdir.join("file_nice.txt") + file_1 = tmp_path / "file_pydra.txt" + file_2 = tmp_path / "file_nice.txt" with open(file_1, "w") as f: f.write("hello from pydra") with open(file_2, "w") as f: @@ -967,21 +757,20 @@ def test_singularity_wf_ndst_inputspec_1(plugin, tmpdir): ), ) ], - bases=(SingularitySpec,), + bases=(ShellSpec,), ) - wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmpdir) + wf = Workflow(name="wf", input_spec=["cmd", "file"], cache_dir=tmp_path) wf.inputs.cmd = cmd wf.inputs.file = filename - singu = SingularityTask( + singu = ShellCommandTask( name="singu", - image=image, + environment=Singularity(image=image), executable=wf.lzin.cmd, - file=wf.lzin.file, input_spec=my_input_spec, strip=True, - ).split("file") + ).split("file", file=wf.lzin.file) wf.add(singu) wf.set_output([("out", wf.singu.lzout.stdout)]) diff --git a/pydra/engine/tests/test_specs.py b/pydra/engine/tests/test_specs.py index a0af757546..4f54cd4041 100644 --- a/pydra/engine/tests/test_specs.py +++ b/pydra/engine/tests/test_specs.py @@ -1,29 +1,32 @@ from pathlib import Path import typing as ty +import os +import attrs from copy import deepcopy +import time from ..specs import ( BaseSpec, SpecInfo, File, - RuntimeSpec, Runtime, Result, ShellSpec, - ContainerSpec, - DockerSpec, - SingularitySpec, + # ContainerSpec, + LazyIn, + LazyOut, LazyField, + StateArray, ) from ..helpers import make_klass +from .utils import foo +from pydra import mark, Workflow import pytest def test_basespec(): spec = BaseSpec() - assert ( - spec.hash == "44136fa355b3678a1146ad16f7e8649e94fb4fc21fe77e8310c060f61caaff8a" - ) + assert spec.hash == "0b1d98df22ecd1733562711c205abca2" def test_runtime(): @@ -38,7 +41,7 @@ def test_result(): assert hasattr(result, "runtime") assert hasattr(result, "output") assert hasattr(result, "errored") - assert getattr(result, "errored") == False + assert getattr(result, "errored") is False def test_shellspec(): @@ -49,50 +52,27 @@ def test_shellspec(): assert hasattr(spec, "args") -container_attrs = ["image", "container", "container_xargs"] - - -def test_container(): - with pytest.raises(TypeError): - spec = ContainerSpec() - spec = ContainerSpec( - executable="ls", image="busybox", container="docker" - ) # (execute, args, image, cont) - assert all([hasattr(spec, attr) for attr in container_attrs]) - assert hasattr(spec, "executable") - - -def test_docker(): - with pytest.raises(TypeError): - spec = DockerSpec(executable="ls") - spec = DockerSpec(executable="ls", image="busybox") - assert all(hasattr(spec, attr) for attr in container_attrs) - assert getattr(spec, "container") == "docker" - - -def test_singularity(): - with pytest.raises(TypeError): - spec = SingularitySpec() - spec = SingularitySpec(executable="ls", image="busybox") - assert all(hasattr(spec, attr) for attr in container_attrs) - assert getattr(spec, "container") == "singularity" - - class NodeTesting: + @attrs.define() + class Input: + inp_a: str = "A" + inp_b: str = "B" + def __init__(self): - class Input: + class InpSpec: def __init__(self): - self.inp_a = "A" - self.inp_b = "B" + self.fields = [("inp_a", int), ("inp_b", int)] - class InpSpec: + class OutSpec: def __init__(self): - self.fields = [("inp_a", None), ("inp_b", None)] + self.fields = [("out_a", int)] self.name = "tn" - self.inputs = Input() + self.inputs = self.Input() self.input_spec = InpSpec() + self.output_spec = OutSpec() self.output_names = ["out_a"] + self.state = None def result(self, state_index=None): class Output: @@ -123,65 +103,49 @@ def __init__(self): def test_lazy_inp(): tn = NodeTesting() - lf = LazyField(node=tn, attr_type="input") - - with pytest.raises(Exception): - lf.get_value(wf=WorkflowTesting()) + lzin = LazyIn(task=tn) - lf.inp_a + lf = lzin.inp_a assert lf.get_value(wf=WorkflowTesting()) == "A" - lf.inp_b + lf = lzin.inp_b assert lf.get_value(wf=WorkflowTesting()) == "B" def test_lazy_out(): tn = NodeTesting() - lf = LazyField(node=tn, attr_type="output") - - lf.out_a + lzout = LazyOut(task=tn) + lf = lzout.out_a assert lf.get_value(wf=WorkflowTesting()) == "OUT_A" -def test_laxy_errorattr(): - with pytest.raises(Exception) as excinfo: - tn = NodeTesting() - lf = LazyField(node=tn, attr_type="out") - assert "LazyField: Unknown attr_type:" in str(excinfo.value) - - def test_lazy_getvale(): tn = NodeTesting() - lf = LazyField(node=tn, attr_type="input") + lf = LazyIn(task=tn) with pytest.raises(Exception) as excinfo: lf.inp_c assert str(excinfo.value) == "Task tn has no input attribute inp_c" -def test_input_file_hash_1(tmpdir): - tmpdir.chdir() +def test_input_file_hash_1(tmp_path): + os.chdir(tmp_path) outfile = "test.file" fields = [("in_file", ty.Any)] input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,)) inputs = make_klass(input_spec) - assert ( - inputs(in_file=outfile).hash - == "1384a1eb11cd94a5b826a82b948313b9237a0956d406ccff59e79ec92b3c935f" - ) - with open(outfile, "wt") as fp: + assert inputs(in_file=outfile).hash == "9a106eb2830850834d9b5bf098d5fa85" + + with open(outfile, "w") as fp: fp.write("test") fields = [("in_file", File)] input_spec = SpecInfo(name="Inputs", fields=fields, bases=(BaseSpec,)) inputs = make_klass(input_spec) - assert ( - inputs(in_file=outfile).hash - == "088625131e6718a00170ad445a9c295244dffd4e5d847c8ee4b1606d623dacb1" - ) + assert inputs(in_file=outfile).hash == "0e9306e5cae1de1b4dff1f27cca03bce" -def test_input_file_hash_2(tmpdir): +def test_input_file_hash_2(tmp_path): """input spec with File types, checking when the checksum changes""" - file = tmpdir.join("in_file_1.txt") + file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") @@ -190,26 +154,27 @@ def test_input_file_hash_2(tmpdir): # checking specific hash value hash1 = inputs(in_file=file).hash - assert hash1 == "5d2870a7376150274eac72115fbf211792a8e5f250f220b3cc11bfc1851e4b53" + assert hash1 == "17e4e2b4d8ce8f36bf3fd65804958dbb" # checking if different name doesn't affect the hash - file_diffname = tmpdir.join("in_file_2.txt") + file_diffname = tmp_path / "in_file_2.txt" with open(file_diffname, "w") as f: f.write("hello") hash2 = inputs(in_file=file_diffname).hash assert hash1 == hash2 # checking if different content (the same name) affects the hash - file_diffcontent = tmpdir.join("in_file_1.txt") + time.sleep(2) # ensure mtime is different + file_diffcontent = tmp_path / "in_file_1.txt" with open(file_diffcontent, "w") as f: f.write("hi") hash3 = inputs(in_file=file_diffcontent).hash assert hash1 != hash3 -def test_input_file_hash_2a(tmpdir): +def test_input_file_hash_2a(tmp_path): """input spec with ty.Union[File, ...] type, checking when the checksum changes""" - file = tmpdir.join("in_file_1.txt") + file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") @@ -220,30 +185,31 @@ def test_input_file_hash_2a(tmpdir): # checking specific hash value hash1 = inputs(in_file=file).hash - assert hash1 == "5d2870a7376150274eac72115fbf211792a8e5f250f220b3cc11bfc1851e4b53" + assert hash1 == "17e4e2b4d8ce8f36bf3fd65804958dbb" # checking if different name doesn't affect the hash - file_diffname = tmpdir.join("in_file_2.txt") + file_diffname = tmp_path / "in_file_2.txt" with open(file_diffname, "w") as f: f.write("hello") hash2 = inputs(in_file=file_diffname).hash assert hash1 == hash2 # checking if different content (the same name) affects the hash - file_diffcontent = tmpdir.join("in_file_1.txt") + time.sleep(2) # ensure mtime is different + file_diffcontent = tmp_path / "in_file_1.txt" with open(file_diffcontent, "w") as f: f.write("hi") hash3 = inputs(in_file=file_diffcontent).hash assert hash1 != hash3 # checking if string is also accepted - hash4 = inputs(in_file="ala").hash - assert hash4 == "004060c4475e8874c5fa55c6fffbe67f9ec8a81d578ea1b407dd77186f4d61c2" + hash4 = inputs(in_file=str(file)).hash + assert hash4 == "aee7c7ae25509fb4c92a081d58d17a67" -def test_input_file_hash_3(tmpdir): +def test_input_file_hash_3(tmp_path): """input spec with File types, checking when the hash and file_hash change""" - file = tmpdir.join("in_file_1.txt") + file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") @@ -255,50 +221,51 @@ def test_input_file_hash_3(tmpdir): my_inp = inputs(in_file=file, in_int=3) # original hash and files_hash (dictionary contains info about files) hash1 = my_inp.hash - files_hash1 = deepcopy(my_inp.files_hash) + # files_hash1 = deepcopy(my_inp.files_hash) # file name should be in files_hash1[in_file] filename = str(Path(file)) - assert filename in files_hash1["in_file"] + # assert filename in files_hash1["in_file"] # changing int input my_inp.in_int = 5 hash2 = my_inp.hash - files_hash2 = deepcopy(my_inp.files_hash) + # files_hash2 = deepcopy(my_inp.files_hash) # hash should be different assert hash1 != hash2 # files_hash should be the same, and the tuple for filename shouldn't be recomputed - assert files_hash1 == files_hash2 - assert id(files_hash1["in_file"][filename]) == id(files_hash2["in_file"][filename]) + # assert files_hash1 == files_hash2 + # assert id(files_hash1["in_file"][filename]) == id(files_hash2["in_file"][filename]) # recreating the file + time.sleep(2) # ensure mtime is different with open(file, "w") as f: f.write("hello") hash3 = my_inp.hash - files_hash3 = deepcopy(my_inp.files_hash) + # files_hash3 = deepcopy(my_inp.files_hash) # hash should be the same, # but the entry for in_file in files_hash should be different (modification time) assert hash3 == hash2 - assert files_hash3["in_file"][filename] != files_hash2["in_file"][filename] + # assert files_hash3["in_file"][filename] != files_hash2["in_file"][filename] # different timestamp - assert files_hash3["in_file"][filename][0] != files_hash2["in_file"][filename][0] + # assert files_hash3["in_file"][filename][0] != files_hash2["in_file"][filename][0] # the same content hash - assert files_hash3["in_file"][filename][1] == files_hash2["in_file"][filename][1] + # assert files_hash3["in_file"][filename][1] == files_hash2["in_file"][filename][1] # setting the in_file again my_inp.in_file = file # filename should be removed from files_hash - assert my_inp.files_hash["in_file"] == {} + # assert my_inp.files_hash["in_file"] == {} # will be saved again when hash is calculated assert my_inp.hash == hash3 - assert filename in my_inp.files_hash["in_file"] + # assert filename in my_inp.files_hash["in_file"] -def test_input_file_hash_4(tmpdir): +def test_input_file_hash_4(tmp_path): """input spec with nested list, that contain ints and Files, checking changes in checksums """ - file = tmpdir.join("in_file_1.txt") + file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") @@ -311,58 +278,122 @@ def test_input_file_hash_4(tmpdir): # checking specific hash value hash1 = inputs(in_file=[[file, 3]]).hash - assert hash1 == "507d81adc3f2f468e82c27ac800d16f6beae4f24f69daaab1d04f52b32b4514d" + assert hash1 == "11b7e9c90bc8d9dc5ccfc8d4526ba091" # the same file, but int field changes hash1a = inputs(in_file=[[file, 5]]).hash assert hash1 != hash1a # checking if different name doesn't affect the hash - file_diffname = tmpdir.join("in_file_2.txt") + file_diffname = tmp_path / "in_file_2.txt" with open(file_diffname, "w") as f: f.write("hello") hash2 = inputs(in_file=[[file_diffname, 3]]).hash assert hash1 == hash2 # checking if different content (the same name) affects the hash - file_diffcontent = tmpdir.join("in_file_1.txt") + time.sleep(2) # need the mtime to be different + file_diffcontent = tmp_path / "in_file_1.txt" with open(file_diffcontent, "w") as f: f.write("hi") hash3 = inputs(in_file=[[file_diffcontent, 3]]).hash assert hash1 != hash3 -def test_input_file_hash_5(tmpdir): +def test_input_file_hash_5(tmp_path): """input spec with File in nested containers, checking changes in checksums""" - file = tmpdir.join("in_file_1.txt") + file = tmp_path / "in_file_1.txt" with open(file, "w") as f: f.write("hello") input_spec = SpecInfo( name="Inputs", - fields=[("in_file", ty.List[ty.Dict[ty.Any, File]])], + fields=[("in_file", ty.List[ty.Dict[ty.Any, ty.Union[File, int]]])], bases=(BaseSpec,), ) inputs = make_klass(input_spec) # checking specific hash value hash1 = inputs(in_file=[{"file": file, "int": 3}]).hash - assert hash1 == "e0555e78a40a02611674b0f48da97cdd28eee7e9885ecc17392b560c14826f06" + assert hash1 == "5fd53b79e55bbf62a4bb3027eb753a2c" # the same file, but int field changes hash1a = inputs(in_file=[{"file": file, "int": 5}]).hash assert hash1 != hash1a # checking if different name doesn't affect the hash - file_diffname = tmpdir.join("in_file_2.txt") + file_diffname = tmp_path / "in_file_2.txt" with open(file_diffname, "w") as f: f.write("hello") hash2 = inputs(in_file=[{"file": file_diffname, "int": 3}]).hash assert hash1 == hash2 # checking if different content (the same name) affects the hash - file_diffcontent = tmpdir.join("in_file_1.txt") + time.sleep(2) # ensure mtime is different + file_diffcontent = tmp_path / "in_file_1.txt" with open(file_diffcontent, "w") as f: f.write("hi") hash3 = inputs(in_file=[{"file": file_diffcontent, "int": 3}]).hash assert hash1 != hash3 + + +def test_lazy_field_cast(): + task = foo(a="a", b=1, c=2.0, name="foo") + + assert task.lzout.y.type == int + assert task.lzout.y.cast(float).type == float + + +def test_lazy_field_multi_same_split(): + @mark.task + def f(x: ty.List[int]) -> ty.List[int]: + return x + + task = f(x=[1, 2, 3], name="foo") + + lf = task.lzout.out.split("foo.x") + + assert lf.type == StateArray[int] + assert lf.splits == set([(("foo.x",),)]) + + lf2 = lf.split("foo.x") + assert lf2.type == StateArray[int] + assert lf2.splits == set([(("foo.x",),)]) + + +def test_lazy_field_multi_diff_split(): + @mark.task + def f(x: ty.Any, y: ty.Any) -> ty.Any: + return x + + task = f(x=[1, 2, 3], name="foo") + + lf = task.lzout.out.split("foo.x") + + assert lf.type == StateArray[ty.Any] + assert lf.splits == set([(("foo.x",),)]) + + lf2 = lf.split("foo.x") + assert lf2.type == StateArray[ty.Any] + assert lf2.splits == set([(("foo.x",),)]) + + lf3 = lf.split("foo.y") + assert lf3.type == StateArray[StateArray[ty.Any]] + assert lf3.splits == set([(("foo.x",),), (("foo.y",),)]) + + +def test_wf_lzin_split(): + @mark.task + def identity(x: int) -> int: + return x + + inner = Workflow(name="inner", input_spec=["x"]) + inner.add(identity(x=inner.lzin.x, name="f")) + inner.set_output(("out", inner.f.lzout.out)) + + outer = Workflow(name="outer", input_spec=["x"]) + outer.add(inner.split(x=outer.lzin.x)) + outer.set_output(("out", outer.inner.lzout.out)) + + result = outer(x=[1, 2, 3]) + assert result.output.out == StateArray([1, 2, 3]) diff --git a/pydra/engine/tests/test_state.py b/pydra/engine/tests/test_state.py index 93a71793f3..c8ef0941ca 100644 --- a/pydra/engine/tests/test_state.py +++ b/pydra/engine/tests/test_state.py @@ -97,13 +97,13 @@ def test_state_1( def test_state_2_err(): with pytest.raises(PydraStateError) as exinfo: - st = State("NA", splitter={"a"}) + State("NA", splitter={"a"}) assert "splitter has to be a string, a tuple or a list" == str(exinfo.value) def test_state_3_err(): with pytest.raises(PydraStateError) as exinfo: - st = State("NA", splitter=["a", "b"], combiner=("a", "b")) + State("NA", splitter=["a", "b"], combiner=("a", "b")) assert "combiner has to be a string or a list" == str(exinfo.value) @@ -516,7 +516,7 @@ def test_state_connect_1a(): def test_state_connect_1b_exception(): """can't provide explicitly NA.a (should be _NA)""" - st1 = State(name="NA", splitter="a", other_states={}) + State(name="NA", splitter="a", other_states={}) st2 = State(name="NB", splitter="NA.a") with pytest.raises(PydraStateError) as excinfo: st2.splitter_validation() @@ -526,7 +526,7 @@ def test_state_connect_1b_exception(): @pytest.mark.parametrize("splitter2, other_states2", [("_NA", {}), ("_N", {"NA": ()})]) def test_state_connect_1c_exception(splitter2, other_states2): """can't ask for splitter from node that is not connected""" - with pytest.raises(PydraStateError) as excinfo: + with pytest.raises(PydraStateError): st2 = State(name="NB", splitter=splitter2, other_states=other_states2) st2.splitter_validation() @@ -1136,7 +1136,7 @@ def test_state_connect_innerspl_1b(): """incorrect splitter - the current & prev-state parts in scalar splitter""" with pytest.raises(PydraStateError): st1 = State(name="NA", splitter="a") - st2 = State(name="NB", splitter=("_NA", "b"), other_states={"NA": (st1, "b")}) + State(name="NB", splitter=("_NA", "b"), other_states={"NA": (st1, "b")}) def test_state_connect_innerspl_2(): @@ -1511,7 +1511,7 @@ def test_state_combine_1(): assert st.splitter_rpn == ["NA.a"] assert st.current_combiner == st.current_combiner_all == st.combiner == ["NA.a"] assert st.prev_state_combiner == st.prev_state_combiner_all == [] - assert st.splitter_final == None + assert st.splitter_final is None assert st.splitter_rpn_final == [] st.prepare_states(inputs={"NA.a": [3, 5]}) @@ -2174,7 +2174,7 @@ def test_connect_splitters( ) def test_connect_splitters_exception_1(splitter, other_states): with pytest.raises(PydraStateError) as excinfo: - st = State(name="CN", splitter=splitter, other_states=other_states) + State(name="CN", splitter=splitter, other_states=other_states) assert "prev-state and current splitters are mixed" in str(excinfo.value) @@ -2191,7 +2191,7 @@ def test_connect_splitters_exception_2(): def test_connect_splitters_exception_3(): with pytest.raises(PydraStateError) as excinfo: - st = State( + State( name="CN", splitter="_NB", other_states=["NA", (State(name="NA", splitter="a"), "b")], diff --git a/pydra/engine/tests/test_submitter.py b/pydra/engine/tests/test_submitter.py index d1823f1fc9..298e7e74b4 100644 --- a/pydra/engine/tests/test_submitter.py +++ b/pydra/engine/tests/test_submitter.py @@ -1,11 +1,15 @@ from dateutil import parser +import secrets import re -import shutil import subprocess as sp import time - +import attrs +import typing as ty +from random import randint +import os +from unittest.mock import patch import pytest - +from fileformats.generic import Directory from .utils import ( need_sge, need_slurm, @@ -13,8 +17,9 @@ gen_basic_wf_with_threadcount, gen_basic_wf_with_threadcount_concurrent, ) -from ..core import Workflow +from ..core import Workflow, TaskBase from ..submitter import Submitter +from ..workers import SerialWorker from ... import mark from pathlib import Path from datetime import datetime @@ -157,8 +162,7 @@ def test_wf_with_state(plugin_dask_opt, tmpdir): wf.add(sleep_add_one(name="taska", x=wf.lzin.x)) wf.add(sleep_add_one(name="taskb", x=wf.taska.lzout.out)) - wf.inputs.x = [1, 2, 3] - wf.split("x") + wf.split("x", x=[1, 2, 3]) wf.set_output([("out", wf.taskb.lzout.out)]) wf.cache_dir = tmpdir @@ -217,8 +221,7 @@ def test_slurm_wf_cf(tmpdir): @need_slurm def test_slurm_wf_state(tmpdir): wf = gen_basic_wf() - wf.split("x") - wf.inputs.x = [5, 6] + wf.split("x", x=[5, 6]) wf.cache_dir = tmpdir with Submitter("slurm") as sub: sub(wf) @@ -404,7 +407,6 @@ def test_sge_wf(tmpdir): assert res.output.out == 9 script_dir = tmpdir / "SGEWorker_scripts" assert script_dir.exists() - sdirs = [sd for sd in script_dir.listdir() if sd.isdir()] # ensure each task was executed with sge assert len([sd for sd in script_dir.listdir() if sd.isdir()]) == 2 @@ -521,18 +523,18 @@ def test_sge_limit_maxthreads(tmpdir): sp.run(["qacct", "-j", jobids[3]], capture_output=True).stdout.decode().strip() ) - out_job0_dict = qacct_output_to_dict(out_job0) + qacct_output_to_dict(out_job0) out_job1_dict = qacct_output_to_dict(out_job1) out_job2_dict = qacct_output_to_dict(out_job2) - out_job3_dict = qacct_output_to_dict(out_job3) + qacct_output_to_dict(out_job3) job_1_endtime = datetime.strptime( - out_job1_dict["end_time"][0], f"%a %b %d %H:%M:%S %Y" + out_job1_dict["end_time"][0], "%a %b %d %H:%M:%S %Y" ) # Running both task_1_1 and task_1_2 at once would exceed max_threads, # so task_1_2 waits for task_1_1 to complete job_2_starttime = datetime.strptime( - out_job2_dict["start_time"][0], f"%a %b %d %H:%M:%S %Y" + out_job2_dict["start_time"][0], "%a %b %d %H:%M:%S %Y" ) assert job_1_endtime < job_2_starttime @@ -562,16 +564,170 @@ def test_sge_no_limit_maxthreads(tmpdir): sp.run(["qacct", "-j", jobids[2]], capture_output=True).stdout.decode().strip() ) - out_job0_dict = qacct_output_to_dict(out_job0) + qacct_output_to_dict(out_job0) out_job1_dict = qacct_output_to_dict(out_job1) out_job2_dict = qacct_output_to_dict(out_job2) job_1_endtime = datetime.strptime( - out_job1_dict["end_time"][0], f"%a %b %d %H:%M:%S %Y" + out_job1_dict["end_time"][0], "%a %b %d %H:%M:%S %Y" ) # Running both task_1_1 and task_1_2 at once would not exceed max_threads, # so task_1_2 does not wait for task_1_1 to complete job_2_starttime = datetime.strptime( - out_job2_dict["start_time"][0], f"%a %b %d %H:%M:%S %Y" + out_job2_dict["start_time"][0], "%a %b %d %H:%M:%S %Y" ) assert job_1_endtime > job_2_starttime + + +def test_hash_changes_in_task_inputs_file(tmp_path): + @mark.task + def output_dir_as_input(out_dir: Directory) -> Directory: + (out_dir.fspath / "new-file.txt").touch() + return out_dir + + task = output_dir_as_input(out_dir=tmp_path) + with pytest.raises(RuntimeError, match="Input field hashes have changed"): + task() + + +def test_hash_changes_in_task_inputs_unstable(tmp_path): + @attrs.define + class Unstable: + value: int # type: ignore + + def __bytes_repr__(self, cache) -> ty.Iterator[bytes]: + """Random 128-bit bytestring""" + yield secrets.token_bytes(16) + + @mark.task + def unstable_input(unstable: Unstable) -> int: + return unstable.value + + task = unstable_input(unstable=Unstable(1)) + with pytest.raises(RuntimeError, match="Input field hashes have changed"): + task() + + +def test_hash_changes_in_workflow_inputs(tmp_path): + @mark.task + def output_dir_as_output(out_dir: Path) -> Directory: + (out_dir / "new-file.txt").touch() + return out_dir + + wf = Workflow( + name="test_hash_change", input_spec={"in_dir": Directory}, in_dir=tmp_path + ) + wf.add(output_dir_as_output(out_dir=wf.lzin.in_dir, name="task")) + wf.set_output(("out_dir", wf.task.lzout.out)) + with pytest.raises(RuntimeError, match="Input field hashes have changed.*Workflow"): + wf() + + +def test_hash_changes_in_workflow_graph(tmpdir): + class X: + """Dummy class with unstable hash (i.e. which isn't altered in a node in which + it is an input)""" + + value = 1 + + def __bytes_repr__(self, cache): + """Bytes representation from class attribute, which will be changed be + 'alter_x" node. + + NB: this is a contrived example where the bytes_repr implementation returns + a bytes representation of a class attribute in order to trigger the exception, + hopefully cases like this will be very rare""" + yield bytes(self.value) + + @mark.task + @mark.annotate({"return": {"x": X, "y": int}}) + def identity(x: X) -> ty.Tuple[X, int]: + return x, 99 + + @mark.task + def alter_x(y): + X.value = 2 + return y + + @mark.task + def to_tuple(x, y): + return (x, y) + + wf = Workflow(name="wf_with_blocked_tasks", input_spec=["x", "y"]) + wf.add(identity(name="taska", x=wf.lzin.x)) + wf.add(alter_x(name="taskb", y=wf.taska.lzout.y)) + wf.add(to_tuple(name="taskc", x=wf.taska.lzout.x, y=wf.taskb.lzout.out)) + wf.set_output([("out", wf.taskc.lzout.out)]) + + wf.inputs.x = X() + + wf.cache_dir = tmpdir + + with pytest.raises( + RuntimeError, match="Graph of 'wf_with_blocked_tasks' workflow is not empty" + ): + with Submitter("cf") as sub: + result = sub(wf) + + +@mark.task +def to_tuple(x, y): + return (x, y) + + +class BYOAddVarWorker(SerialWorker): + """A dummy worker that adds 1 to the output of the task""" + + plugin_name = "byo_add_env_var" + + def __init__(self, add_var, **kwargs): + super().__init__(**kwargs) + self.add_var = add_var + + async def exec_serial(self, runnable, rerun=False, environment=None): + if isinstance(runnable, TaskBase): + with patch.dict(os.environ, {"BYO_ADD_VAR": str(self.add_var)}): + result = runnable._run(rerun, environment=environment) + return result + else: # it could be tuple that includes pickle files with tasks and inputs + return super().exec_serial(runnable, rerun, environment) + + +@mark.task +def add_env_var_task(x: int) -> int: + return x + int(os.environ.get("BYO_ADD_VAR", 0)) + + +def test_byo_worker(): + + task1 = add_env_var_task(x=1) + + with Submitter(plugin=BYOAddVarWorker, add_var=10) as sub: + assert sub.plugin == "byo_add_env_var" + result = task1(submitter=sub) + + assert result.output.out == 11 + + task2 = add_env_var_task(x=2) + + with Submitter(plugin="serial") as sub: + result = task2(submitter=sub) + + assert result.output.out == 2 + + +def test_bad_builtin_worker(): + + with pytest.raises(NotImplementedError, match="No worker for 'bad-worker' plugin"): + Submitter(plugin="bad-worker") + + +def test_bad_byo_worker(): + + class BadWorker: + pass + + with pytest.raises( + ValueError, match="Worker class must have a 'plugin_name' str attribute" + ): + Submitter(plugin=BadWorker) diff --git a/pydra/engine/tests/test_task.py b/pydra/engine/tests/test_task.py index 2762bcf950..0d666574e3 100644 --- a/pydra/engine/tests/test_task.py +++ b/pydra/engine/tests/test_task.py @@ -4,14 +4,13 @@ import pytest import cloudpickle as cp from pathlib import Path -import re import json import glob as glob from ... import mark from ..core import Workflow -from ..task import AuditFlag, ShellCommandTask, DockerTask, SingularityTask +from ..task import AuditFlag, ShellCommandTask from ...utils.messenger import FileMessenger, PrintMessenger, collect_messages -from .utils import gen_basic_wf, use_validator, Submitter +from .utils import gen_basic_wf from ..specs import ( MultiInputObj, MultiOutputObj, @@ -21,7 +20,8 @@ ShellSpec, File, ) -from ..helpers import hash_file +from ...utils.hash import hash_function + no_win = pytest.mark.skipif( sys.platform.startswith("win"), @@ -43,14 +43,14 @@ def test_output(): def test_name_conflict(): """raise error if task name conflicts with a class attribute or method""" with pytest.raises(ValueError) as excinfo1: - nn = funaddtwo(name="split", a=3) + funaddtwo(name="split", a=3) assert "Cannot use names of attributes or methods" in str(excinfo1.value) with pytest.raises(ValueError) as excinfo2: - nn = funaddtwo(name="checksum", a=3) + funaddtwo(name="checksum", a=3) assert "Cannot use names of attributes or methods" in str(excinfo2.value) -def test_numpy(use_validator): +def test_numpy(): """checking if mark.task works for numpy functions""" np = pytest.importorskip("numpy") fft = mark.annotate({"a": np.ndarray, "return": np.ndarray})(np.fft.fft) @@ -70,7 +70,7 @@ def test_checksum(): ) -def test_annotated_func(use_validator): +def test_annotated_func(): @mark.task def testfunc( a: int, b: float = 0.1 @@ -108,17 +108,17 @@ def testfunc( "Input Parameters:", "- a: int", "- b: float (default: 0.1)", - "- _func: str", + "- _func: bytes", "Output Parameters:", "- out_out: float", ] -def test_annotated_func_dictreturn(use_validator): +def test_annotated_func_dictreturn(): """Test mapping from returned dictionary to output spec.""" @mark.task - @mark.annotate({"return": {"sum": int, "mul": int}}) + @mark.annotate({"return": {"sum": int, "mul": ty.Optional[int]}}) def testfunc(a: int, b: int): return dict(sum=a + b, diff=a - b) @@ -128,14 +128,14 @@ def testfunc(a: int, b: int): # Part of the annotation and returned, should be exposed to output. assert result.output.sum == 5 - # Part of the annotation but not returned, should be coalesced to None. + # Part of the annotation but not returned, should be coalesced to None assert result.output.mul is None # Not part of the annotation, should be discarded. assert not hasattr(result.output, "diff") -def test_annotated_func_multreturn(use_validator): +def test_annotated_func_multreturn(): """the function has two elements in the return statement""" @mark.task @@ -167,14 +167,14 @@ def testfunc( "Help for FunctionTask", "Input Parameters:", "- a: float", - "- _func: str", + "- _func: bytes", "Output Parameters:", "- fractional: float", "- integer: int", ] -def test_annotated_input_func_1(use_validator): +def test_annotated_input_func_1(): """the function with annotated input (float)""" @mark.task @@ -185,7 +185,7 @@ def testfunc(a: float): assert getattr(funky.inputs, "a") == 3.5 -def test_annotated_input_func_2(use_validator): +def test_annotated_input_func_2(): """the function with annotated input (int, but float provided)""" @mark.task @@ -193,10 +193,10 @@ def testfunc(a: int): return a with pytest.raises(TypeError): - funky = testfunc(a=3.5) + testfunc(a=3.5) -def test_annotated_input_func_2a(use_validator): +def test_annotated_input_func_2a(): """the function with annotated input (int, but float provided)""" @mark.task @@ -208,7 +208,7 @@ def testfunc(a: int): funky.inputs.a = 3.5 -def test_annotated_input_func_3(use_validator): +def test_annotated_input_func_3(): """the function with annotated input (list)""" @mark.task @@ -230,7 +230,7 @@ def testfunc(a: ty.List[float]): assert getattr(funky.inputs, "a") == [1.0, 3.5] -def test_annotated_input_func_3b(use_validator): +def test_annotated_input_func_3b(): """the function with annotated input (list of floats - int and float provided, should be fine) """ @@ -243,7 +243,7 @@ def testfunc(a: ty.List[float]): assert getattr(funky.inputs, "a") == [1, 3.5] -def test_annotated_input_func_3c_excep(use_validator): +def test_annotated_input_func_3c_excep(): """the function with annotated input (list of ints - int and float provided, should raise an error) """ @@ -253,10 +253,10 @@ def testfunc(a: ty.List[int]): return sum(a) with pytest.raises(TypeError): - funky = testfunc(a=[1, 3.5]) + testfunc(a=[1, 3.5]) -def test_annotated_input_func_4(use_validator): +def test_annotated_input_func_4(): """the function with annotated input (dictionary)""" @mark.task @@ -267,7 +267,7 @@ def testfunc(a: dict): assert getattr(funky.inputs, "a") == {"el1": 1, "el2": 3.5} -def test_annotated_input_func_4a(use_validator): +def test_annotated_input_func_4a(): """the function with annotated input (dictionary of floats)""" @mark.task @@ -278,7 +278,7 @@ def testfunc(a: ty.Dict[str, float]): assert getattr(funky.inputs, "a") == {"el1": 1, "el2": 3.5} -def test_annotated_input_func_4b_excep(use_validator): +def test_annotated_input_func_4b_excep(): """the function with annotated input (dictionary of ints, but float provided)""" @mark.task @@ -286,24 +286,24 @@ def testfunc(a: ty.Dict[str, int]): return sum(a.values()) with pytest.raises(TypeError): - funky = testfunc(a={"el1": 1, "el2": 3.5}) + testfunc(a={"el1": 1, "el2": 3.5}) -def test_annotated_input_func_5(use_validator): +def test_annotated_input_func_5(): """the function with annotated more complex input type (ty.List in ty.Dict) the validator should simply check if values of dict are lists so no error for 3.5 """ @mark.task - def testfunc(a: ty.Dict[str, ty.List[int]]): + def testfunc(a: ty.Dict[str, ty.List]): return sum(a["el1"]) funky = testfunc(a={"el1": [1, 3.5]}) assert getattr(funky.inputs, "a") == {"el1": [1, 3.5]} -def test_annotated_input_func_5a_except(use_validator): +def test_annotated_input_func_5a_except(): """the function with annotated more complex input type (ty.Dict in ty.Dict) list is provided as a dict value (instead a dict), so error is raised """ @@ -313,10 +313,10 @@ def testfunc(a: ty.Dict[str, ty.Dict[str, float]]): return sum(a["el1"]) with pytest.raises(TypeError): - funky = testfunc(a={"el1": [1, 3.5]}) + testfunc(a={"el1": [1, 3.5]}) -def test_annotated_input_func_6(use_validator): +def test_annotated_input_func_6(): """the function with annotated more complex input type (ty.Union in ty.Dict) the validator should unpack values from the Union """ @@ -329,7 +329,7 @@ def testfunc(a: ty.Dict[str, ty.Union[float, int]]): assert getattr(funky.inputs, "a") == {"el1": 1, "el2": 3.5} -def test_annotated_input_func_6a_excep(use_validator): +def test_annotated_input_func_6a_excep(): """the function with annotated more complex input type (ty.Union in ty.Dict) the validator should unpack values from the Union and raise an error for 3.5 """ @@ -339,10 +339,10 @@ def testfunc(a: ty.Dict[str, ty.Union[str, int]]): return sum(a["el1"]) with pytest.raises(TypeError): - funky = testfunc(a={"el1": 1, "el2": 3.5}) + testfunc(a={"el1": 1, "el2": 3.5}) -def test_annotated_input_func_7(use_validator): +def test_annotated_input_func_7(): """the function with annotated input (float) the task has a splitter, so list of float is provided it should work, the validator tries to guess if this is a field with a splitter @@ -352,11 +352,11 @@ def test_annotated_input_func_7(use_validator): def testfunc(a: float): return a - funky = testfunc(a=[3.5, 2.1]).split("a") + funky = testfunc().split("a", a=[3.5, 2.1]) assert getattr(funky.inputs, "a") == [3.5, 2.1] -def test_annotated_input_func_7a_excep(use_validator): +def test_annotated_input_func_7a_excep(): """the function with annotated input (int) and splitter list of float provided - should raise an error (list of int would be fine) """ @@ -366,7 +366,7 @@ def testfunc(a: int): return a with pytest.raises(TypeError): - funky = testfunc(a=[3.5, 2.1]).split("a") + testfunc(a=[3.5, 2.1]).split("a") def test_annotated_input_func_8(): @@ -417,7 +417,7 @@ def testfunc(a: MultiInputObj): assert res.output.out == 1 -def test_annotated_func_multreturn_exception(use_validator): +def test_annotated_func_multreturn_exception(): """function has two elements in the return statement, but three element provided in the spec - should raise an error """ @@ -473,7 +473,7 @@ def testfunc(a, b) -> int: "Input Parameters:", "- a: _empty", "- b: _empty", - "- _func: str", + "- _func: bytes", "Output Parameters:", "- out: int", ] @@ -514,7 +514,7 @@ def testfunc(a, b) -> (int, int): "Input Parameters:", "- a: _empty", "- b: _empty", - "- _func: str", + "- _func: bytes", "Output Parameters:", "- out1: int", "- out2: int", @@ -582,7 +582,7 @@ def no_annots(c, d): assert result.output.out == (20.2, 13.8) -def test_input_spec_func_1(use_validator): +def test_input_spec_func_1(): """the function w/o annotated, but input_spec is used""" @mark.task @@ -599,7 +599,7 @@ def testfunc(a): assert getattr(funky.inputs, "a") == 3.5 -def test_input_spec_func_1a_except(use_validator): +def test_input_spec_func_1a_except(): """the function w/o annotated, but input_spec is used a TypeError is raised (float is provided instead of int) """ @@ -614,10 +614,10 @@ def testfunc(a): bases=(FunctionSpec,), ) with pytest.raises(TypeError): - funky = testfunc(a=3.5, input_spec=my_input_spec) + testfunc(a=3.5, input_spec=my_input_spec) -def test_input_spec_func_1b_except(use_validator): +def test_input_spec_func_1b_except(): """the function w/o annotated, but input_spec is used metadata checks raise an error """ @@ -637,10 +637,10 @@ def testfunc(a): bases=(FunctionSpec,), ) with pytest.raises(AttributeError, match="only these keys are supported"): - funky = testfunc(a=3.5, input_spec=my_input_spec) + testfunc(a=3.5, input_spec=my_input_spec) -def test_input_spec_func_1d_except(use_validator): +def test_input_spec_func_1d_except(): """the function w/o annotated, but input_spec is used input_spec doesn't contain 'a' input, an error is raised """ @@ -655,7 +655,7 @@ def testfunc(a): funky() -def test_input_spec_func_2(use_validator): +def test_input_spec_func_2(): """the function with annotation, and the task has input_spec, input_spec changes the type of the input (so error is not raised) """ @@ -674,7 +674,7 @@ def testfunc(a: int): assert getattr(funky.inputs, "a") == 3.5 -def test_input_spec_func_2a(use_validator): +def test_input_spec_func_2a(): """the function with annotation, and the task has input_spec, input_spec changes the type of the input (so error is not raised) using the shorter syntax @@ -694,7 +694,7 @@ def testfunc(a: int): assert getattr(funky.inputs, "a") == 3.5 -def test_input_spec_func_3(use_validator): +def test_input_spec_func_3(): """the function w/o annotated, but input_spec is used additional keys (allowed_values) are used in metadata """ @@ -721,7 +721,7 @@ def testfunc(a): assert getattr(funky.inputs, "a") == 2 -def test_input_spec_func_3a_except(use_validator): +def test_input_spec_func_3a_except(): """the function w/o annotated, but input_spec is used allowed_values is used in metadata and the ValueError is raised """ @@ -745,10 +745,10 @@ def testfunc(a): ) with pytest.raises(ValueError, match="value of a has to be"): - funky = testfunc(a=3, input_spec=my_input_spec) + testfunc(a=3, input_spec=my_input_spec) -def test_input_spec_func_4(use_validator): +def test_input_spec_func_4(): """the function with a default value for b but b is set as mandatory in the input_spec, so error is raised if not provided """ @@ -781,7 +781,7 @@ def testfunc(a, b=1): funky() -def test_input_spec_func_4a(use_validator): +def test_input_spec_func_4a(): """the function with a default value for b and metadata in the input_spec has a different default value, so value from the function is overwritten """ @@ -827,12 +827,12 @@ def testfunc(a): ) funky = testfunc(a=3.5, input_spec=my_input_spec) - assert getattr(funky.inputs, "a") == [3.5] + assert getattr(funky.inputs, "a") == MultiInputObj([3.5]) res = funky() assert res.output.out == 1 -def test_output_spec_func_1(use_validator): +def test_output_spec_func_1(): """the function w/o annotated, but output_spec is used""" @mark.task @@ -850,7 +850,7 @@ def testfunc(a): assert res.output.out1 == 3.5 -def test_output_spec_func_1a_except(use_validator): +def test_output_spec_func_1a_except(): """the function w/o annotated, but output_spec is used float returned instead of int - TypeError """ @@ -867,10 +867,10 @@ def testfunc(a): funky = testfunc(a=3.5, output_spec=my_output_spec) with pytest.raises(TypeError): - res = funky() + funky() -def test_output_spec_func_2(use_validator): +def test_output_spec_func_2(): """the function w/o annotated, but output_spec is used output_spec changes the type of the output (so error is not raised) """ @@ -890,7 +890,7 @@ def testfunc(a) -> int: assert res.output.out1 == 3.5 -def test_output_spec_func_2a(use_validator): +def test_output_spec_func_2a(): """the function w/o annotated, but output_spec is used output_spec changes the type of the output (so error is not raised) using a shorter syntax @@ -911,7 +911,7 @@ def testfunc(a) -> int: assert res.output.out1 == 3.5 -def test_output_spec_func_3(use_validator): +def test_output_spec_func_3(): """the function w/o annotated, but output_spec is used MultiOutputObj is used, output is a 2-el list, so converter doesn't do anything """ @@ -936,7 +936,7 @@ def testfunc(a, b): assert res.output.out_list == [3.5, 1] -def test_output_spec_func_4(use_validator): +def test_output_spec_func_4(): """the function w/o annotated, but output_spec is used MultiOutputObj is used, output is a 1el list, so converter return the element """ @@ -995,7 +995,9 @@ def fun_none(x) -> (ty.Any, ty.Any): assert res.output.out2 is None -def test_audit_prov(tmpdir, use_validator): +def test_audit_prov( + tmpdir, +): @mark.task def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b @@ -1030,7 +1032,7 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] message_path = tmpdir / funky.checksum / "messages" for file in glob(str(message_path) + "/*.jsonld"): - with open(file, "r") as f: + with open(file) as f: data = json.load(f) if "@type" in data: if "AssociatedWith" in data: @@ -1038,9 +1040,9 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] if "@type" in data: if data["@type"] == "input": - assert None == data["Label"] + assert None is data["Label"] if "AssociatedWith" in data: - assert None == data["AssociatedWith"] + assert None is data["AssociatedWith"] # assert any(json_content) @@ -1065,7 +1067,7 @@ def test_audit_shellcommandtask(tmpdir): command_content = [] for file in glob(str(message_path) + "/*.jsonld"): - with open(file, "r") as f: + with open(file) as f: data = json.load(f) if "@type" in data: @@ -1074,7 +1076,7 @@ def test_audit_shellcommandtask(tmpdir): if "@type" in data: if data["@type"] == "input": - assert data["Label"] == None + assert data["Label"] is None if "Command" in data: command_content.append(True) @@ -1083,7 +1085,7 @@ def test_audit_shellcommandtask(tmpdir): assert any(command_content) -def test_audit_shellcommandtask_file(tmpdir): +def test_audit_shellcommandtask_file(tmp_path): # sourcery skip: use-fstring-for-concatenation import glob import shutil @@ -1097,14 +1099,14 @@ def test_audit_shellcommandtask_file(tmpdir): f.write("This is a test") # copy the test.txt file to the tmpdir - shutil.copy("test.txt", tmpdir) - shutil.copy("test2.txt", tmpdir) + shutil.copy("test.txt", tmp_path) + shutil.copy("test2.txt", tmp_path) cmd = "cat" - file_in = tmpdir / "test.txt" - file_in_2 = tmpdir / "test2.txt" - test_file_hash = hash_file(file_in) - test_file_hash_2 = hash_file(file_in_2) + file_in = File(tmp_path / "test.txt") + file_in_2 = File(tmp_path / "test2.txt") + test_file_hash = hash_function(file_in) + test_file_hash_2 = hash_function(file_in_2) my_input_spec = SpecInfo( name="Input", fields=[ @@ -1144,11 +1146,11 @@ def test_audit_shellcommandtask_file(tmpdir): audit_flags=AuditFlag.PROV, messengers=FileMessenger(), ) - shelly.cache_dir = tmpdir - shelly() - message_path = tmpdir / shelly.checksum / "messages" + shelly.cache_dir = tmp_path + results = shelly() + message_path = tmp_path / shelly.checksum / "messages" for file in glob.glob(str(message_path) + "/*.jsonld"): - with open(file, "r") as x: + with open(file) as x: data = json.load(x) if "@type" in data: if data["@type"] == "input": @@ -1184,7 +1186,7 @@ def test_audit_shellcommandtask_version(tmpdir): # go through each jsonld file in message_path and check if the label field exists version_content = [] for file in glob.glob(str(message_path) + "/*.jsonld"): - with open(file, "r") as f: + with open(file) as f: data = json.load(f) if "AssociatedWith" in data: if version_cmd in data["AssociatedWith"]: @@ -1193,7 +1195,9 @@ def test_audit_shellcommandtask_version(tmpdir): assert any(version_content) -def test_audit_prov_messdir_1(tmpdir, use_validator): +def test_audit_prov_messdir_1( + tmpdir, +): """customized messenger dir""" @mark.task @@ -1219,7 +1223,9 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] assert (tmpdir / funky.checksum / "messages.jsonld").exists() -def test_audit_prov_messdir_2(tmpdir, use_validator): +def test_audit_prov_messdir_2( + tmpdir, +): """customized messenger dir in init""" @mark.task @@ -1249,7 +1255,9 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] assert (tmpdir / "messages.jsonld").exists() -def test_audit_prov_wf(tmpdir, use_validator): +def test_audit_prov_wf( + tmpdir, +): """FileMessenger for wf""" @mark.task @@ -1276,7 +1284,9 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)] assert (tmpdir / wf.checksum / "messages.jsonld").exists() -def test_audit_all(tmpdir, use_validator): +def test_audit_all( + tmpdir, +): @mark.task def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]): return a + b @@ -1315,58 +1325,6 @@ def test_shell_cmd(tmpdir): assert res.output.stdout == " ".join(cmd[1:]) + "\n" -def test_container_cmds(tmpdir): - containy = DockerTask(name="containy", executable="pwd") - with pytest.raises(AttributeError) as excinfo: - containy.cmdline - assert "mandatory" in str(excinfo.value) - containy.inputs.image = "busybox" - assert containy.cmdline - - -@no_win -def test_docker_cmd(tmpdir): - docky = DockerTask(name="docky", executable="pwd", image="busybox") - assert ( - docky.cmdline - == f"docker run --rm -v {docky.output_dir}:/output_pydra:rw -w /output_pydra busybox pwd" - ) - docky.inputs.container_xargs = ["--rm", "-it"] - assert ( - docky.cmdline - == f"docker run --rm -it -v {docky.output_dir}:/output_pydra:rw -w /output_pydra busybox pwd" - ) - # TODO: we probably don't want to support container_path - # docky.inputs.bindings = [ - # ("/local/path", "/container/path", "ro"), - # ("/local2", "/container2", None), - # ] - # assert docky.cmdline == ( - # "docker run --rm -it -v /local/path:/container/path:ro" - # f" -v /local2:/container2:rw -v {docky.output_dir}:/output_pydra:rw -w /output_pydra busybox pwd" - # ) - - -@no_win -def test_singularity_cmd(tmpdir): - # todo how this should be done? - image = "library://sylabsed/linux/alpine" - singu = SingularityTask(name="singi", executable="pwd", image=image) - assert ( - singu.cmdline - == f"singularity exec -B {singu.output_dir}:/output_pydra:rw --pwd /output_pydra {image} pwd" - ) - # TODO: we probably don't want to support container_path - # singu.inputs.bindings = [ - # ("/local/path", "/container/path", "ro"), - # ("/local2", "/container2", None), - # ] - # assert singu.cmdline == ( - # "singularity exec -B /local/path:/container/path:ro" - # f" -B /local2:/container2:rw -B {singu.output_dir}:/output_pydra:rw --pwd /output_pydra {image} pwd" - # ) - - def test_functask_callable(tmpdir): # no submitter or plugin foo = funaddtwo(a=1) @@ -1501,7 +1459,7 @@ def myhook_postrun_task(task, result, *args): print(f"postrun task hook was called, result object is {result}") def myhook_postrun(task, result, *args): - print(f"postrun hook should not be called") + print("postrun hook should not be called") foo.hooks.post_run = myhook_postrun foo.hooks.post_run_task = myhook_postrun_task @@ -1526,10 +1484,10 @@ def test_traceback(tmpdir): def fun_error(x): raise Exception("Error from the function") - task = fun_error(name="error", x=[3, 4], cache_dir=tmpdir).split("x") + task = fun_error(name="error", cache_dir=tmpdir).split("x", x=[3, 4]) with pytest.raises(Exception, match="from the function") as exinfo: - res = task() + task() # getting error file from the error message error_file_match = str(exinfo.value).split("here: ")[-1].split("_error.pklz")[0] @@ -1553,12 +1511,12 @@ def test_traceback_wf(tmpdir): def fun_error(x): raise Exception("Error from the function") - wf = Workflow(name="wf", input_spec=["x"], x=[3, 4], cache_dir=tmpdir).split("x") + wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir).split("x", x=[3, 4]) wf.add(fun_error(name="error", x=wf.lzin.x)) wf.set_output([("out", wf.error.lzout.out)]) with pytest.raises(Exception, match="Task error raised an error") as exinfo: - res = wf() + wf() # getting error file from the error message error_file_match = str(exinfo.value).split("here: ")[-1].split("_error.pklz")[0] @@ -1585,7 +1543,7 @@ def pass_odds(x): print(f"x%2 = {x % 2}\n") return x - task = pass_odds(name="pass_odds", x=[1, 2, 3, 4, 5], cache_dir=tmpdir).split("x") + task = pass_odds(name="pass_odds", cache_dir=tmpdir).split("x", x=[1, 2, 3, 4, 5]) with pytest.raises(Exception, match="even error"): task() diff --git a/pydra/engine/tests/test_tasks_files.py b/pydra/engine/tests/test_tasks_files.py index f572bb6cea..a1849e221b 100644 --- a/pydra/engine/tests/test_tasks_files.py +++ b/pydra/engine/tests/test_tasks_files.py @@ -133,10 +133,8 @@ def test_broken_file(tmpdir): with Submitter(plugin="cf") as sub: sub(nn) - nn2 = file_add2_annot(name="add2_annot", file=file) - with pytest.raises(FileNotFoundError, match="does not exist"): - with Submitter(plugin="cf") as sub: - sub(nn2) + with pytest.raises(FileNotFoundError, match="do not exist"): + file_add2_annot(name="add2_annot", file=file) def test_broken_file_link(tmpdir): @@ -159,11 +157,8 @@ def test_broken_file_link(tmpdir): with Submitter(plugin="cf") as sub: sub(nn) - # raises error before task is run - nn2 = file_add2_annot(name="add2_annot", file=file_link) - with pytest.raises(FileNotFoundError, match="does not exist"): - with Submitter(plugin="cf") as sub: - sub(nn2) + with pytest.raises(FileNotFoundError, match="do not exist"): + file_add2_annot(name="add2_annot", file=file_link) def test_broken_dir(): @@ -178,10 +173,8 @@ def test_broken_dir(): sub(nn) # raises error before task is run - nn2 = dir_count_file_annot(name="listdir", dirpath="/broken_dir_path/") with pytest.raises(FileNotFoundError): - with Submitter(plugin="cf") as sub: - sub(nn2) + dir_count_file_annot(name="listdir", dirpath="/broken_dir_path/") def test_broken_dir_link1(tmpdir): @@ -195,17 +188,14 @@ def test_broken_dir_link1(tmpdir): os.symlink(dir1, dir1_link) os.rmdir(dir1) - nn = dir_count_file(name="listdir", dirpath=dir1) + nn = dir_count_file(name="listdir", dirpath=Path(dir1)) # raises error while running task with pytest.raises(FileNotFoundError): with Submitter(plugin="cf") as sub: sub(nn) - nn2 = dir_count_file_annot(name="listdir", dirpath=dir1) - # raises error before task is run with pytest.raises(FileNotFoundError): - with Submitter(plugin="cf") as sub: - sub(nn2) + dir_count_file_annot(name="listdir", dirpath=Path(dir1)) def test_broken_dir_link2(tmpdir): diff --git a/pydra/engine/tests/test_workflow.py b/pydra/engine/tests/test_workflow.py index bdae9e135b..598021c832 100644 --- a/pydra/engine/tests/test_workflow.py +++ b/pydra/engine/tests/test_workflow.py @@ -1,14 +1,15 @@ import pytest import shutil, os, sys import time +import typing as ty import attr from pathlib import Path -import logging - from .utils import ( add2, add2_wait, multiply, + multiply_list, + multiply_mixed, power, ten, identity, @@ -19,12 +20,17 @@ fun_addvar, fun_addtwo, add2_sub2_res, + add2_sub2_res_list, fun_addvar_none, fun_addvar_default, + fun_addvar_default_notype, + fun_addvar_notype, + fun_addtwo_notype, fun_write_file, fun_write_file_list, fun_write_file_list2dict, list_sum, + list_mult_sum, DOT_FLAG, ) from ..submitter import Submitter @@ -34,7 +40,7 @@ def test_wf_no_input_spec(): - with pytest.raises(ValueError, match="Empty input_spec"): + with pytest.raises(ValueError, match='Empty "Inputs" spec'): Workflow(name="workflow") @@ -63,18 +69,59 @@ def test_wf_specinfo_input_spec(): bases=(ShellSpec,), ) with pytest.raises( - ValueError, match="Provided SpecInfo must have BaseSpec as it's base." + ValueError, match="Provided SpecInfo must have BaseSpec as its base." ): Workflow(name="workflow", input_spec=bad_input_spec) +def test_wf_dict_input_and_output_spec(): + spec = { + "a": str, + "b": ty.Dict[str, ty.Union[int, bool]], + } + wf = Workflow( + name="workflow", + input_spec=spec, + output_spec=spec, + ) + wf.add( + identity_2flds( + name="identity", + x1=wf.lzin.a, + x2=wf.lzin.b, + ) + ) + wf.set_output( + [ + ("a", wf.identity.lzout.out1), + ("b", wf.identity.lzout.out2), + ] + ) + for x in ["a", "b", "_graph_checksums"]: + assert hasattr(wf.inputs, x) + wf.inputs.a = "any-string" + wf.inputs.b = {"foo": 1, "bar": False} + + with pytest.raises(TypeError, match="Cannot coerce 1.0 into "): + wf.inputs.a = 1.0 + with pytest.raises( + TypeError, + match=("Could not coerce object, 'bad-value', to any of the union types "), + ): + wf.inputs.b = {"foo": 1, "bar": "bad-value"} + + result = wf() + assert result.output.a == "any-string" + assert result.output.b == {"foo": 1, "bar": False} + + def test_wf_name_conflict1(): """raise error when workflow name conflicts with a class attribute or method""" with pytest.raises(ValueError) as excinfo1: - wf = Workflow(name="result", input_spec=["x"]) + Workflow(name="result", input_spec=["x"]) assert "Cannot use names of attributes or methods" in str(excinfo1.value) with pytest.raises(ValueError) as excinfo2: - wf = Workflow(name="done", input_spec=["x"]) + Workflow(name="done", input_spec=["x"]) assert "Cannot use names of attributes or methods" in str(excinfo2.value) @@ -449,9 +496,8 @@ def test_wf_5b_exception(tmpdir): wf.set_output([("out", wf.addsub.lzout.sum)]) wf.cache_dir = tmpdir - with pytest.raises(Exception) as excinfo: + with pytest.raises(Exception, match="are already set"): wf.set_output([("out", wf.addsub.lzout.sub)]) - assert "is already set" in str(excinfo.value) def test_wf_6(plugin, tmpdir): @@ -498,13 +544,12 @@ def test_wf_st_1(plugin, tmpdir): wf = Workflow(name="wf_spl_1", input_spec=["x"]) wf.add(add2(name="add2", x=wf.lzin.x)) - wf.split("x") - wf.inputs.x = [1, 2] + wf.split("x", x=[1, 2]) wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir checksum_before = wf.checksum - with Submitter(plugin=plugin) as sub: + with Submitter(plugin="serial") as sub: sub(wf) assert wf.checksum == checksum_before @@ -523,8 +568,7 @@ def test_wf_st_1_call_subm(plugin, tmpdir): wf = Workflow(name="wf_spl_1", input_spec=["x"]) wf.add(add2(name="add2", x=wf.lzin.x)) - wf.split("x") - wf.inputs.x = [1, 2] + wf.split("x", x=[1, 2]) wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -548,8 +592,7 @@ def test_wf_st_1_call_plug(plugin, tmpdir): wf = Workflow(name="wf_spl_1", input_spec=["x"]) wf.add(add2(name="add2", x=wf.lzin.x)) - wf.split("x") - wf.inputs.x = [1, 2] + wf.split("x", x=[1, 2]) wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -572,8 +615,7 @@ def test_wf_st_1_call_selfplug(plugin, tmpdir): wf = Workflow(name="wf_spl_1", input_spec=["x"]) wf.add(add2(name="add2", x=wf.lzin.x)) - wf.split("x") - wf.inputs.x = [1, 2] + wf.split("x", x=[1, 2]) wf.set_output([("out", wf.add2.lzout.out)]) wf.plugin = plugin wf.cache_dir = tmpdir @@ -597,8 +639,7 @@ def test_wf_st_1_call_noplug_nosubm(plugin, tmpdir): wf = Workflow(name="wf_spl_1", input_spec=["x"]) wf.add(add2(name="add2", x=wf.lzin.x)) - wf.split("x") - wf.inputs.x = [1, 2] + wf.split("x", x=[1, 2]) wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -615,19 +656,22 @@ def test_wf_st_1_call_noplug_nosubm(plugin, tmpdir): def test_wf_st_1_inp_in_call(tmpdir): """Defining input in __call__""" - wf = Workflow(name="wf_spl_1", input_spec=["x"], cache_dir=tmpdir).split("x") + wf = Workflow(name="wf_spl_1", input_spec=["x"], cache_dir=tmpdir).split( + "x", x=[1, 2] + ) wf.add(add2(name="add2", x=wf.lzin.x)) wf.set_output([("out", wf.add2.lzout.out)]) - results = wf(x=[1, 2]) + results = wf() assert results[0].output.out == 3 assert results[1].output.out == 4 def test_wf_st_1_upd_inp_call(tmpdir): """Updating input in __call___""" - wf = Workflow(name="wf_spl_1", input_spec=["x"], cache_dir=tmpdir).split("x") + wf = Workflow(name="wf_spl_1", input_spec=["x"], cache_dir=tmpdir).split( + "x", x=[11, 22] + ) wf.add(add2(name="add2", x=wf.lzin.x)) - wf.inputs.x = [11, 22] wf.set_output([("out", wf.add2.lzout.out)]) results = wf(x=[1, 2]) assert results[0].output.out == 3 @@ -639,8 +683,7 @@ def test_wf_st_noinput_1(plugin, tmpdir): wf = Workflow(name="wf_spl_1", input_spec=["x"]) wf.add(add2(name="add2", x=wf.lzin.x)) - wf.split("x") - wf.inputs.x = [] + wf.split("x", x=[]) wf.set_output([("out", wf.add2.lzout.out)]) wf.plugin = plugin wf.cache_dir = tmpdir @@ -659,7 +702,7 @@ def test_wf_st_noinput_1(plugin, tmpdir): def test_wf_ndst_1(plugin, tmpdir): """workflow with one task, a splitter on the task level""" wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x).split("x")) + wf.add(add2(name="add2").split("x", x=wf.lzin.x)) wf.inputs.x = [1, 2] wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -680,11 +723,11 @@ def test_wf_ndst_updatespl_1(plugin, tmpdir): a splitter on the task level is added *after* calling add """ wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x)) + wf.add(add2(name="add2")) wf.inputs.x = [1, 2] + wf.add2.split("x", x=wf.lzin.x) wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir - wf.add2.split("x") with Submitter(plugin=plugin) as sub: sub(wf) @@ -704,8 +747,7 @@ def test_wf_ndst_updatespl_1a(plugin, tmpdir): wf = Workflow(name="wf_spl_1", input_spec=["x"]) task_add2 = add2(name="add2", x=wf.lzin.x) wf.add(task_add2) - task_add2.split("x") - wf.inputs.x = [1, 2] + task_add2.split("x", x=[1, 2]) wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -729,9 +771,8 @@ def test_wf_ndst_updateinp_1(plugin, tmpdir): wf.add(add2(name="add2", x=wf.lzin.x)) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] + wf.add2.split("x", x=wf.lzin.y) wf.set_output([("out", wf.add2.lzout.out)]) - wf.add2.split("x") - wf.add2.inputs.x = wf.lzin.y wf.cache_dir = tmpdir with Submitter(plugin=plugin) as sub: @@ -747,7 +788,7 @@ def test_wf_ndst_updateinp_1(plugin, tmpdir): def test_wf_ndst_noinput_1(plugin, tmpdir): """workflow with one task, a splitter on the task level""" wf = Workflow(name="wf_spl_1", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x).split("x")) + wf.add(add2(name="add2").split("x", x=wf.lzin.x)) wf.inputs.x = [] wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -768,8 +809,7 @@ def test_wf_st_2(plugin, tmpdir): wf = Workflow(name="wf_st_2", input_spec=["x"]) wf.add(add2(name="add2", x=wf.lzin.x)) - wf.split("x").combine(combiner="x") - wf.inputs.x = [1, 2] + wf.split("x", x=[1, 2]).combine(combiner="x") wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -789,7 +829,7 @@ def test_wf_st_2(plugin, tmpdir): def test_wf_ndst_2(plugin, tmpdir): """workflow with one task, splitters and combiner on the task level""" wf = Workflow(name="wf_ndst_2", input_spec=["x"]) - wf.add(add2(name="add2", x=wf.lzin.x).split("x").combine(combiner="x")) + wf.add(add2(name="add2").split("x", x=wf.lzin.x).combine(combiner="x")) wf.inputs.x = [1, 2] wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -811,9 +851,7 @@ def test_wf_st_3(plugin, tmpdir): wf = Workflow(name="wfst_3", input_spec=["x", "y"]) wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) wf.add(add2(name="add2", x=wf.mult.lzout.out)) - wf.inputs.x = [1, 2] - wf.inputs.y = [11, 12] - wf.split(("x", "y")) + wf.split(("x", "y"), x=[1, 2], y=[11, 12]) wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -856,7 +894,7 @@ def test_wf_st_3(plugin, tmpdir): def test_wf_ndst_3(plugin, tmpdir): """Test workflow with 2 tasks, splitter on a task level""" wf = Workflow(name="wf_ndst_3", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split(("x", "y"))) + wf.add(multiply(name="mult").split(("x", "y"), x=wf.lzin.x, y=wf.lzin.y)) wf.add(add2(name="add2", x=wf.mult.lzout.out)) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] @@ -902,7 +940,7 @@ def test_wf_st_4(plugin, tmpdir): def test_wf_ndst_4(plugin, tmpdir): """workflow with two tasks, scalar splitter and combiner on tasks level""" wf = Workflow(name="wf_ndst_4", input_spec=["a", "b"]) - wf.add(multiply(name="mult", x=wf.lzin.a, y=wf.lzin.b).split(("x", "y"))) + wf.add(multiply(name="mult").split(("x", "y"), x=wf.lzin.a, y=wf.lzin.b)) wf.add(add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) wf.set_output([("out", wf.add2.lzout.out)]) @@ -949,7 +987,7 @@ def test_wf_st_5(plugin, tmpdir): def test_wf_ndst_5(plugin, tmpdir): """workflow with two tasks, outer splitter on tasks level and no combiner""" wf = Workflow(name="wf_ndst_5", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split(["x", "y"])) + wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) wf.add(add2(name="add2", x=wf.mult.lzout.out)) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] @@ -998,7 +1036,7 @@ def test_wf_st_6(plugin, tmpdir): def test_wf_ndst_6(plugin, tmpdir): """workflow with two tasks, outer splitter and combiner on tasks level""" wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split(["x", "y"])) + wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) wf.add(add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) wf.inputs.x = [1, 2, 3] wf.inputs.y = [11, 12] @@ -1019,7 +1057,7 @@ def test_wf_ndst_6(plugin, tmpdir): def test_wf_ndst_7(plugin, tmpdir): """workflow with two tasks, outer splitter and (full) combiner for first node only""" wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split("x").combine("x")) + wf.add(multiply(name="mult").split("x", x=wf.lzin.x, y=wf.lzin.y).combine("x")) wf.add(identity(name="iden", x=wf.mult.lzout.out)) wf.inputs.x = [1, 2, 3] wf.inputs.y = 11 @@ -1040,7 +1078,7 @@ def test_wf_ndst_8(plugin, tmpdir): """workflow with two tasks, outer splitter and (partial) combiner for first task only""" wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) wf.add( - multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split(["x", "y"]).combine("x") + multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") ) wf.add(identity(name="iden", x=wf.mult.lzout.out)) wf.inputs.x = [1, 2, 3] @@ -1063,8 +1101,8 @@ def test_wf_ndst_9(plugin, tmpdir): """workflow with two tasks, outer splitter and (full) combiner for first task only""" wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) wf.add( - multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y) - .split(["x", "y"]) + multiply(name="mult") + .split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y) .combine(["x", "y"]) ) wf.add(identity(name="iden", x=wf.mult.lzout.out)) @@ -1089,7 +1127,7 @@ def test_wf_ndst_9(plugin, tmpdir): def test_wf_3sernd_ndst_1(plugin, tmpdir): """workflow with three "serial" tasks, checking if the splitter is propagating""" wf = Workflow(name="wf_3sernd_ndst_1", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split(["x", "y"])) + wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) wf.add(add2(name="add2_1st", x=wf.mult.lzout.out)) wf.add(add2(name="add2_2nd", x=wf.add2_1st.lzout.out)) wf.inputs.x = [1, 2] @@ -1128,8 +1166,8 @@ def test_wf_3sernd_ndst_1a(plugin, tmpdir): and the 2nd task is adding one more input to the splitter """ wf = Workflow(name="wf_3sernd_ndst_1", input_spec=["x", "y"]) - wf.add(add2(name="add2_1st", x=wf.lzin.x).split("x")) - wf.add(multiply(name="mult", x=wf.add2_1st.lzout.out, y=wf.lzin.y).split("y")) + wf.add(add2(name="add2_1st").split("x", x=wf.lzin.x)) + wf.add(multiply(name="mult", x=wf.add2_1st.lzout.out).split("y", y=wf.lzin.y)) wf.add(add2(name="add2_2nd", x=wf.mult.lzout.out)) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] @@ -1196,8 +1234,8 @@ def test_wf_3nd_ndst_1(plugin_dask_opt, tmpdir): splitter on the tasks levels """ wf = Workflow(name="wf_ndst_7", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x).split("x")) - wf.add(add2(name="add2y", x=wf.lzin.y).split("x")) + wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) wf.inputs.x = [1, 2, 3] wf.inputs.y = [11, 12] @@ -1249,8 +1287,8 @@ def test_wf_3nd_ndst_2(plugin, tmpdir): splitter and partial combiner on the tasks levels """ wf = Workflow(name="wf_ndst_8", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x).split("x")) - wf.add(add2(name="add2y", x=wf.lzin.y).split("x")) + wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) wf.add( multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( "add2x.x" @@ -1259,10 +1297,9 @@ def test_wf_3nd_ndst_2(plugin, tmpdir): wf.inputs.x = [1, 2, 3] wf.inputs.y = [11, 12] wf.set_output([("out", wf.mult.lzout.out)]) - wf.plugin = plugin wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(plugin="serial") as sub: sub(wf) results = wf.result() @@ -1308,8 +1345,8 @@ def test_wf_3nd_ndst_3(plugin, tmpdir): splitter and partial combiner (from the second task) on the tasks levels """ wf = Workflow(name="wf_ndst_9", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x).split("x")) - wf.add(add2(name="add2y", x=wf.lzin.y).split("x")) + wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) wf.add( multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( "add2y.x" @@ -1367,8 +1404,8 @@ def test_wf_3nd_ndst_4(plugin, tmpdir): splitter and full combiner on the tasks levels """ wf = Workflow(name="wf_ndst_10", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x).split("x")) - wf.add(add2(name="add2y", x=wf.lzin.y).split("x")) + wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) wf.add( multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).combine( ["add2x.x", "add2y.x"] @@ -1433,13 +1470,11 @@ def test_wf_3nd_ndst_5(plugin, tmpdir): all tasks have splitters and the last one has a partial combiner (from the 2nd) """ wf = Workflow(name="wf_st_9", input_spec=["x", "y", "z"]) - wf.add(add2(name="add2x", x=wf.lzin.x).split("x")) - wf.add(add2(name="add2y", x=wf.lzin.y).split("x")) + wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) wf.add( - fun_addvar3( - name="addvar", a=wf.add2x.lzout.out, b=wf.add2y.lzout.out, c=wf.lzin.z - ) - .split("c") + fun_addvar3(name="addvar", a=wf.add2x.lzout.out, b=wf.add2y.lzout.out) + .split("c", c=wf.lzin.z) .combine("add2x.x") ) wf.inputs.x = [2, 3] @@ -1468,8 +1503,8 @@ def test_wf_3nd_ndst_6(plugin, tmpdir): the third one uses scalar splitter from the previous ones and a combiner """ wf = Workflow(name="wf_ndst_9", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x).split("x")) - wf.add(add2(name="add2y", x=wf.lzin.y).split("x")) + wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(add2(name="add2y").split("x", x=wf.lzin.y)) wf.add( multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out) .split(("_add2x", "_add2y")) @@ -1494,8 +1529,8 @@ def test_wf_3nd_ndst_7(plugin, tmpdir): the third one uses scalar splitter from the previous ones """ wf = Workflow(name="wf_ndst_9", input_spec=["x"]) - wf.add(add2(name="add2x", x=wf.lzin.x).split("x")) - wf.add(add2(name="add2y", x=wf.lzin.x).split("x")) + wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) + wf.add(add2(name="add2y").split("x", x=wf.lzin.x)) wf.add( multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out).split( ("_add2x", "_add2y") @@ -1522,7 +1557,7 @@ def test_wf_3nd_8(tmpdir): wf = Workflow(name="wf", input_spec=["zip"], cache_dir=tmpdir) wf.inputs.zip = [["test1", "test3", "test5"], ["test2", "test4", "test6"]] - wf.add(identity_2flds(name="iden2flds_1", x1=wf.lzin.zip, x2="Hoi").split("x1")) + wf.add(identity_2flds(name="iden2flds_1", x2="Hoi").split("x1", x1=wf.lzin.zip)) wf.add(identity(name="identity", x=wf.iden2flds_1.lzout.out1)) @@ -1571,8 +1606,8 @@ def test_wf_ndstLR_1(plugin, tmpdir): and the Left part from the first task should be added """ wf = Workflow(name="wf_ndst_3", input_spec=["x", "y"]) - wf.add(add2(name="add2", x=wf.lzin.x).split("x")) - wf.add(multiply(name="mult", x=wf.add2.lzout.out, y=wf.lzin.y).split("y")) + wf.add(add2(name="add2").split("x", x=wf.lzin.x)) + wf.add(multiply(name="mult", x=wf.add2.lzout.out).split("y", y=wf.lzin.y)) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] wf.set_output([("out", wf.mult.lzout.out)]) @@ -1599,9 +1634,9 @@ def test_wf_ndstLR_1a(plugin, tmpdir): and the Right part (it's own splitter) """ wf = Workflow(name="wf_ndst_3", input_spec=["x", "y"]) - wf.add(add2(name="add2", x=wf.lzin.x).split("x")) + wf.add(add2(name="add2").split("x", x=wf.lzin.x)) wf.add( - multiply(name="mult", x=wf.add2.lzout.out, y=wf.lzin.y).split(["_add2", "y"]) + multiply(name="mult").split(["_add2", "y"], x=wf.add2.lzout.out, y=wf.lzin.y) ) wf.inputs.x = [1, 2] wf.inputs.y = [11, 12] @@ -1629,10 +1664,10 @@ def test_wf_ndstLR_2(plugin, tmpdir): and the Left part from the first task should be added """ wf = Workflow(name="wf_ndst_3", input_spec=["x", "y", "z"]) - wf.add(add2(name="add2", x=wf.lzin.x).split("x")) + wf.add(add2(name="add2").split("x", x=wf.lzin.x)) wf.add( - fun_addvar3(name="addvar", a=wf.add2.lzout.out, b=wf.lzin.y, c=wf.lzin.z).split( - ["b", "c"] + fun_addvar3(name="addvar", a=wf.add2.lzout.out).split( + ["b", "c"], b=wf.lzin.y, c=wf.lzin.z ) ) wf.inputs.x = [1, 2, 3] @@ -1678,10 +1713,10 @@ def test_wf_ndstLR_2a(plugin, tmpdir): and the Right part (it's own outer splitter) """ wf = Workflow(name="wf_ndst_3", input_spec=["x", "y", "z"]) - wf.add(add2(name="add2", x=wf.lzin.x).split("x")) + wf.add(add2(name="add2").split("x", x=wf.lzin.x)) wf.add( - fun_addvar3(name="addvar", a=wf.add2.lzout.out, b=wf.lzin.y, c=wf.lzin.z).split( - ["_add2", ["b", "c"]] + fun_addvar3(name="addvar", a=wf.add2.lzout.out).split( + ["_add2", ["b", "c"]], b=wf.lzin.y, c=wf.lzin.z ) ) wf.inputs.x = [1, 2, 3] @@ -1728,9 +1763,9 @@ def test_wf_ndstinner_1(plugin, tmpdir): """workflow with 2 tasks, the second task has inner splitter """ - wf = Workflow(name="wf_st_3", input_spec=["x"]) + wf = Workflow(name="wf_st_3", input_spec={"x": int}) wf.add(list_output(name="list", x=wf.lzin.x)) - wf.add(add2(name="add2", x=wf.list.lzout.out).split("x")) + wf.add(add2(name="add2").split("x", x=wf.list.lzout.out)) wf.inputs.x = 1 wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -1754,7 +1789,7 @@ def test_wf_ndstinner_2(plugin, tmpdir): """ wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) wf.add(list_output(name="list", x=wf.lzin.x)) - wf.add(multiply(name="mult", x=wf.list.lzout.out, y=wf.lzin.y).split("x")) + wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.list.lzout.out)) wf.inputs.x = 1 wf.inputs.y = 10 wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.mult.lzout.out)]) @@ -1779,7 +1814,7 @@ def test_wf_ndstinner_3(plugin, tmpdir): """ wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) wf.add(list_output(name="list", x=wf.lzin.x)) - wf.add(multiply(name="mult", x=wf.list.lzout.out, y=wf.lzin.y).split(["x", "y"])) + wf.add(multiply(name="mult").split(["x", "y"], x=wf.list.lzout.out, y=wf.lzin.y)) wf.inputs.x = 1 wf.inputs.y = [10, 100] wf.set_output([("out_list", wf.list.lzout.out), ("out", wf.mult.lzout.out)]) @@ -1805,7 +1840,7 @@ def test_wf_ndstinner_4(plugin, tmpdir): """ wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) wf.add(list_output(name="list", x=wf.lzin.x)) - wf.add(multiply(name="mult", x=wf.list.lzout.out, y=wf.lzin.y).split("x")) + wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.list.lzout.out)) wf.add(add2(name="add2", x=wf.mult.lzout.out)) wf.inputs.x = 1 wf.inputs.y = 10 @@ -1835,9 +1870,9 @@ def test_wf_ndstinner_5(plugin, tmpdir): the third task has no new splitter """ wf = Workflow(name="wf_5", input_spec=["x", "y", "b"]) - wf.add(list_output(name="list", x=wf.lzin.x).split("x")) - wf.add(multiply(name="mult", x=wf.list.lzout.out, y=wf.lzin.y).split(["y", "x"])) - wf.add(fun_addvar(name="addvar", a=wf.mult.lzout.out, b=wf.lzin.b).split("b")) + wf.add(list_output(name="list").split("x", x=wf.lzin.x)) + wf.add(multiply(name="mult").split(["y", "x"], x=wf.list.lzout.out, y=wf.lzin.y)) + wf.add(fun_addvar(name="addvar", a=wf.mult.lzout.out).split("b", b=wf.lzin.b)) wf.inputs.x = [1, 2] wf.inputs.y = [10, 100] wf.inputs.b = [3, 5] @@ -1944,7 +1979,7 @@ def test_wf_ndst_singl_1(plugin, tmpdir): only one input is part of the splitter, the other is a single value """ wf = Workflow(name="wf_ndst_5", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split("x")) + wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) wf.add(add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) wf.inputs.x = [1, 2] wf.inputs.y = 11 @@ -1994,7 +2029,7 @@ def test_wf_ndst_singl_2(plugin, tmpdir): only one input is part of the splitter, the other is a single value """ wf = Workflow(name="wf_ndst_6", input_spec=["x", "y"]) - wf.add(add2(name="add2x", x=wf.lzin.x).split("x")) + wf.add(add2(name="add2x").split("x", x=wf.lzin.x)) wf.add(add2(name="add2y", x=wf.lzin.y)) wf.add(multiply(name="mult", x=wf.add2x.lzout.out, y=wf.add2y.lzout.out)) wf.inputs.x = [1, 2, 3] @@ -2141,8 +2176,7 @@ def test_wfasnd_st_1(plugin, tmpdir): wfnd = Workflow(name="wfnd", input_spec=["x"]) wfnd.add(add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wfnd.split("x") - wfnd.inputs.x = [2, 4] + wfnd.split("x", x=[2, 4]) wf = Workflow(name="wf", input_spec=["x"]) wf.add(wfnd) @@ -2168,11 +2202,10 @@ def test_wfasnd_st_updatespl_1(plugin, tmpdir): wfnd = Workflow(name="wfnd", input_spec=["x"]) wfnd.add(add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) - wfnd.inputs.x = [2, 4] wf = Workflow(name="wf", input_spec=["x"]) wf.add(wfnd) - wfnd.split("x") + wfnd.split("x", x=[2, 4]) wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir @@ -2191,7 +2224,7 @@ def test_wfasnd_ndst_1(plugin, tmpdir): splitter for node """ wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2", x=wfnd.lzin.x).split("x")) + wfnd.add(add2(name="add2").split("x", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) # TODO: without this the test is failing wfnd.plugin = plugin @@ -2218,13 +2251,11 @@ def test_wfasnd_ndst_updatespl_1(plugin, tmpdir): """ wfnd = Workflow(name="wfnd", input_spec=["x"]) wfnd.add(add2(name="add2", x=wfnd.lzin.x)) + wfnd.add2.split("x", x=[2, 4]) wfnd.set_output([("out", wfnd.add2.lzout.out)]) - # TODO: without this the test is failing - wfnd.inputs.x = [2, 4] wf = Workflow(name="wf", input_spec=["x"]) wf.add(wfnd) - wfnd.add2.split("x") wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir @@ -2248,8 +2279,7 @@ def test_wfasnd_wfst_1(plugin, tmpdir): wfnd.set_output([("out", wfnd.add2.lzout.out)]) wf.add(wfnd) - wf.split("x") - wf.inputs.x = [2, 4] + wf.split("x", x=[2, 4]) wf.set_output([("out", wf.wfnd.lzout.out)]) with Submitter(plugin=plugin) as sub: @@ -2275,9 +2305,7 @@ def test_wfasnd_st_2(plugin, tmpdir): wfnd = Workflow(name="wfnd", input_spec=["x", "y"]) wfnd.add(multiply(name="mult", x=wfnd.lzin.x, y=wfnd.lzin.y)) wfnd.set_output([("out", wfnd.mult.lzout.out)]) - wfnd.split(("x", "y")) - wfnd.inputs.x = [2, 4] - wfnd.inputs.y = [1, 10] + wfnd.split(("x", "y"), x=[2, 4], y=[1, 10]) wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) wf.add(wfnd) @@ -2306,9 +2334,7 @@ def test_wfasnd_wfst_2(plugin, tmpdir): wf.add(wfnd) wf.add(add2(name="add2", x=wf.wfnd.lzout.out)) - wf.split(("x", "y")) - wf.inputs.x = [2, 4] - wf.inputs.y = [1, 10] + wf.split(("x", "y"), x=[2, 4], y=[1, 10]) wf.set_output([("out", wf.add2.lzout.out)]) wf.cache_dir = tmpdir @@ -2333,7 +2359,7 @@ def test_wfasnd_ndst_3(plugin, tmpdir): splitter for the first task """ wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split(("x", "y"))) + wf.add(multiply(name="mult").split(("x", "y"), x=wf.lzin.x, y=wf.lzin.y)) wf.inputs.x = [2, 4] wf.inputs.y = [1, 10] @@ -2345,7 +2371,7 @@ def test_wfasnd_ndst_3(plugin, tmpdir): wf.set_output([("out", wf.wfnd.lzout.out)]) wf.cache_dir = tmpdir - with Submitter(plugin=plugin) as sub: + with Submitter(plugin="serial") as sub: sub(wf) # assert wf.output_dir.exists() results = wf.result() @@ -2361,9 +2387,7 @@ def test_wfasnd_wfst_3(plugin, tmpdir): """ wf = Workflow(name="wf_st_3", input_spec=["x", "y"]) wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y)) - wf.inputs.x = [2, 4] - wf.inputs.y = [1, 10] - wf.split(("x", "y")) + wf.split(("x", "y"), x=[2, 4], y=[1, 10]) wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) wfnd.add(add2(name="add2", x=wfnd.lzin.x)) @@ -2419,7 +2443,7 @@ def test_wfasnd_ndst_4(plugin, tmpdir): splitter for node """ wfnd = Workflow(name="wfnd", input_spec=["x"]) - wfnd.add(add2(name="add2_1st", x=wfnd.lzin.x).split("x")) + wfnd.add(add2(name="add2_1st").split("x", x=wfnd.lzin.x)) wfnd.add(add2(name="add2_2nd", x=wfnd.add2_1st.lzout.out)) wfnd.set_output([("out", wfnd.add2_2nd.lzout.out)]) wfnd.inputs.x = [2, 4] @@ -2450,8 +2474,7 @@ def test_wfasnd_wfst_4(plugin, tmpdir): wfnd.set_output([("out", wfnd.add2_2nd.lzout.out)]) wf.add(wfnd) - wf.split("x") - wf.inputs.x = [2, 4] + wf.split("x", x=[2, 4]) wf.set_output([("out", wf.wfnd.lzout.out)]) with Submitter(plugin=plugin) as sub: @@ -3162,9 +3185,7 @@ def test_wf_state_cachelocations(plugin, tmpdir): wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = [2, 20] - wf1.inputs.y = [3, 4] - wf1.split(splitter=("x", "y")) + wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf1.plugin = plugin t0 = time.time() @@ -3185,9 +3206,7 @@ def test_wf_state_cachelocations(plugin, tmpdir): wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = [2, 20] - wf2.inputs.y = [3, 4] - wf2.split(splitter=("x", "y")) + wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf2.plugin = plugin t0 = time.time() @@ -3230,9 +3249,7 @@ def test_wf_state_cachelocations_forcererun(plugin, tmpdir): wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = [2, 20] - wf1.inputs.y = [3, 4] - wf1.split(splitter=("x", "y")) + wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf1.plugin = plugin t0 = time.time() @@ -3253,9 +3270,7 @@ def test_wf_state_cachelocations_forcererun(plugin, tmpdir): wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = [2, 20] - wf2.inputs.y = [3, 4] - wf2.split(splitter=("x", "y")) + wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf2.plugin = plugin t0 = time.time() @@ -3299,9 +3314,7 @@ def test_wf_state_cachelocations_updateinp(plugin, tmpdir): wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.inputs.x = [2, 20] - wf1.inputs.y = [3, 4] - wf1.split(splitter=("x", "y")) + wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf1.plugin = plugin t0 = time.time() @@ -3319,12 +3332,10 @@ def test_wf_state_cachelocations_updateinp(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.x)) + wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = [2, 20] - wf2.inputs.y = [3, 4] - wf2.split(splitter=("x", "y")) + wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf2.plugin = plugin wf2.mult.inputs.y = wf2.lzin.y @@ -3386,9 +3397,7 @@ def test_wf_state_n_nostate_cachelocations(plugin, tmpdir): wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) - wf2.inputs.x = [2, 20] - wf2.inputs.y = [3, 4] - wf2.split(splitter=("x", "y")) + wf2.split(splitter=("x", "y"), x=[2, 20], y=[3, 4]) wf2.plugin = plugin with Submitter(plugin=plugin) as sub: @@ -3529,7 +3538,7 @@ def test_wf_ndstate_cachelocations(plugin, tmpdir): wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) wf1.add( - multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y).split(splitter=("x", "y")) + multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) ) wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) @@ -3552,7 +3561,7 @@ def test_wf_ndstate_cachelocations(plugin, tmpdir): cache_locations=cache_dir1, ) wf2.add( - multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y).split(splitter=("x", "y")) + multiply(name="mult").split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) ) wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) @@ -3594,7 +3603,7 @@ def test_wf_ndstate_cachelocations_forcererun(plugin, tmpdir): wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) wf1.add( - multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y).split(splitter=("x", "y")) + multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) ) wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) @@ -3617,7 +3626,7 @@ def test_wf_ndstate_cachelocations_forcererun(plugin, tmpdir): cache_locations=cache_dir1, ) wf2.add( - multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y).split(splitter=("x", "y")) + multiply(name="mult").split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) ) wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) @@ -3657,7 +3666,7 @@ def test_wf_ndstate_cachelocations_updatespl(plugin, tmpdir): wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) wf1.add( - multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y).split(splitter=("x", "y")) + multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) ) wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) @@ -3679,10 +3688,9 @@ def test_wf_ndstate_cachelocations_updatespl(plugin, tmpdir): cache_dir=cache_dir2, cache_locations=cache_dir1, ) - wf2.add(multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y)) - + wf2.add(multiply(name="mult")) + wf2.mult.split(splitter=("x", "y"), x=wf2.lzin.x, y=wf2.lzin.y) wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) - wf2.mult.split(splitter=("x", "y")) wf2.set_output([("out", wf2.add2.lzout.out)]) wf2.inputs.x = [2, 20] wf2.inputs.y = [3, 4] @@ -3721,7 +3729,7 @@ def test_wf_ndstate_cachelocations_recompute(plugin, tmpdir): wf1 = Workflow(name="wf", input_spec=["x", "y"], cache_dir=cache_dir1) wf1.add( - multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y).split(splitter=("x", "y")) + multiply(name="mult").split(splitter=("x", "y"), x=wf1.lzin.x, y=wf1.lzin.y) ) wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) @@ -3744,7 +3752,7 @@ def test_wf_ndstate_cachelocations_recompute(plugin, tmpdir): cache_locations=cache_dir1, ) wf2.add( - multiply(name="mult", x=wf2.lzin.x, y=wf2.lzin.y).split(splitter=["x", "y"]) + multiply(name="mult").split(splitter=["x", "y"], x=wf2.lzin.x, y=wf2.lzin.y) ) wf2.add(add2_wait(name="add2", x=wf2.mult.lzout.out)) wf2.set_output([("out", wf2.add2.lzout.out)]) @@ -3832,9 +3840,7 @@ def test_wf_state_runtwice_usecache(plugin, tmpdir): wf1.add(multiply(name="mult", x=wf1.lzin.x, y=wf1.lzin.y)) wf1.add(add2_wait(name="add2", x=wf1.mult.lzout.out)) wf1.set_output([("out", wf1.add2.lzout.out)]) - wf1.split(splitter=("x", "y")) - wf1.inputs.x = [2, 20] - wf1.inputs.y = [3, 30] + wf1.split(splitter=("x", "y"), x=[2, 20], y=[3, 30]) wf1.plugin = plugin t0 = time.time() @@ -3904,8 +3910,7 @@ def test_cache_propagation2(tmpdir, create_tasks): def test_cache_propagation3(tmpdir, create_tasks): """Shared cache_dir with state""" wf, t1, t2 = create_tasks - wf.inputs.x = [1, 2] - wf.split("x") + wf.split("x", x=[1, 2]) wf.cache_dir = (tmpdir / "shared").strpath wf(plugin="cf") assert wf.cache_dir == t1.cache_dir == t2.cache_dir @@ -3913,7 +3918,7 @@ def test_cache_propagation3(tmpdir, create_tasks): def test_workflow_combine1(tmpdir): wf1 = Workflow(name="wf1", input_spec=["a", "b"], a=[1, 2], b=[2, 3]) - wf1.add(power(name="power", a=wf1.lzin.a, b=wf1.lzin.b).split(["a", "b"])) + wf1.add(power(name="power").split(["a", "b"], a=wf1.lzin.a, b=wf1.lzin.b)) wf1.add(identity(name="identity1", x=wf1.power.lzout.out).combine("power.a")) wf1.add(identity(name="identity2", x=wf1.identity1.lzout.out).combine("power.b")) wf1.set_output( @@ -3934,7 +3939,7 @@ def test_workflow_combine1(tmpdir): def test_workflow_combine2(tmpdir): wf1 = Workflow(name="wf1", input_spec=["a", "b"], a=[1, 2], b=[2, 3]) wf1.add( - power(name="power", a=wf1.lzin.a, b=wf1.lzin.b).split(["a", "b"]).combine("a") + power(name="power").split(["a", "b"], a=wf1.lzin.a, b=wf1.lzin.b).combine("a") ) wf1.add(identity(name="identity", x=wf1.power.lzout.out).combine("power.b")) wf1.set_output({"out_pow": wf1.power.lzout.out, "out_iden": wf1.identity.lzout.out}) @@ -3996,7 +4001,7 @@ def test_wf_lzoutall_st_1(plugin, tmpdir): by using lzout.all syntax """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split(["x", "y"])) + wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) wf.add(add2_sub2_res(name="add_sub", res=wf.mult.lzout.all_)) wf.set_output([("out_add", wf.add_sub.lzout.out_add)]) wf.inputs.x = [2, 20] @@ -4018,7 +4023,7 @@ def test_wf_lzoutall_st_1a(plugin, tmpdir): by using lzout.all syntax """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split(["x", "y"])) + wf.add(multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y)) wf.add(add2_sub2_res(name="add_sub", res=wf.mult.lzout.all_)) wf.set_output([("out_all", wf.add_sub.lzout.all_)]) wf.inputs.x = [2, 20] @@ -4046,9 +4051,9 @@ def test_wf_lzoutall_st_2(plugin, tmpdir): """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) wf.add( - multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split(["x", "y"]).combine("x") + multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") ) - wf.add(add2_sub2_res(name="add_sub", res=wf.mult.lzout.all_)) + wf.add(add2_sub2_res_list(name="add_sub", res=wf.mult.lzout.all_)) wf.set_output([("out_add", wf.add_sub.lzout.out_add)]) wf.inputs.x = [2, 20] wf.inputs.y = [3, 30] @@ -4064,6 +4069,13 @@ def test_wf_lzoutall_st_2(plugin, tmpdir): assert results.output.out_add[1] == [62, 602] +@pytest.mark.xfail( + condition=bool(shutil.which("sbatch")), # using SLURM + reason=( + "Not passing on SLURM image for some reason, hoping upgrade of image/Python " + "version fixes it" + ), +) def test_wf_lzoutall_st_2a(plugin, tmpdir): """workflow with 2 tasks, no splitter passing entire result object to add2_res function @@ -4071,9 +4083,9 @@ def test_wf_lzoutall_st_2a(plugin, tmpdir): """ wf = Workflow(name="wf_2", input_spec=["x", "y"]) wf.add( - multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split(["x", "y"]).combine("x") + multiply(name="mult").split(["x", "y"], x=wf.lzin.x, y=wf.lzin.y).combine("x") ) - wf.add(add2_sub2_res(name="add_sub", res=wf.mult.lzout.all_)) + wf.add(add2_sub2_res_list(name="add_sub", res=wf.mult.lzout.all_)) wf.set_output([("out_all", wf.add_sub.lzout.all_)]) wf.inputs.x = [2, 20] wf.inputs.y = [3, 30] @@ -4107,8 +4119,9 @@ def test_wf_resultfile_1(plugin, tmpdir): results = wf.result() # checking if the file exists and if it is in the Workflow directory - assert results.output.wf_out.exists() - assert results.output.wf_out == wf.output_dir / "file_1.txt" + wf_out = results.output.wf_out.fspath + wf_out.exists() + assert wf_out == wf.output_dir / "file_1.txt" def test_wf_resultfile_2(plugin, tmpdir): @@ -4128,8 +4141,8 @@ def test_wf_resultfile_2(plugin, tmpdir): results = wf.result() # checking if the file exists and if it is in the Workflow directory for ii, file in enumerate(results.output.wf_out): - assert file.exists() - assert file == wf.output_dir / file_list[ii] + assert file.fspath.exists() + assert file.fspath == wf.output_dir / file_list[ii] def test_wf_resultfile_3(plugin, tmpdir): @@ -4152,18 +4165,18 @@ def test_wf_resultfile_3(plugin, tmpdir): if key == "random_int": assert val == 20 else: - assert val.exists() + assert val.fspath.exists() ii = int(key.split("_")[1]) - assert val == wf.output_dir / file_list[ii] + assert val.fspath == wf.output_dir / file_list[ii] def test_wf_upstream_error1(plugin, tmpdir): """workflow with two tasks, task2 dependent on an task1 which raised an error""" wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) + wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) wf.inputs.x = "hi" # TypeError for adding str and int wf.plugin = plugin - wf.add(fun_addvar_default(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) wf.set_output([("out", wf.addvar2.lzout.out)]) with pytest.raises(ValueError) as excinfo: @@ -4178,11 +4191,10 @@ def test_wf_upstream_error2(plugin, tmpdir): goal - workflow finish running, one output errors but the other doesn't """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) - wf.inputs.x = [1, "hi"] # TypeError for adding str and int - wf.split("x") # workflow-level split + wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) + wf.split("x", x=[1, "hi"]) # workflow-level split TypeError for adding str and int wf.plugin = plugin - wf.add(fun_addvar_default(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) wf.set_output([("out", wf.addvar2.lzout.out)]) with pytest.raises(Exception) as excinfo: @@ -4198,11 +4210,11 @@ def test_wf_upstream_error3(plugin, tmpdir): goal - workflow finish running, one output errors but the other doesn't """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) + wf.add(fun_addvar_default_notype(name="addvar1")) wf.inputs.x = [1, "hi"] # TypeError for adding str and int - wf.addvar1.split("a") # task-level split + wf.addvar1.split("a", a=wf.lzin.x) # task-level split wf.plugin = plugin - wf.add(fun_addvar_default(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) wf.set_output([("out", wf.addvar2.lzout.out)]) with pytest.raises(Exception) as excinfo: @@ -4215,7 +4227,7 @@ def test_wf_upstream_error3(plugin, tmpdir): def test_wf_upstream_error4(plugin, tmpdir): """workflow with one task, which raises an error""" wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) + wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) wf.inputs.x = "hi" # TypeError for adding str and int wf.plugin = plugin wf.set_output([("out", wf.addvar1.lzout.out)]) @@ -4231,7 +4243,7 @@ def test_wf_upstream_error5(plugin, tmpdir): """nested workflow with one task, which raises an error""" wf_main = Workflow(name="wf_main", input_spec=["x"], cache_dir=tmpdir) wf = Workflow(name="wf", input_spec=["x"], x=wf_main.lzin.x) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) + wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) wf.plugin = plugin wf.set_output([("wf_out", wf.addvar1.lzout.out)]) @@ -4251,8 +4263,8 @@ def test_wf_upstream_error6(plugin, tmpdir): """nested workflow with two tasks, the first one raises an error""" wf_main = Workflow(name="wf_main", input_spec=["x"], cache_dir=tmpdir) wf = Workflow(name="wf", input_spec=["x"], x=wf_main.lzin.x) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) - wf.add(fun_addvar_default(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) + wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) wf.plugin = plugin wf.set_output([("wf_out", wf.addvar2.lzout.out)]) @@ -4274,11 +4286,11 @@ def test_wf_upstream_error7(plugin, tmpdir): the last task is set as the workflow output """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) + wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) wf.inputs.x = "hi" # TypeError for adding str and int wf.plugin = plugin - wf.add(fun_addvar_default(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default(name="addvar3", a=wf.addvar2.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar3", a=wf.addvar2.lzout.out)) wf.set_output([("out", wf.addvar3.lzout.out)]) with pytest.raises(ValueError) as excinfo: @@ -4296,11 +4308,11 @@ def test_wf_upstream_error7a(plugin, tmpdir): the second task is set as the workflow output """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) + wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) wf.inputs.x = "hi" # TypeError for adding str and int wf.plugin = plugin - wf.add(fun_addvar_default(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default(name="addvar3", a=wf.addvar2.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar3", a=wf.addvar2.lzout.out)) wf.set_output([("out", wf.addvar2.lzout.out)]) with pytest.raises(ValueError) as excinfo: @@ -4318,11 +4330,11 @@ def test_wf_upstream_error7b(plugin, tmpdir): the second and the third tasks are set as the workflow output """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) + wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) wf.inputs.x = "hi" # TypeError for adding str and int wf.plugin = plugin - wf.add(fun_addvar_default(name="addvar2", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default(name="addvar3", a=wf.addvar2.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar3", a=wf.addvar2.lzout.out)) wf.set_output([("out1", wf.addvar2.lzout.out), ("out2", wf.addvar3.lzout.out)]) with pytest.raises(ValueError) as excinfo: @@ -4337,10 +4349,10 @@ def test_wf_upstream_error7b(plugin, tmpdir): def test_wf_upstream_error8(plugin, tmpdir): """workflow with three tasks, the first one raises an error, so 2 others are removed""" wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) + wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) wf.inputs.x = "hi" # TypeError for adding str and int wf.plugin = plugin - wf.add(fun_addvar_default(name="addvar2", a=wf.addvar1.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addvar1.lzout.out)) wf.add(fun_addtwo(name="addtwo", a=wf.addvar1.lzout.out)) wf.set_output([("out1", wf.addvar2.lzout.out), ("out2", wf.addtwo.lzout.out)]) @@ -4361,13 +4373,13 @@ def test_wf_upstream_error9(plugin, tmpdir): the errored branch is connected to the workflow output """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) + wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) wf.inputs.x = 2 - wf.add(fun_addvar(name="err", a=wf.addvar1.lzout.out, b="hi")) - wf.add(fun_addvar_default(name="follow_err", a=wf.err.lzout.out)) + wf.add(fun_addvar_notype(name="err", a=wf.addvar1.lzout.out, b="hi")) + wf.add(fun_addvar_default_notype(name="follow_err", a=wf.err.lzout.out)) - wf.add(fun_addtwo(name="addtwo", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default(name="addvar2", a=wf.addtwo.lzout.out)) + wf.add(fun_addtwo_notype(name="addtwo", a=wf.addvar1.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addtwo.lzout.out)) wf.set_output([("out1", wf.follow_err.lzout.out)]) wf.plugin = plugin @@ -4390,10 +4402,10 @@ def test_wf_upstream_error9a(plugin, tmpdir): wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) wf.inputs.x = 2 - wf.add(fun_addvar(name="err", a=wf.addvar1.lzout.out, b="hi")) + wf.add(fun_addvar_notype(name="err", a=wf.addvar1.lzout.out, b="hi")) wf.add(fun_addvar_default(name="follow_err", a=wf.err.lzout.out)) - wf.add(fun_addtwo(name="addtwo", a=wf.addvar1.lzout.out)) + wf.add(fun_addtwo_notype(name="addtwo", a=wf.addvar1.lzout.out)) wf.add(fun_addvar_default(name="addvar2", a=wf.addtwo.lzout.out)) wf.set_output([("out1", wf.addvar2.lzout.out)]) # , ("out2", wf.addtwo.lzout.out)]) @@ -4411,13 +4423,13 @@ def test_wf_upstream_error9b(plugin, tmpdir): both branches are connected to the workflow output """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(fun_addvar_default(name="addvar1", a=wf.lzin.x)) + wf.add(fun_addvar_default_notype(name="addvar1", a=wf.lzin.x)) wf.inputs.x = 2 - wf.add(fun_addvar(name="err", a=wf.addvar1.lzout.out, b="hi")) - wf.add(fun_addvar_default(name="follow_err", a=wf.err.lzout.out)) + wf.add(fun_addvar_notype(name="err", a=wf.addvar1.lzout.out, b="hi")) + wf.add(fun_addvar_default_notype(name="follow_err", a=wf.err.lzout.out)) - wf.add(fun_addtwo(name="addtwo", a=wf.addvar1.lzout.out)) - wf.add(fun_addvar_default(name="addvar2", a=wf.addtwo.lzout.out)) + wf.add(fun_addtwo_notype(name="addtwo", a=wf.addvar1.lzout.out)) + wf.add(fun_addvar_default_notype(name="addvar2", a=wf.addtwo.lzout.out)) wf.set_output([("out1", wf.follow_err.lzout.out), ("out2", wf.addtwo.lzout.out)]) wf.plugin = plugin @@ -4464,7 +4476,7 @@ def test_graph_1(tmpdir, splitter): wf.add(multiply(name="mult_2", x=wf.lzin.x, y=wf.lzin.x)) wf.add(add2(name="add2", x=wf.mult_1.lzout.out)) wf.set_output([("out", wf.add2.lzout.out)]) - wf.split(splitter) + wf.split(splitter, x=[1, 2]) # simple graph dotfile_s = wf.create_dotfile() @@ -4502,7 +4514,7 @@ def test_graph_1st(tmpdir): some nodes have splitters, should be marked with blue color """ wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult_1", x=wf.lzin.x, y=wf.lzin.y).split("x")) + wf.add(multiply(name="mult_1", y=wf.lzin.y).split("x", x=wf.lzin.x)) wf.add(multiply(name="mult_2", x=wf.lzin.x, y=wf.lzin.x)) wf.add(add2(name="add2", x=wf.mult_1.lzout.out)) wf.set_output([("out", wf.add2.lzout.out)]) @@ -4543,7 +4555,7 @@ def test_graph_1st_cmb(tmpdir): first two nodes should be blue and the arrow between them should be blue """ wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split("x")) + wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) wf.add(add2(name="add2", x=wf.mult.lzout.out).combine("mult.x")) wf.add(list_sum(name="sum", x=wf.add2.lzout.out)) wf.set_output([("out", wf.sum.lzout.out)]) @@ -4616,7 +4628,7 @@ def test_graph_2st(tmpdir): the inner workflow has a state, so should be blue """ wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.lzin.x).split("x") + wfnd = Workflow(name="wfnd", input_spec=["x"]).split("x", x=wf.lzin.x) wfnd.add(add2(name="add2", x=wfnd.lzin.x)) wfnd.set_output([("out", wfnd.add2.lzout.out)]) wf.add(wfnd) @@ -4692,7 +4704,7 @@ def test_graph_3st(tmpdir): (blue node and a wfasnd, and blue arrow from the node to the wfasnd) """ wf = Workflow(name="wf", input_spec=["x", "y"], cache_dir=tmpdir) - wf.add(multiply(name="mult", x=wf.lzin.x, y=wf.lzin.y).split("x")) + wf.add(multiply(name="mult", y=wf.lzin.y).split("x", x=wf.lzin.x)) wfnd = Workflow(name="wfnd", input_spec=["x"], x=wf.mult.lzout.out) wfnd.add(add2(name="add2", x=wfnd.lzin.x)) @@ -4843,10 +4855,10 @@ def printer(a): @pytest.mark.timeout(40) def test_inner_outer_wf_duplicate(tmpdir): """checking if the execution gets stuck if there is an inner and outer workflows - thar run two nodes with the exact same inputs. + that run two nodes with the exact same inputs. """ task_list = ["First", "Second"] - start_list = [3] + start_list = [3, 4] @mark.task def one_arg(start_number): @@ -4862,7 +4874,10 @@ def one_arg_inner(start_number): # Outer workflow test_outer = Workflow( - name="test_outer", input_spec=["start_number", "task_name"], cache_dir=tmpdir + name="test_outer", + input_spec=["start_number", "task_name", "dummy"], + cache_dir=tmpdir, + dummy=1, ) # Splitting on both arguments test_outer.split( @@ -4904,7 +4919,7 @@ def pass_odds(x): return x wf = Workflow(name="wf", input_spec=["x"], cache_dir=tmpdir) - wf.add(pass_odds(name="pass_odds", x=[1, 2, 3, 4, 5]).split("x")) + wf.add(pass_odds(name="pass_odds").split("x", x=[1, 2, 3, 4, 5])) wf.set_output([("out", wf.pass_odds.lzout.out)]) with pytest.raises(Exception): @@ -4928,3 +4943,87 @@ def pass_odds(x): # and another 2 messagers after calling the second time assert tasks_run == 7 assert errors_found == 4 + + +def test_wf_state_arrays(): + wf = Workflow( + name="test", + input_spec={"x": ty.List[int], "y": int}, + output_spec={"alpha": int, "beta": ty.List[int]}, + ) + + wf.add( # Split over workflow input "x" on "scalar" input + list_mult_sum( + in_list=wf.lzin.x, + name="A", + ).split(scalar=wf.lzin.x) + ) + + wf.add( # Workflow is still split over "x", combined over "x" on out + list_mult_sum( + name="B", + scalar=wf.A.lzout.sum, + in_list=wf.A.lzout.products, + ).combine("A.scalar") + ) + + wf.add( # Workflow " + list_mult_sum( + name="C", + scalar=wf.lzin.y, + in_list=wf.B.lzout.sum, + ) + ) + + wf.add( # Workflow is split again, this time over C.products + list_mult_sum( + name="D", + in_list=wf.lzin.x, + ) + .split(scalar=wf.C.lzout.products) + .combine("scalar") + ) + + wf.add( # Workflow is finally combined again into a single node + list_mult_sum(name="E", scalar=wf.lzin.y, in_list=wf.D.lzout.sum) + ) + + wf.set_output([("alpha", wf.E.lzout.sum), ("beta", wf.E.lzout.products)]) + + results = wf(x=[1, 2, 3, 4], y=10) + assert results.output.alpha == 3000000 + assert results.output.beta == [100000, 400000, 900000, 1600000] + + +def test_wf_input_output_typing(): + wf = Workflow( + name="test", + input_spec={"x": int, "y": ty.List[int]}, + output_spec={"alpha": int, "beta": ty.List[int]}, + ) + + with pytest.raises( + TypeError, match="Cannot coerce into " + ): + list_mult_sum( + scalar=wf.lzin.y, + in_list=wf.lzin.y, + name="A", + ) + + wf.add( # Split over workflow input "x" on "scalar" input + list_mult_sum( + scalar=wf.lzin.x, + in_list=wf.lzin.y, + name="A", + ) + ) + + with pytest.raises(TypeError, match="don't match their declared types"): + wf.set_output( + [ + ("alpha", wf.A.lzout.products), + ] + ) + + wf.set_output([("alpha", wf.A.lzout.sum), ("beta", wf.A.lzout.products)]) diff --git a/pydra/engine/tests/utils.py b/pydra/engine/tests/utils.py index b2f3b6652d..5b0858866c 100644 --- a/pydra/engine/tests/utils.py +++ b/pydra/engine/tests/utils.py @@ -3,19 +3,24 @@ import sys, shutil import typing as ty from pathlib import Path +import functools +import operator import subprocess as sp import pytest +from fileformats.generic import File from ..core import Workflow from ..submitter import Submitter from ... import mark -from ..specs import File -from ... import set_input_validator + need_docker = pytest.mark.skipif( shutil.which("docker") is None or sp.call(["docker", "info"]), reason="no docker within the container", ) +need_singularity = pytest.mark.skipif( + shutil.which("singularity") is None, reason="no singularity available" +) no_win = pytest.mark.skipif( sys.platform.startswith("win"), reason="docker command not adjusted for windows docker", @@ -52,12 +57,12 @@ def result_submitter(shell_task, plugin): @mark.task -def op_4var(a, b, c, d): +def op_4var(a, b, c, d) -> str: return f"{a} {b} {c} {d}" @mark.task -def fun_addtwo(a): +def fun_addtwo(a: int) -> int: import time time.sleep(1) @@ -67,7 +72,7 @@ def fun_addtwo(a): @mark.task -def fun_addtwo_with_threadcount(a, sgeThreads=1): +def fun_addtwo_notype(a): import time time.sleep(1) @@ -77,18 +82,35 @@ def fun_addtwo_with_threadcount(a, sgeThreads=1): @mark.task -def fun_addvar(a, b): +def fun_addtwo_with_threadcount(a: int, sgeThreads: int = 1) -> int: + import time + + time.sleep(1) + if a == 3: + time.sleep(2) + return a + 2 + + +@mark.task +def fun_addvar( + a: ty.Union[int, float], b: ty.Union[int, float] +) -> ty.Union[int, float]: + return a + b + + +@mark.task +def fun_addvar_notype(a, b): return a + b @mark.task @mark.annotate({"return": {"sum": float, "sub": float}}) -def fun_addsubvar(a, b): +def fun_addsubvar(a: float, b: float): return a + b, a - b @mark.task -def fun_addvar_none(a, b): +def fun_addvar_none(a: int, b: ty.Optional[int]) -> int: if b is None: return a else: @@ -96,44 +118,59 @@ def fun_addvar_none(a, b): @mark.task -def fun_addvar_default(a, b=1): +def fun_addvar_default(a: int, b: int = 1) -> int: + return a + b + + +@mark.task +def fun_addvar_default_notype(a, b=1): return a + b @mark.task -def fun_addvar3(a, b, c): +def fun_addvar3(a: int, b: int, c: int) -> int: return a + b + c @mark.task -def fun_addvar4(a, b, c, d): +def fun_addvar4(a: int, b: int, c: int, d: int) -> int: return a + b + c + d @mark.task -def moment(lst, n): +def moment(lst: ty.List[float], n: float) -> float: return sum([i**n for i in lst]) / len(lst) @mark.task -def fun_div(a, b): +def fun_div(a: ty.Union[int, float], b: ty.Union[int, float]) -> float: return a / b @mark.task -def multiply(x, y): +def multiply(x: int, y: int) -> int: return x * y @mark.task -def add2(x): +def multiply_list(x: list, y: int) -> list: + return x * y + + +@mark.task +def multiply_mixed(x: list, y: int) -> list: + return x * y + + +@mark.task +def add2(x: int) -> int: if x == 1 or x == 12: time.sleep(1) return x + 2 @mark.task -def raise_xeq1(x): +def raise_xeq1(x: int) -> int: if x == 1: raise Exception("x is 1, so i'm raising an exception!") return x @@ -143,13 +180,18 @@ def raise_xeq1(x): @mark.annotate({"return": {"out_add": float, "out_sub": float}}) def add2_sub2_res(res): """function that takes entire output as an input""" - if isinstance(res, list): - return [r["out"] + 2 for r in res], [r["out"] - 2 for r in res] return res["out"] + 2, res["out"] - 2 @mark.task -def power(a, b): +@mark.annotate({"return": {"out_add": ty.List[float], "out_sub": ty.List[float]}}) +def add2_sub2_res_list(res): + """function that takes entire output as an input""" + return [r["out"] + 2 for r in res], [r["out"] - 2 for r in res] + + +@mark.task +def power(a: int, b: int) -> int: return a**b @@ -166,41 +208,43 @@ def identity_2flds( @mark.task -def ten(x): +def ten(x) -> int: return 10 @mark.task -def add2_wait(x): +def add2_wait(x: int) -> int: time.sleep(2) return x + 2 @mark.task -def list_output(x): +def list_output(x: int) -> ty.List[int]: return [x, 2 * x, 3 * x] @mark.task -def list_sum(x): +def list_sum(x: ty.Sequence[ty.Union[int, float]]) -> ty.Union[int, float]: return sum(x) @mark.task -def fun_dict(d): +def fun_dict(d: dict) -> str: kv_list = [f"{k}:{v}" for (k, v) in d.items()] return "_".join(kv_list) @mark.task -def fun_write_file(filename: ty.Union[str, File, Path], text="hello") -> File: +def fun_write_file(filename: Path, text="hello") -> File: with open(filename, "w") as f: f.write(text) - return Path(filename).absolute() + return File(filename) @mark.task -def fun_write_file_list(filename_list: ty.List[ty.Union[str, File, Path]], text="hi"): +def fun_write_file_list( + filename_list: ty.List[ty.Union[str, File, Path]], text="hi" +) -> ty.List[File]: for ii, filename in enumerate(filename_list): with open(filename, "w") as f: f.write(f"from file {ii}: {text}") @@ -211,7 +255,7 @@ def fun_write_file_list(filename_list: ty.List[ty.Union[str, File, Path]], text= @mark.task def fun_write_file_list2dict( filename_list: ty.List[ty.Union[str, File, Path]], text="hi" -): +) -> ty.Dict[str, ty.Union[File, int]]: filename_dict = {} for ii, filename in enumerate(filename_list): with open(filename, "w") as f: @@ -299,11 +343,14 @@ def gen_basic_wf_with_threadcount_concurrent(name="basic-wf-with-threadcount"): return wf -@pytest.fixture(scope="function") -def use_validator(request): - set_input_validator(flag=True) +@mark.task +@mark.annotate({"return": {"sum": int, "products": ty.List[int]}}) +def list_mult_sum(scalar: int, in_list: ty.List[int]) -> ty.Tuple[int, ty.List[int]]: + products = [scalar * x for x in in_list] + return functools.reduce(operator.add, products, 0), products - def fin(): - set_input_validator(flag=False) - request.addfinalizer(fin) +@mark.task +@mark.annotate({"return": {"x": str, "y": int, "z": float}}) +def foo(a: str, b: int, c: float) -> ty.Tuple[str, int, float]: + return a, b, c diff --git a/pydra/engine/workers.py b/pydra/engine/workers.py index 152a8680c4..eaa40beb0a 100644 --- a/pydra/engine/workers.py +++ b/pydra/engine/workers.py @@ -1,4 +1,5 @@ """Execution workers.""" + import asyncio import sys import json @@ -127,23 +128,29 @@ async def fetch_finished(self, futures): class SerialWorker(Worker): """A worker to execute linearly.""" + plugin_name = "serial" + def __init__(self, **kwargs): """Initialize worker.""" logger.debug("Initialize SerialWorker") - def run_el(self, interface, rerun=False, **kwargs): + def run_el(self, interface, rerun=False, environment=None, **kwargs): """Run a task.""" - return self.exec_serial(interface, rerun=rerun) + return self.exec_serial(interface, rerun=rerun, environment=environment) def close(self): """Return whether the task is finished.""" - async def exec_serial(self, runnable, rerun=False): - return runnable() + async def exec_serial(self, runnable, rerun=False, environment=None): + if isinstance(runnable, TaskBase): + return runnable._run(rerun, environment=environment) + else: # it could be tuple that includes pickle files with tasks and inputs + ind, task_main_pkl, _ = runnable + return load_and_run(task_main_pkl, ind, rerun, environment=environment) async def fetch_finished(self, futures): await asyncio.gather(*futures) - return set([]) + return set() # async def fetch_finished(self, futures): # return await asyncio.wait(futures) @@ -152,6 +159,8 @@ async def fetch_finished(self, futures): class ConcurrentFuturesWorker(Worker): """A worker to execute in parallel using Python's concurrent futures.""" + plugin_name = "cf" + def __init__(self, n_procs=None): """Initialize Worker.""" super().__init__() @@ -161,19 +170,21 @@ def __init__(self, n_procs=None): # self.loop = asyncio.get_event_loop() logger.debug("Initialize ConcurrentFuture") - def run_el(self, runnable, rerun=False, **kwargs): + def run_el(self, runnable, rerun=False, environment=None, **kwargs): """Run a task.""" assert self.loop, "No event loop available to submit tasks" - return self.exec_as_coro(runnable, rerun=rerun) + return self.exec_as_coro(runnable, rerun=rerun, environment=environment) - async def exec_as_coro(self, runnable, rerun=False): + async def exec_as_coro(self, runnable, rerun=False, environment=None): """Run a task (coroutine wrapper).""" if isinstance(runnable, TaskBase): - res = await self.loop.run_in_executor(self.pool, runnable._run, rerun) + res = await self.loop.run_in_executor( + self.pool, runnable._run, rerun, environment + ) else: # it could be tuple that includes pickle files with tasks and inputs ind, task_main_pkl, task_orig = runnable res = await self.loop.run_in_executor( - self.pool, load_and_run, task_main_pkl, ind, rerun + self.pool, load_and_run, task_main_pkl, ind, rerun, environment ) return res @@ -185,6 +196,7 @@ def close(self): class SlurmWorker(DistributedWorker): """A worker to execute tasks on SLURM systems.""" + plugin_name = "slurm" _cmd = "sbatch" _sacct_re = re.compile( "(?P\\d*) +(?P\\w*)\\+? +" "(?P\\d+):\\d+" @@ -211,7 +223,7 @@ def __init__(self, loop=None, max_jobs=None, poll_delay=1, sbatch_args=None): self.sbatch_args = sbatch_args or "" self.error = {} - def run_el(self, runnable, rerun=False): + def run_el(self, runnable, rerun=False, environment=None): """Worker submission API.""" script_dir, batch_script = self._prepare_runscripts(runnable, rerun=rerun) if (script_dir / script_dir.parts[1]) == gettempdir(): @@ -252,12 +264,12 @@ def _prepare_runscripts(self, task, interpreter="/bin/sh", rerun=False): batchscript = script_dir / f"batchscript_{uid}.sh" python_string = ( f"""'from pydra.engine.helpers import load_and_run; """ - f"""load_and_run(task_pkl="{str(task_pkl)}", ind={ind}, rerun={rerun}) '""" + f"""load_and_run(task_pkl="{task_pkl}", ind={ind}, rerun={rerun}) '""" ) bcmd = "\n".join( ( f"#!{interpreter}", - f"#SBATCH --output={str(script_dir / 'slurm-%j.out')}", + f"#SBATCH --output={script_dir / 'slurm-%j.out'}", f"{sys.executable} -c " + python_string, ) ) @@ -360,6 +372,8 @@ async def _verify_exit_code(self, jobid): class SGEWorker(DistributedWorker): """A worker to execute tasks on SLURM systems.""" + plugin_name = "sge" + _cmd = "qsub" _sacct_re = re.compile( "(?P\\d*) +(?P\\w*)\\+? +" "(?P\\d+):\\d+" @@ -439,7 +453,7 @@ def __init__( self.default_qsub_args = default_qsub_args self.max_mem_free = max_mem_free - def run_el(self, runnable, rerun=False): + def run_el(self, runnable, rerun=False): # TODO: add env """Worker submission API.""" ( script_dir, @@ -812,7 +826,7 @@ async def _submit_job( await asyncio.sleep(self.poll_delay) async def _poll_job(self, jobid, cache_dir): - cmd = (f"qstat", "-j", jobid) + cmd = ("qstat", "-j", jobid) logger.debug(f"Polling job {jobid}") rc, stdout, stderr = await read_and_display_async(*cmd, hide_display=True) @@ -823,7 +837,7 @@ async def _poll_job(self, jobid, cache_dir): return False async def _verify_exit_code(self, jobid): - cmd = (f"qacct", "-j", jobid) + cmd = ("qacct", "-j", jobid) rc, stdout, stderr = await read_and_display_async(*cmd, hide_display=True) if not stdout: await asyncio.sleep(10) @@ -853,6 +867,8 @@ class DaskWorker(Worker): This is an experimental implementation with limited testing. """ + plugin_name = "dask" + def __init__(self, **kwargs): """Initialize Worker.""" super().__init__() @@ -871,12 +887,16 @@ def run_el(self, runnable, rerun=False, **kwargs): async def exec_dask(self, runnable, rerun=False): """Run a task (coroutine wrapper).""" - if self.client is None: - from dask.distributed import Client - - self.client = await Client(**self.client_args, asynchronous=True) - future = self.client.submit(runnable._run, rerun) - result = await future + from dask.distributed import Client + + async with Client(**self.client_args, asynchronous=True) as client: + if isinstance(runnable, TaskBase): + future = client.submit(runnable._run, rerun) + result = await future + else: # it could be tuple that includes pickle files with tasks and inputs + ind, task_main_pkl, task_orig = runnable + future = client.submit(load_and_run, task_main_pkl, ind, rerun) + result = await future return result def close(self): @@ -884,10 +904,164 @@ def close(self): pass +class PsijWorker(Worker): + """A worker to execute tasks using PSI/J.""" + + def __init__(self, **kwargs): + """ + Initialize PsijWorker. + + Parameters + ---------- + subtype : str + Scheduler for PSI/J. + """ + try: + import psij + except ImportError: + logger.critical("Please install psij.") + raise + logger.debug("Initialize PsijWorker") + self.psij = psij + + def run_el(self, interface, rerun=False, **kwargs): + """Run a task.""" + return self.exec_psij(interface, rerun=rerun) + + def make_spec(self, cmd=None, arg=None): + """ + Create a PSI/J job specification. + + Parameters + ---------- + cmd : str, optional + Executable command. Defaults to None. + arg : list, optional + List of arguments. Defaults to None. + + Returns + ------- + psij.JobSpec + PSI/J job specification. + """ + spec = self.psij.JobSpec() + spec.executable = cmd + spec.arguments = arg + + return spec + + def make_job(self, spec, attributes): + """ + Create a PSI/J job. + + Parameters + ---------- + spec : psij.JobSpec + PSI/J job specification. + attributes : any + Job attributes. + + Returns + ------- + psij.Job + PSI/J job. + """ + job = self.psij.Job() + job.spec = spec + return job + + async def exec_psij(self, runnable, rerun=False): + """ + Run a task (coroutine wrapper). + + Raises + ------ + Exception + If stderr is not empty. + + Returns + ------- + None + """ + import pickle + from pathlib import Path + + jex = self.psij.JobExecutor.get_instance(self.subtype) + absolute_path = Path(__file__).parent + + if isinstance(runnable, TaskBase): + cache_dir = runnable.cache_dir + file_path = cache_dir / "runnable_function.pkl" + with open(file_path, "wb") as file: + pickle.dump(runnable._run, file) + func_path = absolute_path / "run_pickled.py" + spec = self.make_spec("python", [func_path, file_path]) + else: # it could be tuple that includes pickle files with tasks and inputs + cache_dir = runnable[-1].cache_dir + file_path_1 = cache_dir / "taskmain.pkl" + file_path_2 = cache_dir / "ind.pkl" + ind, task_main_pkl, task_orig = runnable + with open(file_path_1, "wb") as file: + pickle.dump(task_main_pkl, file) + with open(file_path_2, "wb") as file: + pickle.dump(ind, file) + func_path = absolute_path / "run_pickled.py" + spec = self.make_spec( + "python", + [ + func_path, + file_path_1, + file_path_2, + ], + ) + + if rerun: + spec.arguments.append("--rerun") + + spec.stdout_path = cache_dir / "demo.stdout" + spec.stderr_path = cache_dir / "demo.stderr" + + job = self.make_job(spec, None) + jex.submit(job) + job.wait() + + if spec.stderr_path.stat().st_size > 0: + with open(spec.stderr_path, "r") as stderr_file: + stderr_contents = stderr_file.read() + raise Exception( + f"stderr_path '{spec.stderr_path}' is not empty. Contents:\n{stderr_contents}" + ) + + return + + def close(self): + """Finalize the internal pool of tasks.""" + pass + + +class PsijLocalWorker(PsijWorker): + """A worker to execute tasks using PSI/J on the local machine.""" + + subtype = "local" + plugin_name = f"psij-{subtype}" + + +class PsijSlurmWorker(PsijWorker): + """A worker to execute tasks using PSI/J using SLURM.""" + + subtype = "slurm" + plugin_name = f"psij-{subtype}" + + WORKERS = { - "serial": SerialWorker, - "cf": ConcurrentFuturesWorker, - "slurm": SlurmWorker, - "dask": DaskWorker, - "sge": SGEWorker, + w.plugin_name: w + for w in ( + SerialWorker, + ConcurrentFuturesWorker, + SlurmWorker, + DaskWorker, + SGEWorker, + PsijLocalWorker, + PsijSlurmWorker, + ) } diff --git a/pydra/mark/__init__.py b/pydra/mark/__init__.py index 020031de1a..31e4cf832e 100644 --- a/pydra/mark/__init__.py +++ b/pydra/mark/__init__.py @@ -1 +1,3 @@ from .functions import annotate, task + +__all__ = ("annotate", "task") diff --git a/pydra/mark/functions.py b/pydra/mark/functions.py index 6830b3b34c..e191a61809 100644 --- a/pydra/mark/functions.py +++ b/pydra/mark/functions.py @@ -1,4 +1,5 @@ """ Decorators to apply to functions used in Pydra workflows """ + from functools import wraps diff --git a/pydra/tasks/__init__.py b/pydra/tasks/__init__.py index 0da1a3b130..fae53c2d92 100644 --- a/pydra/tasks/__init__.py +++ b/pydra/tasks/__init__.py @@ -5,6 +5,7 @@ To create a task package, please fork the `pydra-tasks-template `__. """ + # This call enables pydra.tasks to be used as a namespace package when installed # in editable mode. In normal installations it has no effect. __path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/pydra/utils/__init__.py b/pydra/utils/__init__.py index e69de29bb2..9008779e27 100644 --- a/pydra/utils/__init__.py +++ b/pydra/utils/__init__.py @@ -0,0 +1 @@ +from .misc import user_cache_dir, add_exc_note # noqa: F401 diff --git a/pydra/utils/hash.py b/pydra/utils/hash.py new file mode 100644 index 0000000000..6f35da0f76 --- /dev/null +++ b/pydra/utils/hash.py @@ -0,0 +1,544 @@ +"""Generic object hashing dispatch""" + +import os +import struct +from datetime import datetime +import typing as ty +from pathlib import Path +from collections.abc import Mapping +from functools import singledispatch +from hashlib import blake2b +import logging +from typing import ( + Dict, + Iterator, + NewType, + Sequence, + Set, +) +from filelock import SoftFileLock +import attrs.exceptions +from fileformats.core import FileSet +from . import user_cache_dir, add_exc_note + +logger = logging.getLogger("pydra") + +try: + from typing import Protocol +except ImportError: + from typing_extensions import Protocol # type: ignore + +try: + from typing import runtime_checkable +except ImportError: + from typing_extensions import runtime_checkable # type: ignore + + +try: + import numpy +except ImportError: + HAVE_NUMPY = False +else: + HAVE_NUMPY = True + +__all__ = ( + "hash_function", + "hash_object", + "hash_single", + "register_serializer", + "Hash", + "Cache", + "bytes_repr_mapping_contents", + "bytes_repr_sequence_contents", +) + +Hash = NewType("Hash", bytes) +CacheKey = NewType("CacheKey", ty.Tuple[ty.Hashable, ty.Hashable]) + + +def location_converter(path: ty.Union[Path, str, None]) -> Path: + if path is None: + path = PersistentCache.location_default() + path = Path(path) + try: + path.mkdir(parents=True, exist_ok=True) + except FileExistsError: + raise ValueError( + f"provided path to persistent cache {path} is a file not a directory" + ) from None + return path + + +@attrs.define +class PersistentCache: + """Persistent cache in which to store computationally expensive hashes between nodes + and workflow/task runs. It does this in via the `get_or_calculate_hash` method, which + takes a locally unique key (e.g. file-system path + mtime) and a function to + calculate the hash if it isn't present in the persistent store. + + The locally unique key is hashed (cheaply) using hashlib cryptography and this + "local hash" is use to name the entry of the (potentially expensive) hash of the + object itself (e.g. the contents of a file). This entry is saved as a text file + within a user-specific cache directory (see `platformdirs.user_cache_dir`), with + the name of the file being the "local hash" of the key and the contents of the + file being the "globally unique hash" of the object itself. + + Parameters + ---------- + location: Path + the directory in which to store the hashes cache + """ + + location: Path = attrs.field(converter=location_converter) # type: ignore[misc] + cleanup_period: int = attrs.field() + _hashes: ty.Dict[CacheKey, Hash] = attrs.field(factory=dict) + + # Set the location of the persistent hash cache + LOCATION_ENV_VAR = "PYDRA_HASH_CACHE" + CLEANUP_ENV_VAR = "PYDRA_HASH_CACHE_CLEANUP_PERIOD" + + @classmethod + def location_default(cls): + try: + location = os.environ[cls.LOCATION_ENV_VAR] + except KeyError: + location = user_cache_dir / "hashes" + return location + + # the default needs to be an instance method + @location.default + def _location_default(self): + return self.location_default() + + @cleanup_period.default + def cleanup_period_default(self): + return int(os.environ.get(self.CLEANUP_ENV_VAR, 30)) + + def get_or_calculate_hash(self, key: CacheKey, calculate_hash: ty.Callable) -> Hash: + """Check whether key is present in the persistent cache store and return it if so. + Otherwise use `calculate_hash` to generate the hash and save it in the persistent + store. + + Parameters + ---------- + key : CacheKey + locally unique key (e.g. to the host) used to lookup the corresponding hash + in the persistent store + calculate_hash : ty.Callable + function to calculate the hash if it isn't present in the persistent store + + Returns + ------- + Hash + the hash corresponding to the key, which is either retrieved from the persistent + store or calculated using `calculate_hash` if not present + """ + try: + return self._hashes[key] + except KeyError: + pass + key_path = self.location / blake2b(str(key).encode()).hexdigest() + with SoftFileLock(key_path.with_suffix(".lock")): + if key_path.exists(): + return Hash(key_path.read_bytes()) + hsh = calculate_hash() + key_path.write_bytes(hsh) + self._hashes[key] = Hash(hsh) + return Hash(hsh) + + def clean_up(self): + """Cleans up old hash caches that haven't been accessed in the last 30 days""" + now = datetime.now() + for path in self.location.iterdir(): + if path.name.endswith(".lock"): + continue + days = (now - datetime.fromtimestamp(path.lstat().st_atime)).days + if days > self.cleanup_period: + path.unlink() + + @classmethod + def from_path( + cls, path: ty.Union[Path, str, "PersistentCache", None] + ) -> "PersistentCache": + if isinstance(path, PersistentCache): + return path + return PersistentCache(path) + + +@attrs.define +class Cache: + """Cache for hashing objects, used to avoid infinite recursion caused by circular + references between objects, and to store hashes of objects that have already been + hashed to avoid recomputation. + + This concept is extended to persistent caching of hashes for certain object types, + for which calculating the hash is a potentially expensive operation (e.g. + File/Directory types). For these classes the `bytes_repr` override function yields a + "locally unique cache key" (e.g. file-system path + mtime) as the first item of its + iterator. + """ + + persistent: ty.Optional[PersistentCache] = attrs.field( + default=None, + converter=PersistentCache.from_path, # type: ignore[misc] + ) + _hashes: ty.Dict[int, Hash] = attrs.field(factory=dict) + + def __getitem__(self, object_id: int) -> Hash: + return self._hashes[object_id] + + def __setitem__(self, object_id: int, hsh: Hash): + self._hashes[object_id] = hsh + + def __contains__(self, object_id): + return object_id in self._hashes + + +def hash_function(obj, **kwargs): + """Generate hash of object.""" + return hash_object(obj, **kwargs).hex() + + +def hash_object( + obj: object, + cache: ty.Optional[Cache] = None, + persistent_cache: ty.Union[PersistentCache, Path, None] = None, +) -> Hash: + """Hash an object + + Constructs a byte string that uniquely identifies the object, + and returns the hash of that string. + + Base Python types are implemented, including recursive lists and + dicts. Custom types can be registered with :func:`register_serializer`. + """ + if cache is None: + cache = Cache(persistent=persistent_cache) + try: + return hash_single(obj, cache) + except Exception as e: + tp = type(obj) + add_exc_note( + e, + ( + f"and therefore cannot hash `{obj!r}` of type " + f"`{tp.__module__}.{tp.__name__}`. Consider implementing a " + "specific `bytes_repr()`(see pydra.utils.hash.register_serializer) " + "or a `__bytes_repr__()` dunder methods for this type" + ), + ) + raise e + + +def hash_single(obj: object, cache: Cache) -> Hash: + """Single object-scoped hash + + Uses a local cache to prevent infinite recursion. This cache is unsafe + to reuse across multiple objects, so this function should not be used directly. + """ + objid = id(obj) + if objid not in cache: + # Handle recursion by putting a dummy value in the cache + cache[objid] = Hash(b"\x00") + bytes_it = bytes_repr(obj, cache) + # Pop first element from the bytes_repr iterator and check whether it is a + # "local cache key" (e.g. file-system path + mtime tuple) or the first bytes + # chunk + + def calc_hash(first: ty.Optional[bytes] = None) -> Hash: + """ + Calculate the hash of the object + + Parameters + ---------- + first : ty.Optional[bytes] + the first bytes chunk from the bytes_repr iterator, passed if the first + chunk wasn't a local cache key + """ + h = blake2b(digest_size=16, person=b"pydra-hash") + # We want to use the first chunk that was popped to check for a cache-key + # if present + if first is not None: + h.update(first) + for chunk in bytes_it: # Note that `bytes_it` is in outer scope + h.update(chunk) + return Hash(h.digest()) + + # Read the first item of the bytes_repr iterator and check to see whether it yields + # a "cache-key" tuple instead of a bytes chunk for the type of the object to be cached + # (e.g. file-system path + mtime for fileformats.core.FileSet objects). If it + # does, use that key to check the persistent cache for a precomputed hash and + # return it if it is, otherwise calculate the hash and store it in the persistent + # cache with that hash of that key (not to be confused with the hash of the + # object that is saved/retrieved). + first = next(bytes_it) + if isinstance(first, tuple): + tp = type(obj) + key = ( + tp.__module__, + tp.__name__, + ) + first + hsh = cache.persistent.get_or_calculate_hash(key, calc_hash) + else: + # If the first item is a bytes chunk (i.e. the object type doesn't have an + # associated 'cache-key'), then simply calculate the hash of the object, + # passing the first chunk to the `calc_hash` function so it can be included + # in the hash calculation + hsh = calc_hash(first=first) + logger.debug("Hash of %s object is %s", obj, hsh) + cache[objid] = hsh + return cache[objid] + + +@runtime_checkable +class HasBytesRepr(Protocol): + def __bytes_repr__(self, cache: Cache) -> Iterator[bytes]: + pass # pragma: no cover + + +@singledispatch +def bytes_repr(obj: object, cache: Cache) -> Iterator[bytes]: + """Default implementation of hashing for generic objects. Single dispatch is used + to provide hooks for class-specific implementations + + Parameters + ---------- + obj: object + the object to hash + cache : Cache + a dictionary object used to store a cache of previously cached objects to + handle circular object references + + Yields + ------- + bytes + unique representation of the object in a series of bytes + """ + cls = obj.__class__ + yield f"{cls.__module__}.{cls.__name__}:{{".encode() + dct: Dict[str, ty.Any] + if attrs.has(type(obj)): + # Drop any attributes that aren't used in comparisons by default + dct = attrs.asdict(obj, recurse=False, filter=lambda a, _: bool(a.eq)) + elif hasattr(obj, "__slots__"): + dct = {attr: getattr(obj, attr) for attr in obj.__slots__} + else: + dct = obj.__dict__ + yield from bytes_repr_mapping_contents(dct, cache) + yield b"}" + + +register_serializer = bytes_repr.register +register_serializer.__doc__ = """Register a custom serializer for a type + +The generator function should yield byte strings that will be hashed +to produce the final hash. A recommended convention is to yield a +qualified type prefix (e.g. ``f"{module}.{class}"``), +followed by a colon, followed by the serialized value. + +If serializing an iterable, an open and close bracket may be yielded +to identify the start and end of the iterable. + +Consider using :func:`bytes_repr_mapping_contents` and +:func:`bytes_repr_sequence_contents` to serialize the contents of a mapping +or sequence. These do not include the prefix or brackets, so they can be +reused as part of a custom serializer. + +As an example, the following example is the default serializer for user-defined +classes: + +.. code-block:: python + + @register_serializer + def bytes_repr(obj: object, cache: Cache) -> Iterator[bytes]: + cls = obj.__class__ + yield f"{cls.__module__}.{cls.__name__}:{{".encode() + yield from bytes_repr_mapping_contents(obj.__dict__, cache) + yield b"}" + +Serializers must accept a ``cache`` argument, which is a dictionary that +permits caching of hashes for recursive objects. If the hash of sub-objects +is used to create an object serialization, the :func:`hash_single` function +should be called with the same cache object. +""" + + +@register_serializer +def bytes_repr_dunder(obj: HasBytesRepr, cache: Cache) -> Iterator[bytes]: + yield from obj.__bytes_repr__(cache) + + +@register_serializer(type(None)) +@register_serializer(type(Ellipsis)) +@register_serializer(bool) +@register_serializer(range) +def bytes_repr_builtin_repr( + obj: object, + cache: Cache, +) -> Iterator[bytes]: + yield repr(obj).encode() + + +@register_serializer +def bytes_repr_slice(obj: slice, cache: Cache) -> Iterator[bytes]: + yield b"slice(" + yield from bytes_repr_sequence_contents((obj.start, obj.stop, obj.step), cache) + yield b")" + + +@register_serializer +def bytes_repr_pathlike(obj: os.PathLike, cache: Cache) -> Iterator[bytes]: + cls = obj.__class__ + yield f"{cls.__module__}.{cls.__name__}:{os.fspath(obj)}".encode() + + +@register_serializer +def bytes_repr_bytes(obj: bytes, cache: Cache) -> Iterator[bytes]: + yield f"bytes:{len(obj)}:".encode() + yield obj + + +@register_serializer +def bytes_repr_str(obj: str, cache: Cache) -> Iterator[bytes]: + val = obj.encode() + yield f"str:{len(val)}:".encode() + yield val + + +@register_serializer +def bytes_repr_int(obj: int, cache: Cache) -> Iterator[bytes]: + try: + # Up to 64-bit ints + val = struct.pack(" Iterator[bytes]: + yield b"float:" + yield struct.pack(" Iterator[bytes]: + yield b"complex:" + yield struct.pack(" Iterator[bytes]: + yield b"dict:{" + yield from bytes_repr_mapping_contents(obj, cache) + yield b"}" + + +@register_serializer(ty._GenericAlias) +@register_serializer(ty._SpecialForm) +@register_serializer(type) +def bytes_repr_type(klass: type, cache: Cache) -> Iterator[bytes]: + def type_name(tp): + try: + name = tp.__name__ + except AttributeError: + name = tp._name + return name + + yield b"type:(" + origin = ty.get_origin(klass) + if origin: + yield f"{origin.__module__}.{type_name(origin)}[".encode() + for arg in ty.get_args(klass): + if isinstance( + arg, list + ): # sometimes (e.g. Callable) the args of a type is a list + yield b"[" + yield from (b for t in arg for b in bytes_repr_type(t, cache)) + yield b"]" + else: + yield from bytes_repr_type(arg, cache) + yield b"]" + else: + yield f"{klass.__module__}.{type_name(klass)}".encode() + yield b")" + + +@register_serializer(FileSet) +def bytes_repr_fileset( + fileset: FileSet, cache: Cache +) -> Iterator[ty.Union[CacheKey, bytes]]: + fspaths = sorted(fileset.fspaths) + yield CacheKey( + tuple(repr(p) for p in fspaths) # type: ignore[arg-type] + + tuple(p.lstat().st_mtime_ns for p in fspaths) + ) + yield from fileset.__bytes_repr__(cache) + + +@register_serializer(list) +@register_serializer(tuple) +def bytes_repr_seq(obj: Sequence, cache: Cache) -> Iterator[bytes]: + yield f"{obj.__class__.__name__}:(".encode() + yield from bytes_repr_sequence_contents(obj, cache) + yield b")" + + +@register_serializer(set) +@register_serializer(frozenset) +def bytes_repr_set(obj: Set, cache: Cache) -> Iterator[bytes]: + yield f"{obj.__class__.__name__}:{{".encode() + yield from bytes_repr_sequence_contents(sorted(obj), cache) + yield b"}" + + +def bytes_repr_mapping_contents(mapping: Mapping, cache: Cache) -> Iterator[bytes]: + """Serialize the contents of a mapping + + Concatenates byte-serialized keys and hashed values. + + .. code-block:: python + + >>> from pydra.utils.hash import bytes_repr_mapping_contents, Cache + >>> generator = bytes_repr_mapping_contents({"a": 1, "b": 2}, Cache()) + >>> b''.join(generator) + b'str:1:a=...str:1:b=...' + """ + for key in sorted(mapping): + yield from bytes_repr(key, cache) + yield b"=" + yield bytes(hash_single(mapping[key], cache)) + + +def bytes_repr_sequence_contents(seq: Sequence, cache: Cache) -> Iterator[bytes]: + """Serialize the contents of a sequence + + Concatenates hashed values. + + .. code-block:: python + + >>> from pydra.utils.hash import bytes_repr_sequence_contents, Cache + >>> generator = bytes_repr_sequence_contents([1, 2], Cache()) + >>> list(generator) + [b'\x6d...', b'\xa3...'] + """ + for val in seq: + yield bytes(hash_single(val, cache)) + + +if HAVE_NUMPY: + + @register_serializer(numpy.generic) + @register_serializer(numpy.ndarray) + def bytes_repr_numpy(obj: numpy.ndarray, cache: Cache) -> Iterator[bytes]: + yield f"{obj.__class__.__module__}{obj.__class__.__name__}:{obj.size}:".encode() + if obj.dtype == "object": + yield from bytes_repr_sequence_contents(iter(obj.ravel()), cache) + else: + yield obj.tobytes(order="C") + + +NUMPY_CHUNK_LEN = 8192 diff --git a/pydra/utils/messenger.py b/pydra/utils/messenger.py index b1c587153c..fc70e345f8 100644 --- a/pydra/utils/messenger.py +++ b/pydra/utils/messenger.py @@ -1,4 +1,5 @@ """Messaging of states.""" + import abc import datetime as dt import enum @@ -129,9 +130,11 @@ def send(self, message, **kwargs): r = requests.post( kwargs["post_url"], json=message, - auth=kwargs["auth"]() - if getattr(kwargs["auth"], "__call__", None) - else kwargs["auth"], + auth=( + kwargs["auth"]() + if getattr(kwargs["auth"], "__call__", None) + else kwargs["auth"] + ), ) return r.status_code @@ -196,5 +199,5 @@ def collect_messages(collected_path, message_path, ld_op="compact"): pld.jsonld.from_rdf(pld.jsonld.to_rdf(data, {})), data[0] ) records["@id"] = f"uid:{gen_uuid()}" - with open(collected_path / "messages.jsonld", "wt") as fp: + with open(collected_path / "messages.jsonld", "w") as fp: json.dump(records, fp, ensure_ascii=False, indent=2, sort_keys=False) diff --git a/pydra/utils/misc.py b/pydra/utils/misc.py new file mode 100644 index 0000000000..9a40769c9d --- /dev/null +++ b/pydra/utils/misc.py @@ -0,0 +1,33 @@ +from pathlib import Path +import platformdirs +from pydra._version import __version__ + +user_cache_dir = Path( + platformdirs.user_cache_dir( + appname="pydra", + appauthor="nipype", + version=__version__, + ) +) + + +def add_exc_note(e: Exception, note: str) -> Exception: + """Adds a note to an exception in a Python <3.11 compatible way + + Parameters + ---------- + e : Exception + the exception to add the note to + note : str + the note to add + + Returns + ------- + Exception + returns the exception again + """ + if hasattr(e, "add_note"): + e.add_note(note) + else: + e.args = (e.args[0] + "\n" + note,) + return e diff --git a/pydra/utils/profiler.py b/pydra/utils/profiler.py index 9b417ef987..f531345c98 100644 --- a/pydra/utils/profiler.py +++ b/pydra/utils/profiler.py @@ -1,4 +1,5 @@ """Utilities to keep track of performance and resource utilization.""" + from pathlib import Path import psutil import threading @@ -9,7 +10,7 @@ class ResourceMonitor(threading.Thread): - """A thread to monitor a specific PID with a certain frequence to a file.""" + """A thread to monitor a specific PID with a certain frequency to a file.""" def __init__(self, pid, interval=5, logdir=None, fname=None): """ diff --git a/pydra/utils/tests/test_hash.py b/pydra/utils/tests/test_hash.py new file mode 100644 index 0000000000..2c74de6e48 --- /dev/null +++ b/pydra/utils/tests/test_hash.py @@ -0,0 +1,412 @@ +import re +import os +from hashlib import blake2b +from pathlib import Path +import time +from unittest import mock +import attrs +import pytest +import typing as ty +from fileformats.application import Zip, Json +from fileformats.text import TextFile +from ..hash import ( + Cache, + bytes_repr, + hash_object, + register_serializer, + PersistentCache, +) + + +@pytest.fixture +def hasher(): + yield blake2b(digest_size=16, person=b"pydra-hash") + + +def join_bytes_repr(obj): + return b"".join(bytes_repr(obj, Cache())) + + +def test_bytes_repr_builtins(): + # Can't beat repr for some + assert join_bytes_repr(None) == b"None" + assert join_bytes_repr(Ellipsis) == b"Ellipsis" + assert join_bytes_repr(True) == b"True" + assert join_bytes_repr(False) == b"False" + assert join_bytes_repr(range(1)) == b"range(0, 1)" + assert join_bytes_repr(range(-1, 10, 2)) == b"range(-1, 10, 2)" + # String types + assert join_bytes_repr(b"abc") == b"bytes:3:abc" + assert join_bytes_repr("abc") == b"str:3:abc" + # Little-endian, 64-bit signed integer + assert join_bytes_repr(123) == b"int:\x7b\x00\x00\x00\x00\x00\x00\x00" + # ASCII string representation of a Python "long" integer + assert join_bytes_repr(12345678901234567890) == b"long:20:12345678901234567890" + # Float uses little-endian double-precision format + assert join_bytes_repr(1.0) == b"float:\x00\x00\x00\x00\x00\x00\xf0?" + # Complex concatenates two floats + complex_repr = join_bytes_repr(0.0 + 0j) + assert complex_repr == b"complex:" + bytes(16) + # Dicts are sorted by key, and values are hashed + dict_repr = join_bytes_repr({"b": "c", "a": 0}) + assert re.match(rb"dict:{str:1:a=.{16}str:1:b=.{16}}$", dict_repr) + # Lists and tuples concatenate hashes of their contents + list_repr = join_bytes_repr([1, 2, 3]) + assert re.match(rb"list:\(.{48}\)$", list_repr) + tuple_repr = join_bytes_repr((1, 2, 3)) + assert re.match(rb"tuple:\(.{48}\)$", tuple_repr) + # Sets sort, hash and concatenate their contents + set_repr = join_bytes_repr({1, 2, 3}) + assert re.match(rb"set:{.{48}}$", set_repr) + # Sets sort, hash and concatenate their contents + fset_repr = join_bytes_repr(frozenset((1, 2, 3))) + assert re.match(rb"frozenset:{.{48}}$", fset_repr) + # Slice fields can be anything, so hash contents + slice_repr = join_bytes_repr(slice(1, 2, 3)) + assert re.match(rb"slice\(.{48}\)$", slice_repr) + + +@pytest.mark.parametrize( + "obj,expected", + [ + ("abc", "bc6289a80ec21621f20dea1907cc8b9a"), + (b"abc", "29ddaec80d4b3baba945143faa4c9e96"), + (1, "6dc1db8d4dcdd8def573476cbb90cce0"), + (12345678901234567890, "2b5ba668c1e8ea4902361b8d81e53074"), + (1.0, "29492927b2e505840235e15a5be9f79a"), + ({"b": "c", "a": 0}, "2405cd36f4e4b6318c033f32db289f7d"), + ([1, 2, 3], "2f8902ff90f63d517bd6f6e6111e15b8"), + ((1, 2, 3), "054a7b31c29e7875a6f83ff1dcb4841b"), + ], +) +def test_hash_object_known_values(obj: object, expected: str): + # Regression test to avoid accidental changes to hash_object + # We may update this, but it will indicate that users should + # expect cache directories to be invalidated + assert hash_object(obj).hex() == expected + + +def test_pathlike_reprs(tmp_path): + cls = tmp_path.__class__ + prefix = f"{cls.__module__}.{cls.__name__}" + # Directory + assert join_bytes_repr(tmp_path) == f"{prefix}:{tmp_path}".encode() + # Non-existent file + empty_file = tmp_path / "empty" + assert join_bytes_repr(empty_file) == f"{prefix}:{empty_file}".encode() + # Existent file + empty_file.touch() + assert join_bytes_repr(empty_file) == f"{prefix}:{empty_file}".encode() + + class MyPathLike: + def __fspath__(self): + return "/tmp" + + prefix = f"{__name__}.MyPathLike" + assert join_bytes_repr(MyPathLike()) == f"{prefix}:/tmp".encode() + + +def test_hash_pathlikes(tmp_path, hasher): + cls = tmp_path.__class__ + prefix = f"{cls.__module__}.{cls.__name__}" + + # Directory + h = hasher.copy() + h.update(f"{prefix}:{tmp_path}".encode()) + assert hash_object(tmp_path) == h.digest() + + # Non-existent file + empty_file = tmp_path / "empty" + h = hasher.copy() + h.update(f"{prefix}:{empty_file}".encode()) + assert hash_object(empty_file) == h.digest() + + # Existent file + empty_file.touch() + assert hash_object(empty_file) == h.digest() + + class MyPathLike: + def __fspath__(self): + return "/tmp" + + prefix = f"{__name__}.MyPathLike" + h = hasher.copy() + h.update(f"{prefix}:/tmp".encode()) + assert hash_object(MyPathLike()) == h.digest() + + +def test_bytes_repr_custom_obj(): + class MyClass: + def __init__(self, x): + self.x = x + + obj_repr = join_bytes_repr(MyClass(1)) + assert re.match(rb".*\.MyClass:{str:1:x=.{16}}", obj_repr) + + +def test_bytes_repr_slots_obj(): + class MyClass: + __slots__ = ("x",) + + def __init__(self, x): + self.x = x + + obj_repr = join_bytes_repr(MyClass(1)) + assert re.match(rb".*\.MyClass:{str:1:x=.{16}}", obj_repr) + + +def test_bytes_repr_attrs_slots(): + @attrs.define + class MyClass: + x: int + + obj_repr = join_bytes_repr(MyClass(1)) + assert re.match(rb".*\.MyClass:{str:1:x=.{16}}", obj_repr) + + +def test_bytes_repr_attrs_no_slots(): + @attrs.define(slots=False) + class MyClass: + x: int + + obj_repr = join_bytes_repr(MyClass(1)) + assert re.match(rb".*\.MyClass:{str:1:x=.{16}}", obj_repr) + + +def test_bytes_repr_type1(): + obj_repr = join_bytes_repr(Path) + assert obj_repr == b"type:(pathlib.Path)" + + +def test_bytes_repr_type1a(): + obj_repr = join_bytes_repr(Zip[Json]) + assert obj_repr == rb"type:(fileformats.application.archive.Json__Zip)" + + +def test_bytes_repr_type2(): + T = ty.TypeVar("T") + + class MyClass(ty.Generic[T]): + pass + + obj_repr = join_bytes_repr(MyClass[int]) + assert ( + obj_repr == b"type:(pydra.utils.tests.test_hash.MyClass[type:(builtins.int)])" + ) + + +def test_bytes_special_form1(): + obj_repr = join_bytes_repr(ty.Union[int, float]) + assert obj_repr == b"type:(typing.Union[type:(builtins.int)type:(builtins.float)])" + + +def test_bytes_special_form2(): + obj_repr = join_bytes_repr(ty.Any) + assert re.match(rb"type:\(typing.Any\)", obj_repr) + + +def test_bytes_special_form3(): + obj_repr = join_bytes_repr(ty.Optional[Path]) + assert ( + obj_repr == b"type:(typing.Union[type:(pathlib.Path)type:(builtins.NoneType)])" + ) + + +def test_bytes_special_form4(): + obj_repr = join_bytes_repr(ty.Type[Path]) + assert obj_repr == b"type:(builtins.type[type:(pathlib.Path)])" + + +def test_bytes_special_form5(): + obj_repr = join_bytes_repr(ty.Callable[[Path, int], ty.Tuple[float, str]]) + assert obj_repr == ( + b"type:(collections.abc.Callable[[type:(pathlib.Path)type:(builtins.int)]" + b"type:(builtins.tuple[type:(builtins.float)type:(builtins.str)])])" + ) + + +def test_recursive_object(): + a = [] + b = [a] + a.append(b) + + obj_repr = join_bytes_repr(a) + assert re.match(rb"list:\(.{16}\)$", obj_repr) + + # Objects are structurally equal, but not the same object + assert hash_object(a) == hash_object(b) + + +def test_multi_object(): + # Including the same object multiple times in a list + # should produce the same hash each time it is encountered + set1 = {1, 2, 3} + set2 = {4, 5, 6} + listA = [set1, set2, set1] + listB = [set1, set2, set2] + + reprA = join_bytes_repr(listA) + reprB = join_bytes_repr(listB) + assert re.match(rb"list:\((.{16})(.{16})\1\)$", reprA) + assert re.match(rb"list:\((.{16})(.{16})\2\)$", reprB) + + +def test_magic_method(): + class MyClass: + def __init__(self, x): + self.x = x + + def __bytes_repr__(self, cache): + yield b"x" + + assert join_bytes_repr(MyClass(1)) == b"x" + + +def test_registration(): + # WARNING: This test appends to a registry that cannot be restored + # to previous state. + class MyClass: + def __init__(self, x): + self.x = x + + @register_serializer + def _(obj: MyClass, cache: Cache): + yield b"x" + + assert join_bytes_repr(MyClass(1)) == b"x" + + +def test_registration_conflict(): + # Verify the order of precedence: class/superclass registration, __bytes_repr__, protocols + # + # WARNING: This test appends to a registry that cannot be restored + # to previous state. + class MyClass: + def __init__(self, x): + self.x = x + + def __fspath__(self): + return "pathlike" + + assert join_bytes_repr(MyClass(1)) == f"{__name__}.MyClass:pathlike".encode() + + class MyNewClass(MyClass): + def __bytes_repr__(self, cache: Cache): + yield b"bytes_repr" + + assert join_bytes_repr(MyNewClass(1)) == b"bytes_repr" + + @register_serializer + def _(obj: MyClass, cache: Cache): + yield b"serializer" + + assert join_bytes_repr(MyClass(1)) == b"serializer" + + register_serializer(MyNewClass, _) + + assert join_bytes_repr(MyNewClass(1)) == b"serializer" + + +@pytest.fixture +def cache_path(tmp_path): + cache_path = tmp_path / "hash-cache" + cache_path.mkdir() + return cache_path + + +@pytest.fixture +def text_file(tmp_path): + text_file_path = tmp_path / "text-file.txt" + text_file_path.write_text("foo") + return TextFile(text_file_path) + + +def test_persistent_hash_cache(cache_path, text_file): + """ + Test the persistent hash cache with a text file + + The cache is used to store the hash of the text file, and the hash is + retrieved from the cache when the file is unchanged. + """ + # Test hash is stable between calls + hsh = hash_object(text_file, persistent_cache=cache_path) + assert hsh == hash_object(text_file, persistent_cache=cache_path) + + # Test that cached hash has been used by explicitly modifying it and seeing that the + # hash is the same as the modified hash + cache_files = list(cache_path.iterdir()) + assert len(cache_files) == 1 + modified_hash = "modified".encode() + cache_files[0].write_bytes(modified_hash) + assert hash_object(text_file, persistent_cache=cache_path) == modified_hash + + # Test that changes to the text file result in new hash + time.sleep(2) # Need to ensure that the mtimes will be different + text_file.fspath.write_text("bar") + assert hash_object(text_file, persistent_cache=cache_path) != modified_hash + assert len(list(cache_path.iterdir())) == 2 + + +def test_persistent_hash_cache_cleanup1(cache_path, text_file): + """ + Test the persistent hash is cleaned up after use if the periods between cleanups + is greater than the environment variable PYDRA_HASH_CACHE_CLEANUP_PERIOD + """ + with mock.patch.dict( + os.environ, + { + "PYDRA_HASH_CACHE": str(cache_path), + "PYDRA_HASH_CACHE_CLEANUP_PERIOD": "-100", + }, + ): + persistent_cache = PersistentCache() + hash_object(text_file, persistent_cache=persistent_cache) + assert len(list(cache_path.iterdir())) == 1 + persistent_cache.clean_up() + assert len(list(cache_path.iterdir())) == 0 + + +def test_persistent_hash_cache_cleanup2(cache_path, text_file): + """ + Test the persistent hash is cleaned up after use if the periods between cleanups + is greater than the explicitly provided cleanup_period + """ + persistent_cache = PersistentCache(cache_path, cleanup_period=-100) + hash_object(text_file, persistent_cache=persistent_cache) + assert len(list(cache_path.iterdir())) == 1 + time.sleep(2) + persistent_cache.clean_up() + assert len(list(cache_path.iterdir())) == 0 + + +def test_persistent_hash_cache_not_dir(text_file): + """ + Test that an error is raised if the provided cache path is not a directory + """ + with pytest.raises(ValueError, match="not a directory"): + PersistentCache(text_file.fspath) + + +def test_unhashable(): + """ + Test that an error is raised if an unhashable object is provided + """ + + class A: + + def __bytes_repr__(self, cache: Cache) -> ty.Generator[bytes, None, None]: + raise TypeError("unhashable") + + def __repr__(self): + return "A()" + + # hash_object(A()) + + with pytest.raises( + TypeError, + match=( + "unhashable\nand therefore cannot hash `A\(\)` of type " + "`pydra.utils.tests.test_hash.A`" + ), + ): + hash_object(A()) diff --git a/pydra/utils/tests/test_typing.py b/pydra/utils/tests/test_typing.py new file mode 100644 index 0000000000..665d79327d --- /dev/null +++ b/pydra/utils/tests/test_typing.py @@ -0,0 +1,782 @@ +import os +import itertools +import sys +import typing as ty +from pathlib import Path +import tempfile +import pytest +from pydra import mark +from ...engine.specs import File, LazyOutField +from ..typing import TypeParser +from pydra import Workflow +from fileformats.application import Json, Yaml, Xml +from .utils import ( + generic_func_task, + GenericShellTask, + specific_func_task, + SpecificShellTask, + other_specific_func_task, + OtherSpecificShellTask, + MyFormatX, + MyOtherFormatX, + MyHeader, +) + + +def lz(tp: ty.Type): + """convenience method for creating a LazyField of type 'tp'""" + return LazyOutField(name="foo", field="boo", type=tp) + + +PathTypes = ty.Union[str, os.PathLike] + + +def test_type_check_basic1(): + TypeParser(float, coercible=[(int, float)])(lz(int)) + + +def test_type_check_basic2(): + with pytest.raises(TypeError, match="doesn't match any of the explicit inclusion"): + TypeParser(int, coercible=[(int, float)])(lz(float)) + + +def test_type_check_basic3(): + TypeParser(int, coercible=[(ty.Any, int)])(lz(float)) + + +def test_type_check_basic4(): + with pytest.raises(TypeError, match="doesn't match any of the explicit inclusion"): + TypeParser(int, coercible=[(ty.Any, float)])(lz(float)) + + +def test_type_check_basic5(): + assert TypeParser(float, not_coercible=[(ty.Any, str)])(lz(int)) + + +def test_type_check_basic6(): + with pytest.raises(TypeError, match="explicitly excluded"): + TypeParser(int, coercible=None, not_coercible=[(float, int)])(lz(float)) + + +def test_type_check_basic7(): + path_coercer = TypeParser(Path, coercible=[(os.PathLike, os.PathLike)]) + + path_coercer(lz(Path)) + + with pytest.raises(TypeError, match="doesn't match any of the explicit inclusion"): + path_coercer(lz(str)) + + +def test_type_check_basic8(): + TypeParser(Path, coercible=[(PathTypes, PathTypes)])(lz(str)) + TypeParser(str, coercible=[(PathTypes, PathTypes)])(lz(Path)) + + +def test_type_check_basic9(): + file_coercer = TypeParser(File, coercible=[(PathTypes, File)]) + + file_coercer(lz(Path)) + file_coercer(lz(str)) + + +def test_type_check_basic10(): + impotent_str_coercer = TypeParser(str, coercible=[(PathTypes, File)]) + + with pytest.raises(TypeError, match="doesn't match any of the explicit inclusion"): + impotent_str_coercer(lz(File)) + + +def test_type_check_basic11(): + TypeParser(str, coercible=[(PathTypes, PathTypes)])(lz(File)) + TypeParser(File, coercible=[(PathTypes, PathTypes)])(lz(str)) + + +def test_type_check_basic12(): + TypeParser( + list, + coercible=[(ty.Sequence, ty.Sequence)], + not_coercible=[(str, ty.Sequence)], + )(lz(ty.Tuple[int, int, int])) + + +def test_type_check_basic13(): + TypeParser( + list, + coercible=[(ty.Sequence, ty.Sequence)], + not_coercible=[(str, ty.Sequence)], + )(lz(ty.Tuple[int, ...])) + + +def test_type_check_basic14(): + with pytest.raises(TypeError, match="explicitly excluded"): + TypeParser( + list, + coercible=[(ty.Sequence, ty.Sequence)], + not_coercible=[(str, ty.Sequence)], + )(lz(str)) + + +def test_type_check_basic15(): + TypeParser(ty.Union[Path, File, float])(lz(int)) + + +def test_type_check_basic16(): + with pytest.raises( + TypeError, match="Cannot coerce to any of the union types" + ): + TypeParser(ty.Union[Path, File, bool, int])(lz(float)) + + +def test_type_check_basic17(): + TypeParser(ty.Sequence)(lz(ty.Tuple[int, ...])) + + +def test_type_check_nested1(): + TypeParser(ty.List[File])(lz(ty.List[Path])) + + +def test_type_check_nested2(): + TypeParser(ty.List[Path])(lz(ty.List[File])) + + +def test_type_check_nested3(): + TypeParser(ty.List[Path])(lz(ty.List[str])) + + +def test_type_check_nested4(): + TypeParser(ty.List[str])(lz(ty.List[File])) + + +def test_type_check_nested5(): + TypeParser(ty.Dict[str, ty.List[File]])(lz(ty.Dict[str, ty.List[Path]])) + + +def test_type_check_nested6(): + TypeParser(ty.Tuple[float, ...])(lz(ty.List[int])) + + +def test_type_check_nested7(): + TypeParser(ty.Tuple[float, float, float])(lz(ty.List[int])) + + +def test_type_check_nested7a(): + with pytest.raises(TypeError, match="Wrong number of type arguments"): + TypeParser(ty.Tuple[float, float, float])(lz(ty.Tuple[int])) + + +def test_type_check_nested8(): + with pytest.raises(TypeError, match="explicitly excluded"): + TypeParser( + ty.Tuple[int, ...], + not_coercible=[(ty.Sequence, ty.Tuple)], + )(lz(ty.List[float])) + + +def test_type_check_permit_superclass(): + # Typical case as Json is subclass of File + TypeParser(ty.List[File])(lz(ty.List[Json])) + # Permissive super class, as File is superclass of Json + TypeParser(ty.List[Json], superclass_auto_cast=True)(lz(ty.List[File])) + with pytest.raises(TypeError, match="Cannot coerce"): + TypeParser(ty.List[Json], superclass_auto_cast=False)(lz(ty.List[File])) + # Fails because Yaml is neither sub or super class of Json + with pytest.raises(TypeError, match="Cannot coerce"): + TypeParser(ty.List[Json], superclass_auto_cast=True)(lz(ty.List[Yaml])) + + +def test_type_check_fail1(): + with pytest.raises(TypeError, match="Wrong number of type arguments in tuple"): + TypeParser(ty.Tuple[int, int, int])(lz(ty.Tuple[float, float, float, float])) + + +def test_type_check_fail2(): + with pytest.raises(TypeError, match="to any of the union types"): + TypeParser(ty.Union[Path, File])(lz(int)) + + +def test_type_check_fail3(): + with pytest.raises(TypeError, match="doesn't match any of the explicit inclusion"): + TypeParser(ty.Sequence, coercible=[(ty.Sequence, ty.Sequence)])( + lz(ty.Dict[str, int]) + ) + + +def test_type_check_fail4(): + with pytest.raises(TypeError, match="Cannot coerce into"): + TypeParser(ty.Sequence)(lz(ty.Dict[str, int])) + + +def test_type_check_fail5(): + with pytest.raises(TypeError, match=" doesn't match pattern"): + TypeParser(ty.List[int])(lz(int)) + + +def test_type_check_fail6(): + with pytest.raises(TypeError, match=" doesn't match pattern"): + TypeParser(ty.List[ty.Dict[str, str]])(lz(ty.Tuple[int, int, int])) + + +def test_type_coercion_basic(): + assert TypeParser(float, coercible=[(ty.Any, float)])(1) == 1.0 + + +def test_type_coercion_basic1(): + with pytest.raises(TypeError, match="doesn't match any of the explicit inclusion"): + TypeParser(float, coercible=[(ty.Any, int)])(1) + + +def test_type_coercion_basic2(): + assert ( + TypeParser(int, coercible=[(ty.Any, ty.Any)], not_coercible=[(ty.Any, str)])( + 1.0 + ) + == 1 + ) + + +def test_type_coercion_basic3(): + with pytest.raises(TypeError, match="explicitly excluded"): + TypeParser(int, coercible=[(ty.Any, ty.Any)], not_coercible=[(float, int)])(1.0) + + +def test_type_coercion_basic4(): + path_coercer = TypeParser(Path, coercible=[(os.PathLike, os.PathLike)]) + + assert path_coercer(Path("/a/path")) == Path("/a/path") + + with pytest.raises(TypeError, match="doesn't match any of the explicit inclusion"): + path_coercer("/a/path") + + +def test_type_coercion_basic5(): + assert TypeParser(Path, coercible=[(PathTypes, PathTypes)])("/a/path") == Path( + "/a/path" + ) + + +def test_type_coercion_basic6(): + assert TypeParser(str, coercible=[(PathTypes, PathTypes)])(Path("/a/path")) == str( + Path("/a/path") + ) + + +@pytest.fixture +def a_file(tmp_path): + fspath = tmp_path / "a-file.txt" + Path.touch(fspath) + return fspath + + +def test_type_coercion_basic7(a_file): + file_coercer = TypeParser(File, coercible=[(PathTypes, File)]) + + assert file_coercer(a_file) == File(a_file) + assert file_coercer(str(a_file)) == File(a_file) + + +def test_type_coercion_basic8(a_file): + impotent_str_coercer = TypeParser(str, coercible=[(PathTypes, File)]) + + with pytest.raises(TypeError, match="doesn't match any of the explicit inclusion"): + impotent_str_coercer(File(a_file)) + + +def test_type_coercion_basic9(a_file): + assert TypeParser(str, coercible=[(PathTypes, PathTypes)])(File(a_file)) == str( + a_file + ) + + +def test_type_coercion_basic10(a_file): + assert TypeParser(File, coercible=[(PathTypes, PathTypes)])(str(a_file)) == File( + a_file + ) + + +def test_type_coercion_basic11(): + assert TypeParser( + list, + coercible=[(ty.Sequence, ty.Sequence)], + not_coercible=[(str, ty.Sequence)], + )((1, 2, 3)) == [1, 2, 3] + + +def test_type_coercion_basic12(): + with pytest.raises(TypeError, match="explicitly excluded"): + TypeParser( + list, + coercible=[(ty.Sequence, ty.Sequence)], + not_coercible=[(str, ty.Sequence)], + )("a-string") + + assert TypeParser(ty.Union[Path, File, int], coercible=[(ty.Any, ty.Any)])(1.0) == 1 + + +def test_type_coercion_basic13(): + assert ( + TypeParser(ty.Union[Path, File, bool, int], coercible=[(ty.Any, ty.Any)])(1.0) + is True + ) + + +def test_type_coercion_basic14(): + assert TypeParser(ty.Sequence, coercible=[(ty.Any, ty.Any)])((1, 2, 3)) == ( + 1, + 2, + 3, + ) + + +@pytest.fixture +def another_file(tmp_path): + fspath = tmp_path / "another-file.txt" + Path.touch(fspath) + return fspath + + +@pytest.fixture +def yet_another_file(tmp_path): + fspath = tmp_path / "yet-another-file.txt" + Path.touch(fspath) + return fspath + + +def test_type_coercion_nested1(a_file, another_file, yet_another_file): + assert TypeParser(ty.List[File], coercible=[(PathTypes, PathTypes)])( + [a_file, another_file, yet_another_file] + ) == [File(a_file), File(another_file), File(yet_another_file)] + + +def test_type_coercion_nested3(a_file, another_file, yet_another_file): + assert TypeParser(ty.List[Path], coercible=[(PathTypes, PathTypes)])( + [File(a_file), File(another_file), File(yet_another_file)] + ) == [a_file, another_file, yet_another_file] + + +def test_type_coercion_nested4(a_file, another_file, yet_another_file): + assert TypeParser(ty.Dict[str, ty.List[File]], coercible=[(PathTypes, PathTypes)])( + { + "a": [a_file, another_file, yet_another_file], + "b": [a_file, another_file], + } + ) == { + "a": [File(a_file), File(another_file), File(yet_another_file)], + "b": [File(a_file), File(another_file)], + } + + +def test_type_coercion_nested5(a_file, another_file, yet_another_file): + assert TypeParser(ty.List[File], coercible=[(PathTypes, PathTypes)])( + [a_file, another_file, yet_another_file] + ) == [File(a_file), File(another_file), File(yet_another_file)] + + +def test_type_coercion_nested6(): + assert TypeParser(ty.Tuple[int, int, int], coercible=[(ty.Any, ty.Any)])( + [1.0, 2.0, 3.0] + ) == (1, 2, 3) + + +def test_type_coercion_nested7(): + assert TypeParser(ty.Tuple[int, ...], coercible=[(ty.Any, ty.Any)])( + [1.0, 2.0, 3.0] + ) == (1, 2, 3) + + +def test_type_coercion_nested8(): + with pytest.raises(TypeError, match="explicitly excluded"): + TypeParser( + ty.Tuple[int, ...], + coercible=[(ty.Any, ty.Any)], + not_coercible=[(ty.Sequence, ty.Tuple)], + )([1.0, 2.0, 3.0]) + + +def test_type_coercion_fail1(): + with pytest.raises(TypeError, match="Incorrect number of items"): + TypeParser(ty.Tuple[int, int, int], coercible=[(ty.Any, ty.Any)])( + [1.0, 2.0, 3.0, 4.0] + ) + + +def test_type_coercion_fail2(): + with pytest.raises(TypeError, match="to any of the union types"): + TypeParser(ty.Union[Path, File], coercible=[(ty.Any, ty.Any)])(1) + + +def test_type_coercion_fail3(): + with pytest.raises(TypeError, match="doesn't match any of the explicit inclusion"): + TypeParser(ty.Sequence, coercible=[(ty.Sequence, ty.Sequence)])( + {"a": 1, "b": 2} + ) + + +def test_type_coercion_fail4(): + with pytest.raises(TypeError, match="Cannot coerce {'a': 1} into"): + TypeParser(ty.Sequence, coercible=[(ty.Any, ty.Any)])({"a": 1}) + + +def test_type_coercion_fail5(): + with pytest.raises(TypeError, match="as 1 is not iterable"): + TypeParser(ty.List[int], coercible=[(ty.Any, ty.Any)])(1) + + +def test_type_coercion_fail6(): + with pytest.raises(TypeError, match="is not a mapping type"): + TypeParser(ty.List[ty.Dict[str, str]], coercible=[(ty.Any, ty.Any)])((1, 2, 3)) + + +def test_type_coercion_realistic(): + tmpdir = Path(tempfile.mkdtemp()) + a_file = tmpdir / "a-file.txt" + another_file = tmpdir / "another-file.txt" + yet_another_file = tmpdir / "yet-another-file.txt" + Path.touch(a_file) + Path.touch(another_file) + Path.touch(yet_another_file) + file_list = [File(p) for p in (a_file, another_file, yet_another_file)] + + @mark.task + @mark.annotate({"return": {"a": ty.List[File], "b": ty.List[str]}}) + def f(x: ty.List[File], y: ty.Dict[str, ty.List[File]]): + return list(itertools.chain(x, *y.values())), list(y.keys()) + + task = f(x=file_list, y={"a": file_list[1:]}) + + TypeParser(ty.List[str])(task.lzout.a) # pylint: disable=no-member + with pytest.raises( + TypeError, + match="Cannot coerce into ", + ): + TypeParser(ty.List[int])(task.lzout.a) # pylint: disable=no-member + + with pytest.raises( + TypeError, match="Cannot coerce 'bad-value' into " + ): + task.inputs.x = "bad-value" + + +def test_check_missing_type_args(): + with pytest.raises(TypeError, match="wasn't declared with type args required"): + TypeParser(ty.List[int]).check_type(list) + with pytest.raises(TypeError, match="doesn't match pattern"): + TypeParser(ty.List[int]).check_type(dict) + + +def test_matches_type_union(): + assert TypeParser.matches_type(ty.Union[int, bool, str], ty.Union[int, bool, str]) + assert TypeParser.matches_type(ty.Union[int, bool], ty.Union[int, bool, str]) + assert not TypeParser.matches_type(ty.Union[int, bool, str], ty.Union[int, bool]) + + +def test_matches_type_dict(): + COERCIBLE = [(str, Path), (Path, str), (int, float)] + + assert TypeParser.matches_type( + ty.Dict[Path, int], ty.Dict[str, int], coercible=COERCIBLE + ) + assert TypeParser.matches_type( + ty.Dict[Path, int], ty.Dict[str, float], coercible=COERCIBLE + ) + assert not TypeParser.matches_type( + ty.Dict[Path, int], ty.Dict[str, int], coercible=[] + ) + assert not TypeParser.matches_type( + ty.Dict[Path, int], ty.Dict[str, float], coercible=[] + ) + assert not TypeParser.matches_type( + ty.Dict[Path, float], ty.Dict[str, int], coercible=COERCIBLE + ) + assert not TypeParser.matches_type( + ty.Tuple[str, int], ty.Dict[str, int], coercible=COERCIBLE + ) + + +def test_matches_type_type(): + assert TypeParser.matches_type(type, type) + assert not TypeParser.matches_type(int, type) + + +def test_matches_type_tuple(): + assert TypeParser.matches_type(ty.Tuple[int], ty.Tuple[int]) + assert TypeParser.matches_type( + ty.Tuple[int], ty.Tuple[float], coercible=[(int, float)] + ) + assert not TypeParser.matches_type( + ty.Tuple[float], ty.Tuple[int], coercible=[(int, float)] + ) + assert TypeParser.matches_type(ty.Tuple[int, int], ty.Tuple[int, int]) + assert not TypeParser.matches_type(ty.Tuple[int, int], ty.Tuple[int]) + assert not TypeParser.matches_type(ty.Tuple[int], ty.Tuple[int, int]) + + +def test_matches_type_tuple_ellipsis1(): + assert TypeParser.matches_type(ty.Tuple[int], ty.Tuple[int, ...]) + + +def test_matches_type_tuple_ellipsis2(): + assert TypeParser.matches_type(ty.Tuple[int, int], ty.Tuple[int, ...]) + + +def test_matches_type_tuple_ellipsis3(): + assert not TypeParser.matches_type(ty.Tuple[int, float], ty.Tuple[int, ...]) + + +def test_matches_type_tuple_ellipsis4(): + assert TypeParser.matches_type(ty.Tuple[int, ...], ty.Tuple[int]) + + +def test_matches_type_tuple_ellipsis5(): + assert TypeParser.matches_type( + ty.Tuple[int], ty.List[int], coercible=[(tuple, list)] + ) + + +def test_matches_type_tuple_ellipsis6(): + assert TypeParser.matches_type( + ty.Tuple[int, ...], ty.List[int], coercible=[(tuple, list)] + ) + + +def test_contains_type_in_dict(): + assert TypeParser.contains_type(int, ty.Dict[str, ty.List[ty.Tuple[int, ...]]]) + assert not TypeParser.contains_type( + int, ty.Dict[str, ty.List[ty.Tuple[float, ...]]] + ) + + +def test_type_matches(): + assert TypeParser.matches([1, 2, 3], ty.List[int]) + assert TypeParser.matches((1, 2, 3), ty.Tuple[int, ...]) + + assert TypeParser.matches((1, 2, 3), ty.List[int]) + assert not TypeParser.matches((1, 2, 3), ty.List[int], coercible=[]) + + +@pytest.fixture(params=["func", "shell"]) +def generic_task(request): + if request.param == "func": + return generic_func_task + elif request.param == "shell": + return GenericShellTask + else: + assert False + + +@pytest.fixture(params=["func", "shell"]) +def specific_task(request): + if request.param == "func": + return specific_func_task + elif request.param == "shell": + return SpecificShellTask + else: + assert False + + +@pytest.fixture(params=["func", "shell"]) +def other_specific_task(request): + if request.param == "func": + return other_specific_func_task + elif request.param == "shell": + return OtherSpecificShellTask + else: + assert False + + +def test_typing_implicit_cast_from_super(tmp_path, generic_task, specific_task): + """Check the casting of lazy fields and whether specific file-sets can be recovered + from generic `File` classes""" + + wf = Workflow( + name="test", + input_spec={"in_file": MyFormatX}, + output_spec={"out_file": MyFormatX}, + ) + + wf.add( + specific_task( + in_file=wf.lzin.in_file, + name="specific1", + ) + ) + + wf.add( # Generic task + generic_task( + in_file=wf.specific1.lzout.out, + name="generic", + ) + ) + + wf.add( + specific_task( + in_file=wf.generic.lzout.out, + name="specific2", + ) + ) + + wf.set_output( + [ + ("out_file", wf.specific2.lzout.out), + ] + ) + + in_file = MyFormatX.sample() + + result = wf(in_file=in_file, plugin="serial") + + out_file: MyFormatX = result.output.out_file + assert type(out_file) is MyFormatX + assert out_file.parent != in_file.parent + assert type(out_file.header) is MyHeader + assert out_file.header.parent != in_file.header.parent + + +def test_typing_cast(tmp_path, specific_task, other_specific_task): + """Check the casting of lazy fields and whether specific file-sets can be recovered + from generic `File` classes""" + + wf = Workflow( + name="test", + input_spec={"in_file": MyFormatX}, + output_spec={"out_file": MyFormatX}, + ) + + wf.add( + specific_task( + in_file=wf.lzin.in_file, + name="entry", + ) + ) + + with pytest.raises(TypeError, match="Cannot coerce"): + # No cast of generic task output to MyFormatX + wf.add( # Generic task + other_specific_task( + in_file=wf.entry.lzout.out, + name="inner", + ) + ) + + wf.add( # Generic task + other_specific_task( + in_file=wf.entry.lzout.out.cast(MyOtherFormatX), + name="inner", + ) + ) + + with pytest.raises(TypeError, match="Cannot coerce"): + # No cast of generic task output to MyFormatX + wf.add( + specific_task( + in_file=wf.inner.lzout.out, + name="exit", + ) + ) + + wf.add( + specific_task( + in_file=wf.inner.lzout.out.cast(MyFormatX), + name="exit", + ) + ) + + wf.set_output( + [ + ("out_file", wf.exit.lzout.out), + ] + ) + + in_file = MyFormatX.sample() + + result = wf(in_file=in_file, plugin="serial") + + out_file: MyFormatX = result.output.out_file + assert type(out_file) is MyFormatX + assert out_file.parent != in_file.parent + assert type(out_file.header) is MyHeader + assert out_file.header.parent != in_file.header.parent + + +def test_type_is_subclass1(): + assert TypeParser.is_subclass(ty.Type[File], type) + + +def test_type_is_subclass2(): + assert not TypeParser.is_subclass(ty.Type[File], ty.Type[Json]) + + +def test_type_is_subclass3(): + assert TypeParser.is_subclass(ty.Type[Json], ty.Type[File]) + + +def test_union_is_subclass1(): + assert TypeParser.is_subclass(ty.Union[Json, Yaml], ty.Union[Json, Yaml, Xml]) + + +def test_union_is_subclass2(): + assert not TypeParser.is_subclass(ty.Union[Json, Yaml, Xml], ty.Union[Json, Yaml]) + + +def test_union_is_subclass3(): + assert TypeParser.is_subclass(Json, ty.Union[Json, Yaml]) + + +def test_union_is_subclass4(): + assert not TypeParser.is_subclass(ty.Union[Json, Yaml], Json) + + +def test_generic_is_subclass1(): + assert TypeParser.is_subclass(ty.List[int], list) + + +def test_generic_is_subclass2(): + assert not TypeParser.is_subclass(list, ty.List[int]) + + +def test_generic_is_subclass3(): + assert not TypeParser.is_subclass(ty.List[float], ty.List[int]) + + +@pytest.mark.skipif( + sys.version_info < (3, 9), reason="Cannot subscript tuple in < Py3.9" +) +def test_generic_is_subclass4(): + class MyTuple(tuple): + pass + + class A: + pass + + class B(A): + pass + + assert TypeParser.is_subclass(MyTuple[A], ty.Tuple[A]) + assert TypeParser.is_subclass(ty.Tuple[B], ty.Tuple[A]) + assert TypeParser.is_subclass(MyTuple[B], ty.Tuple[A]) + assert not TypeParser.is_subclass(ty.Tuple[A], ty.Tuple[B]) + assert not TypeParser.is_subclass(ty.Tuple[A], MyTuple[A]) + assert not TypeParser.is_subclass(MyTuple[A], ty.Tuple[B]) + assert TypeParser.is_subclass(MyTuple[A, int], ty.Tuple[A, int]) + assert TypeParser.is_subclass(ty.Tuple[B, int], ty.Tuple[A, int]) + assert TypeParser.is_subclass(MyTuple[B, int], ty.Tuple[A, int]) + assert TypeParser.is_subclass(MyTuple[int, B], ty.Tuple[int, A]) + assert not TypeParser.is_subclass(MyTuple[B, int], ty.Tuple[int, A]) + assert not TypeParser.is_subclass(MyTuple[int, B], ty.Tuple[A, int]) + assert not TypeParser.is_subclass(MyTuple[B, int], ty.Tuple[A]) + assert not TypeParser.is_subclass(MyTuple[B], ty.Tuple[A, int]) + + +def test_type_is_instance1(): + assert TypeParser.is_instance(File, ty.Type[File]) + + +def test_type_is_instance2(): + assert not TypeParser.is_instance(File, ty.Type[Json]) + + +def test_type_is_instance3(): + assert TypeParser.is_instance(Json, ty.Type[File]) + + +def test_type_is_instance4(): + assert TypeParser.is_instance(Json, type) diff --git a/pydra/utils/tests/utils.py b/pydra/utils/tests/utils.py new file mode 100644 index 0000000000..3582fa9eda --- /dev/null +++ b/pydra/utils/tests/utils.py @@ -0,0 +1,181 @@ +from fileformats.generic import File +from fileformats.core.mixin import WithSeparateHeader, WithMagicNumber +from pydra import mark +from pydra.engine.task import ShellCommandTask +from pydra.engine import specs + + +class MyFormat(WithMagicNumber, File): + ext = ".my" + magic_number = b"MYFORMAT" + + +class MyHeader(File): + ext = ".hdr" + + +class MyFormatX(WithSeparateHeader, MyFormat): + header_type = MyHeader + + +class MyOtherFormatX(WithMagicNumber, WithSeparateHeader, File): + magic_number = b"MYFORMAT" + ext = ".my" + header_type = MyHeader + + +@mark.task +def generic_func_task(in_file: File) -> File: + return in_file + + +generic_shell_input_fields = [ + ( + "in_file", + File, + { + "help_string": "the input file", + "argstr": "", + "copyfile": "copy", + }, + ), + ( + "out", + str, + { + "help_string": "output file name", + "argstr": "", + "position": -1, + "output_file_template": "{in_file}", + }, + ), +] + +generic_shell_input_spec = specs.SpecInfo( + name="Input", fields=generic_shell_input_fields, bases=(specs.ShellSpec,) +) + +generic_shell_output_fields = [ + ( + "out", + File, + { + "help_string": "output file", + }, + ), +] +generic_shelloutput_spec = specs.SpecInfo( + name="Output", fields=generic_shell_output_fields, bases=(specs.ShellOutSpec,) +) + + +class GenericShellTask(ShellCommandTask): + input_spec = generic_shell_input_spec + output_spec = generic_shelloutput_spec + executable = "echo" + + +@mark.task +def specific_func_task(in_file: MyFormatX) -> MyFormatX: + return in_file + + +specific_shell_input_fields = [ + ( + "in_file", + MyFormatX, + { + "help_string": "the input file", + "argstr": "", + "copyfile": "copy", + "sep": " ", + }, + ), + ( + "out", + str, + { + "help_string": "output file name", + "argstr": "", + "position": -1, + "output_file_template": "{in_file}", # Pass through un-altered + }, + ), +] + +specific_shell_input_spec = specs.SpecInfo( + name="Input", fields=specific_shell_input_fields, bases=(specs.ShellSpec,) +) + +specific_shell_output_fields = [ + ( + "out", + MyFormatX, + { + "help_string": "output file", + }, + ), +] +specific_shelloutput_spec = specs.SpecInfo( + name="Output", fields=specific_shell_output_fields, bases=(specs.ShellOutSpec,) +) + + +class SpecificShellTask(ShellCommandTask): + input_spec = specific_shell_input_spec + output_spec = specific_shelloutput_spec + executable = "echo" + + +@mark.task +def other_specific_func_task(in_file: MyOtherFormatX) -> MyOtherFormatX: + return in_file + + +other_specific_shell_input_fields = [ + ( + "in_file", + MyOtherFormatX, + { + "help_string": "the input file", + "argstr": "", + "copyfile": "copy", + "sep": " ", + }, + ), + ( + "out", + str, + { + "help_string": "output file name", + "argstr": "", + "position": -1, + "output_file_template": "{in_file}", # Pass through un-altered + }, + ), +] + +other_specific_shell_input_spec = specs.SpecInfo( + name="Input", fields=other_specific_shell_input_fields, bases=(specs.ShellSpec,) +) + +other_specific_shell_output_fields = [ + ( + "out", + MyOtherFormatX, + { + "help_string": "output file", + }, + ), +] +other_specific_shelloutput_spec = specs.SpecInfo( + name="Output", + fields=other_specific_shell_output_fields, + bases=(specs.ShellOutSpec,), +) + + +class OtherSpecificShellTask(ShellCommandTask): + input_spec = other_specific_shell_input_spec + output_spec = other_specific_shelloutput_spec + executable = "echo" diff --git a/pydra/utils/typing.py b/pydra/utils/typing.py new file mode 100644 index 0000000000..ee8e733e44 --- /dev/null +++ b/pydra/utils/typing.py @@ -0,0 +1,868 @@ +import itertools +import inspect +from pathlib import Path +import os +import sys +import typing as ty +import logging +import attr +from ..engine.specs import ( + LazyField, + StateArray, + MultiInputObj, + MultiOutputObj, +) +from fileformats import field + +try: + from typing import get_origin, get_args +except ImportError: + # Python < 3.8 + from typing_extensions import get_origin, get_args # type: ignore + +logger = logging.getLogger("pydra") + +NO_GENERIC_ISSUBCLASS = sys.version_info.major == 3 and sys.version_info.minor < 10 + +if NO_GENERIC_ISSUBCLASS: + from typing_utils import issubtype + +try: + import numpy +except ImportError: + HAVE_NUMPY = False +else: + HAVE_NUMPY = True + +T = ty.TypeVar("T") +TypeOrAny = ty.Union[type, ty.Any] + + +class TypeParser(ty.Generic[T]): + """A callable which can be used as a converter for attrs.fields to check whether an + object or LazyField matches the specified field type, or can be + coerced into it (given the criteria passed on initialisation of the checker). + Nested container type are expanded and each of their type args are checked/coerced + against corresponding parts of the object. + + Parameters + ---------- + tp : type + the type objects will be coerced to + coercible: Iterable[ty.Tuple[type or Any, type or Any]], optional + limits coercing between the pairs of types where they appear within the + tree of more complex nested container types. If None, then all types are + coercible except explicitly excluded + not_coercible: Iterable[ty.Tuple[type or Any, type or Any]], optional + excludes the limits coercing between the pairs of types where they appear within + the tree of more complex nested container types. Overrides 'coercible' to enable + you to carve out exceptions, such as TypeParser(list, coercible=[(ty.Iterable, list)], + not_coercible=[(str, list)]) + superclass_auto_cast : bool + Allow lazy fields to pass the type check if their types are superclasses of the + specified pattern (instead of matching or being subclasses of the pattern) + label : str + the label to be used to identify the type parser in error messages. Especially + useful when TypeParser is used as a converter in attrs.fields + """ + + tp: ty.Type[T] + coercible: ty.List[ty.Tuple[TypeOrAny, TypeOrAny]] + not_coercible: ty.List[ty.Tuple[TypeOrAny, TypeOrAny]] + superclass_auto_cast: bool + label: str + + COERCIBLE_DEFAULT: ty.Tuple[ty.Tuple[type, type], ...] = ( + ( + (ty.Sequence, ty.Sequence), + (ty.Mapping, ty.Mapping), + (Path, os.PathLike), + (str, os.PathLike), + (os.PathLike, Path), + (os.PathLike, str), + (ty.Any, MultiInputObj), + (int, float), + (field.Integer, float), + (int, field.Decimal), + ) + + tuple((f, f.primitive) for f in field.Singular.subclasses() if f.primitive) + + tuple((f.primitive, f) for f in field.Singular.subclasses() if f.primitive) + ) + + if HAVE_NUMPY: + COERCIBLE_DEFAULT += ( + (numpy.integer, int), + (numpy.floating, float), + (numpy.bool_, bool), + (numpy.integer, float), + (numpy.character, str), + (numpy.complexfloating, complex), + (numpy.bytes_, bytes), + (numpy.ndarray, ty.Sequence), + (ty.Sequence, numpy.ndarray), + ) + + NOT_COERCIBLE_DEFAULT = ((str, ty.Sequence), (ty.Sequence, str)) + + def __init__( + self, + tp, + coercible: ty.Optional[ + ty.Iterable[ty.Tuple[TypeOrAny, TypeOrAny]] + ] = COERCIBLE_DEFAULT, + not_coercible: ty.Optional[ + ty.Iterable[ty.Tuple[TypeOrAny, TypeOrAny]] + ] = NOT_COERCIBLE_DEFAULT, + superclass_auto_cast: bool = False, + label: str = "", + ): + def expand_pattern(t): + """Recursively expand the type arguments of the target type in nested tuples""" + if t is inspect._empty: + return None + origin = get_origin(t) + if origin is None: + return t + args = get_args(t) + if not args or args == (Ellipsis,): # Not sure Ellipsis by itself is valid + # If no args were provided, or those arguments were an ellipsis + assert isinstance(origin, type) + return origin + if origin not in (ty.Union, type) and not issubclass(origin, ty.Iterable): + raise TypeError( + f"TypeParser doesn't know how to handle args ({args}) for {origin} " + f"types{self.label_str}" + ) + return (origin, [expand_pattern(a) for a in args]) + + self.label = label + self.tp = tp + self.coercible = ( + list(coercible) if coercible is not None else [(ty.Any, ty.Any)] + ) + self.not_coercible = list(not_coercible) if not_coercible is not None else [] + self.pattern = expand_pattern(tp) + self.superclass_auto_cast = superclass_auto_cast + + def __call__(self, obj: ty.Any) -> ty.Union[T, LazyField[T]]: + """Attempts to coerce the object to the specified type, unless the value is + a LazyField where the type of the field is just checked instead or an + attrs.NOTHING where it is simply returned. + + Parameters + ---------- + obj : ty.Any + the object to coerce/check-type + + Returns + ------- + T + the coerced object + + Raises + ------ + TypeError + if the coercion is not possible, or not specified by the + `coercible`/`not_coercible` parameters, then a TypeError is raised + """ + coerced: T + if obj is attr.NOTHING: + coerced = attr.NOTHING # type: ignore[assignment] + elif isinstance(obj, LazyField): + try: + self.check_type(obj.type) + except TypeError as e: + if self.superclass_auto_cast: + try: + # Check whether the type of the lazy field isn't a superclass of + # the type to check against, and if so, allow it due to permissive + # typing rules. + TypeParser(obj.type).check_type(self.tp) + except TypeError: + raise e + else: + logger.info( + "Connecting lazy field %s to %s%s via permissive typing that " + "allows super-to-sub type connections", + obj, + self.tp, + self.label_str, + ) + else: + raise e + coerced = obj # type: ignore + elif isinstance(obj, StateArray): + coerced = StateArray(self(o) for o in obj) # type: ignore[assignment] + else: + coerced = self.coerce(obj) + return coerced + + def coerce(self, object_: ty.Any) -> T: + """Attempts to coerce the given object to the type of the specified type""" + if self.pattern is None: + return object_ + + def expand_and_coerce(obj, pattern: ty.Union[type, tuple]): + """Attempt to expand the object along the lines of the coercion pattern""" + if obj is attr.NOTHING: + return attr.NOTHING + if not isinstance(pattern, tuple): + return coerce_basic(obj, pattern) + origin, pattern_args = pattern + if origin is ty.Union: + return coerce_union(obj, pattern_args) + if origin is type: + return coerce_type(obj, pattern_args) + if not self.is_instance(obj, origin): + self.check_coercible(obj, origin) + type_ = origin + else: + type_ = type(obj) + if issubclass(type_, ty.Mapping): + return coerce_mapping(obj, type_, pattern_args) + try: + obj_args = list(obj) + except TypeError as e: + msg = ( + f" (part of coercion from {object_!r} to {self.pattern}" + if obj is not object_ + else "" + ) + raise TypeError( + f"Could not coerce to {type_} as {obj!r} is not iterable{msg}{self.label_str}" + ) from e + if issubclass(origin, tuple): + return coerce_tuple(type_, obj_args, pattern_args) + if issubclass(origin, ty.Iterable): + return coerce_sequence(type_, obj_args, pattern_args) + assert False, f"Coercion from {obj!r} to {pattern} is not handled" + + def coerce_basic(obj, pattern): + """Coerce an object to a "basic types" like `int`, `float`, `bool`, `Path` + and `File` in contrast to compound types like `list[int]`, + `dict[str, str]` and `dict[str, list[int]]`""" + if self.is_instance(obj, pattern): + return obj + self.check_coercible(obj, pattern) + return coerce_obj(obj, pattern) + + def coerce_union(obj, pattern_args): + """Coerce an object into the first type in a Union construct that it is + coercible into""" + reasons = [] + for arg in pattern_args: + try: + return expand_and_coerce(obj, arg) + except TypeError as e: + reasons.append(e) + raise TypeError( + f"Could not coerce object, {obj!r}, to any of the union types " + f"{pattern_args}{self.label_str}:\n\n" + + "\n\n".join(f"{a} -> {e}" for a, e in zip(pattern_args, reasons)) + ) + + def coerce_mapping( + obj: ty.Mapping, type_: ty.Type[ty.Mapping], pattern_args: list + ): + """Coerce a mapping (e.g. dict)""" + key_pattern, val_pattern = pattern_args + items: ty.Iterable[ty.Tuple[ty.Any, ty.Any]] + try: + items = obj.items() + except AttributeError as e: + msg = ( + f" (part of coercion from {object_} to {self.pattern}" + if obj is not object_ + else "" + ) + raise TypeError( + f"Could not coerce to {type_} as {obj} is not a mapping type{msg}{self.label_str}" + ) from e + return coerce_obj( + { + expand_and_coerce(k, key_pattern): expand_and_coerce(v, val_pattern) + for k, v in items + }, + type_, + ) + + def coerce_tuple( + type_: ty.Type[ty.Sequence], + obj_args: list, + pattern_args: list, + ): + """coerce to a tuple object""" + if pattern_args[-1] is Ellipsis: + pattern_args = itertools.chain( # type: ignore[assignment] + pattern_args[:-2], itertools.repeat(pattern_args[-2]) + ) + elif len(pattern_args) != len(obj_args): + raise TypeError( + f"Incorrect number of items in tuple, expected " + f"{len(pattern_args)}, got {len(obj_args)}{self.label_str}" + ) + return coerce_obj( + [expand_and_coerce(o, p) for o, p in zip(obj_args, pattern_args)], type_ + ) + + def coerce_sequence( + type_: ty.Type[ty.Sequence], obj_args: list, pattern_args: list + ): + """Coerce a non-tuple sequence object (e.g. list, ...)""" + assert len(pattern_args) == 1 + return coerce_obj( + [expand_and_coerce(o, pattern_args[0]) for o in obj_args], type_ + ) + + def coerce_type(type_: ty.Type[ty.Any], pattern_args: ty.List[ty.Type[ty.Any]]): + if not any(issubclass(type_, t) for t in pattern_args): + raise TypeError( + f"{type_} is not one of the specified types {pattern_args}{self.label_str}" + ) + return type_ + + def coerce_obj(obj, type_): + """Attempt to do the innermost (i.e. non-nested) coercion and fail with + helpful message + """ + try: + return type_(obj) + except (TypeError, ValueError) as e: + msg = ( + f" (part of coercion from {object_} to {self.pattern}" + if obj is not object_ + else "" + ) + raise TypeError( + f"Cannot coerce {obj!r} into {type_}{msg}{self.label_str}" + ) from e + + return expand_and_coerce(object_, self.pattern) + + def check_type(self, type_: ty.Type[ty.Any]): + """Checks the given type to see whether it matches or is a subtype of the + specified type or whether coercion rule is specified between the types + + Parameters + ---------- + type_ : ty.Type[ty.Any] + the type to check whether it is coercible into the specified type + + Raises + ------ + TypeError + if the type is not either the specified type, a sub-type or coercible to it + """ + if self.pattern is None or type_ is ty.Any: + return + if self.is_subclass(type_, StateArray): + args = get_args(type_) + if not args: + raise TypeError("Splits without any type arguments are invalid") + if len(args) > 1: + raise TypeError( + f"Splits with more than one type argument ({args}) are invalid{self.label_str}" + ) + return self.check_type(args[0]) + + def expand_and_check(tp, pattern: ty.Union[type, tuple]): + """Attempt to expand the object along the lines of the coercion pattern""" + if not isinstance(pattern, tuple): + return check_basic(tp, pattern) + pattern_origin, pattern_args = pattern + if pattern_origin is ty.Union: + return check_union(tp, pattern_args) + tp_origin = get_origin(tp) + if tp_origin is None: + if issubclass(tp, pattern_origin): + raise TypeError( + f"Type {tp} wasn't declared with type args required to match pattern " + f"{pattern_args}, when matching {type_} to {self.pattern}" + ) + raise TypeError( + f"{tp} doesn't match pattern {pattern}, when matching {type_} to " + f"{self.pattern}{self.label_str}" + ) + tp_args = get_args(tp) + self.check_coercible(tp_origin, pattern_origin) + if issubclass(pattern_origin, ty.Mapping): + return check_mapping(tp_args, pattern_args) + if issubclass(pattern_origin, tuple): + if not issubclass(tp_origin, tuple): + assert len(tp_args) == 1 + tp_args += (Ellipsis,) + return check_tuple(tp_args, pattern_args) + return check_sequence(tp_args, pattern_args) + + def check_basic(tp, target): + # Note that we are deliberately more permissive than typical type-checking + # here, allowing parents of the target type as well as children, + # to avoid users having to cast from loosely typed tasks to strict ones + if not self.is_subclass(tp, target): + self.check_coercible(tp, target) + + def check_union(tp, pattern_args): + if get_origin(tp) is ty.Union: + for tp_arg in get_args(tp): + reasons = [] + for pattern_arg in pattern_args: + try: + expand_and_check(tp_arg, pattern_arg) + except TypeError as e: + reasons.append(e) + else: + reasons = None + break + if reasons: + raise TypeError( + f"Cannot coerce {tp} to " + f"ty.Union[{', '.join(str(a) for a in pattern_args)}]{self.label_str}, " + f"because {tp_arg} cannot be coerced to any of its args:\n\n" + + "\n\n".join( + f"{a} -> {e}" for a, e in zip(pattern_args, reasons) + ) + ) + return + reasons = [] + for pattern_arg in pattern_args: + try: + return expand_and_check(tp, pattern_arg) + except TypeError as e: + reasons.append(e) + raise TypeError( + f"Cannot coerce {tp} to any of the union types:\n\n" + + "\n\n".join(f"{a} -> {e}" for a, e in zip(pattern_args, reasons)) + ) + + def check_mapping(tp_args, pattern_args): + key_pattern, val_pattern = pattern_args + key_tp, val_tp = tp_args + expand_and_check(key_tp, key_pattern) + expand_and_check(val_tp, val_pattern) + + def check_tuple(tp_args, pattern_args): + if pattern_args[-1] is Ellipsis: + if len(pattern_args) == 1: # matches anything + return + if tp_args[-1] is Ellipsis: + return expand_and_check(tp_args[0], pattern_args[0]) + for arg in tp_args: + expand_and_check(arg, pattern_args[0]) + return + elif tp_args[-1] is Ellipsis: + for pattern_arg in pattern_args: + expand_and_check(tp_args[0], pattern_arg) + return + if len(tp_args) != len(pattern_args): + raise TypeError( + f"Wrong number of type arguments in tuple {tp_args} compared to pattern " + f"{pattern_args} in attempting to match {type_} to {self.pattern}{self.label_str}" + ) + for t, p in zip(tp_args, pattern_args): + expand_and_check(t, p) + + def check_sequence(tp_args, pattern_args): + assert len(pattern_args) == 1 + if tp_args[-1] is Ellipsis: + tp_args = tp_args[:-1] + if not tp_args: + raise TypeError( + "Generic ellipsis type arguments not specific enough to match " + f"{pattern_args} in attempting to match {type_} to " + f"{self.pattern}{self.label_str}" + ) + for arg in tp_args: + expand_and_check(arg, pattern_args[0]) + + return expand_and_check(type_, self.pattern) + + def check_coercible( + self, source: ty.Union[object, type], target: ty.Union[type, ty.Any] + ): + """Checks whether the source object or type is coercible to the target type + given the coercion rules defined in the `coercible` and `not_coercible` attrs + + Parameters + ---------- + source : object or type + source object or type to be coerced + target : type or ty.Any + target type for the source to be coerced to + + Raises + ------ + TypeError + If the source type cannot be coerced into the target type depending on the + explicit inclusions and exclusions set in the `coercible` and `not_coercible` + member attrs + """ + # Short-circuit the basic cases where the source and target are the same + if source is target: + return + if self.superclass_auto_cast and self.is_subclass(target, type(source)): + logger.info( + "Attempting to coerce %s into %s due to super-to-sub class coercion " + "being permitted", + source, + target, + ) + return + source_origin = get_origin(source) + if source_origin is not None: + source = source_origin + + source_check = self.is_subclass if inspect.isclass(source) else self.is_instance + + def matches_criteria(criteria): + return [ + (src, tgt) + for src, tgt in criteria + if source_check(source, src) and self.is_subclass(target, tgt) + ] + + def type_name(t): + try: + return t.__name__ + except AttributeError: + return t._name # typing generics for Python < 3.10 + + if not matches_criteria(self.coercible): + raise TypeError( + f"Cannot coerce {repr(source)} into {target}{self.label_str} as the " + "coercion doesn't match any of the explicit inclusion criteria: " + + ", ".join( + f"{type_name(s)} -> {type_name(t)}" for s, t in self.coercible + ) + ) + matches_not_coercible = matches_criteria(self.not_coercible) + if matches_not_coercible: + raise TypeError( + f"Cannot coerce {repr(source)} into {target}{self.label_str} as it is explicitly " + "excluded by the following coercion criteria: " + + ", ".join( + f"{type_name(s)} -> {type_name(t)}" + for s, t in matches_not_coercible + ) + ) + + @classmethod + def matches(cls, obj: ty.Type[ty.Any], target: ty.Type[ty.Any], **kwargs) -> bool: + """Returns true if the provided type matches the pattern of the TypeParser + + Parameters + ---------- + type_ : type + the type to check + target : type + the target type to check against + **kwargs : dict[str, Any], optional + passed on to TypeParser.__init__ + + Returns + ------- + matches : bool + whether the type matches the target type factoring in sub-classes and coercible + pairs + """ + parser = cls(target, **kwargs) + try: + parser.coerce(obj) + except TypeError: + return False + return True + + @classmethod + def matches_type( + cls, type_: ty.Type[ty.Any], target: ty.Type[ty.Any], **kwargs + ) -> bool: + """Returns true if the provided type matches the pattern of the TypeParser + + Parameters + ---------- + type_ : type + the type to check + target : type + the target type to check against + **kwargs : dict[str, Any], optional + passed on to TypeParser.__init__ + + Returns + ------- + matches : bool + whether the type matches the target type factoring in sub-classes and coercible + pairs + """ + parser = cls(target, **kwargs) + try: + parser.check_type(type_) + except TypeError: + return False + return True + + @classmethod + def is_instance( + cls, + obj: object, + candidates: ty.Union[ty.Type[ty.Any], ty.Sequence[ty.Type[ty.Any]]], + ) -> bool: + """Checks whether the object is an instance of cls or that cls is typing.Any, + extending the built-in isinstance to check nested type args + + Parameters + ---------- + obj: object + the object to check whether it is an instance of one of the candidates + candidates : type or ty.Iterable[type] + the candidate types to check the object against + """ + if not isinstance(candidates, ty.Sequence): + candidates = [candidates] + for candidate in candidates: + if candidate is ty.Any: + return True + # Handle ty.Type[*] candidates + if ty.get_origin(candidate) is type: + return inspect.isclass(obj) and cls.is_subclass( + obj, ty.get_args(candidate)[0] + ) + if NO_GENERIC_ISSUBCLASS: + if inspect.isclass(obj): + return candidate is type + if issubtype(type(obj), candidate) or ( + type(obj) is dict and candidate is ty.Mapping + ): + return True + else: + if isinstance(obj, candidate): + return True + return False + + @classmethod + def is_subclass( + cls, + klass: ty.Type[ty.Any], + candidates: ty.Union[ty.Type[ty.Any], ty.Sequence[ty.Type[ty.Any]]], + any_ok: bool = False, + ) -> bool: + """Checks whether the class a is either the same as b, a subclass of b or b is + typing.Any, extending built-in issubclass to check nested type args + + Parameters + ---------- + klass : type + the klass to check whether it is a subclass of one of the candidates + candidates : type or ty.Iterable[type] + the candidate types to check the object against + any_ok : bool + whether klass=typing.Any should return True or False + """ + if not isinstance(candidates, ty.Sequence): + candidates = [candidates] + if ty.Any in candidates: + return True + if klass is ty.Any: + return any_ok + + origin = get_origin(klass) + args = get_args(klass) + + for candidate in candidates: + candidate_origin = get_origin(candidate) + candidate_args = get_args(candidate) + # Handle ty.Type[*] types in klass and candidates + if origin is type and (candidate is type or candidate_origin is type): + if candidate is type: + return True + return cls.is_subclass(args[0], candidate_args[0]) + elif origin is type or candidate_origin is type: + return False + if NO_GENERIC_ISSUBCLASS: + if klass is type and candidate is not type: + return False + if issubtype(klass, candidate) or ( + klass is dict and candidate is ty.Mapping + ): + return True + else: + if origin is ty.Union: + union_args = ( + candidate_args if candidate_origin is ty.Union else (candidate,) + ) + matches = all( + any(cls.is_subclass(a, c) for c in union_args) for a in args + ) + if matches: + return True + else: + if candidate_args and candidate_origin is not ty.Union: + if ( + origin + and issubclass(origin, candidate_origin) # type: ignore[arg-type] + and len(args) == len(candidate_args) + and all( + issubclass(a, c) for a, c in zip(args, candidate_args) + ) + ): + return True + else: + if issubclass(origin if origin else klass, candidate): + return True + return False + + @classmethod + def contains_type(cls, target: ty.Type[ty.Any], type_: ty.Type[ty.Any]): + """Checks a potentially nested type for sub-classes of the target type + + Parameters + ---------- + target : type + the target type to check for sub-classes of + type_: type + the type to check for nested types that are sub-classes of target + """ + if cls.is_subclass(type_, target): + return True + if type_ in (str, bytes, int, bool, float): # shortcut primitive types + return False + type_args = get_args(type_) + if not type_args: + return False + type_origin = get_origin(type_) + if type_origin is ty.Union: + for type_arg in type_args: + if cls.contains_type(target, type_arg): + return True + return False + if cls.is_subclass(type_origin, ty.Mapping): + type_key, type_val = type_args + return cls.contains_type(target, type_key) or cls.contains_type( + target, type_val + ) + if cls.is_subclass(type_, (ty.Sequence, MultiOutputObj)): + if type_args[-1] == Ellipsis: + type_args = type_args[:-1] + return any(cls.contains_type(target, a) for a in type_args) + return False + + @classmethod + def apply_to_instances( + cls, + target_type: ty.Type[ty.Any], + func: ty.Callable, + value: ty.Any, + cache: ty.Optional[ty.Dict[int, ty.Any]] = None, + ) -> ty.Any: + """Applies a function to all instances of the given type that are potentially + nested within the given value, caching previously computed modifications to + handle repeated elements + + Parameters + ---------- + target_type : type + the target type to apply the function to + func : callable + the callable object (e.g. function) to apply to the instances + value : Any + the value to copy files from (if required) + cache: dict, optional + guards against multiple references to the same objects by keeping a cache of + the modified + """ + if ( + not cls.is_instance(value, (target_type, ty.Mapping, ty.Sequence)) + or target_type is not str + and cls.is_instance(value, str) + ): + return value + if cache is None: + cache = {} + obj_id = id(value) + try: + return cache[obj_id] + except KeyError: + pass + if cls.is_instance(value, target_type): + modified = func(value) + elif cls.is_instance(value, ty.Mapping): + modified = type(value)( # type: ignore + ( + cls.apply_to_instances(target_type, func, key), + cls.apply_to_instances(target_type, func, val), + ) + for (key, val) in value.items() + ) + else: + assert cls.is_instance(value, (ty.Sequence, MultiOutputObj)) + args = [cls.apply_to_instances(target_type, func, val) for val in value] + modified = type(value)(args) # type: ignore + cache[obj_id] = modified + return modified + + @classmethod + def get_item_type( + cls, sequence_type: ty.Type[ty.Sequence[T]] + ) -> ty.Union[ty.Type[T], ty.Any]: + """Return the type of the types of items in a sequence type + + Parameters + ---------- + sequence_type: type[Sequence] + the type to find the type of the items of + + Returns + ------- + item_type: type or None + the type of the items + """ + if not TypeParser.is_subclass(sequence_type, ty.Sequence): + raise TypeError( + f"Cannot get item type from {sequence_type}, as it is not a sequence type" + ) + args = get_args(sequence_type) + if not args: + return ty.Any + if len(args) > 1 and not (len(args) == 2 and args[-1] == Ellipsis): + raise TypeError( + f"Cannot get item type from {sequence_type}, as it has multiple " + f"item types: {args}" + ) + return args[0] + + @classmethod + def strip_splits(cls, type_: ty.Type[ty.Any]) -> ty.Tuple[ty.Type, int]: + """Strips any StateArray types from the outside of the specified type and returns + the stripped type and the depth it was found at + + Parameters + ---------- + type_ : ty.Type[ty.Any] + the type to list the nested sequences of + only_splits : bool, optional + whether to only return nested splits, not all sequence types + + Returns + ------- + inner_type : type + the inner type once all outer sequences are stripped + depth : int + the number of splits outside the inner_type + """ + depth = 0 + while cls.is_subclass(type_, StateArray) and not cls.is_subclass(type_, str): + origin = get_origin(type_) + # If type is a union, pick the first sequence type in the union + if origin is ty.Union: + for tp in get_args(type_): + if cls.is_subclass(tp, ty.Sequence): + type_ = tp + break + type_ = cls.get_item_type(type_) + depth += 1 + return type_, depth + + @property + def label_str(self): + return f" in {self.label} " if self.label else "" + + get_origin = staticmethod(get_origin) + get_args = staticmethod(get_args) diff --git a/pyproject.toml b/pyproject.toml index ce7eb465dc..6a6ad5e703 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,12 +6,17 @@ build-backend = "flit_scm:buildapi" name = "pydra" description = "Pydra dataflow engine" readme = "README.rst" -requires-python = ">=3.7" +requires-python = ">=3.8" dependencies = [ "attrs >=19.1.0", "cloudpickle >=2.0.0", "etelemetry >=0.2.2", "filelock >=3.0.0", + "fileformats >=0.8", + "importlib_resources >=5.7; python_version < '3.11'", + "platformdirs >=2", + "typing_extensions >=4.6.3; python_version < '3.10'", + "typing_utils >=0.1.0; python_version < '3.10'", ] license = {file = "LICENSE"} authors = [ @@ -44,6 +49,9 @@ classifiers = [ dynamic = ["version"] [project.optional-dependencies] +psij = [ + "psij-python", +] dask = [ "dask", "distributed", @@ -55,7 +63,7 @@ dev = [ ] doc = [ "packaging", - "sphinx >=2.1.2", + "sphinx ==6.2.1", "sphinx_rtd_theme", "sphinxcontrib-apidoc ~=0.3.0", "sphinxcontrib-versioning", @@ -100,4 +108,4 @@ target-version = ['py37', 'py38'] exclude = "pydra/_version.py" [tool.codespell] -ignore-words = ".codespell-ignorewords" +ignore-words-list = "nd,afile"