nipype
diff --git a/‎pydra/engine/audit.py
Lines changed: 20 additions & 15 deletions b/‎pydra/engine/audit.py
Lines changed: 20 additions & 15 deletions
diff --git a/‎pydra/engine/tests/test_task.py
Lines changed: 110 additions & 16 deletions b/‎pydra/engine/tests/test_task.py
Lines changed: 110 additions & 16 deletions
@@ -4,7 +4,8 @@
 import json
 import attr
 from ..utils.messenger import send_message, make_message, gen_uuid, now, AuditFlag
-from .helpers import ensure_list, gather_runtime_info
+from .helpers import ensure_list, gather_runtime_info, hash_file
+from .specs import attr_fields, File, Directory
 
 
 class Audit:
@@ -173,15 +174,24 @@ def audit_task(self, task):
         import subprocess as sp
 
         label = task.name
-        if hasattr(task.inputs, "executable"):
-            command = task.cmdline
-        # assume function task
-        else:
-            command = None
-        # if hasattr(task.inputs, "in_file"):
-        #     input_file = task.inputs.in_file
-        # else:
-        #     input_file = None
+
+        command = task.cmdline if hasattr(task.inputs, "executable") else None
+        attr_list = attr_fields(task.inputs)
+        for attrs in attr_list:
+            if attrs.type in [File, Directory]:
+                input_name = attrs.name
+                input_path = os.path.abspath(getattr(task.inputs, input_name))
+                file_hash = hash_file(input_path)
+                entity_id = f"uid:{gen_uuid()}"
+                entity_message = {
+                    "@id": entity_id,
+                    "Label": input_name,
+                    "AtLocation": input_path,
+                    "GeneratedBy": None,
+                    "@type": "input",
+                    "digest": file_hash,
+                }
+                self.audit_message(entity_message, AuditFlag.PROV)
 
         if command is not None:
             cmd_name = command.split()[0]
@@ -210,9 +220,4 @@ def audit_task(self, task):
             "AssociatedWith": version_cmd,
         }
 
-        # new code to be added here for i/o tracking - WIP
-
         self.audit_message(start_message, AuditFlag.PROV)
-
-        # add more fields according to BEP208 doc
-        # with every field, check in tests
 
@@ -11,8 +11,17 @@
 from ..core import Workflow
 from ..task import AuditFlag, ShellCommandTask, DockerTask, SingularityTask
 from ...utils.messenger import FileMessenger, PrintMessenger, collect_messages
-from .utils import gen_basic_wf, use_validator
-from ..specs import MultiInputObj, MultiOutputObj, SpecInfo, FunctionSpec, BaseSpec
+from .utils import gen_basic_wf, use_validator, Submitter
+from ..specs import (
+    MultiInputObj,
+    MultiOutputObj,
+    SpecInfo,
+    FunctionSpec,
+    BaseSpec,
+    ShellSpec,
+    File,
+)
+from ..helpers import hash_file
 
 no_win = pytest.mark.skipif(
     sys.platform.startswith("win"),
@@ -998,18 +1007,21 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]
     funky.cache_dir = tmpdir
     funky()
     message_path = tmpdir / funky.checksum / "messages"
-    # go through each jsonld file in message_path and check if the label field exists
-    json_content = []
+
     for file in glob(str(message_path) + "/*.jsonld"):
         with open(file, "r") as f:
             data = json.load(f)
-            print(data)
-            if "Label" in data:
-                json_content.append(True)
-                assert "testfunc" == data["Label"]
+            if "@type" in data:
                 if "AssociatedWith" in data:
-                    assert None == data["AssociatedWith"]
-    assert any(json_content)
+                    assert "testfunc" in data["Label"]
+
+            if "@type" in data:
+                if data["@type"] == "input":
+                    assert None == data["Label"]
+            if "AssociatedWith" in data:
+                assert None == data["AssociatedWith"]
+
+    # assert any(json_content)
 
 
 def test_audit_shellcommandtask(tmpdir):
@@ -1028,21 +1040,103 @@ def test_audit_shellcommandtask(tmpdir):
     shelly()
     message_path = tmpdir / shelly.checksum / "messages"
     # go through each jsonld file in message_path and check if the label field exists
-    label_content = []
+
     command_content = []
 
     for file in glob(str(message_path) + "/*.jsonld"):
         with open(file, "r") as f:
             data = json.load(f)
-            print(data)
-            if "Label" in data:
-                label_content.append(True)
+
+            if "@type" in data:
+                if "AssociatedWith" in data:
+                    assert "shelly" in data["Label"]
+
+            if "@type" in data:
+                if data["@type"] == "input":
+                    assert data["Label"] == None
+
             if "Command" in data:
                 command_content.append(True)
                 assert "ls -l" == data["Command"]
 
-    print(command_content)
-    assert any(label_content)
+    assert any(command_content)
+
+
+def test_audit_shellcommandtask_file(tmpdir):
+    # sourcery skip: use-fstring-for-concatenation
+    import glob
+    import shutil
+
+    # create test.txt file with "This is a test" in it in the tmpdir
+    # create txt file in cwd
+    with open("test.txt", "w") as f:
+        f.write("This is a test")
+
+    with open("test2.txt", "w") as f:
+        f.write("This is a test")
+
+    # copy the test.txt file to the tmpdir
+    shutil.copy("test.txt", tmpdir)
+    shutil.copy("test2.txt", tmpdir)
+
+    cmd = "cat"
+    file_in = tmpdir / "test.txt"
+    file_in_2 = tmpdir / "test2.txt"
+    test_file_hash = hash_file(file_in)
+    test_file_hash_2 = hash_file(file_in_2)
+    my_input_spec = SpecInfo(
+        name="Input",
+        fields=[
+            (
+                "in_file",
+                attr.ib(
+                    type=File,
+                    metadata={
+                        "position": 1,
+                        "argstr": "",
+                        "help_string": "text",
+                        "mandatory": True,
+                    },
+                ),
+            ),
+            (
+                "in_file_2",
+                attr.ib(
+                    type=File,
+                    metadata={
+                        "position": 2,
+                        "argstr": "",
+                        "help_string": "text",
+                        "mandatory": True,
+                    },
+                ),
+            ),
+        ],
+        bases=(ShellSpec,),
+    )
+    shelly = ShellCommandTask(
+        name="shelly",
+        in_file=file_in,
+        in_file_2=file_in_2,
+        input_spec=my_input_spec,
+        executable=cmd,
+        audit_flags=AuditFlag.PROV,
+        messengers=FileMessenger(),
+    )
+    shelly.cache_dir = tmpdir
+    shelly()
+    message_path = tmpdir / shelly.checksum / "messages"
+    for file in glob.glob(str(message_path) + "/*.jsonld"):
+        with open(file, "r") as x:
+            data = json.load(x)
+            if "@type" in data:
+                if data["@type"] == "input":
+                    if data["Label"] == "in_file":
+                        assert data["AtLocation"] == str(file_in)
+                        assert data["digest"] == test_file_hash
+                    if data["Label"] == "in_file_2":
+                        assert data["AtLocation"] == str(file_in_2)
+                        assert data["digest"] == test_file_hash_2
 
 
 def test_audit_shellcommandtask_version(tmpdir):