8000 Working on adding entity_message by rcali21 · Pull Request #593 · nipype/pydra · GitHub
[go: up one dir, main page]

Skip to content

Working on adding entity_message #593

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 39 commits into from
Nov 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
5b05be5
Working on adding entity_message
Nov 2, 2022
1f236f9
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 2, 2022
162b03b
Added GeneratedBy to entity using a path grabber
Nov 2, 2022
5a60569
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 2, 2022
1cc91ef
GeneratedBy populated with path grabber
Nov 2, 2022
ca9d0ea
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 2, 2022
d0770b4
Implemented AtLocation field, tested with input_spec
Nov 3, 2022
3462466
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 3, 2022
096cbb4
Updated tests
Nov 4, 2022
ae543ae
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 4, 2022
ef25055
Removed unused import
Nov 4, 2022
70f15ee
flake8
Nov 4, 2022
7ab3ddf
Updated tests, accounting for duplicate key names
Nov 7, 2022
c4f21af
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 7, 2022
fd625a9
Updated tests, accounting for duplicate key names
Nov 7, 2022
4d447d1
Fixed test
Nov 7, 2022
58e4767
Fixed tests
Nov 7, 2022
e355f21
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 7, 2022
a8c3a37
Added atlocation in tests to meet coverage
Nov 7, 2022
175662f
Added file test, atlocation test, hash test
Nov 18, 2022
0759ab1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 18, 2022
6c3cded
Updated ent. uuid, input_file checks
Nov 18, 2022
7895319
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 18, 2022
8a0d63c
Refactored, cleaned up old comments
Nov 20, 2022
1a5822c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 20, 2022
704d52e
Refactored, cleaned up old comments
Nov 20, 2022
2912ee6
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 20, 2022
dfecb8a
Added dict for atlocation, digest tracking, added tests
Nov 22, 2022
9be04d2
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 22, 2022
7d3845e
Updated entity_message to be collected per input
Nov 23, 2022
230d0ef
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 23, 2022
d1e3c58
Addressed all issues in audit
Nov 24, 2022
9d037df
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 24, 2022
e73b15b
Addressed all issues in audit
Nov 24, 2022
bf593de
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 24, 2022
118b7cd
Fixed tests
Nov 26, 2022
15a7542
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 26, 2022
662ade1
Added two file test back
Nov 27, 2022
19beeba
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 27, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 20 additions & 15 deletions pydra/engine/audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import json
import attr
from ..utils.messenger import send_message, make_message, gen_uuid, now, AuditFlag
from .helpers import ensure_list, gather_runtime_info
from .helpers import ensure_list, gather_runtime_info, hash_file
from .specs import attr_fields, File, Directory


class Audit:
Expand Down Expand Up @@ -173,15 +174,24 @@ def audit_task(self, task):
import subprocess as sp

label = task.name
if hasattr(task.inputs, "executable"):
command = task.cmdline
# assume function task
else:
command = None
# if hasattr(task.inputs, "in_file"):
# input_file = task.inputs.in_file
# else:
# input_file = None

command = task.cmdline if hasattr(task.inputs, "executable") else None
attr_list = attr_fields(task.inputs)
for attrs in attr_list:
if attrs.type in [File, Directory]:
input_name = attrs.name
input_path = os.path.abspath(getattr(task.inputs, input_name))
file_hash = hash_file(input_path)
entity_id = f"uid:{gen_uuid()}"
entity_message = {
"@id": entity_id,
"Label": input_name,
"AtLocation": input_path,
"GeneratedBy": None,
"@type": "input",
"digest": file_hash,
}
self.audit_message(entity_message, AuditFlag.PROV)

if command is not None:
cmd_name = command.split()[0]
Expand Down Expand Up @@ -210,9 +220,4 @@ def audit_task(self, task):
"AssociatedWith": version_cmd,
}

# new code to be added here for i/o tracking - WIP

self.audit_message(start_message, AuditFlag.PROV)

# add more fields according to BEP208 doc
# with every field, check in tests
126 changes: 110 additions & 16 deletions pydra/engine/tests/test_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,17 @@
from ..core import Workflow
from ..task import AuditFlag, ShellCommandTask, DockerTask, SingularityTask
from ...utils.messenger import FileMessenger, PrintMessenger, collect_messages
from .utils import gen_basic_wf, use_validator
from ..specs import MultiInputObj, MultiOutputObj, SpecInfo, FunctionSpec, BaseSpec
from .utils import gen_basic_wf, use_validator, Submitter
from ..specs import (
MultiInputObj,
MultiOutputObj,
SpecInfo,
FunctionSpec,
BaseSpec,
ShellSpec,
File,
)
from ..helpers import hash_file

no_win = pytest.mark.skipif(
sys.platform.startswith("win"),
Expand Down Expand Up @@ -998,18 +1007,21 @@ def testfunc(a: int, b: float = 0.1) -> ty.NamedTuple("Output", [("out", float)]
funky.cache_dir = tmpdir
funky()
message_path = tmpdir / funky.checksum / "messages"
# go through each jsonld file in message_path and check if the label field exists
json_content = []

for file in glob(str(message_path) + "/*.jsonld"):
with open(file, "r") as f:
data = json.load(f)
print(data)
if "Label" in data:
json_content.append(True)
assert "testfunc" == data["Label"]
if "@type" in data:
if "AssociatedWith" in data:
assert None == data["AssociatedWith"]
assert any(json_content)
assert "testfunc" in data["Label"]

if "@type" in data:
if data["@type"] == "input":
assert None == data["Label"]
if "AssociatedWith" in data:
assert None == data["AssociatedWith"]

# assert any(json_content)


def test_audit_shellcommandtask(tmpdir):
Expand All @@ -1028,21 +1040,103 @@ def test_audit_shellcommandtask(tmpdir):
shelly()
message_path = tmpdir / shelly.checksum / "messages"
# go through each jsonld file in message_path and check if the label field exists
label_content = []

command_content = []

for file in glob(str(message_path) + "/*.jsonld"):
with open(file, "r") as f:
data = json.load(f)
print(data)
if "Label" in data:
label_content.append(True)

if "@type" in data:
if "AssociatedWith" in data:
assert "shelly" in data["Label"]

if "@type" in data:
if data["@type"] == "input":
assert data["Label"] == None

if "Command" in data:
command_content.append(True)
assert "ls -l" == data["Command"]

print(command_content)
assert any(label_content)
assert any(command_content)


def test_audit_shellcommandtask_file(tmpdir):
# sourcery skip: use-fstring-for-concatenation
import glob
import shutil

# create test.txt file with "This is a test" in it in the tmpdir
# create txt file in cwd
with open("test.txt", "w") as f:
f.write("This is a test")

with open("test2.txt", "w") as f:
f.write("This is a test")

# copy the test.txt file to the tmpdir
shutil.copy("test.txt", tmpdir)
shutil.copy("test2.txt", tmpdir)

cmd = "cat"
file_in = tmpdir / "test.txt"
file_in_2 = tmpdir / "test2.txt"
test_file_hash = hash_file(file_in)
test_file_hash_2 = hash_file(file_in_2)
my_input_spec = SpecInfo(
name="Input",
fields=[
(
"in_file",
attr.ib(
type=File,
metadata={
"position": 1,
"argstr": "",
"help_string": "text",
"mandatory": True,
},
),
),
(
"in_file_2",
attr.ib(
type=File,
metadata={
"position": 2,
"argstr": "",
"help_string": "text",
"mandatory": True,
},
),
),
],
bases=(ShellSpec,),
)
shelly = ShellCommandTask(
name="shelly",
in_file=file_in,
in_file_2=file_in_2,
input_spec=my_input_spec,
executable=cmd,
audit_flags=AuditFlag.PROV,
messengers=FileMessenger(),
)
shelly.cache_dir = tmpdir
shelly()
message_path = tmpdir / shelly.checksum / "messages"
for file in glob.glob(str(message_path) + "/*.jsonld"):
with open(file, "r") as x:
data = json.load(x)
if "@type" in data:
if data["@type"] == "input":
if data["Label"] == "in_file":
assert data["AtLocation"] == str(file_in)
assert data["digest"] == test_file_hash
if data["Label"] == "in_file_2":
assert data["AtLocation"] == str(file_in_2)
assert data["digest"] == test_file_hash_2


def test_audit_shellcommandtask_version(tmpdir):
Expand Down
0