bugfix: ReferenceFileSystem.cat() scrambles and omits reference mappi… · fsspec/filesystem_spec@f7b454e · GitHub
[go: up one dir, main page]

Skip to content

Commit f7b454e

Browse files
authored
bugfix: ReferenceFileSystem.cat() scrambles and omits reference mappings (#1436)
1 parent 14a7788 commit f7b454e

File tree

2 files changed

+31
-12
lines changed

2 files changed

+31
-12
lines changed

fsspec/implementations/reference.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -798,28 +798,30 @@ def cat(self, path, recursive=False, on_error="raise", **kwargs):
798798
out = {}
799799
for proto, paths in proto_dict.items():
800800
fs = self.fss[proto]
801-
urls, starts, ends = [], [], []
801+
urls, starts, ends, valid_paths = [], [], [], []
802802
for p in paths:
803803
# find references or label not-found. Early exit if any not
804804
# found and on_error is "raise"
805805
try:
806806
u, s, e = self._cat_common(p)
807-
urls.append(u)
808-
starts.append(s)
809-
ends.append(e)
810807
except FileNotFoundError as err:
811808
if on_error == "raise":
812809
raise
813810
if on_error != "omit":
814811
out[p] = err
812+
else:
813+
urls.append(u)
814+
starts.append(s)
815+
ends.append(e)
816+
valid_paths.append(p)
815817

816818
# process references into form for merging
817819
urls2 = []
818820
starts2 = []
819821
ends2 = []
820822
paths2 = []
821823
whole_files = set()
822-
for u, s, e, p in zip(urls, starts, ends, paths):
824+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
823825
if isinstance(u, bytes):
824826
# data
825827
out[p] = u
@@ -831,7 +833,7 @@ def cat(self, path, recursive=False, on_error="raise", **kwargs):
831833
starts2.append(s)
832834
ends2.append(e)
833835
paths2.append(p)
834-
for u, s, e, p in zip(urls, starts, ends, paths):
836+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
835837
# second run to account for files that are to be loaded whole
836838
if s is not None and u not in whole_files:
837839
urls2.append(u)
@@ -851,7 +853,7 @@ def cat(self, path, recursive=False, on_error="raise", **kwargs):
851853
bytes_out = fs.cat_ranges(new_paths, new_starts, new_ends)
852854

853855
# unbundle from merged bytes - simple approach
854-
for u, s, e, p in zip(urls, starts, ends, paths):
856+
for u, s, e, p in zip(urls, starts, ends, valid_paths):
855857
if p in out:
856858
continue # was bytes, already handled
857859
for np, ns, ne, b in zip(new_paths, new_starts, new_ends, bytes_out):

fsspec/implementations/tests/test_reference.py

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,7 @@ def test_merging(m):
449449
def test_cat_file_ranges(m):
450450
other = b"other test data"
451451
m.pipe("/b", other)
452+
452453
fs = fsspec.filesystem(
453454
"reference",
454455
fo={
@@ -467,15 +468,26 @@ def test_cat_file_ranges(m):
467468
assert fs.cat_file("d", 1, -3) == other[4:10][1:-3]
468469

469470

470-
def test_cat_missing(m):
471+
@pytest.mark.parametrize(
472+
"fo",
473+
[
474+
{
475+
"c": ["memory://b"],
476+
"d": ["memory://unknown", 4, 6],
477+
},
478+
{
479+
"c": ["memory://b"],
480+
"d": ["//unknown", 4, 6],
481+
},
482+
],
483+
ids=["memory protocol", "mixed protocols: memory and unspecified"],
484+
)
485+
def test_cat_missing(m, fo):
471486
other = b"other test data"
472487
m.pipe("/b", other)
473488
fs = fsspec.filesystem(
474489
"reference",
475-
fo={
476-
"c": ["memory://b"],
477-
"d": ["memory://unknown", 4, 6],
478-
},
490+
fo=fo,
479491
)
480492
with pytest.raises(FileNotFoundError):
481493
fs.cat("notafile")
@@ -508,6 +520,11 @@ def test_cat_missing(m):
508520
out = mapper.getitems(["c", "d"], on_error="return")
509521
assert isinstance(out["d"], ReferenceNotReachable)
510522

523+
out = fs.cat(["notone", "c", "d"], on_error="return")
524+
assert isinstance(out["notone"], FileNotFoundError)
525+
assert out["c"] == other
526+
assert isinstance(out["d"], ReferenceNotReachable)
527+
511528
out = mapper.getitems(["c", "d"], on_error="omit")
512529
assert list(out) == ["c"]
513530

0 commit comments

Comments
 (0)
0