Skip to content

Commit 5fb46e1

Browse files
committed
Add tests for glob pattern group filtering
Add integration tests for netCDF4, h5netcdf, and zarr backends, plus unit tests for _is_glob_pattern, _filter_group_paths, and _resolve_group_and_filter covering *, ?, and [] metacharacters.
1 parent c17f134 commit 5fb46e1

File tree

1 file changed

+192
-0
lines changed

1 file changed

+192
-0
lines changed

xarray/tests/test_backends_datatree.py

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,79 @@ def test_open_datatree_specific_group(self, tmpdir, simple_datatree) -> None:
377377
assert subgroup_tree.root.parent is None
378378
assert_equal(subgroup_tree, expected_subtree)
379379

380+
def test_open_datatree_group_glob(self, tmpdir) -> None:
    """Open a netCDF file with a glob ``group`` and check the exact set of nodes kept.

    The tree written to disk has two parents (``/A`` and ``/B``); ``/A`` holds
    ``sweep_0`` and ``sweep_1`` while ``/B`` holds only ``sweep_0``.  The
    pattern ``"*/sweep_0"`` is expected to keep both ``sweep_0`` groups plus
    the ancestors needed to reach them, and nothing else.
    """
    original_dt = DataTree.from_dict(
        {
            "/": xr.Dataset({"root_var": 1}),
            "/A": xr.Dataset({"a_var": 2}),
            "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}),
            "/A/sweep_1": xr.Dataset({"data": ("x", [3, 4])}),
            "/B": xr.Dataset({"b_var": 3}),
            "/B/sweep_0": xr.Dataset({"data": ("x", [5, 6])}),
        }
    )
    filepath = tmpdir / "glob_test.nc"
    original_dt.to_netcdf(filepath, engine=self.engine)

    with open_datatree(filepath, group="*/sweep_0", engine=self.engine) as tree:
        paths = {node.path for node in tree.subtree}
        # Compare the whole set rather than spot-checking a few members, so
        # the test also fails if an ancestor ("/", "/A", "/B") goes missing
        # or an unrelated group slips through the filter.  The expected set
        # mirrors the one used by the ``_filter_group_paths`` unit test for
        # the same pattern.
        assert paths == {"/", "/A", "/A/sweep_0", "/B", "/B/sweep_0"}
399+
400+
def test_open_datatree_group_glob_no_match(self, tmpdir) -> None:
    """A glob ``group`` that matches nothing should leave only the root node."""
    contents = {
        "/": xr.Dataset({"root_var": 1}),
        "/A": xr.Dataset({"a_var": 2}),
    }
    source_tree = DataTree.from_dict(contents)
    target = tmpdir / "glob_nomatch.nc"
    source_tree.to_netcdf(target, engine=self.engine)

    with open_datatree(target, group="*/nonexistent", engine=self.engine) as opened:
        # Collect every node path reachable from the opened root.
        found = set()
        for node in opened.subtree:
            found.add(node.path)
        assert found == {"/"}
413+
414+
def test_open_datatree_group_glob_preserves_data(self, tmpdir) -> None:
    """Variables on the kept nodes must round-trip unchanged through a glob open."""
    contents = {
        "/": xr.Dataset({"root_var": 1}),
        "/A": xr.Dataset({"a_var": 2}),
        "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}),
    }
    source_tree = DataTree.from_dict(contents)
    target = tmpdir / "glob_data.nc"
    source_tree.to_netcdf(target, engine=self.engine)

    with open_datatree(target, group="*/sweep_0", engine=self.engine) as opened:
        # Scalar stored on the parent group of the matched node.
        parent_scalar = opened["/A"].dataset["a_var"].item()
        assert parent_scalar == 2

        # Array stored on the matched node itself.
        sweep_values = opened["/A/sweep_0"].dataset["data"].values
        np.testing.assert_array_equal(sweep_values, [1, 2])
430+
431+
def test_open_groups_group_glob(self, tmpdir) -> None:
    """``open_groups`` with a glob ``group`` returns the match and its ancestors only."""
    contents = {
        "/": xr.Dataset({"root_var": 1}),
        "/A": xr.Dataset({"a_var": 2}),
        "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}),
        "/A/sweep_1": xr.Dataset({"data": ("x", [3, 4])}),
    }
    source_tree = DataTree.from_dict(contents)
    target = tmpdir / "glob_groups.nc"
    source_tree.to_netcdf(target, engine=self.engine)

    opened = open_groups(target, group="*/sweep_0", engine=self.engine)
    try:
        # Root, the parent, and the matching sweep must all be present ...
        for expected_key in ("/", "/A", "/A/sweep_0"):
            assert expected_key in opened
        # ... while the sibling sweep that does not match is left out.
        assert "/A/sweep_1" not in opened
    finally:
        # Close every returned dataset even when an assertion above fails.
        for dataset in opened.values():
            dataset.close()
452+
380453

381454
@requires_h5netcdf_or_netCDF4
382455
class TestGenericNetCDFIO(NetCDFIOBase):
@@ -1025,6 +1098,62 @@ def test_open_datatree_specific_group(
10251098
assert subgroup_tree.root.parent is None
10261099
assert_equal(subgroup_tree, expected_subtree)
10271100

1101+
def test_open_datatree_group_glob(self, tmpdir, zarr_format) -> None:
    """Open a zarr store with a glob ``group`` and check the exact set of nodes kept.

    The tree written to disk has two parents (``/A`` and ``/B``); ``/A`` holds
    ``sweep_0`` and ``sweep_1`` while ``/B`` holds only ``sweep_0``.  The
    pattern ``"*/sweep_0"`` is expected to keep both ``sweep_0`` groups plus
    the ancestors needed to reach them, and nothing else.
    """
    original_dt = DataTree.from_dict(
        {
            "/": xr.Dataset({"root_var": 1}),
            "/A": xr.Dataset({"a_var": 2}),
            "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}),
            "/A/sweep_1": xr.Dataset({"data": ("x", [3, 4])}),
            "/B": xr.Dataset({"b_var": 3}),
            "/B/sweep_0": xr.Dataset({"data": ("x", [5, 6])}),
        }
    )
    filepath = str(tmpdir / "glob_test.zarr")
    original_dt.to_zarr(filepath, zarr_format=zarr_format)

    with open_datatree(filepath, group="*/sweep_0", engine=self.engine) as tree:
        paths = {node.path for node in tree.subtree}
        # Compare the whole set rather than spot-checking a few members, so
        # the test also fails if an ancestor ("/", "/A", "/B") goes missing
        # or an unrelated group slips through the filter.  The expected set
        # mirrors the one used by the ``_filter_group_paths`` unit test for
        # the same pattern.
        assert paths == {"/", "/A", "/A/sweep_0", "/B", "/B/sweep_0"}
1120+
1121+
def test_open_datatree_group_glob_no_match(self, tmpdir, zarr_format) -> None:
    """A glob ``group`` that matches nothing should leave only the root node."""
    contents = {
        "/": xr.Dataset({"root_var": 1}),
        "/A": xr.Dataset({"a_var": 2}),
    }
    source_tree = DataTree.from_dict(contents)
    store_path = str(tmpdir / "glob_nomatch.zarr")
    source_tree.to_zarr(store_path, zarr_format=zarr_format)

    with open_datatree(
        store_path, group="*/nonexistent", engine=self.engine
    ) as opened:
        # Collect every node path reachable from the opened root.
        found = set()
        for node in opened.subtree:
            found.add(node.path)
        assert found == {"/"}
1134+
1135+
def test_open_groups_group_glob(self, tmpdir, zarr_format) -> None:
    """``open_groups`` with a glob ``group`` returns the match and its ancestors only."""
    contents = {
        "/": xr.Dataset({"root_var": 1}),
        "/A": xr.Dataset({"a_var": 2}),
        "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}),
        "/A/sweep_1": xr.Dataset({"data": ("x", [3, 4])}),
    }
    source_tree = DataTree.from_dict(contents)
    store_path = str(tmpdir / "glob_groups.zarr")
    source_tree.to_zarr(store_path, zarr_format=zarr_format)

    opened = open_groups(store_path, group="*/sweep_0", engine=self.engine)
    try:
        # Root, the parent, and the matching sweep must all be present ...
        for expected_key in ("/", "/A", "/A/sweep_0"):
            assert expected_key in opened
        # ... while the sibling sweep that does not match is left out.
        assert "/A/sweep_1" not in opened
    finally:
        # Close every returned dataset even when an assertion above fails.
        for dataset in opened.values():
            dataset.close()
1156+
10281157
@requires_dask
10291158
def test_open_groups_chunks(self, tmpdir, zarr_format) -> None:
10301159
"""Test `open_groups` with chunks on a zarr store."""
@@ -1142,3 +1271,66 @@ def test_zarr_engine_recognised(self, tmpdir, zarr_format) -> None:
11421271

11431272
with open_datatree(filepath) as roundtrip_dt:
11441273
assert_identical(original_dt, roundtrip_dt)
1274+
1275+
1276+
class TestGlobPatternUtilities:
    """Unit tests for the glob helper functions imported from ``xarray.backends.common``.

    Each test imports the private helper it exercises inside the test body.
    """

    def test_is_glob_pattern(self) -> None:
        """Strings containing ``*``, ``?`` or ``[]`` are globs; plain paths are not."""
        from xarray.backends.common import _is_glob_pattern

        for pattern in ("*/sweep_0", "VCP-34/sweep_[01]", "sweep_?"):
            assert _is_glob_pattern(pattern)
        for literal in ("VCP-34", "/group/subgroup"):
            assert not _is_glob_pattern(literal)

    def test_filter_group_paths(self) -> None:
        """``*`` keeps the matching leaves together with root and their parents."""
        from xarray.backends.common import _filter_group_paths

        all_paths = ["/", "/A", "/A/sweep_0", "/A/sweep_1", "/B", "/B/sweep_0"]
        expected = ["/", "/A", "/A/sweep_0", "/B", "/B/sweep_0"]
        kept = _filter_group_paths(all_paths, "*/sweep_0")
        assert kept == expected

    def test_filter_group_paths_no_match(self) -> None:
        """A pattern with no matches reduces the list to the root path."""
        from xarray.backends.common import _filter_group_paths

        all_paths = ["/", "/A", "/B"]
        kept = _filter_group_paths(all_paths, "*/nonexistent")
        assert kept == ["/"]

    def test_filter_group_paths_question_mark(self) -> None:
        """``?`` matches single-character names, so ``/AB`` is excluded."""
        from xarray.backends.common import _filter_group_paths

        all_paths = ["/", "/A", "/B", "/AB"]
        expected = ["/", "/A", "/B"]
        kept = _filter_group_paths(all_paths, "?")
        assert kept == expected

    def test_filter_group_paths_bracket(self) -> None:
        """A ``[01]`` character class keeps sweeps 0 and 1 and drops sweep 2."""
        from xarray.backends.common import _filter_group_paths

        all_paths = ["/", "/A", "/A/sweep_0", "/A/sweep_1", "/A/sweep_2"]
        expected = ["/", "/A", "/A/sweep_0", "/A/sweep_1"]
        kept = _filter_group_paths(all_paths, "*/sweep_[01]")
        assert kept == expected

    def test_resolve_group_and_filter_none(self) -> None:
        """With ``group=None`` the group stays ``None`` and the paths are unfiltered."""
        from xarray.backends.common import _resolve_group_and_filter

        all_paths = ["/", "/A"]
        resolved_group, kept = _resolve_group_and_filter(None, all_paths)
        assert resolved_group is None
        assert kept == all_paths

    def test_resolve_group_and_filter_literal(self) -> None:
        """A literal group name is returned as-is and the paths are unfiltered."""
        from xarray.backends.common import _resolve_group_and_filter

        all_paths = ["/", "/A"]
        resolved_group, kept = _resolve_group_and_filter("A", all_paths)
        assert resolved_group == "A"
        assert kept == all_paths

    def test_resolve_group_and_filter_glob(self) -> None:
        """A glob group resolves to ``None`` and the paths are filtered by the pattern."""
        from xarray.backends.common import _resolve_group_and_filter

        all_paths = ["/", "/A", "/A/sweep_0", "/A/sweep_1", "/B", "/B/sweep_0"]
        expected = ["/", "/A", "/A/sweep_0", "/B", "/B/sweep_0"]
        resolved_group, kept = _resolve_group_and_filter("*/sweep_0", all_paths)
        assert resolved_group is None
        assert kept == expected

0 commit comments

Comments
 (0)