@@ -377,6 +377,79 @@ def test_open_datatree_specific_group(self, tmpdir, simple_datatree) -> None:
377377 assert subgroup_tree .root .parent is None
378378 assert_equal (subgroup_tree , expected_subtree )
379379
def test_open_datatree_group_glob(self, tmpdir) -> None:
    """Open a netCDF file with a glob ``group`` and check which nodes appear.

    A tree with ``sweep_0``/``sweep_1`` children under ``/A`` and a
    ``sweep_0`` child under ``/B`` is written to disk, then reopened with
    ``group="*/sweep_0"``. Both ``sweep_0`` nodes must be present and
    ``/A/sweep_1`` must be absent.
    """
    source_groups = {
        "/": xr.Dataset({"root_var": 1}),
        "/A": xr.Dataset({"a_var": 2}),
        "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}),
        "/A/sweep_1": xr.Dataset({"data": ("x", [3, 4])}),
        "/B": xr.Dataset({"b_var": 3}),
        "/B/sweep_0": xr.Dataset({"data": ("x", [5, 6])}),
    }
    nc_path = tmpdir / "glob_test.nc"
    DataTree.from_dict(source_groups).to_netcdf(nc_path, engine=self.engine)

    with open_datatree(
        nc_path, group="*/sweep_0", engine=self.engine
    ) as opened:
        node_paths = set()
        for node in opened.subtree:
            node_paths.add(node.path)
        # Groups matching the pattern are kept ...
        for kept in ("/A/sweep_0", "/B/sweep_0"):
            assert kept in node_paths
        # ... while the sibling that does not match is dropped.
        assert "/A/sweep_1" not in node_paths
399+
def test_open_datatree_group_glob_no_match(self, tmpdir) -> None:
    """Open a netCDF file with a glob ``group`` that matches no group.

    The reopened tree is expected to contain the root node only.
    """
    source_groups = {
        "/": xr.Dataset({"root_var": 1}),
        "/A": xr.Dataset({"a_var": 2}),
    }
    nc_path = tmpdir / "glob_nomatch.nc"
    DataTree.from_dict(source_groups).to_netcdf(nc_path, engine=self.engine)

    with open_datatree(
        nc_path, group="*/nonexistent", engine=self.engine
    ) as opened:
        node_paths = set()
        for node in opened.subtree:
            node_paths.add(node.path)
        assert node_paths == {"/"}
413+
def test_open_datatree_group_glob_preserves_data(self, tmpdir) -> None:
    """Check variable values survive opening a netCDF file with a glob.

    After reopening with ``group="*/sweep_0"``, the scalar ``a_var`` on
    ``/A`` and the ``data`` array on ``/A/sweep_0`` must equal what was
    written.
    """
    source_groups = {
        "/": xr.Dataset({"root_var": 1}),
        "/A": xr.Dataset({"a_var": 2}),
        "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}),
    }
    nc_path = tmpdir / "glob_data.nc"
    DataTree.from_dict(source_groups).to_netcdf(nc_path, engine=self.engine)

    with open_datatree(
        nc_path, group="*/sweep_0", engine=self.engine
    ) as opened:
        # Values are read while the file is still open.
        parent_value = opened["/A"].dataset["a_var"].item()
        assert parent_value == 2
        sweep_values = opened["/A/sweep_0"].dataset["data"].values
        np.testing.assert_array_equal(sweep_values, [1, 2])
430+
def test_open_groups_group_glob(self, tmpdir) -> None:
    """Call ``open_groups`` on a netCDF file with a glob ``group``.

    The returned mapping must contain ``/``, ``/A`` and ``/A/sweep_0`` and
    must not contain ``/A/sweep_1``. Every returned dataset is closed
    afterwards, whether or not the assertions pass.
    """
    source_groups = {
        "/": xr.Dataset({"root_var": 1}),
        "/A": xr.Dataset({"a_var": 2}),
        "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}),
        "/A/sweep_1": xr.Dataset({"data": ("x", [3, 4])}),
    }
    nc_path = tmpdir / "glob_groups.nc"
    DataTree.from_dict(source_groups).to_netcdf(nc_path, engine=self.engine)

    opened = open_groups(nc_path, group="*/sweep_0", engine=self.engine)
    try:
        for present in ("/", "/A", "/A/sweep_0"):
            assert present in opened
        assert "/A/sweep_1" not in opened
    finally:
        # Release the file handles even if an assertion failed.
        for group_ds in opened.values():
            group_ds.close()
452+
380453
381454@requires_h5netcdf_or_netCDF4
382455class TestGenericNetCDFIO (NetCDFIOBase ):
@@ -1025,6 +1098,62 @@ def test_open_datatree_specific_group(
10251098 assert subgroup_tree .root .parent is None
10261099 assert_equal (subgroup_tree , expected_subtree )
10271100
def test_open_datatree_group_glob(self, tmpdir, zarr_format) -> None:
    """Open a zarr store with a glob ``group`` and check which nodes appear.

    A tree with ``sweep_0``/``sweep_1`` children under ``/A`` and a
    ``sweep_0`` child under ``/B`` is written with the given
    ``zarr_format``, then reopened with ``group="*/sweep_0"``. Both
    ``sweep_0`` nodes must be present and ``/A/sweep_1`` must be absent.
    """
    source_groups = {
        "/": xr.Dataset({"root_var": 1}),
        "/A": xr.Dataset({"a_var": 2}),
        "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}),
        "/A/sweep_1": xr.Dataset({"data": ("x", [3, 4])}),
        "/B": xr.Dataset({"b_var": 3}),
        "/B/sweep_0": xr.Dataset({"data": ("x", [5, 6])}),
    }
    store_path = str(tmpdir / "glob_test.zarr")
    DataTree.from_dict(source_groups).to_zarr(store_path, zarr_format=zarr_format)

    with open_datatree(
        store_path, group="*/sweep_0", engine=self.engine
    ) as opened:
        node_paths = set()
        for node in opened.subtree:
            node_paths.add(node.path)
        # Groups matching the pattern are kept ...
        for kept in ("/A/sweep_0", "/B/sweep_0"):
            assert kept in node_paths
        # ... while the sibling that does not match is dropped.
        assert "/A/sweep_1" not in node_paths
1120+
def test_open_datatree_group_glob_no_match(self, tmpdir, zarr_format) -> None:
    """Open a zarr store with a glob ``group`` that matches no group.

    The reopened tree is expected to contain the root node only.
    """
    source_groups = {
        "/": xr.Dataset({"root_var": 1}),
        "/A": xr.Dataset({"a_var": 2}),
    }
    store_path = str(tmpdir / "glob_nomatch.zarr")
    DataTree.from_dict(source_groups).to_zarr(store_path, zarr_format=zarr_format)

    with open_datatree(
        store_path, group="*/nonexistent", engine=self.engine
    ) as opened:
        node_paths = set()
        for node in opened.subtree:
            node_paths.add(node.path)
        assert node_paths == {"/"}
1134+
def test_open_groups_group_glob(self, tmpdir, zarr_format) -> None:
    """Call ``open_groups`` on a zarr store with a glob ``group``.

    The returned mapping must contain ``/``, ``/A`` and ``/A/sweep_0`` and
    must not contain ``/A/sweep_1``. Every returned dataset is closed
    afterwards, whether or not the assertions pass.
    """
    source_groups = {
        "/": xr.Dataset({"root_var": 1}),
        "/A": xr.Dataset({"a_var": 2}),
        "/A/sweep_0": xr.Dataset({"data": ("x", [1, 2])}),
        "/A/sweep_1": xr.Dataset({"data": ("x", [3, 4])}),
    }
    store_path = str(tmpdir / "glob_groups.zarr")
    DataTree.from_dict(source_groups).to_zarr(store_path, zarr_format=zarr_format)

    opened = open_groups(store_path, group="*/sweep_0", engine=self.engine)
    try:
        for present in ("/", "/A", "/A/sweep_0"):
            assert present in opened
        assert "/A/sweep_1" not in opened
    finally:
        # Release the store handles even if an assertion failed.
        for group_ds in opened.values():
            group_ds.close()
1156+
10281157 @requires_dask
10291158 def test_open_groups_chunks (self , tmpdir , zarr_format ) -> None :
10301159 """Test `open_groups` with chunks on a zarr store."""
@@ -1142,3 +1271,66 @@ def test_zarr_engine_recognised(self, tmpdir, zarr_format) -> None:
11421271
11431272 with open_datatree (filepath ) as roundtrip_dt :
11441273 assert_identical (original_dt , roundtrip_dt )
1274+
1275+
class TestGlobPatternUtilities:
    """Unit tests for the glob helpers in ``xarray.backends.common``.

    Each test imports the helper it exercises locally and checks it
    against hand-written group path lists.
    """

    def test_is_glob_pattern(self) -> None:
        """Strings containing ``*``, ``?`` or ``[...]`` count as globs."""
        from xarray.backends.common import _is_glob_pattern

        for pattern in ("*/sweep_0", "VCP-34/sweep_[01]", "sweep_?"):
            assert _is_glob_pattern(pattern)
        for literal in ("VCP-34", "/group/subgroup"):
            assert not _is_glob_pattern(literal)

    def test_filter_group_paths(self) -> None:
        """``*/sweep_0`` keeps the matching groups, their parents and root."""
        from xarray.backends.common import _filter_group_paths

        all_paths = ["/", "/A", "/A/sweep_0", "/A/sweep_1", "/B", "/B/sweep_0"]
        expected = ["/", "/A", "/A/sweep_0", "/B", "/B/sweep_0"]
        assert _filter_group_paths(all_paths, "*/sweep_0") == expected

    def test_filter_group_paths_no_match(self) -> None:
        """A pattern matching nothing leaves only the root path."""
        from xarray.backends.common import _filter_group_paths

        all_paths = ["/", "/A", "/B"]
        expected = ["/"]
        assert _filter_group_paths(all_paths, "*/nonexistent") == expected

    def test_filter_group_paths_question_mark(self) -> None:
        """``?`` selects ``/A`` and ``/B`` but not ``/AB``."""
        from xarray.backends.common import _filter_group_paths

        all_paths = ["/", "/A", "/B", "/AB"]
        expected = ["/", "/A", "/B"]
        assert _filter_group_paths(all_paths, "?") == expected

    def test_filter_group_paths_bracket(self) -> None:
        """``[01]`` selects ``sweep_0`` and ``sweep_1`` but not ``sweep_2``."""
        from xarray.backends.common import _filter_group_paths

        all_paths = ["/", "/A", "/A/sweep_0", "/A/sweep_1", "/A/sweep_2"]
        expected = ["/", "/A", "/A/sweep_0", "/A/sweep_1"]
        assert _filter_group_paths(all_paths, "*/sweep_[01]") == expected

    def test_resolve_group_and_filter_none(self) -> None:
        """With ``group=None`` the group stays ``None`` and paths are unchanged."""
        from xarray.backends.common import _resolve_group_and_filter

        all_paths = ["/", "/A"]
        resolved_group, kept_paths = _resolve_group_and_filter(None, all_paths)
        assert resolved_group is None
        assert kept_paths == all_paths

    def test_resolve_group_and_filter_literal(self) -> None:
        """A literal group name is returned as-is and paths are unchanged."""
        from xarray.backends.common import _resolve_group_and_filter

        all_paths = ["/", "/A"]
        resolved_group, kept_paths = _resolve_group_and_filter("A", all_paths)
        assert resolved_group == "A"
        assert kept_paths == all_paths

    def test_resolve_group_and_filter_glob(self) -> None:
        """A glob group resolves to ``None`` together with the filtered paths."""
        from xarray.backends.common import _resolve_group_and_filter

        all_paths = ["/", "/A", "/A/sweep_0", "/A/sweep_1", "/B", "/B/sweep_0"]
        expected = ["/", "/A", "/A/sweep_0", "/B", "/B/sweep_0"]
        resolved_group, kept_paths = _resolve_group_and_filter(
            "*/sweep_0", all_paths
        )
        assert resolved_group is None
        assert kept_paths == expected
0 commit comments