@@ -1900,10 +1900,13 @@ def open_datatree(
19001900 zarr_format = None ,
19011901 max_concurrency : int | None = None ,
19021902 ) -> DataTree :
1903+ from xarray .backends .common import _is_glob_pattern , _resolve_group_and_filter
1904+
19031905 filename_or_obj = _normalize_path (filename_or_obj )
19041906
1905- if group :
1906- parent = str (NodePath ("/" ) / NodePath (group ))
1907+ effective_group = None if (group and _is_glob_pattern (group )) else group
1908+ if effective_group :
1909+ parent = str (NodePath ("/" ) / NodePath (effective_group ))
19071910 else :
19081911 parent = str (NodePath ("/" ))
19091912
@@ -1964,8 +1967,11 @@ def open_datatree(
19641967 zarr_version = zarr_version ,
19651968 zarr_format = zarr_format ,
19661969 )
1970+ all_paths = list (stores .keys ())
1971+ _ , filtered_paths = _resolve_group_and_filter (group , all_paths )
19671972 groups_dict = {}
1968- for path_group , store in stores .items ():
1973+ for path_group in filtered_paths :
1974+ store = stores [path_group ]
19691975 store_entrypoint = StoreBackendEntrypoint ()
19701976 with close_on_error (store ):
19711977 group_ds = store_entrypoint .open_dataset (
@@ -1978,7 +1984,7 @@ def open_datatree(
19781984 use_cftime = use_cftime ,
19791985 decode_timedelta = decode_timedelta ,
19801986 )
1981- if group :
1987+ if effective_group :
19821988 group_name = str (NodePath (path_group ).relative_to (parent ))
19831989 else :
19841990 group_name = str (NodePath (path_group ))
@@ -2045,6 +2051,16 @@ async def _open_datatree_from_stores_async(
20452051 if parent_path in group_children :
20462052 group_children [parent_path ][child_name ] = member
20472053
2054+ # Filter groups when glob pattern is used
2055+ from xarray .backends .common import _resolve_group_and_filter
2056+
2057+ effective_group , filtered_paths = _resolve_group_and_filter (
2058+ group , list (group_async .keys ())
2059+ )
2060+ filtered_set = set (filtered_paths )
2061+ group_async = {k : v for k , v in group_async .items () if k in filtered_set }
2062+ group_children = {k : v for k , v in group_children .items () if k in filtered_set }
2063+
20482064 # Phase 2: Open each group — wrap async objects, run CPU decode in threads.
20492065 async def open_one (path_group : str ) -> tuple [str , Dataset ]:
20502066 async_grp = group_async [path_group ]
@@ -2091,7 +2107,7 @@ def _cpu_open():
20912107 )
20922108
20932109 ds = await loop .run_in_executor (executor , _cpu_open )
2094- if group :
2110+ if effective_group :
20952111 group_name = str (NodePath (path_group ).relative_to (parent ))
20962112 else :
20972113 group_name = str (NodePath (path_group ))
@@ -2132,11 +2148,13 @@ def open_groups_as_dict(
21322148 zarr_version = None ,
21332149 zarr_format = None ,
21342150 ) -> dict [str , Dataset ]:
2151+ from xarray .backends .common import _is_glob_pattern , _resolve_group_and_filter
2152+
21352153 filename_or_obj = _normalize_path (filename_or_obj )
21362154
2137- # Check for a group and make it a parent if it exists
2138- if group :
2139- parent = str (NodePath ("/" ) / NodePath (group ))
2155+ effective_group = None if ( group and _is_glob_pattern ( group )) else group
2156+ if effective_group :
2157+ parent = str (NodePath ("/" ) / NodePath (effective_group ))
21402158 else :
21412159 parent = str (NodePath ("/" ))
21422160
@@ -2153,8 +2171,11 @@ def open_groups_as_dict(
21532171 zarr_format = zarr_format ,
21542172 )
21552173
2174+ _ , filtered_paths = _resolve_group_and_filter (group , list (stores .keys ()))
2175+
21562176 groups_dict = {}
2157- for path_group , store in stores .items ():
2177+ for path_group in filtered_paths :
2178+ store = stores [path_group ]
21582179 store_entrypoint = StoreBackendEntrypoint ()
21592180
21602181 with close_on_error (store ):
@@ -2168,7 +2189,7 @@ def open_groups_as_dict(
21682189 use_cftime = use_cftime ,
21692190 decode_timedelta = decode_timedelta ,
21702191 )
2171- if group :
2192+ if effective_group :
21722193 group_name = str (NodePath (path_group ).relative_to (parent ))
21732194 else :
21742195 group_name = str (NodePath (path_group ))
@@ -2200,11 +2221,13 @@ async def open_groups_as_dict_async(
22002221 This mirrors open_groups_as_dict but parallelizes per-group Dataset opening,
22012222 which can significantly reduce latency on high-RTT object stores.
22022223 """
2224+ from xarray .backends .common import _is_glob_pattern , _resolve_group_and_filter
2225+
22032226 filename_or_obj = _normalize_path (filename_or_obj )
22042227
2205- # Determine parent group path context
2206- if group :
2207- parent = str (NodePath ("/" ) / NodePath (group ))
2228+ effective_group = None if ( group and _is_glob_pattern ( group )) else group
2229+ if effective_group :
2230+ parent = str (NodePath ("/" ) / NodePath (effective_group ))
22082231 else :
22092232 parent = str (NodePath ("/" ))
22102233
@@ -2221,6 +2244,9 @@ async def open_groups_as_dict_async(
22212244 zarr_format = zarr_format ,
22222245 )
22232246
2247+ _ , filtered_paths = _resolve_group_and_filter (group , list (stores .keys ()))
2248+ filtered_set = set (filtered_paths )
2249+
22242250 loop = asyncio .get_running_loop ()
22252251 max_workers = min (len (stores ), 10 ) if stores else 1
22262252 executor = ThreadPoolExecutor (
@@ -2244,15 +2270,17 @@ def _load_sync():
22442270 )
22452271
22462272 ds = await loop .run_in_executor (executor , _load_sync )
2247- if group :
2273+ if effective_group :
22482274 group_name = str (NodePath (path_group ).relative_to (parent ))
22492275 else :
22502276 group_name = str (NodePath (path_group ))
22512277 return group_name , ds
22522278
22532279 try :
22542280 tasks = [
2255- open_one (path_group , store ) for path_group , store in stores .items ()
2281+ open_one (path_group , store )
2282+ for path_group , store in stores .items ()
2283+ if path_group in filtered_set
22562284 ]
22572285 results = await asyncio .gather (* tasks )
22582286 finally :
0 commit comments