diff --git a/docs/conf.py b/docs/conf.py index face209b..5c8e1f84 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -151,6 +151,17 @@ autosummary_generate = rst_files autoclass_content = "both" +# Render `open_groups_as_dict`, `open_datatree`, and `open_dataset` method +# docstrings on each BackendEntrypoint class. Without `members: True` the +# class page only shows the class-level docstring and class members get +# silently dropped — see `xradar/io/backends/common.py:_compose_docstring` +# for the per-method blocks that need rendering. +autodoc_default_options = { + "members": True, + "undoc-members": False, + "show-inheritance": True, +} + # The version info for the project you're documenting, acts as replacement # for |version| and |release|, also used in various other places throughout # the built documents. diff --git a/docs/history.md b/docs/history.md index c5567005..ce3923df 100644 --- a/docs/history.md +++ b/docs/history.md @@ -2,6 +2,7 @@ ## Development +* ENH: Add xarray-native ``open_datatree`` with ``engine=`` parameter for all 13 backends (odim, cfradial1, cfradial2, nexradlevel2, gamic, iris, furuno, rainbow, datamet, hpl, metek, uf, imd), enabling ``xd.open_datatree(file, engine="odim")`` and ``xr.open_datatree(file, engine="odim")``. Legacy ``open_*_datatree()`` functions emit ``FutureWarning`` and delegate to the new entry points; ``xd.io.open_imd_datatree([files])`` remains the documented exception for multi-file IMD volumes. 
Also adds public ``xd.io.list_engines()`` for engine discovery, NumPy-style docstrings on every ``BackendEntrypoint.open_groups_as_dict`` / ``open_datatree`` method, and a demo notebook covering all 13 engines ({issue}`329`, {pull}`335`) by [@aladinor](https://github.com/aladinor) * FIX: ensure `to_cfradial2` correctly selects the default storage engine when none is provided, ({pull}`378`) by [@chfer](https://github.com/chfer) * MNT: Add ``cfradial1_sgp_file`` session fixture and refactor 8 tests in ``test_util.py``/``test_accessors.py`` to share it instead of inlining ``DATASETS.fetch("sample_sgp_data.nc")``. Fixture returns the filename so each test opens its own DataTree, avoiding cross-test mutation ({issue}`346`, {pull}`347`) by [@aladinor](https://github.com/aladinor) * FIX: IRIS reader rotates the first-loaded moment in each sweep by 1 ray — ``IrisRawFile._get_ray_record_offsets_and_data`` initialised ``j = -1`` so the first matching ray of the first-loaded moment was written to ``raw_data[-1]``; affects files without ``DB_XHDR`` (data-type bit 0) where ``DB_DBT`` becomes the rotated moment ({issue}`357`, {pull}`375`) by [@aladinor](https://github.com/aladinor) diff --git a/docs/notebooks/Assign_GeoCoords.md b/docs/notebooks/Assign_GeoCoords.md index 40262791..7f92ee36 100644 --- a/docs/notebooks/Assign_GeoCoords.md +++ b/docs/notebooks/Assign_GeoCoords.md @@ -95,7 +95,7 @@ file2 = DATASETS.fetch("cfrad.20211011_201557.188_to_20211011_201617.720_DOW8_PP **Note:** Station coordinates (`latitude`, `longitude`, `altitude`) are stored on the root node of the DataTree. When accessing a sweep dataset directly, use `.to_dataset(inherit="all_coords")` to inherit these coordinates from the root. The `.xradar.georeference()` accessor handles this automatically. 
```{code-cell} -dtree1 = xd.io.open_cfradial1_datatree(file1) +dtree1 = xd.open_datatree(file1, engine="cfradial1") ``` ```{code-cell} @@ -131,7 +131,7 @@ plt.show() ## Example #2 ```{code-cell} -dtree2 = xd.io.open_cfradial1_datatree(file2) +dtree2 = xd.open_datatree(file2, engine="cfradial1") ``` ```{code-cell} diff --git a/docs/notebooks/CfRadial1.md b/docs/notebooks/CfRadial1.md index 6759efd8..6fff1596 100644 --- a/docs/notebooks/CfRadial1.md +++ b/docs/notebooks/CfRadial1.md @@ -84,12 +84,11 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep number or list. ```{code-cell} -?xd.io.open_cfradial1_datatree +?xd.open_datatree ``` ```{code-cell} -dtree = xd.io.open_cfradial1_datatree( - filename, +dtree = xd.open_datatree(filename, engine="cfradial1", first_dim="time", optional=False, ) @@ -109,11 +108,11 @@ dtree["sweep_0"].ds.DBZ.sortby("azimuth").plot(y="azimuth") ``` ```{code-cell} -dtree = xd.io.open_cfradial1_datatree(filename, sweep=[0, 1, 8]) +dtree = xd.open_datatree(filename, engine="cfradial1", sweep=[0, 1, 8]) display(dtree) ``` ```{code-cell} -dtree = xd.io.open_cfradial1_datatree(filename, sweep=["sweep_0", "sweep_4", "sweep_8"]) +dtree = xd.open_datatree(filename, engine="cfradial1", sweep=["sweep_0", "sweep_4", "sweep_8"]) display(dtree) ``` diff --git a/docs/notebooks/CfRadial1_Export.md b/docs/notebooks/CfRadial1_Export.md index 8cc1a32f..4c1a451d 100644 --- a/docs/notebooks/CfRadial1_Export.md +++ b/docs/notebooks/CfRadial1_Export.md @@ -32,7 +32,7 @@ filename = DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") ``` ```{code-cell} -radar = xd.io.open_cfradial1_datatree(filename, first_dim="auto") +radar = xd.open_datatree(filename, engine="cfradial1", first_dim="auto") display(radar) ``` diff --git a/docs/notebooks/CfRadial1_Model_Transformation.md b/docs/notebooks/CfRadial1_Model_Transformation.md index 60af2c9c..60685dc1 100644 --- 
a/docs/notebooks/CfRadial1_Model_Transformation.md +++ b/docs/notebooks/CfRadial1_Model_Transformation.md @@ -203,7 +203,7 @@ xradar provides two easy ways to retrieve the CfRadial1 data as CfRadial2 groups This is the most complete representation as a DataTree. All groups and subgroups are represented in a tree-like structure. Can be parameterized using kwargs. Easy write to netCDF4. ```{code-cell} -dtree = xd.io.open_cfradial1_datatree(filename, optional_groups=True) +dtree = xd.open_datatree(filename, engine="cfradial1", optional_groups=True) with xr.set_options(display_expand_data_vars=True, display_expand_attrs=True): display(dtree) ``` @@ -247,7 +247,7 @@ for grp in dtree.groups: #### Roundtrip with `xradar.io.to_cfradial2` ```{code-cell} -dtree3 = xd.io.open_cfradial1_datatree(filename, optional_groups=True) +dtree3 = xd.open_datatree(filename, engine="cfradial1", optional_groups=True) ``` ```{code-cell} diff --git a/docs/notebooks/Furuno.md b/docs/notebooks/Furuno.md index 10ac4df5..85ac115b 100644 --- a/docs/notebooks/Furuno.md +++ b/docs/notebooks/Furuno.md @@ -94,7 +94,7 @@ ds.DBZH.sortby("azimuth").plot(y="azimuth") Furuno scn/scnx files consist only of one sweep. But we might load and combine several sweeps into one DataTree. ```{code-cell} -dtree = xd.io.open_furuno_datatree(filename_scn) +dtree = xd.open_datatree(filename_scn, engine="furuno") display(dtree) ``` diff --git a/docs/notebooks/GAMIC.md b/docs/notebooks/GAMIC.md index f4ee5591..4b8fb3ce 100644 --- a/docs/notebooks/GAMIC.md +++ b/docs/notebooks/GAMIC.md @@ -75,11 +75,11 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep string, number or list. 
```{code-cell} -help(xd.io.open_gamic_datatree) +help(xd.open_datatree) ``` ```{code-cell} -dtree = xd.io.open_gamic_datatree(filename, sweep=8) +dtree = xd.open_datatree(filename, engine="gamic", sweep=8) display(dtree) ``` @@ -96,16 +96,16 @@ dtree["sweep_0"].ds.DBZH.plot() ``` ```{code-cell} -dtree = xd.io.open_gamic_datatree(filename, sweep="sweep_8") +dtree = xd.open_datatree(filename, engine="gamic", sweep="sweep_8") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_gamic_datatree(filename, sweep=[0, 1, 8]) +dtree = xd.open_datatree(filename, engine="gamic", sweep=[0, 1, 8]) display(dtree) ``` ```{code-cell} -dtree = xd.io.open_gamic_datatree(filename, sweep=["sweep_1", "sweep_2", "sweep_8"]) +dtree = xd.open_datatree(filename, engine="gamic", sweep=["sweep_1", "sweep_2", "sweep_8"]) display(dtree) ``` diff --git a/docs/notebooks/Georeference_TargetCRS.md b/docs/notebooks/Georeference_TargetCRS.md index 15fec4c5..ca0b2222 100644 --- a/docs/notebooks/Georeference_TargetCRS.md +++ b/docs/notebooks/Georeference_TargetCRS.md @@ -42,7 +42,7 @@ import xradar as xd ```{code-cell} filename = DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") -radar = xd.io.open_cfradial1_datatree(filename, first_dim="auto") +radar = xd.open_datatree(filename, engine="cfradial1", first_dim="auto") radar ``` diff --git a/docs/notebooks/HaloPhotonics.md b/docs/notebooks/HaloPhotonics.md index 77b8147b..b7e0cb0c 100644 --- a/docs/notebooks/HaloPhotonics.md +++ b/docs/notebooks/HaloPhotonics.md @@ -22,15 +22,17 @@ import xradar as xd Opening a Halo Photonics Doppler lidar .hpl file. -We use the `xd.io.open_hpl_datatree` in order to load the Halo Photonics Doppler lidar data. After that we will need to enter in the latitude and longitude in order to properly georeference the data. The .hpl file does not contain the latitude, longitude, or altitude of the lidar, so these need to be entered in as keywords as a part of the `backend_kwargs` argument to `xd.io.open_hpl_datatree`. 
+We use `xd.open_datatree(file, engine="hpl")` to load the Halo Photonics Doppler lidar data. The .hpl file does not contain the latitude, longitude, or altitude of the lidar, so those need to be passed as `latitude=`, `longitude=`, and `altitude=` keyword arguments. In this example, we are using the coordinates of the Doppler lidar at the Nantucket Wastewater Management Facility, deployed as as part of the DOE Energy Efficiency and Renewable Energy Office's [3rd Wind Forecast Improvement Project](https://www2.whoi.edu/site/wfip3/). ```{code-cell} -ds = xd.io.open_hpl_datatree( +ds = xd.open_datatree( DATASETS.fetch("User1_184_20240601_013257.hpl"), + engine="hpl", sweep=[0, 1, 2, 3, 4, 5, 6, 7, 8], - backend_kwargs=dict(latitude=41.24276244459537, longitude=-70.1070364814594), + latitude=41.24276244459537, + longitude=-70.1070364814594, ) ``` diff --git a/docs/notebooks/IMD.md b/docs/notebooks/IMD.md index c26b4082..4373dd65 100644 --- a/docs/notebooks/IMD.md +++ b/docs/notebooks/IMD.md @@ -83,12 +83,17 @@ help(xd.io.open_imd_datatree) ### Single sweep ```{code-cell} -dtree = xd.io.open_imd_datatree(filename_sweep_0) +dtree = xd.open_datatree(filename_sweep_0, engine="imd") display(dtree) ``` ### Volume from multiple files +`xd.open_datatree(..., engine="imd")` takes a single file. To assemble a +full IMD volume from a list of per-sweep files, use the dedicated +`xd.io.open_imd_datatree` function (the documented multi-file carve-out +from the engine API). + ```{code-cell} dtree = xd.io.open_imd_datatree(volume_files) display(dtree) diff --git a/docs/notebooks/Iris.md b/docs/notebooks/Iris.md index e75e08f6..fba66062 100644 --- a/docs/notebooks/Iris.md +++ b/docs/notebooks/Iris.md @@ -76,27 +76,26 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep string, number or list. The underlying xarray.Dataset can be accessed with property `.ds`. 
```{code-cell} -help(xd.io.open_iris_datatree) +help(xd.open_datatree) ``` ```{code-cell} -dtree = xd.io.open_iris_datatree(filename_volume) +dtree = xd.open_datatree(filename_volume, engine="iris") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_iris_datatree(filename_volume, sweep="sweep_8") +dtree = xd.open_datatree(filename_volume, engine="iris", sweep="sweep_8") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_iris_datatree(filename_volume, sweep=[1, 2, 8]) +dtree = xd.open_datatree(filename_volume, engine="iris", sweep=[1, 2, 8]) display(dtree) ``` ```{code-cell} -dtree = xd.io.open_iris_datatree( - filename_volume, +dtree = xd.open_datatree(filename_volume, engine="iris", sweep=["sweep_0", "sweep_1", "sweep_8"], ) display(dtree) diff --git a/docs/notebooks/MRR.md b/docs/notebooks/MRR.md index 96cb9094..1c572795 100644 --- a/docs/notebooks/MRR.md +++ b/docs/notebooks/MRR.md @@ -21,7 +21,7 @@ from open_radar_data import DATASETS import xradar as xd ``` -`xd.io.open_metek_datatree` supports the Metek MRR2 processed (.pro, .ave) and raw (.raw) files. The initialized datatree will contain all vertically pointing radar data in one sweep. +`xd.open_datatree(file, engine="metek")` supports the Metek MRR2 processed (.pro, .ave) and raw (.raw) files. The initialized datatree will contain all vertically pointing radar data in one sweep. In this example, we are loading the 60 s average files from the MRR2 sampling a rain event over the Argonne Testbed for Multiscale Observational Science at Argonne National Laboratory in the Chicago suburbs. 
@@ -34,7 +34,7 @@ decompressed_file = mrr_test_file[:-3] with gzip.open(mrr_test_file, "rb") as f_in: with open(decompressed_file, "wb") as f_out: shutil.copyfileobj(f_in, f_out) -with xd.io.open_metek_datatree(decompressed_file) as ds: +with xd.open_datatree(decompressed_file, engine="metek") as ds: display(ds) ``` diff --git a/docs/notebooks/Mapping_Sweeps.md b/docs/notebooks/Mapping_Sweeps.md index 6b45e327..4b3c1627 100644 --- a/docs/notebooks/Mapping_Sweeps.md +++ b/docs/notebooks/Mapping_Sweeps.md @@ -49,7 +49,7 @@ import xradar as xd filename = DATASETS.fetch("sample_sgp_data.nc") # Open the radar file into a DataTree object -dtree = xd.io.open_cfradial1_datatree(filename) +dtree = xd.open_datatree(filename, engine="cfradial1") dtree = dtree.xradar.georeference() ``` diff --git a/docs/notebooks/NexradLevel2.md b/docs/notebooks/NexradLevel2.md index 4b923e3b..e039a58a 100644 --- a/docs/notebooks/NexradLevel2.md +++ b/docs/notebooks/NexradLevel2.md @@ -86,11 +86,11 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep string, number or list. 
```{code-cell} -help(xd.io.open_nexradlevel2_datatree) +help(xd.open_datatree) ``` ```{code-cell} -dtree = xd.io.open_nexradlevel2_datatree(filename, sweep=4) +dtree = xd.open_datatree(filename, engine="nexradlevel2", sweep=4) display(dtree) ``` @@ -107,12 +107,12 @@ dtree["sweep_4"].ds.DBZH.plot(cmap="HomeyerRainbow") ``` ```{code-cell} -dtree = xd.io.open_nexradlevel2_datatree(filename, sweep="sweep_8") +dtree = xd.open_datatree(filename, engine="nexradlevel2", sweep="sweep_8") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_nexradlevel2_datatree(filename, sweep=[0, 1, 8]) +dtree = xd.open_datatree(filename, engine="nexradlevel2", sweep=[0, 1, 8]) display(dtree) ``` @@ -125,9 +125,7 @@ dtree["sweep_8"]["sweep_fixed_angle"].values ``` ```{code-cell} -dtree = xd.io.open_nexradlevel2_datatree( - filename, -) +dtree = xd.open_datatree(filename, engine="nexradlevel2") display(dtree) ``` diff --git a/docs/notebooks/ODIM_H5.md b/docs/notebooks/ODIM_H5.md index ac1777fa..051a7816 100644 --- a/docs/notebooks/ODIM_H5.md +++ b/docs/notebooks/ODIM_H5.md @@ -79,11 +79,11 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep string, number or list. 
```{code-cell} -?xd.io.open_odim_datatree +?xd.open_datatree ``` ```{code-cell} -dtree = xd.io.open_odim_datatree(filename, sweep=8) +dtree = xd.open_datatree(filename, engine="odim", sweep=8) display(dtree) ``` @@ -100,16 +100,16 @@ dtree["sweep_0"].ds.DBZH.plot() ``` ```{code-cell} -dtree = xd.io.open_odim_datatree(filename, sweep="sweep_8") +dtree = xd.open_datatree(filename, engine="odim", sweep="sweep_8") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_odim_datatree(filename, sweep=[0, 1, 8]) +dtree = xd.open_datatree(filename, engine="odim", sweep=[0, 1, 8]) display(dtree) ``` ```{code-cell} -dtree = xd.io.open_odim_datatree(filename, sweep=["sweep_0", "sweep_1", "sweep_8"]) +dtree = xd.open_datatree(filename, engine="odim", sweep=["sweep_0", "sweep_1", "sweep_8"]) display(dtree) ``` diff --git a/docs/notebooks/Open-Datatree-Engine.md b/docs/notebooks/Open-Datatree-Engine.md new file mode 100644 index 00000000..bcb36ea8 --- /dev/null +++ b/docs/notebooks/Open-Datatree-Engine.md @@ -0,0 +1,272 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.19.1 + main_language: python +kernelspec: + display_name: Python 3 + name: python3 +--- + +# `open_datatree` with `engine=` + +xradar registers each of its readers as an `xarray.backends.BackendEntrypoint`, +so you can load any supported radar format into an `xarray.DataTree` directly +through the xarray-native API: + +```python +import xarray as xr + +dtree = xr.open_datatree(file, engine="<name>") +``` + +The same call is also exposed under `xradar` for convenience: + +```python +import xradar as xd + +dtree = xd.open_datatree(file, engine="<name>") +``` + +Both paths return a CfRadial2-shaped `xarray.DataTree` with a root dataset +and one `sweep_N` child per sweep. The xarray-native form is preferred in +most cases; the xradar-prefixed form is a thin shim that resolves the engine +through xradar's registry. 
+ +```{code-cell} +import atexit +import gzip +import shutil +import tempfile +import warnings +from pathlib import Path + +import xarray as xr +from open_radar_data import DATASETS + +import xradar as xd + +# Some sample files in the open-radar-data repository ship gzipped but the +# corresponding backends expect a raw binary stream. Helper to decompress on +# demand into a tmpdir cleaned up at interpreter exit. +_tmpdir_obj = tempfile.TemporaryDirectory() +atexit.register(_tmpdir_obj.cleanup) +_tmpdir = Path(_tmpdir_obj.name) + + +def fetch_ungzipped(name): + src = Path(DATASETS.fetch(name)) + dst = _tmpdir / src.stem + with gzip.open(src) as fin, open(dst, "wb") as fout: + shutil.copyfileobj(fin, fout) + return str(dst) +``` + +## Supported engines + +The current registry covers thirteen radar formats: + +```{code-cell} +xd.io.list_engines() +``` + +## ODIM_H5 + +```{code-cell} +odim_file = DATASETS.fetch("71_20181220_060628.pvol.h5") +dtree = xr.open_datatree(odim_file, engine="odim") +display(dtree) +``` + +## CfRadial1 + +```{code-cell} +cfradial1_file = DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") +dtree = xr.open_datatree(cfradial1_file, engine="cfradial1") +display(dtree) +``` + +## CfRadial2 + +CfRadial2 files are already group-native; the backend normalizes common +institutional variations onto the FM301 layout. + +```{code-cell} +# Round-trip a CfRadial1 file to CfRadial2 so we have a demo input. 
+tmp_cfradial2 = _tmpdir / "demo_cfradial2.nc" +xd.io.to_cfradial2( + xr.open_datatree(cfradial1_file, engine="cfradial1", first_dim="time").copy(), + tmp_cfradial2, + engine="netcdf4", +) + +dtree = xr.open_datatree(str(tmp_cfradial2), engine="cfradial2") +display(dtree) +``` + +## NEXRAD Level II + +```{code-cell} +nexrad_file = DATASETS.fetch("KATX20130717_195021_V06") +dtree = xr.open_datatree(nexrad_file, engine="nexradlevel2") +display(dtree) +``` + +## GAMIC + +```{code-cell} +gamic_file = DATASETS.fetch("DWD-Vol-2_99999_20180601054047_00.h5") +dtree = xr.open_datatree(gamic_file, engine="gamic") +display(dtree) +``` + +## IRIS + +```{code-cell} +iris_file = DATASETS.fetch("cor-main131125105503.RAW2049") +dtree = xr.open_datatree(iris_file, engine="iris") +display(dtree) +``` + +## Furuno + +```{code-cell} +furuno_file = DATASETS.fetch("0080_20210730_160000_01_02.scn.gz") +dtree = xr.open_datatree(furuno_file, engine="furuno") +display(dtree) +``` + +## Rainbow + +```{code-cell} +rainbow_file = DATASETS.fetch("2013051000000600dBZ.vol") +dtree = xr.open_datatree(rainbow_file, engine="rainbow") +display(dtree) +``` + +## DataMet + +```{code-cell} +datamet_file = DATASETS.fetch("H-000-VOL-ILMONTE-201907100700.tar.gz") +dtree = xr.open_datatree(datamet_file, engine="datamet") +display(dtree) +``` + +## HPL (Halo Photonics) + +```{code-cell} +hpl_file = DATASETS.fetch("User1_100_20240714_122137.hpl") +dtree = xr.open_datatree(hpl_file, engine="hpl") +display(dtree) +``` + +## Metek MRR + +```{code-cell} +metek_file = fetch_ungzipped("0308.ave.gz") +dtree = xr.open_datatree(metek_file, engine="metek") +display(dtree) +``` + +## Universal Format (UF) + +```{code-cell} +uf_file = fetch_ungzipped("20110427_164233_rvp8-rel_v001_SUR.uf.gz") +dtree = xr.open_datatree(uf_file, engine="uf") +display(dtree) +``` + +## IMD - single file via `engine="imd"` + +IMD distributes one sweep per NetCDF file. 
The `engine="imd"` entry serves +the **single-file** case: + +```{code-cell} +imd_file = DATASETS.fetch("IMD/JPR220822135253-IMD-B.nc") +dtree = xr.open_datatree(imd_file, engine="imd") +display(dtree) +``` + +## IMD - multi-file volume via `open_imd_datatree` + +To assemble a full IMD volume you supply a list of sweep files. xarray's +`engine=` API takes a single path, so multi-file IMD volumes use the +module-level function (which delegates to `xradar.util.create_volume`): + +```{code-cell} +imd_volume = [ + DATASETS.fetch(f"IMD/JPR220822135253-IMD-B.nc{s}") + for s in ["", ".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8", ".9"] +] +dtree = xd.io.open_imd_datatree(imd_volume) +display(dtree) +``` + +## Common parameters + +Every backend accepts a `sweep` selector (int, str, or list), `first_dim` +(`"auto"` or `"time"`), `optional`, and `optional_groups`. They behave +uniformly across all engines: + +```{code-cell} +# Single sweep by index +dtree = xr.open_datatree(odim_file, engine="odim", sweep=0) +list(dtree.children) +``` + +```{code-cell} +# Multiple sweeps by index +dtree = xr.open_datatree(odim_file, engine="odim", sweep=[0, 2, 4]) +list(dtree.children) +``` + +```{code-cell} +# Sweeps by name +dtree = xr.open_datatree( + cfradial1_file, engine="cfradial1", sweep=["sweep_0", "sweep_3"] +) +list(dtree.children) +``` + +## `open_groups_as_dict` — work with the raw dict + +If you want the pre-`DataTree` dict directly (useful for inspection or +custom assembly), instantiate the backend entrypoint and call +`open_groups_as_dict`: + +```{code-cell} +groups = xd.io.OdimBackendEntrypoint().open_groups_as_dict(odim_file, sweep=[0, 1]) +list(groups) +``` + +## Deprecated `open_*_datatree` shims + +Most legacy `xd.io.open_<name>_datatree(...)` functions still work but +emit a `FutureWarning` directing users to the engine API. 
The one documented +exception is `xd.io.open_imd_datatree`, which remains the supported API for +multi-file IMD volumes (lists of per-sweep paths) and does **not** emit a +deprecation warning. + +```{code-cell} +with warnings.catch_warnings(record=True) as captured: + warnings.simplefilter("always") + xd.io.open_odim_datatree(odim_file, sweep=[0]) +[w.message for w in captured if issubclass(w.category, FutureWarning)] +``` + +## Unknown engine + +`xd.open_datatree` looks the engine up in xradar's registry and raises a +clear `ValueError` listing every supported name. (The xarray-native +`xr.open_datatree` uses xarray's own plugin discovery and raises a +different error from there.) + +```{code-cell} +try: + xd.open_datatree(odim_file, engine="nonexistent") +except ValueError as exc: + print(exc) +``` diff --git a/docs/notebooks/Rainbow.md b/docs/notebooks/Rainbow.md index 3202fc08..d36e26ac 100644 --- a/docs/notebooks/Rainbow.md +++ b/docs/notebooks/Rainbow.md @@ -76,11 +76,11 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep string, number or list. 
```{code-cell} -help(xd.io.open_rainbow_datatree) +help(xd.open_datatree) ``` ```{code-cell} -dtree = xd.io.open_rainbow_datatree(filename, sweep="sweep_8") +dtree = xd.open_datatree(filename, engine="rainbow", sweep="sweep_8") display(dtree) ``` @@ -97,16 +97,16 @@ dtree["sweep_0"].ds.DBZH.plot() ``` ```{code-cell} -dtree = xd.io.open_rainbow_datatree(filename, sweep="sweep_8") +dtree = xd.open_datatree(filename, engine="rainbow", sweep="sweep_8") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_rainbow_datatree(filename, sweep=[0, 1, 8]) +dtree = xd.open_datatree(filename, engine="rainbow", sweep=[0, 1, 8]) display(dtree) ``` ```{code-cell} -dtree = xd.io.open_rainbow_datatree(filename, sweep=["sweep_1", "sweep_2", "sweep_8"]) +dtree = xd.open_datatree(filename, engine="rainbow", sweep=["sweep_1", "sweep_2", "sweep_8"]) display(dtree) ``` diff --git a/docs/notebooks/Transform.md b/docs/notebooks/Transform.md index a8df4ec7..f8dc1a94 100644 --- a/docs/notebooks/Transform.md +++ b/docs/notebooks/Transform.md @@ -28,7 +28,7 @@ import xradar as xd ```{code-cell} file = DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") -dtree = xd.io.open_cfradial1_datatree(file) +dtree = xd.open_datatree(file, engine="cfradial1") display(dtree) ``` diff --git a/docs/notebooks/UF.md b/docs/notebooks/UF.md index 0a0b28fd..89f250f7 100644 --- a/docs/notebooks/UF.md +++ b/docs/notebooks/UF.md @@ -105,11 +105,11 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep string, number or list. 
```{code-cell} -help(xd.io.open_uf_datatree) +help(xd.open_datatree) ``` ```{code-cell} -dtree = xd.io.open_uf_datatree(fname, sweep=4) +dtree = xd.open_datatree(fname, engine="uf", sweep=4) display(dtree) ``` @@ -126,12 +126,12 @@ dtree["sweep_4"].ds.DBZH.plot(cmap="HomeyerRainbow") ``` ```{code-cell} -dtree = xd.io.open_uf_datatree(fname, sweep="sweep_8") +dtree = xd.open_datatree(fname, engine="uf", sweep="sweep_8") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_uf_datatree(fname, sweep=[0, 1, 8]) +dtree = xd.open_datatree(fname, engine="uf", sweep=[0, 1, 8]) display(dtree) ``` @@ -144,7 +144,7 @@ dtree["sweep_8"]["sweep_fixed_angle"].values ``` ```{code-cell} -dtree = xd.io.open_uf_datatree(fname) +dtree = xd.open_datatree(fname, engine="uf") display(dtree) ``` diff --git a/docs/notebooks/multiple-sweeps-into-volume-scan.md b/docs/notebooks/multiple-sweeps-into-volume-scan.md index 8dad2233..e69f7f0f 100644 --- a/docs/notebooks/multiple-sweeps-into-volume-scan.md +++ b/docs/notebooks/multiple-sweeps-into-volume-scan.md @@ -88,7 +88,7 @@ task_files = [ for i in radar_files[:4] ] # list of xradar datatrees -ls_dt = [xd.io.open_iris_datatree(i).xradar.georeference() for i in task_files] +ls_dt = [xd.open_datatree(i, engine="iris").xradar.georeference() for i in task_files] # sweeps and elevations within each task for i in ls_dt: @@ -190,7 +190,7 @@ def mult_vcp(radar_files): """ ls_files = [radar_files[i : i + 4] for i in range(len(radar_files)) if i % 4 == 0] ls_sigmet = [ - [xd.io.open_iris_datatree(data_accessor(i)).xradar.georeference() for i in j] + [xd.open_datatree(data_accessor(i), engine="iris").xradar.georeference() for i in j] for j in ls_files ] ls_dt = [create_vcp(i) for i in ls_sigmet] diff --git a/docs/notebooks/nexrad_read_chunks.md b/docs/notebooks/nexrad_read_chunks.md index f72d0b3b..107caaf3 100644 --- a/docs/notebooks/nexrad_read_chunks.md +++ b/docs/notebooks/nexrad_read_chunks.md @@ -101,7 +101,7 @@ all_bytes = None if chunk_paths: 
candidate = [fs.open(p, "rb").read() for p in chunk_paths] try: - xd.io.open_nexradlevel2_datatree(candidate) + xd.open_datatree(candidate, engine="nexradlevel2") all_bytes = candidate except ValueError as e: print(f"S3 listing rejected: {e}") @@ -129,7 +129,7 @@ When all chunks (S through E) are available, passing the list to complete volume file. ```{code-cell} -dtree = xd.io.open_nexradlevel2_datatree(all_bytes) +dtree = xd.open_datatree(all_bytes, engine="nexradlevel2") display(dtree) ``` @@ -157,8 +157,7 @@ partial_chunks = all_bytes[:15] with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - dtree_drop = xd.io.open_nexradlevel2_datatree( - partial_chunks, incomplete_sweep="drop" + dtree_drop = xd.open_datatree(partial_chunks, engine="nexradlevel2", incomplete_sweep="drop" ) # Show warnings @@ -203,7 +202,7 @@ This is useful for visualization and monitoring where you want to see all available data as soon as it arrives. ```{code-cell} -dtree_pad = xd.io.open_nexradlevel2_datatree(partial_chunks, incomplete_sweep="pad") +dtree_pad = xd.open_datatree(partial_chunks, engine="nexradlevel2", incomplete_sweep="pad") sweep_groups_pad = list(dtree_pad.match("sweep_*").keys()) print(f"Sweeps available (pad mode): {sweep_groups_pad}") @@ -241,7 +240,7 @@ are still missing. 
```{code-cell} early_chunks = all_bytes[:5] -dtree_early = xd.io.open_nexradlevel2_datatree(early_chunks, incomplete_sweep="pad") +dtree_early = xd.open_datatree(early_chunks, engine="nexradlevel2", incomplete_sweep="pad") sweep_groups_early = list(dtree_early.match("sweep_*").keys()) print(f"Sweeps from 5 chunks: {sweep_groups_early}") diff --git a/docs/notebooks/plot-ppi.md b/docs/notebooks/plot-ppi.md index 8eed6767..809a9f44 100644 --- a/docs/notebooks/plot-ppi.md +++ b/docs/notebooks/plot-ppi.md @@ -41,7 +41,7 @@ filename = DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") Read the data using the `cfradial1` engine ```{code-cell} -radar = xd.io.open_cfradial1_datatree(filename, first_dim="auto") +radar = xd.open_datatree(filename, engine="cfradial1", first_dim="auto") display(radar) ``` diff --git a/docs/usage.md b/docs/usage.md index 96b33f98..b569b392 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -17,6 +17,7 @@ import xradar datamodel importers exporters +notebooks/Open-Datatree-Engine notebooks/Accessors notebooks/Mapping_Sweeps notebooks/CfRadial1_Model_Transformation diff --git a/pyproject.toml b/pyproject.toml index f76972f9..770dc467 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ changelog = "https://github.com/openradar/xradar/blob/main/docs/history.md" [project.entry-points."xarray.backends"] cfradial1 = "xradar.io.backends:CfRadial1BackendEntrypoint" +cfradial2 = "xradar.io.backends:CfRadial2BackendEntrypoint" furuno = "xradar.io.backends:FurunoBackendEntrypoint" gamic = "xradar.io.backends:GamicBackendEntrypoint" iris = "xradar.io.backends:IrisBackendEntrypoint" diff --git a/requirements.txt b/requirements.txt index d18d8ce2..0a629ae3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ dask h5netcdf >= 1.0.0 h5py >= 3.0.0 lat_lon_parser -netCDF4 +netCDF4 >= 1.5.0, != 1.7.3, != 1.7.4 numpy pyproj scipy diff --git a/tests/conftest.py b/tests/conftest.py index d76b510b..9432e777 100644 --- 
a/tests/conftest.py +++ b/tests/conftest.py @@ -23,6 +23,17 @@ def cfradial1_file(tmp_path_factory): return DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") +@pytest.fixture(scope="session") +def cfradial2_file(cfradial1_file, tmp_path_factory): + """Round-trip a cfradial1 file through to_cfradial2 to get a real CfRadial2 file.""" + import xradar as xd + + outfile = tmp_path_factory.mktemp("cfradial2") / "sample_cfradial2.nc" + dtree = xd.io.open_cfradial1_datatree(cfradial1_file, first_dim="time") + xd.io.to_cfradial2(dtree.copy(), outfile, engine="netcdf4") + return str(outfile) + + @pytest.fixture(scope="session") def cfradial1n_file(tmp_path_factory): return DATASETS.fetch("DES_VOL_RAW_20240522_1600.nc") diff --git a/tests/io/test_backend_datatree.py b/tests/io/test_backend_datatree.py new file mode 100644 index 00000000..ebc757e8 --- /dev/null +++ b/tests/io/test_backend_datatree.py @@ -0,0 +1,427 @@ +#!/usr/bin/env python +# Copyright (c) 2024-2025, openradar developers. +# Distributed under the MIT License. See LICENSE for more info. + +""" +Tests for xarray-native open_datatree with engine= parameter. + +Tests the unified ``xd.open_datatree()`` and ``xr.open_datatree()`` APIs, +``open_groups_as_dict()`` direct calls, backward compatibility with +deprecated standalone functions, and ``supports_groups`` attribute. 
+""" + +import warnings + +import pytest +import xarray as xr +from xarray import DataTree + +import xradar as xd +from xradar.io import _ENGINE_REGISTRY +from xradar.io.backends import open_imd_datatree + +# -- Fixtures ---------------------------------------------------------------- + + +@pytest.fixture( + params=[ + pytest.param(("odim", "odim_file"), id="odim"), + pytest.param(("gamic", "gamic_file"), id="gamic"), + pytest.param(("iris", "iris0_file"), id="iris"), + pytest.param(("nexradlevel2", "nexradlevel2_file"), id="nexradlevel2"), + pytest.param(("cfradial2", "cfradial2_file"), id="cfradial2"), + pytest.param(("furuno", "furuno_scn_file"), id="furuno"), + pytest.param(("rainbow", "rainbow_file"), id="rainbow"), + pytest.param(("datamet", "datamet_file"), id="datamet"), + pytest.param(("hpl", "hpl_file"), id="hpl"), + pytest.param(("metek", "metek_ave_gz_file"), id="metek"), + pytest.param(("uf", "uf_file_1"), id="uf"), + pytest.param( + ("imd", "imd_file"), + marks=pytest.mark.skip( + reason="IMD is single-sweep-per-file; see TestIMDMultiFile", + ), + id="imd", + ), + ] +) +def engine_and_file(request): + """Parametrize over all engines. + + See ``TestIMDMultiFile`` for IMD-specific coverage (the multi-file + carve-out from the engine= API). 
+ """ + engine, fixture_name = request.param + filepath = request.getfixturevalue(fixture_name) + return engine, filepath + + +@pytest.fixture +def cfradial1_engine_file(cfradial1_file): + return "cfradial1", cfradial1_file + + +# -- Helper ------------------------------------------------------------------ + + +def _assert_cfradial2_structure(dtree, optional_groups=False): + """Verify that a DataTree has CfRadial2 group structure.""" + assert isinstance(dtree, DataTree) + children = set(dtree.children.keys()) + if optional_groups: + for grp in [ + "radar_parameters", + "georeferencing_correction", + "radar_calibration", + ]: + assert grp in children, f"Missing group: {grp}" + sweep_groups = [k for k in children if k.startswith("sweep_")] + assert len(sweep_groups) > 0, "No sweep groups found" + root_vars = set(dtree.ds.data_vars) + assert "time_coverage_start" in root_vars + assert "time_coverage_end" in root_vars + + +# -- xd.open_datatree integration tests (all engines) ----------------------- + + +class TestXdOpenDatatree: + """Test xd.open_datatree() for all engines.""" + + def test_basic_open(self, engine_and_file): + engine, filepath = engine_and_file + dtree = xd.open_datatree(filepath, engine=engine) + _assert_cfradial2_structure(dtree) + + def test_sweep_selection_int(self, engine_and_file): + engine, filepath = engine_and_file + dtree = xd.open_datatree(filepath, engine=engine, sweep=0) + sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] + assert len(sweep_groups) == 1 + + def test_sweep_selection_string(self, engine_and_file): + engine, filepath = engine_and_file + dtree = xd.open_datatree(filepath, engine=engine, sweep="sweep_0") + sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] + assert len(sweep_groups) == 1 + + def test_kwargs_flow_through(self, engine_and_file): + engine, filepath = engine_and_file + dtree = xd.open_datatree( + filepath, engine=engine, first_dim="auto", site_coords=True, sweep=0 + ) + # 
Station coords are on root (promoted by _assign_root) + assert "latitude" in dtree.ds.coords + assert "longitude" in dtree.ds.coords + + def test_unknown_engine_raises(self, odim_file): + with pytest.raises(ValueError, match="Unknown engine"): + xd.open_datatree(odim_file, engine="nonexistent_engine") + + def test_empty_sweep_list_raises(self, engine_and_file): + engine, filepath = engine_and_file + with pytest.raises(ValueError, match="sweep list is empty"): + xd.open_datatree(filepath, engine=engine, sweep=[]) + + +# -- xd.open_datatree for CfRadial1 ----------------------------------------- + + +class TestXdOpenDatatreeCfRadial1: + """Test xd.open_datatree() for CfRadial1.""" + + def test_basic_open(self, cfradial1_engine_file): + _, filepath = cfradial1_engine_file + from xradar.io.backends.cfradial1 import CfRadial1BackendEntrypoint + + backend = CfRadial1BackendEntrypoint() + dtree = backend.open_datatree( + filepath, engine="h5netcdf", decode_timedelta=False + ) + _assert_cfradial2_structure(dtree) + + def test_sweep_selection(self, cfradial1_engine_file): + _, filepath = cfradial1_engine_file + from xradar.io.backends.cfradial1 import CfRadial1BackendEntrypoint + + backend = CfRadial1BackendEntrypoint() + dtree = backend.open_datatree( + filepath, engine="h5netcdf", decode_timedelta=False, sweep=[0, 1] + ) + sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] + assert len(sweep_groups) == 2 + + +# -- xr.open_datatree tests ------------------------------------------------- + + +class TestXrOpenDatatree: + """Test xr.open_datatree() with xradar engines.""" + + def test_xr_open_datatree_odim(self, odim_file): + dtree = xr.open_datatree(odim_file, engine="odim") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_nexrad(self, nexradlevel2_file): + dtree = xr.open_datatree(nexradlevel2_file, engine="nexradlevel2") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_cfradial1(self, cfradial1_file): + dtree = 
xr.open_datatree( + cfradial1_file, engine="cfradial1", decode_timedelta=False + ) + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_gamic(self, gamic_file): + dtree = xr.open_datatree(gamic_file, engine="gamic") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_iris(self, iris0_file): + dtree = xr.open_datatree(iris0_file, engine="iris") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_furuno(self, furuno_scn_file): + dtree = xr.open_datatree(furuno_scn_file, engine="furuno") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_rainbow(self, rainbow_file): + dtree = xr.open_datatree(rainbow_file, engine="rainbow") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_datamet(self, datamet_file): + dtree = xr.open_datatree(datamet_file, engine="datamet") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_hpl(self, hpl_file): + dtree = xr.open_datatree(hpl_file, engine="hpl") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_metek(self, metek_ave_gz_file): + dtree = xr.open_datatree(metek_ave_gz_file, engine="metek") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_uf(self, uf_file_1): + dtree = xr.open_datatree(uf_file_1, engine="uf") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_cfradial2(self, cfradial2_file): + dtree = xr.open_datatree(cfradial2_file, engine="cfradial2") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_imd(self, imd_file): + dtree = xr.open_datatree(imd_file, engine="imd") + _assert_cfradial2_structure(dtree) + + +# -- IMD: multi-file carve-out vs single-file engine ------------------------- + + +class TestIMDMultiFile: + """IMD is the documented multi-file carve-out from the engine= API. + + The single-file path uses ``engine="imd"``; multi-file volumes still + go through the module-level ``xd.io.open_imd_datatree([files])``. 
+ """ + + def test_engine_imd_handles_single_file(self, imd_file): + dtree = xd.open_datatree(imd_file, engine="imd") + _assert_cfradial2_structure(dtree) + sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] + assert len(sweep_groups) == 1 + + def test_module_level_handles_multi_file_volume(self, imd_volume_files): + # Precondition: each fixture file in `imd_volume_files` contains + # exactly one sweep, so the resulting volume has one sweep per file. + dtree = open_imd_datatree(imd_volume_files) + _assert_cfradial2_structure(dtree) + sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] + assert len(sweep_groups) == len(imd_volume_files) + + +# -- CfRadial2 site_coords behavior ------------------------------------------ + + +class TestCfRadial2SiteCoords: + """`site_coords` honors True/False for the CfRadial2 entrypoint.""" + + def test_site_coords_true_keeps_station_coords(self, cfradial2_file): + dtree = xd.open_datatree(cfradial2_file, engine="cfradial2", site_coords=True) + assert "latitude" in dtree.ds.coords + assert "longitude" in dtree.ds.coords + assert "altitude" in dtree.ds.coords + + def test_site_coords_false_drops_station_coords(self, cfradial2_file): + dtree = xd.open_datatree(cfradial2_file, engine="cfradial2", site_coords=False) + assert "latitude" not in dtree.ds.coords + assert "longitude" not in dtree.ds.coords + assert "altitude" not in dtree.ds.coords + + +# -- supports_groups attribute ----------------------------------------------- + + +class TestSupportsGroups: + """Verify supports_groups is True on all backend classes.""" + + @pytest.mark.parametrize( + "engine", + sorted(_ENGINE_REGISTRY.keys()), + ) + def test_supports_groups(self, engine): + backend_cls = _ENGINE_REGISTRY[engine] + assert backend_cls.supports_groups is True + + +# -- Docstring regression guard --------------------------------------------- + + +class TestDocstrings: + """`open_groups_as_dict` / `open_datatree` must carry usable 
docstrings. + + The composed docstrings are assigned by module-level side effects + (e.g. ``OdimBackendEntrypoint.open_groups_as_dict.__doc__ = ...``). + Without this guard a future refactor could silently delete a + docstring and no test would catch the regression. + """ + + @pytest.mark.parametrize( + "engine", + sorted(_ENGINE_REGISTRY.keys()), + ) + def test_open_groups_as_dict_has_param_docstring(self, engine): + doc = _ENGINE_REGISTRY[engine].open_groups_as_dict.__doc__ + assert doc, f"{engine} open_groups_as_dict has no docstring" + assert "Parameters" in doc + assert "Returns" in doc + assert "optional_groups" in doc + + @pytest.mark.parametrize( + "engine", + sorted(_ENGINE_REGISTRY.keys()), + ) + def test_open_datatree_references_groups_as_dict(self, engine): + doc = _ENGINE_REGISTRY[engine].open_datatree.__doc__ + assert doc, f"{engine} open_datatree has no docstring" + assert "open_groups_as_dict" in doc + + +def test_compose_docstring_structure(): + """`_compose_docstring` assembles summary + common block + extras + Returns.""" + from xradar.io.backends.common import REINDEX_PARAMS_DOC, _compose_docstring + + doc = _compose_docstring("Summary line.", REINDEX_PARAMS_DOC) + assert doc.startswith("Summary line.") + assert "Parameters" in doc + assert "Returns" in doc + assert "reindex_angle" in doc + assert "filename_or_obj" in doc # common block is always included + assert "dict[str, xarray.Dataset]" in doc + + +def test_compose_docstring_skips_empty_extra_blocks(): + """Empty/None extra blocks must not double-insert section headers.""" + from xradar.io.backends.common import _compose_docstring + + doc = _compose_docstring("Summary.", "", None) + assert doc.count("Parameters") == 1 + assert doc.count("Returns") == 1 + + +# -- Engine registry --------------------------------------------------------- + + +class TestEngineRegistry: + """Verify _ENGINE_REGISTRY contains all expected engines.""" + + def test_registry_contains_all_engines(self): + expected = { 
+ "odim", + "cfradial1", + "cfradial2", + "nexradlevel2", + "gamic", + "iris", + "furuno", + "rainbow", + "datamet", + "hpl", + "metek", + "uf", + "imd", + } + assert set(_ENGINE_REGISTRY.keys()) == expected + + def test_demo_notebook_lists_all_engines(self): + """Bitrot guard: adding an engine to the registry must also be demoed.""" + from pathlib import Path + + repo_root = Path(__file__).resolve().parents[2] + notebook = repo_root / "docs/notebooks/Open-Datatree-Engine.md" + text = notebook.read_text() + for engine in _ENGINE_REGISTRY: + assert f'engine="{engine}"' in text, f"notebook missing engine={engine!r}" + + +# -- Backward compatibility & deprecation tests ------------------------------ + +# Map of deprecated function names to (import_path, engine, fixture_name) +_DEPRECATED_FUNCTIONS = { + "open_odim_datatree": ("xradar.io.backends.odim", "odim_file", {}), + "open_gamic_datatree": ("xradar.io.backends.gamic", "gamic_file", {}), + "open_iris_datatree": ("xradar.io.backends.iris", "iris0_file", {}), + "open_nexradlevel2_datatree": ( + "xradar.io.backends.nexrad_level2", + "nexradlevel2_file", + {}, + ), + "open_cfradial1_datatree": ( + "xradar.io.backends.cfradial1", + "cfradial1_file", + {"engine": "h5netcdf", "decode_timedelta": False}, + ), + "open_cfradial2_datatree": ( + "xradar.io.backends.cfradial2", + "cfradial2_file", + {}, + ), + "open_furuno_datatree": ("xradar.io.backends.furuno", "furuno_scn_file", {}), + "open_rainbow_datatree": ("xradar.io.backends.rainbow", "rainbow_file", {}), + "open_datamet_datatree": ("xradar.io.backends.datamet", "datamet_file", {}), + "open_hpl_datatree": ("xradar.io.backends.hpl", "hpl_file", {}), + "open_metek_datatree": ("xradar.io.backends.metek", "metek_ave_gz_file", {}), + "open_uf_datatree": ("xradar.io.backends.uf", "uf_file_1", {}), +} + + +class TestDeprecation: + """Test that all standalone functions emit FutureWarning.""" + + @pytest.mark.parametrize( + "func_name,module_path,fixture_name,extra_kwargs", + 
[ + (name, mod, fix, kw) + for name, (mod, fix, kw) in _DEPRECATED_FUNCTIONS.items() + ], + ids=list(_DEPRECATED_FUNCTIONS.keys()), + ) + def test_deprecated_function_warns( + self, func_name, module_path, fixture_name, extra_kwargs, request + ): + import importlib + + filepath = request.getfixturevalue(fixture_name) + module = importlib.import_module(module_path) + func = getattr(module, func_name) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + dtree = func(filepath, sweep=0, **extra_kwargs) + deprecation_warnings = [ + x for x in w if issubclass(x.category, FutureWarning) + ] + assert len(deprecation_warnings) == 1, ( + f"{func_name} emitted {len(deprecation_warnings)} " + f"FutureWarnings, expected 1" + ) + assert func_name in str(deprecation_warnings[0].message) + _assert_cfradial2_structure(dtree) diff --git a/tests/io/test_cfradial2.py b/tests/io/test_cfradial2.py index 8fa92c72..2b70775b 100644 --- a/tests/io/test_cfradial2.py +++ b/tests/io/test_cfradial2.py @@ -77,7 +77,14 @@ def test_open_cfradial2_roundtrip(cfradial1_file, temp_file): assert isinstance(dtree2, xr.DataTree) assert "sweep_0" in dtree2.children assert "DBZ" in dtree2["sweep_0"].data_vars - xr.testing.assert_equal(dtree["sweep_0"].ds["DBZ"], dtree2["sweep_0"].ds["DBZ"]) + # cfradial1 attaches station coords to each sweep; cfradial2 places them + # at root only. Drop them on the left so the DBZ comparison succeeds. 
+ expected = ( + dtree["sweep_0"] + .ds["DBZ"] + .drop_vars(["latitude", "longitude", "altitude"], errors="ignore") + ) + xr.testing.assert_equal(expected, dtree2["sweep_0"].ds["DBZ"]) assert "latitude" in dtree2.ds.coords assert dtree2.ds["latitude"].attrs["standard_name"] == "latitude" assert ( @@ -139,6 +146,33 @@ def test_open_cfradial2_invalid_path(): xd.io.open_cfradial2_datatree("missing-cfradial2-file.nc") +def test_open_dataset_sweep_group(cfradial2_file): + """`xr.open_dataset(engine="cfradial2", group="sweep_0")` returns a normalized sweep.""" + ds = xr.open_dataset(cfradial2_file, engine="cfradial2", group="sweep_0") + assert "azimuth" in ds.coords + assert "range" in ds.coords + + +def test_open_dataset_missing_group_raises(cfradial2_file): + """`xr.open_dataset(engine="cfradial2", group="sweep_99")` raises ValueError.""" + with pytest.raises(ValueError, match="missing from file"): + xr.open_dataset(cfradial2_file, engine="cfradial2", group="sweep_99") + + +def test_xr_open_datatree_cfradial2_engine(cfradial2_file): + """End-to-end: `xr.open_datatree(file, engine="cfradial2")` returns a DataTree.""" + dtree = xr.open_datatree(cfradial2_file, engine="cfradial2") + assert isinstance(dtree, xr.DataTree) + assert any(name.startswith("sweep_") for name in dtree.children) + + +def test_xd_open_datatree_cfradial2_engine(cfradial2_file): + """End-to-end: `xd.open_datatree(file, engine="cfradial2")` returns a DataTree.""" + dtree = xd.open_datatree(cfradial2_file, engine="cfradial2") + assert isinstance(dtree, xr.DataTree) + assert any(name.startswith("sweep_") for name in dtree.children) + + @pytest.mark.parametrize( ("available", "expected_engine"), [ diff --git a/tests/io/test_furuno.py b/tests/io/test_furuno.py index d216c3ed..6aac103f 100644 --- a/tests/io/test_furuno.py +++ b/tests/io/test_furuno.py @@ -670,6 +670,8 @@ def test_open_furuno_datatree(furuno_scn_file): assert "altitude" in dtree.ds.coords assert "latitude" not in dtree.ds.data_vars + # 
Station vars (latitude/longitude/altitude) live as coordinates on the + # root only; per-sweep datasets do not duplicate them. assert len(dtree[sample_sweep].variables) == 18 assert dtree[sample_sweep]["DBZH"].shape == (360, 602) assert len(dtree.attrs) == 9 diff --git a/tests/io/test_imd.py b/tests/io/test_imd.py index 35506316..0c888e74 100644 --- a/tests/io/test_imd.py +++ b/tests/io/test_imd.py @@ -90,6 +90,35 @@ def test_open_imd_datatree_volume(imd_volume_files): assert int(dtree[sw].ds["sweep_number"].values) == i +def test_xr_open_datatree_imd_engine(imd_file): + """End-to-end: `xr.open_datatree(file, engine="imd")` returns a DataTree.""" + import xarray as xr + + dtree = xr.open_datatree(imd_file, engine="imd") + assert isinstance(dtree, DataTree) + assert "sweep_0" in dtree.children + + +def test_xd_open_datatree_imd_engine(imd_file): + """End-to-end: `xd.open_datatree(file, engine="imd")` returns a DataTree.""" + import xradar as xd + + dtree = xd.open_datatree(imd_file, engine="imd") + assert isinstance(dtree, DataTree) + assert "sweep_0" in dtree.children + + +def test_open_imd_datatree_no_futurewarning(imd_volume_files): + """The multi-file `open_imd_datatree` carve-out must not emit FutureWarning.""" + import warnings + + with warnings.catch_warnings(record=True) as captured: + warnings.simplefilter("always") + open_imd_datatree(imd_volume_files) + future = [w for w in captured if issubclass(w.category, FutureWarning)] + assert future == [], f"open_imd_datatree must not warn: {future}" + + def test_open_imd_datatree_angle_filter(imd_volume_files): """min_angle/max_angle forwarded to util.create_volume.""" # Load first to learn the actual angles, then filter to just the lowest. 
diff --git a/tests/io/test_nexrad_level2.py b/tests/io/test_nexrad_level2.py index 563d4b25..b663fc46 100644 --- a/tests/io/test_nexrad_level2.py +++ b/tests/io/test_nexrad_level2.py @@ -889,6 +889,27 @@ def test_open_nexradlevel2_single_dataset_site_as_coords(nexradlevel2_file): assert "altitude" in ds.coords +def test_open_nexradlevel2_datatree_legacy_site_coords_kwarg(nexradlevel2_file): + """Legacy `site_coords=` kwarg must reach the coord assignment in open_nexradlevel2_datatree.""" + dtree = open_nexradlevel2_datatree( + nexradlevel2_file, sweep=[0], reindex_angle=False, site_coords=True + ) + root_coords = dtree.ds.coords + assert "latitude" in root_coords + assert "longitude" in root_coords + assert "altitude" in root_coords + + +def test_actual_elevation_cuts_invariant_under_sweep_selection(nexradlevel2_file): + """`actual_elevation_cuts` reflects file contents, not user `sweep=` selection.""" + dtree_all = open_nexradlevel2_datatree(nexradlevel2_file) + dtree_subset = open_nexradlevel2_datatree(nexradlevel2_file, sweep=[0, 1]) + assert ( + dtree_all.attrs["actual_elevation_cuts"] + == dtree_subset.attrs["actual_elevation_cuts"] + ) + + @pytest.mark.parametrize( "sweeps_input, expected_sweeps, should_raise", [ diff --git a/xradar/__init__.py b/xradar/__init__.py index 4a990dca..b6ce6c7f 100644 --- a/xradar/__init__.py +++ b/xradar/__init__.py @@ -29,5 +29,6 @@ from . import util # noqa from .util import map_over_sweeps # noqa from . 
import transform # noqa +from .io import open_datatree # noqa __all__ = [s for s in dir() if not s.startswith("_")] diff --git a/xradar/io/__init__.py b/xradar/io/__init__.py index 3693e99e..7a514413 100644 --- a/xradar/io/__init__.py +++ b/xradar/io/__init__.py @@ -17,4 +17,77 @@ from .backends import * # noqa from .export import * # noqa +from .backends.cfradial1 import CfRadial1BackendEntrypoint +from .backends.cfradial2 import CfRadial2BackendEntrypoint +from .backends.datamet import DataMetBackendEntrypoint +from .backends.furuno import FurunoBackendEntrypoint +from .backends.gamic import GamicBackendEntrypoint +from .backends.hpl import HPLBackendEntrypoint +from .backends.imd import IMDBackendEntrypoint +from .backends.iris import IrisBackendEntrypoint +from .backends.metek import MRRBackendEntrypoint +from .backends.nexrad_level2 import NexradLevel2BackendEntrypoint +from .backends.odim import OdimBackendEntrypoint +from .backends.rainbow import RainbowBackendEntrypoint +from .backends.uf import UFBackendEntrypoint + +#: Registry mapping engine names to backend classes that support groups. +_ENGINE_REGISTRY = { + "odim": OdimBackendEntrypoint, + "cfradial1": CfRadial1BackendEntrypoint, + "cfradial2": CfRadial2BackendEntrypoint, + "nexradlevel2": NexradLevel2BackendEntrypoint, + "gamic": GamicBackendEntrypoint, + "iris": IrisBackendEntrypoint, + "furuno": FurunoBackendEntrypoint, + "rainbow": RainbowBackendEntrypoint, + "datamet": DataMetBackendEntrypoint, + "hpl": HPLBackendEntrypoint, + "metek": MRRBackendEntrypoint, + "uf": UFBackendEntrypoint, + "imd": IMDBackendEntrypoint, +} + + +def open_datatree(filename_or_obj, *, engine, **kwargs): + """Open a radar file as :py:class:`xarray.DataTree` using the specified engine. + + Parameters + ---------- + filename_or_obj : str, Path, or file-like + Path to the radar file. + engine : str + Backend engine name (e.g., ``"odim"``, ``"cfradial1"``, ``"nexradlevel2"``). 
+ **kwargs + Additional keyword arguments passed to the backend's ``open_datatree`` method. + + Returns + ------- + dtree : xarray.DataTree + DataTree with CfRadial2 group structure. + + Examples + -------- + >>> import xradar as xd + >>> dtree = xd.open_datatree("file.h5", engine="odim") + """ + if engine not in _ENGINE_REGISTRY: + supported = ", ".join(sorted(_ENGINE_REGISTRY)) + raise ValueError(f"Unknown engine {engine!r}. Supported engines: {supported}") + backend = _ENGINE_REGISTRY[engine]() + return backend.open_datatree(filename_or_obj, **kwargs) + + +def list_engines(): + """Return a sorted list of engine names registered with :func:`open_datatree`. + + Examples + -------- + >>> import xradar as xd + >>> "odim" in xd.io.list_engines() + True + """ + return sorted(_ENGINE_REGISTRY) + + __all__ = [s for s in dir() if not s.startswith("_")] diff --git a/xradar/io/backends/cfradial1.py b/xradar/io/backends/cfradial1.py index 81b8760d..8c75f262 100644 --- a/xradar/io/backends/cfradial1.py +++ b/xradar/io/backends/cfradial1.py @@ -51,8 +51,11 @@ ) from .common import ( _STATION_VARS, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, - _attach_sweep_groups, + _compose_docstring, + _deprecation_warning, _maybe_decode, ) @@ -337,6 +340,10 @@ def _get_radar_calibration(ds): def open_cfradial1_datatree(filename_or_obj, **kwargs): """Open CfRadial1 dataset as :py:class:`xarray.DataTree`. + .. deprecated:: + Use ``xd.open_datatree(file, engine="cfradial1")`` or + ``xr.open_datatree(file, engine="cfradial1")`` instead. + Parameters ---------- filename_or_obj : str, Path, file-like or xarray.DataStore @@ -369,47 +376,27 @@ def open_cfradial1_datatree(filename_or_obj, **kwargs): dtree: xarray.DataTree DataTree with CfRadial2 groups. 
""" + _deprecation_warning("open_cfradial1_datatree", "cfradial1") - # handle kwargs, extract first_dim + # Bridge old kwargs to direct kwargs first_dim = kwargs.pop("first_dim", "auto") optional = kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) - kwargs.pop("site_as_coords", None) + site_coords = kwargs.pop("site_as_coords", True) sweep = kwargs.pop("sweep", None) engine = kwargs.pop("engine", "netcdf4") - # needed for new xarray literal timedelta decoding - kwargs.update(decode_timedelta=kwargs.pop("decode_timedelta", False)) - - # open root group, cfradial1 only has one group - # open_cfradial1_datatree only opens the file once using netcdf4 - # and retrieves the different groups from the loaded object - ds = open_dataset(filename_or_obj, engine=engine, **kwargs) - - # create datatree root node additional root metadata groups - dtree: dict = { - "/": _get_required_root_dataset(ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration(ds) + kwargs.setdefault("decode_timedelta", False) - # radar_calibration (connected with calib-dimension) - dtree = _attach_sweep_groups( - dtree, - list( - _get_sweep_groups( - ds, - sweep=sweep, - first_dim=first_dim, - optional=optional, - site_as_coords=False, - ).values() - ), + return CfRadial1BackendEntrypoint().open_datatree( + filename_or_obj, + first_dim=first_dim, + optional=optional, + optional_groups=optional_groups, + site_coords=site_coords, + sweep=sweep, + engine=engine, + **kwargs, ) - return DataTree.from_dict(dtree) class CfRadial1BackendEntrypoint(BackendEntrypoint): @@ -434,6 +421,7 @@ class CfRadial1BackendEntrypoint(BackendEntrypoint): description = "Open CfRadial1 (.nc, .nc4) using netCDF4 in Xarray" url = 
"https://xradar.rtfd.io/en/latest/io.html#cfradial1" + supports_groups = True def open_dataset( self, @@ -492,3 +480,91 @@ def open_dataset( ds._close = store.close return ds + + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=False, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + optional=True, + optional_groups=False, + sweep=None, + engine="netcdf4", + ): + # CfRadial1 opens the entire file once + ds = open_dataset( + filename_or_obj, + engine=engine, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + + groups_dict = { + "/": _get_required_root_dataset(ds, optional=optional), + } + if optional_groups: + groups_dict["/radar_parameters"] = _get_subgroup( + ds, radar_parameters_subgroup + ) + groups_dict["/georeferencing_correction"] = _get_subgroup( + ds, georeferencing_correction_subgroup + ) + groups_dict["/radar_calibration"] = _get_radar_calibration(ds) + + sweep_datasets = list( + _get_sweep_groups( + ds, + sweep=sweep, + first_dim=first_dim, + optional=optional, + site_as_coords=site_coords, + ).values() + ) + + for i, sw_ds in enumerate(sweep_datasets): + # Drop station coords from per-sweep datasets — they live on root. 
+ sw = sw_ds.drop_vars(_STATION_VARS, errors="ignore") + groups_dict[f"/sweep_{i}"] = sw.drop_attrs(deep=False) + + return groups_dict + + def open_datatree( + self, + filename_or_obj, + **kwargs, + ): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +_CFRADIAL1_PARAMS_DOC = """ + engine : {"netcdf4", "h5netcdf"}, optional + Underlying NetCDF engine used by ``xr.open_dataset`` to read the + CfRadial1 file. Defaults to ``"netcdf4"``. +""" + +CfRadial1BackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a CfRadial1 file as a CfRadial2-shaped dict of group datasets.", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + _CFRADIAL1_PARAMS_DOC, +) +CfRadial1BackendEntrypoint.open_datatree.__doc__ = ( + "Open a CfRadial1 file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) diff --git a/xradar/io/backends/cfradial2.py b/xradar/io/backends/cfradial2.py index 5ea6dbba..f7a514e3 100644 --- a/xradar/io/backends/cfradial2.py +++ b/xradar/io/backends/cfradial2.py @@ -27,7 +27,7 @@ """ -__all__ = ["open_cfradial2_datatree"] +__all__ = ["CfRadial2BackendEntrypoint", "open_cfradial2_datatree"] __doc__ = __doc__.format("\n ".join(__all__)) @@ -38,6 +38,7 @@ import numpy as np from xarray import DataTree, Variable, open_datatree +from xarray.backends import BackendEntrypoint from ...model import ( georeferencing_correction_subgroup, @@ -57,7 +58,12 @@ required_root_vars, sweep_vars_mapping, ) -from .common import _STATION_VARS, _apply_site_as_coords +from .common import ( + _STATION_VARS, + _apply_site_as_coords, + _compose_docstring, + _deprecation_warning, +) _ROOT_ATTR_RENAMES = { "RadarName": "instrument_name", @@ -177,6 +183,8 @@ def _iter_selected_sweeps(tree: DataTree, sweep: Any) -> list[str]: selected.append(f"sweep_{item}") else: selected.append(_normalize_sweep_name(item)) + if not selected: + raise ValueError("sweep list is empty.") return 
selected raise TypeError("sweep must be None, int, str or an iterable of ints/strings") @@ -454,42 +462,21 @@ def _normalize_subgroup(node: DataTree, mapping: dict[str, str | None]): return ds -def open_cfradial2_datatree( - filename_or_obj: str | PathLike[str], **kwargs: Any -) -> DataTree: - """Open a CfRadial2-like grouped dataset as :py:class:`xarray.DataTree`. - - The reader performs best-effort normalization of common CfRadial2/FM301 - naming and metadata differences. It is not a full FM301 validator. - - Parameters - ---------- - filename_or_obj : str or PathLike - Path or object understood by :py:func:`xarray.open_datatree`. - - Keyword Arguments - ----------------- - sweep : int, str, iterable, optional - Sweep selection. Defaults to all available sweeps. - first_dim : str - Can be ``time`` or ``auto``. Defaults to ``time``. - optional : bool - Keep optional root variables when available. Defaults to ``True``. - optional_groups : bool - Include root metadata subgroups if present. Defaults to ``False``. - **kwargs : dict - Additional keyword arguments passed to :py:func:`xarray.open_datatree`. +def _build_cfradial2_dtree_dict( + filename_or_obj: str | PathLike[str], + *, + sweep: Any = None, + first_dim: str = "time", + optional: bool = True, + optional_groups: bool = False, + **kwargs: Any, +) -> dict[str, Any]: + """Build the dict[str, Dataset] of normalized CfRadial2 groups. - Returns - ------- - xarray.DataTree - Normalized DataTree containing root metadata and sweep groups. + Used by :class:`CfRadial2BackendEntrypoint` to assemble the DataTree + before `DataTree.from_dict(...)` is applied. 
""" - sweep = kwargs.pop("sweep", None) - first_dim = kwargs.pop("first_dim", "time") - optional = kwargs.pop("optional", True) - optional_groups = kwargs.pop("optional_groups", False) - kwargs.update(decode_timedelta=kwargs.pop("decode_timedelta", False)) + kwargs.setdefault("decode_timedelta", False) with open_datatree(filename_or_obj, **kwargs) as tree: raw_sweep_names = [name for name in tree.children if name.startswith("sweep_")] @@ -534,18 +521,19 @@ def open_cfradial2_datatree( cleaned.attrs = {} dtree[f"sweep_{i}"] = cleaned - normalized = selected != output_names or any( + renamed = selected != output_names or any( name != _normalize_sweep_name(name) for name in raw_sweep_names ) - if normalized: + if renamed: warnings.warn( "CfRadial2 sweep groups were renumbered into sequential `sweep_` order.", UserWarning, stacklevel=2, ) - root_ds = dtree["/"] - missing_root = required_root_vars - set(root_ds.data_vars) - set(root_ds.coords) + missing_root = ( + required_root_vars - set(dtree["/"].data_vars) - set(dtree["/"].coords) + ) if missing_root: warnings.warn( "CfRadial2 reader could not fully normalize FM301 root variables; " @@ -554,4 +542,161 @@ def open_cfradial2_datatree( stacklevel=2, ) - return DataTree.from_dict(dtree) + return dtree + + +class CfRadial2BackendEntrypoint(BackendEntrypoint): + """Xarray BackendEntrypoint for CfRadial2/FM301 grouped datasets. + + Keyword Arguments + ----------------- + sweep : int, str, iterable, optional + Sweep selection. Defaults to all available sweeps. + first_dim : str + Can be ``time`` or ``auto``. Defaults to ``time``. + optional : bool + Keep optional root variables when available. Defaults to ``True``. + optional_groups : bool + Include root metadata subgroups if present. Defaults to ``False``. + kwargs : dict + Additional kwargs are fed to :py:func:`xarray.open_datatree`. 
+ """ + + description = "Open CfRadial2/FM301 grouped datasets in Xarray" + url = "https://xradar.rtfd.io/en/latest/io.html#cfradial2" + supports_groups = True + + def open_dataset( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=False, + group="sweep_0", + first_dim="time", + optional=True, + ): + with open_datatree( + filename_or_obj, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) as tree: + # Map canonical sweep names (`sweep_2`) to actual node names + # (`sweep_02`, `sweep2`, ...), matching the DataTree path. + sweep_lookup = { + _normalize_sweep_name(name): name + for name in tree.children + if name.startswith("sweep") + } + source = sweep_lookup.get(group, group) + if source != "/" and source not in tree.children: + raise ValueError( + f"Group `{group}` missing from file `{filename_or_obj}`." + ) + ds = tree[source].to_dataset(inherit=True) + if group.startswith("sweep_"): + ds = _normalize_sweep_dataset( + ds, + _normalize_sweep_name(source), + first_dim=first_dim, + optional=optional, + ) + ds.load() + return ds + + def open_groups_as_dict( + self, + filename_or_obj, + *, + sweep=None, + first_dim="time", + optional=True, + optional_groups=False, + site_coords=True, + **kwargs, + ): + groups_dict = _build_cfradial2_dtree_dict( + filename_or_obj, + sweep=sweep, + first_dim=first_dim, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) + # CfRadial2 places station coords at root by default. Honor + # site_coords=False by dropping them, matching the per-sweep + # contract used by odim/gamic/cfradial1. 
+ if not site_coords: + groups_dict["/"] = groups_dict["/"].drop_vars( + _STATION_VARS, errors="ignore" + ) + return groups_dict + + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +_CFRADIAL2_PARAMS_DOC = """ + site_coords : bool, optional + Keep ``latitude``/``longitude``/``altitude`` as coordinates on + the root dataset. CfRadial2 stores station coords at root by + default; pass ``False`` to drop them. Defaults to ``True``. +""" + +CfRadial2BackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a CfRadial2/FM301 grouped file as a dict of normalized group datasets.\n" + " Best-effort normalization of common institutional variations is\n" + " applied so the result matches xradar's FM301-oriented layout.", + _CFRADIAL2_PARAMS_DOC, +) +CfRadial2BackendEntrypoint.open_datatree.__doc__ = ( + "Open a CfRadial2/FM301 grouped file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + +def open_cfradial2_datatree( + filename_or_obj: str | PathLike[str], **kwargs: Any +) -> DataTree: + """Open a CfRadial2-like grouped dataset as :py:class:`xarray.DataTree`. + + .. deprecated:: + Use ``xd.open_datatree(file, engine="cfradial2")`` or + ``xr.open_datatree(file, engine="cfradial2")`` instead. + + Parameters + ---------- + filename_or_obj : str or PathLike + Path or object understood by :py:func:`xarray.open_datatree`. + + Keyword Arguments + ----------------- + sweep : int, str, iterable, optional + Sweep selection. Defaults to all available sweeps. + first_dim : str + Can be ``time`` or ``auto``. Defaults to ``time``. + optional : bool + Keep optional root variables when available. Defaults to ``True``. + optional_groups : bool + Include root metadata subgroups if present. Defaults to ``False``. + **kwargs : dict + Additional keyword arguments passed to :py:func:`xarray.open_datatree`. 
+ + Returns + ------- + xarray.DataTree + Normalized DataTree containing root metadata and sweep groups. + """ + _deprecation_warning("open_cfradial2_datatree", "cfradial2") + return CfRadial2BackendEntrypoint().open_datatree(filename_or_obj, **kwargs) diff --git a/xradar/io/backends/common.py b/xradar/io/backends/common.py index 8b3f4317..39d0f293 100644 --- a/xradar/io/backends/common.py +++ b/xradar/io/backends/common.py @@ -14,6 +14,8 @@ import io import struct +import textwrap +import warnings from collections import OrderedDict import h5netcdf @@ -21,8 +23,11 @@ import xarray as xr from ...model import ( + georeferencing_correction_subgroup, optional_root_attrs, optional_root_vars, + radar_calibration_subgroup, + radar_parameters_subgroup, required_global_attrs, required_root_vars, ) @@ -380,6 +385,220 @@ def _prepare_backend_ds(ds): return ds +def _build_groups_dict(ls_ds, optional=True, optional_groups=False): + """Build CfRadial2 groups dict from a list of sweep Datasets. + + Parameters + ---------- + ls_ds : list of xr.Dataset + List of sweep Datasets. + optional : bool + Import optional metadata, defaults to True. + optional_groups : bool + If True, includes ``/radar_parameters``, ``/georeferencing_correction`` + and ``/radar_calibration`` metadata subgroups. Default is False. + + Returns + ------- + groups_dict : dict[str, xr.Dataset] + Dictionary with CfRadial2 group structure. 
+ """ + groups_dict = { + "/": _get_required_root_dataset(ls_ds, optional=optional), + } + if optional_groups: + groups_dict["/radar_parameters"] = _get_subgroup( + ls_ds, radar_parameters_subgroup + ) + groups_dict["/georeferencing_correction"] = _get_subgroup( + ls_ds, georeferencing_correction_subgroup + ) + groups_dict["/radar_calibration"] = _get_radar_calibration( + ls_ds, radar_calibration_subgroup + ) + for i, ds in enumerate(ls_ds): + sw = ds.drop_vars(_STATION_VARS, errors="ignore").drop_attrs(deep=False) + groups_dict[f"/sweep_{i}"] = sw + return groups_dict + + +def _deprecation_warning(old_name, engine): + """Emit FutureWarning for deprecated standalone open_*_datatree functions.""" + warnings.warn( + f"`{old_name}` is deprecated. Use " + f'`xd.open_datatree(file, engine="{engine}")` or ' + f'`xr.open_datatree(file, engine="{engine}")` instead.', + FutureWarning, + stacklevel=3, # 1 = this helper, 2 = open_*_datatree, 3 = the user's call site + ) + + +#: NumPy-style Parameters block shared across all `open_groups_as_dict` +#: methods. Backend-specific blocks are appended via :func:`_compose_docstring`. +#: The CF decoder kwargs (`mask_and_scale`, `decode_times`, ...) thread +#: through to :py:func:`xarray.open_dataset`; see xarray's documentation for +#: full semantics. +COMMON_BACKEND_PARAMS_DOC = """ +Parameters +---------- +filename_or_obj : str, Path, or file-like + Path or file-like object understood by the underlying reader. +mask_and_scale : bool or dict-like, optional + Replace fill values with NA and apply ``scale_factor``/``add_offset`` + decoding. See :py:func:`xarray.open_dataset`. Defaults to ``True``. +decode_times : bool or dict-like, optional + Decode CF time variables (calendar, units) into ``np.datetime64``. + Defaults to ``True``. +concat_characters : bool or dict-like, optional + Concatenate character arrays into strings along their trailing + dimension. Defaults to ``True``. +decode_coords : bool or {"coordinates", "all"}, optional + Decode the CF ``coordinates`` attribute.
Defaults to ``True`` + (equivalent to ``"coordinates"``). +drop_variables : str or iterable of str, optional + Names of variables to drop before processing. +use_cftime : bool, optional + Force ``cftime`` decoding for time variables (instead of + ``np.datetime64``). Defaults to ``None`` (auto). +decode_timedelta : bool, optional + Decode CF timedelta variables. Default mirrors ``decode_times`` + unless the backend overrides it (cfradial1, cfradial2, and imd + default to ``False``). +sweep : int, str, or list of int/str, optional + Sweep selection. ``None`` (default) returns all sweeps. An ``int`` + or ``"sweep_N"`` string returns one sweep; a list returns the + named subset. +first_dim : {"auto", "time"}, optional + Leading dimension of each sweep dataset. ``"auto"`` picks + ``azimuth`` (PPI) or ``elevation`` (RHI); ``"time"`` keeps the + raw time axis. Default ``"auto"`` (``"time"`` for cfradial2). +optional : bool, optional + Include optional root variables when available. Defaults to ``True``. +optional_groups : bool, optional + Include the ``/radar_parameters``, ``/georeferencing_correction``, + and ``/radar_calibration`` metadata subgroups under the root. + Defaults to ``False``. +""" + + +#: Reindex/angle parameter block — shared by backends that resample +#: rays onto a regular angular grid (odim, gamic, nexrad, cfradial1, +#: iris, furuno, datamet, uf). +REINDEX_PARAMS_DOC = """ +reindex_angle : bool or dict, optional + Resample rays onto a regular angular grid when truthy. A dict is + passed as kwargs to :func:`xradar.util.reindex_angle` (e.g. + ``{"start_angle": 0.0, "stop_angle": 360.0, "angle_res": 1.0}``). + Only invoked when ``decode_coords=True``. Defaults to ``False``. +fix_second_angle : bool, optional + Correct erroneous secondary-angle values (azimuth on RHI, + elevation on PPI). Only effective with ``first_dim="auto"``. + Defaults to ``False``. +""" + +#: Site-coordinate parameter block.
Most multi-sweep backends spell this +#: `site_coords`; IMD uses the legacy `site_as_coords`. +SITE_COORDS_PARAM_DOC = """ +site_coords : bool, optional + Attach ``latitude``/``longitude``/``altitude`` as coordinates on + the root dataset (and on per-sweep datasets where the backend + supports it). Defaults to ``True``. +""" + +#: HDF5/h5netcdf options shared by ODIM, GAMIC, HPL, Metek. +HDF5_PARAMS_DOC = """ +format : str, optional + h5netcdf format string. Defaults to ``None``. +invalid_netcdf : bool, optional + Accept HDF5 files that are not strictly NetCDF-conformant. +phony_dims : {"access", "sort", None}, optional + How h5netcdf labels unnamed dimensions. Defaults to ``"access"``. +decode_vlen_strings : bool, optional + Decode variable-length strings stored in HDF5. Defaults to ``True``. +""" + +#: Reader-lock parameter shared by NEXRAD, IRIS, UF. +LOCK_PARAM_DOC = """ +lock : threading.Lock or None, optional + Reader lock for thread-safe access. Defaults to ``None``. +""" + + +def _compose_docstring(summary, *extra_blocks): + """Compose a NumPy-style docstring from a summary plus parameter blocks. + + The composed result always opens with the shared + :data:`COMMON_BACKEND_PARAMS_DOC` Parameters block and closes with a + fixed Returns section. Per-backend blocks (e.g. :data:`HDF5_PARAMS_DOC`, + :data:`REINDEX_PARAMS_DOC`) are inserted between the common block and + the Returns section in the order given. + + Each block is independently de-indented and re-indented with four + spaces, so block authors do not need to keep the indentation in sync + by hand — write a block at any indent level and this helper + normalises it. + + Parameters + ---------- + summary : str + One-paragraph summary that opens the docstring. + *extra_blocks : str + Optional backend-specific parameter blocks. Each may use any + indentation; the helper normalises them to four-space indent. + + Returns + ------- + str + Complete docstring suitable for ``method.__doc__ = ...``. 
+ """ + + def _block(text): + return textwrap.indent(textwrap.dedent(text).strip("\n"), " ") + + parts = [summary.strip("\n"), "", _block(COMMON_BACKEND_PARAMS_DOC)] + for block in extra_blocks: + if block: + parts.append(_block(block)) + returns_body = ( + "dict[str, xarray.Dataset]\n" + " CfRadial2 group paths (``/``, ``/sweep_N``, optional\n" + " ``/radar_parameters`` etc.) mapped to their datasets,\n" + " ready for :py:meth:`xarray.DataTree.from_dict`." + ) + parts += ["", " Returns", " -------", _block(returns_body)] + return "\n".join(parts) + "\n" + + +def _resolve_sweeps(sweep, discover_fn): + """Normalise the sweep parameter into a list of sweep group names. + + Parameters + ---------- + sweep : int, str, list, or None + User-supplied sweep selection. + discover_fn : callable + Zero-arg function returning all sweep group names for the file. + + Returns + ------- + list[str] + List of sweep group name strings. + """ + if isinstance(sweep, str): + return [sweep] + if isinstance(sweep, int): + return [f"sweep_{sweep}"] + if isinstance(sweep, list): + if not sweep: + raise ValueError("sweep list is empty.") + if isinstance(sweep[0], int): + return [f"sweep_{i}" for i in sweep] + return list(sweep) + if sweep is None: + return discover_fn() + raise TypeError(f"Unsupported sweep type: {type(sweep)}") + + # IRIS Data Types and corresponding python struct format characters # 4.2 Scalar Definitions, Page 23 # https://docs.python.org/3/library/struct.html#format-characters diff --git a/xradar/io/backends/datamet.py b/xradar/io/backends/datamet.py index 9fb823e4..a5f27cf5 100644 --- a/xradar/io/backends/datamet.py +++ b/xradar/io/backends/datamet.py @@ -29,7 +29,6 @@ from datetime import datetime, timedelta import numpy as np -import xarray as xr from xarray import DataTree from xarray.backends.common import AbstractDataStore, BackendArray, BackendEntrypoint from xarray.backends.file_manager import CachingFileManager @@ -40,7 +39,6 @@ from ... 
import util from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, get_elevation_attrs, @@ -49,16 +47,16 @@ get_range_attrs, get_time_attrs, moment_attrs, - radar_calibration_subgroup, - radar_parameters_subgroup, sweep_vars_mapping, ) from .common import ( + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, - _attach_sweep_groups, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, + _build_groups_dict, + _compose_docstring, + _deprecation_warning, + _resolve_sweeps, ) #: mapping from DataMet names to CfRadial2/ODIM @@ -383,6 +381,7 @@ class DataMetBackendEntrypoint(BackendEntrypoint): description = "Open DataMet files in Xarray" url = "https://xradar.rtfd.io/latest/io.html#datamet-data-i-o" + supports_groups = True def open_dataset( self, @@ -450,84 +449,87 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + first_dim="auto", + reindex_angle=False, + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + ): + def _discover(): + dmet = DataMetFile(filename_or_obj) + return [f"sweep_{i}" for i in range(dmet.scan_metadata["elevation_number"])] + + sweeps = _resolve_sweeps(sweep, _discover) + + ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + first_dim=first_dim, + reindex_angle=reindex_angle, + site_as_coords=site_coords, + ) + + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps + ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) + + def open_datatree(self, filename_or_obj, **kwargs): + 
groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +DataMetBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a DataMet (Servizio Meteorologico Italiano) ``.tar.gz`` archive as a\n" + " CfRadial2-shaped dict of group datasets.", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, +) +DataMetBackendEntrypoint.open_datatree.__doc__ = ( + "Open a DataMet archive as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + def open_datamet_datatree(filename_or_obj, **kwargs): """Open DataMet dataset as :py:class:`xarray.DataTree`. - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file - - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. - - Returns - ------- - dtree: xarray.DataTree - DataTree + .. deprecated:: + Use ``xd.open_datatree(file, engine="datamet")`` instead. 
""" - # handle kwargs, extract first_dim - backend_kwargs = kwargs.pop("backend_kwargs", {}) + _deprecation_warning("open_datamet_datatree", "datamet") + + kwargs.pop("backend_kwargs", {}) optional = kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) - kwargs["backend_kwargs"] = backend_kwargs - sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i}" for i in sweep] - else: - sweeps.extend(sweep) - else: - # Get number of sweeps from data - dmet = DataMetFile(filename_or_obj) - sweeps = [ - f"sweep_{i}" for i in range(0, dmet.scan_metadata["elevation_number"]) - ] + # Remap legacy kwarg name + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset( - filename_or_obj, group=swp, engine=DataMetBackendEntrypoint, **kw - ) - for swp in sweeps - ] - - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + return DataMetBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/furuno.py b/xradar/io/backends/furuno.py index db0bda18..eaee3a74 100644 --- a/xradar/io/backends/furuno.py +++ b/xradar/io/backends/furuno.py @@ -46,7 +46,6 @@ import lat_lon_parser import numpy as np 
-import xarray as xr from xarray import DataTree from xarray.backends.common import AbstractDataStore, BackendArray, BackendEntrypoint from xarray.backends.file_manager import CachingFileManager @@ -57,7 +56,6 @@ from ... import util from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, get_elevation_attrs, @@ -67,22 +65,23 @@ get_time_attrs, moment_attrs, radar_calibration_subgroup, - radar_parameters_subgroup, sweep_vars_mapping, ) from .common import ( + REINDEX_PARAMS_DOC, SINT2, SINT4, + SITE_COORDS_PARAM_DOC, UINT1, UINT2, UINT4, _apply_site_as_coords, - _attach_sweep_groups, + _build_groups_dict, _calculate_angle_res, + _compose_docstring, + _deprecation_warning, _get_fmt_string, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, + _resolve_sweeps, _unpack_dictionary, ) @@ -707,6 +706,7 @@ class FurunoBackendEntrypoint(BackendEntrypoint): description = "Open FURUNO (.scn, .scnx) in Xarray" url = "https://xradar.rtfd.io/en/latest/io.html#furuno-binary-data" + supports_groups = True def open_dataset( self, @@ -779,58 +779,93 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + obsmode=None, + ): + sweeps = _resolve_sweeps(sweep, lambda: ["sweep_0"]) + + ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + first_dim=first_dim, + reindex_angle=reindex_angle, + fix_second_angle=fix_second_angle, + site_as_coords=site_coords, + obsmode=obsmode, + ) + + ls_ds = [ + 
self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps + ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) + + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +_FURUNO_PARAMS_DOC = """ + obsmode : int or None, optional + Override the file's observation-mode flag (rare; only needed for + files written by older firmware). Defaults to ``None``. +""" + +FurunoBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a Furuno SCN or SCNX file as a CfRadial2-shaped dict of group datasets.", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + _FURUNO_PARAMS_DOC, +) +FurunoBackendEntrypoint.open_datatree.__doc__ = ( + "Open a Furuno SCN or SCNX file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + def open_furuno_datatree(filename_or_obj, **kwargs): """Open FURUNO dataset as :py:class:`xarray.DataTree`. - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file - - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. 
- kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. - - Returns - ------- - dtree: xarray.DataTree - DataTree + .. deprecated:: + Use ``xd.open_datatree(file, engine="furuno")`` instead. """ - # handle kwargs, extract first_dim + _deprecation_warning("open_furuno_datatree", "furuno") + backend_kwargs = kwargs.pop("backend_kwargs", {}) - optional = backend_kwargs.pop("optional", True) + optional = kwargs.pop("optional", backend_kwargs.pop("optional", True)) optional_groups = kwargs.pop("optional_groups", False) - kwargs["backend_kwargs"] = backend_kwargs + sweep = kwargs.pop("sweep", None) + # Remap legacy kwarg name + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") - ls_ds = [xr.open_dataset(filename_or_obj, engine="furuno", **kwargs)] - - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + return FurunoBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/gamic.py b/xradar/io/backends/gamic.py index 540f5b11..19925019 100644 --- a/xradar/io/backends/gamic.py +++ b/xradar/io/backends/gamic.py @@ -38,7 +38,6 @@ import dateutil import h5netcdf import numpy as np -import xarray as xr from xarray import DataTree from xarray.backends.common import ( AbstractDataStore, @@ -54,26 +53,27 @@ from ...
import util from ...model import ( - georeferencing_correction_subgroup, get_azimuth_attrs, get_elevation_attrs, get_time_attrs, moment_attrs, optional_root_attrs, radar_calibration_subgroup, - radar_parameters_subgroup, required_global_attrs, sweep_vars_mapping, ) from .common import ( + HDF5_PARAMS_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, - _attach_sweep_groups, + _build_groups_dict, + _compose_docstring, + _deprecation_warning, _fix_angle, _get_h5group_names, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, _prepare_backend_ds, + _resolve_sweeps, ) from .odim import H5NetCDFArrayWrapper, _get_h5netcdf_encoding, _H5NetCDFMetadata @@ -407,6 +407,7 @@ class GamicBackendEntrypoint(BackendEntrypoint): description = "Open GAMIC HDF5 (.h5, .hdf5, .mvol) using h5netcdf in Xarray" url = "https://xradar.rtfd.io/en/latest/io.html#gamic-hdf5" + supports_groups = True def open_dataset( self, @@ -495,76 +496,95 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + format=None, + invalid_netcdf=None, + phony_dims="access", + decode_vlen_strings=True, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + ): + sweeps = _resolve_sweeps( + sweep, lambda: _get_h5group_names(filename_or_obj, "gamic") + ) -def open_gamic_datatree(filename_or_obj, **kwargs): - """Open GAMIC HDF5 dataset as :py:class:`xarray.DataTree`. 
+ ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + format=format, + invalid_netcdf=invalid_netcdf, + phony_dims=phony_dims, + decode_vlen_strings=decode_vlen_strings, + first_dim=first_dim, + reindex_angle=reindex_angle, + fix_second_angle=fix_second_angle, + site_as_coords=site_coords, + ) - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps + ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. 
+ def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) - Returns - ------- - dtree: xarray.DataTree - DataTree + +GamicBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a GAMIC HDF5 file as a CfRadial2-shaped dict of group datasets.", + HDF5_PARAMS_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, +) +GamicBackendEntrypoint.open_datatree.__doc__ = ( + "Open a GAMIC HDF5 file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + +def open_gamic_datatree(filename_or_obj, **kwargs): + """Open GAMIC HDF5 dataset as :py:class:`xarray.DataTree`. + + .. deprecated:: + Use ``xd.open_datatree(file, engine="gamic")`` instead. """ - # handle kwargs, extract first_dim + _deprecation_warning("open_gamic_datatree", "gamic") + backend_kwargs = kwargs.pop("backend_kwargs", {}) + # Direct `optional=` wins; capital-O "Optional" is the legacy GAMIC key - optional = backend_kwargs.pop("Optional", True) + optional = kwargs.pop("optional", backend_kwargs.pop("Optional", True)) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i}" for i in sweep] - else: - sweeps.extend(sweep) - else: - sweeps = _get_h5group_names(filename_or_obj, "gamic") - - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset(filename_or_obj, group=swp, engine="gamic", **kw) - for swp in sweeps - ] - - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) -
dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") + + return GamicBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/hpl.py b/xradar/io/backends/hpl.py index 3bc17f1e..08871e9d 100644 --- a/xradar/io/backends/hpl.py +++ b/xradar/io/backends/hpl.py @@ -45,21 +45,20 @@ from xarray.core.utils import FrozenDict from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, get_elevation_attrs, get_latitude_attrs, get_longitude_attrs, - radar_calibration_subgroup, - radar_parameters_subgroup, ) from .common import ( + HDF5_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, - _attach_sweep_groups, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, + _build_groups_dict, + _compose_docstring, + _deprecation_warning, + _resolve_sweeps, ) variable_attr_dict = {} @@ -516,6 +515,7 @@ class HPLBackendEntrypoint(BackendEntrypoint): description = "Backend for reading Halo Photonics Doppler lidar processed data" url = "https://xradar.rtfd.io/en/latest/io.html#metek" + supports_groups = True def open_dataset( self, @@ -539,8 +539,8 @@ def open_dataset( latitude=0, longitude=0, altitude=0, - transition_threshold_azi=0.05, - transition_threshold_el=0.001, + transition_threshold_azi=0.01, + transition_threshold_el=0.005, ): store_entrypoint = StoreBackendEntrypoint() @@ -591,8 +591,113 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + format=None, + invalid_netcdf=None, + phony_dims="access", 
+ decode_vlen_strings=True, + first_dim="auto", + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + latitude=0, + longitude=0, + altitude=0, + transition_threshold_azi=0.01, + transition_threshold_el=0.005, + ): + # For multi-sweep HPL files, `HplFile._data["sweep_0"]` is a junk + # lead-in entry (internal `sweep_number == -1`); the real first + # sweep lives at `sweep_1`. Shift integer-style selections by +1 + # so `sweep=0` maps to the real first sweep. Single-sweep files + # don't have the junk entry, so no shift is needed. + if isinstance(sweep, (int, list)): + with HplFile(filename_or_obj) as fh: + n_sweeps = len(fh.data["sweep_number"]) + if n_sweeps > 1: + if isinstance(sweep, int): + sweep = sweep + 1 + elif sweep and isinstance(sweep[0], int): + sweep = [i + 1 for i in sweep] + + sweeps = _resolve_sweeps(sweep, lambda: _get_hpl_group_names(filename_or_obj)) + + ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + format=format, + invalid_netcdf=invalid_netcdf, + phony_dims=phony_dims, + decode_vlen_strings=decode_vlen_strings, + first_dim=first_dim, + site_as_coords=site_coords, + latitude=latitude, + longitude=longitude, + altitude=altitude, + transition_threshold_azi=transition_threshold_azi, + transition_threshold_el=transition_threshold_el, + ) + + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps + ] + groups_dict = _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) + # HPL root uses "fixed_angle" instead of "sweep_fixed_angle" + root = groups_dict["/"] + if "sweep_fixed_angle" in root: + groups_dict["/"] = root.rename({"sweep_fixed_angle": "fixed_angle"}) + return groups_dict + + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = 
self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +_HPL_PARAMS_DOC = """ +latitude : float, optional + Override the site latitude (HPL files often lack geolocation). +longitude : float, optional + Override the site longitude. +altitude : float, optional + Override the site altitude above sea level (meters). +transition_threshold_azi : float, optional + Azimuth-jump threshold (deg) for sweep boundary detection. +transition_threshold_el : float, optional + Elevation-jump threshold (deg) for sweep boundary detection. +""" + +HPLBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a Halo Photonics Stream Line (.hpl) lidar file as a\n" + " CfRadial2-shaped dict of group datasets.", + HDF5_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + _HPL_PARAMS_DOC, +) +HPLBackendEntrypoint.open_datatree.__doc__ = ( + "Open a Halo Photonics .hpl file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) -def _get_h5group_names(filename_or_obj): + +def _get_hpl_group_names(filename_or_obj): store = HplStore.open(filename_or_obj) return [f"sweep_{i}" for i in store.root.data["sweep_number"]] @@ -600,74 +705,23 @@ def _get_h5group_names(filename_or_obj): def open_hpl_datatree(filename_or_obj, **kwargs): """Open Halo Photonics processed Doppler lidar dataset as :py:class:`xarray.DataTree`. - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file - - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. 
- reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. - - Returns - ------- - dtree: xarray.DataTree - DataTree + .. deprecated:: + Use ``xd.open_datatree(file, engine="hpl")`` instead. """ - # handle kwargs, extract first_dim + _deprecation_warning("open_hpl_datatree", "hpl") + backend_kwargs = kwargs.pop("backend_kwargs", {}) - optional = backend_kwargs.pop("optional", None) + optional = backend_kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i + 1}" for i in sweep] - else: - sweeps.extend(sweep) - else: - sweeps = _get_h5group_names(filename_or_obj) - - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset(filename_or_obj, group=swp, engine="hpl", **kw) - for swp in sweeps - ] + # Remap legacy kwarg name + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional).rename( - {"sweep_fixed_angle": "fixed_angle"} - ), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = 
_attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + return HPLBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/imd.py b/xradar/io/backends/imd.py index c6ae82bf..fc091b84 100644 --- a/xradar/io/backends/imd.py +++ b/xradar/io/backends/imd.py @@ -64,6 +64,7 @@ from .common import ( _STATION_VARS, _apply_site_as_coords, + _compose_docstring, _get_subgroup, ) @@ -421,6 +422,10 @@ class IMDBackendEntrypoint(BackendEntrypoint): "Open India Meteorological Department (IMD) radar NetCDF files in Xarray" ) url = "https://xradar.rtfd.io/en/latest/io.html#imd" + # True even though IMD files contain no native groups: enables + # `xr.open_datatree(file, engine="imd")` to materialize the synthetic + # `/` + `/sweep_0` CfRadial2 layout from the single-sweep file. + supports_groups = True def open_dataset( self, @@ -461,6 +466,71 @@ def open_dataset( ds._close = store.close return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=False, + first_dim="auto", + reindex_angle=False, + site_as_coords=True, + optional_groups=False, + **kwargs, + ): + """Open a single IMD sweep file as a dict of CfRadial2 group datasets. + + Single-file only. For multi-file IMD volumes (one sweep per file), + use :func:`open_imd_datatree` with a list of paths. 
+ """ + return _build_single_imd_dtree_dict( + filename_or_obj, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + first_dim=first_dim, + reindex_angle=reindex_angle, + site_as_coords=site_as_coords, + optional_groups=optional_groups, + **kwargs, + ) + + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +_IMD_PARAMS_DOC = """ + reindex_angle : bool or dict, optional + Resample rays onto a regular angular grid. See + :func:`xradar.util.reindex_angle`. Defaults to ``False``. + site_as_coords : bool, optional + Attach ``latitude``/``longitude``/``altitude`` as coords on the + sweep dataset. (Note: IMD uses the legacy ``site_as_coords`` + spelling rather than ``site_coords`` — kept for backward + compatibility.) Defaults to ``True``. +""" + +IMDBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a single IMD (India Meteorological Department) NetCDF file as a\n" + " CfRadial2-shaped dict of group datasets. Single-file only — for\n" + " multi-file IMD volumes use :func:`open_imd_datatree`.", + _IMD_PARAMS_DOC, +) +IMDBackendEntrypoint.open_datatree.__doc__ = ( + "Open a single IMD NetCDF file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + def _read_imd_sweep(filename, first_dim="auto", reindex_angle=False, **kwargs): """Open one IMD file and return a CfRadial2 sweep Dataset. @@ -468,17 +538,21 @@ def _read_imd_sweep(filename, first_dim="auto", reindex_angle=False, **kwargs): Avoids the xarray entrypoint registry so this works even when the ``imd`` engine has not been installed via pip entrypoints. 
""" - ds = xr.open_dataset( + raw = xr.open_dataset( filename, engine="netcdf4", decode_timedelta=kwargs.pop("decode_timedelta", False), **kwargs, ) - ds = _conform_imd_sweep(ds, first_dim=first_dim, site_as_coords=False) + # Preserve the file-handle closer across the rename/assign chain so the + # returned dataset can be closed by the caller. + close = raw._close + ds = _conform_imd_sweep(raw, first_dim=first_dim, site_as_coords=False) if reindex_angle is not False: ds = ds.pipe(util.remove_duplicate_rays) ds = ds.pipe(util.reindex_angle, **reindex_angle) ds = ds.pipe(util.ipol_time, **reindex_angle) + ds.set_close(close) return ds @@ -541,7 +615,7 @@ def _build_imd_root(sweeps): return root -def _open_single_imd_datatree( +def _build_single_imd_dtree_dict( filename, first_dim="auto", reindex_angle=False, @@ -549,7 +623,7 @@ def _open_single_imd_datatree( optional_groups=False, **kwargs, ): - """Build a single-sweep CfRadial2 DataTree from one IMD NetCDF file.""" + """Build the dict[str, Dataset] for a single-sweep IMD volume.""" sweep_ds = _read_imd_sweep( filename, first_dim=first_dim, reindex_angle=reindex_angle, **kwargs ) @@ -571,7 +645,12 @@ def _open_single_imd_datatree( sw = _apply_site_as_coords(sw, site_as_coords) sw.attrs = {} dtree["/sweep_0"] = sw - return DataTree.from_dict(dtree) + return dtree + + +def _open_single_imd_datatree(filename, **kwargs): + """Build a single-sweep CfRadial2 DataTree from one IMD NetCDF file.""" + return DataTree.from_dict(_build_single_imd_dtree_dict(filename, **kwargs)) def open_imd_datatree(filename_or_obj, **kwargs): @@ -587,6 +666,15 @@ def open_imd_datatree(filename_or_obj, **kwargs): sweeps by time and supports ``time_coverage_start``, ``time_coverage_end``, ``min_angle``, ``max_angle`` filtering. + .. note:: + + When opening a single IMD sweep file as a DataTree, prefer + ``xd.open_datatree(file, engine="imd")`` (or the xarray-native + ``xr.open_datatree(file, engine="imd")``). 
This function remains + the documented API for the multi-file path because IMD volumes + span multiple files, which the ``engine="imd"`` registry entry + does not support. + To split a directory of mixed-volume files into per-volume groups, use :func:`group_imd_files` first:: diff --git a/xradar/io/backends/iris.py b/xradar/io/backends/iris.py index 6c860a67..160d2e7f 100644 --- a/xradar/io/backends/iris.py +++ b/xradar/io/backends/iris.py @@ -43,7 +43,6 @@ from collections import OrderedDict import numpy as np -import xarray as xr from xarray import DataTree from xarray.backends.common import AbstractDataStore, BackendArray, BackendEntrypoint from xarray.backends.file_manager import CachingFileManager @@ -55,7 +54,6 @@ from ... import util from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, get_elevation_attrs, @@ -63,16 +61,17 @@ get_longitude_attrs, get_range_attrs, moment_attrs, - radar_calibration_subgroup, - radar_parameters_subgroup, sweep_vars_mapping, ) from .common import ( + LOCK_PARAM_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, - _attach_sweep_groups, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, + _build_groups_dict, + _compose_docstring, + _deprecation_warning, + _resolve_sweeps, ) IRIS_LOCK = SerializableLock() @@ -3991,6 +3990,7 @@ class IrisBackendEntrypoint(BackendEntrypoint): description = "Open IRIS/Sigmet files in Xarray" url = "https://xradar.rtfd.io/latest/io.html#iris-sigmet-data-i-o" + supports_groups = True def open_dataset( self, @@ -4068,75 +4068,96 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + group=None, + lock=None, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + sweep=None, + 
optional=True, + optional_groups=False, + ): + sweeps = _resolve_sweeps(sweep, lambda: _get_iris_group_names(filename_or_obj)) + + ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + lock=lock, + first_dim=first_dim, + reindex_angle=reindex_angle, + fix_second_angle=fix_second_angle, + site_as_coords=site_coords, + ) + + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps + ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) + + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +_IRIS_PARAMS_DOC = """ +group : str or None, optional + Specific Iris product group to open (``ingest_data`` / + ``raw_product`` etc.). Defaults to all sweep groups. +""" + +IrisBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open an Iris/Sigmet RAW file as a CfRadial2-shaped dict of group datasets.", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + _IRIS_PARAMS_DOC, + LOCK_PARAM_DOC, +) +IrisBackendEntrypoint.open_datatree.__doc__ = ( + "Open an Iris/Sigmet RAW file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + def open_iris_datatree(filename_or_obj, **kwargs): """Open Iris/Sigmet dataset as :py:class:`xarray.DataTree`. - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file - - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. 
If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. - - Returns - ------- - dtree: xarray.DataTree - DataTree + .. deprecated:: + Use ``xd.open_datatree(file, engine="iris")`` instead. """ - # handle kwargs, extract first_dim + _deprecation_warning("open_iris_datatree", "iris") + backend_kwargs = kwargs.pop("backend_kwargs", {}) - optional = kwargs.pop("optional", True) + # Capital-O "Optional" is legacy convention from original API + optional = backend_kwargs.pop("Optional", True) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{sw}" for sw in sweep] - else: - sweeps.extend(sweep) - else: - sweeps = _get_iris_group_names(filename_or_obj) + # Remap legacy kwarg name + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset(filename_or_obj, group=swp, engine="iris", **kw) - for swp in sweeps - ] - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, 
georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + return IrisBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/metek.py b/xradar/io/backends/metek.py index ef5d530a..593ae537 100644 --- a/xradar/io/backends/metek.py +++ b/xradar/io/backends/metek.py @@ -33,21 +33,20 @@ from xarray.core.utils import FrozenDict from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, get_elevation_attrs, get_latitude_attrs, get_longitude_attrs, get_time_attrs, - radar_calibration_subgroup, - radar_parameters_subgroup, ) from .common import ( - _attach_sweep_groups, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, + HDF5_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + _build_groups_dict, + _compose_docstring, + _deprecation_warning, + _resolve_sweeps, ) __all__ = [ @@ -577,6 +576,7 @@ class MRRBackendEntrypoint(BackendEntrypoint): description = "Backend for reading Metek MRR2 processed and raw data" url = "https://xradar.rtfd.io/en/latest/io.html#metek" + supports_groups = True def open_dataset( self, @@ -630,75 +630,90 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + format=None, + invalid_netcdf=None, + phony_dims="access", + decode_vlen_strings=True, + first_dim="auto", + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + ): + sweeps = _resolve_sweeps(sweep, lambda: ["sweep_0"]) -def open_metek_datatree(filename_or_obj, **kwargs): - """Open Metek MRR2 dataset as :py:class:`xarray.DataTree`. 
+ ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + format=format, + invalid_netcdf=invalid_netcdf, + phony_dims=phony_dims, + decode_vlen_strings=decode_vlen_strings, + first_dim=first_dim, + site_as_coords=site_coords, + ) - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps + ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) - Returns - ------- - dtree: xarray.DataTree - DataTree + +MRRBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a Metek MRR2 (.ave/.pro/.raw) file as a CfRadial2-shaped dict of\n" + " group datasets. 
MRR2 is a vertically pointing radar — the output\n" + " contains one ``sweep_0`` group with all profiles.", + HDF5_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, +) +MRRBackendEntrypoint.open_datatree.__doc__ = ( + "Open a Metek MRR2 file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + +def open_metek_datatree(filename_or_obj, **kwargs): + """Open Metek MRR2 dataset as :py:class:`xarray.DataTree`. + + .. deprecated:: + Use ``xd.open_datatree(file, engine="metek")`` instead. """ - # handle kwargs, extract first_dim + _deprecation_warning("open_metek_datatree", "metek") + backend_kwargs = kwargs.pop("backend_kwargs", {}) optional = backend_kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i + 1}" for i in sweep] - else: - sweeps.extend(sweep) - else: - sweeps = ["sweep_0"] - - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset(filename_or_obj, group=swp, engine="metek", **kw) - for swp in sweeps - ].copy() - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + # Remap legacy kwarg name + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") + + return MRRBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + 
optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/nexrad_level2.py b/xradar/io/backends/nexrad_level2.py index 7d27c7b6..ac3d261e 100644 --- a/xradar/io/backends/nexrad_level2.py +++ b/xradar/io/backends/nexrad_level2.py @@ -55,10 +55,17 @@ from xradar import util from xradar.io.backends.common import ( + _STATION_VARS, + LOCK_PARAM_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, _assign_root, + _compose_docstring, + _deprecation_warning, _get_radar_calibration, _get_subgroup, + _resolve_sweeps, ) from xradar.model import ( georeferencing_correction_subgroup, @@ -1153,6 +1160,24 @@ def _check_record(self): } +def _sweep_attrs_from_msg5_elev(elev): + """Build the per-sweep attrs dict from one MSG_5_ELEV entry (ICD Table XI).""" + wf = elev.get("waveform_type", 0) + ch = elev.get("channel_config", 0) + sup = elev.get("supplemental_data_decoded", {}) + return { + "waveform_type": _WAVEFORM_TYPES.get(wf, str(wf)), + "channel_config": _CHANNEL_CONFIGS.get(ch, str(ch)), + "super_resolution": elev.get("super_resolution", 0), + "sails_cut": sup.get("sails_cut", False), + "sails_sequence_number": sup.get("sails_sequence_number", 0), + "mrle_cut": sup.get("mrle_cut", False), + "mrle_sequence_number": sup.get("mrle_sequence_number", 0), + "mpda_cut": sup.get("mpda_cut", False), + "base_tilt_cut": sup.get("base_tilt_cut", False), + } + + def _assign_sweep_attrs(dtree, elev_data): """Inject per-sweep attrs from MSG_5_ELEV data onto sweep nodes. 
@@ -1165,22 +1190,7 @@ def _assign_sweep_attrs(dtree, elev_data): sweep_key = f"sweep_{i}" if sweep_key not in dtree.children: continue - wf = elev.get("waveform_type", 0) - ch = elev.get("channel_config", 0) - sup = elev.get("supplemental_data_decoded", {}) - dtree[sweep_key].ds.attrs.update( - { - "waveform_type": _WAVEFORM_TYPES.get(wf, str(wf)), - "channel_config": _CHANNEL_CONFIGS.get(ch, str(ch)), - "super_resolution": elev.get("super_resolution", 0), - "sails_cut": sup.get("sails_cut", False), - "sails_sequence_number": sup.get("sails_sequence_number", 0), - "mrle_cut": sup.get("mrle_cut", False), - "mrle_sequence_number": sup.get("mrle_sequence_number", 0), - "mpda_cut": sup.get("mpda_cut", False), - "base_tilt_cut": sup.get("base_tilt_cut", False), - } - ) + dtree[sweep_key].ds.attrs.update(_sweep_attrs_from_msg5_elev(elev)) def _get_dynamic_scan_type(supplemental): @@ -1893,6 +1903,7 @@ class NexradLevel2BackendEntrypoint(BackendEntrypoint): description = "Open NEXRAD Level2 files in Xarray" url = "tbd" + supports_groups = True def open_dataset( self, @@ -1964,6 +1975,179 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + sweep=None, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + optional=True, + optional_groups=False, + incomplete_sweep="drop", + lock=None, + **kwargs, + ): + from xarray.core.treenode import NodePath + + # Handle list/tuple of chunk files or bytes + if isinstance(filename_or_obj, (list, tuple)): + filename_or_obj = _concatenate_chunks(filename_or_obj) + if not filename_or_obj[:4].startswith(_VOLUME_HEADER_PREFIX): + raise ValueError( + "No chunk contains a volume header (AR2V prefix). " + "The first chunk must be the S file (volume scan start) " + "which contains the volume header and metadata." 
+ ) + + # Single metadata read. Reading incomplete_sweeps triggers + # data_header parsing and populates nex.data — needed for + # present_keys below. + with NEXRADLevel2File(filename_or_obj, loaddata=False) as nex: + incomplete = nex.incomplete_sweeps + # Use sparse sweep keys: upstream-dropped interior cuts leave + # gaps like [0..9, 11] that range(act_sweeps) would mis-index. + # See #361. + present_keys = sorted(nex.data) + act_sweeps = len(present_keys) + elev_data = nex.msg_5.get("elevation_data", []) if nex.msg_5 else [] + + # Normalise NodePath strings before resolving sweeps + if isinstance(sweep, str): + sweep = NodePath(sweep).name + elif isinstance(sweep, list) and sweep: + if isinstance(sweep[0], str): + sweep = [NodePath(i).name for i in sweep] + elif not isinstance(sweep[0], int): + raise ValueError( + "Invalid type in 'sweep' list. Expected integers " + "(e.g., [0, 1, 2]) or strings " + "(e.g. [/sweep_0, sweep_1])." + ) + + if sweep is not None: + sweeps = _resolve_sweeps( + sweep, + lambda: [f"sweep_{i}" for i in present_keys], + ) + else: + if incomplete_sweep == "drop": + sweeps = [f"sweep_{i}" for i in present_keys if i not in incomplete] + if incomplete: + warnings.warn( + f"Dropped {len(incomplete)} incomplete sweep(s): " + f"{sorted(incomplete)}. Use incomplete_sweep='pad' " + f"to include them with NaN-filled rays.", + UserWarning, + stacklevel=2, + ) + if not sweeps: + warnings.warn( + "All sweeps are incomplete. Returning empty dict.", + UserWarning, + stacklevel=2, + ) + return {"/": xr.Dataset()} + elif incomplete_sweep == "pad": + sweeps = [f"sweep_{i}" for i in present_keys] + else: + raise ValueError( + f"Invalid incomplete_sweep={incomplete_sweep!r}. " + "Expected 'drop' or 'pad'." 
+ ) + + # For pad mode, pass incomplete set to open_sweeps_as_dict + incomplete_sweeps = incomplete if incomplete_sweep == "pad" else set() + + sweep_dict = open_sweeps_as_dict( + filename_or_obj=filename_or_obj, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + sweeps=sweeps, + first_dim=first_dim, + reindex_angle=reindex_angle, + fix_second_angle=fix_second_angle, + site_as_coords=site_coords, + optional=optional, + incomplete_sweeps=incomplete_sweeps, + lock=lock, + **kwargs, + ) + + ls_ds = [sweep_dict[s] for s in sweep_dict] + ls_ds_with_root = [xr.Dataset()] + list(ls_ds) + root, ls_ds_with_root = _assign_root(ls_ds_with_root) + # Per ICD, total cuts actually recorded in the file (MSG_31 headers), + # not user selection. Used downstream to detect AVSET truncation. + root.attrs["actual_elevation_cuts"] = act_sweeps + groups_dict = { + "/": root, + } + if optional_groups: + groups_dict["/radar_parameters"] = _get_subgroup( + ls_ds_with_root, radar_parameters_subgroup + ) + groups_dict["/georeferencing_correction"] = _get_subgroup( + ls_ds_with_root, georeferencing_correction_subgroup + ) + groups_dict["/radar_calibration"] = _get_radar_calibration( + ls_ds_with_root, radar_calibration_subgroup + ) + # Inject per-sweep attrs from MSG_5_ELEV (ICD Table XI). The elev_data + # index aligns with sweep_{i} because both order by VCP cut index. 
+ for sweep_path, ds in sweep_dict.items(): + sw = ds.drop_vars(_STATION_VARS, errors="ignore").drop_attrs(deep=False) + sweep_idx = int(sweep_path.split("_")[-1]) + if 0 <= sweep_idx < len(elev_data): + sw.attrs.update(_sweep_attrs_from_msg5_elev(elev_data[sweep_idx])) + groups_dict[f"/{sweep_path}"] = sw + return groups_dict + + def open_datatree( + self, + filename_or_obj, + **kwargs, + ): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +_NEXRAD_PARAMS_DOC = """ +incomplete_sweep : {"drop", "pad"}, optional + How to handle sweeps with fewer rays than the VCP nominal count. + ``"drop"`` (default) excludes them with a UserWarning; ``"pad"`` + keeps them with NaN-filled rays so the reindexed azimuth grid is + complete. +""" + +NexradLevel2BackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a NEXRAD Level II file as a CfRadial2-shaped dict of group datasets.\n" + " Accepts a single file path, a bytes buffer, or a list/tuple of LDM\n" + " chunk paths (the first chunk must hold the AR2V volume header).", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + _NEXRAD_PARAMS_DOC, + LOCK_PARAM_DOC, +) +NexradLevel2BackendEntrypoint.open_datatree.__doc__ = ( + "Open a NEXRAD Level II file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + def open_nexradlevel2_datatree( filename_or_obj, @@ -2074,84 +2258,15 @@ def open_nexradlevel2_datatree( dtree : xarray.DataTree An `xarray.DataTree` representing the radar data organized by sweeps. """ - from xarray.core.treenode import NodePath - - # Handle list/tuple of chunk files or bytes - if isinstance(filename_or_obj, (list, tuple)): - filename_or_obj = _concatenate_chunks(filename_or_obj) - # Validate that the concatenated data starts with a volume header. - # The first chunk must be the S file (volume scan start). 
- if not filename_or_obj[:4].startswith(_VOLUME_HEADER_PREFIX): - raise ValueError( - "No chunk contains a volume header (AR2V prefix). " - "The first chunk must be the S file (volume scan start) which " - "contains the volume header and metadata. I/E chunks alone " - "cannot be decoded without it." - ) + _deprecation_warning("open_nexradlevel2_datatree", "nexradlevel2") - # Single metadata read for sweep count, completeness, and elevation data - with NEXRADLevel2File(filename_or_obj, loaddata=False) as nex: - # Reading incomplete_sweeps also triggers data_header parsing, - # populating nex.data. Must run before sorted(nex.data) below. - incomplete = nex.incomplete_sweeps - # Use the sweep indices actually present, not range(len(...)): - # upstream-dropped interior sweeps leave sparse keys (e.g. - # [0..9, 11]) that would otherwise KeyError downstream. See #361. - present_keys = sorted(nex.data) - act_sweeps = len(present_keys) - if nex.msg_5: - exp_sweeps = nex.msg_5["number_elevation_cuts"] - elev_data = nex.msg_5.get("elevation_data", []) - else: - exp_sweeps = 0 - elev_data = [] - - if isinstance(sweep, str): - sweep = NodePath(sweep).name - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i}" for i in sweep] - elif isinstance(sweep[0], str): - sweeps = [NodePath(i).name for i in sweep] - else: - raise ValueError( - "Invalid type in 'sweep' list. Expected integers (e.g., [0, 1, 2]) or strings (e.g. [/sweep_0, sweep_1])." - ) - else: - # Check for AVSET mode: actual sweeps may be fewer than VCP definition - if exp_sweeps > act_sweeps: - exp_sweeps = act_sweeps - - if incomplete_sweep == "drop": - sweeps = [f"sweep_{i}" for i in present_keys if i not in incomplete] - if incomplete: - warnings.warn( - f"Dropped {len(incomplete)} incomplete sweep(s): " - f"{sorted(incomplete)}. 
Use incomplete_sweep='pad' to " - f"include them with NaN-filled rays.", - UserWarning, - stacklevel=2, - ) - if not sweeps: - warnings.warn( - "All sweeps are incomplete. Returning empty DataTree.", - UserWarning, - stacklevel=2, - ) - return DataTree() - elif incomplete_sweep == "pad": - sweeps = [f"sweep_{i}" for i in present_keys] - else: - raise ValueError( - f"Invalid incomplete_sweep={incomplete_sweep!r}. " - "Expected 'drop' or 'pad'." - ) + # Legacy callers may pass `site_coords` via kwargs; the explicit + # `site_as_coords` parameter is the canonical wrapper signature. + # Honor the legacy name if given so existing callers keep working. + site_as_coords = kwargs.pop("site_coords", site_as_coords) - sweep_dict = open_sweeps_as_dict( - filename_or_obj=filename_or_obj, + return NexradLevel2BackendEntrypoint().open_datatree( + filename_or_obj, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, @@ -2159,38 +2274,17 @@ def open_nexradlevel2_datatree( drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, - sweeps=sweeps, + sweep=sweep, first_dim=first_dim, reindex_angle=reindex_angle, fix_second_angle=fix_second_angle, - site_as_coords=False, + site_coords=site_as_coords, optional=optional, - incomplete_sweeps=incomplete, + optional_groups=optional_groups, + incomplete_sweep=incomplete_sweep, lock=lock, **kwargs, ) - ls_ds: list[xr.Dataset] = [xr.Dataset()] + list(sweep_dict.values()) - root, ls_ds = _assign_root(ls_ds) - dtree: dict = {"/": root} - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - # Build from ls_ds (station vars already stripped by _assign_root). 
- dtree |= {key: ds.drop_attrs(deep=False) for key, ds in zip(sweep_dict, ls_ds[1:])} - result = DataTree.from_dict(dtree) - - # Inject per-sweep attrs from MSG_5_ELEV (ICD Table XI) - _assign_sweep_attrs(result, elev_data) - - # Actual sweeps recorded in the file (from MSG_31 headers, not user selection) - result.ds.attrs["actual_elevation_cuts"] = act_sweeps - - return result def open_sweeps_as_dict( diff --git a/xradar/io/backends/odim.py b/xradar/io/backends/odim.py index f0825ec2..0d696a09 100644 --- a/xradar/io/backends/odim.py +++ b/xradar/io/backends/odim.py @@ -38,7 +38,6 @@ import h5netcdf import numpy as np -import xarray as xr from xarray import DataTree from xarray.backends.common import ( AbstractDataStore, @@ -55,7 +54,6 @@ from ... import util from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, get_elevation_attrs, @@ -65,20 +63,21 @@ get_range_attrs, get_time_attrs, moment_attrs, - radar_calibration_subgroup, - radar_parameters_subgroup, sweep_vars_mapping, ) from .common import ( + HDF5_PARAMS_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, - _attach_sweep_groups, + _build_groups_dict, + _compose_docstring, + _deprecation_warning, _fix_angle, _get_h5group_names, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, _maybe_decode, _prepare_backend_ds, + _resolve_sweeps, ) HDF5_LOCK = SerializableLock() @@ -792,6 +791,7 @@ class OdimBackendEntrypoint(BackendEntrypoint): description = "Open ODIM_H5 (.h5, .hdf5) using h5netcdf in Xarray" url = "https://xradar.rtfd.io/en/latest/io.html#odim-h5" + supports_groups = True def open_dataset( self, @@ -878,10 +878,88 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + format=None, + invalid_netcdf=None, + 
phony_dims="access", + decode_vlen_strings=True, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + ): + sweeps = _resolve_sweeps( + sweep, lambda: _get_h5group_names(filename_or_obj, "odim") + ) + + ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + format=format, + invalid_netcdf=invalid_netcdf, + phony_dims=phony_dims, + decode_vlen_strings=decode_vlen_strings, + first_dim=first_dim, + reindex_angle=reindex_angle, + fix_second_angle=fix_second_angle, + site_as_coords=site_coords, + ) + + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps + ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) + + def open_datatree( + self, + filename_or_obj, + **kwargs, + ): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +OdimBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open an ODIM_H5 file as a CfRadial2-shaped dict of group datasets.", + HDF5_PARAMS_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, +) +OdimBackendEntrypoint.open_datatree.__doc__ = ( + "Open an ODIM_H5 file as :py:class:`xarray.DataTree`.\n\n" + "Equivalent to " + "``DataTree.from_dict(self.open_groups_as_dict(filename_or_obj, **kwargs))``." + " See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + def open_odim_datatree(filename_or_obj, **kwargs): """Open ODIM_H5 dataset as :py:class:`xarray.DataTree`. + .. deprecated:: + Use ``xd.open_datatree(file, engine="odim")`` or + ``xr.open_datatree(file, engine="odim")`` instead. 
+ Parameters ---------- filename_or_obj : str, Path, file-like or DataStore @@ -912,42 +990,22 @@ def open_odim_datatree(filename_or_obj, **kwargs): dtree: xarray.DataTree DataTree """ - # handle kwargs, extract first_dim + _deprecation_warning("open_odim_datatree", "odim") + + # Bridge old backend_kwargs to direct kwargs backend_kwargs = kwargs.pop("backend_kwargs", {}) optional = backend_kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i+1}" for i in sweep] - else: - sweeps.extend(sweep) - else: - sweeps = _get_h5group_names(filename_or_obj, "odim") - - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset(filename_or_obj, group=swp, engine="odim", **kw) - for swp in sweeps - ] - # todo: apply CfRadial2 group structure below - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + # Translate the legacy `site_as_coords` spelling to the canonical + # `site_coords` kwarg the entrypoint accepts. 
+ if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") + + return OdimBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/rainbow.py b/xradar/io/backends/rainbow.py index 69f9ac29..5e6196e7 100644 --- a/xradar/io/backends/rainbow.py +++ b/xradar/io/backends/rainbow.py @@ -37,7 +37,6 @@ import zlib import numpy as np -import xarray as xr import xmltodict from xarray import DataTree from xarray.backends.common import AbstractDataStore, BackendArray, BackendEntrypoint @@ -49,7 +48,6 @@ from ... import util from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, get_elevation_attrs, @@ -58,16 +56,16 @@ get_range_attrs, get_time_attrs, moment_attrs, - radar_calibration_subgroup, - radar_parameters_subgroup, sweep_vars_mapping, ) from .common import ( + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, - _attach_sweep_groups, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, + _build_groups_dict, + _compose_docstring, + _deprecation_warning, + _resolve_sweeps, ) #: mapping of rainbow moment names to CfRadial2/ODIM names @@ -799,6 +797,7 @@ class RainbowBackendEntrypoint(BackendEntrypoint): description = "Open Rainbow5 files in Xarray" url = "https://xradar.rtfd.io/latest/io.html#rainbow-data-i-o" + supports_groups = True def open_dataset( self, @@ -867,6 +866,63 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + first_dim="auto", + reindex_angle=False, + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + ): + sweeps = _resolve_sweeps( + sweep, lambda: _get_rainbow_group_names(filename_or_obj) + ) + + ds_kwargs = 
dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + first_dim=first_dim, + reindex_angle=reindex_angle, + site_as_coords=site_coords, + ) + + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps + ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) + + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +RainbowBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a Rainbow5 file as a CfRadial2-shaped dict of group datasets.", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, +) +RainbowBackendEntrypoint.open_datatree.__doc__ = ( + "Open a Rainbow5 file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + def _get_rainbow_group_names(filename): with RainbowFile(filename, loaddata=False) as fh: @@ -875,74 +931,25 @@ def _get_rainbow_group_names(filename): def open_rainbow_datatree(filename_or_obj, **kwargs): - """Open ODIM_H5 dataset as :py:class:`xarray.DataTree`. + """Open Rainbow5 dataset as :py:class:`xarray.DataTree`. - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file - - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. 
Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. - - Returns - ------- - dtree: xarray.DataTree - DataTree + .. deprecated:: + Use ``xd.open_datatree(file, engine="rainbow")`` instead. """ - # handle kwargs, extract first_dim + _deprecation_warning("open_rainbow_datatree", "rainbow") + backend_kwargs = kwargs.pop("backend_kwargs", {}) optional = backend_kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i + 1}" for i in sweep] - else: - sweeps.extend(sweep) - else: - sweeps = _get_rainbow_group_names(filename_or_obj) - - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset(filename_or_obj, group=swp, engine="rainbow", **kw) - for swp in sweeps - ] - - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + # Remap legacy kwarg name + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") + + return RainbowBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + 
optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/uf.py b/xradar/io/backends/uf.py index c8703ce9..4311b456 100644 --- a/xradar/io/backends/uf.py +++ b/xradar/io/backends/uf.py @@ -35,6 +35,7 @@ import dateutil import numpy as np import xarray as xr +from xarray import DataTree from xarray.backends.common import AbstractDataStore, BackendArray, BackendEntrypoint from xarray.backends.file_manager import CachingFileManager from xarray.backends.locks import SerializableLock, ensure_lock @@ -45,10 +46,17 @@ from xradar import util from xradar.io.backends.common import ( + _STATION_VARS, + LOCK_PARAM_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, _assign_root, + _compose_docstring, + _deprecation_warning, _get_radar_calibration, _get_subgroup, + _resolve_sweeps, ) from xradar.model import ( georeferencing_correction_subgroup, @@ -552,7 +560,8 @@ class UFStore(AbstractDataStore): def __init__(self, manager, group=None, lock=UF_LOCK): self._manager = manager - self._group = int(group[6:]) + 1 + # Accept both ``"sweep_N"`` and ``"/sweep_N"`` (NodePath form). 
+ self._group = int(group.rsplit("sweep_", 1)[-1]) + 1 self._filename = self.filename self._need_time_recalc = False self.lock = ensure_lock(lock) @@ -741,6 +750,7 @@ class UFBackendEntrypoint(BackendEntrypoint): description = "Open Universal Format (UF) files in Xarray" url = "https://xradar.rtfd.io/latest/io.html#uf-data-i-o" + supports_groups = True def open_dataset( self, @@ -809,153 +819,125 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + sweep=None, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + optional=True, + optional_groups=False, + lock=None, + **kwargs, + ): + from xarray.core.treenode import NodePath + + # Normalise NodePath strings ("/sweep_0" -> "sweep_0") and validate + # list element types before resolving. + if isinstance(sweep, str): + sweep = NodePath(sweep).name + elif isinstance(sweep, list) and sweep: + if isinstance(sweep[0], str): + sweep = [NodePath(i).name for i in sweep] + elif not isinstance(sweep[0], int): + raise ValueError( + "Invalid type in 'sweep' list. Expected integers " + "(e.g., [0, 1, 2]) or strings (e.g. [/sweep_0, sweep_1])." + ) + + sweeps = _resolve_sweeps( + sweep, + lambda: [ + f"sweep_{i}" + for i in range(UFFile(filename_or_obj, loaddata=False).nsweeps) + ], + ) -def open_uf_datatree( - filename_or_obj, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - sweep=None, - first_dim="auto", - reindex_angle=False, - fix_second_angle=False, - site_as_coords=True, - optional=True, - optional_groups=False, - lock=None, - **kwargs, -): - """Open a Universal Format (UF) dataset as :py:class:`xarray.DataTree`. 
- - This function loads UF radar data into a DataTree structure, which - organizes radar sweeps as separate nodes. Provides options for decoding time - and applying various transformations to the data. - - Parameters - ---------- - filename_or_obj : str, Path, file-like, or DataStore - The path or file-like object representing the radar file. - Path-like objects are interpreted as local or remote paths. - - mask_and_scale : bool, optional - If True, replaces values in the dataset that match `_FillValue` with NaN - and applies scale and offset adjustments. Default is True. - - decode_times : bool, optional - If True, decodes time variables according to CF conventions. Default is True. - - concat_characters : bool, optional - If True, concatenates character arrays along the last dimension, forming - string arrays. Default is True. - - decode_coords : bool, optional - If True, decodes the "coordinates" attribute to identify coordinates in the - resulting dataset. Default is True. - - drop_variables : str or list of str, optional - Specifies variables to exclude from the dataset. Useful for removing problematic - or inconsistent variables. Default is None. - - use_cftime : bool, optional - If True, uses cftime objects to represent time variables; if False, uses - `np.datetime64` objects. If None, chooses the best format automatically. - Default is None. + sweep_dict = open_sweeps_as_dict( + filename_or_obj=filename_or_obj, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + sweeps=sweeps, + first_dim=first_dim, + reindex_angle=reindex_angle, + fix_second_angle=fix_second_angle, + site_as_coords=site_coords, + optional=optional, + lock=lock, + **kwargs, + ) - decode_timedelta : bool, optional - If True, decodes variables with units of time (e.g., seconds, minutes) into - timedelta objects. 
If False, leaves them as numeric values. Default is None. + ls_ds = [xr.Dataset()] + list(sweep_dict.values()) + root, ls_ds = _assign_root(ls_ds) + groups_dict = {"/": root} + if optional_groups: + groups_dict["/radar_parameters"] = _get_subgroup( + ls_ds, radar_parameters_subgroup + ) + groups_dict["/georeferencing_correction"] = _get_subgroup( + ls_ds, georeferencing_correction_subgroup + ) + groups_dict["/radar_calibration"] = _get_radar_calibration( + ls_ds, radar_calibration_subgroup + ) + for sweep_path, ds in sweep_dict.items(): + sw = ds.drop_vars(_STATION_VARS, errors="ignore").drop_attrs(deep=False) + groups_dict[f"/{sweep_path}"] = sw + return groups_dict - sweep : int or list of int, optional - Sweep numbers to extract from the dataset. If None, extracts all sweeps into - a list. Default is the first sweep. + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) - first_dim : {"time", "auto"}, optional - Defines the first dimension for each sweep. If "time," uses time as the - first dimension. If "auto," determines the first dimension based on the sweep - type (azimuth or elevation). Default is "auto." - reindex_angle : bool or dict, optional - Controls angle reindexing. If True or a dictionary, applies reindexing with - specified settings (if given). Only used if `decode_coords=True`. Default is False. +UFBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a Universal Format (UF) file as a CfRadial2-shaped dict of group datasets.", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + LOCK_PARAM_DOC, +) +UFBackendEntrypoint.open_datatree.__doc__ = ( + "Open a Universal Format (UF) file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) - fix_second_angle : bool, optional - If True, corrects errors in the second angle data, such as misaligned - elevation or azimuth values. 
Default is False. - site_as_coords : bool, optional - Attaches radar site coordinates to the dataset if True. Default is True. +def open_uf_datatree(filename_or_obj, **kwargs): + """Open a Universal Format (UF) dataset as :py:class:`xarray.DataTree`. - optional : bool, optional - If True, suppresses errors for optional dataset attributes, making them - optional instead of required. Default is True. + .. deprecated:: + Use ``xd.open_datatree(file, engine="uf")`` instead. + """ + _deprecation_warning("open_uf_datatree", "uf") - kwargs : dict - Additional keyword arguments passed to `xarray.open_dataset`. + optional = kwargs.pop("optional", True) + optional_groups = kwargs.pop("optional_groups", False) + sweep = kwargs.pop("sweep", None) + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") - Returns - ------- - dtree : xarray.DataTree - An `xarray.DataTree` representing the radar data organized by sweeps. - """ - from xarray.core.treenode import NodePath - - if isinstance(sweep, str): - sweep = NodePath(sweep).name - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i}" for i in sweep] - elif isinstance(sweep[0], str): - sweeps = [NodePath(i).name for i in sweep] - else: - raise ValueError( - "Invalid type in 'sweep' list. Expected integers (e.g., [0, 1, 2]) or strings (e.g. [/sweep_0, sweep_1])." 
- ) - else: - with UFFile(filename_or_obj, loaddata=False) as ufh: - # Actual number of sweeps recorded in the file - act_sweeps = ufh.nsweeps - - sweeps = [f"sweep_{i}" for i in range(act_sweeps)] - - sweep_dict = open_sweeps_as_dict( - filename_or_obj=filename_or_obj, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, - sweeps=sweeps, - first_dim=first_dim, - reindex_angle=reindex_angle, - fix_second_angle=fix_second_angle, - site_as_coords=False, + return UFBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, optional=optional, - lock=lock, + optional_groups=optional_groups, **kwargs, ) - ls_ds: list[xr.Dataset] = [xr.Dataset()] + list(sweep_dict.values()) - root, ls_ds = _assign_root(ls_ds) - dtree: dict = {"/": root} - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - # Build from ls_ds (station vars already stripped by _assign_root). - dtree |= {key: ds.drop_attrs(deep=False) for key, ds in zip(sweep_dict, ls_ds[1:])} - return xr.DataTree.from_dict(dtree) def open_sweeps_as_dict( diff --git a/xradar/util.py b/xradar/util.py index 93e508f5..bd30dbf9 100644 --- a/xradar/util.py +++ b/xradar/util.py @@ -380,7 +380,7 @@ def _ipol_time(da, dim0, a1gate=0, direction=1): sidx = da_sel[dim0].argsort() # special handling for wrap-around angles - angles = da_sel[dim0] + angles = da_sel[dim0].values.copy() # a1gate should normally only be set for PPI, if a1gate > 0: # create a boolean mask for the last a1gate indices