diff --git a/docs/history.md b/docs/history.md index bbd6cfbf..b3de19f5 100644 --- a/docs/history.md +++ b/docs/history.md @@ -2,6 +2,7 @@ ## Development +* MNT: Refactor the CfRadial1 writer for code quality ahead of v1.0 — descriptive helper names (``_main_info_mapper`` → ``_extract_root_dataset``, ``_variable_mapper`` → ``_combine_sweeps``, ``_calib_mapper`` → ``_map_radar_calibration``, ``_sweep_info_mapper`` → ``_collect_sweep_metadata``), clearer locals, fix the ``calibs`` bool/variable name collision, robust ``history`` attribute and missing-``elevation`` handling, and a shared ``_build_cfradial1_dataset`` reused by ``xradar.transform.to_cfradial1`` to remove duplication ({issue}`379`) by [@syedhamidali](https://github.com/syedhamidali) * FIX: ``open_nexradlevel2_datatree`` decodes volumes with interior sweep-index gaps end-to-end — translate sweep label → compact position in ``NexradLevel2Store.open_store_coordinates`` so the per-sweep entrypoint stops positionally indexing the compacted ``msg_31_header`` (follow-up to {pull}`362`) ({issue}`366`, {pull}`374`) by [@aladinor](https://github.com/aladinor) * FIX: ensure `to_cfradial2` correctly selects the default storage engine when none is provided, ({pull}`378`) by [@chfer](https://github.com/chfer) * MNT: Add ``cfradial1_sgp_file`` session fixture and refactor 8 tests in ``test_util.py``/``test_accessors.py`` to share it instead of inlining ``DATASETS.fetch("sample_sgp_data.nc")``. Fixture returns the filename so each test opens its own DataTree, avoiding cross-test mutation ({issue}`346`, {pull}`347`) by [@aladinor](https://github.com/aladinor) diff --git a/tests/io/test_cfradial1.py b/tests/io/test_cfradial1.py index 28ec2b09..6448c88c 100644 --- a/tests/io/test_cfradial1.py +++ b/tests/io/test_cfradial1.py @@ -4,6 +4,7 @@ import numpy as np +import pytest import xarray as xr from open_radar_data import DATASETS @@ -120,7 +121,7 @@ def test_cfradial1_export_helper_metadata_and_indices(): def test_cfradial1_export_helper_empty_sweep_info_and_time_fallback(): empty = xr.DataTree.from_dict({"/": xr.Dataset()}) - sweep_info = cf1_export._sweep_info_mapper(empty) + sweep_info = cf1_export._collect_sweep_metadata(empty) assert "sweep_number" in sweep_info assert np.isnan(sweep_info["sweep_number"].values[0]) @@ -142,6 +143,43 @@ def test_cfradial1_export_helper_empty_sweep_info_and_time_fallback(): }, ) dtree = xr.DataTree.from_dict({"/": xr.Dataset(), "/sweep_0": sweep}) - mapped = cf1_export._variable_mapper(dtree) + mapped = cf1_export._combine_sweeps(dtree) assert "DBZ" in mapped assert mapped["DBZ"].dims == ("time", "range") + + +def test_cfradial1_export_auto_filename(tmp_path, monkeypatch): + filename = DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") + dtree = xd.io.open_cfradial1_datatree(filename) + + # filename=None derives the name from instrument_name + first timestamp + monkeypatch.chdir(tmp_path) + xd.io.to_cfradial1(dtree.copy(), filename=None, calibs=True) + + written = list(tmp_path.glob("cfrad1_*.nc")) + assert len(written) == 1 + assert written[0].name.startswith("cfrad1_") + + +def test_cfradial1_export_requires_dtree(): + with pytest.raises(ValueError, match="must be a radar"): + xd.io.to_cfradial1(None) + + +def test_cfradial1_export_sweep_indices_missing_elevation(): + dtree = xr.DataTree.from_dict( + { + "/": xr.Dataset(), + "/sweep_0": xr.Dataset( + coords={"elevation": ("azimuth", np.array([0.5, 0.5], dtype="float32"))} + ), + "/sweep_1": xr.Dataset(), # no elevation coordinate -> skipped with warning + } + ) + + with pytest.warns(UserWarning, match="no 'elevation'"): + out = cf1_export.calculate_sweep_indices(dtree) + + # only the valid sweep contributes a ray-index entry + assert out["sweep_start_ray_index"].size == 1 + assert out["sweep_end_ray_index"].size == 1 diff --git a/xradar/io/export/cfradial1.py b/xradar/io/export/cfradial1.py index daf5bafc..5116450a 100644 --- a/xradar/io/export/cfradial1.py +++ b/xradar/io/export/cfradial1.py @@ -27,11 +27,36 @@ __doc__ = __doc__.format("\n ".join(__all__)) +import warnings from importlib.metadata import version import numpy as np import xarray as xr +#: Per-sweep metadata variables that CfRadial1 stores as scalars (one value +#: per sweep) rather than along a ray dimension. +SWEEP_METADATA_VARS = ( + "sweep_number", + "sweep_mode", + "polarization_mode", + "prt_mode", + "follow_mode", + "sweep_fixed_angle", + "sweep_start_ray_index", + "sweep_end_ray_index", +) + +#: Subset of :data:`SWEEP_METADATA_VARS` gathered into the ``sweep``-indexed +#: info dataset; the ray-index variables are computed separately. +SWEEP_INFO_VARS = ( + "sweep_number", + "sweep_mode", + "polarization_mode", + "prt_mode", + "follow_mode", + "sweep_fixed_angle", +) + def _first_valid_scalar(data_array): """Collapse a metadata variable to its first non-missing scalar value.""" @@ -47,64 +72,58 @@ def _first_valid_scalar(data_array): return np.nan -def _normalize_sweep_metadata(data): +def _normalize_sweep_metadata(sweep_ds): """Restore sweep metadata variables to scalar form before CfRadial1 export.""" - metadata_vars = [ - "sweep_number", - "sweep_mode", - "polarization_mode", - "prt_mode", - "follow_mode", - "sweep_fixed_angle", - "sweep_start_ray_index", - "sweep_end_ray_index", - ] - - data = data.copy() - for name in metadata_vars: - if name not in data: + sweep_ds = sweep_ds.copy() + for name in SWEEP_METADATA_VARS: + if name not in sweep_ds: continue - if data[name].ndim == 0: + if sweep_ds[name].ndim == 0: continue - data[name] = xr.DataArray( - _first_valid_scalar(data[name]), - attrs=data[name].attrs, + sweep_ds[name] = xr.DataArray( + _first_valid_scalar(sweep_ds[name]), + attrs=sweep_ds[name].attrs, ) - return data + return sweep_ds + +def _sweep_group_names(dtree): + """Return the names of the sweep groups in a radar ``DataTree``.""" + return [name for name in dtree.groups if "sweep" in name] -def _calib_mapper(calib_params): + +def _map_radar_calibration(calib_ds): """ - Map calibration parameters to a new dataset format. + Map calibration parameters to the CfRadial1 ``r_calib_*`` layout. Parameters ---------- - calib_params: xarray.Dataset + calib_ds: xarray.Dataset Calibration parameters dataset. Returns ------- xarray.Dataset - New dataset with mapped calibration parameters. + New dataset with each variable renamed to ``r_calib_`` and a + leading ``r_calib`` dimension. """ - new_data_vars = {} - for var in calib_params.data_vars: - data_array = calib_params[var] - new_data_vars["r_calib_" + var] = xr.DataArray( + renamed_vars = {} + for name in calib_ds.data_vars: + data_array = calib_ds[name] + renamed_vars["r_calib_" + name] = xr.DataArray( data=data_array.data[np.newaxis, ...], dims=["r_calib"] + list(data_array.dims), coords={"r_calib": [0]}, attrs=data_array.attrs, ) - radar_calib_renamed = xr.Dataset(new_data_vars) - radar_calib_renamed = radar_calib_renamed.drop_vars("r_calib", errors="ignore") - return radar_calib_renamed + radar_calib = xr.Dataset(renamed_vars) + return radar_calib.drop_vars("r_calib", errors="ignore") -def _main_info_mapper(dtree): +def _extract_root_dataset(dtree): """ - Map main radar information from a radar xarray dataset. + Extract the root (volume-level) dataset from a radar ``DataTree``. Parameters ---------- @@ -114,77 +133,77 @@ def _main_info_mapper(dtree): Returns ------- xarray.Dataset - Dataset containing the mapped radar information. + The root dataset with ``sweep_group_name`` dropped and + ``sweep_fixed_angle`` renamed to ``fixed_angle`` if present. """ - dataset = dtree.root.to_dataset() - dataset = dataset.drop_vars("sweep_group_name", errors="ignore") + root_ds = dtree.root.to_dataset() + root_ds = root_ds.drop_vars("sweep_group_name", errors="ignore") - # Rename "sweep_fixed_angle" to "fixed_angle" if it exists - if "sweep_fixed_angle" in dataset: - dataset = dataset.rename({"sweep_fixed_angle": "fixed_angle"}) + if "sweep_fixed_angle" in root_ds: + root_ds = root_ds.rename({"sweep_fixed_angle": "fixed_angle"}) - return dataset + return root_ds -def _variable_mapper(dtree, dim0=None): +def _combine_sweeps(dtree, dim0=None): """ - Map radar variables for different sweep groups. + Combine all sweep groups into a single ray-indexed CfRadial1 dataset. Parameters ---------- dtree: xarray.DataTree Radar xarray.DataTree. - dim0: str - Either `azimuth` or `elevation` + dim0: str, optional + Either ``azimuth`` or ``elevation``. Inferred per sweep from the + sweep mode when not provided. Returns ------- xarray.Dataset - Dataset containing mapped radar variables. + Dataset with all sweeps concatenated along ``time``, merged with the + per-sweep and volume-level metadata. """ + sweep_info = _collect_sweep_metadata(dtree) + root_ds = _extract_root_dataset(dtree) + if "fixed_angle" in root_ds: + root_ds = root_ds.drop_vars("fixed_angle") - sweep_info = _sweep_info_mapper(dtree) - vol_info = _main_info_mapper(dtree) - if "fixed_angle" in vol_info: - vol_info = vol_info.drop_vars("fixed_angle") sweep_datasets = [] - for grp in dtree.groups: - if "sweep" in grp: - data = _normalize_sweep_metadata( - dtree[grp].to_dataset(inherit="all_coords") - ) + for group_name in _sweep_group_names(dtree): + sweep_ds = _normalize_sweep_metadata( + dtree[group_name].to_dataset(inherit="all_coords") + ) - # handling first dimension - if dim0 is None: - dim0 = ( - "elevation" - if str(data.sweep_mode.load().values) == "rhi" - else "azimuth" - ) - if dim0 not in data.dims: - dim0 = "time" - assert dim0 in data.dims + # handling first dimension + if dim0 is None: + dim0 = ( + "elevation" + if str(sweep_ds.sweep_mode.load().values) == "rhi" + else "azimuth" + ) + if dim0 not in sweep_ds.dims: + dim0 = "time" + assert dim0 in sweep_ds.dims - # swap dims, if needed - if dim0 != "time" and dim0 in data.dims: - data = data.swap_dims({dim0: "time"}) + # swap dims, if needed + if dim0 != "time" and dim0 in sweep_ds.dims: + sweep_ds = sweep_ds.swap_dims({dim0: "time"}) - # sort in any case - data = data.sortby("time") + # sort in any case + sweep_ds = sweep_ds.sortby("time") - data = data.drop_vars(["x", "y", "z"], errors="ignore") + sweep_ds = sweep_ds.drop_vars(["x", "y", "z"], errors="ignore") - # Strip per-sweep attrs that may vary across sweeps (e.g. - # NEXRAD ICD waveform_type, super_resolution) to avoid - # merge conflicts in combine_by_coords below. - data.attrs = {} + # Strip per-sweep attrs that may vary across sweeps (e.g. + # NEXRAD ICD waveform_type, super_resolution) to avoid + # merge conflicts in combine_by_coords below. + sweep_ds.attrs = {} - # Convert to a dataset and append to the list - sweep_datasets.append(data) + sweep_datasets.append(sweep_ds) # need to use combine_by_coords to correctly test for # incompatible attrs on DataArray's - result_dataset = xr.combine_by_coords( + combined = xr.combine_by_coords( sweep_datasets, data_vars="all", compat="no_conflicts", @@ -193,29 +212,28 @@ def _variable_mapper(dtree, dim0=None): combine_attrs="no_conflicts", ) - drop_variables = [ + per_sweep_vars = [ "sweep_fixed_angle", "sweep_number", "sweep_mode", "prt_mode", "follow_mode", ] - result_dataset = result_dataset.drop_vars(drop_variables, errors="ignore") + combined = combined.drop_vars(per_sweep_vars, errors="ignore") - drop_coords = ["latitude", "longitude", "altitude", "spatial_ref", "crs_wkt"] - result_dataset = result_dataset.drop_vars(drop_coords, errors="ignore") + georef_coords = ["latitude", "longitude", "altitude", "spatial_ref", "crs_wkt"] + combined = combined.drop_vars(georef_coords, errors="ignore") - result_dataset.update(sweep_info) - sweep_indices = calculate_sweep_indices(dtree, result_dataset) - result_dataset.update(sweep_indices) - result_dataset = result_dataset.reset_coords(["elevation", "azimuth"]) - result_dataset.update(vol_info) - return result_dataset + combined.update(sweep_info) + combined.update(calculate_sweep_indices(dtree, combined)) + combined = combined.reset_coords(["elevation", "azimuth"]) + combined.update(root_ds) + return combined -def _sweep_info_mapper(dtree): +def _collect_sweep_metadata(dtree): """ - Extract specified sweep information variables from a radar xarray.DataTree + Collect per-sweep metadata variables into a ``sweep``-indexed dataset. Parameters ---------- @@ -225,53 +243,32 @@ def _sweep_info_mapper(dtree): Returns ------- xarray.Dataset - Dataset containing the specified sweep information variables. + Dataset with one value per sweep for each metadata variable. """ - dataset = xr.Dataset() + sweep_info = xr.Dataset() + sweep_groups = _sweep_group_names(dtree) - sweep_vars = [ - "sweep_number", - "sweep_mode", - "polarization_mode", - "prt_mode", - "follow_mode", - "sweep_fixed_angle", - "sweep_start_ray_index", - "sweep_end_ray_index", - ] - - for var_name in sweep_vars: - var_data_list = [ + for var_name in SWEEP_INFO_VARS: + values = [ ( - np.asarray([_first_valid_scalar(dtree[s][var_name])]) - if var_name in dtree[s] + np.asarray([_first_valid_scalar(dtree[group][var_name])]) + if var_name in dtree[group] else np.array([np.nan]) ) - for s in dtree.groups - if "sweep" in s + for group in sweep_groups ] - var_attrs_list = [ - dtree[s][var_name].attrs if var_name in dtree[s] else {} - for s in dtree.groups - if "sweep" in s - ] - - if not var_data_list: - var_data = np.array([np.nan]) - else: - var_data = np.concatenate(var_data_list) - - var_attrs = {} - for attrs in var_attrs_list: - var_attrs.update(attrs) - - var_data_array = xr.DataArray(var_data, dims=("sweep",), attrs=var_attrs) - dataset[var_name] = var_data_array + merged_attrs = {} + for group in sweep_groups: + if var_name in dtree[group]: + merged_attrs.update(dtree[group][var_name].attrs) - dataset = dataset.rename({"sweep_fixed_angle": "fixed_angle"}) + var_data = np.concatenate(values) if values else np.array([np.nan]) + sweep_info[var_name] = xr.DataArray( + var_data, dims=("sweep",), attrs=merged_attrs + ) - return dataset + return sweep_info.rename({"sweep_fixed_angle": "fixed_angle"}) def calculate_sweep_indices(dtree, dataset=None): @@ -296,21 +293,21 @@ def calculate_sweep_indices(dtree, dataset=None): sweep_start_ray_index = [] sweep_end_ray_index = [] - cumulative_size = 0 - try: - for group_name in dtree.groups: - if "sweep" in group_name: - ele_size = dtree[group_name].elevation.size - sweep_start_ray_index.append(cumulative_size) - sweep_end_ray_index.append(cumulative_size + ele_size - 1) - cumulative_size += ele_size - - except KeyError as e: - print( - f"Error: The sweep group '{e.args[0]}' was not found in radar datatree. Skipping..." - ) + for group_name in _sweep_group_names(dtree): + try: + ele_size = dtree[group_name].elevation.size + except AttributeError: + warnings.warn( + f"Sweep group '{group_name}' has no 'elevation' coordinate; " + "skipping its ray-index calculation.", + stacklevel=2, + ) + continue + sweep_start_ray_index.append(cumulative_size) + sweep_end_ray_index.append(cumulative_size + ele_size - 1) + cumulative_size += ele_size dataset["sweep_start_ray_index"] = xr.DataArray( sweep_start_ray_index, @@ -327,53 +324,83 @@ def calculate_sweep_indices(dtree, dataset=None): return dataset -def to_cfradial1(dtree=None, filename=None, calibs=True): +def _build_cfradial1_dataset(dtree, calibs=True): """ - Convert a radar xarray.DataTree to the CFRadial1 format - and save it to a file. Ensure that the resulting dataset - is well-formed and does not include specified extraneous variables. + Build a single CfRadial1 ``Dataset`` from a radar ``DataTree``. + + This assembles the sweep variables, calibration, radar parameters and + georeferencing correction, and sets the CfRadial1 global attributes. + It is shared by :func:`to_cfradial1` (which writes to a file) and by + :func:`xradar.transform.to_cfradial1` (which returns the dataset). Parameters ---------- dtree: xarray.DataTree - Radar xarary.DataTree object. - filename: str, optional - The name of the output netCDF file. - calibs: Bool, optional + Radar xarray.DataTree object. + calibs: bool, optional Whether to include calibration parameters. + + Returns + ------- + xarray.Dataset + The assembled CfRadial1 dataset. """ - # Generate the initial ds_cf using the existing mapping functions - dataset = _variable_mapper(dtree) + if dtree is None: + raise ValueError("`dtree` must be a radar xarray.DataTree, not None.") + + cfradial1_ds = _combine_sweeps(dtree) # Handle calibration parameters - if calibs: - if "radar_calibration" in dtree: - calib_params = dtree["radar_calibration"].to_dataset() - calibs = _calib_mapper(calib_params) - dataset.update(calibs) + if calibs and "radar_calibration" in dtree: + calib_ds = _map_radar_calibration(dtree["radar_calibration"].to_dataset()) + cfradial1_ds.update(calib_ds) # Add additional parameters if they exist in dtree if "radar_parameters" in dtree: radar_params = dtree["radar_parameters"].to_dataset().reset_coords() - dataset.update(radar_params) + cfradial1_ds.update(radar_params) if "georeferencing_correction" in dtree: radar_georef = dtree["georeferencing_correction"].to_dataset().reset_coords() - dataset.update(radar_georef) + cfradial1_ds.update(radar_georef) # Ensure that the data type of sweep_mode and similar variables matches - if "sweep_mode" in dataset.variables: - dataset["sweep_mode"] = dataset["sweep_mode"].astype("S") + if "sweep_mode" in cfradial1_ds.variables: + cfradial1_ds["sweep_mode"] = cfradial1_ds["sweep_mode"].astype("S") # Update global attributes - dataset.attrs = dtree.attrs - dataset.attrs["Conventions"] = "Cf/Radial" - dataset.attrs["version"] = "1.2" + cfradial1_ds.attrs = dict(dtree.attrs) + cfradial1_ds.attrs["Conventions"] = "Cf/Radial" + cfradial1_ds.attrs["version"] = "1.2" xradar_version = version("xradar") - dataset.attrs["history"] += f": xradar v{xradar_version} CfRadial1 export" + history = cfradial1_ds.attrs.get("history", "") + cfradial1_ds.attrs["history"] = ( + f"{history}: xradar v{xradar_version} CfRadial1 export" + ) + + return cfradial1_ds + + +def to_cfradial1(dtree=None, filename=None, calibs=True): + """ + Convert a radar xarray.DataTree to the CfRadial1 format + and save it to a file. Ensure that the resulting dataset + is well-formed and does not include specified extraneous variables. + + Parameters + ---------- + dtree: xarray.DataTree + Radar xarray.DataTree object. + filename: str, optional + The name of the output netCDF file. When omitted, a name is derived + from the instrument name and first timestamp. + calibs: bool, optional + Whether to include calibration parameters. + """ + cfradial1_ds = _build_cfradial1_dataset(dtree, calibs=calibs) if filename is None: - time = str(dataset.time[0].dt.strftime("%Y%m%d_%H%M%S").values) - filename = f"cfrad1_{dataset.instrument_name}_{time}.nc" + time = str(cfradial1_ds.time[0].dt.strftime("%Y%m%d_%H%M%S").values) + filename = f"cfrad1_{cfradial1_ds.instrument_name}_{time}.nc" - dataset.to_netcdf(filename, format="netcdf4") + cfradial1_ds.to_netcdf(filename, format="netcdf4") diff --git a/xradar/transform/cfradial.py b/xradar/transform/cfradial.py index ff8978d6..c436caa0 100644 --- a/xradar/transform/cfradial.py +++ b/xradar/transform/cfradial.py @@ -57,8 +57,6 @@ __doc__ = __doc__.format("\n ".join(__all__)) -from importlib.metadata import version - from xarray import DataTree from ..io.backends.cfradial1 import ( @@ -69,10 +67,7 @@ open_cfradial1_datatree, ) from ..io.backends.common import _attach_sweep_groups -from ..io.export.cfradial1 import ( - _calib_mapper, - _variable_mapper, -) +from ..io.export.cfradial1 import _build_cfradial1_dataset from ..model import ( georeferencing_correction_subgroup, radar_parameters_subgroup, @@ -82,48 +77,21 @@ # to_cfradial1 function implementation def to_cfradial1(dtree=None, calibs=True): """ - Convert a radar xarray.DataTree to the CFRadial1 format - and save it to a file. Ensure that the resulting dataset - is well-formed and does not include specified extraneous variables. + Convert a radar xarray.DataTree to a CfRadial1 ``xarray.Dataset``. Parameters ---------- dtree: xarray.DataTree Radar xarray.DataTree object. - calibs: Bool, optional + calibs: bool, optional Whether to include calibration parameters. + + Returns + ------- + xarray.Dataset + The assembled CfRadial1 dataset. """ - # Generate the initial ds_cf using the existing mapping functions - dataset = _variable_mapper(dtree) - - # Handle calibration parameters - if calibs: - if "radar_calibration" in dtree: - calib_params = dtree["radar_calibration"].to_dataset() - calibs = _calib_mapper(calib_params) - dataset.update(calibs) - - # Add additional parameters if they exist in dtree - if "radar_parameters" in dtree: - radar_params = dtree["radar_parameters"].to_dataset().reset_coords() - dataset.update(radar_params) - - if "georeferencing_correction" in dtree: - radar_georef = dtree["georeferencing_correction"].to_dataset().reset_coords() - dataset.update(radar_georef) - - # Ensure that the data type of sweep_mode and similar variables matches - if "sweep_mode" in dataset.variables: - dataset["sweep_mode"] = dataset["sweep_mode"].astype("S") - - # Update global attributes - dataset.attrs = dtree.attrs - dataset.attrs["Conventions"] = "Cf/Radial" - dataset.attrs["version"] = "1.2" - xradar_version = version("xradar") - dataset.attrs["history"] += f": xradar v{xradar_version} CfRadial1 export" - - return dataset + return _build_cfradial1_dataset(dtree, calibs=calibs) # to_cfradial2 function implementation