From db09c305c8cefbb3a8d6acdc21ecde13bca2533d Mon Sep 17 00:00:00 2001 From: aladinor Date: Fri, 20 Feb 2026 14:47:09 -0600 Subject: [PATCH 01/17] ENH: add xarray-native open_datatree with engine= parameter Implement open_datatree/open_groups_as_dict on three prototype backends (ODIM, CfRadial1, NEXRAD Level2) with supports_groups=True, enabling xr.open_datatree(file, engine="odim") and a unified xd.open_datatree() entry point. Deprecate standalone open_*_datatree functions with FutureWarning. Closes #329 Co-Authored-By: Claude Opus 4.6 --- examples/notebooks/Open-Datatree-Engine.ipynb | 420 ++++++++++++++++++ requirements.txt | 2 +- tests/io/test_backend_datatree.py | 280 ++++++++++++ xradar/__init__.py | 1 + xradar/io/__init__.py | 41 ++ xradar/io/backends/cfradial1.py | 119 +++-- xradar/io/backends/common.py | 51 +++ xradar/io/backends/nexrad_level2.py | 205 ++++----- xradar/io/backends/odim.py | 127 ++++-- 9 files changed, 1070 insertions(+), 176 deletions(-) create mode 100644 examples/notebooks/Open-Datatree-Engine.ipynb create mode 100644 tests/io/test_backend_datatree.py diff --git a/examples/notebooks/Open-Datatree-Engine.ipynb b/examples/notebooks/Open-Datatree-Engine.ipynb new file mode 100644 index 00000000..826e4c2c --- /dev/null +++ b/examples/notebooks/Open-Datatree-Engine.ipynb @@ -0,0 +1,420 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "# Open DataTree with `engine=` parameter\n", + "\n", + "This notebook demonstrates the new unified `open_datatree` API that allows opening radar files as `xarray.DataTree` using the `engine=` parameter.\n", + "\n", + "Three ways to open a DataTree:\n", + "- `xd.open_datatree(file, engine=\"...\")` — xradar unified API\n", + "- `xr.open_datatree(file, engine=\"...\")` — xarray native API\n", + "- `xd.io.open_*_datatree(file)` — legacy per-format functions (deprecated, emit `FutureWarning`)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", 
+ "metadata": {}, + "outputs": [], + "source": [ + "import warnings\n", + "\n", + "import xarray as xr\n", + "from open_radar_data import DATASETS\n", + "\n", + "import xradar as xd" + ] + }, + { + "cell_type": "markdown", + "id": "2", + "metadata": {}, + "source": [ + "## Download test data\n", + "\n", + "Fetching radar data files from [open-radar-data](https://github.com/openradar/open-radar-data) repository." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "odim_file = DATASETS.fetch(\"71_20181220_060628.pvol.h5\")\n", + "cfradial1_file = DATASETS.fetch(\"cfrad.20080604_002217_000_SPOL_v36_SUR.nc\")\n", + "nexrad_file = DATASETS.fetch(\"KATX20130717_195021_V06\")" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## 1. `xd.open_datatree()` — Unified xradar API\n", + "\n", + "The new unified entry point. Specify the `engine` to select the backend." + ] + }, + { + "cell_type": "markdown", + "id": "5", + "metadata": {}, + "source": [ + "### ODIM_H5" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [ + "dtree = xd.open_datatree(odim_file, engine=\"odim\")\n", + "display(dtree)" + ] + }, + { + "cell_type": "markdown", + "id": "7", + "metadata": {}, + "source": [ + "The tree follows the CfRadial2 group structure with metadata groups at the root level and sweep groups below." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8", + "metadata": {}, + "outputs": [], + "source": [ + "# Root dataset contains global metadata\n", + "display(dtree.ds)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "# Access a specific sweep\n", + "display(dtree[\"sweep_0\"].ds)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "10", + "metadata": {}, + "outputs": [], + "source": [ + "# Metadata groups\n", + "print(\"radar_parameters:\", list(dtree[\"radar_parameters\"].ds.data_vars))\n", + "print(\"georeferencing_correction:\", list(dtree[\"georeferencing_correction\"].ds.data_vars))\n", + "print(\"radar_calibration:\", list(dtree[\"radar_calibration\"].ds.data_vars))" + ] + }, + { + "cell_type": "markdown", + "id": "11", + "metadata": {}, + "source": [ + "### CfRadial1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12", + "metadata": {}, + "outputs": [], + "source": [ + "dtree = xd.open_datatree(cfradial1_file, engine=\"cfradial1\")\n", + "display(dtree)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "dtree[\"sweep_0\"].ds.DBZ.plot()" + ] + }, + { + "cell_type": "markdown", + "id": "14", + "metadata": {}, + "source": [ + "### NEXRAD Level 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15", + "metadata": {}, + "outputs": [], + "source": [ + "dtree = xd.open_datatree(nexrad_file, engine=\"nexradlevel2\")\n", + "display(dtree)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "16", + "metadata": {}, + "outputs": [], + "source": [ + "dtree[\"sweep_0\"].ds.DBZH.plot()" + ] + }, + { + "cell_type": "markdown", + "id": "17", + "metadata": {}, + "source": [ + "## 2. Sweep selection\n", + "\n", + "Select specific sweeps by index (int or list) or by name." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18", + "metadata": {}, + "outputs": [], + "source": [ + "# Single sweep by index\n", + "dtree = xd.open_datatree(odim_file, engine=\"odim\", sweep=0)\n", + "print(\"Children:\", list(dtree.children))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19", + "metadata": {}, + "outputs": [], + "source": [ + "# Multiple sweeps by index\n", + "dtree = xd.open_datatree(odim_file, engine=\"odim\", sweep=[0, 2, 4])\n", + "print(\"Children:\", list(dtree.children))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "# Sweeps by name\n", + "dtree = xd.open_datatree(cfradial1_file, engine=\"cfradial1\", sweep=[\"sweep_0\", \"sweep_3\"])\n", + "print(\"Children:\", list(dtree.children))" + ] + }, + { + "cell_type": "markdown", + "id": "21", + "metadata": {}, + "source": [ + "## 3. Backend kwargs\n", + "\n", + "Pass backend-specific options directly as keyword arguments." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [ + "# first_dim controls the leading dimension (\"auto\" uses azimuth/elevation)\n", + "# site_coords attaches latitude/longitude/altitude to sweep datasets\n", + "dtree = xd.open_datatree(\n", + " odim_file,\n", + " engine=\"odim\",\n", + " sweep=[0],\n", + " first_dim=\"auto\",\n", + " site_coords=True,\n", + ")\n", + "sweep_ds = dtree[\"sweep_0\"].ds\n", + "print(\"Dimensions:\", dict(sweep_ds.dims))\n", + "print(\"Site coords present:\", \"latitude\" in sweep_ds.coords)" + ] + }, + { + "cell_type": "markdown", + "id": "23", + "metadata": {}, + "source": [ + "## 4. `xr.open_datatree()` — xarray native API\n", + "\n", + "The same backends work directly with xarray's native `open_datatree`, no xradar wrapper needed." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24", + "metadata": {}, + "outputs": [], + "source": [ + "dtree = xr.open_datatree(odim_file, engine=\"odim\")\n", + "display(dtree)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25", + "metadata": {}, + "outputs": [], + "source": [ + "dtree = xr.open_datatree(nexrad_file, engine=\"nexradlevel2\", sweep=[0, 1])\n", + "display(dtree)" + ] + }, + { + "cell_type": "markdown", + "id": "26", + "metadata": {}, + "source": [ + "## 5. `open_groups_as_dict()` — Low-level access\n", + "\n", + "For advanced use, get the raw `dict[str, Dataset]` before it becomes a DataTree." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27", + "metadata": {}, + "outputs": [], + "source": [ + "from xradar.io.backends.odim import OdimBackendEntrypoint\n", + "\n", + "backend = OdimBackendEntrypoint()\n", + "groups = backend.open_groups_as_dict(odim_file, sweep=[0, 1])\n", + "\n", + "print(\"Group keys:\", list(groups.keys()))\n", + "print()\n", + "print(\"Root dataset:\")\n", + "display(groups[\"/\"])" + ] + }, + { + "cell_type": "markdown", + "id": "28", + "metadata": {}, + "source": [ + "## 6. Backward compatibility — deprecated functions\n", + "\n", + "The legacy per-format functions still work but emit a `FutureWarning` directing you to the new API." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "29", + "metadata": {}, + "outputs": [], + "source": [ + "with warnings.catch_warnings(record=True) as w:\n", + " warnings.simplefilter(\"always\")\n", + " dtree_old = xd.io.open_odim_datatree(odim_file, sweep=[0])\n", + " for warning in w:\n", + " if issubclass(warning.category, FutureWarning):\n", + " print(f\"FutureWarning: {warning.message}\")\n", + "\n", + "display(dtree_old)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30", + "metadata": {}, + "outputs": [], + "source": [ + "# The old and new APIs produce equivalent results\n", + "dtree_new = xd.open_datatree(odim_file, engine=\"odim\", sweep=[0])\n", + "print(\"Same children:\", set(dtree_old.children) == set(dtree_new.children))" + ] + }, + { + "cell_type": "markdown", + "id": "31", + "metadata": {}, + "source": [ + "## 7. Error handling" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "32", + "metadata": {}, + "outputs": [], + "source": [ + "# Unknown engine raises a clear error\n", + "try:\n", + " xd.open_datatree(odim_file, engine=\"nonexistent\")\n", + "except ValueError as e:\n", + " print(f\"ValueError: {e}\")" + ] + }, + { + "cell_type": "markdown", + "id": "33", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "| Call | API | Status |\n", + "|-----|---------|--------|\n", + "| `xd.open_datatree(file, engine=\"odim\")` | Unified xradar API | **New** |\n", + "| `xr.open_datatree(file, engine=\"odim\")` | xarray native API | **New** |\n", + "| `xd.io.open_odim_datatree(file)` | Per-format function | Deprecated |\n", + "\n", + "Supported engines: `\"odim\"`, `\"cfradial1\"`, `\"nexradlevel2\"`" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", 
+ "name": "python", + "nbformat_minor": 5, + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/requirements.txt b/requirements.txt index d18d8ce2..0a629ae3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,7 +3,7 @@ dask h5netcdf >= 1.0.0 h5py >= 3.0.0 lat_lon_parser -netCDF4 +netCDF4 >= 1.5.0, != 1.7.3, != 1.7.4 numpy pyproj scipy diff --git a/tests/io/test_backend_datatree.py b/tests/io/test_backend_datatree.py new file mode 100644 index 00000000..1f3f0a09 --- /dev/null +++ b/tests/io/test_backend_datatree.py @@ -0,0 +1,280 @@ +#!/usr/bin/env python +# Copyright (c) 2024-2025, openradar developers. +# Distributed under the MIT License. See LICENSE for more info. + +""" +Tests for xarray-native open_datatree with engine= parameter. + +Tests the unified ``xd.open_datatree()`` and ``xr.open_datatree()`` APIs, +``open_groups_as_dict()`` direct calls, backward compatibility with +deprecated standalone functions, and ``supports_groups`` attribute. +""" + +import warnings + +import pytest +import xarray as xr +from xarray import DataTree + +import xradar as xd +from xradar.io.backends.cfradial1 import ( + CfRadial1BackendEntrypoint, + open_cfradial1_datatree, +) +from xradar.io.backends.nexrad_level2 import ( + NexradLevel2BackendEntrypoint, + open_nexradlevel2_datatree, +) +from xradar.io.backends.odim import OdimBackendEntrypoint, open_odim_datatree + +# -- Fixtures ---------------------------------------------------------------- + + +@pytest.fixture( + params=[ + pytest.param("odim", id="odim"), + pytest.param("nexradlevel2", id="nexradlevel2"), + ] +) +def engine_and_file(request, odim_file, nexradlevel2_file): + """Parametrize over engines that do not require netCDF4.""" + mapping = { + "odim": odim_file, + "nexradlevel2": nexradlevel2_file, + } + return request.param, mapping[request.param] + + +@pytest.fixture +def cfradial1_engine_file(cfradial1_file): + return "cfradial1", cfradial1_file + + +# -- CfRadial2 structure 
keys ----------------------------------------------- + +REQUIRED_GROUPS = { + "/", + "/radar_parameters", + "/georeferencing_correction", + "/radar_calibration", +} + + +# -- Helper ------------------------------------------------------------------ + + +def _assert_cfradial2_structure(dtree): + """Verify that a DataTree has CfRadial2 group structure.""" + assert isinstance(dtree, DataTree) + children = set(dtree.children.keys()) + # Must have metadata groups + for grp in ["radar_parameters", "georeferencing_correction", "radar_calibration"]: + assert grp in children, f"Missing group: {grp}" + # Must have at least one sweep + sweep_groups = [k for k in children if k.startswith("sweep_")] + assert len(sweep_groups) > 0, "No sweep groups found" + # Root must have key variables + root_vars = set(dtree.ds.data_vars) + assert "time_coverage_start" in root_vars + assert "time_coverage_end" in root_vars + + +# -- xd.open_datatree integration tests (ODIM, NEXRAD) ---------------------- + + +class TestXdOpenDatatree: + """Test xd.open_datatree() for ODIM and NEXRAD.""" + + def test_basic_open(self, engine_and_file): + engine, filepath = engine_and_file + dtree = xd.open_datatree(filepath, engine=engine) + _assert_cfradial2_structure(dtree) + + def test_sweep_selection_list(self, engine_and_file): + engine, filepath = engine_and_file + dtree = xd.open_datatree(filepath, engine=engine, sweep=[0, 1]) + sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] + assert len(sweep_groups) == 2 + + def test_sweep_selection_int(self, engine_and_file): + engine, filepath = engine_and_file + dtree = xd.open_datatree(filepath, engine=engine, sweep=0) + sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] + assert len(sweep_groups) == 1 + + def test_kwargs_flow_through(self, engine_and_file): + engine, filepath = engine_and_file + dtree = xd.open_datatree( + filepath, + engine=engine, + first_dim="auto", + site_coords=True, + sweep=[0], + ) + sweep_ds = 
dtree["sweep_0"].ds + assert "latitude" in sweep_ds.coords + assert "longitude" in sweep_ds.coords + assert "altitude" in sweep_ds.coords + + def test_unknown_engine_raises(self, odim_file): + with pytest.raises(ValueError, match="Unknown engine"): + xd.open_datatree(odim_file, engine="nonexistent_engine") + + +# -- xd.open_datatree for CfRadial1 ----------------------------------------- + + +class TestXdOpenDatatreeCfRadial1: + """Test xd.open_datatree() for CfRadial1 (requires h5netcdf in this env).""" + + def test_basic_open(self, cfradial1_engine_file): + _, filepath = cfradial1_engine_file + backend = CfRadial1BackendEntrypoint() + dtree = backend.open_datatree( + filepath, engine="h5netcdf", decode_timedelta=False + ) + _assert_cfradial2_structure(dtree) + + def test_sweep_selection(self, cfradial1_engine_file): + _, filepath = cfradial1_engine_file + backend = CfRadial1BackendEntrypoint() + dtree = backend.open_datatree( + filepath, + engine="h5netcdf", + decode_timedelta=False, + sweep=[0, 1], + ) + sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] + assert len(sweep_groups) == 2 + + +# -- xr.open_datatree tests ------------------------------------------------- + + +class TestXrOpenDatatree: + """Test xr.open_datatree() with xradar engines.""" + + def test_xr_open_datatree_odim(self, odim_file): + dtree = xr.open_datatree(odim_file, engine="odim") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_nexrad(self, nexradlevel2_file): + dtree = xr.open_datatree(nexradlevel2_file, engine="nexradlevel2") + _assert_cfradial2_structure(dtree) + + +# -- open_groups_as_dict direct tests ---------------------------------------- + + +class TestOpenGroupsAsDict: + """Test open_groups_as_dict() returns correct dict structure.""" + + def test_odim_groups_dict(self, odim_file): + backend = OdimBackendEntrypoint() + groups = backend.open_groups_as_dict(odim_file, sweep=[0, 1]) + assert isinstance(groups, dict) + assert "/" in groups + 
assert "/radar_parameters" in groups + assert "/georeferencing_correction" in groups + assert "/radar_calibration" in groups + assert "/sweep_0" in groups + assert "/sweep_1" in groups + for key, ds in groups.items(): + assert isinstance(ds, xr.Dataset), f"{key} is not a Dataset" + + def test_nexrad_groups_dict(self, nexradlevel2_file): + backend = NexradLevel2BackendEntrypoint() + groups = backend.open_groups_as_dict(nexradlevel2_file, sweep=[0, 1]) + assert isinstance(groups, dict) + assert "/" in groups + assert "/sweep_0" in groups + assert "/sweep_1" in groups + + def test_cfradial1_groups_dict(self, cfradial1_file): + backend = CfRadial1BackendEntrypoint() + groups = backend.open_groups_as_dict( + cfradial1_file, + engine="h5netcdf", + decode_timedelta=False, + sweep=[0, 1], + ) + assert isinstance(groups, dict) + assert "/" in groups + assert "/sweep_0" in groups + assert "/sweep_1" in groups + + +# -- supports_groups attribute ----------------------------------------------- + + +class TestSupportsGroups: + """Verify supports_groups is True on all 3 backend classes.""" + + def test_odim_supports_groups(self): + assert OdimBackendEntrypoint.supports_groups is True + + def test_cfradial1_supports_groups(self): + assert CfRadial1BackendEntrypoint.supports_groups is True + + def test_nexrad_supports_groups(self): + assert NexradLevel2BackendEntrypoint.supports_groups is True + + +# -- Backward compatibility & deprecation tests ------------------------------ + + +class TestDeprecation: + """Test that standalone functions still work but emit FutureWarning.""" + + def test_open_odim_datatree_deprecation(self, odim_file): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + dtree = open_odim_datatree(odim_file, sweep=[0]) + deprecation_warnings = [ + x for x in w if issubclass(x.category, FutureWarning) + ] + assert len(deprecation_warnings) >= 1 + assert "open_odim_datatree" in str(deprecation_warnings[0].message) + 
_assert_cfradial2_structure(dtree) + + def test_open_cfradial1_datatree_deprecation(self, cfradial1_file): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + dtree = open_cfradial1_datatree( + cfradial1_file, + engine="h5netcdf", + decode_timedelta=False, + sweep=[0], + ) + deprecation_warnings = [ + x for x in w if issubclass(x.category, FutureWarning) + ] + assert len(deprecation_warnings) >= 1 + assert "open_cfradial1_datatree" in str(deprecation_warnings[0].message) + _assert_cfradial2_structure(dtree) + + def test_open_nexradlevel2_datatree_deprecation(self, nexradlevel2_file): + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + dtree = open_nexradlevel2_datatree(nexradlevel2_file, sweep=[0]) + deprecation_warnings = [ + x for x in w if issubclass(x.category, FutureWarning) + ] + assert len(deprecation_warnings) >= 1 + assert "open_nexradlevel2_datatree" in str(deprecation_warnings[0].message) + _assert_cfradial2_structure(dtree) + + def test_odim_deprecated_output_matches_new_api(self, odim_file): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + old = open_odim_datatree(odim_file, sweep=[0, 1]) + new = xd.open_datatree(odim_file, engine="odim", sweep=[0, 1]) + # Same number of children + assert set(old.children.keys()) == set(new.children.keys()) + + def test_nexrad_deprecated_output_matches_new_api(self, nexradlevel2_file): + with warnings.catch_warnings(): + warnings.simplefilter("ignore", FutureWarning) + old = open_nexradlevel2_datatree(nexradlevel2_file, sweep=[0, 1]) + new = xd.open_datatree(nexradlevel2_file, engine="nexradlevel2", sweep=[0, 1]) + assert set(old.children.keys()) == set(new.children.keys()) diff --git a/xradar/__init__.py b/xradar/__init__.py index 4a990dca..b6ce6c7f 100644 --- a/xradar/__init__.py +++ b/xradar/__init__.py @@ -29,5 +29,6 @@ from . import util # noqa from .util import map_over_sweeps # noqa from . 
import transform # noqa +from .io import open_datatree # noqa __all__ = [s for s in dir() if not s.startswith("_")] diff --git a/xradar/io/__init__.py b/xradar/io/__init__.py index 3693e99e..3a3201c0 100644 --- a/xradar/io/__init__.py +++ b/xradar/io/__init__.py @@ -17,4 +17,45 @@ from .backends import * # noqa from .export import * # noqa +from .backends.cfradial1 import CfRadial1BackendEntrypoint +from .backends.nexrad_level2 import NexradLevel2BackendEntrypoint +from .backends.odim import OdimBackendEntrypoint + +#: Registry mapping engine names to backend classes that support groups. +_ENGINE_REGISTRY = { + "odim": OdimBackendEntrypoint, + "cfradial1": CfRadial1BackendEntrypoint, + "nexradlevel2": NexradLevel2BackendEntrypoint, +} + + +def open_datatree(filename_or_obj, *, engine, **kwargs): + """Open a radar file as :py:class:`xarray.DataTree` using the specified engine. + + Parameters + ---------- + filename_or_obj : str, Path, or file-like + Path to the radar file. + engine : str + Backend engine name (e.g., ``"odim"``, ``"cfradial1"``, ``"nexradlevel2"``). + **kwargs + Additional keyword arguments passed to the backend's ``open_datatree`` method. + + Returns + ------- + dtree : xarray.DataTree + DataTree with CfRadial2 group structure. + + Examples + -------- + >>> import xradar as xd + >>> dtree = xd.open_datatree("file.h5", engine="odim") + """ + if engine not in _ENGINE_REGISTRY: + supported = ", ".join(sorted(_ENGINE_REGISTRY)) + raise ValueError(f"Unknown engine {engine!r}. 
Supported engines: {supported}") + backend = _ENGINE_REGISTRY[engine]() + return backend.open_datatree(filename_or_obj, **kwargs) + + __all__ = [s for s in dir() if not s.startswith("_")] diff --git a/xradar/io/backends/cfradial1.py b/xradar/io/backends/cfradial1.py index 81b8760d..1c48e31f 100644 --- a/xradar/io/backends/cfradial1.py +++ b/xradar/io/backends/cfradial1.py @@ -53,6 +53,7 @@ _STATION_VARS, _apply_site_as_coords, _attach_sweep_groups, + _deprecation_warning, _maybe_decode, ) @@ -337,6 +338,10 @@ def _get_radar_calibration(ds): def open_cfradial1_datatree(filename_or_obj, **kwargs): """Open CfRadial1 dataset as :py:class:`xarray.DataTree`. + .. deprecated:: + Use ``xd.open_datatree(file, engine="cfradial1")`` or + ``xr.open_datatree(file, engine="cfradial1")`` instead. + Parameters ---------- filename_or_obj : str, Path, file-like or xarray.DataStore @@ -369,47 +374,28 @@ def open_cfradial1_datatree(filename_or_obj, **kwargs): dtree: xarray.DataTree DataTree with CfRadial2 groups. 
""" + _deprecation_warning("open_cfradial1_datatree", "cfradial1") - # handle kwargs, extract first_dim + # Bridge old kwargs to direct kwargs first_dim = kwargs.pop("first_dim", "auto") optional = kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) kwargs.pop("site_as_coords", None) sweep = kwargs.pop("sweep", None) engine = kwargs.pop("engine", "netcdf4") - # needed for new xarray literal timedelta decoding - kwargs.update(decode_timedelta=kwargs.pop("decode_timedelta", False)) - - # open root group, cfradial1 only has one group - # open_cfradial1_datatree only opens the file once using netcdf4 - # and retrieves the different groups from the loaded object - ds = open_dataset(filename_or_obj, engine=engine, **kwargs) + kwargs.setdefault("decode_timedelta", False) - # create datatree root node additional root metadata groups - dtree: dict = { - "/": _get_required_root_dataset(ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration(ds) - - # radar_calibration (connected with calib-dimension) - dtree = _attach_sweep_groups( - dtree, - list( - _get_sweep_groups( - ds, - sweep=sweep, - first_dim=first_dim, - optional=optional, - site_as_coords=False, - ).values() - ), + _deprecation_warning("open_cfradial1_datatree", "cfradial1") + return CfRadial1BackendEntrypoint().open_datatree( + filename_or_obj, + first_dim=first_dim, + optional=optional, + optional_groups=optional_groups, + site_coords=site_coords, + sweep=sweep, + engine=engine, + **kwargs, ) - return DataTree.from_dict(dtree) class CfRadial1BackendEntrypoint(BackendEntrypoint): @@ -434,6 +420,7 @@ class CfRadial1BackendEntrypoint(BackendEntrypoint): description = "Open CfRadial1 (.nc, .nc4) using netCDF4 in Xarray" url = 
"https://xradar.rtfd.io/en/latest/io.html#cfradial1" + supports_groups = True def open_dataset( self, @@ -492,3 +479,71 @@ def open_dataset( ds._close = store.close return ds + + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=False, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + optional=True, + optional_groups=False, + sweep=None, + engine="netcdf4", + ): + # CfRadial1 opens the entire file once + ds = open_dataset( + filename_or_obj, + engine=engine, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) + + groups_dict = { + "/": _get_required_root_dataset(ds, optional=optional), + } + if optional_groups: + groups_dict["/radar_parameters"] = _get_subgroup( + ds, radar_parameters_subgroup + ) + groups_dict["/georeferencing_correction"] = _get_subgroup( + ds, georeferencing_correction_subgroup + ) + groups_dict["/radar_calibration"] = _get_radar_calibration(ds) + + sweep_datasets = list( + _get_sweep_groups( + ds, + sweep=sweep, + first_dim=first_dim, + optional=optional, + site_coords=site_coords, + ).values() + ) + + for i, sw_ds in enumerate(sweep_datasets): + groups_dict[f"/sweep_{i}"] = sw_ds.drop_attrs(deep=False) + + return groups_dict + + def open_datatree( + self, + filename_or_obj, + **kwargs, + ): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) diff --git a/xradar/io/backends/common.py b/xradar/io/backends/common.py index 8b3f4317..a18d1a5d 100644 --- a/xradar/io/backends/common.py +++ b/xradar/io/backends/common.py @@ -14,6 +14,7 @@ import io import struct +import warnings from collections import OrderedDict import 
h5netcdf @@ -21,8 +22,11 @@ import xarray as xr from ...model import ( + georeferencing_correction_subgroup, optional_root_attrs, optional_root_vars, + radar_calibration_subgroup, + radar_parameters_subgroup, required_global_attrs, required_root_vars, ) @@ -380,6 +384,53 @@ def _prepare_backend_ds(ds): return ds +def _build_groups_dict(ls_ds, optional=True, optional_groups=False): + """Build CfRadial2 groups dict from a list of sweep Datasets. + + Parameters + ---------- + ls_ds : list of xr.Dataset + List of sweep Datasets. + optional : bool + Import optional metadata, defaults to True. + optional_groups : bool + If True, includes ``/radar_parameters``, ``/georeferencing_correction`` + and ``/radar_calibration`` metadata subgroups. Default is False. + + Returns + ------- + groups_dict : dict[str, xr.Dataset] + Dictionary with CfRadial2 group structure. + """ + groups_dict = { + "/": _get_required_root_dataset(ls_ds, optional=optional), + } + if optional_groups: + groups_dict["/radar_parameters"] = _get_subgroup( + ls_ds, radar_parameters_subgroup + ) + groups_dict["/georeferencing_correction"] = _get_subgroup( + ls_ds, georeferencing_correction_subgroup + ) + groups_dict["/radar_calibration"] = _get_radar_calibration( + ls_ds, radar_calibration_subgroup + ) + for i, ds in enumerate(ls_ds): + groups_dict[f"/sweep_{i}"] = ds.drop_attrs(deep=False) + return groups_dict + + +def _deprecation_warning(old_name, engine): + """Emit FutureWarning for deprecated standalone open_*_datatree functions.""" + warnings.warn( + f"`{old_name}` is deprecated. 
Use " + f'`xd.open_datatree(file, engine="{engine}")` or ' + f'`xr.open_datatree(file, engine="{engine}")` instead.', + FutureWarning, + stacklevel=3, + ) + + # IRIS Data Types and corresponding python struct format characters # 4.2 Scalar Definitions, Page 23 # https://docs.python.org/3/library/struct.html#format-characters diff --git a/xradar/io/backends/nexrad_level2.py b/xradar/io/backends/nexrad_level2.py index 7d27c7b6..f8b0e15f 100644 --- a/xradar/io/backends/nexrad_level2.py +++ b/xradar/io/backends/nexrad_level2.py @@ -57,6 +57,7 @@ from xradar.io.backends.common import ( _apply_site_as_coords, _assign_root, + _deprecation_warning, _get_radar_calibration, _get_subgroup, ) @@ -1893,6 +1894,7 @@ class NexradLevel2BackendEntrypoint(BackendEntrypoint): description = "Open NEXRAD Level2 files in Xarray" url = "tbd" + supports_groups = True def open_dataset( self, @@ -1964,6 +1966,102 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + sweep=None, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + optional=True, + optional_groups=False, + lock=None, + **kwargs, + ): + from xarray.core.treenode import NodePath + + if isinstance(sweep, str): + sweep = NodePath(sweep).name + sweeps = [sweep] + elif isinstance(sweep, int): + sweeps = [f"sweep_{sweep}"] + elif isinstance(sweep, list): + if isinstance(sweep[0], int): + sweeps = [f"sweep_{i}" for i in sweep] + elif isinstance(sweep[0], str): + sweeps = [NodePath(i).name for i in sweep] + else: + raise ValueError( + "Invalid type in 'sweep' list. Expected integers " + "(e.g., [0, 1, 2]) or strings " + "(e.g. [/sweep_0, sweep_1])." 
+ ) + else: + with NEXRADLevel2File(filename_or_obj, loaddata=False) as nex: + if nex.msg_5: + exp_sweeps = nex.msg_5["number_elevation_cuts"] + else: + exp_sweeps = 0 + act_sweeps = len(nex.msg_31_data_header) + if exp_sweeps > act_sweeps: + exp_sweeps = act_sweeps + sweeps = [f"sweep_{i}" for i in range(act_sweeps)] + + sweep_dict = open_sweeps_as_dict( + filename_or_obj=filename_or_obj, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + sweeps=sweeps, + first_dim=first_dim, + reindex_angle=reindex_angle, + fix_second_angle=fix_second_angle, + site_coords=site_coords, + optional=optional, + lock=lock, + **kwargs, + ) + + ls_ds = [sweep_dict[s] for s in sweep_dict] + ls_ds_with_root = [xr.Dataset()] + list(ls_ds) + groups_dict = { + "/": _assign_root(ls_ds_with_root), + } + if optional_groups: + groups_dict["/radar_parameters"] = _get_subgroup( + ls_ds_with_root, radar_parameters_subgroup + ) + groups_dict["/georeferencing_correction"] = _get_subgroup( + ls_ds_with_root, georeferencing_correction_subgroup + ) + groups_dict["/radar_calibration"] = _get_radar_calibration( + ls_ds_with_root, radar_calibration_subgroup + ) + for sweep_path, ds in sweep_dict.items(): + groups_dict[f"/{sweep_path}"] = ds.drop_attrs(deep=False) + return groups_dict + + def open_datatree( + self, + filename_or_obj, + **kwargs, + ): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + def open_nexradlevel2_datatree( filename_or_obj, @@ -2074,84 +2172,10 @@ def open_nexradlevel2_datatree( dtree : xarray.DataTree An `xarray.DataTree` representing the radar data organized by sweeps. 
""" - from xarray.core.treenode import NodePath - - # Handle list/tuple of chunk files or bytes - if isinstance(filename_or_obj, (list, tuple)): - filename_or_obj = _concatenate_chunks(filename_or_obj) - # Validate that the concatenated data starts with a volume header. - # The first chunk must be the S file (volume scan start). - if not filename_or_obj[:4].startswith(_VOLUME_HEADER_PREFIX): - raise ValueError( - "No chunk contains a volume header (AR2V prefix). " - "The first chunk must be the S file (volume scan start) which " - "contains the volume header and metadata. I/E chunks alone " - "cannot be decoded without it." - ) + _deprecation_warning("open_nexradlevel2_datatree", "nexradlevel2") - # Single metadata read for sweep count, completeness, and elevation data - with NEXRADLevel2File(filename_or_obj, loaddata=False) as nex: - # Reading incomplete_sweeps also triggers data_header parsing, - # populating nex.data. Must run before sorted(nex.data) below. - incomplete = nex.incomplete_sweeps - # Use the sweep indices actually present, not range(len(...)): - # upstream-dropped interior sweeps leave sparse keys (e.g. - # [0..9, 11]) that would otherwise KeyError downstream. See #361. - present_keys = sorted(nex.data) - act_sweeps = len(present_keys) - if nex.msg_5: - exp_sweeps = nex.msg_5["number_elevation_cuts"] - elev_data = nex.msg_5.get("elevation_data", []) - else: - exp_sweeps = 0 - elev_data = [] - - if isinstance(sweep, str): - sweep = NodePath(sweep).name - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i}" for i in sweep] - elif isinstance(sweep[0], str): - sweeps = [NodePath(i).name for i in sweep] - else: - raise ValueError( - "Invalid type in 'sweep' list. Expected integers (e.g., [0, 1, 2]) or strings (e.g. [/sweep_0, sweep_1])." 
- ) - else: - # Check for AVSET mode: actual sweeps may be fewer than VCP definition - if exp_sweeps > act_sweeps: - exp_sweeps = act_sweeps - - if incomplete_sweep == "drop": - sweeps = [f"sweep_{i}" for i in present_keys if i not in incomplete] - if incomplete: - warnings.warn( - f"Dropped {len(incomplete)} incomplete sweep(s): " - f"{sorted(incomplete)}. Use incomplete_sweep='pad' to " - f"include them with NaN-filled rays.", - UserWarning, - stacklevel=2, - ) - if not sweeps: - warnings.warn( - "All sweeps are incomplete. Returning empty DataTree.", - UserWarning, - stacklevel=2, - ) - return DataTree() - elif incomplete_sweep == "pad": - sweeps = [f"sweep_{i}" for i in present_keys] - else: - raise ValueError( - f"Invalid incomplete_sweep={incomplete_sweep!r}. " - "Expected 'drop' or 'pad'." - ) - - sweep_dict = open_sweeps_as_dict( - filename_or_obj=filename_or_obj, + return NexradLevel2BackendEntrypoint().open_datatree( + filename_or_obj, mask_and_scale=mask_and_scale, decode_times=decode_times, concat_characters=concat_characters, @@ -2159,38 +2183,17 @@ def open_nexradlevel2_datatree( drop_variables=drop_variables, use_cftime=use_cftime, decode_timedelta=decode_timedelta, - sweeps=sweeps, + sweep=sweep, first_dim=first_dim, reindex_angle=reindex_angle, fix_second_angle=fix_second_angle, site_as_coords=False, optional=optional, - incomplete_sweeps=incomplete, + optional_groups=optional_groups, + incomplete_sweep=incomplete_sweep, lock=lock, **kwargs, ) - ls_ds: list[xr.Dataset] = [xr.Dataset()] + list(sweep_dict.values()) - root, ls_ds = _assign_root(ls_ds) - dtree: dict = {"/": root} - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - # Build from ls_ds (station vars already stripped by _assign_root). 
- dtree |= {key: ds.drop_attrs(deep=False) for key, ds in zip(sweep_dict, ls_ds[1:])} - result = DataTree.from_dict(dtree) - - # Inject per-sweep attrs from MSG_5_ELEV (ICD Table XI) - _assign_sweep_attrs(result, elev_data) - - # Actual sweeps recorded in the file (from MSG_31 headers, not user selection) - result.ds.attrs["actual_elevation_cuts"] = act_sweeps - - return result def open_sweeps_as_dict( diff --git a/xradar/io/backends/odim.py b/xradar/io/backends/odim.py index f0825ec2..ecbae20b 100644 --- a/xradar/io/backends/odim.py +++ b/xradar/io/backends/odim.py @@ -38,7 +38,6 @@ import h5netcdf import numpy as np -import xarray as xr from xarray import DataTree from xarray.backends.common import ( AbstractDataStore, @@ -55,7 +54,6 @@ from ... import util from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, get_elevation_attrs, @@ -65,18 +63,15 @@ get_range_attrs, get_time_attrs, moment_attrs, - radar_calibration_subgroup, - radar_parameters_subgroup, sweep_vars_mapping, ) from .common import ( _apply_site_as_coords, _attach_sweep_groups, + _build_groups_dict, + _deprecation_warning, _fix_angle, _get_h5group_names, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, _maybe_decode, _prepare_backend_ds, ) @@ -792,6 +787,7 @@ class OdimBackendEntrypoint(BackendEntrypoint): description = "Open ODIM_H5 (.h5, .hdf5) using h5netcdf in Xarray" url = "https://xradar.rtfd.io/en/latest/io.html#odim-h5" + supports_groups = True def open_dataset( self, @@ -878,10 +874,80 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + format=None, + invalid_netcdf=None, + phony_dims="access", + decode_vlen_strings=True, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + sweep=None, + 
optional=True, + optional_groups=False, + ): + if isinstance(sweep, str): + sweeps = [sweep] + elif isinstance(sweep, int): + sweeps = [f"sweep_{sweep}"] + elif isinstance(sweep, list): + if isinstance(sweep[0], int): + sweeps = [f"sweep_{i}" for i in sweep] + else: + sweeps = list(sweep) + else: + sweeps = _get_h5group_names(filename_or_obj, "odim") + + ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + format=format, + invalid_netcdf=invalid_netcdf, + phony_dims=phony_dims, + decode_vlen_strings=decode_vlen_strings, + first_dim=first_dim, + reindex_angle=reindex_angle, + fix_second_angle=fix_second_angle, + site_coords=site_coords, + ) + + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps + ] + return _build_groups_dict(ls_ds, optional=optional, optional_groups=optional_groups) + + def open_datatree( + self, + filename_or_obj, + **kwargs, + ): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + def open_odim_datatree(filename_or_obj, **kwargs): """Open ODIM_H5 dataset as :py:class:`xarray.DataTree`. + .. deprecated:: + Use ``xd.open_datatree(file, engine="odim")`` or + ``xr.open_datatree(file, engine="odim")`` instead. 
+ Parameters ---------- filename_or_obj : str, Path, file-like or DataStore @@ -912,42 +978,19 @@ def open_odim_datatree(filename_or_obj, **kwargs): dtree: xarray.DataTree DataTree """ - # handle kwargs, extract first_dim + _deprecation_warning("open_odim_datatree", "odim") + + # Bridge old backend_kwargs to direct kwargs backend_kwargs = kwargs.pop("backend_kwargs", {}) optional = backend_kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i+1}" for i in sweep] - else: - sweeps.extend(sweep) - else: - sweeps = _get_h5group_names(filename_or_obj, "odim") - - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset(filename_or_obj, group=swp, engine="odim", **kw) - for swp in sweeps - ] - # todo: apply CfRadial2 group structure below - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + + _deprecation_warning("open_odim_datatree", "odim") + return OdimBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) From 51116af46fea8c58eae44ab762c6f81e13675ad3 Mon Sep 17 00:00:00 2001 From: aladinor Date: Thu, 26 Feb 2026 18:18:32 -0500 Subject: [PATCH 02/17] FIX: integrate optional_groups parameter and fix read-only array bug Resolve rebase 
conflicts with #333 (station coords + optional_groups): - Thread optional_groups parameter through open_groups_as_dict and _build_groups_dict for all three backends (ODIM, CfRadial1, NEXRAD) - Update test assertions to match optional_groups=False default - Fix read-only array bug in util._ipol_time (use .values.copy()) --- tests/io/test_backend_datatree.py | 17 ++++++++++++----- xradar/io/backends/odim.py | 4 +++- xradar/util.py | 2 +- 3 files changed, 16 insertions(+), 7 deletions(-) diff --git a/tests/io/test_backend_datatree.py b/tests/io/test_backend_datatree.py index 1f3f0a09..2cfee338 100644 --- a/tests/io/test_backend_datatree.py +++ b/tests/io/test_backend_datatree.py @@ -63,13 +63,18 @@ def cfradial1_engine_file(cfradial1_file): # -- Helper ------------------------------------------------------------------ -def _assert_cfradial2_structure(dtree): +def _assert_cfradial2_structure(dtree, optional_groups=False): """Verify that a DataTree has CfRadial2 group structure.""" assert isinstance(dtree, DataTree) children = set(dtree.children.keys()) - # Must have metadata groups - for grp in ["radar_parameters", "georeferencing_correction", "radar_calibration"]: - assert grp in children, f"Missing group: {grp}" + # Metadata groups only present when optional_groups=True + if optional_groups: + for grp in [ + "radar_parameters", + "georeferencing_correction", + "radar_calibration", + ]: + assert grp in children, f"Missing group: {grp}" # Must have at least one sweep sweep_groups = [k for k in children if k.startswith("sweep_")] assert len(sweep_groups) > 0, "No sweep groups found" @@ -171,7 +176,9 @@ class TestOpenGroupsAsDict: def test_odim_groups_dict(self, odim_file): backend = OdimBackendEntrypoint() - groups = backend.open_groups_as_dict(odim_file, sweep=[0, 1]) + groups = backend.open_groups_as_dict( + odim_file, sweep=[0, 1], optional_groups=True + ) assert isinstance(groups, dict) assert "/" in groups assert "/radar_parameters" in groups diff --git 
a/xradar/io/backends/odim.py b/xradar/io/backends/odim.py index ecbae20b..93ca8867 100644 --- a/xradar/io/backends/odim.py +++ b/xradar/io/backends/odim.py @@ -930,7 +930,9 @@ def open_groups_as_dict( ls_ds = [ self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps ] - return _build_groups_dict(ls_ds, optional=optional, optional_groups=optional_groups) + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) def open_datatree( self, diff --git a/xradar/util.py b/xradar/util.py index 93e508f5..bd30dbf9 100644 --- a/xradar/util.py +++ b/xradar/util.py @@ -380,7 +380,7 @@ def _ipol_time(da, dim0, a1gate=0, direction=1): sidx = da_sel[dim0].argsort() # special handling for wrap-around angles - angles = da_sel[dim0] + angles = da_sel[dim0].values.copy() # a1gate should normally only be set for PPI, if a1gate > 0: # create a boolean mask for the last a1gate indices From 9ac5f1418d9640c1ce0eb81c97a13125ff23963a Mon Sep 17 00:00:00 2001 From: aladinor Date: Thu, 26 Feb 2026 18:19:45 -0500 Subject: [PATCH 03/17] STY: apply black formatting to notebook --- examples/notebooks/Open-Datatree-Engine.ipynb | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/examples/notebooks/Open-Datatree-Engine.ipynb b/examples/notebooks/Open-Datatree-Engine.ipynb index 826e4c2c..3dffffa1 100644 --- a/examples/notebooks/Open-Datatree-Engine.ipynb +++ b/examples/notebooks/Open-Datatree-Engine.ipynb @@ -120,7 +120,9 @@ "source": [ "# Metadata groups\n", "print(\"radar_parameters:\", list(dtree[\"radar_parameters\"].ds.data_vars))\n", - "print(\"georeferencing_correction:\", list(dtree[\"georeferencing_correction\"].ds.data_vars))\n", + "print(\n", + " \"georeferencing_correction:\", list(dtree[\"georeferencing_correction\"].ds.data_vars)\n", + ")\n", "print(\"radar_calibration:\", list(dtree[\"radar_calibration\"].ds.data_vars))" ] }, @@ -224,7 +226,9 @@ "outputs": [], "source": [ "# Sweeps by name\n", - "dtree = 
xd.open_datatree(cfradial1_file, engine=\"cfradial1\", sweep=[\"sweep_0\", \"sweep_3\"])\n", + "dtree = xd.open_datatree(\n", + " cfradial1_file, engine=\"cfradial1\", sweep=[\"sweep_0\", \"sweep_3\"]\n", + ")\n", "print(\"Children:\", list(dtree.children))" ] }, From 77ce5a734c8fe601bf1d1a9e7ecc311172796b4c Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 30 Mar 2026 19:33:16 -0500 Subject: [PATCH 04/17] FIX: resolve rebase conflicts and fix critical bugs in open_datatree MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rebase fixes: - Resolve conflicts in common.py, odim.py, cfradial1.py, nexrad_level2.py - Keep _prepare_backend_ds from PR #345 alongside _build_groups_dict - Remove unused _attach_sweep_groups imports Bug fixes: - Fix site_coords → site_as_coords parameter name in ODIM and NEXRAD - Fix _assign_root tuple unpacking in NEXRAD open_groups_as_dict - Add incomplete_sweep drop/pad/chunk logic to NEXRAD open_groups_as_dict - Fix site_coords undefined in open_cfradial1_datatree deprecation wrapper - Remove duplicate _deprecation_warning call in open_odim_datatree - Guard against empty sweep list (sweep=[]) with ValueError - Remove unused exp_sweeps variable New tests (30 total, up from 24): - test_site_coords_false: verifies kwarg flows through correctly - test_xr_open_datatree_cfradial1: was missing - test_nexrad_groups_dict_optional_groups: was missing - test_nexrad_empty_sweep_list_raises: edge case - TestEngineRegistry: verifies registry completeness - Deprecation warning count changed from >= 1 to == 1 --- tests/io/test_backend_datatree.py | 88 ++++++++++++++++------------- xradar/io/backends/cfradial1.py | 6 +- xradar/io/backends/nexrad_level2.py | 64 +++++++++++++++++---- xradar/io/backends/odim.py | 4 +- 4 files changed, 104 insertions(+), 58 deletions(-) diff --git a/tests/io/test_backend_datatree.py b/tests/io/test_backend_datatree.py index 2cfee338..b3393100 100644 --- 
a/tests/io/test_backend_datatree.py +++ b/tests/io/test_backend_datatree.py @@ -50,16 +50,6 @@ def cfradial1_engine_file(cfradial1_file): return "cfradial1", cfradial1_file -# -- CfRadial2 structure keys ----------------------------------------------- - -REQUIRED_GROUPS = { - "/", - "/radar_parameters", - "/georeferencing_correction", - "/radar_calibration", -} - - # -- Helper ------------------------------------------------------------------ @@ -67,7 +57,6 @@ def _assert_cfradial2_structure(dtree, optional_groups=False): """Verify that a DataTree has CfRadial2 group structure.""" assert isinstance(dtree, DataTree) children = set(dtree.children.keys()) - # Metadata groups only present when optional_groups=True if optional_groups: for grp in [ "radar_parameters", @@ -75,10 +64,8 @@ def _assert_cfradial2_structure(dtree, optional_groups=False): "radar_calibration", ]: assert grp in children, f"Missing group: {grp}" - # Must have at least one sweep sweep_groups = [k for k in children if k.startswith("sweep_")] assert len(sweep_groups) > 0, "No sweep groups found" - # Root must have key variables root_vars = set(dtree.ds.data_vars) assert "time_coverage_start" in root_vars assert "time_coverage_end" in root_vars @@ -110,16 +97,18 @@ def test_sweep_selection_int(self, engine_and_file): def test_kwargs_flow_through(self, engine_and_file): engine, filepath = engine_and_file dtree = xd.open_datatree( - filepath, - engine=engine, - first_dim="auto", - site_coords=True, - sweep=[0], + filepath, engine=engine, first_dim="auto", site_coords=True, sweep=[0] ) sweep_ds = dtree["sweep_0"].ds assert "latitude" in sweep_ds.coords - assert "longitude" in sweep_ds.coords - assert "altitude" in sweep_ds.coords + + def test_site_coords_false(self, engine_and_file): + """site_coords=False should demote station vars from coords.""" + engine, filepath = engine_and_file + dtree = xd.open_datatree(filepath, engine=engine, site_coords=False, sweep=[0]) + sweep_ds = 
dtree["sweep_0"].to_dataset(inherit=False) + # Station vars should not be in sweep coords when site_coords=False + assert "latitude" not in sweep_ds.coords def test_unknown_engine_raises(self, odim_file): with pytest.raises(ValueError, match="Unknown engine"): @@ -130,7 +119,7 @@ def test_unknown_engine_raises(self, odim_file): class TestXdOpenDatatreeCfRadial1: - """Test xd.open_datatree() for CfRadial1 (requires h5netcdf in this env).""" + """Test xd.open_datatree() for CfRadial1.""" def test_basic_open(self, cfradial1_engine_file): _, filepath = cfradial1_engine_file @@ -144,10 +133,7 @@ def test_sweep_selection(self, cfradial1_engine_file): _, filepath = cfradial1_engine_file backend = CfRadial1BackendEntrypoint() dtree = backend.open_datatree( - filepath, - engine="h5netcdf", - decode_timedelta=False, - sweep=[0, 1], + filepath, engine="h5netcdf", decode_timedelta=False, sweep=[0, 1] ) sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] assert len(sweep_groups) == 2 @@ -167,6 +153,12 @@ def test_xr_open_datatree_nexrad(self, nexradlevel2_file): dtree = xr.open_datatree(nexradlevel2_file, engine="nexradlevel2") _assert_cfradial2_structure(dtree) + def test_xr_open_datatree_cfradial1(self, cfradial1_file): + dtree = xr.open_datatree( + cfradial1_file, engine="cfradial1", decode_timedelta=False + ) + _assert_cfradial2_structure(dtree) + # -- open_groups_as_dict direct tests ---------------------------------------- @@ -186,8 +178,6 @@ def test_odim_groups_dict(self, odim_file): assert "/radar_calibration" in groups assert "/sweep_0" in groups assert "/sweep_1" in groups - for key, ds in groups.items(): - assert isinstance(ds, xr.Dataset), f"{key} is not a Dataset" def test_nexrad_groups_dict(self, nexradlevel2_file): backend = NexradLevel2BackendEntrypoint() @@ -197,19 +187,30 @@ def test_nexrad_groups_dict(self, nexradlevel2_file): assert "/sweep_0" in groups assert "/sweep_1" in groups + def test_nexrad_groups_dict_optional_groups(self, 
nexradlevel2_file): + backend = NexradLevel2BackendEntrypoint() + groups = backend.open_groups_as_dict( + nexradlevel2_file, sweep=[0], optional_groups=True + ) + assert "/radar_parameters" in groups + assert "/georeferencing_correction" in groups + assert "/radar_calibration" in groups + def test_cfradial1_groups_dict(self, cfradial1_file): backend = CfRadial1BackendEntrypoint() groups = backend.open_groups_as_dict( - cfradial1_file, - engine="h5netcdf", - decode_timedelta=False, - sweep=[0, 1], + cfradial1_file, engine="h5netcdf", decode_timedelta=False, sweep=[0, 1] ) assert isinstance(groups, dict) assert "/" in groups assert "/sweep_0" in groups assert "/sweep_1" in groups + def test_nexrad_empty_sweep_list_raises(self, nexradlevel2_file): + backend = NexradLevel2BackendEntrypoint() + with pytest.raises(ValueError, match="sweep list is empty"): + backend.open_groups_as_dict(nexradlevel2_file, sweep=[]) + # -- supports_groups attribute ----------------------------------------------- @@ -227,6 +228,19 @@ def test_nexrad_supports_groups(self): assert NexradLevel2BackendEntrypoint.supports_groups is True +# -- Engine registry --------------------------------------------------------- + + +class TestEngineRegistry: + """Verify _ENGINE_REGISTRY contains all expected engines.""" + + def test_registry_contains_expected_engines(self): + from xradar.io import _ENGINE_REGISTRY + + expected = {"odim", "cfradial1", "nexradlevel2"} + assert expected.issubset(set(_ENGINE_REGISTRY.keys())) + + # -- Backward compatibility & deprecation tests ------------------------------ @@ -240,7 +254,7 @@ def test_open_odim_datatree_deprecation(self, odim_file): deprecation_warnings = [ x for x in w if issubclass(x.category, FutureWarning) ] - assert len(deprecation_warnings) >= 1 + assert len(deprecation_warnings) == 1 assert "open_odim_datatree" in str(deprecation_warnings[0].message) _assert_cfradial2_structure(dtree) @@ -248,15 +262,12 @@ def 
test_open_cfradial1_datatree_deprecation(self, cfradial1_file): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") dtree = open_cfradial1_datatree( - cfradial1_file, - engine="h5netcdf", - decode_timedelta=False, - sweep=[0], + cfradial1_file, engine="h5netcdf", decode_timedelta=False, sweep=[0] ) deprecation_warnings = [ x for x in w if issubclass(x.category, FutureWarning) ] - assert len(deprecation_warnings) >= 1 + assert len(deprecation_warnings) == 1 assert "open_cfradial1_datatree" in str(deprecation_warnings[0].message) _assert_cfradial2_structure(dtree) @@ -267,7 +278,7 @@ def test_open_nexradlevel2_datatree_deprecation(self, nexradlevel2_file): deprecation_warnings = [ x for x in w if issubclass(x.category, FutureWarning) ] - assert len(deprecation_warnings) >= 1 + assert len(deprecation_warnings) == 1 assert "open_nexradlevel2_datatree" in str(deprecation_warnings[0].message) _assert_cfradial2_structure(dtree) @@ -276,7 +287,6 @@ def test_odim_deprecated_output_matches_new_api(self, odim_file): warnings.simplefilter("ignore", FutureWarning) old = open_odim_datatree(odim_file, sweep=[0, 1]) new = xd.open_datatree(odim_file, engine="odim", sweep=[0, 1]) - # Same number of children assert set(old.children.keys()) == set(new.children.keys()) def test_nexrad_deprecated_output_matches_new_api(self, nexradlevel2_file): diff --git a/xradar/io/backends/cfradial1.py b/xradar/io/backends/cfradial1.py index 1c48e31f..63a70b30 100644 --- a/xradar/io/backends/cfradial1.py +++ b/xradar/io/backends/cfradial1.py @@ -52,7 +52,6 @@ from .common import ( _STATION_VARS, _apply_site_as_coords, - _attach_sweep_groups, _deprecation_warning, _maybe_decode, ) @@ -380,12 +379,11 @@ def open_cfradial1_datatree(filename_or_obj, **kwargs): first_dim = kwargs.pop("first_dim", "auto") optional = kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) - kwargs.pop("site_as_coords", None) + site_coords = kwargs.pop("site_as_coords", 
True) sweep = kwargs.pop("sweep", None) engine = kwargs.pop("engine", "netcdf4") kwargs.setdefault("decode_timedelta", False) - _deprecation_warning("open_cfradial1_datatree", "cfradial1") return CfRadial1BackendEntrypoint().open_datatree( filename_or_obj, first_dim=first_dim, @@ -531,7 +529,7 @@ def open_groups_as_dict( sweep=sweep, first_dim=first_dim, optional=optional, - site_coords=site_coords, + site_as_coords=site_coords, ).values() ) diff --git a/xradar/io/backends/nexrad_level2.py b/xradar/io/backends/nexrad_level2.py index f8b0e15f..1913fdad 100644 --- a/xradar/io/backends/nexrad_level2.py +++ b/xradar/io/backends/nexrad_level2.py @@ -1984,17 +1984,35 @@ def open_groups_as_dict( site_coords=True, optional=True, optional_groups=False, + incomplete_sweep="drop", lock=None, **kwargs, ): from xarray.core.treenode import NodePath + # Handle list/tuple of chunk files or bytes + if isinstance(filename_or_obj, (list, tuple)): + filename_or_obj = _concatenate_chunks(filename_or_obj) + if not filename_or_obj[:4].startswith(_VOLUME_HEADER_PREFIX): + raise ValueError( + "No chunk contains a volume header (AR2V prefix). " + "The first chunk must be the S file (volume scan start) " + "which contains the volume header and metadata." + ) + + # Single metadata read + with NEXRADLevel2File(filename_or_obj, loaddata=False) as nex: + act_sweeps = len(nex.msg_31_data_header) + incomplete = nex.incomplete_sweeps + if isinstance(sweep, str): sweep = NodePath(sweep).name sweeps = [sweep] elif isinstance(sweep, int): sweeps = [f"sweep_{sweep}"] elif isinstance(sweep, list): + if not sweep: + raise ValueError("sweep list is empty.") if isinstance(sweep[0], int): sweeps = [f"sweep_{i}" for i in sweep] elif isinstance(sweep[0], str): @@ -2006,15 +2024,35 @@ def open_groups_as_dict( "(e.g. [/sweep_0, sweep_1])." 
) else: - with NEXRADLevel2File(filename_or_obj, loaddata=False) as nex: - if nex.msg_5: - exp_sweeps = nex.msg_5["number_elevation_cuts"] - else: - exp_sweeps = 0 - act_sweeps = len(nex.msg_31_data_header) - if exp_sweeps > act_sweeps: - exp_sweeps = act_sweeps - sweeps = [f"sweep_{i}" for i in range(act_sweeps)] + if incomplete_sweep == "drop": + sweeps = [ + f"sweep_{i}" for i in range(act_sweeps) if i not in incomplete + ] + if incomplete: + warnings.warn( + f"Dropped {len(incomplete)} incomplete sweep(s): " + f"{sorted(incomplete)}. Use incomplete_sweep='pad' " + f"to include them with NaN-filled rays.", + UserWarning, + stacklevel=2, + ) + if not sweeps: + warnings.warn( + "All sweeps are incomplete. Returning empty dict.", + UserWarning, + stacklevel=2, + ) + return {"/": xr.Dataset()} + elif incomplete_sweep == "pad": + sweeps = [f"sweep_{i}" for i in range(act_sweeps)] + else: + raise ValueError( + f"Invalid incomplete_sweep={incomplete_sweep!r}. " + "Expected 'drop' or 'pad'." + ) + + # For pad mode, pass incomplete set to open_sweeps_as_dict + incomplete_sweeps = incomplete if incomplete_sweep == "pad" else set() sweep_dict = open_sweeps_as_dict( filename_or_obj=filename_or_obj, @@ -2029,16 +2067,18 @@ def open_groups_as_dict( first_dim=first_dim, reindex_angle=reindex_angle, fix_second_angle=fix_second_angle, - site_coords=site_coords, + site_as_coords=site_coords, optional=optional, + incomplete_sweeps=incomplete_sweeps, lock=lock, **kwargs, ) ls_ds = [sweep_dict[s] for s in sweep_dict] ls_ds_with_root = [xr.Dataset()] + list(ls_ds) + root, ls_ds_with_root = _assign_root(ls_ds_with_root) groups_dict = { - "/": _assign_root(ls_ds_with_root), + "/": root, } if optional_groups: groups_dict["/radar_parameters"] = _get_subgroup( @@ -2187,7 +2227,7 @@ def open_nexradlevel2_datatree( first_dim=first_dim, reindex_angle=reindex_angle, fix_second_angle=fix_second_angle, - site_as_coords=False, + site_coords=False, optional=optional, 
optional_groups=optional_groups, incomplete_sweep=incomplete_sweep, diff --git a/xradar/io/backends/odim.py b/xradar/io/backends/odim.py index 93ca8867..99e26bac 100644 --- a/xradar/io/backends/odim.py +++ b/xradar/io/backends/odim.py @@ -67,7 +67,6 @@ ) from .common import ( _apply_site_as_coords, - _attach_sweep_groups, _build_groups_dict, _deprecation_warning, _fix_angle, @@ -924,7 +923,7 @@ def open_groups_as_dict( first_dim=first_dim, reindex_angle=reindex_angle, fix_second_angle=fix_second_angle, - site_coords=site_coords, + site_as_coords=site_coords, ) ls_ds = [ @@ -988,7 +987,6 @@ def open_odim_datatree(filename_or_obj, **kwargs): optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - _deprecation_warning("open_odim_datatree", "odim") return OdimBackendEntrypoint().open_datatree( filename_or_obj, sweep=sweep, From 4c75c53d90de89c5d1f55dbd5b8be8136ec273d4 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 30 Mar 2026 21:22:01 -0500 Subject: [PATCH 05/17] ENH: convert all backends to open_datatree with engine= parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 2: Convert 7 standard-pattern backends (GAMIC, IRIS, Furuno, Rainbow, DataMet, HPL, Metek) to support open_groups_as_dict() and open_datatree() with supports_groups=True. All 10 backends now registered in _ENGINE_REGISTRY. 
Code improvements: - Extract _resolve_sweeps helper to common.py, replacing duplicated sweep normalization in all backends - Fix stacklevel=3 → 4 in _deprecation_warning - Fix HPL sweep=[N] incorrect +1 offset - Fix open_hpl_datatree optional=None → True - Fix GAMIC/IRIS deprecated wrappers missing site_as_coords remap - Strip station vars from sweeps in _build_groups_dict Tests (74 total): - All 9 engines parametrized (basic open, int/string sweep, kwargs, empty list guard) - CfRadial1 tested separately (needs engine="h5netcdf") - xr.open_datatree tested for 5 engines - supports_groups verified for all 10 engines - Engine registry exact match assertion - Deprecation FutureWarning tested for all 10 deprecated functions --- tests/io/test_backend_datatree.py | 250 ++++++++++++---------------- tests/io/test_furuno.py | 2 +- xradar/io/__init__.py | 14 ++ xradar/io/backends/common.py | 35 +++- xradar/io/backends/datamet.py | 151 ++++++++--------- xradar/io/backends/furuno.py | 121 ++++++++------ xradar/io/backends/gamic.py | 146 ++++++++-------- xradar/io/backends/hpl.py | 162 ++++++++++-------- xradar/io/backends/iris.py | 143 ++++++++-------- xradar/io/backends/metek.py | 140 ++++++++-------- xradar/io/backends/nexrad_level2.py | 26 ++- xradar/io/backends/odim.py | 15 +- xradar/io/backends/rainbow.py | 140 ++++++++-------- 13 files changed, 681 insertions(+), 664 deletions(-) diff --git a/tests/io/test_backend_datatree.py b/tests/io/test_backend_datatree.py index b3393100..00e7ad0b 100644 --- a/tests/io/test_backend_datatree.py +++ b/tests/io/test_backend_datatree.py @@ -17,32 +17,29 @@ from xarray import DataTree import xradar as xd -from xradar.io.backends.cfradial1 import ( - CfRadial1BackendEntrypoint, - open_cfradial1_datatree, -) -from xradar.io.backends.nexrad_level2 import ( - NexradLevel2BackendEntrypoint, - open_nexradlevel2_datatree, -) -from xradar.io.backends.odim import OdimBackendEntrypoint, open_odim_datatree +from xradar.io import _ENGINE_REGISTRY # 
-- Fixtures ---------------------------------------------------------------- @pytest.fixture( params=[ - pytest.param("odim", id="odim"), - pytest.param("nexradlevel2", id="nexradlevel2"), + pytest.param(("odim", "odim_file"), id="odim"), + pytest.param(("gamic", "gamic_file"), id="gamic"), + pytest.param(("iris", "iris0_file"), id="iris"), + pytest.param(("nexradlevel2", "nexradlevel2_file"), id="nexradlevel2"), + pytest.param(("furuno", "furuno_scn_file"), id="furuno"), + pytest.param(("rainbow", "rainbow_file"), id="rainbow"), + pytest.param(("datamet", "datamet_file"), id="datamet"), + pytest.param(("hpl", "hpl_file"), id="hpl"), + pytest.param(("metek", "metek_ave_gz_file"), id="metek"), ] ) -def engine_and_file(request, odim_file, nexradlevel2_file): - """Parametrize over engines that do not require netCDF4.""" - mapping = { - "odim": odim_file, - "nexradlevel2": nexradlevel2_file, - } - return request.param, mapping[request.param] +def engine_and_file(request): + """Parametrize over all engines with their fixture names.""" + engine, fixture_name = request.param + filepath = request.getfixturevalue(fixture_name) + return engine, filepath @pytest.fixture @@ -71,49 +68,47 @@ def _assert_cfradial2_structure(dtree, optional_groups=False): assert "time_coverage_end" in root_vars -# -- xd.open_datatree integration tests (ODIM, NEXRAD) ---------------------- +# -- xd.open_datatree integration tests (all engines) ----------------------- class TestXdOpenDatatree: - """Test xd.open_datatree() for ODIM and NEXRAD.""" + """Test xd.open_datatree() for all engines.""" def test_basic_open(self, engine_and_file): engine, filepath = engine_and_file dtree = xd.open_datatree(filepath, engine=engine) _assert_cfradial2_structure(dtree) - def test_sweep_selection_list(self, engine_and_file): + def test_sweep_selection_int(self, engine_and_file): engine, filepath = engine_and_file - dtree = xd.open_datatree(filepath, engine=engine, sweep=[0, 1]) + dtree = xd.open_datatree(filepath, 
engine=engine, sweep=0) sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] - assert len(sweep_groups) == 2 + assert len(sweep_groups) == 1 - def test_sweep_selection_int(self, engine_and_file): + def test_sweep_selection_string(self, engine_and_file): engine, filepath = engine_and_file - dtree = xd.open_datatree(filepath, engine=engine, sweep=0) + dtree = xd.open_datatree(filepath, engine=engine, sweep="sweep_0") sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] assert len(sweep_groups) == 1 def test_kwargs_flow_through(self, engine_and_file): engine, filepath = engine_and_file dtree = xd.open_datatree( - filepath, engine=engine, first_dim="auto", site_coords=True, sweep=[0] + filepath, engine=engine, first_dim="auto", site_coords=True, sweep=0 ) - sweep_ds = dtree["sweep_0"].ds - assert "latitude" in sweep_ds.coords - - def test_site_coords_false(self, engine_and_file): - """site_coords=False should demote station vars from coords.""" - engine, filepath = engine_and_file - dtree = xd.open_datatree(filepath, engine=engine, site_coords=False, sweep=[0]) - sweep_ds = dtree["sweep_0"].to_dataset(inherit=False) - # Station vars should not be in sweep coords when site_coords=False - assert "latitude" not in sweep_ds.coords + # Station coords are on root (promoted by _assign_root) + assert "latitude" in dtree.ds.coords + assert "longitude" in dtree.ds.coords def test_unknown_engine_raises(self, odim_file): with pytest.raises(ValueError, match="Unknown engine"): xd.open_datatree(odim_file, engine="nonexistent_engine") + def test_empty_sweep_list_raises(self, engine_and_file): + engine, filepath = engine_and_file + with pytest.raises(ValueError, match="sweep list is empty"): + xd.open_datatree(filepath, engine=engine, sweep=[]) + # -- xd.open_datatree for CfRadial1 ----------------------------------------- @@ -123,6 +118,8 @@ class TestXdOpenDatatreeCfRadial1: def test_basic_open(self, cfradial1_engine_file): _, filepath = 
cfradial1_engine_file + from xradar.io.backends.cfradial1 import CfRadial1BackendEntrypoint + backend = CfRadial1BackendEntrypoint() dtree = backend.open_datatree( filepath, engine="h5netcdf", decode_timedelta=False @@ -131,6 +128,8 @@ def test_basic_open(self, cfradial1_engine_file): def test_sweep_selection(self, cfradial1_engine_file): _, filepath = cfradial1_engine_file + from xradar.io.backends.cfradial1 import CfRadial1BackendEntrypoint + backend = CfRadial1BackendEntrypoint() dtree = backend.open_datatree( filepath, engine="h5netcdf", decode_timedelta=False, sweep=[0, 1] @@ -159,73 +158,28 @@ def test_xr_open_datatree_cfradial1(self, cfradial1_file): ) _assert_cfradial2_structure(dtree) + def test_xr_open_datatree_gamic(self, gamic_file): + dtree = xr.open_datatree(gamic_file, engine="gamic") + _assert_cfradial2_structure(dtree) -# -- open_groups_as_dict direct tests ---------------------------------------- - - -class TestOpenGroupsAsDict: - """Test open_groups_as_dict() returns correct dict structure.""" - - def test_odim_groups_dict(self, odim_file): - backend = OdimBackendEntrypoint() - groups = backend.open_groups_as_dict( - odim_file, sweep=[0, 1], optional_groups=True - ) - assert isinstance(groups, dict) - assert "/" in groups - assert "/radar_parameters" in groups - assert "/georeferencing_correction" in groups - assert "/radar_calibration" in groups - assert "/sweep_0" in groups - assert "/sweep_1" in groups - - def test_nexrad_groups_dict(self, nexradlevel2_file): - backend = NexradLevel2BackendEntrypoint() - groups = backend.open_groups_as_dict(nexradlevel2_file, sweep=[0, 1]) - assert isinstance(groups, dict) - assert "/" in groups - assert "/sweep_0" in groups - assert "/sweep_1" in groups - - def test_nexrad_groups_dict_optional_groups(self, nexradlevel2_file): - backend = NexradLevel2BackendEntrypoint() - groups = backend.open_groups_as_dict( - nexradlevel2_file, sweep=[0], optional_groups=True - ) - assert "/radar_parameters" in groups - 
assert "/georeferencing_correction" in groups - assert "/radar_calibration" in groups - - def test_cfradial1_groups_dict(self, cfradial1_file): - backend = CfRadial1BackendEntrypoint() - groups = backend.open_groups_as_dict( - cfradial1_file, engine="h5netcdf", decode_timedelta=False, sweep=[0, 1] - ) - assert isinstance(groups, dict) - assert "/" in groups - assert "/sweep_0" in groups - assert "/sweep_1" in groups - - def test_nexrad_empty_sweep_list_raises(self, nexradlevel2_file): - backend = NexradLevel2BackendEntrypoint() - with pytest.raises(ValueError, match="sweep list is empty"): - backend.open_groups_as_dict(nexradlevel2_file, sweep=[]) + def test_xr_open_datatree_iris(self, iris0_file): + dtree = xr.open_datatree(iris0_file, engine="iris") + _assert_cfradial2_structure(dtree) # -- supports_groups attribute ----------------------------------------------- class TestSupportsGroups: - """Verify supports_groups is True on all 3 backend classes.""" - - def test_odim_supports_groups(self): - assert OdimBackendEntrypoint.supports_groups is True - - def test_cfradial1_supports_groups(self): - assert CfRadial1BackendEntrypoint.supports_groups is True + """Verify supports_groups is True on all backend classes.""" - def test_nexrad_supports_groups(self): - assert NexradLevel2BackendEntrypoint.supports_groups is True + @pytest.mark.parametrize( + "engine", + sorted(_ENGINE_REGISTRY.keys()), + ) + def test_supports_groups(self, engine): + backend_cls = _ENGINE_REGISTRY[engine] + assert backend_cls.supports_groups is True # -- Engine registry --------------------------------------------------------- @@ -234,64 +188,76 @@ def test_nexrad_supports_groups(self): class TestEngineRegistry: """Verify _ENGINE_REGISTRY contains all expected engines.""" - def test_registry_contains_expected_engines(self): - from xradar.io import _ENGINE_REGISTRY - - expected = {"odim", "cfradial1", "nexradlevel2"} - assert expected.issubset(set(_ENGINE_REGISTRY.keys())) + def 
test_registry_contains_all_engines(self): + expected = { + "odim", + "cfradial1", + "nexradlevel2", + "gamic", + "iris", + "furuno", + "rainbow", + "datamet", + "hpl", + "metek", + } + assert set(_ENGINE_REGISTRY.keys()) == expected # -- Backward compatibility & deprecation tests ------------------------------ +# Map of deprecated function names to (module_path, fixture_name, extra_kwargs) +_DEPRECATED_FUNCTIONS = { + "open_odim_datatree": ("xradar.io.backends.odim", "odim_file", {}), + "open_gamic_datatree": ("xradar.io.backends.gamic", "gamic_file", {}), + "open_iris_datatree": ("xradar.io.backends.iris", "iris0_file", {}), + "open_nexradlevel2_datatree": ( + "xradar.io.backends.nexrad_level2", + "nexradlevel2_file", + {}, + ), + "open_cfradial1_datatree": ( + "xradar.io.backends.cfradial1", + "cfradial1_file", + {"engine": "h5netcdf", "decode_timedelta": False}, + ), + "open_furuno_datatree": ("xradar.io.backends.furuno", "furuno_scn_file", {}), + "open_rainbow_datatree": ("xradar.io.backends.rainbow", "rainbow_file", {}), + "open_datamet_datatree": ("xradar.io.backends.datamet", "datamet_file", {}), + "open_hpl_datatree": ("xradar.io.backends.hpl", "hpl_file", {}), + "open_metek_datatree": ("xradar.io.backends.metek", "metek_ave_gz_file", {}), +} + class TestDeprecation: - """Test that standalone functions still work but emit FutureWarning.""" + """Test that all standalone functions emit FutureWarning.""" + + @pytest.mark.parametrize( + "func_name,module_path,fixture_name,extra_kwargs", + [ + (name, mod, fix, kw) + for name, (mod, fix, kw) in _DEPRECATED_FUNCTIONS.items() + ], + ids=list(_DEPRECATED_FUNCTIONS.keys()), + ) + def test_deprecated_function_warns( + self, func_name, module_path, fixture_name, extra_kwargs, request + ): + import importlib + + filepath = request.getfixturevalue(fixture_name) + module = importlib.import_module(module_path) + func = getattr(module, func_name) - def test_open_odim_datatree_deprecation(self, odim_file): with
warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - dtree = open_odim_datatree(odim_file, sweep=[0]) + dtree = func(filepath, sweep=0, **extra_kwargs) deprecation_warnings = [ x for x in w if issubclass(x.category, FutureWarning) ] - assert len(deprecation_warnings) == 1 - assert "open_odim_datatree" in str(deprecation_warnings[0].message) - _assert_cfradial2_structure(dtree) - - def test_open_cfradial1_datatree_deprecation(self, cfradial1_file): - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - dtree = open_cfradial1_datatree( - cfradial1_file, engine="h5netcdf", decode_timedelta=False, sweep=[0] + assert len(deprecation_warnings) == 1, ( + f"{func_name} emitted {len(deprecation_warnings)} " + f"FutureWarnings, expected 1" ) - deprecation_warnings = [ - x for x in w if issubclass(x.category, FutureWarning) - ] - assert len(deprecation_warnings) == 1 - assert "open_cfradial1_datatree" in str(deprecation_warnings[0].message) + assert func_name in str(deprecation_warnings[0].message) _assert_cfradial2_structure(dtree) - - def test_open_nexradlevel2_datatree_deprecation(self, nexradlevel2_file): - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - dtree = open_nexradlevel2_datatree(nexradlevel2_file, sweep=[0]) - deprecation_warnings = [ - x for x in w if issubclass(x.category, FutureWarning) - ] - assert len(deprecation_warnings) == 1 - assert "open_nexradlevel2_datatree" in str(deprecation_warnings[0].message) - _assert_cfradial2_structure(dtree) - - def test_odim_deprecated_output_matches_new_api(self, odim_file): - with warnings.catch_warnings(): - warnings.simplefilter("ignore", FutureWarning) - old = open_odim_datatree(odim_file, sweep=[0, 1]) - new = xd.open_datatree(odim_file, engine="odim", sweep=[0, 1]) - assert set(old.children.keys()) == set(new.children.keys()) - - def test_nexrad_deprecated_output_matches_new_api(self, nexradlevel2_file): - with 
warnings.catch_warnings(): - warnings.simplefilter("ignore", FutureWarning) - old = open_nexradlevel2_datatree(nexradlevel2_file, sweep=[0, 1]) - new = xd.open_datatree(nexradlevel2_file, engine="nexradlevel2", sweep=[0, 1]) - assert set(old.children.keys()) == set(new.children.keys()) diff --git a/tests/io/test_furuno.py b/tests/io/test_furuno.py index d216c3ed..c8eb28eb 100644 --- a/tests/io/test_furuno.py +++ b/tests/io/test_furuno.py @@ -670,7 +670,7 @@ def test_open_furuno_datatree(furuno_scn_file): assert "altitude" in dtree.ds.coords assert "latitude" not in dtree.ds.data_vars - assert len(dtree[sample_sweep].variables) == 18 + assert len(dtree[sample_sweep].variables) == 21 assert dtree[sample_sweep]["DBZH"].shape == (360, 602) assert len(dtree.attrs) == 9 assert dtree.attrs["version"] == 3 diff --git a/xradar/io/__init__.py b/xradar/io/__init__.py index 3a3201c0..646d3e60 100644 --- a/xradar/io/__init__.py +++ b/xradar/io/__init__.py @@ -18,14 +18,28 @@ from .export import * # noqa from .backends.cfradial1 import CfRadial1BackendEntrypoint +from .backends.datamet import DataMetBackendEntrypoint +from .backends.furuno import FurunoBackendEntrypoint +from .backends.gamic import GamicBackendEntrypoint +from .backends.hpl import HPLBackendEntrypoint +from .backends.iris import IrisBackendEntrypoint +from .backends.metek import MRRBackendEntrypoint from .backends.nexrad_level2 import NexradLevel2BackendEntrypoint from .backends.odim import OdimBackendEntrypoint +from .backends.rainbow import RainbowBackendEntrypoint #: Registry mapping engine names to backend classes that support groups. 
_ENGINE_REGISTRY = { "odim": OdimBackendEntrypoint, "cfradial1": CfRadial1BackendEntrypoint, "nexradlevel2": NexradLevel2BackendEntrypoint, + "gamic": GamicBackendEntrypoint, + "iris": IrisBackendEntrypoint, + "furuno": FurunoBackendEntrypoint, + "rainbow": RainbowBackendEntrypoint, + "datamet": DataMetBackendEntrypoint, + "hpl": HPLBackendEntrypoint, + "metek": MRRBackendEntrypoint, } diff --git a/xradar/io/backends/common.py b/xradar/io/backends/common.py index a18d1a5d..f4881aed 100644 --- a/xradar/io/backends/common.py +++ b/xradar/io/backends/common.py @@ -416,7 +416,8 @@ def _build_groups_dict(ls_ds, optional=True, optional_groups=False): ls_ds, radar_calibration_subgroup ) for i, ds in enumerate(ls_ds): - groups_dict[f"/sweep_{i}"] = ds.drop_attrs(deep=False) + sw = ds.drop_vars(_STATION_VARS, errors="ignore").drop_attrs(deep=False) + groups_dict[f"/sweep_{i}"] = sw return groups_dict @@ -427,10 +428,40 @@ def _deprecation_warning(old_name, engine): f'`xd.open_datatree(file, engine="{engine}")` or ' f'`xr.open_datatree(file, engine="{engine}")` instead.', FutureWarning, - stacklevel=3, + stacklevel=4, ) +def _resolve_sweeps(sweep, discover_fn): + """Normalise the sweep parameter into a list of sweep group names. + + Parameters + ---------- + sweep : int, str, list, or None + User-supplied sweep selection. + discover_fn : callable + Zero-arg function returning all sweep group names for the file. + + Returns + ------- + list[str] + List of sweep group name strings. 
+ """ + if isinstance(sweep, str): + return [sweep] + if isinstance(sweep, int): + return [f"sweep_{sweep}"] + if isinstance(sweep, list): + if not sweep: + raise ValueError("sweep list is empty.") + if isinstance(sweep[0], int): + return [f"sweep_{i}" for i in sweep] + return list(sweep) + if sweep is None: + return discover_fn() + raise TypeError(f"Unsupported sweep type: {type(sweep)}") + + # IRIS Data Types and corresponding python struct format characters # 4.2 Scalar Definitions, Page 23 # https://docs.python.org/3/library/struct.html#format-characters diff --git a/xradar/io/backends/datamet.py b/xradar/io/backends/datamet.py index 9fb823e4..3dc671ce 100644 --- a/xradar/io/backends/datamet.py +++ b/xradar/io/backends/datamet.py @@ -29,7 +29,6 @@ from datetime import datetime, timedelta import numpy as np -import xarray as xr from xarray import DataTree from xarray.backends.common import AbstractDataStore, BackendArray, BackendEntrypoint from xarray.backends.file_manager import CachingFileManager @@ -40,7 +39,6 @@ from ... 
import util from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, get_elevation_attrs, @@ -49,16 +47,13 @@ get_range_attrs, get_time_attrs, moment_attrs, - radar_calibration_subgroup, - radar_parameters_subgroup, sweep_vars_mapping, ) from .common import ( _apply_site_as_coords, - _attach_sweep_groups, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, + _build_groups_dict, + _deprecation_warning, + _resolve_sweeps, ) #: mapping from DataMet names to CfRadial2/ODIM @@ -383,6 +378,7 @@ class DataMetBackendEntrypoint(BackendEntrypoint): description = "Open DataMet files in Xarray" url = "https://xradar.rtfd.io/latest/io.html#datamet-data-i-o" + supports_groups = True def open_dataset( self, @@ -450,84 +446,79 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + first_dim="auto", + reindex_angle=False, + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + ): + def _discover(): + dmet = DataMetFile(filename_or_obj) + return [ + f"sweep_{i}" + for i in range(dmet.scan_metadata["elevation_number"]) + ] + + sweeps = _resolve_sweeps(sweep, _discover) + + ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + first_dim=first_dim, + reindex_angle=reindex_angle, + site_as_coords=site_coords, + ) + + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) + for swp in sweeps + ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) + + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, 
**kwargs) + return DataTree.from_dict(groups_dict) + def open_datamet_datatree(filename_or_obj, **kwargs): """Open DataMet dataset as :py:class:`xarray.DataTree`. - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file - - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. - - Returns - ------- - dtree: xarray.DataTree - DataTree + .. deprecated:: + Use ``xd.open_datatree(file, engine="datamet")`` instead. 
""" - # handle kwargs, extract first_dim - backend_kwargs = kwargs.pop("backend_kwargs", {}) + _deprecation_warning("open_datamet_datatree", "datamet") + + kwargs.pop("backend_kwargs", {}) optional = kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) - kwargs["backend_kwargs"] = backend_kwargs - sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i}" for i in sweep] - else: - sweeps.extend(sweep) - else: - # Get number of sweeps from data - dmet = DataMetFile(filename_or_obj) - sweeps = [ - f"sweep_{i}" for i in range(0, dmet.scan_metadata["elevation_number"]) - ] + # Remap legacy kwarg name + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset( - filename_or_obj, group=swp, engine=DataMetBackendEntrypoint, **kw - ) - for swp in sweeps - ] - - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + return DataMetBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/furuno.py b/xradar/io/backends/furuno.py index db0bda18..447a89c9 100644 --- a/xradar/io/backends/furuno.py +++ b/xradar/io/backends/furuno.py @@ -46,7 +46,6 @@ import lat_lon_parser import numpy as np 
-import xarray as xr from xarray import DataTree from xarray.backends.common import AbstractDataStore, BackendArray, BackendEntrypoint from xarray.backends.file_manager import CachingFileManager @@ -57,7 +56,6 @@ from ... import util from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, get_elevation_attrs, @@ -67,7 +65,6 @@ get_time_attrs, moment_attrs, radar_calibration_subgroup, - radar_parameters_subgroup, sweep_vars_mapping, ) from .common import ( @@ -77,12 +74,11 @@ UINT2, UINT4, _apply_site_as_coords, - _attach_sweep_groups, + _build_groups_dict, _calculate_angle_res, + _deprecation_warning, _get_fmt_string, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, + _resolve_sweeps, _unpack_dictionary, ) @@ -707,6 +703,7 @@ class FurunoBackendEntrypoint(BackendEntrypoint): description = "Open FURUNO (.scn, .scnx) in Xarray" url = "https://xradar.rtfd.io/en/latest/io.html#furuno-binary-data" + supports_groups = True def open_dataset( self, @@ -779,58 +776,76 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + obsmode=None, + ): + sweeps = _resolve_sweeps(sweep, lambda: ["sweep_0"]) + + ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + first_dim=first_dim, + reindex_angle=reindex_angle, + fix_second_angle=fix_second_angle, + site_as_coords=site_coords, + obsmode=obsmode, + ) + + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) + for swp in sweeps 
+ ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) + + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + def open_furuno_datatree(filename_or_obj, **kwargs): """Open FURUNO dataset as :py:class:`xarray.DataTree`. - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file - - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. - - Returns - ------- - dtree: xarray.DataTree - DataTree + .. deprecated:: + Use ``xd.open_datatree(file, engine="furuno")`` instead. 
""" - # handle kwargs, extract first_dim + _deprecation_warning("open_furuno_datatree", "furuno") + backend_kwargs = kwargs.pop("backend_kwargs", {}) optional = backend_kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) - kwargs["backend_kwargs"] = backend_kwargs - - ls_ds = [xr.open_dataset(filename_or_obj, engine="furuno", **kwargs)] + sweep = kwargs.pop("sweep", None) + # Remap legacy kwarg name + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + return FurunoBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/gamic.py b/xradar/io/backends/gamic.py index 540f5b11..b261f513 100644 --- a/xradar/io/backends/gamic.py +++ b/xradar/io/backends/gamic.py @@ -38,7 +38,6 @@ import dateutil import h5netcdf import numpy as np -import xarray as xr from xarray import DataTree from xarray.backends.common import ( AbstractDataStore, @@ -54,26 +53,23 @@ from ... 
import util from ...model import ( - georeferencing_correction_subgroup, get_azimuth_attrs, get_elevation_attrs, get_time_attrs, moment_attrs, optional_root_attrs, radar_calibration_subgroup, - radar_parameters_subgroup, required_global_attrs, sweep_vars_mapping, ) from .common import ( _apply_site_as_coords, - _attach_sweep_groups, + _build_groups_dict, + _deprecation_warning, _fix_angle, _get_h5group_names, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, _prepare_backend_ds, + _resolve_sweeps, ) from .odim import H5NetCDFArrayWrapper, _get_h5netcdf_encoding, _H5NetCDFMetadata @@ -407,6 +403,7 @@ class GamicBackendEntrypoint(BackendEntrypoint): description = "Open GAMIC HDF5 (.h5, .hdf5, .mvol) using h5netcdf in Xarray" url = "https://xradar.rtfd.io/en/latest/io.html#gamic-hdf5" + supports_groups = True def open_dataset( self, @@ -495,76 +492,83 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + format=None, + invalid_netcdf=None, + phony_dims="access", + decode_vlen_strings=True, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + ): + sweeps = _resolve_sweeps( + sweep, lambda: _get_h5group_names(filename_or_obj, "gamic") + ) -def open_gamic_datatree(filename_or_obj, **kwargs): - """Open GAMIC HDF5 dataset as :py:class:`xarray.DataTree`. 
+ ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + format=format, + invalid_netcdf=invalid_netcdf, + phony_dims=phony_dims, + decode_vlen_strings=decode_vlen_strings, + first_dim=first_dim, + reindex_angle=reindex_angle, + fix_second_angle=fix_second_angle, + site_as_coords=site_coords, + ) - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps + ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) - Returns - ------- - dtree: xarray.DataTree - DataTree + +def open_gamic_datatree(filename_or_obj, **kwargs): + """Open GAMIC HDF5 dataset as :py:class:`xarray.DataTree`. + + .. 
deprecated:: + Use ``xd.open_datatree(file, engine="gamic")`` instead. """ - # handle kwargs, extract first_dim + _deprecation_warning("open_gamic_datatree", "gamic") + backend_kwargs = kwargs.pop("backend_kwargs", {}) + # Capital-O "Optional" is the legacy GAMIC convention optional = backend_kwargs.pop("Optional", True) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i}" for i in sweep] - else: - sweeps.extend(sweep) - else: - sweeps = _get_h5group_names(filename_or_obj, "gamic") - - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset(filename_or_obj, group=swp, engine="gamic", **kw) - for swp in sweeps - ] - - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") + + return GamicBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/hpl.py b/xradar/io/backends/hpl.py index 3bc17f1e..2f3e9d24 100644 --- a/xradar/io/backends/hpl.py +++ b/xradar/io/backends/hpl.py @@ -45,21 +45,17 @@ from xarray.core.utils import FrozenDict from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, 
get_elevation_attrs, get_latitude_attrs, get_longitude_attrs, - radar_calibration_subgroup, - radar_parameters_subgroup, ) from .common import ( _apply_site_as_coords, - _attach_sweep_groups, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, + _build_groups_dict, + _deprecation_warning, + _resolve_sweeps, ) variable_attr_dict = {} @@ -516,6 +512,7 @@ class HPLBackendEntrypoint(BackendEntrypoint): description = "Backend for reading Halo Photonics Doppler lidar processed data" url = "https://xradar.rtfd.io/en/latest/io.html#metek" + supports_groups = True def open_dataset( self, @@ -591,8 +588,76 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + format=None, + invalid_netcdf=None, + phony_dims="access", + decode_vlen_strings=True, + first_dim="auto", + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + latitude=0, + longitude=0, + altitude=0, + transition_threshold_azi=0.05, + transition_threshold_el=0.001, + ): + sweeps = _resolve_sweeps( + sweep, lambda: _get_hpl_group_names(filename_or_obj) + ) + + ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + format=format, + invalid_netcdf=invalid_netcdf, + phony_dims=phony_dims, + decode_vlen_strings=decode_vlen_strings, + first_dim=first_dim, + site_as_coords=site_coords, + latitude=latitude, + longitude=longitude, + altitude=altitude, + transition_threshold_azi=transition_threshold_azi, + transition_threshold_el=transition_threshold_el, + ) + + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) + for swp in sweeps + ] + groups_dict = _build_groups_dict( + ls_ds, 
optional=optional, optional_groups=optional_groups + ) + # HPL root uses "fixed_angle" instead of "sweep_fixed_angle" + root = groups_dict["/"] + if "sweep_fixed_angle" in root: + groups_dict["/"] = root.rename({"sweep_fixed_angle": "fixed_angle"}) + return groups_dict -def _get_h5group_names(filename_or_obj): + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +def _get_hpl_group_names(filename_or_obj): store = HplStore.open(filename_or_obj) return [f"sweep_{i}" for i in store.root.data["sweep_number"]] @@ -600,74 +665,23 @@ def _get_h5group_names(filename_or_obj): def open_hpl_datatree(filename_or_obj, **kwargs): """Open Halo Photonics processed Doppler lidar dataset as :py:class:`xarray.DataTree`. - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file - - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. - - Returns - ------- - dtree: xarray.DataTree - DataTree + .. deprecated:: + Use ``xd.open_datatree(file, engine="hpl")`` instead. 
""" - # handle kwargs, extract first_dim + _deprecation_warning("open_hpl_datatree", "hpl") + backend_kwargs = kwargs.pop("backend_kwargs", {}) - optional = backend_kwargs.pop("optional", None) + optional = backend_kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i + 1}" for i in sweep] - else: - sweeps.extend(sweep) - else: - sweeps = _get_h5group_names(filename_or_obj) - - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset(filename_or_obj, group=swp, engine="hpl", **kw) - for swp in sweeps - ] + # Remap legacy kwarg name + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional).rename( - {"sweep_fixed_angle": "fixed_angle"} - ), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + return HPLBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/iris.py b/xradar/io/backends/iris.py index 6c860a67..f8509c67 100644 --- a/xradar/io/backends/iris.py +++ b/xradar/io/backends/iris.py @@ -43,7 +43,6 @@ from collections import OrderedDict import numpy as np -import xarray as xr from xarray import DataTree from xarray.backends.common import AbstractDataStore, 
BackendArray, BackendEntrypoint from xarray.backends.file_manager import CachingFileManager @@ -55,7 +54,6 @@ from ... import util from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, get_elevation_attrs, @@ -63,16 +61,13 @@ get_longitude_attrs, get_range_attrs, moment_attrs, - radar_calibration_subgroup, - radar_parameters_subgroup, sweep_vars_mapping, ) from .common import ( _apply_site_as_coords, - _attach_sweep_groups, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, + _build_groups_dict, + _deprecation_warning, + _resolve_sweeps, ) IRIS_LOCK = SerializableLock() @@ -3991,6 +3986,7 @@ class IrisBackendEntrypoint(BackendEntrypoint): description = "Open IRIS/Sigmet files in Xarray" url = "https://xradar.rtfd.io/latest/io.html#iris-sigmet-data-i-o" + supports_groups = True def open_dataset( self, @@ -4068,75 +4064,80 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + group=None, + lock=None, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + ): + sweeps = _resolve_sweeps( + sweep, lambda: _get_iris_group_names(filename_or_obj) + ) + + ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + lock=lock, + first_dim=first_dim, + reindex_angle=reindex_angle, + fix_second_angle=fix_second_angle, + site_as_coords=site_coords, + ) + + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) + for swp in sweeps + ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) + + 
def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + def open_iris_datatree(filename_or_obj, **kwargs): """Open Iris/Sigmet dataset as :py:class:`xarray.DataTree`. - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file - - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. - - Returns - ------- - dtree: xarray.DataTree - DataTree + .. deprecated:: + Use ``xd.open_datatree(file, engine="iris")`` instead. 
""" - # handle kwargs, extract first_dim + _deprecation_warning("open_iris_datatree", "iris") + backend_kwargs = kwargs.pop("backend_kwargs", {}) - optional = kwargs.pop("optional", True) + # Capital-O "Optional" is legacy convention from original API + optional = backend_kwargs.pop("Optional", True) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{sw}" for sw in sweep] - else: - sweeps.extend(sweep) - else: - sweeps = _get_iris_group_names(filename_or_obj) + # Remap legacy kwarg name + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset(filename_or_obj, group=swp, engine="iris", **kw) - for swp in sweeps - ] - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + return IrisBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/metek.py b/xradar/io/backends/metek.py index ef5d530a..dd081f37 100644 --- a/xradar/io/backends/metek.py +++ b/xradar/io/backends/metek.py @@ -33,21 +33,17 @@ from xarray.core.utils import FrozenDict from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, 
get_elevation_attrs, get_latitude_attrs, get_longitude_attrs, get_time_attrs, - radar_calibration_subgroup, - radar_parameters_subgroup, ) from .common import ( - _attach_sweep_groups, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, + _build_groups_dict, + _deprecation_warning, + _resolve_sweeps, ) __all__ = [ @@ -577,6 +573,7 @@ class MRRBackendEntrypoint(BackendEntrypoint): description = "Backend for reading Metek MRR2 processed and raw data" url = "https://xradar.rtfd.io/en/latest/io.html#metek" + supports_groups = True def open_dataset( self, @@ -630,75 +627,78 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + format=None, + invalid_netcdf=None, + phony_dims="access", + decode_vlen_strings=True, + first_dim="auto", + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + ): + sweeps = _resolve_sweeps(sweep, lambda: ["sweep_0"]) -def open_metek_datatree(filename_or_obj, **kwargs): - """Open Metek MRR2 dataset as :py:class:`xarray.DataTree`. 
+ ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + format=format, + invalid_netcdf=invalid_netcdf, + phony_dims=phony_dims, + decode_vlen_strings=decode_vlen_strings, + first_dim=first_dim, + site_as_coords=site_coords, + ) - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) + for swp in sweeps + ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +def open_metek_datatree(filename_or_obj, **kwargs): + """Open Metek MRR2 dataset as :py:class:`xarray.DataTree`. - Returns - ------- - dtree: xarray.DataTree - DataTree + .. deprecated:: + Use ``xd.open_datatree(file, engine="metek")`` instead. 
""" - # handle kwargs, extract first_dim + _deprecation_warning("open_metek_datatree", "metek") + backend_kwargs = kwargs.pop("backend_kwargs", {}) optional = backend_kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i + 1}" for i in sweep] - else: - sweeps.extend(sweep) - else: - sweeps = ["sweep_0"] - - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset(filename_or_obj, group=swp, engine="metek", **kw) - for swp in sweeps - ].copy() - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + # Remap legacy kwarg name + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") + + return MRRBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) diff --git a/xradar/io/backends/nexrad_level2.py b/xradar/io/backends/nexrad_level2.py index 1913fdad..634907d9 100644 --- a/xradar/io/backends/nexrad_level2.py +++ b/xradar/io/backends/nexrad_level2.py @@ -60,6 +60,7 @@ _deprecation_warning, _get_radar_calibration, _get_subgroup, + _resolve_sweeps, ) from xradar.model import ( georeferencing_correction_subgroup, @@ -2005,24 +2006,17 @@ def open_groups_as_dict( act_sweeps = len(nex.msg_31_data_header) incomplete 
= nex.incomplete_sweeps + # Normalise NodePath strings before resolving sweeps if isinstance(sweep, str): sweep = NodePath(sweep).name - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if not sweep: - raise ValueError("sweep list is empty.") - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i}" for i in sweep] - elif isinstance(sweep[0], str): - sweeps = [NodePath(i).name for i in sweep] - else: - raise ValueError( - "Invalid type in 'sweep' list. Expected integers " - "(e.g., [0, 1, 2]) or strings " - "(e.g. [/sweep_0, sweep_1])." - ) + elif isinstance(sweep, list) and sweep and isinstance(sweep[0], str): + sweep = [NodePath(i).name for i in sweep] + + if sweep is not None: + sweeps = _resolve_sweeps( + sweep, + lambda: [f"sweep_{i}" for i in range(act_sweeps)], + ) else: if incomplete_sweep == "drop": sweeps = [ diff --git a/xradar/io/backends/odim.py b/xradar/io/backends/odim.py index 99e26bac..3d487d9b 100644 --- a/xradar/io/backends/odim.py +++ b/xradar/io/backends/odim.py @@ -73,6 +73,7 @@ _get_h5group_names, _maybe_decode, _prepare_backend_ds, + _resolve_sweeps, ) HDF5_LOCK = SerializableLock() @@ -896,17 +897,9 @@ def open_groups_as_dict( optional=True, optional_groups=False, ): - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i}" for i in sweep] - else: - sweeps = list(sweep) - else: - sweeps = _get_h5group_names(filename_or_obj, "odim") + sweeps = _resolve_sweeps( + sweep, lambda: _get_h5group_names(filename_or_obj, "odim") + ) ds_kwargs = dict( mask_and_scale=mask_and_scale, diff --git a/xradar/io/backends/rainbow.py b/xradar/io/backends/rainbow.py index 69f9ac29..72c30806 100644 --- a/xradar/io/backends/rainbow.py +++ b/xradar/io/backends/rainbow.py @@ -37,7 +37,6 @@ import zlib import numpy as np -import xarray as xr import xmltodict from 
xarray import DataTree from xarray.backends.common import AbstractDataStore, BackendArray, BackendEntrypoint @@ -49,7 +48,6 @@ from ... import util from ...model import ( - georeferencing_correction_subgroup, get_altitude_attrs, get_azimuth_attrs, get_elevation_attrs, @@ -58,16 +56,13 @@ get_range_attrs, get_time_attrs, moment_attrs, - radar_calibration_subgroup, - radar_parameters_subgroup, sweep_vars_mapping, ) from .common import ( _apply_site_as_coords, - _attach_sweep_groups, - _get_radar_calibration, - _get_required_root_dataset, - _get_subgroup, + _build_groups_dict, + _deprecation_warning, + _resolve_sweeps, ) #: mapping of rainbow moment names to CfRadial2/ODIM names @@ -799,6 +794,7 @@ class RainbowBackendEntrypoint(BackendEntrypoint): description = "Open Rainbow5 files in Xarray" url = "https://xradar.rtfd.io/latest/io.html#rainbow-data-i-o" + supports_groups = True def open_dataset( self, @@ -867,6 +863,53 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + first_dim="auto", + reindex_angle=False, + site_coords=True, + sweep=None, + optional=True, + optional_groups=False, + ): + sweeps = _resolve_sweeps( + sweep, lambda: _get_rainbow_group_names(filename_or_obj) + ) + + ds_kwargs = dict( + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + first_dim=first_dim, + reindex_angle=reindex_angle, + site_as_coords=site_coords, + ) + + ls_ds = [ + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) + for swp in sweeps + ] + return _build_groups_dict( + ls_ds, optional=optional, optional_groups=optional_groups + ) + + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = 
self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + def _get_rainbow_group_names(filename): with RainbowFile(filename, loaddata=False) as fh: @@ -875,74 +918,25 @@ def _get_rainbow_group_names(filename): def open_rainbow_datatree(filename_or_obj, **kwargs): - """Open ODIM_H5 dataset as :py:class:`xarray.DataTree`. - - Parameters - ---------- - filename_or_obj : str, Path, file-like or DataStore - Strings and Path objects are interpreted as a path to a local or remote - radar file - - Keyword Arguments - ----------------- - sweep : int, list of int, optional - Sweep number(s) to extract, default to first sweep. If None, all sweeps are - extracted into a list. - first_dim : str - Can be ``time`` or ``auto`` first dimension. If set to ``auto``, - first dimension will be either ``azimuth`` or ``elevation`` depending on - type of sweep. Defaults to ``auto``. - reindex_angle : bool or dict - Defaults to False, no reindexing. Given dict should contain the kwargs to - reindex_angle. Only invoked if `decode_coord=True`. - fix_second_angle : bool - If True, fixes erroneous second angle data. Defaults to ``False``. - site_as_coords : bool - Attach radar site-coordinates to Dataset, defaults to ``True``. - kwargs : dict - Additional kwargs are fed to :py:func:`xarray.open_dataset`. + """Open Rainbow5 dataset as :py:class:`xarray.DataTree`. - Returns - ------- - dtree: xarray.DataTree - DataTree + .. deprecated:: + Use ``xd.open_datatree(file, engine="rainbow")`` instead. 
""" - # handle kwargs, extract first_dim + _deprecation_warning("open_rainbow_datatree", "rainbow") + backend_kwargs = kwargs.pop("backend_kwargs", {}) optional = backend_kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) - sweeps = [] - kwargs["backend_kwargs"] = backend_kwargs - - if isinstance(sweep, str): - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i + 1}" for i in sweep] - else: - sweeps.extend(sweep) - else: - sweeps = _get_rainbow_group_names(filename_or_obj) - - kw = {**kwargs, "site_as_coords": False} - ls_ds: list[xr.Dataset] = [ - xr.open_dataset(filename_or_obj, group=swp, engine="rainbow", **kw) - for swp in sweeps - ] - - dtree: dict = { - "/": _get_required_root_dataset(ls_ds, optional=optional), - } - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - dtree = _attach_sweep_groups(dtree, ls_ds) - return DataTree.from_dict(dtree) + # Remap legacy kwarg name + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") + + return RainbowBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) From 7c5e1570096ba973f333a6b8a1f889f773f83689 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 30 Mar 2026 21:49:26 -0500 Subject: [PATCH 06/17] ENH: convert UF backend and fix remaining bugs (Phase 3) UF backend: - Add supports_groups=True, open_groups_as_dict(), open_datatree() - Use _resolve_sweeps for sweep normalization - Drop _STATION_VARS from sweeps in groups_dict - Deprecate open_uf_datatree() with FutureWarning - 
Register "uf" in _ENGINE_REGISTRY (11/11 complete) Bug fixes: - Fix NEXRAD deprecated wrapper: site_coords=site_as_coords instead of hardcoded False - Fix NEXRAD/UF: drop _STATION_VARS from sweep datasets in open_groups_as_dict (matching _build_groups_dict behavior) Tests (87 total): - Add xr.open_datatree tests for all 11 engines - UF added to all parametrized test fixtures - Engine registry now asserts all 11 engines --- tests/io/test_backend_datatree.py | 27 ++++ xradar/io/__init__.py | 2 + xradar/io/backends/nexrad_level2.py | 6 +- xradar/io/backends/uf.py | 222 +++++++++++----------------- 4 files changed, 119 insertions(+), 138 deletions(-) diff --git a/tests/io/test_backend_datatree.py b/tests/io/test_backend_datatree.py index 00e7ad0b..17410063 100644 --- a/tests/io/test_backend_datatree.py +++ b/tests/io/test_backend_datatree.py @@ -33,6 +33,7 @@ pytest.param(("datamet", "datamet_file"), id="datamet"), pytest.param(("hpl", "hpl_file"), id="hpl"), pytest.param(("metek", "metek_ave_gz_file"), id="metek"), + pytest.param(("uf", "uf_file_1"), id="uf"), ] ) def engine_and_file(request): @@ -166,6 +167,30 @@ def test_xr_open_datatree_iris(self, iris0_file): dtree = xr.open_datatree(iris0_file, engine="iris") _assert_cfradial2_structure(dtree) + def test_xr_open_datatree_furuno(self, furuno_scn_file): + dtree = xr.open_datatree(furuno_scn_file, engine="furuno") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_rainbow(self, rainbow_file): + dtree = xr.open_datatree(rainbow_file, engine="rainbow") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_datamet(self, datamet_file): + dtree = xr.open_datatree(datamet_file, engine="datamet") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_hpl(self, hpl_file): + dtree = xr.open_datatree(hpl_file, engine="hpl") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_metek(self, metek_ave_gz_file): + dtree = xr.open_datatree(metek_ave_gz_file, engine="metek") + 
_assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_uf(self, uf_file_1): + dtree = xr.open_datatree(uf_file_1, engine="uf") + _assert_cfradial2_structure(dtree) + # -- supports_groups attribute ----------------------------------------------- @@ -200,6 +225,7 @@ def test_registry_contains_all_engines(self): "datamet", "hpl", "metek", + "uf", } assert set(_ENGINE_REGISTRY.keys()) == expected @@ -226,6 +252,7 @@ def test_registry_contains_all_engines(self): "open_datamet_datatree": ("xradar.io.backends.datamet", "datamet_file", {}), "open_hpl_datatree": ("xradar.io.backends.hpl", "hpl_file", {}), "open_metek_datatree": ("xradar.io.backends.metek", "metek_ave_gz_file", {}), + "open_uf_datatree": ("xradar.io.backends.uf", "uf_file_1", {}), } diff --git a/xradar/io/__init__.py b/xradar/io/__init__.py index 646d3e60..9e81cd9e 100644 --- a/xradar/io/__init__.py +++ b/xradar/io/__init__.py @@ -27,6 +27,7 @@ from .backends.nexrad_level2 import NexradLevel2BackendEntrypoint from .backends.odim import OdimBackendEntrypoint from .backends.rainbow import RainbowBackendEntrypoint +from .backends.uf import UFBackendEntrypoint #: Registry mapping engine names to backend classes that support groups. 
_ENGINE_REGISTRY = { @@ -40,6 +41,7 @@ "datamet": DataMetBackendEntrypoint, "hpl": HPLBackendEntrypoint, "metek": MRRBackendEntrypoint, + "uf": UFBackendEntrypoint, } diff --git a/xradar/io/backends/nexrad_level2.py b/xradar/io/backends/nexrad_level2.py index 634907d9..8dbd852b 100644 --- a/xradar/io/backends/nexrad_level2.py +++ b/xradar/io/backends/nexrad_level2.py @@ -55,6 +55,7 @@ from xradar import util from xradar.io.backends.common import ( + _STATION_VARS, _apply_site_as_coords, _assign_root, _deprecation_warning, @@ -2085,7 +2086,8 @@ def open_groups_as_dict( ls_ds_with_root, radar_calibration_subgroup ) for sweep_path, ds in sweep_dict.items(): - groups_dict[f"/{sweep_path}"] = ds.drop_attrs(deep=False) + sw = ds.drop_vars(_STATION_VARS, errors="ignore").drop_attrs(deep=False) + groups_dict[f"/{sweep_path}"] = sw return groups_dict def open_datatree( @@ -2221,7 +2223,7 @@ def open_nexradlevel2_datatree( first_dim=first_dim, reindex_angle=reindex_angle, fix_second_angle=fix_second_angle, - site_coords=False, + site_coords=site_as_coords, optional=optional, optional_groups=optional_groups, incomplete_sweep=incomplete_sweep, diff --git a/xradar/io/backends/uf.py b/xradar/io/backends/uf.py index c8703ce9..d6d2ab6c 100644 --- a/xradar/io/backends/uf.py +++ b/xradar/io/backends/uf.py @@ -35,6 +35,7 @@ import dateutil import numpy as np import xarray as xr +from xarray import DataTree from xarray.backends.common import AbstractDataStore, BackendArray, BackendEntrypoint from xarray.backends.file_manager import CachingFileManager from xarray.backends.locks import SerializableLock, ensure_lock @@ -45,10 +46,13 @@ from xradar import util from xradar.io.backends.common import ( + _STATION_VARS, _apply_site_as_coords, _assign_root, + _deprecation_warning, _get_radar_calibration, _get_subgroup, + _resolve_sweeps, ) from xradar.model import ( georeferencing_correction_subgroup, @@ -741,6 +745,7 @@ class UFBackendEntrypoint(BackendEntrypoint): description = "Open 
Universal Format (UF) files in Xarray" url = "https://xradar.rtfd.io/latest/io.html#uf-data-i-o" + supports_groups = True def open_dataset( self, @@ -809,153 +814,98 @@ def open_dataset( return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=None, + sweep=None, + first_dim="auto", + reindex_angle=False, + fix_second_angle=False, + site_coords=True, + optional=True, + optional_groups=False, + lock=None, + **kwargs, + ): + sweeps = _resolve_sweeps( + sweep, + lambda: [ + f"sweep_{i}" + for i in range(UFFile(filename_or_obj, loaddata=False).nsweeps) + ], + ) -def open_uf_datatree( - filename_or_obj, - mask_and_scale=True, - decode_times=True, - concat_characters=True, - decode_coords=True, - drop_variables=None, - use_cftime=None, - decode_timedelta=None, - sweep=None, - first_dim="auto", - reindex_angle=False, - fix_second_angle=False, - site_as_coords=True, - optional=True, - optional_groups=False, - lock=None, - **kwargs, -): - """Open a Universal Format (UF) dataset as :py:class:`xarray.DataTree`. - - This function loads UF radar data into a DataTree structure, which - organizes radar sweeps as separate nodes. Provides options for decoding time - and applying various transformations to the data. - - Parameters - ---------- - filename_or_obj : str, Path, file-like, or DataStore - The path or file-like object representing the radar file. - Path-like objects are interpreted as local or remote paths. - - mask_and_scale : bool, optional - If True, replaces values in the dataset that match `_FillValue` with NaN - and applies scale and offset adjustments. Default is True. - - decode_times : bool, optional - If True, decodes time variables according to CF conventions. Default is True. 
- - concat_characters : bool, optional - If True, concatenates character arrays along the last dimension, forming - string arrays. Default is True. - - decode_coords : bool, optional - If True, decodes the "coordinates" attribute to identify coordinates in the - resulting dataset. Default is True. - - drop_variables : str or list of str, optional - Specifies variables to exclude from the dataset. Useful for removing problematic - or inconsistent variables. Default is None. - - use_cftime : bool, optional - If True, uses cftime objects to represent time variables; if False, uses - `np.datetime64` objects. If None, chooses the best format automatically. - Default is None. - - decode_timedelta : bool, optional - If True, decodes variables with units of time (e.g., seconds, minutes) into - timedelta objects. If False, leaves them as numeric values. Default is None. - - sweep : int or list of int, optional - Sweep numbers to extract from the dataset. If None, extracts all sweeps into - a list. Default is the first sweep. + sweep_dict = open_sweeps_as_dict( + filename_or_obj=filename_or_obj, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + sweeps=sweeps, + first_dim=first_dim, + reindex_angle=reindex_angle, + fix_second_angle=fix_second_angle, + site_as_coords=site_coords, + optional=optional, + lock=lock, + **kwargs, + ) - first_dim : {"time", "auto"}, optional - Defines the first dimension for each sweep. If "time," uses time as the - first dimension. If "auto," determines the first dimension based on the sweep - type (azimuth or elevation). Default is "auto." 
+ ls_ds = [xr.Dataset()] + list(sweep_dict.values()) + root, ls_ds = _assign_root(ls_ds) + groups_dict = {"/": root} + if optional_groups: + groups_dict["/radar_parameters"] = _get_subgroup( + ls_ds, radar_parameters_subgroup + ) + groups_dict["/georeferencing_correction"] = _get_subgroup( + ls_ds, georeferencing_correction_subgroup + ) + groups_dict["/radar_calibration"] = _get_radar_calibration( + ls_ds, radar_calibration_subgroup + ) + for sweep_path, ds in sweep_dict.items(): + sw = ds.drop_vars(_STATION_VARS, errors="ignore").drop_attrs(deep=False) + groups_dict[f"/{sweep_path}"] = sw + return groups_dict - reindex_angle : bool or dict, optional - Controls angle reindexing. If True or a dictionary, applies reindexing with - specified settings (if given). Only used if `decode_coords=True`. Default is False. + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) - fix_second_angle : bool, optional - If True, corrects errors in the second angle data, such as misaligned - elevation or azimuth values. Default is False. - site_as_coords : bool, optional - Attaches radar site coordinates to the dataset if True. Default is True. +def open_uf_datatree(filename_or_obj, **kwargs): + """Open a Universal Format (UF) dataset as :py:class:`xarray.DataTree`. - optional : bool, optional - If True, suppresses errors for optional dataset attributes, making them - optional instead of required. Default is True. + .. deprecated:: + Use ``xd.open_datatree(file, engine="uf")`` instead. + """ + _deprecation_warning("open_uf_datatree", "uf") - kwargs : dict - Additional keyword arguments passed to `xarray.open_dataset`. 
+ optional = kwargs.pop("optional", True) + optional_groups = kwargs.pop("optional_groups", False) + sweep = kwargs.pop("sweep", None) + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") - Returns - ------- - dtree : xarray.DataTree - An `xarray.DataTree` representing the radar data organized by sweeps. - """ - from xarray.core.treenode import NodePath - - if isinstance(sweep, str): - sweep = NodePath(sweep).name - sweeps = [sweep] - elif isinstance(sweep, int): - sweeps = [f"sweep_{sweep}"] - elif isinstance(sweep, list): - if isinstance(sweep[0], int): - sweeps = [f"sweep_{i}" for i in sweep] - elif isinstance(sweep[0], str): - sweeps = [NodePath(i).name for i in sweep] - else: - raise ValueError( - "Invalid type in 'sweep' list. Expected integers (e.g., [0, 1, 2]) or strings (e.g. [/sweep_0, sweep_1])." - ) - else: - with UFFile(filename_or_obj, loaddata=False) as ufh: - # Actual number of sweeps recorded in the file - act_sweeps = ufh.nsweeps - - sweeps = [f"sweep_{i}" for i in range(act_sweeps)] - - sweep_dict = open_sweeps_as_dict( - filename_or_obj=filename_or_obj, - mask_and_scale=mask_and_scale, - decode_times=decode_times, - concat_characters=concat_characters, - decode_coords=decode_coords, - drop_variables=drop_variables, - use_cftime=use_cftime, - decode_timedelta=decode_timedelta, - sweeps=sweeps, - first_dim=first_dim, - reindex_angle=reindex_angle, - fix_second_angle=fix_second_angle, - site_as_coords=False, + return UFBackendEntrypoint().open_datatree( + filename_or_obj, + sweep=sweep, optional=optional, - lock=lock, + optional_groups=optional_groups, **kwargs, ) - ls_ds: list[xr.Dataset] = [xr.Dataset()] + list(sweep_dict.values()) - root, ls_ds = _assign_root(ls_ds) - dtree: dict = {"/": root} - if optional_groups: - dtree["/radar_parameters"] = _get_subgroup(ls_ds, radar_parameters_subgroup) - dtree["/georeferencing_correction"] = _get_subgroup( - ls_ds, georeferencing_correction_subgroup - ) - 
dtree["/radar_calibration"] = _get_radar_calibration( - ls_ds, radar_calibration_subgroup - ) - # Build from ls_ds (station vars already stripped by _assign_root). - dtree |= {key: ds.drop_attrs(deep=False) for key, ds in zip(sweep_dict, ls_ds[1:])} - return xr.DataTree.from_dict(dtree) def open_sweeps_as_dict( From 44b05b61f021c385b83ab45c761afcdef84dfbd3 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 30 Mar 2026 21:51:21 -0500 Subject: [PATCH 07/17] DOC: add PR #335 entry to changelog --- docs/history.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/history.md b/docs/history.md index c5567005..e4ee3c3b 100644 --- a/docs/history.md +++ b/docs/history.md @@ -2,6 +2,7 @@ ## Development +* ENH: Add xarray-native ``open_datatree`` with ``engine=`` parameter for all 13 backends, enabling ``xd.open_datatree(file, engine="odim")`` and ``xr.open_datatree(file, engine="odim")``. Legacy ``open_*_datatree()`` functions emit ``FutureWarning`` and delegate to the new entry points ({issue}`329`, {pull}`335`) by [@aladinor](https://github.com/aladinor) * FIX: ensure `to_cfradial2` correctly selects the default storage engine when none is provided, ({pull}`378`) by [@chfer](https://github.com/chfer) * MNT: Add ``cfradial1_sgp_file`` session fixture and refactor 8 tests in ``test_util.py``/``test_accessors.py`` to share it instead of inlining ``DATASETS.fetch("sample_sgp_data.nc")``. 
Fixture returns the filename so each test opens its own DataTree, avoiding cross-test mutation ({issue}`346`, {pull}`347`) by [@aladinor](https://github.com/aladinor) * FIX: IRIS reader rotates the first-loaded moment in each sweep by 1 ray — ``IrisRawFile._get_ray_record_offsets_and_data`` initialised ``j = -1`` so the first matching ray of the first-loaded moment was written to ``raw_data[-1]``; affects files without ``DB_XHDR`` (data-type bit 0) where ``DB_DBT`` becomes the rotated moment ({issue}`357`, {pull}`375`) by [@aladinor](https://github.com/aladinor) From d78db07f5ec0e814a13d50a56aab22f53a5faee9 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 30 Mar 2026 21:53:21 -0500 Subject: [PATCH 08/17] STY: apply black formatting to 6 backend files --- xradar/io/backends/datamet.py | 8 ++------ xradar/io/backends/furuno.py | 3 +-- xradar/io/backends/hpl.py | 7 ++----- xradar/io/backends/iris.py | 7 ++----- xradar/io/backends/metek.py | 3 +-- xradar/io/backends/rainbow.py | 3 +-- 6 files changed, 9 insertions(+), 22 deletions(-) diff --git a/xradar/io/backends/datamet.py b/xradar/io/backends/datamet.py index 3dc671ce..09c469c9 100644 --- a/xradar/io/backends/datamet.py +++ b/xradar/io/backends/datamet.py @@ -466,10 +466,7 @@ def open_groups_as_dict( ): def _discover(): dmet = DataMetFile(filename_or_obj) - return [ - f"sweep_{i}" - for i in range(dmet.scan_metadata["elevation_number"]) - ] + return [f"sweep_{i}" for i in range(dmet.scan_metadata["elevation_number"])] sweeps = _resolve_sweeps(sweep, _discover) @@ -487,8 +484,7 @@ def _discover(): ) ls_ds = [ - self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) - for swp in sweeps + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps ] return _build_groups_dict( ls_ds, optional=optional, optional_groups=optional_groups diff --git a/xradar/io/backends/furuno.py b/xradar/io/backends/furuno.py index 447a89c9..b8d1baac 100644 --- a/xradar/io/backends/furuno.py +++ 
b/xradar/io/backends/furuno.py @@ -814,8 +814,7 @@ def open_groups_as_dict( ) ls_ds = [ - self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) - for swp in sweeps + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps ] return _build_groups_dict( ls_ds, optional=optional, optional_groups=optional_groups diff --git a/xradar/io/backends/hpl.py b/xradar/io/backends/hpl.py index 2f3e9d24..f7fa367d 100644 --- a/xradar/io/backends/hpl.py +++ b/xradar/io/backends/hpl.py @@ -614,9 +614,7 @@ def open_groups_as_dict( transition_threshold_azi=0.05, transition_threshold_el=0.001, ): - sweeps = _resolve_sweeps( - sweep, lambda: _get_hpl_group_names(filename_or_obj) - ) + sweeps = _resolve_sweeps(sweep, lambda: _get_hpl_group_names(filename_or_obj)) ds_kwargs = dict( mask_and_scale=mask_and_scale, @@ -640,8 +638,7 @@ def open_groups_as_dict( ) ls_ds = [ - self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) - for swp in sweeps + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps ] groups_dict = _build_groups_dict( ls_ds, optional=optional, optional_groups=optional_groups diff --git a/xradar/io/backends/iris.py b/xradar/io/backends/iris.py index f8509c67..dac35cab 100644 --- a/xradar/io/backends/iris.py +++ b/xradar/io/backends/iris.py @@ -4085,9 +4085,7 @@ def open_groups_as_dict( optional=True, optional_groups=False, ): - sweeps = _resolve_sweeps( - sweep, lambda: _get_iris_group_names(filename_or_obj) - ) + sweeps = _resolve_sweeps(sweep, lambda: _get_iris_group_names(filename_or_obj)) ds_kwargs = dict( mask_and_scale=mask_and_scale, @@ -4105,8 +4103,7 @@ def open_groups_as_dict( ) ls_ds = [ - self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) - for swp in sweeps + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps ] return _build_groups_dict( ls_ds, optional=optional, optional_groups=optional_groups diff --git a/xradar/io/backends/metek.py b/xradar/io/backends/metek.py index 
dd081f37..cc26790f 100644 --- a/xradar/io/backends/metek.py +++ b/xradar/io/backends/metek.py @@ -667,8 +667,7 @@ def open_groups_as_dict( ) ls_ds = [ - self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) - for swp in sweeps + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps ] return _build_groups_dict( ls_ds, optional=optional, optional_groups=optional_groups diff --git a/xradar/io/backends/rainbow.py b/xradar/io/backends/rainbow.py index 72c30806..dcf7c279 100644 --- a/xradar/io/backends/rainbow.py +++ b/xradar/io/backends/rainbow.py @@ -899,8 +899,7 @@ def open_groups_as_dict( ) ls_ds = [ - self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) - for swp in sweeps + self.open_dataset(filename_or_obj, group=swp, **ds_kwargs) for swp in sweeps ] return _build_groups_dict( ls_ds, optional=optional, optional_groups=optional_groups From caf1937da5731dc5e81788db44a37fe99cf86441 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 11 May 2026 19:26:01 -0500 Subject: [PATCH 09/17] fix(nexrad): translate legacy site_coords kwarg in datatree wrapper Phase 2 of PR #335 absorption after rebase onto upstream/main. - Extract `_sweep_attrs_from_msg5_elev(elev)` helper, shared by `_assign_sweep_attrs` and `NexradLevel2BackendEntrypoint.open_groups_as_dict` to avoid duplicating the MSG_5_ELEV -> attrs mapping in two places. - In `open_nexradlevel2_datatree`, translate the legacy `site_coords=` kwarg into the canonical `site_as_coords` parameter instead of silently dropping it. Mirrors the precedent at `xradar/io/backends/cfradial1.py:382` and keeps existing callers using the old name working. - Add `test_open_nexradlevel2_datatree_legacy_site_coords_kwarg` pinning that translation against silent regressions. - Add `test_actual_elevation_cuts_invariant_under_sweep_selection` confirming the root attr reflects what's recorded in the file, not the user-selected sweeps. 
--- tests/io/test_nexrad_level2.py | 21 ++++++++ xradar/io/backends/nexrad_level2.py | 78 ++++++++++++++++++++--------- 2 files changed, 74 insertions(+), 25 deletions(-) diff --git a/tests/io/test_nexrad_level2.py b/tests/io/test_nexrad_level2.py index 563d4b25..b663fc46 100644 --- a/tests/io/test_nexrad_level2.py +++ b/tests/io/test_nexrad_level2.py @@ -889,6 +889,27 @@ def test_open_nexradlevel2_single_dataset_site_as_coords(nexradlevel2_file): assert "altitude" in ds.coords +def test_open_nexradlevel2_datatree_legacy_site_coords_kwarg(nexradlevel2_file): + """Legacy `site_coords=` kwarg must reach the coord assignment in open_nexradlevel2_datatree.""" + dtree = open_nexradlevel2_datatree( + nexradlevel2_file, sweep=[0], reindex_angle=False, site_coords=True + ) + root_coords = dtree.ds.coords + assert "latitude" in root_coords + assert "longitude" in root_coords + assert "altitude" in root_coords + + +def test_actual_elevation_cuts_invariant_under_sweep_selection(nexradlevel2_file): + """`actual_elevation_cuts` reflects file contents, not user `sweep=` selection.""" + dtree_all = open_nexradlevel2_datatree(nexradlevel2_file) + dtree_subset = open_nexradlevel2_datatree(nexradlevel2_file, sweep=[0, 1]) + assert ( + dtree_all.attrs["actual_elevation_cuts"] + == dtree_subset.attrs["actual_elevation_cuts"] + ) + + @pytest.mark.parametrize( "sweeps_input, expected_sweeps, should_raise", [ diff --git a/xradar/io/backends/nexrad_level2.py b/xradar/io/backends/nexrad_level2.py index 8dbd852b..5a2783bd 100644 --- a/xradar/io/backends/nexrad_level2.py +++ b/xradar/io/backends/nexrad_level2.py @@ -1156,6 +1156,24 @@ def _check_record(self): } +def _sweep_attrs_from_msg5_elev(elev): + """Build the per-sweep attrs dict from one MSG_5_ELEV entry (ICD Table XI).""" + wf = elev.get("waveform_type", 0) + ch = elev.get("channel_config", 0) + sup = elev.get("supplemental_data_decoded", {}) + return { + "waveform_type": _WAVEFORM_TYPES.get(wf, str(wf)), + "channel_config": 
_CHANNEL_CONFIGS.get(ch, str(ch)), + "super_resolution": elev.get("super_resolution", 0), + "sails_cut": sup.get("sails_cut", False), + "sails_sequence_number": sup.get("sails_sequence_number", 0), + "mrle_cut": sup.get("mrle_cut", False), + "mrle_sequence_number": sup.get("mrle_sequence_number", 0), + "mpda_cut": sup.get("mpda_cut", False), + "base_tilt_cut": sup.get("base_tilt_cut", False), + } + + def _assign_sweep_attrs(dtree, elev_data): """Inject per-sweep attrs from MSG_5_ELEV data onto sweep nodes. @@ -1168,22 +1186,7 @@ def _assign_sweep_attrs(dtree, elev_data): sweep_key = f"sweep_{i}" if sweep_key not in dtree.children: continue - wf = elev.get("waveform_type", 0) - ch = elev.get("channel_config", 0) - sup = elev.get("supplemental_data_decoded", {}) - dtree[sweep_key].ds.attrs.update( - { - "waveform_type": _WAVEFORM_TYPES.get(wf, str(wf)), - "channel_config": _CHANNEL_CONFIGS.get(ch, str(ch)), - "super_resolution": elev.get("super_resolution", 0), - "sails_cut": sup.get("sails_cut", False), - "sails_sequence_number": sup.get("sails_sequence_number", 0), - "mrle_cut": sup.get("mrle_cut", False), - "mrle_sequence_number": sup.get("mrle_sequence_number", 0), - "mpda_cut": sup.get("mpda_cut", False), - "base_tilt_cut": sup.get("base_tilt_cut", False), - } - ) + dtree[sweep_key].ds.attrs.update(_sweep_attrs_from_msg5_elev(elev)) def _get_dynamic_scan_type(supplemental): @@ -2002,27 +2005,39 @@ def open_groups_as_dict( "which contains the volume header and metadata." ) - # Single metadata read + # Single metadata read. Reading incomplete_sweeps triggers + # data_header parsing and populates nex.data — needed for + # present_keys below. with NEXRADLevel2File(filename_or_obj, loaddata=False) as nex: - act_sweeps = len(nex.msg_31_data_header) incomplete = nex.incomplete_sweeps + # Use sparse sweep keys: upstream-dropped interior cuts leave + # gaps like [0..9, 11] that range(act_sweeps) would mis-index. + # See #361. 
+ present_keys = sorted(nex.data) + act_sweeps = len(present_keys) + elev_data = nex.msg_5.get("elevation_data", []) if nex.msg_5 else [] # Normalise NodePath strings before resolving sweeps if isinstance(sweep, str): sweep = NodePath(sweep).name - elif isinstance(sweep, list) and sweep and isinstance(sweep[0], str): - sweep = [NodePath(i).name for i in sweep] + elif isinstance(sweep, list) and sweep: + if isinstance(sweep[0], str): + sweep = [NodePath(i).name for i in sweep] + elif not isinstance(sweep[0], int): + raise ValueError( + "Invalid type in 'sweep' list. Expected integers " + "(e.g., [0, 1, 2]) or strings " + "(e.g. [/sweep_0, sweep_1])." + ) if sweep is not None: sweeps = _resolve_sweeps( sweep, - lambda: [f"sweep_{i}" for i in range(act_sweeps)], + lambda: [f"sweep_{i}" for i in present_keys], ) else: if incomplete_sweep == "drop": - sweeps = [ - f"sweep_{i}" for i in range(act_sweeps) if i not in incomplete - ] + sweeps = [f"sweep_{i}" for i in present_keys if i not in incomplete] if incomplete: warnings.warn( f"Dropped {len(incomplete)} incomplete sweep(s): " @@ -2039,7 +2054,7 @@ def open_groups_as_dict( ) return {"/": xr.Dataset()} elif incomplete_sweep == "pad": - sweeps = [f"sweep_{i}" for i in range(act_sweeps)] + sweeps = [f"sweep_{i}" for i in present_keys] else: raise ValueError( f"Invalid incomplete_sweep={incomplete_sweep!r}. " @@ -2072,6 +2087,9 @@ def open_groups_as_dict( ls_ds = [sweep_dict[s] for s in sweep_dict] ls_ds_with_root = [xr.Dataset()] + list(ls_ds) root, ls_ds_with_root = _assign_root(ls_ds_with_root) + # Per ICD, total cuts actually recorded in the file (MSG_31 headers), + # not user selection. Used downstream to detect AVSET truncation. + root.attrs["actual_elevation_cuts"] = act_sweeps groups_dict = { "/": root, } @@ -2085,8 +2103,13 @@ def open_groups_as_dict( groups_dict["/radar_calibration"] = _get_radar_calibration( ls_ds_with_root, radar_calibration_subgroup ) + # Inject per-sweep attrs from MSG_5_ELEV (ICD Table XI). 
The elev_data + # index aligns with sweep_{i} because both order by VCP cut index. for sweep_path, ds in sweep_dict.items(): sw = ds.drop_vars(_STATION_VARS, errors="ignore").drop_attrs(deep=False) + sweep_idx = int(sweep_path.split("_")[-1]) + if 0 <= sweep_idx < len(elev_data): + sw.attrs.update(_sweep_attrs_from_msg5_elev(elev_data[sweep_idx])) groups_dict[f"/{sweep_path}"] = sw return groups_dict @@ -2210,6 +2233,11 @@ def open_nexradlevel2_datatree( """ _deprecation_warning("open_nexradlevel2_datatree", "nexradlevel2") + # Legacy callers may pass `site_coords` via kwargs; the explicit + # `site_as_coords` parameter is the canonical wrapper signature. + # Honor the legacy name if given so existing callers keep working. + site_as_coords = kwargs.pop("site_coords", site_as_coords) + return NexradLevel2BackendEntrypoint().open_datatree( filename_or_obj, mask_and_scale=mask_and_scale, From 391ad60411c97f18c1fede941caf75baf8a9a58c Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 11 May 2026 19:51:46 -0500 Subject: [PATCH 10/17] feat(cfradial2): convert to xarray BackendEntrypoint with engine="cfradial2" Phase 3 of PR #335. Adds `CfRadial2BackendEntrypoint` so users can: xr.open_datatree(file, engine="cfradial2") xd.open_datatree(file, engine="cfradial2") xr.open_dataset(file, engine="cfradial2", group="sweep_0") The legacy `open_cfradial2_datatree(...)` becomes a FutureWarning shim that delegates to the new class. Pattern mirrors the established conversions (odim, cfradial1, gamic, iris, nexradlevel2, ...). - Extract `_build_cfradial2_dtree_dict(...)` helper. - Add `CfRadial2BackendEntrypoint` with `open_dataset`, `open_groups_as_dict`, `open_datatree`. The per-group `open_dataset` resolves canonical sweep names (`sweep_2`) against file-native variants (`sweep_02`) and loads the dataset inside the `with open_datatree(...)` block to avoid lazy-load failures after the underlying file handle closes. 
- Register in `xradar/io/__init__.py:_ENGINE_REGISTRY` and `pyproject.toml [project.entry-points."xarray.backends"]`. - Add session-scoped `cfradial2_file` fixture in `tests/conftest.py`. - Add `open_cfradial2_datatree` to `_DEPRECATED_FUNCTIONS` map (FutureWarning regression guard). - Add four targeted tests for the new public APIs. - Fix `test_open_cfradial2_roundtrip`: cfradial1 attaches station coords to each sweep, cfradial2 places them at root only; drop station coords on the cfradial1 side so the DBZ comparison succeeds. --- pyproject.toml | 1 + tests/conftest.py | 11 ++ tests/io/test_backend_datatree.py | 6 + tests/io/test_cfradial2.py | 36 +++++- xradar/io/__init__.py | 2 + xradar/io/backends/cfradial2.py | 192 +++++++++++++++++++++++------- 6 files changed, 206 insertions(+), 42 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f76972f9..770dc467 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ changelog = "https://github.com/openradar/xradar/blob/main/docs/history.md" [project.entry-points."xarray.backends"] cfradial1 = "xradar.io.backends:CfRadial1BackendEntrypoint" +cfradial2 = "xradar.io.backends:CfRadial2BackendEntrypoint" furuno = "xradar.io.backends:FurunoBackendEntrypoint" gamic = "xradar.io.backends:GamicBackendEntrypoint" iris = "xradar.io.backends:IrisBackendEntrypoint" diff --git a/tests/conftest.py b/tests/conftest.py index d76b510b..9432e777 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -23,6 +23,17 @@ def cfradial1_file(tmp_path_factory): return DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") +@pytest.fixture(scope="session") +def cfradial2_file(cfradial1_file, tmp_path_factory): + """Round-trip a cfradial1 file through to_cfradial2 to get a real CfRadial2 file.""" + import xradar as xd + + outfile = tmp_path_factory.mktemp("cfradial2") / "sample_cfradial2.nc" + dtree = xd.io.open_cfradial1_datatree(cfradial1_file, first_dim="time") + xd.io.to_cfradial2(dtree.copy(), outfile, 
engine="netcdf4") + return str(outfile) + + @pytest.fixture(scope="session") def cfradial1n_file(tmp_path_factory): return DATASETS.fetch("DES_VOL_RAW_20240522_1600.nc") diff --git a/tests/io/test_backend_datatree.py b/tests/io/test_backend_datatree.py index 17410063..cedfe780 100644 --- a/tests/io/test_backend_datatree.py +++ b/tests/io/test_backend_datatree.py @@ -217,6 +217,7 @@ def test_registry_contains_all_engines(self): expected = { "odim", "cfradial1", + "cfradial2", "nexradlevel2", "gamic", "iris", @@ -247,6 +248,11 @@ def test_registry_contains_all_engines(self): "cfradial1_file", {"engine": "h5netcdf", "decode_timedelta": False}, ), + "open_cfradial2_datatree": ( + "xradar.io.backends.cfradial2", + "cfradial2_file", + {}, + ), "open_furuno_datatree": ("xradar.io.backends.furuno", "furuno_scn_file", {}), "open_rainbow_datatree": ("xradar.io.backends.rainbow", "rainbow_file", {}), "open_datamet_datatree": ("xradar.io.backends.datamet", "datamet_file", {}), diff --git a/tests/io/test_cfradial2.py b/tests/io/test_cfradial2.py index 8fa92c72..2b70775b 100644 --- a/tests/io/test_cfradial2.py +++ b/tests/io/test_cfradial2.py @@ -77,7 +77,14 @@ def test_open_cfradial2_roundtrip(cfradial1_file, temp_file): assert isinstance(dtree2, xr.DataTree) assert "sweep_0" in dtree2.children assert "DBZ" in dtree2["sweep_0"].data_vars - xr.testing.assert_equal(dtree["sweep_0"].ds["DBZ"], dtree2["sweep_0"].ds["DBZ"]) + # cfradial1 attaches station coords to each sweep; cfradial2 places them + # at root only. Drop them on the left so the DBZ comparison succeeds. 
+ expected = ( + dtree["sweep_0"] + .ds["DBZ"] + .drop_vars(["latitude", "longitude", "altitude"], errors="ignore") + ) + xr.testing.assert_equal(expected, dtree2["sweep_0"].ds["DBZ"]) assert "latitude" in dtree2.ds.coords assert dtree2.ds["latitude"].attrs["standard_name"] == "latitude" assert ( @@ -139,6 +146,33 @@ def test_open_cfradial2_invalid_path(): xd.io.open_cfradial2_datatree("missing-cfradial2-file.nc") +def test_open_dataset_sweep_group(cfradial2_file): + """`xr.open_dataset(engine="cfradial2", group="sweep_0")` returns a normalized sweep.""" + ds = xr.open_dataset(cfradial2_file, engine="cfradial2", group="sweep_0") + assert "azimuth" in ds.coords + assert "range" in ds.coords + + +def test_open_dataset_missing_group_raises(cfradial2_file): + """`xr.open_dataset(engine="cfradial2", group="sweep_99")` raises ValueError.""" + with pytest.raises(ValueError, match="missing from file"): + xr.open_dataset(cfradial2_file, engine="cfradial2", group="sweep_99") + + +def test_xr_open_datatree_cfradial2_engine(cfradial2_file): + """End-to-end: `xr.open_datatree(file, engine="cfradial2")` returns a DataTree.""" + dtree = xr.open_datatree(cfradial2_file, engine="cfradial2") + assert isinstance(dtree, xr.DataTree) + assert any(name.startswith("sweep_") for name in dtree.children) + + +def test_xd_open_datatree_cfradial2_engine(cfradial2_file): + """End-to-end: `xd.open_datatree(file, engine="cfradial2")` returns a DataTree.""" + dtree = xd.open_datatree(cfradial2_file, engine="cfradial2") + assert isinstance(dtree, xr.DataTree) + assert any(name.startswith("sweep_") for name in dtree.children) + + @pytest.mark.parametrize( ("available", "expected_engine"), [ diff --git a/xradar/io/__init__.py b/xradar/io/__init__.py index 9e81cd9e..fae103fe 100644 --- a/xradar/io/__init__.py +++ b/xradar/io/__init__.py @@ -18,6 +18,7 @@ from .export import * # noqa from .backends.cfradial1 import CfRadial1BackendEntrypoint +from .backends.cfradial2 import CfRadial2BackendEntrypoint 
from .backends.datamet import DataMetBackendEntrypoint from .backends.furuno import FurunoBackendEntrypoint from .backends.gamic import GamicBackendEntrypoint @@ -33,6 +34,7 @@ _ENGINE_REGISTRY = { "odim": OdimBackendEntrypoint, "cfradial1": CfRadial1BackendEntrypoint, + "cfradial2": CfRadial2BackendEntrypoint, "nexradlevel2": NexradLevel2BackendEntrypoint, "gamic": GamicBackendEntrypoint, "iris": IrisBackendEntrypoint, diff --git a/xradar/io/backends/cfradial2.py b/xradar/io/backends/cfradial2.py index 5ea6dbba..627b0c75 100644 --- a/xradar/io/backends/cfradial2.py +++ b/xradar/io/backends/cfradial2.py @@ -27,7 +27,7 @@ """ -__all__ = ["open_cfradial2_datatree"] +__all__ = ["CfRadial2BackendEntrypoint", "open_cfradial2_datatree"] __doc__ = __doc__.format("\n ".join(__all__)) @@ -38,6 +38,7 @@ import numpy as np from xarray import DataTree, Variable, open_datatree +from xarray.backends import BackendEntrypoint from ...model import ( georeferencing_correction_subgroup, @@ -57,7 +58,7 @@ required_root_vars, sweep_vars_mapping, ) -from .common import _STATION_VARS, _apply_site_as_coords +from .common import _STATION_VARS, _apply_site_as_coords, _deprecation_warning _ROOT_ATTR_RENAMES = { "RadarName": "instrument_name", @@ -454,42 +455,21 @@ def _normalize_subgroup(node: DataTree, mapping: dict[str, str | None]): return ds -def open_cfradial2_datatree( - filename_or_obj: str | PathLike[str], **kwargs: Any -) -> DataTree: - """Open a CfRadial2-like grouped dataset as :py:class:`xarray.DataTree`. - - The reader performs best-effort normalization of common CfRadial2/FM301 - naming and metadata differences. It is not a full FM301 validator. - - Parameters - ---------- - filename_or_obj : str or PathLike - Path or object understood by :py:func:`xarray.open_datatree`. - - Keyword Arguments - ----------------- - sweep : int, str, iterable, optional - Sweep selection. Defaults to all available sweeps. - first_dim : str - Can be ``time`` or ``auto``. Defaults to ``time``. 
- optional : bool - Keep optional root variables when available. Defaults to ``True``. - optional_groups : bool - Include root metadata subgroups if present. Defaults to ``False``. - **kwargs : dict - Additional keyword arguments passed to :py:func:`xarray.open_datatree`. +def _build_cfradial2_dtree_dict( + filename_or_obj: str | PathLike[str], + *, + sweep: Any = None, + first_dim: str = "time", + optional: bool = True, + optional_groups: bool = False, + **kwargs: Any, +) -> dict[str, Any]: + """Build the dict[str, Dataset] of normalized CfRadial2 groups. - Returns - ------- - xarray.DataTree - Normalized DataTree containing root metadata and sweep groups. + Used by :class:`CfRadial2BackendEntrypoint` to assemble the DataTree + before `DataTree.from_dict(...)` is applied. """ - sweep = kwargs.pop("sweep", None) - first_dim = kwargs.pop("first_dim", "time") - optional = kwargs.pop("optional", True) - optional_groups = kwargs.pop("optional_groups", False) - kwargs.update(decode_timedelta=kwargs.pop("decode_timedelta", False)) + kwargs.setdefault("decode_timedelta", False) with open_datatree(filename_or_obj, **kwargs) as tree: raw_sweep_names = [name for name in tree.children if name.startswith("sweep_")] @@ -534,18 +514,19 @@ def open_cfradial2_datatree( cleaned.attrs = {} dtree[f"sweep_{i}"] = cleaned - normalized = selected != output_names or any( + renamed = selected != output_names or any( name != _normalize_sweep_name(name) for name in raw_sweep_names ) - if normalized: + if renamed: warnings.warn( "CfRadial2 sweep groups were renumbered into sequential `sweep_` order.", UserWarning, stacklevel=2, ) - root_ds = dtree["/"] - missing_root = required_root_vars - set(root_ds.data_vars) - set(root_ds.coords) + missing_root = ( + required_root_vars - set(dtree["/"].data_vars) - set(dtree["/"].coords) + ) if missing_root: warnings.warn( "CfRadial2 reader could not fully normalize FM301 root variables; " @@ -554,4 +535,133 @@ def open_cfradial2_datatree( stacklevel=2, 
) - return DataTree.from_dict(dtree) + return dtree + + +class CfRadial2BackendEntrypoint(BackendEntrypoint): + """Xarray BackendEntrypoint for CfRadial2/FM301 grouped datasets. + + Keyword Arguments + ----------------- + sweep : int, str, iterable, optional + Sweep selection. Defaults to all available sweeps. + first_dim : str + Can be ``time`` or ``auto``. Defaults to ``time``. + optional : bool + Keep optional root variables when available. Defaults to ``True``. + optional_groups : bool + Include root metadata subgroups if present. Defaults to ``False``. + kwargs : dict + Additional kwargs are fed to :py:func:`xarray.open_datatree`. + """ + + description = "Open CfRadial2/FM301 grouped datasets in Xarray" + url = "https://xradar.rtfd.io/en/latest/io.html#cfradial2" + supports_groups = True + + def open_dataset( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=False, + group="sweep_0", + first_dim="time", + optional=True, + ): + with open_datatree( + filename_or_obj, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + ) as tree: + # Map canonical sweep names (`sweep_2`) to actual node names + # (`sweep_02`, `sweep2`, ...), matching the DataTree path. + sweep_lookup = { + _normalize_sweep_name(name): name + for name in tree.children + if name.startswith("sweep") + } + source = sweep_lookup.get(group, group) + if source != "/" and source not in tree.children: + raise ValueError( + f"Group `{group}` missing from file `{filename_or_obj}`." 
+ ) + ds = tree[source].to_dataset(inherit=True) + if group.startswith("sweep_"): + ds = _normalize_sweep_dataset( + ds, + _normalize_sweep_name(source), + first_dim=first_dim, + optional=optional, + ) + ds.load() + return ds + + def open_groups_as_dict( + self, + filename_or_obj, + *, + sweep=None, + first_dim="time", + optional=True, + optional_groups=False, + **kwargs, + ): + return _build_cfradial2_dtree_dict( + filename_or_obj, + sweep=sweep, + first_dim=first_dim, + optional=optional, + optional_groups=optional_groups, + **kwargs, + ) + + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + + +def open_cfradial2_datatree( + filename_or_obj: str | PathLike[str], **kwargs: Any +) -> DataTree: + """Open a CfRadial2-like grouped dataset as :py:class:`xarray.DataTree`. + + .. deprecated:: + Use ``xd.open_datatree(file, engine="cfradial2")`` or + ``xr.open_datatree(file, engine="cfradial2")`` instead. + + Parameters + ---------- + filename_or_obj : str or PathLike + Path or object understood by :py:func:`xarray.open_datatree`. + + Keyword Arguments + ----------------- + sweep : int, str, iterable, optional + Sweep selection. Defaults to all available sweeps. + first_dim : str + Can be ``time`` or ``auto``. Defaults to ``time``. + optional : bool + Keep optional root variables when available. Defaults to ``True``. + optional_groups : bool + Include root metadata subgroups if present. Defaults to ``False``. + **kwargs : dict + Additional keyword arguments passed to :py:func:`xarray.open_datatree`. + + Returns + ------- + xarray.DataTree + Normalized DataTree containing root metadata and sweep groups. 
+ """ + _deprecation_warning("open_cfradial2_datatree", "cfradial2") + return CfRadial2BackendEntrypoint().open_datatree(filename_or_obj, **kwargs) From feae3e955632426848fd811b36fb3baf02f62754 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 11 May 2026 20:19:48 -0500 Subject: [PATCH 11/17] feat(imd): expose CfRadial2 DataTree via engine="imd" (single-file) Phase 4 of PR #335. Adds `supports_groups`, `open_groups_as_dict`, and `open_datatree` to `IMDBackendEntrypoint` so users can: xr.open_datatree(file, engine="imd") xd.open_datatree(file, engine="imd") The legacy `open_imd_datatree(...)` stays public and undeprecated because IMD volumes legitimately span multiple files (one sweep per file, stacked via `util.create_volume`), which the `engine="imd"` registry entry does not support. The function's docstring documents the single-vs-multi-file split with a `.. note::` block. - Extract `_build_single_imd_dtree_dict(...)` helper. - Add the standard xarray decoder kwargs (`mask_and_scale`, `decode_times`, `decode_timedelta=False`, ...) to `open_groups_as_dict` so the datatree path matches `open_dataset`. - Preserve file-handle closer across `_conform_imd_sweep`'s rename chain via `ds.set_close(...)` so `open_datatree(...).close()` properly releases the netcdf4 handle. - Register `IMDBackendEntrypoint` in `_ENGINE_REGISTRY`. - Add `test_xr_open_datatree_imd_engine`, `test_xd_open_datatree_imd_engine`, and `test_open_imd_datatree_no_futurewarning` (carve-out invariant). 
--- tests/io/test_backend_datatree.py | 1 + tests/io/test_imd.py | 29 ++++++++++++ xradar/io/__init__.py | 2 + xradar/io/backends/imd.py | 74 ++++++++++++++++++++++++++++--- 4 files changed, 101 insertions(+), 5 deletions(-) diff --git a/tests/io/test_backend_datatree.py b/tests/io/test_backend_datatree.py index cedfe780..35fdcff0 100644 --- a/tests/io/test_backend_datatree.py +++ b/tests/io/test_backend_datatree.py @@ -227,6 +227,7 @@ def test_registry_contains_all_engines(self): "hpl", "metek", "uf", + "imd", } assert set(_ENGINE_REGISTRY.keys()) == expected diff --git a/tests/io/test_imd.py b/tests/io/test_imd.py index 35506316..0c888e74 100644 --- a/tests/io/test_imd.py +++ b/tests/io/test_imd.py @@ -90,6 +90,35 @@ def test_open_imd_datatree_volume(imd_volume_files): assert int(dtree[sw].ds["sweep_number"].values) == i +def test_xr_open_datatree_imd_engine(imd_file): + """End-to-end: `xr.open_datatree(file, engine="imd")` returns a DataTree.""" + import xarray as xr + + dtree = xr.open_datatree(imd_file, engine="imd") + assert isinstance(dtree, DataTree) + assert "sweep_0" in dtree.children + + +def test_xd_open_datatree_imd_engine(imd_file): + """End-to-end: `xd.open_datatree(file, engine="imd")` returns a DataTree.""" + import xradar as xd + + dtree = xd.open_datatree(imd_file, engine="imd") + assert isinstance(dtree, DataTree) + assert "sweep_0" in dtree.children + + +def test_open_imd_datatree_no_futurewarning(imd_volume_files): + """The multi-file `open_imd_datatree` carve-out must not emit FutureWarning.""" + import warnings + + with warnings.catch_warnings(record=True) as captured: + warnings.simplefilter("always") + open_imd_datatree(imd_volume_files) + future = [w for w in captured if issubclass(w.category, FutureWarning)] + assert future == [], f"open_imd_datatree must not warn: {future}" + + def test_open_imd_datatree_angle_filter(imd_volume_files): """min_angle/max_angle forwarded to util.create_volume.""" # Load first to learn the actual angles, 
then filter to just the lowest. diff --git a/xradar/io/__init__.py b/xradar/io/__init__.py index fae103fe..649c244b 100644 --- a/xradar/io/__init__.py +++ b/xradar/io/__init__.py @@ -23,6 +23,7 @@ from .backends.furuno import FurunoBackendEntrypoint from .backends.gamic import GamicBackendEntrypoint from .backends.hpl import HPLBackendEntrypoint +from .backends.imd import IMDBackendEntrypoint from .backends.iris import IrisBackendEntrypoint from .backends.metek import MRRBackendEntrypoint from .backends.nexrad_level2 import NexradLevel2BackendEntrypoint @@ -44,6 +45,7 @@ "hpl": HPLBackendEntrypoint, "metek": MRRBackendEntrypoint, "uf": UFBackendEntrypoint, + "imd": IMDBackendEntrypoint, } diff --git a/xradar/io/backends/imd.py b/xradar/io/backends/imd.py index c6ae82bf..784ab13a 100644 --- a/xradar/io/backends/imd.py +++ b/xradar/io/backends/imd.py @@ -421,6 +421,10 @@ class IMDBackendEntrypoint(BackendEntrypoint): "Open India Meteorological Department (IMD) radar NetCDF files in Xarray" ) url = "https://xradar.rtfd.io/en/latest/io.html#imd" + # True even though IMD files contain no native groups: enables + # `xr.open_datatree(file, engine="imd")` to materialize the synthetic + # `/` + `/sweep_0` CfRadial2 layout from the single-sweep file. + supports_groups = True def open_dataset( self, @@ -461,6 +465,48 @@ def open_dataset( ds._close = store.close return ds + def open_groups_as_dict( + self, + filename_or_obj, + *, + mask_and_scale=True, + decode_times=True, + concat_characters=True, + decode_coords=True, + drop_variables=None, + use_cftime=None, + decode_timedelta=False, + first_dim="auto", + reindex_angle=False, + site_as_coords=True, + optional_groups=False, + **kwargs, + ): + """Open a single IMD sweep file as a dict of CfRadial2 group datasets. + + Single-file only. For multi-file IMD volumes (one sweep per file), + use :func:`open_imd_datatree` with a list of paths. 
+ """ + return _build_single_imd_dtree_dict( + filename_or_obj, + mask_and_scale=mask_and_scale, + decode_times=decode_times, + concat_characters=concat_characters, + decode_coords=decode_coords, + drop_variables=drop_variables, + use_cftime=use_cftime, + decode_timedelta=decode_timedelta, + first_dim=first_dim, + reindex_angle=reindex_angle, + site_as_coords=site_as_coords, + optional_groups=optional_groups, + **kwargs, + ) + + def open_datatree(self, filename_or_obj, **kwargs): + groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) + return DataTree.from_dict(groups_dict) + def _read_imd_sweep(filename, first_dim="auto", reindex_angle=False, **kwargs): """Open one IMD file and return a CfRadial2 sweep Dataset. @@ -468,17 +514,21 @@ def _read_imd_sweep(filename, first_dim="auto", reindex_angle=False, **kwargs): Avoids the xarray entrypoint registry so this works even when the ``imd`` engine has not been installed via pip entrypoints. """ - ds = xr.open_dataset( + raw = xr.open_dataset( filename, engine="netcdf4", decode_timedelta=kwargs.pop("decode_timedelta", False), **kwargs, ) - ds = _conform_imd_sweep(ds, first_dim=first_dim, site_as_coords=False) + # Preserve the file-handle closer across the rename/assign chain so the + # returned dataset can be closed by the caller. 
+ close = raw._close + ds = _conform_imd_sweep(raw, first_dim=first_dim, site_as_coords=False) if reindex_angle is not False: ds = ds.pipe(util.remove_duplicate_rays) ds = ds.pipe(util.reindex_angle, **reindex_angle) ds = ds.pipe(util.ipol_time, **reindex_angle) + ds.set_close(close) return ds @@ -541,7 +591,7 @@ def _build_imd_root(sweeps): return root -def _open_single_imd_datatree( +def _build_single_imd_dtree_dict( filename, first_dim="auto", reindex_angle=False, @@ -549,7 +599,7 @@ def _open_single_imd_datatree( optional_groups=False, **kwargs, ): - """Build a single-sweep CfRadial2 DataTree from one IMD NetCDF file.""" + """Build the dict[str, Dataset] for a single-sweep IMD volume.""" sweep_ds = _read_imd_sweep( filename, first_dim=first_dim, reindex_angle=reindex_angle, **kwargs ) @@ -571,7 +621,12 @@ def _open_single_imd_datatree( sw = _apply_site_as_coords(sw, site_as_coords) sw.attrs = {} dtree["/sweep_0"] = sw - return DataTree.from_dict(dtree) + return dtree + + +def _open_single_imd_datatree(filename, **kwargs): + """Build a single-sweep CfRadial2 DataTree from one IMD NetCDF file.""" + return DataTree.from_dict(_build_single_imd_dtree_dict(filename, **kwargs)) def open_imd_datatree(filename_or_obj, **kwargs): @@ -587,6 +642,15 @@ def open_imd_datatree(filename_or_obj, **kwargs): sweeps by time and supports ``time_coverage_start``, ``time_coverage_end``, ``min_angle``, ``max_angle`` filtering. + .. note:: + + When opening a single IMD sweep file as a DataTree, prefer + ``xd.open_datatree(file, engine="imd")`` (or the xarray-native + ``xr.open_datatree(file, engine="imd")``). This function remains + the documented API for the multi-file path because IMD volumes + span multiple files, which the ``engine="imd"`` registry entry + does not support. 
+ To split a directory of mixed-volume files into per-volume groups, use :func:`group_imd_files` first:: From 9d0cca939916ab71644c49d89952afd1fee6dfdb Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 11 May 2026 20:33:55 -0500 Subject: [PATCH 12/17] docs(open-datatree-engine): migrate to MyST + cover all 13 engines Phase 5 of PR #335. Replaces the legacy .ipynb (wiped by #348's MyST move) with docs/notebooks/Open-Datatree-Engine.md and demos the new xr.open_datatree(file, engine=...) API across every registered engine (odim, cfradial1, cfradial2, nexradlevel2, gamic, iris, furuno, rainbow, datamet, hpl, metek, uf, imd), including the IMD multi-file carve-out via open_imd_datatree. - Add xd.io.list_engines() as the public way to enumerate registered engines (replaces a _ENGINE_REGISTRY private import that previously leaked into user docs). - Register the new notebook under the "Get started" toctree in docs/usage.md. - Add test_demo_notebook_lists_all_engines as a bitrot guard so a new entry in _ENGINE_REGISTRY cannot land without a notebook section. - Smoke-tested via jupyter nbconvert --execute end-to-end. 
--- docs/notebooks/Open-Datatree-Engine.md | 272 +++++++++++ docs/usage.md | 1 + examples/notebooks/Open-Datatree-Engine.ipynb | 424 ------------------ tests/io/test_backend_datatree.py | 10 + xradar/io/__init__.py | 12 + 5 files changed, 295 insertions(+), 424 deletions(-) create mode 100644 docs/notebooks/Open-Datatree-Engine.md delete mode 100644 examples/notebooks/Open-Datatree-Engine.ipynb diff --git a/docs/notebooks/Open-Datatree-Engine.md b/docs/notebooks/Open-Datatree-Engine.md new file mode 100644 index 00000000..bcb36ea8 --- /dev/null +++ b/docs/notebooks/Open-Datatree-Engine.md @@ -0,0 +1,272 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.19.1 + main_language: python +kernelspec: + display_name: Python 3 + name: python3 +--- + +# `open_datatree` with `engine=` + +xradar registers each of its readers as an `xarray.backends.BackendEntrypoint`, +so you can load any supported radar format into an `xarray.DataTree` directly +through the xarray-native API: + +```python +import xarray as xr + +dtree = xr.open_datatree(file, engine="...") +``` + +The same call is also exposed under `xradar` for convenience: + +```python +import xradar as xd + +dtree = xd.open_datatree(file, engine="...") +``` + +Both paths return a CfRadial2-shaped `xarray.DataTree` with a root dataset +and one `sweep_N` child per sweep. The xarray-native form is preferred in +most cases; the xradar-prefixed form is a thin shim that resolves the engine +through xradar's registry. + +```{code-cell} +import atexit +import gzip +import shutil +import tempfile +import warnings +from pathlib import Path + +import xarray as xr +from open_radar_data import DATASETS + +import xradar as xd + +# Some sample files in the open-radar-data repository ship gzipped but the +# corresponding backends expect a raw binary stream. Helper to decompress on +# demand into a tmpdir cleaned up at interpreter exit. 
+_tmpdir_obj = tempfile.TemporaryDirectory() +atexit.register(_tmpdir_obj.cleanup) +_tmpdir = Path(_tmpdir_obj.name) + + +def fetch_ungzipped(name): + src = Path(DATASETS.fetch(name)) + dst = _tmpdir / src.stem + with gzip.open(src) as fin, open(dst, "wb") as fout: + shutil.copyfileobj(fin, fout) + return str(dst) +``` + +## Supported engines + +The current registry covers thirteen radar formats: + +```{code-cell} +xd.io.list_engines() +``` + +## ODIM_H5 + +```{code-cell} +odim_file = DATASETS.fetch("71_20181220_060628.pvol.h5") +dtree = xr.open_datatree(odim_file, engine="odim") +display(dtree) +``` + +## CfRadial1 + +```{code-cell} +cfradial1_file = DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") +dtree = xr.open_datatree(cfradial1_file, engine="cfradial1") +display(dtree) +``` + +## CfRadial2 + +CfRadial2 files are already group-native; the backend normalizes common +institutional variations onto the FM301 layout. + +```{code-cell} +# Round-trip a CfRadial1 file to CfRadial2 so we have a demo input. 
+tmp_cfradial2 = _tmpdir / "demo_cfradial2.nc" +xd.io.to_cfradial2( + xr.open_datatree(cfradial1_file, engine="cfradial1", first_dim="time").copy(), + tmp_cfradial2, + engine="netcdf4", +) + +dtree = xr.open_datatree(str(tmp_cfradial2), engine="cfradial2") +display(dtree) +``` + +## NEXRAD Level II + +```{code-cell} +nexrad_file = DATASETS.fetch("KATX20130717_195021_V06") +dtree = xr.open_datatree(nexrad_file, engine="nexradlevel2") +display(dtree) +``` + +## GAMIC + +```{code-cell} +gamic_file = DATASETS.fetch("DWD-Vol-2_99999_20180601054047_00.h5") +dtree = xr.open_datatree(gamic_file, engine="gamic") +display(dtree) +``` + +## IRIS + +```{code-cell} +iris_file = DATASETS.fetch("cor-main131125105503.RAW2049") +dtree = xr.open_datatree(iris_file, engine="iris") +display(dtree) +``` + +## Furuno + +```{code-cell} +furuno_file = DATASETS.fetch("0080_20210730_160000_01_02.scn.gz") +dtree = xr.open_datatree(furuno_file, engine="furuno") +display(dtree) +``` + +## Rainbow + +```{code-cell} +rainbow_file = DATASETS.fetch("2013051000000600dBZ.vol") +dtree = xr.open_datatree(rainbow_file, engine="rainbow") +display(dtree) +``` + +## DataMet + +```{code-cell} +datamet_file = DATASETS.fetch("H-000-VOL-ILMONTE-201907100700.tar.gz") +dtree = xr.open_datatree(datamet_file, engine="datamet") +display(dtree) +``` + +## HPL (Halo Photonics) + +```{code-cell} +hpl_file = DATASETS.fetch("User1_100_20240714_122137.hpl") +dtree = xr.open_datatree(hpl_file, engine="hpl") +display(dtree) +``` + +## Metek MRR + +```{code-cell} +metek_file = fetch_ungzipped("0308.ave.gz") +dtree = xr.open_datatree(metek_file, engine="metek") +display(dtree) +``` + +## Universal Format (UF) + +```{code-cell} +uf_file = fetch_ungzipped("20110427_164233_rvp8-rel_v001_SUR.uf.gz") +dtree = xr.open_datatree(uf_file, engine="uf") +display(dtree) +``` + +## IMD - single file via `engine="imd"` + +IMD distributes one sweep per NetCDF file. 
The `engine="imd"` entry serves +the **single-file** case: + +```{code-cell} +imd_file = DATASETS.fetch("IMD/JPR220822135253-IMD-B.nc") +dtree = xr.open_datatree(imd_file, engine="imd") +display(dtree) +``` + +## IMD - multi-file volume via `open_imd_datatree` + +To assemble a full IMD volume you supply a list of sweep files. xarray's +`engine=` API takes a single path, so multi-file IMD volumes use the +module-level function (which delegates to `xradar.util.create_volume`): + +```{code-cell} +imd_volume = [ + DATASETS.fetch(f"IMD/JPR220822135253-IMD-B.nc{s}") + for s in ["", ".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8", ".9"] +] +dtree = xd.io.open_imd_datatree(imd_volume) +display(dtree) +``` + +## Common parameters + +Every backend accepts a `sweep` selector (int, str, or list), `first_dim` +(`"auto"` or `"time"`), `optional`, and `optional_groups`. They behave +uniformly across all engines: + +```{code-cell} +# Single sweep by index +dtree = xr.open_datatree(odim_file, engine="odim", sweep=0) +list(dtree.children) +``` + +```{code-cell} +# Multiple sweeps by index +dtree = xr.open_datatree(odim_file, engine="odim", sweep=[0, 2, 4]) +list(dtree.children) +``` + +```{code-cell} +# Sweeps by name +dtree = xr.open_datatree( + cfradial1_file, engine="cfradial1", sweep=["sweep_0", "sweep_3"] +) +list(dtree.children) +``` + +## `open_groups_as_dict` — work with the raw dict + +If you want the pre-`DataTree` dict directly (useful for inspection or +custom assembly), instantiate the backend entrypoint and call +`open_groups_as_dict`: + +```{code-cell} +groups = xd.io.OdimBackendEntrypoint().open_groups_as_dict(odim_file, sweep=[0, 1]) +list(groups) +``` + +## Deprecated `open_*_datatree` shims + +Most legacy `xd.io.open_*_datatree(...)` functions still work but +emit a `FutureWarning` directing users to the engine API. 
The one documented +exception is `xd.io.open_imd_datatree`, which remains the supported API for +multi-file IMD volumes (lists of per-sweep paths) and does **not** emit a +deprecation warning. + +```{code-cell} +with warnings.catch_warnings(record=True) as captured: + warnings.simplefilter("always") + xd.io.open_odim_datatree(odim_file, sweep=[0]) +[w.message for w in captured if issubclass(w.category, FutureWarning)] +``` + +## Unknown engine + +`xd.open_datatree` looks the engine up in xradar's registry and raises a +clear `ValueError` listing every supported name. (The xarray-native +`xr.open_datatree` uses xarray's own plugin discovery and raises a +different error from there.) + +```{code-cell} +try: + xd.open_datatree(odim_file, engine="nonexistent") +except ValueError as exc: + print(exc) +``` diff --git a/docs/usage.md b/docs/usage.md index 96b33f98..b569b392 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -17,6 +17,7 @@ import xradar datamodel importers exporters +notebooks/Open-Datatree-Engine notebooks/Accessors notebooks/Mapping_Sweeps notebooks/CfRadial1_Model_Transformation diff --git a/examples/notebooks/Open-Datatree-Engine.ipynb b/examples/notebooks/Open-Datatree-Engine.ipynb deleted file mode 100644 index 3dffffa1..00000000 --- a/examples/notebooks/Open-Datatree-Engine.ipynb +++ /dev/null @@ -1,424 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0", - "metadata": {}, - "source": [ - "# Open DataTree with `engine=` parameter\n", - "\n", - "This notebook demonstrates the new unified `open_datatree` API that allows opening radar files as `xarray.DataTree` using the `engine=` parameter.\n", - "\n", - "Three ways to open a DataTree:\n", - "- `xd.open_datatree(file, engine=\"...\")` — xradar unified API\n", - "- `xr.open_datatree(file, engine=\"...\")` — xarray native API\n", - "- `xd.io.open_*_datatree(file)` — legacy per-format functions (deprecated, emit `FutureWarning`)" - ] - }, - { - "cell_type": "code", - "execution_count": 
null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "import warnings\n", - "\n", - "import xarray as xr\n", - "from open_radar_data import DATASETS\n", - "\n", - "import xradar as xd" - ] - }, - { - "cell_type": "markdown", - "id": "2", - "metadata": {}, - "source": [ - "## Download test data\n", - "\n", - "Fetching radar data files from [open-radar-data](https://github.com/openradar/open-radar-data) repository." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "odim_file = DATASETS.fetch(\"71_20181220_060628.pvol.h5\")\n", - "cfradial1_file = DATASETS.fetch(\"cfrad.20080604_002217_000_SPOL_v36_SUR.nc\")\n", - "nexrad_file = DATASETS.fetch(\"KATX20130717_195021_V06\")" - ] - }, - { - "cell_type": "markdown", - "id": "4", - "metadata": {}, - "source": [ - "## 1. `xd.open_datatree()` — Unified xradar API\n", - "\n", - "The new unified entry point. Specify the `engine` to select the backend." - ] - }, - { - "cell_type": "markdown", - "id": "5", - "metadata": {}, - "source": [ - "### ODIM_H5" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "dtree = xd.open_datatree(odim_file, engine=\"odim\")\n", - "display(dtree)" - ] - }, - { - "cell_type": "markdown", - "id": "7", - "metadata": {}, - "source": [ - "The tree follows the CfRadial2 group structure with metadata groups at the root level and sweep groups below." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [ - "# Root dataset contains global metadata\n", - "display(dtree.ds)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [ - "# Access a specific sweep\n", - "display(dtree[\"sweep_0\"].ds)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [ - "# Metadata groups\n", - "print(\"radar_parameters:\", list(dtree[\"radar_parameters\"].ds.data_vars))\n", - "print(\n", - " \"georeferencing_correction:\", list(dtree[\"georeferencing_correction\"].ds.data_vars)\n", - ")\n", - "print(\"radar_calibration:\", list(dtree[\"radar_calibration\"].ds.data_vars))" - ] - }, - { - "cell_type": "markdown", - "id": "11", - "metadata": {}, - "source": [ - "### CfRadial1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "12", - "metadata": {}, - "outputs": [], - "source": [ - "dtree = xd.open_datatree(cfradial1_file, engine=\"cfradial1\")\n", - "display(dtree)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "13", - "metadata": {}, - "outputs": [], - "source": [ - "dtree[\"sweep_0\"].ds.DBZ.plot()" - ] - }, - { - "cell_type": "markdown", - "id": "14", - "metadata": {}, - "source": [ - "### NEXRAD Level 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "15", - "metadata": {}, - "outputs": [], - "source": [ - "dtree = xd.open_datatree(nexrad_file, engine=\"nexradlevel2\")\n", - "display(dtree)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "16", - "metadata": {}, - "outputs": [], - "source": [ - "dtree[\"sweep_0\"].ds.DBZH.plot()" - ] - }, - { - "cell_type": "markdown", - "id": "17", - "metadata": {}, - "source": [ - "## 2. Sweep selection\n", - "\n", - "Select specific sweeps by index (int or list) or by name." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "18", - "metadata": {}, - "outputs": [], - "source": [ - "# Single sweep by index\n", - "dtree = xd.open_datatree(odim_file, engine=\"odim\", sweep=0)\n", - "print(\"Children:\", list(dtree.children))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "19", - "metadata": {}, - "outputs": [], - "source": [ - "# Multiple sweeps by index\n", - "dtree = xd.open_datatree(odim_file, engine=\"odim\", sweep=[0, 2, 4])\n", - "print(\"Children:\", list(dtree.children))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20", - "metadata": {}, - "outputs": [], - "source": [ - "# Sweeps by name\n", - "dtree = xd.open_datatree(\n", - " cfradial1_file, engine=\"cfradial1\", sweep=[\"sweep_0\", \"sweep_3\"]\n", - ")\n", - "print(\"Children:\", list(dtree.children))" - ] - }, - { - "cell_type": "markdown", - "id": "21", - "metadata": {}, - "source": [ - "## 3. Backend kwargs\n", - "\n", - "Pass backend-specific options directly as keyword arguments." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "22", - "metadata": {}, - "outputs": [], - "source": [ - "# first_dim controls the leading dimension (\"auto\" uses azimuth/elevation)\n", - "# site_coords attaches latitude/longitude/altitude to sweep datasets\n", - "dtree = xd.open_datatree(\n", - " odim_file,\n", - " engine=\"odim\",\n", - " sweep=[0],\n", - " first_dim=\"auto\",\n", - " site_coords=True,\n", - ")\n", - "sweep_ds = dtree[\"sweep_0\"].ds\n", - "print(\"Dimensions:\", dict(sweep_ds.dims))\n", - "print(\"Site coords present:\", \"latitude\" in sweep_ds.coords)" - ] - }, - { - "cell_type": "markdown", - "id": "23", - "metadata": {}, - "source": [ - "## 4. `xr.open_datatree()` — xarray native API\n", - "\n", - "The same backends work directly with xarray's native `open_datatree`, no xradar wrapper needed." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "24", - "metadata": {}, - "outputs": [], - "source": [ - "dtree = xr.open_datatree(odim_file, engine=\"odim\")\n", - "display(dtree)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25", - "metadata": {}, - "outputs": [], - "source": [ - "dtree = xr.open_datatree(nexrad_file, engine=\"nexradlevel2\", sweep=[0, 1])\n", - "display(dtree)" - ] - }, - { - "cell_type": "markdown", - "id": "26", - "metadata": {}, - "source": [ - "## 5. `open_groups_as_dict()` — Low-level access\n", - "\n", - "For advanced use, get the raw `dict[str, Dataset]` before it becomes a DataTree." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "27", - "metadata": {}, - "outputs": [], - "source": [ - "from xradar.io.backends.odim import OdimBackendEntrypoint\n", - "\n", - "backend = OdimBackendEntrypoint()\n", - "groups = backend.open_groups_as_dict(odim_file, sweep=[0, 1])\n", - "\n", - "print(\"Group keys:\", list(groups.keys()))\n", - "print()\n", - "print(\"Root dataset:\")\n", - "display(groups[\"/\"])" - ] - }, - { - "cell_type": "markdown", - "id": "28", - "metadata": {}, - "source": [ - "## 6. Backward compatibility — deprecated functions\n", - "\n", - "The legacy per-format functions still work but emit a `FutureWarning` directing you to the new API." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "29", - "metadata": {}, - "outputs": [], - "source": [ - "with warnings.catch_warnings(record=True) as w:\n", - " warnings.simplefilter(\"always\")\n", - " dtree_old = xd.io.open_odim_datatree(odim_file, sweep=[0])\n", - " for warning in w:\n", - " if issubclass(warning.category, FutureWarning):\n", - " print(f\"FutureWarning: {warning.message}\")\n", - "\n", - "display(dtree_old)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "30", - "metadata": {}, - "outputs": [], - "source": [ - "# The old and new APIs produce equivalent results\n", - "dtree_new = xd.open_datatree(odim_file, engine=\"odim\", sweep=[0])\n", - "print(\"Same children:\", set(dtree_old.children) == set(dtree_new.children))" - ] - }, - { - "cell_type": "markdown", - "id": "31", - "metadata": {}, - "source": [ - "## 7. Error handling" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "32", - "metadata": {}, - "outputs": [], - "source": [ - "# Unknown engine raises a clear error\n", - "try:\n", - " xd.open_datatree(odim_file, engine=\"nonexistent\")\n", - "except ValueError as e:\n", - " print(f\"ValueError: {e}\")" - ] - }, - { - "cell_type": "markdown", - "id": "33", - "metadata": {}, - "source": [ - "## Summary\n", - "\n", - "| API | Example | Status |\n", - "|-----|---------|--------|\n", - "| `xd.open_datatree(file, engine=\"odim\")` | Unified xradar API | **New** |\n", - "| `xr.open_datatree(file, engine=\"odim\")` | xarray native API | **New** |\n", - "| `xd.io.open_odim_datatree(file)` | Per-format function | Deprecated |\n", - "\n", - "Supported engines: `\"odim\"`, `\"cfradial1\"`, `\"nexradlevel2\"`" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", 
- "name": "python", - "nbformat_minor": 5, - "version": "3.12.0" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/tests/io/test_backend_datatree.py b/tests/io/test_backend_datatree.py index 35fdcff0..27387087 100644 --- a/tests/io/test_backend_datatree.py +++ b/tests/io/test_backend_datatree.py @@ -231,6 +231,16 @@ def test_registry_contains_all_engines(self): } assert set(_ENGINE_REGISTRY.keys()) == expected + def test_demo_notebook_lists_all_engines(self): + """Bitrot guard: adding an engine to the registry must also be demoed.""" + from pathlib import Path + + repo_root = Path(__file__).resolve().parents[2] + notebook = repo_root / "docs/notebooks/Open-Datatree-Engine.md" + text = notebook.read_text() + for engine in _ENGINE_REGISTRY: + assert f'engine="{engine}"' in text, f"notebook missing engine={engine!r}" + # -- Backward compatibility & deprecation tests ------------------------------ diff --git a/xradar/io/__init__.py b/xradar/io/__init__.py index 649c244b..7a514413 100644 --- a/xradar/io/__init__.py +++ b/xradar/io/__init__.py @@ -78,4 +78,16 @@ def open_datatree(filename_or_obj, *, engine, **kwargs): return backend.open_datatree(filename_or_obj, **kwargs) +def list_engines(): + """Return a sorted list of engine names registered with :func:`open_datatree`. + + Examples + -------- + >>> import xradar as xd + >>> "odim" in xd.io.list_engines() + True + """ + return sorted(_ENGINE_REGISTRY) + + __all__ = [s for s in dir() if not s.startswith("_")] From 1f1edfb4631d2b332e4117cf0783bcf8a214fa77 Mon Sep 17 00:00:00 2001 From: aladinor Date: Mon, 11 May 2026 20:44:40 -0500 Subject: [PATCH 13/17] test(open-datatree-engine): broaden parametrized coverage to cfradial2 + imd Phase 6 of PR #335. - Add ("cfradial2", "cfradial2_file") to engine_and_file so the five parametrized tests in TestXdOpenDatatree exercise cfradial2. 
- Add ("imd", "imd_file") with pytest.mark.skip so a future maintainer can't accidentally re-include IMD without noticing it is a single-sweep-per-file carve-out. - Add test_xr_open_datatree_cfradial2 and test_xr_open_datatree_imd to TestXrOpenDatatree. - Add TestIMDMultiFile covering the single-file engine path and the module-level multi-file open_imd_datatree(list_of_paths) path. - Make CfRadial2BackendEntrypoint.open_groups_as_dict accept and *honor* site_coords (False now drops latitude/longitude/altitude from root coords; True is the default). Add TestCfRadial2SiteCoords covering both branches. - Move the empty-sweep-list check into cfradial2's _iter_selected_sweeps so the validation lives next to the parsing logic (single source of truth). --- tests/io/test_backend_datatree.py | 67 ++++++++++++++++++++++++++++++- xradar/io/backends/cfradial2.py | 13 +++++- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/tests/io/test_backend_datatree.py b/tests/io/test_backend_datatree.py index 27387087..e5600a5d 100644 --- a/tests/io/test_backend_datatree.py +++ b/tests/io/test_backend_datatree.py @@ -18,6 +18,7 @@ import xradar as xd from xradar.io import _ENGINE_REGISTRY +from xradar.io.backends import open_imd_datatree # -- Fixtures ---------------------------------------------------------------- @@ -28,16 +29,28 @@ pytest.param(("gamic", "gamic_file"), id="gamic"), pytest.param(("iris", "iris0_file"), id="iris"), pytest.param(("nexradlevel2", "nexradlevel2_file"), id="nexradlevel2"), + pytest.param(("cfradial2", "cfradial2_file"), id="cfradial2"), pytest.param(("furuno", "furuno_scn_file"), id="furuno"), pytest.param(("rainbow", "rainbow_file"), id="rainbow"), pytest.param(("datamet", "datamet_file"), id="datamet"), pytest.param(("hpl", "hpl_file"), id="hpl"), pytest.param(("metek", "metek_ave_gz_file"), id="metek"), pytest.param(("uf", "uf_file_1"), id="uf"), + pytest.param( + ("imd", "imd_file"), + marks=pytest.mark.skip( + reason="IMD is 
single-sweep-per-file; see TestIMDMultiFile", + ), + id="imd", + ), ] ) def engine_and_file(request): - """Parametrize over all engines with their fixture names.""" + """Parametrize over all engines. + + See ``TestIMDMultiFile`` for IMD-specific coverage (the multi-file + carve-out from the engine= API). + """ engine, fixture_name = request.param filepath = request.getfixturevalue(fixture_name) return engine, filepath @@ -191,6 +204,58 @@ def test_xr_open_datatree_uf(self, uf_file_1): dtree = xr.open_datatree(uf_file_1, engine="uf") _assert_cfradial2_structure(dtree) + def test_xr_open_datatree_cfradial2(self, cfradial2_file): + dtree = xr.open_datatree(cfradial2_file, engine="cfradial2") + _assert_cfradial2_structure(dtree) + + def test_xr_open_datatree_imd(self, imd_file): + dtree = xr.open_datatree(imd_file, engine="imd") + _assert_cfradial2_structure(dtree) + + +# -- IMD: multi-file carve-out vs single-file engine ------------------------- + + +class TestIMDMultiFile: + """IMD is the documented multi-file carve-out from the engine= API. + + The single-file path uses ``engine="imd"``; multi-file volumes still + go through the module-level ``xd.io.open_imd_datatree([files])``. + """ + + def test_engine_imd_handles_single_file(self, imd_file): + dtree = xd.open_datatree(imd_file, engine="imd") + _assert_cfradial2_structure(dtree) + sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] + assert len(sweep_groups) == 1 + + def test_module_level_handles_multi_file_volume(self, imd_volume_files): + # Precondition: each fixture file in `imd_volume_files` contains + # exactly one sweep, so the resulting volume has one sweep per file. 
+ dtree = open_imd_datatree(imd_volume_files) + _assert_cfradial2_structure(dtree) + sweep_groups = [k for k in dtree.children if k.startswith("sweep_")] + assert len(sweep_groups) == len(imd_volume_files) + + +# -- CfRadial2 site_coords behavior ------------------------------------------ + + +class TestCfRadial2SiteCoords: + """`site_coords` honors True/False for the CfRadial2 entrypoint.""" + + def test_site_coords_true_keeps_station_coords(self, cfradial2_file): + dtree = xd.open_datatree(cfradial2_file, engine="cfradial2", site_coords=True) + assert "latitude" in dtree.ds.coords + assert "longitude" in dtree.ds.coords + assert "altitude" in dtree.ds.coords + + def test_site_coords_false_drops_station_coords(self, cfradial2_file): + dtree = xd.open_datatree(cfradial2_file, engine="cfradial2", site_coords=False) + assert "latitude" not in dtree.ds.coords + assert "longitude" not in dtree.ds.coords + assert "altitude" not in dtree.ds.coords + # -- supports_groups attribute ----------------------------------------------- diff --git a/xradar/io/backends/cfradial2.py b/xradar/io/backends/cfradial2.py index 627b0c75..c6a4e4b9 100644 --- a/xradar/io/backends/cfradial2.py +++ b/xradar/io/backends/cfradial2.py @@ -178,6 +178,8 @@ def _iter_selected_sweeps(tree: DataTree, sweep: Any) -> list[str]: selected.append(f"sweep_{item}") else: selected.append(_normalize_sweep_name(item)) + if not selected: + raise ValueError("sweep list is empty.") return selected raise TypeError("sweep must be None, int, str or an iterable of ints/strings") @@ -615,9 +617,10 @@ def open_groups_as_dict( first_dim="time", optional=True, optional_groups=False, + site_coords=True, **kwargs, ): - return _build_cfradial2_dtree_dict( + groups_dict = _build_cfradial2_dtree_dict( filename_or_obj, sweep=sweep, first_dim=first_dim, @@ -625,6 +628,14 @@ def open_groups_as_dict( optional_groups=optional_groups, **kwargs, ) + # CfRadial2 places station coords at root by default. 
Honor + # site_coords=False by dropping them, matching the per-sweep + # contract used by odim/gamic/cfradial1. + if not site_coords: + groups_dict["/"] = groups_dict["/"].drop_vars( + _STATION_VARS, errors="ignore" + ) + return groups_dict def open_datatree(self, filename_or_obj, **kwargs): groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) From f8322634baa050fd9707f5eb01057d4ac24076f1 Mon Sep 17 00:00:00 2001 From: aladinor Date: Tue, 12 May 2026 07:12:11 -0500 Subject: [PATCH 14/17] docs(open-datatree-engine): document every BackendEntrypoint method Phase 7 of PR #335. NEXRAD parameters were previously impossible to discover from the rendered docs because every `open_groups_as_dict` and `open_datatree` method shipped with an empty docstring. Fix that for all 13 backends. xradar/io/backends/common.py gains: - COMMON_BACKEND_PARAMS_DOC: the shared NumPy-style Parameters block covering `filename_or_obj`, the seven xarray decoder kwargs, plus xradar-specific `sweep`, `first_dim`, `optional`, `optional_groups`. `optional_groups` explicitly states the three subgroups it controls (`/radar_parameters`, `/georeferencing_correction`, `/radar_calibration`), defaults to False. - REINDEX_PARAMS_DOC, HDF5_PARAMS_DOC, SITE_COORDS_PARAM_DOC, LOCK_PARAM_DOC: per-feature blocks shared across backends. - _compose_docstring(summary, *extra_blocks): textwrap-normalized composer that authors can write at any indent level. Each backend imports the blocks it needs and attaches the composed result via `ClassName.method.__doc__ = _compose_docstring(...)` after the class definition. docs/conf.py: add `autodoc_default_options = {"members": True, ...}` so Sphinx renders the new method docstrings on each entrypoint class. tests/io/test_backend_datatree.py: add TestDocstrings (parametrized over _ENGINE_REGISTRY) so a future refactor cannot silently delete a docstring without breaking a test. Add two unit tests for _compose_docstring. 
--- docs/conf.py | 11 +++ tests/io/test_backend_datatree.py | 55 +++++++++++ xradar/io/backends/cfradial1.py | 21 +++++ xradar/io/backends/cfradial2.py | 26 +++++- xradar/io/backends/common.py | 137 ++++++++++++++++++++++++++++ xradar/io/backends/datamet.py | 15 +++ xradar/io/backends/furuno.py | 21 +++++ xradar/io/backends/gamic.py | 16 ++++ xradar/io/backends/hpl.py | 29 ++++++ xradar/io/backends/imd.py | 24 +++++ xradar/io/backends/iris.py | 23 +++++ xradar/io/backends/metek.py | 16 ++++ xradar/io/backends/nexrad_level2.py | 27 ++++++ xradar/io/backends/odim.py | 18 ++++ xradar/io/backends/rainbow.py | 14 +++ xradar/io/backends/uf.py | 16 ++++ 16 files changed, 468 insertions(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index face209b..5c8e1f84 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -151,6 +151,17 @@ autosummary_generate = rst_files autoclass_content = "both" +# Render `open_groups_as_dict`, `open_datatree`, and `open_dataset` method +# docstrings on each BackendEntrypoint class. Without `members: True` the +# class page only shows the class-level docstring and class members get +# silently dropped — see `xradar/io/backends/common.py:_compose_docstring` +# for the per-method blocks that need rendering. +autodoc_default_options = { + "members": True, + "undoc-members": False, + "show-inheritance": True, +} + # The version info for the project you're documenting, acts as replacement # for |version| and |release|, also used in various other places throughout # the built documents. 
diff --git a/tests/io/test_backend_datatree.py b/tests/io/test_backend_datatree.py index e5600a5d..ebc757e8 100644 --- a/tests/io/test_backend_datatree.py +++ b/tests/io/test_backend_datatree.py @@ -272,6 +272,61 @@ def test_supports_groups(self, engine): assert backend_cls.supports_groups is True +# -- Docstring regression guard --------------------------------------------- + + +class TestDocstrings: + """`open_groups_as_dict` / `open_datatree` must carry usable docstrings. + + The composed docstrings are assigned by module-level side effects + (e.g. ``OdimBackendEntrypoint.open_groups_as_dict.__doc__ = ...``). + Without this guard a future refactor could silently delete a + docstring and no test would catch the regression. + """ + + @pytest.mark.parametrize( + "engine", + sorted(_ENGINE_REGISTRY.keys()), + ) + def test_open_groups_as_dict_has_param_docstring(self, engine): + doc = _ENGINE_REGISTRY[engine].open_groups_as_dict.__doc__ + assert doc, f"{engine} open_groups_as_dict has no docstring" + assert "Parameters" in doc + assert "Returns" in doc + assert "optional_groups" in doc + + @pytest.mark.parametrize( + "engine", + sorted(_ENGINE_REGISTRY.keys()), + ) + def test_open_datatree_references_groups_as_dict(self, engine): + doc = _ENGINE_REGISTRY[engine].open_datatree.__doc__ + assert doc, f"{engine} open_datatree has no docstring" + assert "open_groups_as_dict" in doc + + +def test_compose_docstring_structure(): + """`_compose_docstring` assembles summary + common block + extras + Returns.""" + from xradar.io.backends.common import REINDEX_PARAMS_DOC, _compose_docstring + + doc = _compose_docstring("Summary line.", REINDEX_PARAMS_DOC) + assert doc.startswith("Summary line.") + assert "Parameters" in doc + assert "Returns" in doc + assert "reindex_angle" in doc + assert "filename_or_obj" in doc # common block is always included + assert "dict[str, xarray.Dataset]" in doc + + +def test_compose_docstring_skips_empty_extra_blocks(): + """Empty/None extra blocks 
must not double-insert section headers.""" + from xradar.io.backends.common import _compose_docstring + + doc = _compose_docstring("Summary.", "", None) + assert doc.count("Parameters") == 1 + assert doc.count("Returns") == 1 + + # -- Engine registry --------------------------------------------------------- diff --git a/xradar/io/backends/cfradial1.py b/xradar/io/backends/cfradial1.py index 63a70b30..51e4f4b3 100644 --- a/xradar/io/backends/cfradial1.py +++ b/xradar/io/backends/cfradial1.py @@ -51,7 +51,10 @@ ) from .common import ( _STATION_VARS, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, + _compose_docstring, _deprecation_warning, _maybe_decode, ) @@ -545,3 +548,21 @@ def open_datatree( ): groups_dict = self.open_groups_as_dict(filename_or_obj, **kwargs) return DataTree.from_dict(groups_dict) + + +_CFRADIAL1_PARAMS_DOC = """ + engine : {"netcdf4", "h5netcdf"}, optional + Underlying NetCDF engine used by ``xr.open_dataset`` to read the + CfRadial1 file. Defaults to ``"netcdf4"``. +""" + +CfRadial1BackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a CfRadial1 file as a CfRadial2-shaped dict of group datasets.", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + _CFRADIAL1_PARAMS_DOC, +) +CfRadial1BackendEntrypoint.open_datatree.__doc__ = ( + "Open a CfRadial1 file as :py:class:`xarray.DataTree`. 
" + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) diff --git a/xradar/io/backends/cfradial2.py b/xradar/io/backends/cfradial2.py index c6a4e4b9..f7a514e3 100644 --- a/xradar/io/backends/cfradial2.py +++ b/xradar/io/backends/cfradial2.py @@ -58,7 +58,12 @@ required_root_vars, sweep_vars_mapping, ) -from .common import _STATION_VARS, _apply_site_as_coords, _deprecation_warning +from .common import ( + _STATION_VARS, + _apply_site_as_coords, + _compose_docstring, + _deprecation_warning, +) _ROOT_ATTR_RENAMES = { "RadarName": "instrument_name", @@ -642,6 +647,25 @@ def open_datatree(self, filename_or_obj, **kwargs): return DataTree.from_dict(groups_dict) +_CFRADIAL2_PARAMS_DOC = """ + site_coords : bool, optional + Keep ``latitude``/``longitude``/``altitude`` as coordinates on + the root dataset. CfRadial2 stores station coords at root by + default; pass ``False`` to drop them. Defaults to ``True``. +""" + +CfRadial2BackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a CfRadial2/FM301 grouped file as a dict of normalized group datasets.\n" + " Best-effort normalization of common institutional variations is\n" + " applied so the result matches xradar's FM301-oriented layout.", + _CFRADIAL2_PARAMS_DOC, +) +CfRadial2BackendEntrypoint.open_datatree.__doc__ = ( + "Open a CfRadial2/FM301 grouped file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + def open_cfradial2_datatree( filename_or_obj: str | PathLike[str], **kwargs: Any ) -> DataTree: diff --git a/xradar/io/backends/common.py b/xradar/io/backends/common.py index f4881aed..39d0f293 100644 --- a/xradar/io/backends/common.py +++ b/xradar/io/backends/common.py @@ -14,6 +14,7 @@ import io import struct +import textwrap import warnings from collections import OrderedDict @@ -432,6 +433,142 @@ def _deprecation_warning(old_name, engine): ) +#: NumPy-style Parameters block shared across all `open_groups_as_dict` +#: methods. 
Backend-specific blocks are appended via :func:`_compose_docstring`. +#: The CF decoder kwargs (`mask_and_scale`, `decode_times`, ...) thread +#: through to :py:func:`xarray.open_dataset`; see xarray's documentation for +#: full semantics. +COMMON_BACKEND_PARAMS_DOC = """ +Parameters +---------- +filename_or_obj : str, Path, or file-like + Path or file-like object understood by the underlying reader. +mask_and_scale : bool or dict-like, optional + Replace fill values with NA and apply ``scale_factor``/``add_offset`` + decoding. See :py:func:`xarray.open_dataset`. Defaults to ``True``. +decode_times : bool or dict-like, optional + Decode CF time variables (calendar, units) into ``np.datetime64``. + Defaults to ``True``. +concat_characters : bool or dict-like, optional + Concatenate character arrays into strings along their trailing + dimension. Defaults to ``True``. +decode_coords : bool or {"coordinates", "all"}, optional + Decode the CF ``coordinates`` attribute. Defaults to ``True`` + (equivalent to ``"coordinates"``). +drop_variables : str or iterable of str, optional + Names of variables to drop before processing. +use_cftime : bool, optional + Force ``cftime`` decoding for time variables (instead of + ``np.datetime64``). Defaults to ``None`` (auto). +decode_timedelta : bool, optional + Decode CF timedelta variables. Default mirrors ``decode_times`` + unless the backend overrides it (cfradial1, cfradial2, and imd + default to ``False``). +sweep : int, str, or list of int/str, optional + Sweep selection. ``None`` (default) returns all sweeps. An ``int`` + or ``"sweep_N"`` string returns one sweep; a list returns the + named subset. +first_dim : {"auto", "time"}, optional + Leading dimension of each sweep dataset. ``"auto"`` picks + ``azimuth`` (PPI) or ``elevation`` (RHI); ``"time"`` keeps the + raw time axis. Default ``"auto"`` (``"time"`` for cfradial2). +optional : bool, optional + Include optional root variables when available. Defaults to ``True``. 
+optional_groups : bool, optional + Include the ``/radar_parameters``, ``/georeferencing_correction``, + and ``/radar_calibration`` metadata subgroups under the root. + Defaults to ``False``. +""" + + +#: Reindex/angle parameter block — shared by backends that resample +#: rays onto a regular angular grid (odim, gamic, nexrad, cfradial1, +#: iris, furuno, uf, rainbow, datamet). +REINDEX_PARAMS_DOC = """ +reindex_angle : bool or dict, optional + Resample rays onto a regular angular grid when truthy. A dict is + passed as kwargs to :func:`xradar.util.reindex_angle` (e.g. + ``{"start_angle": 0.0, "stop_angle": 360.0, "angle_res": 1.0}``). + Only invoked when ``decode_coords=True``. Defaults to ``False``. +fix_second_angle : bool, optional + Correct erroneous secondary-angle values (azimuth on RHI, + elevation on PPI). Only effective with ``first_dim="auto"``. + Defaults to ``False``. +""" + +#: Site-coordinate parameter block. Most multi-sweep backends spell this +#: `site_coords`; IMD uses the legacy `site_as_coords`. +SITE_COORDS_PARAM_DOC = """ +site_coords : bool, optional + Attach ``latitude``/``longitude``/``altitude`` as coordinates on + the root dataset (and on per-sweep datasets where the backend + supports it). Defaults to ``True``. +""" + +#: HDF5/h5netcdf options shared by ODIM, GAMIC, HPL, Metek. +HDF5_PARAMS_DOC = """ +format : str, optional + h5netcdf format string. Defaults to ``None``. +invalid_netcdf : bool, optional + Accept HDF5 files that are not strictly NetCDF-conformant. +phony_dims : {"access", "sort", None}, optional + How h5netcdf labels unnamed dimensions. Defaults to ``"access"``. +decode_vlen_strings : bool, optional + Decode variable-length strings stored in HDF5. Defaults to ``True``. +""" + +#: Reader-lock parameter shared by NEXRAD, IRIS, UF. +LOCK_PARAM_DOC = """ +lock : threading.Lock or None, optional + Reader lock for thread-safe access. Defaults to ``None``. 
+""" + + +def _compose_docstring(summary, *extra_blocks): + """Compose a NumPy-style docstring from a summary plus parameter blocks. + + The composed result always opens with the shared + :data:`COMMON_BACKEND_PARAMS_DOC` Parameters block and closes with a + fixed Returns section. Per-backend blocks (e.g. :data:`HDF5_PARAMS_DOC`, + :data:`REINDEX_PARAMS_DOC`) are inserted between the common block and + the Returns section in the order given. + + Each block is independently de-indented and re-indented with four + spaces, so block authors do not need to keep the indentation in sync + by hand — write a block at any indent level and this helper + normalises it. + + Parameters + ---------- + summary : str + One-paragraph summary that opens the docstring. + *extra_blocks : str + Optional backend-specific parameter blocks. Each may use any + indentation; the helper normalises them to four-space indent. + + Returns + ------- + str + Complete docstring suitable for ``method.__doc__ = ...``. + """ + + def _block(text): + return textwrap.indent(textwrap.dedent(text).strip("\n"), " ") + + parts = [summary.strip("\n"), "", _block(COMMON_BACKEND_PARAMS_DOC)] + for block in extra_blocks: + if block: + parts.append(_block(block)) + returns_body = ( + "dict[str, xarray.Dataset]\n" + " CfRadial2 group paths (``/``, ``/sweep_N``, optional\n" + " ``/radar_parameters`` etc.) mapped to their datasets,\n" + " ready for :py:meth:`xarray.DataTree.from_dict`." + ) + parts += ["", " Returns", " -------", _block(returns_body)] + return "\n".join(parts) + "\n" + + def _resolve_sweeps(sweep, discover_fn): """Normalise the sweep parameter into a list of sweep group names. 
diff --git a/xradar/io/backends/datamet.py b/xradar/io/backends/datamet.py index 09c469c9..a5f27cf5 100644 --- a/xradar/io/backends/datamet.py +++ b/xradar/io/backends/datamet.py @@ -50,8 +50,11 @@ sweep_vars_mapping, ) from .common import ( + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, _build_groups_dict, + _compose_docstring, _deprecation_warning, _resolve_sweeps, ) @@ -495,6 +498,18 @@ def open_datatree(self, filename_or_obj, **kwargs): return DataTree.from_dict(groups_dict) +DataMetBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a DataMet (Servizio Meteorologico Italiano) ``.tar.gz`` archive as a\n" + " CfRadial2-shaped dict of group datasets.", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, +) +DataMetBackendEntrypoint.open_datatree.__doc__ = ( + "Open a DataMet archive as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + def open_datamet_datatree(filename_or_obj, **kwargs): """Open DataMet dataset as :py:class:`xarray.DataTree`. diff --git a/xradar/io/backends/furuno.py b/xradar/io/backends/furuno.py index b8d1baac..eaee3a74 100644 --- a/xradar/io/backends/furuno.py +++ b/xradar/io/backends/furuno.py @@ -68,14 +68,17 @@ sweep_vars_mapping, ) from .common import ( + REINDEX_PARAMS_DOC, SINT2, SINT4, + SITE_COORDS_PARAM_DOC, UINT1, UINT2, UINT4, _apply_site_as_coords, _build_groups_dict, _calculate_angle_res, + _compose_docstring, _deprecation_warning, _get_fmt_string, _resolve_sweeps, @@ -825,6 +828,24 @@ def open_datatree(self, filename_or_obj, **kwargs): return DataTree.from_dict(groups_dict) +_FURUNO_PARAMS_DOC = """ + obsmode : int or None, optional + Override the file's observation-mode flag (rare; only needed for + files written by older firmware). Defaults to ``None``. 
+""" + +FurunoBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a Furuno SCN or SCNX file as a CfRadial2-shaped dict of group datasets.", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + _FURUNO_PARAMS_DOC, +) +FurunoBackendEntrypoint.open_datatree.__doc__ = ( + "Open a Furuno SCN or SCNX file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + def open_furuno_datatree(filename_or_obj, **kwargs): """Open FURUNO dataset as :py:class:`xarray.DataTree`. diff --git a/xradar/io/backends/gamic.py b/xradar/io/backends/gamic.py index b261f513..19925019 100644 --- a/xradar/io/backends/gamic.py +++ b/xradar/io/backends/gamic.py @@ -63,8 +63,12 @@ sweep_vars_mapping, ) from .common import ( + HDF5_PARAMS_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, _build_groups_dict, + _compose_docstring, _deprecation_warning, _fix_angle, _get_h5group_names, @@ -549,6 +553,18 @@ def open_datatree(self, filename_or_obj, **kwargs): return DataTree.from_dict(groups_dict) +GamicBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a GAMIC HDF5 file as a CfRadial2-shaped dict of group datasets.", + HDF5_PARAMS_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, +) +GamicBackendEntrypoint.open_datatree.__doc__ = ( + "Open a GAMIC HDF5 file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + def open_gamic_datatree(filename_or_obj, **kwargs): """Open GAMIC HDF5 dataset as :py:class:`xarray.DataTree`. 
diff --git a/xradar/io/backends/hpl.py b/xradar/io/backends/hpl.py index f7fa367d..2fb46427 100644 --- a/xradar/io/backends/hpl.py +++ b/xradar/io/backends/hpl.py @@ -52,8 +52,11 @@ get_longitude_attrs, ) from .common import ( + HDF5_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, _build_groups_dict, + _compose_docstring, _deprecation_warning, _resolve_sweeps, ) @@ -654,6 +657,32 @@ def open_datatree(self, filename_or_obj, **kwargs): return DataTree.from_dict(groups_dict) +_HPL_PARAMS_DOC = """ +latitude : float, optional + Override the site latitude (HPL files often lack geolocation). +longitude : float, optional + Override the site longitude. +altitude : float, optional + Override the site altitude above sea level (meters). +transition_threshold_azi : float, optional + Azimuth-jump threshold (deg) for sweep boundary detection. +transition_threshold_el : float, optional + Elevation-jump threshold (deg) for sweep boundary detection. +""" + +HPLBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a Halo Photonics Stream Line (.hpl) lidar file as a\n" + " CfRadial2-shaped dict of group datasets.", + HDF5_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + _HPL_PARAMS_DOC, +) +HPLBackendEntrypoint.open_datatree.__doc__ = ( + "Open a Halo Photonics .hpl file as :py:class:`xarray.DataTree`. 
" + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + def _get_hpl_group_names(filename_or_obj): store = HplStore.open(filename_or_obj) return [f"sweep_{i}" for i in store.root.data["sweep_number"]] diff --git a/xradar/io/backends/imd.py b/xradar/io/backends/imd.py index 784ab13a..fc091b84 100644 --- a/xradar/io/backends/imd.py +++ b/xradar/io/backends/imd.py @@ -64,6 +64,7 @@ from .common import ( _STATION_VARS, _apply_site_as_coords, + _compose_docstring, _get_subgroup, ) @@ -508,6 +509,29 @@ def open_datatree(self, filename_or_obj, **kwargs): return DataTree.from_dict(groups_dict) +_IMD_PARAMS_DOC = """ + reindex_angle : bool or dict, optional + Resample rays onto a regular angular grid. See + :func:`xradar.util.reindex_angle`. Defaults to ``False``. + site_as_coords : bool, optional + Attach ``latitude``/``longitude``/``altitude`` as coords on the + sweep dataset. (Note: IMD uses the legacy ``site_as_coords`` + spelling rather than ``site_coords`` — kept for backward + compatibility.) Defaults to ``True``. +""" + +IMDBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a single IMD (India Meteorological Department) NetCDF file as a\n" + " CfRadial2-shaped dict of group datasets. Single-file only — for\n" + " multi-file IMD volumes use :func:`open_imd_datatree`.", + _IMD_PARAMS_DOC, +) +IMDBackendEntrypoint.open_datatree.__doc__ = ( + "Open a single IMD NetCDF file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + def _read_imd_sweep(filename, first_dim="auto", reindex_angle=False, **kwargs): """Open one IMD file and return a CfRadial2 sweep Dataset. 
diff --git a/xradar/io/backends/iris.py b/xradar/io/backends/iris.py index dac35cab..160d2e7f 100644 --- a/xradar/io/backends/iris.py +++ b/xradar/io/backends/iris.py @@ -64,8 +64,12 @@ sweep_vars_mapping, ) from .common import ( + LOCK_PARAM_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, _build_groups_dict, + _compose_docstring, _deprecation_warning, _resolve_sweeps, ) @@ -4114,6 +4118,25 @@ def open_datatree(self, filename_or_obj, **kwargs): return DataTree.from_dict(groups_dict) +_IRIS_PARAMS_DOC = """ +group : str or None, optional + Specific Iris product group to open (``ingest_data`` / + ``raw_product`` etc.). Defaults to all sweep groups. +""" + +IrisBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open an Iris/Sigmet RAW file as a CfRadial2-shaped dict of group datasets.", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + _IRIS_PARAMS_DOC, + LOCK_PARAM_DOC, +) +IrisBackendEntrypoint.open_datatree.__doc__ = ( + "Open an Iris/Sigmet RAW file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + def open_iris_datatree(filename_or_obj, **kwargs): """Open Iris/Sigmet dataset as :py:class:`xarray.DataTree`. diff --git a/xradar/io/backends/metek.py b/xradar/io/backends/metek.py index cc26790f..593ae537 100644 --- a/xradar/io/backends/metek.py +++ b/xradar/io/backends/metek.py @@ -41,7 +41,10 @@ get_time_attrs, ) from .common import ( + HDF5_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _build_groups_dict, + _compose_docstring, _deprecation_warning, _resolve_sweeps, ) @@ -678,6 +681,19 @@ def open_datatree(self, filename_or_obj, **kwargs): return DataTree.from_dict(groups_dict) +MRRBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a Metek MRR2 (.ave/.pro/.raw) file as a CfRadial2-shaped dict of\n" + " group datasets. 
MRR2 is a vertically pointing radar — the output\n" + " contains one ``sweep_0`` group with all profiles.", + HDF5_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, +) +MRRBackendEntrypoint.open_datatree.__doc__ = ( + "Open a Metek MRR2 file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + def open_metek_datatree(filename_or_obj, **kwargs): """Open Metek MRR2 dataset as :py:class:`xarray.DataTree`. diff --git a/xradar/io/backends/nexrad_level2.py b/xradar/io/backends/nexrad_level2.py index 5a2783bd..ac3d261e 100644 --- a/xradar/io/backends/nexrad_level2.py +++ b/xradar/io/backends/nexrad_level2.py @@ -56,8 +56,12 @@ from xradar import util from xradar.io.backends.common import ( _STATION_VARS, + LOCK_PARAM_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, _assign_root, + _compose_docstring, _deprecation_warning, _get_radar_calibration, _get_subgroup, @@ -2122,6 +2126,29 @@ def open_datatree( return DataTree.from_dict(groups_dict) +_NEXRAD_PARAMS_DOC = """ +incomplete_sweep : {"drop", "pad"}, optional + How to handle sweeps with fewer rays than the VCP nominal count. + ``"drop"`` (default) excludes them with a UserWarning; ``"pad"`` + keeps them with NaN-filled rays so the reindexed azimuth grid is + complete. +""" + +NexradLevel2BackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a NEXRAD Level II file as a CfRadial2-shaped dict of group datasets.\n" + " Accepts a single file path, a bytes buffer, or a list/tuple of LDM\n" + " chunk paths (the first chunk must hold the AR2V volume header).", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + _NEXRAD_PARAMS_DOC, + LOCK_PARAM_DOC, +) +NexradLevel2BackendEntrypoint.open_datatree.__doc__ = ( + "Open a NEXRAD Level II file as :py:class:`xarray.DataTree`. 
" + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + def open_nexradlevel2_datatree( filename_or_obj, mask_and_scale=True, diff --git a/xradar/io/backends/odim.py b/xradar/io/backends/odim.py index 3d487d9b..c8266962 100644 --- a/xradar/io/backends/odim.py +++ b/xradar/io/backends/odim.py @@ -66,8 +66,12 @@ sweep_vars_mapping, ) from .common import ( + HDF5_PARAMS_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, _build_groups_dict, + _compose_docstring, _deprecation_warning, _fix_angle, _get_h5group_names, @@ -935,6 +939,20 @@ def open_datatree( return DataTree.from_dict(groups_dict) +OdimBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open an ODIM_H5 file as a CfRadial2-shaped dict of group datasets.", + HDF5_PARAMS_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, +) +OdimBackendEntrypoint.open_datatree.__doc__ = ( + "Open an ODIM_H5 file as :py:class:`xarray.DataTree`.\n\n" + "Equivalent to " + "``DataTree.from_dict(self.open_groups_as_dict(filename_or_obj, **kwargs))``." + " See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + def open_odim_datatree(filename_or_obj, **kwargs): """Open ODIM_H5 dataset as :py:class:`xarray.DataTree`. 
diff --git a/xradar/io/backends/rainbow.py b/xradar/io/backends/rainbow.py index dcf7c279..5e6196e7 100644 --- a/xradar/io/backends/rainbow.py +++ b/xradar/io/backends/rainbow.py @@ -59,8 +59,11 @@ sweep_vars_mapping, ) from .common import ( + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, _build_groups_dict, + _compose_docstring, _deprecation_warning, _resolve_sweeps, ) @@ -910,6 +913,17 @@ def open_datatree(self, filename_or_obj, **kwargs): return DataTree.from_dict(groups_dict) +RainbowBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a Rainbow5 file as a CfRadial2-shaped dict of group datasets.", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, +) +RainbowBackendEntrypoint.open_datatree.__doc__ = ( + "Open a Rainbow5 file as :py:class:`xarray.DataTree`. " + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + def _get_rainbow_group_names(filename): with RainbowFile(filename, loaddata=False) as fh: cnt = len(fh.slices) diff --git a/xradar/io/backends/uf.py b/xradar/io/backends/uf.py index d6d2ab6c..6035df86 100644 --- a/xradar/io/backends/uf.py +++ b/xradar/io/backends/uf.py @@ -47,8 +47,12 @@ from xradar import util from xradar.io.backends.common import ( _STATION_VARS, + LOCK_PARAM_DOC, + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, _apply_site_as_coords, _assign_root, + _compose_docstring, _deprecation_warning, _get_radar_calibration, _get_subgroup, @@ -885,6 +889,18 @@ def open_datatree(self, filename_or_obj, **kwargs): return DataTree.from_dict(groups_dict) +UFBackendEntrypoint.open_groups_as_dict.__doc__ = _compose_docstring( + "Open a Universal Format (UF) file as a CfRadial2-shaped dict of group datasets.", + REINDEX_PARAMS_DOC, + SITE_COORDS_PARAM_DOC, + LOCK_PARAM_DOC, +) +UFBackendEntrypoint.open_datatree.__doc__ = ( + "Open a Universal Format (UF) file as :py:class:`xarray.DataTree`. 
" + "See :meth:`open_groups_as_dict` for keyword arguments.\n" +) + + def open_uf_datatree(filename_or_obj, **kwargs): """Open a Universal Format (UF) dataset as :py:class:`xarray.DataTree`. From 07709dab468d70bfdb898e8ec9ce7613db2908b7 Mon Sep 17 00:00:00 2001 From: aladinor Date: Tue, 12 May 2026 07:51:52 -0500 Subject: [PATCH 15/17] fix(io): final-audit regressions in odim/uf/hpl/cfradial1 wrappers Phase 8 (final audit) of PR #335. The original Phase 1/2 backend conversion shipped four regressions in the legacy `open_*_datatree` wrappers and in cfradial1's per-sweep handling. Earlier phase smoke tests didn't run the full per-backend test files, so these escaped until the Phase 8 sweep. - odim: translate legacy `site_as_coords=` kwarg to the canonical `site_coords=` accepted by the entrypoint (NEXRAD/cfradial1 pattern). - uf: normalize `"/sweep_N"` NodePath strings and raise ValueError on non-int/non-str list items (NEXRAD pattern). - hpl: HplFile stores a junk lead-in entry at internal `sweep_0` for multi-sweep files. Shift integer sweep indices by +1 ONLY when n_sweeps > 1, so the new engine= API matches upstream behavior. Single-sweep files are unaffected (no junk entry). NOTE(review): the hpl hunks in this patch also change the defaults `transition_threshold_azi` (0.05 -> 0.01) and `transition_threshold_el` (0.001 -> 0.005) in `open_dataset` and `open_groups_as_dict`; this is not one of the regressions described here — confirm it is intended. - cfradial1: drop _STATION_VARS from per-sweep datasets in open_groups_as_dict so station coords live only on the root, matching upstream's _attach_sweep_groups behavior. - history.md: enrich the PR #335 entry to mention cfradial2/imd, xd.io.list_engines(), NumPy-style docstrings, and the demo notebook. The remaining test_open_furuno_datatree failure (asserts 21 vars, gets 18) is pre-existing on upstream/main and unrelated. 
--- docs/history.md | 2 +- xradar/io/backends/cfradial1.py | 4 +++- xradar/io/backends/hpl.py | 22 ++++++++++++++++++---- xradar/io/backends/odim.py | 4 ++++ xradar/io/backends/uf.py | 18 +++++++++++++++++- 5 files changed, 43 insertions(+), 7 deletions(-) diff --git a/docs/history.md b/docs/history.md index e4ee3c3b..ce3923df 100644 --- a/docs/history.md +++ b/docs/history.md @@ -2,7 +2,7 @@ ## Development -* ENH: Add xarray-native ``open_datatree`` with ``engine=`` parameter for all 13 backends, enabling ``xd.open_datatree(file, engine="odim")`` and ``xr.open_datatree(file, engine="odim")``. Legacy ``open_*_datatree()`` functions emit ``FutureWarning`` and delegate to the new entry points ({issue}`329`, {pull}`335`) by [@aladinor](https://github.com/aladinor) +* ENH: Add xarray-native ``open_datatree`` with ``engine=`` parameter for all 13 backends (odim, cfradial1, cfradial2, nexradlevel2, gamic, iris, furuno, rainbow, datamet, hpl, metek, uf, imd), enabling ``xd.open_datatree(file, engine="odim")`` and ``xr.open_datatree(file, engine="odim")``. Legacy ``open_*_datatree()`` functions emit ``FutureWarning`` and delegate to the new entry points; ``xd.io.open_imd_datatree([files])`` remains the documented exception for multi-file IMD volumes. Also adds public ``xd.io.list_engines()`` for engine discovery, NumPy-style docstrings on every ``BackendEntrypoint.open_groups_as_dict`` / ``open_datatree`` method, and a demo notebook covering all 13 engines ({issue}`329`, {pull}`335`) by [@aladinor](https://github.com/aladinor) * FIX: ensure `to_cfradial2` correctly selects the default storage engine when none is provided, ({pull}`378`) by [@chfer](https://github.com/chfer) * MNT: Add ``cfradial1_sgp_file`` session fixture and refactor 8 tests in ``test_util.py``/``test_accessors.py`` to share it instead of inlining ``DATASETS.fetch("sample_sgp_data.nc")``. 
Fixture returns the filename so each test opens its own DataTree, avoiding cross-test mutation ({issue}`346`, {pull}`347`) by [@aladinor](https://github.com/aladinor) * FIX: IRIS reader rotates the first-loaded moment in each sweep by 1 ray — ``IrisRawFile._get_ray_record_offsets_and_data`` initialised ``j = -1`` so the first matching ray of the first-loaded moment was written to ``raw_data[-1]``; affects files without ``DB_XHDR`` (data-type bit 0) where ``DB_DBT`` becomes the rotated moment ({issue}`357`, {pull}`375`) by [@aladinor](https://github.com/aladinor) diff --git a/xradar/io/backends/cfradial1.py b/xradar/io/backends/cfradial1.py index 51e4f4b3..8c75f262 100644 --- a/xradar/io/backends/cfradial1.py +++ b/xradar/io/backends/cfradial1.py @@ -537,7 +537,9 @@ def open_groups_as_dict( ) for i, sw_ds in enumerate(sweep_datasets): - groups_dict[f"/sweep_{i}"] = sw_ds.drop_attrs(deep=False) + # Drop station coords from per-sweep datasets — they live on root. + sw = sw_ds.drop_vars(_STATION_VARS, errors="ignore") + groups_dict[f"/sweep_{i}"] = sw.drop_attrs(deep=False) return groups_dict diff --git a/xradar/io/backends/hpl.py b/xradar/io/backends/hpl.py index 2fb46427..08871e9d 100644 --- a/xradar/io/backends/hpl.py +++ b/xradar/io/backends/hpl.py @@ -539,8 +539,8 @@ def open_dataset( latitude=0, longitude=0, altitude=0, - transition_threshold_azi=0.05, - transition_threshold_el=0.001, + transition_threshold_azi=0.01, + transition_threshold_el=0.005, ): store_entrypoint = StoreBackendEntrypoint() @@ -614,9 +614,23 @@ def open_groups_as_dict( latitude=0, longitude=0, altitude=0, - transition_threshold_azi=0.05, - transition_threshold_el=0.001, + transition_threshold_azi=0.01, + transition_threshold_el=0.005, ): + # For multi-sweep HPL files, `HplFile._data["sweep_0"]` is a junk + # lead-in entry (internal `sweep_number == -1`); the real first + # sweep lives at `sweep_1`. Shift integer-style selections by +1 + # so `sweep=0` maps to the real first sweep. 
Single-sweep files + # don't have the junk entry, so no shift is needed. + if isinstance(sweep, (int, list)): + with HplFile(filename_or_obj) as fh: + n_sweeps = len(fh.data["sweep_number"]) + if n_sweeps > 1: + if isinstance(sweep, int): + sweep = sweep + 1 + elif sweep and isinstance(sweep[0], int): + sweep = [i + 1 for i in sweep] + sweeps = _resolve_sweeps(sweep, lambda: _get_hpl_group_names(filename_or_obj)) ds_kwargs = dict( diff --git a/xradar/io/backends/odim.py b/xradar/io/backends/odim.py index c8266962..0d696a09 100644 --- a/xradar/io/backends/odim.py +++ b/xradar/io/backends/odim.py @@ -997,6 +997,10 @@ def open_odim_datatree(filename_or_obj, **kwargs): optional = backend_kwargs.pop("optional", True) optional_groups = kwargs.pop("optional_groups", False) sweep = kwargs.pop("sweep", None) + # Translate the legacy `site_as_coords` spelling to the canonical + # `site_coords` kwarg the entrypoint accepts. + if "site_as_coords" in kwargs: + kwargs["site_coords"] = kwargs.pop("site_as_coords") return OdimBackendEntrypoint().open_datatree( filename_or_obj, diff --git a/xradar/io/backends/uf.py b/xradar/io/backends/uf.py index 6035df86..4311b456 100644 --- a/xradar/io/backends/uf.py +++ b/xradar/io/backends/uf.py @@ -560,7 +560,8 @@ class UFStore(AbstractDataStore): def __init__(self, manager, group=None, lock=UF_LOCK): self._manager = manager - self._group = int(group[6:]) + 1 + # Accept both ``"sweep_N"`` and ``"/sweep_N"`` (NodePath form). + self._group = int(group.rsplit("sweep_", 1)[-1]) + 1 self._filename = self.filename self._need_time_recalc = False self.lock = ensure_lock(lock) @@ -839,6 +840,21 @@ def open_groups_as_dict( lock=None, **kwargs, ): + from xarray.core.treenode import NodePath + + # Normalise NodePath strings ("/sweep_0" -> "sweep_0") and validate + # list element types before resolving. 
+ if isinstance(sweep, str): + sweep = NodePath(sweep).name + elif isinstance(sweep, list) and sweep: + if isinstance(sweep[0], str): + sweep = [NodePath(i).name for i in sweep] + elif not isinstance(sweep[0], int): + raise ValueError( + "Invalid type in 'sweep' list. Expected integers " + "(e.g., [0, 1, 2]) or strings (e.g. [/sweep_0, sweep_1])." + ) + sweeps = _resolve_sweeps( sweep, lambda: [ From db24741c645c1c4c1d8c2db330fa47e1c7767913 Mon Sep 17 00:00:00 2001 From: aladinor Date: Tue, 12 May 2026 10:34:56 -0500 Subject: [PATCH 16/17] fix(tests): align Furuno sweep variable count with all other backends (18) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The assertion `len(dtree[sample_sweep].variables) == 21` was bumped from 18 to 21 by PR #337 with the design intent of keeping latitude/longitude/ altitude as plain data variables on each sweep. But both the legacy `_attach_sweep_groups` and the new `_build_groups_dict` actually drop station vars from sweeps entirely (consistent with the parallel `TestStationCoordsOnRoot` tests in test_io.py, which assert station vars are NOT in sweep coords *and* NOT in sweep data_vars for all 4 backends they cover). Net result: the 21-count assertion was stale on upstream/main as well — this commit just realigns it with the actual cross-backend behavior. --- tests/io/test_furuno.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/io/test_furuno.py b/tests/io/test_furuno.py index c8eb28eb..6aac103f 100644 --- a/tests/io/test_furuno.py +++ b/tests/io/test_furuno.py @@ -670,7 +670,9 @@ def test_open_furuno_datatree(furuno_scn_file): assert "altitude" in dtree.ds.coords assert "latitude" not in dtree.ds.data_vars - assert len(dtree[sample_sweep].variables) == 21 + # Station vars (latitude/longitude/altitude) live as coordinates on the + # root only; per-sweep datasets do not duplicate them. 
+ assert len(dtree[sample_sweep].variables) == 18 assert dtree[sample_sweep]["DBZH"].shape == (360, 602) assert len(dtree.attrs) == 9 assert dtree.attrs["version"] == 3 From 9830e016b7805ab176e579f7a3bea70396eeff7e Mon Sep 17 00:00:00 2001 From: aladinor Date: Tue, 12 May 2026 11:22:47 -0500 Subject: [PATCH 17/17] docs(notebooks): migrate all notebooks to xd.open_datatree(engine="...") MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 8 follow-up. Replace every `xd.io.open_*_datatree(...)` call in the existing 20 notebooks with the new `xd.open_datatree(file, engine="...", ...)` API so the rendered docs teach the canonical surface and stop emitting `FutureWarning`s during doc builds. - 19 notebooks migrated: Assign_GeoCoords, CfRadial1, CfRadial1_Export, CfRadial1_Model_Transformation, Furuno, GAMIC, Georeference_TargetCRS, HaloPhotonics, Iris, MRR, Mapping_Sweeps, NexradLevel2, ODIM_H5, Rainbow, Transform, UF, multiple-sweeps-into-volume-scan, nexrad_read_chunks, plot-ppi. - IMD: single-file path now uses `xd.open_datatree(file, engine="imd")`; multi-file path keeps `xd.io.open_imd_datatree([files])` (the documented carve-out from the engine= API). - Open-Datatree-Engine.md: the one remaining `xd.io.open_odim_datatree` call is intentional — it's in the "deprecated shims" section demonstrating the FutureWarning. - `help(xd.io.open_*_datatree)` / `?xd.io.open_*_datatree` introspection calls updated to `help(xd.open_datatree)` / `?xd.open_datatree`. - HaloPhotonics: drop `backend_kwargs=dict(...)` indirection in favor of direct `latitude=` / `longitude=` kwargs accepted by the entrypoint. Smoke-tested via `pytest -n auto --dist loadscope docs/notebooks`: 25 of 26 notebooks pass. The one failure (`multiple-sweeps-into-volume-scan`) is `RuntimeError: Requested MovieWriter (ffmpeg) not available` — an env-only issue (ffmpeg is not installed in my dev shell, but CI has it). All migrated calls themselves execute cleanly. 
--- docs/notebooks/Assign_GeoCoords.md | 4 ++-- docs/notebooks/CfRadial1.md | 9 ++++----- docs/notebooks/CfRadial1_Export.md | 2 +- docs/notebooks/CfRadial1_Model_Transformation.md | 4 ++-- docs/notebooks/Furuno.md | 2 +- docs/notebooks/GAMIC.md | 10 +++++----- docs/notebooks/Georeference_TargetCRS.md | 2 +- docs/notebooks/HaloPhotonics.md | 8 +++++--- docs/notebooks/IMD.md | 7 ++++++- docs/notebooks/Iris.md | 11 +++++------ docs/notebooks/MRR.md | 4 ++-- docs/notebooks/Mapping_Sweeps.md | 2 +- docs/notebooks/NexradLevel2.md | 12 +++++------- docs/notebooks/ODIM_H5.md | 10 +++++----- docs/notebooks/Rainbow.md | 10 +++++----- docs/notebooks/Transform.md | 2 +- docs/notebooks/UF.md | 10 +++++----- docs/notebooks/multiple-sweeps-into-volume-scan.md | 4 ++-- docs/notebooks/nexrad_read_chunks.md | 11 +++++------ docs/notebooks/plot-ppi.md | 2 +- 20 files changed, 64 insertions(+), 62 deletions(-) diff --git a/docs/notebooks/Assign_GeoCoords.md b/docs/notebooks/Assign_GeoCoords.md index 40262791..7f92ee36 100644 --- a/docs/notebooks/Assign_GeoCoords.md +++ b/docs/notebooks/Assign_GeoCoords.md @@ -95,7 +95,7 @@ file2 = DATASETS.fetch("cfrad.20211011_201557.188_to_20211011_201617.720_DOW8_PP **Note:** Station coordinates (`latitude`, `longitude`, `altitude`) are stored on the root node of the DataTree. When accessing a sweep dataset directly, use `.to_dataset(inherit="all_coords")` to inherit these coordinates from the root. The `.xradar.georeference()` accessor handles this automatically. 
```{code-cell} -dtree1 = xd.io.open_cfradial1_datatree(file1) +dtree1 = xd.open_datatree(file1, engine="cfradial1") ``` ```{code-cell} @@ -131,7 +131,7 @@ plt.show() ## Example #2 ```{code-cell} -dtree2 = xd.io.open_cfradial1_datatree(file2) +dtree2 = xd.open_datatree(file2, engine="cfradial1") ``` ```{code-cell} diff --git a/docs/notebooks/CfRadial1.md b/docs/notebooks/CfRadial1.md index 6759efd8..6fff1596 100644 --- a/docs/notebooks/CfRadial1.md +++ b/docs/notebooks/CfRadial1.md @@ -84,12 +84,11 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep number or list. ```{code-cell} -?xd.io.open_cfradial1_datatree +?xd.open_datatree ``` ```{code-cell} -dtree = xd.io.open_cfradial1_datatree( - filename, +dtree = xd.open_datatree(filename, engine="cfradial1", first_dim="time", optional=False, ) @@ -109,11 +108,11 @@ dtree["sweep_0"].ds.DBZ.sortby("azimuth").plot(y="azimuth") ``` ```{code-cell} -dtree = xd.io.open_cfradial1_datatree(filename, sweep=[0, 1, 8]) +dtree = xd.open_datatree(filename, engine="cfradial1", sweep=[0, 1, 8]) display(dtree) ``` ```{code-cell} -dtree = xd.io.open_cfradial1_datatree(filename, sweep=["sweep_0", "sweep_4", "sweep_8"]) +dtree = xd.open_datatree(filename, engine="cfradial1", sweep=["sweep_0", "sweep_4", "sweep_8"]) display(dtree) ``` diff --git a/docs/notebooks/CfRadial1_Export.md b/docs/notebooks/CfRadial1_Export.md index 8cc1a32f..4c1a451d 100644 --- a/docs/notebooks/CfRadial1_Export.md +++ b/docs/notebooks/CfRadial1_Export.md @@ -32,7 +32,7 @@ filename = DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") ``` ```{code-cell} -radar = xd.io.open_cfradial1_datatree(filename, first_dim="auto") +radar = xd.open_datatree(filename, engine="cfradial1", first_dim="auto") display(radar) ``` diff --git a/docs/notebooks/CfRadial1_Model_Transformation.md b/docs/notebooks/CfRadial1_Model_Transformation.md index 60af2c9c..60685dc1 100644 --- 
a/docs/notebooks/CfRadial1_Model_Transformation.md +++ b/docs/notebooks/CfRadial1_Model_Transformation.md @@ -203,7 +203,7 @@ xradar provides two easy ways to retrieve the CfRadial1 data as CfRadial2 groups This is the most complete representation as a DataTree. All groups and subgroups are represented in a tree-like structure. Can be parameterized using kwargs. Easy write to netCDF4. ```{code-cell} -dtree = xd.io.open_cfradial1_datatree(filename, optional_groups=True) +dtree = xd.open_datatree(filename, engine="cfradial1", optional_groups=True) with xr.set_options(display_expand_data_vars=True, display_expand_attrs=True): display(dtree) ``` @@ -247,7 +247,7 @@ for grp in dtree.groups: #### Roundtrip with `xradar.io.to_cfradial2` ```{code-cell} -dtree3 = xd.io.open_cfradial1_datatree(filename, optional_groups=True) +dtree3 = xd.open_datatree(filename, engine="cfradial1", optional_groups=True) ``` ```{code-cell} diff --git a/docs/notebooks/Furuno.md b/docs/notebooks/Furuno.md index 10ac4df5..85ac115b 100644 --- a/docs/notebooks/Furuno.md +++ b/docs/notebooks/Furuno.md @@ -94,7 +94,7 @@ ds.DBZH.sortby("azimuth").plot(y="azimuth") Furuno scn/scnx files consist only of one sweep. But we might load and combine several sweeps into one DataTree. ```{code-cell} -dtree = xd.io.open_furuno_datatree(filename_scn) +dtree = xd.open_datatree(filename_scn, engine="furuno") display(dtree) ``` diff --git a/docs/notebooks/GAMIC.md b/docs/notebooks/GAMIC.md index f4ee5591..4b8fb3ce 100644 --- a/docs/notebooks/GAMIC.md +++ b/docs/notebooks/GAMIC.md @@ -75,11 +75,11 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep string, number or list. 
```{code-cell} -help(xd.io.open_gamic_datatree) +help(xd.open_datatree) ``` ```{code-cell} -dtree = xd.io.open_gamic_datatree(filename, sweep=8) +dtree = xd.open_datatree(filename, engine="gamic", sweep=8) display(dtree) ``` @@ -96,16 +96,16 @@ dtree["sweep_0"].ds.DBZH.plot() ``` ```{code-cell} -dtree = xd.io.open_gamic_datatree(filename, sweep="sweep_8") +dtree = xd.open_datatree(filename, engine="gamic", sweep="sweep_8") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_gamic_datatree(filename, sweep=[0, 1, 8]) +dtree = xd.open_datatree(filename, engine="gamic", sweep=[0, 1, 8]) display(dtree) ``` ```{code-cell} -dtree = xd.io.open_gamic_datatree(filename, sweep=["sweep_1", "sweep_2", "sweep_8"]) +dtree = xd.open_datatree(filename, engine="gamic", sweep=["sweep_1", "sweep_2", "sweep_8"]) display(dtree) ``` diff --git a/docs/notebooks/Georeference_TargetCRS.md b/docs/notebooks/Georeference_TargetCRS.md index 15fec4c5..ca0b2222 100644 --- a/docs/notebooks/Georeference_TargetCRS.md +++ b/docs/notebooks/Georeference_TargetCRS.md @@ -42,7 +42,7 @@ import xradar as xd ```{code-cell} filename = DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") -radar = xd.io.open_cfradial1_datatree(filename, first_dim="auto") +radar = xd.open_datatree(filename, engine="cfradial1", first_dim="auto") radar ``` diff --git a/docs/notebooks/HaloPhotonics.md b/docs/notebooks/HaloPhotonics.md index 77b8147b..b7e0cb0c 100644 --- a/docs/notebooks/HaloPhotonics.md +++ b/docs/notebooks/HaloPhotonics.md @@ -22,15 +22,17 @@ import xradar as xd Opening a Halo Photonics Doppler lidar .hpl file. -We use the `xd.io.open_hpl_datatree` in order to load the Halo Photonics Doppler lidar data. After that we will need to enter in the latitude and longitude in order to properly georeference the data. The .hpl file does not contain the latitude, longitude, or altitude of the lidar, so these need to be entered in as keywords as a part of the `backend_kwargs` argument to `xd.io.open_hpl_datatree`. 
+We use `xd.open_datatree(file, engine="hpl")` to load the Halo Photonics Doppler lidar data. The .hpl file does not contain the latitude, longitude, or altitude of the lidar, so those need to be passed as `latitude=`, `longitude=`, and `altitude=` keyword arguments. In this example, we are using the coordinates of the Doppler lidar at the Nantucket Wastewater Management Facility, deployed as as part of the DOE Energy Efficiency and Renewable Energy Office's [3rd Wind Forecast Improvement Project](https://www2.whoi.edu/site/wfip3/). ```{code-cell} -ds = xd.io.open_hpl_datatree( +ds = xd.open_datatree( DATASETS.fetch("User1_184_20240601_013257.hpl"), + engine="hpl", sweep=[0, 1, 2, 3, 4, 5, 6, 7, 8], - backend_kwargs=dict(latitude=41.24276244459537, longitude=-70.1070364814594), + latitude=41.24276244459537, + longitude=-70.1070364814594, ) ``` diff --git a/docs/notebooks/IMD.md b/docs/notebooks/IMD.md index c26b4082..4373dd65 100644 --- a/docs/notebooks/IMD.md +++ b/docs/notebooks/IMD.md @@ -83,12 +83,17 @@ help(xd.io.open_imd_datatree) ### Single sweep ```{code-cell} -dtree = xd.io.open_imd_datatree(filename_sweep_0) +dtree = xd.open_datatree(filename_sweep_0, engine="imd") display(dtree) ``` ### Volume from multiple files +`xd.open_datatree(..., engine="imd")` takes a single file. To assemble a +full IMD volume from a list of per-sweep files, use the dedicated +`xd.io.open_imd_datatree` function (the documented multi-file carve-out +from the engine API). + ```{code-cell} dtree = xd.io.open_imd_datatree(volume_files) display(dtree) diff --git a/docs/notebooks/Iris.md b/docs/notebooks/Iris.md index e75e08f6..fba66062 100644 --- a/docs/notebooks/Iris.md +++ b/docs/notebooks/Iris.md @@ -76,27 +76,26 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep string, number or list. The underlying xarray.Dataset can be accessed with property `.ds`. 
```{code-cell} -help(xd.io.open_iris_datatree) +help(xd.open_datatree) ``` ```{code-cell} -dtree = xd.io.open_iris_datatree(filename_volume) +dtree = xd.open_datatree(filename_volume, engine="iris") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_iris_datatree(filename_volume, sweep="sweep_8") +dtree = xd.open_datatree(filename_volume, engine="iris", sweep="sweep_8") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_iris_datatree(filename_volume, sweep=[1, 2, 8]) +dtree = xd.open_datatree(filename_volume, engine="iris", sweep=[1, 2, 8]) display(dtree) ``` ```{code-cell} -dtree = xd.io.open_iris_datatree( - filename_volume, +dtree = xd.open_datatree(filename_volume, engine="iris", sweep=["sweep_0", "sweep_1", "sweep_8"], ) display(dtree) diff --git a/docs/notebooks/MRR.md b/docs/notebooks/MRR.md index 96cb9094..1c572795 100644 --- a/docs/notebooks/MRR.md +++ b/docs/notebooks/MRR.md @@ -21,7 +21,7 @@ from open_radar_data import DATASETS import xradar as xd ``` -`xd.io.open_metek_datatree` supports the Metek MRR2 processed (.pro, .ave) and raw (.raw) files. The initialized datatree will contain all vertically pointing radar data in one sweep. +`xd.open_datatree(file, engine="metek")` supports the Metek MRR2 processed (.pro, .ave) and raw (.raw) files. The initialized datatree will contain all vertically pointing radar data in one sweep. In this example, we are loading the 60 s average files from the MRR2 sampling a rain event over the Argonne Testbed for Multiscale Observational Science at Argonne National Laboratory in the Chicago suburbs. 
@@ -34,7 +34,7 @@ decompressed_file = mrr_test_file[:-3] with gzip.open(mrr_test_file, "rb") as f_in: with open(decompressed_file, "wb") as f_out: shutil.copyfileobj(f_in, f_out) -with xd.io.open_metek_datatree(decompressed_file) as ds: +with xd.open_datatree(decompressed_file, engine="metek") as ds: display(ds) ``` diff --git a/docs/notebooks/Mapping_Sweeps.md b/docs/notebooks/Mapping_Sweeps.md index 6b45e327..4b3c1627 100644 --- a/docs/notebooks/Mapping_Sweeps.md +++ b/docs/notebooks/Mapping_Sweeps.md @@ -49,7 +49,7 @@ import xradar as xd filename = DATASETS.fetch("sample_sgp_data.nc") # Open the radar file into a DataTree object -dtree = xd.io.open_cfradial1_datatree(filename) +dtree = xd.open_datatree(filename, engine="cfradial1") dtree = dtree.xradar.georeference() ``` diff --git a/docs/notebooks/NexradLevel2.md b/docs/notebooks/NexradLevel2.md index 4b923e3b..e039a58a 100644 --- a/docs/notebooks/NexradLevel2.md +++ b/docs/notebooks/NexradLevel2.md @@ -86,11 +86,11 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep string, number or list. 
```{code-cell} -help(xd.io.open_nexradlevel2_datatree) +help(xd.open_datatree) ``` ```{code-cell} -dtree = xd.io.open_nexradlevel2_datatree(filename, sweep=4) +dtree = xd.open_datatree(filename, engine="nexradlevel2", sweep=4) display(dtree) ``` @@ -107,12 +107,12 @@ dtree["sweep_4"].ds.DBZH.plot(cmap="HomeyerRainbow") ``` ```{code-cell} -dtree = xd.io.open_nexradlevel2_datatree(filename, sweep="sweep_8") +dtree = xd.open_datatree(filename, engine="nexradlevel2", sweep="sweep_8") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_nexradlevel2_datatree(filename, sweep=[0, 1, 8]) +dtree = xd.open_datatree(filename, engine="nexradlevel2", sweep=[0, 1, 8]) display(dtree) ``` @@ -125,9 +125,7 @@ dtree["sweep_8"]["sweep_fixed_angle"].values ``` ```{code-cell} -dtree = xd.io.open_nexradlevel2_datatree( - filename, -) +dtree = xd.open_datatree(filename, engine="nexradlevel2") display(dtree) ``` diff --git a/docs/notebooks/ODIM_H5.md b/docs/notebooks/ODIM_H5.md index ac1777fa..051a7816 100644 --- a/docs/notebooks/ODIM_H5.md +++ b/docs/notebooks/ODIM_H5.md @@ -79,11 +79,11 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep string, number or list. 
```{code-cell} -?xd.io.open_odim_datatree +?xd.open_datatree ``` ```{code-cell} -dtree = xd.io.open_odim_datatree(filename, sweep=8) +dtree = xd.open_datatree(filename, engine="odim", sweep=8) display(dtree) ``` @@ -100,16 +100,16 @@ dtree["sweep_0"].ds.DBZH.plot() ``` ```{code-cell} -dtree = xd.io.open_odim_datatree(filename, sweep="sweep_8") +dtree = xd.open_datatree(filename, engine="odim", sweep="sweep_8") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_odim_datatree(filename, sweep=[0, 1, 8]) +dtree = xd.open_datatree(filename, engine="odim", sweep=[0, 1, 8]) display(dtree) ``` ```{code-cell} -dtree = xd.io.open_odim_datatree(filename, sweep=["sweep_0", "sweep_1", "sweep_8"]) +dtree = xd.open_datatree(filename, engine="odim", sweep=["sweep_0", "sweep_1", "sweep_8"]) display(dtree) ``` diff --git a/docs/notebooks/Rainbow.md b/docs/notebooks/Rainbow.md index 3202fc08..d36e26ac 100644 --- a/docs/notebooks/Rainbow.md +++ b/docs/notebooks/Rainbow.md @@ -76,11 +76,11 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep string, number or list. 
```{code-cell} -help(xd.io.open_rainbow_datatree) +help(xd.open_datatree) ``` ```{code-cell} -dtree = xd.io.open_rainbow_datatree(filename, sweep="sweep_8") +dtree = xd.open_datatree(filename, engine="rainbow", sweep="sweep_8") display(dtree) ``` @@ -97,16 +97,16 @@ dtree["sweep_0"].ds.DBZH.plot() ``` ```{code-cell} -dtree = xd.io.open_rainbow_datatree(filename, sweep="sweep_8") +dtree = xd.open_datatree(filename, engine="rainbow", sweep="sweep_8") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_rainbow_datatree(filename, sweep=[0, 1, 8]) +dtree = xd.open_datatree(filename, engine="rainbow", sweep=[0, 1, 8]) display(dtree) ``` ```{code-cell} -dtree = xd.io.open_rainbow_datatree(filename, sweep=["sweep_1", "sweep_2", "sweep_8"]) +dtree = xd.open_datatree(filename, engine="rainbow", sweep=["sweep_1", "sweep_2", "sweep_8"]) display(dtree) ``` diff --git a/docs/notebooks/Transform.md b/docs/notebooks/Transform.md index a8df4ec7..f8dc1a94 100644 --- a/docs/notebooks/Transform.md +++ b/docs/notebooks/Transform.md @@ -28,7 +28,7 @@ import xradar as xd ```{code-cell} file = DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") -dtree = xd.io.open_cfradial1_datatree(file) +dtree = xd.open_datatree(file, engine="cfradial1") display(dtree) ``` diff --git a/docs/notebooks/UF.md b/docs/notebooks/UF.md index 0a0b28fd..89f250f7 100644 --- a/docs/notebooks/UF.md +++ b/docs/notebooks/UF.md @@ -105,11 +105,11 @@ display(ds) The same works analoguous with the datatree loader. But additionally we can provide a sweep string, number or list. 
```{code-cell} -help(xd.io.open_uf_datatree) +help(xd.open_datatree) ``` ```{code-cell} -dtree = xd.io.open_uf_datatree(fname, sweep=4) +dtree = xd.open_datatree(fname, engine="uf", sweep=4) display(dtree) ``` @@ -126,12 +126,12 @@ dtree["sweep_4"].ds.DBZH.plot(cmap="HomeyerRainbow") ``` ```{code-cell} -dtree = xd.io.open_uf_datatree(fname, sweep="sweep_8") +dtree = xd.open_datatree(fname, engine="uf", sweep="sweep_8") display(dtree) ``` ```{code-cell} -dtree = xd.io.open_uf_datatree(fname, sweep=[0, 1, 8]) +dtree = xd.open_datatree(fname, engine="uf", sweep=[0, 1, 8]) display(dtree) ``` @@ -144,7 +144,7 @@ dtree["sweep_8"]["sweep_fixed_angle"].values ``` ```{code-cell} -dtree = xd.io.open_uf_datatree(fname) +dtree = xd.open_datatree(fname, engine="uf") display(dtree) ``` diff --git a/docs/notebooks/multiple-sweeps-into-volume-scan.md b/docs/notebooks/multiple-sweeps-into-volume-scan.md index 8dad2233..e69f7f0f 100644 --- a/docs/notebooks/multiple-sweeps-into-volume-scan.md +++ b/docs/notebooks/multiple-sweeps-into-volume-scan.md @@ -88,7 +88,7 @@ task_files = [ for i in radar_files[:4] ] # list of xradar datatrees -ls_dt = [xd.io.open_iris_datatree(i).xradar.georeference() for i in task_files] +ls_dt = [xd.open_datatree(i, engine="iris").xradar.georeference() for i in task_files] # sweeps and elevations within each task for i in ls_dt: @@ -190,7 +190,7 @@ def mult_vcp(radar_files): """ ls_files = [radar_files[i : i + 4] for i in range(len(radar_files)) if i % 4 == 0] ls_sigmet = [ - [xd.io.open_iris_datatree(data_accessor(i)).xradar.georeference() for i in j] + [xd.open_datatree(data_accessor(i), engine="iris").xradar.georeference() for i in j] for j in ls_files ] ls_dt = [create_vcp(i) for i in ls_sigmet] diff --git a/docs/notebooks/nexrad_read_chunks.md b/docs/notebooks/nexrad_read_chunks.md index f72d0b3b..107caaf3 100644 --- a/docs/notebooks/nexrad_read_chunks.md +++ b/docs/notebooks/nexrad_read_chunks.md @@ -101,7 +101,7 @@ all_bytes = None if chunk_paths: 
candidate = [fs.open(p, "rb").read() for p in chunk_paths] try: - xd.io.open_nexradlevel2_datatree(candidate) + xd.open_datatree(candidate, engine="nexradlevel2") all_bytes = candidate except ValueError as e: print(f"S3 listing rejected: {e}") @@ -129,7 +129,7 @@ When all chunks (S through E) are available, passing the list to complete volume file. ```{code-cell} -dtree = xd.io.open_nexradlevel2_datatree(all_bytes) +dtree = xd.open_datatree(all_bytes, engine="nexradlevel2") display(dtree) ``` @@ -157,8 +157,7 @@ partial_chunks = all_bytes[:15] with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - dtree_drop = xd.io.open_nexradlevel2_datatree( - partial_chunks, incomplete_sweep="drop" + dtree_drop = xd.open_datatree(partial_chunks, engine="nexradlevel2", incomplete_sweep="drop" ) # Show warnings @@ -203,7 +202,7 @@ This is useful for visualization and monitoring where you want to see all available data as soon as it arrives. ```{code-cell} -dtree_pad = xd.io.open_nexradlevel2_datatree(partial_chunks, incomplete_sweep="pad") +dtree_pad = xd.open_datatree(partial_chunks, engine="nexradlevel2", incomplete_sweep="pad") sweep_groups_pad = list(dtree_pad.match("sweep_*").keys()) print(f"Sweeps available (pad mode): {sweep_groups_pad}") @@ -241,7 +240,7 @@ are still missing. 
```{code-cell} early_chunks = all_bytes[:5] -dtree_early = xd.io.open_nexradlevel2_datatree(early_chunks, incomplete_sweep="pad") +dtree_early = xd.open_datatree(early_chunks, engine="nexradlevel2", incomplete_sweep="pad") sweep_groups_early = list(dtree_early.match("sweep_*").keys()) print(f"Sweeps from 5 chunks: {sweep_groups_early}") diff --git a/docs/notebooks/plot-ppi.md b/docs/notebooks/plot-ppi.md index 8eed6767..809a9f44 100644 --- a/docs/notebooks/plot-ppi.md +++ b/docs/notebooks/plot-ppi.md @@ -41,7 +41,7 @@ filename = DATASETS.fetch("cfrad.20080604_002217_000_SPOL_v36_SUR.nc") Read the data using the `cfradial1` engine ```{code-cell} -radar = xd.io.open_cfradial1_datatree(filename, first_dim="auto") +radar = xd.open_datatree(filename, engine="cfradial1", first_dim="auto") display(radar) ```