From 3bf227cea431a1c4bbd22fc180c3cacbbb12aff9 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Fri, 22 Nov 2024 14:02:40 -0800 Subject: [PATCH 1/3] Add temporal bounds and center times for group_average API --- xcdat/temporal.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/xcdat/temporal.py b/xcdat/temporal.py index 3367e06a6..e3b288706 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -17,7 +17,7 @@ from xcdat import bounds # noqa: F401 from xcdat._logger import _setup_custom_logger -from xcdat.axis import get_dim_coords +from xcdat.axis import center_times, get_dim_coords from xcdat.dataset import _get_data_var logger = _setup_custom_logger(__name__) @@ -885,6 +885,10 @@ def _averager( ds = ds.drop_dims(self.dim) ds[dv_avg.name] = dv_avg + if self._mode == "group_average": + ds = ds.bounds.add_missing_bounds(axes="T") + ds = center_times(ds) + if keep_weights: ds = self._keep_weights(ds) @@ -1884,6 +1888,8 @@ def _convert_df_to_dt(self, df: pd.DataFrame) -> np.ndarray: """ df_new = df.copy() + # TODO: This is where the result should be in the middle, not the + # beginning. dt_components_defaults = {"year": 1, "month": 1, "day": 1, "hour": 0} for component, default_val in dt_components_defaults.items(): if component not in df_new.columns: From 6d257fcdc4ff9d787d62ef296d6b9317ebbba1a9 Mon Sep 17 00:00:00 2001 From: tomvothecoder Date: Fri, 22 Nov 2024 14:13:21 -0800 Subject: [PATCH 2/3] Remove comment in `temporal.py` --- xcdat/temporal.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xcdat/temporal.py b/xcdat/temporal.py index e3b288706..0818c1d5c 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -1888,8 +1888,6 @@ def _convert_df_to_dt(self, df: pd.DataFrame) -> np.ndarray: """ df_new = df.copy() - # TODO: This is where the result should be in the middle, not the - # beginning. dt_components_defaults = {"year": 1, "month": 1, "day": 1, "hour": 0} for component, default_val in dt_components_defaults.items(): if component not in df_new.columns: From ff2030d8c24ab39821188b26b9d72a52595f37d3 Mon Sep 17 00:00:00 2001 From: Tom Vo Date: Fri, 6 Dec 2024 15:46:20 -0800 Subject: [PATCH 3/3] Add initial prototype for group average bounds code --- xcdat/temporal.py | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/xcdat/temporal.py b/xcdat/temporal.py index 0818c1d5c..d69d55442 100644 --- a/xcdat/temporal.py +++ b/xcdat/temporal.py @@ -876,7 +876,7 @@ def _averager( if self._mode == "average": dv_avg = self._average(ds, data_var) elif self._mode in ["group_average", "climatology", "departures"]: - dv_avg = self._group_average(ds, data_var) + dv_avg, time_bnds = self._group_average(ds, data_var) # The original time dimension is dropped from the dataset because # it becomes obsolete after the data variable is averaged. When the @@ -885,8 +885,10 @@ def _averager( ds = ds.drop_dims(self.dim) ds[dv_avg.name] = dv_avg - if self._mode == "group_average": - ds = ds.bounds.add_missing_bounds(axes="T") + if self._mode in ["group_average", "climatology", "departures"]: + ds[time_bnds.name] = time_bnds + # FIXME: This is not working when time bounds are datetime and + # time is cftime. ds = center_times(ds) if keep_weights: @@ -1479,7 +1481,9 @@ def _average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: return dv - def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: + def _group_average( + self, ds: xr.Dataset, data_var: str + ) -> Tuple[xr.DataArray, xr.DataArray]: """Averages a data variable by time group. Parameters @@ -1491,7 +1495,7 @@ def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: Returns ------- - xr.DataArray + Tuple[xr.DataArray, xr.DataArray] The data variable averaged by time group. """ dv = _get_data_var(ds, data_var) @@ -1500,9 +1504,9 @@ def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: # values. self._labeled_time = self._label_time_coords(dv[self.dim]) dv = dv.assign_coords({self.dim: self._labeled_time}) + time_bounds = ds.bounds.get_bounds("T", var_key=data_var) if self._weighted: - time_bounds = ds.bounds.get_bounds("T", var_key=data_var) self._weights = self._get_weights(time_bounds) # Weight the data variable. @@ -1526,6 +1530,25 @@ def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: else: dv = self._group_data(dv).mean() + """I think we'll need to collect the bounds for each group, (e.g., group_bounds_array = [("2000-01-01 00:00", "2000-01-02 00:00"), ("2000-01-02 00:00", "2000-01-03 00:00"), ..., ("2000-01-31 00:00", "2000-02-01 00:00")] and then take the min of the lower bound and the max of the upper bound (i.e., group_bnd = [np.min(groups_bound_array[:, 0]), np.max(group_bounds_array[:, 1])]. + """ + # Create time bounds for each group + time_bounds_grouped = self._group_data(time_bounds) + group_bounds = [] + + for _, group_data in time_bounds_grouped: + group_times = group_data.values + group_bnds = (np.min(group_times[:, 0]), np.max(group_times[:, 1])) + group_bounds.append(group_bnds) + + # Convert group bounds to DataArray + da_bnds = xr.DataArray( + data=np.array(group_bounds), + dims=[self.dim, "bnds"], + coords={self.dim: dv[self.dim].values}, + name=f"{self.dim}_bnds", + ) + # After grouping and aggregating, the grouped time dimension's # attributes are removed. Xarray's `keep_attrs=True` option only keeps # attributes for data variables and not their coordinates, so the @@ -1535,7 +1558,7 @@ def _group_average(self, ds: xr.Dataset, data_var: str) -> xr.DataArray: dv = self._add_operation_attrs(dv) - return dv + return dv, da_bnds def _get_weights(self, time_bounds: xr.DataArray) -> xr.DataArray: """Calculates weights for a data variable using time bounds.