Source code for pyhanami.diags.Observations

import warnings
# Show every warning each time it is triggered, including repeats.
# NOTE(review): this changes the process-wide warning filters as a side effect
# of importing this module — confirm that is intended.
warnings.simplefilter("always")

import xarray as xr

from pathlib import Path

from pyhanami.utils import data_general, data_checker


class ObservationData:
    """
    Retrieves and processes observational datasets for evaluation of simulations.

    For every variable of the input simulation, a file matching
    ``data_obs*_<variable>.nc`` is looked up in ``data_path`` and opened, the
    requested realization is selected when a ``realization`` dimension is
    present, and the result is restricted to the time points of the
    simulation. The merged observations are then regridded onto the input
    simulation with ``data_general.regrid_data``.

    Parameters
    ----------
    data_path : str or Path
        Path to an observations database.
    sim : xr.Dataset
        Input simulation dataset.
    name : str
        Name of the observations instance (default: obs).
    realization : int
        Realization number to select from the observations dataset if more
        than one member is present (default: 0).
    regrid_method : str
        Regridding method (default: bilinear).

    Attributes
    ----------
    data_path : Path
        Path to the observations database.
    data : xr.Dataset
        Processed observational data, regridded to match the input simulation.
    name : str
        Name of the observations instance (default: obs).
    realization : int
        Realization number to select from the observations dataset if more
        than one member is present (default: 0).
    regrid_method : str
        Regridding method (default: bilinear).

    Raises
    ------
    TypeError
        If any argument has an unexpected type.
    FileNotFoundError
        If ``data_path`` does not exist or holds no file for a variable.
    ValueError
        If the simulation has no data variables or a ``time`` coordinate is
        missing.
    """

    def __init__(self, data_path, sim, name='obs', realization=0, regrid_method='bilinear'):
        if not isinstance(data_path, (str, Path)):
            raise TypeError("'data_path' must be a string or Path object.")
        self.data_path = Path(data_path)
        if not self.data_path.exists():
            raise FileNotFoundError(f"Observational data path {self.data_path} not found.")

        self._validate_sim(sim)

        if not isinstance(name, str):
            raise TypeError("'name' must be a string.")
        self.name = name

        if not isinstance(realization, int):
            raise TypeError("'realization' must be an integer.")
        self.realization = realization

        if not isinstance(regrid_method, str):
            raise TypeError("'regrid_method' must be a string.")
        self.regrid_method = regrid_method

        self.data = self.load_and_process(sim)

    @staticmethod
    def _validate_sim(sim):
        """
        Check that the simulation is a non-empty xarray dataset.

        Parameters
        ----------
        sim : xr.Dataset
            Input simulation dataset.

        Raises
        ------
        TypeError
            If ``sim`` is not an ``xr.Dataset``.
        ValueError
            If ``sim`` has no data variables.
        """
        if not isinstance(sim, xr.Dataset):
            raise TypeError("Input simulation must be an xarray.Dataset.")
        if not sim.data_vars:
            raise ValueError("Input simulation must contain at least one climate variable.")

    def _find_obs_file(self, var):
        """
        Locate the observations file of one variable inside ``data_path``.

        Parameters
        ----------
        var : str
            Name of the variable.

        Returns
        -------
        var_path : Path
            First file (in sorted order) matching ``data_obs*_<var>.nc``.

        Raises
        ------
        FileNotFoundError
            If no file matches the pattern.
        """
        pattern = f"data_obs*_{var}.nc"
        # Sorting makes the choice reproducible when several files match;
        # glob() alone yields them in arbitrary order.
        matches = sorted(self.data_path.glob(pattern))
        if not matches:
            raise FileNotFoundError(
                f"No observations file matching '{pattern}' found in {self.data_path}."
            )
        return matches[0]

    def _retrieve_obs(self, sim):
        """
        Retrieve observations from database for the variables and period
        available in the given simulation ensemble.

        Parameters
        ----------
        sim : xr.Dataset
            Input simulation dataset.

        Returns
        -------
        obs : xr.Dataset
            Dataset containing observational data for the variables in sim.

        Raises
        ------
        TypeError
            If ``sim`` is not an ``xr.Dataset``.
        ValueError
            If ``sim`` has no data variables, or if the ``time`` coordinate is
            missing in the simulation or in an observations file.
        FileNotFoundError
            If no observations file exists for a variable.
        RuntimeError
            If normalizing the time format of the observations reports errors.
        KeyError
            If the observations do not cover every time point of ``sim``.
        """
        self._validate_sim(sim)

        obs_per_var = []
        for var in sim.data_vars:
            var_path = self._find_obs_file(var)
            obs_var = xr.open_dataset(var_path, chunks="auto")

            # Select realization if more than one member is present
            if 'realization' in obs_var.dims:
                obs_var = obs_var.isel({'realization': self.realization}, drop=True)
            if 'realization' in obs_var.coords:
                obs_var = obs_var.drop_vars('realization')

            # Check time coordinate and format
            if "time" not in obs_var.coords or "time" not in sim.coords:
                raise ValueError(
                    f"'time' coordinate missing in either simulations or observations for variable {var}."
                )

            # Local names deliberately avoid shadowing the `warnings` module.
            obs_time, time_errors, time_warnings = (
                data_checker.DataChecker.normalize_time_format(obs_var)
            )

            if len(time_warnings) != 0:
                print(f"{len(time_warnings)} warnings encountered while loading the observations dataset:", flush=True)
                for warning in time_warnings:
                    print(f'\t - {warning}', flush=True)

            if len(time_errors) != 0:
                print(f"{len(time_errors)} errors encountered while loading the observations dataset:", flush=True)
                for error in time_errors:
                    print(f'\t - {error}', flush=True)
                raise RuntimeError("Loading of observations failed due to the errors listed above.")

            # Align the time range with the simulations
            try:
                obs_sel = obs_time.sel(time=sim.time)
            except KeyError as err:
                # Chain the original error so the missing time labels stay visible.
                raise KeyError(
                    f"Observations missing for some time points in variable {var}."
                ) from err

            obs_per_var.append(obs_sel)

        return xr.merge(obs_per_var)

    def load_and_process(self, sim):
        """
        Retrieve and regrid observational data for the variables and period
        available in the given simulation ensemble.

        Parameters
        ----------
        sim : xr.Dataset
            Input simulation dataset.

        Returns
        -------
        data_new_grid : xr.Dataset
            Regridded observational dataset matching the input simulation.

        Raises
        ------
        TypeError
            If ``sim`` is not an ``xr.Dataset``.
        ValueError
            If ``sim`` has no data variables.
        """
        # Fail fast at the public entry point before touching any file.
        self._validate_sim(sim)

        data_old_grid = self._retrieve_obs(sim)
        data_new_grid = data_general.regrid_data(data_old_grid, sim, method=self.regrid_method)
        return data_new_grid