import warnings
warnings.simplefilter("always")
import xarray as xr
from pathlib import Path
from pyhanami.utils import data_general, data_checker
class ObservationData:
    """
    Retrieve and process observational datasets for evaluating simulations.

    This class reads observational files from a directory, keeping the
    variables and time steps present in a given simulation dataset. The
    retrieved data are then regridded onto the simulation grid via
    ``data_general.regrid_data``.

    Parameters
    ----------
    data_path : str or Path
        Path to an observations database.
    sim : xr.Dataset
        Input simulation dataset.
    name : str
        Name of the observations instance (default: obs).
    realization : int
        Realization number to select from the observations dataset if more than
        one member is present (default: 0).
    regrid_method : str
        Regridding method (default: bilinear).

    Attributes
    ----------
    data_path : Path
        Path to the observations database.
    data : xr.Dataset
        Processed observational data, regridded to match the input simulation.
    name : str
        Name of the observations instance (default: obs).
    realization : int
        Realization number to select from the observations dataset if more than
        one member is present (default: 0).
    regrid_method : str
        Regridding method (default: bilinear).

    Raises
    ------
    TypeError
        If any argument has the wrong type.
    FileNotFoundError
        If ``data_path`` does not exist, or no observation file matches one of
        the simulation variables.
    ValueError
        If ``sim`` has no data variables or a ``time`` coordinate is missing.
    """

    def __init__(self, data_path, sim, name='obs', realization=0, regrid_method='bilinear'):
        if not isinstance(data_path, (str, Path)):
            raise TypeError("'data_path' must be a string or Path object.")
        self.data_path = Path(data_path)
        if not self.data_path.exists():
            raise FileNotFoundError(f"Observational data path {self.data_path} not found.")

        self._validate_sim(sim)

        if not isinstance(name, str):
            raise TypeError("'name' must be a string.")
        self.name = name

        if not isinstance(realization, int):
            raise TypeError("'realization' must be an integer.")
        self.realization = realization

        if not isinstance(regrid_method, str):
            raise TypeError("'regrid_method' must be a string.")
        self.regrid_method = regrid_method

        # Loading happens eagerly at construction time.
        self.data = self.load_and_process(sim)

    @staticmethod
    def _validate_sim(sim):
        """Raise if ``sim`` is not an xarray.Dataset with at least one data variable."""
        if not isinstance(sim, xr.Dataset):
            raise TypeError("Input simulation must be an xarray.Dataset.")
        if not sim.data_vars:
            raise ValueError("Input simulation must contain at least one climate variable.")

    def _find_obs_file(self, var):
        """
        Locate the observation file for a single variable.

        Parameters
        ----------
        var : str
            Variable name; files are matched against ``data_obs*_{var}.nc``
            directly inside ``self.data_path``.

        Returns
        -------
        Path
            The first matching file in sorted order, so the choice is
            deterministic when several files match.

        Raises
        ------
        FileNotFoundError
            If no file matches the pattern.
        """
        pattern = f"data_obs*_{var}.nc"
        matches = sorted(self.data_path.glob(pattern))
        if not matches:
            # A bare next() on the glob iterator would raise an opaque
            # StopIteration here; report the missing file explicitly instead.
            raise FileNotFoundError(
                f"No observational file matching '{pattern}' found in {self.data_path}."
            )
        return matches[0]

    def _retrieve_obs(self, sim):
        """
        Retrieve observations from database for the variables and period
        available in the given simulation ensemble.

        Parameters
        ----------
        sim : xr.Dataset
            Input simulation dataset.

        Returns
        -------
        obs : xr.Dataset
            Dataset containing observational data for the variables in sim.

        Raises
        ------
        TypeError, ValueError
            If ``sim`` is invalid or a ``time`` coordinate is missing.
        FileNotFoundError
            If no observation file exists for one of the variables.
        RuntimeError
            If time normalization reports errors.
        KeyError
            If the observations do not cover every simulation time point.
        """
        self._validate_sim(sim)

        per_variable = []
        for var in sim.data_vars:
            # Opened lazily (dask chunks); the handle must stay open for later compute.
            obs_raw = xr.open_dataset(self._find_obs_file(var), chunks="auto")

            # Select realization if more than one member is present
            if 'realization' in obs_raw.dims:
                obs_raw = obs_raw.isel({'realization': self.realization}, drop=True)
            if 'realization' in obs_raw.coords:
                obs_raw = obs_raw.drop_vars('realization')

            # Check time coordinate and format
            if "time" not in obs_raw.coords or "time" not in sim.coords:
                raise ValueError(
                    f"'time' coordinate missing in either simulations or observations for variable {var}."
                )
            # Result is unpacked as (dataset, errors, warnings); local names avoid
            # shadowing the imported `warnings` module.
            obs_timed, error_msgs, warning_msgs = data_checker.DataChecker.normalize_time_format(obs_raw)
            if warning_msgs:
                print(f"{len(warning_msgs)} warnings encountered while loading the observations dataset:", flush=True)
                for message in warning_msgs:
                    print(f'\t - {message}', flush=True)
            if error_msgs:
                print(f"{len(error_msgs)} errors encountered while loading the observations dataset:", flush=True)
                for message in error_msgs:
                    print(f'\t - {message}', flush=True)
                raise RuntimeError("Loading of observations failed due to the errors listed above.")

            # Align the time range with the simulations
            try:
                obs_aligned = obs_timed.sel(time=sim.time)
            except KeyError as err:
                # Chain the original error so the missing labels stay visible.
                raise KeyError(f"Observations missing for some time points in variable {var}.") from err
            per_variable.append(obs_aligned)

        return xr.merge(per_variable)

    def load_and_process(self, sim):
        """
        Retrieve and regrid observational data for the variables and period
        available in the given simulation ensemble.

        Parameters
        ----------
        sim : xr.Dataset
            Input simulation dataset.

        Returns
        -------
        data_new_grid : xr.Dataset
            Regridded observational dataset matching the input simulation.
        """
        self._validate_sim(sim)
        data_old_grid = self._retrieve_obs(sim)
        return data_general.regrid_data(data_old_grid, sim, method=self.regrid_method)