diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/.DS_Store differ diff --git a/README.rst b/README.rst index 97b0c93..bac71f6 100644 --- a/README.rst +++ b/README.rst @@ -1,5 +1,5 @@ -baysparpy -========= +baysparpy3 +========== .. image:: https://travis-ci.org/brews/baysparpy.svg?branch=master :target: https://travis-ci.org/brews/baysparpy @@ -9,10 +9,16 @@ An Open Source Python package for TEX86 calibration. This package is based on the original BAYSPAR (BAYesian SPAtially-varying Regression) for MATLAB (https://github.com/jesstierney/BAYSPAR). +This package is the updated version of baysparpy with new module(s) such as `predict_tex_analog` +based on the original python version of BAYSPAR (https://github.com/brews/baysparpy). Quick example ------------- +Standard prediction +~~~~~~~~~~~~~~~~~~~ + + First, load key packages and an example dataset: .. code-block:: python @@ -37,6 +43,81 @@ To see actual numbers from the prediction, directly parse ``prediction.ensemble` You can also plot your prediction with ``bsr.predictplot()`` or ``bsr.densityplot()``. + +Analog prediction +~~~~~~~~~~~~~~~~~ + + +Begin by loading data for a “Deep-Time” analog prediction: + +This dataset is a TEX86 record from record from Wilson Lake, New Jersey (`Zachos et al. 2006 `_). The file has two columns giving depth (m) and TEX86: + + +.. code-block:: python + + example_file = bsr.get_example_data('wilsonlake.csv') + d = np.genfromtxt(example_file, delimiter=',', names=True) + + +We can run the analog prediction of SST with ``predict_seatemp_analog()``. +We can also examine our prediction as though it were a standard prediction. For example, we can plot a time series of the prediction: + +.. code-block:: python + + search_tolerance = np.std(d['tex86'], ddof=1) * 2 + prediction = bsr.predict_seatemp_analog(d['tex86'], temptype='sst',prior_mean=30, prior_std=20,search_tol=search_tolerance,nens=500) + ax = bsr.predictplot(prediction, x=d['depth'], xlabel='Depth (m)', ylabel='SST (°C)') + ax.grid() + ax.legend() + + +Forward prediction +~~~~~~~~~~~~~~~~~~ + + +For this example, we make inferences about TEX86 from SST data using a forward-model prediction. We start by creating a SST series spanning from 0 - 40 °C. + +And now plug the SST data into ``predict_tex()`` along with additional information. In this case we’re using the same site location as in Standard prediction: + +.. code-block:: python + + sst = np.arange(0, 41) + prediction = bsr.predict_tex(sst, lon=34.0733, lat=31.6517, temptype='sst') + +As might be expected, we can use the output of the forward prediction to parse and plot: + +.. code-block:: python + + ax = bsr.predictplot(prediction, x=sst,xlabel='SST (°C)',ylabel=r'TEX$_{86}$') + ax.grid() + ax.legend() + + +Analog forward prediction +~~~~~~~~~~~~~~~~~~~~~~~~~ + + +This tool will calculate forwarded TEX using given SST data. Here is an example: + +.. code-block:: python + + sst = np.arange(0, 41) + prediction = bsr.predict_tex_analog(sst, temptype = 'sst', search_tol = 5., nens=8000) + ax = bsr.predictplot(prediction, x=sst,xlabel='SST (°C)',ylabel=r'TEX$_{86}$') + ax.grid() + ax.legend() + + +First, we make inferences about deep-time TEX86 from SST data using a forward-model analog prediction. We start by creating a SST series spanning from 0 - 40 °C. + +And then plug the SST data into ``predict_tex_analog()`` along with additional information (search tolerance is 5 °C). + +We can use the output of the forward prediction to parse and plot. + +Read More +~~~~~~~~~ + + For further details, examples, and additional prediction functions, see the online documentation (https://baysparpy.readthedocs.io). @@ -47,7 +128,7 @@ To install **baysparpy** with pip, run: .. code-block:: bash - $ pip install baysparpy + $ pip install baysparpy3 To install with conda, run: @@ -62,7 +143,7 @@ Support and development - Documentation is available online (https://baysparpy.readthedocs.io). -- Please feel free to report bugs and issues or view the source code on GitHub (https://github.com/brews/baysparpy). +- Please feel free to report bugs and issues or view the source code on GitHub (https://github.com/mingsongli/baysparpy). License diff --git a/bayspar/.DS_Store b/bayspar/.DS_Store new file mode 100644 index 0000000..d40c21b Binary files /dev/null and b/bayspar/.DS_Store differ diff --git a/bayspar/__init__.py b/bayspar/__init__.py index 834accc..68e2f68 100644 --- a/bayspar/__init__.py +++ b/bayspar/__init__.py @@ -1,3 +1,4 @@ -from bayspar.predict import predict_tex, predict_seatemp, predict_seatemp_analog +name = 'baysparpy3' +from bayspar.predict import predict_tex, predict_tex_analog, predict_seatemp, predict_seatemp_analog from bayspar.utils import get_example_data from bayspar.plot import predictplot, analogmap, densityplot diff --git a/bayspar/modelparams/Output_SpatAg_SST/.DS_Store b/bayspar/modelparams/Output_SpatAg_SST/.DS_Store new file mode 100644 index 0000000..3601717 Binary files /dev/null and b/bayspar/modelparams/Output_SpatAg_SST/.DS_Store differ diff --git a/bayspar/modelparams/Output_SpatAg_SST/alpha_samples.mat b/bayspar/modelparams/Output_SpatAg_SST/alpha_samples.mat new file mode 100644 index 0000000..b30cd1a Binary files /dev/null and b/bayspar/modelparams/Output_SpatAg_SST/alpha_samples.mat differ diff --git a/bayspar/modelparams/Output_SpatAg_SST/alpha_samples_comp.mat b/bayspar/modelparams/Output_SpatAg_SST/alpha_samples_comp.mat index 39095b8..3c55f72 100644 Binary files a/bayspar/modelparams/Output_SpatAg_SST/alpha_samples_comp.mat and b/bayspar/modelparams/Output_SpatAg_SST/alpha_samples_comp.mat differ diff --git a/bayspar/modelparams/Output_SpatAg_SST/beta_samples.mat b/bayspar/modelparams/Output_SpatAg_SST/beta_samples.mat new file mode 100644 index 0000000..6d0c079 Binary files /dev/null and b/bayspar/modelparams/Output_SpatAg_SST/beta_samples.mat differ diff --git a/bayspar/modelparams/Output_SpatAg_SST/beta_samples_comp.mat b/bayspar/modelparams/Output_SpatAg_SST/beta_samples_comp.mat index 0c3e72b..c881e5b 100644 Binary files a/bayspar/modelparams/Output_SpatAg_SST/beta_samples_comp.mat and b/bayspar/modelparams/Output_SpatAg_SST/beta_samples_comp.mat differ diff --git a/bayspar/modelparams/Output_SpatAg_SST/tau2_samples.mat b/bayspar/modelparams/Output_SpatAg_SST/tau2_samples.mat index 37f69b4..e7550a2 100644 Binary files a/bayspar/modelparams/Output_SpatAg_SST/tau2_samples.mat and b/bayspar/modelparams/Output_SpatAg_SST/tau2_samples.mat differ diff --git a/bayspar/modelparams/Output_SpatAg_SST/tau2_samples_old.mat b/bayspar/modelparams/Output_SpatAg_SST/tau2_samples_old.mat new file mode 100644 index 0000000..37f69b4 Binary files /dev/null and b/bayspar/modelparams/Output_SpatAg_SST/tau2_samples_old.mat differ diff --git a/bayspar/modelparams/Output_SpatAg_subT/alpha_samples.mat b/bayspar/modelparams/Output_SpatAg_subT/alpha_samples.mat new file mode 100644 index 0000000..9492041 Binary files /dev/null and b/bayspar/modelparams/Output_SpatAg_subT/alpha_samples.mat differ diff --git a/bayspar/modelparams/Output_SpatAg_subT/beta_samples.mat b/bayspar/modelparams/Output_SpatAg_subT/beta_samples.mat new file mode 100644 index 0000000..3ad6d7c Binary files /dev/null and b/bayspar/modelparams/Output_SpatAg_subT/beta_samples.mat differ diff --git a/bayspar/modelparams/Output_SpatAg_subT/beta_samples_comp.mat b/bayspar/modelparams/Output_SpatAg_subT/beta_samples_comp.mat index 74bfe22..3047890 100644 Binary files a/bayspar/modelparams/Output_SpatAg_subT/beta_samples_comp.mat and b/bayspar/modelparams/Output_SpatAg_subT/beta_samples_comp.mat differ diff --git a/bayspar/modelparams/Output_SpatAg_subT/tau2_samples.mat b/bayspar/modelparams/Output_SpatAg_subT/tau2_samples.mat index 67743e5..febe7ae 100644 Binary files a/bayspar/modelparams/Output_SpatAg_subT/tau2_samples.mat and b/bayspar/modelparams/Output_SpatAg_subT/tau2_samples.mat differ diff --git a/bayspar/modelparams/Output_SpatAg_subT/tau2_samples_old.mat b/bayspar/modelparams/Output_SpatAg_subT/tau2_samples_old.mat new file mode 100644 index 0000000..67743e5 Binary files /dev/null and b/bayspar/modelparams/Output_SpatAg_subT/tau2_samples_old.mat differ diff --git a/bayspar/modelparams/__init__.py b/bayspar/modelparams/__init__.py index 9aa4928..b34153f 100644 --- a/bayspar/modelparams/__init__.py +++ b/bayspar/modelparams/__init__.py @@ -1 +1 @@ -from bayspar.modelparams.core import get_draws +from bayspar.modelparams.core import get_draws, get_draws_analog diff --git a/bayspar/modelparams/core.py b/bayspar/modelparams/core.py index e1e99fc..80821bf 100644 --- a/bayspar/modelparams/core.py +++ b/bayspar/modelparams/core.py @@ -1,3 +1,17 @@ +""" +Core + Originator: Steven Brewster Malevich + University of Arizona Department of Geosciences + + Revisions: Mingsong Li + Penn State Geosciences + Date: Sept 23, 2019 + + Purpose: add get_draws_analog module + Get Draws instance for a draw type + for the analog model + +""" import os.path from copy import deepcopy from pkgutil import get_data @@ -19,24 +33,25 @@ def get_matlab_resource(resource, package='bayspar', **kwargs): def read_draws(flstr, drawtype): """Grab single squeezed array from package resources + example: + alpha_samples_comp=read_draws('alpha_samples_comp.mat', 'sst') """ drawtype = drawtype.lower() - - var_template = 'modelparams/Output_SpatAg_{0}/{1}' - varstr = TRANSLATE_VAR[drawtype] - varstr_full = os.path.splitext(flstr)[0] + + var_template = 'modelparams/Output_SpatAg_{0}/{1}' + # resource_str = 'modelparams/Output_SpatAg_{0}/{1}'.format('SST', 'alpha_samples_comp.mat') resource_str = var_template.format(varstr, flstr) + varstr_full = os.path.splitext(flstr)[0] var = get_matlab_resource(resource_str, squeeze_me=True) return var[varstr_full] - @attr.s class Draws: """Spatially-aware modelparams draws """ - alpha_samples_comp = attr.ib() - beta_samples_comp = attr.ib() + alpha_samples_comp = attr.ib() # true data may be either alpha_samples_comp.mat or alpha_samples.mat + beta_samples_comp = attr.ib() # true data may be either beta_samples_comp.mat or beta_samples.mat tau2_samples = attr.ib() locs_comp = attr.ib() _half_grid_space = attr.ib(default=10) @@ -66,7 +81,7 @@ def find_alphabeta_near(self, lat, lon): alpha_select = self.alpha_samples_comp[idx].squeeze() beta_select = self.beta_samples_comp[idx].squeeze() return alpha_select, beta_select - + class BadLatlonError(Exception): """Raised when latitude or longitude is outside (-90, 90) or (-180, 180) @@ -79,7 +94,7 @@ class BadLatlonError(Exception): def __init__(self, latlon): self.latlon = latlon - +# for modern tex forward draws_sst = Draws(alpha_samples_comp=read_draws('alpha_samples_comp.mat', 'sst'), beta_samples_comp=read_draws('beta_samples_comp.mat', 'sst'), tau2_samples=read_draws('tau2_samples.mat', 'sst'), @@ -91,7 +106,6 @@ def __init__(self, latlon): tau2_samples=read_draws('tau2_samples.mat', 'subt'), locs_comp=read_draws('Locs_Comp.mat', 'subt')) - def get_draws(drawtype): """Get Draws instance for a draw type """ @@ -99,3 +113,23 @@ def get_draws(drawtype): return deepcopy(draws_sst) elif drawtype == 'subt': return deepcopy(draws_subt) + +# for analog tex forward model +draws_sst_analog = Draws(alpha_samples_comp=read_draws('alpha_samples.mat', 'sst'), + beta_samples_comp=read_draws('beta_samples.mat', 'sst'), + tau2_samples=read_draws('tau2_samples.mat', 'sst'), + locs_comp=read_draws('Locs_Comp.mat', 'sst')) + +draws_subt_analog = Draws(alpha_samples_comp=read_draws('alpha_samples.mat', 'subt'), + beta_samples_comp=read_draws('beta_samples.mat', 'subt'), + tau2_samples=read_draws('tau2_samples.mat', 'subt'), + locs_comp=read_draws('Locs_Comp.mat', 'subt')) + +def get_draws_analog(drawtype): + """Get Draws instance for a draw type + for the analog model""" + if drawtype == 'sst': + return deepcopy(draws_sst_analog) + elif drawtype == 'subt': + return deepcopy(draws_subt_analog) + \ No newline at end of file diff --git a/bayspar/observations/core.py b/bayspar/observations/core.py index dcb04d8..fe50a02 100644 --- a/bayspar/observations/core.py +++ b/bayspar/observations/core.py @@ -1,3 +1,17 @@ +""" +Core + Originator: Steven Brewster Malevich + University of Arizona Department of Geosciences + + Revisions: Mingsong Li + Penn State Geosciences + Date: Sept 23, 2019 + + Purpose: add find_t_within_tolerance module in TexObs class + : Find mean modern sea temperature observations + that are within ± tolerance + +""" from copy import deepcopy from pkgutil import get_data from io import BytesIO @@ -47,7 +61,10 @@ def read_tex(obstype): obs_stack = var['Obs_Stack'].squeeze().item() inds_stack = var['Inds_Stack'].squeeze().item() obslocs = np.array([x[...].squeeze() for x in var['ObsLocs'].item().ravel() if x.any()]) - return locs, obs_stack, inds_stack, obslocs + # return locs, obs_stack, inds_stack, obslocs + # M.Li + target_stack = var['Target_Stack'].squeeze().item() + return locs, obs_stack, inds_stack, obslocs, target_stack def chord_distance(latlon1, latlon2): @@ -177,7 +194,55 @@ class TexObs: obs_stack = attr.ib() inds_stack = attr.ib() obslocs = attr.ib() - + # M.Li added + target_stack = attr.ib() + + #M.Li added + def find_t_within_tolerance(self,t,tolerance): + """Find mean modern sea temperature observations that are within ± tolerance + cycle through the alpha/beta grid cells + + Parameters + ---------- + t : float + Mean sea temperature value. + tolerance : float + Value added and subtracted from 't' to get upper and lower tolerance bounds. + + Returns + ---------- + latlon_match : list + An n-length list of latlon tuples where matches were found. + vals_match : ndarray + A 1d array (n) of corresponding TEX86 averages from each match. + inder_g : ndarray + A 1d array (n) of location in the big grids that has sea temp within + tolerance + + Originator: Mingsong Li, Penn State Geosciences + Date: 25 September, 2019 + """ + n_bg = len(self.locs) # number of big grids + upper_bound = t + tolerance # upper bound of sea temperature range + lower_bound = t - tolerance # lower bound of sea temperature range + inder_g =[] + vals_match = [] + # search eachi big grid + for kk in range(1, n_bg+1): + # find the mean SST obs corresponding to this index location + # + vals = self.target_stack[self.inds_stack == kk] + # + vals_mean = vals.mean() + # if the mean of vals in the big grids within tolerance, add it to inder_g + if lower_bound <= vals_mean <= upper_bound: + idx = kk-1 # Back to 0-based idx + inder_g.append(idx) + vals_match.append(vals_mean) + + latlon_match = [tuple(x) for x in self.locs[inder_g, ::-1].tolist()] + return latlon_match, np.array(vals_match), np.array(inder_g) + def find_within_tolerance(self, x, tolerance): """Find mean TEX86 observations that are within ± tolerance from x diff --git a/bayspar/predict.py b/bayspar/predict.py index c3cd2f8..23cb431 100644 --- a/bayspar/predict.py +++ b/bayspar/predict.py @@ -1,10 +1,59 @@ +""" + baysparpy + ========= + + An Open Source Python package for TEX86 calibration. + + This package is based on the original BAYSPAR + (BAYesian SPAtially-varying Regression) for MATLAB + (https://github.com/jesstierney/BAYSPAR). + + Originator: Steven Brewster Malevich + University of Arizona Department of Geosciences + + Revisions: Mingsong Li + Penn State Geosciences + Date: Sept 23, 2019 + + Revision: Mingsong Li + Peking University + Date: Jun 16, 2021 + + Purpose: add predict_tex_analog module + + : TEX_forward model for analog model of baysparpy + to simplify the code and save space + + add get_draws_analog module + : Get Draws instance for a draw type + for the analog model + + New files added: alpha_samples.mat + beta_samples.mat + These two files were trimmed as they may not have had burnin removed + They were calculated using the following matlab code + in TEX_forward.m of BAYSPAR + https://github.com/jesstierney/BAYSPAR/blob/master/TEX_forward.m + + Ntk = 20000; + load('alpha_samples.mat', 'alpha_samples') + alpha_samples=[alpha_samples.field]; + alpha_samples=alpha_samples(:,end-Ntk+1:end); + load('beta_samples.mat') + beta_samples=[beta_samples.field]; + beta_samples=beta_samples(:,end-Ntk+1:end); + save('alpha_samples','alpha_samples'); + save('beta_samples','beta_samples'); +""" import numpy as np +import numpy.matlib import attr import attr.validators as av from tqdm import tqdm +import sys from bayspar.utils import target_timeseries_pred -from bayspar.modelparams import get_draws +from bayspar.modelparams import get_draws, get_draws_analog from bayspar.observations import get_seatemp, get_tex @@ -69,6 +118,83 @@ def percentile(self, q=None, interpolation='nearest'): interpolation=interpolation) return perc.T +def predict_tex_analog(seatemp, temptype = 'sst', search_tol = 5., nens=5000): + """Predict TEX86 from sea temperature analog model + + Parameters + ---------- + seatemp : ndarray + n-length array of sea temperature observations (°C) from a single location. + temptype : str + Type of sea temperature used. Either 'sst' (default) for sea-surface or 'subt'. + search_tol: float + search tolerance in seatemp units (required for analog mode) + nens : int + Size of MCMC ensemble draws to use for calculation. + + Returns + ------- + output : Prediction + Raises + ------ + EnsembleSizeError + """ + draws = get_draws_analog(temptype) + tex_obs = get_tex(temptype) + + nd = len(seatemp) # number of input sea temperature data + + ntk = draws.alpha_samples_comp.shape[1] + if ntk < nens: + raise EnsembleSizeError(ntk, nens) + + latlon_match, val_match, inder_g = tex_obs.find_t_within_tolerance(t=seatemp.mean(), + tolerance=search_tol) + + if inder_g.size == 0: + sys.exit('No analogs were found. Check seatemp or make your search tolerance wider.') + + alpha_samples1 = draws.alpha_samples_comp[inder_g] + + beta_samples1 = draws.beta_samples_comp[inder_g] + tau2_samples1 = draws.tau2_samples.reshape((draws.tau2_samples.size,1)).T + + tau2_sample_reshapen = int(alpha_samples1.shape[0] * alpha_samples1.shape[1] / draws.tau2_samples.size) + + alpha_samples = np.reshape(alpha_samples1, (alpha_samples1.size, )) + beta_samples = np.reshape(beta_samples1, (beta_samples1.size, )) + + tau2_samples2 = np.matlib.repmat(tau2_samples1, 1, tau2_sample_reshapen) + tau2_samples = np.reshape(tau2_samples2, (tau2_samples2.size, )) + + # downsample to nens = 5000 + iters=alpha_samples.size + ds=round(float(iters)/nens) + dsarray = np.arange(0,alpha_samples.size,ds) + + alpha_samples3 = alpha_samples[dsarray] + #print('mean of sliced alpha_samples {}'.format(np.mean(alpha_samples))) + beta_samples3 = beta_samples[dsarray] + tau2_samples3 = tau2_samples[dsarray] + + # numpy empty storing tex data + tex = np.empty((nd, nens)) + # estimate tex using given alpha_samples_comp, beta_samples_comp, and tau2_samples + for i in range(nens): + tau2_now = tau2_samples3[i] + beta_now = beta_samples3[i] + alpha_now = alpha_samples3[i] + tex[:, i] = np.random.normal(seatemp * beta_now + alpha_now, + np.sqrt(tau2_now)) + + tex[tex > 1] = 1 + tex[tex < 0] = 0 + #grid_latlon = draws.find_nearest_latlon(lat=lat, lon=lon) # useless? + output = Prediction(ensemble=tex, + temptype=temptype, + analog_gridpoints=[tuple(latlon_match)]) + return output + def predict_tex(seatemp, lat, lon, temptype, nens=5000): """Predict TEX86 from sea temperature @@ -115,7 +241,8 @@ def predict_tex(seatemp, lat, lon, temptype, nens=5000): alpha_now = alpha_samples_comp[i] tex[:, i] = np.random.normal(seatemp * beta_now + alpha_now, np.sqrt(tau2_now)) - + tex[tex > 1] = 1 + tex[tex < 0] = 0 output = Prediction(ensemble=tex, temptype=temptype, latlon=(lat, lon), diff --git a/bayspar/tests/core_modelparamsold.py b/bayspar/tests/core_modelparamsold.py new file mode 100644 index 0000000..f5c0185 --- /dev/null +++ b/bayspar/tests/core_modelparamsold.py @@ -0,0 +1,122 @@ +import os.path +from copy import deepcopy +from pkgutil import get_data +from io import BytesIO +import attr +import numpy as np +from scipy.io import loadmat + + +TRANSLATE_VAR = {'sst': 'SST', 'subt': 'subT'} + + +def get_matlab_resource(resource, package='bayspar', **kwargs): + """Read flat MATLAB files as package resources, output for Numpy""" + with BytesIO(get_data(package, resource)) as fl: + data = loadmat(fl, **kwargs) + return data + + +def read_draws(flstr, drawtype): + """Grab single squeezed array from package resources + """ + drawtype = drawtype.lower() + + var_template = 'modelparams/Output_SpatAg_{0}/{1}' + + varstr = TRANSLATE_VAR[drawtype] + varstr_full = os.path.splitext(flstr)[0] + resource_str = var_template.format(varstr, flstr) + var = get_matlab_resource(resource_str, squeeze_me=True) + return var[varstr_full] + + +@attr.s +class Draws: + """Spatially-aware modelparams draws + """ + alpha_samples_comp = attr.ib() + beta_samples_comp = attr.ib() + tau2_samples = attr.ib() + locs_comp = attr.ib() + _half_grid_space = attr.ib(default=10) + + def _index_near(self, lat, lon): + """Get gridpoint index nearest a lat lon + """ + if not (-90 <= lat <= 90) or not (-180 < lon <= 180): + raise BadLatlonError(tuple([lat, lon])) + + lon_adiff = np.abs(self.locs_comp[:, 0] - lon) <= self._half_grid_space + lat_adiff = np.abs(self.locs_comp[:, 1] - lat) <= self._half_grid_space + return np.where(lon_adiff & lat_adiff) + + def find_nearest_latlon(self, lat, lon): + """Find draws gridpoint nearest a given lat lon + """ + idx = self._index_near(lat, lon) + # Squeeze and flip so returns as latlon, not lonlat. + latlon = self.locs_comp[idx].squeeze()[::-1] + return latlon + + def find_alphabeta_near(self, lat, lon): + """Find alpha and beta samples nearest a given location + """ + idx = self._index_near(lat, lon) + alpha_select = self.alpha_samples_comp[idx].squeeze() + beta_select = self.beta_samples_comp[idx].squeeze() + return alpha_select, beta_select + + +class BadLatlonError(Exception): + """Raised when latitude or longitude is outside (-90, 90) or (-180, 180) + + Parameters + ---------- + latlon : tuple + The bad (lat, lon) values. + """ + def __init__(self, latlon): + self.latlon = latlon + + +draws_sst = Draws(alpha_samples_comp=read_draws('alpha_samples_comp.mat', 'sst'), + beta_samples_comp=read_draws('beta_samples_comp.mat', 'sst'), + tau2_samples=read_draws('tau2_samples.mat', 'sst'), + locs_comp=read_draws('Locs_Comp.mat', 'sst')) + + +draws_subt = Draws(alpha_samples_comp=read_draws('alpha_samples_comp.mat', 'subt'), + beta_samples_comp=read_draws('beta_samples_comp.mat', 'subt'), + tau2_samples=read_draws('tau2_samples.mat', 'subt'), + locs_comp=read_draws('Locs_Comp.mat', 'subt')) + + +#draws_sst_analog = Draws(alpha_samples_comp=read_draws('alpha_samples.mat', 'sst'), +# beta_samples_comp=read_draws('beta_samples.mat', 'sst'), +# tau2_samples=read_draws('tau2_samples.mat', 'sst'), +# locs_comp=read_draws('Locs_Comp.mat', 'sst')) + + +#draws_subt_analog = Draws(alpha_samples_comp=read_draws('alpha_samples.mat', 'subt'), +# beta_samples_comp=read_draws('beta_samples.mat', 'subt'), +# tau2_samples=read_draws('tau2_samples.mat', 'subt'), +# locs_comp=read_draws('Locs_Comp.mat', 'subt')) + +def get_draws(drawtype): + """Get Draws instance for a draw type + """ + if drawtype == 'sst': + return deepcopy(draws_sst) + elif drawtype == 'subt': + return deepcopy(draws_subt) + +# for analog model of get_draws +def get_draws_analog(drawtype): + """Get Draws instance for a draw type + for the analog model""" + if drawtype == 'sst': + return deepcopy(draws_sst_analog) + elif drawtype == 'subt': + return deepcopy(draws_subt_analog) + \ No newline at end of file diff --git a/bayspar/tests/core_observationsold.py b/bayspar/tests/core_observationsold.py new file mode 100644 index 0000000..dcb04d8 --- /dev/null +++ b/bayspar/tests/core_observationsold.py @@ -0,0 +1,241 @@ +from copy import deepcopy +from pkgutil import get_data +from io import BytesIO + +import attr +import numpy as np +from scipy.io import loadmat + + +TRANSLATE_VAR = {'sst': 'SST', 'subt': 'subT'} + + +def get_matlab_resource(resource, package='bayspar', **kwargs): + """Read flat MATLAB files as package resources, output for Numpy + """ + with BytesIO(get_data(package, resource)) as fl: + data = loadmat(fl, **kwargs) + return data + + +def read_seatemp(obstype): + """Grab squeezed variable & locs array from sea temp package resources + """ + obstype = obstype.lower() + + locs_template = 'observations/locs_woa_1degree_asvec_{0}.mat' + var_template = 'observations/st_woa_1degree_asvec_{0}.mat' + + var_str = TRANSLATE_VAR[obstype] + locs = get_matlab_resource(locs_template.format(var_str)) + var = get_matlab_resource(var_template.format(var_str)) + + var_clean = var['st_obs_ave_vec'].squeeze() + locs_clean = locs['locs_st_obs'].squeeze() + return var_clean, locs_clean + + +def read_tex(obstype): + """Grab squeezed variables array from TEX86 package resources + """ + var_template = 'observations/Data_Input_SpatAg_{}.mat' + + var_str = TRANSLATE_VAR[obstype] + var = get_matlab_resource(var_template.format(var_str))['Data_Input'] + + locs = var['Locs'].squeeze().item() + obs_stack = var['Obs_Stack'].squeeze().item() + inds_stack = var['Inds_Stack'].squeeze().item() + obslocs = np.array([x[...].squeeze() for x in var['ObsLocs'].item().ravel() if x.any()]) + return locs, obs_stack, inds_stack, obslocs + + +def chord_distance(latlon1, latlon2): + """Chordal distance between two groups of latlon points + + Parameters + ---------- + latlon1 : ndarray + An nx2 array of latitudes and longitudes for one set of points. + latlon2 : ndarray + An mx2 array of latitudes and longitudes for another set of points. + + Returns + ------- + dists : 2d array + An mxn array of Earth chordal distances [1]_ (km) between points in + latlon1 and latlon2. + + References + ---------- + .. [1] https://en.wikipedia.org/wiki/Chord_(geometry) + + """ + earth_radius = 6378.137 # in km + + latlon1 = np.atleast_2d(latlon1) + latlon2 = np.atleast_2d(latlon2) + + n = latlon1.shape[0] + m = latlon2.shape[0] + + paired = np.hstack((np.kron(latlon1, np.ones((m, 1))), + np.kron(np.ones((n, 1)), latlon2))) + + latdif = np.deg2rad(paired[:, 0] - paired[:, 2]) + londif = np.deg2rad(paired[:, 1] - paired[:, 3]) + + a = np.sin(latdif / 2) ** 2 + b = np.cos(np.deg2rad(paired[:, 0])) + c = np.cos(np.deg2rad(paired[:, 2])) + d = np.sin(np.abs(londif) / 2) ** 2 + + half_angles = np.arcsin(np.sqrt(a + b * c * d)) + + dists = 2 * earth_radius * np.sin(half_angles) + + return dists.reshape(m, n) + + +@attr.s +class SeaTempObs: + """Observed sea temperature fields as used in calibration + """ + st_obs_ave_vec = attr.ib() + locs_st_obs = attr.ib() + + def distance_from(self, lat, lon): + """Chordal distance (km) of observations from latlon + + Parameters + ---------- + lat : float or int + lon : float or int + + Returns + ------- + d : ndarray + An nx1 array of distances (km) between latlon and the (n) observed + points. + """ + lat = float(lat) + lon = float(lon) + latlon = np.array([[lat, lon]]) + d = chord_distance(latlon, self.locs_st_obs[:, ::-1]) + return d + + def get_close_obs(self, lat, lon, distance=500, min_obs=1): + """Get observations closest to a latlon point + + Parameters + ---------- + lat : float or int + lon : float or int + distance : float or int + Distance (km) of buffer from latlon point. + min_obs : int + Minimum number of obs to collect, if not enough within distance. + + Returns + ------- + obs_sorted : ndarray + 1d array of observation values within buffer. Sorted by distance + from latlon. + d_sorted : ndarray + Corresponding 1d array of observation distances (km) from latlon. + Sorted by distance + """ + lat = float(lat) + lon = float(lon) + distance = float(distance) + min_obs = int(min_obs) + + d = self.distance_from(lat, lon) + assert d.size == self.st_obs_ave_vec.size + + sort_idx = np.argsort(d.flat, axis=0) + d_sorted = d.flat[sort_idx] + obs_sorted = self.st_obs_ave_vec[sort_idx] + + n_in_buffer = (d < distance).sum() + assert n_in_buffer > 0 + + msk = np.empty(self.st_obs_ave_vec.shape, dtype=bool) + msk.fill(0) + if n_in_buffer > min_obs: + msk = d_sorted < distance + else: + msk[:min_obs] = True + + return obs_sorted[msk], d_sorted[msk] + + +@attr.s +class TexObs: + """Observed TEX86 values""" + locs = attr.ib() + obs_stack = attr.ib() + inds_stack = attr.ib() + obslocs = attr.ib() + + def find_within_tolerance(self, x, tolerance): + """Find mean TEX86 observations that are within ± tolerance from x + + Parameters + ---------- + x : float + Mean TEX86 value. + tolerance : float + Value added and subtracted from 'x' to get upper and lower tolerance + bounds. + + Returns + ------- + latlon_match : list + An n-length list of latlon tuples where matches were found. + vals_match : ndarray + A 1d array (n) of corresponding TEX86 averages from each match. + """ + n_bg = len(self.locs) + + upper_bound = x + tolerance + lower_bound = x - tolerance + + inder_g = [] + vals_match = [] + + for kk in range(1, n_bg + 1): # ...data was written to 1-based idx + # Find the tex obs corresponding to this index location, using the + # stacked obs: + vals = self.obs_stack[self.inds_stack == kk] + vals_mean = vals.mean() + + if lower_bound <= vals_mean <= upper_bound: + idx = kk - 1 # Back to 0-based idx + inder_g.append(idx) + vals_match.append(vals_mean) + + latlon_match = [tuple(x) for x in self.locs[inder_g, ::-1].tolist()] + return latlon_match, np.array(vals_match) + + +seatemp_sst = SeaTempObs(*read_seatemp('sst')) +seatemp_subt = SeaTempObs(*read_seatemp('subt')) +tex_sst = TexObs(*read_tex('sst')) +tex_subt = TexObs(*read_tex('subt')) + + +def get_seatemp(obstype): + """Get SeaTempObs instance for observation type""" + if obstype == 'sst': + return deepcopy(seatemp_sst) + elif obstype == 'subt': + return deepcopy(seatemp_subt) + + +def get_tex(obstype): + """Get TexObs instance for observation type""" + if obstype == 'sst': + return deepcopy(tex_sst) + elif obstype == 'subt': + return deepcopy(tex_subt) diff --git a/bayspar/tests/predict_old.py b/bayspar/tests/predict_old.py new file mode 100644 index 0000000..c3cd2f8 --- /dev/null +++ b/bayspar/tests/predict_old.py @@ -0,0 +1,278 @@ +import numpy as np +import attr +import attr.validators as av +from tqdm import tqdm + +from bayspar.utils import target_timeseries_pred +from bayspar.modelparams import get_draws +from bayspar.observations import get_seatemp, get_tex + + +@attr.s() +class Prediction: + """MCMC prediction + + Parameters + ---------- + ensemble : ndarray + Ensemble of predictions. A 2d array (nxm) for n predictands and m + ensemble members. + temptype : str + Type of sea temperature used for prediction. + latlon : tuple or None, optional + Optional tuple of the site location (lat, lon). + prior_mean : float or None, optional + Prior mean used for the prediction. + prior_std : float or None, optional + Prior sample standard deviation used for the prediction. + modelparam_gridpoints : list of tuples or None, optional + A list of one or more (lat, lon) points used to collect + spatially-sensitive model parameters. + analog_gridpoints : list of tuples or None, optional + A list of one or more (lat, lon) points used for an analog prediction. + """ + temptype = attr.ib() + ensemble = attr.ib(validator=av.optional(av.instance_of(np.ndarray))) + latlon = attr.ib(default=None, + validator=av.optional(av.instance_of(tuple))) + prior_mean = attr.ib(default=None) + prior_std = attr.ib(default=None) + modelparam_gridpoints = attr.ib(default=None, + validator=av.optional(av.instance_of(list))) + analog_gridpoints = attr.ib(default=None, + validator=av.optional(av.instance_of(list))) + + def percentile(self, q=None, interpolation='nearest'): + """Compute the qth ranked percentile from ensemble members. + + Parameters + ---------- + q : float ,sequence of floats, or None, optional + Percentiles (i.e. [0, 100]) to compute. Default is 5%, 50%, 95%. + interpolation : str, optional + Passed to numpy.percentile. Default is 'nearest'. + + Returns + ------- + perc : ndarray + A 2d (nxm) array of floats where n is the number of predictands in + the ensemble and m is the number of percentiles ('len(q)'). + """ + if q is None: + q = [5, 50, 95] + q = np.array(q, dtype=np.float64, copy=True) + + # Because analog ensembles have 3 dims + target_axis = list(range(self.ensemble.ndim))[1:] + + perc = np.percentile(self.ensemble, q=q, axis=target_axis, + interpolation=interpolation) + return perc.T + + +def predict_tex(seatemp, lat, lon, temptype, nens=5000): + """Predict TEX86 from sea temperature + + Parameters + ---------- + seatemp : ndarray + n-length array of sea temperature observations (°C) from a single + location. + lat : float + Site latitude from -90 to 90. + lon : float + Site longitude from -180 to 180. + temptype : str + Type of sea temperature used. Either 'sst' for sea-surface or 'subt'. + nens : int + Size of MCMC ensemble draws to use for calculation. + + Returns + ------- + output : Prediction + + Raises + ------ + EnsembleSizeError + """ + draws = get_draws(temptype) + + nd = len(seatemp) + ntk = draws.alpha_samples_comp.shape[1] + if ntk < nens: + raise EnsembleSizeError(ntk, nens) + + alpha_samples_comp, beta_samples_comp = draws.find_alphabeta_near(lat=lat, + lon=lon) + tau2_samples = draws.tau2_samples + + grid_latlon = draws.find_nearest_latlon(lat=lat, lon=lon) + + tex = np.empty((nd, nens)) + for i in range(nens): + tau2_now = tau2_samples[i] + beta_now = beta_samples_comp[i] + alpha_now = alpha_samples_comp[i] + tex[:, i] = np.random.normal(seatemp * beta_now + alpha_now, + np.sqrt(tau2_now)) + + output = Prediction(ensemble=tex, + temptype=temptype, + latlon=(lat, lon), + modelparam_gridpoints=[tuple(grid_latlon)]) + return output + + +def predict_seatemp(tex, lat, lon, prior_std, temptype, prior_mean=None, nens=5000): + """Predict sea temperature with TEX86 + + Parameters + ---------- + tex : ndarray + n-length array of TEX86 observations from a single location. + lat : float + Site latitude from -90 to 90. + lon : float + Site longitude from -180 to 180. + prior_std : float + Prior standard deviation for sea temperature (°C). + temptype : str + Type of sea temperature desired. Either 'sst' for sea-surface or 'subt'. + prior_mean : float or None, optional + Prior mean for sea temperature (°C). If 'None', the prior mean is found + by searching for a "close" value in observed sea temperature records. + nens : int + Size of MCMC ensemble draws to use for calculation. + + Returns + ------- + output : Prediction + + Raises + ------ + EnsembleSizeError + """ + draws = get_draws(temptype) + obs = get_seatemp(temptype) + + nd = len(tex) + ntk = draws.alpha_samples_comp.shape[1] + if ntk < nens: + raise EnsembleSizeError(ntk, nens) + + if prior_mean is None: + close_obs, close_dist = obs.get_close_obs(lat=lat, lon=lon) + prior_mean = close_obs.mean() + + alpha_samples_comp, beta_samples_comp = draws.find_alphabeta_near(lat=lat, lon=lon) + tau2_samples = draws.tau2_samples + + grid_latlon = draws.find_nearest_latlon(lat=lat, lon=lon) + + prior_par = {'mu': np.ones(nd) * prior_mean, + 'inv_cov': np.eye(nd) * prior_std ** -2} + + preds = np.empty((nd, nens)) + for jj in range(nens): + preds[:, jj] = target_timeseries_pred(alpha_now=alpha_samples_comp[jj], + beta_now=beta_samples_comp[jj], + tau2_now=tau2_samples[jj], + proxy_ts=tex, + prior_pars=prior_par) + # TODO(brews): Consider a progress bar for this loop. + + output = Prediction(ensemble=preds, + temptype=temptype, + latlon=(lat, lon), + prior_mean=prior_mean, + prior_std=prior_std, + modelparam_gridpoints=[tuple(grid_latlon)]) + return output + + +def predict_seatemp_analog(tex, prior_std, temptype, search_tol, prior_mean=None, nens=5000, progressbar=True): + """Predict sea temperature with TEX86, using the analog method + + Parameters + ---------- + tex : ndarray + n-length array of TEX86 observations from a single location. + prior_std : float + Prior standard deviation for sea temperature (°C). + temptype : str + Type of sea temperature desired. Either 'sst' for sea-surface or 'subt'. + search_tol: float + Tolerance for finding analog locations. Comparison is between the mean + of dats and the mean tex value within each large gridcell. + prior_mean : float + Prior mean for sea temperature (°C). + nens : int + Size of MCMC ensemble draws to use for calculation. + progressbar: bool + Whether or not to display a progress bar on the command line. The bar + shows how many analogs have been completed. + + Returns + ------- + output : Prediction + + Raises + ------ + EnsembleSizeError + """ + draws = get_draws(temptype) + tex_obs = get_tex(temptype) + + nd = len(tex) + ntk = draws.alpha_samples_comp.shape[1] + if ntk < nens: + raise EnsembleSizeError(ntk, nens) + + latlon_match, val_match = tex_obs.find_within_tolerance(x=tex.mean(), + tolerance=search_tol) + n_locs_g = len(latlon_match) + + prior_par = {'mu': np.ones(nd) * prior_mean, + 'inv_cov': np.eye(nd) * prior_std ** -2} + + n_latlon_matches = len(latlon_match) + indices = range(n_latlon_matches) + + if progressbar: + indices = tqdm(indices, total=n_latlon_matches) + + preds = np.empty((nd, n_locs_g, nens)) + for kk in indices: + latlon = latlon_match[kk] + alpha_samples, beta_samples = draws.find_alphabeta_near(*latlon) + for jj in range(nens): + a_now = alpha_samples[jj] + b_now = beta_samples[jj] + t2_now = draws.tau2_samples[jj] + preds[:, kk, jj] = target_timeseries_pred(alpha_now=a_now, + beta_now=b_now, + tau2_now=t2_now, + proxy_ts=tex, + prior_pars=prior_par) + + output = Prediction(ensemble=preds, + temptype=temptype, + prior_mean=prior_mean, + prior_std=prior_std, + analog_gridpoints=latlon_match) + return output + + +class EnsembleSizeError(Exception): + """Raised when user requests too large an ensemble for prediction + + Parameters + ---------- + available_size : int + The available ensemble size. + requested_size : int + The user-requested ensemble size. + """ + def __init__(self, available_size, requested_size): + self.available_size = available_size + self.requested_size = requested_size diff --git a/bayspar/tests/test_core.py b/bayspar/tests/test_core.py new file mode 100644 index 0000000..32c6f1b --- /dev/null +++ b/bayspar/tests/test_core.py @@ -0,0 +1,190 @@ +""" +Core + Originated by Brewster Malevich + + Revised by: Mingsong Li + Penn State + Sept 23, 2019 + + New file: alpha_samples.mat + beta_samples.mat + Purpose: add TEX_forward model for analog model of baysparpy + simplify the code and save space + These two files were trimmed and calculated + using the following matlab code in TEX_forward.m of BAYSPAR + https://github.com/jesstierney/BAYSPAR/blob/master/TEX_forward.m + + Ntk = 20000; + load('alpha_samples.mat', 'alpha_samples') + alpha_samples=[alpha_samples.field]; + alpha_samples=alpha_samples(:,end-Ntk+1:end); + load('beta_samples.mat') + beta_samples=[beta_samples.field]; + beta_samples=beta_samples(:,end-Ntk+1:end); + save('alpha_samples','alpha_samples'); + save('beta_samples','beta_samples'); + +""" +import os.path +from copy import deepcopy +from pkgutil import get_data +from io import BytesIO +import attr +import numpy as np +from scipy.io import loadmat + + +TRANSLATE_VAR = {'sst': 'SST', 'subt': 'subT'} + + +def get_matlab_resource(resource, package='bayspar', **kwargs): + """Read flat MATLAB files as package resources, output for Numpy""" + with BytesIO(get_data(package, resource)) as fl: + data = loadmat(fl, **kwargs) + return data + + +def read_draws(flstr, drawtype): + """Grab single squeezed array from package resources + example: + alpha_samples_comp=read_draws('alpha_samples_comp.mat', 'sst') + """ + drawtype = drawtype.lower() + varstr = TRANSLATE_VAR[drawtype] + if flstr[0:10] == 'Data_Input': + var_template = 'observations/Data_Input_SpatAg_{0}.mat' + # example: + # resource_str = 'observations/Data_Input_SpatAg_{0}.mat'.format('SST') + resource_str = var_template.format(varstr) + var = get_matlab_resource(resource_str, squeeze_me=True) # return a dict + Data_Input = var['Data_Input'] # return a numpy + # print(Data_Input.dtype) # to show dtype + # data bayspar wants include: + # Data_Input['Locs'] + # Data_Input['Target_Stack'] + # Data_Input['Inds_Stack'] + return Data_Input + + else: + var_template = 'modelparams/Output_SpatAg_{0}/{1}' + # example: + # resource_str = 'modelparams/Output_SpatAg_{0}/{1}'.format('SST', 'alpha_samples_comp.mat') + resource_str = var_template.format(varstr, flstr) + varstr_full = os.path.splitext(flstr)[0] + var = get_matlab_resource(resource_str, squeeze_me=True) + return var[varstr_full] + + +@attr.s +class Draws: + """Spatially-aware modelparams draws + """ + alpha_samples_comp = attr.ib() + beta_samples_comp = attr.ib() + tau2_samples = attr.ib() + locs_comp = attr.ib() + _half_grid_space = attr.ib(default=10) + + def _index_near(self, lat, lon): + """Get gridpoint index nearest a lat lon + """ + if not (-90 <= lat <= 90) or not (-180 < lon <= 180): + raise BadLatlonError(tuple([lat, lon])) + + lon_adiff = np.abs(self.locs_comp[:, 0] - lon) <= self._half_grid_space + lat_adiff = np.abs(self.locs_comp[:, 1] - lat) <= self._half_grid_space + return np.where(lon_adiff & lat_adiff) + + def find_nearest_latlon(self, lat, lon): + """Find draws gridpoint nearest a given lat lon + """ + idx = self._index_near(lat, lon) + # Squeeze and flip so returns as latlon, not lonlat. + latlon = self.locs_comp[idx].squeeze()[::-1] + return latlon + + def find_alphabeta_near(self, lat, lon): + """Find alpha and beta samples nearest a given location + """ + idx = self._index_near(lat, lon) + alpha_select = self.alpha_samples_comp[idx].squeeze() + beta_select = self.beta_samples_comp[idx].squeeze() + return alpha_select, beta_select + +# NEW class Draws_analog by Mingsong Li PennState +@attr.s +class Draws_analog: + """Spatially-aware modelparams draws + """ + alpha_samples = attr.ib() + beta_samples = attr.ib() + data_input = attr.ib() + tau2_samples = attr.ib() + locs_comp = attr.ib() + _half_grid_space = attr.ib(default=10) + + # number of big grids: + #N_bg = data_input['Locs'][0,0].shape[0] + # + #for kk in range(0,N_bg): + # # find the SST obs corresponding to this index location: + # vals=Data_Input['Target_Stack'][Data_Input['Inds_Stack'] == kk] + # # if the mean of vals in the big grids within tolerance, add it to inder_g + # if np.mean(vals) >= (np.mean(t)-stol) && mean(vals)<=(mean(t)+stol) + # inder_g = [inder_g; kk] + + +class BadLatlonError(Exception): + """Raised when latitude or longitude is outside (-90, 90) or (-180, 180) + + Parameters + ---------- + latlon : tuple + The bad (lat, lon) values. + """ + def __init__(self, latlon): + self.latlon = latlon + + +draws_sst = Draws(alpha_samples_comp=read_draws('alpha_samples_comp.mat', 'sst'), + beta_samples_comp=read_draws('beta_samples_comp.mat', 'sst'), + tau2_samples=read_draws('tau2_samples.mat', 'sst'), + locs_comp=read_draws('Locs_Comp.mat', 'sst')) + + +draws_subt = Draws(alpha_samples_comp=read_draws('alpha_samples_comp.mat', 'subt'), + beta_samples_comp=read_draws('beta_samples_comp.mat', 'subt'), + tau2_samples=read_draws('tau2_samples.mat', 'subt'), + locs_comp=read_draws('Locs_Comp.mat', 'subt')) + + +draws_sst_analog = Draws_analog(alpha_samples=read_draws('alpha_samples.mat', 'sst'), + beta_samples=read_draws('beta_samples.mat', 'sst'), + data_input = read_draws('Data_Input_SpatAg_SST','sst'), + tau2_samples=read_draws('tau2_samples.mat', 'sst'), + locs_comp=read_draws('Locs_Comp.mat', 'sst')) + + +draws_subt_analog = Draws_analog(alpha_samples=read_draws('alpha_samples.mat', 'subt'), + beta_samples=read_draws('beta_samples.mat', 'subt'), + data_input = read_draws('Data_Input_SpatAg_subT','subt'), + tau2_samples=read_draws('tau2_samples.mat', 'subt'), + locs_comp=read_draws('Locs_Comp.mat', 'subt')) + +def get_draws(drawtype): + """Get Draws instance for a draw type + """ + if drawtype == 'sst': + return deepcopy(draws_sst) + elif drawtype == 'subt': + return deepcopy(draws_subt) + +# for analog model of get_draws +def get_draws_analog(drawtype): + """Get Draws instance for a draw type + for the analog model""" + if drawtype == 'sst': + return deepcopy(draws_sst_analog) + elif drawtype == 'subt': + return deepcopy(draws_subt_analog) + \ No newline at end of file diff --git a/bayspar/tests/test_predict4.py b/bayspar/tests/test_predict4.py new file mode 100644 index 0000000..33fb072 --- /dev/null +++ b/bayspar/tests/test_predict4.py @@ -0,0 +1,349 @@ +import pytest +import numpy as np +import attr +import attr.validators as av +from tqdm import tqdm +import sys + +from bayspar.utils import target_timeseries_pred +from bayspar.modelparams import get_draws +from bayspar.observations import get_seatemp, get_tex + + +@attr.s() +class Prediction: + """MCMC prediction + + Parameters + ---------- + ensemble : ndarray + Ensemble of predictions. A 2d array (nxm) for n predictands and m + ensemble members. + temptype : str + Type of sea temperature used for prediction. + latlon : tuple or None, optional + Optional tuple of the site location (lat, lon). + prior_mean : float or None, optional + Prior mean used for the prediction. + prior_std : float or None, optional + Prior sample standard deviation used for the prediction. + modelparam_gridpoints : list of tuples or None, optional + A list of one or more (lat, lon) points used to collect + spatially-sensitive model parameters. + analog_gridpoints : list of tuples or None, optional + A list of one or more (lat, lon) points used for an analog prediction. + """ + temptype = attr.ib() + ensemble = attr.ib(validator=av.optional(av.instance_of(np.ndarray))) + latlon = attr.ib(default=None, + validator=av.optional(av.instance_of(tuple))) + prior_mean = attr.ib(default=None) + prior_std = attr.ib(default=None) + modelparam_gridpoints = attr.ib(default=None, + validator=av.optional(av.instance_of(list))) + analog_gridpoints = attr.ib(default=None, + validator=av.optional(av.instance_of(list))) + + def percentile(self, q=None, interpolation='nearest'): + """Compute the qth ranked percentile from ensemble members. + + Parameters + ---------- + q : float ,sequence of floats, or None, optional + Percentiles (i.e. [0, 100]) to compute. Default is 5%, 50%, 95%. + interpolation : str, optional + Passed to numpy.percentile. Default is 'nearest'. + + Returns + ------- + perc : ndarray + A 2d (nxm) array of floats where n is the number of predictands in + the ensemble and m is the number of percentiles ('len(q)'). + """ + if q is None: + q = [5, 50, 95] + q = np.array(q, dtype=np.float64, copy=True) + + # Because analog ensembles have 3 dims + target_axis = list(range(self.ensemble.ndim))[1:] + + perc = np.percentile(self.ensemble, q=q, axis=target_axis, + interpolation=interpolation) + return perc.T + +def predict_tex_analog(seatemp, lat, lon, temptype, stol, nens=5000): + """Predict TEX86 from sea temperature analog model + TO BE REVISED + Parameters + ---------- + seatemp : ndarray + n-length array of sea temperature observations (°C) from a single location. + lat : float Site latitude from -90 to 90. + lon : float Site longitude from -180 to 180. + temptype : str Type of sea temperature used. Either 'sst' for sea-surface or 'subt'. + stol: float search tolerance in seatemp units (required for analog mode) + nens : int Size of MCMC ensemble draws to use for calculation. + Returns + ------- + output : Prediction + Raises + ------ + EnsembleSizeError + """ + draws_analog = get_draws_analog(temptype) + # number of big grids: + N_bg = draws_analog.data_input['Locs'][0,0].shape[0] + + nd = len(seatemp) + t = seatemp + + inder_g = np.empty((0,1), int) + + for kk in range(0,N_bg): + # find the SST obs corresponding to this index location: + vals=Data_Input['Target_Stack'][Data_Input['Inds_Stack'] == kk] + # if the mean of vals in the big grids within tolerance, add it to inder_g + if np.mean(vals) >= (np.mean(t)-stol) && np.mean(vals)<=(np.mean(t)+stol) : + #inder_g = [inder_g; kk] + inder_g = np.append(inder_g, np.array([[kk]]), axis=0) + if inder_g.size == 0: + print('No analogs were found. Make your search tolerance wider.') + sys.exit('No analogs were found. Make your search tolerance wider.') + + + alpha_samples = draws_analog.alpha_samples[inder_g] + beta_samples = draws_analog.beta_samples[inder_g] + tau2_samples = + # reshape + alpha_samples = + beta_samples = + # downsample to 5000 + iters=len(alpha_samples) + ds=round(iters/5000) + alpha_samples = + beta_samples = + tau2_samples = + + + # numpy empty storing tex data + tex = np.empty((nd, nens)) + + # estimate tex using given alpha_samples_comp, beta_samples_comp, and tau2_samples + for i in range(nens): + tau2_now = tau2_samples[i] + beta_now = beta_samples_comp[i] + alpha_now = alpha_samples_comp[i] + tex[:, i] = np.random.normal(seatemp * beta_now + alpha_now, + np.sqrt(tau2_now)) + + output = Prediction(ensemble=tex, + temptype=temptype) + return output + + +def predict_tex(seatemp, lat, lon, temptype, nens=5000): + """Predict TEX86 from sea temperature + + Parameters + ---------- + seatemp : ndarray + n-length array of sea temperature observations (°C) from a single + location. + lat : float + Site latitude from -90 to 90. + lon : float + Site longitude from -180 to 180. + temptype : str + Type of sea temperature used. Either 'sst' for sea-surface or 'subt'. + nens : int + Size of MCMC ensemble draws to use for calculation. + + Returns + ------- + output : Prediction + + Raises + ------ + EnsembleSizeError + """ + draws = get_draws(temptype) + + nd = len(seatemp) + ntk = draws.alpha_samples_comp.shape[1] + if ntk < nens: + raise EnsembleSizeError(ntk, nens) + + alpha_samples_comp, beta_samples_comp = draws.find_alphabeta_near(lat=lat, + lon=lon) + tau2_samples = draws.tau2_samples + + grid_latlon = draws.find_nearest_latlon(lat=lat, lon=lon) + + tex = np.empty((nd, nens)) + for i in range(nens): + tau2_now = tau2_samples[i] + beta_now = beta_samples_comp[i] + alpha_now = alpha_samples_comp[i] + tex[:, i] = np.random.normal(seatemp * beta_now + alpha_now, + np.sqrt(tau2_now)) + + output = Prediction(ensemble=tex, + temptype=temptype, + latlon=(lat, lon), + modelparam_gridpoints=[tuple(grid_latlon)]) + return output + + +def predict_seatemp(tex, lat, lon, prior_std, temptype, prior_mean=None, nens=5000): + """Predict sea temperature with TEX86 + + Parameters + ---------- + tex : ndarray + n-length array of TEX86 observations from a single location. + lat : float + Site latitude from -90 to 90. + lon : float + Site longitude from -180 to 180. + prior_std : float + Prior standard deviation for sea temperature (°C). + temptype : str + Type of sea temperature desired. Either 'sst' for sea-surface or 'subt'. + prior_mean : float or None, optional + Prior mean for sea temperature (°C). If 'None', the prior mean is found + by searching for a "close" value in observed sea temperature records. + nens : int + Size of MCMC ensemble draws to use for calculation. + + Returns + ------- + output : Prediction + + Raises + ------ + EnsembleSizeError + """ + draws = get_draws(temptype) + obs = get_seatemp(temptype) + + nd = len(tex) + ntk = draws.alpha_samples_comp.shape[1] + if ntk < nens: + raise EnsembleSizeError(ntk, nens) + + if prior_mean is None: + close_obs, close_dist = obs.get_close_obs(lat=lat, lon=lon) + prior_mean = close_obs.mean() + + alpha_samples_comp, beta_samples_comp = draws.find_alphabeta_near(lat=lat, lon=lon) + tau2_samples = draws.tau2_samples + + grid_latlon = draws.find_nearest_latlon(lat=lat, lon=lon) + + prior_par = {'mu': np.ones(nd) * prior_mean, + 'inv_cov': np.eye(nd) * prior_std ** -2} + + preds = np.empty((nd, nens)) + for jj in range(nens): + preds[:, jj] = target_timeseries_pred(alpha_now=alpha_samples_comp[jj], + beta_now=beta_samples_comp[jj], + tau2_now=tau2_samples[jj], + proxy_ts=tex, + prior_pars=prior_par) + # TODO(brews): Consider a progress bar for this loop. + + output = Prediction(ensemble=preds, + temptype=temptype, + latlon=(lat, lon), + prior_mean=prior_mean, + prior_std=prior_std, + modelparam_gridpoints=[tuple(grid_latlon)]) + return output + + +def predict_seatemp_analog(tex, prior_std, temptype, search_tol, prior_mean=None, nens=5000, progressbar=True): + """Predict sea temperature with TEX86, using the analog method + + Parameters + ---------- + tex : ndarray + n-length array of TEX86 observations from a single location. + prior_std : float + Prior standard deviation for sea temperature (°C). + temptype : str + Type of sea temperature desired. Either 'sst' for sea-surface or 'subt'. + search_tol: float + Tolerance for finding analog locations. Comparison is between the mean + of dats and the mean tex value within each large gridcell. + prior_mean : float + Prior mean for sea temperature (°C). + nens : int + Size of MCMC ensemble draws to use for calculation. + progressbar: bool + Whether or not to display a progress bar on the command line. The bar + shows how many analogs have been completed. + + Returns + ------- + output : Prediction + + Raises + ------ + EnsembleSizeError + """ + draws = get_draws(temptype) + tex_obs = get_tex(temptype) + + nd = len(tex) + ntk = draws.alpha_samples_comp.shape[1] + if ntk < nens: + raise EnsembleSizeError(ntk, nens) + + latlon_match, val_match = tex_obs.find_within_tolerance(x=tex.mean(), + tolerance=search_tol) + n_locs_g = len(latlon_match) + + prior_par = {'mu': np.ones(nd) * prior_mean, + 'inv_cov': np.eye(nd) * prior_std ** -2} + + n_latlon_matches = len(latlon_match) + indices = range(n_latlon_matches) + + if progressbar: + indices = tqdm(indices, total=n_latlon_matches) + + preds = np.empty((nd, n_locs_g, nens)) + for kk in indices: + latlon = latlon_match[kk] + alpha_samples, beta_samples = draws.find_alphabeta_near(*latlon) + for jj in range(nens): + a_now = alpha_samples[jj] + b_now = beta_samples[jj] + t2_now = draws.tau2_samples[jj] + preds[:, kk, jj] = target_timeseries_pred(alpha_now=a_now, + beta_now=b_now, + tau2_now=t2_now, + proxy_ts=tex, + prior_pars=prior_par) + + output = Prediction(ensemble=preds, + temptype=temptype, + prior_mean=prior_mean, + prior_std=prior_std, + analog_gridpoints=latlon_match) + return output + + +class EnsembleSizeError(Exception): + """Raised when user requests too large an ensemble for prediction + + Parameters + ---------- + available_size : int + The available ensemble size. + requested_size : int + The user-requested ensemble size. + """ + def __init__(self, available_size, requested_size): + self.available_size = available_size + self.requested_size = requested_size diff --git a/setup.py b/setup.py index bebd815..1c9776b 100644 --- a/setup.py +++ b/setup.py @@ -8,14 +8,15 @@ def readme(): setup( name='baysparpy', - version='0.0.3', + version='0.0.5', description='An Open Source Python package for TEX86 calibration', long_description=readme(), license='GPLv3', - author='S. Brewster Malevich', - author_email='malevich@email.arizona.edu', - url='https://github.com/brews/baysparpy', + author='S. Brewster Malevich, Mingsong Li', + author_email='malevich@email.arizona.edu, msli@pku.edu.cn', + url='https://github.com/mingsongli/baysparpy', + # rl='https://github.com/brews/baysparpy', # See https://pypi.python.org/pypi?%3Aaction=list_classifiers classifiers=[ 'Development Status :: 3 - Alpha',