Skip to content

Commit

Permalink
Merge pull request #117 from hvasbath/nonT_2d
Browse files Browse the repository at this point in the history
covariance, models.geodetic introduce 2d spatial noise estimation
  • Loading branch information
hvasbath authored Feb 14, 2023
2 parents 5606e9b + 99acc71 commit 80f0b07
Show file tree
Hide file tree
Showing 27 changed files with 1,181 additions and 437 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,23 @@ All notable changes to BEAT will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).

## [1.2.4] 14 February 2023
Contributors: Hannes Vasyura-Bathke @hvasbath

### Added
- covariance.GeodeticNoiseAnalyser: parametrize the residual noise allowing for non-Toeplitz/import
- plotting.geodetic/seismic added standardized residual histograms to residuals
- plotting.geodetic: new plot "geodetic_covariances"
- plotting.geodetic: add "individual" plot_projection to scene_fits to show stdz residuals
- plotting.seismic: fuzzy_bb, lune, hudson and fuzzy_mt_decomp support n_sources > 1
- plotting.seismic: allow plotting of fuzzy_bb for RectangularSource

### Fixed
- plotting.marginals: stage_posteriors fixed axis unification and erroneous histogram plotting
- docs: short_installation fix python version to 3.8
- heart: pol_synthetics allow for RectangularSource
- covariance: estimation of variance on amplitude spectra instead of complex spectra


## [1.2.3] 20 November 2022
Contributors: Hannes Vasyura-Bathke @hvasbath
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ If you came looking for the beat package calculating internet time you can find

Based on pyrocko, theano and pymc3

14 February 2023
Version 1.2.4 is released. Details in the [changelog](https://github.com/hvasbath/beat/blob/master/CHANGELOG.md).

20 November 2022
Version 1.2.3 is released. Bugfix-release.

Expand Down
2 changes: 1 addition & 1 deletion beat/apps/beat.py
Original file line number Diff line number Diff line change
Expand Up @@ -1212,7 +1212,7 @@ def setup(parser):
if options.calc_derived:
varnames, shapes = pc.get_derived_variables_shapes()
rtrace.add_derived_variables(varnames, shapes)
splitinds = cumsum([shape[0] for shape in shapes[:-1]])
splitinds = range(1, len(varnames))

rtrace.setup(draws=draws, chain=-1, overwrite=True)

Expand Down
24 changes: 20 additions & 4 deletions beat/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from theano import config as tconfig

from beat import utility
from beat.covariance import available_noise_structures
from beat.covariance import available_noise_structures, available_noise_structures_2d
from beat.heart import (
ArrivalTaper,
Filter,
Expand Down Expand Up @@ -254,6 +254,7 @@ def multi_event_stations_name(nevent=0):
_quantity_choices = ["displacement", "velocity", "acceleration"]
_interpolation_choices = ["nearest_neighbor", "multilinear"]
_structure_choices = available_noise_structures()
_structure_choices_2d = available_noise_structures_2d()
_mode_choices = [geometry_mode_str, ffi_mode_str]
_regularization_choices = ["laplacian", "none"]
_correlation_function_choices = ["nearest_neighbor", "gaussian", "exponential"]
Expand Down Expand Up @@ -691,6 +692,21 @@ class SeismicNoiseAnalyserConfig(Object):
)


class GeodeticNoiseAnalyserConfig(Object):

structure = StringChoice.T(
choices=_structure_choices_2d,
default="import",
help="Determines data-covariance matrix structure."
" Choices: %s" % utility.list2string(_structure_choices_2d),
)
max_dist_perc = Float.T(
default=0.2,
help="Distance in decimal percent from maximum distance in scene to use for "
"non-Toeplitz noise estimation",
)


class SeismicConfig(Object):
"""
Config for seismic data optimization related parameters.
Expand Down Expand Up @@ -1075,9 +1091,9 @@ class GeodeticConfig(Object):
default={"SAR": SARDatasetConfig.D(), "GNSS": GNSSDatasetConfig.D()},
help="Types of geodetic data, i.e. SAR, GNSS, with their configs",
)
calc_data_cov = Bool.T(
default=True,
help="Flag for calculating the data covariance matrix, " 'outsourced to "kite"',
noise_estimator = GeodeticNoiseAnalyserConfig.T(
default=GeodeticNoiseAnalyserConfig.D(),
help="Determines the structure of the data-covariance matrix.",
)
interpolation = StringChoice.T(
choices=_interpolation_choices,
Expand Down
195 changes: 186 additions & 9 deletions beat/covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,12 @@
from pymc3 import Point
from pyrocko import gf, trace
from scipy.linalg import toeplitz
from scipy.spatial import KDTree
from theano import config as tconfig

from beat import heart
from beat.utility import ensure_cov_psd, list2string, running_window_rms
from beat.utility import ensure_cov_psd, list2string, running_window_rms, distances


logger = logging.getLogger("covariance")

Expand Down Expand Up @@ -91,10 +93,20 @@ def ones_data_covariance(n, dt=None, tzero=None):
}


NoiseStructureCatalog2d = {
"import": ones_data_covariance,
"non-toeplitz": ones_data_covariance,
}


def available_noise_structures():
return list(NoiseStructureCatalog.keys())


def available_noise_structures_2d():
return list(NoiseStructureCatalog2d.keys())


def import_data_covariance(data_trace, arrival_taper, sample_rate, domain="time"):
"""
Use imported covariance matrixes and check size consistency with taper.
Expand Down Expand Up @@ -141,14 +153,98 @@ def import_data_covariance(data_trace, arrival_taper, sample_rate, domain="time"
return data_cov


class GeodeticNoiseAnalyser(object):
"""
Geodetic noise analyser
Parameters
----------
structure : string
either, import, variance, non-toeplitz
events : list
of :class:`pyrocko.meta.Event` reference event(s) from catalog
"""

def __init__(
self,
config,
events=None,
):

avail = available_noise_structures_2d()

if config.structure not in avail:
raise AttributeError(
'Selected noise structure "%s" not supported! Implemented'
" noise structures: %s" % (structure, list2string(avail))
)

self.events = events
self.config = config

def get_structure(self, dataset):
return NoiseStructureCatalog2d[self.config.structure](dataset.ncoords)

def do_import(self, dataset):
if dataset.covariance.data is not None:
return dataset.covariance.data
else:
raise ValueError(
"Data covariance for dataset %s needs to be defined!" % dataset.name
)

def do_non_toeplitz(self, dataset, result):

if dataset.typ == "SAR":
dataset.update_local_coords(self.events[0])
coords = num.vstack([dataset.east_shifts, dataset.north_shifts]).T

scaling = non_toeplitz_covariance_2d(
coords, result.processed_res, max_dist_perc=self.config.max_dist_perc
)
else:
scaling = dataset.covariance.data

if num.isnan(scaling).any():
raise ValueError(
"Estimated Non-Toeplitz covariance matrix for dataset %s contains Nan! "
"Please increase 'max_dist_perc'!" % dataset.name
)

return scaling

def get_data_covariance(self, dataset, result=None):
"""
Estimated data covariances of seismic traces
Parameters
----------
datasets
results
Returns
-------
:class:`numpy.ndarray`
"""

covariance_structure = self.get_structure(dataset)

if self.config.structure == "import":
scaling = self.do_import(dataset)
elif self.config.structure == "non-toeplitz":
scaling = self.do_non_toeplitz(dataset, result)

return ensure_cov_psd(scaling * covariance_structure)


class SeismicNoiseAnalyser(object):
"""
Seismic noise analyser
Parameters
----------
structure : string
either identity, exponential, import
either, variance, exponential, import, non-toeplitz
pre_arrival_time : float
in [s], time before P arrival until variance is estimated
engine : :class:`pyrocko.gf.seismosizer.LocalEngine`
Expand All @@ -162,7 +258,7 @@ class SeismicNoiseAnalyser(object):

def __init__(
self,
structure="identity",
structure="variance",
pre_arrival_time=5.0,
engine=None,
events=None,
Expand Down Expand Up @@ -277,11 +373,15 @@ def do_variance_estimate(self, wmap, chop_bounds=None):
nslc_id_str = list2string(ctrace.nslc_id)

if wmap.config.domain == "spectrum":
lower_idx, upper_idx = wmap.get_valid_spectrum_indices(
valid_spectrum_indices = wmap.get_valid_spectrum_indices(
chop_bounds=chop_bounds, pad_to_pow2=True
)
data = ctrace.spectrum(True, 0.05)[1]
data = data[lower_idx:upper_idx]
data = heart.fft_transforms(
[ctrace],
valid_spectrum_indices,
outmode="stacked_traces",
pad_to_pow2=True,
)[0].get_ydata()
else:
data = ctrace.get_ydata()

Expand All @@ -294,7 +394,7 @@ def do_variance_estimate(self, wmap, chop_bounds=None):

scaling = num.nanvar(data)
if num.isfinite(scaling).all():
logger.debug("Variance estimate of %s = %g" % (nslc_id_str, scaling))
logger.info("Variance estimate of %s = %g" % (nslc_id_str, scaling))
scalings.append(scaling)
else:
raise ValueError(
Expand Down Expand Up @@ -651,7 +751,7 @@ def toeplitz_covariance(data, window_size):
Returns
-------
toeplitz : :class:`numpy.ndarray` 2-d, (size data, size data)
toeplitz : :class:`numpy.ndarray` 1-d, (size data)
stds : :class:`numpy.ndarray` 1-d, size data
of running windows
"""
Expand All @@ -663,7 +763,7 @@ def toeplitz_covariance(data, window_size):
def non_toeplitz_covariance(data, window_size):
"""
Get scaled non- Toeplitz covariance matrix, which may be able to account
for non-stationary data-errors.
for non-stationary data-errors. For 1-d data.
Parameters
----------
Expand All @@ -680,6 +780,83 @@ def non_toeplitz_covariance(data, window_size):
return toeplitz * stds[:, num.newaxis] * stds[num.newaxis, :]


def k_nearest_neighbor_rms(coords, data, k=None, max_dist_perc=0.2):
"""
Calculate running rms on irregular sampled 2d spatial data.
Parameters
----------
coords : :class:`numpy.ndarray` 2-d, (size data, n coords-dims)
containing spatial coordinates (east_shifts, north_shifts)
data : :class:`numpy.ndarray` 1-d, (size data)
containing values of physical quantity
k : int
taking k - nearest neighbors for std estimation
max_dist_perc : float
max distance [decimal percent] to select as nearest neighbors
"""

if k and max_dist_perc:
raise ValueError("Either k or max_dist_perc should be defined!")

kdtree = KDTree(coords, leafsize=1)

dists = distances(coords, coords)
r = dists.max() * max_dist_perc

logger.debug("Nearest neighbor distance is: %f", r)

stds = []
for point in coords:
if k is not None:
_, idxs = kdtree.query(point, k=k)
elif r is not None:
idxs = kdtree.query_ball_point(point, r=r)
else:
raise ValueError()

stds.append(num.std(data[idxs], ddof=1))

return num.array(stds)


def toeplitz_covariance_2d(coords, data, max_dist_perc=0.2):
"""
Get Toeplitz banded matrix for given 2d data.
Returns
-------
toeplitz : :class:`numpy.ndarray` 2-d, (size data, size data)
stds : :class:`numpy.ndarray` 1-d, size data
of running windows
max_dist_perc : float
max distance [decimal percent] to select as nearest neighbors
"""
stds = k_nearest_neighbor_rms(coords=coords, data=data, max_dist_perc=max_dist_perc)
coeffs = autocovariance(data / stds)
return toeplitz(coeffs), stds


def non_toeplitz_covariance_2d(coords, data, max_dist_perc):
"""
Get scaled non- Toeplitz covariance matrix, which may be able to account
for non-stationary data-errors. For 2-d geospatial data.
Parameters
----------
data : :class:`numpy.ndarray`
of data to estimate non-stationary error matrix for
max_dist_perc : float
max distance [decimal percent] to select as nearest neighbors
Returns
-------
:class:`numpy.ndarray` (size data, size data)
"""
toeplitz, stds = toeplitz_covariance_2d(coords, data, max_dist_perc)
return toeplitz * stds[:, num.newaxis] * stds[num.newaxis, :]


def init_proposal_covariance(bij, vars, model, pop_size=1000):
"""
Create initial proposal covariance matrix based on random samples
Expand Down
4 changes: 3 additions & 1 deletion beat/ffi/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1130,8 +1130,10 @@ def seis_construct_gf_linear(
flag to overwrite existing linear GF Library
"""

if wavemap.config.domain == "spectrum":
raise TypeError("FFI is currently only supported for time-domain!")

# get starttimes for hypocenter at corner of fault
# TODO: make nsubfaults compatible - should work
st_mins = []
st_maxs = []
for idx, sf in enumerate(fault.iter_subfaults()):
Expand Down
2 changes: 1 addition & 1 deletion beat/ffi/fault.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@
from beat.fast_sweeping import fast_sweep
from beat.heart import velocities_from_pole
from beat.models.laplacian import (
distances,
get_smoothing_operator_correlated,
get_smoothing_operator_nearest_neighbor,
)
from beat.utility import (
Counter,
check_point_keys,
distances,
dump_objects,
find_elbow,
kmtypes,
Expand Down
Loading

0 comments on commit 80f0b07

Please sign in to comment.