Source code for eqcorrscan.utils.synth_seis

"""
Early development functions to do **very** basic simulations of seismograms
to be used as general matched-filter templates and see how well a simple
model would fit with real data.  Mostly used in EQcorrscan for testing.

:copyright:
    EQcorrscan developers.

:license:
    GNU Lesser General Public License, Version 3
    (https://www.gnu.org/copyleft/lesser.html)
"""
import numpy as np
import logging

from obspy import Stream, Trace, UTCDateTime

from eqcorrscan.utils import clustering


Logger = logging.getLogger(__name__)


def seis_sim(sp, amp_ratio=1.5, flength=False, phaseout='all'):
    """
    Generate a simulated seismogram from a given S-P time.

    Will generate spikes separated by a given S-P time, which are then
    convolved with a decaying sine function.  The P-phase is simulated by a
    positive spike of value 1, the S-arrival is simulated by a decaying
    boxcar of maximum amplitude 1.5.  These amplitude ratios can be altered
    by changing the amp_ratio, which is the ratio S amplitude:P amplitude.

    .. note::
        In testing this can achieve 0.3 or greater cross-correlations with
        data.

    :type sp: int
    :param sp: S-P time in samples
    :type amp_ratio: float
    :param amp_ratio: S:P amplitude ratio
    :type flength: int
    :param flength: Fixed length in samples, defaults to False
    :type phaseout: str
    :param phaseout:
        Either 'P', 'S' or 'all', controls which phases to cut around,
        defaults to 'all'.  Can only be used with the 'P' or 'S' options if
        flength is set.

    :returns: Simulated data.
    :rtype: :class:`numpy.ndarray`
    """
    if flength and 2.5 * sp < flength and 100 < flength:
        additional_length = flength
    elif 2.5 * sp < 100.0:
        additional_length = 100
    else:
        additional_length = 2.5 * sp
    # Make the array begin 10 samples before the P and at least 2.5 times
    # the S-P samples after the S arrival
    synth = np.zeros(int(sp + 10 + additional_length))
    # P-spike fixed at 10 samples from start of window
    synth[10] = 1.0
    # The length of the decaying S-phase should depend on the S-P time.
    # Some basic estimations suggest this should be at least 10 samples
    # and that the coda should be about 1/10 of the S-P time
    S_length = 10 + int(sp // 3)
    S_spikes = np.arange(amp_ratio, 0, -(amp_ratio / S_length))
    # What we actually want, or what appears better, is to have a series of
    # individual spikes of alternating polarity...
    for i in range(len(S_spikes)):
        if i in np.arange(1, len(S_spikes), 2):
            S_spikes[i] = 0
        if i in np.arange(2, len(S_spikes), 4):
            S_spikes[i] *= -1
    # Put these spikes into the synthetic
    synth[10 + sp:10 + sp + len(S_spikes)] = S_spikes
    # Generate a rough damped sine wave to convolve with the model spikes
    sine_x = np.arange(0, 10.0, 0.5)
    damped_sine = np.exp(-sine_x) * np.sin(2 * np.pi * sine_x)
    # Convolve the spike model with the damped sine!
    synth = np.convolve(synth, damped_sine)
    # Normalize synth
    synth = synth / np.max(np.abs(synth))
    if not flength:
        return synth
    else:
        if phaseout in ['all', 'P']:
            synth = synth[0:flength]
        elif phaseout == 'S':
            synth = synth[sp:]
        if len(synth) < flength:
            # If this is too short, pad
            synth = np.append(synth, np.zeros(flength - len(synth)))
        else:
            synth = synth[0:flength]
    return synth
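
# Illustrative usage sketch (not part of the original module): generate a
# fixed-length synthetic for a 0.5 s S-P time at 100 Hz (sp=50 samples) and
# check that the output honours the requested length and normalisation.
# The helper name and parameter values below are hypothetical examples only.
def _example_seis_sim():
    synth = seis_sim(sp=50, amp_ratio=1.5, flength=200, phaseout='all')
    assert len(synth) == 200  # trimmed/padded to the fixed length
    assert np.isclose(np.max(np.abs(synth)), 1.0)  # peak normalised to 1.0
    return synth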
def SVD_sim(sp, lowcut, highcut, samp_rate,
            amp_range=np.arange(-10, 10, 0.01)):
    """
    Generate basis vectors of a set of simulated seismograms.

    Inputs should have a range of S-P amplitude ratios, in theory to
    simulate a range of focal mechanisms.

    :type sp: int
    :param sp:
        S-P time in seconds - will be converted to samples according to
        samp_rate.
    :type lowcut: float
    :param lowcut: Low-cut for bandpass filter in Hz
    :type highcut: float
    :param highcut: High-cut for bandpass filter in Hz
    :type samp_rate: float
    :param samp_rate: Sampling rate in Hz
    :type amp_range: numpy.ndarray
    :param amp_range: Amplitude ratio range to generate synthetics for.

    :returns: set of output basis vectors
    :rtype: :class:`numpy.ndarray`
    """
    # Convert S-P time to samples
    sp = int(sp * samp_rate)
    # Scan through a range of amplitude ratios
    synthetics = [Stream(Trace(seis_sim(sp, a))) for a in amp_range]
    for st in synthetics:
        for tr in st:
            tr.stats.station = 'SYNTH'
            tr.stats.channel = 'SH1'
            tr.stats.sampling_rate = samp_rate
            tr.filter('bandpass', freqmin=lowcut, freqmax=highcut)
    # We have a list of obspy Stream objects, which we can pass to
    # EQcorrscan's SVD functions
    U, s, V, stachans = clustering.svd(synthetics)
    return U, s, V, stachans
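
# Illustrative usage sketch (not part of the original module): build basis
# vectors for a 1.0 s S-P time sampled at 100 Hz, band-passed 2-8 Hz, over a
# coarse positive amplitude-ratio range.  The helper name and values below
# are hypothetical examples only.
def _example_svd_sim():
    U, s, V, stachans = SVD_sim(
        sp=1.0, lowcut=2.0, highcut=8.0, samp_rate=100.0,
        amp_range=np.arange(0.5, 3.0, 0.25))
    # One entry per station-channel; here only the synthetic SYNTH.SH1
    return U, s, V, stachans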
def template_grid(stations, nodes, travel_times, phase, PS_ratio=1.68,
                  samp_rate=100, flength=False, phaseout='all'):
    """
    Generate a group of synthetic seismograms for a grid of sources.

    Used to simulate phase arrivals from a grid of known sources in a
    three-dimensional model.  Lags must be known and supplied; these can be
    generated from the bright_lights function read_tt, and resampled to fit
    the desired grid dimensions and spacing using other functions therein.
    These synthetic seismograms are very simple models of seismograms using
    the seis_sim function herein.  These approximate body-wave P and S first
    arrivals as spikes convolved with damped sine waves.

    :type stations: list
    :param stations: List of the station names
    :type nodes: list
    :param nodes: List of node locations in (lon, lat, depth)
    :type travel_times: numpy.ndarray
    :param travel_times:
        Array of travel times where travel_times[i][:] refers to the travel
        times for station=stations[i], and travel_times[i][j] refers to
        stations[i] for nodes[j]
    :type phase: str
    :param phase: Can be either 'P' or 'S'
    :type PS_ratio: float
    :param PS_ratio: P/S velocity ratio, defaults to 1.68
    :type samp_rate: float
    :param samp_rate: Desired sample rate in Hz, defaults to 100.0
    :type flength: int
    :param flength: Length of template in samples, defaults to False
    :type phaseout: str
    :param phaseout:
        Either 'S', 'P', 'all' or 'both', determines which phases to clip
        around.  'all' encompasses both phases in one channel, but will
        return nothing if the flength is not long enough; 'both' will
        return two channels for each station, one SYN_Z with the synthetic
        P-phase, and one SYN_H with the synthetic S-phase.

    :returns: List of :class:`obspy.core.stream.Stream`
    """
    if phase not in ['S', 'P']:
        raise IOError('Phase is neither P nor S')
    # Initialize empty list for templates
    templates = []
    # Loop through the nodes, for every node generate a template!
    for i, node in enumerate(nodes):
        st = []  # Empty list to be filled with synthetics
        # Loop through stations
        for j, station in enumerate(stations):
            tr = Trace()
            tr.stats.sampling_rate = samp_rate
            tr.stats.station = station
            tr.stats.channel = 'SYN'
            tt = travel_times[j][i]
            if phase == 'P':
                # If the input travel-time is the P-wave travel-time
                SP_time = (tt * PS_ratio) - tt
                if phaseout == 'S':
                    tr.stats.starttime += tt + SP_time
                else:
                    tr.stats.starttime += tt
            elif phase == 'S':
                # If the input travel-time is the S-wave travel-time
                SP_time = tt - (tt / PS_ratio)
                if phaseout == 'S':
                    tr.stats.starttime += tt
                else:
                    tr.stats.starttime += tt - SP_time
            # Set start-time of trace to be travel-time for P-wave
            # Check that the template length is long enough to include the
            # S-P time
            if flength and SP_time * samp_rate < flength - 11 and \
                    phaseout == 'all':
                tr.data = seis_sim(sp=int(SP_time * samp_rate),
                                   amp_ratio=1.5, flength=flength,
                                   phaseout=phaseout)
                st.append(tr)
            elif flength and phaseout == 'all':
                Logger.warning('Cannot make a bulk synthetic with this '
                               'fixed length for station ' + station)
            elif phaseout == 'all':
                tr.data = seis_sim(sp=int(SP_time * samp_rate),
                                   amp_ratio=1.5, flength=flength,
                                   phaseout=phaseout)
                st.append(tr)
            elif phaseout in ['P', 'S']:
                tr.data = seis_sim(sp=int(SP_time * samp_rate),
                                   amp_ratio=1.5, flength=flength,
                                   phaseout=phaseout)
                st.append(tr)
            elif phaseout == 'both':
                for _phaseout in ['P', 'S']:
                    _tr = tr.copy()
                    _tr.data = seis_sim(sp=int(SP_time * samp_rate),
                                        amp_ratio=1.5, flength=flength,
                                        phaseout=_phaseout)
                    if _phaseout == 'P':
                        _tr.stats.channel = 'SYN_Z'
                        # starttime defaults to S-time
                        _tr.stats.starttime = _tr.stats.starttime - SP_time
                    elif _phaseout == 'S':
                        _tr.stats.channel = 'SYN_H'
                    st.append(_tr)
        templates.append(Stream(st))
        # Stream(st).plot(size=(800, 600))
    return templates
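
# Illustrative usage sketch (not part of the original module): two stations
# and two nodes with S travel-times chosen so that every S-P time fits in a
# 3 s (300 sample) template at 100 Hz.  Station names, node locations and
# travel-times are hypothetical examples only.
def _example_template_grid():
    stations = ['STA1', 'STA2']
    nodes = [(170.3, -43.5, 5.0), (170.4, -43.6, 7.5)]  # (lon, lat, depth)
    # travel_times[i][j] is the S travel-time for stations[i] at nodes[j]
    travel_times = np.array([[4.0, 5.0], [6.0, 7.0]])
    templates = template_grid(
        stations=stations, nodes=nodes, travel_times=travel_times,
        phase='S', samp_rate=100, flength=300, phaseout='all')
    assert len(templates) == len(nodes)  # one template Stream per node
    return templates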
def generate_synth_data(nsta, ntemplates, nseeds, samp_rate, t_length,
                        max_amp, max_lag, phaseout="all", jitter=0,
                        noise=True, same_phase=False):
    """
    Generate a synthetic dataset to be used for testing.

    This will generate both templates and data to scan through.
    Templates will be generated using the utils.synth_seis functions.  The
    day of data will be random noise, with random signal-to-noise ratio
    copies of the templates randomly seeded throughout the day.  It also
    returns the seed times and signal-to-noise ratios used.

    :type nsta: int
    :param nsta: Number of stations to generate data for (up to 15).
    :type ntemplates: int
    :param ntemplates:
        Number of templates to generate, will be generated with random
        arrival times.
    :type nseeds: int
    :param nseeds:
        Number of copies of the template to seed within the day of noisy
        data for each template.
    :type samp_rate: float
    :param samp_rate: Sampling rate to use in Hz
    :type t_length: float
    :param t_length: Length of templates in seconds.
    :type max_amp: float
    :param max_amp: Maximum signal-to-noise ratio of seeds.
    :type max_lag: float
    :param max_lag: Maximum lag time in seconds (randomised).
    :type phaseout: str
    :param phaseout:
        Either 'P', 'S', 'all' or 'both'; passed through to template_grid.
    :type jitter: int
    :param jitter:
        Random range to allow arrival shifts for seeded phases (samples)
    :type noise: bool
    :param noise: Set to False to give noise-free data.
    :type same_phase: bool
    :param same_phase: Whether to enforce all positive repeats (True) or not.

    :returns: Templates: List of :class:`obspy.core.stream.Stream`
    :rtype: list
    :returns: Data: :class:`obspy.core.stream.Stream` of seeded noisy data
    :rtype: :class:`obspy.core.stream.Stream`
    :returns: Seeds: dictionary of seed SNR and time with time in samples.
    :rtype: dict
    """
    jitter = abs(jitter)
    # Generate random arrival times
    t_times = np.abs(np.random.random([nsta, ntemplates])) * max_lag
    # Generate random node locations - these do not matter as they are only
    # used for naming
    lats = np.random.random(ntemplates) * 90.0
    lons = np.random.random(ntemplates) * 90.0
    depths = np.abs(np.random.random(ntemplates) * 40.0)
    nodes = zip(lats, lons, depths)
    # Pool of up to 15 station names to draw nsta stations from
    stations = ['ALPH', 'BETA', 'GAMM', 'KAPP', 'ZETA', 'BOB', 'MAGG',
                'ALF', 'WALR', 'ALBA', 'PENG', 'BANA', 'WIGG', 'SAUS',
                'MALC']
    Logger.debug(nodes)
    Logger.debug(t_times)
    Logger.debug(stations[0:nsta])
    templates = template_grid(stations=stations[0:nsta], nodes=nodes,
                              travel_times=t_times, phase='S',
                              samp_rate=samp_rate,
                              flength=int(t_length * samp_rate),
                              phaseout=phaseout)
    for template in templates:
        Logger.debug(template)
    # Now we want to create a day of synthetic data
    seeds = []
    # Copy a template to get the correct length and stats for data; we will
    # overwrite the data on this copy
    data = templates[0].copy()
    for tr in data:
        # Set all the traces to have a day of zeros
        tr.data = np.zeros(86400 * int(samp_rate))
        tr.stats.starttime = UTCDateTime(0)
    for i, template in enumerate(templates):
        # Generate a series of impulses for seeding.  Need a separate
        # impulse trace for each template; all will be convolved within the
        # data though.
        impulses = np.zeros(86400 * int(samp_rate))
        impulse_times = np.random.randint(86400 * int(samp_rate),
                                          size=nseeds)
        # Generate amplitudes up to maximum amplitude in a normal
        # distribution
        impulse_amplitudes = np.random.randn(nseeds) * max_amp
        if same_phase:
            impulse_amplitudes = np.abs(impulse_amplitudes)
        seeds.append({'SNR': impulse_amplitudes, 'time': impulse_times})
        for j in range(nseeds):
            impulses[impulse_times[j]] = impulse_amplitudes[j]
        # We now have one vector of impulses, we need nsta numbers of them,
        # shifted with the appropriate lags
        mintime = min([template_tr.stats.starttime
                       for template_tr in template])
        for j, template_tr in enumerate(template):
            offset = int((template_tr.stats.starttime - mintime) * samp_rate)
            if jitter > 0:
                offset += np.random.randint(-jitter, jitter)
            pad = np.zeros(abs(offset))
            if offset >= 0:
                tr_impulses = np.append(pad, impulses)[0:len(impulses)]
            elif offset < 0:
                tr_impulses = np.append(impulses, pad)[-len(impulses):]
            # Convolve this with the template trace to give the daylong seeds
            data[j].data += np.convolve(
                tr_impulses, template_tr.data)[0:len(impulses)]
    # Add the noise
    if noise:
        for tr in data:
            noise_array = np.random.randn(86400 * int(samp_rate))
            tr.data += noise_array / max(noise_array)
    return templates, data, seeds
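
# Illustrative usage sketch (not part of the original module): a small noisy
# test set of five stations and two templates, each seeded ten times through
# a day of data.  max_lag is kept small enough that every S-P time fits the
# fixed template length.  The helper name and values are hypothetical.
def _example_generate_synth_data():
    templates, data, seeds = generate_synth_data(
        nsta=5, ntemplates=2, nseeds=10, samp_rate=20.0, t_length=4.0,
        max_amp=5.0, max_lag=6.0, phaseout='all', jitter=2, noise=True)
    assert len(templates) == len(seeds) == 2  # one seed dict per template
    return templates, data, seeds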
if __name__ == "__main__":
    import doctest

    doctest.testmod()