# Source code for ENSO.diag

''' This module supports the following analysis:

#. Identify El Nino and La Nina events using an SST anomaly index and apply
   threshold and persistence criteria (:py:func:`find_events`,
   :py:func:`find_enso_percentile`, :py:func:`find_enso_threshold`)
#. Compute the skewness of SST anomaly time series
#. Compute the persistence of events (duration)
#. Compute the transition probability of events
#. Compute climatological composite of El Nino or La Nina at a certain strength
   (:py:func:`find_en_pattern`)
#. Compute the number of events peaking in a calendar month (for seasonal
   phase locking analysis; :py:func:`seasonal_locking`,
   :py:func:`seasonal_locking_from_nino34`)

:py:func:`enso_skewness`, :py:func:`enso_duration`, :py:func:`enso_transition2`
, :py:func:`find_enso_percentile` are rewritten from the Matlab code used in
`Choi et al (2013) <http://dx.doi.org/10.1175/JCLI-D-13-00045.1>`_.
:py:func:`find_en_pattern` is used in
`Choi et al (2015) <http://dx.doi.org/10.1175/JCLI-D-15-0211.1>`_.
A few of these functions require the `GeoDAT library </geodat_doc>`_.

'''
import itertools
import warnings

import numpy
import scipy
import scipy.stats

try:
    import geodat
    _GEODAT_INSTALLED = True
    _IMPORT_GEODAT_ERROR = None
except ImportError:
    _GEODAT_INSTALLED = False
    _IMPORT_GEODAT_ERROR = ImportError("GeoDAT is not installed. "+\
                                "http://kychoi.org/geodat_doc")


def find_enso_percentile(index, percentile, *args, **kwargs):
    ''' Find ENSO events using percentiles (i.e. insensitive to time mean)

    Args:
        index (numpy.ndarray): ENSO SST anomaly monthly index.  Masked array
           is supported
        percentile (numeric): percentile beyond which El Nino and La Nina
           events are identified

    args and kwargs are fed to :py:func:`find_events`

    Returns:
        (dict, dict) each has keys "locs" and "peaks".  "locs" contains the
        indices where events peak.  "peaks" contains the corresponding peak
        values

    Raises:
        ValueError: if percentile is outside [0., 100.]

    Example::

      >>> warm,cold = find_enso_percentile(nino34,15.)
    '''
    # Validate before folding about the median so that an out-of-range
    # input (e.g. 150.) is reported as "bigger than 100" instead of being
    # transformed into a negative number first and raising a misleading
    # "smaller than zero" error.
    if percentile < 0.:
        raise ValueError("percentile cannot be smaller than zero.")
    if percentile > 100.:
        raise ValueError("percentile cannot be bigger than 100.")
    # Thresholds are symmetric about the median: asking for the 85th
    # percentile selects the same tails as asking for the 15th.
    if percentile > 50.:
        percentile = 100. - percentile

    # Use only unmasked values when computing the percentile thresholds
    if numpy.ma.isMaskedArray(index) and index.mask.any():
        valid = index[~index.mask]
    else:
        valid = index

    warm = {}
    cold = {}
    warm['locs'], warm['peaks'] = find_events(
        index, '>', numpy.percentile(valid, 100. - percentile),
        *args, **kwargs)
    cold['locs'], cold['peaks'] = find_events(
        index, '<', numpy.percentile(valid, percentile),
        *args, **kwargs)
    return warm, cold
def find_enso_threshold(index, warm_threshold, cold_threshold, *args, **kwargs):
    ''' Similar to find_enso_percentile but uses threshold to find ENSO events

    Args:
        index (numpy.ndarray): ENSO SST anomaly monthly index.  Masked array
           is supported
        warm_threshold (numeric): Above which El Nino SST anomalies are
        cold_threshold (numeric): Below which La Nina SST anomalies are

    args and kwargs are fed to :py:func:`find_events`

    Returns:
        (dict, dict) each has keys "locs" and "peaks".  "locs" contains the
        indices where events peak.  "peaks" contains the corresponding peak
        values
    '''
    # Promote plain ndarrays to masked arrays so downstream code can treat
    # both input kinds uniformly
    if not isinstance(index, numpy.ma.core.MaskedArray):
        index = numpy.ma.array(index)
    keys = ("locs", "peaks")
    warm = dict(zip(keys, find_events(index, '>', warm_threshold,
                                      *args, **kwargs)))
    cold = dict(zip(keys, find_events(index, '<', cold_threshold,
                                      *args, **kwargs)))
    return warm, cold
def persistence_check(is_selected, per):
    ''' Filter is_selected where True indicates an active, persistent event

    An element stays True only if it belongs to a contiguous run of True
    values of length >= per; shorter runs are zeroed out.

    Args:
        is_selected (numpy boolean array)
        per (int): length of persistence required

    Returns:
        numpy boolean array, same shape as is_selected
    '''
    assert len(is_selected) > 0
    # numpy.bool was removed in NumPy 1.24; the builtin bool compares
    # equal to the numpy boolean dtype
    assert is_selected.dtype == bool
    if not numpy.any(is_selected):
        return is_selected
    is_filtered = numpy.zeros_like(is_selected, dtype=bool)
    in_event = False
    n_select = 0
    i_start = 0
    for i, selected in enumerate(is_selected):
        if selected:
            n_select += 1
            if not in_event:
                # transition from normal to selected: remember run start
                i_start = i
                in_event = True
        else:
            if in_event and n_select >= per:
                # mark the whole run [i_start, i); the original slice
                # stopped at i-1 and dropped the run's last member
                is_filtered[i_start:i] = True
            i_start = i
            n_select = 0
            in_event = False
    # flush a run that extends to the end of the array, which the loop
    # above never closes
    if in_event and n_select >= per:
        is_filtered[i_start:len(is_selected)] = True
    return is_filtered
def find_events(index, operator, threshold, per=5, window=(-3, 3)):
    ''' Return the locations of ENSO warm/cold event peaks for a given index.

    Args:
        index (numpy 1d array): ENSO SST anomaly
        operator (str): ">" (index greater than threshold) or "<" (index
           smaller than threshold)
        threshold (numeric): threshold for event definition
        per (int): minimum persistence for index >/< threshold (default=5,
           unit is consistent with the array grid)
        window (iterable): range around the event peak within which the peak
           has to be a global minimum/maximum; length = 2 (default=(-3, 3))

    Returns:
        (pklocs, pks) = (location in the input array, values of extrema)
    '''
    if operator == '>':
        argpeak_op = numpy.argmax
        comp_op = numpy.greater
        peak_op = numpy.max
    elif operator == '<':
        argpeak_op = numpy.argmin
        comp_op = numpy.less
        peak_op = numpy.min
    else:
        raise Exception('operator has to be either > or <')
    if len(window) != 2:
        raise ValueError("window must have length=2")

    locs = numpy.where(comp_op(index, threshold))[0]
    if len(locs) <= 1:
        return ([], numpy.array([]))

    # Find the beginning (starts) and the end (ends) of events
    jumps = numpy.where(numpy.diff(locs) > 1)[0]
    starts = numpy.insert(locs[jumps+1], 0, locs[0])
    ends = numpy.append(locs[jumps], locs[-1])

    # Ignore chunks that start at the beginning or end at the end of the
    # index (they may be truncated events)
    if starts[0] == 0:
        starts = starts[1:]
        ends = ends[1:]
    # Guard: the only chunk may have started at index 0, leaving nothing
    # (the original code indexed ends[-1] on an empty array here)
    if len(ends) == 0:
        return ([], numpy.array([]))
    if ends[-1] == len(index)-1:
        starts = starts[:-1]
        ends = ends[:-1]
    if len(starts) == 0:
        return ([], numpy.array([]))

    # Chunks of the index that exceed the threshold.
    # NOTE(review): the slice stops at ends[i] (exclusive), excluding the
    # run's last member — kept as-is to match the original Matlab port.
    subsets = [index[starts[i]:ends[i]] for i in range(len(starts))]

    # Find the location of peaks, applying the persistence check
    pklocs = [starts[i]+argpeak_op(subsets[i])
              for i in range(len(subsets)) if len(subsets[i]) >= per]

    # Keep only peaks that are global extrema within the window
    pklocs_new = []
    for loc in pklocs:
        window_start = max(0, loc+window[0])
        window_end = min(len(index)-1, loc+window[1])
        if index[loc] == peak_op(index[window_start:window_end]):
            pklocs_new.append(loc)

    # Convert to plain ints.  The original also filtered "loc != False",
    # which was dead code except that it would wrongly drop a peak at
    # index 0 (0 == False); that filter is removed.
    pklocs = [int(loc) for loc in pklocs_new]
    pks = numpy.array([index[loc].squeeze() for loc in pklocs])
    return pklocs, pks
def find_en_pattern(field, nino34, nino34_mid=0.8, nino34_tole=0.4,
                    do_climo=True, verbose=False):
    ''' Given a field and Nino3.4 index monthly time series, extract the
    time at which nino34_mid-nino34_tole < peak nino34 <
    nino34_mid+nino34_tole; then compute the climatology for these snap shots

    Args:
        field (geodat.nc.Variable)
        nino34 (geodat.nc.Variable): Nino3.4 SST anomaly index
        nino34_mid (numeric): mid point.  Use a negative value to select
           La Nina events (peaks are compared without taking the sign off)
        nino34_tole (numeric): half bin size
        do_climo (bool): whether a climatology is computed for the
           composite, default True
        verbose (bool)

    Returns:
        pattern (geodat.nc.Variable)

    Raises:
        ImportError: if GeoDAT is not installed
        Exception: if the time axes of field and nino34 differ

    This function requires the library `GeoDAT
    <http://kychoi.org/geodat_doc>`_
    '''
    if not _GEODAT_INSTALLED:
        raise _IMPORT_GEODAT_ERROR
    if not numpy.allclose(field.getTime(), nino34.getTime()):
        raise Exception("Expect the time record of nino3.4 and "+\
                        "the field to be the same")
    # 49th percentile: essentially every excursion from the median is a
    # candidate; the strength selection below does the real filtering
    warm, cold = find_enso_percentile(nino34.data, 49.)
    locs = warm['locs'] + cold['locs']
    peaks = numpy.append(warm['peaks'], cold['peaks'])
    # Keep only events whose peak lies within the requested strength bin
    locs = numpy.array(locs)[numpy.abs(peaks - nino34_mid) < nino34_tole]
    if len(locs) == 0:
        # Return an all-masked variable with the field's shape at one
        # time step so callers still receive a well-formed result
        field_sliced_time = field.getRegion(time=slice(0, 1))
        result = geodat.nc.Variable(
            data=numpy.ma.ones(field_sliced_time.data.shape),
            parent=field_sliced_time)
        result.data[:] = numpy.ma.masked
        warnings.warn("No event found!")
        return result
    pattern = field[locs]
    if do_climo:
        pattern = geodat.nc.climatology(pattern)
    pattern.setattr('event_loc', locs.squeeze())
    if verbose:
        # print() form is valid under both Python 2 and Python 3; the
        # original used the Python-2-only print statement
        print('Nino 3.4: ' + str(nino34[locs].time_ave().squeeze().data))
    return pattern
def compute_duration(nino34, operator, locs, evt_end,
                     remove_merge_event=True):
    ''' Compute the duration of events counting from the event peak
    (locations given by locs) until the termination of events (given by the
    first occurrence of operator(nino34,evt_end)).

    See `Choi et al (2013) <http://dx.doi.org/10.1175/JCLI-D-13-00045.1>`_

    Args:
        nino34 (numpy array): ENSO SST anomaly index
        operator (numpy operator): e.g. numpy.less or numpy.greater
        locs (list of int or numpy array of int): indices of event peaks
        evt_end (scalar number): value of nino34 when an event is considered
            as terminated
        remove_merge_event (bool): remove events that are joined together
            due to reintensification

    Returns:
        list of int.  NOTE: may be shorter than locs — peaks whose event
        never terminates are skipped, and with remove_merge_event=True
        merged events (and the final peak, which cannot be checked for
        merging) are dropped.
    '''
    lengths = []
    # enumerate replaces the Python-2-only xrange index loop
    for iloc, loc in enumerate(locs):
        after_end = operator(nino34[loc:], evt_end)
        if after_end.any():
            # Steps from the peak until the index first crosses evt_end
            length = numpy.where(after_end)[0][0]
            if remove_merge_event:
                # Only count the duration if this is not the last peak and
                # the next event peaks after this event has terminated
                # (otherwise the two peaks belong to one merged event).
                # NOTE(review): reconstructed from collapsed source; the
                # handling of the final peak should be confirmed against
                # the original Matlab code.
                if iloc < len(locs) - 1:
                    if locs[iloc+1] - locs[iloc] > length:
                        lengths.append(length)
            else:
                lengths.append(length)
    return lengths
def enso_duration(nino34, percentile, thres_std_fraction,
                  per=5, window=[-3, 3]):
    '''Compute the duration of the warm and cold events

    Args:
        nino34 (numpy 1D array): ENSO SST anomalies
        percentile (numeric): greater than 0 and less than 100
        thres_std_fraction (numeric): the fraction times the nino34
           standard deviation is used for defining the termination

    Returns:
        dict: with keys "warm" and "cold", each a list of integers which
        are the durations of the events, plus "warm_locs"/"cold_locs" with
        the peak locations.  The length of duration['warm'] may not match
        the length of duration['warm_locs'] because double counting of
        merged events is taken care of.
    '''
    warm, cold = find_enso_percentile(nino34, percentile, per, window)
    # Termination thresholds: +/- a fraction of the index's std. dev.
    warm_end = thres_std_fraction * nino34.std()
    cold_end = -warm_end
    return {
        'warm': compute_duration(nino34, numpy.less, warm['locs'], warm_end),
        'cold': compute_duration(nino34, numpy.greater, cold['locs'],
                                 cold_end),
        'warm_locs': warm['locs'],
        'cold_locs': cold['locs'],
    }
def enso_transition2(nino34, percentile, wait_window, thres_std_fraction,
                     per=5, window=[-3, 3]):
    ''' Compute the transition probabilities.  Word for word copy from the
    Matlab code, which was used in
    `Choi et al (2013) <http://dx.doi.org/10.1175/JCLI-D-13-00045.1>`_

    Args:
        nino34 (numpy 1d array): ENSO SST anomaly index
        percentile (numeric): greater than 0. and less than 100.
        wait_window (int): time lapse between an event termination and the
            next event peak
        thres_std_fraction (numeric): the fraction times the nino34 standard
            deviation is used for defining the termination, similar to
            :py:func:`enso_duration`
        per (int): see :py:func:`find_events`
        window (iterable): see :py:func:`find_events`

    Returns:
        transition (dict), indices (dict), warm_indices (list),
        cold_indices (list)

        transition = {"warm_cold": probability of warm-to-cold transition,
        "warm_warm": probability of warm-to-warm transition,
        "cold_warm":..., "cold_cold":...,}

        indices = {"ENLN": indices of the peaks of warm events that are
        followed by a cold event, "ENEN": ... followed by a warm event,
        "LNLN": indices of the peaks of cold events that are followed by a
        cold event, "LNEN": ... followed by a warm event}

        warm_indices is the list of warm event peak locations;
        cold_indices is the list of cold event peak locations
    '''
    # Accumulators for each of the four transition types
    ENLNind = []
    LNENind = []
    ENENind = []
    LNLNind = []
    warm, cold = find_enso_percentile(nino34, percentile, per, window)
    # Sort peak locations chronologically so "next event" searches work
    warm['locs'] = numpy.array(sorted(warm['locs']))
    cold['locs'] = numpy.array(sorted(cold['locs']))
    # Termination thresholds: +/- a fraction of the index's std. dev.
    warm_end = nino34.std()*thres_std_fraction
    cold_end = warm_end*-1
    # --- Warm events: classify each as warm->cold or warm->warm ---
    for i, iwarm in enumerate(warm['locs']):
        # Indices (relative to iwarm) where the index drops below warm_end
        ended = numpy.where(nino34[iwarm:] < warm_end)[0]
        if len(ended):
            # The index where the warm event terminates
            iterm = iwarm + ended[0]
            nextcold = numpy.where(cold['locs'] > iwarm)[0]
            nextwarm = numpy.where(warm['locs'] > iwarm)[0]
            if len(nextcold) and len(nextwarm):
                inextcold = cold['locs'][nextcold[0]]
                inextwarm = warm['locs'][nextwarm[0]]
                # A cold peak within wait_window of termination wins over
                # a warm peak (checked first)
                if (inextcold - iterm) <= wait_window:
                    ENLNind.append(iwarm)
                elif (inextwarm - iterm) <= wait_window:
                    ENENind.append(iwarm)
            elif len(nextcold):
                if (cold['locs'][nextcold[0]]-iterm) <= wait_window:
                    ENLNind.append(iwarm)
            elif len(nextwarm):
                if (warm['locs'][nextwarm[0]]-iterm) <= wait_window:
                    ENENind.append(iwarm)
    # --- Cold events: classify each as cold->warm or cold->cold ---
    for i, icold in enumerate(cold['locs']):
        # Indices (relative to icold) where the index rises above cold_end
        ended = numpy.where(nino34[icold:] > cold_end)[0]
        if len(ended):
            # The index where the cold event terminates
            iterm = icold + ended[0]
            nextwarm = numpy.where(warm['locs'] > icold)[0]
            nextcold = numpy.where(cold['locs'] > icold)[0]
            if len(nextwarm) and len(nextcold):
                inextwarm = warm['locs'][nextwarm[0]]
                inextcold = cold['locs'][nextcold[0]]
                # A warm peak within wait_window of termination wins over
                # a cold peak (checked first)
                if (inextwarm - iterm) <= wait_window:
                    LNENind.append(icold)
                elif (inextcold - iterm) <= wait_window:
                    LNLNind.append(icold)
            elif len(nextwarm):
                if (warm['locs'][nextwarm[0]]-iterm) <= wait_window:
                    LNENind.append(icold)
            elif len(nextcold):
                if (cold['locs'][nextcold[0]]-iterm) <= wait_window:
                    LNLNind.append(icold)
    # Number of transition opportunities: the chronologically last event
    # has no successor, so it is excluded from its phase's denominator
    nEN = len(warm['locs'])
    nLN = len(cold['locs'])
    if nEN and nLN:
        if warm['locs'][-1] > cold['locs'][-1]:
            # warm event the last
            nEN -= 1
        else:
            nLN -= 1
    elif nEN:
        nEN -= 1
    elif nLN:
        nLN -= 1
    transition = {}
    # Guard against division by zero: keys are only present when the
    # corresponding phase had at least one transition opportunity
    if nEN:
        transition['warm_cold'] = float(len(ENLNind))/nEN
        transition['warm_warm'] = float(len(ENENind))/nEN
    if nLN:
        transition['cold_warm'] = float(len(LNENind))/nLN
        transition['cold_cold'] = float(len(LNLNind))/nLN
    return transition, dict(ENLN=ENLNind, ENEN=ENENind,
                            LNEN=LNENind, LNLN=LNLNind),\
        warm["locs"], cold["locs"]
def enso_transition(nino34, percentile, wait_window, thres_std_fraction,
                    per=5, window=[-3, 3]):
    ''' Compute the number of transitions

    However, it does not eliminate double counting due to
    reintensification.  Therefore the results are different from those of
    :py:func:`enso_transition2`

    Args:
        nino34 (numpy 1d array): ENSO SST anomaly index
        percentile (numeric): 0. < percentile < 100.
        wait_window (int): time lapse between an event termination and the
            next event peak
        thres_std_fraction (numeric): the fraction times the nino34 standard
            deviation is used for defining the termination, similar to
            :py:func:`enso_duration`
        per (int): see :py:func:`find_events`
        window (iterable): see :py:func:`find_events`

    Returns:
        transition (dict) = {"warm_cold": number of warm-to-cold transitions,
        "warm_warm": number of warm-to-warm transitions,
        "cold_warm":..., "cold_cold":...,
        "warm"/"cold": total number of warm/cold events,
        "warm_locs"/"cold_locs": peak locations}
    '''
    warm, cold = find_enso_percentile(nino34, percentile, per, window)
    warm_end = nino34.std()*thres_std_fraction
    cold_end = warm_end*-1
    # remove_merge_event=False so each peak keeps its own duration.
    # NOTE(review): compute_duration skips peaks whose event never
    # terminates; if that happens the zip below silently truncates —
    # behaviour inherited from the original code.
    durations = {'warm': compute_duration(nino34, numpy.less,
                                          warm['locs'], warm_end, False),
                 'cold': compute_duration(nino34, numpy.greater,
                                          cold['locs'], cold_end, False)}
    events = ['warm']*len(warm['locs']) + ['cold']*len(cold['locs'])
    # Chronological list of (phase, peak location, termination time)
    paired = sorted(zip(events,
                        warm['locs']+cold['locs'],
                        durations['warm']+durations['cold']),
                    key=lambda v: v[1])
    transition = {}
    # Guard: the original unpacked zip(*...) directly and crashed with
    # "need more than 0 values to unpack" when there were no events
    if paired:
        events, locs, termT = zip(*paired)
        for iloc in range(len(events)-1):
            evt_pair = '_'.join(events[iloc:iloc+2])
            # Count a transition when the next peak follows within
            # wait_window of this event's termination
            if (locs[iloc+1] - locs[iloc] - termT[iloc]) <= wait_window:
                transition[evt_pair] = transition.setdefault(evt_pair, 0) + 1
    transition['warm'] = len(warm['locs'])
    transition['cold'] = len(cold['locs'])
    transition['warm_locs'] = warm['locs']
    # Fixed copy-paste bug: this used to be assigned warm['locs']
    transition['cold_locs'] = cold['locs']
    return transition
def enso_transition_prob(*args, **kwargs):
    ''' Compute the transition probability from the results given by
    :py:func:`enso_transition`

    All arguments and keyword arguments are fed to
    :py:func:`enso_transition`

    Returns:
        dict with keys "warm_cold", "warm_warm", "cold_warm", "cold_cold".
        A key is omitted when its phase has no transition opportunity
        (consistent with :py:func:`enso_transition2`; the original code
        raised ZeroDivisionError in that case).
    '''
    tra = enso_transition(*args, **kwargs)
    nEN, nLN = tra['warm'], tra['cold']
    # The chronologically last event has no successor and is excluded
    # from its phase's denominator
    if tra['warm_locs'] and tra['cold_locs']:
        if tra['warm_locs'][-1] > tra['cold_locs'][-1]:
            # warm comes last
            nEN -= 1
        else:
            # cold comes last
            nLN -= 1
    elif tra['warm_locs']:
        # warm comes last
        nEN -= 1
    elif tra['cold_locs']:
        # cold comes last
        nLN -= 1
    Prob = {}
    for phase1, phase2 in itertools.product(['warm', 'cold'],
                                            ['warm', 'cold']):
        tra_phase = phase1 + "_" + phase2
        n = nEN if phase1 == 'warm' else nLN
        # Guard against division by zero when a phase has no events
        if n > 0:
            Prob[tra_phase] = float(tra.get(tra_phase, 0.))/n
    return Prob
def enso_skewness(nino34):
    ''' Shortcut for calculating skewness using scipy.stats

    Chooses scipy.stats.mstats.skew if nino34 is a masked array (as it
    often is)

    Args:
        nino34 (numpy 1d array): ENSO SST anomaly index

    Returns:
        scalar
    '''
    if _GEODAT_INSTALLED:
        return geodat.stat.skewness(nino34)
    else:
        # Fixed: the original tested isinstance(nino34,
        # numpy.ma.coreMaskedArray) — a typo (missing dot) that raised
        # AttributeError.  numpy.ma.isMaskedArray is also what
        # find_enso_percentile uses.
        if numpy.ma.isMaskedArray(nino34):
            return scipy.stats.mstats.skew(nino34)
        else:
            return scipy.stats.skew(nino34)


## This is assuming geodat.nc.Variable inputs
## The current version of geodat.nc.Variable can achieve the following easily
# def threshold2composite(field,nino34,warm_thres,cold_thres):
#     '''Threshold only '''
#     # Extract the times when the thresholds are passed
#     data = field[numpy.logical_or(nino34.data > warm_thres,
#                                   nino34.data < cold_thres)]
#     # Get the time stamps
#     time = field.getTime()[numpy.logical_or(nino34.data > warm_thres,
#                                             nino34.data < cold_thres)]
#     itime = data.getCAxes().index('T')
#     # Update time axis
#     data.dims[itime].data = time
#     return data
def seasonal_locking(pklocs, months):
    ''' Given the indices of the peaks and a list of months, return the
    count of events in a particular month (Jan-Dec).

    Args:
        pklocs (list of int): indices of event peaks with reference to some
            time axis
        months (list of int or numpy 1d array of int): calendar months
            (1-12) of that time axis

    Returns:
        numpy array of length 12 = [number of events peaked in Jan,
        number of events peaked in Feb, ...]
    '''
    # Coerce to ndarray: the docstring always promised "list of int" but
    # fancy indexing a plain list with a list of indices fails
    event_months = numpy.asarray(months)[pklocs]
    count = numpy.zeros(12)
    for month in event_months:
        count[month-1] += 1
    return count
def seasonal_locking_from_nino34(nino34, months, find_events_func=None,
                                 count_warm=True, count_cold=True):
    ''' Shortcut for applying :py:func:`seasonal_locking` manually for warm
    and cold events.

    Args:
        nino34 (numpy 1d array): ENSO SST monthly anomaly
        months (list of int or numpy 1d array of int): calendar months for
            the time axis of nino34
        find_events_func (function): a function that accepts nino34 as the
            only argument for finding warm and cold events.  If None,
            default is find_enso_threshold(nino34, 0.8, -0.8)
        count_warm (bool): whether warm events are counted
        count_cold (bool): whether cold events are counted

    Returns:
        list of int = [number of events peaked in Jan, number of events
        peaked in Feb, ...]
    '''
    if find_events_func is None:
        def find_events_func(index):
            # Default: +/-0.8 degC thresholds on the SST anomaly
            return find_enso_threshold(index, 0.8, -0.8)
    assert nino34.shape[0] == months.shape[0]
    warm, cold = find_events_func(nino34)
    # Accumulate the monthly histogram over the requested phases
    selected = []
    if count_warm:
        selected.append(warm)
    if count_cold:
        selected.append(cold)
    total_count = 0
    for events in selected:
        total_count = total_count + seasonal_locking(events['locs'], months)
    return total_count
def __keep_old_function_name__(f):
    ''' For preserving old function names used in previous projects

    Returns a thin wrapper around *f* whose docstring points back to the
    new name.
    '''
    def new_func(*args, **kwargs):
        # Pure pass-through: no inspection, no caching
        return f(*args, **kwargs)
    new_func.__doc__ = "Same as " + f.__name__
    return new_func


# Functions are renamed in order to comply with coding style convention
# For compatibility with previous projects, old names are kept here
# Backward-compatible aliases: the old (pre-renaming) camelCase names map
# onto the current snake_case implementations
findENSO_percentile = __keep_old_function_name__(find_enso_percentile)
findENSO_threshold = __keep_old_function_name__(find_enso_threshold)
Persistence_check = __keep_old_function_name__(persistence_check)
findEvents = __keep_old_function_name__(find_events)
find_EN_pattern = __keep_old_function_name__(find_en_pattern)
ENSO_duration = __keep_old_function_name__(enso_duration)
ENSO_transition2 = __keep_old_function_name__(enso_transition2)
ENSO_transition = __keep_old_function_name__(enso_transition)
ENSO_transition_prob = __keep_old_function_name__(enso_transition_prob)
ENSO_skewness = __keep_old_function_name__(enso_skewness)
Seasonal_Locking = __keep_old_function_name__(seasonal_locking)
Seasonal_Locking_from_nino34 = __keep_old_function_name__(
    seasonal_locking_from_nino34)
# Fork me on GitHub