Source code for geodat.stat
import numpy
import scipy.interpolate
from scipy.ndimage import filters
import scipy.stats.mstats
import logging
logger = logging.getLogger(__name__)
formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
def lat_weights(lats):
    """ Return cos(lats)

    Required as a weighting for calculating areas on
    a regular cartesian lat-lon grid in which meridians
    converge.

    Arguments:
        lats (numpy ndarray): latitudes in degrees

    Returns:
        numpy ndarray
    """
    wgts = numpy.cos(numpy.radians(lats))
    return wgts
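
# Illustrative usage (not part of geodat.stat): a minimal sketch of
# area-weighting a zonal-mean field with lat_weights; the variable names
# below are hypothetical.
# >>> lats = numpy.linspace(-90., 90., 73)
# >>> wgts = lat_weights(lats)
# >>> zonal_mean = numpy.random.rand(73)      # hypothetical field on this grid
# >>> global_mean = (zonal_mean * wgts).sum() / wgts.sum()
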
def runave(data, N, axis=0, step=None):
    """ Compute a box-car running average using
    scipy.ndimage.filters.convolve1d; edge values are masked.

    Args:
        data (numpy array)
        N (int): width of the box car
        axis (int): axis along which the average is computed
            (see scipy.ndimage.filters.convolve1d)
        step (optional, int): skips between the box car samples

    Returns:
        numpy masked array (same shape as data)
    """
    if isinstance(data, numpy.ma.core.MaskedArray):
        data = data.filled(numpy.nan)
    if step is None:
        weights = numpy.ones((N,))
    else:
        if not isinstance(step, int):
            raise TypeError("step should be an integer")
        weights = numpy.array(([1.] + [0.]*(step-1))*(N-1) + [1.])
    weights /= weights.sum()
    return numpy.ma.masked_invalid(filters.convolve1d(
        data, weights, mode='constant', cval=numpy.nan, axis=axis))
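
# Illustrative usage (not part of geodat.stat): a sketch of smoothing a
# monthly-like series with a 12-point box car, and with a strided box car;
# the data below are synthetic.
# >>> t = numpy.arange(120.)
# >>> series = numpy.sin(2*numpy.pi*t/12.) + 0.1*numpy.random.randn(120)
# >>> smoothed = runave(series, 12)         # 12-point running mean, edges masked
# >>> strided = runave(series, 5, step=3)   # 5 samples taken every 3 points
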
def skewness(data):
    """ Essentially the same as scipy.stats.skew or
    scipy.stats.mstats.skew depending on the input

    Args:
        data (numpy array or masked array)

    Returns:
        numpy array or masked array
    """
    if isinstance(data, numpy.ma.core.MaskedArray):
        npmod = numpy.ma
    else:
        npmod = numpy
    return npmod.power(data - npmod.mean(data), 3).mean() / \
        npmod.power(npmod.var(data), 3./2.)
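
# Illustrative usage (not part of geodat.stat), assuming a synthetic sample:
# >>> sample = numpy.random.exponential(size=10000)    # positively skewed
# >>> skewness(sample)                                  # roughly 2 for an exponential
# >>> skewness(numpy.ma.masked_greater(sample, 5.))     # masked input uses numpy.ma
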
def resample_xy(x, y, xnew, nx, ny, y_always_valid=True):
    """ Given paired values of (x, y), randomly draw a value ynew for
    each element of xnew such that the joint distribution of
    (xnew, ynew) resembles that of (x, y)

    Arguments:
        x (numpy 1d array)
        y (numpy 1d array): shape should match x
        xnew (numpy 1d array)
        nx (int): the number of bins applied to x
        ny (int): the number of bins applied to y
        y_always_valid (bool): keep drawing from the cdf until y is valid

    Returns:
        ynew (numpy 1d array): length = len(xnew)
    """
    if y.shape != x.shape:
        raise ValueError("shape of y should match the shape of x")
    xedges = numpy.linspace(x.min(), x.max(), nx+1)
    xnew_ibin = numpy.digitize(xnew, xedges)
    y_cdf = {}
    y_mids = {}
    ynew = numpy.empty_like(xnew, dtype=y.dtype)
    for ix, ibin in enumerate(xnew_ibin):
        # If xnew is out of bounds, return numpy.nan for that entry
        if xnew[ix] >= xedges[-1] or xnew[ix] < xedges[0]:
            logger.warning(
                "Out of bound for x={:.2e}. Return numpy.nan.".format(
                    float(xnew[ix])))
            ynew[ix] = numpy.nan
            continue
        # Don't compute the cdf twice for the same bin;
        # compute it once and cache it
        if ibin not in y_cdf:
            ys = y[(x > xedges[ibin-1]) & (x <= xedges[ibin])]
            y_cdf[ibin], y_mids[ibin] = cdf(ys, bins=ny)
        # Keep drawing a new y until the drawn value is valid
        while True:
            rand_num = numpy.random.uniform()
            try:
                ynew[ix] = scipy.interpolate.interp1d(
                    y_cdf[ibin], y_mids[ibin])(rand_num)
                break
            except ValueError:
                if y_always_valid:
                    continue
                else:
                    ynew[ix] = numpy.nan
                    break
    return ynew
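
# Illustrative usage (not part of geodat.stat): a sketch that resamples y
# for new x values so that (xnew, ynew) mimics the joint distribution of a
# synthetic correlated pair (x, y).
# >>> x = numpy.random.randn(10000)
# >>> y = 2.*x + numpy.random.randn(10000)
# >>> xnew = numpy.random.randn(500)
# >>> ynew = resample_xy(x, y, xnew, nx=20, ny=20)
# >>> # entries of xnew outside the range of x come back as numpy.nan
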
def cdf(data, **kwargs):
    """ Compute a histogram and the corresponding cumulative
    frequency polygon.

    Args:
        data (numpy 1d array)

    Keyword arguments are passed on to numpy.histogram

    Returns:
        cdf, bins : cumulative frequencies and the mid values of the bins
    """
    # Use histogram for cdf
    h, x = numpy.histogram(data, **kwargs)
    x_mid = 0.5*(x[1:] + x[:-1])
    h_cum = numpy.cumsum(h)
    return h_cum/float(h_cum[-1]), x_mid
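
# Illustrative usage (not part of geodat.stat): estimating a quantile by
# inverting the cumulative frequency polygon, as resample_xy does internally.
# >>> data = numpy.random.randn(1000)
# >>> cum, mids = cdf(data, bins=50)
# >>> median_est = scipy.interpolate.interp1d(cum, mids)(0.5)
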