Source code for geodat.stat
import numpy
import scipy.interpolate
from scipy.ndimage import filters
import scipy.stats.mstats
import logging
logger = logging.getLogger(__name__)
formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
def lat_weights(lats):
    """ Return cos(lats)

    Required as a weighting for calculating areas on
    a regular cartesian lat-lon grid in which meridians
    converge.

    Arguments:
        lats (numpy ndarray): latitudes in degrees

    Returns:
        numpy ndarray
    """
    wgts = numpy.cos(numpy.radians(lats))
    return wgts
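
# Illustrative usage (not part of geodat.stat): a minimal sketch of
# area-weighting a zonal-mean field with lat_weights; the variable names
# below are hypothetical.
# >>> lats = numpy.linspace(-90., 90., 73)
# >>> wgts = lat_weights(lats)
# >>> zonal_mean = numpy.random.rand(73)      # hypothetical field on this grid
# >>> global_mean = (zonal_mean * wgts).sum() / wgts.sum()
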
def runave(data, N, axis=0, step=None):
    """ Compute a box-car running average using
    scipy.ndimage.filters.convolve1d; edge values are masked.

    Args:
        data (numpy array)
        N (int): width of the box car
        axis (int): axis along which the average is computed
            (see scipy.ndimage.filters.convolve1d)
        step (optional, int): skips between the box car samples

    Returns:
        numpy masked array (same shape as data)
    """
    if isinstance(data, numpy.ma.core.MaskedArray):
        data = data.filled(numpy.nan)
    if step is None:
        weights = numpy.ones((N,))
    else:
        if not isinstance(step, int):
            raise TypeError("step should be an integer")
        weights = numpy.array(([1.] + [0.]*(step-1))*(N-1) + [1.])
    weights /= weights.sum()
    return numpy.ma.masked_invalid(filters.convolve1d(
        data, weights, mode='constant', cval=numpy.nan, axis=axis))
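
# Illustrative usage (not part of geodat.stat): a sketch of smoothing a
# monthly-like series with a 12-point box car, and with a strided box car;
# the data below are synthetic.
# >>> t = numpy.arange(120.)
# >>> series = numpy.sin(2*numpy.pi*t/12.) + 0.1*numpy.random.randn(120)
# >>> smoothed = runave(series, 12)         # 12-point running mean, edges masked
# >>> strided = runave(series, 5, step=3)   # 5 samples taken every 3 points
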
def skewness(data):
    """ Essentially the same as scipy.stats.skew or
    scipy.stats.mstats.skew depending on the input

    Args:
        data (numpy array or masked array)

    Returns:
        numpy array or masked array
    """
    if isinstance(data, numpy.ma.core.MaskedArray):
        npmod = numpy.ma
    else:
        npmod = numpy
    return npmod.power(data - npmod.mean(data), 3).mean() / \
        npmod.power(npmod.var(data), 3./2.)
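
# Illustrative usage (not part of geodat.stat), assuming a synthetic sample:
# >>> sample = numpy.random.exponential(size=10000)    # positively skewed
# >>> skewness(sample)                                  # roughly 2 for an exponential
# >>> skewness(numpy.ma.masked_greater(sample, 5.))     # masked input uses numpy.ma
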
def resample_xy(x, y, xnew, nx, ny, y_always_valid=True):
    """ Given paired values of (x, y), randomly draw a value ynew for
    each element of xnew such that the joint distribution of
    (xnew, ynew) resembles that of (x, y)

    Arguments:
        x (numpy 1d array)
        y (numpy 1d array): shape should match x
        xnew (numpy 1d array)
        nx (int): the number of bins applied to x
        ny (int): the number of bins applied to y
        y_always_valid (bool): keep drawing from the cdf until y is valid

    Returns:
        ynew (numpy 1d array): length = len(xnew)
    """
    if y.shape != x.shape:
        raise ValueError("shape of y should match the shape of x")
    xedges = numpy.linspace(x.min(), x.max(), nx+1)
    xnew_ibin = numpy.digitize(xnew, xedges)
    y_cdf = {}
    y_mids = {}
    ynew = numpy.empty_like(xnew, dtype=y.dtype)
    for ix, ibin in enumerate(xnew_ibin):
        # If xnew is out of bounds, return numpy.nan for that entry
        if xnew[ix] >= xedges[-1] or xnew[ix] < xedges[0]:
            logger.warning(
                "Out of bound for x={:.2e}. Return numpy.nan.".format(
                    float(xnew[ix])))
            ynew[ix] = numpy.nan
            continue
        # Don't compute the cdf twice for the same bin;
        # compute it once and cache it
        if ibin not in y_cdf:
            ys = y[(x > xedges[ibin-1]) & (x <= xedges[ibin])]
            y_cdf[ibin], y_mids[ibin] = cdf(ys, bins=ny)
        # Keep drawing a new y until the drawn value is valid
        while True:
            rand_num = numpy.random.uniform()
            try:
                ynew[ix] = scipy.interpolate.interp1d(
                    y_cdf[ibin], y_mids[ibin])(rand_num)
                break
            except ValueError:
                if y_always_valid:
                    continue
                else:
                    ynew[ix] = numpy.nan
                    break
    return ynew
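
# Illustrative usage (not part of geodat.stat): a sketch that resamples y
# for new x values so that (xnew, ynew) mimics the joint distribution of a
# synthetic correlated pair (x, y).
# >>> x = numpy.random.randn(10000)
# >>> y = 2.*x + numpy.random.randn(10000)
# >>> xnew = numpy.random.randn(500)
# >>> ynew = resample_xy(x, y, xnew, nx=20, ny=20)
# >>> # entries of xnew outside the range of x come back as numpy.nan
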
def cdf(data, **kwargs):
    """ Compute a histogram and the corresponding cumulative
    frequency polygon.

    Args:
        data (numpy 1d array)

    Keyword arguments are passed on to numpy.histogram

    Returns:
        cdf, bins : cumulative frequencies and the mid values of the bins
    """
    # Use histogram for cdf
    h, x = numpy.histogram(data, **kwargs)
    x_mid = 0.5*(x[1:] + x[:-1])
    h_cum = numpy.cumsum(h)
    return h_cum/float(h_cum[-1]), x_mid
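
# Illustrative usage (not part of geodat.stat): estimating a quantile by
# inverting the cumulative frequency polygon, as resample_xy does internally.
# >>> data = numpy.random.randn(1000)
# >>> cum, mids = cdf(data, bins=50)
# >>> median_est = scipy.interpolate.interp1d(cum, mids)(0.5)
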