#!/usr/bin/env python3
# -*- coding: utf-8 -*-
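"""Helper functions used across the MADCAP analysis scripts: netCDF variable loading,
1D resampling, file hashing, newest-file lookup, ordinal-to-datetime conversion,
KDE-based 2D histograms, and parsing of Pete's angle files."""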
import numpy as np
import scipy.stats as st
from datetime import datetime as dt
from datetime import timedelta
import warnings
from netCDF4 import Dataset
from matplotlib import pyplot as plt
import hashlib
import fnmatch
import pickle
from glob import glob
import os
def loadVARSnetCDF(filePath, varNames=None, verbose=False, keepTimeInd=None):
    """
    Load variables from a netCDF file into a dict of numpy arrays.
    filePath - Full path of the netCDF file to read
    varNames - List (or numpy array) of variable names to load; None loads every variable in the file
    verbose - Print a message when a variable is missing or its type must be converted
    keepTimeInd - Indices along the time dimension to keep; None keeps all times
    """
    badDataCutoff = 1e12 # float values larger than this will be replaced with NaNs
    nonTimeVars = ['x','y','rayleigh_depol_ratio','ocean_refractive_index','lev','radius','dradius','rlow','rup'] # variables we skip when subselecting for keepTimeInd
    if varNames is not None: assert isinstance(varNames, (list, np.ndarray)), 'varNames must be a list or numpy array!'
    measData = dict()
    with Dataset(filePath, mode='r') as netCDFobj:
        if varNames is None: varNames = netCDFobj.variables.keys()
        if 'time' in netCDFobj.dimensions and keepTimeInd is not None: # this comes up a lot and previously produced a very cryptic error
            assert max(keepTimeInd) < netCDFobj.dimensions['time'].size, 'At least one keepTimeInd exceeded len(time)=%d of %s' % (netCDFobj.dimensions['time'].size, filePath)
        for varName in varNames:
            if varName in netCDFobj.variables.keys():
                with warnings.catch_warnings():
                    warnings.simplefilter('ignore', category=UserWarning) # ignore missing_value not cast warning
                    if keepTimeInd is not None and varName not in nonTimeVars:
                        measData[varName] = np.array(netCDFobj.variables[varName][keepTimeInd])
                    else:
                        measData[varName] = np.array(netCDFobj.variables[varName])
                if np.issubdtype(measData[varName].dtype, np.integer) and np.any(measData[varName] > badDataCutoff):
                    if verbose:
                        msg = '%s had type INT with value(s) above badDataCutoff (%g), converting to FLOAT for NaN compatibility.'
                        warnings.warn(msg % (varName, badDataCutoff))
                    measData[varName] = measData[varName].astype(np.float64) # numpy ints can't be NaN
                if 'float' in measData[varName].dtype.name:
                    with np.errstate(invalid='ignore'): # comparison against preexisting NaNs would produce a runtime warning
                        measData[varName][measData[varName] > badDataCutoff] = np.nan
            elif verbose:
                print("\x1b[1;35m Could not find \x1b[1;31m%s\x1b[1;35m variable in netCDF file: %s\x1b[0m" % (varName, filePath))
    return measData
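# A minimal usage sketch for loadVARSnetCDF. The file path and variable names below are
# hypothetical; substitute a real netCDF file and variables it actually contains.
def _demoLoadVARSnetCDF():
    measData = loadVARSnetCDF('/path/to/model_output.nc', varNames=['time', 'lev'], keepTimeInd=np.r_[0:10])
    for name, vals in measData.items():
        print('%s: %s' % (name, str(vals.shape)))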
def downsample1d(X, Y, Xnew, axis=0):
    """
    Returns an array with the mean of Y over each bin centered on the points of Xnew;
    bin edges sit at the midpoints between consecutive Xnew values, extrapolated at the ends.
    This function can actually be used to down- and up-sample (i.e. interpolate) data.
    """
    from scipy import interpolate
    X = np.atleast_2d(X)
    Y = np.atleast_2d(Y)
    if X.shape[0]==1 and axis==0: X = X.T
    if Y.shape[0]==1 and axis==0: Y = Y.T
    if np.all(np.array(Y.shape)>1) and np.any(np.array(X.shape)==1): # Y was 2D but X was only 1D
        X = np.tile(X, [1, Y.shape[1]]) if axis==0 else np.tile(X, [Y.shape[0], 1])
    assert X.shape==Y.shape, 'X and Y could not be made to have the same shape.'
    assert X.ndim<=2 and axis<2, 'This function can only handle X and Y arrays of 1D or 2D (not greater).'
    assert np.all(X==np.sort(X, axis=axis)), 'The values of X must be in ascending order along axis.'
    assert np.array(Xnew).ndim==1, 'Xnew must be a 1D array, even if X and Y are 2D.'
    assert np.all(Xnew==np.sort(Xnew)), 'The values of Xnew must be in ascending order.'
    # edge treatment 1: mean(Y(X[0] to X[0]+ΔXnew))
    # XnewBnds = np.interp(np.r_[0.5:len(Xnew)-1], np.r_[0:len(Xnew)], Xnew)
    # XnewBnds = np.r_[Xnew[0], XnewBnds, Xnew[-1]]
    # edge treatment 2: {Y[0]+mean(Y(X[0] to X[0]+ΔXnew))}/2 – produces curves that _look_ more like the original Y
    f = interpolate.interp1d(np.r_[0:len(Xnew)], Xnew, fill_value='extrapolate')
    XnewBnds = f(np.r_[-0.5:len(Xnew)]) # len(Xnew)+1 bin edges at half-integer indices
    Ynew = np.empty([len(Xnew), Y.shape[1]]) if axis==0 else np.empty([Y.shape[0], len(Xnew)])
    XYinLoopOrder = list(zip(X.T, Y.T)) if axis==0 else list(zip(X, Y)) # a zip obj. is a generator, which we can only use once; a list is a reusable iterable
    for i,(lb,ub) in enumerate(zip(XnewBnds[:-1], XnewBnds[1:])): # loop over each new bin
        for j,(x,y) in enumerate(XYinLoopOrder): # loop over each series
            bndsY = np.interp([lb, ub], x, y)
            chnkInd = np.logical_and(x>lb, x<ub)
            xSelect = x[chnkInd]
            chnkX = np.empty(len(xSelect)+2) # 4x the lines of code, but 30x faster than np.r_[lb, x[chnkInd], ub]
            chnkX[0] = lb
            chnkX[-1] = ub
            chnkX[1:-1] = xSelect
            chnkY = np.empty(len(chnkX))
            chnkY[0] = bndsY[0]
            chnkY[-1] = bndsY[1]
            chnkY[1:-1] = y[chnkInd]
            if axis==0:
                Ynew[i,j] = np.trapz(chnkY, chnkX)/(ub-lb) # bin mean = integral over the bin / bin width
            else:
                Ynew[j,i] = np.trapz(chnkY, chnkX)/(ub-lb)
    if Ynew.shape[1]==1: Ynew = Ynew[:,0] # the user probably doesn't want 2D output with horizontal singletons
    return Ynew
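# A minimal, self-contained sketch of downsample1d on synthetic data (the series and grids
# below are arbitrary illustrations): bin-average a noisy fine-grid signal onto a coarser grid.
def _demoDownsample1d():
    X = np.arange(0, 100, 1.0) # fine grid
    Y = np.sin(X/10) + 0.1*np.random.randn(len(X)) # noisy signal on the fine grid
    Xnew = np.arange(5, 95, 10.0) # nine coarse bin centers
    Ynew = downsample1d(X, Y, Xnew)
    print(Ynew.shape) # -> (9,)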
def hashFileSHA1(filePaths, quick=False):
    """
    Returns a single SHA1 hex digest of the contents of the file(s), hashed in order.
    filePaths - String, list of strings, or numpy array of strings with the file path(s) to hash
    quick - Only read the first block (~64 KiB) of each file
    """
    BLOCKSIZE = 65536
    if type(filePaths) is np.ndarray: filePaths = filePaths.tolist()
    if not type(filePaths) is list: filePaths = [filePaths]
    hasher = hashlib.sha1()
    for fp in filePaths:
        with open(fp, 'rb') as afile:
            buf = afile.read(BLOCKSIZE)
            satisfied = False
            while not satisfied:
                hasher.update(buf)
                buf = afile.read(BLOCKSIZE)
                satisfied = len(buf) == 0 or quick # stop after the first block if quick
    return hasher.hexdigest()
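# A minimal, self-contained sketch of hashFileSHA1 using a throwaway temporary file, so it
# does not depend on any particular file existing on disk.
def _demoHashFileSHA1():
    import tempfile
    fd, tmpPath = tempfile.mkstemp()
    with os.fdopen(fd, 'wb') as f:
        f.write(b'MADCAP test bytes')
    print(hashFileSHA1(tmpPath, quick=True)) # quick and full digests agree for files under one block
    os.remove(tmpPath)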
def findNewestMatch(directory, pattern='*'):
    """
    Returns the full path of the most recently modified file in directory whose name
    matches pattern (fnmatch syntax), or an empty string if nothing matches.
    """
    nwstTime = 0
    for file in os.listdir(directory):
        filePath = os.path.join(directory, file)
        if fnmatch.fnmatch(file, pattern) and os.path.getmtime(filePath) > nwstTime:
            nwstTime = os.path.getmtime(filePath)
            newestFN = filePath
    if nwstTime > 0:
        return newestFN
    else:
        return ''
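# A minimal sketch of findNewestMatch; the working directory and *.py pattern are arbitrary
# illustrations.
def _demoFindNewestMatch():
    newest = findNewestMatch('.', '*.py')
    print(newest if newest else 'No match found.')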
def ordinal2datetime(ordinal):
    """
    Convert a fractional proleptic Gregorian ordinal (the integer part is the day, as in
    datetime.toordinal; the fractional part is the time of day) to a datetime object.
    """
    dtObjDay = dt.fromordinal(int(np.floor(ordinal)))
    dtObjTime = timedelta(seconds=np.remainder(ordinal, 1)*86400) # day fraction -> seconds
    dtObj = dtObjDay + dtObjTime
    return dtObj
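# A minimal, self-contained sketch of ordinal2datetime: half a day past the ordinal of
# 2020-01-01 comes back as noon on that date.
def _demoOrdinal2datetime():
    ordinal = dt(2020, 1, 1).toordinal() + 0.5
    print(ordinal2datetime(ordinal)) # -> 2020-01-01 12:00:00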
def KDEhist2D(x, y, axHnd=None, res=100, xrng=None, yrng=None, sclPow=1, cmap='BuGn', clbl='Probability Density (a.u.)'):
    """
    Plot a 2D histogram of x and y as filled contours of a Gaussian kernel density estimate.
    axHnd - Axes handle to plot into; None creates a new figure
    res - Number of grid points along each axis
    xrng, yrng - [min, max] plot ranges; None uses the data ranges
    sclPow - Power applied to the density before contouring, to stretch the color scale
    cmap - Colormap name passed to contourf
    clbl - Colorbar label
    Returns the axes handle.
    """
    # set plot range
    xmin = xrng[0] if xrng else x.min()
    xmax = xrng[1] if xrng else x.max()
    ymin = yrng[0] if yrng else y.min()
    ymax = yrng[1] if yrng else y.max()
    # perform the kernel density estimate
    xx, yy = np.mgrid[xmin:xmax:complex(0, res), ymin:ymax:complex(0, res)]
    positions = np.vstack([xx.ravel(), yy.ravel()])
    values = np.vstack([x, y])
    kernel = st.gaussian_kde(values)
    f = np.reshape(kernel(positions).T, xx.shape)
    # plot results
    if not axHnd:
        fig = plt.figure()
        axHnd = fig.gca()
    axHnd.set_xlim(xmin, xmax)
    axHnd.set_ylim(ymin, ymax)
    # contourf plot
    objHnd = axHnd.contourf(xx, yy, f**sclPow, 256, cmap=cmap)
    clrHnd = plt.colorbar(objHnd, ax=axHnd)
    # objHnd.set_clim(vmin=0)
    tckVals = clrHnd.get_ticks()**(1/sclPow)/np.max(clrHnd.get_ticks()**(1/sclPow)) # undo sclPow and normalize the tick values
    clrHnd.set_ticklabels(['%4.1f' % tck for tck in 100*tckVals])
    clrHnd.set_label(clbl)
    return axHnd
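# A minimal, self-contained sketch of KDEhist2D on correlated Gaussian noise (the sample
# size, correlation, and styling arguments are arbitrary illustrations).
def _demoKDEhist2D():
    x = np.random.randn(500)
    y = 0.6*x + 0.8*np.random.randn(500)
    KDEhist2D(x, y, res=50, sclPow=0.5)
    plt.show()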
def readPetesAngleFiles(fileDirPath, nAng=10, verbose=False, saveData=True, loadData=True):
    """
    Read in the randomly sampled angle text files generated by Pete.
    fileDirPath - Full path of the directory from which glob will grab ALL text files (should be free of extraneous *.txt files)
    nAng - The number of viewing angles per ground pixel
    saveData - Save the results to the directory in fileDirPath in the form of a pickle
    loadData - Load the results from a previous run, if they exist
    """
    fileNames = np.sort(glob(os.path.join(fileDirPath, '*.txt')))
    hashTag = hashFileSHA1(fileNames, quick=True)[0:16] # if files differ only after the first 64 KiB we will miss it
    pklPath = os.path.join(fileDirPath, 'readPetesAngleFiles_ALLdata_'+hashTag+'.pkl')
    if loadData:
        try:
            with open(pklPath, 'rb') as f:
                angDict = pickle.load(f)
            if verbose: print('Data loaded from %s' % pklPath)
            return angDict
        except (EnvironmentError, pickle.UnpicklingError):
            if verbose: print('Could not load valid pickle data from %s. Processing raw text files...' % pklPath)
    angDict = {k: [] for k in ['lon','lat','datetime','vis','sza','fis','sca']}
    for fn in fileNames:
        if verbose: print('Processing %s...' % os.path.basename(fn))
        peteData = np.genfromtxt(fn, delimiter=',', skip_header=1, dtype=None, encoding='UTF8')
        for ind in range(0, len(peteData), nAng): # one iteration per ground pixel
            angDict['lon'].append(peteData[ind][0])
            angDict['lat'].append(peteData[ind][1])
            dtStr = peteData[ind][2] + '%02d' % np.floor(95/60) + '%02d' % (95 % 60) # hard-coded 95 s past the hour (MM=01, SS=35)
            angDict['datetime'].append(dt.strptime(dtStr, '%Y%m%d_%H00z%M%S'))
            angDict['vis'].append([x[5] for x in peteData[ind:ind+nAng]])
            angDict['sza'].append(np.mean([x[6] for x in peteData[ind:ind+nAng]]))
            fis = np.array([x[8]-x[7] for x in peteData[ind:ind+nAng]])
            fis[fis<0] = fis[fis<0] + 360 # wrap relative azimuths into [0, 360)
            angDict['fis'].append(fis)
            angDict['sca'].append([x[9] for x in peteData[ind:ind+nAng]])
    for key in angDict.keys(): angDict[key] = np.array(angDict[key]) # convert everything to numpy
    if saveData:
        with open(pklPath, 'wb') as f:
            pickle.dump(angDict, f, pickle.HIGHEST_PROTOCOL)
        if verbose: print('Data saved to %s' % pklPath)
    return angDict
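# A minimal usage sketch for readPetesAngleFiles. The directory below is hypothetical; point
# it at a folder containing only Pete's comma-delimited angle text files.
def _demoReadPetesAngleFiles():
    angDict = readPetesAngleFiles('/path/to/angleFiles', nAng=10, verbose=True)
    print('%d ground pixels read' % len(angDict['lat']))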