-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathAERONET_augmentation.py
180 lines (164 loc) · 8.79 KB
/
AERONET_augmentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Example MERRA-2 profile used by AERONET retrieval:
discover:/gpfsm/dnb32/dgiles/AERONET/SAMPLER/DATA/Y2019/merra2_aeronet_aop_ext500nm.20190101_00-20190131_21.nc4
AERONET summary files:
Discover:/gpfsm/dnb32/okemppin/purecases-nc/
-rwxr-xr-x 1 okemppin s1043 16392 Feb 17 19:02 puredata-combi-Level2-alm-SS-0.70pureness-clean2.nc4
-rwxr-xr-x 1 okemppin s1043 16392 Feb 17 19:02 puredata-combi-Level2-hyb-SS-0.70pureness-clean2.nc4
-rwxr-xr-x 1 okemppin s1043 992584 Feb 17 19:02 puredata-combi-Level2-alm-SU-0.70pureness-clean2.nc4
-rwxr-xr-x 1 okemppin s1043 189768 Feb 17 19:02 puredata-combi-Level2-hyb-SU-0.70pureness-clean2.nc4
-rwxr-xr-x 1 okemppin s1043 648520 Feb 17 19:02 puredata-combi-Level2-alm-OC-0.70pureness-clean2.nc4
-rwxr-xr-x 1 okemppin s1043 304456 Feb 17 19:02 puredata-combi-Level2-hyb-OC-0.70pureness-clean2.nc4
-rwxr-xr-x 1 okemppin s1043 2827592 Feb 17 19:02 puredata-combi-Level2-alm-DU-0.70pureness-clean2.nc4
-rwxr-xr-x 1 okemppin s1043 189768 Feb 17 19:02 puredata-combi-Level2-hyb-DU-0.70pureness-clean2.nc4
-rwxr-xr-x 1 okemppin s1043 16392 Feb 17 19:02 puredata-combi-Level2-alm-BC-0.70pureness-clean2.nc4
-rwxr-xr-x 1 okemppin s1043 16392 Feb 17 19:02 puredata-combi-Level2-hyb-BC-0.70pureness-clean2.nc4
-rwxr-xr-x 1 okemppin s1043 196244 Feb 17 17:52 puredata-combi-Level1_5-hyb-SS-0.70pureness-clean2.nc4
...
Lookup tables with RI shape:
DISCOVER:/gpfsm/dnb32/okemppin/LUT-GSFun/
"""
import os
import sys
import re
from netCDF4 import Dataset
import numpy as np
RtrvSimParentDir = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) # we assume GSFC-GRASP-Python-Interface is in parent of GSFC-Retrieval-Simulators
sys.path.append(os.path.join(RtrvSimParentDir, "GSFC-GRASP-Python-Interface"))
from miscFunctions import checkDiscover
from MADCAP_functions import loadVARSnetCDF
typeKeys = ['DU', 'SU', 'BC', 'OC', 'SS']
almTag = 'alm'
hybTag = 'hyb'
primeTag = 'Level2'
secondTag = 'Level1_5'
thrshLen = 10 # if primeTag has fewer than this many cases we use secondTag instead
wvlsOut = [340, 355, 360, 380, 410, 532, 550, 670, 870, 910, 1064, 1230, 1550, 1650, 1880, 2130, 2250] # nm
rhTrgt = 0.75 # target RH for LUT (fraction)
if checkDiscover(): # DISCOVER
baseDirCases = '/gpfsm/dnb32/okemppin/purecases-nc/'
baseDirLUT = '/gpfsm/dnb32/okemppin/LUT-GSFun/'
else: # probably Reed's MacBook Air
baseDirCases = '/Users/wrespino/Downloads/'
baseDirLUT = '/Users/wrespino/Synced/Remote_Sensing_Projects/MADCAP_CAPER/newOpticsTables/LUT-DUST/' # only has dust LUT
baseFN = 'puredata-combi-%s-%s-%s-0.70pureness-clean2.nc4'
basePathAERO = os.path.join(baseDirCases, baseFN)
basePathLUT = {
'DU':os.path.join(baseDirLUT, 'optics_DU.v15_6.nc'),
'SU':os.path.join(baseDirLUT, 'optics_SU.v5_7.GSFun.nc'),
'BC':os.path.join(baseDirLUT, 'optics_BC.v5_7.GSFun.nc'),
'OC':os.path.join(baseDirLUT, 'optics_OC.v12_7.GSFun.nc'),
'SS':os.path.join(baseDirLUT, 'optics_SS.v3_7.GSFun.nc')}
def main():
k = 0
curTag = primeTag
while k < len(typeKeys): # loop over species (also, we make second pass if lev2 is not at thrshLen)
rsltALM = parseFile(basePathAERO % (curTag, almTag, typeKeys[k]), typeKeys[k]) # read almacanter
rsltHYB = parseFile(basePathAERO % (curTag, hybTag, typeKeys[k]), typeKeys[k]) # read hybrid
rslt = dict()
for key in rsltALM.keys(): rslt[key] = np.r_[rsltALM[key], rsltHYB[key]]
if len(rslt['day']) < thrshLen and curTag==primeTag: # lev2.0 had too few cases: loop again, loading 1.5
curTag = secondTag
elif len(rslt['day']) >= thrshLen: # we will work with and then write this data
outFilePath = (basePathAERO[:-4]+'_augmented.nc4') % (curTag, 'allScans', typeKeys[k])
outFilePath = re.sub("-clean[0-9]", "", outFilePath)
if os.path.exists(outFilePath): os.remove(outFilePath) # overwrite any existing file & start from scratch (netCDF alone can't do this)
with Dataset(outFilePath, 'w', format='NETCDF4') as root_grp:
writeNewData(rslt, root_grp, typeKeys[k])
curTag = primeTag
k = k+1
else:
assert False, 'No cases were found with tags %s or %s' % (primeTag, secondTag)
def parseFile(filePath, typeKey):
rslt = loadVARSnetCDF(filePath)
rslt['rri'] = findRefInd(rslt, typeKey, 'Refractive_Index_Real', 'refreal')
rslt['iri'] = findRefInd(rslt, typeKey, 'Refractive_Index_Imag', 'refimag')
rslt['ssa'] = findRefInd(rslt, typeKey, 'Single_Scattering_Albedo', 'ssa')
return rslt
def findRefInd(rslt, typeKey, aeroNC4name, lutNC4name):
""" typeKey links LUT file, aeroNC4name+_XXX is aeronet dict key, lutNC4name is LUT key
Note this function is likely to occasionally return RI outside the bounds of GRASP's LUT """
LUTvars = ['bsca','bext'] if lutNC4name=='ssa' else [lutNC4name]
LUT = loadVARSnetCDF(basePathLUT[typeKey], ['lambda', 'rh'] + LUTvars)
rhInd = np.argmin(np.abs(LUT['rh'] - rhTrgt))
if lutNC4name=='ssa': LUT['ssa'] = LUT['bsca']/LUT['bext'] # TODO: Why does this differ from qsca/qext? See bottom of
if lutNC4name=='refimage': LUT[lutNC4name] = -lutNC4name # LUT convention is negative imaginary part, AERONET is positive
LUTri = LUT[lutNC4name][:,rhInd,:].mean(axis=0) # we average over all size modes
LUTλ = LUT['lambda']*1e9 # m -> nm
λaeroStr = [y for y in rslt.keys() if aeroNC4name in y]
λaero = np.sort([int(re.search(r'\d+$', y).group()) for y in λaeroStr]) # nm
RIaero = np.array([rslt['%s_%d' % (aeroNC4name,λ)] for λ in λaero]).T # RI[t,λ]
RI = np.zeros([len(RIaero), len(wvlsOut)])
for t,ri in enumerate(RIaero): # loop over all aeronet retrievals
LUTlowScl = ri[0]/np.interp(λaero[0], LUTλ, LUTri)
LUTupScl = ri[-1]/np.interp(λaero[-1], LUTλ, LUTri)
lowI = LUTλ < λaero[0]
upI = LUTλ > λaero[-1]
LUTlow = LUTri[lowI]*LUTlowScl
LUTup = LUTri[upI]*LUTupScl
RIfull = np.r_[LUTlow, ri, LUTup]
λfull = np.r_[LUTλ[lowI], λaero, LUTλ[upI]]
RI[t,:] = np.interp(wvlsOut, λfull, RIfull)
return RI
def writeNewData(rslt, root_grp, typeKey):
nc4Vars = dict()
root_grp.description = 'Collection of AERONET/MERRA2 %s state parameters' % typeKey
root_grp.contact = "Reed Espinosa <[email protected]>"
root_grp.source = "AERONET Version 3 retrieval products; MERRA-2"
# dimensions
λVar = 'wavelenth'
root_grp.createDimension(λVar, len(wvlsOut))
nc4Vars[λVar] = root_grp.createVariable(λVar, 'f4', (λVar))
nc4Vars[λVar][:] = np.array(wvlsOut)/1e3
nc4Vars[λVar].units = 'μm'
nc4Vars[λVar].long_name = 'wavelenth for which parameter specified'
indVar = 'index'
root_grp.createDimension(indVar, len(rslt['day']))
# create and write pass through variables
forbdnStrngs = ['Refractive_Index','Single_Scattering'] # if a key has this string anywhere it will be thrown out
keepKeys = [s for s in rslt.keys() if np.all([s1 not in s for s1 in forbdnStrngs])]
for key in keepKeys:
outKey = key.replace('Extinction_Angstrom_Exponent_440_870_Total', 'AE_T') # default variable name is too long
if rslt[key].ndim==2: # spectrally dependent variable
nc4Vars[outKey] = root_grp.createVariable(outKey, 'f4', (indVar, λVar))
nc4Vars[outKey][:,:] = rslt[key]
else: # spectrally invariant variable
nc4Vars[outKey] = root_grp.createVariable(outKey, 'f4', (indVar))
nc4Vars[outKey][:] = rslt[key]
nc4Vars[outKey].units = getUnits(outKey)
nc4Vars[outKey].long_name = getFullName(outKey)
def getFullName(key):
modeStr = {'F':'fine mode', 'C':'coarse mode', 'T':'total'}
riStr = {'r':'Real', 'i':'Imaginary'}
if 'VolC' in key:
return '%s volume concentration' % modeStr[key[-1]]
if 'VMR' in key:
return '%s volume median radius' % modeStr[key[-1]]
if 'Std' in key:
return '%s standard deviation' % modeStr[key[-1]]
if key=='hour':
return 'UTC '+key
if key in ['day', 'month']:
return key+' of year'
if key=='pureness':
return 'MERRA2 derived pureness of target species'
if key=='AERONET_Site_id':
return 'Site ID of AERONET station where data was retrieved'
if key=='AE_T':
return 'Angstrom Exponent derived from 440 and 870nm total AOD'
if key in ['rri','iri']:
return '%s Refractive Index' % riStr[key[0]]
if key=='ssa':
return 'Single Scattering Albedo'
return key
def getUnits(key):
if 'VolC' in key:
return 'μm^3/μm^2'
if 'VMR' in key:
return 'μm'
if 'pureness' in key:
return 'mass fraction in range 0-1'
return 'none'
if __name__ == '__main__': main()