
/etm_interface.py

https://bitbucket.org/yosoyjay/etm-data

#!/usr/bin/python
""" Collection of functions and classes to deal with CMWP data
"""
#-------------------------------------------------------------------------------
# Imports
#-------------------------------------------------------------------------------
import os
from optparse import OptionParser

import netCDF4
import scipy as sp
import numpy as np
import pandas as pd
from pandas import Panel, DataFrame, Series
import seawater.csiro as sw

from etm_data.lisst import *
import etm_data.etm_time as t
#-------------------------------------------------------------------------------
# Constants
#-------------------------------------------------------------------------------
# DataFrame must have equal sized arrays, so skip x/y when loading data
ship_keys = ['x', 'y', 'time']
# High-rate instruments that get decimated before resampling
sub_sample = ['VG1', 'VG2', 'ACC1', 'ACC2', 'uT', 'uC', 'OXY']
#-------------------------------------------------------------------------------
# Functions
#-------------------------------------------------------------------------------
def formatLisstDataForDataFrame(netCDF_data):
    """Formats processed LISST-100x data from a netCDF file for loading into
    a pandas DataFrame.

    Parameters
    ----------
    netCDF_data - netCDF4.Dataset

    Returns
    -------
    (vol, diams) - Tuple of DataFrames of volumes and diameters of LISST data
    """
    volume_data = {}
    for i in range(32):
        volume_data[i] = netCDF_data.variables['vol'][:, i]
    time = netCDF_data.variables['time'][:]
    # 1000000 is arbitrary, but effective for telling epoch seconds apart
    # from MATLAB datenums.
    if isinstance(time[0], np.float64) and time[0] > 1000000:
        time = pd.DatetimeIndex(t.epochToDatetime(time))
    else:
        time = pd.DatetimeIndex(t.datenumToDatetime(time)).tz_localize('UTC')
    vol = DataFrame(volume_data, index=time)

    diam_data = {}
    for i in range(14):
        diam_data[i] = netCDF_data.variables['diams'][:, i]
    diams = DataFrame(diam_data, index=time)
    return (vol, diams)
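
# Minimal usage sketch for the LISST formatter (not part of the original
# module). The filename 'cmwp_day1_LISST_PROC.nc' is hypothetical.
#
#   nc = netCDF4.Dataset('cmwp_day1_LISST_PROC.nc', 'r')
#   vol, diams = formatLisstDataForDataFrame(nc)  # 32 volume bins, 14 diameter columns
#   nc.close()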

def formatDataForDataFrame(netCDF_data):
    """Formats data from a netCDF file for loading into a pandas DataFrame

    Parameters
    ----------
    netCDF_data - netCDF4.Dataset

    Returns
    -------
    df - DataFrame object with instrument data
    """
    keys = netCDF_data.variables.keys()
    data = {}
    for k in keys:
        if k in ship_keys:
            continue
        data[k] = netCDF_data.variables[k][:]
    time = netCDF_data.variables['time'][:]
    # Epoch seconds vs. MATLAB datenums (see formatLisstDataForDataFrame)
    if isinstance(time[0], np.float64) and time[0] > 1000000:
        time = pd.DatetimeIndex(t.epochToDatetime(time))
    else:
        time = pd.DatetimeIndex(t.datenumToDatetime(time)).tz_localize('UTC')
    df = DataFrame(data, index=time)
    return df

#TODO: Format like LISST and junk this approach
def formatADCPForDataFrame(adcpNetCDFData):
    """ Formats ADCP data from wh300.nc for loading into a pandas Panel

    It seems you can't place DataFrames with different dimensions into a single
    Panel, so this returns a dict with a:
    - DataFrame of 1d data
    - Panel of 2d data

    Parameters
    ----------
    adcpNetCDFData - netCDF object with ADCP data

    Returns
    -------
    df - Dict of 1d data in DataFrame and 2d data in Panel
    """
    keys = adcpNetCDFData.variables.keys()
    # 'trajectory' is empty so I'm throwing it out
    if keys.count('trajectory') > 0:
        keys.remove('trajectory')
    dim1 = {}
    dim2 = {}
    for k in keys:
        if len(adcpNetCDFData.variables[k].shape) == 2:
            dim2[k] = adcpNetCDFData.variables[k][:]
        else:
            dim1[k] = adcpNetCDFData.variables[k][:]
    df = {}
    df['1d'] = DataFrame(dim1)
    df['2d'] = Panel(dim2)
    return df
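
# Hedged sketch of calling the ADCP formatter; 'wh300.nc' follows the filename
# mentioned in the docstring, and the '1d'/'2d' keys come from the code above.
#
#   adcp_nc = netCDF4.Dataset('wh300.nc', 'r')
#   adcp = formatADCPForDataFrame(adcp_nc)
#   scalars = adcp['1d']   # DataFrame of 1d variables
#   profiles = adcp['2d']  # Panel of 2d variables
#   adcp_nc.close()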

def loadNetCDF(file, mode='r'):
    """Loads a netCDF file of data and returns a DataFrame of all vars

    Parameters
    ----------
    file - Path to netCDF file of data
    mode - File access mode (read-only by default)

    Returns
    -------
    df - DataFrame of instrument data (or a (vol, diams) tuple for
         processed LISST files)
    """
    data = netCDF4.Dataset(file, mode)
    if 'LISST' in file:
        print 'Loading LISST data'
        if len(data.variables.keys()) == 4:
            dfData = formatLisstDataForDataFrame(data)
        else:
            dfData = formatDataForDataFrame(data)
    else:
        dfData = formatDataForDataFrame(data)
    data.close()
    return dfData
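
# Example (sketch, hypothetical filenames): loadNetCDF dispatches on the
# filename, so a CTD file yields a single DataFrame while a processed LISST
# file (with its four variables) yields a (vol, diams) tuple.
#
#   ctd = loadNetCDF('cmwp_day1_CTD.nc')
#   vol, diams = loadNetCDF('cmwp_day1_LISST_PROC.nc')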

def processData(path, dirBase='oc1204B.0.F.', optInsts=None):
    """ Processes step A of all data (except LISST) for all days

    Parameters
    ----------
    path - String of path to the raw netCDF files
    dirBase - Prefix for the per-instrument output directories
    optInsts - Optional list of instruments to process (defaults to procInst)
    """
    days = np.arange(1, 8)
    if optInsts is not None:
        insts = optInsts
    else:
        insts = procInst
    for inst in insts:
        if not os.path.exists(path+dirBase+inst_name[inst]):
            print 'Making dir '+path+dirBase+inst_name[inst]
            os.mkdir(path+dirBase+inst_name[inst])
    for d in days:
        print 'Processing day %s' % d
        procStepA(d, inPath=path)
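
# Example driver call (sketch): process step A for all instruments and days,
# assuming the raw cmwp_day?_*.nc files live under the (hypothetical) path below.
#
#   processData('/data/etm/spring/raw/')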

def procStepA(day, inPath='./', outPath='./', type='Normal', ship='oc1204B'):
    """ Begins processing of Spring ETM cruise data.

    Step A:
    1. Subsample to 2s intervals to make size a bit more manageable
    2. Add lat, lon, and depth
    3. Save changes to oc1204B.0.F.INSTRUMENT/dayDAY.nc

    Parameters
    ----------
    day - Int of day of cruise to process
    inPath - String of path to input files (where cmwp_day?_*?.nc exist)
    outPath - String of path to place output files
    type - 'Normal' or 'LISST'
    ship - String of ship/cruise prefix used for output directories
    """
    if type == 'Normal':
        print 'Processing normal data'
        f = os.path.join(inPath, 'cmwp_day%d_CTD.nc' % day)
        ctd = loadNetCDF(f).resample('2s')
        #oowTimes = calcOutOfWater(ctd)
        for inst in procInst:
            if inst == 'LISST':
                continue
            _open_and_save_file(day, inst, ctd, inPath, outPath, ship)
    elif type == 'LISST':
        print 'Processing LISST data'
        f = os.path.join(inPath, 'cmwp_day%d_CTD.nc' % day)
        ctd = loadNetCDF(f).resample('2s')
        _open_and_save_file(day, 'LISST_PROC', ctd, inPath, outPath, ship)
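
# Single-day sketch (hypothetical paths): the LISST pass is run separately
# from the 'Normal' instruments by switching the type argument.
#
#   procStepA(3, inPath='/data/etm/raw/', outPath='/data/etm/proc/')
#   procStepA(3, inPath='/data/etm/raw/', outPath='/data/etm/proc/', type='LISST')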

# Split out as a separate function so memory is freed when the scope closes.
def _open_and_save_file(day, inst, ctd, inPath, outPath, ship):
    # Open old file
    f = 'cmwp_day%d_%s.nc' % (day, inst)
    f = os.path.join(inPath, f)
    print 'Opening %s' % f
    data = loadNetCDF(f, mode='r')
    if inst == 'LISST_PROC':
        d1 = data[0].resample('2s').reindex(ctd.index)
        d2 = data[1].resample('2s').reindex(ctd.index)
        d1 = addDepth(d1, ctd)
        data = (d1, d2)
        ship = ship+'.0.F.'+'LISST'
        type = 'LISST'
    else:
        if inst in sub_sample:
            # Decimate high-rate instruments before resampling
            data = data[::100].resample('2s').reindex(ctd.index)
        else:
            data = data.resample('2s').reindex(ctd.index)
        data = addDepth(data, ctd)
        ship = ship+'.0.F.'+inst_name[inst]
        type = 'Normal'
    # Save new netCDF file
    fDir = os.path.join(outPath, ship)
    dayFile = 'day%d.nc' % day
    f = os.path.join(fDir, dayFile)
    saveNetCDF(f, data, cruise='fall_1', type=type)

def saveNetCDF(filePath, df, cruise, type='Normal', cache=True):
    """ Saves data from an instrument (DataFrame) into a netCDF4 file.

    Also adds: lat, lon, and a global description (offering)

    Parameters
    ----------
    filePath - String of path for file (Assumes inclusion of dir
               structure for description)
    df - DataFrame object of data to save to file
    cruise - 'fall_1', 'fall_2', or 'spring' - Used to get units
    type - String describing the origin of the data
           - 'Normal' - not from ADCP or LISST
           - 'ADCP'
           - 'LISST'  - Raw LISST data
           - 'LISST2' - LISST data that has already been dumped into netCDF
                        files and pandas
    cache - True: Adheres to netCDF-like cache path structure
            e.g. oceanus1204c.0.F.LISST/day8.nc
    """
    if type not in ('Normal', 'ADCP', 'LISST', 'LISST2'):
        raise ValueError('Data type not supported: '+type)
    if cruise == 'fall_1':
        from etm_data.fall_1_consts import *
    elif cruise == 'fall_2':
        from etm_data.fall_2_consts import *
    elif cruise == 'spring':
        from etm_data.spring_consts import *
    print 'Creating netCDF file '+filePath
    f = netCDF4.Dataset(filePath, 'w', format='NETCDF4', zlib=True)
    if cache:
        f.description = filePath[:-8]
    else:
        f.description = filePath
    if type == 'Normal' or type == 'LISST2':
        f.createDimension('time', df.shape[0])
        times = t.datetime64ToEpoch(df.index.values)
    else:
        f.createDimension('time', df[0].shape[0])
        times = t.datetime64ToEpoch(df[0].index.values)
    f.createDimension('x', 1)
    f.createDimension('y', 1)
    tmp = f.createVariable('time', 'float64', ('time',))
    tmp[:] = times
    tmp.units = units['time']
    tmp = f.createVariable('x', 'float', ('x',))
    tmp[:] = lon
    tmp.units = 'latlon'
    tmp = f.createVariable('y', 'float', ('y',))
    tmp[:] = lat
    tmp.units = 'latlon'
    # Treat data origin
    if type == 'Normal':
        for i, v in enumerate(df.keys()):
            print 'Variable '+rev_names[v]+' : '+units[rev_names[v]]
            tmp = f.createVariable(v, df[v].dtype, ('time',))
            tmp[:] = df[v].values
            tmp.units = units[rev_names[v]]
            f.sync()
    elif type == 'ADCP':
        raise NotImplementedError('ADCP output not implemented')
    elif type == 'LISST':
        # Treat volume and diameter DataFrames separately
        for i, v in enumerate(df[0].keys()):
            print 'Variable '+lisst_vol_names[i]+' : '+'um'
            tmp = f.createVariable(lisst_vol_names[i], 'float64', ('time',))
            if v == 'elev':
                tmp[:] = df[0]['elev'].values
                tmp.units = 'm'
            else:
                tmp[:] = df[0][i].values
                tmp.units = 'um'
            f.sync()
        for i, v in enumerate(df[1].keys()):
            print 'Variable '+lisst_diams_names[i]+' : '+lisst_diams_units[i]
            tmp = f.createVariable(lisst_diams_names[i], 'float64', ('time',))
            tmp[:] = df[1][i].values
            tmp.units = lisst_diams_units[i]
            f.sync()
    elif type == 'LISST2':
        for i, v in enumerate(df.keys()):
            print 'Variable '+v
            tmp = f.createVariable(v, 'float64', ('time',))
            tmp[:] = df[v].values
            tmp.units = lisst_units[v]
            f.sync()
    else:
        raise NotImplementedError('Data type not supported: '+type)
    f.close()
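
# Hedged sketch of a direct saveNetCDF call with a small synthetic DataFrame.
# Variable names must exist in the cruise's rev_names/units constants, so
# 'salt' and the output path below are only illustrative.
#
#   idx = pd.date_range('2012-04-21', periods=4, freq='2s', tz='UTC')
#   df = DataFrame({'salt': np.zeros(4)}, index=idx)
#   saveNetCDF('oc1204B.0.F.CTD/day1.nc', df, cruise='spring', type='Normal')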

#-------------------------------------------------------------------------------
# Main
#-------------------------------------------------------------------------------
if __name__ == "__main__":
    cruises = ['spring', 'fall1', 'fall2']
    usage = "%prog raw_matlab_dir etm_cruise ['spring', 'fall1', 'fall2']"
    parser = OptionParser(usage=usage)
    parser.add_option("-l", action="store_true", dest="lisst", default=False,
                      help="Process LISST data")
    (options, args) = parser.parse_args()
    if len(args) != 2:
        parser.error("Incorrect number of args")
    if args[1] not in cruises:
        parser.error("That's not a valid ETM cruise.")
    elif args[1] == 'spring':
        import etm_data.spring_consts as consts
    elif args[1] == 'fall1':
        import etm_data.fall_1_consts as consts
    else:
        import etm_data.fall_2_consts as consts
    if options.lisst:
        lisstMatToNc(args[0])
    else:
        cmwpMatToNc(args[0])