
/etmDataInterface.py

https://bitbucket.org/yosoyjay/etm-data
  1. """ Collection of functions and classes to deal with CMWP data
  2. OOW - Implies "out of water"
  3. """
#--------------------------------------------------------------------------------
# Imports
#--------------------------------------------------------------------------------
import netCDF4
import os
import scipy as sp
import scipy.signal  # imported explicitly so sp.signal.medfilt is available
import numpy as np
import datetime
import pytz
from pandas import Panel, DataFrame, Series
import pandas as pd
from scipy.interpolate import interp1d
import cookb_signalsmooth as cbs
import seawater.csiro as sw
#--------------------------------------------------------------------------------
# Constants
#--------------------------------------------------------------------------------
# MATLAB datenum value for January 1, 1970
#datenumDayEpoch = 719529
# Spring
#instruments = ['CTD', 'ALTIM', 'COMP', 'MOPAK', 'VG1', 'VG2', 'ACC1', 'ACC2',
#               'uT', 'uC', 'OXY', 'ADV', 'ECO', 'LISST']
# Fall
#instruments = ['CTD', 'ALTIM', 'COMP', 'MOPAK', 'VG1', 'VG2', 'ACC1', 'ACC2',
#               'uT', 'uC', 'OXY', 'ADV', 'PUCK', 'FLNTU', 'LISST']
instName = {'CTD': 'CTD',
            'ALTIM': 'Altim',
            'COMP': 'Comp',
            'MOPAK': 'Mopak',
            'VG1': 'VG1',
            'VG2': 'VG2',
            'ACC1': 'ACC1',
            'ACC2': 'ACC2',
            'uT': 'uT',
            'uC': 'uC',
            'OXY': 'Oxygen',
            'ADV': 'ADV',
            'ECO': 'Eco',
            'PUCK': 'Eco',
            'FLNTU': 'FLNTU',
            'LISST': 'LISST',
            }
procInst = {'CTD': 'CTD',
            'ALTIM': 'Altim',
            'COMP': 'Comp',
            # 'MOPAK': 'Mopak',
            # 'OXY': 'Oxygen',
            'ADV': 'ADV',
            'ECO': 'Eco',
            # 'PUCK': 'Eco',
            # 'FLNTU': 'FLNTU',
            }
name = {'temp': 'water_temperature',
        'conductivity': 'water_conductivity',
        'Pressure': 'water_pressure',
        'Salinity': 'water_salinity',
        'range': 'range',
        'heading 1': 'heading1',
        'heading 2': 'heading2',
        'pitch': 'pitch',
        'roll': 'roll',
        'unknown 1': 'unknown1',
        'unknown 2': 'unknown2',
        'VG1': 'VG1',
        'VG2': 'VG2',
        'ACC1': 'ACC1',
        'ACC2': 'ACC2',
        'uT': 'uT',
        'uC': 'uC',
        'o2': 'oxygen',
        'Velocity 1': 'vel1',
        'Velocity 2': 'vel2',
        'Velocity 3': 'vel3',
        'Amplitude 1': 'amp1',
        'Amplitude 2': 'amp2',
        'Amplitude 3': 'amp3',
        'cor1': 'cor1',
        'cor2': 'cor2',
        'cor3': 'cor3',
        'Red': 'red',
        'Blue': 'blue',
        'Chlorophyll': 'chlorophyll',
        'unknown 3': 'unknown2',
        'unknown 6': 'unknown3',
        'time': 'time',
        'angular rate 1': 'ang_rate_1',
        'angular rate 2': 'ang_rate_2',
        'angular rate 3': 'ang_rate_3',
        'tilt 1': 'tilt1',
        'tilt2': 'tilt2',
        'tilt3': 'tilt3',
        'unkown 1': 'unknown1',
        'elev': 'elev',
        'Lambda1': 'lambda1',
        'chlorophyll': 'chlorophyll',
        'Lambda2': 'lambda2',
        'NTU': 'NTU',
        'Thermister': 'thermister',
        }
units = {'temp': 'C',
         'conductivity': '?',
         'Pressure': 'dbar',
         'Salinity': 'psu',
         'range': 'm',
         'heading 1': 'degrees',
         'heading 2': 'degrees',
         'pitch': 'degrees',
         'roll': 'degrees',
         'unknown 1': '?',
         'unknown 2': '?',
         'VG1': '?',
         'VG2': '?',
         'ACC1': '?',
         'ACC2': '?',
         'uT': '?',
         'uC': '?',
         'o2': '?',
         'Velocity 1': 'mm/s',
         'Velocity 2': 'mm/s',
         'Velocity 3': 'mm/s',
         'Amplitude 1': 'dB/0.43',
         'Amplitude 2': 'dB/0.43',
         'Amplitude 3': 'dB/0.43',
         'cor1': '?',
         'cor2': '?',
         'cor3': '?',
         'Red': '?',
         'Blue': '?',
         'Chlorophyll': '?',
         'unknown 3': '?',
         'unknown 6': '?',
         'time': 'seconds from 1970-01-01 00:00:00-00',
         'angular rate 1': '?',
         'angular rate 2': '?',
         'angular rate 3': '?',
         'tilt 1': '?',
         'tilt2': '?',
         'tilt3': '?',
         'unkown 1': '?',
         'elev': 'db',
         'Lambda1': '?',
         'chlorophyll': '?',
         'Lambda2': '?',
         'NTU': 'NTU',
         'Thermister': '?',
         }
# DataFrame must have equal sized arrays, so skip x/y when loading data
skipKeys = ['x', 'y', 'time']
# Rotation coefficients from John Dunlap's work as found in
# advrot.m from the cmwp source codes.
# These don't appear to do squat.
eulerRotations = {'1': np.array([-0.50, -0.50, 0.50]),
                  '2': np.array([-0.50, -0.60, 0.50]),
                  '3': np.array([-0.40, -0.45, 0.70]),
                  '4': np.array([-0.40, -0.45, 0.70]),
                  '5': np.array([-0.40, -0.45, 0.70]),
                  '6': np.array([-0.40, -0.45, 0.70]),
                  '7': np.array([ 0.36, -0.37, 0.55]),
                  }
# oc1204 location - Bounces a bit around here over the cruise.
# There is a shift slightly west on decimal day 123, but still quite close.
lon = -123.915
lat = 46.235
# Approximate sampling frequency in Hz
freq = {'CTD': 1,  # Less than 1
        'ALTIM': 5,
        'COMP': 1,
        'MOPAK': 23,
        'VG1': 479,
        'VG2': 479,
        'ACC1': 479,
        'ACC2': 479,
        'uT': 479,
        'uC': 479,
        'OXY': 479,
        'ADV': 14,
        'ECO': 1,
        'LISST': np.nan,  # Not dealt with yet
        }
subSample = ['VG1', 'VG2', 'ACC1', 'ACC2', 'uT', 'uC', 'OXY']
# Type C instrument used on cruise - 2.5 to 500 microns
lisstVolSizes = np.array([2.72, 3.20, 3.78, 4.46, 5.27, 6.21, 7.33, 8.65,
                          10.2, 12.1, 14.2, 16.8, 19.8, 23.4, 27.6, 32.5,
                          38.4, 45.3, 53.5, 63.1, 74.5, 87.9, 104, 122,
                          144, 170, 201, 237, 280, 331, 390, 460])
lisstVolNames = np.array(['2.72 um', '3.20 um', '3.78 um', '4.46 um',
                          '5.27 um', '6.21 um', '7.33 um', '8.65 um',
                          '10.2 um', '12.1 um', '14.2 um', '16.8 um',
                          '19.8 um', '23.4 um', '27.6 um', '32.5 um',
                          '38.4 um', '45.3 um', '53.5 um', '63.1 um',
                          '74.5 um', '87.9 um', '104 um', '122 um',
                          '144 um', '170 um', '201 um', '237 um',
                          '280 um', '331 um', '390 um', '460 um',
                          'elev'])
lisstDiamsNames = ['total_volume', 'mean_size', 'std', 'nan',
                   'D10', 'D16', 'D50', 'D60', 'D84',
                   'D90', 'hazen_uniformity_coefficient',
                   'surface_area', 'silt_density',
                   'silt_volume']
lisstDiamsUnits = ['ul/l', 'um', 'um', 'NaN',
                   'um', 'um', 'um', 'um', 'um',
                   'um', 'ratio', 'cm2/l', 'percent',
                   'um']
#--------------------------------------------------------------------------------
# Functions
#--------------------------------------------------------------------------------
def formatLisstDataForDataFrame(netCDFData):
    """Formats processed LISST-100x data from a netCDF file for loading into
    a pandas DataFrame.
    """
    volumeData = {}
    for i in range(32):
        volumeData[i] = netCDFData.variables['vol'][:, i]
    time = netCDFData.variables['time'][:]
    if isinstance(time[0], np.float64) and time[0] > 1000000:
        time = pd.DatetimeIndex(epochToDatetime(time))
    else:
        time = pd.DatetimeIndex(datenumToDatetime(time)).tz_localize('UTC')
    vol = DataFrame(volumeData, index=time)
    diamData = {}
    for i in range(14):
        diamData[i] = netCDFData.variables['diams'][:, i]
    diams = DataFrame(diamData, index=time)
    return (vol, diams)
def formatDataForDataFrame(netCDFData):
    """ Formats data from a netCDF file for loading into a pandas DataFrame """
    keys = netCDFData.variables.keys()
    data = {}
    for k in keys:
        if k in skipKeys:
            continue
        data[k] = netCDFData.variables[k][:]
    time = netCDFData.variables['time'][:]
    if isinstance(time[0], np.float64) and time[0] > 1000000:
        time = pd.DatetimeIndex(epochToDatetime(time))
    else:
        time = pd.DatetimeIndex(datenumToDatetime(time)).tz_localize('UTC')
        #time = pd.DatetimeIndex(datenumToDatetime(time))
    df = DataFrame(data, index=time)
    return df
#TODO: Format like LISST
def formatADCPForDataFrame(adcpNetCDFData):
    """ Formats ADCP data from wh300.nc for loading into a pandas Panel.
    It seems you can't place DataFrames with different dimensions into a single
    Panel, so this returns a dictionary df with a:
    - DataFrame of 1d data
    - Panel of 2d data
    Parameters
    ----------
    adcpNetCDFData - netCDF object with ADCP data
    returns
    -------
    df - Dict of 1d data in a DataFrame and 2d data in a Panel
    """
    keys = adcpNetCDFData.variables.keys()
    # 'trajectory' is empty so I'm throwing it out
    if keys.count('trajectory') > 0:
        keys.remove('trajectory')
    dim1 = {}
    dim2 = {}
    for k in keys:
        if len(adcpNetCDFData.variables[k].shape) == 2:
            dim2[k] = adcpNetCDFData.variables[k][:]
        else:
            dim1[k] = adcpNetCDFData.variables[k][:]
    df = {}
    df['1d'] = DataFrame(dim1)
    df['2d'] = Panel(dim2)
    return df
def loadNetCDF(file, mode='r'):
    """ Loads netCDF file of data and returns DataFrame of all vars """
    data = netCDF4.Dataset(file, mode)
    if 'LISST' in file:
        print 'Loading LISST data'
        if len(data.variables.keys()) == 4:
            dfData = formatLisstDataForDataFrame(data)
        else:
            dfData = formatDataForDataFrame(data)
    else:
        dfData = formatDataForDataFrame(data)
    data.close()
    return dfData
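# Example (sketch; the file name below is hypothetical - any cmwp_day*_<INST>.nc
# produced during the cruise processing would do):
#   ctd = loadNetCDF('cmwp_day1_CTD.nc')
#   print ctd['Pressure'].head()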
#-------------------------------------------------------------------------------
# Time functions
#-------------------------------------------------------------------------------
def epochToDatetime(epoch):
    """Converts UNIX epoch to python datetime
    """
    if isinstance(epoch, float):
        #return datetime.datetime.fromtimestamp(epoch, tz=pytz.utc)
        return datetime.datetime.fromtimestamp(epoch)
    if isinstance(epoch, np.ndarray):
        f = lambda x: datetime.datetime.fromtimestamp(x)
        vf = np.vectorize(f)
        return vf(epoch)
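# Example (sketch): epoch 0.0 is 1970-01-01 00:00:00 UTC, though
# fromtimestamp() renders it in the machine's local timezone:
#   epochToDatetime(0.0)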
def datetime64ToEpoch(dt64):
    """Converts np.datetime64 to Unix epoch
    1. Need to add 7 hours (25200 s) to get it to work correctly; this looks
       like a correction for a PDT (UTC-7) offset in the raw timestamps.
    """
    if isinstance(dt64, np.datetime64):
        if dt64.dtype == '<M8[ns]':
            return (dt64+np.timedelta64(7, 'h')).astype('float64')/1e9
    if isinstance(dt64, np.ndarray):
        if dt64[0].dtype == '<M8[ns]':
            f = lambda x: int(x)/1e9 + 25200
            vf = np.vectorize(f)
            return vf(dt64)
    print 'Type not supported %s' % type(dt64)
def datenumToDatetime(datenum):
    """Converts MATLAB datenum to python datetime
    Required steps:
    1. datetime ordinals and datenum differ by 366 days
    """
    if isinstance(datenum, int) or isinstance(datenum, float):
        return (datetime.datetime.fromordinal(int(datenum)) +
                datetime.timedelta(days=datenum % 1) -
                datetime.timedelta(days=366))
    if isinstance(datenum, np.ndarray):
        f = lambda x: (datetime.datetime.fromordinal(int(x)) +
                       datetime.timedelta(days=x % 1) -
                       datetime.timedelta(days=366))
        vf = np.vectorize(f)
        return vf(datenum)
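# Example (sketch): MATLAB's datenum(2012,1,1) is 734869, so
#   datenumToDatetime(734869.5)
# gives datetime.datetime(2012, 1, 1, 12, 0).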
def datetimeToEpoch(dt):
    """ Converts datetime to UNIX epoch
    Must add 7 hours to this to get epoch seconds to work correctly
    (see datetime64ToEpoch).
    """
    if isinstance(dt, datetime.datetime):
        return (dt - datetime.datetime(1970, 1, 1) +
                datetime.timedelta(hours=7)).total_seconds()
    if isinstance(dt, np.ndarray):
        f = lambda x: (x - datetime.datetime(1970, 1, 1) +
                       datetime.timedelta(hours=7)).total_seconds()
        vf = np.vectorize(f)
        return vf(dt)
    print 'Type not supported %s' % type(dt)
def doyToDatetime(doy, year):
    """ Converts day of year to datetime format.
    """
    if isinstance(doy, float):
        return datetime.datetime(year, 1, 1) + datetime.timedelta(doy)
    if isinstance(doy, np.ndarray):
        f = lambda x: datetime.datetime(year, 1, 1) + datetime.timedelta(x)
        vf = np.vectorize(f)
        return vf(doy)
#-------------------------------------------------------------------------------
# interp functions
#-------------------------------------------------------------------------------
def interpVars(dfData1, var1, dfData2, var2):
    """ Interpolates data of var1 onto the time of var2 using simple
    linear interpolation.
    Parameters
    ----------
    dfData1 -- DataFrame object that contains var1
    var1 -- Key into DataFrame of data for variable
    dfData2 -- DataFrame object that contains var2
    var2 -- Key into DataFrame of data for variable
    Returns
    -------
    tsData -- Series object with interpolated data
    """
    #idx = idTimeBounds(dfData1, dfData1)
    # Hand coded for development of ADV data for first day
    # Adjust to use identified indices
    f = interp1d(dfData1['time'][:], dfData1[var1][:])
    tsData = f(dfData2['time'][1:])
    return tsData
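# Example (sketch, hypothetical ctd/adv frames from loadNetCDF; puts CTD
# temperature on the ADV sample times, skipping the first ADV sample so the
# query times stay inside the CTD time range):
#   temp_on_adv = interpVars(ctd, 'temp', adv, 'Velocity 1')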
def interpDFtoCTD(ctd, df):
    """Interpolates all data in DataFrame object to CTD time.
    """
    for k in df.keys():
        f = interp1d(df['time'][:], df[k][:])
        df[k][:] = f(ctd['time'][1:])
    return df
#-------------------------------------------------------------------------------
# Other calculation functions
#-------------------------------------------------------------------------------
def calcBuoyFreq(ctd, rho='potential'):
    """Calculates the buoyancy frequency (Brunt-Vaisala) using CTD data frame.
    Parameters
    ----------
    ctd - CTD DataFrame object
    rho - 'potential' = rho_theta or 'average' uses mean of rho
    Returns
    -------
    N - Buoyancy frequency
    Notes
    -----
    rho:
    'potential':
    .. math::
        N = \sqrt{-\\frac{g}{\\rho_\\theta}\\frac{d\\rho_\\theta}{dz}}
    rho_theta - Potential density calculated via seawater package
    'average':
    .. math::
        N = \sqrt{-\\frac{g}{\\rho_{mean}}\\frac{d\\rho}{dz}}
    rho_mean - Density averaged over the file
    References
    ----------
    Petrie, J., et al. (2010). Local boundary shear stress estimates from
    velocity profiles with an ADCP. River Flow 2010. ISBN 978-3-939230-00-7
    Mikkelsen, O., et al. (2008). The influence of schlieren on in situ optical
    measurements for particle characterization. Limnology and Oceanography:
    Methods. Vol. 6. 133-143.
    """
    if rho == 'potential':
        rho_theta = sw.pden(ctd.water_salinity, ctd.water_temperature,
                            ctd.water_pressure)
        drho = rho_theta.diff()
        dz = ctd.water_pressure.diff()
        g = -9.8
        N = np.sqrt(-g/rho_theta*drho/dz)
        return N
    elif rho == 'average':
        rho = sw.dens(ctd.water_salinity, ctd.water_temperature,
                      ctd.water_pressure)
        rho_mean = np.mean(rho)
        drho = rho.diff()
        dz = ctd.water_pressure.diff()
        g = -9.8
        N = np.sqrt(-g/rho_mean*drho/dz)
        return N
    else:
        raise ValueError('Method not recognized: %s' % rho)
def calcPercentGoodLisst(bf, criteria=0.025):
    """Calculates the fraction (0-1) of good LISST data based on N (buoyancy
    frequency); values above criteria, or NaN, count as bad.
    """
    bad_value = bf > criteria
    bad_nan = np.isnan(bf)
    bad = np.logical_or(bad_value, bad_nan)
    return 1 - float(np.sum(bad))/bf.size
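# Example (sketch; ctd_proc is a hypothetical processed-CTD frame whose columns
# are the renamed water_* variables, as calcBuoyFreq expects):
#   N = calcBuoyFreq(ctd_proc, rho='potential')
#   print calcPercentGoodLisst(N, criteria=0.025)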
def addDepth(df, ctd):
    """Returns df with depth added based on pressure measurement interpolated to
    same time as instrument.
    """
    # Fresh water correction for decibar to meters from Sea-Bird
    depth = ctd['Pressure']*1.019716
    df['elev'] = depth.values[:]
    return df
def calcSeawaterDepth(lat, pres):
    """ Calculates the gravity variation with latitude and pressure, then
    calculates the depth when the ocean water column is assumed to be 0 C and
    35 PSU.
    From Sea-Bird application notes:
    www.seabird.com/application_notes/AN69.htm
    Parameters
    ----------
    lat - Float of latitude (decimal)
    pres - Float of pressure reading (decibars)
    returns
    -------
    depth - Numpy array of depths (meters)
    """
    x = np.square(np.sin(lat/57.29578))
    g = 9.780318*(1.0 + (5.2788e-3 + 2.36e-5*x)*x) + 1.092e-6*pres
    d = ((((-1.82e-15*pres + 2.279e-10)*pres - 2.2512e-5)*pres +
          9.72659)*pres)/g
    return d
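# Example (sketch): at the oc1204 station latitude, a 10 dbar reading
# corresponds to roughly 9.9 m of seawater:
#   calcSeawaterDepth(46.235, 10.0)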
def calcFreshwaterDepth(pres):
    """ Calculates depth from pressure for freshwater applications when high
    precision is not too critical.
    From Sea-Bird application notes:
    www.seabird.com/application_notes/AN69.htm
    Parameters
    ----------
    pres -- Float or ndarray of pressure reading (decibar)
    returns
    -------
    depth -- Float or ndarray depth (meters)
    """
    return pres*1.019716
def smoothAltim(alt, window, percent):
    """ Remove spikes from data based on percent of average of last n values.
    This method is totally heuristic, but effective with the following
    Parameters:
    window = 3
    percent = 20
    1. Apply median filter of size 3
    2. Remove spikes defined as > percent of last 5 points mean and replace
       with average of last 5
    3. Go through one more time and remove any spikes using same policy
    4. Smooth using size 5 moving average
    """
    alt = sp.signal.medfilt(alt, 3)
    end = window/2
    for i in np.arange(end, alt.shape[0]-(end+1)):
        avg = np.mean(alt[i-end:i+end+1])
        #print i, alt[i], avg
        if np.abs(alt[i]-avg)/avg > percent*0.01:
            print 'Removing value %d and replacing with average' % i
            alt[i] = avg
    for i in np.arange(0, alt.shape[0]-1):
        if np.abs(alt[i] - alt[i+1])/alt[i] > percent*0.01:
            print 'Removing value %d' % i
            # Replace with the local window mean, as in the first pass
            alt[i] = np.mean(alt[max(i-end, 0):i+end+1])
    alt = cbs.smooth(alt, window_len=5, window='flat')
    return alt
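# Example (sketch, hypothetical altimeter frame; smoothAltim expects a 1-d
# float array such as the altimeter 'range' variable):
#   clean = smoothAltim(altim['range'].values.astype('float64'), 3, 20)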
def calcDepth(altimDF, ctdDF):
    """ Calculates the depth of the water column using altimetry data and the
    pressure from the CTD.
    """
    # smoothAltim works on the 1-d altimeter 'range' array (see name dict)
    sAltim = smoothAltim(altimDF['range'].values, 3, 20)
    f = interp1d(altimDF['time'][::6], sAltim[::6])
    sAltim = f(ctdDF['time'])
    depth = sAltim + ctdDF['Pressure'][:]
    return depth
def idContiguousIndices(dfIndex):
    """ Identifies contiguous sets of indices.
    A new set starts wherever consecutive timestamps are more than 60 s apart.
    Parameters
    ----------
    dfIndex - df.index
    returns
    -------
    indexSet - List of [first, last] index pairs, one per contiguous set
    """
    indexSet = []
    start = dfIndex[0]
    for idx in range(dfIndex.shape[0]-1):
        if (dfIndex[idx+1] - dfIndex[idx]) > datetime.timedelta(0, 60):
            print 'Making set ', start, dfIndex[idx]
            indexSet.append([start, dfIndex[idx]])
            start = dfIndex[idx+1]
    # Close out the final (possibly only) set
    print 'Making set ', start, dfIndex[-1]
    indexSet.append([start, dfIndex[-1]])
    return indexSet
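# Example (sketch, a made-up index with one gap longer than 60 s):
#   idx = pd.DatetimeIndex(['2012-05-01 00:00:00', '2012-05-01 00:00:02',
#                           '2012-05-01 00:05:00', '2012-05-01 00:05:02'])
#   idContiguousIndices(idx)  # two sets, split at the > 60 s gap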
def calcOutOfWater(ctdDF):
    """ Identifies time ranges when the profiler is out of the water.
    Consider any pressure < 0.05 to be out of water to provide a buffer for
    bad data.
    OOW - Out of water
    Parameters
    ----------
    ctdDF - DataFrame of ctd instrument data
    returns
    -------
    times - List of times of when the profiler is out of the water.
    """
    OOWidx = ctdDF['Pressure'][ctdDF['Pressure'] < 0.05]
    OOWsets = idContiguousIndices(OOWidx.index)
    times = []
    for s in OOWsets:
        times.append([ctdDF['time'][s[0]], ctdDF['time'][s[1]]])
    return times
def filterOOW(times, df):
    """ Changes values from instruments to np.nan for times identified as being
    out of the water [changes are made in place].
    Note:
    Some DataFrame objects are typecast to float64 because int arrays cannot
    hold np.nan.
    Parameters
    ----------
    times - List of start and end times
    df - DataFrame of instrument data
    returns
    -------
    df - DataFrame of instrument data with values removed
    """
    df = df.apply(np.float64)
    for t in times:
        low = df['time'] > t[0]
        high = df['time'] < t[1]
        idx = df['time'][low & high]
        # Do not change time to np.nan
        timeIdx = np.where(df.columns == 'time')[0][0]
        print 'Removing %d values when the WP was out of the water' % idx.shape
        df.ix[idx.index, :timeIdx] = np.nan
        df.ix[idx.index, (timeIdx+1):] = np.nan
    return df
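# Example (sketch, chaining the two steps on hypothetical day frames):
#   oowTimes = calcOutOfWater(ctd)
#   adv = filterOOW(oowTimes, adv)  # ADV samples taken out of water -> NaN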
def processData(path, dirBase='oc1204B.0.F.', opt_insts=None):
    """ Processes stepA of all data (except LISST) for all days.
    """
    days = np.arange(1, 8)
    if opt_insts is not None:
        insts = opt_insts
    else:
        insts = procInst
    for inst in insts:
        if not os.path.exists(path+dirBase+instName[inst]):
            print 'Making dir '+path+dirBase+instName[inst]
            os.mkdir(path+dirBase+instName[inst])
    for d in days:
        print 'Processing day %s' % d
        procStepA(d, inPath=path)
def procStepA(day, inPath='./', outPath='./', type='Normal', ship='oc1204B'):
    """ Begins processing of Spring ETM cruise data.
    Step A:
    1. Subsample to 2 s intervals to make the size a bit more manageable
    2. Add lat, lon, and depth
    3. Saves changes to oc1204B.0.F.INSTRUMENT/dayDAY.nc
    Parameters
    ----------
    day - Int of day of cruise to process
    inPath - String of path to input files (where cmwp_day?_*?.nc exist)
    outPath - String of path to place output files
    """
    if type == 'Normal':
        print 'Processing normal data'
        f = os.path.join(inPath, 'cmwp_day%d_CTD.nc' % day)
        ctd = loadNetCDF(f).resample('2s')
        #oowTimes = calcOutOfWater(ctd)
        for inst in procInst:
            if inst == 'LISST':
                continue
            _open_and_save_file(day, inst, ctd, inPath, outPath, ship)
    elif type == 'LISST':
        print 'Processing LISST data'
        f = os.path.join(inPath, 'cmwp_day%d_CTD.nc' % day)
        ctd = loadNetCDF(f).resample('2s')
        _open_and_save_file(day, 'LISST_PROC', ctd, inPath, outPath, ship)
# Used to clear out memory after return
def _open_and_save_file(day, inst, ctd, inPath, outPath, ship):
    # Open old file
    f = 'cmwp_day%d_%s.nc' % (day, inst)
    f = os.path.join(inPath, f)
    print 'Opening %s' % f
    data = loadNetCDF(f, mode='r')
    if inst == 'LISST_PROC':
        d1 = data[0].resample('2s').reindex(ctd.index)
        d2 = data[1].resample('2s').reindex(ctd.index)
        d1 = addDepth(d1, ctd)
        data = (d1, d2)
        ship = ship+'.0.F.'+'LISST'
        type = 'LISST'
    else:
        if inst in subSample:
            data = data[::100].resample('2s').reindex(ctd.index)
        else:
            data = data.resample('2s').reindex(ctd.index)
        data = addDepth(data, ctd)
        ship = ship+'.0.F.'+instName[inst]
        type = 'Normal'
    # Save new netCDF file
    fDir = os.path.join(outPath, ship)
    dayFile = 'day%d.nc' % day
    f = os.path.join(fDir, dayFile)
    # f = dayFile
    saveNetCDF(f, data, type=type)
def calcVerticalBin(df):
    """Calculates the size of the vertical "bin," or the vertical distance
    traveled by the winched profiler between measurements.
    Charles recommended a "bin" size of 20 cm.
    Parameters
    ----------
    df - DataFrame object of WP data
    Returns
    -------
    dz/dt - ndarray of vertical travel per second between samples (dbar/s,
            roughly m/s)
    """
    dz = df.pressure.diff().values[1:]
    # np.diff on a datetime64[ns] index returns nanoseconds; convert to seconds
    dt = np.diff(df.index.values).astype('float64')/1e9
    return dz/dt
def medFilter(a):
    """ Calculates mean of readings before and after; if the current reading is
    more than 10% away from that mean, it is replaced with the mean.
    """
    b = a.copy()
    for i in xrange(1, b.shape[0]-1):
        m = (b[i-1] + b[i+1])/2.0
        if np.abs(b[i] - m)/m > 0.1:
            b[i] = m
    return b
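# Example (sketch): smooth single-point spikes in a 1-d array; note the filter
# compares each point against its (possibly already adjusted) neighbors:
#   spiky = np.array([10.0, 10.1, 30.0, 10.2, 10.1])
#   smoothed = medFilter(spiky)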
def saveNetCDF(filePath, df, type='Normal', cache=True):
    """ Saves data from an instrument (DataFrame) into a netCDF4 file.
    Also adds:
    lat, lon, and global description (offering)
    Parameters
    ----------
    filePath - String of path for file (Assumes inclusion of dir structure for description)
    df - DataFrame object of data to save to file
    type - String like origin of data
           - 'Normal' not from ADCP or LISST
           - 'ADCP'
           - 'LISST'
    cache - True: Adheres to netCDF like cache path structure
            e.g. oceanus1204c.0.F.LISST/day8.nc
    """
    if (type != 'Normal') and (type != 'ADCP') and (type != 'LISST'):
        raise ValueError('Data type not supported '+type)
    print 'Creating netCDF file '+filePath
    # zlib compression is a per-variable option (createVariable), not a
    # Dataset constructor argument
    f = netCDF4.Dataset(filePath, 'w', format='NETCDF4')
    if cache:
        f.description = filePath[:-8]
    else:
        f.description = filePath
    if type == 'Normal':
        f.createDimension('time', df.shape[0])
        times = datetime64ToEpoch(df.index.values)
    else:
        f.createDimension('time', df[0].shape[0])
        times = datetime64ToEpoch(df[0].index.values)
    f.createDimension('x', 1)
    f.createDimension('y', 1)
    tmpVar = f.createVariable('time', 'float64', ('time',))
    tmpVar[:] = times
    tmpVar.units = units['time']
    tmpVar = f.createVariable('x', 'float', ('x',))
    tmpVar[:] = lon
    tmpVar.units = 'latlon'
    tmpVar = f.createVariable('y', 'float', ('y',))
    tmpVar[:] = lat
    tmpVar.units = 'latlon'
    # Treat data by origin
    if type == 'Normal':
        for i, v in enumerate(df.keys()):
            print 'Variable '+name[v]+' : '+units[v]
            tmpVar = f.createVariable(name[v], df[v].dtype, ('time',))
            tmpVar[:] = df[v].values
            tmpVar.units = units[v]
            f.sync()
    elif type == 'ADCP':
        raise NotImplementedError('ADCP saving not implemented')
    elif type == 'LISST':
        # Treat volume and diameter df separately
        for i, v in enumerate(df[0].keys()):
            print 'Variable '+lisstVolNames[i]+' : '+'um'
            tmpVar = f.createVariable(lisstVolNames[i], 'float64', ('time',))
            if v == 'elev':
                tmpVar[:] = df[0]['elev'].values
                tmpVar.units = 'm'
            else:
                tmpVar[:] = df[0][i].values
                tmpVar.units = 'um'
            f.sync()
        for i, v in enumerate(df[1].keys()):
            print 'Variable '+lisstDiamsNames[i]+' : '+lisstDiamsUnits[i]
            tmpVar = f.createVariable(lisstDiamsNames[i], 'float64', ('time',))
            tmpVar[:] = df[1][i].values
            tmpVar.units = lisstDiamsUnits[i]
            f.sync()
    f.close()
def applyEulerRotation(euler):
    """ Builds the rotation matrix for ADV velocity data described by the Euler
    angles, as implemented by John Dunlap in the cmwp_src codes.
    Parameters
    ----------
    euler - An ndarray (3,) of Euler angles
    returns
    -------
    R, R1, R2, R3 - Composed rotation matrix and the individual rotations
    """
    # Rotation about x-axis 3rd
    R1 = np.array([[1, 0, 0],
                   [0, np.cos(euler[0]), np.sin(euler[0])],
                   [0, -np.sin(euler[0]), np.cos(euler[0])]])
    # Rotation about y-axis 2nd
    R2 = np.array([[np.cos(euler[1]), 0, -np.sin(euler[1])],
                   [0, 1, 0],
                   [np.sin(euler[1]), 0, np.cos(euler[1])]])
    # Rotation about z-axis 1st
    R3 = np.array([[np.cos(euler[2]), np.sin(euler[2]), 0],
                   [-np.sin(euler[2]), np.cos(euler[2]), 0],
                   [0, 0, 1]])
    # Compose with matrix products; elementwise '*' on ndarrays would not
    # produce a rotation matrix
    R = np.dot(np.dot(R1, R2), R3)
    return R, R1, R2, R3
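# Example (sketch, rotating one ADV sample with the cruise coefficients;
# whether the result is meaningful is another matter, per the note on
# eulerRotations above):
#   R = applyEulerRotation(eulerRotations['1'])[0]
#   vel = np.array([100.0, -20.0, 5.0])  # mm/s
#   vel_rot = np.dot(R, vel)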
def idTimeBounds(dfData1, dfData2):
    """ Identifies indices into dfData2 that bound interpolation against
    dfData1 so as to avoid any extrapolation.
    """
    # Find the first time in dfData2 after dfData1 begins
    first = np.where(dfData2['time'] > dfData1['time'].values[0])[0][0]
    # Find the last time in dfData2 before dfData1 ends
    last = np.where(dfData2['time'] < dfData1['time'].values[-1])[0][-1]
    # Return these two indices
    return first, last