# etmDataInterface.py
- """ Collection of functions and classes to deal with CMWP data
- OOW - Implies "out of water"
- """
- #--------------------------------------------------------------------------------
- #Imports
- #--------------------------------------------------------------------------------
- import netCDF4
- import os
- import scipy as sp
- import numpy as np
- import datetime
- import pytz
- from pandas import Panel, DataFrame, Series
- import pandas as pd
- from scipy.interpolate import interp1d
- import cookb_signalsmooth as cbs
- import seawater.csiro as sw
#--------------------------------------------------------------------------------
# Constants
#--------------------------------------------------------------------------------
# MATLAB datenum value for January 1, 1970
#datenumDayEpoch = 719529
# Spring
#instruments = ['CTD', 'ALTIM', 'COMP', 'MOPAK', 'VG1', 'VG2', 'ACC1', 'ACC2',
#               'uT', 'uC', 'OXY', 'ADV', 'ECO', 'LISST']
# Fall
#instruments = ['CTD', 'ALTIM', 'COMP', 'MOPAK', 'VG1', 'VG2', 'ACC1', 'ACC2',
#               'uT', 'uC', 'OXY', 'ADV', 'PUCK', 'FLNTU', 'LISST']
instName = {'CTD': 'CTD',
            'ALTIM': 'Altim',
            'COMP': 'Comp',
            'MOPAK': 'Mopak',
            'VG1': 'VG1',
            'VG2': 'VG2',
            'ACC1': 'ACC1',
            'ACC2': 'ACC2',
            'uT': 'uT',
            'uC': 'uC',
            'OXY': 'Oxygen',
            'ADV': 'ADV',
            'ECO': 'Eco',
            'PUCK': 'Eco',
            'FLNTU': 'FLNTU',
            'LISST': 'LISST',
            }
procInst = {'CTD': 'CTD',
            'ALTIM': 'Altim',
            'COMP': 'Comp',
            # 'MOPAK': 'Mopak',
            # 'OXY': 'Oxygen',
            'ADV': 'ADV',
            'ECO': 'Eco',
            # 'PUCK': 'Eco',
            # 'FLNTU': 'FLNTU',
            }
# Maps raw variable names (key spellings match the source files) to output names
name = {'temp': 'water_temperature',
        'conductivity': 'water_conductivity',
        'Pressure': 'water_pressure',
        'Salinity': 'water_salinity',
        'range': 'range',
        'heading 1': 'heading1',
        'heading 2': 'heading2',
        'pitch': 'pitch',
        'roll': 'roll',
        'unknown 1': 'unknown1',
        'unknown 2': 'unknown2',
        'VG1': 'VG1',
        'VG2': 'VG2',
        'ACC1': 'ACC1',
        'ACC2': 'ACC2',
        'uT': 'uT',
        'uC': 'uC',
        'o2': 'oxygen',
        'Velocity 1': 'vel1',
        'Velocity 2': 'vel2',
        'Velocity 3': 'vel3',
        'Amplitude 1': 'amp1',
        'Amplitude 2': 'amp2',
        'Amplitude 3': 'amp3',
        'cor1': 'cor1',
        'cor2': 'cor2',
        'cor3': 'cor3',
        'Red': 'red',
        'Blue': 'blue',
        'Chlorophyll': 'chlorophyll',
        'unknown 3': 'unknown2',
        'unknown 6': 'unknown3',
        'time': 'time',
        'angular rate 1': 'ang_rate_1',
        'angular rate 2': 'ang_rate_2',
        'angular rate 3': 'ang_rate_3',
        'tilt 1': 'tilt1',
        'tilt2': 'tilt2',
        'tilt3': 'tilt3',
        'unkown 1': 'unknown1',
        'elev': 'elev',
        'Lambda1': 'lambda1',
        'chlorophyll': 'chlorophyll',
        'Lambda2': 'lambda2',
        'NTU': 'NTU',
        'Thermister': 'thermistor',
        }
units = {'temp': 'C',
         'conductivity': '?',
         'Pressure': 'dbar',
         'Salinity': 'psu',
         'range': 'm',
         'heading 1': 'degrees',
         'heading 2': 'degrees',
         'pitch': 'degrees',
         'roll': 'degrees',
         'unknown 1': '?',
         'unknown 2': '?',
         'VG1': '?',
         'VG2': '?',
         'ACC1': '?',
         'ACC2': '?',
         'uT': '?',
         'uC': '?',
         'o2': '?',
         'Velocity 1': 'mm/s',
         'Velocity 2': 'mm/s',
         'Velocity 3': 'mm/s',
         'Amplitude 1': 'dB/0.43',
         'Amplitude 2': 'dB/0.43',
         'Amplitude 3': 'dB/0.43',
         'cor1': '?',
         'cor2': '?',
         'cor3': '?',
         'Red': '?',
         'Blue': '?',
         'Chlorophyll': '?',
         'unknown 3': '?',
         'unknown 6': '?',
         'time': 'seconds from 1970-01-01 00:00:00-00',
         'angular rate 1': '?',
         'angular rate 2': '?',
         'angular rate 3': '?',
         'tilt 1': '?',
         'tilt2': '?',
         'tilt3': '?',
         'unkown 1': '?',
         'elev': 'm',  # addDepth converts pressure (dbar) to meters
         'Lambda1': '?',
         'chlorophyll': '?',
         'Lambda2': '?',
         'NTU': 'NTU',
         'Thermister': '?',
         }
# DataFrame must have equal sized arrays, so skip x/y when loading data
skipKeys = ['x', 'y', 'time']
# Rotation coefficients from John Dunlap's work as found in
# advrot.m from the cmwp source codes.
# These don't appear to do squat.
eulerRotations = {'1': np.array([-0.50, -0.50, 0.50]),
                  '2': np.array([-0.50, -0.60, 0.50]),
                  '3': np.array([-0.40, -0.45, 0.70]),
                  '4': np.array([-0.40, -0.45, 0.70]),
                  '5': np.array([-0.40, -0.45, 0.70]),
                  '6': np.array([-0.40, -0.45, 0.70]),
                  '7': np.array([ 0.36, -0.37, 0.55]),
                  }
# oc1204 location - Bounces a bit around here over the cruise.
# There is a shift slightly west on decimal day 123, but still quite close
lon = -123.915
lat = 46.235
# Approximate sampling frequency in Hz
freq = {'CTD': 1,  # Less than 1
        'ALTIM': 5,
        'COMP': 1,
        'MOPAK': 23,
        'VG1': 479,
        'VG2': 479,
        'ACC1': 479,
        'ACC2': 479,
        'uT': 479,
        'uC': 479,
        'OXY': 479,
        'ADV': 14,
        'ECO': 1,
        'LISST': np.nan,  # Not dealt with yet
        }
subSample = ['VG1', 'VG2', 'ACC1', 'ACC2', 'uT', 'uC', 'OXY']
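# Note (added sketch): the ~479 Hz channels listed in subSample are strided
# [::100] in _open_and_save_file below (roughly 4.8 Hz) before the 2 s
# resample, which keeps the intermediate frames a manageable size.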
# Type C instrument used on cruise - 2.5 to 500 microns
lisstVolSizes = np.array([2.72, 3.20, 3.78, 4.46, 5.27, 6.21, 7.33, 8.65,
                          10.2, 12.1, 14.2, 16.8, 19.8, 23.4, 27.6, 32.5,
                          38.4, 45.3, 53.5, 63.1, 74.5, 87.9, 104, 122,
                          144, 170, 201, 237, 280, 331, 390, 460])
lisstVolNames = np.array(['2.72 um', '3.20 um', '3.78 um', '4.46 um',
                          '5.27 um', '6.21 um', '7.33 um', '8.65 um',
                          '10.2 um', '12.1 um', '14.2 um', '16.8 um',
                          '19.8 um', '23.4 um', '27.6 um', '32.5 um',
                          '38.4 um', '45.3 um', '53.5 um', '63.1 um',
                          '74.5 um', '87.9 um', '104 um', '122 um',
                          '144 um', '170 um', '201 um', '237 um',
                          '280 um', '331 um', '390 um', '460 um',
                          'elev'])
lisstDiamsNames = ['total_volume', 'mean_size', 'std', 'nan',
                   'D10', 'D16', 'D50', 'D60', 'D84',
                   'D90', 'hazen_uniformity_coefficient',
                   'surface_area', 'silt_density',
                   'silt_volume']
lisstDiamsUnits = ['ul/l', 'um', 'um', 'NaN',
                   'um', 'um', 'um', 'um', 'um',
                   'um', 'ratio', 'cm2/l', 'percent',
                   'um']
#--------------------------------------------------------------------------------
# Functions
#--------------------------------------------------------------------------------
def formatLisstDataForDataFrame(netCDFData):
    """Formats processed LISST-100x data from a netCDF file for loading into
    a pandas DataFrame.
    """
    volumeData = {}
    for i in range(32):
        volumeData[i] = netCDFData.variables['vol'][:, i]
    time = netCDFData.variables['time'][:]
    if isinstance(time[0], np.float64) and time[0] > 1000000:
        time = pd.DatetimeIndex(epochToDatetime(time))
    else:
        time = pd.DatetimeIndex(datenumToDatetime(time)).tz_localize('UTC')
    vol = DataFrame(volumeData, index=time)
    diamData = {}
    for i in range(14):
        diamData[i] = netCDFData.variables['diams'][:, i]
    diams = DataFrame(diamData, index=time)
    return (vol, diams)
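# Usage sketch (file name follows the cmwp_day%d_%s.nc pattern used in
# _open_and_save_file below):
#   >>> nc = netCDF4.Dataset('cmwp_day1_LISST_PROC.nc', 'r')
#   >>> vol, diams = formatLisstDataForDataFrame(nc)
#   >>> vol.shape[1]   # 32 LISST size classes
#   32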
def formatDataForDataFrame(netCDFData):
    """ Formats data from a netCDF file for loading into a pandas DataFrame """
    keys = netCDFData.variables.keys()
    data = {}
    for k in keys:
        if k in skipKeys: continue
        data[k] = netCDFData.variables[k][:]
    time = netCDFData.variables['time'][:]
    # Heuristic: large float values are epoch seconds, otherwise MATLAB datenum
    if isinstance(time[0], np.float64) and time[0] > 1000000:
        time = pd.DatetimeIndex(epochToDatetime(time))
    else:
        time = pd.DatetimeIndex(datenumToDatetime(time)).tz_localize('UTC')
    df = DataFrame(data, index=time)
    return df
#TODO: Format like LISST
def formatADCPForDataFrame(adcpNetCDFData):
    """ Formats ADCP data from wh300.nc for loading into a pandas Panel
    It seems you can't place DataFrames with different dimensions into a single
    Panel, so this returns a dictionary df with a:
    - DataFrame of 1d data
    - Panel of 2d data

    Parameters
    ----------
    adcpNetCDFData - netCDF object with ADCP data

    returns
    -------
    df - Dict of 1d data in DataFrame and 2d data in Panel
    """
    keys = adcpNetCDFData.variables.keys()
    # 'trajectory' is empty so I'm throwing it out
    if 'trajectory' in keys:
        keys.remove('trajectory')
    dim1 = {}
    dim2 = {}
    for k in keys:
        if len(adcpNetCDFData.variables[k].shape) == 2:
            dim2[k] = adcpNetCDFData.variables[k][:]
        else:
            dim1[k] = adcpNetCDFData.variables[k][:]
    df = {}
    df['1d'] = DataFrame(dim1)
    df['2d'] = Panel(dim2)
    return df
def loadNetCDF(file, mode='r'):
    """ Loads netCDF file of data and returns DataFrame of all vars"""
    data = netCDF4.Dataset(file, mode)
    if 'LISST' in file:
        print 'Loading LISST data'
        if len(data.variables.keys()) == 4:
            dfData = formatLisstDataForDataFrame(data)
        else:
            dfData = formatDataForDataFrame(data)
    else:
        dfData = formatDataForDataFrame(data)
    data.close()
    return dfData
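# Usage sketch: loadNetCDF dispatches on the file name, so LISST files return
# the (vol, diams) pair and everything else a single DataFrame.
#   >>> ctd = loadNetCDF('cmwp_day1_CTD.nc')   # hypothetical path
#   >>> ctd['Pressure'].describe()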
#-------------------------------------------------------------------------------
# Time functions
#-------------------------------------------------------------------------------
def epochToDatetime(epoch):
    """Converts UNIX epoch to python datetime
    """
    if isinstance(epoch, float):
        #return datetime.datetime.fromtimestamp(epoch, tz=pytz.utc)
        return datetime.datetime.fromtimestamp(epoch)
    if isinstance(epoch, np.ndarray):
        f = lambda x: datetime.datetime.fromtimestamp(x)
        vf = np.vectorize(f)
        return vf(epoch)
def datetime64ToEpoch(dt64):
    """Converts np.datetime64 to Unix epoch
    1. Need to add 7 hours to get it to work correctly (this appears to be the
       US Pacific Daylight Time offset from UTC)
    """
    if isinstance(dt64, np.datetime64):
        if dt64.dtype == '<M8[ns]':
            return (dt64 + np.timedelta64(7, 'h')).astype('float64')/1e9
    if isinstance(dt64, np.ndarray):
        if dt64[0].dtype == '<M8[ns]':
            f = lambda x: int(x)/1e9 + 25200  # 25200 s = 7 h
            vf = np.vectorize(f)
            return vf(dt64)
    print 'Type not supported %s' % type(dt64)
def datenumToDatetime(datenum):
    """Converts MATLAB datenum to python datetime
    Required steps:
    1. datetime and datenum ordinals differ by 366 days
    """
    if isinstance(datenum, int) or isinstance(datenum, float):
        return (datetime.datetime.fromordinal(int(datenum)) +
                datetime.timedelta(days=datenum % 1) -
                datetime.timedelta(days=366))
    if isinstance(datenum, np.ndarray):
        f = lambda x: (datetime.datetime.fromordinal(int(x)) +
                       datetime.timedelta(days=x % 1) -
                       datetime.timedelta(days=366))
        vf = np.vectorize(f)
        return vf(datenum)
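# Worked example: MATLAB's datenum(1970,1,1) is 719529 while python's
# date(1970,1,1).toordinal() is 719163, hence the 366-day offset above.
#   >>> datenumToDatetime(719529.0)
#   datetime.datetime(1970, 1, 1, 0, 0)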
def datetimeToEpoch(dt):
    """ Converts datetime to UNIX epoch
    Must add 7 hours to this to get epoch seconds to work correctly.
    """
    if isinstance(dt, datetime.datetime):
        return (dt - datetime.datetime(1970, 1, 1) +
                datetime.timedelta(hours=7)).total_seconds()
    if isinstance(dt, np.ndarray):
        f = lambda x: (x - datetime.datetime(1970, 1, 1) +
                       datetime.timedelta(hours=7)).total_seconds()
        vf = np.vectorize(f)
        return vf(dt)
    print 'Type not supported %s' % type(dt)
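# Example of the 7 h (PDT) shift baked into datetimeToEpoch:
#   >>> datetimeToEpoch(datetime.datetime(1970, 1, 1))
#   25200.0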
def doyToDatetime(doy, year):
    """ Converts day of year to datetime format.
    Note: doy is treated as a zero-based offset from January 1.
    """
    if isinstance(doy, float):
        return datetime.datetime(year, 1, 1) + datetime.timedelta(doy)
    if isinstance(doy, np.ndarray):
        f = lambda x: datetime.datetime(year, 1, 1) + datetime.timedelta(x)
        vf = np.vectorize(f)
        return vf(doy)
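# Example (doy is a zero-based offset, so 0.5 is noon on January 1):
#   >>> doyToDatetime(0.5, 2012)
#   datetime.datetime(2012, 1, 1, 12, 0)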
#-------------------------------------------------------------------------------
# interp functions
#-------------------------------------------------------------------------------
def interpVars(dfData1, var1, dfData2, var2):
    """ Interpolates data of var1 onto the time of var2 using simple
    linear interpolation.

    Parameters
    ----------
    dfData1 -- DataFrame object that contains var1
    var1 -- Key into DataFrame of data for variable
    dfData2 -- DataFrame object that contains var2
    var2 -- Key into DataFrame of data for variable

    Returns
    -------
    tsData -- Series object with interpolated data
    """
    #idx = idTimeBounds(dfData1, dfData1)
    # Hand coded for development of ADV data for first day
    # Adjust to use identified indices
    f = interp1d(dfData1['time'][:], dfData1[var1][:])
    tsData = f(dfData2['time'][1:])
    return tsData
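# Usage sketch (hypothetical adv/ctd frames loaded via loadNetCDF): put the
# ADV 'vel1' channel on the CTD time base.
#   >>> vel1OnCTD = interpVars(adv, 'vel1', ctd, 'time')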
def interpDFtoCTD(ctd, df):
    """Interpolates all data in DataFrame object to CTD time and returns a
    new DataFrame on the CTD time base.
    """
    data = {}
    for k in df.keys():
        if k == 'time':
            continue
        f = interp1d(df['time'][:], df[k][:])
        data[k] = f(ctd['time'][:])
    data['time'] = ctd['time'][:]
    return DataFrame(data, index=ctd.index)
#-------------------------------------------------------------------------------
# Other calculation functions
#-------------------------------------------------------------------------------
def calcBuoyFreq(ctd, rho='potential'):
    r"""Calculates the buoyancy frequency (Brunt-Vaisala) using CTD data frame.

    Parameters
    ----------
    ctd - CTD DataFrame object
    rho - 'potential' = rho_theta or 'average' uses mean of rho

    Returns
    -------
    N - Buoyancy frequency

    Notes
    -----
    rho:
      'potential':
        .. math:: N = \sqrt{-\frac{g}{\rho_\theta}\frac{d\rho_\theta}{dz}}

        rho_theta - Potential density calculated via seawater package
      'average':
        .. math:: N = \sqrt{-\frac{g}{\bar{\rho}}\frac{d\rho}{dz}}

        rho_mean - Density average over file

    References
    ----------
    Petrie, J., et al. (2010). Local boundary shear stress estimates from
    velocity profiles with an ADCP. River Flow 2010. ISBN 978-3-939230-00-7
    Mikkelsen, O., et al. (2008). The influence of schlieren on in situ optical
    measurements used for particle characterization. Limnology and
    Oceanography: Methods. Vol. 6. 133-143.
    """
    if rho == 'potential':
        rho_theta = sw.pden(ctd.water_salinity, ctd.water_temperature,
                            ctd.water_pressure)
        drho = rho_theta.diff()
        dz = ctd.water_pressure.diff()
        # z is taken positive downward (pressure), hence the negative g
        g = -9.8
        N = np.sqrt(-g/rho_theta*drho/dz)
        return N
    elif rho == 'average':
        rho = sw.dens(ctd.water_salinity, ctd.water_temperature,
                      ctd.water_pressure)
        rho_mean = np.mean(rho)
        drho = rho.diff()
        dz = ctd.water_pressure.diff()
        g = -9.8
        N = np.sqrt(-g/rho_mean*drho/dz)
        return N
    else:
        raise ValueError('rho method not recognized: %s' % rho)
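# Usage sketch (assumes a ctd frame processed through procStepA, i.e. carrying
# the water_* names from the `name` dict above):
#   >>> N = calcBuoyFreq(ctd, rho='potential')   # NaN at the first sample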
def calcPercentGoodLisst(bf, criteria=0.025):
    """Calculates the percent of good LISST data based on N (buoyancy frequency)
    """
    bad_value = bf > criteria
    bad_nan = np.isnan(bf)
    bad = np.logical_or(bad_value, bad_nan)
    return 1 - float(np.sum(bad))/bf.size
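# Example: NaNs count as bad along with N above the criteria, so half of this
# toy record is "good".
#   >>> calcPercentGoodLisst(np.array([0.01, 0.05, np.nan, 0.02]))
#   0.5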
def addDepth(df, ctd):
    """Returns df with depth added based on the pressure measurement
    interpolated to the same time as the instrument.
    """
    # Fresh water correction for decibar to meters from Sea-Bird
    depth = ctd['Pressure']*1.019716
    df['elev'] = depth.values[:]
    return df
def calcSeawaterDepth(lat, pres):
    """ Calculates the gravity variation with latitude and pressure, then
    calculates the depth when the ocean water column is assumed to be 0 degC
    and 35 PSU.
    From Sea-Bird application notes:
    www.seabird.com/application_notes/AN69.htm

    Parameters
    ----------
    lat - Float of latitude (decimal degrees)
    pres - Float of pressure reading (decibars)

    returns
    -------
    depth - Numpy array of depths (meters)
    """
    x = np.square(np.sin(lat/57.29578))
    # Note: coefficients written as proper powers of ten (e.g. 2.36e-5,
    # not 2.36*10e-5, which is off by a factor of ten)
    g = 9.780318*(1.0 + (5.2788e-3 + 2.36e-5*x)*x) + 1.092e-6*pres
    d = ((((-1.82e-15*pres + 2.279e-10)*pres - 2.2512e-5)*pres +
          9.72659)*pres)/g
    return d
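# Worked example at the cruise latitude: 100 dbar comes out to roughly 99.2 m.
#   >>> round(calcSeawaterDepth(46.235, 100.0), 1)
#   99.2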
def calcFreshwaterDepth(pres):
    """ Calculates depth from pressure for freshwater applications when high
    precision is not too critical.
    From Sea-Bird application notes:
    www.seabird.com/application_notes/AN69.htm

    Parameters
    ----------
    pres -- Float or ndarray of pressure reading (decibar)

    returns
    -------
    depth -- Float or ndarray depth (meters)
    """
    return pres*1.019716
def smoothAltim(alt, window, percent):
    """ Remove spikes from data based on percent of the local window average.
    This method is totally based on heuristics, but effective with the
    following parameters:
      window = 3
      percent = 20
    1. Apply median filter of size 3
    2. Remove spikes defined as > percent away from the mean of the
       surrounding window and replace with that mean
    3. Go through one more time and remove any remaining spikes using the
       same policy
    4. Smooth using size 5 moving average
    """
    alt = sp.signal.medfilt(alt, 3)
    end = window/2
    for i in np.arange(end, alt.shape[0]-(end+1)):
        avg = np.mean(alt[i-end:i+end+1])
        if np.abs(alt[i]-avg)/avg > percent*0.01:
            print 'Removing value %d and replacing with average' % i
            alt[i] = avg
    for i in np.arange(end, alt.shape[0]-(end+1)):
        if np.abs(alt[i] - alt[i+1])/alt[i] > percent*0.01:
            print 'Removing value %d' % i
            # Replace with the local window mean (the original reused a
            # stale `avg` from the first loop here)
            alt[i] = np.mean(alt[i-end:i+end+1])
    alt = cbs.smooth(alt, window_len=5, window='flat')
    return alt
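# Usage sketch with the parameters the docstring recommends (assumes an altim
# frame loaded via loadNetCDF; .copy() because the despiking is in place):
#   >>> clean = smoothAltim(altim['range'].values.copy(), 3, 20)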
def calcDepth(altimDF, ctdDF):
    """ Calculates the depth of the water column using altimetry data and the
    pressure from the CTD.
    """
    sAltim = smoothAltim(altimDF['range'].values.copy(), 3, 20)
    f = interp1d(altimDF['time'][::6], sAltim[::6])
    sAltim = f(ctdDF['time'])
    # Pressure in dbar is within ~2% of meters of fresh water here
    # (see calcFreshwaterDepth)
    depth = sAltim + ctdDF['Pressure'][:]
    return depth
def idContiguousIndices(dfIndex):
    """ Identifies contiguous sets of indices.

    Parameters
    ----------
    dfIndex - df.index

    returns
    -------
    indexSet - List of pairs with beginning and ending index of each
               contiguous block (a gap is any step larger than 60 seconds)
    """
    indexSet = []
    if dfIndex.shape[0] == 0:
        return indexSet
    start = dfIndex[0]
    last = dfIndex[0]
    for idx in range(1, dfIndex.shape[0]):
        if (dfIndex[idx] - dfIndex[idx-1]) > datetime.timedelta(0, 60):
            print 'Making set ', start, last
            indexSet.append([start, last])
            start = dfIndex[idx]
        last = dfIndex[idx]
    # Close out the final (or only) contiguous block
    print 'Making set ', start, last
    indexSet.append([start, last])
    return indexSet
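# Example: a gap longer than 60 s splits the index into two [start, end] pairs.
#   >>> idx = pd.DatetimeIndex(['2012-05-02 00:00:00', '2012-05-02 00:00:02',
#   ...                         '2012-05-02 00:05:00', '2012-05-02 00:05:02'])
#   >>> len(idContiguousIndices(idx))
#   2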
def calcOutOfWater(ctdDF):
    """ Identifies time ranges when the profiler is out of the water.
    Consider any pressure < 0.05 to be out of water to provide a buffer for
    bad data.
    OOW - Out of water

    Parameters
    ----------
    ctdDF - DataFrame of ctd instrument data

    returns
    -------
    times - List of times of when the profiler is out of the water.
    """
    OOWidx = ctdDF['Pressure'][ctdDF['Pressure'] < 0.05]
    OOWsets = idContiguousIndices(OOWidx.index)
    times = []
    for s in OOWsets:
        times.append([ctdDF['time'][s[0]], ctdDF['time'][s[1]]])
    return times
def filterOOW(times, df):
    """ Changes values from instruments to np.nan for times identified as being
    out of the water [changes are in place].
    Note:
    Some DataFrame objects are typecast to float64 because int arrays cannot
    hold np.nan.

    Parameters
    ----------
    times - List of start and end times
    df - DataFrame of instrument data

    returns
    -------
    df - DataFrame of instrument data with values removed
    """
    df = df.apply(np.float64)
    for t in times:
        low = df['time'] > t[0]
        high = df['time'] < t[1]
        idx = df['time'][low & high]
        # Do not change time to np.nan
        timeIdx = np.where(df.columns == 'time')[0][0]
        print 'Removing %d values when the WP was out of the water' % idx.shape[0]
        df.ix[idx.index, :timeIdx] = np.nan
        df.ix[idx.index, (timeIdx+1):] = np.nan
    return df
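# Pipeline sketch mirroring the commented-out call in procStepA below: mask an
# instrument frame using the CTD pressure record.
#   >>> oowTimes = calcOutOfWater(ctd)
#   >>> adv = filterOOW(oowTimes, adv)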
def processData(path, dirBase='oc1204B.0.F.', opt_insts=None):
    """ Processes step A of all data (except LISST) for all days
    """
    days = np.arange(1, 8)
    if opt_insts is not None:
        insts = opt_insts
    else:
        insts = procInst
    for inst in insts:
        if not os.path.exists(path+dirBase+instName[inst]):
            print 'Making dir '+path+dirBase+instName[inst]
            os.mkdir(path+dirBase+instName[inst])
    for d in days:
        print 'Processing day %s' % d
        procStepA(d, inPath=path)
def procStepA(day, inPath='./', outPath='./', type='Normal', ship='oc1204B'):
    """ Begins processing of Spring ETM cruise data.
    Step A:
    1. Subsample to 2s intervals to make the size a bit more manageable
    2. Add lat, lon, and depth
    3. Saves changes to oc1204B.0.F.INSTRUMENT/dayDAY.nc

    Parameters
    ----------
    day - Int of day of cruise to process
    inPath - String of path to input files (where cmwp_day?_*.nc exist)
    outPath - String of path to place output files
    """
    if type == 'Normal':
        print 'Processing normal data'
        f = os.path.join(inPath, 'cmwp_day%d_CTD.nc' % day)
        ctd = loadNetCDF(f).resample('2s')
        #oowTimes = calcOutOfWater(ctd)
        for inst in procInst:
            if inst == 'LISST':
                continue
            _open_and_save_file(day, inst, ctd, inPath, outPath, ship)
    elif type == 'LISST':
        print 'Processing LISST data'
        f = os.path.join(inPath, 'cmwp_day%d_CTD.nc' % day)
        ctd = loadNetCDF(f).resample('2s')
        _open_and_save_file(day, 'LISST_PROC', ctd, inPath, outPath, ship)
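# Usage sketch ('/path/to/raw/' is a placeholder for wherever the
# cmwp_day?_*.nc files live):
#   >>> processData('/path/to/raw/')                       # all of procInst
#   >>> procStepA(1, inPath='/path/to/raw/', type='LISST')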
# Split into its own function so the large frames are freed after it returns
def _open_and_save_file(day, inst, ctd, inPath, outPath, ship):
    # Open old file
    f = 'cmwp_day%d_%s.nc' % (day, inst)
    f = os.path.join(inPath, f)
    print 'Opening %s' % f
    data = loadNetCDF(f, mode='r')
    if inst == 'LISST_PROC':
        d1 = data[0].resample('2s').reindex(ctd.index)
        d2 = data[1].resample('2s').reindex(ctd.index)
        d1 = addDepth(d1, ctd)
        data = (d1, d2)
        ship = ship+'.0.F.'+'LISST'
        type = 'LISST'
    else:
        if inst in subSample:
            data = data[::100].resample('2s').reindex(ctd.index)
        else:
            data = data.resample('2s').reindex(ctd.index)
        data = addDepth(data, ctd)
        ship = ship+'.0.F.'+instName[inst]
        type = 'Normal'
    # Save new netCDF file
    fDir = os.path.join(outPath, ship)
    dayFile = 'day%d.nc' % day
    f = os.path.join(fDir, dayFile)
    # f = dayFile
    saveNetCDF(f, data, type=type)
def calcVerticalBin(df):
    """Calculates the size of the vertical "bin," or the vertical distance
    traveled by the winched profiler between measurements.
    Charles recommended a "bin" size of 20cm.

    Parameters
    ----------
    df - DataFrame object of WP data

    Returns
    -------
    dzdt - ndarray of vertical speed between measurements (dbar/s)
    """
    dz = df.pressure.diff().values[1:]
    # Index steps are timedelta64[ns]; convert nanoseconds to seconds
    dt = np.diff(df.index.values).astype('float64')/1e9
    return dz/dt
def medFilter(a):
    """ Calculates mean of readings before and after; if the current reading
    differs from that mean by more than 10%, it is replaced with the mean.
    """
    b = a.copy()
    for i in xrange(1, b.shape[0]-1):
        m = (b[i-1] + b[i+1])/2.0
        if np.abs(b[i] - m)/m > 0.1:
            b[i] = m
    return b
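# Example: the middle reading is >10% away from its neighbors' mean (10.0),
# so it gets replaced.
#   >>> medFilter(np.array([10.0, 14.0, 10.0]))
#   array([ 10., 10., 10.])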
def saveNetCDF(filePath, df, type='Normal', cache=True):
    """ Saves data from an instrument (DataFrame) into a netCDF4 file.
    Also adds:
    lat, lon, and global description (offering)

    Parameters
    ----------
    filePath - String of path for file (Assumes inclusion of dir structure
               for description)
    df - DataFrame object of data to save to file
    type - String for origin of data
         - 'Normal' not from ADCP or LISST
         - 'ADCP'
         - 'LISST'
    cache -
        True: Adheres to netCDF-like cache path structure
        e.g. oceanus1204c.0.F.LISST/day8.nc
    """
    if (type != 'Normal') and (type != 'ADCP') and (type != 'LISST'):
        raise ValueError('Data type not supported: ' + type)
    print 'Creating netCDF file '+filePath
    # zlib compression is a per-variable option in netCDF4, so it is set in
    # the createVariable calls below rather than on the Dataset
    f = netCDF4.Dataset(filePath, 'w', format='NETCDF4')
    if cache:
        f.description = filePath[:-8]
    else:
        f.description = filePath
    if type == 'Normal':
        f.createDimension('time', df.shape[0])
        times = datetime64ToEpoch(df.index.values)
    else:
        f.createDimension('time', df[0].shape[0])
        times = datetime64ToEpoch(df[0].index.values)
    f.createDimension('x', 1)
    f.createDimension('y', 1)
    tmpVar = f.createVariable('time', 'float64', ('time',), zlib=True)
    tmpVar[:] = times
    tmpVar.units = units['time']
    tmpVar = f.createVariable('x', 'float', ('x',))
    tmpVar[:] = lon
    tmpVar.units = 'latlon'
    tmpVar = f.createVariable('y', 'float', ('y',))
    tmpVar[:] = lat
    tmpVar.units = 'latlon'
    # Treat Data Origin
    if type == 'Normal':
        for i, v in enumerate(df.keys()):
            print 'Variable '+name[v]+' : '+units[v]
            tmpVar = f.createVariable(name[v], df[v].dtype, ('time',), zlib=True)
            tmpVar[:] = df[v].values
            tmpVar.units = units[v]
            f.sync()
    elif type == 'ADCP':
        raise NotImplementedError('ADCP saving not implemented')
    elif type == 'LISST':
        # Treat volume and diameter df separately
        for i, v in enumerate(df[0].keys()):
            print 'Variable '+lisstVolNames[i]
            tmpVar = f.createVariable(lisstVolNames[i], 'float64', ('time',),
                                      zlib=True)
            if v == 'elev':
                tmpVar[:] = df[0]['elev'].values
                tmpVar.units = 'm'
            else:
                tmpVar[:] = df[0][i].values
                tmpVar.units = 'um'
            f.sync()
        for i, v in enumerate(df[1].keys()):
            print 'Variable '+lisstDiamsNames[i]+' : '+lisstDiamsUnits[i]
            tmpVar = f.createVariable(lisstDiamsNames[i], 'float64', ('time',),
                                      zlib=True)
            tmpVar[:] = df[1][i].values
            tmpVar.units = lisstDiamsUnits[i]
            f.sync()
    f.close()
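# Usage sketch matching the cache layout described above:
#   >>> saveNetCDF('oc1204B.0.F.CTD/day1.nc', ctd, type='Normal')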
def applyEulerRotation(euler):
    """ Builds the rotation matrices for ADV velocity data described by the
    Euler angles, as implemented by John Dunlap in the cmwp_src codes.

    Parameters
    ----------
    euler - An ndarray (3,) of Euler angles to apply to adv data

    returns
    -------
    R, R1, R2, R3 - Full rotation matrix and the individual rotations
    """
    # Rotation about x-axis 3rd
    R1 = np.array([[1, 0, 0],
                   [0, np.cos(euler[0]), np.sin(euler[0])],
                   [0, -np.sin(euler[0]), np.cos(euler[0])]])
    # Rotation about y-axis 2nd
    R2 = np.array([[np.cos(euler[1]), 0, -np.sin(euler[1])],
                   [0, 1, 0],
                   [np.sin(euler[1]), 0, np.cos(euler[1])]])
    # Rotation about z-axis 1st
    R3 = np.array([[np.cos(euler[2]), np.sin(euler[2]), 0],
                   [-np.sin(euler[2]), np.cos(euler[2]), 0],
                   [0, 0, 1]])
    # Matrix product; `R1 * R2 * R3` on ndarrays would be elementwise
    R = R1.dot(R2).dot(R3)
    return R, R1, R2, R3
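# Usage sketch: rotate one velocity sample with WP 1's angles (vel1..vel3 are
# hypothetical scalars here).
#   >>> R, R1, R2, R3 = applyEulerRotation(eulerRotations['1'])
#   >>> velRot = R.dot(np.array([vel1, vel2, vel3]))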
def idTimeBounds(dfData1, dfData2):
    """ Identifies indices into dfData2 bounded by dfData1's time range so
    interpolation can avoid any extrapolation.
    """
    inBounds = ((dfData2['time'] >= dfData1['time'].min()) &
                (dfData2['time'] <= dfData1['time'].max()))
    idx = np.where(inBounds.values)[0]
    # Return the first and last in-bounds indices
    return idx[0], idx[-1]