
/etmDataInterface.py

https://bitbucket.org/yosoyjay/etm-data
  1. """ Collection of functions and classes to deal with CMWP data
  2. OOW - Implies "out of water"
  3. """
#--------------------------------------------------------------------------------
# Imports
#--------------------------------------------------------------------------------
import netCDF4
import os
import scipy as sp
import scipy.signal  # imported explicitly so sp.signal.medfilt is available
import numpy as np
import datetime
import pytz
from pandas import Panel, DataFrame, Series
import pandas as pd
from scipy.interpolate import interp1d
import cookb_signalsmooth as cbs
import seawater.csiro as sw
#--------------------------------------------------------------------------------
# Constants
#--------------------------------------------------------------------------------
# MATLAB datenum value for January 1, 1970
#datenumDayEpoch = 719529
# Spring
#instruments = ['CTD', 'ALTIM', 'COMP', 'MOPAK', 'VG1', 'VG2', 'ACC1', 'ACC2',
#               'uT', 'uC', 'OXY', 'ADV', 'ECO', 'LISST']
# Fall
#instruments = ['CTD', 'ALTIM', 'COMP', 'MOPAK', 'VG1', 'VG2', 'ACC1', 'ACC2',
#               'uT', 'uC', 'OXY', 'ADV', 'PUCK', 'FLNTU', 'LISST']
instName = {'CTD': 'CTD',
            'ALTIM': 'Altim',
            'COMP': 'Comp',
            'MOPAK': 'Mopak',
            'VG1': 'VG1',
            'VG2': 'VG2',
            'ACC1': 'ACC1',
            'ACC2': 'ACC2',
            'uT': 'uT',
            'uC': 'uC',
            'OXY': 'Oxygen',
            'ADV': 'ADV',
            'ECO': 'Eco',
            'PUCK': 'Eco',
            'FLNTU': 'FLNTU',
            'LISST': 'LISST',
            }
procInst = {'CTD': 'CTD',
            'ALTIM': 'Altim',
            'COMP': 'Comp',
            # 'MOPAK': 'Mopak',
            # 'OXY': 'Oxygen',
            'ADV': 'ADV',
            'ECO': 'Eco',
            # 'PUCK': 'Eco',
            # 'FLNTU': 'FLNTU',
            }
name = {'temp': 'water_temperature',
        'conductivity': 'water_conductivity',
        'Pressure': 'water_pressure',
        'Salinity': 'water_salinity',
        'range': 'range',
        'heading 1': 'heading1',
        'heading 2': 'heading2',
        'pitch': 'pitch',
        'roll': 'roll',
        'unknown 1': 'unknown1',
        'unknown 2': 'unknown2',
        'VG1': 'VG1',
        'VG2': 'VG2',
        'ACC1': 'ACC1',
        'ACC2': 'ACC2',
        'uT': 'uT',
        'uC': 'uC',
        'o2': 'oxygen',
        'Velocity 1': 'vel1',
        'Velocity 2': 'vel2',
        'Velocity 3': 'vel3',
        'Amplitude 1': 'amp1',
        'Amplitude 2': 'amp2',
        'Amplitude 3': 'amp3',
        'cor1': 'cor1',
        'cor2': 'cor2',
        'cor3': 'cor3',
        'Red': 'red',
        'Blue': 'blue',
        'Chlorophyll': 'chlorophyll',
        'unknown 3': 'unknown2',
        'unknown 6': 'unknown3',
        'time': 'time',
        'angular rate 1': 'ang_rate_1',
        'angular rate 2': 'ang_rate_2',
        'angular rate 3': 'ang_rate_3',
        'tilt 1': 'tilt1',
        'tilt2': 'tilt2',
        'tilt3': 'tilt3',
        'unkown 1': 'unknown1',
        'elev': 'elev',
        'Lambda1': 'lambda1',
        'chlorophyll': 'chlorophyll',
        'Lambda2': 'lambda2',
        'NTU': 'NTU',
        'Thermister': 'thermister',
        }
units = {'temp': 'C',
         'conductivity': '?',
         'Pressure': 'dbar',
         'Salinity': 'psu',
         'range': 'm',
         'heading 1': 'degrees',
         'heading 2': 'degrees',
         'pitch': 'degrees',
         'roll': 'degrees',
         'unknown 1': '?',
         'unknown 2': '?',
         'VG1': '?',
         'VG2': '?',
         'ACC1': '?',
         'ACC2': '?',
         'uT': '?',
         'uC': '?',
         'o2': '?',
         'Velocity 1': 'mm/s',
         'Velocity 2': 'mm/s',
         'Velocity 3': 'mm/s',
         'Amplitude 1': 'dB/0.43',
         'Amplitude 2': 'dB/0.43',
         'Amplitude 3': 'dB/0.43',
         'cor1': '?',
         'cor2': '?',
         'cor3': '?',
         'Red': '?',
         'Blue': '?',
         'Chlorophyll': '?',
         'unknown 3': '?',
         'unknown 6': '?',
         'time': 'seconds from 1970-01-01 00:00:00-00',
         'angular rate 1': '?',
         'angular rate 2': '?',
         'angular rate 3': '?',
         'tilt 1': '?',
         'tilt2': '?',
         'tilt3': '?',
         'unkown 1': '?',
         'elev': 'db',
         'Lambda1': '?',
         'chlorophyll': '?',
         'Lambda2': '?',
         'NTU': 'NTU',
         'Thermister': '?',
         }
# DataFrame must have equal sized arrays, so skip x/y when loading data
skipKeys = ['x', 'y', 'time']
# Rotation coefficients from John Dunlap's work as found in
# advrot.m from the cmwp source codes.
# These don't appear to do squat.
eulerRotations = {'1': np.array([-0.50, -0.50, 0.50]),
                  '2': np.array([-0.50, -0.60, 0.50]),
                  '3': np.array([-0.40, -0.45, 0.70]),
                  '4': np.array([-0.40, -0.45, 0.70]),
                  '5': np.array([-0.40, -0.45, 0.70]),
                  '6': np.array([-0.40, -0.45, 0.70]),
                  '7': np.array([ 0.36, -0.37, 0.55]),
                  }
# oc1204 location - Bounces a bit around here over the cruise.
# There is a shift slightly west on decimal day 123, but still quite close.
lon = -123.915
lat = 46.235
# Approximate sampling frequency in Hz
freq = {'CTD': 1,  # Less than 1
        'ALTIM': 5,
        'COMP': 1,
        'MOPAK': 23,
        'VG1': 479,
        'VG2': 479,
        'ACC1': 479,
        'ACC2': 479,
        'uT': 479,
        'uC': 479,
        'OXY': 479,
        'ADV': 14,
        'ECO': 1,
        'LISST': np.nan,  # Not dealt with yet
        }
subSample = ['VG1', 'VG2', 'ACC1', 'ACC2', 'uT', 'uC', 'OXY']
# Type C instrument used on cruise - 2.5 to 500 microns
lisstVolSizes = np.array([2.72, 3.20, 3.78, 4.46, 5.27, 6.21, 7.33, 8.65,
                          10.2, 12.1, 14.2, 16.8, 19.8, 23.4, 27.6, 32.5,
                          38.4, 45.3, 53.5, 63.1, 74.5, 87.9, 104, 122,
                          144, 170, 201, 237, 280, 331, 390, 460])
lisstVolNames = np.array(['2.72 um', '3.20 um', '3.78 um', '4.46 um',
                          '5.27 um', '6.21 um', '7.33 um', '8.65 um',
                          '10.2 um', '12.1 um', '14.2 um', '16.8 um',
                          '19.8 um', '23.4 um', '27.6 um', '32.5 um',
                          '38.4 um', '45.3 um', '53.5 um', '63.1 um',
                          '74.5 um', '87.9 um', '104 um', '122 um',
                          '144 um', '170 um', '201 um', '237 um',
                          '280 um', '331 um', '390 um', '460 um',
                          'elev'])
lisstDiamsNames = ['total_volume', 'mean_size', 'std', 'nan',
                   'D10', 'D16', 'D50', 'D60', 'D84',
                   'D90', 'hazen_uniformity_coefficient',
                   'surface_area', 'silt_density',
                   'silt_volume']
lisstDiamsUnits = ['ul/l', 'um', 'um', 'NaN',
                   'um', 'um', 'um', 'um', 'um',
                   'um', 'ratio', 'cm2/l', 'percent',
                   'um']
#--------------------------------------------------------------------------------
# Functions
#--------------------------------------------------------------------------------
def formatLisstDataForDataFrame(netCDFData):
    """Formats processed LISST-100x data from a netCDF file for loading into
    a pandas DataFrame.
    """
    volumeData = {}
    for i in range(32):
        volumeData[i] = netCDFData.variables['vol'][:, i]
    time = netCDFData.variables['time'][:]
    if isinstance(time[0], np.float64) and time[0] > 1000000:
        time = pd.DatetimeIndex(epochToDatetime(time))
    else:
        time = pd.DatetimeIndex(datenumToDatetime(time)).tz_localize('UTC')
    vol = DataFrame(volumeData, index=time)
    diamData = {}
    for i in range(14):
        diamData[i] = netCDFData.variables['diams'][:, i]
    diams = DataFrame(diamData, index=time)
    return (vol, diams)
def formatDataForDataFrame(netCDFData):
    """ Formats data from a netCDF file for loading into a pandas DataFrame """
    keys = netCDFData.variables.keys()
    data = {}
    for k in keys:
        if k in skipKeys:
            continue
        data[k] = netCDFData.variables[k][:]
    time = netCDFData.variables['time'][:]
    if isinstance(time[0], np.float64) and time[0] > 1000000:
        time = pd.DatetimeIndex(epochToDatetime(time))
    else:
        time = pd.DatetimeIndex(datenumToDatetime(time)).tz_localize('UTC')
        #time = pd.DatetimeIndex(datenumToDatetime(time))
    df = DataFrame(data, index=time)
    return df
#TODO: Format like LISST
def formatADCPForDataFrame(adcpNetCDFData):
    """ Formats ADCP data from wh300.nc for loading into a pandas Panel.
    It seems you can't place DataFrames with different dimensions into a single
    Panel, so this returns a dictionary df with a:
    - DataFrame of 1d data
    - Panel of 2d data
    Parameters
    ----------
    adcpNetCDFData - netCDF object with ADCP data
    returns
    -------
    df - Dict of 1d data in a DataFrame and 2d data in a Panel
    """
    keys = adcpNetCDFData.variables.keys()
    # 'trajectory' is empty so I'm throwing it out
    if keys.count('trajectory') > 0:
        keys.remove('trajectory')
    dim1 = {}
    dim2 = {}
    for k in keys:
        if len(adcpNetCDFData.variables[k].shape) == 2:
            dim2[k] = adcpNetCDFData.variables[k][:]
        else:
            dim1[k] = adcpNetCDFData.variables[k][:]
    df = {}
    df['1d'] = DataFrame(dim1)
    df['2d'] = Panel(dim2)
    return df
def loadNetCDF(file, mode='r'):
    """ Loads netCDF file of data and returns DataFrame of all vars """
    data = netCDF4.Dataset(file, mode)
    if 'LISST' in file:
        print 'Loading LISST data'
        if len(data.variables.keys()) == 4:
            dfData = formatLisstDataForDataFrame(data)
        else:
            dfData = formatDataForDataFrame(data)
    else:
        dfData = formatDataForDataFrame(data)
    data.close()
    return dfData
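# Example (sketch; the file name below is hypothetical - any cmwp_day*_<INST>.nc
# produced during the cruise processing would do):
#   ctd = loadNetCDF('cmwp_day1_CTD.nc')
#   print ctd['Pressure'].head()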
#-------------------------------------------------------------------------------
# Time functions
#-------------------------------------------------------------------------------
def epochToDatetime(epoch):
    """Converts UNIX epoch to python datetime
    """
    if isinstance(epoch, float):
        #return datetime.datetime.fromtimestamp(epoch, tz=pytz.utc)
        return datetime.datetime.fromtimestamp(epoch)
    if isinstance(epoch, np.ndarray):
        f = lambda x: datetime.datetime.fromtimestamp(x)
        vf = np.vectorize(f)
        return vf(epoch)
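# Example (sketch): epoch 0.0 is 1970-01-01 00:00:00 UTC, though
# fromtimestamp() renders it in the machine's local timezone:
#   epochToDatetime(0.0)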
def datetime64ToEpoch(dt64):
    """Converts np.datetime64 to Unix epoch
    1. Need to add 7 hours (25200 s) to get it to work correctly; this looks
       like a correction for a PDT (UTC-7) offset in the raw timestamps.
    """
    if isinstance(dt64, np.datetime64):
        if dt64.dtype == '<M8[ns]':
            return (dt64+np.timedelta64(7, 'h')).astype('float64')/1e9
    if isinstance(dt64, np.ndarray):
        if dt64[0].dtype == '<M8[ns]':
            f = lambda x: int(x)/1e9 + 25200
            vf = np.vectorize(f)
            return vf(dt64)
    print 'Type not supported %s' % type(dt64)
def datenumToDatetime(datenum):
    """Converts MATLAB datenum to python datetime
    Required steps:
    1. datetime ordinals and datenum differ by 366 days
    """
    if isinstance(datenum, int) or isinstance(datenum, float):
        return (datetime.datetime.fromordinal(int(datenum)) +
                datetime.timedelta(days=datenum % 1) -
                datetime.timedelta(days=366))
    if isinstance(datenum, np.ndarray):
        f = lambda x: (datetime.datetime.fromordinal(int(x)) +
                       datetime.timedelta(days=x % 1) -
                       datetime.timedelta(days=366))
        vf = np.vectorize(f)
        return vf(datenum)
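# Example (sketch): MATLAB's datenum(2012,1,1) is 734869, so
#   datenumToDatetime(734869.5)
# gives datetime.datetime(2012, 1, 1, 12, 0).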
def datetimeToEpoch(dt):
    """ Converts datetime to UNIX epoch
    Must add 7 hours to this to get epoch seconds to work correctly
    (see datetime64ToEpoch).
    """
    if isinstance(dt, datetime.datetime):
        return (dt - datetime.datetime(1970, 1, 1) +
                datetime.timedelta(hours=7)).total_seconds()
    if isinstance(dt, np.ndarray):
        f = lambda x: (x - datetime.datetime(1970, 1, 1) +
                       datetime.timedelta(hours=7)).total_seconds()
        vf = np.vectorize(f)
        return vf(dt)
    print 'Type not supported %s' % type(dt)
def doyToDatetime(doy, year):
    """ Converts day of year to datetime format.
    """
    if isinstance(doy, float):
        return datetime.datetime(year, 1, 1) + datetime.timedelta(doy)
    if isinstance(doy, np.ndarray):
        f = lambda x: datetime.datetime(year, 1, 1) + datetime.timedelta(x)
        vf = np.vectorize(f)
        return vf(doy)
#-------------------------------------------------------------------------------
# interp functions
#-------------------------------------------------------------------------------
def interpVars(dfData1, var1, dfData2, var2):
    """ Interpolates data of var1 onto the time of var2 using simple
    linear interpolation.
    Parameters
    ----------
    dfData1 -- DataFrame object that contains var1
    var1 -- Key into DataFrame of data for variable
    dfData2 -- DataFrame object that contains var2
    var2 -- Key into DataFrame of data for variable
    Returns
    -------
    tsData -- Series object with interpolated data
    """
    #idx = idTimeBounds(dfData1, dfData1)
    # Hand coded for development of ADV data for first day
    # Adjust to use identified indices
    f = interp1d(dfData1['time'][:], dfData1[var1][:])
    tsData = f(dfData2['time'][1:])
    return tsData
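# Example (sketch, hypothetical ctd/adv frames from loadNetCDF; puts CTD
# temperature on the ADV sample times, skipping the first ADV sample so the
# query times stay inside the CTD time range):
#   temp_on_adv = interpVars(ctd, 'temp', adv, 'Velocity 1')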
def interpDFtoCTD(ctd, df):
    """Interpolates all data in DataFrame object to CTD time.
    """
    for k in df.keys():
        f = interp1d(df['time'][:], df[k][:])
        df[k][:] = f(ctd['time'][1:])
    return df
#-------------------------------------------------------------------------------
# Other calculation functions
#-------------------------------------------------------------------------------
def calcBuoyFreq(ctd, rho='potential'):
    """Calculates the buoyancy frequency (Brunt-Vaisala) using CTD data frame.
    Parameters
    ----------
    ctd - CTD DataFrame object
    rho - 'potential' = rho_theta or 'average' uses mean of rho
    Returns
    -------
    N - Buoyancy frequency
    Notes
    -----
    rho:
    'potential':
    .. math::
        N = \sqrt{-\\frac{g}{\\rho_\\theta}\\frac{d\\rho_\\theta}{dz}}
    rho_theta - Potential density calculated via seawater package
    'average':
    .. math::
        N = \sqrt{-\\frac{g}{\\rho_{mean}}\\frac{d\\rho}{dz}}
    rho_mean - Density averaged over the file
    References
    ----------
    Petrie, J., et al. (2010). Local boundary shear stress estimates from
    velocity profiles with an ADCP. River Flow 2010. ISBN 978-3-939230-00-7
    Mikkelsen, O., et al. (2008). The influence of schlieren on in situ optical
    measurements for particle characterization. Limnology and Oceanography:
    Methods. Vol. 6. 133-143.
    """
    if rho == 'potential':
        rho_theta = sw.pden(ctd.water_salinity, ctd.water_temperature,
                            ctd.water_pressure)
        drho = rho_theta.diff()
        dz = ctd.water_pressure.diff()
        g = -9.8
        N = np.sqrt(-g/rho_theta*drho/dz)
        return N
    elif rho == 'average':
        rho = sw.dens(ctd.water_salinity, ctd.water_temperature,
                      ctd.water_pressure)
        rho_mean = np.mean(rho)
        drho = rho.diff()
        dz = ctd.water_pressure.diff()
        g = -9.8
        N = np.sqrt(-g/rho_mean*drho/dz)
        return N
    else:
        raise ValueError('Method not recognized: %s' % rho)
def calcPercentGoodLisst(bf, criteria=0.025):
    """Calculates the fraction (0-1) of good LISST data based on N (buoyancy
    frequency); values above criteria, or NaN, count as bad.
    """
    bad_value = bf > criteria
    bad_nan = np.isnan(bf)
    bad = np.logical_or(bad_value, bad_nan)
    return 1 - float(np.sum(bad))/bf.size
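# Example (sketch; ctd_proc is a hypothetical processed-CTD frame whose columns
# are the renamed water_* variables, as calcBuoyFreq expects):
#   N = calcBuoyFreq(ctd_proc, rho='potential')
#   print calcPercentGoodLisst(N, criteria=0.025)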
def addDepth(df, ctd):
    """Returns df with depth added based on pressure measurement interpolated to
    same time as instrument.
    """
    # Fresh water correction for decibar to meters from Sea-Bird
    depth = ctd['Pressure']*1.019716
    df['elev'] = depth.values[:]
    return df
def calcSeawaterDepth(lat, pres):
    """ Calculates the gravity variation with latitude and pressure, then
    calculates the depth when the ocean water column is assumed to be 0 C and
    35 PSU.
    From Sea-Bird application notes:
    www.seabird.com/application_notes/AN69.htm
    Parameters
    ----------
    lat - Float of latitude (decimal)
    pres - Float of pressure reading (decibars)
    returns
    -------
    depth - Numpy array of depths (meters)
    """
    x = np.square(np.sin(lat/57.29578))
    g = 9.780318*(1.0 + (5.2788e-3 + 2.36e-5*x)*x) + 1.092e-6*pres
    d = ((((-1.82e-15*pres + 2.279e-10)*pres - 2.2512e-5)*pres +
          9.72659)*pres)/g
    return d
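# Example (sketch): at the oc1204 station latitude, a 10 dbar reading
# corresponds to roughly 9.9 m of seawater:
#   calcSeawaterDepth(46.235, 10.0)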
def calcFreshwaterDepth(pres):
    """ Calculates depth from pressure for freshwater applications when high
    precision is not too critical.
    From Sea-Bird application notes:
    www.seabird.com/application_notes/AN69.htm
    Parameters
    ----------
    pres -- Float or ndarray of pressure reading (decibar)
    returns
    -------
    depth -- Float or ndarray depth (meters)
    """
    return pres*1.019716
def smoothAltim(alt, window, percent):
    """ Remove spikes from data based on percent of average of last n values.
    This method is totally heuristic, but effective with the following
    Parameters:
    window = 3
    percent = 20
    1. Apply median filter of size 3
    2. Remove spikes defined as > percent of last 5 points mean and replace
       with average of last 5
    3. Go through one more time and remove any spikes using same policy
    4. Smooth using size 5 moving average
    """
    alt = sp.signal.medfilt(alt, 3)
    end = window/2
    for i in np.arange(end, alt.shape[0]-(end+1)):
        avg = np.mean(alt[i-end:i+end+1])
        #print i, alt[i], avg
        if np.abs(alt[i]-avg)/avg > percent*0.01:
            print 'Removing value %d and replacing with average' % i
            alt[i] = avg
    for i in np.arange(0, alt.shape[0]-1):
        if np.abs(alt[i] - alt[i+1])/alt[i] > percent*0.01:
            print 'Removing value %d' % i
            # Replace with the local window mean, as in the first pass
            alt[i] = np.mean(alt[max(i-end, 0):i+end+1])
    alt = cbs.smooth(alt, window_len=5, window='flat')
    return alt
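# Example (sketch, hypothetical altimeter frame; smoothAltim expects a 1-d
# float array such as the altimeter 'range' variable):
#   clean = smoothAltim(altim['range'].values.astype('float64'), 3, 20)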
def calcDepth(altimDF, ctdDF):
    """ Calculates the depth of the water column using altimetry data and the
    pressure from the CTD.
    """
    # smoothAltim works on the 1-d altimeter 'range' array (see name dict)
    sAltim = smoothAltim(altimDF['range'].values, 3, 20)
    f = interp1d(altimDF['time'][::6], sAltim[::6])
    sAltim = f(ctdDF['time'])
    depth = sAltim + ctdDF['Pressure'][:]
    return depth
def idContiguousIndices(dfIndex):
    """ Identifies contiguous sets of indices.
    A new set starts wherever consecutive timestamps are more than 60 s apart.
    Parameters
    ----------
    dfIndex - df.index
    returns
    -------
    indexSet - List of [first, last] index pairs, one per contiguous set
    """
    indexSet = []
    start = dfIndex[0]
    for idx in range(dfIndex.shape[0]-1):
        if (dfIndex[idx+1] - dfIndex[idx]) > datetime.timedelta(0, 60):
            print 'Making set ', start, dfIndex[idx]
            indexSet.append([start, dfIndex[idx]])
            start = dfIndex[idx+1]
    # Close out the final (possibly only) set
    print 'Making set ', start, dfIndex[-1]
    indexSet.append([start, dfIndex[-1]])
    return indexSet
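# Example (sketch, a made-up index with one gap longer than 60 s):
#   idx = pd.DatetimeIndex(['2012-05-01 00:00:00', '2012-05-01 00:00:02',
#                           '2012-05-01 00:05:00', '2012-05-01 00:05:02'])
#   idContiguousIndices(idx)  # two sets, split at the > 60 s gap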
def calcOutOfWater(ctdDF):
    """ Identifies time ranges when the profiler is out of the water.
    Consider any pressure < 0.05 to be out of water to provide a buffer for
    bad data.
    OOW - Out of water
    Parameters
    ----------
    ctdDF - DataFrame of ctd instrument data
    returns
    -------
    times - List of times of when the profiler is out of the water.
    """
    OOWidx = ctdDF['Pressure'][ctdDF['Pressure'] < 0.05]
    OOWsets = idContiguousIndices(OOWidx.index)
    times = []
    for s in OOWsets:
        times.append([ctdDF['time'][s[0]], ctdDF['time'][s[1]]])
    return times
def filterOOW(times, df):
    """ Changes values from instruments to np.nan for times identified as being
    out of the water [changes are made in place].
    Note:
    Some DataFrame objects are typecast to float64 because int arrays cannot
    hold np.nan.
    Parameters
    ----------
    times - List of start and end times
    df - DataFrame of instrument data
    returns
    -------
    df - DataFrame of instrument data with values removed
    """
    df = df.apply(np.float64)
    for t in times:
        low = df['time'] > t[0]
        high = df['time'] < t[1]
        idx = df['time'][low & high]
        # Do not change time to np.nan
        timeIdx = np.where(df.columns == 'time')[0][0]
        print 'Removing %d values when the WP was out of the water' % idx.shape
        df.ix[idx.index, :timeIdx] = np.nan
        df.ix[idx.index, (timeIdx+1):] = np.nan
    return df
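# Example (sketch, chaining the two steps on hypothetical day frames):
#   oowTimes = calcOutOfWater(ctd)
#   adv = filterOOW(oowTimes, adv)  # ADV samples taken out of water -> NaN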
def processData(path, dirBase='oc1204B.0.F.', opt_insts=None):
    """ Processes stepA of all data (except LISST) for all days.
    """
    days = np.arange(1, 8)
    if opt_insts is not None:
        insts = opt_insts
    else:
        insts = procInst
    for inst in insts:
        if not os.path.exists(path+dirBase+instName[inst]):
            print 'Making dir '+path+dirBase+instName[inst]
            os.mkdir(path+dirBase+instName[inst])
    for d in days:
        print 'Processing day %s' % d
        procStepA(d, inPath=path)
def procStepA(day, inPath='./', outPath='./', type='Normal', ship='oc1204B'):
    """ Begins processing of Spring ETM cruise data.
    Step A:
    1. Subsample to 2 s intervals to make the size a bit more manageable
    2. Add lat, lon, and depth
    3. Saves changes to oc1204B.0.F.INSTRUMENT/dayDAY.nc
    Parameters
    ----------
    day - Int of day of cruise to process
    inPath - String of path to input files (where cmwp_day?_*?.nc exist)
    outPath - String of path to place output files
    """
    if type == 'Normal':
        print 'Processing normal data'
        f = os.path.join(inPath, 'cmwp_day%d_CTD.nc' % day)
        ctd = loadNetCDF(f).resample('2s')
        #oowTimes = calcOutOfWater(ctd)
        for inst in procInst:
            if inst == 'LISST':
                continue
            _open_and_save_file(day, inst, ctd, inPath, outPath, ship)
    elif type == 'LISST':
        print 'Processing LISST data'
        f = os.path.join(inPath, 'cmwp_day%d_CTD.nc' % day)
        ctd = loadNetCDF(f).resample('2s')
        _open_and_save_file(day, 'LISST_PROC', ctd, inPath, outPath, ship)
# Used to clear out memory after return
def _open_and_save_file(day, inst, ctd, inPath, outPath, ship):
    # Open old file
    f = 'cmwp_day%d_%s.nc' % (day, inst)
    f = os.path.join(inPath, f)
    print 'Opening %s' % f
    data = loadNetCDF(f, mode='r')
    if inst == 'LISST_PROC':
        d1 = data[0].resample('2s').reindex(ctd.index)
        d2 = data[1].resample('2s').reindex(ctd.index)
        d1 = addDepth(d1, ctd)
        data = (d1, d2)
        ship = ship+'.0.F.'+'LISST'
        type = 'LISST'
    else:
        if inst in subSample:
            data = data[::100].resample('2s').reindex(ctd.index)
        else:
            data = data.resample('2s').reindex(ctd.index)
        data = addDepth(data, ctd)
        ship = ship+'.0.F.'+instName[inst]
        type = 'Normal'
    # Save new netCDF file
    fDir = os.path.join(outPath, ship)
    dayFile = 'day%d.nc' % day
    f = os.path.join(fDir, dayFile)
    # f = dayFile
    saveNetCDF(f, data, type=type)
def calcVerticalBin(df):
    """Calculates the size of the vertical "bin," or the vertical distance
    traveled by the winched profiler between measurements.
    Charles recommended a "bin" size of 20 cm.
    Parameters
    ----------
    df - DataFrame object of WP data
    Returns
    -------
    dz/dt - ndarray of vertical travel per second between samples (dbar/s,
            roughly m/s)
    """
    dz = df.pressure.diff().values[1:]
    # np.diff on a datetime64[ns] index returns nanoseconds; convert to seconds
    dt = np.diff(df.index.values).astype('float64')/1e9
    return dz/dt
def medFilter(a):
    """ Calculates mean of readings before and after; if the current reading is
    more than 10% away from that mean, it is replaced with the mean.
    """
    b = a.copy()
    for i in xrange(1, b.shape[0]-1):
        m = (b[i-1] + b[i+1])/2.0
        if np.abs(b[i] - m)/m > 0.1:
            b[i] = m
    return b
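# Example (sketch): smooth single-point spikes in a 1-d array; note the filter
# compares each point against its (possibly already adjusted) neighbors:
#   spiky = np.array([10.0, 10.1, 30.0, 10.2, 10.1])
#   smoothed = medFilter(spiky)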
def saveNetCDF(filePath, df, type='Normal', cache=True):
    """ Saves data from an instrument (DataFrame) into a netCDF4 file.
    Also adds:
    lat, lon, and global description (offering)
    Parameters
    ----------
    filePath - String of path for file (Assumes inclusion of dir structure for description)
    df - DataFrame object of data to save to file
    type - String like origin of data
           - 'Normal' not from ADCP or LISST
           - 'ADCP'
           - 'LISST'
    cache - True: Adheres to netCDF like cache path structure
            e.g. oceanus1204c.0.F.LISST/day8.nc
    """
    if (type != 'Normal') and (type != 'ADCP') and (type != 'LISST'):
        raise ValueError('Data type not supported '+type)
    print 'Creating netCDF file '+filePath
    # zlib compression is a per-variable option (createVariable), not a
    # Dataset constructor argument
    f = netCDF4.Dataset(filePath, 'w', format='NETCDF4')
    if cache:
        f.description = filePath[:-8]
    else:
        f.description = filePath
    if type == 'Normal':
        f.createDimension('time', df.shape[0])
        times = datetime64ToEpoch(df.index.values)
    else:
        f.createDimension('time', df[0].shape[0])
        times = datetime64ToEpoch(df[0].index.values)
    f.createDimension('x', 1)
    f.createDimension('y', 1)
    tmpVar = f.createVariable('time', 'float64', ('time',))
    tmpVar[:] = times
    tmpVar.units = units['time']
    tmpVar = f.createVariable('x', 'float', ('x',))
    tmpVar[:] = lon
    tmpVar.units = 'latlon'
    tmpVar = f.createVariable('y', 'float', ('y',))
    tmpVar[:] = lat
    tmpVar.units = 'latlon'
    # Treat data by origin
    if type == 'Normal':
        for i, v in enumerate(df.keys()):
            print 'Variable '+name[v]+' : '+units[v]
            tmpVar = f.createVariable(name[v], df[v].dtype, ('time',))
            tmpVar[:] = df[v].values
            tmpVar.units = units[v]
            f.sync()
    elif type == 'ADCP':
        raise NotImplementedError('ADCP saving not implemented')
    elif type == 'LISST':
        # Treat volume and diameter df separately
        for i, v in enumerate(df[0].keys()):
            print 'Variable '+lisstVolNames[i]+' : '+'um'
            tmpVar = f.createVariable(lisstVolNames[i], 'float64', ('time',))
            if v == 'elev':
                tmpVar[:] = df[0]['elev'].values
                tmpVar.units = 'm'
            else:
                tmpVar[:] = df[0][i].values
                tmpVar.units = 'um'
            f.sync()
        for i, v in enumerate(df[1].keys()):
            print 'Variable '+lisstDiamsNames[i]+' : '+lisstDiamsUnits[i]
            tmpVar = f.createVariable(lisstDiamsNames[i], 'float64', ('time',))
            tmpVar[:] = df[1][i].values
            tmpVar.units = lisstDiamsUnits[i]
            f.sync()
    f.close()
def applyEulerRotation(euler):
    """ Builds the rotation matrix for ADV velocity data described by the Euler
    angles, as implemented by John Dunlap in the cmwp_src codes.
    Parameters
    ----------
    euler - An ndarray (3,) of Euler angles
    returns
    -------
    R, R1, R2, R3 - Composed rotation matrix and the individual rotations
    """
    # Rotation about x-axis 3rd
    R1 = np.array([[1, 0, 0],
                   [0, np.cos(euler[0]), np.sin(euler[0])],
                   [0, -np.sin(euler[0]), np.cos(euler[0])]])
    # Rotation about y-axis 2nd
    R2 = np.array([[np.cos(euler[1]), 0, -np.sin(euler[1])],
                   [0, 1, 0],
                   [np.sin(euler[1]), 0, np.cos(euler[1])]])
    # Rotation about z-axis 1st
    R3 = np.array([[np.cos(euler[2]), np.sin(euler[2]), 0],
                   [-np.sin(euler[2]), np.cos(euler[2]), 0],
                   [0, 0, 1]])
    # Compose with matrix products; elementwise '*' on ndarrays would not
    # produce a rotation matrix
    R = np.dot(np.dot(R1, R2), R3)
    return R, R1, R2, R3
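# Example (sketch, rotating one ADV sample with the cruise coefficients;
# whether the result is meaningful is another matter, per the note on
# eulerRotations above):
#   R = applyEulerRotation(eulerRotations['1'])[0]
#   vel = np.array([100.0, -20.0, 5.0])  # mm/s
#   vel_rot = np.dot(R, vel)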
def idTimeBounds(dfData1, dfData2):
    """ Identifies indices into dfData2 that bound interpolation against
    dfData1 so as to avoid any extrapolation.
    """
    # Find the first time in dfData2 after dfData1 begins
    first = np.where(dfData2['time'] > dfData1['time'].values[0])[0][0]
    # Find the last time in dfData2 before dfData1 ends
    last = np.where(dfData2['time'] < dfData1['time'].values[-1])[0][-1]
    # Return these two indices
    return first, last