PageRenderTime 271ms CodeModel.GetById 40ms app.highlight 174ms RepoModel.GetById 48ms app.codeStats 0ms

/thirdparty/liblastfm2/src/fingerprint/fplib/FingerprintExtractor.cpp

http://github.com/tomahawk-player/tomahawk
C++ | 786 lines | 495 code | 163 blank | 128 comment | 81 complexity | 1d92239b693ba376ea7e5b5f31375634 MD5 | raw file
  1/*
  2   Copyright 2005-2009 Last.fm Ltd. <mir@last.fm>
  3
  4   This file is part of liblastfm.
  5
  6   liblastfm is free software: you can redistribute it and/or modify
  7   it under the terms of the GNU General Public License as published by
  8   the Free Software Foundation, either version 3 of the License, or
  9   (at your option) any later version.
 10
 11   liblastfm is distributed in the hope that it will be useful,
 12   but WITHOUT ANY WARRANTY; without even the implied warranty of
 13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 14   GNU General Public License for more details.
 15
 16   You should have received a copy of the GNU General Public License
 17   along with liblastfm.  If not, see <http://www.gnu.org/licenses/>.
 18*/
 19#include <iostream>
 20#include <limits>
 21#include <bitset>
 22#include <deque>
 23#include <vector>
 24#include <stdexcept>
 25#include <cmath>
 26#include <cstring>
 27
 28#include <samplerate.h> // libsamplerate
 29
 30#include "FingerprintExtractor.h"
 31#include "fp_helper_fun.h" // for GroupData
 32#include "Filter.h"
 33#include "FloatingAverage.h"
 34#include "OptFFT.h"
 35
 36//////////////////////////////////////////////////////////////////////////
 37
 38namespace fingerprint
 39{
 40
 41using namespace std;
 42static const int NUM_FRAMES_CLIENT = 32; // ~= 10 secs.
 43
 44enum eProcessType
 45{
 46   PT_UNKNOWN,
 47   PT_FOR_QUERY,
 48   PT_FOR_FULLSUBMIT
 49};
 50
 51//////////////////////////////////////////////////////////////////////////
 52
 53class PimplData
 54{
 55
 56public:
 57
 58   PimplData()
 59   : m_pDownsampledPCM(NULL), m_pDownsampledCurrIt(NULL),
 60     m_normalizedWindowMs(static_cast<unsigned int>(NORMALIZATION_SKIP_SECS * 1000 * 2)),
 61     m_compensateBufferSize(FRAMESIZE-OVERLAPSAMPLES + Filter::KEYWIDTH * OVERLAPSAMPLES),
 62     m_downsampledProcessSize(NUM_FRAMES_CLIENT*FRAMESIZE),
 63     // notice that the buffer has extra space on either side for the normalization window  
 64     m_fullDownsampledBufferSize( m_downsampledProcessSize + // the actual processed part
 65                                  m_compensateBufferSize +  // a compensation buffer for the fft
 66                                ((m_normalizedWindowMs * DFREQ / 1000) / 2) ), // a compensation buffer for the normalization
 67     m_normWindow(m_normalizedWindowMs * DFREQ / 1000),
 68     m_pFFT(NULL), m_pDownsampleState(NULL), m_processType(PT_UNKNOWN)
 69   {
 70      m_pFFT            = new OptFFT(m_downsampledProcessSize + m_compensateBufferSize);
 71      m_pDownsampledPCM = new float[m_fullDownsampledBufferSize];
 72
 73      // the end of ||-------m_bufferSize-------|-cb-|---norm/2---|| 
 74      //                                                           ^-- pEndDownsampledBuf
 75      m_pEndDownsampledBuf = m_pDownsampledPCM + m_fullDownsampledBufferSize;
 76
 77      // loading filters
 78      size_t numFilters = sizeof(rFilters) / sizeof(RawFilter) ;
 79      for (size_t i = 0; i < numFilters; ++i)
 80         m_filters.push_back( Filter( rFilters[i].ftid, rFilters[i].thresh, rFilters[i].weight ) );
 81
 82   }
 83
 84   ~PimplData()
 85   {
 86      if ( m_pFFT )
 87         delete m_pFFT;
 88      m_pFFT = NULL;
 89      if ( m_pDownsampledPCM )
 90         delete [] m_pDownsampledPCM;
 91      m_pDownsampledPCM = NULL;
 92
 93      if ( m_pDownsampleState )
 94         src_delete(m_pDownsampleState) ;
 95
 96   }
 97
 98   float*                 m_pDownsampledPCM;
 99   float*                 m_pDownsampledCurrIt;
100
101   const unsigned int     m_normalizedWindowMs;
102   const size_t           m_compensateBufferSize;
103   const size_t           m_downsampledProcessSize;
104   const size_t           m_fullDownsampledBufferSize;
105
106   FloatingAverage<double> m_normWindow;
107   OptFFT*                 m_pFFT;
108
109   //////////////////////////////////////////////////////////////////////////
110   
111   // libsamplerate
112   SRC_STATE*              m_pDownsampleState;
113   SRC_DATA                m_downsampleData;
114
115   vector<float>           m_floatInData;
116
117   //////////////////////////////////////////////////////////////////////////
118
119
120   bool                   m_groupsReady;
121   bool                   m_preBufferPassed;
122
123   eProcessType           m_processType;
124
125   size_t                 m_toSkipSize;
126   size_t                 m_toSkipMs;
127
128   size_t                 m_skippedSoFar;
129   bool                   m_skipPassed;
130
131   float*                 m_pEndDownsampledBuf;
132
133   int m_freq;
134   int m_nchannels;
135
136   unsigned int m_lengthMs;
137   int          m_minUniqueKeys;
138   unsigned int m_uniqueKeyWindowMs;
139
140   unsigned int m_toProcessKeys;
141   unsigned int m_totalWindowKeys;
142   
143   vector<Filter>     m_filters;
144
145   deque<GroupData>   m_groupWindow;
146   vector<GroupData>  m_groups;
147   unsigned int       m_processedKeys;
148
149   vector<unsigned int>   m_partialBits; // here just to avoid reallocation
150
151#if __BIG_ENDIAN__
152
153#define reorderbits(X)  ((((unsigned int)(X) & 0xff000000) >> 24) | \
154                        (((unsigned int)(X) & 0x00ff0000) >> 8)  | \
155                        (((unsigned int)(X) & 0x0000ff00) << 8)  | \
156                        (((unsigned int)(X) & 0x000000ff) << 24))
157
158   vector<GroupData>  m_bigEndianGroups;
159#endif
160};
161
162//////////////////////////////////////////////////////////////////////////
163
164void initCustom( PimplData& pd,
165                 int freq, int nchannels,
166                 unsigned int lengthMs, unsigned int skipMs,
167                 int minUniqueKeys, unsigned int uniqueKeyWindowMs, int duration );
168
169inline float getRMS( const FloatingAverage<double>& signal );
170unsigned int processKeys( deque<GroupData>& groups, size_t size, PimplData& pd );
171void         integralImage( float** ppFrames, unsigned int nFrames );
172void         computeBits( vector<unsigned int>& bits,
173                          const vector<Filter>& f, 
174                          float ** frames, unsigned int nframes );
175
176
177void src_short_to_float_and_mono_array(const short *in, float *out, int srclen, int nchannels);
178
179//////////////////////////////////////////////////////////////////////////
180
181// -----------------------------------------------------------------------------
182
183FingerprintExtractor::FingerprintExtractor()
184: m_pPimplData(NULL)
185{
186   m_pPimplData = new PimplData();
187}
188
189// -----------------------------------------------------------------------------
190
191FingerprintExtractor::~FingerprintExtractor()
192{
193   if ( m_pPimplData )
194      delete m_pPimplData;
195}
196
197// -----------------------------------------------------------------------------
198
199size_t FingerprintExtractor::getToSkipMs() 
200{ return m_pPimplData->m_toSkipMs; }
201
202// -----------------------------------------------------------------------------
203
204size_t FingerprintExtractor::getMinimumDurationMs()
205{
206   return static_cast<size_t>( (QUERY_SIZE_SECS + NORMALIZATION_SKIP_SECS * 2 + GUARD_SIZE_SECS) * 1000 );
207}
208
209// -----------------------------------------------------------------------------
210
211size_t FingerprintExtractor::getVersion()
212{ return FINGERPRINT_LIB_VERSION; }
213
214// -----------------------------------------------------------------------------
215
216void FingerprintExtractor::initForQuery(int freq, int nchannels, int duration )
217{
218   m_pPimplData->m_skipPassed = false;
219   m_pPimplData->m_processType = PT_FOR_QUERY;
220
221   if ( !m_pPimplData )
222      throw std::runtime_error("Not enough RAM to allocate the fingerprinter!");
223
224   initCustom( *m_pPimplData,
225               freq, nchannels,
226               static_cast<unsigned int>(QUERY_SIZE_SECS * 1000),
227               static_cast<unsigned int>(QUERY_START_SECS * 1000), 
228               MIN_UNIQUE_KEYS, 
229               static_cast<unsigned int>(UPDATE_SIZE_SECS * 1000), duration );
230}
231
232// -----------------------------------------------------------------------------
233
234void FingerprintExtractor::initForFullSubmit(int freq, int nchannels )
235{
236   m_pPimplData->m_skipPassed = true;
237   m_pPimplData->m_processType = PT_FOR_FULLSUBMIT;
238
239   if ( !m_pPimplData )
240      throw std::runtime_error("Not enough RAM to allocate the fingerprinter!");
241
242   initCustom( *m_pPimplData, 
243               freq, nchannels, 
244               numeric_limits<unsigned int>::max(), 
245               0, MIN_UNIQUE_KEYS, 0, -1 );
246}
247
248// -----------------------------------------------------------------------------
249
250void initCustom( PimplData& pd, 
251                 int freq, int nchannels,
252                 unsigned int lengthMs, 
253                 unsigned int skipMs, 
254                 int minUniqueKeys, 
255                 unsigned int uniqueKeyWindowMs, int duration )
256{
257   //////////////////////////////////////////////////////////////////////////
258   pd.m_freq = freq;
259   pd.m_nchannels = nchannels;
260   pd.m_lengthMs = lengthMs;
261   pd.m_minUniqueKeys = minUniqueKeys;
262   pd.m_uniqueKeyWindowMs = uniqueKeyWindowMs;
263   //////////////////////////////////////////////////////////////////////////
264
265   // ***********************************************************************
266   if ( pd.m_pDownsampleState )
267      pd.m_pDownsampleState = src_delete(pd.m_pDownsampleState) ;
268   pd.m_pDownsampleState = src_new (SRC_SINC_FASTEST, 1, NULL) ;
269   pd.m_downsampleData.src_ratio = FDFREQ / freq;
270   // ***********************************************************************
271
272   //////////////////////////////////////////////////////////////////////////
273   if ( pd.m_processType == PT_FOR_FULLSUBMIT ) 
274      skipMs = 0; // make sure
275   else if ( duration > 0 )
276   {
277      // skip + size + right normalization window + FFT guard
278      // 
279      int stdDurationMs = static_cast<int>((QUERY_START_SECS + QUERY_SIZE_SECS + NORMALIZATION_SKIP_SECS + GUARD_SIZE_SECS) * 1000);
280      int actualDurationMs = duration * 1000;
281      // compute the actual skipMs depending on the duration
282      if ( actualDurationMs < stdDurationMs )
283         skipMs -= max( stdDurationMs - actualDurationMs, 0 );
284   }
285
286   pd.m_toSkipMs = max( static_cast<int>(skipMs) - static_cast<int>((pd.m_normalizedWindowMs/2)), 0 );
287   pd.m_toSkipSize = static_cast<size_t>( freq * nchannels * 
288                                          (pd.m_toSkipMs / 1000.0) ); // half the norm window in secs;
289
290   //if ( pd.m_processType == PT_FOR_QUERY && skipMs > pd.m_normalizedWindowMs/2 )
291   //{
292   //   pd.m_toSkipMs = skipMs - (pd.m_normalizedWindowMs/2);
293   //   pd.m_toSkipSize = static_cast<size_t>( freq * nchannels * 
294   //                                         (pd.m_toSkipMs / 1000.0) ); // half the norm window in secs
295   //}
296   //else
297   //{
298   //   pd.m_toSkipMs = 0;
299   //   pd.m_toSkipSize = 0; // half of the normalization window will be skipped in ANY case
300   //}
301
302   pd.m_skippedSoFar = 0;
303   pd.m_groupsReady = false;
304   pd.m_preBufferPassed = false;
305
306   // prepare the position for pre-buffering
307   pd.m_pDownsampledCurrIt = pd.m_pDownsampledPCM + (pd.m_downsampledProcessSize - (pd.m_normWindow.size() / 2) ); 
308
309   pd.m_toProcessKeys = fingerprint::getTotalKeys(pd.m_lengthMs);//  (m_lengthMs * DFREQ) / (1000 * OVERLAPSAMPLES) + 1;
310   pd.m_totalWindowKeys = fingerprint::getTotalKeys(pd.m_uniqueKeyWindowMs); //(m_uniqueKeyWindowMs * DFREQ) / (1000 * OVERLAPSAMPLES) + 1;
311
312   if (pd.m_toProcessKeys == 1)
313      pd.m_toProcessKeys = 0;
314   if (pd.m_totalWindowKeys == 1)
315      pd.m_totalWindowKeys = 0;
316
317   pd.m_processedKeys = 0;
318
319   pd.m_groupWindow.clear();
320   pd.m_processedKeys = 0;
321}
322
323// -----------------------------------------------------------------------------
324
325
326// * cb = compensate buffer size
327// * norm = floating normalization window size
328//
329// PREBUFFER:
330//     (-------m_bufferSize-------)
331//    ||    EMPTY    |---norm/2---|-cb-|---norm/2---||
332// 1.                {--------read frames-----------}
333// 2.                {--read normalize window--}      
334// 3.                             {----}   normalize
335//
336// 1. read [norm + cb] frames to m_bufferSize - norm/2
337// 2. read [m_buffersize - norm/2...m_buffersize + norm/2] into normalize window
338// 3. normalize [m_bufferSize..m_bufferSize+cb]
339//
340// PROCESS:
341//
342//     ||-------m_bufferSize-------|-cb-|---norm/2---||
343// 1.   <--------------------------{------copy-------}
344// 2.                    {--------read frames-------}
345// 3.        {---------normalize--------}
346// 4.   {------fft/process/whatevs------}
347//
348// 1. copy [m_bufferSize..m_bufferSize + cb + norm/2] to beginning
349// 2. read m_bufferSize frames to cb + norm/2
350// 3. normalize [cb..m_bufferSize+cb]
351// 4. fft/process/whatevs [0...m_bufferSize+cb]
352//
353// repeat until enough blocks processed and enough groups!
354//
355bool FingerprintExtractor::process( const short* pPCM, size_t num_samples, bool end_of_stream )
356{
357   if ( num_samples == 0 )
358      return false;
359
360   // easier read
361   PimplData& pd = *m_pPimplData;
362
363   if ( pd.m_processType == PT_UNKNOWN )
364      throw std::runtime_error("Please call initForQuery() or initForFullSubmit() before process()!");
365
366   const short* pSourcePCMIt = pPCM;
367   const short* pSourcePCMIt_end = pPCM + num_samples;
368
369   if ( !pd.m_skipPassed )
370   {
371      // needs to skip data? (reminder: the query needs to skip QUERY_START_SECS (- half of the normalization window)
372      if ( pd.m_skippedSoFar + num_samples > pd.m_toSkipSize )
373      {
374         pSourcePCMIt = pPCM + (pd.m_toSkipSize - pd.m_skippedSoFar);
375         pd.m_skipPassed = true;
376      }
377      else
378      {
379         // need more data
380         pd.m_skippedSoFar += num_samples;
381         return false;
382      }
383   }
384
385   pair<size_t, size_t> readData(0,0);
386   pd.m_downsampleData.end_of_input = end_of_stream ? 1 : 0;
387
388   //////////////////////////////////////////////////////////////////////////
389   // PREBUFFER:
390   if ( !pd.m_preBufferPassed )
391   {
392      // 1. downsample [norm + cb] frames to m_bufferSize - norm/2
393      pd.m_floatInData.resize( (pSourcePCMIt_end - pSourcePCMIt) / pd.m_nchannels);
394      src_short_to_float_and_mono_array( pSourcePCMIt, 
395                                         &(pd.m_floatInData[0]), static_cast<int>(pSourcePCMIt_end - pSourcePCMIt), 
396                                         pd.m_nchannels);
397
398      pd.m_downsampleData.data_in = &(pd.m_floatInData[0]);
399      pd.m_downsampleData.input_frames = static_cast<long>(pd.m_floatInData.size());
400
401      pd.m_downsampleData.data_out = pd.m_pDownsampledCurrIt;
402      pd.m_downsampleData.output_frames = static_cast<long>(pd.m_pEndDownsampledBuf - pd.m_pDownsampledCurrIt);
403
404      int err = src_process(pd.m_pDownsampleState, &(pd.m_downsampleData));
405      if ( err )
406         throw std::runtime_error( src_strerror(err) );
407
408      pd.m_pDownsampledCurrIt += pd.m_downsampleData.output_frames_gen;
409
410      if ( pd.m_pDownsampledCurrIt != pd.m_pEndDownsampledBuf )
411         return false; // NEED MORE DATA
412
413      pSourcePCMIt += pd.m_downsampleData.input_frames_used * pd.m_nchannels;
414
415      size_t pos = pd.m_downsampledProcessSize;
416      size_t window_pos = pd.m_downsampledProcessSize - pd.m_normWindow.size() / 2;
417      const size_t end_window_pos = window_pos + pd.m_normWindow.size();
418
419      // 2. read [m_buffersize - norm/2...m_buffersize + norm/2] into normalize window
420      for (; window_pos < end_window_pos ; ++window_pos)
421         pd.m_normWindow.add(pd.m_pDownsampledPCM[window_pos] * pd.m_pDownsampledPCM[window_pos]);
422
423      // 3. normalize [m_bufferSize..m_bufferSize+cb]
424      for (; pos < pd.m_downsampledProcessSize + pd.m_compensateBufferSize; ++pos, ++window_pos)
425      {
426         pd.m_pDownsampledPCM[pos] /= getRMS(pd.m_normWindow);
427         pd.m_normWindow.add(pd.m_pDownsampledPCM[window_pos] * pd.m_pDownsampledPCM[window_pos]);
428      }
429
430      pd.m_preBufferPassed = true;
431   }
432
433   //////////////////////////////////////////////////////////////////////////
434   // PROCESS:
435
436   bool found_enough_unique_keys = false;
437   while (pd.m_toProcessKeys == 0 || pd.m_processedKeys < pd.m_toProcessKeys || !found_enough_unique_keys)
438   {
439
440      // 1. copy [m_bufferSize..m_bufferSize + cb + norm/2] to beginning
441      if ( pd.m_pDownsampledCurrIt == pd.m_pEndDownsampledBuf )
442      {
443         memcpy( pd.m_pDownsampledPCM, pd.m_pDownsampledPCM + pd.m_downsampledProcessSize,
444                (pd.m_compensateBufferSize + (pd.m_normWindow.size() / 2)) * sizeof(float));
445         pd.m_pDownsampledCurrIt = pd.m_pDownsampledPCM + (pd.m_compensateBufferSize + (pd.m_normWindow.size() / 2));
446      }
447
448      // 2. read m_bufferSize frames to cb + norm/2
449      pd.m_floatInData.resize( (pSourcePCMIt_end - pSourcePCMIt) / pd.m_nchannels);
450
451      if ( pd.m_floatInData.empty() )
452         return false;
453
454      src_short_to_float_and_mono_array( pSourcePCMIt, 
455                                         &(pd.m_floatInData[0]), static_cast<int>(pSourcePCMIt_end - pSourcePCMIt), 
456                                         pd.m_nchannels);
457
458      pd.m_downsampleData.data_in = &(pd.m_floatInData[0]);
459      pd.m_downsampleData.input_frames = static_cast<long>(pd.m_floatInData.size());
460
461      pd.m_downsampleData.data_out = pd.m_pDownsampledCurrIt;
462      pd.m_downsampleData.output_frames = static_cast<long>(pd.m_pEndDownsampledBuf - pd.m_pDownsampledCurrIt);
463
464      int err = src_process(pd.m_pDownsampleState, &(pd.m_downsampleData));
465      if ( err )
466         throw std::runtime_error( src_strerror(err) );
467
468      pd.m_pDownsampledCurrIt += pd.m_downsampleData.output_frames_gen;
469
470      if ( pd.m_pDownsampledCurrIt != pd.m_pEndDownsampledBuf && !end_of_stream )
471         return false; // NEED MORE DATA
472
473      //pSourcePCMIt += readData.second;
474      pSourcePCMIt += pd.m_downsampleData.input_frames_used * pd.m_nchannels;
475
476      // ********************************************************************
477
478      // 3. normalize [cb..m_bufferSize+cb]
479      size_t pos = static_cast<unsigned int>(pd.m_compensateBufferSize);
480      size_t window_pos = static_cast<unsigned int>(pd.m_compensateBufferSize + (pd.m_normWindow.size() / 2));
481
482      for(; pos < pd.m_downsampledProcessSize + pd.m_compensateBufferSize /* m_fullDownsampledBufferSize*/; ++pos, ++window_pos)
483      {
484         pd.m_pDownsampledPCM[pos] /= getRMS(pd.m_normWindow);
485         pd.m_normWindow.add(pd.m_pDownsampledPCM[window_pos] * pd.m_pDownsampledPCM[window_pos]);
486      }
487
488      // 4. fft/process/whatevs [0...m_bufferSize+cb]
489      pd.m_processedKeys += processKeys(pd.m_groupWindow, pos, pd);
490
491      // we have too many keys, now we have to chop either one end or the other
492      if (pd.m_toProcessKeys != 0 && pd.m_processedKeys > pd.m_toProcessKeys)
493      {
494         // set up window begin and end
495         deque<GroupData>::iterator itBeg = pd.m_groupWindow.begin(), itEnd = pd.m_groupWindow.end();
496         unsigned int offset_left, offset_right;
497
498         found_enough_unique_keys = 
499            fingerprint::findSignificantGroups( itBeg, itEnd, offset_left, offset_right, pd.m_toProcessKeys,
500                                                pd.m_totalWindowKeys, pd.m_minUniqueKeys);
501
502         // if we're happy with this set, snip the beginning and end of the grouped keys
503         if (found_enough_unique_keys)
504         {
505            itBeg->count -= offset_left;
506            if (offset_right > 0 && itEnd != pd.m_groupWindow.end())
507            {
508               itEnd->count = offset_right;
509               ++itEnd;
510            }
511         }
512
513         // chop the deque
514         copy(itBeg, itEnd, pd.m_groupWindow.begin());
515         pd.m_groupWindow.resize(itEnd - itBeg);            
516
517         // recalc keys
518         pd.m_processedKeys = 0;
519         for (deque<GroupData>::const_iterator it = pd.m_groupWindow.begin(); it != pd.m_groupWindow.end(); ++it)
520            pd.m_processedKeys += it->count;
521      }
522
523      if ( end_of_stream )
524         break;
525
526   } // while (totalKeys == 0 || keys < totalKeys || !found_enough_unique_keys)
527
528
529   if (pd.m_toProcessKeys != 0 && pd.m_processedKeys < pd.m_toProcessKeys)
530      throw std::runtime_error("Couldn't deliver the requested number of keys (it's the file too short?)");
531
532   if ((pd.m_toProcessKeys != 0 && !found_enough_unique_keys) || 
533       (pd.m_toProcessKeys == 0 && !enoughUniqueGoodGroups(pd.m_groupWindow.begin(), pd.m_groupWindow.end(), pd.m_minUniqueKeys)))
534   {
535      throw std::runtime_error("Not enough unique keys (it's the file too short?)");
536   }
537
538   // copy to a vector so that they can be returned as contiguous data
539   pd.m_groups.resize(pd.m_groupWindow.size());
540   copy(pd.m_groupWindow.begin(), pd.m_groupWindow.end(), pd.m_groups.begin());
541
542   pd.m_groupsReady = true;
543   pd.m_processType = PT_UNKNOWN;
544   return true;
545}
546
547// -----------------------------------------------------------------------------
548
549pair<const char*, size_t> FingerprintExtractor::getFingerprint()
550{
551   // easier read
552   PimplData& pd = *m_pPimplData;
553
554   if ( pd.m_groupsReady )
555   {
556#if __BIG_ENDIAN__
557      pd.m_bigEndianGroups.resize(pd.m_groups.size());
558      for ( size_t i = 0; i < pd.m_groups.size(); ++i )
559      {
560         pd.m_bigEndianGroups[i].key = reorderbits(pd.m_groups[i].key);
561         pd.m_bigEndianGroups[i].count = reorderbits(pd.m_groups[i].count);
562      }
563
564      return make_pair(reinterpret_cast<const char*>(&pd.m_bigEndianGroups[0]), pd.m_bigEndianGroups.size() * sizeof(GroupData) );
565
566#else
567      return make_pair(reinterpret_cast<const char*>(&pd.m_groups[0]), pd.m_groups.size() * sizeof(GroupData) );
568#endif
569   }
570   else
571      return make_pair(reinterpret_cast<const char*>(0), 0); // here's where null_ptr would become useful!
572}
573
574// -----------------------------------------------------------------------------
575// -----------------------------------------------------------------------------
576// -----------------------------------------------------------------------------
577
578float getRMS(const FloatingAverage<double>& signal)
579{
580   // we don't want to normalize by the real rms, because excessive clipping will occur
581   float rms = sqrtf(static_cast<float>(signal.getAverage())) * 10.0F;
582
583   if (rms < 0.1F)
584      rms = 0.1F;
585   else if (rms > 3.0F)
586      rms = 3.0F;
587
588   return rms;
589}
590
591// -----------------------------------------------------------------------------
592
593unsigned int processKeys( deque<GroupData>& groups, size_t size, PimplData& pd )
594{
595   size_t read_size = min(size, pd.m_downsampledProcessSize + pd.m_compensateBufferSize);
596
597   unsigned int numFrames = pd.m_pFFT->process(pd.m_pDownsampledPCM, read_size);
598
599   if ( numFrames <= Filter::KEYWIDTH )
600      return 0; // skip it when the number of frames is too small
601
602   float** ppFrames = pd.m_pFFT->getFrames();
603
604   integralImage(ppFrames, numFrames);
605   computeBits(pd.m_partialBits, pd.m_filters, ppFrames, numFrames);
606   fingerprint::keys2GroupData(pd.m_partialBits, groups, false);
607
608   return static_cast<unsigned int>(pd.m_partialBits.size());
609
610}
611
612// -----------------------------------------------------------------------------
613
614void integralImage(float** ppFrames, unsigned int nFrames)
615{
616   for (unsigned int y = 1; y < nFrames; y++) 
617   {
618      ppFrames[y][0] += ppFrames[y-1][0];
619   }
620
621   for (unsigned int x = 1; x < Filter::NBANDS; x++) 
622   {
623      ppFrames[0][x] += ppFrames[0][x-1];
624   }
625
626   for (unsigned int y = 1; y < nFrames; y++) 
627   {
628      for (unsigned int x = 1; x < Filter::NBANDS; x++) 
629      {
630         ppFrames[y][x] += static_cast<float>( static_cast<double>(ppFrames[y-1][x]) + 
631                                               static_cast<double>(ppFrames[y][x-1]) - 
632                                               static_cast<double>(ppFrames[y-1][x-1]) );
633      }
634   }
635}
636
637// ---------------------------------------------------------------------
638//
639/// Convert bands to bits, using the supplied filters
640void computeBits( vector<unsigned int>& bits,
641                  const vector<Filter>& f, 
642                  float ** frames, unsigned int nframes ) 
643{
644   unsigned int first_time = Filter::KEYWIDTH / 2 + 1;
645   unsigned int last_time = nframes - Filter::KEYWIDTH / 2;
646
647   unsigned int numBits = last_time - first_time + 1;
648   bits.resize(numBits);
649
650   const unsigned int fSize = static_cast<unsigned int>(f.size());
651   std::bitset<32> bt;
652   double X = 0;
653
654   for (unsigned int t2 = first_time; t2 <= last_time; ++t2) 
655   {
656
657      for (unsigned int i = 0; i < fSize; ++i) 
658      {
659         // we subtract 1 from t1 and b1 because we use integral images
660      
661         unsigned int t1 = (unsigned int) ((float) t2 - f[i].wt / 2.0 - 1);
662         unsigned int t3 = (unsigned int) ((float) t2 + f[i].wt / 2.0 - 1);
663         unsigned int b1 = f[i].first_band;
664         unsigned int b2 = (unsigned int) round__((float) b1 + f[i].wb / 2.0) - 1;
665         unsigned int b3 = b1 + f[i].wb - 1;
666         --b1;
667
668         unsigned int t_1q = (t1 + t2) / 2; // one quarter time 
669         unsigned int t_3q = t_1q + (t3 - t1 + 1) / 2; // three quarter time
670         unsigned int b_1q = (b1 + b2) / 2; // one quarter band
671         unsigned int b_3q = b_1q + (b3 - b1) / 2; // three quarter band
672         
673         X = 0;
674         
675         // we should check from t1 > 0, but in practice, this doesn't happen
676         // we subtract 1 from everything because this came from matlab where indices start from 1
677         switch (f[i].filter_type) {
678         case 1: { // total energy
679            if (b1 > 0)
680               X = static_cast<double>(frames[t3-1][b3-1]) - static_cast<double>(frames[t3-1][b1-1])
681                 - static_cast<double>(frames[t1-1][b3-1]) + static_cast<double>(frames[t1-1][b1-1]);
682            else
683               X = static_cast<double>(frames[t3-1][b3-1]) - static_cast<double>(frames[t1-1][b3-1]);
684            break;
685         }
686         case 2: { // energy difference over time
687            if (b1 > 0)
688               X = static_cast<double>(frames[t1-1][b1-1]) - 2*static_cast<double>(frames[t2-2][b1-1])
689                 + static_cast<double>(frames[t3-1][b1-1]) - static_cast<double>(frames[t1-1][b3-1])
690                 + 2*static_cast<double>(frames[t2-2][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
691            else
692               X = - static_cast<double>(frames[t1-1][b3-1]) + 2*static_cast<double>(frames[t2-2][b3-1])
693                   - static_cast<double>(frames[t3-1][b3-1]);
694            break;
695         
696         }
697         case 3: { // energy difference over bands
698            if (b1 > 0)
699               X = static_cast<double>(frames[t1-1][b1-1]) - static_cast<double>(frames[t3-1][b1-1])
700                 - 2*static_cast<double>(frames[t1-1][b2-1]) + 2*static_cast<double>(frames[t3-1][b2-1])
701                 + static_cast<double>(frames[t1-1][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
702            else
703               X = - 2*static_cast<double>(frames[t1-1][b2-1]) + 2*static_cast<double>(frames[t3-1][b2-1])
704                   + static_cast<double>(frames[t1-1][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
705            break;   
706         }
707         case 4: {
708            // energy difference over time and bands
709            if (b1 > 0)
710               X = static_cast<double>(frames[t1-1][b1-1]) - 2*static_cast<double>(frames[t2-2][b1-1])
711                 + static_cast<double>(frames[t3-1][b1-1]) - 2*static_cast<double>(frames[t1-1][b2-1])
712                 + 4*static_cast<double>(frames[t2-2][b2-1]) - 2*static_cast<double>(frames[t3-1][b2-1])
713                 + static_cast<double>(frames[t1-1][b3-1]) - 2*static_cast<double>(frames[t2-2][b3-1])
714                 + static_cast<double>(frames[t3-1][b3-1]);
715            else
716               X = - 2*static_cast<double>(frames[t1-1][b2-1]) + 4*static_cast<double>(frames[t2-2][b2-1])
717                   - 2*static_cast<double>(frames[t3-1][b2-1]) + static_cast<double>(frames[t1-1][b3-1])
718                   - 2*static_cast<double>(frames[t2-2][b3-1]) + static_cast<double>(frames[t3-1][b3-1]);
719            break;   
720         }
721         case 5: { // time peak
722            if (b1 > 0)
723               X = - static_cast<double>(frames[t1-1][b1-1]) + 2*static_cast<double>(frames[t_1q-1][b1-1])
724                   - 2*static_cast<double>(frames[t_3q-1][b1-1]) + static_cast<double>(frames[t3-1][b1-1])
725                   + static_cast<double>(frames[t1-1][b3-1]) - 2*static_cast<double>(frames[t_1q-1][b3-1])
726                   + 2*static_cast<double>(frames[t_3q-1][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
727            else
728               X = static_cast<double>(frames[t1-1][b3-1]) - 2*static_cast<double>(frames[t_1q-1][b3-1])
729                 + 2*static_cast<double>(frames[t_3q-1][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
730                  
731            break;
732         }
733         case 6: { // band beak
734            if (b1 > 0)
735               X = - static_cast<double>(frames[t1-1][b1-1]) + static_cast<double>(frames[t3-1][b1-1])
736                   + 2*static_cast<double>(frames[t1-1][b_1q-1]) - 2*static_cast<double>(frames[t3-1][b_1q-1])
737                   - 2*static_cast<double>(frames[t1-1][b_3q-1]) + 2*static_cast<double>(frames[t3-1][b_3q-1])
738                   + static_cast<double>(frames[t1-1][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
739            else
740               X = + 2*static_cast<double>(frames[t1-1][b_1q-1]) - 2*static_cast<double>(frames[t3-1][b_1q-1])
741                   - 2*static_cast<double>(frames[t1-1][b_3q-1]) + 2*static_cast<double>(frames[t3-1][b_3q-1])
742                   + static_cast<double>(frames[t1-1][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
743
744            break;
745         }
746         }
747
748         bt[i] = X > f[i].threshold;
749      }
750
751      bits[t2 - first_time] = bt.to_ulong();
752   }
753}
754
755// -----------------------------------------------------------------------------
756
757void src_short_to_float_and_mono_array( const short *in, float *out, int srclen, int nchannels )
758{
759   switch ( nchannels )
760   {
761   case 1:
762      src_short_to_float_array(in, out, srclen);
763      break;
764   case 2:
765      {
766         // this can be optimized
767         int j = 0;
768         const double div = numeric_limits<short>::max() * nchannels;
769         for ( int i = 0; i < srclen; i += 2, ++j )
770         {
771            out[j] = static_cast<float>( static_cast<double>(static_cast<int>(in[i]) + static_cast<int>(in[i+1])) / div );
772         }
773      }
774      break;
775
776   default:
777      throw( std::runtime_error("Unsupported number of channels!") );
778   }
779
780}
781
782// -----------------------------------------------------------------------------
783
784} // end of namespace
785
786// -----------------------------------------------------------------------------