/thirdparty/liblastfm2/src/fingerprint/fplib/FingerprintExtractor.cpp

http://github.com/tomahawk-player/tomahawk · C++ · 786 lines · 495 code · 163 blank · 128 comment · 81 complexity · 1d92239b693ba376ea7e5b5f31375634 MD5 · raw file

  1. /*
  2. Copyright 2005-2009 Last.fm Ltd. <mir@last.fm>
  3. This file is part of liblastfm.
  4. liblastfm is free software: you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. liblastfm is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with liblastfm. If not, see <http://www.gnu.org/licenses/>.
  14. */
#include <algorithm>
#include <bitset>
#include <cmath>
#include <cstddef>
#include <cstring>
#include <deque>
#include <iostream>
#include <limits>
#include <stdexcept>
#include <utility>
#include <vector>
#include <samplerate.h> // libsamplerate
#include "FingerprintExtractor.h"
#include "fp_helper_fun.h" // for GroupData
#include "Filter.h"
#include "FloatingAverage.h"
#include "OptFFT.h"
  29. //////////////////////////////////////////////////////////////////////////
  30. namespace fingerprint
  31. {
  32. using namespace std;
  33. static const int NUM_FRAMES_CLIENT = 32; // ~= 10 secs.
  34. enum eProcessType
  35. {
  36. PT_UNKNOWN,
  37. PT_FOR_QUERY,
  38. PT_FOR_FULLSUBMIT
  39. };
  40. //////////////////////////////////////////////////////////////////////////
  41. class PimplData
  42. {
  43. public:
  44. PimplData()
  45. : m_pDownsampledPCM(NULL), m_pDownsampledCurrIt(NULL),
  46. m_normalizedWindowMs(static_cast<unsigned int>(NORMALIZATION_SKIP_SECS * 1000 * 2)),
  47. m_compensateBufferSize(FRAMESIZE-OVERLAPSAMPLES + Filter::KEYWIDTH * OVERLAPSAMPLES),
  48. m_downsampledProcessSize(NUM_FRAMES_CLIENT*FRAMESIZE),
  49. // notice that the buffer has extra space on either side for the normalization window
  50. m_fullDownsampledBufferSize( m_downsampledProcessSize + // the actual processed part
  51. m_compensateBufferSize + // a compensation buffer for the fft
  52. ((m_normalizedWindowMs * DFREQ / 1000) / 2) ), // a compensation buffer for the normalization
  53. m_normWindow(m_normalizedWindowMs * DFREQ / 1000),
  54. m_pFFT(NULL), m_pDownsampleState(NULL), m_processType(PT_UNKNOWN)
  55. {
  56. m_pFFT = new OptFFT(m_downsampledProcessSize + m_compensateBufferSize);
  57. m_pDownsampledPCM = new float[m_fullDownsampledBufferSize];
  58. // the end of ||-------m_bufferSize-------|-cb-|---norm/2---||
  59. // ^-- pEndDownsampledBuf
  60. m_pEndDownsampledBuf = m_pDownsampledPCM + m_fullDownsampledBufferSize;
  61. // loading filters
  62. size_t numFilters = sizeof(rFilters) / sizeof(RawFilter) ;
  63. for (size_t i = 0; i < numFilters; ++i)
  64. m_filters.push_back( Filter( rFilters[i].ftid, rFilters[i].thresh, rFilters[i].weight ) );
  65. }
  66. ~PimplData()
  67. {
  68. if ( m_pFFT )
  69. delete m_pFFT;
  70. m_pFFT = NULL;
  71. if ( m_pDownsampledPCM )
  72. delete [] m_pDownsampledPCM;
  73. m_pDownsampledPCM = NULL;
  74. if ( m_pDownsampleState )
  75. src_delete(m_pDownsampleState) ;
  76. }
  77. float* m_pDownsampledPCM;
  78. float* m_pDownsampledCurrIt;
  79. const unsigned int m_normalizedWindowMs;
  80. const size_t m_compensateBufferSize;
  81. const size_t m_downsampledProcessSize;
  82. const size_t m_fullDownsampledBufferSize;
  83. FloatingAverage<double> m_normWindow;
  84. OptFFT* m_pFFT;
  85. //////////////////////////////////////////////////////////////////////////
  86. // libsamplerate
  87. SRC_STATE* m_pDownsampleState;
  88. SRC_DATA m_downsampleData;
  89. vector<float> m_floatInData;
  90. //////////////////////////////////////////////////////////////////////////
  91. bool m_groupsReady;
  92. bool m_preBufferPassed;
  93. eProcessType m_processType;
  94. size_t m_toSkipSize;
  95. size_t m_toSkipMs;
  96. size_t m_skippedSoFar;
  97. bool m_skipPassed;
  98. float* m_pEndDownsampledBuf;
  99. int m_freq;
  100. int m_nchannels;
  101. unsigned int m_lengthMs;
  102. int m_minUniqueKeys;
  103. unsigned int m_uniqueKeyWindowMs;
  104. unsigned int m_toProcessKeys;
  105. unsigned int m_totalWindowKeys;
  106. vector<Filter> m_filters;
  107. deque<GroupData> m_groupWindow;
  108. vector<GroupData> m_groups;
  109. unsigned int m_processedKeys;
  110. vector<unsigned int> m_partialBits; // here just to avoid reallocation
  111. #if __BIG_ENDIAN__
  112. #define reorderbits(X) ((((unsigned int)(X) & 0xff000000) >> 24) | \
  113. (((unsigned int)(X) & 0x00ff0000) >> 8) | \
  114. (((unsigned int)(X) & 0x0000ff00) << 8) | \
  115. (((unsigned int)(X) & 0x000000ff) << 24))
  116. vector<GroupData> m_bigEndianGroups;
  117. #endif
  118. };
  119. //////////////////////////////////////////////////////////////////////////
  120. void initCustom( PimplData& pd,
  121. int freq, int nchannels,
  122. unsigned int lengthMs, unsigned int skipMs,
  123. int minUniqueKeys, unsigned int uniqueKeyWindowMs, int duration );
  124. inline float getRMS( const FloatingAverage<double>& signal );
  125. unsigned int processKeys( deque<GroupData>& groups, size_t size, PimplData& pd );
  126. void integralImage( float** ppFrames, unsigned int nFrames );
  127. void computeBits( vector<unsigned int>& bits,
  128. const vector<Filter>& f,
  129. float ** frames, unsigned int nframes );
  130. void src_short_to_float_and_mono_array(const short *in, float *out, int srclen, int nchannels);
  131. //////////////////////////////////////////////////////////////////////////
  132. // -----------------------------------------------------------------------------
  133. FingerprintExtractor::FingerprintExtractor()
  134. : m_pPimplData(NULL)
  135. {
  136. m_pPimplData = new PimplData();
  137. }
  138. // -----------------------------------------------------------------------------
  139. FingerprintExtractor::~FingerprintExtractor()
  140. {
  141. if ( m_pPimplData )
  142. delete m_pPimplData;
  143. }
  144. // -----------------------------------------------------------------------------
  145. size_t FingerprintExtractor::getToSkipMs()
  146. { return m_pPimplData->m_toSkipMs; }
  147. // -----------------------------------------------------------------------------
  148. size_t FingerprintExtractor::getMinimumDurationMs()
  149. {
  150. return static_cast<size_t>( (QUERY_SIZE_SECS + NORMALIZATION_SKIP_SECS * 2 + GUARD_SIZE_SECS) * 1000 );
  151. }
  152. // -----------------------------------------------------------------------------
  153. size_t FingerprintExtractor::getVersion()
  154. { return FINGERPRINT_LIB_VERSION; }
  155. // -----------------------------------------------------------------------------
  156. void FingerprintExtractor::initForQuery(int freq, int nchannels, int duration )
  157. {
  158. m_pPimplData->m_skipPassed = false;
  159. m_pPimplData->m_processType = PT_FOR_QUERY;
  160. if ( !m_pPimplData )
  161. throw std::runtime_error("Not enough RAM to allocate the fingerprinter!");
  162. initCustom( *m_pPimplData,
  163. freq, nchannels,
  164. static_cast<unsigned int>(QUERY_SIZE_SECS * 1000),
  165. static_cast<unsigned int>(QUERY_START_SECS * 1000),
  166. MIN_UNIQUE_KEYS,
  167. static_cast<unsigned int>(UPDATE_SIZE_SECS * 1000), duration );
  168. }
  169. // -----------------------------------------------------------------------------
  170. void FingerprintExtractor::initForFullSubmit(int freq, int nchannels )
  171. {
  172. m_pPimplData->m_skipPassed = true;
  173. m_pPimplData->m_processType = PT_FOR_FULLSUBMIT;
  174. if ( !m_pPimplData )
  175. throw std::runtime_error("Not enough RAM to allocate the fingerprinter!");
  176. initCustom( *m_pPimplData,
  177. freq, nchannels,
  178. numeric_limits<unsigned int>::max(),
  179. 0, MIN_UNIQUE_KEYS, 0, -1 );
  180. }
  181. // -----------------------------------------------------------------------------
  182. void initCustom( PimplData& pd,
  183. int freq, int nchannels,
  184. unsigned int lengthMs,
  185. unsigned int skipMs,
  186. int minUniqueKeys,
  187. unsigned int uniqueKeyWindowMs, int duration )
  188. {
  189. //////////////////////////////////////////////////////////////////////////
  190. pd.m_freq = freq;
  191. pd.m_nchannels = nchannels;
  192. pd.m_lengthMs = lengthMs;
  193. pd.m_minUniqueKeys = minUniqueKeys;
  194. pd.m_uniqueKeyWindowMs = uniqueKeyWindowMs;
  195. //////////////////////////////////////////////////////////////////////////
  196. // ***********************************************************************
  197. if ( pd.m_pDownsampleState )
  198. pd.m_pDownsampleState = src_delete(pd.m_pDownsampleState) ;
  199. pd.m_pDownsampleState = src_new (SRC_SINC_FASTEST, 1, NULL) ;
  200. pd.m_downsampleData.src_ratio = FDFREQ / freq;
  201. // ***********************************************************************
  202. //////////////////////////////////////////////////////////////////////////
  203. if ( pd.m_processType == PT_FOR_FULLSUBMIT )
  204. skipMs = 0; // make sure
  205. else if ( duration > 0 )
  206. {
  207. // skip + size + right normalization window + FFT guard
  208. //
  209. int stdDurationMs = static_cast<int>((QUERY_START_SECS + QUERY_SIZE_SECS + NORMALIZATION_SKIP_SECS + GUARD_SIZE_SECS) * 1000);
  210. int actualDurationMs = duration * 1000;
  211. // compute the actual skipMs depending on the duration
  212. if ( actualDurationMs < stdDurationMs )
  213. skipMs -= max( stdDurationMs - actualDurationMs, 0 );
  214. }
  215. pd.m_toSkipMs = max( static_cast<int>(skipMs) - static_cast<int>((pd.m_normalizedWindowMs/2)), 0 );
  216. pd.m_toSkipSize = static_cast<size_t>( freq * nchannels *
  217. (pd.m_toSkipMs / 1000.0) ); // half the norm window in secs;
  218. //if ( pd.m_processType == PT_FOR_QUERY && skipMs > pd.m_normalizedWindowMs/2 )
  219. //{
  220. // pd.m_toSkipMs = skipMs - (pd.m_normalizedWindowMs/2);
  221. // pd.m_toSkipSize = static_cast<size_t>( freq * nchannels *
  222. // (pd.m_toSkipMs / 1000.0) ); // half the norm window in secs
  223. //}
  224. //else
  225. //{
  226. // pd.m_toSkipMs = 0;
  227. // pd.m_toSkipSize = 0; // half of the normalization window will be skipped in ANY case
  228. //}
  229. pd.m_skippedSoFar = 0;
  230. pd.m_groupsReady = false;
  231. pd.m_preBufferPassed = false;
  232. // prepare the position for pre-buffering
  233. pd.m_pDownsampledCurrIt = pd.m_pDownsampledPCM + (pd.m_downsampledProcessSize - (pd.m_normWindow.size() / 2) );
  234. pd.m_toProcessKeys = fingerprint::getTotalKeys(pd.m_lengthMs);// (m_lengthMs * DFREQ) / (1000 * OVERLAPSAMPLES) + 1;
  235. pd.m_totalWindowKeys = fingerprint::getTotalKeys(pd.m_uniqueKeyWindowMs); //(m_uniqueKeyWindowMs * DFREQ) / (1000 * OVERLAPSAMPLES) + 1;
  236. if (pd.m_toProcessKeys == 1)
  237. pd.m_toProcessKeys = 0;
  238. if (pd.m_totalWindowKeys == 1)
  239. pd.m_totalWindowKeys = 0;
  240. pd.m_processedKeys = 0;
  241. pd.m_groupWindow.clear();
  242. pd.m_processedKeys = 0;
  243. }
  244. // -----------------------------------------------------------------------------
  245. // * cb = compensate buffer size
  246. // * norm = floating normalization window size
  247. //
  248. // PREBUFFER:
  249. // (-------m_bufferSize-------)
  250. // || EMPTY |---norm/2---|-cb-|---norm/2---||
  251. // 1. {--------read frames-----------}
  252. // 2. {--read normalize window--}
  253. // 3. {----} normalize
  254. //
  255. // 1. read [norm + cb] frames to m_bufferSize - norm/2
  256. // 2. read [m_buffersize - norm/2...m_buffersize + norm/2] into normalize window
  257. // 3. normalize [m_bufferSize..m_bufferSize+cb]
  258. //
  259. // PROCESS:
  260. //
  261. // ||-------m_bufferSize-------|-cb-|---norm/2---||
  262. // 1. <--------------------------{------copy-------}
  263. // 2. {--------read frames-------}
  264. // 3. {---------normalize--------}
  265. // 4. {------fft/process/whatevs------}
  266. //
  267. // 1. copy [m_bufferSize..m_bufferSize + cb + norm/2] to beginning
  268. // 2. read m_bufferSize frames to cb + norm/2
  269. // 3. normalize [cb..m_bufferSize+cb]
  270. // 4. fft/process/whatevs [0...m_bufferSize+cb]
  271. //
  272. // repeat until enough blocks processed and enough groups!
  273. //
  274. bool FingerprintExtractor::process( const short* pPCM, size_t num_samples, bool end_of_stream )
  275. {
  276. if ( num_samples == 0 )
  277. return false;
  278. // easier read
  279. PimplData& pd = *m_pPimplData;
  280. if ( pd.m_processType == PT_UNKNOWN )
  281. throw std::runtime_error("Please call initForQuery() or initForFullSubmit() before process()!");
  282. const short* pSourcePCMIt = pPCM;
  283. const short* pSourcePCMIt_end = pPCM + num_samples;
  284. if ( !pd.m_skipPassed )
  285. {
  286. // needs to skip data? (reminder: the query needs to skip QUERY_START_SECS (- half of the normalization window)
  287. if ( pd.m_skippedSoFar + num_samples > pd.m_toSkipSize )
  288. {
  289. pSourcePCMIt = pPCM + (pd.m_toSkipSize - pd.m_skippedSoFar);
  290. pd.m_skipPassed = true;
  291. }
  292. else
  293. {
  294. // need more data
  295. pd.m_skippedSoFar += num_samples;
  296. return false;
  297. }
  298. }
  299. pair<size_t, size_t> readData(0,0);
  300. pd.m_downsampleData.end_of_input = end_of_stream ? 1 : 0;
  301. //////////////////////////////////////////////////////////////////////////
  302. // PREBUFFER:
  303. if ( !pd.m_preBufferPassed )
  304. {
  305. // 1. downsample [norm + cb] frames to m_bufferSize - norm/2
  306. pd.m_floatInData.resize( (pSourcePCMIt_end - pSourcePCMIt) / pd.m_nchannels);
  307. src_short_to_float_and_mono_array( pSourcePCMIt,
  308. &(pd.m_floatInData[0]), static_cast<int>(pSourcePCMIt_end - pSourcePCMIt),
  309. pd.m_nchannels);
  310. pd.m_downsampleData.data_in = &(pd.m_floatInData[0]);
  311. pd.m_downsampleData.input_frames = static_cast<long>(pd.m_floatInData.size());
  312. pd.m_downsampleData.data_out = pd.m_pDownsampledCurrIt;
  313. pd.m_downsampleData.output_frames = static_cast<long>(pd.m_pEndDownsampledBuf - pd.m_pDownsampledCurrIt);
  314. int err = src_process(pd.m_pDownsampleState, &(pd.m_downsampleData));
  315. if ( err )
  316. throw std::runtime_error( src_strerror(err) );
  317. pd.m_pDownsampledCurrIt += pd.m_downsampleData.output_frames_gen;
  318. if ( pd.m_pDownsampledCurrIt != pd.m_pEndDownsampledBuf )
  319. return false; // NEED MORE DATA
  320. pSourcePCMIt += pd.m_downsampleData.input_frames_used * pd.m_nchannels;
  321. size_t pos = pd.m_downsampledProcessSize;
  322. size_t window_pos = pd.m_downsampledProcessSize - pd.m_normWindow.size() / 2;
  323. const size_t end_window_pos = window_pos + pd.m_normWindow.size();
  324. // 2. read [m_buffersize - norm/2...m_buffersize + norm/2] into normalize window
  325. for (; window_pos < end_window_pos ; ++window_pos)
  326. pd.m_normWindow.add(pd.m_pDownsampledPCM[window_pos] * pd.m_pDownsampledPCM[window_pos]);
  327. // 3. normalize [m_bufferSize..m_bufferSize+cb]
  328. for (; pos < pd.m_downsampledProcessSize + pd.m_compensateBufferSize; ++pos, ++window_pos)
  329. {
  330. pd.m_pDownsampledPCM[pos] /= getRMS(pd.m_normWindow);
  331. pd.m_normWindow.add(pd.m_pDownsampledPCM[window_pos] * pd.m_pDownsampledPCM[window_pos]);
  332. }
  333. pd.m_preBufferPassed = true;
  334. }
  335. //////////////////////////////////////////////////////////////////////////
  336. // PROCESS:
  337. bool found_enough_unique_keys = false;
  338. while (pd.m_toProcessKeys == 0 || pd.m_processedKeys < pd.m_toProcessKeys || !found_enough_unique_keys)
  339. {
  340. // 1. copy [m_bufferSize..m_bufferSize + cb + norm/2] to beginning
  341. if ( pd.m_pDownsampledCurrIt == pd.m_pEndDownsampledBuf )
  342. {
  343. memcpy( pd.m_pDownsampledPCM, pd.m_pDownsampledPCM + pd.m_downsampledProcessSize,
  344. (pd.m_compensateBufferSize + (pd.m_normWindow.size() / 2)) * sizeof(float));
  345. pd.m_pDownsampledCurrIt = pd.m_pDownsampledPCM + (pd.m_compensateBufferSize + (pd.m_normWindow.size() / 2));
  346. }
  347. // 2. read m_bufferSize frames to cb + norm/2
  348. pd.m_floatInData.resize( (pSourcePCMIt_end - pSourcePCMIt) / pd.m_nchannels);
  349. if ( pd.m_floatInData.empty() )
  350. return false;
  351. src_short_to_float_and_mono_array( pSourcePCMIt,
  352. &(pd.m_floatInData[0]), static_cast<int>(pSourcePCMIt_end - pSourcePCMIt),
  353. pd.m_nchannels);
  354. pd.m_downsampleData.data_in = &(pd.m_floatInData[0]);
  355. pd.m_downsampleData.input_frames = static_cast<long>(pd.m_floatInData.size());
  356. pd.m_downsampleData.data_out = pd.m_pDownsampledCurrIt;
  357. pd.m_downsampleData.output_frames = static_cast<long>(pd.m_pEndDownsampledBuf - pd.m_pDownsampledCurrIt);
  358. int err = src_process(pd.m_pDownsampleState, &(pd.m_downsampleData));
  359. if ( err )
  360. throw std::runtime_error( src_strerror(err) );
  361. pd.m_pDownsampledCurrIt += pd.m_downsampleData.output_frames_gen;
  362. if ( pd.m_pDownsampledCurrIt != pd.m_pEndDownsampledBuf && !end_of_stream )
  363. return false; // NEED MORE DATA
  364. //pSourcePCMIt += readData.second;
  365. pSourcePCMIt += pd.m_downsampleData.input_frames_used * pd.m_nchannels;
  366. // ********************************************************************
  367. // 3. normalize [cb..m_bufferSize+cb]
  368. size_t pos = static_cast<unsigned int>(pd.m_compensateBufferSize);
  369. size_t window_pos = static_cast<unsigned int>(pd.m_compensateBufferSize + (pd.m_normWindow.size() / 2));
  370. for(; pos < pd.m_downsampledProcessSize + pd.m_compensateBufferSize /* m_fullDownsampledBufferSize*/; ++pos, ++window_pos)
  371. {
  372. pd.m_pDownsampledPCM[pos] /= getRMS(pd.m_normWindow);
  373. pd.m_normWindow.add(pd.m_pDownsampledPCM[window_pos] * pd.m_pDownsampledPCM[window_pos]);
  374. }
  375. // 4. fft/process/whatevs [0...m_bufferSize+cb]
  376. pd.m_processedKeys += processKeys(pd.m_groupWindow, pos, pd);
  377. // we have too many keys, now we have to chop either one end or the other
  378. if (pd.m_toProcessKeys != 0 && pd.m_processedKeys > pd.m_toProcessKeys)
  379. {
  380. // set up window begin and end
  381. deque<GroupData>::iterator itBeg = pd.m_groupWindow.begin(), itEnd = pd.m_groupWindow.end();
  382. unsigned int offset_left, offset_right;
  383. found_enough_unique_keys =
  384. fingerprint::findSignificantGroups( itBeg, itEnd, offset_left, offset_right, pd.m_toProcessKeys,
  385. pd.m_totalWindowKeys, pd.m_minUniqueKeys);
  386. // if we're happy with this set, snip the beginning and end of the grouped keys
  387. if (found_enough_unique_keys)
  388. {
  389. itBeg->count -= offset_left;
  390. if (offset_right > 0 && itEnd != pd.m_groupWindow.end())
  391. {
  392. itEnd->count = offset_right;
  393. ++itEnd;
  394. }
  395. }
  396. // chop the deque
  397. copy(itBeg, itEnd, pd.m_groupWindow.begin());
  398. pd.m_groupWindow.resize(itEnd - itBeg);
  399. // recalc keys
  400. pd.m_processedKeys = 0;
  401. for (deque<GroupData>::const_iterator it = pd.m_groupWindow.begin(); it != pd.m_groupWindow.end(); ++it)
  402. pd.m_processedKeys += it->count;
  403. }
  404. if ( end_of_stream )
  405. break;
  406. } // while (totalKeys == 0 || keys < totalKeys || !found_enough_unique_keys)
  407. if (pd.m_toProcessKeys != 0 && pd.m_processedKeys < pd.m_toProcessKeys)
  408. throw std::runtime_error("Couldn't deliver the requested number of keys (it's the file too short?)");
  409. if ((pd.m_toProcessKeys != 0 && !found_enough_unique_keys) ||
  410. (pd.m_toProcessKeys == 0 && !enoughUniqueGoodGroups(pd.m_groupWindow.begin(), pd.m_groupWindow.end(), pd.m_minUniqueKeys)))
  411. {
  412. throw std::runtime_error("Not enough unique keys (it's the file too short?)");
  413. }
  414. // copy to a vector so that they can be returned as contiguous data
  415. pd.m_groups.resize(pd.m_groupWindow.size());
  416. copy(pd.m_groupWindow.begin(), pd.m_groupWindow.end(), pd.m_groups.begin());
  417. pd.m_groupsReady = true;
  418. pd.m_processType = PT_UNKNOWN;
  419. return true;
  420. }
  421. // -----------------------------------------------------------------------------
  422. pair<const char*, size_t> FingerprintExtractor::getFingerprint()
  423. {
  424. // easier read
  425. PimplData& pd = *m_pPimplData;
  426. if ( pd.m_groupsReady )
  427. {
  428. #if __BIG_ENDIAN__
  429. pd.m_bigEndianGroups.resize(pd.m_groups.size());
  430. for ( size_t i = 0; i < pd.m_groups.size(); ++i )
  431. {
  432. pd.m_bigEndianGroups[i].key = reorderbits(pd.m_groups[i].key);
  433. pd.m_bigEndianGroups[i].count = reorderbits(pd.m_groups[i].count);
  434. }
  435. return make_pair(reinterpret_cast<const char*>(&pd.m_bigEndianGroups[0]), pd.m_bigEndianGroups.size() * sizeof(GroupData) );
  436. #else
  437. return make_pair(reinterpret_cast<const char*>(&pd.m_groups[0]), pd.m_groups.size() * sizeof(GroupData) );
  438. #endif
  439. }
  440. else
  441. return make_pair(reinterpret_cast<const char*>(0), 0); // here's where null_ptr would become useful!
  442. }
  443. // -----------------------------------------------------------------------------
  444. // -----------------------------------------------------------------------------
  445. // -----------------------------------------------------------------------------
  446. float getRMS(const FloatingAverage<double>& signal)
  447. {
  448. // we don't want to normalize by the real rms, because excessive clipping will occur
  449. float rms = sqrtf(static_cast<float>(signal.getAverage())) * 10.0F;
  450. if (rms < 0.1F)
  451. rms = 0.1F;
  452. else if (rms > 3.0F)
  453. rms = 3.0F;
  454. return rms;
  455. }
  456. // -----------------------------------------------------------------------------
  457. unsigned int processKeys( deque<GroupData>& groups, size_t size, PimplData& pd )
  458. {
  459. size_t read_size = min(size, pd.m_downsampledProcessSize + pd.m_compensateBufferSize);
  460. unsigned int numFrames = pd.m_pFFT->process(pd.m_pDownsampledPCM, read_size);
  461. if ( numFrames <= Filter::KEYWIDTH )
  462. return 0; // skip it when the number of frames is too small
  463. float** ppFrames = pd.m_pFFT->getFrames();
  464. integralImage(ppFrames, numFrames);
  465. computeBits(pd.m_partialBits, pd.m_filters, ppFrames, numFrames);
  466. fingerprint::keys2GroupData(pd.m_partialBits, groups, false);
  467. return static_cast<unsigned int>(pd.m_partialBits.size());
  468. }
  469. // -----------------------------------------------------------------------------
  470. void integralImage(float** ppFrames, unsigned int nFrames)
  471. {
  472. for (unsigned int y = 1; y < nFrames; y++)
  473. {
  474. ppFrames[y][0] += ppFrames[y-1][0];
  475. }
  476. for (unsigned int x = 1; x < Filter::NBANDS; x++)
  477. {
  478. ppFrames[0][x] += ppFrames[0][x-1];
  479. }
  480. for (unsigned int y = 1; y < nFrames; y++)
  481. {
  482. for (unsigned int x = 1; x < Filter::NBANDS; x++)
  483. {
  484. ppFrames[y][x] += static_cast<float>( static_cast<double>(ppFrames[y-1][x]) +
  485. static_cast<double>(ppFrames[y][x-1]) -
  486. static_cast<double>(ppFrames[y-1][x-1]) );
  487. }
  488. }
  489. }
  490. // ---------------------------------------------------------------------
  491. //
  492. /// Convert bands to bits, using the supplied filters
  493. void computeBits( vector<unsigned int>& bits,
  494. const vector<Filter>& f,
  495. float ** frames, unsigned int nframes )
  496. {
  497. unsigned int first_time = Filter::KEYWIDTH / 2 + 1;
  498. unsigned int last_time = nframes - Filter::KEYWIDTH / 2;
  499. unsigned int numBits = last_time - first_time + 1;
  500. bits.resize(numBits);
  501. const unsigned int fSize = static_cast<unsigned int>(f.size());
  502. std::bitset<32> bt;
  503. double X = 0;
  504. for (unsigned int t2 = first_time; t2 <= last_time; ++t2)
  505. {
  506. for (unsigned int i = 0; i < fSize; ++i)
  507. {
  508. // we subtract 1 from t1 and b1 because we use integral images
  509. unsigned int t1 = (unsigned int) ((float) t2 - f[i].wt / 2.0 - 1);
  510. unsigned int t3 = (unsigned int) ((float) t2 + f[i].wt / 2.0 - 1);
  511. unsigned int b1 = f[i].first_band;
  512. unsigned int b2 = (unsigned int) round__((float) b1 + f[i].wb / 2.0) - 1;
  513. unsigned int b3 = b1 + f[i].wb - 1;
  514. --b1;
  515. unsigned int t_1q = (t1 + t2) / 2; // one quarter time
  516. unsigned int t_3q = t_1q + (t3 - t1 + 1) / 2; // three quarter time
  517. unsigned int b_1q = (b1 + b2) / 2; // one quarter band
  518. unsigned int b_3q = b_1q + (b3 - b1) / 2; // three quarter band
  519. X = 0;
  520. // we should check from t1 > 0, but in practice, this doesn't happen
  521. // we subtract 1 from everything because this came from matlab where indices start from 1
  522. switch (f[i].filter_type) {
  523. case 1: { // total energy
  524. if (b1 > 0)
  525. X = static_cast<double>(frames[t3-1][b3-1]) - static_cast<double>(frames[t3-1][b1-1])
  526. - static_cast<double>(frames[t1-1][b3-1]) + static_cast<double>(frames[t1-1][b1-1]);
  527. else
  528. X = static_cast<double>(frames[t3-1][b3-1]) - static_cast<double>(frames[t1-1][b3-1]);
  529. break;
  530. }
  531. case 2: { // energy difference over time
  532. if (b1 > 0)
  533. X = static_cast<double>(frames[t1-1][b1-1]) - 2*static_cast<double>(frames[t2-2][b1-1])
  534. + static_cast<double>(frames[t3-1][b1-1]) - static_cast<double>(frames[t1-1][b3-1])
  535. + 2*static_cast<double>(frames[t2-2][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
  536. else
  537. X = - static_cast<double>(frames[t1-1][b3-1]) + 2*static_cast<double>(frames[t2-2][b3-1])
  538. - static_cast<double>(frames[t3-1][b3-1]);
  539. break;
  540. }
  541. case 3: { // energy difference over bands
  542. if (b1 > 0)
  543. X = static_cast<double>(frames[t1-1][b1-1]) - static_cast<double>(frames[t3-1][b1-1])
  544. - 2*static_cast<double>(frames[t1-1][b2-1]) + 2*static_cast<double>(frames[t3-1][b2-1])
  545. + static_cast<double>(frames[t1-1][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
  546. else
  547. X = - 2*static_cast<double>(frames[t1-1][b2-1]) + 2*static_cast<double>(frames[t3-1][b2-1])
  548. + static_cast<double>(frames[t1-1][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
  549. break;
  550. }
  551. case 4: {
  552. // energy difference over time and bands
  553. if (b1 > 0)
  554. X = static_cast<double>(frames[t1-1][b1-1]) - 2*static_cast<double>(frames[t2-2][b1-1])
  555. + static_cast<double>(frames[t3-1][b1-1]) - 2*static_cast<double>(frames[t1-1][b2-1])
  556. + 4*static_cast<double>(frames[t2-2][b2-1]) - 2*static_cast<double>(frames[t3-1][b2-1])
  557. + static_cast<double>(frames[t1-1][b3-1]) - 2*static_cast<double>(frames[t2-2][b3-1])
  558. + static_cast<double>(frames[t3-1][b3-1]);
  559. else
  560. X = - 2*static_cast<double>(frames[t1-1][b2-1]) + 4*static_cast<double>(frames[t2-2][b2-1])
  561. - 2*static_cast<double>(frames[t3-1][b2-1]) + static_cast<double>(frames[t1-1][b3-1])
  562. - 2*static_cast<double>(frames[t2-2][b3-1]) + static_cast<double>(frames[t3-1][b3-1]);
  563. break;
  564. }
  565. case 5: { // time peak
  566. if (b1 > 0)
  567. X = - static_cast<double>(frames[t1-1][b1-1]) + 2*static_cast<double>(frames[t_1q-1][b1-1])
  568. - 2*static_cast<double>(frames[t_3q-1][b1-1]) + static_cast<double>(frames[t3-1][b1-1])
  569. + static_cast<double>(frames[t1-1][b3-1]) - 2*static_cast<double>(frames[t_1q-1][b3-1])
  570. + 2*static_cast<double>(frames[t_3q-1][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
  571. else
  572. X = static_cast<double>(frames[t1-1][b3-1]) - 2*static_cast<double>(frames[t_1q-1][b3-1])
  573. + 2*static_cast<double>(frames[t_3q-1][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
  574. break;
  575. }
  576. case 6: { // band beak
  577. if (b1 > 0)
  578. X = - static_cast<double>(frames[t1-1][b1-1]) + static_cast<double>(frames[t3-1][b1-1])
  579. + 2*static_cast<double>(frames[t1-1][b_1q-1]) - 2*static_cast<double>(frames[t3-1][b_1q-1])
  580. - 2*static_cast<double>(frames[t1-1][b_3q-1]) + 2*static_cast<double>(frames[t3-1][b_3q-1])
  581. + static_cast<double>(frames[t1-1][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
  582. else
  583. X = + 2*static_cast<double>(frames[t1-1][b_1q-1]) - 2*static_cast<double>(frames[t3-1][b_1q-1])
  584. - 2*static_cast<double>(frames[t1-1][b_3q-1]) + 2*static_cast<double>(frames[t3-1][b_3q-1])
  585. + static_cast<double>(frames[t1-1][b3-1]) - static_cast<double>(frames[t3-1][b3-1]);
  586. break;
  587. }
  588. }
  589. bt[i] = X > f[i].threshold;
  590. }
  591. bits[t2 - first_time] = bt.to_ulong();
  592. }
  593. }
  594. // -----------------------------------------------------------------------------
  595. void src_short_to_float_and_mono_array( const short *in, float *out, int srclen, int nchannels )
  596. {
  597. switch ( nchannels )
  598. {
  599. case 1:
  600. src_short_to_float_array(in, out, srclen);
  601. break;
  602. case 2:
  603. {
  604. // this can be optimized
  605. int j = 0;
  606. const double div = numeric_limits<short>::max() * nchannels;
  607. for ( int i = 0; i < srclen; i += 2, ++j )
  608. {
  609. out[j] = static_cast<float>( static_cast<double>(static_cast<int>(in[i]) + static_cast<int>(in[i+1])) / div );
  610. }
  611. }
  612. break;
  613. default:
  614. throw( std::runtime_error("Unsupported number of channels!") );
  615. }
  616. }
  617. // -----------------------------------------------------------------------------
  618. } // end of namespace
  619. // -----------------------------------------------------------------------------