PageRenderTime 56ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/webrtc/modules/audio_processing/test/process_test.cc

https://github.com/rillian/webrtc
C++ | 1073 lines | 887 code | 158 blank | 28 comment | 280 complexity | 2bf17ead046dd56b1685872148833058 MD5 | raw file
Possible License(s): BSD-3-Clause, CC-BY-SA-3.0
  1. /*
  2. * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  3. *
  4. * Use of this source code is governed by a BSD-style license
  5. * that can be found in the LICENSE file in the root of the source
  6. * tree. An additional intellectual property rights grant can be found
  7. * in the file PATENTS. All contributing project authors may
  8. * be found in the AUTHORS file in the root of the source tree.
  9. */
  10. #include <math.h>
  11. #include <stdio.h>
  12. #include <string.h>
  13. #ifdef WEBRTC_ANDROID
  14. #include <sys/stat.h>
  15. #endif
  16. #include <algorithm>
  17. #include "gtest/gtest.h"
  18. #include "webrtc/modules/audio_processing/include/audio_processing.h"
  19. #include "webrtc/modules/interface/module_common_types.h"
  20. #include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
  21. #include "webrtc/system_wrappers/interface/scoped_ptr.h"
  22. #include "webrtc/system_wrappers/interface/tick_util.h"
  23. #include "webrtc/test/testsupport/fileutils.h"
  24. #include "webrtc/test/testsupport/perf_test.h"
  25. #ifdef WEBRTC_ANDROID_PLATFORM_BUILD
  26. #include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h"
  27. #else
  28. #include "webrtc/audio_processing/debug.pb.h"
  29. #endif
  30. using webrtc::AudioFrame;
  31. using webrtc::AudioProcessing;
  32. using webrtc::EchoCancellation;
  33. using webrtc::GainControl;
  34. using webrtc::NoiseSuppression;
  35. using webrtc::scoped_array;
  36. using webrtc::TickInterval;
  37. using webrtc::TickTime;
  38. using webrtc::VoiceDetection;
  39. using webrtc::audioproc::Event;
  40. using webrtc::audioproc::Init;
  41. using webrtc::audioproc::ReverseStream;
  42. using webrtc::audioproc::Stream;
  43. namespace {
  44. // Returns true on success, false on error or end-of-file.
  45. bool ReadMessageFromFile(FILE* file,
  46. ::google::protobuf::MessageLite* msg) {
  47. // The "wire format" for the size is little-endian.
  48. // Assume process_test is running on a little-endian machine.
  49. int32_t size = 0;
  50. if (fread(&size, sizeof(int32_t), 1, file) != 1) {
  51. return false;
  52. }
  53. if (size <= 0) {
  54. return false;
  55. }
  56. const size_t usize = static_cast<size_t>(size);
  57. scoped_array<char> array(new char[usize]);
  58. if (fread(array.get(), sizeof(char), usize, file) != usize) {
  59. return false;
  60. }
  61. msg->Clear();
  62. return msg->ParseFromArray(array.get(), usize);
  63. }
  64. void PrintStat(const AudioProcessing::Statistic& stat) {
  65. printf("%d, %d, %d\n", stat.average,
  66. stat.maximum,
  67. stat.minimum);
  68. }
  // Prints the command-line help text for process_test to stdout.
  //
  // The flags listed here mirror the ones recognized by the argument parser in
  // void_main(). The voice-detection likelihood flags (--vad_very_low,
  // --vad_low, --vad_moderate, --vad_high) are accepted by that parser and are
  // therefore listed in the -vad section as well.
  void usage() {
    // Synopsis.
    printf(
        "Usage: process_test [options] [-pb PROTOBUF_FILE]\n"
        " [-ir REVERSE_FILE] [-i PRIMARY_FILE] [-o OUT_FILE]\n");
    printf(
        "process_test is a test application for AudioProcessing.\n\n"
        "When a protobuf debug file is available, specify it with -pb.\n"
        "Alternately, when -ir or -i is used, the specified files will be\n"
        "processed directly in a simulation mode. Otherwise the full set of\n"
        "legacy test files is expected to be present in the working directory.\n");
    printf("\n");

    // General options.
    printf("Options\n");
    printf("General configuration (only used for the simulation mode):\n");
    printf(" -fs SAMPLE_RATE_HZ\n");
    printf(" -ch CHANNELS_IN CHANNELS_OUT\n");
    printf(" -rch REVERSE_CHANNELS\n");
    printf("\n");

    // Per-component options.
    printf("Component configuration:\n");
    printf(
        "All components are disabled by default. Each block below begins with a\n"
        "flag to enable the component with default settings. The subsequent flags\n"
        "in the block are used to provide configuration settings.\n");
    printf("\n -aec Echo cancellation\n");
    printf(" --drift_compensation\n");
    printf(" --no_drift_compensation\n");
    printf(" --no_echo_metrics\n");
    printf(" --no_delay_logging\n");
    printf(" --aec_suppression_level LEVEL [0 - 2]\n");
    printf("\n -aecm Echo control mobile\n");
    printf(" --aecm_echo_path_in_file FILE\n");
    printf(" --aecm_echo_path_out_file FILE\n");
    printf(" --no_comfort_noise\n");
    printf(" --routing_mode MODE [0 - 4]\n");
    printf("\n -agc Gain control\n");
    printf(" --analog\n");
    printf(" --adaptive_digital\n");
    printf(" --fixed_digital\n");
    printf(" --target_level LEVEL\n");
    printf(" --compression_gain GAIN\n");
    printf(" --limiter\n");
    printf(" --no_limiter\n");
    printf("\n -hpf High pass filter\n");
    printf("\n -ns Noise suppression\n");
    printf(" --ns_low\n");
    printf(" --ns_moderate\n");
    printf(" --ns_high\n");
    printf(" --ns_very_high\n");
    printf(" --ns_prob_file FILE\n");
    printf("\n -vad Voice activity detection\n");
    // Likelihood selectors handled by the parser but previously undocumented.
    printf(" --vad_very_low\n");
    printf(" --vad_low\n");
    printf(" --vad_moderate\n");
    printf(" --vad_high\n");
    printf(" --vad_out_file FILE\n");
    printf("\n Level metrics (enabled by default)\n");
    printf(" --no_level_metrics\n");
    printf("\n");

    // Options that modify how the run itself behaves.
    printf("Modifiers:\n");
    printf(" --noasm Disable SSE optimization.\n");
    printf(" --delay DELAY Add DELAY ms to input value.\n");
    printf(" --perf Measure performance.\n");
    printf(" --quiet Suppress text output.\n");
    printf(" --no_progress Suppress progress.\n");
    printf(" --debug_file FILE Dump a debug recording.\n");
  }
  // Converts a microphone level into a linear gain factor.
  //
  // The level's offset from 127 is scaled so that 128 level steps correspond
  // to 40 units, and the result is 10^(units / 20), i.e. the usual
  // dB-to-linear-amplitude form. Level 127 therefore yields a gain of 1.
  static float MicLevel2Gain(int level) {
    const float scaled_offset = (level - 127.0f) / 128.0f * 40.0f;
    return pow(10.0f, scaled_offset / 20.0f);
  }
  133. static void SimulateMic(int mic_level, AudioFrame* frame) {
  134. mic_level = std::min(std::max(mic_level, 0), 255);
  135. float mic_gain = MicLevel2Gain(mic_level);
  136. int num_samples = frame->samples_per_channel_ * frame->num_channels_;
  137. float v;
  138. for (int n = 0; n < num_samples; n++) {
  139. v = floor(frame->data_[n] * mic_gain + 0.5);
  140. v = std::max(std::min(32767.0f, v), -32768.0f);
  141. frame->data_[n] = static_cast<int16_t>(v);
  142. }
  143. }
  144. // void function for gtest.
  145. void void_main(int argc, char* argv[]) {
  146. if (argc > 1 && strcmp(argv[1], "--help") == 0) {
  147. usage();
  148. return;
  149. }
  150. if (argc < 2) {
  151. printf("Did you mean to run without arguments?\n");
  152. printf("Try `process_test --help' for more information.\n\n");
  153. }
  154. AudioProcessing* apm = AudioProcessing::Create(0);
  155. ASSERT_TRUE(apm != NULL);
  156. const char* pb_filename = NULL;
  157. const char* far_filename = NULL;
  158. const char* near_filename = NULL;
  159. const char* out_filename = NULL;
  160. const char* vad_out_filename = NULL;
  161. const char* ns_prob_filename = NULL;
  162. const char* aecm_echo_path_in_filename = NULL;
  163. const char* aecm_echo_path_out_filename = NULL;
  164. int32_t sample_rate_hz = 16000;
  165. int32_t device_sample_rate_hz = 16000;
  166. int num_capture_input_channels = 1;
  167. int num_capture_output_channels = 1;
  168. int num_render_channels = 1;
  169. int samples_per_channel = sample_rate_hz / 100;
  170. bool simulating = false;
  171. bool perf_testing = false;
  172. bool verbose = true;
  173. bool progress = true;
  174. int extra_delay_ms = 0;
  175. //bool interleaved = true;
  176. ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(true));
  177. for (int i = 1; i < argc; i++) {
  178. if (strcmp(argv[i], "-pb") == 0) {
  179. i++;
  180. ASSERT_LT(i, argc) << "Specify protobuf filename after -pb";
  181. pb_filename = argv[i];
  182. } else if (strcmp(argv[i], "-ir") == 0) {
  183. i++;
  184. ASSERT_LT(i, argc) << "Specify filename after -ir";
  185. far_filename = argv[i];
  186. simulating = true;
  187. } else if (strcmp(argv[i], "-i") == 0) {
  188. i++;
  189. ASSERT_LT(i, argc) << "Specify filename after -i";
  190. near_filename = argv[i];
  191. simulating = true;
  192. } else if (strcmp(argv[i], "-o") == 0) {
  193. i++;
  194. ASSERT_LT(i, argc) << "Specify filename after -o";
  195. out_filename = argv[i];
  196. } else if (strcmp(argv[i], "-fs") == 0) {
  197. i++;
  198. ASSERT_LT(i, argc) << "Specify sample rate after -fs";
  199. ASSERT_EQ(1, sscanf(argv[i], "%d", &sample_rate_hz));
  200. samples_per_channel = sample_rate_hz / 100;
  201. ASSERT_EQ(apm->kNoError,
  202. apm->set_sample_rate_hz(sample_rate_hz));
  203. } else if (strcmp(argv[i], "-ch") == 0) {
  204. i++;
  205. ASSERT_LT(i + 1, argc) << "Specify number of channels after -ch";
  206. ASSERT_EQ(1, sscanf(argv[i], "%d", &num_capture_input_channels));
  207. i++;
  208. ASSERT_EQ(1, sscanf(argv[i], "%d", &num_capture_output_channels));
  209. ASSERT_EQ(apm->kNoError,
  210. apm->set_num_channels(num_capture_input_channels,
  211. num_capture_output_channels));
  212. } else if (strcmp(argv[i], "-rch") == 0) {
  213. i++;
  214. ASSERT_LT(i, argc) << "Specify number of channels after -rch";
  215. ASSERT_EQ(1, sscanf(argv[i], "%d", &num_render_channels));
  216. ASSERT_EQ(apm->kNoError,
  217. apm->set_num_reverse_channels(num_render_channels));
  218. } else if (strcmp(argv[i], "-aec") == 0) {
  219. ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
  220. ASSERT_EQ(apm->kNoError,
  221. apm->echo_cancellation()->enable_metrics(true));
  222. ASSERT_EQ(apm->kNoError,
  223. apm->echo_cancellation()->enable_delay_logging(true));
  224. } else if (strcmp(argv[i], "--drift_compensation") == 0) {
  225. ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
  226. // TODO(ajm): this is enabled in the VQE test app by default. Investigate
  227. // why it can give better performance despite passing zeros.
  228. ASSERT_EQ(apm->kNoError,
  229. apm->echo_cancellation()->enable_drift_compensation(true));
  230. } else if (strcmp(argv[i], "--no_drift_compensation") == 0) {
  231. ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
  232. ASSERT_EQ(apm->kNoError,
  233. apm->echo_cancellation()->enable_drift_compensation(false));
  234. } else if (strcmp(argv[i], "--no_echo_metrics") == 0) {
  235. ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
  236. ASSERT_EQ(apm->kNoError,
  237. apm->echo_cancellation()->enable_metrics(false));
  238. } else if (strcmp(argv[i], "--no_delay_logging") == 0) {
  239. ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
  240. ASSERT_EQ(apm->kNoError,
  241. apm->echo_cancellation()->enable_delay_logging(false));
  242. } else if (strcmp(argv[i], "--no_level_metrics") == 0) {
  243. ASSERT_EQ(apm->kNoError, apm->level_estimator()->Enable(false));
  244. } else if (strcmp(argv[i], "--aec_suppression_level") == 0) {
  245. i++;
  246. ASSERT_LT(i, argc) << "Specify level after --aec_suppression_level";
  247. int suppression_level;
  248. ASSERT_EQ(1, sscanf(argv[i], "%d", &suppression_level));
  249. ASSERT_EQ(apm->kNoError,
  250. apm->echo_cancellation()->set_suppression_level(
  251. static_cast<webrtc::EchoCancellation::SuppressionLevel>(
  252. suppression_level)));
  253. } else if (strcmp(argv[i], "-aecm") == 0) {
  254. ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true));
  255. } else if (strcmp(argv[i], "--aecm_echo_path_in_file") == 0) {
  256. i++;
  257. ASSERT_LT(i, argc) << "Specify filename after --aecm_echo_path_in_file";
  258. aecm_echo_path_in_filename = argv[i];
  259. } else if (strcmp(argv[i], "--aecm_echo_path_out_file") == 0) {
  260. i++;
  261. ASSERT_LT(i, argc) << "Specify filename after --aecm_echo_path_out_file";
  262. aecm_echo_path_out_filename = argv[i];
  263. } else if (strcmp(argv[i], "--no_comfort_noise") == 0) {
  264. ASSERT_EQ(apm->kNoError,
  265. apm->echo_control_mobile()->enable_comfort_noise(false));
  266. } else if (strcmp(argv[i], "--routing_mode") == 0) {
  267. i++;
  268. ASSERT_LT(i, argc) << "Specify mode after --routing_mode";
  269. int routing_mode;
  270. ASSERT_EQ(1, sscanf(argv[i], "%d", &routing_mode));
  271. ASSERT_EQ(apm->kNoError,
  272. apm->echo_control_mobile()->set_routing_mode(
  273. static_cast<webrtc::EchoControlMobile::RoutingMode>(
  274. routing_mode)));
  275. } else if (strcmp(argv[i], "-agc") == 0) {
  276. ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
  277. } else if (strcmp(argv[i], "--analog") == 0) {
  278. ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
  279. ASSERT_EQ(apm->kNoError,
  280. apm->gain_control()->set_mode(GainControl::kAdaptiveAnalog));
  281. } else if (strcmp(argv[i], "--adaptive_digital") == 0) {
  282. ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
  283. ASSERT_EQ(apm->kNoError,
  284. apm->gain_control()->set_mode(GainControl::kAdaptiveDigital));
  285. } else if (strcmp(argv[i], "--fixed_digital") == 0) {
  286. ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
  287. ASSERT_EQ(apm->kNoError,
  288. apm->gain_control()->set_mode(GainControl::kFixedDigital));
  289. } else if (strcmp(argv[i], "--target_level") == 0) {
  290. i++;
  291. int level;
  292. ASSERT_EQ(1, sscanf(argv[i], "%d", &level));
  293. ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
  294. ASSERT_EQ(apm->kNoError,
  295. apm->gain_control()->set_target_level_dbfs(level));
  296. } else if (strcmp(argv[i], "--compression_gain") == 0) {
  297. i++;
  298. int gain;
  299. ASSERT_EQ(1, sscanf(argv[i], "%d", &gain));
  300. ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
  301. ASSERT_EQ(apm->kNoError,
  302. apm->gain_control()->set_compression_gain_db(gain));
  303. } else if (strcmp(argv[i], "--limiter") == 0) {
  304. ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
  305. ASSERT_EQ(apm->kNoError,
  306. apm->gain_control()->enable_limiter(true));
  307. } else if (strcmp(argv[i], "--no_limiter") == 0) {
  308. ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true));
  309. ASSERT_EQ(apm->kNoError,
  310. apm->gain_control()->enable_limiter(false));
  311. } else if (strcmp(argv[i], "-hpf") == 0) {
  312. ASSERT_EQ(apm->kNoError, apm->high_pass_filter()->Enable(true));
  313. } else if (strcmp(argv[i], "-ns") == 0) {
  314. ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true));
  315. } else if (strcmp(argv[i], "--ns_low") == 0) {
  316. ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true));
  317. ASSERT_EQ(apm->kNoError,
  318. apm->noise_suppression()->set_level(NoiseSuppression::kLow));
  319. } else if (strcmp(argv[i], "--ns_moderate") == 0) {
  320. ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true));
  321. ASSERT_EQ(apm->kNoError,
  322. apm->noise_suppression()->set_level(NoiseSuppression::kModerate));
  323. } else if (strcmp(argv[i], "--ns_high") == 0) {
  324. ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true));
  325. ASSERT_EQ(apm->kNoError,
  326. apm->noise_suppression()->set_level(NoiseSuppression::kHigh));
  327. } else if (strcmp(argv[i], "--ns_very_high") == 0) {
  328. ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true));
  329. ASSERT_EQ(apm->kNoError,
  330. apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh));
  331. } else if (strcmp(argv[i], "--ns_prob_file") == 0) {
  332. i++;
  333. ASSERT_LT(i, argc) << "Specify filename after --ns_prob_file";
  334. ns_prob_filename = argv[i];
  335. } else if (strcmp(argv[i], "-vad") == 0) {
  336. ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
  337. } else if (strcmp(argv[i], "--vad_very_low") == 0) {
  338. ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
  339. ASSERT_EQ(apm->kNoError,
  340. apm->voice_detection()->set_likelihood(
  341. VoiceDetection::kVeryLowLikelihood));
  342. } else if (strcmp(argv[i], "--vad_low") == 0) {
  343. ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
  344. ASSERT_EQ(apm->kNoError,
  345. apm->voice_detection()->set_likelihood(
  346. VoiceDetection::kLowLikelihood));
  347. } else if (strcmp(argv[i], "--vad_moderate") == 0) {
  348. ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
  349. ASSERT_EQ(apm->kNoError,
  350. apm->voice_detection()->set_likelihood(
  351. VoiceDetection::kModerateLikelihood));
  352. } else if (strcmp(argv[i], "--vad_high") == 0) {
  353. ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true));
  354. ASSERT_EQ(apm->kNoError,
  355. apm->voice_detection()->set_likelihood(
  356. VoiceDetection::kHighLikelihood));
  357. } else if (strcmp(argv[i], "--vad_out_file") == 0) {
  358. i++;
  359. ASSERT_LT(i, argc) << "Specify filename after --vad_out_file";
  360. vad_out_filename = argv[i];
  361. } else if (strcmp(argv[i], "--noasm") == 0) {
  362. WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM;
  363. // We need to reinitialize here if components have already been enabled.
  364. ASSERT_EQ(apm->kNoError, apm->Initialize());
  365. } else if (strcmp(argv[i], "--delay") == 0) {
  366. i++;
  367. ASSERT_EQ(1, sscanf(argv[i], "%d", &extra_delay_ms));
  368. } else if (strcmp(argv[i], "--perf") == 0) {
  369. perf_testing = true;
  370. } else if (strcmp(argv[i], "--quiet") == 0) {
  371. verbose = false;
  372. progress = false;
  373. } else if (strcmp(argv[i], "--no_progress") == 0) {
  374. progress = false;
  375. } else if (strcmp(argv[i], "--debug_file") == 0) {
  376. i++;
  377. ASSERT_LT(i, argc) << "Specify filename after --debug_file";
  378. ASSERT_EQ(apm->kNoError, apm->StartDebugRecording(argv[i]));
  379. } else {
  380. FAIL() << "Unrecognized argument " << argv[i];
  381. }
  382. }
  383. // If we're reading a protobuf file, ensure a simulation hasn't also
  384. // been requested (which makes no sense...)
  385. ASSERT_FALSE(pb_filename && simulating);
  386. if (verbose) {
  387. printf("Sample rate: %d Hz\n", sample_rate_hz);
  388. printf("Primary channels: %d (in), %d (out)\n",
  389. num_capture_input_channels,
  390. num_capture_output_channels);
  391. printf("Reverse channels: %d \n", num_render_channels);
  392. }
  393. const std::string out_path = webrtc::test::OutputPath();
  394. const char far_file_default[] = "apm_far.pcm";
  395. const char near_file_default[] = "apm_near.pcm";
  396. const std::string out_file_default = out_path + "out.pcm";
  397. const char event_filename[] = "apm_event.dat";
  398. const char delay_filename[] = "apm_delay.dat";
  399. const char drift_filename[] = "apm_drift.dat";
  400. const std::string vad_file_default = out_path + "vad_out.dat";
  401. const std::string ns_prob_file_default = out_path + "ns_prob.dat";
  402. if (!simulating) {
  403. far_filename = far_file_default;
  404. near_filename = near_file_default;
  405. }
  406. if (!out_filename) {
  407. out_filename = out_file_default.c_str();
  408. }
  409. if (!vad_out_filename) {
  410. vad_out_filename = vad_file_default.c_str();
  411. }
  412. if (!ns_prob_filename) {
  413. ns_prob_filename = ns_prob_file_default.c_str();
  414. }
  415. FILE* pb_file = NULL;
  416. FILE* far_file = NULL;
  417. FILE* near_file = NULL;
  418. FILE* out_file = NULL;
  419. FILE* event_file = NULL;
  420. FILE* delay_file = NULL;
  421. FILE* drift_file = NULL;
  422. FILE* vad_out_file = NULL;
  423. FILE* ns_prob_file = NULL;
  424. FILE* aecm_echo_path_in_file = NULL;
  425. FILE* aecm_echo_path_out_file = NULL;
  426. if (pb_filename) {
  427. pb_file = fopen(pb_filename, "rb");
  428. ASSERT_TRUE(NULL != pb_file) << "Unable to open protobuf file "
  429. << pb_filename;
  430. } else {
  431. if (far_filename) {
  432. far_file = fopen(far_filename, "rb");
  433. ASSERT_TRUE(NULL != far_file) << "Unable to open far-end audio file "
  434. << far_filename;
  435. }
  436. near_file = fopen(near_filename, "rb");
  437. ASSERT_TRUE(NULL != near_file) << "Unable to open near-end audio file "
  438. << near_filename;
  439. if (!simulating) {
  440. event_file = fopen(event_filename, "rb");
  441. ASSERT_TRUE(NULL != event_file) << "Unable to open event file "
  442. << event_filename;
  443. delay_file = fopen(delay_filename, "rb");
  444. ASSERT_TRUE(NULL != delay_file) << "Unable to open buffer file "
  445. << delay_filename;
  446. drift_file = fopen(drift_filename, "rb");
  447. ASSERT_TRUE(NULL != drift_file) << "Unable to open drift file "
  448. << drift_filename;
  449. }
  450. }
  451. out_file = fopen(out_filename, "wb");
  452. ASSERT_TRUE(NULL != out_file) << "Unable to open output audio file "
  453. << out_filename;
  454. int near_size_bytes = 0;
  455. if (pb_file) {
  456. struct stat st;
  457. stat(pb_filename, &st);
  458. // Crude estimate, but should be good enough.
  459. near_size_bytes = st.st_size / 3;
  460. } else {
  461. struct stat st;
  462. stat(near_filename, &st);
  463. near_size_bytes = st.st_size;
  464. }
  465. if (apm->voice_detection()->is_enabled()) {
  466. vad_out_file = fopen(vad_out_filename, "wb");
  467. ASSERT_TRUE(NULL != vad_out_file) << "Unable to open VAD output file "
  468. << vad_out_file;
  469. }
  470. if (apm->noise_suppression()->is_enabled()) {
  471. ns_prob_file = fopen(ns_prob_filename, "wb");
  472. ASSERT_TRUE(NULL != ns_prob_file) << "Unable to open NS output file "
  473. << ns_prob_file;
  474. }
  475. if (aecm_echo_path_in_filename != NULL) {
  476. aecm_echo_path_in_file = fopen(aecm_echo_path_in_filename, "rb");
  477. ASSERT_TRUE(NULL != aecm_echo_path_in_file) << "Unable to open file "
  478. << aecm_echo_path_in_filename;
  479. const size_t path_size =
  480. apm->echo_control_mobile()->echo_path_size_bytes();
  481. scoped_array<char> echo_path(new char[path_size]);
  482. ASSERT_EQ(path_size, fread(echo_path.get(),
  483. sizeof(char),
  484. path_size,
  485. aecm_echo_path_in_file));
  486. EXPECT_EQ(apm->kNoError,
  487. apm->echo_control_mobile()->SetEchoPath(echo_path.get(),
  488. path_size));
  489. fclose(aecm_echo_path_in_file);
  490. aecm_echo_path_in_file = NULL;
  491. }
  492. if (aecm_echo_path_out_filename != NULL) {
  493. aecm_echo_path_out_file = fopen(aecm_echo_path_out_filename, "wb");
  494. ASSERT_TRUE(NULL != aecm_echo_path_out_file) << "Unable to open file "
  495. << aecm_echo_path_out_filename;
  496. }
  497. size_t read_count = 0;
  498. int reverse_count = 0;
  499. int primary_count = 0;
  500. int near_read_bytes = 0;
  501. TickInterval acc_ticks;
  502. AudioFrame far_frame;
  503. AudioFrame near_frame;
  504. int delay_ms = 0;
  505. int drift_samples = 0;
  506. int capture_level = 127;
  507. int8_t stream_has_voice = 0;
  508. float ns_speech_prob = 0.0f;
  509. TickTime t0 = TickTime::Now();
  510. TickTime t1 = t0;
  511. int64_t max_time_us = 0;
  512. int64_t max_time_reverse_us = 0;
  513. int64_t min_time_us = 1e6;
  514. int64_t min_time_reverse_us = 1e6;
  515. // TODO(ajm): Ideally we would refactor this block into separate functions,
  516. // but for now we want to share the variables.
  517. if (pb_file) {
  518. Event event_msg;
  519. while (ReadMessageFromFile(pb_file, &event_msg)) {
  520. std::ostringstream trace_stream;
  521. trace_stream << "Processed frames: " << reverse_count << " (reverse), "
  522. << primary_count << " (primary)";
  523. SCOPED_TRACE(trace_stream.str());
  524. if (event_msg.type() == Event::INIT) {
  525. ASSERT_TRUE(event_msg.has_init());
  526. const Init msg = event_msg.init();
  527. ASSERT_TRUE(msg.has_sample_rate());
  528. ASSERT_EQ(apm->kNoError,
  529. apm->set_sample_rate_hz(msg.sample_rate()));
  530. ASSERT_TRUE(msg.has_device_sample_rate());
  531. ASSERT_EQ(apm->kNoError,
  532. apm->echo_cancellation()->set_device_sample_rate_hz(
  533. msg.device_sample_rate()));
  534. ASSERT_TRUE(msg.has_num_input_channels());
  535. ASSERT_TRUE(msg.has_num_output_channels());
  536. ASSERT_EQ(apm->kNoError,
  537. apm->set_num_channels(msg.num_input_channels(),
  538. msg.num_output_channels()));
  539. ASSERT_TRUE(msg.has_num_reverse_channels());
  540. ASSERT_EQ(apm->kNoError,
  541. apm->set_num_reverse_channels(msg.num_reverse_channels()));
  542. samples_per_channel = msg.sample_rate() / 100;
  543. far_frame.sample_rate_hz_ = msg.sample_rate();
  544. far_frame.samples_per_channel_ = samples_per_channel;
  545. far_frame.num_channels_ = msg.num_reverse_channels();
  546. near_frame.sample_rate_hz_ = msg.sample_rate();
  547. near_frame.samples_per_channel_ = samples_per_channel;
  548. near_frame.num_channels_ = msg.num_input_channels();
  549. if (verbose) {
  550. printf("Init at frame: %d (primary), %d (reverse)\n",
  551. primary_count, reverse_count);
  552. printf(" Sample rate: %d Hz\n", msg.sample_rate());
  553. printf(" Primary channels: %d (in), %d (out)\n",
  554. msg.num_input_channels(),
  555. msg.num_output_channels());
  556. printf(" Reverse channels: %d \n", msg.num_reverse_channels());
  557. }
  558. } else if (event_msg.type() == Event::REVERSE_STREAM) {
  559. ASSERT_TRUE(event_msg.has_reverse_stream());
  560. const ReverseStream msg = event_msg.reverse_stream();
  561. reverse_count++;
  562. ASSERT_TRUE(msg.has_data());
  563. ASSERT_EQ(sizeof(int16_t) * samples_per_channel *
  564. far_frame.num_channels_, msg.data().size());
  565. memcpy(far_frame.data_, msg.data().data(), msg.data().size());
  566. if (perf_testing) {
  567. t0 = TickTime::Now();
  568. }
  569. ASSERT_EQ(apm->kNoError,
  570. apm->AnalyzeReverseStream(&far_frame));
  571. if (perf_testing) {
  572. t1 = TickTime::Now();
  573. TickInterval tick_diff = t1 - t0;
  574. acc_ticks += tick_diff;
  575. if (tick_diff.Microseconds() > max_time_reverse_us) {
  576. max_time_reverse_us = tick_diff.Microseconds();
  577. }
  578. if (tick_diff.Microseconds() < min_time_reverse_us) {
  579. min_time_reverse_us = tick_diff.Microseconds();
  580. }
  581. }
  582. } else if (event_msg.type() == Event::STREAM) {
  583. ASSERT_TRUE(event_msg.has_stream());
  584. const Stream msg = event_msg.stream();
  585. primary_count++;
  586. // ProcessStream could have changed this for the output frame.
  587. near_frame.num_channels_ = apm->num_input_channels();
  588. ASSERT_TRUE(msg.has_input_data());
  589. ASSERT_EQ(sizeof(int16_t) * samples_per_channel *
  590. near_frame.num_channels_, msg.input_data().size());
  591. memcpy(near_frame.data_,
  592. msg.input_data().data(),
  593. msg.input_data().size());
  594. near_read_bytes += msg.input_data().size();
  595. if (progress && primary_count % 100 == 0) {
  596. printf("%.0f%% complete\r",
  597. (near_read_bytes * 100.0) / near_size_bytes);
  598. fflush(stdout);
  599. }
  600. if (perf_testing) {
  601. t0 = TickTime::Now();
  602. }
  603. ASSERT_EQ(apm->kNoError,
  604. apm->gain_control()->set_stream_analog_level(msg.level()));
  605. ASSERT_EQ(apm->kNoError,
  606. apm->set_stream_delay_ms(msg.delay() + extra_delay_ms));
  607. apm->echo_cancellation()->set_stream_drift_samples(msg.drift());
  608. int err = apm->ProcessStream(&near_frame);
  609. if (err == apm->kBadStreamParameterWarning) {
  610. printf("Bad parameter warning. %s\n", trace_stream.str().c_str());
  611. }
  612. ASSERT_TRUE(err == apm->kNoError ||
  613. err == apm->kBadStreamParameterWarning);
  614. ASSERT_TRUE(near_frame.num_channels_ == apm->num_output_channels());
  615. stream_has_voice =
  616. static_cast<int8_t>(apm->voice_detection()->stream_has_voice());
  617. if (vad_out_file != NULL) {
  618. ASSERT_EQ(1u, fwrite(&stream_has_voice,
  619. sizeof(stream_has_voice),
  620. 1,
  621. vad_out_file));
  622. }
  623. if (ns_prob_file != NULL) {
  624. ns_speech_prob = apm->noise_suppression()->speech_probability();
  625. ASSERT_EQ(1u, fwrite(&ns_speech_prob,
  626. sizeof(ns_speech_prob),
  627. 1,
  628. ns_prob_file));
  629. }
  630. if (perf_testing) {
  631. t1 = TickTime::Now();
  632. TickInterval tick_diff = t1 - t0;
  633. acc_ticks += tick_diff;
  634. if (tick_diff.Microseconds() > max_time_us) {
  635. max_time_us = tick_diff.Microseconds();
  636. }
  637. if (tick_diff.Microseconds() < min_time_us) {
  638. min_time_us = tick_diff.Microseconds();
  639. }
  640. }
  641. size_t size = samples_per_channel * near_frame.num_channels_;
  642. ASSERT_EQ(size, fwrite(near_frame.data_,
  643. sizeof(int16_t),
  644. size,
  645. out_file));
  646. }
  647. }
  648. ASSERT_TRUE(feof(pb_file));
  649. } else {
  650. enum Events {
  651. kInitializeEvent,
  652. kRenderEvent,
  653. kCaptureEvent,
  654. kResetEventDeprecated
  655. };
  656. int16_t event = 0;
  657. while (simulating || feof(event_file) == 0) {
  658. std::ostringstream trace_stream;
  659. trace_stream << "Processed frames: " << reverse_count << " (reverse), "
  660. << primary_count << " (primary)";
  661. SCOPED_TRACE(trace_stream.str());
  662. if (simulating) {
  663. if (far_file == NULL) {
  664. event = kCaptureEvent;
  665. } else {
  666. if (event == kRenderEvent) {
  667. event = kCaptureEvent;
  668. } else {
  669. event = kRenderEvent;
  670. }
  671. }
  672. } else {
  673. read_count = fread(&event, sizeof(event), 1, event_file);
  674. if (read_count != 1) {
  675. break;
  676. }
  677. }
  678. far_frame.sample_rate_hz_ = sample_rate_hz;
  679. far_frame.samples_per_channel_ = samples_per_channel;
  680. far_frame.num_channels_ = num_render_channels;
  681. near_frame.sample_rate_hz_ = sample_rate_hz;
  682. near_frame.samples_per_channel_ = samples_per_channel;
  683. if (event == kInitializeEvent || event == kResetEventDeprecated) {
  684. ASSERT_EQ(1u,
  685. fread(&sample_rate_hz, sizeof(sample_rate_hz), 1, event_file));
  686. samples_per_channel = sample_rate_hz / 100;
  687. ASSERT_EQ(1u,
  688. fread(&device_sample_rate_hz,
  689. sizeof(device_sample_rate_hz),
  690. 1,
  691. event_file));
  692. ASSERT_EQ(apm->kNoError,
  693. apm->set_sample_rate_hz(sample_rate_hz));
  694. ASSERT_EQ(apm->kNoError,
  695. apm->echo_cancellation()->set_device_sample_rate_hz(
  696. device_sample_rate_hz));
  697. far_frame.sample_rate_hz_ = sample_rate_hz;
  698. far_frame.samples_per_channel_ = samples_per_channel;
  699. far_frame.num_channels_ = num_render_channels;
  700. near_frame.sample_rate_hz_ = sample_rate_hz;
  701. near_frame.samples_per_channel_ = samples_per_channel;
  702. if (verbose) {
  703. printf("Init at frame: %d (primary), %d (reverse)\n",
  704. primary_count, reverse_count);
  705. printf(" Sample rate: %d Hz\n", sample_rate_hz);
  706. }
  707. } else if (event == kRenderEvent) {
  708. reverse_count++;
  709. size_t size = samples_per_channel * num_render_channels;
  710. read_count = fread(far_frame.data_,
  711. sizeof(int16_t),
  712. size,
  713. far_file);
  714. if (simulating) {
  715. if (read_count != size) {
  716. // Read an equal amount from the near file to avoid errors due to
  717. // not reaching end-of-file.
  718. EXPECT_EQ(0, fseek(near_file, read_count * sizeof(int16_t),
  719. SEEK_CUR));
  720. break; // This is expected.
  721. }
  722. } else {
  723. ASSERT_EQ(size, read_count);
  724. }
  725. if (perf_testing) {
  726. t0 = TickTime::Now();
  727. }
  728. ASSERT_EQ(apm->kNoError,
  729. apm->AnalyzeReverseStream(&far_frame));
  730. if (perf_testing) {
  731. t1 = TickTime::Now();
  732. TickInterval tick_diff = t1 - t0;
  733. acc_ticks += tick_diff;
  734. if (tick_diff.Microseconds() > max_time_reverse_us) {
  735. max_time_reverse_us = tick_diff.Microseconds();
  736. }
  737. if (tick_diff.Microseconds() < min_time_reverse_us) {
  738. min_time_reverse_us = tick_diff.Microseconds();
  739. }
  740. }
  741. } else if (event == kCaptureEvent) {
  742. primary_count++;
  743. near_frame.num_channels_ = num_capture_input_channels;
  744. size_t size = samples_per_channel * num_capture_input_channels;
  745. read_count = fread(near_frame.data_,
  746. sizeof(int16_t),
  747. size,
  748. near_file);
  749. near_read_bytes += read_count * sizeof(int16_t);
  750. if (progress && primary_count % 100 == 0) {
  751. printf("%.0f%% complete\r",
  752. (near_read_bytes * 100.0) / near_size_bytes);
  753. fflush(stdout);
  754. }
  755. if (simulating) {
  756. if (read_count != size) {
  757. break; // This is expected.
  758. }
  759. delay_ms = 0;
  760. drift_samples = 0;
  761. } else {
  762. ASSERT_EQ(size, read_count);
  763. // TODO(ajm): sizeof(delay_ms) for current files?
  764. ASSERT_EQ(1u,
  765. fread(&delay_ms, 2, 1, delay_file));
  766. ASSERT_EQ(1u,
  767. fread(&drift_samples, sizeof(drift_samples), 1, drift_file));
  768. }
  769. if (apm->gain_control()->is_enabled() &&
  770. apm->gain_control()->mode() == GainControl::kAdaptiveAnalog) {
  771. SimulateMic(capture_level, &near_frame);
  772. }
  773. if (perf_testing) {
  774. t0 = TickTime::Now();
  775. }
  776. const int capture_level_in = capture_level;
  777. ASSERT_EQ(apm->kNoError,
  778. apm->gain_control()->set_stream_analog_level(capture_level));
  779. ASSERT_EQ(apm->kNoError,
  780. apm->set_stream_delay_ms(delay_ms + extra_delay_ms));
  781. apm->echo_cancellation()->set_stream_drift_samples(drift_samples);
  782. int err = apm->ProcessStream(&near_frame);
  783. if (err == apm->kBadStreamParameterWarning) {
  784. printf("Bad parameter warning. %s\n", trace_stream.str().c_str());
  785. }
  786. ASSERT_TRUE(err == apm->kNoError ||
  787. err == apm->kBadStreamParameterWarning);
  788. ASSERT_TRUE(near_frame.num_channels_ == apm->num_output_channels());
  789. capture_level = apm->gain_control()->stream_analog_level();
  790. stream_has_voice =
  791. static_cast<int8_t>(apm->voice_detection()->stream_has_voice());
  792. if (vad_out_file != NULL) {
  793. ASSERT_EQ(1u, fwrite(&stream_has_voice,
  794. sizeof(stream_has_voice),
  795. 1,
  796. vad_out_file));
  797. }
  798. if (ns_prob_file != NULL) {
  799. ns_speech_prob = apm->noise_suppression()->speech_probability();
  800. ASSERT_EQ(1u, fwrite(&ns_speech_prob,
  801. sizeof(ns_speech_prob),
  802. 1,
  803. ns_prob_file));
  804. }
  805. if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) {
  806. ASSERT_EQ(capture_level_in, capture_level);
  807. }
  808. if (perf_testing) {
  809. t1 = TickTime::Now();
  810. TickInterval tick_diff = t1 - t0;
  811. acc_ticks += tick_diff;
  812. if (tick_diff.Microseconds() > max_time_us) {
  813. max_time_us = tick_diff.Microseconds();
  814. }
  815. if (tick_diff.Microseconds() < min_time_us) {
  816. min_time_us = tick_diff.Microseconds();
  817. }
  818. }
  819. size = samples_per_channel * near_frame.num_channels_;
  820. ASSERT_EQ(size, fwrite(near_frame.data_,
  821. sizeof(int16_t),
  822. size,
  823. out_file));
  824. }
  825. else {
  826. FAIL() << "Event " << event << " is unrecognized";
  827. }
  828. }
  829. }
  830. printf("100%% complete\r");
  831. if (aecm_echo_path_out_file != NULL) {
  832. const size_t path_size =
  833. apm->echo_control_mobile()->echo_path_size_bytes();
  834. scoped_array<char> echo_path(new char[path_size]);
  835. apm->echo_control_mobile()->GetEchoPath(echo_path.get(), path_size);
  836. ASSERT_EQ(path_size, fwrite(echo_path.get(),
  837. sizeof(char),
  838. path_size,
  839. aecm_echo_path_out_file));
  840. fclose(aecm_echo_path_out_file);
  841. aecm_echo_path_out_file = NULL;
  842. }
  843. if (verbose) {
  844. printf("\nProcessed frames: %d (primary), %d (reverse)\n",
  845. primary_count, reverse_count);
  846. if (apm->level_estimator()->is_enabled()) {
  847. printf("\n--Level metrics--\n");
  848. printf("RMS: %d dBFS\n", -apm->level_estimator()->RMS());
  849. }
  850. if (apm->echo_cancellation()->are_metrics_enabled()) {
  851. EchoCancellation::Metrics metrics;
  852. apm->echo_cancellation()->GetMetrics(&metrics);
  853. printf("\n--Echo metrics--\n");
  854. printf("(avg, max, min)\n");
  855. printf("ERL: ");
  856. PrintStat(metrics.echo_return_loss);
  857. printf("ERLE: ");
  858. PrintStat(metrics.echo_return_loss_enhancement);
  859. printf("ANLP: ");
  860. PrintStat(metrics.a_nlp);
  861. }
  862. if (apm->echo_cancellation()->is_delay_logging_enabled()) {
  863. int median = 0;
  864. int std = 0;
  865. apm->echo_cancellation()->GetDelayMetrics(&median, &std);
  866. printf("\n--Delay metrics--\n");
  867. printf("Median: %3d\n", median);
  868. printf("Standard deviation: %3d\n", std);
  869. }
  870. }
  871. if (!pb_file) {
  872. int8_t temp_int8;
  873. if (far_file) {
  874. read_count = fread(&temp_int8, sizeof(temp_int8), 1, far_file);
  875. EXPECT_NE(0, feof(far_file)) << "Far-end file not fully processed";
  876. }
  877. read_count = fread(&temp_int8, sizeof(temp_int8), 1, near_file);
  878. EXPECT_NE(0, feof(near_file)) << "Near-end file not fully processed";
  879. if (!simulating) {
  880. read_count = fread(&temp_int8, sizeof(temp_int8), 1, event_file);
  881. EXPECT_NE(0, feof(event_file)) << "Event file not fully processed";
  882. read_count = fread(&temp_int8, sizeof(temp_int8), 1, delay_file);
  883. EXPECT_NE(0, feof(delay_file)) << "Delay file not fully processed";
  884. read_count = fread(&temp_int8, sizeof(temp_int8), 1, drift_file);
  885. EXPECT_NE(0, feof(drift_file)) << "Drift file not fully processed";
  886. }
  887. }
  888. if (perf_testing) {
  889. if (primary_count > 0) {
  890. int64_t exec_time = acc_ticks.Milliseconds();
  891. printf("\nTotal time: %.3f s, file time: %.2f s\n",
  892. exec_time * 0.001, primary_count * 0.01);
  893. printf("Time per frame: %.3f ms (average), %.3f ms (max),"
  894. " %.3f ms (min)\n",
  895. (exec_time * 1.0) / primary_count,
  896. (max_time_us + max_time_reverse_us) / 1000.0,
  897. (min_time_us + min_time_reverse_us) / 1000.0);
  898. // Record the results with Perf test tools.
  899. webrtc::test::PrintResult("audioproc", "", "time_per_10ms_frame",
  900. (exec_time * 1000) / primary_count, "us", false);
  901. } else {
  902. printf("Warning: no capture frames\n");
  903. }
  904. }
  905. AudioProcessing::Destroy(apm);
  906. apm = NULL;
  907. }
  908. } // namespace
  909. int main(int argc, char* argv[])
  910. {
  911. void_main(argc, argv);
  912. // Optional, but removes memory leak noise from Valgrind.
  913. google::protobuf::ShutdownProtobufLibrary();
  914. return 0;
  915. }