/external/mbed-os/events/equeue/tests/prof.c

https://github.com/adamgreen/gcc4mbed · C

/*
 * Profiling framework for the events library
 *
 * Copyright (c) 2016 Christopher Haster
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "equeue.h"

#include <unistd.h>
#include <stdio.h>
#include <setjmp.h>
#include <stdint.h>
#include <stdlib.h>
#include <inttypes.h>
#include <sys/time.h>


// Performance measurement utils
#define PROF_RUNS 5
#define PROF_INTERVAL 100000000

#define prof_volatile(t) __attribute__((unused)) volatile t

typedef uint64_t prof_cycle_t;
static volatile prof_cycle_t prof_start_cycle;
static volatile prof_cycle_t prof_stop_cycle;
static prof_cycle_t prof_accum_cycle;
static prof_cycle_t prof_baseline_cycle;
static prof_cycle_t prof_iterations;
static const char *prof_units;
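
// Read the CPU's time-stamp counter with the x86 rdtsc instruction and
// stitch the two 32-bit halves into a single 64-bit cycle count. This ties
// the profiler to x86; other targets would need a different cycle source.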
#define prof_cycle() ({ \
    uint32_t a, b; \
    __asm__ volatile ("rdtsc" : "=a" (a), "=d" (b)); \
    ((uint64_t)b << 32) | (uint64_t)a; \
})
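
// prof_loop repeats its body until at least PROF_INTERVAL cycles have been
// accumulated between prof_start/prof_stop pairs, counting iterations so an
// average cost per iteration can be derived. A profiling function wraps only
// the call under test, for example:
//
//     prof_loop() {
//         prof_start();
//         /* operation being measured */
//         prof_stop();
//     }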
#define prof_loop() \
    for (prof_iterations = 0; \
         prof_accum_cycle < PROF_INTERVAL; \
         prof_iterations++)

#define prof_start() ({ \
    prof_start_cycle = prof_cycle(); \
})

#define prof_stop() ({ \
    prof_stop_cycle = prof_cycle(); \
    prof_accum_cycle += prof_stop_cycle - prof_start_cycle; \
})
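
// prof_result skips the cycle counter and reports a precomputed value (for
// example a byte count) in the given units. The baseline is added here only
// because prof_measure subtracts it again before printing.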
#define prof_result(value, units) ({ \
    prof_accum_cycle = value+prof_baseline_cycle; \
    prof_iterations = 1; \
    prof_units = units; \
})
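
// prof_measure runs a profiling function PROF_RUNS times, keeps the lowest
// per-iteration average, and subtracts the baseline overhead. When stdin is
// not a terminal, it also reads the corresponding value from a previous run
// off stdin and prints the percentage change, colored green or red once the
// change exceeds 10%.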
#define prof_measure(func, ...) ({ \
    printf("%s: ...", #func); \
    fflush(stdout); \
    \
    prof_units = "cycles"; \
    prof_cycle_t runs[PROF_RUNS]; \
    for (int i = 0; i < PROF_RUNS; i++) { \
        prof_accum_cycle = 0; \
        prof_iterations = 0; \
        func(__VA_ARGS__); \
        runs[i] = prof_accum_cycle / prof_iterations; \
    } \
    \
    prof_cycle_t res = runs[0]; \
    for (int i = 0; i < PROF_RUNS; i++) { \
        if (runs[i] < res) { \
            res = runs[i]; \
        } \
    } \
    res -= prof_baseline_cycle; \
    printf("\r%s: %"PRIu64" %s", #func, res, prof_units); \
    \
    if (!isatty(0)) { \
        prof_cycle_t prev; \
        while (scanf("%*[^0-9]%"PRIu64, &prev) == 0); \
        int64_t perc = 100*((int64_t)prev - (int64_t)res) / (int64_t)prev; \
        \
        if (perc > 10) { \
            printf(" (\e[32m%+"PRId64"%%\e[0m)", perc); \
        } else if (perc < -10) { \
            printf(" (\e[31m%+"PRId64"%%\e[0m)", perc); \
        } else { \
            printf(" (%+"PRId64"%%)", perc); \
        } \
    } \
    \
    printf("\n"); \
    res; \
})
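
// prof_baseline measures the overhead of the profiling machinery itself
// (an empty start/stop pair) so it can be subtracted from later results.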
#define prof_baseline(func, ...) ({ \
    prof_baseline_cycle = 0; \
    prof_baseline_cycle = prof_measure(func, __VA_ARGS__); \
})


// Various test functions
void no_func(void *eh) {
}


// Actual performance tests
void baseline_prof(void) {
    prof_loop() {
        prof_start();
        __asm__ volatile ("");
        prof_stop();
    }
}

void equeue_tick_prof(void) {
    prof_volatile(unsigned) res;
    prof_loop() {
        prof_start();
        res = equeue_tick();
        prof_stop();
    }
}

void equeue_alloc_prof(void) {
    struct equeue q;
    equeue_create(&q, 32*EQUEUE_EVENT_SIZE);

    prof_loop() {
        prof_start();
        void *e = equeue_alloc(&q, 8 * sizeof(int));
        prof_stop();

        equeue_dealloc(&q, e);
    }

    equeue_destroy(&q);
}
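
// Profile allocation after the allocator has already handed out and
// reclaimed count events of varying size.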
void equeue_alloc_many_prof(int count) {
    struct equeue q;
    equeue_create(&q, count*EQUEUE_EVENT_SIZE);

    void *es[count];

    for (int i = 0; i < count; i++) {
        es[i] = equeue_alloc(&q, (i % 4) * sizeof(int));
    }

    for (int i = 0; i < count; i++) {
        equeue_dealloc(&q, es[i]);
    }

    prof_loop() {
        prof_start();
        void *e = equeue_alloc(&q, 8 * sizeof(int));
        prof_stop();

        equeue_dealloc(&q, e);
    }

    equeue_destroy(&q);
}

void equeue_post_prof(void) {
    struct equeue q;
    equeue_create(&q, EQUEUE_EVENT_SIZE);

    prof_loop() {
        void *e = equeue_alloc(&q, 0);

        prof_start();
        int id = equeue_post(&q, no_func, e);
        prof_stop();

        equeue_cancel(&q, id);
    }

    equeue_destroy(&q);
}
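
// Profile posting into a queue that already holds count-1 pending events.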
void equeue_post_many_prof(int count) {
    struct equeue q;
    equeue_create(&q, count*EQUEUE_EVENT_SIZE);

    for (int i = 0; i < count-1; i++) {
        equeue_call(&q, no_func, 0);
    }

    prof_loop() {
        void *e = equeue_alloc(&q, 0);

        prof_start();
        int id = equeue_post(&q, no_func, e);
        prof_stop();

        equeue_cancel(&q, id);
    }

    equeue_destroy(&q);
}
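
// Profile posting an event that carries a delay, so it is queued for the
// future rather than being immediately dispatchable.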
void equeue_post_future_prof(void) {
    struct equeue q;
    equeue_create(&q, EQUEUE_EVENT_SIZE);

    prof_loop() {
        void *e = equeue_alloc(&q, 0);
        equeue_event_delay(e, 1000);

        prof_start();
        int id = equeue_post(&q, no_func, e);
        prof_stop();

        equeue_cancel(&q, id);
    }

    equeue_destroy(&q);
}

void equeue_post_future_many_prof(int count) {
    struct equeue q;
    equeue_create(&q, count*EQUEUE_EVENT_SIZE);

    for (int i = 0; i < count-1; i++) {
        equeue_call(&q, no_func, 0);
    }

    prof_loop() {
        void *e = equeue_alloc(&q, 0);
        equeue_event_delay(e, 1000);

        prof_start();
        int id = equeue_post(&q, no_func, e);
        prof_stop();

        equeue_cancel(&q, id);
    }

    equeue_destroy(&q);
}
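
// Profile a dispatch pass over a queue holding a single pending event.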
void equeue_dispatch_prof(void) {
    struct equeue q;
    equeue_create(&q, EQUEUE_EVENT_SIZE);

    prof_loop() {
        equeue_call(&q, no_func, 0);

        prof_start();
        equeue_dispatch(&q, 0);
        prof_stop();
    }

    equeue_destroy(&q);
}
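
// Profile a dispatch pass that must drain count queued events per iteration.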
void equeue_dispatch_many_prof(int count) {
    struct equeue q;
    equeue_create(&q, count*EQUEUE_EVENT_SIZE);

    prof_loop() {
        for (int i = 0; i < count; i++) {
            equeue_call(&q, no_func, 0);
        }

        prof_start();
        equeue_dispatch(&q, 0);
        prof_stop();
    }

    equeue_destroy(&q);
}

void equeue_cancel_prof(void) {
    struct equeue q;
    equeue_create(&q, EQUEUE_EVENT_SIZE);

    prof_loop() {
        int id = equeue_call(&q, no_func, 0);

        prof_start();
        equeue_cancel(&q, id);
        prof_stop();
    }

    equeue_destroy(&q);
}
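
// Profile cancellation while count-1 other events remain pending in the queue.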
void equeue_cancel_many_prof(int count) {
    struct equeue q;
    equeue_create(&q, count*EQUEUE_EVENT_SIZE);

    for (int i = 0; i < count-1; i++) {
        equeue_call(&q, no_func, 0);
    }

    prof_loop() {
        int id = equeue_call(&q, no_func, 0);

        prof_start();
        equeue_cancel(&q, id);
        prof_stop();
    }

    equeue_destroy(&q);
}
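
// The *_size profiles report bytes consumed from the queue's slab via
// prof_result instead of measuring cycles.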
void equeue_alloc_size_prof(void) {
    size_t size = 32*EQUEUE_EVENT_SIZE;

    struct equeue q;
    equeue_create(&q, size);
    equeue_alloc(&q, 0);

    prof_result(size - q.slab.size, "bytes");

    equeue_destroy(&q);
}

void equeue_alloc_many_size_prof(int count) {
    size_t size = count*EQUEUE_EVENT_SIZE;

    struct equeue q;
    equeue_create(&q, size);

    for (int i = 0; i < count; i++) {
        equeue_alloc(&q, (i % 4) * sizeof(int));
    }

    prof_result(size - q.slab.size, "bytes");

    equeue_destroy(&q);
}
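
// Measure slab consumption after several allocate/free passes in alternating
// order, i.e. how much memory is needed once allocations have been churned.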
void equeue_alloc_fragmented_size_prof(int count) {
    size_t size = count*EQUEUE_EVENT_SIZE;

    struct equeue q;
    equeue_create(&q, size);

    void *es[count];

    for (int i = 0; i < count; i++) {
        es[i] = equeue_alloc(&q, (i % 4) * sizeof(int));
    }

    for (int i = 0; i < count; i++) {
        equeue_dealloc(&q, es[i]);
    }

    for (int i = count-1; i >= 0; i--) {
        es[i] = equeue_alloc(&q, (i % 4) * sizeof(int));
    }

    for (int i = count-1; i >= 0; i--) {
        equeue_dealloc(&q, es[i]);
    }

    for (int i = 0; i < count; i++) {
        equeue_alloc(&q, (i % 4) * sizeof(int));
    }

    prof_result(size - q.slab.size, "bytes");

    equeue_destroy(&q);
}


// Entry point
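//
// Each profile prints one line of results to stdout. To get a percentage
// comparison, pipe the output of an earlier run back in on stdin; the file
// and binary names below are only illustrative:
//
//     ./prof | tee results.txt      # record a run
//     ./prof < results.txt          # compare a later run against it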
int main() {
    printf("beginning profiling...\n");

    prof_baseline(baseline_prof);

    prof_measure(equeue_tick_prof);
    prof_measure(equeue_alloc_prof);
    prof_measure(equeue_post_prof);
    prof_measure(equeue_post_future_prof);
    prof_measure(equeue_dispatch_prof);
    prof_measure(equeue_cancel_prof);

    prof_measure(equeue_alloc_many_prof, 1000);
    prof_measure(equeue_post_many_prof, 1000);
    prof_measure(equeue_post_future_many_prof, 1000);
    prof_measure(equeue_dispatch_many_prof, 100);
    prof_measure(equeue_cancel_many_prof, 100);

    prof_measure(equeue_alloc_size_prof);
    prof_measure(equeue_alloc_many_size_prof, 1000);
    prof_measure(equeue_alloc_fragmented_size_prof, 1000);

    printf("done!\n");
}