PageRenderTime 46ms CodeModel.GetById 15ms RepoModel.GetById 0ms app.codeStats 1ms

/test.c

https://github.com/SpeakingFish/nedmalloc
C | 515 lines | 478 code | 22 blank | 15 comment | 50 complexity | b48874814f1d307cb283ff1cd6a10fd2 MD5 | raw file
Possible License(s): GPL-2.0
/* test.c
An example of how to use nedmalloc in C
(C) 2005-2010 Niall Douglas
*/
#define _CRT_SECURE_NO_WARNINGS 1 /* Don't care about MSVC warnings on POSIX functions */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "nedmalloc.h"
#define USE_NEDMALLOC_DLL
  11. /**** TEST CONFIGURATION ****/
  12. #if 0 /* Test patterns typical of C++ code */
  13. #define THREADS 4 /* How many threads to run */
  14. #define TESTCPLUSPLUS 1 /* =1 to make 50% of ops have blocksize<=512. This is typical for C++ allocator usage. */
  15. #define BLOCKSIZE 16384 /* Test will be with blocks up to BLOCKSIZE. Try 8-16Kb for typical app usage, 1Mb if you use large arrays etc. */
  16. #define TESTTYPE 2 /* =1 for maximum speed test, =2 for randomised test */
  17. #define TOUCH 0 /* Whether to touch all pages of an allocated region. Can make a huge difference to scores. */
  18. #define MAXMEMORY (768*1024*1024) /* Maximum memory to use (approx) */
  19. #define RECORDS (100000/THREADS)
  20. #define MAXMEMORY2 (MAXMEMORY/THREADS)
  21. #endif
  22. #if 1 /* Test avrg. 2Mb block realloc() speed */
  23. #define THREADS 1
  24. #define TESTCPLUSPLUS 1
  25. #define BLOCKSIZE (2*1024*1024)
  26. #define TESTTYPE 2
  27. #define TOUCH 1
  28. #define MAXMEMORY (768*1024*1024)
  29. #define RECORDS (400/THREADS)
  30. #define MAXMEMORY2 (MAXMEMORY/THREADS)
  31. #endif
  32. #ifdef _MSC_VER
  33. /*#pragma optimize("g", off)*/ /* Useful for debugging */
  34. #endif
  35. #if !defined(USE_NEDMALLOC_DLL)
  36. #include "nedmalloc.c"
  37. #elif defined(WIN32)
  38. #define WIN32_LEAN_AND_MEAN 1
  39. #include <windows.h>
  40. #include <malloc.h>
  41. #else
  42. #include <pthread.h>
  43. #endif
  44. #ifndef FORCEINLINE
  45. #if defined(__GNUC__)
  46. #define FORCEINLINE __inline __attribute__ ((always_inline))
  47. #elif defined(_MSC_VER)
  48. #define FORCEINLINE __forceinline
  49. #endif
  50. #endif
  51. #ifndef NOINLINE
  52. #if defined(__GNUC__)
  53. #define NOINLINE __attribute__ ((noinline))
  54. #elif defined(_MSC_VER)
  55. #define NOINLINE __declspec(noinline)
  56. #else
  57. #define NOINLINE
  58. #endif
  59. #endif
  60. static int whichmalloc;
  61. static int doRealloc;
  62. static struct threadstuff_t
  63. {
  64. struct
  65. {
  66. int mallocs;
  67. int reallocs;
  68. int frees;
  69. } ops;
  70. unsigned int *toalloc;
  71. void **allocs;
  72. char cachesync1[128];
  73. int done;
  74. char cachesync2[128];
  75. } threadstuff[THREADS];
  76. static void threadcode(int);
  77. #ifdef WIN32
  78. static DWORD WINAPI _threadcode(LPVOID a)
  79. {
  80. threadcode((int)(size_t) a);
  81. return 0;
  82. }
  83. #define THREADVAR HANDLE
  84. #define THREADINIT(v, id) (*v=CreateThread(NULL, 0, _threadcode, (LPVOID)(size_t) id, 0, NULL))
  85. #define THREADSLEEP(v) SleepEx(v, FALSE)
  86. #define THREADWAIT(v) (WaitForSingleObject(v, INFINITE), 0)
  87. typedef unsigned __int64 usCount;
  88. static FORCEINLINE usCount GetUsCount()
  89. {
  90. static LARGE_INTEGER ticksPerSec;
  91. static double scalefactor;
  92. LARGE_INTEGER val;
  93. if(!scalefactor)
  94. {
  95. if(QueryPerformanceFrequency(&ticksPerSec))
  96. scalefactor=ticksPerSec.QuadPart/1000000000000.0;
  97. else
  98. scalefactor=1;
  99. }
  100. if(!QueryPerformanceCounter(&val))
  101. return (usCount) GetTickCount() * 1000000000;
  102. return (usCount) (val.QuadPart/scalefactor);
  103. }
  104. static HANDLE win32heap;
  105. static void *win32malloc(size_t size)
  106. {
  107. return HeapAlloc(win32heap, 0, size);
  108. }
  109. static void *win32realloc(void *p, size_t size)
  110. {
  111. return HeapReAlloc(win32heap, 0, p, size);
  112. }
  113. static size_t win32memsize(void *p)
  114. {
  115. return HeapSize(win32heap, 0, p);
  116. }
  117. static void win32free(void *mem)
  118. {
  119. HeapFree(win32heap, 0, mem);
  120. }
  121. static void *(*const mallocs[])(size_t size)={ malloc, nedmalloc, win32malloc };
  122. static void *(*const reallocs[])(void *p, size_t size)={ realloc, nedrealloc, win32realloc };
  123. static size_t (*const memsizes[])(void *p)={ _msize, nedmemsize, win32memsize };
  124. static void (*const frees[])(void *mem)={ free, nedfree, win32free };
  125. #else
  126. #include <sys/time.h>
  127. #include <time.h>
  128. static void *_threadcode(void *a)
  129. {
  130. threadcode((int)(size_t) a);
  131. return 0;
  132. }
  133. #define THREADVAR pthread_t
  134. #define THREADINIT(v, id) pthread_create(v, NULL, _threadcode, (void *)(size_t) id)
  135. #define THREADSLEEP(v) usleep(v*1000)
  136. #define THREADWAIT(v) pthread_join(v, NULL)
  137. typedef unsigned long long usCount;
  138. static FORCEINLINE usCount GetUsCount()
  139. {
  140. #ifdef CLOCK_MONOTONIC
  141. struct timespec ts;
  142. clock_gettime(CLOCK_MONOTONIC, &ts);
  143. return ((usCount) ts.tv_sec*1000000000000LL)+ts.tv_nsec*1000LL;
  144. #else
  145. struct timeval tv;
  146. gettimeofday(&tv, 0);
  147. return ((usCount) tv.tv_sec*1000000000000LL)+tv.tv_usec*1000000LL;
  148. #endif
  149. }
  150. static void *(*const mallocs[])(size_t size)={ malloc, nedmalloc };
  151. static void *(*const reallocs[])(void *p, size_t size)={ realloc, nedrealloc };
  152. static void (*const frees[])(void *mem)={ free, nedfree };
  153. #endif
/* Per-thread elapsed time: threadcode() adds the GetUsCount() difference for
   each run; runtest() zeroes the entries before launching threads. */
static usCount times[THREADS];
  155. static FORCEINLINE unsigned int myrandom(unsigned int *seed)
  156. {
  157. *seed=1664525UL*(*seed)+1013904223UL;
  158. return *seed;
  159. }
  160. static void threadcode(int threadidx)
  161. {
  162. int n;
  163. unsigned int *toallocptr=threadstuff[threadidx].toalloc;
  164. void **allocptr=threadstuff[threadidx].allocs;
  165. unsigned int r, seed=threadidx;
  166. usCount start;
  167. size_t allocated=0, size;
  168. threadstuff[threadidx].done=0;
  169. /*neddisablethreadcache(0);*/
  170. THREADSLEEP(100);
  171. start=GetUsCount();
  172. #if 2==TESTTYPE
  173. /* A randomised malloc/realloc/free test (torture test) */
  174. for(n=0; n<RECORDS*100; n++)
  175. {
  176. static int reallocflip;
  177. unsigned int i, dorealloc=(reallocflip=!reallocflip);
  178. r=myrandom(&seed);
  179. i=(int)(r % RECORDS);
  180. #if TESTCPLUSPLUS
  181. dorealloc=!(r&(15<<28));
  182. if(r&(1<<31))
  183. { /* Make it two power multiple of less than 512 bytes to
  184. model frequent C++ new's */
  185. size=4<<(r & 7);
  186. dorealloc=0;
  187. }
  188. else
  189. #endif
  190. size=(size_t)(r & (BLOCKSIZE-1));
  191. if(allocated<MAXMEMORY2 && !allocptr[i])
  192. {
  193. if(!(allocptr[i]=mallocs[whichmalloc](size))) abort();
  194. #if TOUCH
  195. {
  196. volatile char *mem=(volatile char *)allocptr[i];
  197. volatile char *end=mem+size;
  198. for(; mem<end; mem+=4096) *mem;
  199. }
  200. #endif
  201. allocated+=memsizes[whichmalloc](allocptr[i]);
  202. threadstuff[threadidx].ops.mallocs++;
  203. }
  204. else if(allocated<MAXMEMORY2 && dorealloc) /* If not TESTCPLUSPLUS, then how often realloc() gets called depends on how small RECORDS is. */
  205. {
  206. allocated-=memsizes[whichmalloc](allocptr[i]);
  207. if(!(allocptr[i]=reallocs[whichmalloc](allocptr[i], size))) abort();
  208. #if TOUCH
  209. {
  210. volatile char *mem=(volatile char *)allocptr[i];
  211. volatile char *end=mem+size;
  212. for(; mem<end; mem+=4096) *mem;
  213. }
  214. #endif
  215. allocated+=memsizes[whichmalloc](allocptr[i]);
  216. threadstuff[threadidx].ops.reallocs++;
  217. }
  218. else if(allocptr[i])
  219. {
  220. allocated-=memsizes[whichmalloc](allocptr[i]);
  221. frees[whichmalloc](allocptr[i]);
  222. allocptr[i]=0;
  223. threadstuff[threadidx].ops.frees++;
  224. }
  225. }
  226. for(n=0; n<RECORDS; n++)
  227. {
  228. if(allocptr[n])
  229. {
  230. allocated-=memsizes[whichmalloc](allocptr[n]);
  231. frees[whichmalloc](allocptr[n]);
  232. allocptr[n]=0;
  233. threadstuff[threadidx].ops.frees++;
  234. }
  235. }
  236. assert(!allocated);
  237. #elif 1==TESTTYPE
  238. /* A simple stack which allocates and deallocates off the top (speed test) */
  239. for(n=0; n<RECORDS;)
  240. {
  241. #if 1
  242. r=myrandom(&seed);
  243. if(allocptr>threadstuff[threadidx].allocs && (r & 65535)<32760) /*<32760)*/
  244. { /* free */
  245. --toallocptr;
  246. --allocptr;
  247. --n;
  248. frees[whichmalloc](*allocptr);
  249. *allocptr=0;
  250. threadstuff[threadidx].ops.frees++;
  251. }
  252. else
  253. #endif
  254. {
  255. if(doRealloc && allocptr>threadstuff[threadidx].allocs && (r & 1))
  256. {
  257. if(!(allocptr[-1]=reallocs[whichmalloc](allocptr[-1], *toallocptr))) abort();
  258. #if TOUCH
  259. {
  260. volatile char *mem=(volatile char *)allocptr[-1];
  261. volatile char *end=mem+*toallocptr;
  262. for(; mem<end; mem+=4096) *mem;
  263. }
  264. #endif
  265. threadstuff[threadidx].ops.reallocs++;
  266. }
  267. else
  268. {
  269. if(!(allocptr[0]=mallocs[whichmalloc](*toallocptr))) abort();
  270. #if TOUCH
  271. {
  272. volatile char *mem=(volatile char *)allocptr[0];
  273. volatile char *end=mem+*toallocptr;
  274. for(; mem<end; mem+=4096) *mem;
  275. }
  276. #endif
  277. threadstuff[threadidx].ops.mallocs++;
  278. allocptr++;
  279. }
  280. n++;
  281. toallocptr++;
  282. /*if(!(threadstuff[threadidx].ops & 0xff))
  283. nedtrimthreadcache(0,0);*/
  284. }
  285. }
  286. while(allocptr>threadstuff[threadidx].allocs)
  287. {
  288. frees[whichmalloc](*--allocptr);
  289. threadstuff[threadidx].ops.frees++;
  290. }
  291. #endif
  292. times[threadidx]+=GetUsCount()-start;
  293. neddisablethreadcache(0);
  294. threadstuff[threadidx].done=1;
  295. }
  296. static double runtest()
  297. {
  298. unsigned int seed=1;
  299. int n, i;
  300. double opspersec=0;
  301. THREADVAR threads[THREADS];
  302. for(n=0; n<THREADS; n++)
  303. {
  304. unsigned int *toallocptr;
  305. int m;
  306. memset(&threadstuff[n].ops, 0, sizeof(threadstuff[n].ops));
  307. times[n]=0;
  308. threadstuff[n].toalloc=toallocptr=calloc(RECORDS, sizeof(unsigned int));
  309. threadstuff[n].allocs=calloc(RECORDS, sizeof(void *));
  310. for(m=0; m<RECORDS; m++)
  311. {
  312. unsigned int size=myrandom(&seed);
  313. #if TESTCPLUSPLUS
  314. if(size&(1<<31))
  315. { /* Make it two power multiple of less than 512 bytes to
  316. model frequent C++ new's */
  317. size=4<<(size & 7);
  318. }
  319. else
  320. #endif
  321. {
  322. size&=BLOCKSIZE-1;
  323. }
  324. *toallocptr++=size;
  325. }
  326. }
  327. #if 2==TESTTYPE
  328. for(n=0; n<THREADS; n++)
  329. {
  330. THREADINIT(&threads[n], n);
  331. }
  332. for(i=0; i<8; i++)
  333. {
  334. int found=-1;
  335. do
  336. {
  337. for(n=0; n<THREADS; n++)
  338. {
  339. THREADSLEEP(100);
  340. if(threadstuff[n].done)
  341. {
  342. found=n;
  343. break;
  344. }
  345. }
  346. } while(found<0);
  347. THREADWAIT(threads[found]);
  348. threads[found]=0;
  349. #if DEBUG
  350. {
  351. usCount totaltime=0;
  352. int totalops=0, totalmallocs=0, totalreallocs=0;
  353. for(n=0; n<THREADS; n++)
  354. {
  355. totaltime+=times[n];
  356. totalmallocs+=threadstuff[n].ops.mallocs;
  357. totalreallocs+=threadstuff[n].ops.reallocs;
  358. totalops+=threadstuff[n].ops.mallocs+threadstuff[n].ops.reallocs;
  359. }
  360. opspersec=1000000000000.0*totalops/totaltime*THREADS;
  361. printf("This test spent %f%% of its time doing reallocs\n", 100.0*totalreallocs/totalops);
  362. printf("This allocator achieves %lfops/sec under %d threads\n\n", opspersec, THREADS);
  363. }
  364. #endif
  365. THREADINIT(&threads[found], found);
  366. printf("Relaunched thread %d\n", found);
  367. }
  368. for(n=THREADS-1; n>=0; n--)
  369. {
  370. THREADWAIT(threads[n]);
  371. threads[n]=0;
  372. }
  373. #else
  374. #if 1
  375. for(n=0; n<THREADS; n++)
  376. {
  377. THREADINIT(&threads[n], n);
  378. }
  379. for(n=THREADS-1; n>=0; n--)
  380. {
  381. THREADWAIT(threads[n]);
  382. threads[n]=0;
  383. }
  384. #else
  385. /* Quick realloc() test */
  386. doRealloc=1;
  387. for(n=0; n<THREADS; n++)
  388. {
  389. THREADINIT(&threads[n], n);
  390. }
  391. for(n=THREADS-1; n>=0; n--)
  392. {
  393. THREADWAIT(threads[n]);
  394. threads[n]=0;
  395. }
  396. #endif
  397. #endif
  398. {
  399. usCount totaltime=0;
  400. int totalops=0, totalmallocs=0, totalreallocs=0;
  401. for(n=0; n<THREADS; n++)
  402. {
  403. totaltime+=times[n];
  404. totalmallocs+=threadstuff[n].ops.mallocs;
  405. totalreallocs+=threadstuff[n].ops.reallocs;
  406. totalops+=threadstuff[n].ops.mallocs+threadstuff[n].ops.reallocs;
  407. }
  408. opspersec=1000000000000.0*totalops/totaltime*THREADS;
  409. printf("This test spent %f%% of its time doing reallocs\n", 100.0*totalreallocs/totalops);
  410. printf("This allocator achieves %lfops/sec under %d threads\n", opspersec, THREADS);
  411. }
  412. for(n=THREADS-1; n>=0; n--)
  413. {
  414. free(threadstuff[n].allocs); threadstuff[n].allocs=0;
  415. free(threadstuff[n].toalloc); threadstuff[n].toalloc=0;
  416. }
  417. return opspersec;
  418. }
  419. int PatchInNedmallocDLL(void);
  420. int main(void)
  421. {
  422. double std=0, ned=0;
  423. #if defined(WIN32) && defined(USE_NEDMALLOC_DLL)
  424. /*PatchInNedmallocDLL();*/
  425. #endif
  426. #if 0
  427. {
  428. usCount start, end;
  429. start=GetUsCount();
  430. THREADSLEEP(5000);
  431. end=GetUsCount();
  432. printf("Wait was %lf\n", (end-start)/1000000000000.0);
  433. }
  434. #endif
  435. #ifdef WIN32
  436. #pragma comment(lib, "user32.lib")
  437. { /* Force load of user32.dll so we can debug */
  438. BOOL v;
  439. SystemParametersInfo(SPI_GETBEEP, 0, &v, 0);
  440. }
  441. #endif
  442. #if 2==TESTTYPE
  443. printf("Running torture test\n"
  444. "-=-=-=-=-=-=-=-=-=-=\n");
  445. #elif 1==TESTTYPE
  446. printf("Running speed test\n"
  447. "-=-=-=-=-=-=-=-=-=\n");
  448. #endif
  449. printf("Block size <= %u, C++ test mode is %s\n", BLOCKSIZE, TESTCPLUSPLUS ? "on" : "off");
  450. if(0)
  451. {
  452. printf("\nTesting standard allocator with %d threads ...\n", THREADS);
  453. std=runtest();
  454. }
  455. if(1)
  456. {
  457. printf("\nTesting nedmalloc with %d threads ...\n", THREADS);
  458. whichmalloc=1;
  459. ned=runtest();
  460. }
  461. #ifdef WIN32
  462. if(0)
  463. {
  464. ULONG data=2;
  465. win32heap=HeapCreate(0, 0, 0);
  466. HeapSetInformation(win32heap, HeapCompatibilityInformation, &data, sizeof(data));
  467. HeapQueryInformation(win32heap, HeapCompatibilityInformation, &data, sizeof(data), NULL);
  468. if(2!=data)
  469. {
  470. printf("The win32 low frag allocator won't work under a debugger!\n");
  471. }
  472. else
  473. {
  474. printf("Testing win32 low frag allocator with %d threads ...\n\n", THREADS);
  475. whichmalloc=2;
  476. runtest();
  477. }
  478. HeapDestroy(win32heap);
  479. }
  480. #endif
  481. if(std && ned)
  482. { // ned should have more ops/sec
  483. printf("\n\nnedmalloc allocator is %lf times faster than standard\n", ned/std);
  484. }
  485. printf("\nPress a key to trim\n");
  486. getchar();
  487. nedmalloc_trim(0);
  488. #ifdef _MSC_VER
  489. printf("\nPress a key to end\n");
  490. getchar();
  491. #endif
  492. return 0;
  493. }