/Src/OSF/ASMLIB/testmem.cpp

https://github.com/papyrussolution/OpenPapyrus · C++ · 396 lines · 316 code · 39 blank · 41 comment · 139 complexity · 3261b8f8010b2577162bad1d4546c1c9 MD5 · raw file

  1. // TESTMEM.CPP Agner Fog 2011-07-04
  2. // Test file for asmlib memcpy and memmove functions
  3. // Instructions: Compile on any platform and link with the appropriate
  4. // version of the asmlib library.
  5. #include <stdio.h>
  6. //#include <process.h>
  7. #include <stdlib.h>
  8. #include <memory.h>
  9. #include <string.h>
  10. #include "asmlib.h"
  // Function types shared by all routines under test.
  // memcpyF has the memcpy/memmove signature, memsetF has the memset signature.
  typedef void * memcpyF(void * dest, const void * src, size_t count);
  typedef void * memsetF(void * dest, int c, size_t count);

  extern "C" {
      // Library variable; it is not referenced in the visible part of this file.
      extern int IInstrSet;

      // Prototypes for the CPU-specific memcpy versions
      memcpyF memcpy386;
      memcpyF memcpySSE2;
      memcpyF memcpySSSE3;
      memcpyF memcpyU;
      memcpyF memcpyU256;

      // Prototypes for the CPU-specific memmove versions
      memcpyF memmove386;
      memcpyF memmoveSSE2;
      memcpyF memmoveSSSE3;
      memcpyF memmoveU;
      memcpyF memmoveU256;

      // Prototypes for the CPU-specific memset versions
      memsetF memset386;
      memsetF memsetSSE2;
      memsetF memsetAVX;
  }
  21. // Tables of function pointers
  22. #if defined(_WIN64) || defined(_M_X64) || defined(__amd64)
  23. const int NUMFUNC = 5;
  24. memcpyF * memcpyTab[NUMFUNC] = {A_memcpy, memcpySSE2, memcpySSSE3, memcpyU, memcpyU256};
  25. memcpyF * memmoveTab[NUMFUNC] = {A_memmove, memmoveSSE2, memmoveSSSE3, memmoveU, memmoveU256};
  26. const char * DispatchNames[NUMFUNC] = {"Dispatched", "SSE2", "SSSE3", "Unalign", "U256"};
  27. int isetreq [NUMFUNC] = {0, 4, 6, 4, 11}; // instruction set required
  28. const int MEMSETFUNCS = 3;
  29. memsetF * memsetTab[MEMSETFUNCS] = {A_memset, memsetSSE2, memsetAVX};
  30. const char * memsetNames[MEMSETFUNCS] = {"Dispatched", "SSE2", "AVX"};
  31. int memsetreq [NUMFUNC] = {0, 4, 11}; // instruction set required
  32. #else
  33. const int NUMFUNC = 6;
  34. memcpyF * memcpyTab[NUMFUNC] = {A_memcpy, memcpy386, memcpySSE2, memcpySSSE3, memcpyU, memcpyU256};
  35. memcpyF * memmoveTab[NUMFUNC] = {A_memmove, memmove386, memmoveSSE2, memmoveSSSE3, memmoveU, memmoveU256};
  36. const char * DispatchNames[NUMFUNC] = {"Dispatched", "386", "SSE2", "SSSE3", "Unalign", "U256"};
  37. int isetreq [NUMFUNC] = {0, 0, 4, 6, 4, 11}; // instruction set required
  38. const int MEMSETFUNCS = 4;
  39. memsetF * memsetTab[MEMSETFUNCS] = {A_memset, memset386, memsetSSE2, memsetAVX};
  40. const char * memsetNames[MEMSETFUNCS] = {"Dispatched", "386", "SSE2", "AVX"};
  41. int memsetreq [NUMFUNC] = {0, 0, 4, 11}; // instruction set required
  42. #endif
  // Report a failed check and stop the test program.
  //   s       - label identifying the check that failed
  //   a, b, c - three integers printed after the label (typically offsets and length)
  // Does not return: the process exits with status 1.
  void error(const char * s, int a, int b, int c) {
      printf("\nError %s:", s);
      printf(" %i %i %i\n", a, b, c);
      exit(1);
  }
  // Overload of error() that prints four integers instead of three.
  //   s          - label identifying the check that failed
  //   i, a, b, c - four integers printed after the label
  // Does not return: the process exits with status 1.
  void error(const char * s, int i, int a, int b, int c) {
      printf("\nError %s:", s);
      printf(" %i %i %i %i\n", i, a, b, c);
      exit(1);
  }
  51. int main () {
  52. int ao, bo, os, len;
  53. int version;
  54. const int pagesize = 0x1000; // 4 kbytes
  55. const int n = 16*pagesize;
  56. char a[n], b[n], c[n];
  57. int instrset = InstructionSet();
  58. // CacheBypassLimit = 5;
  59. printf("\nmemcpy cache limit = 0x%X, memset cache limit 0x%X\n",
  60. (int)GetMemcpyCacheLimit(), (int)GetMemsetCacheLimit());
  61. printf("\nTest memcpy");
  62. int i, x = 91;
  63. for (i=0; i<n; i++) {
  64. x += 23;
  65. a[i] = (char)x;
  66. }
  67. A_memset(b, -1, n);
  68. SetMemcpyCacheLimit(0); // default
  69. #if 1
  70. // Test memcpy for correctness
  71. // Loop through versions
  72. for (version = 0; version < NUMFUNC; version++) {
  73. printf("\n%s", DispatchNames[version]);
  74. if (instrset < isetreq[version]) {
  75. // instruction set not supported
  76. printf(" skipped"); continue;
  77. }
  78. for (len=0; len<514; len++) {
  79. for (ao = 0; ao <=20; ao++) {
  80. for (bo = 0; bo <=32; bo++) {
  81. A_memset(b, -1, len+96);
  82. (*memcpyTab[version])(b+bo, a+ao, len);
  83. if (bo && b[bo-1] != -1) error("A", ao, bo, len);
  84. if (b[bo+len] != -1) error("B", ao, bo, len);
  85. if (len==0) continue;
  86. if (b[bo] != a[ao]) error("C", ao, bo, len);
  87. if (b[bo+len-1] != a[ao+len-1]) error("D", ao, bo, len);
  88. if (memcmp(b+bo, a+ao, len)) error("E", ao, bo, len);
  89. }
  90. }
  91. }
  92. // check false memory dependence branches
  93. len = 300;
  94. A_memcpy(b, a, 3*pagesize);
  95. for (ao = pagesize-300; ao < pagesize+200; ao++) {
  96. for (bo = 3*pagesize; bo <=3*pagesize+33; bo++) {
  97. A_memset(b+bo-64, -1, len+128);
  98. (*memcpyTab[version])(b+bo, b+ao, len);
  99. if (b[bo-1] != -1) error("A1", ao, bo, len);
  100. if (b[bo+len] != -1) error("B1", ao, bo, len);
  101. if (memcmp(b+bo, b+ao, len)) error("E1", ao, bo, len);
  102. }
  103. }
  104. // check false memory dependence branches with overlap
  105. // src > dest and overlap: must copy forwards
  106. len = pagesize+1000;
  107. for (ao = 2*pagesize; ao <=2*pagesize+33; ao++) {
  108. for (bo = pagesize-200; bo < pagesize+300; bo++) {
  109. A_memcpy(b, a, 4*pagesize);
  110. A_memcpy(c, a, 4*pagesize);
  111. (*memcpyTab[version])(b+bo, b+ao, len);
  112. //memcpy(c+bo, c+ao, len); // Most library versions of memcpy are actually memmove
  113. memcpySSE2(c+bo, c+ao, len);
  114. if (memcmp(b, c, 4*pagesize)) {
  115. error("E2", ao-pagesize, bo-2*pagesize, len);
  116. }
  117. }
  118. }
  119. // check false memory dependence branches with overlap
  120. // dest > src and overlap: undefined behavior
  121. #if 1
  122. len = pagesize+1000;
  123. for (ao = pagesize-200; ao < pagesize+200; ao++) {
  124. for (bo = 2*pagesize; bo <=2*pagesize+33; bo++) {
  125. A_memcpy(b, a, 4*pagesize);
  126. A_memcpy(c, a, 4*pagesize);
  127. (*memcpyTab[version])(b+bo, b+ao, len);
  128. //memcpy(c+bo, c+ao, len); // MS Most library versions of memcpy are actually memmove
  129. memcpySSE2(c+bo, c+ao, len);
  130. if (memcmp(b, c, 4*pagesize)) {
  131. error("E3", ao-pagesize, bo-2*pagesize, len);
  132. }
  133. }
  134. }
  135. #endif
  136. }
  137. printf("\n\nTest memmove");
  138. // Test memmove for correctness
  139. for (i=0; i<n; i++) {
  140. x += 23;
  141. a[i] = char(x);
  142. }
  143. // Loop through versions
  144. for (version = 0; version < NUMFUNC; version++) {
  145. printf("\n%s", DispatchNames[version]);
  146. if (instrset < isetreq[version]) {
  147. // instruction set not supported
  148. printf(" skipped"); continue;
  149. }
  150. // move forward
  151. for (len=0; len<400; len++) {
  152. for (bo = 0; bo <=33; bo++) {
  153. for (os = 0; os <= 33; os++) {
  154. A_memcpy(b, a, len+100);
  155. (*memmoveTab[version])(b+bo+os, b+bo, len);
  156. for (i=0; i<bo+os; i++) if (b[i]!=a[i]) error("E", i, bo, os, len);
  157. for (i=bo+os; i<bo+os+len; i++) if (b[i] != a[i-os]) error("F", i, bo, os, len);
  158. for (;i < bo+os+len+20; i++) if (b[i]!=a[i]) error("G", i, bo, os, len);
  159. }
  160. }
  161. }
  162. // move backwards
  163. for (len=0; len<400; len++) {
  164. for (bo = 0; bo <=33; bo++) {
  165. for (os = 0; os < 33; os++) {
  166. A_memcpy(b, a, len+96);
  167. (*memmoveTab[version])(b+bo, b+bo+os, len);
  168. for (i=0; i<bo; i++) if (b[i]!=a[i]) error("H", i, bo, os, len);
  169. for (i=bo; i<bo+len; i++) if (b[i] != a[i+os]) error("I", i, bo, os, len);
  170. for (;i < bo+len+20; i++) if (b[i]!=a[i]) error("J", i, bo, os, len);
  171. }
  172. }
  173. }
  174. }
  175. printf("\n\nSame, with non-temporal moves");
  176. SetMemcpyCacheLimit(1); // bypass cache
  177. // Loop through versions
  178. for (version = 0; version < NUMFUNC; version++) {
  179. printf("\n%s", DispatchNames[version]);
  180. if (instrset < isetreq[version]) {
  181. // instruction set not supported
  182. printf(" skipped"); continue;
  183. }
  184. for (len=0; len<514; len++) {
  185. for (ao = 0; ao <=20; ao++) {
  186. for (bo = 0; bo <=32; bo++) {
  187. A_memset(b, -1, len+96);
  188. (*memcpyTab[version])(b+bo, a+ao, len);
  189. if (bo && b[bo-1] != -1) error("A", ao, bo, len);
  190. if (b[bo+len] != -1) error("B", ao, bo, len);
  191. if (len==0) continue;
  192. if (b[bo] != a[ao]) error("C", ao, bo, len);
  193. if (b[bo+len-1] != a[ao+len-1]) error("D", ao, bo, len);
  194. if (memcmp(b+bo, a+ao, len)) error("E", ao, bo, len);
  195. }
  196. }
  197. }
  198. // check false memory dependence branches
  199. len = 300;
  200. A_memcpy(b, a, 3*pagesize);
  201. for (ao = pagesize-200; ao < pagesize+200; ao++) {
  202. for (bo = 3*pagesize; bo <=3*pagesize+33; bo++) {
  203. A_memset(b+bo-64, -1, len+128);
  204. (*memcpyTab[version])(b+bo, b+ao, len);
  205. if (b[bo-1] != -1) error("A1", ao, bo, len);
  206. if (b[bo+len] != -1) error("B1", ao, bo, len);
  207. if (memcmp(b+bo, b+ao, len)) error("E1", ao, bo, len);
  208. }
  209. }
  210. // check false memory dependence branches with overlap
  211. // src > dest and overlap: must copy forwards
  212. len = pagesize+1000;
  213. for (ao = 2*pagesize; ao <=2*pagesize+33; ao++) {
  214. for (bo = pagesize-200; bo < pagesize+200; bo++) {
  215. A_memcpy(b, a, 4*pagesize);
  216. A_memcpy(c, a, 4*pagesize);
  217. (*memcpyTab[version])(b+bo, b+ao, len);
  218. //memcpy(c+bo, c+ao, len); // Most library versions of memcpy are actually memmove
  219. memcpySSE2(c+bo, c+ao, len);
  220. if (memcmp(b, c, 4*pagesize)) {
  221. error("E2", ao-pagesize, bo-2*pagesize, len);
  222. }
  223. }
  224. }
  225. // (check false memory dependence branches with overlap. skipped)
  226. }
  227. printf("\n\nTest memmove");
  228. // Test memmove for correctness
  229. for (i=0; i<n; i++) {
  230. x += 23;
  231. a[i] = char(x);
  232. }
  233. // Loop through versions
  234. for (version = 0; version < NUMFUNC; version++) {
  235. printf("\n%s", DispatchNames[version]);
  236. if (instrset < isetreq[version]) {
  237. // instruction set not supported
  238. printf(" skipped"); continue;
  239. }
  240. // move forward
  241. for (len=0; len<400; len++) {
  242. for (bo = 0; bo <=33; bo++) {
  243. for (os = 0; os <= 33; os++) {
  244. A_memcpy(b, a, len+100);
  245. (*memmoveTab[version])(b+bo+os, b+bo, len);
  246. for (i=0; i<bo+os; i++) if (b[i]!=a[i]) error("E", i, bo, os, len);
  247. for (i=bo+os; i<bo+os+len; i++) if (b[i] != a[i-os]) error("F", i, bo, os, len);
  248. for (;i < bo+os+len+20; i++) if (b[i]!=a[i]) error("G", i, bo, os, len);
  249. }
  250. }
  251. }
  252. // move backwards
  253. for (len=0; len<400; len++) {
  254. for (bo = 0; bo <=33; bo++) {
  255. for (os = 0; os < 33; os++) {
  256. A_memcpy(b, a, len+96);
  257. (*memmoveTab[version])(b+bo, b+bo+os, len);
  258. for (i=0; i<bo; i++) if (b[i]!=a[i]) error("H", i, bo, os, len);
  259. for (i=bo; i<bo+len; i++) if (b[i] != a[i+os]) error("I", i, bo, os, len);
  260. for (;i < bo+len+20; i++) if (b[i]!=a[i]) error("J", i, bo, os, len);
  261. }
  262. }
  263. }
  264. }
  265. #endif
  266. SetMemcpyCacheLimit(0); // back to default
  267. SetMemsetCacheLimit(0);
  268. printf("\n\nTest memset");
  269. // test memset
  270. const int val1 = 0x4C, val2 = 0xA2, len2 = 1024;
  271. for (version = 0; version < MEMSETFUNCS; version++) {
  272. memsetF * func = memsetTab[version];
  273. printf("\n%s", memsetNames[version]);
  274. if (instrset < memsetreq[version]) {
  275. // instruction set not supported
  276. printf(" skipped"); continue;
  277. }
  278. for (os = 0; os < 34; os++) {
  279. for (len = 0; len < 500; len++) {
  280. memset(a, val1, len2);
  281. memset(a+os, val2, len);
  282. (*func)(b, val1, len2);
  283. (*func)(b+os, val2, len);
  284. if (memcmp(a, b, len2)) {
  285. error("MS", version, os, len);
  286. }
  287. }
  288. }
  289. for (len=0; len<200; len++) {
  290. for (os = 0; os <= 33; os++) {
  291. A_memcpy(b, a, len+64);
  292. A_memset(b+os, 55, len);
  293. for (i=0; i<os; i++) if (b[i] != a[i]) error("K", i, os, len);
  294. for (; i<os+len; i++) if (b[i] != 55) error("L", i, os, len);
  295. for (; i<os+len+17; i++) if (b[i] != a[i]) error("M", i, os, len);
  296. }
  297. }
  298. }
  299. printf("\n\nSame, with non-temporal moves");
  300. SetMemsetCacheLimit(1); // bypass cache
  301. for (version = 0; version < MEMSETFUNCS; version++) {
  302. memsetF * func = memsetTab[version];
  303. printf("\n%s", memsetNames[version]);
  304. if (instrset < memsetreq[version]) {
  305. // instruction set not supported
  306. printf(" skipped"); continue;
  307. }
  308. for (os = 0; os < 34; os++) {
  309. for (len = 0; len < 500; len++) {
  310. memset(a, val1, len2);
  311. memset(a+os, val2, len);
  312. (*func)(b, val1, len2);
  313. (*func)(b+os, val2, len);
  314. if (memcmp(a, b, len2)) {
  315. error("MS", version, os, len);
  316. }
  317. }
  318. }
  319. }
  320. SetMemsetCacheLimit(0); // back to default
  321. printf("\n\nTest strlen");
  322. // test strlen
  323. for (len=0; len<400; len++) {
  324. for (os = 0; os <= 32; os++) {
  325. A_memset(b, 0, len+64);
  326. A_memset(b+os, 'a', len);
  327. x = A_strlen(b+os);
  328. if (x != len) error("N", 0, os, len);
  329. A_memset(b, 1, len+64);
  330. b[os+len] = 0;
  331. x = A_strlen(b+os);
  332. if (x != len) error("O", 0, os, len);
  333. }
  334. }
  335. printf("\n\nTest strcpy and strcat");
  336. // test strcpy and strcat
  337. for (i=0; i<n; i++) {
  338. x += 23;
  339. a[i] = char(x) | 1;
  340. }
  341. for (len=0; len<400; len++) {
  342. for (os = 0; os <= 16; os++) {
  343. for (i=0; i<33; i++) {
  344. A_memmove(b, a, len+64);
  345. b[os+len] = 0;
  346. A_strcpy(c+5, b+os);
  347. if (A_strlen(c+5) != len) error("P", 0, os, len);
  348. A_memmove(b+55, a, i+4);
  349. b[55+i] = 0;
  350. A_strcat(c+5, b+55);
  351. if (A_strlen(c+5) != len+i) error("R", 0, os, len);
  352. }
  353. }
  354. }
  355. printf("\n\nSuccess\n");
  356. return 0;
  357. }