PageRenderTime 48ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/Presentations/ConcurrencyScalability/Demo/ConcurrencyAmp/ConcurrencyAmp.cpp

https://bitbucket.org/wcom/wcom-public
C++ | 365 lines | 290 code | 62 blank | 13 comment | 15 complexity | f840b50eb560f83346ec4e8a8c2e6063 MD5 | raw file
  1. // ----------------------------------------------------------------------------------------------
  2. // Copyright (c) WCOM AB.
  3. // ----------------------------------------------------------------------------------------------
  4. // This source code is subject to terms and conditions of the Microsoft Public License. A
  5. // copy of the license can be found in the License.html file at the root of this distribution.
  6. // If you cannot locate the Microsoft Public License, please send an email to
  7. // dlr@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  8. // by the terms of the Microsoft Public License.
  9. // ----------------------------------------------------------------------------------------------
  10. // You must not remove this notice, or any other, from this software.
  11. // ----------------------------------------------------------------------------------------------
  12. #include "stdafx.h"
  13. using namespace concurrency;
  14. namespace
  15. {
  // Benchmark configuration.
  // With SIMPLE defined a small, quick render is used; without it a larger
  // image with a higher iteration limit and a very narrow window is used.
  #define SIMPLE
  #ifdef SIMPLE
  // Numeric type used for all coordinate arithmetic.
  typedef float mtype;
  // Upper bound on iterations per pixel.
  const unsigned int iter   = 256;
  // Image size in pixels.
  const unsigned int width  = 1024;
  const unsigned int height = 1024;
  // Centre of the sampled region.
  const mtype cx = -1;
  const mtype cy = 0;
  // Size of the sampled region along each axis.
  const mtype dx = 3.0F;
  const mtype dy = 3.0F;
  // Number of timed runs for the CPU variants and the C++ AMP variants.
  const unsigned int cpu_n = 20U;
  const unsigned int amp_n = 20U;
  #else
  // NOTE(review): with mtype == float a 1e-10 window around these centres is
  // far below the spacing of adjacent float values, so this configuration
  // presumably wants a wider type - confirm before enabling.
  typedef float mtype;
  const unsigned int iter   = 2048;
  const unsigned int width  = 2048;
  const unsigned int height = 2048;
  const mtype cx = 0.001643721971153;
  const mtype cy = 0.822467633298876;
  const mtype dx = 0.00000000010;
  const mtype dy = 0.00000000010;
  const unsigned int cpu_n = 1U;
  const unsigned int amp_n = 1U;
  #endif
  // Times repeated invocations of a callable.
  //
  // predicate is invoked once untimed as a warm-up and then test_runs more
  // times while the clock is running.
  //
  //   test_runs  number of timed invocations
  //   predicate  callable taking no arguments; any result is ignored
  //
  // Returns the elapsed time of the timed invocations in whole milliseconds.
  template<typename TPredicate>
  long long execute_testruns (
      std::size_t test_runs
    , TPredicate  predicate
    )
  {
    // steady_clock is monotonic, so the measured interval cannot be skewed (or
    // turn negative) by a system clock adjustment during the run.
    typedef std::chrono::steady_clock clock;

    // Warm-up
    predicate ();

    auto const then = clock::now ();

    // The counter has the same type as test_runs so the comparison cannot wrap.
    for (std::size_t test_run = 0; test_run < test_runs; ++test_run)
    {
      predicate ();
    }

    auto const now = clock::now ();

    return std::chrono::duration_cast<std::chrono::milliseconds> (now - then).count ();
  }
  58. inline int mandelbrot (mtype x, mtype y, int iter) restrict(amp, cpu)
  59. {
  60. auto ix = x;
  61. auto iy = y;
  62. auto i = 0;
  63. // Zn+1 = Zn^2 + C
  64. for (; (i < iter) & ((ix * ix + iy * iy) < 4); ++i)
  65. {
  66. auto tx = ix * ix - iy * iy + x;
  67. iy = 2 * ix * iy + y;
  68. ix = tx;
  69. }
  70. return i;
  71. }
  72. void compute_mandelbrot (
  73. std::vector<int> & result
  74. , unsigned int iter
  75. , unsigned int width
  76. , unsigned int height
  77. , mtype cx
  78. , mtype cy
  79. , mtype dx
  80. , mtype dy
  81. )
  82. {
  83. result.resize (width*height);
  84. auto tx = dx/width;
  85. auto mx = cx - dx / 2;
  86. auto ty = dy/height;
  87. auto my = cy - dy / 2;
  88. for (auto iy = 0U; iy < height; ++iy)
  89. {
  90. for (auto ix = 0U; ix < width; ++ix)
  91. {
  92. auto x = tx * ix + mx;
  93. auto y = ty * iy + my;
  94. result[ix + iy*width] = mandelbrot (x,y, iter);
  95. }
  96. }
  97. }
  98. void parallel_compute_mandelbrot (
  99. std::vector<int> & result
  100. , unsigned int iter
  101. , unsigned int width
  102. , unsigned int height
  103. , mtype cx
  104. , mtype cy
  105. , mtype dx
  106. , mtype dy
  107. )
  108. {
  109. result.resize (width*height);
  110. auto tx = dx/width;
  111. auto mx = cx - dx / 2;
  112. auto ty = dy/height;
  113. auto my = cy - dy / 2;
  114. parallel_for (
  115. 0U
  116. , width*height
  117. , [&] (unsigned int idx)
  118. {
  119. auto ix = idx % width;
  120. auto iy = idx / width;
  121. auto x = tx * ix + mx;
  122. auto y = ty * iy + my;
  123. result[idx] = mandelbrot (x,y, iter);
  124. });
  125. }
  126. void amp_compute_void (
  127. std::vector<int> & result
  128. , unsigned int width
  129. , unsigned int height
  130. )
  131. {
  132. result.resize (width*height);
  133. extent<2> extent (width, height);
  134. array_view<int, 2> view (extent, result);
  135. view.discard_data ();
  136. parallel_for_each (
  137. view.extent
  138. , [=] (index<2> idx) restrict(amp)
  139. {
  140. view[idx] = idx[0]*idx[1];
  141. });
  142. view.synchronize ();
  143. }
  144. void amp_compute_mandelbrot (
  145. std::vector<int> & result
  146. , unsigned int iter
  147. , unsigned int width
  148. , unsigned int height
  149. , mtype cx
  150. , mtype cy
  151. , mtype dx
  152. , mtype dy
  153. )
  154. {
  155. result.resize (width*height);
  156. auto tx = dx/width;
  157. auto mx = cx - dx / 2;
  158. auto ty = dy/height;
  159. auto my = cy - dy / 2;
  160. extent<2> extent (width, height);
  161. array_view<int, 2> view (extent, result);
  162. view.discard_data ();
  163. parallel_for_each (
  164. view.extent
  165. , [=] (index<2> idx) restrict(amp)
  166. {
  167. auto x = tx * idx[1] + mx;
  168. auto y = ty * idx[0] + my;
  169. view[idx] = mandelbrot (x,y, iter);
  170. });
  171. view.synchronize ();
  172. }
  173. }
  174. int main()
  175. {
  176. #ifdef _DEBUG
  177. {
  178. std::vector<int> trivial_result ;
  179. std::vector<int> parallel_result;
  180. std::vector<int> amp_result ;
  181. compute_mandelbrot (
  182. trivial_result
  183. , iter
  184. , width
  185. , height
  186. , cx
  187. , cy
  188. , dx
  189. , dy
  190. );
  191. parallel_compute_mandelbrot (
  192. parallel_result
  193. , iter
  194. , width
  195. , height
  196. , cx
  197. , cy
  198. , dx
  199. , dy
  200. );
  201. amp_compute_mandelbrot (
  202. amp_result
  203. , iter
  204. , width
  205. , height
  206. , cx
  207. , cy
  208. , dx
  209. , dy
  210. );
  211. auto expected_size = width*height;
  212. if (
  213. expected_size == trivial_result.size ()
  214. && expected_size == parallel_result.size ()
  215. && expected_size == amp_result.size ()
  216. )
  217. {
  218. for (auto iter = 0U; iter < expected_size; ++iter)
  219. {
  220. if (trivial_result[iter] != parallel_result[iter])
  221. {
  222. printf ("Error: Parallel result mismatch@%d!\r\n", iter);
  223. return 102;
  224. }
  225. if (trivial_result[iter] != amp_result[iter])
  226. {
  227. printf ("Error: Amp result mismatch@%d!\r\n", iter);
  228. return 103;
  229. }
  230. }
  231. }
  232. else
  233. {
  234. printf ("Error: Size mismatch!\r\n");
  235. return 101;
  236. }
  237. printf ("Results are all ok!\r\n");
  238. }
  239. #else
  240. std::vector<int> result;
  241. result.resize (width*height);
  242. auto mandelbrot_result = execute_testruns (
  243. cpu_n
  244. , [&] ()
  245. {
  246. compute_mandelbrot (
  247. result
  248. , iter
  249. , width
  250. , height
  251. , cx
  252. , cy
  253. , dx
  254. , dy
  255. );
  256. });
  257. printf ("%d trivial mandelbrot iterations took %d ms\r\n" , cpu_n, (int)mandelbrot_result );
  258. auto parallel_mandelbrot_result = execute_testruns (
  259. cpu_n
  260. , [&] ()
  261. {
  262. parallel_compute_mandelbrot (
  263. result
  264. , iter
  265. , width
  266. , height
  267. , cx
  268. , cy
  269. , dx
  270. , dy
  271. );
  272. });
  273. printf ("%d parallel mandelbrot iterations took %d ms\r\n" , cpu_n, (int)parallel_mandelbrot_result );
  274. auto amp_void_result = execute_testruns (
  275. amp_n
  276. , [&] ()
  277. {
  278. amp_compute_void (
  279. result
  280. , width
  281. , height
  282. );
  283. });
  284. printf ("%d amp void iterations took %d ms\r\n" , amp_n, (int)amp_void_result );
  285. auto amp_mandelbrot_result = execute_testruns (
  286. amp_n
  287. , [&] ()
  288. {
  289. amp_compute_mandelbrot (
  290. result
  291. , iter
  292. , width
  293. , height
  294. , cx
  295. , cy
  296. , dx
  297. , dy
  298. );
  299. });
  300. printf ("%d amp mandelbrot iterations took %d ms\r\n" , amp_n, (int)amp_mandelbrot_result );
  301. #endif
  302. return 0;
  303. }