PageRenderTime 45ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/Cudafy/CudafyByExample/chapter10/basic_double_stream_correct.cs

#
C# | 131 lines | 102 code | 17 blank | 12 comment | 6 complexity | b9b821dbafbe558f1c41695bb529044c MD5 | raw file
  1. /*
  2. * This software is based upon the book CUDA By Example by Sanders and Kandrot
  3. * and source code provided by NVIDIA Corporation.
  4. * It is a good idea to read the book while studying the examples!
  5. */
  6. using System;
  7. using System.Collections.Generic;
  8. using System.Linq;
  9. using System.Text;
  10. using Cudafy;
  11. using Cudafy.Host;
  12. using Cudafy.Translator;
  13. namespace CudafyByExample
  14. {
  /// <summary>
  /// Chapter 10 sample: pushes FULL_DATA_SIZE integers through the GPU in N-element
  /// chunks, alternating between two streams (ids 1 and 2), and then checks the
  /// GPU output against a host-side reference implementation.
  /// </summary>
  public class basic_double_stream_correct
  {
      /// <summary>Number of elements processed by one kernel launch (one chunk).</summary>
      public const int N = (1024*1024);

      /// <summary>Total number of elements in the host data set (20 chunks).</summary>
      public const int FULL_DATA_SIZE = (N*20);

      /// <summary>
      /// Kernel: for each element of a chunk, averages the element with two
      /// neighbours taken from the first 256 entries of the chunk (indices wrap
      /// modulo 256), does so for both inputs, and stores half of the truncated sum.
      /// </summary>
      /// <param name="thread">Cudafy thread context supplying threadIdx, blockIdx and blockDim.</param>
      /// <param name="a">First input chunk (N elements).</param>
      /// <param name="b">Second input chunk (N elements).</param>
      /// <param name="c">Output chunk (N elements).</param>
      [Cudafy]
      public static void thekernel(GThread thread, int[] a, int[] b, int[] c)
      {
          int idx = thread.threadIdx.x + thread.blockIdx.x * thread.blockDim.x;
          if (idx < N)
          {
              int idx1 = (idx + 1) % 256;
              int idx2 = (idx + 2) % 256;
              float aS = (a[idx] + a[idx1] + a[idx2]) / 3.0f;
              float bS = (b[idx] + b[idx1] + b[idx2]) / 3.0f;
              // The cast binds to (aS + bS); the division by 2 is an integer division.
              c[idx] = (int)(aS + bS) / 2;
          }
      }

      /// <summary>
      /// Runs the sample: fills the host buffers with random data, streams the data
      /// through the kernel two chunks at a time (one chunk per stream), prints the
      /// elapsed time, verifies every chunk on the host and releases all resources.
      /// </summary>
      public static void Execute()
      {
          CudafyModule km = CudafyTranslator.Cudafy();
          GPGPU gpu = CudafyHost.GetDevice(CudafyModes.Target, CudafyModes.DeviceId);
          gpu.LoadModule(km);

          // allocate the memory on the GPU: one a/b/c buffer set per stream
          int[] dev_a0 = gpu.Allocate<int>(N);
          int[] dev_b0 = gpu.Allocate<int>(N);
          int[] dev_c0 = gpu.Allocate<int>(N);
          int[] dev_a1 = gpu.Allocate<int>(N);
          int[] dev_b1 = gpu.Allocate<int>(N);
          int[] dev_c1 = gpu.Allocate<int>(N);

          // allocate host locked memory, used to stream
          IntPtr host_aPtr = gpu.HostAllocate<int>(FULL_DATA_SIZE);
          IntPtr host_bPtr = gpu.HostAllocate<int>(FULL_DATA_SIZE);
          IntPtr host_cPtr = gpu.HostAllocate<int>(FULL_DATA_SIZE);

          Random rand = new Random();
          for (int i = 0; i < FULL_DATA_SIZE; i++)
          {
              host_aPtr.Set(i, rand.Next(1024 * 1024)); // There will be differences between the .NET code and the GPU
              host_bPtr.Set(i, rand.Next(1024 * 1024)); // So let's keep these to a minimum by having a max random values.
          }

          // start timer
          gpu.StartTimer();

          // now loop over full data, in bite-sized chunks
          for (int i = 0; i < FULL_DATA_SIZE; i += N * 2)
          {
              // Stream 1 owns chunk [i, i+N) and the *0 buffers; stream 2 owns chunk
              // [i+N, i+2N) and the *1 buffers. Each stream must upload the inputs its
              // own kernel reads, otherwise nothing orders the copy before the kernel.
              // Work is enqueued breadth-first (alternating streams) so that neither
              // stream's queued operations hold up the other's.
              gpu.CopyToDeviceAsync(host_aPtr, i, dev_a0, 0, N, 1);
              gpu.CopyToDeviceAsync(host_aPtr, i + N, dev_a1, 0, N, 2);
              gpu.CopyToDeviceAsync(host_bPtr, i, dev_b0, 0, N, 1);
              gpu.CopyToDeviceAsync(host_bPtr, i + N, dev_b1, 0, N, 2);

              gpu.LaunchAsync(N / 256, 256, 1, "thekernel", dev_a0, dev_b0, dev_c0);
              gpu.LaunchAsync(N / 256, 256, 2, "thekernel", dev_a1, dev_b1, dev_c1);

              gpu.CopyFromDeviceAsync(dev_c0, 0, host_cPtr, i, N, 1);
              gpu.CopyFromDeviceAsync(dev_c1, 0, host_cPtr, i + N, N, 2);
          }
          gpu.SynchronizeStream(1);
          gpu.SynchronizeStream(2);
          float elapsed = gpu.StopTimer();

          // verify: pull the host-locked buffers into managed arrays first
          int[] host_a = new int[FULL_DATA_SIZE];
          int[] host_b = new int[FULL_DATA_SIZE];
          int[] host_c = new int[FULL_DATA_SIZE];
          GPGPU.CopyOnHost(host_aPtr, 0, host_a, 0, FULL_DATA_SIZE);
          GPGPU.CopyOnHost(host_bPtr, 0, host_b, 0, FULL_DATA_SIZE);
          GPGPU.CopyOnHost(host_cPtr, 0, host_c, 0, FULL_DATA_SIZE);
          Console.WriteLine("Elapsed: {0} ms", elapsed);

          // Check every chunk (and therefore both streams), not just the first one.
          int[] host_d = new int[FULL_DATA_SIZE];
          int errors = 0;
          for (int id = 0; id < FULL_DATA_SIZE; id++)
          {
              control(id, id % N, host_a, host_b, host_d);
              // There will be differences between the .NET code and the GPU,
              // so tolerate a deviation of one in either direction.
              if (Math.Abs(host_c[id] - host_d[id]) > 1)
              {
                  Console.WriteLine("Mismatch at {0}: {1} != {2}", id, host_c[id], host_d[id]);
                  errors++;
                  if (errors > 8)
                  {
                      break; // enough evidence; avoid flooding the console
                  }
              }
          }

          // release device buffers, host locked memory and the two streams
          gpu.Free(dev_a0);
          gpu.Free(dev_b0);
          gpu.Free(dev_c0);
          gpu.Free(dev_a1);
          gpu.Free(dev_b1);
          gpu.Free(dev_c1);
          gpu.HostFree(host_aPtr);
          gpu.HostFree(host_bPtr);
          gpu.HostFree(host_cPtr);
          gpu.DestroyStream(1);
          gpu.DestroyStream(2);
      }

      /// <summary>
      /// Host-side reference for <see cref="thekernel"/>: computes the expected
      /// output for one element of the full data set and stores it in <paramref name="c"/>.
      /// </summary>
      /// <param name="idx">Index of the element within the full data set.</param>
      /// <param name="jdx">Index of the same element within its N-element chunk.</param>
      /// <param name="a">First full-size input array.</param>
      /// <param name="b">Second full-size input array.</param>
      /// <param name="c">Full-size array receiving the expected value at <paramref name="idx"/>.</param>
      public static void control(int idx, int jdx, int[] a, int[] b, int[] c)
      {
          // The kernel indexes neighbours relative to the start of its chunk, so the
          // reference must offset by the chunk's first element, not the chunk number.
          int chunkStart = (idx / N) * N;
          int idx1 = chunkStart + (jdx + 1) % 256;
          int idx2 = chunkStart + (jdx + 2) % 256;
          float aS = (a[idx] + a[idx1] + a[idx2]) / 3.0f;
          float bS = (b[idx] + b[idx1] + b[idx2]) / 3.0f;
          c[idx] = (int)(aS + bS) / 2;
      }
  }
  114. }