// unsupported/test/cxx11_tensor_argmax_sycl.cpp
// (origin: https://gitlab.com/mrssss/eigen — web-scrape metadata removed)
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2016
// Mehdi Goli    Codeplay Software Ltd.
// Ralph Potter  Codeplay Software Ltd.
// Luke Iwanski  Codeplay Software Ltd.
// Contact: <eigen@codeplay.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::array;
using Eigen::SyclDevice;
using Eigen::Tensor;
using Eigen::TensorMap;
  23. template <typename DataType, int Layout, typename DenseIndex>
  24. static void test_sycl_simple_argmax(const Eigen::SyclDevice& sycl_device) {
  25. Tensor<DataType, 3, Layout, DenseIndex> in(Eigen::array<DenseIndex, 3>{{2, 2, 2}});
  26. Tensor<DenseIndex, 0, Layout, DenseIndex> out_max;
  27. Tensor<DenseIndex, 0, Layout, DenseIndex> out_min;
  28. in.setRandom();
  29. in *= in.constant(100.0);
  30. in(0, 0, 0) = -1000.0;
  31. in(1, 1, 1) = 1000.0;
  32. std::size_t in_bytes = in.size() * sizeof(DataType);
  33. std::size_t out_bytes = out_max.size() * sizeof(DenseIndex);
  34. DataType* d_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
  35. DenseIndex* d_out_max = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
  36. DenseIndex* d_out_min = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
  37. Eigen::TensorMap<Eigen::Tensor<DataType, 3, Layout, DenseIndex> > gpu_in(d_in,
  38. Eigen::array<DenseIndex, 3>{{2, 2, 2}});
  39. Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout, DenseIndex> > gpu_out_max(d_out_max);
  40. Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout, DenseIndex> > gpu_out_min(d_out_min);
  41. sycl_device.memcpyHostToDevice(d_in, in.data(), in_bytes);
  42. gpu_out_max.device(sycl_device) = gpu_in.argmax();
  43. gpu_out_min.device(sycl_device) = gpu_in.argmin();
  44. sycl_device.memcpyDeviceToHost(out_max.data(), d_out_max, out_bytes);
  45. sycl_device.memcpyDeviceToHost(out_min.data(), d_out_min, out_bytes);
  46. VERIFY_IS_EQUAL(out_max(), 2 * 2 * 2 - 1);
  47. VERIFY_IS_EQUAL(out_min(), 0);
  48. sycl_device.deallocate(d_in);
  49. sycl_device.deallocate(d_out_max);
  50. sycl_device.deallocate(d_out_min);
  51. }
  52. template <typename DataType, int DataLayout, typename DenseIndex>
  53. static void test_sycl_argmax_dim(const Eigen::SyclDevice& sycl_device) {
  54. DenseIndex sizeDim0 = 9;
  55. DenseIndex sizeDim1 = 3;
  56. DenseIndex sizeDim2 = 5;
  57. DenseIndex sizeDim3 = 7;
  58. Tensor<DataType, 4, DataLayout, DenseIndex> tensor(sizeDim0, sizeDim1, sizeDim2, sizeDim3);
  59. std::vector<DenseIndex> dims;
  60. dims.push_back(sizeDim0);
  61. dims.push_back(sizeDim1);
  62. dims.push_back(sizeDim2);
  63. dims.push_back(sizeDim3);
  64. for (DenseIndex dim = 0; dim < 4; ++dim) {
  65. array<DenseIndex, 3> out_shape;
  66. for (DenseIndex d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d + 1];
  67. Tensor<DenseIndex, 3, DataLayout, DenseIndex> tensor_arg(out_shape);
  68. array<DenseIndex, 4> ix;
  69. for (DenseIndex i = 0; i < sizeDim0; ++i) {
  70. for (DenseIndex j = 0; j < sizeDim1; ++j) {
  71. for (DenseIndex k = 0; k < sizeDim2; ++k) {
  72. for (DenseIndex l = 0; l < sizeDim3; ++l) {
  73. ix[0] = i;
  74. ix[1] = j;
  75. ix[2] = k;
  76. ix[3] = l;
  77. // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l)
  78. // = 10.0
  79. tensor(ix) = (ix[dim] != 0) ? -1.0 : 10.0;
  80. }
  81. }
  82. }
  83. }
  84. std::size_t in_bytes = tensor.size() * sizeof(DataType);
  85. std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);
  86. DataType* d_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
  87. DenseIndex* d_out = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
  88. Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, DenseIndex> > gpu_in(
  89. d_in, Eigen::array<DenseIndex, 4>{{sizeDim0, sizeDim1, sizeDim2, sizeDim3}});
  90. Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout, DenseIndex> > gpu_out(d_out, out_shape);
  91. sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
  92. gpu_out.device(sycl_device) = gpu_in.argmax(dim);
  93. sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);
  94. VERIFY_IS_EQUAL(static_cast<size_t>(tensor_arg.size()),
  95. size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.dimension(dim)));
  96. for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
  97. // Expect max to be in the first index of the reduced dimension
  98. VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
  99. }
  100. sycl_device.synchronize();
  101. for (DenseIndex i = 0; i < sizeDim0; ++i) {
  102. for (DenseIndex j = 0; j < sizeDim1; ++j) {
  103. for (DenseIndex k = 0; k < sizeDim2; ++k) {
  104. for (DenseIndex l = 0; l < sizeDim3; ++l) {
  105. ix[0] = i;
  106. ix[1] = j;
  107. ix[2] = k;
  108. ix[3] = l;
  109. // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0
  110. tensor(ix) = (ix[dim] != tensor.dimension(dim) - 1) ? -1.0 : 20.0;
  111. }
  112. }
  113. }
  114. }
  115. sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
  116. gpu_out.device(sycl_device) = gpu_in.argmax(dim);
  117. sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);
  118. for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
  119. // Expect max to be in the last index of the reduced dimension
  120. VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
  121. }
  122. sycl_device.deallocate(d_in);
  123. sycl_device.deallocate(d_out);
  124. }
  125. }
  126. template <typename DataType, int DataLayout, typename DenseIndex>
  127. static void test_sycl_argmin_dim(const Eigen::SyclDevice& sycl_device) {
  128. DenseIndex sizeDim0 = 9;
  129. DenseIndex sizeDim1 = 3;
  130. DenseIndex sizeDim2 = 5;
  131. DenseIndex sizeDim3 = 7;
  132. Tensor<DataType, 4, DataLayout, DenseIndex> tensor(sizeDim0, sizeDim1, sizeDim2, sizeDim3);
  133. std::vector<DenseIndex> dims;
  134. dims.push_back(sizeDim0);
  135. dims.push_back(sizeDim1);
  136. dims.push_back(sizeDim2);
  137. dims.push_back(sizeDim3);
  138. for (DenseIndex dim = 0; dim < 4; ++dim) {
  139. array<DenseIndex, 3> out_shape;
  140. for (DenseIndex d = 0; d < 3; ++d) out_shape[d] = (d < dim) ? dims[d] : dims[d + 1];
  141. Tensor<DenseIndex, 3, DataLayout, DenseIndex> tensor_arg(out_shape);
  142. array<DenseIndex, 4> ix;
  143. for (DenseIndex i = 0; i < sizeDim0; ++i) {
  144. for (DenseIndex j = 0; j < sizeDim1; ++j) {
  145. for (DenseIndex k = 0; k < sizeDim2; ++k) {
  146. for (DenseIndex l = 0; l < sizeDim3; ++l) {
  147. ix[0] = i;
  148. ix[1] = j;
  149. ix[2] = k;
  150. ix[3] = l;
  151. // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = -10.0
  152. tensor(ix) = (ix[dim] != 0) ? 1.0 : -10.0;
  153. }
  154. }
  155. }
  156. }
  157. std::size_t in_bytes = tensor.size() * sizeof(DataType);
  158. std::size_t out_bytes = tensor_arg.size() * sizeof(DenseIndex);
  159. DataType* d_in = static_cast<DataType*>(sycl_device.allocate(in_bytes));
  160. DenseIndex* d_out = static_cast<DenseIndex*>(sycl_device.allocate(out_bytes));
  161. Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, DenseIndex> > gpu_in(
  162. d_in, Eigen::array<DenseIndex, 4>{{sizeDim0, sizeDim1, sizeDim2, sizeDim3}});
  163. Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout, DenseIndex> > gpu_out(d_out, out_shape);
  164. sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
  165. gpu_out.device(sycl_device) = gpu_in.argmin(dim);
  166. sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);
  167. VERIFY_IS_EQUAL(static_cast<size_t>(tensor_arg.size()),
  168. size_t(sizeDim0 * sizeDim1 * sizeDim2 * sizeDim3 / tensor.dimension(dim)));
  169. for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
  170. // Expect max to be in the first index of the reduced dimension
  171. VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
  172. }
  173. sycl_device.synchronize();
  174. for (DenseIndex i = 0; i < sizeDim0; ++i) {
  175. for (DenseIndex j = 0; j < sizeDim1; ++j) {
  176. for (DenseIndex k = 0; k < sizeDim2; ++k) {
  177. for (DenseIndex l = 0; l < sizeDim3; ++l) {
  178. ix[0] = i;
  179. ix[1] = j;
  180. ix[2] = k;
  181. ix[3] = l;
  182. // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = -20.0
  183. tensor(ix) = (ix[dim] != tensor.dimension(dim) - 1) ? 1.0 : -20.0;
  184. }
  185. }
  186. }
  187. }
  188. sycl_device.memcpyHostToDevice(d_in, tensor.data(), in_bytes);
  189. gpu_out.device(sycl_device) = gpu_in.argmin(dim);
  190. sycl_device.memcpyDeviceToHost(tensor_arg.data(), d_out, out_bytes);
  191. for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
  192. // Expect max to be in the last index of the reduced dimension
  193. VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
  194. }
  195. sycl_device.deallocate(d_in);
  196. sycl_device.deallocate(d_out);
  197. }
  198. }
  199. template <typename DataType, typename Device_Selector>
  200. void sycl_argmax_test_per_device(const Device_Selector& d) {
  201. QueueInterface queueInterface(d);
  202. auto sycl_device = Eigen::SyclDevice(&queueInterface);
  203. test_sycl_simple_argmax<DataType, RowMajor, int64_t>(sycl_device);
  204. test_sycl_simple_argmax<DataType, ColMajor, int64_t>(sycl_device);
  205. test_sycl_argmax_dim<DataType, ColMajor, int64_t>(sycl_device);
  206. test_sycl_argmax_dim<DataType, RowMajor, int64_t>(sycl_device);
  207. test_sycl_argmin_dim<DataType, ColMajor, int64_t>(sycl_device);
  208. test_sycl_argmin_dim<DataType, RowMajor, int64_t>(sycl_device);
  209. }
  210. EIGEN_DECLARE_TEST(cxx11_tensor_argmax_sycl) {
  211. for (const auto& device : Eigen::get_sycl_supported_devices()) {
  212. CALL_SUBTEST(sycl_argmax_test_per_device<float>(device));
  213. }
  214. }