PageRenderTime 52ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/src/DeviceMatrixWrapper.cpp

https://github.com/mertdikmen/ViVid
C++ | 439 lines | 297 code | 82 blank | 60 comment | 30 complexity | 9da053eb2e280f487355da20098a4b19 MD5 | raw file
  1. #include "DeviceMatrixWrapper.hpp"
  2. #define PY_ARRAY_UNIQUE_SYMBOL tb
  3. #define NO_IMPORT
  4. #include <numpy/arrayobject.h>
  5. #include <iostream>
  6. #include <cuda_runtime.h>
  7. using namespace boost::python;
  8. //#include <cutil.h>
  9. // Cribbed (and modified) from cutil.h (can't seem to include the
  10. // whole thing)
  11. # define CUDA_SAFE_CALL_NO_SYNC( call) do { \
  12. cudaError_t err = call; \
  13. if( cudaSuccess != err) { \
  14. fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n", \
  15. __FILE__, __LINE__, cudaGetErrorString( err) ); \
  16. exit(EXIT_FAILURE); \
  17. } } while (0)
  18. DeviceMatrix::Ptr makeDeviceMatrix(boost::python::object& array)
  19. {
  20. // If we already have a DeviceMatrix, just return it. This sholuld
  21. // help unify code paths.
  22. extract<DeviceMatrix::Ptr> get_matrix(array);
  23. if (get_matrix.check()) {
  24. return get_matrix();
  25. }
  26. NumPyMatrix arr(array);
  27. DeviceMatrix::Ptr retval = makeDeviceMatrix(arr.height(), arr.width());
  28. DeviceMatrix_copyToDevice(*retval, arr);
  29. return retval;
  30. }
  31. boost::python::object DeviceMatrix_copyFromDevicePy(const DeviceMatrix& self)
  32. {
  33. NumPyMatrix retval(self.height, self.width);
  34. //printf("reading %p (%i x %i)\n", self.data, self.width, self.height);
  35. if ((self.width > 0) && (self.height > 0)) {
  36. const size_t widthInBytes = self.width * sizeof(float);
  37. CUDA_SAFE_CALL_NO_SYNC
  38. (cudaMemcpy2D(retval.data(), widthInBytes,
  39. self.data, self.pitch * sizeof(float),
  40. widthInBytes, self.height,
  41. cudaMemcpyDeviceToHost));
  42. }
  43. return retval.array;
  44. }
  45. void DeviceMatrix_copyToDevice(DeviceMatrix& self,
  46. const NumPyMatrix& matrix)
  47. {
  48. assert(self.width == matrix.width());
  49. assert(self.height == matrix.height());
  50. if ((self.width > 0) && (self.height > 0)) {
  51. const size_t widthInBytes = self.width * sizeof(float);
  52. CUDA_SAFE_CALL_NO_SYNC
  53. (cudaMemcpy2D(self.data, self.pitch * sizeof(float),
  54. matrix.data(), widthInBytes,
  55. widthInBytes, self.height,
  56. cudaMemcpyHostToDevice));
  57. }
  58. }
  59. DeviceMatrixCL::Ptr makeDeviceMatrixCL(boost::python::object& array)
  60. {
  61. // If we already have a DeviceMatrix, just return it. This sholuld
  62. // help unify code paths.
  63. extract<DeviceMatrixCL::Ptr> get_matrix(array);
  64. if (get_matrix.check()) {
  65. return get_matrix();
  66. }
  67. NumPyMatrix arr(array);
  68. DeviceMatrixCL::Ptr retval = makeDeviceMatrixCL(arr.height(), arr.width());
  69. DeviceMatrixCL_copyToDevice(*retval, arr);
  70. return retval;
  71. }
  72. boost::python::object DeviceMatrixCL_copyFromDevicePy(const DeviceMatrixCL& self)
  73. {
  74. NumPyMatrix retval(self.height, self.width);
  75. DeviceMatrixCL_copyFromDevice(self, retval.data());
  76. return retval.array;
  77. }
  78. void DeviceMatrixCL_copyToDevice(DeviceMatrixCL& self,
  79. const NumPyMatrix& matrix)
  80. {
  81. assert(self.width == matrix.width());
  82. assert(self.height == matrix.height());
  83. DeviceMatrixCL_copyToDevice(self, matrix.data());
  84. }
  85. ///////////////////////////
  86. DeviceMatrix3D::Ptr makeDeviceMatrix3D(const boost::python::object& array)
  87. {
  88. PyObject* contig
  89. = PyArray_FromAny(array.ptr(), PyArray_DescrFromType(PyArray_FLOAT),
  90. 3, 3, NPY_CARRAY, NULL);
  91. handle<> temp(contig);
  92. object arr(temp);
  93. DeviceMatrix3D::Ptr retval = makeDeviceMatrix3D(PyArray_DIM(arr.ptr(), 0),
  94. PyArray_DIM(arr.ptr(), 1),
  95. PyArray_DIM(arr.ptr(), 2));
  96. DeviceMatrix3D_copyToDevicePy(*retval, arr);
  97. return retval;
  98. }
  99. #if 0
  100. // It would seem that cudaMemcpy3D would be the ideal function for the
  101. // job, but it fails mysteriously if x*y > 2**18
  102. boost::python::object DeviceMatrix3D_copyFromDevice(const DeviceMatrix3D& self)
  103. {
  104. npy_intp dims[3] = {self.dim_t, self.dim_y, self.dim_x};
  105. PyObject* arr = PyArray_New(&PyArray_Type, 3, dims, PyArray_FLOAT,
  106. NULL, NULL, 0, NPY_ARRAY_C_CONTIGUOUS, NULL);
  107. handle<> temp(arr);
  108. object retval(temp);
  109. if ((self.dim_x > 0) && (self.dim_y > 0) && (self.dim_t > 0)) {
  110. // Largely cribbed from
  111. // http://forums.nvidia.com/lofiversion/index.php?t77910.html
  112. cudaMemcpy3DParms copyParams = {0};
  113. /**
  114. * @todo Redefine DeviceMatrix3D to be closer to the form that
  115. * the library wants.
  116. */
  117. copyParams.srcPtr // Device
  118. = make_cudaPitchedPtr((void*)self.data,
  119. self.pitch_y * sizeof(float),
  120. self.dim_x,
  121. self.pitch_t / self.pitch_y);
  122. copyParams.dstPtr // Host
  123. = make_cudaPitchedPtr(PyArray_DATA(retval.ptr()),
  124. self.dim_x * sizeof(float),
  125. self.dim_x,
  126. self.dim_y);
  127. copyParams.kind = cudaMemcpyDeviceToHost;
  128. copyParams.extent
  129. = make_cudaExtent(self.dim_x*sizeof(float), self.dim_y, self.dim_t);
  130. CUDA_SAFE_CALL_NO_SYNC(cudaMemcpy3D(&copyParams));
  131. }
  132. return retval;
  133. }
  134. #else
  135. // Hack around problem with cudaMemcpy3D by using cudaMemcpy2D
  136. boost::python::object DeviceMatrix3D_copyFromDevicePy(const DeviceMatrix3D& self)
  137. {
  138. npy_intp dims[3] = {self.dim_t, self.dim_y, self.dim_x};
  139. PyObject* arr = PyArray_New(&PyArray_Type, 3, dims, PyArray_FLOAT,
  140. NULL, NULL, 0, 0, NULL);
  141. handle<> temp(arr);
  142. object retval(temp);
  143. if ((self.dim_x == 0) || (self.dim_y == 0) || (self.dim_t == 0)) {
  144. // Bail early if there is nothing to copy
  145. return retval;
  146. }
  147. if (self.pitch_t == self.dim_y * self.pitch_y) {
  148. // Shortcut if we're packed in the t direction
  149. const size_t widthInBytes = self.dim_x * sizeof(float);
  150. CUDA_SAFE_CALL_NO_SYNC
  151. (cudaMemcpy2D(PyArray_DATA(retval.ptr()), widthInBytes,
  152. self.data, self.pitch_y * sizeof(float),
  153. widthInBytes, self.dim_y * self.dim_t,
  154. cudaMemcpyDeviceToHost));
  155. return retval;
  156. }
  157. // Do a series of copies to fill in the 3D array
  158. for (size_t t=0; t < self.dim_t; t++) {
  159. const size_t widthInBytes = self.dim_x * sizeof(float);
  160. float* host_start = (float*)PyArray_DATA(retval.ptr())
  161. + t * self.dim_y * self.dim_x;
  162. float* device_start = self.data + t * self.pitch_t;
  163. CUDA_SAFE_CALL_NO_SYNC
  164. (cudaMemcpy2D(host_start, widthInBytes,
  165. device_start, self.pitch_y * sizeof(float),
  166. widthInBytes, self.dim_y,
  167. cudaMemcpyDeviceToHost));
  168. }
  169. return retval;
  170. }
  171. #endif
  172. void DeviceMatrix3D_copyToDevicePy(DeviceMatrix3D& self,
  173. const object& array)
  174. {
  175. PyObject* contig
  176. = PyArray_FromAny(array.ptr(), PyArray_DescrFromType(PyArray_FLOAT),
  177. 3, 3, NPY_CARRAY, NULL);
  178. handle<> temp(contig);
  179. object arr(temp);
  180. // Make sure that we are packed in the t direction
  181. assert(self.pitch_t == self.dim_y * self.pitch_y);
  182. if ((self.dim_x > 0) && (self.dim_y > 0) && (self.dim_t > 0)) {
  183. const size_t widthInBytes = self.dim_x * sizeof(float);
  184. CUDA_SAFE_CALL_NO_SYNC
  185. (cudaMemcpy2D(self.data, self.pitch_y * sizeof(float),
  186. PyArray_DATA(arr.ptr()), widthInBytes,
  187. widthInBytes, self.dim_y * self.dim_t,
  188. cudaMemcpyHostToDevice));
  189. }
  190. }
  191. /**
  192. OPENCL 3d
  193. **/
  194. DeviceMatrixCL3D::Ptr makeDeviceMatrixCL3D(const boost::python::object& array)
  195. {
  196. PyObject* contig
  197. = PyArray_FromAny(array.ptr(), PyArray_DescrFromType(PyArray_FLOAT),
  198. 3, 3, NPY_CARRAY, NULL);
  199. handle<> temp(contig);
  200. object arr(temp);
  201. DeviceMatrixCL3D::Ptr retval = makeDeviceMatrixCL3D(PyArray_DIM(arr.ptr(), 0),
  202. PyArray_DIM(arr.ptr(), 1),
  203. PyArray_DIM(arr.ptr(), 2));
  204. DeviceMatrixCL3D_copyToDevice(*retval, arr);
  205. return retval;
  206. }
  207. boost::python::object DeviceMatrixCL3D_copyFromDevicePy(const DeviceMatrixCL3D& self)
  208. {
  209. npy_intp dims[3] = {self.dim_t, self.dim_y, self.dim_x};
  210. PyObject* arr = PyArray_New(&PyArray_Type, 3, dims, PyArray_FLOAT,
  211. NULL, NULL, 0, NULL, NULL);
  212. handle<> temp(arr);
  213. object retval(temp);
  214. DeviceMatrixCL3D_copyFromDevice(self, (float*)PyArray_DATA(retval.ptr()));
  215. return retval;
  216. }
  217. void DeviceMatrixCL3D_copyToDevice(DeviceMatrixCL3D& self,
  218. const object& array)
  219. {
  220. PyObject* contig
  221. = PyArray_FromAny(array.ptr(), PyArray_DescrFromType(PyArray_FLOAT),
  222. 3, 3, NPY_CARRAY, NULL);
  223. handle<> temp(contig);
  224. object arr(temp);
  225. // Make sure that we are packed in the t direction
  226. assert(self.pitch_t == self.dim_y * self.pitch_y);
  227. DeviceMatrixCL3D_copyToDevice(self, (float*) PyArray_DATA(arr.ptr()));
  228. }
  229. /**
  230. MCUDAMATRIX3d
  231. **/
  232. MCudaMatrix3D::Ptr makeMCudaMatrix3D(const object& array)
  233. {
  234. PyObject* contig
  235. = PyArray_FromAny(array.ptr(), PyArray_DescrFromType(PyArray_FLOAT),
  236. 3, 3, NPY_CARRAY, NULL);
  237. handle<> temp(contig);
  238. object arr(temp);
  239. MCudaMatrix3D::Ptr retval = makeMCudaMatrix3D(PyArray_DIM(arr.ptr(), 0),
  240. PyArray_DIM(arr.ptr(), 1),
  241. PyArray_DIM(arr.ptr(), 2));
  242. memcpy(retval->data, PyArray_DATA(arr.ptr()),
  243. retval->dim_t * retval->dim_y * retval->dim_x * sizeof(float));
  244. return retval;
  245. }
  246. boost::python::object MCudaMatrix3D_copyFromDevice(const MCudaMatrix3D& self)
  247. {
  248. npy_intp dims[3] = {self.dim_t, self.dim_y, self.dim_x};
  249. PyObject* arr = PyArray_New(&PyArray_Type, 3, dims, PyArray_FLOAT,
  250. NULL, NULL, 0, 0, NULL);
  251. handle<> temp(arr);
  252. object retval(temp);
  253. /**
  254. * @todo Avoid the copy
  255. */
  256. memcpy(PyArray_DATA(retval.ptr()), self.data,
  257. self.dim_t * self.dim_y * self.dim_x * sizeof(float));
  258. return retval;
  259. }
  260. /**
  261. MCLMATRIX3d
  262. **/
  263. MCLMatrix3D::Ptr makeMCLMatrix3D(const object& array)
  264. {
  265. PyObject* contig
  266. = PyArray_FromAny(array.ptr(), PyArray_DescrFromType(PyArray_FLOAT),
  267. 3, 3, NPY_CARRAY, NULL);
  268. handle<> temp(contig);
  269. object arr(temp);
  270. MCLMatrix3D::Ptr retval = makeMCLMatrix3D(PyArray_DIM(arr.ptr(), 0),
  271. PyArray_DIM(arr.ptr(), 1),
  272. PyArray_DIM(arr.ptr(), 2));
  273. /// memcpy(retval->data, PyArray_DATA(arr.ptr()),
  274. // retval->dim_t * retval->dim_y * retval->dim_x * sizeof(float));
  275. return retval;
  276. }
  277. boost::python::object MCLMatrix3D_copyFromDevice(const MCLMatrix3D& self)
  278. {
  279. npy_intp dims[3] = {self.dim_t, self.dim_y, self.dim_x};
  280. PyObject* arr = PyArray_New(&PyArray_Type, 3, dims, PyArray_FLOAT,
  281. NULL, NULL, 0, 0, NULL);
  282. handle<> temp(arr);
  283. object retval(temp);
  284. /**
  285. * @todo Avoid the copy
  286. */
  287. //memcpy(PyArray_DATA(retval.ptr()), self.data,
  288. // self.dim_t * self.dim_y * self.dim_x * sizeof(float));
  289. return retval;
  290. }
  291. #include <boost/python/suite/indexing/vector_indexing_suite.hpp>
  292. void export_DeviceMatrix()
  293. {
  294. class_<DeviceMatrix, DeviceMatrix::Ptr >
  295. ("DeviceMatrix", no_init)
  296. .def("__init__",
  297. make_constructor<DeviceMatrix::Ptr (object&)>
  298. (makeDeviceMatrix))
  299. .def("mat", DeviceMatrix_copyFromDevicePy);
  300. class_<DeviceMatrixCL, DeviceMatrixCL::Ptr >
  301. ("DeviceMatrixCL", no_init)
  302. .def("__init__",
  303. make_constructor<DeviceMatrixCL::Ptr (object&)>
  304. (makeDeviceMatrixCL))
  305. .def("mat", DeviceMatrixCL_copyFromDevicePy);
  306. class_<DeviceMatrix3D, DeviceMatrix3D::Ptr >
  307. ("DeviceMatrix3D", no_init)
  308. .def("__init__",
  309. make_constructor<DeviceMatrix3D::Ptr (const object&)>
  310. (makeDeviceMatrix3D))
  311. .def("mat", DeviceMatrix3D_copyFromDevicePy)
  312. .def("set", DeviceMatrix3D_copyToDevicePy)
  313. .def("crop", cropDeviceMatrix3D)
  314. ;
  315. def("_makeDeviceMatrix3DPacked", makeDeviceMatrix3DPacked);
  316. class_<DeviceMatrixCL3D, DeviceMatrixCL3D::Ptr >
  317. ("DeviceMatrixCL3D", no_init)
  318. .def("__init__",
  319. make_constructor<DeviceMatrixCL3D::Ptr (const object&)>
  320. (makeDeviceMatrixCL3D))
  321. .def("mat", DeviceMatrixCL3D_copyFromDevicePy)
  322. //.def("set", DeviceMatrixCL3D_copyToDevice)
  323. .def("crop", cropDeviceMatrixCL3D)
  324. ;
  325. def("_makeDeviceMatrixCL3DPacked", makeDeviceMatrix3DPacked);
  326. // Don't tell python about the subclass relationship -- we should
  327. // try to keep this as distinct from DeviceMatrix3D as possible
  328. class_<MCudaMatrix3D, MCudaMatrix3D::Ptr >
  329. ("MCudaMatrix3D", no_init)
  330. .def("__init__",
  331. make_constructor<MCudaMatrix3D::Ptr (const object&)>
  332. (makeMCudaMatrix3D))
  333. .def("mat", MCudaMatrix3D_copyFromDevice)
  334. ;
  335. class_<MCLMatrix3D, MCLMatrix3D::Ptr >
  336. ("MCLMatrix3D", no_init)
  337. .def("__init__",
  338. make_constructor<MCLMatrix3D::Ptr (const object&)>
  339. (makeMCLMatrix3D))
  340. .def("mat", MCLMatrix3D_copyFromDevice)
  341. ;
  342. //class_<DeviceMatrix::PtrList >("DeviceMatrix3DList", no_init)
  343. // .def(vector_indexing_suite<DeviceMatrix::PtrList, true>());
  344. //class_<DeviceMatrix::PtrList >("DeviceMCuda3DList", no_init)
  345. // .def(vector_indexing_suite<DeviceMatrix::PtrList, true>());
  346. //class_<DeviceMatrixCL::PtrList >("DeviceMCL3DList", no_init)
  347. // .def(vector_indexing_suite<DeviceMatrixCL::PtrList, true>());
  348. }