PageRenderTime 51ms CodeModel.GetById 18ms RepoModel.GetById 1ms app.codeStats 0ms

/mpich2-1.4.1p1/src/mpid/common/datatype/dataloop/dataloop_create_indexed.c

#
C | 451 lines | 317 code | 52 blank | 82 comment | 61 complexity | 332d527e94385c4965daf6b33ad16cea MD5 | raw file
  1. /* -*- Mode: C; c-basic-offset:4 ; -*- */
  2. /*
  3. * (C) 2001 by Argonne National Laboratory.
  4. * See COPYRIGHT in top-level directory.
  5. */
  6. #include <stdlib.h>
  7. #include "./dataloop.h"
  /* Forward declaration of the file-local helper (defined later in this file)
   * that copies the input blocklength/displacement arrays into the output
   * arrays of a new indexed dataloop. */
  8. static void DLOOP_Type_indexed_array_copy(DLOOP_Count count,
  9. DLOOP_Count contig_count,
  10. int *input_blocklength_array,
  11. void *input_displacement_array,
  12. DLOOP_Count *output_blocklength_array,
  13. DLOOP_Offset *out_disp_array,
  14. int dispinbytes,
  15. DLOOP_Offset old_extent);
  16. /*@
  17. DLOOP_Dataloop_create_indexed
  18. Arguments:
  19. + int icount
  20. . int *iblocklength_array
  21. . void *displacement_array (either ints or MPI_Aints)
  22. . int dispinbytes
  23. . MPI_Datatype oldtype
  24. . DLOOP_Dataloop **dlp_p
  25. . int *dlsz_p
  26. . int *dldepth_p
  27. - int flag
  28. .N Errors
  29. .N Returns 0 on success, -1 on error.
  30. @*/
  31. int PREPEND_PREFIX(Dataloop_create_indexed)(int icount,
  32. int *blocklength_array,
  33. void *displacement_array,
  34. int dispinbytes,
  35. MPI_Datatype oldtype,
  36. DLOOP_Dataloop **dlp_p,
  37. int *dlsz_p,
  38. int *dldepth_p,
  39. int flag)
  40. {
  41. int err, is_builtin;
  42. int i, new_loop_sz, old_loop_depth, blksz;
  43. DLOOP_Count first;
  44. DLOOP_Count old_type_count = 0, contig_count, count;
  45. DLOOP_Offset old_extent;
  46. struct DLOOP_Dataloop *new_dlp;
  47. count = (DLOOP_Count) icount; /* avoid subsequent casting */
  48. /* if count is zero, handle with contig code, call it an int */
  49. if (count == 0)
  50. {
  51. err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
  52. MPI_INT,
  53. dlp_p,
  54. dlsz_p,
  55. dldepth_p,
  56. flag);
  57. return err;
  58. }
  59. /* Skip any initial zero-length blocks */
  60. for (first = 0; first < count; first++)
  61. if ((DLOOP_Count) blocklength_array[first])
  62. break;
  63. is_builtin = (DLOOP_Handle_hasloop_macro(oldtype)) ? 0 : 1;
  64. if (is_builtin)
  65. {
  66. DLOOP_Handle_get_extent_macro(oldtype, old_extent);
  67. old_loop_depth = 0;
  68. }
  69. else
  70. {
  71. DLOOP_Handle_get_extent_macro(oldtype, old_extent);
  72. DLOOP_Handle_get_loopdepth_macro(oldtype, old_loop_depth, flag);
  73. }
  74. for (i=first; i < count; i++)
  75. {
  76. old_type_count += (DLOOP_Count) blocklength_array[i];
  77. }
  78. contig_count = PREPEND_PREFIX(Type_indexed_count_contig)(count,
  79. blocklength_array,
  80. displacement_array,
  81. dispinbytes,
  82. old_extent);
  83. /* if contig_count is zero (no data), handle with contig code */
  84. if (contig_count == 0)
  85. {
  86. err = PREPEND_PREFIX(Dataloop_create_contiguous)(0,
  87. MPI_INT,
  88. dlp_p,
  89. dlsz_p,
  90. dldepth_p,
  91. flag);
  92. return err;
  93. }
  94. /* optimization:
  95. *
  96. * if contig_count == 1 and block starts at displacement 0,
  97. * store it as a contiguous rather than an indexed dataloop.
  98. */
  99. if ((contig_count == 1) &&
  100. ((!dispinbytes && ((int *) displacement_array)[first] == 0) ||
  101. (dispinbytes && ((MPI_Aint *) displacement_array)[first] == 0)))
  102. {
  103. err = PREPEND_PREFIX(Dataloop_create_contiguous)((int) old_type_count,
  104. oldtype,
  105. dlp_p,
  106. dlsz_p,
  107. dldepth_p,
  108. flag);
  109. return err;
  110. }
  111. /* optimization:
  112. *
  113. * if contig_count == 1 (and displacement != 0), store this as
  114. * a single element blockindexed rather than a lot of individual
  115. * blocks.
  116. */
  117. if (contig_count == 1)
  118. {
  119. err = PREPEND_PREFIX(Dataloop_create_blockindexed)(1,
  120. (int) old_type_count,
  121. &(((int *)displacement_array)[first]),
  122. dispinbytes,
  123. oldtype,
  124. dlp_p,
  125. dlsz_p,
  126. dldepth_p,
  127. flag);
  128. return err;
  129. }
  130. /* optimization:
  131. *
  132. * if block length is the same for all blocks, store it as a
  133. * blockindexed rather than an indexed dataloop.
  134. */
  135. blksz = blocklength_array[first];
  136. for (i = first+1; i < count; i++)
  137. {
  138. if (blocklength_array[i] != blksz)
  139. {
  140. blksz--;
  141. break;
  142. }
  143. }
  144. if (blksz == blocklength_array[first])
  145. {
  146. err = PREPEND_PREFIX(Dataloop_create_blockindexed)(icount-first,
  147. blksz,
  148. &(((int *)displacement_array)[first]),
  149. dispinbytes,
  150. oldtype,
  151. dlp_p,
  152. dlsz_p,
  153. dldepth_p,
  154. flag);
  155. return err;
  156. }
  157. /* note: blockindexed looks for the vector optimization */
  158. /* TODO: optimization:
  159. *
  160. * if an indexed of a contig, absorb the contig into the blocklen array
  161. * and keep the same overall depth
  162. */
  163. /* otherwise storing as an indexed dataloop */
  164. if (is_builtin)
  165. {
  166. PREPEND_PREFIX(Dataloop_alloc)(DLOOP_KIND_INDEXED,
  167. count,
  168. &new_dlp,
  169. &new_loop_sz);
  170. /* --BEGIN ERROR HANDLING-- */
  171. if (!new_dlp) return -1;
  172. /* --END ERROR HANDLING-- */
  173. new_dlp->kind = DLOOP_KIND_INDEXED | DLOOP_FINAL_MASK;
  174. if (flag == DLOOP_DATALOOP_ALL_BYTES)
  175. {
  176. /* blocklengths are modified below */
  177. new_dlp->el_size = 1;
  178. new_dlp->el_extent = 1;
  179. new_dlp->el_type = MPI_BYTE;
  180. }
  181. else
  182. {
  183. new_dlp->el_size = old_extent;
  184. new_dlp->el_extent = old_extent;
  185. new_dlp->el_type = oldtype;
  186. }
  187. }
  188. else
  189. {
  190. DLOOP_Dataloop *old_loop_ptr = NULL;
  191. int old_loop_sz = 0;
  192. DLOOP_Handle_get_loopptr_macro(oldtype, old_loop_ptr, flag);
  193. DLOOP_Handle_get_loopsize_macro(oldtype, old_loop_sz, flag);
  194. PREPEND_PREFIX(Dataloop_alloc_and_copy)(DLOOP_KIND_INDEXED,
  195. contig_count,
  196. old_loop_ptr,
  197. old_loop_sz,
  198. &new_dlp,
  199. &new_loop_sz);
  200. /* --BEGIN ERROR HANDLING-- */
  201. if (!new_dlp) return -1;
  202. /* --END ERROR HANDLING-- */
  203. new_dlp->kind = DLOOP_KIND_INDEXED;
  204. DLOOP_Handle_get_size_macro(oldtype, new_dlp->el_size);
  205. DLOOP_Handle_get_extent_macro(oldtype, new_dlp->el_extent);
  206. DLOOP_Handle_get_basic_type_macro(oldtype, new_dlp->el_type);
  207. }
  208. new_dlp->loop_params.i_t.count = contig_count;
  209. new_dlp->loop_params.i_t.total_blocks = old_type_count;
  210. /* copy in blocklength and displacement parameters (in that order)
  211. *
  212. * regardless of dispinbytes, we store displacements in bytes in loop.
  213. */
  214. DLOOP_Type_indexed_array_copy(count,
  215. contig_count,
  216. blocklength_array,
  217. displacement_array,
  218. new_dlp->loop_params.i_t.blocksize_array,
  219. new_dlp->loop_params.i_t.offset_array,
  220. dispinbytes,
  221. old_extent);
  222. if (is_builtin && (flag == DLOOP_DATALOOP_ALL_BYTES))
  223. {
  224. DLOOP_Count *tmp_blklen_array =
  225. new_dlp->loop_params.i_t.blocksize_array;
  226. for (i=0; i < contig_count; i++)
  227. {
  228. /* increase block lengths so they are in bytes */
  229. tmp_blklen_array[i] *= old_extent;
  230. }
  231. new_dlp->loop_params.i_t.total_blocks *= old_extent;
  232. }
  233. *dlp_p = new_dlp;
  234. *dlsz_p = new_loop_sz;
  235. *dldepth_p = old_loop_depth + 1;
  236. return MPI_SUCCESS;
  237. }
  238. /* DLOOP_Type_indexed_array_copy()
  239. *
  240. * Copies arrays into place, combining adjacent contiguous regions and
  241. * dropping zero-length regions.
  242. *
  243. * Extent passed in is for the original type.
  244. *
  245. * Output displacements are always output in bytes, while block
  246. * lengths are always output in terms of the base type.
  247. */
  248. static void DLOOP_Type_indexed_array_copy(DLOOP_Count count,
  249. DLOOP_Count contig_count,
  250. int *in_blklen_array,
  251. void *in_disp_array,
  252. DLOOP_Count *out_blklen_array,
  253. DLOOP_Offset *out_disp_array,
  254. int dispinbytes,
  255. DLOOP_Offset old_extent)
  256. {
  257. DLOOP_Count i, first, cur_idx = 0;
  258. /* Skip any initial zero-length blocks */
  259. for (first = 0; first < count; ++first)
  260. if ((DLOOP_Count) in_blklen_array[first])
  261. break;
  262. out_blklen_array[0] = (DLOOP_Count) in_blklen_array[first];
  263. if (!dispinbytes)
  264. {
  265. out_disp_array[0] = (DLOOP_Offset)
  266. ((int *) in_disp_array)[first] * old_extent;
  267. for (i = first+1; i < count; ++i)
  268. {
  269. if (in_blklen_array[i] == 0)
  270. {
  271. continue;
  272. }
  273. else if (out_disp_array[cur_idx] +
  274. ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent ==
  275. ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent)
  276. {
  277. /* adjacent to current block; add to block */
  278. out_blklen_array[cur_idx] += (DLOOP_Count) in_blklen_array[i];
  279. }
  280. else
  281. {
  282. cur_idx++;
  283. DLOOP_Assert(cur_idx < contig_count);
  284. out_disp_array[cur_idx] =
  285. ((DLOOP_Offset) ((int *) in_disp_array)[i]) * old_extent;
  286. out_blklen_array[cur_idx] = in_blklen_array[i];
  287. }
  288. }
  289. }
  290. else /* input displacements already in bytes */
  291. {
  292. out_disp_array[0] = (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[first];
  293. for (i = first+1; i < count; ++i)
  294. {
  295. if (in_blklen_array[i] == 0)
  296. {
  297. continue;
  298. }
  299. else if (out_disp_array[cur_idx] +
  300. ((DLOOP_Offset) out_blklen_array[cur_idx]) * old_extent ==
  301. ((DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i]))
  302. {
  303. /* adjacent to current block; add to block */
  304. out_blklen_array[cur_idx] += in_blklen_array[i];
  305. }
  306. else
  307. {
  308. cur_idx++;
  309. DLOOP_Assert(cur_idx < contig_count);
  310. out_disp_array[cur_idx] =
  311. (DLOOP_Offset) ((MPI_Aint *) in_disp_array)[i];
  312. out_blklen_array[cur_idx] = (DLOOP_Count) in_blklen_array[i];
  313. }
  314. }
  315. }
  316. DLOOP_Assert(cur_idx == contig_count - 1);
  317. return;
  318. }
  319. /* DLOOP_Type_indexed_count_contig()
  320. *
  321. * Determines the actual number of contiguous blocks represented by the
  322. * blocklength/displacement arrays. This might be less than count (as
  323. * few as 1).
  324. *
  325. * Extent passed in is for the original type.
  326. */
  327. DLOOP_Count PREPEND_PREFIX(Type_indexed_count_contig)(DLOOP_Count count,
  328. int *blocklength_array,
  329. void *displacement_array,
  330. int dispinbytes,
  331. DLOOP_Offset old_extent)
  332. {
  333. DLOOP_Count i, contig_count = 1;
  334. DLOOP_Count cur_blklen, first;
  335. if (count)
  336. {
  337. /* Skip any initial zero-length blocks */
  338. for (first = 0; first < count; ++first)
  339. if ((DLOOP_Count) blocklength_array[first])
  340. break;
  341. if (first == count) { /* avoid invalid reads later on */
  342. contig_count = 0;
  343. return contig_count;
  344. }
  345. cur_blklen = (DLOOP_Count) blocklength_array[first];
  346. if (!dispinbytes)
  347. {
  348. DLOOP_Offset cur_tdisp =
  349. (DLOOP_Offset) ((int *) displacement_array)[first];
  350. for (i = first+1; i < count; ++i)
  351. {
  352. if (blocklength_array[i] == 0)
  353. {
  354. continue;
  355. }
  356. else if (cur_tdisp + (DLOOP_Offset) cur_blklen ==
  357. (DLOOP_Offset) ((int *) displacement_array)[i])
  358. {
  359. /* adjacent to current block; add to block */
  360. cur_blklen += (DLOOP_Count) blocklength_array[i];
  361. }
  362. else
  363. {
  364. cur_tdisp = (DLOOP_Offset) ((int *) displacement_array)[i];
  365. cur_blklen = (DLOOP_Count) blocklength_array[i];
  366. contig_count++;
  367. }
  368. }
  369. }
  370. else
  371. {
  372. DLOOP_Offset cur_bdisp =
  373. (DLOOP_Offset) ((MPI_Aint *) displacement_array)[first];
  374. for (i = first+1; i < count; ++i)
  375. {
  376. if (blocklength_array[i] == 0)
  377. {
  378. continue;
  379. }
  380. else if (cur_bdisp + (DLOOP_Offset) cur_blklen * old_extent ==
  381. (DLOOP_Offset) ((MPI_Aint *) displacement_array)[i])
  382. {
  383. /* adjacent to current block; add to block */
  384. cur_blklen += (DLOOP_Count) blocklength_array[i];
  385. }
  386. else
  387. {
  388. cur_bdisp =
  389. (DLOOP_Offset) ((MPI_Aint *) displacement_array)[i];
  390. cur_blklen = (DLOOP_Count) blocklength_array[i];
  391. contig_count++;
  392. }
  393. }
  394. }
  395. }
  396. return contig_count;
  397. }