PageRenderTime 50ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/wrfv2_fire/external/RSL_LITE/c_code.c

http://github.com/jbeezley/wrf-fire
C | 703 lines | 639 code | 52 blank | 12 comment | 197 complexity | 2dbd2545ef685af3bde502738977f905 MD5 | raw file
Possible License(s): AGPL-1.0
  1. #ifndef MS_SUA_
  2. # include <stdio.h>
  3. #endif
  4. #include <fcntl.h>
  5. #ifndef O_CREAT
  6. # define O_CREAT _O_CREAT
  7. #endif
  8. #ifndef O_WRONLY
  9. # define O_WRONLY _O_WRONLY
  10. #endif
  11. #ifdef _WIN32
  12. #include <Winsock2.h>
  13. #endif
  14. #define STANDARD_ERROR 2
  15. #define STANDARD_OUTPUT 1
  16. #ifndef STUBMPI
  17. # include "mpi.h"
  18. #endif
  19. #include "rsl_lite.h"
  20. #define F_PACK
  21. RSL_LITE_ERROR_DUP1 ( int *me )
  22. {
  23. int newfd,rc ;
  24. char filename[256] ;
  25. char dirname[256] ;
  26. char hostname[256] ;
  27. /* redirect standard out and standard error based on compile options*/
  28. #ifndef NCEP_DEBUG_MULTIDIR
  29. gethostname( hostname, 256 ) ;
  30. /* redirect standard out*/
  31. sprintf(filename,"rsl.out.%04d",*me) ;
  32. if ((newfd = open( filename, O_CREAT | O_WRONLY, 0666 )) < 0 )
  33. {
  34. perror("error_dup: cannot open rsl.out.nnnn") ;
  35. fprintf(stderr,"...sending output to standard output and continuing.\n") ;
  36. return ;
  37. }
  38. if( dup2( newfd, STANDARD_OUTPUT ) < 0 )
  39. {
  40. perror("error_dup: dup2 fails to change output descriptor") ;
  41. fprintf(stderr,"...sending output to standard output and continuing.\n") ;
  42. close(newfd) ;
  43. return ;
  44. }
  45. /* redirect standard error */
  46. # if defined( _WIN32 )
  47. if ( *me != 0 ) { /* stderr from task 0 should come to screen on windows because it is buffered if redirected */
  48. #endif
  49. sprintf(filename,"rsl.error.%04d",*me) ;
  50. if ((newfd = open( filename, O_CREAT | O_WRONLY, 0666 )) < 0 )
  51. {
  52. perror("error_dup: cannot open rsl.error.log") ;
  53. fprintf(stderr,"...sending error to standard error and continuing.\n") ;
  54. return ;
  55. }
  56. if( dup2( newfd, STANDARD_ERROR ) < 0 )
  57. {
  58. perror("error_dup: dup2 fails to change error descriptor") ;
  59. fprintf(stderr,"...sending error to standard error and continuing.\n") ;
  60. close(newfd) ;
  61. return ;
  62. }
  63. fprintf( stdout, "taskid: %d hostname: %s\n",*me,hostname) ;
  64. fprintf( stderr, "taskid: %d hostname: %s\n",*me,hostname) ;
  65. # if defined( _WIN32 )
  66. }
  67. # endif
  68. #else
  69. # ifndef NCEP_DEBUG_GLOBALSTDOUT
  70. /*create TASKOUTPUT directory to contain separate task owned output directories*/
  71. /* let task 0 create the subdirectory path for the task directories */
  72. if (*me == 0)
  73. {
  74. sprintf(dirname, "%s","TASKOUTPUT");
  75. rc = mkdir(dirname, 0777);
  76. if ( rc != 0 && errno==EEXIST) rc=0;
  77. }
  78. /* If TASKOUTPUT directory is not created then return */
  79. MPI_Bcast(&rc, 1, MPI_INTEGER, 0, MPI_COMM_WORLD);
  80. if (rc != 0 ) {
  81. if (*me == 0 ) {
  82. perror("mkdir error");
  83. fprintf(stderr, "mkdir failed for directory %s on task %d. Sending error/output to stderr/stdout for all tasks and continuing.\n", dirname, *me);
  84. return;
  85. }
  86. else {
  87. return;
  88. }
  89. }
  90. /* TASKOUTPUT directory exists, continue with task specific directory */
  91. sprintf(dirname, "TASKOUTPUT/%04d", *me);
  92. rc=mkdir(dirname, 0777);
  93. if ( rc !=0 && errno!=EEXIST ) {
  94. perror("mkdir error");
  95. fprintf(stderr, "mkdir failed for directory %s on task %d. Sending error/output to stderr/stdout and continuing.\n", dirname, *me);
  96. return;
  97. }
  98. /* Each tasks creates/opens its own output and error files */
  99. sprintf(filename, "%s/%04d/rsl.out.%04d","TASKOUTPUT",*me,*me) ;
  100. if ((newfd = open( filename, O_CREAT | O_WRONLY, 0666 )) < 0 )
  101. {
  102. perror("error_dup: cannot open ./TASKOUTPUT/nnnn/rsl.out.nnnn") ;
  103. fprintf(stderr,"...sending output to standard output and continuing.\n")
  104. ;
  105. return ;
  106. }
  107. if( dup2( newfd, STANDARD_OUTPUT ) < 0 )
  108. {
  109. perror("error_dup: dup2 fails to change output descriptor") ;
  110. fprintf(stderr,"...sending output to standard output and continuing.\n");
  111. close(newfd) ;
  112. return ;
  113. }
  114. sprintf(filename, "%s/%04d/rsl.error.%04d","TASKOUTPUT",*me,*me) ;
  115. if ((newfd = open( filename, O_CREAT | O_WRONLY, 0666 )) < 0 )
  116. {
  117. perror("error_dup: cannot open ./TASKOUTPUT/nnnn/rsl.error.nnnn") ;
  118. fprintf(stderr,"...sending error to standard error and continuing.\n") ;
  119. return ;
  120. }
  121. if( dup2( newfd, STANDARD_ERROR ) < 0 )
  122. {
  123. perror("error_dup: dup2 fails to change error descriptor") ;
  124. fprintf(stderr,"...sending error to standard error and continuing.\n") ;
  125. close(newfd) ;
  126. return ;
  127. }
  128. # else
  129. /* Each task writes to global standard error and standard out */
  130. return;
  131. # endif
  132. #endif
  133. }
  134. #ifdef _WIN32
  135. /* Windows doesn't have a gethostid function so add a stub.
  136. TODO: Create a version that will work on Windows. */
  137. int
  138. gethostid ()
  139. {
  140. return 0;
  141. }
  142. #endif
  143. RSL_LITE_GET_HOSTNAME ( char * hn, int * size, int *n, int *hostid )
  144. {
  145. char temp[512] ;
  146. char *p, *q ;
  147. int i, cs ;
  148. if ( gethostname(temp,512) ) return(1) ;
  149. cs = gethostid() ;
  150. for ( p = temp , q = hn , i = 0 ; *p && i < *size && i < 512 ; i++ , p++ , q++ ) { *q = *p ; }
  151. *n = i ;
  152. *hostid = cs ;
  153. return(0) ;
  154. }
  155. BYTE_BCAST ( char * buf, int * size, int * Fcomm )
  156. {
  157. #ifndef STUBMPI
  158. MPI_Comm *comm, dummy_comm ;
  159. comm = &dummy_comm ;
  160. *comm = MPI_Comm_f2c( *Fcomm ) ;
  161. # ifdef crayx1
  162. if (*size % sizeof(int) == 0) {
  163. MPI_Bcast ( buf, *size/sizeof(int), MPI_INT, 0, *comm ) ;
  164. } else {
  165. MPI_Bcast ( buf, *size, MPI_BYTE, 0, *comm ) ;
  166. }
  167. # else
  168. MPI_Bcast ( buf, *size, MPI_BYTE, 0, *comm ) ;
  169. # endif
  170. #endif
  171. }
  172. static int yp_curs, ym_curs, xp_curs, xm_curs ;
  173. static int yp_curs_recv, ym_curs_recv, xp_curs_recv, xm_curs_recv ;
  174. RSL_LITE_INIT_EXCH (
  175. int * Fcomm0,
  176. int * shw0, int * xy0 ,
  177. int *sendbegm0 , int * sendwm0 , int * sendbegp0 , int * sendwp0 ,
  178. int *recvbegm0 , int * recvwm0 , int * recvbegp0 , int * recvwp0 ,
  179. int * n3dR0, int *n2dR0, int * typesizeR0 ,
  180. int * n3dI0, int *n2dI0, int * typesizeI0 ,
  181. int * n3dD0, int *n2dD0, int * typesizeD0 ,
  182. int * n3dL0, int *n2dL0, int * typesizeL0 ,
  183. int * me0, int * np0 , int * np_x0 , int * np_y0 ,
  184. int * ips0 , int * ipe0 , int * jps0 , int * jpe0 , int * kps0 , int * kpe0 )
  185. {
  186. int n3dR, n2dR, typesizeR ;
  187. int n3dI, n2dI, typesizeI ;
  188. int n3dD, n2dD, typesizeD ;
  189. int n3dL, n2dL, typesizeL ;
  190. int shw ;
  191. int sendbegm , sendwm, sendbegp , sendwp ;
  192. int recvbegm , recvwm, recvbegp , recvwp ;
  193. int me, np, np_x, np_y ;
  194. int ips , ipe , jps , jpe , kps , kpe ;
  195. int xy ;
  196. int yp, ym, xp, xm ;
  197. int nbytes ;
  198. int nbytes_x_recv = 0, nbytes_y_recv = 0 ;
  199. #ifndef STUBMPI
  200. MPI_Comm comm, *comm0, dummy_comm ;
  201. comm0 = &dummy_comm ;
  202. *comm0 = MPI_Comm_f2c( *Fcomm0 ) ;
  203. shw = *shw0 ; /* logical half-width of stencil */
  204. xy = *xy0 ; /* 0 = y , 1 = x */
  205. sendbegm = *sendbegm0 ; /* send index of sten copy (edge = 1), lower/left */
  206. sendwm = *sendwm0 ; /* send width of sten copy counting towards edge, lower/left */
  207. sendbegp = *sendbegp0 ; /* send index of sten copy (edge = 1), upper/right */
  208. sendwp = *sendwp0 ; /* send width of sten copy counting towards edge, upper/right */
  209. recvbegm = *recvbegm0 ; /* recv index of sten copy (edge = 1), lower/left */
  210. recvwm = *recvwm0 ; /* recv width of sten copy counting towards edge, lower/left */
  211. recvbegp = *recvbegp0 ; /* recv index of sten copy (edge = 1), upper/right */
  212. recvwp = *recvwp0 ; /* recv width of sten copy counting towards edge, upper/right */
  213. n3dR = *n3dR0 ; n2dR = *n2dR0 ; typesizeR = *typesizeR0 ;
  214. n3dI = *n3dI0 ; n2dI = *n2dI0 ; typesizeI = *typesizeI0 ;
  215. n3dD = *n3dD0 ; n2dD = *n2dD0 ; typesizeD = *typesizeD0 ;
  216. n3dL = *n3dL0 ; n2dL = *n2dL0 ; typesizeL = *typesizeL0 ;
  217. me = *me0 ; np = *np0 ; np_x = *np_x0 ; np_y = *np_y0 ;
  218. ips = *ips0-1 ; ipe = *ipe0-1 ; jps = *jps0-1 ; jpe = *jpe0-1 ; kps = *kps0-1 ; kpe = *kpe0-1 ;
  219. yp_curs_recv = 0 ; ym_curs_recv = 0 ;
  220. xp_curs_recv = 0 ; xm_curs_recv = 0 ;
  221. if ( xy == 0 && np_y > 1 ) {
  222. nbytes = typesizeR*(ipe-ips+1+2*shw)*shw*(n3dR*(kpe-kps+1)+n2dR) +
  223. typesizeI*(ipe-ips+1+2*shw)*shw*(n3dI*(kpe-kps+1)+n2dI) +
  224. typesizeD*(ipe-ips+1+2*shw)*shw*(n3dD*(kpe-kps+1)+n2dD) +
  225. typesizeL*(ipe-ips+1+2*shw)*shw*(n3dL*(kpe-kps+1)+n2dL) ;
  226. nbytes_y_recv =
  227. typesizeR*(ipe-ips+1+2*shw)*shw*(n3dR*(kpe-kps+1)+n2dR) +
  228. typesizeI*(ipe-ips+1+2*shw)*shw*(n3dI*(kpe-kps+1)+n2dI) +
  229. typesizeD*(ipe-ips+1+2*shw)*shw*(n3dD*(kpe-kps+1)+n2dD) +
  230. typesizeL*(ipe-ips+1+2*shw)*shw*(n3dL*(kpe-kps+1)+n2dL) ;
  231. MPI_Cart_shift ( *comm0, 0, 1, &ym, &yp ) ;
  232. if ( yp != MPI_PROC_NULL ) {
  233. buffer_for_proc ( yp , nbytes_y_recv, RSL_RECVBUF ) ;
  234. buffer_for_proc ( yp , nbytes, RSL_SENDBUF ) ;
  235. }
  236. if ( ym != MPI_PROC_NULL ) {
  237. buffer_for_proc ( ym , nbytes_y_recv, RSL_RECVBUF ) ;
  238. buffer_for_proc ( ym , nbytes, RSL_SENDBUF ) ;
  239. }
  240. }
  241. if ( xy == 1 && np_x > 1 ) {
  242. nbytes = typesizeR*(jpe-jps+1+2*shw)*shw*(n3dR*(kpe-kps+1)+n2dR) +
  243. typesizeI*(jpe-jps+1+2*shw)*shw*(n3dI*(kpe-kps+1)+n2dI) +
  244. typesizeD*(jpe-jps+1+2*shw)*shw*(n3dD*(kpe-kps+1)+n2dD) +
  245. typesizeL*(jpe-jps+1+2*shw)*shw*(n3dL*(kpe-kps+1)+n2dL) ;
  246. nbytes_x_recv =
  247. typesizeR*(jpe-jps+1+2*shw)*shw*(n3dR*(kpe-kps+1)+n2dR) +
  248. typesizeI*(jpe-jps+1+2*shw)*shw*(n3dI*(kpe-kps+1)+n2dI) +
  249. typesizeD*(jpe-jps+1+2*shw)*shw*(n3dD*(kpe-kps+1)+n2dD) +
  250. typesizeL*(jpe-jps+1+2*shw)*shw*(n3dL*(kpe-kps+1)+n2dL) ;
  251. MPI_Cart_shift ( *comm0, 1, 1, &xm, &xp ) ;
  252. if ( xp != MPI_PROC_NULL ) {
  253. buffer_for_proc ( xp , nbytes_x_recv, RSL_RECVBUF ) ;
  254. buffer_for_proc ( xp , nbytes, RSL_SENDBUF ) ;
  255. }
  256. if ( xm != MPI_PROC_NULL ) {
  257. buffer_for_proc ( xm , nbytes_x_recv, RSL_RECVBUF ) ;
  258. buffer_for_proc ( xm , nbytes, RSL_SENDBUF ) ;
  259. }
  260. }
  261. #endif
  262. yp_curs = 0 ; ym_curs = 0 ; xp_curs = 0 ; xm_curs = 0 ;
  263. yp_curs_recv = nbytes_y_recv ; ym_curs_recv = nbytes_y_recv ;
  264. xp_curs_recv = nbytes_x_recv ; xm_curs_recv = nbytes_x_recv ;
  265. }
  266. RSL_LITE_PACK ( int * Fcomm0, char * buf , int * shw0 ,
  267. int * sendbegm0 , int * sendwm0 , int * sendbegp0 , int * sendwp0 ,
  268. int * recvbegm0 , int * recvwm0 , int * recvbegp0 , int * recvwp0 ,
  269. int * typesize0 , int * xy0 , int * pu0 , int * imemord , int * xstag0, /* not used */
  270. int *me0, int * np0 , int * np_x0 , int * np_y0 ,
  271. int * ids0 , int * ide0 , int * jds0 , int * jde0 , int * kds0 , int * kde0 ,
  272. int * ims0 , int * ime0 , int * jms0 , int * jme0 , int * kms0 , int * kme0 ,
  273. int * ips0 , int * ipe0 , int * jps0 , int * jpe0 , int * kps0 , int * kpe0 )
  274. {
  275. int me, np, np_x, np_y ;
  276. int sendbegm , sendwm, sendbegp , sendwp ;
  277. int recvbegm , recvwm, recvbegp , recvwp ;
  278. int shw , typesize ;
  279. int ids , ide , jds , jde , kds , kde ;
  280. int ims , ime , jms , jme , kms , kme ;
  281. int ips , ipe , jps , jpe , kps , kpe ;
  282. int xy ; /* y = 0 , x = 1 */
  283. int pu ; /* pack = 0 , unpack = 1 */
  284. register int i, j, k, t ;
  285. #ifdef crayx1
  286. register int i2,i3,i4,i_offset;
  287. #endif
  288. char *p ;
  289. int da_buf ;
  290. int yp, ym, xp, xm ;
  291. int nbytes, ierr ;
  292. register int *pi, *qi ;
  293. #ifndef STUBMPI
  294. MPI_Comm comm, *comm0, dummy_comm ;
  295. int js, je, ks, ke, is, ie, wcount ;
  296. comm0 = &dummy_comm ;
  297. *comm0 = MPI_Comm_f2c( *Fcomm0 ) ;
  298. shw = *shw0 ; /* logical half-width of stencil */
  299. sendbegm = *sendbegm0 ; /* send index of sten copy (edge = 1), lower/left */
  300. sendwm = *sendwm0 ; /* send width of sten copy counting towards edge, lower/left */
  301. sendbegp = *sendbegp0 ; /* send index of sten copy (edge = 1), upper/right */
  302. sendwp = *sendwp0 ; /* send width of sten copy counting towards edge, upper/right */
  303. recvbegm = *recvbegm0 ; /* recv index of sten copy (edge = 1), lower/left */
  304. recvwm = *recvwm0 ; /* recv width of sten copy counting towards edge, lower/left */
  305. recvbegp = *recvbegp0 ; /* recv index of sten copy (edge = 1), upper/right */
  306. recvwp = *recvwp0 ; /* recv width of sten copy counting towards edge, upper/right */
  307. me = *me0 ; np = *np0 ; np_x = *np_x0 ; np_y = *np_y0 ;
  308. typesize = *typesize0 ;
  309. ids = *ids0-1 ; ide = *ide0-1 ; jds = *jds0-1 ; jde = *jde0-1 ; kds = *kds0-1 ; kde = *kde0-1 ;
  310. ims = *ims0-1 ; ime = *ime0-1 ; jms = *jms0-1 ; jme = *jme0-1 ; kms = *kms0-1 ; kme = *kme0-1 ;
  311. ips = *ips0-1 ; ipe = *ipe0-1 ; jps = *jps0-1 ; jpe = *jpe0-1 ; kps = *kps0-1 ; kpe = *kpe0-1 ;
  312. xy = *xy0 ;
  313. pu = *pu0 ;
  314. /* need to adapt for other memory orders */
  315. #define RANGE(S1,E1,S2,E2,S3,E3,S4,E4) (((E1)-(S1)+1)*((E2)-(S2)+1)*((E3)-(S3)+1)*((E4)-(S4)+1))
  316. #define IMAX(A) (((A)>ids)?(A):ids)
  317. #define IMIN(A) (((A)<ide)?(A):ide)
  318. #define JMAX(A) (((A)>jds)?(A):jds)
  319. #define JMIN(A) (((A)<jde)?(A):jde)
  320. da_buf = ( pu == 0 ) ? RSL_SENDBUF : RSL_RECVBUF ;
  321. if ( ips <= ipe && jps <= jpe ) {
  322. if ( np_y > 1 && xy == 0 ) {
  323. MPI_Cart_shift( *comm0 , 0, 1, &ym, &yp ) ;
  324. if ( yp != MPI_PROC_NULL && jpe <= jde && jde != jpe ) {
  325. p = buffer_for_proc( yp , 0 , da_buf ) ;
  326. if ( pu == 0 ) {
  327. if ( sendwp > 0 ) {
  328. je = jpe - sendbegp + 1 ; js = je - sendwp + 1 ;
  329. ks = kps ; ke = kpe ;
  330. is = IMAX(ips-shw) ; ie = IMIN(ipe+shw) ;
  331. nbytes = buffer_size_for_proc( yp, da_buf ) ;
  332. if ( yp_curs + RANGE( js, je, kps, kpe, ips-shw, ipe+shw, 1, typesize ) > nbytes ) {
  333. #ifndef MS_SUA
  334. fprintf(stderr,"memory overwrite in rsl_lite_pack, Y pack up, %d > %d\n",
  335. yp_curs + RANGE( js, je, kps, kpe, ips-shw, ipe+shw, 1, typesize ), nbytes ) ;
  336. #endif
  337. MPI_Abort(MPI_COMM_WORLD, 99) ;
  338. }
  339. if ( typesize == 8 ) {
  340. F_PACK_LINT ( buf, p+yp_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
  341. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  342. yp_curs += wcount*typesize ;
  343. }
  344. else if ( typesize == 4 ) {
  345. F_PACK_INT ( buf, p+yp_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
  346. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  347. yp_curs += wcount*typesize ;
  348. }
  349. else {
  350. #ifndef MS_SUA
  351. fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
  352. #endif
  353. }
  354. }
  355. } else {
  356. if ( recvwp > 0 ) {
  357. js = jpe+recvbegp ; je = js + recvwp - 1 ;
  358. ks = kps ; ke = kpe ;
  359. is = IMAX(ips-shw) ; ie = IMIN(ipe+shw) ;
  360. if ( typesize == 8 ) {
  361. F_UNPACK_LINT ( p+yp_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
  362. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  363. yp_curs += wcount*typesize ;
  364. }
  365. else if ( typesize == 4 ) {
  366. F_UNPACK_INT ( p+yp_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
  367. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  368. yp_curs += wcount*typesize ;
  369. }
  370. else {
  371. #ifndef MS_SUA
  372. fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
  373. #endif
  374. }
  375. }
  376. }
  377. }
  378. if ( ym != MPI_PROC_NULL && jps >= jds && jps != jds ) {
  379. p = buffer_for_proc( ym , 0 , da_buf ) ;
  380. if ( pu == 0 ) {
  381. if ( sendwm > 0 ) {
  382. js = jps+sendbegm-1 ; je = js + sendwm -1 ;
  383. ks = kps ; ke = kpe ;
  384. is = IMAX(ips-shw) ; ie = IMIN(ipe+shw) ;
  385. nbytes = buffer_size_for_proc( ym, da_buf ) ;
  386. if ( ym_curs + RANGE( js, je, kps, kpe, ips-shw, ipe+shw, 1, typesize ) > nbytes ) {
  387. #ifndef MS_SUA
  388. fprintf(stderr,"memory overwrite in rsl_lite_pack, Y pack dn, %d > %d\n",
  389. ym_curs + RANGE( js, je, kps, kpe, ips-shw, ipe+shw, 1, typesize ), nbytes ) ;
  390. #endif
  391. MPI_Abort(MPI_COMM_WORLD, 99) ;
  392. }
  393. if ( typesize == 8 ) {
  394. F_PACK_LINT ( buf, p+ym_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
  395. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  396. ym_curs += wcount*typesize ;
  397. }
  398. else if ( typesize == 4 ) {
  399. F_PACK_INT ( buf, p+ym_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
  400. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  401. ym_curs += wcount*typesize ;
  402. }
  403. else {
  404. #ifndef MS_SUA
  405. fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
  406. #endif
  407. }
  408. }
  409. } else {
  410. if ( recvwm > 0 ) {
  411. je = jps-recvbegm ; js = je - recvwm + 1 ;
  412. ks = kps ; ke = kpe ;
  413. is = IMAX(ips-shw) ; ie = IMIN(ipe+shw) ;
  414. if ( typesize == 8 ) {
  415. F_UNPACK_LINT ( p+ym_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
  416. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  417. ym_curs += wcount*typesize ;
  418. }
  419. else if ( typesize == 4 ) {
  420. F_UNPACK_INT ( p+ym_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
  421. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  422. ym_curs += wcount*typesize ;
  423. }
  424. else {
  425. #ifndef MS_SUA
  426. fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
  427. #endif
  428. }
  429. }
  430. }
  431. }
  432. }
  433. if ( np_x > 1 && xy == 1 ) {
  434. MPI_Cart_shift( *comm0, 1, 1, &xm, &xp ) ;
  435. if ( xp != MPI_PROC_NULL && ipe <= ide && ide != ipe ) {
  436. p = buffer_for_proc( xp , 0 , da_buf ) ;
  437. if ( pu == 0 ) {
  438. if ( sendwp > 0 ) {
  439. js = JMAX(jps-shw) ; je = JMIN(jpe+shw) ;
  440. ks = kps ; ke = kpe ;
  441. ie = ipe - sendbegp + 1 ; is = ie - sendwp + 1 ;
  442. nbytes = buffer_size_for_proc( xp, da_buf ) ;
  443. if ( xp_curs + RANGE( js, je, kps, kpe, ipe-shw+1, ipe, 1, typesize ) > nbytes ) {
  444. #ifndef MS_SUA
  445. fprintf(stderr,"memory overwrite in rsl_lite_pack, X pack right, %d > %d\n",
  446. xp_curs + RANGE( js, je, kps, kpe, ipe-shw+1, ipe, 1, typesize ), nbytes ) ;
  447. #endif
  448. MPI_Abort(MPI_COMM_WORLD, 99) ;
  449. }
  450. if ( typesize == 8 ) {
  451. F_PACK_LINT ( buf, p+xp_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
  452. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  453. xp_curs += wcount*typesize ;
  454. }
  455. else if ( typesize == 4 ) {
  456. F_PACK_INT ( buf, p+xp_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
  457. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  458. xp_curs += wcount*typesize ;
  459. }
  460. else {
  461. #ifndef MS_SUA
  462. fprintf(stderr,"A internal error: %s %d\n",__FILE__,__LINE__) ;
  463. #endif
  464. }
  465. }
  466. } else {
  467. if ( recvwp > 0 ) {
  468. js = JMAX(jps-shw) ; je = JMIN(jpe+shw) ;
  469. ks = kps ; ke = kpe ;
  470. is = ipe+recvbegp ; ie = is + recvwp - 1 ;
  471. if ( typesize == 8 ) {
  472. F_UNPACK_LINT ( p+xp_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
  473. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  474. xp_curs += wcount*typesize ;
  475. }
  476. else if ( typesize == 4 ) {
  477. F_UNPACK_INT ( p+xp_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
  478. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  479. xp_curs += wcount*typesize ;
  480. }
  481. else {
  482. #ifndef MS_SUA
  483. fprintf(stderr,"B internal error: %s %d\n",__FILE__,__LINE__) ;
  484. fprintf(stderr," stenbeg %d stenw %d \n",is,ie) ;
  485. fprintf(stderr," is %d ie %d \n",is,ie) ;
  486. #endif
  487. }
  488. }
  489. }
  490. }
  491. if ( xm != MPI_PROC_NULL && ips >= ids && ids != ips ) {
  492. p = buffer_for_proc( xm , 0 , da_buf ) ;
  493. if ( pu == 0 ) {
  494. if ( sendwm > 0 ) {
  495. js = JMAX(jps-shw) ; je = JMIN(jpe+shw) ;
  496. ks = kps ; ke = kpe ;
  497. is = ips+sendbegm-1 ; ie = is + sendwm-1 ;
  498. nbytes = buffer_size_for_proc( xm, da_buf ) ;
  499. if ( xm_curs + RANGE( js, je, kps, kpe, ips, ips+shw-1, 1, typesize ) > nbytes ) {
  500. #ifndef MS_SUA
  501. fprintf(stderr,"memory overwrite in rsl_lite_pack, X left , %d > %d\n",
  502. xm_curs + RANGE( js, je, kps, kpe, ips, ips+shw-1, 1, typesize ), nbytes ) ;
  503. #endif
  504. MPI_Abort(MPI_COMM_WORLD, 99) ;
  505. }
  506. if ( typesize == 8 ) {
  507. F_PACK_LINT ( buf, p+xm_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
  508. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  509. xm_curs += wcount*typesize ;
  510. }
  511. else if ( typesize == 4 ) {
  512. F_PACK_INT ( buf, p+xm_curs, imemord, &js, &je, &ks, &ke, &is, &ie,
  513. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  514. xm_curs += wcount*typesize ;
  515. }
  516. else {
  517. #ifndef MS_SUA
  518. fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
  519. #endif
  520. }
  521. }
  522. } else {
  523. if ( recvwm > 0 ) {
  524. js = JMAX(jps-shw) ; je = JMIN(jpe+shw) ;
  525. ks = kps ; ke = kpe ;
  526. ie = ips-recvbegm ; is = ie - recvwm + 1 ;
  527. if ( typesize == 8 ) {
  528. F_UNPACK_LINT ( p+xm_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
  529. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  530. xm_curs += wcount*typesize ;
  531. }
  532. else if ( typesize == 4 ) {
  533. F_UNPACK_INT ( p+xm_curs, buf, imemord, &js, &je, &ks, &ke, &is, &ie,
  534. &jms,&jme,&kms,&kme,&ims,&ime, &wcount ) ;
  535. xm_curs += wcount*typesize ;
  536. }
  537. else {
  538. #ifndef MS_SUA
  539. fprintf(stderr,"internal error: %s %d\n",__FILE__,__LINE__) ;
  540. #endif
  541. }
  542. }
  543. }
  544. }
  545. }
  546. }
  547. #endif
  548. }
  549. #ifndef STUBMPI
  550. static MPI_Request yp_recv, ym_recv, yp_send, ym_send ;
  551. static MPI_Request xp_recv, xm_recv, xp_send, xm_send ;
  552. #endif
  553. RSL_LITE_EXCH_Y ( int * Fcomm0, int *me0, int * np0 , int * np_x0 , int * np_y0 ,
  554. int * sendw_m, int * sendw_p, int * recvw_m , int * recvw_p )
  555. {
  556. int me, np, np_x, np_y ;
  557. int yp, ym, xp, xm, ierr ;
  558. #ifndef STUBMPI
  559. MPI_Status stat ;
  560. MPI_Comm comm, *comm0, dummy_comm ;
  561. comm0 = &dummy_comm ;
  562. *comm0 = MPI_Comm_f2c( *Fcomm0 ) ;
  563. comm = *comm0 ; me = *me0 ; np = *np0 ; np_x = *np_x0 ; np_y = *np_y0 ;
  564. if ( np_y > 1 ) {
  565. MPI_Cart_shift( *comm0, 0, 1, &ym, &yp ) ;
  566. if ( yp != MPI_PROC_NULL && *recvw_p > 0 ) {
  567. ierr=MPI_Irecv ( buffer_for_proc( yp, yp_curs_recv, RSL_RECVBUF ), yp_curs_recv, MPI_CHAR, yp, me, comm, &yp_recv ) ;
  568. }
  569. if ( ym != MPI_PROC_NULL && *recvw_m > 0 ) {
  570. ierr=MPI_Irecv ( buffer_for_proc( ym, ym_curs_recv, RSL_RECVBUF ), ym_curs_recv, MPI_CHAR, ym, me, comm, &ym_recv ) ;
  571. }
  572. if ( yp != MPI_PROC_NULL && *sendw_p > 0 ) {
  573. ierr=MPI_Isend ( buffer_for_proc( yp, 0, RSL_SENDBUF ), yp_curs, MPI_CHAR, yp, yp, comm, &yp_send ) ;
  574. }
  575. if ( ym != MPI_PROC_NULL && *sendw_m > 0 ) {
  576. ierr=MPI_Isend ( buffer_for_proc( ym, 0, RSL_SENDBUF ), ym_curs, MPI_CHAR, ym, ym, comm, &ym_send ) ;
  577. }
  578. if ( yp != MPI_PROC_NULL && *recvw_p > 0 ) { MPI_Wait( &yp_recv, &stat ) ; }
  579. if ( ym != MPI_PROC_NULL && *recvw_m > 0 ) { MPI_Wait( &ym_recv, &stat ) ; }
  580. if ( yp != MPI_PROC_NULL && *sendw_p > 0 ) { MPI_Wait( &yp_send, &stat ) ; }
  581. if ( ym != MPI_PROC_NULL && *sendw_m > 0 ) { MPI_Wait( &ym_send, &stat ) ; }
  582. }
  583. yp_curs = 0 ; ym_curs = 0 ; xp_curs = 0 ; xm_curs = 0 ;
  584. yp_curs_recv = 0 ; ym_curs_recv = 0 ;
  585. xp_curs_recv = 0 ; xm_curs_recv = 0 ;
  586. #endif
  587. }
  588. RSL_LITE_EXCH_X ( int * Fcomm0, int *me0, int * np0 , int * np_x0 , int * np_y0 ,
  589. int * sendw_m, int * sendw_p, int * recvw_m , int * recvw_p )
  590. {
  591. int me, np, np_x, np_y ;
  592. int yp, ym, xp, xm ;
  593. #ifndef STUBMPI
  594. MPI_Status stat ;
  595. MPI_Comm comm, *comm0, dummy_comm ;
  596. comm0 = &dummy_comm ;
  597. *comm0 = MPI_Comm_f2c( *Fcomm0 ) ;
  598. comm = *comm0 ; me = *me0 ; np = *np0 ; np_x = *np_x0 ; np_y = *np_y0 ;
  599. if ( np_x > 1 ) {
  600. MPI_Cart_shift( *comm0, 1, 1, &xm, &xp ) ;
  601. if ( xp != MPI_PROC_NULL && *recvw_p > 0 ) {
  602. MPI_Irecv ( buffer_for_proc( xp, xp_curs_recv, RSL_RECVBUF ), xp_curs_recv, MPI_CHAR, xp, me, comm, &xp_recv ) ;
  603. }
  604. if ( xm != MPI_PROC_NULL && *recvw_m > 0 ) {
  605. MPI_Irecv ( buffer_for_proc( xm, xm_curs_recv, RSL_RECVBUF ), xm_curs_recv, MPI_CHAR, xm, me, comm, &xm_recv ) ;
  606. }
  607. if ( xp != MPI_PROC_NULL && *sendw_p > 0 ) {
  608. MPI_Isend ( buffer_for_proc( xp, 0, RSL_SENDBUF ), xp_curs, MPI_CHAR, xp, xp, comm, &xp_send ) ;
  609. }
  610. if ( xm != MPI_PROC_NULL && *sendw_m > 0 ) {
  611. MPI_Isend ( buffer_for_proc( xm, 0, RSL_SENDBUF ), xm_curs, MPI_CHAR, xm, xm, comm, &xm_send ) ;
  612. }
  613. if ( xp != MPI_PROC_NULL && *recvw_p > 0 ) { MPI_Wait( &xp_recv, &stat ) ; }
  614. if ( xm != MPI_PROC_NULL && *recvw_m > 0 ) { MPI_Wait( &xm_recv, &stat ) ; }
  615. if ( xp != MPI_PROC_NULL && *sendw_p > 0 ) { MPI_Wait( &xp_send, &stat ) ; }
  616. if ( xm != MPI_PROC_NULL && *sendw_m > 0 ) { MPI_Wait( &xm_send, &stat ) ; }
  617. }
  618. yp_curs = 0 ; ym_curs = 0 ; xp_curs = 0 ; xm_curs = 0 ;
  619. yp_curs_recv = 0 ; ym_curs_recv = 0 ;
  620. xp_curs_recv = 0 ; xm_curs_recv = 0 ;
  621. #endif
  622. }
  623. #if !defined( MS_SUA) && !defined(_WIN32)
  624. #include <sys/time.h>
  625. RSL_INTERNAL_MILLICLOCK ()
  626. {
  627. struct timeval tb ;
  628. struct timezone tzp ;
  629. int isec ; /* seconds */
  630. int usec ; /* microseconds */
  631. int msecs ;
  632. gettimeofday( &tb, &tzp ) ;
  633. isec = tb.tv_sec ;
  634. usec = tb.tv_usec ;
  635. msecs = 1000 * isec + usec / 1000 ;
  636. return(msecs) ;
  637. }
  638. RSL_INTERNAL_MICROCLOCK ()
  639. {
  640. struct timeval tb ;
  641. struct timezone tzp ;
  642. int isec ; /* seconds */
  643. int usec ; /* microseconds */
  644. int msecs ;
  645. gettimeofday( &tb, &tzp ) ;
  646. isec = tb.tv_sec ;
  647. usec = tb.tv_usec ;
  648. msecs = 1000000 * isec + usec ;
  649. return(msecs) ;
  650. }
  651. #endif