HARM
harm and utilities
 All Data Structures Files Functions Variables Typedefs Macros Pages
boundmpi.c
Go to the documentation of this file.
1 #include "decs.h"
2 
3 
11 #define DEBUG 0
12 
13 
14 
15 static int get_truesize(int dir, int boundvartype);
16 
17 
18 
19 
20 
22 int bound_mpi_dir(int boundstage, int finalstep, int whichdir, int boundvartype, FTYPE (*prim)[NSTORE2][NSTORE3][NPR], FTYPE (*F1)[NSTORE2][NSTORE3][NPR+NSPECIAL], FTYPE (*F2)[NSTORE2][NSTORE3][NPR+NSPECIAL], FTYPE (*F3)[NSTORE2][NSTORE3][NPR+NSPECIAL], FTYPE (*vpot)[NSTORE1+SHIFTSTORE1][NSTORE2+SHIFTSTORE2][NSTORE3+SHIFTSTORE3])
23 {
24  FTYPE (*prim2bound[NDIM])[NSTORE2][NSTORE3][NPR];
25  FTYPE (*flux2bound[NDIM])[NSTORE2][NSTORE3][NPR+NSPECIAL];
27  int dir;
28 #if(DEBUG)
29  int i,j,k,pl,pliter;
30 #endif
31 
32  /* These arrays contain designations that identify
33  * each recv and send */
34  static MPI_Request requests[COMPDIM * 2 * 2];
35  // format of map for requests[dir*2+recv/send(0/1)]
36  static int didpostrecvs[COMPDIM*2]={0};
37 
38 
39  /*
40  *
41  * 1. do outflow/inflow/etc. boundary conditions (in bounds)
42  * 2. pack data into workbc arrays and do send/recv's
43  * 3. for each transfer do a wait/unpack
44  * 4. do all send waits
45  *
46  * NOTE: order is important so corner zones are assigned correctly
47  *
48  * workbc[PACK][][] is data that is being sent
49  * workbc[UNPACK][][] is data that is being recvd
50  *
51  */
52 
53  /*
54  * -> higher values of x1
55  * 2 -> lower values of x1
56  *
57  */
58 
59 
60  /* bounds has already done non-MPI boundary conditions; now do MPI
61  * boundary conditions */
62 
63  // must go in this order (0,1) then (2,3) then (4,5) or visa versa,
64  // a single directions order doesn't matter. This method of
65  // ordering is as opposed to directly transfering the corner zones
66  // to the corner CPU.
67 
68  // This may be faster since all transfers can proceed at once,
69  // although this may be slower since no transfers can occur until
70  // packing is completed. This way packing and transfering occur
71  // simultaneously.
72 
73  // Although l/r are packed together, since in the end we have to
74  // wait for both l/r to complete, so equal time completion is
75  // favored //over asynch completion.
76 
77  // Also, transfering corner zones with small message sizes increases
78  // the importance of latency.
79 
80  // for 2D:
81  // I choose left-right N?M first, then up/down N?M. Could
82  // just do N? for interior for L/R, but true boundary needs full
83  // N?M exchanged since cpu sets boundary using normal bc code
84  // which needs to get transfered to neight(i.e. currently if corner
85  // has bctype 99/? then doesn't do corner)
86 
87  // GODMARK: Make sure 3D makes sense (no extra things to do)
88 
89 #if(DEBUG)
90  PBOUNDLOOP(pliter,pl){
91  FULLLOOP{
92  MACP0A1(prim,i,j,k,pl)=-1-pl*100; // should be turned into myid-k*100
93  }
94  ZLOOP {
95  MACP0A1(prim,i,j,k,pl)=myid-pl*100; // differentiates but clear per pr
96  // logfprintf("%d %d %d %d %21.15g\n",i,j,k,pl,MACP0A1(prim,i,j,k,pl));
97  }
98  }
99 #endif
100 
101 
102  // this is designed to copy corners by indirectly copying them. The
103  // corners eventually get to corner-related CPUs by passing through
104  // another cpu. This required ordering the left/right and up/down
105  // procedures.
106 
107  // one could copy the corners directly and get more bandwidth since
108  // would transfers 2X as much data, but corners would transfer very
109  // slowly alone, and basically has the same number of operations
110  // required as does edge transfers.
111  // dualfprintf(fail_file,"innerboundhere1\n"); fflush(fail_file);
112 
113 
114 
116  //
117  // initialize prim2bound and vpot2bound so interior code knows which to act upon
118  //
120  {
121  int jj;
122  for(jj=0;jj<NDIM;jj++){
123  prim2bound[jj]=NULL;
124  flux2bound[jj]=NULL;
125  vpot2bound[jj]=NULL;
126  }
127  }
128 
129 
131  //
132  // set prim2bound or flux2bound or vpot2bound
133  //
135  if(boundvartype==BOUNDPRIMTYPE || boundvartype==BOUNDPRIMSIMPLETYPE || boundvartype==BOUNDPSTAGTYPE || boundvartype==BOUNDPSTAGSIMPLETYPE){
136  prim2bound[1]=prim;
137  prim2bound[2]=prim;
138  prim2bound[3]=prim;
139  }
140  else if(boundvartype==BOUNDFLUXTYPE || boundvartype==BOUNDFLUXSIMPLETYPE){
141  flux2bound[1]=F1;
142  flux2bound[2]=F2;
143  flux2bound[3]=F3;
144  }
145  else if(boundvartype==BOUNDVPOTTYPE || boundvartype==BOUNDVPOTSIMPLETYPE){
146  vpot2bound[1]=vpot;
147  vpot2bound[2]=vpot;
148  vpot2bound[3]=vpot;
149  }
150  else{
151  dualfprintf(fail_file,"No such type of MPI bounding: boundvartype=%d\n",boundvartype);
152  myexit(917616);
153  }
154 
155 
156  int dirstart,dirfinish;
157  if(whichdir==-1 || whichdir==1){ dirstart=X1UP; dirfinish=X1DN;}
158  if(whichdir==-2 || whichdir==2){ dirstart=X2UP; dirfinish=X2DN;}
159  if(whichdir==-3 || whichdir==3){ dirstart=X3UP; dirfinish=X3DN;}
160 
161 
163  //
164  // pre-post recv's
165  //
166  // OPTMARK: Could avoid dir-dependent MPI calls in step_ch.c (currently true!) and post all recv's for all dirs at once.
167  // OPTMARK: Or setup MPIFLOWCONTROL==2 or SUPERMPI
168  //
170  if(whichdir==-1 || whichdir==-2 || whichdir==-3){
171  if((boundstage==STAGEM1)||(boundstage==STAGE0)){
172  for(dir=dirstart;dir<=dirfinish;dir++){
173  if(dirgenset[boundvartype][dir][DIRIF]){
174  recvonly(dir,boundvartype,workbc,requests);
175  didpostrecvs[dir]=1;
176  }
177  }
178  }
179  }
180  else if(whichdir==1 || whichdir==2 || whichdir==3){
181 
183  //
184  // per-whichdir (1,2,3) conditionals for bounding. Assume whichdir=1 done first, then 2, then 3, so that corners are done correctly as done with normal bounds.
185  //
187 
189  //
190  // x or y or z -dir
191  // once dir=0,1(X1UP,X1DN) is done, so can start 2,3(X2UP,X2DN)
192  //
194  if((boundstage==STAGEM1)||(boundstage==STAGE0&&whichdir==1 || boundstage==STAGE2&&whichdir==2 || boundstage==STAGE4&&whichdir==3)){
195  for(dir=dirstart;dir<=dirfinish;dir++){
196  if(dirgenset[boundvartype][dir][DIRIF]){
197  if(didpostrecvs[dir]==0){
198  dualfprintf(fail_file,"Did not post recv and tried to already pack: dir=%d\n",dir);
199  myexit(234525155);
200  }
201  pack(dir,boundvartype,prim2bound[whichdir],flux2bound[whichdir],vpot2bound[whichdir],workbc);
202  }
203  }
204  for(dir=dirstart;dir<=dirfinish;dir++) if(dirgenset[boundvartype][dir][DIRIF]) sendonly(dir,boundvartype,workbc,requests);
205  }
206  if((boundstage==STAGEM1)||(boundstage==STAGE1&&whichdir==1 || boundstage==STAGE3&&whichdir==2 || boundstage==STAGE5&&whichdir==3)){
207  for(dir=dirstart;dir<=dirfinish;dir++) if(dirgenset[boundvartype][dir][DIRIF]){
208  recvwait(dir,requests);
209  didpostrecvs[dir]=0; // done with recv's
210  }
211  for(dir=dirstart;dir<=dirfinish;dir++) if(dirgenset[boundvartype][dir][DIRIF]) unpack(dir,boundvartype,workbc,prim2bound[whichdir],flux2bound[whichdir],vpot2bound[whichdir]);
212  for(dir=dirstart;dir<=dirfinish;dir++) if(dirgenset[boundvartype][dir][DIRIF]) sendwait(dir,requests);
213  }
214  }
215  else{
216  dualfprintf(fail_file,"No such whichdir=%d in boundmpi.c\n",whichdir);
217  myexit(1986290386);
218  }
219 
220 
221 
222 
223 
224 
225 
226 #if(DEBUG)
227  logfprintf("\n\nafter\n\n");
228  FULLLOOP{
229  PBOUNDLOOP(pliter,pl){
230  logfprintf("%d %d %d %d %21.15g\n",i,j,k,pl,MACP0A1(prim,i,j,k,pl));
231  }
232  }
233  myexit(0);
234 #endif
235 
236 
237 
238  return(0);
239 
240 }
241 // end function
242 
243 
244 // Since pr used to control loop range, PLOOPMPI() must be at outer loop
245 #define PACKLOOP(i,j,k,istart,istop,jstart,jstop,kstart,kstop,di,dj,dk,pr,num) PLOOPMPI(pr,num) SUPERGENLOOP(i,j,k,istart,istop,jstart,jstop,kstart,kstop,di,dj,dk)
246 
247 #define PACKLOOPORIG(i,j,k,istart,istop,jstart,jstop,kstart,kstop,di,dj,dk,pr,num) PLOOPMPIORIG(pr,num) SUPERGENLOOP(i,j,k,istart,istop,jstart,jstop,kstart,kstop,di,dj,dk)
248 
249 
250 // the last element not used if SPILTNPR==1
251 #define PACKBLOCK i,j,k \
252  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTART1] \
253  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTOP1] \
254  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTART2] \
255  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTOP2] \
256  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTART3] \
257  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTOP3] \
258  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPDIR1] \
259  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPDIR2] \
260  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPDIR3] \
261  ,pr \
262  ,dirgenset[boundvartype][dir][DIRNUMPR]
263 
264 
265 
267 void pack(int dir, int boundvartype, FTYPE (*prim)[NSTORE2][NSTORE3][NPR], FTYPE (*flux)[NSTORE2][NSTORE3][NPR+NSPECIAL], FTYPE (*vpot)[NSTORE1+SHIFTSTORE1][NSTORE2+SHIFTSTORE2][NSTORE3+SHIFTSTORE3], FTYPE (*workbc)[COMPDIM * 2][NMAXBOUND * NBIGBND * NBIGSM])
268 {
269  // dir=direction sending
270  int i,j,k;
271  int bci,pr;
272 
273  bci=0;
274  if(prim!=NULL){
275  PACKLOOP(i,j,k \
276  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTART1] \
277  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTOP1] \
278  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTART2] \
279  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTOP2] \
280  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTART3] \
281  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTOP3] \
282  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPDIR1] \
283  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPDIR2] \
284  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPDIR3] \
285  ,pr \
286  ,dirgenset[boundvartype][dir][DIRNUMPR]){
287 
288  /*
289  if(bci>=dirgenset[boundvartype][dir][DIRSIZE]){
290  dualfprintf(fail_file,"pack memory leak: bci: %d dirgenset[%d][DIRSIZE]: %d\n",bci,dirgenset[boundvartype][dir][DIRSIZE]);
291  myexit(10);
292  }
293  */
294 
295  workbc[PACK][dir][bci++] = MACP0A1(prim,i,j,k,pr) * primfactor[boundvartype][dir][primgridpos[boundvartype][dir][pr]][PACK][pr];
296 
297  }// end of vpot==NULL loop
298 
299  }// end if prim!=NULL
300  else{ // flux or vpot
301  PACKLOOPORIG(i,j,k \
302  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTART1] \
303  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTOP1] \
304  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTART2] \
305  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTOP2] \
306  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTART3] \
307  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPSTOP3] \
308  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPDIR1] \
309  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPDIR2] \
310  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRPDIR3] \
311  ,pr \
312  ,dirgenset[boundvartype][dir][DIRNUMPR]){
313  if(vpot!=NULL){
314  workbc[PACK][dir][bci++] = MACP1A0(vpot,pr,i,j,k) * primfactor[boundvartype][dir][primgridpos[boundvartype][dir][pr]][PACK][pr];
315  }
316  else if(flux!=NULL){
317  workbc[PACK][dir][bci++] = MACP1A0(flux,pr,i,j,k) * primfactor[boundvartype][dir][primgridpos[boundvartype][dir][pr]][PACK][pr];
318  }
319  else{
320  dualfprintf(fail_file,"No such pack type\n");
321  myexit(982359235);
322  }
323  }// end vpot!=NULL loop
324  }// end if vpot!=NULL
325 
326 
327 }
328 
329 
331 static int get_truesize(int dir, int boundvartype)
332 {
333  int truesize;
334 
335 #if(1)
336  // now always controlling range of quantities
337  // must be consistent with how PLOOPMPI is setup
338  if(boundvartype==BOUNDPRIMTYPE || boundvartype==BOUNDPRIMSIMPLETYPE || boundvartype==BOUNDPSTAGTYPE || boundvartype==BOUNDPSTAGSIMPLETYPE) truesize=dirgenset[boundvartype][dir][DIRSIZE]/NPRBOUND;
339  else if(boundvartype==BOUNDFLUXTYPE || boundvartype==BOUNDFLUXSIMPLETYPE) truesize=dirgenset[boundvartype][dir][DIRSIZE]/NFLUXBOUND;
340  else if(boundvartype==BOUNDVPOTTYPE || boundvartype==BOUNDVPOTSIMPLETYPE) truesize=dirgenset[boundvartype][dir][DIRSIZE]/NDIM;
341  else{
342  dualfprintf(fail_file,"No such boundvartype=%d in sendrecv() in boundmpi.c\n",boundvartype);
343  myexit(34867364);
344  }
345  truesize *= (nprboundend-nprboundstart+1);
346 
347 #else
348  truesize=dirgenset[boundvartype][dir][DIRSIZE];
349 #endif
350 
351  return(truesize);
352 
353 }
354 
356 void recvonly(int dir, int boundvartype, FTYPE (*workbc)[COMPDIM * 2][NMAXBOUND * NBIGBND * NBIGSM],MPI_Request *requests)
357 {
358  int truesize;
359 
360  truesize=get_truesize(dir, boundvartype);
361 
362  MPI_Irecv(workbc[UNPACK][dir],
363  truesize,
364  MPI_FTYPE,
365  MPIid[dirgenset[boundvartype][dir][DIROTHER]],
366  TAGSTARTBOUNDMPI + dirgenset[boundvartype][dir][DIRTAGR],
367  MPI_COMM_GRMHD,
368  &requests[dir*2+REQRECV]);
369 
370 }
371 
373 void sendonly(int dir, int boundvartype, FTYPE (*workbc)[COMPDIM * 2][NMAXBOUND * NBIGBND * NBIGSM],MPI_Request *requests)
374 {
375  int truesize;
376 
377  truesize=get_truesize(dir, boundvartype);
378 
379 
380  if(MPIFLOWCONTROL==1){
381  // nominally, good to pre-post recv as here, but other id's Isend might have already been sent a while back causing the unexpected buffer to be filled. Happens on NICS Kraken.
382  // http://climate.ornl.gov/~rmills/pubs/mills-hoffman_CUG2009.pdf
383  // The above suggests only pushing the Isend once a 0-byte blocking handshake has been completed from the receiver. This ensures the relevant Irecv is already posted.
384 
385  // See also:
386  // http://www.uni-due.de/imperia/md/content/css/ude_3_mpi.pdf
387  // http://www.nics.tennessee.edu/computing-resources/kraken/mpi-tips-for-cray-xt5
388  // http://www.nersc.gov/assets/Uploads/NERSCXTMPIEnvironment.pdf
389 
390  // Could place global barrier here, but then stops not just the pairs of recv/send but all procs.
391 
392  // So post blocking send of 0-byte size to same place as sending full data.
393  int nothingsend=0;
394  int nothingrecv=0;
395  // maxtag should fit into integer (modern machines is -2147483648 to +2147483647 or maximum numprocs ~ 3.5E8 that is quite large enough for now.
396  // otherwise use negative, which would work too.
397  int maxtag = numprocs*COMPDIM*2; // see mpi_init.c DIRTAGS,DIRTAGR how set.
398 
399  MPI_Sendrecv(
400  &nothingsend,0,MPI_INT,
401  MPIid[dirgenset[boundvartype][dir][DIROTHER]],
402  TAGSTARTBOUNDMPI + maxtag + dirgenset[boundvartype][dir][DIRTAGS],
403 
404  &nothingrecv,0,MPI_INT,
405  MPIid[dirgenset[boundvartype][dir][DIROTHER]],
406  TAGSTARTBOUNDMPI + maxtag + dirgenset[boundvartype][dir][DIRTAGR],
407 
408  MPI_COMM_GRMHD,MPI_STATUS_IGNORE);
409 
410  } // end if doing FLOWCONTROL
411 
412 
413  MPI_Isend(workbc[PACK][dir],
414  truesize,
415  MPI_FTYPE,
416  MPIid[dirgenset[boundvartype][dir][DIROTHER]],
417  TAGSTARTBOUNDMPI + dirgenset[boundvartype][dir][DIRTAGS],
418  MPI_COMM_GRMHD,
419  &requests[dir*2+REQSEND]);
420 }
421 
422 
423 void recvwait(int dir,MPI_Request *requests)
424 {
425  MPI_Wait(&requests[dir*2+REQRECV], &mpichstatus);
426 }
427 
428 
429 // last element not used if doing general quantity loop
430 #define UNPACKBLOCK i,j,k \
431  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTART1] \
432  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTOP1] \
433  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTART2] \
434  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTOP2] \
435  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTART3] \
436  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTOP3] \
437  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUDIR1] \
438  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUDIR2] \
439  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUDIR3] \
440  ,pr \
441  ,dirgenset[boundvartype][dir][DIRNUMPR]
442 
444 void unpack(int dir, int boundvartype, FTYPE (*workbc)[COMPDIM * 2][NMAXBOUND * NBIGBND * NBIGSM],FTYPE (*prim)[NSTORE2][NSTORE3][NPR], FTYPE (*flux)[NSTORE2][NSTORE3][NPR+NSPECIAL], FTYPE (*vpot)[NSTORE1+SHIFTSTORE1][NSTORE2+SHIFTSTORE2][NSTORE3+SHIFTSTORE3])
445 {
446  // dir is direction receiving from
447  int i,j,k;
448  int bci,pr;
449 
450  bci=0;
451  if(prim!=NULL){
452  PACKLOOP(i,j,k \
453  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTART1] \
454  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTOP1] \
455  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTART2] \
456  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTOP2] \
457  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTART3] \
458  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTOP3] \
459  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUDIR1] \
460  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUDIR2] \
461  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUDIR3] \
462  ,pr \
463  ,dirgenset[boundvartype][dir][DIRNUMPR]){
464 
465  MACP0A1(prim,i,j,k,pr)=workbc[UNPACK][dir][bci++] * primfactor[boundvartype][dir][primgridpos[boundvartype][dir][pr]][UNPACK][pr];
466  }
467  }
468  else{
469  PACKLOOPORIG(i,j,k \
470  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTART1] \
471  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTOP1] \
472  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTART2] \
473  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTOP2] \
474  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTART3] \
475  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUSTOP3] \
476  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUDIR1] \
477  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUDIR2] \
478  ,dirloopset[boundvartype][dir][primgridpos[boundvartype][dir][pr]][DIRUDIR3] \
479  ,pr \
480  ,dirgenset[boundvartype][dir][DIRNUMPR]){
481  if(vpot!=NULL){
482  MACP1A0(vpot,pr,i,j,k)=workbc[UNPACK][dir][bci++] * primfactor[boundvartype][dir][primgridpos[boundvartype][dir][pr]][UNPACK][pr];
483  }
484  else if(flux!=NULL){
485  MACP1A0(flux,pr,i,j,k)=workbc[UNPACK][dir][bci++] * primfactor[boundvartype][dir][primgridpos[boundvartype][dir][pr]][UNPACK][pr];
486  }
487  else{
488  dualfprintf(fail_file,"No such unpack type\n");
489  myexit(982359236);
490  }
491  }
492  }
493 
494 }
495 
496 
497 void sendwait(int dir,MPI_Request *requests)
498 {
499  MPI_Wait(&requests[dir*2+REQSEND], &mpichstatus);
500 }
501 
502 
503 
504 
506 int bound_mpi(int boundstage, int finalstep, int fakedir, int boundvartype, FTYPE (*prim)[NSTORE2][NSTORE3][NPR], FTYPE (*F1)[NSTORE2][NSTORE3][NPR+NSPECIAL], FTYPE (*F2)[NSTORE2][NSTORE3][NPR+NSPECIAL], FTYPE (*F3)[NSTORE2][NSTORE3][NPR+NSPECIAL], FTYPE (*vpot)[NSTORE1+SHIFTSTORE1][NSTORE2+SHIFTSTORE2][NSTORE3+SHIFTSTORE3])
507 {
508  int bound_mpi_dir(int boundstage, int finalstep, int whichdir, int boundvartype, FTYPE (*prim)[NSTORE2][NSTORE3][NPR], FTYPE (*F1)[NSTORE2][NSTORE3][NPR+NSPECIAL], FTYPE (*F2)[NSTORE2][NSTORE3][NPR+NSPECIAL], FTYPE (*F3)[NSTORE2][NSTORE3][NPR+NSPECIAL], FTYPE (*vpot)[NSTORE1+SHIFTSTORE1][NSTORE2+SHIFTSTORE2][NSTORE3+SHIFTSTORE3]);
509  int whichdir;
510 
511  // separate pre-post recv and intermediate user bound and then final other MPI stuff -- implies TAG space is globally separated for these MPI calls and any MPI stuff done by user!
512  if(fakedir==-1){
513  whichdir=-1; bound_mpi_dir(boundstage, finalstep, whichdir, boundvartype, prim, F1, F2, F3, vpot); // pre-post MPI recv's
514  whichdir=-2; bound_mpi_dir(boundstage, finalstep, whichdir, boundvartype, prim, F1, F2, F3, vpot); // pre-post MPI recv's
515  whichdir=-3; bound_mpi_dir(boundstage, finalstep, whichdir, boundvartype, prim, F1, F2, F3, vpot); // pre-post MPI recv's
516  }
517  else if(fakedir==1){
518  whichdir=1; bound_mpi_dir(boundstage, finalstep, whichdir, boundvartype, prim, F1, F2, F3, vpot);
519  whichdir=2; bound_mpi_dir(boundstage, finalstep, whichdir, boundvartype, prim, F1, F2, F3, vpot);
520  whichdir=3; bound_mpi_dir(boundstage, finalstep, whichdir, boundvartype, prim, F1, F2, F3, vpot);
521  }
522  else if(fakedir==0){
523  whichdir=-1; bound_mpi_dir(boundstage, finalstep, whichdir, boundvartype, prim, F1, F2, F3, vpot); // pre-post MPI recv's
524  whichdir=1; bound_mpi_dir(boundstage, finalstep, whichdir, boundvartype, prim, F1, F2, F3, vpot);
525  whichdir=-2; bound_mpi_dir(boundstage, finalstep, whichdir, boundvartype, prim, F1, F2, F3, vpot); // pre-post MPI recv's
526  whichdir=2; bound_mpi_dir(boundstage, finalstep, whichdir, boundvartype, prim, F1, F2, F3, vpot);
527  whichdir=-3; bound_mpi_dir(boundstage, finalstep, whichdir, boundvartype, prim, F1, F2, F3, vpot); // pre-post MPI recv's
528  whichdir=3; bound_mpi_dir(boundstage, finalstep, whichdir, boundvartype, prim, F1, F2, F3, vpot);
529  }
530 
531  return(0);
532 }
533 
534