// HARM: harm and utilities
// mpi_init.c : MPI/OpenMP initialization, log-file setup, and domain decomposition

#include "decs.h"

/// General MPI initialization (rank, size, processor name); also allocates MPIid[].
int init_MPI_general(int *argc, char **argv[])
{

#if(USEMPI)
  int ierr;
#if(USEOPENMP)
  int provided,required=MPI_THREAD_MULTIPLE;
  // lonestar4 locked up here for some reason; had to set USEOPENMP->0 in makehead.inc.
  // Ranger was fine with OpenMP, but production runs on lonestar3 didn't use OpenMP, so the situation is unclear.
  ierr=MPI_Init_thread(argc, argv, required, &provided);
  stderrfprintf("Using MPI_Init_thread with required=%d and provided=%d\n",required,provided);
#else
  stderrfprintf("Begin: MPI_Init\n"); fflush(stderr);
  ierr=MPI_Init(argc, argv);
  stderrfprintf("End: MPI_Init\n"); fflush(stderr);
#endif

  if(ierr!=0){
    stderrfprintf("MPI Error during MPI_Init\n");
    exit(1);
  }

  MPI_Comm_size(MPI_COMM_WORLD, &truenumprocs);          // WORLD total number of processors
  MPI_Comm_rank(MPI_COMM_WORLD, &myid_world);            // WORLD proc id
  MPI_Get_processor_name(processor_name, &procnamelen);  // to check we really are on the intended nodes

  stderrfprintf("WORLD proc: %d of %d on %s\n", myid_world,truenumprocs,processor_name);

  stderrfprintf("end: init_MPI\n");
  fflush(stderr);
#else
  truenumprocs=1;
  myid=0;
  myid_world=0;
#endif


  // allocate things that are truenumprocs in size
  MPIid=(int*)malloc(sizeof(int)*truenumprocs);
  if(MPIid==NULL){
    stderrfprintf("Problem allocating memory for MPIid with truenumprocs=%d\n",truenumprocs); fflush(stderr);
    myexit(679438212);
  }


  return(0);
}
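

// Illustrative startup sequence (a sketch only: the real driver lives elsewhere,
// e.g. in the main/init code; the ordering is assumed from the dependencies visible
// in this file: myargs() uses myid, init_OPENMP_sets_fromargs() needs the
// numopenmpthreads value parsed by myargs(), and init_MPI_setupfilesandgrid()
// needs ncpux1..3).
#if(0)
int main(int argc, char *argv[])
{
  init_MPI_general(&argc, &argv);          // MPI_Init + rank/size/node name
  myargs(argc, argv);                      // parse CPU layout, thread count, restart options
  init_OPENMP_sets_fromargs();             // apply requested OpenMP thread count
  init_MPI_setupfilesandgrid(argc, argv);  // log files, grid decomposition, type checks
  // ... evolution loop ...
  myexit(0);
  return(0);
}
#endif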


/// General OpenMP initialization.
int init_OPENMP_general(FILE *out)
{

  // nothing to do for OpenMP so far

  return(0);
}
81 
82 
83 
84 
88 {
89 
90 #if(USEOPENMP)
91  // Set number of threads either from initialization somehow or from user arguments that set numopenmpthreads variable
92  if(numopenmpthreads!=numopenmpthreadsorig) omp_set_num_threads(numopenmpthreads);
93 #endif
94 
95  return(0);
96 
97 }



/// Choose the file-combine method, open log files, set up grid-location info,
/// and sanity-check MPI datatype sizes.
void init_MPI_setupfilesandgrid(int argc, char *argv[])
{
  int size;


  //
  // choose to combine files or not
  //
  if(USEMPI){
    mpicombine = 1; // choice
    //mpicombine = 0;

    if(mpicombine){
      if(USEROMIO==0){
        // choice
        if(sortedoutput==SORTED) mpicombinetype=MPICOMBINEMINMEM;
        else if(sortedoutput==UNSORTED) mpicombinetype=MPICOMBINESIMPLE; // forced, since there is no unsorted method for the advanced combine technique
        //mpicombinetype=MPICOMBINESIMPLE; // forced for testing
      }
      else truempicombinetype=mpicombinetype=MPICOMBINEROMIO;
    }
  }
  else{
    // no choice
    mpicombine = 0;
  }


  //
  // Setup files and grid information
  // always done
  //

  // after the call below one can use dualfprintf, trifprintf, etc.
  init_genfiles(0);

  // after the call below one can access intra-CPU MPI-related setup (all but boundary, i.e. inter-CPU MPI-related, stuff)
  init_placeongrid_gridlocation();


  //
  // Some MPI type size checks
  //
#if(USEMPI)

  if(sizeof(CTYPE)==sizeof(long long int)){
    MPI_Type_size(MPI_LONG_LONG_INT,&size);
    if(size!=8){
      dualfprintf(fail_file,"size of long long int in MPI=%d, should be 8\n",size);
      myexit(1000);
    }
  }

  if((sizeof(REALTYPE)==sizeof(long double))||(sizeof(SENSITIVE)==sizeof(long double))){
    MPI_Type_size(MPI_LONG_DOUBLE,&size);
    if(size!=16){
      dualfprintf(fail_file,"size of long double in MPI=%d, should be 16\n",size);
      myexit(1000);
    }
  }
#endif


  trifprintf("done with init_MPI_setupfilesandgrid()\n");

}




/// Parse command-line arguments: OpenMP thread count, CPU layout (ncpux1,ncpux2,ncpux3),
/// and optional restart arguments (RESTARTMODE, WHICHFILE).
void myargs(int argc, char *argv[])
{
  int argi,numargs,numextraargs;


  // default:
  ncpux1 = 1;
  ncpux2 = 1;
  ncpux3 = 1;

  // defaults unless user supplies the extra arguments below
  numextraargs=2;
  RESTARTMODE=0;
  WHICHFILE=0;


  if(USEMPI && USEOPENMP){
    numargs=1+3+1;

    if(! (argc==numargs || argc==numargs+numextraargs) ){
      if(myid==0){
        stderrfprintf("proc: %04d : Incorrect command line: argc=%d, need at least %d; please specify:\n",myid,argc,numargs);
        stderrfprintf("proc: %04d : mpirun <mpirunoptions> <progname> numopenmpthreads ncpux1 ncpux2 ncpux3\n",myid);
        stderrfprintf("proc: %04d : OR\n",myid);
        stderrfprintf("proc: %04d : mpirun <mpirunoptions> <progname> numopenmpthreads ncpux1 ncpux2 ncpux3 RESTARTMODE WHICHFILE\n",myid);
      }
      exit(1);
    }
    argi=1;
    numopenmpthreads=atoi(argv[argi++]);
    ncpux1=atoi(argv[argi++]);
    ncpux2=atoi(argv[argi++]);
    ncpux3=atoi(argv[argi++]);
  }
  else if(USEMPI){
    numargs=1+3;

    if(! (argc==numargs || argc==numargs+numextraargs) ){
      if(myid==0){
        stderrfprintf("proc: %04d : Incorrect command line: argc=%d, need at least %d; please specify:\n",myid,argc,numargs);
        stderrfprintf("proc: %04d : mpirun <mpirunoptions> <progname> ncpux1 ncpux2 ncpux3\n",myid);
        stderrfprintf("proc: %04d : OR\n",myid);
        stderrfprintf("proc: %04d : mpirun <mpirunoptions> <progname> ncpux1 ncpux2 ncpux3 RESTARTMODE WHICHFILE\n",myid);
      }
      exit(1);
    }
    argi=1;
    ncpux1=atoi(argv[argi++]);
    ncpux2=atoi(argv[argi++]);
    ncpux3=atoi(argv[argi++]);
  }
  else if(USEOPENMP){
    numargs=1+1;

    if(! (argc==numargs || argc==numargs+numextraargs) ){
      if(myid==0){
        stderrfprintf("proc: %04d : Incorrect command line: argc=%d, need at least %d; please specify:\n",myid,argc,numargs);
        stderrfprintf("proc: %04d : <progname> numopenmpthreads\n",myid);
        stderrfprintf("proc: %04d : OR\n",myid);
        stderrfprintf("proc: %04d : <progname> numopenmpthreads RESTARTMODE WHICHFILE\n",myid);
      }
      exit(1);
    }
    argi=1;
    numopenmpthreads=atoi(argv[argi++]);
  }
  else{
    numargs=1;

    if(! (argc==numargs || argc==numargs+numextraargs) ){
      if(myid==0){
        stderrfprintf("<progname>\n");
        stderrfprintf("OR\n");
        stderrfprintf("<progname> RESTARTMODE WHICHFILE\n");
      }
      exit(1);
    }
    argi=1; // no layout arguments to skip, but the optional restart arguments below still start at argv[1]
  }// end if single CPU mode

  //
  // get user-defined restart options
  //
  if(argc==numargs+numextraargs){
    RESTARTMODE=atoi(argv[argi++]);
    WHICHFILE=atoi(argv[argi++]);
  }


  // no failure variables are input at run-time, since one has to recompile anyway for failure tracking


  //
  // General checks (these work whether or not MPI or OpenMP is used)
  //
  if(ncpux1>1 && N1==1){
    stderrfprintf("Cannot have ncpux1=%d>N1=%d\n",ncpux1,N1);
    exit(1);
  }
  if(ncpux2>1 && N2==1){
    stderrfprintf("Cannot have ncpux2=%d>N2=%d\n",ncpux2,N2);
    exit(1);
  }
  if(ncpux3>1 && N3==1){
    stderrfprintf("Cannot have ncpux3=%d>N3=%d\n",ncpux3,N3);
    exit(1);
  }


  if(numopenmpthreads>N1*N2*N3) stderrfprintf("More OpenMP threads (%d) than total number of points per MPI task (%d), so parallelization will be poor\n",numopenmpthreads,N1*N2*N3);


  //
  // get total number of processors
  //
  numprocs = ncpux1*ncpux2*ncpux3; // NOTE: this assignment was missing from the extracted listing; it is implied by the check below

  // test
  if(sizeproclist_grmhd!=numprocs){
    stderrfprintf("Got (sizeproclist_grmhd=%d) != (numprocs=ncpux1*ncpux2*ncpux3=%d). ncpux1=%d ncpux2=%d ncpux3=%d. Did you run without mpirun?\n",sizeproclist_grmhd,numprocs,ncpux1,ncpux2,ncpux3);
    exit(1);
  }


  myfprintf(stderr,
            "numprocs=%d ncpux1=%d ncpux2=%d ncpux3=%d numopenmpthreads=%d :: percpusize: N1=%d N2=%d N3=%d\n",
            numprocs, ncpux1, ncpux2, ncpux3, numopenmpthreads, N1, N2, N3);


}
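
// Example invocations matching the argument lists above (the binary name "grmhd" is illustrative only):
//   MPI+OpenMP build : mpirun -np 8 ./grmhd 4 2 2 2       -> 4 threads/task, ncpux1=2 ncpux2=2 ncpux3=2 (8 MPI tasks)
//   MPI+OpenMP build : mpirun -np 8 ./grmhd 4 2 2 2 1 0   -> same, plus RESTARTMODE=1 WHICHFILE=0
//   MPI-only build   : mpirun -np 4 ./grmhd 2 2 1
//   serial build     : ./grmhd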



/// Open general log files (fail, log, logdt per-CPU; logfull, logdtfull, logstep, logperf on the master).
void init_genfiles(int gopp)
{
  char temps[MAXFILENAME];
  char extension[MAXFILENAME];
  char binarytype[MAXFILENAME];
  void set_binarytype(char *binarytype);


  stderrfprintf("begin: init_genfiles ... ");
  fflush(stderr);

  if (gopp == 1) {
    strcpy(extension, PPEXT);
  } else if (gopp == 0) {
    strcpy(extension, OUTEXT);
  }

  // always have fail and general log open

  if((PRODUCTION<=2 && myid==0) || PRODUCTION<=1){
    sprintf(temps, "%s0_fail%s%s", DATADIR, extension, myidtxt);
    if ((fail_file = fopen(temps, "at")) == NULL) {
      stderrfprintf("fail: Cannot open: %s\n", temps);
      exit(1);
    }
    stderrfprintf("opened: %s\n", temps);
  }


  if((PRODUCTION<=2 && myid==0) || PRODUCTION<=1){
    sprintf(temps, "%s0_log%s%s", DATADIR, extension, myidtxt);
    if ((log_file = fopen(temps, "at")) == NULL) {
      stderrfprintf("log: Cannot open: %s\n", temps);
      exit(1);
    }
    stderrfprintf("opened: %s\n", temps);
    fprintf(log_file, "fail_file: %p log_file: %p\n", (void*)fail_file, (void*)log_file); // print as pointers, not (int) casts
    fflush(log_file);
  }


  if((PRODUCTION<=2 && myid==0) || PRODUCTION<=1){
    sprintf(temps, "%s0_logdt%s%s", DATADIR, extension, myidtxt);
    if ((logdt_file = fopen(temps, "at")) == NULL) {
      stderrfprintf("logdt: Cannot open: %s\n", temps);
      exit(1);
    }
    stderrfprintf("opened: %s\n", temps);
    fflush(logdt_file);
  }


  if (myid == 0) {

    set_binarytype(binarytype);

    sprintf(temps, "%s0_logfull%s%s", DATADIR, binarytype, extension);
    if ((logfull_file = fopen(temps, "at")) == NULL) {
      stderrfprintf("logfull: Cannot open: %s\n", temps);
      exit(1);
    }
    stderrfprintf("opened: %s\n", temps);
    fprintf(logfull_file, "logfull_file: %p\n", (void*)logfull_file);
    fflush(logfull_file);


    sprintf(temps, "%s0_logdtfull%s%s", DATADIR, binarytype, extension);
    if ((logdtfull_file = fopen(temps, "at")) == NULL) {
      stderrfprintf("logdtfull: Cannot open: %s\n", temps);
      exit(1);
    }
    stderrfprintf("opened: %s\n", temps);
    fprintf(logdtfull_file, "logdtfull_file: %p\n", (void*)logdtfull_file);
    fflush(logdtfull_file);

    sprintf(temps, "%s0_logstep%s%s", DATADIR, binarytype, extension);
    if ((logstep_file = fopen(temps, "at")) == NULL) {
      stderrfprintf("logstep: Cannot open: %s\n", temps);
      exit(1);
    }
    stderrfprintf("opened: %s\n", temps);

    sprintf(temps, "%s0_logperf%s%s", DATADIR, binarytype, extension);
    if ((logperf_file = fopen(temps, "at")) == NULL) {
      stderrfprintf("logperf: Cannot open: %s\n", temps);
      exit(1);
    }
    stderrfprintf("opened: %s\n", temps);

  }


  // ok now
  trifprintf("end: init_genfiles\n");
}


/// Set the binary-type suffix used in log-file names.
void set_binarytype(char *binarytype)
{
  if(DOINGGRMHDTYPECODE){
    sprintf(binarytype,".grmhd");
  }
  else if(DOINGGRRAYTYPECODE){
    sprintf(binarytype,".grray");
  }
  else if(DOINGLIAISONTYPECODE){
    sprintf(binarytype,".liaison");
  }
}


/// Set primgridpos[]: where each quantity lives (CENTGRID or STAGGRID) for each
/// bounding type and transfer direction.
/// NOTE: the assignment statements in this function were dropped by the listing
/// extraction; they are reconstructed here from the one surviving BOUNDINTTYPE
/// line and the surrounding comments.
void set_primgridpos(void)
{
  int dir,pl,pliter;

  // all centered
  DIRLOOP(dir) PALLLOOP(pl) primgridpos[BOUNDPRIMTYPE][dir][pl]=primgridpos[BOUNDPRIMSIMPLETYPE][dir][pl]=CENTGRID;

  // pstag (centered except along dir)
  DIRLOOP(dir) PALLLOOP(pl){
    if(
       ((dir==X1DN || dir==X1UP) && pl==B1)
       ||((dir==X2DN || dir==X2UP) && pl==B2)
       ||((dir==X3DN || dir==X3UP) && pl==B3)
       ){
      primgridpos[BOUNDPSTAGTYPE][dir][pl]=primgridpos[BOUNDPSTAGSIMPLETYPE][dir][pl]=STAGGRID;
    }
    else{
      primgridpos[BOUNDPSTAGTYPE][dir][pl]=primgridpos[BOUNDPSTAGSIMPLETYPE][dir][pl]=CENTGRID;
    }
  }


  // all staggered (since F1 is along dir=1, etc.)
  DIRLOOP(dir) PALLLOOP(pl) primgridpos[BOUNDFLUXTYPE][dir][pl]=primgridpos[BOUNDFLUXSIMPLETYPE][dir][pl]=STAGGRID;


  // vpot (centered except along perp to dir for A_i)
  DIRLOOP(dir) DLOOPA(pl){
    if(
       ((dir==X2DN || dir==X2UP || dir==X3DN || dir==X3UP) && pl==1)
       ||((dir==X1DN || dir==X1UP || dir==X3DN || dir==X3UP) && pl==2)
       ||((dir==X1DN || dir==X1UP || dir==X2DN || dir==X2UP) && pl==3)
       ){
      primgridpos[BOUNDVPOTTYPE][dir][pl]=primgridpos[BOUNDVPOTSIMPLETYPE][dir][pl]=STAGGRID;
    }
    else{
      primgridpos[BOUNDVPOTTYPE][dir][pl]=primgridpos[BOUNDVPOTSIMPLETYPE][dir][pl]=CENTGRID;
    }
  }


  // all centered
  DIRLOOP(dir) PALLLOOP(pl) primgridpos[BOUNDINTTYPE][dir][pl]=CENTGRID;


}
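
// Worked example of the table above: when bounding pstag along x1 (dir=X1UP or X1DN),
// only B1 lives on the x1 faces being exchanged, so
// primgridpos[BOUNDPSTAGTYPE][X1UP][B1]=STAGGRID, while B2, B3, and all centered
// quantities keep CENTGRID for that direction.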



/// Set this CPU's location on the global grid: numbercpu[], mycpupos[], startpos[],
/// endpos[], totalsize[], and the per-CPU tables needed by the master.
void init_placeongrid_gridlocation(void)
{
  // the 3's here were COMPDIM, but the code below is now fixed to require all 3 dimensions
  int i, j, m, l;
  int N[3 + 1];


  trifprintf("begin: init_placeongrid_gridlocation ... ");


  //
  // Set CPU and grid size information
  //
  N[1] = N1;
  N[2] = N2;
  N[3] = N3;

  numbercpu[1] = ncpux1;
  numbercpu[2] = ncpux2;
  numbercpu[3] = ncpux3;

  mycpupos[1]=myid%ncpux1;
  mycpupos[2]=(int)((myid%(ncpux1*ncpux2))/ncpux1);
  mycpupos[3]=(int)(myid/(ncpux1*ncpux2));
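  // Worked example of the id -> position mapping above (x1 is the fastest-varying
  // index): with ncpux1=4, ncpux2=2, ncpux3=3 and myid=13,
  //   mycpupos[1] = 13%4         = 1
  //   mycpupos[2] = (13%(4*2))/4 = 5/4 = 1
  //   mycpupos[3] = 13/(4*2)     = 1
  // i.e. CPU 13 sits at (1,1,1) in the 4x2x3 processor grid.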


  for (m = 1; m <= COMPDIM; m++) {
    startpos[m] = mycpupos[m] * N[m];
    endpos[m] = (mycpupos[m] + 1) * N[m] - 1;

    // add up sizes for total size of grid
    totalsize[m] = 0;
    itotalsize[m] = 0;
    for (i = 0; i < numbercpu[m]; i++) {
      totalsize[m] += N[m];
      itotalsize[m] += N[m];
    }
  }
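  // Worked example: with N1=64 per CPU and ncpux1=4, the CPU at mycpupos[1]=2 owns
  // startpos[1]=128 .. endpos[1]=191, and totalsize[1]=4*64=256 (every CPU carries
  // the same N[m], so the sum above is just numbercpu[m]*N[m]).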

  for(m=1;m<=COMPDIM;m++){
    if((mycpupos0[m]=(int*)malloc(sizeof(int)*numprocs))==NULL){
      dualfprintf(fail_file,"can't allocate mycpupos0[%d]\n",m);
      myexit(2845725);
    }
    if((startpos0[m]=(int*)malloc(sizeof(int)*numprocs))==NULL){
      dualfprintf(fail_file,"can't allocate startpos0[%d]\n",m);
      myexit(2845726);
    }
    if((endpos0[m]=(int*)malloc(sizeof(int)*numprocs))==NULL){
      dualfprintf(fail_file,"can't allocate endpos0[%d]\n",m);
      myexit(2845727);
    }
  }
  // CPU 0, as master to the rest, needs this info for every CPU
  for(i=0;i<numprocs;i++){
    mycpupos0[1][i]=i%ncpux1;
    mycpupos0[2][i]=(int)((i%(ncpux1*ncpux2))/ncpux1);
    mycpupos0[3][i]=(int)(i/(ncpux1*ncpux2));


    for (m = 1; m <= COMPDIM; m++) {
      startpos0[m][i] = mycpupos0[m][i] * N[m];
      endpos0[m][i] = (mycpupos0[m][i] + 1) * N[m] - 1;
    }
  }


  totalzones = totalsize[1]*totalsize[2]*totalsize[3]; // NOTE: this assignment was missing from the extracted listing; it is implied by the diagnostic below


  //
  // output those things that were defined
  //
#if(USEMPI)
  logfprintf("myid=%d node_name=%s procnamelen=%d\n",myid,processor_name,procnamelen);
#endif
  trifprintf("\nnumprocs(MPI)=%d ncpux1=%d ncpux2=%d ncpux3=%d numopenmpthreads=%d\n",numprocs,ncpux1,ncpux2,ncpux3,numopenmpthreads);
  trifprintf("\nPer MPI task: N1=%d N2=%d N3=%d\n",N1,N2,N3);
  logfprintf("per: %d %d %d\n", periodicx1, periodicx2, periodicx3);

  for (m = 1; m <= COMPDIM; m++) {
    logfprintf("mycpupos[%d]: %d\n", m, mycpupos[m]);
    logfprintf("startpos[%d]: %d\n", m, startpos[m]);
    logfprintf("endpos[%d]: %d\n", m, endpos[m]);
    logfprintf("totalsize[%d]: %lld\n", m, totalsize[m]);
  }

  trifprintf("totalzones: %d\n", totalzones);


  trifprintf("end: init_placeongrid_gridlocation\n");

}



/// Set up the MPI domain-decomposition communication tables: which directions this CPU
/// transfers in (dirgenset), which CPU it talks to, message tags, transfer sizes, and the
/// pack/unpack loop ranges (dirloopset), including the special 3D spherical-polar (SPC)
/// transmissive polar boundary.
void init_placeongrid_griddecomposition(void)
{
  // the 3's here were COMPDIM, but the code below is now fixed to require all 3 dimensions
  int stage,stagei,stagef;
  int i, j, m, l;

  int dir, bti;
  int opp[3*2];
  int pl,pliter;

  int numbnd[NDIM],surfa[NDIM];
  int numnpr;
  int gridpos;


  trifprintf("begin: init_placeongrid_griddecomposition ... ");


  //
  // standard interior MPI data transfer setup
  //
  for(bti=0;bti<NUMBOUNDTYPES;bti++) for(dir=0;dir<COMPDIM*2;dir++) for(j=0;j<DIRGENNUMVARS;j++){
    dirgenset[bti][dir][j]=0;
  }

  for(bti=0;bti<NUMBOUNDTYPES;bti++) for(dir=0;dir<COMPDIM*2;dir++) for(gridpos=0;gridpos<NUMPRIMGRIDPOS;gridpos++) for(j=0;j<DIRLOOPNUMVARS;j++){
    dirloopset[bti][dir][gridpos][j]=0;
  }
  // see where this cpu needs to send/recv


  //
  // set variable grid positions as CENT or STAG
  //
  set_primgridpos();


  //
  // set whether doing a particular boundary transfer
  //
  for(bti=0;bti<NUMBOUNDTYPES;bti++){ // same for any bounding type

    if(N1>1){
      // figure out left/right send/recv
      if (mycpupos[1] > 0) {
        dirgenset[bti][X1DN][DIRIF] = 1; // do -x1 dir
      }
      if (mycpupos[1] < ncpux1 - 1) {
        dirgenset[bti][X1UP][DIRIF] = 1; // do +x1 dir
      }

      // only do periodic MPI transfers if really periodic with multiple CPUs
      if(periodicx1&&(ncpux1>1)){
        if(mycpupos[1]==0) dirgenset[bti][X1DN][DIRIF]=1;
        else if(mycpupos[1]==ncpux1-1) dirgenset[bti][X1UP][DIRIF]=1;
      }


    }
    else{
      // then assume no boundaries to copy (i.e. can't have a CPU with a dimension of length 1 and stack them up -- inefficient anyway)
      dirgenset[bti][X1UP][DIRIF] = 0;
      dirgenset[bti][X1DN][DIRIF] = 0;
    }

    // figure out up/down send/recv
    if(N2>1){
      if (mycpupos[2] > 0) {
        dirgenset[bti][X2DN][DIRIF] = 1; // -x2 dir
      }
      if (mycpupos[2] < ncpux2 - 1) {
        dirgenset[bti][X2UP][DIRIF] = 1; // towards and from +x2 dir
      }

      // can't have both periodicx2 and SPC periodicx3
      if(periodicx2&&(ncpux2>1)){
        if(mycpupos[2]==0) dirgenset[bti][X2DN][DIRIF]=1;
        else if(mycpupos[2]==ncpux2-1) dirgenset[bti][X2UP][DIRIF]=1;
      }
      else if(special3dspc&&(ncpux3>1)){
        // non-reflecting, transmissive polar boundary conditions
        if(mycpupos[2]==0) dirgenset[bti][X2DN][DIRIF]=1;
        else if(mycpupos[2]==ncpux2-1) dirgenset[bti][X2UP][DIRIF]=1;
      }


    }
    else{
      // then assume no boundaries to copy
      dirgenset[bti][X2UP][DIRIF] = 0;
      dirgenset[bti][X2DN][DIRIF] = 0;
    }

    // figure out out/in send/recv
    if(N3>1){
      if (mycpupos[3] > 0) {
        dirgenset[bti][X3DN][DIRIF] = 1; // -x3 dir
      }
      if (mycpupos[3] < ncpux3 - 1) {
        dirgenset[bti][X3UP][DIRIF] = 1; // towards and from +x3 dir
      }

      if(periodicx3&&(ncpux3>1)){
        if(mycpupos[3]==0) dirgenset[bti][X3DN][DIRIF]=1;
        else if(mycpupos[3]==ncpux3-1) dirgenset[bti][X3UP][DIRIF]=1;
      }


    }
    else{
      // then assume no boundaries to copy
      dirgenset[bti][X3UP][DIRIF] = 0;
      dirgenset[bti][X3DN][DIRIF] = 0;
    }
  }


  //
  // define which direction the other guy communicates; just a simple way to store this obvious fact
  //
  opp[X1DN]=X1UP;
  opp[X1UP]=X1DN;
  opp[X2DN]=X2UP;
  opp[X2UP]=X2DN;
  opp[X3DN]=X3UP;
  opp[X3UP]=X3DN;


  //
  // set which CPUs communicate with each other
  // same for any method (bti)
  // In setting up communication, assumes x1 is the fastest index, then x2, with x3 as the slowest index
  //
  for(bti=0;bti<NUMBOUNDTYPES;bti++){
    for(dir=0;dir<COMPDIM*2;dir++){
      // sets opposite direction
      dirgenset[bti][dir][DIROPP]=opp[dir];


      // matching CPU to transfer to/from

      // x1
      if((dir==X1UP)||(dir==X1DN)){
        if(ncpux1>1 &&
           (
            ((mycpupos[1]>0)&&(mycpupos[1]<ncpux1-1)) // interior CPUs
            || (mycpupos[1]==0 && dir==X1UP) // inner CPUs pointing up
            || (mycpupos[1]==ncpux1-1 && dir==X1DN) // outer CPUs pointing down
            )
           ){
          if(dir==X1UP) dirgenset[bti][dir][DIROTHER]=myid+1;
          if(dir==X1DN) dirgenset[bti][dir][DIROTHER]=myid-1;
        }
        else if(periodicx1 && ncpux1>1){ // here X1DN/X1UP are implicitly associated with 0/ncpux1-1
          if(mycpupos[1]==0 && dir==X1DN) dirgenset[bti][dir][DIROTHER]=myid+(ncpux1-1); // for X1DN
          else if(mycpupos[1]==ncpux1-1 && dir==X1UP) dirgenset[bti][dir][DIROTHER]=myid-(ncpux1-1); // for X1UP
        }
      }

      // x2
      if((dir==X2UP)||(dir==X2DN)){
        if(ncpux2>1 &&
           (
            (mycpupos[2]>0 && mycpupos[2]<ncpux2-1) // interior CPU
            || (mycpupos[2]==0 && dir==X2UP) // exterior CPU connected to interior
            || (mycpupos[2]==ncpux2-1 && dir==X2DN) // exterior CPU connected to interior
            )
           ){
          if(dir==X2UP) dirgenset[bti][dir][DIROTHER]=myid+ncpux1;
          if(dir==X2DN) dirgenset[bti][dir][DIROTHER]=myid-ncpux1;
        }
        else if(periodicx2 && ncpux2>1){
          if(mycpupos[2]==0 && dir==X2DN) dirgenset[bti][dir][DIROTHER]=myid+(ncpux2-1)*ncpux1;
          else if(mycpupos[2]==ncpux2-1 && dir==X2UP) dirgenset[bti][dir][DIROTHER]=myid-(ncpux2-1)*ncpux1;
        }
        else if(special3dspc&&(ncpux3>1)){

          if(ncpux3%2){
            // must have ncpux3 even
            dualfprintf(fail_file,"ncpux3=%d must be even for polar transmissive BCs\n",ncpux3);
            myexit(33676958);
          }
          // see placeongrid.nb
          // spherical polar wrapping
          int othercpupos1 = mycpupos[1];
          int othercpupos2 = mycpupos[2];
          int othercpupos3 = (mycpupos[3] + (int)ncpux3/2)%ncpux3;
          int othermyid = othercpupos1 + othercpupos2*ncpux1 + othercpupos3*ncpux1*ncpux2;
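          // Worked example of the wrap above: with ncpux3=4, a CPU at the pole with
          // mycpupos[3]=1 pairs with othercpupos3=(1+2)%4=3, i.e. the CPU on the
          // opposite side of the axis (phi shifted by pi); mycpupos[1] and
          // mycpupos[2] are unchanged.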
          if(mycpupos[2]==0 && dir==X2DN){
            dirgenset[bti][dir][DIROTHER] = othermyid;
            dirgenset[bti][dir][DIROPP]=X2DN; // X2DN communicates with X2DN on the other CPU
          }
          else if(mycpupos[2]==ncpux2-1 && dir==X2UP){
            dirgenset[bti][dir][DIROTHER] = othermyid;
            dirgenset[bti][dir][DIROPP]=X2UP; // X2UP communicates with X2UP on the other CPU
          }
        }
      }

      // x3
      if((dir==X3UP)||(dir==X3DN)){
        if(ncpux3>1 &&
           (
            ((mycpupos[3]>0)&&(mycpupos[3]<ncpux3-1))
            || (mycpupos[3]==0 && dir==X3UP)
            || (mycpupos[3]==ncpux3-1 && dir==X3DN)
            )
           ){
          if(dir==X3UP) dirgenset[bti][dir][DIROTHER]=myid+ncpux1*ncpux2;
          if(dir==X3DN) dirgenset[bti][dir][DIROTHER]=myid-ncpux1*ncpux2;
        }
        else if(periodicx3 && ncpux3>1){
          if(mycpupos[3]==0 && dir==X3DN) dirgenset[bti][dir][DIROTHER]=myid+(ncpux3-1)*ncpux1*ncpux2;
          else if(mycpupos[3]==ncpux3-1 && dir==X3UP) dirgenset[bti][dir][DIROTHER]=myid-(ncpux3-1)*ncpux1*ncpux2;
        }
      }

      // MPI tags that label a transfer; must be unique while doing multiple transfers.
      // Send and receive tags must match the expected communications.  (The first-level
      // choice above picks WHICH CPUs talk; this tag identifies WHICH transfer, among all
      // transfers between those two CPUs, is relevant.)
      // Normally all communications pair X1DN (cpu#1) -> X1UP (cpu#2), and likewise.
      // For the SPC polar transfer we instead have X2DN (cpu#1) -> X2DN (cpu#2); this is
      // accounted for by overriding the default DIROPP above.
      // send tag: base tag identified by this CPU's id (myid) and the direction pointing out of the grid (dir)
      dirgenset[bti][dir][DIRTAGS]= myid * COMPDIM * 2 + dir;
      // receive tag: must match how the other CPU sets its send tag
      dirgenset[bti][dir][DIRTAGR]= dirgenset[bti][dir][DIROTHER] * COMPDIM * 2 + dirgenset[bti][dir][DIROPP];

      //
      // tags are defined by the sender's ID and the direction sent:
      //   tag = (myid*COMPDIM*2) + {0,1,2,3,4,5}
      //   0=right, 1=up, 2=left, 3=down, 4=out, 5=in
      // so sends look like (sendtoid, myid*COMPDIM*2+?) and recv's look like
      // (fromid, otherid*COMPDIM*2+*), where ? and * are opposites
      // (i.e. 0 and 2, 1 and 3, 4 and 5 -- or non-opposites if doing polar transmissive BCs)
      //
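      // Worked example of the tag scheme: suppose ncpux1>2 and CPU 5 sends its X1UP
      // face to CPU 6.  CPU 5 posts sendtag = 5*6 + X1UP (COMPDIM*2=6).  CPU 6,
      // handling its own dir=X1DN, computes recvtag = DIROTHER*6 + DIROPP
      // = 5*6 + opp[X1DN] = 5*6 + X1UP, which matches -- so the pair connects even
      // when several transfers between the same two CPUs are in flight.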
    }// end over dir
  }// end over bti


  //
  // set transfer sizes
  //
  for(bti=0;bti<NUMBOUNDTYPES;bti++){

    //
    // get number of boundary cells and number of quantities to bound
    //
    set_numbnd(bti, numbnd, &numnpr);


    // loop over directions
    for(dir=0;dir<COMPDIM*2;dir++){

      //
      // enter below if doing that particular direction for this CPU
      //
      if(dirgenset[bti][dir][DIRIF]){


        //
        // set number of variable types to transfer
        //
        if(bti==BOUNDPRIMTYPE || bti==BOUNDPRIMSIMPLETYPE || bti==BOUNDPSTAGTYPE || bti==BOUNDPSTAGSIMPLETYPE || bti==BOUNDINTTYPE ){
          dirgenset[bti][dir][DIRNUMPR]=NPRBOUND; // not used if SPLITNPR==1 or doing a general range of quantities; not used for BOUNDINTTYPE
        }
        else if(bti==BOUNDFLUXTYPE || bti==BOUNDFLUXSIMPLETYPE){
          dirgenset[bti][dir][DIRNUMPR]=NFLUXBOUND; // not used if SPLITNPR==1 or doing a general range of quantities
        }
        else if(bti==BOUNDVPOTTYPE || bti==BOUNDVPOTSIMPLETYPE){
          dirgenset[bti][dir][DIRNUMPR]=NDIM; // used
        }
        else{
          dualfprintf(fail_file,"No such bti=%d setup in set number of variable types in mpi_init.c\n",bti);
          myexit(246346769);
        }

        //
        // set transfer size
        //

        // surface area must be consistent with loops
        surfa[1]=(N2+numbnd[2]*2)*(N3+numbnd[3]*2)*N1NOT1;
        surfa[2]=(N1+numbnd[1]*2)*(N3+numbnd[3]*2)*N2NOT1;
        surfa[3]=(N1+numbnd[1]*2)*(N2+numbnd[2]*2)*N3NOT1;

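        // Worked example of the sizes below (illustrative values): with numbnd[i]=2,
        // N1=N2=N3=32, and numnpr=8 quantities, surfa[1]=(32+4)*(32+4)=1296, so an x1
        // transfer moves numbnd[1]*surfa[1]*numnpr = 2*1296*8 = 20736 values per direction.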
        if(
           bti==BOUNDPRIMTYPE || bti==BOUNDPRIMSIMPLETYPE // (this first line of the condition was missing from the extracted listing; restored from the parallel lists above and below)
           || bti==BOUNDPSTAGTYPE || bti==BOUNDPSTAGSIMPLETYPE
           || bti==BOUNDINTTYPE
           || bti==BOUNDFLUXTYPE || bti==BOUNDFLUXSIMPLETYPE
           || bti==BOUNDVPOTTYPE || bti==BOUNDVPOTSIMPLETYPE
           ){
          // sets size of transfer for primitive
          if((dir==X1UP)||(dir==X1DN)) dirgenset[bti][dir][DIRSIZE]=numbnd[1]*surfa[1]*numnpr;
          else if((dir==X2UP)||(dir==X2DN)) dirgenset[bti][dir][DIRSIZE]=numbnd[2]*surfa[2]*numnpr;
          else if((dir==X3UP)||(dir==X3DN)) dirgenset[bti][dir][DIRSIZE]=numbnd[3]*surfa[3]*numnpr;
        }
        // GODMARK: Why was I doing the below?
        //  else if(
        //          ){
        //    // sets size of transfer for fluxes
        //    // (different for "left" and "right") for flux-types
        //    if(dir==X1UP) dirgenset[bti][dir][DIRSIZE]=numbnd[1]*surfa[1]*numnpr;
        //    else if(dir==X1DN) dirgenset[bti][dir][DIRSIZE]=(numbnd[1]-1)*surfa[1]*numnpr;
        //    else if(dir==X2UP) dirgenset[bti][dir][DIRSIZE]=numbnd[2]*surfa[2]*numnpr;
        //    else if(dir==X2DN) dirgenset[bti][dir][DIRSIZE]=(numbnd[2]-1)*surfa[2]*numnpr;
        //    else if(dir==X3UP) dirgenset[bti][dir][DIRSIZE]=numbnd[3]*surfa[3]*numnpr;
        //    else if(dir==X3DN) dirgenset[bti][dir][DIRSIZE]=(numbnd[3]-1)*surfa[3]*numnpr;
        //  }
        else{
          dualfprintf(fail_file,"No such bti=%d setup in set transfer size in mpi_init.c\n",bti);
          myexit(246346770);
        }

      }// end if DIRIF
    }// over dir's
  }// end over bti



  //
  // The code below implements the following mappings:
  //
  // CENT = centered quantities in the direction of information copying
  // STAG = staggered on faces (pstag for B^{dir}, or fluxes F1 for dir=1, etc.) in the direction of information copying
  //
  // For an internal CPU boundary (any i=0..NBND):
  //   CENT: 0+i -> N+i and N-1-i -> -1-i
  //   STAG: 0+i -> N+i and N-1-i -> -1-i   (assumes i=N is set by the first operation)
  //
  // For periodic BCs (any i=0..NBND):
  //   Note this is the same as the interior CPU copy
  //   CENT: 0+i -> N+i and N-1-i -> -1-i   (so the lower CPU controls the i=0 boundary)
  //   STAG: 0+i -> N+i and N-1-i -> -1-i   (with i=N set by the first operation)
  //
  // At the pole in 3D (for j=0..NBND):
  //   Note the copying is in opposite j directions
  //   CENT: 0+j -> -1-j and N-1-j -> N+j
  //   STAG: 0+j -> -0-j and N-0-j -> N+j
  //
  // dirloopset[bti] sets the limits of the all-inclusive for loops used in boundmpi.c
  //
  // In reduced dimensions (i.e. N=1), the ranges below reduce to 0 positions, but the DIRIF's
  // above control whether that direction is MPI-bounded at all, and care is taken not to
  // MPI-bound the reduced dimensions.  For reduced dimensions, normal boundary zones are
  // handled by the LOOPS defined in global.h.

  //
  // first set BOUNDPRIMTYPE : CENT or FACE1,2,3 quantities that need full bounding
  // (BOUNDPRIMSIMPLETYPE as well)
  //
  // then set BOUNDINTTYPE : CENT or FACE1,2,3 quantities that need full bounding
  //
  // set BOUNDFLUXTYPE : FACE(1/2/3) quantities -- used to only bound along the flux direction
  // (all that's needed for ENO to de-average the flux using the finite difference method)
  //
  // set BOUNDVPOTTYPE : staggered perp to dir, except A_0
  // (and BOUNDFLUXSIMPLETYPE too)
  //
  for(bti=0;bti<NUMBOUNDTYPES;bti++){

    if(bti==BOUNDPRIMTYPE || bti==BOUNDPRIMSIMPLETYPE || bti==BOUNDPSTAGTYPE || bti==BOUNDPSTAGSIMPLETYPE || bti==BOUNDINTTYPE){
      // then can stay
    }
    else if(bti==BOUNDFLUXTYPE || bti==BOUNDFLUXSIMPLETYPE){
      // then can stay
    }
    else if(bti==BOUNDVPOTTYPE || bti==BOUNDVPOTSIMPLETYPE){
      // then can stay
    }
    else{
      dualfprintf(fail_file,"No such bti=%d defined for dirloopset[]\n",bti);
      myexit(3468346);
    }

    //
    // get number of boundary cells and number of quantities to bound for this bti
    //
    set_numbnd(bti, numbnd, &numnpr);


    // loop over directions
    for(dir=0;dir<COMPDIM*2;dir++){


      //
      // enter below if doing that particular direction for this CPU
      //
      if(dirgenset[bti][dir][DIRIF]){

        // default factor by which to multiply data
        // allows simple transformations on MPI copies
        PALLLOOP(pl){
          for(gridpos=0;gridpos<NUMPRIMGRIDPOS;gridpos++){
            primfactor[bti][dir][gridpos][PACK][pl]  =1.0;
            primfactor[bti][dir][gridpos][UNPACK][pl]=1.0;
          }
        }

        // NOTEMARK: Must ensure that the # of elements copied is the same for PACK and UNPACK
        // (generally numbnd[] boundary cells along the given direction in all cases)


        //
        // PACKING quantities (inclusive range for loops)
        //
        // zones to copy from (packing -- where to copy FROM)
        if(dir==X1UP){ // right
          gridpos=CENTGRID;
          dirloopset[bti][dir][gridpos][DIRPSTART1]=(N1-1)-(numbnd[1]-SHIFT1);
          dirloopset[bti][dir][gridpos][DIRPSTOP1] =(N1-1);
          dirloopset[bti][dir][gridpos][DIRPDIR1]=+1;

          gridpos=STAGGRID;
          dirloopset[bti][dir][gridpos][DIRPSTART1]=(N1-1)-(numbnd[1]-SHIFT1);
          dirloopset[bti][dir][gridpos][DIRPSTOP1] =(N1-1);
          dirloopset[bti][dir][gridpos][DIRPDIR1]=+1;
        }
        else if(dir==X1DN){ // left
          gridpos=CENTGRID;
          dirloopset[bti][dir][gridpos][DIRPSTART1]=+0;
          dirloopset[bti][dir][gridpos][DIRPSTOP1] =+0+(numbnd[1]-SHIFT1);
          dirloopset[bti][dir][gridpos][DIRPDIR1]=+1;

          gridpos=STAGGRID;
          dirloopset[bti][dir][gridpos][DIRPSTART1]=+0;
          dirloopset[bti][dir][gridpos][DIRPSTOP1] =+0+(numbnd[1]-SHIFT1);
          dirloopset[bti][dir][gridpos][DIRPDIR1]=+1;
        }

        for(gridpos=0;gridpos<NUMPRIMGRIDPOS;gridpos++){ // stag and cent are the same for off-dir directions; both are equivalent to CENTGRID
          if((dir==X1UP)||(dir==X1DN)){
            dirloopset[bti][dir][gridpos][DIRPSTART2]=0 -numbnd[2];
            dirloopset[bti][dir][gridpos][DIRPSTOP2] =N2-1+numbnd[2];
            dirloopset[bti][dir][gridpos][DIRPDIR2]=+1;
            dirloopset[bti][dir][gridpos][DIRPSTART3]=0 -numbnd[3];
            dirloopset[bti][dir][gridpos][DIRPSTOP3] =N3-1+numbnd[3];
            dirloopset[bti][dir][gridpos][DIRPDIR3]=+1;
          }
        }



        // The below is also correct for "if(periodicx3&&(ncpux3>1)&&ISSPCMCOORDNATIVE(MCOORD))".
        // For ISSPC, assumes "j=0" at \theta=0 and "j=N2" at \theta=\pi is copied to other CPUs.
        // CPUs with mycpupos[3]<ncpux3/2 dominate the others for the polar value of B2, but all are consistent in the end!
        // That should only affect things to machine accuracy, since both poles should evolve polar B2 the same way.
        //
        // Note that this special polar copy is unlike the old setup, which effectively copied j=0 and j=N2-1.
        if(special3dspc&&(ncpux3>1)&&((mycpupos[2]==0 && dir==X2DN) || (mycpupos[2]==ncpux2-1 && dir==X2UP)) ){
          if(dir==X2UP){ // up
            gridpos=CENTGRID;
            dirloopset[bti][dir][gridpos][DIRPSTART2]=(N2-1); // inverted order
            dirloopset[bti][dir][gridpos][DIRPSTOP2] =(N2-1)-(numbnd[2]-SHIFT2); //N2-numbnd[2];
            dirloopset[bti][dir][gridpos][DIRPDIR2]=-1;

            gridpos=STAGGRID;
            // mycpupos[3]<ncpux3/2 packs j=N2
            if(mycpupos[3]<ncpux3/2) dirloopset[bti][dir][gridpos][DIRPSTART2]=(N2-1+SHIFT2); // inverted order // differs from non-pole case // includes j=N2 right at the pole
            else dirloopset[bti][dir][gridpos][DIRPSTART2]=(N2-1+SHIFT2)-(SHIFT2); // inverted order // do not pack j=N2 right at the pole since it will come from the matching CPU

            dirloopset[bti][dir][gridpos][DIRPSTOP2] =(N2-1+SHIFT2)-(numbnd[2]-SHIFT2); //N2-numbnd[2];
            dirloopset[bti][dir][gridpos][DIRPDIR2]=-1;


            for(gridpos=0;gridpos<NUMPRIMGRIDPOS;gridpos++){
              if(bti==BOUNDPRIMTYPE || bti==BOUNDPRIMSIMPLETYPE || bti==BOUNDPSTAGTYPE || bti==BOUNDPSTAGSIMPLETYPE ){
                // at both poles we flip the signature of B2 and U2 only
                // Here we flip upon packing
                primfactor[bti][dir][gridpos][PACK][URAD1]=primfactor[bti][dir][gridpos][PACK][U1]=SIGNFLIPU1;
                primfactor[bti][dir][gridpos][PACK][B1]=SIGNFLIPB1;
                primfactor[bti][dir][gridpos][PACK][URAD2]=primfactor[bti][dir][gridpos][PACK][U2]=SIGNFLIPU2;
                primfactor[bti][dir][gridpos][PACK][B2]=SIGNFLIPB2;
                // NOTEMARK: if we only interpolate U3 and B3 across the pole, and not \detg U3 and \detg B3 (with FLIPGDETAXIS==1), then we have to flip the sign across the pole to avoid a jump in U3 and B3 at the pole.  Then U3 and B3 will be extrema and reduce to lower order, but without a dissipation term in the EMF-type flux calculation.
                primfactor[bti][dir][gridpos][PACK][URAD3]=primfactor[bti][dir][gridpos][PACK][U3]=SIGNFLIPU3;
                primfactor[bti][dir][gridpos][PACK][B3]=SIGNFLIPB3;
              }
              else if(bti==BOUNDFLUXTYPE || bti==BOUNDFLUXSIMPLETYPE){
                // process sign while packing
                PALLLOOP(pl) primfactor[bti][dir][gridpos][PACK][pl]=SIGNFLIPGDET; // (e.g. gdet T^2_1: assuming primitives are correct on the active domain, T^2_1 would have opposite signs for continuous flow through the pole, but gdet has a kink, so the product has no kink)
                primfactor[bti][dir][gridpos][PACK][URAD2]=primfactor[bti][dir][gridpos][PACK][U2]=-SIGNFLIPGDET; // \detg T^2_2
                primfactor[bti][dir][gridpos][PACK][B2]=-SIGNFLIPGDET; // Note that F^2_{B2} = 0, so this doesn't matter, but maintain consistency
                primfactor[bti][dir][gridpos][PACK][URAD3]=primfactor[bti][dir][gridpos][PACK][U3]=-SIGNFLIPGDET; // \detg T^2_3 is like \detg T^2_2 as far as sign, if we don't interpolate \detg U3 and \detg B3 across the pole
                primfactor[bti][dir][gridpos][PACK][B3]=-SIGNFLIPGDET; // F^2_{B3} like T^2_3 like T^2_2
              }
              else if(bti==BOUNDVPOTTYPE || bti==BOUNDVPOTSIMPLETYPE){
                // flip while packing
                DLOOPA(pl) primfactor[bti][dir][gridpos][PACK][pl]=-SIGNFLIPGDET; // A_1, A_3 : these point in the 1 and 3 directions like scalars, but are gdet-compressed at the pole with a kink, so need to unkink
                primfactor[bti][dir][gridpos][PACK][2]=SIGNFLIPGDET; // A_2 (points into the axis but with gdet, so as if gdet*B2)
              }
            }// end over gridpos
          }
          else if(dir==X2DN){ // down
            gridpos=CENTGRID;
            dirloopset[bti][dir][gridpos][DIRPSTART2]=0;
            dirloopset[bti][dir][gridpos][DIRPSTOP2] =0+(numbnd[2]-SHIFT2);
            dirloopset[bti][dir][gridpos][DIRPDIR2]=+1;

            gridpos=STAGGRID;
            // mycpupos[3]<ncpux3/2 packs j=0
            if(mycpupos[3]<ncpux3/2) dirloopset[bti][dir][gridpos][DIRPSTART2]=0; // includes j=0 right at the pole
            else dirloopset[bti][dir][gridpos][DIRPSTART2]=SHIFT2; // doesn't include j=0 right at the pole

            dirloopset[bti][dir][gridpos][DIRPSTOP2] =0+(numbnd[2]-SHIFT2);
            dirloopset[bti][dir][gridpos][DIRPDIR2]=+1;
          }
        }
        else{
          // Old treatment of the pole
          if(dir==X2UP){ // up
            gridpos=CENTGRID;
            dirloopset[bti][dir][gridpos][DIRPSTART2]=(N2-1)-(numbnd[2]-SHIFT2); //N2-numbnd[2];
            dirloopset[bti][dir][gridpos][DIRPSTOP2] =(N2-1);
            dirloopset[bti][dir][gridpos][DIRPDIR2]=+1;

            gridpos=STAGGRID;
            dirloopset[bti][dir][gridpos][DIRPSTART2]=(N2-1)-(numbnd[2]-SHIFT2); //N2-numbnd[2];
            dirloopset[bti][dir][gridpos][DIRPSTOP2] =(N2-1);
            dirloopset[bti][dir][gridpos][DIRPDIR2]=+1;
          }
          else if(dir==X2DN){ // down
            gridpos=CENTGRID;
            dirloopset[bti][dir][gridpos][DIRPSTART2]=0;
            dirloopset[bti][dir][gridpos][DIRPSTOP2] =0+(numbnd[2]-SHIFT2);
            dirloopset[bti][dir][gridpos][DIRPDIR2]=+1;

            gridpos=STAGGRID;
            dirloopset[bti][dir][gridpos][DIRPSTART2]=0;
            dirloopset[bti][dir][gridpos][DIRPSTOP2] =0+(numbnd[2]-SHIFT2);
            dirloopset[bti][dir][gridpos][DIRPDIR2]=+1;
          }
        }

        for(gridpos=0;gridpos<NUMPRIMGRIDPOS;gridpos++){ // stag and cent are the same for off-dir directions; both are equivalent to CENTGRID
          if((dir==X2UP)||(dir==X2DN)){
            dirloopset[bti][dir][gridpos][DIRPSTART1]=-numbnd[1];
            dirloopset[bti][dir][gridpos][DIRPSTOP1]=N1-1+numbnd[1];
            dirloopset[bti][dir][gridpos][DIRPDIR1]=+1;
            dirloopset[bti][dir][gridpos][DIRPSTART3]=-numbnd[3];
            dirloopset[bti][dir][gridpos][DIRPSTOP3]=N3-1+numbnd[3];
            dirloopset[bti][dir][gridpos][DIRPDIR3]=+1;
          }
        }



        if(dir==X3UP){ // up
          gridpos=CENTGRID;
          dirloopset[bti][dir][gridpos][DIRPSTART3]=(N3-1)-(numbnd[3]-SHIFT3); //N3-numbnd[3];
          dirloopset[bti][dir][gridpos][DIRPSTOP3] =(N3-1);
          dirloopset[bti][dir][gridpos][DIRPDIR3]=+1;

          gridpos=STAGGRID;
          dirloopset[bti][dir][gridpos][DIRPSTART3]=(N3-1)-(numbnd[3]-SHIFT3); //N3-numbnd[3];
          dirloopset[bti][dir][gridpos][DIRPSTOP3] =(N3-1);
          dirloopset[bti][dir][gridpos][DIRPDIR3]=+1;
        }
        else if(dir==X3DN){ // down
          gridpos=CENTGRID;
          dirloopset[bti][dir][gridpos][DIRPSTART3]=+0;
          dirloopset[bti][dir][gridpos][DIRPSTOP3] =+0+(numbnd[3]-SHIFT3);
          dirloopset[bti][dir][gridpos][DIRPDIR3]=+1;

          gridpos=STAGGRID;
          dirloopset[bti][dir][gridpos][DIRPSTART3]=+0;
          dirloopset[bti][dir][gridpos][DIRPSTOP3] =+0+(numbnd[3]-SHIFT3);
          dirloopset[bti][dir][gridpos][DIRPDIR3]=+1;
        }

        for(gridpos=0;gridpos<NUMPRIMGRIDPOS;gridpos++){ // stag and cent are the same for off-dir directions; both are equivalent to CENTGRID
          if((dir==X3UP)||(dir==X3DN)){
            dirloopset[bti][dir][gridpos][DIRPSTART1]=-numbnd[1];
            dirloopset[bti][dir][gridpos][DIRPSTOP1]=N1-1+numbnd[1];
            dirloopset[bti][dir][gridpos][DIRPDIR1]=+1;
            dirloopset[bti][dir][gridpos][DIRPSTART2]=-numbnd[2];
            dirloopset[bti][dir][gridpos][DIRPSTOP2]=N2-1+numbnd[2];
            dirloopset[bti][dir][gridpos][DIRPDIR2]=+1;
          }
        }




        //
        // UNPACKING quantities
        //
        // zones to copy into (unpacking -- where to copy INTO)

        // x1
        if(dir==X1UP){ // right
          gridpos=CENTGRID;
          dirloopset[bti][dir][gridpos][DIRUSTART1]=(N1-1+SHIFT1);
          dirloopset[bti][dir][gridpos][DIRUSTOP1] =(N1-1+SHIFT1)+(numbnd[1]-SHIFT1);
          dirloopset[bti][dir][gridpos][DIRUDIR1]=+1;

          gridpos=STAGGRID;
          dirloopset[bti][dir][gridpos][DIRUSTART1]=(N1-1+SHIFT1);
          dirloopset[bti][dir][gridpos][DIRUSTOP1] =(N1-1+SHIFT1)+(numbnd[1]-SHIFT1);
          dirloopset[bti][dir][gridpos][DIRUDIR1]=+1;
        }
        else if(dir==X1DN){ // left
          gridpos=CENTGRID;
          dirloopset[bti][dir][gridpos][DIRUSTART1]=-SHIFT1-(numbnd[1]-SHIFT1);
          dirloopset[bti][dir][gridpos][DIRUSTOP1] =-SHIFT1;
          dirloopset[bti][dir][gridpos][DIRUDIR1]=+1;

          gridpos=STAGGRID;
          dirloopset[bti][dir][gridpos][DIRUSTART1]=-SHIFT1-(numbnd[1]-SHIFT1);
          dirloopset[bti][dir][gridpos][DIRUSTOP1] =-SHIFT1;
          dirloopset[bti][dir][gridpos][DIRUDIR1]=+1;
        }
        for(gridpos=0;gridpos<NUMPRIMGRIDPOS;gridpos++){ // stag and cent are the same for off-dir directions; both are equivalent to CENTGRID
          if((dir==X1UP)||(dir==X1DN)){
            dirloopset[bti][dir][gridpos][DIRUSTART2]=-numbnd[2];
            dirloopset[bti][dir][gridpos][DIRUSTOP2]=N2-1+numbnd[2];
            dirloopset[bti][dir][gridpos][DIRUDIR2]=+1;
            dirloopset[bti][dir][gridpos][DIRUSTART3]=-numbnd[3];
            dirloopset[bti][dir][gridpos][DIRUSTOP3]=N3-1+numbnd[3];
            dirloopset[bti][dir][gridpos][DIRUDIR3]=+1;
          }
        }

        // A normal BC has 0,1,2 -> N,N+1,N+2 for both pcent and pstag.
        // For "if(periodicx3&&(ncpux3>1)&&ISSPCMCOORDNATIVE(MCOORD))" the below gives 0,1,2 -> 0,-1,-2 for pstag and 0,1,2 -> -1,-2,-3 for pcent.
        // When inverting v^\theta -> -v^\theta, this effectively inverts the order in the x2 direction too.
        // x2
        if(special3dspc&&(ncpux3>1)&&((mycpupos[2]==0 && dir==X2DN) || (mycpupos[2]==ncpux2-1 && dir==X2UP))){
          if(dir==X2UP){ // up
            gridpos=CENTGRID;
            dirloopset[bti][dir][gridpos][DIRUSTART2]=(N2-1+SHIFT2);
            dirloopset[bti][dir][gridpos][DIRUSTOP2] =(N2-1+SHIFT2)+(numbnd[2]-SHIFT2);
            dirloopset[bti][dir][gridpos][DIRUDIR2]=+1;

            gridpos=STAGGRID;
            // mycpupos[3]<ncpux3/2 packs j=N2,0, so mycpupos[3]>=ncpux3/2 unpacks j=N2,0
            if(mycpupos[3]<ncpux3/2) dirloopset[bti][dir][gridpos][DIRUSTART2]=(N2-1+SHIFT2)+(SHIFT2);
            else dirloopset[bti][dir][gridpos][DIRUSTART2]=(N2-1+SHIFT2); // includes from j=N2

            dirloopset[bti][dir][gridpos][DIRUSTOP2] =(N2-1+SHIFT2)+(numbnd[2]-SHIFT2);
            dirloopset[bti][dir][gridpos][DIRUDIR2]=+1;
          }
          else if(dir==X2DN){ // down
            gridpos=CENTGRID;
            dirloopset[bti][dir][gridpos][DIRUSTART2]=-SHIFT2; // inverted order compared to packing
            dirloopset[bti][dir][gridpos][DIRUSTOP2] =-SHIFT2-(numbnd[2]-SHIFT2);
            dirloopset[bti][dir][gridpos][DIRUDIR2]=-1;

            gridpos=STAGGRID;
            // mycpupos[3]<ncpux3/2 packs j=N2,0, so mycpupos[3]>=ncpux3/2 unpacks j=N2,0
            if(mycpupos[3]<ncpux3/2) dirloopset[bti][dir][gridpos][DIRUSTART2]=-SHIFT2; // inverted order compared to packing
            else dirloopset[bti][dir][gridpos][DIRUSTART2]=-0; // differs due to the pole copy // inverted order compared to packing

            dirloopset[bti][dir][gridpos][DIRUSTOP2] =-0-(numbnd[2]-SHIFT2);
            dirloopset[bti][dir][gridpos][DIRUDIR2]=-1;

            for(gridpos=0;gridpos<NUMPRIMGRIDPOS;gridpos++){
              if(bti==BOUNDPRIMTYPE || bti==BOUNDPRIMSIMPLETYPE || bti==BOUNDPSTAGTYPE || bti==BOUNDPSTAGSIMPLETYPE ){
                // at both poles we flip the signature of B2 and U2 only
                // Here we flip upon unpacking
                primfactor[bti][dir][gridpos][UNPACK][URAD1]=primfactor[bti][dir][gridpos][UNPACK][U1]=SIGNFLIPU1;
                primfactor[bti][dir][gridpos][UNPACK][B1]=SIGNFLIPB1;
                primfactor[bti][dir][gridpos][UNPACK][URAD2]=primfactor[bti][dir][gridpos][UNPACK][U2]=SIGNFLIPU2;
                primfactor[bti][dir][gridpos][UNPACK][B2]=SIGNFLIPB2;
                // again assumes U3 and B3 (not \detg U3 and \detg B3) are interpolated across the pole
                primfactor[bti][dir][gridpos][UNPACK][URAD3]=primfactor[bti][dir][gridpos][UNPACK][U3]=SIGNFLIPU3;
                primfactor[bti][dir][gridpos][UNPACK][B3]=SIGNFLIPB3;
              }
              else if(bti==BOUNDFLUXTYPE || bti==BOUNDFLUXSIMPLETYPE){
                // Here we flip upon unpacking
                PALLLOOP(pl) primfactor[bti][dir][gridpos][UNPACK][pl]=SIGNFLIPGDET; // gdet T^2_1 is like gdet*B2: the kink is avoided if we don't flip the sign, since B2 changes sign itself in the active domains
                // override for symmetric quantities
                primfactor[bti][dir][gridpos][UNPACK][URAD2]=primfactor[bti][dir][gridpos][UNPACK][U2]=-SIGNFLIPGDET; // \detg T^2_2 : must flip sign to avoid a kink
                primfactor[bti][dir][gridpos][UNPACK][B2]=-SIGNFLIPGDET; // Note that F^2_{B2} = 0, so this doesn't matter, but maintain consistency
                primfactor[bti][dir][gridpos][UNPACK][URAD3]=primfactor[bti][dir][gridpos][UNPACK][U3]=-SIGNFLIPGDET; // \detg T^2_3 is like \detg T^2_2 if U3&B3 (not \detg U3&B3) are interpolated
                primfactor[bti][dir][gridpos][UNPACK][B3]=-SIGNFLIPGDET; // F^2_{B3} like T^2_3 like T^2_2
              }
              else if(bti==BOUNDVPOTTYPE || bti==BOUNDVPOTSIMPLETYPE){
                // Here we flip upon unpacking
                DLOOPA(pl) primfactor[bti][dir][gridpos][UNPACK][pl]=-SIGNFLIPGDET; // A_0, A_1, A_3 are like scalars but compressed by gdet, so flip the sign so there is no kink
                primfactor[bti][dir][gridpos][UNPACK][2]=SIGNFLIPGDET; // A_2 is like gdet*B2: A_2 has opposite sign across the pole in the active domains, but gdet is + in both, so avoid flipping so that A_2 has no kink at the pole
              }
            }// end over gridpos

          }// end dir==X2DN
        }
        else{
          // Old treatment of the pole
          if(dir==X2UP){ // up
            gridpos=CENTGRID;
            dirloopset[bti][dir][gridpos][DIRUSTART2]=(N2-1+SHIFT2);
            dirloopset[bti][dir][gridpos][DIRUSTOP2] =(N2-1+SHIFT2)+(numbnd[2]-SHIFT2);
            dirloopset[bti][dir][gridpos][DIRUDIR2]=+1;

            gridpos=STAGGRID;
            dirloopset[bti][dir][gridpos][DIRUSTART2]=(N2-1+SHIFT2);
            dirloopset[bti][dir][gridpos][DIRUSTOP2] =(N2-1+SHIFT2)+(numbnd[2]-SHIFT2);
            dirloopset[bti][dir][gridpos][DIRUDIR2]=+1;
          }
          else if(dir==X2DN){ // down
            gridpos=CENTGRID;
            dirloopset[bti][dir][gridpos][DIRUSTART2]=-SHIFT2-(numbnd[2]-SHIFT2);
            dirloopset[bti][dir][gridpos][DIRUSTOP2] =-SHIFT2;
            dirloopset[bti][dir][gridpos][DIRUDIR2]=+1;

            gridpos=STAGGRID;
            dirloopset[bti][dir][gridpos][DIRUSTART2]=-SHIFT2-(numbnd[2]-SHIFT2);
            dirloopset[bti][dir][gridpos][DIRUSTOP2] =-SHIFT2;
            dirloopset[bti][dir][gridpos][DIRUDIR2]=+1;
          }
        }

        for(gridpos=0;gridpos<NUMPRIMGRIDPOS;gridpos++){ // stag and cent are the same for off-dir directions; both are equivalent to CENTGRID
          if((dir==X2UP)||(dir==X2DN)){
            dirloopset[bti][dir][gridpos][DIRUSTART1]=-numbnd[1];
            dirloopset[bti][dir][gridpos][DIRUSTOP1]=N1-1+numbnd[1];
            dirloopset[bti][dir][gridpos][DIRUDIR1]=+1;
            dirloopset[bti][dir][gridpos][DIRUSTART3]=-numbnd[3];
            dirloopset[bti][dir][gridpos][DIRUSTOP3]=N3-1+numbnd[3];
            dirloopset[bti][dir][gridpos][DIRUDIR3]=+1;
          }
        }


        // x3
        if(dir==X3UP){ // up
          gridpos=CENTGRID;
          dirloopset[bti][dir][gridpos][DIRUSTART3]=(N3-1+SHIFT3);
          dirloopset[bti][dir][gridpos][DIRUSTOP3] =(N3-1+SHIFT3)+(numbnd[3]-SHIFT3);
          dirloopset[bti][dir][gridpos][DIRUDIR3]=+1;

          gridpos=STAGGRID;
          dirloopset[bti][dir][gridpos][DIRUSTART3]=(N3-1+SHIFT3);
          dirloopset[bti][dir][gridpos][DIRUSTOP3] =(N3-1+SHIFT3)+(numbnd[3]-SHIFT3);
          dirloopset[bti][dir][gridpos][DIRUDIR3]=+1;
        }
        else if(dir==X3DN){ // down
          gridpos=CENTGRID;
          dirloopset[bti][dir][gridpos][DIRUSTART3]=-SHIFT3-(numbnd[3]-SHIFT3);
          dirloopset[bti][dir][gridpos][DIRUSTOP3] =-SHIFT3;
          dirloopset[bti][dir][gridpos][DIRUDIR3]=+1;

          gridpos=STAGGRID;
          dirloopset[bti][dir][gridpos][DIRUSTART3]=-SHIFT3-(numbnd[3]-SHIFT3);
          dirloopset[bti][dir][gridpos][DIRUSTOP3] =-SHIFT3;
          dirloopset[bti][dir][gridpos][DIRUDIR3]=+1;
        }

        for(gridpos=0;gridpos<NUMPRIMGRIDPOS;gridpos++){ // stag and cent are the same for off-dir directions; both are equivalent to CENTGRID
          if((dir==X3UP)||(dir==X3DN)){
            dirloopset[bti][dir][gridpos][DIRUSTART1]=-numbnd[1];
            dirloopset[bti][dir][gridpos][DIRUSTOP1]=N1-1+numbnd[1];
            dirloopset[bti][dir][gridpos][DIRUDIR1]=+1;
            dirloopset[bti][dir][gridpos][DIRUSTART2]=-numbnd[2];
            dirloopset[bti][dir][gridpos][DIRUSTOP2]=N2-1+numbnd[2];
            dirloopset[bti][dir][gridpos][DIRUDIR2]=+1;
          }
        }


      }// end if DIRIF
    }// end DIRLOOP
  }// end bti loop


  //
  // output those things that were defined
  //
  for(bti=0;bti<NUMBOUNDTYPES;bti++){
    for (m = 0; m < COMPDIM*2; m++) {
      for(l = 0 ; l < DIRGENNUMVARS ; l++) {
        logfprintf("dirgenset[%d][%d][%d]: %d\n", bti, m, l, dirgenset[bti][m][l]);
      }
    }
  }

  for(bti=0;bti<NUMBOUNDTYPES;bti++){
    for (m = 0; m < COMPDIM*2; m++) {
      for(gridpos=0;gridpos<NUMPRIMGRIDPOS;gridpos++){
        for(l = 0 ; l < DIRLOOPNUMVARS ; l++) {
          logfprintf("dirloopset[%d][%d][%d][%d]: %d\n", bti, m, gridpos, l, dirloopset[bti][m][gridpos][l]);
        }
      }
    }
  }


  //
  // Setup the supermpi method (not working right now)
  //
#if(SIMULBCCALC!=-1)
  // this is definitely not set up for 3D, and never fully worked... still an interesting idea.

  if(SIMULBCCALC==2){
    if(SIMULBCCALC<=0){ stagei=STAGEM1; stagef=STAGEM1; }
    else if(SIMULBCCALC==1){ stagei=STAGE0; stagef=STAGE2; }
    else if(SIMULBCCALC==2){ stagei=STAGE0; stagef=STAGE5; }

    if(SIMULBCCALC>=1){
      for(stage=stagei;stage<=stagef;stage++){
        STAGECONDITION(0,N1-1,0,N2-1,isc,iec,jsc,jec);
        logfprintf("CZLOOP: stage=%d : %d %d %d %d\n",stage,isc,iec,jsc,jec);
        STAGECONDITION(0,N1,-1,N2,isc,iec,jsc,jec);
        logfprintf("F1LOOP: stage=%d : %d %d %d %d\n",stage,isc,iec,jsc,jec);
        STAGECONDITION(-1,N1,0,N2,isc,iec,jsc,jec);
        logfprintf("F2LOOP: stage=%d : %d %d %d %d\n",stage,isc,iec,jsc,jec);
        STAGECONDITION(0,N1,0,N2,isc,iec,jsc,jec);
        logfprintf("EMFLOOP: stage=%d : %d %d %d %d\n",stage,isc,iec,jsc,jec);
        STAGECONDITION(0,N1,0,N2-1,isc,iec,jsc,jec);
        logfprintf("F1CTLOOP: stage=%d : %d %d %d %d\n",stage,isc,iec,jsc,jec);
        STAGECONDITION(0,N1-1,0,N2,isc,iec,jsc,jec);
        logfprintf("F2CTLOOP: stage=%d : %d %d %d %d\n",stage,isc,iec,jsc,jec);
        STAGECONDITION(-1,N1,-1,N2,isc,iec,jsc,jec);
        logfprintf("DQLOOP: stage=%d : %d %d %d %d\n",stage,isc,iec,jsc,jec);
        STAGECONDITION(-2,N1+1,-2,N2+1,isc,iec,jsc,jec);
        logfprintf("PREDQLOOP: stage=%d : %d %d %d %d\n",stage,isc,iec,jsc,jec);
        logfprintf("\n");
      }
    }
  }
#endif


  trifprintf("end: init_placeongrid_griddecomposition\n");
}



/// Clean exit/failure handler: optional completion email, failure dump, file closing, MPI abort/exit.
int myexit(int call_code)
{
  int i, j, k, l;
  int cleanfinish,dofaildump;
  FILE *faildump;
  char mysys[MAXFILENAMELONG];
  char binarytype[MAXFILENAME];
  void set_binarytype(char *binarytype);
  int inparallel,tid;


  trifprintf("proc: %s : Exiting cc: %d nstep: %ld\n", myidtxt, call_code, nstep);


#if(USEOPENMP)
  if(omp_in_parallel()){
    inparallel=1;
    tid=omp_get_thread_num();
  }
  else{
    tid=0;
    inparallel=0;
  }
#else
  tid=0;
  inparallel=0;
#endif


  if(tid==0){ // only 1 thread does the below


#if(MAILWHENDONE && !MPIAVOIDFORK)
    if(myid==0){

      set_binarytype(binarytype);

      sprintf(mysys,"echo \"%s : done with `pwd`, inparallel=%d\" > done%s.txt",EMAILMESSAGE,inparallel,binarytype);
      system(mysys);
      if(MAILFROMREMOTE){
        sprintf(mysys,"scp done%s.txt %s ; ssh %s \"mail %s < done%s.txt\"",binarytype,REMOTEHOST,REMOTEHOST,EMAILADDRESS,binarytype); // copy the same done<binarytype>.txt file written just above
        system(mysys);
      }
      else{
        sprintf(mysys,"mail %s < done%s.txt",EMAILADDRESS,binarytype);
        system(mysys);
      }
    }
#endif

    dofaildump=0;
    if (call_code > 0) {
      stderrfprintf("proc: %s : Failure.  Please check the failure file: cc: %d\n", myidtxt, call_code);

      if(call_code<ERRORCODEBELOWCLEANFINISH) cleanfinish = 1;
      else cleanfinish=0; // assume this means the dump procedure itself failed, so don't get into an infinite failure loop
      // should never have a non-clean finish, but sometimes the code does -- just not marked as such right now
      if(cleanfinish) dofaildump=1;
      if(!cleanfinish){
#if(USEMPI)
        // must abort, since there is no clean way to communicate with the other CPUs now
        MPI_Abort(MPI_COMM_GRMHD, 1);
#endif
      }
    }
    else{
      dofaildump=0;
      cleanfinish=1;
    }


    if (dofaildump) {
      stderrfprintf("proc: %s : dumping failure dump with callcode=2\n", myidtxt);

      // assume we want the previous timestep's data, not the bad just-computed data
      // now diag should not fail if the last timestep was of non-fail type
    }


    //
    // first clean up any prior MPI non-blocking calls by making a fake write call
    //
#if(USEMPI)
    if(cleanfinish && USEROMIO==1 && MPIVERSION==2 ){
      fakedump(0);
    }
#endif


    //
    // must close AFTER diag()
    //
    if (call_code >= 0) {
      if (fail_file) fclose(fail_file);
      if (log_file) fclose(log_file);
      myfclose(&logfull_file,"Can't close logfull_file\n");
    }


    if(cleanfinish){
      stderrfprintf("Ending computation on proc: %s, holding for other cpus\n", myidtxt);
    }


    myfprintf(stderr, "Ended computation on all processors\n");
    //final_myexit(); // Don't want to Abort if we don't have to
    stderrfprintf("END\n");
    fflush(stderr);
    exit(0);


  }// end if master thread


  return (0);
}


#if(PRODUCTION<=1)

/// Check all CPUs for a failure flag; exit collectively if any CPU has failed.
int error_check(int wherefrom)
{
  int i, j, k;
  int errorsend = 0;
  // check if an error exists and exit if so

  if (failed > 0) {
    dualfprintf(fail_file,
                "Detected failure on proc: %d failed: %d nstep: %ld realnstep: %ld steppart=%d :: t: %21.15g wherefrom = %d\n",
                myid, failed, nstep, realnstep, steppart, t, wherefrom);
  }

  if (numprocs > 1) {
    errorsend = failed;
#if(USEMPI)
    //    dualfprintf(fail_file,"wtf: %d %d\n",errorsend,failed);
    //    fflush(fail_file);
    MPI_Allreduce(&errorsend, &failed, 1, MPI_INT, MPI_MAX, MPI_COMM_GRMHD);
    //    dualfprintf(fail_file,"wtf: %d %d\n",errorsend,failed);
    //    fflush(fail_file);
#endif
  }
  if (failed > 0) {
    dualfprintf(fail_file,
                "Result: Detected failure on proc: %d failed: %d nstep: %ld realnstep: %ld steppart=%d :: t: %21.15g\n",
                myid, failed, nstep, realnstep, steppart, t);
    // control the behavior of failure here (i.e. could return(1) and continue, or something else)
    //    if(failed==1) myexit(1);
    //    if(failed==2) myexit(1);
    //    if(failed==3) myexit(1);
    myexit(wherefrom);
    return (1);
  }
  return (0);
}

#endif
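
// Usage sketch (illustrative only; the real call sites are in the stepping/diagnostic code):
// error_check() contains an MPI_Allreduce, i.e. a collective call, so every rank must reach
// the same call site each time or the code will deadlock.
#if(0)
  // inside the evolution loop, executed by all ranks every step:
  if(step_failed_locally()) failed=1;   // hypothetical local failure detection
  error_check(1);                       // all ranks enter; MPI_MAX syncs "failed" across ranks
#endif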




/// Create MPI groups/communicators for each face of the CPU grid (currently disabled).
#if(0)
void init_MPIgroup(void)
{
  int *ranks;
  int i,j,k,numranks;


  // allocate things that are truenumprocs in size
  ranks=(int*)malloc(sizeof(int)*truenumprocs);
  if(ranks==NULL){
    stderrfprintf("Problem allocating memory for ranks with truenumprocs=%d\n",truenumprocs); fflush(stderr);
    myexit(915213756);
  }


  MPI_Comm_group(MPI_COMM_WORLD, &MPI_GROUP_WORLD);

  stderrfprintf("begin: proc: %s init_MPIgroup\n",myidtxt); fflush(stderr);

  // x1-inner
  j=0;
  for(i=0;i<numprocs;i++){
    if(i%ncpux1==0){
      ranks[j]=MPIid[i];
      j++;
    }
  }
  numranks=j;
  if(numranks!=ncpux2*ncpux3){
    stderrfprintf("problem with inner x1-group: numranks: %d ncpux2: %d ncpux3: %d ncpux2*ncpux3: %d\n",numranks,ncpux2,ncpux3,ncpux2*ncpux3);
    myexit(97834683);
  }
  // now ranks[] holds the inner-x1 boundary CPUs, and numranks holds the number of such ranks

  // now create group and communicator
  MPI_Group_incl(MPI_GROUP_WORLD, numranks, ranks, &grprem[2]);
  MPI_Comm_create(MPI_COMM_WORLD, grprem[2], &combound[2]);

  // x1-outer
  j=0;
  for(i=0;i<numprocs;i++){
    if(i%ncpux1==ncpux1-1){
      ranks[j]=MPIid[i];
      j++;
    }
  }
  numranks=j;
  if(numranks!=ncpux2*ncpux3){
    stderrfprintf("problem with outer x1-group: numranks: %d ncpux2*ncpux3: %d\n",numranks,ncpux2*ncpux3);
    myexit(92787621);
  }
  // now create group and communicator
  MPI_Group_incl(MPI_GROUP_WORLD, numranks, ranks, &grprem[0]);
  MPI_Comm_create(MPI_COMM_WORLD, grprem[0], &combound[0]);

  // x2-inner
  j=0;
  for(i=0;i<numprocs;i++){
    if((i%(ncpux1*ncpux2))/ncpux1==0){
      ranks[j]=MPIid[i];
      j++;
    }
  }
  numranks=j;
  if(numranks!=ncpux1*ncpux3){
    stderrfprintf("problem with inner x2-group: numranks: %d ncpux1*ncpux3: %d\n",numranks,ncpux1*ncpux3);
    myexit(83649545);
  }
  // now create group and communicator
  MPI_Group_incl(MPI_GROUP_WORLD, numranks, ranks, &grprem[1]);
  MPI_Comm_create(MPI_COMM_WORLD, grprem[1], &combound[1]);

  // x2-outer
  j=0;
  for(i=0;i<numprocs;i++){
    if((i%(ncpux1*ncpux2))/ncpux1==ncpux2-1){
      ranks[j]=MPIid[i];
      j++;
    }
  }
  numranks=j;
  if(numranks!=ncpux1*ncpux3){
    stderrfprintf("problem with outer x2-group: numranks: %d ncpux1*ncpux3: %d\n",numranks,ncpux1*ncpux3);
    myexit(28364888);
  }

  // now create group and communicator
  MPI_Group_incl(MPI_GROUP_WORLD, numranks, ranks, &grprem[3]);
  MPI_Comm_create(MPI_COMM_WORLD, grprem[3], &combound[3]);


  // x3-inner
  j=0;
  for(i=0;i<numprocs;i++){
    if(i/(ncpux1*ncpux2)==0){
      ranks[j]=MPIid[i];
      j++;
    }
  }
  numranks=j;
  if(numranks!=ncpux1*ncpux2){
    stderrfprintf("problem with inner x3-group: numranks: %d ncpux1*ncpux2: %d\n",numranks,ncpux1*ncpux2);
    myexit(18758365);
  }
  // now create group and communicator
  MPI_Group_incl(MPI_GROUP_WORLD, numranks, ranks, &grprem[5]);
  MPI_Comm_create(MPI_COMM_WORLD, grprem[5], &combound[5]);

  // x3-outer
  j=0;
  for(i=0;i<numprocs;i++){
    if(i/(ncpux1*ncpux2)==ncpux3-1){
      ranks[j]=MPIid[i];
      j++;
    }
  }
  numranks=j;
  if(numranks!=ncpux1*ncpux2){
    stderrfprintf("problem with outer x3-group: numranks: %d ncpux1*ncpux2: %d\n",numranks,ncpux1*ncpux2);
    myexit(29776546);
  }

  // now create group and communicator
  MPI_Group_incl(MPI_GROUP_WORLD, numranks, ranks, &grprem[4]);
  MPI_Comm_create(MPI_COMM_WORLD, grprem[4], &combound[4]);


  free(ranks);

  // 0: right 1: up 2: left 3: down 4: out 5: in (as in bound.c)

  // when using these communicators, one must make sure a communication call using them isn't made by a non-member cpu!
  // (above: stupid, I know -- should just skip if a non-member cpu tries the function)
  stderrfprintf("end: proc: %s init_MPIgroup\n",myidtxt); fflush(stderr);
}
#endif






// Environment variables (in bash: export <variable>=<value>; the examples below use csh "setenv" syntax)
//
// OMP_SCHEDULE : which schedule method is used (e.g. export OMP_SCHEDULE="guided, 4")
//
// OMP_NUM_THREADS : sets the maximum number of threads to use during execution (e.g. export OMP_NUM_THREADS=8)
//
// OMP_DYNAMIC : enables or disables dynamic adjustment of the number of threads available for execution of parallel regions.  Valid values are TRUE or FALSE.  (e.g. export OMP_DYNAMIC=TRUE)
//
// OMP_NESTED : enables or disables nested parallelism.  Valid values are TRUE or FALSE.  (e.g. export OMP_NESTED=TRUE)
//   Implementation notes: an implementation may or may not support nested parallelism and/or dynamic threads.  If nested parallelism is supported, it is often only nominal, in that a nested parallel region may have only one thread.  Consult your implementation's documentation for details -- or experiment and find out for yourself if you can't find it in the documentation.
//
// OMP_STACKSIZE : new with OpenMP 3.0.  Controls the size of the stack for created (non-master) threads.  Examples:
//   setenv OMP_STACKSIZE 2000500B
//   setenv OMP_STACKSIZE "3000 k "
//   setenv OMP_STACKSIZE 10M
//   setenv OMP_STACKSIZE " 10 M "
//   setenv OMP_STACKSIZE "20 m "
//   setenv OMP_STACKSIZE " 1G"
//   setenv OMP_STACKSIZE 20000
//
//   The default is about 4-8MB on modern systems.
//
// OMP_WAIT_POLICY : new with OpenMP 3.0.  Provides a hint to an OpenMP implementation about the desired behavior of waiting threads; a compliant implementation may or may not abide by it.  Valid values are ACTIVE and PASSIVE: ACTIVE specifies that waiting threads should mostly be active, i.e. consume processor cycles while waiting; PASSIVE specifies that waiting threads should mostly be passive, i.e. not consume processor cycles.  The details of the ACTIVE and PASSIVE behaviors are implementation defined.  Examples:
//   setenv OMP_WAIT_POLICY ACTIVE
//   setenv OMP_WAIT_POLICY active
//   setenv OMP_WAIT_POLICY PASSIVE
//   setenv OMP_WAIT_POLICY passive
//
// OMP_MAX_ACTIVE_LEVELS : new with OpenMP 3.0.  Controls the maximum number of nested active parallel regions; must be a non-negative integer.  The behavior of the program is implementation defined if the requested value is greater than the maximum number of nested active parallel levels the implementation can support, or if the value is not a non-negative integer.  Example:
//   setenv OMP_MAX_ACTIVE_LEVELS 2
//
// OMP_THREAD_LIMIT : new with OpenMP 3.0.  Sets the number of OpenMP threads to use for the whole program; must be a positive integer.  The behavior of the program is implementation defined if the requested value is greater than the number of threads the implementation can support, or if the value is not a positive integer.  Example:
//   setenv OMP_THREAD_LIMIT 8


// To enable OpenMP:
// Compiler     Flag
// IBM          -qsmp=omp
// Intel        -openmp
// PathScale    -mp
// PGI          -mp
// GNU          -fopenmp
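//
// For example (assumed typical usage, not taken from this repo's makefile):
//   gcc -fopenmp -O2 -c mpi_init.c
//   icc -openmp  -O2 -c mpi_init.c
// MPI builds are typically driven through an mpicc wrapper so that the MPI headers
// and libraries are found automatically.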



/// Report OpenMP thread information to the given stream and record the original thread count.
/// NOTE: the original signature was lost from this listing; the name below is an assumption
/// (the routine clearly takes a FILE *out and sets numopenmpthreadsorig).
void report_openmp_thread_info(FILE *out)
{
  int tid;

#if(USEOPENMP) // must be inside a cpp conditional since this uses OpenMP functions, not just directives

#pragma omp parallel private(tid)
  {
    // Obtain and print thread id
    fprintf(out,"proc: %d : Thread = %d activated\n", myid, omp_get_thread_num());

    tid = omp_get_thread_num();
    if (tid == 0){
      // Only the master thread does this
      numopenmpthreadsorig = omp_get_num_threads();

#if(OPENMPVERSION==3)
      fprintf(out,"OpenMP 3.0 activated: Maximum number of threads available=%d\n",omp_get_thread_limit());
#endif

      fprintf(out,"Master MPI proc=%d reports: Number of threads originally=%d out of maximum=%d out of procs=%d\n", myid, numopenmpthreadsorig, omp_get_max_threads(), omp_get_num_procs());
      if(omp_get_dynamic()){
        fprintf(out,"Dynamic thread adjustment is enabled\n");
      }
      else{
        fprintf(out,"Dynamic thread adjustment is disabled\n");
      }

      // Use omp_set_nested() to enable if desired
      if(omp_get_nested()){
        fprintf(out,"Nested parallelism is enabled, so is allowed\n");
      }
      else{
        fprintf(out,"Nested parallelism is disabled, so is NOT allowed\n");
      }

      // Note that when numopenmpthreads is requested in the next parallel region, all threads will see the value, since numopenmpthreads is shared.
    }


  }// end parallel region

#endif

}