HARM (harm and utilities)
mpi_fileio.c
#include "decs.h"


void mpiio_init(int bintxt, int sorted, FILE ** fpptr, long headerbytesize, int which, char *filename, int numcolumns,
                MPI_Datatype datatype, void **jonioptr, void **writebufptr)
{

  logsfprintf("\nmpiio_init begin\n");

  //
  // this check covers combine and separate
  //
  if(!sorted){
    if(bintxt==BINARYOUTPUT){
      dualfprintf(fail_file,"No such thing as binary unsorted output\n");
      myexit(1);
    }
  }

  //
  // clean up the prior call (internally handles mpicombinetype)
  //
  mpiio_final(bintxt, sorted, fpptr, headerbytesize, which, filename, numcolumns, datatype, jonioptr, writebufptr);

  //
  // initialize the new call
  //
  if(mpicombinetype==MPICOMBINEROMIO){
    mpiioromio_init_combine(INITROMIO, which, headerbytesize, filename, numcolumns, datatype, writebufptr, (void*)0x0);
  }
  else{
    mpiios_init(bintxt, sorted, fpptr, which, headerbytesize, filename, numcolumns, datatype,
                jonioptr, writebufptr);
  }

  logsfprintf("mpiio_init end\n");

}


void mpiio_final(int bintxt, int sorted, FILE ** fpptr, long headerbytesize, int which, char *filename, int numcolumns,
                 MPI_Datatype datatype, void **jonioptr, void **writebufptr)
{
  static int priorinit=0;


  if(priorinit){

    logsfprintf("\nmpiio_final begin\n");

    // "true" value same
    // doesn't use jonioptr or bintxt or sorted or fpptr
    // writebuf not set yet for combine or separate; have to allocate at address writebufptr first.
    // then assume a file was previously written that now needs to be fully ended before one can write another
    mpiioromio_init_combine(WRITEENDROMIO, which, headerbytesize, filename, numcolumns, datatype, writebufptr, *writebufptr);

    logsfprintf("mpiio_final end\n");

  }

  //
  // priorinit indicates to the *next call* of this function that mpiio_final(priorinit) should end/close/cleanup the previously written file, if this call was using ROMIO and writing
  // otherwise assume the file-writing call was already fully completed
  // below assumes mpicombinetype==truempicombinetype when doing ROMIO
  //
  if(mpicombinetype==MPICOMBINEROMIO && which==WRITEFILE){
    priorinit=1;
  }
  else priorinit=0;

  // if numcolumns==-1, then separate finish of the ROMIO call
  if(numcolumns==-1) priorinit=0;

}
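
// A minimal sketch (disabled; not called anywhere in this file) of how a
// hypothetical dump routine might drive these entry points for a sorted binary
// write. The real callers live in dump_gen() and related routines;
// example_dump_driver(), mydatatype, and fillwritebuf() are placeholders:
#if(0)
void example_dump_driver(void)
{
  FILE *fparray[1];
  void *jonio, *writebuf;
  int numcolumns = 10;
  MPI_Datatype mydatatype = MPI_DOUBLE;

  // allocate buffers, clean up any prior ROMIO write, open files if non-ROMIO
  mpiio_init(BINARYOUTPUT, SORTED, fparray, 0, WRITEFILE, "dump0000", numcolumns,
             mydatatype, &jonio, &writebuf);

  fillwritebuf(writebuf); // placeholder: each CPU fills its writebuf here

  // gather to CPU=0 (or write collectively via ROMIO) and flush to disk
  mpiio_combine(BINARYOUTPUT, SORTED, numcolumns, mydatatype, fparray, jonio, writebuf);
}
#endif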


void mpiio_combine(int bintxt, int sorted,
                   int numcolumns, MPI_Datatype datatype,
                   FILE ** fpptr, void *jonio, void *writebuf)
{
#if(USEMPI)

  logsfprintf("mpiio start combine\n");

  if(USEROMIO){
    // doesn't use jonioptr or bintxt or sorted or fpptr
    // address not used, just the value of writebuf
    // headerbytesize no longer needed
    mpiioromio_init_combine(WRITECLOSEROMIO, WRITEFILE, 0, "", numcolumns, datatype, &writebuf, writebuf);
  }
  else{
    if(truempicombinetype==MPICOMBINESIMPLE){

      if (sorted) {
        mpiios_combine(bintxt, datatype, numcolumns, fpptr, jonio, writebuf);
      }
      else{
        mpiiotu_combine(datatype, numcolumns, fpptr, writebuf);
      }
    }
    else if(truempicombinetype==MPICOMBINEMINMEM){
      mpiiomin_final(numcolumns, fpptr, jonio, writebuf);
    }
  }
#endif
  logsfprintf("mpiio end combine\n");

}

#define DEBUGMINMEM 0


void mpiio_minmem(int readwrite, int whichdump, int i, int j, int k, int bintxt, int sorted,
                  int numcolumns, MPI_Datatype datatype,
                  FILE ** fp, void *jonio, void *writebuf)
{
  static struct blink * blinkptr;
  static struct blink * cpulinkptr;
  static long long int datasofar0,datasofarc0;
  static int done0=0;
  static long long int datagot;
  long long int bfi;
  long long int sii,uii; // sorted and unsorted index sometimes
#if(USEMPI)
  MPI_Request request;
  MPI_Request request0;
#endif
  long long int joniooffset;
  void *joniosubmit;

  unsigned char *jonio1;
  float *jonio4;
  double *jonio8;
  long double *jonio16;
  int *jonio4i;
  long long int *jonio8i;

  long long int mapjoniosorted,mapjoniounsorted;
  long long int datatoget0,datatogetc0,datasent0;
  int doset;
  int dofull;
  int dolocal;
  int doing0;
  long long int gi,gj,gk; // global i,j,k starting from the first cpu reference and adding the sorted index
  long long int lastnum;
  static int masterdone;
  static int thisslavedone;

  int numfiles;
  int sizeofdatatype;
  long long int mypos;
  unsigned short short4char;


#if(USEMPI)

  sizeofdatatype=getsizeofdatatype(datatype);

#if(DEBUGMINMEM)
  dualfprintf(fail_file,"got here 0: i=%d j=%d k=%d\n",i,j,k); fflush(fail_file);
#endif

  if(sorted==UNSORTED){
    dualfprintf(fail_file,"Not setup to do unsorted with this method\n");
    myexit(10001);
  }

  //
  // very quickly determine whether we are at the required point to begin a read or write of data, or still busy filling the buffer.
  //
  if((i==0)&&(j==0)&&(k==0)){
    // then first time here for this cpu for this dump
    blinkptr=blinkptr0[whichdump];
    datagot=0;
    if(myid==0) masterdone=0;
    thisslavedone=0;
  }
  if(masterdone&&(myid==0)) return;
  if(thisslavedone&&(myid!=0)) return;


  if(readwrite==WRITEFILE){
    // i+1 since called inside the loop before i++, but really i+1 items have been completed
    bfi=((long long int)k*(long long int)N1*(long long int)N2+(long long int)j*(long long int)N1+(long long int)(i+1))*(long long int)numcolumns; // recall that we are always at col=0 since we always complete a column
  }
  else if(readwrite==READFILE){
    bfi=((long long int)k*(long long int)N1*(long long int)N2+(long long int)j*(long long int)N1+(long long int)i)*(long long int)numcolumns;
  }


  if(blinkptr==NULL) dolocal=0; // end of local list
  else{
    // see if we have completed a single node in the list yet
    if(readwrite==WRITEFILE){
      if(bfi==((long long int)datagot+(long long int)blinkptr->num)) dolocal=1; // at a completion point, need to send to cpu=0
      else dolocal=0; // not yet completed
    }
    else if(readwrite==READFILE){
      if(bfi==(long long int)datagot) dolocal=1; // at a completion point, need to recv from cpu=0
      else dolocal=0; // not yet completed
    }
  }
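
  // Example of the checkpointing above (hypothetical numbers): if the current
  // list node has blinkptr->num=1024 and datagot=2048 entries have already been
  // shipped, then on a write this CPU sits out until bfi reaches 3072, at which
  // point exactly one node's worth of writebuf is complete and dolocal=1
  // triggers the send to cpu=0 below.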

  //
  // ANY CPU Send/Recv a buffer to cpu=0
  //

  if(dolocal){
#if(DEBUGMINMEM)
    dualfprintf(fail_file,"got here 0.5: %lld %lld %lld %d\n",bfi,datagot+blinkptr->num,blinkptr->num,dolocal); fflush(fail_file);
#endif
    // We are ready to read or write from/to cpu=0
#if(DEBUGMINMEM)
    dualfprintf(fail_file,"got here 0.6\n"); fflush(fail_file);
#endif

    //
    // WRITEFILE, so other CPU sends to cpu=0
    //
    if(readwrite==WRITEFILE){
      // means we have put what we want into writebuf, now send to cpu=0
#if(DEBUGMINMEM)
      dualfprintf(fail_file,"got here 0.65 : %lld %lld %d %d\n",writebuf,blinkptr->num,MPIid[0],myid); fflush(fail_file);
#endif
      // must keep myid>0 cpus stalled until cpu=0 needs their data
      // that is, the Wait below continues once data is copied out of the buffer, but we want a pseudo-blocking call here
      if(myid>0) MPI_Issend(writebuf,blinkptr->num,datatype,MPIid[0],myid, MPI_COMM_GRMHD,&request);
      // can't stall cpu=0 since only the code below has the recv, but ok since cpu=0 is stuck below until cpu=0 data is needed again
      else MPI_Isend(writebuf,blinkptr->num,datatype,MPIid[0],myid, MPI_COMM_GRMHD,&request);
#if(DEBUGMINMEM)
      dualfprintf(fail_file,"got here 0.66 : %lld\n",writebuf); fflush(fail_file);
#endif
      // adjust the offset to keep the standard writebuf BUFFERMAP format with just an offset (the offset keeps the true memory area always as the written part)
      bufferoffset -= (long long int)blinkptr->num; // or = -(long long int)datagot+(long long int)blinkptr->num
    }
    else if(readwrite==READFILE){

      //
      // READFILE, so other CPU recvs from cpu=0
      //

      // means we need to fill writebuf with what cpu=0 will send us
      // recv's will Wait below until the data is coming.
#if(DEBUGMINMEM)
      dualfprintf(fail_file,"got here 0.65 : %lld\n",writebuf); fflush(fail_file);
#endif
      MPI_Irecv(writebuf,blinkptr->num,datatype,MPIid[0],myid, MPI_COMM_GRMHD,&request);
#if(DEBUGMINMEM)
      dualfprintf(fail_file,"got here 0.66 : %lld %lld\n",writebuf,blinkptr); fflush(fail_file);
#endif
      bufferoffset=(long long int)(-datagot);
    }// end if READFILE


    datagot += (long long int)(blinkptr->num);
    lastnum = (long long int)(blinkptr->num);
    // now iterate to the next node in the list
    blinkptr=(blinkptr->np);
#if(DEBUGMINMEM)
    dualfprintf(fail_file,"got here 0.67 : %lld\n",blinkptr); fflush(fail_file);
#endif
    if(blinkptr==NULL){
      thisslavedone=1;
      // then done with the list; check to see if everything adds up
      if(
         ((readwrite==WRITEFILE)&&(-bufferoffset!=(long long int)N1*(long long int)N2*(long long int)N3*(long long int)numcolumns))
         ||((readwrite==READFILE)&&(-bufferoffset!=(long long int)N1*(long long int)N2*(long long int)N3*(long long int)numcolumns-(long long int)lastnum))
         ){
        dualfprintf(fail_file,"local total doesn't equal expected value\n got=%lld demand=%lld\n",-bufferoffset,(long long int)N1*(long long int)N2*(long long int)N3*(long long int)numcolumns);
        myexit(10002);
      }
    }
#if(DEBUGMINMEM)
    dualfprintf(fail_file,"got here 0.7\n"); fflush(fail_file);
#endif

  }

#if(DEBUGMINMEM)
  dualfprintf(fail_file,"got here .8\n"); fflush(fail_file);
#endif


  // for cpu=0, don't wait on the local send/recv since cpu=0 needs to set up its receives first (since cpu=0 sends to itself)
  // only do this if cpu=0 has just finished its local data send/recv (i.e. dolocal==1) or cpu=0 is done (done0==1)
  // no way to decouple cpu=0 local from master, so master waits for local
  if(myid==0){
    // first time hit, get the initial node in the list and reset counters
    if((i==0)&&(j==0)&&(k==0)){
      cpulinkptr=cpulinkptr0[whichdump];
      datasofar0=0;
      datasofarc0=0;
    }
  }


  // check to see if done with cpu=0 data (we use cpu=0 to continue grabbing data as needed)
  if(readwrite==WRITEFILE){
    // wait until all data is grabbed from cpu=0 to send to disk before holding up here.
    if((myid==0)&&(bfi==(long long int)N1*(long long int)N2*(long long int)N3*(long long int)numcolumns)){
      done0=1;
    }
    else done0=0;
  }
  else if(readwrite==READFILE){
    // wait until the last read is done. Still can't use writebuf, since the last local cpu=0 assignment using writebuf happens after the rest of the data has been distributed from cpu=0 to the other cpus
    if((myid==0)&&(thisslavedone)){
      done0=1; // then will be done after the final read and grab for cpu=0 (cpu=0's writebuf will be written to after all cpus are done)
    }
    else done0=0;
  }


  //
  // CPU==0 stuff
  //

  if((myid==0)&&((dolocal==1)||(done0==1))){
    // We are ready to read or write from/to other cpus

    if (datatype == MPI_UNSIGNED_CHAR) jonio1 = (unsigned char *) jonio;
    else if (datatype == MPI_FLOAT) jonio4 = (float *) jonio;
    else if (datatype == MPI_DOUBLE) jonio8 = (double *) jonio;
    else if (datatype == MPI_LONG_DOUBLE) jonio16 = (long double *) jonio;
    else if (datatype == MPI_INT) jonio4i = (int *) jonio;
    else if (datatype == MPI_LONG_LONG_INT) jonio8i = (long long int *) jonio;


    // we need to loop over the list until we hit a marked end-cpu. This gives us a chunk of data to sort for direct output
    // this is the offset to the sorted part of jonio
    joniooffset=(long long int)joniosize/((long long int)2);

#if(DEBUGMINMEM)
    dualfprintf(fail_file,"got here 2: joniooffset=%lld\n",joniooffset); fflush(fail_file);
#endif

    //
    // WRITEFILE
    //
    if(readwrite==WRITEFILE){
      dofull=1;
      while(dofull){

#if(DEBUGMINMEM)
        dualfprintf(fail_file,"got here 3\n"); fflush(fail_file);
#endif

        // determine the amount of data to get from the cpu group

        // limit the amount of data once doing the last grab
        if((long long int)datasofar0+(long long int)joniosize/((long long int)2)>(long long int)totalsize[1]*(long long int)totalsize[2]*(long long int)totalsize[3]*(long long int)numcolumns) datatogetc0=-(long long int)datasofar0+(long long int)totalsize[1]*(long long int)totalsize[2]*(long long int)totalsize[3]*(long long int)numcolumns;
        else datatogetc0=joniosize/((long long int)2);
        // new amount of data (that will be read total)
        datasofarc0 += (long long int)datatogetc0;


        doset=1;
        doing0=0;
        // every received dataset is kept in the first half of jonio from the start of jonio, sorted, then the next dataset is received.
        datatoget0=0;
        while(doset){
#if(DEBUGMINMEM)
          dualfprintf(fail_file,"got here 3.4 : cpu=%d: datasofar0=%lld datasofarc0=%lld\n",cpulinkptr->cpu,datasofar0,datasofarc0); fflush(fail_file);
#endif
          datatoget0 += (long long int)(cpulinkptr->num);
          if(cpulinkptr->cpu==0) doing0=1;
#if(DEBUGMINMEM)
          dualfprintf(fail_file,"got here 3.5: %lld %d %d\n",jonio,cpulinkptr->num,cpulinkptr->cpu); fflush(fail_file);
          dualfprintf(fail_file,"got here 3.51: %lld %lld %lld %lld\n",datatogetc0,totalsize[1],totalsize[2],totalsize[3]); fflush(fail_file);
#endif
          MPI_Irecv(jonio,cpulinkptr->num,datatype,MPIid[cpulinkptr->cpu],cpulinkptr->cpu,MPI_COMM_GRMHD,&request0);
#if(DEBUGMINMEM)
          dualfprintf(fail_file,"got here 3.515\n"); fflush(fail_file);
#endif
          // have myid wait before continuing, to make sure the receive is complete
          MPI_Wait(&request0,&mpichstatus);
          // the easiest algorithm/mapping is done using a loop over the full sorted size, reduced per cpu by the if statement below and checked
          // for(sii=0;sii<cpulinkptr->num;sii++){

#if(DEBUGMINMEM)
          dualfprintf(fail_file,"got here 3.52: datatogetc0=%lld\n",datatogetc0); fflush(fail_file);
#endif

#if(DEBUGMINMEM)
          dualfprintf(fail_file,"got here 3.54: %d %d %d\n",cpulinkptr->ri,cpulinkptr->rj,cpulinkptr->rk); fflush(fail_file);
#endif


          // repeat this loop until we really get all of datatogetc0 from (multiple) CPUs
          // The if(gi,gj,gk) below constrains the filling of jonio so that the global array is filled with the required data at the correct array positions
          uii=0;
          for(sii=0;sii<datatogetc0;sii++){
            // uii: iteration count for accessing memory from the received data

            // sii: iteration count for accessing (with joniooffset) where to store some of the received data

            // ri : starting global (over all CPUs) i-location
            // rj : "" for j-location
            // rk : "" for k-location

            // sii : sorted index that iterates to include column data

            // mypos: number of grid positions (not including columns) iterated beyond the starting (ri,rj,rk) position

            // gi : global (over all CPUs) i-location
            // gj : "" for j-location
            // gk : "" for k-location

            mypos=(long long int)(sii/((long long int)numcolumns)) + (long long int)(cpulinkptr->ri) + (long long int)(cpulinkptr->rj)*(long long int)totalsize[1] + (long long int)(cpulinkptr->rk)*(long long int)totalsize[1]*(long long int)totalsize[2];

            gi=(long long int)mypos%((long long int)totalsize[1]);
            gj=(long long int)((mypos%((long long int)totalsize[1]*(long long int)totalsize[2]))/((long long int)totalsize[1]));
            gk=(long long int)(mypos/((long long int)totalsize[1]*(long long int)totalsize[2]));
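            // Worked example (hypothetical numbers, not from the code): with
            // totalsize[1]=4, totalsize[2]=4, numcolumns=1, and a chunk whose
            // starting reference is (ri,rj,rk)=(2,1,1): at sii=0,
            //   mypos = 0 + 2 + 1*4 + 1*4*4 = 22
            //   gi = 22 % 4        = 2
            //   gj = (22 % 16) / 4 = 1
            //   gk = 22 / 16       = 1
            // i.e. the decomposition recovers the starting global position, and
            // subsequent sii values advance it by one grid cell per numcolumns entries.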
490 
491 #if(DEBUGMINMEM)
492  dualfprintf(fail_file,"got here 3.55: sii=%lld mypos=%lld gi=%lld gj=%lld gk=%lld\n",sii, mypos, gi,gj,gk); fflush(fail_file);
493 #endif
494 
495  if(
496  (gi>=startpos0[1][cpulinkptr->cpu])&&
497  (gi<= endpos0[1][cpulinkptr->cpu])&&
498  (gj>=startpos0[2][cpulinkptr->cpu])&&
499  (gj<= endpos0[2][cpulinkptr->cpu])&&
500  (gk>=startpos0[3][cpulinkptr->cpu])&&
501  (gk<= endpos0[3][cpulinkptr->cpu])
502  ){
503 
504 #if(DEBUGMINMEM)
505  dualfprintf(fail_file,"got here 3.56: did assign: sii=%lld joniooffset=%lld uii=%lld\n",sii,joniooffset,uii); fflush(fail_file);
506 #endif
507 
508  if (datatype == MPI_UNSIGNED_CHAR) jonio1[sii+joniooffset]=jonio1[uii++];
509  else if (datatype == MPI_FLOAT) jonio4[sii+joniooffset]=jonio4[uii++];
510  else if (datatype == MPI_DOUBLE) jonio8[sii+joniooffset]=jonio8[uii++];
511  else if (datatype == MPI_LONG_DOUBLE) jonio16[sii+joniooffset]=jonio16[uii++];
512  else if (datatype == MPI_INT) jonio4i[sii+joniooffset]=jonio4i[uii++];
513  else if (datatype == MPI_LONG_LONG_INT) jonio8i[sii+joniooffset]=jonio8i[uii++];
514  }
515  }// end over sii
516 
517 
518 #if(DEBUGMINMEM)
519  dualfprintf(fail_file,"got here 3.6\n"); fflush(fail_file);
520 #endif
521 
523  //
524  // check!
525  // see if local data total is as expected
526  //
528  if(uii!=(long long int)(cpulinkptr->num)){
529  dualfprintf(fail_file,"uii and num for this cpu not same, algorithm failure: uii=%d num=%d datatogetc0=%d\n",uii,cpulinkptr->num,datatogetc0);
530  myexit(10003);
531  }
532  if(cpulinkptr->end) doset=0;
533  cpulinkptr=cpulinkptr->np;
534 
535 
536  }// end while(doset)
537 
538 
539 
540  // see if total data is as expected
541  datasofar0 += (long long int)datatoget0; // diagnostic
542  if((long long int)datasofar0 != (long long int)datasofarc0){
543  dualfprintf(fail_file,"cumulative data received via MPI and expected data is different: got=%d expected=%d\n",datasofar0,datasofarc0);
544  myexit(10004);
545  }
546 
547 
548 
549  // now take sorted part and write it to disk
550  // now write out collected data using CPU=0
551 
552 
554  //
555  // write data to file
556  //
558  if (datatype == MPI_UNSIGNED_CHAR) joniosubmit=(void*) (jonio1+joniooffset);
559  else if (datatype == MPI_FLOAT) joniosubmit=(void*) (jonio4+joniooffset);
560  else if (datatype == MPI_DOUBLE) joniosubmit=(void*) (jonio8+joniooffset);
561  else if (datatype == MPI_LONG_DOUBLE) joniosubmit=(void*) (jonio16+joniooffset);
562  else if (datatype == MPI_INT) joniosubmit=(void*) (jonio4i+joniooffset);
563  else if (datatype == MPI_LONG_LONG_INT) joniosubmit=(void*) (jonio8i+joniooffset);
564 
565  if(docolsplit){
566  numfiles=numcolumns;
567  }
568  else numfiles=1;
569 
570  if(bintxt==BINARYOUTPUT){
571  if(numfiles==1) fwrite(joniosubmit, sizeofdatatype,datatoget0, *fp);
572  else{
573  for(sii=0;sii<datatoget0;sii++){
574  if (datatype == MPI_UNSIGNED_CHAR) fwrite(&jonio1[sii+joniooffset], sizeofdatatype,1, fp[sii%numfiles]);
575  else if (datatype == MPI_FLOAT) fwrite(&jonio4[sii+joniooffset], sizeofdatatype,1, fp[sii%numfiles]);
576  else if (datatype == MPI_DOUBLE) fwrite(&jonio8[sii+joniooffset], sizeofdatatype,1, fp[sii%numfiles]);
577  else if (datatype == MPI_LONG_DOUBLE) fwrite(&jonio16[sii+joniooffset], sizeofdatatype,1, fp[sii%numfiles]);
578  else if (datatype == MPI_INT) fwrite(&jonio4i[sii+joniooffset], sizeofdatatype,1, fp[sii%numfiles]);
579  else if (datatype == MPI_LONG_LONG_INT) fwrite(&jonio8i[sii+joniooffset], sizeofdatatype,1, fp[sii%numfiles]);
580  }
581  }
582  }
583  else if(bintxt==TEXTOUTPUT){ // properly ordered, so just dump it
584  for(sii=0;sii<datatoget0;sii++){
585  if (datatype == MPI_UNSIGNED_CHAR) fprintf(fp[sii%numfiles],"%c",jonio1[sii+joniooffset]);
586  else if (datatype == MPI_FLOAT) fprintf(fp[sii%numfiles],"%15.7g",jonio4[sii+joniooffset]);
587  else if (datatype == MPI_DOUBLE) fprintf(fp[sii%numfiles],"%21.15g",jonio8[sii+joniooffset]);
588  else if (datatype == MPI_LONG_DOUBLE) fprintf(fp[sii%numfiles],"%31.25Lg",jonio16[sii+joniooffset]);
589  else if (datatype == MPI_INT) fprintf(fp[sii%numfiles],"%d",jonio4i[sii+joniooffset]);
590  else if (datatype == MPI_LONG_LONG_INT) fprintf(fp[sii%numfiles],"%lld",jonio8i[sii+joniooffset]);
591  if(numfiles==1){
592  if((sii+1)%numcolumns) fprintf(fp[sii%numfiles]," ");
593  else fprintf(fp[sii%numfiles],"\n");
594  }
595  else fprintf(fp[sii%numfiles],"\n");
596  }
597  }
598  if((done0==0)&&(doing0==1)) dofull=0; // cpu==0 still needs to deal with it's own data
599  // if wasn't locally doing cpu=0 by master, then can't exit yet, continue till cpu=0 local data needed by master
600  if(cpulinkptr==NULL){
601  dofull=0; // end of list
602  masterdone=1;
603  if(datasofar0 != (long long int)totalsize[1]*(long long int)totalsize[2]*(long long int)totalsize[3]*(long long int)numcolumns){
604  dualfprintf(fail_file,"write: total data written not equal to expected amount: wrote=%lld expected=%lld\n",datasofar0,(long long int)totalsize[1]*(long long int)totalsize[2]*(long long int)totalsize[3]*(long long int)numcolumns);
605  myexit(10005);
606  }
607  }
608  // otherwise continue
609  }
610 
611 
612 
613  }
    else if(readwrite==READFILE){

      //
      // READFILE
      //

      dofull=1;
      while(dofull){
#if(DEBUGMINMEM)
        dualfprintf(fail_file,"got here 4\n"); fflush(fail_file);
#endif
        // we read data into the 2nd half of jonio (the sorted part), then desort for one cpu, then continue for the next cpu

        // determine the amount of data to get from the file

        // limit the amount of data once doing the last grab
        if(datasofar0+(long long int)joniosize/((long long int)2) > (long long int)totalsize[1]*(long long int)totalsize[2]*(long long int)totalsize[3]*(long long int)numcolumns) datatogetc0 = -(long long int)datasofar0 + (long long int)totalsize[1]*(long long int)totalsize[2]*(long long int)totalsize[3]*(long long int)numcolumns;
        else datatogetc0=joniosize/2;
        // new amount of data (that will be read total)
        datasofarc0+=datatogetc0;

#if(DEBUGMINMEM)
        dualfprintf(fail_file,"got here1 : %lld %lld\n",datatogetc0,datasofarc0); fflush(fail_file);
#endif

        // get data from the file
        if (datatype == MPI_UNSIGNED_CHAR) joniosubmit=(void*) (jonio1+joniooffset);
        else if (datatype == MPI_FLOAT) joniosubmit=(void*) (jonio4+joniooffset);
        else if (datatype == MPI_DOUBLE) joniosubmit=(void*) (jonio8+joniooffset);
        else if (datatype == MPI_LONG_DOUBLE) joniosubmit=(void*) (jonio16+joniooffset);
        else if (datatype == MPI_INT) joniosubmit=(void*) (jonio4i+joniooffset);
        else if (datatype == MPI_LONG_LONG_INT) joniosubmit=(void*) (jonio8i+joniooffset);


        if(docolsplit){
          numfiles=numcolumns;
        }
        else numfiles=1;

        if(bintxt==BINARYOUTPUT){
          // first let cpu=0 read the data
          if(numfiles==1) fread(joniosubmit, sizeofdatatype,datatogetc0,*fp);
          else{
            // loop bound is datatogetc0 (the amount just read), as in the TEXTOUTPUT branch below; datatoget0 is not yet set at this point
            for(sii=0;sii<datatogetc0;sii++){
              if (datatype == MPI_UNSIGNED_CHAR) fread(&jonio1[sii+joniooffset], sizeofdatatype,1, fp[sii%numfiles]);
              else if (datatype == MPI_FLOAT) fread(&jonio4[sii+joniooffset], sizeofdatatype,1, fp[sii%numfiles]);
              else if (datatype == MPI_DOUBLE) fread(&jonio8[sii+joniooffset], sizeofdatatype,1, fp[sii%numfiles]);
              else if (datatype == MPI_LONG_DOUBLE) fread(&jonio16[sii+joniooffset], sizeofdatatype,1, fp[sii%numfiles]);
              else if (datatype == MPI_INT) fread(&jonio4i[sii+joniooffset], sizeofdatatype,1, fp[sii%numfiles]);
              else if (datatype == MPI_LONG_LONG_INT) fread(&jonio8i[sii+joniooffset], sizeofdatatype,1, fp[sii%numfiles]);
            }
          }
        }
        else if(bintxt==TEXTOUTPUT){ // properly ordered, so just grab it
          for(sii=0;sii<datatogetc0;sii++){
            if (datatype == MPI_UNSIGNED_CHAR){
              fscanf(fp[sii%numfiles],"%hu",&short4char);
              jonio1[sii+joniooffset]=short4char; // convert to unsigned char
            }
            else if (datatype == MPI_FLOAT) fscanf(fp[sii%numfiles],"%f",&jonio4[sii+joniooffset]);
            else if (datatype == MPI_DOUBLE) fscanf(fp[sii%numfiles],"%lf",&jonio8[sii+joniooffset]);
            else if (datatype == MPI_LONG_DOUBLE) fscanf(fp[sii%numfiles],"%Lf",&jonio16[sii+joniooffset]);
            else if (datatype == MPI_INT) fscanf(fp[sii%numfiles],"%d",&jonio4i[sii+joniooffset]);
            else if (datatype == MPI_LONG_LONG_INT) fscanf(fp[sii%numfiles],"%lld",&jonio8i[sii+joniooffset]);
          }
        }// end if TEXTOUTPUT
#if(DEBUGMINMEM)
        dualfprintf(fail_file,"got here2\n"); fflush(fail_file);
#endif

        //
        // send data to the other cpus (while(doset))
        //

        // now that the data is read into the 2nd half of jonio, we need to desort it into the 1st half per cpu in a loop
        datasent0=0;
        datatoget0=0;
        doset=1;
        doing0=0;

        while(doset){
#if(DEBUGMINMEM)
          dualfprintf(fail_file,"got here 2.4\n"); fflush(fail_file);
#endif

          if(cpulinkptr->cpu==0) doing0=1;
          datatoget0 += (long long int)(cpulinkptr->num);

          // copy this node cpu's data from the 2nd half of jonio to the first half
          uii=0;
          for(sii=0;sii<datatogetc0;sii++){
#if(DEBUGMINMEM)
            dualfprintf(fail_file,"got here2.5: %lld\n",sii); fflush(fail_file);
#endif

            mypos=(long long int)((long long int)sii/((long long int)numcolumns)) + (long long int)(cpulinkptr->ri) + (long long int)(cpulinkptr->rj)*(long long int)totalsize[1] + ((long long int)cpulinkptr->rk)*(long long int)totalsize[1]*(long long int)totalsize[2];

            gi=((long long int)mypos)%((long long int)totalsize[1]);
            gj=(long long int)((((long long int)mypos)%((long long int)totalsize[1]*(long long int)totalsize[2]))/((long long int)totalsize[1]));
            gk=(long long int)(((long long int)mypos)/((long long int)totalsize[1]*(long long int)totalsize[2]));

#if(DEBUGMINMEM)
            dualfprintf(fail_file,"got here2.6: %lld %lld\n",gi,gj); fflush(fail_file);
#endif
            if(
               (gi>=startpos0[1][cpulinkptr->cpu])&&
               (gi<=endpos0[1][cpulinkptr->cpu])&&
               (gj>=startpos0[2][cpulinkptr->cpu])&&
               (gj<=endpos0[2][cpulinkptr->cpu])&&
               (gk>=startpos0[3][cpulinkptr->cpu])&&
               (gk<=endpos0[3][cpulinkptr->cpu])
               ){
#if(DEBUGMINMEM)
              dualfprintf(fail_file,"got here2.7 %lld\n",cpulinkptr->cpu); fflush(fail_file);
#endif

              if (datatype == MPI_UNSIGNED_CHAR) jonio1[uii++]=jonio1[sii+joniooffset];
              else if (datatype == MPI_FLOAT) jonio4[uii++]=jonio4[sii+joniooffset];
              else if (datatype == MPI_DOUBLE) jonio8[uii++]=jonio8[sii+joniooffset];
              else if (datatype == MPI_LONG_DOUBLE) jonio16[uii++]=jonio16[sii+joniooffset];
              else if (datatype == MPI_INT) jonio4i[uii++]=jonio4i[sii+joniooffset];
              else if (datatype == MPI_LONG_LONG_INT) jonio8i[uii++]=jonio8i[sii+joniooffset];
            }// end if within this CPU's data
          }// end over sii
#if(DEBUGMINMEM)
          dualfprintf(fail_file,"got here3\n"); fflush(fail_file);
#endif

          //
          // check!
          if(uii != (long long int)(cpulinkptr->num)){
            dualfprintf(fail_file,"uii and num for this cpu not same, algorithm failure: uii=%lld num=%d datatogetc0=%lld\n",uii,cpulinkptr->num,datatogetc0);
            myexit(10006);
          }
#if(DEBUGMINMEM)
          dualfprintf(fail_file,"got here4\n"); fflush(fail_file);
#endif
          // jonio is the unsorted bit here starting at index=0 (for all cpus)
          MPI_Isend(jonio,cpulinkptr->num,datatype,MPIid[cpulinkptr->cpu],cpulinkptr->cpu,MPI_COMM_GRMHD,&request0);
          // have myid wait before continuing, to make sure the send is complete
          // the alternative is to have many outstanding sends, but then one must desort all at once, which isn't easy since we cycle through the link list one node at a time and only once (could store a starting pointer, etc.)
          MPI_Wait(&request0,&mpichstatus);

          datasent0 += (long long int)(cpulinkptr->num); // diagnostic

          if(cpulinkptr->end) doset=0;
          cpulinkptr=cpulinkptr->np;
#if(DEBUGMINMEM)
          dualfprintf(fail_file,"got here5\n"); fflush(fail_file);
#endif

        }// end while doset


        //
        // check to see if the total data is correct
        //

#if(DEBUGMINMEM)
        dualfprintf(fail_file,"got here6\n"); fflush(fail_file);
#endif

        datasofar0 += (long long int)datatoget0; // diagnostic
        if((long long int)datasofar0 != (long long int)datasofarc0){
          dualfprintf(fail_file,"cumulative data received via MPI and expected data is different: got=%lld expected=%lld\n",datasofar0,datasofarc0);
          myexit(10007);
        }
        if((long long int)datasent0 != (long long int)datatoget0){
          dualfprintf(fail_file,"data sent doesn't match data read\n");
          myexit(10008);
        }
        if((done0==0)&&(doing0==1)) dofull=0; // cpu==0 still needs to deal with more reads for its own data
        if(cpulinkptr==NULL){
          masterdone=1; // i.e. don't come back
          dofull=0; // end of list
          if(datasofar0 != (long long int)totalsize[1]*(long long int)totalsize[2]*(long long int)totalsize[3]*(long long int)numcolumns){
            dualfprintf(fail_file,"read: total data read not equal to expected amount: read=%lld expected=%lld\n",datasofar0,(long long int)totalsize[1]*(long long int)totalsize[2]*(long long int)totalsize[3]*(long long int)numcolumns);
            myexit(10009);
          }
        }// end if NULL


        // otherwise continue
#if(DEBUGMINMEM)
        dualfprintf(fail_file,"got here7\n"); fflush(fail_file);
#endif

      }// end while dofull


#if(DEBUGMINMEM)
      dualfprintf(fail_file,"got here8\n"); fflush(fail_file);
#endif

    }// end if READFILE


  }// end if myid==0 && ((dolocal==1)||(done0==1))


#if(DEBUGMINMEM)
  dualfprintf(fail_file,"got here 6\n"); fflush(fail_file);
#endif

  // have myid wait before continuing so the buffer can be released for more writing
  if(dolocal) MPI_Wait(&request,&mpichstatus); // myid==0 is handled specially since it is also the master and can't wait: even if the master finishes first in this function, the master may not require cpu=0 at all for its current node set

  // logsfprintf("end mpiminmem_read\n");
#if(DEBUGMINMEM)
  dualfprintf(fail_file,"got here9\n"); fflush(fail_file);
#endif

#endif
}




void mpiio_seperate(int bintxt, int sorted, int stage,
                    int numcolumns, MPI_Datatype datatype,
                    FILE ** fpptr, void *jonio, void *writebuf)
{
#if(USEMPI)

  logsfprintf("mpiio begin seperate\n");

  if(truempicombinetype==MPICOMBINEROMIO){
    // doesn't use jonioptr or bintxt or sorted
    // headerbytesize no longer needed
    if(stage==STAGE1) mpiioromio_init_combine(READROMIO, READFILE, 0, "", numcolumns, datatype, &writebuf, writebuf);
    if(stage==STAGE2) mpiioromio_init_combine(READFREEROMIO, READFILE, 0, "", numcolumns, datatype, &writebuf, writebuf);
  }
  else{

    if(truempicombinetype==MPICOMBINESIMPLE){

      if(sorted){
        mpiios_seperate(bintxt, stage, datatype, numcolumns, fpptr, jonio, writebuf);
      }
      else{
        mpiiotu_seperate(stage, datatype, numcolumns, fpptr, writebuf);
      }

    }
    else if(truempicombinetype==MPICOMBINEMINMEM){
      // do nothing on STAGE1
      if(stage==STAGE2) mpiiomin_final(numcolumns, fpptr, jonio, writebuf);
    }
  }

#endif
  logsfprintf("mpiio end seperate\n");

}




void mpiioromio_init_combine(int operationtype, int which, long headerbytesize, char *filename, int numcolumns, MPI_Datatype datatype, void **writebufptr, void *writebuf)
{
  int i;

  static long long int sizeofmemory;
  static int sizeofdatatype;

  static long double **writebuf16;
  static double **writebuf8;
  static float **writebuf4;
  static unsigned char **writebuf1;
  static int **writebuf4i;
  static long long int **writebuf8i;

  static int numfiles;
  static int romiocolumns;
  static char realfilename[MAXFILENAME];
#if(USEMPI&&USEROMIO)
  static MPI_Datatype newtype;
  static MPI_File fh;
  static MPI_Status status;
  static MPI_Request request;
#endif
  static int ndims, array_of_gsizes[4], array_of_distribs[4];
  static int order, len;
  static int array_of_dargs[4], array_of_psizes[4];
  static int bufcount, array_size;


  if(operationtype==INITROMIO){

    sizeofdatatype=getsizeofdatatype(datatype);

    logsfprintf("mpiioromio_init begin\n");

    //
    // set dimensionality

    // see if splitting into individual columns
    if(docolsplit){
      numfiles=numcolumns;
      romiocolumns=1;
    }
    else{
      numfiles=1;
      romiocolumns=numcolumns;
    }


#if(USEMPI&&USEROMIO)
    if((COMPDIM==3)&&(romiocolumns>1)){
      // create the distributed array filetype
      // this still works for any reduced dimensionality
      ndims = 4;
      order = MPI_ORDER_C;

      array_of_gsizes[3] = romiocolumns;
      array_of_gsizes[2] = totalsize[1];
      array_of_gsizes[1] = totalsize[2];
      array_of_gsizes[0] = totalsize[3];

      sizeofmemory = (long long int)N1*(long long int)N2*(long long int)N3*(long long int)romiocolumns*(long long int)sizeofdatatype;

      array_of_distribs[3] = MPI_DISTRIBUTE_BLOCK;
      array_of_distribs[2] = MPI_DISTRIBUTE_BLOCK;
      array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
      array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;

      array_of_dargs[3] = MPI_DISTRIBUTE_DFLT_DARG;
      array_of_dargs[2] = MPI_DISTRIBUTE_DFLT_DARG;
      array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
      array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;

      array_of_psizes[3]=1;
      array_of_psizes[2]=ncpux1;
      array_of_psizes[1]=ncpux2;
      array_of_psizes[0]=ncpux3;
    }
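    // For instance (hypothetical sizes): with ncpux1=2, ncpux2=2, ncpux3=1,
    // totalsize[1]=totalsize[2]=8, totalsize[3]=4, and romiocolumns=3, the darray
    // above describes a global 4x8x8x3 (k,j,i,col) array block-distributed over a
    // 1x2x2x1 process grid, so each rank's filetype selects its own 4x4x4x3 block
    // (matching the N3*N2*N1*romiocolumns entries of its writebuf).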
    else if((COMPDIM==2)&&(romiocolumns>1)){
      // create the distributed array filetype
      ndims = 3;
      order = MPI_ORDER_C;

      array_of_gsizes[2] = romiocolumns;
      array_of_gsizes[1] = totalsize[1];
      array_of_gsizes[0] = totalsize[2];

      sizeofmemory = (long long int)N1*(long long int)N2*(long long int)romiocolumns*(long long int)sizeofdatatype;

      array_of_distribs[2] = MPI_DISTRIBUTE_BLOCK;
      array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
      array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;

      array_of_dargs[2] = MPI_DISTRIBUTE_DFLT_DARG;
      array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
      array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;

      array_of_psizes[2]=1;
      array_of_psizes[1]=ncpux1;
      array_of_psizes[0]=ncpux2;
    }
    else if((COMPDIM==1)&&(romiocolumns>1)){
      // create the distributed array filetype
      ndims = 1;
      order = MPI_ORDER_C;

      array_of_gsizes[1] = romiocolumns;
      array_of_gsizes[0] = totalsize[1];

      sizeofmemory = (long long int)N1*(long long int)romiocolumns*(long long int)sizeofdatatype;

      array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
      array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;

      array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
      array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;

      array_of_psizes[1]=1;
      array_of_psizes[0]=ncpux1;
    }
    else if((COMPDIM==3)&&(romiocolumns==1)){
      // create the distributed array filetype
      // this still works for any reduced dimensionality
      ndims=3;
      order = MPI_ORDER_C;

      array_of_gsizes[2] = totalsize[1];
      array_of_gsizes[1] = totalsize[2];
      array_of_gsizes[0] = totalsize[3];

      sizeofmemory = (long long int)N1*(long long int)N2*(long long int)N3*(long long int)sizeofdatatype;

      array_of_distribs[2] = MPI_DISTRIBUTE_BLOCK;
      array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
      array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;

      array_of_dargs[2] = MPI_DISTRIBUTE_DFLT_DARG;
      array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
      array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;

      array_of_psizes[2]=ncpux1;
      array_of_psizes[1]=ncpux2;
      array_of_psizes[0]=ncpux3;
    }
    else if((COMPDIM==2)&&(romiocolumns==1)){
      // create the distributed array filetype
      ndims=2;
      order = MPI_ORDER_C;

      array_of_gsizes[1] = totalsize[1];
      array_of_gsizes[0] = totalsize[2];

      sizeofmemory = (long long int)N1*(long long int)N2*(long long int)sizeofdatatype;

      array_of_distribs[1] = MPI_DISTRIBUTE_BLOCK;
      array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;

      array_of_dargs[1] = MPI_DISTRIBUTE_DFLT_DARG;
      array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;

      array_of_psizes[1]=ncpux1;
      array_of_psizes[0]=ncpux2;
    }
    else if((COMPDIM==1)&&(romiocolumns==1)){
      // create the distributed array filetype
      ndims=1;
      order = MPI_ORDER_C;

      array_of_gsizes[0] = totalsize[1];

      sizeofmemory = (long long int)N1*(long long int)sizeofdatatype;

      array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;

      array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;

      array_of_psizes[0]=ncpux1;
    }
    else if(romiocolumns==0){
      // write nothing, actually
      ndims=1;
      order = MPI_ORDER_C;

      array_of_gsizes[0] = 1;

      sizeofmemory = 0; // 1*(long long int)sizeofdatatype;

      array_of_distribs[0] = MPI_DISTRIBUTE_BLOCK;

      array_of_dargs[0] = MPI_DISTRIBUTE_DFLT_DARG;

      array_of_psizes[0]=1;
    }
    else{
      dualfprintf(fail_file,"Shouldn't reach the end of the ROMIO selection\n");
      myexit(17872);
    }
#endif

    // initialize the filename
    // must be the same filename as in dump_gen() when opening files here, so the header is included at the top of the dump
    if(docolsplit&&(numcolumns>1)){
      sprintf(realfilename,"%s-col%04d",filename,romiocoliter);
    }
    else strcpy(realfilename,filename);


    //
    // initialize the grid and file handler for ROMIO

#if(USEMPI&&USEROMIO)
    // NOT using the mapping MPIid[myid] below. Using the GRMHD layout rank "myid", which ensures that ROMIO writes correctly spatially no matter what the MPI rank is.
    MPI_Type_create_darray(numprocs, myid, ndims, array_of_gsizes,
                           array_of_distribs, array_of_dargs,
                           array_of_psizes, order, datatype, &newtype);
    MPI_Type_commit(&newtype);

#if(0)
    MPI_Type_size(newtype, &bufcount); // ULTRASUPERGODMARK: bufcount is an int limited to 2GB, while it should be allowed to be larger. Couldn't find info online about how to fix this, and there are suggestions that 2GB is the max buffer size of any MPI communication. That ruins ROMIO's ability here.
    sizeofmemory=bufcount; // includes type
    bufcount = bufcount/sizeofdatatype; // number of elements of type
    // could avoid the use of MPI_Type_size() and set sizeofmemory and bufcount directly, but bufcount must still be an int when used in the MPI functions below, so that only helps by a factor of sizeofdatatype in size
#else
    // then sizeofmemory is already set
    bufcount=(long long int)sizeofmemory/(long long int)sizeofdatatype;
#endif


    // fail if the MPI functions below can't handle the buffer size
    if((long long int)N1*(long long int)N2*(long long int)N3*(long long int)romiocolumns*(long long int)sizeofdatatype>=(long long int)(2L*1024L*1024L*1024L) && sizeof(int)<=4){
      dualfprintf(fail_file,"JCM couldn't figure out how to modify ROMIO so it would work when sizeof(int)==4 and the buffer size is >2GB\n");
      myexit(867546243);
    }

    // set up the file handler
    // set up to append, in case a binary header was wanted at the front
    if(which==WRITEFILE){
      MPI_File_open(MPI_COMM_GRMHD, realfilename, MPI_MODE_APPEND | MPI_MODE_RDWR, MPI_INFO_NULL, &fh);
    }
    else if(which==READFILE){
      MPI_File_open(MPI_COMM_GRMHD, realfilename, MPI_MODE_RDONLY , MPI_INFO_NULL, &fh);
    }

    // this sets the distributed nature of the file writing
    // http://www.mpi-forum.org/docs/mpi-20-html/node198.htm#Node198 : "native", "internal", "external32"
    // GODMARK: rather than "native", one can use MPI_Register_datarep() to create a conversion between "native" and other representations for reading/writing
    // but apparently that can't produce formatted text output
    // MPI_Datarep_conversion_function() is used too
    // http://www.mpi-forum.org/docs/mpi-20-html/node201.htm#Node201

    // Note that for 64-bit machines "native" and "external32" are not compatible

    // GODMARK: For now use "native" since "external32" is not supported even as of 07/21/08 by MPICH2 or OpenMPI
    MPI_File_set_view(fh, headerbytesize, datatype, newtype, "native", MPI_INFO_NULL);
    // ideally one would use "external32" so files are exactly well-defined for any system, as in:
    // http://parallel.ru/docs/Parallel/mpi2/node200.html#Node200
    // perhaps my bin2txt program should use these MPI types too
    //
    // GODMARK: Avoid the below good feature since it is not supported in general:
    // MPI_File_set_view(fh, headerbytesize, datatype, newtype, "external32", MPI_INFO_NULL);
    // all that needs to be done now is initialize and later fill writebuf with the data
#endif

    //
    // allocate memory for writebuf

    if(datatype==MPI_UNSIGNED_CHAR){ writebuf1=(unsigned char **)writebufptr; }
    else if(datatype==MPI_FLOAT){ writebuf4=(float **)writebufptr; }
    else if(datatype==MPI_DOUBLE){ writebuf8=(double**)writebufptr; }
    else if(datatype==MPI_LONG_DOUBLE){ writebuf16=(long double**)writebufptr; }
    else if(datatype==MPI_INT){ writebuf4i=(int **)writebufptr; }
    else if(datatype==MPI_LONG_LONG_INT){ writebuf8i=(long long int **)writebufptr; }


    if(datatype==MPI_UNSIGNED_CHAR) *writebuf1=(unsigned char*)malloc(sizeofmemory);
    else if(datatype==MPI_FLOAT) *writebuf4=(float*)malloc(sizeofmemory);
    else if(datatype==MPI_DOUBLE) *writebuf8 =(double*)malloc(sizeofmemory);
    else if(datatype==MPI_LONG_DOUBLE) *writebuf16 =(long double*)malloc(sizeofmemory);
    else if(datatype==MPI_INT) *writebuf4i=(int*)malloc(sizeofmemory);
    else if(datatype==MPI_LONG_LONG_INT) *writebuf8i=(long long int*)malloc(sizeofmemory);
    if(
       ((datatype==MPI_UNSIGNED_CHAR)&&(*writebuf1 == NULL)) ||
       ((datatype==MPI_FLOAT)&&(*writebuf4 == NULL)) ||
       ((datatype==MPI_DOUBLE)&&(*writebuf8 == NULL)) ||
       ((datatype==MPI_LONG_DOUBLE)&&(*writebuf16 == NULL)) ||
       ((datatype==MPI_INT)&&(*writebuf4i == NULL)) ||
       ((datatype==MPI_LONG_LONG_INT)&&(*writebuf8i == NULL))
       ){
      dualfprintf(fail_file,"Can't initialize writebuf memory for mpiioromio\n");
      myexit(10010);
    }

    logsfprintf("mpiioromio_init end\n");

  }// end if operationtype==INITROMIO


  // ROMIO examples
  // http://www.sesp.cse.clrc.ac.uk/Publications/paraio/paraio/paraio.html
  // http://www.sesp.cse.clrc.ac.uk/Publications/paraio/paraio/node53.html

  // GODMARK: Could use asynchronous I/O if using MPI-2
  // GODMARK: Could use non-blocking MPI_File_iread() and MPI_File_iwrite() with MPIO_Wait() if one wanted to write while continuing to compute

  //
  // http://www.nersc.gov/nusers/resources/software/libs/io/mpiio.php
  // Collective: MPI_File_iread_all() ?
  // OR:
  // http://www-unix.mcs.anl.gov/mpi/www/www3/MPI_File_read_all_begin.html
  // int MPI_File_read_all_begin(MPI_File fh, void *buf, int count, MPI_Datatype datatype)
  // http://www-unix.mcs.anl.gov/mpi/www/www3/MPI_File_read_all_end.html
  // int MPI_File_read_all_end(MPI_File fh, void *buf, MPI_Status *status)
  //
  // better description:
  // http://mpi.deino.net/mpi_functions/MPI_File_read_all_begin.html

#if(USEMPI&&USEROMIO)
  if(operationtype==READROMIO){
    logsfprintf("mpiioromio_seperate begin\n");

    if(BARRIERROMIOPRE==1) MPI_Barrier(MPI_COMM_GRMHD); // force a barrier before beginning the read, which avoids requiring a large amount of unexpected buffer space.

#if(MPIVERSION>=2)
    // non-blocking, but we need the data from the read immediately, so not taking advantage of it
    MPI_File_read_all_begin(fh, writebuf, bufcount, datatype);
    MPI_File_read_all_end(fh, writebuf, &status);
    MPI_File_close(&fh);
#else
    MPI_File_read_all(fh, writebuf, bufcount, datatype, &status);
    MPI_File_close(&fh);
#endif

    logsfprintf("mpiioromio_seperate end\n");
  }
  else if(operationtype==READFREEROMIO){
    logsfprintf("mpiioromio_seperate_free begin\n");
    free(writebuf);
    MPI_Type_free(&newtype);
    logsfprintf("mpiioromio_seperate_free end\n");
  }
  else if(operationtype==WRITECLOSEROMIO){
    logsfprintf("mpiioromio_combine begin\n");

    if(BARRIERROMIOPRE==1) MPI_Barrier(MPI_COMM_GRMHD); // force a barrier before beginning the write, which avoids requiring a large amount of unexpected buffer space.

#if(MPIVERSION>=2)
    MPI_File_write_all_begin(fh, writebuf, bufcount, datatype);
    // wait until later (WRITEENDROMIO) to end, close, free writebuf, and free newtype
#else
    // now write the buffer:
    MPI_File_write_all(fh, writebuf, bufcount, datatype, &status);
    MPI_File_close(&fh);
    // free buffer and type
    free(writebuf);
    MPI_Type_free(&newtype);
#endif

    logsfprintf("mpiioromio_combine end\n");

  }
  else if(operationtype==WRITEENDROMIO){
#if(MPIVERSION>=2)
    logsfprintf("mpiioromio_combine writeend begin\n");
    // now finish writing the buffer:
    MPI_File_write_all_end(fh, writebuf, &status);
    // logsfprintf("mpiioromio_combine 1\n");
    MPI_File_close(&fh);
    // logsfprintf("mpiioromio_combine 2: writebuf=%d\n",writebuf);
    // free buffer and type
    free(writebuf);
    // logsfprintf("mpiioromio_combine 3\n");
    MPI_Type_free(&newtype);
    logsfprintf("mpiioromio_combine writeend end\n");
#else
    // nothing to do
#endif
  }

#endif // end if USEMPI&&USEROMIO


}
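
// A self-contained sketch (disabled; assumes a plain MPI-IO environment rather
// than HARM's globals) of the darray+view pattern used above, for a 1D grid of
// ntot doubles split evenly across the ranks with a single column. The function
// name and "example.bin" are placeholders:
#if(0)
#include <mpi.h>
#include <stdlib.h>

static void example_romio_write(MPI_Comm comm, long long ntot, long headerbytes)
{
  int myid, numprocs;
  MPI_Comm_rank(comm, &myid);
  MPI_Comm_size(comm, &numprocs);

  int gsizes[1]   = { (int)ntot };
  int distribs[1] = { MPI_DISTRIBUTE_BLOCK };
  int dargs[1]    = { MPI_DISTRIBUTE_DFLT_DARG };
  int psizes[1]   = { numprocs };

  // filetype that selects this rank's block of the global array
  MPI_Datatype newtype;
  MPI_Type_create_darray(numprocs, myid, 1, gsizes, distribs, dargs, psizes,
                         MPI_ORDER_C, MPI_DOUBLE, &newtype);
  MPI_Type_commit(&newtype);

  int count = (int)(ntot / numprocs);       // per-rank block (assumes an even split)
  double *buf = (double *)malloc(count * sizeof(double));
  // ... here each rank would fill buf with its portion of the data ...

  MPI_File fh;
  MPI_File_open(comm, "example.bin", MPI_MODE_CREATE | MPI_MODE_RDWR,
                MPI_INFO_NULL, &fh);
  // skip an (already written) header, then let the darray place each block
  MPI_File_set_view(fh, headerbytes, MPI_DOUBLE, newtype, "native", MPI_INFO_NULL);
  MPI_Status status;
  MPI_File_write_all(fh, buf, count, MPI_DOUBLE, &status);
  MPI_File_close(&fh);

  free(buf);
  MPI_Type_free(&newtype);
}
#endif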




int set_sizeofmemory(int numbuff, int sizeofdatatype, int numcolumns, long long int *sizeofmemory)
{
  // can't trust that (int)ceil() alone will give the upper integer
  *sizeofmemory = (long long int)sizeofdatatype * (long long int)ROUND2LONGLONGINT(ceil((FTYPE)N1 * (FTYPE)N2 * (FTYPE)N3 * (FTYPE)NUMBUFFERS/(FTYPE)numcolumns))*(long long int)numcolumns * (long long int)numbuff ; // default

  return(0);
}

int set_maxnumsize(int numcolumns, long long int *maxnumsize)
{

  *maxnumsize=(long long int)(ROUND2LONGLONGINT(ceil(ROUND2LONGLONGINT(ceil((FTYPE)(N1*N2*N3*NUMBUFFERS)/(FTYPE)numcolumns))*(FTYPE)(numcolumns))));

  return(0);
}


int set_numbuffers(int numcolumns, int *numbuffers)
{
  int myval;

  myval=(int)ROUND2INT(ceil((FTYPE)numcolumns/((FTYPE)N1*(FTYPE)N2*(FTYPE)N3)));

  if(N1*N2*N3<numcolumns) *numbuffers=myval;
  else *numbuffers=1;

  return(0);
}

long long int gcountmod(int numcolumns)
{
  long long int myval;

  myval=(long long int)ROUND2LONGLONGINT(ceil((FTYPE)((FTYPE)N1*(FTYPE)N2*(FTYPE)N3*(FTYPE)NUMBUFFERS/(FTYPE)numcolumns)))*(long long int)numcolumns;

  return(myval);
}
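
// For example (hypothetical sizes): with N1*N2*N3=100, NUMBUFFERS=1, and
// numcolumns=8, gcountmod() gives ceil(100/8)*8 = 13*8 = 104, i.e. the buffer
// element count rounded up to a whole number of columns, and set_sizeofmemory()
// with numbuff=2 returns 2*104*sizeofdatatype bytes.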




#define DEBUGSINIT 0


void mpiios_init(int bintxt, int sorted, FILE ** fp, int which, int headerbytesize, char *filename, int numcolumns,
                 MPI_Datatype datatype, void **jonioptr, void **writebufptr)
{
  FTYPE fakevar;
  int fakei;

  int i;

  long long int sizeofmemory;
  int sizeofdatatype;

  long double **jonio16;
  double **jonio8;
  float **jonio4;
  unsigned char **jonio1;
  int **jonio4i;
  long long int **jonio8i;

  long double **writebuf16;
  double **writebuf8;
  float **writebuf4;
  unsigned char **writebuf1;
  int **writebuf4i;
  long long int **writebuf8i;

  int numfiles;
  char newfilename[MAXFILENAME];

  int trygettingfile;

#if(USEMPI)

  sizeofdatatype=getsizeofdatatype(datatype);

  if(datatype==MPI_UNSIGNED_CHAR){ jonio1=(unsigned char **)jonioptr; writebuf1=(unsigned char **)writebufptr; }
  else if(datatype==MPI_FLOAT){ jonio4=(float **)jonioptr; writebuf4=(float **)writebufptr; }
  else if(datatype==MPI_DOUBLE){ jonio8=(double**)jonioptr; writebuf8=(double**)writebufptr; }
  else if(datatype==MPI_LONG_DOUBLE){ jonio16=(long double**)jonioptr; writebuf16=(long double**)writebufptr; }
  else if(datatype==MPI_INT){ jonio4i=(int **)jonioptr; writebuf4i=(int **)writebufptr; }
  else if(datatype==MPI_LONG_LONG_INT){ jonio8i=(long long int **)jonioptr; writebuf8i=(long long int **)writebufptr; }

  logsfprintf("mpiios_init begin\n");


  //
  // open files for non-ROMIO writing
  //
  if(myid==0){ // total on CPU=0, always, since mpicombine=1
    if(docolsplit){
      numfiles=numcolumns;
    }
    else numfiles=1;

    for(i=0;i<numfiles;i++){
      if(docolsplit&&(numfiles>1)){
        sprintf(newfilename,"%s-col%04d",filename,i); // must be the same name as in dump_gen()
      }
      else{
        sprintf(newfilename,"%s",filename);
      }


#define NUMTRYGETTINGFILE 3
      for(trygettingfile=0;trygettingfile<NUMTRYGETTINGFILE;trygettingfile++){

        if (which == WRITEFILE){
          if(bintxt==BINARYOUTPUT) fp[i] = fopen(newfilename, "a");
          else if(bintxt==TEXTOUTPUT) fp[i] = fopen(newfilename, "at");
          else{
            dualfprintf(fail_file,"No such bintxt=%d\n",bintxt);
            myexit(62061);
          }
          // file pointer set correctly upon append
        }
        else if (which == READFILE){
          if(bintxt==BINARYOUTPUT) fp[i] = fopen(newfilename, "rb");
          else if(bintxt==TEXTOUTPUT) fp[i] = fopen(newfilename, "rt");
          else{
            dualfprintf(fail_file,"No such bintxt=%d\n",bintxt);
            myexit(62062);
          }
        }
        if (fp[i] == NULL) {
          dualfprintf(fail_file, "error opening file: %s\n", newfilename);
          if(trygettingfile==NUMTRYGETTINGFILE-1) myexit(62676); // last attempt failed
          else{
            dualfprintf(fail_file,"Pausing for disk to get into synch: %d\n",trygettingfile);
            if(MPIAVOIDFORK){
              // compute a bit to fake a sleep
              fakevar=0.8;
#define FAKENUM 100 // number of square roots to take to pause
              for(fakei=0;fakei<=FAKENUM;fakei++){
                fakevar=sqrt(fakevar);
              }
            }
            else{
              system("sleep 1"); // or use something else to pause for 1 second
            }
          }
        }
        else{
          // then done with the loop (ensure no further attempts, so set the loop counter to end)
          trygettingfile=NUMTRYGETTINGFILE;
          break;
        }
      }// end loop over trygettingfile


      // must set the file pointer to after the header
      if (which == READFILE){
        fseek(fp[i],headerbytesize,SEEK_SET);
      }

    }// end over numfiles
  }// end if myid==0



  if( (sorted==SORTED)&&(myid == 0) ){ // total on CPU=0 is stored in memory somehow for sorting before output

    //
    // determine the memory needed for the mpicombinetype for cpu=0
    //
    truempicombinetype=mpicombinetype; // initial action
    if(truempicombinetype==MPICOMBINESIMPLE){
      sizeofmemory = (long long int)sizeofdatatype * (long long int)totalsize[1] * (long long int)totalsize[2] * (long long int)totalsize[3] * (long long int)numcolumns;
    }
    else if(truempicombinetype==MPICOMBINEMINMEM){
      // check this calculation against setuplinklist()'s cpulist0 array size!
      // 2 needed since we need to read sequentially, then order the data into the other buffer for writing
      set_sizeofmemory(2,sizeofdatatype, numcolumns, &sizeofmemory);
      if(sizeofmemory>(long long int)sizeofdatatype * (long long int)totalsize[1] * (long long int)totalsize[2] * (long long int)totalsize[3] * (long long int)numcolumns){
        sizeofmemory = (long long int)sizeofdatatype * (long long int)totalsize[1] * (long long int)totalsize[2] * (long long int)totalsize[3] * (long long int)numcolumns;
        truempicombinetype=MPICOMBINESIMPLE; // then switch to the simple method
      }
      // need memory to be at least larger than the number of columns (X2 for 2 buffers)
      // don't want to work with chunks smaller than the # of columns, and all chunks should come in chunks of the # of columns times some integer multiple
      if(sizeofmemory<(long long int)sizeofdatatype*(long long int)numcolumns*(long long int)2) sizeofmemory=(long long int)sizeofdatatype*(long long int)numcolumns*(long long int)2;
      if(sizeofmemory<(long long int)2*(long long int)numcolumns){
        dualfprintf(fail_file,"problem, sizeofmemory=%lld < %lld=2*numcolumns\n",sizeofmemory,(long long int)2*(long long int)numcolumns);
        myexit(102000);
      }

    }
    joniosize=sizeofmemory/((long long int)sizeofdatatype);

#if(DEBUGMINMEM)
    dualfprintf(fail_file,"jonio sizeofmemory=%lld sizeofdatatype=%d\n",sizeofmemory,sizeofdatatype);
#endif

    if(datatype==MPI_UNSIGNED_CHAR) *jonio1=(unsigned char*)malloc(sizeofmemory);
    else if(datatype==MPI_FLOAT) *jonio4=(float*)malloc(sizeofmemory);
    else if(datatype==MPI_DOUBLE) *jonio8 =(double*)malloc(sizeofmemory);
    else if(datatype==MPI_LONG_DOUBLE) *jonio16 =(long double*)malloc(sizeofmemory);
    else if(datatype==MPI_INT) *jonio4i=(int*)malloc(sizeofmemory);
    else if(datatype==MPI_LONG_LONG_INT) *jonio8i=(long long int*)malloc(sizeofmemory);
    if(
       ((datatype==MPI_UNSIGNED_CHAR)&&(*jonio1 == NULL)) ||
       ((datatype==MPI_FLOAT)&&(*jonio4 == NULL)) ||
       ((datatype==MPI_DOUBLE)&&(*jonio8 == NULL)) ||
       ((datatype==MPI_LONG_DOUBLE)&&(*jonio16 == NULL)) ||
       ((datatype==MPI_INT)&&(*jonio4i == NULL)) ||
       ((datatype==MPI_LONG_LONG_INT)&&(*jonio8i == NULL))
       ){
      dualfprintf(fail_file, "Can't initialize jonio memory for mpiios_init\n");
      myexit(16010);
    }
#if(DEBUGSINIT)
    /*
    for(i=0;i<sizeofmemory/sizeofdatatype;i++){
      if(datatype==sizeof(long double)) (*jonio16)[i]=-10000.0000;
      if(datatype==sizeof(double)) (*jonio8)[i]=-10000.0000;
      if(datatype==sizeof(float)) (*jonio4)[i]=-10000.0000;
      if(datatype==sizeof(unsigned char)) (*jonio1)[i]=100;
    }
    dualfprintf(fail_file,"got here1\n");
    */
#endif

    logsfprintf("mpiios_init jonio: sizeofmemory=%lld sizeofdatatype=%d\n",sizeofmemory,sizeofdatatype);

  }
1560 
1561 
1563  //
1564  // need to tell all CPUS the status of changed global vars
1565  //
1567 
1568  MPI_Bcast(&truempicombinetype,1,MPI_INT,MPIid[0], MPI_COMM_GRMHD);
1569 
1570 
1572  //
1573  // open per CPU writebuf
1574  //
1576 
1577  if(truempicombinetype==MPICOMBINESIMPLE){
1578  sizeofmemory = (long long int)sizeofdatatype * (long long int)N1 * (long long int)N2 * (long long int)N3 * (long long int)numcolumns ;
1579  }
1580  else if(truempicombinetype==MPICOMBINEMINMEM){
1581  // maximum cpu=0 could require under any case
1582  set_sizeofmemory(1,sizeofdatatype, numcolumns, &sizeofmemory);
1583  if(sizeofmemory>(long long int)sizeofdatatype*(long long int)N1*(long long int)N2*(long long int)N3*(long long int)numcolumns) sizeofmemory=(long long int)sizeofdatatype*(long long int)N1*(long long int)N2*(long long int)N3*(long long int)numcolumns; // can't ask for more!
1584  if(sizeofmemory<(long long int)sizeofdatatype*(long long int)numcolumns) sizeofmemory=(long long int)sizeofdatatype*(long long int)numcolumns; // minimum, chunk at minimum of number of columns
1585  if(sizeofmemory<(long long int)numcolumns){
1586  dualfprintf(fail_file,"problem, sizeofmemory=%lld < %lld=numcolumns\n",sizeofmemory,(long long int)numcolumns);
1587  myexit(6900);
1588  }
1589  }
1590 
1591  writebufsize=sizeofmemory/((long long int)sizeofdatatype); // never used currently
1592 
1593 #if(DEBUGMINMEM)
1594  dualfprintf(fail_file,"writebuf sizeofmemory=%lld sizeofdatatype=%d\n",sizeofmemory,sizeofdatatype);
1595 #endif
1596 
1597  if(datatype==MPI_UNSIGNED_CHAR) *writebuf1=(unsigned char*)malloc(sizeofmemory);
1598  else if(datatype==MPI_FLOAT) *writebuf4=(float*)malloc(sizeofmemory);
1599  else if(datatype==MPI_DOUBLE) *writebuf8 =(double*)malloc(sizeofmemory);
1600  else if(datatype==MPI_LONG_DOUBLE) *writebuf16 =(long double*)malloc(sizeofmemory);
1601  else if(datatype==MPI_INT) *writebuf4i=(int*)malloc(sizeofmemory);
1602  else if(datatype==MPI_LONG_LONG_INT) *writebuf8i=(long long int*)malloc(sizeofmemory);
1603  if(
1604  ((datatype==MPI_UNSIGNED_CHAR)&&(*writebuf1 == NULL)) ||
1605  ((datatype==MPI_FLOAT)&&(*writebuf4 == NULL)) ||
1606  ((datatype==MPI_DOUBLE)&&(*writebuf8 == NULL)) ||
1607  ((datatype==MPI_LONG_DOUBLE)&&(*writebuf16 == NULL)) ||
1608  ((datatype==MPI_INT)&&(*writebuf4i == NULL)) ||
1609  ((datatype==MPI_LONG_LONG_INT)&&(*writebuf8i == NULL))
1610  ){
1611  dualfprintf(fail_file, "Can't initialize writebuf memory for mpiios_init: datatype=%d sizeofmemory=%lld\n",datatype,sizeofmemory);
1612  myexit(86726);
1613  }
1614 #if(DEBUGSINIT)
1615  /*
1616  for(i=0;i<sizeofmemory/sizeofdatatype;i++){
1617  if(datatype==MPI_LONG_DOUBLE) (*writebuf16)[i]=-10000.0000;
1618  if(datatype==MPI_DOUBLE) (*writebuf8)[i]=-10000.0000;
1619  if(datatype==MPI_FLOAT) (*writebuf4)[i]=-10000.0000;
1620  if(datatype==MPI_UNSIGNED_CHAR) (*writebuf1)[i]=100;
1621  }
1622  dualfprintf(fail_file,"got here2\n");
1623  */
1624 #endif
1625  logsfprintf("mpiios_init end: sizeofmemory=%lld sizeofdatatype=%d\n",sizeofmemory,sizeofdatatype);
1626 #endif
1627 
1628 }
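// The six-way malloc chains above repeat one pattern: select a typed pointer
// slot by MPI datatype and allocate sizeofmemory raw bytes into it. A minimal
// sketch of how that dispatch could be factored out (a hypothetical helper,
// not part of this file; the byte count is the same whatever the datatype):
static void *alloc_for_datatype(MPI_Datatype datatype, long long int sizeofmemory)
{
  void *buf = malloc((size_t)sizeofmemory);
  (void)datatype; // same raw byte count for any datatype
  if (buf == NULL) {
    dualfprintf(fail_file, "alloc_for_datatype: malloc failed, sizeofmemory=%lld\n", sizeofmemory);
    myexit(1);
  }
  return buf; // caller casts to unsigned char* / float* / double* / etc.
}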
1629 
1630 
1631 
1632 void mpiiomin_final(int numcolumns,FILE **fp, void *jonio, void *writebuf)
1633 {
1634  int i;
1635  int numfiles;
1636 
1637  free(writebuf); // writebuf used by each CPU
1638  if(myid==0){
1639  free(jonio); // used by CPU=0
1640  if(docolsplit){
1641  numfiles=numcolumns;
1642  }
1643  else numfiles=1;
1644  for(i=0;i<numfiles;i++){
1645  fclose(fp[i]);
1646  fp[i] = NULL;
1647  }
1648  }
1649 }
1650 
1651 #if(USEMPI)
1652 
1653 
1654 void mpiios_combine(int bintxt, MPI_Datatype datatype, int numcolumns,
1655  FILE ** fp, void *jonio, void *writebuf)
1656 {
1657  long long int i, j, k, l, col, mapvaluejonio, mapvaluetempbuf;
1658 #if(USEMPI)
1659  MPI_Request rrequest;
1660  MPI_Request srequest;
1661 #endif
1662  int othercpupos[COMPDIM + 1];
1663 
1664  unsigned char *jonio1;
1665  float *jonio4;
1666  double *jonio8;
1667  long double *jonio16;
1668  int *jonio4i;
1669  long long int *jonio8i;
1670 
1671  unsigned char *writebuf1;
1672  float *writebuf4;
1673  double *writebuf8;
1674  long double *writebuf16;
1675  int *writebuf4i;
1676  long long int *writebuf8i;
1677 
1678  int numfiles;
1679  int sizeofdatatype;
1680 
1681  logsfprintf("mpiios begin combine\n");
1682 
1683  sizeofdatatype=getsizeofdatatype(datatype);
1684 
1685  if (datatype == MPI_UNSIGNED_CHAR) jonio1 = (unsigned char *) jonio;
1686  else if (datatype == MPI_FLOAT) jonio4 = (float *) jonio;
1687  else if (datatype == MPI_DOUBLE) jonio8 = (double *) jonio;
1688  else if (datatype == MPI_LONG_DOUBLE) jonio16 = (long double *) jonio;
1689  else if (datatype == MPI_INT) jonio4i = (int *) jonio;
1690  else if (datatype == MPI_LONG_LONG_INT) jonio8i = (long long int *) jonio;
1691 
1692  if (datatype == MPI_UNSIGNED_CHAR) writebuf1 = (unsigned char *) writebuf;
1693  else if (datatype == MPI_FLOAT) writebuf4 = (float *) writebuf;
1694  else if (datatype == MPI_DOUBLE) writebuf8 = (double *) writebuf;
1695  else if (datatype == MPI_LONG_DOUBLE) writebuf16 = (long double *) writebuf;
1696  else if (datatype == MPI_INT) writebuf4i = (int *) writebuf;
1697  else if (datatype == MPI_LONG_LONG_INT) writebuf8i = (long long int *) writebuf;
1698 
1699 
1700 #if(USEMPI)
1701  // no need for tempbuf, works since first write to jonio is CPU=0's writebuf
1702  if(myid!=0) MPI_Isend(writebuf, N1 * N2 * N3 * numcolumns, datatype, MPIid[0], myid, MPI_COMM_GRMHD, &srequest);
1703  if (myid == 0) {
1704  for (l = 0; l < numprocs; l++) {
1705  logsfprintf("on myid==0: mpiios combine: %d of numprocs=%d data=%lld\n",l,numprocs,(long long int) (N1 * N2 * N3 * numcolumns) );
1706  if(l!=0){
1707  MPI_Irecv(writebuf, N1 * N2 * N3 * numcolumns, datatype, MPIid[l], l, MPI_COMM_GRMHD, &rrequest);
1708  MPI_Wait(&rrequest, &mpichstatus);
1709  }
1710 
1711  othercpupos[1]=l%ncpux1;
1712  othercpupos[2]=(int)((l%(ncpux1*ncpux2))/ncpux1);
1713  othercpupos[3]=(int)(l/(ncpux1*ncpux2));
1714 
1715  // now fill jonio with proper sequence (i.e. tiled mapping)
1716  for (k = 0; k < N3; k++) for (j = 0; j < N2; j++)
1717  for (i = 0; i < N1; i++)
1718  for (col = 0; col < numcolumns; col++) {
1719  // mapvaluejonio is global single-dimensional index for position in total CPU space - in C order for "array[k][j][i]"
1720  // Note that this storage mapping function forces disk to have i as fastest regardless of order stored in original array.
1721  // This is fully compatible with any ORDERSTORAGE choice since global arrays are still properly accessed and this mapping just forces writebuf to be filled or read with fixed mapping function as desirable, so that user can change ORDERSTORAGE without the files written being changed.
1722  mapvaluejonio =
1723  + (long long int)ncpux1 * (long long int)N1 * (long long int)ncpux2 * (long long int)N2 * (long long int)numcolumns * ((long long int)k + (long long int)othercpupos[3] * (long long int)N3)
1724  + (long long int)ncpux1 * (long long int)N1 * (long long int)numcolumns * ((long long int)j + (long long int)othercpupos[2] * (long long int)N2)
1725  + (long long int)numcolumns * ((long long int)i + (long long int)othercpupos[1] * (long long int)N1)
1726  + (long long int)col;
1727 
1728  // mapvaluetempbuf is a single-buffer single-dimensional index for the position in the buffer in C-order
1729  mapvaluetempbuf =
1730  + (long long int)k * (long long int)N1 * (long long int)N2 * (long long int)numcolumns
1731  + (long long int)j * (long long int)N1 * (long long int)numcolumns
1732  + (long long int)i * (long long int)numcolumns + (long long int)col;
1733 
1734  if (datatype == MPI_UNSIGNED_CHAR) jonio1[mapvaluejonio] = writebuf1[mapvaluetempbuf];
1735  else if (datatype == MPI_FLOAT) jonio4[mapvaluejonio] = writebuf4[mapvaluetempbuf];
1736  else if (datatype == MPI_DOUBLE) jonio8[mapvaluejonio] = writebuf8[mapvaluetempbuf];
1737  else if (datatype == MPI_LONG_DOUBLE) jonio16[mapvaluejonio] = writebuf16[mapvaluetempbuf];
1738  else if (datatype == MPI_INT) jonio4i[mapvaluejonio] = writebuf4i[mapvaluetempbuf];
1739  else if (datatype == MPI_LONG_LONG_INT) jonio8i[mapvaluejonio] = writebuf8i[mapvaluetempbuf];
1740  }
1741  }
1742  }
1743  if(myid!=0) MPI_Wait(&srequest, &mpichstatus);
1744  free(writebuf); // writebuf used by each CPU
1745 
1746  if (myid == 0) {
1747  logsfprintf("on myid==0: mpiios combine: writing: %lld\n",(long long int)totalsize[1] * (long long int)totalsize[2] * (long long int)totalsize[3] * (long long int)numcolumns);
1748  // now write out collected data using CPU=0
1749  if(docolsplit){
1750  numfiles=numcolumns;
1751  }
1752  else numfiles=1;
1753 
1754  if(bintxt==BINARYOUTPUT){
1755  // GODMARK: Below may be too large of a size for fwrite() to output
1756  if(numfiles==1) fwrite(jonio, sizeofdatatype,(size_t)totalsize[1]*(size_t)totalsize[2]*(size_t)totalsize[3]*(size_t)numcolumns, *fp);
1757  else{
1758  for(i=0;i<(long long int)totalsize[1]*totalsize[2]*totalsize[3]*numcolumns;i++){
1759  if (datatype == MPI_UNSIGNED_CHAR) fwrite(&jonio1[i], sizeofdatatype,1, fp[i%numfiles]);
1760  else if (datatype == MPI_FLOAT) fwrite(&jonio4[i], sizeofdatatype,1, fp[i%numfiles]);
1761  else if (datatype == MPI_DOUBLE) fwrite(&jonio8[i], sizeofdatatype,1, fp[i%numfiles]);
1762  else if (datatype == MPI_LONG_DOUBLE) fwrite(&jonio16[i], sizeofdatatype,1, fp[i%numfiles]);
1763  else if (datatype == MPI_INT) fwrite(&jonio4i[i], sizeofdatatype,1, fp[i%numfiles]);
1764  else if (datatype == MPI_LONG_LONG_INT) fwrite(&jonio8i[i], sizeofdatatype,1, fp[i%numfiles]);
1765  }
1766  }
1767  }
1768  else if(bintxt==TEXTOUTPUT){ // properly ordered, so just dump it
1769  for(i=0;i<(long long int)totalsize[1]*totalsize[2]*totalsize[3]*numcolumns;i++){
1770  if (datatype == MPI_UNSIGNED_CHAR) fprintf(fp[i%numfiles],"%c",jonio1[i]);
1771  else if (datatype == MPI_FLOAT) fprintf(fp[i%numfiles],"%15.7g",jonio4[i]);
1772  else if (datatype == MPI_DOUBLE) fprintf(fp[i%numfiles],"%21.15g",jonio8[i]);
1773  else if (datatype == MPI_LONG_DOUBLE) fprintf(fp[i%numfiles],"%31.25Lg",jonio16[i]);
1774  else if (datatype == MPI_INT) fprintf(fp[i%numfiles],"%d",jonio4i[i]);
1775  else if (datatype == MPI_LONG_LONG_INT) fprintf(fp[i%numfiles],"%lld",jonio8i[i]);
1776  if(numfiles==1){
1777  if((i+1)%numcolumns) fprintf(*fp," ");
1778  else fprintf(*fp,"\n");
1779  }
1780  else fprintf(fp[i%numfiles],"\n");
1781  }
1782  }
1783  logsfprintf("on myid==0: mpiios combine: freeing\n");
1784  free(jonio); // used by CPU=0
1785  for(i=0;i<numfiles;i++){
1786  fclose(fp[i]);
1787  fp[i] = NULL;
1788  }
1789  }// end if myid==0
1790 #endif
1791 
1792  logsfprintf("mpiios end combine\n");
1793 
1794 }
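// To make the tiled mapping in mpiios_combine() concrete, consider a
// hypothetical run with ncpux1=2, ncpux2=ncpux3=1, N1=2, N2=N3=1 and
// numcolumns=3 (illustration only). A minimal sketch of the same index
// arithmetic as mapvaluejonio, with the column index fastest:
static long long int jonio_index(long long int i, long long int j, long long int k,
                                 long long int col, int pos1, int pos2, int pos3,
                                 int numcolumns)
{
  return
    + (long long int)ncpux1*N1*ncpux2*N2*numcolumns*(k + (long long int)pos3*N3)
    + (long long int)ncpux1*N1*numcolumns*(j + (long long int)pos2*N2)
    + (long long int)numcolumns*(i + (long long int)pos1*N1)
    + col;
}
// CPU 0 (pos1=0), local i=1, col=2 -> 3*1+2 = 5 (last slot of CPU 0's row);
// CPU 1 (pos1=1), local i=0, col=0 -> 3*(0+2)+0 = 6, i.e. CPU 1's tile lands
// directly after CPU 0's on disk, independent of the order the buffers arrive.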
1795 
1796 
1797 
1798 void mpiios_seperate(int bintxt, int stage, MPI_Datatype datatype, int numcolumns,
1799  FILE ** fp, void *jonio,
1800  void *writebuf)
1801 {
1802  long long int i, j, k, l, col, mapvaluejonio, mapvaluetempbuf;
1803 #if(USEMPI)
1804  MPI_Request rrequest;
1805  MPI_Request srequest;
1806 #endif
1807  int othercpupos[COMPDIM + 1];
1808 
1809  int sizeofdatatype;
1810 
1811  unsigned char *jonio1;
1812  float *jonio4;
1813  double *jonio8;
1814  long double *jonio16;
1815  int *jonio4i;
1816  long long int *jonio8i;
1817 
1818  unsigned char *writebuf1;
1819  float *writebuf4;
1820  double *writebuf8;
1821  long double *writebuf16;
1822  int *writebuf4i;
1823  long long int *writebuf8i;
1824  unsigned short short4char;
1825 
1826  int numfiles;
1827 
1828 
1829  logsfprintf("mpiios begin seperate\n");
1830 
1831  sizeofdatatype=getsizeofdatatype(datatype);
1832 
1833  if (datatype == MPI_UNSIGNED_CHAR) jonio1 = (unsigned char *) jonio;
1834  else if (datatype == MPI_FLOAT) jonio4 = (float *) jonio;
1835  else if (datatype == MPI_DOUBLE) jonio8 = (double *) jonio;
1836  else if (datatype == MPI_LONG_DOUBLE) jonio16 = (long double *) jonio;
1837  else if (datatype == MPI_INT) jonio4i = (int *) jonio;
1838  else if (datatype == MPI_LONG_LONG_INT) jonio8i = (long long int *) jonio;
1839 
1840 
1841  if (datatype == MPI_UNSIGNED_CHAR) writebuf1 = (unsigned char *) writebuf;
1842  else if (datatype == MPI_FLOAT) writebuf4 = (float *) writebuf;
1843  else if (datatype == MPI_DOUBLE) writebuf8 = (double *) writebuf;
1844  else if (datatype == MPI_LONG_DOUBLE) writebuf16 = (long double *) writebuf;
1845  else if (datatype == MPI_INT) writebuf4i = (int *) writebuf;
1846  else if (datatype == MPI_LONG_LONG_INT) writebuf8i = (long long int *) writebuf;
1847 
1848 
1849 #if(USEMPI)
1850 
1851  if (stage == STAGE1) {
1852  if (myid == 0) {
1853  if(docolsplit){
1854  numfiles=numcolumns;
1855  }
1856  else numfiles=1;
1857 
1858  if(bintxt==BINARYOUTPUT){
1859  // first let cpu=0 read data
1860  // GODMARK: Below may be too big for fread() to read
1861  if(numfiles==1) fread(jonio, sizeofdatatype,(size_t)totalsize[1]*(size_t)totalsize[2]*(size_t)totalsize[3]*(size_t)numcolumns, *fp);
1862  else{
1863  for(i=0;i<(long long int)totalsize[1]*totalsize[2]*totalsize[3]*numcolumns;i++){
1864  if (datatype == MPI_UNSIGNED_CHAR) fread(&jonio1[i], sizeofdatatype,1, fp[i%numfiles]);
1865  else if (datatype == MPI_FLOAT) fread(&jonio4[i], sizeofdatatype,1, fp[i%numfiles]);
1866  else if (datatype == MPI_DOUBLE) fread(&jonio8[i], sizeofdatatype,1, fp[i%numfiles]);
1867  else if (datatype == MPI_LONG_DOUBLE) fread(&jonio16[i], sizeofdatatype,1, fp[i%numfiles]);
1868  else if (datatype == MPI_INT) fread(&jonio4i[i], sizeofdatatype,1, fp[i%numfiles]);
1869  else if (datatype == MPI_LONG_LONG_INT) fread(&jonio8i[i], sizeofdatatype,1, fp[i%numfiles]);
1870  }
1871  }
1872  }
1873  else if(bintxt==TEXTOUTPUT){ // properly ordered, so just read it in sequence
1874  for(i=0;i<(long long int)totalsize[1]*totalsize[2]*totalsize[3]*numcolumns;i++){
1875  if (datatype == MPI_UNSIGNED_CHAR){
1876  fscanf(fp[i%numfiles],"%hu",&short4char);
1877  jonio1[i]=short4char; // convert
1878  }
1879  else if (datatype == MPI_FLOAT) fscanf(fp[i%numfiles],"%f",&jonio4[i]);
1880  else if (datatype == MPI_DOUBLE) fscanf(fp[i%numfiles],"%lf",&jonio8[i]);
1881  else if (datatype == MPI_LONG_DOUBLE) fscanf(fp[i%numfiles],"%Lf",&jonio16[i]);
1882  else if (datatype == MPI_INT) fscanf(fp[i%numfiles],"%d",&jonio4i[i]);
1883  else if (datatype == MPI_LONG_LONG_INT) fscanf(fp[i%numfiles],"%lld",&jonio8i[i]);
1884  }
1885  }
1886  }
1887  // writebuf is CPU=0's tempbuf for each CPU, including CPU=0, which is done last
1888  if (myid == 0) {
1889  for (l = numprocs-1 ; l >=0; l--) {
1890 
1891  othercpupos[1]=l%ncpux1;
1892  othercpupos[2]=(int)((l%(ncpux1*ncpux2))/ncpux1);
1893  othercpupos[3]=(int)(l/(ncpux1*ncpux2));
1894 
1895  // now unfill jonio with proper sequence (i.e. tiled mapping)
1896  for (k = 0; k < N3; k++) for (j = 0; j < N2; j++) {
1897  for (i = 0; i < N1; i++) {
1898  for (col = 0; col < numcolumns; col++) {
1899 
1900  // mapvaluejonio is global single-dimensional index for position in total CPU space - in C order for "array[k][j][i]"
1901  mapvaluejonio =
1902  + (long long int)ncpux1 * (long long int)N1 * (long long int)ncpux2 * (long long int)N2 * (long long int)numcolumns * ((long long int)k + (long long int)othercpupos[3] * (long long int)N3)
1903  + (long long int)ncpux1 * (long long int)N1 * (long long int)numcolumns * ((long long int)j + (long long int)othercpupos[2] * (long long int)N2)
1904  + (long long int)numcolumns * ((long long int)i + (long long int)othercpupos[1] * (long long int)N1)
1905  + (long long int)col;
1906 
1907  // mapvaluetempbuf is a single-buffer single-dimensional index for the position in the buffer in C-order
1908  mapvaluetempbuf =
1909  + (long long int)k * (long long int)N1 * (long long int)N2 * (long long int)numcolumns
1910  + (long long int)j * (long long int)N1 * (long long int)numcolumns
1911  + (long long int)i * (long long int)numcolumns + (long long int)col;
1912 
1913  // debug check
1914  if((mapvaluetempbuf<0)||(mapvaluetempbuf>=(long long int)N1*N2*N3*numcolumns)){
1915  dualfprintf(fail_file,"mapvaluetempbuf out of range: %lld\n",mapvaluetempbuf);
1916  myexit(96726);
1917  }
1918  if((mapvaluejonio<0)||(mapvaluejonio>=(long long int)totalsize[1]*(long long int)totalsize[2]*(long long int)totalsize[3]*(long long int)numcolumns)){
1919  dualfprintf(fail_file,"mapvaluejonio out of range: %lld\n",mapvaluejonio);
1920  myexit(6726);
1921  }
1922  if (datatype == MPI_UNSIGNED_CHAR) writebuf1[mapvaluetempbuf] = jonio1[mapvaluejonio];
1923  if (datatype == MPI_FLOAT) writebuf4[mapvaluetempbuf] = jonio4[mapvaluejonio];
1924  if (datatype == MPI_DOUBLE) writebuf8[mapvaluetempbuf] = jonio8[mapvaluejonio];
1925  if (datatype == MPI_LONG_DOUBLE) writebuf16[mapvaluetempbuf] = jonio16[mapvaluejonio];
1926  if (datatype == MPI_INT) writebuf4i[mapvaluetempbuf] = jonio4i[mapvaluejonio];
1927  if (datatype == MPI_LONG_LONG_INT) writebuf8i[mapvaluetempbuf] = jonio8i[mapvaluejonio];
1928  }
1929  }
1930  }
1931  if(l!=0){
1932 
1933  MPI_Isend(writebuf, N1 * N2 * N3 * numcolumns, datatype, MPIid[l], l, MPI_COMM_GRMHD, &srequest);
1934  MPI_Wait(&srequest, &mpichstatus);
1935  }
1936  }
1937  free(jonio); // done with jonio after loop
1938  }
1939  else{
1940  // chosen CPU to receive data from CPU=0
1941 
1942  MPI_Irecv(writebuf, N1 * N2 * N3 * numcolumns, datatype, MPIid[0], myid, MPI_COMM_GRMHD, &rrequest);
1943  MPI_Wait(&rrequest, &mpichstatus); // must complete the receive before writebuf is used below
1944  }
1945  } else if (stage == STAGE2) {
1946  free(writebuf);
1947  if (myid == 0) {
1948  fclose(*fp);
1949  *fp = NULL;
1950  }
1951  }
1952 #endif
1953 
1954  logsfprintf("mpiios end seperate\n");
1955 
1956 
1957 
1958 }
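// The two-stage protocol matters at the call site: STAGE1 scatters each tile
// into every CPU's writebuf, the caller then unpacks writebuf (e.g. with
// myget() below), and STAGE2 frees writebuf and closes the file on CPU=0.
// A minimal sketch of the calling sequence (hypothetical caller):
//
//   mpiios_seperate(bintxt, STAGE1, datatype, numcolumns, fp, jonio, writebuf);
//   /* ... each CPU reads its local tile out of writebuf here ... */
//   mpiios_seperate(bintxt, STAGE2, datatype, numcolumns, fp, jonio, writebuf);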
1959 
1960 
1961 void mpiiotu_combine(MPI_Datatype datatype, int numcolumns, FILE ** fp, void *writebuf)
1962 {
1963  long long int i, j, k, l, col;
1964 #if(USEMPI)
1965  MPI_Request rrequest;
1966  MPI_Request srequest;
1967 #endif
1968  int othercpupos[COMPDIM + 1];
1969 
1970  int sizeofdatatype;
1971 
1972  unsigned char *writebuf1;
1973  float *writebuf4;
1974  double *writebuf8;
1975  long double *writebuf16;
1976  int *writebuf4i;
1977  long long int *writebuf8i;
1978 
1979  long long int bufferwritemap;
1980  int numfiles;
1981 
1982  sizeofdatatype=getsizeofdatatype(datatype);
1983 
1984  if (datatype == MPI_UNSIGNED_CHAR) writebuf1 = (unsigned char *) writebuf;
1985  else if (datatype == MPI_FLOAT) writebuf4 = (float *) writebuf;
1986  else if (datatype == MPI_DOUBLE) writebuf8 = (double *) writebuf;
1987  else if (datatype == MPI_LONG_DOUBLE) writebuf16 = (long double *) writebuf;
1988  else if (datatype == MPI_INT) writebuf4i = (int *) writebuf;
1989  else if (datatype == MPI_LONG_LONG_INT) writebuf8i = (long long int *) writebuf;
1990 
1991 
1992 #if(USEMPI)
1993  if(myid!=0) MPI_Isend(writebuf, N1 * N2 * N3 * numcolumns, datatype, MPIid[0], myid, MPI_COMM_GRMHD, &srequest);
1994  if (myid == 0) {
1995  if(docolsplit){
1996  numfiles=numcolumns;
1997  }
1998  else numfiles=1;
1999 
2000  // done in forward order, no need to use tempbuf since CPU=0's writebuf is first out
2001  for (l = 0; l <numprocs; l++) {
2002  if(l!=0){
2003  MPI_Irecv(writebuf, N1 * N2 * N3 * numcolumns, datatype, MPIid[l], l, MPI_COMM_GRMHD, &rrequest);
2004  MPI_Wait(&rrequest, &mpichstatus);
2005  }
2006  // now write writebuf
2007  DUMPGENLOOP{
2008  for(col=0;col<numcolumns;col++){
2009 
2010  bufferwritemap=(long long int)col + (long long int)numcolumns*((long long int)i+(long long int)N1*(long long int)j+(long long int)N1*(long long int)N2*(long long int)k);
2011 
2012  if(datatype== MPI_UNSIGNED_CHAR){
2013  fprintf(fp[(bufferwritemap)%numfiles],"%c ",writebuf1[bufferwritemap]);
2014  }
2015  else if(datatype==MPI_FLOAT){
2016  fprintf(fp[(bufferwritemap)%numfiles],"%15.7g ",writebuf4[bufferwritemap]);
2017  }
2018  else if(datatype==MPI_DOUBLE){
2019  fprintf(fp[(bufferwritemap)%numfiles],"%21.15g ",writebuf8[bufferwritemap]);
2020  }
2021  else if(datatype==MPI_LONG_DOUBLE){
2022  fprintf(fp[(bufferwritemap)%numfiles],"%31.25Lg ",writebuf16[bufferwritemap]);
2023  }
2024  else if(datatype==MPI_INT){
2025  fprintf(fp[(bufferwritemap)%numfiles],"%d ",writebuf4i[bufferwritemap]);
2026  }
2027  else if(datatype==MPI_LONG_LONG_INT){
2028  fprintf(fp[(bufferwritemap)%numfiles],"%lld ",writebuf8i[bufferwritemap]);
2029  }
2030  if(numfiles>1) fprintf(fp[(bufferwritemap)%numfiles],"\n");
2031  }
2032  if(numfiles==1) fprintf(*fp,"\n");
2033  }
2034  }
2035  }
2036  if(myid!=0) MPI_Wait(&srequest, &mpichstatus);
2037  free(writebuf); // writebuf used by each CPU
2038 
2039  if (myid == 0) {
2040  for(i=0;i<numfiles;i++){
2041  fclose(fp[i]);
2042  fp[i] = NULL;
2043  }
2044  }
2045 #endif
2046 
2047 }
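// bufferwritemap above is plain row-major flattening of the local tile with
// the column index fastest, so bufferwritemap%numcolumns always recovers col.
// A hypothetical check of the arithmetic: with N1=4, N2=2, numcolumns=3, the
// cell (i=1,j=1,k=0) at col=2 maps to 2 + 3*(1 + 4*1) = 17, and with
// docolsplit (numfiles=numcolumns=3) the value goes to fp[17%3]=fp[2], i.e.
// column 2's file, as intended.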
2048 
2050 void mpiiotu_seperate(int stage, MPI_Datatype datatype, int numcolumns,
2051  FILE ** fp,void *writebuf)
2052 {
2053  long long int i, j, k, l, col;
2054 #if(USEMPI)
2055  MPI_Request rrequest;
2056  MPI_Request srequest;
2057 #endif
2058  int othercpupos[COMPDIM + 1];
2059 
2060  void *tempbuf;
2061  void *sendbuf;
2062 
2063  int numfiles;
2064 
2065  int sizeofdatatype;
2066 
2067  unsigned char *writebuf1;
2068  float *writebuf4;
2069  double *writebuf8;
2070  long double *writebuf16;
2071  int *writebuf4i;
2072  long long int *writebuf8i;
2073 
2074  unsigned char *tempbuf1;
2075  float *tempbuf4;
2076  double *tempbuf8;
2077  long double *tempbuf16;
2078  int *tempbuf4i;
2079  long long int *tempbuf8i;
2080 
2081  unsigned char *sendbuf1;
2082  float *sendbuf4;
2083  double *sendbuf8;
2084  long double *sendbuf16;
2085  int *sendbuf4i;
2086  long long int *sendbuf8i;
2087 
2088  unsigned short short4char;
2089 
2090  long long int bufferwritemap;
2091 
2092 
2093 
2094  sizeofdatatype=getsizeofdatatype(datatype);
2095 
2096  if (datatype == MPI_UNSIGNED_CHAR) writebuf1 = (unsigned char *) writebuf;
2097  else if (datatype == MPI_FLOAT) writebuf4 = (float *) writebuf;
2098  else if (datatype == MPI_DOUBLE) writebuf8 = (double *) writebuf;
2099  else if (datatype == MPI_LONG_DOUBLE) writebuf16 = (long double *) writebuf;
2100  else if (datatype == MPI_INT) writebuf4i = (int *) writebuf;
2101  else if (datatype == MPI_LONG_LONG_INT) writebuf8i = (long long int *) writebuf;
2102 
2103  if(myid==0 && stage==1){ // tempbuf is only used (and freed) in stage 1; guard avoids leaking it on stage-2 calls
2104  if((tempbuf=malloc((size_t)sizeofdatatype*(size_t)N1*(size_t)N2*(size_t)N3*(size_t)numcolumns))==NULL){
2105  dualfprintf(fail_file,"Can't allocate tempbuf in mpiiotu_seperate\n");
2106  myexit(7566);
2107  }
2108 
2109  if (datatype == MPI_UNSIGNED_CHAR) tempbuf1 = (unsigned char *) tempbuf;
2110  else if (datatype == MPI_FLOAT) tempbuf4 = (float *) tempbuf;
2111  else if (datatype == MPI_DOUBLE) tempbuf8 = (double *) tempbuf;
2112  else if (datatype == MPI_LONG_DOUBLE) tempbuf16 = (long double *) tempbuf;
2113  else if (datatype == MPI_INT) tempbuf4i = (int *) tempbuf;
2114  else if (datatype == MPI_LONG_LONG_INT) tempbuf8i = (long long int *) tempbuf;
2115 
2116  }
2117 
2118 #if(USEMPI)
2119 
2120  if (stage == 1) {
2121  if (myid == 0) {
2122  if(docolsplit){
2123  numfiles=numcolumns;
2124  }
2125  else numfiles=1;
2126 
2127  for (l = 0; l < numprocs; l++) {
2128  if(l==0){
2129  sendbuf=writebuf;
2130  sendbuf1=writebuf1;
2131  sendbuf4=writebuf4;
2132  sendbuf8=writebuf8; sendbuf16=writebuf16; // sendbuf16 was missing but is written through below for MPI_LONG_DOUBLE
2133  sendbuf4i=writebuf4i;
2134  sendbuf8i=writebuf8i;
2135  }
2136  else{
2137  sendbuf=tempbuf;
2138  sendbuf1=tempbuf1;
2139  sendbuf4=tempbuf4;
2140  sendbuf8=tempbuf8; sendbuf16=tempbuf16; // likewise
2141  sendbuf4i=tempbuf4i;
2142  sendbuf8i=tempbuf8i;
2143  }
2144 
2145  DUMPGENLOOP for (col = 0; col < numcolumns; col++) {
2146 
2147  bufferwritemap=(long long int)col+(long long int)numcolumns*((long long int)i+(long long int)N1*(long long int)j+(long long int)N1*(long long int)N2*(long long int)k);
2148 
2149  if(datatype==MPI_UNSIGNED_CHAR){
2150  fscanf(fp[(bufferwritemap)%numfiles],"%hu",&short4char);
2151  sendbuf1[bufferwritemap]=short4char;
2152  }
2153  else if(datatype==MPI_FLOAT) fscanf(fp[(bufferwritemap)%numfiles],"%f",&sendbuf4[bufferwritemap]);
2154  else if(datatype==MPI_DOUBLE) fscanf(fp[(bufferwritemap)%numfiles],"%lf",&sendbuf8[bufferwritemap]);
2155  else if(datatype==MPI_LONG_DOUBLE) fscanf(fp[(bufferwritemap)%numfiles],"%Lf",&sendbuf16[bufferwritemap]);
2156  else if(datatype==MPI_INT) fscanf(fp[(bufferwritemap)%numfiles],"%d",&sendbuf4i[bufferwritemap]);
2157  else if(datatype==MPI_LONG_LONG_INT) fscanf(fp[(bufferwritemap)%numfiles],"%lld",&sendbuf8i[bufferwritemap]);
2158  }
2159  if(l!=0){
2160  MPI_Isend(sendbuf, N1 * N2 * N3 * numcolumns, datatype, MPIid[l], l, MPI_COMM_GRMHD, &srequest);
2161  // have to wait before filling sendbuf buffer again for next CPU
2162  MPI_Wait(&srequest, &mpichstatus);
2163  }
2164  }
2165  free(tempbuf);
2166  }
2167  else{
2168  MPI_Irecv(writebuf, N1 * N2 * N3 * numcolumns, datatype, MPIid[0], myid, MPI_COMM_GRMHD, &rrequest);
2169  MPI_Wait(&rrequest, &mpichstatus); // must complete the receive before writebuf is used below
2170  }
2171  } else if (stage == 2) {
2172  free(writebuf);
2173  if (myid == 0) {
2174  if(docolsplit) numfiles=numcolumns; else numfiles=1; // numfiles is otherwise uninitialized in stage 2
2175  for(i=0;i<numfiles;i++){
2176  fclose(fp[i]); fp[i] = NULL;
2177  }
2178  }
2179  }
2180 #endif
2181 
2182 
2183 
2184 }
2185 
2186 
2187 
2188 
2189 #endif
2190 
2191 
2192 int getsizeofdatatype(MPI_Datatype datatype)
2193 {
2194  int sizeofdatatype;
2195 
2196  if (datatype == MPI_UNSIGNED_CHAR) sizeofdatatype=sizeof(unsigned char);
2197  else if (datatype == MPI_FLOAT) sizeofdatatype=sizeof(float);
2198  else if (datatype == MPI_DOUBLE) sizeofdatatype=sizeof(double);
2199  else if (datatype == MPI_LONG_DOUBLE) sizeofdatatype=sizeof(long double);
2200  else if (datatype == MPI_INT) sizeofdatatype=sizeof(int);
2201  else if (datatype == MPI_LONG) sizeofdatatype=sizeof(long int); // sometimes different than (int)
2202  else if (datatype == MPI_LONG_LONG_INT) sizeofdatatype=sizeof(long long int);
2203  else{
2204  dualfprintf(fail_file,"No such datatype=%d\n",datatype);
2205  myexit(8676);
2206  }
2207 
2208  return(sizeofdatatype);
2209 }
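// Typical use of getsizeofdatatype(), as in the allocation code above: size a
// raw byte buffer for one local tile (a sketch; the variables are illustrative):
//
//   int sz = getsizeofdatatype(MPI_DOUBLE); // == sizeof(double)
//   void *buf = malloc((size_t)sz*(size_t)N1*(size_t)N2*(size_t)N3*(size_t)numcolumns);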
2210 
2211 
2214 void mpimax(SFTYPE*maxptr)
2215 {
2216  SFTYPE send;
2217 
2218 #if(USEMPI)
2219  send = *maxptr;
2220  MPI_Allreduce(&send, maxptr, 1, MPI_SFTYPE, MPI_MAX,MPI_COMM_GRMHD);
2221 #endif
2222 }
2223 
2224 
2227 void mpiimax(int*maxptr)
2228 {
2229  int send;
2230 
2231 #if(USEMPI)
2232  send = *maxptr;
2233  MPI_Allreduce(&send, maxptr, 1, MPI_INT, MPI_MAX,MPI_COMM_GRMHD);
2234 #endif
2235 }
2236 
2239 void mpiisum(int*sumptr)
2240 {
2241  int send;
2242 
2243 #if(USEMPI)
2244  send = *sumptr;
2245  MPI_Allreduce(&send, sumptr, 1, MPI_INT, MPI_SUM,MPI_COMM_GRMHD);
2246 #endif
2247 }
2248 
2251 void mpiFTYPEsum(FTYPE*sumptr)
2252 {
2253  FTYPE send;
2254 
2255 #if(USEMPI)
2256  send = *sumptr;
2257  MPI_Allreduce(&send, sumptr, 1, MPI_FTYPE, MPI_SUM,MPI_COMM_GRMHD);
2258 #endif
2259 }
2260 
2261 
2263 void mpiisum0(int*sumptr, int recvid)
2264 {
2265  int send;
2266 
2267 #if(USEMPI)
2268  send = *sumptr;
2269  MPI_Reduce(&send, sumptr, 1, MPI_INT, MPI_SUM,MPIid[recvid], MPI_COMM_GRMHD);
2270 #endif
2271 }
2272 
2274 void mpildsum0(long int*sumptr, int recvid)
2275 {
2276  long int send;
2277 
2278 #if(USEMPI)
2279  send = *sumptr;
2280  MPI_Reduce(&send, sumptr, 1, MPI_LONG, MPI_SUM,MPIid[recvid], MPI_COMM_GRMHD);
2281 #endif
2282 }
2283 
2286 void mpimin(SFTYPE*minptr)
2287 {
2288  SFTYPE send;
2289 
2290 #if(USEMPI)
2291  send = *minptr;
2292  MPI_Allreduce(&send, minptr, 1, MPI_SFTYPE, MPI_MIN,MPI_COMM_GRMHD);
2293 #endif
2294 }
2295 
2298 void mpifmin(FTYPE*minptr)
2299 {
2300  FTYPE send;
2301 
2302 #if(USEMPI)
2303  send = *minptr;
2304  MPI_Allreduce(&send, minptr, 1, MPI_FTYPE, MPI_MIN,MPI_COMM_GRMHD);
2305 #endif
2306 }
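// The copy-to-send pattern in the reductions above exists because MPI forbids
// aliasing the send and receive buffers in MPI_Allreduce. Since MPI-2 the same
// result can be had without the temporary by passing MPI_IN_PLACE as the send
// buffer on every rank, e.g. (an equivalent sketch for mpifmin):
//
//   MPI_Allreduce(MPI_IN_PLACE, minptr, 1, MPI_FTYPE, MPI_MIN, MPI_COMM_GRMHD);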
2307 
2308 
2310 void prminmaxsum(FTYPE (*p)[NSTORE2][NSTORE3][NPR], int start,int nmemb, FTYPE *maxptr, FTYPE*minptr,FTYPE*sumptr)
2311 {
2312  long long int i,j,k,pl;
2313  FTYPE maxsend,minsend,sumsend;
2314  int domin,domax,dosum;
2315 
2316  if(maxptr==NULL) domax=0; else domax=1;
2317  if(minptr==NULL) domin=0; else domin=1;
2318  if(sumptr==NULL) dosum=0; else dosum=1;
2319 
2320  for(pl=start;pl<start+nmemb;pl++){
2321 
2322  if(domin) minptr[pl]=1E30;
2323  if(domax) maxptr[pl]=-1E30;
2324  if(dosum) sumptr[pl]=0;
2325  }
2326  DUMPGENLOOP{
2327  for(pl=start;pl<start+nmemb;pl++){
2328  if(domax) if (MACP0A1(p,i,j,k,pl) > maxptr[pl]) maxptr[pl] = MACP0A1(p,i,j,k,pl);
2329  if(domin) if (MACP0A1(p,i,j,k,pl) < minptr[pl]) minptr[pl] = MACP0A1(p,i,j,k,pl);
2330  if(dosum) sumptr[pl]+=MACP0A1(p,i,j,k,pl);
2331  }
2332  }
2333 #if(USEMPI)
2334  for(pl=start;pl<start+nmemb;pl++){
2335  if(domax){
2336  maxsend = maxptr[pl];
2337  MPI_Allreduce(&maxsend, &maxptr[pl], 1, MPI_FTYPE, MPI_MAX, MPI_COMM_GRMHD);
2338  }
2339  if(domin){
2340  minsend = minptr[pl];
2341  MPI_Allreduce(&minsend, &minptr[pl], 1, MPI_FTYPE, MPI_MIN, MPI_COMM_GRMHD);
2342  }
2343  if(dosum){
2344  sumsend = sumptr[pl];
2345  MPI_Allreduce(&sumsend, &sumptr[pl], 1, MPI_FTYPE, MPI_SUM, MPI_COMM_GRMHD);
2346  }
2347  }
2348 #endif
2349 }
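// prminmaxsum() skips any reduction whose output pointer is NULL, so a caller
// wanting only global maxima over all NPR primitives could do (a hypothetical
// call; p is the primitive array in this file's conventions):
//
//   FTYPE pmax[NPR];
//   prminmaxsum(p, 0, NPR, pmax, NULL, NULL);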
2350 
2351 
2352 
2353 
2354 
2356 void myfwrite(int bintxt, MPI_Datatype datatype, void *ptr, int start, int nmemb, int i, int j, int k, FILE**stream,void*writebuf)
2357 {
2358  long long int pl;
2359  void *voidbuf;
2360  int sizeofdatatype;
2361 
2362  static long double *writebuf16;
2363  static double *writebuf8;
2364  static float *writebuf4;
2365  static unsigned char *writebuf1;
2366  static int *writebuf4i;
2367  static long long int *writebuf8i;
2368 
2369  static long double *ptr16;
2370  static double *ptr8;
2371  static float *ptr4;
2372  static unsigned char *ptr1;
2373  static int *ptr4i;
2374  static long long int *ptr8i;
2375 
2376  long long int streamnum;
2377 
2378 
2379 
2380  sizeofdatatype=getsizeofdatatype(datatype);
2381 
2382  if (datatype == MPI_UNSIGNED_CHAR) writebuf1 = (unsigned char *) writebuf;
2383  else if (datatype == MPI_FLOAT) writebuf4 = (float *) writebuf;
2384  else if (datatype == MPI_DOUBLE) writebuf8 = (double *) writebuf;
2385  else if (datatype == MPI_LONG_DOUBLE) writebuf16 = (long double *) writebuf;
2386  else if (datatype == MPI_INT) writebuf4i = (int *) writebuf;
2387  else if (datatype == MPI_LONG_LONG_INT) writebuf8i = (long long int *) writebuf;
2388  else{
2389  dualfprintf(fail_file,"No such datatype=%d\n",datatype);
2390  myexit(1);
2391  }
2392 
2393  if (datatype == MPI_UNSIGNED_CHAR) ptr1 = (unsigned char *) ptr;
2394  else if (datatype == MPI_FLOAT) ptr4 = (float *) ptr;
2395  else if (datatype == MPI_DOUBLE) ptr8 = (double *) ptr;
2396  else if (datatype == MPI_LONG_DOUBLE) ptr16 = (long double *) ptr;
2397  else if (datatype == MPI_INT) ptr4i = (int *) ptr;
2398  else if (datatype == MPI_LONG_LONG_INT) ptr8i = (long long int *) ptr;
2399  else{
2400  dualfprintf(fail_file,"No such datatype=%d\n",datatype);
2401  myexit(1);
2402  }
2403 
2404  if(mpicombine==0){
2405  if(bintxt==BINARYOUTPUT){ // binaryoutput
2406  if(!docolsplit){
2407  fwrite((char*)ptr+(sizeofdatatype*start), sizeofdatatype, nmemb, stream[0]);
2408  nextbuf+=nmemb;
2409  }
2410  else{
2411  for(pl=start;pl<start+nmemb;pl++){
2412  streamnum=nextbuf;
2413  fwrite((char*)ptr+(sizeofdatatype*pl), sizeofdatatype, 1, stream[streamnum]);
2414  nextbuf++;
2415  }
2416  }
2417  }
2418  else{ // text output
2419  for(pl=start;pl<start+nmemb;pl++){
2420  if(docolsplit) streamnum=nextbuf;
2421  else streamnum=0;
2422  if (datatype == MPI_UNSIGNED_CHAR) fprintf(stream[streamnum],"%c ",ptr1[pl]);
2423  else if (datatype == MPI_FLOAT) fprintf(stream[streamnum],"%15.7g ",ptr4[pl]);
2424  else if (datatype == MPI_DOUBLE) fprintf(stream[streamnum],"%21.15g ",ptr8[pl]);
2425  else if (datatype == MPI_LONG_DOUBLE) fprintf(stream[streamnum],"%31.25Lg ",ptr16[pl]);
2426  else if (datatype == MPI_INT) fprintf(stream[streamnum],"%d ",ptr4i[pl]);
2427  else if (datatype == MPI_LONG_LONG_INT) fprintf(stream[streamnum],"%lld ",ptr8i[pl]);
2428  nextbuf++;
2429  }
2430  }
2431  }
2432  else{ // mpicombine==1
2433  if(docolsplit&&USEROMIO){ // column splitting with ROMIO
2434  for(pl=start;pl<start+nmemb;pl++){
2435  if(nextbuf==romiocoliter){ // only write if doing that column
2436  // BUFFERMAP2 only depends on i,j, not column number
2437  if (datatype == MPI_UNSIGNED_CHAR) writebuf1[BUFFERMAP2] = ptr1[pl];
2438  if (datatype == MPI_FLOAT) writebuf4[BUFFERMAP2] = ptr4[pl];
2439  if (datatype == MPI_DOUBLE) writebuf8[BUFFERMAP2] = ptr8[pl];
2440  if (datatype == MPI_LONG_DOUBLE) writebuf16[BUFFERMAP2] = ptr16[pl];
2441  if (datatype == MPI_INT) writebuf4i[BUFFERMAP2] = ptr4i[pl];
2442  if (datatype == MPI_LONG_LONG_INT) writebuf8i[BUFFERMAP2] = ptr8i[pl];
2443  }
2444  nextbuf++;
2445  }
2446  }
2447  else{ // no ROMIO column splitting, just normal MPI buffer writing
2448  for(pl=start;pl<start+nmemb;pl++){
2449  if (datatype == MPI_UNSIGNED_CHAR) writebuf1[BUFFERMAP] = ptr1[pl];
2450  if (datatype == MPI_FLOAT) writebuf4[BUFFERMAP] = ptr4[pl];
2451  if (datatype == MPI_DOUBLE) writebuf8[BUFFERMAP] = ptr8[pl];
2452  if (datatype == MPI_LONG_DOUBLE) writebuf16[BUFFERMAP] = ptr16[pl];
2453  if (datatype == MPI_INT) writebuf4i[BUFFERMAP] = ptr4i[pl];
2454  if (datatype == MPI_LONG_LONG_INT) writebuf8i[BUFFERMAP] = ptr8i[pl];
2455  }
2456  }
2457  }
2458 }
2459 
2461 void myfread(int bintxt, MPI_Datatype datatype, void *ptr, int start, int nmemb, int i, int j, int k, FILE**stream,void*writebuf)
2462 {
2463  long long int pl;
2464  void *voidbuf;
2465  int sizeofdatatype;
2466 
2467  static long double *writebuf16;
2468  static double *writebuf8;
2469  static float *writebuf4;
2470  static unsigned char *writebuf1;
2471  static int *writebuf4i;
2472  static long long int *writebuf8i;
2473 
2474  static long double *ptr16;
2475  static double *ptr8;
2476  static float *ptr4;
2477  static unsigned char *ptr1;
2478  static int *ptr4i;
2479  static long long int *ptr8i;
2480 
2481  unsigned short short4char;
2482 
2483  int streamnum;
2484 
2485 
2486 
2487  sizeofdatatype=getsizeofdatatype(datatype);
2488 
2489  if (datatype == MPI_UNSIGNED_CHAR) writebuf1 = (unsigned char *) writebuf;
2490  else if (datatype == MPI_FLOAT) writebuf4 = (float *) writebuf;
2491  else if (datatype == MPI_DOUBLE) writebuf8 = (double *) writebuf;
2492  else if (datatype == MPI_LONG_DOUBLE) writebuf16 = (long double *) writebuf;
2493  else if (datatype == MPI_INT) writebuf4i = (int *) writebuf;
2494  else if (datatype == MPI_LONG_LONG_INT) writebuf8i = (long long int *) writebuf;
2495  else{
2496  dualfprintf(fail_file,"No such datatype=%d\n",datatype);
2497  myexit(1);
2498  }
2499 
2500  if (datatype == MPI_UNSIGNED_CHAR) ptr1 = (unsigned char *) ptr;
2501  else if (datatype == MPI_FLOAT) ptr4 = (float *) ptr;
2502  else if (datatype == MPI_DOUBLE) ptr8 = (double *) ptr;
2503  else if (datatype == MPI_LONG_DOUBLE) ptr16 = (long double *) ptr;
2504  else if (datatype == MPI_INT) ptr4i = (int *) ptr;
2505  else if (datatype == MPI_LONG_LONG_INT) ptr8i = (long long int *) ptr;
2506  else{
2507  dualfprintf(fail_file,"No such datatype=%d\n",datatype);
2508  myexit(1);
2509  }
2510 
2511  if(mpicombine==0){
2512  if(bintxt==BINARYOUTPUT){
2513  if(!docolsplit){
2514  fread((char*)ptr+(sizeofdatatype*start), sizeofdatatype, nmemb, stream[0]);
2515  nextbuf+=nmemb;
2516  }
2517  else{
2518  for(pl=start;pl<start+nmemb;pl++){
2519  streamnum=nextbuf;
2520  fread((char*)ptr+(sizeofdatatype*pl), sizeofdatatype, 1, stream[streamnum]);
2521  nextbuf++;
2522  }
2523  }
2524  }
2525  else{
2526  for(pl=start;pl<start+nmemb;pl++){
2527  if(docolsplit) streamnum=nextbuf;
2528  else streamnum=0;
2529  if (datatype == MPI_UNSIGNED_CHAR){
2530  fscanf(stream[streamnum],"%hu",&short4char);
2531  ptr1[pl]=short4char;
2532  }
2533  else if (datatype == MPI_FLOAT) fscanf(stream[streamnum],"%f",&ptr4[pl]);
2534  else if (datatype == MPI_DOUBLE) fscanf(stream[streamnum],"%lf",&ptr8[pl]);
2535  else if (datatype == MPI_LONG_DOUBLE) fscanf(stream[streamnum],"%Lf",&ptr16[pl]);
2536  else if (datatype == MPI_INT) fscanf(stream[streamnum],"%d",&ptr4i[pl]);
2537  else if (datatype == MPI_LONG_LONG_INT) fscanf(stream[streamnum],"%lld",&ptr8i[pl]);
2538  nextbuf++;
2539  }
2540  }
2541  }
2542  else{
2543  if(docolsplit&&USEROMIO){
2544  for(pl=start;pl<start+nmemb;pl++){
2545  if(nextbuf==romiocoliter){
2546  if (datatype == MPI_UNSIGNED_CHAR) ptr1[pl]=writebuf1[BUFFERMAP2];
2547  if (datatype == MPI_FLOAT) ptr4[pl]=writebuf4[BUFFERMAP2];
2548  if (datatype == MPI_DOUBLE) ptr8[pl]=writebuf8[BUFFERMAP2];
2549  if (datatype == MPI_LONG_DOUBLE) ptr16[pl]=writebuf16[BUFFERMAP2];
2550  if (datatype == MPI_INT) ptr4i[pl]=writebuf4i[BUFFERMAP2];
2551  if (datatype == MPI_LONG_LONG_INT) ptr8i[pl]=writebuf8i[BUFFERMAP2];
2552  }
2553  nextbuf++;
2554  }
2555  }
2556  else for(pl=start;pl<start+nmemb;pl++){
2557  if (datatype == MPI_UNSIGNED_CHAR) ptr1[pl]=writebuf1[BUFFERMAP];
2558  if (datatype == MPI_FLOAT) ptr4[pl]=writebuf4[BUFFERMAP];
2559  if (datatype == MPI_DOUBLE) ptr8[pl]=writebuf8[BUFFERMAP];
2560  if (datatype == MPI_LONG_DOUBLE) ptr16[pl]=writebuf16[BUFFERMAP];
2561  if (datatype == MPI_INT) ptr4i[pl]=writebuf4i[BUFFERMAP];
2562  if (datatype == MPI_LONG_LONG_INT) ptr8i[pl]=writebuf8i[BUFFERMAP];
2563  }
2564  }
2565 }
2566 
2567 
2569 void myset(MPI_Datatype datatype, void *ptr, int start, int nmemb, void*writebuf)
2570 {
2571  long long int pl;
2572 
2573  static long double *writebuf16;
2574  static double *writebuf8;
2575  static float *writebuf4;
2576  static unsigned char *writebuf1;
2577  static int *writebuf4i;
2578  static long long int *writebuf8i;
2579 
2580  static long double *ptr16;
2581  static double *ptr8;
2582  static float *ptr4;
2583  static unsigned char *ptr1;
2584  static int *ptr4i;
2585  static long long int *ptr8i;
2586 
2587  if (datatype == MPI_UNSIGNED_CHAR) writebuf1 = (unsigned char *) writebuf;
2588  else if (datatype == MPI_FLOAT) writebuf4 = (float *) writebuf;
2589  else if (datatype == MPI_DOUBLE) writebuf8 = (double *) writebuf;
2590  else if (datatype == MPI_LONG_DOUBLE) writebuf16 = (long double *) writebuf;
2591  else if (datatype == MPI_INT) writebuf4i = (int *) writebuf;
2592  else if (datatype == MPI_LONG_LONG_INT) writebuf8i = (long long int *) writebuf;
2593  else{
2594  dualfprintf(fail_file,"No such datatype=%d\n",datatype);
2595  myexit(1);
2596  }
2597 
2598  if (datatype == MPI_UNSIGNED_CHAR) ptr1 = (unsigned char *) ptr;
2599  else if (datatype == MPI_FLOAT) ptr4 = (float *) ptr;
2600  else if (datatype == MPI_DOUBLE) ptr8 = (double *) ptr;
2601  else if (datatype == MPI_LONG_DOUBLE) ptr16 = (long double *) ptr;
2602  else if (datatype == MPI_INT) ptr4i = (int *) ptr;
2603  else if (datatype == MPI_LONG_LONG_INT) ptr8i = (long long int *) ptr;
2604  else{
2605  dualfprintf(fail_file,"No such datatype=%d\n",datatype);
2606  myexit(1);
2607  }
2608 
2609  for(pl=start;pl<start+nmemb;pl++){
2610  // nextcol is the iterator here, a global variable currently
2611  if (datatype == MPI_UNSIGNED_CHAR) writebuf1[nextcol++] = ptr1[pl];
2612  else if (datatype == MPI_FLOAT) writebuf4[nextcol++] = ptr4[pl];
2613  else if (datatype == MPI_DOUBLE) writebuf8[nextcol++] = ptr8[pl];
2614  else if (datatype == MPI_LONG_DOUBLE) writebuf16[nextcol++] = ptr16[pl];
2615  else if (datatype == MPI_INT) writebuf4i[nextcol++] = ptr4i[pl];
2616  else if (datatype == MPI_LONG_LONG_INT) writebuf8i[nextcol++] = ptr8i[pl];
2617  }
2618 }
2619 
2620 
2621 
2623 void myget(MPI_Datatype datatype, void *ptr, int start, int nmemb, void*writebuf)
2624 {
2625  long long int pl;
2626 
2627  static long double *writebuf16;
2628  static double *writebuf8;
2629  static float *writebuf4;
2630  static unsigned char *writebuf1;
2631  static int *writebuf4i;
2632  static long long int *writebuf8i;
2633 
2634  static long double *ptr16;
2635  static double *ptr8;
2636  static float *ptr4;
2637  static unsigned char *ptr1;
2638  static int *ptr4i;
2639  static long long int *ptr8i;
2640 
2641  if (datatype == MPI_UNSIGNED_CHAR) writebuf1 = (unsigned char *) writebuf;
2642  else if (datatype == MPI_FLOAT) writebuf4 = (float *) writebuf;
2643  else if (datatype == MPI_DOUBLE) writebuf8 = (double *) writebuf;
2644  else if (datatype == MPI_LONG_DOUBLE) writebuf16 = (long double *) writebuf;
2645  else if (datatype == MPI_INT) writebuf4i = (int *) writebuf;
2646  else if (datatype == MPI_LONG_LONG_INT) writebuf8i = (long long int *) writebuf;
2647  else{
2648  dualfprintf(fail_file,"No such datatype=%d\n",datatype);
2649  myexit(1);
2650  }
2651 
2652  if (datatype == MPI_UNSIGNED_CHAR) ptr1 = (unsigned char *) ptr;
2653  else if (datatype == MPI_FLOAT) ptr4 = (float *) ptr;
2654  else if (datatype == MPI_DOUBLE) ptr8 = (double *) ptr;
2655  else if (datatype == MPI_LONG_DOUBLE) ptr16 = (long double *) ptr;
2656  else if (datatype == MPI_INT) ptr4i = (int *) ptr;
2657  else if (datatype == MPI_LONG_LONG_INT) ptr8i = (long long int *) ptr;
2658  else{
2659  dualfprintf(fail_file,"No such datatype=%d\n",datatype);
2660  myexit(1);
2661  }
2662 
2663  for(pl=start;pl<start+nmemb;pl++){
2664  // nextcol is the iterator here, a global variable currently
2665  if (datatype == MPI_UNSIGNED_CHAR) ptr1[pl] = writebuf1[nextcol++];
2666  else if (datatype == MPI_FLOAT) ptr4[pl] = writebuf4[nextcol++];
2667  else if (datatype == MPI_DOUBLE) ptr8[pl] = writebuf8[nextcol++];
2668  else if (datatype == MPI_LONG_DOUBLE) ptr16[pl] = writebuf16[nextcol++];
2669  else if (datatype == MPI_INT) ptr4i[pl] = writebuf4i[nextcol++];
2670  else if (datatype == MPI_LONG_LONG_INT) ptr8i[pl] = writebuf8i[nextcol++];
2671  }
2672 }
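// myset() and myget() are mirror operations on the same writebuf layout, both
// advancing the global iterator nextcol. A minimal sketch of the intended
// pairing for one cell's worth of columns (hypothetical; assumes the caller
// resets nextcol per cell exactly as the dump routines do):
//
//   nextcol = 0;
//   myset(MPI_DOUBLE, values, 0, numcolumns, writebuf); // pack cell into buffer
//   /* ... */
//   nextcol = 0;
//   myget(MPI_DOUBLE, values, 0, numcolumns, writebuf); // unpack the same slots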
2673 
2674 
2675 
2676 
2677 int init_linklists(void)
2678 {
2679  struct blink * blinkptr;
2680  struct blink * cpulinkptr;
2681  int i;
2682  long long int numlists;
2683  long long int numcells;
2684  int maxnumcolumns;
2685 
2686 
2687 
2688 
2690  //
2691  // Below shouldn't be modified for user purposes
2692  //
2693  // setup number of buffers
2694  //
2696 
2697  maxnumcolumns=0;
2698  for(i=0;i<NUMDUMPTYPES;i++){
2699  if(maxnumcolumns<dnumcolumns[i]) maxnumcolumns=dnumcolumns[i];
2700  }
2701  // buffer must at least hold maxcolumns of data, and since buffer is only N1*N2*N3 big, make sure that at least NUMBUFFERS*N1*N2*N3>maxnumcolumns
2702  set_numbuffers(maxnumcolumns,&NUMBUFFERS);
2703 
2704 
2705  for(i=0;i<NUMDUMPTYPES;i++) if(dnumcolumns[i]>0) setuplinklist(dnumcolumns[i],i);
2706 
2707 
2708  trifprintf("end setuplinklists: %d\n",NUMDUMPTYPES);
2709 
2710 
2711 
2713  //
2714  // setup link lists for setuplinklist()
2715  //
2717 
2718  trifprintf("start per cpu lists\n");
2719  // check link lists
2720  for(i=0;i<NUMDUMPTYPES;i++){
2721  if(dnumcolumns[i]>0){
2722  logfprintf("i=%d\n",i);
2723  blinkptr=blinkptr0[i];
2724  numlists=0;
2725  numcells=0;
2726  while(blinkptr!=NULL){
2727  numcells+=blinkptr->num;
2728  // logfprintf("i=%d num=%d, numtotal=%lld\n",i,blinkptr->num,numcells);
2729  numlists++;
2730  blinkptr=blinkptr->np; // next one
2731  }
2732  logfprintf("i=%d numlists=%lld numcells=%lld\n",i,numlists,numcells);
2733  numlists=0;
2734  }
2735  }
2736 
2737  // check cpu=0 link list
2738  if(myid==0){
2739  trifprintf("start cpu==0 lists\n");
2740  for(i=0;i<NUMDUMPTYPES;i++){
2741  if(dnumcolumns[i]>0){
2742  logfprintf("i=%d\n",i);
2743  cpulinkptr=cpulinkptr0[i];
2744  numlists=0;
2745  numcells=0;
2746  while(cpulinkptr!=NULL){
2747  numcells+=cpulinkptr->num;
2748  // logfprintf("i=%d num=%d, cpu=%d, li=%d, lj=%d, lk=%d, col=%d, numtotal=%lld\n",i,cpulinkptr->num,cpulinkptr->cpu,cpulinkptr->i,cpulinkptr->j,cpulinkptr->k,cpulinkptr->col,numcells);
2749  numlists++;
2750  cpulinkptr=cpulinkptr->np; // next one
2751  }
2752  logfprintf("i=%d numlists=%lld numcells=%lld\n",i,numlists,numcells);
2753  numlists=0;
2754  }
2755  }
2756  }
2757 
2758 
2759 
2760  return(0);
2761 
2762 
2763 
2764 
2765 
2766 
2767 }
2768 
2769 
2774 int setuplinklist(int numcolumns,int which)
2775 {
2776  long long int gcount,lcount,numlinks;
2777  long long int i,j,k,col,li,lj,lk,pi,pj,pk,pid,firstlink;
2778  struct blink * clinkptr0, *clinkptr;
2779  struct blink * linkptr0for0, *linkptrfor0;
2780  long long int *lcountfor0;
2781  long long int firstlinkfor0;
2782  long long int *firstlijk,*li0,*lj0,*lk0,*lcol0;
2783  long long int ri,rj,rk,rcol;
2784  long long int *cpulist0;
2785  long long int numcpusinlist0,lcpu,itercpu,buffersize;
2786  long long int maxnumsize;
2787 
2788 
2789  set_maxnumsize(numcolumns,&maxnumsize);
2790 
2791 
2792 
2793 
2794  if(myid==0){
2795  // cpulist0's size is maximum possible number of cpus in a list due to buffer size
2796  // buffersize=(int)(ceil(ceil((FTYPE)(N1*N2*N3*NUMBUFFERS)/(FTYPE)numcolumns)*(FTYPE)(numcolumns)/(FTYPE)N1));
2797  buffersize=numprocs;
2798  stderrfprintf("max cpus in a list=%lld\n",buffersize); fflush(stderr);
2799  if((cpulist0=(long long int*)malloc(sizeof(long long int)*buffersize))==NULL){
2800  dualfprintf(fail_file,"can't allocate cpulist0\n");
2801  myexit(10012);
2802  }
2803  if((lcountfor0=(long long int*)malloc(sizeof(long long int)*numprocs))==NULL){
2804  dualfprintf(fail_file,"can't allocate lcountfor0\n");
2805  myexit(10013);
2806  }
2807  if((firstlijk=(long long int*)malloc(sizeof(long long int)*numprocs))==NULL){
2808  dualfprintf(fail_file,"can't allocate firstlijk\n");
2809  myexit(10014);
2810  }
2811  if((li0=(long long int*)malloc(sizeof(long long int)*numprocs))==NULL){
2812  dualfprintf(fail_file,"can't allocate li0\n");
2813  myexit(10015);
2814  }
2815  if((lj0=(long long int*)malloc(sizeof(long long int)*numprocs))==NULL){
2816  dualfprintf(fail_file,"can't allocate lj0\n");
2817  myexit(10016);
2818  }
2819  if((lk0=(long long int*)malloc(sizeof(long long int)*numprocs))==NULL){
2820  dualfprintf(fail_file,"can't allocate lk0\n");
2821  myexit(10017);
2822  }
2823  if((lcol0=(long long int*)malloc(sizeof(long long int)*numprocs))==NULL){
2824  dualfprintf(fail_file,"can't allocate lcol0\n");
2825  myexit(10018);
2826  }
2827  for(i=0;i<buffersize;i++){
2828  cpulist0[i]=0;
2829  }
2830  for(i=0;i<numprocs;i++){
2831  lcountfor0[i]=firstlijk[i]=li0[i]=lj0[i]=lk0[i]=lcol0[i]=0;
2832  }
2833  }
2834 
2835 
2836 
2837  numcpusinlist0=0;
2838 
2839  clinkptr0=NULL;
2840  gcount=0;
2841  lcount=0;
2842  numlinks=0;
2843  firstlink=1;
2844  if(myid==0){
2845  for(itercpu=0;itercpu<numprocs;itercpu++){ firstlijk[itercpu]=1; }
2846  linkptr0for0=NULL;
2847  firstlinkfor0=1;
2848  }
2849 
2850 
2851 
2852 
2854  // general loop
2855  for(k=0;k<ncpux3*N3;k++) for(j=0;j<ncpux2*N2;j++) for(i=0;i<ncpux1*N1;i++) for(col=0;col<numcolumns;col++){
2856  // relative local index
2857  li=i%N1;
2858  lj=j%N2;
2859  lk=k%N3;
2860  // cpu position number
2861  pi=(i/(long long int)N1);
2862  pj=(j/(long long int)N2);
2863  pk=(k/(long long int)N3);
2864  // cpu id for this data
2865  pid=pk*ncpux2*ncpux1+pj*ncpux1+pi;
2866  if(myid==pid) lcount++;
2867  if(myid==0){
2868  lcountfor0[pid]++;
2869  // below: first time this cpu's data (pid) is needed, so mark its starting point on the full grid
2870  if(firstlijk[pid]){
2871  cpulist0[numcpusinlist0++]=pid;
2872  li0[pid]=i;
2873  lj0[pid]=j;
2874  lk0[pid]=k;
2875  lcol0[pid]=col;
2876  if(col!=0){
2877  dualfprintf(fail_file,"col!=0 col=%lld, so chunking bad: numcolumns=%d which=%d\n",col,numcolumns,which);
2878  myexit(10019);
2879  }
2880  firstlijk[pid]=0;
2881  }
2882  }
2883  gcount++;
2884  // if(myid==0){
2885  // dualfprintf(fail_file,"%lld %lld %lld %lld\n",numcpusinlist0,gcount,pid,cpulist0[numcpusinlist0]); fflush(fail_file);
2886  // }
2887  // logfprintf("%lld %lld %lld %lld %lld %lld %lld %lld %lld\n",li,lj,lk,pi,pj,pk,pid,lcount,gcount);
2888  // the 1st condition below fires at every full buffer boundary; the 2nd catches the final buffer, which may be only partially filled
2889  // this works for any numcolumns or NUMBUFFERS, including at the very last zone
2890  // chunks are always whole multiples of numcolumns
2891  if((gcount%(gcountmod(numcolumns))==0)||(gcount==(long long int)totalzones*(long long int)numcolumns)){
2892  // ok, so numcolumns can't exceed the buffer size, highly unlikely to happen, and checked for!
2893  if(myid==0){
2894  // must do in order determined to have data, not numerical order
2895  for(itercpu=0;itercpu<numcpusinlist0;itercpu++){
2896  lcpu=cpulist0[itercpu];
2897  if(lcountfor0[lcpu]>0){
2898  if(itercpu==0){ // first cpu in list
2899  ri=li0[lcpu];
2900  rj=lj0[lcpu];
2901  rk=lk0[lcpu];
2902  rcol=lcol0[lcpu];
2903  }
2904  if(firstlinkfor0){
2905  linkptrfor0=linkptr0for0=addlink(NULL);
2906  firstlinkfor0=0;
2907  }
2908  else{
2909  linkptrfor0=addlink(linkptrfor0);
2910  }
2911  linkptrfor0->cpu=lcpu;
2912  linkptrfor0->num=lcountfor0[lcpu];
2913  linkptrfor0->i=li0[lcpu];
2914  linkptrfor0->j=lj0[lcpu];
2915  linkptrfor0->k=lk0[lcpu];
2916  linkptrfor0->col=lcol0[lcpu];
2917  linkptrfor0->ri=ri;
2918  linkptrfor0->rj=rj;
2919  linkptrfor0->rk=rk;
2920  linkptrfor0->rcol=rcol;
2921  linkptrfor0->end=0;
2922 
2923  lcountfor0[lcpu]=0; // reset counter for this id
2924  firstlijk[lcpu]=1; // reset starting value
2925  }
2926  else{
2927  dualfprintf(fail_file,"wtf: shouldn't be here. Maybe more CPUs were passed to the batch system (mpirun) than to the code?\n");
2928  myexit(10020);
2929  }
2930  }
2931  // the last link is here identified as the last in the series of cpus to communicate with. There's at least one new link here!
2932  linkptrfor0->end=1;
2933  numcpusinlist0=0; // reset list of cpus for this list
2934  }
2935  if(lcount>0){
2936  logfprintf("numcolumns=%d lcount=%lld\n",numcolumns,lcount);
2937  // initialize another structure
2938  // set previous structure value to this structure, set this next one to NULL
2939  if(firstlink){
2940  clinkptr=clinkptr0=addlink(NULL);
2941  clinkptr->num=lcount;
2942  firstlink=0;
2943  }
2944  else{
2945  clinkptr=addlink(clinkptr);
2946  clinkptr->num=lcount;
2947  }
2948  lcount=0;
2949  }
2950  }// otherwise continue
2951  } // now we have a link list for each cpu that determines how long each next buffer is that needs to be sent to cpu=0
2952  blinkptr0[which]=clinkptr0;
2953  cpulinkptr0[which]=linkptr0for0;
2954 
2955  return(0);
2956 }
2957 
2959 struct blink * addlink(struct blink * clinkptr)
2960 {
2961  struct blink *pb;
2962 
2963  pb=(struct blink *)malloc(sizeof(struct blink)); if(pb==NULL){ dualfprintf(fail_file,"addlink: can't allocate blink\n"); myexit(10021); }
2964  pb->np=NULL; // terminate list
2965  // set last link's pointer to this new structure
2966  if(clinkptr!=NULL) clinkptr->np=pb;
2967 
2968  return(pb);
2969 }
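// addlink() appends one node and returns it, so a list is built by keeping the
// head from the first call and chaining later calls, exactly as setuplinklist()
// does above. A minimal free-standing sketch of build, walk and free:
static void blink_demo(void)
{
  struct blink *head = addlink(NULL); // first node becomes the head
  struct blink *tail = head;
  tail = addlink(tail);               // append and advance
  tail->num = 42;
  while (head != NULL) {              // walk and free, as in init_linklists()
    struct blink *next = head->np;
    free(head);
    head = next;
  }
}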
2970