HARM
harm and utilities
 All Data Structures Files Functions Variables Typedefs Macros Pages
Macros
global.openmploops.h File Reference

All macros and definitions related to OpenMP loop wrappers. More...

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Macros

#define OPENMPNUMCHUNKS   100
 below should be larger than # of threads or cores used! SUPERGODMARK: OPENMPOPTMARK: More...
 
#define MINCHUNKSIZE   10
 
#define OPENMPSCHEDULE(arg)   static
 OPENMPNOTE: If really blocksize<OPENMPMINCHUNKNUMBER, then other threads just stall, which is sufficient. Note that by using not too many chunks, each thread is well-spaced in memory, which avoids the false sharing problem. More...
 
#define OPENMPCHUNKSIZE(blocksize)   (MAX(blocksize/numopenmpthreads,MINCHUNKSIZE))
 
#define OPENMPNOVARYSCHEDULE(arg)   static
 below allows compiler and/or run-time system to decide, which may be best. More...
 
#define OPENMPFULLNOVARYSCHEDULE(arg)   static
 below used when don't want to provide CHUNK argument More...
 
#define OPENMPVARYENDTIMESCHEDULE(arg)   guided
 below is for loops with very different times for each iteration (e.g. inversion loop) More...
 
#define OPENMP3DLOOPBLOCK   for(blockijk=1;blockijk<=blocksize;blockijk++)
 Oddly, performance changes by 10% when changing how blockijk starts and stops (and correspondingly fixing how it is used, of course). More...
 
#define OPENMP3DLOOPVARSDEFINE   int nxsize, nxshift, nysize, nyshift, nzsize, nzshift, blocksize, blockijk
 
#define OPENMP3DLOOPSETUPSUPERGENCOMP(i, j, k, istart, istop, jstart, jstop, kstart, kstop, di, dj, dk)
 COMP version of SUPERGENLOOP for OpenMP : Not used anywhere currently. More...
 
#define OPENMP3DLOOPSETUPSUPERGEN(istart, istop, jstart, jstop, kstart, kstop, di, dj, dk)
 non-comp loop (or used in interpline.c, where starts and stops already have grid section shifts) More...
 
#define OPENMP3DLOOPSETUP(istart, istop, jstart, jstop, kstart, kstop)
 forced to be a computational loop, so start/stop cannot already have shifts in them (which is normal for everywhere except interpline.c) More...
 
#define OPENMP3DLOOPSETUPFULL   OPENMP3DLOOPSETUP(-N1BND,N1-1+N1BND,-N2BND,N2-1+N2BND,-N3BND,N3-1+N3BND)
 COMPFULLLOOP equivalent for OpenMP. More...
 
#define OPENMP3DLOOPSETUPFULLINOUT2   OPENMP3DLOOPSETUP(-N1BND+2*SHIFT1,N1-1+N1BND-2*SHIFT1,-N2BND+2*SHIFT2,N2-1+N2BND-2*SHIFT2,-N3BND+2*SHIFT3,N3-1+N3BND-2*SHIFT3)
 
#define OPENMP3DLOOPSETUPFULLINOUT2DIR1   OPENMP3DLOOPSETUP(-N1BND+2*SHIFT1,N1-1+N1BND-2*SHIFT1,-N2BND,N2-1+N2BND,-N3BND,N3-1+N3BND)
 
#define OPENMP3DLOOPSETUPFULLINOUT2DIR2   OPENMP3DLOOPSETUP(-N1BND,N1-1+N1BND,-N2BND+2*SHIFT2,N2-1+N2BND-2*SHIFT2,-N3BND,N3-1+N3BND)
 
#define OPENMP3DLOOPSETUPFULLINOUT2DIR3   OPENMP3DLOOPSETUP(-N1BND,N1-1+N1BND,-N2BND,N2-1+N2BND,-N3BND+2*SHIFT3,N3-1+N3BND-2*SHIFT3)
 
#define OPENMP3DLOOPSETUPFULLP1   OPENMP3DLOOPSETUP(-N1BND,N1-1+N1BND+SHIFT1,-N2BND,N2-1+N2BND+SHIFT2,-N3BND,N3-1+N3BND+SHIFT3)
 COMPFULLLOOPP1 equivalent for OpenMP. More...
 
#define OPENMP3DLOOPSETUPFULLP2   OPENMP3DLOOPSETUP(-N1BND-SHIFT1,N1-1+N1BND+SHIFT1*2,-N2BND-SHIFT2,N2-1+N2BND+SHIFT2*2,-N3BND-SHIFT3,N3-1+N3BND+SHIFT3*2)
 
#define OPENMP3DLOOPSETUPFULLP1EXCEPTX2   OPENMP3DLOOPSETUP(-N1BND,N1-1+N1BND+SHIFT1,-N2BND,N2-1+N2BND,-N3BND,N3-1+N3BND+SHIFT3)
 
#define OPENMP3DLOOPSETUPZLOOP   OPENMP3DLOOPSETUP(0,N1-1,0,N2-1,0,N3-1)
 COMPZLOOP equivalent for OpenMP. More...
 
#define OPENMPBCLOOPSETUP(istart, istop, jstart, jstop, kstart, kstop)
 This is used for BC's that have SHIFT's included or not automatically, so shouldn't appear here. More...
 
#define OPENMPBCLOOPVARSDEFINELOOPX1DIR   int nxsize, nxshift, nysize, nyshift, nzsize, nzshift, blocksize, blockijk, fooi
 
#define OPENMPBCLOOPVARSDEFINELOOPX2DIR   int nxsize, nxshift, nysize, nyshift, nzsize, nzshift, blocksize, blockijk, fooj
 
#define OPENMPBCLOOPVARSDEFINELOOPX3DIR   int nxsize, nxshift, nysize, nyshift, nzsize, nzshift, blocksize, blockijk, fook
 
#define OPENMPBCLOOPSETUPLOOPX1DIR   OPENMPBCLOOPSETUP(0,0,innormalloop[2],outnormalloop[2],innormalloop[3],outnormalloop[3])
 LOOPX1dir equivalent (assumes use foo variable for i iterator to not conflict with true i iterator) Note like in global.loops.boundaries.h, assume X1-direction is done first since uses innormal and outnormal first. More...
 
#define OPENMPBCLOOPSETUPLOOPX2DIR   OPENMPBCLOOPSETUP(inboundloop[1],outboundloop[1],0,0,innormalloop[3],outnormalloop[3])
 LOOPX2dir equivalent (assumes use foo variable for j iterator to not conflict with true j iterator) Note like in global.loops.boundaries.h, assume X2-direction is done second. More...
 
#define OPENMPBCLOOPSETUPLOOPX3DIR   OPENMPBCLOOPSETUP(inboundloop[1],outboundloop[1],inboundloop[2],outboundloop[2],0,0)
 LOOPX3dir equivalent (assumes use foo variable for k iterator to not conflict with true k iterator) Note like in global.loops.boundaries.h, assume X3-direction is done last. More...
 
#define OPENMPBCLOOPBLOCK2IJKLOOPX1DIR(j, k)   OPENMP3DLOOPBLOCK2IJK(fooi,j,k);
 
#define OPENMPBCLOOPBLOCK2IJKLOOPX2DIR(i, k)   OPENMP3DLOOPBLOCK2IJK(i,fooj,k);
 
#define OPENMPBCLOOPBLOCK2IJKLOOPX3DIR(i, j)   OPENMP3DLOOPBLOCK2IJK(i,j,fook);
 
#define OPENMPBCLOOPBLOCK   OPENMP3DLOOPBLOCK
 basic loop block is the same More...
 

Detailed Description

All macros and definitions related to OpenMP loop wrappers.

Macros and definitions related to OpenMP loops

Note that OPENMP3DLOOPBLOCK2IJK defined in global.storage.h because depends upon ORDERSTORAGE

SUPERNOTE: Assume these loops are only used for non-geometry items (i.e. not Xstore, Vstore, etc., gcov, gcon, etc.) since those only exist at one point unless (MCOORD!=CARTMINKMETRIC)

For general instructions about OpenMP, see:

https://computing.llnl.gov/tutorials/openMP/ http://perfdynamics.blogspot.com/2009/02/poor-scalability-on-multicore.html http://community.edc.intel.com/t5/Multicore-Virtualization-Blog/Multi-Core-Performance-and-Plumbing/bc-p/511;jsessionid=CA7E9DBCCE03E6734F5111DE91504905 http://www.intel.com/design/pentiumii/manuals/245127.htm http://www.sandia.gov/news/resources/releases/2009/multicore.html http://www.google.com/url?q=http://www.acumem.com/images/stories/articles/HP-CAST.pdf&sa=U&start=6&ei=bM4RSoviA47msgODneX4CQ&sig2=iMevZJjH6it4ZUeqfT1jog&usg=AFQjCNENB7jrMj9bPLnAHeaezqjLPFA-qw http://www.google.com/url?q=http://www.ecmwf.int/newsevents/meetings/workshops/2008/high_performance_computing_13th/presentations/Yelick.pdf&sa=U&start=18&ei=QtIRStm1E47msgODneX4CQ&sig2=MdBB5BWnAwNpzRY0ix1bnQ&usg=AFQjCNGeQ7-gqqCR49giZJpnZ33UYGwe9Q http://www.multicoreinfo.com/category/hpc/ http://crd.lbl.gov/%7Eoliker/papers/ipdps07.pdf http://www.cs.berkeley.edu/~samw/research/papers/ipdps08.pdf http://www.google.com/url?q=http://crd.lbl.gov/~oliker/papers/ipdps07.pdf&sa=U&start=1&ei=19MRSt2xDo7msgODneX4CQ&sig2=Iu6MRVS6j2owklJxk-rI8A&usg=AFQjCNFgeq7xM_gtde0LJYs0HGDNHpDrSQ http://www.hpcwire.com/offthewire/Multicore_Code_Booster.html http://software.intel.com/en-us/articles/performance-tools-for-software-developers-auto-parallelization-and-qpar-threshold/ http://software.intel.com/en-us/articles/performance-tools-for-software-developers-auto-parallelization-and-qpar-threshold/ http://www.cs.ucsb.edu/~tyang/class/pthreads/index_sgi.html https://computing.llnl.gov/tutorials/openMP/exercise.html http://msdn.microsoft.com/en-us/magazine/cc163717.aspx https://computing.llnl.gov/tutorials/openMP/#ThreadprivateExamples http://sam.zoy.org/writings/programming/gprof.html http://books.google.com/books?id=18CmnqIhbhUC&pg=PA59&lpg=PA59&dq=openmp+reduction+operator+on+element+of+structure&source=bl&ots=sUk-5CRZhV&sig=WFGKz6IdgUDI6dNKG8HdGW8ACls&hl=en http://www.cita.utoronto.ca/~merz/pi/

Definition in file global.openmploops.h.

Macro Definition Documentation

#define MINCHUNKSIZE   10

Definition at line 42 of file global.openmploops.h.

#define OPENMP3DLOOPBLOCK   for(blockijk=1;blockijk<=blocksize;blockijk++)

Oddly, performance changes by 10% when changing how blockijk starts and stops (and correspondingly fixing how it is used, of course).

Definition at line 71 of file global.openmploops.h.

#define OPENMP3DLOOPSETUP (   istart,
  istop,
  jstart,
  jstop,
  kstart,
  kstop 
)
Value:
{nxsize=((istop+SHIFTX1UP) - (istart+SHIFTX1DN) +1 ); \
nxshift=istart+SHIFTX1DN; \
nysize=((jstop+SHIFTX2UP) - (jstart+SHIFTX2DN) +1 ); \
nyshift=jstart+SHIFTX2DN; \
nzsize=((kstop+SHIFTX3UP) - (kstart+SHIFTX3DN) +1 ); \
nzshift=kstart+SHIFTX3DN; \
blocksize=nxsize*nysize*nzsize;}

forced to be a computational loop, so start/stop cannot already have shifts in them (which is normal for everywhere except interpline.c)

Definition at line 99 of file global.openmploops.h.

#define OPENMP3DLOOPSETUPFULL   OPENMP3DLOOPSETUP(-N1BND,N1-1+N1BND,-N2BND,N2-1+N2BND,-N3BND,N3-1+N3BND)

COMPFULLLOOP equivalent for OpenMP.

Definition at line 110 of file global.openmploops.h.

#define OPENMP3DLOOPSETUPFULLINOUT2   OPENMP3DLOOPSETUP(-N1BND+2*SHIFT1,N1-1+N1BND-2*SHIFT1,-N2BND+2*SHIFT2,N2-1+N2BND-2*SHIFT2,-N3BND+2*SHIFT3,N3-1+N3BND-2*SHIFT3)

Definition at line 112 of file global.openmploops.h.

#define OPENMP3DLOOPSETUPFULLINOUT2DIR1   OPENMP3DLOOPSETUP(-N1BND+2*SHIFT1,N1-1+N1BND-2*SHIFT1,-N2BND,N2-1+N2BND,-N3BND,N3-1+N3BND)

Definition at line 114 of file global.openmploops.h.

#define OPENMP3DLOOPSETUPFULLINOUT2DIR2   OPENMP3DLOOPSETUP(-N1BND,N1-1+N1BND,-N2BND+2*SHIFT2,N2-1+N2BND-2*SHIFT2,-N3BND,N3-1+N3BND)

Definition at line 116 of file global.openmploops.h.

#define OPENMP3DLOOPSETUPFULLINOUT2DIR3   OPENMP3DLOOPSETUP(-N1BND,N1-1+N1BND,-N2BND,N2-1+N2BND,-N3BND+2*SHIFT3,N3-1+N3BND-2*SHIFT3)

Definition at line 118 of file global.openmploops.h.

#define OPENMP3DLOOPSETUPFULLP1   OPENMP3DLOOPSETUP(-N1BND,N1-1+N1BND+SHIFT1,-N2BND,N2-1+N2BND+SHIFT2,-N3BND,N3-1+N3BND+SHIFT3)

COMPFULLLOOPP1 equivalent for OpenMP.

Definition at line 122 of file global.openmploops.h.

#define OPENMP3DLOOPSETUPFULLP1EXCEPTX2   OPENMP3DLOOPSETUP(-N1BND,N1-1+N1BND+SHIFT1,-N2BND,N2-1+N2BND,-N3BND,N3-1+N3BND+SHIFT3)

Definition at line 126 of file global.openmploops.h.

#define OPENMP3DLOOPSETUPFULLP2   OPENMP3DLOOPSETUP(-N1BND-SHIFT1,N1-1+N1BND+SHIFT1*2,-N2BND-SHIFT2,N2-1+N2BND+SHIFT2*2,-N3BND-SHIFT3,N3-1+N3BND+SHIFT3*2)

Definition at line 124 of file global.openmploops.h.

#define OPENMP3DLOOPSETUPSUPERGEN (   istart,
  istop,
  jstart,
  jstop,
  kstart,
  kstop,
  di,
  dj,
  dk 
)
Value:
{nxsize=( (istop - istart)*di +1 ); \
nxshift=istart; \
nysize=( (jstop - jstart)*dj +1 ); \
nyshift=jstart; \
nzsize=( (kstop - kstart)*dk +1 ); \
nzshift=kstart; \
blocksize=nxsize*nysize*nzsize;}

non-comp loop (or used in interpline.c, where starts and stops already have grid section shifts)

Definition at line 89 of file global.openmploops.h.

#define OPENMP3DLOOPSETUPSUPERGENCOMP (   i,
  j,
  k,
  istart,
  istop,
  jstart,
  jstop,
  kstart,
  kstop,
  di,
  dj,
  dk 
)
Value:
{nxsize=( ((istop+SHIFTX1UP) - (istart+SHIFTX1DN))*di +1 ); \
nxshift=istart+SHIFTX1DN; \
nysize=( ((jstop+SHIFTX2UP) - (jstart+SHIFTX2DN))*dj +1 ); \
nyshift=jstart+SHIFTX2DN; \
nzsize=( ((kstop+SHIFTX3UP) - (kstart+SHIFTX3DN))*dk +1 ); \
nzshift=kstart+SHIFTX3DN; \
blocksize=nxsize*nysize*nzsize;}

COMP version of SUPERGENLOOP for OpenMP : Not used anywhere currently.

Definition at line 79 of file global.openmploops.h.

#define OPENMP3DLOOPSETUPZLOOP   OPENMP3DLOOPSETUP(0,N1-1,0,N2-1,0,N3-1)

COMPZLOOP equivalent for OpenMP.

Definition at line 129 of file global.openmploops.h.

#define OPENMP3DLOOPVARSDEFINE   int nxsize, nxshift, nysize, nyshift, nzsize, nzshift, blocksize, blockijk

Definition at line 75 of file global.openmploops.h.

#define OPENMPBCLOOPBLOCK   OPENMP3DLOOPBLOCK

basic loop block is the same

Definition at line 169 of file global.openmploops.h.

#define OPENMPBCLOOPBLOCK2IJKLOOPX1DIR (   j,
  k 
)    OPENMP3DLOOPBLOCK2IJK(fooi,j,k);

Definition at line 162 of file global.openmploops.h.

#define OPENMPBCLOOPBLOCK2IJKLOOPX2DIR (   i,
  k 
)    OPENMP3DLOOPBLOCK2IJK(i,fooj,k);

Definition at line 164 of file global.openmploops.h.

#define OPENMPBCLOOPBLOCK2IJKLOOPX3DIR (   i,
  j 
)    OPENMP3DLOOPBLOCK2IJK(i,j,fook);

Definition at line 166 of file global.openmploops.h.

#define OPENMPBCLOOPSETUP (   istart,
  istop,
  jstart,
  jstop,
  kstart,
  kstop 
)
Value:
{nxsize=((istop) - (istart) +1 ); \
nxshift=istart; \
nysize=((jstop) - (jstart) +1 ); \
nyshift=jstart; \
nzsize=((kstop) - (kstart) +1 ); \
nzshift=kstart; \
blocksize=nxsize*nysize*nzsize;}

This is used for BC's that have SHIFT's included or not automatically, so shouldn't appear here.

Definition at line 134 of file global.openmploops.h.

#define OPENMPBCLOOPSETUPLOOPX1DIR   OPENMPBCLOOPSETUP(0,0,innormalloop[2],outnormalloop[2],innormalloop[3],outnormalloop[3])

LOOPX1dir equivalent (assumes use foo variable for i iterator to not conflict with true i iterator) Note like in global.loops.boundaries.h, assume X1-direction is done first since uses innormal and outnormal first.

Definition at line 152 of file global.openmploops.h.

#define OPENMPBCLOOPSETUPLOOPX2DIR   OPENMPBCLOOPSETUP(inboundloop[1],outboundloop[1],0,0,innormalloop[3],outnormalloop[3])

LOOPX2dir equivalent (assumes use foo variable for j iterator to not conflict with true j iterator) Note like in global.loops.boundaries.h, assume X2-direction is done second.

Definition at line 156 of file global.openmploops.h.

#define OPENMPBCLOOPSETUPLOOPX3DIR   OPENMPBCLOOPSETUP(inboundloop[1],outboundloop[1],inboundloop[2],outboundloop[2],0,0)

LOOPX3dir equivalent (assumes use foo variable for k iterator to not conflict with true k iterator) Note like in global.loops.boundaries.h, assume X3-direction is done last.

Definition at line 160 of file global.openmploops.h.

#define OPENMPBCLOOPVARSDEFINELOOPX1DIR   int nxsize, nxshift, nysize, nyshift, nzsize, nzshift, blocksize, blockijk, fooi

Definition at line 144 of file global.openmploops.h.

#define OPENMPBCLOOPVARSDEFINELOOPX2DIR   int nxsize, nxshift, nysize, nyshift, nzsize, nzshift, blocksize, blockijk, fooj

Definition at line 146 of file global.openmploops.h.

#define OPENMPBCLOOPVARSDEFINELOOPX3DIR   int nxsize, nxshift, nysize, nyshift, nzsize, nzshift, blocksize, blockijk, fook

Definition at line 148 of file global.openmploops.h.

#define OPENMPCHUNKSIZE (   blocksize)    (MAX(blocksize/numopenmpthreads,MINCHUNKSIZE))

Definition at line 50 of file global.openmploops.h.

#define OPENMPFULLNOVARYSCHEDULE (   arg)    static

below used when don't want to provide CHUNK argument

Definition at line 64 of file global.openmploops.h.

#define OPENMPNOVARYSCHEDULE (   arg)    static

below allows compiler and/or run-time system to decide, which may be best.

define OPENMPSCHEDULE(arg) auto // option doesn't seem to exist in icc. Below is for loops ensured not to vary in how long each iteration takes. Used to avoid overhead from guided scheduling that is not needed for simple loops (e.g. simple = just setting to 0 or just taking a simple difference of variables).

Definition at line 61 of file global.openmploops.h.

#define OPENMPNUMCHUNKS   100

below should be larger than # of threads or cores used! SUPERGODMARK: OPENMPOPTMARK:

Definition at line 41 of file global.openmploops.h.

#define OPENMPSCHEDULE (   arg)    static

OPENMPNOTE: If really blocksize<OPENMPMINCHUNKNUMBER, then other threads just stall, which is sufficient. Note that by using not too many chunks, each thread is well-spaced in memory, which avoids the false sharing problem.

below seems best on average

Definition at line 49 of file global.openmploops.h.

#define OPENMPVARYENDTIMESCHEDULE (   arg)    guided

below is for loops with very different times for each iteration (e.g. inversion loop)

Definition at line 67 of file global.openmploops.h.