00001 #include "partitioning_strategies.h"
00002 #include "hilbert.h"
00003 #include "TopoManager.h"
00004 #include "converse.h"
00005 
00006 #ifdef __cplusplus
00007 #include <queue>
00008 #include <vector>
00009 #include <iostream>
00010 #include <sstream>
00011 #include <algorithm>
00012 #include <math.h>
00013 
00014 #ifndef __STDC_FORMAT_MACROS
00015 # define __STDC_FORMAT_MACROS
00016 #endif
00017 #ifndef __STDC_LIMIT_MACROS
00018 # define __STDC_LIMIT_MACROS
00019 #endif
00020 #include <inttypes.h>
00021 
00022 using namespace std;
00023 
00024 #ifndef PARTITION_TOPOLOGY_VERBOSE
00025 #define PARTITION_TOPOLOGY_VERBOSE 0
00026 #endif
00027 
00039 void getHilbertList(int * procList)
00040 {
00041   vector<int> hcoords;
00042 
00043   int numDims;
00044   int *dims, *pdims;
00045   int *ranks, numranks;
00046 
00047   TopoManager_getDimCount(&numDims);
00048 
00049   dims = new int[numDims+1];
00050   pdims = new int[numDims+1];
00051 
00052   TopoManager_getDims(dims);
00053   ranks = new int[dims[numDims]];
00054 
00055   
00056   int maxDim = dims[0];
00057   for(int i = 1; i < numDims; i++) {
00058     if(maxDim < dims[i]) maxDim = dims[i];
00059   }
00060 
00061   int pow2 = 1;
00062   while(maxDim>pow2)
00063     pow2 *= 2;
00064 
00065   int cubeDim = pow2;
00066 
00067   for(int i = 1; i < numDims; i++) {
00068     cubeDim *= pow2;
00069   }
00070 
00071   int currPos = 0;
00072   for(int i = 0; i < cubeDim; i++) {
00073     hcoords = int_to_Hilbert(i,numDims);
00074 
00075     for(int i = 0; i < numDims; i++) {
00076       if(hcoords[i] >= dims[i]) continue;
00077     }
00078 
00079     
00080     for(int i = 0; i < numDims; i++) {
00081       pdims[i] = hcoords[i];
00082     }
00083     TopoManager_getRanks(&numranks, ranks, pdims);
00084     if(numranks == 0) continue;
00085 
00086     
00087     for(int j = 0; j < numranks; j++) {
00088       procList[currPos++] = ranks[j];
00089     }
00090   }
00091 
00092   CmiAssert(currPos == CmiNumNodes());
00093 
00094   delete [] dims;
00095   delete [] pdims;
00096   delete [] ranks;
00097 }
00098 
00101 void getPlanarList(int *procList)
00102 {
00103   int numDims;
00104   int *dims, *pdims;
00105   int *ranks, numranks;
00106   int phynodes;
00107 
00108   TopoManager_getDimCount(&numDims);
00109 
00110   dims = new int[numDims+1];
00111   pdims = new int[numDims+1];
00112 
00113   TopoManager_getDims(dims);
00114   ranks = new int[dims[numDims]];
00115 
00116   phynodes = 1;
00117   for(int i = 0; i < numDims; i++) {
00118     phynodes *= dims[i];
00119     pdims[i] = 0;
00120   }
00121 
00122   int currPos = 0;
00123   for(int i = 0; i < phynodes; i++) {
00124 
00125     TopoManager_getRanks(&numranks, ranks, pdims);
00126     for(int j = numDims - 1; j > -1; j--) {
00127       pdims[j] = (pdims[j]+1) % dims[j];
00128       if(pdims[j] != 0) break;
00129     }
00130     if(numranks == 0) continue;
00131 
00132     for(int j = 0; j < numranks; j++) {
00133       procList[currPos++] = ranks[j];
00134     }
00135   }
00136 
00137   CmiAssert(currPos == CmiNumNodes());
00138 
00139   delete [] dims;
00140   delete [] pdims;
00141   delete [] ranks;
00142 }
00143 
00144 namespace {
00145 
00146 
00147 struct TopoManagerWrapper {
00148   TopoManager tmgr;
00149   int a_dim, b_dim, c_dim, d_dim, e_dim;
00150   int a_rot, b_rot, c_rot, d_rot, e_rot;
00151   int a_mod, b_mod, c_mod, d_mod, e_mod;
00152   int fixnode(int node) {  
00153     return node; 
00154   }
00155   TopoManagerWrapper() {
00156 #if CMK_BLUEGENEQ
00157     int na=tmgr.getDimNA();
00158     int nb=tmgr.getDimNB();
00159     int nc=tmgr.getDimNC();
00160     int nd=tmgr.getDimND();
00161     int ne=tmgr.getDimNE();
00162 #else
00163     int na=tmgr.getDimNX();
00164     int nb=tmgr.getDimNY();
00165     int nc=tmgr.getDimNZ();
00166     int nd=1;
00167     int ne=1;
00168 #endif
00169     std::vector<int> a_flags(na);
00170     std::vector<int> b_flags(nb);
00171     std::vector<int> c_flags(nc);
00172     std::vector<int> d_flags(nd);
00173     std::vector<int> e_flags(ne);
00174     for ( int i=0; i<na; ++i ) { a_flags[i] = 0; }
00175     for ( int i=0; i<nb; ++i ) { b_flags[i] = 0; }
00176     for ( int i=0; i<nc; ++i ) { c_flags[i] = 0; }
00177     for ( int i=0; i<nd; ++i ) { d_flags[i] = 0; }
00178     for ( int i=0; i<ne; ++i ) { e_flags[i] = 0; }
00179     int nnodes = CmiNumNodes();
00180     for ( int node=0; node<nnodes; ++node ) {
00181       int a,b,c,d,e,t;
00182 #if CMK_BLUEGENEQ
00183       tmgr.rankToCoordinates(CmiNodeFirst(fixnode(node)),a,b,c,d,e,t);
00184 #else
00185       tmgr.rankToCoordinates(CmiNodeFirst(fixnode(node)),a,b,c,t);
00186       d=0; e=0;
00187 #endif
00188       if ( a < 0 || a >= na ) CmiAbort("inconsistent torus topology!");
00189       if ( b < 0 || b >= nb ) CmiAbort("inconsistent torus topology!");
00190       if ( c < 0 || c >= nc ) CmiAbort("inconsistent torus topology!");
00191       if ( d < 0 || d >= nd ) CmiAbort("inconsistent torus topology!");
00192       if ( e < 0 || e >= ne ) CmiAbort("inconsistent torus topology!");
00193       a_flags[a] = 1;
00194       b_flags[b] = 1;
00195       c_flags[c] = 1;
00196       d_flags[d] = 1;
00197       e_flags[e] = 1;
00198     }
00199     std::basic_ostringstream<char> iout;
00200     int printTill = na, printCount = 0;
00201     iout << "Charm++> " << "TORUS A SIZE " << na << " USING";
00202     
00203     if((nb + nc +nd + ne) == 4) printTill = std::min(na,10);
00204     for ( int i = 0; (i < na) && (printCount <= printTill); ++i ) {
00205       if ( a_flags[i] ) {
00206         if(printCount == printTill) {
00207           iout << "...";
00208         } else {
00209           iout << " " << i;
00210         }
00211         printCount++;
00212       }
00213     }
00214     iout << "\n" ;
00215     iout << "Charm++> " << "TORUS B SIZE " << nb << " USING";
00216     for ( int i=0; i<nb; ++i ) { if ( b_flags[i] ) iout << " " << i; }
00217     iout << "\n" ;
00218     iout << "Charm++> " << "TORUS C SIZE " << nc << " USING";
00219     for ( int i=0; i<nc; ++i ) { if ( c_flags[i] ) iout << " " << i; }
00220     iout << "\n" ;
00221 #if CMK_BLUEGENEQ
00222     iout << "Charm++> " << "TORUS D SIZE " << nd << " USING";
00223     for ( int i=0; i<nd; ++i ) { if ( d_flags[i] ) iout << " " << i; }
00224     iout << "\n" ;
00225     iout << "Charm++> " << "TORUS E SIZE " << ne << " USING";
00226     for ( int i=0; i<ne; ++i ) { if ( e_flags[i] ) iout << " " << i; }
00227     iout << "\n" ;
00228 #endif
00229     
00230     a_rot = b_rot = c_rot = d_rot = e_rot = 0;
00231     a_mod = na; b_mod = nb; c_mod = nc; d_mod = nd; e_mod = ne;
00232 #if CMK_BLUEGENEQ
00233     if ( tmgr.absA(na) == 0 ) 
00234 #else
00235     if ( tmgr.absX(na) == 0 ) 
00236 #endif
00237       for ( int i=0, gaplen=0, gapstart=0; i<2*na; ++i ) {
00238         if ( a_flags[i%na] ) gapstart = i+1;
00239         else if ( i - gapstart >= gaplen ) {
00240           a_rot = 2*na-i-1; gaplen = i - gapstart;
00241         }
00242       }
00243 #if CMK_BLUEGENEQ
00244     if ( tmgr.absB(nb) == 0 ) 
00245 #else
00246     if ( tmgr.absY(nb) == 0 ) 
00247 #endif
00248       for ( int i=0, gaplen=0, gapstart=0; i<2*nb; ++i ) {
00249         if ( b_flags[i%nb] ) gapstart = i+1;
00250         else if ( i - gapstart >= gaplen ) {
00251           b_rot = 2*nb-i-1; gaplen = i - gapstart;
00252         }
00253       }
00254 #if CMK_BLUEGENEQ
00255     if ( tmgr.absC(nc) == 0 ) 
00256 #else
00257     if ( tmgr.absZ(nc) == 0 ) 
00258 #endif
00259       for ( int i=0, gaplen=0, gapstart=0; i<2*nc; ++i ) {
00260         if ( c_flags[i%nc] ) gapstart = i+1;
00261         else if ( i - gapstart >= gaplen ) {
00262           c_rot = 2*nc-i-1; gaplen = i - gapstart;
00263         }
00264       }
00265 #if CMK_BLUEGENEQ
00266     if ( tmgr.absD(nd) == 0 ) 
00267       for ( int i=0, gaplen=0, gapstart=0; i<2*nd; ++i ) {
00268         if ( d_flags[i%nd] ) gapstart = i+1;
00269         else if ( i - gapstart >= gaplen ) {
00270           d_rot = 2*nd-i-1; gaplen = i - gapstart;
00271         }
00272       }
00273     if ( tmgr.absE(ne) == 0 ) 
00274       for ( int i=0, gaplen=0, gapstart=0; i<2*ne; ++i ) {
00275         if ( e_flags[i%ne] ) gapstart = i+1;
00276         else if ( i - gapstart >= gaplen ) {
00277           e_rot = 2*ne-i-1; gaplen = i - gapstart;
00278         }
00279       }
00280 #endif
00281     
00282     int a_min=na, a_max=-1;
00283     int b_min=nb, b_max=-1;
00284     int c_min=nc, c_max=-1;
00285     int d_min=nd, d_max=-1;
00286     int e_min=ne, e_max=-1;
00287     for ( int node=0; node<nnodes; ++node ) {
00288       int a,b,c,d,e,t;
00289 #if CMK_BLUEGENEQ
00290       tmgr.rankToCoordinates(CmiNodeFirst(fixnode(node)),a,b,c,d,e,t);
00291 #else
00292       tmgr.rankToCoordinates(CmiNodeFirst(fixnode(node)),a,b,c,t);
00293       d=0; e=0;
00294 #endif
00295       a = (a+a_rot)%a_mod;
00296       b = (b+b_rot)%b_mod;
00297       c = (c+c_rot)%c_mod;
00298       d = (d+d_rot)%d_mod;
00299       e = (e+e_rot)%e_mod;
00300       if ( a < a_min ) a_min = a;
00301       if ( b < b_min ) b_min = b;
00302       if ( c < c_min ) c_min = c;
00303       if ( d < d_min ) d_min = d;
00304       if ( e < e_min ) e_min = e;
00305       if ( a > a_max ) a_max = a;
00306       if ( b > b_max ) b_max = b;
00307       if ( c > c_max ) c_max = c;
00308       if ( d > d_max ) d_max = d;
00309       if ( e > e_max ) e_max = e;
00310     }
00311     int a_len = a_max - a_min + 1;
00312     int b_len = b_max - b_min + 1;
00313     int c_len = c_max - c_min + 1;
00314     int d_len = d_max - d_min + 1;
00315     int e_len = e_max - e_min + 1;
00316     int lensort[5];
00317     lensort[0] = (a_len << 3) + 4;
00318     lensort[1] = (b_len << 3) + 3;
00319     lensort[2] = (c_len << 3) + 2;
00320     lensort[3] = (d_len << 3) + 1;
00321     lensort[4] = (e_len << 3) + 0;
00322     
00323     std::sort(lensort, lensort+5);
00324     
00325     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 4 ) a_dim = 4-i; }
00326     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 3 ) b_dim = 4-i; }
00327     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 2 ) c_dim = 4-i; }
00328     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 1 ) d_dim = 4-i; }
00329     for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 0 ) e_dim = 4-i; }
00330     iout << "Charm++> " << "TORUS MINIMAL MESH SIZE IS " << a_len << " BY " << b_len << " BY " << c_len
00331 #if CMK_BLUEGENEQ
00332     << " BY " << d_len << " BY " << e_len
00333 #endif
00334     << "\n" ;
00335     if ( CmiMyNodeGlobal() == 0 ) printf("%s",iout.str().c_str());
00336     
00337   }
00338   void coords(int node, int *crds) {
00339     int a,b,c,d,e,t;
00340 #if CMK_BLUEGENEQ
00341     tmgr.rankToCoordinates(CmiNodeFirst(fixnode(node)),a,b,c,d,e,t);
00342 #else
00343     tmgr.rankToCoordinates(CmiNodeFirst(fixnode(node)),a,b,c,t);
00344     d=0; e=0;
00345 #endif
00346     crds[a_dim] = (a+a_rot)%a_mod;
00347     crds[b_dim] = (b+b_rot)%b_mod;
00348     crds[c_dim] = (c+c_rot)%c_mod;
00349     crds[d_dim] = (d+d_rot)%d_mod;
00350     crds[e_dim] = (e+e_rot)%e_mod;
00351   }
00352   int coord(int node, int dim) {
00353     int crds[5];
00354     coords(node,crds);
00355     return crds[dim];
00356   }
00357   struct node_sortop_topo {
00358     TopoManagerWrapper &tmgr;
00359     const int *sortdims;
00360     node_sortop_topo(TopoManagerWrapper &t, int *d) : tmgr(t), sortdims(d) {}
00361     bool operator() (int node1, int node2) const {
00362       int crds1[5], crds2[5];
00363       tmgr.coords(node1,crds1);
00364       tmgr.coords(node2,crds2);
00365       for ( int i=0; i<5; ++i ) {
00366         int d = sortdims[i];
00367         if ( crds1[d] != crds2[d] ) return ( crds1[d] < crds2[d] );
00368       }
00369       
00370       
00371       
00372       return ( node1 < node2 );
00373     }
00374   };
00375   void sortLongest(int *node_begin, int *node_end) {
00376     if ( node_begin == node_end ) return;
00377     int tmins[5], tmaxs[5], tlens[5], sortdims[5];
00378     coords(*node_begin, tmins);
00379     coords(*node_begin, tmaxs);
00380     for ( int *nodeitr = node_begin; nodeitr != node_end; ++nodeitr ) {
00381       int tvals[5];
00382       coords(*nodeitr, tvals);
00383       for ( int i=0; i<5; ++i ) {
00384         if ( tvals[i] < tmins[i] ) tmins[i] = tvals[i];
00385         if ( tvals[i] > tmaxs[i] ) tmaxs[i] = tvals[i];
00386       }
00387     }
00388     for ( int i=0; i<5; ++i ) {
00389       tlens[i] = ((tmaxs[i] - tmins[i] + 1) << 3) + (4-i);
00390     }
00391     std::sort(tlens, tlens+5);
00392     for ( int i=0; i<5; ++i ) {
00393       sortdims[4-i] = 4 - (tlens[i] & 7);
00394     }
00395     if ( PARTITION_TOPOLOGY_VERBOSE && CmiMyNodeGlobal() == 0 )
00396       printf("sorting %d(%d) %d(%d) %d(%d)\n", sortdims[0], tlens[4]>>3, sortdims[1], tlens[3]>>3, sortdims[2], tlens[2]>>3);
00397     std::sort(node_begin,node_end,node_sortop_topo(*this,sortdims));
00398     int *nodes = node_begin;
00399     int nnodes = node_end - node_begin;
00400   }
00401 };
00402 
00403 void recursive_bisect(
00404   int part_begin, int part_end,
00405   int *node_begin, int *node_end,
00406   TopoManagerWrapper &tmgr
00407   ) {
00408 
00409   if ( part_end - part_begin == 1 ) {
00410     if ( CmiPartitionSize(part_begin) != node_end - node_begin ) {
00411       CmiAbort("partitioning algorithm size mismatch in recursive_bisect()");
00412     }
00413     tmgr.sortLongest(node_begin, node_end);
00414     
00415     if ( PARTITION_TOPOLOGY_VERBOSE && CmiMyNodeGlobal() == 0 ) {
00416       int crds[5];
00417       tmgr.coords(*node_begin, crds);
00418       printf("partitioning node %5d at %5d %5d %5d %5d %5d nodes %5" PRId64 "\n",
00419                *node_begin,
00420                crds[0], crds[1], crds[2], crds[3], crds[4],
00421                (int64_t)(node_end - node_begin));
00422     }
00423     return;
00424   }
00425 
00426   if ( PARTITION_TOPOLOGY_VERBOSE && CmiMyNodeGlobal() == 0 )
00427     printf("recursive_bisect %d %d %" PRId64 "\n", part_begin, part_end, (int64_t)(node_end-node_begin));
00428 
00429   int nnodes = node_end - node_begin;
00430   int nsplit = (nnodes+1) / 2;
00431   int ncount = 0;
00432   int part_split = part_begin;
00433   for ( int p = part_begin; p < part_end; ++p ) {
00434     int ps = CmiPartitionSize(p);
00435     if ( abs(ncount+ps-nsplit) < abs(ncount-nsplit) ) part_split = p+1;
00436     else break;
00437     ncount += ps;
00438   }
00439   if ( part_split == part_begin || part_split == part_end )
00440     CmiAbort("partitioning algorithm failure in recursive_bisect()");
00441 
00442   int *node_split = node_begin + ncount;
00443 
00444   tmgr.sortLongest(node_begin, node_end);
00445 
00446   
00447   recursive_bisect(
00448     part_begin, part_split, node_begin, node_split, tmgr);
00449   recursive_bisect(
00450     part_split, part_end, node_split, node_end, tmgr);
00451 }
00452 
00453 } 
00454 
00457 void getRecursiveBisectionList(int numparts, int *procList)
00458 {
00459   int n = CmiNumNodes();
00460   for(int i = 0; i < n; i++) {
00461     procList[i] = i;
00462   }
00463   if ( numparts < 2 ) return;
00464   TopoManagerWrapper tmgr;
00465   recursive_bisect(
00466     0, numparts, procList, procList + n, tmgr);
00467   if ( PARTITION_TOPOLOGY_VERBOSE && CmiMyNodeGlobal() == 0 ) {
00468     int crds[5];
00469     for ( int i=0,p=0,ip=0; i<n; ++i,++ip ) {
00470       if ( p == numparts ) break;  
00471       if ( ip == CmiPartitionSize(p) ) { ++p; ip=0; }
00472       tmgr.coords(procList[i],crds);
00473       printf("procList[%5d] part[%3d] %5d (%2d %2d %2d %2d %2d)\n",
00474         i, p, procList[i],
00475         crds[0], crds[1], crds[2], crds[3], crds[4]);
00476     }
00477   }
00478 }
00479 
00480 
00481 #endif