#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include "charm++.h"
#include "WorkUnits.decl.h"

// Handle of the main chare. Assigned from `thishandle` in main's constructor
// and used by the work-unit chares to build a CProxy_main on which they call
// finishedUnit().
/*readonly*/ CkChareID mainhandle;
// Generation threshold: a work unit whose generation is >= this value reports
// completion instead of creating a successor. This file only ever sets it to 1.
/*readonly*/ int lastGeneration;

/**
 * Burns CPU time proportional to `work` by iterating a Fibonacci-style
 * recurrence in double precision.
 *
 * The outer loop runs `work` times and the inner loop 1000 times, so the
 * total iteration count is work * 1000 without ever forming that product in
 * an int.
 *
 * @param work number of outer iterations; values <= 0 perform no iterations.
 * @return the last value produced by the recurrence, or 0.0 when no
 *         iteration ran. The value grows very quickly, so it is useful only
 *         as a result the caller can consume to keep the loop alive, not as
 *         a meaningful number.
 */
double doWork(int work)
{
  double x = 0.0, y = 1.0;
  // Initialized so the return value is well defined even when work <= 0 and
  // the loops below never execute.
  double z = 0.0;
  for (int i = 0; i < work; i++)
    for (int j = 0; j < 1000; j++)
      {
        // Fibonacci step: (x, y) -> (y, x + y).
        z = x + y;
        x = y;
        y = z;
      }
  return z;
}


class main: public Chare {
public:
  int numChares;
  main::main(CkArgMsg *m)
  {
    if (m->argc < 3) {CkPrintf("need 3 args. \n"); CkExit();}
    else {
      lastGeneration = 1;
      numChares = atoi(m->argv[1]);
      int smallGrain = atoi(m->argv[2]);
      int largestGrain = atoi(m->argv[3]);
      int perHeavy = atoi(m->argv[4]);
      CmiAssert(perHeavy < 100);
      
      CkPrintf("argc = %d, args = %d chares,  %d- %d workunits heavy: %d\%\n",
	       m->argc, numChares, smallGrain, largestGrain, perHeavy);
      mainhandle = thishandle;
      
      CProxy_master::ckNew(numChares, smallGrain, largestGrain, perHeavy);
#if 0
      // generate 1st generation of chares on randomly selected processors
      // this is the initial placement.

      for (int i = 0; i<numChares; i++) {
//	int work = (int) (smallGrain +  drand48() * ((float) (largestGrain - smallGrain))) ;
	int work;
	//int initialProcessor = rand() % CkNumPes();
	if (i < numChares * perHeavy/100.0) work = largestGrain;
	else work = smallGrain;
        int initialProcessor = (int)((1.0 * i * CkNumPes()) / numChares);

	CProxy_WorkUnit::ckNew(0, work*10, initialProcessor);
      }
      CkPrintf("fired chares\n");
#endif
    }
  }

  void main::finishedUnit() {
    // for now, we will use this. It is not a scalable method
    if (--numChares <= 0) CkExit();
  }
};

/**
 * Group whose constructor creates the first generation of work units.
 *
 * Every instance walks the full index range [0, numChares) and creates only
 * the chares whose computed initial processor equals its own CmiMyPe(), so
 * each chare index is created by exactly one instance.
 *
 * @param numChares     total number of work units across all instances
 * @param smallGrain    work amount for the non-heavy chares
 * @param largestGrain  work amount for the heavy chares
 * @param perHeavy      percentage of chares (lowest indices) that are heavy
 */
class master: public Group {
public:
  master(int numChares, int smallGrain, int largestGrain, int perHeavy)
  {
    // Same value main's constructor assigns.
    // NOTE(review): Charm++ readonly variables are meant to be written only
    // from the mainchare constructor -- confirm this assignment is still needed.
    lastGeneration = 1;

    // Chares with an index strictly below this bound are heavy. A strict
    // comparison gives 0 heavy chares for perHeavy == 0 and exactly half for
    // perHeavy == 50 with an even numChares.
    const double heavyLimit = numChares * perHeavy / 100.0;

    for (int i = 0; i < numChares; i++) {
      // Block distribution of chare indices over the processors.
      const int initialProcessor =
          static_cast<int>((1.0 * i * CkNumPes()) / numChares);
      if (initialProcessor != CmiMyPe())
        continue;

      const int work = (i < heavyLimit) ? largestGrain : smallGrain;

      // First generation is created with generation number 1.
      CProxy_WorkUnit2::ckNew(1, work*10);
    }
  }

};

/**
 * Work-unit chare that WorkUnit2 creates for the following generation.
 *
 * The constructor currently does no work: it reports completion to the main
 * chare and returns. The generation logic is kept below but compiled out.
 */
class WorkUnit: public Chare {
public:
  WorkUnit(int generation, int work) {
    //CkPrintf("[%d]: gen:%d, work:%d\n", CkMyPe(), generation, work);

    // Short-circuit: report completion immediately and do nothing else.
    CProxy_main(mainhandle).finishedUnit();
    (void)generation;  // only used by the disabled logic below
    (void)work;
#if 0
    // Disabled: work for time proportional to "work" except in the 0'th
    // generation, then either report completion or create the next generation.
    if (generation > 0) {
      double x = doWork(work);
    }
    if (generation >= lastGeneration)
      CProxy_main(mainhandle).finishedUnit();
    else
      CProxy_WorkUnit2::ckNew(generation+1, work, CmiMyPe());
    // modify this to change the amount of work in each successive
    // generation (i.e. iteration) if you want.
#endif
  }
};

/**
 * Work-unit chare: performs its share of work in the constructor, then either
 * reports completion to the main chare or creates a WorkUnit for the next
 * generation.
 *
 * @param generation generation number of this unit; generation 0 does no work
 * @param work       amount of work, passed to doWork()
 */
class WorkUnit2: public Chare {
public:
  WorkUnit2(int generation, int work) {
    //CkPrintf("[%d]: gen:%d, work:%d\n", CkMyPe(), generation, work);

    // Work for time proportional to "work" except in the 0'th generation.
    if (generation > 0) {
      // Store the result in a volatile so the optimizer cannot discard the
      // otherwise unused computation.
      volatile double sink = doWork(work);
      (void)sink;
    }

    if (generation >= lastGeneration)
      CProxy_main(mainhandle).finishedUnit();
    else
      CProxy_WorkUnit::ckNew(generation+1, work);
    // modify this to change the amount of work in each successive
    // generation (i.e. iteration) if you want.
  }
};

#include "WorkUnits.def.h"
