#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#include "charm++.h"

#include "WorkUnits.decl.h"

// Handle of the main chare. It is assigned in the main chare's constructor
// (mainhandle = thishandle) and used by WorkUnit::Work to build a proxy for
// reporting completion via finishedUnit().
// NOTE(review): the "readonly" marker presumably mirrors a readonly
// declaration in the .ci interface file -- confirm against WorkUnits.ci.
/*  readonly */ CkChareID mainhandle;


/**
 * Spend CPU time proportional to `work` by iterating a Fibonacci recurrence
 * in double precision.
 *
 * Each unit of work performs 1000 recurrence steps; the two loops are kept
 * separate so the total step count never has to fit in a single int.
 *
 * @param work  number of 1000-step batches to run; values <= 0 run nothing.
 * @return the last sum computed, or 0.0 when no step was executed. The value
 *         mainly exists so the loop has an observable result.
 */
double doWork(int work)
{
  double older = 0.0;
  double newer = 1.0;
  double sum = 0.0;

  for (int batchesLeft = work; batchesLeft > 0; --batchesLeft) {
    for (int stepsLeft = 1000; stepsLeft > 0; --stepsLeft) {
      // One Fibonacci step: shift the window forward by one term.
      sum = older + newer;
      older = newer;
      newer = sum;
    }
  }

  return sum;
}


class main: public Chare {
public:
  int numChares;
  main::main(CkArgMsg *m)
  {
    if (m->argc < 3) {CkPrintf("need 3 args. \n"); CkExit();}
    else {
      numChares = atoi(m->argv[1]);
      int smallGrain = atoi(m->argv[2]);
      int largestGrain = atoi(m->argv[3]);

      CkPrintf("argc = %d, args = %d chares,  %d- %d workunits\n",
	       m->argc, numChares, smallGrain, largestGrain);
      mainhandle = thishandle;
      
      CkArrayOptions opt;
      CkGroupID cldmapID = CProxy_CldMap::ckNew();
      opt.setMap(cldmapID);
      CProxy_WorkUnit arr = CProxy_WorkUnit::ckNew(0, opt);
      for (int i = 0; i<numChares; i++) {
	int work = (int) (smallGrain +  drand48() * ((float) (largestGrain - smallGrain))) ;
        arr[i].insert(work*100);
      }
    }
  }

  void main::finishedUnit() {
    // for now, we will use this. It is not a scalable method
    //    CkPrintf("numChares left before counting this reply: %d\n", numChares);
    if (--numChares <= 0) CkExit();
  }
};

/**
 * Element of a 1D chare array that performs a fixed amount of synthetic work
 * as soon as it is constructed and then notifies the main chare.
 */
class WorkUnit: public ArrayElement1D {
public:
  /**
   * Reports which processor the element was created on, then runs its work.
   *
   * @param work  amount of work passed straight to Work().
   */
  WorkUnit(int work){
    // CkPrintf (rather than raw printf) keeps output consistent with the rest
    // of the file.
    CkPrintf("Index %d on processor %d\n",thisIndex,CkMyPe());
    Work(work);
  }

  /**
   * Runs doWork(work), logs completion, and signals the main chare through
   * finishedUnit().
   *
   * @param work  number of work batches handed to doWork().
   */
  void Work(int work) {
    // work for time proportional to "work"
    double x = doWork(work);
    // Printing x gives the format string a matching argument and keeps the
    // computed value observable.
    CkPrintf("[%d]: Finished %d work (result %g)\n", CkMyPe(), work, x);
    CProxy_main(mainhandle).finishedUnit();
  }

  /// Migration constructor; takes a CkMigrateMessage and does nothing.
  WorkUnit(CkMigrateMessage *m) {}
};

#include "WorkUnits.def.h"
