#include "libprofile.h"



#include "libwrapper.h"


#define SHADOW_DEP_MATRIX 1


char *non_privatizable_set_array=NULL;

#define TRANSITIVE 1
//#define HASH
#define USE_SPINLOCK
#define MAX_CORES 32

#ifdef USE_SPINLOCK
pthread_spinlock_t spinlock[MAX_CORES];
#else
pthread_mutex_t mutex[MAX_CORES];
#endif

static int memop_num=0;

/* Initialise the lock at index thread_id: the spinlock when USE_SPINLOCK is
   defined, the mutex otherwise.  thread_id indexes an array of MAX_CORES
   locks and is not range-checked; the pthread return code is discarded.
   The spinlock's pshared argument is the literal 0 (presumably
   PTHREAD_PROCESS_PRIVATE -- confirm for the target platform). */
inline void shadow_init_lock(int thread_id)
{
#ifdef USE_SPINLOCK
      pthread_spin_init(&spinlock[thread_id], 0);
#else
      pthread_mutex_init(&mutex[thread_id], NULL);
#endif
}

/* Acquire the lock at index thread_id (spinlock or mutex, depending on
   USE_SPINLOCK).  The index is not range-checked and the pthread return
   code is discarded. */
inline void shadow_lock(int thread_id)
{
#ifdef USE_SPINLOCK
        pthread_spin_lock(&spinlock[thread_id]);
#else
        pthread_mutex_lock(&mutex[thread_id]);
#endif
}

/* Release the lock at index thread_id (spinlock or mutex, depending on
   USE_SPINLOCK).  The index is not range-checked and the pthread return
   code is discarded. */
inline void shadow_unlock(int thread_id)
{
#ifdef USE_SPINLOCK
        pthread_spin_unlock(&spinlock[thread_id]);
#else
        pthread_mutex_unlock(&mutex[thread_id]);
#endif
}

/* Destroy the lock at index thread_id (spinlock or mutex, depending on
   USE_SPINLOCK).  The misspelt name ("destory") is the function's actual
   identifier.  The index is not range-checked and the pthread return code
   is discarded. */
inline void shadow_destory_lock(int thread_id)
{  
#ifdef USE_SPINLOCK
      pthread_spin_destroy(&spinlock[thread_id]);
#else
      pthread_mutex_destroy(&mutex[thread_id]);
#endif
}



/* Structure holding a memory-operation buffer pointer and an iteration
   number; both start out zeroed.  (The original note describes it as the
   record for a currently open loop and its iteration number.) */
class   PROFILE
{
public:
  char *memops;   /* NULL until assigned by the user of the record */
  int iterno;     /* 0 until assigned by the user of the record */

  /* Initialisers are listed in declaration order: members are always
     constructed in that order, and listing them differently only earns a
     -Wreorder warning. */
  PROFILE () :
    memops(NULL),
    iterno(0)
	 { }

};


static int shadow_iterno=0;
static int shadow_stamp=0;

/* Per-location record used by the privatization check: which iteration and
   which loop instance last stored to the location. */
class   PRIVATIZE_SIGNATURE
{
public:

  unsigned int     iterno : 32;  /* iteration number recorded by the latest store */
  unsigned int     stamp : 32;   /* loop-instance stamp recorded by the latest store */

  /* Both fields start at zero. */
  PRIVATIZE_SIGNATURE()
  {
    iterno = 0;
    stamp = 0;
  }
};


// interval_map<time, guests> party; 

/* Record a store for the privatization check.
   addr - address of the access;
   size - added to addr to locate the PRIVATIZE_SIGNATURE that is updated
          (presumably the signature sits right behind the accessed object
          -- confirm against the instrumentation pass);
   id   - memory operation id; not used by this function.
   The signature receives the current iteration number and loop stamp. */

extern "C"
void
__shadow_privatizable_print_store(PTR addr, int size, int id)
{
  PRIVATIZE_SIGNATURE &sig = *(PRIVATIZE_SIGNATURE*)(addr + size  );
  sig.stamp = shadow_stamp;
  sig.iterno = shadow_iterno;
}


/* Check a load for the privatization analysis.  The signature is read from
   addr + size.  Nothing happens when the recorded store stamp is older than
   the current loop stamp, or when the store was recorded in the current
   iteration; in every other case memory operation `id` is flagged in
   non_privatizable_set_array. */
extern "C" 
void
__shadow_privatizable_print_load (PTR addr, int size, int id) 
{
  PRIVATIZE_SIGNATURE *sig = (PRIVATIZE_SIGNATURE*)(addr + size );

  /* Short-circuit order matches the original: stamp first, then iteration. */
  if ( sig->stamp < shadow_stamp || sig->iterno == shadow_iterno )
    return;

  non_privatizable_set_array[id] = 1;
}





/* Loop-iteration entry hook for the privatization profiler: advances the
   current iteration number.  loop_id (the loop's global id) is not used. */

extern "C" void
__shadow_privatizable_print_entry (int loop_id)
{
  shadow_iterno++;
}






/* Loop exit hook for the privatization profiler: resets the iteration
   number to 0 and bumps the loop-instance stamp, so signatures written
   during this run of the loop compare as older than the next run.
   loop_id (the loop's global id) is not used. */

extern "C" void
__shadow_privatizable_print_exit (int loop_id)
{
  shadow_iterno = 0;
  shadow_stamp++;  
}



/* Set up the privatization profiler.
   mem_num - highest memory operation id; the flag array gets mem_num + 1
             zeroed entries so that ids can index it directly.
   Aborts the process if the flag array cannot be allocated, because the
   load hook writes through it unconditionally. */
extern "C" void
__shadow_privatizable_initialize (int mem_num)
{
  memop_num = mem_num + 1;
  non_privatizable_set_array = (char*) calloc( memop_num, sizeof(char) ); 
  /* calloc may legitimately return NULL for a zero-sized request. */
  if ( !non_privatizable_set_array && memop_num != 0 )
  {
    fprintf(stderr, "Allocate non-privatizable set array failed!\n");
    abort();
  }
  local_pool = new MEM_POOL;
}



/* Tear down the privatization profiler and write its report.
   Releases local_pool, calls generate_memory_op_table("loopmemop.info"),
   collects every read operation that was never flagged as non-privatizable
   into privatizable_set, frees the flag array, and writes one line per
   collected id to "privatize.info".  If the report file cannot be opened
   an error is printed and no report is written. */
extern "C" void
__shadow_privatizable_finalize ()
{
  delete local_pool;

  generate_memory_op_table ("loopmemop.info");

  for (MEMOP_INFO_HASH::iterator iter = mem_info_hash.begin (); iter != mem_info_hash.end (); ++iter)
  {
    MEMORY_OPERATION_INFO *info = iter->second;
    if ( info->read && !non_privatizable_set_array[info->id]  )
      privatizable_set.insert(info->id);
  }

  free(non_privatizable_set_array);
  non_privatizable_set_array = NULL;   /* do not leave a dangling global */

  FILE *fp = fopen("privatize.info", "w");
  if ( !fp )
  {
    /* Writing through a NULL stream would crash; report and give up. */
    perror("privatize.info");
    return;
  }

  for ( std::set<int>::iterator iter = privatizable_set.begin(); iter != privatizable_set.end(); ++iter)
    fprintf( fp, "MEMOP %d is privatizable\n", *iter );

  fclose(fp);
}



/* Per-location record used by the dependence profiler: the loop-instance
   stamp plus the id and iteration number of the most recent read and of the
   most recent write.  The layout is unchanged. */
class   PROFILE_SIGNATURE
{
public:

  int                stamp ;  /* loop-instance stamp, in case the loop executes several times */
  unsigned int       last_read_iterno;   // iteration number of the last read
  unsigned int       last_write_iterno;  // iteration number of the last write
  unsigned int       last_read : ID_BIT_WIDTH;   // id of the last read (0 = none)
  unsigned int       last_write : ID_BIT_WIDTH;  // id of the last write (0 = none)
  
  /* Initialisers follow declaration order, which is the order in which the
     members are actually constructed (avoids -Wreorder). */
  PROFILE_SIGNATURE():
    stamp(0), last_read_iterno(0), last_write_iterno(0),
    last_read(0), last_write(0)
  {}
};


/* One cell of a dependence table.  distance == 0 means "no dependence
   recorded"; any other value is the recorded distance. */
class   DEPENDENCE_ENTRY
{
public:
   unsigned int  distance;

   /* A fresh entry records no dependence. */
   DEPENDENCE_ENTRY()
   {
     distance = 0;
   }
};


/* Number of helper threads.  In the code visible here it is only consulted
   inside disabled (#if 0) regions. */
int shadow_thread_num=0;
/* Flat memop_num x memop_num tables, indexed [sink * memop_num + source];
   allocated in __shadow_dependence_initialize when SHADOW_DEP_MATRIX is
   defined. */
DEPENDENCE_ENTRY* loop_independent_dependence_matrix;
DEPENDENCE_ENTRY* loop_carried_dependence_matrix;

/* Argument block handed to a helper thread; `parm` is meant to point to an
   array of them (only allocated in disabled code). */
struct thread_PARM
{
  int id;   /* index of the thread, also used as its lock/task-pool index */
} *parm;

/* Helper thread handles (only allocated in disabled code). */
static pthread_t *threads; 
#if 0

/* DISABLED CODE (inside #if 0).  One deferred memory-access event for the
   threaded dependence checker: the address, size, operation id, read flag,
   and the iteration number / loop stamp current when it was queued.
   NOTE(review): Run() uses signature members (latest_stamp, latest_iterno,
   history_*, pending_*) and a check_dependence() helper that the
   PROFILE_SIGNATURE class in this file does not declare, so this code would
   not compile as-is if re-enabled. */
class SHADOW_TASK
{
public:
  PTR addr;
  int size; // bit size
  int id;
  int read;
  int iterno;
  int stamp;

  SHADOW_TASK( PTR a, int s, int d, int r, int i, int st) :
    addr(a), size(s), id(d), read(r), iterno(i), stamp(st)

    {    }

    
  /* Process the event against the signature whose pointer is stored at
     addr + size/8, creating the signature on first use. */
  void Run()
  {
    PROFILE_SIGNATURE *signature =  *(PROFILE_SIGNATURE**)(addr + (size >> 3) );
    if (!signature)
    {
      signature = new PROFILE_SIGNATURE;
      *(PROFILE_SIGNATURE**)(addr + (size >> 3) ) = signature;
    }
    if ( read )
    {
      // Signature belongs to an older loop instance: start over.
      // NOTE(review): unlike the write path, latest_stamp is not updated here.
      if ( signature->latest_stamp < stamp )
      {
        signature->history_store.clear();
        signature->history_load.clear();
        signature->pending_store.clear();
        signature->pending_load.clear();
        signature->pending_load.insert(id); 
        signature->latest_iterno = iterno;
        return;
      }
      
      // First access of a new iteration: pending sets become history.
      else if ( signature->latest_iterno < iterno )
      {
        signature->history_store.insert(signature->pending_store.begin(), signature->pending_store.end());
        signature->history_load.insert(signature->pending_load.begin(), signature->pending_load.end());
        signature->pending_store.clear();
        signature->pending_load.clear();
        signature->pending_load.insert(id); 
        signature->latest_iterno = iterno;
        check_dependence(signature->history_store, id,  LOOP_CARRIED);
      }
      
      // Another access within the same iteration.
      else if ( signature->latest_iterno == iterno )
      {
        check_dependence(signature->pending_store, id,  LOOP_INDEPENDENT);
        if ( !signature->pending_store.empty() ) // not killed
          check_dependence(signature->history_store, id,  LOOP_CARRIED);
        signature->pending_load.insert(id); 
      }
      
    }
    else  // write
    {
      // Signature belongs to an older loop instance: start over.
      if ( signature->latest_stamp < stamp )
      {
        signature->history_store.clear();
        signature->history_load.clear();
        signature->pending_store.clear();
        signature->pending_load.clear();
        signature->pending_store.insert(id); 
        signature->latest_iterno = iterno;
        signature->latest_stamp = stamp;
        return;
      }
      
      // First access of a new iteration: pending sets become history.
      else if ( signature->latest_iterno < iterno )
      {
        signature->history_store.insert(signature->pending_store.begin(), signature->pending_store.end());
        signature->history_load.insert(signature->pending_load.begin(), signature->pending_load.end());
        signature->pending_store.clear();
        signature->pending_load.clear();
        signature->pending_store.insert(id); 
        signature->latest_iterno = iterno;
        check_dependence(signature->history_store, id,  LOOP_CARRIED);
        check_dependence(signature->pending_load, id,  LOOP_CARRIED);
      }
      
      // Another access within the same iteration.
      // NOTE(review): history sets are checked as LOOP_INDEPENDENT and
      // pending sets as LOOP_CARRIED here, the reverse of the read path.
      else if ( signature->latest_iterno == iterno )
      {
        check_dependence(signature->history_store, id,  LOOP_INDEPENDENT);
        check_dependence(signature->history_load, id,  LOOP_INDEPENDENT);
        check_dependence(signature->pending_store, id,  LOOP_CARRIED);
        check_dependence(signature->pending_load, id,  LOOP_CARRIED);
        signature->pending_store.insert(id); 
      }
      
    }
  }
  
};


typedef std::list<SHADOW_TASK>  SHADOW_TASK_TOOL;
SHADOW_TASK_TOOL task_pool[MAX_CORES];
int task_start[MAX_CORES];



/* DISABLED CODE (inside #if 0).  Worker thread body: repeatedly takes the
   first task from task_pool[id] under that slot's lock and runs it.  A task
   whose id is 0 ends the thread.
   threadData - pointer to the thread_PARM carrying this worker's index.
   NOTE(review): taskList.empty() is evaluated without holding the lock, and
   the loop spins without yielding when the list is empty. */
void *shadow_dependence_thread (void *threadData)
{

  thread_PARM *p=(thread_PARM *)threadData;

  int thread_id = p->id;
  SHADOW_TASK_TOOL & taskList = task_pool[thread_id];
  SHADOW_TASK_TOOL localList;   // never used

  while (1)
  {
    //while (!task_start[thread_id] )
      ;
    //get task   
    if (!taskList.empty ())
    {
      shadow_lock(thread_id);
      SHADOW_TASK task = *(taskList.begin());     
      taskList.erase(taskList.begin());
      shadow_unlock(thread_id);
      if (task.id == 0)   // id 0 ends the worker
        return 0;
      task.Run ();      
    }
    //task_start[thread_id] = 0;
  }
  return (void *) 0;
}


/* DISABLED CODE (inside #if 0).  Queue a store event for a worker thread.
   addr      - address of the access;
   size      - size of the access (SHADOW_TASK annotates it as a bit size);
   id        - memory operation id;
   thread_id - index of the task pool / lock to use.
   The task is created with its read flag set to 0 and the current
   iteration number and loop stamp. */

extern "C" void
__shadow_dependence_print_store_thread(PTR addr, int size, int id, int thread_id)
{
  shadow_lock(thread_id);
  task_pool[thread_id].push_back( SHADOW_TASK(addr, size, id, 0, shadow_iterno, shadow_stamp) );
  shadow_unlock(thread_id);
}

/* DISABLED CODE (inside #if 0).  Queue a load event for a worker thread:
   same as the store variant except that the task's read flag is 1. */
extern "C" void
__shadow_dependence_print_load_thread(PTR addr, int size, int id, int thread_id)
{
  shadow_lock(thread_id);
  task_pool[thread_id].push_back( SHADOW_TASK(addr, size, id, 1, shadow_iterno, shadow_stamp) );
  shadow_unlock(thread_id);
}
#endif

typedef   std::tr1::unordered_map < PTR, PROFILE_SIGNATURE >  MEMOP_HASH;
MEMOP_HASH  memop_hash;
static std::vector<std::map<int, DEPENDENCE_ENTRY> > shadow_carried_dependence_map;
static std::vector<std::map<int, DEPENDENCE_ENTRY> > shadow_independent_dependence_map;

static void shadow_check_dependence(std::set<int>& history, int cur, dep_kind kind)
{

  DEPENDENCE_ENTRY *ma ;

  if (kind == LOOP_INDEPENDENT )
    ma = loop_independent_dependence_matrix;
  else
    ma = loop_carried_dependence_matrix;

  for( std::set<int>::iterator iter = history.begin(); iter != history.end(); ++iter)
  {
    unsigned int prev = *iter;
    /* cur depends on prev */      
    ma [ cur * memop_num + prev ].distance = 1;
  }

}


/* Return the loop-carried dependence cell for sink i and source j: a slot
   of the flat matrix when SHADOW_DEP_MATRIX is defined, otherwise the
   (auto-created) element of the per-sink map. */
inline DEPENDENCE_ENTRY*
shadow_carried_dependence_entry(int i, int j)
{
#ifdef SHADOW_DEP_MATRIX
  int cell = i * memop_num + j;
  return loop_carried_dependence_matrix + cell;
#else
  std::map<int, DEPENDENCE_ENTRY> &row = shadow_carried_dependence_map[i];
  return &row[j];
#endif
}

/* Return the loop-independent dependence cell for sink i and source j: a
   slot of the flat matrix when SHADOW_DEP_MATRIX is defined, otherwise the
   (auto-created) element of the per-sink map. */
inline DEPENDENCE_ENTRY*
shadow_independent_dependence_entry(int i, int j)
{
#ifdef SHADOW_DEP_MATRIX
  int cell = i * memop_num + j;
  return loop_independent_dependence_matrix + cell;
#else
  std::map<int, DEPENDENCE_ENTRY> &row = shadow_independent_dependence_map[i];
  return &row[j];
#endif
}

static long double shadow_inst_count = 0;
static char *liveness_array=NULL;

/* Flag memory operation `id` as live by setting its byte in liveness_array.
   The index is not range-checked; the array must already be allocated
   (done in __shadow_dependence_initialize). */
inline void 
report_live(int id)
{
  liveness_array[id] = 1;
}

  
/* Record a load of `size` consecutive locations addr .. addr+size-1, each
   tracked by its own hash-table signature (created on first use).
   For every location:
     - a write recorded with iteration number 0 is reported as live;
     - outside a loop iteration (shadow_iterno == 0) nothing else happens;
     - a signature from an older loop instance is reset to "read by id";
     - otherwise a read-after-write dependence on the last writer is
       recorded (carried, keeping the smallest distance, or independent
       when both accesses are in the same iteration) and the read is noted.
   id - memory operation id of the load. */
extern "C" void
__hash_bitwise_print_load(PTR addr, int size, int id)
{
  for (int offset = 0; offset < size; offset++)
  {
    PROFILE_SIGNATURE &sig = memop_hash [addr+offset];

    // Liveness: the last write was recorded with iteration number 0.
    if ( sig.last_write && sig.last_write_iterno == 0 )
      report_live (sig.last_write);

    if ( shadow_iterno == 0 )
      continue;

    if ( sig.stamp < shadow_stamp )  // signature is from another loop instance
    {
      sig.stamp = shadow_stamp;
      sig.last_read = id;
      sig.last_read_iterno = shadow_iterno;
      sig.last_write = 0;
      sig.last_write_iterno = 0;
      continue;
    }

    /* Same loop instance: check RAW dependence. */
    if ( sig.last_write )
    {
      int diff = shadow_iterno - sig.last_write_iterno;
      if ( diff == 0 )
      {
        shadow_independent_dependence_entry( id , sig.last_write )->distance = 1;
      }
      else
      {
        DEPENDENCE_ENTRY *entry = shadow_carried_dependence_entry( id , sig.last_write );
        if ( entry->distance )
          entry->distance = MIN(entry->distance, diff);
        else
          entry->distance = diff;
      }
    }

    sig.last_read = id;
    sig.last_read_iterno = shadow_iterno;
  }
}






/* Record a store to `size` consecutive locations addr .. addr+size-1, each
   tracked by its own hash-table signature (created on first use).
   addr - first address; size - number of locations; id - memory operation
   id of the store.
   For every location:
     - outside a loop iteration the signature is tagged with stamp -1 and
       the writer is remembered with iteration number 0;
     - a signature from an older loop instance is reset to "written by id";
     - otherwise write-after-write and write-after-read dependences on the
       last writer / last reader are recorded (carried with the smallest
       distance, or independent within one iteration) and the write noted. */

extern "C" void
__hash_bitwise_print_store(PTR addr, int size, int id)
{
  for (int offset = 0; offset < size; offset++)
  {
    PROFILE_SIGNATURE &sig = memop_hash [addr+offset];

    if ( shadow_iterno == 0 )   // not inside a loop iteration
    {
      sig.stamp = -1;
      sig.last_write = id;
      sig.last_write_iterno = 0;
      continue;
    }

    if ( sig.stamp < shadow_stamp )  // signature is from another loop instance
    {
      sig.stamp = shadow_stamp;
      sig.last_read = 0;
      sig.last_read_iterno = 0;
      sig.last_write = id;
      sig.last_write_iterno = shadow_iterno;
      continue;
    }

    /* Same loop instance: check WAW dependence. */
    int diff = shadow_iterno - sig.last_write_iterno;
    if ( diff == 0 )
    {
      shadow_independent_dependence_entry( id , sig.last_write )->distance = 1;
    }
    else
    {
      DEPENDENCE_ENTRY *entry = shadow_carried_dependence_entry( id , sig.last_write );
      if ( entry->distance )
        entry->distance = MIN(entry->distance, diff);
      else
        entry->distance = diff;
    }

    /* Check WAR dependence. */
    diff = shadow_iterno - sig.last_read_iterno;
    if ( diff == 0 )
    {
      shadow_independent_dependence_entry( id , sig.last_read )->distance = 1;
    }
    else
    {
      DEPENDENCE_ENTRY *entry = shadow_carried_dependence_entry( id , sig.last_read );
      if ( entry->distance )
        entry->distance = MIN(entry->distance, diff);
      else
        entry->distance = diff;
    }

    sig.last_write = id;
    sig.last_write_iterno = shadow_iterno;
  }
}


  
/* Record a load at addr using the hash-table signature keyed by addr
   (created on first use).  size is not used; id is the memory operation id.
     - a write recorded with iteration number 0 is reported as live;
     - outside a loop iteration nothing else happens;
     - a signature from an older loop instance is reset to "read by id";
     - otherwise a read-after-write dependence on the last writer is
       recorded and the read is noted. */
extern "C" void
__hash_dependence_print_load(PTR addr, int size, int id)
{
  PROFILE_SIGNATURE &sig = memop_hash [addr];

  // Liveness: the last write was recorded with iteration number 0.
  if ( sig.last_write && sig.last_write_iterno == 0 )
    report_live (sig.last_write);

  if ( shadow_iterno == 0 )
    return;

  if ( sig.stamp < shadow_stamp )  // signature is from another loop instance
  {
    sig.stamp = shadow_stamp;
    sig.last_read = id;
    sig.last_read_iterno = shadow_iterno;
    sig.last_write = 0;
    sig.last_write_iterno = 0;
    return;
  }

  /* Same loop instance: check RAW dependence. */
  if ( sig.last_write )
  {
    int diff = shadow_iterno - sig.last_write_iterno;
    if ( diff == 0 )
    {
      shadow_independent_dependence_entry( id , sig.last_write )->distance = 1;
    }
    else
    {
      DEPENDENCE_ENTRY *entry = shadow_carried_dependence_entry( id , sig.last_write );
      if ( entry->distance )
        entry->distance = MIN(entry->distance, diff);
      else
        entry->distance = diff;
    }
  }

  sig.last_read = id;
  sig.last_read_iterno = shadow_iterno;
}




/* Record a store at addr using the hash-table signature keyed by addr
   (created on first use).
   addr - address of the access; size - not used; id - memory operation id.
     - outside a loop iteration the signature is tagged with stamp -1 and
       the writer is remembered with iteration number 0;
     - a signature from an older loop instance is reset to "written by id";
     - otherwise write-after-write and write-after-read dependences are
       recorded and the write is noted. */

extern "C" void
__hash_dependence_print_store(PTR addr, int size, int id)
{
  PROFILE_SIGNATURE &sig = memop_hash [addr];

  if ( shadow_iterno == 0 )   // not inside a loop iteration
  {
    sig.stamp = -1;
    sig.last_write = id;
    sig.last_write_iterno = 0;
    return;
  }

  if ( sig.stamp < shadow_stamp )  // signature is from another loop instance
  {
    sig.stamp = shadow_stamp;
    sig.last_read = 0;
    sig.last_read_iterno = 0;
    sig.last_write = id;
    sig.last_write_iterno = shadow_iterno;
    return;
  }

  /* Same loop instance: check WAW dependence. */
  int diff = shadow_iterno - sig.last_write_iterno;
  if ( diff == 0 )
  {
    shadow_independent_dependence_entry( id , sig.last_write )->distance = 1;
  }
  else
  {
    DEPENDENCE_ENTRY *entry = shadow_carried_dependence_entry( id , sig.last_write );
    if ( entry->distance )
      entry->distance = MIN(entry->distance, diff);
    else
      entry->distance = diff;
  }

  /* Check WAR dependence. */
  diff = shadow_iterno - sig.last_read_iterno;
  if ( diff == 0 )
  {
    shadow_independent_dependence_entry( id , sig.last_read )->distance = 1;
  }
  else
  {
    DEPENDENCE_ENTRY *entry = shadow_carried_dependence_entry( id , sig.last_read );
    if ( entry->distance )
      entry->distance = MIN(entry->distance, diff);
    else
      entry->distance = diff;
  }

  sig.last_write = id;
  sig.last_write_iterno = shadow_iterno;
}

  
/* Record a load using the PROFILE_SIGNATURE found at addr + size.
   id is the memory operation id.
     - a write recorded with iteration number 0 is reported as live;
     - outside a loop iteration nothing else happens;
     - a signature from an older loop instance is reset to "read by id";
     - otherwise a read-after-write dependence on the last writer is
       recorded and the read is noted. */
extern "C" void
__shadow_dependence_print_load(PTR addr, int size, int id)
{
  PROFILE_SIGNATURE &sig = *(PROFILE_SIGNATURE*)(addr + size);

  // Liveness: the last write was recorded with iteration number 0.
  if ( sig.last_write && sig.last_write_iterno == 0 )
    report_live (sig.last_write);

  // Not in the loop now
  if ( shadow_iterno == 0 )
    return;

  if ( sig.stamp < shadow_stamp )  // signature is from another loop instance
  {
    sig.stamp = shadow_stamp;
    sig.last_read = id;
    sig.last_read_iterno = shadow_iterno;
    sig.last_write = 0;
    sig.last_write_iterno = 0;
    return;
  }

  /* Same loop instance: check RAW dependence. */
  if ( sig.last_write )
  {
    int diff = shadow_iterno - sig.last_write_iterno;
    if ( diff == 0 )
    {
      shadow_independent_dependence_entry( id , sig.last_write )->distance = 1;
    }
    else
    {
      DEPENDENCE_ENTRY *entry = shadow_carried_dependence_entry( id , sig.last_write );
      if ( entry->distance )
        entry->distance = MIN(entry->distance, diff);
      else
        entry->distance = diff;
    }
  }

  sig.last_read = id;
  sig.last_read_iterno = shadow_iterno;
}


/* Record a store using the PROFILE_SIGNATURE found at addr + size.
   addr - address of the access; size - offset from addr to the signature;
   id - memory operation id.
     - outside a loop iteration the signature is tagged with stamp -1 and
       the writer is remembered with iteration number 0;
     - a signature from an older loop instance is reset to "written by id";
     - otherwise write-after-write and write-after-read dependences are
       recorded and the write is noted. */

extern "C" void
__shadow_dependence_print_store(PTR addr, int size, int id)
{
  PROFILE_SIGNATURE &sig = *(PROFILE_SIGNATURE*)(addr + size);

  // Not in the loop now
  if ( shadow_iterno == 0 )
  {
    sig.stamp = -1;
    sig.last_write = id;
    sig.last_write_iterno = 0;
    return;
  }

  if ( sig.stamp < shadow_stamp )  // signature is from another loop instance
  {
    sig.stamp = shadow_stamp;
    sig.last_read = 0;
    sig.last_read_iterno = 0;
    sig.last_write = id;
    sig.last_write_iterno = shadow_iterno;
    return;
  }

  /* Same loop instance: check WAW dependence. */
  int diff = shadow_iterno - sig.last_write_iterno;
  if ( diff == 0 )
  {
    shadow_independent_dependence_entry( id , sig.last_write )->distance = 1;
  }
  else
  {
    DEPENDENCE_ENTRY *entry = shadow_carried_dependence_entry( id , sig.last_write );
    if ( entry->distance )
      entry->distance = MIN(entry->distance, diff);
    else
      entry->distance = diff;
  }

  /* Check WAR dependence. */
  diff = shadow_iterno - sig.last_read_iterno;
  if ( diff == 0 )
  {
    shadow_independent_dependence_entry( id , sig.last_read )->distance = 1;
  }
  else
  {
    DEPENDENCE_ENTRY *entry = shadow_carried_dependence_entry( id , sig.last_read );
    if ( entry->distance )
      entry->distance = MIN(entry->distance, diff);
    else
      entry->distance = diff;
  }

  sig.last_write = id;
  sig.last_write_iterno = shadow_iterno;
}




  
/* "Virtual" load hook: forwards unchanged to the hash-table based
   __hash_dependence_print_load.  The misspelling "virutal" is part of the
   exported symbol name. */
extern "C" void
__shadow_dependence_print_virutal_load(PTR addr, int size, int id)
{
  __hash_dependence_print_load (addr, size, id);
}


/* "Virtual" store hook: forwards unchanged to the hash-table based
   __hash_dependence_print_store.  The misspelling "virutal" is part of the
   exported symbol name. */
extern "C" void
__shadow_dependence_print_virutal_store(PTR addr, int size, int id)
{
  __hash_dependence_print_store (addr, size, id);
}


/* Loop-iteration entry hook for the dependence profiler: advances the
   current iteration number.  loop_id (the loop's global id) is not used.
   The #if 0 region is disabled code that would signal every worker thread
   and busy-wait until each has cleared its task_start flag. */

extern "C" void
__shadow_dependence_print_enter (int loop_id)
{
  shadow_iterno++;

#if 0
  cout <<"iter  " << shadow_iterno<<"  synchronzing  "<<endl;
  /* synchronize */
  for (int i = 0; i < shadow_thread_num; i++)
  {
    task_start[i] = 1;
  }

  while(1)
  {
    bool wait = false;
    for (int i = 0; i < shadow_thread_num; i++)
    {
      if ( task_start[i] == 1 )
        wait = true;
    }

    if ( !wait )
      break;
  }

  cout <<"iter  " << shadow_iterno<<"  finish"<<endl;
#endif  
}

/* Loop exit hook for the dependence profiler: resets the iteration number
   to 0 (meaning "not inside a loop iteration") and bumps the loop-instance
   stamp so existing signatures compare as older than the next run of the
   loop.  loop_id is not used.  The #if 0 region is disabled code that would
   signal every worker thread and busy-wait for them. */
extern "C" void
__shadow_dependence_print_exit (int loop_id)
{
  shadow_iterno = 0;
  shadow_stamp++;  
#if 0
  /* synchronize */
  for (int i = 0; i < shadow_thread_num; i++)
  {
    task_start[i] = 1;
  }

  while(1)
  {
    bool wait = false;
    for (int i = 0; i < shadow_thread_num; i++)
    {
      if ( task_start[i] == 1 )
        wait = true;
    }

    if ( !wait )
      break;
  }
#endif 
}



/* Set up the dependence profiler.
   mem_num - highest memory operation id; every table gets mem_num + 1
             rows/entries so that ids can index it directly.
   With SHADOW_DEP_MATRIX the two dependence tables are zeroed flat
   (mem_num+1) x (mem_num+1) arrays obtained from calloc (release them with
   free); otherwise the per-sink maps are sized instead.  The liveness array
   and the memory pool are created in both modes.  Any allocation failure
   aborts the process, because the access hooks write through these tables
   unconditionally. */
extern "C" void
__shadow_dependence_initialize (int mem_num)
{
  memop_num = mem_num +1;

#ifdef SHADOW_DEP_MATRIX  
  /* Compute the cell count in size_t: the int product overflows once
     memop_num exceeds 46340. */
  const size_t cell_count = (size_t)memop_num * (size_t)memop_num;
  loop_independent_dependence_matrix = (DEPENDENCE_ENTRY*)calloc(cell_count, sizeof(DEPENDENCE_ENTRY) );
  loop_carried_dependence_matrix = (DEPENDENCE_ENTRY*)calloc(cell_count, sizeof(DEPENDENCE_ENTRY) );
  if (!loop_independent_dependence_matrix || !loop_carried_dependence_matrix)
  {
    fprintf(stderr, "Allocate dependence matrix failed!\n");
    abort();   /* unlike assert(), still effective under NDEBUG */
  }
#else
  shadow_carried_dependence_map.resize(memop_num);
  shadow_independent_dependence_map.resize(memop_num);
#endif

  liveness_array = (char*)calloc(memop_num, sizeof(char) );
  if (!liveness_array)
  {
    fprintf(stderr, "Allocate liveness array failed!\n");
    abort();
  }

  local_pool = new MEM_POOL;

#if 0
  shadow_thread_num = core_num - 1;
  pthread_attr_t pthread_custom_attr;
  pthread_attr_init(&pthread_custom_attr);

  if (shadow_thread_num)
  {
    threads = new pthread_t[shadow_thread_num];
    parm = new thread_PARM[shadow_thread_num];
  }

  for (int i=0; i < shadow_thread_num; i++)
  {
    parm[i].id = i;
    pthread_create (&threads[i], &pthread_custom_attr, shadow_dependence_thread, &parm[i]);
    shadow_init_lock(i);
  }
#endif  
}





extern "C" void
__shadow_dependence_finalize ()
{
 
  delete local_pool;

  generate_memory_op_table("loopmemop.info");

  FILE *fp = fopen("dependencies.info", "w");
  fprintf(fp, "shadow_inst_count=%Lf\n", shadow_inst_count);

  fprintf(fp, "INDEPENDENT\n");
  for (unsigned i = 1; i < memop_num; ++i)
  { 
    MEMORY_OPERATION_INFO *mem_op_i = mem_info_hash[i];
    if ( !mem_op_i )
      continue;
    for (unsigned j = 1; j < memop_num; ++j)
      if ( loop_independent_dependence_matrix [ i * memop_num + j ].distance)
      {
        MEMORY_OPERATION_INFO *mem_op_j = mem_info_hash[j];
        if ( !mem_op_j )
          continue;

        char *type;
          
        if ( mem_op_i->read && !mem_op_j->read )
          type = "RAW";
        else if ( !mem_op_i->read && mem_op_j->read )
          type = "WAR";
        else if ( !mem_op_i->read && !mem_op_j->read )
          type = "WAW";
        else
          continue;   

        // LOOP 277  RAW  38 --> 43 	MAY   LOOP INDEPENDENT  DISTANCE
        fprintf(fp, "LOOP %d  %s  ", profiled_loop, type);        
        // j is the source, i is the sink
        fprintf(fp, "%d --> %d   MAY  LOOP INDEPENDENT  ", mem_op_j->uid, mem_op_i->uid);
        fprintf(fp, "DISTANCE 0\n");
      }
  }

  
  fprintf(fp, "CARRIED\n");
  for (unsigned i = 1; i < memop_num; ++ i)
  {
    MEMORY_OPERATION_INFO *mem_op_i = mem_info_hash[i];
    if ( !mem_op_i )
      continue;
    for (unsigned j = 1; j < memop_num; ++ j)
      if ( loop_carried_dependence_matrix [ i * memop_num + j ].distance )
      {
        MEMORY_OPERATION_INFO *mem_op_j = mem_info_hash[j];
        if ( !mem_op_j )
          continue;
        
        char *type;
          
        if ( mem_op_i->read && !mem_op_j->read )
          type = "RAW";
        else if ( !mem_op_i->read && mem_op_j->read )
          type = "WAR";
        else if ( !mem_op_i->read && !mem_op_j->read )
          type = "WAW";
        else
          continue;   

        // LOOP 277  RAW  38 --> 43 	MAY   LOOP INDEPENDENT  DISTANCE
        fprintf(fp, "LOOP %4d  %s  ", profiled_loop, type);        
        fprintf(fp, "%10d --> %10d   MAY  LOOP CARRIED      ", mem_op_j->uid, mem_op_i->uid);
        fprintf(fp, "DISTANCE %d\n", loop_carried_dependence_matrix [ i * memop_num + j ].distance);
      }
  }


  fprintf(fp, "LIVENESS\n");
  for (unsigned i = 1; i < memop_num; ++ i)
  {
    if ( liveness_array [ i ] )
    {
      MEMORY_OPERATION_INFO *mem_op = mem_info_hash[i];
      if ( !mem_op )
        continue;
      fprintf(fp, "MEMOP %10d in LOOP %4d \n", mem_op->uid, profiled_loop );        
    }
  }
  
  fclose(fp);
  delete loop_independent_dependence_matrix;
  delete loop_carried_dependence_matrix;  
  free (liveness_array);
}

/* lock_array[k] holds the slots of lock k; lock_num is the number of locks. */
static int **lock_array = NULL;
static int lock_num = 0;

/* Allocate the post/wait lock table.
   distance - array of `num` slot counts, one per lock;
   num      - number of locks.
   Lock k gets distance[k] slots, each initialised to -1 (nothing posted
   yet).  A lock whose slot count is not positive gets no usable slots.
   Aborts the process if memory cannot be obtained.  Any table from an
   earlier call is not released here; call _destruct_lock_array first. */
extern "C" 
void _init_lock_array(int *distance, int num)
{
  lock_num = num;
  lock_array = (int**)malloc( num * sizeof(int*));
  if (!lock_array && num > 0)
  {
    fprintf(stderr, "Allocate lock array failed!\n");
    abort();
  }

  for (int i=0; i<num; i++)
  {
    lock_array[i] = (int*)calloc (distance[i], sizeof(int) );    
    /* calloc may return NULL for a zero-sized request; that is only an
       error when slots were actually asked for. */
    if (!lock_array[i] && distance[i] > 0)
    {
      fprintf(stderr, "Allocate lock slots failed!\n");
      abort();
    }
    for (int j=0; j<distance[i]; j++)
      lock_array[i][j] = -1;  
  }
}

/* Release the post/wait lock table created by _init_lock_array and reset
   the bookkeeping, so that a second call (or a call without a prior init)
   is harmless instead of a double free. */
extern "C" 
void _destruct_lock_array()
{
  for (int i=0; i<lock_num; i++)
    free (lock_array[i]);    
  free (lock_array);

  lock_array = NULL;
  lock_num = 0;
}

pthread_mutex_t mutex1 = PTHREAD_MUTEX_INITIALIZER;

/*
 * Block (by spinning) until iteration cur_iter - distance has been posted
 * on lock lock_id.
 *
 * Several threads may wait on the same lock, but a given iteration must be
 * posted by one thread only.
 *
 * lock_id  - index into the lock table set up by _init_lock_array;
 * cur_iter - iteration of the caller;
 * distance - dependence distance; must be positive (it is used as a
 *            modulus) and match the slot count the lock was created with.
 *
 * The slot is re-read with an atomic acquire load on every spin.  A plain
 * load may legally be hoisted out of the loop by the compiler, which would
 * turn the wait into an infinite loop; acquire ordering also makes the
 * poster's earlier writes visible once the wait ends.
 */
extern "C" 
void _wait(int lock_id, int cur_iter, int distance)
{
  const int *slot = &lock_array[lock_id][cur_iter % distance];
  const int needed = cur_iter - distance;

  while ( needed > __atomic_load_n(slot, __ATOMIC_ACQUIRE) )
    ;   /* spin */
}

/* Announce that iteration cur_iter has passed lock lock_id.
   lock_id  - index into the lock table set up by _init_lock_array;
   cur_iter - iteration being posted;
   distance - dependence distance; must be positive (it is used as a
              modulus) and match the slot count the lock was created with.
   The slot cur_iter % distance is advanced to cur_iter with an atomic
   release store, so that a thread spinning in _wait observes both the new
   value and everything this thread wrote before posting.
   If the slot already holds cur_iter or a later iteration the process
   exits.
   NOTE(review): that exit uses status 0, so such a protocol violation is
   reported as success -- kept as in the original, confirm it is intended. */
extern "C" 
void _post(int lock_id, int cur_iter, int distance)
{
  int *slot = &lock_array[lock_id][cur_iter % distance];

  if (__atomic_load_n(slot, __ATOMIC_RELAXED) < cur_iter){
      __atomic_store_n(slot, cur_iter, __ATOMIC_RELEASE);
  }else{
      exit(0);
  }
}

