




#include <boost/graph/adjacency_list.hpp>
#include <boost/tuple/tuple.hpp>
#include <boost/graph/strong_components.hpp>
#include <boost/graph/topological_sort.hpp>
#include <boost/graph/graph_traits.hpp>
#include <boost/dynamic_bitset.hpp>
#include <tr1/unordered_map>
#include <cstdlib>

#include "ipa-check-dependence.h"
#include "ipa-union-find.h"

extern "C"
{
#include "tree-ssa-propagate.h"
#include "tree-inline.h"
#include "except.h"
//#include "basic-block.h"
}

/* Edited by HTYU
 * Feedback-based loop parallelization
   The pass fipa-parallel-loops uses the dependence information collected by the profiler
   for compiler optimizations such as automatic parallelization, privatization,
   loop optimizations, etc.
*/
using namespace std;
//using namespace boost;


#define MAX_THREAD_NUM 8
#define THREAD_NUM 8
#define PERCOLATE_LEVEL 1

#define POINTER_DATA_FILED_NAME  "pointer"
#define POINTER_SPAN_FILED_NAME  "span"
#define THREAD_NUM_NAME  "upp_thread_num"
#define THREAD_ID_NAME   "upp_tid"
#define ITER_ID_NAME  "upp_iter_id"

static tree thread_num_var;
static tree iter_id_var;
static data_dependence_set *dependence_set = NULL;
static UINT32_MAP  clone_dr_map;

typedef std::map<int, data_dependence_set> Dependence_table;
static Dependence_table dependence_table;

typedef std::map<int, std::set<int> > Reference_table;
static Reference_table liveness_table;
static std::set<int> *liveness_set = NULL;


extern "C" 
{
bool try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter);
void cgraph_remove_invalid_call_edge (struct cgraph_node *node);

tree get_reaching_def (tree var);
void mark_phi_for_rewrite (basic_block bb, gimple phi);
void rewrite_update_phi_arguments (basic_block bb);
void rewrite_all_phi_nodes_with_iv (loop_p loop, tree main_iv);
int  initialize_reductions (void **slot, void *data);
void eliminate_local_variables (edge entry, edge exit);
void separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data);
tree create_loop_fn (location_t loc);
void create_call_for_reduction (struct loop *loop, htab_t reduction_list,
			   struct clsn_data *ld_st_data);

tree get_default_def_for (tree sym);

/* Equality and hash functions for hashtab code.  */

int reduction_info_eq (const void *aa, const void *bb);

hashval_t reduction_info_hash (const void *aa);

struct reduction_info *reduction_phi (htab_t reduction_list, gimple phi);

bool try_create_reduction_list (loop_p loop, htab_t reduction_list);
}

/* Dump the loop-carried dependences of DEPENDENCE_SET to the file NAME.

   DEPENDENCE_SET  dependences to print; may be NULL, in which case nothing
                   is written.
   NAME            path of the output file, opened with mode "w".

   Entries whose distance is zero (loop-independent dependences) are skipped.
   If the file cannot be opened the function returns without printing.  */
static void 
print_dependence_set(const data_dependence_set *dependence_set, const char *name)
{
  if (!dependence_set || !name)
    return;

  FILE *fp = fopen(name, "w");
  if (!fp)
    return;   /* fopen failed: do not hand a NULL stream to print ()/fclose () */

  for ( data_dependence_set::const_iterator iter = dependence_set->begin(); iter != dependence_set->end(); ++iter)
  {
    const ipa_data_dependency &dep = *iter;

    // ignore loop-independent dependence
    if (dep.distance() == 0)
      continue;
    dep.print (fp, 0);
  }
  fclose(fp);
}



void
cgraph_remove_invalid_edge (struct cgraph_node *node)
{


  basic_block bb;
  FOR_EACH_BB (bb)
  {
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      if (!is_gimple_call (stmt))
        continue;

      struct cgraph_edge *e = cgraph_edge (node, stmt);
      if (e)
        e->aux = (void*)1;     
    }
  }


  struct cgraph_edge *e, *f;

  for (e = node->callees; e; e = f)
  {
    f = e->next_callee;
    if (!e->call_stmt)
      cgraph_remove_edge (e);
    else if (!e->aux)
      cgraph_remove_edge (e);
    else
      e->aux = 0;
  }
  
  for (e = node->indirect_calls; e; e = f)
  {
    f = e->next_callee;
    if (!e->call_stmt)
      cgraph_remove_edge (e);
    else if (!e->aux)
      cgraph_remove_edge (e);      
    else
      e->aux = 0;
  }

}

/*
  Insert into EXIT_BBS the destination block of every exit edge of LOOP.
  Existing contents of EXIT_BBS are left in place.
*/
void get_loop_exit_bbs( loop_p loop, std::set<basic_block> &exit_bbs )
{
  VEC (edge, heap) *exit_edges = get_loop_exit_edges (loop);

  int idx = 0;
  edge leaving;
  while (VEC_iterate (edge, exit_edges, idx, leaving))
  {
    exit_bbs.insert (leaving->dest);
    idx++;
  }

  /* The edge vector returned above is released here.  */
  VEC_free (edge, heap, exit_edges);
}


/* Return the single block that all exit edges of LOOP lead to.
   Asserts (gcc_assert) that there is exactly one such block.  */
basic_block loop_exit_bb( loop_p loop )
{
  std::set<basic_block> targets;
  get_loop_exit_bbs (loop, targets);

  gcc_assert (targets.size() == 1);

  std::set<basic_block>::iterator only = targets.begin();
  return *only;
}



/*
   Put LOOP into the shape the rest of this pass works on.

   What the live code below does:
   1. Splits every fall-through edge into the function's exit block whose
      source block has more than one successor.
   2. If the latch is the header itself, or the latch has more than one
      successor, splits the latch -> header edge.
   3. Splits the loop's exit destination block with split_block (exit, NULL).
      loop_exit_bb () asserts that all exit edges share one destination.

   NOTE(review): the previous header comment also described merging multiple
   loop entries and multiple exits.  No live code here merges entries, and the
   only exit-merging code is the disabled "#if 0" section after the return.
*/
   
void
canonicalize_loop_form (struct loop *loop)
{


  edge e;
  edge_iterator i;

  /* Avoid annoying special cases of edges going to exit block.  */
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
      split_edge (e);

  /* do-while loop */
  if (loop->latch == loop->header || EDGE_COUNT (loop->latch->succs) > 1)
  {
    /* Locate the back edge (latch -> header); E is left pointing at it.  */
    FOR_EACH_EDGE (e, i, loop->header->preds)
      if (e->src == loop->latch)
        break;
    /* NOTE(review): assumes the search above always finds the edge.  */
    split_edge (e);
  }

  /* Requires a single exit destination (asserted inside loop_exit_bb).  */
  basic_block exit = loop_exit_bb (loop);  
  split_block (exit, NULL);
  return;


  /* Disabled: attempt to fold empty exit blocks into their successor so the
     loop ends up with one exit destination.  */
#if 0
  std::set<basic_block> exit_bbs;
  VEC (edge, heap) * exits = get_loop_exit_edges (loop);
  for (int i = 0; VEC_iterate (edge, exits, i, e); i++)
  {
    basic_block bb = e->dest;
    exit_bbs.insert(bb);
  }  

  if ( exit_bbs.size() > 1 )
  {
    for (std::set<basic_block>::iterator iter = exit_bbs.begin(); iter != exit_bbs.end(); ++iter)
    {
      basic_block bb = *iter;      
      gimple_seq seq = bb_seq (bb);      
      if ( gimple_seq_empty_p(seq) )
      {
        /* redirect edge to bb to true_exit*/
        basic_block succ = single_succ (bb);
        std::set<edge> preds;        
        FOR_EACH_EDGE (e, i, bb->preds)
          preds.insert(e);

        for (std::set<edge>::iterator iter2 = preds.begin(); iter2 != preds.end(); ++iter2)
        {
          e = redirect_edge_and_branch (*iter2, succ);                        
          rescan_loop_exit (*iter2, false, false);
          rescan_loop_exit (e, false, false);
        }
        
        delete_basic_block (bb);
     
      }
    }
  }
  VEC_free (edge, heap, exits);

  basic_block exit = loop_exit_bb (loop);  
  split_block (exit, NULL);
  #endif
}




#define BUILD_DEPENDENCE 1

#ifdef BUILD_DEPENDENCE 

/* Read the dependence-profile file FINALDEPC and record every entry that
   belongs to one of the loops in LOOPS.

   Two kinds of line are recognised; every other line is ignored:

     LOOP <loop> <RAW|WAR|WAW> <src> --> <sink> MAY LOOP CARRIED DISTANCE <d>
       The dependence is added to dependence_table[<loop>] and to the
       loop-carried (<d> > 0) or loop-independent set of both data
       references.  If the trailing distance cannot be read it is taken
       to be 0.  An unknown dependence type hits gcc_unreachable ().

     MEMOP <ref> LOOP <loop>
       <ref> is added to liveness_table[<loop>].

   A file that cannot be opened is treated as "no profile data".  Lines whose
   mandatory fields cannot be parsed are skipped.  */
void Read_dependence(const char* finaldepc, const set<int> &loops)
{
  /* collect profiled dependence */
  FILE *finaldep = fopen (finaldepc, "r");
  if (!finaldep)
    return;

  char *line = NULL;    /* buffer managed by getline (), released below */
  size_t len = 0;
  char str[100];

  while (getline (&line, &len, finaldep) != -1)
  {
    /* A blank line leaves STR unset, so check the conversion count.  The
       field width keeps the keyword inside the 100-byte buffer.  */
    if (sscanf (line, "%99s", str) != 1)
      continue;

    if (strcmp (str, "LOOP") == 0)
    {
      char str_type[100];
      int a = 0, b = 0, loop = 0;
      int distance = 0;   /* stays 0 when the DISTANCE field is absent */

      /* Try to recognize the form :
      LOOP  437  WAW       20640 -->      20640   MAY  LOOP CARRIED      DISTANCE 1      */
      const int fields = sscanf (line, "%*s %d %99s %d %*s %d %*s %*s %*s %*s %d",
                                 &loop, str_type, &a, &b, &distance);
      if (fields < 4)
        continue;   /* loop, type, source and sink are mandatory */

      if ( loops.find(loop) == loops.end() )
        continue;

      ipa_dep_type type;
      if (strcmp (str_type, "RAW") == 0)
        type = id_flow_dd;
      else if (strcmp (str_type, "WAR") == 0)
        type = id_anti_dd;
      else if (strcmp (str_type, "WAW") == 0)
        type = id_output_dd;
      else
        gcc_unreachable();

      ipa_data_dependency dep;
      dep.set_source (a);
      dep.set_sink (b);
      dep.set_type (type);
      dep.set_loop (loop);
      dep.set_distance (distance);
      dependence_table[loop].insert(dep);

      /* Attach the dependence to both endpoints.  */
      ipa_data_reference *dra = ipa_get_data_ref(a);
      ipa_data_reference *drb = ipa_get_data_ref(b);

      if ( distance > 0 )
      {
        if (dra)
          dra->loop_carried_deps().insert(dep);
        if (drb)
          drb->loop_carried_deps().insert(dep);
      }
      else
      {
        if (dra)
          dra->loop_independent_deps().insert(dep);
        if (drb)
          drb->loop_independent_deps().insert(dep);
      }
    }
    else if (strcmp (str, "MEMOP") == 0)
    {
      int a = 0, loop = 0;

      /* Try to recognize the form :
          MEMOP  <ref>  LOOP  <loop>
         The literal word between the two numbers is skipped with %*s; the
         previous format used a plain %s without a matching argument.  */
      if (sscanf (line, "%*s %d %*s %d", &a, &loop) != 2)
        continue;

      if ( loops.find(loop) == loops.end() )
        continue;

      liveness_table[loop].insert(a);
    }
  }

  free (line);
  fclose(finaldep);

//  print_dependence_set (&dependence_table[*loops.begin()], "tmpdep.read");

}

/* Select the profile data recorded for LOOP.

   Side effect: the file-level liveness_set is pointed at LOOP's entry in
   liveness_table, or set to NULL if there is none.
   Returns LOOP's entry in dependence_table, or NULL if there is none.  */
static data_dependence_set *load_dependence(loop_p loop)
{
  std::set<int> *live_refs = NULL;
  Reference_table::iterator live_pos = liveness_table.find (loop_uid (loop));
  if (live_pos != liveness_table.end ())
    live_refs = &live_pos->second;
  liveness_set = live_refs;

  Dependence_table::iterator dep_pos = dependence_table.find (loop_uid (loop));
  if (dep_pos == dependence_table.end ())
    return NULL;
  return &dep_pos->second;

#if 0
      Build_dependence_graph( "finaldep.info", pdg );
    
      /* Reduce the dependence graph into SCCs */
      vector<int> components(num_vertices(pdg));
      strong_components(pdg, &components[0]);
    
      /* Partition the scc graph by RAW dependence */
      COMPONENT_MAP partitions;
      Partition_dependence_graph(pdg, components, partitions);
#endif    

}

static void
find_consequent_blocks(loop_p loop, basic_block bb, IPA_NODE_SET &nodes, BB_SET &bbs );

static void
find_consequent_blocks(loop_p loop, cgraph_node_ptr node, gimple call, IPA_NODE_SET &nodes, BB_SET &bbs );


/* Collect what can run after statement START inside its basic block and
   beyond.

   - Functions called by the statements that follow START in the same block
     are added to NODES (together with their callees via collect_callees).
   - Every successor block other than LOOP's header is explored with the
     basic-block overload, which records it in BBS.
   - If START's block is the function's exit block, the callers of the
     current function that are not yet in NODES are explored from their
     call statement.  */
static void
find_consequent_blocks(loop_p loop, gimple start, IPA_NODE_SET &nodes, BB_SET &bbs )
{
  basic_block home = gimple_bb (start);

  /* Advance the iterator until it sits on START.  */
  gimple_stmt_iterator it = gsi_start_bb (home);
  while (!gsi_end_p (it) && gsi_stmt (it) != start)
    gsi_next (&it);

  /* Examine the calls that come after START.  */
  for (gsi_next (&it); !gsi_end_p (it); gsi_next (&it))
  {
    gimple call = gsi_stmt (it);
    if ( gimple_code (call) != GIMPLE_CALL )
      continue;

    tree target = gimple_call_fndecl (call);
    if (!target)
    {
      /* indirect call: walk the candidate callee list kept on the edge */
      struct cgraph_edge *call_edge = cgraph_edge (cgraph_get_node(current_function_decl), call);
      for (struct nodeList *cand = call_edge->indirect_info->callees; cand; cand = cand->next)
      {
        if ( !valid_function_node_p(cand->node) )
          continue;
        if ( nodes.find(cand->node) != nodes.end() )
          continue;
        nodes.insert (cand->node);
        collect_callees (cand->node, nodes);
      }
      continue;
    }

    /* direct call */
    cgraph_node_ptr callee = cgraph_get_node (target);
    if ( valid_function_node_p(callee) && nodes.find(callee) == nodes.end() )
    {
      nodes.insert (callee);
      collect_callees (callee, nodes);
    }
  }

  /* Follow the CFG, but never re-enter the loop through its header.  */
  edge out;
  edge_iterator oi;
  FOR_EACH_EDGE (out, oi, home->succs)
  {
    if (out->dest != loop->header)
      find_consequent_blocks (loop, out->dest, nodes, bbs);
  }

  if ( home != EXIT_BLOCK_PTR )
    return;

  /* Function exit: continue in every caller not visited yet.  */
  cgraph_node_ptr self = cgraph_get_node (current_function_decl);
  for (cgraph_edge_p in = self->callers; in; in = in->next_caller)
  {
    struct cgraph_node *caller = in->caller;
    if ( nodes.find(caller) == nodes.end() )
    {
      gimple site = in->call_stmt;
      switch_to_context (caller->decl);
      find_consequent_blocks (loop, caller, site, nodes, bbs);
      switch_off_context ();
    }
  }
}


/* Record basic block BB in BBS and explore everything reachable from it.

   - Returns immediately if BB is already in BBS, so revisiting a block is a
     no-op.
   - Functions called from BB are added to NODES (with their callees via
     collect_callees).
   - Successor blocks other than LOOP's header are explored recursively.
   - If BB is the function's exit block, callers of the current function
     that are not yet in NODES are explored from their call statement.  */
static void
find_consequent_blocks(loop_p loop, basic_block bb, IPA_NODE_SET &nodes, BB_SET &bbs )
{
  if ( bbs.find(bb) != bbs.end() )
    return;
  bbs.insert (bb);

  gimple_stmt_iterator it = gsi_start_bb (bb);
  for (; !gsi_end_p (it); gsi_next (&it))
  {
    gimple call = gsi_stmt (it);
    if ( gimple_code (call) != GIMPLE_CALL )
      continue;

    tree target = gimple_call_fndecl (call);
    if (!target)
    {
      /* indirect call: walk the candidate callee list kept on the edge */
      struct cgraph_edge *call_edge = cgraph_edge (cgraph_get_node(current_function_decl), call);
      for (struct nodeList *cand = call_edge->indirect_info->callees; cand; cand = cand->next)
      {
        if ( !valid_function_node_p(cand->node) )
          continue;
        if ( nodes.find(cand->node) != nodes.end() )
          continue;
        nodes.insert (cand->node);
        collect_callees (cand->node, nodes);
      }
      continue;
    }

    /* direct call */
    cgraph_node_ptr callee = cgraph_get_node (target);
    if ( valid_function_node_p(callee) && nodes.find(callee) == nodes.end() )
    {
      nodes.insert (callee);
      collect_callees (callee, nodes);
    }
  }

  /* Follow the CFG, but never re-enter the loop through its header.  */
  edge out;
  edge_iterator oi;
  FOR_EACH_EDGE (out, oi, bb->succs)
  {
    if (out->dest != loop->header)
      find_consequent_blocks (loop, out->dest, nodes, bbs);
  }

  if ( bb != EXIT_BLOCK_PTR )
    return;

  /* Function exit: continue in every caller not visited yet.  */
  cgraph_node_ptr self = cgraph_get_node (current_function_decl);
  for (cgraph_edge_p in = self->callers; in; in = in->next_caller)
  {
    struct cgraph_node *caller = in->caller;
    if ( nodes.find(caller) == nodes.end() )
    {
      gimple site = in->call_stmt;
      find_consequent_blocks (loop, caller, site, nodes, bbs);
    }
  }
}


/* Continue the reachability walk inside function NODE, starting after the
   call statement CALL.  The function context is switched to NODE's
   declaration for the duration of the walk and switched off afterwards.  */
static void
find_consequent_blocks(loop_p loop, cgraph_node_ptr node, gimple call, IPA_NODE_SET &nodes, BB_SET &bbs )
{
  // collect local basic blocks
  switch_to_context (node->decl);
  find_consequent_blocks (loop, call, nodes, bbs);
  switch_off_context ();
}




/* Entry point of the reachability walk: explore from the destination of
   every exit edge of LOOP, filling NODES and BBS.  Runs in the context of
   the function that contains LOOP.  */
static void
find_consequent_blocks(loop_p loop, IPA_NODE_SET &nodes, BB_SET &bbs )
{
  switch_to_context (loop_func_decl(loop));

  /* Start traversing from exit blocks */
  VEC (edge, heap) *exit_edges = get_loop_exit_edges (loop);
  int idx = 0;
  edge leaving;
  while (VEC_iterate (edge, exit_edges, idx, leaving))
  {
    find_consequent_blocks (loop, leaving->dest, nodes, bbs);
    ++idx;
  }
  VEC_free (edge, heap, exit_edges);

  switch_off_context ();
}


static bool
live_after_loop( ipa_data_reference_p dr, loop_p loop)
{
  if (!liveness_set)
    return false;

  return liveness_set->find(dr->uid()) != liveness_set->end();
    
  /* Find procedures and basic blocks from the end of loop to program exit */
  IPA_NODE_SET nodes;
  BB_SET bbs;
  find_consequent_blocks (loop, nodes, bbs);


  /* Check aliases in each bb */
  for (BB_SET::iterator iter = bbs.begin(); iter != bbs.end(); ++iter)
  {
    basic_block bb = *iter;
    tree func_decl = bb_func(bb);
    //switch_to_context (func_decl);
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      enum gimple_code stmt_code = gimple_code (stmt);
      ipa_data_reference_p drt;
      switch (stmt_code)
      {
    	  case GIMPLE_ASSIGN:
        {
    			drt = tree_data_reference (stmt, 1);
    			if (drt && ipa_dr_may_alias_p (dr, drt) )
            return true;
    			drt = tree_data_reference (stmt, 2);
    			if (drt && ipa_dr_may_alias_p (dr, drt) )
            return true;
    			break;
        }

    	  case GIMPLE_COND:
        {
    			drt = tree_data_reference (stmt, 0);
    			if (drt && ipa_dr_may_alias_p (dr, drt) )
            return true;
    			drt = tree_data_reference (stmt, 1);
    			if (drt && ipa_dr_may_alias_p (dr, drt) )
            return true;

      		break;
        }
        
        case GIMPLE_SWITCH:
        {
    			drt = tree_data_reference (stmt, 0);
    			if (drt && ipa_dr_may_alias_p (dr, drt) )
            return true;
          break;
        }

    	  case GIMPLE_CALL:
        {
    			int i;
    			int n = gimple_call_num_args (stmt);

    			for (i = 0; i < n; i++)
    		  {
            drt = tree_data_reference (stmt,  i + 3);
            if (drt && ipa_dr_may_alias_p (dr, drt) )
              return true;
    		  }
    			break;
        }

        case GIMPLE_RETURN :
        {
    			drt = tree_data_reference (stmt, 0);
    			if (drt && ipa_dr_may_alias_p (dr, drt) )
            return true;
          break;
        }
        
    	  default:
    	    break;
      }
    }    
   // switch_off_context ();
  }

  for (IPA_NODE_SET::iterator iter = nodes.begin(); iter != nodes.end(); ++iter)
  {
    cgraph_node_ptr node = *iter;
    switch_to_context (node->decl);
    basic_block bb;
    FOR_EACH_BB (bb)
    {
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        enum gimple_code stmt_code = gimple_code (stmt);
        ipa_data_reference_p drt;
        switch (stmt_code)
        {
      	  case GIMPLE_ASSIGN:
          {
      			drt = tree_data_reference (stmt, 1);
      			if (drt && ipa_dr_may_alias_p (dr, drt) )
              goto LTRUE;
      			drt = tree_data_reference (stmt, 2);
      			if (drt && ipa_dr_may_alias_p (dr, drt) )
              goto LTRUE;
      			break;
          }

      	  case GIMPLE_COND:
          {
      			drt = tree_data_reference (stmt, 0);
      			if (drt && ipa_dr_may_alias_p (dr, drt) )
              goto LTRUE;
      			drt = tree_data_reference (stmt, 1);
      			if (drt && ipa_dr_may_alias_p (dr, drt) )
              goto LTRUE;

        		break;
          }
          
          case GIMPLE_SWITCH:
          {
      			drt = tree_data_reference (stmt, 0);
      			if (drt && ipa_dr_may_alias_p (dr, drt) )
              goto LTRUE;
            break;
          }

      	  case GIMPLE_CALL:
          {
      			int i;
      			int n = gimple_call_num_args (stmt);

      			for (i = 0; i < n; i++)
      		  {
              drt = tree_data_reference (stmt,  i + 3);
              if (drt && ipa_dr_may_alias_p (dr, drt) )
                goto LTRUE;
      		  }
      			break;
          }

          case GIMPLE_RETURN :
          {
      			drt = tree_data_reference (stmt, 0);
      			if (drt && ipa_dr_may_alias_p (dr, drt) )
              goto LTRUE;
            break;
          }
          
      	  default:
      	    break;
        }
      }
    }
    switch_off_context ();

  }
  
  return false;
  
  LTRUE: switch_off_context ();
    return true;
}


class dep_class_rep;

// One element of the union-find structure used to group data references.
// Each member carries the id of the data reference it stands for.
class dep_class_member : public U_F_ELEMENT<dep_class_member> 
{
private:
   int    _base_id;  // the dr id

public:
  // Create a member for the data reference with id BASE_ID.
  dep_class_member(int base_id) :
    _base_id(base_id)
      { }

  // Id of the data reference this member stands for.
  int  base_id() const
  {
    return _base_id;
  }

  // Replace the stored data-reference id.
  void    Set_base_id(int id)
  {
    _base_id = id;
  }

  // The class this member currently belongs to: the result of Find (),
  // cast to dep_class_rep (only forward-declared at this point).
  dep_class_rep *dep_class()
  { return (dep_class_rep *) Find(); }


};


// Representative of one dependence class (a union-find set of
// dep_class_member objects).  Besides the numeric id it carries the
// per-class properties filled in by partition_dependence ().
class dep_class_rep : public U_F_REP<dep_class_member> 
{

private:
  int _id;   // id handed out by dep_classifiction::New_dep_class
  

public:

  bool has_loop_carried_raw;   // class is the sink of a loop-carried RAW
  bool has_loop_carried_war;   // class is the sink of a loop-carried WAR
  bool has_loop_carried_waw;   // class is the sink of a loop-carried WAW
  bool live_after_loop;        // some reference of the class is live after the loop

  // Create a class with the given ID and all property flags cleared.
  // The object is zeroed first, as before; the id is stored afterwards
  // because the previous constructor dropped its argument and id () always
  // returned 0.
  // NOTE(review): BZERO also wipes the U_F_REP base sub-object after its
  // constructor has run -- confirm that an all-zero base is the intended
  // initial state.
  dep_class_rep(int id) 
   {
     BZERO(this, sizeof(dep_class_rep));
     _id = id;
   }
   
  int id() const { return _id; }

};

typedef  std::tr1::unordered_map < int, dep_class_member* > DEP_MEMBER_HASH;
typedef  std::tr1::unordered_map < int, dep_class_rep* > DEP_CLASS_HASH;
typedef  std::list< dep_class_rep* > DEP_CLASS_LIST;


DEP_MEMBER_HASH   dep_base_id_map;



// Groups data references into dependence classes with a union-find
// structure.
//
// Ownership: the object owns every dep_class_member stored in _base_id_map
// and every dep_class_rep stored in _acr_list; both are deleted by clear ()
// and by the destructor.  _free_list only holds recycled entries that are
// also in _acr_list, so it is never deleted from directly.
class dep_classifiction
{
private:

  DEP_MEMBER_HASH   _base_id_map;    // dr id -> owned union-find member
  DEP_CLASS_LIST    _acr_list;       // every class ever allocated (owned)
  DEP_CLASS_LIST    _free_list;      // classes merged away, ready for reuse
  int               _last_id_used;   // last class id handed out
    
  // Copying would make two objects delete the same members and classes, so
  // it is disabled (declared, never defined).
  dep_classifiction(const dep_classifiction &);
  dep_classifiction &operator=(const dep_classifiction &);

public: 

  // dr id -> class, filled by Write_back ().
  DEP_CLASS_HASH   dr_class_map;
  
  dep_classifiction() :
    _last_id_used(0)
  {}
    
  // Releases everything the object owns.
  ~dep_classifiction() 
  {
    clear();
  }

  // Delete all members and classes and return to the freshly constructed
  // state.
  void clear() 
  {
    /* release dep class member */
    for ( DEP_MEMBER_HASH::iterator iter = _base_id_map.begin(); iter != _base_id_map.end(); ++iter )
      delete iter->second; 

    /* release dep class */
    for ( DEP_CLASS_LIST::iterator iter = _acr_list.begin(); iter != _acr_list.end(); ++iter )
      delete *iter; 

    dr_class_map.clear();
    _base_id_map.clear();
    _acr_list.clear();
    _free_list.clear();
    _last_id_used = 0;
  }
  
  dep_class_rep*      New_dep_class(dep_class_member &acm) ;
  void                Free_dep_class(dep_class_rep *acr);  
  void                Classify_memops(FILE *fp);
  void                Classify_memop(int a, int b);
  void                Write_back();
  void                Print(FILE *fp);

};


// Create a new dependence class containing only ACM and return it.
// A class from the free list is reused (destroyed and re-constructed in
// place) when one is available; otherwise a new one is allocated and
// recorded in _acr_list.  Either way the class gets the next id.
dep_class_rep*
dep_classifiction::New_dep_class(dep_class_member &acm) 
{
  dep_class_rep *fresh = NULL;

  if ( _free_list.empty() )
  {
    fresh = new dep_class_rep(++_last_id_used);
    _acr_list.push_back (fresh);
  }
  else
  {
    fresh = _free_list.front();
    _free_list.pop_front();

    // Recycle the storage: run the destructor, then construct anew.
    fresh->~dep_class_rep();
    new(fresh) dep_class_rep(++_last_id_used);
  }

  acm.Put_in_set(fresh);
  return fresh;
}


// Put ACR on the front of the free list so that New_dep_class can reuse
// it.  The object is not deleted here.
void
dep_classifiction::Free_dep_class(dep_class_rep *acr) 
{
  _free_list.push_front(acr);
}

void
dep_classifiction::Classify_memop(int a, int b)
{

  if (a == b )
    return ;

  dep_class_member* &member_a = _base_id_map[a];
  dep_class_member* &member_b = _base_id_map[b];
  
  if ( member_a == NULL )
  {
    member_a = new dep_class_member (a);
    New_dep_class(*member_a);        
  }

  if ( member_b == NULL )
  {
    member_b = new dep_class_member (b);
    New_dep_class(*member_b);
  }

  dep_class_rep *class_a = member_a->dep_class();
  dep_class_rep *class_b = member_b->dep_class();

  if ( class_a != class_b )
  {
    dep_class_rep *result = (dep_class_rep *) class_a->Union(*class_b);
    dep_class_rep *non_result = (result == class_a ? class_b : class_a);
    Free_dep_class (non_result);
  }
   
}


void
dep_classifiction::Write_back()
{
  /* write back */
  for ( DEP_MEMBER_HASH::iterator iter = _base_id_map.begin(); iter != _base_id_map.end(); ++iter )
  {
    int dr_id = iter->first;
    dr_class_map[dr_id] = iter->second->dep_class();     
  }
}


static dep_classifiction dpc;


/* Partition the memory operations of LOOP into dependence classes and
   annotate each class.

   1. The global classifier dpc is reset.
   2. The endpoints of every dependence whose distance is not positive
      (loop-independent) are put into the same class.
   3. For every dependence with a non-zero distance whose sink belongs to a
      class, the matching has_loop_carried_* flag of that class is set, and
      the class is marked live after the loop if either endpoint is virtual
      or live_after_loop () says so for the sink.

   Reads the file-level dependence_set; if it is NULL only step 1 runs.  */
static void
partition_dependence(loop_p loop)
{
  ipa_update_bb_info ();
 // print_dependence_set (dependence_set, "tmpdep.part");

  dpc.clear();

  if (dependence_set == NULL)
    return;

  data_dependence_set::iterator pos;

  /* Step 2: unite the endpoints of loop-independent dependences.  */
  for (pos = dependence_set->begin(); pos != dependence_set->end(); ++pos)
  {
    const ipa_data_dependency &independent = *pos;
    if ( !(independent.distance() > 0) )
    {
      int src = independent.source();
      int snk = independent.sink();
      dpc.Classify_memop (src, snk);
    }
  }

  dpc.Write_back();

  /* Step 3: set the properties of each partition.  */
  for (pos = dependence_set->begin(); pos != dependence_set->end(); ++pos)
  {
    const ipa_data_dependency &carried = *pos;
    if ( carried.distance() == 0 )
      continue;

    int src = carried.source();
    int snk = carried.sink();
    ipa_data_reference *src_ref = ipa_get_data_ref(src);
    ipa_data_reference *snk_ref = ipa_get_data_ref(snk);

    /* Only sinks that were classified in step 2 carry properties.  */
    DEP_CLASS_HASH::iterator slot = dpc.dr_class_map.find(snk);
    if ( slot == dpc.dr_class_map.end() )
      continue;

    dep_class_rep *rep = slot->second;
    if ( carried.type() == id_flow_dd )
      rep->has_loop_carried_raw = true;
    else if ( carried.type() == id_anti_dd )
      rep->has_loop_carried_war = true;
    else if ( carried.type() == id_output_dd )
      rep->has_loop_carried_waw = true;

    // check liveness after loop
    const bool touches_virtual = src_ref->is_virtual() || snk_ref->is_virtual();
    const bool sink_is_live = live_after_loop(snk_ref, loop);
    if ( touches_virtual || sink_is_live )
      rep->live_after_loop = true;
  }

}


/* Boost.Graph property tag for the int value attached to each vertex.  */
struct vertex_type {
  typedef boost::vertex_property_tag kind;
};

/* Boost.Graph property tag for the int value attached to each edge
   (Build_dependence_graph stores the dependence distance there).  */
struct edge_type {
  typedef boost::edge_property_tag kind;
};

typedef boost::property<vertex_type, int> VertexProperty;
typedef boost::property<edge_type, int> EdgeProperty;


/* Bidirectional dependence graph: vertices kept in a vector, out-edges in a
   multiset.  */
typedef boost::adjacency_list< boost::multisetS, boost::vecS, boost::bidirectionalS, 
                              VertexProperty, EdgeProperty > DEPENDENCE_GRAPH;
typedef boost::graph_traits<DEPENDENCE_GRAPH>::edge_descriptor edge_descriptor;
typedef boost::graph_traits<DEPENDENCE_GRAPH>::vertex_descriptor vertex_descriptor;
typedef boost::graph_traits<DEPENDENCE_GRAPH>::out_edge_iterator out_edge_iterator;
typedef boost::graph_traits<DEPENDENCE_GRAPH>::out_edge_iterator out_edge_iterator;


/* Step 1. Build preliminary dependence graph from dependence profiling.

   For every dependence in DEPENDENCE_SET an edge source -> sink is added to
   PDG, carrying the dependence distance as its edge property.  The vertices
   are the data-reference ids used as edge endpoints.  A NULL DEPENDENCE_SET
   leaves PDG untouched.  */
void Build_dependence_graph(data_dependence_set *dependence_set, DEPENDENCE_GRAPH &pdg)
{
  if (!dependence_set)
    return;

  /* Add one edge per profiled dependence */
  for (data_dependence_set::iterator iter = dependence_set->begin(); iter != dependence_set->end(); ++iter)
  {
    const ipa_data_dependency &dep = *iter;
    boost::add_edge(dep.source(), dep.sink(), EdgeProperty(dep.distance()), pdg);
  }
}



/* Return true if DR is not known to take part in a loop-carried
   dependence according to the current classification in dpc.

   - A NULL DR, or a DR that was not classified, yields true.
   - Otherwise the answer is true only if none of the has_loop_carried_raw /
     _war / _waw flags of DR's class is set.

   The previous body returned the class pointer itself converted to bool,
   which is true for every classified reference regardless of its flags.
   NOTE(review): reading the flags is the presumed intent -- confirm with
   the callers.  */
bool dr_has_no_loop_carried_dependence(ipa_data_reference * dr)
{
  if (!dr )
    return true;

  DEP_CLASS_HASH::iterator slot = dpc.dr_class_map.find(dr->uid());
  if (slot == dpc.dr_class_map.end())
    return true;

  const dep_class_rep *rep = slot->second;
  if (!rep)
    return false;   /* same answer the pointer-to-bool conversion gave */

  return !(rep->has_loop_carried_raw
           || rep->has_loop_carried_war
           || rep->has_loop_carried_waw);
}


/* Return true if one of the loop-carried dependences recorded on DR is a
   flow dependence (id_flow_dd).  A NULL DR yields false.  */
bool dr_has_loop_carried_flow_dependence(ipa_data_reference * dr)
{
  if (dr)
  {
    data_dependence_set &carried = dr->loop_carried_deps();
    data_dependence_set::iterator pos = carried.begin();
    while ( pos != carried.end() )
    {
      if ( pos->type() == id_flow_dd )
        return true;
      ++pos;
    }
  }

  return false;
}

/* Return true if one of the loop-carried dependences recorded on DR is an
   anti dependence (id_anti_dd).  A NULL DR yields false.  */
bool dr_has_loop_carried_anti_dependence(ipa_data_reference * dr)
{
  if (dr)
  {
    data_dependence_set &carried = dr->loop_carried_deps();
    data_dependence_set::iterator pos = carried.begin();
    while ( pos != carried.end() )
    {
      if ( pos->type() == id_anti_dd )
        return true;
      ++pos;
    }
  }

  return false;
}

/* Return true if one of the loop-carried dependences recorded on DR is an
   output dependence (id_output_dd).  A NULL DR yields false.  */
bool dr_has_loop_carried_output_dependence(ipa_data_reference * dr)
{
  if (dr)
  {
    data_dependence_set &carried = dr->loop_carried_deps();
    data_dependence_set::iterator pos = carried.begin();
    while ( pos != carried.end() )
    {
      if ( pos->type() == id_output_dd )
        return true;
      ++pos;
    }
  }

  return false;
}
#endif

/* Create the runtime variables used by the parallelized code.

   1. Creates the global thread_num_var (named THREAD_NUM_NAME) and, at the
      start of every valid function named "main", inserts code that assigns
      it.  The code calls atoi (argv[argc - 1]), but because THREAD_NUM is
      non-zero the value actually assigned is the constant THREAD_NUM.
   2. In every valid function, creates a local variable named THREAD_ID_NAME,
      assigns it the result of the omp_get_thread_num builtin at function
      entry, and registers it with func_set_thread_id ().
   3. Creates the global array iter_id_var (named ITER_ID_NAME) of unsigned
      elements, with an index type built from MAX_THREAD_NUM.

   NOTE(review): step 1 reads the first two parameters of main
   unconditionally -- presumably main is expected to be declared with
   (argc, argv); confirm.  */
static void 
create_and_initialize_thread_num_var()
{
 
  /* Build variable declaration */
  thread_num_var = ipa_add_new_external_global (unsigned_type_node, get_identifier (THREAD_NUM_NAME));
  insert_global_to_varpool (thread_num_var);

  /* Initialize it, 
     thread_num = atoi(argv[argc - 1]);
  */  
  
  struct cgraph_node *node;
  for (node = cgraph_nodes; node; node = node->next)
  {
    if (!valid_function_node_p (node))
      continue;
  
    if ( strcmp (cgraph_node_name (node), "main") == 0 )
    {
      switch_to_context(node->decl);

      /* Insert at the start of the entry block, or of its single successor
         when the entry block has no statements.  */
      basic_block bb = ENTRY_BLOCK_PTR;
      gimple_seq seq = bb_seq (bb);      
      if ( gimple_seq_empty_p(seq) )
      {
        bb = single_succ(bb);
        seq = bb_seq (bb);
      }

      gimple_stmt_iterator gsi = gsi_start_bb (bb);
    

      // first two parameters of main: argc, then argv
      tree argc = DECL_ARGUMENTS (node->decl);       
      tree argv = DECL_CHAIN (argc);

      // argc: use its default definition, creating one if it is missing
      if ( is_gimple_reg(argc) )
      {
        tree var = argc;
        argc = gimple_default_def (cfun, var);
        if (!argc)
        {
          argc = make_ssa_name (var, gimple_build_nop ());        
  	      set_default_def (var, argc);          
          add_referenced_var (var);
        }
      }

      // argv: same treatment as argc
      if ( is_gimple_reg(argv) )
      {
        tree var = argv;
        argv = gimple_default_def (cfun, var);
        if (!argv)
        {
          argv = make_ssa_name (var, gimple_build_nop ());        
          set_default_def (var, argv);
          add_referenced_var (var);
        }
      }

      // argc - 1;
      argc = gimplify_build2 (&gsi, MINUS_EXPR, TREE_TYPE(argc), argc, integer_one_node);

      // address of argv[argc - 1]: argv + (argc - 1) * sizeof (*argv)
      tree type_pointed = TREE_TYPE( TREE_TYPE(argv) );      
      tree offset =	build1_stat (CONVERT_EXPR, size_type_node, argc);      
      tree size = build1 (SIZEOF_EXPR, size_type_node, type_pointed); 
      offset = gimplify_build2 (&gsi, MULT_EXPR, size_type_node, offset, size);
      tree addr = gimplify_build2 (&gsi, POINTER_PLUS_EXPR, TREE_TYPE(argv), argv, offset);
      addr = force_gimple_operand_gsi (&gsi, addr, true, NULL, true, GSI_SAME_STMT);  
      // argv[argc-1]
      tree rhs = build_simple_mem_ref(addr);      
      rhs = force_gimple_operand_gsi (&gsi, rhs, true, NULL, true, GSI_SAME_STMT);  

      //  atoi(argv[argc - 1]), result stored in a new artificial int temporary
      tree atoi_ty = build_function_type_list (integer_type_node, 
                                                ptr_type_node,  /* char *str */
                                               NULL_TREE);
      tree atoi_fn = build_fn_decl ("atoi", atoi_ty);
      gimple atoi_call = gimple_build_call (atoi_fn, 1, rhs, NULL_TREE);
      tree lhs = build_decl (UNKNOWN_LOCATION, VAR_DECL, NULL_TREE, integer_type_node);
      DECL_ARTIFICIAL(lhs) = 1;
      gimple_add_tmp_var (lhs);
      add_referenced_var (lhs);
      gimple_call_set_lhs (atoi_call, lhs);      
      gsi_insert_before (&gsi, atoi_call, GSI_SAME_STMT);          
      mark_symbols_for_renaming (atoi_call);

      // convert the int result to unsigned
      rhs = build1 (CONVERT_EXPR, unsigned_type_node, lhs);      
      rhs = force_gimple_operand_gsi (&gsi, rhs, true, NULL, true, GSI_SAME_STMT);  

      // a non-zero THREAD_NUM overrides the value parsed from the command line
      if (THREAD_NUM)
        rhs = build_int_cst (unsigned_type_node, THREAD_NUM);

      // thread_num_var = rhs
      gimple assign = gimple_build_assign (thread_num_var, rhs);
      gsi_insert_before (&gsi, assign, GSI_SAME_STMT);          
      add_referenced_var (thread_num_var);  
      mark_symbols_for_renaming (assign);
      

      update_ssa (TODO_update_ssa);
      switch_off_context();
      
    }
  }


  // New thread id var structure
  for (node = cgraph_nodes; node; node = node->next)
  {
    if (!valid_function_node_p (node))
      continue;

    switch_to_context(node->decl);

    // per-function local that holds the thread id
    tree tid = build_decl (UNKNOWN_LOCATION, VAR_DECL, get_identifier (THREAD_ID_NAME), unsigned_type_node);
    DECL_ARTIFICIAL (tid) = 1;
    gimple_add_tmp_var (tid);
    add_referenced_var (tid);
    
   
    // same insertion point as above: start of the first block with statements
    basic_block bb = ENTRY_BLOCK_PTR;
    gimple_seq seq = bb_seq (bb);      
    if ( gimple_seq_empty_p(seq) )
    {
      bb = single_succ(bb);
      seq = bb_seq (bb);
    }
    
    // call the omp_get_thread_num builtin into a new int temporary
    gimple_stmt_iterator gsi = gsi_start_bb (bb);    
    tree tid_func = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
    gimple call = gimple_build_call (tid_func, 0, NULL_TREE);    
    tree lhs = build_decl (UNKNOWN_LOCATION, VAR_DECL, NULL_TREE, integer_type_node);
    DECL_ARTIFICIAL(lhs) = 1;
    gimple_add_tmp_var (lhs);
    add_referenced_var (lhs);
    gimple_call_set_lhs (call, lhs);
    gsi_insert_before (&gsi, call, GSI_SAME_STMT);
    mark_symbols_for_renaming (call);
    
    // tid = (unsigned) result, then remember tid for this function
    tree rhs = build1 (CONVERT_EXPR, unsigned_type_node, lhs);      
    rhs = force_gimple_operand_gsi (&gsi, rhs, true, NULL, true, GSI_SAME_STMT);  
    gimple assign = gimple_build_assign (tid, rhs);
    gsi_insert_before (&gsi, assign, GSI_SAME_STMT);          
    mark_symbols_for_renaming (assign);
    func_set_thread_id (node, tid);

    update_ssa (TODO_update_ssa);
    switch_off_context();

  }


  /* Create iteration id array */
  {
    tree t = build_index_type (build_int_cst (NULL_TREE, MAX_THREAD_NUM)); 
    tree type = build_array_type (unsigned_type_node, t);
    iter_id_var = ipa_add_new_external_global (type, get_identifier (ITER_ID_NAME));
    insert_global_to_varpool (iter_id_var);
  }

  
}


/* Rewrite the arguments of every call statement so that selected operands
   are passed through temporaries.

   For each call-graph node accepted by valid_function_node_p, every
   GIMPLE call in every basic block is visited and each argument is
   handled according to its tree code:
     - INTEGER_CST, ADDR_EXPR, SIZEOF_EXPR: the value is copied into a new
       artificial VAR_DECL by an assignment inserted before the call, and
       the argument is replaced by that variable.  INTEGER_CST arguments of
       the va_start builtin are left alone.
     - SSA_NAME: left untouched.
     - anything else: only pointer-typed operands are gimplified and
       re-installed as the argument.
   SSA form is updated once per function after all calls were processed.  */
void canonicalize_gimple_call()
{

  struct cgraph_node *node;
  for (node = cgraph_nodes; node; node = node->next)
  {

    if (!valid_function_node_p (node))
      continue;

    switch_to_context(node->decl);
    basic_block bb;
    FOR_EACH_BB(bb)
    {          
      
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);

        if (!is_gimple_call(stmt))
          continue;

        tree func_decl = gimple_call_fndecl(stmt);
      
        // NOTE(review): `i` is int while gimple_call_num_args presumably
        // returns an unsigned count -- signed/unsigned comparison.
        for (int i = 0; i < gimple_call_num_args (stmt); i++)
        {
          tree rhs = gimple_call_arg (stmt, i);

          switch (TREE_CODE(rhs))
          {
            case INTEGER_CST :
            {
              // Constants passed to va_start are kept as they are.
              if ( func_decl == built_in_decls[BUILT_IN_VA_START] )
                break;
            }
            // Intentional fall-through: every other INTEGER_CST is handled
            // exactly like ADDR_EXPR / SIZEOF_EXPR below.
            case ADDR_EXPR :
            case SIZEOF_EXPR :
            {
              // Materialise the operand in a fresh artificial temporary,
              // assigned immediately before the call statement.
              rhs = force_gimple_operand_gsi (&gsi, rhs, true, NULL, true, GSI_SAME_STMT);  
              tree lhs = build_decl (UNKNOWN_LOCATION, VAR_DECL, NULL_TREE, TREE_TYPE(rhs));
              DECL_ARTIFICIAL(lhs) = 1;
              gimple_add_tmp_var (lhs);
              add_referenced_var (lhs);
              gimple assign = gimple_build_assign (lhs, rhs);
              mark_symbols_for_renaming (assign);
              gsi_insert_before (&gsi, assign, GSI_SAME_STMT);         
              gimple_call_set_arg (stmt, i, lhs);            
              mark_symbols_for_renaming (stmt);
              break;
            }

            case SSA_NAME : 
              break;
            
            default:
            {
              // Other operands are only touched when they are pointers.
              tree type = TREE_TYPE(rhs);
              if (TREE_CODE(type) == POINTER_TYPE)
              {
                rhs = force_gimple_operand_gsi (&gsi, rhs, true, NULL, true, GSI_SAME_STMT);  
                gimple_call_set_arg (stmt, i, rhs);            
                mark_symbols_for_renaming (stmt);
              }
              break;
            }
          }          
          
        }

      }    
    }
    // Rename the symbols marked above, once per function.
    update_ssa (TODO_update_ssa);
    switch_off_context();

  }

  
}



/* Plain record describing one partition of statements.
   NOTE(review): there is no constructor, so the three flags and the two
   statement pointers hold indeterminate values until the user assigns
   them.  The meaning of each flag is not visible in this part of the
   file -- confirm against the code that fills the record in.  */
class PARTITION
{
public:
  bool     inorder;      // presumably: partition must run in order -- TODO confirm
  bool     commutative;  // presumably: partition is commutative -- TODO confirm
  bool     reduce;       // presumably: partition is a reduction -- TODO confirm
  gimple   firststmt;    // presumably the first statement of the partition
  gimple   laststmt;     // presumably the last statement of the partition
  set<gimple>  stmts;    // gimple set

};



#define SYNCHRONIZATION 1
#if SYNCHRONIZATION

class synchronization
{
  public:
  int    id;
  gimple post; // dr id
  int    distance;
  GIMPLE_SET waits;
  static int _num;
  static tree _post_action;
  static tree _wait_action;
  
  synchronization() 
  {  }

  synchronization( gimple src, int dist )  :
    post(src),
    distance(dist)
  {   }

  bool operator<(const synchronization &s) const 
  { if (post < s.post)
      return true;
    else if (post == s.post) 
      return distance < s.distance;
    else
      return false;
  }   

};

typedef std::set< synchronization > Synchronization_set;

int synchronization::_num=0;

static tree syn_list;
static int syn_count = 0;

/* Emit, in front of the statement *GSI points at, the call
     _init_lock_array (&syn_list, syn_count + 1);
   The new call inherits the lexical block of that statement, and
   ipa_add_abnormal_goto_call_edges is run on the iterator afterwards.  */
static void
insert_begin_sync (gimple_stmt_iterator * gsi)
{
  tree fn_type = build_function_type_list (void_type_node, ptr_type_node,
                                           integer_type_node, NULL_TREE);
  tree init_fn = build_fn_decl ("_init_lock_array", fn_type);

  /* Arguments: address of the global list and the number of entries.  */
  tree lock_count = build_int_cst_type (integer_type_node, syn_count+1);
  tree list_ptr_type = build_pointer_type (TREE_TYPE(syn_list));
  tree list_addr = gimplify_build1 (gsi, ADDR_EXPR, list_ptr_type, syn_list);

  gimple init_call = gimple_build_call (init_fn, 2, list_addr, lock_count);
  gimple_set_block (init_call, gimple_block(gsi_stmt(*gsi)));
  gsi_insert_before (gsi, init_call, GSI_SAME_STMT);
  ipa_add_abnormal_goto_call_edges (*gsi);
}



/* Emit, in front of the statement *GSI points at, the argument-less call
     _destruct_lock_array ();
   The new call inherits the lexical block of that statement, and
   ipa_add_abnormal_goto_call_edges is run on the iterator afterwards.  */
static void
insert_exit_sync (gimple_stmt_iterator * gsi)
{
  tree fn_type = build_function_type_list (void_type_node, NULL_TREE);
  tree destruct_fn = build_fn_decl ("_destruct_lock_array", fn_type);

  gimple destruct_call = gimple_build_call (destruct_fn, 0);
  gimple_set_block (destruct_call, gimple_block(gsi_stmt(*gsi)));
  gsi_insert_before (gsi, destruct_call, GSI_SAME_STMT);
  ipa_add_abnormal_goto_call_edges (*gsi);
}

/*
  int syn_list[syn_count]
*/

/* Build the constructor of TYPE that initializes the synchronization list.

   Element 0 always holds the constant 1.  Elements 1..N hold the distance
   of each entry of SYNCHRONIZATION_SET, in the iteration order of the set.
   The elements are chained as a TREE_LIST of (index, value) pairs and then
   handed to build_constructor_from_list.  */
static tree
build_initializer_for_syn_list (tree type, const Synchronization_set & synchronization_set)
{
  /* Fixed leading element: [0] = 1.  */
  tree head = make_node (TREE_LIST);
  TREE_PURPOSE (head) = build_int_cst_type (integer_type_node, 0);
  TREE_VALUE (head) = build_int_cst_type (integer_type_node, 1);

  tree tail = head;
  int index = 1;
  Synchronization_set::const_iterator sync = synchronization_set.begin ();
  while (sync != synchronization_set.end ())
  {
    /* Append [index] = distance to the end of the chain.  */
    tree cell = make_node (TREE_LIST);
    TREE_PURPOSE (cell) = build_int_cst_type (integer_type_node, index);
    TREE_VALUE (cell) = build_int_cst_type (integer_type_node, sync->distance);
    TREE_CHAIN (tail) = cell;
    tail = cell;

    ++sync;
    ++index;
  }

  return build_constructor_from_list (type, head);
}


#if 0
/* Disabled draft (compiled out by the surrounding #if 0).
   For a source statement in a function at level 1 of NODE_LEVEL_MAP the
   statement itself is recorded in RESULTS; otherwise the single caller's
   call statement is followed.
   NOTE(review): this would not compile if enabled -- NODE_LEVEL_MAP is
   const but indexed with operator[], find_latest_post_position is not
   declared in the visible part of the file, and `candidates` is unused.  */
static void
find_post_positions(gimple src, const IPA_NODE_INT_MAP &node_level_map, GIMPLE_SET &results)
{
  basic_block bb = gimple_bb(src) ;
  tree decl = bb_func( bb );
  cgraph_node_ptr node = cgraph_get_node(decl);
  gcc_assert (node_level_map[node] != 0);

  GIMPLE_SET candidates;
  gimple tmp = src;

  if ( node_level_map[node] == 1 )
    results.insert(src);
  else  
  {
    // Exactly one caller is expected; continue from its call statement.
    cgraph_edge_p edge = node->callers;
    gcc_assert (!edge->next_caller);
    node = edge->caller;
    tmp = edge->call_stmt;
    find_latest_post_position (tmp);
  }

}
#endif

/* 
   Find the proper sync-placement position for the post. E.g.
   for (...)
   {
     for (...)
       source
     post source here
   }

   To sum up, the position for placing a post/wait is the point, at the main loop level,
   at which the construct that the source/sink is nested in is entered.
*/
typedef boost::adjacency_list< boost::multisetS, boost::vecS, boost::bidirectionalS, 
                              VertexProperty, EdgeProperty > SYNC_GRAPH;

typedef boost::graph_traits<SYNC_GRAPH>::edge_descriptor SYNC_EDGE;
typedef boost::graph_traits<SYNC_GRAPH>::vertex_descriptor SYNC_VERTEX;
typedef boost::graph_traits<SYNC_GRAPH>::out_edge_iterator SYNC_OEDGE_ITER;
typedef boost::graph_traits<SYNC_GRAPH>::in_edge_iterator SYNC_IEDGE_ITER;
typedef boost::graph_traits<SYNC_GRAPH>::vertex_iterator SYNC_VERTEX_ITER;
typedef boost::graph_traits<SYNC_GRAPH>::adjacency_iterator SYNC_ADJ_ITER;
typedef boost::graph_traits<SYNC_GRAPH>::edge_iterator SYNC_EDGE_ITER;
typedef enum {NULL_TYPE = 0, LOOP_TYPE, BB_TYPE} VERTEX_TYPE;

/* Minimal union-find over the integers 0..size.

   init() must be called before find()/unite(); indices outside 0..size
   are not checked.  */
class disjoint_set{
public:
    /* Return the representative of REP's set and compress the path
       walked on the way.  Iterative, so long chains cannot overflow the
       call stack.  */
    int 
    find(int rep){
        int root = rep;
        while(nodes[root] != root)
            root = nodes[root];

        // Second pass: point every element on the path directly at the root.
        while(nodes[rep] != root){
            const int next = nodes[rep];
            nodes[rep] = root;
            rep = next;
        }
        return root;
    }

    /* Merge the set containing ROOT into the set containing FROM and
       return the representative of the merged set, which is the former
       representative of FROM.  */
    int 
    unite(int root, int from){
        int a = find(root);
        int b = find(from);
        if(a != b){
            nodes[a] = b;
        }
        return b;
    }

    /* Reset to SIZE + 1 singleton sets {0}, {1}, ..., {SIZE}.
       The storage is really resized: the elements have to exist before
       they are written, reserving capacity alone is not enough.  A
       negative SIZE yields an empty structure.  */
    void
    init(int size){
        const std::vector<int>::size_type count =
            size < 0 ? 0 : static_cast<std::vector<int>::size_type>(size) + 1;
        nodes.resize(count);
        for(std::vector<int>::size_type i = 0; i < count; ++i)
            nodes[i] = static_cast<int>(i);
    }
private:
    std::vector<int> nodes;   // nodes[i] is the parent of i; roots point at themselves
};


class MISC_GRAPH : public SYNC_GRAPH{
public:
    std::vector < int > dom_depth, pdom_depth;
//    SYNC_GRAPH dom_tree;
//    SYNC_GRAPH pdom_tree;    
    MISC_GRAPH (){
        exit = 1;
    }
    ~MISC_GRAPH (){
        SYNC_VERTEX_ITER idx, end;
        for (boost::tie(idx, end) = vertices(*this); idx != end; ++idx) {
            delete dom_mask[*idx];            
            delete pdom_mask[*idx];
        }
    }
    void build_dom_tree(){
        int node;
        SYNC_VERTEX_ITER idx, end;
        boost::dynamic_bitset<> tmp(num_vertices(*this));
        
        dom_mask.reserve(num_vertices(*this));
        for (boost::tie(idx, end) = vertices(*this); idx != end; ++idx) {
            dom_mask[*idx] = new boost::dynamic_bitset<>(num_vertices(*this));
            dom_mask[*idx]->set();
        }
        dom_mask[entry]->reset();
        dom_mask[entry]->set(entry, true);
        bool nochange_flag = false;

        while(nochange_flag == false){
            nochange_flag = true;
            for (boost::tie(idx, end) = vertices(*this); idx != end; ++idx) {
                node = *idx;
                if(node != entry){
                    tmp.set();
                    SYNC_IEDGE_ITER in_i, in_end;
                    tie(in_i, in_end) = in_edges(node, *this);
                    for (; in_i != in_end; ++in_i) {
                        SYNC_VERTEX u, v;
                        SYNC_EDGE e = *in_i;
                        u = source(e, *this);
                        tmp = tmp & *dom_mask[u];
                    }            
                    tmp.set(node, true);
                    if(tmp != *dom_mask[node]){
                        *dom_mask[node] = tmp;
                        nochange_flag = false;
                    }
                }
            }
        }

        dom_depth.reserve(num_vertices(*this));
        for (boost::tie(idx, end) = vertices(*this); idx != end; ++idx) {
            dom_depth[*idx] = dom_mask[*idx]->count();
            cerr << "dom_mask " << *idx << " = " << *dom_mask[*idx] << ", count = " << dom_depth[*idx] << endl;
        }

    }
    void build_pdom_tree(){
        int node;
        SYNC_VERTEX_ITER idx, end;
        boost::dynamic_bitset<> tmp(num_vertices(*this));
        
        pdom_mask.reserve(num_vertices(*this));
        for (boost::tie(idx, end) = vertices(*this); idx != end; ++idx) {
            pdom_mask[*idx] = new boost::dynamic_bitset<>(num_vertices(*this));
            pdom_mask[*idx]->set();
        }
        pdom_mask[exit]->reset();
        pdom_mask[exit]->set(exit, true);
        bool nochange_flag = false;

        while(nochange_flag == false){
            nochange_flag = true;
            for (boost::tie(idx, end) = vertices(*this); idx != end; ++idx) {
                node = *idx;
                if(node != exit){
                    tmp.set();
                    SYNC_OEDGE_ITER out_i, out_end;
                    tie(out_i, out_end) = out_edges(node, *this);
                    for (; out_i != out_end; ++out_i) {
                        SYNC_VERTEX u, v;
                        SYNC_EDGE e = *out_i;
                        u = target(e, *this);
                        tmp = tmp & *pdom_mask[u];
                    }            
                    tmp.set(node, true);
                    if(tmp != *pdom_mask[node]){
                        *pdom_mask[node] = tmp;
                        nochange_flag = false;
                    }
                }
            }
        }

        pdom_depth.reserve(num_vertices(*this));
        for (boost::tie(idx, end) = vertices(*this); idx != end; ++idx) {
            pdom_depth[*idx] = pdom_mask[*idx]->count();
            cerr << "pdom_mask " << *idx << " = " << *pdom_mask[*idx] << ", count = " << pdom_depth[*idx] << endl;
        }
    }
/*
    std::vector< boost::dynamic_bitset<> * >
    compute_DF(int node){
        boost::dynamic_bitset<> S(num_vertices(*this));
        S.reset();
        SYNC_OEDGE_ITER out_i, out_end;
        tie(out_i, out_end) = out_edges(node, *this);
        for (; out_i != out_end; ++out_i) {
            SYNC_VERTEX u, v;
            SYNC_EDGE e = *out_i;
            u = target(e, *this);
            if(idom_p(u, m))
                S.set(u, true);
        }            
    }
*/ 
    int 
    ipdom(int node){
        SYNC_VERTEX_ITER idx, end;
        for (boost::tie(idx, end) = vertices(*this); idx != end; ++idx) {
            if(ipdom_p(*idx, node))
                return *idx;
        }
    }
   
    bool dom_p(int a, int b) {return dom_mask[b]->test(a);} // return true if a dominates b //
    bool idom_p(int a, int b) {return (dom_mask[b]->test(a) && dom_depth[b] - dom_depth[a] == 1);}
    bool pdom_p(int a, int b) {return pdom_mask[b]->test(a);} // return true if a post-dominates b //
    bool ipdom_p(int a, int b) {return (pdom_mask[b]->test(a) && pdom_depth[b] - pdom_depth[a] == 1);}
    bool dom_exit_p(int a) {return dom_p(a, exit);}
    void set_entry(int vertex) {entry = vertex;}
    void set_exit(int vertex) {exit = vertex;}
    bool isEntry(int node) {return entry == node;}
    bool isExit(int node) {return exit == node;}
    int entry, exit;    
private:
    std::vector< boost::dynamic_bitset<> * > dom_mask, pdom_mask;
};

class SYNC_VERTEX_INFO{
public:
    VERTEX_TYPE type;
    std::set<int> loop_BB_list;
};

class MY_SYNC_GRAPH : public MISC_GRAPH{
public:
    void initMap(int size){ InfoMap.reserve(size + 1); rep.init(size); memset(&InfoMap[0], 0, (size+1)*sizeof(SYNC_VERTEX_INFO));}
    int find(int num) {return rep.find(num);}
    int unite(int a, int b){return rep.unite(a, b);}
    void insertMap(int id, SYNC_VERTEX_INFO& info){ InfoMap[id] = info; }

    void map_GCC_BBs_into_my_graph(struct loop *loop){
        SYNC_VERTEX_INFO vertex;
    
        basic_block src, dest;
        struct edge_def *e;
        struct edge_iter ei;

        int loop_header_depth = loop->header->loop_depth;
        set_entry(loop->header->index);
        fprintf(stderr, "loop header = %d, depth = %d\n", loop->header->index, loop->header->loop_depth);
        
        initMap(n_basic_blocks);
        basic_block *loop_blocks = get_loop_body(loop);  

        for (unsigned int i = 0; i < loop->num_nodes; i++){
            basic_block src = loop_blocks[i];


            gcc_assert(src == BASIC_BLOCK(src->index));

            if(src->succs != NULL || src->preds != NULL){
                if(src->loop_depth == loop_header_depth){
                    vertex.type = BB_TYPE;
                }else{
                    vertex.type = LOOP_TYPE;
                }

                if(src->succs != NULL){
                    FOR_EACH_EDGE (e, ei, src->succs){
                        dest = e->dest;
                        add_edge(src->index, dest->index, *this);
                        if(vertex.type == LOOP_TYPE)
                            if(dest->loop_depth > loop_header_depth)
                                unite(src->index, dest->index);
                    }
                }             
                insertMap(src->index, vertex);
            }
        }
        // Pretend exit node as a loop type //
        // It recodrs a set of nodes pointing to exit in the future//
        vertex.type = LOOP_TYPE;
        insertMap(exit, vertex);
    }
    void simplify_to_DAG(struct loop *loop){
        int node;
        fprintf(stderr, "Simplify graph to be DAG\n");
        SYNC_VERTEX_ITER idx, end;
        for (boost::tie(idx, end) = vertices(*this); idx != end; ++idx) {
            node = *idx;
            fprintf(stderr, "  !!!Vertex(%d):\n ", node);            
            if(isBB(node)){
                SYNC_OEDGE_ITER out_i, out_end;
                for (tie(out_i, out_end) = out_edges(node, *this); out_i != out_end; ++out_i) {
                    SYNC_VERTEX u, v;
                    SYNC_EDGE e = *out_i;
                    v = target(e, *this);
                    if(v != find(v)){
                        gcc_assert(isLOOP(v));
                        fprintf(stderr, "\t!!! delete %d -> %d\n", node, v);
                        fprintf(stderr, "\t!!! add %d -> %d\n", node, find(v));
                        add_edge(node, find(v), *this);
                        boost::remove_edge(e, *this);
                    }
                }
            }else if(isLOOP(node)){
                SYNC_OEDGE_ITER out_i, out_end;
                for (tie(out_i, out_end) = out_edges(node, *this); out_i != out_end; ++out_i) {
                    SYNC_VERTEX u, v;
                    SYNC_EDGE e = *out_i;
                    u = source(e, *this);
                    v = target(e, *this);
                    if(isLOOP(v)){
                        fprintf(stderr, "\t!!! delete %d -> %d\n", node, v);
                        boost::remove_edge(e, *this);
                        add_BB_to_loop(node, v);
                    }else if(isBB(v) && node != find(node)){
                        fprintf(stderr, "\t@@@ delete %d -> %d\n", node, v);
                        fprintf(stderr, "\t@@@ add %d -> %d\n", find(node), v);
                        add_edge(find(node), v, *this);
                        boost::remove_edge(node, v, *this);
                    }
                }                
            }
        }

        delete_back_edge();
       
        build_dom_tree();
        build_pdom_tree();
    }

    void print_edges(FILE *dump_file){
        SYNC_EDGE_ITER ai, a_end;
        fprintf(dump_file, "dump all edges:\n");
        for (boost::tie(ai, a_end) = edges(*this); ai != a_end; ++ai) {
            SYNC_VERTEX u, v;
            SYNC_EDGE e = *ai;
            u = source(e, *this);
            v = target(e, *this);
            fprintf(dump_file, "\t%d -> %d\n", u, v);
        }
    }

    void print_vertexes(FILE *dump_file){
        SYNC_VERTEX_ITER idx, end;
        for (boost::tie(idx, end) = vertices(*this); idx != end; ++idx) {
            if(InfoMap[*idx].type != NULL_TYPE){
                fprintf(dump_file, "Vertex(%d) ", *idx);
                print_vertex_info(dump_file, InfoMap[*idx]);
                print_out_edges(dump_file, *idx);
                print_in_edges(dump_file, *idx);
                if(isLOOP(*idx) && *idx == find(*idx)){
                    std::set<int>::iterator iter;
                    fprintf(dump_file, "BBs in the loop: \n\t");
                    std::set <int> *bb_list = get_loop_BB_list(*idx);
                    for(iter = bb_list->begin(); iter != bb_list->end(); iter++)
                        fprintf(dump_file, "%d ", *iter);
                    fprintf(dump_file, "\n");
                }
            }
        }
    }

    bool isInGraph(int node) {return (InfoMap[node].type == BB_TYPE || InfoMap[node].type == LOOP_TYPE);}
    bool isBB(int node)  {return InfoMap[node].type == BB_TYPE;}
    bool isLOOP(int node){return InfoMap[node].type == LOOP_TYPE;}
    std::set <int> *get_loop_BB_list(int BB_index){
        gcc_assert(isLOOP(BB_index));
        return &InfoMap[find(BB_index)].loop_BB_list;
    }
    std::set <int> *get_exit_BB_list() {return &InfoMap[exit].loop_BB_list;}
private:
    disjoint_set rep;
    std::vector<SYNC_VERTEX_INFO> InfoMap; 
    void delete_back_edge(){
//        gcc_assert(in_degree(entry, *this) == 1);
        gcc_assert(degree(exit, *this) == 0);
        SYNC_IEDGE_ITER in_i, in_end;
        tie(in_i, in_end) = in_edges(entry, *this);
        for (; in_i != in_end; ++in_i) {
            SYNC_VERTEX u, v;
            SYNC_EDGE e = *in_i;
            u = source(e, *this);
            v = target(e, *this);
            add_BB_to_exit(u);
            boost::add_edge(u, exit, *this);
            boost::remove_edge(e, *this);
            fprintf(stderr, "\t### add %d -> %d\n", u, exit);
            fprintf(stderr, "\t### delete %d -> %d\n", u, v);
            fprintf(stderr, "%d %d\n", exit, u);
        }            
    }
    void add_BB_to_loop(int node1, int node2){
        gcc_assert(find(node1) == find(node2));
        InfoMap[find(node1)].loop_BB_list.insert(node1);
        InfoMap[find(node1)].loop_BB_list.insert(node2);
    }
    // exit is a special node that records a set of BBs which they point to exit //
    // utilize loop_BB_list. Nontheless, exit is not LOOP_TYPE //
    void add_BB_to_exit(int node1){
        gcc_assert(exit == 1);
        InfoMap[exit].loop_BB_list.insert(node1);
    }
    void print_vertex_info(FILE *dump_file, SYNC_VERTEX_INFO info){
        if(info.type == LOOP_TYPE){
            fprintf(dump_file, "LOOP:\n");
        }else if(info.type == BB_TYPE){
            fprintf(dump_file, "BB:\n");
        }else{
            gcc_assert(0);
        }
    }

    void print_out_edges(FILE *dump_file, SYNC_VERTEX vertex){
        SYNC_OEDGE_ITER out_i, out_end;
        tie(out_i, out_end) = out_edges(vertex, *this);
        if(out_i != out_end)
            fprintf(dump_file, "\tprint out edges:\n");
        for (; out_i != out_end; ++out_i) {
            SYNC_VERTEX u, v;
            SYNC_EDGE e = *out_i;
            u = source(e, *this);
            v = target(e, *this);
            fprintf(dump_file, "\t%d -> %d\n", u, v);
        }
    }

    void print_in_edges(FILE *dump_file, SYNC_VERTEX vertex){
        SYNC_IEDGE_ITER in_i, in_end;
        tie(in_i, in_end) = in_edges(vertex, *this);
        if(in_i != in_end)
            fprintf(dump_file, "\tprint in edges:\n");
        for (; in_i != in_end; ++in_i) {
            SYNC_VERTEX u, v;
            SYNC_EDGE e = *in_i;
            u = source(e, *this);
            v = target(e, *this);
            fprintf(dump_file, "\t%d -> %d\n", u, v);
        }            
    }

    void obselet_print_adj(FILE *dump_file){
        SYNC_VERTEX_ITER idx, end;
        for (boost::tie(idx, end) = vertices(*this); idx != end; ++idx) {
            fprintf(dump_file, "Vertex: %d\n", *idx);

            SYNC_ADJ_ITER ai, a_end;
            boost::tie(ai, a_end) = adjacent_vertices(*idx, *this);
            if (ai == a_end)
                fprintf(dump_file, "\thas no children\n");

            for (; ai != a_end; ++ai) {
                fprintf(dump_file, "\t -> %d\n", *ai);
                if (boost::next(ai) != a_end)
                    fprintf(dump_file, ",");
            }
            fprintf(dump_file, "\n");
        }
    }

};
MY_SYNC_GRAPH sync_graph;

typedef std::pair < synchronization *, GSI_SET > POST_SET;

/* Map from a synchronization to the set of statement iterators at which
   its post has to be placed.  */
class POST_SETS : public std::tr1::unordered_map < synchronization *, GSI_SET >
{
    typedef std::tr1::unordered_map < synchronization *, GSI_SET > post_map;
public:
    /* Record GSI as a post position of SYN.
       Returns true when the position was not recorded before.  */
    bool 
    insert_post(synchronization *syn, gimple_stmt_iterator gsi){
        post_map::iterator known = this->find(syn);
        if(known == this->end()){
            // First position for this synchronization: start a new set.
            GSI_SET fresh;
            const bool added = fresh.insert(gsi).second;
            this->insert(POST_SET (syn, fresh));
            return added;
        }
        return known->second.insert(gsi).second;
    }

    /* Dump every (synchronization, position) pair to stderr.  */
    void 
    print_sets(){
        for(post_map::iterator entry = this->begin(); entry != this->end(); ++entry){
            synchronization *syn = entry->first;
            GSI_SET &positions = entry->second;
            for(GSI_SET::iterator pos = positions.begin(); pos != positions.end(); ++pos){
                gimple_stmt_iterator gsi = *pos;
                fprintf(stderr, "\tsyn.id = %d, <bb %d>\t", syn->id, gsi_bb(gsi)->index);
                if (gsi_end_p (gsi))
                    continue;
                print_gimple_stmt(stderr, gsi_stmt(gsi), 0, false);
            }
        }
    }

    /* Record CALL_SITE as a post position for every synchronization that
       has an entry in CalleePostSets.  Returns true when at least one new
       position was recorded; merging a map into itself does nothing.  */
    bool 
    insert_callee(POST_SETS *CalleePostSets, gimple call_site){
        if(CalleePostSets == this)
            return false;

        bool changed = false;
        post_map::iterator callee = CalleePostSets->begin();
        while(callee != CalleePostSets->end()){
            if(this->insert_post(callee->first, gsi_for_stmt(call_site)))
                changed = true;
            ++callee;
        }
        return changed;
    }
};

/*
void POST_SETS::
adjust_gsi(gimple_stmt_iterator &gsi){

  gimple stmt_src = syn->post;
  basic_block bb = gimple_bb(stmt_src) ;
  tree decl = bb_func( bb );
  cgraph_node_ptr node = cgraph_get_node(decl);
  gcc_assert (node_level_map[node] != 0);

  while (node_level_map[node] > 1)
  {
    cgraph_edge_p edge = node->callers;
    gcc_assert (!edge->next_caller);
    node = edge->caller;
    stmt_src = edge->call_stmt;
  }

  gcc_assert (node_level_map[node] == 1);
  bb = gimple_bb(stmt_src) ;  
  int level = bb->loop_depth;

  if (node == root)
    level--;
    
  if ( level > 0 )
  {
    loop_p loop = bb->loop_father;
    while (level > 1)
    {
      loop = loop_outer (loop);
      level--;
    }

    VEC (edge, heap) * exits = get_loop_exit_edges (loop);
    edge e;
    for (int i = 0; VEC_iterate (edge, exits, i, e); i++)
    {
      basic_block bb = e->dest;
      insert_post(syn, node, gsi_after_labels(bb));
    }  
    VEC_free (edge, heap, exits);
  }
  else
  {
    gimple_stmt_iterator gsi;
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      if (gsi_stmt (gsi) == stmt_src)
          break;    
    insert_post(syn, node, gsi);
    }
    }*/


typedef std::tr1::unordered_map < cgraph_node_ptr, POST_SETS > NODE_POST_HASH;
static NODE_POST_HASH NodePostMap;

/* Dump to stderr the post sets of every call-graph node that has an
   entry in NodePostMap.  Each node is dumped inside its own function
   context.  */
static void
print_post_sets(){
    fprintf(stderr, "dump post sets : \n");
    for(struct cgraph_node *node = cgraph_nodes; node; node = node->next){
        switch_to_context(node->decl);
        NODE_POST_HASH::iterator NodePostIt = NodePostMap.find(node);
        if(NodePostIt != NodePostMap.end()){
            fprintf(stderr, "node (%s): \n", cgraph_node_name(node));
            // Print in place; the whole map does not need to be copied.
            NodePostIt->second.print_sets();
        }
        switch_off_context();
    }
}

typedef std::pair < int, gimple_stmt_iterator > GSI_POINT;

class FarthestNode{
public:
    bool operator()(GSI_POINT& p1, GSI_POINT& p2){
        if(p1.first < p2.first) return true;
        else return false;
    }
};

/* Walk block ProcessBB backwards from its last statement and return an
   iterator at the first statement met that is either SRC or a call.
   When the block contains neither, the iterator at the start of the block
   is returned.  ProcessBB must be a BB_TYPE vertex of sync_graph.  */
static inline gimple_stmt_iterator
simple_percolate_in_BB(gimple src, int ProcessBB){
    gcc_assert(sync_graph.isBB(ProcessBB));

    basic_block block = BASIC_BLOCK(ProcessBB);
    gimple_stmt_iterator cursor = gsi_last_bb(block);
    while (!gsi_end_p (cursor)){
        gimple current = gsi_stmt(cursor);
        // The post goes right at its own source, or at the nearest call.
        if (current == src || gimple_code(current) == GIMPLE_CALL)
            return cursor;
        gsi_prev (&cursor);
    }
    return gsi_start_bb(block);
}

/* Data-flow transfer function of a single statement.

   OUT becomes IN minus every synchronization that STMT resolves:
     - synchronizations whose post statement is STMT itself;
     - when STMT is a call, synchronizations that have an entry in the
       post sets registered for the function that contains STMT.
   IN and OUT must be different objects.

   NOTE(review): the post sets consulted belong to the function containing
   STMT (bb_func of its block), not to the called function -- confirm that
   this is intended.  */
static inline void
transfer_function_for_stmt(std::set <synchronization *> &IN, std::set <synchronization *> &OUT, gimple stmt){
    OUT = IN;

    // Looked up on first use only, so that the assertion below still fires
    // only when a call actually has to be checked.
    const POST_SETS *enclosing_posts = NULL;

    for(std::set <synchronization *>::iterator iter = IN.begin(); iter != IN.end(); ++iter){
        synchronization *syn = *iter;
        if(syn->post == stmt){
            OUT.erase(syn);
            continue;
        }
        if(gimple_code(stmt) != GIMPLE_CALL)
            continue;

        if(enclosing_posts == NULL){
            basic_block bb = gimple_bb(stmt) ;
            tree decl = bb_func( bb );
            cgraph_node_ptr node = cgraph_get_node(decl);
            NODE_POST_HASH::iterator NodePostIter = NodePostMap.find(node);
            gcc_assert(NodePostIter != NodePostMap.end());
            enclosing_posts = &NodePostIter->second;
        }
        // Keys are unique, so one lookup replaces the former full scan.
        if(enclosing_posts->find(syn) != enclosing_posts->end())
            OUT.erase(syn);
    }
}
/* Apply the statement transfer function to every statement of block
   ProcessNode, from the last statement to the first.  IN is overwritten
   with the intermediate result after each statement, so on return IN and
   OUT are equal.  OUT is left untouched when the block has no statement.  */
static inline void
transfer_function_for_BB(std::set <synchronization *> &IN, std::set <synchronization *> &OUT, int ProcessNode){
    gimple_stmt_iterator cursor = gsi_last_bb(BASIC_BLOCK(ProcessNode));
    while (!gsi_end_p (cursor)){
        transfer_function_for_stmt(IN, OUT, gsi_stmt(cursor));
        IN = OUT;
        gsi_prev (&cursor);
    }
}
    

/* Transfer function of one vertex of sync_graph.

   A BB_TYPE vertex is processed as a single block.  For a LOOP_TYPE
   vertex, every block recorded for its group is processed in turn, each
   one starting from the result of the previous one.  Nothing is done, and
   OUT is left untouched, when IN is empty.  IN is taken by value because
   it is consumed as working storage.  */
static inline void
transfer_function_for_node(std::set <synchronization *> IN, std::set <synchronization *> &OUT, int ProcessNode){
    if(IN.empty())
        return;
    if(sync_graph.isBB(ProcessNode)){
        transfer_function_for_BB(IN, OUT, ProcessNode);
    }else if(sync_graph.isLOOP(ProcessNode)){
        std::set<int>::iterator iter;
        // The dump file is only open when dumping was requested.
        if(dump_file)
            fprintf(dump_file, "BBs in the loop: \n\t");
        std::set <int> *bb_list = sync_graph.get_loop_BB_list(sync_graph.find(ProcessNode));
        for(iter = bb_list->begin(); iter != bb_list->end(); iter++)
            fprintf(stderr, "%d ", *iter);
        fprintf(stderr, "\n");
        
        for(iter = bb_list->begin(); iter != bb_list->end(); iter++){
            fprintf(stderr, "Process BB <%d> ", *iter);
            transfer_function_for_BB(IN, OUT, *iter);            
        }
    }else{
        gcc_assert(0); // impossible
    }
}

// A &= B
// Keeps in syn1 only the elements that are also in syn2.  Both sets are
// walked in step; elements are ordered with the set's own comparator, the
// only ordering that is guaranteed to match the iteration order.
static void
intersect(std::set <synchronization *> &syn1, const std::set <synchronization *> &syn2){
    const std::set<synchronization *>::key_compare before = syn1.key_comp();
    std::set<synchronization *>::iterator it1 = syn1.begin();
    std::set<synchronization *>::const_iterator it2 = syn2.begin();
    while ( (it1 != syn1.end()) && (it2 != syn2.end()) ) {
        if (before(*it1, *it2)) {
            // Only in syn1: drop it (advance first, then erase the old position).
            syn1.erase(it1++);
        }else if (before(*it2, *it1)) {
            ++it2;
        }else{
            // *it1 == *it2
            ++it1;
            ++it2;
        }
    }
    // Whatever is left in syn1 has no counterpart in syn2.
    syn1.erase(it1, syn1.end());
}

// A -= B
// Removes from syn1 every element that is also in syn2.  Both sets are
// walked in step; elements are ordered with the set's own comparator, the
// only ordering that is guaranteed to match the iteration order.
static void
deletion(std::set <synchronization *> &syn1, const std::set <synchronization *> &syn2){
    if (&syn1 == &syn2) {
        // A -= A: erasing while the second cursor walks the same set would
        // invalidate that cursor, so empty the set directly.
        syn1.clear();
        return;
    }

    const std::set<synchronization *>::key_compare before = syn1.key_comp();
    std::set<synchronization *>::iterator it1 = syn1.begin();
    std::set<synchronization *>::const_iterator it2 = syn2.begin();
    while ( (it1 != syn1.end()) && (it2 != syn2.end()) ) {
        if (before(*it1, *it2)) {
            ++it1;
        }else if (before(*it2, *it1)) {
            ++it2;
        }else{
            // *it1 == *it2
            syn1.erase(it1++);
            ++it2;
        }
    }
}

/* For a LOOP_TYPE vertex ProcessBB: for every synchronization in SYN_SET
   and every edge that leaves one of the group's blocks towards a BB_TYPE
   vertex, record a post position in that synchronization's set.

   NOTE(review): the recorded position is always the start of block
   ProcessBB itself, not of the edge's destination, so every qualifying
   edge records the same iterator -- confirm that this is intended.
   NOTE(review): every synchronization of SYN_SET is expected to have an
   entry in POST_SETS already; the lookup result is not checked.  */
static void
adjust_post_set_in_Loop(int ProcessBB, std::set <synchronization *> &syn_set, POST_SETS *post_sets){
    gcc_assert(sync_graph.isLOOP(ProcessBB));
    std::set<int> *members = sync_graph.get_loop_BB_list(ProcessBB);

    std::set<synchronization *>::iterator syn_it = syn_set.begin();
    for(; syn_it != syn_set.end(); syn_it++){
        GSI_SET &positions = post_sets->find(*syn_it)->second;

        std::set<int>::iterator member_it = members->begin();
        for(; member_it != members->end(); member_it++){
            SYNC_OEDGE_ITER edge_it, edge_end;
            for (boost::tie(edge_it, edge_end) = out_edges(*member_it, sync_graph);
                 edge_it != edge_end; ++edge_it) {
                const SYNC_VERTEX dest = target(*edge_it, sync_graph);
                // Only edges that leave towards a plain block matter.
                if(!sync_graph.isBB(dest))
                    continue;
                gcc_assert(!sync_graph.isExit(dest));
                gcc_assert(sync_graph.isInGraph(dest));
                positions.insert(gsi_start_bb(BASIC_BLOCK(ProcessBB)));
            }
        }
    }
}

/* For a BB_TYPE vertex ProcessBB: walk the block backwards and, at every
   statement that removes synchronizations from the live set, record that
   statement's iterator as a post position of each removed
   synchronization.  IN is consumed and must be empty once the whole block
   has been walked.

   NOTE(review): every removed synchronization is expected to have an
   entry in POST_SETS already; the lookup result is not checked.  */
static void
adjust_post_set_in_BB(int ProcessBB, std::set <synchronization *> &IN, POST_SETS *post_sets){
    gcc_assert(sync_graph.isBB(ProcessBB));

    std::set <synchronization *> after;
    gimple_stmt_iterator cursor = gsi_last_bb(BASIC_BLOCK(ProcessBB));
    while (!gsi_end_p (cursor)){
        transfer_function_for_stmt(IN, after, gsi_stmt(cursor));
        if(IN != after){
            // resolved = IN - after: what this statement took care of.
            std::set <synchronization *> resolved(IN);
            deletion(resolved, after);
            std::set<synchronization *>::iterator syn_it = resolved.begin();
            for(; syn_it != resolved.end(); syn_it++)
                post_sets->find(*syn_it)->second.insert(cursor);
        }
        IN = after;
        gsi_prev (&cursor);
    }
    gcc_assert(IN.empty());
}    

/* Pick a statement position for code that has to run on edge E and store it
   in *GSI.

   - If E's destination has a single predecessor (it must not be the exit
     block), *GSI is the first statement after any leading labels.  For an
     empty block *GSI is the end iterator; for a block made only of labels it
     is the last label.
   - Otherwise, if E's source has a single successor (E must not be abnormal
     and the source must not be the entry block), *GSI is the last statement
     of the source when the block is empty or that statement does not end the
     block.  A trailing GIMPLE_RETURN / GIMPLE_RESX is asserted never to
     happen.
   - In every other case E is split and the search restarts on the edge that
     enters the newly created block. */
static void
gimple_find_edge_insert_loc (edge e, gimple_stmt_iterator *gsi)
{
    for (;;){
        basic_block dest = e->dest;

        if (single_pred_p (dest)){
            gcc_assert(dest != EXIT_BLOCK_PTR);
            *gsi = gsi_start_bb (dest);
            if (gsi_end_p (*gsi))
                return;

            /* Skip leading labels; fall back to the last statement when the
               block holds nothing but labels.  */
            while (gimple_code (gsi_stmt (*gsi)) == GIMPLE_LABEL){
                gsi_next (gsi);
                if (gsi_end_p (*gsi)){
                    *gsi = gsi_last_bb (dest);
                    return;
                }
            }
            return;
        }

        basic_block src = e->src;

        if (single_succ_p (src)){
            gcc_assert((e->flags & EDGE_ABNORMAL) == 0);
            gcc_assert(src != ENTRY_BLOCK_PTR);

            *gsi = gsi_last_bb (src);
            if (gsi_end_p (*gsi))
                return;

            gimple last = gsi_stmt (*gsi);
            if (!stmt_ends_bb_p (last))
                return;

            if (gimple_code (last) == GIMPLE_RETURN
                || gimple_code (last) == GIMPLE_RESX){
                gcc_assert(0);
                return;
            }
            /* any other block-ending statement: fall through and split */
        }

        /* Neither end of the edge is usable: split it and retry on the edge
           leading into the new block.  */
        e = single_pred_edge (split_edge (e));
    }
}

/* Rebuild the post sites in POST_SETS from the per-vertex IN/OUT sets.

   For every vertex i whose IN[i] is non-empty:
   - if OUT[i] differs from IN[i], a copy of IN[i] is passed through
     deletion() with OUT[i] and handed to adjust_post_set_in_Loop or
     adjust_post_set_in_BB, depending on the vertex kind;
   - for every CFG predecessor edge of BASIC_BLOCK(i) whose source block has
     an IN set different from OUT[i], a copy of OUT[i] is passed through
     deletion() with that IN set, and each synchronization left gets a site
     on the edge, located by gimple_find_edge_insert_loc.

   When the chosen location is an empty block, a GIMPLE_NOP is inserted so
   that the stored iterator refers to a real statement.

   IN and OUT are only read; POST_SETS must already contain an entry for
   every synchronization that appears in them. */
static void
adjust_post_sets(std::vector< std::set <synchronization *> > &IN, std::vector< std::set <synchronization *> > &OUT, POST_SETS *post_sets){
    std::set <synchronization *> tmp;
    const int vertex_count = static_cast<int>(IN.size());

    for(int i = 0; i < vertex_count; i++){
        if(IN[i].empty())
            continue;

        if(OUT[i] != IN[i]){
            tmp = IN[i];
            deletion(tmp, OUT[i]);
            if(sync_graph.isLOOP(i))
                adjust_post_set_in_Loop(i, tmp, post_sets);
            else if(sync_graph.isBB(i))
                adjust_post_set_in_BB(i, tmp, post_sets);
            else
                gcc_assert(0);
        }

        basic_block src, dest = BASIC_BLOCK(i);
        struct edge_def *e;
        struct edge_iter ei;
        if(dest->preds != NULL){
            FOR_EACH_EDGE (e, ei, dest->preds){
                src = e->src;
                if(OUT[dest->index] != IN[src->index]){
                    tmp = OUT[dest->index];
                    deletion(tmp, IN[src->index]);
                    for(std::set<synchronization *>::iterator synIter = tmp.begin(); synIter != tmp.end(); synIter++){
                        std::tr1::unordered_map < synchronization *, GSI_SET >::iterator entry = post_sets->find(*synIter);
                        // every synchronization seen here must already own a post set
                        gcc_assert(entry != post_sets->end());

                        gimple_stmt_iterator gsi;
                        gimple_find_edge_insert_loc (e, &gsi);

                        if(gsi_end_p(gsi)){
                            // empty block: add a placeholder so gsi points at a statement
                            gsi_insert_after (&gsi, gimple_build_nop(), GSI_NEW_STMT);
                            update_cgraph();

                            basic_block bb = gimple_bb(gsi_stmt(gsi));
                            fprintf(stderr, "bb %d\n", bb->index);

                            tree decl = bb_func( bb );
                            fprintf(stderr, "check %p\n", (void *) decl);
                        }

                        entry->second.insert(gsi);
                    }
                }
            }
        }
    }
    fprintf(stderr, "end\n");
}

static void
percolate_in_target_loop(struct loop *loop){
    struct cgraph_node *root = cgraph_get_node (loop_func_decl(loop));
    NODE_POST_HASH::iterator NodePostIter;
    POST_SETS *post_sets;
    
    NodePostIter = NodePostMap.find(root);
    gcc_assert(NodePostIter != NodePostMap.end());
    post_sets = &NodePostIter->second;

    std::vector < std::set <synchronization *> > IN, OUT;
    IN.resize(num_vertices(sync_graph)+1);
    OUT.resize(num_vertices(sync_graph)+1);
    std::tr1::unordered_map < synchronization *, GSI_SET >::iterator PostSetsIter;
    for(PostSetsIter = post_sets->begin(); PostSetsIter != post_sets->end(); PostSetsIter++){
        synchronization *syn = PostSetsIter->first;
        GSI_SET *post_sites = &PostSetsIter->second;
        for (GSI_SET::iterator iter = post_sites->begin(); iter != post_sites->end(); ++iter){
            gimple_stmt_iterator gsi = *iter;
            int bb_index = gsi_bb(gsi)->index;
            IN[bb_index].insert(syn);
        }
        
        // clear post sets
        post_sites->clear();
    }

    fprintf(stderr, "\n\npercolate in the target loop:\n");

    std::set <int> worklist;
    
    fprintf(stderr, "print all IN\n");
    for(unsigned int i = 0 ; i < IN.size(); i++){
        if(IN[i].empty())
            continue;
        
        fprintf(stderr, "IN <%d> = ", i); 
        std::set <synchronization *> :: iterator iter;
        fprintf(stderr, "{ ");
        for(iter = IN[i].begin(); iter != IN[i].end(); iter++){
            fprintf(stderr, "%d ", (*iter)->id);
        }
        fprintf(stderr, "}\n");

        
        worklist.insert(i);
    }


    while(!worklist.empty()){
        int ProcessNode = *worklist.begin();
        worklist.erase(worklist.begin());

        transfer_function_for_node(IN[ProcessNode], OUT[ProcessNode], ProcessNode);

        fprintf(stderr, "Process bb <%d>:\n", ProcessNode);
        
        SYNC_IEDGE_ITER in_i, in_end;
        tie(in_i, in_end) = in_edges(ProcessNode, sync_graph);
        for (; in_i != in_end; ++in_i) {
            SYNC_VERTEX u;
            SYNC_EDGE e = *in_i;
            u = source(e, sync_graph);
            fprintf(stderr, "\t@%d -> %d\n", u, ProcessNode);

            std::set <synchronization *> tmpIN;
            SYNC_OEDGE_ITER out_i, out_end;
            int first = true;
            tie(out_i, out_end) = out_edges(u, sync_graph);
            for (; out_i != out_end; ++out_i) {
                SYNC_VERTEX v;
                e = *out_i;
                v = target(e, sync_graph);
                fprintf(stderr, "\t!%d -> %d\n", u, v);
                if(first == true){
                    first = false;
                    tmpIN = OUT[v];
                }else{
                    //tmpIN &= OUT[v];
                    intersect(tmpIN, OUT[v]);
                }
            }

            if(tmpIN != IN[u]){
                IN[u] = tmpIN;
                worklist.insert(u);
            }

            std::set <int>::iterator Iter;
            fprintf(stderr, "worklist = { ");
            for(Iter = worklist.begin();Iter != worklist.end(); Iter++)
                fprintf(stderr, "%d ", *Iter);
            fprintf(stderr, "}\n");
        }
    }

    fprintf(stderr, "print all IN\n");
    for(unsigned int i = 0 ; i < IN.size(); i++){
        if(IN[i].empty())
            continue;
        
        fprintf(stderr, "IN <%d> = ", i); 
        std::set <synchronization *> :: iterator iter;
        fprintf(stderr, "{ ");
        for(iter = IN[i].begin(); iter != IN[i].end(); iter++){
            fprintf(stderr, "%d ", (*iter)->id);
        }
        fprintf(stderr, "}\n");
    }

    fprintf(stderr, "print all OUT\n");
    for(unsigned int i = 0 ; i < OUT.size(); i++){
        if(OUT[i].empty())
            continue;
        
        fprintf(stderr, "OUT <%d> = ", i); 
        std::set <synchronization *> :: iterator iter;
        fprintf(stderr, "{ ");
        for(iter = OUT[i].begin(); iter != OUT[i].end(); iter++){
            fprintf(stderr, "%d ", (*iter)->id);
        }
        fprintf(stderr, "}\n");
    }

    // insert correct posts
    adjust_post_sets(IN, OUT, post_sets);
    
}

static void 
fix_post_sets(struct loop *loop){
    struct cgraph_node *root = cgraph_get_node (loop_func_decl(loop));
    NODE_POST_HASH::iterator NodePostIt;
    POST_SETS *post_sets;
    NodePostIt = NodePostMap.find(root);
    gcc_assert(NodePostIt != NodePostMap.end());
    post_sets = &NodePostIt->second;
    
    std::tr1::unordered_map < synchronization *, GSI_SET >::iterator mapIter;
    for(mapIter = post_sets->begin(); mapIter != post_sets->end(); mapIter++){
        std::priority_queue <GSI_POINT, vector<GSI_POINT>, FarthestNode> queue;
        synchronization *syn = mapIter->first;
        gimple src = syn->post;
        std::set <int> bb_set;
        GSI_SET *gsi_set = &mapIter->second;
        GSI_SET::iterator gsiIt;
        // eliminate gsi that does not contain in the loop body //
        for(gsiIt = gsi_set->begin(); gsiIt != gsi_set->end(); gsiIt++){
            gimple_stmt_iterator gsi = *gsiIt;
            if(!sync_graph.isInGraph(gsi_bb(gsi)->index)){
                fprintf(stderr, "\tDelete::: syn.id = %d, <bb %d>\t", syn->id, gsi_bb(gsi)->index);
                print_gimple_stmt(stderr, gsi_stmt(gsi), 0, false);
                gsi_set->erase(gsiIt);
            }else{
                bb_set.insert(gsi_bb(gsi)->index);
            }
        }

        while(!(bb_set.size() == 1 && sync_graph.dom_exit_p(*bb_set.begin())) ){
            std::set <int>:: iterator bbIter, maxIter;
            int max_depth = 0;
            for(bbIter = bb_set.begin(); bbIter != bb_set.end(); bbIter++){
                int bbNum = *bbIter;
                int bbDepth = sync_graph.pdom_depth[bbNum];
                if(bbDepth > max_depth){
                    maxIter = bbIter;
                    max_depth = bbDepth;
                }
            }
            int ProcessBB = *maxIter;
            bb_set.erase(maxIter);
            bb_set.insert(sync_graph.ipdom(ProcessBB));
        }

        int ProcessBB = *bb_set.begin();
        bb_set.clear();
        gsi_set->clear();

        std::set<int>::iterator bbIter;
        std::set <int> *bb_list;
        if(sync_graph.isBB(ProcessBB)){
            if(sync_graph.isExit(ProcessBB)){
                bb_list = sync_graph.get_exit_BB_list();
                for(bbIter = bb_list->begin(); bbIter != bb_list->end(); bbIter ++){
                    gimple_stmt_iterator gsi = simple_percolate_in_BB(src, *bbIter);
                    gsi_set->insert(gsi);                    
                }
            }else{
                // percolate in bb
                gimple_stmt_iterator gsi = simple_percolate_in_BB(src, ProcessBB);
                gsi_set->insert(gsi);
            }
        }else if(sync_graph.isLOOP(ProcessBB)){
            // insert post to out edges if the node isLOOP //
            bb_list = sync_graph.get_loop_BB_list(ProcessBB);
            for(bbIter = bb_list->begin(); bbIter != bb_list->end(); bbIter++){
                int CurrentBB = *bbIter;
                SYNC_OEDGE_ITER out_i, out_end;
                tie(out_i, out_end) = out_edges(CurrentBB, sync_graph);
                for (; out_i != out_end; ++out_i) {
                    SYNC_VERTEX u, v;
                    SYNC_EDGE e = *out_i;
                    u = source(e, sync_graph);
                    v = target(e, sync_graph);
                    // Check out edges if the destination is BB type
                    if(sync_graph.isBB(v)){
                        gcc_assert(!sync_graph.isExit(v));
                        gcc_assert(sync_graph.isInGraph(v));
                        // insert post
                        // percolate in bb
                        gimple_stmt_iterator gsi = simple_percolate_in_BB(src, v);
                        gsi_set->insert(gsi);
                    }
                }
            }
        }else{
            gcc_assert(0); // impossible
        }
    }
}

/* Register the post statement of SYN as a post site of the function that
   contains it.  The site is added to that function's entry in NodePostMap;
   if the function has no entry yet, one is created holding just this site. */
static inline void
gather_post(synchronization *syn)
{
    gimple stmt_src = syn->post;
    gimple_stmt_iterator gsi = gsi_for_stmt(stmt_src);
    basic_block bb = gimple_bb(stmt_src);
    cgraph_node_ptr node = cgraph_get_node(bb_func( bb ));

    NODE_POST_HASH::iterator NodePostIt = NodePostMap.find(node);
    if(NodePostIt != NodePostMap.end()){
        NodePostIt->second.insert_post(syn, gsi);
        return;
    }

    // first post seen for this function
    POST_SETS post_sets;
    post_sets.insert_post(syn, gsi);
    NodePostMap.insert(pair<cgraph_node_ptr, POST_SETS>(node, post_sets));
}

/*
static void
insert_wait_sets_for_post(){
    // insert a wait primitive right before each post //
    insert_wait_sets_for_post();

    for (Synchronization_set::iterator iter = synchronization_set.begin();  
         iter != synchronization_set.end(); ++iter ){
        synchronization *syn = (synchronization *)&(*iter);
        GSI_SET *wait_set = &syn->waits;
        GSI_SET::const_iterator postIter;
        for(postIter = syn->posts.
    }

    }*/

static void
process_post_sets(struct loop *loop){
    struct cgraph_node *root = cgraph_get_node (loop_func_decl(loop));
    std::set < struct cgraph_node * > worklist;
    std::set < struct cgraph_node * >::iterator witer;
    struct cgraph_node *node;
    struct cgraph_edge *edge;

    for(node = cgraph_nodes; node; node = node->next)
        worklist.insert(node);

    fprintf(stderr, "\n\nbefore process:\n");
    print_post_sets();

    fprintf(stderr, "\n\nprocessing post sets:\n");
    // Compute the post set for each function node //
    while(worklist.size() != 0){
        struct cgraph_node *callee;
        NODE_POST_HASH::iterator NodePostIter;
        witer = worklist.begin();
        callee = *witer;
        worklist.erase(witer);
//        fprintf(stderr, "node: %s\n", cgraph_node_name(callee));

        NodePostIter = NodePostMap.find(callee);
        if(NodePostIter == NodePostMap.end())
            continue;

        POST_SETS *CalleePostSets = &NodePostIter->second;
       
        for(edge = callee->callers; edge; edge = edge->next_caller){
            struct cgraph_node *caller;
            gimple call_site;
            call_site = edge->call_stmt;
            caller = edge->caller;
            
            NodePostIter = NodePostMap.find(caller);
            POST_SETS *CallerPostSets, post_sets;
            if(NodePostIter != NodePostMap.end())
                CallerPostSets = &NodePostIter->second;
            else{
                CallerPostSets = &post_sets;
            }

            if(CallerPostSets->insert_callee(CalleePostSets, call_site)){
                worklist.insert(caller);
                fprintf(stderr, "add callee's posts (%s) to caller (%s)\nCaller's post set:\n", cgraph_node_name(callee), cgraph_node_name(caller)); 
                CallerPostSets->print_sets();
                NodePostMap.insert(pair<cgraph_node_ptr, POST_SETS>(caller, *CallerPostSets));
                fprintf(stderr, "\n\n");
            }
        }
    }


    switch_to_context(root->decl);

    // Build DAG upon CFG: every node may be an outermost loop or a basic block //
    sync_graph.map_GCC_BBs_into_my_graph(loop);
    fprintf(stderr, "Before simplification:\n");
    sync_graph.print_vertexes(stderr);
    
    sync_graph.simplify_to_DAG(loop);
    sync_graph.print_vertexes(stderr);
    sync_graph.print_edges(stderr);


    // Fix post sets upon the DAG //
    fprintf(stderr, "Before fixing:\n");
    print_post_sets();
    
    if(PERCOLATE_LEVEL == 0){
        fix_post_sets(loop);
    }else if(PERCOLATE_LEVEL == 1){
        fix_post_sets(loop);
        percolate_in_target_loop(loop);
    }
    
    fprintf(stderr, "After fixing:\n");
    print_post_sets();
    
    switch_off_context();    
}

/* Emit the calls to POST_FN for every post site recorded for ROOT in
   NodePostMap.

   For each site a call post_fn(syn->id, iter_id_var[tid], syn->distance) is
   inserted after the statement the site points at; operands that need
   gimplification are inserted before that statement.  If the site statement
   is a GIMPLE_NOP it is removed after the call has been inserted.  The new
   call is also added to the synchronization's wait set, so a wait is later
   placed right before each post. */
static void
place_post_without_percolate(tree post_fn, cgraph_node_ptr root){
    NODE_POST_HASH::iterator NodePostIter = NodePostMap.find(root);
    gcc_assert(NodePostIter != NodePostMap.end());
    // iterate the stored sets in place; nothing below adds or removes sites
    POST_SETS &post_sets = NodePostIter->second;

    std::tr1::unordered_map < synchronization *, GSI_SET >::iterator PostSetsIter;

    switch_to_context(root->decl);
    for(PostSetsIter = post_sets.begin(); PostSetsIter != post_sets.end(); PostSetsIter++){
        synchronization *syn = PostSetsIter->first;
        GSI_SET &post_sites = PostSetsIter->second;

        tree tid = func_thread_id (root);
        tree iteration_id = build4 (ARRAY_REF, unsigned_type_node, iter_id_var, tid, NULL_TREE, NULL_TREE);

        // insert after each gsi
        for (GSI_SET::iterator iter = post_sites.begin(); iter != post_sites.end(); ++iter)
        {
            gimple_stmt_iterator gsi = *iter;
            tree t_edge_index = build_int_cst_type (integer_type_node, syn->id);
            t_edge_index = force_gimple_operand_gsi(&gsi, t_edge_index, true, NULL, true, GSI_SAME_STMT);
            tree t_iteration_id = force_gimple_operand_gsi(&gsi, iteration_id, true, NULL, true, GSI_SAME_STMT);
            tree t_distance = build_int_cst_type (integer_type_node, syn->distance);
            gimple post_call = gimple_build_call (post_fn, 3, t_edge_index, t_iteration_id, t_distance, NULL_TREE);

            gimple_set_block (post_call, gimple_block(gsi_stmt(gsi)));

            // NOTE(review): if one GIMPLE_NOP site is shared by several
            // synchronizations, later iterations would hold an iterator to an
            // already removed statement -- confirm this cannot happen.
            const bool delFlag = (gimple_code(gsi_stmt(gsi)) == GIMPLE_NOP);

            gsi_insert_after (&gsi, post_call, GSI_SAME_STMT);
            ipa_add_abnormal_goto_call_edges (gsi);

            if(delFlag)
                gsi_remove(&gsi, true);
            // insert a wait primitive right before each post primitive (update wait sets)//
            syn->waits.insert(post_call);
        }
    }
    switch_off_context();
    dump_all_nodes("myNodes2");
}

/*
   Find the proper synchronization placement position for a post. E.g.
   for (...)
   {
     for (...)
       source
     post source here
   }

   To sum up, a post/wait is placed at the main loop level, at the point where
   the construct that the source/sink is nested in is entered.
*/

static void
place_wait(const synchronization &syn, tree wait_fn, IPA_NODE_INT_MAP &node_level_map, cgraph_node_ptr root)
{

  for (GIMPLE_SET::const_iterator witer = syn.waits.begin(); witer != syn.waits.end(); ++witer)
  {
  
    gimple stmt_sink = *witer;
    basic_block bb = gimple_bb(stmt_sink) ;
    tree decl = bb_func( bb );
    cgraph_node_ptr node = cgraph_get_node(decl);
    gcc_assert (node_level_map[node] != 0);

    while (node_level_map[node] > 1)
    {
      cgraph_edge_p edge = node->callers;
//      gcc_assert (!edge->next_caller);
      node = edge->caller;
      stmt_sink = edge->call_stmt;
    }

    gcc_assert (node_level_map[node] == 1);
    bb = gimple_bb(stmt_sink) ;  
    int level = bb->loop_depth;
    GSI_SET  post_sites;
      
    if ( level > 0 && node != root)
    {
      loop_p loop = bb->loop_father;
      while (level > 1)
      {
        loop = loop_outer (loop);
        level--;
      } 
      post_sites.insert(gsi_last_bb(loop->header));      
    }
    else
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        if (gsi_stmt (gsi) == stmt_sink)
            break;    
      post_sites.insert(gsi);
    }

    gcc_assert (!post_sites.empty());
      
    switch_to_context(node->decl);
    tree tid = func_thread_id (node);  
    tree iteration_id = build4 (ARRAY_REF, unsigned_type_node, iter_id_var, tid, NULL_TREE, NULL_TREE);    

    // insert after each gsi  
    for (GSI_SET::iterator iter = post_sites.begin(); iter != post_sites.end(); ++iter)
    {
      gimple_stmt_iterator gsi = *iter;    
      tree t_edge_index = build_int_cst_type (integer_type_node, syn.id);
      t_edge_index = force_gimple_operand_gsi(&gsi, t_edge_index, true, NULL, true, GSI_SAME_STMT);
      tree t_iteration_id = force_gimple_operand_gsi(&gsi, iteration_id, true, NULL, true, GSI_SAME_STMT);
      tree t_distance = build_int_cst_type (integer_type_node, syn.distance);
      gimple post_call = gimple_build_call (wait_fn, 3, t_edge_index, t_iteration_id, t_distance, NULL_TREE);      
      gimple_set_block (post_call, gimple_block(gsi_stmt(gsi)));
      gsi_insert_before (&gsi, post_call, GSI_SAME_STMT);
      ipa_add_abnormal_goto_call_edges (gsi);
    }
    switch_off_context();
  }
}





/* Choose the address space for operand OPND of STMT: returns TID when the
   reference is treated as thread-private and integer_zero_node (the main
   space) in every other case.

   TID is returned only when the reference has a dependence class with no
   loop-carried RAW dependence, at least one loop-carried WAW or WAR
   dependence, and its live_after_loop flag clear.  REF is not used. */
static tree
which_address_space(gimple stmt, tree ref, int opnd, tree tid)
{
  tree main_thread_id = integer_zero_node;  // main space

  ipa_data_reference * dr = tree_data_reference (stmt, opnd);
  if (!dr || dpc.dr_class_map.find(dr->uid()) == dpc.dr_class_map.end())
    return main_thread_id;

  dep_class_rep *rep = dpc.dr_class_map[dr->uid()];

  // a loop-carried RAW always needs synchronization
  if ( rep->has_loop_carried_raw )
    return main_thread_id;

  const bool waw_or_war = rep->has_loop_carried_waw || rep->has_loop_carried_war;

  // privatizable: only WAW/WAR dependences and the value is dead after the loop
  if ( waw_or_war && !rep->live_after_loop )
    return tid;

  // either still live after the loop, or no dependence at all
  return main_thread_id;
}





/* Tell whether data reference DR can be treated as privatizable with respect
   to LOOP.

   Reads are always accepted.  A write is rejected if it is virtual or live
   after the loop; it is accepted if it has no dependence class.  With a
   dependence class it is accepted only when there is no loop-carried RAW,
   there is a loop-carried WAW or WAR, and the class is not marked
   live_after_loop. */
static bool
upp_privatizable(ipa_data_reference *dr, loop_p loop)
{
  if (DR_IS_READ(dr))
    return true;

  if (dr->is_virtual() || live_after_loop(dr, loop))
    return false;

  if (dpc.dr_class_map.find(dr->uid()) == dpc.dr_class_map.end())
    return true;

  dep_class_rep *rep = dpc.dr_class_map[dr->uid()];

  // a loop-carried RAW requires synchronization
  if ( rep->has_loop_carried_raw )
    return false;

  const bool waw_or_war = rep->has_loop_carried_waw || rep->has_loop_carried_war;

  // WAW/WAR only: privatizable unless live after the loop;
  // no dependence at all: not reported as privatizable
  return waw_or_war && !rep->live_after_loop;
}


/* Place a synchronization (post-wait) on each loop-carried dependence.

   Steps:
   1. Build the set of synchronizations from DEPENDENCE_SET.  Dependences
      with distance 0 are ignored, as are those whose source and sink are
      both upp_privatizable.  One synchronization is kept per (source
      statement, distance); each sink statement is added to its wait set.
      New synchronizations are logged to "syn.info" when that file can be
      opened.
   2. Gather the post sites, process them for LOOP and emit the "_post"
      calls, then emit the "_wait" calls for every synchronization.
   3. For every valid function node insert the begin/exit synchronization
      hooks; for the main procedure also create and initialise the global
      "syn_list" array.

   Does nothing when DEPENDENCE_SET is NULL. */
static void 
Place_sychronization(const data_dependence_set *dependence_set, loop_p loop)
{
  if (!dependence_set)
    return;

  dump_all_nodes("myNodes");

  /* The log file is optional: carry on without it if it cannot be created. */
  FILE *fp = fopen("syn.info", "w");
  if (!fp)
    fprintf(stderr, "Place_sychronization: cannot open syn.info for writing\n");

  Synchronization_set synchronization_set;

  /* Collect essential synchronizations according to data dependences */
  for ( data_dependence_set::iterator iter = dependence_set->begin(); iter != dependence_set->end(); ++iter)
  {
    const ipa_data_dependency &dep = *iter;

    // ignore loop-independent dependence
    if (dep.distance() == 0)
      continue;

    ipa_data_reference *dra = ipa_get_data_ref(dep.source());
    ipa_data_reference *drb = ipa_get_data_ref(dep.sink());

    if ( upp_privatizable(dra, loop) && upp_privatizable(drb, loop) )
      continue;

    GIMPLE_SET *waits;
    synchronization syn (DR_STMT(dra), dep.distance());
    Synchronization_set::iterator siter = synchronization_set.find (syn);
    if ( siter == synchronization_set.end() )
    {
      syn.id = ++syn_count;
      std::pair<Synchronization_set::iterator,bool> res = synchronization_set.insert(syn);
      // the wait set is filled in after insertion, hence the const_cast
      waits = const_cast<GIMPLE_SET *> (&res.first->waits);
      if (fp)
      {
        fprintf(fp, "%d POST %d   ", syn.id, dep.source());
        dep.print (fp, 4);
      }
    }
    else
      waits = const_cast<GIMPLE_SET *> (&siter->waits);

    waits->insert (DR_STMT(drb));
  }

  if (fp)
    fclose(fp);

  /* Collect essential synchronizations according to control dependences */

  /* More cloning on top-level function calls */
  IPA_NODE_INT_MAP node_level_map;
  mark_min_loop_level (loop, node_level_map);

  /* Initialize synchronization actions:
       void _post (int lock_id, int cur_iter, int distance);
       void _wait (int lock_id, int cur_iter, int distance);  */
  tree thread_syn_fn_type = build_function_type_list (void_type_node,
                                                      integer_type_node,  /* int lock_id */
                                                      integer_type_node,  /* int cur_iter */
                                                      integer_type_node,  /* int distance */
                                                      NULL_TREE);
  tree post_fn = build_fn_decl ("_post", thread_syn_fn_type);
  tree wait_fn = build_fn_decl ("_wait", thread_syn_fn_type);

  for (Synchronization_set::iterator iter = synchronization_set.begin();
       iter != synchronization_set.end(); ++iter )
  {
    // set elements are const; the post/wait bookkeeping updates them in place
    gather_post(const_cast<synchronization *> (&(*iter)));
  }

  process_post_sets(loop);

  cgraph_node_ptr root = cgraph_get_node (loop_func_decl(loop));
  place_post_without_percolate(post_fn, root);

  for (Synchronization_set::iterator iter = synchronization_set.begin();
       iter != synchronization_set.end(); ++iter )
  {
    const synchronization &syn = *iter;

    gcc_assert (!syn.waits.empty());
    place_wait (syn, wait_fn, node_level_map, root);
  }

  dump_all_nodes("myNodes3");

  /* Insert initialization and finalization */
  for (cgraph_node_ptr node = cgraph_nodes; node; node = node->next)
  {
    if (!valid_function_node_p (node))
     continue;

    /* initialize syn_list */
    if ( is_main_procedure(node) )
    {
      tree idx_type = build_index_type (build_int_cst (integer_type_node, syn_count));
      tree type = build_array_type (integer_type_node, idx_type);
      syn_list = ipa_add_new_external_global (type, get_identifier ("syn_list"));
      DECL_INITIAL (syn_list) = build_initializer_for_syn_list (type, synchronization_set);
      insert_global_to_varpool (syn_list);
    }

    switch_to_context (node->decl);
    profile_begin (node, insert_begin_sync);
    profile_exit (node, insert_exit_sync);
    switch_off_context ();
  }
}


typedef std::map< int, std::set<vertex_descriptor> > COMPONENT_MAP;

#if 0
/* Partition the dependence graph by RAW dependence.

   Currently disabled (inside "#if 0").  Computes the strongly connected
   components of PDG into COMPONENTS, groups the vertices by component and,
   for every component that contains a loop-carried edge between two of its
   own vertices, adds a loop-carried edge from each vertex not marked
   mk_privatizable to every vertex of the component.

   NOTE(review): the body refers to `component`, while the parameter is named
   `components`; neither `partitions` nor `num` is used.  This needs fixing
   before the block can be re-enabled. */
void Partition_dependence_graph(DEPENDENCE_GRAPH &pdg, vector<int> &components, COMPONENT_MAP &partitions)
{


  

  /* Traverse through each component */
  
  int num = strong_components(pdg, &components[0]);

  COMPONENT_MAP  component_map;
  
  /* 1. partitioning: component id -> set of vertices in that component */
  for (int i = 0; i != component.size(); ++i)
  {
    component_map[component[i]].insert(i);
  }    

  /* 2. complementing */
  for(COMPONENT_MAP::iterator iter = component_map.begin(); iter != component_map.end(); ++iter )
  {
    std::set<vertex_descriptor> &scc = iter->second;
    /* check if there is any loop-carried edge in the component */
    bool carried = false;
    for(std::set<vertex_descriptor>::iterator iter2 = scc.begin(); iter2 != scc.end(); ++iter2 )
    {
      vertex_descriptor vertex = *iter2;
      std::pair<out_edge_iterator, out_edge_iterator>  edges = out_edges(vertex, pdg);
      for( out_edge_iterator edge_iter = edges.first; edge_iter != edges.second; ++edge_iter )
      {
        if ( get(edge_type(), pdg, *edge_iter) & id_loop_carried )
        {
          vertex_descriptor dst = target( *edge_iter, pdg );
          /* only edges that stay inside this component count */
          if (component[dst] == iter->first)
          {
            carried = true;
            break;
          }
        }
      }

      if (carried)
        break;
    }   

    if ( !carried )
      continue;

    /* complement: connect every non-privatizable vertex to all vertices of
       the component (including itself) with a loop-carried edge */
    for(std::set<vertex_descriptor>::iterator iter2 = scc.begin(); iter2 != scc.end(); ++iter2 )
    {
      vertex_descriptor src = *iter2;
      // privatizable load cannot have loop-carried RAW dependence
      if ( get(vertex_type(), pdg, src) & mk_privatizable )
        continue;

      for(std::set<vertex_descriptor>::iterator iter3 = scc.begin(); iter3 != scc.end(); ++iter3 )
      {
        vertex_descriptor dst = *iter3;
        pdg_add_edge(src, dst, id_loop_carried, pdg);                
      }
    }   
    
  }


  
}


/* Mark basic block if the basic block must be executed.

   Currently disabled (inside "#if 0").  For every basic block of NODE's
   function the must-execute flag is set when the block is at loop depth 0
   and dominated_by_p (CDI_DOMINATORS, EXIT_BLOCK_PTR, bb) holds, and reset
   otherwise.  Each callee with a known declaration is flagged as
   must-execute only when it has a single caller edge and the calling block
   is must-execute; such callees are then processed recursively. */
void mark_bb(cgraph_node_ptr node)
{

  switch_to_context( node->decl );
  basic_block bb;
  set < cgraph_node_ptr > start_nodes;

  FOR_EACH_BB(bb)
  {    
    if (bb->loop_depth > 0 )
      bb_reset_must_execute(bb);   

    else if ( dominated_by_p (CDI_DOMINATORS, EXIT_BLOCK_PTR, bb) )
      bb_set_must_execute(bb);
    else
      bb_reset_must_execute(bb);

    // propagate to callees;      
    for (gimple_stmt_iterator bsi = gsi_last_bb (bb); !gsi_end_p (bsi); gsi_prev (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
    
      if (is_gimple_call(stmt))
      {
        tree callee_decl = gimple_call_fndecl(stmt);         
        if (callee_decl)
        {
           cgraph_node_ptr callee = cgraph_get_node(callee_decl);
           // count the caller edges of the callee
           // NOTE(review): `e` is declared as an object but used as a pointer;
           // the loop_p overload uses cgraph_edge_p here.
           int caller_num = 0;
           for (struct cgraph_edge e = callee->callers; e; e = e->next_caller)
            caller_num++;
          
           if (caller_num > 1)
             func_reset_must_execute(callee);                        
           
           else if ( bb_must_execute(bb) )
           {
             func_set_must_execute(callee);
             start_nodes.insert(callee);
           }
           else
             func_reset_must_execute(callee);            
        }
      }
    }    
  }

  switch_off_context();

  /* Mark basic blocks of each callee */
  for (set<cgraph_node_ptr>::iterator iter = start_nodes.begin(); iter != start_nodes.end(); ++iter)
    mark_bb(*iter);

}

/* Mark basic block if the basic block must be executed exactly once.

   Currently disabled (inside "#if 0").  For every block in the body of LOOP
   the must-execute flag is set when the block is not nested deeper than LOOP
   and dominated_by_p (CDI_DOMINATORS, latch, bb) holds for LOOP's latch, and
   reset otherwise.  Callees are handled as in the cgraph_node_ptr overload:
   a callee with a single caller edge, called from a must-execute block, is
   flagged and then processed through mark_bb (cgraph_node_ptr). */
void mark_bb(loop_p loop)
{

  set < cgraph_node_ptr > start_nodes;
  
  switch_to_context( loop_func_decl(loop) );

  basic_block latch = loop->latch;
  // NOTE(review): loop_blocks is not released in this function -- confirm
  // whether the array returned by get_loop_body must be freed by the caller.
  basic_block *loop_blocks = get_loop_body(loop); 
  for (int i = 0; i < loop->num_nodes; i++)
  {    
    basic_block bb  = loop_blocks[i];

    if (bb->loop_depth > loop_depth (loop) )
      bb_reset_must_execute(bb);   

    else if ( dominated_by_p (CDI_DOMINATORS, latch, bb) )
      bb_set_must_execute(bb);
    else
      bb_reset_must_execute(bb);

    // propagate to callees;      
    for (gimple_stmt_iterator bsi = gsi_last_bb (bb); !gsi_end_p (bsi); gsi_prev (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);
    
      if (is_gimple_call(stmt))
      {
        tree callee_decl = gimple_call_fndecl(stmt);         
        if (callee_decl)
        {
           cgraph_node_ptr callee = cgraph_get_node(callee_decl);
           // count the caller edges of the callee
           int caller_num = 0;
           for (cgraph_edge_p e = callee->callers; e; e = e->next_caller)
            caller_num++;
          
           if (caller_num > 1)
             func_reset_must_execute(callee);                        
           
           else if ( bb_must_execute(bb) )
           {
             func_set_must_execute(callee);
             start_nodes.insert(callee);
           }
           else
             func_reset_must_execute(callee);            
        }
      }
    }    
  }

  switch_off_context();

  /* Mark basic blocks of each callee */
  for (set<cgraph_node_ptr>::iterator iter = start_nodes.begin(); iter != start_nodes.end(); ++iter)
    mark_bb(*iter);

  
}
#endif

#endif


/* Hash table that receives one type_hash entry for every threadspace type
   registered through hash_add_threadspace_type.  The GTY annotation names
   thread_type_hash_marked_p as its if_marked callback.
   NOTE(review): no creation of this table is visible in this part of the
   file -- confirm it is initialised before the first insertion.  */
static GTY ((if_marked ("thread_type_hash_marked_p"), param_is (struct type_hash)))
     htab_t thread_type_hash;

/* Original type -> its threadspace counterpart; written by
   hash_add_threadspace_type and read by find_threadspace_type.  */
static TREE_MAP  type_threadspace_map;
/* Every threadspace type registered so far; queried by
   is_threadspace_type.  */
static TREE_SET  new_type_set;

/* Number of threadspace types registered so far.  The value after each
   increment is also used as the hash of the entry stored in
   thread_type_hash.  */
static unsigned int thread_type_count = 0;

/* NOTE(review): not referenced in the visible part of the file; from the name
   it presumably relates promoted pointers to their span data -- confirm
   against its users.  */
static TREE_MAP pointer_span_map;



/* Predicate named as the if_marked callback in the GTY annotation of
   thread_type_hash.  The entry P is never inspected: the answer is always 1.  */
static int
thread_type_hash_marked_p (const void *p)
{
  (void) p;                      /* deliberately ignored */
  const int always_marked = 1;
  return always_marked;
}

// Look up the promoted (threadspace) type registered for TYPE.
// Returns NULL_TREE when TYPE has no entry in type_threadspace_map.
static tree find_threadspace_type(tree type)
{
  TREE_MAP::iterator hit = type_threadspace_map.find(type);
  if ( hit == type_threadspace_map.end() )
    return NULL_TREE;

  return hit->second;
}


/* Register THREADSPACE_TYPE as the promoted form of TYPE.

   The pair is recorded in type_threadspace_map, THREADSPACE_TYPE is added to
   new_type_set, and a freshly allocated type_hash entry for it is stored in
   thread_type_hash.  The entry's hash is the incremented value of
   thread_type_count, so each registration gets a distinct hash.  */
void
hash_add_threadspace_type(tree type, tree threadspace_type)
{
  /* Bookkeeping in the C++ side tables.  */
  type_threadspace_map[type] = threadspace_type;
  new_type_set.insert(threadspace_type);

  /* Bump the counter first; the new value is this entry's hash.  */
  ++thread_type_count;

  struct type_hash *entry = ggc_alloc_type_hash ();
  entry->hash = thread_type_count;
  entry->type = threadspace_type;

  void **slot = htab_find_slot_with_hash (thread_type_hash, entry, thread_type_count, INSERT);
  *slot = (void *)entry;
}

/* Return nonzero when T is one of the types registered through
   hash_add_threadspace_type (i.e. it is a member of new_type_set).  */
int is_threadspace_type(tree t)
{
  const bool registered = ( new_type_set.end() != new_type_set.find(t) );
  return registered;
}

/* Return true when REF is a COMPONENT_REF whose selected field is named
   POINTER_DATA_FILED_NAME ("pointer").

   A field without a DECL_NAME can never match, so it yields false instead of
   passing a NULL identifier to IDENTIFIER_POINTER.  */
static bool is_a_promoted_pointer(tree ref)
{
  if (TREE_CODE (ref) != COMPONENT_REF)
    return false;

  /* Operand 1 of a COMPONENT_REF is the field being selected.  */
  tree field = TREE_OPERAND (ref, 1);
  tree field_name = DECL_NAME (field);
  if (!field_name)
    return false;

  return strcmp (POINTER_DATA_FILED_NAME, IDENTIFIER_POINTER (field_name)) == 0;
}


#define CLONE 1

#if CLONE


/* Add to CALLEE_SET every function reachable from NODE through direct or
   indirect call edges, recursing into each callee the first time it is seen.

   Only callees accepted by valid_function_node_p are recorded.  The indirect
   path applies the same filter as the direct path (and as
   collect_loop_callees), so nodes rejected by that predicate never enter the
   set regardless of how they are reached.  */
void
collect_callees(cgraph_node_ptr node, IPA_NODE_SET &callee_set)
{

  /* Direct call */
  for (cgraph_edge_p edge = node->callees; edge; edge = edge->next_callee)
  {
    struct cgraph_node *callee = edge->callee;
    if ( !valid_function_node_p(callee) )
      continue;

    /* Recurse only on first discovery so call cycles terminate.  */
    if (callee_set.find(callee) == callee_set.end())
    {
      callee_set.insert(callee);
      collect_callees (callee, callee_set);
    }
  }

  /* Indirect call: walk the candidate target list attached to the edge.  */
  for (cgraph_edge_p edge = node->indirect_calls; edge; edge = edge->next_callee)
  {
    struct nodeList *callees;
    for (callees = edge->indirect_info->callees; callees; callees = callees->next)
    {
      struct cgraph_node *callee = callees->node;
      if ( !valid_function_node_p(callee) )
        continue;

      if (callee_set.find(callee) == callee_set.end())
      {
        callee_set.insert(callee);
        collect_callees (callee, callee_set);
      }
    }
  }

}


void
collect_loop_callees(struct loop *loop, IPA_NODE_SET &callee_set)
{

  cgraph_node_ptr node = cgraph_get_node (loop_func_decl(loop)); 
  switch_to_context (node->decl);
  basic_block *loop_blocks = get_loop_body(loop);  

  for (unsigned int i = 0; i < loop->num_nodes; i++)
  {
    basic_block bb = loop_blocks[i];

    gimple_stmt_iterator bsi;
    for (bsi = gsi_last_bb (bb); !gsi_end_p (bsi); gsi_prev (&bsi))
    {
      gimple stmt = gsi_stmt (bsi);

      if (!is_gimple_call(stmt))
        continue;

      tree callee_decl = gimple_call_fndecl(stmt);         

      if (callee_decl)
      {
         cgraph_node_ptr callee = cgraph_get_node(callee_decl);
         if ( valid_function_node_p(callee) )
           if (callee_set.find(callee) == callee_set.end())
           {
             callee_set.insert(callee);
             collect_callees (callee, callee_set);
           } 
      }
      else
      {
        /* indirect call */           
        struct cgraph_edge *e = cgraph_edge (node, stmt);        
        struct nodeList * callees = e->indirect_info->callees;
        while (callees)
        {
          if ( valid_function_node_p(callees->node) )
            if (callee_set.find(callees->node) == callee_set.end())
            {
              callee_set.insert (callees->node);
              collect_callees (callees->node, callee_set);
            } 
          callees = callees->next;
        }
      }
       
    }

  }

  free (loop_blocks);  
  switch_off_context ();
}


/* Create and return a copy of the FUNCTION_DECL FN.

   The copy is given a private name built from the current function's name,
   the suffix ".$loopfn" and ID; its assembler name is reset; it records FN as
   its abstract origin; and it is linked into the decl chain directly after
   FN.  The parameter list is duplicated and every copied parameter is given
   the clone as its context.  */
tree
clone_function_decl (tree fn, int id)
{
  /* Copy the function.  */
  tree copy = copy_node (fn);

  /* Build "<current function>.$loopfn" and make it private using ID.  */
  char stem[100];
  snprintf (stem, 100, "%s.$loopfn", current_function_name ());

  char *private_name;
  ASM_FORMAT_PRIVATE_NAME (private_name, stem, id);
  clean_symbol_name (private_name);

  /* Reset the function name.  */
  DECL_NAME (copy) = get_identifier (private_name);
  SET_DECL_ASSEMBLER_NAME (copy, NULL_TREE);

  /* Remember where this function came from.  */
  DECL_ABSTRACT_ORIGIN (copy) = fn;

  /* Make it easy to find the clone given FN: splice it in right after FN.  */
  DECL_CHAIN (copy) = DECL_CHAIN (fn);
  DECL_CHAIN (fn) = copy;

  /* Copy the function parameters and re-parent each one onto the clone.  */
  DECL_ARGUMENTS (copy) = copy_list (DECL_ARGUMENTS (copy));
  tree arg = DECL_ARGUMENTS (copy);
  while (arg)
  {
    DECL_CONTEXT (arg) = copy;
    arg = DECL_CHAIN (arg);
  }

  /* No RTL is created for the clone here.  */
  return copy;
}




void
gimple_duplicate_extra_info(struct function *nfun, gimple nstmt, struct function *ofun, gimple ostmt)
{
  //gcc_assert (!gimple_extra_info(nstmt));

  statement_extra_info *oinfo = gimple_extra_info (ostmt);
  if (!oinfo)
    return;

  statement_extra_info *ninfo = new statement_extra_info ();
  gimple_set_extra_info (nstmt, ninfo);

  if (dr_hash)
  {
    switch_to_context (nfun->decl);
    ipa_collect_data_references_in_stmt (nstmt, false, NULL);
    switch_off_context ();
  }
  gcc_assert (oinfo->dr_vec().size() == ninfo->dr_vec().size());
  for (int i=0;  i < oinfo->dr_vec().size(); i++)
  {
    ipa_data_reference *odr = oinfo->dr_vec()[i];
    if (!odr)
      continue;
    ipa_data_reference *ndr = ninfo->dr_vec()[i];
    ndr->_flags = odr->_flags;
    ndr->set_class_id (odr->class_id());
    clone_dr_map[odr->uid()] = ndr->uid();
  }

}



/* Copy basic block BB into the current function and return the copy.  Edges
   are taken care of later.

   The copy is created after the copy of the nearest preceding block that has
   already been duplicated (found through the aux pointers).  Count is copied
   as is and frequency is copied but capped at BB_FREQ_MAX.  Every statement is
   remapped through ID, receives duplicated histograms and extra info, and is
   appended to the copy.  When a call statement is copied, the call graph edge
   found for the original statement in id->dst_node (if any) is re-pointed at
   the new statement.  */

static basic_block
clone_bb (copy_body_data *id, basic_block bb)
{
  /* Search for previous copied basic block.  */
  basic_block anchor = bb->prev_bb;
  while (!anchor->aux)
    anchor = anchor->prev_bb;

  /* create_basic_block() will append every new block to
     basic_block_info automatically.  */
  basic_block new_block = create_basic_block (NULL, (void *) 0, (basic_block) anchor->aux);
  new_block->count = bb->count;

  /* Frequencies are only carried over approximately: clamp to the maximum.  */
  gcov_type capped = (gcov_type)bb->frequency;
  if (capped > BB_FREQ_MAX)
    capped = BB_FREQ_MAX;
  new_block->frequency = capped;

  gimple_stmt_iterator out = gsi_start_bb (new_block);

  for (gimple_stmt_iterator in = gsi_start_bb (bb); !gsi_end_p (in); gsi_next (&in))
  {
    gimple original = gsi_stmt (in);

    id->regimplify = false;
    gimple copy = remap_gimple_stmt (original, id);

    gimple_duplicate_stmt_histograms (cfun, copy, id->src_cfun, original);
    gimple_duplicate_extra_info(cfun, copy, id->src_cfun, original);

    gsi_insert_after (&out, copy, GSI_NEW_STMT);

    if (gimple_in_ssa_p (cfun) && !is_gimple_debug (copy))
    {
      ssa_op_iter op_iter;
      tree def;

      /* Every SSA name defined by the copy must name the copy as its
         defining statement.  */
      find_new_referenced_vars (copy);
      FOR_EACH_SSA_TREE_OPERAND (def, copy, op_iter, SSA_OP_DEF)
        if (TREE_CODE (def) == SSA_NAME)
          SSA_NAME_DEF_STMT (def) = copy;
    }

    if (!is_gimple_call (copy))
      continue;

    /* We're duplicating a call.  Find the corresponding callgraph edge and
       make it refer to the new statement.  */
    struct cgraph_edge *call_edge = cgraph_edge (id->dst_node, original);
    if (call_edge)
      cgraph_set_call_stmt (call_edge, copy);
  }

  return new_block;
}

/* C-linkage forward declaration of the exception-handling region duplication
   routine; clone_cfg_body calls it to build id->eh_map.  */
extern "C" 
struct pointer_map_t *duplicate_eh_regions (struct function *ifun, eh_region copy_region, int outer_lp,
		      duplicate_eh_regions_map map, void *map_data);


/* Copy the CFG body of the function id->src_fn into the current function
   (cfun), block by block.

   ENTRY_BLOCK_MAP and EXIT_BLOCK_MAP are the destination blocks that stand in
   for the source function's entry and exit blocks.  During the copy the aux
   field of every source block points at its copy and vice versa; all aux
   fields touched here are cleared again before returning.

   The return value is NEW_FNDECL, which this function never assigns, so the
   result is always NULL; the caller in this file ignores it.  */

static tree
clone_cfg_body (copy_body_data * id, basic_block entry_block_map, basic_block exit_block_map)
{

  tree callee_fndecl = id->src_fn;
  /* Original cfun for the callee, doesn't change.  (Not used below.)  */
  struct function *src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl);
  struct function *cfun_to_copy;
  basic_block bb;
  /* Never assigned after this point; it is what the function returns.  */
  tree new_fndecl = NULL;
  bool need_debug_cleanup = false;
  int last;
  /* Not used below.  */
  int incoming_frequency = 0;


  /* Register specific tree functions.  */
  gimple_register_cfg_hooks ();


  /* Must have a CFG here at this point.  */
  gcc_assert (ENTRY_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (callee_fndecl)));

  cfun_to_copy = id->src_cfun = DECL_STRUCT_FUNCTION (callee_fndecl);

  /* Cross-link the entry/exit blocks of source and destination through aux so
     that edge copying can translate them like any other block.  */
  ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun_to_copy)->aux = entry_block_map;
  EXIT_BLOCK_PTR_FOR_FUNCTION (cfun_to_copy)->aux = exit_block_map;
  entry_block_map->aux = ENTRY_BLOCK_PTR_FOR_FUNCTION (cfun_to_copy);
  exit_block_map->aux = EXIT_BLOCK_PTR_FOR_FUNCTION (cfun_to_copy);

  /* Duplicate any exception-handling regions.  Note the test is on the
     destination function (cfun), while the regions come from the source.  */
  if (cfun->eh)
    id->eh_map = duplicate_eh_regions (cfun_to_copy, NULL, id->eh_lp_nr, remap_decl_1, id);

  /* Use aux pointers to map the original blocks to copy.  */
  FOR_EACH_BB_FN (bb, cfun_to_copy)
  {
  	basic_block new_bb = clone_bb (id, bb);
  	bb->aux = new_bb;
  	new_bb->aux = bb;
  }

  /* Remember the block count before edges are copied: any block with an index
     at or above LAST was created while copying edges.  */
  last = last_basic_block;

  /* Now that we've duplicated the blocks, duplicate their edges.  */
  FOR_ALL_BB_FN (bb, cfun_to_copy)
      need_debug_cleanup |= copy_edges_for_bb (bb, REG_BR_PROB_BASE, exit_block_map);


  /* PHI nodes can only be copied once all edges exist.  */
  if (gimple_in_ssa_p (cfun))
    FOR_ALL_BB_FN (bb, cfun_to_copy)
    	copy_phis_for_bb (bb, id);

  /* Undo the aux mapping, moving debug statements first when edge copying
     asked for a debug cleanup.  */
  FOR_ALL_BB_FN (bb, cfun_to_copy)
    if (bb->aux)
    {
    	if (need_debug_cleanup
    	    && bb->index != ENTRY_BLOCK
    	    && bb->index != EXIT_BLOCK)
    	  maybe_move_debug_stmts_to_successors (id, (basic_block) bb->aux);
    	((basic_block)bb->aux)->aux = NULL;
    	bb->aux = NULL;
    }

  /* Zero out AUX fields of newly created block during EH edge
     insertion. */
  for (; last < last_basic_block; last++)
  {
    if (need_debug_cleanup)
    	maybe_move_debug_stmts_to_successors (id, BASIC_BLOCK (last));
    BASIC_BLOCK (last)->aux = NULL;
  }
  entry_block_map->aux = NULL;
  exit_block_map->aux = NULL;

  /* The EH map is only needed while statements are being remapped.  */
  if (id->eh_map)
  {
    pointer_map_destroy (id->eh_map);
    id->eh_map = NULL;
  }

  return new_fndecl;
}



/* Create a copy of a function's tree.
   OLD_DECL and NEW_DECL are FUNCTION_DECL tree nodes
   of the original function and the new copied function
   respectively.  If UPDATE_CLONES is set, the call_stmt fields
   of edges of clones of the function will be updated
   (CB_CGE_MOVE_CLONES is selected instead of CB_CGE_MOVE).

   The function temporarily makes NEW_DECL the current function (both
   current_function_decl and cfun), copies static chain, arguments, lexical
   blocks, local variables, result decl and the CFG body from OLD_DECL, then
   cleans up, updates SSA form and restores the previous current function.
*/
void
tree_function_cloning (tree old_decl, tree new_decl, bool update_clones)
{
  struct cgraph_node *old_version_node;
  struct cgraph_node *new_version_node;
  copy_body_data id;
  tree p;
  /* Not used; the loop further down declares its own index.  */
  unsigned i;
  /* Not used.  */
  struct ipa_replace_map *replace_info;
  basic_block old_entry_block, bb;
  /* Nothing in this function pushes onto INIT_STMTS, so the insertion loop
     below has nothing to insert.  */
  VEC (gimple, heap) *init_stmts = VEC_alloc (gimple, heap, 10);

  /* Saved so it can be restored once the copy is complete.  */
  tree old_current_function_decl = current_function_decl;
  tree vars = NULL_TREE;

  gcc_assert (TREE_CODE (old_decl) == FUNCTION_DECL
	      && TREE_CODE (new_decl) == FUNCTION_DECL);
  DECL_POSSIBLY_INLINED (old_decl) = 1;

  old_version_node = cgraph_node (old_decl);
  new_version_node = cgraph_node (new_decl);

  DECL_ARTIFICIAL (new_decl) = 1;
  DECL_ABSTRACT_ORIGIN (new_decl) = DECL_ORIGIN (old_decl);
  DECL_FUNCTION_PERSONALITY (new_decl) = DECL_FUNCTION_PERSONALITY (old_decl);

  /* Prepare the data structures for the tree copy.  */
  memset (&id, 0, sizeof (id));

  /* Set of statements to be folded once the body has been copied.  */
  id.statements_to_fold = pointer_set_create ();

  id.decl_map = pointer_map_create ();
  id.debug_map = NULL;
  id.src_fn = old_decl;
  id.dst_fn = new_decl;
  id.src_node = old_version_node;
  id.dst_node = new_version_node;
  id.src_cfun = DECL_STRUCT_FUNCTION (old_decl);
  /* The clone inherits the source's pending IPA transforms, followed by
     whatever transforms it already had queued.  */
  if (id.src_node->ipa_transforms_to_apply)
  {
    VEC(ipa_opt_pass,heap) * old_transforms_to_apply = id.dst_node->ipa_transforms_to_apply;
    unsigned int i;

    id.dst_node->ipa_transforms_to_apply = VEC_copy (ipa_opt_pass, heap,
				               id.src_node->ipa_transforms_to_apply);
    for (i = 0; i < VEC_length (ipa_opt_pass, old_transforms_to_apply); i++)
      VEC_safe_push (ipa_opt_pass, heap, id.dst_node->ipa_transforms_to_apply,
	       VEC_index (ipa_opt_pass, old_transforms_to_apply, i));
  }

  id.copy_decl = copy_decl_no_change;
  id.transform_call_graph_edges = update_clones ? CB_CGE_MOVE_CLONES : CB_CGE_MOVE;
  id.transform_new_cfg = true;
  id.transform_return_to_modify = false;
  id.transform_lang_insert_block = NULL;

  /* From here until pop_cfun below, NEW_DECL is the current function.  */
  current_function_decl = new_decl;
  old_entry_block = ENTRY_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (old_decl));
  initialize_cfun (new_decl, old_decl, old_entry_block->count);
  DECL_STRUCT_FUNCTION (new_decl)->gimple_df->ipa_pta = id.src_cfun->gimple_df->ipa_pta;
  push_cfun (DECL_STRUCT_FUNCTION (new_decl));

  /* Copy the function's static chain.  */
  p = DECL_STRUCT_FUNCTION (old_decl)->static_chain_decl;
  if (p)
    DECL_STRUCT_FUNCTION (new_decl)->static_chain_decl =
      copy_static_chain (DECL_STRUCT_FUNCTION (old_decl)->static_chain_decl, &id);

  
  /* Copy the function's arguments.  */
  if (DECL_ARGUMENTS (old_decl) != NULL_TREE)
    DECL_ARGUMENTS (new_decl) = copy_arguments_for_versioning (DECL_ARGUMENTS (old_decl), &id, NULL, &vars);

  /* Copy the lexical block tree and root it at the new function.  */
  DECL_INITIAL (new_decl) = remap_blocks (DECL_INITIAL (id.src_fn), &id);
  BLOCK_SUPERCONTEXT (DECL_INITIAL (new_decl)) = new_decl;

  declare_inline_vars (DECL_INITIAL (new_decl), vars);

  if (!VEC_empty (tree, DECL_STRUCT_FUNCTION (old_decl)->local_decls))
    /* Add local vars.  */
    add_local_variables (DECL_STRUCT_FUNCTION (old_decl), cfun, &id, false);

  if (DECL_RESULT (old_decl) != NULL_TREE)
  {
    tree old_name;
    DECL_RESULT (new_decl) = remap_decl (DECL_RESULT (old_decl), &id);
    lang_hooks.dup_lang_specific_decl (DECL_RESULT (new_decl));
    /* A by-reference result with a default definition in the source needs a
       matching default definition in the copy.  */
    if (gimple_in_ssa_p (id.src_cfun)
        && DECL_BY_REFERENCE (DECL_RESULT (old_decl))
        && (old_name = gimple_default_def (id.src_cfun, DECL_RESULT (old_decl))))
    {
      tree new_name = make_ssa_name (DECL_RESULT (new_decl), NULL);
      insert_decl_map (&id, old_name, new_name);
      SSA_NAME_DEF_STMT (new_name) = gimple_build_nop ();
      set_default_def (DECL_RESULT (new_decl), new_name);
    }
  }

  /* Copy the Function's body.  The return value of clone_cfg_body is
     ignored.  */

  clone_cfg_body (&id, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR);
  copy_debug_stmts (&id);


  /* Renumber the lexical scoping (non-code) blocks consecutively.  */
  number_blocks (new_decl);

  /* We want to create the BB unconditionally, so that the addition of
     debug stmts doesn't affect BB count, which may in the end cause
     codegen differences.  */
  bb = split_edge (single_succ_edge (ENTRY_BLOCK_PTR));
  while (VEC_length (gimple, init_stmts))
    insert_init_stmt (&id, bb, VEC_pop (gimple, init_stmts));
  update_clone_info (&id);

  /* Remap the nonlocal_goto_save_area, if any.  */
  if (cfun->nonlocal_goto_save_area)
  {
    struct walk_stmt_info wi;

    memset (&wi, 0, sizeof (wi));
    wi.info = &id;
    walk_tree (&cfun->nonlocal_goto_save_area, remap_gimple_op_r, &wi, NULL);
  }

  /* Clean up.  */
  pointer_map_destroy (id.decl_map);
  if (id.debug_map)
    pointer_map_destroy (id.debug_map);

  fold_marked_statements (0, id.statements_to_fold);
  pointer_set_destroy (id.statements_to_fold);
  fold_cond_expr_cond ();
  delete_unreachable_blocks_update_callgraph (&id);
  if (id.dst_node->analyzed)
    cgraph_rebuild_references ();
  update_ssa (TODO_update_ssa);

  gcc_assert (!id.debug_stmts);
  VEC_free (gimple, heap, init_stmts);
  /* Leave the new function and restore the caller's current function.  */
  pop_cfun ();
  current_function_decl = old_current_function_decl;
  gcc_assert (!current_function_decl || DECL_STRUCT_FUNCTION (current_function_decl) == cfun);
  return;
}



/* Given virtual clone, turn it into actual clone.  */
void
cgraph_clone_body (struct cgraph_node *node)
{
  bitmap_obstack_initialize (NULL);
  node->former_clone_of = node->clone_of->decl;
  if (node->clone_of->former_clone_of)
    node->former_clone_of = node->clone_of->former_clone_of;

  /* Copy the OLD_VERSION_NODE function tree to the new version.  */
  tree_function_cloning (node->clone_of->decl, node->decl, true);

  /* Function is no longer clone.  */
  if (node->next_sibling_clone)
    node->next_sibling_clone->prev_sibling_clone = node->prev_sibling_clone;
  if (node->prev_sibling_clone)
    node->prev_sibling_clone->next_sibling_clone = node->next_sibling_clone;
  else
    node->clone_of->clones = node->next_sibling_clone;
  node->next_sibling_clone = NULL;
  node->prev_sibling_clone = NULL;
  if (!node->clone_of->analyzed && !node->clone_of->clones)
  {
    cgraph_release_function_body (node->clone_of);
    cgraph_node_remove_callees (node->clone_of);
    ipa_remove_all_references (&node->clone_of->ref_list);
  }
  node->clone_of = NULL;
  bitmap_obstack_release (NULL);
}


/* Scan the local declarations of NODE's function for named variables called
   THREAD_ID_NAME and register each match as the function's thread id variable
   (a later match overrides an earlier one).  Asserts that a thread id is
   registered afterwards.  */
static void 
register_thread_id_var(cgraph_node_ptr node)
{
  struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
  unsigned idx;
  tree decl;

  FOR_EACH_LOCAL_DECL (fn, idx, decl)
  {
    /* Only named VAR_DECLs can match.  */
    const bool named_variable = ( TREE_CODE(decl) == VAR_DECL
                                  && DECL_NAME(decl) != NULL_TREE );
    if ( named_variable
         && strcmp(THREAD_ID_NAME, IDENTIFIER_POINTER(DECL_NAME (decl))) == 0 )
      func_set_thread_id (node, decl);
  }

  gcc_assert (func_thread_id(node) );
}


/* Copy the data dependencies of LOOP from original data references onto their
   clones, and remove them from the originals.

   CLONE_DR_MAP maps the uid of an original data reference to the uid of its
   clone.  (The parameter shadows the file-level variable of the same name.)

   Side effects visible here:
     - the file-level DEPENDENCE_SET is replaced by load_dependence (LOOP);
     - when LIVENESS_SET is set, the clone of every reference already in it is
       added to it;
     - for each mapped reference, every loop-independent and loop-carried
       dependency belonging to LOOP is rewritten so that source and sink name
       the clones (where a clone exists), inserted on the new endpoints and in
       DEPENDENCE_SET, and erased from the old endpoints and DEPENDENCE_SET.  */
static void
redirect_data_dependencies(loop_p loop, UINT32_MAP &clone_dr_map)
{

  /* Redirect for dependence table */
  dependence_set = load_dependence (loop);

  /* Liveness is extended before the early return below, so it happens even
     when LOOP has no dependence set.  */
  if (liveness_set)
  {
    for (UINT32_MAP::iterator iter = clone_dr_map.begin(); iter != clone_dr_map.end(); ++iter)
    {      
      if (liveness_set->find(iter->first) != liveness_set->end())
        liveness_set->insert(iter->second);
    }
  }

  if (!dependence_set)
    return;


  for (UINT32_MAP::iterator iter = clone_dr_map.begin(); iter != clone_dr_map.end(); ++iter)
  {      
    /* ODR is the original reference, NDR its clone.  */
    ipa_data_reference *odr = ipa_get_data_ref(iter->first);
    ipa_data_reference *ndr = ipa_get_data_ref(iter->second);
  
    /* Dependencies to erase from ODR; erasing is deferred until ODR's set is
       no longer being iterated.  */
    data_dependence_list to_remove;
  
    /* Copy loop-independent dependencies */
    for (data_dependence_set::iterator diter = odr->loop_independent_deps().begin();
         diter != odr->loop_independent_deps().end(); ++diter )
    {
      const ipa_data_dependency &odep = *diter;
      /* Only dependencies recorded for LOOP are redirected.  */
      if (odep.loop() != loop_uid(loop))
        continue;  

      /* ODR2 is the other endpoint of the dependency; NDR2 is its clone when
         it has one, otherwise ODR2 itself.  */
      ipa_data_reference *odr2, *ndr2 ;
      if (odr->uid() == odep.source())
        odr2 = ipa_get_data_ref(odep.sink());
      else
        odr2 = ipa_get_data_ref(odep.source());

      if ( clone_dr_map.find(odr2->uid()) != clone_dr_map.end() )
        ndr2 = ipa_get_data_ref(clone_dr_map[odr2->uid()]);
      else
        ndr2 = odr2;
      
      /* NDEP is ODEP with each cloned endpoint replaced by its clone.  */
      ipa_data_dependency ndep = odep;

      if ( clone_dr_map.find(odep.source()) != clone_dr_map.end() )
        ndep.set_source (clone_dr_map[odep.source()]);
      else
        ndep.set_source (odep.source());
      
      if ( clone_dr_map.find(odep.sink()) != clone_dr_map.end() )
        ndep.set_sink (clone_dr_map[odep.sink()]);
      else
        ndep.set_sink (odep.sink());

      ndr->loop_independent_deps().insert(ndep);
      ndr2->loop_independent_deps().insert(ndep);
      /* The other endpoint can be cleaned up right away; ODR's own set is
         being iterated, so its erase is deferred through TO_REMOVE.  */
      if (odr2 != odr)
        odr2->loop_independent_deps().erase(odep);

      dependence_set->erase (odep);
      dependence_set->insert(ndep);

      to_remove.push_back(odep);
    }
  
    /* Remove dependencies related to this LOOP */
    for (data_dependence_list::iterator diter = to_remove.begin(); diter != to_remove.end(); ++diter )
      odr->loop_independent_deps().erase(*diter);
  
    /* Copy loop-carried dependencies: same procedure as above, applied to the
       loop-carried sets.  */
    to_remove.clear();
    for (data_dependence_set::iterator diter = odr->loop_carried_deps().begin();
         diter != odr->loop_carried_deps().end(); ++diter )
    {
      const ipa_data_dependency &odep = *diter;
      if (odep.loop() != loop_uid(loop))
        continue;  

      ipa_data_reference *odr2, *ndr2 ;
      if (odr->uid() == odep.source())
        odr2 = ipa_get_data_ref(odep.sink());
      else
        odr2 = ipa_get_data_ref(odep.source());

      if ( clone_dr_map.find(odr2->uid()) != clone_dr_map.end() )
        ndr2 = ipa_get_data_ref(clone_dr_map[odr2->uid()]);
      else
        ndr2 = odr2;
      
      ipa_data_dependency ndep = odep;

      if ( clone_dr_map.find(odep.source()) != clone_dr_map.end() )
        ndep.set_source (clone_dr_map[odep.source()]);
      else
        ndep.set_source (odep.source());
      
      if ( clone_dr_map.find(odep.sink()) != clone_dr_map.end() )
        ndep.set_sink (clone_dr_map[odep.sink()]);
      else
        ndep.set_sink (odep.sink());

      ndr->loop_carried_deps().insert(ndep);
      ndr2->loop_carried_deps().insert(ndep);
      if (odr2 != odr)
        odr2->loop_carried_deps().erase(odep);

      dependence_set->erase (odep);
      dependence_set->insert(ndep);

      to_remove.push_back(odep);
    }
    
    /* Remove dependencies related to this LOOP */
    for (data_dependence_list::iterator diter = to_remove.begin(); diter != to_remove.end(); ++diter )
      odr->loop_carried_deps().erase(*diter);
  
  }


}



/* Walk every call statement in NODE's function and, where the callee has an
   entry in CLONE_MAP, retarget both the statement and its call graph edge to
   the clone.  The walk then continues recursively into whichever function the
   call ends up targeting.

   VISITED holds the nodes already processed; invalid nodes and nodes in
   VISITED are skipped.  Calls without a fndecl (indirect calls) are not
   supported and hit gcc_unreachable.  */
void
redirect_callsites(cgraph_node_ptr node, IPA_NODE_SET &visited, IPA_NODE_NODE_MAP &clone_map)
{
  if ( !valid_function_node_p (node) )
    return;

  /* Each function is processed at most once.  */
  if (visited.find(node) != visited.end())
    return;
  visited.insert(node);

  switch_to_context (node->decl);

  basic_block bb;
  FOR_EACH_BB(bb)
  {
    for (gimple_stmt_iterator it = gsi_start_bb (bb); !gsi_end_p (it); gsi_next (&it))
    {
      gimple call = gsi_stmt (it);
      if (!is_gimple_call(call))
        continue;

      tree target_decl = gimple_call_fndecl(call);
      if (!target_decl)
      {
        gcc_unreachable();
        // TODO: Redirect indirect call
      }
      else
      {
        cgraph_node_ptr target = cgraph_get_node(target_decl);
        IPA_NODE_NODE_MAP::iterator hit = clone_map.find(target);

        if ( hit == clone_map.end() )
          redirect_callsites (target, visited, clone_map);
        else
        {
          cgraph_node_ptr replacement = hit->second;
          gimple_call_set_fndecl (call, replacement->decl);
          /* Update call edge */
          cgraph_edge_p call_edge = cgraph_edge (node, call);
          cgraph_redirect_edge_callee (call_edge, replacement);
          redirect_callsites (replacement, visited, clone_map);
        }
      }
    }
  }
  switch_off_context ();
}



/* Redirect the call sites inside the body of LOOP, and transitively inside
   every function reached from them, to the cloned procedures listed in
   CLONE_MAP.  Calls without a fndecl (indirect calls) are not supported and
   hit gcc_unreachable.  */
static void
redirect_callsites(loop_p loop, IPA_NODE_NODE_MAP &clone_map)
{
  /* Functions already handled by the recursive, per-function walk.  */
  IPA_NODE_SET visited;

  cgraph_node_ptr owner = cgraph_get_node (loop_func_decl(loop));
  switch_to_context (owner->decl);

  basic_block *body = get_loop_body(loop);

  for (unsigned int idx = 0; idx < loop->num_nodes; idx++)
  {
    /* Statements are visited from the last one in the block to the first.  */
    for (gimple_stmt_iterator it = gsi_last_bb (body[idx]); !gsi_end_p (it); gsi_prev (&it))
    {
      gimple call = gsi_stmt (it);
      if (!is_gimple_call(call))
        continue;

      tree target_decl = gimple_call_fndecl(call);
      if (!target_decl)
      {
        gcc_unreachable();
        // TODO: Redirect indirect call
      }
      else
      {
        cgraph_node_ptr target = cgraph_get_node(target_decl);
        IPA_NODE_NODE_MAP::iterator hit = clone_map.find(target);

        if ( hit == clone_map.end() )
          redirect_callsites (target, visited, clone_map);
        else
        {
          cgraph_node_ptr replacement = hit->second;
          gimple_call_set_fndecl (call, replacement->decl);
          /* Update call edge */
          cgraph_edge_p call_edge = cgraph_edge (owner, call);
          cgraph_redirect_edge_callee (call_edge, replacement);
          redirect_callsites (replacement, visited, clone_map);
        }
      }
    }
  }

  free (body);
  switch_off_context ();
}

/* Clone every procedure in CLONE_SET for LOOP.

   For each procedure: create a virtual clone, materialise its body, initialise
   the clone's extra information, register its thread id variable, and move the
   data dependencies of LOOP onto the cloned data references.  Afterwards the
   call graph is updated and the call sites reachable from LOOP are redirected
   to the clones.  As a final sanity check, no procedure of CLONE_SET may still
   be reachable from LOOP.  */
static void
loop_clone_procedures(loop_p loop, IPA_NODE_SET &clone_set)
{
  /* Cleanup. */
  cleanup_cgraph ();

  clone_dr_map.clear();

  /* original procedure -> its clone */
  std::map<cgraph_node_ptr, cgraph_node_ptr> clone_map;

  /* Clone each procedure */
  for (IPA_NODE_SET::iterator it = clone_set.begin(); it != clone_set.end(); ++it )
  {
    cgraph_node_ptr original = *it;

    /* Create the call graph node and the function body in the original's
       context.  */
    switch_to_context (original->decl);
    cgraph_node_ptr copy = cgraph_create_virtual_clone (original, NULL, NULL, false, "clone");
    clone_map[original] = copy;
    cgraph_clone_body (copy);
    switch_off_context ();

    /* Clone extra information and register the thread id variable in the
       clone's own context.  */
    switch_to_context (copy->decl);
    ipa_ssa_loop_init_node (NULL, copy);
    register_thread_id_var (copy);
    switch_off_context ();

    /* Copy and remove original dependencies; the uid map only covers the
       procedure just cloned, so reset it for the next one.  */
    redirect_data_dependencies (loop, clone_dr_map);
    clone_dr_map.clear();
  }

  update_cgraph();

  /* Redirect callsite in each callee to cloned procedure */
  redirect_callsites (loop, clone_map);

  /* Sanity check: after redirection no original from CLONE_SET may remain
     among the functions reachable from LOOP.  */
  IPA_NODE_SET reachable;
  collect_loop_callees (loop, reachable);
  for (IPA_NODE_SET::iterator it = reachable.begin(); it != reachable.end(); ++it)
    if (clone_set.find(*it) != clone_set.end())
      gcc_unreachable();
}

static void
print_node_set( const IPA_NODE_SET &nodes )
{
	for (IPA_NODE_SET::iterator iter = nodes.begin(); iter != nodes.end(); ++iter)
		std::cout<<cgraph_node_name(*iter)<<std::endl;
}


// Mark the loop level, relative to LOOP, of each function called in LOOP.
//
// NODE_MAP receives one level per function: the function containing LOOP gets
// level 1, and a callee's level is derived from its caller's level plus the
// loop depth of the call site (a call made directly from the root function
// counts as level + 1).  Functions are processed top-down over the order
// produced by ipa_utils_reduced_inorder, and each recursive cycle is iterated
// until no level changes.
//
// NOTE(review): the update keeps the LARGER of the candidate levels
// (node_map[callee] < level), while the original comment speaks of the
// minimum level -- confirm which is intended.  With a recursive call placed
// inside a loop the level keeps growing, so the fixed-point loop may not
// terminate.
// NOTE(review): the per-node aux data set up by ipa_utils_reduced_inorder is
// left in place, as before -- confirm whether it needs releasing.
void
mark_min_loop_level (loop_p loop, IPA_NODE_INT_MAP &node_map)
{

  cgraph_node_ptr root = cgraph_get_node (loop_func_decl(loop));
  node_map[root] = 1;

  IPA_NODE_SET callee_set;
  collect_loop_callees (loop, callee_set);

  struct cgraph_node **order =  XCNEWVEC (struct cgraph_node *, cgraph_n_nodes);
  int order_pos = ipa_utils_reduced_inorder (order, false, true, NULL);

  /* Top-down */
  for (int i = order_pos - 1; i >= 0; i--)
  {
    cgraph_node_ptr callee = order[i];

    /* Find recursive cycles: the representative plus every valid node chained
       through next_cycle.  */
    IPA_NODE_SET scc;
    scc.insert (callee);
    struct ipa_dfs_info *w_info = (struct ipa_dfs_info *) callee->aux;
    struct cgraph_node *next = w_info->next_cycle;
    while (next)
    {
      w_info = (struct ipa_dfs_info *) next->aux;
      if (valid_function_node_p(next))
        scc.insert (next);
      next = w_info->next_cycle;
    }

    bool changed = false;

    do
    {
      changed = false;
      for (IPA_NODE_SET::iterator iter = scc.begin(); iter != scc.end(); ++iter)
      {
        callee = *iter;
        if ( !valid_function_node_p (callee) )
          continue;

        /* Only functions reachable from LOOP get a level.  */
        if ( callee_set.find(callee) == callee_set.end() )
          continue;

        for (cgraph_edge_p edge = callee->callers; edge; edge = edge->next_caller)
        {
          struct cgraph_node *caller = edge->caller;
          int level = node_map[caller];
          /* A caller outside the cycle must already have a level; a caller
             inside the cycle without one is picked up on a later round.  */
          if ( scc.find(caller) == scc.end() )
            gcc_assert (level);
          else if (level == 0)
            continue;

          gimple call_stmt = edge->call_stmt;
          basic_block bb = gimple_bb (call_stmt);
          level += bb->loop_depth;

          /* A caller outside the callee set can only be the root function; a
             call from it counts as exactly one level below the root.  */
          if (callee_set.find(caller) == callee_set.end())
          {
            gcc_assert (caller == root);
            level = level - bb->loop_depth + 1;
          }

          if ( node_map[callee]==0 || node_map[callee] < level )
          {
            node_map[callee] = level;
            changed = true;
          }
        }
      }
    }while (changed);
  }

  /* The order array was allocated with XCNEWVEC above and was previously
     leaked.  */
  free (order);
}


/* Selectively clone the callees of LOOP, together with the related
   dependences, and forward the call sites to the clones.

   A callee is selected for cloning when it is also called from outside the
   loop: either by a function that is not itself a loop callee and is not the
   function containing LOOP, or by that function from a block outside LOOP.
   The selection then spreads to every loop callee called (directly or
   indirectly) by an already selected function, until no more are added.  */
static void
selective_clone (loop_p loop)
{
  IPA_NODE_SET clone_set, callee_set;  

  collect_loop_callees (loop, callee_set);

  /* The function that contains LOOP.  */
  cgraph_node_ptr root = cgraph_get_node (loop_func_decl(loop));  
  int oldsize;

  /* Iterate to a fixed point: stop once a full pass adds nothing.  */
  do
  {  
    oldsize = clone_set.size();
    for (IPA_NODE_SET::iterator it = callee_set.begin(); it != callee_set.end(); ++it )
    {
      struct cgraph_node *candidate = *it;

      /* If the candidate is called from outside the loop, mark as cloneable */
      for (cgraph_edge_p incoming = candidate->callers; incoming; incoming = incoming->next_caller)      
      {
        struct cgraph_node *caller = incoming->caller;      
        if (callee_set.find(caller) != callee_set.end()) 
          continue;

        if ( caller != root
             || !flow_bb_inside_loop_p (loop, gimple_bb (incoming->call_stmt)) )
          clone_set.insert(candidate);
      }

      /* Pass the mark of cloneable on to the candidate's own callees */
      if ( clone_set.find(candidate) == clone_set.end() )
        continue;
      
      /* Direct call */
      for (cgraph_edge_p outgoing = candidate->callees; outgoing; outgoing = outgoing->next_callee)
      {
        struct cgraph_node *target = outgoing->callee;
        if ( target->clone_of )
          target = target->clone_of;
        if ( valid_function_node_p(target)
             && callee_set.find(target) != callee_set.end() )
          clone_set.insert(target);
      }
      
      /* Indirect call */
      for (cgraph_edge_p outgoing = candidate->indirect_calls; outgoing; outgoing = outgoing->next_callee)
      { 
        for (struct nodeList *cand = outgoing->indirect_info->callees; cand; cand = cand->next)
        {
          struct cgraph_node *target = cand->node; 
          if ( target->clone_of )
            target = target->clone_of;
          if ( valid_function_node_p(target)
               && callee_set.find(target) != callee_set.end() )
            clone_set.insert(target);
        }   
      }

    }

  }  while ( clone_set.size() != oldsize );


  /* Perform cloning */
  loop_clone_procedures (loop, clone_set);
#if 0
  /* More cloning on top-level function calls */
  IPA_NODE_INT_MAP node_level_map;
  mark_min_loop_level (loop, node_level_map);

  for (IPA_NODE_INT_MAP::iterator iter = node_level_map.begin(); iter != node_level_map; ++iter)
  {
    cgraph_node_ptr cur = iter->first;
    int level = iter->second;
    if (level > 1)
      continue;

    switch_to_context (cur->decl);

    int caller_count=1;
    cgraph_edge_p edge = node->callers;
    for (edge = edge->next_caller; edge; edge = edge->next_caller, ++caller_count)      
    {
      clone_dr_map.clear();
      char suffix[100];
      sprintf(suffix, "clone.%d", caller_count);
    
      /* Clone call graph node */
      cgraph_node_ptr clone_node = cgraph_create_virtual_clone (cur, NULL, NULL, false, suffix);

      /* Clone function body */  
      cgraph_clone_body (clone_node);      

      switch_to_context (clone_node->decl);
      ipa_ssa_loop_init_node (NULL, clone_node);
      /* Register thread id var */
      register_thread_id_var (clone_node);
      switch_off_context ();    

      /* Copy and remove original dependencies */
      redirect_data_dependencies (loop, clone_dr_map);

      /* Update call edge */
      gimple_call_set_fndecl (edge->call_stmt, clone_node->decl);
      cgraph_redirect_edge_callee (edge, clone_node);    
      
    } 

    switch_off_context ();
  }
#endif
}




#endif 



#define ARRAY_PROMOTE 1

#if ARRAY_PROMOTE




/*
  Build the thread-space array type for SCALAR_TYPE: an ARRAY_TYPE whose
  element type is SCALAR_TYPE and whose index type is created from the
  compile-time constant MAX_THREAD_NUM.  The (SCALAR_TYPE, array type) pair
  is registered through hash_add_threadspace_type before the array type is
  returned.

  NOTE(review): build_index_type receives MAX_THREAD_NUM itself.  If that
  argument is the highest valid index rather than the element count, the
  array holds MAX_THREAD_NUM + 1 elements -- confirm this is intended.
*/
static tree
threadspace_promote_type_to_array(tree scalar_type)
{
  tree domain = build_index_type (build_int_cst_type (long_unsigned_type_node,
                                                      MAX_THREAD_NUM));
  tree array_type = build_array_type (scalar_type, domain);

  hash_add_threadspace_type (scalar_type, array_type);
  return array_type;
}




/*
  Return the thread-space counterpart of TYPE.

  A type already registered (find_threadspace_type) is returned from the
  registry.  VOID_TYPE is returned unchanged.  Integer, real, pointer, array,
  record, union and enumeral types are wrapped by
  threadspace_promote_type_to_array.  Any other tree code trips an assertion.
*/
static tree threadspace_promote_type(tree type)
{
  tree known = find_threadspace_type (type);
  if (known)
    return known;

  switch (TREE_CODE (type))
  {
    case INTEGER_TYPE:
    case REAL_TYPE:
    case POINTER_TYPE:
    case ARRAY_TYPE:
    case RECORD_TYPE:
    case UNION_TYPE:
    case ENUMERAL_TYPE:
      return threadspace_promote_type_to_array (type);

    case VOID_TYPE:
      return type;

    default:
      break;
  }

  /* Unsupported kind of type.  */
  gcc_assert (false);
  return type;
}




/* Promote each varaible, static or heap-allocated to be an array */
static void 
do_thread_space_promote_declaration()
{
  if ( !thread_type_hash )
    thread_type_hash = htab_create_ggc (100, type_hash_hash, type_hash_eq, 0);


  /* Promote for global variables */
  struct varpool_node *var;
  for (var = varpool_nodes; var; var = var->next)
  {
    tree decl = var->decl;
    if (in_system_header_at(DECL_SOURCE_LOCATION(decl)) )
      continue;
    
    if (decl == thread_num_var || decl == syn_list )
      continue;
    
    tree type = threadspace_promote_type (TREE_TYPE(decl));
    TREE_TYPE(decl) = type;           
    DECL_SIZE(decl) = TYPE_SIZE(type);
    DECL_SIZE_UNIT(decl) = TYPE_SIZE_UNIT(type);
    DECL_ALIGN(decl)  = TYPE_ALIGN(type);
  }


  /* Promote for heap variables is finished during promoting reference */  
  
  
}





/*
  Rewrite REF, which appears as operand OPND of the statement at *GSI, so
  that it addresses a thread-space copy, and return the rewritten tree.
  Statements needed to compute the new reference are inserted before *GSI.
  OPND and TID are only forwarded to which_address_space, which supplies the
  index of the copy to use.

    SSA_NAME, PARM_DECL, FUNCTION_DECL, RESULT_DECL and constants
        are returned unchanged.
    VAR_DECL
        of a thread-space type becomes VAR[idx]; other variables are
        returned unchanged.
    COMPONENT_REF, ARRAY_REF, ADDR_EXPR
        have operand 0 promoted recursively (REF is modified in place).
    MEM_REF
        of &VAR is handled like VAR_DECL (a STRING_CST base is left alone);
        a MEM_REF of an SSA pointer is rebased onto a new temporary holding
        base + span * idx.  If no span is known for the pointer the
        reference is returned unchanged.

  Any other tree code trips an assertion.
*/
static tree
threadspace_promote_ref(gimple_stmt_iterator * gsi, tree ref, int opnd, tree tid)
{

  gimple stmt = gsi_stmt (*gsi);

  switch( TREE_CODE(ref) )
  {
    case SSA_NAME :
      return ref;
      
    case VAR_DECL :     
    {
      tree type = TREE_TYPE(ref);
      if ( !is_threadspace_type(type) )
        return  ref;     

      tree idx = which_address_space(stmt, ref, opnd, tid) ;
      tree newref = build4 (ARRAY_REF, TREE_TYPE(type), ref, idx, NULL_TREE, NULL_TREE);
      newref = force_gimple_operand_gsi (gsi, newref, false, NULL, true, GSI_SAME_STMT);  
      return  newref;     
    }    
  
    case COMPONENT_REF:
    {
      tree base = TREE_OPERAND(ref,0);
      base = threadspace_promote_ref(gsi, base, opnd, tid);
      TREE_OPERAND(ref,0) = base;
      ref = force_gimple_operand_gsi (gsi, ref, false, NULL, true, GSI_SAME_STMT);  
      return  ref;     
    }    
      
    case ARRAY_REF:
    {
      tree base = TREE_OPERAND(ref,0);
      base = threadspace_promote_ref(gsi, base, opnd, tid);
      TREE_OPERAND(ref,0) = base;      
      ref = force_gimple_operand_gsi (gsi, ref, false, NULL, true, GSI_SAME_STMT);  
      return  ref;     
    }
    
    case MEM_REF :
    {
      tree base = TREE_OPERAND(ref,0);
      
      if ( TREE_CODE(base) == ADDR_EXPR )
      {
        // The form is *(&"hello")
        base = TREE_OPERAND (base, 0);        
        if ( TREE_CODE(base) == STRING_CST )  // "a string"
          return ref;

        // The form is *(&p)
        gcc_assert (TREE_CODE(base) == VAR_DECL);
        tree type = TREE_TYPE (base);
        if ( !is_threadspace_type(type) )
          return  ref;     

        tree idx = which_address_space (stmt, ref, opnd, tid) ;
        idx = build1 (CONVERT_EXPR, long_unsigned_type_node, idx);      
        tree newref = build4 (ARRAY_REF, TREE_TYPE(type), base, idx, NULL_TREE, NULL_TREE);
        newref = force_gimple_operand_gsi (gsi, newref, false, NULL, true, GSI_SAME_STMT);  
        return  newref;          
      }
      
      gcc_assert (TREE_CODE(base)==SSA_NAME);

      // Get the span (distance between two thread copies) of the pointer.
      // It starts out as NULL_TREE so that a pointer matching neither case
      // below is detected instead of reading an uninitialized value.
      tree span = NULL_TREE;
      if (is_a_promoted_pointer (base) )
      {
        // The span is the second field of the promoted pointer structure.
        tree basebase = TREE_OPERAND (base, 0);
        basebase = unshare_expr (basebase);
        tree field = TYPE_FIELDS (TREE_TYPE(basebase));
        field = DECL_CHAIN (field);
        span = build3 (COMPONENT_REF, TREE_TYPE(field), basebase, field,  NULL );   
      }
      else if (TREE_CODE(TREE_TYPE(base)) == POINTER_TYPE)
      {
        span = pointer_span_map[SSA_NAME_VAR(base)];
      }

      // Without a span the reference cannot be rebased; leave it alone.
      if (!span)
        return ref;

      base = threadspace_promote_ref(gsi, base, opnd, tid);
      
      // Anonymous artificial temporary that receives the rebased pointer.
      tree name = NULL_TREE;
      tree type = TREE_TYPE(base);  
      tree newbase = build_decl (UNKNOWN_LOCATION, VAR_DECL, name, type);
      DECL_ARTIFICIAL (newbase) = 1;
      gimple_add_tmp_var (newbase);
      add_referenced_var (newbase);

      // newbase = base + span * idx
      tree idx = which_address_space(stmt, ref, opnd, tid) ;      
      idx = build1 (CONVERT_EXPR, long_unsigned_type_node, idx);      
      span = gimplify_build2 (gsi, MULT_EXPR, size_type_node, span, idx);

      
      tree rhs = gimplify_build2 (gsi, POINTER_PLUS_EXPR, type, base, span);
      rhs = force_gimple_operand_gsi (gsi, rhs, true, NULL, true, GSI_SAME_STMT);  
      gimple assign = gimple_build_assign (newbase, rhs);
      gsi_insert_before (gsi, assign, GSI_SAME_STMT);        
      mark_symbols_for_renaming (assign);

      TREE_OPERAND(ref,0) = newbase;
      ref = force_gimple_operand_gsi (gsi, ref, false, NULL, true, GSI_SAME_STMT);  
      return  ref;     
    }


    case ADDR_EXPR :
    {
      tree base = TREE_OPERAND(ref,0);
      base = threadspace_promote_ref(gsi, base, opnd, tid);
      TREE_OPERAND(ref,0) = base;
      ref = force_gimple_operand_gsi (gsi, ref, true, NULL, true, GSI_SAME_STMT);  
      return  ref;     
    }
      
    case PARM_DECL :
    case FUNCTION_DECL :
    case INTEGER_CST :
    case REAL_CST :
    case STRING_CST :
    case RESULT_DECL :
      return  ref;     
      

    default:
      gcc_assert(false);
      break;
  }

  // Only reached for an unsupported tree code when the assertion above is
  // compiled out; hand the reference back untouched rather than running off
  // the end of a value-returning function.
  return ref;
}


/*
  Promote operand OPND of the statement at *GSI when it is a memory
  reference.

  REF is the current value of that operand; a NULL REF is ignored.  ADDR_EXPR,
  VAR_DECL, COMPONENT_REF, ARRAY_REF and MEM_REF operands are rewritten by
  threadspace_promote_ref and stored back into the statement, which is then
  marked for SSA renaming.  SSA names, SIZEOF_EXPR, parameters, constants and
  constructors are left untouched.  Any other tree code trips an assertion.
*/
static void 
threadspace_promote_reference(gimple_stmt_iterator * gsi, tree ref, int opnd, tree tid)
{
  if (!ref)
    return;

  gimple stmt = gsi_stmt (*gsi);

  /* Decide whether this operand needs rewriting at all.  */
  switch (TREE_CODE (ref))
  {
    case ADDR_EXPR:
    case VAR_DECL:
    case COMPONENT_REF:
    case ARRAY_REF:
    case MEM_REF:
      break;

    case SSA_NAME:
    case SIZEOF_EXPR:
    case PARM_DECL:
    case INTEGER_CST:
    case REAL_CST:
    case STRING_CST:
    case CONSTRUCTOR:
      return;

    default:
      gcc_assert (false);
      return;
  }

  tree promoted = threadspace_promote_ref (gsi, ref, opnd, tid);
  promoted = force_gimple_operand_gsi (gsi, promoted, false, NULL, true,
                                       GSI_SAME_STMT);
  gimple_set_op (stmt, opnd, promoted);
  mark_symbols_for_renaming (stmt);
}



/*
  Promote the operands of every statement in BB through
  threadspace_promote_reference.  A block is processed at most once
  (bb_promted / bb_set_promted).  The thread-id tree handed to the promoter
  is the one func_thread_id reports for the current function.

  Operand numbers passed down: conditions use 0/1, switch index 0,
  assignments 0 (lhs), 1 and 2 (rhs), call arguments i + 3 and the call
  lhs 0.  A statement kind not listed below trips an assertion.
*/
static void 
do_thread_space_promote_bb(basic_block bb)
{
  if (bb_promted (bb))
    return;
  bb_set_promted (bb);

  tree tid = func_thread_id (cgraph_get_node (current_function_decl));

  gimple_stmt_iterator gsi = gsi_start_bb (bb);
  while (!gsi_end_p (gsi))
  {
    gimple stmt = gsi_stmt (gsi);

    switch (gimple_code (stmt))
    {
      case GIMPLE_COND:
      {
        tree cond_lhs = gimple_cond_lhs (stmt);
        tree cond_rhs = gimple_cond_rhs (stmt);
        threadspace_promote_reference (&gsi, cond_lhs, 0, tid);
        threadspace_promote_reference (&gsi, cond_rhs, 1, tid);
        break;
      }

      case GIMPLE_SWITCH:
      {
        tree index = gimple_switch_index (stmt);
        threadspace_promote_reference (&gsi, index, 0, tid);
        break;
      }

      case GIMPLE_ASSIGN:
      {
        /* Fetch all operands first; the second rhs operand only exists
           for binary right-hand sides and stays NULL otherwise.  */
        tree lhs = gimple_assign_lhs (stmt);
        tree rhs1 = gimple_assign_rhs1 (stmt);
        tree rhs2 = NULL;
        if (gimple_assign_rhs_class (stmt) == GIMPLE_BINARY_RHS)
          rhs2 = gimple_assign_rhs2 (stmt);

        threadspace_promote_reference (&gsi, lhs, 0, tid);
        threadspace_promote_reference (&gsi, rhs1, 1, tid);
        threadspace_promote_reference (&gsi, rhs2, 2, tid);
        break;
      }

      case GIMPLE_CALL:
      {
        for (int arg_no = 0; arg_no < gimple_call_num_args (stmt); arg_no++)
        {
          tree arg = gimple_call_arg (stmt, arg_no);
          threadspace_promote_reference (&gsi, arg, arg_no + 3, tid);
        }

        tree call_lhs = gimple_call_lhs (stmt);
        threadspace_promote_reference (&gsi, call_lhs, 0, tid);
        break;
      }

      case GIMPLE_RETURN:
      {
        /* NOTE(review): the return value is fetched but a NULL tree is what
           gets handed to the promoter, so return statements are effectively
           left untouched.  Confirm whether the returned operand was meant
           to be promoted here.  */
        tree retval = gimple_return_retval (stmt);
        (void) retval;
        tree unset = NULL;
        threadspace_promote_reference (&gsi, unset, 0, tid);
        break;
      }

      case GIMPLE_DEBUG:
      case GIMPLE_LABEL :
      case GIMPLE_PREDICT :
      CASE_GIMPLE_UPP :
        break;

      default:
        printf("MAIN:UNKNOWN TREE CODE %d\n",gimple_code(stmt));
        gcc_assert (false);
        break;
    }

    gsi_next (&gsi);
  }
}

static void 
do_thread_space_promote_rest()
{

  ipa_update_bb_info ();

  struct cgraph_node *node;
  for (node = cgraph_nodes; node; node = node->next)
  {
    if (!valid_function_node_p (node))
      continue;

    if (func_new(node))
      continue;
    switch_to_context(node->decl);


    /* Scan each bb, promote all the references */
    basic_block bb;
    FOR_EACH_BB(bb)
      do_thread_space_promote_bb (bb);      
    
    update_ssa (TODO_update_ssa);
    switch_off_context();
  }

}



static void 
do_ts_promote_allocation()
{
  struct cgraph_node *node;
  for (node = cgraph_nodes; node; node = node->next)
  {
    if (!valid_function_node_p (node))
      continue;

    switch_to_context(node->decl);

    /* Scan each bb, promote all allocation sites */
    basic_block bb;
    FOR_EACH_BB(bb)
    {    
      
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);

        if (gimple_code (stmt) == GIMPLE_CALL)
        {
          tree callee_decl = gimple_call_fndecl (stmt);
          if ( !callee_decl )
            continue;

          /* Find every SIZEOF_EXPR that is input to size */            
          if ( DECL_FUNCTION_CODE (callee_decl) == BUILT_IN_MALLOC )
          {
            tree size = gimple_call_arg (stmt, 0);               
            tree num = build1 (CONVERT_EXPR, long_unsigned_type_node, thread_num_var);      
            size = build2 (MULT_EXPR, size_type_node, num, size);
            size = force_gimple_operand_gsi (&gsi, size, true, NULL, true, GSI_SAME_STMT);  
            gimple_call_set_arg (stmt, 0, size);
            mark_symbols_for_renaming (stmt);
          }
          else if ( DECL_FUNCTION_CODE (callee_decl) == BUILT_IN_CALLOC)
          {
            tree size = gimple_call_arg (stmt, 1);               
            tree num = build1 (CONVERT_EXPR, long_unsigned_type_node, thread_num_var);      
            size = build2 (MULT_EXPR, size_type_node, num, size);
            size = force_gimple_operand_gsi (&gsi, size, true, NULL, true, GSI_SAME_STMT);  
            gimple_call_set_arg (stmt, 1, size);
            mark_symbols_for_renaming (stmt);
          }
          else if ( DECL_FUNCTION_CODE (callee_decl) == BUILT_IN_REALLOC)
          {
            tree size = gimple_call_arg (stmt, 1);               
            tree num = build1 (CONVERT_EXPR, long_unsigned_type_node, thread_num_var);      
            size = build2 (MULT_EXPR, size_type_node, num, size);
            size = force_gimple_operand_gsi (&gsi, size, true, NULL, true, GSI_SAME_STMT);  
            gimple_call_set_arg (stmt, 1, size);
            mark_symbols_for_renaming (stmt);
          }
        }
      }    
    }
    switch_off_context();
  }


}






static void 
do_thread_space_promote_loop(loop_p loop)
{

  ipa_update_bb_info ();

  /* Set thread id for each thread in loop header */
  basic_block header = loop_preheader_edge (loop)->dest;
  gimple_stmt_iterator gsi = gsi_after_labels(header);
  tree tid = func_thread_id (cgraph_get_node(current_function_decl));  
  tree tid_func = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
  gimple call = gimple_build_call (tid_func, 0, NULL_TREE);    
  tree lhs = build_decl (UNKNOWN_LOCATION, VAR_DECL, NULL_TREE, integer_type_node);
  DECL_ARTIFICIAL(lhs) = 1;
  gimple_add_tmp_var (lhs);
  add_referenced_var (lhs);
  gimple_call_set_lhs (call, lhs);
  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
  mark_symbols_for_renaming (call);  
  tree rhs = build1 (CONVERT_EXPR, unsigned_type_node, lhs);      
  rhs = force_gimple_operand_gsi (&gsi, rhs, true, NULL, true, GSI_SAME_STMT);  
  gimple assign = gimple_build_assign (tid, rhs);
  gsi_insert_before (&gsi, assign, GSI_SAME_STMT);          
  mark_symbols_for_renaming (assign);


  /* reset thread id in loop exit */
  basic_block postexit = loop_exit_bb (loop);  
  edge e = split_block (postexit, NULL);
  postexit= e->dest;    
  gsi = gsi_after_labels(postexit);
  tid_func = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM];
  call = gimple_build_call (tid_func, 0, NULL_TREE);    
  lhs = build_decl (UNKNOWN_LOCATION, VAR_DECL, NULL_TREE, integer_type_node);
  DECL_ARTIFICIAL(lhs) = 1;
  gimple_add_tmp_var (lhs);
  add_referenced_var (lhs);
  gimple_call_set_lhs (call, lhs);
  gsi_insert_before (&gsi, call, GSI_SAME_STMT);
  mark_symbols_for_renaming (call);  
  rhs = build1 (CONVERT_EXPR, unsigned_type_node, lhs);      
  rhs = force_gimple_operand_gsi (&gsi, rhs, true, NULL, true, GSI_SAME_STMT);  
  assign = gimple_build_assign (tid, rhs);
  gsi_insert_before(&gsi, assign, GSI_SAME_STMT);          
  mark_symbols_for_renaming (assign);

  update_ssa (TODO_update_ssa);
  

  /* Promote for the loop and involved callee only */  

  IPA_NODE_SET callees;
  collect_loop_callees (loop, callees);

  /* Promote for the loop body */
  basic_block bb, *bbs;
  bbs = get_loop_body (loop);
  for (int i = 0; i < loop->num_nodes; i++)
  {
    bb = bbs[i];
    do_thread_space_promote_bb (bb);            
  }  
  free (bbs);
  update_ssa (TODO_update_ssa);



  /* Promote for callees */
  for (IPA_NODE_SET::iterator iter = callees.begin(); iter != callees.end(); ++iter )
  {
    struct cgraph_node *callee = *iter;
    gcc_assert (valid_function_node_p (callee));
  
    switch_to_context(callee->decl);
     
    /* Scan each bb, promote all the references */
    basic_block bb;
    FOR_EACH_BB(bb)
      do_thread_space_promote_bb (bb);           
     
    update_ssa (TODO_update_ssa);
    switch_off_context();

  }
  
}



#endif

#define POINTER_PROMOTE 1

#if POINTER_PROMOTE


/* Return the name of type T as a C string, or NULL when T has no name.

   TYPE_NAME (T) is either an IDENTIFIER_NODE, used directly, or a
   declaration whose DECL_NAME supplies the identifier.  A declaration
   without a DECL_NAME is reported as "no name" (NULL) instead of being
   dereferenced.  */

extern "C" const char *
Type_name (tree t)
{
  tree n = TYPE_NAME (t);

  if (!n)
    return NULL;

  /* Step from the declaration to its identifier.  */
  if (TREE_CODE (n) != IDENTIFIER_NODE)
    n = DECL_NAME (n);

  if (!n)
    return NULL;

  return IDENTIFIER_POINTER (n);
}


/*
  Store a printable name for type T in NAME.

    - A type with a name (Type_name) yields that name.
    - An unnamed pointer type yields the pointee's name followed by "*".
    - An unnamed function type yields "<func " + TYPE_UID in hex + ">" + "*".
    - Anything else yields whatever get_expr_string writes for T.
*/
void
Get_type_name (tree t, std::string &name)
{
  const char *declared = Type_name (t);
  if (declared)
  {
    name = declared;
    return;
  }

  char buf[100];

  switch (TREE_CODE (t))
  {
    case POINTER_TYPE:
      /* Name of the pointee first, then the pointer marker.  */
      Get_type_name (TREE_TYPE (t), name);
      name += "*";
      break;

    case FUNCTION_TYPE:
      sprintf (buf, "%x>", TYPE_UID (t));
      name = "<func ";
      name += buf;
      name += "*";
      break;

    default:
      get_expr_string (t, buf);
      name = buf;
      break;
  }
}



/*
  Build the field list of the structure a pointer type is promoted to:

  struct
  {
    type    pointer;   -- field named POINTER_DATA_FILED_NAME, of type TYPE
    size_t  span;      -- field named POINTER_SPAN_FILED_NAME
  }

  TYPE must be a POINTER_TYPE.  The first field is returned with the second
  one chained after it; neither has a DECL_CONTEXT assigned here.
*/
tree create_fields_of_pointer_structure (tree type)
{
  gcc_assert (TREE_CODE (type) == POINTER_TYPE);

  /* Data field: the pointer itself.  */
  tree data_field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
                                get_identifier (POINTER_DATA_FILED_NAME),
                                type);

  /* Span field.  */
  tree span_field = build_decl (UNKNOWN_LOCATION, FIELD_DECL,
                                get_identifier (POINTER_SPAN_FILED_NAME),
                                size_type_node);

  TREE_CHAIN (data_field) = span_field;
  return data_field;
}


/*
  Return the pointer-promoted ("shadow") version of TYPE.

  A type that is already a thread-space type is returned as is, and a type
  with a registered shadow (find_threadspace_type) yields that shadow.
  Otherwise:

    void, integer, real   returned unchanged.
    pointer               becomes
                            struct { <pointer> pointer; size_t span; }
                          named "{<name of the pointer type>}", where the
                          pointee type has been promoted first.
    array                 rebuilt over the promoted element type when that
                          differs from the original element type.
    record, union         a new aggregate of the same kind (record stays
                          record, union stays union) whose fields have
                          promoted types; named "{<name>}" when at least one
                          field type changed.  Aggregates without fields, or
                          whose first field comes from a system header or
                          the built-in location, are returned unchanged.
    function              a copy whose argument and return types are
                          promoted.
    enumeral              returned unchanged (must be named).

  Any other tree code is treated as unreachable.
*/
static tree promote_pointer_type(tree type)
{

  if (is_threadspace_type(type))
    return type;

  tree ret = find_threadspace_type(type) ;
  if (ret)
    return ret;

  switch ( TREE_CODE(type) )
  {
    case VOID_TYPE:  
    case INTEGER_TYPE:
    case REAL_TYPE:
       return type;

    case POINTER_TYPE:
    {
      /* Promote the pointee first and rebuild the pointer over it.  */
      tree type_pointed = promote_pointer_type (TREE_TYPE(type));
      tree type_pointer = type;
      if ( type_pointed != TREE_TYPE(type) )
      {
        type_pointer = build_pointer_type(type_pointed);
        set_type_quals(type_pointer, TYPE_QUALS (type)); 
      }


      /* The record that replaces the pointer.  */
      tree ref = make_node (RECORD_TYPE);
      tree attributes = NULL_TREE;
      TYPE_SIZE (ref) = 0;
      TYPE_PACKED (ref) = 0;
      decl_attributes (&ref, attributes, (int) ATTR_FLAG_TYPE_IN_PLACE);
      set_type_quals(ref, TYPE_QUALS (type)); 
      
      
      tree fields = create_fields_of_pointer_structure (type_pointer);
      for (tree x = fields; x; x = TREE_CHAIN (x))
      {
        DECL_CONTEXT (x) = ref;
        DECL_PACKED (x) |= TYPE_PACKED (ref);
      }
      
      
      TYPE_FIELDS (ref) = fields;
      layout_type (ref);

      /* Name the record "{<pointer type name>}".  */
      std::string buf, str;
      Get_type_name (type_pointer, buf);
      str = "{";
      str += buf;
      str += "}";        
      tree name = get_identifier (str.c_str());      
      TYPE_NAME (ref) = name;
      
      hash_add_threadspace_type (type, ref);
      
      return ref;
    }    
    

    case ARRAY_TYPE :
    {
      tree etype = promote_pointer_type (TREE_TYPE(type));
      if ( etype != TREE_TYPE(type) )
      {      
        tree atype = build_array_type (etype, TYPE_DOMAIN(type));
        hash_add_threadspace_type (type, atype);
        set_type_quals(atype, TYPE_QUALS (type)); 
        type = atype;
      }
      return type;
    }

    case RECORD_TYPE :
    case UNION_TYPE :
    {
      gcc_assert (Type_name(type));
      tree field = TYPE_FIELDS (type);
      if ( !field || in_system_header_at(DECL_SOURCE_LOCATION(field)) ||  
           DECL_SOURCE_LOCATION(field) == BUILTINS_LOCATION )
        return type;

      tree x;
      tree fields = NULL_TREE;
      tree last_fld = NULL_TREE;

      /* The shadow keeps the aggregate kind of the original, so that the
         members of a union still overlap in its shadow.  */
      tree stype = make_node (TREE_CODE (type));      
      set_type_quals(stype, TYPE_QUALS (type)); 
      /* Register the shadow before visiting the fields so that a field
         referring back to TYPE finds it instead of recursing again.  */
      hash_add_threadspace_type (type, stype);

      bool changed = false;
      for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
      {
        gcc_assert(TREE_CODE (field) == FIELD_DECL);
        tree newtype = promote_pointer_type (TREE_TYPE(field));
        if (newtype != TREE_TYPE(field))
          changed = true;
        tree fld_decl = build_decl (DECL_SOURCE_LOCATION(field), FIELD_DECL, DECL_NAME(field), newtype);
        /* Remember which new field stands for which original field.  */
        hash_add_threadspace_type ( field, fld_decl );        
        if (!fields)
          last_fld = fields = fld_decl;
        else
        {
          TREE_CHAIN (last_fld) = fld_decl;        
          last_fld = fld_decl;
        }
      }

      decl_attributes (&stype, TYPE_ATTRIBUTES(type), (int) ATTR_FLAG_TYPE_IN_PLACE);
      for (x = fields; x; x = TREE_CHAIN (x))
      {
        DECL_CONTEXT (x) = stype;
        DECL_PACKED (x) |= TYPE_PACKED (stype);
      }
      TYPE_FIELDS (stype) = fields;
      layout_type (stype);

      TYPE_NAME (stype) = TYPE_NAME (type);
      
      /* Only a shadow that really differs gets the "{...}" name.  */
      if ( changed )
      {
        std::string buf, str;
        Get_type_name (type, buf);
        str = "{";
        str += buf;
        str += "}";        
        tree name = get_identifier (str.c_str());      
        TYPE_NAME (stype) = name;
      }
      return stype;
        
    }


    case FUNCTION_TYPE :
    {
      tree newtype = copy_node (type);
      TYPE_POINTER_TO (newtype) = NULL;

      /* NOTE(review): the argument list walked here is the one reachable
         from the copy; if copy_node does not duplicate that list, these
         writes are also visible through TYPE -- confirm.  */
      for (tree p = TYPE_ARG_TYPES (newtype); p; p = TREE_CHAIN (p))
      {
        tree arg_type = TREE_VALUE (p);
        TREE_VALUE (p) = promote_pointer_type (arg_type);       
      }

      tree rettype = TREE_TYPE (newtype);
      TREE_TYPE (newtype) = promote_pointer_type (rettype);

      return newtype;      
    }

    case ENUMERAL_TYPE :
      gcc_assert (Type_name(type));
      return type;

    default:      
      gcc_unreachable();
      return type;

  }

  

}




/*
  Build the CONSTRUCTOR that initializes a promoted pointer structure of
  type SHADOWTYPE from the pointer-typed initializer DATAINIT.

  The first field receives DATAINIT itself.  The second field receives the
  span: 0 when DATAINIT is an integer constant (which must be zero), or the
  size in bytes of the pointed-to type when DATAINIT is an ADDR_EXPR.  Any
  other form of initializer is treated as unreachable.
*/
static tree
pointer_promote_init(tree shadowtype, tree datainit)
{
  /* Element for the data (pointer) field.  */
  tree data_fld = TYPE_FIELDS (shadowtype);
  tree data_elt = make_node (TREE_LIST);
  TREE_PURPOSE (data_elt) = data_fld;
  TREE_VALUE (data_elt) = datainit;

  gcc_assert (TREE_CODE (TREE_TYPE (datainit)) == POINTER_TYPE);

  /* Work out the span that goes with the pointer value.  */
  tree span = build_int_cst_type (size_type_node, 0);
  if (TREE_CODE (datainit) == INTEGER_CST)
  {
    // the initializer is a constant number, which has to be zero
    gcc_assert (int_cst_value (datainit) == 0);
  }
  else if (TREE_CODE (datainit) == ADDR_EXPR)
  {
    span = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (datainit)));
  }
  else
  {
    gcc_unreachable ();
  }

  /* Element for the span field, chained after the data element.  */
  tree span_elt = make_node (TREE_LIST);
  TREE_PURPOSE (span_elt) = DECL_CHAIN (data_fld);
  TREE_VALUE (span_elt) = span;
  TREE_CHAIN (data_elt) = span_elt;

  return build_constructor_from_list (shadowtype, data_elt);
}

/*
  Convert the initializer VALUE so that it fits the promoted type SHADOWTYPE
  and return the result.

    - A value whose type is already compatible with SHADOWTYPE is returned
      unchanged.
    - A value that is not a CONSTRUCTOR is wrapped by pointer_promote_init.
    - A CONSTRUCTOR is updated in place: for a record or union SHADOWTYPE
      its values are paired with the fields of SHADOWTYPE in order; for an
      array SHADOWTYPE each value is promoted to the element type and keeps
      its index.  The constructor's type is then set to SHADOWTYPE.

  PROMOTABLE is not read by this function.
*/
static tree pointer_promote_value(tree value, tree shadowtype, bool *promotable)
{
  if (types_compatible_p (TREE_TYPE (value), shadowtype))
    return value;

  if (TREE_CODE (value) != CONSTRUCTOR)
    return pointer_promote_init (shadowtype, value);

  unsigned int ix;
  tree elt_value, elt_index;
  const bool is_aggregate = TREE_CODE (shadowtype) == RECORD_TYPE
                            || TREE_CODE (shadowtype) == UNION_TYPE;

  if (is_aggregate)
  {
    /* Walk the shadow fields in step with the constructor values.  */
    elt_index = TYPE_FIELDS (shadowtype);
    FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (value), ix, elt_value)
    {
      tree promoted = pointer_promote_value (elt_value, TREE_TYPE (elt_index), NULL);
      CONSTRUCTOR_REPLACE_ELT (CONSTRUCTOR_ELTS (value), ix, elt_index, promoted);
      elt_index = TREE_CHAIN (elt_index);
    }
  }
  else if (TREE_CODE (shadowtype) == ARRAY_TYPE)
  {
    FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (value), ix, elt_index, elt_value)
    {
      tree promoted = pointer_promote_value (elt_value, TREE_TYPE (shadowtype), NULL);
      CONSTRUCTOR_REPLACE_ELT (CONSTRUCTOR_ELTS (value), ix, elt_index, promoted);
    }
  }

  TREE_TYPE (value) = shadowtype;
  return value;
}

/*
  Give the function NODE a promoted copy of its second parameter and make
  the body use it.

  The first two entries of DECL_ARGUMENTS are taken as argc and argv
  (presumably NODE is the program's main function -- confirm against the
  caller).  A local variable "argv_S" of the promoted type of argv is
  created.  In a block split off right after the entry block, its first
  field is set to an alloca'd buffer sized for argc elements.  A new loop
  over arg_i = 0 .. argc, placed on the edge leaving that block, copies
  argv[arg_i] into the first field of element arg_i of the buffer.  Finally,
  every statement operand that is exactly the argv parameter is replaced by
  "argv_S.<first field>" in all blocks other than the set-up block and the
  copy loop's header.

  NOTE(review): only the first field of each copied element is assigned
  here; the second field of the elements and of argv_S itself is not
  written in this function.
*/
static void
forward_argv(struct cgraph_node *node)
{

  /* First parameter is treated as argc, the one chained after it as argv.  */
  tree argc = DECL_ARGUMENTS (node->decl); 
  tree argv = DECL_CHAIN (argc);

  /* New argv_S */    
  tree type = promote_pointer_type (TREE_TYPE(argv));
  /* FIELD is the first field of the promoted type, i.e. the data pointer.  */
  tree field = TYPE_FIELDS (type); 
  tree name = get_identifier ("argv_S");      
  tree argv_S = build_decl (UNKNOWN_LOCATION, VAR_DECL, name, type);
  gimple_add_tmp_var (argv_S);
  add_referenced_var (argv_S);


  /* The set-up code goes into a fresh block following the entry block.  */
  edge e = split_block (ENTRY_BLOCK_PTR, NULL);  
  basic_block bb = e->dest;

  /* argv_S.pointer = alloca (argc * sizeof(type_pointed(typeof(argv_S.pointer))) ) */
  gimple_stmt_iterator gsi = gsi_last_bb (bb);
  tree size = build1 (SIZEOF_EXPR, size_type_node,TREE_TYPE(TREE_TYPE(field))); 
  tree num = build1 (CONVERT_EXPR, size_type_node, argc);
  size = build2 (MULT_EXPR, size_type_node, num, size);
  size = force_gimple_operand_gsi (&gsi, size, true, NULL, true, GSI_SAME_STMT);  
  gimple alloc_call = gimple_build_call ( built_in_decls[BUILT_IN_ALLOCA], 1, size);

  /* The alloca result lands in an artificial temporary first ...  */
  tree tmp_lhs = build_decl (UNKNOWN_LOCATION, VAR_DECL, NULL, TREE_TYPE(field));   
  DECL_ARTIFICIAL (tmp_lhs) = 1;
  gimple_add_tmp_var (tmp_lhs);
  add_referenced_var (tmp_lhs);
  gsi_insert_before (&gsi, alloc_call, GSI_SAME_STMT);  
  gimple_call_set_lhs (alloc_call, tmp_lhs);  
  mark_symbols_for_renaming (alloc_call);
  
  /* ... and is then stored into argv_S.<first field>.  */
  tree lhs = build3 (COMPONENT_REF, TREE_TYPE(field), argv_S, field,  NULL );
  lhs = force_gimple_operand_gsi (&gsi, lhs, false, NULL, true, GSI_SAME_STMT);  
  gimple assign = gimple_build_assign (lhs, tmp_lhs);
  gsi_insert_before (&gsi, assign, GSI_SAME_STMT);       
  mark_symbols_for_renaming (assign);
  

  

  /* Copy the argv array to argv_S.pointer */    
  /* The copy loop is created on the single edge leaving the set-up block;
     its induction variable "arg_i" starts at 0, steps by 1 and is bounded
     by argc.  */
  gcc_assert (single_succ_p(bb));
  e = single_succ_edge(bb);  
  tree idx_type = TREE_TYPE(argc);
  tree initial_value = build_int_cst_type (idx_type, 0);
  tree stride = build_int_cst_type (idx_type, 1);
  tree upper_bound = argc;  
  tree iv, iv_before, iv_after;
  iv = create_tmp_var (idx_type, "arg_i");
  loop_p loop = create_empty_loop_on_edge (e, initial_value, stride, upper_bound, iv, &iv_before, &iv_after,
                                           bb->loop_father);
  
  /* The copy statement is inserted into the loop header.  */
  gsi = gsi_last_bb (loop->header);

  // argv_S.pointer[i].pointer
  tree type_pointed = TREE_TYPE( TREE_TYPE(field) );      
  tree offset = build1(CONVERT_EXPR, size_type_node, iv_before);  
  size = build1 (SIZEOF_EXPR, size_type_node, type_pointed); 
  offset = build2 (MULT_EXPR, size_type_node, offset, size);
  lhs = build3 (COMPONENT_REF, TREE_TYPE(field), argv_S, field,  NULL );  // argv_S.pointer
  lhs = build2 (POINTER_PLUS_EXPR, TREE_TYPE(field), lhs, offset);
  lhs = build_simple_mem_ref(lhs);      // argv_S.pointer[i]
  tree pointer_field = TYPE_FIELDS (type_pointed);
  lhs = build3 (COMPONENT_REF, TREE_TYPE(pointer_field), lhs, pointer_field,  NULL );  // argv_S.pointer[i].pointer
  
  // argv[i]
  type_pointed = TREE_TYPE( TREE_TYPE(argv) );      
  offset = build1 (CONVERT_EXPR, size_type_node, iv_before);
  size = build1 (SIZEOF_EXPR, size_type_node, type_pointed); 
  offset = build2 (MULT_EXPR, size_type_node, offset, size);
  tree rhs = build2 (POINTER_PLUS_EXPR, TREE_TYPE(argv), argv, offset);
  rhs = build_simple_mem_ref(rhs);     
 
  // argv_S.pointer[i].pointer = argv[i]
  lhs = force_gimple_operand_gsi (&gsi, lhs, false, NULL, true, GSI_SAME_STMT);  
  rhs = force_gimple_operand_gsi (&gsi, rhs, true, NULL, true, GSI_SAME_STMT);  
  assign = gimple_build_assign (lhs, rhs);
  gsi_insert_before (&gsi, assign, GSI_SAME_STMT);       
  mark_symbols_for_renaming (assign);


  /* Forward every appearance of argv to argv_S.pointer */
  tree newargv = build3 (COMPONENT_REF, TREE_TYPE(field), argv_S, field,  NULL );  // argv_S.pointer
  
  basic_block bb1;
  FOR_EACH_BB(bb1)  {    

    /* The set-up block and the copy loop header must keep reading the
       original argv.  */
    if ( bb1 == bb || bb1 == loop->header)
      continue;

    for (gimple_stmt_iterator gsi = gsi_start_bb (bb1); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree lval = NULL;
      tree rval1 = NULL;
      tree rval2 = NULL;
      gimple stmt = gsi_stmt (gsi);
      tree tmpargv;
      /* Operands are compared with the argv tree by identity; each match
         is replaced by a gimplified copy of NEWARGV.  */
      switch (gimple_code (stmt))
      {
        case GIMPLE_COND:
        {
          lval = gimple_cond_lhs (stmt);
          rval1 = gimple_cond_rhs (stmt);
          if ( lval == argv )
          {
            tmpargv = force_gimple_operand_gsi (&gsi, newargv, true, NULL, true, GSI_SAME_STMT);  
            gimple_cond_set_lhs (stmt, tmpargv);
            mark_symbols_for_renaming (stmt);
          }
          if ( rval1 == argv )
          {
            tmpargv = force_gimple_operand_gsi (&gsi, newargv, true, NULL, true, GSI_SAME_STMT);  
            gimple_cond_set_rhs (stmt, tmpargv);
            mark_symbols_for_renaming (stmt);
          }
          break;
        }
  
        case GIMPLE_SWITCH:
        {
          lval = gimple_switch_index (stmt);
          if ( lval == argv )
          {
            tmpargv = force_gimple_operand_gsi (&gsi, newargv, true, NULL, true, GSI_SAME_STMT);  
            gimple_switch_set_index (stmt, tmpargv);
            mark_symbols_for_renaming (stmt);
          }
          break;
        }
  
        case GIMPLE_ASSIGN:
        {
          /* rval2 stays NULL unless the right-hand side is binary.  */
          lval = gimple_assign_lhs (stmt);
          if (gimple_assign_rhs_class (stmt) == GIMPLE_BINARY_RHS)
          {
            rval1 = gimple_assign_rhs1 (stmt);
            rval2 = gimple_assign_rhs2 (stmt);
          }
          else
          {
            rval1 = gimple_assign_rhs1 (stmt);
          }
        
          if ( lval == argv )
          {
            /* A store target is not forced into a simple operand.  */
            tmpargv = force_gimple_operand_gsi (&gsi, newargv, false, NULL, true, GSI_SAME_STMT);  
            gimple_assign_set_lhs (stmt, tmpargv);
            mark_symbols_for_renaming (stmt);
          }
          if ( rval1 == argv )
          {
            tmpargv = force_gimple_operand_gsi (&gsi, newargv, true, NULL, true, GSI_SAME_STMT);  
            gimple_assign_set_rhs1(stmt, tmpargv);
            mark_symbols_for_renaming (stmt);
          }
          if ( rval2 == argv )
          {
            tmpargv = force_gimple_operand_gsi (&gsi, newargv, true, NULL, true, GSI_SAME_STMT);  
            gimple_assign_set_rhs2(stmt, tmpargv);
            mark_symbols_for_renaming (stmt);
          }
          break;
        }
  
        case GIMPLE_CALL:
        {            
          /* Only call arguments are examined; the call's lhs is not.  */
          for (int i = 0; i < gimple_call_num_args (stmt); i++)
          {
            rval1 = gimple_call_arg (stmt, i);
            if ( rval1 == argv )
            {
              tmpargv = force_gimple_operand_gsi (&gsi, newargv, true, NULL, true, GSI_SAME_STMT);  
              gimple_call_set_arg (stmt, i, tmpargv);
              mark_symbols_for_renaming (stmt);
            }
          }
          break;        
        }
  
        /* Statement kinds that are left untouched.  */
        CASE_GIMPLE_UPP:
        case GIMPLE_RETURN :  
        case GIMPLE_LABEL :
        case GIMPLE_PREDICT :
        case GIMPLE_DEBUG:
          break;
  
        default:
          printf("MAIN:UNKNOWN TREE CODE %d\n",gimple_code(stmt));
          gcc_assert(false);
          break;
      }
  
  
    }    
  }

}


/* Rewrite the declared types of globals, formal parameters, return values
   and local variables through promote_pointer_type.

   - Globals: every varpool node that is not declared in a system header and
     is not at BUILTINS_LOCATION gets its type promoted; the decl is only
     changed when the promoted type satisfies is_threadspace_type.  An
     existing DECL_INITIAL is rewritten with pointer_promote_value.
   - Functions (valid_function_node_p, not from a system header): for every
     function except the main procedure, the function type, each PARM_DECL
     (TREE_TYPE and DECL_ARG_TYPE) and the DECL_RESULT are promoted.
   - Locals: non-static VAR_DECLs that are not GIMPLE registers are promoted
     in place.  GIMPLE-register pointers keep a pointer type (to the promoted
     pointee) and get a companion size_type_node variable, recorded in
     pointer_span_map, named "<name>_span" or "D.<uid>_span".  */
static void 
do_ts_promote_pointer_decl()
{

  /* Promote for global variables */
  for (struct varpool_node *vnode = varpool_nodes; vnode; vnode = vnode->next)
  {
    tree decl = vnode->decl;

    if (in_system_header_at(DECL_SOURCE_LOCATION(decl)) || DECL_SOURCE_LOCATION(decl) == BUILTINS_LOCATION  )
      continue;

    tree type = promote_pointer_type (TREE_TYPE(decl));

    if (!is_threadspace_type(type))
      continue;
    
    TREE_TYPE(decl) = type; 
    relayout_decl(decl);

    /* promote initializer */
    if ( DECL_INITIAL (decl) )
    {
      tree init = DECL_INITIAL (decl);
      DECL_INITIAL (decl) = pointer_promote_value ( init, TREE_TYPE(decl), NULL );
    }  
  }


  /* Promote for local variables and formal parameters */  
  for (struct cgraph_node *node = cgraph_nodes; node; node = node->next)
  {
    if (!valid_function_node_p (node))
      continue;
    
    if (in_system_header_at(DECL_SOURCE_LOCATION(node->decl)) )
      continue;

    switch_to_context (node->decl);
  
    /* Promote formal parameters and return val */
    if ( !is_main_procedure(node) )
    {
      tree newtype = promote_pointer_type (TREE_TYPE(node->decl));
      TREE_TYPE(node->decl) = newtype;

      for (tree parm = DECL_ARGUMENTS (node->decl); parm; parm = DECL_CHAIN (parm))
      {  
        tree type = promote_pointer_type (TREE_TYPE(parm));
        TREE_TYPE(parm) = type; 
        DECL_ARG_TYPE(parm) = type;  
        relayout_decl(parm);
      }

      /* The result decl takes the return type of the promoted function type. */
      tree ret = DECL_RESULT (node->decl);
      TREE_TYPE(ret) = TREE_TYPE (newtype); 
      relayout_decl(ret);        
    }   

   
    /* Promote local variables */
    unsigned ix;
    tree local;
    struct function *dsf = DECL_STRUCT_FUNCTION (node->decl);

    FOR_EACH_LOCAL_DECL (dsf, ix, local)
    {
      if ( TREE_CODE(local) != VAR_DECL )
        continue;

      if ( TREE_STATIC(local) )
        continue;

      tree type = TREE_TYPE(local);

      if ( !is_gimple_reg (local) )
      {
        type = promote_pointer_type (type);
        TREE_TYPE(local) = type; 
        relayout_decl(local);
      }
      else if (TREE_CODE(type) == POINTER_TYPE)
      {
        /* A register pointer stays a pointer; only its pointee is promoted. */
        tree type_pointed = promote_pointer_type (TREE_TYPE(type));
        tree type_pointer = build_pointer_type(type_pointed);
        //set_type_quals(type_pointer, TYPE_QUALS (type)); 
        TREE_TYPE(local) = type_pointer; 
      
        /* create spanning variable for saving size */

        /* Identifier names are unbounded, so the write into the fixed
           buffer must be length-limited; an over-long name is truncated.  */
        char buf[100];  buf[0]='\0';
        if ( DECL_NAME(local) )
          snprintf(buf, sizeof (buf), "%s_span", IDENTIFIER_POINTER (DECL_NAME (local)) );          
        else
          snprintf(buf, sizeof (buf), "D.%u_span", DECL_UID (local) );          

        tree name = get_identifier (buf);      
        tree size_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, name, size_type_node);
        DECL_ARTIFICIAL (size_decl) = 1;
        gimple_add_tmp_var (size_decl);        
        add_referenced_var (size_decl);
        pointer_span_map[local] = size_decl;        
      }
    }

    switch_off_context ();
    
  }  
  
}


/* Refresh TREE_TYPE of the reference tree REF, bottom-up, so that it agrees
   with the (possibly promoted) types of the declarations it is built from.

   Declarations and constants are left untouched.  Composite references
   first refresh their base operand and then derive their own type from it.
   Any tree code not listed below trips gcc_assert.  */
static void
pointer_update_type(tree ref)
{
  switch( TREE_CODE(ref) )
  {
    /* Leaves: nothing to recompute. */
    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
    case FUNCTION_DECL :
    case INTEGER_CST:
    case REAL_CST:
    case STRING_CST:
      return;

    case SSA_NAME:
      /* An SSA name always carries the type of its underlying variable. */
      TREE_TYPE(ref) = TREE_TYPE(SSA_NAME_VAR(ref));
      return;

    case ADDR_EXPR:
    {
      tree target = TREE_OPERAND(ref, 0);
      pointer_update_type(target);
      TREE_TYPE(ref) = build_pointer_type(TREE_TYPE(target));
      return;
    }

    case ARRAY_REF:
    {
      tree array = TREE_OPERAND(ref, 0);
      pointer_update_type(array);
      /* Element type of the refreshed array type. */
      TREE_TYPE(ref) = TREE_TYPE(TREE_TYPE(array));
      return;
    }

    case COMPONENT_REF:
    {
      pointer_update_type(TREE_OPERAND(ref, 0));

      /* Swap in the field found by find_threadspace_type when there is one;
         otherwise keep the original field. */
      tree old_field = TREE_OPERAND(ref, 1);
      tree mapped_field = find_threadspace_type(old_field);
      tree use_field = mapped_field ? mapped_field : old_field;
      TREE_OPERAND(ref, 1) = use_field;
      TREE_TYPE(ref) = TREE_TYPE(use_field);
      return;
    }

    case MEM_REF :
    {
      tree address = TREE_OPERAND(ref, 0);
      tree access_type = TREE_TYPE (ref);
      tree pointee = TREE_TYPE (TREE_TYPE(address));

      /* Classify the access BEFORE the base is refreshed:
         plain_deref   - the access type matches the pointee type;
         array_element - the pointee is an array and the access type is
                         exactly its element type.
         Neither flag set means the relationship is unknown. */
      bool plain_deref = false;
      bool array_element = false;
      if ( types_compatible_p(access_type, pointee) )
        plain_deref = true;
      else if ( TREE_CODE(access_type) && useless_type_conversion_p(access_type, pointee) )
        plain_deref = true;
      else if ( TREE_CODE(pointee) == ARRAY_TYPE && access_type == TREE_TYPE(pointee))
        array_element = true;
      else if ( is_threadspace_type(pointee) && find_threadspace_type(access_type) == pointee )
        plain_deref = true;

      pointer_update_type(address);

      /* Re-read the pointee now that the base has its refreshed type. */
      pointee = TREE_TYPE (TREE_TYPE(address));
      if ( array_element )
        pointee = TREE_TYPE (pointee);

      /* Only aggregates are retyped, since the structure may have been promoted. */
      if ( (plain_deref || array_element) && AGGREGATE_TYPE_P(pointee) )
        TREE_TYPE(ref) = pointee;
      return;
    }

    case SIZEOF_EXPR:
    {
      /* What if the sizeof concerns a system library expression */
      tree measured = promote_pointer_type (TREE_OPERAND(ref, 0));
      TREE_OPERAND(ref, 0) = measured;
      return;
    }

    case CONSTRUCTOR:
    {
      unsigned idx;
      tree elt;
      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS(ref), idx, elt)
        pointer_update_type (elt);
      return;
    }

    default:
      gcc_assert(false);
      return;
  }
}


/* If REF has a type accepted by is_threadspace_type, replace operand OPND
   of the statement at *GSI with a COMPONENT_REF selecting the first field
   of that type, gimplified before the statement.

   Returns the new operand, or REF unchanged when GSI is NULL or REF's type
   is not a thread-space type.  Type qualifiers of the original type are not
   copied onto the field type.  */
static tree
pointer_promote_ref(gimple_stmt_iterator * gsi, tree ref, int opnd)
{
  if (gsi == NULL || !is_threadspace_type (TREE_TYPE(ref)))
    return ref;

  gimple owner = gsi_stmt (*gsi);

  /* The first field of the promoted type is the one selected here. */
  tree first_field = TYPE_FIELDS (TREE_TYPE(ref));
  tree selected = build3 (COMPONENT_REF, TREE_TYPE(first_field), ref, first_field,  NULL );
  selected = force_gimple_operand_gsi (gsi, selected, false, NULL, true, GSI_SAME_STMT);

  gimple_set_op( owner, opnd, selected );
  return selected;
}


/* Bring operand REF (operand index OPND of the statement at *GSI) in line
   with the promoted types.

   - NULL: returned as is.
   - SSA_NAME, SIZEOF_EXPR: type refreshed via pointer_update_type.
   - ADDR_EXPR: only the addressed operand is refreshed.
   - PARM_DECL, VAR_DECL, COMPONENT_REF, ARRAY_REF, MEM_REF: refreshed and
     then passed to pointer_promote_ref, which may replace the operand.
   - CONSTRUCTOR and constants: returned unchanged.
   Any other tree code is gcc_unreachable.

   Returns the operand that now stands in the statement.  */
static tree 
pointer_promote_reference(gimple_stmt_iterator * gsi, tree ref, int opnd)
{
  if (ref == NULL_TREE)
    return ref;

  switch( TREE_CODE(ref) )
  {
    case SSA_NAME :
    case SIZEOF_EXPR:
      pointer_update_type(ref);
      return ref;

    case ADDR_EXPR :
      pointer_update_type(TREE_OPERAND(ref, 0));
      return ref;

    case PARM_DECL :
    case VAR_DECL :
    case COMPONENT_REF:
    case ARRAY_REF:
    case MEM_REF :
      pointer_update_type(ref);
      return pointer_promote_ref(gsi, ref, opnd);

    /* Usually the constructor is empty as it must appear in a local initialization */
    case CONSTRUCTOR :
    case INTEGER_CST :
    case REAL_CST :
    case STRING_CST :
      return ref;

    default:
      gcc_unreachable();
      break;
  }

  return ref;
}




 




/* Promote the operands of the GIMPLE_ASSIGN at *GSI and, when both sides
   carry a span, emit a second assignment that copies the span.

   The span of the destination is the second field of the enclosing promoted
   structure (when is_a_promoted_pointer holds) or the companion variable
   from pointer_span_map (for an SSA_NAME of pointer type).  The span of the
   source is obtained the same way; for other pointer-typed sources it is a
   SIZEOF_EXPR of the pointee (ADDR_EXPR) or the constant 0.

   The extra assignment is inserted after the statement and *GSI is left
   pointing at it.  */
static void
pointer_promote_assignment(gimple_stmt_iterator * gsi)
{
  gimple stmt = gsi_stmt (*gsi);

  /* Fetch every operand before any of them is rewritten. */
  tree dst = gimple_assign_lhs (stmt);
  tree src1 = gimple_assign_rhs1 (stmt);
  tree src2 = NULL_TREE;
  if (gimple_assign_rhs_class (stmt) == GIMPLE_BINARY_RHS)
    src2 = gimple_assign_rhs2 (stmt);

  dst = pointer_promote_reference (gsi, dst, 0); 
  src1 = pointer_promote_reference (gsi, src1, 1);
  src2 = pointer_promote_reference (gsi, src2, 2);      

  /* Where the span of the destination lives. */
  tree span_dst = NULL_TREE;
  if (is_a_promoted_pointer (dst) )
  {
    tree holder = unshare_expr (TREE_OPERAND (dst, 0));
    /* Second field of the promoted structure. */
    tree span_field = DECL_CHAIN (TYPE_FIELDS (TREE_TYPE(holder)));
    span_dst = build3 (COMPONENT_REF, TREE_TYPE(span_field), holder, span_field,  NULL );   
  }
  else if (TREE_CODE(TREE_TYPE(dst)) == POINTER_TYPE && TREE_CODE(dst) == SSA_NAME)
  {
    span_dst = pointer_span_map[SSA_NAME_VAR(dst)];
    gcc_assert (span_dst);
  }

  /* Where the span of the source comes from. */
  tree span_src = NULL_TREE;
  if (is_a_promoted_pointer (src1) )
  {
    tree holder = unshare_expr (TREE_OPERAND (src1, 0));
    tree span_field = DECL_CHAIN (TYPE_FIELDS (TREE_TYPE(holder)));
    span_src = build3 (COMPONENT_REF, TREE_TYPE(span_field), holder, span_field,  NULL );   
  }
  else if (TREE_CODE(TREE_TYPE(src1)) == POINTER_TYPE)
  {
    switch (TREE_CODE(src1))
    {
      case ADDR_EXPR:
        /* Address of an object: span is the size of the pointee type. */
        span_src = build1 (SIZEOF_EXPR, size_type_node, TREE_TYPE(TREE_TYPE(src1)));   
        break;

      case SSA_NAME:        
        span_src = pointer_span_map[SSA_NAME_VAR(src1)];
        gcc_assert (span_src);
        break;

      case VAR_DECL:
        /* Only variables from system headers are expected here. */
        gcc_assert (in_system_header_at(DECL_SOURCE_LOCATION(src1)) );
        span_src = build_int_cst_type (size_type_node, 0);
        break;

      /* A constant (e.g. zero) or anything else has span 0. */
      case INTEGER_CST:
      default:
        span_src = build_int_cst_type (size_type_node, 0);
        break;  
    }
  }

  /* A span copy needs both ends. */
  if ( !span_dst || !span_src )
    return;

  span_dst = force_gimple_operand_gsi (gsi, span_dst, false, NULL, true, GSI_SAME_STMT);  
  span_src = force_gimple_operand_gsi (gsi, span_src, true, NULL, true, GSI_SAME_STMT);  
  gimple span_copy = gimple_build_assign (span_dst, span_src);
  gsi_insert_after (gsi, span_copy, GSI_NEW_STMT);
  mark_symbols_for_renaming (span_copy);
}






/* Promote the GIMPLE_CALL at *GSI.  Three cases are distinguished:

   1. Calls to the normal builtins malloc / alloca / calloc / realloc: the
      requested size is taken from the call arguments and stored as the span
      of the returned pointer (companion variable from pointer_span_map, or
      the second field of the promoted structure).  Nothing is recorded when
      the call has no lhs.
   2. Other direct calls whose callee has no cgraph node or fails
      valid_function_node_p: arguments and lhs are only passed through
      pointer_promote_reference.
   3. Everything else (including indirect calls): every pointer-typed,
      non-INTEGER_CST argument is packed together with its span into a
      temporary of the promoted type, which replaces the argument; a
      pointer-typed lhs is unpacked from a temporary of the callee's return
      type after the call.

   Call argument I is operand I + 3 of the statement and the lhs is operand 0,
   which is what pointer_promote_reference is given here.

   NOTE(review): case 3 applies SSA_NAME_VAR to every pointer-typed argument
   and lhs, so it appears to assume they are SSA names -- confirm that
   ADDR_EXPR arguments cannot reach this point.  */
static void
pointer_promote_funccall(gimple_stmt_iterator * gsi)
{

  /* Exception : Malloc, calloc, Realloc */
  gimple stmt = gsi_stmt (*gsi);
  tree fndecl = gimple_call_fndecl (stmt);

  if ( fndecl )
  {
    /* Get heap size from allocation site */
    tree size = NULL_TREE;

    /* DECL_FUNCTION_CODE is only an enum built_in_function value for
       BUILT_IN_NORMAL decls; without this check a target builtin with a
       coinciding code would be mistaken for an allocator.  */
    if ( DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL )
    {
      switch  ( DECL_FUNCTION_CODE (fndecl) ) 
      {
        case BUILT_IN_MALLOC:
        case BUILT_IN_ALLOCA:
        {
          size = gimple_call_arg (stmt, 0);
          size = pointer_promote_reference (gsi, size, 0 + 3); 
          break;
        }
        case BUILT_IN_CALLOC:
        {
          /* Total size is count * element size. */
          tree num = gimple_call_arg (stmt, 0);
          size = gimple_call_arg (stmt, 1);
          size = pointer_promote_reference (gsi, size, 1 + 3); 
          size = gimplify_build2 (gsi, MULT_EXPR, size_type_node, num, size);
          break;
        }
        case BUILT_IN_REALLOC:
        {
          size = gimple_call_arg (stmt, 1);
          size = pointer_promote_reference (gsi, size, 1 + 3); 
          break;
        }
        default :
          break;       
      }
    }
         

    if ( size )
    {
      // handle return value
      tree lval = gimple_call_lhs (stmt);  

      /* The result of the allocation is discarded: there is no pointer
         whose span could be recorded.  */
      if ( !lval )
        return;

      pointer_update_type(lval);

      if (TREE_CODE(TREE_TYPE(lval)) == POINTER_TYPE)
      {
        // Make assignment from size to span field parameter
        tree lhs = pointer_span_map[SSA_NAME_VAR(lval)];
        gcc_assert (lhs);
        gimple assign = gimple_build_assign (lhs, size);
        gsi_insert_after (gsi, assign, GSI_NEW_STMT);  
        mark_symbols_for_renaming (assign);
      }
      else if (is_a_promoted_pointer (lval) )
      {
        // Field pointer
        tree base = TREE_OPERAND (lval, 0);
        tree field = TYPE_FIELDS (TREE_TYPE(base));
        tree lhs = build3 (COMPONENT_REF, TREE_TYPE(field), base, field,  NULL );   
        lhs = force_gimple_operand_gsi (gsi, lhs, false, NULL, true, GSI_SAME_STMT);  
        gimple_call_set_lhs (stmt, lhs);
        mark_symbols_for_renaming (stmt);

        // Field span
        base = unshare_expr (base);
        field = DECL_CHAIN (field);
        lhs = build3 (COMPONENT_REF, TREE_TYPE(field), base, field,  NULL );   
        lhs = force_gimple_operand_gsi (gsi, lhs, false, NULL, true, GSI_SAME_STMT);  
        gimple assign = gimple_build_assign (lhs, size);
        gsi_insert_after (gsi, assign, GSI_NEW_STMT);          
        mark_symbols_for_renaming (assign);
        
      }
      else
        gcc_unreachable();

      return;
    }
    else 
    {
      cgraph_node_ptr node = cgraph_get_node(fndecl);
      if (!node || !valid_function_node_p (node))
      {
        /* Callee is not promoted: only fix up the operands themselves. */
        for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
          pointer_promote_reference (gsi, gimple_call_arg (stmt, i), i + 3); 
        pointer_promote_reference (gsi, gimple_call_lhs(stmt), 0);         
        mark_symbols_for_renaming (stmt);
        return;
      }
    }
    
  }


  tree fn = gimple_call_fn (stmt);
  pointer_update_type (fn);


  /* Pass and receive spanning fields to or from the function call */

  for (unsigned i = 0; i < gimple_call_num_args (stmt); i++)
  {
    tree rval = gimple_call_arg (stmt, i);
    pointer_update_type(rval);
    
    if ( TREE_CODE(TREE_TYPE(rval)) == POINTER_TYPE && TREE_CODE(rval)!= INTEGER_CST )
    {
      // New tmp structure
      /* Identifier names are unbounded; keep the write inside the buffer. */
      char buf[100];  buf[0]='\0';
      if ( DECL_NAME(SSA_NAME_VAR(rval)) )
        snprintf(buf, sizeof (buf), "%s_struct", IDENTIFIER_POINTER (DECL_NAME(SSA_NAME_VAR(rval))) );          
      else
        snprintf(buf, sizeof (buf), "D.%u_struct", DECL_UID(SSA_NAME_VAR(rval)) );          

      
      tree name = get_identifier (buf);     
      tree type = promote_pointer_type (TREE_TYPE(rval));
      //tree type = find_threadspace_type (TREE_TYPE(rval));
      gcc_assert(type);
      tree decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, name, type);
      DECL_ARTIFICIAL (decl) = 1;
      gimple_add_tmp_var (decl);
      add_referenced_var(decl);

      // Make assignment from actual parameter to tmp

      // Field pointer
      tree field = TYPE_FIELDS (type);        
      tree lhs = build3 (COMPONENT_REF, TREE_TYPE(field), decl, field,  NULL );
      lhs = force_gimple_operand_gsi (gsi, lhs, false, NULL, true, GSI_SAME_STMT);  
      gimple assign = gimple_build_assign (lhs, rval);
      gsi_insert_before (gsi, assign, GSI_SAME_STMT);       
      mark_symbols_for_renaming (assign);

      // Field span
      field = DECL_CHAIN (field);        
      lhs = build3 (COMPONENT_REF, TREE_TYPE(field), decl, field,  NULL );
      lhs = force_gimple_operand_gsi (gsi, lhs, false, NULL, true, GSI_SAME_STMT);  
      tree rhs = pointer_span_map[SSA_NAME_VAR(rval)];
      gcc_assert (rhs);
      assign = gimple_build_assign (lhs, rhs);
      gsi_insert_before (gsi, assign, GSI_SAME_STMT);       
      mark_symbols_for_renaming (assign);

      /* The call now receives the packed temporary instead of the pointer. */
      gimple_call_set_arg (stmt, i, decl);
      mark_symbols_for_renaming (stmt);
    }     
    
  }


  // handle return value
  tree lval = gimple_call_lhs (stmt);  
  if (!lval)
    return ;
  
  pointer_update_type(lval);
  
  if (TREE_CODE(TREE_TYPE(lval)) == POINTER_TYPE)
  {
    /* FN has a pointer-to-function type; take the function's return type. */
    tree fntype = TREE_TYPE (TREE_TYPE (fn));
    tree rettype = TREE_TYPE (fntype);

    // New tmp structure
    char buf[100];  buf[0]='\0';
    if ( DECL_NAME(SSA_NAME_VAR(lval)) )
      snprintf(buf, sizeof (buf), "%s_struct", IDENTIFIER_POINTER (DECL_NAME(SSA_NAME_VAR(lval))) );          
    else
      snprintf(buf, sizeof (buf), "D.%u_struct", DECL_UID(SSA_NAME_VAR(lval)) );          

    tree name = get_identifier (buf);     
    tree type = rettype;
    gcc_assert (is_threadspace_type(type));
    tree decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, name, type);
    DECL_ARTIFICIAL (decl) = 1;
    gimple_add_tmp_var (decl);
    add_referenced_var(decl);

    // Make assignment from tmp to actual parameter
    gimple_call_set_lhs (stmt, decl);
    mark_symbols_for_renaming (stmt);

    // Field pointer
    tree field = TYPE_FIELDS (type);        
    tree rhs = build3 (COMPONENT_REF, TREE_TYPE(field), decl, field,  NULL );
    rhs = force_gimple_operand_gsi (gsi, rhs, true, NULL, false, GSI_NEW_STMT);  
    gimple assign = gimple_build_assign (lval, rhs);
    gsi_insert_after (gsi, assign, GSI_NEW_STMT);  
    mark_symbols_for_renaming (assign);

    // Field span
    field = DECL_CHAIN (field);        
    rhs = build3 (COMPONENT_REF, TREE_TYPE(field), decl, field,  NULL );
    rhs = force_gimple_operand_gsi (gsi, rhs, true, NULL, false, GSI_NEW_STMT);  
    tree lhs = pointer_span_map[SSA_NAME_VAR(lval)];
    /* Same invariant as every other span lookup in this pass. */
    gcc_assert (lhs);
    assign = gimple_build_assign (lhs, rhs);
    gsi_insert_after (gsi, assign, GSI_NEW_STMT);      
    mark_symbols_for_renaming (assign);


  }     



}







/* Promote the GIMPLE_RETURN at *GSI.

   When the returned value has pointer type, a temporary of the type of the
   current function's DECL_RESULT is created, its first field is assigned
   the pointer and its second field the pointer's span (looked up in
   pointer_span_map), and the temporary becomes the returned value.  Both
   assignments are inserted before the return statement.

   NOTE(review): the pointer-typed return value is passed to SSA_NAME_VAR,
   so it appears to be assumed to be an SSA name -- confirm that constants
   and ADDR_EXPRs cannot occur here.  */
static void
pointer_promote_return(gimple_stmt_iterator * gsi)
{
  
  /* Pass and receive spanning fields to or from the function call */

  gimple stmt = gsi_stmt (*gsi);
  
  tree rval = gimple_return_retval(stmt);
  if (!rval)
    return;
  
  pointer_update_type(rval);
    
  if (TREE_CODE(TREE_TYPE(rval)) == POINTER_TYPE)
  {
    // New tmp structure
    /* Identifier names are unbounded, so the write into the fixed buffer
       must be length-limited; an over-long name is truncated.  */
    char buf[100];  buf[0]='\0';
    if ( DECL_NAME(SSA_NAME_VAR(rval)) )
      snprintf(buf, sizeof (buf), "%s_struct", IDENTIFIER_POINTER (DECL_NAME(SSA_NAME_VAR(rval))) );          
    else
      snprintf(buf, sizeof (buf), "D.%u_struct", DECL_UID(SSA_NAME_VAR(rval)) );          

    tree name = get_identifier (buf);     
    tree ret = DECL_RESULT (cfun->decl);

 //   tree type = find_threadspace_type (TREE_TYPE(rval));
    tree type = TREE_TYPE(ret);
    gcc_assert(type);
    tree decl = build_decl (UNKNOWN_LOCATION, VAR_DECL, name, type);
    DECL_ARTIFICIAL (decl) = 1;
    gimple_add_tmp_var (decl);
    add_referenced_var(decl);
    //decl = DECL_RESULT (current_function_decl);

    // Make assignment from actual parameter to tmp

    // Field pointer
    tree field = TYPE_FIELDS (type);        
    tree lhs = build3 (COMPONENT_REF, TREE_TYPE(field), decl, field,  NULL );
    lhs = force_gimple_operand_gsi (gsi, lhs, false, NULL, true, GSI_SAME_STMT);  
    gimple assign = gimple_build_assign (lhs, rval);
    gsi_insert_before (gsi, assign, GSI_SAME_STMT);       
    mark_symbols_for_renaming (assign);

    // Field span
    field = DECL_CHAIN (field);        
    lhs = build3 (COMPONENT_REF, TREE_TYPE(field), decl, field,  NULL );
    lhs = force_gimple_operand_gsi (gsi, lhs, false, NULL, true, GSI_SAME_STMT);  
    tree rhs = pointer_span_map[SSA_NAME_VAR(rval)];
    gcc_assert (rhs);
    assign = gimple_build_assign (lhs, rhs);
    gsi_insert_before (gsi, assign, GSI_SAME_STMT);       
    mark_symbols_for_renaming (assign);

    /* Return the packed temporary instead of the bare pointer. */
    gimple_return_set_retval (stmt, decl);
    mark_symbols_for_renaming (stmt);
  }     
     

}


/* Second phase of pointer promotion: rewrite every use so that it agrees
   with the declarations promoted earlier.

   - Initializers of globals (excluding those from system headers or at
     BUILTINS_LOCATION) have their types refreshed.
   - For every function accepted by valid_function_node_p, PHI results and
     arguments are refreshed and each statement is dispatched on its code:
     conditions and switch indices are promoted operand by operand, while
     assignments, calls and returns go to their dedicated handlers.
     Debug/label/predict statements and CASE_GIMPLE_UPP codes are skipped;
     any other code aborts.
   - For the main procedure, forward_argv is run afterwards.
   SSA form is updated once per function.  */
static void 
do_ts_promote_pointer_reference()
{
  /* Promote each global initializer */
  for (struct varpool_node *vnode = varpool_nodes; vnode; vnode = vnode->next)
  {
    tree decl = vnode->decl;

    if (in_system_header_at(DECL_SOURCE_LOCATION(decl)) || DECL_SOURCE_LOCATION(decl) == BUILTINS_LOCATION  )
      continue;

    tree init = DECL_INITIAL (decl);
    if ( init )
      pointer_update_type (init) ;
  }

  /* Promote each statement */ 
  for (struct cgraph_node *node = cgraph_nodes; node; node = node->next)
  {
    if (!valid_function_node_p (node))
      continue;

    switch_to_context(node->decl);

    basic_block bb;
    FOR_EACH_BB(bb)
    {   
      gimple_stmt_iterator it;

      /* PHI nodes: result first, then each (NOP-stripped) argument. */
      for (it = gsi_start_phis (bb); !gsi_end_p (it); gsi_next (&it))
      {
        gimple phi = gsi_stmt (it);
        pointer_update_type (gimple_phi_result (phi));
        for (int k = 0; k < gimple_phi_num_args (phi); k++)
        {
          tree arg = PHI_ARG_DEF (phi, k);
          STRIP_NOPS (arg);        
          pointer_update_type (arg);            
        }
      }
      
      /* Ordinary statements.  The handlers may insert statements and move
         the iterator. */
      for (it = gsi_start_bb (bb); !gsi_end_p (it); gsi_next (&it))
      {
        gimple stmt = gsi_stmt (it);

        switch (gimple_code (stmt))
        {
          case GIMPLE_COND:
          {
            tree left = gimple_cond_lhs (stmt);
            tree right = gimple_cond_rhs (stmt);
            pointer_promote_reference (&it, left, 0);
            pointer_promote_reference (&it, right, 1);     
            break;
          }

          case GIMPLE_SWITCH:
            pointer_promote_reference (&it, gimple_switch_index (stmt), 0);
            break;

          case GIMPLE_ASSIGN:
            /* Add assignment between the span field */
            pointer_promote_assignment (&it);
            break;

          case GIMPLE_CALL:
            pointer_promote_funccall (&it);
            break;

          case GIMPLE_RETURN :
            pointer_promote_return (&it); 
            break;

          /* Nothing to promote in these. */
          CASE_GIMPLE_UPP:
          case GIMPLE_DEBUG:
          case GIMPLE_LABEL :
          case GIMPLE_PREDICT :
            break;

          default:
            printf("MAIN:UNKNOWN TREE CODE %d\n",gimple_code(stmt));
            gcc_assert(false);
            break;
        }
      }    
    }

    /* Forward the argument argv of the main procedure */
    if (is_main_procedure(node))
      forward_argv (node);

    update_ssa (TODO_update_ssa);
    switch_off_context();
  }

}



static void 
do_thread_space_promote_pointer()
{

  if ( !thread_type_hash )
    thread_type_hash = htab_create_ggc (100, type_hash_hash, type_hash_eq, 0);

  do_ts_promote_pointer_decl ();
  do_ts_promote_pointer_reference ();  

  type_threadspace_map.clear();
  new_type_set.clear();
  
}



#endif



#define MULTITHREADING 1

#if MULTITHREADING 


/* Emit `iter_id_var[tid] = IV' after the labels of the block that LOOP's
   preheader edge leads to, where tid is the thread-id value returned by
   func_thread_id for the current function.  */
static void
initialize_global_iv(loop_p loop, tree iv)
{
  /* Destination of the preheader edge. */
  basic_block loop_head = loop_preheader_edge (loop)->dest;
  gimple_stmt_iterator insert_pt = gsi_after_labels(loop_head);

  tree thread_id = func_thread_id (cgraph_get_node(current_function_decl));  

  /* Store target: the per-thread slot of the iteration-id array. */
  tree slot = build4 (ARRAY_REF, unsigned_type_node, iter_id_var, thread_id, NULL_TREE, NULL_TREE);
  slot = force_gimple_operand_gsi (&insert_pt, slot, false, NULL, true, GSI_SAME_STMT);  

  /* Stored value: IV reduced to a simple GIMPLE operand. */
  tree value = force_gimple_operand_gsi (&insert_pt, iv, true, NULL, true, GSI_SAME_STMT);  

  gimple store = gimple_build_assign (slot, value);
  gsi_insert_before (&insert_pt, store, GSI_SAME_STMT);          
  mark_symbols_for_renaming (store);
}





/* Bases all the induction variables in LOOP on a single induction
   variable (base 0 and step 1), created with the type of *NIT, or with
   unsigned_type_node when *NIT is NULL_TREE.

   When *NIT is known, the condition of the single dominating exit is
   rewritten to compare the new variable with *NIT (var < *NIT).  When the
   widest GIMPLE-register PHI result in the loop header has a larger
   precision than *NIT, *NIT is converted to an unsigned type of that
   precision, the conversion code is inserted on the preheader edge, and
   *NIT is updated to the new definition.

   When the upper bound is unknown (*NIT is NULL_TREE), the header condition
   is replaced by a dummy comparison of the new variable against zero
   (var >= 0), *NIT is set to that zero constant, and a copy of the original
   condition is placed in a new block split off the loop-entry edge.

   When BUMP_IN_LATCH is true, the induction variable is incremented in the
   loop latch, otherwise it is incremented in the loop header.  Return the
   induction variable that was created.  */

tree
ipa_canonicalize_loop_ivs (struct loop *loop, tree *nit, bool bump_in_latch)
{

  gimple_stmt_iterator gsi;
  tree var_before;

  /* Choose where create_iv places the increment. */
  if (bump_in_latch)
    gsi = gsi_last_bb (loop->latch);
  else
    gsi = gsi_last_nondebug_bb (loop->header);

  /* Base and step constants, typed after *NIT when it is available. */
  tree zero, one;
  if (*nit)
  {
    zero = build_int_cst_type (TREE_TYPE(*nit), 0);
    one = build_int_cst_type (TREE_TYPE(*nit), 1);
  }
  else
  {
    zero = build_int_cst_type (unsigned_type_node, 0);
    one = build_int_cst_type (unsigned_type_node, 1);
  }
  
  create_iv (zero, one, NULL_TREE, loop, &gsi, bump_in_latch, &var_before, NULL);

  rewrite_all_phi_nodes_with_iv (loop, var_before);


  if ( *nit )
  {
    /* Known iteration count: retarget the existing exit test. */
    edge exit = single_dom_exit (loop);    
    gcc_assert (exit);
    gimple stmt = last_stmt (exit->src);
    unsigned precision = TYPE_PRECISION (TREE_TYPE (*nit));
    unsigned original_precision = precision;
    /* This gsi shadows the outer one and is not used in this branch. */
    gimple_stmt_iterator gsi, psi;
    tree type;
    gimple_seq stmts;

    /* Find the largest precision among the register PHI results of the header. */
    for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      gimple phi = gsi_stmt (psi);
      tree res = PHI_RESULT (phi);

      if (is_gimple_reg (res) && TYPE_PRECISION (TREE_TYPE (res)) > precision)
      	precision = TYPE_PRECISION (TREE_TYPE (res));
    }

    /* Second argument 1 requests an unsigned type. */
    type = lang_hooks.types.type_for_size (precision, 1);

    if (original_precision != precision)
    {
      /* Widen *NIT and materialize the conversion on the preheader edge. */
      *nit = fold_convert (type, *nit);
      *nit = force_gimple_operand (*nit, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

    /* Make the loop exit if the control condition is not satisfied.  */
    if (exit->flags & EDGE_TRUE_VALUE)
    {
      edge te, fe;

      extract_true_false_edges_from_block (exit->src, &te, &fe);
      te->flags = EDGE_FALSE_VALUE;
      fe->flags = EDGE_TRUE_VALUE;
    }
    /* The exit test becomes: var_before < *nit. */
    gimple_cond_set_code (stmt, LT_EXPR);
    gimple_cond_set_lhs (stmt, var_before);
    gimple_cond_set_rhs (stmt, *nit);
    update_stmt (stmt);
  }
  else
  {

    /* Create new conditions in loop header 

       while (condition)
       {
         if (...)
           break;
       }

       ==>

       for ( i=0; i > -1; i++)
       {
         if (condition)
         {
           if (...)
             break;
         }
       }
      

    */
    
    basic_block header = loop->header;    
    /* The header is expected to end in a two-way branch. */
    gcc_assert (!single_succ_p (header));

    edge true_edge, false_edge;
    extract_true_false_edges_from_block (loop->header, &true_edge, &false_edge);

    /* Form a dummy upper-bound and insert it before the original condition STMT */
    gimple cond = last_stmt (header);
    /* Keep a copy of the original test; it is re-inserted further down. */
    gimple orig_cond = gimple_copy(cond);    

    /* The code emitted here is `var_before >= 0' (GE_EXPR against zero);
       the note below phrases the same dummy bound as i > -1.  */
    *nit = zero;
    gimple_cond_set_code (cond, GE_EXPR);  // i > -1 then enter else exit
    gimple_cond_set_lhs (cond, var_before);
    gimple_cond_set_rhs (cond, *nit);
    mark_symbols_for_renaming (cond);

    bool reverse = false;
    /* Let entry edge be true edge */
    if ( loop_exit_edge_p (loop, true_edge) )
    {
      reverse = true;
      /* Swap true and false labels */
      tree exitlabel = gimple_cond_true_label (cond);
      gimple_cond_set_true_label (cond, gimple_cond_false_label (cond));
      gimple_cond_set_false_label (cond, exitlabel);

      /* Swap true and false edges */
      true_edge->flags &= ~EDGE_TRUE_VALUE;
      true_edge->flags |= EDGE_FALSE_VALUE;
      true_edge->flags |= EDGE_LOOP_EXIT;
      false_edge->flags |= EDGE_TRUE_VALUE;
      false_edge->flags &= ~EDGE_FALSE_VALUE;
      edge tmp =true_edge ;
      true_edge = false_edge;
      false_edge = tmp;    
    }


    /* Create new branch on entry edge */
    basic_block cond_bb = split_edge (true_edge);
    gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);    
    gsi_insert_after (&gsi, orig_cond, GSI_NEW_STMT);
    /* NOTE(review): COND was already marked above; this call possibly was
       meant for ORIG_COND, the statement just inserted -- confirm.  */
    mark_symbols_for_renaming (cond);

    /* Turn the fall-through edge of the new block into one arm of the
       re-inserted test and add the other arm towards the exit destination.
       REVERSE records that the original true edge was the exit, so the
       copied test keeps its original polarity.  */
    edge e = single_succ_edge(cond_bb);
    //basic_block fallthrough = e->dest;    
    e->flags &= ~EDGE_FALLTHRU;
    //remove_edge (e);
    if ( reverse )
    {
      e->flags |= EDGE_FALSE_VALUE;
      e = make_edge (cond_bb, false_edge->dest, EDGE_TRUE_VALUE | EDGE_LOOP_EXIT);     
    }
    else      
    {
      e->flags |= EDGE_TRUE_VALUE;
      e = make_edge (cond_bb, false_edge->dest, EDGE_FALSE_VALUE | EDGE_LOOP_EXIT);    
    }
    
    /* Register the new exit edge E with the loop structures. */
    rescan_loop_exit (e, true, false);

    /* Fill the PHI arguments of the exit destination for the new edge with
       the reaching definition of each PHI's underlying variable.  */
    for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree lhs_sym = SSA_NAME_VAR (gimple_phi_result (phi));
      tree reaching_def = get_reaching_def (lhs_sym);
      use_operand_p arg_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, e);
      tree arg = USE_FROM_PTR (arg_p);
      /* The slot for the freshly made edge must still be empty. */
      gcc_assert (arg == NULL_TREE);
      SET_USE (arg_p, reaching_def);      
      mark_symbols_for_renaming (phi);
    }
    
  }

  rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);

  return var_before;
}



/* Create the parallel constructs for LOOP as described in ipa_gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  LOC is the
   location given to the generated OMP clauses and statements.

   Besides emitting GIMPLE_OMP_PARALLEL and its GIMPLE_OMP_RETURN, the
   loop's control variable is rewritten so that its initial value is the
   value returned by the omp_get_thread_num builtin (also stored into the
   function's thread-id variable, which is made private to the parallel
   region) and its step is thread_num_var.  The loop is required to have a
   control variable with initial value 0 and step 1 on entry.

   Returns the basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block 
ipa_create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, tree n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  /* NOTE(review): for_bb, for_stmt, initvar, type, exit, nexit, guard, end
     and e are declared but never referenced below; cvar_base is only
     assigned.  */
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  tree tid = func_thread_id (cgraph_get_node(current_function_decl));  


  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  /* BB is the source of the preheader edge; the parallel statement goes at
     the end of BB's single predecessor.  */
  bb = loop_preheader_edge (loop)->src;  
  paral_bb = single_pred (bb);
  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t) = n_threads;

  /* Chain a PRIVATE clause for the thread-id variable in front of the
     NUM_THREADS clause.  */
  tree c = build_omp_clause (loc, OMP_CLAUSE_PRIVATE);
  OMP_CLAUSE_DECL (c) = tid;
  OMP_CLAUSE_CHAIN (c) = t;
  t = c;

  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (stmt, loc);
  gsi = gsi_last_bb (paral_bb);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);



  /* Initialize NEW_DATA.  */
  if (data)
  {
    gsi = gsi_after_labels (bb);
    param = DECL_ARGUMENTS (loop_fn);

    /* .paral_data_param = &.paral_data_store.8; */
    stmt = gimple_build_assign (param, build_fold_addr_expr (data));
    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);

    tree nvar = create_tmp_var (TREE_TYPE(param), ".paral_data_arg");
    add_referenced_var (nvar);

    /* .paral_data_arg_2 = (struct  *) .paral_data_param; */
    nvar = make_ssa_name (nvar, NULL);
    stmt = gimple_build_assign (nvar, param);
    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    SSA_NAME_DEF_STMT(nvar) = stmt;

    /* .paral_data_load.7_3 = (struct  *) .paral_data_arg_2; */
    stmt = gimple_build_assign (new_data, fold_convert (TREE_TYPE (new_data), nvar));
    gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
    SSA_NAME_DEF_STMT (new_data) = stmt;
  }

  
  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  ex_bb = loop_exit_bb (loop);  
  gsi = gsi_last_bb (ex_bb);
  stmt = gimple_build_omp_return (false);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);


  /* Change initial value and step for loop iteration var */
  /* The control variable is the lhs of the header's final condition; its
     defining PHI supplies the initial value (preheader edge) and the
     incremented value (latch edge).  */
  cond_stmt = last_stmt (loop->header);
  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* Get thread id */  
  gcc_assert (int_cst_value(cvar_init) == 0);  
  tid = func_thread_id (cgraph_get_node(current_function_decl));
  gsi = gsi_last_bb (loop_preheader_edge (loop)->src);  
  /* Call the omp_get_thread_num builtin into a fresh artificial int temporary. */
  tree tid_func = built_in_decls[BUILT_IN_OMP_GET_THREAD_NUM]; 
  gimple call = gimple_build_call (tid_func, 0, NULL_TREE);    
  tree lhs = build_decl (UNKNOWN_LOCATION, VAR_DECL, NULL_TREE, integer_type_node); 
  DECL_ARTIFICIAL(lhs) = 1;  
  gimple_add_tmp_var (lhs); 
  add_referenced_var (lhs); 
  gimple_call_set_lhs (call, lhs);  
  gsi_insert_before (&gsi, call, GSI_SAME_STMT); 
  mark_symbols_for_renaming (call);   
  /* Convert the result to unsigned, store it into the thread-id variable,
     and make the same value the control variable's initial value.  */
  tree rhs = build1 (CONVERT_EXPR, unsigned_type_node, lhs); 
  rhs = force_gimple_operand_gsi (&gsi, rhs, true, NULL, true, GSI_SAME_STMT);  
  gimple assign = gimple_build_assign (tid, rhs);  
  gsi_insert_before (&gsi, assign, GSI_SAME_STMT);       
  mark_symbols_for_renaming (assign); 
  SET_PHI_ARG_DEF (phi, loop_preheader_edge (loop)->dest_idx, rhs);

  /* step = thread_num */
  gcc_assert (int_cst_value(cvar_init) == 0);
  /* The increment must be the last non-debug statement of the latch and
     must currently add the constant 1.  */
  gimple incr = SSA_NAME_DEF_STMT (cvar_next);
  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == incr);
  tree rval2 = gimple_assign_rhs2 (incr);
  tree rvaltype = TREE_TYPE(rval2);
  gcc_assert (int_cst_value(rval2) == 1);
  /* Replace the step by thread_num_var converted to the step's type. */
  rval2 = thread_num_var;    
  rval2 = build1 (CONVERT_EXPR, rvaltype, rval2);
  rval2 = force_gimple_operand_gsi (&gsi, rval2, true, NULL, true, GSI_SAME_STMT);  
  gimple_assign_set_rhs2 (incr, rval2);
  
  update_ssa (TODO_update_ssa);

  
  return paral_bb;
}


/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
ipa_gen_parallel_loop (struct loop *loop, htab_t reduction_list, tree n_threads)
{

  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;

  /* From

     ---------------------------------------------------------------------
     loop (COND)
     {
			 IV = phi (INIT, IV + STEP)
			 BODY;
     }
     ---------------------------------------------------------------------

     we generate the following code:

     ---------------------------------------------------------------------

     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static, 1))
     BODY;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL

  */

  /* Generate initializations for reductions.  */
  if (htab_elements (reduction_list) > 0)
    htab_traverse (reduction_list, initialize_reductions, loop);

  /* Eliminate the references to local variables from the loop.  */
  std::set<basic_block> exit_bbs;
  VEC (edge, heap) * exits = get_loop_exit_edges (loop);
  for (int i = 0; VEC_iterate (edge, exits, i, exit); i++)
  {
    basic_block bb = exit->dest;
    if (EDGE_COUNT (bb->succs) )
      exit_bbs.insert(bb);
  }  
  VEC_free (edge, heap, exits);
  gcc_assert (exit_bbs.size()==1);

  entry = loop_preheader_edge (loop);
  exit = EDGE_PRED (*exit_bbs.begin(), 0);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct, &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);

  tree loop_fn = create_loop_fn (loc);
  parallel_head = ipa_create_parallel_loop (loop, loop_fn, arg_struct, new_arg_struct, n_threads, loc);

  if (htab_elements (reduction_list) > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);


  cgraph_node_ptr loop_fn_node = cgraph_get_node (loop_fn);
  loop_fn_node->analyzed = 1;

  switch_to_context (loop_fn);
  ipa_ssa_loop_init_node (NULL, loop_fn_node);
  func_set_new (loop_fn_node);
  
  switch_off_context ();

  
}



/* Emit the threaded version of LOOP.

   A temporary reduction table is created, filled by
   try_create_reduction_list, passed to ipa_gen_parallel_loop together with
   the global thread_num_var, and destroyed again.  Finally the
   cgraph_new_nodes list is reset to NULL.  */
static void 
generate_parallel_code(loop_p loop)
{
  /* Entries are released with free () when the table is deleted.  */
  htab_t reductions
    = htab_create (10, reduction_info_hash, reduction_info_eq, free);

  try_create_reduction_list (loop, reductions);
  ipa_gen_parallel_loop (loop, reductions, thread_num_var);
  htab_delete (reductions);

  cgraph_new_nodes = NULL;
}


#endif




/* Collect the candidate loops of the whole program into LOOP_SET.

   LOOP_SET is cleared first.  Then every call-graph node accepted by
   valid_function_node_p is scanned; for each GIMPLE_UPP_LOOP statement the
   loop_father of its basic block is taken, and loop_uid of that loop is
   inserted unless the loop's num is 0 (presumably the function-level
   pseudo loop -- TODO confirm).  */
void 
find_candidate_loops(set<int> &loop_set)
{

  loop_set.clear();

  for (cgraph_node_ptr node = cgraph_nodes; node; node = node->next)
  {

    if (!valid_function_node_p (node))
      continue;

    /* FOR_EACH_BB below walks the function selected here.  */
    switch_to_context(node->decl);
    basic_block bb;
    FOR_EACH_BB(bb)
    {          
      
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);

        if ( gimple_code (stmt)== GIMPLE_UPP_LOOP )
        {
          loop_p loop = bb->loop_father;
          if (loop->num)
            loop_set.insert (loop_uid(loop));
        }

      }    
    }
    switch_off_context();

  }

  
}






/* Return the basic block that BASIC_BLOCK yields for INDEX.  */
static basic_block get_bb(int index)
{
  basic_block block = BASIC_BLOCK(index);
  return block;
}


static unsigned int
ipa_loop_parallelization (void)
{

  if (!in_lto_p)
    return 0;

  dump_all_nodes ("dump0");

  /* Construct data structure and SSA form for each loop,
     initialize function extra information
  */
  ipa_ssa_loop_init (NULL);


  canonicalize_gimple_call ();

  
  dump_all_nodes ("dump1");

  /* Assign an ID to each gimple statement */
  ipa_gimple_id_assignment (NULL);
  read_mem_allocate_func("malloc.info");

  
  set<int> candidate_set;
  find_candidate_loops (candidate_set);

  if ( candidate_set.empty() )     
  {
    ipa_ssa_loop_done ();    
    cleanup_cgraph ();    
    return 0  ;
  }
  
  ipa_points_to_analysis();   
  
  /* Use points-to result to complement indirect call edges  */
  cgraph_resolve_indirect_edges ();

  ipa_convert_icall();

  /* Create data references to record each memory access */
  dr_hash = htab_create (100, hash_dr_id, eq_dr_id, hash_dr_del);
  ipa_create_data_references (NULL);
  htab_traverse (dr_hash, compute_reference_set, NULL);

  {
    FILE *fp = fopen ("memop.info", "w");
		ipa_dump_data_references (fp);
    fclose (fp);
  }

  /* Create global thread_num variable */
  create_and_initialize_thread_num_var();

    
  /* Mark basic block if the basic block will be executed exacly once */
  // mark_bb(cur_loop);
  Read_dependence ( "dependencies.info", candidate_set );

  for (set<int>::iterator iter = candidate_set.begin(); iter != candidate_set.end(); ++iter)
  {

    loop_p loop = ipa_get_loop(*iter); 

    switch_to_context (loop_func_decl(loop));

    canonicalize_loop_form (loop);

    /* Selectively clone callee and relative dependences */
    selective_clone (loop);
    
    /* Read feedback and build whole-loop dependence graph */
    dependence_set = load_dependence (loop);
    partition_dependence (loop);

    DEPENDENCE_GRAPH pdg;
    Build_dependence_graph (dependence_set, pdg);
      
    /* Translate do-while loop to while loop */
  
    struct tree_niter_desc niter_desc;
    niter_desc.niter = NULL_TREE;
    try_get_loop_niter (loop, &niter_desc);
  
    /* Create iteration number var */
    tree iv = ipa_canonicalize_loop_ivs (loop, &niter_desc.niter, true);

    /* Copy iv to a global array that is used in nested calls */
    initialize_global_iv (loop, iv);

    /* Place a sychronization (post-wait) on each loop-carried RAW dependence and control dependence
    */
    Place_sychronization (dependence_set, loop);

    switch_off_context();

  }

  dump_all_nodes ("dump.syn");  
  update_cgraph ();    

  
  /* Promoting thread local space for pointer and heap objects */
  do_thread_space_promote_pointer ();
  dump_all_nodes ("dump.pointer");
  update_cgraph ();    


  do_ts_promote_allocation ();

  do_thread_space_promote_declaration();    

  for (set<int>::iterator iter = candidate_set.begin(); iter != candidate_set.end(); ++iter)
  {
    loop_p loop = ipa_get_loop(*iter); 
    switch_to_context (loop_func_decl(loop));  
    dependence_set = load_dependence (loop);
    partition_dependence (loop); 
    do_thread_space_promote_loop (loop);   
    switch_off_context();
  }

  do_thread_space_promote_rest ();
  
  dump_all_nodes ("dump.array");  
  update_cgraph ();    

  /* Generating threading code */
  for (set<int>::iterator iter = candidate_set.begin(); iter != candidate_set.end(); ++iter)
  {  
    loop_p loop = ipa_get_loop(*iter);   
    switch_to_context (loop_func_decl(loop));  
    generate_parallel_code (loop);         
    switch_off_context();  
  }

  dump_all_nodes ("dump.par");  

  dpc.clear();

  update_cgraph ();


  ipa_ssa_loop_done ();
  
  cleanup_cgraph ();

  return 0  ;

}

/* Gate of the loop parallelization pass: true exactly when
   flag_ipa_parallel_loop is nonzero.  */
static bool
gate_ipa_loop_parallelization (void)
{
  return flag_ipa_parallel_loop != 0;
}


/* Pass descriptor for the feedback-based loop parallelization pass.
   It is a simple IPA pass named "parallel-loop", gated by
   gate_ipa_loop_parallelization and executed by ipa_loop_parallelization.
   It requires PROP_ssa and finishes with TODO_dump_func and
   TODO_update_ssa.  */
struct simple_ipa_opt_pass pass_loop_parallelization = 
{
  {
    SIMPLE_IPA_PASS,
    "parallel-loop",	/* name */
    gate_ipa_loop_parallelization,	/* gate */
    ipa_loop_parallelization,	/* execute */
    NULL,			/* sub */
    NULL,			/* next */
    0,				/* static_pass_number */
    TV_TREE_PROFILE_DEPENDENCE,	/* tv_id */
    PROP_ssa,			/* properties_required */
    0,				/* properties_provided */
    0,				/* properties_destroyed */
    0,				/* todo_flags_start */
    TODO_dump_func | TODO_update_ssa 	/* todo_flags_finish; TODO_rebuild_alias is not requested */
  }
};



