/*
 * resource.c - resource manager routines
 *
 * This file is part of the Alpha simulator tool suite written by
 * Raj Desikan as part of the Bullseye project.
 * It has been written by extending the SimpleScalar tool suite written by
 * Todd M. Austin as a part of the Multiscalar Research Project.
 *  
 * 
 * Copyright (C) 1994, 1995, 1996, 1997, 1998 by Todd M. Austin
 *
 * Copyright (C) 1999 by Raj Desikan
 *
 * This source file is distributed "as is" in the hope that it will be
 * useful.  The tool set comes with no warranty, and no author or
 * distributor accepts any responsibility for the consequences of its
 * use. 
 * 
 * Everyone is granted permission to copy, modify and redistribute
 * this tool set under the following conditions:
 * 
 *    This source code is distributed for non-commercial use only. 
 *    Please contact the maintainer for restrictions applying to 
 *    commercial use.
 *
 *    Permission is granted to anyone to make or distribute copies
 *    of this source code, either as received or modified, in any
 *    medium, provided that all copyright notices, permission and
 *    nonwarranty notices are preserved, and that the distributor
 *    grants the recipient permission for further redistribution as
 *    permitted by this document.
 *
 *    Permission is granted to distribute this file in compiled
 *    or executable form under the same conditions that apply for
 *    source code, provided that either:
 *
 *    A. it is accompanied by the corresponding machine-readable
 *       source code,
 *    B. it is accompanied by a written offer, with no time limit,
 *       to give anyone a machine-readable copy of the corresponding
 *       source code in return for reimbursement of the cost of
 *       distribution.  This written offer must permit verbatim
 *       duplication by anyone, or
 *    C. it is distributed by someone who received only the
 *       executable form, and is accompanied by a copy of the
 *       written offer of source code that they received concurrently.
 *
 * In other words, you are welcome to use, share and improve this
 * source file.  You are forbidden to forbid anyone else to use, share
 * and improve what you give them.
 *
 *
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>

#include "host.h"
#include "misc.h"
#include "resource.h"
#include "alpha.h"
#include "issue.h"
#include "stats.h"
#include "writeback.h"
#include "cache.h"
#include "bpred.h"
#include "fetch.h"

/* number of integer clusters; res_create_pool() divides the integer units
   evenly among this many clusters, and they occupy cluster indices
   0 .. res_int_clusters-1 in the resource table */
int res_int_clusters;

/* number of memory ports (not referenced by the code in this file) */
int res_mem_ports;

/* number of fp clusters; they occupy cluster indices
   res_int_clusters .. res_int_clusters+res_fp_clusters-1 in the resource
   table */
int res_fp_clusters;

/* minimum delay between clusters; res_find_clus() multiplies it by the
   distance between the producing and the consuming cluster index */
int res_cross_clus_delay;

/* total integer alus in the system */
int res_ialu;

/* total integer multipliers in the system */
int res_imult;

/* total fp alus in the system */
int res_fpalu;

/* total fp multipliers in the system */
int res_fpmult;

/* number of functional units in each integer cluster, computed by
   res_create_pool() as (res_ialu + res_imult) / res_int_clusters */
static int num_of_int_func_units;

/* number of functional units in each fp cluster, computed by
   res_create_pool() as (res_fpalu + res_fpmult) / res_fp_clusters */
static int num_of_fp_func_units;


/* functional unit resource pool (defined here; not assigned in this file) */
struct res_pool *res_fu_pool;

/*
 * functional unit resource configuration
 *
 * Each entry is initialized positionally.  Presumably the members are, in
 * order: unit name, quantity, busy, and a list of per-class templates of
 * the form { class, operation latency, issue latency } -- TODO confirm
 * against struct res_desc / struct res_template in resource.h.
 */

/* resource pool definition, NOTE: update FU_*_INDEX defs in resource.h 
   if you change this */
struct res_desc res_fu_config[] = {
  {
    /* The first two entries describe the integer units and the last two
       describe the fp units; res_create_pool() relies on the integer
       units coming first when it maps units onto clusters. */
    "integer-ALU",
    2,
    0,
    {
      { IntALU, 1, 1 }
      /* disabled alternative: let the ALU also multiply/divide
	{ IntMULT, 7, 1 },
	{ IntDIV, 20, 1 }*/
    }
  },
  {
    "integer-MULT/DIV",
    2,
    0,
    {
      //{IntALU, 1, 1},
      { IntMULT, 7, 1 },
      { IntDIV, 12, 9
      }
    }
  },
  {
    /* NOTE(review): despite its name this unit also serves FloatDIV and
       FloatSQRT, while "FP-MULT/DIV" below only serves FloatMULT --
       confirm this split is intended. */
    "FP-adder",
    1,
    0,
    {
      { FloatADD, 4, 1 }, 
      { FloatDIV, 12, 9 },
      { FloatSQRT, 18, 15 },
      { FloatCVT, 4, 1 },
      { FloatCMP, 4, 1 }
    }
  },
  {
    "FP-MULT/DIV",
    1,
    0,
    {
      { FloatMULT, 4, 1 }
    }
  },
  /* disabled: memory ports are not modelled as pool resources
  {
    "memory-port",
    2,
    0,
    {
      { RdPort, 1, 1 },
      { WrPort, 1, 1 }
    }
   },*/
};


/* create a resource pool */
struct res_pool *
res_create_pool(char *name, struct res_desc *pool, int ndesc)
{
  int i, j, k, l, m, index, ninsts;
  struct res_desc *inst_pool;
  struct res_pool *res;
  //enum alpha_fu_class class;

  /* count total instances */
  /* for (ninsts=0,i=0; i<ndesc; i++)
    {
      if (pool[i].quantity > MAX_INSTS_PER_CLASS)
        fatal("too many functional units, increase MAX_INSTS_PER_CLASS");
      ninsts += pool[i].quantity;
    }
  */
  ninsts = res_ialu + res_imult + res_fpalu + res_fpmult;
  num_of_int_func_units = (res_ialu + res_imult)/res_int_clusters;
  num_of_fp_func_units = (res_fpalu + res_fpmult)/res_fp_clusters;
  
  /* allocate the instance table */
  inst_pool = (struct res_desc *)calloc(ninsts, sizeof(struct res_desc));
  if (!inst_pool)
    fatal("out of virtual memory");
  /* fill in the instance table */
  for (index=0,i=0; i<ndesc; i++)
    {
      for (j=0; j<pool[i].quantity; j++)
	{
	  inst_pool[index] = pool[i];
	  inst_pool[index].quantity = 1;
	  inst_pool[index].busy = FALSE;
	  for (k=0; k<MAX_RES_CLASSES ; k++){
	    if (inst_pool[index].x[k].class)
	      inst_pool[index].x[k].master = &inst_pool[index];
	  }
	  index++;
	}
    }
  assert(index == ninsts);
  /* allocate the resouce pool descriptor */
  res = (struct res_pool *)calloc(1, sizeof(struct res_pool));
  if (!res)
    fatal("out of virtual memory");
  res->name = name;
  res->num_resources = ninsts;
  res->resources = inst_pool;

  /* fill in the resource table map - slow to build, but fast to access */
  /* for (i=0; i<ninsts; i++)
    {
      struct res_template *plate;
      for (j=0; j<MAX_RES_CLASSES; j++)
	{
	  plate = &res->resources[i].x[j];
	  if (plate->class)
	    {
	      assert(plate->class < MAX_RES_CLASSES);
	      res->table[i/2][plate->class] = plate;
	    }
	    else
	    break;
	}
    } */
  /* the resource table map is as follows: The resources field in the
     pool contains a pointer to all the resc_descriptors in the
     machine. We are making the number of clusters and the subclusters 
     variable but subclusters need to be symmetric.
     hen resc_desc are allocated to the sub-clusters, we
     allocate element 0 first, then element 1 and so on. In the table
     filed of res we need to map these resc_desc in terms of clusters.
     For this we jump across the elements of resources and make the
     correct mapping */
   
  /* FIXME - This shouldn't be a constraint in the final implementation */
  if ((num_of_int_func_units % 2 != 0) || (num_of_fp_func_units % 2 != 0))
    panic ("Functional units in a cluster should be an even number");

  /* Build the integer clusters */

  for (i=0; i<res_int_clusters;i++){
    for (l=0; l<num_of_int_func_units/2; l++){ 
      for (j=i*(num_of_int_func_units/2)+l;
	   j < res_ialu+res_imult;
	   //j<(res_ialu+(i*(num_of_int_func_units/2))+l);
	   j+=res_ialu){
	struct res_template *plate;
	for (k=0; k<MAX_RES_CLASSES; k++){
	  plate = &res->resources[j].x[k];
	  if (plate->class)
	    {
	      assert(plate->class < MAX_RES_CLASSES);
	      res->table[i][plate->class][l][l*3] = plate;
	      res->table[i][plate->class][l][l*3+i+1] = plate;
	      res->table[i][plate->class][l][6] = plate;
	    }
	  else
	    break;
	}
      }
    }
  }
  
  /* Build the fp clusters */
  
  for (i=res_int_clusters,m=0; 
       i<(res_int_clusters+res_fp_clusters);
       i++,m++){
    for (l=0; l<num_of_fp_func_units/2; l++){
      for (j=(m*(num_of_fp_func_units/2)+res_ialu+res_imult)+l;
	   j<res_ialu+res_imult+res_fpalu+res_fpmult;
	   //j<=(res_fpalu+m*(num_of_fp_func_units/2)+res_ialu+res_imult+l);
	   j+=res_fpalu){
	struct res_template *plate;
	for (k=0; k<MAX_RES_CLASSES; k++){
	  plate = &res->resources[j].x[k];
	  if (plate->class)
	    {
	      assert(plate->class < MAX_RES_CLASSES);
	      res->table[i][plate->class][l][l*3] = plate;
	      res->table[i][plate->class][l][l*3+i+1] = plate;
	      res->table[i][plate->class][l][6] = plate;
	    }
	  else 
	    break;
	}
      }
    }
  }
  
  
  /*for (i=(num_of_int_clusters+num_of_fp_clusters);
        i<(num_of_int_clusters+num_of_fp_clusters+num_of_mem_ports);
        i++)
   {
      for (j=(i*NUM_OF_SUBCLUSTERS);
	   j<((num_of_int_clusters*NUM_OF_SUBCLUSTERS) +
	      (num_of_fp_clusters*NUM_OF_SUBCLUSTERS) +num_of_mem_ports);
	     j++)
	{
	  struct res_template *plate;
	  for (k=0; k<MAX_RES_CLASSES; k++)
	    {
	      plate = &res->resources[j].x[k];
	      if (plate->class)
		{

		  assert(plate->class < MAX_RES_CLASSES);
		  res->table[i][plate->class] = plate;
		}
	      else
		break;
	    }
       }
       }*/
  /*class = IntALU;
  for (i=0; i<res_int_clusters;i++)
    {
      for (j=0;j<num_of_int_func_units/2;j++)
	if (res->table[i][class][j])
	  printf("intALU exists for cluster %d res %d\n",i,j);
   }
  class = IntMULT;
  for (i=0; i<res_int_clusters;i++)
    {
      for (j=0;j<num_of_int_func_units/2;j++)
	if (res->table[i][class][j])
	   printf("intMULT exists for cluster %d res %d\n",i,j);
    }
  class = IntDIV;
  for (i=0; i<res_int_clusters;i++)
    {
     for (j=0;j<num_of_int_func_units/2;j++)
       if (res->table[i][class][j])
	 printf("intDIV exists for cluster %d res %d\n",i,j);
    }
 class = FloatADD;
 for (i=res_int_clusters; i<(res_int_clusters+res_fp_clusters); i++)
   {
     for (j=0;j<num_of_fp_func_units/2;j++)
	 if (res->table[i][class][j])
	   printf("floatADD exists for cluster %d res %d\n",i,j);
   }
 class = FloatCMP;
 for (i=res_int_clusters; i<(res_int_clusters+res_fp_clusters); i++)
     {
       for (j=0;j<num_of_fp_func_units/2;j++)
	 if (res->table[i][class][j])
	   printf("floatCMP exists for cluster %d res %d\n",i,j);
     }
 class = FloatCVT;
 for (i=res_int_clusters; i<(res_int_clusters+res_fp_clusters); i++)
     {
       for (j=0;j<num_of_fp_func_units/2;j++)
	 if (res->table[i][class][j])
	   printf("floatCVT exists for cluster %d res %d\n",i,j);
     }
 class = FloatMULT;
 for (i=res_int_clusters; i<(res_int_clusters+res_fp_clusters); i++)
     {
       for (j=0;j<num_of_fp_func_units/2;j++)
	 if (res->table[i][class][j])
	   printf("floatMULT exists for cluster %d res %d\n",i,j);
     }
 class = FloatDIV;
 for (i=res_int_clusters; i<(res_int_clusters+res_fp_clusters); i++)
   {
     for (j=0;j<num_of_fp_func_units/2;j++)
       if (res->table[i][class][j])
	 printf("floatDIV exists for cluster %d res %d\n",i,j);
   }
 class = FloatSQRT;
 for (i=res_int_clusters; i<(res_int_clusters+res_fp_clusters); i++)
   {
     for (j=0;j<num_of_fp_func_units/2;j++)
       if (res->table[i][class][j])
	 printf("floatSQRT exists for cluster %d res %d\n",i,j);
   }
 getchar();*/
 return res;
}

/* get a free resource from resource pool POOL that can execute a
   operation of class CLASS, returns a pointer to the resource template,
   returns NULL, if there are currently no free resources available,
   follow the MASTER link to the master resource descriptor;
   NOTE: caller is responsible for reseting the busy flag in the beginning
   of the cycle when the resource can once again accept a new operation */
struct res_template *
res_get(struct res_pool *pool, struct rqueue_link *node)
{
  int i,j;
  enum alpha_fu_class class = ALPHA_OP_FUCLASS(node->qelem->inst_desc->op);
  enum sub_cluster_required sub_clus_req = node->qelem->inst_desc->sub_clus_req;
  //int class = 0;
    /* must be a valid class */
  assert(class < MAX_RES_CLASSES);
  /* Check delay values of the operands of rs. Four combinations here. Take 
     action depending on the values. pool->table[cluster][class] will point to 
     template in the resc_desc for this class. We can check this for busy. We 
     can make i vary from zero to MAX_CLUSTERS. There will be large number of 
     mappings to these sub-clusters from the table structure. So instead of 
     instances of a class, we have clusters of a class. The variable
     table->[cluster number][inst class][fu number] automatically selects the
     correct resc_desc from the resources
  */

  /* must be at least one resource in this class
  assert(pool->table[class][0]); */
    
  /* Check for integer instruction */
  if (class == IntALU || class == IntMULT || class == IntDIV){
    for (j=0; j<num_of_int_func_units/2; j++){
      for (i=0; i<res_int_clusters; i++){
	if (pool->table[i][class][j][sub_clus_req]){
	  if (pool->table[i][class][j][sub_clus_req]->master->busy <= sim_cycle) {/* &&
	      ((node->qelem->op_delay[0] == DNA) || 
		(node->qelem->op_clusters[0] >= res_int_clusters) || 
		 (node->qelem->op_delay[0]+(abs(i-node->qelem->op_clusters[0])*
				      res_cross_clus_delay) <= sim_cycle)) && 
		((node->qelem->op_delay[1] == DNA) || 
		 (node->qelem->op_clusters[1] >= res_int_clusters) ||
		 (node->qelem->op_delay[1]+(abs(i-node->qelem->op_clusters[1])*
		 res_cross_clus_delay) <= sim_cycle))){*/
	    node->qelem->inst_desc->clus_assigned = i;
	    return pool->table[i][class][j][sub_clus_req];
	  }
	}
      }
    }
  }

  /* Check for fp instruction */
  else if (class == FloatCMP || class == FloatADD || class == FloatCVT ||
	   class == FloatMULT || class == FloatDIV || class == FloatSQRT){
    for (j=0;j<num_of_fp_func_units/2;j++){
      for (i=res_int_clusters; i<res_int_clusters+res_fp_clusters; i++){
	if (pool->table[i][class][j][sub_clus_req]){
	  if (pool->table[i][class][j][sub_clus_req]->master->busy <= sim_cycle) {/*&&
	      ((node->qelem->op_delay[0] == DNA) || 
		(node->qelem->op_clusters[0] >= res_fp_clusters) || 
		 (node->qelem->op_delay[0]+(abs(i-node->qelem->op_clusters[0])*
				      res_cross_clus_delay) <= sim_cycle)) && 
		((node->qelem->op_delay[1] == DNA) || 
		 (node->qelem->op_clusters[1] >= res_fp_clusters) ||
		 (node->qelem->op_delay[1]+(abs(i-node->qelem->op_clusters[1])*
		 res_cross_clus_delay) <= sim_cycle))){*/
	    node->qelem->inst_desc->clus_assigned = i;
	    return pool->table[i][class][j][sub_clus_req];
	  }
	}
      }
    }
  }
  /* none found */
  return NULL;
}

/* dump the resource pool POOL to stream STREAM */
void
res_dump(struct res_pool *pool, FILE *stream)
{
  int i, j, k;

  if (!stream)
    stream = stderr;

  fprintf(stream, "Resource pool: %s:\n", pool->name);
  fprintf(stream, "\tcontains %d resource instances\n", pool->num_resources);
  for (k=0; k< MAX_CLUSTERS; k++){
    for (i=0; i<MAX_RES_CLASSES; i++){
      fprintf(stream, "\tclass: %d: %d matching instances\n",
	      i, pool->nents[i]);
      fprintf(stream, "\tmatching: ");
      for (j=0; j<MAX_INSTS_PER_CLASS; j++){
	if (!pool->table[k][i][j])
	break;
	fprintf(stream, "\t%s (busy for %d cycles) ",
		pool->table[k][i][j][LOWER01]->master->name,
		(int)pool->table[k][i][j][LOWER01]->master->busy);
      }
      //assert(j == pool->nents[i]);
      fprintf(stream, "\n");
    }
  }
}

void res_find_clus(struct res_pool* pool, struct rqueue_link *node) {
  int i,j;
  enum alpha_fu_class class = ALPHA_OP_FUCLASS(node->qelem->inst_desc->op);
  enum sub_cluster_required sub_clus_req = node->qelem->inst_desc->sub_clus_req;
  node->qelem->clus[0]=node->qelem->clus[1]=FALSE;
  if (class == IntALU || class == IntMULT || class == IntDIV){
    for (j=0; j<num_of_int_func_units/2; j++){
      for (i=0; i<res_int_clusters; i++){
	if (pool->table[i][class][j][sub_clus_req]){
	  if (pool->table[i][class][j][sub_clus_req]->master->busy <= sim_cycle &&
	      ((node->qelem->op_delay[0] == DNA) || 
		(node->qelem->op_clusters[0] >= res_int_clusters) || 
		 (node->qelem->op_delay[0]+(abs(i-node->qelem->op_clusters[0])*
				      res_cross_clus_delay) <= sim_cycle)) && 
		((node->qelem->op_delay[1] == DNA) || 
		 (node->qelem->op_clusters[1] >= res_int_clusters) ||
		 (node->qelem->op_delay[1]+(abs(i-node->qelem->op_clusters[1])*
				      res_cross_clus_delay) <= sim_cycle))){
	    node->qelem->clus[i]=TRUE;
	  }
	}
      }
    }
  }

  /* Check for fp instruction */
  else if (class == FloatCMP || class == FloatADD || class == FloatCVT ||
	   class == FloatMULT || class == FloatDIV || class == FloatSQRT){
    for (j=0;j<num_of_fp_func_units/2;j++){
      for (i=res_int_clusters; i<res_int_clusters+res_fp_clusters; i++){
	if (pool->table[i][class][j][sub_clus_req]){
	  if (pool->table[i][class][j][sub_clus_req]->master->busy <= sim_cycle&&
	      ((node->qelem->op_delay[0] == DNA) || 
		(node->qelem->op_clusters[0] >= res_fp_clusters) || 
		 (node->qelem->op_delay[0]+(abs(i-node->qelem->op_clusters[0])*
				      res_cross_clus_delay) <= sim_cycle)) && 
		((node->qelem->op_delay[1] == DNA) || 
		 (node->qelem->op_clusters[1] >= res_fp_clusters) ||
		 (node->qelem->op_delay[1]+(abs(i-node->qelem->op_clusters[1])*
				      res_cross_clus_delay) <= sim_cycle))){
	    node->qelem->clus[i]=TRUE;
	  }
	}
      }
    }
  }
}

