/*
* map.c- map stage implementation
*
* This file is part of the Alphasim tool suite written by
* Raj Desikan as part of the Bullseye project.
*
* Copyright (C) 1999 by Raj Desikan
* This source file is distributed "as is" in the hope that it will be
* useful.  It is distributed with no warranty, and no author or
* distributor accepts any responsibility for the consequences of its
* use. 
*
* Everyone is granted permission to copy, modify and redistribute
* this source file under the following conditions:
*
*    This tool set is distributed for non-commercial use only. 
*    Please contact the maintainer for restrictions applying to 
*    commercial use of these tools.
*
*    Permission is granted to anyone to make or distribute copies
*    of this source code, either as received or modified, in any
*    medium, provided that all copyright notices, permission and
*    nonwarranty notices are preserved, and that the distributor
*    grants the recipient permission for further redistribution as
*    permitted by this document.
*
*    Permission is granted to distribute this file in compiled
*    or executable form under the same conditions that apply for
*    source code, provided that either:
*
*    A. it is accompanied by the corresponding machine-readable
*       source code,
*    B. it is accompanied by a written offer, with no time limit,
*       to give anyone a machine-readable copy of the corresponding
*       source code in return for reimbursement of the cost of
*       distribution.  This written offer must permit verbatim
*       duplication by anyone, or
*    C. it is distributed by someone who received only the
*       executable form, and is accompanied by a copy of the
*       written offer of source code that they received concurrently.
*
* In other words, you are welcome to use, share and improve this
* source file.  You are forbidden to forbid anyone else to use, share
* and improve what you give them.
*
*/

#include <stdio.h>                                                     
#include <stdlib.h>                                                    
#include <assert.h>
#include "alpha.h"                                                    
#include "regs.h"
#include "cache.h"
#include "bpred.h"
#include "fetch.h"
#include "slot.h"                                                     
#include "map.h"
#include "issue.h"
#include "writeback.h"
#include "commit.h"
#include "eventq.h"

/* Latest architectural-to-physical mapping of the integer register
   file, indexed by architectural register number.  Each entry also
   records the reorder buffer slot (rbuf_no) of the instruction that
   produces the mapped physical register. */
struct int_reg_mapping map_ir_mapping[MD_I_ARCH_REGS+1];

/* Latest architectural-to-physical mapping of the fp register file.
   Indexed by the fp register specifier minus 33 (see map_stage). */
struct int_reg_mapping map_fr_mapping[MD_F_ARCH_REGS];

int map_width;                          /* map stage width: max instructions mapped per call of map_stage */
int map_rb_head;			/* index of the head of the reorder buffer */
int map_rb_tail;			/* index of the next reorder buffer entry to allocate */
int map_rb_nelem;			/* capacity of the reorder buffer (number of entries allocated) */
int map_rb_num;				/* number of reorder buffer entries currently in use */
struct reorder_buffer *map_rb;	        /* Reorder buffer, allocated in map_stage_init */
struct dep_chain *dep_free_list;        /* singly linked free list of dep chain nodes */

/* The free physical register lists are circular queues: registers are
   taken at the head and returned at the tail. */
int free_int_reg_list_head;	/* head index of the free int reg queue */
int free_fp_reg_list_head;	/* head index of the free fp reg queue */
int free_int_reg_list_tail;	/* tail index of the free int reg queue */
int free_fp_reg_list_tail;	/* tail index of the free fp reg queue */
int *free_int_reg_list;		/* free int reg queue storage */
int *free_fp_reg_list;		/* free fp reg queue storage */
static int free_list_size;		/* dep chain nodes allocated per init_free_list call */
static struct rqueue_link *rq_link;     /* scratch ready queue link, allocated once and refilled per enqueue */
int num_of_free_int_regs;               /* number of free integer mapping registers */
int num_of_free_fp_regs;                /* number of free fp mapping registers */   

counter_t map_num_early_retire;         /* Number of instructions retired early (at map time) */

int map_int_issue_size; /* size of integer issue queue (limit checked against IQ->queue_num) */
int map_fp_issue_size;  /* size of fp issue queue (limit checked against FQ->queue_num) */
int early_inst_retire;  /* consulted only in FLEXIBLE_SIM builds: enables early retire in map_stage */

/* if enabled, stall for 3 cycles if free regs < 8.  Consulted only in
   FLEXIBLE_SIM builds; otherwise map_stage always applies the stall. */
int map_stall; 

/* Number of register reads and writes */
counter_t stat_int_reg_reads;
counter_t stat_int_reg_writes;
counter_t stat_fp_reg_reads;
counter_t stat_fp_reg_writes;

/* Load queue / store queue read and write counters */
counter_t stat_lq_reads;
counter_t stat_sq_reads;
counter_t stat_lq_writes;
counter_t stat_sq_writes;

/*
 * map_stage_init - set up all state owned by the map stage.
 *
 * Resets the integer and fp register mappings to the identity mapping
 * (no producing reorder buffer entry), allocates the reorder buffer,
 * builds the dependence chain free list, fills the free physical
 * register queues and allocates the scratch ready queue link.
 *
 * map_rb_nelem, regs_num_int_p_regs and regs_num_fp_p_regs must be set
 * before this is called.  Any allocation failure is fatal.
 */
void 
map_stage_init(void){
  int i;
  
  /* initialize the integer register mappings */
  for (i=0; i < MD_I_ARCH_REGS+1; i++){
    map_ir_mapping[i].phy_reg = i;
    map_ir_mapping[i].rbuf_no = DNA;
  }
  
  /* initialize the fp register mappings */
  for (i = 0; i < MD_F_ARCH_REGS; i++){
    map_fr_mapping[i].phy_reg = i;
    map_fr_mapping[i].rbuf_no = DNA;
  }

  /* Initialize and instantiate the reorder buffer */
  map_rb_head = map_rb_tail = map_rb_num = 0;
  map_rb = (struct reorder_buffer *) 
    calloc(map_rb_nelem, sizeof(struct reorder_buffer));
  
  if(!map_rb) 
    fatal("out of virtual memory");

  /* compute default size of free list; this must happen before
     init_free_list(), which reads free_list_size */
  free_list_size = (map_rb_nelem * DEP_CHAIN_SIZE);
  
  /* Initialize the free list */
  init_free_list();
  
  /* Initialize the free integer register queue */
  free_int_reg_list = (int *) calloc(regs_num_int_p_regs, sizeof(int));
  if (!free_int_reg_list)
    fatal("Out of virtual memory");
  /* The free list is initialized to the number of the physical
     register in the register file. The first free physical register
     is MD_TOTAL_I_REGS (per the original note: register 40, i.e.
     32 arch + 8 PAL shadow -- confirm against the MD_TOTAL_I_REGS
     definition) */
  for (i=0; i<regs_num_int_p_regs; i++){
    free_int_reg_list[i] = MD_TOTAL_I_REGS + i;
  }
  
  free_int_reg_list_head = 0;
  free_int_reg_list_tail =  regs_num_int_p_regs-1;
  num_of_free_int_regs =  regs_num_int_p_regs;

    
  /* Initialize the free fp register queue; the first free physical fp
     register follows the architectural ones */
  free_fp_reg_list = (int *) calloc(regs_num_fp_p_regs, sizeof(int));
  if (!free_fp_reg_list)
    fatal("Out of virtual memory");
  for (i=0; i<regs_num_fp_p_regs; i++){
    free_fp_reg_list[i] = MD_F_ARCH_REGS + i;
  }
  
  free_fp_reg_list_head = 0;
  free_fp_reg_list_tail = regs_num_fp_p_regs-1;
  num_of_free_fp_regs = regs_num_fp_p_regs;
  
  /* rqlink structure for putting instructions in the ready queue.
     map_stage dereferences it unconditionally, so a failed allocation
     must be caught here. */
  rq_link = (struct rqueue_link *) malloc(sizeof(struct rqueue_link));
  if (!rq_link)
    fatal("out of virtual memory");
  
}

/*
 * map_stage - rename and dispatch instructions from the slot latch.
 *
 * Each call handles up to map_width instructions from SL[], in order,
 * starting at slot_latch_head.  For every instruction it:
 *   - obtains the input/output register specifiers from alpha.def,
 *   - takes the integer path (out1 < 33) or the fp path (out1 or
 *     out2 >= 33),
 *   - obtains a free physical destination register when one is needed,
 *   - fills the reorder buffer entry at map_rb_tail and the next entry
 *     of IQ->window (integer path) or FQ->window (fp path),
 *   - allocates a load queue or store queue entry for RdPort / WrPort
 *     operations,
 *   - links the instruction to the producer of every source operand
 *     that is not complete yet, or puts it on the ready queue when all
 *     operands are ready.
 *
 * The function returns early (the stage stalls) when the reorder
 * buffer is full, when the issue queue / load queue / store queue
 * checks fail, or when no free physical register is available.
 */
void 
map_stage(void){
  md_inst_t inst;
  int out1, out2, in1, in2, in3, next_free_reg=31, n_mapped=0;
  static unsigned int inum = 0;	/* global inum for next allocation */
  enum md_opcode op;
  /* Stall for three cycles if the number of free registers is less than 
     8 */
#ifdef FLEXIBLE_SIM
  if (map_stall && num_of_free_int_regs < 8) {
#else
  if (num_of_free_int_regs < 8) {
#endif
    /* Only the fetch stall is requested here (with a resume callback
       three cycles from now); there is no return, so mapping below
       still proceeds in this cycle. */
    fetch_istall_buf.stall |= SAMPLE_STALL;
    fetch_istall_buf.resume=0;
    eventq_queue_callback(sim_cycle+3, 
			  (void *) fetch_resume_ifetch,
			  (int) SAMPLE_STALL);
  }
  
  /* While there are still instruction in the slot latch and the
     number of instructions mapped this cycle is less than width of
     map stage */
  while(slot_latch_num > 0 && n_mapped < map_width) {
    /* check if an entry is free in the reorder buffer */
    if ((map_rb_num < map_rb_nelem)) {
      /* compute input output registers */
      inst = SL[slot_latch_head].inst_desc->IR;
      /* op keeps the opcode as it arrived from the slot latch.
	 inst_desc->op may be overwritten further down (MD_NOP_INST for
	 DEFLINK entries, MD_AGEN_OP for loads and stores), so the
	 access size checks below use op instead. */
      op = SL[slot_latch_head].inst_desc->op;
        switch(SL[slot_latch_head].inst_desc->op){
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,I1,I2,I3)		\
          case OP:							\
            out1 = O1; out2 = O2;					\
            in1 = I1; in2 = I2; in3 = I3;				\
	    break;
#define DEFLINK(OP,MSK,NAME,MASK,SHIFT)					\
          case OP:							\
            SL[slot_latch_head].inst_desc->op = MD_NOP_INST;		\
            out1 = 31;	out2 = 31;					\
            in1 = 31; in2 = 31; in3 = 31;				\
	    break;
#define CONNECT(OP)
#define DECLARE_FAULT(FAULT)						\
	  {break;}
#include "alpha.def"
	default:
	  inst = MD_NOP_INST;
	  SL[slot_latch_head].inst_desc->IR=MD_NOP_INST;
	  /* I am temporarily removing the line below because 
	     200.sixtrack has an unimplemented call_pal 0xaa instruction which 
	     breaks here. Once this instruction is implemented, should enable 
	     this line */
	  //SL[slot_latch_head].inst_desc->trap = TRUE;
	  MD_SET_OPCODE(SL[slot_latch_head].inst_desc->op, inst);
	  /* in3 is left unassigned on this path; it is not read
	     anywhere in this function */
	  out1 = out2 = in1 = in2 = 31;
        }

	/* check whether integer issue queue is free and if load inst, 
	 load queue has a free entry or if store instruction, store
	 queue has a free entry. We want ITOFF and ITOFS instructions to 
	 use integer pipe */
	if (out1 < 33 && 
	    ((out1 == 31 && 
	      SL[slot_latch_head].inst_desc->op == LDQ_U) || 
	     (ALPHA_OP_FUCLASS(SL[slot_latch_head].inst_desc->op) != RdPort) ||
	     issue_lq_num < issue_lq_nelem) && 
	    ((ALPHA_OP_FUCLASS(SL[slot_latch_head].inst_desc->op) != WrPort) ||
	     commit_sq_num < commit_sq_nelem) && 
	    IQ->queue_num < map_int_issue_size){ 
	  /* If the instruction has an output register, then we need to 
	     check if a free register is available */
	  /* NOTE(review): a zero return from get_free_int_reg() is taken
	     as "no register available", while that function reports
	     failure by returning NA -- presumably NA is 0; confirm. */
	  if (out1 == DNA || out1 == 31 || 
	      (next_free_reg = get_free_int_reg()) != 0){
	    
	    /* using the value in in1 and in2, we can index into the array
	       map_int_reg_mapping[] and get the mapped value of these regs,
	       and write it into the issue queue . */
	    map_rb[map_rb_tail].inst_desc = SL[slot_latch_head].inst_desc;
	    SL[slot_latch_head].inst_desc->fpstore = FALSE;
	    /* Early instruction retire for some instructions with R31 as
	       destination */
#ifdef FLEXIBLE_SIM
	    if(early_inst_retire) {
#endif
	      if (out1 == 31 && (SL[slot_latch_head].inst_desc->op == LDQ_U ||
				 SL[slot_latch_head].inst_desc->op == INTA || 
				 SL[slot_latch_head].inst_desc->op == INTL ||
				 SL[slot_latch_head].inst_desc->op == INTM ||
				 SL[slot_latch_head].inst_desc->op == INTS)) {
		/* The instruction is dropped here: its descriptor goes
		   back to the fetch free list and neither map_rb_tail
		   nor the issue queue pointer is advanced. */
		map_num_early_retire++;
		fetch_return_to_free_list(map_rb[map_rb_tail].inst_desc);
		n_mapped++;
		slot_latch_num--;
		/* wrap by masking; assumes slot_width is a power of
		   two -- TODO confirm */
		slot_latch_head = (slot_latch_head + 1) & (slot_width - 1);
		continue;
	      }
#ifdef FLEXIBLE_SIM
	    }
#endif
	    
	    /* FTOI and FP store instructions are queued in the integer as 
	       well as the FP queue */
	    if (MD_IS_FTOI(SL[slot_latch_head].inst_desc->op) || 
		MD_IS_FPSTORE(SL[slot_latch_head].inst_desc->op)) {
	      FQ->queue_num++;
	      
	      /* Indicate to the issue stage that this is fp store so that
		 IQ and FQ can communicate */
	      if (MD_IS_FPSTORE(SL[slot_latch_head].inst_desc->op))
		SL[slot_latch_head].inst_desc->fpstore = TRUE;	
	    }
	    /* instruction numbers wrap around at MAX_INUM */
	    map_rb[map_rb_tail].inst_desc->inum = (inum++)%MAX_INUM;
	    IQ->queue_num++;
	    
	    map_rb[map_rb_tail].dir_update = SL[slot_latch_head].dir_update;
	    
	    IQ->window[IQ->queue_pointer].inst_desc = 
	      map_rb[map_rb_tail].inst_desc;

	    IQ->window[IQ->queue_pointer].st_wait_bit = FALSE;
	    IQ->window[IQ->queue_pointer].cmov = FALSE;
	    /* Indicate that the instruction has been put into the
	       issue queue this cycle */
	    IQ->window[IQ->queue_pointer].issue_cycle = sim_cycle;
	    
	    /* Allocate entry in the Load queue if required. Don't
	       allocate for LDQ_U which are NOPS */
	    if ((ALPHA_OP_FUCLASS(SL[slot_latch_head].inst_desc->op) 
		 == RdPort) && !(out1 == 31 &&
				 SL[slot_latch_head].inst_desc->op ==
				 LDQ_U)) {
	      map_rb[map_rb_tail].in_LQ = TRUE;
              stat_lq_writes++;
	      issue_lq[issue_lq_tail].tlbmiss = FALSE;
	      issue_lq[issue_lq_tail].mshrfull = FALSE;
	      issue_lq[issue_lq_tail].cachemiss = FALSE;
	      issue_lq[issue_lq_tail].stqhit =FALSE;

	      /* We convert the load instruction into an add
		 instruction for computing effective address. After
		 the computation of effective address, this field will 
		 be set back to the correct value by writeback */
	      map_rb[map_rb_tail].inst_desc->op = MD_AGEN_OP;
	      issue_lq[issue_lq_tail].completed = FALSE;
	      issue_lq[issue_lq_tail].valid = VALID;
	      issue_lq[issue_lq_tail].tag = 
		map_rb[map_rb_tail].inst_desc->inum;
	      /* Allocate Load queue entry */
	      map_rb[map_rb_tail].inst_desc->lq_no=issue_lq_tail;
	      issue_lq[issue_lq_tail].inst_desc = 
		map_rb[map_rb_tail].inst_desc;
	      /* Find number of bits to shift for address comparison during 
		 traps */
	      if (op == LDBU)
		issue_lq[issue_lq_tail].size = 0;
	      else if (op == LDWU)
		issue_lq[issue_lq_tail].size = 1;
	      else if (op == LDS || op == LDL || op ==LDL_L)
		issue_lq[issue_lq_tail].size = 2;
	      else
		issue_lq[issue_lq_tail].size = 3;
	      
	      issue_lq_tail = (issue_lq_tail +1 ) % issue_lq_nelem;
	      issue_lq_num++;
	      
	      /* If we are using the stWait table, check if the stWait
		 bit is set for this instruction. If so, put in on the 
		 dependence chain of the last store before this load */
	      /* The two variants below differ only in the extra
		 fetch_st_table_size test of the FLEXIBLE_SIM build.
		 (commit_sq_head+commit_sq_num-1)%commit_sq_nelem is the
		 most recently allocated store queue entry. */
#ifdef FLEXIBLE_SIM
	      if (fetch_st_table_size && 
		  map_rb[map_rb_tail].inst_desc->st_wait_bit >= 
		  (sim_cycle/ST_WAIT_CLR+1) &&
		  commit_sq_num && 
		  commit_sq[(commit_sq_head+commit_sq_num-1)%
			   commit_sq_nelem].inst_desc &&
		  map_rb[(commit_sq[(commit_sq_head+commit_sq_num-1)%
				   commit_sq_nelem].inst_desc->r_buf_no)].in_SQ
		  == TRUE) {
#else
	      if (map_rb[map_rb_tail].inst_desc->st_wait_bit >= 
		  (sim_cycle/ST_WAIT_CLR+1) &&
		  commit_sq_num && 
		  commit_sq[(commit_sq_head+commit_sq_num-1)%
			   commit_sq_nelem].inst_desc &&
		  map_rb[(commit_sq[(commit_sq_head+commit_sq_num-1)%
				   commit_sq_nelem].inst_desc->r_buf_no)].in_SQ
		  == TRUE) {
#endif
		add_prod_link(commit_sq[(commit_sq_head+commit_sq_num-1)%
				       commit_sq_nelem].inst_desc->r_buf_no,
			      &(IQ->window[IQ->queue_pointer]),STWAIT);
		IQ->window[IQ->queue_pointer].st_wait_bit = TRUE;
	      }
	    }
	    /* Allocate entry in the Store queue if required */
	    if ((ALPHA_OP_FUCLASS(SL[slot_latch_head].inst_desc->op) 
		 == WrPort)) {
              stat_sq_writes++;
	      map_rb[map_rb_tail].in_SQ = TRUE;
	      map_rb[map_rb_tail].inst_desc->op = MD_AGEN_OP;
	      commit_sq[commit_sq_tail].tlbmiss = FALSE;
	      commit_sq[commit_sq_tail].mshrfull = FALSE;
	      commit_sq[commit_sq_tail].cachemiss = FALSE;
	      commit_sq[commit_sq_tail].completed = FALSE;
	      commit_sq[commit_sq_tail].valid = VALID;
	      commit_sq[commit_sq_tail].tag = 
		map_rb[map_rb_tail].inst_desc->inum;
	      /* Find number of bits to shift for trap comparison */
	      if (op == STB)
		commit_sq[commit_sq_tail].size = 0;
	      else if (op == STW)
		commit_sq[commit_sq_tail].size = 1;
	      else if (op == STS || op == STL || op == STL_C)
		commit_sq[commit_sq_tail].size = 2;
	      else
		commit_sq[commit_sq_tail].size = 3;
	      map_rb[map_rb_tail].inst_desc->sq_no = commit_sq_tail;
	      commit_sq[commit_sq_tail].inst_desc = 
		map_rb[map_rb_tail].inst_desc;
	      commit_sq_tail = (commit_sq_tail + 1) % commit_sq_nelem;
	      commit_sq_num++;
	    }
	    
	    /* if in1< 33 this is an integer inst. The first operand
	       register number is same as the one in the def file */

	    if (in1 < 33){
	      map_rb[map_rb_tail].inst_desc->in_arch_regs[0] = in1;
              map_rb[map_rb_tail].inst_desc->reg_type[0] = INTEGER;
	    }
	    /* else this an fp instruction. Subtract 33 from the
	       number in the def file to get the fp register number */
	    else{
	      map_rb[map_rb_tail].inst_desc->in_arch_regs[0] = in1-33;
              map_rb[map_rb_tail].inst_desc->reg_type[0] = FLOATINGPT;
	    }
	    
	    if (in2 < 33){
	      map_rb[map_rb_tail].inst_desc->in_arch_regs[1] = in2;
              map_rb[map_rb_tail].inst_desc->reg_type[1] = INTEGER;
	    }
	    else{
	      map_rb[map_rb_tail].inst_desc->in_arch_regs[1] = in2-33;
              map_rb[map_rb_tail].inst_desc->reg_type[1] = FLOATINGPT;
	    }
	    
	    map_rb[map_rb_tail].inst_desc->r_buf_no = map_rb_tail;
	    map_rb[map_rb_tail].inst_desc->dest_reg = INTEGER;
	    
	    /* Indicate that this instruction is going to write into
	       reg number "next_free_reg" for future instructions
	       depending on this instruction */
	    if (out1 != DNA && out1 != 31) {
	      regs.rcomplete[next_free_reg] = FALSE;
	    }
	    map_rb[map_rb_tail].inst_desc->out_arch_reg = out1;
	    map_rb[map_rb_tail].completed=FALSE;
	    map_rb[map_rb_tail].mispredict=FALSE;
	    map_rb[map_rb_tail].replaytrap=FALSE;
	    
	    /* Find out if the first input operand is integer or fp
	       and in case it's not ready, make a link to the 
	       appropriate rbuf entry of the instruction on which this 
	       instruction is dependent */
	    
	    if (in1 != DNA && in1 <33){
	      map_rb[map_rb_tail].inst_desc->in_phy_regs[0] = map_ir_mapping[in1].phy_reg;
	      map_rb[map_rb_tail].inst_desc->src_reg1 = INTEGER;
	      
	      /* put on the dependency list if physical register is 
		 different from architectural register and  mark
		 operand as not ready */
	      if (map_rb[map_rb_tail].inst_desc->in_phy_regs[0] 
		  != map_rb[map_rb_tail].inst_desc->in_arch_regs[0] && 
		  regs.rcomplete[map_rb[map_rb_tail].inst_desc->in_phy_regs[0]]
		  == FALSE){

		add_prod_link(map_ir_mapping[in1].rbuf_no,
			      &(IQ->window[IQ->queue_pointer]),OP1);
		IQ->window[IQ->queue_pointer].op_ready[0]=NOTREADY;
	      }
	      else{
		IQ->window[IQ->queue_pointer].op_ready[0]=READY; 
		IQ->window[IQ->queue_pointer].optime[0] = sim_cycle;
                stat_int_reg_reads++;
	      }
	    }
	    
	    else if(in1 != DNA && in1 >= 33){
	      map_rb[map_rb_tail].inst_desc->in_phy_regs[0] = map_fr_mapping[in1-33].phy_reg;
	      map_rb[map_rb_tail].inst_desc->src_reg1 = FLOATINGPT;
	     
	      /* put on the dependency list if physical register is 
		 different from architectural register and mark
		 operand as not ready */
	      if (map_rb[map_rb_tail].inst_desc->in_phy_regs[0] 
		  != map_rb[map_rb_tail].inst_desc->in_arch_regs[0] &&
		  regs.fcomplete[map_rb[map_rb_tail].inst_desc->in_phy_regs[0]]
		  == FALSE) {
		add_prod_link(map_fr_mapping[in1-33].rbuf_no,
			      &(IQ->window[IQ->queue_pointer]),OP1);
		IQ->window[IQ->queue_pointer].op_ready[0]=NOTREADY;
	      }
	      else{
		IQ->window[IQ->queue_pointer].op_ready[0]=READY; 
		IQ->window[IQ->queue_pointer].optime[0] = sim_cycle;
                stat_fp_reg_reads++;
	      }
	    }
	    else{
	      /* No dependences. Mark this operand as ready */
	      map_rb[map_rb_tail].inst_desc->src_reg1 = DNA;
	      map_rb[map_rb_tail].inst_desc->in_phy_regs[0] = DNA;
	      IQ->window[IQ->queue_pointer].op_ready[0]=READY;
	      IQ->window[IQ->queue_pointer].optime[0] = sim_cycle;
	    }
	    
	    /* Same treatment for the second input operand */
	    if (in2 != DNA && in2 < 33){
	      map_rb[map_rb_tail].inst_desc->in_phy_regs[1] = 
		map_ir_mapping[in2].phy_reg;
	      map_rb[map_rb_tail].inst_desc->src_reg2 = INTEGER;
	      
	      /* put on the dependency list */
	      if (map_rb[map_rb_tail].inst_desc->in_phy_regs[1] != 
		  map_rb[map_rb_tail].inst_desc->in_arch_regs[1] && 
		  regs.rcomplete[map_rb[map_rb_tail].inst_desc->in_phy_regs[1]]
		  == FALSE ) {
		add_prod_link(map_ir_mapping[in2].rbuf_no,
			      &(IQ->window[IQ->queue_pointer]),OP2);
		IQ->window[IQ->queue_pointer].op_ready[1]=NOTREADY;
	      }
	      else {
		IQ->window[IQ->queue_pointer].op_ready[1]=READY;
		IQ->window[IQ->queue_pointer].optime[1] = sim_cycle;
                stat_int_reg_reads++;
	      }
	    }
	    
	    else if (in2 != DNA && in2 >= 33){
	      map_rb[map_rb_tail].inst_desc->in_phy_regs[1] = 
		map_fr_mapping[in2-33].phy_reg;
	      map_rb[map_rb_tail].inst_desc->src_reg2 = FLOATINGPT;
	      /* put on the dependency list */
	      if (map_rb[map_rb_tail].inst_desc->in_phy_regs[1] != 
		  map_rb[map_rb_tail].inst_desc->in_arch_regs[1] &&  
		  regs.fcomplete[map_rb[map_rb_tail].inst_desc->in_phy_regs[1]]
		  == FALSE) {
		add_prod_link(map_fr_mapping[in2-33].rbuf_no,
			      &(IQ->window[IQ->queue_pointer]),OP2);
		IQ->window[IQ->queue_pointer].op_ready[1]=NOTREADY;
	      }
	      else {
		IQ->window[IQ->queue_pointer].op_ready[1]=READY;
		IQ->window[IQ->queue_pointer].optime[1] = sim_cycle;
                stat_fp_reg_reads++;
	      }
	    }
	    else {
	      map_rb[map_rb_tail].inst_desc->src_reg2 = DNA;
	      map_rb[map_rb_tail].inst_desc->in_phy_regs[1] = DNA;
	      IQ->window[IQ->queue_pointer].op_ready[1]=READY;
	      IQ->window[IQ->queue_pointer].optime[1] = sim_cycle;
	    }
	    
	    /* If instruction is a cmov instruction, we need to see if the 
	       third operand has dependency, and if so, add to the dependent
	       chain */
	    if (MD_IS_CMOV(map_rb[map_rb_tail].inst_desc->op)) {
	      if (out1 != DNA && out1 != 31) {
		if (map_ir_mapping[out1].phy_reg != out1 &&
		    regs.rcomplete[map_ir_mapping[out1].phy_reg] == FALSE) {
		  /* put on cmov dependence list */
		  add_prod_link(map_ir_mapping[out1].rbuf_no,
			      &(IQ->window[IQ->queue_pointer]),CMOV_BIT);
		  IQ->window[IQ->queue_pointer].cmov = TRUE;
		}
		else {
		  /* old destination value is available: copy it into
		     the newly allocated physical register */
		  regs.regs_R[next_free_reg] = 
		    regs.regs_R[map_ir_mapping[out1].phy_reg];
                  stat_int_reg_reads++;
		}
	      }
	    }
	    /* if the physical regs are same as arch regs put on the
	       ready list. Map needs to  
	       put instructions on the ready list only if the
	       architectural regs are same as physical regs  
	       (which happens before the processor begins execution of
	       instructions) and after that the eventq  
	       should be able to do the  job. */
	    if (IQ->window[IQ->queue_pointer].op_ready[0] == READY && 
		IQ->window[IQ->queue_pointer].op_ready[1] == READY && 
		IQ->window[IQ->queue_pointer].st_wait_bit == FALSE && 
		IQ->window[IQ->queue_pointer].cmov == FALSE) {
	      /* fill in the rq_link strucure */
	      rq_link->qelem = &(IQ->window[IQ->queue_pointer]);
	      rq_link->inum = map_rb[map_rb_tail].inst_desc->inum;
	      rq_link->qelem->op_ready[0]=rq_link->qelem->op_ready[1]=READY;
	      rq_link->queued = FALSE;
	      issue_int_readyq_enqueue(rq_link);
	    }
	    
	    /* Publish the new mapping only now, after the source
	       operands (and the cmov old value) were looked up through
	       the previous mapping */
	    if (out1 != DNA && out1 != 31){
	      map_rb[map_rb_tail].inst_desc->out_phy_reg = next_free_reg;
	      map_ir_mapping[out1].phy_reg = next_free_reg;
	      map_ir_mapping[out1].rbuf_no = map_rb_tail;
	    }
	    else {
	      map_rb[map_rb_tail].inst_desc->out_phy_reg = 31;
	    }
	    
	    IQ->window[IQ->queue_pointer].op_delay[0] = 
	      IQ->window[IQ->queue_pointer].op_delay[1] = DNA;
	    IQ->window[IQ->queue_pointer].valid = VALID;
	    IQ->window[IQ->queue_pointer].issued = FALSE;
	    map_rb_tail = (map_rb_tail +1) % (map_rb_nelem);
	    map_rb_num++;
	    IQ->queue_pointer = (IQ->queue_pointer + 1) % (IQ->queue_nelem);
	    n_mapped++;

	    slot_latch_num--;
	    slot_latch_head = (slot_latch_head + 1) & (slot_width - 1); 
	  }
	  else{
	    /* free register is not available */
	    return;
	  }
	}
	
	/* check if an entry is free in the fp queue*/
	
	else if ((out1 >=33 || out2 >=33) && 
		 ((ALPHA_OP_FUCLASS(SL[slot_latch_head].inst_desc->op) 
		   != RdPort) || 
		  issue_lq_num < issue_lq_nelem) && 
		 ((ALPHA_OP_FUCLASS(SL[slot_latch_head].inst_desc->op) 
		   != WrPort) || 
		  commit_sq_num < commit_sq_nelem) && 
		 ( ((MD_IS_ITOF(SL[slot_latch_head].inst_desc->op) || 
		     (ALPHA_OP_FUCLASS(SL[slot_latch_head].inst_desc->op) 
		      == RdPort)) && IQ->queue_num < map_int_issue_size)
		   || FQ->queue_num < map_fp_issue_size)){
	  /* 64 is the fp counterpart of integer specifier 31 (31 + 33);
	     as on the integer path, a zero return from
	     get_free_fp_reg() is treated as "no register available" */
	  if (out1 == DNA || out1 == 64 || (next_free_reg = get_free_fp_reg()) != 0){
	    map_rb[map_rb_tail].inst_desc = SL[slot_latch_head].inst_desc;
#ifdef FLEXIBLE_SIM
	    if (early_inst_retire) {
#endif
	      /* Retire instruction if its UNOP */
	      if (out1 == 64 && (SL[slot_latch_head].inst_desc->op == FLTI || 
				 SL[slot_latch_head].inst_desc->op == FLTL ||
				 SL[slot_latch_head].inst_desc->op == FLTV ||
				 SL[slot_latch_head].inst_desc->op == SQRTF ||
				 SL[slot_latch_head].inst_desc->op == SQRTS ||
				 SL[slot_latch_head].inst_desc->op == SQRTG ||
				 SL[slot_latch_head].inst_desc->op == SQRTT ||
				 SL[slot_latch_head].inst_desc->op == ITOFS ||
				 SL[slot_latch_head].inst_desc->op == ITOFF ||
				 SL[slot_latch_head].inst_desc->op == ITOFT)) {
		map_num_early_retire++;
		fetch_return_to_free_list(map_rb[map_rb_tail].inst_desc);
		n_mapped++;
		slot_latch_num--;
		slot_latch_head = (slot_latch_head + 1) & (slot_width - 1);
		continue;
	      }
#ifdef FLEXIBLE_SIM
	    }
#endif
	    map_rb[map_rb_tail].dir_update = SL[slot_latch_head].dir_update;

	    /* ITOF and FP loads go to the integer queue */
	    /* NOTE(review): only the occupancy counter is IQ's here; the
	       queue entry itself is always written to FQ->window
	       below -- confirm this is intended. */
	    if (MD_IS_ITOF(SL[slot_latch_head].inst_desc->op) || 
		(ALPHA_OP_FUCLASS(SL[slot_latch_head].inst_desc->op) 
		 == RdPort))
	      IQ->queue_num++;
	    else
	      FQ->queue_num++;

	    map_rb[map_rb_tail].inst_desc->inum = (inum++)%MAX_INUM;
	    FQ->window[FQ->queue_pointer].inst_desc = map_rb[map_rb_tail].inst_desc;
	    FQ->window[FQ->queue_pointer].st_wait_bit = FALSE;
	    FQ->window[FQ->queue_pointer].cmov = FALSE;
	    FQ->window[FQ->queue_pointer].issue_cycle = sim_cycle; 
	    map_rb[map_rb_tail].inst_desc->double_prec = FALSE;
	    /* Allocate entry in the Load queue if required */
	    if ((ALPHA_OP_FUCLASS(SL[slot_latch_head].inst_desc->op) 
		 == RdPort)) {
	      map_rb[map_rb_tail].in_LQ = TRUE;
              stat_lq_writes++;
              issue_lq[issue_lq_tail].stqhit = 0;
	      issue_lq[issue_lq_tail].tlbmiss = FALSE;
	      issue_lq[issue_lq_tail].mshrfull = FALSE;
	      issue_lq[issue_lq_tail].cachemiss = FALSE;
	      map_rb[map_rb_tail].inst_desc->op = MD_AGEN_OP;
	      issue_lq[issue_lq_tail].completed = FALSE;
	      issue_lq[issue_lq_tail].valid = VALID;
	      issue_lq[issue_lq_tail].tag = 
		map_rb[map_rb_tail].inst_desc->inum;
	      map_rb[map_rb_tail].inst_desc->lq_no=issue_lq_tail;
	      issue_lq[issue_lq_tail].inst_desc = 
		map_rb[map_rb_tail].inst_desc;
              
              /*
	      ** BUG FIX -- MISSING SIZE INFO 
	      ** Aamer Jaleel <ajaleel@umd.edu>
	      **
	      */

	      /* Find number of bits to shift for address comparison during 
		 traps */
	      if (op == LDBU)
		issue_lq[issue_lq_tail].size = 0;
	      else if (op == LDWU)
		issue_lq[issue_lq_tail].size = 1;
	      else if (op == LDS || op == LDL || op ==LDL_L)
		issue_lq[issue_lq_tail].size = 2;
	      else
		issue_lq[issue_lq_tail].size = 3;

	      issue_lq_tail = (issue_lq_tail +1 ) % issue_lq_nelem;
	      issue_lq_num++;
	      /* NOTE(review): unlike the integer path, the
		 fetch_st_table_size test here is not wrapped in
		 #ifdef FLEXIBLE_SIM. */
	      if (fetch_st_table_size && 
		  map_rb[map_rb_tail].inst_desc->st_wait_bit >= 
		  (sim_cycle/ST_WAIT_CLR+1) &&
		  commit_sq_num &&
		  commit_sq[(commit_sq_head+commit_sq_num-1) % 
			   commit_sq_nelem].inst_desc &&
		  map_rb[(commit_sq[(commit_sq_head+commit_sq_num-1) % 
		    commit_sq_nelem].inst_desc->r_buf_no)].in_SQ == TRUE) {
		add_prod_link(commit_sq[(commit_sq_head+commit_sq_num-1) % 
				       commit_sq_nelem].inst_desc->r_buf_no,
			      &(FQ->window[FQ->queue_pointer]),STWAIT);
		FQ->window[FQ->queue_pointer].st_wait_bit = TRUE;
	      }
	    }
	    /* Allocate an entry in the store queue if required */
	    if ((ALPHA_OP_FUCLASS(SL[slot_latch_head].inst_desc->op) 
		 == WrPort)) {
              stat_sq_writes++;
	      map_rb[map_rb_tail].in_SQ = TRUE;
	      map_rb[map_rb_tail].inst_desc->op = MD_AGEN_OP;
	      commit_sq[commit_sq_tail].tlbmiss = FALSE;
	      commit_sq[commit_sq_tail].mshrfull = FALSE;
	      commit_sq[commit_sq_tail].cachemiss = FALSE;
	      commit_sq[commit_sq_tail].completed = FALSE;
	      commit_sq[commit_sq_tail].valid = VALID;
	      commit_sq[commit_sq_tail].tag = 
		map_rb[map_rb_tail].inst_desc->inum;
	      map_rb[map_rb_tail].inst_desc->sq_no = commit_sq_tail;
	      commit_sq[commit_sq_tail].inst_desc = 
		map_rb[map_rb_tail].inst_desc;
              
              /*
	      ** BUG FIX -- MISSING SIZE INFO 
	      ** Aamer Jaleel <ajaleel@umd.edu>
	      **
	      */

	      /* Find number of bits to shift for trap comparison */
	      if (op == STB)
		commit_sq[commit_sq_tail].size = 0;
	      else if (op == STW)
		commit_sq[commit_sq_tail].size = 1;
	      else if (op == STS || op == STL || op == STL_C)
		commit_sq[commit_sq_tail].size = 2;
	      else
		commit_sq[commit_sq_tail].size = 3;

	      /* END FIX */

	      commit_sq_tail = (commit_sq_tail + 1) % commit_sq_nelem;
	      commit_sq_num++;
	    }
	    /* Record the architectural source registers; specifiers of
	       33 and above are fp registers offset by 33 */
	    if (in1 < 33){
	      map_rb[map_rb_tail].inst_desc->in_arch_regs[0] = in1;
              map_rb[map_rb_tail].inst_desc->reg_type[0] = INTEGER;
	    }
	    else{
	      map_rb[map_rb_tail].inst_desc->in_arch_regs[0] = in1-33;
              map_rb[map_rb_tail].inst_desc->reg_type[0] = FLOATINGPT;
	    }
	    
	    if (in2 < 33){
	      map_rb[map_rb_tail].inst_desc->in_arch_regs[1] = in2;
              map_rb[map_rb_tail].inst_desc->reg_type[1] = INTEGER;
	    }
	    else{
	      map_rb[map_rb_tail].inst_desc->in_arch_regs[1] = in2-33;
              map_rb[map_rb_tail].inst_desc->reg_type[1] = FLOATINGPT;
	    }

	    map_rb[map_rb_tail].inst_desc->out_arch_reg = out1-33;
	    map_rb[map_rb_tail].inst_desc->r_buf_no = map_rb_tail;
	    map_rb[map_rb_tail].inst_desc->dest_reg = FLOATINGPT;
	    map_rb[map_rb_tail].completed = FALSE;
	    map_rb[map_rb_tail].mispredict = FALSE;
	    map_rb[map_rb_tail].replaytrap = FALSE;
	    /* the newly allocated fp destination register is not
	       complete until its producer finishes */
	    if (out1 != DNA && out1 != 64)
	      regs.fcomplete[next_free_reg] = FALSE;
	    if (in1 != DNA && in1 >= 33){
	      map_rb[map_rb_tail].inst_desc->in_phy_regs[0] = 
		map_fr_mapping[in1-33].phy_reg;
	      map_rb[map_rb_tail].inst_desc->src_reg1 = FLOATINGPT;

	      /* put on the dependency list */
	      if (map_rb[map_rb_tail].inst_desc->in_phy_regs[0] != 
		  map_rb[map_rb_tail].inst_desc->in_arch_regs[0] &&  
		  regs.fcomplete[map_rb[map_rb_tail].inst_desc->in_phy_regs[0]]
		  == FALSE){
		add_prod_link(map_fr_mapping[in1-33].rbuf_no,
			      &(FQ->window[FQ->queue_pointer]),OP1);
		FQ->window[FQ->queue_pointer].op_ready[0] = NOTREADY;
	      }
	      else{
		FQ->window[FQ->queue_pointer].op_ready[0] = READY;
		FQ->window[FQ->queue_pointer].optime[0] = sim_cycle;	
                stat_fp_reg_reads++;
	      }
	    }
	    
	    else if (in1 != DNA && in1 < 33){
	      map_rb[map_rb_tail].inst_desc->in_phy_regs[0] = 
		map_ir_mapping[in1].phy_reg;
	      map_rb[map_rb_tail].inst_desc->src_reg1 = INTEGER;
	      if (map_rb[map_rb_tail].inst_desc->in_phy_regs[0] != 
		  map_rb[map_rb_tail].inst_desc->in_arch_regs[0] && 
		  regs.rcomplete[map_rb[map_rb_tail].inst_desc->in_phy_regs[0]]
		  == FALSE ){
		add_prod_link(map_ir_mapping[in1].rbuf_no,
			      &(FQ->window[FQ->queue_pointer]), OP1);
		FQ->window[FQ->queue_pointer].op_ready[0] = NOTREADY;
	      }
	      else{
		FQ->window[FQ->queue_pointer].op_ready[0] = READY;
		FQ->window[FQ->queue_pointer].optime[0] = sim_cycle;
                stat_int_reg_reads++;
	      }
	    }

	    else{
	      /* no first operand: nothing to wait for */
	      map_rb[map_rb_tail].inst_desc->src_reg1 = DNA;
	      map_rb[map_rb_tail].inst_desc->in_phy_regs[0] = DNA;
	      FQ->window[FQ->queue_pointer].op_ready[0] = READY;
	      FQ->window[FQ->queue_pointer].optime[0] = sim_cycle;
	    }
	    
	    if (in2 != DNA && in2 >= 33){
	      map_rb[map_rb_tail].inst_desc->src_reg2 = FLOATINGPT;
	      map_rb[map_rb_tail].inst_desc->in_phy_regs[1] = 
		map_fr_mapping[in2-33].phy_reg;
	      /* put on the dependency list */
	      if (map_rb[map_rb_tail].inst_desc->in_phy_regs[1] != 
		  map_rb[map_rb_tail].inst_desc->in_arch_regs[1] && 
		  regs.fcomplete[map_rb[map_rb_tail].inst_desc->in_phy_regs[1]]
		  == FALSE ){
		add_prod_link(map_fr_mapping[in2-33].rbuf_no, 
			      &(FQ->window[FQ->queue_pointer]),OP2);
		FQ->window[FQ->queue_pointer].op_ready[1] = NOTREADY;
	      }
	      else{
		FQ->window[FQ->queue_pointer].op_ready[1] = READY;
		FQ->window[FQ->queue_pointer].optime[1] = sim_cycle;
                stat_fp_reg_reads++;
	      }
	    }
	    
	    else if (in2 != DNA && in2 < 33){
	      map_rb[map_rb_tail].inst_desc->in_phy_regs[1] = 
		map_ir_mapping[in2].phy_reg;
	      map_rb[map_rb_tail].inst_desc->src_reg2 = INTEGER;
	      /* put on the dependency list */
	      if (map_rb[map_rb_tail].inst_desc->in_phy_regs[1] != 
		  map_rb[map_rb_tail].inst_desc->in_arch_regs[1] &&  
		  regs.rcomplete[map_rb[map_rb_tail].inst_desc->in_phy_regs[1]]
		  == FALSE){
		add_prod_link(map_ir_mapping[in2].rbuf_no, 
			      &(FQ->window[FQ->queue_pointer]),OP2);
		FQ->window[FQ->queue_pointer].op_ready[1]=NOTREADY;
	      }
	      else {
		FQ->window[FQ->queue_pointer].op_ready[1]=READY;
		FQ->window[FQ->queue_pointer].optime[1] = sim_cycle;
                stat_int_reg_reads++;
	      }
	    }

	    else {
	      /* no second operand: nothing to wait for */
	      map_rb[map_rb_tail].inst_desc->src_reg2 = DNA;
	      map_rb[map_rb_tail].inst_desc->in_phy_regs[1] = DNA;
	      FQ->window[FQ->queue_pointer].op_ready[1] = READY;
	      FQ->window[FQ->queue_pointer].optime[1] = sim_cycle;
	    }
	    /* fp conditional move: the old destination value acts as a
	       third source, handled like the integer cmov case above */
	    if (MD_IS_FCMOV(map_rb[map_rb_tail].inst_desc->op)) {
	      if (out1 != DNA && out1 != 64) {
		if (map_fr_mapping[out1-33].phy_reg != out1-33 &&
		    regs.fcomplete[map_fr_mapping[out1-33].phy_reg] == FALSE) {
		  /* put on cmov dependence list */
		  add_prod_link(map_fr_mapping[out1-33].rbuf_no,
			      &(FQ->window[FQ->queue_pointer]),CMOV_BIT);
		  FQ->window[FQ->queue_pointer].cmov = TRUE;
		}
		else {
		  regs.regs_F.d[next_free_reg] = 
		    regs.regs_F.d[map_fr_mapping[out1-33].phy_reg];
                  stat_fp_reg_reads++;
		}
	      }
	    }
	    /* everything ready: hand the entry to the fp ready queue */
	    if (FQ->window[FQ->queue_pointer].op_ready[0]== READY &&
		FQ->window[FQ->queue_pointer].op_ready[1] == READY && 
		FQ->window[FQ->queue_pointer].st_wait_bit == FALSE &&
		FQ->window[FQ->queue_pointer].cmov == FALSE) {
	      /* fill in the rq_link strucure */
	      rq_link->qelem = &(FQ->window[FQ->queue_pointer]);
	      rq_link->inum = map_rb[map_rb_tail].inst_desc->inum;
	      rq_link->qelem->op_ready[0]=rq_link->qelem->op_ready[1]=READY;
	      rq_link->queued = FALSE;
	      issue_fp_readyq_enqueue(rq_link);
	    }
	    
	    /* publish the new fp mapping after the sources were looked
	       up through the previous one */
	    if (out1 != DNA && out1 != 64){
	      map_rb[map_rb_tail].inst_desc->out_phy_reg = next_free_reg;
	      map_fr_mapping[out1-33].phy_reg = next_free_reg;
	      map_fr_mapping[out1-33].rbuf_no = map_rb_tail;
	    }
	    else {
	      map_rb[map_rb_tail].inst_desc->out_phy_reg = 31;
	    }
	    
	    FQ->window[FQ->queue_pointer].op_delay[0] = 
	      FQ->window[FQ->queue_pointer].op_delay[1] = DNA;
	    FQ->window[FQ->queue_pointer].valid = VALID;
	    FQ->window[FQ->queue_pointer].issued = FALSE;
	    map_rb_tail = (map_rb_tail +1) % (map_rb_nelem);
	    map_rb_num++;
	    FQ->queue_pointer = (FQ->queue_pointer + 1) % (FQ->queue_nelem);
	    n_mapped++;
	    slot_latch_num--;
	    slot_latch_head = (slot_latch_head + 1) & (slot_width - 1);
	  }	
	  else{
	    /* free register is not available. stall */
	    return;
	  }
	}
	else{
	  /* issue queue is full. Stall*/ 
	  return;
	}
    }
    else{
      /* reorder buffer is full. map stage stalls */
      return;
    }
  }
}


/* Take the integer physical register at the head of the integer free
   list. The head index advances with wrap-around at regs_num_int_p_regs
   and the free count is decremented.
   Returns the register number, or NA when no integer register is free. */
int get_free_int_reg(void){
  int allocated;

  /* nothing to hand out: caller has to stall */
  if (num_of_free_int_regs <= 0)
    return NA;

  allocated = free_int_reg_list[free_int_reg_list_head];
  num_of_free_int_regs--;
  free_int_reg_list_head =
    (free_int_reg_list_head + 1) % regs_num_int_p_regs;

  return allocated;
}


/* Take the floating-point physical register at the head of the fp free
   list. The head index advances with wrap-around at regs_num_fp_p_regs
   and the free count is decremented.
   Returns the register number, or NA when no fp register is free. */
int get_free_fp_reg(void){
  int allocated;

  /* nothing to hand out: caller has to stall */
  if (num_of_free_fp_regs <= 0)
    return NA;

  allocated = free_fp_reg_list[free_fp_reg_list_head];
  num_of_free_fp_regs--;
  free_fp_reg_list_head =
    (free_fp_reg_list_head + 1) % regs_num_fp_p_regs;

  return allocated;
}

/* Give integer physical register reg_to_put back to the integer free
   list. The tail index is advanced first (wrapping at
   regs_num_int_p_regs), so it always names the most recently returned
   slot; the register is then stored there. */
void map_put_free_int_reg(int reg_to_put){
  num_of_free_int_regs++;

  free_int_reg_list_tail =
    (free_int_reg_list_tail + 1) % regs_num_int_p_regs;
  free_int_reg_list[free_int_reg_list_tail] = reg_to_put;
}

/* Give floating-point physical register reg_to_put back to the fp free
   list. The tail index is advanced first (wrapping at
   regs_num_fp_p_regs), so it always names the most recently returned
   slot; the register is then stored there. */
void map_put_free_fp_reg(int reg_to_put){
  num_of_free_fp_regs++;

  free_fp_reg_list_tail =
    (free_fp_reg_list_tail + 1) % regs_num_fp_p_regs;
  free_fp_reg_list[free_fp_reg_list_tail] = reg_to_put;
}
 
struct dep_chain *
get_from_free_list(){
  struct dep_chain *temp;

  /* if free list is exhausted, add some more entries */
  if(!dep_free_list){
    init_free_list();
  }
  temp = dep_free_list;
  dep_free_list = dep_free_list->next;
  return(temp);
}


/* (Re)build the dependency-chain free list with free_list_size freshly
   allocated entries. dep_free_list is reset to NULL first, so any
   entries still on the list at that point are dropped;
   get_from_free_list() only calls this when the list is already empty.
   A warning is emitted on every call. If an allocation fails the
   simulator cannot continue, so a diagnostic is printed and the process
   exits instead of writing through a NULL pointer. */
void  init_free_list(){
  int i;
  struct dep_chain *node;

  dep_free_list = NULL;
  warn("increasing dependency chain free list size");
  for (i = 0; i < free_list_size; i++){
    node = malloc(sizeof *node);
    if (node == NULL){
      fprintf(stderr,
	      "init_free_list: out of memory growing dependency chain free list\n");
      exit(1);
    }
    /* push the new entry on the front of the free list */
    node->next = dep_free_list;
    dep_free_list = node;
  }
}

/* Hand a dependency-chain entry back: dep becomes the new first element
   of dep_free_list and its next field is overwritten to point at the
   previous first element. */
void return_to_free_list(struct dep_chain *dep){
  struct dep_chain *old_head = dep_free_list;

  dep_free_list = dep;
  dep->next = old_head;
}

/* Add a link to the producer's consumer chain.
 *
 * rbuf_no  index into map_rb of the producing entry
 * qelem    issue-queue entry of the consumer; its inst_desc pointer is
 *          copied into the new link
 * op_no    selects the chain the link is appended to:
 *            OP1      -> op1_deps  (dependency of input 1)
 *            OP2      -> op2_deps  (dependency of input 2)
 *            STWAIT   -> stdeps    (stWait chain)
 *            CMOV_BIT -> cmovdeps  (cmov dependence chain)
 *
 * The link is always appended at the tail, so each chain keeps the order
 * in which consumers were added. Any other op_no value is ignored; the
 * chain is chosen before a link is taken from the free list so that an
 * unrecognised op_no does not lose a free-list entry. */
void add_prod_link(int rbuf_no, struct queue_elem *qelem, int op_no){
  struct dep_chain **link;
  struct dep_chain *node;

  /* pick the head pointer of the requested chain */
  if (op_no == OP1)
    link = &map_rb[rbuf_no].op1_deps;
  else if (op_no == OP2)
    link = &map_rb[rbuf_no].op2_deps;
  else if (op_no == STWAIT)
    link = &map_rb[rbuf_no].stdeps;
  else if (op_no == CMOV_BIT)
    link = &map_rb[rbuf_no].cmovdeps;
  else
    return;

  node = get_from_free_list();
  node->qelem = qelem;
  node->inst_desc = qelem->inst_desc;
  node->next = NULL;

  /* walk to the NULL pointer that terminates the chain (the head itself
     when the chain is empty) and hang the new link there */
  while (*link != NULL)
    link = &(*link)->next;
  *link = node;
}
