/*
* writeback.c - Execute + dcache access stage implementation
*
* This file is part of the Alphasim tool suite written by
* Raj Desikan as part of the Bullseye project.
*
* Copyright (C) 1999 by Raj Desikan
* This source file is distributed "as is" in the hope that it will be
* useful.  It is distributed with no warranty, and no author or
* distributor accepts any responsibility for the consequences of its
* use. 
*
* Everyone is granted permission to copy, modify and redistribute
* this source file under the following conditions:
*
*    This tool set is distributed for non-commercial use only. 
*    Please contact the maintainer for restrictions applying to 
*    commercial use of these tools.
*
*    Permission is granted to anyone to make or distribute copies
*    of this source code, either as received or modified, in any
*    medium, provided that all copyright notices, permission and
*    nonwarranty notices are preserved, and that the distributor
*    grants the recipient permission for further redistribution as
*    permitted by this document.
*
*    Permission is granted to distribute this file in compiled
*    or executable form under the same conditions that apply for
*    source code, provided that either:
*
*    A. it is accompanied by the corresponding machine-readable
*       source code,
*    B. it is accompanied by a written offer, with no time limit,
*       to give anyone a machine-readable copy of the corresponding
*       source code in return for reimbursement of the cost of
*       distribution.  This written offer must permit verbatim
*       duplication by anyone, or
*    C. it is distributed by someone who received only the
*       executable form, and is accompanied by a copy of the
*       written offer of source code that they received concurrently.
*
* In other words, you are welcome to use, share and improve this
* source file.  You are forbidden to forbid anyone else to use, share
* and improve what you give them.
*
* Bug Fixes by Aamer Jaleel <ajaleel@umd.edu>: 
*            University of Maryland, College Park.
*
* April 27th 2003:  A load-load replay trap does not require the destination
*                   registers to be the same, only the effective addresses,
*                   so the check of the destination registers was removed.
*
* April 24th 2003:  Store queue forwarding had incorrect assumptions.  Fixed
*                   the code to properly forward data from a store to a newer
*                   load if the sizes match.  Currently data forwarding is done
*                   only for load store operations whose sizes match.  Possible
*                   improvement of forwarding data in cases when the load size is
*                   less than the store size needs to be implemented.
*
* INTERNET: raju@cs.utexas.edu
* US Mail:  8200B W. Gate Blvd, Austin, TX 78745
*/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <assert.h>
#include "alpha.h"                                                      
#include "regs.h"
#include "memory.h"
#include "syscall.h"   
#include "resource.h"
#include "sim.h" 
#include "cache.h"
#include "tlb.h"
#include "bpred.h"
#include "fetch.h" 
#include "slot.h"
#include "issue.h"
#include "map.h"
#include "writeback.h"
#include "commit.h"
#include "loader.h"
#include "eventq.h"

/********* BUG FIX:  08/2004 (Aamer Jaleel <ajaleel@umd.edu>)*********/
/* Selects bits 0..2 of a value.  writeback_stage() applies it to the load
   address and the store address when it computes the shift amount for
   store data forwarded to a load. */
#define MASK_0to2   ULL(0x0000000000007)

/* Masks keeping the low 8, 16 and 32 bits of a quad_t, followed by an
   all-ones mask.  writeback_stage() indexes this table with the load-queue
   entry's size field to trim forwarded store data.
   NOTE(review): this presumes size is an index in the range 0..3 (e.g. log2
   of the access width in bytes) -- confirm where issue_lq[].size is set. */
const quad_t SZ_MASK[4] = { 0x0FF, 0x0FFFF, ULL(0x0FFFFFFFF), ULL(0xFFFFFFFFFFFFFFFF) };

/******** END BUG FIX -- **********************************/

/* Scratch ready-queue link, allocated once in writeback_stage_init().
   writeback_stage() fills in its qelem and inum fields before handing it to
   issue_int_readyq_enqueue() / issue_fp_readyq_enqueue(). */
static struct rqueue_link *rq_link;
/* Event currently being serviced; writeback_stage() assigns it from
   issue_next_event() on every loop iteration. */
struct queue_elem *elem;
/* Allocated in writeback_stage_init(); no use is visible in the part of
   writeback_stage() reviewed here. */
struct queue_elem *temp_elem;
/* Load/load replay traps: bumped when the scan of load-queue entries after
   the executing load finds a completed load to the same address. */
counter_t wb_load_replaytrap;
/* Bumped when a load-queue entry ahead of the executing load is completed,
   has the same address, is marked cachemiss, and its reorder-buffer entry is
   not yet completed; the executing load is then flagged for replay. */
counter_t wb_load_miss_load_replaytrap;
/* Bumped when a load matches the address of a completed store-queue entry
   whose exec_cycle equals the current sim_cycle. */
counter_t wb_store_replaytrap;
/* Bumped when a load hits a completed store but cannot take its data: the
   load is an LDS, the sizes differ, the register types differ, or the
   addresses differ while (addr >> 3) is equal. */
counter_t wb_diffsize_replaytrap;
/* Not updated in the part of the file reviewed here. */
counter_t wb_usemask_replaytrap;
/* Not updated in the part of the file reviewed here. */
counter_t wb_valzero_replaytrap;
/* BUGFIX 04/24/2004 - Start */
/* Aamer Jaleel              */
/* <ajaleel@umd.edu>         */
/* Number of loads whose data was forwarded from a store-queue entry. */
counter_t wb_lsq_data_fwd;
/* BUGFIX 04/24/2004 - End   */

/* When nonzero and the load latency is greater than 1, writeback_stage()
   schedules a writeback_wakeup callback ahead of the load's completion. */
int wb_load_use_speculation; /* Is load spec enabled? 1 = yes */
/* Gates the load/load replay-trap scan in writeback_stage(). */
int load_replay_trap; /* Are load traps enabled? 1 = Yes */
/* Gates the load-miss/load replay-trap scan in writeback_stage(). */
int load_miss_load_replay_trap; /* Are load miss load traps enabled? 1 = Yes */
/* Not read in the part of the file reviewed here. */
int diffsize_trap; /* Are different size traps enabled */

/* cycles lost due to traps */
/* Neither counter is updated in the part of the file reviewed here. */
counter_t wb_trap_cycles_lost, cntrl_cycles_lost;
/* Initialised to 0; not referenced in the part of the file reviewed here
   (presumably a debug-print switch -- confirm against its users). */
int printExecs = 0;

/*
 * Allocate the two heap objects owned by the writeback stage: the scratch
 * ready-queue link (rq_link) and the scratch queue element (temp_elem).
 * Neither object is initialised here.  fatal() is called if either
 * allocation fails.
 */
void
writeback_stage_init(void)
{
  /* scratch link passed to the ready-queue enqueue routines */
  rq_link = malloc(sizeof *rq_link);
  if (rq_link == NULL) {
    fatal("Out of virtual memory");
  }

  /* scratch queue element */
  temp_elem = malloc(sizeof *temp_elem);
  if (temp_elem == NULL) {
    fatal("out of virtual memory");
  }
}

/* Record describing one memory instruction: its address and whether it is
   a load or a store. */
typedef struct {
  md_addr_t addr;       /* address of the access */
  int       is_store; //LD = 0; ST = 1
} meminst_t;
     
/* Capacity of the meminsts table below. */
#define MAX_MEM 32
/* Fixed-size table of memory-instruction records.
   NOTE(review): writeback_stage() declares a local meminsts_cnt, presumably
   the number of valid entries here -- its use is outside the reviewed part
   of the file, confirm there. */
meminst_t meminsts[ MAX_MEM ];

void 
writeback_stage(void){
  register enum md_opcode op;
  /* stores current mapping of	the input integer architectural	registers */
  static int cur_int_in1_mapping;
  static int cur_int_in2_mapping; 
  
  /* stores current mapping of	the input fp architectural registers */
  static int cur_fp_in1_mapping;
  static int cur_fp_in2_mapping; 
  
  static md_inst_t inst;
  static md_addr_t saved_PC;
  static md_addr_t saved_NPC;
  static struct dep_chain *dep_chain_pointer;
  static md_addr_t addr;
  static byte_t temp_byte;
  static half_t temp_half;
  static word_t temp_word;
  static quad_t temp_quad;
  static int spec_mode=0;
  int i, load_lat; 
  int meminsts_cnt=0;

  /* service completed events */
  
  while ((elem = issue_next_event())){
    int SQ_miss = TRUE;
    
    /* elem has completed execution. calculate result */
        
    if (!OPERANDS_READY(elem) || !elem->issued || elem->completed)
      panic("inst completed and !ready,!issued, or completed");
    
    
    /* Check if instruction is a load or a store instruction */
    if (map_rb[elem->inst_desc->r_buf_no].in_LQ == FALSE && 
	map_rb[elem->inst_desc->r_buf_no].in_SQ == FALSE) {
      /* Inst if not a load or store and has completed execution */
      elem->completed = TRUE;
      
      /* If this is a syscall instruction, execute it at commit. This 
       ensures that the register values written by the syscall are reflected 
       correctly */
      if (elem->inst_desc->iflag == F_TRAP) {
	map_rb[elem->inst_desc->r_buf_no].completed=TRUE;
	continue;
      }
      
      /* Save the current PC value and load the value of the PC of
	 this instruction into regs_PC. This is done so that we can execute 
	 the instruction in alpha.def */ 
      saved_PC = regs.regs_PC;
      saved_NPC = regs.regs_NPC;
      regs.regs_PC = elem->inst_desc->regs_PC;
      regs.regs_NPC = regs.regs_PC + sizeof (md_inst_t);
      /* In case the input register mappings have changed, we need to
	 save the current mapping and put the 
	 current instruction mapping to ensure correct execution */
      
      /* Save the current mapping of the input integer registers*/
      if (elem->inst_desc->in_arch_regs[0] != DNA) {
	cur_int_in1_mapping = 
	  map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg;   
	cur_fp_in1_mapping = 
	  map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg;   
      }
      if (elem->inst_desc->in_arch_regs[1] != DNA) {
	cur_int_in2_mapping = 
	  map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg;   
	cur_fp_in2_mapping = 
	  map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg;   
      }
      
      /* Write the mapping at the time this instruction was mapped */
      if (elem->inst_desc->in_arch_regs[0] != DNA) {
	if (elem->inst_desc->src_reg1 == INTEGER)
	  map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	    elem->inst_desc->in_phy_regs[0];
	else
	  map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	    elem->inst_desc->in_phy_regs[0];
      }
      if (elem->inst_desc->in_arch_regs[1] != DNA) {
	if (elem->inst_desc->src_reg2 == INTEGER)
	  map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	    elem->inst_desc->in_phy_regs[1];
	else
	  map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	    elem->inst_desc->in_phy_regs[1];
      }            
      
      regs.regs_R[MD_REG_ZERO] = 0;
      regs.regs_F.d[MD_REG_ZERO] = 0;
      /* execute the instuction */  
            
      inst = elem->inst_desc->IR;
      op = elem->inst_desc->op;
      if (!(op == LDQ_U && elem->inst_desc->out_arch_reg == 31)) { 
	switch(op) {
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,I1,I2,I3)	\
      case OP:	\
      SYMCAT(OP,_IMPL);	\
      break;
#define DEFLINK(OP,MSK,NAME,MASK,SHIFT)	\
      case OP:	\
          op = MD_NOP_OP;						\
	  break;
#define CONNECT(OP)
#define DECLARE_FAULT(FAULT)	\
      {elem->inst_desc->trap = TRUE;break;}
#include "alpha.def"
      default:
        panic("trying to execute bogus inst");
	}
      }
      /* Update register file information */
      if (elem->inst_desc->dest_reg == INTEGER && 
	  elem->inst_desc->out_phy_reg != DNA) {
	/* Indicate value is now available in the physical register */
	regs.rcomplete[elem->inst_desc->out_phy_reg] = TRUE;
        stat_int_reg_writes++;
        stat_int_reg_writes++;
      } 
      else if (elem->inst_desc->dest_reg == FLOATINGPT && 
	       elem->inst_desc->out_phy_reg != DNA) {
	regs.fcomplete[elem->inst_desc->out_phy_reg] = TRUE;
        stat_fp_reg_writes++;
        stat_fp_reg_writes++;
      }
      /* If instruction is a branch instruction store correct PC */
      if (MD_OP_FLAGS(op) & F_CTRL) {
	sim_total_branches++;
	map_rb[elem->inst_desc->r_buf_no].correctPC=regs.regs_NPC;
      }
      if(pred && (MD_OP_FLAGS(op) & F_CTRL)  
	 && elem->inst_desc->regs_NPC != regs.regs_NPC ){
	/* Indicate that this entry caused a mispredict */
	map_rb[elem->inst_desc->r_buf_no].mispredict=TRUE;
	map_rb[elem->inst_desc->r_buf_no].completed=TRUE;
	if (elem->inst_desc->in_arch_regs[0] != DNA) {
	  map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	    cur_int_in1_mapping;
	  map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	    cur_fp_in1_mapping;
	}
	if (elem->inst_desc->in_arch_regs[1] != DNA) {
	  map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	    cur_int_in2_mapping;
	  map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	    cur_fp_in2_mapping;       
	}
	regs.regs_PC = saved_PC;
	regs.regs_NPC = saved_NPC;
      }
      else if (elem->inst_desc->trap == TRUE) {
	map_rb[elem->inst_desc->r_buf_no].completed=TRUE;
	if (elem->inst_desc->in_arch_regs[0] != DNA) {
	  map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	    cur_int_in1_mapping;
	  map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	    cur_fp_in1_mapping;
	}
	if (elem->inst_desc->in_arch_regs[1] != DNA) {
	  map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	    cur_int_in2_mapping;
	  map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	    cur_fp_in2_mapping;       
	}
	regs.regs_PC = saved_PC;
	regs.regs_NPC = saved_NPC;
      }
      else {
	map_rb[elem->inst_desc->r_buf_no].mispredict=FALSE;
	map_rb[elem->inst_desc->r_buf_no].completed=TRUE;
	
	/* Walk the output dependency chain, and insert instructions
	   in the ready queue */
	
	/* Instructions whose first operand depend on this result */
	dep_chain_pointer = map_rb[elem->inst_desc->r_buf_no].op1_deps;
	while (dep_chain_pointer != NULL){
	  dep_chain_pointer->qelem->op_ready[0] = READY;
	  dep_chain_pointer->qelem->optime[0] = sim_cycle;
	  if (OPERANDS_READY(dep_chain_pointer->qelem)){
	    rq_link->qelem = dep_chain_pointer->qelem;
	    rq_link->inum = 
	      map_rb[elem->inst_desc->r_buf_no].inst_desc->inum;
	    rq_link->qelem->op_delay[0]=sim_cycle;
	    rq_link->qelem->op_clusters[0]=elem->inst_desc->clus_assigned;
	    if (rq_link->qelem->inst_desc->dest_reg == INTEGER){
	      issue_int_readyq_enqueue(rq_link);
	    }
	    else{
	      issue_fp_readyq_enqueue(rq_link);
	    }
	  }
	  dep_chain_pointer = dep_chain_pointer->next;
	}
	
	/* Instructions whose second operand depend on this result */
	dep_chain_pointer = map_rb[elem->inst_desc->r_buf_no].op2_deps;
	while (dep_chain_pointer != NULL){
	  dep_chain_pointer->qelem->op_ready[1] = READY;
	  dep_chain_pointer->qelem->optime[1] = sim_cycle;
	  if (OPERANDS_READY(dep_chain_pointer->qelem)){
	    rq_link->qelem = dep_chain_pointer->qelem;
	    rq_link->inum = 
	      map_rb[elem->inst_desc->r_buf_no].inst_desc->inum;
	    rq_link->qelem->op_delay[1]=sim_cycle;
	    rq_link->qelem->op_clusters[1]=elem->inst_desc->clus_assigned;
	    if (rq_link->qelem->inst_desc->dest_reg == INTEGER){
	      issue_int_readyq_enqueue(rq_link);
	    }
	    else{
	      issue_fp_readyq_enqueue(rq_link);
	    }
	  }
	  dep_chain_pointer = dep_chain_pointer->next;
	}
	
	/* Instructions whose third operand depend on this result (only
	 applicable to cmov instructions) */
	dep_chain_pointer = map_rb[elem->inst_desc->r_buf_no].cmovdeps;
	while (dep_chain_pointer != NULL){
	  dep_chain_pointer->qelem->cmov = FALSE;
	  if (dep_chain_pointer->qelem->inst_desc->dest_reg == INTEGER){
	    regs.regs_R[dep_chain_pointer->qelem->inst_desc->out_phy_reg] = 
	      regs.regs_R[elem->inst_desc->out_phy_reg];
	  }
	  else{
	    regs.regs_F.d[dep_chain_pointer->qelem->inst_desc->out_phy_reg] = 
	      regs.regs_F.d[elem->inst_desc->out_phy_reg];
	  }
	  if (OPERANDS_READY(dep_chain_pointer->qelem)){
	    rq_link->qelem = dep_chain_pointer->qelem;
	    rq_link->inum = 
	      map_rb[elem->inst_desc->r_buf_no].inst_desc->inum;
	    rq_link->qelem->op_delay[1]=sim_cycle;
	    rq_link->qelem->op_clusters[1]=elem->inst_desc->clus_assigned;
	    if (rq_link->qelem->inst_desc->dest_reg == INTEGER){
	      issue_int_readyq_enqueue(rq_link);
	    }
	    else{
	      issue_fp_readyq_enqueue(rq_link);
	    }
	  }
	  dep_chain_pointer = dep_chain_pointer->next;
	}
	
	if (elem->inst_desc->in_arch_regs[0] != DNA) {
	  map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	    cur_int_in1_mapping;
	  map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	    cur_fp_in1_mapping;
	}
	if (elem->inst_desc->in_arch_regs[1] != DNA) {
	  map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	    cur_int_in2_mapping;
	  map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	    cur_fp_in2_mapping;       
	}
	regs.regs_PC = saved_PC;
	regs.regs_NPC = saved_NPC;
      }
    }
    else if (map_rb[elem->inst_desc->r_buf_no].in_LQ == TRUE) { /* In LQ */
      /* BUGFIX 04/24/2004 - Start */
      /* Aamer Jaleel              */
      /* <ajaleel@umd.edu>         */
      int sqcnt;
      /* BUGFIX 04/24/2004 - End   */
      if (issue_lq[elem->inst_desc->lq_no].cachemiss ==  FALSE) {
	/* Execute the first time */
	if (issue_lq[elem->inst_desc->lq_no].tlbmiss == FALSE &&
	    issue_lq[elem->inst_desc->lq_no].mshrfull == FALSE) {
	  sim_total_refs++;
	  sim_total_loads++;
	  /* Save the current PC value and load the value of the PC of
	     this instruction into regs_PC */ 
	  MD_SET_OPCODE(issue_lq[elem->inst_desc->lq_no].inst_desc->op, 
			issue_lq[elem->inst_desc->lq_no].inst_desc->IR);
	  /* In case the input register mappings have changed, we need
	     to save the current mapping and put the 
	     current instruction mapping to ensure correct execution */
	  
	  /* Save the current mapping of the input integer registers*/
	  if (elem->inst_desc->in_arch_regs[0] != DNA) {
	    cur_int_in1_mapping = 
	      map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg;   
	    cur_fp_in1_mapping = 
	      map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg;   
	  }
	  if (elem->inst_desc->in_arch_regs[1] != DNA) {
	    cur_int_in2_mapping = 
	      map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg;   
	    cur_fp_in2_mapping = 
	      map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg;   
	  }
	  
	  /* Write the mapping at the time this instruction was mapped */
	  if (elem->inst_desc->in_arch_regs[0] != DNA) {
	    if (elem->inst_desc->src_reg1 == INTEGER)
	      map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg
		= elem->inst_desc->in_phy_regs[0]; 
	    else
	      map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg
		= elem->inst_desc->in_phy_regs[0]; 
	  }
	  if (elem->inst_desc->in_arch_regs[1] != DNA) {
	    if (elem->inst_desc->src_reg2 == INTEGER)
	      map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg
		= elem->inst_desc->in_phy_regs[1]; 
	    else
	      map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg
		= elem->inst_desc->in_phy_regs[1]; 
	  }            
	  
	  regs.regs_R[MD_REG_ZERO]=0;
	  regs.regs_F.d[MD_REG_ZERO]=0;
	  inst = issue_lq[elem->inst_desc->lq_no].inst_desc->IR;
	  op = issue_lq[elem->inst_desc->lq_no].inst_desc->op;
	  issue_lq[elem->inst_desc->lq_no].completed = TRUE;
	  switch (op) {
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,I1,I2,I3)	\
      case OP:	\
      CALC_ADDR(OP, issue_lq[elem->inst_desc->lq_no].addr, issue_lq[elem->inst_desc->lq_no].value);	\
      break;
#define DEFLINK(OP,MSK,NAME,MASK,SHIFT)	\
      case OP:	\
        panic("attempted to execute a linking opcode");
#define CONNECT(OP)

#include "alpha.def"
	  default:
	    panic("trying to execute bogus inst");
	  }
	  /* If addr is bogus, trap here. Otherwise send it to memory system. 
	   */
	  
	  if (MD_VALID_ADDR(issue_lq[elem->inst_desc->lq_no].addr)) { 
	    switch (op) {
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,I1,I2,I3)	\
      case OP:	\
      SYMCAT(OP,_IMPL);   \
      break;
#define DEFLINK(OP,MSK,NAME,MASK,SHIFT)	\
      case OP:	\
          op = MD_NOP_OP;						\
	  break;
#define CONNECT(OP)

#include "alpha.def"
	    default:
	      panic("trying to execute bogus inst");
	    }
	    if (elem->inst_desc->in_arch_regs[0] != DNA) {
	      map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg
		= cur_int_in1_mapping; 
	      map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg
		= cur_fp_in1_mapping; 
	    }
	    if (elem->inst_desc->in_arch_regs[1] != DNA) {
	      map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg
		= cur_int_in2_mapping; 
	      map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg
		= cur_fp_in2_mapping;        
	    }
	    /* Check for load/load replay trap */
	    if (load_replay_trap) {
	      int shift_size = 3;
              stat_lq_reads++;
	      i=(elem->inst_desc->lq_no+1)%issue_lq_nelem;
	      while (elem->inst_desc->lq_no != issue_lq_tail &&
		     i != issue_lq_tail ) {
		/* Change this linear search into a hash table search */
		/* Find shift size */
		shift_size = (issue_lq[i].size >
			      issue_lq[elem->inst_desc->lq_no].size)?
		  issue_lq[i].size:issue_lq[elem->inst_desc->lq_no].size;   
		if ((issue_lq[i].addr == 
                     issue_lq[elem->inst_desc->lq_no].addr) &&
		    (issue_lq[i].completed == TRUE)) {
                  /* BUGFIX 04/24/2004 - Start */
                  /* Aamer Jaleel              */
                  /* <ajaleel@umd.edu>         */
                  /* BUGFIX 04/24/2004 - End   */
		  map_rb[issue_lq[i].inst_desc->r_buf_no].replaytrap = TRUE;
		  map_rb[issue_lq[i].inst_desc->r_buf_no].completed = TRUE;
		  map_rb[issue_lq[i].inst_desc->r_buf_no].correctPC = 
		    issue_lq[i].inst_desc->regs_PC;
		  issue_lq[i].inst_desc->load_trap_penalty = 30;
		  wb_load_replaytrap++;
		  /* Replay trap */
		  break;
		}
		i = (i+1)%issue_lq_nelem;
	      }
	    }
            if( load_miss_load_replay_trap ) { //load_miss_load_replay trap ) {
	      int lqptr = issue_lq_head, lqcnt = issue_lq_num;
	      while( lqcnt && lqptr != elem->inst_desc->lq_no) {
		if( issue_lq[ lqptr ].completed   // and if the older load is completed
		    && issue_lq[ lqptr ].addr ==  (issue_lq[elem->inst_desc->lq_no].addr) // if the addresses are the same
		    && issue_lq[ lqptr ].cachemiss  // and it had a cache miss
		    && !map_rb[issue_lq[ lqptr ].inst_desc->r_buf_no].completed // and it hasn't been finished yet
		    ) {

		  map_rb[elem->inst_desc->r_buf_no].replaytrap = TRUE;
		  map_rb[elem->inst_desc->r_buf_no].completed = TRUE;
		  map_rb[elem->inst_desc->r_buf_no].correctPC = elem->inst_desc->regs_PC;
		  elem->inst_desc->load_trap_penalty = 30;
		  wb_load_miss_load_replaytrap++;
		}
		lqcnt = (lqptr == elem->inst_desc->lq_no) ? 0 : lqcnt-1;
		lqptr = (lqptr+1)%issue_lq_nelem;
	      }
	    }
	  }
	  else {
	    /* Should not have invalid address at the head of ROB */
            if (elem->inst_desc->r_buf_no == map_rb_head) {
              panic("Invalid address in non-speculative load For ROB[%d] LD: %d", map_rb_head, elem->inst_desc->inum);
            }
	    if (elem->inst_desc->in_arch_regs[0] != DNA) {
	      map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg
		= cur_int_in1_mapping; 
	      map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg
		= cur_fp_in1_mapping; 
	    }
	    if (elem->inst_desc->in_arch_regs[1] != DNA) {
	      map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg
		= cur_int_in2_mapping; 
	      map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg
		= cur_fp_in2_mapping;        
	    }
	    map_rb[elem->inst_desc->r_buf_no].completed = TRUE;
	    map_rb[elem->inst_desc->r_buf_no].replaytrap = TRUE;
	    map_rb[elem->inst_desc->r_buf_no].correctPC = 
	      elem->inst_desc->regs_PC;
	    continue;
	  }
	}


        issue_lq[elem->inst_desc->lq_no].exec_cycle = sim_cycle;
        /* 
	** START
	** BUG FIX:  08/2004 (Aamer Jaleel <ajaleel@umd.edu>)
	** Forwarding From Store Queue
	*/

	issue_lq[elem->inst_desc->lq_no].stqhit = FALSE;
 	i=commit_sq_head;
	sqcnt = commit_sq_num;
        stat_sq_reads++;
	while ( sqcnt ) { 
	  if ( commit_sq[i].inst_desc && 
               ((commit_sq[i].inst_desc->inum < elem->inst_desc->inum && 
                 (elem->inst_desc->inum - commit_sq[i].inst_desc->inum ) < 
                 map_rb_nelem) || 
                (elem->inst_desc->inum > commit_sq[i].inst_desc->inum && 
                 (elem->inst_desc->inum - commit_sq[i].inst_desc->inum) > 
                 map_rb_nelem))) { 

            if (commit_sq[i].completed == TRUE && 
                commit_sq[i].addr == issue_lq[elem->inst_desc->lq_no].addr ) {
              // the assumption here is that a load and store issued in the same cycle 
              // that have the same address match maybe not be able to fwd data
              // so trap, if u don't want to do that, disable the following macro
              if((commit_sq[i].exec_cycle == sim_cycle) ) {

                // cause a store trap for this instruction
                issue_lq[elem->inst_desc->lq_no].stqhit = FALSE;
                map_rb[elem->inst_desc->r_buf_no].replaytrap = TRUE;
                
                elem->inst_desc->load_trap_penalty = 30;
                wb_store_replaytrap++;
                map_rb[elem->inst_desc->r_buf_no].correctPC  = 
                  issue_lq[elem->inst_desc->lq_no].inst_desc->regs_PC;
                
                
                if (fetch_st_table_size) {
                  fetch_st_wait_table[(elem->inst_desc->regs_PC) & fetch_st_wait_mask] = (sim_cycle/ST_WAIT_CLR)+1; 
                }
              }
              else {

                /* Check if this is a different size trap */
                // OR IF IT IS A LDS, THEN CAUSE IT TO TRAP DUE TO FUNKY BIT WISE OPS IN LDS
                /*if((issue_lq[elem->inst_desc->lq_no].size > commit_sq[i].size) || 
                  (elem->inst_desc->op == LDS)) {*/
                /* Forwarding is done only if
                   1. load is not a LDS
                   2. Load and store sizes are the same
                   3. Load and store are to the same type of register */
                if ((elem->inst_desc->op == LDS) || 
                    (issue_lq[elem->inst_desc->lq_no].size 
                     != commit_sq[i].size) || 
                    (elem->inst_desc->dest_reg != 
                     commit_sq[i].inst_desc->reg_type[0])) {
                  issue_lq[elem->inst_desc->lq_no].stqhit = FALSE;
                  map_rb[elem->inst_desc->r_buf_no].replaytrap = TRUE;
                    
                  elem->inst_desc->load_trap_penalty = 30;
                  wb_diffsize_replaytrap++;
                  map_rb[elem->inst_desc->r_buf_no].correctPC  = issue_lq[elem->inst_desc->lq_no].inst_desc->regs_PC;
                  if (fetch_st_table_size) {
                    fetch_st_wait_table[(elem->inst_desc->regs_PC) & fetch_st_wait_mask] = (sim_cycle/ST_WAIT_CLR)+1; 
                  }                  
                }
                else {
                  quad_t f_result_fwd, i_result_fwd;
                  
                  wb_lsq_data_fwd++;
                  
                  inst = issue_lq[elem->inst_desc->lq_no].inst_desc->IR;
                  op = issue_lq[elem->inst_desc->lq_no].inst_desc->op;
		    
                  i_result_fwd = 
                    (issue_lq[elem->inst_desc->lq_no].size == commit_sq[i].size) ? 
                    commit_sq[i].value.i_result : 
                    ( commit_sq[i].value.i_result  
                      // take the Store's data... shift it...
                      >> (((issue_lq[elem->inst_desc->lq_no].addr & MASK_0to2)
                           - (commit_sq[i].addr & MASK_0to2))*8)
                      ) & SZ_MASK[issue_lq[elem->inst_desc->lq_no].size];
                  
                  f_result_fwd = 
                    (issue_lq[elem->inst_desc->lq_no].size == commit_sq[i].size) ? 
                    (quad_t) commit_sq[i].value.f_result : 
                    ( (quad_t) commit_sq[i].value.f_result      
                      // take the Store's data... shift it...
                      >> (((issue_lq[elem->inst_desc->lq_no].addr & MASK_0to2)
                           - (commit_sq[i].addr & MASK_0to2))*8)
                      ) & SZ_MASK[issue_lq[elem->inst_desc->lq_no].size];
                  
                  map_rb[elem->inst_desc->r_buf_no].replaytrap = FALSE;
                  issue_lq[elem->inst_desc->lq_no].stqhit = TRUE;
                  issue_lq[elem->inst_desc->lq_no].stqhitinum = 
                    commit_sq[i].inst_desc->inum;		    
                  
                  if( elem->inst_desc->dest_reg == INTEGER &&
                      elem->inst_desc->out_phy_reg != DNA ) {
                    
                    
                    if( commit_sq[i].inst_desc->reg_type[0] == INTEGER ) {
                      regs.regs_R[elem->inst_desc->out_phy_reg] =  i_result_fwd;
                    }
                    else {
                      regs.regs_R[elem->inst_desc->out_phy_reg] =  f_result_fwd;
                    }
                  }
                  else if( elem->inst_desc->dest_reg == FLOATINGPT &&
                           elem->inst_desc->out_phy_reg != DNA ) {
                    
                    if( commit_sq[i].inst_desc->reg_type[0] == INTEGER ) {
                      regs.regs_F.d[elem->inst_desc->out_phy_reg] = i_result_fwd;
                      regs.regs_F.q[elem->inst_desc->out_phy_reg] = i_result_fwd;
                    }
                    else {
                      regs.regs_F.d[elem->inst_desc->out_phy_reg] = f_result_fwd;
                      regs.regs_F.q[elem->inst_desc->out_phy_reg] = f_result_fwd;
                      
                    }
                  }
                  
                  load_lat = 3;
                  SQ_miss = FALSE;
                }
              }
            }
            /*else if( commit_sq[i].completed == TRUE 
                     && ( (issue_lq[elem->inst_desc->lq_no].addr 
                           >= commit_sq[i].addr && 
                           issue_lq[elem->inst_desc->lq_no].addr <= 
                           (commit_sq[i].addr + commit_sq[i].size + 1) )    
                          || ((issue_lq[elem->inst_desc->lq_no].addr + 
                               issue_lq[elem->inst_desc->lq_no].size + 1) 
                              >= commit_sq[i].addr 
                              && (issue_lq[elem->inst_desc->lq_no].addr + 
                                  issue_lq[elem->inst_desc->lq_no].size + 1) 
                              <= (commit_sq[i].addr + commit_sq[i].size + 1) )
                          || ((commit_sq[i].addr >= 
                               issue_lq[elem->inst_desc->lq_no].addr 
                               && (commit_sq[i].addr + commit_sq[i].size + 1) <= 
                               (issue_lq[elem->inst_desc->lq_no].addr + 
                               issue_lq[elem->inst_desc->lq_no].size + 1))))) {*/
            else if (commit_sq[i].completed == TRUE  &&
                     (issue_lq[elem->inst_desc->lq_no].addr >> 3) == 
                     (commit_sq[i].addr >> 3)) {
              
              issue_lq[elem->inst_desc->lq_no].stqhit = FALSE;
              map_rb[elem->inst_desc->r_buf_no].replaytrap = TRUE;
              
              elem->inst_desc->load_trap_penalty = 30;
              wb_diffsize_replaytrap++;
              map_rb[elem->inst_desc->r_buf_no].correctPC  = 
                issue_lq[elem->inst_desc->lq_no].inst_desc->regs_PC;
              if (fetch_st_table_size) {
                fetch_st_wait_table[(elem->inst_desc->regs_PC) & fetch_st_wait_mask] = (sim_cycle/ST_WAIT_CLR)+1; 
              }
            }
          }
	  i = (i+1)%commit_sq_nelem;	    
	  sqcnt--;
	}

	/* 
	** END
	*/

	/* Access memory for load inst */
	if (dcache && SQ_miss) {
	  cache_access_packet *c_packet;
	  if (issue_lq[elem->inst_desc->lq_no].tlbmiss == TRUE) {
	    c_packet = 
	      cache_create_access_packet(dcache, (Read |
						  Restarted_access), 
					 (issue_lq[elem->inst_desc->lq_no].addr & ~3), 
					 Virtual, 4, 
					 (void *)
					 (issue_lq+elem->inst_desc->lq_no),
					 (void *)cache_load_store_exec, 
					 (void *)valid_lq, (MSHR_STAMP_TYPE)
					 elem->inst_desc->inum);
	  }
	  else {
	    c_packet = 
	      cache_create_access_packet(dcache, (Read | Pipeline_access),
					 (issue_lq[elem->inst_desc->lq_no].addr & ~3),
					 Virtual, 4, 
					 (void *)
					 (issue_lq+elem->inst_desc->lq_no),
					 (void *)cache_load_store_exec, 
					 (void *)valid_lq, (MSHR_STAMP_TYPE)
					 elem->inst_desc->inum);
	  }
	  /* access the cache if non-faulting */
	  load_lat = cache_timing_access(sim_cycle, c_packet);
	  /* If we don't have a cache hit find out what 
	     it was and take appropriate action */
	  if (load_lat <= 0) {
	    if (load_lat == CACHE_MISS) {
	      issue_lq[elem->inst_desc->lq_no].cachemiss = TRUE;
	      issue_lq[elem->inst_desc->lq_no].tlbmiss = FALSE;
	      issue_lq[elem->inst_desc->lq_no].mshrfull = FALSE;
	    }
	    
	    else if ((load_lat == MSHRS_FULL) || (load_lat == TARGET_FULL)) {
	      issue_lq[elem->inst_desc->lq_no].mshrfull = TRUE;
	      issue_lq[elem->inst_desc->lq_no].tlbmiss = FALSE;
	    }
	    else if (load_lat == TLB_MISS) {
	      issue_lq[elem->inst_desc->lq_no].tlbmiss = TRUE;
	      issue_lq[elem->inst_desc->lq_no].mshrfull = FALSE;
	    }
	    else if (load_lat == TARGET_OCCUPIED) {
	      map_rb[issue_lq[elem->inst_desc->lq_no].inst_desc->r_buf_no].completed = TRUE;
	      map_rb[issue_lq[elem->inst_desc->lq_no].inst_desc->r_buf_no].correctPC = 
		issue_lq[elem->inst_desc->lq_no].inst_desc->regs_PC;
	      map_rb[map_rb_head].replaytrap = TRUE;
	      map_rb[map_rb_head].completed = TRUE;
	      map_rb[map_rb_head].inst_desc->load_trap_penalty = 29;
	    }
	    else
	      assert (load_lat == BAD_ADDRESS);
	  }
	}
	else {
	  /* no caches defined, just use op latency */
	  /* NOTE: If you want to have no caches and have these
	     loads go directly to some main memory bank, you
	     will have to add a call to mem_bank_access() here */
	  load_lat = 3;
	}
	if (CACHE_HIT(load_lat)) {
	  /* use computed cache access latency */
	  /* If using load use speculation, wake up dependent instructions now,
	     so that they will be in the execute stage when the data comes back
	  */
	  if (wb_load_use_speculation && load_lat > 1) {
	    /* call wakeup after two cycles to fake load use speculation */
	    if (elem->inst_desc->dest_reg == INTEGER) 
	      eventq_queue_callback(sim_cycle+load_lat-1, 
				    (void *)writeback_wakeup, 
				    elem->inst_desc->lq_no);
	    else
	      eventq_queue_callback(sim_cycle+load_lat, 
				    (void *)writeback_wakeup, 
				    elem->inst_desc->lq_no);
	  }
	  /* Floating point load latency is one more than integer */
	  if (elem->inst_desc->dest_reg == INTEGER)
	    eventq_queue_callback2(sim_cycle+load_lat, 
				   (void *)writeback_exec_loadstore, 
				   elem->inst_desc->lq_no, LQ);
	  else
	    eventq_queue_callback2(sim_cycle+load_lat+1, 
				   (void *)writeback_exec_loadstore, 
				   elem->inst_desc->lq_no, LQ);
	}
	/* If we sent a bad (misspeculated) address to the cache,
	 * just treat it like a cache hit (for now). */
	else if (load_lat == BAD_ADDRESS) {
	  eventq_queue_callback2(sim_cycle+dcache->hit_latency, 
				 (void *)writeback_exec_loadstore, 
				 elem->inst_desc->lq_no, LQ);
	}
      }
      else {
	writeback_exec_loadstore(sim_cycle, elem->inst_desc->lq_no, LQ);
      }
    }
    else { /* In SQ */
      /* We do only the address computation here. The store is executed in 
	 commit when we send it to the memory system */
      sim_total_refs++;
      MD_SET_OPCODE(commit_sq[elem->inst_desc->sq_no].inst_desc->op, 
		      commit_sq[elem->inst_desc->sq_no].inst_desc->IR);
      /* In case the input register mappings have changed, we need 
	 to save the current mapping and put the
	 current instruction mapping to ensure correct execution */
      
      /* Save the current mapping of the input integer registers*/
      if (elem->inst_desc->in_arch_regs[0] != DNA) {
	cur_int_in1_mapping = 
	  map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg;   
	cur_fp_in1_mapping = 
	  map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg;   
      }
      if (elem->inst_desc->in_arch_regs[1] != DNA) {
	cur_int_in2_mapping = 
	  map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg;   
	cur_fp_in2_mapping = 
	  map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg;   
      }
      
      
      /* Write the mapping at the time this instruction was mapped */
      if (elem->inst_desc->in_arch_regs[0] != DNA) {
	map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	  elem->inst_desc->in_phy_regs[0];
	map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	  elem->inst_desc->in_phy_regs[0];
      }
      if (elem->inst_desc->in_arch_regs[1] != DNA) {
	map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	  elem->inst_desc->in_phy_regs[1];
	map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	  elem->inst_desc->in_phy_regs[1];
      }            
      
      regs.regs_R[MD_REG_ZERO]=0;
      regs.regs_F.d[MD_REG_ZERO] = 0;
      inst = commit_sq[elem->inst_desc->sq_no].inst_desc->IR;
      op = commit_sq[elem->inst_desc->sq_no].inst_desc->op;
      switch (op) {
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,I1,I2,I3)	\
      case OP:	\
      CALC_ADDR(OP, commit_sq[elem->inst_desc->sq_no].addr, commit_sq[elem->inst_desc->sq_no].value);	\
      break;
#define DEFLINK(OP,MSK,NAME,MASK,SHIFT)	\
      case OP:	\
        panic("attempted to execute a linking opcode");
#define CONNECT(OP)

#include "alpha.def"
      default:
	  panic("trying to execute bogus inst");
      }
      if (elem->inst_desc->in_arch_regs[0] != DNA) {
	map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	  cur_int_in1_mapping;
	map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	  cur_fp_in1_mapping;
      }
      if (elem->inst_desc->in_arch_regs[1] != DNA) {
	map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	  cur_int_in2_mapping;
	map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	  cur_fp_in2_mapping;       
      }
      map_rb[elem->inst_desc->r_buf_no].completed = TRUE;
      commit_sq[elem->inst_desc->sq_no].completed = TRUE;
      commit_sq[elem->inst_desc->sq_no].exec_cycle = sim_cycle;


      /**** START ****/
      /* check for store replay trap */
      /* 
      ** Aamer Jaleel <ajaleel@umd.edu>
      **
      ** BUG FIX - 08/2004 
      ** 
      ** Store checks the LDQ after its effective address computation to
      ** see if any LDs newer than it were issued before it did.  In this
      ** case they have incorrect data so flag them with a store replay
      ** trap
      */ 
      
      {
	int i, j;
	int lqsq_no = elem->inst_desc->sq_no;
        stat_lq_reads++;
	i=issue_lq_head;
	j = issue_lq_num;
        
	while (j > 0) { 
	  if ((commit_sq[lqsq_no].inst_desc->inum < issue_lq[i].inst_desc->inum 
	       && (issue_lq[i].inst_desc->inum - commit_sq[lqsq_no].inst_desc->inum) < 
               map_rb_nelem )
	      || (commit_sq[lqsq_no].inst_desc->inum > issue_lq[i].inst_desc->inum 
		  && (commit_sq[lqsq_no].inst_desc->inum - issue_lq[i].inst_desc->inum)
                  > map_rb_nelem)) {
            if ( issue_lq[i].completed == TRUE) {
              /*
              // has the load completed?
                 && ( (issue_lq[i].addr >= commit_sq[lqsq_no].addr && 
                       issue_lq[i].addr <= 
                       (commit_sq[lqsq_no].addr + 
                        commit_sq[lqsq_no].size + 1) )    
                      // is the load start/end in the range of the store?
                      || ((issue_lq[i].addr + issue_lq[i].size + 1) >= 
                          commit_sq[lqsq_no].addr && 
                          (issue_lq[i].addr + issue_lq[i].size + 1) <= 
                          (commit_sq[lqsq_no].addr + commit_sq[lqsq_no].size + 1) )
                      || ((commit_sq[lqsq_no].addr >= issue_lq[i].addr && 
                           (commit_sq[lqsq_no].addr + 
                            commit_sq[lqsq_no].size + 1) <= 
                            (issue_lq[i].addr + issue_lq[i].size + 1))))) {*/
              if (issue_lq[i].addr == commit_sq[lqsq_no].addr && 
                  ( !issue_lq[i].stqhit || (issue_lq[i].stqhit && (issue_lq[i].stqhitinum < commit_sq[lqsq_no].inst_desc->inum)) )) {
                
                /* Replay trap */
                map_rb[issue_lq[i].inst_desc->r_buf_no].replaytrap = TRUE;
                map_rb[issue_lq[i].inst_desc->r_buf_no].correctPC  = issue_lq[i].inst_desc->regs_PC;
                wb_store_replaytrap++;
                
                issue_lq[i].inst_desc->load_trap_penalty =30;
                if (fetch_st_table_size) {
                  fetch_st_wait_table[(issue_lq[i].inst_desc->regs_PC) & fetch_st_wait_mask] = (sim_cycle/ST_WAIT_CLR)+1; 
                }
                
                break;
              }
              else if (((issue_lq[i].addr) >> 3  == (commit_sq[lqsq_no].addr >> 3))) {
                /* Replay trap */
                map_rb[issue_lq[i].inst_desc->r_buf_no].replaytrap = TRUE;
                map_rb[issue_lq[i].inst_desc->r_buf_no].correctPC  = issue_lq[i].inst_desc->regs_PC;
                wb_store_replaytrap++;
                
                issue_lq[i].inst_desc->load_trap_penalty =30;
                if (fetch_st_table_size) {
                  fetch_st_wait_table[(issue_lq[i].inst_desc->regs_PC) & fetch_st_wait_mask] = (sim_cycle/ST_WAIT_CLR)+1; 
                }
                
                break;
              }
            }
/*else {
       }*/
          }
	  j--;
	  i = (i+1)%issue_lq_nelem;
	}

      }
      /**** END -aj ****/
    }
  }
}

/* BUGFIX 09/08/2003 - Start */
/* Tell whether a load/store queue entry carries the expected tag.
   Returns 1 when lq->tag equals tag and 0 otherwise.  lq is dereferenced
   unconditionally, so it must not be NULL. */
int valid_lq(struct load_store_queue *lq,unsigned long tag) {
  if (lq->tag != tag) {
    return 0;
  }
  return 1;
}
/* BUGFIX 09/08/2003 - End */

/* Load-use speculation support: wake up the instructions that wait on the
   result of the load held in issue_lq[lqsq_no].

   The reorder buffer entry of the load owns three dependence chains
   (op1_deps, op2_deps and cmovdeps).  Each chain is drained front to back:
   the consumer is updated, pushed onto the integer or floating point ready
   queue once OPERANDS_READY() holds for it, and the chain node is handed
   back through return_to_free_list().

   now      - not referenced in this body; timestamps come from sim_cycle
   lqsq_no  - index of the load in issue_lq */
void writeback_wakeup(tick_t now, int lqsq_no) {
  /* Reorder buffer slot of the load whose consumers are being woken */
  const int rb = issue_lq[lqsq_no].inst_desc->r_buf_no;
  struct dep_chain *dep;

  /* Consumers whose first operand is produced by this load */
  for (dep = map_rb[rb].op1_deps; dep != NULL; dep = map_rb[rb].op1_deps) {
    dep->qelem->op_ready[0] = READY;
    dep->qelem->optime[0] = sim_cycle;

    if (OPERANDS_READY(dep->qelem)) {
      rq_link->qelem = dep->qelem;
      rq_link->inum = map_rb[rb].inst_desc->inum;
      rq_link->qelem->op_delay[0] = 0;
      rq_link->qelem->op_clusters[0] =
        issue_lq[lqsq_no].inst_desc->clus_assigned;

      if (rq_link->qelem->inst_desc->dest_reg == INTEGER) {
        issue_int_readyq_enqueue(rq_link);
      }
      else {
        issue_fp_readyq_enqueue(rq_link);
      }
    }

    /* Unlink the node first, then recycle it */
    map_rb[rb].op1_deps = dep->next;
    return_to_free_list(dep);
  }

  /* Consumers whose second operand is produced by this load */
  for (dep = map_rb[rb].op2_deps; dep != NULL; dep = map_rb[rb].op2_deps) {
    dep->qelem->op_ready[1] = READY;
    dep->qelem->optime[1] = sim_cycle;

    if (OPERANDS_READY(dep->qelem)) {
      rq_link->qelem = dep->qelem;
      rq_link->inum = map_rb[rb].inst_desc->inum;
      rq_link->qelem->op_delay[1] = 0;
      rq_link->qelem->op_clusters[1] =
        issue_lq[lqsq_no].inst_desc->clus_assigned;

      if (rq_link->qelem->inst_desc->dest_reg == INTEGER) {
        issue_int_readyq_enqueue(rq_link);
      }
      else {
        issue_fp_readyq_enqueue(rq_link);
      }
    }

    map_rb[rb].op2_deps = dep->next;
    return_to_free_list(dep);
  }

  /* Consumers whose third operand is produced by this load (only
     applicable to cmov instructions).  The loaded value is copied into
     the consumer's own output physical register and its cmov flag is
     cleared. */
  for (dep = map_rb[rb].cmovdeps; dep != NULL; dep = map_rb[rb].cmovdeps) {
    dep->qelem->cmov = FALSE;

    if (dep->qelem->inst_desc->dest_reg == INTEGER) {
      regs.regs_R[dep->qelem->inst_desc->out_phy_reg] =
        regs.regs_R[issue_lq[lqsq_no].inst_desc->out_phy_reg];
    }
    else {
      regs.regs_F.d[dep->qelem->inst_desc->out_phy_reg] =
        regs.regs_F.d[issue_lq[lqsq_no].inst_desc->out_phy_reg];
    }

    if (OPERANDS_READY(dep->qelem)) {
      rq_link->qelem = dep->qelem;
      rq_link->inum = map_rb[rb].inst_desc->inum;
      /* Slot 0 of op_delay/op_clusters is written for the cmov chain too */
      rq_link->qelem->op_delay[0] = 0;
      rq_link->qelem->op_clusters[0] =
        issue_lq[lqsq_no].inst_desc->clus_assigned;

      if (rq_link->qelem->inst_desc->dest_reg == INTEGER) {
        issue_int_readyq_enqueue(rq_link);
      }
      else {
        issue_fp_readyq_enqueue(rq_link);
      }
    }

    map_rb[rb].cmovdeps = dep->next;
    return_to_free_list(dep);
  }
}

/* Execute the load/store instruction.

   now       - not referenced directly in this body; timestamps below are
               taken from the global sim_cycle
   lqsq_no   - index into issue_lq (lq_or_sq == LQ) or commit_sq
               (lq_or_sq == SQ); asserted to be non-negative
   lq_or_sq  - LQ or SQ; any other value trips an assert

   Load path: marks the load's reorder buffer entry completed, updates the
   register write counters, drains the op1/op2/cmov dependence chains
   (queueing consumers whose operands are all ready) and flags the
   destination physical register as complete.

   Store path: temporarily installs the store's own input register
   mappings and PC, runs the instruction implementation selected from
   alpha.def, then restores the saved mappings and PC.

   In both paths the work is done only when the queue entry's tag equals
   the inum of its instruction; otherwise the function returns without
   side effects beyond the asserts. */
void writeback_exec_loadstore(tick_t now, int lqsq_no, int lq_or_sq) {
  register enum md_opcode op;
  
  /* integer map-table entries of the two input architectural registers,
     saved while a store is executed and written back afterwards */
  static int cur_int_in1_mapping;
  static int cur_int_in2_mapping; 
  
  /* floating point map-table entries of the same two input architectural
     registers, saved and written back the same way */
  static int cur_fp_in1_mapping;
  static int cur_fp_in2_mapping; 
  
  /* NOTE(review): inst, addr, temp_*, and spec_mode are not all referenced
     by the code visible here; presumably the instruction bodies expanded
     from alpha.def use them - confirm before renaming or removing any. */
  static md_inst_t inst;
  static md_addr_t saved_PC;
  static md_addr_t saved_NPC;
  static struct dep_chain *dep_chain_pointer;
  static md_addr_t addr;
  static byte_t temp_byte;
  static half_t temp_half;
  static word_t temp_word;
  static quad_t temp_quad;
  static int spec_mode=0;
  /* i and j are only used by the compiled-out replay trap scan below */
  int i,j=0;
    
  /* sanity-check the arguments */
  assert (lqsq_no >= 0);
  assert (lq_or_sq == LQ || lq_or_sq ==SQ);
  
  /* If this is a valid returning load (the entry's tag still matches the
     inum of its instruction) */
  if (lq_or_sq == LQ &&  
      issue_lq[lqsq_no].tag == issue_lq[lqsq_no].inst_desc->inum) {   
    map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].completed=TRUE;
    map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].mispredict=FALSE;
    /* Update register stats.
       NOTE(review): each counter is incremented twice per load; the reason
       is not evident from this function - confirm it is intentional. */
    if (issue_lq[lqsq_no].inst_desc->dest_reg == INTEGER && 
        issue_lq[lqsq_no].inst_desc->out_phy_reg != DNA) {
      stat_int_reg_writes++;
      stat_int_reg_writes++;
    } 
    else if (issue_lq[lqsq_no].inst_desc->dest_reg == FLOATINGPT && 
             issue_lq[lqsq_no].inst_desc->out_phy_reg != DNA) {
      stat_fp_reg_writes++;
      stat_fp_reg_writes++;
    }
    /* Walk the output dependency chain, and insert instructions in
       the ready queue (done if we disable load use speculation ).
       First chain: consumers whose operand 0 comes from this load.
       rq_link->inum is set to the inum of the load's reorder buffer
       entry, and op_delay[0] to the current cycle. */
    while (map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op1_deps != NULL){
      map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op1_deps->qelem->op_ready[0] = READY;
      map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op1_deps->qelem->optime[0]
	= sim_cycle;
      if (OPERANDS_READY(map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op1_deps->qelem)){
	rq_link->qelem = 
	  map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op1_deps->qelem;
	rq_link->inum = 
	  map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].inst_desc->inum;
	rq_link->qelem->op_delay[0]=sim_cycle;
	rq_link->qelem->op_clusters[0] = 
	  issue_lq[lqsq_no].inst_desc->clus_assigned;
	if (rq_link->qelem->inst_desc->dest_reg == INTEGER){
	  issue_int_readyq_enqueue(rq_link);
	}
	else{
	  issue_fp_readyq_enqueue(rq_link);
	}
      }
      /* Pop the head node and give it back to the free list */
      dep_chain_pointer = 
	map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op1_deps;
      map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op1_deps = 
	map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op1_deps->next;
      return_to_free_list(dep_chain_pointer);
    }
    
    /* Second chain: consumers whose operand 1 comes from this load */
    while (map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op2_deps != NULL){
      map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op2_deps->qelem->op_ready[1] = READY;
      map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op2_deps->qelem->optime[1]
	= sim_cycle;
      if (OPERANDS_READY(map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op2_deps->qelem)){
	rq_link->qelem = 
	  map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op2_deps->qelem;
	rq_link->inum = 
	  map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].inst_desc->inum;
	rq_link->qelem->op_delay[1]=sim_cycle;
	rq_link->qelem->op_clusters[1]=
	  issue_lq[lqsq_no].inst_desc->clus_assigned;
	if (rq_link->qelem->inst_desc->dest_reg == INTEGER){
	  issue_int_readyq_enqueue(rq_link);
	}
	else{
	  issue_fp_readyq_enqueue(rq_link);
	}
      }
      dep_chain_pointer = 
	map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op2_deps;
      map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op2_deps = 
	map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].op2_deps->next;
      return_to_free_list(dep_chain_pointer);
    }
    /* Third chain (cmovdeps): clear the consumer's cmov flag and copy the
       load's output physical register into the consumer's output physical
       register.  Unlike the two loops above, op_delay[0] is set to 0 here
       and slot 0 of op_delay/op_clusters is the one written. */
    while (map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].cmovdeps != NULL) {
      map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].cmovdeps->qelem->cmov = 
	FALSE;
      if (map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].cmovdeps->qelem->inst_desc->dest_reg == INTEGER){
	regs.regs_R[map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].cmovdeps->qelem->inst_desc->out_phy_reg] = 
	  regs.regs_R[issue_lq[lqsq_no].inst_desc->out_phy_reg];
      }
      else{
	regs.regs_F.d[map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].cmovdeps->qelem->inst_desc->out_phy_reg] = 
	  regs.regs_F.d[issue_lq[lqsq_no].inst_desc->out_phy_reg];
      }
      if (OPERANDS_READY(map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].cmovdeps->qelem)){
	rq_link->qelem = 
	  map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].cmovdeps->qelem;
	rq_link->inum = 
	  map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].inst_desc->inum;
	rq_link->qelem->op_delay[0]=0;
	rq_link->qelem->op_clusters[0]=
	  issue_lq[lqsq_no].inst_desc->clus_assigned;
	if (rq_link->qelem->inst_desc->dest_reg == INTEGER){
	  issue_int_readyq_enqueue(rq_link);
	}
	else{
	  issue_fp_readyq_enqueue(rq_link); 
	}
      }
      dep_chain_pointer = 
	map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].cmovdeps;
      map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].cmovdeps = 
	map_rb[issue_lq[lqsq_no].inst_desc->r_buf_no].cmovdeps->next;
      return_to_free_list(dep_chain_pointer);
    }
    /* Flag the load's destination physical register as complete */
    if (issue_lq[lqsq_no].inst_desc->dest_reg == INTEGER && 
	issue_lq[lqsq_no].inst_desc->out_phy_reg != DNA) {
      regs.rcomplete[issue_lq[lqsq_no].inst_desc->out_phy_reg] = TRUE;
    } 
    else if (issue_lq[lqsq_no].inst_desc->dest_reg == FLOATINGPT && 
	     issue_lq[lqsq_no].inst_desc->out_phy_reg != DNA) {
      regs.fcomplete[issue_lq[lqsq_no].inst_desc->out_phy_reg] = TRUE;
    }
  }
  
  /* Else if this is a store whose queue entry tag still matches the inum
     of its instruction */

  else if (lq_or_sq == SQ 
    && commit_sq[lqsq_no].tag == commit_sq[lqsq_no].inst_desc->inum) {
    /* Store instruction */

    /* Save the current PC/NPC and load the PC/NPC of this instruction
       into regs; both are restored at the end of this branch.  elem is
       pointed at temp_elem and given the store's instruction descriptor. */
    saved_PC = regs.regs_PC;
    saved_NPC = regs.regs_NPC;
    elem = temp_elem;
    elem->inst_desc = commit_sq[lqsq_no].inst_desc;
    regs.regs_PC = commit_sq[lqsq_no].inst_desc->regs_PC;
    regs.regs_NPC = commit_sq[lqsq_no].inst_desc->regs_NPC;
    /* In case the input register mappings have changed, we need to
       save the current mapping and put the 
       current instruction mapping to ensure correct execution */
      
    /* Save the current mapping of the input registers (both the integer
       and the floating point table entries).  Both inputs are saved
       before either is overwritten below; keep that order, since the two
       inputs may name the same architectural register. */
    if (elem->inst_desc->in_arch_regs[0] != DNA) {
      cur_int_in1_mapping = 
	map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg;   
      cur_fp_in1_mapping = 
	map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg;   
    }
    if (elem->inst_desc->in_arch_regs[1] != DNA) {
      cur_int_in2_mapping = 
	map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg;   
      cur_fp_in2_mapping = 
	map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg;   
    }
    
    /* Write the mapping at the time this instruction was mapped; only the
       table selected by the source register class (src_reg1/src_reg2) is
       overwritten */
    if (elem->inst_desc->in_arch_regs[0] != DNA) {
      if (elem->inst_desc->src_reg1 == INTEGER)
	map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	  elem->inst_desc->in_phy_regs[0];
      else
	map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg = 
	  elem->inst_desc->in_phy_regs[0];
    }
    if (elem->inst_desc->in_arch_regs[1] != DNA) {
      if (elem->inst_desc->src_reg2 == INTEGER)
	map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	  elem->inst_desc->in_phy_regs[1];
      else
	map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg = 
	  elem->inst_desc->in_phy_regs[1];
    }
    /* Force the zero registers to 0 before executing */
    regs.regs_R[MD_REG_ZERO]=0;
    regs.regs_F.d[MD_REG_ZERO]=0;
    
    MD_SET_OPCODE(commit_sq[lqsq_no].inst_desc->op, 
		  commit_sq[lqsq_no].inst_desc->IR);
    /* Fetch the instruction word and opcode used by the dispatch below.
       The switch cases are generated from alpha.def: every DEFINST entry
       runs its <OP>_IMPL body, every DEFLINK entry is turned into a NOP. */
    inst = commit_sq[lqsq_no].inst_desc->IR;
    op = commit_sq[lqsq_no].inst_desc->op;
    switch (op) {
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,I1,I2,I3)	\
      case OP:	\
      SYMCAT(OP,_IMPL);  \
      break;
#define DEFLINK(OP,MSK,NAME,MASK,SHIFT)	\
      case OP:	\
        /* could speculatively decode a bogus inst, convert to NOP */	\
	  op = MD_NOP_OP;						\
	  /* compute output/input dependencies to out1-2 and in1-3 */	\
	  /* no EXPR */							\
	  break;
#define CONNECT(OP)

#include "alpha.def"
    default:
	panic("trying to execute bogus inst");
    }

    /* Compiled out: linear scan of the load queue that flags store replay
       traps on completed loads to the same 8-byte aligned address */
#if 0
    /* Check for store replay trap */
    i=issue_lq_head;
    j = issue_lq_num;
    while (j > 0) { 
      if ((commit_sq[lqsq_no].inst_desc->inum < issue_lq[i].inst_desc->inum 
	   && (issue_lq[i].inst_desc->inum -
	       commit_sq[lqsq_no].inst_desc->inum) 
	   < map_rb_nelem)  
	  || (commit_sq[lqsq_no].inst_desc->inum > issue_lq[i].inst_desc->inum 
	      && (commit_sq[lqsq_no].inst_desc->inum -
		  issue_lq[i].inst_desc->inum) > map_rb_nelem)) 
	/* Change this linear search into a hash table search */
	if ((issue_lq[i].addr >> 3)
	    == (commit_sq[lqsq_no].addr >> 3) && 
	    issue_lq[i].completed == TRUE) {
          /*if (issue_lq[i].stqhit == FALSE || 
              ((issue_lq[i].stqhit == TRUE) && 
              (issue_lq[i].stqhitinum !=commit_sq[lqsq_no].inst_desc->inum))) {*/
            map_rb[issue_lq[i].inst_desc->r_buf_no].replaytrap = TRUE;
            map_rb[issue_lq[i].inst_desc->r_buf_no].correctPC = 
              issue_lq[i].inst_desc->regs_PC;
            wb_store_replaytrap++;
            issue_lq[i].inst_desc->load_trap_penalty =30;
            if (fetch_st_table_size) {
              fetch_st_wait_table[(issue_lq[i].inst_desc->regs_PC) &
                                  fetch_st_wait_mask] =
                (sim_cycle/ST_WAIT_CLR)+1; 
            }
            /* Replay trap */
            break;
          }
      //}
      j--;
      i = (i+1)%issue_lq_nelem;
    }
#endif

    map_rb[commit_sq[lqsq_no].inst_desc->r_buf_no].mispredict=FALSE;
    /* Put back the map-table entries saved before execution (both the
       integer and the floating point entry of each input register) */
    if (elem->inst_desc->in_arch_regs[0] != DNA) {
      map_ir_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg =
	cur_int_in1_mapping; 
      map_fr_mapping[elem->inst_desc->in_arch_regs[0]].phy_reg =
	cur_fp_in1_mapping; 
    }
    if (elem->inst_desc->in_arch_regs[1] != DNA) {
      map_ir_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg =
	cur_int_in2_mapping; 
      map_fr_mapping[elem->inst_desc->in_arch_regs[1]].phy_reg =
	cur_fp_in2_mapping;        
    }
    /* Restore the PC/NPC that were live before this store was executed */
    regs.regs_PC = saved_PC;
    regs.regs_NPC = saved_NPC;
  }
}
