/*
 * slot.c - slot stage implementation
 *
 * This file is part of the Alpha simulator tool suite written by
 * Raj Desikan as part of the Bullseye project.
 * Copyright (C) 1999 by Raj Desikan
 *
 * This source file is distributed "as is" in the hope that it will be
 * useful.  The tool set comes with no warranty, and no author or
 * distributor accepts any responsibility for the consequences of its
 * use. 
 * 
 * Everyone is granted permission to copy, modify and redistribute
 * this tool set under the following conditions:
 * 
 *    This source code is distributed for non-commercial use only. 
 *    Please contact the maintainer for restrictions applying to 
 *    commercial use.
 *
 *    Permission is granted to anyone to make or distribute copies
 *    of this source code, either as received or modified, in any
 *    medium, provided that all copyright notices, permission and
 *    nonwarranty notices are preserved, and that the distributor
 *    grants the recipient permission for further redistribution as
 *    permitted by this document.
 *
 *    Permission is granted to distribute this file in compiled
 *    or executable form under the same conditions that apply for
 *    source code, provided that either:
 *
 *    A. it is accompanied by the corresponding machine-readable
 *       source code,
 *    B. it is accompanied by a written offer, with no time limit,
 *       to give anyone a machine-readable copy of the corresponding
 *       source code in return for reimbursement of the cost of
 *       distribution.  This written offer must permit verbatim
 *       duplication by anyone, or
 *    C. it is distributed by someone who received only the
 *       executable form, and is accompanied by a copy of the
 *       written offer of source code that they received concurrently.
 *
 * In other words, you are welcome to use, share and improve this
 * source file.  You are forbidden to forbid anyone else to use, share
 * and improve what you give them.
 *
 *
 *
*/

#include <stdio.h> 
#include <stdlib.h>
#include <math.h> 
#include "alpha.h"
#include "memory.h"
#include "syscall.h"
#include "regs.h" 
#include "cache.h"
#include "bpred.h"
#include "fetch.h"
#include "slot.h"
#include "issue.h"
#include "writeback.h"
#include "map.h"
#include "stats.h"
#include "eventq.h"

/* Number of bits used to encode the subcluster requirement of one
   instruction in the instruction mix. This depends on the slotting
   algorithm used. */
int slot_clus_bits = 2;

/* Slotting algorithm to use. Passed to SLOT_INSCLASS_SLOTCLASS() together
   with the instruction mix; presumably it selects the row of
   slot_instclass2slotclass (the macro is defined outside this file). */
int slot_algo = 0;

/* Slot -> map interface: the slot latch. SL is allocated by
   slot_stage_init() with slot_width entries. slot_stage() writes at
   slot_latch_tail and counts occupied entries in slot_latch_num;
   slot_latch_head is not read in this file (presumably advanced by the
   consumer stage). */
struct slot_latch *SL;
int slot_latch_head, slot_latch_tail;
int slot_latch_num;

/* Slotting width. Latch indices are wrapped with "& (slot_width - 1)",
   so this must be a power of two. */
int slot_width;

/* Not referenced in this file. NOTE(review): purpose cannot be determined
   from here - confirm against the other users. */
int last_branch = 1;

/* Initial value of line predictor: written into an entry's hysteresis
   counter (line_pred_hist) when slot_stage() retrains that entry. */
int line_pred_ini_value;

/* Whether the line predictor needs to be updated speculatively.
   Only consulted when built with FLEXIBLE_SIM. */
int line_pred_spec_update;

/* If the branch predictor contains an adder for computing target addresses.
   Only consulted when built with FLEXIBLE_SIM; otherwise the adder is
   always used for non-indirect control instructions. */ 
int slot_adder;

/* Whether to use static slotting into upper and lower subclusters.
   Only consulted when built with FLEXIBLE_SIM. */
int static_slotting;


/*
 * Maps an instruction mix (instruction class) to a slot class.
 *
 * The second index is the instruction mix built by slot_stage(): two bits
 * per instruction, instruction 0 of the group in the least significant
 * bits.  The row comments spell the mix with instruction 3 on the left and
 * instruction 0 on the right, using E (either subcluster), L (lower) and
 * U (upper); from the index positions this assumes E = 0, L = 1, U = 2 in
 * enum sub_cluster_type, with the encoding 3 unused.  The value is the slot
 * class, read the same way: e.g. ULLL puts instruction 3 in the upper
 * subcluster and instructions 2..0 in the lower one.
 *
 * The first index has a single row here; it is presumably selected by
 * slot_algo (the lookup macro is defined outside this file).
 *
 * Every mix made only of E/L/U must have a valid entry, otherwise
 * slot_stage() assigns no subcluster to the group.  The ELU* and LU** rows
 * used to be SLOT_CLASS_INVALID; they are filled in below so that fixed L/U
 * requirements are honoured and E instructions are resolved the same way as
 * in the neighbouring EE**, UL**, EU** and LE** rows.
 * NOTE(review): cross-check the added rows against the 21264 instruction
 * slotting table before relying on cycle-exact results.
 */
int slot_instclass2slotclass[][256] = {{
  ULUL,			/*EEEE*/
  ULUL,			/*EEEL*/
  ULLU,			/*EEEU*/
  SLOT_CLASS_INVALID,
  ULLU,			/*EELE*/
  UULL,			/*EELL*/
  ULLU,			/*EELU*/
  SLOT_CLASS_INVALID,
  ULUL,			/*EEUE*/
  ULUL,			/*EEUL*/
  LLUU,			/*EEUU*/
  SLOT_CLASS_INVALID,	/* 11..15: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  ULUL,			/*ELEE*/
  ULUL,			/*ELEL*/
  ULLU,			/*ELEU*/
  SLOT_CLASS_INVALID,
  ULLU,			/*ELLE*/
  ULLL,			/*ELLL*/
  ULLU,			/*ELLU*/
  SLOT_CLASS_INVALID,
  ULUL,			/*ELUE*/
  ULUL,			/*ELUL*/
  LLUU,			/*ELUU*/
  SLOT_CLASS_INVALID,	/* 27..31: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  LULU,			/*EUEE*/
  LUUL,			/*EUEL*/
  LULU,			/*EUEU*/
  SLOT_CLASS_INVALID,
  LULU,			/*EULE*/
  UULL,			/*EULL*/
  LULU,			/*EULU*/
  SLOT_CLASS_INVALID,
  LUUL,			/*EUUE*/
  LUUL,			/*EUUL*/
  LUUU,			/*EUUU*/
  SLOT_CLASS_INVALID,	/* 43..47: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,	/* 48..63: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  LULU,			/*LEEE*/
  LUUL,			/*LEEL*/
  LULU,			/*LEEU*/
  SLOT_CLASS_INVALID,
  LULU,			/*LELE*/
  LULL,			/*LELL*/
  LULU,			/*LELU*/
  SLOT_CLASS_INVALID,
  LUUL,			/*LEUE*/
  LUUL,			/*LEUL*/
  LLUU,			/*LEUU*/
  SLOT_CLASS_INVALID,	/* 75..79: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  LLUU,			/*LLEE*/
  LLUL,			/*LLEL*/
  LLUU,			/*LLEU*/
  SLOT_CLASS_INVALID,
  LLLU,			/*LLLE*/
  LLLL,			/*LLLL*/
  LLLU,			/*LLLU*/
  SLOT_CLASS_INVALID,
  LLUU,			/*LLUE*/
  LLUL,			/*LLUL*/
  LLUU,			/*LLUU*/
  SLOT_CLASS_INVALID,	/* 91..95: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  LULU,			/*LUEE*/
  LUUL,			/*LUEL*/
  LULU,			/*LUEU*/
  SLOT_CLASS_INVALID,
  LULU,			/*LULE*/
  LULL,			/*LULL*/
  LULU,			/*LULU*/
  SLOT_CLASS_INVALID,
  LUUL,			/*LUUE*/
  LUUL,			/*LUUL*/
  LUUU,			/*LUUU*/
  SLOT_CLASS_INVALID,	/* 107..111: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,	/* 112..127: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  ULUL,			/*UEEE*/
  ULUL,			/*UEEL*/
  ULLU,			/*UEEU*/
  SLOT_CLASS_INVALID,
  ULLU,			/*UELE*/
  UULL,			/*UELL*/
  ULLU,			/*UELU*/
  SLOT_CLASS_INVALID,
  ULUL,			/*UEUE*/
  ULUL,			/*UEUL*/
  ULUU,			/*UEUU*/
  SLOT_CLASS_INVALID,	/* 139..143: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  ULUL,			/*ULEE*/
  ULUL,			/*ULEL*/
  ULLU,			/*ULEU*/
  SLOT_CLASS_INVALID,
  ULLU,			/*ULLE*/
  ULLL,			/*ULLL*/
  ULLU,			/*ULLU*/
  SLOT_CLASS_INVALID,
  ULUL,			/*ULUE*/
  ULUL,			/*ULUL*/
  ULUU,			/*ULUU*/
  SLOT_CLASS_INVALID,	/* 155..159: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  UULL,			/*UUEE*/
  UULL,			/*UUEL*/
  UULU,			/*UUEU*/
  SLOT_CLASS_INVALID,
  UULL,			/*UULE*/
  UULL,			/*UULL*/
  UULU,			/*UULU*/
  SLOT_CLASS_INVALID,
  UUUL,			/*UUUE*/
  UUUL,			/*UUUL*/
  UUUU,			/*UUUU*/
  SLOT_CLASS_INVALID,	/* 171..175: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,	/* 176..191: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,	/* 192..207: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,	/* 208..223: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,	/* 224..239: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,	/* 240..255: unused encodings */
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
  SLOT_CLASS_INVALID,
}};

/* Function prototypes */

enum sub_cluster_type slot_sub_clus_req(enum alpha_fu_class);

/*Find the functional unit (subcluster) requirements for this instruction*/

enum sub_cluster_type 
slot_sub_clus_req(enum md_opcode op) {
  enum sub_cluster_type clus_req;
  enum alpha_fu_class fu_req = ALPHA_OP_FUCLASS(op);
  
  if (fu_req == IntALU && 
      (MD_IS_RETURN(op) || MD_IS_BRANCH(op) || MD_IS_INDIR(op) || 
       MD_IS_ITOF(op) || MD_IS_FTOI(op)))
    clus_req = LOWER;
  else if (fu_req == IntALU && 
	   ((MD_OP_FLAGS(op) & (F_CTRL|F_COND)) == (F_CTRL|F_COND)|| 
	    MD_IS_SHIFT(op))) 
    clus_req = UPPER;
  else if(fu_req == IntALU || fu_req == IntADD || fu_req == FUClamd_NA){
    clus_req = EITHER;
  }
  else if(fu_req == RdPort || fu_req == WrPort){
    clus_req = LOWER;
  }
  else if(fu_req == IntMULT){
    clus_req = UPPER;
  }
  else if(fu_req == IntSPE){
    clus_req = UPPER;
  }
  else if(fu_req == FloatMULT){
    clus_req = EITHER;
  }
  else if(fu_req == FloatCMP || fu_req == FloatCVT || fu_req == FloatDIV || fu_req == FloatADD ||  
	  fu_req == FloatSQRT || fu_req == IntDIV){
    clus_req = EITHER;
  }
  else
    panic("Inst requires unknown Sub-cluster");
  return clus_req;
}


void 
slot_stage(void){
  int i=0, n_slotted=0;
  static md_inst_t inst;
  static md_addr_t target_addr;
  static enum md_opcode op;
  /* This variable is set to true, if the branch predictor can compute 
     the address of a jump using the adder */
  int jump_inst = FALSE;
  
  /* position of slot latch tail when we enter slot */
  int first_slot = slot_latch_tail;
  /*Instruction mix is stored in the variable inst_mix. 
    Two bits are used to encode one instruction as U, L, or E. 
    This scheme limits the toal functional units to 4 
    and the maximum slotting width to 32(64/2). */
  int inst_mix=0;	
  
  static enum alpha_slot_class slot_class;
  /* Slot cannot predict beyond a branch per cycle. This is set to
     TRUE if we have processed a branch this cycle. slot will return
     if it encounters another branch this cycle */
  int branch_inst=FALSE;
  
  /* Set to true if the inst through this pass of the while loop is a
     branch. This is used to detect if the line predictor has
     predicted non sequential fetch even though there are no control
     instructions in the packet */
  int cur_branch;


  /* While number of slotted inst is less than the slot width and
     there are inst in the fetch queue */
  while (slot_latch_num < slot_width && fetch_num >0 ){
    /* This variable is set to true if the current instruction is a
       control instruction. This is to ensure that we fetch
       sequentially in the absence of control instructions. */
    cur_branch = FALSE;
    op = fetch_data[fetch_head].inst_desc->op;
    inst_mix |= (int )SLOT_SUB_CLUS_REQ(op,n_slotted);
    n_slotted++;
    
    /* Access branch predictor tables and check against the line predictor */
    if (pred && (MD_OP_FLAGS(op) & F_CTRL)){
      /* The 21264 can process only one branch per cycle. If we have
	 already processed one branch this cycle, return  */
      if (branch_inst == TRUE) {
	n_slotted--;
	break;
      }
      inst = fetch_data[fetch_head].inst_desc->IR;
      
      SL[slot_latch_tail].inst_desc =
	fetch_data[fetch_head].inst_desc;
                  
      SL[slot_latch_tail].inst_desc->iflag = 
	MD_OP_FLAGS(SL[slot_latch_tail].inst_desc->op);
      slot_latch_num++;
      fetch_num--;
      branch_inst = TRUE;
      cur_branch = TRUE;
      
      /* If this is a branch whose target can be computed using adder, 
	 and the adder feature is enabled, compute it. */
#ifdef FLEXIBLE_SIM
      if (slot_adder && !MD_IS_INDIR(op)) {
#else  
      if (!MD_IS_INDIR(op)) {
#endif 

	target_addr = 
	  (SL[slot_latch_tail].inst_desc->regs_PC + (SEXT21(TARG) << 2) + 4);
	jump_inst = TRUE;
      }
      else 
	target_addr = SL[slot_latch_tail].inst_desc->lpred_PC;
      
      /* Access branch pred in bpred.c otherwise */
      SL[slot_latch_tail].inst_desc->bpred_PC = 
	bpred_lookup(pred,
		     /* branch address */ 
		     SL[slot_latch_tail].inst_desc->regs_PC,
		     /* target address */ target_addr,
		     /* opcode */op,
		     /* call? */MD_IS_CALL(op),
		     /* return? */MD_IS_RETURN(op),
		     /* updt */&SL[slot_latch_tail].dir_update,
		     /* Instruction descriptor */
		     SL[slot_latch_tail].inst_desc);
           
      /* If we have a valid bpred PC */
      if (SL[slot_latch_tail].inst_desc->bpred_PC > 1) {
	/* Check against line predictor prediction */
	/* If the next PC doesn't match the bpred PC and there is no
	   line predictor or the instruction is a conditional branch
	   or return (indirect jmps are not handled by the branch
	   predictor and go with the line predictor prediction) */
	if (SL[slot_latch_tail].inst_desc->regs_NPC != 
	    SL[slot_latch_tail].inst_desc->bpred_PC && 
	    (!line_predictor || (!MD_IS_CALL(op) && 
				 (!MD_IS_INDIR(op) ||
				  MD_IS_RETURN(op)) && 
				 !MD_IS_BRANCH(op)))) {
	  regs.regs_PC = SL[slot_latch_tail].inst_desc->bpred_PC;
	  /* Speculatively update the line predictor provided 
	   a) there is a line predictor
	   b) hysterisis bits < 2 */
#ifdef FLEXIBLE_SIM
	  if (line_pred_spec_update && 
	      line_predictor && 
	      (icache->line_pred_table[SL[slot_latch_tail].inst_desc->pred_set] [SL[slot_latch_tail].inst_desc->pred_blk_no] [SL[slot_latch_tail].inst_desc->pred_offset].line_pred_hist)< 2) {
#else
	  if (line_predictor && 
	      (icache->line_pred_table[SL[slot_latch_tail].inst_desc->pred_set] [SL[slot_latch_tail].inst_desc->pred_blk_no] [SL[slot_latch_tail].inst_desc->pred_offset].line_pred_hist)< 2) {
#endif
	    /* update only if target is a multiple of fetch width */
	    if (SL[slot_latch_tail].inst_desc->bpred_PC % 
	      (line_pred_width*sizeof(md_inst_t)) == 0) {
	      icache->line_pred_table[SL[slot_latch_tail].inst_desc->pred_set] [SL[slot_latch_tail].inst_desc->pred_blk_no] [SL[slot_latch_tail].inst_desc->pred_offset].next_addr = SL[slot_latch_tail].inst_desc->bpred_PC;
	      icache->line_pred_table[SL[slot_latch_tail].inst_desc->pred_set] [SL[slot_latch_tail].inst_desc->pred_blk_no] [SL[slot_latch_tail].inst_desc->pred_offset].line_pred_hist =  line_pred_ini_value;
	      }
	  }
	  SL[slot_latch_tail].inst_desc->regs_NPC = 
	   SL[slot_latch_tail].inst_desc->bpred_PC; 
	  fetch_head = (fetch_head + 1) & (fetch_ifq_size - 1);
	  while(fetch_num > 0) {
	    fetch_return_to_free_list(fetch_data[fetch_head].inst_desc);
	    fetch_num--;
	    fetch_head = (fetch_head + 1) & (fetch_ifq_size - 1);
	  }
	  fetch_head=fetch_tail=0;
	  /* If the line predictor PC is not same as the branch pred
	     PC, stall for a cycle */
	  if (SL[slot_latch_tail].inst_desc->lpred_PC != 
	      SL[slot_latch_tail].inst_desc->bpred_PC) {
	    fetch_istall_buf.stall |= (int) BRANCH_STALL;
	    fetch_istall_buf.resume=0;
	    eventq_queue_callback(sim_cycle+1, 
				  (void *) fetch_resume_ifetch,
				  (int) BRANCH_STALL);
	  }
	  slot_latch_tail = (slot_latch_tail + 1) & (slot_width - 1);
	  break;
	}
	/* If the next PC doesn't match the line predictor PC, and
	   the instruction is not a branch or return */
	else if (((SL[slot_latch_tail].inst_desc->regs_NPC != 
		   SL[slot_latch_tail].inst_desc->lpred_PC) 
		  || (jump_inst && SL[slot_latch_tail].inst_desc->regs_NPC 
		      !=  SL[slot_latch_tail].inst_desc->bpred_PC))
		 && line_predictor && 
		 (MD_IS_CALL(op) || (MD_IS_INDIR(op) &&
				     !MD_IS_RETURN(op)) || MD_IS_BRANCH(op))) {
	  /* If the target was computed using the adder, use bpred_PC */
	  if (jump_inst == TRUE) {
	    regs.regs_PC = SL[slot_latch_tail].inst_desc->bpred_PC;
	    SL[slot_latch_tail].inst_desc->regs_NPC = 
	      SL[slot_latch_tail].inst_desc->bpred_PC; 
	    fetch_head = (fetch_head + 1) & (fetch_ifq_size - 1);
	    if (SL[slot_latch_tail].inst_desc->bpred_PC != 
		SL[slot_latch_tail].inst_desc->lpred_PC) {
	      fetch_istall_buf.resume=0;
	      fetch_istall_buf.stall |= (int) BRANCH_STALL;
	      eventq_queue_callback(sim_cycle+1, 
				    (void *) fetch_resume_ifetch,
				    (int) BRANCH_STALL);
	      /* Update line predictor if speculative update is
		 enabled */
#ifdef FLEXIBLE_SIM
	      if (line_pred_spec_update && 
		  line_predictor && 
		  (icache->line_pred_table[SL[slot_latch_tail].inst_desc->pred_set] [SL[slot_latch_tail].inst_desc->pred_blk_no] [SL[slot_latch_tail].inst_desc->pred_offset].line_pred_hist)< 2) {
#else
	      if (line_predictor && 
		  (icache->line_pred_table[SL[slot_latch_tail].inst_desc->pred_set] [SL[slot_latch_tail].inst_desc->pred_blk_no] [SL[slot_latch_tail].inst_desc->pred_offset].line_pred_hist)< 2) {
#endif
		if (SL[slot_latch_tail].inst_desc->bpred_PC 
		  % (line_pred_width*sizeof(md_inst_t)) == 0) {
		  icache->line_pred_table[SL[slot_latch_tail].inst_desc->pred_set] [SL[slot_latch_tail].inst_desc->pred_blk_no] [SL[slot_latch_tail].inst_desc->pred_offset].next_addr = SL[slot_latch_tail].inst_desc->bpred_PC;
		  icache->line_pred_table[SL[slot_latch_tail].inst_desc->pred_set] [SL[slot_latch_tail].inst_desc->pred_blk_no] [SL[slot_latch_tail].inst_desc->pred_offset].line_pred_hist = line_pred_ini_value;
		}
	      }
	    }
            else {
              fetch_istall_buf.stall |= (int) BRANCH_STALL;
              fetch_istall_buf.resume=0;
              eventq_queue_callback(sim_cycle+1, 
                                    (void *) fetch_resume_ifetch,
                                    (int) BRANCH_STALL);
            }
	    slot_latch_tail = (slot_latch_tail + 1) & (slot_width - 1);
	    while(fetch_num > 0) {
	      fetch_return_to_free_list(fetch_data[fetch_head].inst_desc);
	      fetch_num--;
	      fetch_head = (fetch_head + 1) & (fetch_ifq_size - 1);
	    }
	    fetch_head=fetch_tail=0;
	    break;
	  }
	  else {
	    regs.regs_PC = SL[slot_latch_tail].inst_desc->lpred_PC;
	    SL[slot_latch_tail].inst_desc->regs_NPC = 
	      SL[slot_latch_tail].inst_desc->lpred_PC; 
	    fetch_head = (fetch_head + 1) & (fetch_ifq_size - 1);
	    slot_latch_tail = (slot_latch_tail + 1) & (slot_width - 1);
	    while(fetch_num > 0) { 
	      fetch_return_to_free_list(fetch_data[fetch_head].inst_desc);
	      fetch_num--;
	      fetch_head = (fetch_head + 1) & (fetch_ifq_size - 1);
	    }
	    fetch_head=fetch_tail=0;
            fetch_istall_buf.stall |= (int) BRANCH_STALL;
	    fetch_istall_buf.resume=0;
	    eventq_queue_callback(sim_cycle+1, 
				  (void *) fetch_resume_ifetch,
				  (int) BRANCH_STALL);
	    break;
	  }
	}
      }
      else {
	if (SL[slot_latch_tail].inst_desc->regs_NPC != 
	    SL[slot_latch_tail].inst_desc->lpred_PC &&
	    line_predictor) {
	  regs.regs_PC =  SL[slot_latch_tail].inst_desc->lpred_PC;
	  SL[slot_latch_tail].inst_desc->regs_NPC = 
	    SL[slot_latch_tail].inst_desc->lpred_PC; 
	  fetch_head = (fetch_head + 1) & (fetch_ifq_size - 1);
	  slot_latch_tail = (slot_latch_tail + 1) & (slot_width - 1);
	  while(fetch_num > 0) {
	    fetch_return_to_free_list(fetch_data[fetch_head].inst_desc);
	    fetch_num--;
	    fetch_head = (fetch_head + 1) & (fetch_ifq_size - 1);
	  }
          fetch_istall_buf.stall |= (int) BRANCH_STALL;
          fetch_istall_buf.resume=0;
          eventq_queue_callback(sim_cycle+1, 
                                (void *) fetch_resume_ifetch,
                                (int) BRANCH_STALL);
	  fetch_head=fetch_tail=0;
	  break;
	}
      }
    }
    else {
      /* Not a control instruction */
      inst = fetch_data[fetch_head].inst_desc->IR;
      SL[slot_latch_tail].inst_desc = fetch_data[fetch_head].inst_desc;
      SL[slot_latch_tail].inst_desc->iflag = 
	MD_OP_FLAGS(SL[slot_latch_tail].inst_desc->op);
      slot_latch_num++;
      fetch_num--;
    }
    fetch_head = (fetch_head + 1) & (fetch_ifq_size - 1);
    /* If the fetch packet has no control instruction and the line
       predictor has not fetched sequentially, restart sequential
       fetch */
    /* If this is the last instruction in the fetch packet */
    if (((SL[slot_latch_tail].inst_desc->regs_PC+sizeof(md_inst_t)) 
	% (fetch_width*sizeof(md_inst_t)) == 0)) {
      /* If the last instruction in the fetch packet is not a branch,
	 and the fetch is not sequential */
      if (cur_branch == FALSE && 
	  (SL[slot_latch_tail].inst_desc->lpred_PC != 
	   (SL[slot_latch_tail].inst_desc->regs_PC+sizeof(md_inst_t)))) {
	regs.regs_PC = 
	  SL[slot_latch_tail].inst_desc->regs_PC+sizeof(md_inst_t);
	SL[slot_latch_tail].inst_desc->regs_NPC = regs.regs_PC;
	/* Restart fetch. We don't charge any penalty because we
	   assume the 21264 fetch engine can detect the absence of
	   control instructions in the fetch packet, and make sure the
	   next fetch is sequential */
	while(fetch_num > 0) {
	  fetch_return_to_free_list(fetch_data[fetch_head].inst_desc);
	  fetch_num--;
	  fetch_head = (fetch_head + 1) & (fetch_ifq_size - 1);
	}
	fetch_head=fetch_tail=0;
        fetch_istall_buf.stall |= (int) BRANCH_STALL;
        fetch_istall_buf.resume=0;
        eventq_queue_callback(sim_cycle+1, 
                              (void *) fetch_resume_ifetch,
                              (int) BRANCH_STALL);
      }
    }
    slot_latch_tail = (slot_latch_tail + 1) & (slot_width - 1);
  }
#ifdef FLEXIBLE_SIM
  if (static_slotting && slot_width == 4){
#else
  if (slot_width == 4){
#endif
    /* If less than 4 instructions were slotted this cycle, slot the
       rest as E */
    if (n_slotted < slot_width-1){
      for (i=n_slotted;i<slot_width;i++){
	inst_mix |= (int)SLOT_SUB_CLUS_REQ(ADDQ,i);
      }
    }
    /* Slot according to the instruction mix. */
    
    slot_class=SLOT_INSCLASS_SLOTCLASS(inst_mix, slot_algo);
    switch (slot_class) {
    case ULUL:
      for (i=0; i<n_slotted; i++){
	if (i == 0)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;
    case ULLU:
      for (i=0; i<n_slotted; i++){
	if (i == 0)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break; 
    case UULL:
      for (i=0; i<n_slotted; i++){
	if (i == 0)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;
    case LLUU:
      for (i=0; i<n_slotted; i++){
	if (i == 3)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==0)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;  
     case ULLL:
      for (i=0; i<n_slotted; i++){
	if (i == 0)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break; 
    case LULU:
      for (i=0; i<n_slotted; i++){
	if (i == 1)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==0)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;  
    case LUUL:
      for (i=0; i<n_slotted; i++){
	if (i == 0)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;
    case LLLL:
      for (i=0; i<n_slotted; i++){
	if (i == 0)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;  
    case LLLU:
      for (i=0; i<n_slotted; i++){
	if (i == 2)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==0)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;
    case LULL:
      for (i=0; i<n_slotted; i++){
	if (i == 0)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;  
    case LUUU:
      for (i=0; i<n_slotted; i++){
	if (i == 0)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;  
    case LLUL:
      for (i=0; i<n_slotted; i++){
	if (i == 0)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;
    case ULUU:
      for (i=0; i<n_slotted; i++){
	if (i == 2)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==0)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;
    case UULU:
      for (i=0; i<n_slotted; i++){
	if (i == 0)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;
    case UUUL:
      for (i=0; i<n_slotted; i++){
	if (i == 0)
	  SL[first_slot].inst_desc->slot_clus_assigned = LOWER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;  
    case UUUU:
      for (i=0; i<n_slotted; i++){
	if (i == 0)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==1)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==2)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	else if (i==3)
	  SL[first_slot].inst_desc->slot_clus_assigned = UPPER;
	first_slot = (first_slot + 1) & (slot_width - 1);
      }
      break;
    default:
      break;
    }
  }
}


/*
 * slot_stage_init - allocate the slot latch and mark it empty.
 *
 * Allocates slot_width zero-filled entries for SL and resets the head and
 * tail indices and the occupancy count.  slot_width must already hold its
 * final value; the slot stage wraps latch indices with "& (slot_width - 1)".
 * Calls fatal() if the allocation fails.
 */
void
slot_stage_init(void)
{
  SL = calloc(slot_width, sizeof *SL);
  if (SL == NULL)
    fatal("Out of virtual memory");

  slot_latch_num = 0;
  slot_latch_head = 0;
  slot_latch_tail = 0;
}
