/*
* fetch.c - fetch stage implementation
*
* This file is part of the Alpha simulator tool suite written by
* Raj Desikan as part of the Bullseye project.
*
* Copyright (C) 1999 by Raj Desikan
* This source file is distributed "as is" in the hope that it will be
* useful.  It is distributed with no warranty, and no author or
* distributor accepts any responsibility for the consequences of its
* use. 
*
* Everyone is granted permission to copy, modify and redistribute
* this source file under the following conditions:
*
*    This tool set is distributed for non-commercial use only. 
*    Please contact the maintainer for restrictions applying to 
*    commercial use of these tools.
*
*    Permission is granted to anyone to make or distribute copies
*    of this source code, either as received or modified, in any
*    medium, provided that all copyright notices, permission and
*    nonwarranty notices are preserved, and that the distributor
*    grants the recipient permission for further redistribution as
*    permitted by this document.
*
*    Permission is granted to distribute this file in compiled
*    or executable form under the same conditions that apply for
*    source code, provided that either:
*
*    A. it is accompanied by the corresponding machine-readable
*       source code,
*    B. it is accompanied by a written offer, with no time limit,
*       to give anyone a machine-readable copy of the corresponding
*       source code in return for reimbursement of the cost of
*       distribution.  This written offer must permit verbatim
*       duplication by anyone, or
*    C. it is distributed by someone who received only the
*       executable form, and is accompanied by a copy of the
*       written offer of source code that they received concurrently.
*
* In other words, you are welcome to use, share and improve this
* source file.  You are forbidden to forbid anyone else to use, share
* and improve what you give them.
*
*/

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "alpha.h"
#include "regs.h"
#include "memory.h"
#include "loader.h"
#include "cache.h"
#include "bpred.h"
#include "fetch.h"
#include "slot.h"
#include "tlb.h"
#include "eventq.h"
#include "issue.h"
#include "writeback.h"
#include "map.h"
#include "stats.h"
#include "syscall.h"

/*This file implements the fetch stage of the pipeline. */

/* Fetch queue: the interface between the fetch stage and the slot
   stage.  Allocated in fetch_stage_init() with fetch_ifq_size
   entries. */
struct fetch_queue *fetch_data = NULL;

/* fetch_num is the number of instructions currently held in the fetch
   queue; fetch_head and fetch_tail are the head and tail indices into
   fetch_data.  fetch_stage() inserts at fetch_tail. */
int fetch_num, fetch_head, fetch_tail;

/* Instruction fetch queue size (in insts).  fetch_stage() wraps
   fetch_tail with "& (fetch_ifq_size - 1)", which is only correct
   when this value is a power of two. */
int fetch_ifq_size;

/* Number of instructions to fetch from the cache at a time */
int fetch_width;

/* Line predictor width.  A prediction is stored for every
   line_pred_width instructions (default is 4). */
int line_pred_width;

/* Number of fetches per cycle.  If fetch_speed is set to 2 and
   fetch_width is 4, then we can fetch 8 insts per cycle in batches of
   4. */
int fetch_speed;

/* Line predictor variables.  These variables are used to update the
   line predictor in slot and commit.  fetch_stage() copies the first
   three into every instruction descriptor it fills in. */

int fetch_set; /* Set of the cache line accessed */
int fetch_blk_no; /* Block number */
int fetch_inst_offset; /* Instruction offset */
md_addr_t fetch_pred_PC; /* Next PC predicted by the line predictor */

/* The instruction descriptor free list (singly linked through the
   descriptors' next pointers) */
struct fetch_inst_list_template *fetch_inst_free_list;

/* Buffer used to indicate whether fetch is stalled, and whether it is
   resuming after a stall */
struct _istall_buf fetch_istall_buf;


/* If 1, a line predictor exists */
int line_predictor;

/* stWait table array.  The stWait bit is set for a load instruction
   when it is refetched after a store trap.  A load instruction whose
   stWait bit is set cannot be issued until all older stores have
   issued.  stWait bits are cleared unconditionally every 16384
   cycles. */
counter_t *fetch_st_wait_table;

/* Size of the stWait table (number of entries) */
int fetch_st_table_size = 1024;

/* Mask used for indexing into the stWait table using the
   instruction's address.  Set to fetch_st_table_size-1 by
   fetch_stage_init(). */
int fetch_st_wait_mask;

/* Current PC value.  NOTE(review): the functions in this file read
   and write regs.regs_PC rather than this variable -- confirm whether
   it is still used elsewhere. */
md_addr_t regs_PC;
/* Predicted PC value (declared in bpred.h) */
md_addr_t pred_PC;
/* Line predictor value */
md_addr_t line_pred_PC;


/*
 * Initialize the fetch stage.
 *
 * Allocates the (zero-filled) fetch queue with fetch_ifq_size entries,
 * resets the queue bookkeeping and the stall flags, points the PC at
 * the program entry point, builds the instruction descriptor free list
 * and, when fetch_st_table_size is non-zero, allocates the zero-filled
 * stWait table together with its index mask.
 *
 * Calls fatal() if either allocation fails.
 */
void 
fetch_stage_init(void){
  fetch_data = (struct fetch_queue *)
    calloc(fetch_ifq_size, sizeof(struct fetch_queue));
  
  if (!fetch_data) 
    fatal ("out of virtual memory");
  
  /* Empty queue, no stall pending */
  fetch_num = 0;
  fetch_head = 0;
  fetch_tail = 0;
  fetch_istall_buf.stall=0;

  /* Start fetching at the program entry point */
  regs.regs_PC = ld_prog_entry;
  regs.regs_NPC = regs.regs_PC + sizeof(md_inst_t);
  
  /* create the instruction free list */
  fetch_init_inst_free_list();

  if (fetch_st_table_size) {
    fetch_st_wait_table = 
      (counter_t *)calloc(fetch_st_table_size, sizeof(counter_t));
    /* Check this allocation the same way as the fetch queue above, so
       an out-of-memory condition is reported instead of leaving a
       NULL table behind. */
    if (!fetch_st_wait_table)
      fatal ("out of virtual memory");
    /* size-1 is only a valid index mask when the table size is a
       power of two */
    fetch_st_wait_mask = fetch_st_table_size-1;
  }
}

/*
 * Fetch stage: run once to fetch up to fetch_width * fetch_speed
 * instructions into the fetch queue (fetch_data).
 *
 * For every instruction slot:
 *   - if regs.regs_PC lies inside the text segment and is instruction
 *     aligned, the I-cache (when present) is accessed once per fetch
 *     group and the instruction is read from memory; otherwise a NOP
 *     with its trap flag set is sent down the pipeline;
 *   - an instruction descriptor is taken from the free list and filled
 *     in (instruction bits, PC, time stamp, opcode, line predictor
 *     bookkeeping, stWait bit);
 *   - the PC advances sequentially, or to fetch_pred_PC at the end of
 *     a fetch group.
 *
 * The function returns early, before allocating a descriptor, when the
 * I-cache access does not complete with a latency of 1; in that case
 * the matching bit is set in fetch_istall_buf.stall.  It also returns
 * after the last instruction of a fetch group when the queue cannot
 * hold another full group or when fetch_speed discontinuous fetches
 * have been made.
 *
 * NOTE(review): this function does not test fetch_istall_buf.stall
 * itself; presumably the caller skips the fetch stage while a stall
 * bit is set -- confirm against the caller.
 */
void 
fetch_stage(void){
  int lat;
  static md_inst_t inst;
  int branch_cnt, i=0,  access_now = TRUE; 
  md_addr_t cache_addr;
  /*fetch  instructions from memory*/
  /* Stop fetch if fetch queue cannot hold fetch_width number of
     instructions. Might be too restrictive */
  if ((fetch_num+fetch_width) > fetch_ifq_size) {
    return;
  }
  for (i=0, branch_cnt=0; 
       i < (fetch_width * fetch_speed) && 
         (fetch_num < fetch_ifq_size);
       i++){
    /* If valid address: inside the text segment and aligned to the
       instruction size */
    if(ld_text_base <= regs.regs_PC && regs.regs_PC < 
       (ld_text_base+ld_text_size) &&  
       !(regs.regs_PC & (sizeof(md_inst_t)-1))){
      
      /* If we're not restarting after a fault, and there is an icache,
         and we have crossed a fetch_width boundary */
      if ((!fetch_istall_buf.resume) && icache && access_now){
        cache_access_packet *c_packet;
        /* We force cache access on fetch width boundaries and the number 
           of instructions fetched is equal to fetch width.  Round the
           PC down to the start of its fetch group.  The modulus must
           be the group size in bytes; the divisor is parenthesized
           because % and * have equal precedence and associate left to
           right, so without the parentheses the address would only be
           reduced modulo fetch_width. */
        cache_addr = regs.regs_PC;
        cache_addr -= cache_addr % (fetch_width*sizeof(md_inst_t));
        c_packet = cache_create_access_packet(icache, 
                                              (Read | Ifetch_access), 
                                              cache_addr, Virtual, 
                                              fetch_width*sizeof(md_inst_t), 
                                              NULL,
                                              (void *)schedule_resume_ifetch, 
                                              NULL, 0);
        /* access the I-cache */
        lat = cache_timing_access(sim_cycle, c_packet);
        /* Disable cache access till the next fetch width
           boundary. */
        access_now = FALSE;
        /* Case if we hit but the cache hit latency is greater than one */
        
        if (lat > 1){
          fetch_istall_buf.stall |= CACHE_STALL;
          /* Queue an event to restart fetch */
          eventq_queue_callback(sim_cycle + lat-1, 
                                (void *) fetch_resume_ifetch,
                                (int) CACHE_STALL);
          return;
        }
        /* On a miss no event is queued here; the packet carries
           schedule_resume_ifetch as its callback. */
        else if (lat == CACHE_MISS) {
          fetch_istall_buf.stall |= CACHE_STALL;
          return;
        }
        else if (lat == TLB_MISS) {
          fetch_istall_buf.stall |= TLB_STALL;
          return;
        }
        /* If the MSHRs are full, the cache module will eventually
           restart the request */
        else if ((lat == MSHRS_FULL) || (lat == TARGET_FULL)){
          fetch_istall_buf.stall |= MSHR_STALL;
          return;
        }
        /* if lat < 0, we have an illegal (mis-spec) access, and we can 
           ignore it */
        /* Else, if lat == 1, then we just continue with the fetch */
      }
      /* If we were stalled and are now resuming: the cache access has
         already been made, so skip it once and clear the flag */
      else if (fetch_istall_buf.resume) {
        fetch_istall_buf.resume = 0;
      }
      /* We have an instruction from memory. Assign a descriptor for
         it */
      fetch_data[fetch_tail].inst_desc = fetch_get_from_free_list();
      /* read inst from memory */
      MD_FETCH_INST(inst, regs.regs_PC);
      fetch_data[fetch_tail].inst_desc->trap = FALSE;
    }
    else {
      /* BOGUS inst. Send a NOP */
      fetch_data[fetch_tail].inst_desc = fetch_get_from_free_list();
      /* fetch PC is bogus, send a NOP down the pipeline */
      inst = MD_NOP_INST;
      fetch_data[fetch_tail].inst_desc->trap = TRUE;
    }
    /* Information for line predictor updates */
    fetch_data[fetch_tail].inst_desc->pred_set = fetch_set;
    fetch_data[fetch_tail].inst_desc->pred_blk_no = fetch_blk_no;
    fetch_data[fetch_tail].inst_desc->pred_offset = fetch_inst_offset;
    /* Copy inst value, PC, and the cycle this inst is fetched */ 
    fetch_data[fetch_tail].inst_desc->IR = inst;
    fetch_data[fetch_tail].inst_desc->regs_PC = regs.regs_PC;
    fetch_data[fetch_tail].inst_desc->time_stamp = sim_cycle;
    /* Set the opcode */
    MD_SET_OPCODE(fetch_data[fetch_tail].inst_desc->op,inst);

#ifdef FLEXIBLE_SIM
    /* Get the stWait bit for this instruction */
    if (fetch_st_table_size) {
      fetch_data[fetch_tail].inst_desc->st_wait_bit = 
        FETCH_ST_WAIT_BIT(regs.regs_PC);
    }
#else
    fetch_data[fetch_tail].inst_desc->st_wait_bit = 
      FETCH_ST_WAIT_BIT(regs.regs_PC);
#endif

    /* If we have fetched one fetch_width, check for fetch speed and
       line predictor prediction */
    if (((regs.regs_PC+sizeof(md_inst_t)) 
         % (fetch_width*sizeof(md_inst_t))) == 0 ) {    
      /* The next instruction starts a new fetch group, so it needs a
         new cache access */
      access_now = TRUE;
      
      /* We set fetch_pred_PC to sequential fetch if
         1) There is no line predictor
         2) There is no icache (hence no line predictor)
         3) The next PC is not on a line_pred_width boundary, i.e. the
            line predictor has no new prediction yet.
         Otherwise fetch_pred_PC keeps the value it already holds.
      */
      if (!line_predictor || !icache || 
          ((regs.regs_PC+sizeof(md_inst_t)) % 
           (line_pred_width*sizeof(md_inst_t))) != 0)
        fetch_pred_PC = regs.regs_PC + sizeof(md_inst_t);
      
      if (fetch_pred_PC != (regs.regs_PC + sizeof(md_inst_t)))
        /* Indicate this is a discontinuous fetch */
          branch_cnt++; 
      
      /* Set PC to pred PC. */
      regs.regs_PC = fetch_pred_PC;
      /* If the next fetch_width number of instructions cannot be
         accommodated in the fetch queue, or the number of
         discontinuous fetches has reached fetch_speed, enqueue this
         instruction and stop fetching. */
      if (((fetch_num + 1 + fetch_width) > fetch_ifq_size) || 
          branch_cnt >= fetch_speed) {
        fetch_data[fetch_tail].inst_desc->lpred_PC=regs.regs_PC;
        fetch_data[fetch_tail].inst_desc->regs_NPC=regs.regs_PC;
        /* The mask-based wrap requires fetch_ifq_size to be a power
           of two */
        fetch_tail = (fetch_tail + 1) & (fetch_ifq_size - 1);
        fetch_num++;    
        return;
      }
    }
    else
      regs.regs_PC+=sizeof(md_inst_t);
    
    /* Record the predicted and next PC, then enqueue the instruction.
       The mask-based wrap requires fetch_ifq_size to be a power of
       two. */
    fetch_data[fetch_tail].inst_desc->lpred_PC=fetch_pred_PC;
    fetch_data[fetch_tail].inst_desc->regs_NPC=regs.regs_PC;
    fetch_tail = (fetch_tail + 1) & (fetch_ifq_size - 1);
    fetch_num++;
  }
}


/* Build a new instruction descriptor free list holding
   FETCH_FREE_LIST_SIZE freshly allocated descriptors.  The previous
   list head is discarded, so this should only be called when the free
   list is empty.  A warning is printed on every call; fatal() is
   called if an allocation fails.  Apart from the next link, the
   descriptors are left uninitialized. */
void fetch_init_inst_free_list(){
  struct fetch_inst_list_template *node;
  int remaining;

  fetch_inst_free_list = NULL;
  warn("increasing instruction descriptor free list size");

  for (remaining = FETCH_FREE_LIST_SIZE; remaining > 0; remaining--) {
    node = malloc(sizeof *node);
    if (node == NULL)
      fatal("out of virtual memory");

    /* push the new descriptor on the front of the list */
    node->next = fetch_inst_free_list;
    fetch_inst_free_list = node;
  }
}  


/* get an entry from free list */
struct fetch_inst_list_template *
fetch_get_from_free_list(){
  struct fetch_inst_list_template *temp;
  
  /* if free list is exhausted, add some more entries */
  if (!fetch_inst_free_list){
    fetch_init_inst_free_list();
  }
  temp = fetch_inst_free_list;
  fetch_inst_free_list = fetch_inst_free_list->next;
  return (temp);
}

/* Hand an instruction descriptor back to the free list.  The
   descriptor becomes the new head of the list; temp must not be
   NULL. */

void fetch_return_to_free_list(struct fetch_inst_list_template *temp){
  struct fetch_inst_list_template *old_head = fetch_inst_free_list;

  temp->next = old_head;
  fetch_inst_free_list = temp;
}

/* Queue a fetch_resume_ifetch() callback for cycle `now` with
   CACHE_STALL as the stall type.  fetch_stage() installs this
   function as the callback of its I-cache access packets. */

void schedule_resume_ifetch(tick_t now)
{
  void *resume_handler = (void *)fetch_resume_ifetch;

  eventq_queue_callback(now, resume_handler, (int)CACHE_STALL);
}

/* Queue a fetch_resume_ifetch() callback for cycle `now` with
   TLB_STALL as the stall type, then release the access packet.
   unused_arg is ignored. */
void schedule_tlb_resume_ifetch(tick_t now, cache_access_packet *c_packet, 
                                MSHR_STAMP_TYPE unused_arg)
{
  void *resume_handler = (void *)fetch_resume_ifetch;

  eventq_queue_callback(now, resume_handler, (int)TLB_STALL);

  /* the packet is no longer needed once the resume event is queued */
  cache_free_access_packet(c_packet);
}

/*
 * Clear one cause of fetch stall in fetch_istall_buf.
 *
 *   now        - cycle at which the callback fires (not used here)
 *   stall_type - which stall has ended:
 *     BRANCH_STALL  clears every stall bit and resets resume to 0, so
 *                   the next fetch performs a fresh I-cache access;
 *                   a branch stall takes priority over all others.
 *     CACHE_STALL   clears the CACHE_STALL bit and sets resume to 1,
 *                   which makes fetch_stage() skip the I-cache access
 *                   for the first instruction it fetches next.
 *     SAMPLE_STALL, MSHR_STALL, TLB_STALL
 *                   clear only their own bit; resume is left alone.
 *     any other value is ignored.
 *
 * Other stall bits that are still set remain set, so fetch stays
 * stalled until each of its causes has been cleared.
 */
void fetch_resume_ifetch(tick_t now, int stall_type)
{
  switch (stall_type) {
  case BRANCH_STALL:
    /* Restart fetch. Do cache access. Clear all stalls as branch stall gets 
       highest priority */
    fetch_istall_buf.resume = 0;
    fetch_istall_buf.stall = 0;
    break;
  case CACHE_STALL:
    /* The cache access has completed; do not repeat it on resume */
    fetch_istall_buf.resume = 1;
    fetch_istall_buf.stall  &= ~(CACHE_STALL);
    break;
  case SAMPLE_STALL:
    fetch_istall_buf.stall &= ~(SAMPLE_STALL);
    break;
  case MSHR_STALL:
    fetch_istall_buf.stall &= ~(MSHR_STALL);
    break;
  case TLB_STALL:
    fetch_istall_buf.stall &= ~(TLB_STALL);
    break;
  default:
    /* Unknown stall types are silently ignored */
    break;
  }
}









