/*
 * cache_func.c - cache module routines for functional simulation
 *
 * This file is a part of the SimpleScalar tool suite, originally
 * written by Todd M. Austin as a part of the Multiscalar Research 
 * Project. The file has been rewritten by Doug Burger, as a
 * part of the Galileo research project.  Alain Kagi has also 
 * contributed to this code.
 *
 * The tool suite is currently maintained by Doug Burger and Todd M. Austin.
 * 
 * Copyright (C) 1994, 1995, 1996, 1997 by Todd M. Austin
 *
 * This source file is distributed "as is" in the hope that it will be
 * useful.  The tool set comes with no warranty, and no author or
 * distributor accepts any responsibility for the consequences of its
 * use. 
 * 
 * Everyone is granted permission to copy, modify and redistribute
 * this tool set under the following conditions:
 * 
 *    This source code is distributed for non-commercial use only. 
 *    Please contact the maintainer for restrictions applying to 
 *    commercial use.
 *
 *    Permission is granted to anyone to make or distribute copies
 *    of this source code, either as received or modified, in any
 *    medium, provided that all copyright notices, permission and
 *    nonwarranty notices are preserved, and that the distributor
 *    grants the recipient permission for further redistribution as
 *    permitted by this document.
 *
 *    Permission is granted to distribute this file in compiled
 *    or executable form under the same conditions that apply for
 *    source code, provided that either:
 *
 *    A. it is accompanied by the corresponding machine-readable
 *       source code,
 *    B. it is accompanied by a written offer, with no time limit,
 *       to give anyone a machine-readable copy of the corresponding
 *       source code in return for reimbursement of the cost of
 *       distribution.  This written offer must permit verbatim
 *       duplication by anyone, or
 *    C. it is distributed by someone who received only the
 *       executable form, and is accompanied by a copy of the
 *       written offer of source code that they received concurrently.
 *
 * In other words, you are welcome to use, share and improve this
 * source file.  You are forbidden to forbid anyone else to use, share
 * and improve what you give them.
 *
 * INTERNET: dburger@cs.wisc.edu
 * US Mail:  1210 W. Dayton Street, Madison, WI 53706
 *
 * $Id: cache_func.c,v 1.1.6.2 2002/05/07 16:33:30 hrishi Exp $
 *
 * $Log: cache_func.c,v $
 * Revision 1.1.6.2  2002/05/07 16:33:30  hrishi
 * Cleaned up code. Removed phys_mem_table cruft.
 *
 * Revision 1.1.6.1  2000/04/03 19:47:58  hrishi
 * Initial check in of the simple scalar code with memory extensions. Only sim-outorder will work (pisa only)
 * The memory functions are completely different from SS3.0. All the memory functions (and macros) take an explicit memory argument while the same functions in this branch work on a global variable (virt_mem_table).
 *
 * Revision 1.3  1998/09/28 05:16:53  dburger
 * Sampling, subblocking, cache interface all work, memory leak and deadlock solved
 *
 * Revision 1.2  1998/09/16 21:10:23  dburger
 * Code apparently works w/ subblocking for sim-outorder AND sim-cache, no sampling yet
 *
 * Revision 1.1  1998/09/12 19:14:51  dburger
 * Code works, subblock support not yet put in
 *
 * Revision 1.5  1998/09/12 19:05:05  dburger
 * Code works for compress, doesn't support subblocking
 *
 * Revision 1.4  1998/09/12 19:04:00  dburger
 * Code works for compress, doesn't support subblocking
 *
 * Revision 1.3  1998/02/23 05:59:52  dburger
 * rs->blocking, cache_restart_access, istall_buf bugs fixed
 *
 * Revision 1.2  1998/01/30 04:26:26  dburger
 * Added valid function check, about to add rs->blocked
 *
 * Revision 1.1  1997/12/18 20:17:36  dburger
 * Code cleaned up but not debugged, ISCA stuff still in
 */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <math.h>

#include "ss.h"
#include "cache.h"
#include "eventq.h"
#include "loader.h"
#include "misc.h"
#include "tlb.h"

/* Takes a call to a lower level of of the memory hierarchy, with a void pointer
to that level, and a type, and calls the appropriate handler function (we could
implement this by passing a call function instead of a type, but I don't think
it matters) */
static inline void
blk_access_fn(SS_TIME_TYPE now, 
	      cache_access_packet *c_packet,
	      enum resource_type type)
{
  if (type == Cache)
    {
      (void) cache_func_access(now, c_packet);
    }
  else if (type == Memory)
    {
      mem_func_bank_access(now, c_packet);
    }
}

/* create and initialize a general cache structure */
struct cache *					/* pointer to cache created */
cache_func_create(char *name,			/* name of the cache */
		  int nsets,			/* total number of sets in cache */
		  int bsize,			/* block (line) size of cache */
		  int subblock,		/* subblock factor of cache lines */
		  int balloc,		/* allocate data space for blocks? */
		  int usize,			/* size of user data to alloc w/blks */
		  int assoc,			/* associativity of cache */
		  unsigned int hit_latency,	/* unused here */
		  enum cache_policy policy,	/* replacement policy w/in sets */
		  enum cache_trans trans,	/* translation addressing scheme */
		  int prefetch,		/* flag to determine if this cache prefetches */
		  int num_resources,		/* number of memories connected under cache */
		  int resource_code,		/* memory selection function index */
		  char *res_names[])		/* Resource name list */
{
  struct cache *cp;
  struct cache_blk *blk;
  int i, j, bindex;

  /* check all cache parameters */
  if (nsets <= 0)
    fatal("cache size (in sets) `%d' must be non-zero", nsets);
  if ((nsets & (nsets-1)) != 0)
    fatal("cache size (in sets) `%d' is not a power of two", nsets);
  /* blocks must be at least one datum large, i.e., 8 bytes for SS */
  if (bsize < 4)
    fatal("cache block size (in bytes) `%d' must be 4 or greater", bsize);
  if ((bsize & (bsize-1)) != 0)
    fatal("cache block size (in bytes) `%d' must be a power of two", bsize);
  if (assoc <= 0)
     fatal("cache associativity `%d' must be non-zero and positive", assoc);
  if ((assoc & (assoc-1)) != 0)
    fatal("cache associativity `%d' must be a power of two", assoc);

  /* allocate the cache structure */
  cp = (struct cache *)
    calloc(1, sizeof(struct cache) + (nsets-1)*sizeof(struct cache_set));
  if (!cp)
    fatal("out of virtual memory");

  /* initialize user parameters */
  cp->name = mystrdup(name);
  cp->nsets = nsets;
  cp->bsize = bsize;
  cp->assoc = assoc;
  cp->policy = policy;
  cp->trans = trans;
  cp->subblock_ratio = subblock;

  if (IS_CACHE_SUBBLOCKED(cp))
    {
      cp->sbsize = bsize/subblock;
      cp->subblock_vector_length = (int) ceil((double)cp->subblock_ratio/8);
    }
  else
    {
      cp->sbsize = 0;
      cp->subblock_vector_length = 0;
    }

  cp->num_resources = num_resources;
  cp->resource_code = resource_code;
  for (j=0; j<num_resources; j++)
  {
    cp->resource_names[j] = (void *) strdup(res_names[j]);
  }

  /* compute derived parameters */
  cp->hsize = CACHE_HIGHLY_ASSOC(cp) ? (assoc >> 2) : 0;
  cp->blk_mask = bsize-1;
  cp->set_shift = log_base2(bsize);
  cp->set_mask = nsets-1;
  cp->tag_shift = cp->set_shift + log_base2(nsets);
  cp->tag_mask = (1 << (32 - cp->tag_shift))-1;
  cp->tagset_mask = ~cp->blk_mask;

  /* Calculate subblock masks, if necessary */
  if (IS_CACHE_SUBBLOCKED(cp))
    {
      cp->subblock_mask = (cp->bsize/cp->sbsize) - 1;
      cp->subblock_shift = log_base2(cp->sbsize);
    }

  /* print derived parameters during debug */
  debug("%s: cp->blk_mask  = 0x%08x", cp->blk_mask);
  debug("%s: cp->set_shift = %d", cp->set_shift);
  debug("%s: cp->set_mask  = 0x%08x", cp->set_mask);
  debug("%s: cp->tag_shift = %d", cp->tag_shift);
  debug("%s: cp->tag_mask  = 0x%08x", cp->tag_mask);

  /* initialize cache stats */
  cp->hits = 0;
  cp->misses = 0;
  cp->replacements = 0;
  cp->writebacks = 0;
  cp->invalidations = 0;
  cp->read_traffic = 0;
  cp->write_traffic = 0;
  cp->address_traffic = 0;

  if (IS_CACHE_SUBBLOCKED(cp))
    {
      cp->partial_misses = 0;
    }

  /* blow away the last block accessed */
  /* Set it to a value too big for any tag */
  cp->last_tagset = 0xffffffff;
  cp->last_blk = NULL;

  /* allocate data blocks */
  cp->data = (char *)calloc(nsets * assoc,
			    sizeof(struct cache_blk) +
			    (cp->balloc ? (bsize*sizeof(char)) : 0));
  if (!cp->data)
    fatal("out of virtual memory");

  /* slice up the data blocks */
  for (bindex=0,i=0; i<nsets; i++)
    {
      cp->sets[i].way_head = NULL;
      cp->sets[i].way_tail = NULL;
      /* get a hash table, if needed */
      if (cp->hsize)
	{
	  cp->sets[i].hash =
	    (struct cache_blk **)calloc(cp->hsize, sizeof(struct cache_blk *));
	  if (!cp->sets[i].hash)
	    fatal("out of virtual memory");
	}
      /* NOTE: all the blocks in a set *must* be allocated contiguously,
	 otherwise, block accesses through SET->BLKS will fail (used
	 during random replacement selection) */
      cp->sets[i].blks = CACHE_BINDEX(cp, cp->data, bindex);
      
      /* link the data blocks into ordered way chain and hash table bucket
         chains, if hash table exists */
      for (j=0; j<assoc; j++)
	{
	  /* locate next cache block */
	  blk = CACHE_BINDEX(cp, cp->data, bindex);
	  bindex++;

	  /* invalidate new cache block */
	  blk->status = 0;

	  /* Setting the tag to j is just to prevent long chains in the hash 
	     table; won't matter because the block is invalid */
	  blk->tag = j;
	  blk->ready = 0;

	  /* insert cache block into set hash table */
	  if (cp->hsize)
	    link_htab_ent(cp, &cp->sets[i], blk);

	  /* insert into head of way list, order is arbitrary at this point */
	  blk->way_next = cp->sets[i].way_head;
	  blk->way_prev = NULL;
	  if (cp->sets[i].way_head)
	    cp->sets[i].way_head->way_prev = blk;
	  cp->sets[i].way_head = blk;
	  if (!cp->sets[i].way_tail)
	    cp->sets[i].way_tail = blk;
	}
    }

  cp->prefetch = prefetch;
  cp->prefetch_out_of_bound = 0;
  cp->prefetch_in_cache = 0;
  cp->prefetch_requested = 0;
  cp->prefetch_full = 0;
  cp->prefetch_crosses_page_boundary = 0;

  return cp;
}

/* register cache stats */
void
cache_func_reg_stats(struct cache *cp,	/* cache instance */
		     struct stat_sdb_t *sdb)	/* stats database */
{
  char buf[512], buf1[512], *name;

  /* get a name for this cache */
  if (!cp->name || !cp->name[0])
    name = "<unknown>";
  else
    name = cp->name;

  sprintf(buf, "%s.accesses", name);
  sprintf(buf1, "%s.hits + %s.misses", name, name);
  stat_reg_formula(sdb, buf, "total number of accesses", buf1, NULL);

  sprintf(buf, "%s.hits", name);
  stat_reg_counter(sdb, buf, "total number of hits", &cp->hits, 0, NULL);

  sprintf(buf, "%s.misses", name);
  stat_reg_counter(sdb, buf, "total number of misses", &cp->misses, 0, NULL);

  if (IS_CACHE_SUBBLOCKED(cp))
    {
      sprintf(buf, "%s.partialmisses", name);
      stat_reg_counter(sdb, buf, "misses to transfer blocks", &cp->partial_misses, 0, NULL);

      sprintf(buf, "%s.blockmisses", name);
      sprintf(buf1, "%s.misses - %s.partialmisses", name, name);
      stat_reg_formula(sdb, buf, "misses to address blocks", buf1, NULL);
    }

  sprintf(buf, "%s.replacements", name);
  stat_reg_counter(sdb, buf, "total number of replacements",
		 &cp->replacements, 0, NULL);
  sprintf(buf, "%s.writebacks", name);
  stat_reg_counter(sdb, buf, "total number of address blocks written back",
		 &cp->writebacks, 0, NULL);

  sprintf(buf, "%s.read_traffic", name);
  stat_reg_counter(sdb, buf, "total number of subblocks (misses) read", &cp->read_traffic, 0, NULL);

  sprintf(buf, "%s.write_traffic", name);
  stat_reg_counter(sdb, buf, "total number of subblocks written back", &cp->write_traffic, 0, NULL);

  sprintf(buf, "%s.address_traffic", name);
  stat_reg_counter(sdb, buf, "total number of bytes transmitted for tags", &cp->address_traffic, 0, NULL);

  sprintf(buf, "%s.traffic", name);
  sprintf(buf1, "%s.read_traffic + %s.write_traffic", name, name);
  stat_reg_formula(sdb, buf, "total number of r/w bytes transmitted", buf1, NULL);

  sprintf(buf, "%s.miss_rate", name);
  sprintf(buf1, "%s.misses / %s.accesses", name, name);
  stat_reg_formula(sdb, buf, "miss rate (i.e., misses/ref)", buf1, NULL);
  sprintf(buf, "%s.repl_rate", name);
  sprintf(buf1, "%s.replacements / %s.accesses", name, name);
  stat_reg_formula(sdb, buf, "replacement rate (i.e., repls/ref)", buf1, NULL);
  sprintf(buf, "%s.wb_rate", name);
  sprintf(buf1, "%s.writebacks / %s.accesses", name, name);
  stat_reg_formula(sdb, buf, "writeback rate (i.e., wrbks/ref)", buf1, NULL);

  if (cp->prefetch)
    {
      sprintf(buf, "%s.prefetches", name);
      stat_reg_counter(sdb, buf, "prefetch issued", &cp->prefetch_issued, 0, NULL);
      sprintf(buf, "%s.p_illegal", name);
      stat_reg_counter(sdb, buf, "illegal prefetch", &cp->prefetch_out_of_bound, 0, NULL);
      sprintf(buf, "%s.p_in_cache", name);
      stat_reg_counter(sdb, buf, "prefetch already in cache", &cp->prefetch_in_cache, 0, NULL);
      sprintf(buf, "%s.p_requested", name);
      stat_reg_counter(sdb, buf, "prefetch, data already requested", &cp->prefetch_requested, 0, NULL);
      sprintf(buf, "%s.p_no_mshrs", name);
      stat_reg_counter(sdb, buf, "prefetch, mshr table full", &cp->prefetch_full, 0, NULL);
      sprintf(buf, "%s.p_page_boundary", name);
      stat_reg_counter(sdb, buf, "page boundary crossed", &cp->prefetch_crosses_page_boundary, 0, NULL);
    }
}

static void 
do_writeback(SS_TIME_TYPE now, 
		  struct cache *cp,
		  SS_ADDR_TYPE baddr,
		  enum trans_cmd vorp,
		  struct cache_blk *blk)
{
  void *next_mp;
  cache_access_packet *wb_packet;
  enum resource_type type;
  int size;

  if (ARE_SUBBLOCKS_DIRTY(blk))
    {
      size = cp->sbsize * count_valid_bits(cp, blk->sb_dirty);
      cp->address_traffic += cp->subblock_vector_length;
    }
  else
    {
      size = cp->bsize;
    }

  next_mp = (void *) cache_follow(cp, baddr, &type);
  wb_packet = cache_create_access_packet(next_mp, Write, baddr, vorp, size, NULL, NULL, NULL, 0);

  /* Update cache statistics*/
  cp->write_traffic += size;
  cp->address_traffic += ADDRESS_SIZE;
  cp->writebacks++;

  /* Initialize rest of packet values */
  wb_packet->nbytes = size;
  wb_packet->cmd = Write;
  wb_packet->addr = baddr;
  wb_packet->vorp = vorp;

  /* write back the cache block */
  (void) blk_access_fn(-1, wb_packet, type);

  /* Since this "chain" of cache accesses are done, free the packet */
  cache_free_access_packet(wb_packet);
}

/* implements tagged prefetch, see Cache Memories, A. J. Smith, 1994 */
/* Assumed that address sent is block address plus block size */
static void
do_prefetch(SS_TIME_TYPE now, 
		 struct cache *cp, 
		 SS_ADDR_TYPE addr, 
		 enum trans_cmd vorp, 
		 SS_ADDR_TYPE c_set)
{
  SS_ADDR_TYPE set;
  SS_ADDR_TYPE tag;
  SS_ADDR_TYPE sb;
  struct cache_blk *blk;
  enum resource_type type;
  void *next_mp;
  struct cache_blk *repl;
  cache_access_packet *pre_packet;
  int size, vector, subblock_miss = FALSE;

  tag = CACHE_TAG(cp, addr);

  /* Assumes that next block to prefetch will be the set incremented by one (i.e. the
     set index bits are the low-order bits above the block offset */
  set = CACHE_SET_INC(cp, c_set);
  sb = CACHE_SB_TAG(cp, addr);

  /* check alignment */
  if (((addr & (cp->bsize - 1)) != 0) && ((!IS_CACHE_SUBBLOCKED(cp)) || ((addr & (cp->sbsize - 1)) != 0)))
    fatal("do_prefetch: access error: bad alignement, addr %#0x", addr);

  if ((CACHE_BADDR(cp, addr) != addr) && ((!IS_CACHE_SUBBLOCKED(cp)) || (CACHE_SBADDR(cp, addr) != addr)))
    fatal("do_prefetch: bad prefetch addr, addr %#0x", addr);

  if (MEM_BLOCK(addr) != MEM_BLOCK(addr - cp->bsize))
    {
      cp->prefetch_crosses_page_boundary++;
      return;
    }

  if ((cp->trans == VIVT) && (addr < ld_data_base || addr >= ld_stack_base))
    {
      cp->prefetch_out_of_bound++;
      return;
    }

  /* low-associativity cache, linear search the way list */
  FIND_BLK_MATCH(cp, blk, set, tag, sb, subblock_miss, cache_hit);

  cp->prefetch_issued++;
  cp->nprefetches++;

  if ((subblock_miss) || (FETCH_SUBBLOCK(cp, blk, addr)))
    {
      cp->address_traffic += cp->subblock_vector_length;
      vector = CREATE_SUBBLOCK_FETCH_VECTOR(cp, blk, sb);
      size = cp->sbsize * count_valid_bits(cp, vector);
    }
  else
    {
      size = cp->bsize;
    }

  cp->address_traffic += ADDRESS_SIZE;
  cp->read_traffic += size;

  next_mp = cache_follow(cp, addr, &type);

  pre_packet = cache_create_access_packet(next_mp, Read, addr, vorp, size, NULL, NULL, NULL, 0);

  blk_access_fn(-1, pre_packet, type);

  cache_free_access_packet(pre_packet);

  /* select block to replace */
  switch (cp->policy) {
  case LRU:
  case FIFO:
    repl = cp->sets[set].way_tail;
    update_way_list(&cp->sets[set], repl, Head);
    break;
  case Random:
    {
#if defined(__CYGWIN32__) || defined(hpux) || defined(__hpux) || defined(__svr4__)
      int bindex = rand() & (cp->assoc - 1);
#else
      int bindex = random() & (cp->assoc - 1);
#endif
      repl = CACHE_BINDEX(cp, cp->sets[set].blks, bindex);
    }
    break;
  default:
    panic("bogus replacement policy");
  }

  /* remove this block from the hash bucket chain, if hash exists */
  if (cp->hsize)
    unlink_htab_ent(cp, &cp->sets[set], repl);

  /* write back replaced block data */
  if (repl->status & CACHE_BLK_VALID)
    {
      cp->replacements++;

      if (repl->status & CACHE_BLK_DIRTY)
	{
	  (int)do_writeback(-1, cp, CACHE_MK_BADDR(cp, repl->tag, set), vorp, repl);
	}
    }

  /* update block tags */
  repl->tag = tag;
  repl->status = CACHE_BLK_VALID;

  /* link this entry back into the hash table */
  if (cp->hsize)
    link_htab_ent(cp, &cp->sets[set], repl);

#ifdef BALLOC
  if (cp->balloc)
    {
      CACHE_BCOPY(cmd, repl, bofs, p, cp->bsize);
    } 
#endif

  return;

 cache_hit:
  cp->prefetch_in_cache++;
  return;
}

static SS_ADDR_TYPE
cache_translate_address(struct cache *cp,
			unsigned int cmd,
			SS_ADDR_TYPE vaddr)
{
  SS_ADDR_TYPE ptaddr, addr;
  unsigned int frame;			/* Holds tlb physical frame for MMU access */
  enum resource_type type;
  void *next_mp;
  cache_access_packet *tlb_packet;

  if (!(is_tlb(cmd)))
    {
      if (cp->tlb)
	{
	  ptaddr = VIRTUAL_PTE(vaddr);

	  tlb_packet = cache_create_access_packet(cp->tlb, Tlb, ptaddr, Virtual, PTE_SIZE, NULL, NULL, NULL, 0);
	  (void) cache_func_access(-1, tlb_packet);
	  cache_free_access_packet(tlb_packet);
	}
      /* If there is no tlb, look up in the MMU and send a request for the 
       * translation's physical address to the next level of the hierarchy */
      else
	{
	  (void) tlb_mmu_access(-1, MMU_TAG(vaddr), &frame);
	  ptaddr = PHYSICAL_PTE(frame, L2_PTE_TAG(vaddr)); 
	  next_mp = cache_follow(cp, ptaddr, &type);

	  tlb_packet = cache_create_access_packet(next_mp, Tlb, ptaddr, Physical, PTE_SIZE, NULL, NULL, NULL, 0);
	  blk_access_fn(-1, tlb_packet, type);
	  cache_free_access_packet(tlb_packet);
	}	
      /* We'll need to get the physical address */
      addr = tlb_translate_address(vaddr);
    }
  else
    {
      /* The vaddr here is the virtual PTE address, so we need to just extract the MMU tag */
      (void) tlb_mmu_access(-1, MMU_TAG_FROM_VPTE(vaddr), &frame);
      addr = PHYSICAL_PTE(frame, L2_PTE_TAG(vaddr));
    }
  return(addr);
}

/* does a cache lookup.  returns 0 for compatibility with other simulators */
/* access a cache, perform a CMD operation on cache CP at address ADDR,
   places NBYTES of data at *P, returns latency of operation if initiated
   at NOW, *P is untouched if cache blocks are not allocated (!CP->BALLOC) */

int							/* latency of access in cycles */
cache_func_access(SS_TIME_TYPE now,			/* time of access (not used here) */
		  cache_access_packet *c_packet)	/* Packet containing cache access arguments */
{
  struct cache *cp = c_packet->cp;
  SS_ADDR_TYPE addr = c_packet->addr, baddr, vaddr;
  SS_ADDR_TYPE tag;
  SS_ADDR_TYPE set;
  SS_ADDR_TYPE sb;
  SS_ADDR_TYPE bofs;
  unsigned int cmd = c_packet->cmd;
  enum trans_cmd vorp = c_packet->vorp;
  struct cache_blk *blk, *repl;
  enum resource_type type;
  void *next_mp;
  int size, subblock_miss = FALSE;
  unsigned int vector = 0;
#if !(defined(__alpha__) || defined(linux))
  extern long random(void);
#endif

  vaddr = (vorp == Virtual) ? addr : 0;
  assert(!((cp->trans == VIVT) && (vorp == Physical)));
  if ((cp->trans != VIVT) && (vorp == Virtual))
    { 
      addr = cache_translate_address(cp, cmd, vaddr);
      vorp = Physical;
    }

  baddr = CACHE_BADDR(cp, addr);
  bofs = CACHE_BLK(cp, addr);
  tag = CACHE_TAG(cp, addr);
  sb = CACHE_SB_TAG(cp, addr);
  set = (cp->trans == VIPT) ? CACHE_SET(cp, vaddr) : CACHE_SET(cp, addr);
      
  /* check for a fast hit: access to same block */
  if (CACHE_TAGSET(cp, addr) == cp->last_tagset)
    {
      /* hit in the same block */
      blk = cp->last_blk;

      if (IS_BLOCK_SUBBLOCKED(blk) && (!IS_SUBBLOCK_VALID(blk, sb)))
	subblock_miss = TRUE;
      else
	goto cache_fast_hit;
    }
    
  if (!cp->hsize)
    {
      /* low-associativity cache, linear search the way list */
      FIND_BLK_MATCH(cp, blk, set, tag, sb, subblock_miss, cache_hit);
    }
  else
    {
      /* high-associativity cache, access through the per-set hash tables */
      int hindex = CACHE_HASH(cp, tag);

      for (blk=cp->sets[set].hash[hindex];
	   blk;
	   blk=blk->hash_next)
	{
	  if (blk->tag == tag && (blk->status & CACHE_BLK_VALID))
	     if (IS_BLOCK_SUBBLOCKED(blk) && (!IS_SUBBLOCK_VALID(blk, sb)))
                subblock_miss = TRUE;
	     else
	        goto cache_hit;
	}
    }

  /* cache block not found */
  cp->misses++;

  if ((subblock_miss) || (FETCH_SUBBLOCK(cp, blk, addr)))
    {
      cp->partial_misses++;
      cp->address_traffic += cp->subblock_vector_length;
      vector = CREATE_SUBBLOCK_FETCH_VECTOR(cp, blk, sb);
      size = cp->sbsize * count_valid_bits(cp, vector);
    }
  else
    {
      size = cp->bsize;
    }

  cp->address_traffic += ADDRESS_SIZE;
  cp->read_traffic += size;

  next_mp = cache_follow(cp, vaddr, &type);

  c_packet->cp = (struct cache *) next_mp;
  c_packet->cmd = (is_write(cmd) ? (Read | Miss_access) : ((cmd & access_mask) | Miss_access));
  c_packet->addr = baddr;
  c_packet->vorp = vorp;
  c_packet->nbytes = size;
  
  /* schedule access to the lower level of the mem. hierarchy */
  blk_access_fn(-1, c_packet, type);

  if (vector && (repl = find_blk_match_no_jump(cp, set, tag)))
    {
      repl->sb_valid |= vector;
      if (is_write(cmd))
	{
	  CACHE_SET_SB_BIT(sb, repl->sb_dirty);
	  repl->status |= CACHE_BLK_DIRTY;
	}

      /* if LRU replacement and this is not the first element of list, reorder */
      if (repl->way_prev && cp->policy == LRU)
	{
	  /* move this block to head of the way (MRU) list */
	  update_way_list(&cp->sets[set], repl, Head);
	}
    }
  else	
    {
      /* select block to replace */
      switch (cp->policy) {
      case LRU:
      case FIFO:
	repl = cp->sets[set].way_tail;
	update_way_list(&cp->sets[set], repl, Head);
	break;
      case Random:
	{
#if defined(__CYGWIN32__) || defined(hpux) || defined(__hpux) || defined(__svr4__)
	  int bindex = rand() & (cp->assoc - 1);
#else
	  int bindex = random() & (cp->assoc - 1);
#endif
	  repl = CACHE_BINDEX(cp, cp->sets[set].blks, bindex);
	}
	break;
      default:
	panic("bogus replacement policy");
      }
      
      /* remove this block from the hash bucket chain, if hash exists */
      if (cp->hsize)
	unlink_htab_ent(cp, &cp->sets[set], repl);
      
      /* write back replaced block data */
      if (repl->status & CACHE_BLK_VALID)
	{
	  cp->replacements++;
	  
	  if (repl->status & CACHE_BLK_DIRTY)
	    {
	      do_writeback(-1, cp, CACHE_MK_BADDR(cp, repl->tag, CACHE_SET(cp, addr)), vorp, repl);
	    }
	}
      
      /* update block tags */
      repl->tag = tag;
      repl->status = CACHE_BLK_VALID | (is_write(cmd) ? CACHE_BLK_DIRTY : 0);
      repl->sb_valid = repl->sb_dirty = 0;
      
      if (vector)
	{
	  repl->sb_valid = vector;
	  if (is_write(cmd))
	    CACHE_SET_SB_BIT(sb, repl->sb_dirty);
	}

      /* link this entry back into the hash table */
      if (cp->hsize)
	link_htab_ent(cp, &cp->sets[set], repl);
      
#ifdef BALLOC
      if (cp->balloc)
	{
	  CACHE_BCOPY(cmd, repl, bofs, p, cp->bsize);
	}
#endif
      
    }
  /* Blow away last tagset so that we don't hit twice after a miss */
  cp->last_tagset = 0xffffffff;

  return 0;

 cache_hit:

  cp->hits++;

#ifdef BALLOC
  /* copy data out of cache block, if block exists */
  if (cp->balloc)
    {
      CACHE_BCOPY(cmd, blk, bofs, p, nbytes);
    }
#endif

  /* update dirty status */
  if (is_write(cmd))
    blk->status |= CACHE_BLK_DIRTY;

  if (IS_BLOCK_SUBBLOCKED(blk))
    {
      if (is_write(cmd))
	CACHE_SET_SB_BIT(sb, blk->sb_dirty);
    } 

  /* prefetch if you will */
  if (cp->prefetch && !(blk->status & CACHE_BLK_TAG))
    {	
      do_prefetch(now, cp, baddr + cp->bsize, vorp, set);

      /* set the tag prefetch */
      blk->status |= CACHE_BLK_TAG;
    } 

  /* if LRU replacement and this is not the first element of list, reorder */
  if (blk->way_prev && cp->policy == LRU)
  {
    /* move this block to head of the way (MRU) list */
    update_way_list(&cp->sets[set], blk, Head);
  }

  /* tag is unchanged, so hash links (if they exist) are still valid */

  /* record the last block to hit */
  cp->last_tagset = CACHE_TAGSET(cp, addr);
  cp->last_blk = blk;

  return 0;

 cache_fast_hit: /* fast hit handler */
  
  cp->hits++;

#ifdef BALLOC
  /* copy data out of cache block, if block exists */
  if (cp->balloc)
    {
      CACHE_BCOPY(cmd, blk, bofs, p, nbytes);
    }
#endif

  /* update dirty status */
  if (is_write(cmd))
    blk->status |= CACHE_BLK_DIRTY;

  if (IS_BLOCK_SUBBLOCKED(blk))
    {
      if (is_write(cmd))
	CACHE_SET_SB_BIT(sb, blk->sb_dirty);
    } 

  /* prefetch if you will */
  if (cp->prefetch && !(blk->status & CACHE_BLK_TAG))
    {	
      do_prefetch(-1, cp, baddr + cp->bsize, vorp, set);

      /* set the tag prefetch */
      blk->status |= CACHE_BLK_TAG;
    } 

  /* this block hit last, no change in the way list */
  /* tag is unchanged, so hash links (if they exist) are still valid */

  return 0;
}

/* flush the entire cache, returns latency of the operation */
unsigned int				/* latency of the flush operation */
cache_func_flush(struct cache *cp,		/* cache instance to flush */
		 SS_TIME_TYPE now)		/* time of cache flush */
{
  int i; /* min latency to probe cache */
  struct cache_blk *blk;
  enum trans_cmd vorp = (cp->trans == VIVT) ? Virtual : Physical;

  /* blow away the last block to hit */
  cp->last_tagset = 0;
  cp->last_blk = NULL;

  /* no way list updates required because all blocks are being invalidated */
  for (i=0; i<cp->nsets; i++)
    {
      for (blk=cp->sets[i].way_head; blk; blk=blk->way_next)
	{
	  if (blk->status & CACHE_BLK_VALID)
	    {
	      cp->invalidations++;
	      blk->status &= ~CACHE_BLK_VALID;

	      if (blk->status & CACHE_BLK_DIRTY)
		{
		    do_writeback(-1, cp, CACHE_MK_BADDR(cp, blk->tag, blk->set), vorp, blk);
		    /* TODO FIGURE OUT		    assert((cp->trans == VIPT) || (blk->set == i)); */
		}
	    }
	}
    }

  return 0;
}

/* flush the block containing ADDR from the cache CP */
/* Does not currently support subblock flushing */
unsigned int				
cache_func_flush_addr(struct cache *cp,	/* cache instance to flush */
		      SS_ADDR_TYPE addr,	/* address of block to flush */
		      SS_TIME_TYPE now)	/* time of cache flush */
{
  SS_ADDR_TYPE tag = CACHE_TAG(cp, addr);
  SS_ADDR_TYPE set = CACHE_SET(cp, addr);
  struct cache_blk *blk;

  enum trans_cmd vorp = ((cp->trans == VIVT) ? Virtual : Physical);

  if (cp->hsize)
    {
      /* higly-associativity cache, access through the per-set hash tables */
      int hindex = CACHE_HASH(cp, tag);

      for (blk=cp->sets[set].hash[hindex];
	   blk;
	   blk=blk->hash_next)
	{
	  if (blk->tag == tag && (blk->status & CACHE_BLK_VALID))
	    break;
	}
    }
  else
    {
      /* low-associativity cache, linear search the way list */
      for (blk=cp->sets[set].way_head;
	   blk;
	   blk=blk->way_next)
	{
	  if (blk->tag == tag && (blk->status & CACHE_BLK_VALID))
	    break;
	}
    }

  if (blk)
    {
      cp->invalidations++;
      blk->status &= ~CACHE_BLK_VALID;

      /* blow away the last block to hit */
      cp->last_tagset = 0;
      cp->last_blk = NULL;

      if (blk->status & CACHE_BLK_DIRTY)
	{
	  do_writeback(-1, cp, CACHE_MK_BADDR(cp, blk->tag, blk->set), vorp, blk);
	  assert((cp->trans == VIPT) ? 1 : (blk->set == set));
	}
      /* move this block to tail of the way (LRU) list */
      update_way_list(&cp->sets[set], blk, Tail);
    }

  return 0;
}


