/*
 * rambus.c - rambus timing module source
 *
 * This file is a part of the SimpleScalar tool suite, written by
 * Todd M. Austin as a part of the Multiscalar Research Project.
 * The file has been substantially modified by Doug Burger, as a
 * part of the Galileo research project.  Alain Kagi has also 
 * contributed to this code.
 *  
 * The tool suite is currently maintained by Doug Burger and Todd M. Austin.
 * 
 * Copyright (C) 1994, 1995, 1996, 1997 by Todd M. Austin
 *
 * This source file is distributed "as is" in the hope that it will be
 * useful.  The tool set comes with no warranty, and no author or
 * distributor accepts any responsibility for the consequences of its
 * use. 
 * 
 * Everyone is granted permission to copy, modify and redistribute
 * this tool set under the following conditions:
 * 
 *    This source code is distributed for non-commercial use only. 
 *    Please contact the maintainer for restrictions applying to 
 *    commercial use.
 *
 *    Permission is granted to anyone to make or distribute copies
 *    of this source code, either as received or modified, in any
 *    medium, provided that all copyright notices, permission and
 *    nonwarranty notices are preserved, and that the distributor
 *    grants the recipient permission for further redistribution as
 *    permitted by this document.
 *
 *    Permission is granted to distribute this file in compiled
 *    or executable form under the same conditions that apply for
 *    source code, provided that either:
 *
 *    A. it is accompanied by the corresponding machine-readable
 *       source code,
 *    B. it is accompanied by a written offer, with no time limit,
 *       to give anyone a machine-readable copy of the corresponding
 *       source code in return for reimbursement of the cost of
 *       distribution.  This written offer must permit verbatim
 *       duplication by anyone, or
 *    C. it is distributed by someone who received only the
 *       executable form, and is accompanied by a copy of the
 *       written offer of source code that they received concurrently.
 *
 * In other words, you are welcome to use, share and improve this
 * source file.  You are forbidden to forbid anyone else to use, share
 * and improve what you give them.
 *
 * INTERNET: dburger@cs.wisc.edu
 * US Mail:  1210 W. Dayton Street, Madison, WI 53706
 *
 * This file was written by Wei-fen Lin at the University of Michigan
 *
 * $Id: rambus.c,v 1.1.4.1 2000/04/03 19:48:06 hrishi Exp $
 *
 * Revision 1.1  1998/10/04  18:50:23  dburger
 * Initial revision
 *
 *
 */

#include <stdlib.h>
#include <assert.h>
#include "cache.h"
#include "misc.h"
#include "stats.h"
#include "tlb.h"

/* Forward prototypes for the queue and RDRAM helpers defined below. */

/* Reset a queue to empty. */
void QUEUE_Init(QUEUE *qu); 
/* Hand out one freshly malloc'ed QITEM. */
QITEM *GetFreeQITEM(void);
/* Append an access to the queue unless the same (di,bi,ri,ci) is queued. */
void QUEUE_Add(QUEUE *qu,int di,int bi, int ri, int ci,int size,
					int type, SS_TIME_TYPE endtime,SS_TIME_TYPE	now); 
/* Look up a queued access by device/bank/row/column; NULL if absent. */
QITEM *QUEUE_Find(QUEUE *qu,int di, int bi, int ri, int ci);
/* Unlink and free one item of the queue. */
void QUEUE_Delete(QUEUE *qu,QITEM *Item);
/* Delete every item whose EndTime is earlier than `time`. */
void QUEUE_DeleteDoneItem(QUEUE *qu,SS_TIME_TYPE time);
/* Allocate the sense-amp arrays of a device and mark them IDLE. */
void RDRAM_DEVICE_Init(RDRAM_DEVICE *rd);
/* Classify an access relative to the previous one. */
RELATION RAMBUS_ComparePreAccess(_RAMBUS *rb,int di,int bi, int ri);
/* Service time of an access for a given relation. */
SS_TIME_TYPE RAMBUS_GetServiceTime(_RAMBUS *rb,int size, int type,RELATION r);

/*-----------------------------
		Queue functions
------------------------------*/


/*
 * QUEUE_Init - put a queue into the empty state.
 *
 * Clears the head and tail links and zeroes the element count.  Items that
 * may still be linked from the queue are not released here.
 */
void QUEUE_Init(QUEUE *qu)
{
	qu->Size = 0;
	qu->Head = qu->Tail = NULL;
}

/*
 * GetFreeQITEM - hand out one QITEM from a batch of 200.
 *
 * Items are malloc'ed 200 at a time: whenever the cursor is at slot 0
 * (the first call, and again after every 200 calls) the whole slot table
 * is refilled with fresh allocations.  Each call returns the item at the
 * cursor and advances the cursor modulo 200.
 *
 * The fields of the returned item are uninitialized; the caller fills
 * them in.  The table itself keeps no ownership: slots are simply
 * overwritten on the next refill.
 */
QITEM *GetFreeQITEM(void)
{
	enum { POOL_SLOTS = 200 };
	static QITEM *pool[POOL_SLOTS];
	static int cursor = 0;
	QITEM *item;
	int slot;

	if (cursor == 0) {
		/* start of a new batch: refill every slot */
		memset(pool, 0, sizeof pool);
		for (slot = 0; slot < POOL_SLOTS; slot++) {
			pool[slot] = (QITEM *)malloc(sizeof(QITEM));
			assert(pool[slot] != NULL);
		}
	}

	item = pool[cursor];
	cursor = (cursor + 1) % POOL_SLOTS;
	return item;
}

/*
 * QUEUE_Add - append an access to the tail of the queue.
 *
 * If an entry with the same (device, bank, row, column) indices is already
 * queued, the call returns without changing anything.  Otherwise a new
 * item is taken from GetFreeQITEM(), filled in from the arguments and
 * linked behind the current tail; the queue size is incremented.
 *
 *   qu            queue to append to
 *   di,bi,ri,ci   device / bank / row / column indices of the access
 *   size, type    stored in the item as given
 *   endtime       stored as the item's EndTime
 *   now           accepted for interface compatibility; not used
 */
void QUEUE_Add(QUEUE *qu,int di,int bi, int ri,int ci, int size,
						int type, SS_TIME_TYPE endtime,SS_TIME_TYPE now)
{
	QITEM *entry;

	/* an identical access is already pending: nothing to add */
	if (QUEUE_Find(qu, di, bi, ri, ci) != NULL)
		return;

	entry = GetFreeQITEM();
	assert(entry != NULL);

	entry->Next = NULL;
	entry->DevIndex = di;
	entry->BankIndex = bi;
	entry->RowIndex = ri;
	entry->ColIndex = ci;
	entry->Size = size;
	entry->Type = type;
	entry->EndTime = endtime;

	/* link behind the tail, or become the only element */
	if (qu->Head != NULL)
		qu->Tail->Next = entry;
	else
		qu->Head = entry;
	qu->Tail = entry;

	qu->Size++;
}

/*
 * QUEUE_Find - look up a queued access by its indices.
 *
 * Walks the list from the head and returns the first item whose device,
 * bank, row and column indices all equal the arguments, or NULL when no
 * such item is queued.
 */
QITEM* QUEUE_Find(QUEUE *qu,int di, int bi, int ri, int ci)
{
	QITEM *cur;

	for (cur = qu->Head; cur != NULL; cur = cur->Next) {
		if (cur->DevIndex != di || cur->BankIndex != bi)
			continue;
		if (cur->RowIndex != ri || cur->ColIndex != ci)
			continue;
		return cur;
	}

	return NULL;
}

/*
 * QUEUE_Delete - unlink one item from the queue and free it.
 *
 * Searches the list for the node whose address is `Item`.  When found, the
 * node is unlinked (updating Head and/or Tail as needed), the size is
 * decremented and the node is released with free().  If `Item` is not in
 * the queue, nothing happens.
 */
void QUEUE_Delete(QUEUE *qu,QITEM *Item)
{
	QITEM *cur;
	QITEM *prev = NULL;

	for (cur = qu->Head; cur != NULL; prev = cur, cur = cur->Next) {
		if (cur != Item)
			continue;

		if (cur == qu->Head) {
			/* removing the first node; it may also be the last one */
			qu->Head = cur->Next;
			if (cur == qu->Tail)
				qu->Tail = NULL;
		} else if (cur == qu->Tail) {
			/* removing the last node: predecessor becomes the tail */
			prev->Next = NULL;
			qu->Tail = prev;
		} else {
			/* removing an interior node */
			prev->Next = cur->Next;
		}

		qu->Size--;
		free(cur);
		return;
	}
}

/*
 * QUEUE_DeleteDoneItem - drop every finished access from the queue.
 *
 * An item counts as finished when its EndTime is strictly earlier than
 * `time`.  Each finished item is removed with QUEUE_Delete(), which also
 * frees it, so the successor is read before the item is deleted.
 */
void QUEUE_DeleteDoneItem(QUEUE *qu,SS_TIME_TYPE time)
{
	QITEM *cur;
	QITEM *next;

	for (cur = qu->Head; cur != NULL; cur = next) {
		next = cur->Next;	/* cur may be freed below */
		if (cur->EndTime < time)
			QUEUE_Delete(qu, cur);
	}
}



/*-----------------------------
		RDRAM functions		
-------------------------------*/



/*
 * RDRAM_DEVICE_Init - set up the sense-amp arrays of one RDRAM device.
 *
 * Allocates 9 entries for rd->RAmp and 8 entries for rd->LAmp (both
 * zero-filled by calloc), asserts that both allocations succeeded, and
 * marks every entry IDLE.  Called once per device from rambus_init().
 */
void RDRAM_DEVICE_Init(RDRAM_DEVICE *rd)
{
	enum { RIGHT_AMPS = 9, LEFT_AMPS = 8 };
	int k;

	rd->RAmp = (RDRAM_SENSEAMP *)calloc(RIGHT_AMPS, sizeof(RDRAM_SENSEAMP));
	rd->LAmp = (RDRAM_SENSEAMP *)calloc(LEFT_AMPS, sizeof(RDRAM_SENSEAMP));
	assert(rd->RAmp != NULL && rd->LAmp != NULL);

	/* every sense amp starts out idle */
	for (k = 0; k < RIGHT_AMPS; k++)
		rd->RAmp[k].Status = IDLE;

	for (k = 0; k < LEFT_AMPS; k++)
		rd->LAmp[k].Status = IDLE;
}

/*
 * rambus_init - initialize a RAMBUS channel model.
 *
 *   rb           structure to initialize
 *   cycle_ratio  factor every _tXXX timing constant is multiplied by
 *                before being stored in rb
 *
 * The number of devices is taken from the global rdram_device_per_channel.
 * The function allocates and initializes the device array and the
 * (empty) process queue, marks "no previous access" by setting
 * PreAccess.DevIndex to -1, scales the timing parameters and zeroes all
 * statistics.  Allocation failures are caught by assert().
 */
void rambus_init(_RAMBUS *rb, int cycle_ratio)
{
	int dev;
	int rel;

	/* devices on the channel */
	rb->No_of_Device = rdram_device_per_channel;
	rb->Device = (RDRAM_DEVICE *)malloc(rdram_device_per_channel*sizeof(RDRAM_DEVICE));
	assert(rb->Device != NULL);
	for (dev = 0; dev < rdram_device_per_channel; dev++)
		RDRAM_DEVICE_Init(&rb->Device[dev]);

	/* process queue, initially empty */
	rb->Process = (QUEUE *)malloc(sizeof(QUEUE));
	assert(rb->Process != NULL);
	QUEUE_Init(rb->Process);

	/* no previous access yet: DevIndex == -1 is the marker that
	   RAMBUS_ComparePreAccess() tests for */
	rb->PreAccess.DevIndex = -1;
	rb->PreAccess.EndTime = 0;

	/* timing parameters, scaled by cycle_ratio */
	rb->tRC     = _tRC * cycle_ratio;
	rb->tRAS    = _tRAS * cycle_ratio;
	rb->tPACKET = _tPACKET * cycle_ratio;
	rb->tRCD    = _tRCD * cycle_ratio;
	rb->tCAC    = _tCAC * cycle_ratio;
	rb->tRDP    = _tRDP * cycle_ratio;
	rb->tRP     = _tRP * cycle_ratio;
	rb->tCWD    = _tCWD * cycle_ratio;

	/* statistics */
	rb->Stats.ReadRefs = 0;
	rb->Stats.WriteRefs = 0;
	rb->Stats.TlbRefs = 0;
	rb->Stats.PreRefs = 0;
	rb->Stats.ReadHits = 0;
	rb->Stats.WriteHits = 0;
	rb->Stats.IdleTime = 0;
	rb->Stats.AveReadQueuingTime = 0;
	rb->Stats.AveWriteQueuingTime = 0;
	for (rel = 0; rel <= SAMEDEV_SAMEBANK_SAMEROW; rel++)
		rb->Stats.Access[rel] = 0;
}

/*
 * RAMBUS_ComparePreAccess - classify an access relative to the previous one.
 *
 *   rb          channel state (previous access and sense-amp status)
 *   di, bi, ri  device, bank and row index of the new access
 *
 * The checks are made in this order; the first that matches wins:
 *   1. no previous access recorded (PreAccess.DevIndex == -1)
 *   2. both sense amps of the bank (RAmp[(bi+1)/2], LAmp[bi/2]) are IDLE
 *   3. different device than the previous access, split by whether the
 *      sense amps are in the (BANK0, BANK1) state
 *   4. same device, previous bank is bi+1 or bi-1
 *   5. same device and bank, split by same / different row
 *   6. same device, non-adjacent bank, split by sense-amp state as in 3
 *
 * NOTE(review): the "activated" test is RAmp==BANK0 && LAmp==BANK1 for
 * every bank, while rambus_access() stores BANK0/BANK1 depending on the
 * parity of bi -- confirm this asymmetry is intended.
 */
RELATION RAMBUS_ComparePreAccess(_RAMBUS *rb,int di,int bi, int ri)
{
	RDRAM_SENSEAMP *right;
	RDRAM_SENSEAMP *left;

	if (rb->PreAccess.DevIndex == -1)
		return RDRAMFIRSTACCESS;

	/* the two sense amps that serve bank bi */
	right = &rb->Device[di].RAmp[(bi+1)/2];
	left = &rb->Device[di].LAmp[bi/2];

	if (right->Status == IDLE && left->Status == IDLE)
		return SAFIRSTACCESS;

	if (rb->PreAccess.DevIndex != di) {
		if (right->Status == BANK0 && left->Status == BANK1)
			return DIFFDEV_SAACT;
		return DIFFDEV_SANONACT;
	}

	/* from here on: same device as the previous access */
	if (rb->PreAccess.BankIndex == bi+1 || rb->PreAccess.BankIndex == bi-1)
		return SAMEDEV_ADJBANK;

	if (rb->PreAccess.BankIndex == bi) {
		if (rb->PreAccess.RowIndex == ri)
			return SAMEDEV_SAMEBANK_SAMEROW;
		return SAMEDEV_SAMEBANK_DIFFROW;
	}

	if (right->Status == BANK0 && left->Status == BANK1)
		return SAMEDEV_NONADJBANK_SAACT;

	return SAMEDEV_NONADJBANK_SANONACT;
}

/*
 * RAMBUS_GetServiceTime - time the RDRAM spends processing one access.
 *
 *   rb    channel state; supplies the scaled timing parameters
 *   size  size of the access in bytes
 *   type  access type; currently not used (reads and writes are timed
 *         alike, i.e. the restore packet of a write is ignored)
 *   r     relation of this access to the previous one
 *
 * The access needs one column packet per 16*rdram_channel_num bytes,
 * rounded up.  Calls fatal() if `r` is not one of the known relations.
 */
SS_TIME_TYPE RAMBUS_GetServiceTime(_RAMBUS *rb,int size, int type,RELATION r)
{
	/* number of column packets: size / (16*channels), rounded up */
	int no_cols = size/(16*rdram_channel_num);

	if (size%(16*rdram_channel_num) != 0)
		no_cols += 1;

	switch (r) {

		/* Row must be activated first: an ACT packet plus no_cols
		   row/column packets (original note: 29 RAMBUS cycles with
		   the default settings). */
		case RDRAMFIRSTACCESS:
		case SAFIRSTACCESS:
			return rb->tPACKET*(no_cols+1)+rb->tRCD+rb->tCAC;

		/* Row already in the sense amps: only the no_cols row/column
		   packets (original note: 20 RAMBUS cycles with the default
		   settings). */
		case DIFFDEV_SAACT:
		case SAMEDEV_NONADJBANK_SAACT:
		case SAMEDEV_SAMEBANK_SAMEROW:
			return rb->tPACKET*(no_cols+1)+rb->tCAC;

		/* Precharge, then activate, then the no_cols row/column
		   packets (original note: 37 RAMBUS cycles with the default
		   settings).  All four relations share the same formula. */
		case DIFFDEV_SANONACT:
		case SAMEDEV_NONADJBANK_SANONACT:
		case SAMEDEV_SAMEBANK_DIFFROW:
		case SAMEDEV_ADJBANK:
			return rb->tRDP+rb->tRP+rb->tPACKET*no_cols+rb->tRCD+rb->tCAC;

		default:
			break;
	}

	fatal("Control shouldn't reach this point in RAMBUS_getservicetime");
	return 0;
}


/* MASK(one,zero) builds an integer with `one` consecutive 1-bits, shifted
   left so that the lowest of them sits at bit position `zero`. */
#define MASK(one,zero) (((1<<(one))-1)<<(zero))

/*
 * rambus_access - latency of one access to the RDRAM.
 *
 *   rb    channel state; updated (previous access, sense amps, statistics)
 *   addr  address of the access; decoded into column/device/bank/row
 *   size  size of the access in bytes
 *   type  access type; (type & access_mask) must be Read, Write, Tlb or
 *         Prefetch, otherwise fatal() is called
 *   now   current time
 *
 * Returns the time from `now` until the access completes, i.e. the time
 * spent waiting behind the previous access plus the service time.
 *
 * Reads and writes are not distinguished for timing purposes (the restore
 * packet of a write is ignored).
 *
 * The per-access Process queue (QUEUE_Find / QUEUE_Add /
 * QUEUE_DeleteDoneItem) is not used by this function.
 */
SS_TIME_TYPE rambus_access(_RAMBUS *rb,int addr,int size, int type, SS_TIME_TYPE now){

	int di,bi,ri,ci;
	SS_TIME_TYPE lat=0,servicetime;
	int waittime=0,overlap=0;
	RELATION relation;
	/* Number of column packets.  A dualoct is 16 bytes per channel; this
	   must be the same count RAMBUS_GetServiceTime() uses, because the
	   thresholds below are compared against the service time it returns. */
	int no_cols=(size%(16*rdram_channel_num)==0 ?
		size/(16*rdram_channel_num): size/(16*rdram_channel_num)+1);
	int cmd;
	int channel_shift=log_base2(rdram_channel_num);
	int device_shift=log_base2(rdram_device_per_channel);

	/* Address layout, low bits to high bits:
	     byte within dualoct | column | device | bank | row
	   Adjacent rows are in different devices.  If rdram_channel_num is
	   more than 1, memory is interleaved among the channels and all
	   channels are synchronized, so they are treated as one channel with
	   larger bandwidth, larger sense-amp capacity and a larger dualoct. */

	/* column index: 2^(4+channel_shift) bytes in a dualoct */
	ci=(addr & MASK(6,4+channel_shift)  ) >>(4+channel_shift)  ;

	/* device index: 2^6 columns in each row */
	di=(addr & MASK(device_shift,10+channel_shift) ) >>(10+channel_shift) ;

	/* bank index: 2^(device_shift) devices in each RDRAM */
	bi=(addr & MASK(4,device_shift+10+channel_shift) ) >> (device_shift+10+channel_shift);

	/* row index: 2^4 banks in a device */
	ri=(addr & MASK(9,14+device_shift+channel_shift ) )>> (14+device_shift+channel_shift) ;

	/* The column index is decoded for completeness of the layout but is
	   not needed by the timing model below. */
	(void)ci;

	/* Compare to the previous access */
	relation=RAMBUS_ComparePreAccess(rb,di,bi,ri);

	/* Calculate how much of this access may overlap with the previous one */
	switch(relation){

		/* First access' overlap is 0 */
		case RDRAMFIRSTACCESS:
			overlap=0;
			break;

		case SAFIRSTACCESS:
			overlap=(no_cols <=2 ? RAMBUS_GetServiceTime(rb,size,type, relation)-2*rb->tPACKET
										: rb->tPACKET+rb->tCAC+rb->tRCD);
			break;

		case DIFFDEV_SANONACT:
		case SAMEDEV_NONADJBANK_SANONACT:
			overlap=(no_cols <=4 ? RAMBUS_GetServiceTime(rb,size,type, relation)-2*rb->tPACKET
										: rb->tCAC+rb->tRCD+rb->tRDP+rb->tRP);
			/* must not fall through: the next case would overwrite
			   the overlap computed above */
			break;

		case DIFFDEV_SAACT:
		case SAMEDEV_NONADJBANK_SAACT:
		case SAMEDEV_SAMEBANK_SAMEROW:
			overlap=rb->tCAC+rb->tPACKET;
			break;

		case SAMEDEV_SAMEBANK_DIFFROW:
		case SAMEDEV_ADJBANK:
			overlap=(no_cols <=10 ? RAMBUS_GetServiceTime(rb,size,type,relation)-rb->tRAS
										 : rb->tRDP+rb->tRP+rb->tRCD+rb->tCAC);
			break;

		/* This shouldn't happen */
		default:
			printf("Bad relation to the previous access. This shouldn't happen.\n");
			break;
	}

	servicetime=RAMBUS_GetServiceTime(rb,size,type,relation);

	/* Record this access as the new "previous access" */
	rb->PreAccess.DevIndex=di;
	rb->PreAccess.BankIndex=bi;
	rb->PreAccess.RowIndex=ri;
	rb->PreAccess.Size=size;
	rb->PreAccess.Type=type;

	/* Packet comes in before RDRAM can serve it: it is appended behind
	   the previous access */
  	if(now <= rb->PreAccess.EndTime-overlap){
	  	rb->PreAccess.EndTime+=servicetime;
		lat=rb->PreAccess.EndTime-now;
		waittime=rb->PreAccess.EndTime-overlap-now;
	}
	/* Packet comes in when RDRAM is available to serve it.
	   However, there are still other packets in the RDRAM in the meantime */
	else if( now <= rb->PreAccess.EndTime){
		rb->PreAccess.EndTime=now+servicetime;
		lat=servicetime;
		waittime=0;
	}

	/* Packet comes in when RDRAM is idle and gets served at once */
	else{
		rb->Stats.IdleTime+=now-rb->PreAccess.EndTime;
		rb->PreAccess.EndTime=now+servicetime;
		lat=servicetime;
		waittime=0;
	}

	/* Reference counters by access type */
	cmd=type & access_mask;
	if(cmd==Read)
		 rb->Stats.ReadRefs+=1;
	else if(cmd==Write)
		 rb->Stats.WriteRefs+=1;
 	else if(cmd==Tlb)
		 rb->Stats.TlbRefs+=1;
	else if(cmd==Prefetch)
		 rb->Stats.PreRefs+=1;
	else
		 fatal("Cannot recognize the type in RDRAM.\n");

	/* Sense-amp hit: both sense amps of the bank already hold row ri.
	   NOTE(review): the running queuing-time averages are only updated
	   on a hit, while their divisor counts all references -- confirm
	   this is intended. */
	if(rb->Device[di].RAmp[(bi+1)/2].Status==((bi%2==1)?BANK0:BANK1) &&
		rb->Device[di].RAmp[(bi+1)/2].RowIndex==ri &&
		rb->Device[di].LAmp[bi/2].Status==((bi%2==0)?BANK0:BANK1) &&
		rb->Device[di].LAmp[bi/2].RowIndex==ri){
		if(cmd == Read || cmd == Tlb || cmd == Prefetch){
			rb->Stats.ReadHits+=1;
			rb->Stats.AveReadQueuingTime=(rb->Stats.AveReadQueuingTime*
			(rb->Stats.ReadRefs+rb->Stats.TlbRefs+rb->Stats.PreRefs-1)+waittime)
			/(rb->Stats.ReadRefs+rb->Stats.TlbRefs+rb->Stats.PreRefs);
		}
		else if(cmd==Write){
			rb->Stats.WriteHits+=1;
			rb->Stats.AveWriteQueuingTime=(rb->Stats.AveWriteQueuingTime*
			(rb->Stats.WriteRefs-1)+waittime)/rb->Stats.WriteRefs;
		}
	}
	else{/* miss: load row ri into the sense amps of bank bi */
		rb->Device[di].RAmp[(bi+1)/2].Status=((bi%2==1)?BANK0:BANK1);
		rb->Device[di].RAmp[(bi+1)/2].RowIndex=ri;
		rb->Device[di].LAmp[bi/2].Status=((bi%2==0)?BANK0:BANK1);
		rb->Device[di].LAmp[bi/2].RowIndex=ri;
	}
	rb->Stats.Access[relation]+=1;

	/* return latency */
	return (lat);
}

/*
 * rambus_timing_reg_stats - register the RAMBUS statistics with a stats
 * database.
 *
 *   rb    channel whose rb->Stats fields are registered
 *   sdb   stats database to register with
 *   name  prefix of every statistic; each one is named "<name>.<stat>"
 *
 * Every statistic is registered through stat_reg_counter() with an initial
 * value of 0 and a NULL format argument.  The RDRAMFIRSTACCESS relation
 * counter is not registered.
 */
void rambus_timing_reg_stats( _RAMBUS *rb, struct stat_sdb_t *sdb, char *name)
{
  char buf[512];

  /* reference counts by access type */
  sprintf(buf, "%s.ReadRefs", name);
  stat_reg_counter(sdb, buf, "total Read references to RAMBUS",
		   &(rb->Stats.ReadRefs),0, NULL);
  sprintf(buf, "%s.WriteRefs", name);
  stat_reg_counter(sdb, buf, "total Write references to RAMBUS",
		   &(rb->Stats.WriteRefs),0, NULL);
  /* the TLB statistic must point at TlbRefs, not at WriteRefs */
  sprintf(buf, "%s.TlbRefs", name);
  stat_reg_counter(sdb, buf, "total Tlb references to RAMBUS",
		   &(rb->Stats.TlbRefs),0, NULL);
  sprintf(buf, "%s.Prefetch", name);
  stat_reg_counter(sdb, buf, "total Prefetch references to RAMBUS",
		   &(rb->Stats.PreRefs),0, NULL);

  /* sense-amp hits */
  sprintf(buf, "%s.ReadHits", name);
  stat_reg_counter(sdb, buf, "total read Hits to RDRAM SenseAmps",
		   &(rb->Stats.ReadHits),0, NULL);
  sprintf(buf, "%s.WriteHits", name);
  stat_reg_counter(sdb, buf, "total write Hits to RDRAM SenseAmps",
		   &(rb->Stats.WriteHits),0, NULL);

  /* queuing and idle time */
  sprintf(buf, "%s.AveReadQueuingTime", name);
  stat_reg_counter(sdb, buf, "average queuing time for read",
		   &(rb->Stats.AveReadQueuingTime),0, NULL);
  sprintf(buf, "%s.AveWriteQueuingTime", name);
  stat_reg_counter(sdb, buf, "average queuing time for write",
		   &(rb->Stats.AveWriteQueuingTime),0, NULL);
  sprintf(buf, "%s.IdleTime", name);
  stat_reg_counter(sdb, buf, "total idle time in RAMBUS",
		   &(rb->Stats.IdleTime),0, NULL);

  /* access counts by relation to the previous access */
  sprintf(buf, "%s.SAFIRSTACCESS", name);
  stat_reg_counter(sdb, buf, "first time touch the senseamp",
		   &(rb->Stats.Access[SAFIRSTACCESS]),0, NULL);
  sprintf(buf, "%s.DIFFDEV_SAACT", name);
  stat_reg_counter(sdb, buf, "different device, activated senseamp",
		   &(rb->Stats.Access[DIFFDEV_SAACT]),0, NULL);
  sprintf(buf, "%s.DIFFDEV_SANONACT", name);
  stat_reg_counter(sdb, buf, "different device, nonactivated senseamp",
		   &(rb->Stats.Access[DIFFDEV_SANONACT]),0, NULL);
  sprintf(buf, "%s.SAMEDEV_NONADJBANK_SAACT", name);
  stat_reg_counter(sdb, buf, "same device, nonadjacent bank,activated senseamp",
		   &(rb->Stats.Access[SAMEDEV_NONADJBANK_SAACT]),0,
		   NULL);
  sprintf(buf, "%s.SAMEDEV_NONADJBANK_SANONACT", name);
  stat_reg_counter(sdb, buf, "same device, nonadjacent bank,nonactivated senseamp",
		   &(rb->Stats.Access[SAMEDEV_NONADJBANK_SANONACT]),0, NULL);
  sprintf(buf, "%s.SAMEDEV_ADJBANK", name);
  stat_reg_counter(sdb, buf, "same device,adjacent bank ",
		   &(rb->Stats.Access[SAMEDEV_ADJBANK]),0, NULL);
  sprintf(buf, "%s.SAMEDEV_SAMEBANK_DIFFROW", name);
  stat_reg_counter(sdb, buf, "same device, same bank, different row ",
		   &(rb->Stats.Access[SAMEDEV_SAMEBANK_DIFFROW]),0,
		   NULL);
  sprintf(buf, "%s.SAMEDEV_SAMEBANK_SAMEROW", name);
  stat_reg_counter(sdb, buf, "same device, same bank, same row ",
		   &(rb->Stats.Access[SAMEDEV_SAMEBANK_SAMEROW]),0,
		   NULL);
}
















