package code;

//---------------------------------------------------------------------------
/* DataStore
 * 
 * Provide access to the locally stored data.
 *
 * Concurrency, locking
 * Right now, for simplicity, we use a single global condition variable
 * on which readers wait (a) for the object they are reading to become
 * precise or (b) for the object they are reading to become valid. This
 * means that we need to notifyall every time any interest set becomes
 * precise or any object becomes valid. This is potentially inefficient
 * (though not as bad as you might think, since in the common case
 * we should have few waiters at any given time.) 
 *
 * The fix is as follows. Move to java release 1.5 which has explicit
 * locks and condition variables. Create an array of 10-100 becameValid
 * locks; select a lock based on hash of objId (note: *not* <objId, range>
 * since the range logic would be tricky to match arbitrary ranges to the
 * right keys; this is OK -- we don't expect a large # of threads to
 * be waiting on different ranges of the same file.) Splitting the cv for
 * becamePrecise is a bit trickier, especially once we dynamically change
 * interest sets...for this we may just end up needing to keep a list
 * of waiting objIds and scan down the list to see who to signal...
 *
 * (C) Copyright 2004, 2005, 2006 -- See the file COPYRIGHT for additional details
 */
//---------------------------------------------------------------------------


import java.util.*;
import java.io.*;

import code.security.SangminConfig;

//---------------------------------------------------------------------------
// Local storage of data
//---------------------------------------------------------------------------
public class DataStore{

  // Compile-time flag; not referenced in the visible part of this file.
  // NOTE(review): presumably gates timing instrumentation -- confirm.
  protected static final boolean measureTime = false;

  // Buffer that can delay invalidates. Attached via setInvalBuffer();
  // read() tells it when a read blocks and when that read finishes.
  private InvalidateBuffer invalBuffer;

  // Persistent state: per-object data plus the auxiliary checkpoint
  // entries stored under ISSTATUS_CP_KEY and VV_CP_KEY.
  RandomAccessState state;

  // Maintains lpvv per interest set.
  ISStatus isStatus;
  // Current version vector; recovered from the checkpoint VV (or reset to
  // all-negatives) by recoverVVFromRAS() and compared against lpvv's in
  // isPrecise().
  CounterVV cvv = new CounterVV();
  // Maintains realVV: the latest real-time stamps of each node that this
  // node has learned. read() with maxTE waits on its minimum timestamp.
  CounterVV realVV = new CounterVV();
  // Nontrivial to store (interest sets, upq) in a way that lets us
  // pull out all upq's that must be notified when a write happens.
  // Notifier just maintains a list of all interest sets and does a
  // linear search. Ideally, you would like to store it as a tree
  // with lists of upq's stored at each node of the tree.
  UPQNotifier upqNotifier;
  // Checkpoint VV: recovered at start-up and refreshed by syncToDisk().
  AcceptVV cpVV = null;

  NodeId myNodeId;
  // Set once printDataStoreLimitations() has emitted its warnings.
  private static boolean warnedWriteUPQ = false;
  // NOTE(review): 0xFF10FF14 is an int literal (no L suffix), so it is
  // sign-extended to a negative long -- confirm that is intended. Not
  // referenced in the visible part of this file.
  private static long ISSTATUS_SHIP_DONE = 0xFF10FF14;
  public final static boolean PRINT_CP_INFO = false;
  public final static boolean printWriteObjectForCP = false;

  // Debug-output switches and the keys under which the ISStatus and VV
  // checkpoints are stored in the RandomAccessState.
  private final static boolean dbgNotify = false;
  private final static boolean dbgRead = false;
  private final static String ISSTATUS_CP_KEY = "ISSTATUS_CP_KEY";
  private final static String VV_CP_KEY = "VV_CP_KEY";
  private final static boolean dbgWrite = false;
  private final static boolean dbgPrecise= false;
  private final static boolean dbgPerformance = false;
  


  //----------------------------------------------------------------------
  // constructor -- recover checkpoint state from disk
  //----------------------------------------------------------------------
  public DataStore(String storePath, NodeId myNodeId_){
    // Same start-up sequence as the three-argument constructor with a real
    // (non-null) RandomAccessState: open the store, recover ISStatus and
    // the VVs, create an empty notifier, print the TBD reminders.
    this(storePath, myNodeId_, false);
  }


  //----------------------------------------------------------------------
  // constructor -- with option to choose nullRAS (For testing only)
  //----------------------------------------------------------------------
  public DataStore(String storePath, NodeId myNodeId_, boolean nullRAS){
    this.myNodeId = myNodeId_;

    // Open the backing store: the null implementation when requested
    // (testing), otherwise the one at storePath sized by this node's
    // configured cache size.
    try{
      if(nullRAS){
        state = new NullRandomAccessState();
      }else{
        state = new RandomAccessState(storePath,
                                      Config.getCacheSizeBytes(myNodeId));
      }
    }catch(IOException openFailure){
      // NOTE(review): with assertions disabled this returns an object whose
      // state, isStatus and upqNotifier were never initialized.
      openFailure.printStackTrace();
      assert(false);
      return;
    }

    // Rebuild in-memory status from whatever checkpoint the store holds.
    recoverISStatusFromRAS();
    recoverVVFromRAS();

    // Notifier is initially empty
    upqNotifier = new UPQNotifier();

    printDataStoreLimitations();
  }









  //----------------------------------------------------------------------
  // Constructor-helper -- recover ISStatus from RAS. If RAS does not
  // include ISStatus, then no checkpoint is stored so generate ISStatus
  // from scratch.
  //----------------------------------------------------------------------
  private void recoverISStatusFromRAS(){
    try{
      // Fetch the serialized ISStatus checkpoint and deserialize it.
      byte[] serialized = state.getAuxCheckpointStuff(ISSTATUS_CP_KEY);
      ObjectInputStream in =
        new ObjectInputStream(new ByteArrayInputStream(serialized));
      isStatus = (ISStatus)in.readObject();
    }catch(NoSuchEntryException noCheckpoint){
      // Nothing was ever checkpointed: start ISStatus from the beginning
      // of time.
      createInitialISStatus();
    }catch(IOException corruptStore){
      // The stored checkpoint could not be read back: treat the database
      // as corrupt and stop the process.
      corruptStore.printStackTrace();
      assert(false);
      System.exit(-1);
    }catch(ClassNotFoundException missingClass){
      // The checkpoint names a class we cannot load: a program bug, so
      // stop the process.
      missingClass.printStackTrace();
      assert(false);
      System.exit(-1);
    }
  }

  //----------------------------------------------------------------------
  // Constructor-helper -- no ISStatus was in RAS, so create one
  // from scratch -- initialize to "/ with lpvv=start of time".
  //----------------------------------------------------------------------
  private void createInitialISStatus(){
    // Root precise set, then mirror this node's configured interest
    // region onto it before wrapping both in a fresh ISStatus.
    PreciseSet root = new PreciseSet("", "");
    InterestRegion region = Config.getInterestSet(myNodeId);
    region.makeMirrorPS(root);
    isStatus = new ISStatus(new AllPreciseSets(root), region);
  }


	  
  //----------------------------------------------------------------------
  // Constructor-helper -- recover cpVV from RAS and initialize cvv.
  // If RAS does not include cpVV, then no checkpoint is stored so 
  // generate VVs from scratch.
  //----------------------------------------------------------------------
  private void recoverVVFromRAS(){
    try{
      // Deserialize the checkpoint VV and seed the current VV from it.
      byte[] serialized = state.getAuxCheckpointStuff(VV_CP_KEY);
      ObjectInputStream in =
        new ObjectInputStream(new ByteArrayInputStream(serialized));
      cpVV = (AcceptVV)in.readObject();
      cvv = new CounterVV(cpVV);
    }catch(NoSuchEntryException noCheckpoint){
      // Nothing was ever checkpointed: start both VVs from the beginning
      // of time.
      cvv = (CounterVV)CounterVV.makeVVAllNegatives();
      cpVV = cvv.cloneAcceptVV();
    }catch(IOException corruptStore){
      // The stored checkpoint could not be read back: treat the database
      // as corrupt and stop the process.
      corruptStore.printStackTrace();
      assert(false);
      System.exit(-1);
    }catch(ClassNotFoundException missingClass){
      // The checkpoint names a class we cannot load: a program bug, so
      // stop the process.
      missingClass.printStackTrace();
      assert(false);
      System.exit(-1);
    }
  }



  //----------------------------------------------------------------------
  // Reminders of TBDs
  //----------------------------------------------------------------------
  private void printDataStoreLimitations(){
    // The reminders are emitted at most once per JVM, however many
    // DataStore instances are constructed.
    if(warnedWriteUPQ){
      return;
    }

    Env.performanceWarning("DataStore uses simple locking/condition variable scheme."
                           + " See comment at start of file for suggested fix.");

    Env.tbd("Local writes now go through applyOverlappingAtStartTime"
            + " and call notifyUpq(). Previously"
            + " we skipped this on the assumption that others would"
            + " receive a boundInval. But once the new receiver-read"
            + " code is in place, this is the right thing to do."
            + " (Plus, as we build more sophisticated unbind policies"
            + " the UPQ list and bound neighbors list will differ.");

    // Open question about causal ordering between the log and the store.
    Env.tbd("DataStore issue:"
            + " MDD: I don't see how the caller (core) ensures that"
            + " causal order on DataStore is ensured. I see that we"
            + " wait until write is causal w/ log. But since Core"
            + " doesn't hold a lock across log and data store, just"
            + " b/c log is caught up doesn't mean data store is."
            + " Also, detecting conflicts requires that we only"
            + " apply invals to RandomAccessState when enclosing"
            + " interest set is precise. How do we ensure that?"
            + " Does applyBody need to first wait until currentVV >= body?");

    warnedWriteUPQ = true;
  }



  //----------------------------------------------------------------------
  // Attach an InvalidateBuffer to this object to make callbacks
  //----------------------------------------------------------------------
  public synchronized void
  setInvalBuffer(InvalidateBuffer newInvalBuffer){
    // Replaces any previously attached buffer; read() reports blocked and
    // unblocked reads to whichever buffer is attached at that moment.
    invalBuffer = newInvalBuffer;
  }

  //----------------------------------------------------------------------
  // Return the attached InvalidateBuffer instance
  //----------------------------------------------------------------------
  public synchronized InvalidateBuffer
  getInvalBuffer(){
    // May be null if setInvalBuffer() has not been called yet.
    return invalBuffer;
  }


  //-------------------------------------------------------------------
  // Return the component-wise minimum LPVV of the LPVVs of all
  // PreciseSets that overlap this SubscriptionSet. We expose this
  // interface via DataStore because controllers and policies might
  // want to know about ISStatus; note that ISStatus has no lock of its
  // own and relies on DataStore for locking, so callers have to go
  // through DataStore for synchronization.
  //-------------------------------------------------------------------
  public synchronized AcceptVV getISStatusMinLpVV(SubscriptionSet ss){
    // Hand back a clone so the caller never holds a reference into
    // ISStatus, which is only protected by this object's lock.
    Object copy = this.isStatus.getMinLpVV(ss).clone();
    return (AcceptVV)copy;
  }


  //----------------------------------------------------------------------
  // close()
  //----------------------------------------------------------------------
  public synchronized void 
  close(){
    // Best effort: try to checkpoint before shutting down, but close the
    // underlying store even if the checkpoint fails.
    try{
      syncToDisk();
    }
    catch(IOException ioe){
      System.err.println("Uh-oh -- DataStore failed to sync to disk on shutdown...");
      // Report the cause as well, as the other I/O handlers in this class
      // do; otherwise the reason for the lost checkpoint is unrecoverable.
      ioe.printStackTrace();
    }
    state.close();
    // Drop the in-memory interest-set status; the object is not usable
    // after close().
    isStatus = null;
  }

  //----------------------------------------------------------------------
  // make isstatus and state persistent
  //----------------------------------------------------------------------
  public synchronized void
  syncToDisk()
    throws IOException {
    //
    // Sync per object state
    //
    state.sync();

    //
    // Sync ISStatus
    //
    byte[] isCp = serializeForCheckpoint(isStatus);
    state.putAuxCheckpointStuff(ISSTATUS_CP_KEY, isCp);

    // 
    // Sync cpVV
    // Note -- this needs to hit disk *last* since
    // it tells us where to start replaying the redo
    // log on recovery.
    //
    this.cpVV = cvv.cloneAcceptVV();
    byte[] vvCp = serializeForCheckpoint(this.cpVV);
    state.putAuxCheckpointStuff(VV_CP_KEY, vvCp);
  }

  //----------------------------------------------------------------------
  // syncToDisk-helper -- Java-serialize obj into a byte array. The object
  // stream is closed before the bytes are taken so that everything it has
  // buffered is guaranteed to be in the array. (Previously the cpVV stream
  // was read without being flushed or closed.)
  //----------------------------------------------------------------------
  private static byte[]
  serializeForCheckpoint(Object obj)
    throws IOException {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    ObjectOutputStream oos = new ObjectOutputStream(baos);
    oos.writeObject(obj);
    oos.close();
    return baos.toByteArray();
  }

  //----------------------------------------------------------------
  // get the datastore checkpoint VV
  //----------------------------------------------------------------  
  public synchronized AcceptVV getCPVV(){
    // Returns the VV recorded by the last recovery or syncToDisk();
    // the stored reference itself is returned, not a copy.
    return cpVV;
  }

  
  //----------------------------------------------------------------
  // checks if the object is valid
  //----------------------------------------------------------------  
  public synchronized boolean
  isValid(ObjId objId, long offset, long length, boolean exactOffset)
    throws ObjNotFoundException, IOException, EOFException, ReadOfHoleException{
    // Probe by attempting the read and discarding the result: success
    // means valid; an invalid-range or embargoed-write failure means not
    // valid; a missing entry is reported as ObjNotFoundException.
    boolean valid;
    try{
      state.read(objId, offset, length, exactOffset);
      valid = true;
    }catch(ReadOfInvalidRangeException invalidRange){
      valid = false;
    }catch(NoSuchEntryException missing){
      throw new ObjNotFoundException();
    }catch(ReadOfEmbargoedWriteException embargoed){
      valid = false;
    }
    return valid;
  }

  //----------------------------------------------------------------
  // Checks if the object is precise
  //----------------------------------------------------------------  
  public synchronized boolean
  isPrecise(ObjId objId){
    // Precise when the lpvv recorded for the object's path includes the
    // store's current version vector.
    return isStatus.getLpVV(objId.getPath()).includes(this.cvv);
  }

  //----------------------------------------------------------------
  // Checks if the subscription set is precise
  //----------------------------------------------------------------  
  public synchronized boolean
  isPrecise(SubscriptionSet ss){
    // Precise when the minimum lpvv over the subscription set includes
    // the store's current version vector.
    boolean precise = isStatus.getMinLpVV(ss).includes(this.cvv);
    return precise;
  }
  
  //--------------------------------------------------------------------------
  // Same as the read() overloads below, except that it also bounds the
  // temporal error (maxTE); this variant passes no controller.
  //------------------------------------------------------------------------
  public synchronized BodyMsg
  read(ObjId objId, long offset, long length, 
       boolean blockInvalid, boolean blockImprecise, boolean exactOffset, 
       long maxTE, long timeout)
    throws ObjNotFoundException, IOException, EOFException, ReadOfInvalidRangeException,
  ReadOfHoleException{
    // Convenience form of the temporal-error read: forwards every argument
    // and supplies null for the controller.
    BodyMsg result = this.read(objId, offset, length,
                               blockInvalid, blockImprecise, exactOffset,
                               maxTE, null, timeout);
    return result;
  }

  //--------------------------------------------------------------------------
  // Same as the read() overloads below, except that it first waits until
  // the temporal error is within maxTE; this variant takes a controller.
  //------------------------------------------------------------------------
  public synchronized BodyMsg
  read(ObjId objId, long offset, long length, 
       boolean blockInvalid, boolean blockImprecise, boolean exactOffset, 
       long maxTE,
       Controller controller, long timeout)
    throws ObjNotFoundException, IOException, EOFException, ReadOfInvalidRangeException,
  ReadOfHoleException{
    assert timeout == -1: "timeout parameter not supported yet to coexist with maxTE see"
      +"case-studies/Coda/tbd.txt";
    boolean dbg = false;

    // Oldest real-time stamp we tolerate: the minimum entry of realVV must
    // reach (time of this call - maxTE) before the read may proceed.
    long readTime = System.currentTimeMillis();
    long oldestAllowed = readTime - maxTE;

    while(realVV.getMinTimeStamp() < oldestAllowed){
      if(dbg){
        System.err.println("wait because of TE:" + (readTime-maxTE) );
      }
      try{
        // Woken by any notifyAll() on this object; the loop re-checks.
        wait();
      }catch (InterruptedException ie){
        Env.sprinterrln("read waiting for temporal error is interrupted !");
      }
    }

    if(dbg){
      System.err.println("start to read at: " + System.currentTimeMillis());
    }

    // Temporal-error bound satisfied: continue with the ordinary read.
    return this.read(objId, offset, length,
                     blockInvalid, blockImprecise,
                     exactOffset, controller, timeout);
  }
  
//----------------------------------------------------------------------
// read() -- read up to <length> bytes starting from <objId, offset>
// 
// Returns
//   If the object exists and there is at least one valid byte at <objId, offset>
//   we return between 1 and <length> bytes. Notice that we may return fewer
//   bytes than requested.
//
//   If the object does not exist, throw ObjNotFoundException
//
//   If there is a low-level IO error or database error, throw IOException
//
//   Otherwise
//
//   If <blockInvalid> is true, then do not return until the specified range
//   is valid. (If <blockInvalid> is false and the byte at <objId, offset>
//   is invalid, throw ReadOfInvalidRangeException.)
//
//   If <blockImprecise> is true, then do not return until the specified
//   object "is precise."
//
//   If offset is past the last byte of the file, throw EOFException
//
//   Notice that we never return null. 
//
//   Note that a read of a "hole" returns a 1-byte result with value 0.
//   (A hole is created when an offset x is not written but some offset
//   y>x is written.)
//
// Concurrency:
//   We wait in two cases: (1) blockInvalid is true and the data is not
//   valid and (2) blockImprecise is true and the enclosing interest set
//   is not precise. We use a single "while" loop to test/retest both
//   conditions on each retry of the read.
//
// Events generated:
//   If we block, tell the invalBuffer that we are blocked so it 
//   can hurry to let the things we care about through.
//
//   If we block for imprecise, generate an event for the controller
//   ("go get me the data needed to make me precise.")
// 
//   If we block for invalid, generate an event for the controller
//   ("go get me the body to make me valid.")
//
//   Note that *multiple* instances of each event may be generated 
//   for a single read as spurious signals wake us up, have us retry
//   the read, and generate an event saying "I'm still waiting."
//   In an earlier implementation, we only generated one event
//   per read. The problem with that is that the "finish line" can
//   move. E.g., some new invalidations could arrive so that
//   the body we fetched to free an invalid read isn't good
//   enough any more. Implication: the controller and inval
//   buffer should filter out redundant events. (This makes
//   sense since we could have multiple reads anyhow...)
//----------------------------------------------------------------------
  public synchronized BodyMsg
  read(ObjId objId, long offset, long length, 
       boolean blockInvalid, boolean blockImprecise, boolean exactOffset,
       long timeout)
    throws ObjNotFoundException, IOException, EOFException,
	   ReadOfInvalidRangeException, ReadOfHoleException{
    // Convenience form: forwards every argument and supplies null for the
    // controller, so no controller is informed when the read blocks.
    BodyMsg result = this.read(objId, offset, length,
                               blockInvalid, blockImprecise, exactOffset,
                               null, timeout);
    return result;
  }

  /**
   * Reads up to {@code length} bytes starting at {@code objId, offset},
   * retrying until the read succeeds, fails for a reason the caller did not
   * ask to block on, or the timeout expires.
   *
   * <p>Each pass first checks precision (only when {@code blockImprecise})
   * and then attempts the read. The thread waits on this object's monitor
   * and retries when the object does not lie in a precise interest set,
   * when the range is invalid and {@code blockInvalid} is set, or when the
   * range holds an embargoed write (regardless of the block flags).
   *
   * <p>The first time the read blocks, the attached InvalidateBuffer is told
   * so; it is told the read finished when this method exits by any path.
   *
   * <p>Timeout: the deadline is fixed once on entry. Every wait is limited
   * to the time remaining until that deadline, so the call returns or times
   * out at about {@code readtimeout} ms no matter how many wakeups occur.
   *
   * @param blockInvalid   wait instead of throwing when the range is invalid
   * @param blockImprecise wait until the object lies in a precise interest set
   * @param exactOffset    passed through to the underlying store read
   * @param controller     if non-null, informed at most once per call that
   *                       the read is imprecise and at most once that it is
   *                       invalid
   * @param readtimeout    timeout in milliseconds; a value &lt;= 0 means
   *                       wait indefinitely
   * @return the body read from the store (asserted non-null)
   * @throws ObjNotFoundException if the store has no entry for the object
   * @throws ReadOfInvalidRangeException if the range is invalid and
   *         {@code blockInvalid} is false
   * @throws ReadTimeoutException if the deadline passes first
   */
  public synchronized BodyMsg
  read(ObjId objId, long offset, long length, 
       boolean blockInvalid, boolean blockImprecise, boolean exactOffset,
       Controller controller, long readtimeout)
    throws ObjNotFoundException, IOException, EOFException,
	   ReadOfInvalidRangeException, ReadOfHoleException{
    String token = null;
    // keep track of whether the controller has been notified
    boolean notifiedImprecise = false; 
    boolean notifiedInvalid = false;

    // Absolute deadline. Long.MAX_VALUE means "no timeout"; a timeout so
    // large that now + readtimeout would overflow is treated the same way
    // instead of wrapping negative and timing out immediately.
    long endTime = Long.MAX_VALUE;
    if(readtimeout > 0){
      long now = System.currentTimeMillis();
      if(readtimeout < Long.MAX_VALUE - now){
        endTime = now + readtimeout;
      }
    }

    try{
      if(dbgRead){
        System.err.println("datastore reads object" + objId);
      }

      while(System.currentTimeMillis() < endTime){//retry if not timeout
        //
        // First condition: Check to see if interest set is precise
        //
        if(blockImprecise && !liesInPreciseIS(objId)){
          if(token == null){
            // Notify (only once) that we are blocked for this read
            token = objId + ", " + length + ", " + offset;
            this.invalBuffer.notifyReadBlocked(token);
          }
          if(controller != null && !notifiedImprecise){
            controller.informLocalReadImprecise(objId, offset, length);
            notifiedImprecise = true;
          }
          //See comment at start of file if you want to split cv.
          waitForReadRetry(endTime, objId,
                           "] wait because of Imprecise for ",
                           "] stop waiting for Imprecise for ");
          continue;    // On wakeup, go to start of loop and try again
        }

        try{
          if(dbgRead){
            Env.sprintln("Node[" + this.myNodeId + 
                         "]::RandomAccessState going to read " + objId
                         + "(liesInPreciseIS = " + liesInPreciseIS(objId));
          }
          BodyMsg bodyMsg = state.read(objId, offset, length, exactOffset);
          if(dbgRead){
            Env.sprintln("Node[" + this.myNodeId + 
                         "]::store:read " + objId + " return");
          }
          assert(bodyMsg != null);
          return bodyMsg;
        }catch(ReadOfInvalidRangeException roire){
          //
          // Second condition: Is object valid?
          //
          if(!blockInvalid){
            throw roire;
          }
          if(token == null){
            // Notify (only once) that we are blocked for this read
            token = objId + ", " + length + ", " + offset;
            this.invalBuffer.notifyReadBlocked(token);
          }
          if(controller != null && !notifiedInvalid){
            controller.informLocalReadInvalid(objId, offset, length,
                                              roire.getAcceptStamp());
            notifiedInvalid = true;
          }
          //See comment at start of file if you want to split cv.
          waitForReadRetry(endTime, objId,
                           "] wait because of InValid for ",
                           "] stop waiting InValid for ");
          // On wakeup, fall out of the handler and retry the loop.
        }catch(ReadOfEmbargoedWriteException rewe){
          // Embargoed data always blocks, whatever the block flags say.
          if(token == null){
            // Notify only once that we are blocked for this read
            token = objId + ", " + length + ", " + offset;
            this.invalBuffer.notifyReadBlocked(token);
          }
          waitForReadRetry(endTime, objId,
                           "] wait because of embargoed for ",
                           "] stop waiting enbargoed for ");
          // On wakeup, fall out of the handler and retry the loop.
        }catch(EOFException eof){
          throw eof;
        }catch(NoSuchEntryException nse){
          throw new ObjNotFoundException();
        }
      }//while not timeout

      assert System.currentTimeMillis() >= endTime;
      throw new ReadTimeoutException("DataStore:: read timeout for " 
				     + " objId = " + objId + " offset= " + offset
				     + " length= " + length
				     + " blockInvalid=" + blockInvalid
				     + " blockImprecise=" + blockImprecise
				     + " readtimeout= " + readtimeout);
    }finally{
      if(token != null){
        // Let the InvalidateBuffer know that this read has finished
        this.invalBuffer.notifyReadUnblocked(token);
      }
    }
  }

  //----------------------------------------------------------------------
  // read-helper -- wait on this object's monitor until notified or until
  // endTime, whichever comes first (endTime == Long.MAX_VALUE waits with
  // no limit). Must be called with this object's lock held. waitMsg and
  // wakeMsg are only used for dbgRead output.
  //----------------------------------------------------------------------
  private void
  waitForReadRetry(long endTime, ObjId objId, String waitMsg, String wakeMsg){
    try{
      if(dbgRead){
        System.err.println("Node[" + this.myNodeId + waitMsg + objId);
      }
      if(endTime == Long.MAX_VALUE){
        this.wait();
      }else{
        // Wait only for what is left of the timeout. wait(0) would block
        // with no limit, so skip the wait once the deadline has passed and
        // let the caller's loop notice the timeout.
        long remaining = endTime - System.currentTimeMillis();
        if(remaining > 0){
          this.wait(remaining);
        }
      }
      if(dbgRead){
        System.err.println("Node[" + this.myNodeId + wakeMsg + objId);
      }
    }catch(InterruptedException e){
      // Treated like any other wakeup: the caller re-checks its conditions.
      // The interrupt flag is deliberately not restored, since that would
      // make every later wait() throw immediately and spin the retry loop.
    }
  }

//---------------------------------------------------------------------------
// readMeta() -- read the metadata for the specified offset of 
//                the specified file
//
// returns
//    PreciseInv on success
//    throws NoSuchEntryException if objId does not exist (includes
//         case where delete of objId is more recent than any write
//         to objId)
//    throws EOFException if offset is larger than any write to
//         objId (since the last delete)
//---------------------------------------------------------------------------
  public synchronized PreciseInv
  readMeta(ObjId objId, long offset)
    throws NoSuchEntryException, EOFException, ReadOfHoleException{
    // Pure pass-through to the store, performed under this object's lock.
    PreciseInv meta = state.readMeta(objId, offset);
    return meta;
  }	

  //------------------------------------------------------------------------
  // apply DebargoMsg
  //------------------------------------------------------------------------
  public synchronized void applyDebargo(DebargoMsg dm)
    throws IOException{
    // Wake every waiter only when the store reports that the debargo
    // message had an effect.
    boolean changed = state.debargo(dm);
    if(changed){
      this.notifyAll();
    }
  }
//----------------------------------------------------------------------
// MDD: On writes, update the notify code to signal on the new condition
// variables.
// Also, the need to call applyOverlappingAtStartTime and applyNonoverlappingAtEndTime
// before calling (local)write is a bit odd. Check that and see if interface
// can (a) be made clear and (b) can be made self-checking.
//
// Check applyOverlappingAtStartTime -- the "boundInval" v. "preciseInval" question
//
// handle causal order exception case in applyOverlappingAtStartTime
//
// check the nofifyUPQ(body) code overall (and especially in applyBody)
//----------------------------------------------------------------------
// Retired entry point, kept so existing callers still compile. It does no
// work: it fails with an AssertionError when assertions are enabled and is
// a silent no-op when they are disabled. Neither declared exception is
// thrown by this body.
public synchronized void 
applyNonoverlappingAtEndTime(GeneralInv gi, VVMap startVVs) 
throws IOException, CausalOrderException{
   assert false; //old interface.
}


//----------------------------------------------------------------------
// applyOverlappingAtStartTime() -- update ISStatus for all precise sets
//   that overlap this invalidation. If the inval is precise or a delete
//   update local state.
// 
// This code for now completely ignore commit sequence numbers
//----------------------------------------------------------------------
// Retired entry point, kept so existing callers still compile. The live
// body is only the assertion below: it fails with an AssertionError when
// assertions are enabled and is a silent no-op when they are disabled.
// The block comment that follows is the previous implementation, retained
// for reference only; none of it is compiled.
public synchronized void 
applyOverlappingAtStartTime(GeneralInv gi, VVMap startVVs) 
throws IOException, CausalOrderException{
   assert false; //old interface.
   /*
	MultiObjPreciseInv mopi = null;
ObjInvalTarget oit = null;

//
// First update the interest set status
//
try {
if(this.isStatus.updateStatusOverlapping(gi, startVVs)){
this.notifyAll(); // Wake any read waiting for IS to become precise
}
}catch(CausalOrderException e) {
throw e;
}

//
// Now update the per-object state
//
if(!gi.isPrecise()){
return;
}


//
// Don't update per object state if 
// interest set is imprecise. Note that in older versions of code,
// we updated the local store even if the enclosing interest set
// is imprecise. Doing so did no harm since the isstatus was imprecise
// and so local store was not visible anyhow. Our hope was that it might 
// do some good by storing bodies of bound invalidations so that 
// later when we see the same inval to make us precise, we already
// have the data and don't need to refetch it. The reason we make this
// change is to allow the lower-level code to detect conflicts. See mikes
// notes in 2004/10 or 2004/9 (sorry, don't recall the exact date.)
//
//  if(!isPrecise){
//    if(!warnedDiscardBody){
//      Env.performanceWarning("DataStore throws away a body applied to imprecise."
//                             + " Optimization is to enqueue this message until"
//                             + " interest set is precise.");
//      warnedDiscardBody = true;
//    }
//    return;
//  }
if(!warnedDiscardBody){
Env.tbd("DataStore currently applies inval to imprecise."
+ " In order to detect conflicts, need to (a) discard "
+ " unbound invals applied to imprecise interest set "
+ " and (b) enqueue bound invals for later application "
+ " (when IS becomes precise.)");
warnedDiscardBody = true;
}

if(gi instanceof MultiObjPreciseInv){
this.applyMOPIAtStartTime((MultiObjPreciseInv)gi);
}else if(gi.isDelete()){
DeleteInv di = (DeleteInv)gi;
this.state.delete(di.getObjId(), di.getAcceptStamp());
return;
}
else{
assert(gi.isPrecise());
PreciseInv pi = (PreciseInv)gi;
//
// Regular write
//
long written = state.applyInval(pi);

if(written != RandomAccessState.NO_NEW_VALID_BYTES){
//
// We have a new write that subscribers or readers might care about.
//
assert(pi.isBound());
upqNotifier.notifyUpq((BoundInval)pi);
      
//
// Bytes of <obj> just became valid. Wake anyone waiting for valid read.
//
if(dbgNotify){
System.err.println("notify for " + gi);
}
notifyAll();
}
}

if(!warnedInefficientInval){
Env.tbd("DataStore currently does a notifyAll() for " +
"every invalidate message it receives so that it " +
"resumes every thread that is blocked trying to " +
"apply an object body");
warnedInefficientInval = true;
}
// TBD: Inefficient! Only call this method if there is an object
// body that we are trying to apply.
long oldMin = realVV.getMinTimeStamp();
realVV.advanceTimestamps(gi.getRTVV());
//not necessary notify here as it is notified anyway at last.
//   tbd: make monitor unique. as commented above, it's not efficient.
//if (realVV.getMinTimeStamp()>oldMin){
//	notifyAll();//-- notify read wait for Temporal Error
//}
//blocks wait for this notify:
// 1. read blocks invalid,
// 2. read blocks Temporal Error,
// 3. read blocks because of embargoed write
    
// Note: We call notifyAll() for all MultiObj writes
//   (even if they only delete data)
this.notifyAll();
*/
}
  // Warn-once flags. In the visible part of this file they are referenced
  // only from the commented-out body of applyOverlappingAtStartTime().
  // NOTE(review): confirm whether any live code still uses them.
  private static boolean warnedDiscardBody = false;
  private static boolean warnedInefficientInval = false;

  //----------------------------------------------------------------------
  // Apply a MultiObjPreciseInv at start time
  // Note: Called only by a synchronized method
  //----------------------------------------------------------------------
  private void
  applyMOPIAtStartTime(MultiObjPreciseInv mopi){
    // Caller must hold this object's lock: notifyAll() is invoked below.
    MOITBoundEntry[] bound = mopi.getBoundEntriesDangerous();
    MultiObjInvalTarget targets = (MultiObjInvalTarget)mopi.getInvalTarget();
    double priority = 0.0;

    // Pass 1: targets from getIterator(true) are applied as invalidations,
    // carrying a body when a matching bound entry supplies one.
    MOITIterator writes = targets.getIterator(true);
    while(writes.hasNext()){
      ObjInvalTarget target = writes.getNext();

      // Scan the bound entries for this target, stopping at the first
      // match that actually carries bytes.
      ImmutableBytes body = null;
      for(int i = 0; i < bound.length; i++){
        MOITBoundEntry entry = bound[i];
        if(!entry.getObjInvalTarget().equals(target)){
          continue;
        }
        priority = entry.getPriority();
        body = entry.getImmutableBytes();
        if(body != null){
          break;
        }
      }

      PreciseInv pi;
      if(body == null){
        // No body for this target: apply a plain precise invalidation.
        pi = new PreciseInv(target,
                            mopi.getAcceptStamp(),
                            mopi.getRTAcceptStamp(),
                            mopi.isEmbargoed());
        assert(SangminConfig.securityLevel == SangminConfig.NONE);
      }else{
        pi = new BoundInval(target.getObjId(),
                            target.getOffset(),
                            target.getLength(),
                            mopi.getAcceptStamp(),
                            body,
                            priority,
                            mopi.getRTAcceptStamp(),
                            mopi.isEmbargoed());
      }

      long written = state.applyInval(pi);
      if(written == RandomAccessState.NO_NEW_VALID_BYTES){
        continue;
      }

      // New valid bytes: tell the notifier, then wake any waiting threads.
      assert(pi.isBound());
      this.upqNotifier.notifyUpq((BoundInval)pi);
      if(dbgNotify){
        System.err.println("notify for " + mopi);
      }
      notifyAll();
    }

    // Pass 2: targets from getIterator(false) are deleted at the
    // invalidation's accept stamp.
    MOITIterator deletes = targets.getIterator(false);
    while(deletes.hasNext()){
      ObjInvalTarget target = deletes.getNext();
      this.state.delete(target.getObjId(), mopi.getAcceptStamp());
    }
  }


//----------------------------------------------------------------------
// applyBody() -- apply the specified body to the local store.
//   *ASSUMES* corresponding invalidation has already been successfully
//    applied. Returns the offset of the first newly-valid byte in the
//    specified file or NO_NEW_VALID_BYTES if this update fails to
//    make any new bytes valid.
//----------------------------------------------------------------------
  public synchronized long
  applyBody(BodyMsg body) 
    throws IOException{
    // Applies a body to the random access state once the local version
    // vector entry for the body's writer has reached the body's local
    // clock, and returns whatever state.applyBody() returns
    // (RandomAccessState.NO_NEW_VALID_BYTES when nothing became valid).
    //
    // The wait below is deliberately uninterruptible: an interrupt is
    // logged and the wait continues. The interrupt is remembered and the
    // thread's interrupt status is restored on the way out, so callers
    // can still observe that an interrupt was requested.
    long written;
    long lc;
    boolean interrupted = false;

    try{
      // (1) Check/enforce invariants
      //
      // applyBody should not be called until the corresponding inval
      // has been applied. The current code may not *ensure* this: there
      // may be a race between applying an inval to the log and applying
      // it to the DataStore. So we block here until the DataStore has
      // seen the writer's clock reach the body's accept stamp.
      //
      lc = state.getVV(body.getAcceptStamp().getNodeId());

      while(body.getAcceptStamp().getLocalClock() > lc){
        try{
          wait();
        }catch(InterruptedException e){
          // Only InterruptedException can come out of wait() here, since
          // this method holds the monitor. Keep waiting, but remember it.
          interrupted = true;
          System.err.println("Caught an exception... keep waiting");
        }

        lc = state.getVV(body.getAcceptStamp().getNodeId());
      }
      assert(body.getAcceptStamp().getLocalClock() <= lc);

      if(!warnedNotSupportConflictDetect){
        Env.tbd("DataStore currently applies body to imprecise IS."
                + " In order to detect conflicts, need to "
                + " enqueue bound invals for later application "
                + " (when IS becomes precise.)");
        warnedNotSupportConflictDetect = true;
      }

      // (2) update random access state
      written = state.applyBody(body);

      // (3) notify anyone waiting for valid bytes
      // (4) notify upq
      if(written != RandomAccessState.NO_NEW_VALID_BYTES){
        if(dbgNotify){
          System.err.println("notifyAll after apply " + body);
        }
        this.notifyAll();
        upqNotifier.notifyUpq(body);
      }else{
        if(dbgNotify){
          System.err.println("XXXXXXXXXXXXXXXXXXThere's no new valid bytes after applyBody " + body); 
        }
      }

      return written;
    }finally{
      // Restore the interrupt status swallowed while waiting. Done last so
      // that the work above is not itself affected by the pending interrupt.
      if(interrupted){
        Thread.currentThread().interrupt();
      }
    }
  }

  // One-shot flag: set to true by applyBody() after it has emitted its
  // Env.tbd() notice about applying bodies to imprecise interest sets,
  // so that the notice is issued only once per DataStore instance.
  boolean warnedNotSupportConflictDetect = false;

  //----------------------------------------------------------------------
  // subscribeUpdates() -- 
  //   A node has asked us to send it all future updates about interest
  //   set "is". From now on, whenever we update something in "is",
  //   enqueue the update (objId, start, length) in upq,
  //   and when we see an invalidation, cancel any pending
  //   sends of (objId, start, length) in upq.
  //----------------------------------------------------------------------
  public synchronized void subscribeUpdates(UpdatePriorityQueue upq,
                                            SubscriptionSet ss){
    // Record the (subscription set, queue) pair with the notifier.
    // Several queues may be registered for the same subscription set;
    // the notifier is responsible for finding the matching ones later.
    this.upqNotifier.add(ss, upq);
  }

  //----------------------------------------------------------------------
  // Cancel a prefetch request
  //----------------------------------------------------------------------
  public synchronized void cancelPrefetchRequest(ObjId id,
                                                 long offset,
                                                 long length,
                                                 NodeId destId){
    // Pure delegation: hand the cancellation to the update-queue notifier
    // while holding the DataStore monitor.
    this.upqNotifier.notifyUPQCancelPrefetch(id, offset, length, destId);
  }
  
//----------------------------------------------------------------------
// subscribeUpdates() -- in addition to subscribing to future updates
//      scan the stored data for objects in InterestSet is that are
//      newer than startVV
//
// NOTE: 
// This *public* routine is *not synchronized*. It relies on synchronization
// within upqNotifier::add.
// The reason we don't want to be synchronized is that scanning
// through all elements in random access state could take a long
// time -- we don't want to be holding the lock for so long! (That
// part really has to be async.)
//----------------------------------------------------------------------
  public void subscribeUpdates(UpdatePriorityQueue upq,
                               SubscriptionSet ss,
                               VV startVV){
    // Registers (ss, upq) with the notifier and, when the current version
    // vector includes startVV, scans the local store for updates to put
    // in upq. This method does not take the DataStore monitor itself.
    //
    // Any exception is treated as a "cannot happen" condition. It is
    // printed to stderr so that it is not lost when assertions are
    // disabled, and it is passed to the assertion as its detail so that
    // the resulting AssertionError keeps the same message and also
    // records the original exception as its cause.
    try{      
      // register (is, upq)
      upqNotifier.add(ss, upq);
      
      if(cvv.includes(startVV)) {
        // scan localstore if startVV is back in time      
        state.scanForUpdates(upq, ss, startVV);
      }
    }catch(Exception e) {
      e.printStackTrace();
      assert false : e;
    }
  }


  //----------------------------------------------------------------------
  // updates the UPQNotifier so that it will not put updates for 
  // a subscription set in the upq
  //----------------------------------------------------------------------
  public synchronized void removeSubscribeUpdates(UpdatePriorityQueue upq,
                                                  SubscriptionSet ss){
    // Drop the (subscription set, queue) pair from the notifier.
    this.upqNotifier.remove(ss, upq);
  }
  
  //-------------------------------------------------------------------------
  // check if my lpVV for all interest sets at or below subscribeSet are 
  // at least minLpVV
  //-------------------------------------------------------------------------
  public synchronized boolean checkMinLPVV(SubscriptionSet ss, VV cpMinLPVV){
    // An empty subscription set is trivially satisfied; otherwise the
    // minimum lpVV reported by ISStatus for ss must include cpMinLPVV.
    return ss.isEmpty() || isStatus.getMinLpVV(ss).includes(cpMinLPVV);
  }
  
  //-------------------------------------------------------------------------
  // Split off a PreciseSet from a given pathName matching Node
  //-------------------------------------------------------------------------
  public synchronized void splitISStatus(String pathName, String newName)
    throws NoMatchPreciseSetException{
    // Delegates to ISStatus.split() under the DataStore monitor; the
    // exception it may raise is passed through to the caller.
    this.isStatus.split(pathName, newName);
  }

  //-------------------------------------------------------------------------
  // join all this.children, remove this.children, update this.other.lpvv
  //-------------------------------------------------------------------------
  public synchronized boolean joinISStatus(String pathName){
    // Delegates to ISStatus.join() under the DataStore monitor and
    // returns its result unchanged.
    boolean joined = this.isStatus.join(pathName);
    return joined;
  }
      
  //----------------------------------------------------------------------
  // isAllUPQEmpty() -- 
  //----------------------------------------------------------------------
  public synchronized boolean isAllUPQEmpty(){
    // Answer comes straight from the update-queue notifier.
    boolean allEmpty = this.upqNotifier.isAllUPQEmpty();
    return allEmpty;
  }
    
  //----------------------------------------------------------------------
  // waitAllUPQClear() -- 
  //----------------------------------------------------------------------
  public synchronized void waitAllUPQClear(){
    // The leading assert makes this method fail immediately whenever
    // assertions are enabled. With assertions disabled it blocks on the
    // DataStore monitor until isAllUPQEmpty() reports true, logging and
    // ignoring interrupts.
    assert(false);
    boolean cleared = isAllUPQEmpty();
    while(!cleared){
      try{
        wait();
      }catch(InterruptedException ie){
        Env.sprinterrln("Sync Body wait is interrupted !");
      }
      cleared = isAllUPQEmpty();
    }
  }

//----------------------------------------------------------------------
// notifyEmptyAttempt() -- 
//----------------------------------------------------------------------
  public synchronized void notifyEmptyAttempt(){
    // Nothing to announce while any update queue still has entries.
    if(!isAllUPQEmpty()){
      return;
    }
    // All queues are empty: wake every thread waiting on this DataStore.
    notifyAll();
  }



//MDD: Current plan is to get rid of the above wait and notify functions


  // Class-wide boolean flag, initially false. Its name follows the
  // "warned..." one-shot-notice flags used elsewhere in this class.
  // NOTE(review): no read or write of this flag is visible in this part
  // of the file -- confirm it is still referenced before relying on it.
  private static boolean warnedGISI = false;

  //----------------------------------------------------------------------
  // Return true if a bodyMsg with the given timestamp can be applied
  // Amol note: To be tested!
  //----------------------------------------------------------------------
  public synchronized boolean canApplyBody(AcceptStamp as){
    // A body is applicable once the current version vector includes
    // its accept stamp.
    boolean covered = this.cvv.includes(as);
    return covered;
  }

  //----------------------------------------------------------------------
  // liesInPreciseIS() --
  // Returns true if objId lies in precise interest sets only
  //----------------------------------------------------------------------
  public synchronized boolean liesInPreciseIS(ObjId objId){
    // Look up the lpVV that ISStatus records for the object's path.
    AcceptVV objLpVV = isStatus.getLpVV(objId.getPath());

    // The current version vector is expected to include that lpVV.
    assert (this.cvv.includes(objLpVV));

    if(dbgPrecise){
      Env.dprintln(dbgPrecise, "DataStore:: liesInPrecise for "+objId+ " called" +
                   "cvv=" + this.cvv + " objLpVV=" + objLpVV);  
    }

    // The answer is true when the lpVV in turn includes the current
    // version vector.
    boolean precise = objLpVV.includes(this.cvv);
    return precise;
  }

  /*
    NOTE: Amol: Antiquated method
    //----------------------------------------------------------------------
    // registerIS() -- Registers an interest set into ISStatus
    //----------------------------------------------------------------------
    public synchronized boolean 
    registerIS(InterestSet is) {
    InterestSetRecord isr = isStatus.getISR(is);	
    boolean newinsert = false;
  
    if(isr == null) {
    isStatus.putISR(is, new InterestSetRecord(is));
    newinsert = true;
    }
    return newinsert;
    }
  */

  //----------------------------------------------------------------------
  // getLPVV() -- Extracts  lpVV from ISStatus
  //----------------------------------------------------------------------
  public synchronized AcceptVV getLpVV(SubscriptionSet ss){
    // Returns the minimum lpVV that ISStatus reports for ss.
    AcceptVV minLpVV = this.isStatus.getMinLpVV(ss);
    return minLpVV;
  }

  //----------------------------------------------------------------------
  // getInterestedMinLPVV() -- Extracts interested lpVV from ISStatus
  //----------------------------------------------------------------------
  public synchronized VV getInterestedMinLpVV(){
    // Delegates to ISStatus under the DataStore monitor.
    VV interestedMin = this.isStatus.getInterestedMinLpVV();
    return interestedMin;
  }

  /*
    NOTE: Amol: Antiquated method
    //----------------------------------------------------------------------
    // getCurrentVV() -- Extracts  cVV from ISStatus
    //----------------------------------------------------------------------
    public synchronized VV 
    getCurrentVV(InterestSet is) {
    return isStatus.getCurrentVV(is);	
    }
  */

  //----------------------------------------------------------------------
  // getMaxCurrentVV() -- Returns maxCurrentVV which is max of all 
  // interestSet's currentVV
  //----------------------------------------------------------------------
  public synchronized AcceptVV getMaxCurrentVV(){
    // Hand out a copy of cvv (via cloneAcceptVV) rather than cvv itself.
    AcceptVV snapshot = this.cvv.cloneAcceptVV();
    return snapshot;
  }


  //----------------------------------------------------------------------
  // same as applyInval except that this one also update ISStatus lpvv for
  // all interestsets in it  -- for local write
  //----------------------------------------------------------------------
  public synchronized void applyWrite(GeneralInv gi)
    throws IOException, CausalOrderException{
    // Local-write path: apply the invalidation exactly as applyInval()
    // does, then push the write's end VV into ISStatus for the
    // subscription set ":/*" with no connection attached.
    if(dbgWrite){
      Env.dprintln(dbgWrite, "write .." + gi.getStartVV());
    }

    this.applyInval(gi);

    // update every object's lpvv for write
    SubscriptionSet everything = SubscriptionSet.makeSubscriptionSet(":/*");
    VV writeEndVV = gi.getEndVV();
    this.removeConnectionFromISStatus(everything, null, writeEndVV);

    if(dbgWrite){
      Env.dprintln(dbgWrite, "write done" + gi.getStartVV());
    }
  }
  
  //----------------------------------------------------------------------
  // replace applyOverlappingAtStartTime() -- update CVV, 
  // If the inval is precise or a delete,  update local state.
  // 
  //----------------------------------------------------------------------
  public synchronized void applyInval(GeneralInv gi)
    throws IOException, CausalOrderException{
    // Every invalidation, precise or not, advances the current version
    // vector to the invalidation's end VV.
    this.cvv.advanceTimestamps(gi.getEndVV());

    // Only precise invalidations touch per-object state.
    if(!gi.isPrecise()){
      return;
    }

    //
    // Historical note: the per-object state is updated here even when the
    // enclosing interest set is imprecise. An earlier plan was to discard
    // such updates (with a one-time performance warning) so that the
    // lower-level code could detect conflicts; that is still to be done,
    // which is what the one-time notice below records.
    //
    if(!warnedDiscardBody){
      Env.tbd("DataStore currently applies inval to imprecise."
              + " In order to detect conflicts, need to (a) discard "
              + " unbound invals applied to imprecise interest set "
              + " and (b) enqueue bound invals for later application "
              + " (when IS becomes precise.)");
      warnedDiscardBody = true;
    }

    if(gi instanceof CommitInv){
      // The result says whether something became committed. Ideally a
      // true result would wake reads blocked on an uncommitted object;
      // for now the general notifyAll() at the end of this method is
      // relied on instead.
      boolean committed = this.state.applyCommitInv((CommitInv)gi);
    }else if(gi instanceof MultiObjPreciseInv){
      this.applyMOPIAtStartTime((MultiObjPreciseInv)gi);
    }else if(gi.isDelete()){
      // Delete: remove the object, wake waiters, and return at once.
      // NOTE(review): this early return skips the realVV advance done
      // below for the other kinds -- confirm that is intended.
      DeleteInv del = (DeleteInv)gi;
      this.state.delete(del.getObjId(), del.getAcceptStamp());
      this.notifyAll();
      return;
    }else{
      //
      // Regular write
      //
      assert(gi.isPrecise());
      PreciseInv write = (PreciseInv)gi;
      long newlyValid = state.applyInval(write);
      boolean madeBytesValid =
        (newlyValid != RandomAccessState.NO_NEW_VALID_BYTES);

      if(madeBytesValid){
        // A new write that subscribers or readers might care about:
        // queue it for subscribers. Readers waiting for valid bytes are
        // woken by the notifyAll() at the end of this method.
        assert(write.isBound());
        upqNotifier.notifyUpq((BoundInval)write);
        if(dbgNotify){
          System.err.println("notify for " + gi);
        }
      }
    }

    if(!warnedInefficientInval){
      Env.tbd("DataStore currently does a notifyAll() for " +
              "every invalidate message it receives so that it " +
              "resumes every thread that is blocked trying to " +
              "apply an object body");
      warnedInefficientInval = true;
    }

    // TBD: Inefficient! Only notify if there is an object body that we
    // are trying to apply.
    //
    // The previous minimum is fetched but not used: the conditional
    // notify it was meant for is disabled, because the unconditional
    // notifyAll() below already covers it.
    long previousMin = realVV.getMinTimeStamp();
    realVV.advanceTimestamps(gi.getRTVV());

    // Threads that may be waiting for this notify:
    //  1. reads blocked on invalid data,
    //  2. reads blocked on a temporal error,
    //  3. reads blocked because of an embargoed write.
    // Note: notifyAll() is called for all MultiObj writes too
    //   (even if they only delete data).
    this.notifyAll();
    if(dbgNotify){
      System.err.println("notify for " + gi);
    }
  }
  

  //-------------------------------------------------------------------------
  //     add connection pointer to the corresponding place in ISStatus tree
  //     notify imprecise read
  //-------------------------------------------------------------------------  
  public synchronized void addConnectionToISStatus(SubscriptionSet ss,
                                                   ConnectionState ic){
    // Attach the connection in ISStatus first, then notify waiting
    // readers using the connection's prevVV.
    this.isStatus.addConnection(ss, ic);
    VV connectionPrevVV = ic.getPrevVV();
    this.notifyImpreciseRead(ss, connectionPrevVV);
  }

  //-------------------------------------------------------------------------
  //  remove connection pointer from the corresponding place in ISStatus tree
  //-------------------------------------------------------------------------  
  public synchronized void removeConnectionFromISStatus(SubscriptionSet ss,
                                                        ConnectionState ic,
                                                        VV newLpvv){
    //FIXME: ideally isStatus should never be null when this method is
    //       called. After an unclean shutdown, however, close() on the
    //       datastore may have run before the incoming connection's
    //       cleanup, in which case isStatus has already been thrown
    //       away. Silently do nothing in that case.
    if(this.isStatus == null){
      return;
    }

    this.isStatus.removeConnection(ss, ic, newLpvv);
    this.notifyImpreciseRead(ss, newLpvv);
  }

  //---------------------------------------------------------------------
  // part of applyCheckpoint -- put updateISStatusLpVV in a smaller lock
  //
  //   side effects: Wake any read waiting for IS to become precise
  //                 or waiting for an object to become valid
  //
  //---------------------------------------------------------------------
  private synchronized void updateISStatusLpVV(Vector newLpVVs){
    // Install the new lpVV records in ISStatus.
    this.isStatus.updateLpVV(newLpVVs);

    // Two outstanding to-do notices about making the wake-up below
    // selective instead of unconditional.
    String trackValidity =
      "should track the RAS update for cp exchange to see if"
      + " there's any data become valid from invalid"
      + " so that it can notify read waiting for data to be valid";
    Env.tbd(trackValidity);

    String trackPrecision =
      "keep track of the event: there's any component's ISStatus"
      + " becomes from imprecise -> precise due to CP exchange, "
      + "then decides whether to notifyAll() or not. "
      + " similarly, should track the RAS update for cp exchange"
      + " so that it can notify read waiting for data to be valide";
    Env.tbd(trackPrecision);

    // Wake any read waiting for an IS to become precise or for
    // per-range state to become valid after a checkpoint update.
    this.notifyAll();
  }  




  //-------------------------------------------------------------------------
  // Send Checkpoint to the stream if prevVV = cvv
  // return true if prevVV=cvv and send Cp successfully completed
  //        false otherwise
  //
  // stream format: CPStartMsg = [ subscriptionset, startvv, myCvv, lpvvs]
  //                RAS_per_obj_rec = [Per_obj_state_rec, Per_obj_range_rec*]*
  //-------------------------------------------------------------------------

  /* FIXME
     I don't know the best way to fix it. I thought the following would
     work, but now I don't think so.

     Change (and comment clearly) DataStore::sendCheckpoint so it is no longer synchronized;

     public NOT SYNCHRONIZED  boolean sendCheckpoint(...) throws IOException{

      cvvCopy, startVVCopy, lpVVRecs = this.getSendCPStartState(...); 
      // Call synchronized helper function to look at DataStore state

     if(!prevVV.includes(this.cvv){return false;}

     tos.writeObject(new CPStartMsg(ss, startVV));
     tos.writeObject(this.cvv);
     tos.writeObject(lpvvRecs);
     state.shipRAS(tos, ss, startVV, withBody)
     return true;
   }  
  

The problem:

  Although BerkeleyDB lets us have a cursor outside of a transaction to
  iterate through the database, new updates could appear in the
  checkpoint while we iterate, which could cause us to send something
  that is "too new" -- that is, we could send someone something that is
  newer than cVV.

  Some rough ideas

  (1) Abandon our invariant that <everything in DataStore>.accept <=
      cVV. Send the "too new" invalidation (but not body) 

      Argument: safety (consistency) is maintained since the receiver
      cannot read any "too new" data

      Argument against: UGH! We really use this invariant in a lot of
      places and abandoning it seems like a route to long-term pain

      Conclude: I really don't want to do this!

  (2) Live with it but reduce the risk/damage by putting a timeout on
      the checkpoint send and aborting the checkpoint if the timeout
      is exceeded.

      Now the receiver of a checkpoint should be careful not to ask
      for "too much" at once.

      A related workaround would be to send the checkpoint to a raw
      file on the local disk and then have a separate thread (outside
      of the lock) read it off of disk and send it across the network

      Argument for: simple

      Argument against: Since we iterate through all objects in RAS
      (regardless of subscription set used for checkpoint) there is no
      guarantee of progress once the local database grows "too big".

  (3) Advance cVV during the checkpoint...

      Add a "listener" data structure that allows an ongoing
      checkpoint to look at all updates received by DataStore while a
      checkpoint is being assembled; if an update falls within the
      scope of IDs the checkpoint cares about and if the update
      advances cVV, then tack a new cVV (and lpVV?) into the checkpoint
      stream and then send the new update.

      So, basically, a checkpoint sender is doing two things in
      parallel -- iterating across the RAS state and also forwarding
      any new updates.

      Argument against: way more complicated than I like

We need to discuss this. Is there a simple, correct answer?

-mike

*/

  public synchronized boolean sendCheckpoint(TaggedOutputStream tos,
                                             SubscriptionSet ss,
                                             AcceptVV startVV,
                                             boolean withBody,
                                             VV prevVV)
    throws IOException{
    // Writes a checkpoint for ss to tos, but only when prevVV equals the
    // current version vector (ignoring negatives). Returns false without
    // writing anything otherwise, true after the checkpoint start
    // message and the RAS records have been shipped.
    assert SangminConfig.securityLevel != SangminConfig.COMPLETE: "something wrong: you are runing secure practi, "
        + "this should be overloaded by SecureCore.sendCheckpoint";

    long start = 0;
    if(measureTime){
      start = System.currentTimeMillis();
    }

    // only send a checkpoint when prevVV is equal to cvv
    boolean upToDate =
      ((AcceptVV)prevVV).equalsIgnoreNegatives(this.cvv.cloneAcceptVV());
    if(!upToDate){
      if(measureTime||PRINT_CP_INFO){
        Env.dprintln(dbgPerformance||PRINT_CP_INFO, " sendCheckpoint for " + ss.toString() +
                     " attempt failed");
      }
      return false;
    }

    //
    // Send each PtreeNode overlapped by the subscription set
    // with any overlapped part of lpvv > startVV,
    // with all children overlapped by the subscription set,
    // and with the associated "other".lpvv changed to
    // min("other".lpvv, children not overlapped by the subscription set).
    //
    Vector lpvvRecsToSend = this.isStatus.getMatchingLpvvs(ss, startVV);
    CPStartMsg startMsg = new CPStartMsg(ss,
                                         startVV,
                                         this.cvv.cloneAcceptVV(),
                                         lpvvRecsToSend);
    tos.writeObject(startMsg);
    if(PRINT_CP_INFO || printWriteObjectForCP){
      System.out.println("DataStore::sendCP: send CPStartMsg: " + startMsg);
    }

    if(measureTime){
      LatencyWatcher.put("DS.preSendCP", System.currentTimeMillis() - start);
      start = System.currentTimeMillis();
    }

    // Ship the per-object records after the start message.
    state.shipRAS(tos, ss, startVV, withBody);

    if(measureTime){
      LatencyWatcher.put("RAS.sendCP", System.currentTimeMillis() - start);
    }

    return true;
  }


    public synchronized AcceptVV sendCheckpoint(TaggedOutputStream tos,
                                                SubscriptionSet ss,
                                                AcceptVV startVV,
                                                boolean withBody,
                                                SubscriptionSet streamSS,
                                                AcceptVV streamCVV)
      throws IOException{
      // Sends up to two checkpoints while holding the DataStore monitor
      // and returns a copy of the current version vector afterwards.
      // Each inner call is given a copy of cvv as its prevVV; success is
      // only checked by assertion.
      boolean streamBehind =
        !streamCVV.equalsIgnoreNegatives(this.cvv.cloneAcceptVV());
      if(streamBehind && !streamSS.isEmpty()){
        // gap between streamCVV and cvv: cover it for the stream's own
        // subscription set first
        boolean gapSent = sendCheckpoint(tos, streamSS, streamCVV, withBody,
                                         this.cvv.cloneAcceptVV());
        assert gapSent;
      }

      boolean sent = sendCheckpoint(tos, ss, startVV, withBody,
                                    this.cvv.cloneAcceptVV());
      assert sent;
      return this.cvv.cloneAcceptVV();
    }


  //-------------------------------------------------------------------------
  //  apply Checkpoint: receive and apply PTreeNode list to update ISStatus lpvv 
  //                    receive and apply RAS records to update object states, 
  //                    return when receive RAS_SHIP_DONE
  //
  // Note: RAS is updated first, then ISStatus.
  //       They have to be ordered this way. The big gap imprecise
  //       invalidate makes any read safe, since the cvv has been updated.
  //       It is OK for the RAS to be updated while ISStatus is not, since
  //       every object still remains imprecise even if its RAS record is
  //       valid. But it is not safe for ISStatus to be updated while the
  //       RAS is not: some object could then be precise because its
  //       ISStatus lpvv has been updated by the CP exchange information,
  //       while the corresponding records still hold the old value from
  //       before the CP arrived, which happens to be valid. A read would
  //       then mistakenly return the old value, unaware of the newer value
  //       corresponding to the current cvv.
  //  
  // synchronized issue:     
  // applyCheckpoint doesn't have to be synchronized because at this point the 
  // big imprecise invalidate should have been applied and it's safe to 
  // update the per-object-states in any order or only partially as long as we update
  // the ISStatus after updating per-object-states.
  //  
  // NOTE: 
  // This *public* routine is *not synchronized*. It relies on smaller transactions 
  // within RandomAccessState.
  // The reason we don't want to be synchronized here is that reading from network stream
  // and apply them to RAS could take a long
  // time -- we don't want to be holding the lock for so long! (That
  // part really has to be async.) Instead, we buffer the objects and hand the buffer
  // to RAS instead of the network stream.
  //
  // There are two cases when we need to notifyAll() after updating the checkpoint
  // 1. wake up threads waiting for some invalid ranges become valid
  // 2. wake up threads waiting for imprecise interestsets become precise
  // 
  // Since all the locks that those threads are waiting for are global to DataStore,
  // here we notifyall() every thread at the end of the checkpoint update.
  //-------------------------------------------------------------------------
   
  public void applyCheckpoint(TaggedInputStream s, CPStartMsg cpsm)
    throws IOException, ClassNotFoundException, CausalOrderException{
    // Reads a whole checkpoint from s into memory, applies it to the
    // random access state, and only then updates ISStatus with the lpvv
    // records from cpsm. This method does not take the DataStore monitor
    // itself; only the final updateISStatusLpVV() call is synchronized.
    long start = 0;
    if(measureTime){
      start = System.currentTimeMillis();
    }

    SubscriptionSet ss = cpsm.getSubscriptionSet();
    Vector lpvvRecs = cpsm.getLpvvRecs();

    //
    // At this point the caller has applied the gap imprecise invalidate,
    // so the checkpoint can now be applied to the datastore. It is OK if
    // that invalidate succeeded and the update below fails: ISStatus is
    // then left very imprecise, but still causal, i.e. safe.
    //

    // Transaction size handed to updateRAS; never less than 1.
    long cpUpdateTxnSize =
      Math.max(1L, Config.getCacheSizeBytes(myNodeId)/20000);

    //
    // Avoid holding a global lock while reading from the network: read
    // every checkpoint object into a local list first. The stream ends
    // with a Long, which must be RAS_SHIP_DONE.
    //
    LinkedList objBuff = new LinkedList();
    Object received;
    do{
      received = s.readTaggedObject();
      objBuff.add(received);
      if(received instanceof Long){
        Env.remoteAssert(((Long)received).longValue() == RandomAccessState.RAS_SHIP_DONE);
      }
    }while(!(received instanceof Long));

    if(measureTime){
      LatencyWatcher.put("DS.readObj", System.currentTimeMillis() - start);
      start = System.currentTimeMillis();
    }

    // Keep calling updateRAS on the buffer until it reports completion.
    boolean rasUpdated;
    do{
      rasUpdated = state.updateRAS(objBuff, ss, cpUpdateTxnSize);
      if(dbgPerformance){
        long elapsed = System.currentTimeMillis() - start;
        Env.dprintln(dbgPerformance, "DataStore::applyCheckpoint call state.updateRAS"
                     + " with txnSize = " + cpUpdateTxnSize + " takes " 
                     + elapsed + " ms");
        start = System.currentTimeMillis();
      }
    }while(!rasUpdated);

    if(measureTime){
      LatencyWatcher.put("DS.applyObj", System.currentTimeMillis() - start);
      start = System.currentTimeMillis();
    }

    // ISStatus is updated last, after the per-object state; this call
    // also wakes every thread waiting on the DataStore.
    this.updateISStatusLpVV(lpvvRecs);

    if(measureTime){
      LatencyWatcher.put("DS.afterApplyObj", System.currentTimeMillis() - start);
    }
  }

  //-------------------------------------------------------------------------
  // notify imprecise read thread block on DataStore
  //-------------------------------------------------------------------------  
  public synchronized void notifyImpreciseRead(SubscriptionSet ss, VV prevVV){
    // An empty subscription set means there is nothing to announce.
    if(ss.isEmpty()){
      return;
    }

    // some interest set got precise
    if(dbgNotify){
      Env.dprintln(dbgNotify, "notify for ImpreciseRead for " + ss.toString() +
                   "lpvv=" + prevVV.toString());
    }
    this.notifyAll();
  }


  //---------------------------------------------------------------------------
  //---------------------------------------------------------------------------
  // test helper functions
  //---------------------------------------------------------------------------
  //---------------------------------------------------------------------------

  
  //----------------------------------------------------------------------
  // constructor *** for testing only *** -- create DataStore using 
  // data in RAS but with fake isstatus and cVV 
  //----------------------------------------------------------------------
  public DataStore(String storePath, ISStatus isstatus,
                   NodeId myNodeId_){
    // Test-only construction: opens the random access state at storePath
    // but takes ISStatus from the caller and starts with an all-negative
    // current version vector. If the state cannot be opened the stack
    // trace is printed, an assertion fires, and (with assertions
    // disabled) the constructor returns with the remaining fields unset.
    this.myNodeId = myNodeId_;

    try{
      long cacheBytes = Config.getCacheSizeBytes(this.myNodeId);
      this.state = new RandomAccessState(storePath, cacheBytes);
    }catch(IOException ioe){
      ioe.printStackTrace();
      assert(false);
      return;
    }

    // Dummy isStatus and cVV for testing purposes
    this.isStatus = isstatus;
    this.cvv = (CounterVV)CounterVV.makeVVAllNegatives();

    // The notifier starts out with no registrations
    this.upqNotifier = new UPQNotifier();

    printDataStoreLimitations();
  }

  //---------------------------------------------------------------------------
  // for testing only -- dangerous
  //---------------------------------------------------------------------------
  public void advanceCVV(VV vv){
    // Test hook: advances cvv directly with the given vector. Unlike the
    // regular update paths this method is not synchronized.
    cvv.advanceTimestamps(vv);
  }

  //---------------------------------------------------------------------------
  // return realVV
  //---------------------------------------------------------------------------
  public AcceptVV getRealVV(){
    // Returns a copy of realVV made with cloneAcceptVV().
    // NOTE(review): this read is not synchronized, while applyInval()
    // advances realVV under the DataStore monitor -- confirm whether the
    // unsynchronized access is intentional.
    AcceptVV copy = realVV.cloneAcceptVV();
    return copy;
  }

  //---------------------------------------------------------------------------
  // for testing only -- dangerous
  //---------------------------------------------------------------------------
  public void setISStatus(ISStatus newISStatus){
    // Test hook: replaces the ISStatus reference outright, without
    // taking the DataStore monitor.
    isStatus = newISStatus;
  }

  //---------------------------------------------------------------------------
  // testDestroyAllStateCleanup -- delete all of the files we created
  //---------------------------------------------------------------------------
  public synchronized void testDestroyAllStateCleanup(String dbPath,
                                                      String configPath){
    // Step 1: when a database path is given and the state is still open,
    // close it, destroy its files, and forget it.
    if((dbPath != null) && (state != null)){
      state.close();
      state.destroyAllStateCleanup(dbPath);
      state = null;
    }

    // Step 2: when a config path is given, delete that file. The result
    // of delete() is not checked; any exception is printed and ignored.
    if(configPath == null){
      return;
    }
    try{
      File configFile = new File(configPath);
      configFile.delete();
    }catch(Exception e){
      e.printStackTrace();
    }
  }
 
}





//---------------------------------------------------------------------------
/* $Log: DataStore.java,v $
/* Revision 1.124  2008/02/12 20:56:17  nalini
/* *** empty log message ***
/*
/* Revision 1.123  2008/01/23 20:35:27  nalini
/* added null random access state
/*
/* Revision 1.122  2007/12/18 17:25:50  nalini
/* minor timeout related bug-fixes
/*
/* Revision 1.121  2007/12/11 04:17:27  nalini
/* minor bug fix
/*
/* Revision 1.120  2007/11/05 23:32:42  zjiandan
/* fix SubscribeBWunit.
/*
/* Revision 1.119  2007/09/10 23:52:22  zjiandan
/* upgrade to newest BerkeleyDB je version.
/*
/* Revision 1.118  2007/07/12 17:02:32  zjiandan
/* *** empty log message ***
/*
/* Revision 1.117  2007/07/11 19:08:07  zjiandan
/* clean IncommingConnection
/*
/* Revision 1.116  2007/06/29 01:01:41  zjiandan
/* *** empty log message ***
/*
/* Revision 1.115  2007/06/25 05:21:28  zjiandan
/* Cleanup OutgoingConnection and add unit tests
/*
/* Revision 1.114  2007/05/31 06:02:01  zjiandan
/* add AllPreciseSetsUnit
/*
*/
//---------------------------------------------------------------------------
