 /** 
 *  Read and parse an access log 
 **/ 
import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.zip.GZIPOutputStream;

/**
 *  Reads a gzipped access log line by line and hands back parsed
 *  AccessLogRecord objects, skipping lines that cannot be parsed.
 *  The compressed file is decoded through a GunzipProcess (declared
 *  elsewhere in the project); callers should invoke close() when they
 *  are done so the underlying stream is released.
 **/
public class AccessLogReader{

 /** 
 *  Constructor 
 *
 *  Starts a GunzipProcess on the given file and wraps its output in a
 *  buffered character reader.
 *
 *  @param fileName name of the gzipped log file; asserted to end in "gz"
 *  @throws IOException propagated from starting the GunzipProcess or
 *          obtaining its input stream
 **/ 
  public
  AccessLogReader(String fileName) throws IOException{
    GunzipProcess gunzipProcess = null;
    InputStream is = null;

    assert(fileName.endsWith("gz"));
    gunzipProcess = new GunzipProcess(fileName);
    gunzipProcess.startProcess();
    is = gunzipProcess.getInputStream();
    this.reader = new BufferedReader(new InputStreamReader(is));
  }

 /** 
 *  Return the next valid access trace record in the file 
 *
 *  Lines for which the AccessLogRecord constructor throws
 *  UnparseableException are silently skipped.
 *
 *  @return the next parseable record, or null at end of input
 *  @throws IOException if reading the underlying stream fails
 **/ 
  public AccessLogRecord
  getNextValidRecord() throws IOException{
    String line = this.reader.readLine();

    while(line != null){
      try{
        return(new AccessLogRecord(line));
      }catch(AccessLogRecord.UnparseableException e){
        // Deliberately ignored: an unparseable line is skipped and the
        // search continues with the next line.
      }
      line = this.reader.readLine();
    }
    return(null);
  }

 /** 
 *  Return the next valid access trace record for a static file 
 *
 *  Records whose request reports isDynamic() are skipped.
 *
 *  @return the next record with a non-dynamic request, or null at end
 *          of input
 *  @throws IOException if reading the underlying stream fails
 **/ 
  public AccessLogRecord
  getNextStaticRecord() throws IOException{
    AccessLogRecord record = this.getNextValidRecord();

    while((record != null) && (record.getRequest().isDynamic())){
      record = this.getNextValidRecord();
    }
    return(record);
  }

 /** 
 *  Release the reader (and with it the stream obtained from the
 *  GunzipProcess).  The reader must not be used after this call.
 *
 *  @throws IOException if closing the underlying stream fails
 **/ 
  public void
  close() throws IOException{
    this.reader.close();
  }

 /** 
 *  Used for testing 
 *
 *  Writes a small gzipped log, reads it back, and checks with
 *  assertions (run with -ea) that exactly the two static records are
 *  returned.
 **/ 
  public static void main(String[] argv){
    WebLogRequest request = null;
    AccessLogReader alReader = null;
    AccessLogRecord alRecord1 = null;
    AccessLogRecord alRecord2 = null;
    AccessLogRecord alRecord3 = null;

    try{
      System.out.println("Testing AccessLogReader...");
      AccessLogReader.generateGzippedFile("AccessLogReader.test");
      alReader = new AccessLogReader("AccessLogReader.test.gz");
      alRecord1 = alReader.getNextStaticRecord();
      assert(alRecord1 != null);
      request = alRecord1.getRequest();
      assert(request.getObjectID().equals("/eng/countries/GBR/home.html"));

      alRecord2 = alReader.getNextStaticRecord();
      assert(alRecord2 != null);
      request = alRecord2.getRequest();
      assert(request.getFixedObjectID().equals("/rc/common/images/uc.GIF"));
      assert(!request.isDynamic());

      alRecord3 = alReader.getNextStaticRecord();
      assert(alRecord3 == null);
      System.out.println("...Finished");
    }catch(IOException e){
      System.err.println("" + e);
    }finally{
      // Always release the reader, even when an assertion fails.
      if(alReader != null){
        try{
          alReader.close();
        }catch(IOException e){
          System.err.println("" + e);
        }
      }
    }
  }

 /** 
 *  Generate a gzipped file with a fixed set of test entries in a format
 *  readable by this class.  The data is compressed in-process and
 *  written directly to fileName + ".gz"; any existing file of that name
 *  is overwritten.
 *
 *  @param fileName base name of the file; ".gz" is appended
 *  @throws IOException if the file cannot be created or written
 **/ 
  private static void
  generateGzippedFile(String fileName) throws IOException{
    String gzFileName = fileName + ".gz";
    FileOutputStream fos = null;
    PrintStream ps = null;

    try{
      fos = new FileOutputStream(gzFileName);
      ps = new PrintStream(new GZIPOutputStream(fos));
      //AccessLogReader.populateFileLong(ps);
      AccessLogReader.populateFileShort(ps);
      // Closing the PrintStream finishes the gzip trailer and closes fos.
      ps.close();
      // PrintStream swallows IOExceptions; surface them explicitly.
      if(ps.checkError()){
        throw new IOException("Error writing gzipped file " + gzFileName);
      }
      ps = null;
      fos = null;
    }finally{
      if(ps != null){
        ps.close();
      }
      if(fos != null){
        try{
          fos.close();
        }catch(IOException e){
          // Best-effort cleanup; the primary failure is already
          // propagating to the caller.
        }
      }
    }
  }

 /** 
 *  Populate this PrintStream with a lot of data 
 *  (ten well-formed log lines; currently unused, kept as an
 *  alternative to populateFileShort)
 **/ 
  private static void
  populateFileLong(PrintStream ps) throws IOException{
    ps.println("172.30.227.213 - - [20/Sep/2000:00:00:00 +0000] " +
               "\"GET /eng/countries/GBR/home.html HTTP/1.0\" 200 " +
               "120631 \"-\" " +
               "\"Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)\"" +
               " \"-\"");
    ps.println("165.228.133.10 - - [20/Sep/2000:00:00:00 +0000] " +
               "\"GET /rc/common/images/uc.GIF?pag=/eng/home.html&num=68 " +
               "HTTP/1.0\" 302 375 " +
               "\"http://www.olympics.com/eng/home.html\" " +
               "\"Mozilla/4.0 (compatible; MSIE 4.01; Windows NT)\" "+
               "\"id=a0020080000013B3C09682086470000061364.00CB264EC9\"");
    ps.println("172.30.227.213 - - [20/Sep/2000:00:00:00 +0000] " +
               "\"GET /common/images/uc.GIF?pag=/eng/sports/home.html" +
               "&num=56 HTTP/1.0\" 200 41 " +
               "\"http://www.olympics.com/eng/sports/home.html\" " +
               "\"Mozilla/4.0 (compatible; MSIE 5.0; Windows NT)\" \"-\"");
    ps.println("172.30.227.213 - - [20/Sep/2000:00:00:00 +0000] " +
               "\"GET /cgi-bin/date? HTTP/1.0\" 200 25 \"-\" " +
               "\"Mozilla/4.0 (compatible; MSIE 5.0; Win32)\" \"-\"");
    ps.println("24.192.0.130 - - [20/Sep/2000:00:00:00 +0000] " +
               "\"GET /rc/common/images/uc.GIF?" +
               "pag=/eng/sports/BV/schedule/Sport_BV_0924.html&num=70 " +
               "HTTP/1.1\" 302 403 " +
               "\"http://www.olympics.com/eng/sports/BV/schedule/" +
               "Sport_BV_0924.html\" \"Mozilla/4.0 (compatible; " +
               "MSIE 5.0; Windows 98; DigExt)\" " +
               "\"id=a002005000001A8A909693113720000033458.0018C00082\"");
    ps.println("172.30.227.215 - - [20/Sep/2000:00:00:00 +0000] " +
               "\"GET /common/images/uc.GIF?pag=/" +
               "eng/sports/RO/ROM083/home.html&num=40 HTTP/1.0\" " +
               "200 41 " +
               "\"http://www.olympics.com/eng/sports/RO/ROM083/home.html\"" +
               " \"Mozilla/4.0 (compatible; MSIE 5.01; Windows NT)\" \"-\"");
    ps.println("172.30.227.211 - - [20/Sep/2000:00:00:00 +0000] " +
               "\"GET /r/eng/sponsors/wotch.html HTTP/1.0\" 302 352 " +
               "\"http://www.olympics.com/eng/home.html\" \"Mozilla/4.0 " +
               "(compatible; MSIE 4.01; Windows NT; CLS4)\" \"-\"");
    ps.println("172.30.227.215 - - [20/Sep/2000:00:00:02 +0000] " +
               "\"GET /cgi-bin/date? HTTP/1.0\" 200 25 \"-\" " +
               "\"Mozilla/4.7 [en] (WinNT; I)\" \"-\"");
    ps.println("203.54.214.219 - - [20/Sep/2000:00:00:02 +0000] " +
               "\"GET /rc/common/images/uc.GIF?pag=/eng/athletes/" +
               "home.html&num=33 HTTP/1.1\" 302 384 " +
               "\"http://www.olympics.com/eng/athletes/home.html\" " +
               "\"Mozilla/4.0 (compatible; MSIE 4.5; Mac_PowerPC)\" " +
               "\"id=a002008000001B9BA09683730360000869925.00CB36DF77\"");
    ps.println("172.30.227.215 - - [20/Sep/2000:00:00:02 +0000] " +
               "\"GET /common/images/uc.GIF?pag=/eng/sports/TE/TEM001/" +
               "home.html&num=46 HTTP/1.0\" 200 41 " +
               "\"http://www.olympics.com/eng/sports/TE/TEM001/home.html\"" +
               " \"Mozilla/4.0 (compatible; MSIE 4.01; Windows 95)\" \"-\"");
  }

 /** 
 *  Populate this PrintStream with some data 
 *  (1) Unparseable query 
 *  (2) Regular query 
 *  (3) Query that appears dynamic but isn't 
 *  (4) Query that is dynamic 
 **/ 
  private static void
  populateFileShort(PrintStream ps) throws IOException{
    ps.println("172.30.227.213 - garbage " +
               "\"GET /common/images/uc.GIF?pag=/eng/sports/home.html" +
               "&num=56 HTTP/1.0\" 4000 41 " +
               "\"http://www.olympics.com/eng/sports/home.html\" " +
               "\"Mozilla/4.0 (compatible; MSIE 5.0; Windows NT)\" \"-\"");
    ps.println("172.30.227.213 - - [20/Sep/2000:00:00:00 +0000] " +
               "\"GET /eng/countries/GBR/home.html HTTP/1.0\" 200 " +
               "120631 \"-\" " +
               "\"Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)\"" +
               " \"-\"");
    ps.println("165.228.133.10 - - [20/Sep/2000:00:00:00 +0000] " +
               "\"GET /rc/common/images/uc.GIF?pag=/eng/home.html&num=68 " +
               "HTTP/1.0\" 302 375 " +
               "\"http://www.olympics.com/eng/home.html\" " +
               "\"Mozilla/4.0 (compatible; MSIE 4.01; Windows NT)\" "+
               "\"id=a0020080000013B3C09682086470000061364.00CB264EC9\"");
    ps.println("172.30.227.213 - - [20/Sep/2000:00:00:00 +0000] " +
               "\"GET /cgi-bin/date? HTTP/1.0\" 200 25 \"-\" " +
               "\"Mozilla/4.0 (compatible; MSIE 5.0; Win32)\" \"-\"");
  }

 /** 
 *  Private data 
 *  reader: line-oriented view of the decompressed log stream
 **/ 
  private final BufferedReader reader;
}