// ===================================================================
// Copyright (c) 1997, All rights reserved, by Micheal S. Hewett
//
// This software is free for educational and non-profit use.
// Any for-profit use must be governed by a license available
// from the author, who can be contacted via email at 
// "hewett@cs.stanford.edu"
//
// ===================================================================
//
//  LispParser.java  - A parser that reads LISP text and creates
//                     LISP structures to represent them.
//
//  24 Jan 1997 (mh)
//  26 Feb 1997 (mh) Added support for mixed-case atoms - |Joe Blow|
//
//  Main functions:
//     LispParser(InputStream);  -- create a parser on a file or pipe
//     LispParser(String);       -- create a parser on a string
//     parse();                  -- read the next LISP expression
//
// -------------------------------------------------------------------

package lib.dynatype;

import java.io.*;


/**
 * A parser that reads LISP-syntax text from a text
 * stream or string.  It recognizes all standard
 * LISP datatypes, although not structured ones.
 * This function is designed to fulfill the function
 * of the reader in a LISP <tt>read-eval-print</tt> loop.
 * 
 * Once the LISP parser is instantiated, the
 * <tt>parse()</tt> function can be used to read from
 * a string or stream.
 *
 * @see lib.dynatype.LispValue
 * @author Micheal S. Hewett
 * @version 1.0
 */
public class LispParser
{
  static final char BACKQUOTE          = '`';
  static final char BACKSLASH          = '\\';
  static final char COMMA              = ',';
  static final char DECIMAL            = '.';
  static final char DOUBLEQUOTE        = '"';
  static final char LEFTANGLEBRACKET   = '<';
  static final char ORBAR              = '|';
  static final char POUND              = '#';
  static final char PERIOD             = '.';
  static final char SEMICOLON          = ';';
  static final char RIGHTANGLEBRACKET  = '>';
  static final char SINGLEQUOTE        = '\'';

  // Parser states
  static final int READING_NOTHING           = 0;
  static final int READING_SYMBOL            = 1;
  static final int READING_MIXED_CASE_SYMBOL = 2;
  static final int READING_CHARACTER         = 3;
  static final int READING_STRING            = 4;
  static final int READING_BACKQUOTED_LIST   = 5;


  PushbackInputStream  inputStream;

  public LispParser(InputStream inStream)
  {
    inputStream   = new PushbackInputStream(inStream);
  }

  public LispParser(String inString)
  {
    this(new StringBufferInputStream(inString));
  }
	

  public LispValue parse() throws EOFException
  {
    StringBuffer   token = new StringBuffer(80);    // Should cover most tokens.
    char           ch;
    int            intCh = 0;
    int            parseState  = READING_NOTHING;

    while (true)
    {
      try { intCh = inputStream.read(); }
      catch (IOException e)
	{ break; }

      if (intCh < 0)  { throw new EOFException("Premature end of LISP input."); }
      else
	ch = (char) intCh;

      // System.err.println("parse: read character: " + ch + "(" + intCh + ")");

      // Encounter a comment?: flush the remaining characters on the line.
      if (isSemi(ch)
	  && (parseState != READING_STRING)
	  && (parseState != READING_CHARACTER))
      {
	do 
	{
	  try { intCh = inputStream.read(); }
	  catch (IOException e)
	    { break; }
	  if (intCh < 0)  { throw new EOFException("Premature end of LISP input."); }
	  else
	    ch = (char) intCh;

	  // Apparently read() doesn't do translation.
	  if (ch == '\r')  ch = '\n';
	}
	while (ch != '\n');
	continue;
      }
 	  
      if (parseState != READING_NOTHING) {      /* If reading anything... */
	switch (parseState) {
	  case READING_SYMBOL:
	    if (isTerminator(ch))       /* Terminate reading token. */
	    {
	      try { inputStream.unread(ch); }
	      catch (IOException e)
		{ System.out.println("\n *** I/O error while unreading character '" + ch + "'."); }
	      parseState = READING_NOTHING;
	      return(tokenToLispValue(token.toString().toUpperCase()));
	    }
	    else
	      token.append(ch);
	    break;

	  case READING_MIXED_CASE_SYMBOL:
	    if (isOrBar(ch))       /* Terminate reading token. */
	    {
	      String s = token.toString();
	      
	      token.append(ch);
	      parseState = READING_NOTHING;
	      // Strip off the beginning and ending Or Bars.
	      return(tokenToLispValue(s.substring(1, s.length())));
	    }
	    else
	      token.append(ch);
	    break;
       
	  case READING_STRING:
	    if (ch == BACKSLASH)  // Next char is always in the string
	    {
	      try { intCh = inputStream.read(); }
	      catch (IOException e)
		{ break; }
	      if (intCh < 0)  { throw new EOFException("Premature end of LISP input."); }
	      else
		ch = (char) intCh;


	      token.append(ch);
	      break;
	    }
	      
	    if (ch == DOUBLEQUOTE)
	    {
	      token.append(ch);
	      parseState = READING_NOTHING;
	      return(tokenToLispValue(token.toString()));
	    }
	    else
	      token.append(ch);
	    break;
	    } /* END OF SWITCH */
	} /* END OF IF (parseState) */

      // We are not in the middle of reading something recognizable, so 
      // we try to start something recognizable.
	else
	  if (!isSpace(ch))         /* Start reading a token */
	  { 
	    if (isLparen(ch)) 
	    {
	      return(read_list_token(inputStream));
	    } 
	    else if (isRparen(ch))
	    {
	      System.err.println("WARNING: Too many right parentheses.  NIL assumed.");
	      return(LispValue.NIL);
	    } 
	    else if (isQuote(ch))
	    {
	      return(read_quoted_token(inputStream));
	    }
	    else if (isDoubleQuote(ch))
	    {
	      token.append(ch);
	      parseState = READING_STRING;
	    }
	    else if (isPound(ch))
	    {
	      return(read_structure_token(inputStream));
	    }
	    else if (isBackQuote(ch))
	    {
	      return(read_backquoted_list_token(inputStream));
	    }
	    else if (isOrBar(ch))
	    {
	      token.append(ch);
	      parseState = READING_MIXED_CASE_SYMBOL;
	    }
	    else
	    {
	      parseState = READING_SYMBOL;
	      try { inputStream.unread(ch); }
	      catch (IOException e)
		{ System.out.println("\n *** I/O error while unreading character '" + ch + "'."); }
	    }
	  }  /* if (!isSpace(ch)) */
    
      } /* main WHILE loop */

    /* WE ONLY EXECUTE THIS CODE IF WE HIT end of input string. */
    if (token.length() > 0)
      return(tokenToLispValue(token.toString()));
    else
      return(LispValue.NIL);
  }


  LispValue read_list_token(PushbackInputStream stream) throws EOFException
  {
    boolean firstTime  = true;
    boolean haveDot    = false;
    char    ch;
    int     intCh = 0;
    LispValue      newToken;
    LispValue      newList, newCell;
  
    newList =  LispValue.NIL;
    newCell =  LispValue.NIL;

    while (true)
    {
      try { intCh = inputStream.read(); }
      catch (IOException e)
	{ break; }
      if (intCh < 0)  { throw new EOFException("Premature end of LISP input."); }
      else
	ch = (char) intCh;

      if (!isSpace(ch))
      {
	if (isRparen(ch))
	  return(newList);
		
	if (isPeriod(ch))
	{
	  if (haveDot)
	  {
	    System.out.println("WARNING: Illegal dotted syntax.  NIL assumed.");
	    return LispValue.NIL;
	  }
	  haveDot = true;
	  continue;             // Skip to end of while loop.
	};

	// otherwise process a normal token.
	  
	try { inputStream.unread(ch); }
	catch (IOException e)
	  { System.out.println("\n *** I/O error while unreading character '" + ch + "'."); }

	newToken = parse();
	if (firstTime)
	{
	  newList   = LispValue.VF.makeCons(LispValue.NIL, LispValue.NIL);
	  newList.rplaca(newToken);
	  firstTime = false;
	}
	else
	{
	  if (haveDot)
	    newList.last().rplacd(newToken);
	  else
	  {
	    newCell  = LispValue.VF.makeCons(LispValue.NIL, LispValue.NIL);  /* (NIL . NIL) */
	    newCell.rplaca(newToken);
	    newList.last().rplacd(newCell);
	  }
	} 
      }  // if (!isSpace())
    }    // while ()...

    return LispValue.NIL;     // Shouldn't get here.
  }

  LispValue read_quoted_token(PushbackInputStream stream) throws EOFException
  /*
   * This routine is called by parse when it encounters
   * a quote mark.  It calls parse recursively.
   */
  {
    LispValue newCell          = LispValue.NIL;
    LispValue newQuotedList    = LispValue.NIL;

    /* Construct the quoted list (QUOTE . (NIL . NIL)) then
     * read a token and replace the first NIL by the token read.
     */

    newQuotedList = LispValue.VF.makeCons(LispValue.QUOTE,
					  LispValue.VF.makeCons(LispValue.NIL, LispValue.NIL));
    newCell = parse();
    newQuotedList.cdr().rplaca(newCell);
    return(newQuotedList);
  }


  LispValue read_structure_token(PushbackInputStream stream) throws EOFException
  /*
   * This routine is called by parse when it encounters
   * a pound (#) mark.  It calls parse
   */
  {
    char   ch = '0';
    int    intCh = 0;

    try { intCh = inputStream.read(); }
    catch (IOException e)
      { System.out.println("\n *** I/O error while reading '#' token."); }
    if (intCh < 0)  { throw new EOFException("Premature end of LISP input."); }
    else
      ch = (char) intCh;

    if (isBackSlash(ch))
    {
      try { intCh = inputStream.read(); }
      catch (IOException e)
	{ System.out.println("\n *** I/O error while reading character token."); }
      if (intCh < 0)  { throw new EOFException("Premature end of LISP input."); }
      else
	ch = (char) intCh;

      return new LispCharacter(ch);
    }
    else if (isLeftAngleBracket(ch))
    {
      System.out.println("\n *** parser can't read structures yet.");
      while (!isRightAngleBracket(ch))
	try { intCh = inputStream.read(); }
      catch (IOException e)
	{ System.out.println("\n *** I/O error while reading a structure."); };
      if (intCh < 0)  { throw new EOFException("Premature end of LISP input."); }
      else
	ch = (char) intCh;

      return LispValue.NIL;
    }
    else
    {
      System.out.println("\n *** unknown '#' construct.");
      return LispValue.NIL;
    }
  }

  LispValue read_backquoted_list_token(PushbackInputStream stream)
  {
    System.out.println("\n *** Parser can't read backquoted lists yet.");
    return LispValue.NIL;
  }

  LispValue tokenToLispValue(String token)
  {
    LispValue newCell = null;

    if (T_token_p(token))
      newCell = LispValue.T;
    else if (NIL_token_p(token))
      newCell = LispValue.NIL;
    else if (INTEGER_token_p(token))
      newCell = LispValue.VF.makeInteger(new Long(token));
    else if (REAL_token_p(token))
      newCell = LispValue.VF.makeReal(new Double(token));
    else if (STRING_token_p(token))
    { /* remove the first and last double quotes. */
      try
	{ newCell = LispValue.VF.makeString(token.substring(1, token.length() - 1)); }
      catch (StringIndexOutOfBoundsException e)
	{ System.err.println("Hey, got a bad string index in 'tokenToLispValue'!"); };
      
    }
    else if (SYMBOL_token_p(token)) {
      newCell = LispValue.intern(token);
    }
    else {
      System.err.println("ERROR: Unrecognized input: \"" + token + "\"");
      newCell = LispValue.NIL;
    };

    if (newCell == null) {
      System.err.println("MEMORY_ERROR in  \"tokenToLispValue\" " + "for token \""
                         + token + "\", returning NIL.");
      newCell = LispValue.NIL;
    };

    return(newCell);
  }

  // ----  Utility functions  ----------------------------------

  boolean isLparen(char x)             { return (x == '(');          };
  boolean isRparen(char x)             { return (x == ')');          };
  boolean isBackQuote(char x)          { return (x == BACKQUOTE);    };
  boolean isBackSlash(char x)          { return (x == BACKSLASH);    };
  boolean isComma(char x)              { return (x == COMMA);        };
  boolean isDoubleQuote(char x)        { return (x == DOUBLEQUOTE);  };
  boolean isOrBar(char x)              { return (x == ORBAR);        };
  boolean isPound(char x)              { return (x == POUND);        };
  boolean isPeriod(char x)             { return (x == PERIOD);       };
  boolean isQuote(char x)              { return (x == SINGLEQUOTE);  };
  boolean isSemi(char x)               { return (x == SEMICOLON);    };
  boolean isLeftAngleBracket(char x)   { return (x == LEFTANGLEBRACKET);   };
  boolean isRightAngleBracket(char x)  { return (x == RIGHTANGLEBRACKET);  };

  boolean isSpace(char x)
  { return
      ((x == ' ')          // space
       || (x == '\n')      // newline
       || (x == '\r')      // carriage return
       || (x == '\t')      // tab
       || (x == '\f')      // form feed
       || (x == '\b'));    // backspace
      }

  boolean isTerminator(char x)
  { return
      (isSpace(x)          // white space
       || isLparen(x) || isRparen(x)
       || isQuote(x)  || isSemi(x)
       || isDoubleQuote(x)
       || isComma(x)); }


  /** The equivalent of the C function 'strspn'.
   * Given a string and another string representing a set of characters,
   * this function scans the string and accepts characters that are
   * elements of the given set of characters.  It returns the index 
   * of the first element of the string that is not a member of the
   * set of characters. 
   * For example:
   *    pos = firstCharNotInSet(0, "hello there, how are you?", "ehlort ");
   * returns 11.
   *
   * If the string does not contain any of the characters in the set,
   * str.length() is returned.
   */
  public static int firstCharNotInSet(int startIndex, String str, String charSet)
  {
    int searchIndex = startIndex - 1;  // we add one at the end.
    int length      = str.length();

    //    System.out.print("\nSearching \"" + str + "\" for \"" + charSet + "\" from index " + startIndex);
    try {
      for (int i = startIndex;
	   ((i < length) && (charSet.indexOf(str.charAt(i)) >= 0));
	   ++i)
	searchIndex = i;
    }
    catch (StringIndexOutOfBoundsException e) {
      System.err.println("Hey, got a bad string index in 'firstCharNotInSet'!"); };
      
    //    System.out.println("...returning " + searchIndex);
    return searchIndex + 1;
  };
    

  boolean REAL_token_p(String str)
  /*
   * Does NOT recognize an isolated '+' or '-' as a real number.
   */
  {
    String DECIMALchars  = ".";
    String INTchars      = "0123456789";

    int   decimalPos;
    int   length = str.length();
    int   index  = 0;

    if ((str.charAt(index) == '-') || (str.charAt(index) == '+'))
      index++;

    if (index == length)   // Don't accept a single '-' or '+'
      return false;

    decimalPos = str.indexOf('.');     /* Check for decimal.  If none, not a real number. */
    if (decimalPos < 0)
      return(false);
    
    if (firstCharNotInSet(index, str, INTchars) != decimalPos)
      return(false);

    if (decimalPos == str.length() - 1)
      return(true);         /* Decimal point followed by no digits is legal in LISP. */

    /* Check decimal digits. */
    index = decimalPos + 1;
    return(firstCharNotInSet(index, str, INTchars) == length);
  };


  boolean INTEGER_token_p(String str)
  /*
   * Does NOT recognize an isolated '+' or '-' as an integer.
   */
  {
    String INTchars = "0123456789";

    int   length = str.length();
    int   index  = 0;

    try {
      if ((str.charAt(index) == '-') || (str.charAt(index) == '+'))
	index++;
    }
    catch (StringIndexOutOfBoundsException e) {
      System.err.println("Hey, got a bad string index in 'INTEGER_token_p'!"); };

    if (index == length)   // Don't accept a single '-' or '+'
      return false;

    return(firstCharNotInSet(index, str, INTchars) == length);
  }

  boolean NIL_token_p(String str) { return(str.equalsIgnoreCase("NIL")); };

  boolean STRING_token_p(String str)
  {
    int       length = str.length();
    boolean   value;

    value = false;

    try {
      value = ((length >= 2) 
	       && (str.charAt(0)        == DOUBLEQUOTE) 
	       && (str.charAt(length-1) == DOUBLEQUOTE));
    }
    catch (StringIndexOutOfBoundsException e) {
      System.err.println("Hey, got a bad string index in 'NIL_token_p'!"); };

    return value;
  }


  boolean SYMBOL_token_p(String str) { return(str.length() >= 1); };


  boolean T_token_p(String str) { return(str.equalsIgnoreCase("T")); };
		


  // ----  Test functions  ----------------------------------

  public void    test_parser(String s)
  {
    System.out.print("The string \"" + s + "\" ");

    if (T_token_p(s))
      System.out.println("is T.");
    else if (NIL_token_p(s))
      System.out.println("is NIL.");
    else if (INTEGER_token_p(s))
      System.out.println("is an integer.");
    else if (REAL_token_p(s))
      System.out.println("is a double.");
    else if (STRING_token_p(s))
      System.out.println("is a string.");
    else if (SYMBOL_token_p(s))
      System.out.println("is a symbol.");
    else
      System.out.println("is not recognized.");
  }
       

  public void    test_parser_loop() throws EOFException
  {
    LispValue input, temp, exit;

    exit = LispValue.intern("EXIT");
    temp = LispValue.intern("*TEMP*");

    System.out.println("Run (EXIT) to stop.");
    do 
      {
	 System.out.print("\nLISP>");  System.out.flush();  // Should print top-level prompt
	//    input = parse(stdin);
	//    setq(temp, symbol_value(STAR));
    
	//    print(setq(STAR, eval(input)));
	//    setq(STARSTARSTAR, symbol_value(STARSTAR));
	//    setq(STARSTAR, symbol_value(temp));
	temp = parse();
	System.out.println(); temp.prin1();
	// temp = Jill.COMPILER.compile(temp, LispValue.NIL);  // No globals for now
	// System.out.println(); temp.prin1();
	// temp = Jill.MACHINE.Execute(temp, LispValue.NIL);
	// System.out.println(); temp.prin1();
      }
    while (temp != exit);
    
    System.out.println();
    System.out.flush();
  }

  public void    simple_parser_test()
  {
    test_parser("1234.56789");
    test_parser("1234.");
    test_parser(".56789");
    test_parser("-1234.56789");
    test_parser("+1234.56789");
    test_parser("-.56789");
    test_parser("1234");
    test_parser("-1234");
    test_parser("+1234");
    test_parser("T");
    test_parser("NIL");
    test_parser("\"This is a string\"");
    test_parser("\"astring\"");
    test_parser("\"\"");
    test_parser("ABCD");
    test_parser("def1234");
    test_parser("123def");
    test_parser("abc_def_ghi");
  }


}

