/* File: stats.c
 *
 * Statistics initialization, collection, and output routines for SPEC
 *
 * Copyright (C) 1994 Risto Miikkulainen
 *
 *  This software can be copied, modified and distributed freely for
 *  educational and research purposes, provided that this notice is included
 *  in the code, and the author is acknowledged in any materials and reports
 *  that result from its use. It may not be used for commercial purposes
 *  without expressed permission from the author.
 *
 * $Id: stats.c,v 1.23 1994/09/02 22:41:57 risto Exp $
 */

#include <stdio.h>
#include <math.h>
#include <X11/Xlib.h>
#include <X11/Intrinsic.h>
#include <X11/StringDefs.h>

#include "defs.h"
#include "globals.c"


/************ statistics constants *************/

/* Both thresholds are applied to the absolute difference between an
   output unit and its target.  Note that the parser test is strict
   (< WITHINERR) while the control test is inclusive (<= CONTROLWITHIN). */
#define CONTROLWITHIN 0.3	/* control output correct if within this */
#define WITHINERR 0.15		/* parser unit close enough to be counted "within" */
/* The per-module statistics arrays have NSTATMODS entries and are indexed
   by PARSMOD, SEGMOD, STACKMOD and CONTMOD.
   NOTE(review): CONTMOD is presumably the one extra slot beyond the
   NMODULES network modules (i.e. NMODULES == 3) -- confirm in defs.h. */
#define NSTATMODS (NMODULES + 1)/* parser, segmenter, stack, and control */
#define CONTMOD 3		/* control output (used only for statistics) */


/******************** function prototypes ************************/

/* global functions */
#include "prototypes.h"

/* functions local to this file */
static double reszero __P ((double num, int den));


/******************** static variables ************************/

/* Accumulators for the statistics.  All of them are zeroed by init_stats
   and updated by the *_cumulate_stats routines.  The arrays dimensioned
   by NSTATMODS are indexed by module (PARSMOD, SEGMOD, STACKMOD, CONTMOD);
   the per-position arrays are only updated while `testing' is set. */
static int
  nout[NSTATMODS],		/* # of times the output was checked */
  nwithin[NSTATMODS],		/* # of output units within limits (parser only) */
  ncorrect[NSTATMODS],		/* # of correctly identifiable outputs
				   (parser words and control units) */
  nparswithin_frags[MAXFRAGS][MAXCASE],	/* parser-within after each fragment */
  nparscorrect_frags[MAXFRAGS][MAXCASE],/* correct words after each fragment */
  ncontcorrect_seq[MAXFRAGS][MAXSEQ];	/* correct control after each word */

/* Error sums are sums of absolute differences |target - output|; they are
   turned into averages only when printed. */
static double
  errsum[NSTATMODS],			/* cumulative sum of errors */
  parserrsum_frags[MAXFRAGS][MAXCASE],	/* parser error after each fragment */
  segerrsum_seq[MAXFRAGS][MAXSEQ],	/* segm.  error after each word */
  conterrsum_seq[MAXFRAGS][MAXSEQ],	/* control error after each word */
  stackerrsum_frags[MAXFRAGS];		/* stack error after each fragment */


/*********************  initializations ******************************/

void
init_stats ()
/* reset all statistics accumulators to zero; meant to be called before
   each epoch or snapshot so that the sums start afresh */
{
  int mod, frag, pos;

  /* per-module totals: in a parser-only run only the parser's entry is
     touched, the others are left as they are */
  for (mod = 0; mod < NSTATMODS; mod++)
    {
      if (parseronly && mod != PARSMOD)
        continue;
      errsum[mod] = 0.0;
      nout[mod] = 0;
      nwithin[mod] = 0;
      ncorrect[mod] = 0;
    }

  /* per-position tables */
  for (frag = 0; frag < MAXFRAGS; frag++)
    {
      stackerrsum_frags[frag] = 0.0;

      /* segmenter/control figures, one entry per word in the fragment */
      for (pos = 0; pos < MAXSEQ; pos++)
        {
          conterrsum_seq[frag][pos] = 0.0;
          segerrsum_seq[frag][pos] = 0.0;
          ncontcorrect_seq[frag][pos] = 0;
        }

      /* parser figures, only for the output slots currently in use */
      for (pos = 0; pos < noutputs[PARSMOD]; pos++)
        {
          parserrsum_frags[frag][pos] = 0.0;
          nparswithin_frags[frag][pos] = 0;
          nparscorrect_frags[frag][pos] = 0;
        }
    }
}


/*********************  cumulation ******************************/

void
pars_cumulate_stats (senti, fragi)
/* add the parser's current output error to the running totals; while
   testing, also count the output units that are within WITHINERR of
   their targets and the output words that are identified correctly */
     int senti,			/* index of the sentence in sents[] */
       fragi;			/* index of the fragment in the sentence */
{
  int slot, unit;
  double slot_error;		/* summed error of one output word */
  double unit_error;		/* absolute error of one output unit */

  /* one more parser output has been checked */
  nout[PARSMOD]++;

  for (slot = 0; slot < noutputs[PARSMOD]; slot++)
    {
      /* each output word occupies nwordrep consecutive units */
      slot_error = 0.0;
      for (unit = 0; unit < nwordrep; unit++)
        {
          unit_error = fabs (tgtrep[PARSMOD][slot * nwordrep + unit] -
                             outrep[PARSMOD][slot * nwordrep + unit]);
          slot_error += unit_error;

          /* "close enough" units are counted only during testing */
          if (testing && unit_error < WITHINERR)
            {
              nwithin[PARSMOD]++;
              nparswithin_frags[fragi][slot]++;
            }
        }
      errsum[PARSMOD] += slot_error;

      /* the per-position figures below are collected only during testing */
      if (!testing)
        continue;

      parserrsum_frags[fragi][slot] += slot_error;

      /* the word is correct if the nearest lexicon entry is the target */
      if (find_nearest (&outrep[PARSMOD][slot * nwordrep],
                        words, nwordrep, nwords) ==
          sents[senti].frag[fragi].tgts[slot])
        {
          ncorrect[PARSMOD]++;
          nparscorrect_frags[fragi][slot]++;
        }
    }
}


void
seg_cumulate_stats (fragi, seqi)
/* add the segmenter's current output error to the running totals */
     int fragi,			/* index of the fragment */
       seqi;			/* index of the word within the fragment */
{
  int unit;
  double total = 0.0;		/* summed absolute error of this output */

  /* one more segmenter output has been checked */
  nout[SEGMOD]++;

  /* only the first nhidrep[PARSMOD] units of the segmenter output are
     compared here; the control units are handled by cont_cumulate_stats */
  for (unit = 0; unit < nhidrep[PARSMOD]; unit++)
    total += fabs (tgtrep[SEGMOD][unit] - outrep[SEGMOD][unit]);
  errsum[SEGMOD] += total;

  /* the per-position table is filled only during testing */
  if (testing)
    segerrsum_seq[fragi][seqi] += total;
}


void
cont_cumulate_stats (fragi, seqi)
/* add the error of the control outputs to the running totals; while
   testing, also count the control units that are within CONTROLWITHIN
   of their targets */
     int fragi,			/* index of the fragment */
       seqi;			/* index of the word within the fragment */
{
  int unit;
  double total = 0.0;		/* summed absolute error of the control units */
  double unit_error;		/* absolute error of one control unit */

  /* one more control output has been checked */
  nout[CONTMOD]++;

  /* the control units are the NCONTROL units of the segmenter output
     that start at index nhidrep[PARSMOD] */
  for (unit = nhidrep[PARSMOD]; unit < nhidrep[PARSMOD] + NCONTROL; unit++)
    {
      unit_error = fabs (tgtrep[SEGMOD][unit] - outrep[SEGMOD][unit]);
      total += unit_error;

      /* correct control outputs are counted only during testing */
      if (testing && unit_error <= CONTROLWITHIN)
        {
          ncorrect[CONTMOD]++;
          ncontcorrect_seq[fragi][seqi]++;
        }
    }
  errsum[CONTMOD] += total;

  /* the per-position table is filled only during testing */
  if (testing)
    conterrsum_seq[fragi][seqi] += total;
}


void
stack_cumulate_stats (fragi)
/* add the stack's current output error to the running totals */
     int fragi;			/* index of the fragment */
{
  int unit;
  double front_error = 0.0;	/* error over the first nhidrep[PARSMOD] units */

  /* one more stack output has been checked */
  nout[STACKMOD]++;

  for (unit = 0; unit < nhidrep[PARSMOD]; unit++)
    front_error += fabs (outrep[STACKMOD][unit] - tgtrep[STACKMOD][unit]);
  errsum[STACKMOD] += front_error;

  if (testing)
    {
      /* testing: record the error at this fragment position and stop */
      stackerrsum_frags[fragi] += front_error;
      return;
    }

  /* training: the remaining output units (the stack hidrep part) are
     added to the total as well, one unit at a time */
  for (unit = nhidrep[PARSMOD]; unit < noutrep[STACKMOD]; unit++)
    errsum[STACKMOD] += fabs (outrep[STACKMOD][unit] - tgtrep[STACKMOD][unit]);
}


/*********************  results  ******************************/

void
write_error (fp)
/* append one line to fp with the average error per output unit: the
   parser first, followed by segmenter, control and stack unless the
   run is parser-only */
     FILE *fp;			/* simufile pointer */
{
  double pars_avg, seg_avg, cont_avg, stack_avg;

  pars_avg = reszero (errsum[PARSMOD], nout[PARSMOD] * noutrep[PARSMOD]);
  fprintf (fp, " %f", pars_avg);

  if (!parseronly)
    {
      seg_avg = reszero (errsum[SEGMOD], nout[SEGMOD] * nhidrep[PARSMOD]);
      cont_avg = reszero (errsum[CONTMOD], nout[CONTMOD] * NCONTROL);
      stack_avg = reszero (errsum[STACKMOD],
                           nout[STACKMOD] * noutrep[STACKMOD]);
      fprintf (fp, " %f %f %f", seg_avg, cont_avg, stack_avg);
    }

  fprintf (fp, "\n");
}


void
print_summary (epoch)
/* print the average errors etc for each network in this snapshot */
     int epoch;			/* snapshot epoch */
{
  printf ("\n%s (%d sentences), Epoch %d:\n", current_inpfile, nsents, epoch);
  if (!parseronly)
    {
      printf ("  INPUT SEGMNT  CONTRL CORRCL STACK    OUTPUT  ERROR WITHIN CORRCT\n");
      printf ("AVERAGE:%6.3f %6.3f %6.1f %6.3f           %6.3f %6.1f %6.1f\n",
	      reszero (errsum[SEGMOD], nout[SEGMOD] * nhidrep[PARSMOD]),
	      reszero (errsum[CONTMOD], nout[CONTMOD] * NCONTROL),
	      reszero (100.0 * ncorrect[CONTMOD], nout[CONTMOD] * NCONTROL),
	      reszero (errsum[STACKMOD], nout[STACKMOD] * nhidrep[PARSMOD]),
	      reszero (errsum[PARSMOD], nout[PARSMOD] * noutrep[PARSMOD]),
	      reszero (100.0 * nwithin[PARSMOD],
		       nout[PARSMOD] * noutrep[PARSMOD]),
	      reszero (100.0 * ncorrect[PARSMOD],
		       nout[PARSMOD] * noutputs[PARSMOD]));
    }
  else
    {
      printf ("  INPUT    OUTPUT  ERROR WITHIN CORRCT\n");
      printf ("AVERAGE:          %6.3f %6.1f %6.1f\n",
	      reszero (errsum[PARSMOD], nout[PARSMOD] * noutrep[PARSMOD]),
	      reszero (100.0 * nwithin[PARSMOD],
		       nout[PARSMOD] * noutrep[PARSMOD]),
	      reszero (100.0 * ncorrect[PARSMOD],
		       nout[PARSMOD] * noutputs[PARSMOD]));
    }
}


void
print_through_sentence ()
/* print the accumulated figures position by position, using the words
   of the first sentence (sents[0]) as row labels; each fragment gets as
   many rows as the larger of its word count and the number of parser
   output slots, followed by a blank line */
{
  int fragi, row;

  for (fragi = 0; fragi < sents[0].nfrag; fragi++)
    {
      for (row = 0;
           row < imax (sents[0].frag[fragi].nseq, noutputs[PARSMOD]);
           row++)
        {
          /* left part: input word with segmenter/control figures, or
             blank padding when the fragment has no word on this row */
          if (row >= sents[0].frag[fragi].nseq)
            {
              if (parseronly)
                printf ("       ");
              else
                printf ("                            ");
            }
          else
            {
              printf (" %6s",
                      words[sents[0].frag[fragi].inps[row]].chars);
              if (!parseronly)
                {
                  printf (" %6.3f", reszero (segerrsum_seq[fragi][row],
                                             nsents * nhidrep[PARSMOD]));
                  printf (" %6.3f", reszero (conterrsum_seq[fragi][row],
                                             nsents * NCONTROL));
                  printf (" %6.1f",
                          reszero (100.0 * ncontcorrect_seq[fragi][row],
                                   nsents * NCONTROL));
                }
            }

          /* stack column (absent in parser-only runs): the figure is
             shown only on the row of the fragment's last word and only
             if a stack error was recorded, otherwise it is left blank */
          if (!parseronly)
            {
              if (stackerrsum_frags[fragi] != 0.0
                  && row == sents[0].frag[fragi].nseq - 1)
                printf (" %6.3f", reszero (stackerrsum_frags[fragi],
                                           nsents * nhidrep[PARSMOD]));
              else
                printf ("       ");
            }
          printf ("   ");

          /* right part: target word and, if the fragment has outc > 0,
             the parser figures for this output slot */
          if (row < noutputs[PARSMOD])
            {
              printf (" %6s", words[sents[0].frag[fragi].tgts[row]].chars);
              if (sents[0].frag[fragi].outc > 0)
                {
                  printf (" %6.3f", reszero (parserrsum_frags[fragi][row],
                                             nsents * nwordrep));
                  printf (" %6.1f",
                          reszero (100.0 * nparswithin_frags[fragi][row],
                                   nsents * nwordrep));
                  printf (" %6.1f",
                          reszero (100.0 * nparscorrect_frags[fragi][row],
                                   nsents));
                }
            }
          printf ("\n");
        }
      printf ("\n");
    }
}


static double
reszero (num, den)
/* divide num by den, but yield 0 when den is zero (i.e. when no data
   was collected) so that the printing routines never divide by zero */
     double num;			/* numerator */
     int den;				/* denominator */
{
  return (den != 0) ? num / (double) den : 0.0;
}
