#ifndef RL_PLAYER
#define RL_PLAYER

#include "KeepawayPlayer.h"
#include "tiles2.h"

/* Compile-time capacities for the fixed-size arrays embedded in RLPlayer. */

/* Length of RLPlayer::Q and first dimension of RLPlayer::tiles. */
#define RL_MAX_ACTIONS 10
/* Length of RLPlayer::weights, ::traces and ::nonzeroTracesInverse (2^20 entries). */
#define RL_MEMORY_SIZE 1048576
/* Length of RLPlayer::nonzeroTraces. */
#define RL_MAX_NONZERO_TRACES 100000
/* Second dimension of RLPlayer::tiles (slots available per action). */
#define RL_MAX_NUM_TILINGS 6000

/**
 * Keepaway player derived from KeepawayPlayer that keeps a table of weights
 * and traces indexed through tile indices, and exposes an SMDP-style
 * start/step/end episode interface.
 *
 * NOTE(review): the object embeds three RL_MEMORY_SIZE-element arrays
 * (weights, traces, nonzeroTracesInverse), i.e. several megabytes per
 * instance on typical platforms -- presumably instances must not be placed
 * on the stack; confirm how callers allocate it.
 */
class RLPlayer : public KeepawayPlayer
{
private:
  /* ---- configuration ------------------------------------------------ */

  char weightsFile[256];   // buffer for a weights file name (see load/saveWeights)
  bool bLearning;          // presumably mirrors the bLearn constructor flag -- TODO confirm
  bool bSaveWeights;       // presumably set when a save file was supplied -- TODO confirm

  int epochNum;

  // Learning parameters; the names suggest the usual step size, discount,
  // trace decay and exploration rate -- TODO confirm against the .cpp.
  double alpha;
  double gamma;
  double lambda;
  double epsilon;

  /* ---- action values and tiling state ------------------------------- */

  int numActions;                       // number of entries of Q / rows of tiles in use (assumed)
  double tileWidths[MAX_STATE_VARS];    // one width per state variable
  double Q[RL_MAX_ACTIONS];             // one value per action

  double weights[RL_MEMORY_SIZE];
  double traces[RL_MEMORY_SIZE];

  int tiles[RL_MAX_ACTIONS][RL_MAX_NUM_TILINGS];  // per-action tile indices
  int numTilings;

  /* ---- bookkeeping for the non-zero traces -------------------------- */

  double minimumTrace;
  int nonzeroTraces[RL_MAX_NONZERO_TRACES];
  int numNonzeroTraces;
  int nonzeroTracesInverse[RL_MEMORY_SIZE];  // presumably maps a feature index back to its slot in nonzeroTraces -- verify

  collision_table *colTab;   // collision table type from the tile-coding header

  /* ---- internal helpers --------------------------------------------- */

  int selectAction();
  void initializeTileWidths();

  // NOTE(review): filename is not modified as far as the names suggest; it
  // could be const char* if the definitions are updated together.
  bool loadWeights(char *filename);
  bool saveWeights(char *filename);

  bool calculateStateVars();
  double computeQ(int a);
  int argmaxQ();
  void updateWeights(double delta);
  void loadTiles(double state[], int numFeatures);

  void clearTrace(int f);
  void clearExistentTrace(int f, int loc);
  void decayTraces(double decayRate);
  // NOTE(review): takes float although traces[] stores double -- confirm
  // whether the narrowing is intentional.
  void setTrace(int f, float newTraceValue);
  void increaseMinTrace();

public:
  /**
   * Construct the player. The arguments are handler/model/settings objects,
   * the team name, the weight file names to load from and save to, the
   * learning flag, the keeper and taker counts, a version number, and an
   * optional reconnect value that defaults to -1.
   */
  RLPlayer(ActHandler     *a,
           WorldModel     *wm,
           ServerSettings *ss,
           PlayerSettings *cs,
           char           *strTeamName,
           char           *loadWeightsFile,
           char           *saveWeightsFile,
           bool            bLearn,
           int             iNumKeepers,
           int             iNumTakers,
           double          dVersion,
           int             iReconnect = -1);

  // Episode interface. The int results are presumably the chosen action
  // index -- TODO confirm against the implementation.
  int SMDP_startEpisode(double state[], int numFeatures);
  int SMDP_step(double reward, double state[], int numFeatures);
  void SMDP_endEpisode(double reward);
};

#endif
