# Abstraction that allows multimodal networks
# and regular networks to be handled the same way.

# ModeArbitrator.tz (c) 2010 Jacob Schrum.
# See Simulation.tz for more copyright information

@include "Constants.tz".
@include "FreeNetwork.tz".

# It would make sense for ModeArbitrator
# to be a super class of FreeNetAgent, but
# this is not done because it has the potential
# to mess up writing networks to files,
# and could cause backwards compatibility issues.

# Output layout assumed by every method below: a network's output list is
# a sequence of per-mode slices, each (nodes-per-policy + use-preference-nodes)
# entries long.  Within a slice the first nodes-per-policy entries are the
# policy outputs; when use-preference-nodes is 1 the entry right after them
# is that mode's preference value.

Abstract : ModeArbitrator {
    + variables:
        # Number of policy outputs in each mode
        nodes-per-policy (int).
        # Non-zero: sample the mode from the preference distribution
        non-deterministic (int).
        # 1 if every mode carries a preference output, otherwise 0
        use-preference-nodes (int).
        # Non-zero: outputs are brain-pool choices, so only one mode is reported
        brain-pool-chooses-one-rep (int).

    # Archiving is trivial because the state is only a few ints.

    + to archive:
        return 1.

    + to dearchive:
        return 1.

    # Nothing to set up; configuration comes from set-params or the setters.
    + to init:

    # Sets the number of policy outputs per mode.
    + to set-nodes-per-policy to num (int):
        nodes-per-policy = num.

    # Turns probabilistic mode selection on (non-zero) or off (0).
    + to set-non-deterministic to bit (int):
        non-deterministic = bit.

    # Sets whether each mode has a preference output (1) or not (0).
    + to set-use-preference-nodes to bit (int):
        use-preference-nodes = bit.

    # Configures all four settings from the command-line object.
    # The bit-params are read in the same order as before.
    + to set-params command commandline (object):
        # Standard output count unless the action-selector net is requested
        nodes-per-policy = OUTPUTS_STANDARD.
        if (commandline bit-param named "action-selector-net") :
            nodes-per-policy = OUTPUTS_ACTION_SELECTOR.

        non-deterministic = (commandline bit-param named "non-deterministic-mode-selection").

        # Preference nodes are used unless the modes are predefined
        use-preference-nodes = 1.
        if (commandline bit-param named "predefined-network-modes") :
            use-preference-nodes = 0.

        brain-pool-chooses-one-rep = (commandline bit-param named "best-of-brain-pool").

    # Picks a mode for the given raw outputs and returns the two-element
    # list { chosen mode index, policy outputs of that mode }.
    # num-modes defaults to -1, which makes the helpers derive the mode
    # count from the length of the output list.
    + to net-outputs-of outputs out (list) num-modes modes = -1 (int):
        picked (int).

        if (use-preference-nodes == 0) : {
            # One mode per game task
            picked = (controller get-current-game-mode-counter).
            return { picked, (self mode-outputs-of outputs out mode picked) }.
        }

        if non-deterministic :
            picked = (self probabilistic-mode-choice outputs out num-modes modes).
        else
            picked = (self get-preferred-mode outputs out num-modes modes).

        return { picked, (self mode-outputs-of outputs out mode picked) }.

    # Returns the nodes-per-policy policy outputs belonging to mode m.
    + to mode-outputs-of outputs out (list) mode m (int):
        slice (list).
        offset (int).
        start (int).

        slice = {}.
        # Index of the first output belonging to mode m
        start = m * (nodes-per-policy + use-preference-nodes).

        for offset = 0, offset < nodes-per-policy, offset++ : {
            push (out{ (start + offset) }) onto slice.
        }

        return slice.

    # Returns the index of the mode with the strictly greatest preference.
    # Ties keep the earliest mode; mode 0 is returned when no preference
    # exceeds -INFINITY.
    + to get-preferred-mode outputs out (list) num-modes modes = -1 (int):
        candidate (int).
        winner (int).
        winner-value (double).
        current (double).

        if (modes == -1) : modes = (self get-num-output-modes outputs out).

        winner = 0.
        winner-value = -INFINITY.

        for candidate = 0, candidate < modes, candidate++ : {
            current = (self preference-of outputs out mode candidate).
            if (current > winner-value) : {
                winner-value = current.
                winner = candidate.
            }
        }

        return winner.

    # Number of modes of a network, computed from its output count.
    + to get-num-output-modes-from-net network net (object):
        return (self get-num-output-modes-from-num outputs (net get-number-outputs)).

    # Number of modes represented by an output list, computed from its length.
    + to get-num-output-modes outputs out (list):
        return (self get-num-output-modes-from-num outputs ( |out| )).

    # Number of modes represented by num-out output nodes.
    + to get-num-output-modes-from-num outputs num-out (int):
        per-mode (int).
        total (int).

        # The networks outputs are brain-pool choices, not modes
        if brain-pool-chooses-one-rep : return 1.

        per-mode = nodes-per-policy + use-preference-nodes.
        total = num-out.

        # A network with exactly one policy's worth of outputs (no preference
        # node) is counted as if its preference node were present.
        if (total == nodes-per-policy) : total = per-mode.

        return ( total / per-mode ).

    # Preference value of mode m.  When the list holds exactly one policy's
    # worth of outputs, mode 0 gets the fixed preference 1.0.
    + to preference-of outputs out (list) mode m (int):
        if ( (m == 0) && (|out| == nodes-per-policy) ) : return 1.0.

        # The preference entry directly follows the mode's policy outputs
        return ( out{ ((m * (nodes-per-policy + use-preference-nodes)) + nodes-per-policy) } ).

    # Returns one selection probability per mode: E raised to the mode's
    # preference, divided by (0.001 + the sum of those values over all modes).
    + to get-mode-probabilities outputs out (list) num-modes modes = -1 (int):
        k (int).
        weights (list).
        normalized (list).
        total (double).
        weight (double).

        # Assure non-zero sum
        total = 0.001.

        if (modes == -1) : modes = (self get-num-output-modes outputs out).

        weights = {}.
        for k = 0, k < modes, k++ : {
            weight = (E ^ (self preference-of outputs out mode k)).
            push weight onto weights.
            total += weight.
        }

        normalized = {}.
        for k = 0, k < |weights|, k++ : {
            push (weights{k} / total) onto normalized.
        }

        return normalized.

    # Samples a mode index according to get-mode-probabilities by walking
    # the list and subtracting each probability from a random draw.
    # The last mode absorbs whatever probability mass is left over.
    + to probabilistic-mode-choice outputs out (list) num-modes modes = -1 (int):
        distribution (list).
        remaining (double).
        pick (int).
        last (int).

        distribution = (self get-mode-probabilities outputs out num-modes modes).
        remaining = random[ 1.0 ].
        pick = 0.
        last = |distribution| - 1.

        while ( (distribution{ pick } < remaining) && (pick < last) ) : {
            remaining -= distribution{ pick }.
            pick++.
        }

        return pick.
}