package pacman.entries.pacman;

import java.util.Random;

import pacman.game.Game;
import pacman.game.Constants.MOVE;

/**
 * SARSA(lambda) with function approximation.
 */
public class SarsaPacMan extends BasicRLPacMan {

	private Random rng = new Random();
	private FeatureSet prototype; // Class to use
	private QFunction Qfunction; // Learned policy

	private MOVE[] actions; // Actions possible in the current state
	private double[] qvalues; // Q-values for actions in the current state
	private FeatureSet[] features; // Features for actions in the current state

	private int lastScore; // Last known game score
	private int bestActionIndex; // Index of current best action
	private int lastActionIndex; // Index of action actually being taken
	private boolean testMode; // Don't explore, learn, or take advice?

	private boolean doUpdate; // Perform a delayed gradient-descent update?
	private double delta1; // First part of delayed update: r-Q(s,a)
	private double delta2; // Second part of delayed update: gamma*Q(s',a')

	private double EPSILON = 0.05; // Exploration rate
	private double ALPHA = 0.001; // Learning rate
	private double GAMMA = 0.999; // Discount rate
	private double LAMBDA = 0.9; // Backup weighting

	/** Initialize the policy. */
	public SarsaPacMan(FeatureSet proto) {
		prototype = proto;
		Qfunction = new QFunction(prototype);
	}

	/** Prepare for the first move. */
	public void startEpisode(Game game, boolean testMode) {
		this.testMode = testMode;
		lastScore = 0;
		Qfunction.clearTraces();
		doUpdate = false;
		delta1 = 0;
		delta2 = 0;
		evaluateMoves(game);
	}

	/** Choose a move. */
	public MOVE getMove(Game game, long timeDue) {
		return actions[lastActionIndex];
	}

	/** Override the move choice. */
	public void setMove(MOVE move) {
		lastActionIndex = -1;
		for (int i=0; i<actions.length; i++)
			if (actions[i] == move)
				lastActionIndex = i;
	}

	/** Evaluate the Q-function on each possible move and select an action
	 *  (assumes FeatureSet.extract(Game, MOVE) and QFunction.evaluate(FeatureSet)). */
	private void evaluateMoves(Game game) {

		actions = game.getPossibleMoves(game.getPacmanCurrentNodeIndex());

		features = new FeatureSet[actions.length];
		for (int i=0; i<actions.length; i++)
			features[i] = prototype.extract(game, actions[i]);

		qvalues = new double[actions.length];
		for (int i=0; i<actions.length; i++)
			qvalues[i] = Qfunction.evaluate(features[i]);

		bestActionIndex = 0;
		for (int i=1; i<actions.length; i++)
			if (qvalues[i] > qvalues[bestActionIndex])
				bestActionIndex = i;

		// Explore or exploit
		if (!testMode && rng.nextDouble() < EPSILON)
			lastActionIndex = rng.nextInt(actions.length);
		else
			lastActionIndex = bestActionIndex;
	}

	/** Get the current possible moves. */
	public MOVE[] getMoves() {
		return actions;
	}

	/** Get the current Q-value array. */
	public double[] getQValues() {
		return qvalues;
	}

	/** Get the current features for an action. */
	public FeatureSet getFeatures(MOVE move) {
		int actionIndex = -1;
		for (int i=0; i<actions.length; i++)
			if (actions[i] == move)
				actionIndex = i;
		return features[actionIndex];
	}
}
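
/*
 * Illustrative sketch only: the delayed gradient-descent update that the delta1/delta2
 * fields above describe, written for a plain linear approximator (double[] weights,
 * double[] features) instead of the QFunction/FeatureSet classes this agent uses.
 * The class and method names here (SarsaUpdateSketch, applyDelayedUpdate) are
 * hypothetical and not part of the framework. With eligibility traces e, one
 * SARSA(lambda) step is:
 *
 *   delta = delta1 + delta2 = (r - Q(s,a)) + gamma * Q(s',a')
 *   e     = gamma * lambda * e + grad_w Q(s,a)   (the gradient is just the feature vector)
 *   w     = w + alpha * delta * e
 */
class SarsaUpdateSketch {

	/** Apply one SARSA(lambda) weight update for a linear Q-function.
	 *  weights, traces, and lastFeatures are assumed to have equal length. */
	static void applyDelayedUpdate(double[] weights, double[] traces, double[] lastFeatures,
			double delta1, double delta2, double alpha, double gamma, double lambda) {
		double delta = delta1 + delta2; // full TD error: r - Q(s,a) + gamma*Q(s',a')
		for (int i = 0; i < weights.length; i++) {
			traces[i] = gamma * lambda * traces[i] + lastFeatures[i]; // decay, then accumulate traces
			weights[i] += alpha * delta * traces[i];                  // gradient-descent step
		}
	}
}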