import java.util.Random; import java.lang.Math; public class Gridworld { int XMAX = 25; int YMAX = 25; int NUMFEATURES = 3; int NUMACTIONS = 3; int OPPRANDBACK = 5; int OPPRANDSIDE = 10; double WIN = 50.0; double CAUGHT = 0.0; double M1 = 5.0; double M2 = 0; double M3 = 0; int iPlayerX; int iPlayerY; int iOppX; int iOppY; Random rand; public Gridworld(){ rand = new Random(); } private void printWekaHeader(){ System.out.println("*@relation gridworld"); System.out.println("*@attribute Dist numeric"); System.out.println("*@attribute LAng numeric"); System.out.println("*@attribute RAng numeric"); System.out.println("*@attribute Action {0,1,2}"); System.out.println("*@data"); } //get the angle between the left goal corner, the player, and the opponent // (i.e. the open lane from player to this corner) private double getULAngle(){ if (iPlayerY >= iOppY){ if (iPlayerX <= iOppX) return 180.0; else return 0; } //else player is below opponent //player to Left Edge double dCXDelta = 0 - iPlayerX; double dCYDelta = 0; double dCLen = Math.sqrt(dCXDelta * dCXDelta + dCYDelta * dCYDelta); //player to Opp double dOXDelta = iOppX - iPlayerX; double dOYDelta = iOppY - iPlayerY; double dOLen = Math.sqrt(dOXDelta * dOXDelta + dOYDelta * dOYDelta); //get ang(Opponent, Player, Side) double dotProd = (dCXDelta * dOXDelta + dCYDelta * dOYDelta) / (dCLen * dOLen); double dAngOPS = Math.toDegrees(Math.acos(dotProd)); return dAngOPS; } private double getURAngle(){ if (iPlayerY >= iOppY){ if (iPlayerX >= iOppX) return 180.0; else return 0; } //else player is below opponent //player to Right Edge double dCXDelta = XMAX - iPlayerX; double dCYDelta = 0; double dCLen = Math.sqrt(dCXDelta * dCXDelta + dCYDelta * dCYDelta); //player to Opp double dOXDelta = iOppX - iPlayerX; double dOYDelta = iOppY - iPlayerY; double dOLen = Math.sqrt(dOXDelta * dOXDelta + dOYDelta * dOYDelta); //get ang(Opponent, Player, Side) double dotProd = (dCXDelta * dOXDelta + dCYDelta * dOYDelta) / (dCLen * dOLen); double dAngOPS = Math.toDegrees(Math.acos(dotProd)); return dAngOPS; } //Y dist from player to opponent //YMAX if player is behind opponent private double getForwardSensor(){ //return (iOppY - iPlayerY); double dXD = iPlayerX - iOppX; double dYD = iPlayerY - iOppY; return Math.sqrt(dXD * dXD + dYD * dYD); } public void initEpisode(){ iPlayerX = XMAX / 2; iPlayerY = 0; iOppX = (rand.nextInt(10) - 5) + XMAX/2; iOppY = YMAX - (rand.nextInt(1)); } public State getState(){ State s = new State(NUMFEATURES,NUMACTIONS); s.validActions[0] = true; if (iPlayerX > 1) s.validActions[1] = true; if (iPlayerX < XMAX-1) s.validActions[2] = true; s.features[0] = getForwardSensor() * 1000.0; s.features[1] = getULAngle() * 1000.0; s.features[2] = getURAngle() * 1000.0; s.features[0] = (int) s.features[0]; s.features[0] /= 1000; s.features[1] = (int) s.features[1]; s.features[1] /= 1000; s.features[2] = (int) s.features[2]; s.features[2] /= 1000; return s; } public double moveOpponent(double oldReward){ if (iOppY >= iPlayerY){ if (iOppY > iPlayerY) //oppent above player iOppY--; if (rand.nextInt(OPPRANDSIDE) != 1){ if (iOppX > iPlayerX) iOppX--; else if (iOppX < iPlayerX) iOppX++; } } else { //openet below player if (iOppX == iPlayerX){ //oponent only needs to move up if (rand.nextInt(OPPRANDBACK) != 1) iOppY++; } else { //opoent needs to move up and in X. 4 cases: //don't move //move X //move Y //move X & Y if (rand.nextInt(2) == 1){ //move X first if (rand.nextInt(OPPRANDSIDE) != 1){ if (iOppX > iPlayerX) iOppX--; else if (iOppX < iPlayerX) iOppX++; //now try to move Y if (rand.nextInt(OPPRANDBACK) != 1) iOppY++; } } else { //move Y first if (rand.nextInt(OPPRANDBACK) != 1){ iOppY++; if (rand.nextInt(OPPRANDSIDE) != 1){ if (iOppX > iPlayerX) iOppX--; else if (iOppX < iPlayerX) iOppX++; } } } } } if (iPlayerX == iOppX && iPlayerY == iOppY) return CAUGHT; return oldReward; } //Update the player's position and return the reward for getting into the new state public double movePlayer(int iMove){ double r = 0; if (iMove == 1){ assert(iPlayerX > 1); iPlayerX -= 2; iPlayerY++; r = M2; } else if (iMove == 2){ assert (iPlayerX < XMAX - 1); iPlayerX += 2; iPlayerY++; r = M3; } else { iPlayerY++; r = M1; } if (iPlayerY == YMAX) r = WIN; return r; } public boolean notDone(){ if (iPlayerY == YMAX) return false; if ((iPlayerX == iOppX) && (iPlayerY == iOppY)) return false; return true; } public void print(){ System.out.println(); for (int y=YMAX; y>=0; y--){ for (int x=0; x<=XMAX; x++) if ((x == iPlayerX) && (y == iPlayerY)){ if ((x == iOppX) && (y==iOppY)) System.out.print("!"); else System.out.print("*"); } else if ((x == iOppX) && (y==iOppY)) System.out.print("%"); else System.out.print("-"); System.out.println(); } System.out.print("Dist: " + getForwardSensor() + ", LAng: " + getULAngle() + ", RAng: " + getURAngle()); System.out.println(); } public static void main(String args[]) { int NUMEPISODES = 25000; int STARTPRINTING= 100000; //50000; double ALPHA = 0.1; int EPSILON = 10; int PRINTTARGET = 0; int dAveOver = 0; Agent player = new Agent(ALPHA, EPSILON, PRINTTARGET); int iMove = -1; double dReward = -1; Gridworld domain = new Gridworld(); double dEpisodeTotal = 0; player.testing(true); for (int i=-999; i<=NUMEPISODES; i++){ if (i==1) player.testing(false); dEpisodeTotal = 0.0; domain.initEpisode(); iMove = player.step(domain.getState(), 0.0); dReward = domain.movePlayer(iMove); dReward = domain.moveOpponent(dReward); dEpisodeTotal += dReward; while (domain.notDone()){ iMove = player.step(domain.getState(), dReward); dReward = domain.movePlayer(iMove); dReward = domain.moveOpponent(dReward); dEpisodeTotal += dReward; } player.endEpisode(domain.getState(), dReward); dEpisodeTotal += dReward; dAveOver += dEpisodeTotal; if (i % 1000 == 0 && i >= 0){ dAveOver = 0; } if (STARTPRINTING > 0) if (i == STARTPRINTING){ player.doPrint(true); domain.printWekaHeader(); } } player.printCMAC(); } }