import java.util.Random; import java.lang.Math; public class Gridworld { //features: dist taker, angJump1, distJump1, distJump1Taker, angJump2, distJump2, distJump2Taker static int NUMFEATURES = 7; static int NUMACTIONS = 3; static double OPPSLOWER = 2.9; static double HOLDOPPDIST = 0.8; static double dXCENTER = 0.0; static double dYCENTER = 9.5; static double SMALL = 0.001; //possible moves double dXMove[]; double dYMove[]; double dAngMove[]; double dXT; double dYT; double dXP; double dYP; Random rand; public Gridworld(){ rand = new Random(); dXMove = new double[2]; dYMove = new double[2]; dAngMove = new double[2]; } //make the X,Y positions of the player and taker //fit to a grid, rather than being continuous public void discritizeSpace(){ int iTmp; iTmp = (int)Math.round(dXT * 100.0); dXT = (double)iTmp / 100.0; iTmp = (int)Math.round(dYT * 100.0); dYT = (double)iTmp / 100.0; iTmp = (int)Math.round(dXP * 100.0); dXP = (double)iTmp / 100.0; iTmp = (int) Math.round(dYP * 100.0); dYP = (double)iTmp / 100.0; } public void printDomain(){ System.out.println("player: (" + dXP + ", " + dYP + "), taker: (" + dXT + ", " + dYT + ")"); } private void printWekaHeader(){ System.out.println("*@relation gridworld"); System.out.println("*@attribute s1 numeric"); System.out.println("*@attribute s2 numeric"); System.out.println("*@attribute s3 numeric"); System.out.println("*@attribute s4 numeric"); System.out.println("*@attribute s5 numeric"); System.out.println("*@attribute s6 numeric"); System.out.println("*@attribute s7 numeric"); System.out.println("*@attribute Action {0,1,2}"); System.out.println("*@data"); } public void initEpisode(){ dXT = 9.2; //about 13.0 away dYT = 9.2; dXP = 0.0; dYP = 0.0; } public State getState(){ //features: dist taker, angJump1, distJump1, distJump1Taker, angJump2, distJump2, distJump2Taker State state = new State(NUMFEATURES,NUMACTIONS); double dX = dXP-dXT; double dY = dYP-dYT; state.features[0] = Math.sqrt(dX*dX + dY*dY); for (int i=0; i<2; i++){ state.features[1+(i*3)] = dAngMove[i]; dX = dXMove[i] - dXP; dY = dYMove[i] - dYP; state.features[2+(i*3)] = Math.sqrt(dX*dX + dY*dY); //distance to move 1 position dX = dXMove[i] - dXT; dY = dYMove[i] - dYT; state.features[3+(i*3)] = Math.sqrt(dX*dX + dY*dY); //distance from taker to move 1 position } return state; } private void selectMoves(){ //find 2 points on the circle that are at least 1.0 away int iNumFound = 0; double dX = dXT-dXP; double dY = dYT-dYP; double dTakerDist = Math.sqrt(dX*dX + dY*dY); while (iNumFound < 2){ //distance: 15.7, */- 4.5 double dRandom = rand.nextGaussian(); //mean 0.0, stdev 1.0; dRandom *= 4.5; //now stdev = 2.5 dRandom += 15.7; //dRandom = dYCENTER * 2.0 - .01; if (dRandom >= 1.0 && dRandom <= (dYCENTER*2)) { //this should be satisfiable //Now, given our coordinates, find another point on the circle that is dRandom distance away. //if player were at (0,0), we would want to satisfy: //x*x + (y-dYCENTER)*(y-dYCENTER) = dYCENTER*dYCENTER //x*x + y*y = dRandom*dRandom // //y = dist*dist / 2*r //y = (dRandom*dRandom) / (2*dYCENTER) double dNewY = (dRandom*dRandom) / (2*dYCENTER); double dNewX = Math.sqrt(dRandom*dRandom - (dNewY*dNewY)); if (rand.nextBoolean()) dNewX *= -1.0; //But, the player isn't at (0,0). We need to rotate, based on the current position and the center //Move dist dNewY towards Center //then move dist dNewX perpendicular to Center double dCenterX = (0.0 - dXP) / dYCENTER; double dCenterY = (dYCENTER - dYP) / dYCENTER; //now we have a unit vector //find the distance to move, going towards the center double dMag = dNewY; //((dCenterX*dNewX)+(dCenterY*dNewY)); if (dMag<0) dMag *= -1.0; double dDeltaX = dCenterX * dMag; double dDeltaY = dCenterY * dMag; //now find the distance to move going perp to center double dMag2 = dNewX; //(( dCenterY * dNewX)+(-dCenterX * dNewY)); //if (dMag2 < 0) // dMag2 *= -1.0; double dTX = dMag2 * dCenterY; double dTY = dMag2 * -dCenterX; dDeltaX += dTX; dDeltaY += dTY; dXMove[iNumFound] = dXP + dDeltaX; dYMove[iNumFound] = dYP + dDeltaY; if (dYMove[iNumFound] < 0) dYMove[iNumFound] = 0.0; //slight correction b/c of roundoff errors double dTmp = dYCENTER*dYCENTER - ((dYCENTER-dYMove[iNumFound])*(dYCENTER-dYMove[iNumFound])); dTmp = Math.sqrt(dTmp); if (dXMove[iNumFound] < 0) dXMove[iNumFound] = -dTmp; else dXMove[iNumFound] = dTmp; //calculate the angle between the opponent and this new position //cos ang = (a dot b)/(|A||B|) dTmp = dDeltaX * dX + dDeltaY * dY; dTmp /= (dRandom * dTakerDist); if (dTmp < 0) dTmp *= -1.0; dAngMove[iNumFound] = Math.toDegrees(Math.acos(dTmp)); //ok, now check and see if we're really on the circle if (Math.abs(dXMove[iNumFound]*dXMove[iNumFound] + ((dYCENTER-dYMove[iNumFound])*(dYCENTER-dYMove[iNumFound])) - (dYCENTER*dYCENTER)) > SMALL) System.out.println("ERROR - not on circle, still!"); else if (Double.isNaN(dXMove[iNumFound])) ; else if (Double.isNaN(dYMove[iNumFound])) ; else iNumFound++; } } //ok, now we've got two move positions, but they could be in the wrong order double dDist1 = (dXMove[0]-dXP)*(dXMove[0]-dXP) + (dYMove[0]-dYP)*(dYMove[0]-dYP); double dDist2 = (dXMove[1]-dXP)*(dXMove[1]-dXP) + (dYMove[1]-dYP)*(dYMove[1]-dYP); if (dDist1 > dDist2){ double dTmp = dXMove[0]; dXMove[0] = dXMove[1]; dXMove[1] = dTmp; dTmp = dYMove[0]; dYMove[0] = dYMove[1]; dYMove[1] = dTmp; } dDist1 = (dXMove[0]-dXP)*(dXMove[0]-dXP) + (dYMove[0]-dYP)*(dYMove[0]-dYP); dDist2 = (dXMove[1]-dXP)*(dXMove[1]-dXP) + (dYMove[1]-dYP)*(dYMove[1]-dYP); } public double getReward(int iMove){ if (iMove == 2){ return 1.0; } //reward is the distance of the move, divided by speed double dX = dXMove[iMove] - dXP; double dY = dYMove[iMove] - dYP; return (Math.sqrt(dX*dX + dY*dY) / OPPSLOWER); } //Update the opponent's position based on the move public boolean movePlayer(int iMove){ double dX = 0; double dY = 0; double dDistJump = HOLDOPPDIST * OPPSLOWER; //if hold, want opponent to move HOLDOPPDIST if (iMove < 2){ dX = dXMove[iMove] - dXP; dY = dYMove[iMove] - dYP; dDistJump = Math.sqrt(dX*dX + dY*dY); double dMXT = dXMove[iMove] - dXT; double dMYT = dYMove[iMove] - dYT; double dDist = Math.sqrt(dXT*dXT + dYT*dYT); //current distance to opponent double dMoveTDist = Math.sqrt(dMXT*dMXT + dMYT*dMYT); // current distance from target location to opponet //learned from 25x25 kwy with both handcoded policy and random passing policy if( (dAngMove[iMove] >= 36.74) && (dDistJump >= 14.68) && (dDist <= 5)){ ; } else if ( (dAngMove[iMove] >= 37.27) && (dDist >= 11.98)){ ; } else if ( (dAngMove[iMove] >= 28.58) && (dDist >= 8.69) && (dMoveTDist >= 12.64)){ ; } else if ( (dAngMove[iMove] >= 19.86) && (dDist >= 7.41) && (dDistJump >= 17.17) && (dMoveTDist <= 13.03)){ ; } else if ( (dDistJump <= 13.5) && (dDist >= 2.61) && (dDist <= 5.63) && (dAngMove[iMove] >= 63.25)){ ; } else if ( (dDist >= 6.51) && (dAngMove[iMove] >= 69.13) && (dAngMove[iMove] <= 86.06)){ ; } else if ( (dMoveTDist <= 8.62) && (dDistJump >= 9.01) && (dAngMove[iMove] >= 36.33)){ ; } else{ return false; } //The player made it to the new position dXP = dXMove[iMove]; dYP = dYMove[iMove]; } //opponent moves slower than player double dOppMoveDist = dDistJump / OPPSLOWER; dX = dXT - dXP; dY = dYT - dYP; double dDiv = Math.sqrt(dX*dX + dY*dY); dXT -= (dX/dDiv); dYT -= (dY/dDiv); discritizeSpace(); return true; } //randomized function to check to see if the player gets tagged public boolean notDone(){ double dX = dXP-dXT; double dY = dYP-dYT; double dDist = Math.sqrt(dX*dX + dY*dY); int iCheck=1; if (dDist < 0.3) iCheck = 50; else if (dDist < 0.5) iCheck = 20; else if (dDist < 1.0) iCheck = 10; int iTmp = rand.nextInt(100); if (iTmp < iCheck) return false; return true; } public void print(){ System.out.println(); } public static void main(String args[]) { int NUMEPISODES = 45000; int STARTPRINTING= 25000; double ALPHA = 0.1; int EPSILON = 10; int PRINTTARGET = 40000; int dAveOver = 0; Agent player = new Agent(ALPHA, EPSILON, PRINTTARGET); int iMove = -1; double dReward = -1; Gridworld domain = new Gridworld(); double dEpisodeTotal = 0; boolean playerOK = true; player.testing(true); for (int i=-999; i<=NUMEPISODES; i++){ if (i==1) player.testing(false); dEpisodeTotal = 0.0; domain.initEpisode(); domain.selectMoves(); iMove = player.step(domain.getState(), 0.0); dReward = domain.getReward(iMove); playerOK = domain.movePlayer(iMove); if (playerOK){ dEpisodeTotal += dReward; } while (domain.notDone() && playerOK){ domain.selectMoves(); iMove = player.step(domain.getState(), dReward); dReward = domain.getReward(iMove); playerOK = domain.movePlayer(iMove); if (playerOK){ dEpisodeTotal += dReward; } } player.endEpisode(domain.getState(), 0.0); dAveOver += dEpisodeTotal; if (i % 1000 == 0 && i >= 0){ System.out.print("Ave " + i + ": " + (dAveOver / (double)1000) + " "); player.printStats(); dAveOver = 0; } if (STARTPRINTING > 0) if (i == STARTPRINTING){ player.doPrint(true); System.exit(1); domain.printWekaHeader(); } } } }