/********************************************************************************
 *  Based on MountainCar.cc, created by Adam White, created on March 29 2007.
 *
 *  Episodic Task
 *  Reward: -1 per step
 *  Actions: Discrete
 *        0 - coast
 *        1 - left   (thrust towards -x)
 *        2 - right  (thrust towards +x)
 *        3 - down   (thrust towards -y)
 *        4 - up     (thrust towards +y)
 *
 *  State: Continuous, 4 variables
 *        car's x-position (-1.2 to .6)
 *        car's y-position (-1.2 to .6)
 *        car's x-velocity (-.7 to .7)
 *        car's y-velocity (-.7 to .7)
 *
 *  The ranges above are the original author's notes. The bounds actually
 *  enforced at runtime are mcar_min_position, mcar_max_position and
 *  mcar_max_velocity.
 ********************************************************************************/

#include "MountainCar3DSym.h"

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <iostream>

// Capacity, in bytes, of the buffer that holds the task specification string.
static const size_t kTaskSpecCapacity = 1000;

// Returns a zero-filled buffer of `count` elements of `elem_size` bytes each.
// A NULL result is accepted only for count == 0, where the C library is
// allowed to return NULL; any other allocation failure terminates the process.
static void* alloc_zeroed(size_t count, size_t elem_size)
{
    void* buffer = calloc(count, elem_size);
    if (buffer == NULL && count != 0) {
        fprintf(stderr, "MountainCar3DSym: out of memory\n");
        exit(1);
    }
    return buffer;
}

// Allocates the observation buffers, builds the task specification string and
// draws the random start offset (m_offset, in [-0.005, 0.005]) that
// set_initial_position_at_bottom() adds to both start coordinates.
//
// Returns the task specification string. The buffer stays owned by this
// module and is released by env_cleanup().
Task_specification env_init()
{
    current_observation.numInts = 0;
    current_observation.intArray = static_cast<int*>(
        alloc_zeroed(static_cast<size_t>(current_observation.numInts), sizeof(int)));
    current_observation.numDoubles = state_size;
    current_observation.doubleArray = static_cast<double*>(
        alloc_zeroed(static_cast<size_t>(current_observation.numDoubles), sizeof(double)));

    ro.o.numInts = 0;
    ro.o.intArray = static_cast<int*>(
        alloc_zeroed(static_cast<size_t>(ro.o.numInts), sizeof(int)));
    ro.o.numDoubles = state_size;
    ro.o.doubleArray = static_cast<double*>(
        alloc_zeroed(static_cast<size_t>(ro.o.numDoubles), sizeof(double)));

    ro.terminal = 0;
    ro.r = 0.0;

    env_task_spec = new char[kTaskSpecCapacity];

    // snprintf keeps every write inside its buffer even if the formatted
    // bounds turn out longer than expected.
    char position[200], velocity[200];
    snprintf(position, sizeof(position), "[%f,%f]", mcar_min_position, mcar_max_position);
    snprintf(velocity, sizeof(velocity), "[%f,%f]", -mcar_max_velocity, mcar_max_velocity);
    snprintf(env_task_spec, kTaskSpecCapacity,
             "1:e:%d_[f,f,f,f]_%s_%s_%s_%s:1_[i]_[0,%d]",
             state_size, position, position, velocity, velocity, num_actions - 1);

    // Uniform draw in [0, 1], shifted to [-0.5, 0.5], scaled to [-0.005, 0.005].
    m_offset = static_cast<float>(rand()) / static_cast<float>(RAND_MAX);
    m_offset -= 0.5;
    m_offset /= 100.0;
    std::cout << "MountainCar3DSym offset: " << m_offset << std::endl;

    return env_task_spec;
}

// Begins a new episode: counts it, puts the car at its start position with
// zero velocity, clears the reward/terminal flags and the step counter, and
// returns the first observation.
Observation env_start()
{
    iNumEnvEpisodes++;

    set_initial_position_at_bottom();

    current_observation.doubleArray[0] = mcar_Xposition;
    current_observation.doubleArray[1] = mcar_Yposition;
    current_observation.doubleArray[2] = mcar_Xvelocity;
    current_observation.doubleArray[3] = mcar_Yvelocity;

    copyRLStruct(ro.o, current_observation);
    ro.r = 0.0;
    ro.terminal = 0;
    current_num_steps = 0;

    return current_observation;
}

// Advances the simulation by one step using action `a` (a.intArray[0] is the
// action id) and returns the reward/observation/terminal triple.
//
// The reward is always -1. ro.terminal is only ever raised here (through
// test_termination()); it is reset by env_start().
Reward_observation env_step(Action a)
{
    current_num_steps++;

    update_velocity(a);

    ro.r = -1;
    test_termination();

    current_observation.doubleArray[0] = mcar_Xposition;
    current_observation.doubleArray[1] = mcar_Yposition;
    current_observation.doubleArray[2] = mcar_Xvelocity;
    current_observation.doubleArray[3] = mcar_Yvelocity;

    copyRLStruct(ro.o, current_observation);

    return ro;
}

// Releases everything env_init() allocated. Pointers are reset afterwards so
// that a second call is harmless.
void env_cleanup()
{
    free(current_observation.intArray);
    current_observation.intArray = NULL;
    free(current_observation.doubleArray);
    current_observation.doubleArray = NULL;

    free(ro.o.intArray);
    ro.o.intArray = NULL;
    free(ro.o.doubleArray);
    ro.o.doubleArray = NULL;

    delete[] env_task_spec;
    env_task_spec = NULL;
}

// State keys are not supported: prints a warning and returns a
// value-initialised key.
State_key env_get_state()
{
    State_key sk = State_key();
    printf("Warning, State_keys not implemented in this model\n");
    return sk;
}

// State keys are not supported: prints a warning and ignores `sk`.
void env_set_state(State_key sk)
{
    printf("Warning, State_keys not implemented in this model\n");
}

// Random seed keys are not supported: prints a warning and returns a
// value-initialised key.
Random_seed_key env_get_random_seed()
{
    Random_seed_key rsk = Random_seed_key();
    printf("Warning, Random_seed_keys not implemented in this model\n");
    return rsk;
}

// Random seed keys are not supported: prints a warning and ignores `rsk`.
void env_set_random_seed(Random_seed_key rsk)
{
    printf("Warning, Random_seed_keys not implemented in this model\n");
}

// No messages are accepted by this environment.
// NOTE(review): receiving any message terminates the whole process with exit
// status 1; the return statement is unreachable. Confirm this is intended.
Message env_message(Message M)
{
    exit(1);
    return M;
}

//Helper functions ----------------------------------------------

// Raises ro.terminal when the car has reached the goal on both axes, or when
// the episode has run for more than max_steps steps. Never lowers the flag.
void test_termination()
{
    const bool at_goal = (mcar_Xposition >= mcar_goal_position) &&
                         (mcar_Yposition >= mcar_goal_position);
    const bool out_of_steps = current_num_steps > max_steps;

    if (at_goal || out_of_steps)
        ro.terminal = 1;
}

// Places the car at (-pi/6 + m_offset, -pi/6 + m_offset) with zero velocity.
void set_initial_position_at_bottom()
{
    mcar_Xposition = -M_PI/6.0 + m_offset;
    mcar_Yposition = -M_PI/6.0 + m_offset;
    mcar_Xvelocity = 0.0;
    mcar_Yvelocity = 0.0;
}

// Applies the chosen action's thrust plus the slope term to both velocity
// components, clamps each to [-mcar_max_velocity, mcar_max_velocity], then
// moves the car via update_position().
//
// An action id outside 0..4 leaves both velocities untouched (no thrust and
// no slope term); clamping and the position update still run.
void update_velocity(Action action)
{
    const int act = action.intArray[0];

    double x_thrust = 0.0;
    double y_thrust = 0.0;
    bool known_action = true;

    switch (act) {
    case 0:                        break;  // coast
    case 1:  x_thrust = -0.0007;   break;  // left
    case 2:  x_thrust = +0.0007;   break;  // right
    case 3:  y_thrust = -0.0007;   break;  // down
    case 4:  y_thrust = +0.0007;   break;  // up
    default: known_action = false; break;
    }

    if (known_action) {
        mcar_Xvelocity += x_thrust + cos(3*mcar_Xposition)*(-0.0025);
        mcar_Yvelocity += y_thrust + cos(3*mcar_Yposition)*(-0.0025);
    }

    // Multiplicative Gaussian noise on the velocities is currently disabled:
    //mcar_Xvelocity *= get_gaussian(1.0,std_dev_eff);
    //mcar_Yvelocity *= get_gaussian(1.0,std_dev_eff);

    if (mcar_Xvelocity > mcar_max_velocity)
        mcar_Xvelocity = mcar_max_velocity;
    else if (mcar_Xvelocity < -mcar_max_velocity)
        mcar_Xvelocity = -mcar_max_velocity;

    if (mcar_Yvelocity > mcar_max_velocity)
        mcar_Yvelocity = mcar_max_velocity;
    else if (mcar_Yvelocity < -mcar_max_velocity)
        mcar_Yvelocity = -mcar_max_velocity;

    update_position();
}

// Integrates the velocities into the positions, clamps each position to
// [mcar_min_position, mcar_max_position], and zeroes any velocity component
// that still points out of bounds while the car sits on that bound.
void update_position()
{
    mcar_Xposition += mcar_Xvelocity;
    mcar_Yposition += mcar_Yvelocity;

    // X axis
    if (mcar_Xposition > mcar_max_position)
        mcar_Xposition = mcar_max_position;
    if (mcar_Xposition < mcar_min_position)
        mcar_Xposition = mcar_min_position;
    if (mcar_Xposition==mcar_max_position && mcar_Xvelocity>0)
        mcar_Xvelocity = 0;
    if (mcar_Xposition==mcar_min_position && mcar_Xvelocity<0)
        mcar_Xvelocity = 0;

    // Y axis
    if (mcar_Yposition > mcar_max_position)
        mcar_Yposition = mcar_max_position;
    if (mcar_Yposition < mcar_min_position)
        mcar_Yposition = mcar_min_position;
    if (mcar_Yposition==mcar_max_position && mcar_Yvelocity>0)
        mcar_Yvelocity = 0;
    if (mcar_Yposition==mcar_min_position && mcar_Yvelocity<0)
        mcar_Yvelocity = 0;
}