Gym wrapper
A gym wrapper for the source-tracking POMDP is provided as part of OTTO. OpenAI Gym is the de facto standard for environment simulators, and is compatible with general-purpose reinforcement learning libraries such as Stable Baselines 3, OpenAI Baselines, RLlib, CleanRL, ChainerRL, and PFRL.
The script below illustrates how to use the gym wrapper. It can be found in docs/scripts.
"""How to use the gym environment for the source-tracking POMDP."""
import os
import sys
sys.path.insert(1, os.path.abspath(os.path.join(sys.path[0], '..', '..')))
from otto.classes.sourcetracking import SourceTracking
from otto.classes.gymwrapper import GymWrapper
def run():
    """Play one episode with randomly sampled actions, logging every step.

    A ``SourceTracking`` simulator is created with ``Ndim=1``,
    ``lambda_over_dx=1.0``, ``R_dt=2`` and ``draw_source=True``, then wrapped
    in a ``GymWrapper``. Actions are drawn from the wrapper's
    ``action_space`` until ``step`` reports ``done``. Each step prints the
    action, the reward, the running return, the ``hit``/``timeout``/``found``
    entries of ``info`` and the new observation; the final return is printed
    once the episode ends.
    """
    simulator = SourceTracking(
        Ndim=1,
        lambda_over_dx=1.0,
        R_dt=2,
        draw_source=True,
    )
    myenv = GymWrapper(sim=simulator)

    # Initial observation, before any action has been taken.
    observation = myenv.reset()
    print("---- t=0, observation=" + str(observation))

    # Random policy: keep sampling actions until the wrapper signals the end.
    total = 0.0
    while True:
        action = myenv.action_space.sample()
        observation, reward, done, info = myenv.step(action)
        total += reward

        # (label, value) pairs, concatenated in order to form the log line.
        fields = (
            ("---- t=", myenv.t),
            (", action=", action),
            (", hit=", info["hit"]),
            (", reward=", reward),
            (", return=", total),
            (", timeout=", info["timeout"]),
            (", found= ", info["found"]),
            (", observation=", observation),
        )
        print("".join(label + str(value) for label, value in fields))

        if done:
            break

    print("Done. Return=" + str(total))
# Only run the demo when this file is executed as a script, not on import.
if __name__ == "__main__":
    run()