Stable-Baselines (SB2)
Ashley Hill, Maximilian Ernestus, Adam Gleave, Anssi Kanervisto
https://github.com/hill-a/stable-baselines

# Train an A2C agent on CartPole-v1 with the SB2 API and save it
from stable_baselines import A2C
model = A2C("MlpPolicy", "CartPole-v1")
model.learn(50000)
model.save("a2c_cartpole")

Stable-Baselines3 (SB3)
https://github.com/DLR-RM/stable-baselines3
import gym
from stable_baselines3 import SAC
# Train an agent using Soft Actor-Critic on Pendulum-v0
env = gym.make("Pendulum-v0")
model = SAC("MlpPolicy", env, verbose=1)
# Train the model
model.learn(total_timesteps=20000)
# Save the model
model.save("sac_pendulum")
# Load the trained model
model = SAC.load("sac_pendulum")
# Start a new episode
obs = env.reset()
# What action to take in state `obs`?
action, _ = model.predict(obs, deterministic=True)
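To close the loop, a minimal sketch of stepping the environment with the predicted actions until the episode ends (the reward bookkeeping below is illustrative, not part of the original snippet):

# Step the environment with the greedy actions for one episode (illustrative)
done = False
episode_return = 0.0
while not done:
    action, _ = model.predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    episode_return += reward
print(f"Episode return: {episode_return:.2f}")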
RL Baselines3 Zoo
https://github.com/DLR-RM/rl-baselines3-zoo
# Train an A2C agent on Atari breakout using tuned hyperparameters,
# evaluate the agent every 10k steps and save a checkpoint every 50k steps
python train.py --algo a2c --env BreakoutNoFrameskip-v4 \
--eval-freq 10000 --save-freq 50000
# Plot the learning curve
python scripts/all_plots.py -a a2c -e BreakoutNoFrameskip-v4 -f logs/
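The zoo also provides an enjoy.py script for visualizing a trained agent; the flags below are an assumption, mirroring the train.py conventions shown above:

# Watch the trained agent (flags assumed to mirror train.py)
python enjoy.py --algo a2c --env BreakoutNoFrameskip-v4 -f logs/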
Hands-on RL session (RLVS 2021)
https://github.com/araffin/rl-handson-rlvs21
Network Architecture
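In SB3, the policy network can be customized through the policy_kwargs argument; a minimal sketch, where the layer sizes are illustrative assumptions rather than recommendations:

from stable_baselines3 import SAC

# Actor and critic MLPs with two hidden layers of 256 units each
# (layer sizes chosen for illustration only)
policy_kwargs = dict(net_arch=[256, 256])
model = SAC("MlpPolicy", "Pendulum-v0", policy_kwargs=policy_kwargs, verbose=1)
model.learn(total_timesteps=20000)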