Median pruner (used in Google Vizier): prune if the trial's best intermediate result is worse than the median of the intermediate results of previous trials at the same step.
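To make the rule concrete, here is a minimal standalone sketch of the median stopping rule (the should_prune helper and its data layout are illustrative assumptions, not Optuna's internal API):

from statistics import median
from typing import Dict, List

def should_prune(step: int, current_best: float, previous_trials: List[Dict[int, float]]) -> bool:
    """Illustrative median stopping rule (not Optuna's implementation).

    Each entry of previous_trials maps a step index to the intermediate
    result that trial reported at that step.
    """
    # Intermediate results of previous trials at the same step
    results_at_step = [results[step] for results in previous_trials if step in results]
    if not results_at_step:
        # Nothing to compare against yet
        return False
    # Prune if the current trial's best result so far is worse than the median
    return current_best < median(results_at_step)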
Note: Automatic hyperparameter tuning is included in the RL Zoo
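For example, a command along the lines of python train.py --algo ppo --env CartPole-v1 -optimize --n-trials 100 --sampler tpe --pruner median launches the optimization directly from the RL Zoo (flag names may differ between versions; see the RL Zoo documentation).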
from typing import Any, Dict

import optuna
from torch import nn

def sample_ppo_params(trial: optuna.Trial) -> Dict[str, Any]:
    """Sampler for PPO hyperparameters."""
    # Sample from a list of choices (discrete)
    activation_fn = trial.suggest_categorical("activation_fn", ["tanh", "relu"])
    # Sample an integer in [low, high]
    n_steps = trial.suggest_int("n_steps", 64, 2048)
    # Sample a float in [low, high)
    # (using log uniform distribution)
    learning_rate = trial.suggest_float("lr", 1e-5, 1, log=True)
    return {
        "n_steps": n_steps,
        "learning_rate": learning_rate,
        # PPO expects the activation function as a torch module,
        # passed to the policy via policy_kwargs
        "policy_kwargs": {
            "activation_fn": {"tanh": nn.Tanh, "relu": nn.ReLU}[activation_fn],
        },
    }
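Note: the learning rate is sampled on a log scale because plausible values span several orders of magnitude; a uniform sample would almost never try the smaller values.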
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.evaluation import evaluate_policy

class TrialEvalCallback(BaseCallback):
    """Callback used for evaluating and reporting a trial."""
    def __init__(self, eval_env, trial, n_eval_episodes=5, eval_freq=10000, deterministic=True):
        super().__init__()
        self.eval_env = eval_env
        self.trial = trial
        self.n_eval_episodes = n_eval_episodes
        self.eval_freq = eval_freq
        self.deterministic = deterministic
        self.eval_idx = 0
        self.is_pruned = False
        self.last_mean_reward = -float("inf")

    def _on_step(self) -> bool:
        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:
            # Evaluate the current policy
            mean_reward, _ = evaluate_policy(self.model, self.eval_env,
                n_eval_episodes=self.n_eval_episodes, deterministic=self.deterministic)
            self.last_mean_reward = mean_reward
            self.eval_idx += 1
            # Send report to Optuna
            self.trial.report(mean_reward, self.eval_idx)
            # Prune trial if needed
            if self.trial.should_prune():
                self.is_pruned = True
                return False
        return True
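Returning False from _on_step stops training early, so a pruned trial does not consume its full timestep budget.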
from stable_baselines3 import PPO

def objective(trial: optuna.Trial) -> float:
    ...
    # eval_env, DEFAULT_HYPERPARAMS and the N_* constants are defined elsewhere
    # Sample hyperparameters and merge them with the default ones
    kwargs = DEFAULT_HYPERPARAMS.copy()
    kwargs.update(sample_ppo_params(trial))
    # Create the RL model
    model = PPO(**kwargs)
    # Create the callback that will periodically evaluate
    # and report the performance
    eval_callback = TrialEvalCallback(
        eval_env,
        trial,
        N_EVAL_EPISODES,
        EVAL_FREQ,
        deterministic=True,
    )
    model.learn(N_TIMESTEPS, callback=eval_callback)
    if eval_callback.is_pruned:
        raise optuna.exceptions.TrialPruned()
    return eval_callback.last_mean_reward
from optuna.pruners import MedianPruner
from optuna.samplers import TPESampler, RandomSampler
from optuna.visualization import plot_optimization_history

# Select the sampler (RandomSampler, TPESampler, CmaEsSampler, ...)
sampler = TPESampler(n_startup_trials=5)
# Do not prune before 1/3 of the max budget is used
pruner = MedianPruner(n_startup_trials=5, n_warmup_steps=N_EVALUATIONS // 3)
# Create the study and start the hyperparameter optimization
study = optuna.create_study(sampler=sampler, pruner=pruner, direction="maximize")
# This script can be launched in parallel when using a database
study.optimize(objective, n_trials=N_TRIALS, timeout=TIMEOUT)
# Best result
best_trial = study.best_trial
# Pandas dataframe with all the results
study.trials_dataframe().to_csv("study_results_ppo.csv")
# Plot utils
plot_optimization_history(study)
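Optuna ships other plotting helpers as well, e.g. plot_param_importances(study) from optuna.visualization, which shows which hyperparameters matter most.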
Practical session with Colab notebook