Stable Baselines
v2.5.0
User Guide
Installation
Getting Started
Reinforcement Learning Resources
RL Algorithms
Examples
Vectorized Environments
Using Custom Environments
Custom Policy Network
Tensorboard Integration
RL Baselines Zoo
Pre-Training (Behavior Cloning)
RL Algorithms
Base RL Class
Policy Networks
A2C
ACER
ACKTR
DDPG
DQN
GAIL
HER
PPO1
PPO2
SAC
TRPO
Common
Probability Distributions
Tensorflow Utils
Command Utils
Schedules
Misc
Changelog
Projects
Plotting Results
Stable Baselines
Docs » Index
Edit on GitHub
Index
A | B | C | D | E | F | G | H | I | K | L | M | N | O | P | R | S | T | V | W
A
A2C (class in stable_baselines.a2c)
ACER (class in stable_baselines.acer)
ACKTR (class in stable_baselines.acktr)
action_probability() (stable_baselines.a2c.A2C method)
(stable_baselines.acer.ACER method)
(stable_baselines.acktr.ACKTR method)
(stable_baselines.common.base_class.BaseRLModel method)
(stable_baselines.ddpg.DDPG method)
(stable_baselines.deepq.DQN method)
(stable_baselines.gail.GAIL method)
(stable_baselines.her.HER method)
(stable_baselines.ppo1.PPO1 method)
(stable_baselines.ppo2.PPO2 method)
(stable_baselines.sac.SAC method)
(stable_baselines.trpo_mpi.TRPO method)
ActorCriticPolicy (class in stable_baselines.common.policies)
adapt() (stable_baselines.ddpg.AdaptiveParamNoiseSpec method)
AdaptiveParamNoiseSpec (class in stable_baselines.ddpg)
arg_parser() (in module stable_baselines.common.cmd_util)
atari_arg_parser() (in module stable_baselines.common.cmd_util)
B
BaseRLModel (class in stable_baselines.common.base_class)
BernoulliProbabilityDistribution (class in stable_baselines.common.distributions)
BernoulliProbabilityDistributionType (class in stable_baselines.common.distributions)
C
CategoricalProbabilityDistribution (class in stable_baselines.common.distributions)
CategoricalProbabilityDistributionType (class in stable_baselines.common.distributions)
close() (stable_baselines.common.vec_env.DummyVecEnv method)
(stable_baselines.common.vec_env.SubprocVecEnv method)
(stable_baselines.common.vec_env.VecFrameStack method)
(stable_baselines.common.vec_env.VecVideoRecorder method)
CnnLnLstmPolicy (class in stable_baselines.common.policies)
CnnLstmPolicy (class in stable_baselines.common.policies)
CnnPolicy (class in stable_baselines.common.policies)
(class in stable_baselines.ddpg)
(class in stable_baselines.deepq)
(class in stable_baselines.sac)
ConstantSchedule (class in stable_baselines.common.schedules)
conv2d() (in module stable_baselines.common.tf_util)
D
DataLoader (class in stable_baselines.gail)
DDPG (class in stable_baselines.ddpg)
DiagGaussianProbabilityDistribution (class in stable_baselines.common.distributions)
DiagGaussianProbabilityDistributionType (class in stable_baselines.common.distributions)
display_var_info() (in module stable_baselines.common.tf_util)
DQN (class in stable_baselines.deepq)
DummyVecEnv (class in stable_baselines.common.vec_env)
E
entropy() (stable_baselines.common.distributions.BernoulliProbabilityDistribution method)
(stable_baselines.common.distributions.CategoricalProbabilityDistribution method)
(stable_baselines.common.distributions.DiagGaussianProbabilityDistribution method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistribution method)
(stable_baselines.common.distributions.ProbabilityDistribution method)
env_method() (stable_baselines.common.vec_env.DummyVecEnv method)
(stable_baselines.common.vec_env.SubprocVecEnv method)
ExpertDataset (class in stable_baselines.gail)
F
FeedForwardPolicy (class in stable_baselines.common.policies)
flatgrad() (in module stable_baselines.common.tf_util)
flatparam() (stable_baselines.common.distributions.BernoulliProbabilityDistribution method)
(stable_baselines.common.distributions.CategoricalProbabilityDistribution method)
(stable_baselines.common.distributions.DiagGaussianProbabilityDistribution method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistribution method)
(stable_baselines.common.distributions.ProbabilityDistribution method)
flattenallbut0() (in module stable_baselines.common.tf_util)
fromflat() (stable_baselines.common.distributions.BernoulliProbabilityDistribution class method)
(stable_baselines.common.distributions.CategoricalProbabilityDistribution class method)
(stable_baselines.common.distributions.DiagGaussianProbabilityDistribution class method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistribution class method)
function() (in module stable_baselines.common.tf_util)
G
GAIL (class in stable_baselines.gail)
generate_expert_traj() (in module stable_baselines.gail)
get_attr() (stable_baselines.common.vec_env.DummyVecEnv method)
(stable_baselines.common.vec_env.SubprocVecEnv method)
get_available_gpus() (in module stable_baselines.common.tf_util)
get_env() (stable_baselines.a2c.A2C method)
(stable_baselines.acer.ACER method)
(stable_baselines.acktr.ACKTR method)
(stable_baselines.common.base_class.BaseRLModel method)
(stable_baselines.ddpg.DDPG method)
(stable_baselines.deepq.DQN method)
(stable_baselines.gail.GAIL method)
(stable_baselines.ppo1.PPO1 method)
(stable_baselines.ppo2.PPO2 method)
(stable_baselines.sac.SAC method)
(stable_baselines.trpo_mpi.TRPO method)
get_globals_vars() (in module stable_baselines.common.tf_util)
get_images() (stable_baselines.common.vec_env.DummyVecEnv method)
(stable_baselines.common.vec_env.SubprocVecEnv method)
get_next_batch() (stable_baselines.gail.ExpertDataset method)
get_original_obs() (stable_baselines.common.vec_env.VecNormalize method)
get_stats() (stable_baselines.ddpg.AdaptiveParamNoiseSpec method)
get_trainable_vars() (in module stable_baselines.common.tf_util)
H
HER (class in stable_baselines.her)
huber_loss() (in module stable_baselines.common.tf_util)
I
in_session() (in module stable_baselines.common.tf_util)
init_dataloader() (stable_baselines.gail.ExpertDataset method)
initialize() (in module stable_baselines.common.tf_util)
intprod() (in module stable_baselines.common.tf_util)
is_image() (in module stable_baselines.common.tf_util)
K
kl() (stable_baselines.common.distributions.BernoulliProbabilityDistribution method)
(stable_baselines.common.distributions.CategoricalProbabilityDistribution method)
(stable_baselines.common.distributions.DiagGaussianProbabilityDistribution method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistribution method)
(stable_baselines.common.distributions.ProbabilityDistribution method)
L
leaky_relu() (in module stable_baselines.common.tf_util)
learn() (stable_baselines.a2c.A2C method)
(stable_baselines.acer.ACER method)
(stable_baselines.acktr.ACKTR method)
(stable_baselines.common.base_class.BaseRLModel method)
(stable_baselines.ddpg.DDPG method)
(stable_baselines.deepq.DQN method)
(stable_baselines.gail.GAIL method)
(stable_baselines.her.HER method)
(stable_baselines.ppo1.PPO1 method)
(stable_baselines.ppo2.PPO2 method)
(stable_baselines.sac.SAC method)
(stable_baselines.trpo_mpi.TRPO method)
linear_interpolation() (in module stable_baselines.common.schedules)
LinearSchedule (class in stable_baselines.common.schedules)
LnCnnPolicy (class in stable_baselines.ddpg)
(class in stable_baselines.deepq)
(class in stable_baselines.sac)
LnMlpPolicy (class in stable_baselines.ddpg)
(class in stable_baselines.deepq)
(class in stable_baselines.sac)
load() (stable_baselines.a2c.A2C class method)
(stable_baselines.acer.ACER class method)
(stable_baselines.acktr.ACKTR class method)
(stable_baselines.common.base_class.BaseRLModel class method)
(stable_baselines.ddpg.DDPG class method)
(stable_baselines.deepq.DQN class method)
(stable_baselines.gail.GAIL class method)
(stable_baselines.her.HER class method)
(stable_baselines.ppo1.PPO1 class method)
(stable_baselines.ppo2.PPO2 class method)
(stable_baselines.sac.SAC class method)
(stable_baselines.trpo_mpi.TRPO class method)
load_running_average() (stable_baselines.common.vec_env.VecNormalize method)
load_state() (in module stable_baselines.common.tf_util)
log_info() (stable_baselines.gail.ExpertDataset method)
logp() (stable_baselines.common.distributions.ProbabilityDistribution method)
LstmPolicy (class in stable_baselines.common.policies)
M
main() (in module stable_baselines.results_plotter)
make_actor() (stable_baselines.ddpg.CnnPolicy method)
(stable_baselines.ddpg.LnCnnPolicy method)
(stable_baselines.ddpg.LnMlpPolicy method)
(stable_baselines.ddpg.MlpPolicy method)
(stable_baselines.sac.CnnPolicy method)
(stable_baselines.sac.LnCnnPolicy method)
(stable_baselines.sac.LnMlpPolicy method)
(stable_baselines.sac.MlpPolicy method)
make_atari_env() (in module stable_baselines.common.cmd_util)
make_critic() (stable_baselines.ddpg.CnnPolicy method)
(stable_baselines.ddpg.LnCnnPolicy method)
(stable_baselines.ddpg.LnMlpPolicy method)
(stable_baselines.ddpg.MlpPolicy method)
make_critics() (stable_baselines.sac.CnnPolicy method)
(stable_baselines.sac.LnCnnPolicy method)
(stable_baselines.sac.LnMlpPolicy method)
(stable_baselines.sac.MlpPolicy method)
make_mujoco_env() (in module stable_baselines.common.cmd_util)
make_proba_dist_type() (in module stable_baselines.common.distributions)
make_robotics_env() (in module stable_baselines.common.cmd_util)
make_session() (in module stable_baselines.common.tf_util)
MlpLnLstmPolicy (class in stable_baselines.common.policies)
MlpLstmPolicy (class in stable_baselines.common.policies)
MlpPolicy (class in stable_baselines.common.policies)
(class in stable_baselines.ddpg)
(class in stable_baselines.deepq)
(class in stable_baselines.sac)
mode() (stable_baselines.common.distributions.BernoulliProbabilityDistribution method)
(stable_baselines.common.distributions.CategoricalProbabilityDistribution method)
(stable_baselines.common.distributions.DiagGaussianProbabilityDistribution method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistribution method)
(stable_baselines.common.distributions.ProbabilityDistribution method)
mujoco_arg_parser() (in module stable_baselines.common.cmd_util)
MultiCategoricalProbabilityDistribution (class in stable_baselines.common.distributions)
MultiCategoricalProbabilityDistributionType (class in stable_baselines.common.distributions)
N
neglogp() (stable_baselines.common.distributions.BernoulliProbabilityDistribution method)
(stable_baselines.common.distributions.CategoricalProbabilityDistribution method)
(stable_baselines.common.distributions.DiagGaussianProbabilityDistribution method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistribution method)
(stable_baselines.common.distributions.ProbabilityDistribution method)
NormalActionNoise (class in stable_baselines.ddpg)
normc_initializer() (in module stable_baselines.common.tf_util)
numel() (in module stable_baselines.common.tf_util)
O
OrnsteinUhlenbeckActionNoise (class in stable_baselines.ddpg)
outer_scope_getter() (in module stable_baselines.common.tf_util)
P
param_placeholder() (stable_baselines.common.distributions.ProbabilityDistributionType method)
param_shape() (stable_baselines.common.distributions.BernoulliProbabilityDistributionType method)
(stable_baselines.common.distributions.CategoricalProbabilityDistributionType method)
(stable_baselines.common.distributions.DiagGaussianProbabilityDistributionType method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistributionType method)
(stable_baselines.common.distributions.ProbabilityDistributionType method)
PiecewiseSchedule (class in stable_baselines.common.schedules)
plot() (stable_baselines.gail.ExpertDataset method)
plot_curves() (in module stable_baselines.results_plotter)
plot_results() (in module stable_baselines.results_plotter)
PPO1 (class in stable_baselines.ppo1)
PPO2 (class in stable_baselines.ppo2)
predict() (stable_baselines.a2c.A2C method)
(stable_baselines.acer.ACER method)
(stable_baselines.acktr.ACKTR method)
(stable_baselines.common.base_class.BaseRLModel method)
(stable_baselines.ddpg.DDPG method)
(stable_baselines.deepq.DQN method)
(stable_baselines.gail.GAIL method)
(stable_baselines.her.HER method)
(stable_baselines.ppo1.PPO1 method)
(stable_baselines.ppo2.PPO2 method)
(stable_baselines.sac.SAC method)
(stable_baselines.trpo_mpi.TRPO method)
prepare_pickling() (stable_baselines.gail.ExpertDataset method)
pretrain() (stable_baselines.a2c.A2C method)
(stable_baselines.acer.ACER method)
(stable_baselines.acktr.ACKTR method)
(stable_baselines.common.base_class.BaseRLModel method)
(stable_baselines.ddpg.DDPG method)
(stable_baselines.deepq.DQN method)
(stable_baselines.gail.GAIL method)
(stable_baselines.ppo1.PPO1 method)
(stable_baselines.ppo2.PPO2 method)
(stable_baselines.sac.SAC method)
(stable_baselines.trpo_mpi.TRPO method)
proba_distribution_from_flat() (stable_baselines.common.distributions.DiagGaussianProbabilityDistributionType method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistributionType method)
(stable_baselines.common.distributions.ProbabilityDistributionType method)
proba_distribution_from_latent() (stable_baselines.common.distributions.BernoulliProbabilityDistributionType method)
(stable_baselines.common.distributions.CategoricalProbabilityDistributionType method)
(stable_baselines.common.distributions.DiagGaussianProbabilityDistributionType method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistributionType method)
(stable_baselines.common.distributions.ProbabilityDistributionType method)
proba_step() (stable_baselines.common.policies.ActorCriticPolicy method)
(stable_baselines.common.policies.FeedForwardPolicy method)
(stable_baselines.common.policies.LstmPolicy method)
(stable_baselines.ddpg.CnnPolicy method)
(stable_baselines.ddpg.LnCnnPolicy method)
(stable_baselines.ddpg.LnMlpPolicy method)
(stable_baselines.ddpg.MlpPolicy method)
(stable_baselines.deepq.CnnPolicy method)
(stable_baselines.deepq.LnCnnPolicy method)
(stable_baselines.deepq.LnMlpPolicy method)
(stable_baselines.deepq.MlpPolicy method)
(stable_baselines.sac.CnnPolicy method)
(stable_baselines.sac.LnCnnPolicy method)
(stable_baselines.sac.LnMlpPolicy method)
(stable_baselines.sac.MlpPolicy method)
probability_distribution_class() (stable_baselines.common.distributions.BernoulliProbabilityDistributionType method)
(stable_baselines.common.distributions.CategoricalProbabilityDistributionType method)
(stable_baselines.common.distributions.DiagGaussianProbabilityDistributionType method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistributionType method)
(stable_baselines.common.distributions.ProbabilityDistributionType method)
ProbabilityDistribution (class in stable_baselines.common.distributions)
ProbabilityDistributionType (class in stable_baselines.common.distributions)
R
render() (stable_baselines.common.vec_env.DummyVecEnv method)
(stable_baselines.common.vec_env.SubprocVecEnv method)
reset() (stable_baselines.common.vec_env.DummyVecEnv method)
(stable_baselines.common.vec_env.SubprocVecEnv method)
(stable_baselines.common.vec_env.VecFrameStack method)
(stable_baselines.common.vec_env.VecNormalize method)
(stable_baselines.common.vec_env.VecVideoRecorder method)
(stable_baselines.ddpg.NormalActionNoise method)
(stable_baselines.ddpg.OrnsteinUhlenbeckActionNoise method)
robotics_arg_parser() (in module stable_baselines.common.cmd_util)
rolling_window() (in module stable_baselines.results_plotter)
S
SAC (class in stable_baselines.sac)
sample() (stable_baselines.common.distributions.BernoulliProbabilityDistribution method)
(stable_baselines.common.distributions.CategoricalProbabilityDistribution method)
(stable_baselines.common.distributions.DiagGaussianProbabilityDistribution method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistribution method)
(stable_baselines.common.distributions.ProbabilityDistribution method)
sample_dtype() (stable_baselines.common.distributions.BernoulliProbabilityDistributionType method)
(stable_baselines.common.distributions.CategoricalProbabilityDistributionType method)
(stable_baselines.common.distributions.DiagGaussianProbabilityDistributionType method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistributionType method)
(stable_baselines.common.distributions.ProbabilityDistributionType method)
sample_placeholder() (stable_baselines.common.distributions.ProbabilityDistributionType method)
sample_shape() (stable_baselines.common.distributions.BernoulliProbabilityDistributionType method)
(stable_baselines.common.distributions.CategoricalProbabilityDistributionType method)
(stable_baselines.common.distributions.DiagGaussianProbabilityDistributionType method)
(stable_baselines.common.distributions.MultiCategoricalProbabilityDistributionType method)
(stable_baselines.common.distributions.ProbabilityDistributionType method)
save() (stable_baselines.a2c.A2C method)
(stable_baselines.acer.ACER method)
(stable_baselines.acktr.ACKTR method)
(stable_baselines.common.base_class.BaseRLModel method)
(stable_baselines.ddpg.DDPG method)
(stable_baselines.deepq.DQN method)
(stable_baselines.gail.GAIL method)
(stable_baselines.her.HER method)
(stable_baselines.ppo1.PPO1 method)
(stable_baselines.ppo2.PPO2 method)
(stable_baselines.sac.SAC method)
(stable_baselines.trpo_mpi.TRPO method)
save_running_average() (stable_baselines.common.vec_env.VecNormalize method)
save_state() (in module stable_baselines.common.tf_util)
sequential_next() (stable_baselines.gail.DataLoader method)
set_attr() (stable_baselines.common.vec_env.DummyVecEnv method)
(stable_baselines.common.vec_env.SubprocVecEnv method)
set_env() (stable_baselines.a2c.A2C method)
(stable_baselines.acer.ACER method)
(stable_baselines.acktr.ACKTR method)
(stable_baselines.common.base_class.BaseRLModel method)
(stable_baselines.ddpg.DDPG method)
(stable_baselines.deepq.DQN method)
(stable_baselines.gail.GAIL method)
(stable_baselines.ppo1.PPO1 method)
(stable_baselines.ppo2.PPO2 method)
(stable_baselines.sac.SAC method)
(stable_baselines.trpo_mpi.TRPO method)
setup_model() (stable_baselines.a2c.A2C method)
(stable_baselines.acer.ACER method)
(stable_baselines.acktr.ACKTR method)
(stable_baselines.common.base_class.BaseRLModel method)
(stable_baselines.ddpg.DDPG method)
(stable_baselines.deepq.DQN method)
(stable_baselines.gail.GAIL method)
(stable_baselines.her.HER method)
(stable_baselines.ppo1.PPO1 method)
(stable_baselines.ppo2.PPO2 method)
(stable_baselines.sac.SAC method)
(stable_baselines.trpo_mpi.TRPO method)
shape_el() (in module stable_baselines.common.distributions)
single_threaded_session() (in module stable_baselines.common.tf_util)
stable_baselines.a2c (module)
stable_baselines.acer (module)
stable_baselines.acktr (module)
stable_baselines.common.base_class (module)
stable_baselines.common.cmd_util (module)
stable_baselines.common.distributions (module)
stable_baselines.common.policies (module)
stable_baselines.common.schedules (module)
stable_baselines.common.tf_util (module)
stable_baselines.common.vec_env (module)
stable_baselines.ddpg (module)
stable_baselines.deepq (module)
stable_baselines.gail (module), [1]
stable_baselines.her (module)
stable_baselines.ppo1 (module)
stable_baselines.ppo2 (module)
stable_baselines.results_plotter (module)
stable_baselines.sac (module)
stable_baselines.trpo_mpi (module)
start_process() (stable_baselines.gail.DataLoader method)
step() (stable_baselines.common.policies.ActorCriticPolicy method)
(stable_baselines.common.policies.FeedForwardPolicy method)
(stable_baselines.common.policies.LstmPolicy method)
(stable_baselines.ddpg.CnnPolicy method)
(stable_baselines.ddpg.LnCnnPolicy method)
(stable_baselines.ddpg.LnMlpPolicy method)
(stable_baselines.ddpg.MlpPolicy method)
(stable_baselines.deepq.CnnPolicy method)
(stable_baselines.deepq.LnCnnPolicy method)
(stable_baselines.deepq.LnMlpPolicy method)
(stable_baselines.deepq.MlpPolicy method)
(stable_baselines.sac.CnnPolicy method)
(stable_baselines.sac.LnCnnPolicy method)
(stable_baselines.sac.LnMlpPolicy method)
(stable_baselines.sac.MlpPolicy method)
step_async() (stable_baselines.common.vec_env.DummyVecEnv method)
(stable_baselines.common.vec_env.SubprocVecEnv method)
step_wait() (stable_baselines.common.vec_env.DummyVecEnv method)
(stable_baselines.common.vec_env.SubprocVecEnv method)
(stable_baselines.common.vec_env.VecFrameStack method)
(stable_baselines.common.vec_env.VecNormalize method)
(stable_baselines.common.vec_env.VecVideoRecorder method)
SubprocVecEnv (class in stable_baselines.common.vec_env)
switch() (in module stable_baselines.common.tf_util)
T
TRPO (class in stable_baselines.trpo_mpi)
ts2xy() (in module stable_baselines.results_plotter)
V
value() (stable_baselines.common.policies.ActorCriticPolicy method)
(stable_baselines.common.policies.FeedForwardPolicy method)
(stable_baselines.common.policies.LstmPolicy method)
(stable_baselines.common.schedules.ConstantSchedule method)
(stable_baselines.common.schedules.LinearSchedule method)
(stable_baselines.common.schedules.PiecewiseSchedule method)
(stable_baselines.ddpg.CnnPolicy method)
(stable_baselines.ddpg.LnCnnPolicy method)
(stable_baselines.ddpg.LnMlpPolicy method)
(stable_baselines.ddpg.MlpPolicy method)
var_shape() (in module stable_baselines.common.tf_util)
VecFrameStack (class in stable_baselines.common.vec_env)
VecNormalize (class in stable_baselines.common.vec_env)
VecVideoRecorder (class in stable_baselines.common.vec_env)
W
window_func() (in module stable_baselines.results_plotter)
Read the Docs
v: v2.5.0
Versions
v2.5.0
v2.4.0
v2.3.0
v2.2.1
v2.2.0
master
Downloads
On Read the Docs
Project Home
Builds
Free document hosting provided by Read the Docs.