# ppo_bullet_humanoid.gin
# Start from the shared settings in 'ppo.gin'; the bindings in this file are
# applied on top of them.
include 'ppo.gin'
# environment config
# Reference run with OpenAI baselines ppo2 on the same environment, kept here
# for comparison with this config:
# CUDA_VISIBLE_DEVICES=0 OPENAI_LOGDIR=/home/weixu/tmp/bullet_humanoid_baseline/ppo-0 OPENAI_LOG_FORMAT='stdout,tensorboard' python -m baselines.run --alg=ppo2 --env=HumanoidBulletEnv-v0 --network=mlp --num_timesteps=1e8 --ent_coef=0.01 --num_hidden=32 --num_layers=3 --value_network=copy
# Requires the pybullet package (`pip install pybullet`). The import below is
# presumably what makes the Bullet environments available by name -- TODO confirm.
import pybullet_envs
# Same environment id as `--env` in the baseline command above.
create_environment.env_name="HumanoidBulletEnv-v0"
# Number of environment instances created in parallel.
create_environment.num_parallel_environments=96
# Action clipping in the gym wrapper is switched off for this task.
suite_gym.wrap_env.clip_action=False
# algorithm config
# PPO loss settings. The entropy coefficient matches `--ent_coef=0.01` in the
# baseline ppo2 command.
PPOLoss.entropy_regularization=1e-2
PPOLoss.gamma=0.99
PPOLoss.normalize_advantages=True
PPOLoss.td_lambda=0.95
PPOLoss.td_error_loss_fn=@element_wise_squared_loss
# Actor network, configured under the `actor/` scope. The three 32-unit
# fully connected layers mirror `--num_hidden=32 --num_layers=3` in the
# baseline command.
actor/ActorDistributionNetwork.fc_layer_params=(32, 32, 32)
# NOTE(review): this binding was reconstructed after an e-mail-obfuscation
# artifact replaced it; confirm the parameter name and `@torch.tanh` against
# the upstream config.
actor/ActorDistributionNetwork.activation=@torch.tanh
actor/ActorDistributionNetwork.continuous_projection_net_ctor=@NormalProjectionNetwork
NormalProjectionNetwork.projection_output_init_gain=1e-5
NormalProjectionNetwork.std_bias_initializer_value=0.0
# Value network, configured under the `value/` scope with the same layer sizes
# as the actor.
value/ValueNetwork.fc_layer_params=(32, 32, 32)
# NOTE(review): reconstructed in the same way as the actor activation above;
# confirm against the upstream config.
value/ValueNetwork.activation=@torch.tanh
# Optimizer settings, configured under the `ac/` scope.
ac/AdamTF.lr=3e-4
ac/AdamTF.gradient_clipping=0.5
ac/AdamTF.clip_by_global_norm=True
# Wire the scoped networks and optimizer into the algorithm. The constructors
# are passed as references (`@name`), while the optimizer reference ends in `()`
# so the constructed object itself is supplied.
ActorCriticAlgorithm.actor_network_ctor=@actor/ActorDistributionNetwork
ActorCriticAlgorithm.value_network_ctor=@value/ValueNetwork
Agent.optimizer=@ac/AdamTF()
# training config
# Settings for the training loop. The exact meaning and units of each field are
# defined by TrainerConfig, which is not shown in this file.
TrainerConfig.num_updates_per_train_iter = 20
# NOTE(review): presumably 512 steps are unrolled in each of the 96 parallel
# environments per iteration -- confirm against TrainerConfig.
TrainerConfig.unroll_length = 512
TrainerConfig.mini_batch_size = 4096
TrainerConfig.mini_batch_length = 1
TrainerConfig.num_iterations = 1000
# Evaluation is enabled; eval_interval is presumably counted in training
# iterations -- TODO confirm.
TrainerConfig.evaluate=True
TrainerConfig.eval_interval = 100
# Summary/debug output: debug summaries and gradient/variable summaries are
# both turned on, with a summary interval of 10.
TrainerConfig.debug_summaries=True
TrainerConfig.summarize_grads_and_vars = True
TrainerConfig.summary_interval = 10