[2023.06.04] commit-1 · DarriusL/DRL-ExampleCode@efe6ec8 · GitHub
[go: up one dir, main page]

Skip to content

Commit efe6ec8

Browse files
committed
[2023.06.04] commit-1
Prepare for using separate actor and critic networks, instead of a shared network, in the A2C algorithm.
1 parent e807013 commit efe6ec8

6 files changed

+67
-1
lines changed

README.md

+6
Original file line numberDiff line numberDiff line change
@@ -132,3 +132,9 @@ python executor.py -cfg='./config/dqn/doubledqn_cartpole_off.json' --mode='train
132132
python executor.py -cfg='./config/dqn/doubledqn_cartpole_per.json' --mode='train'
133133
```
134134

135+
a2c
136+
137+
```shell
138+
python executor.py -cfg='./config/a2c/a2c_nstep_cartpole_on.json' --mode='train'
139+
```
140+
+61
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
{
2+
"agent_cfg":{
3+
"algorithm_cfg":{
4+
"name":"A2C",
5+
"var_schedule_cfg":null,
6+
"gamma":0.99,
7+
"rets_mean_baseline":false,
8+
"policy_loss_var":1,
9+
"value_loss_var":0.9,
10+
"entropy_reg_var_cfg":{
11+
"name":"fixed",
12+
"var_start":0.01,
13+
"var_end":0.01,
14+
"star_epoch":0,
15+
"end_epoch":0
16+
},
17+
"n_step_returns":11,
18+
"lbd":null
19+
},
20+
"net_cfg":{
21+
"actor_net_cfg":{
22+
"name":"MLPNet",
23+
"hid_layers":[32],
24+
"hid_layers_activation":"Selu"
25+
},
26+
"critic_net_cfg":{
27+
"name":"MLPNet",
28+
"hid_layers":[32],
29+
"hid_layers_activation":"Selu"
30+
}
31+
},
32+
"optimizer_cfg":{
33+
"name":"adam",
34+
"lr":1e-4,
35+
"weight_decay": 1e-08,
36+
"betas": [
37+
0.9,
38+
0.999
39+
]
40+
},
41+
"lr_schedule_cfg":null,
42+
"memory_cfg":{
43+
"name":"OnPolicyBatch"
44+
},
45+
"max_epoch":10000,
46+
"train_exp_size":128,
47+
"batch_learn_times_per_train":4
48+
},
49+
"env":{
50+
"name":"CartPole",
51+
"solved_total_reward":99900,
52+
"finish_total_reward":100000,
53+
"survival_T":100000
54+
},
55+
"model_path":null,
56+
"valid":{
57+
"valid_step":100,
58+
"valid_times":5,
59+
"not_improve_finish_step":5
60+
}
61+
}

executor.py

-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@
66
from lib.callback import Logger
77

88
#TODO: Add a mode that can continue training on top of existing models
9-
#TODO:logger optimization in each file
109
if __name__ == '__main__':
1110
if not os.path.exists("./cache/logger"):
1211
os.makedirs("./cache/logger");

0 commit comments

Comments
 (0)
0