8000 [2023.07.13] commit-1 · DarriusL/DRL-ExampleCode@456b075 · GitHub
[go: up one dir, main page]

Skip to content

Commit 456b075

Browse files
committed
[2023.07.13] commit-1
Added acquisition depth 2d CNN network.
1 parent 35561ec commit 456b075

File tree

7 files changed

+152
-4
lines changed

7 files changed

+152
-4
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,8 @@ python executor.py -cfg='./config/reinforce/reinforce_cartpole_mc.json' --mode='
112112
python executor.py -cfg='./config/reinforce/reinforce_entropyreg_cartpole_mc.json' --mode='train'
113113
python executor.py -cfg='./config/reinforce/reinforce_entropyreg_cartpole_onbatch.json' --mode='train'
114114
python executor.py -cfg='./cache/data/reinforce/cartpole/[-opt-]/config.json' --mode='test'
115+
116+
python executor.py -cfg='./config/reinforce/reinforce_mountaincar_mc.json' --mode='train'
115117
```
116118

117119
- sarsa
@@ -131,6 +133,8 @@ python executor.py -cfg='./config/dqn/dqn_cartpole_off.json' --mode='train'
131133
python executor.py -cfg='./config/dqn/targetdqn_cartpole_off.json' --mode='train'
132134
python executor.py -cfg='./config/dqn/doubledqn_cartpole_off.json' --mode='train'
133135
python executor.py -cfg='./config/dqn/doubledqn_cartpole_per.json' --mode='train'
136+
137+
python executor.py -cfg='./config/dqn/dqn_mountaincar_off.json' --mode='train'
134138
```
135139

136140
- a2c
@@ -141,6 +145,8 @@ python executor.py -cfg='./config/a2c/a2c_shared_nstep_cartpole_onbatch.json' --
141145
python executor.py -cfg='./config/a2c/a2c_shared_mc_cartpole_mc.json' --mode='train'
142146
python executor.py -cfg='./config/a2c/a2c_unshared_gae_cartpole_onbatch.json' --mode='train'
143147
python executor.py -cfg='./cache/data/a2c/cartpole/[-opt-]/config.json' --mode='test'
148+
149+
python executor.py -cfg='./config/a2c/a2c_shared_nstep_mountaincar_onbatch.json' --mode='train'
144150
```
145151

146152
- ppo

agent/algorithm/actor_critic.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,8 @@ def train_step(self, batch):
222222
with torch.no_grad():
223223
v_preds = self._cal_v(batch['states']);
224224
advs, v_tgts = self._cal_advs_and_v_tgts(batch, v_preds);
225+
if self.rets_mean_baseline:
226+
advs = alg_util.rets_mean_baseline(advs);
225227
self._train_main(batch, advs, v_tgts);
226228

227229
def _suboptim_net(self, loss, net, optimizer):

agent/net/conv.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# @Time : 2023.05.15
2+
# @Author : Darrius Lei
3+
# @Email : darrius.lei@outlook.com
4+
5+
import torch
6+
from agent.net.base import Net
7+
from agent.net import net_util
8+
9+
class ConvNet(Net):
10+
def __init__(self, net_cfg) -> None:
11+
super().__init__(net_cfg)
12+

agent/net/mlp.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ class MLPNet(Net):
1818
def __init__(self, net_cfg, in_dim, out_dim) -> None:
1919
super().__init__(net_cfg)
2020
activation_fn = net_util.get_activation_fn(self.hid_layers_activation);
21-
self.net = net_util.get_mlpnet(self.hid_layers, activation_fn, in_dim, out_dim);
21+
self.net = net_util.get_mlp_net(self.hid_layers, activation_fn, in_dim, out_dim);
2222
#set training mode
2323
self.train();
2424

@@ -47,11 +47,11 @@ def __init__(self, net_cfg, in_dim, out_dim) -> None:
4747
assert self.num_outnets > 1
4848
activation_fn = net_util.get_activation_fn(self.hid_layers_activation);
4949
#shared body
50-
self.body_net = net_util.get_mlpnet(self.body_hid_layers, activation_fn, in_dim, self.body_out_dim);
50+
self.body_net = net_util.get_mlp_net(self.body_hid_layers, activation_fn, in_dim, self.body_out_dim);
5151
#output nets
5252
self.outnets = torch.nn.ModuleList()
5353
for i in range(self.num_outnets):
54-
self.outnets.append(net_util.get_mlpnet(self.output_hid_layers, activation_fn, self.body_out_dim, out_dim[i]));
54+
self.outnets.append(net_util.get_mlp_net(self.output_hid_layers, activation_fn, self.body_out_dim, out_dim[i]));
5555
#set training mode
5656
self.train();
5757

agent/net/net_util.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def get_activation_fn(name = 'selu'):
117117
f'please replace or add the code yourself.\nSupport list:{activations}');
118118
raise callback.CustomException('ActivationCfgNameError');
119119

120-
def get_mlpnet(hid_layers, activation_fn, in_dim, out_dim):
120+
def get_mlp_net(hid_layers, activation_fn, in_dim, out_dim):
121121
''''''
122122
if len(hid_layers) > 1:
123123
layers = [
@@ -138,6 +138,29 @@ def get_mlpnet(hid_layers, activation_fn, in_dim, out_dim):
138138
]
139139
return torch.nn.Sequential(*layers);
140140

141+
def get_conv2d_net(in_channel, conv_hid_layers, activation_fn, batch_norm = False):
142+
'''
143+
Parameters:
144+
-----------
145+
in_channel:int
146+
the number of channels of the input image data
147+
148+
conv_hid_layers:list
149+
per-layer conv2d parameters, one entry per layer: [out_channel, kernel_size, stride, padding, dilation]
150+
151+
activation_fn:
152+
activation function
153+
'''
154+
conv_layers = [];
155+
for i, layer in enumerate(conv_hid_layers):
156+
conv_layers.append(torch.nn.Conv2d(in_channel, *layer));
157+
conv_layers.append(activation_fn);
158+
if batch_norm and i != len(conv_hid_layers) - 1:
159+
conv_layers.append(torch.nn.BatchNorm2d(layer[0]));
160+
in_channel = layer[0];
161+
return torch.nn.Sequential(*conv_layers);
162+
163+
141164
def net_param_copy(src, tgt):
142165
'''Copy network parameters from src to tgt'''
143166
tgt.load_state_dict(src.state_dict());
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
{
2+
"agent_cfg":{
3+
"algorithm_cfg":{
4+
"name":"A2C",
5+
"var_schedule_cfg":null,
6+
"gamma":0.99,
7+
"rets_mean_baseline":false,
8+
"policy_loss_var":1,
9+
"value_loss_var":0.7,
10+
"entropy_reg_var_cfg":{
11+
"name":"fixed",
12+
"var_start":0.01,
13+
"var_end":0.01,
14+
"star_epoch":0,
15+
"end_epoch":0
16+
},
17+
"n_step_returns":32,
18+
"lbd":null
19+
},
20+
"net_cfg":{
21+
"name":"SharedMLPNet",
22+
"body_hid_layers":[32],
23+
"body_out_dim":16,
24+
"hid_layers_activation":"Selu",
25+
"output_hid_layers":[16]
26+
},
27+
"optimizer_cfg":{
28+
"name":"adam",
29+
"lr":1e-3,
30+
"weight_decay": 1e-08,
31+
"betas": [
32+
0.9,
33+
0.999
34+
]
35+
},
36+
"lr_schedule_cfg":null,
37+
"memory_cfg":{
38+
"name":"OnPolicyBatch"
39+
},
40+
"max_epoch":10000,
41+
"explore_times_per_train":1,
42+
"train_exp_size":64,
43+
"batch_learn_times_per_train":4
44+
},
45+
"env":{
46+
"name":"MountainCar",
47+
"solved_total_reward":-100,
48+
"finish_total_reward":-80,
49+
"survival_T":5000
50+
},
51+
"model_path":null,
52+
"is_gpu_available":true,
53+
"valid":{
54+
"valid_step":100,
55+
"valid_times":5,
56+
"not_improve_finish_step":5
57+
}
58+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
{
2+
"agent_cfg":{
3+
"algorithm_cfg":{
4+
"name":"reinforce",
5+
"var_schedule_cfg":null,
6+
"gamma":0.99,
7+
"rets_mean_baseline":true,
8+
"policy_loss_var":1,
9+
"entropy_reg_var_cfg":null
10+
},
11+
"net_cfg":{
12+
"name":"MLPNet",
13+
"hid_layers":[32, 16],
14+
"hid_layers_activation":"Selu"
15+
},
16+
"optimizer_cfg":{
17+
"name":"Adam",
18+
"lr":1e-2,
19+
"weight_decay": 1e-08,
20+
"betas": [
21+
0.9,
22+
0.999
23+
]
24+
},
25+
"lr_schedule_cfg":null,
26+
"memory_cfg":{
27+
"name":"OnPolicy"
28+
},
29+
"explore_times_per_train":1,
30+
"train_exp_size":1,
31+
"batch_learn_times_per_train":4,
32+
"max_epoch":10000
33+
},
34+
"env":{
35+
"name":"MountainCar",
36+
"solved_total_reward":-100,
37+
"finish_total_reward":-80,
38+
"survival_T":5000
39+
},
40+
"model_path":null,
41+
"is_gpu_available":true,
42+
"valid":{
43+
"valid_step":10,
44+
"valid_times":5,
45+
"not_improve_finish_step":5
46+
}
47+
}

0 commit comments

Comments
 (0)
0