8000 fixed ddpg deterministic act · pythonAI/tensorforce@6c9847d · GitHub
[go: up one dir, main page]

Skip to content

Commit 6c9847d

Browse files
committed
fixed ddpg deterministic act
1 parent a0c6033 commit 6c9847d

File tree

3 files changed

+6
-6
lines changed

3 files changed

+6
-6
lines changed

examples/configs/mlp2_normalized_network.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828

2929
{
3030
"type": "dense",
31-
"size": 1,
32-
"activation": "tanh"
31+
"size": 64,
32+
"activation": null
3333
}
3434
]

tensorforce/agents/ddpg_agent.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,3 @@ def initialize_model(self):
190190
target_sync_frequency=self.target_sync_frequency,
191191
target_update_weight=self.target_update_weight
192192
)
193-
194-
def act(self, states, deterministic=False, fetch_tensors=None):
195-
# Always return deterministic action from model, add exploration separately
196-
return super(DDPGAgent, self).act(states=states, deterministic=True, fetch_tensors=fetch_tensors)

tensorforce/models/dpg_target_model.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@ def tf_apply(self, x, internals, update, return_internals=False):
8282

8383
out = self.t2d.apply(x=out, update=update)
8484

85+
# Remove last dimension because we only return Q values for one state and action
86+
out = tf.squeeze(out)
87+
8588
if return_internals:
8689
# Todo: Internals management
8790
return out, None
@@ -249,6 +252,7 @@ def tf_optimization(self, states, internals, actions, terminal, reward, next_sta
249252
states=next_states, internals=next_internals, actions=next_target_actions, terminal=terminal,
250253
reward=reward, update=update
251254
)
255+
252256
predicted_q = tf.stop_gradient(input=predicted_q)
253257

254258
real_q = self.critic.apply(dict(states=states, actions=actions), internals=internals, update=update)

0 commit comments

Comments
 (0)
0