pythonAI
diff --git a/‎UPDATE_NOTES.md
Lines changed: 1 addition & 1 deletion b/‎UPDATE_NOTES.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/configs/dqn_ue4.json
Lines changed: 51 additions & 0 deletions b/‎examples/configs/dqn_ue4.json
Lines changed: 51 additions & 0 deletions
diff --git a/‎examples/configs/dqn_visual.json
Lines changed: 2 additions & 2 deletions b/‎examples/configs/dqn_visual.json
Lines changed: 2 additions & 2 deletions
diff --git a/‎examples/configs/vpg_baseline_visual.json
Lines changed: 1 addition & 1 deletion b/‎examples/configs/vpg_baseline_visual.json
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/unreal_engine.py
Lines changed: 7 additions & 6 deletions b/‎examples/unreal_engine.py
Lines changed: 7 additions & 6 deletions
diff --git a/‎tensorforce/contrib/remote_environment.py
Lines changed: 28 additions & 7 deletions b/‎tensorforce/contrib/remote_environment.py
Lines changed: 28 additions & 7 deletions
diff --git a/‎tensorforce/contrib/unreal_engine.py
Lines changed: 10 additions & 6 deletions b/‎tensorforce/contrib/unreal_engine.py
Lines changed: 10 additions & 6 deletions
diff --git a/‎tensorforce/core/memories/memory.py
Lines changed: 1 addition & 1 deletion b/‎tensorforce/core/memories/memory.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎tensorforce/core/memories/prioritized_replay.py
Lines changed: 2 additions & 2 deletions b/‎tensorforce/core/memories/prioritized_replay.py
Lines changed: 2 additions & 2 deletions
diff --git a/‎tensorforce/core/memories/replay.py
Lines changed: 1 addition & 1 deletion b/‎tensorforce/core/memories/replay.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎tensorforce/core/preprocessors/grayscale.py
Lines changed: 10 additions & 3 deletions b/‎tensorforce/core/preprocessors/grayscale.py
Lines changed: 10 additions & 3 deletions
diff --git a/‎tensorforce/core/preprocessors/preprocessor.py
Lines changed: 6 additions & 3 deletions b/‎tensorforce/core/preprocessors/preprocessor.py
Lines changed: 6 additions & 3 deletions
diff --git a/‎tensorforce/core/preprocessors/sequence.py
Lines changed: 16 additions & 6 deletions b/‎tensorforce/core/preprocessors/sequence.py
Lines changed: 16 additions & 6 deletions
diff --git a/‎tensorforce/meta_parameter_recorder.py
Lines changed: 1 addition & 1 deletion b/‎tensorforce/meta_parameter_recorder.py
Lines changed: 1 addition & 1 deletion
@@ -27,7 +27,7 @@ be tracked here in detail but through github issues.
 - In December, a number of bugs regarding exploration and a numerical issue in generalised 
   advantage estimation were fixed which seem to increase performance so an update is recommended.
 - Agent structure saw major refactoring to remove redundant code, introduced a ```LearningAgent```
-  to hold common fields and distinguish from non-learning agents (e.g. ```RandomAgent``)
+  to hold common fields and distinguish from non-learning agents (e.g. ```RandomAgent```)
 - We are preparing to move memories into the TensorFlow graph which will fix sequences and allow subsampling
   in the optimizers. Further, new episode/batch semantics will be enabled (e.g. episode based instead of
   timestep based batching). 
 
@@ -0,0 +1,51 @@
+{
+    "type": "dqn_agent",
+
+    "update_mode": {
+        "unit": "timesteps",
+        "batch_size": 64,
+        "frequency": 4
+    },
+
+    "memory": {
+        "type": "replay",
+        "capacity": 10000,
+        "include_next_states": true
+    },
+
+    "optimizer": {
+      "type": "adam",
+      "learning_rate": 1e-3
+    },
+
+    "discount": 0.97,
+
+    "states_preprocessing": [
+        {
+            "type": "divide",
+            "scale": 255
+        },
+        {
+            "type": "sequence",
+            "length": 4,
+            "add_rank": true
+        }
+    ],
+
+    "actions_exploration": {
+        "type": "epsilon_decay",
+        "initial_epsilon": 1.0,
+        "final_epsilon": 0.1,
+        "timesteps": 100000
+    },
+
+    "saver": {
+        "directory": null,
+        "seconds": 600
+    },
+    "summarizer": {
+        "directory": null,
+        "labels": [],
+        "seconds": 120
+    }
+}
@@ -12,7 +12,7 @@
 
     "discount": 0.97,
 
-    "states_preprocessing_spec": [
+    "states_preprocessing": [
         {
             "type": "image_resize",
             "width": 84,
@@ -27,7 +27,7 @@
         }
     ],
 
-    "explorations_spec": {
+    "actions_exploration": {
         "type": "epsilon_decay",
         "initial_epsilon": 1.0,
         "final_epsilon": 0.1,
 
@@ -24,7 +24,7 @@
         "num_steps": 5
     },
 
-    "states_preprocessing_spec": [
+    "states_preprocessing": [
         {
             "type": "image_resize",
             "width": 84,
 
@@ -66,7 +66,7 @@ def main():
     args = parser.parse_args()
 
     # logging.basicConfig(filename="logfile.txt", level=logging.INFO)
-    logging.basicConfig(stream=sys.stderr)
+    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
     logger = logging.getLogger(__name__)
     logger.setLevel(logging.DEBUG)
 
@@ -83,13 +83,14 @@ def main():
     if args.random_test_run:
         # Reset the env.
         s = environment.reset()
-        img = Image.fromarray(s, "RGB" if len(environment.states["shape"]) == 3 else "L")
+        img_format = "RGB" if len(environment.states["shape"]) == 3 else "L"
+        img = Image.fromarray(s, img_format)
         # Save first received image as a sanity-check.
         img.save("reset.png")
         for i in range(1000):
             s, is_terminal, r = environment.execute(actions=random.choice(range(environment.actions["num_actions"])))
             if i < 10:
-                img = Image.fromarray(s, "RGB")
+                img = Image.fromarray(s, img_format)
                 img.save("{:03d}.png".format(i))
             logging.debug("i={} r={} term={}".format(i, r, is_terminal))
             if is_terminal:
@@ -112,9 +113,9 @@ def main():
     agent = Agent.from_spec(
         spec=agent_config,
         kwargs=dict(
-            states_spec=environment.states,
-            actions_spec=environment.actions,
-            network_spec=network_spec
+            states=environment.states,
+            actions=environment.actions,
+            network=network_spec
         )
     )
     if args.load:
 
@@ -22,6 +22,7 @@
 import os
 from tensorforce import TensorForceError
 import logging
+import time
 
 
 class RemoteEnvironment(Environment):
@@ -54,20 +55,34 @@ def close(self):
         """
         self.disconnect()
 
-    def connect(self):
+    def connect(self, timeout=600):
         """
         Starts the server tcp connection on the given host:port.
+
+        Args:
+            timeout (int): The time (in seconds) for which connection attempts are retried (5sec socket timeout
+                per attempt, 1sec pause in between). Values below 5 are clamped to 5; an error is raised if all attempts fail.
         """
         # If we are already connected, return error.
         if self.socket:
             raise TensorForceError("Already connected to {}:{}. Only one connection allowed at a time. " +
                                    "Close first by calling `close`!".format(self.host, self.port))
         self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        self.socket.settimeout(5)
-        err = self.socket.connect_ex((self.host, self.port))
+
+        if timeout < 5 or timeout is None:
+            timeout = 5
+
+        err = 0
+        start_time = time.time()
+        while time.time() - start_time < timeout:
+            self.socket.settimeout(5)
+            err = self.socket.connect_ex((self.host, self.port))
+            if err == 0:
+                break
+            time.sleep(1)
         if err != 0:
-            raise TensorForceError("Error when trying to connect to {}:{}: errno={} '{}'".
-                                   format(self.host, self.port, errno.errorcode[err], os.strerror(err)))
+            raise TensorForceError("Error when trying to connect to {}:{}: errno={} errcode='{}' '{}'".
+                                   format(self.host, self.port, err, errno.errorcode[err], os.strerror(err)))
 
     def disconnect(self):
         """
@@ -118,15 +133,21 @@ def __init__(self, max_msg_len=8192):
 
     def send(self, message, socket_):
         """
-        Sends a message (dict) to the socket. Message is encoded via msgpack-numpy.
+        Sends a message (dict) to the socket. Message consists of an 8-byte length header (zero-padded ASCII decimal)
+            followed by a msgpack-numpy encoded dict.
 
         Args:
             message: The message dict (e.g. {"cmd": "reset"})
             socket_: The python socket object to use.
         """
         if not socket_:
             raise TensorForceError("No socket given in call to `send`!")
-        socket_.send(msgpack.packb(message))
+        elif not isinstance(message, dict):
+            raise TensorForceError("Message to be sent must be a dict!")
+        message = msgpack.packb(message)
+        len_ = len(message)
+        # prepend 8-byte len field to all our messages
+        socket_.send(bytes("{:08d}".format(len_), encoding="ascii") + message)
 
     def recv(self, socket_):
         """
 
@@ -50,8 +50,8 @@ def __init__(
                 This would be necessary e.g. for agents that use q-networks where the output are q-values per discrete
                 state-action pair.
             delta_time (float): The fake delta time to use for each single game tick.
-            num_ticks (int): The number of ticks to be executed in this step (each tick will repeat the same given
-            actions).
+            num_ticks (int): The number of ticks to be executed in a single act call (each tick will
+                repeat the same given actions).
         """
         RemoteEnvironment.__init__(self, host, port)
 
@@ -75,8 +75,8 @@ def __str__(self):
         return "UE4Environment({}:{}{})".format(self.host, self.port, "[connected; {}]".
                                                 format(self.game_name) if self.socket else "")
 
-    def connect(self):
-        RemoteEnvironment.connect(self)
+    def connect(self, timeout=600):
+        RemoteEnvironment.connect(self, timeout)
 
         # Get action- and state-specs from our game.
         self.protocol.send({"cmd": "get_spec"}, self.socket)
@@ -89,8 +89,12 @@ def connect(self):
         # Game's name
         self.game_name = response.get("game_name")  # keep non-mandatory for now
         # Observers
+        if "observation_space_desc" not in response:
+            raise TensorForceError("Response to `get_spec` does not contain field `observation_space_desc`!")
         self.observation_space_desc = response["observation_space_desc"]
         # Action-mappings
+        if "action_space_desc" not in response:
+            raise TensorForceError("Response to `get_spec` does not contain field `action_space_desc`!")
         self.action_space_desc = response["action_space_desc"]
 
         if self.discretize_actions:
@@ -110,9 +114,9 @@ def seed(self, seed=None):
         # Wait for response.
         response = self.protocol.recv(self.socket)
         if "status" not in response:
-            raise RuntimeError("Message without field 'status' received!")
+            raise TensorForceError("Message without field 'status' received!")
         elif response["status"] != "ok":
-            raise RuntimeError("Message 'status' for seed command is not 'ok' ({})!".format(response["status"]))
+            raise TensorForceError("Message 'status' for seed command is not 'ok' ({})!".format(response["status"]))
         return seed
 
     def reset(self):
 
@@ -118,7 +118,7 @@ def tf_retrieve_timesteps(self, n):
 
     def tf_retrieve_episodes(self, n):
         """
-        Retrieves a given number of episodesrom the stored experiences.
+        Retrieves a given number of episodes from the stored experiences.
 
         Args:
             n: Number of episodes to retrieve.
 
@@ -395,13 +395,13 @@ def tf_update_batch(self, loss_per_instance):
             x=self.batch_indices,
             y=tf.zeros(shape=tf.shape(input=self.batch_indices), dtype=tf.int32)
         )
-        priority_indices = tf.where(condition=mask)
+        priority_indices = tf.squeeze(tf.where(condition=mask))
         priority_indices = tf.Print(priority_indices, [priority_indices], message="Priority indices")
         sampled_batch = self.tf_retrieve_indices(
             buffer_elements=self.last_batch_buffer_elems,
             priority_indices=priority_indices
         )
-        sampled_batch = tf.Print(sampled_batch, [sampled_batch], message="sampled batch: ")
+        #sampled_batch = tf.Print(sampled_batch, [sampled_batch], message="sampled batch: ")
         states = sampled_batch['states']
         internals = sampled_batch['internals']
         actions = sampled_batch['actions']
 
@@ -32,7 +32,7 @@ def __init__(self, states, internals, actions, include_next_states, capacity, sc
         Replay memory.
 
         Args:
-            states: States specifiction.
+            states: States specification.
             internals: Internal states specification.
             actions: Actions specification.
             include_next_states: Include subsequent state if true.
 
@@ -28,13 +28,20 @@ class Grayscale(Preprocessor):
     Turn 3D color state into grayscale.
     """
 
-    def __init__(self, shape, weights=(0.299, 0.587, 0.114), scope='grayscale', summary_labels=()):
+    def __init__(self, shape, weights=(0.299, 0.587, 0.114), scope='grayscale', summary_labels=(), remove_rank=False):
+        """
+        Args:
+            weights (tuple): The weights to multiply each color channel with (in order: red, green, blue).
+            remove_rank (bool): If True, will remove the color channel rank from the input tensor. Default: False.
+        """
         self.weights = weights
+        self.remove_rank = remove_rank
         super(Grayscale, self).__init__(shape=shape, scope=scope, summary_labels=summary_labels)
 
     def tf_process(self, tensor):
         weights = tf.reshape(tensor=self.weights, shape=(tuple(1 for _ in range(util.rank(tensor) - 1)) + (3,)))
-        return tf.reduce_sum(input_tensor=(weights * tensor), axis=-1, keepdims=True)
+        weighted_sum = tf.reduce_sum(input_tensor=(weights * tensor), axis=-1, keepdims=not self.remove_rank)
+        return weighted_sum
 
     def processed_shape(self, shape):
-        return tuple(shape[:-1]) + (1,)
+        return tuple(shape[:-1]) + ((1,) if not self.remove_rank else ())
@@ -18,6 +18,7 @@
 from __future__ import division
 
 import tensorflow as tf
+import copy
 
 from tensorforce import util
 import tensorforce.core.preprocessors
@@ -156,11 +157,13 @@ def from_spec(spec, kwargs=None):
             spec = [spec]
 
         stack = PreprocessorStack()
-        for spec in spec:
+        for spec_ in spec:
+            # Need to deep copy; otherwise the first preprocessor's spec_ would be added to the kwargs of the second one.
+            kwargs_ = copy.deepcopy(kwargs)
             preprocessor = util.get_object(
-                obj=spec,
+                obj=spec_,
                 predefined_objects=tensorforce.core.preprocessors.preprocessors,
-                kwargs=kwargs
+                kwargs=kwargs_
             )
             assert isinstance(preprocessor, Preprocessor)
             stack.preprocessors.append(preprocessor)
 
@@ -29,17 +29,19 @@ class Sequence(Preprocessor):
     problems to create the Markov property (velocity of game objects as they move across the screen).
     """
 
-    def __init__(self, shape, length=2, scope='sequence', summary_labels=()):
+    def __init__(self, shape, length=2, scope='sequence', summary_labels=(), add_rank=False):
         """
         Args:
             length (int): The number of states to concatenate. In the beginning, when no previous state is available,
                 concatenate the given first state with itself `length` times.
+            add_rank (bool): Whether to add another rank to the end of the input with dim=length-of-the-sequence.
+                This could be useful if e.g. a grayscale image of w x h pixels is coming from the env
+                (no color channel). The output of the preprocessor would then be of shape [batch] x w x h x [length].
         """
         # raise TensorForceError("The sequence preprocessor is temporarily broken; use version 0.3.2 if required.")
         self.length = length
-        ## The index tensor pointing to the previous location in the single-state buffer.
-        #self.index = None
-        # The that resets index back to -1.
+        self.add_rank = add_rank
+        # The op that resets index back to -1.
         self.reset_op = None
         super(Sequence, self).__init__(shape=shape, scope=scope, summary_labels=summary_labels)
 
@@ -78,7 +80,15 @@ def later_run():
             assignment = tf.assign(ref=index, value=((tf.maximum(x=index, y=0) + 1) % self.length))
 
         with tf.control_dependencies(control_inputs=(assignment,)):
-            return tf.expand_dims(input=tf.concat(values=previous_states, axis=-1), axis=0)
+            if self.add_rank:
+                stack = tf.stack(values=previous_states, axis=-1)
+            else:
+                stack = tf.concat(values=previous_states, axis=-1)
+            batch_one = tf.expand_dims(input=stack, axis=0)
+            return batch_one
 
     def processed_shape(self, shape):
-        return shape[:-1] + (shape[-1] * self.length,)
+        if self.add_rank:
+            return shape + (self.length,)
+        else:
+            return shape[:-1] + (shape[-1] * self.length,)
@@ -81,7 +81,7 @@ def merge_custom(self, custom_dict):
                 )
             self.meta_params[key] = custom_dict[key]
         # This line assumes the merge data came from summarizer['meta_dict'], remove this from summarizer
-        del self.meta_params['summary_spec']['meta_dict']
+        del self.meta_params['summarizer']['meta_dict']
 
     def text_output(self, format_type=1):
         print('======================= ' + self.meta_params['AgentName'] + ' ====================================')
Original file line number	Diff line number	Diff line change
`@@ -12,7 +12,7 @@`
`12`	`12`
`13`	`13`	`"discount": 0.97,`
`14`	`14`
`15`		`- "states_preprocessing_spec": [`
	`15`	`+ "states_preprocessing": [`
`16`	`16`	`{`
`17`	`17`	`"type": "image_resize",`
`18`	`18`	`"width": 84,`
`@@ -27,7 +27,7 @@`
`27`	`27`	`}`
`28`	`28`	`],`
`29`	`29`
`30`		`- "explorations_spec": {`
	`30`	`+ "actions_exploration": {`
`31`	`31`	`"type": "epsilon_decay",`
`32`	`32`	`"initial_epsilon": 1.0,`
`33`	`33`	`"final_epsilon": 0.1,`
Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@`
`24`	`24`	`"num_steps": 5`
`25`	`25`	`},`
`26`	`26`
`27`		`- "states_preprocessing_spec": [`
	`27`	`+ "states_preprocessing": [`
`28`	`28`	`{`
`29`	`29`	`"type": "image_resize",`
`30`	`30`	`"width": 84,`
Original file line number	Diff line number	Diff line change
`@@ -81,7 +81,7 @@ def merge_custom(self, custom_dict):`
`81`	`81`	`)`
`82`	`82`	`self.meta_params[key] = custom_dict[key]`
`83`	`83`	`# This line assumes the merge data came from summarizer['meta_dict'], remove this from summarizer`
`84`		`- del self.meta_params['summary_spec']['meta_dict']`
	`84`	`+ del self.meta_params['summarizer']['meta_dict']`
`85`	`85`
`86`	`86`	`def text_output(self, format_type=1):`
`87`	`87`	`print('======================= ' + self.meta_params['AgentName'] + ' ====================================')`