10000 Merge branch 'master' of github.com:reinforceio/tensorforce · pythonAI/tensorforce@9da8c48 · GitHub
[go: up one dir, main page]

Skip to content

Commit 9da8c48

Browse files
Merge branch 'master' of github.com:reinforceio/tensorforce
2 parents d444dee + 788fb1d commit 9da8c48

File tree

15 files changed

+149
-49
lines changed
  • tensorforce
  • 15 files changed

    +149
    -49
    lines changed

    UPDATE_NOTES.md

    Lines changed: 1 addition & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -27,7 +27,7 @@ be tracked here in detail but through github issues.
    2727
    - In December, a number of bugs regarding exploration and a numerical issue in generalised
    2828
    advantage estimation were fixed which seem to increase performance so an update is recommended.
    2929
    - Agent structure saw major refactoring to remove redundant code, introduced a ```LearningAgent```
    30-
    to hold common fields and distinguish from non-learning agents (e.g. ```RandomAgent``)
    30+
    to hold common fields and distinguish from non-learning agents (e.g. ```RandomAgent```)
    3131
    - We are preparing to move memories into the TensorFlow graph which will fix sequences and allow subsampling
    3232
    in the optimizers. Further, new episode/batch semantics will be enabled (e.g. episode based instead of
    3333
    timestep based batching).

    examples/configs/dqn_ue4.json

    Lines changed: 51 additions & 0 deletions
    Original file line numberDiff line numberDiff line change
    @@ -0,0 +1,51 @@
    1+
    {
    2+
    "type": "dqn_agent",
    3+
    4+
    "update_mode": {
    5+
    "unit": "timesteps",
    6+
    "batch_size": 64,
    7+
    "frequency": 4
    8+
    },
    9+
    10+
    "memory": {
    11+
    "type": "replay",
    12+
    "capacity": 10000,
    13+
    "include_next_states": true
    14+
    },
    15+
    16+
    "optimizer": {
    17+
    "type": "adam",
    18+
    "learning_rate": 1e-3
    19+
    },
    20+
    21+
    "discount": 0.97,
    22+
    23+
    "states_preprocessing": [
    24+
    {
    25+
    "type": "divide",
    26+
    "scale": 255
    27+
    },
    28+
    {
    29+
    "type": "sequence",
    30+
    "length": 4,
    31+
    "add_rank": true
    32+
    }
    33+
    ],
    34+
    35+
    "actions_exploration": {
    36+
    "type": "epsilon_decay",
    37+
    "initial_epsilon": 1.0,
    38+
    "final_epsilon": 0.1,
    39+
    "timesteps": 100000
    40+
    },
    41+
    42+
    "saver": {
    43+
    "directory": null,
    44+
    "seconds": 600
    45+
    },
    46+
    "summarizer": {
    47+
    "directory": null,
    48+
    "labels": [],
    49+
    "seconds": 120
    50+
    }
    51+
    }

    examples/configs/dqn_visual.json

    Lines changed: 2 additions & 2 deletions
    Original file line numberDiff line numberDiff line change
    @@ -12,7 +12,7 @@
    1212

    1313
    "discount": 0.97,
    1414

    15-
    "states_preprocessing_spec": [
    15+
    "states_preprocessing": [
    1616
    {
    1717
    "type": "image_resize",
    1818
    "width": 84,
    @@ -27,7 +27,7 @@
    2727
    }
    2828
    ],
    2929

    30-
    "explorations_spec": {
    30+
    "actions_exploration": {
    3131
    "type": "epsilon_decay",
    3232
    "initial_epsilon": 1.0,
    3333
    "final_epsilon": 0.1,

    examples/configs/vpg_baseline_visual.json

    Lines changed: 1 addition & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -24,7 +24,7 @@
    2424
    "num_steps": 5
    2525
    },
    2626

    27-
    "states_preprocessing_spec": [
    27+
    "states_preprocessing": [
    2828
    {
    2929
    "type": "image_resize",
    3030
    "width": 84,

    examples/unreal_engine.py

    Lines changed: 7 additions & 6 deletions
    Original file line numberDiff line numberDiff line change
    @@ -66,7 +66,7 @@ def main():
    6666
    args = parser.parse_args()
    6767

    6868
    # logging.basicConfig(filename="logfile.txt", level=logging.INFO)
    69-
    logging.basicConfig(stream=sys.stderr)
    69+
    logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
    7070
    logger = logging.getLogger(__name__)
    7171
    logger.setLevel(logging.DEBUG)
    7272

    @@ -83,13 +83,14 @@ def main():
    8383
    if args.random_test_run:
    8484
    # Reset the env.
    8585
    s = environment.reset()
    86-
    img = Image.fromarray(s, "RGB" if len(environment.states["shape"]) == 3 else "L")
    86+
    img_format = "RGB" if len(environment.states["shape"]) == 3 else "L"
    87+
    img = Image.fromarray(s, img_format)
    8788
    # Save first received image as a sanity-check.
    8889
    img.save("reset.png")
    8990
    for i in range(1000):
    9091
    s, is_terminal, r = environment.execute(actions=random.choice(range(environment.actions["num_actions"])))
    9192
    if i < 10:
    92-
    img = Image.fromarray(s, "RGB")
    93+
    img = Image.fromarray(s, img_format)
    9394
    img.save("{:03d}.png".format(i))
    9495
    logging.debug("i={} r={} term={}".format(i, r, is_terminal))
    9596
    if is_terminal:
    @@ -112,9 +113,9 @@ def main():
    112113
    agent = Agent.from_spec(
    113114
    spec=agent_config,
    114115
    kwargs=dict(
    115-
    states_spec=environment.states,
    116-
    actions_spec=environment.actions,
    117-
    network_spec=network_spec
    116+
    states=environment.states,
    117+
    actions=environment.actions,
    118+
    network=network_spec
    118119
    )
    119120
    )
    120121
    if args.load:

    tensorforce/contrib/remote_environment.py

    Lines changed: 28 additions & 7 deletions
    Original file line numberDiff line numberDiff line change
    @@ -22,6 +22,7 @@
    2222
    import os
    2323
    from tensorforce import TensorForceError
    2424
    import logging
    25+
    import time
    2526

    2627

    2728
    class RemoteEnvironment(Environment):
    @@ -54,20 +55,34 @@ def close(self):
    5455
    """
    5556
    self.disconnect()
    5657

    57-
    def connect(self):
    58+
    def connect(self, timeout=600):
    5859
    """
    5960
    Starts the server tcp connection on the given host:port.
    61+
    62+
    Args:
    63+
    timeout (int): The time (in seconds) for which we will attempt a connection to the remote
    64+
    (every 5sec). After that (or if timeout is None or 0), an error is raised.
    6065
    """
    6166
    # If we are already connected, raise an error.
    6267
    if self.socket:
    6368
    raise TensorForceError("Already connected to {}:{}. Only one connection allowed at a time. " +
    6469
    "Close first by calling `close`!".format(self.host, self.port))
    6570
    self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    66-
    self.socket.settimeout(5)
    67-
    err = self.socket.connect_ex((self.host, self.port))
    71+
    72+
    if timeout < 5 or timeout is None:
    73+
    timeout = 5
    74+
    75+
    err = 0
    76+
    start_time = time.time()
    77+
    while time.time() - start_time < timeout:
    78+
    self.socket.settimeout(5)
    79+
    err = self.socket.connect_ex((self.host, self.port))
    80+
    if err == 0:
    81+
    break
    82+
    time.sleep(1)
    6883
    if err != 0:
    69-
    raise TensorForceError("Error when trying to connect to {}:{}: errno={} '{}'".
    70-
    format(self.host, self.port, errno.errorcode[err], os.strerror(err)))
    84+
    raise TensorForceError("Error when trying to connect to {}:{}: errno={} errcode='{}' '{}'".
    85+
    format(self.host, self.port, err, errno.errorcode[err], os.strerror(err)))
    7186

    7287
    def disconnect(self):
    7388
    """
    @@ -118,15 +133,21 @@ def __init__(self, max_msg_len=8192):
    118133

    119134
    def send(self, message, socket_):
    120135
    """
    121-
    Sends a message (dict) to the socket. Message is encoded via msgpack-numpy.
    136+
    Sends a message (dict) to the socket. Message consists of an 8-byte length header followed by a msgpack-numpy
    137+
    encoded dict.
    122138
    123139
    Args:
    124140
    message: The message dict (e.g. {"cmd": "reset"})
    125141
    socket_: The python socket object to use.
    126142
    """
    127143
    if not socket_:
    128144
    raise TensorForceError("No socket given in call to `send`!")
    129-
    socket_.send(msgpack.packb(message))
    145+
    elif not isinstance(message, dict):
    146+
    raise TensorForceError("Message to be sent must be a dict!")
    147+
    message = msgpack.packb(message)
    148+
    len_ = len(message)
    149+
    # prepend 8-byte len field to all our messages
    150+
    socket_.send(bytes("{:08d}".format(len_), encoding="ascii") + message)
    130151

    131152
    def recv(self, socket_):
    132153
    """

    tensorforce/contrib/unreal_engine.py

    Lines changed: 10 additions & 6 deletions
    Original file line numberDiff line numberDiff line change
    @@ -50,8 +50,8 @@ def __init__(
    5050
    This would be necessary e.g. for agents that use q-networks where the output are q-values per discrete
    5151
    state-action pair.
    5252
    delta_time (float): The fake delta time to use for each single game tick.
    53-
    num_ticks (int): The number of ticks to be executed in this step (each tick will repeat the same given
    54-
    actions).
    53+
    num_ticks (int): The number of ticks to be executed in a single act call (each tick will
    54+
    repeat the same given actions).
    5555
    """
    5656
    RemoteEnvironment.__init__(self, host, port)
    5757

    @@ -75,8 +75,8 @@ def __str__(self):
    7575
    return "UE4Environment({}:{}{})".format(self.host, self.port, "[connected; {}]".
    7676
    format(self.game_name) if self.socket else "")
    7777

    78-
    def connect(self):
    79-
    RemoteEnvironment.connect(self)
    78+
    def connect(self, timeout=600):
    79+
    RemoteEnvironment.connect(self, timeout)
    8080

    8181
    # Get action- and state-specs from our game.
    8282
    self.protocol.send({"cmd": "get_spec"}, self.socket)
    @@ -89,8 +89,12 @@ def connect(self):
    8989
    # Game's name
    9090
    self.game_name = response.get("game_name") # keep non-mandatory for now
    9191
    # Observers
    92+
    if "observation_space_desc" not in response:
    93+
    raise TensorForceError("Response to `get_spec` does not contain field `observation_space_desc`!")
    9294
    self.observation_space_desc = response["observation_space_desc"]
    9395
    # Action-mappings
    96+
    if "action_space_desc" not in response:
    97+
    raise TensorForceError("Response to `get_spec` does not contain field `action_space_desc`!")
    9498
    self.action_space_desc = response["action_space_desc"]
    9599

    96100
    if self.discretize_actions:
    @@ -110,9 +114,9 @@ def seed(self, seed=None):
    110114
    # Wait for response.
    111115
    response = self.protocol.recv(self.socket)
    112116
    if "status" not in response:
    113-
    raise RuntimeError("Message without field 'status' received!")
    117+
    raise TensorForceError("Message without field 'status' received!")
    114118
    elif response["status"] != "ok":
    115-
    raise RuntimeError("Message 'status' for seed command is not 'ok' ({})!".format(response["status"]))
    119+
    raise TensorForceError("Message 'status' for seed command is not 'ok' ({})!".format(response["status"]))
    116120
    return seed
    117121

    118122
    def reset(self):

    tensorforce/core/memories/memory.py

    Lines changed: 1 addition & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -118,7 +118,7 @@ def tf_retrieve_timesteps(self, n):
    118118

    119119
    def tf_retrieve_episodes(self, n):
    120120
    """
    121-
    Retrieves a given number of episodesrom the stored experiences.
    121+
    Retrieves a given number of episodes from the stored experiences.
    122122
    123123
    Args:
    124124
    n: Number of episodes to retrieve.

    tensorforce/core/memories/prioritized_replay.py

    Lines changed: 2 additions & 2 deletions
    Original file line numberDiff line numberDiff line change
    @@ -395,13 +395,13 @@ def tf_update_batch(self, loss_per_instance):
    395395
    x=self.batch_indices,
    396396
    y=tf.zeros(shape=tf.shape(input=self.batch_indices), dtype=tf.int32)
    397397
    )
    398-
    priority_indices = tf.where(condition=mask)
    398+
    priority_indices = tf.squeeze(tf.where(condition=mask))
    399399
    priority_indices = tf.Print(priority_indices, [priority_indices], message="Priority indices")
    400400
    sampled_batch = self.tf_retrieve_indices(
    401401
    buffer_elements=self.last_batch_buffer_elems,
    402402
    priority_indices=priority_indices
    403403
    )
    404-
    sampled_batch = tf.Print(sampled_batch, [sampled_batch], message="sampled batch: ")
    404+
    #sampled_batch = tf.Print(sampled_batch, [sampled_batch], message="sampled batch: ")
    405405
    states = sampled_batch['states']
    406406
    internals = sampled_batch['internals']
    407407
    actions = sampled_batch['actions']

    tensorforce/core/memories/replay.py

    Lines changed: 1 addition & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -32,7 +32,7 @@ def __init__(self, states, internals, actions, include_next_states, capacity, sc
    3232
    Replay memory.
    3333
    3434
    Args:
    35-
    states: States specifiction.
    35+
    states: States specification.
    3636
    internals: Internal states specification.
    3737
    actions: Actions specification.
    3838
    include_next_states: Include subsequent state if true.

    tensorforce/core/preprocessors/grayscale.py

    Lines changed: 10 additions & 3 deletions
    Original file line numberDiff line numberDiff line change
    @@ -28,13 +28,20 @@ class Grayscale(Preprocessor):
    2828
    Turn 3D color state into grayscale.
    2929
    """
    3030

    31-
    def __init__(self, shape, weights=(0.299, 0.587, 0.114), scope='grayscale', summary_labels=()):
    31+
    def __init__(self, shape, weights=(0.299, 0.587, 0.114), scope='grayscale', summary_labels=(), remove_rank=False):
    32+
    """
    33+
    Args:
    34+
    weights (tuple): The weights to multiply each color channel with (in order: red, green, blue).
    35+
    remove_rank (bool): If True, will remove the color channel rank from the input tensor. Default: False.
    36+
    """
    3237
    self.weights = weights
    38+
    self.remove_rank = remove_rank
    3339
    super(Grayscale, self).__init__(shape=shape, scope=scope, summary_labels=summary_labels)
    3440

    3541
    def tf_process(self, tensor):
    3642
    weights = tf.reshape(tensor=self.weights, shape=(tuple(1 for _ in range(util.rank(tensor) - 1)) + (3,)))
    37-
    return tf.reduce_sum(input_tensor=(weights * tensor), axis=-1, keepdims=True)
    43+
    weighted_sum = tf.reduce_sum(input_tensor=(weights * tensor), axis=-1, keepdims=not self.remove_rank)
    44+
    return weighted_sum
    3845

    3946
    def processed_shape(self, shape):
    40-
    return tuple(shape[:-1]) + (1,)
    47+
    return tuple(shape[:-1]) + ((1,) if not self.remove_rank else ())

    tensorforce/core/preprocessors/preprocessor.py

    Lines changed: 6 additions & 3 deletions
    Original file line numberDiff line numberDiff line change
    @@ -18,6 +18,7 @@
    1818
    from __future__ import division
    1919

    2020
    import tensorflow as tf
    21+
    import copy
    2122

    2223
    from tensorforce import util
    2324
    import tensorforce.core.preprocessors
    @@ -156,11 +157,13 @@ def from_spec(spec, kwargs=None):
    156157
    spec = [spec]
    157158

    158159
    stack = PreprocessorStack()
    159-
    for spec in spec:
    160+
    for spec_ in spec:
    161+
    # Deep-copy kwargs for every preprocessor; otherwise entries added while building the first preprocessor would leak into the kwargs of the next one.
    162+
    kwargs_ = copy.deepcopy(kwargs)
    160163
    preprocessor = util.get_object(
    161-
    obj=spec,
    164+
    obj=spec_,
    162165
    predefined_objects=tensorforce.core.preprocessors.preprocessors,
    163-
    kwargs=kwargs
    166+
    kwargs=kwargs_
    164167
    )
    165168
    assert isinstance(preprocessor, Preprocessor)
    166169
    stack.preprocessors.append(preprocessor)

    tensorforce/core/preprocessors/sequence.py

    Lines changed: 16 additions & 6 deletions
    Original file line numberDiff line numberDiff line change
    @@ -29,17 +29,19 @@ class Sequence(Preprocessor):
    2929
    problems to create the Markov property (velocity of game objects as they move across the screen).
    3030
    """
    3131

    32-
    def __init__(self, shape, length=2, scope='sequence', summary_labels=()):
    32+
    def __init__(self, shape, length=2, scope='sequence', summary_labels=(), add_rank=False):
    3333
    """
    3434
    Args:
    3535
    length (int): The number of states to concatenate. In the beginning, when no previous state is available,
    3636
    concatenate the given first state with itself `length` times.
    37+
    add_rank (bool): Whether to add another rank to the end of the input with dim=length-of-the-sequence.
    38+
    This could be useful if e.g. a grayscale image of w x h pixels is coming from the env
    39+
    (no color channel). The output of the preprocessor would then be of shape [batch] x w x h x [length].
    3740
    """
    3841
    # raise TensorForceError("The sequence preprocessor is temporarily broken; use version 0.3.2 if required.")
    3942
    self.length = length
    40-
    ## The index tensor pointing to the previous location in the single-state buffer.
    41-
    #self.index = None
    42-
    # The that resets index back to -1.
    43+
    self.add_rank = add_rank
    44+
    # The op that resets index back to -1.
    4345
    self.reset_op = None
    4446
    super(Sequence, self).__init__(shape=shape, scope=scope, summary_labels=summary_labels)
    4547

    @@ -78,7 +80,15 @@ def later_run():
    7880
    assignment = tf.assign(ref=index, value=((tf.maximum(x=index, y=0) + 1) % self.length))
    7981

    8082
    with tf.control_dependencies(control_inputs=(assignment,)):
    81-
    return tf.expand_dims(input=tf.concat(values=previous_states, axis=-1), axis=0)
    83+
    if self.add_rank:
    84+
    stack = tf.stack(values=previous_states, axis=-1)
    85+
    else:
    86+
    stack = tf.concat(values=previous_states, axis=-1)
    87+
    batch_one = tf.expand_dims(input=stack, axis=0)
    88+
    return batch_one
    8289

    8390
    def processed_shape(self, shape):
    84-
    return shape[:-1] + (shape[-1] * self.length,)
    91+
    if self.add_rank:
    92+
    return shape + (self.length,)
    93+
    else:
    94+
    return shape[:-1] + (shape[-1] * self.length,)

    tensorforce/meta_parameter_recorder.py

    Lines changed: 1 addition & 1 deletion
    Original file line numberDiff line numberDiff line change
    @@ -81,7 +81,7 @@ def merge_custom(self, custom_dict):
    8181
    )
    8282
    self.meta_params[key] = custom_dict[key]
    8383
    # This line assumes the merge data came from summarizer['meta_dict']; remove it from the summarizer spec.
    84-
    del self.meta_params['summary_spec']['meta_dict']
    84+
    del self.meta_params['summarizer']['meta_dict']
    8585

    8686
    def text_output(self, format_type=1):
    8787
    print('======================= ' + self.meta_params['AgentName'] + ' ====================================')

    0 commit comments

    Comments
     (0)
    0