# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
| """Sample actor(policy) and critic(q) networks to use with DDPG/NAF agents. | |
| The DDPG networks are defined in "Section 7: Experiment Details" of | |
| "Continuous control with deep reinforcement learning" - Lilicrap et al. | |
| https://arxiv.org/abs/1509.02971 | |
| The NAF critic network is based on "Section 4" of "Continuous deep Q-learning | |
| with model-based acceleration" - Gu et al. https://arxiv.org/pdf/1603.00748. | |
| """ | |
| import tensorflow as tf | |
| slim = tf.contrib.slim | |
| import gin.tf | |


def critic_net(states, actions,
               for_critic_loss=False,
               num_reward_dims=1,
               states_hidden_layers=(400,),
               actions_hidden_layers=None,
               joint_hidden_layers=(300,),
               weight_decay=0.0001,
               normalizer_fn=None,
               activation_fn=tf.nn.relu,
               zero_obs=False,
               images=False):
  """Creates a critic that returns q values for the given states and actions.

  Args:
    states: (castable to tf.float32) a [batch_size, num_state_dims] tensor
      representing a batch of states.
    actions: (castable to tf.float32) a [batch_size, num_action_dims] tensor
      representing a batch of actions.
    for_critic_loss: If True, keep per-dimension q values when
      num_reward_dims > 1; otherwise they are collapsed into a single value
      weighted by the last num_reward_dims entries of the state.
    num_reward_dims: Number of reward dimensions.
    states_hidden_layers: tuple of hidden layer units for the states branch.
    actions_hidden_layers: tuple of hidden layer units for the actions branch.
    joint_hidden_layers: tuple of hidden layer units applied after joining
      states and actions using tf.concat().
    weight_decay: Weight decay for the l2 weights regularizer.
    normalizer_fn: Normalizer function, e.g. slim.layer_norm.
    activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
    zero_obs: If True, zero out the first two state dimensions (the x, y
      position).
    images: If True, also zero out the first two state dimensions (as with
      zero_obs).
  Returns:
    A tf.float32 [batch_size] tensor of q values, or a tf.float32
    [batch_size, num_reward_dims] tensor of vector q values if
    num_reward_dims > 1 and for_critic_loss is True.
  """
  with slim.arg_scope(
      [slim.fully_connected],
      activation_fn=activation_fn,
      normalizer_fn=normalizer_fn,
      weights_regularizer=slim.l2_regularizer(weight_decay),
      weights_initializer=slim.variance_scaling_initializer(
          factor=1.0/3.0, mode='FAN_IN', uniform=True)):
    orig_states = tf.to_float(states)
    # TD3-style critic: feed the concatenated (state, action) pair through the
    # "states" branch as well.
    states = tf.concat([tf.to_float(states), tf.to_float(actions)], -1)
    if images or zero_obs:
      # Zero out the x, y position dimensions. Hacky.
      states *= tf.constant([0.0] * 2 + [1.0] * (states.shape[1] - 2))
    actions = tf.to_float(actions)
    if states_hidden_layers:
      states = slim.stack(states, slim.fully_connected, states_hidden_layers,
                          scope='states')
    if actions_hidden_layers:
      actions = slim.stack(actions, slim.fully_connected,
                           actions_hidden_layers, scope='actions')
    joint = tf.concat([states, actions], 1)
    if joint_hidden_layers:
      joint = slim.stack(joint, slim.fully_connected, joint_hidden_layers,
                         scope='joint')
    with slim.arg_scope([slim.fully_connected],
                        weights_regularizer=None,
                        weights_initializer=tf.random_uniform_initializer(
                            minval=-0.003, maxval=0.003)):
      value = slim.fully_connected(joint, num_reward_dims,
                                   activation_fn=None,
                                   normalizer_fn=None,
                                   scope='q_value')
    if num_reward_dims == 1:
      value = tf.reshape(value, [-1])
    if not for_critic_loss and num_reward_dims > 1:
      # Collapse vector q values into a scalar, weighting each reward
      # dimension by the absolute value of the corresponding entry at the
      # end of the original state.
      value = tf.reduce_sum(
          value * tf.abs(orig_states[:, -num_reward_dims:]), -1)
  return value


def actor_net(states, action_spec,
              hidden_layers=(400, 300),
              normalizer_fn=None,
              activation_fn=tf.nn.relu,
              zero_obs=False,
              images=False):
  """Creates an actor that returns actions for the given states.

  Args:
    states: (castable to tf.float32) a [batch_size, num_state_dims] tensor
      representing a batch of states.
    action_spec: (BoundedTensorSpec) A tensor spec indicating the shape
      and range of actions.
    hidden_layers: tuple of hidden layer units.
    normalizer_fn: Normalizer function, e.g. slim.layer_norm.
    activation_fn: Activation function, e.g. tf.nn.relu, slim.leaky_relu, ...
    zero_obs: If True, zero out the first two state dimensions (the x, y
      position).
    images: If True, also zero out the first two state dimensions (as with
      zero_obs).
  Returns:
    A tf.float32 [batch_size, num_action_dims] tensor of actions.
  """
  with slim.arg_scope(
      [slim.fully_connected],
      activation_fn=activation_fn,
      normalizer_fn=normalizer_fn,
      weights_initializer=slim.variance_scaling_initializer(
          factor=1.0/3.0, mode='FAN_IN', uniform=True)):
    states = tf.to_float(states)
    orig_states = states
    if images or zero_obs:  # Zero-out x, y position. Hacky.
      states *= tf.constant([0.0] * 2 + [1.0] * (states.shape[1] - 2))
    if hidden_layers:
      states = slim.stack(states, slim.fully_connected, hidden_layers,
                          scope='states')
    with slim.arg_scope([slim.fully_connected],
                        weights_initializer=tf.random_uniform_initializer(
                            minval=-0.003, maxval=0.003)):
      actions = slim.fully_connected(states,
                                     action_spec.shape.num_elements(),
                                     scope='actions',
                                     normalizer_fn=None,
                                     activation_fn=tf.nn.tanh)
    # Rescale the tanh output from [-1, 1] to the range given by action_spec.
    action_means = (action_spec.maximum + action_spec.minimum) / 2.0
    action_magnitudes = (action_spec.maximum - action_spec.minimum) / 2.0
    actions = action_means + action_magnitudes * actions
  return actions
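

if __name__ == '__main__':
  # Minimal smoke test (an illustrative sketch, assuming a TF1.x environment
  # where tf.contrib.slim is available). `_FakeActionSpec` is a hypothetical
  # stand-in that only mimics the BoundedTensorSpec attributes actor_net uses
  # (shape, minimum, maximum); the agents in this repo pass a real spec.
  import collections
  import numpy as np

  _FakeActionSpec = collections.namedtuple(
      '_FakeActionSpec', ['shape', 'minimum', 'maximum'])
  action_spec = _FakeActionSpec(shape=tf.TensorShape([2]),
                                minimum=np.array([-1.0, -1.0]),
                                maximum=np.array([1.0, 1.0]))

  states = tf.placeholder(tf.float32, [None, 10])
  actions = tf.placeholder(tf.float32, [None, 2])
  with tf.variable_scope('actor'):
    policy_actions = actor_net(states, action_spec)  # [batch_size, 2]
  with tf.variable_scope('critic'):
    q_values = critic_net(states, actions)  # [batch_size]
  print(policy_actions, q_values)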