Spaces:
Runtime error
Runtime error
| # Copyright 2018 The TensorFlow Authors All Rights Reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # ============================================================================== | |
| """Defines many boolean functions indicating when to step and reset. | |
| """ | |
| import tensorflow as tf | |
| import gin.tf | |
def env_transition(agent, state, action, transition_type, environment_steps,
                   num_episodes):
  """True if the transition_type is TRANSITION or FINAL_TRANSITION.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
  Returns:
    cond: An op that evaluates to true whenever the transition type is
      not RESTARTING.
  """
  # Only the transition type participates in this condition.
  del agent, state, action, num_episodes, environment_steps
  return tf.logical_not(transition_type)
def env_restart(agent, state, action, transition_type, environment_steps,
                num_episodes):
  """True if the transition_type is RESTARTING.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
  Returns:
    cond: An op that evaluates to true whenever the transition type equals
      RESTARTING.
  """
  # Everything except the transition type is irrelevant here.
  del agent, state, action, num_episodes, environment_steps
  cond = tf.identity(transition_type)
  return cond
def every_n_steps(agent,
                  state,
                  action,
                  transition_type,
                  environment_steps,
                  num_episodes,
                  n=150):
  """True once every n steps.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
    n: Return true once every n steps.
  Returns:
    cond: An op that evaluates to true when environment_steps is 0 mod n.
      The step counter is incremented before this check, so no +1 offset
      is needed.
  """
  del agent, state, action, transition_type, num_episodes
  step_in_cycle = tf.mod(environment_steps, n)
  return tf.equal(step_in_cycle, 0)
def every_n_episodes(agent,
                     state,
                     action,
                     transition_type,
                     environment_steps,
                     num_episodes,
                     n=2,
                     steps_per_episode=None):
  """True once every n episodes.

  Specifically, evaluates to True on the 0th step of every nth episode.
  Unlike environment_steps, num_episodes starts at 0, so we do want to add
  one to ensure it does not reset on the first call.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
    n: Return true once every n episodes.
    steps_per_episode: How many steps per episode. Needed to determine when a
      new episode starts.
  Returns:
    cond: An op that evaluates to true on the last step of the episode
      (i.e. if num_episodes equals 0 mod n).
  """
  assert steps_per_episode is not None
  del agent, action, transition_type
  # NOTE(review): state[2] looks like the torso height of an Ant-style
  # agent; "fallen" means outside [0.2, 1.0] — confirm against the env.
  ant_fell = tf.logical_or(state[2] < 0.2, state[2] > 1.0)
  is_nth_episode = tf.equal(tf.mod(num_episodes + 1, n), 0)
  at_episode_start = tf.equal(tf.mod(environment_steps, steps_per_episode), 0)
  should_reset = tf.logical_or(ant_fell, is_nth_episode)
  return tf.logical_and(should_reset, at_episode_start)
def failed_reset_after_n_episodes(agent,
                                  state,
                                  action,
                                  transition_type,
                                  environment_steps,
                                  num_episodes,
                                  steps_per_episode=None,
                                  reset_state=None,
                                  max_dist=1.0,
                                  epsilon=1e-10):
  """Every n episodes, returns True if the reset agent fails to return.

  Specifically, evaluates to True if the distance between the state and the
  reset state is greater than max_dist at the end of the episode.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
    steps_per_episode: How many steps per episode. Needed to determine when a
      new episode starts.
    reset_state: State to which the reset controller should return.
    max_dist: Agent is considered to have successfully reset if its distance
      from the reset_state is less than max_dist.
    epsilon: Small offset so the sqrt argument is strictly positive (keeps
      the gradient finite at zero distance).
  Returns:
    cond: An op that evaluates to true at an episode boundary when the
      agent's distance from reset_state exceeds max_dist.
  """
  assert steps_per_episode is not None
  assert reset_state is not None
  # BUG FIX: `state` was previously included in this `del` and then
  # referenced below, raising UnboundLocalError; it must stay in scope.
  del agent, action, transition_type, num_episodes
  dist = tf.sqrt(
      tf.reduce_sum(tf.squared_difference(state, reset_state)) + epsilon)
  cond = tf.logical_and(
      tf.greater(dist, tf.constant(max_dist)),
      tf.equal(tf.mod(environment_steps, steps_per_episode), 0))
  return cond
def q_too_small(agent,
                state,
                action,
                transition_type,
                environment_steps,
                num_episodes,
                q_min=0.5):
  """True if the critic's Q-value for (state, action) is too small.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
    q_min: Returns true if the qval is less than q_min.
  Returns:
    cond: An op that evaluates to true if qval is less than q_min.
  """
  del transition_type, environment_steps, num_episodes
  # BUG FIX: the previous code called
  #   tf.stack(state[:-1], tf.constant([0], dtype=tf.float))
  # which fails twice: `tf.float` does not exist (it is `tf.float32`), and
  # tf.stack's second argument is an integer `axis`, not a tensor to append.
  # The intent — build the reset agent's state with the final element
  # zeroed — is expressed with tf.concat along axis 0.
  state_for_reset_agent = tf.concat(
      [state[:-1], tf.constant([0], dtype=tf.float32)], axis=0)
  qval = agent.BASE_AGENT_CLASS.critic_net(
      tf.expand_dims(state_for_reset_agent, 0), tf.expand_dims(action, 0))[0, :]
  cond = tf.greater(tf.constant(q_min), qval)
  return cond
def true_fn(agent, state, action, transition_type, environment_steps,
            num_episodes):
  """Returns an op that evaluates to true.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
  Returns:
    cond: An op that always evaluates to True.
  """
  # Unconditional: none of the inputs matter.
  del agent, state, action, transition_type, environment_steps, num_episodes
  return tf.constant(True, dtype=tf.bool)
def false_fn(agent, state, action, transition_type, environment_steps,
             num_episodes):
  """Returns an op that evaluates to false.

  Args:
    agent: RL agent.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action.
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
  Returns:
    cond: An op that always evaluates to False.
  """
  # Unconditional: none of the inputs matter.
  del agent, state, action, transition_type, environment_steps, num_episodes
  return tf.constant(False, dtype=tf.bool)