# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
| """Evaluation utility functions. | |
| """ | |
| from __future__ import absolute_import | |
| from __future__ import division | |
| from __future__ import print_function | |
| import numpy as np | |
| import tensorflow as tf | |
| from collections import namedtuple | |
| logging = tf.logging | |
| import gin.tf | |


def evaluate_checkpoint_repeatedly(checkpoint_dir,
                                   evaluate_checkpoint_fn,
                                   eval_interval_secs=600,
                                   max_number_of_evaluations=None,
                                   checkpoint_timeout=None,
                                   timeout_fn=None):
  """Evaluates a checkpointed model at a set interval."""
  if max_number_of_evaluations is not None and max_number_of_evaluations <= 0:
    raise ValueError(
        '`max_number_of_evaluations` must be either None or a positive number.')

  number_of_evaluations = 0
  for checkpoint_path in tf.contrib.training.checkpoints_iterator(
      checkpoint_dir,
      min_interval_secs=eval_interval_secs,
      timeout=checkpoint_timeout,
      timeout_fn=timeout_fn):
    should_stop = False
    retries = 3
    for _ in range(retries):
      try:
        should_stop = evaluate_checkpoint_fn(checkpoint_path)
        break
      except tf.errors.DataLossError:
        # A DataLossError can occur when a checkpoint is read before it has
        # been fully written; wait briefly and retry.
        logging.warn(
            'Encountered a DataLossError while evaluating a checkpoint. This '
            'can happen when reading a checkpoint before it is fully written. '
            'Retrying...')
        time.sleep(2.0)
    # Stop when the evaluation function requests it or the evaluation budget
    # is exhausted.
    number_of_evaluations += 1
    if should_stop:
      break
    if (max_number_of_evaluations is not None and
        number_of_evaluations >= max_number_of_evaluations):
      break
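

# Illustrative sketch, not part of the original module: a minimal
# `evaluate_checkpoint_fn` callable of the kind `evaluate_checkpoint_repeatedly`
# expects. It restores the checkpoint into the default graph and returns
# whether evaluation should stop. The bare Saver/Session usage is a
# hypothetical placeholder for whatever evaluation graph the caller has
# already built.
#
# Typical call (the directory is hypothetical):
#   evaluate_checkpoint_repeatedly('/tmp/experiment/train',
#                                  _example_evaluate_checkpoint_fn)
def _example_evaluate_checkpoint_fn(checkpoint_path):
  with tf.Session() as sess:
    tf.train.Saver().restore(sess, checkpoint_path)
    # Run evaluation ops with `sess` here and log or summarize the results.
    return False  # Return True to request that the evaluation loop stop.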


def compute_model_loss(sess, model_rollout_fn, states, actions):
  """Computes open-loop model prediction error (per-step L2 distance)."""
  preds, losses = [], []
  preds.append(states[0])
  losses.append(0)
  for state, action in zip(states[1:], actions[1:]):
    pred = model_rollout_fn(sess, preds[-1], action)
    loss = np.sqrt(np.sum((state - pred) ** 2))
    preds.append(pred)
    losses.append(loss)
  return preds, losses
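

# Illustrative sketch, not part of the original module: `compute_model_loss`
# rolls `model_rollout_fn` forward open-loop from the first observed state and
# scores each prediction against the corresponding observed state with an L2
# distance. The dummy rollout below ignores `sess` and simply adds the action
# to the previous prediction; it exists only to show the expected signature.
def _example_model_loss():
  states = [np.zeros(3), np.ones(3), 2. * np.ones(3)]
  actions = [np.ones(3), np.ones(3), np.ones(3)]
  dummy_rollout_fn = lambda sess, state, action: state + action
  return compute_model_loss(None, dummy_rollout_fn, states, actions)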


def compute_average_reward(sess, env_base, step_fn, gamma, num_steps,
                           num_episodes):
  """Computes the discounted reward averaged over several episodes.

  Args:
    sess: The tensorflow session.
    env_base: A python environment.
    step_fn: A function that takes in `sess` and returns a tuple of
      (state, action, transition_type, reward, meta_reward, discount, ...)
      values, as unpacked in `compute_reward`.
    gamma: discounting factor to apply to the reward.
    num_steps: number of steps to compute the reward over.
    num_episodes: number of episodes to average the reward over.
  Returns:
    average_reward: average cumulative discounted reward per episode.
    average_last_reward: average reward received at the final step.
    average_meta_reward: average cumulative discounted meta-reward per episode.
    average_last_meta_reward: average meta-reward received at the final step.
    average_success: fraction of episodes counted as successful.
    states: states visited in the last episode.
    actions: actions taken in the last episode.
  """
  average_reward = 0
  average_last_reward = 0
  average_meta_reward = 0
  average_last_meta_reward = 0
  average_success = 0.
  states, actions = None, None
  for i in range(num_episodes):
    env_base.end_episode()
    env_base.begin_episode()
    (reward, last_reward, meta_reward, last_meta_reward,
     states, actions) = compute_reward(
         sess, step_fn, gamma, num_steps)
    s_reward = last_meta_reward  # Navigation
    success = (s_reward > -5.0)  # When using diff=False
    logging.info('Episode = %d, reward = %s, meta_reward = %f, '
                 'last_reward = %s, last meta_reward = %f, success = %s',
                 i, reward, meta_reward, last_reward, last_meta_reward,
                 success)
    average_reward += reward
    average_last_reward += last_reward
    average_meta_reward += meta_reward
    average_last_meta_reward += last_meta_reward
    average_success += success

  average_reward /= num_episodes
  average_last_reward /= num_episodes
  average_meta_reward /= num_episodes
  average_last_meta_reward /= num_episodes
  average_success /= num_episodes
  return (average_reward, average_last_reward,
          average_meta_reward, average_last_meta_reward,
          average_success,
          states, actions)
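

# Illustrative sketch, not part of the original module: `compute_average_reward`
# only requires `env_base` to expose `begin_episode`/`end_episode` and a
# `step_fn` returning the 8-tuple unpacked in `compute_reward` below. The stub
# environment and step function are hypothetical and exist only to show that
# contract.
class _ExampleEnv(object):

  def begin_episode(self):
    pass

  def end_episode(self):
    pass


def _example_step_fn(sess):
  # (state, action, transition_type, reward, meta_reward, discount, _, _)
  return np.zeros(2), np.zeros(1), 0, 1.0, -1.0, 1.0, None, None


def _example_average_reward():
  return compute_average_reward(None, _ExampleEnv(), _example_step_fn,
                                gamma=0.99, num_steps=5, num_episodes=2)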


def compute_reward(sess, step_fn, gamma, num_steps):
  """Computes the discounted reward for a given number of steps.

  Args:
    sess: The tensorflow session.
    step_fn: A function that takes in `sess` and returns a tuple of
      (state, action, transition_type, reward, meta_reward, discount, ...)
      values.
    gamma: discounting factor to apply to the reward.
    num_steps: number of steps to compute the reward over.
  Returns:
    total_reward: cumulative discounted reward.
    reward: reward received at the final step.
    total_meta_reward: cumulative discounted meta-reward.
    meta_reward: meta-reward received at the final step.
    states: states visited over the rollout.
    actions: actions taken over the rollout.
  """
  total_reward = 0
  total_meta_reward = 0
  gamma_step = 1
  states = []
  actions = []
  for _ in range(num_steps):
    (state, action, transition_type, reward, meta_reward,
     discount, _, _) = step_fn(sess)
    total_reward += reward * gamma_step * discount
    total_meta_reward += meta_reward * gamma_step * discount
    gamma_step *= gamma
    states.append(state)
    actions.append(action)
  return (total_reward, reward, total_meta_reward, meta_reward,
          states, actions)
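

# Illustrative note, not part of the original module: the accumulation in
# `compute_reward` is the discounted return sum_t gamma^t * discount_t * r_t,
# maintained with a running `gamma_step` that is multiplied by `gamma` after
# every step. The hypothetical helper below recomputes that quantity from
# lists of rewards and discounts, e.g. as a cross-check in tests.
def _discounted_return(rewards, discounts, gamma):
  total = 0.0
  gamma_step = 1.0
  for reward, discount in zip(rewards, discounts):
    total += reward * gamma_step * discount
    gamma_step *= gamma
  return total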