# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

| """Optimizers mostly for value estimate. | |
| Gradient Descent optimizer | |
| LBFGS optimizer | |
| Best Fit optimizer | |
| """ | |
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from six.moves import xrange
import tensorflow as tf
import numpy as np
import scipy.optimize


def var_size(v):
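  """Returns the total number of elements in variable v."""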
  return int(np.prod([int(d) for d in v.shape]))


def gradients(loss, var_list):
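  """Computes gradients of loss w.r.t. var_list; None gradients become zeros."""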
  grads = tf.gradients(loss, var_list)
  return [g if g is not None else tf.zeros(v.shape)
          for g, v in zip(grads, var_list)]


def flatgrad(loss, var_list):
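  """Computes the gradient of loss w.r.t. var_list, flattened into one vector."""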
  # gradients() already substitutes zeros for None entries, so every
  # gradient can be reshaped and concatenated directly.
  grads = gradients(loss, var_list)
  return tf.concat([tf.reshape(grad, [-1]) for grad in grads], 0)


def get_flat(var_list):
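  """Flattens and concatenates the variables in var_list into a single vector."""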
  return tf.concat([tf.reshape(v, [-1]) for v in var_list], 0)


def set_from_flat(var_list, flat_theta):
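  """Returns an op that assigns slices of flat_theta back into var_list."""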
  shapes = [v.shape for v in var_list]
  sizes = [var_size(v) for v in var_list]
  start = 0
  assigns = []
  for (shape, size, v) in zip(shapes, sizes, var_list):
    assigns.append(v.assign(
        tf.reshape(flat_theta[start:start + size], shape)))
    start += size
  assert start == sum(sizes)
  return tf.group(*assigns)


class LbfgsOptimization(object):
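  """Fits the value estimate to mixed value targets with scipy's L-BFGS-B."""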

  def __init__(self, max_iter=25, mix_frac=1.0):
    self.max_iter = max_iter
    self.mix_frac = mix_frac

  def setup_placeholders(self):
    self.flat_theta = tf.placeholder(tf.float32, [None], 'flat_theta')
    self.intended_values = tf.placeholder(tf.float32, [None], 'intended_values')

  def setup(self, var_list, values, targets, pads,
            inputs, regression_weight):
    self.setup_placeholders()
    self.values = values
    self.targets = targets

    # Mean squared error over non-padded timesteps.
    self.raw_loss = (
        tf.reduce_sum((1 - pads) * tf.square(values - self.intended_values)) /
        tf.reduce_sum(1 - pads))

    self.loss_flat_gradient = flatgrad(self.raw_loss, var_list)
    self.flat_vars = get_flat(var_list)
    self.set_vars = set_from_flat(var_list, self.flat_theta)

  def optimize(self, sess, feed_dict):
    old_theta = sess.run(self.flat_vars)
    old_values, targets = sess.run([self.values, self.targets],
                                   feed_dict=feed_dict)
    # Move the current values only part-way (mix_frac) toward the targets.
    intended_values = targets * self.mix_frac + old_values * (1 - self.mix_frac)

    feed_dict = dict(feed_dict)
    feed_dict[self.intended_values] = intended_values

    def calc_loss_and_grad(theta):
      sess.run(self.set_vars, feed_dict={self.flat_theta: theta})
      loss, grad = sess.run([self.raw_loss, self.loss_flat_gradient],
                            feed_dict=feed_dict)
      # scipy's L-BFGS-B expects float64 gradients.
      grad = grad.astype('float64')
      return loss, grad

    theta, _, _ = scipy.optimize.fmin_l_bfgs_b(
        calc_loss_and_grad, old_theta, maxiter=self.max_iter)
    sess.run(self.set_vars, feed_dict={self.flat_theta: theta})


class GradOptimization(object):
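  """Fits the value estimate to mixed value targets with repeated Adam steps."""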

  def __init__(self, learning_rate=0.001, max_iter=25, mix_frac=1.0):
    self.learning_rate = learning_rate
    self.max_iter = max_iter
    self.mix_frac = mix_frac

  def get_optimizer(self):
    return tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                  epsilon=2e-4)

  def setup_placeholders(self):
    self.flat_theta = tf.placeholder(tf.float32, [None], 'flat_theta')
    self.intended_values = tf.placeholder(tf.float32, [None], 'intended_values')

  def setup(self, var_list, values, targets, pads,
            inputs, regression_weight):
    self.setup_placeholders()
    self.values = values
    self.targets = targets

    # Mean squared error over non-padded timesteps.
    self.raw_loss = (
        tf.reduce_sum((1 - pads) * tf.square(values - self.intended_values)) /
        tf.reduce_sum(1 - pads))

    opt = self.get_optimizer()
    params = var_list
    grads = tf.gradients(self.raw_loss, params)
    self.gradient_ops = opt.apply_gradients(zip(grads, params))

  def optimize(self, sess, feed_dict):
    old_values, targets = sess.run([self.values, self.targets],
                                   feed_dict=feed_dict)
    # Move the current values only part-way (mix_frac) toward the targets.
    intended_values = targets * self.mix_frac + old_values * (1 - self.mix_frac)

    feed_dict = dict(feed_dict)
    feed_dict[self.intended_values] = intended_values

    for _ in xrange(self.max_iter):
      sess.run(self.gradient_ops, feed_dict=feed_dict)


class BestFitOptimization(object):
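  """Fits a linear value estimate in closed form by regularized least squares."""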

  def __init__(self, mix_frac=1.0):
    self.mix_frac = mix_frac

  def setup_placeholders(self):
    self.new_regression_weight = tf.placeholder(
        tf.float32, self.regression_weight.shape)

  def setup(self, var_list, values, targets, pads,
            inputs, regression_weight):
    self.values = values
    self.targets = targets
    self.inputs = inputs
    self.regression_weight = regression_weight

    # Placeholders are created only after regression_weight is known, since
    # the new-weight placeholder must match its shape.
    self.setup_placeholders()

    self.update_regression_weight = tf.assign(
        self.regression_weight, self.new_regression_weight)

  def optimize(self, sess, feed_dict):
    reg_input, reg_weight, old_values, targets = sess.run(
        [self.inputs, self.regression_weight, self.values, self.targets],
        feed_dict=feed_dict)
    # Move the current values only part-way (mix_frac) toward the targets.
    intended_values = targets * self.mix_frac + old_values * (1 - self.mix_frac)

    # Solve the regularized least-squares problem, raising the regularization
    # coefficient until the solution is finite (taken from rllab).
    reg_coeff = 1e-5
    for _ in range(5):
      best_fit_weight = np.linalg.lstsq(
          reg_input.T.dot(reg_input) +
          reg_coeff * np.identity(reg_input.shape[1]),
          reg_input.T.dot(intended_values))[0]
      if not np.any(np.isnan(best_fit_weight)):
        break
      reg_coeff *= 10

    if len(best_fit_weight.shape) == 1:
      best_fit_weight = np.expand_dims(best_fit_weight, -1)

    sess.run(self.update_regression_weight,
             feed_dict={self.new_regression_weight: best_fit_weight})
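

if __name__ == '__main__':
  # Minimal usage sketch (illustrative only; not part of the original module).
  # It assumes a hypothetical linear value estimate over random features and
  # exercises GradOptimization end to end; the other optimizers follow the
  # same setup()/optimize() protocol.
  features = tf.placeholder(tf.float32, [None, 4], 'features')
  target_values = tf.placeholder(tf.float32, [None], 'target_values')
  pads = tf.zeros_like(target_values)  # no padded timesteps in this sketch
  weight = tf.get_variable('weight', [4, 1])
  values = tf.squeeze(tf.matmul(features, weight), -1)

  optimizer = GradOptimization(learning_rate=0.001, max_iter=25, mix_frac=1.0)
  optimizer.setup([weight], values, target_values, pads,
                  inputs=features, regression_weight=weight)

  with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {features: np.random.randn(64, 4).astype(np.float32),
            target_values: np.random.randn(64).astype(np.float32)}
    optimizer.optimize(sess, feed_dict=feed)
    print('values after fitting:', sess.run(values, feed_dict=feed)[:5])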