# Lint as: python2, python3
# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
| """Utility functions for training.""" | |
| import six | |
| import tensorflow as tf | |
| from tensorflow.contrib import framework as contrib_framework | |
| from deeplab.core import preprocess_utils | |
| from deeplab.core import utils | |


def _div_maybe_zero(total_loss, num_present):
  """Normalizes the total loss with the number of present pixels."""
  return tf.to_float(num_present > 0) * tf.math.divide(
      total_loss,
      tf.maximum(1e-5, num_present))


def add_softmax_cross_entropy_loss_for_each_scale(scales_to_logits,
                                                  labels,
                                                  num_classes,
                                                  ignore_label,
                                                  loss_weight=1.0,
                                                  upsample_logits=True,
                                                  hard_example_mining_step=0,
                                                  top_k_percent_pixels=1.0,
                                                  gt_is_matting_map=False,
                                                  scope=None):
| """Adds softmax cross entropy loss for logits of each scale. | |
| Args: | |
| scales_to_logits: A map from logits names for different scales to logits. | |
| The logits have shape [batch, logits_height, logits_width, num_classes]. | |
| labels: Groundtruth labels with shape [batch, image_height, image_width, 1]. | |
| num_classes: Integer, number of target classes. | |
| ignore_label: Integer, label to ignore. | |
| loss_weight: A float or a list of loss weights. If it is a float, it means | |
| all the labels have the same weight. If it is a list of weights, then each | |
| element in the list represents the weight for the label of its index, for | |
| example, loss_weight = [0.1, 0.5] means the weight for label 0 is 0.1 and | |
| the weight for label 1 is 0.5. | |
| upsample_logits: Boolean, upsample logits or not. | |
| hard_example_mining_step: An integer, the training step in which the hard | |
| exampling mining kicks off. Note that we gradually reduce the mining | |
| percent to the top_k_percent_pixels. For example, if | |
| hard_example_mining_step = 100K and top_k_percent_pixels = 0.25, then | |
| mining percent will gradually reduce from 100% to 25% until 100K steps | |
| after which we only mine top 25% pixels. | |
| top_k_percent_pixels: A float, the value lies in [0.0, 1.0]. When its value | |
| < 1.0, only compute the loss for the top k percent pixels (e.g., the top | |
| 20% pixels). This is useful for hard pixel mining. | |
| gt_is_matting_map: If true, the groundtruth is a matting map of confidence | |
| score. If false, the groundtruth is an integer valued class mask. | |
| scope: String, the scope for the loss. | |
| Raises: | |
| ValueError: Label or logits is None, or groundtruth is matting map while | |
| label is not floating value. | |
| """ | |
  if labels is None:
    raise ValueError('No label for softmax cross entropy loss.')

  # If input groundtruth is a matting map of confidence, check if the input
  # labels are floating point values.
  if gt_is_matting_map and not labels.dtype.is_floating:
    raise ValueError('Labels must be floats if groundtruth is a matting map.')

  for scale, logits in six.iteritems(scales_to_logits):
    loss_scope = None
    if scope:
      loss_scope = '%s_%s' % (scope, scale)

    if upsample_logits:
      # Label is not downsampled, and instead we upsample logits.
      logits = tf.image.resize_bilinear(
          logits,
          preprocess_utils.resolve_shape(labels, 4)[1:3],
          align_corners=True)
      scaled_labels = labels
    else:
      # Label is downsampled to the same size as logits.
      # When gt_is_matting_map = true, label downsampling with nearest neighbor
      # method may introduce artifacts. However, to avoid ignore_label from
      # being interpolated with other labels, we still perform nearest neighbor
      # interpolation.
      # TODO(huizhongc): Change to bilinear interpolation by processing padded
      # and non-padded label separately.
      if gt_is_matting_map:
        tf.logging.warning(
            'Label downsampling with nearest neighbor may introduce artifacts.')

      scaled_labels = tf.image.resize_nearest_neighbor(
          labels,
          preprocess_utils.resolve_shape(logits, 4)[1:3],
          align_corners=True)

    scaled_labels = tf.reshape(scaled_labels, shape=[-1])
    weights = utils.get_label_weight_mask(
        scaled_labels, ignore_label, num_classes, label_weights=loss_weight)
    # Dimension of keep_mask is equal to the total number of pixels.
    keep_mask = tf.cast(
        tf.not_equal(scaled_labels, ignore_label), dtype=tf.float32)
    train_labels = None
    logits = tf.reshape(logits, shape=[-1, num_classes])

    if gt_is_matting_map:
      # When the groundtruth is an integer label mask, we can assign class
      # dependent label weights to the loss. When the groundtruth is image
      # matting confidence, we do not apply class-dependent label weights
      # (i.e., label_weight = 1.0).
      if loss_weight != 1.0:
        raise ValueError(
            'loss_weight must equal 1 if groundtruth is a matting map.')

      # Assign label value 0 to ignore pixels. The exact label value of an
      # ignore pixel does not matter, because those ignore_label pixel losses
      # are multiplied by a weight of 0.
      train_labels = scaled_labels * keep_mask

      train_labels = tf.expand_dims(train_labels, 1)
      train_labels = tf.concat([1 - train_labels, train_labels], axis=1)
    else:
      train_labels = tf.one_hot(
          scaled_labels, num_classes, on_value=1.0, off_value=0.0)
    default_loss_scope = ('softmax_all_pixel_loss'
                          if top_k_percent_pixels == 1.0 else
                          'softmax_hard_example_mining')
    with tf.name_scope(loss_scope, default_loss_scope,
                       [logits, train_labels, weights]):
      # Compute the loss for all pixels.
      pixel_losses = tf.nn.softmax_cross_entropy_with_logits_v2(
          labels=tf.stop_gradient(
              train_labels, name='train_labels_stop_gradient'),
          logits=logits,
          name='pixel_losses')
      weighted_pixel_losses = tf.multiply(pixel_losses, weights)

      if top_k_percent_pixels == 1.0:
        total_loss = tf.reduce_sum(weighted_pixel_losses)
        num_present = tf.reduce_sum(keep_mask)
        loss = _div_maybe_zero(total_loss, num_present)
        tf.losses.add_loss(loss)
      else:
        num_pixels = tf.to_float(tf.shape(logits)[0])
        # Compute the top_k_percent pixels based on current training step.
        if hard_example_mining_step == 0:
          # Directly focus on the top_k pixels.
          top_k_pixels = tf.to_int32(top_k_percent_pixels * num_pixels)
        else:
          # Gradually reduce the mining percent to top_k_percent_pixels.
          global_step = tf.to_float(tf.train.get_or_create_global_step())
          ratio = tf.minimum(1.0, global_step / hard_example_mining_step)
          top_k_pixels = tf.to_int32(
              (ratio * top_k_percent_pixels + (1.0 - ratio)) * num_pixels)
        top_k_losses, _ = tf.nn.top_k(weighted_pixel_losses,
                                      k=top_k_pixels,
                                      sorted=True,
                                      name='top_k_percent_pixels')
        total_loss = tf.reduce_sum(top_k_losses)
        num_present = tf.reduce_sum(
            tf.to_float(tf.not_equal(top_k_losses, 0.0)))
        loss = _div_maybe_zero(total_loss, num_present)
        tf.losses.add_loss(loss)
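

# A worked example of the hard-example-mining schedule above (numbers taken
# from the docstring's own example, hard_example_mining_step = 100000 and
# top_k_percent_pixels = 0.25): at global_step = 50000 the ratio is 0.5, so
# the mined fraction is 0.5 * 0.25 + 0.5 = 0.625, i.e. the top 62.5% of
# pixels; the fraction then shrinks linearly to 25% at step 100000 and stays
# there.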


def get_model_init_fn(train_logdir,
                      tf_initial_checkpoint,
                      initialize_last_layer,
                      last_layers,
                      ignore_missing_vars=False):
  """Gets the function initializing model variables from a checkpoint.

  Args:
    train_logdir: Log directory for training.
    tf_initial_checkpoint: TensorFlow checkpoint for initialization.
    initialize_last_layer: Initialize last layer or not.
    last_layers: Last layers of the model.
    ignore_missing_vars: Ignore missing variables in the checkpoint.

  Returns:
    Initialization function.
  """
  if tf_initial_checkpoint is None:
    tf.logging.info('Not initializing the model from a checkpoint.')
    return None

  if tf.train.latest_checkpoint(train_logdir):
    tf.logging.info('Ignoring initialization; other checkpoint exists')
    return None

  tf.logging.info('Initializing model from path: %s', tf_initial_checkpoint)

  # Variables that will not be restored.
  exclude_list = ['global_step']
  if not initialize_last_layer:
    exclude_list.extend(last_layers)

  variables_to_restore = contrib_framework.get_variables_to_restore(
      exclude=exclude_list)

  if variables_to_restore:
    init_op, init_feed_dict = contrib_framework.assign_from_checkpoint(
        tf_initial_checkpoint,
        variables_to_restore,
        ignore_missing_vars=ignore_missing_vars)
    global_step = tf.train.get_or_create_global_step()

    def restore_fn(sess):
      sess.run(init_op, init_feed_dict)
      sess.run([global_step])

    return restore_fn

  return None
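

# A minimal usage sketch (an assumption, not part of this file): once the
# training graph is built and a tf.Session exists, the returned function can
# be called to restore the pretrained weights, e.g.
#   restore_fn = get_model_init_fn(train_logdir, ckpt_path, False, last_layers)
#   if restore_fn is not None:
#     restore_fn(sess)
# where `ckpt_path`, `last_layers` and `sess` are hypothetical names.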


def get_model_gradient_multipliers(last_layers, last_layer_gradient_multiplier):
  """Gets the gradient multipliers.

  The gradient multipliers adjust the learning rates for model variables. For
  the task of semantic segmentation, the models are usually fine-tuned from
  models trained on the task of image classification. To fine-tune the models,
  we usually set a larger (e.g., 10 times larger) learning rate for the
  parameters of the last layer.

  Args:
    last_layers: Scopes of last layers.
    last_layer_gradient_multiplier: The gradient multiplier for last layers.

  Returns:
    The gradient multiplier map with variables as key, and multipliers as value.
  """
  gradient_multipliers = {}

  for var in tf.model_variables():
    # Double the learning rate for biases.
    if 'biases' in var.op.name:
      gradient_multipliers[var.op.name] = 2.

    # Use larger learning rate for last layer variables.
    for layer in last_layers:
      if layer in var.op.name and 'biases' in var.op.name:
        gradient_multipliers[var.op.name] = 2 * last_layer_gradient_multiplier
        break
      elif layer in var.op.name:
        gradient_multipliers[var.op.name] = last_layer_gradient_multiplier
        break

  return gradient_multipliers
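

# A worked example of the multiplier map above (the multiplier value is
# assumed for illustration): with last_layer_gradient_multiplier = 10, a
# backbone bias gets a multiplier of 2, a last-layer weight gets 10, and a
# last-layer bias gets 2 * 10 = 20; all remaining variables are absent from
# the map and keep the base learning rate.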


def get_model_learning_rate(learning_policy,
                            base_learning_rate,
                            learning_rate_decay_step,
                            learning_rate_decay_factor,
                            training_number_of_steps,
                            learning_power,
                            slow_start_step,
                            slow_start_learning_rate,
                            slow_start_burnin_type='none',
                            decay_steps=0.0,
                            end_learning_rate=0.0,
                            boundaries=None,
                            boundary_learning_rates=None):
  """Gets model's learning rate.

  Computes the model's learning rate for different learning policies. The
  supported policies are "step", "poly", "cosine" and "multi_steps".

  (1) The learning policy for "step" is computed as follows:
    current_learning_rate = base_learning_rate *
      learning_rate_decay_factor ^ (global_step / learning_rate_decay_step)
  See tf.train.exponential_decay for details.

  (2) The learning policy for "poly" is computed as follows:
    current_learning_rate = base_learning_rate *
      (1 - global_step / training_number_of_steps) ^ learning_power

  Args:
    learning_policy: Learning rate policy for training.
    base_learning_rate: The base learning rate for model training.
    learning_rate_decay_step: Decay the base learning rate at a fixed step.
    learning_rate_decay_factor: The rate to decay the base learning rate.
    training_number_of_steps: Number of steps for training.
    learning_power: Power used for 'poly' learning policy.
    slow_start_step: Train the model with a small learning rate for the first
      few steps.
    slow_start_learning_rate: The learning rate employed during slow start.
    slow_start_burnin_type: The burnin type for the slow start stage. Can be
      `none` which means no burnin or `linear` which means the learning rate
      increases linearly from slow_start_learning_rate and reaches
      base_learning_rate after slow_start_step steps.
    decay_steps: Float, `decay_steps` for polynomial learning rate.
    end_learning_rate: Float, `end_learning_rate` for polynomial learning rate.
    boundaries: A list of `Tensor`s or `int`s or `float`s with strictly
      increasing entries.
    boundary_learning_rates: A list of `Tensor`s or `float`s or `int`s that
      specifies the values for the intervals defined by `boundaries`. It should
      have one more element than `boundaries`, and all elements should have the
      same type.

  Returns:
    Learning rate for the specified learning policy.

  Raises:
    ValueError: If learning policy or slow start burnin type is not recognized.
    ValueError: If `boundaries` and `boundary_learning_rates` are not set for
      multi_steps learning rate decay.
  """
  global_step = tf.train.get_or_create_global_step()
  adjusted_global_step = tf.maximum(global_step - slow_start_step, 0)
  if decay_steps == 0.0:
    tf.logging.info('Setting decay_steps to total training steps.')
    decay_steps = training_number_of_steps - slow_start_step
  if learning_policy == 'step':
    learning_rate = tf.train.exponential_decay(
        base_learning_rate,
        adjusted_global_step,
        learning_rate_decay_step,
        learning_rate_decay_factor,
        staircase=True)
  elif learning_policy == 'poly':
    learning_rate = tf.train.polynomial_decay(
        base_learning_rate,
        adjusted_global_step,
        decay_steps=decay_steps,
        end_learning_rate=end_learning_rate,
        power=learning_power)
  elif learning_policy == 'cosine':
    learning_rate = tf.train.cosine_decay(
        base_learning_rate,
        adjusted_global_step,
        training_number_of_steps - slow_start_step)
  elif learning_policy == 'multi_steps':
    if boundaries is None or boundary_learning_rates is None:
      raise ValueError('Must set `boundaries` and `boundary_learning_rates` '
                       'for multi_steps learning rate decay.')
    learning_rate = tf.train.piecewise_constant_decay(
        adjusted_global_step,
        boundaries,
        boundary_learning_rates)
  else:
    raise ValueError('Unknown learning policy.')

  adjusted_slow_start_learning_rate = slow_start_learning_rate
  if slow_start_burnin_type == 'linear':
    # Do linear burnin. Increase linearly from slow_start_learning_rate and
    # reach base_learning_rate after (global_step >= slow_start_step).
    adjusted_slow_start_learning_rate = (
        slow_start_learning_rate +
        (base_learning_rate - slow_start_learning_rate) *
        tf.to_float(global_step) / slow_start_step)
  elif slow_start_burnin_type != 'none':
    raise ValueError('Unknown burnin type.')

  # Employ small learning rate at the first few steps for warm start.
  return tf.where(global_step < slow_start_step,
                  adjusted_slow_start_learning_rate, learning_rate)
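

# ------------------------------------------------------------------------
# A minimal end-to-end sketch (not part of the original library) showing how
# the utilities above are typically wired together in a TF1-style training
# graph. The `scales_to_logits`, `labels` and `last_layers` arguments are
# assumed to be produced by the DeepLab model code elsewhere; the numeric
# hyperparameters below are placeholders, not recommended settings.
def _example_training_setup(scales_to_logits, labels, num_classes,
                            ignore_label, last_layers):
  """Illustrative sketch only; never called by the library."""
  # Register the per-scale softmax loss in the tf.losses collection.
  add_softmax_cross_entropy_loss_for_each_scale(
      scales_to_logits,
      labels,
      num_classes,
      ignore_label,
      loss_weight=1.0,
      upsample_logits=True)
  # Polynomial decay with a short warm-start phase.
  learning_rate = get_model_learning_rate(
      learning_policy='poly',
      base_learning_rate=1e-4,
      learning_rate_decay_step=2000,
      learning_rate_decay_factor=0.1,
      training_number_of_steps=30000,
      learning_power=0.9,
      slow_start_step=3000,
      slow_start_learning_rate=1e-5)
  # Larger effective learning rate for the last layers, doubled for biases.
  gradient_multipliers = get_model_gradient_multipliers(
      last_layers, last_layer_gradient_multiplier=10.0)
  return learning_rate, gradient_multipliers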