| syntax = "proto2"; | |
| package object_detection.protos; | |
// Describes one preprocessing operation to apply to the input data.
// The operation and its parameters are selected through the
// `preprocessing_step` oneof, so at most one member can be set at a time.
// See: //third_party/tensorflow_models/object_detection/core/preprocessor.py
// Next ID: 38
message PreprocessingStep {
  oneof preprocessing_step {
    NormalizeImage normalize_image = 1;
    RandomHorizontalFlip random_horizontal_flip = 2;
    RandomPixelValueScale random_pixel_value_scale = 3;
    RandomImageScale random_image_scale = 4;
    RandomRGBtoGray random_rgb_to_gray = 5;
    RandomAdjustBrightness random_adjust_brightness = 6;
    RandomAdjustContrast random_adjust_contrast = 7;
    RandomAdjustHue random_adjust_hue = 8;
    RandomAdjustSaturation random_adjust_saturation = 9;
    RandomDistortColor random_distort_color = 10;
    RandomJitterBoxes random_jitter_boxes = 11;
    RandomCropImage random_crop_image = 12;
    RandomPadImage random_pad_image = 13;
    RandomCropPadImage random_crop_pad_image = 14;
    RandomCropToAspectRatio random_crop_to_aspect_ratio = 15;
    RandomBlackPatches random_black_patches = 16;
    RandomResizeMethod random_resize_method = 17;
    ScaleBoxesToPixelCoordinates scale_boxes_to_pixel_coordinates = 18;
    ResizeImage resize_image = 19;
    SubtractChannelMean subtract_channel_mean = 20;
    SSDRandomCrop ssd_random_crop = 21;
    SSDRandomCropPad ssd_random_crop_pad = 22;
    SSDRandomCropFixedAspectRatio ssd_random_crop_fixed_aspect_ratio = 23;
    SSDRandomCropPadFixedAspectRatio ssd_random_crop_pad_fixed_aspect_ratio =
        24;
    RandomVerticalFlip random_vertical_flip = 25;
    RandomRotation90 random_rotation90 = 26;
    RGBtoGray rgb_to_gray = 27;
    ConvertClassLogitsToSoftmax convert_class_logits_to_softmax = 28;
    RandomAbsolutePadImage random_absolute_pad_image = 29;
    RandomSelfConcatImage random_self_concat_image = 30;
    AutoAugmentImage autoaugment_image = 31;
    DropLabelProbabilistically drop_label_probabilistically = 32;
    RemapLabels remap_labels = 33;
    RandomJpegQuality random_jpeg_quality = 34;
    RandomDownscaleToTargetPixels random_downscale_to_target_pixels = 35;
    RandomPatchGaussian random_patch_gaussian = 36;
    RandomSquareCropByScale random_square_crop_by_scale = 37;
  }
}
// Rescales the pixel values of an image.
// Every channel is moved from the range [original_minval, original_maxval]
// to the range [target_minval, target_maxval].
message NormalizeImage {
  // Bounds of the range the incoming pixel values are in.
  optional float original_minval = 1;
  optional float original_maxval = 2;

  // Bounds of the range the pixel values are moved to.
  optional float target_minval = 3 [default = 0];
  optional float target_maxval = 4 [default = 1];
}
// Flips the image and detections horizontally, at random, 50% of the time.
message RandomHorizontalFlip {
  // Mapping from original keypoint indices to their horizontally flipped
  // indices. Only relevant when keypoints are specified: flipping the image
  // horizontally then requires the keypoints to be permuted as well.
  //
  // Example: with keypoints ordered as left_eye, right_eye, nose_tip, mouth,
  // left_ear, right_ear, one might specify:
  //   keypoint_flip_permutation: 1
  //   keypoint_flip_permutation: 0
  //   keypoint_flip_permutation: 2
  //   keypoint_flip_permutation: 3
  //   keypoint_flip_permutation: 5
  //   keypoint_flip_permutation: 4
  repeated int32 keypoint_flip_permutation = 1;
}
// Flips the image and detections vertically, at random, 50% of the time.
message RandomVerticalFlip {
  // Mapping from original keypoint indices to their vertically flipped
  // indices. Only relevant when keypoints are specified: flipping the image
  // vertically then requires the keypoints to be permuted as well.
  //
  // Example: with keypoints ordered as left_eye, right_eye, nose_tip, mouth,
  // left_ear, right_ear, one might specify:
  //   keypoint_flip_permutation: 1
  //   keypoint_flip_permutation: 0
  //   keypoint_flip_permutation: 2
  //   keypoint_flip_permutation: 3
  //   keypoint_flip_permutation: 5
  //   keypoint_flip_permutation: 4
  // NOTE(review): this example is the same left/right swap given for the
  // horizontal flip -- confirm it is the intended permutation for a vertical
  // flip.
  repeated int32 keypoint_flip_permutation = 1;
}
// Rotates the image and detections by 90 degrees counter-clockwise, at
// random, 50% of the time. Takes no parameters.
message RandomRotation90 {
}
// Multiplies every pixel value in the image by a random constant drawn from
// [minval, maxval], then clips the result to the range [0, 1.0].
message RandomPixelValueScale {
  // Lower bound of the random scale factor.
  optional float minval = 1 [default = 0.9];
  // Upper bound of the random scale factor.
  optional float maxval = 2 [default = 1.1];
}
// Enlarges or shrinks the image by a random factor, keeping its aspect ratio.
message RandomImageScale {
  // Smallest scale ratio that may be applied.
  optional float min_scale_ratio = 1 [default = 0.5];
  // Largest scale ratio that may be applied.
  optional float max_scale_ratio = 2 [default = 2.0];
}
// Converts the entire image to grey scale at random.
message RandomRGBtoGray {
  // Probability of performing the conversion.
  optional float probability = 1 [default = 0.1];
}
// Changes the image brightness by a random amount of at most max_delta.
// Output values are saturated between 0 and 1.
message RandomAdjustBrightness {
  // Largest brightness change that may be applied.
  optional float max_delta = 1 [default = 0.2];
}
// Randomly scales contrast by a value between [min_delta, max_delta].
message RandomAdjustContrast {
  // Lower bound of the random contrast factor.
  optional float min_delta = 1 [default = 0.8];
  // Upper bound of the random contrast factor.
  optional float max_delta = 2 [default = 1.25];
}
// Alters the image hue by a random value of at most max_delta.
message RandomAdjustHue {
  // Largest hue change that may be applied.
  optional float max_delta = 1 [default = 0.02];
}
// Changes the image saturation by a random value in [min_delta, max_delta].
message RandomAdjustSaturation {
  // Lower bound of the random saturation value.
  optional float min_delta = 1 [default = 0.8];
  // Upper bound of the random saturation value.
  optional float max_delta = 2 [default = 1.25];
}
// Performs a random color distortion.
message RandomDistortColor {
  // Selects the color ordering to use. Should be either 0 or 1.
  optional int32 color_ordering = 1;
}
// Randomly jitters the corners of the boxes in the image. How far a corner
// may move is determined by `ratio`.
// E.g. if a box is [100, 200] and ratio is 0.02, the corners can move by
// [1, 4].
message RandomJitterBoxes {
  // Ratio that determines the amount of jitter.
  optional float ratio = 1 [default = 0.05];
}
// Takes a random crop of the image and adjusts the bounding boxes to match.
message RandomCropImage {
  // The crop must cover at least one box by this fraction.
  optional float min_object_covered = 1 [default = 1.0];

  // Bounds on the aspect ratio of the crop.
  optional float min_aspect_ratio = 2 [default = 0.75];
  optional float max_aspect_ratio = 3 [default = 1.33];

  // Bounds on the ratio of the crop's area to the original image's area.
  optional float min_area = 4 [default = 0.1];
  optional float max_area = 5 [default = 1.0];

  // Minimum overlap threshold for keeping a cropped box in the new image.
  // A box is removed when the ratio between its cropped version and the
  // original is below this value.
  optional float overlap_thresh = 6 [default = 0.3];

  // Whether boxes are clipped to the cropped image.
  optional bool clip_boxes = 8 [default = true];

  // Probability that the original image is kept instead of a crop.
  optional float random_coef = 7 [default = 0.0];
}
// Randomly adds padding to the image.
message RandomPadImage {
  // Minimum dimensions for padded image. If unset, will use original image
  // dimension as a lower bound.
  optional int32 min_image_height = 1;
  optional int32 min_image_width = 2;

  // Maximum dimensions for padded image. If unset, will use double the
  // original image dimension as an upper bound.
  optional int32 max_image_height = 3;
  optional int32 max_image_width = 4;

  // Color of the padding. If unset, will pad using average color of the input
  // image.
  repeated float pad_color = 5;
}
// Pads the image by a random amount: the added height is drawn from
// [0, max_height_padding) and the added width from [0, max_width_padding).
message RandomAbsolutePadImage {
  // Exclusive upper bound of the height padding, which is chosen uniformly
  // at random from [0, max_height_padding).
  optional int32 max_height_padding = 1;

  // Exclusive upper bound of the width padding, which is chosen uniformly
  // at random from [0, max_width_padding).
  optional int32 max_width_padding = 2;

  // Padding color. When unset, the average color of the input image is used.
  repeated float pad_color = 3;
}
// Randomly crops an image followed by a random pad.
message RandomCropPadImage {
  // Cropping operation must cover at least one box by this fraction.
  optional float min_object_covered = 1 [default = 1.0];

  // Aspect ratio bounds of image after cropping operation.
  optional float min_aspect_ratio = 2 [default = 0.75];
  optional float max_aspect_ratio = 3 [default = 1.33];

  // Allowed area ratio of image after cropping operation.
  optional float min_area = 4 [default = 0.1];
  optional float max_area = 5 [default = 1.0];

  // Minimum overlap threshold of cropped boxes to keep in new image. If the
  // ratio between a cropped bounding box and the original is less than this
  // value, it is removed from the new image.
  optional float overlap_thresh = 6 [default = 0.3];

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 11 [default = true];

  // Probability of keeping the original image during the crop operation.
  optional float random_coef = 7 [default = 0.0];

  // Min and max ratio of the padded image size to the input image size.
  // Both of the following fields should be of length 2.
  // NOTE(review): the previous comment described these as image dimensions
  // with a "double the original" fallback; the behavior when they are unset
  // is not visible here -- confirm against preprocessor.py.
  repeated float min_padded_size_ratio = 8;
  repeated float max_padded_size_ratio = 9;

  // Color of the padding. If unset, will pad using average color of the input
  // image. This field should be of length 3.
  repeated float pad_color = 10;
}
// Randomly crops an image to a given aspect ratio.
message RandomCropToAspectRatio {
  // Aspect ratio to crop to.
  optional float aspect_ratio = 1 [default = 1.0];

  // Minimum overlap threshold of cropped boxes to keep in new image. If the
  // ratio between a cropped bounding box and the original is less than this
  // value, it is removed from the new image.
  optional float overlap_thresh = 2 [default = 0.3];

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 3 [default = true];
}
// Randomly adds black square patches to an image.
message RandomBlackPatches {
  // The maximum number of black patches to add.
  optional int32 max_black_patches = 1 [default = 10];

  // The probability of a black patch being added to an image.
  optional float probability = 2 [default = 0.5];

  // Ratio between the dimension of the black patch and the minimum dimension
  // of the image, i.e.
  //   patch_width = patch_height =
  //       size_to_image_ratio * min(image_height, image_width).
  optional float size_to_image_ratio = 3 [default = 0.1];
}
// Resizes the image to [target_height, target_width], presumably picking the
// resize method at random (per the message name).
// TODO(review): confirm against preprocessor.py; the earlier wording
// ("randomly resizes the image up to ...") suggested a random output size.
message RandomResizeMethod {
  // Target height of the image.
  optional int32 target_height = 1;
  // Target width of the image.
  optional int32 target_width = 2;
}
// Turns the RGB image into a grayscale image. The image depth changes from
// 3 to 1 as part of the conversion; RandomRGBtoGray, by contrast, leaves the
// image depth unchanged. Takes no parameters.
message RGBtoGray {
}
// Converts boxes from normalized coordinates to pixel coordinates by scaling
// them. Takes no parameters.
message ScaleBoxesToPixelCoordinates {}
// Resizes the image to [new_height, new_width].
message ResizeImage {
  // Available resize methods.
  enum Method {
    AREA = 1;
    BICUBIC = 2;
    BILINEAR = 3;
    NEAREST_NEIGHBOR = 4;
  }

  // Height of the resized image.
  optional int32 new_height = 1;
  // Width of the resized image.
  optional int32 new_width = 2;
  // Resize method to use.
  optional Method method = 3 [default = BILINEAR];
}
// Normalizes an image by subtracting a per-channel mean.
message SubtractChannelMean {
  // Mean value to subtract from each channel. The number of entries should
  // match the number of channels in the input image.
  repeated float means = 1;
}
// Parameters of a single crop operation. SSDRandomCrop holds a list of these.
message SSDRandomCropOperation {
  // Cropped image must cover at least this fraction of one original bounding
  // box.
  optional float min_object_covered = 1;

  // The aspect ratio of the cropped image must be within the range of
  // [min_aspect_ratio, max_aspect_ratio].
  optional float min_aspect_ratio = 2;
  optional float max_aspect_ratio = 3;

  // The area of the cropped image must be within the range of
  // [min_area, max_area].
  optional float min_area = 4;
  optional float max_area = 5;

  // Cropped box area ratio must be above this threshold to be kept.
  optional float overlap_thresh = 6;

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 8 [default = true];

  // Probability a crop operation is skipped.
  optional float random_coef = 7;
}
// Random image crop following:
//   Liu et al., SSD: Single shot multibox detector.
// Several SSDRandomCropOperations can be listed in this step; for any given
// image only one of them, chosen at random, is actually performed.
message SSDRandomCrop {
  // Candidate crop operations.
  repeated SSDRandomCropOperation operations = 1;
}
// Parameters of a single crop-and-pad operation. SSDRandomCropPad holds a
// list of these.
message SSDRandomCropPadOperation {
  // Cropped image must cover at least this fraction of one original bounding
  // box.
  optional float min_object_covered = 1;

  // The aspect ratio of the cropped image must be within the range of
  // [min_aspect_ratio, max_aspect_ratio].
  optional float min_aspect_ratio = 2;
  optional float max_aspect_ratio = 3;

  // The area of the cropped image must be within the range of
  // [min_area, max_area].
  optional float min_area = 4;
  optional float max_area = 5;

  // Cropped box area ratio must be above this threshold to be kept.
  optional float overlap_thresh = 6;

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 13 [default = true];

  // Probability a crop operation is skipped.
  optional float random_coef = 7;

  // Min ratio of padded image height and width to the input image's height and
  // width. Two entries per operation.
  repeated float min_padded_size_ratio = 8;

  // Max ratio of padded image height and width to the input image's height and
  // width. Two entries per operation.
  repeated float max_padded_size_ratio = 9;

  // Padding color, given as three separate components
  // (presumably red, green and blue, per the field suffixes).
  optional float pad_color_r = 10;
  optional float pad_color_g = 11;
  optional float pad_color_b = 12;
}
// Random image crop and pad following:
//   Liu et al., SSD: Single shot multibox detector.
// Several SSDRandomCropPadOperations can be listed in this step; for any
// given image only one of them, chosen at random, is actually performed.
message SSDRandomCropPad {
  // Candidate crop-and-pad operations.
  repeated SSDRandomCropPadOperation operations = 1;
}
// Parameters of a single fixed-aspect-ratio crop operation.
// SSDRandomCropFixedAspectRatio holds a list of these.
// Field numbers 2 and 3 are not used by this message.
message SSDRandomCropFixedAspectRatioOperation {
  // Cropped image must cover at least this fraction of one original bounding
  // box.
  optional float min_object_covered = 1;

  // The area of the cropped image must be within the range of
  // [min_area, max_area].
  optional float min_area = 4;
  optional float max_area = 5;

  // Cropped box area ratio must be above this threshold to be kept.
  optional float overlap_thresh = 6;

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 8 [default = true];

  // Probability a crop operation is skipped.
  optional float random_coef = 7;
}
// Random image crop to a fixed aspect ratio following:
//   Liu et al., SSD: Single shot multibox detector.
// Several SSDRandomCropFixedAspectRatioOperations can be listed in this
// step; for any given image only one of them, chosen at random, is actually
// performed.
message SSDRandomCropFixedAspectRatio {
  // Candidate crop operations.
  repeated SSDRandomCropFixedAspectRatioOperation operations = 1;

  // Aspect ratio to crop to, shared by all of the crop operations.
  optional float aspect_ratio = 2 [default = 1.0];
}
// Parameters of a single fixed-aspect-ratio crop-and-pad operation.
// SSDRandomCropPadFixedAspectRatio holds a list of these.
message SSDRandomCropPadFixedAspectRatioOperation {
  // Cropped image must cover at least this fraction of one original bounding
  // box.
  optional float min_object_covered = 1;

  // The aspect ratio of the cropped image must be within the range of
  // [min_aspect_ratio, max_aspect_ratio].
  optional float min_aspect_ratio = 2;
  optional float max_aspect_ratio = 3;

  // The area of the cropped image must be within the range of
  // [min_area, max_area].
  optional float min_area = 4;
  optional float max_area = 5;

  // Cropped box area ratio must be above this threshold to be kept.
  optional float overlap_thresh = 6;

  // Whether to clip the boxes to the cropped image.
  optional bool clip_boxes = 8 [default = true];

  // Probability a crop operation is skipped.
  optional float random_coef = 7;
}
// Random image crop and pad to a fixed aspect ratio following:
//   Liu et al., SSD: Single shot multibox detector.
// Several SSDRandomCropPadFixedAspectRatioOperations can be listed in this
// step; for any given image only one of them, chosen at random, is actually
// performed.
message SSDRandomCropPadFixedAspectRatio {
  // Candidate crop-and-pad operations.
  repeated SSDRandomCropPadFixedAspectRatioOperation operations = 1;

  // Aspect ratio to pad to, shared by all of the crop and pad operations.
  optional float aspect_ratio = 2 [default = 1.0];

  // Min ratio of the padded image's height and width to the input image's
  // height and width. Two entries per operation.
  repeated float min_padded_size_ratio = 3;

  // Max ratio of the padded image's height and width to the input image's
  // height and width. Two entries per operation.
  repeated float max_padded_size_ratio = 4;
}
// Turns class logits into softmax values. The logits can optionally be scaled
// by a temperature before the softmax is applied.
message ConvertClassLogitsToSoftmax {
  // Scale applied to the logits ahead of the softmax.
  optional float temperature = 1 [default = 1.0];
}
// Concatenates the image with itself at random, horizontally and/or
// vertically.
message RandomSelfConcatImage {
  // Probability that the image is concatenated vertically.
  optional float concat_vertical_probability = 1 [default = 0.1];

  // Probability that the image is concatenated horizontally.
  optional float concat_horizontal_probability = 2 [default = 0.1];
}
// Applies an AutoAugment policy to the image and its bounding boxes.
message AutoAugmentImage {
  // Name of the AutoAugment policy to apply to the image.
  optional string policy_name = 1 [default = "v0"];
}
// Drops the ground truth boxes of one label at random, with a configurable
// probability.
message DropLabelProbabilistically {
  // Label to drop. It corresponds to one of the entries in the label map.
  optional int32 label = 1;

  // Probability with which the label is dropped.
  optional float drop_probability = 2 [default = 1.0];
}
// Replaces every label from a given set with a single new label.
message RemapLabels {
  // The labels that get remapped.
  repeated int32 original_labels = 1;

  // The label they are mapped to.
  optional int32 new_label = 2;
}
// Re-encodes the image as jpeg using a random quality factor.
message RandomJpegQuality {
  // Probability that the original image is kept.
  optional float random_coef = 1 [default = 0.0];

  // Lowest jpeg quality that may be used.
  optional int32 min_jpeg_quality = 2 [default = 0];

  // Highest jpeg quality that may be used.
  optional int32 max_jpeg_quality = 3 [default = 100];
}
// Shrinks the image, keeping its aspect ratio, to a randomly chosen target
// number of pixels. An image that already has fewer pixels than the chosen
// target is left unchanged.
message RandomDownscaleToTargetPixels {
  // Probability that the original image is kept.
  optional float random_coef = 1 [default = 0.0];

  // Bounds of the range [min_target_pixels, max_target_pixels] from which
  // the target number of pixels is chosen.
  optional int32 min_target_pixels = 2 [default = 300000];
  optional int32 max_target_pixels = 3 [default = 500000];
}
// Applies gaussian noise within a patch of the image. The patch size and the
// standard deviation of the noise are each chosen from the ranges below.
message RandomPatchGaussian {
  // Probability of keeping the original image.
  optional float random_coef = 1 [default = 0.0];

  // The patch size will be chosen to be in the range
  // [min_patch_size, max_patch_size).
  optional int32 min_patch_size = 2 [default = 1];
  optional int32 max_patch_size = 3 [default = 250];

  // The standard deviation of the gaussian noise applied within the patch will
  // be chosen to be in the range [min_gaussian_stddev, max_gaussian_stddev).
  optional float min_gaussian_stddev = 4 [default = 0.0];
  optional float max_gaussian_stddev = 5 [default = 1.0];
}
// Extract a square sized crop from an image whose side length is sampled by
// randomly scaling the maximum spatial dimension of the image. If part of the
// crop falls outside the image, it is filled with zeros.
// The augmentation is borrowed from [1]
// [1]: https://arxiv.org/abs/1904.07850
message RandomSquareCropByScale {
  // The maximum size of the border. The border defines distance in pixels to
  // the image boundaries that will not be considered as a center of a crop.
  // To make sure that the border does not go over the center of the image,
  // we chose the border value by computing the minimum k, such that
  // (max_border / (2**k)) < image_dimension/2
  optional int32 max_border = 1 [default = 128];

  // The minimum and maximum values of scale.
  optional float scale_min = 2 [default = 0.6];
  optional float scale_max = 3 [default = 1.3];

  // The number of discrete scale values to randomly sample between
  // [scale_min, scale_max].
  optional int32 num_scales = 4 [default = 8];
}