# Copyright (C) 2025, FaceLift Research Group
# https://github.com/weijielyu/FaceLift
#
# This software is free for non-commercial, research and evaluation use
# under the terms of the LICENSE.md file.
#
# For inquiries contact: wlyu3@ucmerced.edu
"""
Face detection and cropping utilities for 3D face reconstruction.
This module provides functions for face detection, cropping, and preprocessing
to align faces with training data specifications.
"""
from typing import Tuple, Optional, Dict, Any
import numpy as np
import torch
from PIL import Image
from facenet_pytorch import MTCNN
from rembg import remove

# Training set face parameters (derived from training data statistics)
TRAINING_SET_FACE_SIZE = 194.2749650813705
TRAINING_SET_FACE_CENTER = [251.83270369057132, 280.0133630862363]

# Public constants for external use
FACE_SIZE = TRAINING_SET_FACE_SIZE
FACE_CENTER = TRAINING_SET_FACE_CENTER
DEFAULT_BACKGROUND_COLOR = (255, 255, 255)
DEFAULT_IMG_SIZE = 512

# Device setup
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Default face detector instance
FACE_DETECTOR = MTCNN(
    image_size=512,
    margin=0,
    min_face_size=20,
    thresholds=[0.6, 0.7, 0.7],
    factor=0.709,
    post_process=True,
    device=DEVICE
)
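
# Note: `FACE_DETECTOR.detect(img)` returns a tuple (boxes, probs): `boxes` is
# an (N, 4) array of [x1, y1, x2, y2] pixel coordinates and `probs` holds the
# per-face confidence scores; both are None when no face is detected.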


def select_face(
    detected_bounding_boxes: Optional[np.ndarray],
    confidence_scores: Optional[np.ndarray]
) -> Optional[np.ndarray]:
    """
    Select the largest detected face whose confidence exceeds 0.8.

    Args:
        detected_bounding_boxes: Detected bounding boxes in xyxy format
        confidence_scores: Detection confidence probabilities

    Returns:
        Selected bounding box, or None if no suitable face is found
    """
    if detected_bounding_boxes is None or confidence_scores is None:
        return None

    # Keep only faces detected with confidence > 0.8
    high_confidence_faces = [
        detected_bounding_boxes[i] for i in range(len(detected_bounding_boxes))
        if confidence_scores[i] > 0.8
    ]
    if not high_confidence_faces:
        return None

    # Return the largest face (by bounding-box area)
    return max(high_confidence_faces, key=lambda bbox: (bbox[3] - bbox[1]) * (bbox[2] - bbox[0]))
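
# Illustrative example (values are made up): with two boxes above the 0.8
# confidence threshold, the larger box is selected.
#
#   boxes = np.array([[10.0, 10.0, 60.0, 70.0], [100.0, 80.0, 300.0, 330.0]])
#   probs = np.array([0.92, 0.99])
#   select_face(boxes, probs)  # -> array([100., 80., 300., 330.])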


def crop_face(
    input_image_array: np.ndarray,
    face_detector: MTCNN = FACE_DETECTOR,
    target_face_size: float = FACE_SIZE,
    target_face_center: list = FACE_CENTER,
    output_image_size: int = 512,
    background_color: Tuple[int, int, int] = (255, 255, 255)
) -> Tuple[Image.Image, Dict[str, Any]]:
    """
    Crop and align the face in an image to match training data specifications.

    Args:
        input_image_array: Input image as numpy array (H, W, C)
        face_detector: MTCNN face detector instance
        target_face_size: Target face size from training data
        target_face_center: Target face center from training data
        output_image_size: Output image size
        background_color: Background color for padding

    Returns:
        Tuple of (cropped_image, crop_parameters)

    Raises:
        ValueError: If no face is detected in the image
    """
    image_height, image_width, _ = input_image_array.shape

    # Handle RGBA images by compositing onto the background color
    if input_image_array.shape[2] == 4:
        rgba_pil_image = Image.fromarray(input_image_array)
        background_image = Image.new("RGB", rgba_pil_image.size, background_color)
        rgb_composite_image = Image.alpha_composite(background_image.convert("RGBA"), rgba_pil_image).convert("RGB")
        processed_image_array = np.array(rgb_composite_image)
    else:
        processed_image_array = input_image_array[:, :, :3]  # Ensure RGB format

    # Detect and select face
    detected_bounding_boxes, confidence_scores = face_detector.detect(processed_image_array)
    selected_face_bbox = select_face(detected_bounding_boxes, confidence_scores)
    if selected_face_bbox is None:
        raise ValueError("No face detected in the image")

    # Calculate detected face properties: size is the mean of box width and height
    detected_face_size = 0.5 * (selected_face_bbox[2] - selected_face_bbox[0] + selected_face_bbox[3] - selected_face_bbox[1])
    detected_face_center = (
        0.5 * (selected_face_bbox[0] + selected_face_bbox[2]),
        0.5 * (selected_face_bbox[1] + selected_face_bbox[3])
    )

    # Scale image so the detected face matches the training face size
    scale_ratio = target_face_size / detected_face_size
    scaled_width, scaled_height = int(image_width * scale_ratio), int(image_height * scale_ratio)
    scaled_pil_image = Image.fromarray(processed_image_array).resize((scaled_width, scaled_height))
    scaled_face_center = (
        int(detected_face_center[0] * scale_ratio),
        int(detected_face_center[1] * scale_ratio)
    )

    # Create output image with background
    output_image = Image.new("RGB", (output_image_size, output_image_size), color=background_color)

    # Calculate alignment offsets
    horizontal_offset = target_face_center[0] - scaled_face_center[0]
    vertical_offset = target_face_center[1] - scaled_face_center[1]

    # Calculate crop boundaries
    crop_left_boundary = int(max(0, -horizontal_offset))
    crop_top_boundary = int(max(0, -vertical_offset))
    crop_right_boundary = int(min(scaled_width, output_image_size - horizontal_offset))
    crop_bottom_boundary = int(min(scaled_height, output_image_size - vertical_offset))

    # Crop and paste
    cropped_face_image = scaled_pil_image.crop((crop_left_boundary, crop_top_boundary, crop_right_boundary, crop_bottom_boundary))
    paste_coordinates = (int(max(0, horizontal_offset)), int(max(0, vertical_offset)))
    output_image.paste(cropped_face_image, paste_coordinates)

    crop_parameters = {
        'resize_ratio': scale_ratio,
        'x_offset_left': horizontal_offset,
        'y_offset_top': vertical_offset,
    }
    return output_image, crop_parameters
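
# A minimal usage sketch (assumes `img` is an RGB/RGBA uint8 array containing
# a visible face; the file name is a placeholder):
#
#   img = np.array(Image.open("portrait.jpg"))
#   cropped, params = crop_face(img)
#
# `params` maps output pixels back to the input image: an output point (u, v)
# corresponds to ((u - x_offset_left) / resize_ratio,
#                 (v - y_offset_top) / resize_ratio) in the original image.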


def prepare_foreground_with_rembg(input_image_array: np.ndarray) -> np.ndarray:
    """
    Prepare foreground image using rembg for background removal.

    Args:
        input_image_array: Input image as numpy array (H, W, C)

    Returns:
        RGBA image as numpy array with background removed
    """
    pil_image = Image.fromarray(input_image_array)
    background_removed_image = remove(pil_image)
    processed_image_array = np.array(background_removed_image)

    # Ensure RGBA format
    if processed_image_array.shape[2] == 4:
        return processed_image_array
    elif processed_image_array.shape[2] == 3:
        height, width = processed_image_array.shape[:2]
        alpha_channel = np.full((height, width), 255, dtype=np.uint8)
        rgba_image = np.zeros((height, width, 4), dtype=np.uint8)
        rgba_image[:, :, :3] = processed_image_array
        rgba_image[:, :, 3] = alpha_channel
        return rgba_image
    return processed_image_array
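
# Note: rembg's `remove` mirrors its input type -- given a PIL image it returns
# a PIL image, normally in RGBA mode with the background made transparent. The
# RGB branch above is a guard for outputs that lack an alpha channel.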


def preprocess_image(
    original_image_array: np.ndarray,
    target_image_size: int = DEFAULT_IMG_SIZE,
    background_color: Tuple[int, int, int] = DEFAULT_BACKGROUND_COLOR
) -> Image.Image:
    """
    Preprocess image with background removal and face cropping.

    Args:
        original_image_array: Input image as numpy array
        target_image_size: Target image size
        background_color: Background color for compositing

    Returns:
        Processed PIL Image
    """
    processed_image_array = prepare_foreground_with_rembg(original_image_array)

    # Convert RGBA to RGB with the specified background
    if processed_image_array.shape[2] == 4:
        rgba_pil_image = Image.fromarray(processed_image_array)
        background_image = Image.new("RGB", rgba_pil_image.size, background_color)
        rgb_composite_image = Image.alpha_composite(background_image.convert("RGBA"), rgba_pil_image).convert("RGB")
        processed_image_array = np.array(rgb_composite_image)

    # Crop parameters are not needed here, so they are discarded
    cropped_image, _ = crop_face(
        processed_image_array,
        FACE_DETECTOR,
        FACE_SIZE,
        FACE_CENTER,
        target_image_size,
        background_color
    )
    return cropped_image


def preprocess_image_without_cropping(
    original_image_array: np.ndarray,
    target_image_size: int = DEFAULT_IMG_SIZE,
    background_color: Tuple[int, int, int] = DEFAULT_BACKGROUND_COLOR
) -> Image.Image:
    """
    Preprocess image with background removal, but without face cropping.

    Args:
        original_image_array: Input image as numpy array
        target_image_size: Target image size
        background_color: Background color for compositing

    Returns:
        Processed PIL Image
    """
    processed_image_array = prepare_foreground_with_rembg(original_image_array)

    # Resize the RGBA foreground, then composite it onto a solid background
    resized_image = Image.fromarray(processed_image_array).resize((target_image_size, target_image_size))
    background_image = Image.new("RGBA", (target_image_size, target_image_size), background_color)
    composite_image = Image.alpha_composite(background_image, resized_image).convert("RGB")
    return composite_image
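
# A minimal sketch of end-to-end usage, not part of the original module.
# "input.png" is a placeholder path; pass a real portrait image instead.
if __name__ == "__main__":
    import sys

    input_path = sys.argv[1] if len(sys.argv) > 1 else "input.png"  # placeholder
    image_array = np.array(Image.open(input_path).convert("RGB"))

    # Remove the background, then crop and align the face to the training layout
    result = preprocess_image(image_array)
    result.save("preprocessed.png")
    print(f"Saved {result.size[0]}x{result.size[1]} image to preprocessed.png")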