Update api/ltx_server_refactored.py

api/ltx_server_refactored.py  CHANGED  (+51 −50)
@@ -93,55 +93,6 @@ from ltx_video.utils.skip_layer_strategy import SkipLayerStrategy
 import ltx_video.pipelines.crf_compressor as crf_compressor
 
 
-
-def load_image_to_tensor_with_resize_and_crop(
-    image_input: Union[str, Image.Image],
-    target_height: int = 512,
-    target_width: int = 768,
-    just_crop: bool = False,
-) -> torch.Tensor:
-    """Load and process an image into a tensor.
-
-    Args:
-        image_input: Either a file path (str) or a PIL Image object
-        target_height: Desired height of output tensor
-        target_width: Desired width of output tensor
-        just_crop: If True, only crop the image to the target size without resizing
-    """
-    if isinstance(image_input, str):
-        image = Image.open(image_input).convert("RGB")
-    elif isinstance(image_input, Image.Image):
-        image = image_input
-    else:
-        raise ValueError("image_input must be either a file path or a PIL Image object")
-
-    input_width, input_height = image.size
-    aspect_ratio_target = target_width / target_height
-    aspect_ratio_frame = input_width / input_height
-    if aspect_ratio_frame > aspect_ratio_target:
-        new_width = int(input_height * aspect_ratio_target)
-        new_height = input_height
-        x_start = (input_width - new_width) // 2
-        y_start = 0
-    else:
-        new_width = input_width
-        new_height = int(input_width / aspect_ratio_target)
-        x_start = 0
-        y_start = (input_height - new_height) // 2
-
-    image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
-    if not just_crop:
-        image = image.resize((target_width, target_height))
-
-    image = np.array(image)
-    image = cv2.GaussianBlur(image, (3, 3), 0)
-    frame_tensor = torch.from_numpy(image).float()
-    frame_tensor = crf_compressor.compress(frame_tensor / 255.0) * 255.0
-    frame_tensor = frame_tensor.permute(2, 0, 1)
-    frame_tensor = (frame_tensor / 127.5) - 1.0
-    # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
-    return frame_tensor.unsqueeze(0).unsqueeze(2)
-
 def create_latent_upsampler(latent_upsampler_model_path: str, device: str):
     latent_upsampler = LatentUpsampler.from_pretrained(latent_upsampler_model_path)
     latent_upsampler.to(device)
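Note: the function body is moved verbatim into VideoService in the next hunk; only `self` is added to the signature. As a sanity check on the center-crop math being carried over, here is a minimal standalone sketch (the 1920x1080 example is illustrative, not taken from the commit):

# Mirrors the aspect-ratio crop-box logic of the moved helper.
def center_crop_box(in_w: int, in_h: int, target_w: int, target_h: int):
    target_ar = target_w / target_h
    frame_ar = in_w / in_h
    if frame_ar > target_ar:   # source wider than target: trim left/right
        new_w, new_h = int(in_h * target_ar), in_h
        x0, y0 = (in_w - new_w) // 2, 0
    else:                      # source taller than target: trim top/bottom
        new_w, new_h = in_w, int(in_w / target_ar)
        x0, y0 = 0, (in_h - new_h) // 2
    return (x0, y0, x0 + new_w, y0 + new_h)

# A 1920x1080 frame cropped for a 768x512 (3:2) target keeps a centered 1620x1080 box.
assert center_crop_box(1920, 1080, 768, 512) == (150, 0, 1770, 1080)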
@@ -303,6 +254,56 @@ class VideoService:
     # --- Public Methods (Service API) ---
     # --------------------------------------------------------------------------
 
+    def _load_image_to_tensor_with_resize_and_crop(
+        self,
+        image_input: Union[str, Image.Image],
+        target_height: int = 512,
+        target_width: int = 768,
+        just_crop: bool = False,
+    ) -> torch.Tensor:
+        """Load and process an image into a tensor.
+
+        Args:
+            image_input: Either a file path (str) or a PIL Image object
+            target_height: Desired height of output tensor
+            target_width: Desired width of output tensor
+            just_crop: If True, only crop the image to the target size without resizing
+        """
+        if isinstance(image_input, str):
+            image = Image.open(image_input).convert("RGB")
+        elif isinstance(image_input, Image.Image):
+            image = image_input
+        else:
+            raise ValueError("image_input must be either a file path or a PIL Image object")
+
+        input_width, input_height = image.size
+        aspect_ratio_target = target_width / target_height
+        aspect_ratio_frame = input_width / input_height
+        if aspect_ratio_frame > aspect_ratio_target:
+            new_width = int(input_height * aspect_ratio_target)
+            new_height = input_height
+            x_start = (input_width - new_width) // 2
+            y_start = 0
+        else:
+            new_width = input_width
+            new_height = int(input_width / aspect_ratio_target)
+            x_start = 0
+            y_start = (input_height - new_height) // 2
+
+        image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
+        if not just_crop:
+            image = image.resize((target_width, target_height))
+
+        image = np.array(image)
+        image = cv2.GaussianBlur(image, (3, 3), 0)
+        frame_tensor = torch.from_numpy(image).float()
+        frame_tensor = crf_compressor.compress(frame_tensor / 255.0) * 255.0
+        frame_tensor = frame_tensor.permute(2, 0, 1)
+        frame_tensor = (frame_tensor / 127.5) - 1.0
+        # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
+        return frame_tensor.unsqueeze(0).unsqueeze(2)
+
+
 
     def generate_low_resolution1(self, prompt: str, negative_prompt: str, height: int, width: int, duration_secs: float, guidance_scale: float, seed: Optional[int] = None, conditioning_items: Optional[List[PatchedConditioningItem]] = None) -> Tuple[str, str, int]:
         """
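A hedged usage sketch of the new private method; `service` stands in for a constructed VideoService instance, which this diff does not show:

# Hypothetical call; `service` is an assumed VideoService instance.
from PIL import Image

img = Image.new("RGB", (1024, 576))  # dummy 16:9 frame
t = service._load_image_to_tensor_with_resize_and_crop(
    img, target_height=512, target_width=768
)
# 5D layout per the comment in the method:
# (batch_size=1, channels=3, num_frames=1, height, width)
assert t.shape == (1, 3, 1, 512, 768)
# Values land in roughly [-1, 1] after the /127.5 - 1.0 step,
# assuming crf_compressor.compress preserves its input range.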
@@ -698,7 +699,7 @@ class VideoService:
 
     def _prepare_conditioning_tensor_from_path(self, filepath: str, height: int, width: int, padding: Tuple) -> torch.Tensor:
         """Loads an image, resizes it, applies padding, and moves it to the device."""
-        tensor = load_image_to_tensor_with_resize_and_crop(filepath, height, width)
+        tensor = self._load_image_to_tensor_with_resize_and_crop(filepath, height, width)
         tensor = F.pad(tensor, padding)
         return tensor.to(self.device, dtype=self.runtime_autocast_dtype)
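The call site feeds the 5D tensor straight into F.pad, so the `padding` tuple applies to the trailing dimensions. A small sketch of that semantics (the 4-tuple and sizes are illustrative, not from the commit):

import torch
import torch.nn.functional as F

# For a (batch, channels, frames, H, W) tensor, a 4-tuple pads the
# last two dims as (w_left, w_right, h_top, h_bottom).
t = torch.zeros(1, 3, 1, 512, 768)
padded = F.pad(t, (0, 0, 8, 8))  # hypothetical: 8 rows top and bottom
assert padded.shape == (1, 3, 1, 528, 768)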