Update api/ltx_server_refactored.py

api/ltx_server_refactored.py (CHANGED, +47 -23)
```diff
@@ -322,7 +322,12 @@ class VideoService:
 
         return conditioning_items
 
-    def generate_low_resolution(
+    def generate_low_resolution(
+        self, prompt: str, negative_prompt: str,
+        height: int, width: int, duration_secs: float,
+        guidance_scale: float, seed: Optional[int] = None,
+        conditioning_items: Optional[List[ConditioningItem]] = None
+    ) -> Tuple[str, str, int]:
         """
         Generates a low-resolution video and returns the paths to the video and the latents.
         """
```
```diff
@@ -334,11 +339,17 @@ class VideoService:
         downscaled_height, downscaled_width = self._calculate_downscaled_dims(height, width)
 
         first_pass_kwargs = {
-            "prompt": prompt,
-            "
+            "prompt": prompt,
+            "negative_prompt": negative_prompt,
+            "height": downscaled_height,
+            "width": downscaled_width,
+            "num_frames": num_frames,
+            "frame_rate": int(DEFAULT_FPS),
             "generator": torch.Generator(device=self.device).manual_seed(used_seed),
-            "output_type": "latent",
-            "
+            "output_type": "latent",
+            "conditioning_items": conditioning_items,
+            "guidance_scale": float(guidance_scale),
+            **(self.config.get("first_pass", {}))
         }
 
         temp_dir = tempfile.mkdtemp(prefix="ltxv_low_")
```
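The trailing `**(self.config.get("first_pass", {}))` merges per-pass defaults from the service config into the call. Because keys unpacked later win in a dict literal, a config entry overrides an explicit key of the same name. A minimal sketch of that merge rule (the dict contents are illustrative, not values from this repo):

```python
# Later keys win, so the "first_pass" config overrides duplicate keys.
base = {"output_type": "latent", "guidance_scale": 3.0}
config = {"first_pass": {"guidance_scale": 2.5, "num_inference_steps": 8}}

merged = {**base, **config.get("first_pass", {})}
print(merged)  # {'output_type': 'latent', 'guidance_scale': 2.5, 'num_inference_steps': 8}
```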
```diff
@@ -355,10 +366,11 @@
             latents_path=latents_path,
             prompt=prompt,
             negative_prompt=negative_prompt,
-            height=downscaled_height, width=downscaled_width,
+            height=downscaled_height, width=downscaled_width,
+            num_frames=actual_num_frames,
             guidance_scale=guidance_scale,
             seed=used_seed,
-            conditioning_items=conditioning_items
+            conditioning_items=conditioning_items,
         )
 
         print(f"[SUCCESS] STEP 2 complete. Final video at: {final_video_path}")
```
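Both passes rebuild `torch.Generator(device=self.device).manual_seed(used_seed)`, so the same `used_seed` travels from the low-resolution pass into the refinement pass. A quick self-contained check of why that gives reproducible noise:

```python
import torch

# Two generators seeded identically produce identical noise tensors,
# which is what lets the two passes share one used_seed.
g1 = torch.Generator(device="cpu").manual_seed(42)
g2 = torch.Generator(device="cpu").manual_seed(42)
assert torch.equal(torch.randn(4, generator=g1), torch.randn(4, generator=g2))
```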
```diff
@@ -372,7 +384,12 @@ class VideoService:
         finally:
             self._finalize()
 
-    def generate_upscale_denoise(
+    def generate_upscale_denoise(
+        self, latents_path: str, prompt: str,
+        negative_prompt: str, height: int, width: int,
+        num_frames: float, guidance_scale: float, seed: Optional[int] = None,
+        conditioning_items: Optional[List[ConditioningItem]] = None
+    ) -> Tuple[str, str]:
         """
         Applies upscaling, AdaIN, and denoising to low-resolution latents using a chunking process.
         """
```
```diff
@@ -386,8 +403,7 @@
             latents_low = torch.load(latents_path).to(self.device)
             with torch.autocast(device_type=self.device.split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.device == 'cuda')):
                 upsampled_latents = latents_low #self._upsample_and_filter_latents(latents_low)
-
-
+
                 #chunks = self._split_latents_with_overlap(upsampled_latents)
                 #refined_chunks = []
 
@@ -395,20 +411,28 @@
                 #if chunk.shape[2] <= 1: continue  # skip invalid chunks
 
                 chunk = upsampled_latents
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
+                second_pass_height = chunk.shape[3] * self.pipeline.vae_scale_factor
+                second_pass_width = chunk.shape[4] * self.pipeline.vae_scale_factor
+
+                second_pass_kwargs = {
+                    "prompt": prompt,
+                    "negative_prompt": negative_prompt,
+                    "height": second_pass_height,
+                    "width": second_pass_width,
+                    "frame_rate": int(DEFAULT_FPS),
+                    "num_frames": num_frames,
+                    "latents": chunk,  # the full tensor is passed here
+                    "guidance_scale": float(guidance_scale),
+                    "output_type": "latent",
+                    "generator": torch.Generator(device=self.device).manual_seed(used_seed),
+                    "conditioning_items": conditioning_items,
+                    **(self.config.get("second_pass", {}))
+                }
+                refined_chunk = self.pipeline(**second_pass_kwargs).images
+                #refined_chunks.append(refined_chunk)
+
+                del latents_low; torch.cuda.empty_cache()
 
                 final_latents = refined_chunk #self._merge_chunks_with_overlap(refined_chunks)
                 #if LTXV_DEBUG:
```
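The commented-out `_split_latents_with_overlap` / `_merge_chunks_with_overlap` calls indicate the intended chunking scheme: refine the latent in frame-axis windows that share a few frames, then blend them back together. A minimal sketch of the splitting half under that assumption; the helper name, chunk size, and overlap are illustrative, not code from this repo:

```python
import torch

def split_with_overlap(latents: torch.Tensor, chunk: int = 8, overlap: int = 2):
    """Split a (batch, channels, frames, h, w) latent into frame windows
    that share `overlap` frames with their neighbours."""
    num_frames = latents.shape[2]
    chunks, start = [], 0
    while start < num_frames:
        chunks.append(latents[:, :, start:start + chunk])
        if start + chunk >= num_frames:
            break
        start += chunk - overlap
    return chunks

parts = split_with_overlap(torch.randn(1, 128, 20, 16, 16))
print([p.shape[2] for p in parts])  # [8, 8, 8]; each window overlaps the next by 2 frames
```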
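`second_pass_height` and `second_pass_width` recover pixel dimensions from the latent tensor: for a `(batch, channels, frames, height, width)` latent, pixel size is latent size times the pipeline's VAE scale factor. A worked sketch; the shape and the factor of 8 are assumptions, and the real value comes from `self.pipeline.vae_scale_factor`:

```python
latent_shape = (1, 128, 8, 60, 88)  # hypothetical (batch, channels, frames, h, w)
vae_scale_factor = 8                # assumed; read self.pipeline.vae_scale_factor in practice

second_pass_height = latent_shape[3] * vae_scale_factor
second_pass_width = latent_shape[4] * vae_scale_factor
print(second_pass_height, second_pass_width)  # 480 704
```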