Update api/ltx_server_refactored.py
api/ltx_server_refactored.py  CHANGED  (+11 -51)
@@ -211,21 +211,11 @@ def create_ltx_video_pipeline(
     prompt_enhancer_llm_model = None
     prompt_enhancer_llm_tokenizer = None
 
-
-    if precision
-
-
-        dtype_target = torch.float16
-    else:
-        dtype_target = torch.float32
-
-    for m in [vae, transformer, text_encoder]:
-        m.to(dtype_target)
-
-    # ensures overall pipeline dtype coherence
-    pipeline_dtype = dtype_target
+    vae = vae.to(torch.bfloat16)
+    if precision == "bfloat16" and transformer.dtype != torch.bfloat16:
+        transformer = transformer.to(torch.bfloat16)
+        text_encoder = text_encoder.to(torch.bfloat16)
 
-
     # Use submodels for the pipeline
     submodel_dict = {
         "transformer": transformer,
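This hunk drops the float16/float32 branch that recast every submodel and replaces it with an unconditional bfloat16 cast of the VAE plus a guarded cast of the transformer and text encoder. A minimal sketch of the guarded-cast pattern, using `nn.Linear` stand-ins for the real submodels (the diff's transformer exposes a `.dtype` property; plain `nn.Module` does not, so the sketch checks a parameter's dtype instead):

```python
import torch
import torch.nn as nn

# Stand-ins for the pipeline's submodels; any nn.Module behaves the same way here.
vae, transformer, text_encoder = nn.Linear(8, 8), nn.Linear(8, 8), nn.Linear(8, 8)
precision = "bfloat16"  # hypothetical config value mirroring the diff

# The VAE is always cast; the heavier models only when requested and not
# already in bfloat16, so repeated pipeline construction skips redundant casts.
vae = vae.to(torch.bfloat16)
if precision == "bfloat16" and transformer.weight.dtype != torch.bfloat16:
    transformer = transformer.to(torch.bfloat16)
    text_encoder = text_encoder.to(torch.bfloat16)

print(vae.weight.dtype, transformer.weight.dtype, text_encoder.weight.dtype)
# torch.bfloat16 torch.bfloat16 torch.bfloat16
```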
@@ -364,48 +354,18 @@ class VideoService:
             "guidance_scale": float(guidance_scale),
             **(self.config.get("first_pass", {}))
         }
-
+
         temp_dir = tempfile.mkdtemp(prefix="ltxv_low_")
         self._register_tmp_dir(temp_dir)
 
         try:
-
-
-
-
-
-            # pixel_tensor = vae_manager_singleton.decode(latents.clone(), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
-            # video_path = self._save_video_from_tensor(pixel_tensor, "low_res_video", used_seed, temp_dir)
-            # latents_path = self._save_latents_to_disk(latents, "latents_low_res", used_seed)
-
-            # STAGE 2: upscale the latents
-            #with torch.autocast(device_type=self.device.split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.device == 'cuda')):
-            #upscaled_height, upscaled_width = downscaled_height * 2, downscaled_width * 2
-            #upscaled_latents = self._upsample_and_filter_latents(latents_low)
+            with torch.autocast(device_type=self.device.split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.device == 'cuda')):
+                latents = self.pipeline(**first_pass_kwargs).images
+            pixel_tensor = vae_manager_singleton.decode(latents.clone(), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
+            video_path = self._save_video_from_tensor(pixel_tensor, "low_res_video", used_seed, temp_dir)
+            latents_path = self._save_latents_to_disk(latents, "latents_low_res", used_seed)
 
-
-            second_pass_kwargs = {
-                "prompt": prompt,
-                "negative_prompt": negative_prompt,
-                "height": downscaled_height,
-                "width": downscaled_width,
-                "num_frames": max(3, (actual_num_frames//8)*8)+1, "frame_rate": int(DEFAULT_FPS),
-                "generator": torch.Generator(device=self.device).manual_seed(used_seed),
-                "output_type": "latent",
-                "is_video": True,
-                "latents": latents_low,
-                "vae_per_channel_normalize": True,
-                "conditioning_items": conditioning_items,
-                "guidance_scale": float(guidance_scale),
-                **(self.config.get("second_pass", {}))
-            }
-
-            #with torch.autocast(device_type=self.device.split(':')[0], dtype=self.runtime_autocast_dtype, enabled=(self.device == 'cuda')):
-            #latents = self.pipeline(**second_pass_kwargs).images
-            pixel_tensor = vae_manager_singleton.decode(latents_low.clone(), decode_timestep=float(self.config.get("decode_timestep", 0.05)))
-            video_path = self._save_video_from_tensor(pixel_tensor, "low_res_video", used_seed, temp_dir)
-            latents_path = self._save_latents_to_disk(latents, "latents_low_res", used_seed)
-
+
             return video_path, latents_path, used_seed
 
         except Exception as e:
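This hunk deletes the commented-out second-pass/upscale block and instead runs the first denoising pass under `torch.autocast`, decoding those latents directly. A sketch of the autocast wrapper, assuming `pipeline`, `first_pass_kwargs`, `device`, and `runtime_autocast_dtype` stand in for the service's attributes:

```python
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
runtime_autocast_dtype = torch.bfloat16  # assumption: matches the bfloat16 casts above

def run_first_pass(pipeline, first_pass_kwargs):
    # Mixed precision only on CUDA; with enabled=False the context is a no-op,
    # so the same code path runs unchanged on CPU. device.split(":")[0] turns
    # a device string like "cuda:0" into the bare device type autocast expects.
    with torch.autocast(device_type=device.split(":")[0],
                        dtype=runtime_autocast_dtype,
                        enabled=(device == "cuda")):
        return pipeline(**first_pass_kwargs).images
```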
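One detail worth noting from the removed `second_pass_kwargs`: the frame count was rounded with `max(3, (actual_num_frames // 8) * 8) + 1`, i.e. floored to a multiple of 8 (with a minimum of 3) and incremented, matching the `8k + 1` frame counts that LTX-Video-style pipelines typically expect. A quick check of what the expression yields, on hypothetical inputs:

```python
# Inputs are illustrative; the expression is copied from the removed code.
for n in (5, 8, 49, 50, 121):
    print(n, "->", max(3, (n // 8) * 8) + 1)
# 5 -> 4, 8 -> 9, 49 -> 49, 50 -> 49, 121 -> 121
```

Note the small-input edge case: for fewer than 8 frames the floor of 3 produces 4, which is not of the `8k + 1` form.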