Spaces:
Paused
Paused
Update api/ltx_server.py
Browse files- api/ltx_server.py +26 -6
api/ltx_server.py
CHANGED
|
@@ -543,10 +543,10 @@ class VideoService:
|
|
| 543 |
start = (num_latente_por_chunk*i)
|
| 544 |
end = (start+num_latente_por_chunk+overlap)
|
| 545 |
if i+1 < n_chunks:
|
| 546 |
-
chunk = latents_brutos[:, :, start:end, :, :].
|
| 547 |
print(f"[DEBUG] chunk{i+1}[:, :, {start}:{end}, :, :] = {chunk.shape[2]}")
|
| 548 |
else:
|
| 549 |
-
chunk = latents_brutos[:, :, start:, :, :].
|
| 550 |
print(f"[DEBUG] chunk{i+1}[:, :, {start}:, :, :] = {chunk.shape[2]}")
|
| 551 |
chunks.append(chunk)
|
| 552 |
i+=1
|
|
@@ -578,8 +578,8 @@ class VideoService:
|
|
| 578 |
#if total % 2 == 1: # ÍMPAR
|
| 579 |
# Ex: 11 → primeira 0..5, segunda 5..10
|
| 580 |
cut = total // 2
|
| 581 |
-
primeira = latents_brutos[:, :, :cut+1, :, :].
|
| 582 |
-
segunda = latents_brutos[:, :, cut:, :, :].
|
| 583 |
|
| 584 |
|
| 585 |
return primeira, segunda
|
|
@@ -759,14 +759,20 @@ class VideoService:
|
|
| 759 |
if mode == "image-to-video":
|
| 760 |
start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
|
| 761 |
conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
|
|
|
|
|
|
|
| 762 |
if middle_image_filepath and middle_frame_number is not None:
|
| 763 |
middle_tensor = self._prepare_conditioning_tensor(middle_image_filepath, height, width, padding_values)
|
| 764 |
safe_middle_frame = max(0, min(int(middle_frame_number), actual_num_frames - 1))
|
| 765 |
conditioning_items.append(ConditioningItem(middle_tensor, safe_middle_frame, float(middle_image_weight)))
|
|
|
|
|
|
|
| 766 |
if end_image_filepath:
|
| 767 |
end_tensor = self._prepare_conditioning_tensor(end_image_filepath, height, width, padding_values)
|
| 768 |
last_frame_index = actual_num_frames - 1
|
| 769 |
conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
|
|
|
|
|
|
|
| 770 |
print(f"[DEBUG] Conditioning items: {len(conditioning_items)}")
|
| 771 |
|
| 772 |
call_kwargs = {
|
|
@@ -889,7 +895,21 @@ class VideoService:
|
|
| 889 |
print(f"[DEBUG] Parte: {num_latent_frames_part - 1} latentes -> {num_pixel_frames_part} frames de pixel (alvo)")
|
| 890 |
|
| 891 |
second_pass_kwargs = call_kwargs.copy()
|
| 892 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 893 |
"output_type": "latent",
|
| 894 |
"width": second_pass_width,
|
| 895 |
"height": second_pass_height,
|
|
@@ -897,7 +917,7 @@ class VideoService:
|
|
| 897 |
"latents": latents, # O tensor upscaled
|
| 898 |
"guidance_scale": float(guidance_scale),
|
| 899 |
**second_pass_config
|
| 900 |
-
|
| 901 |
|
| 902 |
print(f"[DEBUG] Second Pass: Refinando em {width_padded}x{height_padded}...")
|
| 903 |
final_latents = self.pipeline(**second_pass_kwargs).images
|
|
|
|
| 543 |
start = (num_latente_por_chunk*i)
|
| 544 |
end = (start+num_latente_por_chunk+overlap)
|
| 545 |
if i+1 < n_chunks:
|
| 546 |
+
chunk = latents_brutos[:, :, start:end, :, :].detach()
|
| 547 |
print(f"[DEBUG] chunk{i+1}[:, :, {start}:{end}, :, :] = {chunk.shape[2]}")
|
| 548 |
else:
|
| 549 |
+
chunk = latents_brutos[:, :, start:, :, :].detach()
|
| 550 |
print(f"[DEBUG] chunk{i+1}[:, :, {start}:, :, :] = {chunk.shape[2]}")
|
| 551 |
chunks.append(chunk)
|
| 552 |
i+=1
|
|
|
|
| 578 |
#if total % 2 == 1: # ÍMPAR
|
| 579 |
# Ex: 11 → primeira 0..5, segunda 5..10
|
| 580 |
cut = total // 2
|
| 581 |
+
primeira = latents_brutos[:, :, :cut+1, :, :].detach()
|
| 582 |
+
segunda = latents_brutos[:, :, cut:, :, :].detach()
|
| 583 |
|
| 584 |
|
| 585 |
return primeira, segunda
|
|
|
|
| 759 |
if mode == "image-to-video":
|
| 760 |
start_tensor = self._prepare_conditioning_tensor(start_image_filepath, height, width, padding_values)
|
| 761 |
conditioning_items.append(ConditioningItem(start_tensor, 0, 1.0))
|
| 762 |
+
conditioning_items1.append(ConditioningItem(start_tensor, 0, 1.0))
|
| 763 |
+
|
| 764 |
if middle_image_filepath and middle_frame_number is not None:
|
| 765 |
middle_tensor = self._prepare_conditioning_tensor(middle_image_filepath, height, width, padding_values)
|
| 766 |
safe_middle_frame = max(0, min(int(middle_frame_number), actual_num_frames - 1))
|
| 767 |
conditioning_items.append(ConditioningItem(middle_tensor, safe_middle_frame, float(middle_image_weight)))
|
| 768 |
+
conditioning_items1.append(ConditioningItem(middle_tensor, safe_middle_frame, float(middle_image_weight)))
|
| 769 |
+
|
| 770 |
if end_image_filepath:
|
| 771 |
end_tensor = self._prepare_conditioning_tensor(end_image_filepath, height, width, padding_values)
|
| 772 |
last_frame_index = actual_num_frames - 1
|
| 773 |
conditioning_items.append(ConditioningItem(end_tensor, last_frame_index, float(end_image_weight)))
|
| 774 |
+
conditioning_items2.append(ConditioningItem(end_tensor, last_frame_index//2, 1.0))
|
| 775 |
+
|
| 776 |
print(f"[DEBUG] Conditioning items: {len(conditioning_items)}")
|
| 777 |
|
| 778 |
call_kwargs = {
|
|
|
|
| 895 |
print(f"[DEBUG] Parte: {num_latent_frames_part - 1} latentes -> {num_pixel_frames_part} frames de pixel (alvo)")
|
| 896 |
|
| 897 |
second_pass_kwargs = call_kwargs.copy()
|
| 898 |
+
|
| 899 |
+
if i==0:
|
| 900 |
+
second_pass_kwargs.update({
|
| 901 |
+
"conditioning_items": conditioning_items1,
|
| 902 |
+
"output_type": "latent",
|
| 903 |
+
"width": second_pass_width,
|
| 904 |
+
"height": second_pass_height,
|
| 905 |
+
"num_frames": num_pixel_frames_part,
|
| 906 |
+
"latents": latents, # O tensor upscaled
|
| 907 |
+
"guidance_scale": float(guidance_scale),
|
| 908 |
+
**second_pass_config
|
| 909 |
+
})
|
| 910 |
+
else:
|
| 911 |
+
second_pass_kwargs.update({
|
| 912 |
+
"conditioning_items": conditioning_items2,
|
| 913 |
"output_type": "latent",
|
| 914 |
"width": second_pass_width,
|
| 915 |
"height": second_pass_height,
|
|
|
|
| 917 |
"latents": latents, # O tensor upscaled
|
| 918 |
"guidance_scale": float(guidance_scale),
|
| 919 |
**second_pass_config
|
| 920 |
+
})
|
| 921 |
|
| 922 |
print(f"[DEBUG] Second Pass: Refinando em {width_padded}x{height_padded}...")
|
| 923 |
final_latents = self.pipeline(**second_pass_kwargs).images
|