Update LTX-Video/ltx_video/pipelines/pipeline_ltx_video.py
LTX-Video/ltx_video/pipelines/pipeline_ltx_video.py CHANGED
@@ -199,11 +199,10 @@ def retrieve_timesteps(
         scheduler.set_timesteps(timesteps=timesteps, device=device, **kwargs)
         num_inference_steps = len(timesteps)
 
-
-
-
-
-    # print(f"timesteps {timesteps}")
+    try:
+        print(f"[LTX]LATENTS {latents.shape}")
+    except Exception:
+        pass
 
 
     return timesteps, num_inference_steps
@@ -900,11 +899,11 @@ class LTXVideoPipeline(DiffusionPipeline):
             returned where the first element is a list with the generated images
         """
 
-
-
-
-
-
+        try:
+            print(f"[LTX]LATENTS {latents.shape}")
+        except Exception:
+            pass
+
 
         if "mask_feature" in kwargs:
             deprecation_message = "The use of `mask_feature` is deprecated. It is no longer used in any computation and that doesn't affect the end results. It will be removed in a future version."
@@ -974,11 +973,11 @@ class LTXVideoPipeline(DiffusionPipeline):
             **retrieve_timesteps_kwargs,
         )
 
-
-
-
-
-
+        try:
+            print(f"[LTX2]LATENTS {latents.shape}")
+        except Exception:
+            pass
+
         if self.allowed_inference_steps is not None:
             for timestep in [round(x, 4) for x in timesteps.tolist()]:
                 assert (
@@ -1047,11 +1046,11 @@ class LTXVideoPipeline(DiffusionPipeline):
             max_new_tokens=text_encoder_max_tokens,
         )
 
-
-
-
-
-
+        try:
+            print(f"[LTX3]LATENTS {latents.shape}")
+        except Exception:
+            pass
+
         # 3. Encode input prompt
         if self.text_encoder is not None:
             self.text_encoder = self.text_encoder.to(self._execution_device)
@@ -1118,11 +1117,10 @@ class LTXVideoPipeline(DiffusionPipeline):
         )
 
 
-
-
-
-
-
+        try:
+            print(f"[LTX4]LATENTS {latents.shape}")
+        except Exception:
+            pass
 
         # Update the latents with the conditioning items and patchify them into (b, n, c)
         latents, pixel_coords, conditioning_mask, num_cond_latents = (
@@ -1140,18 +1138,16 @@ class LTXVideoPipeline(DiffusionPipeline):
 
 
 
-
-        print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
-        print(f"skip_final_inference_steps {skip_final_inference_steps}")
-        print(f"latents {latents.shape}")
+
         # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
 
 
-
-
-
-
+        try:
+            print(f"[LTX5]LATENTS {latents.shape}")
+        except Exception:
+            pass
+
         # 7. Denoising loop
         num_warmup_steps = max(
             len(timesteps) - num_inference_steps * self.scheduler.order, 0
@@ -1344,11 +1340,11 @@ class LTXVideoPipeline(DiffusionPipeline):
 
 
 
-
-
-
-
-
+        try:
+            print(f"[LTX6]LATENTS {latents.shape}")
+        except Exception:
+            pass
+
         if offload_to_cpu:
             self.transformer = self.transformer.cpu()
         if self._execution_device == "cuda":
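The same try/except-guarded shape print is pasted at six points in the pipeline. The guard matters because at some call sites the print cannot succeed; in particular, no `latents` name appears to be in scope inside `retrieve_timesteps`, so the guard there likely just swallows a NameError. A minimal sketch of the same pattern as a single reusable helper; the name `_debug_latents` and the `LTX_DEBUG` environment flag are illustrative assumptions, not part of the repo:

    import os

    def _debug_latents(tag: str, latents=None) -> None:
        # Hypothetical helper (not in the repo): best-effort shape logging
        # that mirrors the diff's try/except-and-ignore pattern.
        if os.environ.get("LTX_DEBUG") != "1":
            return
        try:
            print(f"[{tag}]LATENTS {latents.shape}")
        except Exception:
            # latents may be None or missing at a call site; stay silent.
            pass

Each instrumentation point then reduces to one line, e.g. `_debug_latents("LTX5", latents)`, and the prints can be switched off via the environment without editing the pipeline again.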