Update LTX-Video/ltx_video/pipelines/pipeline_ltx_video.py
LTX-Video/ltx_video/pipelines/pipeline_ltx_video.py
CHANGED
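This commit silences debug output by commenting statements out rather than deleting them: the module-level `logger` assignment (line 60), the tokenizer-truncation `logger.warning` in prompt encoding (lines 406-409), a `timesteps` print in `retrieve_timesteps` (line 206), and the `latents.shape` print at each numbered `ADUC DEBUG` call site in `__call__` (lines 906, 980, 1053, 1124, 1146, 1154, and 1350).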
```diff
@@ -57,7 +57,7 @@ logging.set_verbosity_info()
 logging.set_verbosity_debug()
 
 
-logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
+#logger = logging.get_logger(__name__)  # pylint: disable=invalid-name
 
 
 ASPECT_RATIO_1024_BIN = {
@@ -203,7 +203,7 @@ def retrieve_timesteps(
     print(f"[ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
     print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
     print(f"skip_final_inference_steps {skip_final_inference_steps}")
-    print(f"timesteps {timesteps}")
+    # print(f"timesteps {timesteps}")
 
 
     return timesteps, num_inference_steps
@@ -403,10 +403,10 @@ class LTXVideoPipeline(DiffusionPipeline):
             removed_text = self.tokenizer.batch_decode(
                 untruncated_ids[:, max_length - 1 : -1]
             )
-            logger.warning(
-                "The following part of your input was truncated because CLIP can only handle sequences up to"
-                f" {max_length} tokens: {removed_text}"
-            )
+            #logger.warning(
+            #    "The following part of your input was truncated because CLIP can only handle sequences up to"
+            #    f" {max_length} tokens: {removed_text}"
+            #)
 
         prompt_attention_mask = text_inputs.attention_mask
         prompt_attention_mask = prompt_attention_mask.to(text_enc_device)
@@ -903,7 +903,7 @@ class LTXVideoPipeline(DiffusionPipeline):
         print(f"[1ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
         print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
         print(f"skip_final_inference_steps {skip_final_inference_steps}")
-        print(f"latents {latents.shape}")
+        #print(f"latents {latents.shape}")
 
 
         if "mask_feature" in kwargs:
@@ -977,7 +977,7 @@ class LTXVideoPipeline(DiffusionPipeline):
         print(f"[2ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
         print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
         print(f"skip_final_inference_steps {skip_final_inference_steps}")
-        print(f"latents {latents.shape}")
+        #print(f"latents {latents.shape}")
 
         if self.allowed_inference_steps is not None:
             for timestep in [round(x, 4) for x in timesteps.tolist()]:
@@ -1050,7 +1050,7 @@ class LTXVideoPipeline(DiffusionPipeline):
         print(f"[4ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
         print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
         print(f"skip_final_inference_steps {skip_final_inference_steps}")
-        print(f"latents {latents.shape}")
+        #print(f"latents {latents.shape}")
 
         # 3. Encode input prompt
         if self.text_encoder is not None:
@@ -1121,7 +1121,7 @@ class LTXVideoPipeline(DiffusionPipeline):
         print(f"[5ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
         print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
         print(f"skip_final_inference_steps {skip_final_inference_steps}")
-        print(f"latents {latents.shape}")
+        #print(f"latents {latents.shape}")
 
 
         # Update the latents with the conditioning items and patchify them into (b, n, c)
@@ -1143,7 +1143,7 @@ class LTXVideoPipeline(DiffusionPipeline):
         print(f"[6ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
         print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
         print(f"skip_final_inference_steps {skip_final_inference_steps}")
-        print(f"latents {latents.shape}")
+        #print(f"latents {latents.shape}")
         # 6. Prepare extra step kwargs. TODO: Logic should ideally just be moved out of the pipeline
         extra_step_kwargs = self.prepare_extra_step_kwargs(generator, eta)
 
@@ -1151,7 +1151,7 @@ class LTXVideoPipeline(DiffusionPipeline):
         print(f"[7ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
         print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
         print(f"skip_final_inference_steps {skip_final_inference_steps}")
-        print(f"latents {latents.shape}")
+        #print(f"latents {latents.shape}")
         # 7. Denoising loop
         num_warmup_steps = max(
             len(timesteps) - num_inference_steps * self.scheduler.order, 0
@@ -1347,7 +1347,7 @@ class LTXVideoPipeline(DiffusionPipeline):
         print(f"[8ADUC DEBUG LTX *causal_video_autoencoder.py*]=======")
         print(f"skip_initial_inference_steps {skip_initial_inference_steps}")
         print(f"skip_final_inference_steps {skip_final_inference_steps}")
-        print(f"latents {latents.shape}")
+        #print(f"latents {latents.shape}")
 
         if offload_to_cpu:
             self.transformer = self.transformer.cpu()
```
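Since the file already routes verbosity through diffusers' logging helpers (`logging.set_verbosity_debug()` on line 57), the same output could be emitted via `logger.debug` instead of hand-commented `print` calls. A minimal sketch under that assumption; `log_step_debug` and its `tag` parameter are hypothetical names introduced here, not part of the repo:

```python
from diffusers.utils import logging

logger = logging.get_logger(__name__)  # pylint: disable=invalid-name


def log_step_debug(tag, skip_initial_inference_steps, skip_final_inference_steps, latents):
    # Hypothetical helper: emits the same per-site debug info as the
    # commented-out prints, but only when logging.set_verbosity_debug()
    # is active, so call sites never need manual commenting.
    logger.debug(
        "[%s] skip_initial_inference_steps=%s skip_final_inference_steps=%s latents=%s",
        tag,
        skip_initial_inference_steps,
        skip_final_inference_steps,
        tuple(latents.shape) if latents is not None else None,
    )


# Hypothetical usage at one of the patched call sites:
# log_step_debug("7ADUC", skip_initial_inference_steps, skip_final_inference_steps, latents)
```

With this approach, switching back to `logging.set_verbosity_info()` silences the debug lines globally without touching the pipeline source again.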