Commit 25d3956 · Update app.py
Parent: 0f5fa8e

app.py CHANGED
@@ -20,6 +20,7 @@ from sgm.util import default, instantiate_from_config
 
 import gradio as gr
 import uuid
+import random
 from huggingface_hub import hf_hub_download
 
 hf_hub_download(repo_id="stabilityai/stable-video-diffusion-img2vid-xt", filename="svd_xt.safetensors", local_dir="checkpoints")
@@ -67,11 +68,12 @@ model, filter = load_model(
 
 def sample(
     input_path: str = "assets/test_image.png",  # Can either be image file or folder with image files
+    seed: Optional[int] = None,
+    randomize_seed: bool = True,
     version: str = "svd_xt",
     fps_id: int = 6,
     motion_bucket_id: int = 127,
     cond_aug: float = 0.02,
-    seed: int = 23,
     decoding_t: int = 7,  # Number of frames decoded at a time! This eats most VRAM. Reduce if necessary.
     device: str = "cuda",
     output_folder: str = "outputs",
@@ -81,6 +83,10 @@ def sample(
     Simple script to generate a single sample conditioned on an image `input_path` or multiple images, one for each
     image file in folder `input_path`. If you run out of VRAM, try decreasing `decoding_t`.
     """
+    if(randomize_seed):
+        max_64_bit_int = 2**63 - 1
+        seed = random.randint(0, max_64_bit_int)
+
     torch.manual_seed(seed)
 
     path = Path(input_path)
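The randomization introduced above can be read in isolation. Below is a minimal sketch of the same pattern (the helper name `resolve_seed` is illustrative, not from the repo); note that the `Optional[int]` annotation in the new signature presumes `from typing import Optional` is already imported elsewhere in app.py:

```python
import random
from typing import Optional

import torch

MAX_64_BIT_INT = 2**63 - 1  # upper bound used by the commit; fits a signed 64-bit int

def resolve_seed(seed: Optional[int] = None, randomize_seed: bool = True) -> int:
    # Same branch the commit adds; the extra `seed is None` guard is a
    # defensive addition here, since torch.manual_seed(None) would raise
    # if randomization were disabled without an explicit seed.
    if randomize_seed or seed is None:
        seed = random.randint(0, MAX_64_BIT_INT)
    torch.manual_seed(seed)
    return seed  # surfaced so callers can report which seed was used
```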
@@ -213,7 +219,7 @@ def sample(
         writer.write(frame)
     writer.release()
 
-    return video_path
+    return video_path, seed
 
 def get_unique_embedder_keys_from_conditioner(conditioner):
     return list(set([x.input_key for x in conditioner.embedders]))
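Since `sample` now returns a `(video_path, seed)` pair rather than a bare path, direct callers unpack two values. A usage sketch with illustrative arguments:

```python
# Reproducible run: pin a seed and opt out of randomization.
video_path, seed = sample("assets/test_image.png", seed=23, randomize_seed=False)

# Exploratory run: the defaults draw a fresh seed and report it back.
video_path, seed = sample("assets/test_image.png")
print(f"{video_path} was generated with seed {seed}")
```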
@@ -296,13 +302,18 @@ with gr.Blocks() as demo:
   gr.Markdown('''# Stable Video Diffusion - Image2Video - XT
   Generate 25 frames of video from a single image at 6 fps. Each generation takes ~60s on the A100. [Join the waitlist](https://stability.ai/contact) for a native web experience for video.
   ''')
-  with gr.
-  with gr.
+  with gr.Row():
+    with gr.Column():
       image = gr.Image(label="Upload your image (it will be center cropped to 1024x576)", type="filepath")
-
-
+      generate_btn = gr.Button("Generate")
+    video = gr.Video()
+  with gr.Accordion(open=False):
+    seed = gr.Slider(label="Seed", value=42, randomize=True, minimum=0, maximum=max_64_bit_int)
+    randomize_seed = gr.Checkbox("Randomize seed")
+
+
   image.upload(fn=resize_image, inputs=image, outputs=image, queue=False)
-  generate_btn.click(fn=sample, inputs=image, outputs=video, api_name="video")
-
+  generate_btn.click(fn=sample, inputs=[image, seed, randomize_seed], outputs=[video, seed], api_name="video")
+
 if __name__ == "__main__":
     demo.launch(share=True)
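With the click handler wired to `inputs=[image, seed, randomize_seed]` and `outputs=[video, seed]`, the endpoint registered under `api_name="video"` now takes three inputs and returns two values. A rough sketch of a programmatic call with `gradio_client` (the Space id is a placeholder for wherever this demo is hosted):

```python
from gradio_client import Client

client = Client("user/stable-video-diffusion")  # placeholder Space id

video_path, used_seed = client.predict(
    "input.png",  # image: path to the conditioning image
    42,           # seed: ignored while randomize_seed is True
    True,         # randomize_seed
    api_name="/video",
)
print(f"video: {video_path}, seed: {used_seed}")
```

Depending on the gradio_client version, the image argument may need to be wrapped with `handle_file()` instead of being passed as a bare path.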