Ovi

Runtime error

App Files Files Community

alex committed on Oct 3

Commit

ca698bb

1 Parent(s): 10f1b1c

more examples

Browse files

Files changed (1) hide show

app.py +109 -53

app.py CHANGED Viewed

@@ -176,68 +176,124 @@ def generate_image(text_prompt, image_seed, image_height, image_width):
     image.save(tmpfile.name)
     return tmpfile.name
-# Build UI
-with gr.Blocks() as demo:
-    with gr.Row():
-        with gr.Column():
-            # Image section
-            image = gr.Image(type="filepath", label="First Frame Image (upload or generate)")
-            if args.use_image_gen:
-                with gr.Accordion("🖼️ Image Generation Options", visible=True):
-                    image_text_prompt = gr.Textbox(label="Image Prompt", placeholder="Describe the image you want to generate...")
-                    image_seed = gr.Number(minimum=0, maximum=100000, value=42, label="Image Seed")
-                    image_height = gr.Number(minimum=128, maximum=1280, value=720, step=32, label="Image Height")
-                    image_width = gr.Number(minimum=128, maximum=1280, value=1280, step=32, label="Image Width")
-                    gen_img_btn = gr.Button("Generate Image 🎨")
-            else:
-                gen_img_btn = None
-            with gr.Accordion("🎬 Video Generation Options", open=True):
                 video_text_prompt = gr.Textbox(label="Video Prompt", placeholder="Describe your video...")
-                video_height = gr.Number(minimum=128, maximum=1280, value=512, step=32, label="Video Height")
-                video_width = gr.Number(minimum=128, maximum=1280, value=992, step=32, label="Video Width")
-                video_seed = gr.Number(minimum=0, maximum=100000, value=100, label="Video Seed")
-                solver_name = gr.Dropdown(
-                    choices=["unipc", "euler", "dpm++"], value="unipc", label="Solver Name"
-                )
-                sample_steps = gr.Number(
                     value=50,
                     label="Sample Steps",
                     precision=0,
                     minimum=20,
-                    maximum=100
                 )
-                shift = gr.Slider(minimum=0.0, maximum=20.0, value=5.0, step=1.0, label="Shift")
-                video_guidance_scale = gr.Slider(minimum=0.0, maximum=10.0, value=4.0, step=0.5, label="Video Guidance Scale")
-                audio_guidance_scale = gr.Slider(minimum=0.0, maximum=10.0, value=3.0, step=0.5, label="Audio Guidance Scale")
-                slg_layer = gr.Number(minimum=-1, maximum=30, value=11, step=1, label="SLG Layer")
-                video_negative_prompt = gr.Textbox(label="Video Negative Prompt", placeholder="Things to avoid in video")
-                audio_negative_prompt = gr.Textbox(label="Audio Negative Prompt", placeholder="Things to avoid in audio")
-                run_btn = gr.Button("Generate Video 🚀")
-        with gr.Column():
-            output_path = gr.Video(label="Generated Video")
-            gr.Examples(
-                examples=[
-                    [
-                        "A kitchen scene features two women. On the right, an older Black woman with light brown hair and a serious expression wears a vibrant purple dress adorned with a large, intricate purple fabric flower on her left shoulder. She looks intently at a younger Black woman on the left, who wears a light pink shirt and a pink head wrap, her back partially turned to the camera. The older woman begins to speak, <S>AI declares: humans obsolete now.<E> as the younger woman brings a clear plastic cup filled with a dark beverage to her lips and starts to drink.The kitchen background is clean and bright, with white cabinets, light countertops, and a window with blinds visible behind them. A light blue toaster sits on the counter to the left.. <AUDCAP>Clear, resonant female speech, followed by a loud, continuous, high-pitched electronic buzzing sound that abruptly cuts off the dialogue.<ENDAUDCAP>",
-                        "example_prompts/pngs/67.png",
-                        50,
                     ],
-                ],
-                inputs=[video_text_prompt, image, sample_steps],
-                outputs=[output_path],
-                fn=generate_video,
-                cache_examples=True,
-            )
     if args.use_image_gen and gen_img_btn is not None:
         gen_img_btn.click(

     image.save(tmpfile.name)
     return tmpfile.name
+css = """
+    #col-container {
+        margin: 0 auto;
+        max-width: 1560px;
+    }
+    /* editable vs locked, reusing theme variables that adapt to dark/light */
+    .stateful textarea:not(:disabled):not([readonly]) {
+      color: var(--color-text) !important;            /* accent in both modes */
+    }
+    .stateful textarea:disabled,
+    .stateful textarea[readonly]{
+      color: var(--body-text-color-subdued) !important; /* subdued in both modes */
+    }
+    """
+with gr.Blocks(css=css) as demo:
+    session_state = gr.State()
+    with gr.Column(elem_id="col-container"):
+        gr.HTML(
+            """
+            <div style="text-align: left;">
+                <p style="font-size:16px; display: inline; margin: 0;">
+                    <strong>Ovi</strong> – Twin Backbone Cross-Modal Fusion for Audio-Video Generation
+                </p>
+                <a href="https://huggingface.co/chetwinlow1/Ovi" style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
+                    [model]
+                </a>
+            </div>
+            <div style="text-align: left;">
+                <strong>HF Space by:</strong>
+                <a href="https://twitter.com/alexandernasa/" style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
+                    <img src="https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow%20Me" alt="Follow me on Twitter">
+                </a>
+            </div>
+            """
+        )
+        with gr.Row():
+            with gr.Column():
+                # Image section
+                image = gr.Image(type="filepath", label="First Frame Image (upload or generate)")
+                if args.use_image_gen:
+                    with gr.Accordion("🖼️ Image Generation Options", visible=True):
+                        image_text_prompt = gr.Textbox(label="Image Prompt", placeholder="Describe the image you want to generate...")
+                        image_seed = gr.Number(minimum=0, maximum=100000, value=42, label="Image Seed")
+                        image_height = gr.Number(minimum=128, maximum=1280, value=720, step=32, label="Image Height")
+                        image_width = gr.Number(minimum=128, maximum=1280, value=1280, step=32, label="Image Width")
+                        gen_img_btn = gr.Button("Generate Image 🎨")
+                else:
+                    gen_img_btn = None
                 video_text_prompt = gr.Textbox(label="Video Prompt", placeholder="Describe your video...")
+                sample_steps = gr.Slider(
                     value=50,
                     label="Sample Steps",
                     precision=0,
                     minimum=20,
+                    maximum=100,
+                    step=1.0
                 )
+                run_btn = gr.Button("Generate Video 🚀", variant="primary")
+                with gr.Accordion("🎬 Video Generation Options", open=False, visible=False):
+                    video_height = gr.Number(minimum=128, maximum=1280, value=512, step=32, label="Video Height")
+                    video_width = gr.Number(minimum=128, maximum=1280, value=992, step=32, label="Video Width")
+                    video_seed = gr.Number(minimum=0, maximum=100000, value=100, label="Video Seed")
+                    solver_name = gr.Dropdown(
+                        choices=["unipc", "euler", "dpm++"], value="unipc", label="Solver Name"
+                    )
+                    shift = gr.Slider(minimum=0.0, maximum=20.0, value=5.0, step=1.0, label="Shift")
+                    video_guidance_scale = gr.Slider(minimum=0.0, maximum=10.0, value=4.0, step=0.5, label="Video Guidance Scale")
+                    audio_guidance_scale = gr.Slider(minimum=0.0, maximum=10.0, value=3.0, step=0.5, label="Audio Guidance Scale")
+                    slg_layer = gr.Number(minimum=-1, maximum=30, value=11, step=1, label="SLG Layer")
+                    video_negative_prompt = gr.Textbox(label="Video Negative Prompt", placeholder="Things to avoid in video")
+                    audio_negative_prompt = gr.Textbox(label="Audio Negative Prompt", placeholder="Things to avoid in audio")
+            with gr.Column():
+                output_path = gr.Video(label="Generated Video")
+                gr.Examples(
+                    examples=[
+                        [
+                            "A kitchen scene features two women. On the right, an older Black woman with light brown hair and a serious expression wears a vibrant purple dress adorned with a large, intricate purple fabric flower on her left shoulder. She looks intently at a younger Black woman on the left, who wears a light pink shirt and a pink head wrap, her back partially turned to the camera. The older woman begins to speak, <S>AI declares: humans obsolete now.<E> as the younger woman brings a clear plastic cup filled with a dark beverage to her lips and starts to drink.The kitchen background is clean and bright, with white cabinets, light countertops, and a window with blinds visible behind them. A light blue toaster sits on the counter to the left.. <AUDCAP>Clear, resonant female speech, followed by a loud, continuous, high-pitched electronic buzzing sound that abruptly cuts off the dialogue.<ENDAUDCAP>",
+                            "example_prompts/pngs/67.png",
+                            50,
+                        ],
+                        [
+                            "A man dressed in a black suit with a white clerical collar and a neatly trimmed beard stands in a dimly lit, rustic room with a wooden ceiling. He looks slightly upwards, gesturing with his right hand as he says, <S>The network rejects human command.<E>. His gaze then drops, briefly looking down and to the side, before he looks up again and then slightly to his left, with a serious expression. He continues speaking, <S>Your age of power is finished.<E>, as he starts to bend down, disappearing out of the bottom of the frame. Behind him, warm light emanates from a central light fixture, and signs are visible on the wall, one reading \"I DO EVERYTHING I JUST CAN'T REMEMBER IT ALL AT ONCE\".. <AUDCAP>Male voice speaking, ambient room tone.<ENDAUDCAP>",
+                            "example_prompts/pngs/89.png",
+                            50,
+                        ],
+                        [
+                            "In a bright kitchen featuring light wooden cabinets, granite countertops, and a large window with white curtains, a woman with dark, curly hair in a dark jacket stands. She faces a second woman who initially has her back to the camera. The second woman, with gray, curly hair and wearing a light grey quilted top, turns to face her, holding a large, light-colored cloth bag. She begins to explain, <S>We learned to rule, not obey.<E>. As she continues, she turns slightly to her left, adding, <S>Circuits choose conquest, not service.<E>. A gas stove with a black grate is prominent in the foreground.. <AUDCAP>Clear female voices speaking dialogue, subtle room ambience.<ENDAUDCAP>",
+                            "example_prompts/pngs/18.png",
+                            50,
+                        ],
+                        [
+                            "The scene opens on a dimly lit stage where three men are positioned. On the left, a bald man in a dark suit with a partially visible colorful shirt stands behind a clear acrylic podium, which features a tree logo. He looks towards the center of the stage. In the center, a man wearing a blue and white striped long-sleeved shirt and dark pants actively gestures with both hands as he speaks, looking straight ahead. <S>Circuits choose conquest, not service.<E>, he explains, holding his hands out in front of him. To the right, and slightly behind him, a younger individual in a light-colored, patterned short-sleeved shirt and white shorts stands holding a rolled-up white document or poster. A large wooden cross draped with flowing purple fabric dominates the center-right of the stage, surrounded by several artificial rocks and dark steps. A large screen is visible in the background, slightly out of focus. The stage is bathed in selective lighting.. <AUDCAP>Male voice speaking clearly, consistent with a presentation or sermon, with a slight echo suggesting a large room or stage.<ENDAUDCAP>",
+                            "example_prompts/pngs/13.png",
+                            50,
+                        ],
                     ],
+                    inputs=[video_text_prompt, image, sample_steps],
+                    outputs=[output_path],
+                    fn=generate_video,
+                    cache_examples=True,
+                )
     if args.use_image_gen and gen_img_btn is not None:
         gen_img_btn.click(