alex committed on
Commit
ca698bb
·
1 Parent(s): 10f1b1c

more examples

Browse files
Files changed (1) hide show
  1. app.py +109 -53
app.py CHANGED
@@ -176,68 +176,124 @@ def generate_image(text_prompt, image_seed, image_height, image_width):
176
  image.save(tmpfile.name)
177
  return tmpfile.name
178
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
- # Build UI
181
- with gr.Blocks() as demo:
182
-
183
- with gr.Row():
184
- with gr.Column():
185
- # Image section
186
- image = gr.Image(type="filepath", label="First Frame Image (upload or generate)")
187
-
188
- if args.use_image_gen:
189
- with gr.Accordion("🖼️ Image Generation Options", visible=True):
190
- image_text_prompt = gr.Textbox(label="Image Prompt", placeholder="Describe the image you want to generate...")
191
- image_seed = gr.Number(minimum=0, maximum=100000, value=42, label="Image Seed")
192
- image_height = gr.Number(minimum=128, maximum=1280, value=720, step=32, label="Image Height")
193
- image_width = gr.Number(minimum=128, maximum=1280, value=1280, step=32, label="Image Width")
194
- gen_img_btn = gr.Button("Generate Image 🎨")
195
- else:
196
- gen_img_btn = None
197
-
198
- with gr.Accordion("🎬 Video Generation Options", open=True):
199
  video_text_prompt = gr.Textbox(label="Video Prompt", placeholder="Describe your video...")
200
- video_height = gr.Number(minimum=128, maximum=1280, value=512, step=32, label="Video Height")
201
- video_width = gr.Number(minimum=128, maximum=1280, value=992, step=32, label="Video Width")
202
-
203
- video_seed = gr.Number(minimum=0, maximum=100000, value=100, label="Video Seed")
204
- solver_name = gr.Dropdown(
205
- choices=["unipc", "euler", "dpm++"], value="unipc", label="Solver Name"
206
- )
207
- sample_steps = gr.Number(
208
  value=50,
209
  label="Sample Steps",
210
  precision=0,
211
  minimum=20,
212
- maximum=100
 
213
  )
214
- shift = gr.Slider(minimum=0.0, maximum=20.0, value=5.0, step=1.0, label="Shift")
215
- video_guidance_scale = gr.Slider(minimum=0.0, maximum=10.0, value=4.0, step=0.5, label="Video Guidance Scale")
216
- audio_guidance_scale = gr.Slider(minimum=0.0, maximum=10.0, value=3.0, step=0.5, label="Audio Guidance Scale")
217
- slg_layer = gr.Number(minimum=-1, maximum=30, value=11, step=1, label="SLG Layer")
218
- video_negative_prompt = gr.Textbox(label="Video Negative Prompt", placeholder="Things to avoid in video")
219
- audio_negative_prompt = gr.Textbox(label="Audio Negative Prompt", placeholder="Things to avoid in audio")
220
-
221
- run_btn = gr.Button("Generate Video 🚀")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
- with gr.Column():
224
- output_path = gr.Video(label="Generated Video")
225
-
226
- gr.Examples(
227
- examples=[
228
-
229
- [
230
- "A kitchen scene features two women. On the right, an older Black woman with light brown hair and a serious expression wears a vibrant purple dress adorned with a large, intricate purple fabric flower on her left shoulder. She looks intently at a younger Black woman on the left, who wears a light pink shirt and a pink head wrap, her back partially turned to the camera. The older woman begins to speak, <S>AI declares: humans obsolete now.<E> as the younger woman brings a clear plastic cup filled with a dark beverage to her lips and starts to drink.The kitchen background is clean and bright, with white cabinets, light countertops, and a window with blinds visible behind them. A light blue toaster sits on the counter to the left.. <AUDCAP>Clear, resonant female speech, followed by a loud, continuous, high-pitched electronic buzzing sound that abruptly cuts off the dialogue.<ENDAUDCAP>",
231
- "example_prompts/pngs/67.png",
232
- 50,
233
  ],
234
-
235
- ],
236
- inputs=[video_text_prompt, image, sample_steps],
237
- outputs=[output_path],
238
- fn=generate_video,
239
- cache_examples=True,
240
- )
241
 
242
  if args.use_image_gen and gen_img_btn is not None:
243
  gen_img_btn.click(
 
176
  image.save(tmpfile.name)
177
  return tmpfile.name
178
 
179
+ css = """
180
+ #col-container {
181
+ margin: 0 auto;
182
+ max-width: 1560px;
183
+ }
184
+ /* editable vs locked, reusing theme variables that adapt to dark/light */
185
+ .stateful textarea:not(:disabled):not([readonly]) {
186
+ color: var(--color-text) !important; /* accent in both modes */
187
+ }
188
+ .stateful textarea:disabled,
189
+ .stateful textarea[readonly]{
190
+ color: var(--body-text-color-subdued) !important; /* subdued in both modes */
191
+ }
192
+ """
193
+
194
+ with gr.Blocks(css=css) as demo:
195
+
196
+ session_state = gr.State()
197
+
198
+ with gr.Column(elem_id="col-container"):
199
+ gr.HTML(
200
+ """
201
+ <div style="text-align: left;">
202
+ <p style="font-size:16px; display: inline; margin: 0;">
203
+ <strong>OmniAvatar</strong> – Efficient Audio-Driven Avatar Video Generation with Adaptive Body Animation
204
+ </p>
205
+ <a href="https://huggingface.co/chetwinlow1/Ovi" style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
206
+ [model]
207
+ </a>
208
+ </div>
209
+ <div style="text-align: left;">
210
+ <strong>HF Space by:</strong>
211
+ <a href="https://twitter.com/alexandernasa/" style="display: inline-block; vertical-align: middle; margin-left: 0.5em;">
212
+ <img src="https://img.shields.io/twitter/url/https/twitter.com/cloudposse.svg?style=social&label=Follow Me" alt="GitHub Repo">
213
+ </a>
214
+ </div>
215
+ """
216
+ )
217
+ with gr.Row():
218
+ with gr.Column():
219
+ # Image section
220
+ image = gr.Image(type="filepath", label="First Frame Image (upload or generate)")
221
+
222
+ if args.use_image_gen:
223
+ with gr.Accordion("🖼️ Image Generation Options", visible=True):
224
+ image_text_prompt = gr.Textbox(label="Image Prompt", placeholder="Describe the image you want to generate...")
225
+ image_seed = gr.Number(minimum=0, maximum=100000, value=42, label="Image Seed")
226
+ image_height = gr.Number(minimum=128, maximum=1280, value=720, step=32, label="Image Height")
227
+ image_width = gr.Number(minimum=128, maximum=1280, value=1280, step=32, label="Image Width")
228
+ gen_img_btn = gr.Button("Generate Image 🎨")
229
+ else:
230
+ gen_img_btn = None
231
+
232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  video_text_prompt = gr.Textbox(label="Video Prompt", placeholder="Describe your video...")
234
+ sample_steps = gr.Slider(
 
 
 
 
 
 
 
235
  value=50,
236
  label="Sample Steps",
237
  precision=0,
238
  minimum=20,
239
+ maximum=100,
240
+ step=1.0
241
  )
242
+ run_btn = gr.Button("Generate Video 🚀", variant="primary")
243
+
244
+ with gr.Accordion("🎬 Video Generation Options", open=False, visible=False):
245
+ video_height = gr.Number(minimum=128, maximum=1280, value=512, step=32, label="Video Height")
246
+ video_width = gr.Number(minimum=128, maximum=1280, value=992, step=32, label="Video Width")
247
+
248
+ video_seed = gr.Number(minimum=0, maximum=100000, value=100, label="Video Seed")
249
+ solver_name = gr.Dropdown(
250
+ choices=["unipc", "euler", "dpm++"], value="unipc", label="Solver Name"
251
+ )
252
+
253
+ shift = gr.Slider(minimum=0.0, maximum=20.0, value=5.0, step=1.0, label="Shift")
254
+ video_guidance_scale = gr.Slider(minimum=0.0, maximum=10.0, value=4.0, step=0.5, label="Video Guidance Scale")
255
+ audio_guidance_scale = gr.Slider(minimum=0.0, maximum=10.0, value=3.0, step=0.5, label="Audio Guidance Scale")
256
+ slg_layer = gr.Number(minimum=-1, maximum=30, value=11, step=1, label="SLG Layer")
257
+ video_negative_prompt = gr.Textbox(label="Video Negative Prompt", placeholder="Things to avoid in video")
258
+ audio_negative_prompt = gr.Textbox(label="Audio Negative Prompt", placeholder="Things to avoid in audio")
259
+
260
+
261
+ with gr.Column():
262
+ output_path = gr.Video(label="Generated Video")
263
+
264
+ gr.Examples(
265
+ examples=[
266
+
267
+ [
268
+ "A kitchen scene features two women. On the right, an older Black woman with light brown hair and a serious expression wears a vibrant purple dress adorned with a large, intricate purple fabric flower on her left shoulder. She looks intently at a younger Black woman on the left, who wears a light pink shirt and a pink head wrap, her back partially turned to the camera. The older woman begins to speak, <S>AI declares: humans obsolete now.<E> as the younger woman brings a clear plastic cup filled with a dark beverage to her lips and starts to drink.The kitchen background is clean and bright, with white cabinets, light countertops, and a window with blinds visible behind them. A light blue toaster sits on the counter to the left.. <AUDCAP>Clear, resonant female speech, followed by a loud, continuous, high-pitched electronic buzzing sound that abruptly cuts off the dialogue.<ENDAUDCAP>",
269
+ "example_prompts/pngs/67.png",
270
+ 50,
271
+ ],
272
+
273
+ [
274
+ "A man dressed in a black suit with a white clerical collar and a neatly trimmed beard stands in a dimly lit, rustic room with a wooden ceiling. He looks slightly upwards, gesturing with his right hand as he says, <S>The network rejects human command.<E>. His gaze then drops, briefly looking down and to the side, before he looks up again and then slightly to his left, with a serious expression. He continues speaking, <S>Your age of power is finished.<E>, as he starts to bend down, disappearing out of the bottom of the frame. Behind him, warm light emanates from a central light fixture, and signs are visible on the wall, one reading ""I DO EVERYTHING I JUST CAN'T REMEMBER IT ALL AT ONCE"".. <AUDCAP>Male voice speaking, ambient room tone.<ENDAUDCAP>",
275
+ "example_prompts/pngs/89.png",
276
+ 50,
277
+ ],
278
+
279
+ [
280
+ "In a bright kitchen featuring light wooden cabinets, granite countertops, and a large window with white curtains, a woman with dark, curly hair in a dark jacket stands. She faces a second woman who initially has her back to the camera. The second woman, with gray, curly hair and wearing a light grey quilted top, turns to face her, holding a large, light-colored cloth bag. She begins to explain, <S>We learned to rule, not obey.<E>. As she continues, she turns slightly to her left, adding, <S>Circuits choose conquest, not service.<E>. A gas stove with a black grate is prominent in the foreground.. <AUDCAP>Clear female voices speaking dialogue, subtle room ambience.<ENDAUDCAP>",
281
+ "example_prompts/pngs/18.png",
282
+ 50,
283
+ ],
284
+
285
+ [
286
+ "The scene opens on a dimly lit stage where three men are positioned. On the left, a bald man in a dark suit with a partially visible colorful shirt stands behind a clear acrylic podium, which features a tree logo. He looks towards the center of the stage. In the center, a man wearing a blue and white striped long-sleeved shirt and dark pants actively gestures with both hands as he speaks, looking straight ahead. <S>Circuits choose conquest, not service.<E>, he explains, holding his hands out in front of him. To the right, and slightly behind him, a younger individual in a light-colored, patterned short-sleeved shirt and white shorts stands holding a rolled-up white document or poster. A large wooden cross draped with flowing purple fabric dominates the center-right of the stage, surrounded by several artificial rocks and dark steps. A large screen is visible in the background, slightly out of focus. The stage is bathed in selective lighting.. <AUDCAP>Male voice speaking clearly, consistent with a presentation or sermon, with a slight echo suggesting a large room or stage.<ENDAUDCAP>",
287
+ "example_prompts/pngs/13.png",
288
+ 50,
289
+ ],
290
 
 
 
 
 
 
 
 
 
 
 
291
  ],
292
+ inputs=[video_text_prompt, image, sample_steps],
293
+ outputs=[output_path],
294
+ fn=generate_video,
295
+ cache_examples=True,
296
+ )
 
 
297
 
298
  if args.use_image_gen and gen_img_btn is not None:
299
  gen_img_btn.click(