Files changed (1) hide show
  1. app.py +20 -520
app.py CHANGED
@@ -1,521 +1,21 @@
1
  import gradio as gr
2
- import spaces
3
- import time
4
- import torch
5
- from diffusers import AutoencoderKL, TCDScheduler
6
- from diffusers.models.model_loading_utils import load_state_dict
7
- from gradio_imageslider import ImageSlider
8
- from huggingface_hub import hf_hub_download
9
-
10
- from controlnet_union import ControlNetModel_Union
11
- from pipeline_fill_sd_xl import StableDiffusionXLFillPipeline
12
-
13
- from PIL import Image, ImageDraw
14
- import numpy as np
15
-
16
- config_file = hf_hub_download(
17
- "xinsir/controlnet-union-sdxl-1.0",
18
- filename="config_promax.json",
19
- )
20
-
21
- config = ControlNetModel_Union.load_config(config_file)
22
- controlnet_model = ControlNetModel_Union.from_config(config)
23
- model_file = hf_hub_download(
24
- "xinsir/controlnet-union-sdxl-1.0",
25
- filename="diffusion_pytorch_model_promax.safetensors",
26
- )
27
- state_dict = load_state_dict(model_file)
28
- model, _, _, _, _ = ControlNetModel_Union._load_pretrained_model(
29
- controlnet_model, state_dict, model_file, "xinsir/controlnet-union-sdxl-1.0"
30
- )
31
- model.to(device="cuda", dtype=torch.float16)
32
-
33
- vae = AutoencoderKL.from_pretrained(
34
- "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch.float16
35
- ).to("cuda")
36
-
37
- pipe = StableDiffusionXLFillPipeline.from_pretrained(
38
- "SG161222/RealVisXL_V5.0_Lightning",
39
- torch_dtype=torch.float16,
40
- vae=vae,
41
- controlnet=model,
42
- variant="fp16",
43
- ).to("cuda")
44
-
45
- pipe.scheduler = TCDScheduler.from_config(pipe.scheduler.config)
46
-
47
-
48
- def can_expand(source_width, source_height, target_width, target_height, alignment):
49
- """Checks if the image can be expanded based on the alignment."""
50
- if alignment in ("Left", "Right") and source_width >= target_width:
51
- return False
52
- if alignment in ("Top", "Bottom") and source_height >= target_height:
53
- return False
54
- return True
55
-
56
- def prepare_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom):
57
- target_size = (width, height)
58
-
59
- # Calculate the scaling factor to fit the image within the target size
60
- scale_factor = min(target_size[0] / image.width, target_size[1] / image.height)
61
- new_width = int(image.width * scale_factor)
62
- new_height = int(image.height * scale_factor)
63
-
64
- # Resize the source image to fit within target size
65
- source = image.resize((new_width, new_height), Image.LANCZOS)
66
-
67
- # Apply resize option using percentages
68
- if resize_option == "Full":
69
- resize_percentage = 100
70
- elif resize_option == "50%":
71
- resize_percentage = 50
72
- elif resize_option == "33%":
73
- resize_percentage = 33
74
- elif resize_option == "25%":
75
- resize_percentage = 25
76
- else: # Custom
77
- resize_percentage = custom_resize_percentage
78
-
79
- # Calculate new dimensions based on percentage
80
- resize_factor = resize_percentage / 100
81
- new_width = int(source.width * resize_factor)
82
- new_height = int(source.height * resize_factor)
83
-
84
- # Ensure minimum size of 64 pixels
85
- new_width = max(new_width, 64)
86
- new_height = max(new_height, 64)
87
-
88
- # Resize the image
89
- source = source.resize((new_width, new_height), Image.LANCZOS)
90
-
91
- # Calculate the overlap in pixels based on the percentage
92
- overlap_x = int(new_width * (overlap_percentage / 100))
93
- overlap_y = int(new_height * (overlap_percentage / 100))
94
-
95
- # Ensure minimum overlap of 1 pixel
96
- overlap_x = max(overlap_x, 1)
97
- overlap_y = max(overlap_y, 1)
98
-
99
- # Calculate margins based on alignment
100
- if alignment == "Middle":
101
- margin_x = (target_size[0] - new_width) // 2
102
- margin_y = (target_size[1] - new_height) // 2
103
- elif alignment == "Left":
104
- margin_x = 0
105
- margin_y = (target_size[1] - new_height) // 2
106
- elif alignment == "Right":
107
- margin_x = target_size[0] - new_width
108
- margin_y = (target_size[1] - new_height) // 2
109
- elif alignment == "Top":
110
- margin_x = (target_size[0] - new_width) // 2
111
- margin_y = 0
112
- elif alignment == "Bottom":
113
- margin_x = (target_size[0] - new_width) // 2
114
- margin_y = target_size[1] - new_height
115
-
116
- # Adjust margins to eliminate gaps
117
- margin_x = max(0, min(margin_x, target_size[0] - new_width))
118
- margin_y = max(0, min(margin_y, target_size[1] - new_height))
119
-
120
- # Create a new background image and paste the resized source image
121
- background = Image.new('RGB', target_size, (255, 255, 255))
122
- background.paste(source, (margin_x, margin_y))
123
-
124
- # Create the mask
125
- mask = Image.new('L', target_size, 255)
126
- mask_draw = ImageDraw.Draw(mask)
127
-
128
- # Calculate overlap areas
129
- white_gaps_patch = 2
130
-
131
- left_overlap = margin_x + overlap_x if overlap_left else margin_x + white_gaps_patch
132
- right_overlap = margin_x + new_width - overlap_x if overlap_right else margin_x + new_width - white_gaps_patch
133
- top_overlap = margin_y + overlap_y if overlap_top else margin_y + white_gaps_patch
134
- bottom_overlap = margin_y + new_height - overlap_y if overlap_bottom else margin_y + new_height - white_gaps_patch
135
-
136
- if alignment == "Left":
137
- left_overlap = margin_x + overlap_x if overlap_left else margin_x
138
- elif alignment == "Right":
139
- right_overlap = margin_x + new_width - overlap_x if overlap_right else margin_x + new_width
140
- elif alignment == "Top":
141
- top_overlap = margin_y + overlap_y if overlap_top else margin_y
142
- elif alignment == "Bottom":
143
- bottom_overlap = margin_y + new_height - overlap_y if overlap_bottom else margin_y + new_height
144
-
145
-
146
- # Draw the mask
147
- mask_draw.rectangle([
148
- (left_overlap, top_overlap),
149
- (right_overlap, bottom_overlap)
150
- ], fill=0)
151
-
152
- return background, mask
153
-
154
- def preview_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom):
155
- background, mask = prepare_image_and_mask(image, width, height, overlap_percentage, resize_option, custom_resize_percentage, alignment, overlap_left, overlap_right, overlap_top, overlap_bottom)
156
-
157
- # Create a preview image showing the mask
158
- preview = background.copy().convert('RGBA')
159
-
160
- # Create a semi-transparent red overlay
161
- red_overlay = Image.new('RGBA', background.size, (255, 0, 0, 64)) # Reduced alpha to 64 (25% opacity)
162
-
163
- # Convert black pixels in the mask to semi-transparent red
164
- red_mask = Image.new('RGBA', background.size, (0, 0, 0, 0))
165
- red_mask.paste(red_overlay, (0, 0), mask)
166
-
167
- # Overlay the red mask on the background
168
- preview = Image.alpha_composite(preview, red_mask)
169
-
170
- return preview
171
-
172
- @spaces.GPU()
173
- def infer(
174
- image,
175
- width,
176
- height,
177
- overlap_percentage,
178
- num_inference_steps,
179
- resize_option,
180
- custom_resize_percentage,
181
- prompt_input,
182
- alignment,
183
- overlap_left,
184
- overlap_right,
185
- overlap_top,
186
- overlap_bottom
187
- ):
188
- """
189
- Generate an outpainted image using Stable Diffusion XL with ControlNet guidance.
190
-
191
- This function performs intelligent image outpainting by expanding the input image
192
- according to the specified target dimensions and alignment, generating new content
193
- guided by a textual prompt. It uses a ControlNet-enabled diffusion pipeline to ensure
194
- coherent image extension.
195
-
196
- Args:
197
- image (PIL.Image): The input image to be outpainted.
198
- width (int): The target width of the output image.
199
- height (int): The target height of the output image.
200
- overlap_percentage (int): Percentage of overlap between original and outpainted regions for seamless blending.
201
- num_inference_steps (int): Number of inference steps for image generation. Higher values yield better results.
202
- resize_option (str): Predefined or custom percentage to resize the input image ("Full", "50%", "33%", "25%", or "Custom").
203
- custom_resize_percentage (int): Custom resize percentage if resize_option is "Custom".
204
- prompt_input (str): A text prompt describing desired content for the generated region.
205
- alignment (str): Alignment of the original image within the canvas ("Middle", "Left", "Right", "Top", "Bottom").
206
- overlap_left (bool): Whether to allow blending on the left edge.
207
- overlap_right (bool): Whether to allow blending on the right edge.
208
- overlap_top (bool): Whether to allow blending on the top edge.
209
- overlap_bottom (bool): Whether to allow blending on the bottom edge.
210
-
211
- Yields:
212
- Tuple[PIL.Image, PIL.Image]:
213
- - The intermediate ControlNet input image (showing the masked area).
214
- - The final generated image with the inpainted region.
215
- """
216
- #gr.Info("10 seconds will be used from your daily ZeroGPU time credits.")
217
- background, mask = prepare_image_and_mask(
218
- image, width, height, overlap_percentage,
219
- resize_option, custom_resize_percentage, alignment,
220
- overlap_left, overlap_right, overlap_top, overlap_bottom
221
- )
222
-
223
- if not can_expand(background.width, background.height, width, height, alignment):
224
- alignment = "Middle"
225
-
226
- cnet_image = background.copy()
227
- cnet_image.paste(0, (0, 0), mask)
228
-
229
- final_prompt = f"{prompt_input} , high quality, 4k"
230
-
231
- (
232
- prompt_embeds,
233
- negative_prompt_embeds,
234
- pooled_prompt_embeds,
235
- negative_pooled_prompt_embeds,
236
- ) = pipe.encode_prompt(final_prompt, "cuda", True)
237
-
238
- for image in pipe(
239
- prompt_embeds=prompt_embeds,
240
- negative_prompt_embeds=negative_prompt_embeds,
241
- pooled_prompt_embeds=pooled_prompt_embeds,
242
- negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
243
- image=cnet_image,
244
- num_inference_steps=num_inference_steps
245
- ):
246
- yield cnet_image, image
247
-
248
- #time.sleep(1)
249
- #image = image.convert("RGBA")
250
- #cnet_image.paste(image, (0, 0), mask)
251
-
252
- #return background, cnet_image
253
-
254
-
255
- def clear_result():
256
- """Clears the result ImageSlider."""
257
- return gr.update(value=None)
258
-
259
- def preload_presets(target_ratio, ui_width, ui_height):
260
- """Updates the width and height sliders based on the selected aspect ratio."""
261
- if target_ratio == "9:16":
262
- changed_width = 720
263
- changed_height = 1280
264
- return changed_width, changed_height, gr.update()
265
- elif target_ratio == "16:9":
266
- changed_width = 1280
267
- changed_height = 720
268
- return changed_width, changed_height, gr.update()
269
- elif target_ratio == "1:1":
270
- changed_width = 1024
271
- changed_height = 1024
272
- return changed_width, changed_height, gr.update()
273
- elif target_ratio == "Custom":
274
- return ui_width, ui_height, gr.update(open=True)
275
-
276
- def select_the_right_preset(user_width, user_height):
277
- if user_width == 720 and user_height == 1280:
278
- return "9:16"
279
- elif user_width == 1280 and user_height == 720:
280
- return "16:9"
281
- elif user_width == 1024 and user_height == 1024:
282
- return "1:1"
283
- else:
284
- return "Custom"
285
-
286
- def toggle_custom_resize_slider(resize_option):
287
- return gr.update(visible=(resize_option == "Custom"))
288
-
289
- def update_history(new_image, history):
290
- """Updates the history gallery with the new image."""
291
- time.sleep(1)
292
- if history is None:
293
- history = []
294
- history.insert(0, new_image)
295
- return history
296
-
297
- css = """
298
- .gradio-container {
299
- max-width: 1200px !important;
300
- margin: 0 auto;
301
- }
302
- """
303
-
304
- title = """<h1 align="center">Diffusers Image Outpaint</h1>
305
- <div align="center">Drop an image you would like to extend, pick your expected ratio and hit Generate.</div>
306
- <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
307
- <p style="display: flex;gap: 6px;">
308
- <a href="https://huggingface.co/spaces/fffiloni/diffusers-image-outpaint?duplicate=true">
309
- <img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-md.svg" alt="Duplicate this Space">
310
- </a> to skip the queue and enjoy faster inference on the GPU of your choice
311
- </p>
312
- </div>
313
- """
314
-
315
- with gr.Blocks(css=css) as demo:
316
- with gr.Column():
317
- gr.HTML(title)
318
-
319
- with gr.Row():
320
- with gr.Column():
321
- input_image = gr.Image(
322
- type="pil",
323
- label="Input Image"
324
- )
325
-
326
- with gr.Row():
327
- with gr.Column(scale=2):
328
- prompt_input = gr.Textbox(label="Prompt (Optional)")
329
- with gr.Column(scale=1):
330
- run_button = gr.Button("Generate")
331
-
332
- with gr.Row():
333
- target_ratio = gr.Radio(
334
- label="Expected Ratio",
335
- choices=["9:16", "16:9", "1:1", "Custom"],
336
- value="9:16",
337
- scale=2
338
- )
339
-
340
- alignment_dropdown = gr.Dropdown(
341
- choices=["Middle", "Left", "Right", "Top", "Bottom"],
342
- value="Middle",
343
- label="Alignment"
344
- )
345
-
346
- with gr.Accordion(label="Advanced settings", open=False) as settings_panel:
347
- with gr.Column():
348
- with gr.Row():
349
- width_slider = gr.Slider(
350
- label="Target Width",
351
- minimum=720,
352
- maximum=1536,
353
- step=8,
354
- value=720, # Set a default value
355
- )
356
- height_slider = gr.Slider(
357
- label="Target Height",
358
- minimum=720,
359
- maximum=1536,
360
- step=8,
361
- value=1280, # Set a default value
362
- )
363
-
364
- num_inference_steps = gr.Slider(label="Steps", minimum=4, maximum=12, step=1, value=8)
365
- with gr.Group():
366
- overlap_percentage = gr.Slider(
367
- label="Mask overlap (%)",
368
- minimum=1,
369
- maximum=50,
370
- value=10,
371
- step=1
372
- )
373
- with gr.Row():
374
- overlap_top = gr.Checkbox(label="Overlap Top", value=True)
375
- overlap_right = gr.Checkbox(label="Overlap Right", value=True)
376
- with gr.Row():
377
- overlap_left = gr.Checkbox(label="Overlap Left", value=True)
378
- overlap_bottom = gr.Checkbox(label="Overlap Bottom", value=True)
379
- with gr.Row():
380
- resize_option = gr.Radio(
381
- label="Resize input image",
382
- choices=["Full", "50%", "33%", "25%", "Custom"],
383
- value="Full"
384
- )
385
- custom_resize_percentage = gr.Slider(
386
- label="Custom resize (%)",
387
- minimum=1,
388
- maximum=100,
389
- step=1,
390
- value=50,
391
- visible=False
392
- )
393
-
394
- with gr.Column():
395
- preview_button = gr.Button("Preview alignment and mask")
396
-
397
-
398
- gr.Examples(
399
- examples=[
400
- ["./examples/example_1.webp", 1280, 720, "Middle"],
401
- ["./examples/example_2.jpg", 1440, 810, "Left"],
402
- ["./examples/example_3.jpg", 1024, 1024, "Top"],
403
- ["./examples/example_3.jpg", 1024, 1024, "Bottom"],
404
- ],
405
- inputs=[input_image, width_slider, height_slider, alignment_dropdown],
406
- )
407
-
408
-
409
-
410
- with gr.Column():
411
- result = ImageSlider(
412
- interactive=False,
413
- label="Generated Image",
414
- )
415
- use_as_input_button = gr.Button("Use as Input Image", visible=False)
416
-
417
- history_gallery = gr.Gallery(label="History", columns=6, object_fit="contain", interactive=False)
418
- preview_image = gr.Image(label="Preview")
419
-
420
-
421
-
422
- def use_output_as_input(output_image):
423
- """Sets the generated output as the new input image."""
424
- return gr.update(value=output_image[1])
425
-
426
- use_as_input_button.click(
427
- fn=use_output_as_input,
428
- inputs=[result],
429
- outputs=[input_image],
430
- show_api=False
431
- )
432
-
433
- target_ratio.change(
434
- fn=preload_presets,
435
- inputs=[target_ratio, width_slider, height_slider],
436
- outputs=[width_slider, height_slider, settings_panel],
437
- queue=False,
438
- show_api=False
439
- )
440
-
441
- width_slider.change(
442
- fn=select_the_right_preset,
443
- inputs=[width_slider, height_slider],
444
- outputs=[target_ratio],
445
- queue=False,
446
- show_api=False
447
- )
448
-
449
- height_slider.change(
450
- fn=select_the_right_preset,
451
- inputs=[width_slider, height_slider],
452
- outputs=[target_ratio],
453
- queue=False,
454
- show_api=False
455
- )
456
-
457
- resize_option.change(
458
- fn=toggle_custom_resize_slider,
459
- inputs=[resize_option],
460
- outputs=[custom_resize_percentage],
461
- queue=False,
462
- show_api=False
463
- )
464
-
465
- run_button.click( # Clear the result
466
- fn=clear_result,
467
- inputs=None,
468
- outputs=result,
469
- show_api=False
470
- ).then( # Generate the new image
471
- fn=infer,
472
- inputs=[input_image, width_slider, height_slider, overlap_percentage, num_inference_steps,
473
- resize_option, custom_resize_percentage, prompt_input, alignment_dropdown,
474
- overlap_left, overlap_right, overlap_top, overlap_bottom],
475
- outputs=result,
476
- ).then( # Show the "Use as Input Image" button
477
- fn=lambda: gr.update(visible=True),
478
- inputs=None,
479
- outputs=use_as_input_button,
480
- show_api=False
481
- ).then( # Update the history gallery
482
- fn=lambda x, history: update_history(x[1], history),
483
- inputs=[result, history_gallery],
484
- outputs=history_gallery,
485
- show_api=False
486
- )
487
-
488
- prompt_input.submit( # Clear the result
489
- fn=clear_result,
490
- inputs=None,
491
- outputs=result,
492
- show_api=False
493
- ).then( # Generate the new image
494
- fn=infer,
495
- inputs=[input_image, width_slider, height_slider, overlap_percentage, num_inference_steps,
496
- resize_option, custom_resize_percentage, prompt_input, alignment_dropdown,
497
- overlap_left, overlap_right, overlap_top, overlap_bottom],
498
- outputs=result,
499
- show_api=False
500
- ).then( # Update the history gallery
501
- fn=lambda x, history: update_history(x[1], history),
502
- inputs=[result, history_gallery],
503
- outputs=history_gallery,
504
- show_api=False
505
- ).then( # Show the "Use as Input Image" button
506
- fn=lambda: gr.update(visible=True),
507
- inputs=None,
508
- outputs=use_as_input_button,
509
- show_api=False
510
- )
511
-
512
- preview_button.click(
513
- fn=preview_image_and_mask,
514
- inputs=[input_image, width_slider, height_slider, overlap_percentage, resize_option, custom_resize_percentage, alignment_dropdown,
515
- overlap_left, overlap_right, overlap_top, overlap_bottom],
516
- outputs=preview_image,
517
- queue=False,
518
- show_api=False
519
- )
520
-
521
- demo.queue(max_size=12).launch(share=False, show_error=True, mcp_server=True)
 
1
  import gradio as gr
2
+ import os
3
+ import subprocess
4
+
5
def remove_subtitles(video, *, script="main.py"):
    """Run the subtitle-removal script on ``video`` and return the result path.

    The script is started as ``<python> <script> --input_video <in>
    --output_video <out>`` with the arguments passed as a list, so the input
    path is never interpreted by a shell.

    Args:
        video: Filesystem path of the uploaded video (``str`` or path-like).
        script: Path of the command-line script to run. Defaults to
            ``"main.py"``, resolved relative to the current working directory.

    Returns:
        Path of the processed video. Each call writes into its own fresh
        temporary directory, so concurrent requests cannot overwrite each
        other's result.

    Raises:
        ValueError: If ``video`` is empty or ``None``.
        RuntimeError: If the script exits with a non-zero status or does not
            produce the output file.
    """
    # Local imports: the file's top-level imports only provide os/subprocess.
    import sys
    import tempfile

    if not video:
        raise ValueError("No input video was provided.")

    input_path = os.fspath(video)

    # A unique directory per call replaces the shared "output.mp4" in the
    # working directory. The directory is intentionally not removed here
    # because the caller still has to read the file after we return.
    output_dir = tempfile.mkdtemp(prefix="vsr_")
    output_path = os.path.join(output_dir, "output.mp4")

    # Argument list + no shell: spaces, quotes, ';' or '>' in the uploaded
    # file name are passed through verbatim instead of being parsed.
    cmd = [
        sys.executable or "python",  # same interpreter/venv as this app
        script,
        "--input_video", input_path,
        "--output_video", output_path,
    ]
    completed = subprocess.run(cmd, check=False)

    if completed.returncode != 0:
        raise RuntimeError(
            f"Subtitle removal failed: {script} exited with status "
            f"{completed.returncode}."
        )
    if not os.path.isfile(output_path):
        raise RuntimeError(
            f"Subtitle removal failed: {script} did not create {output_path}."
        )
    return output_path
11
+
12
def _run_and_fan_out(video):
    """Process ``video`` and return the result path once per output component.

    The click handler below feeds two components (the preview player and the
    download box), so it must return two values; ``remove_subtitles`` returns
    a single path.
    """
    result_path = remove_subtitles(video)
    return result_path, result_path


# Single-page UI: upload a video, press the button, then preview and
# download the processed file.
with gr.Blocks() as demo:
    gr.Markdown("## 🎬 Video Subtitle Remover (VSR)")
    video_input = gr.Video(label="上传视频")  # "Upload video"
    run_btn = gr.Button("去字幕")  # "Remove subtitles"
    video_output = gr.Video(label="处理后视频")  # "Processed video"
    download = gr.File(label="下载结果")  # "Download result"
    run_btn.click(
        _run_and_fan_out,
        inputs=video_input,
        outputs=[video_output, download],
    )

# Only start the server when executed as a script, not when imported.
if __name__ == "__main__":
    demo.launch()