Spaces:
Build error
Update app.py
app.py
CHANGED
@@ -6,7 +6,7 @@ import gradio as gr
import spaces

from glob import glob
-from typing import Tuple

from PIL import Image
from gradio_imageslider import ImageSlider

@@ -16,48 +16,40 @@ from torchvision import transforms
import requests
from io import BytesIO
import zipfile
-

torch.set_float32_matmul_precision('high')
torch.jit.script = lambda f: f

device = "cuda" if torch.cuda.is_available() else "cpu"

-###
def refine_foreground(image, mask, r=90):
    if mask.size != image.size:
        mask = mask.resize(image.size)
-
-
-    estimated_foreground = FB_blur_fusion_foreground_estimator_2(
    image_masked = Image.fromarray((estimated_foreground * 255.0).astype(np.uint8))
    return image_masked

-
def FB_blur_fusion_foreground_estimator_2(image, alpha, r=90):
-    # Thanks to the source: https://github.com/Photoroom/fast-foreground-estimation
    alpha = alpha[:, :, None]
-    F, blur_B = FB_blur_fusion_foreground_estimator(
-        image, image, image, alpha, r)
    return FB_blur_fusion_foreground_estimator(image, F, blur_B, alpha, r=6)[0]

-
def FB_blur_fusion_foreground_estimator(image, F, B, alpha, r=90):
    if isinstance(image, Image.Image):
        image = np.array(image) / 255.0
    blurred_alpha = cv2.blur(alpha, (r, r))[:, :, None]
-
    blurred_FA = cv2.blur(F * alpha, (r, r))
    blurred_F = blurred_FA / (blurred_alpha + 1e-5)
-
    blurred_B1A = cv2.blur(B * (1 - alpha), (r, r))
    blurred_B = blurred_B1A / ((1 - blurred_alpha) + 1e-5)
-    F = blurred_F + alpha * \
-        (image - alpha * blurred_F - (1 - alpha) * blurred_B)
    F = np.clip(F, 0, 1)
    return F, blurred_B

-
class ImagePreprocessor():
    def __init__(self, resolution: Tuple[int, int] = (1024, 1024)) -> None:
        self.transform_image = transforms.Compose([

@@ -65,12 +57,10 @@ class ImagePreprocessor():
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
-
    def proc(self, image: Image.Image) -> torch.Tensor:
        image = self.transform_image(image)
        return image

-
usage_to_weights_file = {
    'General': 'BiRefNet',
    'General-HR': 'BiRefNet_HR',

@@ -85,17 +75,19 @@ usage_to_weights_file = {
    'General-legacy': 'BiRefNet-legacy'
}

-
birefnet.to(device)
birefnet.eval(); birefnet.half()

-
@spaces.GPU
def predict(images, resolution, weights_file):
-    assert
-
    global birefnet
-    #
    _weights_file = '/'.join(('zhengpeng7', usage_to_weights_file[weights_file] if weights_file is not None else usage_to_weights_file['General']))
    print('Using weights: {}.'.format(_weights_file))
    birefnet = AutoModelForImageSegmentation.from_pretrained(_weights_file, trust_remote_code=True)

@@ -103,28 +95,30 @@ def predict(images, resolution, weights_file):
    birefnet.eval(); birefnet.half()

    try:
-
    except:
        if weights_file == 'General-HR':
-
        elif weights_file == 'General-Lite-2K':
-
        else:
-
-        print('Invalid resolution input. Automatically changed to

    if isinstance(images, list):
-        # For tab_batch
-        save_paths = []
-        save_dir = 'preds-BiRefNet'
-        if not os.path.exists(save_dir):
-            os.makedirs(save_dir)
        tab_is_batch = True
    else:
        images = [images]
        tab_is_batch = False

-
        if isinstance(image_src, str):
            if os.path.isfile(image_src):
                image_ori = Image.open(image_src)

@@ -133,31 +127,30 @@ def predict(images, resolution, weights_file):
                image_data = BytesIO(response.content)
                image_ori = Image.open(image_data)
        else:
-
-
        image = image_ori.convert('RGB')
-
-
-        image_proc = image_preprocessor.proc(image)
-        image_proc = image_proc.unsqueeze(0)
-
-        # Prediction
        with torch.no_grad():
            preds = birefnet(image_proc.to(device).half())[-1].sigmoid().cpu()
        pred = preds[0].squeeze()
-
-        # Show Results
        pred_pil = transforms.ToPILImage()(pred)
        image_masked = refine_foreground(image, pred_pil)
        image_masked.putalpha(pred_pil.resize(image.size))
-
        torch.cuda.empty_cache()
-
        if tab_is_batch:
-
-
-
-
    if tab_is_batch:
        zip_file_path = os.path.join(save_dir, "{}.zip".format(save_dir))
        with zipfile.ZipFile(zip_file_path, 'w') as zipf:

@@ -165,70 +158,137 @@ def predict(images, resolution, weights_file):
            zipf.write(file, os.path.basename(file))
        return save_paths, zip_file_path
    else:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    examples_url[idx_example_url].append('1024x1024')
-
-descriptions = ('Upload a picture, our model will extract a highly accurate segmentation of the subject in it.\n'
-                ' The resolution used in our training was `1024x1024`, which is the suggested resolution to obtain good results! `2048x2048` is suggested for BiRefNet_HR.\n'
-                ' Our codes can be found at https://github.com/ZhengPeng7/BiRefNet.\n'
-                ' We also maintain the HF model of BiRefNet at https://huggingface.co/ZhengPeng7/BiRefNet for easier access.')
-
-tab_image = gr.Interface(
-    fn=predict,
-    inputs=[
-        gr.Image(label='Upload an image'),
-        gr.Textbox(lines=1, placeholder="Type the resolution (`WxH`) you want, e.g., `1024x1024`.", label="Resolution"),
-        gr.Radio(list(usage_to_weights_file.keys()), value='General', label="Weights", info="Choose the weights you want.")
-    ],
-    outputs=ImageSlider(label="BiRefNet's prediction", type="pil"),
-    examples=examples,
-    api_name="image",
-    description=descriptions,
-)
-
-tab_text = gr.Interface(
-    fn=predict,
-    inputs=[
-        gr.Textbox(label="Paste an image URL"),
-        gr.Textbox(lines=1, placeholder="Type the resolution (`WxH`) you want, e.g., `1024x1024`.", label="Resolution"),
-        gr.Radio(list(usage_to_weights_file.keys()), value='General', label="Weights", info="Choose the weights you want.")
-    ],
-    outputs=ImageSlider(label="BiRefNet's prediction", type="pil"),
-    examples=examples_url,
-    api_name="text",
-    description=descriptions+'\nTab-URL is partially modified from https://huggingface.co/spaces/not-lain/background-removal, thanks to this great work!',
)

-
-
-
-
-
-
-
-
-
-
-    )
-
-
-
-
-
-

if __name__ == "__main__":
-    demo.launch(debug=True)
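The commit replaces the old one-gr.Interface-per-tab layout with a single gr.Blocks app. The removed lines that built the batch tab and combined the three interfaces did not survive extraction here; a minimal sketch of the usual pattern, reusing the tab_image and tab_text names from the removed code and a hypothetical tab_batch, would be:

demo = gr.TabbedInterface(
    [tab_image, tab_text, tab_batch],
    ['image', 'URL', 'batch'],
    title='BiRefNet demo',
)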
app.py after this commit (new side of the diff; unchanged ranges are omitted):

import spaces

from glob import glob
from typing import Tuple, Optional

from PIL import Image
from gradio_imageslider import ImageSlider

import requests
from io import BytesIO
import zipfile
import random

torch.set_float32_matmul_precision('high')
torch.jit.script = lambda f: f

device = "cuda" if torch.cuda.is_available() else "cpu"

### Image post-processing functions ###
def refine_foreground(image, mask, r=90):
    if mask.size != image.size:
        mask = mask.resize(image.size)
    image_np = np.array(image) / 255.0
    mask_np = np.array(mask) / 255.0
    estimated_foreground = FB_blur_fusion_foreground_estimator_2(image_np, mask_np, r=r)
    image_masked = Image.fromarray((estimated_foreground * 255.0).astype(np.uint8))
    return image_masked

def FB_blur_fusion_foreground_estimator_2(image, alpha, r=90):
    alpha = alpha[:, :, None]
    F, blur_B = FB_blur_fusion_foreground_estimator(image, image, image, alpha, r)
    return FB_blur_fusion_foreground_estimator(image, F, blur_B, alpha, r=6)[0]

def FB_blur_fusion_foreground_estimator(image, F, B, alpha, r=90):
    if isinstance(image, Image.Image):
        image = np.array(image) / 255.0
    blurred_alpha = cv2.blur(alpha, (r, r))[:, :, None]
    blurred_FA = cv2.blur(F * alpha, (r, r))
    blurred_F = blurred_FA / (blurred_alpha + 1e-5)
    blurred_B1A = cv2.blur(B * (1 - alpha), (r, r))
    blurred_B = blurred_B1A / ((1 - blurred_alpha) + 1e-5)
    F = blurred_F + alpha * (image - alpha * blurred_F - (1 - alpha) * blurred_B)
    F = np.clip(F, 0, 1)
    return F, blurred_B

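# The helpers above implement blur-fusion foreground estimation
# (see https://github.com/Photoroom/fast-foreground-estimation, credited in the
# previous revision of this file). They solve the compositing identity
#     image = alpha * F + (1 - alpha) * B
# for the foreground F using box-blurred local estimates, with a coarse pass
# (r=90) followed by a fine pass (r=6). A rough usage sketch, with hypothetical
# file names:
#     img = Image.open('examples/photo.jpg').convert('RGB')
#     mask = Image.open('examples/photo_mask.png').convert('L')
#     cutout = refine_foreground(img, mask)  # RGB foreground with cleaner edge colors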
class ImagePreprocessor():
    def __init__(self, resolution: Tuple[int, int] = (1024, 1024)) -> None:
        self.transform_image = transforms.Compose([

            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ])
    def proc(self, image: Image.Image) -> torch.Tensor:
        image = self.transform_image(image)
        return image

usage_to_weights_file = {
    'General': 'BiRefNet',
    'General-HR': 'BiRefNet_HR',

    'General-legacy': 'BiRefNet-legacy'
}

# Initial model loading (default: General)
birefnet = AutoModelForImageSegmentation.from_pretrained(
    '/'.join(('zhengpeng7', usage_to_weights_file['General'])),
    trust_remote_code=True
)
birefnet.to(device)
birefnet.eval(); birefnet.half()

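# Checkpoint repo ids are built as 'zhengpeng7/<weights file>', so e.g. the
# 'General-HR' choice below resolves to 'zhengpeng7/BiRefNet_HR' on the
# Hugging Face Hub.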
@spaces.GPU
def predict(images, resolution, weights_file):
    assert images is not None, 'Images cannot be None.'
    global birefnet
    # Reload the model with the selected weights
    _weights_file = '/'.join(('zhengpeng7', usage_to_weights_file[weights_file] if weights_file is not None else usage_to_weights_file['General']))
    print('Using weights: {}.'.format(_weights_file))
    birefnet = AutoModelForImageSegmentation.from_pretrained(_weights_file, trust_remote_code=True)

    birefnet.eval(); birefnet.half()

    try:
        resolution_list = [int(int(reso)//32*32) for reso in resolution.strip().split('x')]
    except:
        if weights_file == 'General-HR':
            resolution_list = [2048, 2048]
        elif weights_file == 'General-Lite-2K':
            resolution_list = [2560, 1440]
        else:
            resolution_list = [1024, 1024]
        print('Invalid resolution input. Automatically changed to default.')

    # Check whether the input is a single image or a list (batch)
    if isinstance(images, list):
        tab_is_batch = True
    else:
        images = [images]
        tab_is_batch = False

    save_paths = []
    save_dir = 'preds-BiRefNet'
    if tab_is_batch and not os.path.exists(save_dir):
        os.makedirs(save_dir)

    outputs = []
    for idx, image_src in enumerate(images):
        if isinstance(image_src, str):
            if os.path.isfile(image_src):
                image_ori = Image.open(image_src)

                image_data = BytesIO(response.content)
                image_ori = Image.open(image_data)
        else:
            if isinstance(image_src, np.ndarray):
                image_ori = Image.fromarray(image_src)
            else:
                image_ori = image_src.convert('RGB')
        image = image_ori.convert('RGB')
        preprocessor = ImagePreprocessor(resolution=tuple(resolution_list))
        image_proc = preprocessor.proc(image).unsqueeze(0)
        with torch.no_grad():
            preds = birefnet(image_proc.to(device).half())[-1].sigmoid().cpu()
        pred = preds[0].squeeze()
        pred_pil = transforms.ToPILImage()(pred)
        image_masked = refine_foreground(image, pred_pil)
        image_masked.putalpha(pred_pil.resize(image.size))
        torch.cuda.empty_cache()
        if tab_is_batch:
            file_path = os.path.join(save_dir, "{}.png".format(
                os.path.splitext(os.path.basename(image_src))[0] if isinstance(image_src, str) else f"img_{idx}"
            ))
            image_masked.save(file_path)
            save_paths.append(file_path)
            outputs.append(image_masked)
        else:
            outputs = [image_masked, image_ori]

    if tab_is_batch:
        zip_file_path = os.path.join(save_dir, "{}.zip".format(save_dir))
        with zipfile.ZipFile(zip_file_path, 'w') as zipf:

            zipf.write(file, os.path.basename(file))
        return save_paths, zip_file_path
    else:
        # Return the result as a list so it is displayed by the ImageSlider
        return outputs

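# A rough sketch of how predict() is called by the UI below (paths are hypothetical):
# a single PIL image yields [foreground_with_alpha, original] for the ImageSlider,
# while a list of file paths yields (saved PNG paths, zip archive path) for the
# gallery and the download box.
#     pair = predict(Image.open('examples/dog.jpg'), '1024x1024', 'General')
#     paths, archive = predict(glob('examples/*'), '1024x1024', 'General')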
+
# ์์ ๋ฐ์ดํฐ (์ด๋ฏธ์ง, URL, ๋ฐฐ์น)
|
| 165 |
+
examples_image = [[path, "1024x1024", "General"] for path in glob('examples/*')]
|
| 166 |
+
examples_text = [[url, "1024x1024", "General"] for url in ["https://hips.hearstapps.com/hmg-prod/images/gettyimages-1229892983-square.jpg"]]
|
| 167 |
+
examples_batch = [[file, "1024x1024", "General"] for file in glob('examples/*')]
|
| 168 |
+
|
| 169 |
+
descriptions = (
|
| 170 |
+
"Upload a picture, our model will extract a highly accurate segmentation of the subject in it.\n"
|
| 171 |
+
"The resolution used in our training was `1024x1024`, which is suggested for good results! "
|
| 172 |
+
"`2048x2048` is suggested for BiRefNet_HR.\n"
|
| 173 |
+
"Our codes can be found at https://github.com/ZhengPeng7/BiRefNet.\n"
|
| 174 |
+
"We also maintain the HF model of BiRefNet at https://huggingface.co/ZhengPeng7/BiRefNet for easier access."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
)
|
| 176 |
|
# Improved UI styles (background, container, left sidebar, button animations, etc.)
css = """
body {
    background: linear-gradient(135deg, #667eea, #764ba2);
    font-family: 'Helvetica Neue', Helvetica, Arial, sans-serif;
    color: #333;
    margin: 0;
    padding: 0;
}
.gradio-container {
    background: rgba(255, 255, 255, 0.95);
    border-radius: 15px;
    padding: 30px 40px;
    box-shadow: 0 8px 30px rgba(0, 0, 0, 0.3);
    margin: 40px auto;
    max-width: 1200px;
}
.gradio-container h1 {
    color: #333;
    text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.2);
}
.fillable {
    width: 95% !important;
    max-width: unset !important;
}
#examples_container {
    margin: auto;
    width: 90%;
}
#examples_row {
    justify-content: center;
}
.sidebar {
    background: rgba(255, 255, 255, 0.98);
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
}
button, .btn {
    background: linear-gradient(90deg, #ff8a00, #e52e71);
    border: none;
    color: #fff;
    padding: 12px 24px;
    text-transform: uppercase;
    font-weight: bold;
    letter-spacing: 1px;
    border-radius: 5px;
    cursor: pointer;
    transition: transform 0.2s ease-in-out;
}
button:hover, .btn:hover {
    transform: scale(1.05);
}
"""

+
title = """
|
| 233 |
+
<h1 align="center" style="margin-bottom: 0.2em;">BiRefNet Demo for Subject Extraction</h1>
|
| 234 |
+
<p align="center" style="font-size:1.1em; color:#555;">
|
| 235 |
+
Upload an image or provide an image URL to extract the subject with high-precision segmentation.
|
| 236 |
+
</p>
|
| 237 |
+
"""
|
| 238 |
+
|
| 239 |
+
with gr.Blocks(css=css, title="BiRefNet Demo") as demo:
|
| 240 |
+
gr.Markdown(title)
|
| 241 |
+
with gr.Tabs():
|
| 242 |
+
with gr.Tab("Image"):
|
| 243 |
+
with gr.Row():
|
| 244 |
+
with gr.Column(scale=1):
|
| 245 |
+
image_input = gr.Image(type='pil', label='Upload an Image')
|
| 246 |
+
resolution_input = gr.Textbox(lines=1, placeholder="e.g., 1024x1024", label="Resolution")
|
| 247 |
+
weights_radio = gr.Radio(list(usage_to_weights_file.keys()), value="General", label="Weights")
|
| 248 |
+
predict_btn = gr.Button("Predict")
|
| 249 |
+
with gr.Column(scale=2):
|
| 250 |
+
output_slider = ImageSlider(label="BiRefNet's Prediction", type="pil")
|
| 251 |
+
gr.Examples(examples=examples_image, inputs=[image_input, resolution_input, weights_radio], label="Examples")
|
| 252 |
+
with gr.Tab("Text"):
|
| 253 |
+
with gr.Row():
|
| 254 |
+
with gr.Column(scale=1):
|
| 255 |
+
image_url = gr.Textbox(label="Paste an Image URL")
|
| 256 |
+
resolution_input_text = gr.Textbox(lines=1, placeholder="e.g., 1024x1024", label="Resolution")
|
| 257 |
+
weights_radio_text = gr.Radio(list(usage_to_weights_file.keys()), value="General", label="Weights")
|
| 258 |
+
predict_btn_text = gr.Button("Predict")
|
| 259 |
+
with gr.Column(scale=2):
|
| 260 |
+
output_slider_text = ImageSlider(label="BiRefNet's Prediction", type="pil")
|
| 261 |
+
gr.Examples(examples=examples_text, inputs=[image_url, resolution_input_text, weights_radio_text], label="Examples")
|
| 262 |
+
with gr.Tab("Batch"):
|
| 263 |
+
with gr.Row():
|
| 264 |
+
with gr.Column(scale=1):
|
| 265 |
+
file_input = gr.File(label="Upload Multiple Images", type="filepath", file_count="multiple")
|
| 266 |
+
resolution_input_batch = gr.Textbox(lines=1, placeholder="e.g., 1024x1024", label="Resolution")
|
| 267 |
+
weights_radio_batch = gr.Radio(list(usage_to_weights_file.keys()), value="General", label="Weights")
|
| 268 |
+
predict_btn_batch = gr.Button("Predict")
|
| 269 |
+
with gr.Column(scale=2):
|
| 270 |
+
output_gallery = gr.Gallery(label="BiRefNet's Predictions").style(grid=[3], height="auto")
|
| 271 |
+
zip_output = gr.File(label="Download Masked Images")
|
| 272 |
+
gr.Examples(examples=examples_batch, inputs=[file_input, resolution_input_batch, weights_radio_batch], label="Examples")
|
| 273 |
+
with gr.Row():
|
| 274 |
+
gr.Markdown("<p align='center'>Model by <a href='https://huggingface.co/ZhengPeng7/BiRefNet'>ZhengPeng7/BiRefNet</a></p>")
|
| 275 |
+
|
| 276 |
+
# ๊ฐ ํญ์ Predict ๋ฒํผ๊ณผ predict ํจ์ ์ฐ๊ฒฐ
|
| 277 |
+
predict_btn.click(
|
| 278 |
+
fn=predict,
|
| 279 |
+
inputs=[image_input, resolution_input, weights_radio],
|
| 280 |
+
outputs=output_slider
|
| 281 |
+
)
|
| 282 |
+
predict_btn_text.click(
|
| 283 |
+
fn=predict,
|
| 284 |
+
inputs=[image_url, resolution_input_text, weights_radio_text],
|
| 285 |
+
outputs=output_slider_text
|
| 286 |
+
)
|
| 287 |
+
predict_btn_batch.click(
|
| 288 |
+
fn=predict,
|
| 289 |
+
inputs=[file_input, resolution_input_batch, weights_radio_batch],
|
| 290 |
+
outputs=[output_gallery, zip_output]
|
| 291 |
+
)
|
| 292 |
|
| 293 |
if __name__ == "__main__":
|
| 294 |
+
demo.launch(share=False, debug=True)
|
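For reference, the core segmentation step in predict() can be reproduced outside the Space. The following is a minimal standalone sketch, not part of this commit: it resizes to 1024x1024 (the training resolution mentioned in the description), applies the same ImageNet normalization and call pattern as app.py, and loads the ZhengPeng7/BiRefNet checkpoint from the Hub; the input path is hypothetical, and half precision is left out so it also runs on CPU.

import torch
from PIL import Image
from torchvision import transforms
from transformers import AutoModelForImageSegmentation

device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForImageSegmentation.from_pretrained('ZhengPeng7/BiRefNet', trust_remote_code=True)
model.to(device)
model.eval()

preprocess = transforms.Compose([
    transforms.Resize((1024, 1024)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

image = Image.open('examples/sample.jpg').convert('RGB')  # hypothetical input
batch = preprocess(image).unsqueeze(0).to(device)
with torch.no_grad():
    # Same pattern as app.py: take the last prediction map and apply sigmoid.
    pred = model(batch)[-1].sigmoid().cpu()[0].squeeze()

mask = transforms.ToPILImage()(pred).resize(image.size)
image.putalpha(mask)
image.save('sample_cutout.png')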