Spaces:

nickkun
/

Vision_Transformer-Segmentation

Running

App Files Files Community

nickkun committed on Mar 29

Commit

5f66b26

verified ·

1 Parent(s): 8cfd312

Update app.py

Browse files

Files changed (1) hide show

app.py +142 -119

app.py CHANGED Viewed

@@ -4,130 +4,153 @@
 @author: Nikhil Kunjoor
 """
 import gradio as gr
 import numpy as np
-from PIL import Image, ImageFilter
-import torch
-from torchvision import transforms
-from transformers import AutoModelForImageSegmentation, AutoImageProcessor, AutoModelForDepthEstimation
-# Prefer the GPU when torch can see one; both models below are moved to `device`.
-if torch.cuda.is_available():
-    device = 'cuda'
-else:
-    device = 'cpu'
-torch.set_float32_matmul_precision('high')
-# Background-removal model; trust_remote_code=True allows the model code hosted in that repo to run.
-rmbg_model = AutoModelForImageSegmentation.from_pretrained(
-    "briaai/RMBG-2.0",
-    trust_remote_code=True,
-)
-rmbg_model = rmbg_model.to(device).eval()
-# Depth estimation: the image processor and the model are loaded from the same checkpoint.
-depth_processor = AutoImageProcessor.from_pretrained(
-    "depth-anything/Depth-Anything-V2-Small-hf"
-)
-depth_model = AutoModelForDepthEstimation.from_pretrained(
-    "depth-anything/Depth-Anything-V2-Small-hf"
-).to(device)
-def run_rmbg(image, threshold=0.5):
-    """Return a 0/1 ``uint8`` foreground mask for *image* using ``rmbg_model``.
-    The image is resized to 1024x1024 and normalised, the last element of the
-    model output is passed through a sigmoid, resized back to ``image.size``
-    and compared against ``threshold``.
-    Args:
-        image: Input image; ``image.size`` is read, so presumably a PIL image.
-        threshold: Cut-off applied to the mask after it is scaled to [0, 1].
-    Returns:
-        A NumPy ``uint8`` array holding 1 where the mask exceeds ``threshold``.
-    """
-    preprocess = transforms.Compose([
-        transforms.Resize((1024, 1024)),
-        transforms.ToTensor(),
-        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
-    ])
-    batch = preprocess(image).unsqueeze(0).to(device)
-    with torch.no_grad():
-        # The model returns a sequence; only its last entry is used as the mask logits.
-        logits = rmbg_model(batch)[-1]
-    probabilities = logits.sigmoid().cpu()[0].squeeze()
-    probability_image = transforms.ToPILImage()(probabilities)
-    # Bring the mask back to the resolution of the input image.
-    probability_image = probability_image.resize(image.size, resample=Image.BILINEAR)
-    scaled = np.array(probability_image, dtype=np.uint8) / 255.0
-    return (scaled > threshold).astype(np.uint8)
-def run_depth_estimation(image, target_size=(512, 512)):
-    """Estimate depth for *image* and return it min-max normalised and inverted.
-    The image is resized to ``target_size`` before inference; the prediction is
-    interpolated back to the input resolution, scaled to [0, 1] and returned as
-    ``1 - normalised``.
-    Args:
-        image: Input image; ``resize`` and ``size`` are used, so presumably a PIL image.
-        target_size: Size passed to ``image.resize`` before running the depth model.
-    Returns:
-        A NumPy array at the input resolution. A constant prediction yields
-        all ones instead of NaN.
-    """
-    resized = image.resize(target_size, resample=Image.BILINEAR)
-    inputs = depth_processor(images=resized, return_tensors="pt").to(device)
-    with torch.no_grad():
-        predicted_depth = depth_model(**inputs).predicted_depth
-    # image.size is reversed so the interpolation target matches the input image.
-    prediction = torch.nn.functional.interpolate(
-        predicted_depth.unsqueeze(1),
-        size=image.size[::-1],
-        mode="bicubic",
-        align_corners=False,
-    )
-    depth_map = prediction.squeeze().cpu().numpy()
-    lowest = depth_map.min()
-    span = depth_map.max() - lowest
-    if span > 0:
-        normalised = (depth_map - lowest) / span
-    else:
-        # Flat prediction: dividing by a zero range would fill the map with NaN.
-        normalised = np.zeros_like(depth_map)
-    return 1 - normalised
-def apply_gaussian_blur(image, mask, sigma):
-    """Blur *image* with radius ``sigma`` everywhere the mask is zero.
-    Args:
-        image: Image to process (``filter`` is called on it, so presumably a PIL image).
-        mask: Array that is multiplied by 255 and cast to ``uint8`` to form the composite mask.
-        sigma: Radius handed to ``ImageFilter.GaussianBlur``.
-    Returns:
-        The composite of the original image (where the mask is set) over the blurred copy.
-    """
-    softened = image.filter(ImageFilter.GaussianBlur(radius=sigma))
-    keep_original = Image.fromarray((mask * 255).astype(np.uint8))
-    return Image.composite(image, softened, keep_original)
-def apply_lens_blur(image, depth_map, max_radius, foreground_percentile):
-    """Blur *image* progressively more where ``depth_map`` is larger.
-    Ten radii from 0 to ``max_radius`` are applied in increasing order. Each one
-    overwrites the pixels whose ``depth_map`` value exceeds a cut-off that rises
-    linearly from the foreground percentile to the map's maximum.
-    Args:
-        image: Image to process (``filter`` is called on it, so presumably a PIL image).
-        depth_map: NumPy array compared element-wise against the cut-offs.
-        max_radius: Largest Gaussian blur radius.
-        foreground_percentile: Percentile of ``depth_map`` at or below which pixels are never replaced.
-    Returns:
-        A new image built from the blended pixel array.
-    """
-    threshold = np.percentile(depth_map.ravel(), foreground_percentile)
-    headroom = depth_map.max() - threshold
-    pixels = np.array(image)
-    for blur_radius in np.linspace(0, max_radius, 10):
-        cutoff = threshold + blur_radius / max_radius * headroom
-        selected = depth_map > cutoff
-        softened = np.array(image.filter(ImageFilter.GaussianBlur(radius=blur_radius)))
-        # Later (stronger) passes overwrite earlier ones for pixels above the higher cut-off.
-        pixels[selected] = softened[selected]
-    return Image.fromarray(pixels)
-def process_image(image, blur_type, sigma, max_radius, foreground_percentile, mask_threshold):
-    """Apply the selected blur to an uploaded image and describe the settings used.
-    Args:
-        image: Array passed to ``Image.fromarray``, or None when nothing was uploaded.
-        blur_type: "Gaussian Blur" selects the mask-based blur; anything else the depth-based one.
-        sigma: Gaussian blur radius (Gaussian path only).
-        max_radius: Largest blur radius (lens path only).
-        foreground_percentile: Depth percentile kept sharp (lens path only).
-        mask_threshold: Mask cut-off (Gaussian path only).
-    Returns:
-        ``(output_image, text)``; on failure the image is None and the text is the error message.
-    """
-    if image is None:
-        return None, "Please upload an image."
-    try:
-        image = Image.fromarray(image).convert("RGB")
-    except Exception as e:
-        return None, f"Error processing image: {str(e)}"
-    # Shrink in place so neither side exceeds 1024 px before running the models.
-    limit = (1024, 1024)
-    width, height = image.size
-    if width > limit[0] or height > limit[1]:
-        image.thumbnail(limit, Image.Resampling.LANCZOS)
-    try:
-        if blur_type != "Gaussian Blur":  # Lens Blur
-            depth_map = run_depth_estimation(image)
-            output_image = apply_lens_blur(image, depth_map, max_radius, foreground_percentile)
-        else:
-            mask = run_rmbg(image, threshold=mask_threshold)
-            output_image = apply_gaussian_blur(image, mask, sigma)
-    except Exception as e:
-        return None, f"Error applying blur: {str(e)}"
-    # Report only the parameters that were relevant to the chosen blur.
-    header = f"Blur Type: {blur_type}\n"
     if blur_type == "Gaussian Blur":
-        details = f"Sigma: {sigma}\nMask Threshold: {mask_threshold}"
     else:
-        details = f"Max Radius: {max_radius}\nForeground Percentile: {foreground_percentile}"
-    return output_image, header + details
-# Gradio UI: image upload and blur controls, an "Apply Blur" button, then the result and a debug text box.
 with gr.Blocks() as demo:
-    gr.Markdown("# Image Blur Effects with Gaussian and Lens Blur")
     with gr.Row():
-        # type="numpy": process_image receives the upload as an array and converts it with Image.fromarray.
-        image_input = gr.Image(label="Upload Image", type="numpy")
         with gr.Column():
-            blur_type = gr.Radio(choices=["Gaussian Blur", "Lens Blur"], label="Blur Type", value="Gaussian Blur")
-            # sigma and mask_threshold are read on the Gaussian path of process_image;
-            # max_radius and foreground_percentile on the lens path.
-            sigma = gr.Slider(minimum=0.1, maximum=50, step=0.1, value=15, label="Gaussian Blur Sigma")
-            max_radius = gr.Slider(minimum=1, maximum=100, step=1, value=15, label="Max Lens Blur Radius")
-            foreground_percentile = gr.Slider(minimum=1, maximum=99, step=1, value=30, label="Foreground Percentile")
-            mask_threshold = gr.Slider(minimum=0.1, maximum=0.9, step=0.1, value=0.5, label="Mask Threshold")
-    process_button = gr.Button("Apply Blur")
-    with gr.Row():
-        output_image = gr.Image(label="Output Image")
-        debug_info = gr.Textbox(label="Debug Info", lines=4)
-    def update_visibility(blur_type):
-        """Return visibility updates for (sigma, max_radius, foreground_percentile, mask_threshold), in that order."""
-        if blur_type == "Gaussian Blur":
-            return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=True)
-        else:  # Lens Blur
-            return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True), gr.update(visible=False)
-    # NOTE: this is wired to the change event only, and no slider is created with visible=False,
-    # so all four sliders are shown until the blur type is first changed.
-    blur_type.change(
-        fn=update_visibility,
-        inputs=blur_type,
-        outputs=[sigma, max_radius, foreground_percentile, mask_threshold]
-    )
-    # The input order here must match the parameter order of process_image.
-    process_button.click(
-        fn=process_image,
-        inputs=[image_input, blur_type, sigma, max_radius, foreground_percentile, mask_threshold],
-        outputs=[output_image, debug_info]
-    )
 demo.launch()

 @author: Nikhil Kunjoor
 """
 import gradio as gr
+from transformers import pipeline
+from PIL import Image, ImageFilter, ImageOps
 import numpy as np
+import requests
+import cv2
+# Both pipelines are created once at import time and reused by every request.
+print("Loading segmentation model...")
+segmentation_model = pipeline(
+    task="image-segmentation",
+    model="nvidia/segformer-b1-finetuned-cityscapes-1024-1024",
+)
+print("Loading depth estimation model...")
+depth_estimator = pipeline(
+    task="depth-estimation",
+    model="Intel/zoedepth-nyu-kitti",
+)
+def lens_blur(image, radius):
+    """
+    Apply a lens-style (bokeh) blur by convolving the image with a normalised disk kernel.
+    Args:
+        image: PIL image to blur (converted with ``np.array``).
+        radius: Disk radius in pixels. Values below 1 return ``image`` unchanged;
+            other non-integer values are rounded to the nearest integer.
+    Returns:
+        A new PIL image, or ``image`` itself when ``radius < 1``.
+    """
+    if radius < 1:
+        return image
+    # Callers may pass a float (e.g. from a UI slider); array shapes need a real int.
+    radius = int(round(radius))
+    # Boolean disk: True where the offset from the centre lies within `radius`.
+    offset_y, offset_x = np.ogrid[-radius:radius + 1, -radius:radius + 1]
+    disk = (offset_x ** 2 + offset_y ** 2) <= radius ** 2
+    # Normalise so the kernel sums to 1; the centre is always inside the disk, so the sum is never zero.
+    kernel = disk.astype(np.float32)
+    kernel /= kernel.sum()
+    # filter2D filters each channel of a multi-channel array independently,
+    # so splitting and re-merging the channels is unnecessary.
+    blurred_img = cv2.filter2D(np.array(image), -1, kernel)
+    return Image.fromarray(blurred_img)
+def process_image(input_image, method, blur_intensity, blur_type):
+    """
+    Blur an image with one of two methods and return the result plus the mask used.
+    Methods:
+    1. "Segmented Background Blur": the last mask returned by the segmentation
+       pipeline is binarised (> 128); original pixels are kept where it is set
+       and a blurred copy is used everywhere else.
+    2. "Depth-based Variable Blur": the depth map is min-max normalised and used
+       as a per-pixel weight between the original (0) and a blurred copy (1).
+    Any other method returns the RGB input together with its grayscale version.
+    Args:
+        input_image: PIL image from the UI, or None when nothing was uploaded.
+        method: Name of the processing method (see above).
+        blur_intensity: Blur radius handed to the selected blur function.
+        blur_type: "Lens Blur" selects ``lens_blur``; any other value uses Gaussian blur.
+    Returns:
+        ``(output_image, mask_image)`` as PIL images.
+    Raises:
+        gr.Error: if no image was supplied or the segmentation returned no masks.
+    """
+    # Without an upload the input is None; report that instead of failing on None.convert().
+    if input_image is None:
+        raise gr.Error("Please upload an image.")
+    # Ensure image is in RGB mode
+    input_image = input_image.convert("RGB")
+    def gaussian_blur(img, rad):
+        return img.filter(ImageFilter.GaussianBlur(radius=rad))
+    # Select blur function based on blur_type
     if blur_type == "Gaussian Blur":
+        blur_fn = gaussian_blur
+    elif blur_type == "Lens Blur":
+        blur_fn = lens_blur
     else:
+        # Unrecognised blur types fall back to Gaussian blur.
+        blur_fn = gaussian_blur
+    if method == "Segmented Background Blur":
+        results = segmentation_model(input_image)
+        if not results:
+            raise gr.Error("Segmentation returned no regions for this image.")
+        # NOTE(review): this takes whichever segment the pipeline lists last and treats it
+        # as the foreground; confirm that is the intended subject for this model.
+        foreground_mask = results[-1]["mask"].convert("L")
+        # Threshold to create a binary mask.
+        binary_mask = foreground_mask.point(lambda p: 255 if p > 128 else 0)
+        blurred_background = blur_fn(input_image, blur_intensity)
+        # Keep the original where the mask is set and use the blurred copy elsewhere.
+        output_image = Image.composite(input_image, blurred_background, binary_mask)
+        mask_image = binary_mask
+    elif method == "Depth-based Variable Blur":
+        depth_map = depth_estimator(input_image)["depth"]
+        depth_array = np.array(depth_map).astype(np.float32)
+        # The epsilon keeps a constant depth map from dividing by zero.
+        norm = (depth_array - depth_array.min()) / (depth_array.max() - depth_array.min() + 1e-8)
+        normalized_depth = (norm * 255).astype(np.uint8)
+        mask_image = Image.fromarray(normalized_depth)
+        blurred_image = blur_fn(input_image, blur_intensity)
+        orig_np = np.array(input_image).astype(np.float32)
+        blur_np = np.array(blurred_image).astype(np.float32)
+        # The trailing axis lets the per-pixel weights broadcast over the colour channels.
+        alpha = normalized_depth[..., np.newaxis] / 255.0
+        # Blend pixels: 0 = original; 1 = fully blurred.
+        blended_np = (1 - alpha) * orig_np + alpha * blur_np
+        blended_np = np.clip(blended_np, 0, 255).astype(np.uint8)
+        output_image = Image.fromarray(blended_np)
+    else:
+        # Unknown method: pass the image through and show its grayscale version as the mask.
+        output_image = input_image
+        mask_image = input_image.convert("L")
+    return output_image, mask_image
+# Build the Gradio interface: controls in the first column, result and mask in the second.
 with gr.Blocks() as demo:
+    gr.Markdown("## Image Processing App: Segmentation & Depth-based Blur")
     with gr.Row():
         with gr.Column():
+            # type="pil": process_image calls .convert() on the upload, so it must arrive as a PIL image.
+            input_image = gr.Image(label="Input Image", type="pil")
+            method = gr.Radio(label="Processing Method",
+                              choices=["Segmented Background Blur", "Depth-based Variable Blur"],
+                              value="Segmented Background Blur")
+            # Passed to process_image as blur_intensity and used as the blur radius.
+            blur_intensity = gr.Slider(label="Blur Intensity (Maximum Blur Radius)", minimum=1, maximum=30, step=1, value=15)
+            blur_type = gr.Dropdown(label="Blur Type", choices=["Gaussian Blur", "Lens Blur"], value="Gaussian Blur")
+            run_button = gr.Button("Process Image")
+        with gr.Column():
+            output_image = gr.Image(label="Output Image")
+            mask_output = gr.Image(label="Mask")
+    # The input order must match process_image's parameters; the outputs receive (output_image, mask_image).
+    run_button.click(fn=process_image,
+                     inputs=[input_image, method, blur_intensity, blur_type],
+                     outputs=[output_image, mask_output])
+# Launch the app
 demo.launch()