Spaces: Runtime error
create app.py
app.py ADDED
@@ -0,0 +1,77 @@
+# app.py for a Hugging Face Space: connecting Meta Llama 3.2 Vision, Segment Anything, and a diffusion model
+import gradio as gr
+import spaces  # Hugging Face spaces package; provides the ZeroGPU decorator
+from transformers import pipeline
+from diffusers import StableDiffusionImg2ImgPipeline
+import torch
+
+# Set up the Meta Llama 3.2 Vision model.
+# Note: the smallest Llama 3.2 Vision checkpoint is 11B (there is no 1B Vision
+# variant), and the repo is gated, so the Space needs an approved access token.
+llama_vision_model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+llama_pipe = pipeline(
+    "image-text-to-text",  # the task that serves Llama 3.2 Vision; "image-captioning" is not a transformers task
+    model=llama_vision_model_id,
+    torch_dtype=torch.bfloat16,
+    device=0,  # run on the GPU
+)
+
+# Set up a Segment Anything model. SAM 2 is not served by a transformers
+# pipeline, so the original "meta/segment-anything-2" is swapped here for
+# SAM via the "mask-generation" task, the closest supported drop-in.
+segment_model_id = "facebook/sam-vit-base"
+segment_pipe = pipeline(
+    "mask-generation",
+    model=segment_model_id,
+    device=0,  # run on the GPU
+)
+
+# Set up a Stable Diffusion img2img pipeline. "CompVis/stable-diffusion-lite"
+# does not exist on the Hub, so the v1-4 checkpoint is used, and img2img is
+# needed because the plain text-to-image pipeline does not accept an input image.
+stable_diffusion_model_id = "CompVis/stable-diffusion-v1-4"
+diffusion_pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
+    stable_diffusion_model_id, torch_dtype=torch.float16
+)
+diffusion_pipe = diffusion_pipe.to("cuda")  # move the pipeline to the GPU
+
+# Use the ZeroGPU decorator for the function that needs GPU access
+@spaces.GPU(duration=120)  # allocates a GPU for at most 120 seconds per call
+def process_image(image):
+    # Step 1: caption the image with Llama 3.2 Vision via a chat-style prompt
+    # (passing a PIL image inside the message assumes a recent transformers release)
+    messages = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image", "image": image},
+                {"type": "text", "text": "Describe this image in one short sentence."},
+            ],
+        }
+    ]
+    caption_result = llama_pipe(text=messages, max_new_tokens=40, return_full_text=False)
+    caption = caption_result[0]["generated_text"]
+
+    # Step 2: segment the image (the masks are computed but not consumed
+    # further in this minimal demo)
+    segmented_result = segment_pipe(image, points_per_batch=64)
+    masks = segmented_result["masks"]
+
+    # Step 3: modify the image with the diffusion model, guided by the caption
+    output_image = diffusion_pipe(prompt=f"A stylized version of: {caption}", image=image, strength=0.6).images[0]
+
+    return output_image
+
+# Create the Gradio interface
+interface = gr.Interface(
+    fn=process_image,
+    inputs=gr.Image(type="pil"),
+    outputs="image",
+    live=True,  # re-run automatically when the input image changes
+    allow_flagging="never",  # disable flagging to keep interactions light
+    title="Image Processor: Vision, Segmentation, and Modification",
+    description="Upload an image to generate a caption, segment its main regions, and modify it with Stable Diffusion.",
+)
+
+# Launch the app
+interface.launch()
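
Note: for this Space to build, the repo also needs a requirements.txt next to app.py, which this commit does not include. A minimal sketch (package names only, versions unpinned, assuming recent releases):

gradio
spaces
transformers
diffusers
torch
accelerate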