Ryukijano committed on
Commit a2c060f · verified · 1 Parent(s): 93dc5ee

Update app.py


Added token authentication for private model access.
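The token wiring relies on Hugging Face Spaces exposing repository secrets as environment variables: a secret named HF_TOKEN added in the Space settings becomes readable from app.py via os.getenv. A minimal sketch of that read, with an explicit failure when the secret is missing (the check and error message are illustrative additions, not part of this commit):

import os

hf_token = os.getenv("HF_TOKEN")  # set as a secret in the Space settings
if hf_token is None:
    raise RuntimeError("HF_TOKEN is not set; add it as a secret in the Space settings.")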

Files changed (1)
  1. app.py +14 -8
app.py CHANGED
@@ -4,36 +4,42 @@ import spaces  # Import the spaces module to use GPU-specific decorators
 from transformers import pipeline
 from diffusers import StableDiffusionPipeline
 import torch
+import os
 
-# Set up Meta Llama 3.2 Vision model
+# Set up Hugging Face token for private model access
+hf_token = os.getenv("HF_TOKEN")
+
+# Set up Meta Llama 3.2 Vision model (using private model with token)
 llama_vision_model_id = "meta-llama/Llama-3.2-1B-Vision"
-llama_pipe = pipeline(
+vision_pipe = pipeline(
     "image-captioning",  # Supports image captioning and image Q&A
     model=llama_vision_model_id,
     torch_dtype=torch.bfloat16,
     device=0,  # Force usage of GPU
+    use_auth_token=hf_token,  # Use Hugging Face token for authentication
 )
 
-# Set up Meta Segment Anything 2 model
+# Set up Meta Segment Anything 2 model (using private model with token)
 segment_model_id = "meta/segment-anything-2"
 segment_pipe = pipeline(
     "image-segmentation",
     model=segment_model_id,
     device=0,  # Force usage of GPU
+    use_auth_token=hf_token,  # Use Hugging Face token for authentication
 )
 
 # Set up Stable Diffusion Lite model
-stable_diffusion_model_id = "CompVis/stable-diffusion-lite"
+stable_diffusion_model_id = "runwayml/stable-diffusion-v1-5"
 diffusion_pipe = StableDiffusionPipeline.from_pretrained(
-    stable_diffusion_model_id, torch_dtype=torch.float16
+    stable_diffusion_model_id, torch_dtype=torch.float16, use_auth_token=hf_token
 )
 diffusion_pipe = diffusion_pipe.to("cuda")  # Force usage of GPU
 
 # Use the GPU decorator for the function that needs GPU access
 @spaces.GPU(duration=120)  # Allocates GPU for a maximum of 120 seconds
 def process_image(image):
-    # Step 1: Use Llama 3.2 Vision for initial image understanding (captioning)
-    caption_result = llama_pipe(image=image)
+    # Step 1: Use Vision model for initial image understanding (captioning)
+    caption_result = vision_pipe(image=image)
     caption = caption_result[0]['generated_text']
 
     # Step 2: Segment important parts of the image
@@ -58,4 +64,4 @@ interface = gr.Interface(
 )
 
 # Launch the app
-interface.launch()
+interface.launch()
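A note on the arguments used above: in recent releases of transformers and diffusers, use_auth_token is deprecated in favour of token, and image captioning is registered under the task name "image-to-text" rather than "image-captioning". The sketch below shows the same token wiring with the newer keyword; the captioning model ID and the input file name are illustrative stand-ins, not values from this commit:

import os

import torch
from diffusers import StableDiffusionPipeline
from transformers import pipeline

hf_token = os.getenv("HF_TOKEN")  # provided to the Space as a secret

# Captioning pipeline; "image-to-text" is the registered captioning task
caption_pipe = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",  # illustrative public model
    device=0,
    token=hf_token,  # newer replacement for use_auth_token
)

# Diffusion pipeline authenticated the same way
diffusion_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
    token=hf_token,
).to("cuda")

caption = caption_pipe("input.jpg")[0]["generated_text"]  # "input.jpg" is a placeholder path
print(caption)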