Ryukijano committed on
Commit a2c060f · verified · 1 Parent(s): 93dc5ee

Update app.py


Added token authentication for private model access.
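The token wiring relies on Hugging Face Spaces exposing repository secrets as environment variables: a secret named HF_TOKEN added in the Space settings becomes readable from app.py via os.getenv. A minimal sketch of that read, with an explicit failure when the secret is missing (the check and error message are illustrative additions, not part of this commit):

import os

hf_token = os.getenv("HF_TOKEN")  # set as a secret in the Space settings
if hf_token is None:
    raise RuntimeError("HF_TOKEN is not set; add it as a secret in the Space settings.")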

Files changed (1)
  1. app.py +14 -8
app.py CHANGED
@@ -4,36 +4,42 @@ import spaces  # Import the spaces module to use GPU-specific decorators
 from transformers import pipeline
 from diffusers import StableDiffusionPipeline
 import torch
+import os
 
-# Set up Meta Llama 3.2 Vision model
+# Set up Hugging Face token for private model access
+hf_token = os.getenv("HF_TOKEN")
+
+# Set up Meta Llama 3.2 Vision model (using private model with token)
 llama_vision_model_id = "meta-llama/Llama-3.2-1B-Vision"
-llama_pipe = pipeline(
+vision_pipe = pipeline(
     "image-captioning",  # Supports image captioning and image Q&A
     model=llama_vision_model_id,
     torch_dtype=torch.bfloat16,
     device=0,  # Force usage of GPU
+    use_auth_token=hf_token,  # Use Hugging Face token for authentication
 )
 
-# Set up Meta Segment Anything 2 model
+# Set up Meta Segment Anything 2 model (using private model with token)
 segment_model_id = "meta/segment-anything-2"
 segment_pipe = pipeline(
     "image-segmentation",
     model=segment_model_id,
     device=0,  # Force usage of GPU
+    use_auth_token=hf_token,  # Use Hugging Face token for authentication
 )
 
 # Set up Stable Diffusion Lite model
-stable_diffusion_model_id = "CompVis/stable-diffusion-lite"
+stable_diffusion_model_id = "runwayml/stable-diffusion-v1-5"
 diffusion_pipe = StableDiffusionPipeline.from_pretrained(
-    stable_diffusion_model_id, torch_dtype=torch.float16
+    stable_diffusion_model_id, torch_dtype=torch.float16, use_auth_token=hf_token
 )
 diffusion_pipe = diffusion_pipe.to("cuda")  # Force usage of GPU
 
 # Use the GPU decorator for the function that needs GPU access
 @spaces.GPU(duration=120)  # Allocates GPU for a maximum of 120 seconds
 def process_image(image):
-    # Step 1: Use Llama 3.2 Vision for initial image understanding (captioning)
-    caption_result = llama_pipe(image=image)
+    # Step 1: Use Vision model for initial image understanding (captioning)
+    caption_result = vision_pipe(image=image)
     caption = caption_result[0]['generated_text']
 
     # Step 2: Segment important parts of the image
@@ -58,4 +64,4 @@ interface = gr.Interface(
 )
 
 # Launch the app
-interface.launch()
+interface.launch()
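A note on the arguments used above: in recent releases of transformers and diffusers, use_auth_token is deprecated in favour of token, and image captioning is registered under the task name "image-to-text" rather than "image-captioning". The sketch below shows the same token wiring with the newer keyword; the captioning model ID and the input file name are illustrative stand-ins, not values from this commit:

import os

import torch
from diffusers import StableDiffusionPipeline
from transformers import pipeline

hf_token = os.getenv("HF_TOKEN")  # provided to the Space as a secret

# Captioning pipeline; "image-to-text" is the registered captioning task
caption_pipe = pipeline(
    "image-to-text",
    model="Salesforce/blip-image-captioning-base",  # illustrative public model
    device=0,
    token=hf_token,  # newer replacement for use_auth_token
)

# Diffusion pipeline authenticated the same way
diffusion_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16,
    token=hf_token,
).to("cuda")

caption = caption_pipe("input.jpg")[0]["generated_text"]  # "input.jpg" is a placeholder path
print(caption)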