Pratyush Maini committed
Commit 8133671 · 1 Parent(s): a87043f

Pass HF token to InferenceClient and show clear token status in UI

Files changed (2):
1. app.py +13 -11
2. gradio.log +9 -0
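Condensed, the change is: read a Hugging Face token from the environment once at startup, pass it explicitly to InferenceClient, and surface the token status in the Gradio UI. A minimal sketch of that pattern (a paraphrase of the diff below, not the full app; the model ID is just one from the app's list):

import os
import gradio as gr
from huggingface_hub import InferenceClient

# Read the token once at startup, preferring HUGGINGFACEHUB_API_TOKEN.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")

# Pass the token explicitly so requests are authenticated
# (required for gated models such as meta-llama/*).
client = InferenceClient("HuggingFaceTB/SmolLM2-1.7B-Instruct", token=HF_TOKEN)

# Surface token status in the UI instead of failing silently on API calls.
with gr.Blocks() as demo:
    gr.Markdown(
        "✅ Token configured." if HF_TOKEN else "⚠️ No Hugging Face token detected."
    )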
app.py CHANGED
@@ -1,19 +1,17 @@
+import os
 import gradio as gr
 from huggingface_hub import InferenceClient
 
 # Define available models (update with your actual model IDs)
 model_list = {
-    "SafeLM 1.7B": "locuslab/safelm-1.7b-instruct",
+    "SafeLM 1.7B": "locuslab/safelm-1.7b",
     "SmolLM2 1.7B Instruct": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
     "LLaMA 3.2 1B Instruct": "meta-llama/Llama-3.2-1B-Instruct",
 }
 
-# Dictionary to track which models support chat completion vs. text generation
-model_tasks = {
-    "HuggingFaceH4/zephyr-7b-beta": "chat-completion",  # This model supports chat completion
-    # Add other models that support chat completion
-}
-# Default to text-generation for models not specified above
+
+
+HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
 
 
 def respond(message, history, system_message, max_tokens, temperature, top_p, selected_model):
@@ -22,7 +20,7 @@ def respond(message, history, system_message, max_tokens, temperature, top_p, selected_model):
     model_id = model_list.get(selected_model, "HuggingFaceH4/zephyr-7b-beta")
 
     # Create an InferenceClient for the selected model
-    client = InferenceClient(model_id)
+    client = InferenceClient(model_id, token=HF_TOKEN)
 
     # Always use text generation for locuslab models
     if "locuslab" in model_id:
@@ -184,9 +182,13 @@ with gr.Blocks(css=css) as demo:
     </h1>
     </div>
     """)
-
-    # Status message for API errors
-    status_message = gr.Markdown("", elem_id="status-message")
+    # Status message for API errors and token info
+    token_message = (
+        "⚠️ No Hugging Face token detected. Set HUGGINGFACEHUB_API_TOKEN (or HF_TOKEN) to call models."
+        if not HF_TOKEN
+        else "✅ Using configured Hugging Face token."
+    )
+    status_message = gr.Markdown(token_message, elem_id="status-message")
 
     with gr.Row():
         # Left sidebar: Model selector
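One way to sanity-check the token this change picks up is huggingface_hub's whoami helper. This is an illustrative sketch, not part of the commit; the fallback chain mirrors the one added in app.py:

import os
from huggingface_hub import whoami

# Same env-var fallback as the new code in app.py.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")

if HF_TOKEN:
    # whoami raises if the token is invalid; otherwise it returns account info.
    print("Authenticated as:", whoami(token=HF_TOKEN)["name"])
else:
    print("No token set; gated models (e.g., meta-llama/*) will be unreachable.")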
gradio.log ADDED
@@ -0,0 +1,9 @@
+Traceback (most recent call last):
+  File "/Users/pratyushmaini/Desktop/code/personal/websites/safe-playground/app.py", line 338, in <module>
+    demo.launch()
+  File "/Users/pratyushmaini/Desktop/code/personal/websites/safe-playground/.venv/lib/python3.12/site-packages/gradio/blocks.py", line 2371, in launch
+    ) = http_server.start_server(
+        ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/Users/pratyushmaini/Desktop/code/personal/websites/safe-playground/.venv/lib/python3.12/site-packages/gradio/http_server.py", line 154, in start_server
+    raise OSError(
+OSError: Cannot find empty port in range: 7860-7959. You can specify a different port by setting the GRADIO_SERVER_PORT environment variable or passing the `server_port` parameter to `launch()`.
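The log records the failure mode the error message itself explains: every port in 7860-7959 was already in use. A minimal sketch of the two remedies the message names (the demo object here is a stand-in, not the app's UI):

import os
import gradio as gr

# Stand-in UI; the real interface lives in app.py.
with gr.Blocks() as demo:
    gr.Markdown("port check")

# Remedy 1: honor GRADIO_SERVER_PORT if set.
# Remedy 2: pass server_port explicitly; 7960 is just outside the scanned range.
port = int(os.getenv("GRADIO_SERVER_PORT", "7960"))
demo.launch(server_port=port)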