Pratyush Maini committed
Commit 8133671 · 1 Parent(s): a87043f

Pass HF token to InferenceClient and show clear token status in UI

Files changed (2):
1. app.py +13 -11
2. gradio.log +9 -0
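Condensed, the change is: read a Hugging Face token from the environment once at startup, pass it explicitly to InferenceClient, and surface the token status in the Gradio UI. A minimal sketch of that pattern (a paraphrase of the diff below, not the full app; the model ID is just one from the app's list):

import os
import gradio as gr
from huggingface_hub import InferenceClient

# Read the token once at startup, preferring HUGGINGFACEHUB_API_TOKEN.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")

# Pass the token explicitly so requests are authenticated
# (required for gated models such as meta-llama/*).
client = InferenceClient("HuggingFaceTB/SmolLM2-1.7B-Instruct", token=HF_TOKEN)

# Surface token status in the UI instead of failing silently on API calls.
with gr.Blocks() as demo:
    gr.Markdown(
        "✅ Token configured." if HF_TOKEN else "⚠️ No Hugging Face token detected."
    )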
app.py CHANGED
@@ -1,19 +1,17 @@
+import os
 import gradio as gr
 from huggingface_hub import InferenceClient
 
 # Define available models (update with your actual model IDs)
 model_list = {
-    "SafeLM 1.7B": "locuslab/safelm-1.7b-instruct",
+    "SafeLM 1.7B": "locuslab/safelm-1.7b",
     "SmolLM2 1.7B Instruct": "HuggingFaceTB/SmolLM2-1.7B-Instruct",
     "LLaMA 3.2 1B Instruct": "meta-llama/Llama-3.2-1B-Instruct",
 }
 
-# Dictionary to track which models support chat completion vs. text generation
-model_tasks = {
-    "HuggingFaceH4/zephyr-7b-beta": "chat-completion",  # This model supports chat completion
-    # Add other models that support chat completion
-}
-# Default to text-generation for models not specified above
+
+
+HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")
 
 
 def respond(message, history, system_message, max_tokens, temperature, top_p, selected_model):
@@ -22,7 +20,7 @@ def respond(message, history, system_message, max_tokens, temperature, top_p, selected_model):
     model_id = model_list.get(selected_model, "HuggingFaceH4/zephyr-7b-beta")
 
     # Create an InferenceClient for the selected model
-    client = InferenceClient(model_id)
+    client = InferenceClient(model_id, token=HF_TOKEN)
 
     # Always use text generation for locuslab models
     if "locuslab" in model_id:
@@ -184,9 +182,13 @@ with gr.Blocks(css=css) as demo:
     </h1>
     </div>
     """)
-
-    # Status message for API errors
-    status_message = gr.Markdown("", elem_id="status-message")
+    # Status message for API errors and token info
+    token_message = (
+        "⚠️ No Hugging Face token detected. Set HUGGINGFACEHUB_API_TOKEN (or HF_TOKEN) to call models."
+        if not HF_TOKEN
+        else "✅ Using configured Hugging Face token."
+    )
+    status_message = gr.Markdown(token_message, elem_id="status-message")
 
     with gr.Row():
         # Left sidebar: Model selector
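One way to sanity-check the token this change picks up is huggingface_hub's whoami helper. This is an illustrative sketch, not part of the commit; the fallback chain mirrors the one added in app.py:

import os
from huggingface_hub import whoami

# Same env-var fallback as the new code in app.py.
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN") or os.getenv("HF_TOKEN")

if HF_TOKEN:
    # whoami raises if the token is invalid; otherwise it returns account info.
    print("Authenticated as:", whoami(token=HF_TOKEN)["name"])
else:
    print("No token set; gated models (e.g., meta-llama/*) will be unreachable.")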
gradio.log ADDED
@@ -0,0 +1,9 @@
+Traceback (most recent call last):
+  File "/Users/pratyushmaini/Desktop/code/personal/websites/safe-playground/app.py", line 338, in <module>
+    demo.launch()
+  File "/Users/pratyushmaini/Desktop/code/personal/websites/safe-playground/.venv/lib/python3.12/site-packages/gradio/blocks.py", line 2371, in launch
+    ) = http_server.start_server(
+        ^^^^^^^^^^^^^^^^^^^^^^^^^
+  File "/Users/pratyushmaini/Desktop/code/personal/websites/safe-playground/.venv/lib/python3.12/site-packages/gradio/http_server.py", line 154, in start_server
+    raise OSError(
+OSError: Cannot find empty port in range: 7860-7959. You can specify a different port by setting the GRADIO_SERVER_PORT environment variable or passing the `server_port` parameter to `launch()`.
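The log records the failure mode the error message itself explains: every port in 7860-7959 was already in use. A minimal sketch of the two remedies the message names (the demo object here is a stand-in, not the app's UI):

import os
import gradio as gr

# Stand-in UI; the real interface lives in app.py.
with gr.Blocks() as demo:
    gr.Markdown("port check")

# Remedy 1: honor GRADIO_SERVER_PORT if set.
# Remedy 2: pass server_port explicitly; 7960 is just outside the scanned range.
port = int(os.getenv("GRADIO_SERVER_PORT", "7960"))
demo.launch(server_port=port)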