Akis Giannoukos committed on
Commit
17f0761
·
1 Parent(s): d3feaf4

Add UI controls for switching models

Browse files
Files changed (1) hide show
  1. app.py +70 -2
app.py CHANGED
@@ -20,6 +20,7 @@ from transformers import (
20
  )
21
  from gtts import gTTS
22
  import spaces
 
23
 
24
 
25
  # ---------------------------
@@ -30,6 +31,22 @@ DEFAULT_ASR_MODEL_ID = os.getenv("ASR_MODEL_ID", "openai/whisper-tiny.en")
30
  CONFIDENCE_THRESHOLD_DEFAULT = float(os.getenv("CONFIDENCE_THRESHOLD", "0.8"))
31
  MAX_TURNS = int(os.getenv("MAX_TURNS", "12"))
32
  USE_TTS_DEFAULT = os.getenv("USE_TTS", "false").strip().lower() == "true"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
 
35
  # ---------------------------
@@ -61,14 +78,48 @@ def get_textgen_pipeline():
61
  # Use a small default chat model for Spaces CPU; override via LLM_MODEL_ID
62
  _gen_pipe = pipeline(
63
  task="text-generation",
64
- model=DEFAULT_CHAT_MODEL_ID,
65
- tokenizer=DEFAULT_CHAT_MODEL_ID,
66
  device=_hf_device(),
67
  torch_dtype=(torch.float16 if torch.cuda.is_available() else torch.float32),
68
  )
69
  return _gen_pipe
70
 
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  # ---------------------------
73
  # Utilities
74
  # ---------------------------
@@ -531,6 +582,11 @@ def create_demo():
531
  threshold = gr.Slider(0.5, 1.0, value=CONFIDENCE_THRESHOLD_DEFAULT, step=0.05, label="Confidence Threshold (stop when min ≥ τ)")
532
  tts_enable = gr.Checkbox(label="Speak clinician responses (TTS)", value=USE_TTS_DEFAULT)
533
  tts_audio = gr.Audio(label="Clinician voice", interactive=False)
 
 
 
 
 
534
 
535
  with gr.Row():
536
  audio = gr.Audio(sources=["microphone"], type="filepath", label="Speak your response (or use text)")
@@ -561,6 +617,18 @@ def create_demo():
561
 
562
  reset_btn.click(fn=reset_app, inputs=None, outputs=[chatbot, scores_state, meta_state, finished_state, turns_state])
563
 
 
 
 
 
 
 
 
 
 
 
 
 
564
  return demo
565
 
566
  demo = create_demo()
 
20
  )
21
  from gtts import gTTS
22
  import spaces
23
+ import threading
24
 
25
 
26
  # ---------------------------
 
31
  CONFIDENCE_THRESHOLD_DEFAULT = float(os.getenv("CONFIDENCE_THRESHOLD", "0.8"))
32
  MAX_TURNS = int(os.getenv("MAX_TURNS", "12"))
33
  USE_TTS_DEFAULT = os.getenv("USE_TTS", "false").strip().lower() == "true"
34
+ CONFIG_PATH = os.getenv("MODEL_CONFIG_PATH", "model_config.json")
35
+
36
+
37
def _load_model_id_from_config() -> str:
    """Return the chat model id persisted in CONFIG_PATH, or the default.

    Expects a JSON file of the shape ``{"model_id": "..."}``. Any read or
    parse problem (missing file, unreadable file, malformed JSON, wrong
    shape) falls back to DEFAULT_CHAT_MODEL_ID so app startup never fails
    on an absent or corrupt config.
    """
    try:
        # EAFP: open directly instead of an os.path.exists() pre-check,
        # which is racy (file may vanish between check and open) and costs
        # an extra stat(). OSError covers missing/unreadable files;
        # ValueError covers json.JSONDecodeError (its subclass).
        with open(CONFIG_PATH, "r") as f:
            data = json.load(f)
    except (OSError, ValueError):
        return DEFAULT_CHAT_MODEL_ID
    if isinstance(data, dict) and data.get("model_id"):
        return str(data["model_id"])
    return DEFAULT_CHAT_MODEL_ID
47
+
48
+
49
+ current_model_id = _load_model_id_from_config()
50
 
51
 
52
  # ---------------------------
 
78
  # Use a small default chat model for Spaces CPU; override via LLM_MODEL_ID
79
  _gen_pipe = pipeline(
80
  task="text-generation",
81
+ model=current_model_id,
82
+ tokenizer=current_model_id,
83
  device=_hf_device(),
84
  torch_dtype=(torch.float16 if torch.cuda.is_available() else torch.float32),
85
  )
86
  return _gen_pipe
87
 
88
 
89
def set_current_model_id(new_model_id: str) -> str:
    """Switch the active chat model id and invalidate the cached pipeline.

    Returns a human-readable status string for the UI. The heavy text-gen
    pipeline is NOT rebuilt here; clearing ``_gen_pipe`` defers the reload
    to the next call of get_textgen_pipeline().
    """
    global current_model_id, _gen_pipe
    candidate = (new_model_id or "").strip()
    if not candidate:
        return "Model id is empty; keeping current model."
    if candidate == current_model_id:
        return f"Model unchanged: `{current_model_id}`"
    current_model_id = candidate
    # Drop the lazily-built pipeline so it is recreated with the new id.
    _gen_pipe = None  # force reload on next use
    return f"Model switched to `{current_model_id}` (pipeline will reload on next generation)."
99
+
100
+
101
def persist_model_id(new_model_id: str) -> None:
    """Best-effort write of the chosen model id to CONFIG_PATH as JSON.

    Failures (read-only filesystem, bad path, serialization error) are
    deliberately swallowed: persistence is a convenience so the id
    survives a restart, not a hard requirement.
    """
    try:
        payload = json.dumps({"model_id": new_model_id})
        with open(CONFIG_PATH, "w") as f:
            f.write(payload)
    except Exception:
        pass
107
+
108
+
109
def apply_model_and_restart(new_model_id: str) -> str:
    """Persist *new_model_id*, switch to it, and schedule a process restart.

    Intended for hosted Spaces: the supervisor relaunches the app, which
    then reads the persisted model id at import time. The exit runs on a
    daemon thread after a short delay so this function can return its
    status message and the HTTP response can flush first.

    NOTE(review): relies on ``time`` being imported at module level — the
    visible diff only adds ``import threading``; confirm ``time`` is
    already imported near the top of app.py.
    """
    candidate = (new_model_id or "").strip()
    if not candidate:
        return "Model id is empty; not restarting."
    persist_model_id(candidate)
    set_current_model_id(candidate)

    def _shutdown() -> None:
        # os._exit skips atexit handlers/finalizers on purpose: we want a
        # hard, immediate exit so the supervisor restarts the process.
        time.sleep(0.25)
        os._exit(0)

    threading.Thread(target=_shutdown, daemon=True).start()
    return f"Restarting with model `{candidate}`..."
121
+
122
+
123
  # ---------------------------
124
  # Utilities
125
  # ---------------------------
 
582
  threshold = gr.Slider(0.5, 1.0, value=CONFIDENCE_THRESHOLD_DEFAULT, step=0.05, label="Confidence Threshold (stop when min ≥ τ)")
583
  tts_enable = gr.Checkbox(label="Speak clinician responses (TTS)", value=USE_TTS_DEFAULT)
584
  tts_audio = gr.Audio(label="Clinician voice", interactive=False)
585
+ model_id_tb = gr.Textbox(value=current_model_id, label="Chat Model ID", info="e.g., google/gemma-2-2b-it or google/medgemma-4b-it")
586
+ with gr.Row():
587
+ apply_model_btn = gr.Button("Apply model (no restart)")
588
+ apply_model_restart_btn = gr.Button("Apply model and restart")
589
+ model_status = gr.Markdown(value=f"Current model: `{current_model_id}`")
590
 
591
  with gr.Row():
592
  audio = gr.Audio(sources=["microphone"], type="filepath", label="Speak your response (or use text)")
 
617
 
618
  reset_btn.click(fn=reset_app, inputs=None, outputs=[chatbot, scores_state, meta_state, finished_state, turns_state])
619
 
620
+ # Model switch handlers
621
+ def _on_apply_model(mid: str):
622
+ msg = set_current_model_id(mid)
623
+ return f"Current model: `{current_model_id}`\n\n{msg}"
624
+
625
+ def _on_apply_model_restart(mid: str):
626
+ msg = apply_model_and_restart(mid)
627
+ return f"{msg}"
628
+
629
+ apply_model_btn.click(fn=_on_apply_model, inputs=[model_id_tb], outputs=[model_status])
630
+ apply_model_restart_btn.click(fn=_on_apply_model_restart, inputs=[model_id_tb], outputs=[model_status])
631
+
632
  return demo
633
 
634
  demo = create_demo()