Akis Giannoukos committed
Commit 9325a21 · 1 Parent(s): 44521ed

Using gemma-2 model

Files changed (1):
app.py +27 -16
app.py CHANGED
@@ -25,7 +25,7 @@ import spaces
 # ---------------------------
 # Configuration
 # ---------------------------
-DEFAULT_CHAT_MODEL_ID = os.getenv("LLM_MODEL_ID", "TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+DEFAULT_CHAT_MODEL_ID = os.getenv("LLM_MODEL_ID", "google/gemma-2-2b-it")
 DEFAULT_ASR_MODEL_ID = os.getenv("ASR_MODEL_ID", "openai/whisper-tiny.en")
 CONFIDENCE_THRESHOLD_DEFAULT = float(os.getenv("CONFIDENCE_THRESHOLD", "0.8"))
 MAX_TURNS = int(os.getenv("MAX_TURNS", "12"))
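
Note: the default chat model is now resolved from the LLM_MODEL_ID environment variable, with google/gemma-2-2b-it as the in-source fallback, so a deployment can swap models without editing app.py. A minimal sketch of the override behavior (the gemma-2-9b-it ID below is only an illustration, not part of this commit):

import os

# No override set: the fallback baked into the source wins.
print(os.getenv("LLM_MODEL_ID", "google/gemma-2-2b-it"))  # google/gemma-2-2b-it

# Exported before launch (e.g. `LLM_MODEL_ID=... python app.py`),
# the environment value takes precedence over the fallback.
os.environ["LLM_MODEL_ID"] = "google/gemma-2-9b-it"  # simulate the export
print(os.getenv("LLM_MODEL_ID", "google/gemma-2-2b-it"))  # google/gemma-2-9b-it
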
@@ -191,18 +191,21 @@ def generate_recording_agent_reply(chat_history: List[Tuple[str, str]]) -> str:
         "\n\nRespond with a single short clinician-style question for the patient."
     )
     pipe = get_textgen_pipeline()
-    out = pipe(
-        f"<|system|>\n{system_prompt}\n<|user|>\n{user_prompt}\n<|assistant|>",
-        max_new_tokens=128,
+    tokenizer = pipe.tokenizer
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
+    ]
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    gen = pipe(
+        prompt,
+        max_new_tokens=96,
         temperature=0.7,
         do_sample=True,
-        pad_token_id=pipe.tokenizer.eos_token_id,
-    )[0]["generated_text"]
-
-    # Extract assistant content after the last assistant tag if present
-    reply = out.split("<|assistant|>")[-1].strip()
-    # Post-process to avoid trailing special tokens
-    reply = re.split(r"</s>|<\|endoftext\|>", reply)[0].strip()
+        pad_token_id=tokenizer.eos_token_id,
+        return_full_text=False,
+    )
+    reply = gen[0]["generated_text"].strip()
     # Ensure it's a single concise question/sentence
     if len(reply) > 300:
         reply = reply[:300].rstrip() + "…"
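
Note: this hunk replaces the hand-rolled TinyLlama/Zephyr-style <|system|>/<|user|>/<|assistant|> prompt with tokenizer.apply_chat_template, which renders whatever chat format the loaded tokenizer defines, and adds return_full_text=False, so the pipeline returns only the completion and the old split-on-<|assistant|> and special-token stripping go away. One hedged caveat: Gemma-2's chat template, as shipped, raises on a separate "system" role; if that happens here, the usual workaround is to fold the system prompt into the first user turn. A minimal self-contained sketch under that assumption (model ID and prompts are illustrative):

from transformers import pipeline

pipe = pipeline("text-generation", model="google/gemma-2-2b-it")
tokenizer = pipe.tokenizer

system_prompt = "You are a concise, empathetic clinical interviewer."
user_prompt = "Ask the patient one short follow-up question."

# Fold the system prompt into the user turn, since Gemma's shipped
# template rejects a separate "system" role.
messages = [{"role": "user", "content": f"{system_prompt}\n\n{user_prompt}"}]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

gen = pipe(
    prompt,
    max_new_tokens=96,
    temperature=0.7,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
    return_full_text=False,  # return only the completion, not prompt + completion
)
reply = gen[0]["generated_text"].strip()
print(reply)
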
@@ -227,14 +230,22 @@ def scoring_agent_infer(chat_history: List[Tuple[str, str]], features: Dict[str,
         "Set High_Risk=true if any suicidal ideation or risk is present. Return ONLY JSON, no prose."
     )
     pipe = get_textgen_pipeline()
-    out = pipe(
-        f"<|system|>\n{system_prompt}\n<|user|>\n{user_prompt}\n<|assistant|>",
+    tokenizer = pipe.tokenizer
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
+    ]
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    gen = pipe(
+        prompt,
         max_new_tokens=256,
         temperature=0.2,
         do_sample=True,
-        pad_token_id=pipe.tokenizer.eos_token_id,
-    )[0]["generated_text"]
-    parsed = safe_json_extract(out)
+        pad_token_id=tokenizer.eos_token_id,
+        return_full_text=False,
+    )
+    out_text = gen[0]["generated_text"]
+    parsed = safe_json_extract(out_text)
 
     # Validate and coerce
     if parsed is None or "PHQ9_Scores" not in parsed:
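
Note: safe_json_extract is called here but not defined in this diff. A hypothetical sketch of such a helper, inferred only from the call site (it must return a dict on success and None on failure; the real implementation in app.py may differ):

import json
from typing import Dict, Optional

def safe_json_extract(text: str) -> Optional[Dict]:
    """Best-effort extraction of a JSON object from a model completion."""
    # Try the raw completion first, in case the model obeyed "Return ONLY JSON".
    # Otherwise fall back to the widest {...} span, which survives code fences
    # or stray prose around the object.
    candidates = [text.strip()]
    start, end = text.find("{"), text.rfind("}")
    if start != -1 and end > start:
        candidates.append(text[start:end + 1])
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed
    return None

# safe_json_extract('```json\n{"PHQ9_Scores": [1, 0, 2], "High_Risk": false}\n```')
# -> {'PHQ9_Scores': [1, 0, 2], 'High_Risk': False}
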
 