# app/app.py
import os, sys, time, csv, re
from pathlib import Path

import yaml
import pandas as pd
from dotenv import load_dotenv

load_dotenv()
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
print("ANTHROPIC in env:", bool(ANTHROPIC_API_KEY), flush=True)
if not ANTHROPIC_API_KEY:
    raise SystemExit("Missing ANTHROPIC_API_KEY in environment or .env file.")

# os.environ.setdefault("HF_HUB_DISABLE_TELEMETRY", "1")
# os.environ.setdefault("TRANSFORMERS_OFFLINE", "1")

# --- Resolve paths (works both as .py and as a PyInstaller .exe)
APP_DIR = Path(__file__).resolve().parent  # .../app
if hasattr(sys, "_MEIPASS"):
    ROOT = Path(sys._MEIPASS)
else:
    ROOT = APP_DIR.parent

CFG_PATH = ROOT / "app" / "config" / "app.yaml"
MODELS_DIR = ROOT / "models"
INDEX_DIR = ROOT / "outputs" / "index"
LOGS_DIR = ROOT / "local_logs"
DOCS_DIR = ROOT / "docs"
LOGS_DIR.mkdir(parents=True, exist_ok=True)

# --- Read config
DEFAULT_CFG = {
    "retrieval": {"top_k": 12, "evidence_shown": 3, "answerability_threshold": 0.2},
    "generator": {
        "enabled_default": False,
        "use_top_evidence": 5,
        "temperature": 0.1,
        "max_answer_sentences": 20,
        "n_ctx": 4096,
        "threads": max(2, (os.cpu_count() or 4) - 1),
    },
    "models": {
        "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
        "embedding_local_dir": None,
        "anthropic_model": "claude-3-5-sonnet-latest",
    },
    "ui": {"show_online_badge": True, "performance_mode": "standard"},  # quick|standard
}


def _deep_merge(base, override):
    """Merge nested config dicts so a partial app.yaml keeps the remaining defaults."""
    out = dict(base)
    for k, v in (override or {}).items():
        if isinstance(v, dict) and isinstance(out.get(k), dict):
            out[k] = _deep_merge(out[k], v)
        else:
            out[k] = v
    return out


cfg = DEFAULT_CFG
if CFG_PATH.exists():
    cfg = _deep_merge(cfg, yaml.safe_load(CFG_PATH.read_text(encoding="utf-8")))

# --- Load FAISS index & embeddings
import faiss
from sentence_transformers import SentenceTransformer

META_PATH = INDEX_DIR / "meta.parquet"
FAISS_PATH = INDEX_DIR / "chunks.faiss"
if not META_PATH.exists() or not FAISS_PATH.exists():
    raise SystemExit("Index not found. Ensure outputs/index/meta.parquet and chunks.faiss exist.")

df = pd.read_parquet(META_PATH)
df["text"] = df["text"].fillna("").astype(str)

# Load the sentence-transformer encoder (local folder if configured, else from the Hub)
emb_dir = cfg["models"].get("embedding_local_dir")
if emb_dir:
    EMBED_MODEL_PATH = Path(emb_dir)
    if not EMBED_MODEL_PATH.exists():
        raise SystemExit(f"Embedding model folder not found: {EMBED_MODEL_PATH}")
    embed_model = SentenceTransformer(str(EMBED_MODEL_PATH), trust_remote_code=False)
else:
    embed_model = SentenceTransformer(cfg["models"]["embedding_model"], trust_remote_code=False)

index = faiss.read_index(str(FAISS_PATH))


def _format_citation(row):
    p = int(row["page"]) if pd.notna(row.get("page")) else None
    return f"{row['title']} (p.{p})" if p is not None else f"{row['title']}"


def retrieve(query, top_k=6):
    # Normalized embeddings + inner-product index => scores are cosine similarities
    qv = embed_model.encode([query], convert_to_numpy=True, normalize_embeddings=True).astype("float32")
    scores, idxs = index.search(qv, top_k)
    out = []
    for s, ix in zip(scores[0], idxs[0]):
        r = df.iloc[int(ix)]
        out.append({
            "score": float(s),
            "citation": _format_citation(r),
            "doc_id": r.get("doc_id", ""),
            "page": None if pd.isna(r.get("page")) else int(r["page"]),
            "chunk_id": int(r["chunk_id"]),
            "text": r["text"],
        })
    return out


# --- Anthropic (Claude) LLM
from anthropic import Anthropic, APIError

anthropic_client = Anthropic(api_key=ANTHROPIC_API_KEY)
CLAUDE_MODEL = cfg["models"].get("anthropic_model", "claude-3-5-sonnet-latest")

SYSTEM_PROMPT = (
    "You are a careful assistant for clinicians. "
    "Use ONLY the provided context to answer. "
    "Be concise. Add inline citations like [1], [2] matching the numbered context. "
    "If the context does not fully answer, provide the best supported guidance you can, "
    "and point to the closest relevant passages with citations."
)


def _citations_valid(text, k):
    nums = set(int(n) for n in re.findall(r"\[(\d+)\]", text))
    return bool(nums) and all(1 <= n <= k for n in nums)


def _join_cites(nums):
    nums = [f"[{n}]" for n in nums]
    if not nums:
        return ""
    if len(nums) == 1:
        return nums[0]
    return ", ".join(nums[:-1]) + " and " + nums[-1]


def _make_context_block(ctx, use_n):
    blocks = []
    for i, c in enumerate(ctx[:use_n], 1):
        blocks.append(f"[{i}] {c['citation']}\n{c['text']}\n")
    return "\n".join(blocks)


def generate_answer(question, ctx, use_n, temp=0.1, max_sentences=6):
    context_text = _make_context_block(ctx, use_n)
    user_prompt = (
        f"Context:\n\n{context_text}\n\n"
        f"Question: {question}\n"
        "Answer using ONLY the context above and cite with [1], [2], etc."
    )
    try:
        resp = anthropic_client.messages.create(
            model=CLAUDE_MODEL,
            system=SYSTEM_PROMPT,
            max_tokens=600,
            temperature=float(temp),
            messages=[{"role": "user", "content": [{"type": "text", "text": user_prompt}]}],
        )
    except APIError as e:
        return f"_API error from Anthropic: {e}_"

    # Claude returns a list of content blocks; keep only the text blocks
    parts = [blk.text for blk in resp.content if blk.type == "text"]
    full = "\n".join(parts).strip()

    # # Log raw model output to console for debugging
    # print("\n===== RAW MODEL OUTPUT =====\n", full, "\n============================\n", flush=True)

    # Trim to ~N sentences to keep answers short for testers
    sents = re.split(r"(?<=[.!?])\s+", full)
    return " ".join(sents[:max_sentences]).strip()


# --- Gradio UI
import gradio as gr

ONLINE_BADGE = "Standards of Practice & Code of Ethics" if cfg["ui"].get("show_online_badge", True) else ""


def _top_sentences(text, n=3):
    sents = re.split(r"(?<=[.!?])\s+", text.strip())
    return [s for s in sents if s][:n]


def answer_extractive(query, k=6, per_chunk_sents=2):
    # Fallback: quote the top sentences from each retrieved chunk, with citations
    ctx = retrieve(query, top_k=k)
    bullets, refs = [], []
    for i, c in enumerate(ctx, 1):
        for s in _top_sentences(c["text"], per_chunk_sents):
            bullets.append(f"- {s} [{i}]")
        refs.append(f"[{i}] {c['citation']}")
    if not bullets:
        return "I couldn’t find relevant text in the corpus.", refs
    return "\n".join(bullets) + "\n\nSources:\n" + "\n".join(refs), refs


def app_infer(question, do_generate, mode):
    start = time.time()
    if not question or not question.strip():
        return "", "", "", f"{ONLINE_BADGE} Ready."

    # Retrieval
    top_k = int(cfg["retrieval"]["top_k"])
    shown = int(cfg["retrieval"]["evidence_shown"])
    use_n = int(cfg["generator"]["use_top_evidence"])
    if mode == "quick":
        shown = min(3, shown)
        use_n = min(3, use_n)

    ctx = retrieve(question, top_k=top_k)

    # Evidence panel (hidden when evidence_shown is 0)
    if shown > 0:
        ev_md_lines = []
        for i, c in enumerate(ctx[:shown], 1):
            title = c["citation"]
            body = c["text"].strip()
            body_short = body if len(body) <= 1200 else body[:1200] + "..."
            ev_md_lines.append(f"**[{i}] {title}**\n\n{body_short}\n")
        evidence_md = "\n---\n".join(ev_md_lines)
    else:
        evidence_md = ""

    # Decide whether to generate
    answer = ""
    sources_md = ""
    conf = float(ctx[0]["score"]) if ctx else 0.0
    threshold = float(cfg["retrieval"].get("answerability_threshold", 0.2))

    if not ctx:
        status = f"{ONLINE_BADGE} No evidence found."
        return evidence_md, answer, sources_md, status

    if do_generate and conf >= threshold:
        draft = generate_answer(
            question=question,
            ctx=ctx,
            use_n=use_n,
            temp=float(cfg["generator"]["temperature"]),
            max_sentences=int(cfg["generator"]["max_answer_sentences"]),
        )
        # Fall back to the extractive answer if the draft's citations don't check out
        if not _citations_valid(draft, min(use_n, len(ctx))):
            extractive, _ = answer_extractive(question, k=use_n, per_chunk_sents=2)
            answer = extractive
        else:
            answer = draft
    elif do_generate and conf < threshold:
        answer = "_Not enough evidence—see Evidence below._"

    # Sources list
    src_lines = [f"[{i}] {c['citation']}" for i, c in enumerate(ctx[:use_n], 1)]
    sources_md = "Sources:\n" + "\n".join(src_lines)

    if answer:
        a = answer.strip()
        if not a.lower().startswith("answer:"):
            answer = f"Answer: {a}"

    dur = time.time() - start
    status = f"{ONLINE_BADGE} Done in {dur:.1f}s (conf={conf:.2f})."
    return evidence_md, answer, sources_md, status


def save_feedback(question, rating, note, answer_shown):
    fpath = LOGS_DIR / "feedback.csv"
    new = not fpath.exists()
    with fpath.open("a", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        if new:
            w.writerow(["timestamp", "question", "rating", "note", "answer_shown"])
        w.writerow([
            time.strftime("%Y-%m-%d %H:%M:%S"),
            question,
            rating,
            note,
            "yes" if answer_shown else "no",
        ])
    return "Feedback saved. Thank you!"


APP_CSS = """
:root{
  --app-font: system-ui, -apple-system, "Segoe UI", Roboto, Helvetica, Arial,
              "Apple Color Emoji", "Segoe UI Emoji";
}
body, .gradio-container { font-family: var(--app-font) !important; }
/* make reading nicer */
.gr-markdown { font-size: 16px; line-height: 1.6; }
.gr-markdown h2 { font-size: 18px; margin-top: 0.6rem; }
.gr-textbox textarea { font-size: 16px; }
"""

with gr.Blocks(title="Clinician Q&A", theme="soft", css=APP_CSS) as demo:
    gr.Markdown(f"## Clinician Q&A {'  ' + ONLINE_BADGE if ONLINE_BADGE else ''}")
    with gr.Row():
        with gr.Column(scale=1):
            q = gr.Textbox(label="Ask a question", placeholder="e.g., When can confidentiality be broken?")
            do_gen = gr.Checkbox(value=cfg["generator"]["enabled_default"], label="Use LLM")
            mode = gr.Radio(
                choices=["standard", "quick"],
                value=cfg["ui"].get("performance_mode", "standard"),
                label="Performance mode",
            )
            run = gr.Button("Answer", variant="primary")
            rating = gr.Radio(choices=["Helpful", "Not sure", "Incorrect"], label="Feedback", value=None)
            note = gr.Textbox(label="Add a note (optional)")
            submit = gr.Button("Submit feedback")
            status = gr.Markdown("Ready.")
        with gr.Column(scale=1):
            ans = gr.Markdown(label="Answer")
            ev = gr.Markdown(label="Evidence")
            src = gr.Markdown(label="Sources")

    # app_infer returns (evidence_md, answer, sources_md, status); keep outputs in that order
    run.click(app_infer, inputs=[q, do_gen, mode], outputs=[ev, ans, src, status])
    submit.click(
        lambda question, r, n, a: save_feedback(question, r, n, bool(a and a.strip())),
        inputs=[q, rating, note, ans],
        outputs=[status],
    )

# For local-only use: demo.launch(server_name="127.0.0.1", server_port=7860, inbrowser=True, show_error=True)

if __name__ == "__main__":
    # In the cloud (HF Spaces), bind to 0.0.0.0 and respect PORT if provided.
    port = int(os.getenv("PORT", "7860"))
    host = "0.0.0.0"
    demo.queue(max_size=32).launch(server_name=host, server_port=port, show_error=True)
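
# A minimal smoke test of the retrieval path (an assumed invocation, not part of the
# app itself; it requires the FAISS index and ANTHROPIC_API_KEY to be present, since
# importing this module loads both):
#
#   python -c "from app.app import retrieve; print(retrieve('confidentiality', top_k=3))"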