Spaces:

Southisuk
/

RDB_chatbot

Sleeping

App Files Files Community

Southisuk committed on Aug 20

Commit

ceda26c

verified ·

1 Parent(s): c13df45

Update app.py

Browse files

Files changed (1) hide show

app.py +531 -68

app.py CHANGED Viewed

@@ -1,75 +1,538 @@
-import json
-import gradio as gr
-from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import HuggingFaceEmbeddings
-from langchain.chains import RetrievalQA
-from langchain_community.llms import HuggingFacePipeline
-from transformers import pipeline
-# -----------------------------
-# 1) Load dataset + build vector DB
-# -----------------------------
-with open("nbb_merged_full.json", "r", encoding="utf-8") as f:
-    data = json.load(f)
-texts = []
-if isinstance(data, list):
-    for item in data:
-        if isinstance(item, dict) and "text" in item:
-            texts.append(item["text"])
-        elif isinstance(item, str):
-            texts.append(item)
-elif isinstance(data, dict):
-    if "text" in data:
-        texts.append(data["text"])
     else:
-        texts.extend([str(v) for v in data.values()])
-print(f"✅ Loaded {len(texts)} documents from dataset")
-# Embedding model
-embeddings = HuggingFaceEmbeddings(
-    model_name="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
 )
-# Build FAISS DB
-db = FAISS.from_texts(texts, embeddings)
-# -----------------------------
-# 2) Load LLM (lightweight)
-# -----------------------------
-# flan-t5-base = เบา / multilingual MiniLM = รองรับหลายภาษา
-model_name = "google/flan-t5-base"
-pipe = pipeline("text2text-generation", model=model_name, device=-1, max_new_tokens=256)
-llm = HuggingFacePipeline(pipeline=pipe)
-# -----------------------------
-# 3) QA Chain (RAG)
-# -----------------------------
-retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 3})
-qa = RetrievalQA.from_chain_type(
-    llm=llm,
-    retriever=retriever,
-    chain_type="stuff"
 )
-# -----------------------------
-# 4) Gradio UI
-# -----------------------------
-def chatbot(message, history):
-    if not message.strip():
-        return "⚠️ ກະລຸນາພິມຄຳຖາມ"
-    result = qa.run(message)
-    return result
-with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("<h1 style='text-align: center; color: green;'>🌾 Lao Chatbot (RAG)</h1>")
-    chatbot_ui = gr.Chatbot(height=500)
-    msg = gr.Textbox(placeholder="ພິມຄຳຖາມທີ່ນີ້...", label="Input")
-    clear_btn = gr.Button("🧹 Clear Chat")
-    msg.submit(fn=chatbot, inputs=[msg, chatbot_ui], outputs=chatbot_ui)
-    clear_btn.click(lambda: None, None, chatbot_ui, queue=False)
-if __name__ == "__main__":
-    demo.launch()

+import json, os, re
+import numpy as np
+from scipy.sparse import hstack, csr_matrix
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+from google.colab import files
+# === Cell 1: Load & normalize the merged dataset (supports content + csv_row) ===
+import json, os
+from google.colab import files
+# Candidate dataset locations, tried in order; the first one that exists is used.
+PREFERRED_PATHS = [
+    "/content/nbb_merged_full.json",   # Colab, after uploading the merged file
+    "/content/intents_dataset_v1_lo.json",  # legacy dataset, kept as a fallback
+    "/mnt/data/nbb_merged_full.json",  # other machines / environments
+]
+DATASET_PATH = next((candidate for candidate in PREFERRED_PATHS if os.path.exists(candidate)), None)
+if DATASET_PATH is None:
+    # Nothing found on disk: ask for an upload through the Colab file dialog
+    # and move the first uploaded file under /content.
+    print("กรุณาอัปโหลดไฟล์ dataset (.json) ที่รวมของใหม่ (เช่น nbb_merged_full.json)")
+    uploaded = files.upload()
+    assert uploaded, "ไม่ได้อัปโหลดไฟล์"
+    fname = next(iter(uploaded))
+    DATASET_PATH = f"/content/{fname}"
+    os.rename(fname, DATASET_PATH)
+# RAW_DATA holds the parsed JSON exactly as stored in the file.
+with open(DATASET_PATH, "r", encoding="utf-8") as dataset_file:
+    RAW_DATA = json.load(dataset_file)
+def _get(d, *chain, default=""):
+    """
+    Follow ``chain`` of keys through nested dicts starting at ``d``.
+    Returns the value reached after the last key, or ``default`` as soon as
+    the current node is not a dict or lacks the next key. With an empty
+    ``chain`` the starting object ``d`` itself is returned.
+    """
+    node = d
+    for key in chain:
+        if isinstance(node, dict) and key in node:
+            node = node[key]
+        else:
+            return default
+    return node
+def normalize_record(d: dict) -> dict:
+    """
+    Convert one raw dataset record into the unified document schema:
+    {
+      id, section, title,
+      content: { lo: "...", en_summary: "..." },
+      keywords: [...],
+      score_boost: float,
+      _raw: <the original record>
+    }
+    Two input layouts are handled:
+    - records that already carry ``content.lo`` (used as-is, stripped);
+    - CSV-derived rows (a ``data`` key, or ``source_type == "csv_row"``), where
+      ``data.answer`` becomes ``content.lo`` and topic / section / the first
+      120 characters of the question are appended to ``keywords``.
+    All text fields are coerced to ``str`` so a stray number or ``None`` in the
+    dataset does not make the record fail later during indexing.
+    Raises ``ValueError``/``TypeError`` when ``score_boost`` is present but
+    not convertible to ``float``.
+    """
+    import zlib  # local import: only used for the fallback id below
+    def _text(value) -> str:
+        # None / empty / falsy values collapse to "", everything else is stringified.
+        return str(value or "").strip()
+    def _str_list(value) -> list:
+        # Accept a list/tuple of keywords or one bare string; drop empty items.
+        if isinstance(value, str):
+            value = [value]
+        elif not isinstance(value, (list, tuple)):
+            value = []
+        return [str(v).strip() for v in value if v is not None and str(v).strip()]
+    raw_id = d.get("id")
+    if raw_id:
+        # Ids are later joined with ", ".join(...), so they must be strings.
+        base_id = str(raw_id)
+    else:
+        # The built-in hash() of a str is salted per process, which made auto
+        # ids change on every run; CRC32 of the canonical JSON is stable.
+        payload = json.dumps(d, ensure_ascii=False, sort_keys=True).encode("utf-8")
+        base_id = f"auto_{zlib.crc32(payload) & 0xffffffff}"
+    section = _text(d.get("section", d.get("source_type", "")))
+    title = _text(d.get("title") or _get(d, "data", "title") or _get(d, "data", "topic") or base_id)
+    # Original layout: content.lo / content.en_summary already present.
+    lo_text = _text(_get(d, "content", "lo", default=""))
+    en_sum  = _text(_get(d, "content", "en_summary", default=""))
+    # CSV-row layout: the answer is the content; the question helps retrieval.
+    if not lo_text and ("data" in d or d.get("source_type") == "csv_row"):
+        data = d.get("data")
+        if not isinstance(data, dict):
+            data = {}
+        lo_text = _text(data.get("answer"))
+        question = _text(data.get("question"))
+        topic = _text(data.get("topic"))
+        extra = [kw for kw in (topic, section, question[:120]) if kw]
+        # dict.fromkeys de-duplicates while keeping first-seen order.
+        keywords = list(dict.fromkeys(_str_list(d.get("keywords")) + extra))
     else:
+        keywords = _str_list(d.get("keywords"))
+    boost = d.get("score_boost")
+    score_boost = 1.0 if boost is None else float(boost)
+    return {
+        "id": base_id,
+        "section": section,
+        "title": title,
+        "content": {
+            "lo": lo_text,
+            "en_summary": en_sum
+        },
+        "keywords": keywords,
+        "score_boost": score_boost,
+        "_raw": d  # keep the original record for inspection / reference
+    }
+# Normalize every record and keep only those with indexable content.lo text.
+DOCS = []
+_SKIPPED = []  # (position, reason) for each record that could not be normalized
+for _pos, d in enumerate(RAW_DATA):
+    if not isinstance(d, dict):
+        # Also covers a dict at the top level of the file (iteration yields its keys).
+        _SKIPPED.append((_pos, f"not a JSON object: {type(d).__name__}"))
+        continue
+    try:
+        nd = normalize_record(d)
+    except (AttributeError, KeyError, TypeError, ValueError) as e:
+        # Malformed record: skip it, but keep the reason instead of hiding it.
+        _SKIPPED.append((_pos, f"{type(e).__name__}: {e}"))
+        continue
+    if str((nd.get("content") or {}).get("lo", "") or "").strip():
+        DOCS.append(nd)
+if _SKIPPED:
+    print(f"[WARN] Skipped {len(_SKIPPED)} malformed record(s); first: {_SKIPPED[0]}")
+# Explicit raise rather than assert: asserts are stripped under ``python -O``.
+if not DOCS:
+    raise ValueError("ไม่พบเอกสารที่มี content.lo หลัง normalize — กรุณาตรวจไฟล์ dataset")
+print(f"[OK] Loaded & normalized {len(DOCS)} docs from: {DATASET_PATH}")
+# === Cell 2: Build index text from the unified schema (content + csv_row) ===
+import re
+ZWSP = "\u200b"  # zero-width space
+def normalize_lo(text: str) -> str:
+    """
+    Return ``text`` with zero-width spaces turned into ordinary spaces, every
+    whitespace run collapsed to a single space and both ends trimmed.
+    Falsy input (``None``, ``""``) yields ``""``.
+    """
+    if not text:
+        return ""
+    spaced = text.replace(ZWSP, " ")
+    collapsed = re.sub(r"\s+", " ", spaced)
+    return collapsed.strip()
+def build_index_text(doc: dict) -> str:
+    """
+    Build the text that represents ``doc`` in the search index.
+    The title, Lao content, English summary, section and comma-joined keywords
+    are emitted one per line in that order; empty parts are left out.
+    Section and keywords are included to help retrieval (for CSV rows part of
+    the question has already been placed into the keywords).
+    """
+    body = doc.get("content", {})
+    title = normalize_lo(doc.get("title", ""))
+    lao = normalize_lo(body.get("lo", ""))
+    english = normalize_lo(body.get("en_summary", ""))
+    keyword_line = ", ".join(doc.get("keywords", []) or [])
+    section = normalize_lo(doc.get("section", ""))
+    pieces = (title, lao, english, section, keyword_line)
+    return "\n".join(filter(None, pieces)).strip()
+# Parallel per-document arrays: position i in CORPUS, IDS, SECTIONS and BOOSTS
+# all refer to DOCS[i], which is also row i of the TF-IDF matrices below.
+CORPUS   = [build_index_text(d) for d in DOCS]
+IDS      = [d["id"] for d in DOCS]
+SECTIONS = [d.get("section", "") for d in DOCS]
+BOOSTS   = [float(d.get("score_boost", 1.0)) for d in DOCS]
+# Lookup by id; if two documents share an id the later one replaces the earlier.
+ID2DOC   = {d["id"]: d for d in DOCS}
+print(f"[OK] Built corpus of {len(CORPUS)} items.")
+# Word-level TF-IDF.
+# NOTE(review): Lao is commonly written without spaces between words, so the
+# default word tokenizer may yield few useful terms here — confirm on real data.
+word_vec = TfidfVectorizer(
+    analyzer="word",
+    ngram_range=(1,2),           # 1-2 word n-grams are enough; not too heavy
+    min_df=1, max_df=0.95,
+    sublinear_tf=True
 )
+# Character-level TF-IDF.
+char_vec = TfidfVectorizer(
+    analyzer="char_wb",          # n-grams built inside word boundaries (keeps symbols from adding noise)
+    ngram_range=(3,5),
+    min_df=1, max_df=0.98,
+    sublinear_tf=True
+)
+Xw = word_vec.fit_transform(CORPUS)
+Xc = char_vec.fit_transform(CORPUS)
+# Word and character features side by side; one row per document.
+X = hstack([Xw, Xc]).tocsr()
+# Retrieval defaults. These three names are assigned again in the CONFIG
+# section later in this file, so code that reads them at call time sees the
+# later values.
+TOP_K = 20
+FINAL_TOP_N = 3
+MIN_CONF = 0.12  # TF-IDF scores run lower than embedding similarity; 0.1-0.2 is a starting gate
+# Stub: keyword-based intent routing is not implemented yet.
+def keyword_intent_hint(q: str) -> list:
+    """
+    Return the sections hinted at by keywords in ``q``.
+    Currently a stub that produces no hints for any query; replace with a
+    real implementation if needed.
+    """
+    return list()
+# Stub: no per-section weights are configured.
+SECTION_WEIGHTS = dict()
+def vectorize_query(q: str) -> csr_matrix:
+    """
+    Turn query ``q`` into a one-row sparse vector in the same feature space as
+    ``X``: the normalized text is run through ``word_vec`` and ``char_vec``
+    and the two results are stacked side by side.
+    """
+    batch = [normalize_lo(q)]
+    feature_blocks = [vectorizer.transform(batch) for vectorizer in (word_vec, char_vec)]
+    return hstack(feature_blocks).tocsr()
+def search(q: str, k: int = TOP_K):
+    """
+    Retrieve the ``k`` documents most similar to ``q``.
+    Candidates are the top ``k`` rows of ``X`` by cosine similarity. Each hit
+    is a dict with ``id``, ``score`` (raw cosine similarity), ``final_score``
+    (similarity times the document boost, the section weight when one is
+    configured, and 1.10 when the section is among the keyword hints) and
+    ``section``. Hits are returned sorted by ``final_score``, highest first.
+    """
+    similarities = cosine_similarity(vectorize_query(q), X)[0]  # shape = (N,)
+    ranked_rows = np.argsort(-similarities)[:k]
+    hinted_sections = keyword_intent_hint(q)
+    results = []
+    for row in ranked_rows:
+        section = SECTIONS[row]
+        similarity = float(similarities[row])
+        multiplier = BOOSTS[row]
+        if section in SECTION_WEIGHTS:   # per-section weight
+            multiplier *= SECTION_WEIGHTS[section]
+        if section in hinted_sections:   # keyword hint bonus
+            multiplier *= 1.10
+        results.append({
+            "id": IDS[row],
+            "score": similarity,
+            "final_score": similarity * multiplier,
+            "section": section
+        })
+    # Re-rank by boosted score.
+    return sorted(results, key=lambda hit: hit["final_score"], reverse=True)
+def answer_template_only(q: str):
+    """
+    Answer ``q`` straight from the knowledge base, without an LLM.
+    Returns ``(text, citation_ids)``. When nothing is retrieved or the best
+    raw similarity is below ``MIN_CONF`` the Lao "not found" message and an
+    empty list are returned; otherwise the first ``FINAL_TOP_N`` hits are
+    rendered as bulleted "title + Lao content" chunks.
+    """
+    hits = search(q, k=TOP_K)
+    if not hits or hits[0]["score"] < MIN_CONF:
+        return "ຂໍອະໄພ ບໍ່ພົບຂໍ້ມູນໃນຖານຄວາມຮູ້.", []
+    cits = [hit["id"] for hit in hits[:FINAL_TOP_N]]
+    chunks = []
+    for doc_id in cits:
+        doc = ID2DOC[doc_id]
+        heading = doc.get("title", doc["id"])
+        body = doc.get("content",{}).get("lo","")
+        chunks.append(f"• {heading}\n{body}")
+    return "\n\n".join(chunks), cits
+# ============================================================
+# LLM-guarded RAG (builds on mode 1) with minimal extra installation.
+# Uses llama-cpp-python and a small GGUF model
+# (Qwen2.5-3B or Llama 3.2 3B, 4-bit).
+# ============================================================
+import os, json, re, time
+import subprocess, sys
+from google.colab import files
+# -------- 1) Install llama-cpp-python (the only extra package) --------
+try:
+    import llama_cpp
+except Exception:
+    # Install only when missing (version noted as stable with Py311/Colab).
+    # "!pip ..." is IPython-only syntax and a SyntaxError in a plain .py file,
+    # so pip is run through the current interpreter instead.
+    subprocess.check_call(
+        [sys.executable, "-m", "pip", "install", "-q", "llama-cpp-python==0.2.90"]
+    )
+    import llama_cpp
+from llama_cpp import Llama
+# -------- 2) Prepare the GGUF model --------
+# Choose one:
+# (A) let the script try to download from Hugging Face (needs network access)
+# (B) to avoid downloading: upload a .gguf file yourself and name it local-llm.gguf
+MODEL_PATH = "/content/local-llm.gguf"
+def ensure_model():
+    """
+    Make sure a GGUF model file exists at ``MODEL_PATH``.
+    Returns True at once when the file is already there. Otherwise the user
+    is asked on stdin to either upload a file through the Colab dialog
+    (answer "1") or download Qwen2.5-3B-Instruct Q4_K_M from Hugging Face
+    (any other answer); the resulting file is renamed to ``MODEL_PATH``.
+    Returns:
+        True when the model file is in place, False when the download failed.
+    Raises:
+        AssertionError: when the upload dialog returns no file.
+        subprocess.CalledProcessError: when installing huggingface_hub fails.
+    """
+    import subprocess, sys  # local import: only needed on the install path
+    if os.path.exists(MODEL_PATH):
+        return True
+    print("ยังไม่มีโมเดล .gguf → เลือกวิธีใดวิธีหนึ่ง:")
+    print("  1) อัปโหลดไฟล์ .gguf ด้วยตนเอง (แนะนำ Q4_K_M ~ 2GB) แล้วตั้งชื่อ local-llm.gguf")
+    print("  2) หรือให้ช่วยดาวน์โหลด (ต้องใช้เน็ต): Qwen2.5-3B-Instruct Q4_K_M")
+    choice = input("พิมพ์ 1 (upload) / 2 (download): ").strip()
+    if choice == "1":
+        uploaded = files.upload()
+        assert uploaded, "ไม่ได้อัปโหลดไฟล์"
+        fname = list(uploaded.keys())[0]
+        os.rename(fname, MODEL_PATH)
+        print("อัปโหลดแล้ว:", MODEL_PATH)
+        return True
+    # Any other answer: download from the Hugging Face Hub.
+    try:
+        from huggingface_hub import hf_hub_download
+    except Exception:
+        # Install only when needed. "!pip ..." is IPython-only syntax and a
+        # SyntaxError in a plain .py file, so pip is run via the interpreter.
+        subprocess.check_call(
+            [sys.executable, "-m", "pip", "install", "-q", "huggingface_hub==0.25.2"]
+        )
+        from huggingface_hub import hf_hub_download
+    REPO_ID = "Qwen/Qwen2.5-3B-Instruct-GGUF"
+    FNAME   = "qwen2.5-3b-instruct-q4_k_m.gguf"
+    try:
+        p = hf_hub_download(repo_id=REPO_ID, filename=FNAME, local_dir="/content", local_dir_use_symlinks=False)
+        os.rename(p, MODEL_PATH)
+        print("ดาวน์โหลดสำเร็จ:", MODEL_PATH)
+        return True
+    except Exception as e:
+        # Best effort: report the failure and let the caller decide what to do.
+        print("ดาวน์โหลดไม่สำเร็จ:", e)
+        print("โปรดอัปโหลดไฟล์ .gguf เอง แล้วตั้งชื่อ local-llm.gguf")
+        return False
+# Stop here unless a model file is actually available at MODEL_PATH.
+ok = ensure_model()
+assert ok and os.path.exists(MODEL_PATH), "ยังไม่มีโมเดล .gguf ให้ใช้งาน"
+# -------- 3) Load the model with llama.cpp + warm it up --------
+from llama_cpp import Llama
+# Tune these per machine:
+# - Colab (T4): n_gpu_layers=128, n_batch=512
+# - GTX1650 4GB: n_gpu_layers=24~32, n_batch=256 (reduce on OOM, or set 0 = CPU)
+LLM = Llama(
+    model_path=MODEL_PATH,
+    n_ctx=2048,        # enough for one context chunk + the answer
+    n_threads=8,
+    n_gpu_layers=128,  # <-- on a GTX1650 use 24~32 instead
+    n_batch=512,       # <-- on a GTX1650 use 256
+    logits_all=False,
+    verbose=False
 )
+print("✅ LLM loaded:", MODEL_PATH)
+# One warm-up call up front to reduce the delay of later responses;
+# a failure here is reported but does not stop the script.
+try:
+    _ = LLM("Warmup", max_tokens=1)
+    print("🔥 Warmup done")
+except Exception as e:
+    print("⚠️ Warmup skipped:", e)
+# =========================
+# BEST: Guarded RAG + Auto-Judge + Router + Logging (works with your llama.cpp setup)
+# Requires from earlier in the file: LLM, search(query,k) -> hits, ID2DOC (dict),
+# and (if present) answer_template_only()
+# =========================
+import os, re, json, time
+from datetime import datetime
+# ---------- CONFIG ----------
+# Earlier trial values, kept for reference:
+#TOP_K = 5
+#CHUNK_LIMIT = 250
+#MAX_TOKENS = 64
+# TOP_K, FINAL_TOP_N and MIN_CONF replace the values assigned earlier in this
+# file; functions that read these globals at call time use the values below.
+TOP_K        = 10           # documents retrieved in the first pass
+FINAL_TOP_N  = 1            # only 1 chunk is sent to the LLM (fast and stable)
+MIN_CONF     = 0.14         # retrieval confidence threshold (TF-IDF)
+CHUNK_LIMIT  = 360          # max characters of context kept per chunk
+MAX_TOKENS   = 96           # cap on the answer length
+TEMP         = 0.2
+QUALITY_LOG  = "/content/quality_feedback.jsonl"
+# ---------- SYSTEM RULES ----------
+# Instruction text placed at the top of every answer prompt.
+SYSTEM_RULES = """
+You are a Lao banking assistant for NAYOBY BANK (NBB).
+HARD RULES (do not break):
+1) Answer ONLY from the provided Context. Do NOT use outside knowledge or make assumptions.
+2) If the answer is not clearly in the Context, reply in Lao: "ຂໍອະໄພ ຂ້ອຍບໍ່ພົບຂໍ້ມູນໃນຖານຄວາມຮູ້."
+3) Cite the evidence ids at the end in square brackets (1–3 ids).
+4) Default reply in Lao; if the whole user question is Thai/English, reply with that language; keep product terms exactly as in Context.
+5) Never invent numbers, dates, fees, branches, or contacts beyond the Context.
+STYLE:
+- Concise (≤ 100 Lao words). Direct answer first, bullets if needed.
+- Keep terminology exactly as in Context.
+FORMAT:
+- End the last line with citations like: [id_a, id_b]
+"""
+# ---------- PROMPT BUILDER ----------
+def _build_context(hits, n=FINAL_TOP_N, limit=CHUNK_LIMIT):
+    """
+    Render the first ``n`` hits as prompt context.
+    Each chunk is "[id] title" followed by the document's Lao content cut to
+    ``limit`` characters; chunks are separated by a blank line.
+    Returns ``(context_text, ids_used)``.
+    """
+    used = [hit["id"] for hit in hits[:n]]
+    parts = []
+    for doc_id in used:
+        doc = ID2DOC[doc_id]
+        heading = doc.get("title", doc_id)
+        excerpt = (doc.get("content", {}).get("lo", "") or "")[:limit]
+        parts.append(f"[{doc_id}] {heading}\n{excerpt}")
+    return "\n\n".join(parts), used
+def _build_prompt(query, hits):
+    """
+    Assemble the full answer prompt: the system rules, then the context built
+    from ``hits``, the user question and an open "Answer" section, each
+    separated by a blank line.
+    """
+    ctx = _build_context(hits)[0]
+    sections = [
+        f"{SYSTEM_RULES}",
+        f"### Context:\n{ctx}",
+        f"### Question:\n{query}",
+        "### Answer:\n",
+    ]
+    return "\n\n".join(sections)
+# ---------- LLM ANSWER (Guarded) ----------
+def llm_guarded_answer_best(query: str):
+    """
+    Answer ``query`` with the LLM, restricted to retrieved context.
+    Returns ``(text, citation_ids, hits)``. When retrieval finds nothing or
+    the best raw similarity is below ``MIN_CONF`` the LLM is not called and
+    the Lao "not found" message is returned with no citations. Otherwise the
+    citations are the ids of the first ``FINAL_TOP_N`` hits.
+    """
+    hits = search(query, k=TOP_K)
+    if not hits or hits[0]["score"] < MIN_CONF:
+        return "ຂໍອະໄພ ບໍ່ພົບຂໍ້ມູນໃນຖານຄວາມຮູ້.", [], hits
+    prompt = _build_prompt(query, hits)
+    # No per-request warm-up here: the model is warmed once right after it is
+    # loaded, and repeating it cost an extra inference call on every query
+    # (behind a bare ``except`` that also swallowed KeyboardInterrupt).
+    out = LLM(
+        prompt,
+        max_tokens=MAX_TOKENS,
+        temperature=TEMP,
+        top_p=0.9,
+        repeat_penalty=1.1,
+        # Stop before the model starts inventing a new question/context section.
+        stop=["</s>", "### Question:", "### Context:"]
+    )
+    text = out["choices"][0]["text"].strip()
+    cites = [h["id"] for h in hits[:FINAL_TOP_N]]
+    return text, cites, hits
+# ---------- TEMPLATE FALLBACK (short version, used when no other one exists) ----------
+def _template_only_from_hits(hits):
+    """
+    Return ``(lao_text, [doc_id])`` for the first hit, or the Lao "not found"
+    message with an empty list when ``hits`` is empty.
+    """
+    if hits:
+        doc = ID2DOC[hits[0]["id"]]
+        lao_text = doc.get("content", {}).get("lo", "") or ""
+        return lao_text, [doc["id"]]
+    return "ຂໍອະໄພ ບໍ່ພົບຂໍ້ມູນໃນຖານຄວາມຮູ້.", []
+# ---------- HEURISTICS (quick guards against mistakes) ----------
+def _tok(s):
+    """Lower-case ``s`` and return its runs of word characters, ``-``, ``.`` and ``%``."""
+    lowered = s.lower()
+    return re.findall(r"[\w\-\.%]+", lowered, flags=re.U)
+def _numbers(s):
+    """Return the digit runs in ``s``, each optionally followed by one ``.``/``,`` plus more digits."""
+    pattern = r"\d+(?:[.,]\d+)?"
+    return re.findall(pattern, s)
+def heuristic_label(query, answer, ctx_text, hits, citations):
+    """
+    Grade ``answer`` against ``ctx_text`` with cheap rules.
+    Returns ``(verdict, reason)`` where verdict is "CORRECT", "ALMOST" or
+    "INCORRECT". The rules, in order:
+    1. no citations                                   -> INCORRECT
+    2. best raw similarity below ``MIN_CONF``         -> INCORRECT
+    3. a number in the answer that is not in context  -> INCORRECT
+    4. under 25% of answer tokens found in context    -> ALMOST
+    5. otherwise                                      -> CORRECT
+    ``query`` is accepted for signature compatibility and is not used.
+    """
+    max_sim = hits[0]["score"] if hits else 0.0
+    if not citations:
+        return "INCORRECT", "no citations"
+    if max_sim < MIN_CONF:
+        return "INCORRECT", f"low sim {max_sim:.2f}"
+    # Numbers that appear in the answer but nowhere in the context.
+    invented = set(_numbers(answer)) - set(_numbers(ctx_text))
+    if invented:
+        return "INCORRECT", f"invented numbers: {sorted(invented)}"
+    # Share of distinct answer tokens that also occur in the context.
+    ans_t = set(_tok(answer))
+    overlap = len(ans_t & set(_tok(ctx_text))) / max(1, len(ans_t))
+    if overlap < 0.25:
+        return "ALMOST", f"low overlap {overlap:.2f}"
+    return "CORRECT", f"sim {max_sim:.2f}, overlap {overlap:.2f}"
+# ---------- LLM-AS-A-JUDGE (uses your own model) ----------
+# Instruction text for the grading prompt; asks for a JSON object with
+# "verdict" and "reason" keys.
+JUDGE_PROMPT = """
+You are a strict evaluator for a Lao banking RAG system.
+Decide if the Answer is CORRECT, ALMOST, or INCORRECT based ONLY on the Context and the Question.
+- CORRECT: fully supported by Context; no invented facts; answers the question.
+- ALMOST: mostly supported but missing a key detail or minor phrasing errors.
+- INCORRECT: unsupported/contradicted/invented/wrong numbers/off-topic.
+Return pure JSON: {"verdict":"CORRECT|ALMOST|INCORRECT","reason":"<=25 Lao words"}
+"""
+def judge_with_llm_same_model(question, ctx_text, answer):
+    """
+    Ask the same LLM to grade ``answer`` against ``ctx_text`` and ``question``.
+    Always returns a dict whose "verdict" and "reason" values are strings.
+    If the model output contains no parsable JSON object, the result is
+    ``{"verdict": "INCORRECT", "reason": "judge parsing failed"}``.
+    """
+    failed = {"verdict":"INCORRECT","reason":"judge parsing failed"}
+    prompt = (
+        f"{JUDGE_PROMPT}\n\n"
+        f"### Context:\n{ctx_text}\n\n"
+        f"### Question:\n{question}\n\n"
+        f"### Answer:\n{answer}\n\n"
+        "### Your JSON:\n"
+    )
+    res = LLM(prompt, max_tokens=96, temperature=0.0, stop=["</s>", "###"])
+    raw = res["choices"][0]["text"].strip()
+    # Pull out the outermost {...} span in case the model added extra text.
+    m = re.search(r"\{.*\}", raw, re.S)
+    try:
+        parsed = json.loads(m.group(0) if m else raw)
+    except ValueError:  # json.JSONDecodeError is a ValueError subclass
+        return failed
+    # Valid JSON that is not an object (a list, a bare string, ...) would
+    # break callers that use .get(); treat it as a parsing failure.
+    if not isinstance(parsed, dict):
+        return failed
+    parsed["verdict"] = str(parsed.get("verdict") or "")
+    parsed["reason"] = str(parsed.get("reason") or "")
+    return parsed
+# ---------- ROUTER + LOGGING ----------
+def _build_ctx_text(hits):
+    """Return ``(context_text, ids_used)`` for ``hits`` using the current FINAL_TOP_N and CHUNK_LIMIT."""
+    return _build_context(hits, n=FINAL_TOP_N, limit=CHUNK_LIMIT)
+def log_quality(record: dict):
+    """Append ``record`` to ``QUALITY_LOG`` as one line of JSON (non-ASCII text kept as-is)."""
+    with open(QUALITY_LOG, "a", encoding="utf-8") as sink:
+        sink.write(f"{json.dumps(record, ensure_ascii=False)}\n")
+def smart_answer(query: str, use_judge=True, allow_template_fallback=True):
+    """
+    Answer ``query`` with the guarded LLM, grade the answer, and fall back to
+    knowledge-base text when the grade is not good enough.
+    Args:
+        query: the user question.
+        use_judge: also grade the answer with the LLM judge.
+        allow_template_fallback: replace an "ALMOST"/"INCORRECT" answer with
+            text taken directly from the knowledge base.
+    Returns:
+        ``(answer, citation_ids, final_verdict, final_reason)``. The final
+        verdict is the stricter of the heuristic and judge verdicts; after a
+        fallback it becomes "CORRECT" only when the fallback produced at
+        least one citation.
+    Every call appends one record to ``QUALITY_LOG``; a failure to write the
+    log is reported and does not prevent the answer from being returned.
+    """
+    # 1) Guarded LLM answer (no citations means retrieval was too weak).
+    ans, cites, hits = llm_guarded_answer_best(query)
+    # 2) Context text as it was given to the LLM.
+    ctx_text, used_ids = _build_ctx_text(hits)
+    # 3) Heuristics.
+    h_verdict, h_reason = heuristic_label(query, ans, ctx_text, hits, cites)
+    # 4) LLM judge. Skipped when there are no citations: the heuristic verdict
+    #    is already the lowest one, so the extra inference cannot change it.
+    j_verdict, j_reason = None, None
+    if use_judge and cites:
+        j = judge_with_llm_same_model(query, ctx_text, ans)
+        if isinstance(j, dict):
+            # str(...) guards against null / non-string fields in the judge JSON.
+            j_verdict = str(j.get("verdict") or "").upper()
+            j_reason  = str(j.get("reason") or "").strip()
+    # 5) Combine verdicts strictly: keep the worse of the two.
+    order = {"INCORRECT":0, "ALMOST":1, "CORRECT":2}
+    final_v = h_verdict
+    final_r = f"Heur:{h_reason}"
+    if j_verdict in order and order[j_verdict] < order[final_v]:
+        final_v = j_verdict
+        final_r = f"Judge:{j_reason} | Heur:{h_reason}"
+    # 6) Not good enough -> fall back to template-only text (if allowed).
+    if allow_template_fallback and final_v in ("INCORRECT","ALMOST"):
+        try:
+            t_ans, t_cites = answer_template_only(query)
+        except NameError:
+            t_ans, t_cites = _template_only_from_hits(hits)
+        ans = t_ans
+        cites = t_cites
+        # Knowledge-base text is quoted verbatim, so it counts as CORRECT, but
+        # only when something was actually found; a "not found" reply keeps
+        # its failing verdict so retrieval misses stay visible in the log.
+        if t_cites:
+            final_v = "CORRECT"
+    # 7) Log for later quality review.
+    rec = {
+        "ts": datetime.utcnow().isoformat(),
+        "query": query,
+        "answer": ans,
+        "citations": cites,
+        "final_verdict": final_v,
+        "final_reason": final_r,
+        "heur_verdict": h_verdict, "heur_reason": h_reason,
+        "judge_verdict": j_verdict, "judge_reason": j_reason,
+        "top_sim": hits[0]["score"] if hits else 0.0,
+        "used_ids": used_ids
+    }
+    try:
+        log_dir = os.path.dirname(QUALITY_LOG)
+        if log_dir:  # os.makedirs("") raises, so skip it for a bare file name
+            os.makedirs(log_dir, exist_ok=True)
+        log_quality(rec)
+    except OSError as e:
+        # Logging is best effort: never lose an answer because the log
+        # directory is missing or read-only.
+        print("⚠️ quality log not written:", e)
+    # 8) Result.
+    return ans, cites, final_v, final_r
+# ---------- Usage example ----------
+# ans, cites, verdict, reason = smart_answer("ອັດຕາດອກເບ້ຍ ໄລຍະສັ້ນ ເທົ່າໃດ?")
+# print(ans, cites, verdict, reason)
+import gradio as gr
+def gradio_smart(q):
+    """
+    UI callback: run ``smart_answer`` on ``q`` and format the result as the
+    answer, then a citations line and an evaluation line. Any exception is
+    turned into an error string instead of being raised.
+    """
+    try:
+        result = smart_answer(q, use_judge=True, allow_template_fallback=True)
+        ans, cites, verdict, reason = result
+        cite_str = "-" if not cites else ", ".join(cites)
+        return f"{ans}\n\nອ້າງອີງ: {cite_str}\nຜົນປະເມີນ: {verdict} — {reason}"
+    except Exception as err:
+        return f"⚠️ Error: {err}"
+# Gradio UI: a question box, a button and an answer box; clicking the button
+# sends the question text to gradio_smart and shows its return value.
+with gr.Blocks(title="NBB RAG — Smart (Guarded + Judge + Router)") as demo:
+    gr.Markdown("### ພິມຄຳຖາມ → ລະບົບຈະສະຫຼຸບ Context")
+    q = gr.Textbox(label="ຄຳຖາມ", lines=2)
+    btn = gr.Button("ຖາມ")
+    out = gr.Textbox(label="ຄຳຕອບ", lines=18)
+    btn.click(fn=gradio_smart, inputs=q, outputs=out)
+# Starts the app as soon as this line runs (no __main__ guard).
+demo.launch()