Update app.py
app.py CHANGED

@@ -1,9 +1,9 @@
-import
+import json
 import numpy as np
-from scipy.sparse import hstack
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.metrics.pairwise import cosine_similarity
 import gradio as gr
+from huggingface_hub import hf_hub_download
 from llama_cpp import Llama
 
 # -------------------- Load Dataset --------------------
@@ -31,23 +31,20 @@ CORPUS = [d["content"]["lo"] for d in DOCS]
 IDS = [d["id"] for d in DOCS]
 ID2DOC = {d["id"]: d for d in DOCS}
 
-
-X =
+vectorizer = TfidfVectorizer(ngram_range=(1,2), min_df=1, max_df=0.95, sublinear_tf=True)
+X = vectorizer.fit_transform(CORPUS)
 
 # -------------------- Search --------------------
-def search(
-    qv =
+def search(query, k=3):
+    qv = vectorizer.transform([query])
     sims = cosine_similarity(qv, X)[0]
     idxs = np.argsort(-sims)[:k]
     return [{"id": IDS[i], "score": sims[i]} for i in idxs]
 
-# -------------------- Load
-from huggingface_hub import hf_hub_download
-from llama_cpp import Llama
-
+# -------------------- Load LLM --------------------
 MODEL_PATH = hf_hub_download(
-    repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
-    filename="
+    repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
+    filename="qwen2.5-3b-instruct-q4_k_m.gguf"  # ✅ use the actual file name
 )
 
 LLM = Llama(
@@ -57,32 +54,36 @@ LLM = Llama(
     n_gpu_layers=32
 )
 
-
 SYSTEM_RULES = """
 You are a Lao banking assistant for NAYOBY BANK (NBB).
 Answer ONLY from Context. If not found, reply:
 "ຂໍອະໄພ ຂ້ອຍບໍ່ພົບຂໍ້ມູນໃນຖານຄວາມຮູ້."
 """
 
-def build_prompt(
+def build_prompt(question, hits):
     ctx = "\n\n".join([f"[{h['id']}] {ID2DOC[h['id']]['content']['lo']}" for h in hits])
-    return f"{SYSTEM_RULES}\n\nContext:\n{ctx}\n\nQuestion:\n{
+    return f"{SYSTEM_RULES}\n\nContext:\n{ctx}\n\nQuestion:\n{question}\n\nAnswer:"
 
-def smart_answer(
-    hits = search(
+def smart_answer(message):
+    hits = search(message, k=3)
     if not hits or hits[0]["score"] < 0.1:
         return "ຂໍອະໄພ ບໍ່ພົບຂໍ້ມູນໃນຖານຄວາມຮູ້."
-    prompt = build_prompt(
-    out = LLM(prompt, max_tokens=128, temperature=0.2
-
-    return
+    prompt = build_prompt(message, hits)
+    out = LLM(prompt, max_tokens=128, temperature=0.2)
+    answer = out["choices"][0]["text"].strip()
+    return answer
+
+# -------------------- Gradio Chatbot --------------------
+def respond(message, history):
+    answer = smart_answer(message)
+    history = history + [(message, answer)]
+    return history
 
-# -------------------- Gradio UI --------------------
 with gr.Blocks() as demo:
     gr.Markdown("## 🌾 Lao Chatbot (NBB)")
     chatbot_ui = gr.Chatbot()
     msg = gr.Textbox(placeholder="ພິມຄຳຖາມບ່ອນນີ້...")
-    msg.submit(
+    msg.submit(respond, [msg, chatbot_ui], chatbot_ui)
 
 if __name__ == "__main__":
     demo.launch()
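The retrieval half of the updated file can be exercised on its own, without downloading the GGUF model. The sketch below is not part of the commit: the document ids and Lao strings are made-up placeholders, and the {"id": ..., "content": {"lo": ...}} schema is inferred from the lines visible in this diff (the dataset-loading section itself is not shown). It reproduces the same TF-IDF + cosine-similarity ranking that the new search() performs.

# Minimal standalone sketch (illustrative, not part of the commit).
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Hypothetical stand-ins for the real DOCS loaded elsewhere in app.py.
DOCS = [
    {"id": "loan-001", "content": {"lo": "ເງື່ອນໄຂ ການ ກູ້ ເງິນ ຈາກ ທະນາຄານ"}},
    {"id": "deposit-001", "content": {"lo": "ວິທີ ເປີດ ບັນຊີ ເງິນຝາກ"}},
]
CORPUS = [d["content"]["lo"] for d in DOCS]
IDS = [d["id"] for d in DOCS]

# Same vectorizer settings as the updated app.py.
vectorizer = TfidfVectorizer(ngram_range=(1, 2), min_df=1, max_df=0.95, sublinear_tf=True)
X = vectorizer.fit_transform(CORPUS)

def search(query, k=3):
    qv = vectorizer.transform([query])      # project the query into TF-IDF space
    sims = cosine_similarity(qv, X)[0]      # cosine score against every document
    idxs = np.argsort(-sims)[:k]            # top-k indices, highest score first
    return [{"id": IDS[i], "score": float(sims[i])} for i in idxs]

if __name__ == "__main__":
    # Scores near 0.0 are exactly what smart_answer()'s 0.1 threshold falls back on.
    print(search("ເປີດ ບັນຊີ", k=2))

Running this prints the ranked ids with their cosine scores; queries that share no TF-IDF term with the corpus score near zero, which is the case the commit's 0.1 threshold turns into the Lao "not found" reply.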