Southisuk committed on
Commit
dcf08ea
·
verified ·
1 Parent(s): 9c534d4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -23
app.py CHANGED
@@ -1,9 +1,9 @@
1
- import os, json, re
2
  import numpy as np
3
- from scipy.sparse import hstack
4
  from sklearn.feature_extraction.text import TfidfVectorizer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
  import gradio as gr
 
7
  from llama_cpp import Llama
8
 
9
  # -------------------- Load Dataset --------------------
@@ -31,23 +31,20 @@ CORPUS = [d["content"]["lo"] for d in DOCS]
31
  IDS = [d["id"] for d in DOCS]
32
  ID2DOC = {d["id"]: d for d in DOCS}
33
 
34
- word_vec = TfidfVectorizer(ngram_range=(1,2), min_df=1, max_df=0.95, sublinear_tf=True)
35
- X = word_vec.fit_transform(CORPUS)
36
 
37
  # -------------------- Search --------------------
38
- def search(q, k=3):
39
- qv = word_vec.transform([q])
40
  sims = cosine_similarity(qv, X)[0]
41
  idxs = np.argsort(-sims)[:k]
42
  return [{"id": IDS[i], "score": sims[i]} for i in idxs]
43
 
44
- # -------------------- Load Local LLM --------------------
45
- from huggingface_hub import hf_hub_download
46
- from llama_cpp import Llama
47
-
48
  MODEL_PATH = hf_hub_download(
49
- repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
50
- filename="Qwen2.5-3B-Instruct-Q4_K_M.gguf"
51
  )
52
 
53
  LLM = Llama(
@@ -57,32 +54,36 @@ LLM = Llama(
57
  n_gpu_layers=32
58
  )
59
 
60
-
61
  SYSTEM_RULES = """
62
  You are a Lao banking assistant for NAYOBY BANK (NBB).
63
  Answer ONLY from Context. If not found, reply:
64
  "ຂໍອະໄພ ຂ້ອຍບໍ່ພົບຂໍ້ມູນໃນຖານຄວາມຮູ້."
65
  """
66
 
67
- def build_prompt(q, hits):
68
  ctx = "\n\n".join([f"[{h['id']}] {ID2DOC[h['id']]['content']['lo']}" for h in hits])
69
- return f"{SYSTEM_RULES}\n\nContext:\n{ctx}\n\nQuestion:\n{q}\n\nAnswer:"
70
 
71
- def smart_answer(q):
72
- hits = search(q, k=3)
73
  if not hits or hits[0]["score"] < 0.1:
74
  return "ຂໍອະໄພ ບໍ່ພົບຂໍ້ມູນໃນຖານຄວາມຮູ້."
75
- prompt = build_prompt(q, hits)
76
- out = LLM(prompt, max_tokens=128, temperature=0.2, stop=["Question:"])
77
- ans = out["choices"][0]["text"].strip()
78
- return ans
 
 
 
 
 
 
79
 
80
- # -------------------- Gradio UI --------------------
81
  with gr.Blocks() as demo:
82
  gr.Markdown("## 🌾 Lao Chatbot (NBB)")
83
  chatbot_ui = gr.Chatbot()
84
  msg = gr.Textbox(placeholder="ພິມຄຳຖາມບ່ອນນີ້...")
85
- msg.submit(fn=lambda m, h: (h + [[m, smart_answer(m)]]), inputs=[msg, chatbot_ui], outputs=chatbot_ui)
86
 
87
  if __name__ == "__main__":
88
  demo.launch()
 
1
+ import json
2
  import numpy as np
 
3
  from sklearn.feature_extraction.text import TfidfVectorizer
4
  from sklearn.metrics.pairwise import cosine_similarity
5
  import gradio as gr
6
+ from huggingface_hub import hf_hub_download
7
  from llama_cpp import Llama
8
 
9
  # -------------------- Load Dataset --------------------
 
31
  IDS = [d["id"] for d in DOCS]
32
  ID2DOC = {d["id"]: d for d in DOCS}
33
 
34
+ vectorizer = TfidfVectorizer(ngram_range=(1,2), min_df=1, max_df=0.95, sublinear_tf=True)
35
+ X = vectorizer.fit_transform(CORPUS)
36
 
37
  # -------------------- Search --------------------
38
+ def search(query, k=3):
39
+ qv = vectorizer.transform([query])
40
  sims = cosine_similarity(qv, X)[0]
41
  idxs = np.argsort(-sims)[:k]
42
  return [{"id": IDS[i], "score": sims[i]} for i in idxs]
43
 
44
+ # -------------------- Load LLM --------------------
 
 
 
45
  MODEL_PATH = hf_hub_download(
46
+ repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
47
+ filename="qwen2.5-3b-instruct-q4_k_m.gguf" # ✅ ใช้ไฟล์จริง
48
  )
49
 
50
  LLM = Llama(
 
54
  n_gpu_layers=32
55
  )
56
 
 
57
  SYSTEM_RULES = """
58
  You are a Lao banking assistant for NAYOBY BANK (NBB).
59
  Answer ONLY from Context. If not found, reply:
60
  "ຂໍອະໄພ ຂ້ອຍບໍ່ພົບຂໍ້ມູນໃນຖານຄວາມຮູ້."
61
  """
62
 
63
+ def build_prompt(question, hits):
64
  ctx = "\n\n".join([f"[{h['id']}] {ID2DOC[h['id']]['content']['lo']}" for h in hits])
65
+ return f"{SYSTEM_RULES}\n\nContext:\n{ctx}\n\nQuestion:\n{question}\n\nAnswer:"
66
 
67
+ def smart_answer(message):
68
+ hits = search(message, k=3)
69
  if not hits or hits[0]["score"] < 0.1:
70
  return "ຂໍອະໄພ ບໍ່ພົບຂໍ້ມູນໃນຖານຄວາມຮູ້."
71
+ prompt = build_prompt(message, hits)
72
+ out = LLM(prompt, max_tokens=128, temperature=0.2)
73
+ answer = out["choices"][0]["text"].strip()
74
+ return answer
75
+
76
+ # -------------------- Gradio Chatbot --------------------
77
+ def respond(message, history):
78
+ answer = smart_answer(message)
79
+ history = history + [(message, answer)]
80
+ return history
81
 
 
82
  with gr.Blocks() as demo:
83
  gr.Markdown("## 🌾 Lao Chatbot (NBB)")
84
  chatbot_ui = gr.Chatbot()
85
  msg = gr.Textbox(placeholder="ພິມຄຳຖາມບ່ອນນີ້...")
86
+ msg.submit(respond, [msg, chatbot_ui], chatbot_ui)
87
 
88
  if __name__ == "__main__":
89
  demo.launch()