Southisuk committed on
Commit
bd917a1
·
verified ·
1 Parent(s): 9c58f40

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -6
app.py CHANGED
@@ -6,6 +6,15 @@ import gradio as gr
6
  from huggingface_hub import hf_hub_download
7
  from llama_cpp import Llama
8
 
 
 
 
 
 
 
 
 
 
9
  # -------------------- Load Dataset --------------------
10
  DATASET_PATH = "nbb_merged_full.json"
11
  with open(DATASET_PATH, "r", encoding="utf-8") as f:
@@ -25,7 +34,7 @@ def normalize_record(d):
25
  }
26
 
27
  DOCS = [normalize_record(x) for x in RAW_DATA if normalize_record(x)["content"]["lo"].strip()]
28
- assert DOCS, "Dataset ไม่มี content.lo"
29
 
30
  CORPUS = [d["content"]["lo"] for d in DOCS]
31
  IDS = [d["id"] for d in DOCS]
@@ -49,9 +58,12 @@ MODEL_PATH = hf_hub_download(
49
 
50
  LLM = Llama(
51
  model_path=MODEL_PATH,
52
- n_ctx=2048,
53
- n_threads=4,
54
- n_gpu_layers=32
 
 
 
55
  )
56
 
57
  SYSTEM_RULES = """
@@ -73,9 +85,13 @@ FORMAT:
73
  """
74
 
75
  def build_prompt(question, hits):
76
- ctx = "\n\n".join([f"[{h['id']}] {ID2DOC[h['id']]['content']['lo']}" for h in hits])
 
 
 
77
  return f"{SYSTEM_RULES}\n\nContext:\n{ctx}\n\nQuestion:\n{question}\n\nAnswer:"
78
 
 
79
  def smart_answer(message):
80
  hits = search(message, k=3)
81
  if not hits or hits[0]["score"] < 0.1:
@@ -92,7 +108,7 @@ def respond(message, history):
92
  return history
93
 
94
  with gr.Blocks() as demo:
95
- gr.Markdown("## 🌾 Lao Chatbot (NBB)")
96
  chatbot_ui = gr.Chatbot()
97
  msg = gr.Textbox(placeholder="ພິມຄຳຖາມບ່ອນນີ້...")
98
  msg.submit(respond, [msg, chatbot_ui], chatbot_ui)
 
6
  from huggingface_hub import hf_hub_download
7
  from llama_cpp import Llama
8
 
9
+ # -------------------- Config --------------------
10
+ TOP_K = 10
11
+ FINAL_TOP_N = 1
12
+ MIN_CONF = 0.14
13
+ CHUNK_LIMIT = 360
14
+ MAX_TOKENS = 96
15
+ TEMP = 0.2
16
+ QUALITY_LOG = "quality_feedback.jsonl"
17
+
18
  # -------------------- Load Dataset --------------------
19
  DATASET_PATH = "nbb_merged_full.json"
20
  with open(DATASET_PATH, "r", encoding="utf-8") as f:
 
34
  }
35
 
36
  DOCS = [normalize_record(x) for x in RAW_DATA if normalize_record(x)["content"]["lo"].strip()]
37
+ assert DOCS, "Dataset ບໍ່ມີ content.lo"
38
 
39
  CORPUS = [d["content"]["lo"] for d in DOCS]
40
  IDS = [d["id"] for d in DOCS]
 
58
 
59
  LLM = Llama(
60
  model_path=MODEL_PATH,
61
+ n_ctx=2048,
62
+ n_threads=8,
63
+ n_gpu_layers=128,
64
+ n_batch=512,
65
+ logits_all=False,
66
+ verbose=False
67
  )
68
 
69
  SYSTEM_RULES = """
 
85
  """
86
 
87
  def build_prompt(question, hits):
88
+ ctx = "\n\n".join([
89
+ truncate(ID2DOC[h['id']]['content']['lo'])
90
+ for h in hits[:FINAL_TOP_N]
91
+ ])
92
  return f"{SYSTEM_RULES}\n\nContext:\n{ctx}\n\nQuestion:\n{question}\n\nAnswer:"
93
 
94
+
95
  def smart_answer(message):
96
  hits = search(message, k=3)
97
  if not hits or hits[0]["score"] < 0.1:
 
108
  return history
109
 
110
  with gr.Blocks() as demo:
111
+ gr.Markdown("## ທົດລອງ RDB Chatbot")
112
  chatbot_ui = gr.Chatbot()
113
  msg = gr.Textbox(placeholder="ພິມຄຳຖາມບ່ອນນີ້...")
114
  msg.submit(respond, [msg, chatbot_ui], chatbot_ui)