essprasad committed
Commit 1d44ecf · verified · 1 Parent(s): 4b9e90a

Delete utils/faq.py

Files changed (1):
  1. utils/faq.py +0 -112
utils/faq.py DELETED
@@ -1,112 +0,0 @@
-import json
-import os
-from sentence_transformers import SentenceTransformer, util
-import torch
-
-FAQ_PATHS = ["data/faq_data.json", "data/clinical_faq.json"]
-_FAQ_CACHE = None
-_FAQ_EMBEDDINGS = None
-_MODEL = None
-
-
-def _get_model():
-    """Load and cache the embedding model (shared with main app if possible)."""
-    global _MODEL
-    if _MODEL is None:
-        print("📦 [faq] Loading embedding model: all-MiniLM-L6-v2 ...")
-        _MODEL = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
-    return _MODEL
-
-
-def load_faqs():
-    """Load FAQ data from JSON files and cache them."""
-    global _FAQ_CACHE
-    if _FAQ_CACHE is not None:
-        return _FAQ_CACHE
-
-    all_faqs = []
-    for path in FAQ_PATHS:
-        if os.path.exists(path):
-            try:
-                with open(path, "r", encoding="utf-8") as f:
-                    data = json.load(f)
-                if isinstance(data, list):
-                    all_faqs.extend(data)
-                elif isinstance(data, dict):
-                    for k, v in data.items():
-                        all_faqs.append({"question": k, "answer": v})
-            except Exception as e:
-                print(f"⚠️ Failed to load FAQ file {path}: {e}")
-
-    _FAQ_CACHE = all_faqs
-    print(f"✅ [faq] Loaded {len(_FAQ_CACHE)} FAQ entries.")
-    return _FAQ_CACHE
-
-
-def _build_embeddings():
-    """Precompute embeddings for all FAQ questions."""
-    global _FAQ_EMBEDDINGS
-    faqs = load_faqs()
-    if not faqs:
-        _FAQ_EMBEDDINGS = torch.empty(0)
-        return _FAQ_EMBEDDINGS
-
-    model = _get_model()
-    questions = [f.get("question", "") for f in faqs]  # one row per entry, so rows stay aligned with faqs indices
-    _FAQ_EMBEDDINGS = model.encode(questions, convert_to_tensor=True, show_progress_bar=False)
-    print(f"✅ [faq] Encoded {len(_FAQ_EMBEDDINGS)} FAQ embeddings.")
-    return _FAQ_EMBEDDINGS
-
-
-def get_faq_answer(query: str, top_k: int = 1) -> str:
-    """
-    Return the most semantically similar FAQ answer to the query.
-    Uses MiniLM embeddings and cosine similarity.
-    """
-    faqs = load_faqs()
-    if not faqs:
-        return ""
-
-    if _FAQ_EMBEDDINGS is None:
-        _build_embeddings()
-
-    model = _get_model()
-    query_emb = model.encode(query, convert_to_tensor=True)
-    sims = util.cos_sim(query_emb, _FAQ_EMBEDDINGS)[0]
-    top_idx = int(torch.argmax(sims))
-
-    best_score = float(sims[top_idx])
-    best_item = faqs[top_idx]
-
-    if best_score < 0.45:  # threshold to avoid weak matches
-        return ""
-
-    answer = best_item.get("answer", "")
-    print(f"💡 [faq] Best match: \"{best_item.get('question')}\" (score={best_score:.2f})")
-    return answer
-
-
-def lookup_faq(query: str, top_k: int = 3) -> str:
-    """
-    Return HTML-formatted list of top-k semantically similar FAQ matches.
-    Useful for admin or verbose display.
-    """
-    faqs = load_faqs()
-    if not faqs:
-        return "<i>No FAQ data loaded.</i>"
-
-    if _FAQ_EMBEDDINGS is None:
-        _build_embeddings()
-
-    model = _get_model()
-    query_emb = model.encode(query, convert_to_tensor=True)
-    sims = util.cos_sim(query_emb, _FAQ_EMBEDDINGS)[0]
-    top_indices = torch.topk(sims, k=min(top_k, len(faqs))).indices.tolist()
-
-    html = []
-    for idx in top_indices:
-        score = float(sims[idx])
-        item = faqs[idx]
-        html.append(f"<b>{item['question']}</b><br>{item['answer']}<br><i>(score={score:.2f})</i>")
-
-    return "<br><br>".join(html)
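For context, a minimal sketch of how the deleted module's public helpers (load_faqs, get_faq_answer, lookup_faq) could have been called from application code before this commit. The wrapper function and the sample query below are hypothetical illustrations, not part of the repository, and assume utils/faq.py is still on the import path with the JSON files in FAQ_PATHS present:

# Hypothetical caller for the deleted utils/faq.py (illustration only).
from utils.faq import get_faq_answer, load_faqs, lookup_faq

def answer_with_faq_fallback(user_query: str) -> str:
    """Try the FAQ index first; return a canned reply when no strong match exists."""
    answer = get_faq_answer(user_query)  # returns "" when the best cosine score is < 0.45
    return answer or "Sorry, I couldn't find a matching FAQ entry."

if __name__ == "__main__":
    print(f"Loaded {len(load_faqs())} FAQ entries")
    print(answer_with_faq_fallback("How do I reset my password?"))
    # Verbose, HTML-formatted view of the top 3 matches (e.g. for an admin page):
    print(lookup_faq("How do I reset my password?", top_k=3))

Because the module caches the model, the FAQ list, and the question embeddings in module-level globals, repeated calls like these pay the model-loading and encoding cost only once per process.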