Spaces:

Rong6693
/

Soulcompass

Sleeping

App Files Community

Rong6693 committed on Aug 9

Commit

5fb89da

verified ·

1 Parent(s): 69a6fbf

Update rag_utils.py

Browse files

Files changed (1) hide show

rag_utils.py +54 -50

rag_utils.py CHANGED Viewed

@@ -1,23 +1,15 @@
-# rag_utils.py
-import os, json
-import numpy as np
-from typing import List, Tuple, Dict
-import faiss
 from sentence_transformers import SentenceTransformer
 DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
-CACHE_DIR = "/tmp"   # HF Spaces 每次啟動可寫這裡
 TAROT_JSON = os.path.join(DATA_DIR, "tarot_data_full.json")
 NUM_JSON   = os.path.join(DATA_DIR, "numerology_data_full.json")
 TAROT_IDX  = os.path.join(CACHE_DIR, "faiss_tarot.index")
 TAROT_META = os.path.join(CACHE_DIR, "faiss_tarot_meta.json")
 NUM_IDX    = os.path.join(CACHE_DIR, "faiss_num.index")
 NUM_META   = os.path.join(CACHE_DIR, "faiss_num_meta.json")
-# 輕量好用
 EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 _model = None
@@ -28,42 +20,56 @@ def get_model():
     return _model
 def _build_tarot():
-    with open(TAROT_JSON) as f:
         data = json.load(f)
-    texts = [
-        (i, c["card_name"],
-         (c.get("meaning_upright","") + " " + c.get("advice","")).strip())
-        for i, c in enumerate(data)
-    ]
-    model = get_model()
-    embs = model.encode([t[2] for t in texts], normalize_embeddings=True)
-    idx = faiss.IndexFlatIP(embs.shape[1])
-    idx.add(np.array(embs, dtype="float32"))
-    faiss.write_index(idx, TAROT_IDX)
-    with open(TAROT_META, "w") as f:
-        json.dump([{"i":i, "card_name":name, "text":txt} for (i,name,txt) in texts], f, indent=2)
 def _build_num():
-    with open(NUM_JSON) as f:
         data = json.load(f)
-    texts = [
-        (i, n["number"],
-         (str(n["number"]) + " " + n.get("life_path_meaning","") + " " + n.get("advice","")).strip())
-        for i, n in enumerate(data)
-    ]
     model = get_model()
-    embs = model.encode([t[2] for t in texts], normalize_embeddings=True)
-    idx = faiss.IndexFlatIP(embs.shape[1])
-    idx.add(np.array(embs, dtype="float32"))
-    faiss.write_index(idx, NUM_IDX)
-    with open(NUM_META, "w") as f:
-        json.dump([{"i":i, "number":num, "text":txt} for (i,num,txt) in texts], f, indent=2)
 def ensure_indexes():
     os.makedirs(CACHE_DIR, exist_ok=True)
@@ -72,22 +78,20 @@ def ensure_indexes():
     if not (os.path.exists(NUM_IDX) and os.path.exists(NUM_META)):
         _build_num()
-def _search(index_path: str, meta_path: str, query: str, k: int = 3):
     model = get_model()
-    idx = faiss.read_index(index_path)
-    with open(meta_path) as f:
-        meta = json.load(f)
     q = model.encode([query], normalize_embeddings=True).astype("float32")
     D, I = idx.search(q, k)
-    results = []
-    for rank, (score, j) in enumerate(zip(D[0], I[0]), start=1):
-        m = meta[j]
-        m = dict(m)  # copy
-        m["score"] = float(score)
-        m["rank"] = rank
-        results.append(m)
-    return results
 def search_tarot(query: str, k: int = 3):
     ensure_indexes()

+# rag_utils.py（替換 _build_tarot / _build_num / ensure/search 等同前）
+import os, json, numpy as np, faiss
 from sentence_transformers import SentenceTransformer
 DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
+CACHE_DIR = "/tmp"
 TAROT_JSON = os.path.join(DATA_DIR, "tarot_data_full.json")
 NUM_JSON   = os.path.join(DATA_DIR, "numerology_data_full.json")
 TAROT_IDX  = os.path.join(CACHE_DIR, "faiss_tarot.index")
 TAROT_META = os.path.join(CACHE_DIR, "faiss_tarot_meta.json")
 NUM_IDX    = os.path.join(CACHE_DIR, "faiss_num.index")
 NUM_META   = os.path.join(CACHE_DIR, "faiss_num_meta.json")
 EMBED_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
 _model = None
     return _model
 def _build_tarot():
+    with open(TAROT_JSON, "r", encoding="utf-8") as f:
         data = json.load(f)
+    # 接受 name 或 card_name；text 取 meaning_upright / reversed / advice
+    items = []
+    for i, c in enumerate(data):
+        name = c.get("name") or c.get("card_name") or f"Card {i}"
+        text = " ".join([
+            c.get("meaning_upright",""),
+            c.get("meaning_reversed",""),
+            c.get("advice","")
+        ]).strip()
+        if not text:
+            # 退而求其次：把 keywords 也串起來
+            kws = " ".join((c.get("keywords_upright") or []) + (c.get("keywords_reversed") or []))
+            text = f"{name} {kws}".strip()
+        items.append((i, name, text))
+    model = get_model()
+    embs = model.encode([t[2] for t in items], normalize_embeddings=True)
+    index = faiss.IndexFlatIP(embs.shape[1])
+    index.add(np.array(embs, dtype="float32"))
+    import json as _json
+    faiss.write_index(index, TAROT_IDX)
+    with open(TAROT_META, "w", encoding="utf-8") as f:
+        _json.dump([{"i": i, "card_name": n, "text": t} for (i, n, t) in items], f, ensure_ascii=False, indent=2)
 def _build_num():
+    with open(NUM_JSON, "r", encoding="utf-8") as f:
         data = json.load(f)
+    items = []
+    for i, n in enumerate(data):
+        num = n.get("number")
+        text = " ".join([
+            str(num),
+            n.get("description",""),
+            n.get("advice","")
+        ]).strip()
+        items.append((i, num, text))
     model = get_model()
+    embs = model.encode([t[2] for t in items], normalize_embeddings=True)
+    index = faiss.IndexFlatIP(embs.shape[1])
+    index.add(np.array(embs, dtype="float32"))
+    import json as _json
+    faiss.write_index(index, NUM_IDX)
+    with open(NUM_META, "w", encoding="utf-8") as f:
+        _json.dump([{"i": i, "number": num, "text": t} for (i, num, t) in items], f, ensure_ascii=False, indent=2)
 def ensure_indexes():
     os.makedirs(CACHE_DIR, exist_ok=True)
     if not (os.path.exists(NUM_IDX) and os.path.exists(NUM_META)):
         _build_num()
+def _search(idx_path, meta_path, query, k):
     model = get_model()
+    idx = faiss.read_index(idx_path)
+    import json as _json
+    with open(meta_path, "r", encoding="utf-8") as f:
+        meta = _json.load(f)
     q = model.encode([query], normalize_embeddings=True).astype("float32")
     D, I = idx.search(q, k)
+    out = []
+    for rank, (score, j) in enumerate(zip(D[0], I[0]), 1):
+        m = dict(meta[j])
+        m["score"] = float(score); m["rank"] = rank
+        out.append(m)
+    return out
 def search_tarot(query: str, k: int = 3):
     ensure_indexes()