Spaces:

adriansd12
/

Bible_Index

Runtime error

App Files Files Community

adriansd12 committed on Apr 10, 2023

Commit

c6e8a33

1 Parent(s): 4c628ba

init. commit

Browse files

Files changed (3) hide show

app.py +36 -0
module/bible_index.py +53 -0
requirements.txt +2 -0

app.py ADDED Viewed

	@@ -0,0 +1,36 @@

+import gradio as gr
+from module.bible_index import BibleIndex
+def query_index(query, testament, top_n):
+    """Search the Bible index and render the best matches as an HTML fragment.
+
+    Args:
+        query: Free-text search string entered by the user.
+        testament: Section to search: "all", "old" or "new". A falsy value
+            (for example, no radio option selected) falls back to "all".
+        top_n: Maximum number of results to render.
+
+    Returns:
+        An HTML string: an <h2> echoing the query, followed by a <ul> that
+        holds an <h3> source reference and an <li> text entry per result.
+    """
+    # Function-scope import so the fix does not depend on the file's import block.
+    from html import escape
+    # A fresh BibleIndex is built on every call for the requested section.
+    _index = BibleIndex(testament or "all")
+    # The slider may deliver a float; the result count is used as a slice bound.
+    items = _index.query(query, top_n=int(top_n))
+    # The fragment is rendered as raw HTML and the query is user input, so
+    # every interpolated value is escaped to prevent markup injection.
+    parts = [f"<h2>{escape(str(query))}</h2>", "<ul>"]
+    for item in items:
+        parts.append(f"<h3>{escape(str(item.get('src')))}</h3>")
+        parts.append(f"<li>{escape(str(item.get('text')))}</li>")
+    parts.append("</ul>")
+    return "".join(parts)
+# Gradio UI: a free-text query, a testament selector and a result-count
+# slider, wired to query_index and rendered as HTML.
+demo = gr.Interface(
+    query_index,
+    [
+        gr.Textbox(label="Query text"),
+        # Preselect "all": with nothing selected the radio passes None, which
+        # is not one of the testament names the index accepts.
+        gr.Radio(["all", "old", "new"], value="all", label="Section of the Bible"),
+        # Start at 1 with a default of 5 so a search never asks for zero results.
+        gr.Slider(1, 10, value=5, step=1, label="Top N results"),
+    ],
+    outputs="html",
+    examples=[
+        ["What is love", "new", 5],
+        ["How old was Adam?", "old", 3],
+        ["Who is God?", "all", 7],
+    ],
+    title="Bible Search Index",
+    description="""
+        A search index for The Bible using *sentence_transformer*.
+    """,
+)
+# Start the web app when this file is executed.
+demo.launch()

module/bible_index.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import numpy as np
+from sentence_transformers import SentenceTransformer, util
+class BibleIndex:
+    """Dot-product search over pre-computed sentence embeddings of Bible text.
+
+    For the chosen testament the index loads an embedding matrix from
+    ``data/embeddings/`` and the matching text lines from ``data/text/``,
+    then ranks the lines against an encoded query.
+    """
+    # Testament names for which data files are looked up.
+    _TESTAMENTS = ("all", "old", "new")
+    def __init__(self, testament: str = "all") -> None:
+        """Load the encoder model, embeddings and text for *testament*.
+
+        Args:
+            testament: One of "all", "old" or "new".
+
+        Raises:
+            ValueError: If *testament* is not one of the supported names.
+        """
+        # Validate before the expensive model load; previously an unknown
+        # name only printed a message and failed later on a missing attribute.
+        if testament not in self._TESTAMENTS:
+            raise ValueError(
+                f"testament must be one of {self._TESTAMENTS}, got {testament!r}"
+            )
+        self.testament = testament
+        self.model = SentenceTransformer(
+            "sentence-transformers/msmarco-bert-base-dot-v5"
+        )
+        self.load_emb()
+        self.load_text()
+    def load_emb(self) -> None:
+        """Load the embedding array for the selected testament into ``self.emb``."""
+        self.emb = np.load(f"data/embeddings/{self.testament}_esv_embeddings.npy")
+    def load_text(self) -> None:
+        """Read the text file for the selected testament into ``self.text``.
+
+        The first line of the file is skipped (presumably a header row).
+        """
+        text_path = f"data/text/{self.testament}_testament_esv.txt"
+        # Explicit encoding so reading does not depend on the platform default;
+        # assumes the data files are UTF-8 -- TODO confirm.
+        with open(text_path, "r", encoding="utf-8") as f:
+            self.text = f.readlines()[1:]
+    @staticmethod
+    def _parse_line(line: str) -> tuple[str, str]:
+        """Split one comma-separated text line into (source reference, text).
+
+        The first three fields form the reference as "<f0> <f1>:<f2>"
+        (presumably book, chapter and verse); everything after the third
+        comma is the text, with non-breaking spaces and newlines removed.
+        """
+        # maxsplit=3 keeps commas inside the text itself intact.
+        fields = line.split(",", 3)
+        src = f"{fields[0]} {fields[1]}:{fields[2]}"
+        text = fields[3] if len(fields) > 3 else ""
+        return src, text.replace("\xa0", "").replace("\n", "")
+    def query(self, query: str = "", top_n: int = 10) -> list[dict]:
+        """Return the *top_n* text lines that score highest against *query*.
+
+        Args:
+            query: Free-text search string.
+            top_n: Maximum number of results to return.
+
+        Returns:
+            A list of dicts with keys "src", "text" and "score", ordered by
+            decreasing score.
+        """
+        query_emb = self.model.encode(query)
+        scores = util.dot_score(query_emb, self.emb)[0].cpu().tolist()
+        # Pair every text line with its score and rank best-first.
+        ranked = sorted(
+            zip(self.text, scores), key=lambda pair: pair[1], reverse=True
+        )
+        print("Query:", query)
+        results = []
+        # int() guards the slice against float counts coming from UI widgets.
+        for doc, score in ranked[: int(top_n)]:
+            src, text = self._parse_line(doc)
+            results.append({"src": src, "text": text, "score": score})
+        return results

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ numpy==1.24.2
2	+ sentence-transformers==2.2.2