# Source: ClinicalTrialBasics / core/retrieval.py
# Uploaded by essprasad ("Upload 10 files", commit e61e934, verified); 676 bytes.
import os
import re
import json
from whoosh.index import open_dir
from whoosh.qparser import MultifieldParser
# Directory that holds the on-disk Whoosh index; it is opened by _load_whoosh().
WHOOSH_INDEX_PATH = "/home/user/app/persistent/whoosh_index"
# Module-level cache for the opened index. Stays None until _load_whoosh()
# finds the directory above and opens it.
_ix = None
def _load_whoosh():
    """Return the cached Whoosh index, opening it on first use.

    The index is opened from ``WHOOSH_INDEX_PATH`` only when no index has
    been cached yet and that path exists on disk. The opened index is kept
    in the module-level ``_ix`` so later calls reuse it.

    Returns:
        The opened index, or ``None`` when the index path does not exist.
    """
    global _ix
    # Fast path: an index was already opened by an earlier call.
    if _ix is not None:
        return _ix
    if os.path.exists(WHOOSH_INDEX_PATH):
        _ix = open_dir(WHOOSH_INDEX_PATH)
    return _ix
def _bm25_search(query, top_n=10):
    """Run a keyword search over the Whoosh index and return the top hits.

    The query string is parsed against the ``"text"`` and ``"title"`` fields
    and scored with the searcher's default weighting (BM25F according to the
    Whoosh documentation; no custom weighting is configured here).

    Args:
        query: Query string in Whoosh query syntax.
        top_n: Maximum number of hits to return.

    Returns:
        A list of ``{"text": ..., "file": ...}`` dicts, one per hit, in the
        order the searcher returned them. ``"file"`` falls back to ``""``
        when the hit has no such stored field. An empty list is returned
        when the query is blank or the index is not available.
    """
    # A blank query cannot match anything, so skip loading the index for it.
    if not query or not query.strip():
        return []

    ix = _load_whoosh()
    # Compare against None explicitly so availability never depends on how
    # the index object evaluates in a boolean context.
    if ix is None:
        return []

    parser = MultifieldParser(["text", "title"], schema=ix.schema)
    parsed_query = parser.parse(query)
    with ix.searcher() as searcher:
        hits = searcher.search(parsed_query, limit=top_n)
        # Hits read their stored fields through the searcher, so copy them
        # into plain dicts before the ``with`` block closes it.
        return [
            {"text": hit["text"], "file": hit.get("file", "")}
            for hit in hits
        ]