Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| import json | |
| from whoosh.index import open_dir | |
| from whoosh.qparser import MultifieldParser | |
# Directory that holds the on-disk Whoosh index; _load_whoosh() checks that
# it exists before opening it.
WHOOSH_INDEX_PATH = "/home/user/app/persistent/whoosh_index"
# Module-level cache for the opened index. Stays None until _load_whoosh()
# successfully opens WHOOSH_INDEX_PATH.
_ix = None
def _load_whoosh():
    """Return the shared Whoosh index, opening it on first use.

    The opened index is cached in the module-level ``_ix`` so the index
    directory is opened at most once. While ``WHOOSH_INDEX_PATH`` does not
    exist on disk nothing is cached and ``None`` is returned, which lets a
    later call pick the index up once the directory appears.
    """
    global _ix
    # Fast path: an index was already opened by an earlier call.
    if _ix is not None:
        return _ix
    if os.path.exists(WHOOSH_INDEX_PATH):
        _ix = open_dir(WHOOSH_INDEX_PATH)
    return _ix
def _bm25_search(query, top_n=10):
    """Run a full-text query against the Whoosh index.

    The query string is parsed against the ``text`` and ``title`` fields
    using the index's own schema. No scoring model is configured here, so
    ranking is whatever the index's default searcher applies.

    Args:
        query: Query string handed to the Whoosh query parser.
        top_n: Maximum number of hits to return (passed as ``limit``).

    Returns:
        A list of ``{"text": ..., "file": ...}`` dicts, one per hit, in the
        order the searcher returned them. ``file`` falls back to ``""`` when
        the hit has no such stored field. An empty list is returned when no
        index is available.
    """
    index = _load_whoosh()
    if not index:
        return []

    parsed_query = MultifieldParser(
        ["text", "title"], schema=index.schema
    ).parse(query)

    hits = []
    with index.searcher() as searcher:
        # Stored fields must be read while the searcher is still open, so
        # the plain dicts are built inside the context manager.
        for hit in searcher.search(parsed_query, limit=top_n):
            hits.append({"text": hit["text"], "file": hit.get("file", "")})
    return hits