Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -20,15 +20,13 @@ def embed_sparse(text: str):
|
|
| 20 |
if not text.strip():
|
| 21 |
return {"error": "Input text is empty."}
|
| 22 |
tokens = text.split()
|
| 23 |
-
# Treat the input as a single document and also as the query
|
| 24 |
bm25 = BM25Okapi([tokens])
|
| 25 |
unique_terms = sorted(set(tokens))
|
| 26 |
-
# BM25 expects a query, so we use the unique terms as the query
|
| 27 |
scores = bm25.get_scores(unique_terms)
|
|
|
|
| 28 |
term_weights = {term: float(score) for term, score in zip(unique_terms, scores)}
|
| 29 |
-
# Build Qdrant format
|
| 30 |
indices = list(range(len(unique_terms)))
|
| 31 |
-
values = [term_weights
|
| 32 |
return {"indices": indices, "values": values, "terms": unique_terms}
|
| 33 |
|
| 34 |
# 3. Late-interaction embedding model (ColBERT)
|
|
|
|
| 20 |
if not text.strip():
|
| 21 |
return {"error": "Input text is empty."}
|
| 22 |
tokens = text.split()
|
|
|
|
| 23 |
bm25 = BM25Okapi([tokens])
|
| 24 |
unique_terms = sorted(set(tokens))
|
|
|
|
| 25 |
scores = bm25.get_scores(unique_terms)
|
| 26 |
+
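# Pair unique terms with BM25 scores (NOTE: get_scores presumably returns one score per corpus document, i.e. a single value here, so only the first term is paired; verify)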
|
| 27 |
term_weights = {term: float(score) for term, score in zip(unique_terms, scores)}
|
|
|
|
| 28 |
indices = list(range(len(unique_terms)))
|
| 29 |
+
values = [term_weights.get(term, 0.0) for term in unique_terms]
|
| 30 |
return {"indices": indices, "values": values, "terms": unique_terms}
|
| 31 |
|
| 32 |
# 3. Late-interaction embedding model (ColBERT)
|