Spaces:

IotaCluster
/

embedding-model

Running

IotaCluster committed on Jun 30

Commit

57f1fb2

verified ·

1 Parent(s): bea665a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,15 +20,13 @@ def embed_sparse(text: str):
     if not text.strip():
         return {"error": "Input text is empty."}
     tokens = text.split()
-    # Treat the input as a single document and also as the query
     bm25 = BM25Okapi([tokens])
     unique_terms = sorted(set(tokens))
-    # BM25 expects a query, so we use the unique terms as the query
     scores = bm25.get_scores(unique_terms)
     term_weights = {term: float(score) for term, score in zip(unique_terms, scores)}
-    # Build Qdrant format
     indices = list(range(len(unique_terms)))
-    values = [term_weights[term] for term in unique_terms]
     return {"indices": indices, "values": values, "terms": unique_terms}
 # 3. Late-interaction embedding model (ColBERT)

     if not text.strip():
         return {"error": "Input text is empty."}
     tokens = text.split()
     bm25 = BM25Okapi([tokens])
     unique_terms = sorted(set(tokens))
     scores = bm25.get_scores(unique_terms)
+    # Assign scores for all unique terms
     term_weights = {term: float(score) for term, score in zip(unique_terms, scores)}
     indices = list(range(len(unique_terms)))
+    values = [term_weights.get(term, 0.0) for term in unique_terms]
     return {"indices": indices, "values": values, "terms": unique_terms}
 # 3. Late-interaction embedding model (ColBERT)