Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -527,8 +527,46 @@ def run_index_job(job_id: str, req: IndexRequest):
|
|
| 527 |
|
| 528 |
# Warmup -> dimension
|
| 529 |
warm = next(_chunk_with_spans(req.files[0].text if req.files else "", req.chunk_size, req.overlap))[2] if req.files else "warmup"
|
| 530 |
-
|
| 531 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 532 |
STORE.ensure_collection(col, dim)
|
| 533 |
_append_log(job_id, f"Collection ready: {col} (dim={dim})")
|
| 534 |
|
|
|
|
| 527 |
|
| 528 |
# Warmup -> dimension
|
| 529 |
warm = next(_chunk_with_spans(req.files[0].text if req.files else "", req.chunk_size, req.overlap))[2] if req.files else "warmup"
|
| 530 |
+
# warmup embeddings already computed: embs
|
| 531 |
+
try:
|
| 532 |
+
LOG.info(f"[{job_id}] warmup embeddings shape = {embs.shape} dtype={embs.dtype}")
|
| 533 |
+
_append_log(job_id, f"warmup embeddings shape = {embs.shape}")
|
| 534 |
+
except Exception:
|
| 535 |
+
pass
|
| 536 |
+
|
| 537 |
+
# If using QdrantStore: check existing collection vector size and warn if mismatch
|
| 538 |
+
if isinstance(STORE, QdrantStore):
|
| 539 |
+
try:
|
| 540 |
+
# client.get_collection raises if the collection does not exist
|
| 541 |
+
info = STORE.client.get_collection(collection_name=col)
|
| 542 |
+
existing_size = None
|
| 543 |
+
# depending on qdrant client version, structure might be different:
|
| 544 |
+
if hasattr(info, "result") and isinstance(info.result, dict):
|
| 545 |
+
cfg = info.result.get("params") or {}
|
| 546 |
+
vectors = cfg.get("vectors") or {}
|
| 547 |
+
existing_size = int(vectors.get("size")) if vectors.get("size") else None
|
| 548 |
+
elif isinstance(info, dict):
|
| 549 |
+
cfg = info.get("result", info)
|
| 550 |
+
vectors = cfg.get("params", {}).get("vectors", {})
|
| 551 |
+
existing_size = int(vectors.get("size")) if vectors else None
|
| 552 |
+
|
| 553 |
+
if existing_size and existing_size != dim:
|
| 554 |
+
msg = (f"Qdrant collection {col} already exists with dim={existing_size} but embeddings dim={dim}. "
|
| 555 |
+
"This will likely cause vectors to be rejected. Consider wiping or recreating collection.")
|
| 556 |
+
LOG.error("[%s] %s", job_id, msg)
|
| 557 |
+
_append_log(job_id, msg)
|
| 558 |
+
# Optional: if WIPE_BEFORE_INDEX is set, wipe and recreate the collection:
|
| 559 |
+
if WIPE_BEFORE_INDEX:
|
| 560 |
+
try:
|
| 561 |
+
STORE.wipe(col)
|
| 562 |
+
STORE.ensure_collection(col, dim)
|
| 563 |
+
_append_log(job_id, f"Recreated collection {col} with dim={dim} (WIPE_BEFORE_INDEX).")
|
| 564 |
+
except Exception as e:
|
| 565 |
+
_append_log(job_id, f"Failed recreate collection: {e}")
|
| 566 |
+
except Exception as e:
|
| 567 |
+
# Collection is not present or could not be introspected -> OK, ensure_collection will create it
|
| 568 |
+
LOG.debug("[%s] Could not introspect collection: %s", job_id, e)
|
| 569 |
+
|
| 570 |
STORE.ensure_collection(col, dim)
|
| 571 |
_append_log(job_id, f"Collection ready: {col} (dim={dim})")
|
| 572 |
|