chouchouvs committed on
Commit
dd051b7
·
verified ·
1 Parent(s): 1bf0e3f

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +40 -2
main.py CHANGED
@@ -527,8 +527,46 @@ def run_index_job(job_id: str, req: IndexRequest):
527
 
528
  # Warmup -> dimension
529
  warm = next(_chunk_with_spans(req.files[0].text if req.files else "", req.chunk_size, req.overlap))[2] if req.files else "warmup"
530
- embs, _ = _post_embeddings([warm], job_id=job_id)
531
- dim = embs.shape[1]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
532
  STORE.ensure_collection(col, dim)
533
  _append_log(job_id, f"Collection ready: {col} (dim={dim})")
534
 
 
527
 
528
  # Warmup -> dimension
529
  warm = next(_chunk_with_spans(req.files[0].text if req.files else "", req.chunk_size, req.overlap))[2] if req.files else "warmup"
530
+ # warmup embeddings already computed: embs
531
+ try:
532
+ LOG.info(f"[{job_id}] warmup embeddings shape = {embs.shape} dtype={embs.dtype}")
533
+ _append_log(job_id, f"warmup embeddings shape = {embs.shape}")
534
+ except Exception:
535
+ pass
536
+
537
+ # If using QdrantStore: check existing collection vector size and warn if mismatch
538
+ if isinstance(STORE, QdrantStore):
539
+ try:
540
+ # client.get_collection throws if not exists
541
+ info = STORE.client.get_collection(collection_name=col)
542
+ existing_size = None
543
+ # depending on qdrant client version, structure might be different:
544
+ if hasattr(info, "result") and isinstance(info.result, dict):
545
+ cfg = info.result.get("params") or {}
546
+ vectors = cfg.get("vectors") or {}
547
+ existing_size = int(vectors.get("size")) if vectors.get("size") else None
548
+ elif isinstance(info, dict):
549
+ cfg = info.get("result", info)
550
+ vectors = cfg.get("params", {}).get("vectors", {})
551
+ existing_size = int(vectors.get("size")) if vectors else None
552
+
553
+ if existing_size and existing_size != dim:
554
+ msg = (f"Qdrant collection {col} already exists with dim={existing_size} but embeddings dim={dim}. "
555
+ "This will likely cause vectors to be rejected. Consider wiping or recreating collection.")
556
+ LOG.error("[%s] %s", job_id, msg)
557
+ _append_log(job_id, msg)
558
+ # Optional: if WIPE_BEFORE_INDEX True, recreate:
559
+ if WIPE_BEFORE_INDEX:
560
+ try:
561
+ STORE.wipe(col)
562
+ STORE.ensure_collection(col, dim)
563
+ _append_log(job_id, f"Recreated collection {col} with dim={dim} (WIPE_BEFORE_INDEX).")
564
+ except Exception as e:
565
+ _append_log(job_id, f"Failed recreate collection: {e}")
566
+ except Exception as e:
567
+ # collection not present or unable to introspect -> ok, ensure_collection will create
568
+ LOG.debug("[%s] Could not introspect collection: %s", job_id, e)
569
+
570
  STORE.ensure_collection(col, dim)
571
  _append_log(job_id, f"Collection ready: {col} (dim={dim})")
572