chouchouvs committed on
Commit cfba1b0 · verified · 1 Parent(s): dd051b7

Update main.py

Files changed (1)
  1. main.py +26 -15
main.py CHANGED
@@ -33,13 +33,13 @@ EMB_BACKEND_ORDER = [
 
 # --- DeepInfra Embeddings (OpenAI-like) ---
 DI_TOKEN = os.getenv("DEEPINFRA_API_KEY", "").strip()
-DI_MODEL = os.getenv("DEEPINFRA_EMBED_MODEL", "BAAI/bge-m3").strip()  # ✅ BEST FOR CODE
+DI_MODEL = os.getenv("DEEPINFRA_EMBED_MODEL", "BAAI/bge-m3").strip()
 DI_URL = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/openai/embeddings").strip()
 DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
 
 # --- Hugging Face Inference API ---
 HF_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
-HF_MODEL = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()  # ❌ TO CHANGE
+HF_MODEL = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
 HF_URL_PIPE = os.getenv("HF_API_URL_PIPELINE", "").strip() or (
     f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}"
 )
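The default dense model is now BAAI/bge-m3 on DeepInfra's OpenAI-compatible endpoint. A minimal sketch for checking the dimension this model actually returns before indexing, assuming the response follows the usual OpenAI embeddings shape (data[0].embedding); adjust if DeepInfra's payload differs:

# Hedged sketch: probe the DeepInfra embeddings endpoint for the vector dimension.
import os
import requests

url = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/openai/embeddings")
model = os.getenv("DEEPINFRA_EMBED_MODEL", "BAAI/bge-m3")
token = os.getenv("DEEPINFRA_API_KEY", "")

resp = requests.post(
    url,
    headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"},
    json={"model": model, "input": ["dimension probe"]},
    timeout=float(os.getenv("EMB_TIMEOUT_SEC", "120")),
)
resp.raise_for_status()
dim = len(resp.json()["data"][0]["embedding"])
print(f"{model} embedding dimension: {dim}")  # bge-m3 is expected to report 1024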
@@ -492,8 +492,8 @@ def run_index_job(job_id: str, req: IndexRequest):
     _set_status(job_id, "running")
     _append_log(job_id, f"Start project={req.project_id} files={len(req.files)} | backends={EMB_BACKEND_ORDER} | store={VECTOR_STORE} (deterministic_ids={QDRANT_DETERMINISTIC_IDS}, mode={QDRANT_ID_MODE})")
     LOG.info(f"[{job_id}] Index start project={req.project_id} files={len(req.files)}")
+
     # --- DEBUG DIAGNOSTIC (INSERT HERE) ---
-    # Log sample files received (first N)
     try:
         N_SAMPLE = 6
         sample = req.files[:N_SAMPLE]
@@ -504,10 +504,8 @@ def run_index_job(job_id: str, req: IndexRequest):
             h = hashlib.blake2b(t.encode("utf-8", "ignore"), digest_size=8).hexdigest()
             seen_hashes.setdefault(h, []).append(p)
             LOG.info(f"[{job_id}] recv file #{fidx}: path={p!r} len_text={len(t)} hash8={h} preview={repr(t[:120])}")
-        # summary
         if len(req.files) > N_SAMPLE:
             LOG.info(f"[{job_id}] ... and {len(req.files)-N_SAMPLE} more files")
-        # If most files share same hash, warn
         if len(seen_hashes) == 1 and len(req.files) > 1:
             _append_log(job_id, "⚠️ All received files appear IDENTICAL (same hash). Possible client-side bug.")
             LOG.warning("[%s] All files identical by hash8=%s", job_id, list(seen_hashes.keys())[0])
@@ -525,15 +523,25 @@ def run_index_job(job_id: str, req: IndexRequest):
     except Exception as e:
         _append_log(job_id, f"Wipe failed (ignored): {e}")
 
-    # Warmup -> dimension
-    warm = next(_chunk_with_spans(req.files[0].text if req.files else "", req.chunk_size, req.overlap))[2] if req.files else "warmup"
-    # warmup embeddings already computed: embs
+    # --- WARMUP: compute a test embedding to determine the dimension (dim) ---
+    # Take an initial chunk (or the string 'warmup' if there are no files)
+    if req.files:
+        warm_text = next(_chunk_with_spans(req.files[0].text or "", req.chunk_size, req.overlap))[2]
+    else:
+        warm_text = "warmup"
     try:
+        embs, sz = _post_embeddings([warm_text], job_id=job_id)
+        if embs is None or embs.ndim != 2:
+            raise RuntimeError("Warmup embeddings invalid shape")
+        dim = int(embs.shape[1])
         LOG.info(f"[{job_id}] warmup embeddings shape = {embs.shape} dtype={embs.dtype}")
-        _append_log(job_id, f"warmup embeddings shape = {embs.shape}")
-    except Exception:
-        pass
-
+        _append_log(job_id, f"warmup embeddings shape = {embs.shape} dim={dim}")
+    except Exception as e:
+        LOG.exception("[%s] Warmup embeddings failed: %s", job_id, e)
+        _append_log(job_id, f"Warmup embeddings failed: {e}")
+        _set_status(job_id, "error")
+        return
+
     # If using QdrantStore: check existing collection vector size and warn if mismatch
     if isinstance(STORE, QdrantStore):
         try:
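The warmup now fails the job early instead of passing silently, and the resulting dim feeds the Qdrant size check below. The warmup text goes through whichever backend _post_embeddings selects from EMB_BACKEND_ORDER; as a standalone illustration, a hedged probe of the Hugging Face feature-extraction pipeline (the fallback backend configured above). The response shape varies by model, some return one pooled vector per input and others token-level vectors, so this pools defensively:

# Hedged sketch: probe the HF Inference API feature-extraction pipeline for the dimension.
import os
import numpy as np
import requests

model = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
url = f"https://api-inference.huggingface.co/pipeline/feature-extraction/{model}"
headers = {"Authorization": f"Bearer {os.getenv('HF_API_TOKEN', '')}"}

resp = requests.post(url, headers=headers, json={"inputs": ["dimension probe"]}, timeout=120)
resp.raise_for_status()
arr = np.asarray(resp.json()[0], dtype=np.float32)
vec = arr.mean(axis=0) if arr.ndim == 2 else arr  # mean-pool token-level output if present
print(f"{model} embedding dimension: {vec.shape[0]}")  # all-MiniLM-L6-v2 is expected to be 384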
@@ -549,7 +557,7 @@ def run_index_job(job_id: str, req: IndexRequest):
             cfg = info.get("result", info)
             vectors = cfg.get("params", {}).get("vectors", {})
             existing_size = int(vectors.get("size")) if vectors else None
-
+
             if existing_size and existing_size != dim:
                 msg = (f"Qdrant collection {col} already exists with dim={existing_size} but embeddings dim={dim}. "
                        "This will likely cause vectors to be rejected. Consider wiping or recreating collection.")
@@ -698,6 +706,8 @@ def start_index(req: IndexRequest, background_tasks: BackgroundTasks, x_auth_tok
     _check_backend_ready()
     job_id = uuid.uuid4().hex[:12]
     JOBS[job_id] = {"status": "queued", "logs": [], "created": time.time()}
+    LOG.info(f"Created job {job_id} for project {req.project_id}")
+    _append_log(job_id, f"Job created: {job_id} project={req.project_id}")
     background_tasks.add_task(run_index_job, job_id, req)
     return {"job_id": job_id}
 
@@ -707,7 +717,8 @@ def status_path(job_id: str, x_auth_token: Optional[str] = Header(default=None))
     _auth(x_auth_token)
     j = JOBS.get(job_id)
     if not j:
-        raise HTTPException(404, "job inconnu")
+        # More explicit JSON response to make client-side debugging easier
+        raise HTTPException(status_code=404, detail={"error": "job inconnu", "advice": "POST /index to create a new job"})
     return {"status": j["status"], "logs": j["logs"][-1500:]}
 
 @app.get("/status")
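With the richer 404 detail, a client can tell an unknown job apart from a transient failure. A hedged client sketch; the /status/{job_id} path, the X-Auth-Token header name and the terminal status values are assumptions inferred from the handlers, while the /index route and the payload field names (project_id, files, chunk_size, overlap) come from this diff:

# Hedged sketch: create an index job and poll it until it finishes.
import time
import requests

BASE = "http://localhost:7860"
HEADERS = {"X-Auth-Token": "change-me"}  # placeholder token

payload = {
    "project_id": "demo",
    "files": [{"path": "app.py", "text": "print('hello world')\n"}],
    "chunk_size": 800,
    "overlap": 100,
}
job_id = requests.post(f"{BASE}/index", json=payload, headers=HEADERS, timeout=30).json()["job_id"]

while True:
    r = requests.get(f"{BASE}/status/{job_id}", headers=HEADERS, timeout=30)
    if r.status_code == 404:
        print(r.json())  # e.g. {"detail": {"error": "job inconnu", "advice": "POST /index to create a new job"}}
        break
    body = r.json()
    print(body["status"], body["logs"][-1] if body["logs"] else "")
    if body["status"] in ("done", "error"):  # assumed terminal states
        break
    time.sleep(2)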
@@ -760,4 +771,4 @@ if __name__ == "__main__":
     import uvicorn
     port = int(os.getenv("PORT", "7860"))
     LOG.info(f"===== Application Startup on PORT {port} =====")
-    uvicorn.run(app, host="0.0.0.0", port=port)
+    uvicorn.run(app, host="0.0.0.0", port=port)
 