Update main.py
main.py
CHANGED
@@ -33,13 +33,13 @@ EMB_BACKEND_ORDER = [
 
 # --- DeepInfra Embeddings (OpenAI-like) ---
 DI_TOKEN = os.getenv("DEEPINFRA_API_KEY", "").strip()
-DI_MODEL = os.getenv("DEEPINFRA_EMBED_MODEL", "BAAI/bge-m3").strip()
+DI_MODEL = os.getenv("DEEPINFRA_EMBED_MODEL", "BAAI/bge-m3").strip()
 DI_URL = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/openai/embeddings").strip()
 DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
 
 # --- Hugging Face Inference API ---
 HF_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
-HF_MODEL = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
+HF_MODEL = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
 HF_URL_PIPE = os.getenv("HF_API_URL_PIPELINE", "").strip() or (
     f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}"
 )
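For context, the DeepInfra endpoint configured above speaks the OpenAI embeddings wire format. A minimal sketch of calling such a backend with these environment variables (assuming the standard {"model": ..., "input": [...]} request body and a data[*].embedding response; this is not the Space's own _post_embeddings helper):

import os
import requests
import numpy as np

DI_TOKEN = os.getenv("DEEPINFRA_API_KEY", "").strip()
DI_MODEL = os.getenv("DEEPINFRA_EMBED_MODEL", "BAAI/bge-m3").strip()
DI_URL = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/openai/embeddings").strip()
DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))

def embed_texts(texts: list[str]) -> np.ndarray:
    # OpenAI-compatible embeddings call: one vector per input string.
    resp = requests.post(
        DI_URL,
        headers={"Authorization": f"Bearer {DI_TOKEN}"},
        json={"model": DI_MODEL, "input": texts},
        timeout=DI_TIMEOUT,
    )
    resp.raise_for_status()
    rows = [item["embedding"] for item in resp.json()["data"]]
    return np.asarray(rows, dtype=np.float32)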
@@ -492,8 +492,8 @@ def run_index_job(job_id: str, req: IndexRequest):
     _set_status(job_id, "running")
     _append_log(job_id, f"Start project={req.project_id} files={len(req.files)} | backends={EMB_BACKEND_ORDER} | store={VECTOR_STORE} (deterministic_ids={QDRANT_DETERMINISTIC_IDS}, mode={QDRANT_ID_MODE})")
     LOG.info(f"[{job_id}] Index start project={req.project_id} files={len(req.files)}")
+
     # --- DEBUG DIAGNOSTIC (INSERT HERE) ---
-    # Log sample files received (first N)
     try:
         N_SAMPLE = 6
         sample = req.files[:N_SAMPLE]
@@ -504,10 +504,8 @@ def run_index_job(job_id: str, req: IndexRequest):
             h = hashlib.blake2b(t.encode("utf-8", "ignore"), digest_size=8).hexdigest()
             seen_hashes.setdefault(h, []).append(p)
             LOG.info(f"[{job_id}] recv file #{fidx}: path={p!r} len_text={len(t)} hash8={h} preview={repr(t[:120])}")
-        # summary
         if len(req.files) > N_SAMPLE:
             LOG.info(f"[{job_id}] ... and {len(req.files)-N_SAMPLE} more files")
-        # If most files share same hash, warn
         if len(seen_hashes) == 1 and len(req.files) > 1:
             _append_log(job_id, "⚠️ All received files appear IDENTICAL (same hash). Possible client-side bug.")
             LOG.warning("[%s] All files identical by hash8=%s", job_id, list(seen_hashes.keys())[0])
@@ -525,15 +523,25 @@ def run_index_job(job_id: str, req: IndexRequest):
     except Exception as e:
         _append_log(job_id, f"Wipe failed (ignored): {e}")
 
-    #
-
-
+    # --- WARMUP: compute a test embedding to determine the dimension (dim) ---
+    # Use an initial chunk (or the string 'warmup' if there are no files)
+    if req.files:
+        warm_text = next(_chunk_with_spans(req.files[0].text or "", req.chunk_size, req.overlap))[2]
+    else:
+        warm_text = "warmup"
     try:
+        embs, sz = _post_embeddings([warm_text], job_id=job_id)
+        if embs is None or embs.ndim != 2:
+            raise RuntimeError("Warmup embeddings invalid shape")
+        dim = int(embs.shape[1])
         LOG.info(f"[{job_id}] warmup embeddings shape = {embs.shape} dtype={embs.dtype}")
-        _append_log(job_id, f"warmup embeddings shape = {embs.shape}")
-    except Exception:
-
-
+        _append_log(job_id, f"warmup embeddings shape = {embs.shape} dim={dim}")
+    except Exception as e:
+        LOG.exception("[%s] Warmup embeddings failed: %s", job_id, e)
+        _append_log(job_id, f"Warmup embeddings failed: {e}")
+        _set_status(job_id, "error")
+        return
+
     # If using QdrantStore: check existing collection vector size and warn if mismatch
     if isinstance(STORE, QdrantStore):
         try:
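The warmup call added above exists only to learn the embedding dimensionality before the vector store is touched; any backend that returns a 2-D (batch, dim) array will do. A toy illustration of the shape check, with a random array standing in for a real backend response:

import numpy as np

embs = np.random.rand(1, 1024).astype(np.float32)  # stand-in for a warmup response
if embs is None or embs.ndim != 2:
    raise RuntimeError("Warmup embeddings invalid shape")
dim = int(embs.shape[1])
print("embedding dim:", dim)  # 1024 here; the real value depends on the model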
@@ -549,7 +557,7 @@ def run_index_job(job_id: str, req: IndexRequest):
             cfg = info.get("result", info)
             vectors = cfg.get("params", {}).get("vectors", {})
             existing_size = int(vectors.get("size")) if vectors else None
-
+
             if existing_size and existing_size != dim:
                 msg = (f"Qdrant collection {col} already exists with dim={existing_size} but embeddings dim={dim}. "
                        "This will likely cause vectors to be rejected. Consider wiping or recreating collection.")
@@ -698,6 +706,8 @@ def start_index(req: IndexRequest, background_tasks: BackgroundTasks, x_auth_tok
     _check_backend_ready()
     job_id = uuid.uuid4().hex[:12]
     JOBS[job_id] = {"status": "queued", "logs": [], "created": time.time()}
+    LOG.info(f"Created job {job_id} for project {req.project_id}")
+    _append_log(job_id, f"Job created: {job_id} project={req.project_id}")
     background_tasks.add_task(run_index_job, job_id, req)
     return {"job_id": job_id}
 
@@ -707,7 +717,8 @@ def status_path(job_id: str, x_auth_token: Optional[str] = Header(default=None))
     _auth(x_auth_token)
     j = JOBS.get(job_id)
     if not j:
-
+        # More explicit JSON response to make client-side debugging easier
+        raise HTTPException(status_code=404, detail={"error": "job inconnu", "advice": "POST /index to create a new job"})
     return {"status": j["status"], "logs": j["logs"][-1500:]}
 
 @app.get("/status")
@@ -760,4 +771,4 @@ if __name__ == "__main__":
     import uvicorn
     port = int(os.getenv("PORT", "7860"))
     LOG.info(f"===== Application Startup on PORT {port} =====")
-    uvicorn.run(app, host="0.0.0.0", port=port)
+    uvicorn.run(app, host="0.0.0.0", port=port)