Spaces:

chouchouvs
/

DeepIndex

Running

App Files Community

chouchouvs committed on Sep 13

Commit

4642f4a

verified ·

1 Parent(s): 3b9e413

Update main.py

Browse files

Files changed (1) hide show

main.py +12 -8

main.py CHANGED Viewed

@@ -15,11 +15,11 @@ logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message
 LOG = logging.getLogger("remote_indexer")
 # ---------- ENV (config) ----------
-# Par défaut on met DeepInfra d'abord pour être opérationnel tout de suite.
 DEFAULT_BACKENDS = "deepinfra,hf"
 EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", os.getenv("EMB_BACKEND", DEFAULT_BACKENDS)).split(",") if s.strip()]
-# Auto-fallback vers DeepInfra si HF répond "SentenceSimilarityPipeline ... 'sentences' manquant"
 ALLOW_DI_AUTOFALLBACK = os.getenv("ALLOW_DI_AUTOFALLBACK", "true").lower() in ("1","true","yes","on")
 # HF Inference API
@@ -42,10 +42,12 @@ HF_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
 HF_WAIT    = os.getenv("HF_WAIT_FOR_MODEL", "true").lower() in ("1","true","yes","on")
 HF_PIPELINE_FIRST = os.getenv("HF_PIPELINE_FIRST", "true").lower() in ("1","true","yes","on")
-# DeepInfra Embeddings (OpenAI-like)
 DI_TOKEN   = os.getenv("DEEPINFRA_API_KEY", "").strip()
-DI_MODEL   = os.getenv("DEEPINFRA_EMBED_MODEL", "thenlper/gte-small").strip()
-DI_URL     = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/embeddings").strip()
 DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
 # Retries
@@ -188,8 +190,10 @@ def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
 def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     if not DI_TOKEN:
         raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
     headers = {"Authorization": f"Bearer {DI_TOKEN}", "Content-Type": "application/json", "Accept": "application/json"}
     payload = {"model": DI_MODEL, "input": batch}
     r = requests.post(DI_URL, headers=headers, json=payload, timeout=DI_TIMEOUT)
     size = int(r.headers.get("Content-Length", "0"))
     if r.status_code >= 400:
@@ -236,7 +240,7 @@ def _call_with_retries(func, batch: List[str], label: str, job_id: Optional[str]
 def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
     """
     Essaie les backends dans EMB_BACKEND_ORDER avec retries.
-    Auto-fallback optionnel vers DeepInfra si HF renvoie la fameuse erreur "SentenceSimilarityPipeline".
     """
     last_err = None
     similarity_misroute = False
@@ -262,7 +266,6 @@ def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np
         else:
             _append_log(job_id, f"Backend inconnu ignoré: {b}")
-    # Auto-fallback DI si activé et si le problème HF est le misrouting Similarity
     if ALLOW_DI_AUTOFALLBACK and similarity_misroute and DI_TOKEN:
         LOG.warning("HF a routé sur SentenceSimilarity => auto-fallback DeepInfra (override ordre).")
         _append_log(job_id, "Auto-fallback DeepInfra (HF => SentenceSimilarity).")
@@ -318,7 +321,7 @@ def run_index_job(job_id: str, req: IndexRequest):
                 continue
             chunks, metas = [], []
             for ci, (start, end, chunk_txt) in enumerate(_chunk_with_spans(f.text, req.chunk_size, req.overlap)):
-                if not (chunk_txt or "").strip():
                     continue
                 chunks.append(chunk_txt)
                 meta = {"path": f.path, "chunk": ci, "start": start, "end": end}
@@ -367,6 +370,7 @@ def root():
         "backends": EMB_BACKEND_ORDER,
         "hf_url_pipeline": HF_URL_PIPELINE if "hf" in EMB_BACKEND_ORDER else None,
         "hf_url_models": HF_URL_MODELS if "hf" in EMB_BACKEND_ORDER else None,
         "di_model": DI_MODEL if "deepinfra" in EMB_BACKEND_ORDER else None,
         "docs": "/health, /index, /status/{job_id}, /query, /wipe"
     }

 LOG = logging.getLogger("remote_indexer")
 # ---------- ENV (config) ----------
+# Par défaut on met DeepInfra d'abord pour être opérationnel.
 DEFAULT_BACKENDS = "deepinfra,hf"
 EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", os.getenv("EMB_BACKEND", DEFAULT_BACKENDS)).split(",") if s.strip()]
+# Auto-fallback vers DeepInfra si HF renvoie la fameuse erreur Similarity
 ALLOW_DI_AUTOFALLBACK = os.getenv("ALLOW_DI_AUTOFALLBACK", "true").lower() in ("1","true","yes","on")
 # HF Inference API
 HF_WAIT    = os.getenv("HF_WAIT_FOR_MODEL", "true").lower() in ("1","true","yes","on")
 HF_PIPELINE_FIRST = os.getenv("HF_PIPELINE_FIRST", "true").lower() in ("1","true","yes","on")
+# DeepInfra Embeddings (OpenAI-compatible)
 DI_TOKEN   = os.getenv("DEEPINFRA_API_KEY", "").strip()
+# 👇 IMPORTANT : modèle existant chez DeepInfra (multilingue)
+DI_MODEL   = os.getenv("DEEPINFRA_EMBED_MODEL", "BAAI/bge-m3").strip()
+# 👇 IMPORTANT : endpoint OpenAI-compatible
+DI_URL     = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/openai/embeddings").strip()
 DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
 # Retries
 def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     if not DI_TOKEN:
         raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
+    # OpenAI-compatible embeddings endpoint
     headers = {"Authorization": f"Bearer {DI_TOKEN}", "Content-Type": "application/json", "Accept": "application/json"}
     payload = {"model": DI_MODEL, "input": batch}
+    # NB: on peut aussi ajouter "encoding_format":"float" si nécessaire
     r = requests.post(DI_URL, headers=headers, json=payload, timeout=DI_TIMEOUT)
     size = int(r.headers.get("Content-Length", "0"))
     if r.status_code >= 400:
 def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
     """
     Essaie les backends dans EMB_BACKEND_ORDER avec retries.
+    Auto-fallback optionnel vers DeepInfra si HF renvoie la Similarity.
     """
     last_err = None
     similarity_misroute = False
         else:
             _append_log(job_id, f"Backend inconnu ignoré: {b}")
     if ALLOW_DI_AUTOFALLBACK and similarity_misroute and DI_TOKEN:
         LOG.warning("HF a routé sur SentenceSimilarity => auto-fallback DeepInfra (override ordre).")
         _append_log(job_id, "Auto-fallback DeepInfra (HF => SentenceSimilarity).")
                 continue
             chunks, metas = [], []
             for ci, (start, end, chunk_txt) in enumerate(_chunk_with_spans(f.text, req.chunk_size, req.overlap)):
+                if not (chunk_txt or "").strip():  # pas d'embeddings sur des blancs
                     continue
                 chunks.append(chunk_txt)
                 meta = {"path": f.path, "chunk": ci, "start": start, "end": end}
         "backends": EMB_BACKEND_ORDER,
         "hf_url_pipeline": HF_URL_PIPELINE if "hf" in EMB_BACKEND_ORDER else None,
         "hf_url_models": HF_URL_MODELS if "hf" in EMB_BACKEND_ORDER else None,
+        "di_url": DI_URL if "deepinfra" in EMB_BACKEND_ORDER else None,
         "di_model": DI_MODEL if "deepinfra" in EMB_BACKEND_ORDER else None,
         "docs": "/health, /index, /status/{job_id}, /query, /wipe"
     }