Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -15,11 +15,11 @@ logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message
|
|
| 15 |
LOG = logging.getLogger("remote_indexer")
|
| 16 |
|
| 17 |
# ---------- ENV (config) ----------
|
| 18 |
-
# Par défaut on met DeepInfra d'abord pour être opérationnel
|
| 19 |
DEFAULT_BACKENDS = "deepinfra,hf"
|
| 20 |
EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", os.getenv("EMB_BACKEND", DEFAULT_BACKENDS)).split(",") if s.strip()]
|
| 21 |
|
| 22 |
-
# Auto-fallback vers DeepInfra si HF
|
| 23 |
ALLOW_DI_AUTOFALLBACK = os.getenv("ALLOW_DI_AUTOFALLBACK", "true").lower() in ("1","true","yes","on")
|
| 24 |
|
| 25 |
# HF Inference API
|
|
@@ -42,10 +42,12 @@ HF_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
|
|
| 42 |
HF_WAIT = os.getenv("HF_WAIT_FOR_MODEL", "true").lower() in ("1","true","yes","on")
|
| 43 |
HF_PIPELINE_FIRST = os.getenv("HF_PIPELINE_FIRST", "true").lower() in ("1","true","yes","on")
|
| 44 |
|
| 45 |
-
# DeepInfra Embeddings (OpenAI-
|
| 46 |
DI_TOKEN = os.getenv("DEEPINFRA_API_KEY", "").strip()
|
| 47 |
-
|
| 48 |
-
|
|
|
|
|
|
|
| 49 |
DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
|
| 50 |
|
| 51 |
# Retries
|
|
@@ -188,8 +190,10 @@ def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
|
|
| 188 |
def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
|
| 189 |
if not DI_TOKEN:
|
| 190 |
raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
|
|
|
|
| 191 |
headers = {"Authorization": f"Bearer {DI_TOKEN}", "Content-Type": "application/json", "Accept": "application/json"}
|
| 192 |
payload = {"model": DI_MODEL, "input": batch}
|
|
|
|
| 193 |
r = requests.post(DI_URL, headers=headers, json=payload, timeout=DI_TIMEOUT)
|
| 194 |
size = int(r.headers.get("Content-Length", "0"))
|
| 195 |
if r.status_code >= 400:
|
|
@@ -236,7 +240,7 @@ def _call_with_retries(func, batch: List[str], label: str, job_id: Optional[str]
|
|
| 236 |
def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
|
| 237 |
"""
|
| 238 |
Essaie les backends dans EMB_BACKEND_ORDER avec retries.
|
| 239 |
-
Auto-fallback optionnel vers DeepInfra si HF renvoie la
|
| 240 |
"""
|
| 241 |
last_err = None
|
| 242 |
similarity_misroute = False
|
|
@@ -262,7 +266,6 @@ def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np
|
|
| 262 |
else:
|
| 263 |
_append_log(job_id, f"Backend inconnu ignoré: {b}")
|
| 264 |
|
| 265 |
-
# Auto-fallback DI si activé et si le problème HF est le misrouting Similarity
|
| 266 |
if ALLOW_DI_AUTOFALLBACK and similarity_misroute and DI_TOKEN:
|
| 267 |
LOG.warning("HF a routé sur SentenceSimilarity => auto-fallback DeepInfra (override ordre).")
|
| 268 |
_append_log(job_id, "Auto-fallback DeepInfra (HF => SentenceSimilarity).")
|
|
@@ -318,7 +321,7 @@ def run_index_job(job_id: str, req: IndexRequest):
|
|
| 318 |
continue
|
| 319 |
chunks, metas = [], []
|
| 320 |
for ci, (start, end, chunk_txt) in enumerate(_chunk_with_spans(f.text, req.chunk_size, req.overlap)):
|
| 321 |
-
if not (chunk_txt or "").strip():
|
| 322 |
continue
|
| 323 |
chunks.append(chunk_txt)
|
| 324 |
meta = {"path": f.path, "chunk": ci, "start": start, "end": end}
|
|
@@ -367,6 +370,7 @@ def root():
|
|
| 367 |
"backends": EMB_BACKEND_ORDER,
|
| 368 |
"hf_url_pipeline": HF_URL_PIPELINE if "hf" in EMB_BACKEND_ORDER else None,
|
| 369 |
"hf_url_models": HF_URL_MODELS if "hf" in EMB_BACKEND_ORDER else None,
|
|
|
|
| 370 |
"di_model": DI_MODEL if "deepinfra" in EMB_BACKEND_ORDER else None,
|
| 371 |
"docs": "/health, /index, /status/{job_id}, /query, /wipe"
|
| 372 |
}
|
|
|
|
| 15 |
LOG = logging.getLogger("remote_indexer")
|
| 16 |
|
| 17 |
# ---------- ENV (config) ----------
|
| 18 |
+
# Par défaut on met DeepInfra d'abord pour être opérationnel.
|
| 19 |
DEFAULT_BACKENDS = "deepinfra,hf"
|
| 20 |
EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", os.getenv("EMB_BACKEND", DEFAULT_BACKENDS)).split(",") if s.strip()]
|
| 21 |
|
| 22 |
+
# Auto-fallback vers DeepInfra si HF route la requête vers SentenceSimilarity (la fameuse erreur Similarity)
|
| 23 |
ALLOW_DI_AUTOFALLBACK = os.getenv("ALLOW_DI_AUTOFALLBACK", "true").lower() in ("1","true","yes","on")
|
| 24 |
|
| 25 |
# HF Inference API
|
|
|
|
| 42 |
HF_WAIT = os.getenv("HF_WAIT_FOR_MODEL", "true").lower() in ("1","true","yes","on")
|
| 43 |
HF_PIPELINE_FIRST = os.getenv("HF_PIPELINE_FIRST", "true").lower() in ("1","true","yes","on")
|
| 44 |
|
| 45 |
+
# DeepInfra Embeddings (OpenAI-compatible)
|
| 46 |
DI_TOKEN = os.getenv("DEEPINFRA_API_KEY", "").strip()
|
| 47 |
+
# 👇 IMPORTANT : modèle existant chez DeepInfra (multilingue)
|
| 48 |
+
DI_MODEL = os.getenv("DEEPINFRA_EMBED_MODEL", "BAAI/bge-m3").strip()
|
| 49 |
+
# 👇 IMPORTANT : endpoint OpenAI-compatible
|
| 50 |
+
DI_URL = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/openai/embeddings").strip()
|
| 51 |
DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
|
| 52 |
|
| 53 |
# Retries
|
|
|
|
| 190 |
def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
|
| 191 |
if not DI_TOKEN:
|
| 192 |
raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
|
| 193 |
+
# OpenAI-compatible embeddings endpoint
|
| 194 |
headers = {"Authorization": f"Bearer {DI_TOKEN}", "Content-Type": "application/json", "Accept": "application/json"}
|
| 195 |
payload = {"model": DI_MODEL, "input": batch}
|
| 196 |
+
# NB: on peut aussi ajouter "encoding_format":"float" si nécessaire
|
| 197 |
r = requests.post(DI_URL, headers=headers, json=payload, timeout=DI_TIMEOUT)
|
| 198 |
size = int(r.headers.get("Content-Length", "0"))
|
| 199 |
if r.status_code >= 400:
|
|
|
|
| 240 |
def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
|
| 241 |
"""
|
| 242 |
Essaie les backends dans EMB_BACKEND_ORDER avec retries.
|
| 243 |
+
Auto-fallback optionnel vers DeepInfra si HF renvoie l'erreur Similarity (routage vers SentenceSimilarity).
|
| 244 |
"""
|
| 245 |
last_err = None
|
| 246 |
similarity_misroute = False
|
|
|
|
| 266 |
else:
|
| 267 |
_append_log(job_id, f"Backend inconnu ignoré: {b}")
|
| 268 |
|
|
|
|
| 269 |
if ALLOW_DI_AUTOFALLBACK and similarity_misroute and DI_TOKEN:
|
| 270 |
LOG.warning("HF a routé sur SentenceSimilarity => auto-fallback DeepInfra (override ordre).")
|
| 271 |
_append_log(job_id, "Auto-fallback DeepInfra (HF => SentenceSimilarity).")
|
|
|
|
| 321 |
continue
|
| 322 |
chunks, metas = [], []
|
| 323 |
for ci, (start, end, chunk_txt) in enumerate(_chunk_with_spans(f.text, req.chunk_size, req.overlap)):
|
| 324 |
+
if not (chunk_txt or "").strip(): # pas d'embeddings sur des blancs
|
| 325 |
continue
|
| 326 |
chunks.append(chunk_txt)
|
| 327 |
meta = {"path": f.path, "chunk": ci, "start": start, "end": end}
|
|
|
|
| 370 |
"backends": EMB_BACKEND_ORDER,
|
| 371 |
"hf_url_pipeline": HF_URL_PIPELINE if "hf" in EMB_BACKEND_ORDER else None,
|
| 372 |
"hf_url_models": HF_URL_MODELS if "hf" in EMB_BACKEND_ORDER else None,
|
| 373 |
+
"di_url": DI_URL if "deepinfra" in EMB_BACKEND_ORDER else None,
|
| 374 |
"di_model": DI_MODEL if "deepinfra" in EMB_BACKEND_ORDER else None,
|
| 375 |
"docs": "/health, /index, /status/{job_id}, /query, /wipe"
|
| 376 |
}
|