chouchouvs committed on
Commit
4642f4a
·
verified ·
1 Parent(s): 3b9e413

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +12 -8
main.py CHANGED
@@ -15,11 +15,11 @@ logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message
15
  LOG = logging.getLogger("remote_indexer")
16
 
17
  # ---------- ENV (config) ----------
18
- # Par défaut on met DeepInfra d'abord pour être opérationnel tout de suite.
19
  DEFAULT_BACKENDS = "deepinfra,hf"
20
  EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", os.getenv("EMB_BACKEND", DEFAULT_BACKENDS)).split(",") if s.strip()]
21
 
22
- # Auto-fallback vers DeepInfra si HF répond "SentenceSimilarityPipeline ... 'sentences' manquant"
23
  ALLOW_DI_AUTOFALLBACK = os.getenv("ALLOW_DI_AUTOFALLBACK", "true").lower() in ("1","true","yes","on")
24
 
25
  # HF Inference API
@@ -42,10 +42,12 @@ HF_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
42
  HF_WAIT = os.getenv("HF_WAIT_FOR_MODEL", "true").lower() in ("1","true","yes","on")
43
  HF_PIPELINE_FIRST = os.getenv("HF_PIPELINE_FIRST", "true").lower() in ("1","true","yes","on")
44
 
45
- # DeepInfra Embeddings (OpenAI-like)
46
  DI_TOKEN = os.getenv("DEEPINFRA_API_KEY", "").strip()
47
- DI_MODEL = os.getenv("DEEPINFRA_EMBED_MODEL", "thenlper/gte-small").strip()
48
- DI_URL = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/embeddings").strip()
 
 
49
  DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
50
 
51
  # Retries
@@ -188,8 +190,10 @@ def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
188
  def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
189
  if not DI_TOKEN:
190
  raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
 
191
  headers = {"Authorization": f"Bearer {DI_TOKEN}", "Content-Type": "application/json", "Accept": "application/json"}
192
  payload = {"model": DI_MODEL, "input": batch}
 
193
  r = requests.post(DI_URL, headers=headers, json=payload, timeout=DI_TIMEOUT)
194
  size = int(r.headers.get("Content-Length", "0"))
195
  if r.status_code >= 400:
@@ -236,7 +240,7 @@ def _call_with_retries(func, batch: List[str], label: str, job_id: Optional[str]
236
  def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
237
  """
238
  Essaie les backends dans EMB_BACKEND_ORDER avec retries.
239
- Auto-fallback optionnel vers DeepInfra si HF renvoie la fameuse erreur "SentenceSimilarityPipeline".
240
  """
241
  last_err = None
242
  similarity_misroute = False
@@ -262,7 +266,6 @@ def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np
262
  else:
263
  _append_log(job_id, f"Backend inconnu ignoré: {b}")
264
 
265
- # Auto-fallback DI si activé et si le problème HF est le misrouting Similarity
266
  if ALLOW_DI_AUTOFALLBACK and similarity_misroute and DI_TOKEN:
267
  LOG.warning("HF a routé sur SentenceSimilarity => auto-fallback DeepInfra (override ordre).")
268
  _append_log(job_id, "Auto-fallback DeepInfra (HF => SentenceSimilarity).")
@@ -318,7 +321,7 @@ def run_index_job(job_id: str, req: IndexRequest):
318
  continue
319
  chunks, metas = [], []
320
  for ci, (start, end, chunk_txt) in enumerate(_chunk_with_spans(f.text, req.chunk_size, req.overlap)):
321
- if not (chunk_txt or "").strip():
322
  continue
323
  chunks.append(chunk_txt)
324
  meta = {"path": f.path, "chunk": ci, "start": start, "end": end}
@@ -367,6 +370,7 @@ def root():
367
  "backends": EMB_BACKEND_ORDER,
368
  "hf_url_pipeline": HF_URL_PIPELINE if "hf" in EMB_BACKEND_ORDER else None,
369
  "hf_url_models": HF_URL_MODELS if "hf" in EMB_BACKEND_ORDER else None,
 
370
  "di_model": DI_MODEL if "deepinfra" in EMB_BACKEND_ORDER else None,
371
  "docs": "/health, /index, /status/{job_id}, /query, /wipe"
372
  }
 
15
  LOG = logging.getLogger("remote_indexer")
16
 
17
  # ---------- ENV (config) ----------
18
+ # Par défaut on met DeepInfra d'abord pour être opérationnel.
19
  DEFAULT_BACKENDS = "deepinfra,hf"
20
  EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", os.getenv("EMB_BACKEND", DEFAULT_BACKENDS)).split(",") if s.strip()]
21
 
22
+ # Auto-fallback vers DeepInfra si HF renvoie l'erreur "SentenceSimilarityPipeline" ('sentences' manquant)
23
  ALLOW_DI_AUTOFALLBACK = os.getenv("ALLOW_DI_AUTOFALLBACK", "true").lower() in ("1","true","yes","on")
24
 
25
  # HF Inference API
 
42
  HF_WAIT = os.getenv("HF_WAIT_FOR_MODEL", "true").lower() in ("1","true","yes","on")
43
  HF_PIPELINE_FIRST = os.getenv("HF_PIPELINE_FIRST", "true").lower() in ("1","true","yes","on")
44
 
45
+ # DeepInfra Embeddings (OpenAI-compatible)
46
  DI_TOKEN = os.getenv("DEEPINFRA_API_KEY", "").strip()
47
+ # 👇 IMPORTANT : modèle existant chez DeepInfra (multilingue)
48
+ DI_MODEL = os.getenv("DEEPINFRA_EMBED_MODEL", "BAAI/bge-m3").strip()
49
+ # 👇 IMPORTANT : endpoint OpenAI-compatible
50
+ DI_URL = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/openai/embeddings").strip()
51
  DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
52
 
53
  # Retries
 
190
  def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
191
  if not DI_TOKEN:
192
  raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
193
+ # OpenAI-compatible embeddings endpoint
194
  headers = {"Authorization": f"Bearer {DI_TOKEN}", "Content-Type": "application/json", "Accept": "application/json"}
195
  payload = {"model": DI_MODEL, "input": batch}
196
+ # NB: on peut aussi ajouter "encoding_format":"float" si nécessaire
197
  r = requests.post(DI_URL, headers=headers, json=payload, timeout=DI_TIMEOUT)
198
  size = int(r.headers.get("Content-Length", "0"))
199
  if r.status_code >= 400:
 
240
  def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
241
  """
242
  Essaie les backends dans EMB_BACKEND_ORDER avec retries.
243
+ Auto-fallback optionnel vers DeepInfra si HF renvoie l'erreur "SentenceSimilarityPipeline".
244
  """
245
  last_err = None
246
  similarity_misroute = False
 
266
  else:
267
  _append_log(job_id, f"Backend inconnu ignoré: {b}")
268
 
 
269
  if ALLOW_DI_AUTOFALLBACK and similarity_misroute and DI_TOKEN:
270
  LOG.warning("HF a routé sur SentenceSimilarity => auto-fallback DeepInfra (override ordre).")
271
  _append_log(job_id, "Auto-fallback DeepInfra (HF => SentenceSimilarity).")
 
321
  continue
322
  chunks, metas = [], []
323
  for ci, (start, end, chunk_txt) in enumerate(_chunk_with_spans(f.text, req.chunk_size, req.overlap)):
324
+ if not (chunk_txt or "").strip(): # pas d'embeddings sur des blancs
325
  continue
326
  chunks.append(chunk_txt)
327
  meta = {"path": f.path, "chunk": ci, "start": start, "end": end}
 
370
  "backends": EMB_BACKEND_ORDER,
371
  "hf_url_pipeline": HF_URL_PIPELINE if "hf" in EMB_BACKEND_ORDER else None,
372
  "hf_url_models": HF_URL_MODELS if "hf" in EMB_BACKEND_ORDER else None,
373
+ "di_url": DI_URL if "deepinfra" in EMB_BACKEND_ORDER else None,
374
  "di_model": DI_MODEL if "deepinfra" in EMB_BACKEND_ORDER else None,
375
  "docs": "/health, /index, /status/{job_id}, /query, /wipe"
376
  }