Spaces:

chouchouvs
/

DeepIndex

Running

App Files Community

chouchouvs committed on Sep 13

Commit

3b9e413

verified ·

1 Parent(s): 1dc9ef1

Update main.py

Browse files

Files changed (1) hide show

main.py +35 -46

main.py CHANGED Viewed

@@ -15,25 +15,26 @@ logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message
 LOG = logging.getLogger("remote_indexer")
 # ---------- ENV (config) ----------
-# Ordre des backends d'embeddings à essayer (séparés par des virgules). Ex: "hf,deepinfra"
-EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", os.getenv("EMB_BACKEND", "hf")).split(",") if s.strip()]
 # HF Inference API
 HF_TOKEN   = os.getenv("HF_API_TOKEN", "").strip()
 HF_MODEL   = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
-# URLs configurables
 HF_API_URL_USER      = os.getenv("HF_API_URL", "").strip()
 HF_API_URL_PIPELINE  = os.getenv("HF_API_URL_PIPELINE", "").strip()
 HF_API_URL_MODELS    = os.getenv("HF_API_URL_MODELS", "").strip()
 if HF_API_URL_USER:
     if "/pipeline" in HF_API_URL_USER:
         HF_API_URL_PIPELINE = HF_API_URL_USER
     else:
         HF_API_URL_MODELS = HF_API_URL_USER
-# Défaults
 HF_URL_PIPELINE = (HF_API_URL_PIPELINE or f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}")
 HF_URL_MODELS   = (HF_API_URL_MODELS   or f"https://api-inference.huggingface.co/models/{HF_MODEL}")
@@ -48,15 +49,15 @@ DI_URL     = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/embe
 DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
 # Retries
-RETRY_MAX      = int(os.getenv("EMB_RETRY_MAX", "6"))         # tentatives max par backend
-RETRY_BASE_SEC = float(os.getenv("EMB_RETRY_BASE", "1.5"))    # backoff de base (exponentiel)
-RETRY_JITTER   = float(os.getenv("EMB_RETRY_JITTER", "0.35")) # jitter fraction (0..1)
 # Qdrant
 QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
 QDRANT_API = os.getenv("QDRANT_API_KEY", "").strip()
-# Auth d’API du service (simple header)
 AUTH_TOKEN = os.getenv("REMOTE_INDEX_TOKEN", "").strip()
 LOG.info(f"Embeddings backend order = {EMB_BACKEND_ORDER}")
@@ -91,8 +92,8 @@ class QueryRequest(BaseModel):
     query: str
     top_k: int = 6
-# ---------- Jobs store (mémoire) ----------
-JOBS: Dict[str, Dict[str, Any]] = {}  # {job_id: {"status": "...", "logs": [...], "created": ts}}
 def _append_log(job_id: str, line: str):
     job = JOBS.get(job_id)
@@ -106,32 +107,28 @@ def _auth(x_auth: Optional[str]):
     if AUTH_TOKEN and (x_auth or "") != AUTH_TOKEN:
         raise HTTPException(status_code=401, detail="Unauthorized")
-# ---------- Embeddings backends avec retry ----------
 def _retry_sleep(attempt: int):
-    # backoff exponentiel + jitter
     back = (RETRY_BASE_SEC ** attempt)
     jitter = 1.0 + random.uniform(-RETRY_JITTER, RETRY_JITTER)
     return max(0.25, back * jitter)
 def _with_task_param(url: str, task: str = "feature-extraction") -> str:
-    # Ajoute ?task=feature-extraction (ou &task=...) si absent
     return url + ("&" if "?" in url else "?") + f"task={task}"
 def _hf_http(url: str, payload: Dict[str, Any], headers_extra: Optional[Dict[str, str]] = None) -> Tuple[np.ndarray, int]:
     if not HF_TOKEN:
         raise RuntimeError("HF_API_TOKEN manquant (backend=hf).")
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}",
         "Content-Type": "application/json",
         "Accept": "application/json",
     }
-    # options.wait_for_model dans le JSON + X-Wait-For-Model côté header -> compat maximale
     if HF_WAIT:
         payload.setdefault("options", {})["wait_for_model"] = True
         headers["X-Wait-For-Model"] = "true"
         headers["X-Use-Cache"] = "true"
     if headers_extra:
         headers.update(headers_extra)
@@ -143,7 +140,6 @@ def _hf_http(url: str, payload: Dict[str, Any], headers_extra: Optional[Dict[str
     data = r.json()
     arr = np.array(data, dtype=np.float32)
-    # data peut être: [tokens, dim], [batch, tokens, dim], [batch, dim], [dim]
     if arr.ndim == 3:   # [batch, tokens, dim]
         arr = arr.mean(axis=1)
     elif arr.ndim == 2:
@@ -153,29 +149,20 @@ def _hf_http(url: str, payload: Dict[str, Any], headers_extra: Optional[Dict[str
     else:
         raise RuntimeError(f"HF: unexpected embeddings shape: {arr.shape}")
-    # normalisation L2
     norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
     arr = arr / norms
     return arr.astype(np.float32), size
 def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
-    """
-    1) Essaie PIPELINE feature-extraction (si dispo)
-    2) Fallback MODELS + header X-Task: feature-extraction
-    3) Si encore 400 à cause de SentenceSimilarityPipeline, force aussi ?task=feature-extraction sur l'URL MODELS
-    """
     payload: Dict[str, Any] = {"inputs": (batch if len(batch) > 1 else batch[0])}
     urls = [HF_URL_PIPELINE, HF_URL_MODELS] if HF_PIPELINE_FIRST else [HF_URL_MODELS, HF_URL_PIPELINE]
     last_exc: Optional[Exception] = None
     for idx, url in enumerate(urls, 1):
         try:
             if "/models/" in url:
-                # 2) MODELS avec header X-Task
                 return _hf_http(url, payload, headers_extra={"X-Task": "feature-extraction"})
             else:
-                # 1) PIPELINE
                 return _hf_http(url, payload, headers_extra=None)
         except requests.HTTPError as he:
             code = he.response.status_code if he.response is not None else 0
@@ -184,7 +171,6 @@ def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
             if code in (404, 405, 501) and idx < len(urls):
                 LOG.warning(f"HF endpoint {url} non dispo ({code}), fallback vers alternative ...")
                 continue
-            # Si on a tapé MODELS et reçu SentenceSimilarityPipeline -> réessaie avec ?task=feature-extraction
             if "/models/" in url and "SentenceSimilarityPipeline" in (body or ""):
                 try:
                     forced_url = _with_task_param(url, "feature-extraction")
@@ -196,10 +182,9 @@ def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
         except Exception as e:
             last_exc = e
             raise
-    # ne devrait pas arriver
     raise RuntimeError(f"HF: aucun endpoint utilisable ({last_exc})")
 def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     if not DI_TOKEN:
         raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
@@ -222,6 +207,7 @@ def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     arr = arr / norms
     return arr.astype(np.float32), size
 def _call_with_retries(func, batch: List[str], label: str, job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
     last_exc = None
     for attempt in range(RETRY_MAX):
@@ -250,17 +236,22 @@ def _call_with_retries(func, batch: List[str], label: str, job_id: Optional[str]
 def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
     """
     Essaie les backends dans EMB_BACKEND_ORDER avec retries.
-    Ex: EMB_BACKEND_ORDER=hf,deepinfra
     """
     last_err = None
     for b in EMB_BACKEND_ORDER:
         if b == "hf":
             try:
                 return _call_with_retries(_hf_post_embeddings_once, batch, "HF", job_id)
-            except Exception as e:
-                last_err = e
-                _append_log(job_id, f"HF failed: {e}.")
-                LOG.error(f"HF failed: {e}")
         elif b == "deepinfra":
             try:
                 return _call_with_retries(_di_post_embeddings_once, batch, "DeepInfra", job_id)
@@ -270,6 +261,13 @@ def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np
                 LOG.error(f"DeepInfra failed: {e}")
         else:
             _append_log(job_id, f"Backend inconnu ignoré: {b}")
     raise RuntimeError(f"Tous les backends ont échoué: {last_err}")
 # ---------- Qdrant helpers ----------
@@ -302,7 +300,6 @@ def run_index_job(job_id: str, req: IndexRequest):
         _append_log(job_id, f"Start project={req.project_id} files={len(req.files)} | backends={EMB_BACKEND_ORDER}")
         LOG.info(f"[{job_id}] Index start project={req.project_id} files={len(req.files)}")
-        # Warmup -> dimension (1er morceau non vide si possible)
         warm = "warmup"
         if req.files:
             for _, _, chunk_txt in _chunk_with_spans(req.files[0].text or "", req.chunk_size, req.overlap):
@@ -315,7 +312,6 @@ def run_index_job(job_id: str, req: IndexRequest):
         _append_log(job_id, f"Collection ready: {col} (dim={dim})")
         point_id = 0
-        # Boucle sur les fichiers
         for fi, f in enumerate(req.files, 1):
             if not (f.text or "").strip():
                 _append_log(job_id, f"file {fi}: vide — ignoré")
@@ -329,7 +325,6 @@ def run_index_job(job_id: str, req: IndexRequest):
                 if req.store_text:
                     meta["text"] = chunk_txt
                 metas.append(meta)
-                # flush par lots
                 if len(chunks) >= req.batch_size:
                     vecs, sz = _post_embeddings(chunks, job_id=job_id)
                     batch_points = [
@@ -342,7 +337,6 @@ def run_index_job(job_id: str, req: IndexRequest):
                     _append_log(job_id, f"file {fi}/{len(req.files)}: +{len(chunks)} chunks (total={total_chunks}) ~{sz/1024:.1f}KiB")
                     chunks, metas = [], []
-            # flush fin de fichier
             if chunks:
                 vecs, sz = _post_embeddings(chunks, job_id=job_id)
                 batch_points = [
@@ -392,8 +386,6 @@ def start_index(req: IndexRequest, background_tasks: BackgroundTasks, x_auth_tok
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
         raise HTTPException(401, "Unauthorized")
     _check_backend_ready()
-    # Filtrage défensif des fichiers vides pour éviter 422
     non_empty = [f for f in req.files if (f.text or "").strip()]
     if not non_empty:
         raise HTTPException(422, "Aucun fichier non vide à indexer.")
@@ -413,7 +405,7 @@ def status(job_id: str, x_auth_token: Optional[str] = Header(default=None)):
         raise HTTPException(404, "job inconnu")
     return {"status": j["status"], "logs": j["logs"][-800:]}
-# --- Compat endpoints (clients legacy) ---
 @app.get("/status")
 def status_qp(job_id: str = Query(None), x_auth_token: Optional[str] = Header(default=None)):
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
@@ -442,10 +434,7 @@ def query(req: QueryRequest, x_auth_token: Optional[str] = Header(default=None))
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
         raise HTTPException(401, "Unauthorized")
     _check_backend_ready()
-    # bornes du top_k
     k = int(max(1, min(50, req.top_k or 6)))
     vecs, _ = _post_embeddings([req.query])
     col = f"proj_{req.project_id}"
     try:
@@ -474,7 +463,7 @@ def wipe_collection(project_id: str, x_auth_token: Optional[str] = Header(defaul
         raise HTTPException(401, "Unauthorized")
     col = f"proj_{project_id}"
     try:
-        qdr.delete_collection(col); return {"ok": True}
     except Exception as e:
         raise HTTPException(400, f"wipe failed: {e}")

 LOG = logging.getLogger("remote_indexer")
 # ---------- ENV (config) ----------
+# Par défaut on met DeepInfra d'abord pour être opérationnel tout de suite.
+DEFAULT_BACKENDS = "deepinfra,hf"
+EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", os.getenv("EMB_BACKEND", DEFAULT_BACKENDS)).split(",") if s.strip()]
+# Auto-fallback vers DeepInfra si HF répond "SentenceSimilarityPipeline ... 'sentences' manquant"
+ALLOW_DI_AUTOFALLBACK = os.getenv("ALLOW_DI_AUTOFALLBACK", "true").lower() in ("1","true","yes","on")
 # HF Inference API
 HF_TOKEN   = os.getenv("HF_API_TOKEN", "").strip()
 HF_MODEL   = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
 HF_API_URL_USER      = os.getenv("HF_API_URL", "").strip()
 HF_API_URL_PIPELINE  = os.getenv("HF_API_URL_PIPELINE", "").strip()
 HF_API_URL_MODELS    = os.getenv("HF_API_URL_MODELS", "").strip()
 if HF_API_URL_USER:
     if "/pipeline" in HF_API_URL_USER:
         HF_API_URL_PIPELINE = HF_API_URL_USER
     else:
         HF_API_URL_MODELS = HF_API_URL_USER
 HF_URL_PIPELINE = (HF_API_URL_PIPELINE or f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}")
 HF_URL_MODELS   = (HF_API_URL_MODELS   or f"https://api-inference.huggingface.co/models/{HF_MODEL}")
 DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
 # Retries
+RETRY_MAX      = int(os.getenv("EMB_RETRY_MAX", "6"))
+RETRY_BASE_SEC = float(os.getenv("EMB_RETRY_BASE", "1.5"))
+RETRY_JITTER   = float(os.getenv("EMB_RETRY_JITTER", "0.35"))
 # Qdrant
 QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
 QDRANT_API = os.getenv("QDRANT_API_KEY", "").strip()
+# Auth
 AUTH_TOKEN = os.getenv("REMOTE_INDEX_TOKEN", "").strip()
 LOG.info(f"Embeddings backend order = {EMB_BACKEND_ORDER}")
     query: str
     top_k: int = 6
+# ---------- Jobs store ----------
+JOBS: Dict[str, Dict[str, Any]] = {}
 def _append_log(job_id: str, line: str):
     job = JOBS.get(job_id)
     if AUTH_TOKEN and (x_auth or "") != AUTH_TOKEN:
         raise HTTPException(status_code=401, detail="Unauthorized")
+# ---------- Helpers retry ----------
 def _retry_sleep(attempt: int):
     """Return the delay, in seconds, to wait before the next retry.

     The base delay is RETRY_BASE_SEC raised to the power ``attempt``. It is
     scaled by a random factor drawn from
     [1 - RETRY_JITTER, 1 + RETRY_JITTER] and never goes below 0.25.
     This function only computes the value; it does not sleep itself.
     """
     # Random spread around 1.0 so the computed delay varies from call to call.
     spread = random.uniform(-RETRY_JITTER, RETRY_JITTER)
     delay = (RETRY_BASE_SEC ** attempt) * (1.0 + spread)
     # Floor the result at a quarter of a second.
     return max(0.25, delay)
 def _with_task_param(url: str, task: str = "feature-extraction") -> str:
     return url + ("&" if "?" in url else "?") + f"task={task}"
+# ---------- HF embeddings ----------
 def _hf_http(url: str, payload: Dict[str, Any], headers_extra: Optional[Dict[str, str]] = None) -> Tuple[np.ndarray, int]:
     if not HF_TOKEN:
         raise RuntimeError("HF_API_TOKEN manquant (backend=hf).")
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}",
         "Content-Type": "application/json",
         "Accept": "application/json",
     }
     if HF_WAIT:
         payload.setdefault("options", {})["wait_for_model"] = True
         headers["X-Wait-For-Model"] = "true"
         headers["X-Use-Cache"] = "true"
     if headers_extra:
         headers.update(headers_extra)
     data = r.json()
     arr = np.array(data, dtype=np.float32)
     if arr.ndim == 3:   # [batch, tokens, dim]
         arr = arr.mean(axis=1)
     elif arr.ndim == 2:
     else:
         raise RuntimeError(f"HF: unexpected embeddings shape: {arr.shape}")
     norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
     arr = arr / norms
     return arr.astype(np.float32), size
 def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     payload: Dict[str, Any] = {"inputs": (batch if len(batch) > 1 else batch[0])}
     urls = [HF_URL_PIPELINE, HF_URL_MODELS] if HF_PIPELINE_FIRST else [HF_URL_MODELS, HF_URL_PIPELINE]
     last_exc: Optional[Exception] = None
     for idx, url in enumerate(urls, 1):
         try:
             if "/models/" in url:
                 return _hf_http(url, payload, headers_extra={"X-Task": "feature-extraction"})
             else:
                 return _hf_http(url, payload, headers_extra=None)
         except requests.HTTPError as he:
             code = he.response.status_code if he.response is not None else 0
             if code in (404, 405, 501) and idx < len(urls):
                 LOG.warning(f"HF endpoint {url} non dispo ({code}), fallback vers alternative ...")
                 continue
             if "/models/" in url and "SentenceSimilarityPipeline" in (body or ""):
                 try:
                     forced_url = _with_task_param(url, "feature-extraction")
         except Exception as e:
             last_exc = e
             raise
     raise RuntimeError(f"HF: aucun endpoint utilisable ({last_exc})")
+# ---------- DeepInfra embeddings ----------
 def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     if not DI_TOKEN:
         raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
     arr = arr / norms
     return arr.astype(np.float32), size
+# ---------- Retry orchestrator ----------
 def _call_with_retries(func, batch: List[str], label: str, job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
     last_exc = None
     for attempt in range(RETRY_MAX):
 def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
     """
     Essaie les backends dans EMB_BACKEND_ORDER avec retries.
+    Auto-fallback optionnel vers DeepInfra si HF renvoie la fameuse erreur "SentenceSimilarityPipeline".
     """
     last_err = None
+    similarity_misroute = False
     for b in EMB_BACKEND_ORDER:
         if b == "hf":
             try:
                 return _call_with_retries(_hf_post_embeddings_once, batch, "HF", job_id)
+            except requests.HTTPError as he:
+                body = he.response.text if getattr(he, "response", None) is not None else ""
+                if "SentenceSimilarityPipeline.__call__()" in (body or ""):
+                    similarity_misroute = True
+                last_err = he
+                _append_log(job_id, f"HF failed: {he}.")
+                LOG.error(f"HF failed: {he}")
         elif b == "deepinfra":
             try:
                 return _call_with_retries(_di_post_embeddings_once, batch, "DeepInfra", job_id)
                 LOG.error(f"DeepInfra failed: {e}")
         else:
             _append_log(job_id, f"Backend inconnu ignoré: {b}")
+    # Auto-fallback DI si activé et si le problème HF est le misrouting Similarity
+    if ALLOW_DI_AUTOFALLBACK and similarity_misroute and DI_TOKEN:
+        LOG.warning("HF a routé sur SentenceSimilarity => auto-fallback DeepInfra (override ordre).")
+        _append_log(job_id, "Auto-fallback DeepInfra (HF => SentenceSimilarity).")
+        return _call_with_retries(_di_post_embeddings_once, batch, "DeepInfra", job_id)
     raise RuntimeError(f"Tous les backends ont échoué: {last_err}")
 # ---------- Qdrant helpers ----------
         _append_log(job_id, f"Start project={req.project_id} files={len(req.files)} | backends={EMB_BACKEND_ORDER}")
         LOG.info(f"[{job_id}] Index start project={req.project_id} files={len(req.files)}")
         warm = "warmup"
         if req.files:
             for _, _, chunk_txt in _chunk_with_spans(req.files[0].text or "", req.chunk_size, req.overlap):
         _append_log(job_id, f"Collection ready: {col} (dim={dim})")
         point_id = 0
         for fi, f in enumerate(req.files, 1):
             if not (f.text or "").strip():
                 _append_log(job_id, f"file {fi}: vide — ignoré")
                 if req.store_text:
                     meta["text"] = chunk_txt
                 metas.append(meta)
                 if len(chunks) >= req.batch_size:
                     vecs, sz = _post_embeddings(chunks, job_id=job_id)
                     batch_points = [
                     _append_log(job_id, f"file {fi}/{len(req.files)}: +{len(chunks)} chunks (total={total_chunks}) ~{sz/1024:.1f}KiB")
                     chunks, metas = [], []
             if chunks:
                 vecs, sz = _post_embeddings(chunks, job_id=job_id)
                 batch_points = [
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
         raise HTTPException(401, "Unauthorized")
     _check_backend_ready()
     non_empty = [f for f in req.files if (f.text or "").strip()]
     if not non_empty:
         raise HTTPException(422, "Aucun fichier non vide à indexer.")
         raise HTTPException(404, "job inconnu")
     return {"status": j["status"], "logs": j["logs"][-800:]}
+# Compat legacy
 @app.get("/status")
 def status_qp(job_id: str = Query(None), x_auth_token: Optional[str] = Header(default=None)):
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
         raise HTTPException(401, "Unauthorized")
     _check_backend_ready()
     k = int(max(1, min(50, req.top_k or 6)))
     vecs, _ = _post_embeddings([req.query])
     col = f"proj_{req.project_id}"
     try:
         raise HTTPException(401, "Unauthorized")
     col = f"proj_{project_id}"
     try:
+        qdrant.delete_collection(col); return {"ok": True}
     except Exception as e:
         raise HTTPException(400, f"wipe failed: {e}")