Update main.py

main.py
CHANGED
@@ -22,10 +22,7 @@ EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", o
 HF_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
 HF_MODEL = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
 
-#
-# - HF_API_URL_PIPELINE : forces the pipeline URL (feature-extraction)
-# - HF_API_URL_MODELS : forces the models URL
-# - HF_API_URL : compat; if it contains "/pipeline", it is used on the pipeline side, otherwise on the models side
+# Configurable URLs
 HF_API_URL_USER = os.getenv("HF_API_URL", "").strip()
 HF_API_URL_PIPELINE = os.getenv("HF_API_URL_PIPELINE", "").strip()
 HF_API_URL_MODELS = os.getenv("HF_API_URL_MODELS", "").strip()
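
For context, a hedged sketch of how these variables are meant to resolve, pieced together from the removed comment block above and the "if HF_API_URL_USER:" branch shown in the next hunk. The pipeline branch of that if-statement is not shown in this diff, so treat it as an assumption rather than the file's exact code.

import os

# Sketch only, not the file's exact code.
HF_MODEL = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
HF_API_URL_USER = os.getenv("HF_API_URL", "").strip()               # legacy single URL
HF_API_URL_PIPELINE = os.getenv("HF_API_URL_PIPELINE", "").strip()  # forces the pipeline URL
HF_API_URL_MODELS = os.getenv("HF_API_URL_MODELS", "").strip()      # forces the models URL

if HF_API_URL_USER:
    if "/pipeline" in HF_API_URL_USER:
        HF_API_URL_PIPELINE = HF_API_URL_USER  # inferred branch (not shown in the diff)
    else:
        HF_API_URL_MODELS = HF_API_URL_USER

HF_URL_PIPELINE = HF_API_URL_PIPELINE or f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}"
HF_URL_MODELS = HF_API_URL_MODELS or f"https://api-inference.huggingface.co/models/{HF_MODEL}"
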
@@ -36,6 +33,7 @@ if HF_API_URL_USER:
 else:
     HF_API_URL_MODELS = HF_API_URL_USER
 
+# Defaults
 HF_URL_PIPELINE = (HF_API_URL_PIPELINE or f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}")
 HF_URL_MODELS = (HF_API_URL_MODELS or f"https://api-inference.huggingface.co/models/{HF_MODEL}")
 
@@ -115,9 +113,11 @@ def _retry_sleep(attempt: int):
     jitter = 1.0 + random.uniform(-RETRY_JITTER, RETRY_JITTER)
     return max(0.25, back * jitter)
 
-def _hf_http(
-
-)
+def _with_task_param(url: str, task: str = "feature-extraction") -> str:
+    # Adds ?task=feature-extraction (or &task=...) if absent
+    return url + ("&" if "?" in url else "?") + f"task={task}"
+
+def _hf_http(url: str, payload: Dict[str, Any], headers_extra: Optional[Dict[str, str]] = None) -> Tuple[np.ndarray, int]:
     if not HF_TOKEN:
         raise RuntimeError("HF_API_TOKEN manquant (backend=hf).")
 
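
A quick, self-contained check of the new _with_task_param helper (the function body is copied from the hunk above; the URLs are hypothetical), showing the two cases it handles: no query string yet vs. an existing one.

def _with_task_param(url: str, task: str = "feature-extraction") -> str:
    # Appends ?task=... or &task=... depending on whether a query string is already present
    return url + ("&" if "?" in url else "?") + f"task={task}"

assert _with_task_param("https://api-inference.huggingface.co/models/some-model") == \
    "https://api-inference.huggingface.co/models/some-model?task=feature-extraction"
assert _with_task_param("https://example.org/models/some-model?x=1") == \
    "https://example.org/models/some-model?x=1&task=feature-extraction"
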
@@ -126,19 +126,24 @@ def _hf_http(
         "Content-Type": "application/json",
         "Accept": "application/json",
     }
+    # options.wait_for_model in the JSON payload + X-Wait-For-Model header -> maximum compatibility
+    if HF_WAIT:
+        payload.setdefault("options", {})["wait_for_model"] = True
+        headers["X-Wait-For-Model"] = "true"
+        headers["X-Use-Cache"] = "true"
+
     if headers_extra:
         headers.update(headers_extra)
 
     r = requests.post(url, headers=headers, json=payload, timeout=HF_TIMEOUT)
     size = int(r.headers.get("Content-Length", "0"))
     if r.status_code >= 400:
-        # Show part of the response body for diagnostics
         LOG.error(f"HF error {r.status_code}: {r.text[:1000]}")
         r.raise_for_status()
 
     data = r.json()
     arr = np.array(data, dtype=np.float32)
-    # data can be: [tokens, dim]
+    # data can be: [tokens, dim], [batch, tokens, dim], [batch, dim], [dim]
     if arr.ndim == 3:  # [batch, tokens, dim]
         arr = arr.mean(axis=1)
     elif arr.ndim == 2:
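
To make the shape handling above concrete, here is a toy, self-contained illustration of the [batch, tokens, dim] case: the token axis is mean-pooled so each input ends up as a single vector (the numbers are made up).

import numpy as np

data = [[[1.0, 2.0], [3.0, 4.0]],   # input 1: 2 tokens, dim=2
        [[5.0, 6.0], [7.0, 8.0]]]   # input 2: 2 tokens, dim=2
arr = np.array(data, dtype=np.float32)
assert arr.ndim == 3                # [batch, tokens, dim]
pooled = arr.mean(axis=1)           # mean over the token axis
print(pooled.shape)                 # (2, 2): one vector per input
print(pooled)                       # [[2. 3.] [6. 7.]]
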
@@ -155,31 +160,45 @@ def _hf_http(
 
 def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     """
-    1) Try PIPELINE feature-extraction (if
+    1) Try PIPELINE feature-extraction (if available)
     2) Fallback MODELS + header X-Task: feature-extraction
+    3) If still 400 because of SentenceSimilarityPipeline, also force ?task=feature-extraction on the MODELS URL
     """
-    # common payload
     payload: Dict[str, Any] = {"inputs": (batch if len(batch) > 1 else batch[0])}
-    if HF_WAIT:
-        payload["options"] = {"wait_for_model": True}
 
-    # order: pipeline first (configurable)
     urls = [HF_URL_PIPELINE, HF_URL_MODELS] if HF_PIPELINE_FIRST else [HF_URL_MODELS, HF_URL_PIPELINE]
+    last_exc: Optional[Exception] = None
+
     for idx, url in enumerate(urls, 1):
         try:
             if "/models/" in url:
+                # 2) MODELS with the X-Task header
                 return _hf_http(url, payload, headers_extra={"X-Task": "feature-extraction"})
             else:
+                # 1) PIPELINE
                 return _hf_http(url, payload, headers_extra=None)
         except requests.HTTPError as he:
             code = he.response.status_code if he.response is not None else 0
-
+            body = he.response.text if he.response is not None else ""
+            last_exc = he
             if code in (404, 405, 501) and idx < len(urls):
                 LOG.warning(f"HF endpoint {url} non dispo ({code}), fallback vers alternative ...")
                 continue
+            # If MODELS was hit and returned SentenceSimilarityPipeline -> retry with ?task=feature-extraction
+            if "/models/" in url and "SentenceSimilarityPipeline" in (body or ""):
+                try:
+                    forced_url = _with_task_param(url, "feature-extraction")
+                    LOG.warning("HF MODELS a choisi Similarity -> retry avec %s + X-Task", forced_url)
+                    return _hf_http(forced_url, payload, headers_extra={"X-Task": "feature-extraction"})
+                except Exception as he2:
+                    last_exc = he2
             raise
+        except Exception as e:
+            last_exc = e
+            raise
-
-
+
+    # should not happen
+    raise RuntimeError(f"HF: aucun endpoint utilisable ({last_exc})")
 
 def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     if not DI_TOKEN:
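
A hedged usage sketch of the rewritten function, assuming the module-level configuration above is in place and HF_API_TOKEN is valid; the "_once" suffix suggests it is normally wrapped in a retry loop built on _retry_sleep.

# Hypothetical call; requires network access and a valid HF_API_TOKEN.
vectors, size = _hf_post_embeddings_once(["bonjour le monde", "hello world"])
print(vectors.shape)   # e.g. (2, 384) for sentence-transformers/all-MiniLM-L6-v2
print(size)            # Content-Length reported by the HF response (0 if the header is absent)
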
@@ -455,7 +474,7 @@ def wipe_collection(project_id: str, x_auth_token: Optional[str] = Header(defaul
         raise HTTPException(401, "Unauthorized")
     col = f"proj_{project_id}"
     try:
-
+        qdr.delete_collection(col); return {"ok": True}
     except Exception as e:
         raise HTTPException(400, f"wipe failed: {e}")
 
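
For reference, a hedged illustration of what the new wipe line does on the Qdrant side, written against the public qdrant-client API; the URL and project id are hypothetical, and note that the handler only deletes the collection, it does not recreate it.

from qdrant_client import QdrantClient

client = QdrantClient(url="http://localhost:6333")   # hypothetical connection details
project_id = "demo"                                  # hypothetical project id
col = f"proj_{project_id}"                           # same naming scheme as the handler
client.delete_collection(collection_name=col)        # equivalent of qdr.delete_collection(col)
print(col in [c.name for c in client.get_collections().collections])  # False after the wipe
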