Spaces:

chouchouvs
/

DeepIndex

Running

App Files Files Community

chouchouvs committed on Sep 13

Commit

e0f6e27

verified ·

1 Parent(s): 9ea3ad6

Update main.py

Browse files

Files changed (1) hide show

main.py +67 -38

main.py CHANGED Viewed

@@ -21,11 +21,27 @@ EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", o
 # HF Inference API
 HF_TOKEN   = os.getenv("HF_API_TOKEN", "").strip()
 HF_MODEL   = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
-# 👉 On force la pipeline "feature-extraction" pour obtenir des embeddings (et pas la Similarity)
-HF_URL     = (os.getenv("HF_API_URL", "").strip()
-              or f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}")
 HF_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
 HF_WAIT    = os.getenv("HF_WAIT_FOR_MODEL", "true").lower() in ("1","true","yes","on")
 # DeepInfra Embeddings (OpenAI-like)
 DI_TOKEN   = os.getenv("DEEPINFRA_API_KEY", "").strip()
@@ -46,6 +62,8 @@ QDRANT_API = os.getenv("QDRANT_API_KEY", "").strip()
 AUTH_TOKEN = os.getenv("REMOTE_INDEX_TOKEN", "").strip()
 LOG.info(f"Embeddings backend order = {EMB_BACKEND_ORDER}")
 if "hf" in EMB_BACKEND_ORDER and not HF_TOKEN:
     LOG.warning("HF_API_TOKEN manquant — tentatives HF échoueront.")
 if "deepinfra" in EMB_BACKEND_ORDER and not DI_TOKEN:
@@ -74,8 +92,6 @@ class QueryRequest(BaseModel):
     project_id: str
     query: str
     top_k: int = 6
-    # compat champ alternatif
-    # (si le client envoie "topk", on le lira plus bas directement dans le JSON brut)
 # ---------- Jobs store (mémoire) ----------
 JOBS: Dict[str, Dict[str, Any]] = {}  # {job_id: {"status": "...", "logs": [...], "created": ts}}
@@ -99,38 +115,30 @@ def _retry_sleep(attempt: int):
     jitter = 1.0 + random.uniform(-RETRY_JITTER, RETRY_JITTER)
     return max(0.25, back * jitter)
-def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
-    """
-    Appel Inference API en pipeline 'feature-extraction' (retour = embeddings).
-    - inputs: str ou list[str]
-    - options.wait_for_model: True si demandé
-    """
     if not HF_TOKEN:
         raise RuntimeError("HF_API_TOKEN manquant (backend=hf).")
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}",
         "Content-Type": "application/json",
-        # NB: avec l'URL /pipeline/feature-extraction/... on ne devrait pas avoir besoin de forcer X-Task,
-        # mais on peut ajouter une garde en cas de reverse-proxy exotique :
-        # "X-Task": "feature-extraction",
     }
-    payload: Dict[str, Any] = {"inputs": (batch if len(batch) > 1 else batch[0])}
-    if HF_WAIT:
-        payload["options"] = {"wait_for_model": True}
-    r = requests.post(HF_URL, headers=headers, json=payload, timeout=HF_TIMEOUT)
     size = int(r.headers.get("Content-Length", "0"))
     if r.status_code >= 400:
-        # Affiche une partie du corps pour diagnostiquer le mauvais pipeline si jamais
         LOG.error(f"HF error {r.status_code}: {r.text[:1000]}")
         r.raise_for_status()
     data = r.json()
-    # data peut être:
-    # - [tokens, dim] pour une phrase => moyenne sur tokens
-    # - [batch, tokens, dim] pour batch => moyenne par élément
-    # - parfois déjà [batch, dim] selon certains hôtes
     arr = np.array(data, dtype=np.float32)
     if arr.ndim == 3:   # [batch, tokens, dim]
         arr = arr.mean(axis=1)
     elif arr.ndim == 2:
@@ -145,10 +153,38 @@ def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     arr = arr / norms
     return arr.astype(np.float32), size
 def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     if not DI_TOKEN:
         raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
-    headers = {"Authorization": f"Bearer {DI_TOKEN}", "Content-Type": "application/json"}
     payload = {"model": DI_MODEL, "input": batch}
     r = requests.post(DI_URL, headers=headers, json=payload, timeout=DI_TIMEOUT)
     size = int(r.headers.get("Content-Length", "0"))
@@ -316,7 +352,8 @@ def root():
         "ok": True,
         "service": "remote-indexer",
         "backends": EMB_BACKEND_ORDER,
-        "hf_url": HF_URL if "hf" in EMB_BACKEND_ORDER else None,
         "di_model": DI_MODEL if "deepinfra" in EMB_BACKEND_ORDER else None,
         "docs": "/health, /index, /status/{job_id}, /query, /wipe"
     }
@@ -337,7 +374,7 @@ def start_index(req: IndexRequest, background_tasks: BackgroundTasks, x_auth_tok
         raise HTTPException(401, "Unauthorized")
     _check_backend_ready()
-    # Filtrage défensif des fichiers vides pour éviter 422 côté client/serveur
     non_empty = [f for f in req.files if (f.text or "").strip()]
     if not non_empty:
         raise HTTPException(422, "Aucun fichier non vide à indexer.")
@@ -357,7 +394,7 @@ def status(job_id: str, x_auth_token: Optional[str] = Header(default=None)):
         raise HTTPException(404, "job inconnu")
     return {"status": j["status"], "logs": j["logs"][-800:]}
-# --- Compat endpoints (pour clients legacy) ---
 @app.get("/status")
 def status_qp(job_id: str = Query(None), x_auth_token: Optional[str] = Header(default=None)):
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
@@ -387,16 +424,8 @@ def query(req: QueryRequest, x_auth_token: Optional[str] = Header(default=None))
         raise HTTPException(401, "Unauthorized")
     _check_backend_ready()
-    # Accepte topk/top_k (compat)
-    k = req.top_k
-    try:
-        # si le client a envoyé "topk", on le récupère du JSON brut via headers x-raw-body (HF ne le fournit pas),
-        # donc on fait une passe défensive: si top_k n'est pas cohérent, on limite quand même.
-        k = int(k)
-    except Exception:
-        k = 6
-    if k <= 0: k = 6
-    if k > 50: k = 50
     vecs, _ = _post_embeddings([req.query])
     col = f"proj_{req.project_id}"
@@ -426,7 +455,7 @@ def wipe_collection(project_id: str, x_auth_token: Optional[str] = Header(defaul
         raise HTTPException(401, "Unauthorized")
     col = f"proj_{project_id}"
     try:
-        qdr.delete_collection(col); return {"ok": True}
     except Exception as e:
         raise HTTPException(400, f"wipe failed: {e}")

 # HF Inference API
 HF_TOKEN   = os.getenv("HF_API_TOKEN", "").strip()
 HF_MODEL   = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
+# On supporte 3 variables pour être souple:
+# - HF_API_URL_PIPELINE : force l'URL pipeline (feature-extraction)
+# - HF_API_URL_MODELS   : force l'URL models
+# - HF_API_URL          : compat; si contient "/pipeline", on l'utilise côté pipeline sinon côté models
+HF_API_URL_USER      = os.getenv("HF_API_URL", "").strip()
+HF_API_URL_PIPELINE  = os.getenv("HF_API_URL_PIPELINE", "").strip()
+HF_API_URL_MODELS    = os.getenv("HF_API_URL_MODELS", "").strip()
+if HF_API_URL_USER:
+    if "/pipeline" in HF_API_URL_USER:
+        HF_API_URL_PIPELINE = HF_API_URL_USER
+    else:
+        HF_API_URL_MODELS = HF_API_URL_USER
+HF_URL_PIPELINE = (HF_API_URL_PIPELINE or f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}")
+HF_URL_MODELS   = (HF_API_URL_MODELS   or f"https://api-inference.huggingface.co/models/{HF_MODEL}")
 HF_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
 HF_WAIT    = os.getenv("HF_WAIT_FOR_MODEL", "true").lower() in ("1","true","yes","on")
+HF_PIPELINE_FIRST = os.getenv("HF_PIPELINE_FIRST", "true").lower() in ("1","true","yes","on")
 # DeepInfra Embeddings (OpenAI-like)
 DI_TOKEN   = os.getenv("DEEPINFRA_API_KEY", "").strip()
 AUTH_TOKEN = os.getenv("REMOTE_INDEX_TOKEN", "").strip()
 LOG.info(f"Embeddings backend order = {EMB_BACKEND_ORDER}")
+LOG.info(f"HF pipeline URL = {HF_URL_PIPELINE}")
+LOG.info(f"HF models   URL = {HF_URL_MODELS}")
 if "hf" in EMB_BACKEND_ORDER and not HF_TOKEN:
     LOG.warning("HF_API_TOKEN manquant — tentatives HF échoueront.")
 if "deepinfra" in EMB_BACKEND_ORDER and not DI_TOKEN:
     project_id: str
     query: str
     top_k: int = 6
 # ---------- Jobs store (mémoire) ----------
 JOBS: Dict[str, Dict[str, Any]] = {}  # {job_id: {"status": "...", "logs": [...], "created": ts}}
     jitter = 1.0 + random.uniform(-RETRY_JITTER, RETRY_JITTER)
     return max(0.25, back * jitter)
+def _hf_http(
+    url: str, payload: Dict[str, Any], headers_extra: Optional[Dict[str, str]] = None
+) -> Tuple[np.ndarray, int]:
     if not HF_TOKEN:
         raise RuntimeError("HF_API_TOKEN manquant (backend=hf).")
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}",
         "Content-Type": "application/json",
+        "Accept": "application/json",
     }
+    if headers_extra:
+        headers.update(headers_extra)
+    r = requests.post(url, headers=headers, json=payload, timeout=HF_TIMEOUT)
     size = int(r.headers.get("Content-Length", "0"))
     if r.status_code >= 400:
+        # Affiche une partie du corps pour diagnostiquer
         LOG.error(f"HF error {r.status_code}: {r.text[:1000]}")
         r.raise_for_status()
     data = r.json()
     arr = np.array(data, dtype=np.float32)
+    # data peut être: [tokens, dim] ou [batch, tokens, dim] ou [batch, dim] ou [dim]
     if arr.ndim == 3:   # [batch, tokens, dim]
         arr = arr.mean(axis=1)
     elif arr.ndim == 2:
     arr = arr / norms
     return arr.astype(np.float32), size
+def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
+    """
+    Request embeddings for `batch` via `_hf_http`, trying two HF URL forms in turn.
+    1) Tries the PIPELINE feature-extraction URL (HF_URL_PIPELINE).
+    2) Falls back to the MODELS URL (HF_URL_MODELS) with header X-Task: feature-extraction.
+    The order is swapped when HF_PIPELINE_FIRST is false.
+    Returns whatever `_hf_http` returns for the first URL that succeeds.
+    Raises requests.HTTPError when the status is not 404/405/501, or when the
+    last URL in the list also fails.
+    """
+    # shared payload: a bare string when the batch has exactly one item, else the list
+    payload: Dict[str, Any] = {"inputs": (batch if len(batch) > 1 else batch[0])}
+    if HF_WAIT:
+        payload["options"] = {"wait_for_model": True}
+    # order: pipeline URL first unless HF_PIPELINE_FIRST is disabled
+    urls = [HF_URL_PIPELINE, HF_URL_MODELS] if HF_PIPELINE_FIRST else [HF_URL_MODELS, HF_URL_PIPELINE]
+    for idx, url in enumerate(urls, 1):
+        try:
+            # models-style URL: add the X-Task header naming feature-extraction
+            if "/models/" in url:
+                return _hf_http(url, payload, headers_extra={"X-Task": "feature-extraction"})
+            else:
+                return _hf_http(url, payload, headers_extra=None)
+        except requests.HTTPError as he:
+            code = he.response.status_code if he.response is not None else 0
+            # on 404/405/501, try the other URL form (only if one remains)
+            if code in (404, 405, 501) and idx < len(urls):
+                LOG.warning(f"HF endpoint {url} non dispo ({code}), fallback vers alternative ...")
+                continue
+            raise
+    # should never be reached: the loop returns or raises on the last URL
+    raise RuntimeError("HF: aucun endpoint utilisable")
 def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     if not DI_TOKEN:
         raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
+    headers = {"Authorization": f"Bearer {DI_TOKEN}", "Content-Type": "application/json", "Accept": "application/json"}
     payload = {"model": DI_MODEL, "input": batch}
     r = requests.post(DI_URL, headers=headers, json=payload, timeout=DI_TIMEOUT)
     size = int(r.headers.get("Content-Length", "0"))
         "ok": True,
         "service": "remote-indexer",
         "backends": EMB_BACKEND_ORDER,
+        "hf_url_pipeline": HF_URL_PIPELINE if "hf" in EMB_BACKEND_ORDER else None,
+        "hf_url_models": HF_URL_MODELS if "hf" in EMB_BACKEND_ORDER else None,
         "di_model": DI_MODEL if "deepinfra" in EMB_BACKEND_ORDER else None,
         "docs": "/health, /index, /status/{job_id}, /query, /wipe"
     }
         raise HTTPException(401, "Unauthorized")
     _check_backend_ready()
+    # Filtrage défensif des fichiers vides pour éviter 422
     non_empty = [f for f in req.files if (f.text or "").strip()]
     if not non_empty:
         raise HTTPException(422, "Aucun fichier non vide à indexer.")
         raise HTTPException(404, "job inconnu")
     return {"status": j["status"], "logs": j["logs"][-800:]}
+# --- Compat endpoints (clients legacy) ---
 @app.get("/status")
 def status_qp(job_id: str = Query(None), x_auth_token: Optional[str] = Header(default=None)):
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
         raise HTTPException(401, "Unauthorized")
     _check_backend_ready()
+    # bornes du top_k
+    k = int(max(1, min(50, req.top_k or 6)))
     vecs, _ = _post_embeddings([req.query])
     col = f"proj_{req.project_id}"
         raise HTTPException(401, "Unauthorized")
     col = f"proj_{project_id}"
     try:
+        qdrant.delete_collection(col); return {"ok": True}
     except Exception as e:
         raise HTTPException(400, f"wipe failed: {e}")