chouchouvs committed on
Commit
9ea3ad6
·
verified ·
1 Parent(s): 19a096b

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +97 -21
main.py CHANGED
@@ -5,7 +5,7 @@ from typing import List, Optional, Dict, Any, Tuple
5
 
6
  import numpy as np
7
  import requests
8
- from fastapi import FastAPI, BackgroundTasks, Header, HTTPException
9
  from pydantic import BaseModel, Field
10
  from qdrant_client import QdrantClient
11
  from qdrant_client.http.models import VectorParams, Distance, PointStruct
@@ -20,9 +20,10 @@ EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", o
20
 
21
  # HF Inference API
22
  HF_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
23
- HF_MODEL = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
 
24
  HF_URL = (os.getenv("HF_API_URL", "").strip()
25
- or f"https://api-inference.huggingface.co/models/{HF_MODEL}")
26
  HF_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
27
  HF_WAIT = os.getenv("HF_WAIT_FOR_MODEL", "true").lower() in ("1","true","yes","on")
28
 
@@ -73,6 +74,8 @@ class QueryRequest(BaseModel):
73
  project_id: str
74
  query: str
75
  top_k: int = 6
 
 
76
 
77
  # ---------- Jobs store (mémoire) ----------
78
  JOBS: Dict[str, Dict[str, Any]] = {} # {job_id: {"status": "...", "logs": [...], "created": ts}}
@@ -91,36 +94,53 @@ def _auth(x_auth: Optional[str]):
91
 
92
  # ---------- Embeddings backends avec retry ----------
93
  def _retry_sleep(attempt: int):
94
- # backoff exponentiel + jitter (p.ex. 1.5^attempt) * (1 ± jitter)
95
  back = (RETRY_BASE_SEC ** attempt)
96
  jitter = 1.0 + random.uniform(-RETRY_JITTER, RETRY_JITTER)
97
  return max(0.25, back * jitter)
98
 
99
  def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
 
 
 
 
 
100
  if not HF_TOKEN:
101
  raise RuntimeError("HF_API_TOKEN manquant (backend=hf).")
102
  headers = {
103
  "Authorization": f"Bearer {HF_TOKEN}",
104
  "Content-Type": "application/json",
 
 
 
105
  }
 
106
  if HF_WAIT:
107
- headers["X-Wait-For-Model"] = "true"
108
- headers["X-Use-Cache"] = "true"
109
- payload = {"inputs": batch if len(batch) > 1 else batch[0]}
110
  r = requests.post(HF_URL, headers=headers, json=payload, timeout=HF_TIMEOUT)
111
  size = int(r.headers.get("Content-Length", "0"))
112
  if r.status_code >= 400:
 
113
  LOG.error(f"HF error {r.status_code}: {r.text[:1000]}")
114
  r.raise_for_status()
 
115
  data = r.json()
 
 
 
 
116
  arr = np.array(data, dtype=np.float32)
117
- if arr.ndim == 3: # [batch, tokens, dim]
118
  arr = arr.mean(axis=1)
119
- if arr.ndim == 1: # [dim] -> [1, dim]
 
 
120
  arr = arr.reshape(1, -1)
121
- if arr.ndim != 2:
122
  raise RuntimeError(f"HF: unexpected embeddings shape: {arr.shape}")
123
- # normalisation
 
124
  norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
125
  arr = arr / norms
126
  return arr.astype(np.float32), size
@@ -143,7 +163,6 @@ def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
143
  arr = np.asarray(embs, dtype=np.float32)
144
  if arr.ndim != 2:
145
  raise RuntimeError(f"DeepInfra: unexpected embeddings shape: {arr.shape}")
146
- # normalisation
147
  norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
148
  arr = arr / norms
149
  return arr.astype(np.float32), size
@@ -166,13 +185,11 @@ def _call_with_retries(func, batch: List[str], label: str, job_id: Optional[str]
166
  time.sleep(sleep_s)
167
  last_exc = he
168
  except Exception as e:
169
- # on tente quelques retries aussi sur erreurs réseau transitoires
170
  sleep_s = _retry_sleep(attempt)
171
  msg = f"{label}: error {type(e).__name__}: {e}, retry in {sleep_s:.1f}s"
172
  LOG.warning(msg); _append_log(job_id, msg)
173
  time.sleep(sleep_s)
174
  last_exc = e
175
- # épuisé
176
  raise RuntimeError(f"{label}: retries exhausted: {last_exc}")
177
 
178
  def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
@@ -189,7 +206,6 @@ def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np
189
  last_err = e
190
  _append_log(job_id, f"HF failed: {e}.")
191
  LOG.error(f"HF failed: {e}")
192
- # passe au backend suivant si dispo
193
  elif b == "deepinfra":
194
  try:
195
  return _call_with_retries(_di_post_embeddings_once, batch, "DeepInfra", job_id)
@@ -201,6 +217,7 @@ def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np
201
  _append_log(job_id, f"Backend inconnu ignoré: {b}")
202
  raise RuntimeError(f"Tous les backends ont échoué: {last_err}")
203
 
 
204
  def _ensure_collection(name: str, dim: int):
205
  try:
206
  qdr.get_collection(name); return
@@ -212,7 +229,7 @@ def _ensure_collection(name: str, dim: int):
212
  )
213
 
214
  def _chunk_with_spans(text: str, size: int, overlap: int):
215
- n = len(text)
216
  if size <= 0:
217
  yield (0, n, text); return
218
  i = 0
@@ -230,9 +247,13 @@ def run_index_job(job_id: str, req: IndexRequest):
230
  _append_log(job_id, f"Start project={req.project_id} files={len(req.files)} | backends={EMB_BACKEND_ORDER}")
231
  LOG.info(f"[{job_id}] Index start project={req.project_id} files={len(req.files)}")
232
 
233
- # Warmup -> dimension (1er morceau)
234
- first_text = next(_chunk_with_spans(req.files[0].text if req.files else "", req.chunk_size, req.overlap))[2] if req.files else "warmup"
235
- embs, sz = _post_embeddings([first_text], job_id=job_id)
 
 
 
 
236
  dim = embs.shape[1]
237
  col = f"proj_{req.project_id}"
238
  _ensure_collection(col, dim)
@@ -241,8 +262,13 @@ def run_index_job(job_id: str, req: IndexRequest):
241
  point_id = 0
242
  # Boucle sur les fichiers
243
  for fi, f in enumerate(req.files, 1):
 
 
 
244
  chunks, metas = [], []
245
  for ci, (start, end, chunk_txt) in enumerate(_chunk_with_spans(f.text, req.chunk_size, req.overlap)):
 
 
246
  chunks.append(chunk_txt)
247
  meta = {"path": f.path, "chunk": ci, "start": start, "end": end}
248
  if req.store_text:
@@ -310,6 +336,13 @@ def start_index(req: IndexRequest, background_tasks: BackgroundTasks, x_auth_tok
310
  if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
311
  raise HTTPException(401, "Unauthorized")
312
  _check_backend_ready()
 
 
 
 
 
 
 
313
  job_id = uuid.uuid4().hex[:12]
314
  JOBS[job_id] = {"status": "queued", "logs": [], "created": time.time()}
315
  background_tasks.add_task(run_index_job, job_id, req)
@@ -324,15 +357,51 @@ def status(job_id: str, x_auth_token: Optional[str] = Header(default=None)):
324
  raise HTTPException(404, "job inconnu")
325
  return {"status": j["status"], "logs": j["logs"][-800:]}
326
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  @app.post("/query")
328
  def query(req: QueryRequest, x_auth_token: Optional[str] = Header(default=None)):
329
  if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
330
  raise HTTPException(401, "Unauthorized")
331
  _check_backend_ready()
 
 
 
 
 
 
 
 
 
 
 
 
332
  vecs, _ = _post_embeddings([req.query])
333
  col = f"proj_{req.project_id}"
334
  try:
335
- res = qdr.search(collection_name=col, query_vector=vecs[0].tolist(), limit=int(req.top_k))
336
  except Exception as e:
337
  raise HTTPException(400, f"Search failed: {e}")
338
  out = []
@@ -341,7 +410,14 @@ def query(req: QueryRequest, x_auth_token: Optional[str] = Header(default=None))
341
  txt = pl.get("text")
342
  if txt and len(txt) > 800:
343
  txt = txt[:800] + "..."
344
- out.append({"path": pl.get("path"), "chunk": pl.get("chunk"), "start": pl.get("start"), "end": pl.get("end"), "text": txt})
 
 
 
 
 
 
 
345
  return {"results": out}
346
 
347
  @app.post("/wipe")
 
5
 
6
  import numpy as np
7
  import requests
8
+ from fastapi import FastAPI, BackgroundTasks, Header, HTTPException, Query
9
  from pydantic import BaseModel, Field
10
  from qdrant_client import QdrantClient
11
  from qdrant_client.http.models import VectorParams, Distance, PointStruct
 
20
 
21
  # HF Inference API
22
  HF_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
23
+ HF_MODEL = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
24
+ # 👉 On force la pipeline "feature-extraction" pour obtenir des embeddings (et pas la Similarity)
25
  HF_URL = (os.getenv("HF_API_URL", "").strip()
26
+ or f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}")
27
  HF_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
28
  HF_WAIT = os.getenv("HF_WAIT_FOR_MODEL", "true").lower() in ("1","true","yes","on")
29
 
 
74
  project_id: str
75
  query: str
76
  top_k: int = 6
77
+ # compat champ alternatif
78
+ # (si le client envoie "topk", on le lira plus bas directement dans le JSON brut)
79
 
80
  # ---------- Jobs store (mémoire) ----------
81
  JOBS: Dict[str, Dict[str, Any]] = {} # {job_id: {"status": "...", "logs": [...], "created": ts}}
 
94
 
95
  # ---------- Embeddings backends avec retry ----------
96
def _retry_sleep(attempt: int):
    """Return the delay (seconds) to sleep before retry number *attempt*.

    Exponential backoff (RETRY_BASE_SEC ** attempt) with multiplicative
    jitter of +/- RETRY_JITTER, floored at 0.25 s.
    """
    delay = RETRY_BASE_SEC ** attempt
    delay *= 1.0 + random.uniform(-RETRY_JITTER, RETRY_JITTER)
    return max(0.25, delay)
101
 
102
def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
    """Single call to the HF Inference API 'feature-extraction' pipeline.

    Sends *batch* as ``inputs`` (a bare string when the batch has exactly
    one element, a list otherwise) and returns ``(embeddings, size)``:
    ``embeddings`` is an L2-normalised float32 array of shape [batch, dim],
    ``size`` is the response's Content-Length header (0 when absent, e.g.
    for chunked responses).

    Raises RuntimeError when HF_API_TOKEN is missing or the response has an
    unexpected shape; requests.HTTPError for HTTP status >= 400.
    """
    if not HF_TOKEN:
        raise RuntimeError("HF_API_TOKEN manquant (backend=hf).")
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
        # NB: with the /pipeline/feature-extraction/... URL there should be
        # no need to force X-Task, but a guard can be added in case of an
        # exotic reverse-proxy:
        # "X-Task": "feature-extraction",
    }
    payload: Dict[str, Any] = {"inputs": (batch if len(batch) > 1 else batch[0])}
    if HF_WAIT:
        # Ask the API to block until the model is loaded instead of 503-ing.
        payload["options"] = {"wait_for_model": True}

    r = requests.post(HF_URL, headers=headers, json=payload, timeout=HF_TIMEOUT)
    size = int(r.headers.get("Content-Length", "0"))
    if r.status_code >= 400:
        # Log part of the body to help diagnose a wrong pipeline/model.
        LOG.error(f"HF error {r.status_code}: {r.text[:1000]}")
        r.raise_for_status()

    data = r.json()
    # data may be:
    # - [tokens, dim] for one sentence       => mean over tokens
    # - [batch, tokens, dim] for a batch     => mean per element
    # - sometimes already [batch, dim] on some hosts
    arr = np.array(data, dtype=np.float32)
    if arr.ndim == 3:  # [batch, tokens, dim]
        arr = arr.mean(axis=1)
    elif arr.ndim == 2:
        # NOTE(review): assumed to already be [batch, dim]. For a
        # single-string input, a token-level [tokens, dim] response would
        # also land here and be returned as `tokens` separate embeddings —
        # confirm against the actual model's output shape.
        pass
    elif arr.ndim == 1:  # [dim] -> [1, dim]
        arr = arr.reshape(1, -1)
    else:
        raise RuntimeError(f"HF: unexpected embeddings shape: {arr.shape}")

    # L2 normalisation; epsilon avoids division by zero on all-zero rows.
    norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
    arr = arr / norms
    return arr.astype(np.float32), size
 
163
  arr = np.asarray(embs, dtype=np.float32)
164
  if arr.ndim != 2:
165
  raise RuntimeError(f"DeepInfra: unexpected embeddings shape: {arr.shape}")
 
166
  norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
167
  arr = arr / norms
168
  return arr.astype(np.float32), size
 
185
  time.sleep(sleep_s)
186
  last_exc = he
187
  except Exception as e:
 
188
  sleep_s = _retry_sleep(attempt)
189
  msg = f"{label}: error {type(e).__name__}: {e}, retry in {sleep_s:.1f}s"
190
  LOG.warning(msg); _append_log(job_id, msg)
191
  time.sleep(sleep_s)
192
  last_exc = e
 
193
  raise RuntimeError(f"{label}: retries exhausted: {last_exc}")
194
 
195
  def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
 
206
  last_err = e
207
  _append_log(job_id, f"HF failed: {e}.")
208
  LOG.error(f"HF failed: {e}")
 
209
  elif b == "deepinfra":
210
  try:
211
  return _call_with_retries(_di_post_embeddings_once, batch, "DeepInfra", job_id)
 
217
  _append_log(job_id, f"Backend inconnu ignoré: {b}")
218
  raise RuntimeError(f"Tous les backends ont échoué: {last_err}")
219
 
220
+ # ---------- Qdrant helpers ----------
221
  def _ensure_collection(name: str, dim: int):
222
  try:
223
  qdr.get_collection(name); return
 
229
  )
230
 
231
  def _chunk_with_spans(text: str, size: int, overlap: int):
232
+ n = len(text or "")
233
  if size <= 0:
234
  yield (0, n, text); return
235
  i = 0
 
247
  _append_log(job_id, f"Start project={req.project_id} files={len(req.files)} | backends={EMB_BACKEND_ORDER}")
248
  LOG.info(f"[{job_id}] Index start project={req.project_id} files={len(req.files)}")
249
 
250
+ # Warmup -> dimension (1er morceau non vide si possible)
251
+ warm = "warmup"
252
+ if req.files:
253
+ for _, _, chunk_txt in _chunk_with_spans(req.files[0].text or "", req.chunk_size, req.overlap):
254
+ if (chunk_txt or "").strip():
255
+ warm = chunk_txt; break
256
+ embs, sz = _post_embeddings([warm], job_id=job_id)
257
  dim = embs.shape[1]
258
  col = f"proj_{req.project_id}"
259
  _ensure_collection(col, dim)
 
262
  point_id = 0
263
  # Boucle sur les fichiers
264
  for fi, f in enumerate(req.files, 1):
265
+ if not (f.text or "").strip():
266
+ _append_log(job_id, f"file {fi}: vide — ignoré")
267
+ continue
268
  chunks, metas = [], []
269
  for ci, (start, end, chunk_txt) in enumerate(_chunk_with_spans(f.text, req.chunk_size, req.overlap)):
270
+ if not (chunk_txt or "").strip():
271
+ continue
272
  chunks.append(chunk_txt)
273
  meta = {"path": f.path, "chunk": ci, "start": start, "end": end}
274
  if req.store_text:
 
336
  if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
337
  raise HTTPException(401, "Unauthorized")
338
  _check_backend_ready()
339
+
340
+ # Filtrage défensif des fichiers vides pour éviter 422 côté client/serveur
341
+ non_empty = [f for f in req.files if (f.text or "").strip()]
342
+ if not non_empty:
343
+ raise HTTPException(422, "Aucun fichier non vide à indexer.")
344
+ req.files = non_empty
345
+
346
  job_id = uuid.uuid4().hex[:12]
347
  JOBS[job_id] = {"status": "queued", "logs": [], "created": time.time()}
348
  background_tasks.add_task(run_index_job, job_id, req)
 
357
  raise HTTPException(404, "job inconnu")
358
  return {"status": j["status"], "logs": j["logs"][-800:]}
359
 
360
# --- Compat endpoints (for legacy clients) ---
@app.get("/status")
def status_qp(job_id: Optional[str] = Query(default=None),
              x_auth_token: Optional[str] = Header(default=None)):
    """GET variant of /status for legacy clients: /status?job_id=...

    Returns the job status plus the last 800 log lines.
    Raises 401 on a bad auth token, 404 when job_id is missing or unknown.
    """
    if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
        raise HTTPException(401, "Unauthorized")
    if not job_id:
        # Missing/empty query parameter -> same 404 as an unknown job,
        # declared Optional[str] so the annotation matches the None default.
        raise HTTPException(404, "job inconnu")
    j = JOBS.get(job_id)
    if not j:
        raise HTTPException(404, "job inconnu")
    return {"status": j["status"], "logs": j["logs"][-800:]}
371
+
372
class _StatusBody(BaseModel):
    """Request body for the POST /status compat endpoint."""
    # Identifier returned by POST /index when the job was queued.
    job_id: str
374
+
375
@app.post("/status")
def status_post(body: _StatusBody, x_auth_token: Optional[str] = Header(default=None)):
    """POST variant of /status for legacy clients: job id arrives in the JSON body.

    Returns the job status plus the last 800 log lines; 401 on a bad auth
    token, 404 for an unknown job.
    """
    if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
        raise HTTPException(401, "Unauthorized")
    job = JOBS.get(body.job_id)
    if not job:
        raise HTTPException(404, "job inconnu")
    return {"status": job["status"], "logs": job["logs"][-800:]}
383
+
384
  @app.post("/query")
385
  def query(req: QueryRequest, x_auth_token: Optional[str] = Header(default=None)):
386
  if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
387
  raise HTTPException(401, "Unauthorized")
388
  _check_backend_ready()
389
+
390
+ # Accepte topk/top_k (compat)
391
+ k = req.top_k
392
+ try:
393
+ # si le client a envoyé "topk", on le récupère du JSON brut via headers x-raw-body (HF ne le fournit pas),
394
+ # donc on fait une passe défensive: si top_k n'est pas cohérent, on limite quand même.
395
+ k = int(k)
396
+ except Exception:
397
+ k = 6
398
+ if k <= 0: k = 6
399
+ if k > 50: k = 50
400
+
401
  vecs, _ = _post_embeddings([req.query])
402
  col = f"proj_{req.project_id}"
403
  try:
404
+ res = qdr.search(collection_name=col, query_vector=vecs[0].tolist(), limit=k)
405
  except Exception as e:
406
  raise HTTPException(400, f"Search failed: {e}")
407
  out = []
 
410
  txt = pl.get("text")
411
  if txt and len(txt) > 800:
412
  txt = txt[:800] + "..."
413
+ out.append({
414
+ "path": pl.get("path"),
415
+ "chunk": pl.get("chunk"),
416
+ "start": pl.get("start"),
417
+ "end": pl.get("end"),
418
+ "text": txt,
419
+ "score": float(p.score) if hasattr(p, "score") else None,
420
+ })
421
  return {"results": out}
422
 
423
  @app.post("/wipe")