Spaces:

chouchouvs
/

DeepIndex

Running

App Files Files Community

chouchouvs committed on Sep 7

Commit

b678bb5

verified ·

1 Parent(s): dd055bb

Update main.py

Browse files

Files changed (1) hide show

main.py +35 -28

main.py CHANGED Viewed

@@ -17,15 +17,14 @@ LOG = logging.getLogger("remote_indexer")
 # ---------- ENV ----------
 EMB_BACKEND = os.getenv("EMB_BACKEND", "hf").strip().lower()   # "hf" (défaut) ou "deepinfra"
-# HF
 HF_TOKEN   = os.getenv("HF_API_TOKEN", "").strip()
 HF_MODEL   = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
-# Si tu as un Inference Endpoint privé, ou si tu veux l’API "models/..." :
-# ex: https://api-inference.huggingface.co/models/sentence-transformers/all-MiniLM-L6-v2
 HF_URL     = (os.getenv("HF_API_URL", "").strip()
-              or f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}")
-# DeepInfra
 DI_TOKEN   = os.getenv("DEEPINFRA_API_KEY", "").strip()
 DI_MODEL   = os.getenv("DEEPINFRA_EMBED_MODEL", "thenlper/gte-small").strip()
 DI_URL     = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/embeddings").strip()
@@ -76,14 +75,25 @@ def _auth(x_auth: Optional[str]):
         raise HTTPException(status_code=401, detail="Unauthorized")
 def _hf_post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
     if not HF_TOKEN:
         raise RuntimeError("HF_API_TOKEN manquant (backend=hf).")
-    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
     try:
-        r = requests.post(HF_URL, headers=headers, json=batch, timeout=120)
         size = int(r.headers.get("Content-Length", "0"))
         if r.status_code >= 400:
-            # Log détaillé pour comprendre le 403/4xx
             try:
                 LOG.error(f"HF error {r.status_code}: {r.text}")
             except Exception:
@@ -97,6 +107,9 @@ def _hf_post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
     # [batch, dim] (sentence-transformers) ou [batch, tokens, dim] -> mean-pooling
     if arr.ndim == 3:
         arr = arr.mean(axis=1)
     if arr.ndim != 2:
         raise RuntimeError(f"HF: unexpected embeddings shape: {arr.shape}")
     # normalisation
@@ -105,6 +118,11 @@ def _hf_post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
     return arr.astype(np.float32), size
 def _di_post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
     if not DI_TOKEN:
         raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
     headers = {"Authorization": f"Bearer {DI_TOKEN}", "Content-Type": "application/json"}
@@ -122,7 +140,6 @@ def _di_post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
     except Exception as e:
         raise RuntimeError(f"DeepInfra POST failed: {e}")
-    # OpenAI-like : {"data":[{"embedding":[...],"index":0}, ...]}
     data = js.get("data")
     if not isinstance(data, list) or not data:
         raise RuntimeError(f"DeepInfra embeddings: réponse invalide {js}")
@@ -130,7 +147,6 @@ def _di_post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
     arr = np.asarray(embs, dtype=np.float32)
     if arr.ndim != 2:
         raise RuntimeError(f"DeepInfra: unexpected embeddings shape: {arr.shape}")
-    # normalisation
     norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
     arr = arr / norms
     return arr.astype(np.float32), size
@@ -145,8 +161,7 @@ def _post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
 def _ensure_collection(name: str, dim: int):
     try:
-        qdr.get_collection(name)
-        return
     except Exception:
         pass
     qdr.create_collection(
@@ -157,25 +172,21 @@ def _ensure_collection(name: str, dim: int):
 def _chunk_with_spans(text: str, size: int, overlap: int):
     n = len(text)
     if size <= 0:
-        yield (0, n, text)
-        return
     i = 0
     while i < n:
         j = min(n, i + size)
         yield (i, j, text[i:j])
         i = max(0, j - overlap)
-        if i >= n:
-            break
 def _append_log(job_id: str, line: str):
     job = JOBS.get(job_id)
-    if not job: return
-    job["logs"].append(line)
 def _set_status(job_id: str, status: str):
     job = JOBS.get(job_id)
-    if not job: return
-    job["status"] = status
 # ---------- Background task ----------
 def run_index_job(job_id: str, req: IndexRequest):
@@ -196,7 +207,6 @@ def run_index_job(job_id: str, req: IndexRequest):
         _append_log(job_id, f"Collection ready: {col} (dim={dim})")
         point_id = 0
         # boucle fichiers
         for fi, f in enumerate(req.files, 1):
             chunks, metas = [], []
@@ -218,7 +228,6 @@ def run_index_job(job_id: str, req: IndexRequest):
                     _append_log(job_id, f"file {fi}/{len(req.files)}: +{len(chunks)} chunks (total={total_chunks}) ~{sz/1024:.1f}KiB")
                     chunks, metas = [], []
-            # flush fin de fichier
             if chunks:
                 vecs, sz = _post_embeddings(chunks)
                 batch_points = []
@@ -255,7 +264,7 @@ def root():
 def health():
     return {"ok": True}
-def _check_backend_ready(for_query=False):
     if EMB_BACKEND == "hf" and not HF_TOKEN:
         raise HTTPException(400, "HF_API_TOKEN manquant côté serveur (backend=hf).")
     if EMB_BACKEND == "deepinfra" and not DI_TOKEN:
@@ -284,12 +293,11 @@ def status(job_id: str, x_auth_token: Optional[str] = Header(default=None)):
 def query(req: QueryRequest, x_auth_token: Optional[str] = Header(default=None)):
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
         raise HTTPException(401, "Unauthorized")
-    _check_backend_ready(for_query=True)
     vec, _ = _post_embeddings([req.query])
-    vec = vec[0].tolist()
     col = f"proj_{req.project_id}"
     try:
-        res = qdr.search(collection_name=col, query_vector=vec, limit=int(req.top_k))
     except Exception as e:
         raise HTTPException(400, f"Search failed: {e}")
     out = []
@@ -307,8 +315,7 @@ def wipe_collection(project_id: str, x_auth_token: Optional[str] = Header(defaul
         raise HTTPException(401, "Unauthorized")
     col = f"proj_{project_id}"
     try:
-        qdr.delete_collection(col)
-        return {"ok": True}
     except Exception as e:
         raise HTTPException(400, f"wipe failed: {e}")

 # ---------- ENV ----------
 EMB_BACKEND = os.getenv("EMB_BACKEND", "hf").strip().lower()   # "hf" (défaut) ou "deepinfra"
+# Hugging Face
 HF_TOKEN   = os.getenv("HF_API_TOKEN", "").strip()
 HF_MODEL   = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
+# Recommandé: endpoint "models" (plus tolérant)
 HF_URL     = (os.getenv("HF_API_URL", "").strip()
+              or f"https://api-inference.huggingface.co/models/{HF_MODEL}")
+# DeepInfra (option)
 DI_TOKEN   = os.getenv("DEEPINFRA_API_KEY", "").strip()
 DI_MODEL   = os.getenv("DEEPINFRA_EMBED_MODEL", "thenlper/gte-small").strip()
 DI_URL     = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/embeddings").strip()
         raise HTTPException(status_code=401, detail="Unauthorized")
 def _hf_post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
+    """
+    Hugging Face Inference API:
+    - envoyer {"inputs": ...} (string ou liste de strings)
+    - endpoint recommandé: /models/<repo_id>
+    Retour: liste de vecteurs [batch, dim] OU [batch, tokens, dim]
+    """
     if not HF_TOKEN:
         raise RuntimeError("HF_API_TOKEN manquant (backend=hf).")
+    headers = {
+        "Authorization": f"Bearer {HF_TOKEN}",
+        "Content-Type": "application/json",
+        # Optionnel (forçage warmup) : "X-Wait-For-Model": "true"
+    }
+    payload = {"inputs": batch if len(batch) > 1 else batch[0]}
     try:
+        r = requests.post(HF_URL, headers=headers, json=payload, timeout=120)
         size = int(r.headers.get("Content-Length", "0"))
         if r.status_code >= 400:
             try:
                 LOG.error(f"HF error {r.status_code}: {r.text}")
             except Exception:
     # [batch, dim] (sentence-transformers) ou [batch, tokens, dim] -> mean-pooling
     if arr.ndim == 3:
         arr = arr.mean(axis=1)
+    if arr.ndim == 1:
+        # cas rare: un seul vecteur (batch=1)
+        arr = arr.reshape(1, -1)
     if arr.ndim != 2:
         raise RuntimeError(f"HF: unexpected embeddings shape: {arr.shape}")
     # normalisation
     return arr.astype(np.float32), size
 def _di_post_embeddings(batch: List[str]) -> Tuple[np.ndarray, int]:
+    """
+    DeepInfra embeddings (OpenAI-like):
+    POST /v1/embeddings  {model: ..., input: [...]}
+    Réponse: {"data":[{"embedding":[...],"index":0}, ...]}
+    """
     if not DI_TOKEN:
         raise RuntimeError("DEEPINFRA_API_KEY manquant (backend=deepinfra).")
     headers = {"Authorization": f"Bearer {DI_TOKEN}", "Content-Type": "application/json"}
     except Exception as e:
         raise RuntimeError(f"DeepInfra POST failed: {e}")
     data = js.get("data")
     if not isinstance(data, list) or not data:
         raise RuntimeError(f"DeepInfra embeddings: réponse invalide {js}")
     arr = np.asarray(embs, dtype=np.float32)
     if arr.ndim != 2:
         raise RuntimeError(f"DeepInfra: unexpected embeddings shape: {arr.shape}")
     norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
     arr = arr / norms
     return arr.astype(np.float32), size
 def _ensure_collection(name: str, dim: int):
     try:
+        qdr.get_collection(name); return
     except Exception:
         pass
     qdr.create_collection(
 def _chunk_with_spans(text: str, size: int, overlap: int):
     n = len(text)
     if size <= 0:
+        yield (0, n, text); return
     i = 0
     while i < n:
         j = min(n, i + size)
         yield (i, j, text[i:j])
         i = max(0, j - overlap)
+        if i >= n: break
 def _append_log(job_id: str, line: str):
     """Append ``line`` to the ``"logs"`` list of the job stored in ``JOBS``.

     Does nothing when ``JOBS`` has no (truthy) entry for ``job_id``.
     """
     entry = JOBS.get(job_id)
     if not entry:
         return
     entry["logs"].append(line)
 def _set_status(job_id: str, status: str):
     """Store ``status`` under the ``"status"`` key of the job in ``JOBS``.

     Does nothing when ``JOBS`` has no (truthy) entry for ``job_id``.
     """
     entry = JOBS.get(job_id)
     if not entry:
         return
     entry["status"] = status
 # ---------- Background task ----------
 def run_index_job(job_id: str, req: IndexRequest):
         _append_log(job_id, f"Collection ready: {col} (dim={dim})")
         point_id = 0
         # boucle fichiers
         for fi, f in enumerate(req.files, 1):
             chunks, metas = [], []
                     _append_log(job_id, f"file {fi}/{len(req.files)}: +{len(chunks)} chunks (total={total_chunks}) ~{sz/1024:.1f}KiB")
                     chunks, metas = [], []
             if chunks:
                 vecs, sz = _post_embeddings(chunks)
                 batch_points = []
 def health():
     return {"ok": True}
+def _check_backend_ready():
     if EMB_BACKEND == "hf" and not HF_TOKEN:
         raise HTTPException(400, "HF_API_TOKEN manquant côté serveur (backend=hf).")
     if EMB_BACKEND == "deepinfra" and not DI_TOKEN:
 def query(req: QueryRequest, x_auth_token: Optional[str] = Header(default=None)):
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
         raise HTTPException(401, "Unauthorized")
+    _check_backend_ready()
     vec, _ = _post_embeddings([req.query])
     col = f"proj_{req.project_id}"
     try:
+        res = qdr.search(collection_name=col, query_vector=vec[0].tolist(), limit=int(req.top_k))
     except Exception as e:
         raise HTTPException(400, f"Search failed: {e}")
     out = []
         raise HTTPException(401, "Unauthorized")
     col = f"proj_{project_id}"
     try:
+        qdr.delete_collection(col); return {"ok": True}
     except Exception as e:
         raise HTTPException(400, f"wipe failed: {e}")