Spaces:

chouchouvs
/

DeepIndex

Running

App Files Community

chouchouvs committed on Sep 16

Commit

6520d03

verified ·

1 Parent(s): d2eb63c

Update main.py

Browse files

Files changed (1) hide show

main.py +45 -17

main.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-HF Space - main.py de substitution pour tests Qdrant / indexation minimale (robuste)
 Endpoints:
 - GET  /                      → redirige vers UI_PATH (défaut: /ui)
@@ -20,7 +20,7 @@ ENV:
 - EMB_PROVIDER ("hf" par défaut, "dummy" sinon)
 - HF_EMBED_MODEL (défaut "BAAI/bge-m3")
 - HUGGINGFACEHUB_API_TOKEN (si EMB_PROVIDER=hf)
-- EMB_FALLBACK_TO_DUMMY (true/false) → si vrai, bascule dummy si HF indisponible
 - LOG_LEVEL (défaut DEBUG)
 - PORT (fourni par HF, défaut 7860)
 - UI_PATH (défaut "/ui")
@@ -41,7 +41,7 @@ import uvicorn
 from pydantic import BaseModel, Field, ValidationError
 from fastapi import FastAPI, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import RedirectResponse, JSONResponse
 import gradio as gr
 # ------------------------------------------------------------------------------
@@ -70,9 +70,11 @@ if not QDRANT_URL or not QDRANT_API_KEY:
     LOG.warning("QDRANT_URL / QDRANT_API_KEY non fournis : l'upsert échouera.")
 if EMB_PROVIDER == "hf" and not HF_TOKEN:
-    LOG.warning("EMB_PROVIDER=hf sans HUGGINGFACEHUB_API_TOKEN. "
-                "→ soit définis le token, soit mets EMB_PROVIDER=dummy, "
-                "soit active EMB_FALLBACK_TO_DUMMY=true.")
 # ------------------------------------------------------------------------------
 # Schémas Pydantic
@@ -212,13 +214,25 @@ async def qdrant_search(client: httpx.AsyncClient, coll: str, vector: List[float
 # ------------------------------------------------------------------------------
 # Embeddings (HF Inference ou dummy)
 # ------------------------------------------------------------------------------
 async def embed_hf(client: httpx.AsyncClient, texts: List[str], model: str = HF_EMBED_MODEL, token: str = HF_TOKEN) -> List[List[float]]:
     if not token:
         raise HTTPException(status_code=400, detail="HUGGINGFACEHUB_API_TOKEN manquant pour EMB_PROVIDER=hf")
     url = f"https://api-inference.huggingface.co/models/{model}"
     headers = {"Authorization": f"Bearer {token}"}
-    payload = {"inputs": texts, "options": {"wait_for_model": True}}
-    r = await client.post(url, headers=headers, json=payload, timeout=120)
     if r.status_code != 200:
         detail = r.text
         LOG.error(f"HF Inference error {r.status_code}: {detail[:400]}")
@@ -245,7 +259,6 @@ def embed_dummy(texts: List[str], dim: int = 128) -> List[List[float]]:
     return out
 async def embed_texts(client: httpx.AsyncClient, texts: List[str]) -> List[List[float]]:
-    # Fallback optionnel si HF indisponible
     if EMB_PROVIDER == "hf":
         try:
             return await embed_hf(client, texts)
@@ -295,7 +308,7 @@ async def run_index_job(job: JobState, req: IndexRequest) -> None:
             job.finished_at = time.time()
             return
-        async with httpx.AsyncClient(timeout=120) as client:
             # Warmup dim
             warmup_vec = (await embed_texts(client, [records[0]["raw"]]))[0]
             vec_dim = len(warmup_vec)
@@ -440,7 +453,7 @@ async def query(req: QueryRequest):
     return data
 # ------------------------------------------------------------------------------
-# Gradio UI
 # ------------------------------------------------------------------------------
 def _default_two_docs() -> List[Dict[str, str]]:
     a = "Alpha bravo charlie delta echo foxtrot golf hotel india. " * 3
@@ -468,12 +481,13 @@ async def ui_index_sample(project: str, chunk_size: int, overlap: int, batch_siz
     try:
         data = await index(req)
         job_id = data["job_id"]
-        return f"🚀 Job lancé: {job_id}"
     except ValidationError as ve:
-        return f"❌ Payload invalide: {ve}"
     except Exception as e:
         LOG.exception("index UI error")
-        return f"❌ Index erreur: {e}"
 async def ui_status(job_id: str):
     if not job_id.strip():
@@ -525,7 +539,7 @@ with gr.Blocks(title="Remote Indexer - Minimal Test", analytics_enabled=False) a
                 f"- **Qdrant**: `{'OK' if QDRANT_URL else 'ABSENT'}`")
     with gr.Row():
         project_tb = gr.Textbox(label="Project ID", value="DEEPWEB")
-        jobid_tb = gr.Textbox(label="Job ID (pour Status)", value="", interactive=True)
     with gr.Row():
         wipe_btn = gr.Button("🧨 Wipe collection", variant="stop")
         index_btn = gr.Button("🚀 Indexer 2 documents", variant="primary")
@@ -536,16 +550,30 @@ with gr.Blocks(title="Remote Indexer - Minimal Test", analytics_enabled=False) a
         batch_size = gr.Slider(1, 128, value=32, step=1, label="batch_size")
         store_text = gr.Checkbox(value=True, label="store_text (payload)")
     out_log = gr.Textbox(lines=18, label="Logs / Résultats", interactive=False)
     with gr.Row():
         query_tb = gr.Textbox(label="Query text", value="alpha bravo")
         topk = gr.Slider(1, 20, value=5, step=1, label="top_k")
         query_btn = gr.Button("🔎 Query")
     query_out = gr.Textbox(lines=10, label="Résultats Query", interactive=False)
     wipe_btn.click(ui_wipe, inputs=[project_tb], outputs=[out_log])
-    index_btn.click(ui_index_sample, inputs=[project_tb, chunk_size, overlap, batch_size, store_text], outputs=[out_log])
     count_btn.click(ui_count, inputs=[project_tb], outputs=[out_log])
-    query_btn.click(ui_query, inputs=[project_tb, query_tb, topk], outputs=[query_out])
 # Monte l'UI Gradio sur la FastAPI au chemin UI_PATH
 app = gr.mount_gradio_app(fastapi_app, ui, path=UI_PATH)

 # -*- coding: utf-8 -*-
 """
+HF Space - main.py de substitution pour tests Qdrant / indexation minimale (robuste + auto-refresh)
 Endpoints:
 - GET  /                      → redirige vers UI_PATH (défaut: /ui)
 - EMB_PROVIDER ("hf" par défaut, "dummy" sinon)
 - HF_EMBED_MODEL (défaut "BAAI/bge-m3")
 - HUGGINGFACEHUB_API_TOKEN (si EMB_PROVIDER=hf)
+- EMB_FALLBACK_TO_DUMMY (true/false) → si vrai, bascule dummy si HF échoue
 - LOG_LEVEL (défaut DEBUG)
 - PORT (fourni par HF, défaut 7860)
 - UI_PATH (défaut "/ui")
 from pydantic import BaseModel, Field, ValidationError
 from fastapi import FastAPI, HTTPException, Query
 from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import RedirectResponse
 import gradio as gr
 # ------------------------------------------------------------------------------
     LOG.warning("QDRANT_URL / QDRANT_API_KEY non fournis : l'upsert échouera.")
 if EMB_PROVIDER == "hf" and not HF_TOKEN:
+    LOG.warning(
+        "EMB_PROVIDER=hf sans HUGGINGFACEHUB_API_TOKEN. "
+        "→ soit définis le token, soit mets EMB_PROVIDER=dummy, "
+        "soit active EMB_FALLBACK_TO_DUMMY=true."
+    )
 # ------------------------------------------------------------------------------
 # Schémas Pydantic
 # ------------------------------------------------------------------------------
 # Embeddings (HF Inference ou dummy)
 # ------------------------------------------------------------------------------
+def _maybe_prefix_for_model(texts: List[str], model_name: str) -> List[str]:
+    """
+    Add the 'query: ' prefix to every text when the model name contains 'e5'.
+
+    E5 models in practice expect prefixed inputs: 'query: ' (or 'passage: ' /
+    'document: '). The prefix is applied automatically whenever the
+    lower-cased model name contains the substring 'e5'.
+
+    Args:
+        texts: Input strings to embed.
+        model_name: Name of the embedding model; a falsy value (None, "") is
+            treated as an empty name, so no prefix is applied.
+
+    Returns:
+        A new list with 'query: ' prepended to each text when the model name
+        matches; otherwise the original `texts` list object, unchanged.
+    """
+    # Normalise so the substring test is case-insensitive and tolerates None.
+    m = (model_name or "").lower()
+    if "e5" in m:
+        # NOTE(review): only 'query: ' is ever used here, although the docstring
+        # also mentions 'passage: ' / 'document: ' — confirm this is intended for
+        # texts that are being indexed rather than queried.
+        return [("query: " + t) for t in texts]
+    return texts
 async def embed_hf(client: httpx.AsyncClient, texts: List[str], model: str = HF_EMBED_MODEL, token: str = HF_TOKEN) -> List[List[float]]:
     if not token:
         raise HTTPException(status_code=400, detail="HUGGINGFACEHUB_API_TOKEN manquant pour EMB_PROVIDER=hf")
     url = f"https://api-inference.huggingface.co/models/{model}"
     headers = {"Authorization": f"Bearer {token}"}
+    inputs = _maybe_prefix_for_model(texts, model)
+    payload = {"inputs": inputs, "options": {"wait_for_model": True}}
+    LOG.debug(f"HF POST model={model} n_texts={len(texts)}")
+    r = await client.post(url, headers=headers, json=payload, timeout=180)
     if r.status_code != 200:
         detail = r.text
         LOG.error(f"HF Inference error {r.status_code}: {detail[:400]}")
     return out
 async def embed_texts(client: httpx.AsyncClient, texts: List[str]) -> List[List[float]]:
     if EMB_PROVIDER == "hf":
         try:
             return await embed_hf(client, texts)
             job.finished_at = time.time()
             return
+        async with httpx.AsyncClient(timeout=180) as client:
             # Warmup dim
             warmup_vec = (await embed_texts(client, [records[0]["raw"]]))[0]
             vec_dim = len(warmup_vec)
     return data
 # ------------------------------------------------------------------------------
+# Gradio UI (avec Status + Auto-refresh)
 # ------------------------------------------------------------------------------
 def _default_two_docs() -> List[Dict[str, str]]:
     a = "Alpha bravo charlie delta echo foxtrot golf hotel india. " * 3
     try:
         data = await index(req)
         job_id = data["job_id"]
+        # On retourne ET le message ET le job_id pour remplir le champ
+        return f"🚀 Job lancé: {job_id}", job_id
     except ValidationError as ve:
+        return f"❌ Payload invalide: {ve}", ""
     except Exception as e:
         LOG.exception("index UI error")
+        return f"❌ Index erreur: {e}", ""
 async def ui_status(job_id: str):
     if not job_id.strip():
                 f"- **Qdrant**: `{'OK' if QDRANT_URL else 'ABSENT'}`")
     with gr.Row():
         project_tb = gr.Textbox(label="Project ID", value="DEEPWEB")
+        jobid_tb = gr.Textbox(label="Job ID", value="", interactive=True)
     with gr.Row():
         wipe_btn = gr.Button("🧨 Wipe collection", variant="stop")
         index_btn = gr.Button("🚀 Indexer 2 documents", variant="primary")
         batch_size = gr.Slider(1, 128, value=32, step=1, label="batch_size")
         store_text = gr.Checkbox(value=True, label="store_text (payload)")
     out_log = gr.Textbox(lines=18, label="Logs / Résultats", interactive=False)
+    with gr.Row():
+        status_btn = gr.Button("📡 Status (refresh)")
+        auto_chk = gr.Checkbox(False, label="⏱️ Auto-refresh status (2 s)")
     with gr.Row():
         query_tb = gr.Textbox(label="Query text", value="alpha bravo")
         topk = gr.Slider(1, 20, value=5, step=1, label="top_k")
         query_btn = gr.Button("🔎 Query")
     query_out = gr.Textbox(lines=10, label="Résultats Query", interactive=False)
+    # Liens UI
     wipe_btn.click(ui_wipe, inputs=[project_tb], outputs=[out_log])
+    # index renvoie (message, job_id)
+    index_btn.click(ui_index_sample, inputs=[project_tb, chunk_size, overlap, batch_size, store_text], outputs=[out_log, jobid_tb])
     count_btn.click(ui_count, inputs=[project_tb], outputs=[out_log])
+    # Status bouton manuel
+    status_btn.click(ui_status, inputs=[jobid_tb], outputs=[out_log])
+    # Auto-refresh avec Timer (toutes les 2s si coché)
+    timer = gr.Timer(2.0, active=False)
+    timer.tick(ui_status, inputs=[jobid_tb], outputs=[out_log])
+    auto_chk.change(lambda x: gr.update(active=x), inputs=auto_chk, outputs=timer)
 # Monte l'UI Gradio sur la FastAPI au chemin UI_PATH
 app = gr.mount_gradio_app(fastapi_app, ui, path=UI_PATH)