Spaces:
Running
Running
Update main.py
Browse files
main.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
-
HF Space - main.py de substitution pour tests Qdrant / indexation minimale (robuste)
|
| 4 |
|
| 5 |
Endpoints:
|
| 6 |
- GET / → redirige vers UI_PATH (défaut: /ui)
|
|
@@ -20,7 +20,7 @@ ENV:
|
|
| 20 |
- EMB_PROVIDER ("hf" par défaut, "dummy" sinon)
|
| 21 |
- HF_EMBED_MODEL (défaut "BAAI/bge-m3")
|
| 22 |
- HUGGINGFACEHUB_API_TOKEN (si EMB_PROVIDER=hf)
|
| 23 |
-
- EMB_FALLBACK_TO_DUMMY (true/false) → si vrai, bascule dummy si HF
|
| 24 |
- LOG_LEVEL (défaut DEBUG)
|
| 25 |
- PORT (fourni par HF, défaut 7860)
|
| 26 |
- UI_PATH (défaut "/ui")
|
|
@@ -41,7 +41,7 @@ import uvicorn
|
|
| 41 |
from pydantic import BaseModel, Field, ValidationError
|
| 42 |
from fastapi import FastAPI, HTTPException, Query
|
| 43 |
from fastapi.middleware.cors import CORSMiddleware
|
| 44 |
-
from fastapi.responses import RedirectResponse
|
| 45 |
import gradio as gr
|
| 46 |
|
| 47 |
# ------------------------------------------------------------------------------
|
|
@@ -70,9 +70,11 @@ if not QDRANT_URL or not QDRANT_API_KEY:
|
|
| 70 |
LOG.warning("QDRANT_URL / QDRANT_API_KEY non fournis : l'upsert échouera.")
|
| 71 |
|
| 72 |
if EMB_PROVIDER == "hf" and not HF_TOKEN:
|
| 73 |
-
LOG.warning(
|
| 74 |
-
|
| 75 |
-
|
|
|
|
|
|
|
| 76 |
|
| 77 |
# ------------------------------------------------------------------------------
|
| 78 |
# Schémas Pydantic
|
|
@@ -212,13 +214,25 @@ async def qdrant_search(client: httpx.AsyncClient, coll: str, vector: List[float
|
|
| 212 |
# ------------------------------------------------------------------------------
|
| 213 |
# Embeddings (HF Inference ou dummy)
|
| 214 |
# ------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
async def embed_hf(client: httpx.AsyncClient, texts: List[str], model: str = HF_EMBED_MODEL, token: str = HF_TOKEN) -> List[List[float]]:
|
| 216 |
if not token:
|
| 217 |
raise HTTPException(status_code=400, detail="HUGGINGFACEHUB_API_TOKEN manquant pour EMB_PROVIDER=hf")
|
| 218 |
url = f"https://api-inference.huggingface.co/models/{model}"
|
| 219 |
headers = {"Authorization": f"Bearer {token}"}
|
| 220 |
-
|
| 221 |
-
|
|
|
|
|
|
|
| 222 |
if r.status_code != 200:
|
| 223 |
detail = r.text
|
| 224 |
LOG.error(f"HF Inference error {r.status_code}: {detail[:400]}")
|
|
@@ -245,7 +259,6 @@ def embed_dummy(texts: List[str], dim: int = 128) -> List[List[float]]:
|
|
| 245 |
return out
|
| 246 |
|
| 247 |
async def embed_texts(client: httpx.AsyncClient, texts: List[str]) -> List[List[float]]:
|
| 248 |
-
# Fallback optionnel si HF indisponible
|
| 249 |
if EMB_PROVIDER == "hf":
|
| 250 |
try:
|
| 251 |
return await embed_hf(client, texts)
|
|
@@ -295,7 +308,7 @@ async def run_index_job(job: JobState, req: IndexRequest) -> None:
|
|
| 295 |
job.finished_at = time.time()
|
| 296 |
return
|
| 297 |
|
| 298 |
-
async with httpx.AsyncClient(timeout=
|
| 299 |
# Warmup dim
|
| 300 |
warmup_vec = (await embed_texts(client, [records[0]["raw"]]))[0]
|
| 301 |
vec_dim = len(warmup_vec)
|
|
@@ -440,7 +453,7 @@ async def query(req: QueryRequest):
|
|
| 440 |
return data
|
| 441 |
|
| 442 |
# ------------------------------------------------------------------------------
|
| 443 |
-
# Gradio UI
|
| 444 |
# ------------------------------------------------------------------------------
|
| 445 |
def _default_two_docs() -> List[Dict[str, str]]:
|
| 446 |
a = "Alpha bravo charlie delta echo foxtrot golf hotel india. " * 3
|
|
@@ -468,12 +481,13 @@ async def ui_index_sample(project: str, chunk_size: int, overlap: int, batch_siz
|
|
| 468 |
try:
|
| 469 |
data = await index(req)
|
| 470 |
job_id = data["job_id"]
|
| 471 |
-
|
|
|
|
| 472 |
except ValidationError as ve:
|
| 473 |
-
return f"❌ Payload invalide: {ve}"
|
| 474 |
except Exception as e:
|
| 475 |
LOG.exception("index UI error")
|
| 476 |
-
return f"❌ Index erreur: {e}"
|
| 477 |
|
| 478 |
async def ui_status(job_id: str):
|
| 479 |
if not job_id.strip():
|
|
@@ -525,7 +539,7 @@ with gr.Blocks(title="Remote Indexer - Minimal Test", analytics_enabled=False) a
|
|
| 525 |
f"- **Qdrant**: `{'OK' if QDRANT_URL else 'ABSENT'}`")
|
| 526 |
with gr.Row():
|
| 527 |
project_tb = gr.Textbox(label="Project ID", value="DEEPWEB")
|
| 528 |
-
jobid_tb = gr.Textbox(label="Job ID
|
| 529 |
with gr.Row():
|
| 530 |
wipe_btn = gr.Button("🧨 Wipe collection", variant="stop")
|
| 531 |
index_btn = gr.Button("🚀 Indexer 2 documents", variant="primary")
|
|
@@ -536,16 +550,30 @@ with gr.Blocks(title="Remote Indexer - Minimal Test", analytics_enabled=False) a
|
|
| 536 |
batch_size = gr.Slider(1, 128, value=32, step=1, label="batch_size")
|
| 537 |
store_text = gr.Checkbox(value=True, label="store_text (payload)")
|
| 538 |
out_log = gr.Textbox(lines=18, label="Logs / Résultats", interactive=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 539 |
with gr.Row():
|
| 540 |
query_tb = gr.Textbox(label="Query text", value="alpha bravo")
|
| 541 |
topk = gr.Slider(1, 20, value=5, step=1, label="top_k")
|
| 542 |
query_btn = gr.Button("🔎 Query")
|
| 543 |
query_out = gr.Textbox(lines=10, label="Résultats Query", interactive=False)
|
| 544 |
|
|
|
|
| 545 |
wipe_btn.click(ui_wipe, inputs=[project_tb], outputs=[out_log])
|
| 546 |
-
|
|
|
|
| 547 |
count_btn.click(ui_count, inputs=[project_tb], outputs=[out_log])
|
| 548 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 549 |
|
| 550 |
# Monte l'UI Gradio sur la FastAPI au chemin UI_PATH
|
| 551 |
app = gr.mount_gradio_app(fastapi_app, ui, path=UI_PATH)
|
|
|
|
| 1 |
# -*- coding: utf-8 -*-
|
| 2 |
"""
|
| 3 |
+
HF Space - main.py de substitution pour tests Qdrant / indexation minimale (robuste + auto-refresh)
|
| 4 |
|
| 5 |
Endpoints:
|
| 6 |
- GET / → redirige vers UI_PATH (défaut: /ui)
|
|
|
|
| 20 |
- EMB_PROVIDER ("hf" par défaut, "dummy" sinon)
|
| 21 |
- HF_EMBED_MODEL (défaut "BAAI/bge-m3")
|
| 22 |
- HUGGINGFACEHUB_API_TOKEN (si EMB_PROVIDER=hf)
|
| 23 |
+
- EMB_FALLBACK_TO_DUMMY (true/false) → si vrai, bascule dummy si HF échoue
|
| 24 |
- LOG_LEVEL (défaut DEBUG)
|
| 25 |
- PORT (fourni par HF, défaut 7860)
|
| 26 |
- UI_PATH (défaut "/ui")
|
|
|
|
| 41 |
from pydantic import BaseModel, Field, ValidationError
|
| 42 |
from fastapi import FastAPI, HTTPException, Query
|
| 43 |
from fastapi.middleware.cors import CORSMiddleware
|
| 44 |
+
from fastapi.responses import RedirectResponse
|
| 45 |
import gradio as gr
|
| 46 |
|
| 47 |
# ------------------------------------------------------------------------------
|
|
|
|
| 70 |
LOG.warning("QDRANT_URL / QDRANT_API_KEY non fournis : l'upsert échouera.")
|
| 71 |
|
| 72 |
if EMB_PROVIDER == "hf" and not HF_TOKEN:
|
| 73 |
+
LOG.warning(
|
| 74 |
+
"EMB_PROVIDER=hf sans HUGGINGFACEHUB_API_TOKEN. "
|
| 75 |
+
"→ soit définis le token, soit mets EMB_PROVIDER=dummy, "
|
| 76 |
+
"soit active EMB_FALLBACK_TO_DUMMY=true."
|
| 77 |
+
)
|
| 78 |
|
| 79 |
# ------------------------------------------------------------------------------
|
| 80 |
# Schémas Pydantic
|
|
|
|
| 214 |
# ------------------------------------------------------------------------------
|
| 215 |
# Embeddings (HF Inference ou dummy)
|
| 216 |
# ------------------------------------------------------------------------------
|
| 217 |
+
def _maybe_prefix_for_model(texts: List[str], model_name: str) -> List[str]:
|
| 218 |
+
"""
|
| 219 |
+
E5 attend en pratique des préfixes 'query: ' (ou 'passage: ' / 'document: ').
|
| 220 |
+
On préfixe automatiquement si le modèle contient 'e5'.
|
| 221 |
+
"""
|
| 222 |
+
m = (model_name or "").lower()
|
| 223 |
+
if "e5" in m:
|
| 224 |
+
return [("query: " + t) for t in texts]
|
| 225 |
+
return texts
|
| 226 |
+
|
| 227 |
async def embed_hf(client: httpx.AsyncClient, texts: List[str], model: str = HF_EMBED_MODEL, token: str = HF_TOKEN) -> List[List[float]]:
|
| 228 |
if not token:
|
| 229 |
raise HTTPException(status_code=400, detail="HUGGINGFACEHUB_API_TOKEN manquant pour EMB_PROVIDER=hf")
|
| 230 |
url = f"https://api-inference.huggingface.co/models/{model}"
|
| 231 |
headers = {"Authorization": f"Bearer {token}"}
|
| 232 |
+
inputs = _maybe_prefix_for_model(texts, model)
|
| 233 |
+
payload = {"inputs": inputs, "options": {"wait_for_model": True}}
|
| 234 |
+
LOG.debug(f"HF POST model={model} n_texts={len(texts)}")
|
| 235 |
+
r = await client.post(url, headers=headers, json=payload, timeout=180)
|
| 236 |
if r.status_code != 200:
|
| 237 |
detail = r.text
|
| 238 |
LOG.error(f"HF Inference error {r.status_code}: {detail[:400]}")
|
|
|
|
| 259 |
return out
|
| 260 |
|
| 261 |
async def embed_texts(client: httpx.AsyncClient, texts: List[str]) -> List[List[float]]:
|
|
|
|
| 262 |
if EMB_PROVIDER == "hf":
|
| 263 |
try:
|
| 264 |
return await embed_hf(client, texts)
|
|
|
|
| 308 |
job.finished_at = time.time()
|
| 309 |
return
|
| 310 |
|
| 311 |
+
async with httpx.AsyncClient(timeout=180) as client:
|
| 312 |
# Warmup dim
|
| 313 |
warmup_vec = (await embed_texts(client, [records[0]["raw"]]))[0]
|
| 314 |
vec_dim = len(warmup_vec)
|
|
|
|
| 453 |
return data
|
| 454 |
|
| 455 |
# ------------------------------------------------------------------------------
|
| 456 |
+
# Gradio UI (avec Status + Auto-refresh)
|
| 457 |
# ------------------------------------------------------------------------------
|
| 458 |
def _default_two_docs() -> List[Dict[str, str]]:
|
| 459 |
a = "Alpha bravo charlie delta echo foxtrot golf hotel india. " * 3
|
|
|
|
| 481 |
try:
|
| 482 |
data = await index(req)
|
| 483 |
job_id = data["job_id"]
|
| 484 |
+
# On retourne ET le message ET le job_id pour remplir le champ
|
| 485 |
+
return f"🚀 Job lancé: {job_id}", job_id
|
| 486 |
except ValidationError as ve:
|
| 487 |
+
return f"❌ Payload invalide: {ve}", ""
|
| 488 |
except Exception as e:
|
| 489 |
LOG.exception("index UI error")
|
| 490 |
+
return f"❌ Index erreur: {e}", ""
|
| 491 |
|
| 492 |
async def ui_status(job_id: str):
|
| 493 |
if not job_id.strip():
|
|
|
|
| 539 |
f"- **Qdrant**: `{'OK' if QDRANT_URL else 'ABSENT'}`")
|
| 540 |
with gr.Row():
|
| 541 |
project_tb = gr.Textbox(label="Project ID", value="DEEPWEB")
|
| 542 |
+
jobid_tb = gr.Textbox(label="Job ID", value="", interactive=True)
|
| 543 |
with gr.Row():
|
| 544 |
wipe_btn = gr.Button("🧨 Wipe collection", variant="stop")
|
| 545 |
index_btn = gr.Button("🚀 Indexer 2 documents", variant="primary")
|
|
|
|
| 550 |
batch_size = gr.Slider(1, 128, value=32, step=1, label="batch_size")
|
| 551 |
store_text = gr.Checkbox(value=True, label="store_text (payload)")
|
| 552 |
out_log = gr.Textbox(lines=18, label="Logs / Résultats", interactive=False)
|
| 553 |
+
|
| 554 |
+
with gr.Row():
|
| 555 |
+
status_btn = gr.Button("📡 Status (refresh)")
|
| 556 |
+
auto_chk = gr.Checkbox(False, label="⏱️ Auto-refresh status (2 s)")
|
| 557 |
+
|
| 558 |
with gr.Row():
|
| 559 |
query_tb = gr.Textbox(label="Query text", value="alpha bravo")
|
| 560 |
topk = gr.Slider(1, 20, value=5, step=1, label="top_k")
|
| 561 |
query_btn = gr.Button("🔎 Query")
|
| 562 |
query_out = gr.Textbox(lines=10, label="Résultats Query", interactive=False)
|
| 563 |
|
| 564 |
+
# Liens UI
|
| 565 |
wipe_btn.click(ui_wipe, inputs=[project_tb], outputs=[out_log])
|
| 566 |
+
# index renvoie (message, job_id)
|
| 567 |
+
index_btn.click(ui_index_sample, inputs=[project_tb, chunk_size, overlap, batch_size, store_text], outputs=[out_log, jobid_tb])
|
| 568 |
count_btn.click(ui_count, inputs=[project_tb], outputs=[out_log])
|
| 569 |
+
|
| 570 |
+
# Status bouton manuel
|
| 571 |
+
status_btn.click(ui_status, inputs=[jobid_tb], outputs=[out_log])
|
| 572 |
+
|
| 573 |
+
# Auto-refresh avec Timer (toutes les 2s si coché)
|
| 574 |
+
timer = gr.Timer(2.0, active=False)
|
| 575 |
+
timer.tick(ui_status, inputs=[jobid_tb], outputs=[out_log])
|
| 576 |
+
auto_chk.change(lambda x: gr.update(active=x), inputs=auto_chk, outputs=timer)
|
| 577 |
|
| 578 |
# Monte l'UI Gradio sur la FastAPI au chemin UI_PATH
|
| 579 |
app = gr.mount_gradio_app(fastapi_app, ui, path=UI_PATH)
|