chouchouvs committed on
Commit
6520d03
·
verified ·
1 Parent(s): d2eb63c

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +45 -17
main.py CHANGED
@@ -1,6 +1,6 @@
1
  # -*- coding: utf-8 -*-
2
  """
3
- HF Space - main.py de substitution pour tests Qdrant / indexation minimale (robuste)
4
 
5
  Endpoints:
6
  - GET / → redirige vers UI_PATH (défaut: /ui)
@@ -20,7 +20,7 @@ ENV:
20
  - EMB_PROVIDER ("hf" par défaut, "dummy" sinon)
21
  - HF_EMBED_MODEL (défaut "BAAI/bge-m3")
22
  - HUGGINGFACEHUB_API_TOKEN (si EMB_PROVIDER=hf)
23
- - EMB_FALLBACK_TO_DUMMY (true/false) → si vrai, bascule dummy si HF indisponible
24
  - LOG_LEVEL (défaut DEBUG)
25
  - PORT (fourni par HF, défaut 7860)
26
  - UI_PATH (défaut "/ui")
@@ -41,7 +41,7 @@ import uvicorn
41
  from pydantic import BaseModel, Field, ValidationError
42
  from fastapi import FastAPI, HTTPException, Query
43
  from fastapi.middleware.cors import CORSMiddleware
44
- from fastapi.responses import RedirectResponse, JSONResponse
45
  import gradio as gr
46
 
47
  # ------------------------------------------------------------------------------
@@ -70,9 +70,11 @@ if not QDRANT_URL or not QDRANT_API_KEY:
70
  LOG.warning("QDRANT_URL / QDRANT_API_KEY non fournis : l'upsert échouera.")
71
 
72
  if EMB_PROVIDER == "hf" and not HF_TOKEN:
73
- LOG.warning("EMB_PROVIDER=hf sans HUGGINGFACEHUB_API_TOKEN. "
74
- " soit définis le token, soit mets EMB_PROVIDER=dummy, "
75
- "soit active EMB_FALLBACK_TO_DUMMY=true.")
 
 
76
 
77
  # ------------------------------------------------------------------------------
78
  # Schémas Pydantic
@@ -212,13 +214,25 @@ async def qdrant_search(client: httpx.AsyncClient, coll: str, vector: List[float
212
  # ------------------------------------------------------------------------------
213
  # Embeddings (HF Inference ou dummy)
214
  # ------------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
215
  async def embed_hf(client: httpx.AsyncClient, texts: List[str], model: str = HF_EMBED_MODEL, token: str = HF_TOKEN) -> List[List[float]]:
216
  if not token:
217
  raise HTTPException(status_code=400, detail="HUGGINGFACEHUB_API_TOKEN manquant pour EMB_PROVIDER=hf")
218
  url = f"https://api-inference.huggingface.co/models/{model}"
219
  headers = {"Authorization": f"Bearer {token}"}
220
- payload = {"inputs": texts, "options": {"wait_for_model": True}}
221
- r = await client.post(url, headers=headers, json=payload, timeout=120)
 
 
222
  if r.status_code != 200:
223
  detail = r.text
224
  LOG.error(f"HF Inference error {r.status_code}: {detail[:400]}")
@@ -245,7 +259,6 @@ def embed_dummy(texts: List[str], dim: int = 128) -> List[List[float]]:
245
  return out
246
 
247
  async def embed_texts(client: httpx.AsyncClient, texts: List[str]) -> List[List[float]]:
248
- # Fallback optionnel si HF indisponible
249
  if EMB_PROVIDER == "hf":
250
  try:
251
  return await embed_hf(client, texts)
@@ -295,7 +308,7 @@ async def run_index_job(job: JobState, req: IndexRequest) -> None:
295
  job.finished_at = time.time()
296
  return
297
 
298
- async with httpx.AsyncClient(timeout=120) as client:
299
  # Warmup dim
300
  warmup_vec = (await embed_texts(client, [records[0]["raw"]]))[0]
301
  vec_dim = len(warmup_vec)
@@ -440,7 +453,7 @@ async def query(req: QueryRequest):
440
  return data
441
 
442
  # ------------------------------------------------------------------------------
443
- # Gradio UI
444
  # ------------------------------------------------------------------------------
445
  def _default_two_docs() -> List[Dict[str, str]]:
446
  a = "Alpha bravo charlie delta echo foxtrot golf hotel india. " * 3
@@ -468,12 +481,13 @@ async def ui_index_sample(project: str, chunk_size: int, overlap: int, batch_siz
468
  try:
469
  data = await index(req)
470
  job_id = data["job_id"]
471
- return f"🚀 Job lancé: {job_id}"
 
472
  except ValidationError as ve:
473
- return f"❌ Payload invalide: {ve}"
474
  except Exception as e:
475
  LOG.exception("index UI error")
476
- return f"❌ Index erreur: {e}"
477
 
478
  async def ui_status(job_id: str):
479
  if not job_id.strip():
@@ -525,7 +539,7 @@ with gr.Blocks(title="Remote Indexer - Minimal Test", analytics_enabled=False) a
525
  f"- **Qdrant**: `{'OK' if QDRANT_URL else 'ABSENT'}`")
526
  with gr.Row():
527
  project_tb = gr.Textbox(label="Project ID", value="DEEPWEB")
528
- jobid_tb = gr.Textbox(label="Job ID (pour Status)", value="", interactive=True)
529
  with gr.Row():
530
  wipe_btn = gr.Button("🧨 Wipe collection", variant="stop")
531
  index_btn = gr.Button("🚀 Indexer 2 documents", variant="primary")
@@ -536,16 +550,30 @@ with gr.Blocks(title="Remote Indexer - Minimal Test", analytics_enabled=False) a
536
  batch_size = gr.Slider(1, 128, value=32, step=1, label="batch_size")
537
  store_text = gr.Checkbox(value=True, label="store_text (payload)")
538
  out_log = gr.Textbox(lines=18, label="Logs / Résultats", interactive=False)
 
 
 
 
 
539
  with gr.Row():
540
  query_tb = gr.Textbox(label="Query text", value="alpha bravo")
541
  topk = gr.Slider(1, 20, value=5, step=1, label="top_k")
542
  query_btn = gr.Button("🔎 Query")
543
  query_out = gr.Textbox(lines=10, label="Résultats Query", interactive=False)
544
 
 
545
  wipe_btn.click(ui_wipe, inputs=[project_tb], outputs=[out_log])
546
- index_btn.click(ui_index_sample, inputs=[project_tb, chunk_size, overlap, batch_size, store_text], outputs=[out_log])
 
547
  count_btn.click(ui_count, inputs=[project_tb], outputs=[out_log])
548
- query_btn.click(ui_query, inputs=[project_tb, query_tb, topk], outputs=[query_out])
 
 
 
 
 
 
 
549
 
550
  # Monte l'UI Gradio sur la FastAPI au chemin UI_PATH
551
  app = gr.mount_gradio_app(fastapi_app, ui, path=UI_PATH)
 
1
  # -*- coding: utf-8 -*-
2
  """
3
+ HF Space - main.py de substitution pour tests Qdrant / indexation minimale (robuste + auto-refresh)
4
 
5
  Endpoints:
6
  - GET / → redirige vers UI_PATH (défaut: /ui)
 
20
  - EMB_PROVIDER ("hf" par défaut, "dummy" sinon)
21
  - HF_EMBED_MODEL (défaut "BAAI/bge-m3")
22
  - HUGGINGFACEHUB_API_TOKEN (si EMB_PROVIDER=hf)
23
+ - EMB_FALLBACK_TO_DUMMY (true/false) → si vrai, bascule dummy si HF échoue
24
  - LOG_LEVEL (défaut DEBUG)
25
  - PORT (fourni par HF, défaut 7860)
26
  - UI_PATH (défaut "/ui")
 
41
  from pydantic import BaseModel, Field, ValidationError
42
  from fastapi import FastAPI, HTTPException, Query
43
  from fastapi.middleware.cors import CORSMiddleware
44
+ from fastapi.responses import RedirectResponse
45
  import gradio as gr
46
 
47
  # ------------------------------------------------------------------------------
 
70
  LOG.warning("QDRANT_URL / QDRANT_API_KEY non fournis : l'upsert échouera.")
71
 
72
  if EMB_PROVIDER == "hf" and not HF_TOKEN:
73
+ LOG.warning(
74
+ "EMB_PROVIDER=hf sans HUGGINGFACEHUB_API_TOKEN. "
75
+ "soit définis le token, soit mets EMB_PROVIDER=dummy, "
76
+ "soit active EMB_FALLBACK_TO_DUMMY=true."
77
+ )
78
 
79
  # ------------------------------------------------------------------------------
80
  # Schémas Pydantic
 
214
  # ------------------------------------------------------------------------------
215
  # Embeddings (HF Inference ou dummy)
216
  # ------------------------------------------------------------------------------
217
+ def _maybe_prefix_for_model(texts: List[str], model_name: str) -> List[str]:
218
+ """
219
+ E5 attend en pratique des préfixes 'query: ' (ou 'passage: ' / 'document: ').
220
+ On préfixe automatiquement si le modèle contient 'e5'.
221
+ """
222
+ m = (model_name or "").lower()
223
+ if "e5" in m:
224
+ return [("query: " + t) for t in texts]
225
+ return texts
226
+
227
  async def embed_hf(client: httpx.AsyncClient, texts: List[str], model: str = HF_EMBED_MODEL, token: str = HF_TOKEN) -> List[List[float]]:
228
  if not token:
229
  raise HTTPException(status_code=400, detail="HUGGINGFACEHUB_API_TOKEN manquant pour EMB_PROVIDER=hf")
230
  url = f"https://api-inference.huggingface.co/models/{model}"
231
  headers = {"Authorization": f"Bearer {token}"}
232
+ inputs = _maybe_prefix_for_model(texts, model)
233
+ payload = {"inputs": inputs, "options": {"wait_for_model": True}}
234
+ LOG.debug(f"HF POST model={model} n_texts={len(texts)}")
235
+ r = await client.post(url, headers=headers, json=payload, timeout=180)
236
  if r.status_code != 200:
237
  detail = r.text
238
  LOG.error(f"HF Inference error {r.status_code}: {detail[:400]}")
 
259
  return out
260
 
261
  async def embed_texts(client: httpx.AsyncClient, texts: List[str]) -> List[List[float]]:
 
262
  if EMB_PROVIDER == "hf":
263
  try:
264
  return await embed_hf(client, texts)
 
308
  job.finished_at = time.time()
309
  return
310
 
311
+ async with httpx.AsyncClient(timeout=180) as client:
312
  # Warmup dim
313
  warmup_vec = (await embed_texts(client, [records[0]["raw"]]))[0]
314
  vec_dim = len(warmup_vec)
 
453
  return data
454
 
455
  # ------------------------------------------------------------------------------
456
+ # Gradio UI (avec Status + Auto-refresh)
457
  # ------------------------------------------------------------------------------
458
  def _default_two_docs() -> List[Dict[str, str]]:
459
  a = "Alpha bravo charlie delta echo foxtrot golf hotel india. " * 3
 
481
  try:
482
  data = await index(req)
483
  job_id = data["job_id"]
484
+ # On retourne ET le message ET le job_id pour remplir le champ
485
+ return f"🚀 Job lancé: {job_id}", job_id
486
  except ValidationError as ve:
487
+ return f"❌ Payload invalide: {ve}", ""
488
  except Exception as e:
489
  LOG.exception("index UI error")
490
+ return f"❌ Index erreur: {e}", ""
491
 
492
  async def ui_status(job_id: str):
493
  if not job_id.strip():
 
539
  f"- **Qdrant**: `{'OK' if QDRANT_URL else 'ABSENT'}`")
540
  with gr.Row():
541
  project_tb = gr.Textbox(label="Project ID", value="DEEPWEB")
542
+ jobid_tb = gr.Textbox(label="Job ID", value="", interactive=True)
543
  with gr.Row():
544
  wipe_btn = gr.Button("🧨 Wipe collection", variant="stop")
545
  index_btn = gr.Button("🚀 Indexer 2 documents", variant="primary")
 
550
  batch_size = gr.Slider(1, 128, value=32, step=1, label="batch_size")
551
  store_text = gr.Checkbox(value=True, label="store_text (payload)")
552
  out_log = gr.Textbox(lines=18, label="Logs / Résultats", interactive=False)
553
+
554
+ with gr.Row():
555
+ status_btn = gr.Button("📡 Status (refresh)")
556
+ auto_chk = gr.Checkbox(False, label="⏱️ Auto-refresh status (2 s)")
557
+
558
  with gr.Row():
559
  query_tb = gr.Textbox(label="Query text", value="alpha bravo")
560
  topk = gr.Slider(1, 20, value=5, step=1, label="top_k")
561
  query_btn = gr.Button("🔎 Query")
562
  query_out = gr.Textbox(lines=10, label="Résultats Query", interactive=False)
563
 
564
+ # Liens UI
565
  wipe_btn.click(ui_wipe, inputs=[project_tb], outputs=[out_log])
566
+ # index renvoie (message, job_id)
567
+ index_btn.click(ui_index_sample, inputs=[project_tb, chunk_size, overlap, batch_size, store_text], outputs=[out_log, jobid_tb])
568
  count_btn.click(ui_count, inputs=[project_tb], outputs=[out_log])
569
+
570
+ # Status bouton manuel
571
+ status_btn.click(ui_status, inputs=[jobid_tb], outputs=[out_log])
572
+
573
+ # Auto-refresh avec Timer (toutes les 2s si coché)
574
+ timer = gr.Timer(2.0, active=False)
575
+ timer.tick(ui_status, inputs=[jobid_tb], outputs=[out_log])
576
+ auto_chk.change(lambda x: gr.update(active=x), inputs=auto_chk, outputs=timer)
577
 
578
  # Monte l'UI Gradio sur la FastAPI au chemin UI_PATH
579
  app = gr.mount_gradio_app(fastapi_app, ui, path=UI_PATH)