Update main.py
main.py
CHANGED
@@ -33,13 +33,13 @@ EMB_BACKEND_ORDER = [
 
 # --- DeepInfra Embeddings (OpenAI-like) ---
 DI_TOKEN = os.getenv("DEEPINFRA_API_KEY", "").strip()
-DI_MODEL = os.getenv("DEEPINFRA_EMBED_MODEL", "BAAI/bge-m3").strip()
+DI_MODEL = os.getenv("DEEPINFRA_EMBED_MODEL", "BAAI/bge-m3").strip()
 DI_URL = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/openai/embeddings").strip()
 DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
 
 # --- Hugging Face Inference API ---
 HF_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
-HF_MODEL = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
+HF_MODEL = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
 HF_URL_PIPE = os.getenv("HF_API_URL_PIPELINE", "").strip() or (
     f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}"
 )
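For context, the DeepInfra endpoint configured above speaks the OpenAI embeddings wire format. A minimal sketch of calling such a backend with these environment variables (assuming the standard {"model": ..., "input": [...]} request body and a data[*].embedding response; this is not the Space's own _post_embeddings helper):

import os
import requests
import numpy as np

DI_TOKEN = os.getenv("DEEPINFRA_API_KEY", "").strip()
DI_MODEL = os.getenv("DEEPINFRA_EMBED_MODEL", "BAAI/bge-m3").strip()
DI_URL = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/openai/embeddings").strip()
DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))

def embed_texts(texts: list[str]) -> np.ndarray:
    # OpenAI-compatible embeddings call: one vector per input string.
    resp = requests.post(
        DI_URL,
        headers={"Authorization": f"Bearer {DI_TOKEN}"},
        json={"model": DI_MODEL, "input": texts},
        timeout=DI_TIMEOUT,
    )
    resp.raise_for_status()
    rows = [item["embedding"] for item in resp.json()["data"]]
    return np.asarray(rows, dtype=np.float32)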
@@ -492,8 +492,8 @@ def run_index_job(job_id: str, req: IndexRequest):
     _set_status(job_id, "running")
     _append_log(job_id, f"Start project={req.project_id} files={len(req.files)} | backends={EMB_BACKEND_ORDER} | store={VECTOR_STORE} (deterministic_ids={QDRANT_DETERMINISTIC_IDS}, mode={QDRANT_ID_MODE})")
     LOG.info(f"[{job_id}] Index start project={req.project_id} files={len(req.files)}")
+
     # --- DEBUG DIAGNOSTIC (INSERT HERE) ---
-    # Log sample files received (first N)
     try:
         N_SAMPLE = 6
         sample = req.files[:N_SAMPLE]
@@ -504,10 +504,8 @@ def run_index_job(job_id: str, req: IndexRequest):
             h = hashlib.blake2b(t.encode("utf-8", "ignore"), digest_size=8).hexdigest()
             seen_hashes.setdefault(h, []).append(p)
             LOG.info(f"[{job_id}] recv file #{fidx}: path={p!r} len_text={len(t)} hash8={h} preview={repr(t[:120])}")
-        # summary
         if len(req.files) > N_SAMPLE:
             LOG.info(f"[{job_id}] ... and {len(req.files)-N_SAMPLE} more files")
-        # If most files share same hash, warn
         if len(seen_hashes) == 1 and len(req.files) > 1:
             _append_log(job_id, "⚠️ All received files appear IDENTICAL (same hash). Possible client-side bug.")
             LOG.warning("[%s] All files identical by hash8=%s", job_id, list(seen_hashes.keys())[0])
@@ -525,15 +523,25 @@ def run_index_job(job_id: str, req: IndexRequest):
     except Exception as e:
         _append_log(job_id, f"Wipe failed (ignored): {e}")
 
-    #
-
-
+    # --- WARMUP: compute a test embedding to determine the dimension (dim) ---
+    # Use an initial chunk (or the string 'warmup' if there are no files)
+    if req.files:
+        warm_text = next(_chunk_with_spans(req.files[0].text or "", req.chunk_size, req.overlap))[2]
+    else:
+        warm_text = "warmup"
     try:
+        embs, sz = _post_embeddings([warm_text], job_id=job_id)
+        if embs is None or embs.ndim != 2:
+            raise RuntimeError("Warmup embeddings invalid shape")
+        dim = int(embs.shape[1])
         LOG.info(f"[{job_id}] warmup embeddings shape = {embs.shape} dtype={embs.dtype}")
-        _append_log(job_id, f"warmup embeddings shape = {embs.shape}")
-    except Exception:
-
-
+        _append_log(job_id, f"warmup embeddings shape = {embs.shape} dim={dim}")
+    except Exception as e:
+        LOG.exception("[%s] Warmup embeddings failed: %s", job_id, e)
+        _append_log(job_id, f"Warmup embeddings failed: {e}")
+        _set_status(job_id, "error")
+        return
+
     # If using QdrantStore: check existing collection vector size and warn if mismatch
     if isinstance(STORE, QdrantStore):
         try:
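The warmup call added above exists only to learn the embedding dimensionality before the vector store is touched; any backend that returns a 2-D (batch, dim) array will do. A toy illustration of the shape check, with a random array standing in for a real backend response:

import numpy as np

embs = np.random.rand(1, 1024).astype(np.float32)  # stand-in for a warmup response
if embs is None or embs.ndim != 2:
    raise RuntimeError("Warmup embeddings invalid shape")
dim = int(embs.shape[1])
print("embedding dim:", dim)  # 1024 here; the real value depends on the model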
@@ -549,7 +557,7 @@ def run_index_job(job_id: str, req: IndexRequest):
             cfg = info.get("result", info)
             vectors = cfg.get("params", {}).get("vectors", {})
             existing_size = int(vectors.get("size")) if vectors else None
-
+
             if existing_size and existing_size != dim:
                 msg = (f"Qdrant collection {col} already exists with dim={existing_size} but embeddings dim={dim}. "
                        "This will likely cause vectors to be rejected. Consider wiping or recreating collection.")
@@ -698,6 +706,8 @@ def start_index(req: IndexRequest, background_tasks: BackgroundTasks, x_auth_tok
     _check_backend_ready()
     job_id = uuid.uuid4().hex[:12]
     JOBS[job_id] = {"status": "queued", "logs": [], "created": time.time()}
+    LOG.info(f"Created job {job_id} for project {req.project_id}")
+    _append_log(job_id, f"Job created: {job_id} project={req.project_id}")
     background_tasks.add_task(run_index_job, job_id, req)
     return {"job_id": job_id}
 
@@ -707,7 +717,8 @@ def status_path(job_id: str, x_auth_token: Optional[str] = Header(default=None))
     _auth(x_auth_token)
     j = JOBS.get(job_id)
     if not j:
-
+        # More explicit JSON response to make client-side debugging easier
+        raise HTTPException(status_code=404, detail={"error": "job inconnu", "advice": "POST /index to create a new job"})
     return {"status": j["status"], "logs": j["logs"][-1500:]}
 
 @app.get("/status")
@@ -760,4 +771,4 @@ if __name__ == "__main__":
     import uvicorn
     port = int(os.getenv("PORT", "7860"))
     LOG.info(f"===== Application Startup on PORT {port} =====")
-    uvicorn.run(app, host="0.0.0.0", port=port)
+    uvicorn.run(app, host="0.0.0.0", port=port)