chouchouvs committed on
Commit 3b9e413 · verified · 1 parent: 1dc9ef1

Update main.py

Files changed (1):
  main.py (+35 -46)

main.py CHANGED
@@ -15,25 +15,26 @@ logging.basicConfig(level=logging.INFO, format="%(levelname)s:%(name)s:%(message
 LOG = logging.getLogger("remote_indexer")
 
 # ---------- ENV (config) ----------
-# Order of embedding backends to try (comma-separated). E.g. "hf,deepinfra"
-EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", os.getenv("EMB_BACKEND", "hf")).split(",") if s.strip()]
+# Default to DeepInfra first so the service works out of the box.
+DEFAULT_BACKENDS = "deepinfra,hf"
+EMB_BACKEND_ORDER = [s.strip().lower() for s in os.getenv("EMB_BACKEND_ORDER", os.getenv("EMB_BACKEND", DEFAULT_BACKENDS)).split(",") if s.strip()]
+
+# Auto-fallback to DeepInfra if HF answers "SentenceSimilarityPipeline ... 'sentences' missing"
+ALLOW_DI_AUTOFALLBACK = os.getenv("ALLOW_DI_AUTOFALLBACK", "true").lower() in ("1","true","yes","on")
 
 # HF Inference API
 HF_TOKEN = os.getenv("HF_API_TOKEN", "").strip()
 HF_MODEL = os.getenv("HF_EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2").strip()
 
-# Configurable URLs
 HF_API_URL_USER = os.getenv("HF_API_URL", "").strip()
 HF_API_URL_PIPELINE = os.getenv("HF_API_URL_PIPELINE", "").strip()
 HF_API_URL_MODELS = os.getenv("HF_API_URL_MODELS", "").strip()
-
 if HF_API_URL_USER:
     if "/pipeline" in HF_API_URL_USER:
         HF_API_URL_PIPELINE = HF_API_URL_USER
     else:
         HF_API_URL_MODELS = HF_API_URL_USER
 
-# Defaults
 HF_URL_PIPELINE = (HF_API_URL_PIPELINE or f"https://api-inference.huggingface.co/pipeline/feature-extraction/{HF_MODEL}")
 HF_URL_MODELS = (HF_API_URL_MODELS or f"https://api-inference.huggingface.co/models/{HF_MODEL}")
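The parsing above is forgiving about whitespace, case, and empty items. A quick standalone check of just that expression (input values illustrative, nothing beyond the standard library assumed):

    import os

    os.environ["EMB_BACKEND_ORDER"] = " HF , deepinfra ,"   # deliberately messy input
    order = [s.strip().lower()
             for s in os.getenv("EMB_BACKEND_ORDER",
                                os.getenv("EMB_BACKEND", "deepinfra,hf")).split(",")
             if s.strip()]
    print(order)  # ['hf', 'deepinfra'] -> trimmed, lowercased, empties dropped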
 
@@ -48,15 +49,15 @@ DI_URL = os.getenv("DEEPINFRA_EMBED_URL", "https://api.deepinfra.com/v1/embe
 DI_TIMEOUT = float(os.getenv("EMB_TIMEOUT_SEC", "120"))
 
 # Retries
-RETRY_MAX = int(os.getenv("EMB_RETRY_MAX", "6"))             # max attempts per backend
-RETRY_BASE_SEC = float(os.getenv("EMB_RETRY_BASE", "1.5"))   # base for exponential backoff
-RETRY_JITTER = float(os.getenv("EMB_RETRY_JITTER", "0.35"))  # jitter fraction (0..1)
+RETRY_MAX = int(os.getenv("EMB_RETRY_MAX", "6"))
+RETRY_BASE_SEC = float(os.getenv("EMB_RETRY_BASE", "1.5"))
+RETRY_JITTER = float(os.getenv("EMB_RETRY_JITTER", "0.35"))
 
 # Qdrant
 QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
 QDRANT_API = os.getenv("QDRANT_API_KEY", "").strip()
 
-# Service API auth (simple header)
+# Auth
 AUTH_TOKEN = os.getenv("REMOTE_INDEX_TOKEN", "").strip()
 
 LOG.info(f"Embeddings backend order = {EMB_BACKEND_ORDER}")
@@ -91,8 +92,8 @@ class QueryRequest(BaseModel):
     query: str
     top_k: int = 6
 
-# ---------- Jobs store (in-memory) ----------
-JOBS: Dict[str, Dict[str, Any]] = {}  # {job_id: {"status": "...", "logs": [...], "created": ts}}
+# ---------- Jobs store ----------
+JOBS: Dict[str, Dict[str, Any]] = {}
 
 def _append_log(job_id: str, line: str):
     job = JOBS.get(job_id)
@@ -106,32 +107,28 @@ def _auth(x_auth: Optional[str]):
     if AUTH_TOKEN and (x_auth or "") != AUTH_TOKEN:
         raise HTTPException(status_code=401, detail="Unauthorized")
 
-# ---------- Embedding backends with retry ----------
+# ---------- Retry helpers ----------
 def _retry_sleep(attempt: int):
-    # exponential backoff + jitter
     back = (RETRY_BASE_SEC ** attempt)
     jitter = 1.0 + random.uniform(-RETRY_JITTER, RETRY_JITTER)
     return max(0.25, back * jitter)
 
 def _with_task_param(url: str, task: str = "feature-extraction") -> str:
-    # Appends ?task=feature-extraction (or &task=...) if absent
     return url + ("&" if "?" in url else "?") + f"task={task}"
 
+# ---------- HF embeddings ----------
 def _hf_http(url: str, payload: Dict[str, Any], headers_extra: Optional[Dict[str, str]] = None) -> Tuple[np.ndarray, int]:
     if not HF_TOKEN:
         raise RuntimeError("HF_API_TOKEN missing (backend=hf).")
-
     headers = {
         "Authorization": f"Bearer {HF_TOKEN}",
         "Content-Type": "application/json",
         "Accept": "application/json",
     }
-    # options.wait_for_model in the JSON body + X-Wait-For-Model header -> maximum compatibility
     if HF_WAIT:
         payload.setdefault("options", {})["wait_for_model"] = True
         headers["X-Wait-For-Model"] = "true"
         headers["X-Use-Cache"] = "true"
-
     if headers_extra:
         headers.update(headers_extra)
 
@@ -143,7 +140,6 @@ def _hf_http(url: str, payload: Dict[str, Any], headers_extra: Optional[Dict[str
 
     data = r.json()
     arr = np.array(data, dtype=np.float32)
-    # data can be: [tokens, dim], [batch, tokens, dim], [batch, dim], [dim]
     if arr.ndim == 3:  # [batch, tokens, dim]
         arr = arr.mean(axis=1)
     elif arr.ndim == 2:
@@ -153,29 +149,20 @@ def _hf_http(url: str, payload: Dict[str, Any], headers_extra: Optional[Dict[str
     else:
         raise RuntimeError(f"HF: unexpected embeddings shape: {arr.shape}")
 
-    # L2 normalization
     norms = np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12
     arr = arr / norms
     return arr.astype(np.float32), size
 
 def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
-    """
-    1) Try the PIPELINE feature-extraction endpoint (if available)
-    2) Fall back to MODELS + an X-Task: feature-extraction header
-    3) If it still 400s because of SentenceSimilarityPipeline, also force ?task=feature-extraction on the MODELS URL
-    """
     payload: Dict[str, Any] = {"inputs": (batch if len(batch) > 1 else batch[0])}
-
     urls = [HF_URL_PIPELINE, HF_URL_MODELS] if HF_PIPELINE_FIRST else [HF_URL_MODELS, HF_URL_PIPELINE]
     last_exc: Optional[Exception] = None
 
     for idx, url in enumerate(urls, 1):
         try:
             if "/models/" in url:
-                # 2) MODELS with the X-Task header
                 return _hf_http(url, payload, headers_extra={"X-Task": "feature-extraction"})
             else:
-                # 1) PIPELINE
                 return _hf_http(url, payload, headers_extra=None)
         except requests.HTTPError as he:
             code = he.response.status_code if he.response is not None else 0
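The shape handling and normalization above are easy to sanity-check in isolation. A small numpy sketch (dimensions hypothetical) of mean-pooling a [batch, tokens, dim] response down to unit-norm [batch, dim] vectors:

    import numpy as np

    arr = np.random.rand(2, 5, 384).astype(np.float32)   # [batch, tokens, dim]
    arr = arr.mean(axis=1)                                # token mean-pooling -> [batch, dim]
    arr = arr / (np.linalg.norm(arr, axis=1, keepdims=True) + 1e-12)  # L2 normalize
    print(arr.shape, np.linalg.norm(arr, axis=1))         # (2, 384) [1. 1.]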
@@ -184,7 +171,6 @@ def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
             if code in (404, 405, 501) and idx < len(urls):
                 LOG.warning(f"HF endpoint {url} unavailable ({code}), falling back to alternative ...")
                 continue
-            # If we hit MODELS and got SentenceSimilarityPipeline -> retry with ?task=feature-extraction
             if "/models/" in url and "SentenceSimilarityPipeline" in (body or ""):
                 try:
                     forced_url = _with_task_param(url, "feature-extraction")
@@ -196,10 +182,9 @@ def _hf_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
         except Exception as e:
             last_exc = e
             raise
-
-    # should not happen
     raise RuntimeError(f"HF: no usable endpoint ({last_exc})")
 
+# ---------- DeepInfra embeddings ----------
 def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     if not DI_TOKEN:
         raise RuntimeError("DEEPINFRA_API_KEY missing (backend=deepinfra).")
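Note that _with_task_param appends unconditionally rather than checking whether task= is already present; copied out and exercised on illustrative URLs:

    def _with_task_param(url: str, task: str = "feature-extraction") -> str:
        return url + ("&" if "?" in url else "?") + f"task={task}"

    print(_with_task_param("https://api-inference.huggingface.co/models/m"))
    # https://api-inference.huggingface.co/models/m?task=feature-extraction
    print(_with_task_param("https://example.test/models/m?x=1"))
    # https://example.test/models/m?x=1&task=feature-extraction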
@@ -222,6 +207,7 @@ def _di_post_embeddings_once(batch: List[str]) -> Tuple[np.ndarray, int]:
     arr = arr / norms
     return arr.astype(np.float32), size
 
+# ---------- Retry orchestrator ----------
 def _call_with_retries(func, batch: List[str], label: str, job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
     last_exc = None
     for attempt in range(RETRY_MAX):
@@ -250,17 +236,22 @@ def _post_embeddings(batch: List[str], job_id: Optional[str]
 def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np.ndarray, int]:
     """
     Try the backends in EMB_BACKEND_ORDER, with retries.
-    E.g. EMB_BACKEND_ORDER=hf,deepinfra
+    Optional auto-fallback to DeepInfra when HF returns the infamous "SentenceSimilarityPipeline" error.
     """
     last_err = None
+    similarity_misroute = False
+
     for b in EMB_BACKEND_ORDER:
         if b == "hf":
             try:
                 return _call_with_retries(_hf_post_embeddings_once, batch, "HF", job_id)
-            except Exception as e:
-                last_err = e
-                _append_log(job_id, f"HF failed: {e}.")
-                LOG.error(f"HF failed: {e}")
+            except requests.HTTPError as he:
+                body = he.response.text if getattr(he, "response", None) is not None else ""
+                if "SentenceSimilarityPipeline.__call__()" in (body or ""):
+                    similarity_misroute = True
+                last_err = he
+                _append_log(job_id, f"HF failed: {he}.")
+                LOG.error(f"HF failed: {he}")
         elif b == "deepinfra":
             try:
                 return _call_with_retries(_di_post_embeddings_once, batch, "DeepInfra", job_id)
@@ -270,6 +261,13 @@ def _post_embeddings(batch: List[str], job_id: Optional[str] = None) -> Tuple[np
             LOG.error(f"DeepInfra failed: {e}")
         else:
             _append_log(job_id, f"Unknown backend ignored: {b}")
+
+    # Auto-fallback to DI if enabled and the HF problem is the Similarity misrouting
+    if ALLOW_DI_AUTOFALLBACK and similarity_misroute and DI_TOKEN:
+        LOG.warning("HF routed to SentenceSimilarity => auto-falling back to DeepInfra (overriding order).")
+        _append_log(job_id, "Auto-fallback to DeepInfra (HF => SentenceSimilarity).")
+        return _call_with_retries(_di_post_embeddings_once, batch, "DeepInfra", job_id)
+
     raise RuntimeError(f"All backends failed: {last_err}")
 
 # ---------- Qdrant helpers ----------
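Putting the orchestration together, a sketch of the configurations it reacts to plus an illustrative call; the env values are examples and each path assumes the corresponding token is set:

    # EMB_BACKEND_ORDER=hf,deepinfra   -> try HF with retries, then DeepInfra.
    # EMB_BACKEND_ORDER=deepinfra,hf   -> the new default: DeepInfra first.
    # ALLOW_DI_AUTOFALLBACK=true       -> an HF "SentenceSimilarityPipeline" 400 triggers
    #                                     one extra DeepInfra pass even after the loop.
    vecs, payload_size = _post_embeddings(["first chunk", "second chunk"], job_id=None)
    print(vecs.shape)  # (2, dim) unit-norm float32 vectors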
@@ -302,7 +300,6 @@ def run_index_job(job_id: str, req: IndexRequest):
     _append_log(job_id, f"Start project={req.project_id} files={len(req.files)} | backends={EMB_BACKEND_ORDER}")
     LOG.info(f"[{job_id}] Index start project={req.project_id} files={len(req.files)}")
 
-    # Warmup -> dimension (first non-empty chunk if possible)
     warm = "warmup"
     if req.files:
         for _, _, chunk_txt in _chunk_with_spans(req.files[0].text or "", req.chunk_size, req.overlap):
@@ -315,7 +312,6 @@ def run_index_job(job_id: str, req: IndexRequest):
     _append_log(job_id, f"Collection ready: {col} (dim={dim})")
 
     point_id = 0
-    # Loop over the files
     for fi, f in enumerate(req.files, 1):
         if not (f.text or "").strip():
             _append_log(job_id, f"file {fi}: empty, skipped")
@@ -329,7 +325,6 @@ def run_index_job(job_id: str, req: IndexRequest):
             if req.store_text:
                 meta["text"] = chunk_txt
             metas.append(meta)
-            # flush in batches
             if len(chunks) >= req.batch_size:
                 vecs, sz = _post_embeddings(chunks, job_id=job_id)
                 batch_points = [
@@ -342,7 +337,6 @@ def run_index_job(job_id: str, req: IndexRequest):
             _append_log(job_id, f"file {fi}/{len(req.files)}: +{len(chunks)} chunks (total={total_chunks}) ~{sz/1024:.1f}KiB")
             chunks, metas = [], []
 
-        # end-of-file flush
         if chunks:
             vecs, sz = _post_embeddings(chunks, job_id=job_id)
             batch_points = [
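_chunk_with_spans itself is outside this diff; a hypothetical stand-in consistent with how it is called above (yielding (start, end, text) character windows with overlap), handy for testing the flush logic locally:

    def _chunk_with_spans(text: str, chunk_size: int, overlap: int):
        # Hypothetical sketch: sliding character windows; the real helper may differ.
        step = max(1, chunk_size - overlap)
        for start in range(0, max(len(text), 1), step):
            end = min(len(text), start + chunk_size)
            yield start, end, text[start:end]
            if end == len(text):
                break

    print(list(_chunk_with_spans("abcdefghij", 4, 1)))
    # [(0, 4, 'abcd'), (3, 7, 'defg'), (6, 10, 'ghij')]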
@@ -392,8 +386,6 @@ def start_index(req: IndexRequest, background_tasks: BackgroundTasks, x_auth_tok
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
         raise HTTPException(401, "Unauthorized")
     _check_backend_ready()
-
-    # Defensive filtering of empty files, to avoid a 422
     non_empty = [f for f in req.files if (f.text or "").strip()]
     if not non_empty:
         raise HTTPException(422, "No non-empty files to index.")
@@ -413,7 +405,7 @@ def status(job_id: str, x_auth_token: Optional[str] = Header(default=None)):
         raise HTTPException(404, "unknown job")
     return {"status": j["status"], "logs": j["logs"][-800:]}
 
-# --- Compat endpoints (legacy clients) ---
+# Legacy compat
 @app.get("/status")
 def status_qp(job_id: str = Query(None), x_auth_token: Optional[str] = Header(default=None)):
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
@@ -442,10 +434,7 @@ def query(req: QueryRequest, x_auth_token: Optional[str] = Header(default=None))
     if AUTH_TOKEN and (x_auth_token or "") != AUTH_TOKEN:
         raise HTTPException(401, "Unauthorized")
     _check_backend_ready()
-
-    # clamp top_k
     k = int(max(1, min(50, req.top_k or 6)))
-
     vecs, _ = _post_embeddings([req.query])
     col = f"proj_{req.project_id}"
     try:
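The clamp above, checked at its edges (None and 0 fall back to 6 via "or", everything else is bounded to 1..50):

    clamp = lambda top_k: int(max(1, min(50, top_k or 6)))
    print(clamp(None), clamp(0), clamp(3), clamp(999))  # 6 6 3 50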
@@ -474,7 +463,7 @@ def wipe_collection(project_id: str, x_auth_token: Optional[str] = Header(defaul
         raise HTTPException(401, "Unauthorized")
     col = f"proj_{project_id}"
     try:
-        qdr.delete_collection(col); return {"ok": True}
+        qdrant.delete_collection(col); return {"ok": True}
     except Exception as e:
         raise HTTPException(400, f"wipe failed: {e}")
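For completeness, a hedged client sketch against the one route fully visible in this diff, GET /status?job_id=...; the base URL is an assumption, and the X-Auth-Token header name follows FastAPI's mapping of the x_auth_token Header parameter:

    import requests

    BASE = "http://localhost:7860"            # assumption: wherever the service runs
    HEADERS = {"X-Auth-Token": "change-me"}   # only needed if REMOTE_INDEX_TOKEN is set

    r = requests.get(f"{BASE}/status", params={"job_id": "some-job-id"},
                     headers=HEADERS, timeout=30)
    print(r.status_code, r.json() if r.ok else r.text)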
 
 