Update main.py

main.py (CHANGED)
@@ -3,19 +3,17 @@
 HF Space - Remote Indexer (No-Qdrant)
 Vector storage & search with 🤗 datasets + FAISS (local), Gradio UI.
 
-…
- - …
- - …
- - …
- - /wipe: deletes the project folder
- - /export_hub (optional): pushes the project folder to a Dataset repo on the Hub
+Improvements:
+ - "Fail-safe" chunking: if no chunk is produced, take 1 chunk = the whole text.
+ - Detailed logs: text size per file, number of chunks per file.
+ - UI: an "Indexer depuis textarea" button to test with 2 large texts injected from the UI.
 
 ENV:
  - EMB_PROVIDER ("hf" | "dummy", default "hf")
  - HF_EMBED_MODEL (e.g. "BAAI/bge-m3" | "intfloat/e5-base-v2")
  - HUGGINGFACEHUB_API_TOKEN (required if EMB_PROVIDER=hf)
  - EMB_FALLBACK_TO_DUMMY (true/false)
- - DATA_DIR (…)
+ - DATA_DIR (auto-pick writable: $DATA_DIR, ./data, /home/user/app/data, /home/user/data, /tmp/data)
  - HF_DATASET_REPO (optional "username/my_proj_vectors") for export
  - LOG_LEVEL (DEBUG by default)
  - UI_PATH ("/ui")
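With these variables, the Space can also be run locally without any HF token by switching the provider to "dummy"; a minimal sketch (values are illustrative, not part of this commit):

    import os

    os.environ["EMB_PROVIDER"] = "dummy"   # bypass the HF Inference API entirely
    os.environ["DATA_DIR"] = "/tmp/data"   # first entry tried by the auto-pick below
    os.environ["LOG_LEVEL"] = "DEBUG"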
@@ -25,7 +23,6 @@ ENV:
 from __future__ import annotations
 import os
 import io
-import re
 import json
 import time
 import uuid
@@ -65,7 +62,7 @@ logging.basicConfig(
 LOG = logging.getLogger("remote_indexer_noqdrant")
 
 EMB_PROVIDER = os.getenv("EMB_PROVIDER", "hf").lower()  # "hf" | "dummy"
-HF_EMBED_MODEL = os.getenv("HF_EMBED_MODEL", "…")
+HF_EMBED_MODEL = os.getenv("HF_EMBED_MODEL", "intfloat/e5-base-v2")
 HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN", "")
 EMB_FALLBACK_TO_DUMMY = os.getenv("EMB_FALLBACK_TO_DUMMY", "false").lower() in ("1","true","yes","on")
 
@@ -80,11 +77,11 @@ if EMB_PROVIDER == "hf" and not HF_TOKEN and not EMB_FALLBACK_TO_DUMMY:
 # ------------------------------------------------------------------------------
 def pick_data_dir() -> str:
     candidates = [
-        os.getenv("DATA_DIR", "").strip(),
-        os.path.join(os.getcwd(), "data"),
-        "/home/user/app/data",
+        os.getenv("DATA_DIR", "").strip(),
+        os.path.join(os.getcwd(), "data"),
+        "/home/user/app/data",
         "/home/user/data",
-        "/tmp/data",
+        "/tmp/data",
     ]
     for p in candidates:
         if not p:
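The rest of the loop body is cut off in this view. A plausible sketch of a first-writable-candidate probe (hypothetical helper, not the file's actual code; first_writable is an invented name):

    import os

    def first_writable(candidates: list) -> str:
        for p in candidates:
            if not p:
                continue  # empty string when DATA_DIR is unset
            try:
                os.makedirs(p, exist_ok=True)
                probe = os.path.join(p, ".write_test")
                with open(probe, "w") as fh:
                    fh.write("ok")
                os.remove(probe)
                return p  # first directory we can create and write to
            except OSError:
                continue
        return "/tmp/data"  # last-resort default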
@@ -113,8 +110,8 @@ class FileItem(BaseModel):
 class IndexRequest(BaseModel):
     project_id: str = Field(..., min_length=1)
     files: List[FileItem] = Field(default_factory=list)
-    chunk_size: int = Field(200, ge=…)
-    overlap: int = Field(20, ge=0, le=…)
+    chunk_size: int = Field(200, ge=32, le=8192)
+    overlap: int = Field(20, ge=0, le=1024)
     batch_size: int = Field(32, ge=1, le=1024)
     store_text: bool = True
 
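With the new bounds, out-of-range requests fail fast at validation time instead of producing degenerate chunking; standard pydantic behavior:

    from pydantic import ValidationError

    try:
        IndexRequest(project_id="demo", chunk_size=8)  # below the new floor ge=32
    except ValidationError as err:
        print(err)  # reports that chunk_size must be >= 32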
@@ -143,45 +140,43 @@ class JobState(BaseModel):
         LOG.debug(f"[{self.job_id}] {msg}")
 
 JOBS: Dict[str, JobState] = {}
-
-# In-memory cache {project_id: (Dataset, dim)}
-DATASETS: Dict[str, Tuple[Dataset, int]] = {}
+DATASETS: Dict[str, Tuple[Dataset, int]] = {}  # in-memory cache {project_id: (Dataset, dim)}
 
 # ------------------------------------------------------------------------------
-# Utils
+# Chunking utils
 # ------------------------------------------------------------------------------
-def l2_normalize(vec: List[float]) -> List[float]:
-    …
-    …
-    …
-    arr = np.array(vec, dtype=np.float32)
-    n = float(np.linalg.norm(arr))
-    if n > 0:
-        arr = arr / n
-    return arr.astype(np.float32).tolist()
-
-def flatten_any(x: Any) -> List[float]:
-    if isinstance(x, (list, tuple)):
-        if len(x) > 0 and isinstance(x[0], (list, tuple)):
-            return flatten_any(x[0])
-        return list(map(float, x))
-    raise ValueError("Embedding vector mal formé")
-
-def chunk_text(text: str, chunk_size: int, overlap: int) -> List[Tuple[int, int, str]]:
+def chunk_text_fail_safe(text: str, chunk_size: int, overlap: int, min_keep_chars: int = 1) -> List[Tuple[int, int, str]]:
+    """
+    Splits the text into overlapping windows. If no "useful" chunk is produced
+    but the text has at least min_keep_chars non-blank characters, return 1 chunk = 100% of the text.
+    """
     text = text or ""
-    if not text.strip():
+    base = text.strip("\n\r\t ")
+    nclean = len(base)
+    if nclean < min_keep_chars:
         return []
-    res = []
+
     n = len(text)
+    res: List[Tuple[int, int, str]] = []
     i = 0
+    # Clamp out-of-range parameters
+    chunk_size = max(32, int(chunk_size))
+    overlap = max(0, min(int(overlap), chunk_size - 1))
+
     while i < n:
         j = min(i + chunk_size, n)
         chunk = text[i:j]
-        if len(chunk.strip()) >= …:
+        if len(chunk.strip()) >= min_keep_chars:
             res.append((i, j, chunk))
+        if j == n:
+            break
         i = j - overlap
         if i <= 0:
             i = j
+
+    if not res:
+        # fail-safe: 1 chunk covering the whole text
+        res = [(0, n, text)]
     return res
 
 def project_paths(project_id: str) -> Dict[str, str]:
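A quick sanity check of the new function's three behaviors (illustrative calls against the function as added above):

    text = "x" * 100
    spans = chunk_text_fail_safe(text, chunk_size=50, overlap=20)
    print([(s, e) for s, e, _ in spans])             # [(0, 50), (30, 80), (60, 100)]

    print(chunk_text_fail_safe("   \n  ", 200, 20))  # [] (whitespace-only stays empty)

    # fail-safe: each 50-char window holds only 30 non-blank chars (< min_keep_chars=40),
    # yet the text itself is non-blank, so one chunk covering everything is returned
    blocky = ("z" * 30 + " " * 20) * 2
    print(chunk_text_fail_safe(blocky, 50, 0, min_keep_chars=40))  # [(0, 100, <whole text>)]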
@@ -211,10 +206,23 @@ def load_meta(meta_path: str) -> Dict[str, Any]:
 # ------------------------------------------------------------------------------
 # Embeddings (HF Inference or dummy)
 # ------------------------------------------------------------------------------
+def l2_normalize(vec: List[float]) -> List[float]:
+    arr = np.array(vec, dtype=np.float32)
+    n = float(np.linalg.norm(arr))
+    if n > 0:
+        arr = arr / n
+    return arr.astype(np.float32).tolist()
+
+def flatten_any(x: Any) -> List[float]:
+    if isinstance(x, (list, tuple)):
+        if len(x) > 0 and isinstance(x[0], (list, tuple)):
+            return flatten_any(x[0])
+        return list(map(float, x))
+    raise ValueError("Embedding vector mal formé")
+
 def _maybe_prefix_for_model(texts: List[str], model_name: str) -> List[str]:
     m = (model_name or "").lower()
     if "e5" in m:
-        # E5: "query: ..." / "passage: ..." etc. Here we simply use one uniform prefix.
         return [("query: " + t) for t in texts]
     return texts
 
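These two helpers feed the inner-product FAISS index built further down: on unit-length vectors, inner product equals cosine similarity, and flatten_any collapses a nested API response into one flat vector. Illustrative usage:

    v = l2_normalize([3.0, 4.0])
    print(v)                               # ≈ [0.6, 0.8], unit length
    print(flatten_any([[0.1, 0.2, 0.3]]))  # [0.1, 0.2, 0.3], unwraps one nesting level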
@@ -269,7 +277,7 @@ async def embed_texts(client: httpx.AsyncClient, texts: List[str]) -> List[List[float]]:
 async def build_dataset_with_faiss(job: JobState, req: IndexRequest) -> None:
     """
     Builds a HuggingFace dataset with the columns:
-    - path (str), text (…)
+    - path (str), text (str), chunk (int), start (int), end (int), embedding (float32[])
     Adds a FAISS index (Inner Product) and persists it to disk.
     """
     try:
@@ -281,32 +289,32 @@ async def build_dataset_with_faiss(job: JobState, req: IndexRequest) -> None:
             f"provider={EMB_PROVIDER} model={HF_EMBED_MODEL}"
         )
 
-        # Chunking
+        # Chunking + per-file logs
         records: List[Dict[str, Any]] = []
         for f in req.files:
-            …
-            …
-            …
+            t = f.text or ""
+            tlen = len(t)
+            job.log(f"{f.path}: len(text)={tlen}")
+            chunks = chunk_text_fail_safe(t, req.chunk_size, req.overlap, min_keep_chars=1)
+            job.log(f"{f.path}: chunks créés={len(chunks)}")
             for idx, (start, end, ch) in enumerate(chunks):
                 payload = {"path": f.path, "chunk": idx, "start": start, "end": end}
-                if req.store_text:
-                    payload["text"] = ch
-                else:
-                    payload["text"] = None
+                payload["text"] = ch if req.store_text else ""
                 payload["raw"] = ch
                 records.append(payload)
+
        job.total_chunks = len(records)
        job.log(f"Total chunks = {job.total_chunks}")
        if job.total_chunks == 0:
            job.stage = "failed"
-            job.errors.append("Aucun chunk à indexer…")
+            job.errors.append("Aucun chunk à indexer (textes vides ?)")
            job.finished_at = time.time()
            return
 
        # Embeddings per batch
        async with httpx.AsyncClient(timeout=180) as client:
            all_vecs: List[List[float]] = []
-            B = max(8, min(…))
+            B = max(8, min(128, req.batch_size * 2))
            i = 0
            while i < len(records):
                sub = records[i : i + B]
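The clamp keeps every HF Inference request between 8 and 128 texts regardless of the requested batch_size:

    for bs in (1, 32, 512):
        print(bs, "->", max(8, min(128, bs * 2)))  # 1 -> 8, 32 -> 64, 512 -> 128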
@@ -322,12 +330,12 @@ async def build_dataset_with_faiss(job: JobState, req: IndexRequest) -> None:
             vec_dim = len(all_vecs[0])
             job.log(f"Embeddings dim={vec_dim}")
 
-        # …
+        # Dataset columns
         paths = [r["path"] for r in records]
         chunks = [int(r["chunk"]) for r in records]
         starts = [int(r["start"]) for r in records]
         ends = [int(r["end"]) for r in records]
-        texts = [r.get("text") for r in records]
+        texts = [r.get("text", "") for r in records]
 
         features = Features({
             "path": Value("string"),
@@ -350,20 +358,19 @@ async def build_dataset_with_faiss(job: JobState, req: IndexRequest) -> None:
             features=features,
         )
 
-        # …
+        # FAISS index (Inner Product ≈ cosine after normalization)
         job.stage = "indexing"
         ds.add_faiss_index(column="embedding", metric_type=faiss.METRIC_INNER_PRODUCT)
         job.indexed = ds.num_rows
         job.log(f"FAISS index ajouté ({ds.num_rows} points)")
 
-        # Persistence
+        # Persistence
         p = project_paths(req.project_id)
         os.makedirs(p["faiss_dir"], exist_ok=True)
         ds.save_to_disk(p["ds_dir"])
         ds.save_faiss_index("embedding", p["faiss_file"])
         save_meta(p["meta_file"], {"dim": vec_dim, "rows": ds.num_rows, "model": HF_EMBED_MODEL, "ts": time.time()})
 
-        # In-memory cache
         DATASETS[req.project_id] = (ds, vec_dim)
 
         job.stage = "done"
@@ -399,7 +406,6 @@ def create_and_start_job(req: IndexRequest) -> JobState:
 # Loading / Query helpers
 # ------------------------------------------------------------------------------
 def ensure_loaded(project_id: str) -> Tuple[Dataset, int]:
-    """Loads the dataset+faiss from disk if not already in the in-memory cache."""
     if project_id in DATASETS:
         return DATASETS[project_id]
     p = project_paths(project_id)
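For context, a persisted project can be queried back through the datasets FAISS integration. A minimal sketch (search is an invented name; it assumes a query vector from the same embedding provider and reuses project_paths/l2_normalize from this file):

    import numpy as np
    from datasets import load_from_disk

    def search(project_id: str, query_vec: list, k: int = 5):
        p = project_paths(project_id)
        ds = load_from_disk(p["ds_dir"])                   # dataset written by save_to_disk
        ds.load_faiss_index("embedding", p["faiss_file"])  # index written by save_faiss_index
        q = np.array(l2_normalize(query_vec), dtype=np.float32)
        scores, rows = ds.get_nearest_examples("embedding", q, k=k)
        return list(zip(scores, rows["path"], rows["chunk"]))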
@@ -440,13 +446,6 @@ async def api_info():
         "hub_export_enabled": bool(HF_DATASET_REPO and HfApi),
     }
 
-@fastapi_app.get("/debug/paths")
-async def debug_paths(project_id: Optional[str] = None):
-    res = {"DATA_DIR": DATA_DIR, "cwd": os.getcwd()}
-    if project_id:
-        res["project_paths"] = project_paths(project_id)
-    return res
-
 @fastapi_app.get("/")
 async def root_redirect():
     return RedirectResponse(url=UI_PATH, status_code=307)
@@ -510,7 +509,6 @@ async def export_hub(project_id: str = Query(..., min_length=1), repo_id: Optional…
     except Exception:
         pass
 
-    # zip the project folder
     buf = io.BytesIO()
     base_dir = p["base"]
     zip_name = f"{project_id}_vectors.zip"
@@ -535,8 +533,10 @@
 # Gradio UI
 # ------------------------------------------------------------------------------
 def _default_two_docs() -> List[Dict[str, str]]:
-    a = "Alpha bravo charlie delta echo foxtrot golf hotel india…"
-    b = …
+    a = ("Alpha bravo charlie delta echo foxtrot golf hotel india juliett kilo lima mike november oscar papa "
+         "quebec romeo sierra tango uniform victor whiskey xray yankee zulu. ") * 5
+    b = ("Lorem ipsum dolor sit amet, consectetuer adipiscing elit, sed diam nonummy nibh euismod tincidunt ut laoreet "
+         "dolore magna aliquam erat volutpat. ") * 5
     return [{"path": "a.txt", "text": a}, {"path": "b.txt", "text": b}]
 
 async def ui_wipe(project: str):
@@ -566,13 +566,33 @@ async def ui_index_sample(project: str, chunk_size: int, overlap: int, batch_size: int, store_text: bool):
         LOG.exception("index UI error")
         return f"❌ Index erreur: {e}", ""
 
+async def ui_index_from_textarea(project: str, text1: str, text2: str, chunk_size: int, overlap: int, batch_size: int, store_text: bool):
+    files = [
+        {"path": "ui_text_1.txt", "text": text1 or ""},
+        {"path": "ui_text_2.txt", "text": text2 or ""},
+    ]
+    req = IndexRequest(
+        project_id=project,
+        files=[FileItem(**f) for f in files],
+        chunk_size=chunk_size,
+        overlap=overlap,
+        batch_size=batch_size,
+        store_text=store_text,
+    )
+    try:
+        job = create_and_start_job(req)
+        return f"🚀 Job (textarea) lancé: {job.job_id}", job.job_id
+    except Exception as e:
+        LOG.exception("index-from-text UI error")
+        return f"❌ Index (textarea) erreur: {e}", ""
+
 async def ui_status(job_id: str):
     if not job_id.strip():
         return "⚠️ Renseigne un job_id"
     try:
         st = await status(job_id)
         lines = [f"Job {st['job_id']} — stage={st['stage']} files={st['total_files']} chunks={st['total_chunks']} embedded={st['embedded']} indexed={st['indexed']}"]
-        lines += st.get("messages", [])[-…:]
+        lines += st.get("messages", [])[-100:]
         if st.get("errors"):
             lines.append("Erreurs:")
             lines += [f" - {e}" for e in st['errors']]
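The textarea path goes through the same IndexRequest/create_and_start_job pipeline as file-based indexing, so it can also be exercised without the UI (illustrative values):

    req = IndexRequest(
        project_id="demo",
        files=[FileItem(path="ui_text_1.txt", text="Un long texte de test. " * 50)],
        chunk_size=200,
        overlap=20,
    )
    job = create_and_start_job(req)
    print(job.job_id, job.stage)  # then poll ui_status / the status endpoint with this id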
@@ -621,7 +641,7 @@ with gr.Blocks(title="Remote Indexer — No-Qdrant (datasets+FAISS)", analytics_…
     jobid_tb = gr.Textbox(label="Job ID", value="", interactive=True)
     with gr.Row():
         wipe_btn = gr.Button("🧨 Wipe project", variant="stop")
-        index_btn = gr.Button("🚀 Indexer 2 documents", variant="primary")
+        index_btn = gr.Button("🚀 Indexer 2 documents (démo)", variant="primary")
         count_btn = gr.Button("📊 Count points", variant="secondary")
     with gr.Row():
         chunk_size = gr.Slider(64, 1024, value=200, step=8, label="chunk_size")
@@ -630,6 +650,11 @@ with gr.Blocks(title="Remote Indexer — No-Qdrant (datasets+FAISS)", analytics_…
     store_text = gr.Checkbox(value=True, label="store_text (payload)")
     out_log = gr.Textbox(lines=18, label="Logs / Résultats", interactive=False)
 
+    with gr.Accordion("Indexer depuis textarea (bypass fichiers)", open=False):
+        txt1 = gr.Textbox(label="Texte 1", value="Ceci est un texte de test assez long pour produire des chunks. " * 10, lines=6)
+        txt2 = gr.Textbox(label="Texte 2", value="Deuxième texte de test pour vérifier l'indexation et la recherche. " * 10, lines=6)
+        index_txt_btn = gr.Button("📝 Indexer ces 2 textes")
+
     with gr.Row():
         status_btn = gr.Button("📡 Status (refresh)")
         auto_chk = gr.Checkbox(False, label="⏱️ Auto-refresh status (2 s)")
@@ -646,6 +671,7 @@ with gr.Blocks(title="Remote Indexer — No-Qdrant (datasets+FAISS)", analytics_…
 
     wipe_btn.click(ui_wipe, inputs=[project_tb], outputs=[out_log])
     index_btn.click(ui_index_sample, inputs=[project_tb, chunk_size, overlap, batch_size, store_text], outputs=[out_log, jobid_tb])
+    index_txt_btn.click(ui_index_from_textarea, inputs=[project_tb, txt1, txt2, chunk_size, overlap, batch_size, store_text], outputs=[out_log, jobid_tb])
     count_btn.click(ui_count, inputs=[project_tb], outputs=[out_log])
 
     status_btn.click(ui_status, inputs=[jobid_tb], outputs=[out_log])