# ==========================================================
# SAFE-MODE PRELAUNCH CLEANUP (runs before any heavy imports)
# ==========================================================
# NOTE(review): the emoji prefixes inside the runtime strings of this file
# appear to have been mis-encoded at some point (UTF-8 read through a legacy
# code page). They are reproduced verbatim here because some of the original
# bytes were lost; restore them from a clean copy of the file if available.
import glob
import hmac
import os
import shutil
import time


def _prelaunch_cleanup(threshold_gb=45.0):
    """Free disk space at startup to avoid Hugging Face Space eviction.

    Cache directories are always removed. When the measured usage exceeds
    ``threshold_gb``, the contents of the document/persistent folders are
    removed as well, except for the FAISS index, its metadata and the
    glossary file.

    Args:
        threshold_gb: Usage (in GiB) above which the aggressive cleanup of
            the data folders is performed.
    """

    def _used_gb(path="/home/user/app"):
        """Return used space for ``path`` in GiB, clamped to [0, 49.9]."""
        try:
            used_bytes = shutil.disk_usage(path).used
        except Exception:
            # Best effort: an unreadable path is reported as "nothing used".
            return 0.0
        return max(0.0, min(used_bytes / (1024**3), 49.9))

    used = _used_gb()
    print(f"\n๐Ÿ’พ Startup disk usage: {used:.2f} GB")

    # Caches are removed unconditionally, whatever the current usage is.
    cache_paths = [
        os.path.expanduser("~/.cache/huggingface"),
        os.path.expanduser("~/.cache/hfhub"),
        "/home/user/.cache/huggingface",
        "/home/user/.cache",
        "/home/user/app/__pycache__",
        "/home/user/app/data/__pycache__",
    ]
    for cache_dir in cache_paths:
        if os.path.exists(cache_dir):
            shutil.rmtree(cache_dir, ignore_errors=True)

    if used > threshold_gb:
        print(f"โš ๏ธ Usage {used:.2f} GB > {threshold_gb} GB โ€” performing aggressive cleanup.")
        preserve = {"faiss.index", "faiss.index.meta.json", "glossary.json"}
        folders = [
            "/home/user/app/data/docs_cache",
            "/home/user/app/tmp_docs",
            "/home/user/app/persistent",
        ]
        for folder in folders:
            if not os.path.exists(folder):
                continue
            for entry in glob.glob(os.path.join(folder, "*")):
                if os.path.basename(entry) in preserve:
                    continue
                try:
                    if os.path.isfile(entry):
                        os.remove(entry)
                    else:
                        shutil.rmtree(entry, ignore_errors=True)
                except OSError as exc:
                    # Still best effort, but no longer silent: startup must
                    # not fail because a single entry could not be removed.
                    print(f"Cleanup could not remove {entry}: {exc}")
        print("๐Ÿงน Aggressive cleanup complete.")

    print(f"โœจ Disk after cleanup: {_used_gb():.2f} GB\n")


# NOTE(review): statement order is kept from the original; whether this line
# originally lived inside _prelaunch_cleanup cannot be told from the
# flattened source - confirm against a clean copy.
shutil.rmtree("/home/user/app/runtime_faiss", ignore_errors=True)
_prelaunch_cleanup()

# ==========================================================
# MAIN APP — Clinical Trial Chatbot
# ==========================================================
import gradio as gr
from sentence_transformers import SentenceTransformer
from core.hybrid_retriever import summarize_combined

APP_TITLE = "๐Ÿง  Clinical Trial Basics"
APP_DESC = (
    "Ask any clinical trial or GCP-related question. "
    "Retrieves and summarizes from ICH, GCDMP, EMA, FDA, Excel, and Web datasets."
)

# Detect deployment mode
PUBLIC_MODE = os.environ.get("PUBLIC_MODE", "true").lower() == "true"
ADMIN_USER = os.environ.get("ADMIN_USER", "admin")
ADMIN_PASS = os.environ.get("ADMIN_PASS", "changeme")

print(f"๐Ÿ” Running in {'PUBLIC' if PUBLIC_MODE else 'ADMIN'} mode.")
if not PUBLIC_MODE and ADMIN_PASS == "changeme":
    # The fallback password is public knowledge; make the risk visible.
    print("WARNING: ADMIN_PASS is not set; admin tools are protected by the default password.")


# ----------------------------------------------------------
# ADMIN AUTHENTICATION HELPER
# ----------------------------------------------------------
def check_admin_login(username, password):
    """Authenticate admin before showing rebuild/clear tools.

    Args:
        username: User name submitted through the login form.
        password: Password submitted through the login form.

    Returns:
        True only when both values match ADMIN_USER and ADMIN_PASS.
    """
    if not isinstance(username, str) or not isinstance(password, str):
        return False
    # Constant-time comparison so response timing does not leak how much of
    # a credential matched. Bytes are used because compare_digest rejects
    # non-ASCII str arguments. Both checks are always evaluated.
    user_ok = hmac.compare_digest(username.encode("utf-8"), ADMIN_USER.encode("utf-8"))
    pass_ok = hmac.compare_digest(password.encode("utf-8"), ADMIN_PASS.encode("utf-8"))
    return user_ok and pass_ok


# ----------------------------------------------------------
# MAINTENANCE FUNCTIONS
# ----------------------------------------------------------
import shutil
import json
import faiss
import pandas as pd
import numpy as np

DATA_PATHS = [
    "/home/user/app/persistent/faiss.index",
    "/home/user/app/persistent/faiss.index.meta.json",
    "/home/user/app/data/docs_cache",
]


def clear_index():
    """Delete every path listed in DATA_PATHS and report what happened.

    Returns:
        A newline-separated report with one line per handled path, or a
        notice when none of the paths existed. The report is also printed.
    """
    report = []
    for target in DATA_PATHS:
        if os.path.isdir(target):
            shutil.rmtree(target, ignore_errors=True)
            report.append(f"๐Ÿ—‘๏ธ Deleted folder: {target}")
        elif os.path.exists(target):
            try:
                os.remove(target)
            except OSError as exc:
                # Report the failure instead of raising into the UI handler.
                report.append(f"Could not delete file: {target} ({exc})")
            else:
                report.append(f"๐Ÿ—‘๏ธ Deleted file: {target}")
    msg = "\n".join(report) if report else "โ„น๏ธ No cache files found."
    print(msg)
    return msg


def rebuild_index():
    """Rebuild the FAISS index (currently placeholder logic).

    Returns:
        A status message; any exception raised while importing or running
        the rebuild is converted into a failure message.
    """
    try:
        # Imported lazily; an import failure is reported as a failed rebuild.
        from core.vector_sync import rebuild_faiss_from_glossary, _upload_to_dataset
        import pandas as pd, faiss, numpy as np
        from sentence_transformers import SentenceTransformer

        print("๐Ÿง  Rebuilding FAISS index (Glossary + Excel + Web)...")
        # ... (you can keep your current detailed rebuild logic here)
        return "โœ… Rebuild complete (placeholder logic)."
    except Exception as e:
        return f"โš ๏ธ Rebuild failed: {e}"


def rebuild_glossary():
    """Rebuild the glossary via core.glossary_builder and upload it.

    Returns:
        A status message describing success or the failure reason.
    """
    try:
        from core.glossary_builder import rebuild_and_upload

        rebuild_and_upload()
        return "โœ… Glossary rebuilt and uploaded successfully."
    except Exception as e:
        return f"โš ๏ธ Glossary rebuild failed: {e}"


# ----------------------------------------------------------
# CHATBOT CORE
# ----------------------------------------------------------
def chat_answer(query, mode="short"):
    """Answer a user query through summarize_combined.

    Args:
        query: Text entered by the user; None is treated as empty.
        mode: Passed through to summarize_combined.

    Returns:
        The value returned by summarize_combined, or a warning/error message.
    """
    try:
        query_clean = (query or "").strip()
        if not query_clean:
            return "โš ๏ธ Please enter a valid query."
        return summarize_combined(query_clean, mode=mode)
    except Exception as e:
        print("โŒ Chatbot error:", e)
        return f"โš ๏ธ Error: {e}"


# ----------------------------------------------------------
# GRADIO UI
# ----------------------------------------------------------
with gr.Blocks(theme="gradio/soft") as demo:
    gr.Markdown(f"# {APP_TITLE}")
    gr.Markdown(APP_DESC)

    query_box = gr.Textbox(
        label="Ask your clinical trial question",
        placeholder="e.g. What is an eCRF?",
        lines=2,
        show_label=True,
    )
    output_box = gr.HTML(label="Answer")

    with gr.Row():
        submit_btn = gr.Button("๐Ÿš€ Submit", variant="primary")
        # Only show admin tools if not in PUBLIC mode
        if not PUBLIC_MODE:
            rebuild_btn = gr.Button("๐Ÿ” Rebuild Index")
            rebuild_glossary_btn = gr.Button("๐Ÿ“˜ Rebuild Glossary")
            clear_btn = gr.Button("๐Ÿงน Clear Cache / Index")

    submit_btn.click(fn=chat_answer, inputs=[query_box], outputs=output_box)
    query_box.submit(fn=chat_answer, inputs=[query_box], outputs=output_box)

    if not PUBLIC_MODE:
        rebuild_btn.click(fn=rebuild_index, outputs=output_box)
        rebuild_glossary_btn.click(fn=rebuild_glossary, outputs=output_box)
        clear_btn.click(fn=clear_index, outputs=output_box)

# ----------------------------------------------------------
# LAUNCH APP WITH AUTH
# ----------------------------------------------------------
if __name__ == "__main__":
    print("๐Ÿš€ Starting Clinical Trial Chatbot...")
    print("๐Ÿง  Initializing retriever warm-up...")
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        # The login form is only enforced when the admin tools are exposed.
        auth=check_admin_login if not PUBLIC_MODE else None,
    )