essprasad's picture
Update app.py
df80017 verified
raw
history blame
6.79 kB
# ==========================================================
# SAFE-MODE PRELAUNCH CLEANUP (runs before any heavy imports)
# ==========================================================
import os, shutil, time, glob
def _prelaunch_cleanup(threshold_gb=45.0):
"""Early cleanup to prevent Hugging Face Space eviction (50 GB limit)."""
def _used_gb(path="/home/user/app"):
try:
total, used, free = shutil.disk_usage(path)
used_gb = max(0.0, min(used / (1024**3), 49.9))
return used_gb
except Exception:
return 0.0
used = _used_gb()
print(f"\nπŸ’Ύ Startup disk usage: {used:.2f} GB")
cache_paths = [
os.path.expanduser("~/.cache/huggingface"),
os.path.expanduser("~/.cache/hfhub"),
"/home/user/.cache/huggingface",
"/home/user/.cache",
"/home/user/app/__pycache__",
"/home/user/app/data/__pycache__",
]
for p in cache_paths:
if os.path.exists(p):
shutil.rmtree(p, ignore_errors=True)
if used > threshold_gb:
print(f"⚠️ Usage {used:.2f} GB > {threshold_gb} GB β€” performing aggressive cleanup.")
preserve = {"faiss.index", "faiss.index.meta.json", "glossary.json"}
folders = ["/home/user/app/data/docs_cache", "/home/user/app/tmp_docs", "/home/user/app/persistent"]
for folder in folders:
if os.path.exists(folder):
for f in glob.glob(os.path.join(folder, "*")):
if os.path.basename(f) in preserve:
continue
try:
if os.path.isfile(f):
os.remove(f)
else:
shutil.rmtree(f, ignore_errors=True)
except Exception:
pass
print("🧹 Aggressive cleanup complete.")
print(f"✨ Disk after cleanup: {_used_gb():.2f} GB\n")
# Drop the runtime_faiss directory, then run the cleanup pass; both happen
# at import time, ahead of the heavy imports that follow.
_RUNTIME_FAISS_DIR = "/home/user/app/runtime_faiss"
shutil.rmtree(_RUNTIME_FAISS_DIR, ignore_errors=True)
_prelaunch_cleanup()
# ==========================================================
# MAIN APP — Clinical Trial Chatbot
# ==========================================================
import gradio as gr
from sentence_transformers import SentenceTransformer
from core.hybrid_retriever import summarize_combined
# Title and description rendered at the top of the UI.
APP_TITLE = "🧠 Clinical Trial Basics"
APP_DESC = (
    "Ask any clinical trial or GCP-related question. "
    "Retrieves and summarizes from ICH, GCDMP, EMA, FDA, Excel, and Web datasets."
)

# Detect deployment mode. Only the (case-insensitive) value "true" selects
# PUBLIC mode; surrounding whitespace in the variable is ignored.
PUBLIC_MODE = os.environ.get("PUBLIC_MODE", "true").strip().lower() == "true"

# Admin credentials, used by the login check when not in PUBLIC mode.
ADMIN_USER = os.environ.get("ADMIN_USER", "admin")
ADMIN_PASS = os.environ.get("ADMIN_PASS", "changeme")

print(f"🔐 Running in {'PUBLIC' if PUBLIC_MODE else 'ADMIN'} mode.")
if not PUBLIC_MODE and ADMIN_PASS == "changeme":
    # The built-in fallback password is public knowledge; make its use visible.
    print("⚠️ ADMIN_PASS is still the default value — set ADMIN_PASS to secure admin mode.")
# ----------------------------------------------------------
# ADMIN AUTHENTICATION HELPER
# ----------------------------------------------------------
def check_admin_login(username, password):
    """Authenticate admin before showing rebuild/clear tools.

    Args:
        username: Login name submitted by the client.
        password: Password submitted by the client.

    Returns:
        True only when both values match ADMIN_USER and ADMIN_PASS; False
        otherwise, including when either value is not a string.
    """
    import hmac

    if not isinstance(username, str) or not isinstance(password, str):
        return False
    # Constant-time comparison on bytes (compare_digest rejects non-ASCII str).
    # Both checks are evaluated before combining, so a wrong username does
    # not skip the password comparison.
    user_ok = hmac.compare_digest(username.encode("utf-8"), ADMIN_USER.encode("utf-8"))
    pass_ok = hmac.compare_digest(password.encode("utf-8"), ADMIN_PASS.encode("utf-8"))
    return user_ok and pass_ok
# ----------------------------------------------------------
# MAINTENANCE FUNCTIONS
# ----------------------------------------------------------
import shutil, json, faiss, pandas as pd, numpy as np
# Index files and cache folder that clear_index() deletes.
_PERSISTENT_DIR = "/home/user/app/persistent"
DATA_PATHS = [
    f"{_PERSISTENT_DIR}/faiss.index",
    f"{_PERSISTENT_DIR}/faiss.index.meta.json",
    "/home/user/app/data/docs_cache",
]
def clear_index(paths=None):
    """Delete the index files and cache folders and report what happened.

    Args:
        paths: Files/directories to delete. ``None`` (the default, and what
            the UI button uses) selects the module-level DATA_PATHS.

    Returns:
        A newline-separated report with one line per deleted or failed path,
        or a notice when none of the paths existed. The same text is printed.
    """
    targets = DATA_PATHS if paths is None else paths
    report = []
    for p in targets:
        try:
            if os.path.isdir(p) and not os.path.islink(p):
                # No ignore_errors here: a failed delete must not be
                # reported as a success.
                shutil.rmtree(p)
                report.append(f"🗑️ Deleted folder: {p}")
            elif os.path.lexists(p):
                # lexists also catches dangling symlinks.
                os.remove(p)
                report.append(f"🗑️ Deleted file: {p}")
        except OSError as exc:
            # Surface the failure in the UI instead of raising from the handler.
            report.append(f"⚠️ Could not delete {p}: {exc}")
    msg = "\n".join(report) if report else "ℹ️ No cache files found."
    print(msg)
    return msg
def rebuild_index():
    """Admin action: rebuild the FAISS index (currently placeholder logic).

    Returns:
        A status string: the success message, or a failure message carrying
        the exception text. Exceptions are never propagated to the caller.
    """
    try:
        # Imported on demand so a missing or broken module shows up as a
        # failure message here. The pandas/faiss/numpy/SentenceTransformer
        # re-imports were dropped: the file already imports them at top level.
        from core.vector_sync import rebuild_faiss_from_glossary, _upload_to_dataset  # noqa: F401
        print("🧠 Rebuilding FAISS index (Glossary + Excel + Web)...")
        # ... (you can keep your current detailed rebuild logic here)
        return "✅ Rebuild complete (placeholder logic)."
    except Exception as e:
        # Log as well as return, matching chat_answer's error handling.
        print("❌ Rebuild error:", e)
        return f"⚠️ Rebuild failed: {e}"
def rebuild_glossary():
    """Admin action: run core.glossary_builder.rebuild_and_upload().

    Returns:
        A status string: the success message, or a failure message carrying
        the exception text. Exceptions are never propagated to the caller.
    """
    try:
        # Imported on demand so an import problem is reported as a failure.
        from core.glossary_builder import rebuild_and_upload
        rebuild_and_upload()
        return "✅ Glossary rebuilt and uploaded successfully."
    except Exception as e:
        # Log as well as return, matching chat_answer's error handling.
        print("❌ Glossary rebuild error:", e)
        return f"⚠️ Glossary rebuild failed: {e}"
# ----------------------------------------------------------
# CHATBOT CORE
# ----------------------------------------------------------
def chat_answer(query, mode="short"):
    """Answer a user question via summarize_combined and return HTML.

    Args:
        query: Raw text from the question box; ``None`` is treated as empty.
        mode: Passed through to summarize_combined as ``mode``.

    Returns:
        The summarizer's result, or an italic HTML notice when the query is
        blank or an error occurred. Exceptions are never propagated.
    """
    import html

    try:
        query_clean = (query or "").strip()
        if not query_clean:
            return "<i>⚠️ Please enter a valid query.</i>"
        return summarize_combined(query_clean, mode=mode)
    except Exception as e:
        print("❌ Chatbot error:", e)
        # The result is rendered as HTML, so escape the exception text to
        # keep any markup in it from being interpreted.
        return f"<i>⚠️ Error: {html.escape(str(e))}</i>"
# ----------------------------------------------------------
# GRADIO UI
# ----------------------------------------------------------
with gr.Blocks(theme="gradio/soft") as demo:
    gr.Markdown(f"# {APP_TITLE}")
    gr.Markdown(APP_DESC)

    query_box = gr.Textbox(
        label="Ask your clinical trial question",
        placeholder="e.g. What is an eCRF?",
        lines=2,
        show_label=True
    )
    output_box = gr.HTML(label="Answer")

    with gr.Row():
        submit_btn = gr.Button("🚀 Submit", variant="primary")
        # Only show admin tools if not in PUBLIC mode
        if not PUBLIC_MODE:
            rebuild_btn = gr.Button("🔁 Rebuild Index")
            rebuild_glossary_btn = gr.Button("📘 Rebuild Glossary")
            clear_btn = gr.Button("🧹 Clear Cache / Index")

    # Button click and Enter in the textbox both run the chatbot. Only the
    # query is passed, so chat_answer uses its default mode.
    submit_btn.click(fn=chat_answer, inputs=[query_box], outputs=output_box)
    query_box.submit(fn=chat_answer, inputs=[query_box], outputs=output_box)

    # The admin buttons exist only outside PUBLIC mode, so wire them under
    # the same condition; their status text goes to the answer pane.
    if not PUBLIC_MODE:
        rebuild_btn.click(fn=rebuild_index, outputs=output_box)
        rebuild_glossary_btn.click(fn=rebuild_glossary, outputs=output_box)
        clear_btn.click(fn=clear_index, outputs=output_box)
# ----------------------------------------------------------
# LAUNCH APP WITH AUTH
# ----------------------------------------------------------
if __name__ == "__main__":
    print("🚀 Starting Clinical Trial Chatbot...")
    print("🧠 Initializing retriever warm-up...")
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
        # PUBLIC mode runs without a login; otherwise require admin credentials.
        auth=None if PUBLIC_MODE else check_admin_login
    )