Spaces:
Running
Running
File size: 6,788 Bytes
f9053c5 df80017 f9053c5 df80017 f9053c5 b05b805 f9053c5 b05b805 f9053c5 b05b805 f9053c5 b05b805 f9053c5 b05b805 f9053c5 b05b805 f9053c5 b05b805 f9053c5 b05b805 f9053c5 b05b805 f9053c5 b05b805 f9053c5 b05b805 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 |
# ==========================================================
# SAFE-MODE PRELAUNCH CLEANUP (runs before any heavy imports)
# ==========================================================
import os, shutil, time, glob
def _prelaunch_cleanup(threshold_gb=45.0):
    """Free disk space early to prevent Hugging Face Space eviction (50 GB limit).

    The known cache directories are always removed. When the usage measured
    at startup (before any cache is removed) exceeds ``threshold_gb``, the
    document and persistent folders are emptied as well, keeping only the
    entries whose names are listed in ``preserve``.

    Args:
        threshold_gb: Startup usage in GiB above which the aggressive
            cleanup is performed.
    """

    def _used_gb(path="/home/user/app"):
        """Return the used space reported for ``path`` in GiB, or 0.0 on error."""
        try:
            used_bytes = shutil.disk_usage(path).used
        except OSError:
            # Path missing or not readable: report zero instead of failing startup.
            return 0.0
        # NOTE(review): the 49.9 upper clamp presumably mirrors the 50 GB
        # quota mentioned above - confirm.
        return max(0.0, min(used_bytes / (1024**3), 49.9))

    used = _used_gb()
    print(f"\nπΎ Startup disk usage: {used:.2f} GB")

    cache_paths = [
        os.path.expanduser("~/.cache/huggingface"),
        os.path.expanduser("~/.cache/hfhub"),
        "/home/user/.cache/huggingface",
        "/home/user/.cache",
        "/home/user/app/__pycache__",
        "/home/user/app/data/__pycache__",
    ]
    for cache_dir in cache_paths:
        if os.path.exists(cache_dir):
            shutil.rmtree(cache_dir, ignore_errors=True)

    if used > threshold_gb:
        print(f"β οΈ Usage {used:.2f} GB > {threshold_gb} GB β performing aggressive cleanup.")
        preserve = {"faiss.index", "faiss.index.meta.json", "glossary.json"}
        folders = ["/home/user/app/data/docs_cache", "/home/user/app/tmp_docs", "/home/user/app/persistent"]
        for folder in folders:
            if not os.path.exists(folder):
                continue
            for entry in glob.glob(os.path.join(folder, "*")):
                if os.path.basename(entry) in preserve:
                    continue
                try:
                    # Symlinks (including broken ones and links to folders)
                    # must be unlinked: rmtree refuses to operate on a link,
                    # and with ignore_errors=True that refusal is silent.
                    if os.path.isfile(entry) or os.path.islink(entry):
                        os.remove(entry)
                    else:
                        shutil.rmtree(entry, ignore_errors=True)
                except OSError as err:
                    # Best effort: report the failure and keep cleaning.
                    print(f"Could not remove {entry}: {err}")
        print("π§Ή Aggressive cleanup complete.")

    print(f"β¨ Disk after cleanup: {_used_gb():.2f} GB\n")
# Remove the runtime FAISS directory if it exists; errors are ignored.
# NOTE(review): indentation was lost in this copy of the file, so this line
# may originally have been the last statement of _prelaunch_cleanup - confirm.
shutil.rmtree("/home/user/app/runtime_faiss", ignore_errors=True)
# Run the cleanup once, at import time.
_prelaunch_cleanup()
# ==========================================================
# MAIN APP — Clinical Trial Chatbot
# ==========================================================
import gradio as gr
from sentence_transformers import SentenceTransformer
from core.hybrid_retriever import summarize_combined
# Heading and description shown at the top of the page.
APP_TITLE = "π§ Clinical Trial Basics"
APP_DESC = (
    "Ask any clinical trial or GCP-related question. Retrieves and summarizes "
    "from ICH, GCDMP, EMA, FDA, Excel, and Web datasets."
)

# Deployment mode and admin credentials are read from the environment.
# PUBLIC_MODE is true only when the variable equals "true" (case-insensitive);
# it defaults to "true" when unset.
# NOTE(review): the fallback password is a well-known placeholder - set
# ADMIN_PASS explicitly for any non-public deployment.
PUBLIC_MODE = os.getenv("PUBLIC_MODE", "true").lower() == "true"
ADMIN_USER = os.getenv("ADMIN_USER", "admin")
ADMIN_PASS = os.getenv("ADMIN_PASS", "changeme")
print(f"π Running in {'ADMIN' if not PUBLIC_MODE else 'PUBLIC'} mode.")
# ----------------------------------------------------------
# ADMIN AUTHENTICATION HELPER
# ----------------------------------------------------------
def check_admin_login(username, password):
    """Authenticate admin before showing rebuild/clear tools.

    Args:
        username: Username submitted on the login form.
        password: Password submitted on the login form.

    Returns:
        True only when both values are strings equal to ``ADMIN_USER`` and
        ``ADMIN_PASS`` respectively; False otherwise.
    """
    import hmac  # local import: only this helper needs it

    # Anything that is not a string can never equal the configured values.
    if not isinstance(username, str) or not isinstance(password, str):
        return False

    # compare_digest only accepts ASCII str, so compare UTF-8 bytes instead.
    # Both comparisons are evaluated before combining, so a wrong username
    # does not short-circuit the password check.
    user_ok = hmac.compare_digest(username.encode("utf-8"), ADMIN_USER.encode("utf-8"))
    pass_ok = hmac.compare_digest(password.encode("utf-8"), ADMIN_PASS.encode("utf-8"))
    return user_ok and pass_ok
# ----------------------------------------------------------
# MAINTENANCE FUNCTIONS
# ----------------------------------------------------------
import shutil, json, faiss, pandas as pd, numpy as np
# Index files and cache folder that clear_index() deletes.
DATA_PATHS = [
    "/home/user/app/persistent/faiss.index",
    "/home/user/app/persistent/faiss.index.meta.json",
    "/home/user/app/data/docs_cache",
]
def clear_index(paths=None):
    """Delete cached index files/folders and report what happened.

    Args:
        paths: Optional iterable of file or directory paths to delete.
            When omitted, the module-level ``DATA_PATHS`` list is used.

    Returns:
        A newline-joined message with one line per path that was deleted or
        could not be deleted, or a notice when none of the paths exist. The
        same message is printed.
    """
    targets = DATA_PATHS if paths is None else paths
    removed = []
    for p in targets:
        # A symlink is unlinked rather than followed: rmtree refuses links.
        is_folder = os.path.isdir(p) and not os.path.islink(p)
        if not is_folder and not os.path.lexists(p):
            continue
        try:
            if is_folder:
                shutil.rmtree(p)
            else:
                os.remove(p)
        except OSError as err:
            # Report the failure instead of crashing or claiming success.
            removed.append(f"Could not delete {p}: {err}")
            continue
        if is_folder:
            removed.append(f"ποΈ Deleted folder: {p}")
        else:
            removed.append(f"ποΈ Deleted file: {p}")
    msg = "\n".join(removed) if removed else "βΉοΈ No cache files found."
    print(msg)
    return msg
def rebuild_index():
    """Rebuild the FAISS index and return a status message.

    The rebuild itself is currently a placeholder: after the imports succeed
    only a log line is printed.

    Returns:
        A success message, or a failure message containing the exception
        text if anything inside the ``try`` raises (including the imports).
    """
    try:
        # The imports live inside the try, so a missing module is returned
        # as a failure message instead of raising.
        from core.vector_sync import rebuild_faiss_from_glossary, _upload_to_dataset  # noqa: F401
        import pandas as pd, faiss, numpy as np  # noqa: F401,E401
        from sentence_transformers import SentenceTransformer  # noqa: F401

        print("π§ Rebuilding FAISS index (Glossary + Excel + Web)...")
        # TODO: restore the detailed rebuild logic here.
        # The literal below was split across two lines in this copy (the
        # emoji's trailing byte had turned into a line break); it is one
        # string again.
        return "✅ Rebuild complete (placeholder logic)."
    except Exception as e:
        return f"β οΈ Rebuild failed: {e}"
def rebuild_glossary():
    """Rebuild and upload the glossary, returning a status message.

    Returns:
        A success message, or a failure message containing the exception
        text if the import or ``rebuild_and_upload()`` raises.
    """
    try:
        # Imported inside the try so that a missing module is returned as a
        # failure message instead of raising.
        from core.glossary_builder import rebuild_and_upload

        rebuild_and_upload()
        # The literal below was split across two lines in this copy (the
        # emoji's trailing byte had turned into a line break); it is one
        # string again.
        return "✅ Glossary rebuilt and uploaded successfully."
    except Exception as e:
        return f"β οΈ Glossary rebuild failed: {e}"
# ----------------------------------------------------------
# CHATBOT CORE
# ----------------------------------------------------------
def chat_answer(query, mode="short"):
    """Answer a user query as an HTML string.

    Args:
        query: Text typed by the user. ``None``, non-string values and
            blank strings are treated as an empty query.
        mode: Passed through to ``summarize_combined`` as ``mode``.

    Returns:
        The value returned by ``summarize_combined`` for the stripped query,
        a prompt asking for a valid query when it is empty, or an error
        message when the summarizer raises.
    """
    import html  # local import: only needed to escape error text

    # Guard against None / non-string input instead of failing on .strip().
    query_clean = query.strip() if isinstance(query, str) else ""
    if not query_clean:
        return "<i>β οΈ Please enter a valid query.</i>"

    try:
        return summarize_combined(query_clean, mode=mode)
    except Exception as e:
        print("β Chatbot error:", e)
        # The result is rendered as HTML, so escape the exception text.
        return f"<i>β οΈ Error: {html.escape(str(e))}</i>"
# ----------------------------------------------------------
# GRADIO UI
# ----------------------------------------------------------
# Build the page: title, description, query box, answer area and buttons.
# NOTE(review): indentation was lost in this copy of the file; the nesting
# below (admin buttons inside the same Row as Submit, event wiring inside the
# Blocks context) is reconstructed - confirm against the original layout.
with gr.Blocks(theme="gradio/soft") as demo:
    gr.Markdown(f"# {APP_TITLE}")
    gr.Markdown(APP_DESC)
    query_box = gr.Textbox(
        label="Ask your clinical trial question",
        placeholder="e.g. What is an eCRF?",
        lines=2,
        show_label=True
    )
    output_box = gr.HTML(label="Answer")
    with gr.Row():
        submit_btn = gr.Button("π Submit", variant="primary")
        # Only show admin tools if not in PUBLIC mode
        if not PUBLIC_MODE:
            rebuild_btn = gr.Button("π Rebuild Index")
            rebuild_glossary_btn = gr.Button("π Rebuild Glossary")
            clear_btn = gr.Button("π§Ή Clear Cache / Index")
    # The button click and the textbox submit event both call chat_answer with
    # only the query, so its mode argument keeps the default value.
    submit_btn.click(fn=chat_answer, inputs=[query_box], outputs=output_box)
    query_box.submit(fn=chat_answer, inputs=[query_box], outputs=output_box)
    # The admin buttons exist only when not in PUBLIC mode, so they are wired
    # under the same condition; each writes its status message to output_box.
    if not PUBLIC_MODE:
        rebuild_btn.click(fn=rebuild_index, outputs=output_box)
        rebuild_glossary_btn.click(fn=rebuild_glossary, outputs=output_box)
        clear_btn.click(fn=clear_index, outputs=output_box)
# ----------------------------------------------------------
# LAUNCH APP WITH AUTH
# ----------------------------------------------------------
if __name__ == "__main__":
    print("π Starting Clinical Trial Chatbot...")
    print("π§ Initializing retriever warm-up...")
    # The login callback is passed only when not running in PUBLIC mode.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "auth": None if PUBLIC_MODE else check_admin_login,
    }
    demo.launch(**launch_options)
|