File size: 6,788 Bytes
f9053c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
df80017
f9053c5
df80017
f9053c5
 
 
b05b805
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9053c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b05b805
 
 
 
 
 
f9053c5
b05b805
f9053c5
 
 
 
 
 
 
 
 
 
b05b805
f9053c5
b05b805
f9053c5
 
 
 
 
 
 
 
 
 
b05b805
f9053c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b05b805
 
 
 
 
 
f9053c5
b05b805
f9053c5
b05b805
 
 
 
f9053c5
 
b05b805
f9053c5
 
 
 
b05b805
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# ==========================================================
# SAFE-MODE PRELAUNCH CLEANUP (runs before any heavy imports)
# ==========================================================
import os, shutil, time, glob

def _prelaunch_cleanup(threshold_gb=45.0, app_root="/home/user/app", cache_paths=None):
    """Early cleanup to prevent Hugging Face Space eviction (50 GB limit).

    The cache directories in ``cache_paths`` are always deleted. When the
    measured usage exceeds ``threshold_gb``, the docs cache, temp-docs and
    persistent folders under ``app_root`` are emptied as well, keeping only
    the FAISS index, its metadata file and the glossary. The ``runtime_faiss``
    folder is always removed, and the closing usage figure is printed only
    after every deletion has finished.

    Args:
        threshold_gb: Usage in GiB above which the aggressive cleanup runs.
        app_root: Application directory whose disk usage is measured and whose
            sub-folders are cleaned.
        cache_paths: Directories removed unconditionally. ``None`` selects the
            default Hugging Face / user cache locations plus the
            ``__pycache__`` folders under ``app_root``.

    Returns:
        None. Progress is reported with ``print``.
    """
    def _used_gb(path=app_root):
        # The reading is clamped to the 0-49.9 GB range; a failed probe counts as 0 GB.
        try:
            used_bytes = shutil.disk_usage(path).used
            return max(0.0, min(used_bytes / (1024**3), 49.9))
        except Exception:
            return 0.0

    def _remove_entry(entry):
        # rmtree refuses symlinks, so only real directories are walked;
        # files and symlinks (including links to directories) are unlinked.
        try:
            if os.path.isdir(entry) and not os.path.islink(entry):
                shutil.rmtree(entry, ignore_errors=True)
            else:
                os.remove(entry)
        except OSError as err:
            # Best effort: report the failure and carry on with the rest.
            print(f"⚠️ Could not remove {entry}: {err}")

    used = _used_gb()
    print(f"\nπŸ’Ύ Startup disk usage: {used:.2f} GB")

    if cache_paths is None:
        cache_paths = [
            os.path.expanduser("~/.cache/huggingface"),
            os.path.expanduser("~/.cache/hfhub"),
            "/home/user/.cache/huggingface",
            "/home/user/.cache",
            os.path.join(app_root, "__pycache__"),
            os.path.join(app_root, "data", "__pycache__"),
        ]
    for p in cache_paths:
        if os.path.exists(p):
            shutil.rmtree(p, ignore_errors=True)

    if used > threshold_gb:
        print(f"⚠️ Usage {used:.2f} GB > {threshold_gb} GB β€” performing aggressive cleanup.")
        preserve = {"faiss.index", "faiss.index.meta.json", "glossary.json"}
        folders = [
            os.path.join(app_root, "data", "docs_cache"),
            os.path.join(app_root, "tmp_docs"),
            os.path.join(app_root, "persistent"),
        ]
        for folder in folders:
            # glob yields nothing for a missing folder; escape() keeps any
            # wildcard characters in the folder path literal.
            for entry in glob.glob(os.path.join(glob.escape(folder), "*")):
                if os.path.basename(entry) not in preserve:
                    _remove_entry(entry)
        print("🧹 Aggressive cleanup complete.")

    # Drop the runtime index before measuring so the final figure includes it.
    shutil.rmtree(os.path.join(app_root, "runtime_faiss"), ignore_errors=True)
    print(f"✨ Disk after cleanup: {_used_gb():.2f} GB\n")

# Run the cleanup right away, at import time, so it completes before the
# heavier third-party imports further down the file are loaded.
_prelaunch_cleanup()

# ==========================================================
# MAIN APP — Clinical Trial Chatbot
# ==========================================================
import gradio as gr
from sentence_transformers import SentenceTransformer
from core.hybrid_retriever import summarize_combined

# Heading and description shown at the top of the page.
APP_TITLE = "🧠 Clinical Trial Basics"
APP_DESC = (
    "Ask any clinical trial or GCP-related question. "
    "Retrieves and summarizes from ICH, GCDMP, EMA, FDA, Excel, and Web datasets."
)

# Deployment settings are read from the environment; each falls back to a
# default when its variable is unset. PUBLIC_MODE is true only for the
# (case-insensitive) value "true".
PUBLIC_MODE = os.getenv("PUBLIC_MODE", "true").lower() == "true"
ADMIN_USER = os.getenv("ADMIN_USER", "admin")
ADMIN_PASS = os.getenv("ADMIN_PASS", "changeme")

# Announce the selected mode once at start-up.
_mode_label = "PUBLIC" if PUBLIC_MODE else "ADMIN"
print(f"πŸ” Running in {_mode_label} mode.")

# ----------------------------------------------------------
# ADMIN AUTHENTICATION HELPER
# ----------------------------------------------------------
def check_admin_login(username, password):
    """Authenticate admin before showing rebuild/clear tools.

    Args:
        username: Login name supplied by the user.
        password: Password supplied by the user.

    Returns:
        bool: True only when ``username`` equals ``ADMIN_USER`` and
        ``password`` equals ``ADMIN_PASS``; False otherwise, including when
        either argument is not a string.
    """
    import hmac  # local import keeps this block self-contained

    if not isinstance(username, str) or not isinstance(password, str):
        return False

    # compare_digest takes time independent of where the values differ, so
    # response timing does not reveal how much of a guess was correct. The
    # values are encoded to bytes because compare_digest rejects non-ASCII str.
    user_ok = hmac.compare_digest(
        username.encode("utf-8"), str(ADMIN_USER).encode("utf-8")
    )
    pass_ok = hmac.compare_digest(
        password.encode("utf-8"), str(ADMIN_PASS).encode("utf-8")
    )
    # Both checks are evaluated before combining, so there is no short-circuit.
    return user_ok and pass_ok

# ----------------------------------------------------------
# MAINTENANCE FUNCTIONS
# ----------------------------------------------------------
import shutil, json, faiss, pandas as pd, numpy as np

# Paths that clear_index() deletes: the persisted FAISS index file, its
# metadata JSON, and the docs_cache path under the app's data folder.
DATA_PATHS = [
    "/home/user/app/persistent/faiss.index",
    "/home/user/app/persistent/faiss.index.meta.json",
    "/home/user/app/data/docs_cache",
]

def clear_index():
    """Delete every path listed in ``DATA_PATHS`` and report the outcome.

    Directories are removed recursively; files and symlinks are unlinked.
    A path that cannot be deleted is reported instead of raising, so one
    failure does not stop the remaining paths from being processed.

    Returns:
        str: One line per path that was deleted or could not be deleted,
        joined with newlines, or a notice when none of the paths existed.
        The same text is also printed.
    """
    removed = []
    for p in DATA_PATHS:
        # rmtree refuses symlinks, so a link to a directory is unlinked like a file.
        is_folder = os.path.isdir(p) and not os.path.islink(p)
        if is_folder:
            shutil.rmtree(p, ignore_errors=True)
            # rmtree ran best-effort; only claim success if the folder is gone.
            if os.path.exists(p):
                removed.append(f"⚠️ Could not fully delete folder: {p}")
            else:
                removed.append(f"πŸ—‘οΈ Deleted folder: {p}")
        elif os.path.lexists(p):
            try:
                os.remove(p)
            except OSError as err:
                removed.append(f"⚠️ Could not delete file: {p} ({err})")
            else:
                removed.append(f"πŸ—‘οΈ Deleted file: {p}")
    msg = "\n".join(removed) if removed else "ℹ️ No cache files found."
    print(msg)
    return msg

def rebuild_index():
    """Placeholder for the FAISS index rebuild.

    Imports the rebuild dependencies and prints a progress line, but performs
    no rebuild work yet.

    Returns:
        str: The placeholder success text, or a failure text carrying the
        exception message if anything inside the ``try`` (for example one of
        the imports) raises.
    """
    try:
        from core.vector_sync import rebuild_faiss_from_glossary, _upload_to_dataset
        import pandas as pd, faiss, numpy as np
        from sentence_transformers import SentenceTransformer
        print("🧠 Rebuilding FAISS index (Glossary + Excel + Web)...")
        # NOTE: the detailed rebuild logic is not implemented here yet.
        status = "βœ… Rebuild complete (placeholder logic)."
    except Exception as err:
        status = f"⚠️ Rebuild failed: {err}"
    return status

def rebuild_glossary():
    """Run the glossary rebuild-and-upload routine and return a status line.

    Returns:
        str: A success message when ``rebuild_and_upload`` finishes, or a
        failure message containing the exception text if the import or the
        call raises.
    """
    try:
        # Imported inside the try so an import failure is reported as a message.
        from core.glossary_builder import rebuild_and_upload
        rebuild_and_upload()
    except Exception as err:
        return f"⚠️ Glossary rebuild failed: {err}"
    return "βœ… Glossary rebuilt and uploaded successfully."

# ----------------------------------------------------------
# CHATBOT CORE
# ----------------------------------------------------------
def chat_answer(query, mode="short"):
    """Answer a user question and return markup for the answer pane.

    Args:
        query: Raw question text; ``None`` or any non-string value is treated
            as an empty query.
        mode: Passed through to ``summarize_combined`` (default ``"short"``).

    Returns:
        str: The result of ``summarize_combined`` for the stripped query, a
        prompt to enter a query when it is blank, or an error notice if
        summarization raises.
    """
    import html  # local import keeps this block self-contained

    query_clean = query.strip() if isinstance(query, str) else ""
    if not query_clean:
        return "<i>⚠️ Please enter a valid query.</i>"
    try:
        return summarize_combined(query_clean, mode=mode)
    except Exception as e:
        print("❌ Chatbot error:", e)
        # The exception text may echo user input; escape it so it cannot
        # inject markup into the HTML that displays this string.
        return f"<i>⚠️ Error: {html.escape(str(e))}</i>"

# ----------------------------------------------------------
# GRADIO UI
# ----------------------------------------------------------
# Build the page: title, description, a question box, an HTML answer pane,
# and a button row. Admin maintenance buttons exist only outside PUBLIC mode.
with gr.Blocks(theme="gradio/soft") as demo:
    gr.Markdown(f"# {APP_TITLE}")
    gr.Markdown(APP_DESC)

    query_box = gr.Textbox(
        label="Ask your clinical trial question",
        placeholder="e.g. What is an eCRF?",
        lines=2,
        show_label=True
    )
    # Answers and maintenance status messages are all written to this pane.
    output_box = gr.HTML(label="Answer")

    with gr.Row():
        submit_btn = gr.Button("πŸš€ Submit", variant="primary")

        # Only show admin tools if not in PUBLIC mode
        if not PUBLIC_MODE:
            rebuild_btn = gr.Button("πŸ” Rebuild Index")
            rebuild_glossary_btn = gr.Button("πŸ“˜ Rebuild Glossary")
            clear_btn = gr.Button("🧹 Clear Cache / Index")

    # Clicking Submit and submitting the textbox both call chat_answer. Only
    # the query is passed, so chat_answer runs with its default mode.
    submit_btn.click(fn=chat_answer, inputs=[query_box], outputs=output_box)
    query_box.submit(fn=chat_answer, inputs=[query_box], outputs=output_box)

    # The admin buttons are defined only when PUBLIC_MODE is off, so their
    # handlers are wired under the same condition. Each handler takes no
    # inputs and its returned status string is written to the answer pane.
    if not PUBLIC_MODE:
        rebuild_btn.click(fn=rebuild_index, outputs=output_box)
        rebuild_glossary_btn.click(fn=rebuild_glossary, outputs=output_box)
        clear_btn.click(fn=clear_index, outputs=output_box)

# ----------------------------------------------------------
# LAUNCH APP WITH AUTH
# ----------------------------------------------------------
if __name__ == "__main__":
    print("πŸš€ Starting Clinical Trial Chatbot...")
    # NOTE: this line only announces a warm-up; no warm-up call is made here.
    print("🧠 Initializing retriever warm-up...")

    # PUBLIC mode launches without a login callback; otherwise every login
    # attempt is checked by check_admin_login.
    auth_handler = None if PUBLIC_MODE else check_admin_login
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "auth": auth_handler,
    }
    demo.launch(**launch_options)