Spaces:
Running
Running
| # ========================================================== | |
| # SAFE-MODE PRELAUNCH CLEANUP (runs before any heavy imports) | |
| # ========================================================== | |
| import os, shutil, time, glob | |
def _prelaunch_cleanup(threshold_gb=45.0):
    """Early cleanup to prevent Hugging Face Space eviction (50 GB limit).

    Always removes the cache directories listed in ``cache_paths``. When the
    disk usage measured on entry (i.e. before the caches are removed) is above
    ``threshold_gb``, the document/persistent folders are emptied as well,
    keeping only the file names listed in ``preserve``. Progress is reported
    with ``print``; nothing is returned.

    Args:
        threshold_gb: Usage, in GiB, above which the aggressive cleanup runs.
    """

    def _used_gb(path="/home/user/app"):
        """Return the used space (GiB) of the filesystem holding ``path``."""
        # shutil.disk_usage reports on the whole filesystem that contains
        # `path`, not on the size of the directory itself.
        try:
            used_bytes = shutil.disk_usage(path).used
        except OSError:
            # Path missing or not readable: report 0 so startup continues.
            return 0.0
        # Clamped to [0, 49.9] as in the original code -- presumably to keep
        # the reported figure under the 50 GB limit; TODO confirm intent.
        return max(0.0, min(used_bytes / (1024**3), 49.9))

    used = _used_gb()
    print(f"\nπΎ Startup disk usage: {used:.2f} GB")

    cache_paths = [
        os.path.expanduser("~/.cache/huggingface"),
        os.path.expanduser("~/.cache/hfhub"),
        "/home/user/.cache/huggingface",
        "/home/user/.cache",
        "/home/user/app/__pycache__",
        "/home/user/app/data/__pycache__",
    ]
    for p in cache_paths:
        if os.path.exists(p):
            shutil.rmtree(p, ignore_errors=True)

    # The decision below uses the measurement taken before the caches above
    # were removed.
    if used > threshold_gb:
        print(f"β οΈ Usage {used:.2f} GB > {threshold_gb} GB β performing aggressive cleanup.")
        preserve = {"faiss.index", "faiss.index.meta.json", "glossary.json"}
        folders = ["/home/user/app/data/docs_cache", "/home/user/app/tmp_docs", "/home/user/app/persistent"]
        for folder in folders:
            if not os.path.exists(folder):
                continue
            for entry in glob.glob(os.path.join(folder, "*")):
                if os.path.basename(entry) in preserve:
                    continue
                try:
                    if os.path.isdir(entry) and not os.path.islink(entry):
                        shutil.rmtree(entry, ignore_errors=True)
                    else:
                        # Regular files and symlinks (including broken ones
                        # and links to directories): shutil.rmtree refuses
                        # symlinks, so they must be unlinked instead.
                        os.remove(entry)
                except OSError as err:
                    # Best effort: keep going, but do not hide the failure.
                    print(f"Could not remove {entry}: {err}")
        print("π§Ή Aggressive cleanup complete.")

    print(f"β¨ Disk after cleanup: {_used_gb():.2f} GB\n")
# Drop any leftover runtime FAISS directory, then run the startup cleanup
# right away so both happen before the heavy imports further down.
shutil.rmtree(path="/home/user/app/runtime_faiss", ignore_errors=True)
_prelaunch_cleanup()
| # ========================================================== | |
| # MAIN APP — Clinical Trial Chatbot | |
| # ========================================================== | |
| import gradio as gr | |
| from sentence_transformers import SentenceTransformer | |
| from core.hybrid_retriever import summarize_combined | |
# Text shown at the top of the UI.
APP_TITLE = "π§ Clinical Trial Basics"
APP_DESC = (
    "Ask any clinical trial or GCP-related question. "
    "Retrieves and summarizes from ICH, GCDMP, EMA, FDA, Excel, and Web datasets."
)

# Detect deployment mode.
# PUBLIC_MODE defaults to "true"; any other value switches to ADMIN mode.
# Surrounding whitespace is ignored so a value such as "true " does not
# silently enable ADMIN mode.
PUBLIC_MODE = os.environ.get("PUBLIC_MODE", "true").strip().lower() == "true"

# Admin credentials come from the environment; the fallbacks are placeholders.
ADMIN_USER = os.environ.get("ADMIN_USER", "admin")
ADMIN_PASS = os.environ.get("ADMIN_PASS", "changeme")

print(f"π Running in {'PUBLIC' if PUBLIC_MODE else 'ADMIN'} mode.")

# Running the admin tools behind the well-known default password is almost
# certainly a misconfiguration, so make it visible in the logs.
if not PUBLIC_MODE and ADMIN_PASS == "changeme":
    print("WARNING: ADMIN mode is using the default ADMIN_PASS; set ADMIN_PASS in the environment.")
| # ---------------------------------------------------------- | |
| # ADMIN AUTHENTICATION HELPER | |
| # ---------------------------------------------------------- | |
def check_admin_login(username, password):
    """Authenticate admin before showing rebuild/clear tools.

    Args:
        username: User name submitted on the login form.
        password: Password submitted on the login form.

    Returns:
        True only when both values match ADMIN_USER and ADMIN_PASS.
    """
    import hmac  # local import: only needed for the constant-time comparison

    if not isinstance(username, str) or not isinstance(password, str):
        return False

    # compare_digest avoids leaking, through timing, how much of a credential
    # matched. It only accepts ASCII str, so compare the UTF-8 bytes instead.
    user_ok = hmac.compare_digest(username.encode("utf-8"), ADMIN_USER.encode("utf-8"))
    pass_ok = hmac.compare_digest(password.encode("utf-8"), ADMIN_PASS.encode("utf-8"))
    # Both comparisons are evaluated before combining, so a wrong user name
    # does not short-circuit the password check.
    return user_ok and pass_ok
| # ---------------------------------------------------------- | |
| # MAINTENANCE FUNCTIONS | |
| # ---------------------------------------------------------- | |
| import shutil, json, faiss, pandas as pd, numpy as np | |
# Files and folders removed by clear_index().
DATA_PATHS = [
    "/home/user/app/persistent/faiss.index",
    "/home/user/app/persistent/faiss.index.meta.json",
    "/home/user/app/data/docs_cache",
]


def clear_index():
    """Delete every path in DATA_PATHS and report what happened.

    Returns:
        A newline-separated report with one line per path that existed, or a
        "no cache files found" notice when none of them did. The same text is
        printed.
    """
    report = []
    for p in DATA_PATHS:
        if os.path.isdir(p) and not os.path.islink(p):
            shutil.rmtree(p, ignore_errors=True)
            # rmtree was told to ignore errors, so verify before claiming
            # success.
            if os.path.exists(p):
                report.append(f"Could not fully delete folder: {p}")
            else:
                report.append(f"ποΈ Deleted folder: {p}")
        elif os.path.lexists(p):
            # Regular files and symlinks (lexists also sees broken links).
            try:
                os.remove(p)
            except OSError as err:
                # Keep processing the remaining paths and keep the report.
                report.append(f"Could not delete file: {p} ({err})")
            else:
                report.append(f"ποΈ Deleted file: {p}")
    msg = "\n".join(report) if report else "βΉοΈ No cache files found."
    print(msg)
    return msg
def rebuild_index():
    """Placeholder entry point for rebuilding the FAISS index.

    Imports the rebuild dependencies and logs a start message; the rebuild
    itself is not implemented in this function.

    Returns:
        A status string: the placeholder success message, or a failure
        message carrying the exception text when an import (or the log call)
        raises.
    """
    try:
        # Imported here rather than at module level; a failed import is
        # reported through the failure message below.
        from core.vector_sync import rebuild_faiss_from_glossary, _upload_to_dataset
        import pandas as pd
        import faiss
        import numpy as np
        from sentence_transformers import SentenceTransformer

        print("π§ Rebuilding FAISS index (Glossary + Excel + Web)...")
    except Exception as exc:
        return f"β οΈ Rebuild failed: {exc}"

    # TODO: the detailed rebuild logic belongs here.
    return "β Rebuild complete (placeholder logic)."
def rebuild_glossary():
    """Run the glossary rebuild-and-upload step and report the outcome.

    Returns:
        A success message, or a failure message carrying the exception text
        when the import or the rebuild raises.
    """
    try:
        # Imported on demand; an import failure is reported like any other.
        from core.glossary_builder import rebuild_and_upload

        rebuild_and_upload()
    except Exception as exc:
        status = f"β οΈ Glossary rebuild failed: {exc}"
    else:
        status = "β Glossary rebuilt and uploaded successfully."
    return status
| # ---------------------------------------------------------- | |
| # CHATBOT CORE | |
| # ---------------------------------------------------------- | |
def chat_answer(query, mode="short"):
    """Answer a user question and return HTML for the output box.

    Args:
        query: Text typed by the user; ``None`` is treated as empty.
        mode: Passed through to ``summarize_combined`` as ``mode``.

    Returns:
        The value returned by ``summarize_combined`` for the stripped query,
        or an italic HTML notice when the query is empty or an error occurs.
    """
    import html  # local import: used only to escape the error text

    try:
        query_clean = (query or "").strip()
        if not query_clean:
            return "<i>β οΈ Please enter a valid query.</i>"
        return summarize_combined(query_clean, mode=mode)
    except Exception as e:
        print("β Chatbot error:", e)
        # The message is rendered as HTML and may echo user-supplied text,
        # so escape it before embedding it in markup.
        return f"<i>β οΈ Error: {html.escape(str(e))}</i>"
| # ---------------------------------------------------------- | |
| # GRADIO UI | |
| # ---------------------------------------------------------- | |
# Build the UI: title, description, question box, answer area and buttons.
with gr.Blocks(theme="gradio/soft") as demo:
    gr.Markdown(f"# {APP_TITLE}")
    gr.Markdown(APP_DESC)

    query_box = gr.Textbox(
        lines=2,
        show_label=True,
        label="Ask your clinical trial question",
        placeholder="e.g. What is an eCRF?",
    )
    output_box = gr.HTML(label="Answer")

    with gr.Row():
        submit_btn = gr.Button("π Submit", variant="primary")
        # Admin tools are only created when not running in PUBLIC mode.
        if not PUBLIC_MODE:
            rebuild_btn = gr.Button("π Rebuild Index")
            rebuild_glossary_btn = gr.Button("π Rebuild Glossary")
            clear_btn = gr.Button("π§Ή Clear Cache / Index")

    # Clicking the button and pressing Enter in the textbox both ask the bot.
    for _register in (submit_btn.click, query_box.submit):
        _register(fn=chat_answer, inputs=[query_box], outputs=output_box)

    # Wire the maintenance actions; their status text goes to the answer area.
    if not PUBLIC_MODE:
        for _button, _handler in (
            (rebuild_btn, rebuild_index),
            (rebuild_glossary_btn, rebuild_glossary),
            (clear_btn, clear_index),
        ):
            _button.click(fn=_handler, outputs=output_box)
| # ---------------------------------------------------------- | |
| # LAUNCH APP WITH AUTH | |
| # ---------------------------------------------------------- | |
if __name__ == "__main__":
    print("π Starting Clinical Trial Chatbot...")
    print("π§ Initializing retriever warm-up...")
    # PUBLIC mode runs without a login; otherwise the admin check guards the app.
    auth_handler = None if PUBLIC_MODE else check_admin_login
    demo.launch(
        auth=auth_handler,
        share=False,
        server_port=7860,
        server_name="0.0.0.0",
    )