Spaces:

Alpha00444
/

ClinicalTrialDiscoveryRAGProject

Sleeping

App Files Files Community

AlphaAngel444 commited on Jul 6

Commit

798fe27

verified ·

1 Parent(s): d075535

Upload 5 files

Browse files

Files changed (5) hide show

README.md +21 -14
app.py +109 -0
env.download +3 -0
requirements.txt +8 -3
service-account-key (1).json +13 -0

README.md CHANGED Viewed

@@ -1,20 +1,27 @@
 ---
-title: ClinicalTrialDiscoveryRAGProject
-emoji: 🚀
-colorFrom: red
-colorTo: red
-sdk: docker
-app_port: 8501
-tags:
-- streamlit
 pinned: false
-short_description: Clinical Trial Discovery System by RAG
-license: mit
 ---
-# Welcome to Streamlit!
-Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).

 ---
+title: Clinical Trial Discovery App
+emoji: 🧬
+colorFrom: blue
+colorTo: indigo
+sdk: streamlit
+sdk_version: 1.46.1
+app_file: app.py
 pinned: false
 ---
+# 🧠 Clinical Trial Discovery (RAG App)
+This is a Streamlit-based RAG (Retrieval-Augmented Generation) app for discovering clinical trials using natural language queries. It uses:
+- ✅ BioBERT-SBERT for semantic embeddings
+- ✅ Pinecone for fast vector search
+- ✅ Mistral-7B-Instruct for generating answers (the model choice may change)
+- ✅ Firestore for storing user bookmarks only
+## 🚀 Features
+- 🔍 Ask natural questions (e.g., "thymus cancer for women")
+- 🧠 Get LLM-generated answers based on real trial descriptions
+- ⭐ Bookmark trials (saved to Firestore)
+- 🧭 Tabbed layout for easy navigation

app.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import streamlit as st # for web UI creation
+from sentence_transformers import SentenceTransformer # this is for embedding queries into dense vectors
+from pinecone import Pinecone, ServerlessSpec # for accessing pinecone vector DB
+import os # for reading environment variables
+from langchain_huggingface import HuggingFaceEndpoint # for accessing HuggingFace inference endpoint
+from langchain.prompts import PromptTemplate
+import firebase_admin  # for access to firebase
+from firebase_admin import credentials, firestore
+from dotenv import load_dotenv
+# === Load environment variables ===
+load_dotenv(".env.local")
+# === CONFIG ===
+PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
+PINECONE_ENV = os.getenv("PINECONE_ENV")
+HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
+USER_ID = "demo_user" # static user for testing
+# === Firebase Setup ===
+if not firebase_admin._apps:
+    cred = credentials.Certificate("service-account-key.json")
+    firebase_admin.initialize_app(cred)
+db = firestore.client()
+# === Pinecone Setup ===
+pc = Pinecone(api_key=PINECONE_API_KEY)
+INDEX_NAME = "clinical-trials-rag"
+index = pc.Index(INDEX_NAME)
+# === Embedding Model ===
+embed_model = SentenceTransformer("pritamdeka/BioBERT-mnli-snli-scinli-scitail-mednli-stsb") # BioBERT sentence transformer model
+# === LLM Setup ===
+llm = HuggingFaceEndpoint(
+    endpoint_url="https://f9eftfrz5qna6j32.us-east-1.aws.endpoints.huggingface.cloud", # Inference Endpoint Built from Hugging Face. Pay per hour.
+    huggingfacehub_api_token=HF_TOKEN,
+    temperature=0.7,
+    max_new_tokens=256
+)
+prompt_template = PromptTemplate.from_template(
+    "Context:\n{context}\n\nQuestion: {question}\nAnswer:"
+)
+# === Tabs ===
+tab1, tab2 = st.tabs(["🔍 Ask a Question", "⭐ Bookmarked Trials"])
+# === TAB 1: Question tab page with sample questions to guide the user ===
+with tab1:
+    st.title("Clinical Trial Discovery")
+    st.markdown("""
+    💡 **Example question formats:**
+    - What clinical trials are available for non-small cell lung cancer in California?
+    - List phase 3 trials for Type 1 Diabetes recruiting in 2025.
+    - What studies on immunotherapy for melanoma are active in Europe?
+    - Are there trials targeting heart disease patients over 65?
+    """)
+    user_query = st.text_input("🔍 Enter your clinical trial questions below:") # actual query input part
+    if user_query: # triggers query upon user type action
+        with st.spinner("Retrieving relevant trials..."): # display spinner while pinecone DB being searched
+            vec = embed_model.encode(user_query).tolist() # embed query using the BioBERT sentence transformer
+            results = index.query(vector=vec, top_k=5, include_metadata=True) # search pinecone vector DB. Look for 5 most similar vectors
+            contexts = [r["metadata"]["text"] for r in results["matches"]]
+            nct_ids = [r["metadata"].get("nct_id", "") for r in results["matches"]]
+        # Prep the prompt for the LLM
+        joined_context = "\n".join(contexts) # joins the retrieved trial summary into one contextual block
+        prompt = prompt_template.format(context=joined_context, question=user_query) # fills prompt template
+        # this part calls the LLM endpoint to generate the answer
+        with st.spinner("Generating answer..."):
+            answer = llm(prompt)
+        st.subheader("🧠 Answer:") # display answer in UI
+        st.write(answer)
+        st.markdown("---") # display the related trials under the answer
+        st.subheader("📋 Related Clinical Trials")
+        for i, match in enumerate(results["matches"]): # loop through pinecone search results and display them
+            meta = match["metadata"]
+            nct_id = meta.get("nct_id", f"chunk_{i}") # assigns fallback chuck ID if 'nct_id' is missing
+            chunk_text = meta.get("text", "")[:400] # shows the first 400 characters of the trial chunk
+            with st.expander(f"Trial: {nct_id}"): # create an expandable block for each trial
+                st.write(chunk_text + "...")
+                # add bookmark button instead each expander. Book marks are saved to /users/demo_user/Bookmarks/{nct_id}
+                if st.button(f"⭐ Bookmark {nct_id}", key=f"bookmark_{i}"):
+                    db.collection("Users").document(USER_ID).collection("Bookmarks").document(nct_id).set({
+                          "nct_id": nct_id,
+                          "text": chunk_text
+                      })
+                    st.success(f"Bookmarked {nct_id} to Firestore.")
+# === TAB 2: Bookmarked Trials ===
+with tab2:
+    st.title("⭐ Your Bookmarked Trials")
+    # retrieve bookmarks from firestore
+    docs = db.collection("Users").document(USER_ID).collection("Bookmarks").stream()
+    bookmarks = [doc.to_dict() for doc in docs]
+    # if no bookmarks, show message.
+    if not bookmarks:
+        st.info("You haven't bookmarked any trials yet.")
+    else: # otherwise display bookmarked trials in expanders
+        for b in bookmarks:
+            with st.expander(f"{b['nct_id']}"):
+                st.write(b["text"])

env.download ADDED Viewed

	@@ -0,0 +1,3 @@

+PINECONE_API_KEY=<redacted-rotate-this-key-and-store-it-as-a-space-secret>
+PINECONE_ENV=us-east-1
+INDEX_NAME=clinical-trials-rag

requirements.txt CHANGED Viewed

@@ -1,3 +1,8 @@
-altair
-pandas
-streamlit

+streamlit
+sentence-transformers
+transformers
+langchain
+langchain-community
+langchain-huggingface
+firebase-admin
+pinecone>=3.0.0
+python-dotenv

service-account-key (1).json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "type": "service_account",
+  "project_id": "clinical-trial-app-3ecb4",
+  "private_key_id": "81877eea413e50fb90f5ab80ac2cfb8ca3ebc2a5",
+  "private_key": "<redacted-revoke-this-key-and-load-credentials-from-a-secret>",
+  "client_email": "firebase-adminsdk-fbsvc@clinical-trial-app-3ecb4.iam.gserviceaccount.com",
+  "client_id": "101962704350860272575",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/firebase-adminsdk-fbsvc%40clinical-trial-app-3ecb4.iam.gserviceaccount.com",
+  "universe_domain": "googleapis.com"
+}