import dotenv

# Load environment variables from .env file
dotenv.load_dotenv()
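
# Illustrative only: the single secret this app relies on is GROQ_API_KEY (checked before
# answering queries further below and presumably consumed by Answer_Generation when calling
# the Groq API). A minimal `.env` placed next to app.py would therefore look like:
#
#     GROQ_API_KEY=your_groq_api_key_here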
import streamlit as st
import os
import sys
import pickle
import numpy as np
import spacy  # Added to explicitly check for SciSpaCy model loading

# --- Page Configuration ---
# Set page configuration for a wider layout; st.set_page_config must be the first
# Streamlit command in the script, so it is called before the CSS injection below.
st.set_page_config(layout="wide", page_title="Drugbot!", page_icon="💊")

# --- Custom CSS for reduced whitespace and colors ---
st.markdown(
| """ | |
| <style> | |
| /* Reduce top padding for the main Streamlit app container */ | |
| .stApp { | |
| padding-top: 0px; /* Reduced this value to minimize whitespace at the very top */ | |
| padding-bottom: 20px; | |
| } | |
| /* Set a subtle background color for the entire page */ | |
| body { | |
| background-color: #f0f8ff; /* AliceBlue - a very light blue */ | |
| color: #333333; /* Dark gray for text */ | |
| } | |
| /* Style for headers */ | |
| h1, h2, h3, h4, h5, h6 { | |
| color: #1a5276; /* Darker blue for headings */ | |
| } | |
| /* Style for buttons */ | |
| .stButton>button { | |
| background-color: #28a745; /* Green for primary button */ | |
| color: white; | |
| border-radius: 8px; | |
| padding: 10px 20px; | |
| border: none; | |
| box-shadow: 2px 2px 5px rgba(0,0,0,0.2); | |
| transition: background-color 0.3s ease; | |
| } | |
| .stButton>button:hover { | |
| background-color: #218838; /* Darker green on hover */ | |
| } | |
| /* Style for text areas and select boxes */ | |
| .stTextArea textarea, .stSelectbox [data-testid="stSelectbox"] { | |
| border-radius: 8px; | |
| border: 1px solid #cccccc; | |
| } | |
| /* Style for info, success, warning, error boxes */ | |
| .stAlert { | |
| border-radius: 8px; | |
| } | |
| </style> | |
| """, | |
| unsafe_allow_html=True | |
| ) | |
# --- Global message log ---
# This list stores messages that are displayed in the "Application Logs" expander.
app_messages = []

def log_message(msg_type, message):
    """
    Append a message to the log list and, for errors, also display it
    prominently in the main area.
    """
    app_messages.append((msg_type, message))
    if msg_type == "error":
        st.error(message)

# Add the 'Scripts' directory to the Python path.
# This allows importing modules like Query_processing, Retrieval, and Answer_Generation.
script_dir = os.path.join(os.path.dirname(__file__), 'Scripts')
log_message("info", f"Attempting to add '{script_dir}' to Python path.")
if script_dir not in sys.path:
    sys.path.append(script_dir)
    log_message("info", f"'{script_dir}' added to sys.path.")
else:
    log_message("info", f"'{script_dir}' already in sys.path.")
# --- Debugging: Check if script files exist ---
script_files_to_check = {
    "Query_processing.py": False,
    "Retrieval.py": False,
    "Answer_Generation.py": False
}
all_scripts_found = True
for script_name in script_files_to_check:
    script_path = os.path.join(script_dir, script_name)
    if os.path.exists(script_path):
        script_files_to_check[script_name] = True
    else:
        all_scripts_found = False
        log_message("error", f"Error: Script file not found at expected path: {script_path}")

if not all_scripts_found:
    log_message("error", "One or more essential script files are missing from the 'Scripts' directory. "
                         "Please ensure your project structure is correct.")
    st.stop()  # Stop execution if critical files are missing
# Import the core logic modules
try:
    from Query_processing import preprocess_query
    from Retrieval import Retrieval_averagedQP
    from Answer_Generation import answer_generation
    log_message("success", "Core modules imported successfully!")
except ImportError as e:
    log_message("error", f"Error importing core modules. Make sure the 'Scripts' directory is correctly structured and contains "
                         f"Query_processing.py, Retrieval.py, and Answer_Generation.py. Error: {e}")
    st.stop()
# --- Configuration ---
# Define paths to the data and vector files.
# These paths are relative to the location of app.py.
DATASET_PATH = os.path.join(os.path.dirname(__file__), 'Datasets', 'flattened_drug_dataset_cleaned.csv')
VECTORS_DIR = os.path.join(os.path.dirname(__file__), 'Vectors')
FAISS_INDEX_PATH = os.path.join(VECTORS_DIR, 'faiss_index.idx')
DOC_METADATA_PATH = os.path.join(VECTORS_DIR, 'doc_metadata.pkl')
DOC_VECTORS_PATH = os.path.join(VECTORS_DIR, 'doc_vectors.npy')
# --- Cached Resources ---
# Use st.cache_resource so heavy models and data are loaded only once.
@st.cache_resource
def load_all_assets():
    """
    Verifies the existence of the necessary files and checks that the core NLP models
    can be loaded. Thanks to st.cache_resource, this runs only once across all sessions.
    """
    with st.spinner("Verifying medical knowledge base and models... This might take a moment."):
        try:
            # 1. Check for the presence of the FAISS and embedding files.
            if not os.path.exists(FAISS_INDEX_PATH):
                log_message("error", f"Missing FAISS index file: {FAISS_INDEX_PATH}")
                return False
            if not os.path.exists(DOC_METADATA_PATH):
                log_message("error", f"Missing document metadata file: {DOC_METADATA_PATH}")
                return False
            if not os.path.exists(DOC_VECTORS_PATH):
                log_message("error", f"Missing document vectors file: {DOC_VECTORS_PATH}")
                return False

            # 2. Check on the SciSpaCy model ('en_core_sci_md'), a common point of failure.
            # Query_processing is expected to load the model itself, so it is not loaded
            # (or kept in memory) here; the commented-out lines show the explicit check.
            try:
                # nlp = spacy.load("en_core_sci_md")
                # del nlp  # Release the model; it is not needed globally here
                log_message("info", "SciSpaCy 'en_core_sci_md' model is expected to be loaded by Query_processing.")
            except OSError:
                log_message("error", "SciSpaCy 'en_core_sci_md' model not found or linked. "
                                     "Please ensure it is installed correctly (e.g., `pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.4/en_core_sci_md-0.5.4.tar.gz`).")
                return False
            except Exception as e:
                log_message("error", f"An unexpected error occurred while checking the SciSpaCy model: {e}")
                return False

            log_message("success", "Medical knowledge base files verified. Models will be loaded as needed.")
            return True  # Indicate successful verification
        except Exception as e:
            log_message("error", f"Failed to verify assets. Please ensure all data and vector files are in their correct paths. Error: {e}")
            return False

# Verify all assets once at application start
assets_loaded = load_all_assets()
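
# A minimal, optional sketch of the explicit SciSpaCy availability check discussed above.
# It is not called by the app (Query_processing loads 'en_core_sci_md' itself); it only
# shows how the installed package could be detected without loading the full pipeline.
def _scispacy_model_available(model_name: str = "en_core_sci_md") -> bool:
    # spacy.util.is_package checks whether the model was installed as a pip package.
    return spacy.util.is_package(model_name)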
# --- Title and Header ---
st.title("💊 DrugBot")
st.markdown("---")

# --- Instructions ---
st.header("How to Use:")
st.write(
    """
Welcome to DrugBot, a retrieval-based medical drug QA chatbot! You can ask questions about medical drugs,
and I will retrieve information from a verified database to provide accurate answers.

1. **Select an example query** from the dropdown or **type your own question** in the text area below.
2. Click the **"Get Answer"** button.
3. Wait for the chatbot to process your query and generate an answer.
"""
)
st.markdown("---")
# --- Example Queries ---
st.header("Try These Examples:")
example_queries = [
    "Select an example query...",
    "What is the dosage for Azithromycin?",
    "What are the side effects of Ibuprofen?",
    "How should I take Amoxicillin?",
    "What are the precautions for Warfarin?",
    "What are the drug interactions for Metformin?",
    "What is Paracetamol used for?",
    "Can pregnant women take Aspirin?",
    "How does Prednisone work?",
    "What is the recommended children's dose of Tylenol?"
]
selected_example = st.selectbox(
    "Choose a pre-defined question:",
    example_queries
)

user_query = st.text_area(
    "Or type your question here:",
    value="" if selected_example == "Select an example query..." else selected_example,
    height=100,
    placeholder="e.g., What is the dosage for Azithromycin?"
)
# --- Chatbot Interaction ---
if st.button("Get Answer", type="primary"):
    if not assets_loaded:
        log_message("error", "Application assets failed to verify. Please check the Application Logs section below for details.")
    elif not user_query.strip():
        log_message("warning", "Please enter a question or select an example query.")
    else:
        # Check for the Groq API key
        if "GROQ_API_KEY" not in os.environ:
            log_message("error", "GROQ_API_KEY environment variable not set. Please set it to use the chatbot.")
        else:
            with st.spinner("Thinking... Retrieving and generating answer..."):
                try:
                    # 1. Preprocess the query.
                    # Query_processing.py handles its own spaCy model loading.
                    (intent, sub_intent), entities = preprocess_query(user_query)
                    log_message("info", f"Detected Intent: {intent}, Sub-Intent: {sub_intent}, Entities: {entities}")

                    # 2. Retrieve chunks.
                    # Retrieval_averagedQP is expected to load the FAISS index and vectors internally.
                    chunks = Retrieval_averagedQP(user_query, intent, entities)

                    if not chunks.empty:  # chunks is a DataFrame; proceed only if it has rows
                        # 3. Generate the answer.
                        answer = answer_generation(user_query, chunks)
                        log_message("info", f"Generated Answer Content: {answer[:200]}...")  # Log the first 200 characters

                        if not answer.strip():  # Empty answer after stripping whitespace
                            log_message("warning", "Answer generation returned an empty response.")
                            st.warning("Could not generate a clear answer for this query. Please try rephrasing.")
                        else:
                            log_message("success", "Answer generated successfully!")
                            st.success("Answer:")
                            st.write(answer)  # Display the answer in the main area

                            with st.expander("See Retrieved Chunks (for debugging/transparency)"):
                                st.write("Top 3 Retrieved Chunks:")
                                for i, chunk in enumerate(chunks.head(3).to_dict(orient='records')):  # Display top 3 for brevity
                                    st.write(f"**Chunk {i+1}:**")
                                    st.json(chunk)  # st.json gives a cleaner display for dicts
                                    st.markdown("---")
                    else:
                        log_message("warning", "No relevant information found for your query. Please try rephrasing.")
                except Exception as e:
                    log_message("error", f"An error occurred while processing your request: {e}")
                    st.info("Please try again or rephrase your question.")  # User-friendly message

st.markdown("---")
# --- About Section ---
st.header("About This Project")
with st.expander("Learn More About the Medical Drug QA Chatbot"):
    st.markdown(
        """
This project implements a **Retrieval-Based Question Answering (QA) system** designed to answer user queries
about medical drugs. It aims to provide accurate and factually grounded information by retrieving relevant
details from a verified database.

### Purpose
With the rapid increase in approved medications, ensuring factual accuracy in medical information is critical.
Traditional Large Language Models (LLMs) can sometimes "hallucinate" or provide untraceable answers.
Our system addresses this by grounding its responses in a curated database, ensuring factual consistency
and increasing user trust.

### Methodology
The system follows a multi-stage pipeline:
1. **Data Acquisition & Preprocessing:** Information about 2,755 drugs was web-scraped from MayoClinic.com,
   cleaned, and flattened into a structured CSV dataset.
2. **Embedding Generation:** The dataset content is embedded using the **MiniLM-L6-v2** model and indexed
   with **FAISS** (Facebook AI Similarity Search) for efficient similarity-based retrieval.
3. **Query Processing:** User queries undergo **intent and sub-intent classification** (e.g., identifying whether
   the user is asking about "side effects" or "dosage") and **Named Entity Recognition (NER)** using SciSpaCy
   to improve retrieval precision.
4. **Retrieval Pipeline:**
   * **Query Vectorization:** The user query is vectorized using MiniLM-L6-v2, incorporating weighted intent vectors.
   * **Initial Retrieval:** FAISS retrieves the top 10 most similar document chunks.
   * **Reranking:** The retrieved chunks are then reranked using **Sentence-BioBERT**, which excels at
     capturing biomedical context, significantly improving the relevance of the final selected documents.
5. **Answer Generation:** The top 3 reranked context chunks, along with the original query, are fed to the
   **LLaMA-4 model** (via the Groq API). The LLM is prompted to generate an answer *strictly based on the
   provided context*, minimizing hallucination.

### Models Used
* **MiniLM-L6-v2:** For FAISS-based vector retrieval.
* **Sentence-BioBERT:** For reranking candidate chunks.
* **LLaMA-4:** For final answer generation (accessed via the Groq API).
* **SciSpaCy:** For Named Entity Recognition and intent classification.

This project was developed by Niranjan Sathish and Hariharan Chandrasekar.
"""
    )
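
# --------------------------------------------------------------------------------------
# Illustrative sketch of the retrieval pipeline described in the "Methodology" text above.
# It is NOT called by the app (the real logic lives in Scripts/Retrieval.py and
# Scripts/Answer_Generation.py); it only shows, under stated assumptions, how the
# FAISS retrieval -> Sentence-BioBERT reranking -> Groq generation steps could fit
# together. The metadata layout, the Sentence-BioBERT checkpoint name, and the Groq
# model id below are assumptions, not necessarily the project's actual choices.
# --------------------------------------------------------------------------------------
def _pipeline_sketch(query: str, top_k: int = 10, top_n: int = 3) -> str:
    import faiss
    from groq import Groq
    from sentence_transformers import SentenceTransformer, util

    # 1. Vectorize the query with MiniLM-L6-v2 (the weighted-intent-vector step used by
    #    Retrieval_averagedQP is omitted here for brevity).
    encoder = SentenceTransformer("all-MiniLM-L6-v2")
    query_vec = encoder.encode([query], normalize_embeddings=True)

    # 2. Initial retrieval: the top_k nearest chunks from the FAISS index.
    index = faiss.read_index(FAISS_INDEX_PATH)
    _, ids = index.search(query_vec, top_k)

    # Assumed metadata layout: a pickled sequence/dict mapping FAISS row ids to chunk text.
    with open(DOC_METADATA_PATH, "rb") as f:
        doc_metadata = pickle.load(f)
    candidates = [str(doc_metadata[int(i)]) for i in ids[0]]

    # 3. Rerank the candidates with a Sentence-BioBERT bi-encoder (checkpoint name assumed).
    reranker = SentenceTransformer("pritamdeka/S-BioBert-snli-multinli-stsb")
    scores = util.cos_sim(reranker.encode([query]), reranker.encode(candidates))[0]
    top_idx = scores.argsort(descending=True)[:top_n].tolist()
    top_chunks = [candidates[i] for i in top_idx]

    # 4. Generate the answer with an LLM on Groq, constrained to the retrieved context.
    client = Groq()  # Reads GROQ_API_KEY from the environment
    context = "\n\n".join(top_chunks)
    response = client.chat.completions.create(
        model="meta-llama/llama-4-scout-17b-16e-instruct",  # Placeholder Groq model id
        messages=[
            {"role": "system", "content": "Answer strictly from the provided context. "
                                          "If the context is insufficient, say so."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {query}"},
        ],
    )
    return response.choices[0].message.content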
# --- Repository Link Button (Placeholder) ---
st.markdown("---")
st.write("### Project Resources")
st.markdown(
    """
Once the project is hosted, you'll find links to the repository or Hugging Face Space here.
"""
)
# Placeholder for the actual buttons. Uncomment and update these once the links are available.
# if st.button("Go to GitHub Repository"):
#     st.markdown("[GitHub Repository Link](YOUR_GITHUB_REPO_URL_HERE)")
# if st.button("Go to Hugging Face Space"):
#     st.markdown("[Hugging Face Space Link](YOUR_HUGGING_FACE_SPACE_URL_HERE)")
# --- Application Logs Section ---
st.markdown("---")
st.header("Application Logs")
with st.expander("Show/Hide Logs"):
    if app_messages:
        for msg_type, msg_content in app_messages:
            if msg_type == "info":
                st.info(msg_content)
            elif msg_type == "success":
                st.success(msg_content)
            elif msg_type == "warning":
                st.warning(msg_content)
            elif msg_type == "error":
                st.error(msg_content)
    else:
        st.write("No application messages yet.")