Spaces:

arjunkmoorthy
/

medbot-space

Running

App Files Files Community

Arjun Moorthy committed on Jul 30

Commit

2720b05

1 Parent(s): da47961

Optimize for hardware constraints - make RAG optional and lightweight

Browse files

Files changed (2) hide show

Oncolife/app.py +59 -52
requirements.txt +1 -11

Oncolife/app.py CHANGED Viewed

@@ -4,7 +4,7 @@ OncoLife Symptom & Triage Assistant
 A medical chatbot that performs both symptom assessment and clinical triage for chemotherapy patients.
 Updated: Using BioMistral-7B base model for medical conversations.
 REBUILD: Simplified to use only base model, no adapters.
-RAG: Added document retrieval capabilities for PDFs and other reference materials.
 """
 import gradio as gr
@@ -15,14 +15,19 @@ from transformers import AutoTokenizer, MistralForCausalLM
 import torch
 from spaces import GPU
-# RAG imports
-import chromadb
-from sentence_transformers import SentenceTransformer
-import PyPDF2
-import pdfplumber
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.embeddings import HuggingFaceEmbeddings
-import fitz  # PyMuPDF for better PDF handling
 # Force GPU detection for HF Spaces
 @GPU
@@ -51,8 +56,18 @@ class OncoLifeAssistant:
         # Load the OncoLife instructions
         self._load_instructions()
-        # Initialize RAG system
-        self._initialize_rag()
     def _load_instructions(self):
         """Load the OncoLife instructions from the text file"""
@@ -70,15 +85,15 @@ class OncoLifeAssistant:
             self.instructions = ""
     def _initialize_rag(self):
-        """Initialize the RAG system with document embeddings"""
         try:
-            print("🔍 Initializing RAG system...")
-            # Initialize embedding model
             self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
             print("✅ Loaded embedding model")
-            # Initialize ChromaDB
             self.chroma_client = chromadb.Client()
             self.collection = self.chroma_client.create_collection(
                 name="oncolife_documents",
@@ -86,19 +101,20 @@ class OncoLifeAssistant:
             )
             print("✅ Initialized ChromaDB collection")
-            # Load and process documents
-            self._load_documents()
         except Exception as e:
             print(f"❌ Error initializing RAG: {e}")
             self.embedding_model = None
             self.collection = None
-    def _load_documents(self):
-        """Load and process all reference documents"""
         try:
             docs_path = Path(__file__).parent / "guideline-docs"
-            print(f"📚 Loading documents from: {docs_path}")
             if not docs_path.exists():
                 print("⚠️ guideline-docs directory not found")
@@ -106,27 +122,14 @@ class OncoLifeAssistant:
             # Text splitter for chunking documents
             text_splitter = RecursiveCharacterTextSplitter(
-                chunk_size=1000,
-                chunk_overlap=200,
                 separators=["\n\n", "\n", ". ", " ", ""]
             )
             documents_loaded = 0
-            # Process PDF files
-            for pdf_file in docs_path.glob("*.pdf"):
-                try:
-                    print(f"📄 Processing PDF: {pdf_file.name}")
-                    text = self._extract_pdf_text(pdf_file)
-                    if text:
-                        chunks = text_splitter.split_text(text)
-                        self._add_chunks_to_db(chunks, pdf_file.name)
-                        documents_loaded += 1
-                        print(f"✅ Added {len(chunks)} chunks from {pdf_file.name}")
-                except Exception as e:
-                    print(f"❌ Error processing {pdf_file.name}: {e}")
-            # Process JSON files
             for json_file in docs_path.glob("*.json"):
                 try:
                     print(f"📄 Processing JSON: {json_file.name}")
@@ -141,7 +144,7 @@ class OncoLifeAssistant:
                 except Exception as e:
                     print(f"❌ Error processing {json_file.name}: {e}")
-            # Process text files
             for txt_file in docs_path.glob("*.txt"):
                 try:
                     print(f"📄 Processing TXT: {txt_file.name}")
@@ -222,10 +225,10 @@ class OncoLifeAssistant:
         except Exception as e:
             print(f"❌ Error adding chunks to database: {e}")
-    def _retrieve_relevant_documents(self, query, top_k=5):
         """Retrieve relevant document chunks for a query"""
         try:
-            if not self.collection or not self.embedding_model:
                 return []
             # Generate query embedding
@@ -254,7 +257,7 @@ class OncoLifeAssistant:
             return []
     def _load_model(self, model_id, gpu_available):
-        """Load the BioMistral base model"""
         try:
             print("🔄 Loading BioMistral base model...")
@@ -275,14 +278,16 @@ class OncoLifeAssistant:
                 trust_remote_code=True
             )
-            # Load the model
             print(f"📦 Loading model: {model_id}")
             self.model = MistralForCausalLM.from_pretrained(
                 model_id,
                 trust_remote_code=True,
                 device_map="auto",
                 torch_dtype=dtype,
-                low_cpu_mem_usage=True
             )
             # Add pad token if not present
@@ -297,7 +302,7 @@ class OncoLifeAssistant:
             self.tokenizer = None
     def generate_oncolife_response(self, user_input, conversation_history):
-        """Generate response using OncoLife instructions and RAG"""
         try:
             if self.model is None or self.tokenizer is None:
                 return """❌ **Model Loading Error**
@@ -311,15 +316,17 @@ Please check the Space logs for details."""
             print(f"🔄 Generating OncoLife response for: {user_input}")
-            # Retrieve relevant documents using RAG
-            relevant_docs = self._retrieve_relevant_documents(user_input, top_k=3)
-            # Format retrieved documents
             context_text = ""
-            if relevant_docs:
-                context_text = "\n\n**Relevant Reference Information:**\n"
-                for i, doc in enumerate(relevant_docs):
-                    context_text += f"\n--- Source: {doc['source']} ---\n{doc['content'][:500]}...\n"
             # Create prompt using the loaded instructions and retrieved context
             system_prompt = f"""You are the OncoLife Symptom & Triage Assistant. Follow these instructions exactly:
@@ -426,7 +433,7 @@ Please try a simpler question or check the logs for more details."""
                 "assistant": assistant_msg
             })
-        # Generate response using OncoLife instructions and RAG
         response = self.generate_oncolife_response(message, conversation_history)
         return response

 A medical chatbot that performs both symptom assessment and clinical triage for chemotherapy patients.
 Updated: Using BioMistral-7B base model for medical conversations.
 REBUILD: Simplified to use only base model, no adapters.
+RAG: Added document retrieval capabilities for PDFs and other reference materials (optional).
 """
 import gradio as gr
 import torch
 from spaces import GPU
+# RAG imports (optional)
+try:
+    import chromadb
+    from sentence_transformers import SentenceTransformer
+    import PyPDF2
+    import pdfplumber
+    from langchain.text_splitter import RecursiveCharacterTextSplitter
+    from langchain.embeddings import HuggingFaceEmbeddings
+    import fitz  # PyMuPDF for better PDF handling
+    RAG_AVAILABLE = True
+except ImportError:
+    print("⚠️ RAG libraries not available, running in instruction-only mode")
+    RAG_AVAILABLE = False
 # Force GPU detection for HF Spaces
 @GPU
         # Load the OncoLife instructions
         self._load_instructions()
+        # Initialize RAG system (optional)
+        self.rag_enabled = False
+        if RAG_AVAILABLE:
+            try:
+                self._initialize_rag()
+                self.rag_enabled = True
+                print("✅ RAG system initialized successfully")
+            except Exception as e:
+                print(f"⚠️ RAG initialization failed: {e}")
+                print("🔄 Continuing with instruction-only mode")
+        else:
+            print("🔄 Running in instruction-only mode (no RAG)")
     def _load_instructions(self):
         """Load the OncoLife instructions from the text file"""
             self.instructions = ""
     def _initialize_rag(self):
+        """Initialize the RAG system with document embeddings (lightweight version)"""
         try:
+            print("🔍 Initializing lightweight RAG system...")
+            # Use a smaller embedding model
             self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
             print("✅ Loaded embedding model")
+            # Initialize ChromaDB with persistence disabled for memory efficiency
             self.chroma_client = chromadb.Client()
             self.collection = self.chroma_client.create_collection(
                 name="oncolife_documents",
             )
             print("✅ Initialized ChromaDB collection")
+            # Load and process documents (limited to essential files)
+            self._load_documents_lightweight()
         except Exception as e:
             print(f"❌ Error initializing RAG: {e}")
             self.embedding_model = None
             self.collection = None
+            raise e
+    def _load_documents_lightweight(self):
+        """Load only essential documents to save memory"""
         try:
             docs_path = Path(__file__).parent / "guideline-docs"
+            print(f"📚 Loading essential documents from: {docs_path}")
             if not docs_path.exists():
                 print("⚠️ guideline-docs directory not found")
             # Text splitter for chunking documents
             text_splitter = RecursiveCharacterTextSplitter(
+                chunk_size=500,  # Smaller chunks to save memory
+                chunk_overlap=100,
                 separators=["\n\n", "\n", ". ", " ", ""]
             )
             documents_loaded = 0
+            # Only process JSON files (lightweight)
             for json_file in docs_path.glob("*.json"):
                 try:
                     print(f"📄 Processing JSON: {json_file.name}")
                 except Exception as e:
                     print(f"❌ Error processing {json_file.name}: {e}")
+            # Process text files (lightweight)
             for txt_file in docs_path.glob("*.txt"):
                 try:
                     print(f"📄 Processing TXT: {txt_file.name}")
         except Exception as e:
             print(f"❌ Error adding chunks to database: {e}")
+    def _retrieve_relevant_documents(self, query, top_k=3):
         """Retrieve relevant document chunks for a query"""
         try:
+            if not self.collection or not self.embedding_model or not self.rag_enabled:
                 return []
             # Generate query embedding
             return []
     def _load_model(self, model_id, gpu_available):
+        """Load the BioMistral base model with memory optimization"""
         try:
             print("🔄 Loading BioMistral base model...")
                 trust_remote_code=True
             )
+            # Load the model with memory optimization
             print(f"📦 Loading model: {model_id}")
             self.model = MistralForCausalLM.from_pretrained(
                 model_id,
                 trust_remote_code=True,
                 device_map="auto",
                 torch_dtype=dtype,
+                low_cpu_mem_usage=True,
+                # Add memory optimization
+                max_memory={0: "8GB", "cpu": "16GB"} if gpu_available else {"cpu": "8GB"}
             )
             # Add pad token if not present
             self.tokenizer = None
     def generate_oncolife_response(self, user_input, conversation_history):
+        """Generate response using OncoLife instructions and optional RAG"""
         try:
             if self.model is None or self.tokenizer is None:
                 return """❌ **Model Loading Error**
             print(f"🔄 Generating OncoLife response for: {user_input}")
+            # Retrieve relevant documents using RAG (if available)
             context_text = ""
+            if self.rag_enabled:
+                try:
+                    relevant_docs = self._retrieve_relevant_documents(user_input, top_k=2)
+                    if relevant_docs:
+                        context_text = "\n\n**Relevant Reference Information:**\n"
+                        for i, doc in enumerate(relevant_docs):
+                            context_text += f"\n--- Source: {doc['source']} ---\n{doc['content'][:300]}...\n"
+                except Exception as e:
+                    print(f"⚠️ RAG retrieval failed: {e}")
             # Create prompt using the loaded instructions and retrieved context
             system_prompt = f"""You are the OncoLife Symptom & Triage Assistant. Follow these instructions exactly:
                 "assistant": assistant_msg
             })
+        # Generate response using OncoLife instructions and optional RAG
         response = self.generate_oncolife_response(message, conversation_history)
         return response

requirements.txt CHANGED Viewed

@@ -1,5 +1,3 @@
-# Medical Chatbot HF Space Requirements
 # Web framework
 gradio==4.44.0
@@ -9,18 +7,10 @@ transformers==4.36.2
 accelerate==0.25.0
 # HF Spaces GPU support
-spaces>=0.1.0
-# Basic utilities
-numpy>=1.21.0,<2.0.0
-requests>=2.28.0
-# Additional dependencies for better device handling
 safetensors==0.4.1
 tokenizers>=0.15.0
-# RAG implementation
-bitsandbytes==0.41.3
 sentence-transformers==2.2.2
 chromadb==0.4.22
 pypdf2==3.0.1

 # Web framework
 gradio==4.44.0
 accelerate==0.25.0
 # HF Spaces GPU support
 safetensors==0.4.1
 tokenizers>=0.15.0
+# RAG implementation (optional - will fall back gracefully if not available)
 sentence-transformers==2.2.2
 chromadb==0.4.22
 pypdf2==3.0.1