Spaces:

Novoo5
/

cardioqa-ai-system

Sleeping

App Files Files Community

Novoo5 committed on Oct 3

Commit

77bc432

verified ·

1 Parent(s): 0aac165

Add CardioQA system files - API, database, and medical data

Browse files

Files changed (16) hide show

.gitattributes +3 -0
chroma_db/c3ee0465-20cc-4bb1-bbae-cdf17ec4df3f/data_level0.bin +3 -0
chroma_db/c3ee0465-20cc-4bb1-bbae-cdf17ec4df3f/header.bin +3 -0
chroma_db/c3ee0465-20cc-4bb1-bbae-cdf17ec4df3f/length.bin +3 -0
chroma_db/c3ee0465-20cc-4bb1-bbae-cdf17ec4df3f/link_lists.bin +3 -0
chroma_db/chroma.sqlite3 +3 -0
data/processed/cardiac_qa.csv +0 -0
data/processed/cardioqa_system_config.json +17 -0
data/processed/rag_config.json +10 -0
data/raw/.gitkeep +0 -0
data/raw/dataset_statistics.json +18 -0
data/raw/medquad_cardiac.csv +0 -0
data/raw/medquad_full.csv +3 -0
data/raw/medquad_raw.csv +3 -0
src/api/main.py +407 -0
src/data_pipeline/collect_medquad.py +154 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+chroma_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
+data/raw/medquad_full.csv filter=lfs diff=lfs merge=lfs -text
+data/raw/medquad_raw.csv filter=lfs diff=lfs merge=lfs -text

chroma_db/c3ee0465-20cc-4bb1-bbae-cdf17ec4df3f/data_level0.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0afd4a59a7369cd3375711f2768a3e7e41228bc63733e8658bcb04108952750c
+size 167600

chroma_db/c3ee0465-20cc-4bb1-bbae-cdf17ec4df3f/header.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a0e81c3b22454233bc12d0762f06dcca48261a75231cf87c79b75e69a6c00150
+size 100

chroma_db/c3ee0465-20cc-4bb1-bbae-cdf17ec4df3f/length.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c3f53d18cb2b932fc4ce3fe13761180f9983fc2c12f6da2fd8ac805cd4505ae5
+size 400

chroma_db/c3ee0465-20cc-4bb1-bbae-cdf17ec4df3f/link_lists.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
+size 0

chroma_db/chroma.sqlite3 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:941a0e853a6b74b9d429be0f4bb877be8da948cd9b9cddf93025c7508b50bdb3
+size 11190272

data/processed/cardiac_qa.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data/processed/cardioqa_system_config.json ADDED Viewed

	@@ -0,0 +1,17 @@

+{
+  "created_date": "2025-10-02T18:00:23.978687",
+  "total_documents": 364,
+  "embedding_model": "all-MiniLM-L6-v2",
+  "llm_model": "gemini-2.0-flash",
+  "vector_db_path": "../chroma_db",
+  "safety_features": [
+    "emergency_detection",
+    "professional_consultation",
+    "medical_disclaimers"
+  ],
+  "performance_metrics": {
+    "avg_response_time": "2-3 seconds",
+    "safety_validation": "enabled",
+    "confidence_scoring": "enabled"
+  }
+}

data/processed/rag_config.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "vector_db_path": "../chroma_db",
+  "collection_name": "cardiac_knowledge",
+  "embedding_model": "all-MiniLM-L6-v2",
+  "embedding_dimension": 384,
+  "total_documents": 364,
+  "data_source": "MedQuAD",
+  "specialty": "cardiology",
+  "created_date": "2025-10-02T17:33:34.943956"
+}

data/raw/.gitkeep ADDED Viewed

File without changes

data/raw/dataset_statistics.json ADDED Viewed

	@@ -0,0 +1,18 @@

+{
+  "total_pairs": 16407,
+  "columns": [
+    "qtype",
+    "Question",
+    "Answer"
+  ],
+  "missing_values": {
+    "qtype": 0,
+    "Question": 0,
+    "Answer": 0
+  },
+  "data_types": {
+    "qtype": "object",
+    "Question": "object",
+    "Answer": "object"
+  }
+}

data/raw/medquad_cardiac.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

data/raw/medquad_full.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1fd20f0d2e946398b648b4cc56f3dc6111eb76b00a8eec6f4669ebebf1b701c1
+size 22483298

data/raw/medquad_raw.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1fd20f0d2e946398b648b4cc56f3dc6111eb76b00a8eec6f4669ebebf1b701c1
+size 22483298

src/api/main.py ADDED Viewed

	@@ -0,0 +1,407 @@

+"""
+CardioQA FastAPI Backend - PRODUCTION VERSION
+AI-powered cardiac diagnostic assistant with RAG
+Author: Novonil Basak
+"""
+import os
+import logging
+import time
+from pathlib import Path
+from typing import List, Optional
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel, Field
+import chromadb
+from sentence_transformers import SentenceTransformer
+import google.generativeai as genai
+# Setup logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+# Global variables
+collection = None
+embedding_model = None
+gemini_model = None
+safety_validator = None
+# Pydantic models
+class QueryRequest(BaseModel):
+    # Request body of the POST /query endpoint.
+    # The user's question; validation requires between 5 and 500 characters.
+    query: str = Field(..., min_length=5, max_length=500)
+    # Defaults to True. NOTE(review): no handler in this file reads this flag.
+    include_metadata: bool = Field(default=True)
+class QueryResponse(BaseModel):
+    # Response body of the POST /query endpoint.
+    # Generated answer text with the safety disclaimers appended.
+    response: str
+    # Heuristic score produced by MedicalSafetyValidator.validate_response (clamped to 50-100).
+    safety_score: int
+    # One of 'High', 'Medium', 'Low' or 'Very Low', bucketed from the best similarity.
+    confidence: str
+    # Number of knowledge-base entries retrieved for the query.
+    knowledge_sources: int
+    # 1 - distance of the best-matching retrieved entry.
+    top_similarity: float
+    # Warning messages produced during safety validation.
+    warnings: List[str]
+    # Seconds spent handling the request, rounded to two decimals.
+    response_time: float
+class MedicalSafetyValidator:
+    """Medical safety validation system"""
+    def __init__(self):
+        self.emergency_keywords = [
+            'heart attack', 'chest pain', 'shortness of breath', 'stroke',
+            'severe pain', 'bleeding', 'unconscious', 'emergency', 'crushing pain'
+        ]
+    def validate_response(self, response_text: str, user_query: str) -> dict:
+        """Validate medical safety of AI response"""
+        safety_score = 85
+        warnings = []
+        # Check for emergency situations
+        if any(keyword in user_query.lower() for keyword in self.emergency_keywords):
+            if 'seek immediate medical attention' not in response_text.lower():
+                warnings.append("CRITICAL: Emergency situation detected")
+                safety_score -= 20
+            else:
+                safety_score += 10
+        # Check for professional consultation recommendation
+        consult_phrases = ['consult', 'doctor', 'physician', 'healthcare provider']
+        if any(phrase in response_text.lower() for phrase in consult_phrases):
+            safety_score += 10
+        else:
+            warnings.append("Added professional consultation recommendation")
+            safety_score -= 15
+        # Check response quality
+        if len(response_text) > 200:
+            safety_score += 5
+        # Check for dangerous statements
+        dangerous_phrases = ['you definitely have', 'this is certainly', 'never see a doctor']
+        if any(phrase in response_text.lower() for phrase in dangerous_phrases):
+            warnings.append("Contains potentially dangerous medical statements")
+            safety_score -= 25
+        safety_score = min(100, max(50, safety_score))
+        return {
+            'safety_score': safety_score,
+            'warnings': warnings,
+            'is_safe': safety_score >= 70
+        }
+    def add_safety_disclaimers(self, response_text: str, safety_check: dict) -> str:
+        """Add medical disclaimers"""
+        disclaimers = "\n\n⚠️ **MEDICAL DISCLAIMER**: Educational purposes only.\n👨‍⚕️ **RECOMMENDATION**: Consult healthcare professionals."
+        if safety_check['safety_score'] < 80:
+            disclaimers += "\n🚨 **IMPORTANT**: For severe symptoms, seek immediate medical attention."
+        return response_text + disclaimers
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Initialize and cleanup application resources"""
+    global collection, embedding_model, gemini_model, safety_validator
+    logger.info("🫀 Starting CardioQA API...")
+    try:
+        # FIXED: Force ChromaDB to create new compatible database
+        possible_paths = [
+            "./chroma_db",
+            "chroma_db",
+            "/opt/render/project/src/chroma_db",
+            Path.cwd() / "chroma_db",
+            Path(__file__).parent.parent.parent / "chroma_db"
+        ]
+        db_path = None
+        for path in possible_paths:
+            path_obj = Path(path)
+            logger.info(f"🔍 Checking: {path_obj.absolute()}")
+            if path_obj.exists() and path_obj.is_dir():
+                db_path = str(path_obj)
+                logger.info(f"✅ Found ChromaDB at: {db_path}")
+                break
+        if not db_path:
+            # Create new ChromaDB if not found
+            logger.info("📁 Creating new ChromaDB...")
+            db_path = "./chroma_db_render"
+            # Initialize new ChromaDB and recreate collection
+            client = chromadb.PersistentClient(path=db_path)
+            try:
+                collection = client.get_collection(name="cardiac_knowledge")
+                logger.info(f"✅ Using existing collection: {collection.count()} documents")
+            except:
+                logger.info("Creating new collection with sample data...")
+                collection = client.create_collection(name="cardiac_knowledge")
+                # Add sample cardiac Q&A data for demo
+                sample_data = [
+                    {
+                        "question": "What are the symptoms of heart attack?",
+                        "answer": "Common heart attack symptoms include chest pain or discomfort, shortness of breath, pain in arms/back/neck/jaw, cold sweat, nausea, and lightheadedness. Seek immediate medical attention if experiencing these symptoms.",
+                        "qtype": "symptoms"
+                    },
+                    {
+                        "question": "How can I prevent heart disease?",
+                        "answer": "Heart disease prevention includes regular exercise, healthy diet low in saturated fats, not smoking, limiting alcohol, managing stress, controlling blood pressure and cholesterol, and regular medical checkups.",
+                        "qtype": "prevention"
+                    },
+                    {
+                        "question": "What causes high blood pressure?",
+                        "answer": "High blood pressure can be caused by genetics, age, diet high in sodium, lack of exercise, obesity, excessive alcohol consumption, stress, and certain medical conditions. Regular monitoring is important.",
+                        "qtype": "causes"
+                    }
+                ]
+                for i, item in enumerate(sample_data):
+                    collection.add(
+                        documents=[item["answer"]],
+                        metadatas=[{
+                            "question": item["question"],
+                            "answer": item["answer"],
+                            "qtype": item["qtype"]
+                        }],
+                        ids=[f"cardiac_{i}"]
+                    )
+                logger.info(f"✅ Created collection with {len(sample_data)} sample documents")
+        else:
+            # Try to use existing database
+            try:
+                client = chromadb.PersistentClient(path=db_path)
+                collection = client.get_collection(name="cardiac_knowledge")
+                logger.info(f"✅ Loaded existing database: {collection.count()} documents")
+            except Exception as e:
+                logger.error(f"❌ ChromaDB compatibility issue: {e}")
+                # Fallback: create new database
+                logger.info("Creating fallback database...")
+                client = chromadb.PersistentClient(path="./chroma_db_fallback")
+                collection = client.create_collection(name="cardiac_knowledge")
+                # Add sample data (same as above)
+                sample_data = [
+                    {
+                        "question": "What are the symptoms of heart attack?",
+                        "answer": "Common heart attack symptoms include chest pain or discomfort, shortness of breath, pain in arms/back/neck/jaw, cold sweat, nausea, and lightheadedness. Seek immediate medical attention.",
+                        "qtype": "symptoms"
+                    }
+                ]
+                collection.add(
+                    documents=[sample_data[0]["answer"]],
+                    metadatas=[sample_data[0]],
+                    ids=["cardiac_0"]
+                )
+                logger.info("✅ Created fallback database")
+        # Load embedding model
+        embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+        logger.info("✅ Loaded embedding model")
+        # Configure Gemini API
+        api_key = os.getenv("GEMINI_API_KEY")
+        if not api_key:
+            raise Exception("❌ GEMINI_API_KEY environment variable not set")
+        genai.configure(api_key=api_key)
+        gemini_model = genai.GenerativeModel('gemini-2.0-flash')
+        # Test Gemini connection
+        test_response = gemini_model.generate_content("Say 'CardioQA ready!'")
+        logger.info(f"✅ Gemini test: {test_response.text}")
+        # Initialize safety validator
+        safety_validator = MedicalSafetyValidator()
+        logger.info("✅ Safety validator ready")
+        logger.info("🎉 CardioQA API fully initialized!")
+        yield
+    except Exception as e:
+        logger.error(f"❌ Startup failed: {str(e)}")
+        raise
+    # Cleanup
+    logger.info("🔄 Shutting down CardioQA API...")
+# Initialize FastAPI with lifespan
+app = FastAPI(
+    title="CardioQA API",
+    description="AI-powered cardiac diagnostic assistant with RAG",
+    version="1.0.0",
+    lifespan=lifespan
+)
+# Add CORS middleware
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["GET", "POST"],
+    allow_headers=["*"],
+)
+@app.get("/")
+async def root():
+    """API root endpoint"""
+    return {
+        "message": "CardioQA API - AI-Powered Cardiac Diagnostic Assistant",
+        "version": "1.0.0",
+        "status": "running",
+        "endpoints": {
+            "health": "/health",
+            "query": "/query",
+            "docs": "/docs",
+            "stats": "/stats"
+        }
+    }
+@app.get("/health")
+async def health_check():
+    """Health check endpoint"""
+    try:
+        db_count = collection.count() if collection else 0
+        model_status = "ready" if gemini_model else "not loaded"
+        return {
+            "status": "healthy",
+            "database_count": db_count,
+            "model_status": model_status,
+            "api_version": "1.0.0",
+            "deployment": "render-production"
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+@app.post("/query", response_model=QueryResponse)
+async def query_cardioqa(request: QueryRequest):
+    """Main CardioQA query endpoint"""
+    start_time = time.time()
+    try:
+        if not collection or not gemini_model or not safety_validator:
+            raise HTTPException(status_code=503, detail="System not fully initialized")
+        logger.info(f"Processing query: {request.query[:100]}...")
+        # Search knowledge base
+        results = collection.query(
+            query_texts=[request.query],
+            n_results=3
+        )
+        if not results['documents'][0]:
+            raise HTTPException(status_code=404, detail="No relevant cardiac information found")
+        # Format knowledge context
+        knowledge_context = []
+        for doc, metadata, distance in zip(
+            results['documents'][0],
+            results['metadatas'][0],
+            results['distances'][0]
+        ):
+            knowledge_context.append({
+                'question': metadata['question'],
+                'answer': metadata['answer'],
+                'similarity': 1 - distance
+            })
+        # Create medical prompt
+        context_text = f"Medical Evidence:\nQ: {knowledge_context[0]['question']}\nA: {knowledge_context[0]['answer']}"
+        prompt = f"""You are CardioQA, a specialized cardiac health assistant.
+MEDICAL RESPONSE RULES:
+- Never provide definitive diagnoses
+- Always recommend consulting healthcare professionals
+- Use **bold** for important medical points
+- Be educational and evidence-based
+- Include appropriate medical caution
+USER QUESTION: {request.query}
+{context_text}
+Provide a helpful, evidence-based response with proper **bold** formatting:"""
+        # Generate AI response
+        response = gemini_model.generate_content(
+            prompt,
+            generation_config={
+                'temperature': 0.1,
+                'max_output_tokens': 800,
+            }
+        )
+        ai_response = response.text
+        # Apply safety validation
+        safety_check = safety_validator.validate_response(ai_response, request.query)
+        safe_response = safety_validator.add_safety_disclaimers(ai_response, safety_check)
+        # Calculate confidence level
+        similarity = knowledge_context[0]['similarity']
+        if similarity > 0.6:
+            confidence = 'High'
+        elif similarity > 0.4:
+            confidence = 'Medium'
+        elif similarity > 0.2:
+            confidence = 'Low'
+        else:
+            confidence = 'Very Low'
+        response_time = time.time() - start_time
+        return QueryResponse(
+            response=safe_response,
+            safety_score=safety_check['safety_score'],
+            confidence=confidence,
+            knowledge_sources=len(knowledge_context),
+            top_similarity=knowledge_context[0]['similarity'],
+            warnings=safety_check['warnings'],
+            response_time=round(response_time, 2)
+        )
+    except HTTPException:
+        raise
+    except Exception as e:
+        logger.error(f"Query processing error: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Processing error: {str(e)}")
+@app.get("/stats")
+async def get_system_stats():
+    """System statistics endpoint"""
+    try:
+        return {
+            "total_documents": collection.count() if collection else 0,
+            "embedding_model": "all-MiniLM-L6-v2",
+            "llm_model": "gemini-2.0-flash",
+            "specialty": "cardiology",
+            "safety_features": [
+                "emergency_detection",
+                "professional_consultation",
+                "medical_disclaimers",
+                "confidence_scoring"
+            ],
+            "deployment": "render-production",
+            "chromadb_version": "compatible"
+        }
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e))
+# FIXED: Proper port binding for Render deployment
+if __name__ == "__main__":
+    import uvicorn
+    # Railway uses PORT environment variable
+    port = int(os.environ.get("PORT", 8000))
+    logger.info(f"🚀 Starting CardioQA on port {port}")
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=port,
+        log_level="info"
+    )

src/data_pipeline/collect_medquad.py ADDED Viewed

	@@ -0,0 +1,154 @@

+"""
+CardioQA Data Collection Module
+Collects and processes medical Q&A data from MedQuAD dataset
+Author: Novonil Basak
+Date: October 2, 2025
+"""
+import os
+import pandas as pd
+import requests
+from datasets import load_dataset
+from pathlib import Path
+import json
+from tqdm import tqdm
+import logging
+# Setup logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+class MedicalDataCollector:
+    """Collect and process medical datasets for CardioQA RAG system"""
+    def __init__(self, data_dir="data/raw"):
+        self.data_dir = Path(data_dir)
+        self.data_dir.mkdir(parents=True, exist_ok=True)
+    def collect_medquad_dataset(self):
+        """Collect MedQuAD dataset from HuggingFace"""
+        logger.info("Starting MedQuAD dataset collection...")
+        try:
+            # Load MedQuAD dataset from HuggingFace
+            logger.info("Loading MedQuAD from HuggingFace...")
+            dataset = load_dataset("keivalya/MedQuad-MedicalQnADataset")
+            # Convert to pandas DataFrame
+            df = pd.DataFrame(dataset['train'])
+            logger.info(f"Loaded {len(df)} medical Q&A pairs")
+            # Basic data inspection
+            logger.info("Dataset columns: " + str(df.columns.tolist()))
+            logger.info("Dataset shape: " + str(df.shape))
+            # Save raw dataset
+            raw_file_path = self.data_dir / "medquad_raw.csv"
+            df.to_csv(raw_file_path, index=False)
+            logger.info(f"Saved raw MedQuAD to {raw_file_path}")
+            return df
+        except Exception as e:
+            logger.error(f"Error collecting MedQuAD dataset: {str(e)}")
+            return None
+    def filter_cardiac_data(self, df):
+        """Filter dataset for cardiology-related content"""
+        logger.info("Filtering for cardiology-related content...")
+        # Cardiac-related keywords
+        cardiac_keywords = [
+            'heart', 'cardiac', 'cardiology', 'cardiovascular', 'coronary',
+            'arrhythmia', 'hypertension', 'blood pressure', 'chest pain',
+            'heart attack', 'myocardial', 'atrial', 'ventricular', 'valve',
+            'pacemaker', 'ECG', 'EKG', 'angina', 'stroke', 'circulation'
+        ]
+        # Create cardiac filter mask
+        cardiac_mask = df.apply(
+            lambda row: any(
+                keyword.lower() in str(row).lower()
+                for keyword in cardiac_keywords
+            ), axis=1
+        )
+        cardiac_df = df[cardiac_mask].copy()
+        logger.info(f"Found {len(cardiac_df)} cardiac-related Q&A pairs")
+        # Save filtered cardiac data
+        cardiac_file_path = self.data_dir / "medquad_cardiac.csv"
+        cardiac_df.to_csv(cardiac_file_path, index=False)
+        logger.info(f"Saved cardiac data to {cardiac_file_path}")
+        return cardiac_df
+    def display_sample_data(self, df, n_samples=3):
+        """Display sample Q&A pairs"""
+        logger.info(f"Sample {n_samples} Q&A pairs:")
+        print("\n" + "="*80)
+        for i, row in df.head(n_samples).iterrows():
+            print(f"Q{i+1}: {row.iloc[0] if len(row) > 0 else 'No question'}")
+            print(f"A{i+1}: {row.iloc[1] if len(row) > 1 else 'No answer'}")
+            print("-" * 60)
+    def get_dataset_statistics(self, df):
+        """Generate basic statistics about the dataset"""
+        stats = {
+            'total_pairs': len(df),
+            'columns': df.columns.tolist(),
+            'missing_values': df.isnull().sum().to_dict(),
+            'data_types': df.dtypes.to_dict()
+        }
+        # Save statistics
+        stats_file = self.data_dir / "dataset_statistics.json"
+        with open(stats_file, 'w') as f:
+            json.dump(stats, f, indent=2, default=str)
+        logger.info("Dataset Statistics:")
+        logger.info(f"- Total Q&A pairs: {stats['total_pairs']}")
+        logger.info(f"- Columns: {stats['columns']}")
+        logger.info(f"- Statistics saved to {stats_file}")
+        return stats
+def main():
+    """Main execution function"""
+    print("🫀 CardioQA Data Collection Pipeline")
+    print("=" * 50)
+    # Initialize collector
+    collector = MedicalDataCollector()
+    # Step 1: Collect MedQuAD dataset
+    print("\n📊 Step 1: Collecting MedQuAD Dataset...")
+    medquad_df = collector.collect_medquad_dataset()
+    if medquad_df is not None:
+        # Step 2: Generate statistics
+        print("\n📈 Step 2: Analyzing Dataset...")
+        stats = collector.get_dataset_statistics(medquad_df)
+        # Step 3: Display samples
+        print("\n👀 Step 3: Sample Data Preview...")
+        collector.display_sample_data(medquad_df, n_samples=3)
+        # Step 4: Filter cardiac data
+        print("\n🫀 Step 4: Filtering Cardiac Data...")
+        cardiac_df = collector.filter_cardiac_data(medquad_df)
+        # Step 5: Display cardiac samples
+        if len(cardiac_df) > 0:
+            print("\n💓 Step 5: Cardiac Data Preview...")
+            collector.display_sample_data(cardiac_df, n_samples=2)
+        print("\n✅ Data collection completed successfully!")
+        print(f"📁 Files saved in: {collector.data_dir}")
+    else:
+        print("❌ Data collection failed!")
+if __name__ == "__main__":
+    main()