File size: 1,181 Bytes
0a5c991
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""
Script to set up the Pinecone database with medical data
"""
from enhanced_data_loader import load_comprehensive_medical_datasets, chunk_text
from embedding_service import EmbeddingService
import time

def setup_database():
    """Load the medical documents, split them into chunks, and upsert them.

    Progress is reported on stdout. Each chunk record keeps the ``source``
    and ``metadata`` values of the document it was cut from.
    """
    rule = "=" * 50
    print(rule)
    print("Setting up Medical Chatbot Database")
    print(rule)

    # Fetch the complete document set from the data loader.
    source_docs = load_comprehensive_medical_datasets()

    # Flatten to one record per chunk, carrying the parent's source/metadata.
    chunked_documents = [
        {
            'text': piece,
            'source': entry['source'],
            'metadata': entry['metadata'],
        }
        for entry in source_docs
        for piece in chunk_text(entry['text'])
    ]

    print(f"Total chunks: {len(chunked_documents)}")

    # Create the embedding service once the chunks are ready, then hand it
    # the whole batch (the module docstring names Pinecone as the target).
    uploader = EmbeddingService()
    uploader.upsert_documents(chunked_documents)

    print("\n" + rule)
    print("Database setup complete!")
    print(rule)

# Run the setup only when this file is executed as a script, so importing
# the module does not trigger a database load.
if __name__ == "__main__":
    setup_database()