Upload 29 files
Browse files- __pycache__/Chat_sentiment_analysis.cpython-311.pyc +0 -0
 - __pycache__/app.cpython-312.pyc +0 -0
 - __pycache__/config_manager.cpython-311.pyc +0 -0
 - __pycache__/dia.cpython-311.pyc +0 -0
 - __pycache__/fastapi_app.cpython-311.pyc +0 -0
 - __pycache__/fastapi_app.cpython-312.pyc +0 -0
 - __pycache__/fastapi_test.cpython-311.pyc +0 -0
 - __pycache__/main.cpython-311.pyc +0 -0
 - __pycache__/test_dia.cpython-311.pyc +0 -0
 - models/__init__.py +0 -0
 - models/__pycache__/__init__.cpython-311.pyc +0 -0
 - models/__pycache__/chat_session.cpython-311.pyc +0 -0
 - models/__pycache__/user.cpython-311.pyc +0 -0
 - models/chat_session.py +84 -0
 - models/user.py +43 -0
 - scripts/__init__.py +0 -0
 - scripts/__pycache__/__init__.cpython-310.pyc +0 -0
 - scripts/db/__pycache__/session.cpython-310.pyc +0 -0
 - scripts/db/create_article_tables.py +20 -0
 - scripts/db/create_extensions.py +10 -0
 - scripts/db/create_user_tables.py +21 -0
 - scripts/db/encrypt_passwords.py +58 -0
 - scripts/db/seed_data_helplines_organizations.py +123 -0
 - scripts/db/session.py +16 -0
 - scripts/ingest/upload_articles_from_bucket.py +118 -0
 - scripts/ingest/upload_files_to_bucket.py +73 -0
 - scripts/migration_schemas/__pycache__/resources_models.cpython-310.pyc +0 -0
 - scripts/migration_schemas/resources_models.py +62 -0
 - scripts/migration_schemas/user_models.py +54 -0
 
    	
        __pycache__/Chat_sentiment_analysis.cpython-311.pyc
    ADDED
    
    | 
         Binary file (19.7 kB). View file 
     | 
| 
         | 
    	
        __pycache__/app.cpython-312.pyc
    ADDED
    
    | 
         Binary file (15.2 kB). View file 
     | 
| 
         | 
    	
        __pycache__/config_manager.cpython-311.pyc
    ADDED
    
    | 
         Binary file (12.5 kB). View file 
     | 
| 
         | 
    	
        __pycache__/dia.cpython-311.pyc
    ADDED
    
    | 
         Binary file (704 Bytes). View file 
     | 
| 
         | 
    	
        __pycache__/fastapi_app.cpython-311.pyc
    ADDED
    
    | 
         Binary file (68.6 kB). View file 
     | 
| 
         | 
    	
        __pycache__/fastapi_app.cpython-312.pyc
    ADDED
    
    | 
         Binary file (29.5 kB). View file 
     | 
| 
         | 
    	
        __pycache__/fastapi_test.cpython-311.pyc
    ADDED
    
    | 
         Binary file (14 kB). View file 
     | 
| 
         | 
    	
        __pycache__/main.cpython-311.pyc
    ADDED
    
    | 
         Binary file (76.7 kB). View file 
     | 
| 
         | 
    	
        __pycache__/test_dia.cpython-311.pyc
    ADDED
    
    | 
         Binary file (714 Bytes). View file 
     | 
| 
         | 
    	
        models/__init__.py
    ADDED
    
    | 
         
            File without changes
         
     | 
    	
        models/__pycache__/__init__.cpython-311.pyc
    ADDED
    
    | 
         Binary file (186 Bytes). View file 
     | 
| 
         | 
    	
        models/__pycache__/chat_session.cpython-311.pyc
    ADDED
    
    | 
         Binary file (5.66 kB). View file 
     | 
| 
         | 
    	
        models/__pycache__/user.cpython-311.pyc
    ADDED
    
    | 
         Binary file (2.91 kB). View file 
     | 
| 
         | 
    	
        models/chat_session.py
    ADDED
    
    | 
         @@ -0,0 +1,84 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            from datetime import datetime
         
     | 
| 2 | 
         
            +
            from pathlib import Path
         
     | 
| 3 | 
         
            +
            import json
         
     | 
| 4 | 
         
            +
            import uuid
         
     | 
| 5 | 
         
            +
             
     | 
| 6 | 
         
            +
            class ChatSession:
                """In-memory record of one chat conversation that can be saved as JSON.

                Attributes:
                    session_id: Caller-supplied id, or a freshly generated UUID4 string.
                    user_name: Name of the user the session belongs to.
                    start_time: ``datetime`` at which the session object was created
                        (or the restored value when built through ``from_dict``).
                    messages: List of message dicts in the order they were added.
                    session_data: Aggregated state; ``topics_discussed`` and
                        ``agents_used`` are kept as sets while in memory.
                """

                def __init__(self, user_name, session_id=None):
                    """Start an empty session for ``user_name``."""
                    self.session_id = session_id or str(uuid.uuid4())
                    self.user_name = user_name
                    self.start_time = datetime.now()
                    self.messages = []
                    self.session_data = {
                        "emotion": None,
                        "mental_health_status": None,
                        "topics_discussed": set(),
                        "agents_used": set(),
                        "sentiment_scores": []
                    }

                def _add_to_collection(self, key, value):
                    """Add ``value`` to the set stored under ``session_data[key]``.

                    If the stored value is not set-like (for example a list taken
                    from deserialised JSON) it is converted to a set first.
                    """
                    current = self.session_data.get(key)
                    if not hasattr(current, 'add'):
                        current = set(current or ())
                        self.session_data[key] = current
                    current.add(value)

                @staticmethod
                def _as_list(value):
                    """Return sets as lists (JSON has no set type); pass others through."""
                    return list(value) if isinstance(value, set) else value

                def add_message(self, role, content, agent=None, metadata=None):
                    """Append a timestamped message and record the agent, if any.

                    Args:
                        role: Who produced the message.
                        content: Message body.
                        agent: Optional agent name; truthy values are added to
                            ``session_data["agents_used"]``.
                        metadata: Optional dict stored with the message
                            (an empty dict is stored when falsy).
                    """
                    message = {
                        "timestamp": datetime.now().isoformat(),
                        "role": role,
                        "content": content,
                        "agent": agent,
                        "metadata": metadata or {}
                    }
                    self.messages.append(message)

                    if agent:
                        # Same coercion as add_topic, so the agent is not silently
                        # dropped when agents_used happens to be a list.
                        self._add_to_collection("agents_used", agent)

                def add_topic(self, topic):
                    """Safely add a topic, converting a list-valued store to a set."""
                    self._add_to_collection("topics_discussed", topic)

                def to_dict(self):
                    """Return a JSON-serialisable snapshot of the session.

                    ``end_time`` and ``duration_minutes`` are computed from a single
                    reading of the clock at call time.
                    """
                    now = datetime.now()
                    elapsed = now - self.start_time
                    return {
                        "session_id": self.session_id,
                        "user_name": self.user_name,
                        "start_time": self.start_time.isoformat(),
                        "end_time": now.isoformat(),
                        # total_seconds() includes whole days; .seconds would wrap
                        # around every 24 hours.
                        "duration_minutes": int(elapsed.total_seconds() // 60),
                        "messages": self.messages,
                        "total_messages": len(self.messages),
                        "session_data": {
                            **self.session_data,
                            "topics_discussed": self._as_list(self.session_data["topics_discussed"]),
                            "agents_used": self._as_list(self.session_data["agents_used"])
                        }
                    }

                @classmethod
                def from_dict(cls, data, user_name=None):
                    """Create a ChatSession from dictionary data.

                    Args:
                        data: Mapping in the shape produced by ``to_dict``.
                        user_name: Overrides ``data["user_name"]`` when given;
                            ``'Guest'`` is used when neither is present.

                    Returns:
                        A new session whose set-valued fields are real sets again
                        and whose ``start_time`` is restored when it can be parsed.
                    """
                    session = cls(user_name or data.get('user_name', 'Guest'), data.get('session_id'))
                    session.messages = data.get('messages') or []

                    raw_start = data.get('start_time')
                    if raw_start:
                        try:
                            session.start_time = datetime.fromisoformat(raw_start)
                        except (TypeError, ValueError):
                            # Unparseable timestamp: keep the creation time set by
                            # __init__ rather than failing the whole load.
                            pass

                    # Reconstruct session_data with proper types
                    stored_data = data.get('session_data') or {}
                    session.session_data = {
                        "emotion": stored_data.get('emotion'),
                        "mental_health_status": stored_data.get('mental_health_status'),
                        "topics_discussed": set(stored_data.get('topics_discussed') or []),
                        "agents_used": set(stored_data.get('agents_used') or []),
                        "sentiment_scores": stored_data.get('sentiment_scores') or []
                    }

                    return session

                def save(self, directory="chat_sessions"):
                    """Write the session to a JSON file and return the file name.

                    The directory (including missing parents) is created on demand.
                    Path separators in ``user_name`` are replaced with ``_`` so the
                    name cannot redirect the file outside ``directory``.
                    """
                    Path(directory).mkdir(parents=True, exist_ok=True)
                    safe_user = str(self.user_name).replace("/", "_").replace("\\", "_")
                    stamp = self.start_time.strftime('%Y%m%d_%H%M%S')
                    filename = f"{directory}/chat_{safe_user}_{stamp}_{self.session_id[:8]}.json"

                    with open(filename, 'w', encoding="utf-8") as f:
                        json.dump(self.to_dict(), f, indent=2)

                    return filename
         
     | 
    	
        models/user.py
    ADDED
    
    | 
         @@ -0,0 +1,43 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            from sqlalchemy import create_engine, Column, Integer, String, DateTime, Boolean, Float, Text
         
     | 
| 2 | 
         
            +
            from sqlalchemy.ext.declarative import declarative_base
         
     | 
| 3 | 
         
            +
            from sqlalchemy.orm import sessionmaker
         
     | 
| 4 | 
         
            +
            from werkzeug.security import generate_password_hash, check_password_hash
         
     | 
| 5 | 
         
            +
            from datetime import datetime
         
     | 
| 6 | 
         
            +
            import secrets
         
     | 
| 7 | 
         
            +
             
     | 
| 8 | 
         
            +
            Base = declarative_base()
         
     | 
| 9 | 
         
            +
             
     | 
| 10 | 
         
            +
            class User(Base):
                """ORM model for an account row in the ``users`` table.

                Passwords are never stored directly: ``set_password`` stores a hash
                produced by werkzeug and ``check_password`` verifies against it.
                """

                __tablename__ = 'users'

                id = Column(Integer, primary_key=True)
                username = Column(String(80), unique=True, nullable=False)
                email = Column(String(120), unique=True, nullable=False)
                password_hash = Column(String(255), nullable=False)
                full_name = Column(String(100))
                created_at = Column(DateTime, default=datetime.utcnow)
                last_login = Column(DateTime)
                is_active = Column(Boolean, default=True)
                is_verified = Column(Boolean, default=False)

                # Profile completion status
                has_completed_initial_survey = Column(Boolean, default=False)
                initial_survey_date = Column(DateTime)

                # Session management
                session_token = Column(String(255))

                def set_password(self, password):
                    """Hash ``password`` and store the result in ``password_hash``."""
                    self.password_hash = generate_password_hash(password)

                def check_password(self, password):
                    """Return True when ``password`` matches the stored hash.

                    Returns False, instead of raising, when no hash has been set
                    yet or when ``password`` is None.
                    """
                    if not self.password_hash or password is None:
                        return False
                    return check_password_hash(self.password_hash, password)

                def generate_session_token(self):
                    """Create, store and return a new random URL-safe session token."""
                    self.session_token = secrets.token_urlsafe(32)
                    return self.session_token
         
     | 
| 39 | 
         
            +
             
     | 
| 40 | 
         
            +
            # Module-level database setup: open the SQLite file, make sure every
            # table declared on Base exists, and expose a session factory.
            engine = create_engine('sqlite:///mental_health_app.db')
            Base.metadata.create_all(bind=engine)
            Session = sessionmaker(bind=engine)
         
     | 
    	
        scripts/__init__.py
    ADDED
    
    | 
         
            File without changes
         
     | 
    	
        scripts/__pycache__/__init__.cpython-310.pyc
    ADDED
    
    | 
         Binary file (182 Bytes). View file 
     | 
| 
         | 
    	
        scripts/db/__pycache__/session.cpython-310.pyc
    ADDED
    
    | 
         Binary file (709 Bytes). View file 
     | 
| 
         | 
    	
        scripts/db/create_article_tables.py
    ADDED
    
    | 
         @@ -0,0 +1,20 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            from backend.scripts.migration_schemas.resources_models import Base
         
     | 
| 2 | 
         
            +
            from backend.scripts.db.session import engine
         
     | 
| 3 | 
         
            +
            from sqlalchemy import inspect
         
     | 
| 4 | 
         
            +
             
     | 
| 5 | 
         
            +
            def create_tables():
                """Create any missing article tables and print which ones were added.

                Table names are listed before and after ``Base.metadata.create_all``
                and the difference is reported.
                """
                tables_before = set(inspect(engine).get_table_names())

                Base.metadata.create_all(bind=engine)

                # A fresh inspector is required here: an Inspector caches what it
                # has already reflected, so re-using the first one would return the
                # pre-creation list and never report any new table.
                tables_after = set(inspect(engine).get_table_names())
                new_tables = sorted(tables_after - tables_before)

                if new_tables:
                    print(f"[INFO] Created new article tables: {', '.join(new_tables)}")
                else:
                    print("[INFO] No new article tables created. All tables already exist.")


            if __name__ == "__main__":
                create_tables()
         
     | 
    	
        scripts/db/create_extensions.py
    ADDED
    
    | 
         @@ -0,0 +1,10 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            from sqlalchemy import text
         
     | 
| 2 | 
         
            +
            from backend.scripts.db.session import engine
         
     | 
| 3 | 
         
            +
             
     | 
| 4 | 
         
            +
            def create_vector_extension():
                """Ensure the ``vector`` extension exists in the ``extensions`` schema.

                The statement runs inside ``engine.begin()`` so it is committed when
                the block exits. A plain ``engine.connect()`` does not autocommit on
                SQLAlchemy 2.x, so the DDL would be rolled back when the connection
                closes.
                """
                with engine.begin() as conn:
                    conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA extensions;"))
                # Reached only after the transaction committed without error.
                print("[INFO] 'vector' extension created if not present.")


            if __name__ == "__main__":
                create_vector_extension()
         
     | 
    	
        scripts/db/create_user_tables.py
    ADDED
    
    | 
         @@ -0,0 +1,21 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            from backend.scripts.migration_schemas.user_models import Base
         
     | 
| 2 | 
         
            +
            from backend.scripts.db.session import engine
         
     | 
| 3 | 
         
            +
            from sqlalchemy import inspect
         
     | 
| 4 | 
         
            +
             
     | 
| 5 | 
         
            +
            def create_tables():
                """Create any missing user tables and print which ones were added.

                Table names are listed before and after ``Base.metadata.create_all``
                and the difference is reported.
                """
                tables_before = set(inspect(engine).get_table_names())

                # This will create all tables defined on Base.metadata that don't yet exist
                Base.metadata.create_all(bind=engine)

                # A fresh inspector is required here: an Inspector caches what it
                # has already reflected, so re-using the first one would return the
                # pre-creation list and never report any new table.
                tables_after = set(inspect(engine).get_table_names())
                new_tables = sorted(tables_after - tables_before)

                if new_tables:
                    print(f"[INFO] Created new tables: {', '.join(new_tables)}")
                else:
                    print("[INFO] No new tables created. All tables already exist.")


            if __name__ == "__main__":
                create_tables()
         
     | 
    	
        scripts/db/encrypt_passwords.py
    ADDED
    
    | 
         @@ -0,0 +1,58 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            import os
         
     | 
| 2 | 
         
            +
            import psycopg2
         
     | 
| 3 | 
         
            +
            import bcrypt
         
     | 
| 4 | 
         
            +
            from psycopg2.extras import RealDictCursor
         
     | 
| 5 | 
         
            +
            from dotenv import load_dotenv
         
     | 
| 6 | 
         
            +
            from backend.scripts.db.session import DATABASE_URL
         
     | 
| 7 | 
         
            +
            import traceback
         
     | 
| 8 | 
         
            +
             
     | 
| 9 | 
         
            +
             
     | 
| 10 | 
         
            +
            def get_db_connection():
                """Open a psycopg2 connection to ``DATABASE_URL`` using ``RealDictCursor``."""
                connection = psycopg2.connect(DATABASE_URL, cursor_factory=RealDictCursor)
                return connection
         
     | 
| 12 | 
         
            +
             
     | 
| 13 | 
         
            +
            def hash_unencrypted_passwords():
                """Replace every non-bcrypt ``user_password`` in ``user_profiles`` with a bcrypt hash.

                All updates are committed together at the end. On any exception the
                error and traceback are printed and the transaction is rolled back.
                The cursor and connection are closed in every case.
                """
                db = None
                cur = None
                try:
                    db = get_db_connection()
                    cur = db.cursor()

                    # Rows whose stored value does not start with "$2" are treated
                    # as not yet bcrypt-hashed.
                    cur.execute("""
                        SELECT id, user_password 
                        FROM user_profiles 
                        WHERE user_password NOT LIKE '$2%';
                    """)
                    pending = cur.fetchall()

                    print(f"Found {len(pending)} users with unencrypted passwords.")

                    for row in pending:
                        row_id = row['id']
                        plaintext = row['user_password']

                        # Fresh salt per row; store the hash as text.
                        salted = bcrypt.hashpw(plaintext.encode(), bcrypt.gensalt())
                        new_value = salted.decode()

                        # Overwrite the stored value for this row only.
                        cur.execute("""
                            UPDATE user_profiles 
                            SET user_password = %s 
                            WHERE id = %s
                        """, (new_value, row_id))

                    db.commit()
                    print("Password hashing complete.")

                except Exception as e:
                    print("[ERROR]", e)
                    traceback.print_exc()
                    if db:
                        db.rollback()
                finally:
                    if cur:
                        cur.close()
                    if db:
                        db.close()


            if __name__ == "__main__":
                hash_unencrypted_passwords()
         
     | 
    	
        scripts/db/seed_data_helplines_organizations.py
    ADDED
    
    | 
         @@ -0,0 +1,123 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            import uuid
         
     | 
| 2 | 
         
            +
            import numpy as np
         
     | 
| 3 | 
         
            +
            from sqlalchemy.exc import IntegrityError, SQLAlchemyError
         
     | 
| 4 | 
         
            +
             
     | 
| 5 | 
         
            +
            from backend.scripts.db.session import SessionLocal
         
     | 
| 6 | 
         
            +
            from backend.scripts.migration_schemas.resources_models import Resource
         
     | 
| 7 | 
         
            +
             
     | 
| 8 | 
         
            +
            def seed_resources():
                """Insert the built-in helpline and organization records.

                A fixed list of ``Resource`` rows is built (each with a freshly
                generated UUID string as ``id``) and every row is added inside its own
                SAVEPOINT. A row rejected by a database constraint is therefore skipped
                on its own instead of rolling back the rows that were accepted.

                Progress and errors are reported with ``print``. SQLAlchemy errors
                raised while inserting or committing are caught and printed rather
                than propagated. Returns ``None``.
                """
                # Build the rows before opening a session so that a constructor error
                # cannot leave an open session behind.
                resources = [
                    Resource(
                        id=str(uuid.uuid4()),
                        name="Primary Crisis Hotline",
                        phone="1010",
                        operation_hours="24/7",
                        description="Operated by the Bhutan Youth Development Fund (YDF) and the Ministry of Health, this helpline offers support for mental health issues and suicide prevention.",
                        category="mental health",
                        type="helpline",
                        source="bhutanyouth.org",
                        website="https://bhutanyouth.org",
                    ),
                    Resource(
                        id=str(uuid.uuid4()),
                        name="Emergency Line",
                        phone="112",
                        operation_hours="24/7",
                        description="The national emergency number is for immediate assistance and is accessible via mobile and landline.",
                        category="emergency",
                        type="helpline",
                        source="National Emergency Services",
                        website="moh.gov.bt",
                    ),
                    Resource(
                        id=str(uuid.uuid4()),
                        name="Mental Health Support Line",
                        phone="1098",
                        operation_hours="24/7",
                        description="National helpline for children and vulnerable groups. Provides mental health support.",
                        category="mental health, children",
                        type="helpline",
                        source="National Helpline Directory",
                    ),
                    Resource(
                        id=str(uuid.uuid4()),
                        name="Sherig Counselling Services (MoE)",
                        phone="17861294",
                        operation_hours="Weekdays 9am–5pm",
                        description="Counselling helpline for students and youth, staffed by trained school counselors.",
                        category="youth, education, counseling",
                        source="moe.gov.bt",
                        website="https://sites.google.com/moe.gov.bt/sherigcounsellingservices",
                        type="helpline",
                    ),
                    Resource(
                        id=str(uuid.uuid4()),
                        name="PEMA (Psychosocial Education and Mental Health Awareness)",
                        phone="1010",
                        website="https://thepema.gov.bt/",
                        description="PEMA is the national nodal agency for mental health promotion and services, offering counselling, crisis intervention, and rehabilitation. They also have a helpline and offer walk-in services.",
                        type="organization",
                    ),
                    Resource(
                        id=str(uuid.uuid4()),
                        name="RENEW (Respect, Educate, Nurture, and Empower Women)",
                        phone="+975 2 332 159",
                        website="https://renew.org.bt/",
                        description="Founded by Her Majesty Gyalyum Sangay Choden Wangchuck in 2004, RENEW is a non-profit organization supporting women.",
                        type="organization",
                    ),
                    Resource(
                        id=str(uuid.uuid4()),
                        name="Jigme Dorji Wangchuck National Referral Hospital",
                        phone="+975 17 32 24 96",
                        website="https://jdwnrh.gov.bt/",
                        description="This hospital has a psychiatric ward, providing specialized mental health services.",
                        type="organization",
                    ),
                    Resource(
                        id=str(uuid.uuid4()),
                        name="Bhutan Board for Certified Counselors (BBCC)",
                        description="Accredits and supports professional counselors in Bhutan. Promotes ethical and culturally sensitive counseling.",
                        phone=None,
                        website="https://www.counselingbhutan.com",
                        address="Thimphu",
                        type="organization",
                    ),
                    Resource(
                        id=str(uuid.uuid4()),
                        name="Institute of Traditional Medicine Services",
                        description="Provides traditional Bhutanese medical treatments, including mental and spiritual healing.",
                        phone=None,
                        website=None,
                        address="Langjophakha, Thimphu",
                        type="organization",
                    ),
                ]

                session = SessionLocal()
                inserted = 0
                try:
                    for resource in resources:
                        try:
                            # The SAVEPOINT is released (and the row flushed) when the
                            # ``with`` block exits; a constraint violation rolls back
                            # only this one row.
                            with session.begin_nested():
                                session.add(resource)
                        except IntegrityError as e:
                            print(f"⚠️ Duplicate detected, skipping existing entries: {str(e)}")
                        else:
                            inserted += 1
                    session.commit()
                    # Report what was actually written, not the size of the seed list.
                    print(f"✅ Inserted {inserted} resources into DB.")
                except SQLAlchemyError as e:
                    session.rollback()
                    print(f"❌ Failed to seed: {str(e)}")
                finally:
                    session.close()
         
     | 
| 116 | 
         
            +
             
     | 
| 117 | 
         
            +
             
     | 
| 118 | 
         
            +
            def main():
                """Script entry point: run the resource seeding routine."""
                seed_resources()


            # Seed only when executed as a script, never as a side effect of an import.
            if __name__ == "__main__":
                main()
         
     | 
    	
        scripts/db/session.py
    ADDED
    
    | 
         @@ -0,0 +1,16 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            import os
         
     | 
| 2 | 
         
            +
            from sqlalchemy import create_engine
         
     | 
| 3 | 
         
            +
            from sqlalchemy.orm import sessionmaker
         
     | 
| 4 | 
         
            +
            from dotenv import load_dotenv
         
     | 
| 5 | 
         
            +
             
     | 
| 6 | 
         
            +
            # Load DATABASE_* settings from a local .env file into the environment.
            load_dotenv()


            def _database_url(password):
                """Return the PostgreSQL URL built from the DATABASE_* variables.

                ``password`` is inserted verbatim, which lets the caller choose between
                the real secret (for the engine) and a placeholder (for log output).
                Unset variables are rendered as the text ``None``.
                """
                return (
                    f"postgresql://{os.getenv('DATABASE_USER')}:{password}"
                    f"@{os.getenv('DATABASE_HOST')}:{os.getenv('DATABASE_PORT')}/{os.getenv('DATABASE_NAME')}"
                )


            # NOTE(review): user and password are interpolated without percent-encoding;
            # values containing "@", ":" or "/" presumably need to be pre-encoded in the
            # environment - confirm against the deployment's .env conventions.
            DATABASE_URL = _database_url(os.getenv("DATABASE_PASSWORD"))

            # Log the connection target with the password masked so the secret never
            # reaches stdout or captured logs.
            print("Connecting to:", _database_url("***"))

            # The sslmode connect argument is passed through to the database driver.
            engine = create_engine(DATABASE_URL, connect_args={"sslmode": "require"})
            SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
         
     | 
    	
        scripts/ingest/upload_articles_from_bucket.py
    ADDED
    
    | 
         @@ -0,0 +1,118 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            import os
         
     | 
| 2 | 
         
            +
            import io
         
     | 
| 3 | 
         
            +
            import uuid
         
     | 
| 4 | 
         
            +
            import requests
         
     | 
| 5 | 
         
            +
            import tempfile
         
     | 
| 6 | 
         
            +
             
     | 
| 7 | 
         
            +
            from dotenv import load_dotenv
         
     | 
| 8 | 
         
            +
            import boto3
         
     | 
| 9 | 
         
            +
            from sqlalchemy.exc import SQLAlchemyError
         
     | 
| 10 | 
         
            +
             
     | 
| 11 | 
         
            +
            from backend.app.core.deps import get_config_value, get_embedding_model
         
     | 
| 12 | 
         
            +
            from backend.rag.pdf_parser import extract_text
         
     | 
| 13 | 
         
            +
            from backend.rag.embeddings import generate_embeddings
         
     | 
| 14 | 
         
            +
            from backend.rag.chunker import smart_chunk_text, create_chunk_objects
         
     | 
| 15 | 
         
            +
            from backend.scripts.db.session import SessionLocal
         
     | 
| 16 | 
         
            +
            from backend.scripts.migration_schemas.resources_models import Article, ArticleChunk
         
     | 
| 17 | 
         
            +
             
     | 
| 18 | 
         
            +
            # Populate the process environment from a local .env file before reading it.
            load_dotenv()

            # === Storage settings, read once at import time (None when unset) ===
            ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY_ID")
            SECRET_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
            REGION = os.environ.get("AWS_REGION")
            ENDPOINT_URL = os.environ.get("SUPABASE_STORAGE_ENDPOINT")
            BUCKET_NAME = os.environ.get("SUPABASE_BUCKET")
            SUPABASE_STORAGE_URL = os.environ.get("SUPABASE_STORAGE_URL")
         
     | 
| 27 | 
         
            +
             
     | 
| 28 | 
         
            +
            class SyncUpload:
                """Sync PDF articles from the storage bucket into the article tables.

                On construction the instance loads the embedding model, lists the PDF
                keys found in the bucket and reads the article titles already stored in
                the database. ``articles_to_rag`` then downloads, chunks, embeds and
                inserts every PDF whose title is not stored yet.
                """

                # Seconds to wait on the storage server before abandoning a download.
                DOWNLOAD_TIMEOUT = 60

                def __init__(self):
                    self.model = get_embedding_model()
                    self.embedding_dim = get_config_value("model.embedding_dim", 384)
                    self.file_list = self.get_articles_supabase()
                    self.current_articles = self.get_current_articles_psql()

                @staticmethod
                def _title_from_key(key):
                    """Return *key* without its trailing ``.pdf``; inner ".pdf" text is kept."""
                    suffix = ".pdf"
                    return key[: -len(suffix)] if key.endswith(suffix) else key

                def get_articles_supabase(self):
                    """Return the keys of every ``.pdf`` object in the bucket.

                    The listing is paginated because a single ``list_objects_v2`` call
                    returns only one page of results (at most 1000 keys).
                    """
                    s3 = boto3.client(
                        's3',
                        region_name=REGION,
                        endpoint_url=ENDPOINT_URL,
                        aws_access_key_id=ACCESS_KEY,
                        aws_secret_access_key=SECRET_KEY,
                    )
                    paginator = s3.get_paginator('list_objects_v2')
                    return [
                        obj['Key']
                        for page in paginator.paginate(Bucket=BUCKET_NAME)
                        for obj in page.get('Contents', [])
                        if obj['Key'].endswith('.pdf')
                    ]

                def get_current_articles_psql(self):
                    """Return the titles of all articles currently stored in the database."""
                    session = SessionLocal()
                    try:
                        return [row.title for row in session.query(Article.title).all()]
                    finally:
                        session.close()

                def extract_text_from_bytes(self, pdf_bytesio):
                    """Spill the PDF stream to a temporary file and return its extracted text.

                    ``extract_text`` is given a file path, so the in-memory bytes are
                    written to disk first; the file is removed when the block exits.
                    """
                    # NOTE(review): reopening a NamedTemporaryFile by name while it is
                    # still open is not supported on Windows - confirm target platforms.
                    with tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) as tmp:
                        tmp.write(pdf_bytesio.read())
                        tmp.flush()  # make sure the bytes are on disk before parsing
                        return extract_text(tmp.name)

                def create_article_object(self, id, title):
                    """Build an ``Article`` row (not yet added to any session)."""
                    return Article(id=id, title=title)

                def file_to_chunks(self, article_filename, doc_id):
                    """Download one PDF and return its embedded chunks.

                    Returns the value produced by ``generate_embeddings``, or ``None``
                    when the download or any processing step fails (the failure is
                    printed).
                    """
                    # NOTE(review): the "pdfs" segment and the double slash are kept
                    # exactly as they were; confirm they match the public bucket URL.
                    article_url = f"{SUPABASE_STORAGE_URL}/v1/object/public/pdfs//{article_filename}"
                    print(f"[INFO] Downloading: {article_url}")
                    try:
                        # A timeout keeps one stalled download from hanging the run.
                        response = requests.get(article_url, timeout=self.DOWNLOAD_TIMEOUT)
                    except requests.RequestException as e:
                        print(f"[ERROR] Failed to download {article_url}: {e}")
                        return None
                    if response.status_code != 200:
                        print(f"[ERROR] Failed to download {article_url}")
                        return None

                    try:
                        text = self.extract_text_from_bytes(io.BytesIO(response.content))
                        print(f"[SUCCESS] Extracted {len(text)} characters from '{article_filename}'")
                        raw_chunks = smart_chunk_text(text)
                        chunk_objs = create_chunk_objects(doc_id=doc_id, chunks=raw_chunks)
                        return generate_embeddings(chunk_objs)
                    except Exception as e:
                        # Deliberate best-effort: one bad PDF must not stop the batch.
                        print(f"[ERROR] Processing failed for {article_filename}: {e}")
                        return None

                def articles_to_rag(self):
                    """Insert every bucket PDF that is not yet stored, with its chunks.

                    Each article and its chunks are committed together; a database
                    error rolls back only that article and the loop continues.
                    """
                    known_titles = set(self.current_articles)  # O(1) membership tests
                    session = SessionLocal()
                    try:
                        for article_file in self.file_list:
                            article_title = self._title_from_key(article_file)
                            if article_title in known_titles:
                                print(f"[SKIP] Already processed: {article_title}")
                                continue

                            doc_id = str(uuid.uuid4())
                            chunk_data = self.file_to_chunks(article_file, doc_id)
                            if not chunk_data:
                                continue

                            article = self.create_article_object(id=doc_id, title=article_title)
                            try:
                                chunks = [
                                    ArticleChunk(
                                        chunk_id=c["chunk_id"],
                                        doc_id=c["doc_id"],
                                        chunk_text=c["chunk_text"],
                                        embedding=c["embedding"],
                                        keywords=c["keywords"],
                                    )
                                    for c in chunk_data
                                ]

                                session.add(article)
                                session.add_all(chunks)
                                session.commit()
                                print(f"[SUCCESS] Uploaded: {article_title} ({len(chunks)} chunks)")
                            except SQLAlchemyError as e:
                                session.rollback()
                                print(f"[ERROR] DB insert failed for {article_title}: {e}")
                    finally:
                        # Close once, after the whole batch, instead of closing the
                        # shared session inside the loop and then reusing it.
                        session.close()
         
     | 
| 116 | 
         
            +
             
     | 
| 117 | 
         
            +
            if __name__ == "__main__":
                # Constructing the uploader lists the bucket and reads the stored
                # titles; the call below then ingests whatever is missing.
                uploader = SyncUpload()
                uploader.articles_to_rag()
         
     | 
    	
        scripts/ingest/upload_files_to_bucket.py
    ADDED
    
    | 
         @@ -0,0 +1,73 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            import os
         
     | 
| 2 | 
         
            +
            import boto3
         
     | 
| 3 | 
         
            +
            from pathlib import Path
         
     | 
| 4 | 
         
            +
            from botocore.exceptions import ClientError
         
     | 
| 5 | 
         
            +
            from tqdm import tqdm
         
     | 
| 6 | 
         
            +
            import sys
         
     | 
| 7 | 
         
            +
             
     | 
| 8 | 
         
            +
            # Storage settings, read once at import time (None when a variable is unset).
            AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
            AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
            ENDPOINT_URL = os.environ.get("SUPABASE_STORAGE_ENDPOINT")
            BUCKET_NAME = os.environ.get("SUPABASE_BUCKET")
            REGION = os.environ.get("AWS_REGION")

            # Module-level S3 client pointed at the configured storage endpoint.
            s3 = boto3.client(
                "s3",
                region_name=REGION,
                endpoint_url=ENDPOINT_URL,
                aws_access_key_id=AWS_ACCESS_KEY_ID,
                aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
            )
         
     | 
| 21 | 
         
            +
             
     | 
| 22 | 
         
            +
            def upload_pdfs(folder_path: str):
                """Upload every ``*.pdf`` found directly inside *folder_path* to the bucket.

                A file is skipped when an object with the same name already exists in
                ``BUCKET_NAME``. A failed upload is reported and the remaining files
                are still attempted.

                Args:
                    folder_path: Directory to scan (non-recursively) for PDF files.

                Returns:
                    None. Progress and errors are printed to stdout.
                """
                # Local import: upload_file() wraps ClientError in this exception type,
                # so catching ClientError alone would let upload failures escape.
                from boto3.exceptions import S3UploadFailedError

                # Validate the local input first so a bad path costs no network call.
                folder = Path(folder_path)
                if not folder.is_dir():
                    print("❌ Invalid folder path.")
                    return

                pdf_files = list(folder.glob("*.pdf"))
                if not pdf_files:
                    print("Folder exists, but no PDF files were found.")
                    return

                # A single list_objects_v2 call returns at most 1000 keys; walk every
                # page so files in large buckets are not uploaded a second time.
                existing_keys = set()
                paginator = s3.get_paginator("list_objects_v2")
                for page in paginator.paginate(Bucket=BUCKET_NAME):
                    existing_keys.update(
                        obj["Key"]
                        for obj in page.get("Contents", [])
                        if obj["Key"].endswith(".pdf")
                    )

                failed = []
                for file_path in tqdm(pdf_files):
                    key = file_path.name

                    if key in existing_keys:
                        print(f"✅ {key} already exists in the bucket, skipping.")
                        continue

                    print(f"📄 Uploading: {key}")
                    try:
                        s3.upload_file(
                            Filename=str(file_path),
                            Bucket=BUCKET_NAME,
                            Key=key,
                            ExtraArgs={"ContentType": "application/pdf"},
                        )
                    except (ClientError, S3UploadFailedError) as e:
                        print(f"❌ Error uploading {key}: {e}")
                        failed.append(key)

                if failed:
                    # Do not claim success when some files never reached the bucket.
                    print(f"❌ Upload finished with {len(failed)} failed file(s): {', '.join(failed)}")
                else:
                    print("✅ Upload complete. Run `make sync-bucket` to process the files.")
         
     | 
| 58 | 
         
            +
             
     | 
| 59 | 
         
            +
             
     | 
| 60 | 
         
            +
             
     | 
| 61 | 
         
            +
            def main():
                """Resolve the folder containing the PDFs and upload its contents.

                The folder is taken from the first command-line argument when one is
                given. Otherwise the user is prompted; an empty answer selects the
                default ``backend/data/rag_articles``.
                """
                default_folder = "backend/data/rag_articles"

                if len(sys.argv) > 1:
                    folder = sys.argv[1]
                else:
                    print(f"No folder path provided. Default: {default_folder}")
                    answer = input(
                        f"📂 Enter path to folder with PDFs [{default_folder}]: "
                    ).strip()
                    # An empty answer used to become Path(""), i.e. the current
                    # directory; fall back to the advertised default instead.
                    folder = answer or default_folder

                print(f"Using folder: {folder}")
                upload_pdfs(folder)


            if __name__ == "__main__":
                main()
         
     | 
    	
        scripts/migration_schemas/__pycache__/resources_models.cpython-310.pyc
    ADDED
    
    | 
         Binary file (2 kB). View file 
     | 
| 
         | 
    	
        scripts/migration_schemas/resources_models.py
    ADDED
    
    | 
         @@ -0,0 +1,62 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            import yaml
         
     | 
| 2 | 
         
            +
            from sqlalchemy import (
         
     | 
| 3 | 
         
            +
                Column,
         
     | 
| 4 | 
         
            +
                String,
         
     | 
| 5 | 
         
            +
                Text,
         
     | 
| 6 | 
         
            +
                ForeignKey,
         
     | 
| 7 | 
         
            +
                Date,
         
     | 
| 8 | 
         
            +
            )
         
     | 
| 9 | 
         
            +
            from sqlalchemy.dialects.postgresql import ARRAY
         
     | 
| 10 | 
         
            +
            from sqlalchemy.ext.declarative import declarative_base
         
     | 
| 11 | 
         
            +
            from pgvector.sqlalchemy import Vector
         
     | 
| 12 | 
         
            +
            from sqlalchemy.orm import relationship
         
     | 
| 13 | 
         
            +
            import os
         
     | 
| 14 | 
         
            +
            from backend.app.core.deps import EMBEDDING_DIM as embedding_dim
         
     | 
| 15 | 
         
            +
            from sqlalchemy import UniqueConstraint
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
            +
            Base = declarative_base()
         
     | 
| 18 | 
         
            +
             
     | 
| 19 | 
         
            +
             
     | 
| 20 | 
         
            +
            class Resource(Base):
                """ORM model for the ``resources`` table.

                ``name`` is required and must be unique across rows (constraint
                ``uq_resource_name``); ``type`` is required as well.
                """

                __tablename__ = "resources"
                __table_args__ = (UniqueConstraint("name", name="uq_resource_name"),)

                # Primary key, stored as a string.
                id = Column(String, primary_key=True)

                # Descriptive fields.
                name = Column(String, nullable=False)
                description = Column(Text)

                # Contact and availability details; all optional.
                phone = Column(String)
                website = Column(String)
                address = Column(String)
                operation_hours = Column(String)

                # Classification: category is e.g. "mental_health" or "addiction",
                # type is e.g. "helpline" or "organization".
                category = Column(String)
                type = Column(String, nullable=False)

                # Optional origin of the record.
                source = Column(String)
         
     | 
| 37 | 
         
            +
             
     | 
| 38 | 
         
            +
            class Article(Base):
                """ORM model for the ``articles`` table.

                Each article owns a collection of ``ArticleChunk`` rows, reachable
                through ``chunks``.
                """

                __tablename__ = "articles"

                # The primary key is already unique; an extra unique=True would only
                # emit a second, redundant UNIQUE constraint and index.
                id = Column(String, primary_key=True)
                # Titles are required and may not repeat.
                title = Column(String, nullable=False, unique=True)
                author = Column(String)
                source = Column(String)
                published_date = Column(Date)
                topic = Column(String)

                # Paired with ArticleChunk.article.
                chunks = relationship("ArticleChunk", back_populates="article")
         
     | 
| 49 | 
         
            +
             
     | 
| 50 | 
         
            +
            class ArticleChunk(Base):
                """ORM model for the ``article_chunks`` table.

                A row holds one piece of an article's text together with an embedding
                vector of ``embedding_dim`` dimensions.
                """

                __tablename__ = "article_chunks"

                # The primary key is already unique; an extra unique=True would only
                # emit a second, redundant UNIQUE constraint and index.
                chunk_id = Column(String, primary_key=True)
                # Owning article; every chunk must reference one.
                doc_id = Column(String, ForeignKey("articles.id"), nullable=False)
                chunk_text = Column(Text, nullable=False)
                embedding = Column(Vector(embedding_dim))
                # Free-form text; the keyword format is not defined in this module.
                keywords = Column(Text)

                # Paired with Article.chunks.
                article = relationship("Article", back_populates="chunks")
         
     | 
| 60 | 
         
            +
             
     | 
| 61 | 
         
            +
              
         
     | 
| 62 | 
         
            +
              
         
     | 
    	
        scripts/migration_schemas/user_models.py
    ADDED
    
    | 
         @@ -0,0 +1,54 @@ 
     | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
| 
         | 
|
| 1 | 
         
            +
            from sqlalchemy import (
         
     | 
| 2 | 
         
            +
                Column,
         
     | 
| 3 | 
         
            +
                String,
         
     | 
| 4 | 
         
            +
                Integer,
         
     | 
| 5 | 
         
            +
                Text,
         
     | 
| 6 | 
         
            +
                TIMESTAMP,
         
     | 
| 7 | 
         
            +
                CheckConstraint,
         
     | 
| 8 | 
         
            +
                ForeignKey,
         
     | 
| 9 | 
         
            +
                func,
         
     | 
| 10 | 
         
            +
            )
         
     | 
| 11 | 
         
            +
            from sqlalchemy.dialects.postgresql import UUID
         
     | 
| 12 | 
         
            +
            from sqlalchemy.ext.declarative import declarative_base
         
     | 
| 13 | 
         
            +
            import uuid
         
     | 
| 14 | 
         
            +
             
     | 
| 15 | 
         
            +
            Base = declarative_base()
         
     | 
| 16 | 
         
            +
             
     | 
| 17 | 
         
            +
            class UserProfile(Base):
                """ORM model for the ``user_profiles`` table.

                Only ``name`` is required. ``age``, ``gender`` and ``marital_status``
                are restricted by CHECK constraints.
                """

                __tablename__ = "user_profiles"

                # UUID primary key generated client-side with uuid4.
                id = Column(
                    UUID(as_uuid=True),
                    primary_key=True,
                    default=uuid.uuid4,
                )

                name = Column(String(100), nullable=False)

                # Must lie in the range 1..150 when provided.
                age = Column(
                    Integer,
                    CheckConstraint("age > 0 AND age <= 150"),
                )

                # Limited to the fixed set of labels listed in the constraint.
                gender = Column(
                    String(20),
                    CheckConstraint(
                        "gender IN ('Male', 'Female', 'Non-binary', 'Other', 'Prefer not to say')"
                    ),
                )

                city_region = Column(String(100))
                profession = Column(String(100))

                # Limited to the fixed set of labels listed in the constraint.
                marital_status = Column(
                    String(30),
                    CheckConstraint(
                        "marital_status IN ('Single', 'In relationship', 'Married', 'Divorced', 'Widowed', 'Other', 'Prefer not to say')"
                    ),
                )

                # Defaults to the literal "NA" when no value is supplied on insert.
                previous_mental_diagnosis = Column(Text, default="NA")
                ethnicity = Column(String(50))

                # Both timestamps default to the database's now(); updated_at is also
                # given onupdate=func.now() so updates refresh it.
                created_at = Column(
                    TIMESTAMP(timezone=True),
                    server_default=func.now(),
                )
                updated_at = Column(
                    TIMESTAMP(timezone=True),
                    server_default=func.now(),
                    onupdate=func.now(),
                )
         
     | 
| 41 | 
         
            +
             
     | 
| 42 | 
         
            +
             
     | 
| 43 | 
         
            +
            class ConversationHistory(Base):
                """ORM model for the ``conversation_history`` table.

                Each row stores one message together with its response and belongs to
                a ``user_profiles`` row.
                """

                __tablename__ = "conversation_history"

                # UUID primary key generated client-side with uuid4.
                id = Column(
                    UUID(as_uuid=True),
                    primary_key=True,
                    default=uuid.uuid4,
                )

                # Owning user; the foreign key is declared with ON DELETE CASCADE.
                user_id = Column(
                    UUID(as_uuid=True),
                    ForeignKey("user_profiles.id", ondelete="CASCADE"),
                    nullable=False,
                )

                # Both sides of the exchange are mandatory.
                message = Column(Text, nullable=False)
                response = Column(Text, nullable=False)

                # Defaults to the database's now() on insert.
                timestamp = Column(
                    TIMESTAMP(timezone=True),
                    server_default=func.now(),
                )
         
     | 
| 54 | 
         
            +
             
     |