harshnarayan12 committed on
Commit
5177e12
·
verified ·
1 Parent(s): 4a2c93c

Upload 29 files

Browse files
__pycache__/Chat_sentiment_analysis.cpython-311.pyc ADDED
Binary file (19.7 kB). View file
 
__pycache__/app.cpython-312.pyc ADDED
Binary file (15.2 kB). View file
 
__pycache__/config_manager.cpython-311.pyc ADDED
Binary file (12.5 kB). View file
 
__pycache__/dia.cpython-311.pyc ADDED
Binary file (704 Bytes). View file
 
__pycache__/fastapi_app.cpython-311.pyc ADDED
Binary file (68.6 kB). View file
 
__pycache__/fastapi_app.cpython-312.pyc ADDED
Binary file (29.5 kB). View file
 
__pycache__/fastapi_test.cpython-311.pyc ADDED
Binary file (14 kB). View file
 
__pycache__/main.cpython-311.pyc ADDED
Binary file (76.7 kB). View file
 
__pycache__/test_dia.cpython-311.pyc ADDED
Binary file (714 Bytes). View file
 
models/__init__.py ADDED
File without changes
models/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (186 Bytes). View file
 
models/__pycache__/chat_session.cpython-311.pyc ADDED
Binary file (5.66 kB). View file
 
models/__pycache__/user.cpython-311.pyc ADDED
Binary file (2.91 kB). View file
 
models/chat_session.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datetime import datetime
2
+ from pathlib import Path
3
+ import json
4
+ import uuid
5
+
6
class ChatSession:
    """In-memory record of one chat conversation that can be saved as JSON.

    Attributes:
        session_id: Identifier of the session; a random UUID4 string when the
            caller does not supply one.
        user_name: Name used for the session and in the saved file name.
        start_time: Naive local ``datetime`` marking when the session began.
        messages: List of message dicts appended by :meth:`add_message`.
        session_data: Dict holding ``emotion``, ``mental_health_status``,
            ``topics_discussed`` (set), ``agents_used`` (set) and
            ``sentiment_scores`` (list).
    """

    # Keys of ``session_data`` that are kept as sets in memory and written
    # out as lists, because JSON has no set type.
    _SET_KEYS = ("topics_discussed", "agents_used")

    def __init__(self, user_name, session_id=None):
        self.session_id = session_id or str(uuid.uuid4())
        self.user_name = user_name
        self.start_time = datetime.now()
        self.messages = []
        self.session_data = {
            "emotion": None,
            "mental_health_status": None,
            "topics_discussed": set(),
            "agents_used": set(),
            "sentiment_scores": [],
        }

    def _add_to_set(self, key, value):
        """Add ``value`` to ``session_data[key]``, coercing it to a set first."""
        current = self.session_data.get(key)
        if not isinstance(current, set):
            # The value may have been replaced by a list (e.g. data loaded
            # straight from JSON) or be missing entirely.
            current = set(current or [])
            self.session_data[key] = current
        current.add(value)

    def add_message(self, role, content, agent=None, metadata=None):
        """Append a timestamped message and record the agent that produced it.

        Args:
            role: Speaker role stored under ``"role"``.
            content: Message text stored under ``"content"``.
            agent: Optional agent name; when truthy it is also added to
                ``session_data["agents_used"]``.
            metadata: Optional dict stored under ``"metadata"``; an empty
                dict is stored when omitted.
        """
        self.messages.append({
            "timestamp": datetime.now().isoformat(),
            "role": role,
            "content": content,
            "agent": agent,
            "metadata": metadata or {},
        })

        if agent:
            self._add_to_set("agents_used", agent)

    def add_topic(self, topic):
        """Safely add a topic, converting a list-valued store to a set first."""
        self._add_to_set("topics_discussed", topic)

    def to_dict(self):
        """Return a JSON-serialisable snapshot of the session.

        ``end_time`` and ``duration_minutes`` are computed from a single
        clock reading taken at call time.
        """
        now = datetime.now()
        elapsed = now - self.start_time

        exported = dict(self.session_data)
        for key in self._SET_KEYS:
            value = exported.get(key)
            if isinstance(value, set):
                # Sort so the saved file is stable between runs.
                exported[key] = sorted(value, key=str)

        return {
            "session_id": self.session_id,
            "user_name": self.user_name,
            "start_time": self.start_time.isoformat(),
            "end_time": now.isoformat(),
            # total_seconds() includes whole days; ``.seconds`` does not.
            "duration_minutes": int(elapsed.total_seconds() // 60),
            "messages": self.messages,
            "total_messages": len(self.messages),
            "session_data": exported,
        }

    @classmethod
    def from_dict(cls, data, user_name=None):
        """Create a ChatSession from dictionary data.

        Args:
            data: Mapping in the shape produced by :meth:`to_dict`.
            user_name: Optional override for the stored ``user_name``;
                ``"Guest"`` is used when neither is available.

        Returns:
            A new ``ChatSession``. ``start_time`` is restored from ``data``
            when it holds a valid ISO-8601 string, otherwise it stays at the
            time of construction.
        """
        session = cls(user_name or data.get('user_name', 'Guest'), data.get('session_id'))
        session.messages = data.get('messages', [])

        stored_start = data.get('start_time')
        if stored_start:
            try:
                session.start_time = datetime.fromisoformat(stored_start)
            except (TypeError, ValueError):
                # Keep the construction-time value for unreadable timestamps.
                pass

        # Reconstruct session_data with proper types
        stored_data = data.get('session_data', {})
        session.session_data = {
            "emotion": stored_data.get('emotion'),
            "mental_health_status": stored_data.get('mental_health_status'),
            "topics_discussed": set(stored_data.get('topics_discussed', [])),
            "agents_used": set(stored_data.get('agents_used', [])),
            "sentiment_scores": stored_data.get('sentiment_scores', []),
        }

        return session

    def save(self, directory="chat_sessions"):
        """Write the session to ``directory`` as indented JSON.

        Args:
            directory: Target folder; created (with parents) when missing.

        Returns:
            The path of the written file as a string.
        """
        Path(directory).mkdir(parents=True, exist_ok=True)

        # A path separator inside the user name would otherwise point the
        # file into a different (possibly non-existent) directory.
        safe_user = str(self.user_name).replace("/", "_").replace("\\", "_")
        stamp = self.start_time.strftime('%Y%m%d_%H%M%S')
        filename = f"{directory}/chat_{safe_user}_{stamp}_{self.session_id[:8]}.json"

        with open(filename, 'w', encoding="utf-8") as f:
            json.dump(self.to_dict(), f, indent=2)

        return filename
models/user.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import create_engine, Column, Integer, String, DateTime, Boolean, Float, Text
2
+ from sqlalchemy.ext.declarative import declarative_base
3
+ from sqlalchemy.orm import sessionmaker
4
+ from werkzeug.security import generate_password_hash, check_password_hash
5
+ from datetime import datetime
6
+ import secrets
7
+
8
+ Base = declarative_base()
9
+
10
class User(Base):
    """ORM model for one account row in the ``users`` table."""

    __tablename__ = 'users'

    # --- identity and credentials ---
    id = Column(Integer, primary_key=True)
    username = Column(String(80), unique=True, nullable=False)
    email = Column(String(120), unique=True, nullable=False)
    password_hash = Column(String(255), nullable=False)
    full_name = Column(String(100))

    # --- account state ---
    created_at = Column(DateTime, default=datetime.utcnow)  # naive UTC timestamp
    last_login = Column(DateTime)
    is_active = Column(Boolean, default=True)
    is_verified = Column(Boolean, default=False)

    # --- profile completion status ---
    has_completed_initial_survey = Column(Boolean, default=False)
    initial_survey_date = Column(DateTime)

    # --- session management ---
    session_token = Column(String(255))

    def set_password(self, password):
        """Hash ``password`` with werkzeug and store it in ``password_hash``."""
        hashed = generate_password_hash(password)
        self.password_hash = hashed

    def check_password(self, password):
        """Return werkzeug's verdict on ``password`` against the stored hash."""
        stored_hash = self.password_hash
        return check_password_hash(stored_hash, password)

    def generate_session_token(self):
        """Create a new URL-safe random token, store it, and return it."""
        token = secrets.token_urlsafe(32)
        self.session_token = token
        return token
39
+
40
# Module-level database setup: runs on import.
# Opens (or creates) the local SQLite file, makes sure every table declared
# on ``Base`` exists, and exposes a session factory bound to that engine.
engine = create_engine("sqlite:///mental_health_app.db")
Base.metadata.create_all(bind=engine)
Session = sessionmaker(bind=engine)
scripts/__init__.py ADDED
File without changes
scripts/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (182 Bytes). View file
 
scripts/db/__pycache__/session.cpython-310.pyc ADDED
Binary file (709 Bytes). View file
 
scripts/db/create_article_tables.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend.scripts.migration_schemas.resources_models import Base
2
+ from backend.scripts.db.session import engine
3
+ from sqlalchemy import inspect
4
+
5
def create_tables():
    """Create any missing tables declared on ``Base.metadata`` and report them.

    The table list is read before and after ``create_all`` and the difference
    is printed. Nothing is returned.
    """
    tables_before = set(inspect(engine).get_table_names())

    Base.metadata.create_all(bind=engine)

    # A fresh inspector is needed here: an Inspector caches reflection
    # results, so asking the first one again would return the pre-creation
    # list and the report would always say nothing was created.
    tables_after = set(inspect(engine).get_table_names())
    new_tables = sorted(tables_after - tables_before)

    if new_tables:
        print(f"[INFO] Created new article tables: {', '.join(new_tables)}")
    else:
        print("[INFO] No new article tables created. All tables already exist.")
18
+
19
+ if __name__ == "__main__":
20
+ create_tables()
scripts/db/create_extensions.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import text
2
+ from backend.scripts.db.session import engine
3
+
4
def create_vector_extension():
    """Ensure the ``vector`` extension exists in the ``extensions`` schema.

    The statement runs inside ``engine.begin()`` so it is committed when the
    block exits; with a plain ``engine.connect()`` the DDL is rolled back on
    close unless the connection autocommits.
    """
    with engine.begin() as conn:
        conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA extensions;"))
    print("[INFO] 'vector' extension created if not present.")
8
+
9
+ if __name__ == "__main__":
10
+ create_vector_extension()
scripts/db/create_user_tables.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from backend.scripts.migration_schemas.user_models import Base
2
+ from backend.scripts.db.session import engine
3
+ from sqlalchemy import inspect
4
+
5
def create_tables():
    """Create any missing tables declared on ``Base.metadata`` and report them.

    The table list is read before and after ``create_all`` and the difference
    is printed. Nothing is returned.
    """
    tables_before = set(inspect(engine).get_table_names())

    # This will create all tables defined on Base.metadata that don't yet exist
    Base.metadata.create_all(bind=engine)

    # A fresh inspector is needed here: an Inspector caches reflection
    # results, so asking the first one again would return the pre-creation
    # list and the report would always say nothing was created.
    tables_after = set(inspect(engine).get_table_names())
    new_tables = sorted(tables_after - tables_before)

    if new_tables:
        print(f"[INFO] Created new tables: {', '.join(new_tables)}")
    else:
        print("[INFO] No new tables created. All tables already exist.")
19
+
20
+ if __name__ == "__main__":
21
+ create_tables()
scripts/db/encrypt_passwords.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import psycopg2
3
+ import bcrypt
4
+ from psycopg2.extras import RealDictCursor
5
+ from dotenv import load_dotenv
6
+ from backend.scripts.db.session import DATABASE_URL
7
+ import traceback
8
+
9
+
10
def get_db_connection():
    """Open a psycopg2 connection to ``DATABASE_URL`` whose cursors yield dict rows."""
    connection = psycopg2.connect(DATABASE_URL, cursor_factory=RealDictCursor)
    return connection
12
+
13
def hash_unencrypted_passwords():
    """Replace non-bcrypt ``user_password`` values in ``user_profiles`` with bcrypt hashes.

    Rows whose password does not start with ``$2`` are treated as plaintext.
    All updates are committed together; on any exception the error and
    traceback are printed and the transaction is rolled back. The cursor and
    connection are always closed.
    """
    conn = None
    cursor = None
    try:
        conn = get_db_connection()
        cursor = conn.cursor()

        # Find rows with non-bcrypt passwords (e.g. not starting with $2)
        cursor.execute("""
            SELECT id, user_password
            FROM user_profiles
            WHERE user_password NOT LIKE '$2%';
        """)
        pending_rows = cursor.fetchall()

        print(f"Found {len(pending_rows)} users with unencrypted passwords.")

        for row in pending_rows:
            row_id = row['id']
            plaintext = row['user_password']

            # bcrypt works on bytes; the hash is stored back as text.
            digest = bcrypt.hashpw(plaintext.encode(), bcrypt.gensalt())
            hashed_text = digest.decode()

            cursor.execute("""
                UPDATE user_profiles
                SET user_password = %s
                WHERE id = %s
            """, (hashed_text, row_id))

        conn.commit()
        print("Password hashing complete.")

    except Exception as e:
        print("[ERROR]", e)
        traceback.print_exc()
        if conn:
            conn.rollback()
    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()
56
+
57
+ if __name__ == "__main__":
58
+ hash_unencrypted_passwords()
scripts/db/seed_data_helplines_organizations.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uuid
2
+ import numpy as np
3
+ from sqlalchemy.exc import IntegrityError, SQLAlchemyError
4
+
5
+ from backend.scripts.db.session import SessionLocal
6
+ from backend.scripts.migration_schemas.resources_models import Resource
7
+
8
def _build_seed_resources():
    """Return fresh ``Resource`` objects for the helplines and organizations to seed."""
    return [
        Resource(
            id=str(uuid.uuid4()),
            name="Primary Crisis Hotline",
            phone="1010",
            operation_hours="24/7",
            description="Operated by the Bhutan Youth Development Fund (YDF) and the Ministry of Health, this helpline offers support for mental health issues and suicide prevention.",
            category="mental health",
            type="helpline",
            source="bhutanyouth.org",
            website="https://bhutanyouth.org",
        ),
        Resource(
            id=str(uuid.uuid4()),
            name="Emergency Line",
            phone="112",
            operation_hours="24/7",
            description="The national emergency number is for immediate assistance and is accessible via mobile and landline.",
            category="emergency",
            type="helpline",
            source="National Emergency Services",
            website="moh.gov.bt",
        ),
        Resource(
            id=str(uuid.uuid4()),
            name="Mental Health Support Line",
            phone="1098",
            operation_hours="24/7",
            description="National helpline for children and vulnerable groups. Provides mental health support.",
            category="mental health, children",
            type="helpline",
            source="National Helpline Directory",
        ),
        Resource(
            id=str(uuid.uuid4()),
            name="Sherig Counselling Services (MoE)",
            phone="17861294",
            operation_hours="Weekdays 9am–5pm",
            description="Counselling helpline for students and youth, staffed by trained school counselors.",
            category="youth, education, counseling",
            source="moe.gov.bt",
            website="https://sites.google.com/moe.gov.bt/sherigcounsellingservices",
            type="helpline",
        ),
        Resource(
            id=str(uuid.uuid4()),
            name="PEMA (Psychosocial Education and Mental Health Awareness)",
            phone="1010",
            website="https://thepema.gov.bt/",
            description="PEMA is the national nodal agency for mental health promotion and services, offering counselling, crisis intervention, and rehabilitation. They also have a helpline and offer walk-in services.",
            type="organization",
        ),
        Resource(
            id=str(uuid.uuid4()),
            name="RENEW (Respect, Educate, Nurture, and Empower Women)",
            phone="+975 2 332 159",
            website="https://renew.org.bt/",
            description="Founded by Her Majesty Gyalyum Sangay Choden Wangchuck in 2004, RENEW is a non-profit organization supporting women.",
            type="organization",
        ),
        Resource(
            id=str(uuid.uuid4()),
            name="Jigme Dorji Wangchuck National Referral Hospital",
            phone="+975 17 32 24 96",
            website="https://jdwnrh.gov.bt/",
            description="This hospital has a psychiatric ward, providing specialized mental health services.",
            type="organization",
        ),
        Resource(
            id=str(uuid.uuid4()),
            name="Bhutan Board for Certified Counselors (BBCC)",
            description="Accredits and supports professional counselors in Bhutan. Promotes ethical and culturally sensitive counseling.",
            phone=None,
            website="https://www.counselingbhutan.com",
            address="Thimphu",
            type="organization",
        ),
        Resource(
            id=str(uuid.uuid4()),
            name="Institute of Traditional Medicine Services",
            description="Provides traditional Bhutanese medical treatments, including mental and spiritual healing.",
            phone=None,
            website=None,
            address="Langjophakha, Thimphu",
            type="organization",
        ),
    ]


def seed_resources():
    """Insert the built-in helplines and organizations that are not yet stored.

    Resource names are unique in the table, so names already present are
    filtered out before inserting. Without that filter a single duplicate
    would roll back the whole batch and a re-run could never add newly
    introduced entries. Database errors are printed, not raised; the session
    is always closed.
    """
    session = SessionLocal()
    resources = _build_seed_resources()

    try:
        existing_names = {name for (name,) in session.query(Resource.name)}
        pending = [r for r in resources if r.name not in existing_names]
        skipped = len(resources) - len(pending)

        if pending:
            session.add_all(pending)
            session.commit()

        print(f"✅ Inserted {len(pending)} resources into DB.")
        if skipped:
            print(f"⚠️ Skipped {skipped} resources that already exist.")
    except IntegrityError as e:
        # Still possible if another process inserts the same name in between.
        session.rollback()
        print(f"⚠️ Duplicate detected, no resources were inserted: {str(e)}")
    except SQLAlchemyError as e:
        session.rollback()
        print(f"❌ Failed to seed: {str(e)}")
    finally:
        session.close()
116
+
117
+
118
def main():
    """Script entry point: run the resource seeding once."""
    seed_resources()
120
+
121
+
122
+ if __name__ == "__main__":
123
+ main()
scripts/db/session.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from sqlalchemy import create_engine
3
+ from sqlalchemy.orm import sessionmaker
4
+ from dotenv import load_dotenv
5
+
6
+ load_dotenv()
7
+
8
# Connection settings come from the environment (populated by load_dotenv()).
_DB_USER = os.getenv('DATABASE_USER')
_DB_PASSWORD = os.getenv('DATABASE_PASSWORD')
_DB_HOST = os.getenv('DATABASE_HOST')
_DB_PORT = os.getenv('DATABASE_PORT')
_DB_NAME = os.getenv('DATABASE_NAME')

DATABASE_URL = (
    f"postgresql://{_DB_USER}:{_DB_PASSWORD}"
    f"@{_DB_HOST}:{_DB_PORT}/{_DB_NAME}"
)

# Print the target with the password masked: this line runs on every import
# and its output ends up in terminals and logs.
print("Connecting to:", f"postgresql://{_DB_USER}:***@{_DB_HOST}:{_DB_PORT}/{_DB_NAME}")

# sslmode=require is passed through to the PostgreSQL driver.
engine = create_engine(DATABASE_URL, connect_args={"sslmode": "require"})
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)
scripts/ingest/upload_articles_from_bucket.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import io
3
+ import uuid
4
+ import requests
5
+ import tempfile
6
+
7
+ from dotenv import load_dotenv
8
+ import boto3
9
+ from sqlalchemy.exc import SQLAlchemyError
10
+
11
+ from backend.app.core.deps import get_config_value, get_embedding_model
12
+ from backend.rag.pdf_parser import extract_text
13
+ from backend.rag.embeddings import generate_embeddings
14
+ from backend.rag.chunker import smart_chunk_text, create_chunk_objects
15
+ from backend.scripts.db.session import SessionLocal
16
+ from backend.scripts.migration_schemas.resources_models import Article, ArticleChunk
17
+
18
+ load_dotenv()
19
+
20
+ # === Load ENV ===
21
# S3-compatible storage settings, read once at import time.
# Each value is None when the variable is not set.
ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY_ID")
SECRET_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
REGION = os.environ.get("AWS_REGION")
ENDPOINT_URL = os.environ.get("SUPABASE_STORAGE_ENDPOINT")
BUCKET_NAME = os.environ.get("SUPABASE_BUCKET")
SUPABASE_STORAGE_URL = os.environ.get("SUPABASE_STORAGE_URL")
27
+
28
class SyncUpload:
    """Sync PDF articles from the storage bucket into the article and chunk tables.

    On construction the embedding model is loaded, the bucket is listed and
    the titles already stored in the database are fetched.
    :meth:`articles_to_rag` then ingests every PDF whose title is not stored
    yet.
    """

    # Seconds to wait for a PDF download before giving up on that file.
    DOWNLOAD_TIMEOUT = 60

    def __init__(self):
        self.model = get_embedding_model()
        self.embedding_dim = get_config_value("model.embedding_dim", 384)
        self.file_list = self.get_articles_supabase()
        self.current_articles = self.get_current_articles_psql()

    def get_articles_supabase(self):
        """Return the keys of all ``.pdf`` objects in the configured bucket."""
        s3 = boto3.client(
            's3',
            region_name=REGION,
            endpoint_url=ENDPOINT_URL,
            aws_access_key_id=ACCESS_KEY,
            aws_secret_access_key=SECRET_KEY
        )
        # A single list_objects_v2 call returns at most 1000 keys, so walk
        # every page instead of silently ignoring the rest of the bucket.
        paginator = s3.get_paginator('list_objects_v2')
        pdf_keys = []
        for page in paginator.paginate(Bucket=BUCKET_NAME):
            pdf_keys.extend(
                obj['Key'] for obj in page.get('Contents', [])
                if obj['Key'].endswith('.pdf')
            )
        return pdf_keys

    def get_current_articles_psql(self):
        """Return the titles of all articles already stored in the database."""
        session = SessionLocal()
        try:
            return [a.title for a in session.query(Article.title).all()]
        finally:
            session.close()

    def extract_text_from_bytes(self, pdf_bytesio):
        """Write the PDF bytes to a temporary file and extract its text.

        ``extract_text`` is called with a file path, hence the temporary
        file. The file is removed when the ``with`` block exits.
        """
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=True) as tmp:
            tmp.write(pdf_bytesio.read())
            tmp.flush()
            return extract_text(tmp.name)

    def create_article_object(self, id, title):
        """Build an ``Article`` row with the given id and title."""
        return Article(id=id, title=title)

    def file_to_chunks(self, article_filename, doc_id):
        """Download one PDF and turn it into embedded chunk dicts.

        Args:
            article_filename: Object key of the PDF in the bucket.
            doc_id: Article id attached to every chunk.

        Returns:
            The value returned by ``generate_embeddings``, or ``None`` when
            the download or the processing fails (the error is printed).
        """
        # NOTE(review): the path hard-codes the "pdfs" bucket and contains a
        # double slash; left untouched because the storage URL scheme cannot
        # be verified from this file. Confirm against the storage configuration.
        article_url = f"{SUPABASE_STORAGE_URL}/v1/object/public/pdfs//{article_filename}"
        print(f"[INFO] Downloading: {article_url}")
        try:
            # Without a timeout a stalled connection would hang the whole sync.
            response = requests.get(article_url, timeout=self.DOWNLOAD_TIMEOUT)
        except requests.RequestException as e:
            print(f"[ERROR] Failed to download {article_url}: {e}")
            return None
        if response.status_code != 200:
            print(f"[ERROR] Failed to download {article_url}")
            return None

        try:
            text = self.extract_text_from_bytes(io.BytesIO(response.content))
            print(f"[SUCCESS] Extracted {len(text)} characters from '{article_filename}'")
            raw_chunks = smart_chunk_text(text)
            chunk_objs = create_chunk_objects(doc_id=doc_id, chunks=raw_chunks)
            embedded_chunks = generate_embeddings(chunk_objs)
            return embedded_chunks
        except Exception as e:
            print(f"[ERROR] Processing failed for {article_filename}: {e}")
            return None

    def articles_to_rag(self):
        """Ingest every bucket PDF whose title is not yet in the database.

        Each article is committed together with its chunks; a failed insert
        is rolled back and the run continues with the next file. The session
        is closed once, after the whole run.
        """
        suffix = ".pdf"
        known_titles = set(self.current_articles)  # O(1) membership checks
        session = SessionLocal()
        try:
            for article_file in self.file_list:
                # Strip only the trailing extension; keys are pre-filtered to
                # end with ".pdf", and ".pdf" elsewhere in the name must stay.
                if article_file.endswith(suffix):
                    article_title = article_file[:-len(suffix)]
                else:
                    article_title = article_file

                if article_title in known_titles:
                    print(f"[SKIP] Already processed: {article_title}")
                    continue

                doc_id = str(uuid.uuid4())
                article = self.create_article_object(id=doc_id, title=article_title)
                chunk_data = self.file_to_chunks(article_file, doc_id)

                if not chunk_data:
                    continue

                try:
                    chunks = [
                        ArticleChunk(
                            chunk_id=c["chunk_id"],
                            doc_id=c["doc_id"],
                            chunk_text=c["chunk_text"],
                            embedding=c["embedding"],
                            keywords=c["keywords"]
                        ) for c in chunk_data
                    ]

                    session.add(article)
                    session.add_all(chunks)
                    session.commit()
                    print(f"[SUCCESS] Uploaded: {article_title} ({len(chunks)} chunks)")
                except SQLAlchemyError as e:
                    session.rollback()
                    print(f"[ERROR] DB insert failed for {article_title}: {e}")
        finally:
            session.close()
116
+
117
+ if __name__ == "__main__":
118
+ SyncUpload().articles_to_rag()
scripts/ingest/upload_files_to_bucket.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import boto3
3
+ from pathlib import Path
4
+ from botocore.exceptions import ClientError
5
+ from tqdm import tqdm
6
+ import sys
7
+
8
# Storage settings, read once at import time (None when a variable is unset).
AWS_ACCESS_KEY_ID = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
ENDPOINT_URL = os.environ.get("SUPABASE_STORAGE_ENDPOINT")
BUCKET_NAME = os.environ.get("SUPABASE_BUCKET")
REGION = os.environ.get("AWS_REGION")

# Shared S3 client pointed at the configured storage endpoint.
s3 = boto3.client(
    "s3",
    region_name=REGION,
    endpoint_url=ENDPOINT_URL,
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)
21
+
22
def upload_pdfs(folder_path: str):
    """Upload every ``*.pdf`` directly inside ``folder_path`` to the bucket.

    Files whose name already exists as a key in the bucket are skipped.
    Upload errors are printed and counted; they do not stop the run.

    Args:
        folder_path: Directory to scan (not recursive).
    """
    folder = Path(folder_path)

    # Validate local input first so a bad path costs no network round-trip.
    if not folder.is_dir():
        print("❌ Invalid folder path.")
        return

    pdf_files = list(folder.glob("*.pdf"))

    if not pdf_files:
        print("Folder exists, but no PDF files were found.")
        return

    # list_objects_v2 returns at most 1000 keys per call; paginate so that
    # existing files in large buckets are detected too.
    existing_keys = set()
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=BUCKET_NAME):
        existing_keys.update(
            obj['Key'] for obj in page.get('Contents', [])
            if obj['Key'].endswith('.pdf')
        )

    failed = 0
    for file_path in tqdm(pdf_files):
        key = file_path.name

        if key in existing_keys:
            print(f"✅ {key} already exists in the bucket, skipping.")
            continue

        print(f"📄 Uploading: {key}")
        try:
            s3.upload_file(
                Filename=str(file_path),
                Bucket=BUCKET_NAME,
                Key=key,
                ExtraArgs={"ContentType": "application/pdf"},
            )
        except ClientError as e:
            failed += 1
            print(f"❌ Error uploading {key}: {e}")

    if failed:
        print(f"❌ {failed} file(s) failed to upload.")
    print("✅ Upload complete. Run `make sync-bucket` to process the files.")
58
+
59
+
60
+
61
def main():
    """Resolve the PDF folder and upload its contents.

    The folder is taken from the first command-line argument. Without one,
    the user is prompted, and an empty answer selects the default folder.
    """
    default_folder = "backend/data/rag_articles"

    if len(sys.argv) > 1:
        folder = sys.argv[1]
    else:
        print("No folder path provided.")
        answer = input(
            f"📂 Enter path to folder with PDFs (press Enter for {default_folder}): "
        ).strip()
        # An empty answer falls back to the default the prompt advertises.
        folder = answer or default_folder

    print(f"Using folder: {folder}")
    upload_pdfs(folder)
71
+
72
+ if __name__ == "__main__":
73
+ main()
scripts/migration_schemas/__pycache__/resources_models.cpython-310.pyc ADDED
Binary file (2 kB). View file
 
scripts/migration_schemas/resources_models.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ from sqlalchemy import (
3
+ Column,
4
+ String,
5
+ Text,
6
+ ForeignKey,
7
+ Date,
8
+ )
9
+ from sqlalchemy.dialects.postgresql import ARRAY
10
+ from sqlalchemy.ext.declarative import declarative_base
11
+ from pgvector.sqlalchemy import Vector
12
+ from sqlalchemy.orm import relationship
13
+ import os
14
+ from backend.app.core.deps import EMBEDDING_DIM as embedding_dim
15
+ from sqlalchemy import UniqueConstraint
16
+
17
+ Base = declarative_base()
18
+
19
+
20
class Resource(Base):
    """ORM model for a helpline or organization row in the ``resources`` table.

    Names are unique through the ``uq_resource_name`` constraint.
    """

    __tablename__ = "resources"

    # Required fields
    id = Column(String, primary_key=True)
    name = Column(String, nullable=False)

    # Optional descriptive and contact fields
    description = Column(Text)
    phone = Column(String)
    website = Column(String)
    address = Column(String)
    operation_hours = Column(String)
    category = Column(String)  # e.g., "mental_health", "addiction", etc.
    type = Column(String, nullable=False)  # e.g., "helpline", "organization"
    source = Column(String)

    __table_args__ = (UniqueConstraint("name", name="uq_resource_name"),)
37
+
38
class Article(Base):
    """ORM model for one source document in the ``articles`` table.

    Titles are unique. The text itself lives in the related
    ``ArticleChunk`` rows, reachable through ``chunks``.
    """

    __tablename__ = "articles"

    id = Column(String, primary_key=True, unique=True)
    title = Column(String, nullable=False, unique=True)

    # Optional bibliographic metadata
    author = Column(String)
    source = Column(String)
    published_date = Column(Date)
    topic = Column(String)

    # One article has many chunks; mirrored by ArticleChunk.article.
    chunks = relationship("ArticleChunk", back_populates="article")
49
+
50
class ArticleChunk(Base):
    """ORM model for one embedded text chunk in the ``article_chunks`` table."""

    __tablename__ = "article_chunks"

    chunk_id = Column(String, primary_key=True, unique=True)
    # Owning article; every chunk must reference one.
    doc_id = Column(String, ForeignKey("articles.id"), nullable=False)
    chunk_text = Column(Text, nullable=False)
    # pgvector column sized by the project-wide EMBEDDING_DIM setting.
    embedding = Column(Vector(embedding_dim))
    keywords = Column(Text)

    # Many chunks belong to one article; mirrored by Article.chunks.
    article = relationship("Article", back_populates="chunks")
60
+
61
+
62
+
scripts/migration_schemas/user_models.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sqlalchemy import (
2
+ Column,
3
+ String,
4
+ Integer,
5
+ Text,
6
+ TIMESTAMP,
7
+ CheckConstraint,
8
+ ForeignKey,
9
+ func,
10
+ )
11
+ from sqlalchemy.dialects.postgresql import UUID
12
+ from sqlalchemy.ext.declarative import declarative_base
13
+ import uuid
14
+
15
+ Base = declarative_base()
16
+
17
class UserProfile(Base):
    """ORM model for one row of the ``user_profiles`` table.

    Check constraints bound ``age`` and restrict ``gender`` and
    ``marital_status`` to fixed lists of values.
    """
    # NOTE(review): scripts/db/encrypt_passwords.py reads and writes a
    # ``user_password`` column on ``user_profiles`` that is not declared
    # here. Confirm where that column is created.

    __tablename__ = "user_profiles"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    name = Column(String(100), nullable=False)
    age = Column(Integer, CheckConstraint("age > 0 AND age <= 150"))

    gender = Column(
        String(20),
        CheckConstraint(
            "gender IN ('Male', 'Female', 'Non-binary', 'Other', 'Prefer not to say')"
        ),
    )

    city_region = Column(String(100))
    profession = Column(String(100))

    marital_status = Column(
        String(30),
        CheckConstraint(
            "marital_status IN ('Single', 'In relationship', 'Married', 'Divorced', 'Widowed', 'Other', 'Prefer not to say')"
        ),
    )

    previous_mental_diagnosis = Column(Text, default="NA")
    ethnicity = Column(String(50))

    # Both timestamps are filled by the database; updated_at is also
    # refreshed on UPDATE statements.
    created_at = Column(TIMESTAMP(timezone=True), server_default=func.now())
    updated_at = Column(TIMESTAMP(timezone=True), server_default=func.now(), onupdate=func.now())
41
+
42
+
43
class ConversationHistory(Base):
    """ORM model for one message/response exchange in ``conversation_history``."""

    __tablename__ = "conversation_history"

    id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4)

    # Owning profile; the foreign key is declared with ON DELETE CASCADE.
    user_id = Column(
        UUID(as_uuid=True),
        ForeignKey("user_profiles.id", ondelete="CASCADE"),
        nullable=False,
    )

    message = Column(Text, nullable=False)
    response = Column(Text, nullable=False)

    # Filled by the database at insert time.
    timestamp = Column(TIMESTAMP(timezone=True), server_default=func.now())
54
+