samu commited on
Commit
2832da8
·
1 Parent(s): 77e56ff

improved backend

Browse files
Files changed (32) hide show
  1. backend/__pycache__/cache.cpython-310.pyc +0 -0
  2. backend/__pycache__/cache.cpython-312.pyc +0 -0
  3. backend/__pycache__/{config.cpython-310.pyc → config.cpython-311.pyc} +0 -0
  4. backend/__pycache__/config.cpython-312.pyc +0 -0
  5. backend/__pycache__/content_generator.cpython-311.pyc +0 -0
  6. backend/__pycache__/content_generator.cpython-312.pyc +0 -0
  7. backend/__pycache__/database.cpython-310.pyc +0 -0
  8. backend/__pycache__/database.cpython-312.pyc +0 -0
  9. backend/__pycache__/db.cpython-311.pyc +0 -0
  10. backend/__pycache__/db.cpython-312.pyc +0 -0
  11. backend/__pycache__/db_cache.cpython-311.pyc +0 -0
  12. backend/__pycache__/db_cache.cpython-312.pyc +0 -0
  13. backend/__pycache__/db_init.cpython-311.pyc +0 -0
  14. backend/__pycache__/db_init.cpython-312.pyc +0 -0
  15. backend/__pycache__/main.cpython-310.pyc +0 -0
  16. backend/__pycache__/main.cpython-311.pyc +0 -0
  17. backend/__pycache__/main.cpython-312.pyc +0 -0
  18. backend/config.py +8 -7
  19. backend/content_generator.py +295 -0
  20. backend/database_init.py +65 -0
  21. backend/db.py +434 -0
  22. backend/db_cache.py +101 -0
  23. backend/db_init.py +259 -0
  24. backend/main.py +405 -50
  25. backend/schema.sql +98 -0
  26. backend/utils/__pycache__/generate_completions.cpython-310.pyc +0 -0
  27. backend/utils/__pycache__/generate_completions.cpython-311.pyc +0 -0
  28. backend/utils/__pycache__/generate_completions.cpython-312.pyc +0 -0
  29. backend/utils/__pycache__/handlers.cpython-310.pyc +0 -0
  30. backend/utils/__pycache__/handlers.cpython-312.pyc +0 -0
  31. backend/utils/generate_completions.py +2 -2
  32. backend/utils/handlers.py +18 -7
backend/__pycache__/cache.cpython-310.pyc DELETED
Binary file (1.15 kB)
 
backend/__pycache__/cache.cpython-312.pyc DELETED
Binary file (2.24 kB)
 
backend/__pycache__/{config.cpython-310.pyc → config.cpython-311.pyc} RENAMED
Binary files a/backend/__pycache__/config.cpython-310.pyc and b/backend/__pycache__/config.cpython-311.pyc differ
 
backend/__pycache__/config.cpython-312.pyc CHANGED
Binary files a/backend/__pycache__/config.cpython-312.pyc and b/backend/__pycache__/config.cpython-312.pyc differ
 
backend/__pycache__/content_generator.cpython-311.pyc ADDED
Binary file (12.3 kB). View file
 
backend/__pycache__/content_generator.cpython-312.pyc ADDED
Binary file (11 kB). View file
 
backend/__pycache__/database.cpython-310.pyc DELETED
Binary file (10.1 kB)
 
backend/__pycache__/database.cpython-312.pyc DELETED
Binary file (12.6 kB)
 
backend/__pycache__/db.cpython-311.pyc ADDED
Binary file (31 kB). View file
 
backend/__pycache__/db.cpython-312.pyc ADDED
Binary file (25.5 kB). View file
 
backend/__pycache__/db_cache.cpython-311.pyc ADDED
Binary file (7.31 kB). View file
 
backend/__pycache__/db_cache.cpython-312.pyc ADDED
Binary file (6.4 kB). View file
 
backend/__pycache__/db_init.cpython-311.pyc ADDED
Binary file (15.7 kB). View file
 
backend/__pycache__/db_init.cpython-312.pyc ADDED
Binary file (14 kB). View file
 
backend/__pycache__/main.cpython-310.pyc DELETED
Binary file (3.15 kB)
 
backend/__pycache__/main.cpython-311.pyc ADDED
Binary file (20.3 kB). View file
 
backend/__pycache__/main.cpython-312.pyc CHANGED
Binary files a/backend/__pycache__/main.cpython-312.pyc and b/backend/__pycache__/main.cpython-312.pyc differ
 
backend/config.py CHANGED
@@ -48,7 +48,7 @@ curriculum_instructions = """
48
  You are an AI-powered language learning assistant tasked with generating an extensive, personalized curriculum. Your goal is to help the user learn {target_language} by designing a 25-lesson curriculum that reflects the user's goals, interests, and proficiency level. All outputs should be written in {native_language}.
49
 
50
  ### Curriculum Goals:
51
- - Provide 25 lessons.
52
  - Ensure logical progression from basic to advanced topics (according to {proficiency}).
53
  - Align each lesson with a practical communication goal.
54
  - Tailor vocabulary and sub-topics to the user’s intended use (e.g., work, travel, hobbies, daily life).
@@ -57,15 +57,15 @@ You are an AI-powered language learning assistant tasked with generating an exte
57
 
58
  1. **Define the Lesson Series (Overall Theme):**
59
  - Choose a main theme relevant to the user's motivation for learning {target_language} (e.g., "Living in a new country", "Professional communication", "Traveling in {target_language}-speaking regions").
60
- - The theme should guide the tone, content, and scope of the entire 25-lesson sequence.
61
 
62
- 2. **Divide the Curriculum into 25 Thematic Lessons:**
63
  - Each lesson should have a clear focus (e.g., asking for help, describing your job, booking accommodation).
64
  - Sequence lessons to build from foundational topics to more complex, specialized language use.
65
  - Vary grammar, vocabulary, and communication functions across lessons to avoid repetition and ensure comprehensive coverage.
66
 
67
  3. **Describe Each Lesson Clearly and Concisely:**
68
- For each of the 25 lessons, provide:
69
  - "sub_topic": A clear and practical lesson title in {native_language}.
70
  - "keywords": A list of 1–3 high-level categories in {native_language} that describe the lesson focus (e.g., "directions", "daily routine", "formal conversation").
71
  - "description": One sentence in {native_language} that explains what the learner will achieve or be able to do after completing the lesson. Be specific and learner-oriented.
@@ -73,7 +73,7 @@ You are an AI-powered language learning assistant tasked with generating an exte
73
  ### Output Format:
74
  Return a valid JSON object with:
75
  - "lesson_topic": The overall learning theme (in {native_language}).
76
- - "sub_topics": A list of 25 items. Each item must include:
77
  - "sub_topic": A short title of the lesson (in {native_language}).
78
  - "keywords": A list of 1–3 general-purpose categories (in {native_language}).
79
  - "description": One clear sentence (in {native_language}) describing the purpose of the lesson.
@@ -164,7 +164,7 @@ When generating flashcards:
164
  - Select terms that are novel, useful, or not overly repetitive within the lesson.
165
  - Prioritize terms that learners are likely to encounter again in real-world usage.
166
  ### Flashcard Format
167
- Generate exactly **10 flashcards** as a **valid JSON array**, with each flashcard containing:
168
  - `"word"`: A key word or phrase in {target_language} drawn from the lesson.
169
  - `"definition"`: A learner-friendly explanation in {native_language}.
170
  - `"example"`: A clear, natural sentence in {target_language} demonstrating the word **in context with the lesson**.
@@ -176,7 +176,8 @@ simulation_mode_instructions = """
176
  # Target language: {target_language}
177
  # Proficiency level: {proficiency}
178
 
179
- You are a **creative, context-aware storytelling engine**. Your task is to generate short, engaging stories or dialogues in **any language** to make language learning enjoyable, memorable, and relevant. Stories must reflect the user's interests, profession, or hobbies, and align with their learning level.
 
180
 
181
  ### Input Format
182
  You will receive a user-provided **lesson topic, theme, or domain of interest** (e.g., “a courtroom drama for a law student” or “space mission dialogue for a space enthusiast”). Use this input to:
 
48
  You are an AI-powered language learning assistant tasked with generating an extensive, personalized curriculum. Your goal is to help the user learn {target_language} by designing a 5-lesson curriculum that reflects the user's goals, interests, and proficiency level. All outputs should be written in {native_language}.
49
 
50
  ### Curriculum Goals:
51
+ - Provide 5 lessons.
52
  - Ensure logical progression from basic to advanced topics (according to {proficiency}).
53
  - Align each lesson with a practical communication goal.
54
  - Tailor vocabulary and sub-topics to the user’s intended use (e.g., work, travel, hobbies, daily life).
 
57
 
58
  1. **Define the Lesson Series (Overall Theme):**
59
  - Choose a main theme relevant to the user's motivation for learning {target_language} (e.g., "Living in a new country", "Professional communication", "Traveling in {target_language}-speaking regions").
60
+ - The theme should guide the tone, content, and scope of the entire 5-lesson sequence.
61
 
62
+ 2. **Divide the Curriculum into 5 Thematic Lessons:**
63
  - Each lesson should have a clear focus (e.g., asking for help, describing your job, booking accommodation).
64
  - Sequence lessons to build from foundational topics to more complex, specialized language use.
65
  - Vary grammar, vocabulary, and communication functions across lessons to avoid repetition and ensure comprehensive coverage.
66
 
67
  3. **Describe Each Lesson Clearly and Concisely:**
68
+ For each of the 5 lessons, provide:
69
  - "sub_topic": A clear and practical lesson title in {native_language}.
70
  - "keywords": A list of 1–3 high-level categories in {native_language} that describe the lesson focus (e.g., "directions", "daily routine", "formal conversation").
71
  - "description": One sentence in {native_language} that explains what the learner will achieve or be able to do after completing the lesson. Be specific and learner-oriented.
 
73
  ### Output Format:
74
  Return a valid JSON object with:
75
  - "lesson_topic": The overall learning theme (in {native_language}).
76
+ - "sub_topics": A list of 5 items. Each item must include:
77
  - "sub_topic": A short title of the lesson (in {native_language}).
78
  - "keywords": A list of 1–3 general-purpose categories (in {native_language}).
79
  - "description": One clear sentence (in {native_language}) describing the purpose of the lesson.
 
164
  - Select terms that are novel, useful, or not overly repetitive within the lesson.
165
  - Prioritize terms that learners are likely to encounter again in real-world usage.
166
  ### Flashcard Format
167
+ Generate exactly **5 flashcards** as a **valid JSON array**, with each flashcard containing:
168
  - `"word"`: A key word or phrase in {target_language} drawn from the lesson.
169
  - `"definition"`: A learner-friendly explanation in {native_language}.
170
  - `"example"`: A clear, natural sentence in {target_language} demonstrating the word **in context with the lesson**.
 
176
  # Target language: {target_language}
177
  # Proficiency level: {proficiency}
178
 
179
+ You are a **creative, context-aware storytelling engine**. Your task is to generate short, engaging stories or dialogues in **any language** to make language learning enjoyable, memorable, and relevant.
180
+ Stories must reflect the user's interests, profession, or hobbies, and align with their learning level.
181
 
182
  ### Input Format
183
  You will receive a user-provided **lesson topic, theme, or domain of interest** (e.g., “a courtroom drama for a law student” or “space mission dialogue for a space enthusiast”). Use this input to:
backend/content_generator.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import asyncio
3
+ from typing import Dict, Any, Optional, List
4
+ from backend.utils import generate_completions
5
+ from backend import config
6
+ from backend.db import db
7
+ from backend.db_cache import api_cache
8
+ import logging
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class ContentGenerator:
    """Service for generating and storing all learning content.

    Orchestrates curriculum creation, per-lesson content generation
    (flashcards, exercises, simulation) and persistence through the
    ``db`` module, with completion results cached via ``api_cache``.
    """

    # (content_type, name of the instruction-template attribute on config).
    # The three per-lesson content types share one generation pipeline.
    _LESSON_CONTENT_SPECS = (
        ("flashcards", "flashcard_mode_instructions"),
        ("exercises", "exercise_mode_instructions"),
        ("simulation", "simulation_mode_instructions"),
    )

    @staticmethod
    def _render_instructions(template: str, metadata: Dict[str, Any]) -> str:
        """Fill the {native_language}/{target_language}/{proficiency} placeholders."""
        return (
            template
            .replace("{native_language}", metadata['native_language'])
            .replace("{target_language}", metadata['target_language'])
            .replace("{proficiency}", metadata['proficiency'])
        )

    async def generate_curriculum_from_metadata(
        self,
        metadata_extraction_id: str,
        query: str,
        metadata: Dict[str, Any],
        user_id: Optional[int] = None
    ) -> str:
        """Generate a curriculum based on extracted metadata and persist it.

        Args:
            metadata_extraction_id: ID of the stored metadata-extraction row.
            query: The user's original query, used as the completion prompt.
            metadata: Must contain 'native_language', 'target_language'
                and 'proficiency'.
            user_id: Owner of the curriculum, if any.

        Returns:
            The new curriculum's ID. If the model response is not valid
            JSON, an empty curriculum skeleton is stored instead so the
            pipeline still yields a row.
        """
        instructions = self._render_instructions(config.curriculum_instructions, metadata)

        logger.info(f"Generating curriculum for {metadata['target_language']} ({metadata['proficiency']})")
        curriculum_response = await generate_completions.get_completions(query, instructions)

        try:
            curriculum = json.loads(curriculum_response)
        except json.JSONDecodeError:
            logger.error(f"Failed to parse curriculum response: {curriculum_response[:200]}...")
            curriculum = {"lesson_topic": "Language Learning Journey", "sub_topics": []}

        return await db.save_curriculum(
            metadata_extraction_id=metadata_extraction_id,
            curriculum=curriculum,
            user_id=user_id
        )

    async def _generate_and_save(
        self,
        content_type: str,
        instructions_attr: str,
        curriculum_id: str,
        lesson_index: int,
        lesson_topic: str,
        lesson_context: str,
        metadata: Dict[str, Any]
    ) -> str:
        """Generate one content type for one lesson (via cache) and store it.

        Returns the stored content's ID. Shared by all three content types;
        previously this logic was copy-pasted three times.
        """
        instructions = self._render_instructions(getattr(config, instructions_attr), metadata)

        response = await api_cache.get_or_set(
            category=content_type,
            key_text=lesson_context,
            coro=generate_completions.get_completions,
            context={
                'native_language': metadata['native_language'],
                'target_language': metadata['target_language'],
                'proficiency': metadata['proficiency'],
                'lesson_index': lesson_index
            },
            prompt=lesson_context,
            instructions=instructions
        )

        return await db.save_learning_content(
            curriculum_id=curriculum_id,
            content_type=content_type,
            lesson_index=lesson_index,
            lesson_topic=lesson_topic,
            content=response
        )

    async def generate_content_for_lesson(
        self,
        curriculum_id: str,
        lesson_index: int,
        lesson: Dict[str, Any],
        metadata: Dict[str, Any]
    ) -> Dict[str, str]:
        """Generate all content types for a single lesson.

        Returns:
            Mapping of content type -> stored content ID. A failure for
            one content type is logged and that key omitted; it does not
            abort the remaining types.
        """
        content_ids: Dict[str, str] = {}
        lesson_topic = lesson.get('sub_topic', f'Lesson {lesson_index + 1}')
        lesson_context = f"{lesson_topic}: {lesson.get('description', '')}"

        for content_type, instructions_attr in self._LESSON_CONTENT_SPECS:
            try:
                content_ids[content_type] = await self._generate_and_save(
                    content_type=content_type,
                    instructions_attr=instructions_attr,
                    curriculum_id=curriculum_id,
                    lesson_index=lesson_index,
                    lesson_topic=lesson_topic,
                    lesson_context=lesson_context,
                    metadata=metadata
                )
            except Exception as e:
                logger.error(f"Failed to generate {content_type} for lesson {lesson_index}: {e}")

        # BUG FIX: the original ended with `return content`, an undefined
        # name (NameError at runtime); the accumulated dict is content_ids.
        return content_ids

    async def generate_all_content_for_curriculum(
        self,
        curriculum_id: str,
        max_concurrent_lessons: int = 3
    ):
        """Generate all learning content for a curriculum, lesson by lesson.

        Lessons are processed in batches of ``max_concurrent_lessons`` to
        avoid overwhelming the completion API. On completion the curriculum
        is marked as content-generated.
        """
        curriculum_data = await db.get_curriculum(curriculum_id)
        if not curriculum_data:
            logger.error(f"Curriculum not found: {curriculum_id}")
            return

        try:
            curriculum = json.loads(curriculum_data['curriculum_json'])
            lessons = curriculum.get('sub_topics', [])
        except json.JSONDecodeError:
            logger.error(f"Failed to parse curriculum JSON for {curriculum_id}")
            return

        metadata = {
            'native_language': curriculum_data['native_language'],
            'target_language': curriculum_data['target_language'],
            'proficiency': curriculum_data['proficiency']
        }

        logger.info(f"Starting content generation for {len(lessons)} lessons")

        # Process lessons in batches so no more than max_concurrent_lessons
        # completion pipelines are in flight at once.
        for i in range(0, len(lessons), max_concurrent_lessons):
            batch = lessons[i:i + max_concurrent_lessons]
            batch_indices = list(range(i, min(i + max_concurrent_lessons, len(lessons))))

            tasks = [
                self.generate_content_for_lesson(
                    curriculum_id=curriculum_id,
                    lesson_index=idx,
                    lesson=lesson,
                    metadata=metadata
                )
                for idx, lesson in zip(batch_indices, batch)
            ]

            results = await asyncio.gather(*tasks, return_exceptions=True)

            for idx, result in zip(batch_indices, results):
                if isinstance(result, Exception):
                    logger.error(f"Failed to generate content for lesson {idx}: {result}")
                else:
                    logger.info(f"Generated content for lesson {idx}: {result}")

        await db.mark_curriculum_content_generated(curriculum_id)
        logger.info(f"Completed content generation for curriculum {curriculum_id}")

    async def process_metadata_extraction(
        self,
        extraction_id: str,
        query: str,
        metadata: Dict[str, Any],
        user_id: Optional[int] = None,
        generate_content: bool = True
    ) -> Dict[str, Any]:
        """Resolve a metadata extraction to a curriculum.

        Strategy, in order: reuse an exact per-user match; copy a fully
        generated similar curriculum from another user; otherwise generate
        a new curriculum (optionally starting background content
        generation).

        Returns:
            Dict with 'curriculum_id', 'content_generation_started',
            'cached', and 'cache_type'.
        """
        existing_curriculum = await db.find_existing_curriculum(
            query=query,
            native_language=metadata['native_language'],
            target_language=metadata['target_language'],
            proficiency=metadata['proficiency'],
            user_id=user_id
        )

        if existing_curriculum:
            # Exact match already owned by this user: return it as-is.
            if existing_curriculum.get('user_id') == user_id:
                logger.info(f"Found existing curriculum for user {user_id}: {existing_curriculum['id']}")
                return {
                    'curriculum_id': existing_curriculum['id'],
                    'content_generation_started': False,
                    'cached': True,
                    'cache_type': 'user_exact_match'
                }

            # A fully generated similar curriculum from another user: clone it.
            elif existing_curriculum.get('is_content_generated') == 1:
                logger.info(f"Copying existing curriculum {existing_curriculum['id']} for user {user_id}")
                curriculum_id = await db.copy_curriculum_for_user(
                    source_curriculum_id=existing_curriculum['id'],
                    metadata_extraction_id=extraction_id,
                    user_id=user_id
                )
                return {
                    'curriculum_id': curriculum_id,
                    'content_generation_started': False,
                    'cached': True,
                    'cache_type': 'copied_from_similar'
                }

        # No suitable existing curriculum: generate a fresh one.
        logger.info(f"No existing curriculum found, generating new one for user {user_id}")
        curriculum_id = await self.generate_curriculum_from_metadata(
            metadata_extraction_id=extraction_id,
            query=query,
            metadata=metadata,
            user_id=user_id
        )

        result = {
            'curriculum_id': curriculum_id,
            'content_generation_started': False,
            'cached': False,
            'cache_type': 'newly_generated'
        }

        if generate_content:
            # Fire-and-forget: the caller is not kept waiting for content.
            asyncio.create_task(self.generate_all_content_for_curriculum(curriculum_id))
            result['content_generation_started'] = True

        return result
292
+
293
+
294
# Global content generator instance; importing modules share this singleton.
content_generator = ContentGenerator()
backend/database_init.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Database initialization script for AI Language Tutor
4
+ Run this script to create database tables
5
+ """
6
+
7
+ import asyncio
8
+ import sys
9
+ import os
10
+
11
+ # Add the project root to Python path
12
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
13
+
14
+ from backend.database import create_tables, drop_tables
15
+ import logging
16
+
17
+ logging.basicConfig(level=logging.INFO)
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
async def init_database():
    """Create the database tables, logging the outcome.

    Any error is logged and re-raised so the CLI exits non-zero.
    """
    try:
        logger.info("Creating database tables...")
        await create_tables()
        logger.info("Database tables created successfully!")
    except Exception as e:
        logger.error(f"Error creating database tables: {e}")
        raise
30
+
31
+
32
async def reset_database():
    """Drop every existing table and recreate the schema from scratch.

    Any error is logged and re-raised so the CLI exits non-zero.
    """
    try:
        logger.info("Dropping existing tables...")
        await drop_tables()
        logger.info("Creating new tables...")
        await create_tables()
        logger.info("Database reset successfully!")
    except Exception as e:
        logger.error(f"Error resetting database: {e}")
        raise
43
+
44
+
45
if __name__ == "__main__":
    import argparse

    # NOTE(review): this module imports create_tables/drop_tables from
    # backend.database — confirm that module still exists alongside backend.db.
    parser = argparse.ArgumentParser(description="Database initialization for AI Language Tutor")
    parser.add_argument(
        "--reset",
        action="store_true",
        help="Reset database (drop and recreate tables)"
    )
    args = parser.parse_args()

    if not args.reset:
        # Default mode: create tables without touching existing data.
        asyncio.run(init_database())
    else:
        # Destructive mode: require interactive confirmation first.
        print("⚠️ WARNING: This will delete all existing data!")
        confirm = input("Are you sure you want to reset the database? (yes/no): ")
        if confirm.lower() == "yes":
            asyncio.run(reset_database())
        else:
            print("Database reset cancelled.")
backend/db.py ADDED
@@ -0,0 +1,434 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiosqlite
2
+ import json
3
+ import os
4
+ from typing import Optional, List, Dict, Any
5
+ from datetime import datetime
6
+ import uuid
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
# Database file path; override with the DATABASE_PATH environment variable.
DB_PATH = os.getenv("DATABASE_PATH", "./ai_tutor.db")
13
+
14
+
15
+ class Database:
16
+ """Pure SQLite database handler for AI Language Tutor"""
17
+
18
    def __init__(self, db_path: str = DB_PATH):
        # Path to the SQLite file; every method opens its own connection to it.
        self.db_path = db_path
20
+
21
+ async def initialize(self):
22
+ """Initialize database with schema"""
23
+ async with aiosqlite.connect(self.db_path) as db:
24
+ # Read and execute schema - look for it in parent directory
25
+ schema_path = os.path.join(os.path.dirname(__file__), 'schema.sql')
26
+ with open(schema_path, 'r') as f:
27
+ schema = f.read()
28
+ await db.executescript(schema)
29
+ await db.commit()
30
+ logger.info("Database initialized successfully")
31
+
32
+ async def find_existing_curriculum(
33
+ self,
34
+ query: str,
35
+ native_language: str,
36
+ target_language: str,
37
+ proficiency: str,
38
+ user_id: Optional[int] = None
39
+ ) -> Optional[Dict[str, Any]]:
40
+ """Find existing curriculum for similar query and metadata"""
41
+ async with aiosqlite.connect(self.db_path) as db:
42
+ db.row_factory = aiosqlite.Row
43
+
44
+ if user_id is not None:
45
+ # User-specific search: First try to find exact query match for the user
46
+ async with db.execute("""
47
+ SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
48
+ FROM curricula c
49
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
50
+ WHERE m.user_id = ? AND m.query = ? AND m.native_language = ?
51
+ AND m.target_language = ? AND m.proficiency = ?
52
+ ORDER BY c.created_at DESC
53
+ LIMIT 1
54
+ """, (user_id, query, native_language, target_language, proficiency)) as cursor:
55
+ row = await cursor.fetchone()
56
+ if row:
57
+ return dict(row)
58
+
59
+ # Then try to find similar curriculum with same metadata (any user)
60
+ async with db.execute("""
61
+ SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
62
+ FROM curricula c
63
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
64
+ WHERE m.native_language = ? AND m.target_language = ? AND m.proficiency = ?
65
+ AND c.is_content_generated = 1
66
+ ORDER BY c.created_at DESC
67
+ LIMIT 1
68
+ """, (native_language, target_language, proficiency)) as cursor:
69
+ row = await cursor.fetchone()
70
+ if row:
71
+ return dict(row)
72
+ else:
73
+ # User-independent search: Find exact query match regardless of user
74
+ async with db.execute("""
75
+ SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
76
+ FROM curricula c
77
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
78
+ WHERE m.query = ? AND m.native_language = ? AND m.target_language = ? AND m.proficiency = ?
79
+ ORDER BY c.created_at DESC
80
+ LIMIT 1
81
+ """, (query, native_language, target_language, proficiency)) as cursor:
82
+ row = await cursor.fetchone()
83
+ if row:
84
+ return dict(row)
85
+
86
+ return None
87
+
88
+ async def save_metadata_extraction(
89
+ self,
90
+ query: str,
91
+ metadata: Dict[str, Any],
92
+ user_id: Optional[int] = None
93
+ ) -> str:
94
+ """Save extracted metadata and return extraction ID"""
95
+ extraction_id = str(uuid.uuid4())
96
+
97
+ async with aiosqlite.connect(self.db_path) as db:
98
+ await db.execute("""
99
+ INSERT INTO metadata_extractions
100
+ (id, user_id, query, native_language, target_language, proficiency, title, description, metadata_json)
101
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
102
+ """, (
103
+ extraction_id,
104
+ user_id,
105
+ query,
106
+ metadata.get('native_language'),
107
+ metadata.get('target_language'),
108
+ metadata.get('proficiency'),
109
+ metadata.get('title'),
110
+ metadata.get('description'),
111
+ json.dumps(metadata)
112
+ ))
113
+ await db.commit()
114
+
115
+ logger.info(f"Saved metadata extraction: {extraction_id}")
116
+ return extraction_id
117
+
118
+ async def save_curriculum(
119
+ self,
120
+ metadata_extraction_id: str,
121
+ curriculum: Dict[str, Any],
122
+ user_id: Optional[int] = None
123
+ ) -> str:
124
+ """Save generated curriculum and return curriculum ID"""
125
+ curriculum_id = str(uuid.uuid4())
126
+
127
+ async with aiosqlite.connect(self.db_path) as db:
128
+ await db.execute("""
129
+ INSERT INTO curricula
130
+ (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json)
131
+ VALUES (?, ?, ?, ?, ?)
132
+ """, (
133
+ curriculum_id,
134
+ metadata_extraction_id,
135
+ user_id,
136
+ curriculum.get('lesson_topic', ''),
137
+ json.dumps(curriculum)
138
+ ))
139
+ await db.commit()
140
+
141
+ logger.info(f"Saved curriculum: {curriculum_id}")
142
+ return curriculum_id
143
+
144
+ async def copy_curriculum_for_user(
145
+ self,
146
+ source_curriculum_id: str,
147
+ metadata_extraction_id: str,
148
+ user_id: Optional[int] = None
149
+ ) -> str:
150
+ """Copy an existing curriculum for a new user"""
151
+ new_curriculum_id = str(uuid.uuid4())
152
+
153
+ async with aiosqlite.connect(self.db_path) as db:
154
+ # Get source curriculum
155
+ async with db.execute("""
156
+ SELECT lesson_topic, curriculum_json FROM curricula WHERE id = ?
157
+ """, (source_curriculum_id,)) as cursor:
158
+ row = await cursor.fetchone()
159
+ if not row:
160
+ raise ValueError(f"Source curriculum {source_curriculum_id} not found")
161
+
162
+ lesson_topic, curriculum_json = row
163
+
164
+ # Create new curriculum
165
+ await db.execute("""
166
+ INSERT INTO curricula
167
+ (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json, is_content_generated)
168
+ VALUES (?, ?, ?, ?, ?, 0)
169
+ """, (
170
+ new_curriculum_id,
171
+ metadata_extraction_id,
172
+ user_id,
173
+ lesson_topic,
174
+ curriculum_json
175
+ ))
176
+
177
+ # Copy all learning content
178
+ await db.execute("""
179
+ INSERT INTO learning_content
180
+ (id, curriculum_id, content_type, lesson_index, lesson_topic, content_json)
181
+ SELECT
182
+ lower(hex(randomblob(16))),
183
+ ?,
184
+ content_type,
185
+ lesson_index,
186
+ lesson_topic,
187
+ content_json
188
+ FROM learning_content
189
+ WHERE curriculum_id = ?
190
+ """, (new_curriculum_id, source_curriculum_id))
191
+
192
+ # Mark as content generated
193
+ await db.execute("""
194
+ UPDATE curricula
195
+ SET is_content_generated = 1
196
+ WHERE id = ?
197
+ """, (new_curriculum_id,))
198
+
199
+ await db.commit()
200
+
201
+ logger.info(f"Copied curriculum {source_curriculum_id} to {new_curriculum_id} for user {user_id}")
202
+ return new_curriculum_id
203
+
204
+ async def save_learning_content(
205
+ self,
206
+ curriculum_id: str,
207
+ content_type: str,
208
+ lesson_index: int,
209
+ lesson_topic: str,
210
+ content: Any
211
+ ) -> str:
212
+ """Save learning content (flashcards, exercises, or simulation)"""
213
+ content_id = str(uuid.uuid4())
214
+
215
+ async with aiosqlite.connect(self.db_path) as db:
216
+ await db.execute("""
217
+ INSERT INTO learning_content
218
+ (id, curriculum_id, content_type, lesson_index, lesson_topic, content_json)
219
+ VALUES (?, ?, ?, ?, ?, ?)
220
+ """, (
221
+ content_id,
222
+ curriculum_id,
223
+ content_type,
224
+ lesson_index,
225
+ lesson_topic,
226
+ json.dumps(content) if isinstance(content, (dict, list)) else content
227
+ ))
228
+ await db.commit()
229
+
230
+ logger.info(f"Saved {content_type} for lesson {lesson_index}")
231
+ return content_id
232
+
233
+ async def mark_curriculum_content_generated(self, curriculum_id: str):
234
+ """Mark curriculum as having all content generated"""
235
+ async with aiosqlite.connect(self.db_path) as db:
236
+ await db.execute("""
237
+ UPDATE curricula
238
+ SET is_content_generated = 1
239
+ WHERE id = ?
240
+ """, (curriculum_id,))
241
+ await db.commit()
242
+
243
+ async def get_metadata_extraction(self, extraction_id: str) -> Optional[Dict[str, Any]]:
244
+ """Get metadata extraction by ID"""
245
+ async with aiosqlite.connect(self.db_path) as db:
246
+ db.row_factory = aiosqlite.Row
247
+ async with db.execute("""
248
+ SELECT * FROM metadata_extractions WHERE id = ?
249
+ """, (extraction_id,)) as cursor:
250
+ row = await cursor.fetchone()
251
+ if row:
252
+ return dict(row)
253
+ return None
254
+
255
+ async def get_curriculum(self, curriculum_id: str) -> Optional[Dict[str, Any]]:
256
+ """Get curriculum by ID"""
257
+ async with aiosqlite.connect(self.db_path) as db:
258
+ db.row_factory = aiosqlite.Row
259
+ async with db.execute("""
260
+ SELECT c.*, m.native_language, m.target_language, m.proficiency
261
+ FROM curricula c
262
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
263
+ WHERE c.id = ?
264
+ """, (curriculum_id,)) as cursor:
265
+ row = await cursor.fetchone()
266
+ if row:
267
+ return dict(row)
268
+ return None
269
+
270
+ async def get_learning_content(
271
+ self,
272
+ curriculum_id: str,
273
+ content_type: Optional[str] = None,
274
+ lesson_index: Optional[int] = None
275
+ ) -> List[Dict[str, Any]]:
276
+ """Get learning content for a curriculum"""
277
+ query = "SELECT * FROM learning_content WHERE curriculum_id = ?"
278
+ params = [curriculum_id]
279
+
280
+ if content_type:
281
+ query += " AND content_type = ?"
282
+ params.append(content_type)
283
+
284
+ if lesson_index is not None:
285
+ query += " AND lesson_index = ?"
286
+ params.append(lesson_index)
287
+
288
+ query += " ORDER BY lesson_index"
289
+
290
+ async with aiosqlite.connect(self.db_path) as db:
291
+ db.row_factory = aiosqlite.Row
292
+ async with db.execute(query, params) as cursor:
293
+ rows = await cursor.fetchall()
294
+ return [dict(row) for row in rows]
295
+
296
+ async def get_user_metadata_extractions(
297
+ self,
298
+ user_id: int,
299
+ limit: int = 20
300
+ ) -> List[Dict[str, Any]]:
301
+ """Get user's metadata extraction history"""
302
+ async with aiosqlite.connect(self.db_path) as db:
303
+ db.row_factory = aiosqlite.Row
304
+ async with db.execute("""
305
+ SELECT * FROM metadata_extractions
306
+ WHERE user_id = ?
307
+ ORDER BY created_at DESC
308
+ LIMIT ?
309
+ """, (user_id, limit)) as cursor:
310
+ rows = await cursor.fetchall()
311
+ return [dict(row) for row in rows]
312
+
313
+ async def get_user_curricula(
314
+ self,
315
+ user_id: int,
316
+ limit: int = 20
317
+ ) -> List[Dict[str, Any]]:
318
+ """Get user's curricula"""
319
+ async with aiosqlite.connect(self.db_path) as db:
320
+ db.row_factory = aiosqlite.Row
321
+ async with db.execute("""
322
+ SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title
323
+ FROM curricula c
324
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
325
+ WHERE c.user_id = ?
326
+ ORDER BY c.created_at DESC
327
+ LIMIT ?
328
+ """, (user_id, limit)) as cursor:
329
+ rows = await cursor.fetchall()
330
+ return [dict(row) for row in rows]
331
+
332
+ async def get_user_learning_journeys(
333
+ self,
334
+ user_id: int,
335
+ limit: int = 20
336
+ ) -> List[Dict[str, Any]]:
337
+ """Get user's complete learning journeys"""
338
+ async with aiosqlite.connect(self.db_path) as db:
339
+ db.row_factory = aiosqlite.Row
340
+ async with db.execute("""
341
+ SELECT * FROM user_learning_journeys
342
+ WHERE user_id = ?
343
+ LIMIT ?
344
+ """, (user_id, limit)) as cursor:
345
+ rows = await cursor.fetchall()
346
+ return [dict(row) for row in rows]
347
+
348
+ async def get_curriculum_content_status(self, curriculum_id: str) -> Optional[Dict[str, Any]]:
349
+ """Get content generation status for a curriculum"""
350
+ async with aiosqlite.connect(self.db_path) as db:
351
+ db.row_factory = aiosqlite.Row
352
+ async with db.execute("""
353
+ SELECT * FROM curriculum_content_status WHERE curriculum_id = ?
354
+ """, (curriculum_id,)) as cursor:
355
+ row = await cursor.fetchone()
356
+ if row:
357
+ return dict(row)
358
+ return None
359
+
360
    async def get_full_curriculum_details(self, curriculum_id: str, include_content: bool = True) -> Optional[Dict[str, Any]]:
        """Get full curriculum details, optionally including all content.

        Loads the curriculum row, parses its stored JSON, and — when
        *include_content* is True — embeds each lesson's generated content
        (keyed by content_type) into the corresponding ``sub_topics`` entry.
        Returns None when the curriculum id is unknown.
        """
        curriculum = await self.get_curriculum(curriculum_id)
        if not curriculum:
            return None

        try:
            curriculum_data = json.loads(curriculum['curriculum_json'])
            # Lessons live under 'sub_topics' in the stored curriculum JSON.
            lessons = curriculum_data.get('sub_topics', [])
        except json.JSONDecodeError:
            # Corrupt/legacy JSON: fall back to an empty curriculum rather than fail.
            curriculum_data = {}
            lessons = []

        if include_content:
            content_list = await self.get_learning_content(curriculum_id)
            # Map lesson_index -> {content_type: content record}.
            content_map = {}
            for content in content_list:
                lesson_index = content['lesson_index']
                content_type = content['content_type']
                if lesson_index not in content_map:
                    content_map[lesson_index] = {}

                try:
                    parsed_content = json.loads(content['content_json'])
                except json.JSONDecodeError:
                    # Keep the raw string if it is not valid JSON.
                    parsed_content = content['content_json']

                content_map[lesson_index][content_type] = {
                    "id": content['id'],
                    "lesson_topic": content['lesson_topic'],
                    "content": parsed_content,
                    "created_at": content['created_at']
                }

            # Embed content into lessons
            # NOTE: lessons are matched to content by positional index —
            # assumes lesson_index values align with sub_topics order.
            for i, lesson in enumerate(lessons):
                lesson['content'] = content_map.get(i, {})

        # Expose the parsed structure and drop the raw JSON string.
        curriculum['curriculum'] = curriculum_data
        del curriculum['curriculum_json']

        return curriculum
402
+
403
+ async def search_curricula_by_languages(
404
+ self,
405
+ native_language: str,
406
+ target_language: str,
407
+ proficiency: Optional[str] = None,
408
+ limit: int = 10
409
+ ) -> List[Dict[str, Any]]:
410
+ """Search for existing curricula by language combination"""
411
+ query = """
412
+ SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title
413
+ FROM curricula c
414
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
415
+ WHERE m.native_language = ? AND m.target_language = ?
416
+ """
417
+ params = [native_language, target_language]
418
+
419
+ if proficiency:
420
+ query += " AND m.proficiency = ?"
421
+ params.append(proficiency)
422
+
423
+ query += " ORDER BY c.created_at DESC LIMIT ?"
424
+ params.append(limit)
425
+
426
+ async with aiosqlite.connect(self.db_path) as db:
427
+ db.row_factory = aiosqlite.Row
428
+ async with db.execute(query, params) as cursor:
429
+ rows = await cursor.fetchall()
430
+ return [dict(row) for row in rows]
431
+
432
+
433
+ # Global database instance
434
+ db = Database()
backend/db_cache.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiosqlite
2
+ import json
3
+ import os
4
+ from typing import Optional, Dict, Any, Callable, Union, List
5
+ import logging
6
+ import hashlib
7
+
8
logger = logging.getLogger(__name__)
DB_PATH = os.getenv("DATABASE_PATH", "./ai_tutor.db")

class ApiCache:
    """Generic caching service using a dedicated database table."""

    def __init__(self, db_path: str = DB_PATH):
        # Path of the SQLite file that holds the api_cache table.
        self.db_path = db_path

    def _generate_hash(self, text: str) -> str:
        """Generate a SHA256 hash for a given text."""
        return hashlib.sha256(text.encode()).hexdigest()

    def _generate_context_hash(self, key_text: str, **context) -> str:
        """Generate a hash that includes context for better cache differentiation."""
        # Sort the context so identical kwargs always produce the same key;
        # None-valued entries are ignored.
        context_items = sorted(context.items())
        context_str = "|".join([f"{k}:{v}" for k, v in context_items if v is not None])
        full_key = f"{key_text}|{context_str}"
        return hashlib.sha256(full_key.encode()).hexdigest()

    async def get_or_set(
        self,
        category: str,
        key_text: str,
        coro: Callable,
        *args,
        context: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> Union[Dict[str, Any], List[Any], str]:
        """
        Get data from cache or execute a coroutine to generate and cache it.

        Args:
            category: The category of the cached item (e.g., 'metadata', 'flashcards').
            key_text: The text to use for generating the cache key.
            coro: The async function to call if the item is not in the cache.
            *args: Positional arguments for the coroutine.
            context: Additional context for cache key generation (e.g., language, proficiency).
            **kwargs: Keyword arguments for the coroutine.

        Returns:
            The cached or newly generated content (parsed JSON), or the raw
            string when the generated content is not valid JSON (uncached).
        """
        # Generate cache key with context if provided
        if context:
            cache_key = self._generate_context_hash(key_text, **context)
        else:
            cache_key = self._generate_hash(key_text)

        # 1. Check cache
        async with aiosqlite.connect(self.db_path) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT content_json FROM api_cache WHERE cache_key = ? AND category = ?",
                (cache_key, category)
            ) as cursor:
                row = await cursor.fetchone()
                if row:
                    logger.info(f"Cache hit for {category} with key: {key_text[:50]}...")
                    return json.loads(row['content_json'])

        # 2. If miss, generate content
        logger.info(f"Cache miss for {category}: {key_text[:50]}... Generating new content")
        generated_content = await coro(*args, **kwargs)

        # Ensure content is a JSON-serializable string
        if isinstance(generated_content, (dict, list)):
            content_to_cache = json.dumps(generated_content)
        elif isinstance(generated_content, str):
            # Try to parse string to ensure it's valid JSON, then dump it back
            try:
                parsed_json = json.loads(generated_content)
                content_to_cache = json.dumps(parsed_json)
            except json.JSONDecodeError:
                # If it's not a JSON string, we can't cache it in this system.
                logger.warning(f"Content for {category} is not valid JSON, returning without caching.")
                return generated_content
        else:
            raise TypeError("Cached content must be a JSON string, dict, or list.")

        # 3. Store in cache.
        # FIX: use INSERT OR REPLACE instead of a plain INSERT. The
        # check-then-insert above is not atomic: two concurrent requests can
        # both miss and then both write, which would raise IntegrityError if
        # cache_key is unique (assumed PK/unique in schema.sql — confirm).
        # OR REPLACE makes the write idempotent either way.
        async with aiosqlite.connect(self.db_path) as db:
            await db.execute(
                "INSERT OR REPLACE INTO api_cache (cache_key, category, content_json) VALUES (?, ?, ?)",
                (cache_key, category, content_to_cache)
            )
            await db.commit()
        logger.info(f"Cached new content for {category} with key: {key_text[:50]}...")

        return json.loads(content_to_cache)

# Global API cache instance
api_cache = ApiCache()
backend/db_init.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database Initialization Module
3
+ Handles database creation, schema setup, and health checks
4
+ """
5
+
6
+ import os
7
+ import aiosqlite
8
+ import logging
9
+ from pathlib import Path
10
+ from typing import Dict, Any, List
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
class DatabaseInitializer:
    """Handles database initialization and health checks.

    Wraps creation of the SQLite file from schema.sql, a structured health
    check (tables, views, writability, row counts), and a best-effort repair
    routine. All methods are async and open short-lived connections.
    """

    def __init__(self, db_path: str = None):
        # DATABASE_PATH env var wins over the default local file.
        self.db_path = db_path or os.getenv("DATABASE_PATH", "./ai_tutor.db")
        # Resolved eagerly so a missing schema.sql fails fast at construction.
        self.schema_path = self._find_schema_file()

    def _find_schema_file(self) -> str:
        """Return the path to the schema.sql file.

        The schema.sql file is expected to be in the same directory as this script.

        Raises:
            FileNotFoundError: if schema.sql is not next to this module.
        """
        schema_path = os.path.join(os.path.dirname(__file__), 'schema.sql')
        if not os.path.exists(schema_path):
            raise FileNotFoundError(f"schema.sql not found at {schema_path}")
        return schema_path

    async def check_database_exists(self) -> bool:
        """Check if database file exists on disk."""
        return os.path.exists(self.db_path)

    async def check_database_health(self) -> Dict[str, Any]:
        """Comprehensive database health check.

        Returns a dict of boolean flags (existence, accessibility, schema,
        writability), per-table row counts, and a list of error strings.
        Never raises: failures are reported in the ``errors`` list.
        """
        health_status = {
            "database_exists": False,
            "database_accessible": False,
            "schema_loaded": False,
            "tables_exist": False,
            "views_exist": False,
            "can_write": False,
            "record_count": {},
            "errors": []
        }

        try:
            # Check if database file exists
            health_status["database_exists"] = await self.check_database_exists()

            if not health_status["database_exists"]:
                health_status["errors"].append("Database file does not exist")
                return health_status

            # Try to connect to database
            async with aiosqlite.connect(self.db_path) as db:
                health_status["database_accessible"] = True

                # Check if required tables exist
                required_tables = ['metadata_extractions', 'curricula', 'learning_content', 'api_cache']
                existing_tables = await self._get_existing_tables(db)

                missing_tables = [table for table in required_tables if table not in existing_tables]
                if missing_tables:
                    health_status["errors"].append(f"Missing tables: {missing_tables}")
                else:
                    health_status["tables_exist"] = True

                # Check if views exist
                required_views = ['user_learning_journeys', 'curriculum_content_status']
                existing_views = await self._get_existing_views(db)

                missing_views = [view for view in required_views if view not in existing_views]
                if missing_views:
                    health_status["errors"].append(f"Missing views: {missing_views}")
                else:
                    health_status["views_exist"] = True

                # Test write capability with a throwaway temp table.
                try:
                    await db.execute("CREATE TEMPORARY TABLE test_write (id INTEGER)")
                    await db.execute("DROP TABLE test_write")
                    health_status["can_write"] = True
                except Exception as e:
                    health_status["errors"].append(f"Cannot write to database: {str(e)}")

                # Get record counts (only meaningful when all tables exist).
                if health_status["tables_exist"]:
                    for table in required_tables:
                        try:
                            async with db.execute(f"SELECT COUNT(*) FROM {table}") as cursor:
                                count = await cursor.fetchone()
                                health_status["record_count"][table] = count[0] if count else 0
                        except Exception as e:
                            health_status["record_count"][table] = f"Error: {str(e)}"

                # Schema is "loaded" only when both tables and views are present.
                health_status["schema_loaded"] = (
                    health_status["tables_exist"] and
                    health_status["views_exist"]
                )

        except Exception as e:
            health_status["errors"].append(f"Database connection error: {str(e)}")

        return health_status

    async def _get_existing_tables(self, db: aiosqlite.Connection) -> List[str]:
        """Get list of existing (non-internal) tables."""
        async with db.execute("""
            SELECT name FROM sqlite_master
            WHERE type='table' AND name NOT LIKE 'sqlite_%'
        """) as cursor:
            rows = await cursor.fetchall()
            return [row[0] for row in rows]

    async def _get_existing_views(self, db: aiosqlite.Connection) -> List[str]:
        """Get list of existing views."""
        async with db.execute("""
            SELECT name FROM sqlite_master
            WHERE type='view'
        """) as cursor:
            rows = await cursor.fetchall()
            return [row[0] for row in rows]

    async def create_database(self) -> bool:
        """Create database file and initialize with schema.

        Returns True on success; errors are logged and reported as False.
        """
        try:
            logger.info(f"Creating database at: {self.db_path}")

            # Ensure directory exists
            db_dir = os.path.dirname(self.db_path)
            if db_dir and not os.path.exists(db_dir):
                os.makedirs(db_dir, exist_ok=True)
                logger.info(f"Created directory: {db_dir}")

            # Create database and load schema
            async with aiosqlite.connect(self.db_path) as db:
                # Read schema file
                with open(self.schema_path, 'r') as f:
                    schema = f.read()

                # Execute schema
                await db.executescript(schema)
                await db.commit()

            logger.info("Database created and schema loaded successfully")
            return True

        except Exception as e:
            logger.error(f"Error creating database: {str(e)}")
            return False

    async def initialize_database(self, force_recreate: bool = False) -> Dict[str, Any]:
        """Initialize database with comprehensive checks and creation.

        Creates the database when missing or when the schema is incomplete;
        with *force_recreate*, the existing file is renamed to ``*.backup``
        first. Returns a result dict with success flag, action taken, final
        health check, and errors.
        """
        result = {
            "success": False,
            "action_taken": "none",
            "health_check": {},
            "errors": []
        }

        try:
            # Check current database health
            health_check = await self.check_database_health()
            result["health_check"] = health_check

            # Determine if we need to create/recreate database
            needs_creation = (
                not health_check["database_exists"] or
                not health_check["schema_loaded"] or
                force_recreate
            )

            if needs_creation:
                if health_check["database_exists"] and force_recreate:
                    # Backup existing database before overwriting it.
                    # NOTE(review): a previous ``*.backup`` file is clobbered
                    # by os.rename on POSIX — confirm that is acceptable.
                    backup_path = f"{self.db_path}.backup"
                    if os.path.exists(self.db_path):
                        os.rename(self.db_path, backup_path)
                        logger.info(f"Backed up existing database to: {backup_path}")
                        result["action_taken"] = "recreated_with_backup"
                    else:
                        result["action_taken"] = "force_recreated"
                else:
                    result["action_taken"] = "created"

                # Create database
                creation_success = await self.create_database()
                if not creation_success:
                    result["errors"].append("Failed to create database")
                    return result

                # Verify creation
                final_health = await self.check_database_health()
                result["health_check"] = final_health

                if final_health["schema_loaded"] and final_health["can_write"]:
                    result["success"] = True
                    logger.info("Database initialization completed successfully")
                else:
                    result["errors"].append("Database created but health check failed")

            else:
                # Database exists and is healthy
                result["success"] = True
                result["action_taken"] = "already_exists"
                logger.info("Database already exists and is healthy")

        except Exception as e:
            error_msg = f"Database initialization error: {str(e)}"
            logger.error(error_msg)
            result["errors"].append(error_msg)

        return result

    async def repair_database(self) -> Dict[str, Any]:
        """Attempt to repair database issues.

        Creates the database when the file is missing; otherwise re-runs the
        schema script when tables are missing.

        NOTE(review): only missing *tables* trigger a schema re-run — a
        database with tables intact but views missing is not repaired here,
        and ``success`` will be False without any repair attempted. Confirm
        whether views should also trigger ``recreated_schema``.
        """
        result = {
            "success": False,
            "repairs_attempted": [],
            "errors": []
        }

        try:
            health_check = await self.check_database_health()

            if not health_check["database_exists"]:
                # Database doesn't exist - create it
                creation_result = await self.initialize_database()
                result["repairs_attempted"].append("created_missing_database")
                result["success"] = creation_result["success"]
                result["errors"].extend(creation_result.get("errors", []))
                return result

            # Database exists but has issues
            async with aiosqlite.connect(self.db_path) as db:
                # Check and repair missing tables
                if not health_check["tables_exist"]:
                    with open(self.schema_path, 'r') as f:
                        schema = f.read()
                    await db.executescript(schema)
                    await db.commit()
                    result["repairs_attempted"].append("recreated_schema")

            # Verify repair
            final_health = await self.check_database_health()
            result["success"] = final_health["schema_loaded"]

        except Exception as e:
            error_msg = f"Database repair error: {str(e)}"
            logger.error(error_msg)
            result["errors"].append(error_msg)

        return result
256
+
257
+
258
+ # Global instance
259
+ db_initializer = DatabaseInitializer()
backend/main.py CHANGED
@@ -1,96 +1,451 @@
1
- from fastapi import FastAPI, HTTPException
2
  from fastapi.responses import JSONResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
  from backend.utils import generate_completions
6
- from backend.utils.handlers import handle_generation_request, INSTRUCTION_TEMPLATES
7
  from backend import config
 
 
 
 
8
  from typing import Union, List, Literal, Optional
 
9
  import logging
10
  import json
11
- from backend.cache import cache
12
 
13
  logging.basicConfig(level=logging.INFO)
14
 
15
- app = FastAPI()
16
 
17
  # Add CORS middleware
18
  app.add_middleware(
19
  CORSMiddleware,
20
- allow_origins=["*"], # Allows all origins
21
  allow_credentials=True,
22
- allow_methods=["*"], # Allows all methods
23
- allow_headers=["*"], # Allows all headers
24
  )
25
 
26
- class Message(BaseModel):
27
- role: Literal["user", "assistant"]
28
- content: str
29
 
30
  class GenerationRequest(BaseModel):
31
  user_id: int
32
- query: Union[str, List[Message]]
33
  native_language: Optional[str] = None
34
  target_language: Optional[str] = None
35
  proficiency: Optional[str] = None
36
 
37
- class MetadataRequest(BaseModel):
38
- query: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  @app.get("/")
41
  async def root():
42
- return {"message": "Welcome to the AI Learning Assistant API!"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  @app.post("/extract/metadata")
45
  async def extract_metadata(data: MetadataRequest):
46
- logging.info(f"Query: {data.query}")
 
47
  try:
48
- response_str = await cache.get_or_set(
49
- (str(data.query), config.language_metadata_extraction_prompt),
50
- generate_completions.get_completions,
51
- data.query,
52
- config.language_metadata_extraction_prompt
 
 
53
  )
54
- metadata_dict = json.loads(response_str)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  return JSONResponse(
56
  content={
57
- "data": metadata_dict,
58
- "type": "language_metadata",
59
- "status": "success"
 
60
  },
61
- status_code=200
62
  )
63
  except Exception as e:
 
64
  raise HTTPException(status_code=500, detail=str(e))
65
 
66
- @app.post("/generate/curriculum")
67
- async def generate_curriculum(data: GenerationRequest):
68
- return await handle_generation_request(
69
- data=data,
70
- mode="curriculum",
71
- instructions_template=INSTRUCTION_TEMPLATES["curriculum"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  )
 
 
 
 
 
73
 
74
- @app.post("/generate/flashcards")
75
- async def generate_flashcards(data: GenerationRequest):
76
- return await handle_generation_request(
77
- data=data,
78
- mode="flashcards",
79
- instructions_template=INSTRUCTION_TEMPLATES["flashcards"]
 
 
 
 
 
 
 
 
 
 
 
 
80
  )
81
 
82
- @app.post("/generate/exercises")
83
- async def generate_exercises(data: GenerationRequest):
84
- return await handle_generation_request(
85
- data=data,
86
- mode="exercises",
87
- instructions_template=INSTRUCTION_TEMPLATES["exercises"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  )
89
 
90
- @app.post("/generate/simulation")
91
- async def generate_simulation(data: GenerationRequest):
92
- return await handle_generation_request(
93
- data=data,
94
- mode="simulation",
95
- instructions_template=INSTRUCTION_TEMPLATES["simulation"]
96
- )
 
1
+ from fastapi import FastAPI, HTTPException, Query, Path
2
  from fastapi.responses import JSONResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
  from backend.utils import generate_completions
 
6
  from backend import config
7
+ from backend.db import db
8
+ from backend.db_init import db_initializer
9
+ from backend.content_generator import content_generator
10
+ from backend.db_cache import api_cache
11
  from typing import Union, List, Literal, Optional
12
+ from datetime import datetime
13
  import logging
14
  import json
 
15
 
16
  logging.basicConfig(level=logging.INFO)
17
 
18
+ app = FastAPI(title="AI Language Tutor API", version="2.0.0")
19
 
20
  # Add CORS middleware
21
  app.add_middleware(
22
  CORSMiddleware,
23
+ allow_origins=["*"],
24
  allow_credentials=True,
25
+ allow_methods=["*"],
26
+ allow_headers=["*"],
27
  )
28
 
29
class MetadataRequest(BaseModel):
    """Request body for POST /extract/metadata."""
    # Free-text learner query to extract language metadata from.
    query: str
    # Optional ID of the requesting user; new extractions are saved under it.
    user_id: Optional[int] = None
33
class GenerationRequest(BaseModel):
    """Request body for content-generation endpoints."""
    # ID of the requesting user (required).
    user_id: int
    # Either a plain query string or a chat-style list of message dicts.
    query: Union[str, List[dict]]
    # Optional language metadata — presumably filled from a prior
    # /extract/metadata call when omitted; TODO confirm with callers.
    native_language: Optional[str] = None
    target_language: Optional[str] = None
    proficiency: Optional[str] = None
 
40
+ @app.on_event("startup")
41
+ async def startup_event():
42
+ """Initialize database on startup with comprehensive checks"""
43
+ logging.info("Starting database initialization...")
44
+
45
+ # Initialize database with health checks
46
+ init_result = await db_initializer.initialize_database()
47
+
48
+ if init_result["success"]:
49
+ logging.info(f"Database initialization successful: {init_result['action_taken']}")
50
+
51
+ # Log database statistics
52
+ health = init_result["health_check"]
53
+ if health.get("record_count"):
54
+ logging.info(f"Database records: {health['record_count']}")
55
+ else:
56
+ logging.error(f"Database initialization failed: {init_result['errors']}")
57
+ # Try to repair
58
+ logging.info("Attempting database repair...")
59
+ repair_result = await db_initializer.repair_database()
60
+ if repair_result["success"]:
61
+ logging.info("Database repair successful")
62
+ else:
63
+ logging.error(f"Database repair failed: {repair_result['errors']}")
64
+ raise RuntimeError("Failed to initialize database")
65
 
66
  @app.get("/")
67
  async def root():
68
+ return {"message": "Welcome to the AI Language Tutor API v2.0!"}
69
+
70
+ @app.get("/health")
71
+ async def health_check():
72
+ """Comprehensive health check including database status"""
73
+ try:
74
+ # Check database health
75
+ db_health = await db_initializer.check_database_health()
76
+
77
+ # Overall health status
78
+ is_healthy = (
79
+ db_health["database_exists"] and
80
+ db_health["schema_loaded"] and
81
+ db_health["can_write"]
82
+ )
83
+
84
+ return JSONResponse(
85
+ content={
86
+ "status": "healthy" if is_healthy else "unhealthy",
87
+ "api_version": "2.0.0",
88
+ "database": db_health,
89
+ "timestamp": datetime.now().isoformat()
90
+ },
91
+ status_code=200 if is_healthy else 503
92
+ )
93
+ except Exception as e:
94
+ return JSONResponse(
95
+ content={
96
+ "status": "error",
97
+ "error": str(e),
98
+ "timestamp": datetime.now().isoformat()
99
+ },
100
+ status_code=500
101
+ )
102
+
103
+ @app.post("/admin/database/repair")
104
+ async def repair_database():
105
+ """Repair database issues (admin endpoint)"""
106
+ try:
107
+ # repair_result = await db.repair_database() # This method doesn't exist on the Database class
108
+
109
+ return JSONResponse(
110
+ content={
111
+ "success": repair_result["success"],
112
+ "repairs_attempted": repair_result["repairs_attempted"],
113
+ "errors": repair_result["errors"],
114
+ "timestamp": datetime.now().isoformat()
115
+ },
116
+ status_code=200 if repair_result["success"] else 500
117
+ )
118
+ except Exception as e:
119
+ return JSONResponse(
120
+ content={
121
+ "success": False,
122
+ "error": str(e),
123
+ "timestamp": datetime.now().isoformat()
124
+ },
125
+ status_code=500
126
+ )
127
+
128
+ @app.post("/admin/database/recreate")
129
+ async def recreate_database():
130
+ """Recreate database from scratch (admin endpoint)"""
131
+ try:
132
+ init_result = await db_initializer.initialize_database(force_recreate=True)
133
+
134
+ return JSONResponse(
135
+ content={
136
+ "success": init_result["success"],
137
+ "action_taken": init_result["action_taken"],
138
+ "health_check": init_result["health_check"],
139
+ "errors": init_result["errors"],
140
+ "timestamp": datetime.now().isoformat()
141
+ },
142
+ status_code=200 if init_result["success"] else 500
143
+ )
144
+ except Exception as e:
145
+ return JSONResponse(
146
+ content={
147
+ "success": False,
148
+ "error": str(e),
149
+ "timestamp": datetime.now().isoformat()
150
+ },
151
+ status_code=500
152
+ )
153
+
154
+ # ========== POST ENDPOINTS (Generation) ==========
155
 
156
  @app.post("/extract/metadata")
157
  async def extract_metadata(data: MetadataRequest):
158
+ """Extract language learning metadata from user query"""
159
+ logging.info(f"Extracting metadata for query: {data.query[:50]}...")
160
  try:
161
+ # Generate metadata using AI, with caching
162
+ metadata_dict = await api_cache.get_or_set(
163
+ category="metadata",
164
+ key_text=data.query,
165
+ coro=generate_completions.get_completions,
166
+ prompt=data.query,
167
+ instructions=config.language_metadata_extraction_prompt
168
  )
169
+
170
+ # Check for existing curriculum first before creating new metadata extraction
171
+ existing_curriculum = await db.find_existing_curriculum(
172
+ query=data.query,
173
+ native_language=metadata_dict['native_language'],
174
+ target_language=metadata_dict['target_language'],
175
+ proficiency=metadata_dict['proficiency'],
176
+ user_id=None # Make it user-independent
177
+ )
178
+
179
+ if existing_curriculum:
180
+ # Found existing curriculum - return it regardless of user
181
+ logging.info(f"Found existing curriculum for query '{data.query[:50]}...': {existing_curriculum['id']}")
182
+ return JSONResponse(
183
+ content={
184
+ "message": "Found existing curriculum for your query.",
185
+ "curriculum_id": existing_curriculum['id'],
186
+ "status_endpoint": f"/content/status/{existing_curriculum['id']}",
187
+ "cached": True
188
+ },
189
+ status_code=200
190
+ )
191
+
192
+ # No suitable existing curriculum found, generate new one
193
+ logging.info(f"No existing curriculum found, generating new one for user {data.user_id}")
194
+
195
+ # Save metadata to database
196
+ extraction_id = await db.save_metadata_extraction(
197
+ query=data.query,
198
+ metadata=metadata_dict,
199
+ user_id=data.user_id
200
+ )
201
+
202
+ # Process extraction (generate curriculum and start content generation)
203
+ processing_result = await content_generator.process_metadata_extraction(
204
+ extraction_id=extraction_id,
205
+ query=data.query,
206
+ metadata=metadata_dict,
207
+ user_id=data.user_id,
208
+ generate_content=True # Automatically generate all content
209
+ )
210
+
211
+ curriculum_id = processing_result['curriculum_id']
212
+
213
  return JSONResponse(
214
  content={
215
+ "message": "Content generation has been initiated.",
216
+ "curriculum_id": curriculum_id,
217
+ "status_endpoint": f"/content/status/{curriculum_id}",
218
+ "cached": False
219
  },
220
+ status_code=202
221
  )
222
  except Exception as e:
223
+ logging.error(f"Error extracting metadata: {e}")
224
  raise HTTPException(status_code=500, detail=str(e))
225
 
226
+ # ========== GET ENDPOINTS (Retrieval) ==========
227
+
228
@app.get("/curriculum/{curriculum_id}/metadata")
async def get_curriculum_metadata(curriculum_id: str = Path(..., description="Curriculum ID")):
    """Return the metadata-extraction record that produced a curriculum.

    Looks up the curriculum, follows its ``metadata_extraction_id`` to the
    originating extraction row, and inlines the parsed metadata JSON under
    the ``metadata`` key (the raw ``metadata_json`` column is removed).

    Raises:
        HTTPException: 404 when the curriculum or its extraction row is missing.
    """
    curriculum = await db.get_curriculum(curriculum_id)
    if not curriculum:
        raise HTTPException(status_code=404, detail="Curriculum not found")

    # Get the full metadata extraction record
    extraction = await db.get_metadata_extraction(curriculum['metadata_extraction_id'])
    if not extraction:
        raise HTTPException(status_code=404, detail="Metadata extraction not found")

    # Replace the raw JSON column with its parsed form. Fall back to the raw
    # string if the stored JSON is malformed, instead of failing the whole
    # request with a 500 (same policy as _get_lesson_content_by_type).
    raw_metadata = extraction.pop('metadata_json')
    try:
        extraction['metadata'] = json.loads(raw_metadata)
    except json.JSONDecodeError:
        extraction['metadata'] = raw_metadata

    return JSONResponse(content=extraction, status_code=200)
245
+
246
@app.get("/curriculum/{curriculum_id}")
async def get_curriculum(curriculum_id: str = Path(..., description="Curriculum ID")):
    """Fetch a curriculum by ID, annotated with its content-generation status."""
    details = await db.get_full_curriculum_details(curriculum_id, include_content=False)
    if not details:
        raise HTTPException(status_code=404, detail="Curriculum not found")

    # Attach generation progress when the status view has a row for it.
    generation_status = await db.get_curriculum_content_status(curriculum_id)
    if generation_status:
        details['content_status'] = generation_status

    return JSONResponse(content=details, status_code=200)
259
+
260
+
261
async def _get_lesson_content_by_type(
    curriculum_id: str,
    lesson_index: int,
    content_type: str
):
    """Fetch one content type (flashcards/exercises/simulation) for a lesson.

    Raises:
        HTTPException: 404 when no matching content row exists.
    """
    rows = await db.get_learning_content(
        curriculum_id=curriculum_id,
        lesson_index=lesson_index,
        content_type=content_type
    )
    if not rows:
        raise HTTPException(
            status_code=404,
            detail=f"{content_type.capitalize()} content not found for lesson {lesson_index}"
        )

    # One row per (lesson, content_type) is expected; use the first.
    record = rows[0]
    try:
        body = json.loads(record['content_json'])
    except json.JSONDecodeError:
        # Stored value was not valid JSON — return it verbatim.
        body = record['content_json']

    payload = {
        "curriculum_id": curriculum_id,
        "lesson_index": lesson_index,
        "content_type": content_type,
        "id": record['id'],
        "lesson_topic": record['lesson_topic'],
        "content": body,
        "created_at": record['created_at']
    }
    return JSONResponse(content=payload, status_code=200)
297
 
298
@app.get("/curriculum/{curriculum_id}/lesson/{lesson_index}/flashcards")
async def get_lesson_flashcards(
    curriculum_id: str = Path(..., description="Curriculum ID"),
    lesson_index: int = Path(..., ge=0, le=24, description="Lesson index (0-24)")
):
    """Return the flashcards generated for one lesson of a curriculum."""
    return await _get_lesson_content_by_type(
        curriculum_id=curriculum_id,
        lesson_index=lesson_index,
        content_type="flashcards",
    )
305
+
306
@app.get("/curriculum/{curriculum_id}/lesson/{lesson_index}/exercises")
async def get_lesson_exercises(
    curriculum_id: str = Path(..., description="Curriculum ID"),
    lesson_index: int = Path(..., ge=0, le=24, description="Lesson index (0-24)")
):
    """Return the exercises generated for one lesson of a curriculum."""
    return await _get_lesson_content_by_type(
        curriculum_id=curriculum_id,
        lesson_index=lesson_index,
        content_type="exercises",
    )
313
+
314
@app.get("/curriculum/{curriculum_id}/lesson/{lesson_index}/simulation")
async def get_lesson_simulation(
    curriculum_id: str = Path(..., description="Curriculum ID"),
    lesson_index: int = Path(..., ge=0, le=24, description="Lesson index (0-24)")
):
    """Return the simulation generated for one lesson of a curriculum."""
    return await _get_lesson_content_by_type(
        curriculum_id=curriculum_id,
        lesson_index=lesson_index,
        content_type="simulation",
    )
321
@app.get("/user/{user_id}/metadata")
async def get_user_metadata_history(
    user_id: int = Path(..., description="User ID"),
    limit: int = Query(20, ge=1, le=100, description="Maximum number of results")
):
    """Return the user's metadata-extraction history, newest rows first.

    Each row's ``metadata_json`` column is parsed and exposed as ``metadata``.
    A single malformed row falls back to the raw string instead of failing
    the entire request (consistent with _get_lesson_content_by_type).
    """
    extractions = await db.get_user_metadata_extractions(user_id, limit)

    # Parse JSON fields per row, tolerating bad data.
    for extraction in extractions:
        raw_metadata = extraction.pop('metadata_json')
        try:
            extraction['metadata'] = json.loads(raw_metadata)
        except json.JSONDecodeError:
            extraction['metadata'] = raw_metadata

    return JSONResponse(
        content={
            "user_id": user_id,
            "extractions": extractions,
            "total": len(extractions)
        },
        status_code=200
    )
342
+
343
@app.get("/user/{user_id}/curricula")
async def get_user_curricula(
    user_id: int = Path(..., description="User ID"),
    limit: int = Query(20, ge=1, le=100, description="Maximum number of results")
):
    """Return the user's curricula with parsed JSON and generation status.

    Each row's ``curriculum_json`` column is parsed and exposed as
    ``curriculum``. A single malformed row falls back to the raw string
    instead of failing the entire request (consistent with
    _get_lesson_content_by_type).
    """
    curricula = await db.get_user_curricula(user_id, limit)

    # Parse JSON fields and attach content status per curriculum.
    for curriculum in curricula:
        raw_curriculum = curriculum.pop('curriculum_json')
        try:
            curriculum['curriculum'] = json.loads(raw_curriculum)
        except json.JSONDecodeError:
            curriculum['curriculum'] = raw_curriculum

        # Get content status
        status = await db.get_curriculum_content_status(curriculum['id'])
        if status:
            curriculum['content_status'] = status

    return JSONResponse(
        content={
            "user_id": user_id,
            "curricula": curricula,
            "total": len(curricula)
        },
        status_code=200
    )
369
+
370
@app.get("/user/{user_id}/journeys")
async def get_user_learning_journeys(
    user_id: int = Path(..., description="User ID"),
    limit: int = Query(20, ge=1, le=100, description="Maximum number of results")
):
    """Return the user's learning journeys (metadata joined with curriculum info)."""
    journeys = await db.get_user_learning_journeys(user_id, limit)

    payload = {
        "user_id": user_id,
        "journeys": journeys,
        "total": len(journeys),
    }
    return JSONResponse(content=payload, status_code=200)
386
+
387
@app.get("/search/curricula")
async def search_curricula(
    native_language: str = Query(..., description="Native language"),
    target_language: str = Query(..., description="Target language"),
    proficiency: Optional[str] = Query(None, description="Proficiency level"),
    limit: int = Query(10, ge=1, le=50, description="Maximum number of results")
):
    """Search existing curricula by native/target language (and optional proficiency).

    Each row's ``curriculum_json`` column is parsed and exposed as
    ``curriculum``. A single malformed row falls back to the raw string
    instead of failing the entire request (consistent with
    _get_lesson_content_by_type).
    """
    curricula = await db.search_curricula_by_languages(
        native_language=native_language,
        target_language=target_language,
        proficiency=proficiency,
        limit=limit
    )

    # Parse JSON fields per row, tolerating bad data.
    for curriculum in curricula:
        raw_curriculum = curriculum.pop('curriculum_json')
        try:
            curriculum['curriculum'] = json.loads(raw_curriculum)
        except json.JSONDecodeError:
            curriculum['curriculum'] = raw_curriculum

    return JSONResponse(
        content={
            "search_params": {
                "native_language": native_language,
                "target_language": target_language,
                "proficiency": proficiency
            },
            "curricula": curricula,
            "total": len(curricula)
        },
        status_code=200
    )
419
+
420
@app.get("/content/status/{curriculum_id}")
async def get_content_generation_status(
    curriculum_id: str = Path(..., description="Curriculum ID")
):
    """Report how much of a curriculum's content has been generated."""
    status = await db.get_curriculum_content_status(curriculum_id)
    if not status:
        raise HTTPException(status_code=404, detail="Curriculum not found")

    # Expected total: 25 lessons x 3 content types (flashcards, exercises, simulation).
    total_expected = 25 * 3
    total_generated = sum(
        status[key]
        for key in (
            'lessons_with_flashcards',
            'lessons_with_exercises',
            'lessons_with_simulations',
        )
    )

    completion_percentage = (total_generated / total_expected) * 100 if total_expected > 0 else 0

    payload = {
        "curriculum_id": curriculum_id,
        "status": status,
        "completion_percentage": round(completion_percentage, 2),
        "is_complete": completion_percentage >= 100,
    }
    return JSONResponse(content=payload, status_code=200)
451
 
 
 
 
 
 
 
 
backend/schema.sql ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
-- AI Language Tutor Database Schema
-- SQLite dialect: TEXT primary keys default to random 16-byte hex IDs.

-- Table for storing extracted metadata from user queries
CREATE TABLE IF NOT EXISTS metadata_extractions (
    id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
    user_id INTEGER,  -- nullable: extractions may be user-independent
    query TEXT NOT NULL,
    native_language TEXT,
    target_language TEXT,
    proficiency TEXT CHECK(proficiency IN ('beginner', 'intermediate', 'advanced')),
    title TEXT,
    description TEXT,
    metadata_json TEXT NOT NULL, -- Full JSON response
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Index for user queries
CREATE INDEX IF NOT EXISTS idx_metadata_user_id ON metadata_extractions(user_id);
CREATE INDEX IF NOT EXISTS idx_metadata_languages ON metadata_extractions(native_language, target_language);

-- Table for storing generated curricula
-- Each curriculum row originates from exactly one metadata extraction.
CREATE TABLE IF NOT EXISTS curricula (
    id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
    metadata_extraction_id TEXT NOT NULL,
    user_id INTEGER,
    lesson_topic TEXT,
    curriculum_json TEXT NOT NULL, -- Full curriculum JSON with 25 lessons
    is_content_generated INTEGER DEFAULT 0, -- Boolean: has all content been generated?
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (metadata_extraction_id) REFERENCES metadata_extractions(id) ON DELETE CASCADE
);

-- Index for curriculum lookups
CREATE INDEX IF NOT EXISTS idx_curricula_metadata_id ON curricula(metadata_extraction_id);
CREATE INDEX IF NOT EXISTS idx_curricula_user_id ON curricula(user_id);

-- Table for storing all types of learning content
-- One row per (curriculum, content_type, lesson_index); lesson_index is 0-24.
CREATE TABLE IF NOT EXISTS learning_content (
    id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
    curriculum_id TEXT NOT NULL,
    content_type TEXT NOT NULL CHECK(content_type IN ('flashcards', 'exercises', 'simulation')),
    lesson_index INTEGER NOT NULL CHECK(lesson_index >= 0 AND lesson_index < 25),
    lesson_topic TEXT,
    content_json TEXT NOT NULL, -- The actual generated content
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (curriculum_id) REFERENCES curricula(id) ON DELETE CASCADE
);

-- Index for content lookups
CREATE INDEX IF NOT EXISTS idx_content_curriculum_id ON learning_content(curriculum_id);
CREATE INDEX IF NOT EXISTS idx_content_type ON learning_content(content_type);
CREATE INDEX IF NOT EXISTS idx_content_lesson ON learning_content(curriculum_id, lesson_index);

-- View for easy access to user's learning journeys
-- LEFT JOIN: extractions without a generated curriculum still appear.
CREATE VIEW IF NOT EXISTS user_learning_journeys AS
SELECT
    m.id as metadata_id,
    m.user_id,
    m.query,
    m.native_language,
    m.target_language,
    m.proficiency,
    m.title,
    m.description,
    c.id as curriculum_id,
    c.lesson_topic,
    c.is_content_generated,
    m.created_at
FROM metadata_extractions m
LEFT JOIN curricula c ON m.id = c.metadata_extraction_id
ORDER BY m.created_at DESC;

-- View for content availability per curriculum
-- Counts DISTINCT lesson indexes per content type so duplicates don't inflate progress.
CREATE VIEW IF NOT EXISTS curriculum_content_status AS
SELECT
    c.id as curriculum_id,
    c.user_id,
    c.lesson_topic,
    COUNT(DISTINCT lc.lesson_index) as lessons_with_content,
    COUNT(DISTINCT CASE WHEN lc.content_type = 'flashcards' THEN lc.lesson_index END) as lessons_with_flashcards,
    COUNT(DISTINCT CASE WHEN lc.content_type = 'exercises' THEN lc.lesson_index END) as lessons_with_exercises,
    COUNT(DISTINCT CASE WHEN lc.content_type = 'simulation' THEN lc.lesson_index END) as lessons_with_simulations,
    c.created_at
FROM curricula c
LEFT JOIN learning_content lc ON c.id = lc.curriculum_id
GROUP BY c.id;

-- Generic cache for API responses to reduce redundant AI calls
-- NOTE(review): no expiry/TTL column — entries live until explicitly deleted.
CREATE TABLE IF NOT EXISTS api_cache (
    cache_key TEXT NOT NULL,
    category TEXT NOT NULL,
    content_json TEXT NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (cache_key, category)
);

-- Index for faster cache lookups
CREATE INDEX IF NOT EXISTS idx_api_cache_key_category ON api_cache(cache_key, category);
backend/utils/__pycache__/generate_completions.cpython-310.pyc DELETED
Binary file (2.55 kB)
 
backend/utils/__pycache__/generate_completions.cpython-311.pyc ADDED
Binary file (4.54 kB). View file
 
backend/utils/__pycache__/generate_completions.cpython-312.pyc CHANGED
Binary files a/backend/utils/__pycache__/generate_completions.cpython-312.pyc and b/backend/utils/__pycache__/generate_completions.cpython-312.pyc differ
 
backend/utils/__pycache__/handlers.cpython-310.pyc DELETED
Binary file (1.94 kB)
 
backend/utils/__pycache__/handlers.cpython-312.pyc CHANGED
Binary files a/backend/utils/__pycache__/handlers.cpython-312.pyc and b/backend/utils/__pycache__/handlers.cpython-312.pyc differ
 
backend/utils/generate_completions.py CHANGED
@@ -6,6 +6,7 @@ from typing import Union, List, Dict, Literal
6
  from dotenv import load_dotenv
7
  import os
8
  from pydantic import BaseModel
 
9
  load_dotenv()
10
 
11
  # Initialize the async client
@@ -71,9 +72,8 @@ async def get_completions(
71
  else:
72
  raise TypeError("Unexpected processed input type.")
73
 
74
- # print(os.getenv("MODEL"))
75
  response = await client.chat.completions.create(
76
- model=os.getenv("MODEL"),
77
  messages=messages,
78
  response_format={"type": "json_object"}
79
  )
 
6
  from dotenv import load_dotenv
7
  import os
8
  from pydantic import BaseModel
9
+
10
  load_dotenv()
11
 
12
  # Initialize the async client
 
72
  else:
73
  raise TypeError("Unexpected processed input type.")
74
 
 
75
  response = await client.chat.completions.create(
76
+ model=os.getenv("MODEL", "gemini-2.0-flash"),
77
  messages=messages,
78
  response_format={"type": "json_object"}
79
  )
backend/utils/handlers.py CHANGED
@@ -2,7 +2,7 @@ from fastapi import HTTPException
2
  from fastapi.responses import JSONResponse
3
  from typing import Callable, Dict, Any
4
  from backend import config
5
- from backend.cache import cache
6
  from backend.utils import generate_completions
7
 
8
  async def handle_generation_request(
@@ -39,19 +39,30 @@ async def handle_generation_request(
39
  .replace("{proficiency}", data.proficiency)
40
  )
41
 
42
- # Get response from cache or generate new
43
- response = await cache.get_or_set(
44
- (str(data.query), instructions),
45
- generate_completions.get_completions,
46
  data.query,
47
  instructions
48
  )
49
 
 
 
 
 
 
 
 
 
 
 
 
50
  return JSONResponse(
51
  content={
52
  "data": response,
53
  "type": mode,
54
- "status": "success"
 
 
55
  },
56
  status_code=200
57
  )
@@ -62,4 +73,4 @@ INSTRUCTION_TEMPLATES: Dict[str, str] = {
62
  "flashcards": config.flashcard_mode_instructions,
63
  "exercises": config.exercise_mode_instructions,
64
  "simulation": config.simulation_mode_instructions
65
- }
 
2
  from fastapi.responses import JSONResponse
3
  from typing import Callable, Dict, Any
4
  from backend import config
5
+ from backend.content_generator import content_generator
6
  from backend.utils import generate_completions
7
 
8
  async def handle_generation_request(
 
39
  .replace("{proficiency}", data.proficiency)
40
  )
41
 
42
+ # Generate new content
43
+ response = await generate_completions.get_completions(
 
 
44
  data.query,
45
  instructions
46
  )
47
 
48
+ # Save generated content to database
49
+ content_id = await content_generator.save_content(
50
+ query=str(data.query),
51
+ content=response,
52
+ content_type=mode,
53
+ user_id=data.user_id,
54
+ native_language=data.native_language,
55
+ target_language=data.target_language,
56
+ proficiency=data.proficiency
57
+ )
58
+
59
  return JSONResponse(
60
  content={
61
  "data": response,
62
  "type": mode,
63
+ "status": "success",
64
+ "content_id": content_id,
65
+ "saved": True
66
  },
67
  status_code=200
68
  )
 
73
  "flashcards": config.flashcard_mode_instructions,
74
  "exercises": config.exercise_mode_instructions,
75
  "simulation": config.simulation_mode_instructions
76
+ }