Spaces:
Sleeping
Sleeping
improved backend
Browse files- backend/__pycache__/config.cpython-311.pyc +0 -0
- backend/__pycache__/config.cpython-312.pyc +0 -0
- backend/__pycache__/content_generator.cpython-311.pyc +0 -0
- backend/__pycache__/content_generator.cpython-312.pyc +0 -0
- backend/__pycache__/db.cpython-311.pyc +0 -0
- backend/__pycache__/db.cpython-312.pyc +0 -0
- backend/__pycache__/db_cache.cpython-311.pyc +0 -0
- backend/__pycache__/db_cache.cpython-312.pyc +0 -0
- backend/__pycache__/db_init.cpython-311.pyc +0 -0
- backend/__pycache__/db_init.cpython-312.pyc +0 -0
- backend/__pycache__/main.cpython-311.pyc +0 -0
- backend/__pycache__/main.cpython-312.pyc +0 -0
- backend/content_generator.py +110 -79
- backend/db.py +107 -36
- backend/db_cache.py +2 -2
- backend/main.py +54 -20
- backend/schema.sql +13 -1
- backend/utils/__pycache__/generate_completions.cpython-311.pyc +0 -0
- backend/utils/__pycache__/generate_completions.cpython-312.pyc +0 -0
backend/__pycache__/config.cpython-311.pyc
CHANGED
|
Binary files a/backend/__pycache__/config.cpython-311.pyc and b/backend/__pycache__/config.cpython-311.pyc differ
|
|
|
backend/__pycache__/config.cpython-312.pyc
CHANGED
|
Binary files a/backend/__pycache__/config.cpython-312.pyc and b/backend/__pycache__/config.cpython-312.pyc differ
|
|
|
backend/__pycache__/content_generator.cpython-311.pyc
CHANGED
|
Binary files a/backend/__pycache__/content_generator.cpython-311.pyc and b/backend/__pycache__/content_generator.cpython-311.pyc differ
|
|
|
backend/__pycache__/content_generator.cpython-312.pyc
CHANGED
|
Binary files a/backend/__pycache__/content_generator.cpython-312.pyc and b/backend/__pycache__/content_generator.cpython-312.pyc differ
|
|
|
backend/__pycache__/db.cpython-311.pyc
CHANGED
|
Binary files a/backend/__pycache__/db.cpython-311.pyc and b/backend/__pycache__/db.cpython-311.pyc differ
|
|
|
backend/__pycache__/db.cpython-312.pyc
CHANGED
|
Binary files a/backend/__pycache__/db.cpython-312.pyc and b/backend/__pycache__/db.cpython-312.pyc differ
|
|
|
backend/__pycache__/db_cache.cpython-311.pyc
CHANGED
|
Binary files a/backend/__pycache__/db_cache.cpython-311.pyc and b/backend/__pycache__/db_cache.cpython-311.pyc differ
|
|
|
backend/__pycache__/db_cache.cpython-312.pyc
CHANGED
|
Binary files a/backend/__pycache__/db_cache.cpython-312.pyc and b/backend/__pycache__/db_cache.cpython-312.pyc differ
|
|
|
backend/__pycache__/db_init.cpython-311.pyc
CHANGED
|
Binary files a/backend/__pycache__/db_init.cpython-311.pyc and b/backend/__pycache__/db_init.cpython-311.pyc differ
|
|
|
backend/__pycache__/db_init.cpython-312.pyc
CHANGED
|
Binary files a/backend/__pycache__/db_init.cpython-312.pyc and b/backend/__pycache__/db_init.cpython-312.pyc differ
|
|
|
backend/__pycache__/main.cpython-311.pyc
CHANGED
|
Binary files a/backend/__pycache__/main.cpython-311.pyc and b/backend/__pycache__/main.cpython-311.pyc differ
|
|
|
backend/__pycache__/main.cpython-312.pyc
CHANGED
|
Binary files a/backend/__pycache__/main.cpython-312.pyc and b/backend/__pycache__/main.cpython-312.pyc differ
|
|
|
backend/content_generator.py
CHANGED
|
@@ -163,7 +163,7 @@ class ContentGenerator:
|
|
| 163 |
except Exception as e:
|
| 164 |
logger.error(f"Failed to generate simulation for lesson {lesson_index}: {e}")
|
| 165 |
|
| 166 |
-
return
|
| 167 |
|
| 168 |
async def generate_all_content_for_curriculum(
|
| 169 |
self,
|
|
@@ -171,56 +171,85 @@ class ContentGenerator:
|
|
| 171 |
max_concurrent_lessons: int = 3
|
| 172 |
):
|
| 173 |
"""Generate all learning content for a curriculum"""
|
| 174 |
-
# Get curriculum details
|
| 175 |
-
curriculum_data = await db.get_curriculum(curriculum_id)
|
| 176 |
-
if not curriculum_data:
|
| 177 |
-
logger.error(f"Curriculum not found: {curriculum_id}")
|
| 178 |
-
return
|
| 179 |
-
|
| 180 |
-
# Parse curriculum JSON
|
| 181 |
try:
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
# Prepare metadata
|
| 189 |
-
metadata = {
|
| 190 |
-
'native_language': curriculum_data['native_language'],
|
| 191 |
-
'target_language': curriculum_data['target_language'],
|
| 192 |
-
'proficiency': curriculum_data['proficiency']
|
| 193 |
-
}
|
| 194 |
-
|
| 195 |
-
logger.info(f"Starting content generation for {len(lessons)} lessons")
|
| 196 |
-
|
| 197 |
-
# Process lessons in batches to avoid overwhelming the API
|
| 198 |
-
for i in range(0, len(lessons), max_concurrent_lessons):
|
| 199 |
-
batch = lessons[i:i + max_concurrent_lessons]
|
| 200 |
-
batch_indices = list(range(i, min(i + max_concurrent_lessons, len(lessons))))
|
| 201 |
|
| 202 |
-
#
|
| 203 |
-
|
| 204 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
curriculum_id=curriculum_id,
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
metadata=metadata
|
| 209 |
)
|
| 210 |
-
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
-
|
| 214 |
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
|
| 225 |
async def process_metadata_extraction(
|
| 226 |
self,
|
|
@@ -228,44 +257,46 @@ class ContentGenerator:
|
|
| 228 |
query: str,
|
| 229 |
metadata: Dict[str, Any],
|
| 230 |
user_id: Optional[int] = None,
|
| 231 |
-
generate_content: bool = True
|
|
|
|
| 232 |
) -> Dict[str, Any]:
|
| 233 |
"""Process a metadata extraction by checking for existing curriculum or generating new one"""
|
| 234 |
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
if existing_curriculum:
|
| 245 |
-
# If we found an exact match for this user, return it
|
| 246 |
-
if existing_curriculum.get('user_id') == user_id:
|
| 247 |
-
logger.info(f"Found existing curriculum for user {user_id}: {existing_curriculum['id']}")
|
| 248 |
-
return {
|
| 249 |
-
'curriculum_id': existing_curriculum['id'],
|
| 250 |
-
'content_generation_started': False,
|
| 251 |
-
'cached': True,
|
| 252 |
-
'cache_type': 'user_exact_match'
|
| 253 |
-
}
|
| 254 |
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 269 |
|
| 270 |
# No suitable existing curriculum found, generate new one
|
| 271 |
logger.info(f"No existing curriculum found, generating new one for user {user_id}")
|
|
|
|
| 163 |
except Exception as e:
|
| 164 |
logger.error(f"Failed to generate simulation for lesson {lesson_index}: {e}")
|
| 165 |
|
| 166 |
+
return content_ids
|
| 167 |
|
| 168 |
async def generate_all_content_for_curriculum(
|
| 169 |
self,
|
|
|
|
| 171 |
max_concurrent_lessons: int = 3
|
| 172 |
):
|
| 173 |
"""Generate all learning content for a curriculum"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
try:
|
| 175 |
+
# Update status to generating
|
| 176 |
+
await db.update_content_generation_status(
|
| 177 |
+
curriculum_id=curriculum_id,
|
| 178 |
+
status='generating'
|
| 179 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 180 |
|
| 181 |
+
# Get curriculum details
|
| 182 |
+
curriculum_data = await db.get_curriculum(curriculum_id)
|
| 183 |
+
if not curriculum_data:
|
| 184 |
+
logger.error(f"Curriculum not found: {curriculum_id}")
|
| 185 |
+
await db.update_content_generation_status(
|
| 186 |
+
curriculum_id=curriculum_id,
|
| 187 |
+
status='failed',
|
| 188 |
+
error_message="Curriculum not found"
|
| 189 |
+
)
|
| 190 |
+
return
|
| 191 |
+
|
| 192 |
+
# Parse curriculum JSON
|
| 193 |
+
try:
|
| 194 |
+
curriculum = json.loads(curriculum_data['curriculum_json'])
|
| 195 |
+
lessons = curriculum.get('sub_topics', [])
|
| 196 |
+
except json.JSONDecodeError:
|
| 197 |
+
logger.error(f"Failed to parse curriculum JSON for {curriculum_id}")
|
| 198 |
+
await db.update_content_generation_status(
|
| 199 |
curriculum_id=curriculum_id,
|
| 200 |
+
status='failed',
|
| 201 |
+
error_message="Failed to parse curriculum JSON"
|
|
|
|
| 202 |
)
|
| 203 |
+
return
|
| 204 |
+
|
| 205 |
+
# Prepare metadata
|
| 206 |
+
metadata = {
|
| 207 |
+
'native_language': curriculum_data['native_language'],
|
| 208 |
+
'target_language': curriculum_data['target_language'],
|
| 209 |
+
'proficiency': curriculum_data['proficiency']
|
| 210 |
+
}
|
| 211 |
|
| 212 |
+
logger.info(f"Starting content generation for {len(lessons)} lessons")
|
| 213 |
|
| 214 |
+
# Process lessons in batches to avoid overwhelming the API
|
| 215 |
+
for i in range(0, len(lessons), max_concurrent_lessons):
|
| 216 |
+
batch = lessons[i:i + max_concurrent_lessons]
|
| 217 |
+
batch_indices = list(range(i, min(i + max_concurrent_lessons, len(lessons))))
|
| 218 |
+
|
| 219 |
+
# Generate content for batch concurrently
|
| 220 |
+
tasks = [
|
| 221 |
+
self.generate_content_for_lesson(
|
| 222 |
+
curriculum_id=curriculum_id,
|
| 223 |
+
lesson_index=idx,
|
| 224 |
+
lesson=lesson,
|
| 225 |
+
metadata=metadata
|
| 226 |
+
)
|
| 227 |
+
for idx, lesson in zip(batch_indices, batch)
|
| 228 |
+
]
|
| 229 |
+
|
| 230 |
+
results = await asyncio.gather(*tasks, return_exceptions=True)
|
| 231 |
+
|
| 232 |
+
for idx, result in zip(batch_indices, results):
|
| 233 |
+
if isinstance(result, Exception):
|
| 234 |
+
logger.error(f"Failed to generate content for lesson {idx}: {result}")
|
| 235 |
+
else:
|
| 236 |
+
logger.info(f"Generated content for lesson {idx}: {result}")
|
| 237 |
+
|
| 238 |
+
# Mark curriculum as content generated
|
| 239 |
+
await db.mark_curriculum_content_generated(curriculum_id)
|
| 240 |
+
await db.update_content_generation_status(
|
| 241 |
+
curriculum_id=curriculum_id,
|
| 242 |
+
status='completed'
|
| 243 |
+
)
|
| 244 |
+
logger.info(f"Completed content generation for curriculum {curriculum_id}")
|
| 245 |
+
|
| 246 |
+
except Exception as e:
|
| 247 |
+
logger.error(f"Failed to generate content for curriculum {curriculum_id}: {e}")
|
| 248 |
+
await db.update_content_generation_status(
|
| 249 |
+
curriculum_id=curriculum_id,
|
| 250 |
+
status='failed',
|
| 251 |
+
error_message=str(e)
|
| 252 |
+
)
|
| 253 |
|
| 254 |
async def process_metadata_extraction(
|
| 255 |
self,
|
|
|
|
| 257 |
query: str,
|
| 258 |
metadata: Dict[str, Any],
|
| 259 |
user_id: Optional[int] = None,
|
| 260 |
+
generate_content: bool = True,
|
| 261 |
+
skip_curriculum_lookup: bool = False
|
| 262 |
) -> Dict[str, Any]:
|
| 263 |
"""Process a metadata extraction by checking for existing curriculum or generating new one"""
|
| 264 |
|
| 265 |
+
if not skip_curriculum_lookup:
|
| 266 |
+
# Check for existing curriculum first
|
| 267 |
+
existing_curriculum = await db.find_existing_curriculum(
|
| 268 |
+
query=query,
|
| 269 |
+
native_language=metadata['native_language'],
|
| 270 |
+
target_language=metadata['target_language'],
|
| 271 |
+
proficiency=metadata['proficiency'],
|
| 272 |
+
user_id=user_id
|
| 273 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
|
| 275 |
+
if existing_curriculum:
|
| 276 |
+
# If we found an exact match for this user, return it
|
| 277 |
+
if existing_curriculum.get('user_id') == user_id:
|
| 278 |
+
logger.info(f"Found existing curriculum for user {user_id}: {existing_curriculum['id']}")
|
| 279 |
+
return {
|
| 280 |
+
'curriculum_id': existing_curriculum['id'],
|
| 281 |
+
'content_generation_started': False,
|
| 282 |
+
'cached': True,
|
| 283 |
+
'cache_type': 'user_exact_match'
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
# If we found a similar curriculum from another user, copy it
|
| 287 |
+
elif existing_curriculum.get('is_content_generated') == 1:
|
| 288 |
+
logger.info(f"Copying existing curriculum {existing_curriculum['id']} for user {user_id}")
|
| 289 |
+
curriculum_id = await db.copy_curriculum_for_user(
|
| 290 |
+
source_curriculum_id=existing_curriculum['id'],
|
| 291 |
+
metadata_extraction_id=extraction_id,
|
| 292 |
+
user_id=user_id
|
| 293 |
+
)
|
| 294 |
+
return {
|
| 295 |
+
'curriculum_id': curriculum_id,
|
| 296 |
+
'content_generation_started': False,
|
| 297 |
+
'cached': True,
|
| 298 |
+
'cache_type': 'copied_from_similar'
|
| 299 |
+
}
|
| 300 |
|
| 301 |
# No suitable existing curriculum found, generate new one
|
| 302 |
logger.info(f"No existing curriculum found, generating new one for user {user_id}")
|
backend/db.py
CHANGED
|
@@ -37,12 +37,16 @@ class Database:
|
|
| 37 |
proficiency: str,
|
| 38 |
user_id: Optional[int] = None
|
| 39 |
) -> Optional[Dict[str, Any]]:
|
| 40 |
-
"""Find existing curriculum for
|
|
|
|
|
|
|
| 41 |
async with aiosqlite.connect(self.db_path) as db:
|
| 42 |
db.row_factory = aiosqlite.Row
|
| 43 |
|
|
|
|
| 44 |
if user_id is not None:
|
| 45 |
-
# User-specific search:
|
|
|
|
| 46 |
async with db.execute("""
|
| 47 |
SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
|
| 48 |
FROM curricula c
|
|
@@ -54,35 +58,27 @@ class Database:
|
|
| 54 |
""", (user_id, query, native_language, target_language, proficiency)) as cursor:
|
| 55 |
row = await cursor.fetchone()
|
| 56 |
if row:
|
|
|
|
| 57 |
return dict(row)
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
|
| 76 |
-
FROM curricula c
|
| 77 |
-
JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
|
| 78 |
-
WHERE m.query = ? AND m.native_language = ? AND m.target_language = ? AND m.proficiency = ?
|
| 79 |
-
ORDER BY c.created_at DESC
|
| 80 |
-
LIMIT 1
|
| 81 |
-
""", (query, native_language, target_language, proficiency)) as cursor:
|
| 82 |
-
row = await cursor.fetchone()
|
| 83 |
-
if row:
|
| 84 |
-
return dict(row)
|
| 85 |
|
|
|
|
| 86 |
return None
|
| 87 |
|
| 88 |
async def save_metadata_extraction(
|
|
@@ -93,7 +89,17 @@ class Database:
|
|
| 93 |
) -> str:
|
| 94 |
"""Save extracted metadata and return extraction ID"""
|
| 95 |
extraction_id = str(uuid.uuid4())
|
| 96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
async with aiosqlite.connect(self.db_path) as db:
|
| 98 |
await db.execute("""
|
| 99 |
INSERT INTO metadata_extractions
|
|
@@ -127,8 +133,8 @@ class Database:
|
|
| 127 |
async with aiosqlite.connect(self.db_path) as db:
|
| 128 |
await db.execute("""
|
| 129 |
INSERT INTO curricula
|
| 130 |
-
(id, metadata_extraction_id, user_id, lesson_topic, curriculum_json)
|
| 131 |
-
VALUES (?, ?, ?, ?,
|
| 132 |
""", (
|
| 133 |
curriculum_id,
|
| 134 |
metadata_extraction_id,
|
|
@@ -164,8 +170,8 @@ class Database:
|
|
| 164 |
# Create new curriculum
|
| 165 |
await db.execute("""
|
| 166 |
INSERT INTO curricula
|
| 167 |
-
(id, metadata_extraction_id, user_id, lesson_topic, curriculum_json, is_content_generated)
|
| 168 |
-
VALUES (?, ?, ?, ?, ?, 0)
|
| 169 |
""", (
|
| 170 |
new_curriculum_id,
|
| 171 |
metadata_extraction_id,
|
|
@@ -192,7 +198,9 @@ class Database:
|
|
| 192 |
# Mark as content generated
|
| 193 |
await db.execute("""
|
| 194 |
UPDATE curricula
|
| 195 |
-
SET is_content_generated = 1
|
|
|
|
|
|
|
| 196 |
WHERE id = ?
|
| 197 |
""", (new_curriculum_id,))
|
| 198 |
|
|
@@ -235,11 +243,74 @@ class Database:
|
|
| 235 |
async with aiosqlite.connect(self.db_path) as db:
|
| 236 |
await db.execute("""
|
| 237 |
UPDATE curricula
|
| 238 |
-
SET is_content_generated = 1
|
|
|
|
|
|
|
| 239 |
WHERE id = ?
|
| 240 |
""", (curriculum_id,))
|
| 241 |
await db.commit()
|
| 242 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
async def get_metadata_extraction(self, extraction_id: str) -> Optional[Dict[str, Any]]:
|
| 244 |
"""Get metadata extraction by ID"""
|
| 245 |
async with aiosqlite.connect(self.db_path) as db:
|
|
|
|
| 37 |
proficiency: str,
|
| 38 |
user_id: Optional[int] = None
|
| 39 |
) -> Optional[Dict[str, Any]]:
|
| 40 |
+
"""Find existing curriculum for exact query and metadata match"""
|
| 41 |
+
logger.info(f"Looking for curriculum: query='{query[:50]}...', native={native_language}, target={target_language}, proficiency={proficiency}, user_id={user_id}")
|
| 42 |
+
|
| 43 |
async with aiosqlite.connect(self.db_path) as db:
|
| 44 |
db.row_factory = aiosqlite.Row
|
| 45 |
|
| 46 |
+
# Always look for exact query matches first, prioritizing user-specific matches
|
| 47 |
if user_id is not None:
|
| 48 |
+
# User-specific search: Find exact query match for the user
|
| 49 |
+
logger.info(f"Searching for exact match for user {user_id}")
|
| 50 |
async with db.execute("""
|
| 51 |
SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
|
| 52 |
FROM curricula c
|
|
|
|
| 58 |
""", (user_id, query, native_language, target_language, proficiency)) as cursor:
|
| 59 |
row = await cursor.fetchone()
|
| 60 |
if row:
|
| 61 |
+
logger.info(f"Found exact user match: {dict(row)['id']}")
|
| 62 |
return dict(row)
|
| 63 |
+
|
| 64 |
+
# Look for exact query match from any user (only if the query is exactly the same)
|
| 65 |
+
logger.info("Searching for exact query match (any user)")
|
| 66 |
+
async with db.execute("""
|
| 67 |
+
SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
|
| 68 |
+
FROM curricula c
|
| 69 |
+
JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
|
| 70 |
+
WHERE m.query = ? AND m.native_language = ? AND m.target_language = ? AND m.proficiency = ?
|
| 71 |
+
ORDER BY c.created_at DESC
|
| 72 |
+
LIMIT 1
|
| 73 |
+
""", (query, native_language, target_language, proficiency)) as cursor:
|
| 74 |
+
row = await cursor.fetchone()
|
| 75 |
+
if row:
|
| 76 |
+
logger.info(f"Found exact query match: {dict(row)['id']}")
|
| 77 |
+
return dict(row)
|
| 78 |
+
else:
|
| 79 |
+
logger.info("No exact query match found")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
+
logger.info("No existing curriculum found")
|
| 82 |
return None
|
| 83 |
|
| 84 |
async def save_metadata_extraction(
|
|
|
|
| 89 |
) -> str:
|
| 90 |
"""Save extracted metadata and return extraction ID"""
|
| 91 |
extraction_id = str(uuid.uuid4())
|
| 92 |
+
|
| 93 |
+
# Validate proficiency before inserting into the database
|
| 94 |
+
allowed_proficiencies = {"beginner", "intermediate", "advanced"}
|
| 95 |
+
proficiency = metadata.get('proficiency')
|
| 96 |
+
if proficiency not in allowed_proficiencies:
|
| 97 |
+
logger.warning(
|
| 98 |
+
f"Unknown proficiency '{proficiency}' received; defaulting to 'beginner'."
|
| 99 |
+
)
|
| 100 |
+
proficiency = "beginner"
|
| 101 |
+
metadata["proficiency"] = "beginner"
|
| 102 |
+
|
| 103 |
async with aiosqlite.connect(self.db_path) as db:
|
| 104 |
await db.execute("""
|
| 105 |
INSERT INTO metadata_extractions
|
|
|
|
| 133 |
async with aiosqlite.connect(self.db_path) as db:
|
| 134 |
await db.execute("""
|
| 135 |
INSERT INTO curricula
|
| 136 |
+
(id, metadata_extraction_id, user_id, lesson_topic, curriculum_json, content_generation_status)
|
| 137 |
+
VALUES (?, ?, ?, ?, ?, 'pending')
|
| 138 |
""", (
|
| 139 |
curriculum_id,
|
| 140 |
metadata_extraction_id,
|
|
|
|
| 170 |
# Create new curriculum
|
| 171 |
await db.execute("""
|
| 172 |
INSERT INTO curricula
|
| 173 |
+
(id, metadata_extraction_id, user_id, lesson_topic, curriculum_json, is_content_generated, content_generation_status)
|
| 174 |
+
VALUES (?, ?, ?, ?, ?, 0, 'pending')
|
| 175 |
""", (
|
| 176 |
new_curriculum_id,
|
| 177 |
metadata_extraction_id,
|
|
|
|
| 198 |
# Mark as content generated
|
| 199 |
await db.execute("""
|
| 200 |
UPDATE curricula
|
| 201 |
+
SET is_content_generated = 1,
|
| 202 |
+
content_generation_status = 'completed',
|
| 203 |
+
content_generation_completed_at = CURRENT_TIMESTAMP
|
| 204 |
WHERE id = ?
|
| 205 |
""", (new_curriculum_id,))
|
| 206 |
|
|
|
|
| 243 |
async with aiosqlite.connect(self.db_path) as db:
|
| 244 |
await db.execute("""
|
| 245 |
UPDATE curricula
|
| 246 |
+
SET is_content_generated = 1,
|
| 247 |
+
content_generation_status = 'completed',
|
| 248 |
+
content_generation_completed_at = CURRENT_TIMESTAMP
|
| 249 |
WHERE id = ?
|
| 250 |
""", (curriculum_id,))
|
| 251 |
await db.commit()
|
| 252 |
|
| 253 |
+
async def update_content_generation_status(
|
| 254 |
+
self,
|
| 255 |
+
curriculum_id: str,
|
| 256 |
+
status: str,
|
| 257 |
+
error_message: Optional[str] = None
|
| 258 |
+
):
|
| 259 |
+
"""Update content generation status for a curriculum"""
|
| 260 |
+
async with aiosqlite.connect(self.db_path) as db:
|
| 261 |
+
if status == 'generating':
|
| 262 |
+
await db.execute("""
|
| 263 |
+
UPDATE curricula
|
| 264 |
+
SET content_generation_status = ?,
|
| 265 |
+
content_generation_started_at = CURRENT_TIMESTAMP,
|
| 266 |
+
content_generation_error = NULL
|
| 267 |
+
WHERE id = ?
|
| 268 |
+
""", (status, curriculum_id))
|
| 269 |
+
elif status == 'completed':
|
| 270 |
+
await db.execute("""
|
| 271 |
+
UPDATE curricula
|
| 272 |
+
SET content_generation_status = ?,
|
| 273 |
+
content_generation_completed_at = CURRENT_TIMESTAMP,
|
| 274 |
+
content_generation_error = NULL,
|
| 275 |
+
is_content_generated = 1
|
| 276 |
+
WHERE id = ?
|
| 277 |
+
""", (status, curriculum_id))
|
| 278 |
+
elif status == 'failed':
|
| 279 |
+
await db.execute("""
|
| 280 |
+
UPDATE curricula
|
| 281 |
+
SET content_generation_status = ?,
|
| 282 |
+
content_generation_error = ?
|
| 283 |
+
WHERE id = ?
|
| 284 |
+
""", (status, error_message, curriculum_id))
|
| 285 |
+
else:
|
| 286 |
+
await db.execute("""
|
| 287 |
+
UPDATE curricula
|
| 288 |
+
SET content_generation_status = ?,
|
| 289 |
+
content_generation_error = ?
|
| 290 |
+
WHERE id = ?
|
| 291 |
+
""", (status, error_message, curriculum_id))
|
| 292 |
+
await db.commit()
|
| 293 |
+
|
| 294 |
+
async def get_content_generation_status(self, curriculum_id: str) -> Optional[Dict[str, Any]]:
|
| 295 |
+
"""Get content generation status for a curriculum"""
|
| 296 |
+
async with aiosqlite.connect(self.db_path) as db:
|
| 297 |
+
db.row_factory = aiosqlite.Row
|
| 298 |
+
async with db.execute("""
|
| 299 |
+
SELECT
|
| 300 |
+
id,
|
| 301 |
+
content_generation_status,
|
| 302 |
+
content_generation_error,
|
| 303 |
+
content_generation_started_at,
|
| 304 |
+
content_generation_completed_at,
|
| 305 |
+
is_content_generated
|
| 306 |
+
FROM curricula
|
| 307 |
+
WHERE id = ?
|
| 308 |
+
""", (curriculum_id,)) as cursor:
|
| 309 |
+
row = await cursor.fetchone()
|
| 310 |
+
if row:
|
| 311 |
+
return dict(row)
|
| 312 |
+
return None
|
| 313 |
+
|
| 314 |
async def get_metadata_extraction(self, extraction_id: str) -> Optional[Dict[str, Any]]:
|
| 315 |
"""Get metadata extraction by ID"""
|
| 316 |
async with aiosqlite.connect(self.db_path) as db:
|
backend/db_cache.py
CHANGED
|
@@ -86,10 +86,10 @@ class ApiCache:
|
|
| 86 |
else:
|
| 87 |
raise TypeError("Cached content must be a JSON string, dict, or list.")
|
| 88 |
|
| 89 |
-
# 3. Store in cache
|
| 90 |
async with aiosqlite.connect(self.db_path) as db:
|
| 91 |
await db.execute(
|
| 92 |
-
"INSERT INTO api_cache (cache_key, category, content_json) VALUES (?, ?, ?)",
|
| 93 |
(cache_key, category, content_to_cache)
|
| 94 |
)
|
| 95 |
await db.commit()
|
|
|
|
| 86 |
else:
|
| 87 |
raise TypeError("Cached content must be a JSON string, dict, or list.")
|
| 88 |
|
| 89 |
+
# 3. Store in cache (use INSERT OR REPLACE to handle duplicates)
|
| 90 |
async with aiosqlite.connect(self.db_path) as db:
|
| 91 |
await db.execute(
|
| 92 |
+
"INSERT OR REPLACE INTO api_cache (cache_key, category, content_json) VALUES (?, ?, ?)",
|
| 93 |
(cache_key, category, content_to_cache)
|
| 94 |
)
|
| 95 |
await db.commit()
|
backend/main.py
CHANGED
|
@@ -90,6 +90,9 @@ async def health_check():
|
|
| 90 |
},
|
| 91 |
status_code=200 if is_healthy else 503
|
| 92 |
)
|
|
|
|
|
|
|
|
|
|
| 93 |
except Exception as e:
|
| 94 |
return JSONResponse(
|
| 95 |
content={
|
|
@@ -104,7 +107,7 @@ async def health_check():
|
|
| 104 |
async def repair_database():
|
| 105 |
"""Repair database issues (admin endpoint)"""
|
| 106 |
try:
|
| 107 |
-
|
| 108 |
|
| 109 |
return JSONResponse(
|
| 110 |
content={
|
|
@@ -158,11 +161,14 @@ async def extract_metadata(data: MetadataRequest):
|
|
| 158 |
"""Extract language learning metadata from user query"""
|
| 159 |
logging.info(f"Extracting metadata for query: {data.query[:50]}...")
|
| 160 |
try:
|
| 161 |
-
# Generate metadata using AI, with caching
|
| 162 |
metadata_dict = await api_cache.get_or_set(
|
| 163 |
category="metadata",
|
| 164 |
key_text=data.query,
|
| 165 |
coro=generate_completions.get_completions,
|
|
|
|
|
|
|
|
|
|
| 166 |
prompt=data.query,
|
| 167 |
instructions=config.language_metadata_extraction_prompt
|
| 168 |
)
|
|
@@ -173,7 +179,7 @@ async def extract_metadata(data: MetadataRequest):
|
|
| 173 |
native_language=metadata_dict['native_language'],
|
| 174 |
target_language=metadata_dict['target_language'],
|
| 175 |
proficiency=metadata_dict['proficiency'],
|
| 176 |
-
user_id=
|
| 177 |
)
|
| 178 |
|
| 179 |
if existing_curriculum:
|
|
@@ -200,25 +206,37 @@ async def extract_metadata(data: MetadataRequest):
|
|
| 200 |
)
|
| 201 |
|
| 202 |
# Process extraction (generate curriculum and start content generation)
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
|
|
|
|
|
|
| 210 |
|
| 211 |
-
|
|
|
|
|
|
|
|
|
|
| 212 |
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
except Exception as e:
|
| 223 |
logging.error(f"Error extracting metadata: {e}")
|
| 224 |
raise HTTPException(status_code=500, detail=str(e))
|
|
@@ -257,6 +275,22 @@ async def get_curriculum(curriculum_id: str = Path(..., description="Curriculum
|
|
| 257 |
|
| 258 |
return JSONResponse(content=curriculum, status_code=200)
|
| 259 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
|
| 261 |
async def _get_lesson_content_by_type(
|
| 262 |
curriculum_id: str,
|
|
|
|
| 90 |
},
|
| 91 |
status_code=200 if is_healthy else 503
|
| 92 |
)
|
| 93 |
+
except ValueError as ve:
|
| 94 |
+
logging.error(f"Invalid input: {ve}")
|
| 95 |
+
raise HTTPException(status_code=400, detail=str(ve))
|
| 96 |
except Exception as e:
|
| 97 |
return JSONResponse(
|
| 98 |
content={
|
|
|
|
| 107 |
async def repair_database():
|
| 108 |
"""Repair database issues (admin endpoint)"""
|
| 109 |
try:
|
| 110 |
+
repair_result = await db_initializer.repair_database()
|
| 111 |
|
| 112 |
return JSONResponse(
|
| 113 |
content={
|
|
|
|
| 161 |
"""Extract language learning metadata from user query"""
|
| 162 |
logging.info(f"Extracting metadata for query: {data.query[:50]}...")
|
| 163 |
try:
|
| 164 |
+
# Generate metadata using AI, with caching (include user context)
|
| 165 |
metadata_dict = await api_cache.get_or_set(
|
| 166 |
category="metadata",
|
| 167 |
key_text=data.query,
|
| 168 |
coro=generate_completions.get_completions,
|
| 169 |
+
context={
|
| 170 |
+
'user_id': data.user_id
|
| 171 |
+
},
|
| 172 |
prompt=data.query,
|
| 173 |
instructions=config.language_metadata_extraction_prompt
|
| 174 |
)
|
|
|
|
| 179 |
native_language=metadata_dict['native_language'],
|
| 180 |
target_language=metadata_dict['target_language'],
|
| 181 |
proficiency=metadata_dict['proficiency'],
|
| 182 |
+
user_id=data.user_id # Use the actual user_id for consistent lookup
|
| 183 |
)
|
| 184 |
|
| 185 |
if existing_curriculum:
|
|
|
|
| 206 |
)
|
| 207 |
|
| 208 |
# Process extraction (generate curriculum and start content generation)
|
| 209 |
+
try:
|
| 210 |
+
processing_result = await content_generator.process_metadata_extraction(
|
| 211 |
+
extraction_id=extraction_id,
|
| 212 |
+
query=data.query,
|
| 213 |
+
metadata=metadata_dict,
|
| 214 |
+
user_id=data.user_id,
|
| 215 |
+
generate_content=True, # Automatically generate all content
|
| 216 |
+
skip_curriculum_lookup=True # Skip lookup since we already did it above
|
| 217 |
+
)
|
| 218 |
|
| 219 |
+
curriculum_id = processing_result['curriculum_id']
|
| 220 |
+
|
| 221 |
+
# Update status to generating
|
| 222 |
+
await db.update_content_generation_status(curriculum_id, 'generating')
|
| 223 |
|
| 224 |
+
return JSONResponse(
|
| 225 |
+
content={
|
| 226 |
+
"message": "Content generation has been initiated.",
|
| 227 |
+
"curriculum_id": curriculum_id,
|
| 228 |
+
"status_endpoint": f"/content/status/{curriculum_id}",
|
| 229 |
+
"cached": False
|
| 230 |
+
},
|
| 231 |
+
status_code=202
|
| 232 |
+
)
|
| 233 |
+
except Exception as content_error:
|
| 234 |
+
# If content generation fails, update status to failed
|
| 235 |
+
if 'curriculum_id' in locals():
|
| 236 |
+
await db.update_content_generation_status(
|
| 237 |
+
curriculum_id, 'failed', str(content_error)
|
| 238 |
+
)
|
| 239 |
+
raise content_error
|
| 240 |
except Exception as e:
|
| 241 |
logging.error(f"Error extracting metadata: {e}")
|
| 242 |
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
| 275 |
|
| 276 |
return JSONResponse(content=curriculum, status_code=200)
|
| 277 |
|
| 278 |
+
@app.get("/content/status/{curriculum_id}")
|
| 279 |
+
async def get_content_generation_status(curriculum_id: str = Path(..., description="Curriculum ID")):
|
| 280 |
+
"""Get content generation status for a curriculum"""
|
| 281 |
+
status = await db.get_content_generation_status(curriculum_id)
|
| 282 |
+
if not status:
|
| 283 |
+
raise HTTPException(status_code=404, detail="Curriculum not found")
|
| 284 |
+
|
| 285 |
+
return JSONResponse(content={
|
| 286 |
+
"curriculum_id": status['id'],
|
| 287 |
+
"status": status['content_generation_status'],
|
| 288 |
+
"error": status['content_generation_error'],
|
| 289 |
+
"started_at": status['content_generation_started_at'],
|
| 290 |
+
"completed_at": status['content_generation_completed_at'],
|
| 291 |
+
"is_content_generated": bool(status['is_content_generated'])
|
| 292 |
+
}, status_code=200)
|
| 293 |
+
|
| 294 |
|
| 295 |
async def _get_lesson_content_by_type(
|
| 296 |
curriculum_id: str,
|
backend/schema.sql
CHANGED
|
@@ -26,6 +26,10 @@ CREATE TABLE IF NOT EXISTS curricula (
|
|
| 26 |
lesson_topic TEXT,
|
| 27 |
curriculum_json TEXT NOT NULL, -- Full curriculum JSON with 25 lessons
|
| 28 |
is_content_generated INTEGER DEFAULT 0, -- Boolean: has all content been generated?
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
| 30 |
FOREIGN KEY (metadata_extraction_id) REFERENCES metadata_extractions(id) ON DELETE CASCADE
|
| 31 |
);
|
|
@@ -65,6 +69,10 @@ SELECT
|
|
| 65 |
c.id as curriculum_id,
|
| 66 |
c.lesson_topic,
|
| 67 |
c.is_content_generated,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
m.created_at
|
| 69 |
FROM metadata_extractions m
|
| 70 |
LEFT JOIN curricula c ON m.id = c.metadata_extraction_id
|
|
@@ -76,6 +84,10 @@ SELECT
|
|
| 76 |
c.id as curriculum_id,
|
| 77 |
c.user_id,
|
| 78 |
c.lesson_topic,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
COUNT(DISTINCT lc.lesson_index) as lessons_with_content,
|
| 80 |
COUNT(DISTINCT CASE WHEN lc.content_type = 'flashcards' THEN lc.lesson_index END) as lessons_with_flashcards,
|
| 81 |
COUNT(DISTINCT CASE WHEN lc.content_type = 'exercises' THEN lc.lesson_index END) as lessons_with_exercises,
|
|
@@ -92,7 +104,7 @@ CREATE TABLE IF NOT EXISTS api_cache (
|
|
| 92 |
content_json TEXT NOT NULL,
|
| 93 |
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
| 94 |
PRIMARY KEY (cache_key, category)
|
| 95 |
-
);
|
| 96 |
|
| 97 |
-- Index for faster cache lookups
|
| 98 |
CREATE INDEX IF NOT EXISTS idx_api_cache_key_category ON api_cache(cache_key, category);
|
|
|
|
| 26 |
lesson_topic TEXT,
|
| 27 |
curriculum_json TEXT NOT NULL, -- Full curriculum JSON with 25 lessons
|
| 28 |
is_content_generated INTEGER DEFAULT 0, -- Boolean: has all content been generated?
|
| 29 |
+
content_generation_status TEXT DEFAULT 'pending' CHECK(content_generation_status IN ('pending', 'generating', 'completed', 'failed')),
|
| 30 |
+
content_generation_error TEXT, -- Store error message if generation fails
|
| 31 |
+
content_generation_started_at TIMESTAMP,
|
| 32 |
+
content_generation_completed_at TIMESTAMP,
|
| 33 |
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
| 34 |
FOREIGN KEY (metadata_extraction_id) REFERENCES metadata_extractions(id) ON DELETE CASCADE
|
| 35 |
);
|
|
|
|
| 69 |
c.id as curriculum_id,
|
| 70 |
c.lesson_topic,
|
| 71 |
c.is_content_generated,
|
| 72 |
+
c.content_generation_status,
|
| 73 |
+
c.content_generation_error,
|
| 74 |
+
c.content_generation_started_at,
|
| 75 |
+
c.content_generation_completed_at,
|
| 76 |
m.created_at
|
| 77 |
FROM metadata_extractions m
|
| 78 |
LEFT JOIN curricula c ON m.id = c.metadata_extraction_id
|
|
|
|
| 84 |
c.id as curriculum_id,
|
| 85 |
c.user_id,
|
| 86 |
c.lesson_topic,
|
| 87 |
+
c.content_generation_status,
|
| 88 |
+
c.content_generation_error,
|
| 89 |
+
c.content_generation_started_at,
|
| 90 |
+
c.content_generation_completed_at,
|
| 91 |
COUNT(DISTINCT lc.lesson_index) as lessons_with_content,
|
| 92 |
COUNT(DISTINCT CASE WHEN lc.content_type = 'flashcards' THEN lc.lesson_index END) as lessons_with_flashcards,
|
| 93 |
COUNT(DISTINCT CASE WHEN lc.content_type = 'exercises' THEN lc.lesson_index END) as lessons_with_exercises,
|
|
|
|
| 104 |
content_json TEXT NOT NULL,
|
| 105 |
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
|
| 106 |
PRIMARY KEY (cache_key, category)
|
| 107 |
+
) WITHOUT ROWID;
|
| 108 |
|
| 109 |
-- Index for faster cache lookups
|
| 110 |
CREATE INDEX IF NOT EXISTS idx_api_cache_key_category ON api_cache(cache_key, category);
|
backend/utils/__pycache__/generate_completions.cpython-311.pyc
CHANGED
|
Binary files a/backend/utils/__pycache__/generate_completions.cpython-311.pyc and b/backend/utils/__pycache__/generate_completions.cpython-311.pyc differ
|
|
|
backend/utils/__pycache__/generate_completions.cpython-312.pyc
CHANGED
|
Binary files a/backend/utils/__pycache__/generate_completions.cpython-312.pyc and b/backend/utils/__pycache__/generate_completions.cpython-312.pyc differ
|
|
|