samu commited on
Commit
2832da8
·
1 Parent(s): 77e56ff

improved backend

Browse files
Files changed (32) hide show
  1. backend/__pycache__/cache.cpython-310.pyc +0 -0
  2. backend/__pycache__/cache.cpython-312.pyc +0 -0
  3. backend/__pycache__/{config.cpython-310.pyc → config.cpython-311.pyc} +0 -0
  4. backend/__pycache__/config.cpython-312.pyc +0 -0
  5. backend/__pycache__/content_generator.cpython-311.pyc +0 -0
  6. backend/__pycache__/content_generator.cpython-312.pyc +0 -0
  7. backend/__pycache__/database.cpython-310.pyc +0 -0
  8. backend/__pycache__/database.cpython-312.pyc +0 -0
  9. backend/__pycache__/db.cpython-311.pyc +0 -0
  10. backend/__pycache__/db.cpython-312.pyc +0 -0
  11. backend/__pycache__/db_cache.cpython-311.pyc +0 -0
  12. backend/__pycache__/db_cache.cpython-312.pyc +0 -0
  13. backend/__pycache__/db_init.cpython-311.pyc +0 -0
  14. backend/__pycache__/db_init.cpython-312.pyc +0 -0
  15. backend/__pycache__/main.cpython-310.pyc +0 -0
  16. backend/__pycache__/main.cpython-311.pyc +0 -0
  17. backend/__pycache__/main.cpython-312.pyc +0 -0
  18. backend/config.py +8 -7
  19. backend/content_generator.py +295 -0
  20. backend/database_init.py +65 -0
  21. backend/db.py +434 -0
  22. backend/db_cache.py +101 -0
  23. backend/db_init.py +259 -0
  24. backend/main.py +405 -50
  25. backend/schema.sql +98 -0
  26. backend/utils/__pycache__/generate_completions.cpython-310.pyc +0 -0
  27. backend/utils/__pycache__/generate_completions.cpython-311.pyc +0 -0
  28. backend/utils/__pycache__/generate_completions.cpython-312.pyc +0 -0
  29. backend/utils/__pycache__/handlers.cpython-310.pyc +0 -0
  30. backend/utils/__pycache__/handlers.cpython-312.pyc +0 -0
  31. backend/utils/generate_completions.py +2 -2
  32. backend/utils/handlers.py +18 -7
backend/__pycache__/cache.cpython-310.pyc DELETED
Binary file (1.15 kB)
 
backend/__pycache__/cache.cpython-312.pyc DELETED
Binary file (2.24 kB)
 
backend/__pycache__/{config.cpython-310.pyc → config.cpython-311.pyc} RENAMED
Binary files a/backend/__pycache__/config.cpython-310.pyc and b/backend/__pycache__/config.cpython-311.pyc differ
 
backend/__pycache__/config.cpython-312.pyc CHANGED
Binary files a/backend/__pycache__/config.cpython-312.pyc and b/backend/__pycache__/config.cpython-312.pyc differ
 
backend/__pycache__/content_generator.cpython-311.pyc ADDED
Binary file (12.3 kB). View file
 
backend/__pycache__/content_generator.cpython-312.pyc ADDED
Binary file (11 kB). View file
 
backend/__pycache__/database.cpython-310.pyc DELETED
Binary file (10.1 kB)
 
backend/__pycache__/database.cpython-312.pyc DELETED
Binary file (12.6 kB)
 
backend/__pycache__/db.cpython-311.pyc ADDED
Binary file (31 kB). View file
 
backend/__pycache__/db.cpython-312.pyc ADDED
Binary file (25.5 kB). View file
 
backend/__pycache__/db_cache.cpython-311.pyc ADDED
Binary file (7.31 kB). View file
 
backend/__pycache__/db_cache.cpython-312.pyc ADDED
Binary file (6.4 kB). View file
 
backend/__pycache__/db_init.cpython-311.pyc ADDED
Binary file (15.7 kB). View file
 
backend/__pycache__/db_init.cpython-312.pyc ADDED
Binary file (14 kB). View file
 
backend/__pycache__/main.cpython-310.pyc DELETED
Binary file (3.15 kB)
 
backend/__pycache__/main.cpython-311.pyc ADDED
Binary file (20.3 kB). View file
 
backend/__pycache__/main.cpython-312.pyc CHANGED
Binary files a/backend/__pycache__/main.cpython-312.pyc and b/backend/__pycache__/main.cpython-312.pyc differ
 
backend/config.py CHANGED
@@ -48,7 +48,7 @@ curriculum_instructions = """
48
  You are an AI-powered language learning assistant tasked with generating an extensive, personalized curriculum. Your goal is to help the user learn {target_language} by designing a 25-lesson curriculum that reflects the user's goals, interests, and proficiency level. All outputs should be written in {native_language}.
49
 
50
  ### Curriculum Goals:
51
- - Provide 25 lessons.
52
  - Ensure logical progression from basic to advanced topics (according to {proficiency}).
53
  - Align each lesson with a practical communication goal.
54
  - Tailor vocabulary and sub-topics to the user’s intended use (e.g., work, travel, hobbies, daily life).
@@ -57,15 +57,15 @@ You are an AI-powered language learning assistant tasked with generating an exte
57
 
58
  1. **Define the Lesson Series (Overall Theme):**
59
  - Choose a main theme relevant to the user's motivation for learning {target_language} (e.g., "Living in a new country", "Professional communication", "Traveling in {target_language}-speaking regions").
60
- - The theme should guide the tone, content, and scope of the entire 25-lesson sequence.
61
 
62
- 2. **Divide the Curriculum into 25 Thematic Lessons:**
63
  - Each lesson should have a clear focus (e.g., asking for help, describing your job, booking accommodation).
64
  - Sequence lessons to build from foundational topics to more complex, specialized language use.
65
  - Vary grammar, vocabulary, and communication functions across lessons to avoid repetition and ensure comprehensive coverage.
66
 
67
  3. **Describe Each Lesson Clearly and Concisely:**
68
- For each of the 25 lessons, provide:
69
  - "sub_topic": A clear and practical lesson title in {native_language}.
70
  - "keywords": A list of 1–3 high-level categories in {native_language} that describe the lesson focus (e.g., "directions", "daily routine", "formal conversation").
71
  - "description": One sentence in {native_language} that explains what the learner will achieve or be able to do after completing the lesson. Be specific and learner-oriented.
@@ -73,7 +73,7 @@ You are an AI-powered language learning assistant tasked with generating an exte
73
  ### Output Format:
74
  Return a valid JSON object with:
75
  - "lesson_topic": The overall learning theme (in {native_language}).
76
- - "sub_topics": A list of 25 items. Each item must include:
77
  - "sub_topic": A short title of the lesson (in {native_language}).
78
  - "keywords": A list of 1–3 general-purpose categories (in {native_language}).
79
  - "description": One clear sentence (in {native_language}) describing the purpose of the lesson.
@@ -164,7 +164,7 @@ When generating flashcards:
164
  - Select terms that are novel, useful, or not overly repetitive within the lesson.
165
  - Prioritize terms that learners are likely to encounter again in real-world usage.
166
  ### Flashcard Format
167
- Generate exactly **10 flashcards** as a **valid JSON array**, with each flashcard containing:
168
  - `"word"`: A key word or phrase in {target_language} drawn from the lesson.
169
  - `"definition"`: A learner-friendly explanation in {native_language}.
170
  - `"example"`: A clear, natural sentence in {target_language} demonstrating the word **in context with the lesson**.
@@ -176,7 +176,8 @@ simulation_mode_instructions = """
176
  # Target language: {target_language}
177
  # Proficiency level: {proficiency}
178
 
179
- You are a **creative, context-aware storytelling engine**. Your task is to generate short, engaging stories or dialogues in **any language** to make language learning enjoyable, memorable, and relevant. Stories must reflect the user's interests, profession, or hobbies, and align with their learning level.
 
180
 
181
  ### Input Format
182
  You will receive a user-provided **lesson topic, theme, or domain of interest** (e.g., “a courtroom drama for a law student” or “space mission dialogue for a space enthusiast”). Use this input to:
 
48
  You are an AI-powered language learning assistant tasked with generating an extensive, personalized curriculum. Your goal is to help the user learn {target_language} by designing a 5-lesson curriculum that reflects the user's goals, interests, and proficiency level. All outputs should be written in {native_language}.
49
 
50
  ### Curriculum Goals:
51
+ - Provide 5 lessons.
52
  - Ensure logical progression from basic to advanced topics (according to {proficiency}).
53
  - Align each lesson with a practical communication goal.
54
  - Tailor vocabulary and sub-topics to the user’s intended use (e.g., work, travel, hobbies, daily life).
 
57
 
58
  1. **Define the Lesson Series (Overall Theme):**
59
  - Choose a main theme relevant to the user's motivation for learning {target_language} (e.g., "Living in a new country", "Professional communication", "Traveling in {target_language}-speaking regions").
60
+ - The theme should guide the tone, content, and scope of the entire 5-lesson sequence.
61
 
62
+ 2. **Divide the Curriculum into 5 Thematic Lessons:**
63
  - Each lesson should have a clear focus (e.g., asking for help, describing your job, booking accommodation).
64
  - Sequence lessons to build from foundational topics to more complex, specialized language use.
65
  - Vary grammar, vocabulary, and communication functions across lessons to avoid repetition and ensure comprehensive coverage.
66
 
67
  3. **Describe Each Lesson Clearly and Concisely:**
68
+ For each of the 5 lessons, provide:
69
  - "sub_topic": A clear and practical lesson title in {native_language}.
70
  - "keywords": A list of 1–3 high-level categories in {native_language} that describe the lesson focus (e.g., "directions", "daily routine", "formal conversation").
71
  - "description": One sentence in {native_language} that explains what the learner will achieve or be able to do after completing the lesson. Be specific and learner-oriented.
 
73
  ### Output Format:
74
  Return a valid JSON object with:
75
  - "lesson_topic": The overall learning theme (in {native_language}).
76
+ - "sub_topics": A list of 5 items. Each item must include:
77
  - "sub_topic": A short title of the lesson (in {native_language}).
78
  - "keywords": A list of 1–3 general-purpose categories (in {native_language}).
79
  - "description": One clear sentence (in {native_language}) describing the purpose of the lesson.
 
164
  - Select terms that are novel, useful, or not overly repetitive within the lesson.
165
  - Prioritize terms that learners are likely to encounter again in real-world usage.
166
  ### Flashcard Format
167
+ Generate exactly **5 flashcards** as a **valid JSON array**, with each flashcard containing:
168
  - `"word"`: A key word or phrase in {target_language} drawn from the lesson.
169
  - `"definition"`: A learner-friendly explanation in {native_language}.
170
  - `"example"`: A clear, natural sentence in {target_language} demonstrating the word **in context with the lesson**.
 
176
  # Target language: {target_language}
177
  # Proficiency level: {proficiency}
178
 
179
+ You are a **creative, context-aware storytelling engine**. Your task is to generate short, engaging stories or dialogues in **any language** to make language learning enjoyable, memorable, and relevant.
180
+ Stories must reflect the user's interests, profession, or hobbies, and align with their learning level.
181
 
182
  ### Input Format
183
  You will receive a user-provided **lesson topic, theme, or domain of interest** (e.g., “a courtroom drama for a law student” or “space mission dialogue for a space enthusiast”). Use this input to:
backend/content_generator.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import asyncio
3
+ from typing import Dict, Any, Optional, List
4
+ from backend.utils import generate_completions
5
+ from backend import config
6
+ from backend.db import db
7
+ from backend.db_cache import api_cache
8
+ import logging
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
class ContentGenerator:
    """Service for generating and storing all learning content.

    Orchestrates curriculum creation, per-lesson content generation
    (flashcards, exercises, simulation) and persistence through the
    ``db`` module, with completion results cached via ``api_cache``.
    """

    # (content_type, name of the instruction-template attribute on config).
    # The three per-lesson content types share one generation pipeline.
    _LESSON_CONTENT_SPECS = (
        ("flashcards", "flashcard_mode_instructions"),
        ("exercises", "exercise_mode_instructions"),
        ("simulation", "simulation_mode_instructions"),
    )

    @staticmethod
    def _render_instructions(template: str, metadata: Dict[str, Any]) -> str:
        """Fill the {native_language}/{target_language}/{proficiency} placeholders."""
        return (
            template
            .replace("{native_language}", metadata['native_language'])
            .replace("{target_language}", metadata['target_language'])
            .replace("{proficiency}", metadata['proficiency'])
        )

    async def generate_curriculum_from_metadata(
        self,
        metadata_extraction_id: str,
        query: str,
        metadata: Dict[str, Any],
        user_id: Optional[int] = None
    ) -> str:
        """Generate a curriculum based on extracted metadata and persist it.

        Args:
            metadata_extraction_id: ID of the stored metadata-extraction row.
            query: The user's original query, used as the completion prompt.
            metadata: Must contain 'native_language', 'target_language'
                and 'proficiency'.
            user_id: Owner of the curriculum, if any.

        Returns:
            The new curriculum's ID. If the model response is not valid
            JSON, an empty curriculum skeleton is stored instead so the
            pipeline still yields a row.
        """
        instructions = self._render_instructions(config.curriculum_instructions, metadata)

        logger.info(f"Generating curriculum for {metadata['target_language']} ({metadata['proficiency']})")
        curriculum_response = await generate_completions.get_completions(query, instructions)

        try:
            curriculum = json.loads(curriculum_response)
        except json.JSONDecodeError:
            logger.error(f"Failed to parse curriculum response: {curriculum_response[:200]}...")
            curriculum = {"lesson_topic": "Language Learning Journey", "sub_topics": []}

        return await db.save_curriculum(
            metadata_extraction_id=metadata_extraction_id,
            curriculum=curriculum,
            user_id=user_id
        )

    async def _generate_and_save(
        self,
        content_type: str,
        instructions_attr: str,
        curriculum_id: str,
        lesson_index: int,
        lesson_topic: str,
        lesson_context: str,
        metadata: Dict[str, Any]
    ) -> str:
        """Generate one content type for one lesson (via cache) and store it.

        Returns the stored content's ID. Shared by all three content types;
        previously this logic was copy-pasted three times.
        """
        instructions = self._render_instructions(getattr(config, instructions_attr), metadata)

        response = await api_cache.get_or_set(
            category=content_type,
            key_text=lesson_context,
            coro=generate_completions.get_completions,
            context={
                'native_language': metadata['native_language'],
                'target_language': metadata['target_language'],
                'proficiency': metadata['proficiency'],
                'lesson_index': lesson_index
            },
            prompt=lesson_context,
            instructions=instructions
        )

        return await db.save_learning_content(
            curriculum_id=curriculum_id,
            content_type=content_type,
            lesson_index=lesson_index,
            lesson_topic=lesson_topic,
            content=response
        )

    async def generate_content_for_lesson(
        self,
        curriculum_id: str,
        lesson_index: int,
        lesson: Dict[str, Any],
        metadata: Dict[str, Any]
    ) -> Dict[str, str]:
        """Generate all content types for a single lesson.

        Returns:
            Mapping of content type -> stored content ID. A failure for
            one content type is logged and that key omitted; it does not
            abort the remaining types.
        """
        content_ids: Dict[str, str] = {}
        lesson_topic = lesson.get('sub_topic', f'Lesson {lesson_index + 1}')
        lesson_context = f"{lesson_topic}: {lesson.get('description', '')}"

        for content_type, instructions_attr in self._LESSON_CONTENT_SPECS:
            try:
                content_ids[content_type] = await self._generate_and_save(
                    content_type=content_type,
                    instructions_attr=instructions_attr,
                    curriculum_id=curriculum_id,
                    lesson_index=lesson_index,
                    lesson_topic=lesson_topic,
                    lesson_context=lesson_context,
                    metadata=metadata
                )
            except Exception as e:
                logger.error(f"Failed to generate {content_type} for lesson {lesson_index}: {e}")

        # BUG FIX: the original ended with `return content`, an undefined
        # name (NameError at runtime); the accumulated dict is content_ids.
        return content_ids

    async def generate_all_content_for_curriculum(
        self,
        curriculum_id: str,
        max_concurrent_lessons: int = 3
    ):
        """Generate all learning content for a curriculum, lesson by lesson.

        Lessons are processed in batches of ``max_concurrent_lessons`` to
        avoid overwhelming the completion API. On completion the curriculum
        is marked as content-generated.
        """
        curriculum_data = await db.get_curriculum(curriculum_id)
        if not curriculum_data:
            logger.error(f"Curriculum not found: {curriculum_id}")
            return

        try:
            curriculum = json.loads(curriculum_data['curriculum_json'])
            lessons = curriculum.get('sub_topics', [])
        except json.JSONDecodeError:
            logger.error(f"Failed to parse curriculum JSON for {curriculum_id}")
            return

        metadata = {
            'native_language': curriculum_data['native_language'],
            'target_language': curriculum_data['target_language'],
            'proficiency': curriculum_data['proficiency']
        }

        logger.info(f"Starting content generation for {len(lessons)} lessons")

        # Process lessons in batches so no more than max_concurrent_lessons
        # completion pipelines are in flight at once.
        for i in range(0, len(lessons), max_concurrent_lessons):
            batch = lessons[i:i + max_concurrent_lessons]
            batch_indices = list(range(i, min(i + max_concurrent_lessons, len(lessons))))

            tasks = [
                self.generate_content_for_lesson(
                    curriculum_id=curriculum_id,
                    lesson_index=idx,
                    lesson=lesson,
                    metadata=metadata
                )
                for idx, lesson in zip(batch_indices, batch)
            ]

            results = await asyncio.gather(*tasks, return_exceptions=True)

            for idx, result in zip(batch_indices, results):
                if isinstance(result, Exception):
                    logger.error(f"Failed to generate content for lesson {idx}: {result}")
                else:
                    logger.info(f"Generated content for lesson {idx}: {result}")

        await db.mark_curriculum_content_generated(curriculum_id)
        logger.info(f"Completed content generation for curriculum {curriculum_id}")

    async def process_metadata_extraction(
        self,
        extraction_id: str,
        query: str,
        metadata: Dict[str, Any],
        user_id: Optional[int] = None,
        generate_content: bool = True
    ) -> Dict[str, Any]:
        """Resolve a metadata extraction to a curriculum.

        Strategy, in order: reuse an exact per-user match; copy a fully
        generated similar curriculum from another user; otherwise generate
        a new curriculum (optionally starting background content
        generation).

        Returns:
            Dict with 'curriculum_id', 'content_generation_started',
            'cached', and 'cache_type'.
        """
        existing_curriculum = await db.find_existing_curriculum(
            query=query,
            native_language=metadata['native_language'],
            target_language=metadata['target_language'],
            proficiency=metadata['proficiency'],
            user_id=user_id
        )

        if existing_curriculum:
            # Exact match already owned by this user: return it as-is.
            if existing_curriculum.get('user_id') == user_id:
                logger.info(f"Found existing curriculum for user {user_id}: {existing_curriculum['id']}")
                return {
                    'curriculum_id': existing_curriculum['id'],
                    'content_generation_started': False,
                    'cached': True,
                    'cache_type': 'user_exact_match'
                }

            # A fully generated similar curriculum from another user: clone it.
            elif existing_curriculum.get('is_content_generated') == 1:
                logger.info(f"Copying existing curriculum {existing_curriculum['id']} for user {user_id}")
                curriculum_id = await db.copy_curriculum_for_user(
                    source_curriculum_id=existing_curriculum['id'],
                    metadata_extraction_id=extraction_id,
                    user_id=user_id
                )
                return {
                    'curriculum_id': curriculum_id,
                    'content_generation_started': False,
                    'cached': True,
                    'cache_type': 'copied_from_similar'
                }

        # No suitable existing curriculum: generate a fresh one.
        logger.info(f"No existing curriculum found, generating new one for user {user_id}")
        curriculum_id = await self.generate_curriculum_from_metadata(
            metadata_extraction_id=extraction_id,
            query=query,
            metadata=metadata,
            user_id=user_id
        )

        result = {
            'curriculum_id': curriculum_id,
            'content_generation_started': False,
            'cached': False,
            'cache_type': 'newly_generated'
        }

        if generate_content:
            # Fire-and-forget: the caller is not kept waiting for content.
            asyncio.create_task(self.generate_all_content_for_curriculum(curriculum_id))
            result['content_generation_started'] = True

        return result
292
+
293
+
294
# Global content generator instance; importing modules share this singleton.
content_generator = ContentGenerator()
backend/database_init.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Database initialization script for AI Language Tutor
4
+ Run this script to create database tables
5
+ """
6
+
7
+ import asyncio
8
+ import sys
9
+ import os
10
+
11
+ # Add the project root to Python path
12
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
13
+
14
+ from backend.database import create_tables, drop_tables
15
+ import logging
16
+
17
+ logging.basicConfig(level=logging.INFO)
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
async def init_database():
    """Create the database tables, logging the outcome.

    Any error is logged and re-raised so the CLI exits non-zero.
    """
    try:
        logger.info("Creating database tables...")
        await create_tables()
        logger.info("Database tables created successfully!")
    except Exception as e:
        logger.error(f"Error creating database tables: {e}")
        raise
30
+
31
+
32
async def reset_database():
    """Drop every existing table and recreate the schema from scratch.

    Any error is logged and re-raised so the CLI exits non-zero.
    """
    try:
        logger.info("Dropping existing tables...")
        await drop_tables()
        logger.info("Creating new tables...")
        await create_tables()
        logger.info("Database reset successfully!")
    except Exception as e:
        logger.error(f"Error resetting database: {e}")
        raise
43
+
44
+
45
if __name__ == "__main__":
    import argparse

    # NOTE(review): this module imports create_tables/drop_tables from
    # backend.database — confirm that module still exists alongside backend.db.
    parser = argparse.ArgumentParser(description="Database initialization for AI Language Tutor")
    parser.add_argument(
        "--reset",
        action="store_true",
        help="Reset database (drop and recreate tables)"
    )
    args = parser.parse_args()

    if not args.reset:
        # Default mode: create tables without touching existing data.
        asyncio.run(init_database())
    else:
        # Destructive mode: require interactive confirmation first.
        print("⚠️ WARNING: This will delete all existing data!")
        confirm = input("Are you sure you want to reset the database? (yes/no): ")
        if confirm.lower() == "yes":
            asyncio.run(reset_database())
        else:
            print("Database reset cancelled.")
backend/db.py ADDED
@@ -0,0 +1,434 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiosqlite
2
+ import json
3
+ import os
4
+ from typing import Optional, List, Dict, Any
5
+ from datetime import datetime
6
+ import uuid
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
# Database file path; override with the DATABASE_PATH environment variable.
DB_PATH = os.getenv("DATABASE_PATH", "./ai_tutor.db")
13
+
14
+
15
+ class Database:
16
+ """Pure SQLite database handler for AI Language Tutor"""
17
+
18
    def __init__(self, db_path: str = DB_PATH):
        # Path to the SQLite file; every method opens its own connection to it.
        self.db_path = db_path
20
+
21
+ async def initialize(self):
22
+ """Initialize database with schema"""
23
+ async with aiosqlite.connect(self.db_path) as db:
24
+ # Read and execute schema - look for it in parent directory
25
+ schema_path = os.path.join(os.path.dirname(__file__), 'schema.sql')
26
+ with open(schema_path, 'r') as f:
27
+ schema = f.read()
28
+ await db.executescript(schema)
29
+ await db.commit()
30
+ logger.info("Database initialized successfully")
31
+
32
+ async def find_existing_curriculum(
33
+ self,
34
+ query: str,
35
+ native_language: str,
36
+ target_language: str,
37
+ proficiency: str,
38
+ user_id: Optional[int] = None
39
+ ) -> Optional[Dict[str, Any]]:
40
+ """Find existing curriculum for similar query and metadata"""
41
+ async with aiosqlite.connect(self.db_path) as db:
42
+ db.row_factory = aiosqlite.Row
43
+
44
+ if user_id is not None:
45
+ # User-specific search: First try to find exact query match for the user
46
+ async with db.execute("""
47
+ SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
48
+ FROM curricula c
49
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
50
+ WHERE m.user_id = ? AND m.query = ? AND m.native_language = ?
51
+ AND m.target_language = ? AND m.proficiency = ?
52
+ ORDER BY c.created_at DESC
53
+ LIMIT 1
54
+ """, (user_id, query, native_language, target_language, proficiency)) as cursor:
55
+ row = await cursor.fetchone()
56
+ if row:
57
+ return dict(row)
58
+
59
+ # Then try to find similar curriculum with same metadata (any user)
60
+ async with db.execute("""
61
+ SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
62
+ FROM curricula c
63
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
64
+ WHERE m.native_language = ? AND m.target_language = ? AND m.proficiency = ?
65
+ AND c.is_content_generated = 1
66
+ ORDER BY c.created_at DESC
67
+ LIMIT 1
68
+ """, (native_language, target_language, proficiency)) as cursor:
69
+ row = await cursor.fetchone()
70
+ if row:
71
+ return dict(row)
72
+ else:
73
+ # User-independent search: Find exact query match regardless of user
74
+ async with db.execute("""
75
+ SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title, m.query
76
+ FROM curricula c
77
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
78
+ WHERE m.query = ? AND m.native_language = ? AND m.target_language = ? AND m.proficiency = ?
79
+ ORDER BY c.created_at DESC
80
+ LIMIT 1
81
+ """, (query, native_language, target_language, proficiency)) as cursor:
82
+ row = await cursor.fetchone()
83
+ if row:
84
+ return dict(row)
85
+
86
+ return None
87
+
88
+ async def save_metadata_extraction(
89
+ self,
90
+ query: str,
91
+ metadata: Dict[str, Any],
92
+ user_id: Optional[int] = None
93
+ ) -> str:
94
+ """Save extracted metadata and return extraction ID"""
95
+ extraction_id = str(uuid.uuid4())
96
+
97
+ async with aiosqlite.connect(self.db_path) as db:
98
+ await db.execute("""
99
+ INSERT INTO metadata_extractions
100
+ (id, user_id, query, native_language, target_language, proficiency, title, description, metadata_json)
101
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
102
+ """, (
103
+ extraction_id,
104
+ user_id,
105
+ query,
106
+ metadata.get('native_language'),
107
+ metadata.get('target_language'),
108
+ metadata.get('proficiency'),
109
+ metadata.get('title'),
110
+ metadata.get('description'),
111
+ json.dumps(metadata)
112
+ ))
113
+ await db.commit()
114
+
115
+ logger.info(f"Saved metadata extraction: {extraction_id}")
116
+ return extraction_id
117
+
118
+ async def save_curriculum(
119
+ self,
120
+ metadata_extraction_id: str,
121
+ curriculum: Dict[str, Any],
122
+ user_id: Optional[int] = None
123
+ ) -> str:
124
+ """Save generated curriculum and return curriculum ID"""
125
+ curriculum_id = str(uuid.uuid4())
126
+
127
+ async with aiosqlite.connect(self.db_path) as db:
128
+ await db.execute("""
129
+ INSERT INTO curricula
130
+ (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json)
131
+ VALUES (?, ?, ?, ?, ?)
132
+ """, (
133
+ curriculum_id,
134
+ metadata_extraction_id,
135
+ user_id,
136
+ curriculum.get('lesson_topic', ''),
137
+ json.dumps(curriculum)
138
+ ))
139
+ await db.commit()
140
+
141
+ logger.info(f"Saved curriculum: {curriculum_id}")
142
+ return curriculum_id
143
+
144
+ async def copy_curriculum_for_user(
145
+ self,
146
+ source_curriculum_id: str,
147
+ metadata_extraction_id: str,
148
+ user_id: Optional[int] = None
149
+ ) -> str:
150
+ """Copy an existing curriculum for a new user"""
151
+ new_curriculum_id = str(uuid.uuid4())
152
+
153
+ async with aiosqlite.connect(self.db_path) as db:
154
+ # Get source curriculum
155
+ async with db.execute("""
156
+ SELECT lesson_topic, curriculum_json FROM curricula WHERE id = ?
157
+ """, (source_curriculum_id,)) as cursor:
158
+ row = await cursor.fetchone()
159
+ if not row:
160
+ raise ValueError(f"Source curriculum {source_curriculum_id} not found")
161
+
162
+ lesson_topic, curriculum_json = row
163
+
164
+ # Create new curriculum
165
+ await db.execute("""
166
+ INSERT INTO curricula
167
+ (id, metadata_extraction_id, user_id, lesson_topic, curriculum_json, is_content_generated)
168
+ VALUES (?, ?, ?, ?, ?, 0)
169
+ """, (
170
+ new_curriculum_id,
171
+ metadata_extraction_id,
172
+ user_id,
173
+ lesson_topic,
174
+ curriculum_json
175
+ ))
176
+
177
+ # Copy all learning content
178
+ await db.execute("""
179
+ INSERT INTO learning_content
180
+ (id, curriculum_id, content_type, lesson_index, lesson_topic, content_json)
181
+ SELECT
182
+ lower(hex(randomblob(16))),
183
+ ?,
184
+ content_type,
185
+ lesson_index,
186
+ lesson_topic,
187
+ content_json
188
+ FROM learning_content
189
+ WHERE curriculum_id = ?
190
+ """, (new_curriculum_id, source_curriculum_id))
191
+
192
+ # Mark as content generated
193
+ await db.execute("""
194
+ UPDATE curricula
195
+ SET is_content_generated = 1
196
+ WHERE id = ?
197
+ """, (new_curriculum_id,))
198
+
199
+ await db.commit()
200
+
201
+ logger.info(f"Copied curriculum {source_curriculum_id} to {new_curriculum_id} for user {user_id}")
202
+ return new_curriculum_id
203
+
204
+ async def save_learning_content(
205
+ self,
206
+ curriculum_id: str,
207
+ content_type: str,
208
+ lesson_index: int,
209
+ lesson_topic: str,
210
+ content: Any
211
+ ) -> str:
212
+ """Save learning content (flashcards, exercises, or simulation)"""
213
+ content_id = str(uuid.uuid4())
214
+
215
+ async with aiosqlite.connect(self.db_path) as db:
216
+ await db.execute("""
217
+ INSERT INTO learning_content
218
+ (id, curriculum_id, content_type, lesson_index, lesson_topic, content_json)
219
+ VALUES (?, ?, ?, ?, ?, ?)
220
+ """, (
221
+ content_id,
222
+ curriculum_id,
223
+ content_type,
224
+ lesson_index,
225
+ lesson_topic,
226
+ json.dumps(content) if isinstance(content, (dict, list)) else content
227
+ ))
228
+ await db.commit()
229
+
230
+ logger.info(f"Saved {content_type} for lesson {lesson_index}")
231
+ return content_id
232
+
233
+ async def mark_curriculum_content_generated(self, curriculum_id: str):
234
+ """Mark curriculum as having all content generated"""
235
+ async with aiosqlite.connect(self.db_path) as db:
236
+ await db.execute("""
237
+ UPDATE curricula
238
+ SET is_content_generated = 1
239
+ WHERE id = ?
240
+ """, (curriculum_id,))
241
+ await db.commit()
242
+
243
+ async def get_metadata_extraction(self, extraction_id: str) -> Optional[Dict[str, Any]]:
244
+ """Get metadata extraction by ID"""
245
+ async with aiosqlite.connect(self.db_path) as db:
246
+ db.row_factory = aiosqlite.Row
247
+ async with db.execute("""
248
+ SELECT * FROM metadata_extractions WHERE id = ?
249
+ """, (extraction_id,)) as cursor:
250
+ row = await cursor.fetchone()
251
+ if row:
252
+ return dict(row)
253
+ return None
254
+
255
+ async def get_curriculum(self, curriculum_id: str) -> Optional[Dict[str, Any]]:
256
+ """Get curriculum by ID"""
257
+ async with aiosqlite.connect(self.db_path) as db:
258
+ db.row_factory = aiosqlite.Row
259
+ async with db.execute("""
260
+ SELECT c.*, m.native_language, m.target_language, m.proficiency
261
+ FROM curricula c
262
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
263
+ WHERE c.id = ?
264
+ """, (curriculum_id,)) as cursor:
265
+ row = await cursor.fetchone()
266
+ if row:
267
+ return dict(row)
268
+ return None
269
+
270
+ async def get_learning_content(
271
+ self,
272
+ curriculum_id: str,
273
+ content_type: Optional[str] = None,
274
+ lesson_index: Optional[int] = None
275
+ ) -> List[Dict[str, Any]]:
276
+ """Get learning content for a curriculum"""
277
+ query = "SELECT * FROM learning_content WHERE curriculum_id = ?"
278
+ params = [curriculum_id]
279
+
280
+ if content_type:
281
+ query += " AND content_type = ?"
282
+ params.append(content_type)
283
+
284
+ if lesson_index is not None:
285
+ query += " AND lesson_index = ?"
286
+ params.append(lesson_index)
287
+
288
+ query += " ORDER BY lesson_index"
289
+
290
+ async with aiosqlite.connect(self.db_path) as db:
291
+ db.row_factory = aiosqlite.Row
292
+ async with db.execute(query, params) as cursor:
293
+ rows = await cursor.fetchall()
294
+ return [dict(row) for row in rows]
295
+
296
+ async def get_user_metadata_extractions(
297
+ self,
298
+ user_id: int,
299
+ limit: int = 20
300
+ ) -> List[Dict[str, Any]]:
301
+ """Get user's metadata extraction history"""
302
+ async with aiosqlite.connect(self.db_path) as db:
303
+ db.row_factory = aiosqlite.Row
304
+ async with db.execute("""
305
+ SELECT * FROM metadata_extractions
306
+ WHERE user_id = ?
307
+ ORDER BY created_at DESC
308
+ LIMIT ?
309
+ """, (user_id, limit)) as cursor:
310
+ rows = await cursor.fetchall()
311
+ return [dict(row) for row in rows]
312
+
313
+ async def get_user_curricula(
314
+ self,
315
+ user_id: int,
316
+ limit: int = 20
317
+ ) -> List[Dict[str, Any]]:
318
+ """Get user's curricula"""
319
+ async with aiosqlite.connect(self.db_path) as db:
320
+ db.row_factory = aiosqlite.Row
321
+ async with db.execute("""
322
+ SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title
323
+ FROM curricula c
324
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
325
+ WHERE c.user_id = ?
326
+ ORDER BY c.created_at DESC
327
+ LIMIT ?
328
+ """, (user_id, limit)) as cursor:
329
+ rows = await cursor.fetchall()
330
+ return [dict(row) for row in rows]
331
+
332
+ async def get_user_learning_journeys(
333
+ self,
334
+ user_id: int,
335
+ limit: int = 20
336
+ ) -> List[Dict[str, Any]]:
337
+ """Get user's complete learning journeys"""
338
+ async with aiosqlite.connect(self.db_path) as db:
339
+ db.row_factory = aiosqlite.Row
340
+ async with db.execute("""
341
+ SELECT * FROM user_learning_journeys
342
+ WHERE user_id = ?
343
+ LIMIT ?
344
+ """, (user_id, limit)) as cursor:
345
+ rows = await cursor.fetchall()
346
+ return [dict(row) for row in rows]
347
+
348
+ async def get_curriculum_content_status(self, curriculum_id: str) -> Optional[Dict[str, Any]]:
349
+ """Get content generation status for a curriculum"""
350
+ async with aiosqlite.connect(self.db_path) as db:
351
+ db.row_factory = aiosqlite.Row
352
+ async with db.execute("""
353
+ SELECT * FROM curriculum_content_status WHERE curriculum_id = ?
354
+ """, (curriculum_id,)) as cursor:
355
+ row = await cursor.fetchone()
356
+ if row:
357
+ return dict(row)
358
+ return None
359
+
360
    async def get_full_curriculum_details(self, curriculum_id: str, include_content: bool = True) -> Optional[Dict[str, Any]]:
        """Get full curriculum details, optionally including all content.

        Loads the curriculum row, parses its stored JSON, and — when
        *include_content* is True — embeds each lesson's generated content
        (keyed by content_type) into the corresponding ``sub_topics`` entry.
        Returns None when the curriculum id is unknown.
        """
        curriculum = await self.get_curriculum(curriculum_id)
        if not curriculum:
            return None

        try:
            curriculum_data = json.loads(curriculum['curriculum_json'])
            # Lessons live under 'sub_topics' in the stored curriculum JSON.
            lessons = curriculum_data.get('sub_topics', [])
        except json.JSONDecodeError:
            # Corrupt/legacy JSON: fall back to an empty curriculum rather than fail.
            curriculum_data = {}
            lessons = []

        if include_content:
            content_list = await self.get_learning_content(curriculum_id)
            # Map lesson_index -> {content_type: content record}.
            content_map = {}
            for content in content_list:
                lesson_index = content['lesson_index']
                content_type = content['content_type']
                if lesson_index not in content_map:
                    content_map[lesson_index] = {}

                try:
                    parsed_content = json.loads(content['content_json'])
                except json.JSONDecodeError:
                    # Keep the raw string if it is not valid JSON.
                    parsed_content = content['content_json']

                content_map[lesson_index][content_type] = {
                    "id": content['id'],
                    "lesson_topic": content['lesson_topic'],
                    "content": parsed_content,
                    "created_at": content['created_at']
                }

            # Embed content into lessons
            # NOTE: lessons are matched to content by positional index —
            # assumes lesson_index values align with sub_topics order.
            for i, lesson in enumerate(lessons):
                lesson['content'] = content_map.get(i, {})

        # Expose the parsed structure and drop the raw JSON string.
        curriculum['curriculum'] = curriculum_data
        del curriculum['curriculum_json']

        return curriculum
402
+
403
+ async def search_curricula_by_languages(
404
+ self,
405
+ native_language: str,
406
+ target_language: str,
407
+ proficiency: Optional[str] = None,
408
+ limit: int = 10
409
+ ) -> List[Dict[str, Any]]:
410
+ """Search for existing curricula by language combination"""
411
+ query = """
412
+ SELECT c.*, m.native_language, m.target_language, m.proficiency, m.title
413
+ FROM curricula c
414
+ JOIN metadata_extractions m ON c.metadata_extraction_id = m.id
415
+ WHERE m.native_language = ? AND m.target_language = ?
416
+ """
417
+ params = [native_language, target_language]
418
+
419
+ if proficiency:
420
+ query += " AND m.proficiency = ?"
421
+ params.append(proficiency)
422
+
423
+ query += " ORDER BY c.created_at DESC LIMIT ?"
424
+ params.append(limit)
425
+
426
+ async with aiosqlite.connect(self.db_path) as db:
427
+ db.row_factory = aiosqlite.Row
428
+ async with db.execute(query, params) as cursor:
429
+ rows = await cursor.fetchall()
430
+ return [dict(row) for row in rows]
431
+
432
+
433
+ # Global database instance
434
+ db = Database()
backend/db_cache.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import aiosqlite
2
+ import json
3
+ import os
4
+ from typing import Optional, Dict, Any, Callable, Union, List
5
+ import logging
6
+ import hashlib
7
+
8
logger = logging.getLogger(__name__)
DB_PATH = os.getenv("DATABASE_PATH", "./ai_tutor.db")

class ApiCache:
    """Generic caching service using a dedicated database table."""

    def __init__(self, db_path: str = DB_PATH):
        # Path of the SQLite file that holds the api_cache table.
        self.db_path = db_path

    def _generate_hash(self, text: str) -> str:
        """Generate a SHA256 hash for a given text."""
        return hashlib.sha256(text.encode()).hexdigest()

    def _generate_context_hash(self, key_text: str, **context) -> str:
        """Generate a hash that includes context for better cache differentiation."""
        # Sort the context so identical kwargs always produce the same key;
        # None-valued entries are ignored.
        context_items = sorted(context.items())
        context_str = "|".join([f"{k}:{v}" for k, v in context_items if v is not None])
        full_key = f"{key_text}|{context_str}"
        return hashlib.sha256(full_key.encode()).hexdigest()

    async def get_or_set(
        self,
        category: str,
        key_text: str,
        coro: Callable,
        *args,
        context: Optional[Dict[str, Any]] = None,
        **kwargs
    ) -> Union[Dict[str, Any], List[Any], str]:
        """
        Get data from cache or execute a coroutine to generate and cache it.

        Args:
            category: The category of the cached item (e.g., 'metadata', 'flashcards').
            key_text: The text to use for generating the cache key.
            coro: The async function to call if the item is not in the cache.
            *args: Positional arguments for the coroutine.
            context: Additional context for cache key generation (e.g., language, proficiency).
            **kwargs: Keyword arguments for the coroutine.

        Returns:
            The cached or newly generated content (parsed JSON), or the raw
            string when the generated content is not valid JSON (uncached).
        """
        # Generate cache key with context if provided
        if context:
            cache_key = self._generate_context_hash(key_text, **context)
        else:
            cache_key = self._generate_hash(key_text)

        # 1. Check cache
        async with aiosqlite.connect(self.db_path) as db:
            db.row_factory = aiosqlite.Row
            async with db.execute(
                "SELECT content_json FROM api_cache WHERE cache_key = ? AND category = ?",
                (cache_key, category)
            ) as cursor:
                row = await cursor.fetchone()
                if row:
                    logger.info(f"Cache hit for {category} with key: {key_text[:50]}...")
                    return json.loads(row['content_json'])

        # 2. If miss, generate content
        logger.info(f"Cache miss for {category}: {key_text[:50]}... Generating new content")
        generated_content = await coro(*args, **kwargs)

        # Ensure content is a JSON-serializable string
        if isinstance(generated_content, (dict, list)):
            content_to_cache = json.dumps(generated_content)
        elif isinstance(generated_content, str):
            # Try to parse string to ensure it's valid JSON, then dump it back
            try:
                parsed_json = json.loads(generated_content)
                content_to_cache = json.dumps(parsed_json)
            except json.JSONDecodeError:
                # If it's not a JSON string, we can't cache it in this system.
                logger.warning(f"Content for {category} is not valid JSON, returning without caching.")
                return generated_content
        else:
            raise TypeError("Cached content must be a JSON string, dict, or list.")

        # 3. Store in cache.
        # FIX: use INSERT OR REPLACE instead of a plain INSERT. The
        # check-then-insert above is not atomic: two concurrent requests can
        # both miss and then both write, which would raise IntegrityError if
        # cache_key is unique (assumed PK/unique in schema.sql — confirm).
        # OR REPLACE makes the write idempotent either way.
        async with aiosqlite.connect(self.db_path) as db:
            await db.execute(
                "INSERT OR REPLACE INTO api_cache (cache_key, category, content_json) VALUES (?, ?, ?)",
                (cache_key, category, content_to_cache)
            )
            await db.commit()
        logger.info(f"Cached new content for {category} with key: {key_text[:50]}...")

        return json.loads(content_to_cache)

# Global API cache instance
api_cache = ApiCache()
backend/db_init.py ADDED
@@ -0,0 +1,259 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Database Initialization Module
3
+ Handles database creation, schema setup, and health checks
4
+ """
5
+
6
+ import os
7
+ import aiosqlite
8
+ import logging
9
+ from pathlib import Path
10
+ from typing import Dict, Any, List
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
class DatabaseInitializer:
    """Handles database initialization and health checks.

    Wraps creation of the SQLite file from schema.sql, a structured health
    check (tables, views, writability, row counts), and a best-effort repair
    routine. All methods are async and open short-lived connections.
    """

    def __init__(self, db_path: str = None):
        # DATABASE_PATH env var wins over the default local file.
        self.db_path = db_path or os.getenv("DATABASE_PATH", "./ai_tutor.db")
        # Resolved eagerly so a missing schema.sql fails fast at construction.
        self.schema_path = self._find_schema_file()

    def _find_schema_file(self) -> str:
        """Return the path to the schema.sql file.

        The schema.sql file is expected to be in the same directory as this script.

        Raises:
            FileNotFoundError: if schema.sql is not next to this module.
        """
        schema_path = os.path.join(os.path.dirname(__file__), 'schema.sql')
        if not os.path.exists(schema_path):
            raise FileNotFoundError(f"schema.sql not found at {schema_path}")
        return schema_path

    async def check_database_exists(self) -> bool:
        """Check if database file exists on disk."""
        return os.path.exists(self.db_path)

    async def check_database_health(self) -> Dict[str, Any]:
        """Comprehensive database health check.

        Returns a dict of boolean flags (existence, accessibility, schema,
        writability), per-table row counts, and a list of error strings.
        Never raises: failures are reported in the ``errors`` list.
        """
        health_status = {
            "database_exists": False,
            "database_accessible": False,
            "schema_loaded": False,
            "tables_exist": False,
            "views_exist": False,
            "can_write": False,
            "record_count": {},
            "errors": []
        }

        try:
            # Check if database file exists
            health_status["database_exists"] = await self.check_database_exists()

            if not health_status["database_exists"]:
                health_status["errors"].append("Database file does not exist")
                return health_status

            # Try to connect to database
            async with aiosqlite.connect(self.db_path) as db:
                health_status["database_accessible"] = True

                # Check if required tables exist
                required_tables = ['metadata_extractions', 'curricula', 'learning_content', 'api_cache']
                existing_tables = await self._get_existing_tables(db)

                missing_tables = [table for table in required_tables if table not in existing_tables]
                if missing_tables:
                    health_status["errors"].append(f"Missing tables: {missing_tables}")
                else:
                    health_status["tables_exist"] = True

                # Check if views exist
                required_views = ['user_learning_journeys', 'curriculum_content_status']
                existing_views = await self._get_existing_views(db)

                missing_views = [view for view in required_views if view not in existing_views]
                if missing_views:
                    health_status["errors"].append(f"Missing views: {missing_views}")
                else:
                    health_status["views_exist"] = True

                # Test write capability with a throwaway temp table.
                try:
                    await db.execute("CREATE TEMPORARY TABLE test_write (id INTEGER)")
                    await db.execute("DROP TABLE test_write")
                    health_status["can_write"] = True
                except Exception as e:
                    health_status["errors"].append(f"Cannot write to database: {str(e)}")

                # Get record counts (only meaningful when all tables exist).
                if health_status["tables_exist"]:
                    for table in required_tables:
                        try:
                            async with db.execute(f"SELECT COUNT(*) FROM {table}") as cursor:
                                count = await cursor.fetchone()
                                health_status["record_count"][table] = count[0] if count else 0
                        except Exception as e:
                            health_status["record_count"][table] = f"Error: {str(e)}"

                # Schema is "loaded" only when both tables and views are present.
                health_status["schema_loaded"] = (
                    health_status["tables_exist"] and
                    health_status["views_exist"]
                )

        except Exception as e:
            health_status["errors"].append(f"Database connection error: {str(e)}")

        return health_status

    async def _get_existing_tables(self, db: aiosqlite.Connection) -> List[str]:
        """Get list of existing (non-internal) tables."""
        async with db.execute("""
            SELECT name FROM sqlite_master
            WHERE type='table' AND name NOT LIKE 'sqlite_%'
        """) as cursor:
            rows = await cursor.fetchall()
            return [row[0] for row in rows]

    async def _get_existing_views(self, db: aiosqlite.Connection) -> List[str]:
        """Get list of existing views."""
        async with db.execute("""
            SELECT name FROM sqlite_master
            WHERE type='view'
        """) as cursor:
            rows = await cursor.fetchall()
            return [row[0] for row in rows]

    async def create_database(self) -> bool:
        """Create database file and initialize with schema.

        Returns True on success; errors are logged and reported as False.
        """
        try:
            logger.info(f"Creating database at: {self.db_path}")

            # Ensure directory exists
            db_dir = os.path.dirname(self.db_path)
            if db_dir and not os.path.exists(db_dir):
                os.makedirs(db_dir, exist_ok=True)
                logger.info(f"Created directory: {db_dir}")

            # Create database and load schema
            async with aiosqlite.connect(self.db_path) as db:
                # Read schema file
                with open(self.schema_path, 'r') as f:
                    schema = f.read()

                # Execute schema
                await db.executescript(schema)
                await db.commit()

            logger.info("Database created and schema loaded successfully")
            return True

        except Exception as e:
            logger.error(f"Error creating database: {str(e)}")
            return False

    async def initialize_database(self, force_recreate: bool = False) -> Dict[str, Any]:
        """Initialize database with comprehensive checks and creation.

        Creates the database when missing or when the schema is incomplete;
        with *force_recreate*, the existing file is renamed to ``*.backup``
        first. Returns a result dict with success flag, action taken, final
        health check, and errors.
        """
        result = {
            "success": False,
            "action_taken": "none",
            "health_check": {},
            "errors": []
        }

        try:
            # Check current database health
            health_check = await self.check_database_health()
            result["health_check"] = health_check

            # Determine if we need to create/recreate database
            needs_creation = (
                not health_check["database_exists"] or
                not health_check["schema_loaded"] or
                force_recreate
            )

            if needs_creation:
                if health_check["database_exists"] and force_recreate:
                    # Backup existing database before overwriting it.
                    # NOTE(review): a previous ``*.backup`` file is clobbered
                    # by os.rename on POSIX — confirm that is acceptable.
                    backup_path = f"{self.db_path}.backup"
                    if os.path.exists(self.db_path):
                        os.rename(self.db_path, backup_path)
                        logger.info(f"Backed up existing database to: {backup_path}")
                        result["action_taken"] = "recreated_with_backup"
                    else:
                        result["action_taken"] = "force_recreated"
                else:
                    result["action_taken"] = "created"

                # Create database
                creation_success = await self.create_database()
                if not creation_success:
                    result["errors"].append("Failed to create database")
                    return result

                # Verify creation
                final_health = await self.check_database_health()
                result["health_check"] = final_health

                if final_health["schema_loaded"] and final_health["can_write"]:
                    result["success"] = True
                    logger.info("Database initialization completed successfully")
                else:
                    result["errors"].append("Database created but health check failed")

            else:
                # Database exists and is healthy
                result["success"] = True
                result["action_taken"] = "already_exists"
                logger.info("Database already exists and is healthy")

        except Exception as e:
            error_msg = f"Database initialization error: {str(e)}"
            logger.error(error_msg)
            result["errors"].append(error_msg)

        return result

    async def repair_database(self) -> Dict[str, Any]:
        """Attempt to repair database issues.

        Creates the database when the file is missing; otherwise re-runs the
        schema script when tables are missing.

        NOTE(review): only missing *tables* trigger a schema re-run — a
        database with tables intact but views missing is not repaired here,
        and ``success`` will be False without any repair attempted. Confirm
        whether views should also trigger ``recreated_schema``.
        """
        result = {
            "success": False,
            "repairs_attempted": [],
            "errors": []
        }

        try:
            health_check = await self.check_database_health()

            if not health_check["database_exists"]:
                # Database doesn't exist - create it
                creation_result = await self.initialize_database()
                result["repairs_attempted"].append("created_missing_database")
                result["success"] = creation_result["success"]
                result["errors"].extend(creation_result.get("errors", []))
                return result

            # Database exists but has issues
            async with aiosqlite.connect(self.db_path) as db:
                # Check and repair missing tables
                if not health_check["tables_exist"]:
                    with open(self.schema_path, 'r') as f:
                        schema = f.read()
                    await db.executescript(schema)
                    await db.commit()
                    result["repairs_attempted"].append("recreated_schema")

            # Verify repair
            final_health = await self.check_database_health()
            result["success"] = final_health["schema_loaded"]

        except Exception as e:
            error_msg = f"Database repair error: {str(e)}"
            logger.error(error_msg)
            result["errors"].append(error_msg)

        return result
256
+
257
+
258
+ # Global instance
259
+ db_initializer = DatabaseInitializer()
backend/main.py CHANGED
@@ -1,96 +1,451 @@
1
- from fastapi import FastAPI, HTTPException
2
  from fastapi.responses import JSONResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
  from backend.utils import generate_completions
6
- from backend.utils.handlers import handle_generation_request, INSTRUCTION_TEMPLATES
7
  from backend import config
 
 
 
 
8
  from typing import Union, List, Literal, Optional
 
9
  import logging
10
  import json
11
- from backend.cache import cache
12
 
13
  logging.basicConfig(level=logging.INFO)
14
 
15
- app = FastAPI()
16
 
17
  # Add CORS middleware
18
  app.add_middleware(
19
  CORSMiddleware,
20
- allow_origins=["*"], # Allows all origins
21
  allow_credentials=True,
22
- allow_methods=["*"], # Allows all methods
23
- allow_headers=["*"], # Allows all headers
24
  )
25
 
26
- class Message(BaseModel):
27
- role: Literal["user", "assistant"]
28
- content: str
29
 
30
  class GenerationRequest(BaseModel):
31
  user_id: int
32
- query: Union[str, List[Message]]
33
  native_language: Optional[str] = None
34
  target_language: Optional[str] = None
35
  proficiency: Optional[str] = None
36
 
37
- class MetadataRequest(BaseModel):
38
- query: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  @app.get("/")
41
  async def root():
42
- return {"message": "Welcome to the AI Learning Assistant API!"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
  @app.post("/extract/metadata")
45
  async def extract_metadata(data: MetadataRequest):
46
- logging.info(f"Query: {data.query}")
 
47
  try:
48
- response_str = await cache.get_or_set(
49
- (str(data.query), config.language_metadata_extraction_prompt),
50
- generate_completions.get_completions,
51
- data.query,
52
- config.language_metadata_extraction_prompt
 
 
53
  )
54
- metadata_dict = json.loads(response_str)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  return JSONResponse(
56
  content={
57
- "data": metadata_dict,
58
- "type": "language_metadata",
59
- "status": "success"
 
60
  },
61
- status_code=200
62
  )
63
  except Exception as e:
 
64
  raise HTTPException(status_code=500, detail=str(e))
65
 
66
- @app.post("/generate/curriculum")
67
- async def generate_curriculum(data: GenerationRequest):
68
- return await handle_generation_request(
69
- data=data,
70
- mode="curriculum",
71
- instructions_template=INSTRUCTION_TEMPLATES["curriculum"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  )
 
 
 
 
 
73
 
74
- @app.post("/generate/flashcards")
75
- async def generate_flashcards(data: GenerationRequest):
76
- return await handle_generation_request(
77
- data=data,
78
- mode="flashcards",
79
- instructions_template=INSTRUCTION_TEMPLATES["flashcards"]
 
 
 
 
 
 
 
 
 
 
 
 
80
  )
81
 
82
- @app.post("/generate/exercises")
83
- async def generate_exercises(data: GenerationRequest):
84
- return await handle_generation_request(
85
- data=data,
86
- mode="exercises",
87
- instructions_template=INSTRUCTION_TEMPLATES["exercises"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  )
89
 
90
- @app.post("/generate/simulation")
91
- async def generate_simulation(data: GenerationRequest):
92
- return await handle_generation_request(
93
- data=data,
94
- mode="simulation",
95
- instructions_template=INSTRUCTION_TEMPLATES["simulation"]
96
- )
 
1
+ from fastapi import FastAPI, HTTPException, Query, Path
2
  from fastapi.responses import JSONResponse
3
  from fastapi.middleware.cors import CORSMiddleware
4
  from pydantic import BaseModel
5
  from backend.utils import generate_completions
 
6
  from backend import config
7
+ from backend.db import db
8
+ from backend.db_init import db_initializer
9
+ from backend.content_generator import content_generator
10
+ from backend.db_cache import api_cache
11
  from typing import Union, List, Literal, Optional
12
+ from datetime import datetime
13
  import logging
14
  import json
 
15
 
16
  logging.basicConfig(level=logging.INFO)
17
 
18
+ app = FastAPI(title="AI Language Tutor API", version="2.0.0")
19
 
20
  # Add CORS middleware
21
  app.add_middleware(
22
  CORSMiddleware,
23
+ allow_origins=["*"],
24
  allow_credentials=True,
25
+ allow_methods=["*"],
26
+ allow_headers=["*"],
27
  )
28
 
29
class MetadataRequest(BaseModel):
    """Request body for POST /extract/metadata."""
    # Free-text learner query to extract language metadata from.
    query: str
    # Optional ID of the requesting user; new extractions are saved under it.
    user_id: Optional[int] = None
33
class GenerationRequest(BaseModel):
    """Request body for content-generation endpoints."""
    # ID of the requesting user (required).
    user_id: int
    # Either a plain query string or a chat-style list of message dicts.
    query: Union[str, List[dict]]
    # Optional language metadata — presumably filled from a prior
    # /extract/metadata call when omitted; TODO confirm with callers.
    native_language: Optional[str] = None
    target_language: Optional[str] = None
    proficiency: Optional[str] = None
 
40
+ @app.on_event("startup")
41
+ async def startup_event():
42
+ """Initialize database on startup with comprehensive checks"""
43
+ logging.info("Starting database initialization...")
44
+
45
+ # Initialize database with health checks
46
+ init_result = await db_initializer.initialize_database()
47
+
48
+ if init_result["success"]:
49
+ logging.info(f"Database initialization successful: {init_result['action_taken']}")
50
+
51
+ # Log database statistics
52
+ health = init_result["health_check"]
53
+ if health.get("record_count"):
54
+ logging.info(f"Database records: {health['record_count']}")
55
+ else:
56
+ logging.error(f"Database initialization failed: {init_result['errors']}")
57
+ # Try to repair
58
+ logging.info("Attempting database repair...")
59
+ repair_result = await db_initializer.repair_database()
60
+ if repair_result["success"]:
61
+ logging.info("Database repair successful")
62
+ else:
63
+ logging.error(f"Database repair failed: {repair_result['errors']}")
64
+ raise RuntimeError("Failed to initialize database")
65
 
66
  @app.get("/")
67
  async def root():
68
+ return {"message": "Welcome to the AI Language Tutor API v2.0!"}
69
+
70
+ @app.get("/health")
71
+ async def health_check():
72
+ """Comprehensive health check including database status"""
73
+ try:
74
+ # Check database health
75
+ db_health = await db_initializer.check_database_health()
76
+
77
+ # Overall health status
78
+ is_healthy = (
79
+ db_health["database_exists"] and
80
+ db_health["schema_loaded"] and
81
+ db_health["can_write"]
82
+ )
83
+
84
+ return JSONResponse(
85
+ content={
86
+ "status": "healthy" if is_healthy else "unhealthy",
87
+ "api_version": "2.0.0",
88
+ "database": db_health,
89
+ "timestamp": datetime.now().isoformat()
90
+ },
91
+ status_code=200 if is_healthy else 503
92
+ )
93
+ except Exception as e:
94
+ return JSONResponse(
95
+ content={
96
+ "status": "error",
97
+ "error": str(e),
98
+ "timestamp": datetime.now().isoformat()
99
+ },
100
+ status_code=500
101
+ )
102
+
103
+ @app.post("/admin/database/repair")
104
+ async def repair_database():
105
+ """Repair database issues (admin endpoint)"""
106
+ try:
107
+ # repair_result = await db.repair_database() # This method doesn't exist on the Database class
108
+
109
+ return JSONResponse(
110
+ content={
111
+ "success": repair_result["success"],
112
+ "repairs_attempted": repair_result["repairs_attempted"],
113
+ "errors": repair_result["errors"],
114
+ "timestamp": datetime.now().isoformat()
115
+ },
116
+ status_code=200 if repair_result["success"] else 500
117
+ )
118
+ except Exception as e:
119
+ return JSONResponse(
120
+ content={
121
+ "success": False,
122
+ "error": str(e),
123
+ "timestamp": datetime.now().isoformat()
124
+ },
125
+ status_code=500
126
+ )
127
+
128
+ @app.post("/admin/database/recreate")
129
+ async def recreate_database():
130
+ """Recreate database from scratch (admin endpoint)"""
131
+ try:
132
+ init_result = await db_initializer.initialize_database(force_recreate=True)
133
+
134
+ return JSONResponse(
135
+ content={
136
+ "success": init_result["success"],
137
+ "action_taken": init_result["action_taken"],
138
+ "health_check": init_result["health_check"],
139
+ "errors": init_result["errors"],
140
+ "timestamp": datetime.now().isoformat()
141
+ },
142
+ status_code=200 if init_result["success"] else 500
143
+ )
144
+ except Exception as e:
145
+ return JSONResponse(
146
+ content={
147
+ "success": False,
148
+ "error": str(e),
149
+ "timestamp": datetime.now().isoformat()
150
+ },
151
+ status_code=500
152
+ )
153
+
154
+ # ========== POST ENDPOINTS (Generation) ==========
155
 
156
  @app.post("/extract/metadata")
157
  async def extract_metadata(data: MetadataRequest):
158
+ """Extract language learning metadata from user query"""
159
+ logging.info(f"Extracting metadata for query: {data.query[:50]}...")
160
  try:
161
+ # Generate metadata using AI, with caching
162
+ metadata_dict = await api_cache.get_or_set(
163
+ category="metadata",
164
+ key_text=data.query,
165
+ coro=generate_completions.get_completions,
166
+ prompt=data.query,
167
+ instructions=config.language_metadata_extraction_prompt
168
  )
169
+
170
+ # Check for existing curriculum first before creating new metadata extraction
171
+ existing_curriculum = await db.find_existing_curriculum(
172
+ query=data.query,
173
+ native_language=metadata_dict['native_language'],
174
+ target_language=metadata_dict['target_language'],
175
+ proficiency=metadata_dict['proficiency'],
176
+ user_id=None # Make it user-independent
177
+ )
178
+
179
+ if existing_curriculum:
180
+ # Found existing curriculum - return it regardless of user
181
+ logging.info(f"Found existing curriculum for query '{data.query[:50]}...': {existing_curriculum['id']}")
182
+ return JSONResponse(
183
+ content={
184
+ "message": "Found existing curriculum for your query.",
185
+ "curriculum_id": existing_curriculum['id'],
186
+ "status_endpoint": f"/content/status/{existing_curriculum['id']}",
187
+ "cached": True
188
+ },
189
+ status_code=200
190
+ )
191
+
192
+ # No suitable existing curriculum found, generate new one
193
+ logging.info(f"No existing curriculum found, generating new one for user {data.user_id}")
194
+
195
+ # Save metadata to database
196
+ extraction_id = await db.save_metadata_extraction(
197
+ query=data.query,
198
+ metadata=metadata_dict,
199
+ user_id=data.user_id
200
+ )
201
+
202
+ # Process extraction (generate curriculum and start content generation)
203
+ processing_result = await content_generator.process_metadata_extraction(
204
+ extraction_id=extraction_id,
205
+ query=data.query,
206
+ metadata=metadata_dict,
207
+ user_id=data.user_id,
208
+ generate_content=True # Automatically generate all content
209
+ )
210
+
211
+ curriculum_id = processing_result['curriculum_id']
212
+
213
  return JSONResponse(
214
  content={
215
+ "message": "Content generation has been initiated.",
216
+ "curriculum_id": curriculum_id,
217
+ "status_endpoint": f"/content/status/{curriculum_id}",
218
+ "cached": False
219
  },
220
+ status_code=202
221
  )
222
  except Exception as e:
223
+ logging.error(f"Error extracting metadata: {e}")
224
  raise HTTPException(status_code=500, detail=str(e))
225
 
226
+ # ========== GET ENDPOINTS (Retrieval) ==========
227
+
228
@app.get("/curriculum/{curriculum_id}/metadata")
async def get_curriculum_metadata(curriculum_id: str = Path(..., description="Curriculum ID")):
    """Return the metadata-extraction record that produced a curriculum.

    Looks up the curriculum, follows its ``metadata_extraction_id`` to the
    originating extraction row, and inlines the parsed metadata JSON under
    the ``metadata`` key (the raw ``metadata_json`` column is removed).

    Raises:
        HTTPException: 404 when the curriculum or its extraction row is missing.
    """
    curriculum = await db.get_curriculum(curriculum_id)
    if not curriculum:
        raise HTTPException(status_code=404, detail="Curriculum not found")

    # Get the full metadata extraction record
    extraction = await db.get_metadata_extraction(curriculum['metadata_extraction_id'])
    if not extraction:
        raise HTTPException(status_code=404, detail="Metadata extraction not found")

    # Replace the raw JSON column with its parsed form. Fall back to the raw
    # string if the stored JSON is malformed, instead of failing the whole
    # request with a 500 (same policy as _get_lesson_content_by_type).
    raw_metadata = extraction.pop('metadata_json')
    try:
        extraction['metadata'] = json.loads(raw_metadata)
    except json.JSONDecodeError:
        extraction['metadata'] = raw_metadata

    return JSONResponse(content=extraction, status_code=200)
245
+
246
@app.get("/curriculum/{curriculum_id}")
async def get_curriculum(curriculum_id: str = Path(..., description="Curriculum ID")):
    """Fetch a curriculum by ID, annotated with its content-generation status."""
    details = await db.get_full_curriculum_details(curriculum_id, include_content=False)
    if not details:
        raise HTTPException(status_code=404, detail="Curriculum not found")

    # Attach generation progress when the status view has a row for it.
    generation_status = await db.get_curriculum_content_status(curriculum_id)
    if generation_status:
        details['content_status'] = generation_status

    return JSONResponse(content=details, status_code=200)
259
+
260
+
261
async def _get_lesson_content_by_type(
    curriculum_id: str,
    lesson_index: int,
    content_type: str
):
    """Fetch one content type (flashcards/exercises/simulation) for a lesson.

    Raises:
        HTTPException: 404 when no matching content row exists.
    """
    rows = await db.get_learning_content(
        curriculum_id=curriculum_id,
        lesson_index=lesson_index,
        content_type=content_type
    )
    if not rows:
        raise HTTPException(
            status_code=404,
            detail=f"{content_type.capitalize()} content not found for lesson {lesson_index}"
        )

    # One row per (lesson, content_type) is expected; use the first.
    record = rows[0]
    try:
        body = json.loads(record['content_json'])
    except json.JSONDecodeError:
        # Stored value was not valid JSON — return it verbatim.
        body = record['content_json']

    payload = {
        "curriculum_id": curriculum_id,
        "lesson_index": lesson_index,
        "content_type": content_type,
        "id": record['id'],
        "lesson_topic": record['lesson_topic'],
        "content": body,
        "created_at": record['created_at']
    }
    return JSONResponse(content=payload, status_code=200)
297
 
298
@app.get("/curriculum/{curriculum_id}/lesson/{lesson_index}/flashcards")
async def get_lesson_flashcards(
    curriculum_id: str = Path(..., description="Curriculum ID"),
    lesson_index: int = Path(..., ge=0, le=24, description="Lesson index (0-24)")
):
    """Return the flashcards generated for one lesson of a curriculum."""
    return await _get_lesson_content_by_type(
        curriculum_id=curriculum_id,
        lesson_index=lesson_index,
        content_type="flashcards",
    )
305
+
306
@app.get("/curriculum/{curriculum_id}/lesson/{lesson_index}/exercises")
async def get_lesson_exercises(
    curriculum_id: str = Path(..., description="Curriculum ID"),
    lesson_index: int = Path(..., ge=0, le=24, description="Lesson index (0-24)")
):
    """Return the exercises generated for one lesson of a curriculum."""
    return await _get_lesson_content_by_type(
        curriculum_id=curriculum_id,
        lesson_index=lesson_index,
        content_type="exercises",
    )
313
+
314
@app.get("/curriculum/{curriculum_id}/lesson/{lesson_index}/simulation")
async def get_lesson_simulation(
    curriculum_id: str = Path(..., description="Curriculum ID"),
    lesson_index: int = Path(..., ge=0, le=24, description="Lesson index (0-24)")
):
    """Return the simulation generated for one lesson of a curriculum."""
    return await _get_lesson_content_by_type(
        curriculum_id=curriculum_id,
        lesson_index=lesson_index,
        content_type="simulation",
    )
321
@app.get("/user/{user_id}/metadata")
async def get_user_metadata_history(
    user_id: int = Path(..., description="User ID"),
    limit: int = Query(20, ge=1, le=100, description="Maximum number of results")
):
    """Return the user's metadata-extraction history, newest rows first.

    Each row's ``metadata_json`` column is parsed and exposed as ``metadata``.
    A single malformed row falls back to the raw string instead of failing
    the entire request (consistent with _get_lesson_content_by_type).
    """
    extractions = await db.get_user_metadata_extractions(user_id, limit)

    # Parse JSON fields per row, tolerating bad data.
    for extraction in extractions:
        raw_metadata = extraction.pop('metadata_json')
        try:
            extraction['metadata'] = json.loads(raw_metadata)
        except json.JSONDecodeError:
            extraction['metadata'] = raw_metadata

    return JSONResponse(
        content={
            "user_id": user_id,
            "extractions": extractions,
            "total": len(extractions)
        },
        status_code=200
    )
342
+
343
@app.get("/user/{user_id}/curricula")
async def get_user_curricula(
    user_id: int = Path(..., description="User ID"),
    limit: int = Query(20, ge=1, le=100, description="Maximum number of results")
):
    """Return the user's curricula with parsed JSON and generation status.

    Each row's ``curriculum_json`` column is parsed and exposed as
    ``curriculum``. A single malformed row falls back to the raw string
    instead of failing the entire request (consistent with
    _get_lesson_content_by_type).
    """
    curricula = await db.get_user_curricula(user_id, limit)

    # Parse JSON fields and attach content status per curriculum.
    for curriculum in curricula:
        raw_curriculum = curriculum.pop('curriculum_json')
        try:
            curriculum['curriculum'] = json.loads(raw_curriculum)
        except json.JSONDecodeError:
            curriculum['curriculum'] = raw_curriculum

        # Get content status
        status = await db.get_curriculum_content_status(curriculum['id'])
        if status:
            curriculum['content_status'] = status

    return JSONResponse(
        content={
            "user_id": user_id,
            "curricula": curricula,
            "total": len(curricula)
        },
        status_code=200
    )
369
+
370
@app.get("/user/{user_id}/journeys")
async def get_user_learning_journeys(
    user_id: int = Path(..., description="User ID"),
    limit: int = Query(20, ge=1, le=100, description="Maximum number of results")
):
    """Return the user's learning journeys (metadata joined with curriculum info)."""
    journeys = await db.get_user_learning_journeys(user_id, limit)

    payload = {
        "user_id": user_id,
        "journeys": journeys,
        "total": len(journeys),
    }
    return JSONResponse(content=payload, status_code=200)
386
+
387
@app.get("/search/curricula")
async def search_curricula(
    native_language: str = Query(..., description="Native language"),
    target_language: str = Query(..., description="Target language"),
    proficiency: Optional[str] = Query(None, description="Proficiency level"),
    limit: int = Query(10, ge=1, le=50, description="Maximum number of results")
):
    """Search existing curricula by native/target language (and optional proficiency).

    Each row's ``curriculum_json`` column is parsed and exposed as
    ``curriculum``. A single malformed row falls back to the raw string
    instead of failing the entire request (consistent with
    _get_lesson_content_by_type).
    """
    curricula = await db.search_curricula_by_languages(
        native_language=native_language,
        target_language=target_language,
        proficiency=proficiency,
        limit=limit
    )

    # Parse JSON fields per row, tolerating bad data.
    for curriculum in curricula:
        raw_curriculum = curriculum.pop('curriculum_json')
        try:
            curriculum['curriculum'] = json.loads(raw_curriculum)
        except json.JSONDecodeError:
            curriculum['curriculum'] = raw_curriculum

    return JSONResponse(
        content={
            "search_params": {
                "native_language": native_language,
                "target_language": target_language,
                "proficiency": proficiency
            },
            "curricula": curricula,
            "total": len(curricula)
        },
        status_code=200
    )
419
+
420
@app.get("/content/status/{curriculum_id}")
async def get_content_generation_status(
    curriculum_id: str = Path(..., description="Curriculum ID")
):
    """Report how much of a curriculum's content has been generated."""
    status = await db.get_curriculum_content_status(curriculum_id)
    if not status:
        raise HTTPException(status_code=404, detail="Curriculum not found")

    # Expected total: 25 lessons x 3 content types (flashcards, exercises, simulation).
    total_expected = 25 * 3
    total_generated = sum(
        status[key]
        for key in (
            'lessons_with_flashcards',
            'lessons_with_exercises',
            'lessons_with_simulations',
        )
    )

    completion_percentage = (total_generated / total_expected) * 100 if total_expected > 0 else 0

    payload = {
        "curriculum_id": curriculum_id,
        "status": status,
        "completion_percentage": round(completion_percentage, 2),
        "is_complete": completion_percentage >= 100,
    }
    return JSONResponse(content=payload, status_code=200)
451
 
 
 
 
 
 
 
 
backend/schema.sql ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
-- AI Language Tutor Database Schema
-- SQLite dialect: TEXT primary keys default to random 16-byte hex IDs.

-- Table for storing extracted metadata from user queries
CREATE TABLE IF NOT EXISTS metadata_extractions (
    id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
    user_id INTEGER,  -- nullable: extractions may be user-independent
    query TEXT NOT NULL,
    native_language TEXT,
    target_language TEXT,
    proficiency TEXT CHECK(proficiency IN ('beginner', 'intermediate', 'advanced')),
    title TEXT,
    description TEXT,
    metadata_json TEXT NOT NULL, -- Full JSON response
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- Index for user queries
CREATE INDEX IF NOT EXISTS idx_metadata_user_id ON metadata_extractions(user_id);
CREATE INDEX IF NOT EXISTS idx_metadata_languages ON metadata_extractions(native_language, target_language);

-- Table for storing generated curricula
-- Each curriculum row originates from exactly one metadata extraction.
CREATE TABLE IF NOT EXISTS curricula (
    id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
    metadata_extraction_id TEXT NOT NULL,
    user_id INTEGER,
    lesson_topic TEXT,
    curriculum_json TEXT NOT NULL, -- Full curriculum JSON with 25 lessons
    is_content_generated INTEGER DEFAULT 0, -- Boolean: has all content been generated?
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (metadata_extraction_id) REFERENCES metadata_extractions(id) ON DELETE CASCADE
);

-- Index for curriculum lookups
CREATE INDEX IF NOT EXISTS idx_curricula_metadata_id ON curricula(metadata_extraction_id);
CREATE INDEX IF NOT EXISTS idx_curricula_user_id ON curricula(user_id);

-- Table for storing all types of learning content
-- One row per (curriculum, content_type, lesson_index); lesson_index is 0-24.
CREATE TABLE IF NOT EXISTS learning_content (
    id TEXT PRIMARY KEY DEFAULT (lower(hex(randomblob(16)))),
    curriculum_id TEXT NOT NULL,
    content_type TEXT NOT NULL CHECK(content_type IN ('flashcards', 'exercises', 'simulation')),
    lesson_index INTEGER NOT NULL CHECK(lesson_index >= 0 AND lesson_index < 25),
    lesson_topic TEXT,
    content_json TEXT NOT NULL, -- The actual generated content
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (curriculum_id) REFERENCES curricula(id) ON DELETE CASCADE
);

-- Index for content lookups
CREATE INDEX IF NOT EXISTS idx_content_curriculum_id ON learning_content(curriculum_id);
CREATE INDEX IF NOT EXISTS idx_content_type ON learning_content(content_type);
CREATE INDEX IF NOT EXISTS idx_content_lesson ON learning_content(curriculum_id, lesson_index);

-- View for easy access to user's learning journeys
-- LEFT JOIN: extractions without a generated curriculum still appear.
CREATE VIEW IF NOT EXISTS user_learning_journeys AS
SELECT
    m.id as metadata_id,
    m.user_id,
    m.query,
    m.native_language,
    m.target_language,
    m.proficiency,
    m.title,
    m.description,
    c.id as curriculum_id,
    c.lesson_topic,
    c.is_content_generated,
    m.created_at
FROM metadata_extractions m
LEFT JOIN curricula c ON m.id = c.metadata_extraction_id
ORDER BY m.created_at DESC;

-- View for content availability per curriculum
-- Counts DISTINCT lesson indexes per content type so duplicates don't inflate progress.
CREATE VIEW IF NOT EXISTS curriculum_content_status AS
SELECT
    c.id as curriculum_id,
    c.user_id,
    c.lesson_topic,
    COUNT(DISTINCT lc.lesson_index) as lessons_with_content,
    COUNT(DISTINCT CASE WHEN lc.content_type = 'flashcards' THEN lc.lesson_index END) as lessons_with_flashcards,
    COUNT(DISTINCT CASE WHEN lc.content_type = 'exercises' THEN lc.lesson_index END) as lessons_with_exercises,
    COUNT(DISTINCT CASE WHEN lc.content_type = 'simulation' THEN lc.lesson_index END) as lessons_with_simulations,
    c.created_at
FROM curricula c
LEFT JOIN learning_content lc ON c.id = lc.curriculum_id
GROUP BY c.id;

-- Generic cache for API responses to reduce redundant AI calls
-- NOTE(review): no expiry/TTL column — entries live until explicitly deleted.
CREATE TABLE IF NOT EXISTS api_cache (
    cache_key TEXT NOT NULL,
    category TEXT NOT NULL,
    content_json TEXT NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    PRIMARY KEY (cache_key, category)
);

-- Index for faster cache lookups
CREATE INDEX IF NOT EXISTS idx_api_cache_key_category ON api_cache(cache_key, category);
backend/utils/__pycache__/generate_completions.cpython-310.pyc DELETED
Binary file (2.55 kB)
 
backend/utils/__pycache__/generate_completions.cpython-311.pyc ADDED
Binary file (4.54 kB). View file
 
backend/utils/__pycache__/generate_completions.cpython-312.pyc CHANGED
Binary files a/backend/utils/__pycache__/generate_completions.cpython-312.pyc and b/backend/utils/__pycache__/generate_completions.cpython-312.pyc differ
 
backend/utils/__pycache__/handlers.cpython-310.pyc DELETED
Binary file (1.94 kB)
 
backend/utils/__pycache__/handlers.cpython-312.pyc CHANGED
Binary files a/backend/utils/__pycache__/handlers.cpython-312.pyc and b/backend/utils/__pycache__/handlers.cpython-312.pyc differ
 
backend/utils/generate_completions.py CHANGED
@@ -6,6 +6,7 @@ from typing import Union, List, Dict, Literal
6
  from dotenv import load_dotenv
7
  import os
8
  from pydantic import BaseModel
 
9
  load_dotenv()
10
 
11
  # Initialize the async client
@@ -71,9 +72,8 @@ async def get_completions(
71
  else:
72
  raise TypeError("Unexpected processed input type.")
73
 
74
- # print(os.getenv("MODEL"))
75
  response = await client.chat.completions.create(
76
- model=os.getenv("MODEL"),
77
  messages=messages,
78
  response_format={"type": "json_object"}
79
  )
 
6
  from dotenv import load_dotenv
7
  import os
8
  from pydantic import BaseModel
9
+
10
  load_dotenv()
11
 
12
  # Initialize the async client
 
72
  else:
73
  raise TypeError("Unexpected processed input type.")
74
 
 
75
  response = await client.chat.completions.create(
76
+ model=os.getenv("MODEL", "gemini-2.0-flash"),
77
  messages=messages,
78
  response_format={"type": "json_object"}
79
  )
backend/utils/handlers.py CHANGED
@@ -2,7 +2,7 @@ from fastapi import HTTPException
2
  from fastapi.responses import JSONResponse
3
  from typing import Callable, Dict, Any
4
  from backend import config
5
- from backend.cache import cache
6
  from backend.utils import generate_completions
7
 
8
  async def handle_generation_request(
@@ -39,19 +39,30 @@ async def handle_generation_request(
39
  .replace("{proficiency}", data.proficiency)
40
  )
41
 
42
- # Get response from cache or generate new
43
- response = await cache.get_or_set(
44
- (str(data.query), instructions),
45
- generate_completions.get_completions,
46
  data.query,
47
  instructions
48
  )
49
 
 
 
 
 
 
 
 
 
 
 
 
50
  return JSONResponse(
51
  content={
52
  "data": response,
53
  "type": mode,
54
- "status": "success"
 
 
55
  },
56
  status_code=200
57
  )
@@ -62,4 +73,4 @@ INSTRUCTION_TEMPLATES: Dict[str, str] = {
62
  "flashcards": config.flashcard_mode_instructions,
63
  "exercises": config.exercise_mode_instructions,
64
  "simulation": config.simulation_mode_instructions
65
- }
 
2
  from fastapi.responses import JSONResponse
3
  from typing import Callable, Dict, Any
4
  from backend import config
5
+ from backend.content_generator import content_generator
6
  from backend.utils import generate_completions
7
 
8
  async def handle_generation_request(
 
39
  .replace("{proficiency}", data.proficiency)
40
  )
41
 
42
+ # Generate new content
43
+ response = await generate_completions.get_completions(
 
 
44
  data.query,
45
  instructions
46
  )
47
 
48
+ # Save generated content to database
49
+ content_id = await content_generator.save_content(
50
+ query=str(data.query),
51
+ content=response,
52
+ content_type=mode,
53
+ user_id=data.user_id,
54
+ native_language=data.native_language,
55
+ target_language=data.target_language,
56
+ proficiency=data.proficiency
57
+ )
58
+
59
  return JSONResponse(
60
  content={
61
  "data": response,
62
  "type": mode,
63
+ "status": "success",
64
+ "content_id": content_id,
65
+ "saved": True
66
  },
67
  status_code=200
68
  )
 
73
  "flashcards": config.flashcard_mode_instructions,
74
  "exercises": config.exercise_mode_instructions,
75
  "simulation": config.simulation_mode_instructions
76
+ }