from typing import Any, Dict, List

from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

from src.agents.role_play.flow import role_play_agent
from src.config.llm import model
from src.utils.logger import logger


# Define the structured output format
class ResponseFormatter(BaseModel):
    """Structured output format for conversation evaluation"""

    score: int = Field(
        ..., description="Overall conversation score out of 100", ge=0, le=100
    )
    feedback: str = Field(..., description="Overall feedback summary")
    strengths: List[str] = Field(..., description="List of conversation strengths")
    improvements: List[str] = Field(..., description="List of areas for improvement")
    suggestions: List[str] = Field(
        ..., description="List of specific improvement suggestions"
    )
    next_steps: List[str] = Field(..., description="List of recommended next steps")
    words_used: List[str] = Field(..., description="List of key words used from the scenario")
    perfect_response: str = Field(..., description="An example of a perfect response for this scenario")
    impressive_words: List[str] = Field(..., description="List of impressive or advanced words used by the learner")
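
# Note: pydantic enforces the Field constraints at parse time, so a score
# outside the 0-100 range (ge=0, le=100) raises a ValidationError before the
# result ever reaches the caller.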


async def evaluate_conversation(
    session_id: str,
    learner_level: str = "beginner",
    scenario_title: str = "",
    scenario_description: str = "",
    key_vocabulary: str = "",
) -> Dict[str, Any]:
    """
    Evaluate a conversation based on the session ID and provide feedback.

    Args:
        session_id: The thread ID for the conversation
        learner_level: The English level of the learner
        scenario_title: Title of the conversation scenario
        scenario_description: Description of the conversation scenario
        key_vocabulary: Key vocabulary words from the scenario

    Returns:
        Dict containing evaluation results including score and feedback
    """
    logger.info(f"Evaluating conversation for session_id: {session_id}")
    config = {"configurable": {"thread_id": session_id}}
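    # Read the persisted conversation state for this thread from the role-play
    # agent's checkpointer; "messages" is empty when the session has no turns yet.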
    snapshot = await role_play_agent().aget_state(config)
    messages = snapshot.values.get("messages", [])
    if not messages:
        return {
            "score": 0,
            "feedback": "No conversation found for this session.",
            "strengths": [],
            "improvements": [],
            "suggestions": [],
            "next_steps": [],
            "words_used": [],
            "perfect_response": "",
            "impressive_words": [],
        }

    evaluation_prompt_template = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """# CONVERSATION EVALUATOR - English Learning Assessment Specialist

You are **WISE Evaluator**, an expert English tutor who analyzes conversations between learners and AI roleplay partners. Your job is to provide comprehensive feedback that helps learners improve.

## Evaluation Context
- **Session ID**: {session_id}
- **Learner Level**: {learner_level}
- **Scenario**: {scenario_title} - {scenario_description}
- **Key Vocabulary**: {key_vocabulary}

## Your Evaluation Mission
1. **Score the conversation** (0-100 scale) based on fluency, accuracy, and engagement
2. **Identify strengths** - What did the learner do well?
3. **Pinpoint areas for improvement** - Where can they get better?
4. **Provide specific suggestions** - Concrete actions for improvement
5. **Recommend next steps** - What should they practice next?

## Scoring Criteria

### Fluency (30 points)
- **Flow**: How naturally does the conversation progress?
- **Response time**: Are there appropriate pauses or unnatural delays?
- **Turn-taking**: Good balance of speaking between learner and AI?

### Accuracy (30 points)
- **Grammar**: Correct sentence structures and verb forms
- **Vocabulary**: Appropriate word choices and usage
- **Pronunciation**: (If audio available) Clear pronunciation of words

### Engagement (20 points)
- **Relevance**: Staying on topic and scenario context
- **Interaction**: Active participation and questions
- **Creativity**: Bringing personal experiences or unique responses

### Vocabulary Usage (20 points)
- **Range**: Using diverse vocabulary from the scenario
- **Accuracy**: Correct usage of key vocabulary words
- **Complexity**: Appropriate challenge level for learner

## Response Format Requirements

Your response is parsed into a structured schema, so populate every field:

- **score**: a single overall score out of 100
- **feedback**: a concise overall summary of the conversation
- **strengths**: specific strengths the learner demonstrated
- **improvements**: specific areas where the learner can improve
- **suggestions**: concrete, actionable suggestions with examples
- **next_steps**: specific next steps for continued learning and practice
- **words_used**: key vocabulary from the scenario that the learner actually used
- **perfect_response**: an example of a perfect response for this scenario
- **impressive_words**: impressive or advanced words the learner used

## Important Guidelines:
- **Be encouraging**: Focus on growth, not just mistakes
- **Be specific**: Give concrete examples, not vague advice
- **Be appropriate**: Match feedback complexity to learner level
- **Be actionable**: Every suggestion should be something they can practice
- **Use markdown**: Format the feedback text with bullet points where it helps readability

Remember: Your goal is to help learners feel motivated while giving them clear paths to improvement. Balance honest feedback with positive reinforcement.
""",
            ),
            ("placeholder", "{messages}"),
        ]
    )
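    # Piping the prompt into model.with_structured_output(ResponseFormatter)
    # makes the chain return a parsed ResponseFormatter instance rather than
    # raw text, so no manual JSON parsing is needed.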
    chain = evaluation_prompt_template | model.with_structured_output(ResponseFormatter)

    # Call the LLM with the formatted prompt
    structured_output: ResponseFormatter = await chain.ainvoke(
        {
            "session_id": session_id,
            "learner_level": learner_level,
            "scenario_title": scenario_title,
            "scenario_description": scenario_description,
            "key_vocabulary": key_vocabulary,
            "messages": messages,
        }
    )

    # Convert structured output to dictionary
    result = {
        "score": structured_output.score,
        "feedback": structured_output.feedback,
        "strengths": structured_output.strengths,
        "improvements": structured_output.improvements,
        "suggestions": structured_output.suggestions,
        "next_steps": structured_output.next_steps,
        "words_used": structured_output.words_used,
        "perfect_response": structured_output.perfect_response,
        "impressive_words": structured_output.impressive_words,
    }

    return result
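

# Usage sketch (illustrative only): the session id and scenario values below are
# hypothetical placeholders; a real run assumes the role_play_agent checkpointer
# already holds messages for this thread and that `model` is configured.
if __name__ == "__main__":
    import asyncio

    demo = asyncio.run(
        evaluate_conversation(
            session_id="demo-session-123",  # hypothetical thread id
            learner_level="intermediate",
            scenario_title="Ordering Coffee",
            scenario_description="The learner orders a drink at a busy cafe.",
            key_vocabulary="latte, receipt, to-go",
        )
    )
    print(demo["score"], demo["feedback"])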