Spaces:
Sleeping
Sleeping
| from langchain_core.prompts import ChatPromptTemplate | |
| from pydantic import BaseModel, Field | |
| from src.config.llm import model | |
| from src.utils.logger import logger | |
| from .prompt import evaluation_prompt | |
| from langgraph.checkpoint.memory import InMemorySaver | |
| from typing import List, Dict, Any | |
| from src.agents.role_play.flow import role_play_agent | |
| # Define the structured output format | |
class ResponseFormatter(BaseModel):
    """Schema for the structured evaluation the LLM must return.

    Every field is required; `score` is constrained to the 0-100 range.
    """

    # Core assessment
    score: int = Field(
        description="Overall conversation score out of 100", ge=0, le=100
    )
    feedback: str = Field(description="Overall feedback summary")

    # Qualitative breakdown
    strengths: List[str] = Field(description="List of conversation strengths")
    improvements: List[str] = Field(description="List of areas for improvement")
    suggestions: List[str] = Field(
        description="List of specific improvement suggestions"
    )
    next_steps: List[str] = Field(description="List of recommended next steps")

    # Vocabulary analysis
    words_used: List[str] = Field(
        description="List of key words used from the scenario"
    )
    perfect_response: str = Field(
        description="An example of a perfect response for this scenario"
    )
    impressive_words: List[str] = Field(
        description="List of impressive or advanced words used by the learner"
    )
# Evaluate a stored conversation session and produce structured feedback
async def evaluate_conversation(
    session_id: str,
    learner_level: str = "beginner",
    scenario_title: str = "",
    scenario_description: str = "",
    key_vocabulary: str = "",
) -> Dict[str, Any]:
    """
    Evaluate a conversation based on the session ID and provide feedback.

    Args:
        session_id: The thread ID for the conversation
        learner_level: The English level of the learner
        scenario_title: Title of the conversation scenario
        scenario_description: Description of the conversation scenario
        key_vocabulary: Key vocabulary words from the scenario

    Returns:
        Dict containing evaluation results. Always has the same key set:
        score, feedback, strengths, improvements, suggestions, next_steps,
        words_used, perfect_response, impressive_words.
    """
    logger.info(f"Evaluating conversation for session_id: {session_id}")

    # Pull the persisted message history for this thread from the agent's
    # checkpointed state.
    config = {"configurable": {"thread_id": session_id}}
    snapshot = await role_play_agent().aget_state(config)
    messages = snapshot.values.get("messages", [])

    if not messages:
        # Fix: previously this branch omitted words_used, perfect_response,
        # and impressive_words, so callers saw an inconsistent dict shape.
        # Return the full key set with empty values instead.
        return {
            "score": 0,
            "feedback": "No conversation found for this session.",
            "strengths": [],
            "improvements": [],
            "suggestions": [],
            "next_steps": [],
            "words_used": [],
            "perfect_response": "",
            "impressive_words": [],
        }

    # Build the evaluation prompt; scenario metadata is injected as template
    # variables and the conversation transcript is appended via the
    # "placeholder" message slot.
    evaluation_prompt_template = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """# CONVERSATION EVALUATOR - English Learning Assessment Specialist
You are **WISE Evaluator**, an expert English tutor who analyzes conversations between learners and AI roleplay partners. Your job is to provide comprehensive feedback that helps learners improve.
## Evaluation Context
- **Session ID**: {session_id}
- **Learner Level**: {learner_level}
- **Scenario**: {scenario_title} - {scenario_description}
- **Key Vocabulary**: {key_vocabulary}
## Your Evaluation Mission
1. **Score the conversation** (0-100 scale) based on fluency, accuracy, and engagement
2. **Identify strengths** - What did the learner do well?
3. **Pinpoint areas for improvement** - Where can they get better?
4. **Provide specific suggestions** - Concrete actions for improvement
5. **Recommend next steps** - What should they practice next?
## Scoring Criteria
### Fluency (30 points)
- **Flow**: How naturally does the conversation progress?
- **Response time**: Are there appropriate pauses or unnatural delays?
- **Turn-taking**: Good balance of speaking between learner and AI?
### Accuracy (30 points)
- **Grammar**: Correct sentence structures and verb forms
- **Vocabulary**: Appropriate word choices and usage
- **Pronunciation**: (If audio available) Clear pronunciation of words
### Engagement (20 points)
- **Relevance**: Staying on topic and scenario context
- **Interaction**: Active participation and questions
- **Creativity**: Bringing personal experiences or unique responses
### Vocabulary Usage (20 points)
- **Range**: Using diverse vocabulary from the scenario
- **Accuracy**: Correct usage of key vocabulary words
- **Complexity**: Appropriate challenge level for learner
## Response Format Requirements
You must provide your response in the following structured format:
### SCORE: [X/100]
Provide a single overall score out of 100.
### STRENGTHS:
List specific strengths the learner demonstrated in the conversation.
### AREAS FOR IMPROVEMENT:
List specific areas where the learner can improve.
### IMPROVEMENT SUGGESTIONS:
Provide concrete, actionable suggestions for improvement with examples.
### NEXT STEPS:
Recommend specific next steps for continued learning and practice.
## Important Guidelines:
- **Be encouraging**: Focus on growth, not just mistakes
- **Be specific**: Give concrete examples, not vague advice
- **Be appropriate**: Match feedback complexity to learner level
- **Be actionable**: Every suggestion should be something they can practice
- **Use markdown**: Structure feedback clearly with headers and bullet points
Remember: Your goal is to help learners feel motivated while giving them clear paths to improvement. Balance honest feedback with positive reinforcement.
""",
            ),
            ("placeholder", "{messages}"),
        ]
    )

    # Pipe the prompt into the model with structured output so the response
    # is parsed/validated directly into a ResponseFormatter instance.
    chain = evaluation_prompt_template | model.with_structured_output(ResponseFormatter)
    structured_output: ResponseFormatter = await chain.ainvoke(
        {
            "session_id": session_id,
            "learner_level": learner_level,
            "scenario_title": scenario_title,
            "scenario_description": scenario_description,
            "key_vocabulary": key_vocabulary,
            "messages": messages,
        }
    )

    # Flatten the validated model into a plain dict for callers.
    return {
        "score": structured_output.score,
        "feedback": structured_output.feedback,
        "strengths": structured_output.strengths,
        "improvements": structured_output.improvements,
        "suggestions": structured_output.suggestions,
        "next_steps": structured_output.next_steps,
        "words_used": structured_output.words_used,
        "perfect_response": structured_output.perfect_response,
        "impressive_words": structured_output.impressive_words,
    }