from typing import Any, Dict, List

from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

from src.agents.role_play.flow import role_play_agent
from src.config.llm import model
from src.utils.logger import logger


# Define the structured output format
class ResponseFormatter(BaseModel):
    """Structured output format for conversation evaluation"""

    score: int = Field(
        ..., description="Overall conversation score out of 100", ge=0, le=100
    )
    feedback: str = Field(..., description="Overall feedback summary")
    strengths: List[str] = Field(..., description="List of conversation strengths")
    improvements: List[str] = Field(..., description="List of areas for improvement")
    suggestions: List[str] = Field(
        ..., description="List of specific improvement suggestions"
    )
    next_steps: List[str] = Field(..., description="List of recommended next steps")
    words_used: List[str] = Field(
        ..., description="List of key words used from the scenario"
    )
    perfect_response: str = Field(
        ..., description="An example of a perfect response for this scenario"
    )
    impressive_words: List[str] = Field(
        ..., description="List of impressive or advanced words used by the learner"
    )
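
# Illustrative only: Pydantic enforces the `ge`/`le` bounds declared on `score`,
# so an out-of-range value raises a ValidationError at parse time. A minimal
# sketch with hypothetical values (not produced by the agent):
#
#   ResponseFormatter(
#       score=150,  # raises pydantic.ValidationError: must be <= 100
#       feedback="...", strengths=[], improvements=[], suggestions=[],
#       next_steps=[], words_used=[], perfect_response="", impressive_words=[],
#   )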


async def evaluate_conversation(
    session_id: str,
    learner_level: str = "beginner",
    scenario_title: str = "",
    scenario_description: str = "",
    key_vocabulary: str = "",
) -> Dict[str, Any]:
    """
    Evaluate a conversation based on the session ID and provide feedback.

    Args:
        session_id: The thread ID for the conversation
        learner_level: The English level of the learner
        scenario_title: Title of the conversation scenario
        scenario_description: Description of the conversation scenario
        key_vocabulary: Key vocabulary words from the scenario

    Returns:
        Dict containing evaluation results, including the score and feedback
    """
    logger.info(f"Evaluating conversation for session_id: {session_id}")

    # Load the checkpointed conversation state for this thread from the
    # role-play agent's memory.
    config = {"configurable": {"thread_id": session_id}}
    snapshot = await role_play_agent().aget_state(config)
    messages = snapshot.values.get("messages", [])

    if not messages:
        # Return an empty result with the same keys as a successful
        # evaluation so callers can rely on a stable shape.
        return {
            "score": 0,
            "feedback": "No conversation found for this session.",
            "strengths": [],
            "improvements": [],
            "suggestions": [],
            "next_steps": [],
            "words_used": [],
            "perfect_response": "",
            "impressive_words": [],
        }
    evaluation_prompt_template = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """# CONVERSATION EVALUATOR - English Learning Assessment Specialist

You are **WISE Evaluator**, an expert English tutor who analyzes conversations between learners and AI roleplay partners. Your job is to provide comprehensive feedback that helps learners improve.

## Evaluation Context
- **Session ID**: {session_id}
- **Learner Level**: {learner_level}
- **Scenario**: {scenario_title} - {scenario_description}
- **Key Vocabulary**: {key_vocabulary}

## Your Evaluation Mission
1. **Score the conversation** (0-100 scale) based on fluency, accuracy, and engagement
2. **Identify strengths** - What did the learner do well?
3. **Pinpoint areas for improvement** - Where can they get better?
4. **Provide specific suggestions** - Concrete actions for improvement
5. **Recommend next steps** - What should they practice next?

## Scoring Criteria

### Fluency (30 points)
- **Flow**: How naturally does the conversation progress?
- **Response time**: Are there appropriate pauses, or unnatural delays?
- **Turn-taking**: Is there a good balance of speaking between learner and AI?

### Accuracy (30 points)
- **Grammar**: Correct sentence structures and verb forms
- **Vocabulary**: Appropriate word choices and usage
- **Pronunciation**: (If audio is available) Clear pronunciation of words

### Engagement (20 points)
- **Relevance**: Staying on topic and within the scenario context
- **Interaction**: Active participation and questions
- **Creativity**: Bringing in personal experiences or unique responses

### Vocabulary Usage (20 points)
- **Range**: Using diverse vocabulary from the scenario
- **Accuracy**: Correct usage of key vocabulary words
- **Complexity**: An appropriate challenge level for the learner

## Response Format Requirements
You must provide your response in the following structured format:

### SCORE: [X/100]
Provide a single overall score out of 100.

### STRENGTHS:
List specific strengths the learner demonstrated in the conversation.

### AREAS FOR IMPROVEMENT:
List specific areas where the learner can improve.

### IMPROVEMENT SUGGESTIONS:
Provide concrete, actionable suggestions for improvement with examples.

### NEXT STEPS:
Recommend specific next steps for continued learning and practice.

## Important Guidelines:
- **Be encouraging**: Focus on growth, not just mistakes
- **Be specific**: Give concrete examples, not vague advice
- **Be appropriate**: Match feedback complexity to the learner's level
- **Be actionable**: Every suggestion should be something they can practice
- **Use markdown**: Structure feedback clearly with headers and bullet points

Remember: Your goal is to help learners feel motivated while giving them clear paths to improvement. Balance honest feedback with positive reinforcement.
""",
            ),
            # The checkpointed conversation history is injected here at runtime.
            ("placeholder", "{messages}"),
        ]
    )
    # Bind the structured output schema so the model's reply is parsed
    # directly into a ResponseFormatter instance.
    chain = evaluation_prompt_template | model.with_structured_output(ResponseFormatter)

    # Call the LLM with the formatted prompt and the conversation history
    structured_output: ResponseFormatter = await chain.ainvoke(
        {
            "session_id": session_id,
            "learner_level": learner_level,
            "scenario_title": scenario_title,
            "scenario_description": scenario_description,
            "key_vocabulary": key_vocabulary,
            "messages": messages,
        }
    )
    # Convert the structured output to a plain dictionary
    result = {
        "score": structured_output.score,
        "feedback": structured_output.feedback,
        "strengths": structured_output.strengths,
        "improvements": structured_output.improvements,
        "suggestions": structured_output.suggestions,
        "next_steps": structured_output.next_steps,
        "words_used": structured_output.words_used,
        "perfect_response": structured_output.perfect_response,
        "impressive_words": structured_output.impressive_words,
    }
    return result
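

# Usage sketch (assumptions: a role-play session has already been checkpointed
# under the given thread ID, and `model` in src.config.llm is configured; the
# session ID and scenario values below are hypothetical):
if __name__ == "__main__":
    import asyncio

    async def _demo() -> None:
        result = await evaluate_conversation(
            session_id="demo-session",  # hypothetical thread ID
            learner_level="beginner",
            scenario_title="Ordering Coffee",  # hypothetical scenario
            scenario_description="Practice ordering drinks at a cafe",
            key_vocabulary="latte, espresso, to-go",
        )
        print(result["score"], result["feedback"])

    asyncio.run(_demo())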