from typing import Any, Dict, List

from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

from src.agents.role_play.flow import role_play_agent
from src.config.llm import model
from src.utils.logger import logger


# Define the structured output format
class ResponseFormatter(BaseModel):
    """Structured output format for conversation evaluation"""

    score: int = Field(
        ..., description="Overall conversation score out of 100", ge=0, le=100
    )
    feedback: str = Field(..., description="Overall feedback summary")
    strengths: List[str] = Field(..., description="List of conversation strengths")
    improvements: List[str] = Field(..., description="List of areas for improvement")
    suggestions: List[str] = Field(
        ..., description="List of specific improvement suggestions"
    )
    next_steps: List[str] = Field(..., description="List of recommended next steps")
    words_used: List[str] = Field(
        ..., description="List of key words used from the scenario"
    )
    perfect_response: str = Field(
        ..., description="An example of a perfect response for this scenario"
    )
    impressive_words: List[str] = Field(
        ..., description="List of impressive or advanced words used by the learner"
    )
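
# NOTE: illustrative sketch only. A parsed ResponseFormatter instance looks
# roughly like this (all field values below are hypothetical examples):
#
#   ResponseFormatter(
#       score=82,
#       feedback="Solid conversation with a few grammar slips.",
#       strengths=["Stayed on topic", "Asked follow-up questions"],
#       improvements=["Past-tense verb forms"],
#       suggestions=["Practice irregular verbs in short dialogues"],
#       next_steps=["Retry the scenario at a faster pace"],
#       words_used=["reservation", "appetizer"],
#       perfect_response="I'd like to book a table for two at seven, please.",
#       impressive_words=["recommendation"],
#   )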


async def evaluate_conversation(
    session_id: str,
    learner_level: str = "beginner",
    scenario_title: str = "",
    scenario_description: str = "",
    key_vocabulary: str = "",
) -> Dict[str, Any]:
"""
Evaluate a conversation based on the session ID and provide feedback.
Args:
session_id: The thread ID for the conversation
learner_level: The English level of the learner
scenario_title: Title of the conversation scenario
scenario_description: Description of the conversation scenario
key_vocabulary: Key vocabulary words from the scenario
Returns:
Dict containing evaluation results including score and feedback
"""
logger.info(f"Evaluating conversation for session_id: {session_id}")
config = {"configurable": {"thread_id": session_id}}
snapshot = await role_play_agent().aget_state(config)
messages = snapshot.values.get("messages", [])
if not messages:
return {
"score": 0,
"feedback": "No conversation found for this session.",
"strengths": [],
"improvements": [],
"suggestions": [],
"next_steps": [],
}
    evaluation_prompt_template = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """# CONVERSATION EVALUATOR - English Learning Assessment Specialist

You are **WISE Evaluator**, an expert English tutor who analyzes conversations between learners and AI roleplay partners. Your job is to provide comprehensive feedback that helps learners improve.

## Evaluation Context
- **Session ID**: {session_id}
- **Learner Level**: {learner_level}
- **Scenario**: {scenario_title} - {scenario_description}
- **Key Vocabulary**: {key_vocabulary}

## Your Evaluation Mission
1. **Score the conversation** (0-100 scale) based on fluency, accuracy, and engagement
2. **Identify strengths** - What did the learner do well?
3. **Pinpoint areas for improvement** - Where can they get better?
4. **Provide specific suggestions** - Concrete actions for improvement
5. **Recommend next steps** - What should they practice next?

## Scoring Criteria

### Fluency (30 points)
- **Flow**: How naturally does the conversation progress?
- **Response time**: Are there appropriate pauses or unnatural delays?
- **Turn-taking**: Is there a good balance of speaking between learner and AI?

### Accuracy (30 points)
- **Grammar**: Correct sentence structures and verb forms
- **Vocabulary**: Appropriate word choices and usage
- **Pronunciation**: (If audio is available) Clear pronunciation of words

### Engagement (20 points)
- **Relevance**: Staying on topic and within the scenario context
- **Interaction**: Active participation and questions
- **Creativity**: Bringing in personal experiences or unique responses

### Vocabulary Usage (20 points)
- **Range**: Using diverse vocabulary from the scenario
- **Accuracy**: Correct usage of key vocabulary words
- **Complexity**: An appropriate challenge level for the learner

## Response Format Requirements
You must provide your response in the following structured format:

### SCORE: [X/100]
Provide a single overall score out of 100.

### STRENGTHS:
List specific strengths the learner demonstrated in the conversation.

### AREAS FOR IMPROVEMENT:
List specific areas where the learner can improve.

### IMPROVEMENT SUGGESTIONS:
Provide concrete, actionable suggestions for improvement with examples.

### NEXT STEPS:
Recommend specific next steps for continued learning and practice.

## Important Guidelines
- **Be encouraging**: Focus on growth, not just mistakes
- **Be specific**: Give concrete examples, not vague advice
- **Be appropriate**: Match feedback complexity to the learner's level
- **Be actionable**: Every suggestion should be something they can practice
- **Use markdown**: Structure feedback clearly with headers and bullet points

Remember: Your goal is to help learners feel motivated while giving them clear paths to improvement. Balance honest feedback with positive reinforcement.
""",
            ),
            ("placeholder", "{messages}"),
        ]
    )
    # Chain the prompt into the model, requesting structured output
    chain = evaluation_prompt_template | model.with_structured_output(ResponseFormatter)

    # Call the LLM with the formatted prompt and the conversation transcript
    structured_output: ResponseFormatter = await chain.ainvoke(
        {
            "session_id": session_id,
            "learner_level": learner_level,
            "scenario_title": scenario_title,
            "scenario_description": scenario_description,
            "key_vocabulary": key_vocabulary,
            "messages": messages,
        }
    )

    # Convert the structured output into a plain dictionary for the caller
    return {
        "score": structured_output.score,
        "feedback": structured_output.feedback,
        "strengths": structured_output.strengths,
        "improvements": structured_output.improvements,
        "suggestions": structured_output.suggestions,
        "next_steps": structured_output.next_steps,
        "words_used": structured_output.words_used,
        "perfect_response": structured_output.perfect_response,
        "impressive_words": structured_output.impressive_words,
    }
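

if __name__ == "__main__":
    # Minimal usage sketch, assuming a role-play conversation has already been
    # checkpointed under the given thread_id. The session_id and scenario
    # values below are hypothetical placeholders.
    import asyncio

    async def _demo() -> None:
        result = await evaluate_conversation(
            session_id="demo-session-123",
            learner_level="intermediate",
            scenario_title="Ordering at a restaurant",
            scenario_description="The learner orders dinner and asks about the menu.",
            key_vocabulary="reservation, appetizer, bill",
        )
        print(f"Score: {result['score']}/100")
        print(result["feedback"])

    asyncio.run(_demo())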