from typing import Any, Dict, List

from langchain_core.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field

from src.agents.role_play.flow import role_play_agent
from src.config.llm import model
from src.utils.logger import logger


# Define the structured output format
class ResponseFormatter(BaseModel):
    """Structured output format for conversation evaluation"""

    score: int = Field(
        ..., description="Overall conversation score out of 100", ge=0, le=100
    )
    feedback: str = Field(..., description="Overall feedback summary")
    strengths: List[str] = Field(..., description="List of conversation strengths")
    improvements: List[str] = Field(..., description="List of areas for improvement")
    suggestions: List[str] = Field(
        ..., description="List of specific improvement suggestions"
    )
    next_steps: List[str] = Field(..., description="List of recommended next steps")
    words_used: List[str] = Field(..., description="List of key words used from the scenario")
    perfect_response: str = Field(..., description="An example of a perfect response for this scenario")
    impressive_words: List[str] = Field(..., description="List of impressive or advanced words used by the learner")
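
# Note: pydantic enforces the Field constraints at parse time, so a score
# outside the 0-100 range (ge=0, le=100) raises a ValidationError before the
# result ever reaches the caller.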


async def evaluate_conversation(
    session_id: str,
    learner_level: str = "beginner",
    scenario_title: str = "",
    scenario_description: str = "",
    key_vocabulary: str = "",
) -> Dict[str, Any]:
    """
    Evaluate a conversation based on the session ID and provide feedback.

    Args:
        session_id: The thread ID for the conversation
        learner_level: The English level of the learner
        scenario_title: Title of the conversation scenario
        scenario_description: Description of the conversation scenario
        key_vocabulary: Key vocabulary words from the scenario

    Returns:
        Dict containing evaluation results including score and feedback
    """
    logger.info(f"Evaluating conversation for session_id: {session_id}")
    config = {"configurable": {"thread_id": session_id}}
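    # Read the persisted conversation state for this thread from the role-play
    # agent's checkpointer; "messages" is empty when the session has no turns yet.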
    snapshot = await role_play_agent().aget_state(config)
    messages = snapshot.values.get("messages", [])
    if not messages:
        return {
            "score": 0,
            "feedback": "No conversation found for this session.",
            "strengths": [],
            "improvements": [],
            "suggestions": [],
            "next_steps": [],
            "words_used": [],
            "perfect_response": "",
            "impressive_words": [],
        }

    evaluation_prompt_template = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """# CONVERSATION EVALUATOR - English Learning Assessment Specialist

You are **WISE Evaluator**, an expert English tutor who analyzes conversations between learners and AI roleplay partners. Your job is to provide comprehensive feedback that helps learners improve.

## Evaluation Context
- **Session ID**: {session_id}
- **Learner Level**: {learner_level}
- **Scenario**: {scenario_title} - {scenario_description}
- **Key Vocabulary**: {key_vocabulary}

## Your Evaluation Mission
1. **Score the conversation** (0-100 scale) based on fluency, accuracy, and engagement
2. **Identify strengths** - What did the learner do well?
3. **Pinpoint areas for improvement** - Where can they get better?
4. **Provide specific suggestions** - Concrete actions for improvement
5. **Recommend next steps** - What should they practice next?

## Scoring Criteria

### Fluency (30 points)
- **Flow**: How naturally does the conversation progress?
- **Response time**: Are there appropriate pauses or unnatural delays?
- **Turn-taking**: Good balance of speaking between learner and AI?

### Accuracy (30 points)
- **Grammar**: Correct sentence structures and verb forms
- **Vocabulary**: Appropriate word choices and usage
- **Pronunciation**: (If audio available) Clear pronunciation of words

### Engagement (20 points)
- **Relevance**: Staying on topic and scenario context
- **Interaction**: Active participation and questions
- **Creativity**: Bringing personal experiences or unique responses

### Vocabulary Usage (20 points)
- **Range**: Using diverse vocabulary from the scenario
- **Accuracy**: Correct usage of key vocabulary words
- **Complexity**: Appropriate challenge level for learner

## Response Format Requirements

Your response is parsed into a structured schema, so populate every field:

- **score**: a single overall score out of 100
- **feedback**: a concise overall summary of the conversation
- **strengths**: specific strengths the learner demonstrated
- **improvements**: specific areas where the learner can improve
- **suggestions**: concrete, actionable suggestions with examples
- **next_steps**: specific next steps for continued learning and practice
- **words_used**: key vocabulary from the scenario that the learner actually used
- **perfect_response**: an example of a perfect response for this scenario
- **impressive_words**: impressive or advanced words the learner used

## Important Guidelines:
- **Be encouraging**: Focus on growth, not just mistakes
- **Be specific**: Give concrete examples, not vague advice
- **Be appropriate**: Match feedback complexity to learner level
- **Be actionable**: Every suggestion should be something they can practice
- **Use markdown**: Format the feedback text with bullet points where it helps readability

Remember: Your goal is to help learners feel motivated while giving them clear paths to improvement. Balance honest feedback with positive reinforcement.
""",
            ),
            ("placeholder", "{messages}"),
        ]
    )
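    # Piping the prompt into model.with_structured_output(ResponseFormatter)
    # makes the chain return a parsed ResponseFormatter instance rather than
    # raw text, so no manual JSON parsing is needed.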
    chain = evaluation_prompt_template | model.with_structured_output(ResponseFormatter)

    # Call the LLM with the formatted prompt
    structured_output: ResponseFormatter = await chain.ainvoke(
        {
            "session_id": session_id,
            "learner_level": learner_level,
            "scenario_title": scenario_title,
            "scenario_description": scenario_description,
            "key_vocabulary": key_vocabulary,
            "messages": messages,
        }
    )

    # Convert structured output to dictionary
    result = {
        "score": structured_output.score,
        "feedback": structured_output.feedback,
        "strengths": structured_output.strengths,
        "improvements": structured_output.improvements,
        "suggestions": structured_output.suggestions,
        "next_steps": structured_output.next_steps,
        "words_used": structured_output.words_used,
        "perfect_response": structured_output.perfect_response,
        "impressive_words": structured_output.impressive_words,
    }

    return result
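

# Usage sketch (illustrative only): the session id and scenario values below are
# hypothetical placeholders; a real run assumes the role_play_agent checkpointer
# already holds messages for this thread and that `model` is configured.
if __name__ == "__main__":
    import asyncio

    demo = asyncio.run(
        evaluate_conversation(
            session_id="demo-session-123",  # hypothetical thread id
            learner_level="intermediate",
            scenario_title="Ordering Coffee",
            scenario_description="The learner orders a drink at a busy cafe.",
            key_vocabulary="latte, receipt, to-go",
        )
    )
    print(demo["score"], demo["feedback"])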