Spaces:
Sleeping
Sleeping
| from langchain_core.prompts import ChatPromptTemplate | |
| from pydantic import BaseModel, Field | |
| from src.config.llm import model | |
| from src.utils.logger import logger | |
| from .prompt import evaluation_prompt | |
| from langgraph.checkpoint.memory import InMemorySaver | |
| from typing import List, Dict, Any | |
| from src.agents.role_play.flow import role_play_agent | |
| # Define the structured output format | |
class ResponseFormatter(BaseModel):
    """Schema for the structured evaluation the LLM must return.

    Every field is required; `score` is constrained to the 0-100 range.
    """

    # Core assessment
    score: int = Field(
        description="Overall conversation score out of 100", ge=0, le=100
    )
    feedback: str = Field(description="Overall feedback summary")

    # Qualitative breakdown
    strengths: List[str] = Field(description="List of conversation strengths")
    improvements: List[str] = Field(description="List of areas for improvement")
    suggestions: List[str] = Field(
        description="List of specific improvement suggestions"
    )
    next_steps: List[str] = Field(description="List of recommended next steps")

    # Vocabulary analysis
    words_used: List[str] = Field(
        description="List of key words used from the scenario"
    )
    perfect_response: str = Field(
        description="An example of a perfect response for this scenario"
    )
    impressive_words: List[str] = Field(
        description="List of impressive or advanced words used by the learner"
    )
# Evaluate a stored conversation session and produce structured feedback
async def evaluate_conversation(
    session_id: str,
    learner_level: str = "beginner",
    scenario_title: str = "",
    scenario_description: str = "",
    key_vocabulary: str = "",
) -> Dict[str, Any]:
    """
    Evaluate a conversation based on the session ID and provide feedback.

    Args:
        session_id: The thread ID for the conversation
        learner_level: The English level of the learner
        scenario_title: Title of the conversation scenario
        scenario_description: Description of the conversation scenario
        key_vocabulary: Key vocabulary words from the scenario

    Returns:
        Dict containing evaluation results. Always has the same key set:
        score, feedback, strengths, improvements, suggestions, next_steps,
        words_used, perfect_response, impressive_words.
    """
    logger.info(f"Evaluating conversation for session_id: {session_id}")

    # Pull the persisted message history for this thread from the agent's
    # checkpointed state.
    config = {"configurable": {"thread_id": session_id}}
    snapshot = await role_play_agent().aget_state(config)
    messages = snapshot.values.get("messages", [])

    if not messages:
        # Fix: previously this branch omitted words_used, perfect_response,
        # and impressive_words, so callers saw an inconsistent dict shape.
        # Return the full key set with empty values instead.
        return {
            "score": 0,
            "feedback": "No conversation found for this session.",
            "strengths": [],
            "improvements": [],
            "suggestions": [],
            "next_steps": [],
            "words_used": [],
            "perfect_response": "",
            "impressive_words": [],
        }

    # Build the evaluation prompt; scenario metadata is injected as template
    # variables and the conversation transcript is appended via the
    # "placeholder" message slot.
    evaluation_prompt_template = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                """# CONVERSATION EVALUATOR - English Learning Assessment Specialist
You are **WISE Evaluator**, an expert English tutor who analyzes conversations between learners and AI roleplay partners. Your job is to provide comprehensive feedback that helps learners improve.
## Evaluation Context
- **Session ID**: {session_id}
- **Learner Level**: {learner_level}
- **Scenario**: {scenario_title} - {scenario_description}
- **Key Vocabulary**: {key_vocabulary}
## Your Evaluation Mission
1. **Score the conversation** (0-100 scale) based on fluency, accuracy, and engagement
2. **Identify strengths** - What did the learner do well?
3. **Pinpoint areas for improvement** - Where can they get better?
4. **Provide specific suggestions** - Concrete actions for improvement
5. **Recommend next steps** - What should they practice next?
## Scoring Criteria
### Fluency (30 points)
- **Flow**: How naturally does the conversation progress?
- **Response time**: Are there appropriate pauses or unnatural delays?
- **Turn-taking**: Good balance of speaking between learner and AI?
### Accuracy (30 points)
- **Grammar**: Correct sentence structures and verb forms
- **Vocabulary**: Appropriate word choices and usage
- **Pronunciation**: (If audio available) Clear pronunciation of words
### Engagement (20 points)
- **Relevance**: Staying on topic and scenario context
- **Interaction**: Active participation and questions
- **Creativity**: Bringing personal experiences or unique responses
### Vocabulary Usage (20 points)
- **Range**: Using diverse vocabulary from the scenario
- **Accuracy**: Correct usage of key vocabulary words
- **Complexity**: Appropriate challenge level for learner
## Response Format Requirements
You must provide your response in the following structured format:
### SCORE: [X/100]
Provide a single overall score out of 100.
### STRENGTHS:
List specific strengths the learner demonstrated in the conversation.
### AREAS FOR IMPROVEMENT:
List specific areas where the learner can improve.
### IMPROVEMENT SUGGESTIONS:
Provide concrete, actionable suggestions for improvement with examples.
### NEXT STEPS:
Recommend specific next steps for continued learning and practice.
## Important Guidelines:
- **Be encouraging**: Focus on growth, not just mistakes
- **Be specific**: Give concrete examples, not vague advice
- **Be appropriate**: Match feedback complexity to learner level
- **Be actionable**: Every suggestion should be something they can practice
- **Use markdown**: Structure feedback clearly with headers and bullet points
Remember: Your goal is to help learners feel motivated while giving them clear paths to improvement. Balance honest feedback with positive reinforcement.
""",
            ),
            ("placeholder", "{messages}"),
        ]
    )

    # Pipe the prompt into the model with structured output so the response
    # is parsed/validated directly into a ResponseFormatter instance.
    chain = evaluation_prompt_template | model.with_structured_output(ResponseFormatter)
    structured_output: ResponseFormatter = await chain.ainvoke(
        {
            "session_id": session_id,
            "learner_level": learner_level,
            "scenario_title": scenario_title,
            "scenario_description": scenario_description,
            "key_vocabulary": key_vocabulary,
            "messages": messages,
        }
    )

    # Flatten the validated model into a plain dict for callers.
    return {
        "score": structured_output.score,
        "feedback": structured_output.feedback,
        "strengths": structured_output.strengths,
        "improvements": structured_output.improvements,
        "suggestions": structured_output.suggestions,
        "next_steps": structured_output.next_steps,
        "words_used": structured_output.words_used,
        "perfect_response": structured_output.perfect_response,
        "impressive_words": structured_output.impressive_words,
    }