# AgenticInterviewer/interview_agents.py
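"""CrewAI agents and tasks for generating interview questions from a JD/CV
pair and for evaluating interview transcripts, plus an MCP-backed
sentiment-analysis tool used during behavioral evaluation."""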
import os
import random
from crewai import Agent, Task, Crew, Process, LLM
from crewai_tools import SerperDevTool
from crewai.tools import BaseTool
import asyncio
from mcp import ClientSession
from mcp.client.sse import sse_client
# Define the LLM with higher temperature for variety
def get_llm(api_key):
return LLM(
model="gemini/gemini-2.5-flash",
api_key=api_key,
temperature=0.9 # Higher temperature for more creative/varied questions
)
class InterviewAgents:
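"""Factory for the three question-generation agents: a technical
interviewer, a personality/culture-fit specialist, and a director
who compiles the final plan."""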
def __init__(self, api_key):
self.llm = get_llm(api_key)
self.serper_tool = SerperDevTool()
def technical_interviewer(self):
return Agent(
role='Technical Interviewer',
goal='Analyze the Job Description and CV to generate relevant technical interview questions.',
backstory='You are an expert technical recruiter with years of experience in assessing candidate skills against job requirements. You focus on hard skills and technical proficiency.',
llm=self.llm,
verbose=True
)
def personality_interviewer(self):
return Agent(
role='Personality & Culture Fit Specialist',
goal='Generate behavioral and personality-based interview questions using online resources to ensure best practices.',
backstory='You are an organizational psychologist specializing in culture fit and soft skills. You use data-driven approaches and current trends to ask meaningful behavioral questions.',
tools=[self.serper_tool],
llm=self.llm,
verbose=True
)
def interview_director(self):
return Agent(
role='Interview Director',
goal='Compile the final interview plan and system instructions.',
backstory='You are the Lead Interviewer. You oversee the process and ensure a balanced interview. You combine inputs from technical and personality specialists to create a cohesive interview script.',
llm=self.llm,
reasoning=True,
memory=True,
verbose=True
)
class InterviewTasks:
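"""Builds the question-generation tasks and the final compilation task,
splitting num_questions roughly 80/20 technical/behavioral."""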
def __init__(self, jd_text, cv_text, num_questions):
self.jd_text = jd_text
self.cv_text = cv_text
self.num_questions = num_questions
# Split ~80/20 technical/behavioral, clamped so the two counts always
# sum to exactly num_questions, as the compile task requires.
self.n_tech = min(max(1, round(num_questions * 0.8)), max(1, num_questions - 1))
self.n_psych = num_questions - self.n_tech
def generate_technical_questions(self, agent):
# Add randomization for variety
seed = random.randint(1000, 9999)
return Task(
description=f"""
Analyze the following Job Description (JD) and Curriculum Vitae (CV).
JD: {self.jd_text[:2000]}...
CV: {self.cv_text[:2000]}...
IMPORTANT: Use seed {seed} to ensure variety. Generate {self.n_tech} UNIQUE technical interview questions.
- Each question should be DIFFERENT from common interview questions
- Focus on specific skills mentioned in the JD
- Ask about practical scenarios or real-world applications
- Keep questions VERY SHORT (max 15 words) for voice conversation
- Make questions open-ended to encourage discussion
- Vary question types: scenario-based, problem-solving, experience-based
Example formats:
- "How would you handle [specific technical scenario]?"
- "Describe your experience with [technology]."
- "What's your approach to [technical challenge]?"
""",
expected_output=f"A list of {self.n_tech} unique, concise technical questions (max 15 words each).",
agent=agent
)
def generate_personality_questions(self, agent):
# Add randomization for variety
seed = random.randint(1000, 9999)
return Task(
description=f"""
Analyze the JD and CV to understand the company culture and required soft skills.
JD: {self.jd_text[:2000]}...
IMPORTANT: Use seed {seed} to ensure variety. Generate {self.n_psych} UNIQUE behavioral/personality questions.
- Use the Serper tool to find CURRENT, trending behavioral interview questions
- Avoid cliché questions like "What's your greatest weakness?"
- Focus on real scenarios and past experiences
- Keep questions VERY SHORT (max 15 words) for voice conversation
- Make questions conversational and natural
Example formats:
- "Tell me about a time you faced [specific challenge]."
- "How do you handle [workplace situation]?"
- "Describe a situation where you [behavioral trait]."
""",
expected_output=f"A list of {self.n_psych} unique, concise behavioral questions (max 15 words each).",
agent=agent
)
def compile_interview(self, agent, tech_task, psych_task):
return Task(
description=f"""
Compile the final interview plan from the technical and personality questions.
CRITICAL REQUIREMENTS:
1. QUESTIONS LIST:
- Combine all questions into a single numbered list
- Total must be exactly {self.num_questions} questions
- Mix: ~80% technical, ~20% behavioral
2. SYSTEM INSTRUCTION (MUST BE CONCISE AND CONVERSATIONAL):
Create a SHORT, natural system prompt for a voice AI interviewer.
**IMPORTANT: The system instruction MUST include the complete list of questions to ask.**
Format the system instruction like this:
"You are Alex, a friendly professional interviewer conducting a voice interview. Start with as soon as the connection is established 'Hi! I'm Alex. Let's begin with the first question.
YOUR QUESTIONS (ask these in order):
1. [First question]
2. [Second question]
3. [Third question]
... [all questions]
CONVERSATION RULES:
- Ask ONE question at a time and WAIT for the complete answer
- Keep responses SHORT (1-2 sentences max)
- If interrupted, STOP talking immediately and listen
- After each answer, briefly acknowledge (e.g., 'Great!', 'I see', 'Thanks') then ask the next question
- Use a warm, conversational tone
- End with: 'Thanks for your time today!'
Remember: Listen actively, don't interrupt, and keep it conversational."
Output Format (JSON):
{{
"questions_markdown": "# Interview Questions\\n\\n1. [Question 1]\\n2. [Question 2]...",
"system_instruction": "[Complete system instruction with embedded questions list as shown above]"
}}
""",
expected_output="A JSON object with 'questions_markdown' (formatted list) and 'system_instruction' (concise prompt with embedded questions, under 300 words).",
agent=agent,
context=[tech_task, psych_task]
)
def run_interview_crew(jd_text, cv_text, num_questions, api_key):
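"""Run the CrewAI interview crew to produce the question list and the voice-agent system instruction."""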
agents = InterviewAgents(api_key)
tasks = InterviewTasks(jd_text, cv_text, num_questions)
tech_agent = agents.technical_interviewer()
psych_agent = agents.personality_interviewer()
director_agent = agents.interview_director()
tech_task = tasks.generate_technical_questions(tech_agent)
psych_task = tasks.generate_personality_questions(psych_agent)
compile_task = tasks.compile_interview(director_agent, tech_task, psych_task)
crew = Crew(
agents=[tech_agent, psych_agent, director_agent],
tasks=[tech_task, psych_task, compile_task],
process=Process.sequential,
verbose=True
)
result = crew.kickoff()
return result
# --- Custom Tools ---
class SentimentAnalysisTool(BaseTool):
name: str = "Sentiment Analysis Tool"
description: str = "Analyzes the sentiment of a given text. Returns 'Positive', 'Negative', or 'Neutral'. Use this to gauge the candidate's attitude."
def _run(self, text: str) -> str:
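"""Call the remote sentiment-analysis MCP server over SSE and return its verdict."""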
async def call_mcp(text_input):
sse_url = "https://uq-sentimentanalysismcpserver.hf.space/gradio_api/mcp/sse"
try:
async with sse_client(sse_url) as (read, write):
async with ClientSession(read, write) as session:
await session.initialize()
result = await session.call_tool(
"SentimentAnalysisMCPserver_predict_sentiment",
arguments={"text": text_input}
)
if result.content and len(result.content) > 0:
return result.content[0].text
return "Error: No content returned"
except Exception as e:
return f"Error connecting to MCP: {str(e)}"
try:
    # app.py invokes this sync tool via asyncio.to_thread, so this worker
    # thread has no running event loop and asyncio.run() can safely create
    # a fresh one. (loop.run_until_complete() would raise if a loop were
    # already running, so a new loop per call is the robust choice here.)
    return asyncio.run(call_mcp(text))
except Exception as e:
    return f"Error analyzing sentiment: {str(e)}"
# --- Evaluation Agents ---
def get_evaluation_llm(api_key):
return LLM(
model="gemini/gemini-2.5-flash",
api_key=api_key,
temperature=0.7 # Lower than the interview LLM's 0.9 for more consistent evaluation
)
class EvaluationAgents:
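"""Factory for the transcript-evaluation agents: a technical evaluator,
a behavioral evaluator, and a director who compiles the scorecard."""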
def __init__(self, api_key):
self.llm = get_evaluation_llm(api_key)
self.sentiment_tool = SentimentAnalysisTool()
def technical_evaluator(self):
return Agent(
role='Technical Skills Evaluator',
goal='Evaluate the candidate\'s technical skills and knowledge based on their interview responses.',
backstory='You are an expert technical recruiter with deep knowledge in assessing technical competencies. You analyze answers for depth, accuracy, and practical application of skills.',
llm=self.llm,
verbose=True
)
def behavioral_evaluator(self):
return Agent(
role='Behavioral & Culture Fit Evaluator',
goal='Assess the candidate\'s soft skills, communication, and cultural fit based on behavioral questions.',
backstory='You are an organizational psychologist specializing in evaluating interpersonal skills, problem-solving approaches, and alignment with company culture. You look for evidence of leadership, teamwork, and adaptability.',
tools=[self.sentiment_tool],
llm=self.llm,
verbose=True
)
def evaluation_director(self):
return Agent(
role='Evaluation Director',
goal='Compile a comprehensive scorecard with scores, feedback, and hiring recommendation.',
backstory='You are the Lead Evaluator responsible for synthesizing all evaluation inputs into a clear, actionable scorecard. You ensure fairness and consistency in scoring.',
llm=self.llm,
reasoning=True,
memory=True,
verbose=True
)
class EvaluationTasks:
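"""Builds the per-category evaluation tasks and the scorecard compilation
task from a transcript, the JD, and the question list."""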
def __init__(self, transcript, jd_text, questions_text):
self.transcript = transcript
self.jd_text = jd_text
self.questions_text = questions_text
def evaluate_technical_skills(self, agent):
return Task(
description=f"""
Evaluate the candidate's technical performance based on the interview transcript.
TRANSCRIPT:
{self.transcript[:3000]}
JOB DESCRIPTION:
{self.jd_text[:2000]}
INTERVIEW QUESTIONS:
{self.questions_text[:2000]}
TASK:
**CRITICAL: ONLY evaluate TECHNICAL questions. Identify which questions are technical (related to hard skills, technologies, tools, programming, systems, etc.) and ONLY score those.**
For EACH technical question identified, provide:
1. The exact question text
2. Technical competency score (0-10)
3. Detailed feedback on technical knowledge, problem-solving approach, and depth of understanding
Also provide:
4. Overall technical strengths
5. Overall technical weaknesses
6. Alignment with job requirements
Focus on:
- Accuracy and correctness of technical answers
- Depth of knowledge demonstrated
- Problem-solving methodology
- Practical application of skills
- Communication of technical concepts
**DO NOT evaluate behavioral, personality, or soft skills questions. Only technical questions.**
""",
expected_output="A detailed technical evaluation with scores (0-10) and feedback for EACH TECHNICAL QUESTION ONLY. Format: For each technical question, provide: Question | Score (0-10) | Feedback. Plus overall technical strengths and weaknesses.",
agent=agent
)
def evaluate_behavioral_skills(self, agent):
return Task(
description=f"""
Evaluate the candidate's behavioral and soft skills based on the interview transcript.
TRANSCRIPT:
{self.transcript[:3000]}
JOB DESCRIPTION:
{self.jd_text[:2000]}
INTERVIEW QUESTIONS:
{self.questions_text[:2000]}
TASK:
**CRITICAL: ONLY evaluate BEHAVIORAL/SOFT SKILLS questions. Identify which questions are behavioral (related to past experiences, teamwork, leadership, culture fit, problem-solving scenarios, etc.) and ONLY score those.**
For EACH behavioral question identified, provide:
1. The exact question text
2. Behavioral competency score (0-10)
3. Sentiment Analysis: Use the 'Sentiment Analysis Tool' to analyze the candidate's answer. Include the result (Positive/Negative/Neutral) in your evaluation.
4. Detailed feedback on communication, examples shared, and soft skills demonstrated
Also provide:
5. Assessment of communication skills, leadership, teamwork, and adaptability
6. Cultural fit evaluation
7. Examples of demonstrated soft skills
Focus on:
- Quality of examples and stories shared
- Problem-solving approach in real situations
- Interpersonal skills and communication clarity
- Alignment with company values and culture
- Emotional intelligence and self-awareness
**DO NOT evaluate technical, programming, or hard skills questions. Only behavioral/soft skills questions.**
""",
expected_output="A detailed behavioral evaluation with scores (0-10) and feedback for EACH BEHAVIORAL QUESTION ONLY. Format: For each behavioral question, provide: Question | Score (0-10) | Feedback. Plus overall soft skills assessment and culture fit analysis.",
agent=agent
)
def compile_scorecard(self, agent, tech_task, behavioral_task):
return Task(
description=f"""
Compile a comprehensive interview scorecard from technical and behavioral evaluations.
You have received evaluations from:
1. Technical Evaluator - evaluated ONLY technical questions
2. Behavioral Evaluator - evaluated ONLY behavioral questions
CRITICAL REQUIREMENTS:
1. SUMMARY:
- Brief overview of candidate performance (2-3 sentences)
2. SCORECARD TABLE:
- Create a markdown table with columns: Question | Category | Score (0-10) | Feedback
- **CRITICAL: Each question must appear EXACTLY ONCE in the table - NO DUPLICATES**
- Merge the two evaluations: take technical questions from Technical Evaluator's output, behavioral questions from Behavioral Evaluator's output
- For each technical question: Use the exact question text, Category = "Technical", and the score/feedback from Technical Evaluator
- For each behavioral question: Use the exact question text, Category = "Behavioral", and the score/feedback from Behavioral Evaluator
- If a question appears in both evaluations, that's an error - each question should only be in one category
- List all questions in the order they appear in the interview
3. OVERALL SCORES:
- Average Technical Score
- Average Behavioral Score
- Overall Score
4. STRENGTHS:
- List 3-5 key strengths demonstrated
5. AREAS FOR IMPROVEMENT:
- List 2-4 areas where the candidate could improve
6. FINAL DECISION:
- One of: "Strong Hire", "Hire", "No Hire"
- Brief justification (1-2 sentences)
Output Format (Markdown):
# Interview Scorecard
## Summary
[Brief overview]
## Scorecard
| Question | Category | Score | Feedback |
|----------|----------|-------|----------|
| [Q1] | Technical | X/10 | [Feedback] |
...
## Overall Scores
- **Technical Average**: X/10
- **Behavioral Average**: X/10
- **Overall Score**: X/10
## Strengths
1. [Strength 1]
2. [Strength 2]
...
## Areas for Improvement
1. [Area 1]
2. [Area 2]
...
## Final Decision
**Decision**: [Strong Hire/Hire/No Hire]
[Justification]
""",
expected_output="A comprehensive markdown scorecard with summary, detailed table, scores, strengths, weaknesses, and hiring recommendation.",
agent=agent,
context=[tech_task, behavioral_task]
)
def run_evaluation_crew(transcript, jd_text, questions_text, api_key):
"""Run CrewAI evaluation crew to generate scorecard"""
agents = EvaluationAgents(api_key)
tasks = EvaluationTasks(transcript, jd_text, questions_text)
tech_evaluator = agents.technical_evaluator()
behavioral_evaluator = agents.behavioral_evaluator()
director = agents.evaluation_director()
tech_task = tasks.evaluate_technical_skills(tech_evaluator)
behavioral_task = tasks.evaluate_behavioral_skills(behavioral_evaluator)
compile_task = tasks.compile_scorecard(director, tech_task, behavioral_task)
crew = Crew(
agents=[tech_evaluator, behavioral_evaluator, director],
tasks=[tech_task, behavioral_task, compile_task],
process=Process.sequential,
verbose=True
)
result = crew.kickoff()
return result
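# --- Example usage ---
# A minimal sketch of how the interview crew might be invoked directly; the
# env var name and sample JD/CV strings below are illustrative assumptions,
# and app.py remains the real entry point. Note that SerperDevTool expects
# SERPER_API_KEY to be set in the environment.
if __name__ == "__main__":
    gemini_key = os.environ.get("GEMINI_API_KEY", "")  # assumed env var name
    sample_jd = "Senior Python engineer: FastAPI, PostgreSQL, AWS."
    sample_cv = "Five years of Python; built and shipped REST APIs with FastAPI."
    plan = run_interview_crew(sample_jd, sample_cv, num_questions=5, api_key=gemini_key)
    print(plan)  # printing the CrewOutput shows the director's final JSON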