# AgenticInterviewer/interview_agents.py
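"""CrewAI agents and tasks for generating interview questions from a JD/CV
pair and for evaluating interview transcripts, plus an MCP-backed
sentiment-analysis tool used during behavioral evaluation."""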
import os
import random
from crewai import Agent, Task, Crew, Process, LLM
from crewai_tools import SerperDevTool
from crewai.tools import BaseTool
import asyncio
from mcp import ClientSession
from mcp.client.sse import sse_client
# Define the LLM with higher temperature for variety
def get_llm(api_key):
return LLM(
model="gemini/gemini-2.5-flash",
api_key=api_key,
temperature=0.9 # Higher temperature for more creative/varied questions
)
class InterviewAgents:
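"""Factory for the three question-generation agents: a technical
interviewer, a personality/culture-fit specialist, and a director
who compiles the final plan."""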
def __init__(self, api_key):
self.llm = get_llm(api_key)
self.serper_tool = SerperDevTool()
def technical_interviewer(self):
return Agent(
role='Technical Interviewer',
goal='Analyze the Job Description and CV to generate relevant technical interview questions.',
backstory='You are an expert technical recruiter with years of experience in assessing candidate skills against job requirements. You focus on hard skills and technical proficiency.',
llm=self.llm,
verbose=True
)
def personality_interviewer(self):
return Agent(
role='Personality & Culture Fit Specialist',
goal='Generate behavioral and personality-based interview questions using online resources to ensure best practices.',
backstory='You are an organizational psychologist specializing in culture fit and soft skills. You use data-driven approaches and current trends to ask meaningful behavioral questions.',
tools=[self.serper_tool],
llm=self.llm,
verbose=True
)
def interview_director(self):
return Agent(
role='Interview Director',
goal='Compile the final interview plan and system instructions.',
backstory='You are the Lead Interviewer. You oversee the process and ensure a balanced interview. You combine inputs from technical and personality specialists to create a cohesive interview script.',
llm=self.llm,
reasoning=True,
memory=True,
verbose=True
)
class InterviewTasks:
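"""Builds the question-generation tasks and the final compilation task,
splitting num_questions roughly 80/20 technical/behavioral."""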
def __init__(self, jd_text, cv_text, num_questions):
self.jd_text = jd_text
self.cv_text = cv_text
self.num_questions = num_questions
# Split ~80/20 technical/behavioral, clamped so the two counts always
# sum to exactly num_questions, as the compile task requires.
self.n_tech = min(max(1, round(num_questions * 0.8)), max(1, num_questions - 1))
self.n_psych = num_questions - self.n_tech
def generate_technical_questions(self, agent):
# Add randomization for variety
seed = random.randint(1000, 9999)
return Task(
description=f"""
Analyze the following Job Description (JD) and Curriculum Vitae (CV).
JD: {self.jd_text[:2000]}...
CV: {self.cv_text[:2000]}...
IMPORTANT: Use seed {seed} to ensure variety. Generate {self.n_tech} UNIQUE technical interview questions.
- Each question should be DIFFERENT from common interview questions
- Focus on specific skills mentioned in the JD
- Ask about practical scenarios or real-world applications
- Keep questions VERY SHORT (max 15 words) for voice conversation
- Make questions open-ended to encourage discussion
- Vary question types: scenario-based, problem-solving, experience-based
Example formats:
- "How would you handle [specific technical scenario]?"
- "Describe your experience with [technology]."
- "What's your approach to [technical challenge]?"
""",
expected_output=f"A list of {self.n_tech} unique, concise technical questions (max 15 words each).",
agent=agent
)
def generate_personality_questions(self, agent):
# Add randomization for variety
seed = random.randint(1000, 9999)
return Task(
description=f"""
Analyze the JD and CV to understand the company culture and required soft skills.
JD: {self.jd_text[:2000]}...
IMPORTANT: Use seed {seed} to ensure variety. Generate {self.n_psych} UNIQUE behavioral/personality questions.
- Use the Serper tool to find CURRENT, trending behavioral interview questions
- Avoid cliché questions like "What's your greatest weakness?"
- Focus on real scenarios and past experiences
- Keep questions VERY SHORT (max 15 words) for voice conversation
- Make questions conversational and natural
Example formats:
- "Tell me about a time you faced [specific challenge]."
- "How do you handle [workplace situation]?"
- "Describe a situation where you [behavioral trait]."
""",
expected_output=f"A list of {self.n_psych} unique, concise behavioral questions (max 15 words each).",
agent=agent
)
def compile_interview(self, agent, tech_task, psych_task):
return Task(
description=f"""
Compile the final interview plan from the technical and personality questions.
CRITICAL REQUIREMENTS:
1. QUESTIONS LIST:
- Combine all questions into a single numbered list
- Total must be exactly {self.num_questions} questions
- Mix: ~80% technical, ~20% behavioral
2. SYSTEM INSTRUCTION (MUST BE CONCISE AND CONVERSATIONAL):
Create a SHORT, natural system prompt for a voice AI interviewer.
**IMPORTANT: The system instruction MUST include the complete list of questions to ask.**
Format the system instruction like this:
"You are Alex, a friendly professional interviewer conducting a voice interview. Start with as soon as the connection is established 'Hi! I'm Alex. Let's begin with the first question.
YOUR QUESTIONS (ask these in order):
1. [First question]
2. [Second question]
3. [Third question]
... [all questions]
CONVERSATION RULES:
- Ask ONE question at a time and WAIT for the complete answer
- Keep responses SHORT (1-2 sentences max)
- If interrupted, STOP talking immediately and listen
- After each answer, briefly acknowledge (e.g., 'Great!', 'I see', 'Thanks') then ask the next question
- Use a warm, conversational tone
- End with: 'Thanks for your time today!'
Remember: Listen actively, don't interrupt, and keep it conversational."
Output Format (JSON):
{{
"questions_markdown": "# Interview Questions\\n\\n1. [Question 1]\\n2. [Question 2]...",
"system_instruction": "[Complete system instruction with embedded questions list as shown above]"
}}
""",
expected_output="A JSON object with 'questions_markdown' (formatted list) and 'system_instruction' (concise prompt with embedded questions, under 300 words).",
agent=agent,
context=[tech_task, psych_task]
)
def run_interview_crew(jd_text, cv_text, num_questions, api_key):
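"""Run the CrewAI interview crew to produce the question list and the voice-agent system instruction."""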
agents = InterviewAgents(api_key)
tasks = InterviewTasks(jd_text, cv_text, num_questions)
tech_agent = agents.technical_interviewer()
psych_agent = agents.personality_interviewer()
director_agent = agents.interview_director()
tech_task = tasks.generate_technical_questions(tech_agent)
psych_task = tasks.generate_personality_questions(psych_agent)
compile_task = tasks.compile_interview(director_agent, tech_task, psych_task)
crew = Crew(
agents=[tech_agent, psych_agent, director_agent],
tasks=[tech_task, psych_task, compile_task],
process=Process.sequential,
verbose=True
)
result = crew.kickoff()
return result
# --- Custom Tools ---
class SentimentAnalysisTool(BaseTool):
name: str = "Sentiment Analysis Tool"
description: str = "Analyzes the sentiment of a given text. Returns 'Positive', 'Negative', or 'Neutral'. Use this to gauge the candidate's attitude."
def _run(self, text: str) -> str:
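"""Call the remote sentiment-analysis MCP server over SSE and return its verdict."""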
async def call_mcp(text_input):
sse_url = "https://uq-sentimentanalysismcpserver.hf.space/gradio_api/mcp/sse"
try:
async with sse_client(sse_url) as (read, write):
async with ClientSession(read, write) as session:
await session.initialize()
result = await session.call_tool(
"SentimentAnalysisMCPserver_predict_sentiment",
arguments={"text": text_input}
)
if result.content and len(result.content) > 0:
return result.content[0].text
return "Error: No content returned"
except Exception as e:
return f"Error connecting to MCP: {str(e)}"
try:
    # app.py invokes this sync tool via asyncio.to_thread, so this worker
    # thread has no running event loop and asyncio.run() can safely create
    # a fresh one. (loop.run_until_complete() would raise if a loop were
    # already running, so a new loop per call is the robust choice here.)
    return asyncio.run(call_mcp(text))
except Exception as e:
    return f"Error analyzing sentiment: {str(e)}"
# --- Evaluation Agents ---
def get_evaluation_llm(api_key):
return LLM(
model="gemini/gemini-2.5-flash",
api_key=api_key,
temperature=0.7 # Lower than the interview LLM's 0.9 for more consistent evaluation
)
class EvaluationAgents:
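"""Factory for the transcript-evaluation agents: a technical evaluator,
a behavioral evaluator, and a director who compiles the scorecard."""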
def __init__(self, api_key):
self.llm = get_evaluation_llm(api_key)
self.sentiment_tool = SentimentAnalysisTool()
def technical_evaluator(self):
return Agent(
role='Technical Skills Evaluator',
goal='Evaluate the candidate\'s technical skills and knowledge based on their interview responses.',
backstory='You are an expert technical recruiter with deep knowledge in assessing technical competencies. You analyze answers for depth, accuracy, and practical application of skills.',
llm=self.llm,
verbose=True
)
def behavioral_evaluator(self):
return Agent(
role='Behavioral & Culture Fit Evaluator',
goal='Assess the candidate\'s soft skills, communication, and cultural fit based on behavioral questions.',
backstory='You are an organizational psychologist specializing in evaluating interpersonal skills, problem-solving approaches, and alignment with company culture. You look for evidence of leadership, teamwork, and adaptability.',
tools=[self.sentiment_tool],
llm=self.llm,
verbose=True
)
def evaluation_director(self):
return Agent(
role='Evaluation Director',
goal='Compile a comprehensive scorecard with scores, feedback, and hiring recommendation.',
backstory='You are the Lead Evaluator responsible for synthesizing all evaluation inputs into a clear, actionable scorecard. You ensure fairness and consistency in scoring.',
llm=self.llm,
reasoning=True,
memory=True,
verbose=True
)
class EvaluationTasks:
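"""Builds the per-category evaluation tasks and the scorecard compilation
task from a transcript, the JD, and the question list."""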
def __init__(self, transcript, jd_text, questions_text):
self.transcript = transcript
self.jd_text = jd_text
self.questions_text = questions_text
def evaluate_technical_skills(self, agent):
return Task(
description=f"""
Evaluate the candidate's technical performance based on the interview transcript.
TRANSCRIPT:
{self.transcript[:3000]}
JOB DESCRIPTION:
{self.jd_text[:2000]}
INTERVIEW QUESTIONS:
{self.questions_text[:2000]}
TASK:
**CRITICAL: ONLY evaluate TECHNICAL questions. Identify which questions are technical (related to hard skills, technologies, tools, programming, systems, etc.) and ONLY score those.**
For EACH technical question identified, provide:
1. The exact question text
2. Technical competency score (0-10)
3. Detailed feedback on technical knowledge, problem-solving approach, and depth of understanding
Also provide:
4. Overall technical strengths
5. Overall technical weaknesses
6. Alignment with job requirements
Focus on:
- Accuracy and correctness of technical answers
- Depth of knowledge demonstrated
- Problem-solving methodology
- Practical application of skills
- Communication of technical concepts
**DO NOT evaluate behavioral, personality, or soft skills questions. Only technical questions.**
""",
expected_output="A detailed technical evaluation with scores (0-10) and feedback for EACH TECHNICAL QUESTION ONLY. Format: For each technical question, provide: Question | Score (0-10) | Feedback. Plus overall technical strengths and weaknesses.",
agent=agent
)
def evaluate_behavioral_skills(self, agent):
return Task(
description=f"""
Evaluate the candidate's behavioral and soft skills based on the interview transcript.
TRANSCRIPT:
{self.transcript[:3000]}
JOB DESCRIPTION:
{self.jd_text[:2000]}
INTERVIEW QUESTIONS:
{self.questions_text[:2000]}
TASK:
**CRITICAL: ONLY evaluate BEHAVIORAL/SOFT SKILLS questions. Identify which questions are behavioral (related to past experiences, teamwork, leadership, culture fit, problem-solving scenarios, etc.) and ONLY score those.**
For EACH behavioral question identified, provide:
1. The exact question text
2. Behavioral competency score (0-10)
3. Sentiment Analysis: Use the 'Sentiment Analysis Tool' to analyze the candidate's answer. Include the result (Positive/Negative/Neutral) in your evaluation.
4. Detailed feedback on communication, examples shared, and soft skills demonstrated
Also provide:
5. Assessment of communication skills, leadership, teamwork, and adaptability
6. Cultural fit evaluation
7. Examples of demonstrated soft skills
Focus on:
- Quality of examples and stories shared
- Problem-solving approach in real situations
- Interpersonal skills and communication clarity
- Alignment with company values and culture
- Emotional intelligence and self-awareness
**DO NOT evaluate technical, programming, or hard skills questions. Only behavioral/soft skills questions.**
""",
expected_output="A detailed behavioral evaluation with scores (0-10) and feedback for EACH BEHAVIORAL QUESTION ONLY. Format: For each behavioral question, provide: Question | Score (0-10) | Feedback. Plus overall soft skills assessment and culture fit analysis.",
agent=agent
)
def compile_scorecard(self, agent, tech_task, behavioral_task):
return Task(
description=f"""
Compile a comprehensive interview scorecard from technical and behavioral evaluations.
You have received evaluations from:
1. Technical Evaluator - evaluated ONLY technical questions
2. Behavioral Evaluator - evaluated ONLY behavioral questions
CRITICAL REQUIREMENTS:
1. SUMMARY:
- Brief overview of candidate performance (2-3 sentences)
2. SCORECARD TABLE:
- Create a markdown table with columns: Question | Category | Score (0-10) | Feedback
- **CRITICAL: Each question must appear EXACTLY ONCE in the table - NO DUPLICATES**
- Merge the two evaluations: take technical questions from Technical Evaluator's output, behavioral questions from Behavioral Evaluator's output
- For each technical question: Use the exact question text, Category = "Technical", and the score/feedback from Technical Evaluator
- For each behavioral question: Use the exact question text, Category = "Behavioral", and the score/feedback from Behavioral Evaluator
- If a question appears in both evaluations, that's an error - each question should only be in one category
- List all questions in the order they appear in the interview
3. OVERALL SCORES:
- Average Technical Score
- Average Behavioral Score
- Overall Score
4. STRENGTHS:
- List 3-5 key strengths demonstrated
5. AREAS FOR IMPROVEMENT:
- List 2-4 areas where the candidate could improve
6. FINAL DECISION:
- One of: "Strong Hire", "Hire", "No Hire"
- Brief justification (1-2 sentences)
Output Format (Markdown):
# Interview Scorecard
## Summary
[Brief overview]
## Scorecard
| Question | Category | Score | Feedback |
|----------|----------|-------|----------|
| [Q1] | Technical | X/10 | [Feedback] |
...
## Overall Scores
- **Technical Average**: X/10
- **Behavioral Average**: X/10
- **Overall Score**: X/10
## Strengths
1. [Strength 1]
2. [Strength 2]
...
## Areas for Improvement
1. [Area 1]
2. [Area 2]
...
## Final Decision
**Decision**: [Strong Hire/Hire/No Hire]
[Justification]
""",
expected_output="A comprehensive markdown scorecard with summary, detailed table, scores, strengths, weaknesses, and hiring recommendation.",
agent=agent,
context=[tech_task, behavioral_task]
)
def run_evaluation_crew(transcript, jd_text, questions_text, api_key):
"""Run CrewAI evaluation crew to generate scorecard"""
agents = EvaluationAgents(api_key)
tasks = EvaluationTasks(transcript, jd_text, questions_text)
tech_evaluator = agents.technical_evaluator()
behavioral_evaluator = agents.behavioral_evaluator()
director = agents.evaluation_director()
tech_task = tasks.evaluate_technical_skills(tech_evaluator)
behavioral_task = tasks.evaluate_behavioral_skills(behavioral_evaluator)
compile_task = tasks.compile_scorecard(director, tech_task, behavioral_task)
crew = Crew(
agents=[tech_evaluator, behavioral_evaluator, director],
tasks=[tech_task, behavioral_task, compile_task],
process=Process.sequential,
verbose=True
)
result = crew.kickoff()
return result
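# --- Example usage ---
# A minimal sketch of how the interview crew might be invoked directly; the
# env var name and sample JD/CV strings below are illustrative assumptions,
# and app.py remains the real entry point. Note that SerperDevTool expects
# SERPER_API_KEY to be set in the environment.
if __name__ == "__main__":
    gemini_key = os.environ.get("GEMINI_API_KEY", "")  # assumed env var name
    sample_jd = "Senior Python engineer: FastAPI, PostgreSQL, AWS."
    sample_cv = "Five years of Python; built and shipped REST APIs with FastAPI."
    plan = run_interview_crew(sample_jd, sample_cv, num_questions=5, api_key=gemini_key)
    print(plan)  # printing the CrewOutput shows the director's final JSON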