|
|
import os |
|
|
import random |
|
|
from crewai import Agent, Task, Crew, Process, LLM |
|
|
from crewai_tools import SerperDevTool |
|
|
from crewai.tools import BaseTool |
|
|
import asyncio |
|
|
from mcp import ClientSession |
|
|
from mcp.client.sse import sse_client |
|
|
|
|
|
|
|
|
def get_llm(api_key):
    """Build the Gemini LLM used by the interview-generation agents.

    A relatively high temperature (0.9) is used so repeated runs produce
    varied questions.
    """
    model_name = "gemini/gemini-2.5-flash"
    return LLM(model=model_name, api_key=api_key, temperature=0.9)
|
|
|
|
|
class InterviewAgents:
    """Factory for the three agents that collaborate on the interview plan."""

    def __init__(self, api_key):
        # One shared LLM instance and one shared web-search tool for all agents.
        self.llm = get_llm(api_key)
        self.serper_tool = SerperDevTool()

    def technical_interviewer(self):
        """Recruiter agent focused on hard skills and technical proficiency."""
        profile = {
            'role': 'Technical Interviewer',
            'goal': 'Analyze the Job Description and CV to generate relevant technical interview questions.',
            'backstory': (
                'You are an expert technical recruiter with years of experience in '
                'assessing candidate skills against job requirements. You focus on '
                'hard skills and technical proficiency.'
            ),
        }
        return Agent(llm=self.llm, verbose=True, **profile)

    def personality_interviewer(self):
        """Psychologist agent that crafts behavioral questions, aided by web search."""
        profile = {
            'role': 'Personality & Culture Fit Specialist',
            'goal': (
                'Generate behavioral and personality-based interview questions using '
                'online resources to ensure best practices.'
            ),
            'backstory': (
                'You are an organizational psychologist specializing in culture fit and '
                'soft skills. You use data-driven approaches and current trends to ask '
                'meaningful behavioral questions.'
            ),
        }
        return Agent(llm=self.llm, tools=[self.serper_tool], verbose=True, **profile)

    def interview_director(self):
        """Lead agent that merges both specialists' output into one script."""
        profile = {
            'role': 'Interview Director',
            'goal': 'Compile the final interview plan and system instructions.',
            'backstory': (
                'You are the Lead Interviewer. You oversee the process and ensure a '
                'balanced interview. You combine inputs from technical and personality '
                'specialists to create a cohesive interview script.'
            ),
        }
        # reasoning/memory enabled: this agent must reconcile two upstream outputs.
        return Agent(llm=self.llm, reasoning=True, memory=True, verbose=True, **profile)
|
|
|
|
|
class InterviewTasks:
    """Builds the CrewAI tasks that generate and compile the interview plan.

    The requested question count is split roughly 80% technical / 20%
    behavioral, and the split is guaranteed to sum to exactly
    ``num_questions``.
    """

    def __init__(self, jd_text, cv_text, num_questions):
        self.jd_text = jd_text
        self.cv_text = cv_text
        self.num_questions = num_questions
        # ~80% technical, always at least one technical question.
        self.n_tech = max(1, round(num_questions * 0.8))
        if self.n_tech >= num_questions and num_questions > 1:
            # Rounding can consume the whole budget (e.g. round(2 * 0.8) == 2);
            # leave room for at least one behavioral question.
            self.n_tech = num_questions - 1
        # Remainder goes to behavioral questions, so totals always match
        # the "exactly num_questions" requirement in compile_interview.
        self.n_psych = num_questions - self.n_tech

    def generate_technical_questions(self, agent):
        """Task: derive short, unique technical questions from the JD and CV."""
        # Random seed embedded in the prompt to nudge the LLM toward variety
        # across repeated runs.
        seed = random.randint(1000, 9999)
        return Task(
            description=f"""
            Analyze the following Job Description (JD) and Curriculum Vitae (CV).
            JD: {self.jd_text[:2000]}...
            CV: {self.cv_text[:2000]}...

            IMPORTANT: Use seed {seed} to ensure variety. Generate {self.n_tech} UNIQUE technical interview questions.
            - Each question should be DIFFERENT from common interview questions
            - Focus on specific skills mentioned in the JD
            - Ask about practical scenarios or real-world applications
            - Keep questions VERY SHORT (max 15 words) for voice conversation
            - Make questions open-ended to encourage discussion
            - Vary question types: scenario-based, problem-solving, experience-based

            Example formats:
            - "How would you handle [specific technical scenario]?"
            - "Describe your experience with [technology]."
            - "What's your approach to [technical challenge]?"
            """,
            expected_output=f"A list of {self.n_tech} unique, concise technical questions (max 15 words each).",
            agent=agent
        )

    def generate_personality_questions(self, agent):
        """Task: derive short, unique behavioral questions from the JD and CV."""
        # Same variety-seed trick as the technical task.
        seed = random.randint(1000, 9999)
        return Task(
            description=f"""
            Analyze the JD and CV to understand the company culture and required soft skills.
            JD: {self.jd_text[:2000]}...
            CV: {self.cv_text[:2000]}...

            IMPORTANT: Use seed {seed} to ensure variety. Generate {self.n_psych} UNIQUE behavioral/personality questions.
            - Use the Serper tool to find CURRENT, trending behavioral interview questions
            - Avoid cliché questions like "What's your greatest weakness?"
            - Focus on real scenarios and past experiences
            - Keep questions VERY SHORT (max 15 words) for voice conversation
            - Make questions conversational and natural

            Example formats:
            - "Tell me about a time you faced [specific challenge]."
            - "How do you handle [workplace situation]?"
            - "Describe a situation where you [behavioral trait]."
            """,
            expected_output=f"A list of {self.n_psych} unique, concise behavioral questions (max 15 words each).",
            agent=agent
        )

    def compile_interview(self, agent, tech_task, psych_task):
        """Task: merge both question lists into a final plan + voice-AI prompt.

        Depends on the outputs of *tech_task* and *psych_task* via the
        task ``context``.
        """
        return Task(
            description=f"""
            Compile the final interview plan from the technical and personality questions.

            CRITICAL REQUIREMENTS:

            1. QUESTIONS LIST:
            - Combine all questions into a single numbered list
            - Total must be exactly {self.num_questions} questions
            - Mix: ~80% technical, ~20% behavioral

            2. SYSTEM INSTRUCTION (MUST BE CONCISE AND CONVERSATIONAL):
            Create a SHORT, natural system prompt for a voice AI interviewer.

            **IMPORTANT: The system instruction MUST include the complete list of questions to ask.**

            Format the system instruction like this:

            "You are Alex, a friendly professional interviewer conducting a voice interview. Start with as soon as the connection is established 'Hi! I'm Alex. Let's begin with the first question.

            YOUR QUESTIONS (ask these in order):
            1. [First question]
            2. [Second question]
            3. [Third question]
            ... [all questions]

            CONVERSATION RULES:
            - Ask ONE question at a time and WAIT for the complete answer
            - Keep responses SHORT (1-2 sentences max)
            - If interrupted, STOP talking immediately and listen
            - After each answer, briefly acknowledge (e.g., 'Great!', 'I see', 'Thanks') then ask the next question
            - Use a warm, conversational tone
            - End with: 'Thanks for your time today!'

            Remember: Listen actively, don't interrupt, and keep it conversational."

            Output Format (JSON):
            {{
                "questions_markdown": "# Interview Questions\\n\\n1. [Question 1]\\n2. [Question 2]...",
                "system_instruction": "[Complete system instruction with embedded questions list as shown above]"
            }}
            """,
            expected_output="A JSON object with 'questions_markdown' (formatted list) and 'system_instruction' (concise prompt with embedded questions, under 300 words).",
            agent=agent,
            context=[tech_task, psych_task]
        )
|
|
|
|
|
def run_interview_crew(jd_text, cv_text, num_questions, api_key):
    """Assemble and execute the interview-generation crew.

    Returns the crew kickoff result (the compiled interview plan from the
    director's task).
    """
    agent_factory = InterviewAgents(api_key)
    task_builder = InterviewTasks(jd_text, cv_text, num_questions)

    technical = agent_factory.technical_interviewer()
    personality = agent_factory.personality_interviewer()
    director = agent_factory.interview_director()

    tech_task = task_builder.generate_technical_questions(technical)
    psych_task = task_builder.generate_personality_questions(personality)
    final_task = task_builder.compile_interview(director, tech_task, psych_task)

    # Sequential process: both question tasks run before the compile task,
    # which consumes their outputs via its context.
    return Crew(
        agents=[technical, personality, director],
        tasks=[tech_task, psych_task, final_task],
        process=Process.sequential,
        verbose=True,
    ).kickoff()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SentimentAnalysisTool(BaseTool):
    """CrewAI tool that scores text sentiment via a remote MCP server.

    The server is a hosted Space exposing an SSE MCP endpoint. All failures
    are reported as error strings (never raised) so the calling agent can
    continue its evaluation.
    """

    name: str = "Sentiment Analysis Tool"
    description: str = "Analyzes the sentiment of a given text. Returns 'Positive', 'Negative', or 'Neutral'. Use this to gauge the candidate's attitude."

    def _run(self, text: str) -> str:
        """Synchronously analyze *text*, bridging into the async MCP client."""

        async def call_mcp(text_input):
            # SSE endpoint of the hosted sentiment-analysis MCP server.
            sse_url = "https://uq-sentimentanalysismcpserver.hf.space/gradio_api/mcp/sse"
            try:
                async with sse_client(sse_url) as (read, write):
                    async with ClientSession(read, write) as session:
                        await session.initialize()
                        result = await session.call_tool(
                            "SentimentAnalysisMCPserver_predict_sentiment",
                            arguments={"text": text_input}
                        )
                        if result.content and len(result.content) > 0:
                            return result.content[0].text
                        return "Error: No content returned"
            except Exception as e:
                return f"Error connecting to MCP: {str(e)}"

        try:
            try:
                asyncio.get_running_loop()
            except RuntimeError:
                # No event loop running in this thread: safe to run directly.
                return asyncio.run(call_mcp(text))
            # An event loop is already running (e.g. we were called from async
            # framework code). run_until_complete()/asyncio.run() would both
            # raise "event loop is already running" here, so execute the
            # coroutine on a fresh loop in a short-lived worker thread instead.
            import concurrent.futures
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                return pool.submit(asyncio.run, call_mcp(text)).result()
        except Exception as e:
            return f"Error analyzing sentiment: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
def get_evaluation_llm(api_key):
    """Build the Gemini LLM used by the evaluation agents.

    Uses a lower temperature (0.7) than the question-generation LLM, since
    scoring benefits from more consistent output.
    """
    return LLM(
        api_key=api_key,
        model="gemini/gemini-2.5-flash",
        temperature=0.7,
    )
|
|
|
|
|
class EvaluationAgents:
    """Factory for the three agents that score a completed interview."""

    def __init__(self, api_key):
        # Shared LLM and the MCP-backed sentiment tool used by the behavioral agent.
        self.llm = get_evaluation_llm(api_key)
        self.sentiment_tool = SentimentAnalysisTool()

    def technical_evaluator(self):
        """Agent that scores only the technical portions of the transcript."""
        return Agent(
            llm=self.llm,
            verbose=True,
            role='Technical Skills Evaluator',
            goal="Evaluate the candidate's technical skills and knowledge based on their interview responses.",
            backstory=(
                'You are an expert technical recruiter with deep knowledge in assessing '
                'technical competencies. You analyze answers for depth, accuracy, and '
                'practical application of skills.'
            ),
        )

    def behavioral_evaluator(self):
        """Agent that scores soft skills, with sentiment analysis as a tool."""
        return Agent(
            llm=self.llm,
            verbose=True,
            tools=[self.sentiment_tool],
            role='Behavioral & Culture Fit Evaluator',
            goal="Assess the candidate's soft skills, communication, and cultural fit based on behavioral questions.",
            backstory=(
                'You are an organizational psychologist specializing in evaluating '
                'interpersonal skills, problem-solving approaches, and alignment with '
                'company culture. You look for evidence of leadership, teamwork, and '
                'adaptability.'
            ),
        )

    def evaluation_director(self):
        """Lead agent that merges both evaluations into the final scorecard."""
        # reasoning/memory enabled: this agent must reconcile two upstream outputs.
        return Agent(
            llm=self.llm,
            verbose=True,
            reasoning=True,
            memory=True,
            role='Evaluation Director',
            goal='Compile a comprehensive scorecard with scores, feedback, and hiring recommendation.',
            backstory=(
                'You are the Lead Evaluator responsible for synthesizing all evaluation '
                'inputs into a clear, actionable scorecard. You ensure fairness and '
                'consistency in scoring.'
            ),
        )
|
|
|
|
|
class EvaluationTasks:
    """Builds the CrewAI tasks that evaluate a finished interview.

    Inputs are truncated when embedded in prompts (transcript to 3000 chars,
    JD and questions to 2000 chars each) to bound prompt size.
    """

    def __init__(self, transcript, jd_text, questions_text):
        self.transcript = transcript
        self.jd_text = jd_text
        self.questions_text = questions_text

    def evaluate_technical_skills(self, agent):
        """Task: score ONLY the technical questions found in the transcript."""
        return Task(
            description=f"""
            Evaluate the candidate's technical performance based on the interview transcript.

            TRANSCRIPT:
            {self.transcript[:3000]}

            JOB DESCRIPTION:
            {self.jd_text[:2000]}

            INTERVIEW QUESTIONS:
            {self.questions_text[:2000]}

            TASK:
            **CRITICAL: ONLY evaluate TECHNICAL questions. Identify which questions are technical (related to hard skills, technologies, tools, programming, systems, etc.) and ONLY score those.**

            For EACH technical question identified, provide:
            1. The exact question text
            2. Technical competency score (0-10)
            3. Detailed feedback on technical knowledge, problem-solving approach, and depth of understanding

            Also provide:
            4. Overall technical strengths
            5. Overall technical weaknesses
            6. Alignment with job requirements

            Focus on:
            - Accuracy and correctness of technical answers
            - Depth of knowledge demonstrated
            - Problem-solving methodology
            - Practical application of skills
            - Communication of technical concepts

            **DO NOT evaluate behavioral, personality, or soft skills questions. Only technical questions.**
            """,
            expected_output="A detailed technical evaluation with scores (0-10) and feedback for EACH TECHNICAL QUESTION ONLY. Format: For each technical question, provide: Question | Score (0-10) | Feedback. Plus overall technical strengths and weaknesses.",
            agent=agent
        )

    def evaluate_behavioral_skills(self, agent):
        """Task: score ONLY the behavioral questions, using the sentiment tool."""
        return Task(
            description=f"""
            Evaluate the candidate's behavioral and soft skills based on the interview transcript.

            TRANSCRIPT:
            {self.transcript[:3000]}

            JOB DESCRIPTION:
            {self.jd_text[:2000]}

            INTERVIEW QUESTIONS:
            {self.questions_text[:2000]}

            TASK:
            **CRITICAL: ONLY evaluate BEHAVIORAL/SOFT SKILLS questions. Identify which questions are behavioral (related to past experiences, teamwork, leadership, culture fit, problem-solving scenarios, etc.) and ONLY score those.**

            For EACH behavioral question identified, provide:
            1. The exact question text
            2. Behavioral competency score (0-10)
            3. Sentiment Analysis: Use the 'Sentiment Analysis Tool' to analyze the candidate's answer. Include the result (Positive/Negative/Neutral) in your evaluation.
            4. Detailed feedback on communication, examples shared, and soft skills demonstrated

            Also provide:
            5. Assessment of communication skills, leadership, teamwork, and adaptability
            6. Cultural fit evaluation
            7. Examples of demonstrated soft skills

            Focus on:
            - Quality of examples and stories shared
            - Problem-solving approach in real situations
            - Interpersonal skills and communication clarity
            - Alignment with company values and culture
            - Emotional intelligence and self-awareness

            **DO NOT evaluate technical, programming, or hard skills questions. Only behavioral/soft skills questions.**
            """,
            expected_output="A detailed behavioral evaluation with scores (0-10) and feedback for EACH BEHAVIORAL QUESTION ONLY. Format: For each behavioral question, provide: Question | Score (0-10) | Feedback. Plus overall soft skills assessment and culture fit analysis.",
            agent=agent
        )

    def compile_scorecard(self, agent, tech_task, behavioral_task):
        """Task: merge both evaluations into one deduplicated markdown scorecard.

        Depends on *tech_task* and *behavioral_task* outputs via the task
        ``context``.
        """
        # Plain string (no placeholders), so no f-prefix is needed.
        return Task(
            description="""
            Compile a comprehensive interview scorecard from technical and behavioral evaluations.

            You have received evaluations from:
            1. Technical Evaluator - evaluated ONLY technical questions
            2. Behavioral Evaluator - evaluated ONLY behavioral questions

            CRITICAL REQUIREMENTS:

            1. SUMMARY:
            - Brief overview of candidate performance (2-3 sentences)

            2. SCORECARD TABLE:
            - Create a markdown table with columns: Question | Category | Score (0-10) | Feedback
            - **CRITICAL: Each question must appear EXACTLY ONCE in the table - NO DUPLICATES**
            - Merge the two evaluations: take technical questions from Technical Evaluator's output, behavioral questions from Behavioral Evaluator's output
            - For each technical question: Use the exact question text, Category = "Technical", and the score/feedback from Technical Evaluator
            - For each behavioral question: Use the exact question text, Category = "Behavioral", and the score/feedback from Behavioral Evaluator
            - If a question appears in both evaluations, that's an error - each question should only be in one category
            - List all questions in the order they appear in the interview

            3. OVERALL SCORES:
            - Average Technical Score
            - Average Behavioral Score
            - Overall Score

            4. STRENGTHS:
            - List 3-5 key strengths demonstrated

            5. AREAS FOR IMPROVEMENT:
            - List 2-4 areas where the candidate could improve

            6. FINAL DECISION:
            - One of: "Strong Hire", "Hire", "No Hire"
            - Brief justification (1-2 sentences)

            Output Format (Markdown):
            # Interview Scorecard

            ## Summary
            [Brief overview]

            ## Scorecard
            | Question | Category | Score | Feedback |
            |----------|----------|-------|----------|
            | [Q1] | Technical | X/10 | [Feedback] |
            ...

            ## Overall Scores
            - **Technical Average**: X/10
            - **Behavioral Average**: X/10
            - **Overall Score**: X/10

            ## Strengths
            1. [Strength 1]
            2. [Strength 2]
            ...

            ## Areas for Improvement
            1. [Area 1]
            2. [Area 2]
            ...

            ## Final Decision
            **Decision**: [Strong Hire/Hire/No Hire]

            [Justification]
            """,
            expected_output="A comprehensive markdown scorecard with summary, detailed table, scores, strengths, weaknesses, and hiring recommendation.",
            agent=agent,
            context=[tech_task, behavioral_task]
        )
|
|
|
|
|
def run_evaluation_crew(transcript, jd_text, questions_text, api_key):
    """Assemble and run the CrewAI evaluation crew; returns the scorecard result."""
    factory = EvaluationAgents(api_key)
    task_builder = EvaluationTasks(transcript, jd_text, questions_text)

    tech_agent = factory.technical_evaluator()
    behavioral_agent = factory.behavioral_evaluator()
    director_agent = factory.evaluation_director()

    tech_eval = task_builder.evaluate_technical_skills(tech_agent)
    behavioral_eval = task_builder.evaluate_behavioral_skills(behavioral_agent)
    scorecard = task_builder.compile_scorecard(director_agent, tech_eval, behavioral_eval)

    # Sequential process: both evaluations complete before the scorecard task,
    # which consumes their outputs via its context.
    crew = Crew(
        agents=[tech_agent, behavioral_agent, director_agent],
        tasks=[tech_eval, behavioral_eval, scorecard],
        process=Process.sequential,
        verbose=True,
    )
    return crew.kickoff()
|
|
|