File size: 21,363 Bytes
86310e2
e10cad4
86310e2
 
fc63cc9
 
 
 
86310e2
e10cad4
86310e2
 
fc63cc9
86310e2
e10cad4
86310e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172ecd9
 
86310e2
 
 
 
 
 
 
 
 
 
 
 
e10cad4
 
86310e2
 
 
 
 
 
e10cad4
 
 
 
 
 
 
 
 
 
 
 
86310e2
e10cad4
86310e2
 
 
 
e10cad4
 
86310e2
 
 
 
 
e10cad4
 
 
 
 
 
 
 
 
 
 
86310e2
e10cad4
86310e2
 
 
 
 
 
e10cad4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc63cc9
e10cad4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86310e2
e10cad4
86310e2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e10cad4
 
fc63cc9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e10cad4
 
 
 
fc63cc9
e10cad4
 
 
 
 
 
 
fc63cc9
e10cad4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc63cc9
e10cad4
 
 
 
 
 
 
 
 
 
172ecd9
 
e10cad4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fc63cc9
 
e10cad4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
import os
import random
from crewai import Agent, Task, Crew, Process, LLM
from crewai_tools import SerperDevTool
from crewai.tools import BaseTool
import asyncio
from mcp import ClientSession
from mcp.client.sse import sse_client

def get_llm(api_key):
    """Return a Gemini 2.5 Flash LLM for question generation.

    temperature=0.9 is deliberately high so repeated runs over the same
    JD/CV produce varied questions.
    """
    llm = LLM(
        model="gemini/gemini-2.5-flash",
        api_key=api_key,
        temperature=0.9,
    )
    return llm

class InterviewAgents:
    """Factory for the three agents that plan the interview."""

    def __init__(self, api_key):
        # One shared LLM; the web-search tool is handed only to the
        # personality agent, which is instructed to look up current trends.
        self.llm = get_llm(api_key)
        self.serper_tool = SerperDevTool()

    def _make(self, **agent_kwargs):
        """Build a verbose Agent bound to the shared LLM."""
        return Agent(llm=self.llm, verbose=True, **agent_kwargs)

    def technical_interviewer(self):
        """Agent that derives hard-skill questions from the JD and CV."""
        return self._make(
            role="Technical Interviewer",
            goal="Analyze the Job Description and CV to generate relevant technical interview questions.",
            backstory=(
                "You are an expert technical recruiter with years of experience in "
                "assessing candidate skills against job requirements. You focus on "
                "hard skills and technical proficiency."
            ),
        )

    def personality_interviewer(self):
        """Agent that writes behavioral questions, aided by web search."""
        return self._make(
            role="Personality & Culture Fit Specialist",
            goal="Generate behavioral and personality-based interview questions using online resources to ensure best practices.",
            backstory=(
                "You are an organizational psychologist specializing in culture fit "
                "and soft skills. You use data-driven approaches and current trends "
                "to ask meaningful behavioral questions."
            ),
            tools=[self.serper_tool],
        )

    def interview_director(self):
        """Agent that merges both question sets into the final script."""
        return self._make(
            role="Interview Director",
            goal="Compile the final interview plan and system instructions.",
            backstory=(
                "You are the Lead Interviewer. You oversee the process and ensure a "
                "balanced interview. You combine inputs from technical and "
                "personality specialists to create a cohesive interview script."
            ),
            reasoning=True,
            memory=True,
        )

class InterviewTasks:
    """Builds the three CrewAI tasks that produce the interview plan.

    The requested question count is split roughly 80/20 between technical
    and behavioral questions; the split is guaranteed to sum to exactly
    ``num_questions``.
    """

    def __init__(self, jd_text, cv_text, num_questions):
        self.jd_text = jd_text
        self.cv_text = cv_text
        self.num_questions = num_questions
        # ~80% technical, always at least one technical question.
        self.n_tech = max(1, round(num_questions * 0.8))
        if num_questions > 1:
            # Cap n_tech so at least one behavioral slot remains and the two
            # counts never exceed num_questions. The previous
            # max(1, num_questions - n_tech) forced n_psych >= 1 even when no
            # slots remained, so e.g. num_questions=2 yielded 2 + 1 = 3
            # questions, contradicting the compile task's "exactly
            # {num_questions}" requirement.
            self.n_tech = min(self.n_tech, num_questions - 1)
        self.n_psych = num_questions - self.n_tech

    def generate_technical_questions(self, agent):
        """Task: derive n_tech short technical questions from the JD/CV."""
        # A random seed embedded in the prompt nudges the LLM toward variety
        # across repeated runs on identical inputs.
        seed = random.randint(1000, 9999)
        return Task(
            description=f"""
                Analyze the following Job Description (JD) and Curriculum Vitae (CV).
                JD: {self.jd_text[:2000]}...
                CV: {self.cv_text[:2000]}...
                
                IMPORTANT: Use seed {seed} to ensure variety. Generate {self.n_tech} UNIQUE technical interview questions.
                - Each question should be DIFFERENT from common interview questions
                - Focus on specific skills mentioned in the JD
                - Ask about practical scenarios or real-world applications
                - Keep questions VERY SHORT (max 15 words) for voice conversation
                - Make questions open-ended to encourage discussion
                - Vary question types: scenario-based, problem-solving, experience-based
                
                Example formats:
                - "How would you handle [specific technical scenario]?"
                - "Describe your experience with [technology]."
                - "What's your approach to [technical challenge]?"
            """,
            expected_output=f"A list of {self.n_tech} unique, concise technical questions (max 15 words each).",
            agent=agent
        )

    def generate_personality_questions(self, agent):
        """Task: derive n_psych short behavioral questions (uses Serper)."""
        # Same seed trick as the technical task, for prompt-level variety.
        seed = random.randint(1000, 9999)
        return Task(
            description=f"""
                Analyze the JD and CV to understand the company culture and required soft skills.
                JD: {self.jd_text[:2000]}...
                
                IMPORTANT: Use seed {seed} to ensure variety. Generate {self.n_psych} UNIQUE behavioral/personality questions.
                - Use the Serper tool to find CURRENT, trending behavioral interview questions
                - Avoid cliché questions like "What's your greatest weakness?"
                - Focus on real scenarios and past experiences
                - Keep questions VERY SHORT (max 15 words) for voice conversation
                - Make questions conversational and natural
                
                Example formats:
                - "Tell me about a time you faced [specific challenge]."
                - "How do you handle [workplace situation]?"
                - "Describe a situation where you [behavioral trait]."
            """,
            expected_output=f"A list of {self.n_psych} unique, concise behavioral questions (max 15 words each).",
            agent=agent
        )

    def compile_interview(self, agent, tech_task, psych_task):
        """Task: merge both question lists into the final plan + voice-AI prompt.

        Receives the two generation tasks as context so the director sees
        their outputs.
        """
        return Task(
            description=f"""
                Compile the final interview plan from the technical and personality questions.
                
                CRITICAL REQUIREMENTS:
                
                1. QUESTIONS LIST:
                   - Combine all questions into a single numbered list
                   - Total must be exactly {self.num_questions} questions
                   - Mix: ~80% technical, ~20% behavioral
                
                2. SYSTEM INSTRUCTION (MUST BE CONCISE AND CONVERSATIONAL):
                   Create a SHORT, natural system prompt for a voice AI interviewer.
                   
                   **IMPORTANT: The system instruction MUST include the complete list of questions to ask.**
                   
                   Format the system instruction like this:
                   
                   "You are Alex, a friendly professional interviewer conducting a voice interview. Start with as soon as the connection is established 'Hi! I'm Alex. Let's begin with the first question.
                   
                   YOUR QUESTIONS (ask these in order):
                   1. [First question]
                   2. [Second question]
                   3. [Third question]
                   ... [all questions]
                   
                   CONVERSATION RULES:
                   - Ask ONE question at a time and WAIT for the complete answer
                   - Keep responses SHORT (1-2 sentences max)
                   - If interrupted, STOP talking immediately and listen
                   - After each answer, briefly acknowledge (e.g., 'Great!', 'I see', 'Thanks') then ask the next question
                   - Use a warm, conversational tone
                   - End with: 'Thanks for your time today!'
                   
                   Remember: Listen actively, don't interrupt, and keep it conversational."
                
                Output Format (JSON):
                {{
                    "questions_markdown": "# Interview Questions\\n\\n1. [Question 1]\\n2. [Question 2]...",
                    "system_instruction": "[Complete system instruction with embedded questions list as shown above]"
                }}
            """,
            expected_output="A JSON object with 'questions_markdown' (formatted list) and 'system_instruction' (concise prompt with embedded questions, under 300 words).",
            agent=agent,
            context=[tech_task, psych_task]
        )

def run_interview_crew(jd_text, cv_text, num_questions, api_key):
    """Assemble the interview-planning crew and run it sequentially.

    Returns the crew's kickoff result (the compiled interview plan from
    the director's task).
    """
    agent_factory = InterviewAgents(api_key)
    task_builder = InterviewTasks(jd_text, cv_text, num_questions)

    technical = agent_factory.technical_interviewer()
    personality = agent_factory.personality_interviewer()
    director = agent_factory.interview_director()

    technical_task = task_builder.generate_technical_questions(technical)
    personality_task = task_builder.generate_personality_questions(personality)
    final_task = task_builder.compile_interview(director, technical_task, personality_task)

    # Sequential: both question-generation tasks run before compilation.
    crew = Crew(
        agents=[technical, personality, director],
        tasks=[technical_task, personality_task, final_task],
        process=Process.sequential,
        verbose=True,
    )
    return crew.kickoff()



# --- Custom Tools ---

class SentimentAnalysisTool(BaseTool):
    """CrewAI tool that scores text sentiment via a remote MCP (SSE) server."""

    name: str = "Sentiment Analysis Tool"
    description: str = "Analyzes the sentiment of a given text. Returns 'Positive', 'Negative', or 'Neutral'. Use this to gauge the candidate's attitude."

    def _run(self, text: str) -> str:
        """Synchronously call the sentiment MCP server for *text*.

        Returns the server's text verdict, or an "Error ..." string on any
        failure (network, protocol, or event-loop problems) so the calling
        agent can continue instead of crashing.
        """
        async def call_mcp(text_input):
            # Hosted Gradio MCP endpoint exposing the sentiment model over SSE.
            sse_url = "https://uq-sentimentanalysismcpserver.hf.space/gradio_api/mcp/sse"
            try:
                async with sse_client(sse_url) as (read, write):
                    async with ClientSession(read, write) as session:
                        await session.initialize()
                        result = await session.call_tool(
                            "SentimentAnalysisMCPserver_predict_sentiment",
                            arguments={"text": text_input}
                        )
                        if result.content and len(result.content) > 0:
                            return result.content[0].text
                        return "Error: No content returned"
            except Exception as e:
                return f"Error connecting to MCP: {str(e)}"

        try:
            try:
                asyncio.get_running_loop()
            except RuntimeError:
                # No loop running in this thread — the normal case when _run is
                # invoked via asyncio.to_thread from app.py. asyncio.run()
                # creates and tears down a fresh loop.
                return asyncio.run(call_mcp(text))
            # A loop IS running in this thread, so we cannot block it with
            # run_until_complete (the original code did exactly that, which
            # raises "This event loop is already running"). Instead, block on
            # a helper thread that owns its own event loop.
            import concurrent.futures
            with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
                return pool.submit(asyncio.run, call_mcp(text)).result()
        except Exception as e:
            return f"Error analyzing sentiment: {str(e)}"

# --- Evaluation Agents ---

def get_evaluation_llm(api_key):
    """Return a Gemini 2.5 Flash LLM for scoring interview transcripts.

    temperature=0.7 is lower than the question-generation LLM's 0.9 so
    that evaluations are more consistent run-to-run.
    """
    llm = LLM(
        model="gemini/gemini-2.5-flash",
        api_key=api_key,
        temperature=0.7,
    )
    return llm

class EvaluationAgents:
    """Factory for the three agents that grade a finished interview."""

    def __init__(self, api_key):
        # Shared lower-temperature LLM; the sentiment tool is given only to
        # the behavioral evaluator.
        self.llm = get_evaluation_llm(api_key)
        self.sentiment_tool = SentimentAnalysisTool()

    def _make(self, **agent_kwargs):
        """Build a verbose Agent bound to the shared evaluation LLM."""
        return Agent(llm=self.llm, verbose=True, **agent_kwargs)

    def technical_evaluator(self):
        """Agent that scores only the technical answers."""
        return self._make(
            role="Technical Skills Evaluator",
            goal="Evaluate the candidate's technical skills and knowledge based on their interview responses.",
            backstory=(
                "You are an expert technical recruiter with deep knowledge in "
                "assessing technical competencies. You analyze answers for depth, "
                "accuracy, and practical application of skills."
            ),
        )

    def behavioral_evaluator(self):
        """Agent that scores soft skills, with sentiment analysis available."""
        return self._make(
            role="Behavioral & Culture Fit Evaluator",
            goal="Assess the candidate's soft skills, communication, and cultural fit based on behavioral questions.",
            backstory=(
                "You are an organizational psychologist specializing in evaluating "
                "interpersonal skills, problem-solving approaches, and alignment "
                "with company culture. You look for evidence of leadership, "
                "teamwork, and adaptability."
            ),
            tools=[self.sentiment_tool],
        )

    def evaluation_director(self):
        """Agent that merges both evaluations into the final scorecard."""
        return self._make(
            role="Evaluation Director",
            goal="Compile a comprehensive scorecard with scores, feedback, and hiring recommendation.",
            backstory=(
                "You are the Lead Evaluator responsible for synthesizing all "
                "evaluation inputs into a clear, actionable scorecard. You ensure "
                "fairness and consistency in scoring."
            ),
            reasoning=True,
            memory=True,
        )

class EvaluationTasks:
    """Builds the three CrewAI tasks that grade an interview transcript."""

    def __init__(self, transcript, jd_text, questions_text):
        self.transcript = transcript
        self.jd_text = jd_text
        self.questions_text = questions_text

    def evaluate_technical_skills(self, agent):
        """Task: score ONLY the technical questions from the transcript."""
        return Task(
            description=f"""
                Evaluate the candidate's technical performance based on the interview transcript.
                
                TRANSCRIPT:
                {self.transcript[:3000]}
                
                JOB DESCRIPTION:
                {self.jd_text[:2000]}
                
                INTERVIEW QUESTIONS:
                {self.questions_text[:2000]}
                
                TASK:
                **CRITICAL: ONLY evaluate TECHNICAL questions. Identify which questions are technical (related to hard skills, technologies, tools, programming, systems, etc.) and ONLY score those.**
                
                For EACH technical question identified, provide:
                1. The exact question text
                2. Technical competency score (0-10)
                3. Detailed feedback on technical knowledge, problem-solving approach, and depth of understanding
                
                Also provide:
                4. Overall technical strengths
                5. Overall technical weaknesses
                6. Alignment with job requirements
                
                Focus on:
                - Accuracy and correctness of technical answers
                - Depth of knowledge demonstrated
                - Problem-solving methodology
                - Practical application of skills
                - Communication of technical concepts
                
                **DO NOT evaluate behavioral, personality, or soft skills questions. Only technical questions.**
            """,
            expected_output="A detailed technical evaluation with scores (0-10) and feedback for EACH TECHNICAL QUESTION ONLY. Format: For each technical question, provide: Question | Score (0-10) | Feedback. Plus overall technical strengths and weaknesses.",
            agent=agent
        )

    def evaluate_behavioral_skills(self, agent):
        """Task: score ONLY the behavioral questions, with sentiment analysis."""
        return Task(
            description=f"""
                Evaluate the candidate's behavioral and soft skills based on the interview transcript.
                
                TRANSCRIPT:
                {self.transcript[:3000]}
                
                JOB DESCRIPTION:
                {self.jd_text[:2000]}
                
                INTERVIEW QUESTIONS:
                {self.questions_text[:2000]}
                
                TASK:
                **CRITICAL: ONLY evaluate BEHAVIORAL/SOFT SKILLS questions. Identify which questions are behavioral (related to past experiences, teamwork, leadership, culture fit, problem-solving scenarios, etc.) and ONLY score those.**
                
                For EACH behavioral question identified, provide:
                1. The exact question text
                2. Behavioral competency score (0-10)
                3. Sentiment Analysis: Use the 'Sentiment Analysis Tool' to analyze the candidate's answer. Include the result (Positive/Negative/Neutral) in your evaluation.
                4. Detailed feedback on communication, examples shared, and soft skills demonstrated
                
                Also provide:
                5. Assessment of communication skills, leadership, teamwork, and adaptability
                6. Cultural fit evaluation
                7. Examples of demonstrated soft skills
                
                Focus on:
                - Quality of examples and stories shared
                - Problem-solving approach in real situations
                - Interpersonal skills and communication clarity
                - Alignment with company values and culture
                - Emotional intelligence and self-awareness
                
                **DO NOT evaluate technical, programming, or hard skills questions. Only behavioral/soft skills questions.**
            """,
            expected_output="A detailed behavioral evaluation with scores (0-10) and feedback for EACH BEHAVIORAL QUESTION ONLY. Format: For each behavioral question, provide: Question | Score (0-10) | Feedback. Plus overall soft skills assessment and culture fit analysis.",
            agent=agent
        )

    def compile_scorecard(self, agent, tech_task, behavioral_task):
        """Task: merge both evaluations into a markdown scorecard.

        Receives the two evaluation tasks as context so the director sees
        their outputs.
        """
        return Task(
            description=f"""
                Compile a comprehensive interview scorecard from technical and behavioral evaluations.
                
                You have received evaluations from:
                1. Technical Evaluator - evaluated ONLY technical questions
                2. Behavioral Evaluator - evaluated ONLY behavioral questions
                
                CRITICAL REQUIREMENTS:
                
                1. SUMMARY:
                   - Brief overview of candidate performance (2-3 sentences)
                
                2. SCORECARD TABLE:
                   - Create a markdown table with columns: Question | Category | Score (0-10) | Feedback
                   - **CRITICAL: Each question must appear EXACTLY ONCE in the table - NO DUPLICATES**
                   - Merge the two evaluations: take technical questions from Technical Evaluator's output, behavioral questions from Behavioral Evaluator's output
                   - For each technical question: Use the exact question text, Category = "Technical", and the score/feedback from Technical Evaluator
                   - For each behavioral question: Use the exact question text, Category = "Behavioral", and the score/feedback from Behavioral Evaluator
                   - If a question appears in both evaluations, that's an error - each question should only be in one category
                   - List all questions in the order they appear in the interview
                
                3. OVERALL SCORES:
                   - Average Technical Score
                   - Average Behavioral Score
                   - Overall Score
                
                4. STRENGTHS:
                   - List 3-5 key strengths demonstrated
                
                5. AREAS FOR IMPROVEMENT:
                   - List 2-4 areas where the candidate could improve
                
                6. FINAL DECISION:
                   - One of: "Strong Hire", "Hire", "No Hire"
                   - Brief justification (1-2 sentences)
                
                Output Format (Markdown):
                # Interview Scorecard
                
                ## Summary
                [Brief overview]
                
                ## Scorecard
                | Question | Category | Score | Feedback |
                |----------|----------|-------|----------|
                | [Q1] | Technical | X/10 | [Feedback] |
                ...
                
                ## Overall Scores
                - **Technical Average**: X/10
                - **Behavioral Average**: X/10
                - **Overall Score**: X/10
                
                ## Strengths
                1. [Strength 1]
                2. [Strength 2]
                ...
                
                ## Areas for Improvement
                1. [Area 1]
                2. [Area 2]
                ...
                
                ## Final Decision
                **Decision**: [Strong Hire/Hire/No Hire]
                
                [Justification]
            """,
            expected_output="A comprehensive markdown scorecard with summary, detailed table, scores, strengths, weaknesses, and hiring recommendation.",
            agent=agent,
            context=[tech_task, behavioral_task]
        )

def run_evaluation_crew(transcript, jd_text, questions_text, api_key):
    """Run the CrewAI evaluation crew and return the generated scorecard."""
    agent_factory = EvaluationAgents(api_key)
    task_builder = EvaluationTasks(transcript, jd_text, questions_text)

    technical = agent_factory.technical_evaluator()
    behavioral = agent_factory.behavioral_evaluator()
    lead = agent_factory.evaluation_director()

    technical_eval = task_builder.evaluate_technical_skills(technical)
    behavioral_eval = task_builder.evaluate_behavioral_skills(behavioral)
    scorecard_task = task_builder.compile_scorecard(lead, technical_eval, behavioral_eval)

    # Sequential: both evaluations complete before the scorecard is compiled.
    crew = Crew(
        agents=[technical, behavioral, lead],
        tasks=[technical_eval, behavioral_eval, scorecard_task],
        process=Process.sequential,
        verbose=True,
    )
    return crew.kickoff()