# Final_Assignment_Template

# Sleeping

# File size: 3,616 Bytes

# multiagent.py — GAIA-compliant multi-agent system using Groq (patched)
import os
import dotenv

from smolagents import CodeAgent, PythonInterpreterTool
from smolagents.models.openai_server_model import OpenAIServerModel as BaseOpenAIServerModel

from tools.fetch import fetch_webpage, search_web
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description
from tools.stt import get_text_transcript_from_audio_file
from tools.image import analyze_image
from common.mylogger import mylog
import myprompts

# ✅ Load .env
# Populate os.environ from a local .env file; this has to happen before the
# os.environ["GROQ_API_KEY"] lookup further down in this module.
dotenv.load_dotenv()

# ✅ Subclass patch: ensure message['content'] is always a string
class PatchedOpenAIServerModel(BaseOpenAIServerModel):
    """OpenAI-compatible server model that sends message content as plain strings.

    ``complete_chat`` normalises every message's ``"content"`` value to a
    ``str`` and then delegates to the base class implementation.

    NOTE(review): this override only has an effect if the installed base
    class (or its callers) actually invokes ``complete_chat`` — confirm
    against the smolagents version in use.
    """

    @staticmethod
    def _content_to_text(content):
        """Flatten a message ``content`` value into a plain string.

        * ``str`` is returned unchanged.
        * ``None`` becomes ``""`` instead of the literal text ``"None"``.
        * A list/tuple of parts is joined with newlines. For a dict part
          carrying a string ``"text"`` field only that text is kept, so the
          model receives the message text rather than the Python ``repr``
          of the parts list. Any other part is stringified with ``str``.
        * Any other value falls back to ``str(content)``.
        """
        if isinstance(content, str):
            return content
        if content is None:
            return ""
        if isinstance(content, (list, tuple)):
            pieces = []
            for part in content:
                if isinstance(part, str):
                    pieces.append(part)
                elif isinstance(part, dict) and isinstance(part.get("text"), str):
                    pieces.append(part["text"])
                else:
                    # Unknown part type: keep its information rather than drop it.
                    pieces.append(str(part))
            return "\n".join(pieces)
        return str(content)

    def complete_chat(self, messages, **kwargs):
        """Normalise message contents to strings, then call the base class.

        Args:
            messages: Iterable of dict chat messages. A message without a
                ``"content"`` key, or whose content is already a ``str``,
                is forwarded untouched.
            **kwargs: Forwarded unchanged to the base ``complete_chat``.

        Returns:
            Whatever the base class ``complete_chat`` returns.
        """
        normalized = []
        for msg in messages:
            content = msg.get("content", "")
            if not isinstance(content, str):
                # Work on a copy so the caller's message objects (which may be
                # reused for later calls) are not rewritten behind its back.
                msg = dict(msg)
                msg["content"] = self._content_to_text(content)
            normalized.append(msg)
        return super().complete_chat(normalized, **kwargs)

# ✅ Groq model (OpenAI-compatible)
# Shared by every agent below. The API key is read with a plain os.environ
# lookup, so a missing GROQ_API_KEY raises KeyError while this module loads.
groq_model = PatchedOpenAIServerModel(
    api_base="https://api.groq.com/openai/v1",
    api_key=os.environ["GROQ_API_KEY"],
    model_id="llama3-70b-8192",
)

# ✅ Final answer checker
def check_final_answer(final_answer, agent_memory) -> bool:
    """Log the candidate answer and accept it only if it is short enough.

    Args:
        final_answer: Candidate answer; it is measured via ``str(final_answer)``.
        agent_memory: Not used by this check.

    Returns:
        ``True`` when the stringified answer has at most 200 characters,
        ``False`` otherwise.
    """
    mylog("check_final_answer", final_answer)
    answer_text = str(final_answer)
    return not len(answer_text) > 200

# ✅ Sub-agents
# Worker given the search_web and fetch_webpage tools; limited to 7 steps.
web_agent = CodeAgent(
    name="web_agent",
    description="Uses search engine and scrapes webpages for content.",
    model=groq_model,
    tools=[search_web, fetch_webpage],
    additional_authorized_imports=["pandas", "numpy", "bs4"],
    max_steps=7,
    verbosity_level=1,
)

# Worker given the YouTube, audio-transcription and image tools; limited to
# 7 steps. Unlike web_agent it is also allowed to import "requests".
audiovideo_agent = CodeAgent(
    name="audiovideo_agent",
    description="Extracts data from audio, video, or images.",
    model=groq_model,
    tools=[
        get_youtube_transcript,
        get_youtube_title_description,
        get_text_transcript_from_audio_file,
        analyze_image,
    ],
    additional_authorized_imports=["pandas", "numpy", "bs4", "requests"],
    max_steps=7,
    verbosity_level=1,
)

# ✅ Manager agent
# Top-level coordinator: it receives the two workers through managed_agents
# and has check_final_answer registered as its final-answer check.
manager_agent = CodeAgent(
    name="manager_agent",
    description="Coordinates other agents and returns a final answer.",
    model=groq_model,
    managed_agents=[web_agent, audiovideo_agent],
    tools=[PythonInterpreterTool()],
    additional_authorized_imports=["pandas", "numpy", "bs4"],
    final_answer_checks=[check_final_answer],
    planning_interval=5,
    max_steps=15,
    verbosity_level=2,
)

# ✅ Multi-agent interface
class MultiAgent:
    """Callable front end that sends one question through ``manager_agent``.

    Instances hold no state; calling one builds a prompt around the question
    and returns the manager agent's answer as a string.
    """

    def __init__(self):
        print("MultiAgent initialized.")

    def __call__(self, question: str) -> str:
        """Answer *question* using the module-level ``manager_agent``.

        Args:
            question: The question text; surrounding whitespace is stripped
                before it is placed into the prompt.

        Returns:
            The manager agent's final answer as a ``str``. If anything raises
            while building the prompt or running the agent, the error
            message string is returned instead of propagating the exception.
        """
        mylog(self.__class__.__name__, question)

        try:
            prefix = """
You are the top agent of a multi-agent system that can answer questions by coordinating the work of other agents.
You can use the web_agent to search the web, or the audiovideo_agent to extract info from audio/video/images.
You must reason step by step and respect the required output format.
Only return the final answer in the correct format.
"""
            prompt = prefix.strip() + "\nTHE QUESTION:\n" + question.strip() + "\n" + myprompts.output_format.strip()
            answer = manager_agent.run(prompt)
            # The signature promises a str, but run() is not guaranteed to
            # return one (NOTE(review): the agent's final answer may be any
            # object, e.g. a number — confirm against the smolagents docs).
            # Coerce here, inside the try, so a failing __str__ is also
            # reported through the error path below.
            return answer if isinstance(answer, str) else str(answer)
        except Exception as e:
            # Deliberate best-effort boundary: callers always get a string
            # back, never an exception.
            error = f"An error occurred while processing the question: {e}"
            print(error)
            return error

# ✅ Local test
if __name__ == "__main__":
    # Manual smoke test: run one hard-coded question through the whole system.
    demo_question = "What was the actual enrollment of the Malko competition in 2023?"
    demo_agent = MultiAgent()
    print(f"Answer: {demo_agent(demo_question)}")