|
|
|
|
|
import os |
|
|
import dotenv |
|
|
|
|
|
from smolagents import CodeAgent, PythonInterpreterTool |
|
|
from smolagents.models.openai_server_model import OpenAIServerModel as BaseOpenAIServerModel |
|
|
|
|
|
from tools.fetch import fetch_webpage, search_web |
|
|
from tools.yttranscript import get_youtube_transcript, get_youtube_title_description |
|
|
from tools.stt import get_text_transcript_from_audio_file |
|
|
from tools.image import analyze_image |
|
|
from common.mylogger import mylog |
|
|
import myprompts |
|
|
|
|
|
|
|
|
# Read a local .env file (if any) into the process environment up front, so
# the os.environ lookups further down can find their keys.
dotenv.load_dotenv()
|
|
|
|
|
|
|
|
class PatchedOpenAIServerModel(BaseOpenAIServerModel):
    """Server model wrapper that forwards only plain-string message content.

    ``complete_chat`` coerces each message's ``content`` to ``str`` before
    delegating to the base class, so non-string content is never passed on
    as-is.
    """

    def complete_chat(self, messages, **kwargs):
        """Delegate to the base ``complete_chat`` with string-only contents.

        Args:
            messages: Iterable of dict-like chat messages. Messages that have
                no ``"content"`` key, or whose content is already a string,
                are forwarded unchanged.
            **kwargs: Passed through to the base implementation untouched.

        Returns:
            Whatever the base class's ``complete_chat`` returns.
        """
        normalized = []
        for msg in messages:
            content = msg.get("content", "")
            if isinstance(content, str):
                normalized.append(msg)
                continue
            # Patch a shallow copy: the caller's message objects must keep
            # their original (structured) content after this call.
            patched = dict(msg)
            # A missing body (None) becomes an empty string instead of the
            # literal text "None" leaking into the conversation.
            patched["content"] = "" if content is None else str(content)
            normalized.append(patched)
        return super().complete_chat(normalized, **kwargs)
|
|
|
|
|
|
|
|
# Single model instance handed to every CodeAgent defined in this module.
# Note: indexing os.environ raises KeyError at import time when GROQ_API_KEY
# is not set.
groq_model = PatchedOpenAIServerModel(
    api_base="https://api.groq.com/openai/v1",
    api_key=os.environ["GROQ_API_KEY"],
    model_id="llama3-70b-8192",
)
|
|
|
|
|
|
|
|
def check_final_answer(final_answer, agent_memory) -> bool:
    """Log the candidate answer and accept it only if it is short enough.

    Args:
        final_answer: Candidate answer; it is measured via its ``str()`` form.
        agent_memory: Not used by this check; kept as part of the signature.

    Returns:
        True when the stringified answer has 200 characters or fewer,
        False otherwise.
    """
    mylog("check_final_answer", final_answer)
    rendered = str(final_answer)
    return not len(rendered) > 200
|
|
|
|
|
|
|
|
# Sub-agent equipped with the search_web and fetch_webpage tools.
web_agent = CodeAgent(
    name="web_agent",
    description="Uses search engine and scrapes webpages for content.",
    model=groq_model,
    tools=[search_web, fetch_webpage],
    additional_authorized_imports=["pandas", "numpy", "bs4"],
    max_steps=7,
    verbosity_level=1,
)
|
|
|
|
|
# Sub-agent equipped with the YouTube, audio-transcription and image tools.
audiovideo_agent = CodeAgent(
    name="audiovideo_agent",
    description="Extracts data from audio, video, or images.",
    model=groq_model,
    tools=[
        get_youtube_transcript,
        get_youtube_title_description,
        get_text_transcript_from_audio_file,
        analyze_image,
    ],
    additional_authorized_imports=["pandas", "numpy", "bs4", "requests"],
    max_steps=7,
    verbosity_level=1,
)
|
|
|
|
|
|
|
|
# Top-level agent: manages web_agent and audiovideo_agent, and has every
# final answer vetted by check_final_answer.
manager_agent = CodeAgent(
    name="manager_agent",
    description="Coordinates other agents and returns a final answer.",
    model=groq_model,
    tools=[PythonInterpreterTool()],
    managed_agents=[web_agent, audiovideo_agent],
    additional_authorized_imports=["pandas", "numpy", "bs4"],
    final_answer_checks=[check_final_answer],
    planning_interval=5,
    max_steps=15,
    verbosity_level=2,
)
|
|
|
|
|
|
|
|
class MultiAgent:
    """Callable front end that answers a question through ``manager_agent``."""

    # Fixed instruction header placed in front of every question. Kept as a
    # class constant so it is not rebuilt on each call.
    _PROMPT_PREFIX = (
        "You are the top agent of a multi-agent system that can answer questions by coordinating the work of other agents.\n"
        "You can use the web_agent to search the web, or the audiovideo_agent to extract info from audio/video/images.\n"
        "You must reason step by step and respect the required output format.\n"
        "Only return the final answer in the correct format."
    )

    def __init__(self):
        """Announce construction on stdout; no state is stored."""
        print("MultiAgent initialized.")

    def __call__(self, question: str) -> str:
        """Run the manager agent on ``question`` and return its answer as text.

        The prompt is the instruction header, a ``THE QUESTION:`` marker, the
        stripped question, and the stripped ``myprompts.output_format``, each
        separated by a newline.

        Args:
            question: The question to answer.

        Returns:
            The agent's answer converted to ``str``. If anything fails while
            building the prompt or running the agent, the error message is
            printed and returned instead of raising.
        """
        mylog(self.__class__.__name__, question)

        try:
            prompt = "\n".join(
                (
                    self._PROMPT_PREFIX,
                    "THE QUESTION:",
                    question.strip(),
                    myprompts.output_format.strip(),
                )
            )
            answer = manager_agent.run(prompt)
            # The agent may hand back a non-string object (number, list, ...);
            # honor the declared ``-> str`` contract for callers.
            return str(answer)
        except Exception as e:
            # Deliberate catch-all at this boundary: callers always receive a
            # string, never an exception.
            error = f"An error occurred while processing the question: {e}"
            print(error)
            return error
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke run: ask one sample question and print what comes back.
    sample_question = "What was the actual enrollment of the Malko competition in 2023?"
    print(f"Answer: {MultiAgent()(sample_question)}")
|
|
|