"""A multi-agent proposal to solve the HF agent course final assignment.

The module builds a manager ``CodeAgent`` that delegates to two managed
agents (``web_agent`` and ``audiovideo_agent``) and exposes it through the
callable ``MultiAgent`` class.
"""

import os

import dotenv
from smolagents import CodeAgent, OpenAIServerModel, PythonInterpreterTool

from common.mylogger import mylog
import myprompts
from tools.fetch import fetch_webpage, search_web
from tools.image import analyze_image
from tools.stt import get_text_transcript_from_audio_file
from tools.yttranscript import (
    get_youtube_title_description,
    get_youtube_transcript,
)

# Load variables from a .env file before any API key is read below.
dotenv.load_dotenv()

# Answers whose string form is longer than this are rejected by
# check_final_answer.
MAX_FINAL_ANSWER_CHARS = 200

# Not referenced by the agents defined in this file; kept as a module-level
# name because other code may import it.
gemini_model = OpenAIServerModel(
    model_id="gemini-2.0-flash",
    api_key=os.environ["GEMINI_API_KEY"],
    # Google Gemini OpenAI-compatible API base URL
    api_base="https://generativelanguage.googleapis.com/v1beta/openai/",
)

# OpenAI-compatible server on a private network address. Not referenced by
# the agents defined in this file.
vllm_model = OpenAIServerModel(
    model_id="Qwen/Qwen2.5-1.5B-Instruct",
    api_base="http://192.168.1.39:18000/v1",
    api_key="token-abc123",
)

# NOTE: despite the ``openai_41*`` names, both models below are configured
# with the Groq endpoint and the ``llama3-70b-8192`` model id. The names are
# left untouched so existing references keep working.
openai_41nano_model = OpenAIServerModel(
    model_id="llama3-70b-8192",
    api_base="https://api.groq.com/openai/v1",
    api_key=os.environ["GROQ_API_KEY"],
)

openai_41mini_model = OpenAIServerModel(
    model_id="llama3-70b-8192",
    api_base="https://api.groq.com/openai/v1",
    api_key=os.environ["GROQ_API_KEY"],
)


def check_final_answer(final_answer, agent_memory, agent=None) -> bool:
    """Sanity-check a candidate final answer by its length.

    This is a heuristic, not a correctness check: the answer is logged and
    accepted only when its string form has at most
    ``MAX_FINAL_ANSWER_CHARS`` characters.

    Args:
        final_answer: Candidate answer; it is converted with ``str()``
            before its length is measured.
        agent_memory: Unused. Present because the check is registered in
            ``final_answer_checks`` and is called with the agent memory.
        agent: Unused. Accepted so the check does not fail with a
            ``TypeError`` if the caller also passes the agent.
            NOTE(review): newer smolagents releases appear to call checks
            with ``agent=...`` - confirm against the installed version.

    Returns:
        ``True`` when the answer is short enough, ``False`` otherwise.
    """
    mylog("check_final_answer", final_answer)
    return len(str(final_answer)) <= MAX_FINAL_ANSWER_CHARS


web_agent = CodeAgent(
    model=openai_41nano_model,
    tools=[
        search_web,
        fetch_webpage,
    ],
    name="web_agent",
    description="Use search engine to find webpages related to a subject and get the page content",
    additional_authorized_imports=["pandas", "numpy", "bs4"],
    verbosity_level=1,
    max_steps=7,
)

audiovideo_agent = CodeAgent(
    model=openai_41nano_model,
    tools=[
        get_youtube_transcript,
        get_youtube_title_description,
        get_text_transcript_from_audio_file,
        analyze_image,
    ],
    name="audiovideo_agent",
    description="Extracts information from image, video or audio files from the web",
    additional_authorized_imports=["pandas", "numpy", "bs4", "requests"],
    verbosity_level=1,
    max_steps=7,
)

# Top-level agent: delegates to the two managed agents above and validates
# every final answer with check_final_answer.
manager_agent = CodeAgent(
    model=openai_41mini_model,
    tools=[PythonInterpreterTool()],
    managed_agents=[web_agent, audiovideo_agent],
    additional_authorized_imports=["pandas", "numpy", "bs4"],
    planning_interval=5,
    verbosity_level=2,
    final_answer_checks=[check_final_answer],
    max_steps=15,
    name="manager_agent",
    description="A manager agent that coordinates the work of other agents to answer questions.",
)


class MultiAgent:
    """Callable wrapper that answers a question through ``manager_agent``."""

    def __init__(self):
        # NOTE: the message says "BasicAgent" although the class is
        # MultiAgent; the text is kept so the program output is unchanged.
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Run the manager agent on *question* and return its answer.

        The prompt is made of a fixed instruction prefix, the stripped
        question, and ``myprompts.output_format``.

        Args:
            question: The question to answer.

        Returns:
            The agent result as a string. If anything raises while the
            prompt is built or the agent runs, the error message is printed
            and returned instead of an answer.
        """
        mylog(self.__class__.__name__, question)

        try:
            prefix = (
                "You are the top agent of a multi-agent system that can answer questions "
                "by coordinating the work of other agents.\n"
                "- Use `web_agent` to search or fetch webpage content.\n"
                "- Use `audiovideo_agent` to process YouTube videos, images, or audio files.\n"
                "- You can also use your own reasoning.\n\n"
                "Find the correct answer step by step, then format your output properly."
            )

            # An f-string always evaluates to ``str``, so no further type
            # coercion of the prompt is needed.
            full_prompt = f"{prefix.strip()}\n\nTHE QUESTION:\n{question.strip()}\n\n{myprompts.output_format.strip()}"

            result = manager_agent.run(full_prompt)

            # The agent may return a non-string object; callers always get
            # a string. No truncation is applied here.
            return result if isinstance(result, str) else str(result)

        except Exception as e:
            # Top-level boundary: report the failure as the answer rather
            # than propagating it to the caller.
            error = f"An error occurred while processing the question: {e}"
            print(error)
            return error


if __name__ == "__main__":
    # Example usage
    question = """ What was the actual enrollment of the Malko competition in 2023? """
    agent = MultiAgent()
    answer = agent(question)
    print(f"Answer: {answer}")