Spaces:
Sleeping
Sleeping
Add configuration, graph, runner, and tools modules to enhance agent functionality. Introduce a Configuration class for managing parameters, implement an AgentRunner for executing the agent graph, and create tools for general search and mathematical calculations. Update test_agent.py to reflect new import paths and improve overall code organization.
13388e5
unverified
| import logging | |
| import pytest | |
| from runner import AgentRunner | |
# Logger dedicated to this test module, emitting INFO and above.
test_logger = logging.getLogger("test_agent")
test_logger.setLevel(logging.INFO)

# Module-wide pytest marker: ignore DeprecationWarning raised from httpx._models.
pytestmark = pytest.mark.filterwarnings("ignore::DeprecationWarning:httpx._models")

# Base URL of the remote API and the questions endpoint derived from it.
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
QUESTIONS_URL = f"{DEFAULT_API_URL}/questions"
@pytest.fixture
def agent():
    """Fixture to create and return an AgentRunner instance.

    Registered with ``pytest.fixture`` so a test can receive the runner by
    declaring an ``agent`` parameter. Without the decorator pytest treats
    this as a plain function and cannot inject it.

    Returns:
        AgentRunner: a newly constructed runner.
    """
    test_logger.info("Creating AgentRunner instance")
    return AgentRunner()
| # @pytest.fixture(scope="session") | |
| # def questions_data(): | |
| # """Fixture to fetch questions from the API.""" | |
| # test_logger.info(f"Fetching questions from: {QUESTIONS_URL}") | |
| # try: | |
| # response = requests.get(QUESTIONS_URL, timeout=15) | |
| # response.raise_for_status() | |
| # data = response.json() | |
| # if not data: | |
| # test_logger.error("Fetched questions list is empty.") | |
| # return [] | |
| # test_logger.info(f"Fetched {len(data)} questions.") | |
| # return data | |
| # except requests.exceptions.RequestException as e: | |
| # test_logger.error(f"Error fetching questions: {e}") | |
| # return [] | |
| # except requests.exceptions.JSONDecodeError as e: | |
| # test_logger.error(f"Error decoding JSON response from questions endpoint: {e}") | |
| # return [] | |
| # except Exception as e: | |
| # test_logger.error(f"An unexpected error occurred fetching questions: {e}") | |
| # return [] | |
| # | |
| # class TestAppQuestions: | |
| # """Test cases for questions from the app.""" | |
| # | |
| # def test_first_app_question(self, agent, questions_data): | |
| # """Test the agent's response to the first app question.""" | |
| # if not questions_data: | |
| # pytest.skip("No questions available from API") | |
| # | |
| # first_question = questions_data[0] | |
| # question_text = first_question.get("question") | |
| # task_id = first_question.get("task_id") | |
| # | |
| # if not question_text or not task_id: | |
| # pytest.skip("First question is missing required fields") | |
| # | |
| # test_logger.info(f"Testing with app question: {question_text}") | |
| # | |
| # response = agent(question_text) | |
| # test_logger.info(f"Agent response: {response}") | |
| # | |
| # # Check that the response contains the expected information | |
| # assert "Mercedes Sosa" in response, "Response should mention Mercedes Sosa" | |
| # assert "studio albums" in response.lower(), "Response should mention studio albums" | |
| # assert "2000" in response and "2009" in response, "Response should mention the year range" | |
| # | |
| # # Verify that a number is mentioned (either as word or digit) | |
| # import re | |
| # number_pattern = r'\b(one|two|three|four|five|six|seven|eight|nine|ten|\d+)\b' | |
| # has_number = bool(re.search(number_pattern, response.lower())) | |
| # assert has_number, "Response should include the number of albums" | |
| # | |
| # # Check for album names in the response | |
| # known_albums = [ | |
| # "Corazón Libre", | |
| # "Cantora", | |
| # "Hermano", | |
| # "Acústico", | |
| # "Argentina quiere cantar" | |
| # ] | |
| # found_albums = [album for album in known_albums if album in response] | |
| # assert len(found_albums) > 0, "Response should mention at least some of the known albums" | |
| # | |
| # # Check for a structured response | |
| # assert re.search(r'\d+\.\s+[^(]+\(\d{4}\)', response), \ | |
| # "Response should list albums with years" | |
class TestBasicCodeAgentCapabilities:
    """Checks of the code agent's answers and of the step logs it records."""

    @staticmethod
    def _assert_steps_well_formed(steps):
        """Assert every step has a step_number and at least one content key."""
        for entry in steps:
            assert "step_number" in entry, "Each step should have a step_number"
            assert any(
                field in entry for field in ("thought", "code", "observation")
            ), "Each step should have at least one of: thought, code, or observation"

    def setup_method(self):
        """Build a fresh AgentRunner before each test method runs."""
        test_logger.info("Creating AgentRunner instance")
        self.agent = AgentRunner()

    def test_simple_math_calculation_with_steps(self):
        """The agent answers a small addition and records its steps."""
        question = "What is the result of the following operation: 5 + 3 + 1294.678?"
        test_logger.info(f"Testing math calculation with question: {question}")

        response = self.agent(question)

        # The textual form of the sum must appear verbatim in the answer.
        answer_text = str(5 + 3 + 1294.678)
        assert (
            answer_text in response
        ), f"Response should contain the result {answer_text}"

        # The runner must expose the state of its last run, with step logs.
        state = self.agent.last_state
        assert state is not None, "Agent should store last state"
        assert "step_logs" in state, "State should contain step_logs"
        logged_steps = state["step_logs"]
        assert len(logged_steps) >= 1, "Should have at least one step logged"

        self._assert_steps_well_formed(logged_steps)

        # The answer must be flagged as final.
        assert (
            "final_answer" in response.lower()
        ), "Response should indicate it's providing an answer"

    def test_document_qa_and_image_generation_with_steps(self):
        """The agent handles a search-plus-image request and records its steps."""
        question = "Search for information about the Mona Lisa and generate an image of it."
        test_logger.info(
            f"Testing document QA and image generation with question: {question}"
        )

        response = self.agent(question)

        # Both halves of the request must be reflected in the answer text.
        lowered_response = response.lower()
        assert "mona lisa" in lowered_response, "Response should mention Mona Lisa"
        assert "image" in lowered_response, "Response should mention image generation"

        # More than one step is expected for a two-part task.
        state = self.agent.last_state
        assert state is not None, "Agent should store last state"
        assert "step_logs" in state, "State should contain step_logs"
        steps = state["step_logs"]
        assert len(steps) >= 2, "Should have multiple steps logged"

        # Some step must mention searching and some step must mention an image.
        rendered_steps = [str(step).lower() for step in steps]
        assert any("search" in text for text in rendered_steps), "Should have search steps"
        assert any(
            "image" in text for text in rendered_steps
        ), "Should have image generation steps"

        self._assert_steps_well_formed(steps)
def test_simple_math_calculation_with_steps():
    """Test that the agent can perform a simple math calculation and verify intermediate steps.

    The agent is asked to add 5 + 3 + 1294.678. The test then checks that:

    * the runner stored a last state whose ``step_logs`` is non-empty and
      whose entries each carry ``step_number`` plus at least one of
      ``thought`` / ``code`` / ``observation``;
    * some number in the response (or in a LaTeX boxed answer, when one is
      present) is within 0.001 of 1302.678;
    * the response mentions ``final_answer``.
    """
    # Imported locally so the function does not depend on a module-level import.
    import re

    agent = AgentRunner()
    question = "What is the result of the following operation: 5 + 3 + 1294.678?"

    # Process the question
    response = agent(question)

    # Verify step logs exist and have required fields
    assert agent.last_state is not None, "Last state should be stored"
    step_logs = agent.last_state.get("step_logs", [])
    assert len(step_logs) > 0, "Should have recorded step logs"

    for step in step_logs:
        assert "step_number" in step, "Each step should have a step number"
        assert any(
            key in step for key in ["thought", "code", "observation"]
        ), "Each step should have at least one of thought/code/observation"

    # Verify final answer
    expected_result = 1302.678

    # Prefer the content of a LaTeX \boxed{...} answer; otherwise scan the
    # whole response.
    latex_match = re.search(r"\\boxed{([^}]+)}", response)
    searched_text = latex_match.group(1) if latex_match else response

    # Remove thousands separators first: without this, "1,302.678" would be
    # read as the two numbers "1" and "302.678" and a correct answer rejected.
    searched_text = re.sub(r"(?<=\d),(?=\d{3}(?!\d))", "", searched_text)
    numbers = re.findall(r"\d+\.?\d*", searched_text)

    assert numbers, "Response should contain at least one number"

    # Check if any number matches the expected result
    has_correct_result = any(abs(float(n) - expected_result) < 0.001 for n in numbers)
    assert (
        has_correct_result
    ), f"Response should contain the result {expected_result}, got {response}"

    # Verify the response indicates it's a final answer
    assert (
        "final_answer" in response.lower()
    ), "Response should indicate it's using final_answer"
def test_document_qa_and_image_generation_with_steps():
    """Check a search-plus-image request: step logs, step kinds, final answer."""
    runner = AgentRunner()
    question = "Can you search for information about the Mona Lisa and generate an image inspired by it?"

    response = runner(question)

    # The runner must keep the state of its last run, including step logs.
    state = runner.last_state
    assert state is not None, "Last state should be stored"
    step_logs = state.get("step_logs", [])
    assert len(step_logs) >= 1, "Should have recorded step logs"

    saw_search = False
    saw_image = False
    for entry in step_logs:
        assert "step_number" in entry, "Each step should have a step number"
        assert any(
            field in entry for field in ("thought", "code", "observation")
        ), "Each step should have at least one of thought/code/observation"

        # Only the thought and code of a step are inspected for keywords.
        text = "".join(str(entry.get(part, "")) for part in ("thought", "code")).lower()
        saw_search = saw_search or "search" in text
        saw_image = saw_image or "image" in text or "dalle" in text

    assert saw_search, "Should include a search step"
    assert saw_image, "Should include an image generation step"

    assert (
        "final_answer" in response.lower()
    ), "Response should indicate it's using final_answer"
if __name__ == "__main__":
    # Direct execution: hand this file to pytest with -s, -v and -x.
    pytest_args = [__file__, "-s", "-v", "-x"]
    pytest.main(pytest_args)