import asyncio
import os
import tempfile
from unittest.mock import patch

import pytest

from vsp.llm.cached_llm_service import CachedLLMService
from vsp.llm.llm_cache import LLMCache
from vsp.llm.openrouter.openrouter import AsyncOpenRouterService
from vsp.shared import logger_factory

logger = logger_factory.get_logger(__name__)


@pytest.mark.asyncio
async def test_openrouter_integration():
    """
    Integration test for AsyncOpenRouterService.

    This test makes an actual API call to OpenRouter using the
    nousresearch/hermes-3-llama-3.1-405b:free model. It requires a valid
    OpenRouter API key to be set in the AWS Parameter Store.

    Note: This test should be run sparingly to avoid unnecessary API calls
    and potential costs.
    """
    model = "nousresearch/hermes-3-llama-3.1-405b:free"
    service = AsyncOpenRouterService(model)

    # The service instance is itself a context-manager factory; entering it
    # yields the live client used for the actual API call.
    async with service() as openrouter:
        try:
            response = await openrouter.invoke(
                user_prompt="What is the capital of France?",
                max_tokens=100,
                temperature=0.7,
            )

            # Log the response
            logger.info("OpenRouter API Response", response=response)

            # Assertions to verify the response
            assert response is not None
            assert isinstance(response, str)
            assert len(response) > 0
            assert "Paris" in response

            logger.info("Integration test passed successfully")
        except Exception as e:
            # Log for diagnosis, then re-raise so pytest still reports failure.
            logger.error("Integration test failed", error=str(e))
            raise


@pytest.mark.asyncio
async def test_cached_openrouter_integration():
    """
    Integration test for CachedLLMService with AsyncOpenRouterService.

    This test verifies that:
    1. The first call goes to OpenRouter
    2. The second call with the same prompt returns the cached response
    3. A new prompt triggers another call to OpenRouter
    """
    model = "nousresearch/hermes-3-llama-3.1-405b:free"
    openrouter_service = AsyncOpenRouterService(model)

    # Use a throwaway on-disk SQLite-style cache file so runs don't interfere.
    temp_db_file = tempfile.NamedTemporaryFile(delete=False, suffix=".db")
    temp_db_file.close()

    cache = LLMCache(temp_db_file.name)
    cached_service = CachedLLMService(openrouter_service, cache)

    try:
        async with openrouter_service() as service:
            # NOTE(review): reaches into a private attribute to swap in the
            # context-managed client — presumably required by CachedLLMService's
            # construction order; confirm against its implementation.
            cached_service._llm_service = service  # Update the service with the context manager

            # Mock the invoke method to track calls
            with patch.object(service, "invoke", wraps=service.invoke) as mock_invoke:
                # First call — must reach OpenRouter.
                response1 = await cached_service.invoke(
                    user_prompt="What is the capital of France?",
                    max_tokens=100,
                    temperature=0.7,
                )
                assert mock_invoke.call_count == 1
                assert response1 is not None
                assert "Paris" in response1

                # Second call with the same prompt — must be served from cache.
                response2 = await cached_service.invoke(
                    user_prompt="What is the capital of France?",
                    max_tokens=100,
                    temperature=0.7,
                )
                assert mock_invoke.call_count == 1  # Should not have increased
                assert response2 == response1

                # Third call with a different prompt — must reach OpenRouter again.
                response3 = await cached_service.invoke(
                    user_prompt="What is the capital of Spain?",
                    max_tokens=100,
                    temperature=0.7,
                )
                assert mock_invoke.call_count == 2
                assert response3 is not None
                assert response3 != response1
                assert "Madrid" in response3

        logger.info("Cached OpenRouter integration test passed successfully")
    finally:
        # Clean up the temporary database file
        os.unlink(temp_db_file.name)


if __name__ == "__main__":
    asyncio.run(test_openrouter_integration())
    asyncio.run(test_cached_openrouter_integration())