navkast committed
Tidy up VSP implementation so far (#9)

* feat: Add integration test for LLM caching using OpenRouter
* fix: Fix linter issues in test_integration_openrouter.py
* feat: Add unit tests for LLMCache
* style: Fix linter issues
* feat: Add unit test for CachedLLMService
* style: Run linter
* fix: Ensure LLMCache table is created
* fix: Update the service with the context manager in CachedLLMService
* refactor: replace in-memory SQLite databases with temporary files
* style: Fix import order in test files
* fix: wrap `async with` block in `try-finally` to clean up temporary database file
* Commit some tests
src/vsp/llm/cached_llm_service.py (CHANGED)

@@ -7,8 +7,8 @@ logger = logger_factory.get_logger(__name__)
 
 class CachedLLMService(LLMService):
     def __init__(self, llm_service: LLMService, cache: LLMCache | None = None):
-        self.
-        self.
+        self._llm_service = llm_service
+        self._cache = cache or LLMCache()
 
     async def invoke(
         self,
@@ -19,12 +19,12 @@ class CachedLLMService(LLMService):
         temperature: float = 0.0,
     ) -> str | None:
         cache_key = f"{user_prompt}_{system_prompt}_{partial_assistant_prompt}_{max_tokens}_{temperature}"
-        cached_response = self.
+        cached_response = self._cache.get(cache_key, {})
         if cached_response is not None:
             logger.debug("LLM cache hit")
             return cached_response
 
-        response = await self.
+        response = await self._llm_service.invoke(
             user_prompt=user_prompt,
             system_prompt=system_prompt,
             partial_assistant_prompt=partial_assistant_prompt,
@@ -33,6 +33,6 @@ class CachedLLMService(LLMService):
         )
 
         if response is not None:
-            self.
+            self._cache.set(cache_key, response, {})
 
         return response
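For orientation, the wrapper builds a cache key from the full prompt/parameter tuple, returns a cached response when one exists, and otherwise delegates to the wrapped service and stores the result. A minimal usage sketch, assuming LLMService can be subclassed directly and that max_tokens must be passed explicitly; EchoService and the llm_cache.db path are invented for illustration and are not part of this commit:

# Minimal usage sketch of CachedLLMService. EchoService and llm_cache.db are
# made up for illustration; only the public signatures come from the diff.
import asyncio

from vsp.llm.cached_llm_service import CachedLLMService
from vsp.llm.llm_cache import LLMCache
from vsp.llm.llm_service import LLMService


class EchoService(LLMService):
    async def invoke(self, user_prompt: str, **kwargs) -> str | None:
        return f"echo: {user_prompt}"


async def main() -> None:
    service = CachedLLMService(EchoService(), LLMCache("llm_cache.db"))
    first = await service.invoke(user_prompt="hello", max_tokens=32)   # hits EchoService
    second = await service.invoke(user_prompt="hello", max_tokens=32)  # served from the SQLite cache
    assert first == second


asyncio.run(main())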
src/vsp/llm/llm_cache.py (CHANGED)

@@ -12,7 +12,7 @@ class LLMCache:
         self._init_db()
 
     def _init_db(self) -> None:
-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(self.db_path, autocommit=True) as conn:
             conn.execute(
                 """
                 CREATE TABLE IF NOT EXISTS llm_cache (
@@ -21,7 +21,7 @@ class LLMCache:
                     response TEXT,
                     metadata TEXT
                 )
-
+                """
             )
 
     def _hash_prompt(self, prompt: str, metadata: dict[str, Any]) -> str:
@@ -30,7 +30,7 @@ class LLMCache:
 
     def get(self, prompt: str, metadata: dict[str, Any]) -> str | None:
         prompt_hash = self._hash_prompt(prompt, metadata)
-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(self.db_path, autocommit=True) as conn:
            cursor = conn.cursor()
            cursor.execute("SELECT response FROM llm_cache WHERE prompt_hash = ?", (prompt_hash,))
            result = cursor.fetchone()
@@ -38,12 +38,12 @@ class LLMCache:
 
     def set(self, prompt: str, response: str, metadata: dict[str, Any]) -> None:
         prompt_hash = self._hash_prompt(prompt, metadata)
-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(self.db_path, autocommit=True) as conn:
             conn.execute(
                 "INSERT OR REPLACE INTO llm_cache (prompt_hash, prompt, response, metadata) VALUES (?, ?, ?, ?)",
                 (prompt_hash, prompt, response, json.dumps(metadata)),
             )
 
     def clear(self) -> None:
-        with sqlite3.connect(self.db_path) as conn:
+        with sqlite3.connect(self.db_path, autocommit=True) as conn:
             conn.execute("DELETE FROM llm_cache")
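The autocommit=True change is easier to follow with the standard-library semantics in mind: sqlite3.connect() gained the autocommit parameter in Python 3.12, and with autocommit=True every statement takes effect immediately, while the connection used as a context manager only commits or rolls back a pending transaction and never closes the connection. A short sketch of those semantics (standard-library behaviour, not code from this repository; it mirrors the temp-file pattern the tests use):

# Demonstrates the sqlite3 behaviour the change above relies on (stdlib
# semantics, not repository code). Requires Python 3.12+ for autocommit=True.
import os
import sqlite3
import tempfile

tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".db")
tmp.close()

with sqlite3.connect(tmp.name, autocommit=True) as conn:
    # Each statement is committed as soon as it runs.
    conn.execute("CREATE TABLE IF NOT EXISTS kv (k TEXT PRIMARY KEY, v TEXT)")
    conn.execute("INSERT OR REPLACE INTO kv VALUES (?, ?)", ("greeting", "hello"))

# A second, independent connection sees the rows immediately.
with sqlite3.connect(tmp.name, autocommit=True) as other:
    assert other.execute("SELECT v FROM kv WHERE k = ?", ("greeting",)).fetchone() == ("hello",)

# The `with` blocks above did not close the connections; do it explicitly.
conn.close()
other.close()
os.unlink(tmp.name)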
tests/vsp/llm/openrouter/test_integration_openrouter.py (CHANGED)

@@ -1,7 +1,10 @@
 import asyncio
+from unittest.mock import patch
 
 import pytest
 
+from vsp.llm.cached_llm_service import CachedLLMService
+from vsp.llm.llm_cache import LLMCache
 from vsp.llm.openrouter.openrouter import AsyncOpenRouterService
 from vsp.shared import logger_factory
 
@@ -44,5 +47,62 @@ async def test_openrouter_integration():
         raise
 
 
+@pytest.mark.asyncio
+async def test_cached_openrouter_integration():
+    """
+    Integration test for CachedLLMService with AsyncOpenRouterService.
+
+    This test verifies that:
+    1. The first call goes to OpenRouter
+    2. The second call with the same prompt returns the cached response
+    3. A new prompt triggers another call to OpenRouter
+    """
+    model = "nousresearch/hermes-3-llama-3.1-405b:free"
+    openrouter_service = AsyncOpenRouterService(model)
+    import os
+    import tempfile
+
+    temp_db_file = tempfile.NamedTemporaryFile(delete=False, suffix=".db")
+    temp_db_file.close()
+    cache = LLMCache(temp_db_file.name)
+    cached_service = CachedLLMService(openrouter_service, cache)
+
+    try:
+        async with openrouter_service() as service:
+            cached_service._llm_service = service  # Update the service with the context manager
+
+            # Mock the invoke method to track calls
+            with patch.object(service, "invoke", wraps=service.invoke) as mock_invoke:
+                # First call
+                response1 = await cached_service.invoke(
+                    user_prompt="What is the capital of France?", max_tokens=100, temperature=0.7
+                )
+                assert mock_invoke.call_count == 1
+                assert response1 is not None
+                assert "Paris" in response1
+
+                # Second call with the same prompt
+                response2 = await cached_service.invoke(
+                    user_prompt="What is the capital of France?", max_tokens=100, temperature=0.7
+                )
+                assert mock_invoke.call_count == 1  # Should not have increased
+                assert response2 == response1
+
+                # Third call with a different prompt
+                response3 = await cached_service.invoke(
+                    user_prompt="What is the capital of Spain?", max_tokens=100, temperature=0.7
+                )
+                assert mock_invoke.call_count == 2
+                assert response3 is not None
+                assert response3 != response1
+                assert "Madrid" in response3
+
+            logger.info("Cached OpenRouter integration test passed successfully")
+    finally:
+        # Clean up the temporary database file
+        os.unlink(temp_db_file.name)
+
+
 if __name__ == "__main__":
     asyncio.run(test_openrouter_integration())
+    asyncio.run(test_cached_openrouter_integration())
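The test above counts real OpenRouter calls by wrapping service.invoke with patch.object(..., wraps=service.invoke), so a cache hit shows up as an unchanged call count. A minimal, synchronous illustration of that unittest.mock pattern (Greeter is made up for the example; the test applies the same idea to the async invoke method):

# A mock created with `wraps=` delegates to the real callable while still
# recording how many times it was invoked.
from unittest.mock import patch


class Greeter:
    def greet(self, name: str) -> str:
        return f"Hello, {name}!"


greeter = Greeter()
with patch.object(greeter, "greet", wraps=greeter.greet) as mock_greet:
    assert greeter.greet("Ada") == "Hello, Ada!"  # real behaviour preserved
    assert mock_greet.call_count == 1             # and the call was recorded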
tests/vsp/llm/test_llm_cache.py (ADDED, 129 lines)

import os
import tempfile
from unittest.mock import AsyncMock

import pytest

from vsp.llm.cached_llm_service import CachedLLMService
from vsp.llm.llm_cache import LLMCache
from vsp.llm.llm_service import LLMService


@pytest.fixture
def llm_cache():
    temp_db_file = tempfile.NamedTemporaryFile(delete=False, suffix=".db")
    temp_db_file.close()
    cache = LLMCache(temp_db_file.name)
    yield cache
    # Clean up the temporary database file after the test
    os.unlink(temp_db_file.name)


def test_llm_cache_set_and_get(llm_cache):
    prompt = "What is the capital of France?"
    response = "The capital of France is Paris."
    metadata = {"model": "test_model", "temperature": 0.7}

    # Test setting a value in the cache
    llm_cache.set(prompt, response, metadata)

    # Test getting the value from the cache
    cached_response = llm_cache.get(prompt, metadata)
    assert cached_response == response


def test_llm_cache_get_nonexistent(llm_cache):
    prompt = "What is the capital of Germany?"
    metadata = {"model": "test_model", "temperature": 0.7}

    # Test getting a non-existent value from the cache
    cached_response = llm_cache.get(prompt, metadata)
    assert cached_response is None


def test_llm_cache_clear(llm_cache):
    prompt1 = "What is the capital of France?"
    response1 = "The capital of France is Paris."
    prompt2 = "What is the capital of Italy?"
    response2 = "The capital of Italy is Rome."
    metadata = {"model": "test_model", "temperature": 0.7}

    # Set multiple values in the cache
    llm_cache.set(prompt1, response1, metadata)
    llm_cache.set(prompt2, response2, metadata)

    # Clear the cache
    llm_cache.clear()

    # Verify that the cache is empty
    assert llm_cache.get(prompt1, metadata) is None
    assert llm_cache.get(prompt2, metadata) is None


def test_llm_cache_different_metadata(llm_cache):
    prompt = "What is the capital of France?"
    response1 = "The capital of France is Paris."
    response2 = "La capitale de la France est Paris."
    metadata1 = {"model": "test_model_en", "temperature": 0.7}
    metadata2 = {"model": "test_model_fr", "temperature": 0.7}

    # Set values with different metadata
    llm_cache.set(prompt, response1, metadata1)
    llm_cache.set(prompt, response2, metadata2)

    # Verify that different metadata produces different cache results
    assert llm_cache.get(prompt, metadata1) == response1
    assert llm_cache.get(prompt, metadata2) == response2


@pytest.mark.asyncio
async def test_cached_llm_service():
    # Create a mock LLMService
    mock_llm_service = AsyncMock(spec=LLMService)
    mock_llm_service.invoke.side_effect = ["First response", "Second response", "Third response"]

    # Create a CachedLLMService with the mock service
    temp_db_file = tempfile.NamedTemporaryFile(delete=False, suffix=".db")
    temp_db_file.close()
    cache = LLMCache(temp_db_file.name)
    cached_service = CachedLLMService(mock_llm_service, cache)

    # Test first call (should use the mock service)
    response1 = await cached_service.invoke(user_prompt="Test prompt 1", max_tokens=10, temperature=0.5)
    assert response1 == "First response"
    mock_llm_service.invoke.assert_called_once()

    # Test second call with the same parameters (should use cache)
    response2 = await cached_service.invoke(user_prompt="Test prompt 1", max_tokens=10, temperature=0.5)
    assert response2 == "First response"
    assert mock_llm_service.invoke.call_count == 1  # Should not have increased

    # Test third call with different parameters (should use the mock service again)
    response3 = await cached_service.invoke(user_prompt="Test prompt 2", max_tokens=20, temperature=0.7)
    assert response3 == "Second response"
    assert mock_llm_service.invoke.call_count == 2

    # Test fourth call with system and partial assistant prompts
    response4 = await cached_service.invoke(
        user_prompt="Test prompt 3",
        system_prompt="System prompt",
        partial_assistant_prompt="Partial assistant prompt",
        max_tokens=30,
        temperature=0.8,
    )
    assert response4 == "Third response"
    assert mock_llm_service.invoke.call_count == 3

    # Test fifth call with the same parameters as the fourth (should use cache)
    response5 = await cached_service.invoke(
        user_prompt="Test prompt 3",
        system_prompt="System prompt",
        partial_assistant_prompt="Partial assistant prompt",
        max_tokens=30,
        temperature=0.8,
    )
    assert response5 == "Third response"
    assert mock_llm_service.invoke.call_count == 3  # Should not have increased

    # Clean up the temporary database file
    os.unlink(temp_db_file.name)
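test_llm_cache_different_metadata only passes if LLMCache._hash_prompt folds the metadata into the key alongside the prompt. The method body is not part of this commit; a hypothetical implementation consistent with that behaviour might look like the following (illustrative only, not the repository's actual code):

# Hypothetical stand-in for LLMCache._hash_prompt; the real implementation is
# not shown in this diff. It only needs to be deterministic and to change
# whenever either the prompt or the metadata changes.
import hashlib
import json
from typing import Any


def hash_prompt(prompt: str, metadata: dict[str, Any]) -> str:
    payload = json.dumps({"prompt": prompt, "metadata": metadata}, sort_keys=True)
    return hashlib.sha256(payload.encode("utf-8")).hexdigest()


assert hash_prompt("hi", {"model": "a"}) != hash_prompt("hi", {"model": "b"})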