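"""Tests for LLMCache and CachedLLMService.

Covers cache set/get, misses, clearing, metadata-dependent keys, and that
CachedLLMService only invokes the underlying LLMService on cache misses.
"""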
import os
import tempfile
from unittest.mock import AsyncMock

import pytest

from vsp.llm.cached_llm_service import CachedLLMService
from vsp.llm.llm_cache import LLMCache
from vsp.llm.llm_service import LLMService


@pytest.fixture
def llm_cache():
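    """Yield an LLMCache backed by a temporary database file, removed after each test."""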
    temp_db_file = tempfile.NamedTemporaryFile(delete=False, suffix=".db")
    temp_db_file.close()
    cache = LLMCache(temp_db_file.name)
    yield cache
    # Clean up the temporary database file after the test
    os.unlink(temp_db_file.name)


def test_llm_cache_set_and_get(llm_cache):
    prompt = "What is the capital of France?"
    response = "The capital of France is Paris."
    metadata = {"model": "test_model", "temperature": 0.7}

    # Test setting a value in the cache
    llm_cache.set(prompt, response, metadata)

    # Test getting the value from the cache
    cached_response = llm_cache.get(prompt, metadata)
    assert cached_response == response


def test_llm_cache_get_nonexistent(llm_cache):
    prompt = "What is the capital of Germany?"
    metadata = {"model": "test_model", "temperature": 0.7}

    # Test getting a non-existent value from the cache
    cached_response = llm_cache.get(prompt, metadata)
    assert cached_response is None


def test_llm_cache_clear(llm_cache):
    prompt1 = "What is the capital of France?"
    response1 = "The capital of France is Paris."
    prompt2 = "What is the capital of Italy?"
    response2 = "The capital of Italy is Rome."
    metadata = {"model": "test_model", "temperature": 0.7}

    # Set multiple values in the cache
    llm_cache.set(prompt1, response1, metadata)
    llm_cache.set(prompt2, response2, metadata)

    # Clear the cache
    llm_cache.clear()

    # Verify that the cache is empty
    assert llm_cache.get(prompt1, metadata) is None
    assert llm_cache.get(prompt2, metadata) is None


def test_llm_cache_different_metadata(llm_cache):
    prompt = "What is the capital of France?"
    response1 = "The capital of France is Paris."
    response2 = "La capitale de la France est Paris."
    metadata1 = {"model": "test_model_en", "temperature": 0.7}
    metadata2 = {"model": "test_model_fr", "temperature": 0.7}

    # Set values with different metadata
    llm_cache.set(prompt, response1, metadata1)
    llm_cache.set(prompt, response2, metadata2)

    # Verify that different metadata produces different cache results
    assert llm_cache.get(prompt, metadata1) == response1
    assert llm_cache.get(prompt, metadata2) == response2


@pytest.mark.asyncio
async def test_cached_llm_service(llm_cache):
    # Create a mock LLMService
    mock_llm_service = AsyncMock(spec=LLMService)
    mock_llm_service.invoke.side_effect = ["First response", "Second response", "Third response"]

    # Create a CachedLLMService backed by the temporary cache fixture
    cached_service = CachedLLMService(mock_llm_service, llm_cache)

    # Test first call (should use the mock service)
    response1 = await cached_service.invoke(user_prompt="Test prompt 1", max_tokens=10, temperature=0.5)
    assert response1 == "First response"
    mock_llm_service.invoke.assert_called_once()

    # Test second call with the same parameters (should use cache)
    response2 = await cached_service.invoke(user_prompt="Test prompt 1", max_tokens=10, temperature=0.5)
    assert response2 == "First response"
    assert mock_llm_service.invoke.call_count == 1  # Should not have increased

    # Test third call with different parameters (should use the mock service again)
    response3 = await cached_service.invoke(user_prompt="Test prompt 2", max_tokens=20, temperature=0.7)
    assert response3 == "Second response"
    assert mock_llm_service.invoke.call_count == 2

    # Test fourth call with system and partial assistant prompts
    response4 = await cached_service.invoke(
        user_prompt="Test prompt 3",
        system_prompt="System prompt",
        partial_assistant_prompt="Partial assistant prompt",
        max_tokens=30,
        temperature=0.8,
    )
    assert response4 == "Third response"
    assert mock_llm_service.invoke.call_count == 3

    # Test fifth call with the same parameters as the fourth (should use cache)
    response5 = await cached_service.invoke(
        user_prompt="Test prompt 3",
        system_prompt="System prompt",
        partial_assistant_prompt="Partial assistant prompt",
        max_tokens=30,
        temperature=0.8,
    )
    assert response5 == "Third response"
    assert mock_llm_service.invoke.call_count == 3  # Should not have increased
