from types import SimpleNamespace

import pytest
import torch

from cognitive_mapping_probe.llm_iface import LLM, StableLLMConfig


@pytest.fixture(scope="session")
def mock_llm_config():
    """Provides a minimal mock configuration for the LLM."""
    return SimpleNamespace(
        hidden_size=128,
        num_hidden_layers=2,
        num_attention_heads=4,
    )
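
# Usage sketch (illustrative): pytest injects the fixture by name, e.g.
#
#   def test_reads_config(mock_llm_config):
#       assert mock_llm_config.hidden_size == 128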


@pytest.fixture
def mock_llm(mocker, mock_llm_config):
    """
    Creates a robust mock LLM for unit tests.
    FINAL FIX: now simulates the complete `StableLLMConfig` abstraction.
    """
    # Tokenizer stub: only the attributes the probe code touches are set.
    mock_tokenizer = mocker.MagicMock()
    mock_tokenizer.eos_token_id = 1
    mock_tokenizer.decode.return_value = "mocked text"

    # Embedding stub exposing a plausible (vocab_size, hidden_size) weight shape.
    mock_embedding_layer = mocker.MagicMock()
    mock_embedding_layer.weight.shape = (32000, mock_llm_config.hidden_size)

    def mock_model_forward(*args, **kwargs):
        # Infer seq_len from fresh input_ids or, during incremental decoding,
        # from the cached keys/values plus one new token.
        batch_size = 1
        seq_len = 1
        if kwargs.get('input_ids') is not None:
            seq_len = kwargs['input_ids'].shape[1]
        elif kwargs.get('past_key_values') is not None:
            seq_len = kwargs['past_key_values'][0][0].shape[-2] + 1

        mock_outputs = {
            # One hidden-state tensor per layer plus the embedding output.
            "hidden_states": tuple(
                torch.randn(batch_size, seq_len, mock_llm_config.hidden_size)
                for _ in range(mock_llm_config.num_hidden_layers + 1)
            ),
            # Per-layer (key, value) cache tensors with a fixed head_dim of 16.
            "past_key_values": tuple(
                (
                    torch.randn(batch_size, mock_llm_config.num_attention_heads, seq_len, 16),
                    torch.randn(batch_size, mock_llm_config.num_attention_heads, seq_len, 16),
                )
                for _ in range(mock_llm_config.num_hidden_layers)
            ),
            "logits": torch.randn(batch_size, seq_len, 32000),
        }
        return SimpleNamespace(**mock_outputs)

    # Bypass __init__ so no real model weights are loaded.
    llm_instance = LLM.__new__(LLM)

    llm_instance.model = mocker.MagicMock(side_effect=mock_model_forward)
    llm_instance.model.config = mock_llm_config
    llm_instance.model.device = 'cpu'
    llm_instance.model.dtype = torch.float32
    llm_instance.model.get_input_embeddings.return_value = mock_embedding_layer
    llm_instance.model.lm_head = mocker.MagicMock(return_value=torch.randn(1, 32000))

    # A hookable stand-in layer, repeated once per transformer layer.
    mock_layer = mocker.MagicMock()
    mock_layer.register_forward_pre_hook.return_value = mocker.MagicMock()
    mock_layer_list = [mock_layer] * mock_llm_config.num_hidden_layers

    # Mirror the nested module path `model.model.language_model.layers`.
    llm_instance.model.model = SimpleNamespace()
    llm_instance.model.model.language_model = SimpleNamespace(layers=mock_layer_list)

    llm_instance.tokenizer = mock_tokenizer
    llm_instance.config = mock_llm_config
    llm_instance.seed = 42
    llm_instance.set_all_seeds = mocker.MagicMock()

    # The stable abstraction layer that probe code reads instead of the raw HF config.
    llm_instance.stable_config = StableLLMConfig(
        hidden_dim=mock_llm_config.hidden_size,
        num_layers=mock_llm_config.num_hidden_layers,
        layer_list=mock_layer_list,
    )

    # Patch the loader in every module that imports it, so each code path
    # requesting a model receives this mock instance instead.
    mocker.patch('cognitive_mapping_probe.llm_iface.get_or_load_model', return_value=llm_instance)
    mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_or_load_model', return_value=llm_instance)
    mocker.patch('cognitive_mapping_probe.auto_experiment.get_or_load_model', return_value=llm_instance)

    # Concept-vector extraction is stubbed with a random vector of matching dimension.
    mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_concept_vector', return_value=torch.randn(mock_llm_config.hidden_size))

    return llm_instance
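

# Usage sketch (illustrative, not part of the fixture contract): a minimal test
# consuming `mock_llm`. The assertions follow directly from the mocked forward
# pass above; if this file is a conftest, move the test into a test module.
def test_mock_llm_forward_shapes(mock_llm):
    """Sanity-checks the output shapes of the mocked forward pass."""
    input_ids = torch.ones(1, 4, dtype=torch.long)
    outputs = mock_llm.model(input_ids=input_ids)
    # One hidden state per transformer layer plus the embedding output.
    assert len(outputs.hidden_states) == mock_llm.stable_config.num_layers + 1
    assert outputs.logits.shape == (1, 4, 32000)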