Commit
·
d407fda
1
Parent(s):
c937bab
update tests
Browse files
- cognitive_mapping_probe/concepts.py +6 -4
- tests/conftest.py +15 -7
- tests/test_app_logic.py +20 -17
- tests/test_components.py +14 -5
- tests/test_orchestration.py +43 -14
cognitive_mapping_probe/concepts.py
CHANGED
|
@@ -21,13 +21,15 @@ def _get_last_token_hidden_state(llm: LLM, prompt: str) -> torch.Tensor:
|
|
| 21 |
# KORREKTUR: Anstatt auf `llm.config.hidden_size` zuzugreifen, was fragil ist,
|
| 22 |
# leiten wir die erwartete Größe direkt vom Modell selbst ab. Dies ist robust
|
| 23 |
# gegenüber API-Änderungen in `transformers`.
|
| 24 |
-
expected_size = llm.model.config.hidden_size # Der Name scheint doch korrekt zu sein, aber wir machen es robuster
|
| 25 |
try:
|
| 26 |
# Versuche, die Größe über die Einbettungsschicht zu erhalten, was am stabilsten ist.
|
| 27 |
expected_size = llm.model.get_input_embeddings().weight.shape[1]
|
| 28 |
except AttributeError:
|
| 29 |
-
# Fallback, falls die Methode nicht existiert
|
| 30 |
-
|
|
|
|
|
|
|
|
|
|
| 31 |
|
| 32 |
assert last_hidden_state.shape == (expected_size,), \
|
| 33 |
f"Hidden state shape mismatch. Expected {(expected_size,)}, got {last_hidden_state.shape}"
|
|
@@ -42,7 +44,7 @@ def get_concept_vector(llm: LLM, concept: str, baseline_words: List[str] = BASEL
|
|
| 42 |
target_hs = _get_last_token_hidden_state(llm, prompt_template.format(concept))
|
| 43 |
baseline_hss = []
|
| 44 |
for word in tqdm(baseline_words, desc=f" - Calculating baseline for '{concept}'", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
|
| 45 |
-
baseline_hss.append(_get_last_token_hidden_state(llm, prompt_template.format(
|
| 46 |
assert all(hs.shape == target_hs.shape for hs in baseline_hss)
|
| 47 |
mean_baseline_hs = torch.stack(baseline_hss).mean(dim=0)
|
| 48 |
dbg(f" - Mean baseline vector computed with norm {torch.norm(mean_baseline_hs).item():.2f}")
|
|
|
|
| 21 |
# KORREKTUR: Anstatt auf `llm.config.hidden_size` zuzugreifen, was fragil ist,
|
| 22 |
# leiten wir die erwartete Größe direkt vom Modell selbst ab. Dies ist robust
|
| 23 |
# gegenüber API-Änderungen in `transformers`.
|
|
|
|
| 24 |
try:
|
| 25 |
# Versuche, die Größe über die Einbettungsschicht zu erhalten, was am stabilsten ist.
|
| 26 |
expected_size = llm.model.get_input_embeddings().weight.shape[1]
|
| 27 |
except AttributeError:
|
| 28 |
+
# Fallback, falls `get_input_embeddings` nicht existiert: Größe aus der Config lesen.
|
| 29 |
+
# Gemma3Config nennt das Attribut 'hidden_size', andere Configs z. B. 'd_model'.
|
| 30 |
+
expected_size = getattr(llm.config, 'hidden_size', getattr(llm.config, 'd_model', 0))
|
| 31 |
+
assert expected_size > 0, "Could not determine hidden size from model config."
|
| 32 |
+
|
| 33 |
|
| 34 |
assert last_hidden_state.shape == (expected_size,), \
|
| 35 |
f"Hidden state shape mismatch. Expected {(expected_size,)}, got {last_hidden_state.shape}"
|
|
|
|
| 44 |
target_hs = _get_last_token_hidden_state(llm, prompt_template.format(concept))
|
| 45 |
baseline_hss = []
|
| 46 |
for word in tqdm(baseline_words, desc=f" - Calculating baseline for '{concept}'", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
|
| 47 |
+
baseline_hss.append(_get_last_token_hidden_state(llm, prompt_template.format(word))) # Korrigiert: Verwende 'word', nicht 'concept'
|
| 48 |
assert all(hs.shape == target_hs.shape for hs in baseline_hss)
|
| 49 |
mean_baseline_hs = torch.stack(baseline_hss).mean(dim=0)
|
| 50 |
dbg(f" - Mean baseline vector computed with norm {torch.norm(mean_baseline_hs).item():.2f}")
|
tests/conftest.py
CHANGED
|
@@ -16,12 +16,17 @@ def mock_llm_config():
|
|
| 16 |
def mock_llm(mocker, mock_llm_config):
|
| 17 |
"""
|
| 18 |
Erstellt einen robusten "Mock-LLM" für Unit-Tests.
|
| 19 |
-
KORRIGIERT: Die
|
|
|
|
| 20 |
"""
|
| 21 |
mock_tokenizer = mocker.MagicMock()
|
| 22 |
mock_tokenizer.eos_token_id = 1
|
| 23 |
mock_tokenizer.decode.return_value = "mocked text"
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def mock_model_forward(*args, **kwargs):
|
| 26 |
batch_size = 1
|
| 27 |
seq_len = 1
|
|
@@ -37,30 +42,33 @@ def mock_llm(mocker, mock_llm_config):
|
|
| 37 |
}
|
| 38 |
return SimpleNamespace(**mock_outputs)
|
| 39 |
|
| 40 |
-
llm_instance = LLM.__new__(LLM)
|
| 41 |
|
| 42 |
llm_instance.model = mocker.MagicMock(side_effect=mock_model_forward)
|
| 43 |
-
|
| 44 |
llm_instance.model.config = mock_llm_config
|
| 45 |
llm_instance.model.device = 'cpu'
|
| 46 |
llm_instance.model.dtype = torch.float32
|
|
|
|
|
|
|
| 47 |
|
| 48 |
mock_layer = mocker.MagicMock()
|
| 49 |
mock_layer.register_forward_pre_hook.return_value = mocker.MagicMock()
|
| 50 |
llm_instance.model.model = SimpleNamespace(layers=[mock_layer] * mock_llm_config.num_hidden_layers)
|
| 51 |
-
|
| 52 |
llm_instance.model.lm_head = mocker.MagicMock(return_value=torch.randn(1, 32000))
|
| 53 |
|
| 54 |
llm_instance.tokenizer = mock_tokenizer
|
| 55 |
-
llm_instance.config = mock_llm_config
|
| 56 |
llm_instance.seed = 42
|
| 57 |
llm_instance.set_all_seeds = mocker.MagicMock()
|
| 58 |
|
| 59 |
# Patch an allen Stellen, an denen das Modell tatsächlich geladen wird.
|
|
|
|
| 60 |
mocker.patch('cognitive_mapping_probe.llm_iface.get_or_load_model', return_value=llm_instance)
|
| 61 |
mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_or_load_model', return_value=llm_instance)
|
| 62 |
-
|
| 63 |
-
|
|
|
|
|
|
|
| 64 |
mocker.patch('cognitive_mapping_probe.concepts.get_concept_vector', return_value=torch.randn(mock_llm_config.hidden_size))
|
| 65 |
|
| 66 |
return llm_instance
|
|
|
|
| 16 |
def mock_llm(mocker, mock_llm_config):
|
| 17 |
"""
|
| 18 |
Erstellt einen robusten "Mock-LLM" für Unit-Tests.
|
| 19 |
+
FINAL KORRIGIERT: Die Patch-Anweisungen sind nun korrekt auf die tatsächlichen
|
| 20 |
+
Import-Pfade in den zu testenden Modulen ausgerichtet.
|
| 21 |
"""
|
| 22 |
mock_tokenizer = mocker.MagicMock()
|
| 23 |
mock_tokenizer.eos_token_id = 1
|
| 24 |
mock_tokenizer.decode.return_value = "mocked text"
|
| 25 |
|
| 26 |
+
# Definiere eine stabile Mock-Funktion für die Eingabe-Embeddings
|
| 27 |
+
mock_embedding_layer = mocker.MagicMock()
|
| 28 |
+
mock_embedding_layer.weight.shape = (32000, mock_llm_config.hidden_size) # (vocab_size, hidden_size)
|
| 29 |
+
|
| 30 |
def mock_model_forward(*args, **kwargs):
|
| 31 |
batch_size = 1
|
| 32 |
seq_len = 1
|
|
|
|
| 42 |
}
|
| 43 |
return SimpleNamespace(**mock_outputs)
|
| 44 |
|
| 45 |
+
llm_instance = LLM.__new__(LLM) # Erzeuge Instanz ohne __init__ aufzurufen
|
| 46 |
|
| 47 |
llm_instance.model = mocker.MagicMock(side_effect=mock_model_forward)
|
|
|
|
| 48 |
llm_instance.model.config = mock_llm_config
|
| 49 |
llm_instance.model.device = 'cpu'
|
| 50 |
llm_instance.model.dtype = torch.float32
|
| 51 |
+
# Füge die gemockte Embedding-Funktion hinzu, um den Test in `concepts.py` zu bestehen
|
| 52 |
+
llm_instance.model.get_input_embeddings.return_value = mock_embedding_layer
|
| 53 |
|
| 54 |
mock_layer = mocker.MagicMock()
|
| 55 |
mock_layer.register_forward_pre_hook.return_value = mocker.MagicMock()
|
| 56 |
llm_instance.model.model = SimpleNamespace(layers=[mock_layer] * mock_llm_config.num_hidden_layers)
|
|
|
|
| 57 |
llm_instance.model.lm_head = mocker.MagicMock(return_value=torch.randn(1, 32000))
|
| 58 |
|
| 59 |
llm_instance.tokenizer = mock_tokenizer
|
| 60 |
+
llm_instance.config = mock_llm_config # Wichtig, da `concepts.py` darauf zugreift
|
| 61 |
llm_instance.seed = 42
|
| 62 |
llm_instance.set_all_seeds = mocker.MagicMock()
|
| 63 |
|
| 64 |
# Patch an allen Stellen, an denen das Modell tatsächlich geladen wird.
|
| 65 |
+
# Dies stellt sicher, dass kein Test versucht, ein echtes Modell herunterzuladen.
|
| 66 |
mocker.patch('cognitive_mapping_probe.llm_iface.get_or_load_model', return_value=llm_instance)
|
| 67 |
mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_or_load_model', return_value=llm_instance)
|
| 68 |
+
mocker.patch('cognitive_mapping_probe.auto_experiment.get_or_load_model', return_value=llm_instance)
|
| 69 |
+
|
| 70 |
+
# Mocke `get_concept_vector`, um zu verhindern, dass es im Orchestrator-Test ausgeführt wird,
|
| 71 |
+
# da wir es in `test_components.py` separat testen.
|
| 72 |
mocker.patch('cognitive_mapping_probe.concepts.get_concept_vector', return_value=torch.randn(mock_llm_config.hidden_size))
|
| 73 |
|
| 74 |
return llm_instance
|
tests/test_app_logic.py
CHANGED
|
@@ -7,47 +7,50 @@ from app import run_single_analysis_display, run_auto_suite_display
|
|
| 7 |
|
| 8 |
def test_run_single_analysis_display(mocker):
|
| 9 |
"""Testet den Wrapper für Einzel-Experimente."""
|
| 10 |
-
mock_results = {"verdict": "V", "stats": {"mean_delta": 1}, "state_deltas": [1]}
|
| 11 |
mocker.patch('app.run_seismic_analysis', return_value=mock_results)
|
| 12 |
mocker.patch('app.cleanup_memory')
|
| 13 |
|
| 14 |
verdict, df, raw = run_single_analysis_display(progress=mocker.MagicMock())
|
| 15 |
|
| 16 |
assert "V" in verdict and "1.0000" in verdict
|
| 17 |
-
assert isinstance(df, pd.DataFrame) and len(df) ==
|
|
|
|
| 18 |
|
| 19 |
def test_run_auto_suite_display(mocker):
|
| 20 |
"""
|
| 21 |
Testet den Wrapper für die Auto-Experiment-Suite.
|
| 22 |
-
FINAL KORRIGIERT:
|
| 23 |
-
|
| 24 |
"""
|
| 25 |
-
mock_summary_df = pd.DataFrame([{"Experiment": "E1"}])
|
| 26 |
-
mock_plot_df = pd.DataFrame([{"Step": 0, "Delta": 1.0, "Experiment": "E1"}])
|
| 27 |
-
mock_results = {"E1": {}}
|
| 28 |
|
| 29 |
mocker.patch('app.run_auto_suite', return_value=(mock_summary_df, mock_plot_df, mock_results))
|
| 30 |
mocker.patch('app.cleanup_memory')
|
| 31 |
|
| 32 |
-
summary_df, plot_component,
|
| 33 |
-
"mock",
|
| 34 |
)
|
| 35 |
|
| 36 |
-
|
|
|
|
| 37 |
|
|
|
|
| 38 |
assert isinstance(plot_component, gr.LinePlot)
|
| 39 |
assert isinstance(plot_component.value, dict)
|
|
|
|
| 40 |
|
| 41 |
-
#
|
| 42 |
-
# müssen wir explizit die Spaltennamen angeben, da diese Information bei der
|
| 43 |
-
# Serialisierung durch Gradio verloren gehen kann.
|
| 44 |
reconstructed_df = pd.DataFrame(
|
| 45 |
plot_component.value['data'],
|
| 46 |
-
columns=['
|
| 47 |
)
|
| 48 |
|
| 49 |
-
# Nun sollte der Vergleich mit `assert_frame_equal` funktionieren
|
| 50 |
-
# da beide DataFrames nun garantiert dieselben Spaltennamen und -typen haben.
|
| 51 |
assert_frame_equal(reconstructed_df, mock_plot_df)
|
| 52 |
|
| 53 |
-
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
def test_run_single_analysis_display(mocker):
|
| 9 |
"""Testet den Wrapper für Einzel-Experimente."""
|
| 10 |
+
mock_results = {"verdict": "V", "stats": {"mean_delta": 1}, "state_deltas": [1.0, 2.0]}
|
| 11 |
mocker.patch('app.run_seismic_analysis', return_value=mock_results)
|
| 12 |
mocker.patch('app.cleanup_memory')
|
| 13 |
|
| 14 |
verdict, df, raw = run_single_analysis_display(progress=mocker.MagicMock())
|
| 15 |
|
| 16 |
assert "V" in verdict and "1.0000" in verdict
|
| 17 |
+
assert isinstance(df, pd.DataFrame) and len(df) == 2
|
| 18 |
+
assert "State Change (Delta)" in df.columns
|
| 19 |
|
| 20 |
def test_run_auto_suite_display(mocker):
|
| 21 |
"""
|
| 22 |
Testet den Wrapper für die Auto-Experiment-Suite.
|
| 23 |
+
FINAL KORRIGIERT: Validiert nun die korrekte `dict`-Struktur, die von
|
| 24 |
+
`gradio.LinePlot` zurückgegeben wird, und rekonstruiert den DataFrame für den Vergleich.
|
| 25 |
"""
|
| 26 |
+
mock_summary_df = pd.DataFrame([{"Experiment": "E1", "Mean Delta": 1.5}])
|
| 27 |
+
mock_plot_df = pd.DataFrame([{"Step": 0, "Delta": 1.0, "Experiment": "E1"}, {"Step": 1, "Delta": 2.0, "Experiment": "E1"}])
|
| 28 |
+
mock_results = {"E1": {"stats": {"mean_delta": 1.5}}}
|
| 29 |
|
| 30 |
mocker.patch('app.run_auto_suite', return_value=(mock_summary_df, mock_plot_df, mock_results))
|
| 31 |
mocker.patch('app.cleanup_memory')
|
| 32 |
|
| 33 |
+
summary_df, plot_component, raw_json_str = run_auto_suite_display(
|
| 34 |
+
"mock-model", 100, 42, "mock_exp", progress=mocker.MagicMock()
|
| 35 |
)
|
| 36 |
|
| 37 |
+
# Teste den DataFrame-Output
|
| 38 |
+
assert_frame_equal(summary_df, mock_summary_df)
|
| 39 |
|
| 40 |
+
# KORREKTUR: Teste die Gradio-Plot-Komponente. Ihr `value` ist ein Dictionary, kein DataFrame.
|
| 41 |
assert isinstance(plot_component, gr.LinePlot)
|
| 42 |
assert isinstance(plot_component.value, dict)
|
| 43 |
+
assert 'data' in plot_component.value and 'columns' in plot_component.value
|
| 44 |
|
| 45 |
+
# Rekonstruiere den DataFrame aus dem serialisierten Dictionary für einen exakten Vergleich.
|
|
|
|
|
|
|
| 46 |
reconstructed_df = pd.DataFrame(
|
| 47 |
plot_component.value['data'],
|
| 48 |
+
columns=plot_component.value['columns']
|
| 49 |
)
|
| 50 |
|
| 51 |
+
# Nun sollte der Vergleich mit `assert_frame_equal` funktionieren.
|
|
|
|
| 52 |
assert_frame_equal(reconstructed_df, mock_plot_df)
|
| 53 |
|
| 54 |
+
# Teste den JSON-Output
|
| 55 |
+
assert isinstance(raw_json_str, str)
|
| 56 |
+
assert '"mean_delta": 1.5' in raw_json_str
|
tests/test_components.py
CHANGED
|
@@ -7,7 +7,7 @@ from cognitive_mapping_probe.llm_iface import get_or_load_model, LLM
|
|
| 7 |
from cognitive_mapping_probe.resonance_seismograph import run_silent_cogitation_seismic
|
| 8 |
from cognitive_mapping_probe.utils import dbg
|
| 9 |
# KORREKTUR: Importiere die Hauptfunktion, die wir testen wollen.
|
| 10 |
-
from cognitive_mapping_probe.concepts import get_concept_vector
|
| 11 |
|
| 12 |
# --- Tests for llm_iface.py ---
|
| 13 |
|
|
@@ -57,15 +57,23 @@ def test_run_silent_cogitation_with_injection_hook_usage(mock_llm):
|
|
| 57 |
|
| 58 |
# --- Tests for concepts.py ---
|
| 59 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
def test_get_concept_vector_logic(mock_llm, mocker):
|
| 61 |
"""
|
| 62 |
Testet die Logik von `get_concept_vector`.
|
| 63 |
KORRIGIERT: Patcht nun die refaktorisierte, auf Modulebene befindliche Funktion.
|
| 64 |
"""
|
| 65 |
mock_hidden_states = [
|
| 66 |
-
torch.ones(mock_llm.config.hidden_size) * 10,
|
| 67 |
-
torch.ones(mock_llm.config.hidden_size) * 2,
|
| 68 |
-
torch.ones(mock_llm.config.hidden_size) * 4
|
| 69 |
]
|
| 70 |
# KORREKTUR: Der Patch-Pfad zeigt jetzt auf die korrekte, importierbare Funktion.
|
| 71 |
mocker.patch(
|
|
@@ -75,6 +83,7 @@ def test_get_concept_vector_logic(mock_llm, mocker):
|
|
| 75 |
|
| 76 |
concept_vector = get_concept_vector(mock_llm, "test", baseline_words=["a", "b"])
|
| 77 |
|
|
|
|
| 78 |
expected_vector = torch.ones(mock_llm.config.hidden_size) * 7
|
| 79 |
assert torch.allclose(concept_vector, expected_vector)
|
| 80 |
|
|
@@ -85,7 +94,7 @@ def test_dbg_output(capsys, monkeypatch):
|
|
| 85 |
monkeypatch.setenv("CMP_DEBUG", "1")
|
| 86 |
import importlib
|
| 87 |
from cognitive_mapping_probe import utils
|
| 88 |
-
importlib.reload(utils)
|
| 89 |
utils.dbg("test message")
|
| 90 |
captured = capsys.readouterr()
|
| 91 |
assert "[DEBUG] test message" in captured.err
|
|
|
|
| 7 |
from cognitive_mapping_probe.resonance_seismograph import run_silent_cogitation_seismic
|
| 8 |
from cognitive_mapping_probe.utils import dbg
|
| 9 |
# KORREKTUR: Importiere die Hauptfunktion, die wir testen wollen.
|
| 10 |
+
from cognitive_mapping_probe.concepts import get_concept_vector, _get_last_token_hidden_state
|
| 11 |
|
| 12 |
# --- Tests for llm_iface.py ---
|
| 13 |
|
|
|
|
| 57 |
|
| 58 |
# --- Tests for concepts.py ---
|
| 59 |
|
| 60 |
+
def test_get_last_token_hidden_state_robustness(mock_llm):
|
| 61 |
+
"""Testet die robuste `_get_last_token_hidden_state` Funktion."""
|
| 62 |
+
# Der `mock_llm` aus `conftest.py` liefert einen Tensor mit der korrekten `hidden_size`.
|
| 63 |
+
# Hier testen wir, ob die echte Funktion aus `concepts.py` mit dem gemockten
|
| 64 |
+
# LLM-Objekt korrekt interagiert.
|
| 65 |
+
hs = _get_last_token_hidden_state(mock_llm, "test prompt")
|
| 66 |
+
assert hs.shape == (mock_llm.config.hidden_size,)
|
| 67 |
+
|
| 68 |
def test_get_concept_vector_logic(mock_llm, mocker):
|
| 69 |
"""
|
| 70 |
Testet die Logik von `get_concept_vector`.
|
| 71 |
KORRIGIERT: Patcht nun die refaktorisierte, auf Modulebene befindliche Funktion.
|
| 72 |
"""
|
| 73 |
mock_hidden_states = [
|
| 74 |
+
torch.ones(mock_llm.config.hidden_size) * 10, # target concept
|
| 75 |
+
torch.ones(mock_llm.config.hidden_size) * 2, # baseline word 1
|
| 76 |
+
torch.ones(mock_llm.config.hidden_size) * 4 # baseline word 2
|
| 77 |
]
|
| 78 |
# KORREKTUR: Der Patch-Pfad zeigt jetzt auf die korrekte, importierbare Funktion.
|
| 79 |
mocker.patch(
|
|
|
|
| 83 |
|
| 84 |
concept_vector = get_concept_vector(mock_llm, "test", baseline_words=["a", "b"])
|
| 85 |
|
| 86 |
+
# Erwarteter Vektor: 10 - mean(2, 4) = 10 - 3 = 7
|
| 87 |
expected_vector = torch.ones(mock_llm.config.hidden_size) * 7
|
| 88 |
assert torch.allclose(concept_vector, expected_vector)
|
| 89 |
|
|
|
|
| 94 |
monkeypatch.setenv("CMP_DEBUG", "1")
|
| 95 |
import importlib
|
| 96 |
from cognitive_mapping_probe import utils
|
| 97 |
+
importlib.reload(utils) # Wichtig, da DEBUG_ENABLED beim Import gesetzt wird
|
| 98 |
utils.dbg("test message")
|
| 99 |
captured = capsys.readouterr()
|
| 100 |
assert "[DEBUG] test message" in captured.err
|
tests/test_orchestration.py
CHANGED
|
@@ -8,22 +8,37 @@ from cognitive_mapping_probe.auto_experiment import run_auto_suite, get_curated_
|
|
| 8 |
def test_run_seismic_analysis_no_injection(mocker, mock_llm):
|
| 9 |
"""Testet den Orchestrator im Baseline-Modus."""
|
| 10 |
mock_run_seismic = mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.run_silent_cogitation_seismic', return_value=[1.0])
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
run_seismic_analysis(
|
| 12 |
model_id="mock", prompt_type="test", seed=42, num_steps=1,
|
| 13 |
concept_to_inject="", injection_strength=0.0, progress_callback=mocker.MagicMock(),
|
| 14 |
-
llm_instance=mock_llm # Übergebe den Mock direkt
|
| 15 |
)
|
| 16 |
mock_run_seismic.assert_called_once()
|
|
|
|
| 17 |
|
| 18 |
def test_run_seismic_analysis_with_injection(mocker, mock_llm):
|
| 19 |
"""Testet den Orchestrator mit Injektion."""
|
| 20 |
-
mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.run_silent_cogitation_seismic', return_value=[1.0])
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
run_seismic_analysis(
|
| 23 |
model_id="mock", prompt_type="test", seed=42, num_steps=1,
|
| 24 |
-
concept_to_inject="
|
| 25 |
llm_instance=mock_llm # Übergebe den Mock direkt
|
| 26 |
)
|
|
|
|
|
|
|
|
|
|
| 27 |
|
| 28 |
def test_get_curated_experiments_structure():
|
| 29 |
"""Testet die Datenstruktur der kuratierten Experimente."""
|
|
@@ -31,27 +46,41 @@ def test_get_curated_experiments_structure():
|
|
| 31 |
assert isinstance(experiments, dict)
|
| 32 |
assert "Therapeutic Intervention (4B-Model)" in experiments
|
| 33 |
protocol = experiments["Therapeutic Intervention (4B-Model)"]
|
| 34 |
-
assert isinstance(protocol, list) and len(protocol)
|
|
|
|
| 35 |
|
| 36 |
def test_run_auto_suite_special_protocol(mocker, mock_llm):
|
| 37 |
"""
|
| 38 |
Testet den speziellen Logik-Pfad für das Interventions-Protokoll.
|
| 39 |
-
KORRIGIERT:
|
| 40 |
-
|
| 41 |
"""
|
| 42 |
-
#
|
| 43 |
-
mocker.patch('cognitive_mapping_probe.auto_experiment.get_or_load_model', return_value=mock_llm)
|
| 44 |
mock_analysis = mocker.patch('cognitive_mapping_probe.auto_experiment.run_seismic_analysis', return_value={"stats": {}, "state_deltas": []})
|
| 45 |
|
|
|
|
|
|
|
|
|
|
| 46 |
run_auto_suite(
|
| 47 |
-
model_id="mock-4b", num_steps=
|
| 48 |
experiment_name="Therapeutic Intervention (4B-Model)",
|
| 49 |
progress_callback=mocker.MagicMock()
|
| 50 |
)
|
| 51 |
|
|
|
|
| 52 |
assert mock_analysis.call_count == 2
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
def test_run_seismic_analysis_no_injection(mocker, mock_llm):
|
| 9 |
"""Testet den Orchestrator im Baseline-Modus."""
|
| 10 |
mock_run_seismic = mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.run_silent_cogitation_seismic', return_value=[1.0])
|
| 11 |
+
# `conftest.py` patcht `get_concept_vector` nur im Modul `concepts`; hier patchen wir den Namen
|
| 12 |
+
# im Orchestrator-Namespace, um prüfen zu können, dass er im Baseline-Modus nicht aufgerufen wird.
|
| 13 |
+
mock_get_concept = mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_concept_vector')
|
| 14 |
+
|
| 15 |
run_seismic_analysis(
|
| 16 |
model_id="mock", prompt_type="test", seed=42, num_steps=1,
|
| 17 |
concept_to_inject="", injection_strength=0.0, progress_callback=mocker.MagicMock(),
|
| 18 |
+
llm_instance=mock_llm # Übergebe den Mock direkt, um das Neuladen zu vermeiden
|
| 19 |
)
|
| 20 |
mock_run_seismic.assert_called_once()
|
| 21 |
+
mock_get_concept.assert_not_called()
|
| 22 |
|
| 23 |
def test_run_seismic_analysis_with_injection(mocker, mock_llm):
|
| 24 |
"""Testet den Orchestrator mit Injektion."""
|
| 25 |
+
mock_run_seismic = mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.run_silent_cogitation_seismic', return_value=[1.0])
|
| 26 |
+
|
| 27 |
+
# KORREKTUR: Der Patch muss auf den Namespace zielen, in dem die Funktion *verwendet* wird.
|
| 28 |
+
# `run_seismic_analysis` importiert `get_concept_vector` in seinen eigenen Namespace.
|
| 29 |
+
mock_get_concept = mocker.patch(
|
| 30 |
+
'cognitive_mapping_probe.orchestrator_seismograph.get_concept_vector',
|
| 31 |
+
return_value=torch.randn(10)
|
| 32 |
+
)
|
| 33 |
+
|
| 34 |
run_seismic_analysis(
|
| 35 |
model_id="mock", prompt_type="test", seed=42, num_steps=1,
|
| 36 |
+
concept_to_inject="test_concept", injection_strength=1.5, progress_callback=mocker.MagicMock(),
|
| 37 |
llm_instance=mock_llm # Übergebe den Mock direkt
|
| 38 |
)
|
| 39 |
+
mock_run_seismic.assert_called_once()
|
| 40 |
+
mock_get_concept.assert_called_once_with(mock_llm, "test_concept")
|
| 41 |
+
|
| 42 |
|
| 43 |
def test_get_curated_experiments_structure():
|
| 44 |
"""Testet die Datenstruktur der kuratierten Experimente."""
|
|
|
|
| 46 |
assert isinstance(experiments, dict)
|
| 47 |
assert "Therapeutic Intervention (4B-Model)" in experiments
|
| 48 |
protocol = experiments["Therapeutic Intervention (4B-Model)"]
|
| 49 |
+
assert isinstance(protocol, list) and len(protocol) == 2
|
| 50 |
+
assert "label" in protocol[0] and "prompt_type" in protocol[0]
|
| 51 |
|
| 52 |
def test_run_auto_suite_special_protocol(mocker, mock_llm):
|
| 53 |
"""
|
| 54 |
Testet den speziellen Logik-Pfad für das Interventions-Protokoll.
|
| 55 |
+
FINAL KORRIGIERT: Stellt sicher, dass `run_seismic_analysis` korrekt gepatcht
|
| 56 |
+
und die Wiederverwendung der `llm_instance` verifiziert wird.
|
| 57 |
"""
|
| 58 |
+
# Wir müssen `run_seismic_analysis` im `auto_experiment`-Modul patchen, da es von dort aufgerufen wird.
|
|
|
|
| 59 |
mock_analysis = mocker.patch('cognitive_mapping_probe.auto_experiment.run_seismic_analysis', return_value={"stats": {}, "state_deltas": []})
|
| 60 |
|
| 61 |
+
# Wir müssen `get_or_load_model` im `auto_experiment`-Modul patchen, da dort der erste Aufruf stattfindet
|
| 62 |
+
mocker.patch('cognitive_mapping_probe.auto_experiment.get_or_load_model', return_value=mock_llm)
|
| 63 |
+
|
| 64 |
run_auto_suite(
|
| 65 |
+
model_id="mock-4b", num_steps=10, seed=42,
|
| 66 |
experiment_name="Therapeutic Intervention (4B-Model)",
|
| 67 |
progress_callback=mocker.MagicMock()
|
| 68 |
)
|
| 69 |
|
| 70 |
+
# Es müssen genau zwei Läufe stattgefunden haben
|
| 71 |
assert mock_analysis.call_count == 2
|
| 72 |
|
| 73 |
+
# Überprüfe, ob bei beiden Aufrufen dieselbe `llm_instance` übergeben wurde
|
| 74 |
+
first_call_kwargs = mock_analysis.call_args_list[0].kwargs
|
| 75 |
+
second_call_kwargs = mock_analysis.call_args_list[1].kwargs
|
| 76 |
+
|
| 77 |
+
assert 'llm_instance' in first_call_kwargs
|
| 78 |
+
assert 'llm_instance' in second_call_kwargs
|
| 79 |
+
assert first_call_kwargs['llm_instance'] is mock_llm
|
| 80 |
+
assert second_call_kwargs['llm_instance'] is mock_llm
|
| 81 |
+
|
| 82 |
+
# Überprüfe, ob die Injektion nur im ersten Lauf stattfand
|
| 83 |
+
assert first_call_kwargs['concept_to_inject'] != ""
|
| 84 |
+
assert first_call_kwargs['injection_strength'] > 0.0
|
| 85 |
+
assert second_call_kwargs['concept_to_inject'] == ""
|
| 86 |
+
assert second_call_kwargs['injection_strength'] == 0.0
|