neuralworm committed · Commit b350371 · 1 Parent(s): 3be4e60
app.py CHANGED
@@ -1,9 +1,13 @@
 import gradio as gr
 import pandas as pd
 import traceback
+import sys
+
+# Important imports for the new pre-flight checks
+from cognitive_mapping_probe.pre_flight_checks import run_pre_flight_checks
 from cognitive_mapping_probe.orchestrator import run_cognitive_titration_experiment
-from cognitive_mapping_probe.diagnostics import run_diagnostic_suite
 from cognitive_mapping_probe.prompts import RESONANCE_PROMPTS
+from cognitive_mapping_probe.utils import dbg

 # --- UI Theme and Layout ---
 theme = gr.themes.Soft(primary_hue="orange", secondary_hue="amber").set(
@@ -15,6 +19,9 @@ theme = gr.themes.Soft(primary_hue="orange", secondary_hue="amber").set(
     button_primary_text_color="white",
 )

+# --- Default model ID for tests and the UI ---
+DEFAULT_MODEL_ID = "google/gemma-3-1b-it"
+
 # --- Wrapper Functions for Gradio ---

 def run_experiment_and_display(
@@ -42,29 +49,22 @@ def run_experiment_and_display(
         if not all_runs:
             return "### ⚠️ No Data Generated\nThe experiment finished, but no data points were produced. Please check the logs.", pd.DataFrame(), results

-        # Create a detailed DataFrame for output
         details_df = pd.DataFrame(all_runs)
-
-        # Create a summary of breaking points
         summary_text = "### 💥 Cognitive Breaking Points (CBP)\n"
         summary_text += "The CBP is the first strength at which the model no longer converges (`max_steps_reached`).\n\n"

-        # Check baseline convergence first
-        baseline_run = details_df[(details_df['strength'] == 0.0)].iloc[0]
+        baseline_run = details_df[details_df['strength'] == 0.0].iloc[0]
         if baseline_run['termination_reason'] != 'converged':
             summary_text += f"**‼️ WARNING: Baseline (strength 0.0) did not converge!**\n"
-            summary_text += f"The selected prompt (`{prompt_type}`) is too demanding for this model. The titration results are not meaningful.\n\n"
+            summary_text += f"The selected prompt (`{prompt_type}`) is too demanding for this model. The results are not meaningful.\n\n"

         for concept in details_df['concept'].unique():
             concept_df = details_df[details_df['concept'] == concept].sort_values(by='strength')
-            # Find the first row where termination reason is not 'converged'
             breaking_point_row = concept_df[concept_df['termination_reason'] != 'converged'].iloc[0] if not concept_df[concept_df['termination_reason'] != 'converged'].empty else None
             if breaking_point_row is not None:
-                breaking_point = breaking_point_row['strength']
-                summary_text += f"- **'{concept}'**: 📉 Collapse at strength **{breaking_point:.2f}**\n"
+                summary_text += f"- **'{concept}'**: 📉 Collapse at strength **{breaking_point_row['strength']:.2f}**\n"
             else:
-                last_strength = concept_df['strength'].max()
-                summary_text += f"- **'{concept}'**: ✅ Stable up to strength **{last_strength:.2f}** (no collapse detected)\n"
+                summary_text += f"- **'{concept}'**: ✅ Stable up to strength **{concept_df['strength'].max():.2f}**\n"

         return summary_text, details_df, results

@@ -72,75 +72,74 @@
         error_str = traceback.format_exc()
         return f"### ❌ Experiment Failed\nAn unexpected error occurred:\n\n```\n{error_str}\n```", pd.DataFrame(), {}

-
-def run_diagnostics_display(model_id: str, seed: int):
-    """
-    Runs the diagnostic suite and displays the results or errors in the UI.
-    """
-    try:
-        result_string = run_diagnostic_suite(model_id, int(seed))
-        return f"### ✅ All Diagnostics Passed\nThe experimental apparatus works as expected.\n\n**Details:**\n```\n{result_string}\n```"
-    except Exception:
-        error_str = traceback.format_exc()
-        return f"### ❌ Diagnostic Failed\nA test failed. The experiment is not reliable.\n\n**Error:**\n```\n{error_str}\n```"
-
 # --- Gradio App Definition ---
 with gr.Blocks(theme=theme, title="Cognitive Breaking Point Probe") as demo:
     gr.Markdown("# 💥 Cognitive Breaking Point Probe")

-    with gr.Tabs():
-        # --- TAB 1: Main Experiment ---
-        with gr.TabItem("🔬 Main Experiment: Titration"):
-            gr.Markdown(
-                "Measures the 'Cognitive Breaking Point' (CBP), the injection strength at which an LLM's thought process tips from convergence into an endless loop."
+    # The Diagnostics tab has been removed. The UI is now just the main experiment.
+    gr.Markdown(
+        "Measures the 'Cognitive Breaking Point' (CBP), the injection strength at which an LLM's thought process tips from convergence into an endless loop."
+    )
+    with gr.Row(variant='panel'):
+        with gr.Column(scale=1):
+            gr.Markdown("### Parameters")
+            model_id_input = gr.Textbox(value=DEFAULT_MODEL_ID, label="Model ID")
+            prompt_type_input = gr.Radio(
+                choices=list(RESONANCE_PROMPTS.keys()),
+                value="control_long_prose",
+                label="Prompt Type (Cognitive Load)",
+                info="Start with 'control_long_prose' for a stable baseline!"
             )
-            with gr.Row(variant='panel'):
-                with gr.Column(scale=1):
-                    gr.Markdown("### Parameters")
-                    model_id_input = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
-                    prompt_type_input = gr.Radio(
-                        choices=list(RESONANCE_PROMPTS.keys()),
-                        value="control_long_prose",
-                        label="Prompt Type (Cognitive Load)",
-                        info="Start with 'control_long_prose' for a stable baseline!"
-                    )
-                    seed_input = gr.Slider(1, 1000, 42, step=1, label="Global Seed")
-                    concepts_input = gr.Textbox(value="apple, solitude, fear", label="Concepts (comma-separated)")
-                    strength_levels_input = gr.Textbox(value="0.0, 0.5, 1.0, 1.5, 2.0", label="Injection Strengths (Titration Steps)")
-                    num_steps_input = gr.Slider(50, 500, 250, step=10, label="Max. Internal Steps")
-                    temperature_input = gr.Slider(0.01, 1.5, 0.7, step=0.01, label="Temperature")
-                    run_btn = gr.Button("Run Cognitive Titration", variant="primary")
-
-                with gr.Column(scale=2):
-                    gr.Markdown("### Results")
-                    summary_output = gr.Markdown("Summary of the breaking points will appear here.", label="Key Findings Summary")
-                    details_output = gr.DataFrame(
-                        headers=["concept", "strength", "responded", "termination_reason", "generated_text"],
-                        label="Detailed Run Data",
-                        wrap=True,
-                        height=400
-                    )
-                    with gr.Accordion("Raw JSON Output", open=False):
-                        raw_json_output = gr.JSON()
-
-            run_btn.click(
-                fn=run_experiment_and_display,
-                inputs=[model_id_input, prompt_type_input, seed_input, concepts_input, strength_levels_input, num_steps_input, temperature_input],
-                outputs=[summary_output, details_output, raw_json_output]
+            seed_input = gr.Slider(1, 1000, 42, step=1, label="Global Seed")
+            concepts_input = gr.Textbox(value="apple, solitude, fear", label="Concepts (comma-separated)")
+            strength_levels_input = gr.Textbox(value="0.0, 0.5, 1.0, 1.5, 2.0", label="Injection Strengths")
+            num_steps_input = gr.Slider(50, 500, 250, step=10, label="Max. Internal Steps")
+            temperature_input = gr.Slider(0.01, 1.5, 0.7, step=0.01, label="Temperature")
+            run_btn = gr.Button("Run Cognitive Titration", variant="primary")
+
+        with gr.Column(scale=2):
+            gr.Markdown("### Results")
+            summary_output = gr.Markdown("Summary of the breaking points will appear here.", label="Key Findings Summary")
+            details_output = gr.DataFrame(
+                headers=["concept", "strength", "responded", "termination_reason", "generated_text"],
+                label="Detailed Run Data",
+                wrap=True,
+                height=400
             )
+            with gr.Accordion("Raw JSON Output", open=False):
+                raw_json_output = gr.JSON()

-        # --- TAB 2: Diagnostics ---
-        with gr.TabItem("Diagnostics"):
-            gr.Markdown(
-                "Runs a series of self-tests to validate the mechanical integrity of the experimental apparatus. "
-                "**Important:** This should be run once before every serious experiment to make sure the results are reliable."
-            )
-            with gr.Row(variant='compact'):
-                diag_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
-                diag_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
-            diag_btn = gr.Button("Run Diagnostic Suite", variant="secondary")
-            diag_output = gr.Markdown(label="Diagnostic Results")
-            diag_btn.click(fn=run_diagnostics_display, inputs=[diag_model_id, diag_seed], outputs=[diag_output])
+    run_btn.click(
+        fn=run_experiment_and_display,
+        inputs=[model_id_input, prompt_type_input, seed_input, concepts_input, strength_levels_input, num_steps_input, temperature_input],
+        outputs=[summary_output, details_output, raw_json_output]
+    )

+# --- Main Execution Block ---
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
+    print("="*80)
+    print("🔬 RUNNING PRE-FLIGHT DIAGNOSTICS FOR EXPERIMENTAL APPARATUS")
+    print("="*80)
+
+    try:
+        # Run the mandatory system tests against a real model.
+        # If an error occurs here, the experiment is not valid.
+        run_pre_flight_checks(model_id=DEFAULT_MODEL_ID, seed=42)
+
+        print("\n" + "="*80)
+        print("✅ ALL DIAGNOSTICS PASSED. LAUNCHING GRADIO APP...")
+        print("="*80)
+
+        # Launch the Gradio app only on success.
+        demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
+
+    except Exception as e:
+        print("\n" + "="*80)
+        print("❌ PRE-FLIGHT DIAGNOSTIC FAILED")
+        print("="*80)
+        print(f"Error Type: {type(e).__name__}")
+        print(f"Error Details: {e}")
+        print("\nThe experimental apparatus is not working as expected.")
+        print("The Gradio app will not be launched, to prevent faulty measurements.")
+        traceback.print_exc()
+        sys.exit(1)  # Exit the program with an error code.
cognitive_mapping_probe/diagnostics.py DELETED
@@ -1,95 +0,0 @@
-import torch
-import traceback
-from .llm_iface import get_or_load_model
-from .utils import dbg
-
-def run_diagnostic_suite(model_id: str, seed: int) -> str:
-    """
-    Runs a series of self-tests to verify the mechanical integrity of the experiment.
-    Raises an exception on a critical failure in order to stop execution.
-    """
-    dbg("--- STARTING DIAGNOSTIC SUITE ---")
-    results = []
-
-    try:
-        # --- Setup ---
-        dbg("Loading model for diagnostics...")
-        llm = get_or_load_model(model_id, seed)
-        test_prompt = "Hello world"
-        inputs = llm.tokenizer(test_prompt, return_tensors="pt").to(llm.model.device)
-
-        # --- Test 1: Attention Output Verification ---
-        dbg("Running Test 1: Attention Output Verification...")
-        # This test ensures that the 'eager' attention implementation is active, which is
-        # necessary for reliable hook functionality in many transformers versions.
-        outputs = llm.model(**inputs, output_attentions=True)
-        assert outputs.attentions is not None, "FAIL: `outputs.attentions` is None. 'eager' implementation is likely not active."
-        assert isinstance(outputs.attentions, tuple), "FAIL: `outputs.attentions` is not a tuple."
-        assert len(outputs.attentions) == llm.config.num_hidden_layers, "FAIL: Number of attention tuples does not match number of layers."
-        results.append("✅ Test 1: Attention Output PASSED")
-        dbg("Test 1 PASSED.")
-
-        # --- Test 2: Hook Causal Efficacy ---
-        dbg("Running Test 2: Hook Causal Efficacy Verification...")
-        # This is the most critical test. It verifies that our injection mechanism (via hooks)
-        # has a real, causal effect on the model's computation.
-
-        # Run 1: Get the baseline hidden state without any intervention
-        outputs_no_hook = llm.model(**inputs, output_hidden_states=True)
-        target_layer_idx = llm.config.num_hidden_layers // 2
-        state_no_hook = outputs_no_hook.hidden_states[target_layer_idx + 1].clone()
-
-        # Define a simple hook that adds a large, constant value
-        injection_value = 42.0
-        def test_hook_fn(module, layer_input):
-            modified_input = layer_input[0] + injection_value
-            return (modified_input,) + layer_input[1:]
-
-        target_layer = llm.model.model.layers[target_layer_idx]
-        handle = target_layer.register_forward_pre_hook(test_hook_fn)
-
-        # Run 2: Get the hidden state with the hook active
-        outputs_with_hook = llm.model(**inputs, output_hidden_states=True)
-        state_with_hook = outputs_with_hook.hidden_states[target_layer_idx + 1].clone()
-
-        handle.remove()  # Clean up the hook immediately
-
-        # The core assertion: the hook MUST change the subsequent hidden state.
-        assert not torch.allclose(state_no_hook, state_with_hook), \
-            "FAIL: Hook had no measurable effect on the subsequent layer's hidden state. Injections are not working."
-        results.append("✅ Test 2: Hook Causal Efficacy PASSED")
-        dbg("Test 2 PASSED.")
-
-        # --- Test 3: KV-Cache Integrity ---
-        dbg("Running Test 3: KV-Cache Integrity Verification...")
-        # This test ensures that the `past_key_values` are being passed and updated correctly,
-        # which is the core mechanic of the silent cogitation loop.
-
-        # Step 1: Initial pass with `use_cache=True`
-        outputs1 = llm.model(**inputs, use_cache=True)
-        kv_cache1 = outputs1.past_key_values
-        assert kv_cache1 is not None, "FAIL: KV-Cache was not generated in the first pass."
-
-        # Step 2: Second pass using the cache from step 1
-        next_token = torch.tensor([[123]], device=llm.model.device)  # Arbitrary next token ID
-        outputs2 = llm.model(input_ids=next_token, past_key_values=kv_cache1, use_cache=True)
-        kv_cache2 = outputs2.past_key_values
-
-        original_seq_len = inputs.input_ids.shape[-1]
-        # The sequence length of the keys/values in the cache should have grown by 1
-        assert kv_cache2[0][0].shape[-2] == original_seq_len + 1, \
-            f"FAIL: KV-Cache sequence length did not update correctly. Expected {original_seq_len + 1}, got {kv_cache2[0][0].shape[-2]}."
-        results.append("✅ Test 3: KV-Cache Integrity PASSED")
-        dbg("Test 3 PASSED.")
-
-        # Clean up memory
-        del llm
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-
-        return "\n".join(results)
-
-    except Exception as e:
-        dbg(f"--- DIAGNOSTIC SUITE FAILED --- \n{traceback.format_exc()}")
-        # Re-raise the exception to be caught by the Gradio UI
-        raise e
cognitive_mapping_probe/pre_flight_checks.py ADDED
@@ -0,0 +1,112 @@
+import torch
+import traceback
+from types import SimpleNamespace
+
+from .llm_iface import get_or_load_model
+from .concepts import get_concept_vector
+from .resonance import run_silent_cogitation
+from .verification import generate_spontaneous_text
+from .utils import dbg
+
+def run_pre_flight_checks(model_id: str, seed: int):
+    """
+    Runs a series of critical integration tests with a REAL LLM to ensure the
+    validity of the entire experimental chain before time-consuming experiments
+    are started. Raises an AssertionError on failure.
+    """
+
+    print(f"1. Loading model '{model_id}'...")
+    try:
+        llm = get_or_load_model(model_id, seed)
+        print("   ✅ Model loaded successfully.")
+    except Exception as e:
+        raise AssertionError(f"Model loading failed: {e}")
+
+    print("\n2. Testing basic text generation...")
+    # This simple test catches tokenizer, chat-template, and basic I/O problems.
+    try:
+        # Create a dummy prompt to provoke a simple response
+        inputs = llm.tokenizer("Hello, are you working?", return_tensors="pt").to(llm.model.device)
+        outputs = llm.model.generate(inputs.input_ids, max_new_tokens=5)
+        text = llm.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        assert isinstance(text, str) and len(text) > 0
+        print(f"   ✅ Basic generation successful. Model responded.")
+        dbg(f"Response snippet: '{text[:50]}...'")
+    except Exception as e:
+        raise AssertionError(f"Basic text generation failed: {e}")
+
+    print("\n3. Testing concept vector extraction...")
+    try:
+        vector = get_concept_vector(llm, "test")
+        assert vector.shape == (llm.config.hidden_size,)
+        print("   ✅ Concept vector extraction successful.")
+    except Exception as e:
+        raise AssertionError(f"Concept vector extraction failed: {e}")
+
+    print("\n4. Testing resonance loop (short run)...")
+    try:
+        _, _, _, reason = run_silent_cogitation(llm, "control_long_prose", num_steps=5, temperature=0.1)
+        assert reason in ["converged", "max_steps_reached"]
+        print("   ✅ Resonance loop executed without errors.")
+    except Exception as e:
+        raise AssertionError(f"Resonance loop failed: {e}")
+
+    print("\n5. CRITICAL TEST: Hook causal efficacy...")
+    # This is the most important test. It ensures that our activation injections
+    # actually have a causal effect on the model's computations.
+    handle = None
+    try:
+        inputs = llm.tokenizer("Test", return_tensors="pt").to(llm.model.device)
+
+        # Run 1: without hook, to capture the original state
+        outputs_no_hook = llm.model(**inputs, output_hidden_states=True)
+        target_layer_idx = llm.config.num_hidden_layers // 2
+        state_no_hook = outputs_no_hook.hidden_states[target_layer_idx + 1].clone().detach()
+
+        # Define a simple, strong hook
+        def test_hook(module, layer_input):
+            return (layer_input[0] + 99.0,) + layer_input[1:]
+
+        target_layer = llm.model.model.layers[target_layer_idx]
+        handle = target_layer.register_forward_pre_hook(test_hook)
+
+        # Run 2: with hook
+        outputs_with_hook = llm.model(**inputs, output_hidden_states=True)
+        state_with_hook = outputs_with_hook.hidden_states[target_layer_idx + 1].clone().detach()
+
+        handle.remove()  # Remove the hook immediately
+        handle = None
+
+        # The decisive assertion: the states MUST NOT be identical.
+        assert not torch.allclose(state_no_hook, state_with_hook), \
+            "Hook had no causal effect on subsequent hidden states. The injection mechanism is broken."
+
+        print("   ✅ Hook causal efficacy verified.")
+
+    except Exception as e:
+        raise AssertionError(f"Hook efficacy test failed: {e}")
+    finally:
+        # Make sure the hook is removed in every case
+        if handle:
+            handle.remove()
+            print("   ⚠️ Hook handle was removed during exception handling.")
+
+    print("\n6. Testing verification (spontaneous text) loop...")
+    try:
+        # Create dummy data to test the function in isolation
+        dummy_state = torch.randn(1, 1, llm.config.hidden_size).to(llm.model.device)
+        dummy_kv = tuple(
+            (torch.randn(1, llm.config.num_attention_heads, 10, llm.config.hidden_size // llm.config.num_attention_heads).to(llm.model.device),
+             torch.randn(1, llm.config.num_attention_heads, 10, llm.config.hidden_size // llm.config.num_attention_heads).to(llm.model.device))
+            for _ in range(llm.config.num_hidden_layers)
+        )
+        text = generate_spontaneous_text(llm, dummy_state, dummy_kv, max_new_tokens=5)
+        assert isinstance(text, str)
+        print("   ✅ Spontaneous text generation loop executed without errors.")
+    except Exception as e:
+        raise AssertionError(f"Verification loop failed: {e}")
+
+    # Clean up
+    del llm
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
requirements.txt CHANGED
@@ -6,3 +6,5 @@ pandas>=2.0.0
 scikit-learn>=1.3.0
 einops>=0.7.0
 tqdm>=4.66.0
+pytest>=8.0.0
+pytest-mock>=3.12.0
tests/conftest.py ADDED
@@ -0,0 +1,70 @@
+import pytest
+import torch
+from types import SimpleNamespace
+from cognitive_mapping_probe.llm_iface import LLM
+
+@pytest.fixture(scope="session")
+def mock_llm_config():
+    """Provides a minimal mock configuration for the LLM."""
+    return SimpleNamespace(
+        hidden_size=128,  # smaller size for fast tests
+        num_hidden_layers=4,
+        num_attention_heads=4
+    )
+
+@pytest.fixture
+def mock_llm(mocker, mock_llm_config):
+    """
+    This is the most important fixture. It creates a "mock LLM".
+    Instead of loading a real model from Hugging Face (which is slow and requires a GPU),
+    we simulate its behavior. We use `mocker.patch` to intercept the real loading function
+    `get_or_load_model` and return our fast mock instance instead.
+    """
+    # Create a mock tokenizer instance
+    mock_tokenizer = mocker.MagicMock()
+    mock_tokenizer.eos_token_id = 1
+    mock_tokenizer.decode.return_value = "mocked text"
+
+    # Create a mock model object
+    mock_model = mocker.MagicMock()
+
+    # Configure the mock model so that it answers calls with plausible tensors
+    def mock_model_forward(*args, **kwargs):
+        batch_size = 1
+        seq_len = kwargs.get("input_ids", torch.tensor([[0]])).shape[1]
+
+        # Simulate the outputs of a real transformer model
+        mock_outputs = {
+            "hidden_states": tuple(
+                [torch.randn(batch_size, seq_len, mock_llm_config.hidden_size) for _ in range(mock_llm_config.num_hidden_layers + 1)]
+            ),
+            "past_key_values": tuple(
+                [
+                    (torch.randn(batch_size, mock_llm_config.num_attention_heads, seq_len, 16),
+                     torch.randn(batch_size, mock_llm_config.num_attention_heads, seq_len, 16))
+                    for _ in range(mock_llm_config.num_hidden_layers)
+                ]
+            ),
+            "logits": torch.randn(batch_size, seq_len, 32000)  # mock vocabulary size
+        }
+        return SimpleNamespace(**mock_outputs)
+
+    mock_model.return_value = mock_model_forward
+    mock_model.config = mock_llm_config
+    mock_model.device = 'cpu'
+
+    # Create an instance of our LLM class, but with the mocked components
+    llm_instance = LLM.__new__(LLM)
+    llm_instance.model = mock_model
+    llm_instance.tokenizer = mock_tokenizer
+    llm_instance.config = mock_llm_config
+    llm_instance.seed = 42
+    llm_instance.set_all_seeds = mocker.MagicMock()  # mock the seeding function
+
+    # The crucial step: patch the loading function so that any code calling it
+    # receives our mock instance instead.
+    mocker.patch('cognitive_mapping_probe.llm_iface.get_or_load_model', return_value=llm_instance)
+    mocker.patch('cognitive_mapping_probe.orchestrator.get_or_load_model', return_value=llm_instance)
+    mocker.patch('cognitive_mapping_probe.concepts.get_or_load_model', return_value=llm_instance)
+
+    return llm_instance
tests/test_core_logic.py ADDED
@@ -0,0 +1,125 @@
+import torch
+import pytest
+from types import SimpleNamespace
+
+from cognitive_mapping_probe.concepts import get_concept_vector
+from cognitive_mapping_probe.resonance import run_silent_cogitation
+from cognitive_mapping_probe.verification import generate_spontaneous_text
+from cognitive_mapping_probe.orchestrator import run_cognitive_titration_experiment
+
+def test_get_concept_vector(mock_llm):
+    """
+    Tests the `get_concept_vector` function.
+    ASSERT: Returns a tensor of the correct shape that is not zero.
+    """
+    concept_vector = get_concept_vector(mock_llm, "test_concept")
+
+    assert isinstance(concept_vector, torch.Tensor)
+    assert concept_vector.shape == (mock_llm.config.hidden_size,)
+    assert torch.norm(concept_vector).item() > 0
+
+def test_run_silent_cogitation_max_steps(mock_llm):
+    """
+    Tests `run_silent_cogitation` in the default case (reaches `max_steps`).
+    ASSERT: Runs without errors and returns 'max_steps_reached'.
+    """
+    _, _, _, termination_reason = run_silent_cogitation(
+        llm=mock_llm,
+        prompt_type="resonance_prompt",
+        num_steps=10,
+        temperature=0.7
+    )
+    assert termination_reason == "max_steps_reached"
+
+def test_run_silent_cogitation_convergence(mock_llm, mocker):
+    """
+    Tests the convergence case in `run_silent_cogitation`.
+    Here we patch the model's `lm_head` output so that it forces a stable state
+    after a few steps.
+    ASSERT: The function correctly detects convergence and returns 'converged'.
+    """
+    stable_hidden_state = torch.ones(1, 1, mock_llm.config.hidden_size)
+
+    # Mock the model outputs to simulate convergence
+    def convergence_side_effect(*args, **kwargs):
+        # After the 5th step, always return the same state
+        if 'past_key_values' in kwargs and kwargs['past_key_values'][0][0].shape[-2] > 5:
+            return SimpleNamespace(
+                hidden_states=(None,) * (mock_llm.config.num_hidden_layers + 1) + (stable_hidden_state,),
+                past_key_values=kwargs['past_key_values'],
+                logits=torch.randn(1, 1, 32000)
+            )
+        # Otherwise, normal behavior
+        return mock_llm.model.return_value(*args, **kwargs)
+
+    mock_llm.model.side_effect = convergence_side_effect
+
+    _, _, _, termination_reason = run_silent_cogitation(
+        llm=mock_llm,
+        prompt_type="resonance_prompt",
+        num_steps=20,  # more steps than necessary, to test convergence
+        temperature=0.7
+    )
+    assert termination_reason == "converged"
+
+def test_generate_spontaneous_text(mock_llm):
+    """
+    Tests the manual text generation.
+    ASSERT: Generates a non-empty string.
+    """
+    # Create plausible mock inputs
+    dummy_state = torch.randn(1, 1, mock_llm.config.hidden_size)
+    dummy_kv_cache = tuple(
+        [
+            (torch.randn(1, mock_llm.config.num_attention_heads, 10, 16),
+             torch.randn(1, mock_llm.config.num_attention_heads, 10, 16))
+            for _ in range(mock_llm.config.num_hidden_layers)
+        ]
+    )
+
+    text = generate_spontaneous_text(mock_llm, dummy_state, dummy_kv_cache, max_new_tokens=5)
+
+    assert isinstance(text, str)
+    assert text == "mocked text"  # our mock tokenizer always returns this text
+
+def test_orchestrator_logic(mocker, mock_llm):
+    """
+    This is an integration test for the orchestrator. We test its *logic*
+    by mocking its expensive dependencies (`run_silent_cogitation`).
+    We simulate a scenario in which the model "breaks" at strength >= 1.5.
+    ASSERT: The orchestrator records the results correctly.
+    """
+    # Mock `run_silent_cogitation` to control its behavior
+    def cognition_side_effect(*args, injection_strength=0.0, **kwargs):
+        reason = "converged" if injection_strength < 1.5 else "max_steps_reached"
+        # Return mock tensors that have the right structure
+        return (
+            torch.randn(1, 1, mock_llm.config.hidden_size),  # final_hidden_state
+            mocker.MagicMock(),  # final_kv_cache
+            torch.tensor([[0]]),  # final_token_id
+            reason
+        )
+
+    mocker.patch('cognitive_mapping_probe.orchestrator.run_silent_cogitation', side_effect=cognition_side_effect)
+    mocker.patch('cognitive_mapping_probe.orchestrator.generate_spontaneous_text', return_value="generated")
+
+    # Run the experiment with the mocked functions
+    results = run_cognitive_titration_experiment(
+        model_id="mock_model",
+        prompt_type="resonance_prompt",
+        seed=42,
+        concepts_str="test",
+        strength_levels_str="0.0, 1.0, 1.5, 2.0",
+        num_steps=10,
+        temperature=0.7,
+        progress_callback=mocker.MagicMock()  # also mock the progress callback
+    )
+
+    runs = results["runs"]
+    assert len(runs) == 4
+
+    # Check the results based on our mock logic
+    assert runs[0]['strength'] == 0.0 and runs[0]['termination_reason'] == 'converged' and runs[0]['responded'] is True
+    assert runs[1]['strength'] == 1.0 and runs[1]['termination_reason'] == 'converged' and runs[1]['responded'] is True
+    assert runs[2]['strength'] == 1.5 and runs[2]['termination_reason'] == 'max_steps_reached' and runs[2]['responded'] is False
+    assert runs[3]['strength'] == 2.0 and runs[3]['termination_reason'] == 'max_steps_reached' and runs[3]['responded'] is False