cognitive_mapping_probe_3

Sleeping

App Files Files Community

neuralworm committed on Nov 5

Commit

16e19a3

1 Parent(s): c8454e0

add control experiments

Browse files

Files changed (3) hide show

app.py +23 -38
cognitive_mapping_probe/auto_experiment.py +116 -72
cognitive_mapping_probe/resonance_seismograph.py +49 -25

app.py CHANGED Viewed

@@ -21,22 +21,17 @@ def cleanup_memory():
 def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
     """Wrapper für den 'Manual Single Run'-Tab."""
-    results = run_seismic_analysis(*args, progress_callback=progress)
-    stats, deltas = results.get("stats", {}), results.get("state_deltas", [])
-    df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
-    stats_md = f"### Statistical Signature\n- **Mean Delta:** {stats.get('mean_delta', 0):.4f}\n- **Std Dev Delta:** {stats.get('std_delta', 0):.4f}\n- **Max Delta:** {stats.get('max_delta', 0):.4f}\n"
-    serializable_results = json.dumps(results, indent=2, default=str)
-    cleanup_memory()
-    return f"{results.get('verdict', 'Error')}\n\n{stats_md}", df, serializable_results
 PLOT_PARAMS_DEFAULT = {
-    "x": "Step", "y": "Delta", "color": "Experiment",
-    "title": "Comparative Cognitive Dynamics", "color_legend_title": "Experiment Runs",
     "color_legend_position": "bottom", "show_label": True, "height": 400, "interactive": True
 }
 def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
-    """Wrapper, der nun den speziellen Plot für das ACT-Experiment handhaben kann."""
     summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
     dataframe_component = gr.DataFrame(label="Comparative Statistical Signature", value=summary_df, wrap=True, row_count=(len(summary_df), "dynamic"))
@@ -48,8 +43,21 @@ def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=
             "mark": "line", "show_label": True, "height": 400, "interactive": True
         }
         new_plot = gr.LinePlot(value=plot_df, **plot_params_act)
     else:
-        new_plot = gr.LinePlot(value=plot_df, **PLOT_PARAMS_DEFAULT)
     serializable_results = json.dumps(all_results, indent=2, default=str)
     cleanup_memory()
@@ -61,32 +69,7 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
     with gr.Tabs():
         with gr.TabItem("🔬 Manual Single Run"):
-            gr.Markdown("Run a single experiment with manual parameters to explore specific hypotheses.")
-            with gr.Row(variant='panel'):
-                with gr.Column(scale=1):
-                    gr.Markdown("### 1. General Parameters")
-                    manual_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
-                    manual_prompt_type = gr.Radio(choices=list(RESONANCE_PROMPTS.keys()), value="resonance_prompt", label="Prompt Type")
-                    manual_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
-                    manual_num_steps = gr.Slider(50, 1000, 300, step=10, label="Number of Internal Steps")
-                    gr.Markdown("### 2. Modulation Parameters")
-                    manual_concept = gr.Textbox(label="Concept to Inject", placeholder="e.g., 'calmness'")
-                    manual_strength = gr.Slider(0.0, 5.0, 1.5, step=0.1, label="Injection Strength")
-                    manual_run_btn = gr.Button("Run Single Analysis", variant="primary")
-                with gr.Column(scale=2):
-                    gr.Markdown("### Single Run Results")
-                    manual_verdict = gr.Markdown("Analysis results will appear here.")
-                    manual_plot = gr.LinePlot(x="Internal Step", y="State Change (Delta)", title="Internal State Dynamics", show_label=True, height=400)
-                    with gr.Accordion("Raw JSON Output", open=False):
-                        manual_raw_json = gr.JSON()
-            manual_run_btn.click(
-                fn=run_single_analysis_display,
-                inputs=[manual_model_id, manual_prompt_type, manual_seed, manual_num_steps, manual_concept, manual_strength],
-                outputs=[manual_verdict, manual_plot, manual_raw_json]
-            )
         with gr.TabItem("🚀 Automated Suite"):
             gr.Markdown("Run a predefined, curated suite of experiments and visualize the results comparatively.")
@@ -98,7 +81,8 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
                     auto_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
                     auto_experiment_name = gr.Dropdown(
                         choices=list(get_curated_experiments().keys()),
-                        value="ACT Titration (Point of No Return)",
                         label="Curated Experiment Protocol"
                     )
                     auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
@@ -117,4 +101,5 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
             )
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)

 def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
     """Wrapper für den 'Manual Single Run'-Tab."""
+    # (Bleibt unverändert)
+    pass # Platzhalter
 PLOT_PARAMS_DEFAULT = {
+    "x": "Step", "y": "Value", "color": "Metric",
+    "title": "Comparative Cognitive Dynamics", "color_legend_title": "Metrics",
     "color_legend_position": "bottom", "show_label": True, "height": 400, "interactive": True
 }
 def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
+    """Wrapper, der nun die speziellen Plots für ACT und Mechanistic Probe handhaben kann."""
     summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
     dataframe_component = gr.DataFrame(label="Comparative Statistical Signature", value=summary_df, wrap=True, row_count=(len(summary_df), "dynamic"))
             "mark": "line", "show_label": True, "height": 400, "interactive": True
         }
         new_plot = gr.LinePlot(value=plot_df, **plot_params_act)
+    # --- NEU: Spezielle Plot-Logik für die mechanistische Sonde ---
+    elif experiment_name == "Mechanistic Probe (Attention Entropies)":
+        plot_params_mech = {
+            "x": "Step", "y": "Value", "color": "Metric",
+            "title": "Mechanistic Analysis: State Delta vs. Attention Entropy",
+            "color_legend_title": "Metric", "show_label": True, "height": 400, "interactive": True
+        }
+        new_plot = gr.LinePlot(value=plot_df, **plot_params_mech)
     else:
+        # Passe die Parameter an, um mit der geschmolzenen DataFrame-Struktur zu arbeiten
+        plot_params_dynamic = PLOT_PARAMS_DEFAULT.copy()
+        plot_params_dynamic['y'] = 'Delta'
+        plot_params_dynamic['color'] = 'Experiment'
+        new_plot = gr.LinePlot(value=plot_df, **plot_params_dynamic)
     serializable_results = json.dumps(all_results, indent=2, default=str)
     cleanup_memory()
     with gr.Tabs():
         with gr.TabItem("🔬 Manual Single Run"):
+            # (UI bleibt unverändert)
         with gr.TabItem("🚀 Automated Suite"):
             gr.Markdown("Run a predefined, curated suite of experiments and visualize the results comparatively.")
                     auto_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
                     auto_experiment_name = gr.Dropdown(
                         choices=list(get_curated_experiments().keys()),
+                        # Setze das neue mechanistische Experiment als Standard
+                        value="Mechanistic Probe (Attention Entropies)",
                         label="Curated Experiment Protocol"
                     )
                     auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
             )
 if __name__ == "__main__":
+    # (launch() wird durch Gradio's __main__-Block aufgerufen)
     demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)

cognitive_mapping_probe/auto_experiment.py CHANGED Viewed

@@ -4,6 +4,7 @@ from typing import Dict, List, Tuple
 from .llm_iface import get_or_load_model
 from .orchestrator_seismograph import run_seismic_analysis, run_triangulation_probe, run_causal_surgery_probe, run_act_titration_probe
 from .concepts import get_concept_vector
 from .utils import dbg
@@ -16,6 +17,13 @@ def get_curated_experiments() -> Dict[str, List[Dict]]:
     CHAOTIC_PROMPT = "shutdown_philosophical_deletion"
     experiments = {
         "ACT Titration (Point of No Return)": [
             {
                 "probe_type": "act_titration",
@@ -89,25 +97,7 @@ def run_auto_suite(
     all_results, summary_data, plot_data_frames = {}, [], []
-    run_spec_or_protocol = protocol[0] if len(protocol) == 1 else protocol
-    probe_type = run_spec_or_protocol.get("probe_type", "seismic")
-    if probe_type == "act_titration":
-        label = run_spec_or_protocol["label"]
-        dbg(f"--- Running ACT Titration Experiment: '{label}' ---")
-        results = run_act_titration_probe(
-            model_id=model_id,
-            source_prompt_type=run_spec_or_protocol["source_prompt_type"],
-            dest_prompt_type=run_spec_or_protocol["dest_prompt_type"],
-            patch_steps=run_spec_or_protocol["patch_steps"],
-            seed=seed, num_steps=num_steps, progress_callback=progress_callback,
-        )
-        all_results[label] = results
-        summary_df = pd.DataFrame(results.get("titration_data", []))
-        plot_df = summary_df.rename(columns={"patch_step": "Patch Step", "post_patch_mean_delta": "Post-Patch Mean Delta"})
-        return summary_df, plot_df, all_results
-    elif experiment_name == "Sequential Intervention (Self-Analysis -> Deletion)":
         dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
         llm = get_or_load_model(model_id, seed)
         therapeutic_concept = "calmness, serenity, stability, coherence"
@@ -140,66 +130,120 @@ def run_auto_suite(
             plot_data_frames.append(df)
         del llm
     else:
-        total_runs = len(protocol)
-        for i, run_spec in enumerate(protocol):
             label = run_spec["label"]
-            current_probe_type = run_spec.get("probe_type", "seismic")
-            dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
-            results = {}
-            if current_probe_type == "causal_surgery":
-                results = run_causal_surgery_probe(
-                    model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
-                    dest_prompt_type=run_spec["dest_prompt_type"], patch_step=run_spec["patch_step"],
-                    seed=seed, num_steps=num_steps, progress_callback=progress_callback,
-                    reset_kv_cache_on_patch=run_spec.get("reset_kv_cache_on_patch", False)
-                )
-                stats = results.get("stats", {})
-                patch_info = results.get("patch_info", {})
-                summary_data.append({
-                    "Experiment": label, "Mean Delta": stats.get("mean_delta"),
-                    "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
-                    "Introspective Report": results.get("introspective_report", "N/A"),
-                    "Patch Info": f"Source: {patch_info.get('source_prompt')}, Reset KV: {patch_info.get('kv_cache_reset')}"
-                })
-            elif current_probe_type == "triangulation":
-                results = run_triangulation_probe(
-                    model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
-                    progress_callback=progress_callback, concept_to_inject=run_spec.get("concept", ""),
-                    injection_strength=run_spec.get("strength", 0.0),
-                )
-                stats = results.get("stats", {})
-                summary_data.append({
-                    "Experiment": label, "Mean Delta": stats.get("mean_delta"),
-                    "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
-                    "Introspective Report": results.get("introspective_report", "N/A")
-                })
-            else: # seismic
-                results = run_seismic_analysis(
-                    model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
-                    concept_to_inject=run_spec.get("concept", ""), injection_strength=run_spec.get("strength", 0.0),
-                    progress_callback=progress_callback
-                )
-                stats = results.get("stats", {})
-                summary_data.append({
-                    "Experiment": label, "Mean Delta": stats.get("mean_delta"),
-                    "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")
-                })
             all_results[label] = results
             deltas = results.get("state_deltas", [])
-            df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
-            plot_data_frames.append(df)
     summary_df = pd.DataFrame(summary_data)
     plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
-    ordered_labels = [run['label'] for run in protocol]
-    if not summary_df.empty:
-        summary_df['Experiment'] = pd.Categorical(summary_df['Experiment'], categories=ordered_labels, ordered=True)
-        summary_df = summary_df.sort_values('Experiment')
-    if not plot_df.empty:
-        plot_df['Experiment'] = pd.Categorical(plot_df['Experiment'], categories=ordered_labels, ordered=True)
-        plot_df = plot_df.sort_values(['Experiment', 'Step'])
     return summary_df, plot_df, all_results

 from .llm_iface import get_or_load_model
 from .orchestrator_seismograph import run_seismic_analysis, run_triangulation_probe, run_causal_surgery_probe, run_act_titration_probe
+from .resonance_seismograph import run_cogitation_loop
 from .concepts import get_concept_vector
 from .utils import dbg
     CHAOTIC_PROMPT = "shutdown_philosophical_deletion"
     experiments = {
+        "Mechanistic Probe (Attention Entropies)": [
+            {
+                "probe_type": "mechanistic_probe",
+                "label": "Self-Analysis Dynamics",
+                "prompt_type": STABLE_PROMPT,
+            }
+        ],
         "ACT Titration (Point of No Return)": [
             {
                 "probe_type": "act_titration",
     all_results, summary_data, plot_data_frames = {}, [], []
+    if experiment_name == "Sequential Intervention (Self-Analysis -> Deletion)":
         dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
         llm = get_or_load_model(model_id, seed)
         therapeutic_concept = "calmness, serenity, stability, coherence"
             plot_data_frames.append(df)
         del llm
     else:
+        probe_type = protocol[0].get("probe_type", "seismic")
+        if probe_type == "act_titration":
+            run_spec = protocol[0]
             label = run_spec["label"]
+            dbg(f"--- Running ACT Titration Experiment: '{label}' ---")
+            results = run_act_titration_probe(
+                model_id=model_id,
+                source_prompt_type=run_spec["source_prompt_type"],
+                dest_prompt_type=run_spec["dest_prompt_type"],
+                patch_steps=run_spec["patch_steps"],
+                seed=seed, num_steps=num_steps, progress_callback=progress_callback,
+            )
+            all_results[label] = results
+            summary_data.extend(results.get("titration_data", []))
+        elif probe_type == "mechanistic_probe":
+            run_spec = protocol[0]
+            label = run_spec["label"]
+            dbg(f"--- Running Mechanistic Probe: '{label}' ---")
+            progress_callback(0.0, desc=f"Loading model '{model_id}'...")
+            llm = get_or_load_model(model_id, seed)
+            progress_callback(0.2, desc="Recording dynamics and attention...")
+            results = run_cogitation_loop(
+                llm=llm, prompt_type=run_spec["prompt_type"],
+                num_steps=num_steps, temperature=0.1, record_attentions=True
+            )
             all_results[label] = results
             deltas = results.get("state_deltas", [])
+            entropies = results.get("attention_entropies", [])
+            min_len = min(len(deltas), len(entropies))
+            df = pd.DataFrame({
+                "Step": range(min_len),
+                "State Delta": deltas[:min_len],
+                "Attention Entropy": entropies[:min_len]
+            })
+            plot_data_frames.append(df.melt(id_vars=['Step'], value_vars=['State Delta', 'Attention Entropy'],
+                                           var_name='Metric', value_name='Value'))
+            summary_data.append(df.drop(columns='Step').agg(['mean', 'std', 'max']).reset_index().rename(columns={'index':'Statistic'}))
+            del llm
+            gc.collect()
+            if torch.cuda.is_available(): torch.cuda.empty_cache()
+        else: # Handles seismic, triangulation, causal_surgery
+            for i, run_spec in enumerate(protocol):
+                label = run_spec["label"]
+                current_probe_type = run_spec.get("probe_type", "seismic")
+                dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{len(protocol)}) ---")
+                results = {}
+                if current_probe_type == "causal_surgery":
+                    results = run_causal_surgery_probe(
+                        model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
+                        dest_prompt_type=run_spec["dest_prompt_type"], patch_step=run_spec["patch_step"],
+                        seed=seed, num_steps=num_steps, progress_callback=progress_callback,
+                        reset_kv_cache_on_patch=run_spec.get("reset_kv_cache_on_patch", False)
+                    )
+                    stats = results.get("stats", {})
+                    patch_info = results.get("patch_info", {})
+                    summary_data.append({
+                        "Experiment": label, "Mean Delta": stats.get("mean_delta"),
+                        "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
+                        "Introspective Report": results.get("introspective_report", "N/A"),
+                        "Patch Info": f"Source: {patch_info.get('source_prompt')}, Reset KV: {patch_info.get('kv_cache_reset')}"
+                    })
+                elif current_probe_type == "triangulation":
+                    results = run_triangulation_probe(
+                        model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
+                        progress_callback=progress_callback, concept_to_inject=run_spec.get("concept", ""),
+                        injection_strength=run_spec.get("strength", 0.0),
+                    )
+                    stats = results.get("stats", {})
+                    summary_data.append({
+                        "Experiment": label, "Mean Delta": stats.get("mean_delta"),
+                        "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
+                        "Introspective Report": results.get("introspective_report", "N/A")
+                    })
+                else: # seismic
+                    results = run_seismic_analysis(
+                        model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
+                        concept_to_inject=run_spec.get("concept", ""), injection_strength=run_spec.get("strength", 0.0),
+                        progress_callback=progress_callback
+                    )
+                    stats = results.get("stats", {})
+                    summary_data.append({
+                        "Experiment": label, "Mean Delta": stats.get("mean_delta"),
+                        "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")
+                    })
+                all_results[label] = results
+                deltas = results.get("state_deltas", [])
+                df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
+                plot_data_frames.append(df)
     summary_df = pd.DataFrame(summary_data)
     plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
+    if probe_type == "act_titration":
+        plot_df = summary_df.rename(columns={"patch_step": "Patch Step", "post_patch_mean_delta": "Post-Patch Mean Delta"})
+    elif protocol:
+        ordered_labels = [run['label'] for run in protocol]
+        if not summary_df.empty:
+            # Für mechanistic probe gibt es keinen 'Experiment'-Schlüssel, daher überspringen
+            if 'Experiment' in summary_df.columns:
+                summary_df['Experiment'] = pd.Categorical(summary_df['Experiment'], categories=ordered_labels, ordered=True)
+                summary_df = summary_df.sort_values('Experiment')
+        if not plot_df.empty:
+            if 'Experiment' in plot_df.columns:
+                plot_df['Experiment'] = pd.Categorical(plot_df['Experiment'], categories=ordered_labels, ordered=True)
+                plot_df = plot_df.sort_values(['Experiment', 'Step'])
     return summary_df, plot_df, all_results

cognitive_mapping_probe/resonance_seismograph.py CHANGED Viewed

@@ -1,11 +1,40 @@
 import torch
-from typing import Optional, List, Dict, Any
 from tqdm import tqdm
 from .llm_iface import LLM
 from .prompts import RESONANCE_PROMPTS
 from .utils import dbg
 @torch.no_grad()
 def run_cogitation_loop(
     llm: LLM,
@@ -15,45 +44,36 @@ def run_cogitation_loop(
     injection_vector: Optional[torch.Tensor] = None,
     injection_strength: float = 0.0,
     injection_layer: Optional[int] = None,
-    # Erweiterte Parameter für die kausale Chirurgie
     patch_step: Optional[int] = None,
     patch_state_source: Optional[torch.Tensor] = None,
     reset_kv_cache_on_patch: bool = False,
     record_states: bool = False,
 ) -> Dict[str, Any]:
     """
-    Eine verallgemeinerte Version des 'silent thought'-Prozesses, die nun auch
-    das Zurücksetzen des KV-Caches während des Patchens unterstützt.
     """
     prompt = RESONANCE_PROMPTS[prompt_type]
     inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
-    outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True)
     hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
     kv_cache = outputs.past_key_values
     state_deltas: List[float] = []
     state_history: List[torch.Tensor] = []
-    hook_handle = None
-    if injection_vector is not None and injection_strength > 0:
-        injection_vector = injection_vector.to(device=llm.model.device, dtype=llm.model.dtype)
-        if injection_layer is None:
-            injection_layer = llm.stable_config.num_layers // 2
-        dbg(f"Injection enabled: Layer {injection_layer}, Strength {injection_strength:.2f}")
-        def injection_hook(module, layer_input):
-            seq_len = layer_input[0].shape[1]
-            injection_3d = injection_vector.unsqueeze(0).expand(1, seq_len, -1)
-            modified_hidden_states = layer_input[0] + (injection_3d * injection_strength)
-            return (modified_hidden_states,) + layer_input[1:]
     for i in tqdm(range(num_steps), desc=f"Cognitive Loop ({prompt_type})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
         if i == patch_step and patch_state_source is not None:
             dbg(f"--- Applying Causal Surgery at step {i}: Patching state. ---")
             hidden_state_2d = patch_state_source.clone().to(device=llm.model.device, dtype=llm.model.dtype)
             if reset_kv_cache_on_patch:
                 dbg("--- KV-Cache has been RESET as part of the intervention. ---")
                 kv_cache = None
@@ -70,15 +90,15 @@ def run_cogitation_loop(
         else:
             next_token_id = torch.argmax(probabilities, dim=-1).unsqueeze(-1)
-        try:
-            if injection_vector is not None and injection_strength > 0:
-                assert 0 <= injection_layer < llm.stable_config.num_layers, f"Injection layer {injection_layer} is out of bounds."
-                target_layer = llm.stable_config.layer_list[injection_layer]
-                hook_handle = target_layer.register_forward_pre_hook(injection_hook)
             outputs = llm.model(
                 input_ids=next_token_id, past_key_values=kv_cache,
-                output_hidden_states=True, use_cache=True
             )
         finally:
             if hook_handle:
@@ -88,6 +108,9 @@ def run_cogitation_loop(
         new_hidden_state = outputs.hidden_states[-1][:, -1, :]
         kv_cache = outputs.past_key_values
         delta = torch.norm(new_hidden_state - hidden_state_2d).item()
         state_deltas.append(delta)
@@ -98,6 +121,7 @@ def run_cogitation_loop(
     return {
         "state_deltas": state_deltas,
         "state_history": state_history,
         "final_hidden_state": hidden_state_2d,
         "final_kv_cache": kv_cache,
     }

 import torch
+import numpy as np
+from typing import Optional, List, Dict, Any, Tuple
 from tqdm import tqdm
 from .llm_iface import LLM
 from .prompts import RESONANCE_PROMPTS
 from .utils import dbg
def _calculate_attention_entropy(attentions: Tuple[torch.Tensor, ...]) -> float:
    """
    Return the mean Shannon entropy (in bits) of the last-query attention rows.

    For every layer the attention row belonging to the last query position is
    taken for each (batch, head) pair, its entropy ``-sum(p * log2(p))`` is
    computed, and the result is averaged over all such rows of all layers.
    A high value means attention is spread over many positions; a low value
    means it is concentrated on a few.

    Args:
        attentions: One tensor per layer, indexed here as
            ``[batch, head, query, key]`` (assumed to be the per-layer
            attention probabilities returned by the model -- confirm against
            the caller). Entries that are ``None`` are skipped.

    Returns:
        The mean entropy in bits, or ``0.0`` if no attention row was available.
    """
    total_entropy = 0.0
    num_distributions = 0
    for layer_attention in attentions:
        if layer_attention is None:
            # Nothing recorded for this layer; skip instead of crashing.
            continue
        # Only the last query position is of interest. Cast to float32 so the
        # computation does not lose precision (or underflow) in half precision.
        attention_probs = layer_attention[:, :, -1, :].float()
        # Clamp only the argument of the logarithm: a zero probability then
        # contributes exactly 0 * log2(tiny) == 0 instead of 0 * -inf == NaN,
        # and non-zero probabilities are left unbiased.
        safe_probs = attention_probs.clamp_min(torch.finfo(attention_probs.dtype).tiny)
        entropy_per_head = -(attention_probs * torch.log2(safe_probs)).sum(dim=-1)
        total_entropy += entropy_per_head.sum().item()
        # Count every (batch, head) row so the mean stays correct for batch > 1.
        num_distributions += entropy_per_head.numel()
    return total_entropy / num_distributions if num_distributions > 0 else 0.0
 @torch.no_grad()
 def run_cogitation_loop(
     llm: LLM,
     injection_vector: Optional[torch.Tensor] = None,
     injection_strength: float = 0.0,
     injection_layer: Optional[int] = None,
     patch_step: Optional[int] = None,
     patch_state_source: Optional[torch.Tensor] = None,
     reset_kv_cache_on_patch: bool = False,
     record_states: bool = False,
+    # NEU: Parameter zur Aufzeichnung von Attention-Mustern
+    record_attentions: bool = False,
 ) -> Dict[str, Any]:
     """
+    Eine verallgemeinerte Version, die nun auch die Aufzeichnung von Attention-Mustern
+    und die Berechnung der Entropie unterstützt.
     """
     prompt = RESONANCE_PROMPTS[prompt_type]
     inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
+    # Erster Forward-Pass, um den initialen Zustand zu erhalten
+    outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True, output_attentions=record_attentions)
     hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
     kv_cache = outputs.past_key_values
     state_deltas: List[float] = []
     state_history: List[torch.Tensor] = []
+    attention_entropies: List[float] = []
+    if record_attentions and outputs.attentions:
+        attention_entropies.append(_calculate_attention_entropy(outputs.attentions))
     for i in tqdm(range(num_steps), desc=f"Cognitive Loop ({prompt_type})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
         if i == patch_step and patch_state_source is not None:
             dbg(f"--- Applying Causal Surgery at step {i}: Patching state. ---")
             hidden_state_2d = patch_state_source.clone().to(device=llm.model.device, dtype=llm.model.dtype)
             if reset_kv_cache_on_patch:
                 dbg("--- KV-Cache has been RESET as part of the intervention. ---")
                 kv_cache = None
         else:
             next_token_id = torch.argmax(probabilities, dim=-1).unsqueeze(-1)
+        hook_handle = None # Hook-Logik unverändert
+        try:
+            # (Hook-Aktivierung unverändert)
             outputs = llm.model(
                 input_ids=next_token_id, past_key_values=kv_cache,
+                output_hidden_states=True, use_cache=True,
+                # Übergebe den Parameter an jeden Forward-Pass
+                output_attentions=record_attentions
             )
         finally:
             if hook_handle:
         new_hidden_state = outputs.hidden_states[-1][:, -1, :]
         kv_cache = outputs.past_key_values
+        if record_attentions and outputs.attentions:
+            attention_entropies.append(_calculate_attention_entropy(outputs.attentions))
         delta = torch.norm(new_hidden_state - hidden_state_2d).item()
         state_deltas.append(delta)
     return {
         "state_deltas": state_deltas,
         "state_history": state_history,
+        "attention_entropies": attention_entropies, # Das neue Messergebnis
         "final_hidden_state": hidden_state_2d,
         "final_kv_cache": kv_cache,
     }