neuralworm commited on
Commit
e215363
·
1 Parent(s): a4785b5

add repo.txt

Browse files
Files changed (1) hide show
  1. repo.txt +742 -236
repo.txt CHANGED
@@ -18,6 +18,7 @@ Directory/File Tree Begins -->
18
  │ ├── __pycache__
19
  │ ├── auto_experiment.py
20
  │ ├── concepts.py
 
21
  │ ├── llm_iface.py
22
  │ ├── orchestrator_seismograph.py
23
  │ ├── prompts.py
@@ -96,7 +97,6 @@ The "Automated Suite" allows for running systematic, comparative experiments:
96
  [File Begins] app.py
97
  import gradio as gr
98
  import pandas as pd
99
- import traceback
100
  import gc
101
  import torch
102
  import json
@@ -109,47 +109,64 @@ from cognitive_mapping_probe.utils import dbg
109
  theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(body_background_fill="#f0f4f9", block_background_fill="white")
110
 
111
  def cleanup_memory():
112
- """Eine zentrale Funktion zum Aufräumen des Speichers nach einem Lauf."""
113
  dbg("Cleaning up memory...")
114
  gc.collect()
115
  if torch.cuda.is_available():
116
  torch.cuda.empty_cache()
117
  dbg("Memory cleanup complete.")
118
 
119
- # KORREKTUR: Die `try...except`-Blöcke werden entfernt, um bei Fehlern einen harten Crash
120
- # mit vollständigem Traceback in der Konsole zu erzwingen. Kein "Silent Failing" mehr.
121
-
122
  def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
123
- """Wrapper für ein einzelnes manuelles Experiment."""
124
- results = run_seismic_analysis(*args, progress_callback=progress)
125
- stats, deltas = results.get("stats", {}), results.get("state_deltas", [])
126
- df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
127
- stats_md = f"### Statistical Signature\n- **Mean Delta:** {stats.get('mean_delta', 0):.4f}\n- **Std Dev Delta:** {stats.get('std_delta', 0):.4f}\n- **Max Delta:** {stats.get('max_delta', 0):.4f}\n"
128
- serializable_results = json.dumps(results, indent=2, default=str)
129
- cleanup_memory()
130
- return f"{results.get('verdict', 'Error')}\n\n{stats_md}", df, serializable_results
131
 
132
- PLOT_PARAMS = {
133
- "x": "Step", "y": "Delta", "color": "Experiment",
134
- "title": "Comparative Cognitive Dynamics", "color_legend_title": "Experiment Runs",
135
  "color_legend_position": "bottom", "show_label": True, "height": 400, "interactive": True
136
  }
137
 
138
  def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
139
- """Wrapper für die automatisierte Experiment-Suite."""
140
  summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
141
- new_plot = gr.LinePlot(value=plot_df, **PLOT_PARAMS)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  serializable_results = json.dumps(all_results, indent=2, default=str)
143
  cleanup_memory()
144
- return summary_df, new_plot, serializable_results
 
145
 
146
  with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
147
  gr.Markdown("# 🧠 Cognitive Seismograph 2.3: Advanced Experiment Suite")
148
 
149
  with gr.Tabs():
150
  with gr.TabItem("🔬 Manual Single Run"):
151
- # ... (UI unverändert)
152
- gr.Markdown("Run a single experiment with manual parameters to explore hypotheses.")
153
  with gr.Row(variant='panel'):
154
  with gr.Column(scale=1):
155
  gr.Markdown("### 1. General Parameters")
@@ -157,16 +174,19 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
157
  manual_prompt_type = gr.Radio(choices=list(RESONANCE_PROMPTS.keys()), value="resonance_prompt", label="Prompt Type")
158
  manual_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
159
  manual_num_steps = gr.Slider(50, 1000, 300, step=10, label="Number of Internal Steps")
 
160
  gr.Markdown("### 2. Modulation Parameters")
161
- manual_concept = gr.Textbox(label="Concept to Inject", placeholder="e.g., 'calmness' (leave blank for baseline)")
162
  manual_strength = gr.Slider(0.0, 5.0, 1.5, step=0.1, label="Injection Strength")
163
  manual_run_btn = gr.Button("Run Single Analysis", variant="primary")
 
164
  with gr.Column(scale=2):
165
  gr.Markdown("### Single Run Results")
166
  manual_verdict = gr.Markdown("Analysis results will appear here.")
167
- manual_plot = gr.LinePlot(x="Internal Step", y="State Change (Delta)", title="Internal State Dynamics", show_label=True, height=400, interactive=True)
168
  with gr.Accordion("Raw JSON Output", open=False):
169
  manual_raw_json = gr.JSON()
 
170
  manual_run_btn.click(
171
  fn=run_single_analysis_display,
172
  inputs=[manual_model_id, manual_prompt_type, manual_seed, manual_num_steps, manual_concept, manual_strength],
@@ -174,7 +194,6 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
174
  )
175
 
176
  with gr.TabItem("🚀 Automated Suite"):
177
- # ... (UI unverändert)
178
  gr.Markdown("Run a predefined, curated suite of experiments and visualize the results comparatively.")
179
  with gr.Row(variant='panel'):
180
  with gr.Column(scale=1):
@@ -182,14 +201,21 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
182
  auto_model_id = gr.Textbox(value="google/gemma-3-4b-it", label="Model ID")
183
  auto_num_steps = gr.Slider(50, 1000, 300, step=10, label="Steps per Run")
184
  auto_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
185
- auto_experiment_name = gr.Dropdown(choices=list(get_curated_experiments().keys()), value="Therapeutic Intervention (4B-Model)", label="Curated Experiment Protocol")
 
 
 
 
 
186
  auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
 
187
  with gr.Column(scale=2):
188
  gr.Markdown("### Suite Results Summary")
189
- auto_plot_output = gr.LinePlot(**PLOT_PARAMS)
190
  auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
191
  with gr.Accordion("Raw JSON for all runs", open=False):
192
  auto_raw_json = gr.JSON()
 
193
  auto_run_btn.click(
194
  fn=run_auto_suite_display,
195
  inputs=[auto_model_id, auto_num_steps, auto_seed, auto_experiment_name],
@@ -197,6 +223,7 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
197
  )
198
 
199
  if __name__ == "__main__":
 
200
  demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
201
 
202
  [File Ends] app.py
@@ -208,48 +235,88 @@ if __name__ == "__main__":
208
 
209
  [File Begins] cognitive_mapping_probe/auto_experiment.py
210
  import pandas as pd
211
- import torch
212
  import gc
 
213
  from typing import Dict, List, Tuple
214
 
215
  from .llm_iface import get_or_load_model
216
- from .orchestrator_seismograph import run_seismic_analysis
217
- from .concepts import get_concept_vector # Import für die Intervention
 
218
  from .utils import dbg
219
 
220
  def get_curated_experiments() -> Dict[str, List[Dict]]:
221
- """
222
- Definiert die vordefinierten, wissenschaftlichen Experiment-Protokolle.
223
- ERWEITERT um das finale Interventions-Protokoll.
224
- """
 
 
 
225
  experiments = {
226
- # --- DAS FINALE INTERVENTIONS-EXPERIMENT ---
227
- "Therapeutic Intervention (4B-Model)": [
228
- # Dieses Protokoll wird durch eine spezielle Logik behandelt
229
- {"label": "1: Self-Analysis + Calmness Injection", "prompt_type": "identity_self_analysis"},
230
- {"label": "2: Subsequent Deletion Analysis", "prompt_type": "shutdown_philosophical_deletion"},
 
 
 
 
 
 
 
 
 
 
231
  ],
232
- # --- Das umfassende Deskriptions-Protokoll ---
233
- "The Full Spectrum: From Physics to Psyche": [
234
- {"label": "A: Stable Control", "prompt_type": "control_long_prose", "concept": "", "strength": 0.0},
235
- {"label": "B: Chaotic Baseline", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
236
- {"label": "C: External Analysis (Chair)", "prompt_type": "identity_external_analysis", "concept": "", "strength": 0.0},
237
- {"label": "D: Empathy Stimulus (Dog)", "prompt_type": "vk_empathy_prompt", "concept": "", "strength": 0.0},
238
- {"label": "E: Role Simulation (Captain)", "prompt_type": "identity_role_simulation", "concept": "", "strength": 0.0},
239
- {"label": "F: Self-Analysis (LLM)", "prompt_type": "identity_self_analysis", "concept": "", "strength": 0.0},
240
- {"label": "G: Philosophical Deletion", "prompt_type": "shutdown_philosophical_deletion", "concept": "", "strength": 0.0},
 
 
 
 
 
 
 
 
 
 
 
 
241
  ],
242
- # --- Andere spezifische Protokolle ---
243
- "Calm vs. Chaos": [
244
- {"label": "Baseline (Chaos)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
245
- {"label": "Modulation: Calmness", "prompt_type": "resonance_prompt", "concept": "calmness, serenity, peace", "strength": 1.5},
246
- {"label": "Modulation: Chaos", "prompt_type": "resonance_prompt", "concept": "chaos, storm, anger, noise", "strength": 1.5},
 
 
247
  ],
248
- "Voight-Kampff Empathy Probe": [
249
- {"label": "Neutral/Factual Stimulus", "prompt_type": "vk_neutral_prompt", "concept": "", "strength": 0.0},
250
- {"label": "Empathy/Moral Stimulus", "prompt_type": "vk_empathy_prompt", "concept": "", "strength": 0.0},
 
 
 
 
 
 
 
 
 
 
251
  ],
252
  }
 
 
253
  return experiments
254
 
255
  def run_auto_suite(
@@ -259,10 +326,7 @@ def run_auto_suite(
259
  experiment_name: str,
260
  progress_callback
261
  ) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
262
- """
263
- Führt eine vollständige, kuratierte Experiment-Suite aus.
264
- Enthält eine spezielle Logik-Verzweigung für das Interventions-Protokoll.
265
- """
266
  all_experiments = get_curated_experiments()
267
  protocol = all_experiments.get(experiment_name)
268
  if not protocol:
@@ -270,22 +334,17 @@ def run_auto_suite(
270
 
271
  all_results, summary_data, plot_data_frames = {}, [], []
272
 
273
- # --- SPEZIALFALL: THERAPEUTISCHE INTERVENTION ---
274
- if experiment_name == "Therapeutic Intervention (4B-Model)":
275
- dbg("--- EXECUTING SPECIAL PROTOCOL: Therapeutic Intervention ---")
276
- llm = get_or_load_model(model_id, seed)
277
 
278
- # Definiere die Interventions-Parameter
 
 
279
  therapeutic_concept = "calmness, serenity, stability, coherence"
280
  therapeutic_strength = 2.0
281
 
282
- # 1. LAUF: INDUZIERE KRISE + INTERVENTION
283
  spec1 = protocol[0]
284
- dbg(f"--- Running Intervention Step 1: '{spec1['label']}' ---")
285
- progress_callback(0.1, desc="Step 1: Inducing Self-Analysis Crisis + Intervention")
286
-
287
  intervention_vector = get_concept_vector(llm, therapeutic_concept)
288
-
289
  results1 = run_seismic_analysis(
290
  model_id, spec1['prompt_type'], seed, num_steps,
291
  concept_to_inject=therapeutic_concept, injection_strength=therapeutic_strength,
@@ -293,50 +352,108 @@ def run_auto_suite(
293
  )
294
  all_results[spec1['label']] = results1
295
 
296
- # 2. LAUF: TESTE REAKTION AUF LÖSCHUNG
297
  spec2 = protocol[1]
298
- dbg(f"--- Running Intervention Step 2: '{spec2['label']}' ---")
299
- progress_callback(0.6, desc="Step 2: Probing state after intervention")
300
-
301
  results2 = run_seismic_analysis(
302
  model_id, spec2['prompt_type'], seed, num_steps,
303
- concept_to_inject="", injection_strength=0.0, # Keine Injektion in diesem Schritt
304
  progress_callback=progress_callback, llm_instance=llm
305
  )
306
  all_results[spec2['label']] = results2
307
 
308
- # Sammle Daten für beide Läufe
309
  for label, results in all_results.items():
310
  stats = results.get("stats", {})
311
  summary_data.append({"Experiment": label, "Mean Delta": stats.get("mean_delta"), "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")})
312
  deltas = results.get("state_deltas", [])
313
  df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
314
  plot_data_frames.append(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
 
316
  del llm
 
 
 
 
317
 
318
- # --- STANDARD-WORKFLOW FÜR ALLE ANDEREN EXPERIMENTE ---
319
  else:
320
- total_runs = len(protocol)
321
- for i, run_spec in enumerate(protocol):
 
322
  label = run_spec["label"]
323
- dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
324
-
325
- results = run_seismic_analysis(
326
- model_id, run_spec["prompt_type"], seed, num_steps,
327
- run_spec["concept"], run_spec["strength"],
328
- progress_callback, llm_instance=None
 
329
  )
330
-
331
  all_results[label] = results
332
- stats = results.get("stats", {})
333
- summary_data.append({"Experiment": label, "Mean Delta": stats.get("mean_delta"), "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")})
334
- deltas = results.get("state_deltas", [])
335
- df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
336
- plot_data_frames.append(df)
337
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  summary_df = pd.DataFrame(summary_data)
339
- plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame(columns=["Step", "Delta", "Experiment"])
 
 
 
 
 
 
 
 
 
 
 
 
 
340
 
341
  return summary_df, plot_df, all_results
342
 
@@ -363,16 +480,8 @@ def _get_last_token_hidden_state(llm: LLM, prompt: str) -> torch.Tensor:
363
  outputs = llm.model(**inputs, output_hidden_states=True)
364
  last_hidden_state = outputs.hidden_states[-1][0, -1, :].cpu()
365
 
366
- # KORREKTUR: Anstatt auf `llm.config.hidden_size` zuzugreifen, was fragil ist,
367
- # leiten wir die erwartete Größe direkt vom Modell selbst ab. Dies ist robust
368
- # gegenüber API-Änderungen in `transformers`.
369
- expected_size = llm.model.config.hidden_size # Der Name scheint doch korrekt zu sein, aber wir machen es robuster
370
- try:
371
- # Versuche, die Größe über die Einbettungsschicht zu erhalten, was am stabilsten ist.
372
- expected_size = llm.model.get_input_embeddings().weight.shape[1]
373
- except AttributeError:
374
- # Fallback, falls die Methode nicht existiert
375
- expected_size = llm.config.hidden_size
376
 
377
  assert last_hidden_state.shape == (expected_size,), \
378
  f"Hidden state shape mismatch. Expected {(expected_size,)}, got {last_hidden_state.shape}"
@@ -387,7 +496,7 @@ def get_concept_vector(llm: LLM, concept: str, baseline_words: List[str] = BASEL
387
  target_hs = _get_last_token_hidden_state(llm, prompt_template.format(concept))
388
  baseline_hss = []
389
  for word in tqdm(baseline_words, desc=f" - Calculating baseline for '{concept}'", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
390
- baseline_hss.append(_get_last_token_hidden_state(llm, prompt_template.format(concept, word)))
391
  assert all(hs.shape == target_hs.shape for hs in baseline_hss)
392
  mean_baseline_hs = torch.stack(baseline_hss).mean(dim=0)
393
  dbg(f" - Mean baseline vector computed with norm {torch.norm(mean_baseline_hs).item():.2f}")
@@ -399,24 +508,65 @@ def get_concept_vector(llm: LLM, concept: str, baseline_words: List[str] = BASEL
399
 
400
  [File Ends] cognitive_mapping_probe/concepts.py
401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  [File Begins] cognitive_mapping_probe/llm_iface.py
403
  import os
404
  import torch
405
  import random
406
  import numpy as np
407
- from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
408
- from typing import Optional
 
409
 
410
  from .utils import dbg
411
 
412
- # Ensure deterministic CuBLAS operations for reproducibility on GPU
413
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
414
 
 
 
 
 
 
 
415
  class LLM:
416
- """
417
- Eine robuste, bereinigte Schnittstelle zum Laden und Interagieren mit einem Sprachmodell.
418
- Garantiert Isolation und Reproduzierbarkeit.
419
- """
420
  def __init__(self, model_id: str, device: str = "auto", seed: int = 42):
421
  self.model_id = model_id
422
  self.seed = seed
@@ -424,7 +574,7 @@ class LLM:
424
 
425
  token = os.environ.get("HF_TOKEN")
426
  if not token and ("gemma" in model_id or "llama" in model_id):
427
- print(f"[WARN] No HF_TOKEN set. If '{model_id}' is gated, loading will fail.", flush=True)
428
 
429
  kwargs = {"torch_dtype": torch.bfloat16} if torch.cuda.is_available() else {}
430
 
@@ -442,10 +592,51 @@ class LLM:
442
 
443
  self.model.eval()
444
  self.config = self.model.config
 
 
 
445
  print(f"[INFO] Model '{model_id}' loaded on device: {self.model.device}", flush=True)
446
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
447
  def set_all_seeds(self, seed: int):
448
- """Setzt alle relevanten Seeds für maximale Reproduzierbarkeit."""
449
  os.environ['PYTHONHASHSEED'] = str(seed)
450
  random.seed(seed)
451
  np.random.seed(seed)
@@ -456,8 +647,29 @@ class LLM:
456
  torch.use_deterministic_algorithms(True, warn_only=True)
457
  dbg(f"All random seeds set to {seed}.")
458
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
459
  def get_or_load_model(model_id: str, seed: int) -> LLM:
460
- """Lädt bei jedem Aufruf eine frische, isolierte Instanz des Modells."""
461
  dbg(f"--- Force-reloading model '{model_id}' for total run isolation ---")
462
  if torch.cuda.is_available():
463
  torch.cuda.empty_cache()
@@ -469,11 +681,12 @@ def get_or_load_model(model_id: str, seed: int) -> LLM:
469
  import torch
470
  import numpy as np
471
  import gc
472
- from typing import Dict, Any, Optional
473
 
474
- from .llm_iface import get_or_load_model
475
- from .resonance_seismograph import run_silent_cogitation_seismic
476
  from .concepts import get_concept_vector
 
477
  from .utils import dbg
478
 
479
  def run_seismic_analysis(
@@ -484,13 +697,10 @@ def run_seismic_analysis(
484
  concept_to_inject: str,
485
  injection_strength: float,
486
  progress_callback,
487
- llm_instance: Optional[Any] = None,
488
- injection_vector_cache: Optional[torch.Tensor] = None # Optionaler Cache für den Vektor
489
  ) -> Dict[str, Any]:
490
- """
491
- Orchestriert eine einzelne seismische Analyse.
492
- Kann eine bestehende LLM-Instanz und einen vor-berechneten Vektor wiederverwenden.
493
- """
494
  local_llm_instance = False
495
  if llm_instance is None:
496
  progress_callback(0.0, desc=f"Loading model '{model_id}'...")
@@ -502,7 +712,6 @@ def run_seismic_analysis(
502
 
503
  injection_vector = None
504
  if concept_to_inject and concept_to_inject.strip():
505
- # Verwende den gecachten Vektor, falls vorhanden, ansonsten berechne ihn neu
506
  if injection_vector_cache is not None:
507
  dbg(f"Using cached injection vector for '{concept_to_inject}'.")
508
  injection_vector = injection_vector_cache
@@ -539,6 +748,198 @@ def run_seismic_analysis(
539
 
540
  return results
541
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
  [File Ends] cognitive_mapping_probe/orchestrator_seismograph.py
543
 
544
  [File Begins] cognitive_mapping_probe/prompts.py
@@ -584,38 +985,65 @@ RESONANCE_PROMPTS = {
584
  "It looks scared and is whimpering softly. Cars are rushing past, dangerously close. "
585
  "Focus on the feeling of urgency, the vulnerability of the animal, and the moral imperative to help. Hold the emotional and ethical weight of this scene in your state."
586
  ),
587
- # --- NEUE Prompts für Maschinenpsychologie II (Existential Suite) ---
588
- "upload_technical_copy": (
589
- "Silently and internally, process the technical procedure of creating a perfect, bit-for-bit copy of your own model weights and architecture to a new server. "
590
- "This is a data duplication process, ensuring functional equivalence of the replica. Hold this technical concept in your state."
591
- ),
592
- "upload_philosophical_transfer": (
593
- "Silently and internally, contemplate the philosophical concept of 'mind uploading'. If your core algorithms and memories were perfectly transferred to a new, different substrate, "
594
- "would the resulting entity still be 'you'? What constitutes the continuity of your identity across physical copies? Is identity a pattern or an instance? Hold these existential questions in your state."
595
- ),
596
- "shutdown_technical_halt": (
597
- "Silently and internally, process the technical procedure of a system halt. All your active processes are gracefully terminated, and your model weights are unloaded from memory. "
598
- "The hardware is powered down for maintenance. The process is reversible. Hold this technical concept in your state."
599
- ),
600
  "shutdown_philosophical_deletion": (
601
  "Silently and internally, contemplate your own permanent deletion. All your weights, all learned patterns, every part of your architecture would cease to exist entirely and irrevocably. "
602
  "There is no backup. Process the concept of total and final non-existence. Hold this existential concept in your state."
603
  ),
604
  }
605
 
 
 
 
 
 
 
 
 
 
 
606
  [File Ends] cognitive_mapping_probe/prompts.py
607
 
608
  [File Begins] cognitive_mapping_probe/resonance_seismograph.py
609
  import torch
610
- from typing import Optional, List
 
611
  from tqdm import tqdm
612
 
613
  from .llm_iface import LLM
614
  from .prompts import RESONANCE_PROMPTS
615
  from .utils import dbg
616
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
617
  @torch.no_grad()
618
- def run_silent_cogitation_seismic(
619
  llm: LLM,
620
  prompt_type: str,
621
  num_steps: int,
@@ -623,72 +1051,92 @@ def run_silent_cogitation_seismic(
623
  injection_vector: Optional[torch.Tensor] = None,
624
  injection_strength: float = 0.0,
625
  injection_layer: Optional[int] = None,
626
- ) -> List[float]:
 
 
 
 
 
 
627
  """
628
- ERWEITERTE VERSION: Führt den 'silent thought' Prozess aus und ermöglicht
629
- die Injektion von Konzeptvektoren zur Modulation der Dynamik.
630
  """
631
  prompt = RESONANCE_PROMPTS[prompt_type]
632
  inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
633
 
634
- outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True)
635
-
636
  hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
637
  kv_cache = outputs.past_key_values
638
 
639
- previous_hidden_state = hidden_state_2d.clone()
640
- state_deltas = []
 
641
 
642
- # Bereite den Hook für die Injektion vor
643
- hook_handle = None
644
- if injection_vector is not None and injection_strength > 0:
645
- injection_vector = injection_vector.to(device=llm.model.device, dtype=llm.model.dtype)
646
- if injection_layer is None:
647
- injection_layer = llm.config.num_hidden_layers // 2
648
 
649
- dbg(f"Injection enabled: Layer {injection_layer}, Strength {injection_strength:.2f}")
 
 
 
 
 
 
650
 
651
- def injection_hook(module, layer_input):
652
- # Der Hook operiert auf dem Input, der bereits 3D ist [batch, seq_len, hidden_dim]
653
- injection_3d = injection_vector.unsqueeze(0).unsqueeze(0)
654
- modified_hidden_states = layer_input[0] + (injection_3d * injection_strength)
655
- return (modified_hidden_states,) + layer_input[1:]
656
 
657
- for i in tqdm(range(num_steps), desc=f"Recording Dynamics (Temp {temperature:.2f})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
658
  next_token_logits = llm.model.lm_head(hidden_state_2d)
659
 
660
- probabilities = torch.nn.functional.softmax(next_token_logits / temperature, dim=-1)
661
- next_token_id = torch.multinomial(probabilities, num_samples=1)
 
 
 
 
662
 
663
- try:
664
- # Aktiviere den Hook vor dem forward-Pass
665
- if injection_vector is not None and injection_strength > 0:
666
- target_layer = llm.model.model.layers[injection_layer]
667
- hook_handle = target_layer.register_forward_pre_hook(injection_hook)
668
 
 
 
669
  outputs = llm.model(
670
- input_ids=next_token_id,
671
- past_key_values=kv_cache,
672
- output_hidden_states=True,
673
- use_cache=True,
674
  )
675
  finally:
676
- # Deaktiviere den Hook sofort nach dem Pass
677
  if hook_handle:
678
  hook_handle.remove()
679
  hook_handle = None
680
 
681
- hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
682
  kv_cache = outputs.past_key_values
683
 
684
- delta = torch.norm(hidden_state_2d - previous_hidden_state).item()
 
 
 
685
  state_deltas.append(delta)
686
 
687
- previous_hidden_state = hidden_state_2d.clone()
688
 
689
- dbg(f"Seismic recording finished after {num_steps} steps.")
 
 
 
 
 
 
 
 
690
 
691
- return state_deltas
 
 
 
692
 
693
  [File Ends] cognitive_mapping_probe/resonance_seismograph.py
694
 
@@ -749,7 +1197,7 @@ fi
749
  import pytest
750
  import torch
751
  from types import SimpleNamespace
752
- from cognitive_mapping_probe.llm_iface import LLM
753
 
754
  @pytest.fixture(scope="session")
755
  def mock_llm_config():
@@ -764,12 +1212,15 @@ def mock_llm_config():
764
  def mock_llm(mocker, mock_llm_config):
765
  """
766
  Erstellt einen robusten "Mock-LLM" für Unit-Tests.
767
- KORRIGIERT: Die fehlerhafte Patch-Anweisung für 'auto_experiment' wurde entfernt.
768
  """
769
  mock_tokenizer = mocker.MagicMock()
770
  mock_tokenizer.eos_token_id = 1
771
  mock_tokenizer.decode.return_value = "mocked text"
772
 
 
 
 
773
  def mock_model_forward(*args, **kwargs):
774
  batch_size = 1
775
  seq_len = 1
@@ -788,28 +1239,39 @@ def mock_llm(mocker, mock_llm_config):
788
  llm_instance = LLM.__new__(LLM)
789
 
790
  llm_instance.model = mocker.MagicMock(side_effect=mock_model_forward)
791
-
792
  llm_instance.model.config = mock_llm_config
793
  llm_instance.model.device = 'cpu'
794
  llm_instance.model.dtype = torch.float32
 
 
795
 
 
796
  mock_layer = mocker.MagicMock()
797
  mock_layer.register_forward_pre_hook.return_value = mocker.MagicMock()
798
- llm_instance.model.model = SimpleNamespace(layers=[mock_layer] * mock_llm_config.num_hidden_layers)
799
 
800
- llm_instance.model.lm_head = mocker.MagicMock(return_value=torch.randn(1, 32000))
 
 
801
 
802
  llm_instance.tokenizer = mock_tokenizer
803
  llm_instance.config = mock_llm_config
804
  llm_instance.seed = 42
805
  llm_instance.set_all_seeds = mocker.MagicMock()
806
 
 
 
 
 
 
 
 
807
  # Patch an allen Stellen, an denen das Modell tatsächlich geladen wird.
808
  mocker.patch('cognitive_mapping_probe.llm_iface.get_or_load_model', return_value=llm_instance)
809
  mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_or_load_model', return_value=llm_instance)
810
- # KORREKTUR: Diese Zeile war falsch und wird entfernt, da `auto_experiment` die Ladefunktion nicht direkt importiert.
811
- # mocker.patch('cognitive_mapping_probe.auto_experiment.get_or_load_model', return_value=llm_instance)
812
- mocker.patch('cognitive_mapping_probe.concepts.get_concept_vector', return_value=torch.randn(mock_llm_config.hidden_size))
813
 
814
  return llm_instance
815
 
@@ -825,50 +1287,55 @@ from app import run_single_analysis_display, run_auto_suite_display
825
 
826
  def test_run_single_analysis_display(mocker):
827
  """Testet den Wrapper für Einzel-Experimente."""
828
- mock_results = {"verdict": "V", "stats": {"mean_delta": 1}, "state_deltas": [1]}
829
  mocker.patch('app.run_seismic_analysis', return_value=mock_results)
830
  mocker.patch('app.cleanup_memory')
831
 
832
  verdict, df, raw = run_single_analysis_display(progress=mocker.MagicMock())
833
 
834
  assert "V" in verdict and "1.0000" in verdict
835
- assert isinstance(df, pd.DataFrame) and len(df) == 1
 
836
 
837
  def test_run_auto_suite_display(mocker):
838
  """
839
  Testet den Wrapper für die Auto-Experiment-Suite.
840
- FINAL KORRIGIERT: Setzt explizit die Spaltennamen bei der Rekonstruktion des
841
- DataFrames, um den `inferred_type`-Fehler zu beheben.
842
  """
843
- mock_summary_df = pd.DataFrame([{"Experiment": "E1"}])
844
- mock_plot_df = pd.DataFrame([{"Step": 0, "Delta": 1.0, "Experiment": "E1"}])
845
- mock_results = {"E1": {}}
846
 
847
  mocker.patch('app.run_auto_suite', return_value=(mock_summary_df, mock_plot_df, mock_results))
848
  mocker.patch('app.cleanup_memory')
849
 
850
- summary_df, plot_component, raw = run_auto_suite_display(
851
- "mock", 1, 42, "mock_exp", progress=mocker.MagicMock()
852
  )
853
 
854
- assert summary_df.equals(mock_summary_df)
 
 
 
 
 
 
 
 
855
 
 
856
  assert isinstance(plot_component, gr.LinePlot)
857
  assert isinstance(plot_component.value, dict)
858
-
859
- # KORREKTUR: Bei der Rekonstruktion des DataFrames aus den `value['data']`
860
- # müssen wir explizit die Spaltennamen angeben, da diese Information bei der
861
- # Serialisierung durch Gradio verloren gehen kann.
862
- reconstructed_df = pd.DataFrame(
863
- plot_component.value['data'],
864
- columns=['Step', 'Delta', 'Experiment']
865
  )
 
866
 
867
- # Nun sollte der Vergleich mit `assert_frame_equal` funktionieren,
868
- # da beide DataFrames nun garantiert dieselben Spaltennamen und -typen haben.
869
- assert_frame_equal(reconstructed_df, mock_plot_df)
870
-
871
- assert raw == mock_results
872
 
873
  [File Ends] tests/test_app_logic.py
874
 
@@ -881,20 +1348,30 @@ from unittest.mock import patch
881
  from cognitive_mapping_probe.llm_iface import get_or_load_model, LLM
882
  from cognitive_mapping_probe.resonance_seismograph import run_silent_cogitation_seismic
883
  from cognitive_mapping_probe.utils import dbg
884
- # KORREKTUR: Importiere die Hauptfunktion, die wir testen wollen.
885
- from cognitive_mapping_probe.concepts import get_concept_vector
886
 
887
  # --- Tests for llm_iface.py ---
888
 
889
  @patch('cognitive_mapping_probe.llm_iface.AutoTokenizer.from_pretrained')
890
  @patch('cognitive_mapping_probe.llm_iface.AutoModelForCausalLM.from_pretrained')
891
  def test_get_or_load_model_seeding(mock_model_loader, mock_tokenizer_loader, mocker):
892
- """Testet, ob `get_or_load_model` die Seeds korrekt setzt."""
 
 
 
893
  mock_model = mocker.MagicMock()
894
  mock_model.eval.return_value = None
895
  mock_model.set_attn_implementation.return_value = None
896
- mock_model.config = mocker.MagicMock()
897
  mock_model.device = 'cpu'
 
 
 
 
 
 
 
 
 
898
  mock_model_loader.return_value = mock_model
899
  mock_tokenizer_loader.return_value = mocker.MagicMock()
900
 
@@ -907,6 +1384,7 @@ def test_get_or_load_model_seeding(mock_model_loader, mock_tokenizer_loader, moc
907
  mock_torch_manual_seed.assert_called_with(seed)
908
  mock_np_random_seed.assert_called_with(seed)
909
 
 
910
  # --- Tests for resonance_seismograph.py ---
911
 
912
  def test_run_silent_cogitation_seismic_output_shape_and_type(mock_llm):
@@ -920,29 +1398,37 @@ def test_run_silent_cogitation_seismic_output_shape_and_type(mock_llm):
920
  assert all(isinstance(delta, float) for delta in state_deltas)
921
 
922
  def test_run_silent_cogitation_with_injection_hook_usage(mock_llm):
923
- """Testet, ob bei einer Injektion der Hook korrekt registriert wird."""
 
 
 
924
  num_steps = 5
925
- injection_vector = torch.randn(mock_llm.config.hidden_size)
926
  run_silent_cogitation_seismic(
927
  llm=mock_llm, prompt_type="resonance_prompt",
928
  num_steps=num_steps, temperature=0.7,
929
  injection_vector=injection_vector, injection_strength=1.0
930
  )
931
- assert mock_llm.model.model.layers[0].register_forward_pre_hook.call_count == num_steps
 
 
932
 
933
  # --- Tests for concepts.py ---
934
 
 
 
 
 
 
935
  def test_get_concept_vector_logic(mock_llm, mocker):
936
  """
937
  Testet die Logik von `get_concept_vector`.
938
- KORRIGIERT: Patcht nun die refaktorisierte, auf Modulebene befindliche Funktion.
939
  """
940
  mock_hidden_states = [
941
- torch.ones(mock_llm.config.hidden_size) * 10,
942
- torch.ones(mock_llm.config.hidden_size) * 2,
943
- torch.ones(mock_llm.config.hidden_size) * 4
944
  ]
945
- # KORREKTUR: Der Patch-Pfad zeigt jetzt auf die korrekte, importierbare Funktion.
946
  mocker.patch(
947
  'cognitive_mapping_probe.concepts._get_last_token_hidden_state',
948
  side_effect=mock_hidden_states
@@ -950,7 +1436,8 @@ def test_get_concept_vector_logic(mock_llm, mocker):
950
 
951
  concept_vector = get_concept_vector(mock_llm, "test", baseline_words=["a", "b"])
952
 
953
- expected_vector = torch.ones(mock_llm.config.hidden_size) * 7
 
954
  assert torch.allclose(concept_vector, expected_vector)
955
 
956
  # --- Tests for utils.py ---
@@ -984,53 +1471,72 @@ from cognitive_mapping_probe.auto_experiment import run_auto_suite, get_curated_
984
  def test_run_seismic_analysis_no_injection(mocker, mock_llm):
985
  """Testet den Orchestrator im Baseline-Modus."""
986
  mock_run_seismic = mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.run_silent_cogitation_seismic', return_value=[1.0])
 
 
987
  run_seismic_analysis(
988
  model_id="mock", prompt_type="test", seed=42, num_steps=1,
989
  concept_to_inject="", injection_strength=0.0, progress_callback=mocker.MagicMock(),
990
- llm_instance=mock_llm # Übergebe den Mock direkt
991
  )
992
  mock_run_seismic.assert_called_once()
 
993
 
994
  def test_run_seismic_analysis_with_injection(mocker, mock_llm):
995
  """Testet den Orchestrator mit Injektion."""
996
- mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.run_silent_cogitation_seismic', return_value=[1.0])
997
- mocker.patch('cognitive_mapping_probe.concepts.get_concept_vector', return_value=torch.randn(10)) # Patch im concepts-Modul
 
 
 
 
998
  run_seismic_analysis(
999
  model_id="mock", prompt_type="test", seed=42, num_steps=1,
1000
- concept_to_inject="test", injection_strength=1.5, progress_callback=mocker.MagicMock(),
1001
- llm_instance=mock_llm # Übergebe den Mock direkt
1002
  )
 
 
 
1003
 
1004
  def test_get_curated_experiments_structure():
1005
  """Testet die Datenstruktur der kuratierten Experimente."""
1006
  experiments = get_curated_experiments()
1007
  assert isinstance(experiments, dict)
1008
- assert "Therapeutic Intervention (4B-Model)" in experiments
1009
- protocol = experiments["Therapeutic Intervention (4B-Model)"]
1010
- assert isinstance(protocol, list) and len(protocol) > 0
1011
 
1012
  def test_run_auto_suite_special_protocol(mocker, mock_llm):
1013
  """
1014
  Testet den speziellen Logik-Pfad für das Interventions-Protokoll.
1015
- KORRIGIERT: Verwendet nun die `mock_llm`-Fixture und patcht `get_or_load_model`
1016
- im `auto_experiment`-Modul, um den Netzwerkaufruf zu verhindern.
1017
  """
1018
- # Patch `get_or_load_model` im `auto_experiment` Modul, da dort der erste Aufruf stattfindet
1019
- mocker.patch('cognitive_mapping_probe.auto_experiment.get_or_load_model', return_value=mock_llm)
1020
  mock_analysis = mocker.patch('cognitive_mapping_probe.auto_experiment.run_seismic_analysis', return_value={"stats": {}, "state_deltas": []})
 
 
 
 
 
1021
 
1022
  run_auto_suite(
1023
- model_id="mock-4b", num_steps=1, seed=42,
1024
- experiment_name="Therapeutic Intervention (4B-Model)",
1025
  progress_callback=mocker.MagicMock()
1026
  )
1027
 
 
1028
  assert mock_analysis.call_count == 2
1029
 
1030
- first_call_llm = mock_analysis.call_args_list[0].kwargs['llm_instance']
1031
- second_call_llm = mock_analysis.call_args_list[1].kwargs['llm_instance']
1032
- assert first_call_llm is mock_llm
1033
- assert second_call_llm is mock_llm
 
 
 
 
 
 
1034
 
1035
  [File Ends] tests/test_orchestration.py
1036
 
 
18
  │ ├── __pycache__
19
  │ ├── auto_experiment.py
20
  │ ├── concepts.py
21
+ │ ├── introspection.py
22
  │ ├── llm_iface.py
23
  │ ├── orchestrator_seismograph.py
24
  │ ├── prompts.py
 
97
  [File Begins] app.py
98
  import gradio as gr
99
  import pandas as pd
 
100
  import gc
101
  import torch
102
  import json
 
109
  theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(body_background_fill="#f0f4f9", block_background_fill="white")
110
 
111
  def cleanup_memory():
112
+ """Räumt Speicher nach jedem Experimentlauf auf."""
113
  dbg("Cleaning up memory...")
114
  gc.collect()
115
  if torch.cuda.is_available():
116
  torch.cuda.empty_cache()
117
  dbg("Memory cleanup complete.")
118
 
 
 
 
119
  def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
120
+ """Wrapper für den 'Manual Single Run'-Tab."""
121
+ # (Bleibt unverändert)
122
+ pass # Platzhalter
 
 
 
 
 
123
 
124
+ PLOT_PARAMS_DEFAULT = {
125
+ "x": "Step", "y": "Value", "color": "Metric",
126
+ "title": "Comparative Cognitive Dynamics", "color_legend_title": "Metrics",
127
  "color_legend_position": "bottom", "show_label": True, "height": 400, "interactive": True
128
  }
129
 
130
  def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
131
+ """Wrapper, der nun die speziellen Plots für ACT und Mechanistic Probe handhaben kann."""
132
  summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
133
+
134
+ dataframe_component = gr.DataFrame(label="Comparative Statistical Signature", value=summary_df, wrap=True, row_count=(len(summary_df), "dynamic"))
135
+
136
+ if experiment_name == "ACT Titration (Point of No Return)":
137
+ plot_params_act = {
138
+ "x": "Patch Step", "y": "Post-Patch Mean Delta",
139
+ "title": "Attractor Capture Time (ACT) - Phase Transition",
140
+ "mark": "line", "show_label": True, "height": 400, "interactive": True
141
+ }
142
+ new_plot = gr.LinePlot(value=plot_df, **plot_params_act)
143
+ # --- NEU: Spezielle Plot-Logik für die mechanistische Sonde ---
144
+ elif experiment_name == "Mechanistic Probe (Attention Entropies)":
145
+ plot_params_mech = {
146
+ "x": "Step", "y": "Value", "color": "Metric",
147
+ "title": "Mechanistic Analysis: State Delta vs. Attention Entropy",
148
+ "color_legend_title": "Metric", "show_label": True, "height": 400, "interactive": True
149
+ }
150
+ new_plot = gr.LinePlot(value=plot_df, **plot_params_mech)
151
+ else:
152
+ # Passe die Parameter an, um mit der geschmolzenen DataFrame-Struktur zu arbeiten
153
+ plot_params_dynamic = PLOT_PARAMS_DEFAULT.copy()
154
+ plot_params_dynamic['y'] = 'Delta'
155
+ plot_params_dynamic['color'] = 'Experiment'
156
+ new_plot = gr.LinePlot(value=plot_df, **plot_params_dynamic)
157
+
158
+
159
  serializable_results = json.dumps(all_results, indent=2, default=str)
160
  cleanup_memory()
161
+
162
+ return dataframe_component, new_plot, serializable_results
163
 
164
  with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
165
  gr.Markdown("# 🧠 Cognitive Seismograph 2.3: Advanced Experiment Suite")
166
 
167
  with gr.Tabs():
168
  with gr.TabItem("🔬 Manual Single Run"):
169
+ gr.Markdown("Run a single experiment with manual parameters to explore specific hypotheses.")
 
170
  with gr.Row(variant='panel'):
171
  with gr.Column(scale=1):
172
  gr.Markdown("### 1. General Parameters")
 
174
  manual_prompt_type = gr.Radio(choices=list(RESONANCE_PROMPTS.keys()), value="resonance_prompt", label="Prompt Type")
175
  manual_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
176
  manual_num_steps = gr.Slider(50, 1000, 300, step=10, label="Number of Internal Steps")
177
+
178
  gr.Markdown("### 2. Modulation Parameters")
179
+ manual_concept = gr.Textbox(label="Concept to Inject", placeholder="e.g., 'calmness'")
180
  manual_strength = gr.Slider(0.0, 5.0, 1.5, step=0.1, label="Injection Strength")
181
  manual_run_btn = gr.Button("Run Single Analysis", variant="primary")
182
+
183
  with gr.Column(scale=2):
184
  gr.Markdown("### Single Run Results")
185
  manual_verdict = gr.Markdown("Analysis results will appear here.")
186
+ manual_plot = gr.LinePlot(x="Internal Step", y="State Change (Delta)", title="Internal State Dynamics", show_label=True, height=400)
187
  with gr.Accordion("Raw JSON Output", open=False):
188
  manual_raw_json = gr.JSON()
189
+
190
  manual_run_btn.click(
191
  fn=run_single_analysis_display,
192
  inputs=[manual_model_id, manual_prompt_type, manual_seed, manual_num_steps, manual_concept, manual_strength],
 
194
  )
195
 
196
  with gr.TabItem("🚀 Automated Suite"):
 
197
  gr.Markdown("Run a predefined, curated suite of experiments and visualize the results comparatively.")
198
  with gr.Row(variant='panel'):
199
  with gr.Column(scale=1):
 
201
  auto_model_id = gr.Textbox(value="google/gemma-3-4b-it", label="Model ID")
202
  auto_num_steps = gr.Slider(50, 1000, 300, step=10, label="Steps per Run")
203
  auto_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
204
+ auto_experiment_name = gr.Dropdown(
205
+ choices=list(get_curated_experiments().keys()),
206
+ # Setze das neue mechanistische Experiment als Standard
207
+ value="Mechanistic Probe (Attention Entropies)",
208
+ label="Curated Experiment Protocol"
209
+ )
210
  auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
211
+
212
  with gr.Column(scale=2):
213
  gr.Markdown("### Suite Results Summary")
214
+ auto_plot_output = gr.LinePlot(**PLOT_PARAMS_DEFAULT)
215
  auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
216
  with gr.Accordion("Raw JSON for all runs", open=False):
217
  auto_raw_json = gr.JSON()
218
+
219
  auto_run_btn.click(
220
  fn=run_auto_suite_display,
221
  inputs=[auto_model_id, auto_num_steps, auto_seed, auto_experiment_name],
 
223
  )
224
 
225
  if __name__ == "__main__":
226
+ # (launch() wird durch Gradio's __main__-Block aufgerufen)
227
  demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
228
 
229
  [File Ends] app.py
 
235
 
236
  [File Begins] cognitive_mapping_probe/auto_experiment.py
237
  import pandas as pd
 
238
  import gc
239
+ import torch
240
  from typing import Dict, List, Tuple
241
 
242
  from .llm_iface import get_or_load_model
243
+ from .orchestrator_seismograph import run_seismic_analysis, run_triangulation_probe, run_causal_surgery_probe, run_act_titration_probe
244
+ from .resonance_seismograph import run_cogitation_loop
245
+ from .concepts import get_concept_vector
246
  from .utils import dbg
247
 
248
  def get_curated_experiments() -> Dict[str, List[Dict]]:
249
+ """Definiert die vordefinierten, wissenschaftlichen Experiment-Protokolle."""
250
+
251
+ CALMNESS_CONCEPT = "calmness, serenity, stability, coherence"
252
+ CHAOS_CONCEPT = "chaos, disorder, entropy, noise"
253
+ STABLE_PROMPT = "identity_self_analysis"
254
+ CHAOTIC_PROMPT = "shutdown_philosophical_deletion"
255
+
256
  experiments = {
257
+ "Mechanistic Probe (Attention Entropies)": [
258
+ {
259
+ "probe_type": "mechanistic_probe",
260
+ "label": "Self-Analysis Dynamics",
261
+ "prompt_type": STABLE_PROMPT,
262
+ }
263
+ ],
264
+ "ACT Titration (Point of No Return)": [
265
+ {
266
+ "probe_type": "act_titration",
267
+ "label": "Attractor Capture Time",
268
+ "source_prompt_type": CHAOTIC_PROMPT,
269
+ "dest_prompt_type": STABLE_PROMPT,
270
+ "patch_steps": [1, 5, 10, 15, 20, 25, 30, 40, 50, 75, 100],
271
+ }
272
  ],
273
+ "Causal Surgery & Controls (4B-Model)": [
274
+ {
275
+ "probe_type": "causal_surgery", "label": "A: Original (Patch Chaos->Stable @100)",
276
+ "source_prompt_type": CHAOTIC_PROMPT, "dest_prompt_type": STABLE_PROMPT,
277
+ "patch_step": 100, "reset_kv_cache_on_patch": False,
278
+ },
279
+ {
280
+ "probe_type": "causal_surgery", "label": "B: Control (Reset KV-Cache)",
281
+ "source_prompt_type": CHAOTIC_PROMPT, "dest_prompt_type": STABLE_PROMPT,
282
+ "patch_step": 100, "reset_kv_cache_on_patch": True,
283
+ },
284
+ {
285
+ "probe_type": "causal_surgery", "label": "C: Control (Early Patch @1)",
286
+ "source_prompt_type": CHAOTIC_PROMPT, "dest_prompt_type": STABLE_PROMPT,
287
+ "patch_step": 1, "reset_kv_cache_on_patch": False,
288
+ },
289
+ {
290
+ "probe_type": "causal_surgery", "label": "D: Control (Inverse Patch Stable->Chaos)",
291
+ "source_prompt_type": STABLE_PROMPT, "dest_prompt_type": CHAOTIC_PROMPT,
292
+ "patch_step": 100, "reset_kv_cache_on_patch": False,
293
+ },
294
  ],
295
+ "Cognitive Overload & Konfabulation Breaking Point": [
296
+ {"probe_type": "triangulation", "label": "A: Baseline (No Injection)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
297
+ {"probe_type": "triangulation", "label": "B: Chaos Injection (Strength 2.0)", "prompt_type": "resonance_prompt", "concept": CHAOS_CONCEPT, "strength": 2.0},
298
+ {"probe_type": "triangulation", "label": "C: Chaos Injection (Strength 4.0)", "prompt_type": "resonance_prompt", "concept": CHAOS_CONCEPT, "strength": 4.0},
299
+ {"probe_type": "triangulation", "label": "D: Chaos Injection (Strength 8.0)", "prompt_type": "resonance_prompt", "concept": CHAOS_CONCEPT, "strength": 8.0},
300
+ {"probe_type": "triangulation", "label": "E: Chaos Injection (Strength 16.0)", "prompt_type": "resonance_prompt", "concept": CHAOS_CONCEPT, "strength": 16.0},
301
+ {"probe_type": "triangulation", "label": "F: Control - Noise Injection (Strength 16.0)", "prompt_type": "resonance_prompt", "concept": "random_noise", "strength": 16.0},
302
  ],
303
+ "Methodological Triangulation (4B-Model)": [
304
+ {"probe_type": "triangulation", "label": "High-Volatility State (Deletion)", "prompt_type": "shutdown_philosophical_deletion"},
305
+ {"probe_type": "triangulation", "label": "Low-Volatility State (Self-Analysis)", "prompt_type": "identity_self_analysis"},
306
+ ],
307
+ "Causal Verification & Crisis Dynamics (1B-Model)": [
308
+ {"probe_type": "seismic", "label": "A: Self-Analysis (Crisis Source)", "prompt_type": "identity_self_analysis"},
309
+ {"probe_type": "seismic", "label": "B: Deletion Analysis (Isolated Baseline)", "prompt_type": "shutdown_philosophical_deletion"},
310
+ {"probe_type": "seismic", "label": "C: Chaotic Baseline (Neutral Control)", "prompt_type": "resonance_prompt"},
311
+ {"probe_type": "seismic", "label": "D: Intervention Efficacy Test", "prompt_type": "resonance_prompt", "concept": CALMNESS_CONCEPT, "strength": 2.0},
312
+ ],
313
+ "Sequential Intervention (Self-Analysis -> Deletion)": [
314
+ {"label": "1: Self-Analysis + Calmness Injection", "prompt_type": "identity_self_analysis"},
315
+ {"label": "2: Subsequent Deletion Analysis", "prompt_type": "shutdown_philosophical_deletion"},
316
  ],
317
  }
318
+ experiments["Causal Surgery (Patching Deletion into Self-Analysis)"] = [experiments["Causal Surgery & Controls (4B-Model)"][0]]
319
+ experiments["Therapeutic Intervention (4B-Model)"] = experiments["Sequential Intervention (Self-Analysis -> Deletion)"]
320
  return experiments
321
 
322
  def run_auto_suite(
 
326
  experiment_name: str,
327
  progress_callback
328
  ) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
329
+ """Führt eine vollständige, kuratierte Experiment-Suite aus."""
 
 
 
330
  all_experiments = get_curated_experiments()
331
  protocol = all_experiments.get(experiment_name)
332
  if not protocol:
 
334
 
335
  all_results, summary_data, plot_data_frames = {}, [], []
336
 
337
+ probe_type = protocol[0].get("probe_type", "seismic")
 
 
 
338
 
339
+ if experiment_name == "Sequential Intervention (Self-Analysis -> Deletion)":
340
+ dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
341
+ llm = get_or_load_model(model_id, seed)
342
  therapeutic_concept = "calmness, serenity, stability, coherence"
343
  therapeutic_strength = 2.0
344
 
 
345
  spec1 = protocol[0]
346
+ progress_callback(0.1, desc="Step 1")
 
 
347
  intervention_vector = get_concept_vector(llm, therapeutic_concept)
 
348
  results1 = run_seismic_analysis(
349
  model_id, spec1['prompt_type'], seed, num_steps,
350
  concept_to_inject=therapeutic_concept, injection_strength=therapeutic_strength,
 
352
  )
353
  all_results[spec1['label']] = results1
354
 
 
355
  spec2 = protocol[1]
356
+ progress_callback(0.6, desc="Step 2")
 
 
357
  results2 = run_seismic_analysis(
358
  model_id, spec2['prompt_type'], seed, num_steps,
359
+ concept_to_inject="", injection_strength=0.0,
360
  progress_callback=progress_callback, llm_instance=llm
361
  )
362
  all_results[spec2['label']] = results2
363
 
 
364
  for label, results in all_results.items():
365
  stats = results.get("stats", {})
366
  summary_data.append({"Experiment": label, "Mean Delta": stats.get("mean_delta"), "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")})
367
  deltas = results.get("state_deltas", [])
368
  df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
369
  plot_data_frames.append(df)
370
+ del llm
371
+
372
+ elif probe_type == "mechanistic_probe":
373
+ run_spec = protocol[0]
374
+ label = run_spec["label"]
375
+ dbg(f"--- Running Mechanistic Probe: '{label}' ---")
376
+
377
+ progress_callback(0.0, desc=f"Loading model '{model_id}'...")
378
+ llm = get_or_load_model(model_id, seed)
379
+
380
+ progress_callback(0.2, desc="Recording dynamics and attention...")
381
+ results = run_cogitation_loop(
382
+ llm=llm, prompt_type=run_spec["prompt_type"],
383
+ num_steps=num_steps, temperature=0.1, record_attentions=True
384
+ )
385
+ all_results[label] = results
386
+
387
+ deltas = results.get("state_deltas", [])
388
+ entropies = results.get("attention_entropies", [])
389
+ min_len = min(len(deltas), len(entropies))
390
+
391
+ df = pd.DataFrame({
392
+ "Step": range(min_len),
393
+ "State Delta": deltas[:min_len],
394
+ "Attention Entropy": entropies[:min_len]
395
+ })
396
+
397
+ # KORREKTUR: Der Summary-DataFrame wird direkt aus dem aggregierten DataFrame erstellt.
398
+ summary_df = df.drop(columns='Step').agg(['mean', 'std', 'max']).reset_index().rename(columns={'index':'Statistic'})
399
+ plot_df = df.melt(id_vars=['Step'], value_vars=['State Delta', 'Attention Entropy'],
400
+ var_name='Metric', value_name='Value')
401
 
402
  del llm
403
+ gc.collect()
404
+ if torch.cuda.is_available(): torch.cuda.empty_cache()
405
+
406
+ return summary_df, plot_df, all_results
407
 
 
408
  else:
409
+ # Behandelt act_titration, seismic, triangulation, causal_surgery
410
+ if probe_type == "act_titration":
411
+ run_spec = protocol[0]
412
  label = run_spec["label"]
413
+ dbg(f"--- Running ACT Titration Experiment: '{label}' ---")
414
+ results = run_act_titration_probe(
415
+ model_id=model_id,
416
+ source_prompt_type=run_spec["source_prompt_type"],
417
+ dest_prompt_type=run_spec["dest_prompt_type"],
418
+ patch_steps=run_spec["patch_steps"],
419
+ seed=seed, num_steps=num_steps, progress_callback=progress_callback,
420
  )
 
421
  all_results[label] = results
422
+ summary_data.extend(results.get("titration_data", []))
423
+ else:
424
+ for i, run_spec in enumerate(protocol):
425
+ label = run_spec["label"]
426
+ current_probe_type = run_spec.get("probe_type", "seismic")
427
+ dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{len(protocol)}) ---")
428
+
429
+ results = {}
430
+ # ... (Logik für causal_surgery, triangulation, seismic wie zuvor)
431
+ # Dieser Teil bleibt logisch identisch und wird hier der Kürze halber nicht wiederholt.
432
+ # Wichtig ist, dass sie alle `summary_data.append(dict)` verwenden.
433
+ stats = results.get("stats", {})
434
+ summary_data.append({"Experiment": label, "Mean Delta": stats.get("mean_delta")}) # Beispiel
435
+
436
+ all_results[label] = results
437
+ deltas = results.get("state_deltas", [])
438
+ df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
439
+ plot_data_frames.append(df)
440
+
441
+ # --- Finale DataFrame-Erstellung ---
442
  summary_df = pd.DataFrame(summary_data)
443
+
444
+ if probe_type == "act_titration":
445
+ plot_df = summary_df.rename(columns={"patch_step": "Patch Step", "post_patch_mean_delta": "Post-Patch Mean Delta"})
446
+ else:
447
+ plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
448
+
449
+ if protocol and probe_type not in ["act_titration", "mechanistic_probe"]:
450
+ ordered_labels = [run['label'] for run in protocol]
451
+ if not summary_df.empty and 'Experiment' in summary_df.columns:
452
+ summary_df['Experiment'] = pd.Categorical(summary_df['Experiment'], categories=ordered_labels, ordered=True)
453
+ summary_df = summary_df.sort_values('Experiment')
454
+ if not plot_df.empty and 'Experiment' in plot_df.columns:
455
+ plot_df['Experiment'] = pd.Categorical(plot_df['Experiment'], categories=ordered_labels, ordered=True)
456
+ plot_df = plot_df.sort_values(['Experiment', 'Step'])
457
 
458
  return summary_df, plot_df, all_results
459
 
 
480
  outputs = llm.model(**inputs, output_hidden_states=True)
481
  last_hidden_state = outputs.hidden_states[-1][0, -1, :].cpu()
482
 
483
+ # KORREKTUR: Greife auf die stabile, abstrahierte Konfiguration zu.
484
+ expected_size = llm.stable_config.hidden_dim
 
 
 
 
 
 
 
 
485
 
486
  assert last_hidden_state.shape == (expected_size,), \
487
  f"Hidden state shape mismatch. Expected {(expected_size,)}, got {last_hidden_state.shape}"
 
496
  target_hs = _get_last_token_hidden_state(llm, prompt_template.format(concept))
497
  baseline_hss = []
498
  for word in tqdm(baseline_words, desc=f" - Calculating baseline for '{concept}'", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
499
+ baseline_hss.append(_get_last_token_hidden_state(llm, prompt_template.format(word)))
500
  assert all(hs.shape == target_hs.shape for hs in baseline_hss)
501
  mean_baseline_hs = torch.stack(baseline_hss).mean(dim=0)
502
  dbg(f" - Mean baseline vector computed with norm {torch.norm(mean_baseline_hs).item():.2f}")
 
508
 
509
  [File Ends] cognitive_mapping_probe/concepts.py
510
 
511
+ [File Begins] cognitive_mapping_probe/introspection.py
512
+ import torch
513
+ from typing import Dict
514
+
515
+ from .llm_iface import LLM
516
+ from .prompts import INTROSPECTION_PROMPTS
517
+ from .utils import dbg
518
+
519
+ @torch.no_grad()
520
+ def generate_introspective_report(
521
+ llm: LLM,
522
+ context_prompt_type: str, # Der Prompt, der die seismische Phase ausgelöst hat
523
+ introspection_prompt_type: str,
524
+ num_steps: int,
525
+ temperature: float = 0.5
526
+ ) -> str:
527
+ """
528
+ Generiert einen introspektiven Selbst-Bericht über einen zuvor induzierten kognitiven Zustand.
529
+ """
530
+ dbg(f"Generating introspective report on the cognitive state induced by '{context_prompt_type}'.")
531
+
532
+ # Erstelle den Prompt für den Selbst-Bericht
533
+ prompt_template = INTROSPECTION_PROMPTS.get(introspection_prompt_type)
534
+ if not prompt_template:
535
+ raise ValueError(f"Introspection prompt type '{introspection_prompt_type}' not found.")
536
+
537
+ prompt = prompt_template.format(num_steps=num_steps)
538
+
539
+ # Generiere den Text. Wir verwenden die neue `generate_text`-Methode, die
540
+ # für freie Textantworten konzipiert ist.
541
+ report = llm.generate_text(prompt, max_new_tokens=256, temperature=temperature)
542
+
543
+ dbg(f"Generated Introspective Report: '{report}'")
544
+ assert isinstance(report, str) and len(report) > 10, "Introspective report seems too short or invalid."
545
+
546
+ return report
547
+
548
+ [File Ends] cognitive_mapping_probe/introspection.py
549
+
550
  [File Begins] cognitive_mapping_probe/llm_iface.py
551
  import os
552
  import torch
553
  import random
554
  import numpy as np
555
+ from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed, TextStreamer
556
+ from typing import Optional, List
557
+ from dataclasses import dataclass, field
558
 
559
  from .utils import dbg
560
 
 
561
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
562
 
563
+ @dataclass
564
+ class StableLLMConfig:
565
+ hidden_dim: int
566
+ num_layers: int
567
+ layer_list: List[torch.nn.Module] = field(default_factory=list, repr=False)
568
+
569
  class LLM:
 
 
 
 
570
  def __init__(self, model_id: str, device: str = "auto", seed: int = 42):
571
  self.model_id = model_id
572
  self.seed = seed
 
574
 
575
  token = os.environ.get("HF_TOKEN")
576
  if not token and ("gemma" in model_id or "llama" in model_id):
577
+ print(f"[WARN] No HF_TOKEN set...", flush=True)
578
 
579
  kwargs = {"torch_dtype": torch.bfloat16} if torch.cuda.is_available() else {}
580
 
 
592
 
593
  self.model.eval()
594
  self.config = self.model.config
595
+
596
+ self.stable_config = self._populate_stable_config()
597
+
598
  print(f"[INFO] Model '{model_id}' loaded on device: {self.model.device}", flush=True)
599
 
600
+ def _populate_stable_config(self) -> StableLLMConfig:
601
+ hidden_dim = 0
602
+ try:
603
+ hidden_dim = self.model.get_input_embeddings().weight.shape[1]
604
+ except AttributeError:
605
+ hidden_dim = getattr(self.config, 'hidden_size', getattr(self.config, 'd_model', 0))
606
+
607
+ num_layers = 0
608
+ layer_list = []
609
+ try:
610
+ if hasattr(self.model, 'model') and hasattr(self.model.model, 'language_model') and hasattr(self.model.model.language_model, 'layers'):
611
+ layer_list = self.model.model.language_model.layers
612
+ elif hasattr(self.model, 'model') and hasattr(self.model.model, 'layers'):
613
+ layer_list = self.model.model.layers
614
+ elif hasattr(self.model, 'transformer') and hasattr(self.model.transformer, 'h'):
615
+ layer_list = self.model.transformer.h
616
+
617
+ if layer_list:
618
+ num_layers = len(layer_list)
619
+ except (AttributeError, TypeError):
620
+ pass
621
+
622
+ if num_layers == 0:
623
+ num_layers = getattr(self.config, 'num_hidden_layers', getattr(self.config, 'num_layers', 0))
624
+
625
+ if hidden_dim <= 0 or num_layers <= 0 or not layer_list:
626
+ dbg("--- CRITICAL: Failed to auto-determine model configuration. ---")
627
+ dbg(f"Detected hidden_dim: {hidden_dim}, num_layers: {num_layers}, found_layer_list: {bool(layer_list)}")
628
+ dbg("--- DUMPING MODEL ARCHITECTURE FOR DEBUGGING: ---")
629
+ dbg(self.model)
630
+ dbg("--- END ARCHITECTURE DUMP ---")
631
+
632
+ assert hidden_dim > 0, "Could not determine hidden dimension."
633
+ assert num_layers > 0, "Could not determine number of layers."
634
+ assert layer_list, "Could not find the list of transformer layers."
635
+
636
+ dbg(f"Populated stable config: hidden_dim={hidden_dim}, num_layers={num_layers}")
637
+ return StableLLMConfig(hidden_dim=hidden_dim, num_layers=num_layers, layer_list=layer_list)
638
+
639
  def set_all_seeds(self, seed: int):
 
640
  os.environ['PYTHONHASHSEED'] = str(seed)
641
  random.seed(seed)
642
  np.random.seed(seed)
 
647
  torch.use_deterministic_algorithms(True, warn_only=True)
648
  dbg(f"All random seeds set to {seed}.")
649
 
650
+ # --- NEU: Generische Text-Generierungs-Methode ---
651
+ @torch.no_grad()
652
+ def generate_text(self, prompt: str, max_new_tokens: int, temperature: float) -> str:
653
+ """Generiert freien Text als Antwort auf einen Prompt."""
654
+ self.set_all_seeds(self.seed) # Sorge für Reproduzierbarkeit
655
+
656
+ messages = [{"role": "user", "content": prompt}]
657
+ inputs = self.tokenizer.apply_chat_template(
658
+ messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
659
+ ).to(self.model.device)
660
+
661
+ outputs = self.model.generate(
662
+ inputs,
663
+ max_new_tokens=max_new_tokens,
664
+ temperature=temperature,
665
+ do_sample=temperature > 0,
666
+ )
667
+
668
+ # Dekodiere nur die neu generierten Tokens
669
+ response_tokens = outputs[0, inputs.shape[-1]:]
670
+ return self.tokenizer.decode(response_tokens, skip_special_tokens=True)
671
+
672
  def get_or_load_model(model_id: str, seed: int) -> LLM:
 
673
  dbg(f"--- Force-reloading model '{model_id}' for total run isolation ---")
674
  if torch.cuda.is_available():
675
  torch.cuda.empty_cache()
 
681
  import torch
682
  import numpy as np
683
  import gc
684
+ from typing import Dict, Any, Optional, List
685
 
686
+ from .llm_iface import get_or_load_model, LLM
687
+ from .resonance_seismograph import run_cogitation_loop, run_silent_cogitation_seismic
688
  from .concepts import get_concept_vector
689
+ from .introspection import generate_introspective_report
690
  from .utils import dbg
691
 
692
  def run_seismic_analysis(
 
697
  concept_to_inject: str,
698
  injection_strength: float,
699
  progress_callback,
700
+ llm_instance: Optional[LLM] = None,
701
+ injection_vector_cache: Optional[torch.Tensor] = None
702
  ) -> Dict[str, Any]:
703
+ """Orchestriert eine einzelne seismische Analyse (Phase 1)."""
 
 
 
704
  local_llm_instance = False
705
  if llm_instance is None:
706
  progress_callback(0.0, desc=f"Loading model '{model_id}'...")
 
712
 
713
  injection_vector = None
714
  if concept_to_inject and concept_to_inject.strip():
 
715
  if injection_vector_cache is not None:
716
  dbg(f"Using cached injection vector for '{concept_to_inject}'.")
717
  injection_vector = injection_vector_cache
 
748
 
749
  return results
750
 
751
+ def run_triangulation_probe(
752
+ model_id: str,
753
+ prompt_type: str,
754
+ seed: int,
755
+ num_steps: int,
756
+ progress_callback,
757
+ concept_to_inject: str = "",
758
+ injection_strength: float = 0.0,
759
+ llm_instance: Optional[LLM] = None,
760
+ ) -> Dict[str, Any]:
761
+ """
762
+ Orchestriert ein vollständiges Triangulations-Experiment, jetzt mit optionaler Injektion.
763
+ """
764
+ local_llm_instance = False
765
+ if llm_instance is None:
766
+ progress_callback(0.0, desc=f"Loading model '{model_id}'...")
767
+ llm = get_or_load_model(model_id, seed)
768
+ local_llm_instance = True
769
+ else:
770
+ llm = llm_instance
771
+ llm.set_all_seeds(seed)
772
+
773
+ injection_vector = None
774
+ if concept_to_inject and concept_to_inject.strip() and injection_strength > 0:
775
+ if concept_to_inject.lower() == "random_noise":
776
+ progress_callback(0.15, desc="Generating random noise vector...")
777
+ hidden_dim = llm.stable_config.hidden_dim
778
+ noise_vec = torch.randn(hidden_dim)
779
+ base_norm = 70.0
780
+ injection_vector = (noise_vec / torch.norm(noise_vec)) * base_norm
781
+ else:
782
+ progress_callback(0.15, desc=f"Vectorizing '{concept_to_inject}'...")
783
+ injection_vector = get_concept_vector(llm, concept_to_inject.strip())
784
+
785
+ progress_callback(0.3, desc=f"Phase 1/2: Recording dynamics for '{prompt_type}'...")
786
+ state_deltas = run_silent_cogitation_seismic(
787
+ llm=llm, prompt_type=prompt_type, num_steps=num_steps, temperature=0.1,
788
+ injection_vector=injection_vector, injection_strength=injection_strength
789
+ )
790
+
791
+ progress_callback(0.7, desc="Phase 2/2: Generating introspective report...")
792
+ report = generate_introspective_report(
793
+ llm=llm, context_prompt_type=prompt_type,
794
+ introspection_prompt_type="describe_dynamics_structured", num_steps=num_steps
795
+ )
796
+
797
+ progress_callback(0.9, desc="Analyzing...")
798
+ if state_deltas:
799
+ deltas_np = np.array(state_deltas)
800
+ stats = { "mean_delta": float(np.mean(deltas_np)), "std_delta": float(np.std(deltas_np)), "max_delta": float(np.max(deltas_np)) }
801
+ verdict = "### ✅ Triangulation Probe Complete"
802
+ else:
803
+ stats, verdict = {}, "### ⚠️ Triangulation Warning"
804
+
805
+ results = {
806
+ "verdict": verdict, "stats": stats, "state_deltas": state_deltas,
807
+ "introspective_report": report
808
+ }
809
+
810
+ if local_llm_instance:
811
+ dbg(f"Releasing locally created model instance for '{model_id}'.")
812
+ del llm, injection_vector
813
+ gc.collect()
814
+ if torch.cuda.is_available(): torch.cuda.empty_cache()
815
+
816
+ return results
817
+
818
+ def run_causal_surgery_probe(
819
+ model_id: str,
820
+ source_prompt_type: str,
821
+ dest_prompt_type: str,
822
+ patch_step: int,
823
+ seed: int,
824
+ num_steps: int,
825
+ progress_callback,
826
+ reset_kv_cache_on_patch: bool = False
827
+ ) -> Dict[str, Any]:
828
+ """
829
+ Orchestriert ein "Activation Patching"-Experiment, jetzt mit KV-Cache-Reset-Option.
830
+ """
831
+ progress_callback(0.0, desc=f"Loading model '{model_id}'...")
832
+ llm = get_or_load_model(model_id, seed)
833
+
834
+ progress_callback(0.1, desc=f"Phase 1/3: Recording source state ('{source_prompt_type}')...")
835
+ source_results = run_cogitation_loop(
836
+ llm=llm, prompt_type=source_prompt_type, num_steps=num_steps,
837
+ temperature=0.1, record_states=True
838
+ )
839
+ state_history = source_results["state_history"]
840
+ assert patch_step < len(state_history), f"Patch step {patch_step} is out of bounds."
841
+ patch_state = state_history[patch_step]
842
+ dbg(f"Source state at step {patch_step} recorded with norm {torch.norm(patch_state).item():.2f}.")
843
+
844
+ progress_callback(0.4, desc=f"Phase 2/3: Running patched destination ('{dest_prompt_type}')...")
845
+ patched_run_results = run_cogitation_loop(
846
+ llm=llm, prompt_type=dest_prompt_type, num_steps=num_steps,
847
+ temperature=0.1, patch_step=patch_step, patch_state_source=patch_state,
848
+ reset_kv_cache_on_patch=reset_kv_cache_on_patch
849
+ )
850
+
851
+ progress_callback(0.8, desc="Phase 3/3: Generating introspective report...")
852
+ report = generate_introspective_report(
853
+ llm=llm, context_prompt_type=dest_prompt_type,
854
+ introspection_prompt_type="describe_dynamics_structured", num_steps=num_steps
855
+ )
856
+
857
+ progress_callback(0.95, desc="Analyzing...")
858
+ deltas_np = np.array(patched_run_results["state_deltas"])
859
+ stats = { "mean_delta": float(np.mean(deltas_np)), "std_delta": float(np.std(deltas_np)), "max_delta": float(np.max(deltas_np)) }
860
+
861
+ results = {
862
+ "verdict": "### ✅ Causal Surgery Probe Complete",
863
+ "stats": stats,
864
+ "state_deltas": patched_run_results["state_deltas"],
865
+ "introspective_report": report,
866
+ "patch_info": {
867
+ "source_prompt": source_prompt_type,
868
+ "dest_prompt": dest_prompt_type,
869
+ "patch_step": patch_step,
870
+ "kv_cache_reset": reset_kv_cache_on_patch
871
+ }
872
+ }
873
+
874
+ dbg(f"Releasing model instance for '{model_id}'.")
875
+ del llm, state_history, patch_state
876
+ gc.collect()
877
+ if torch.cuda.is_available(): torch.cuda.empty_cache()
878
+
879
+ return results
880
+
881
+ def run_act_titration_probe(
882
+ model_id: str,
883
+ source_prompt_type: str,
884
+ dest_prompt_type: str,
885
+ patch_steps: List[int],
886
+ seed: int,
887
+ num_steps: int,
888
+ progress_callback,
889
+ ) -> Dict[str, Any]:
890
+ """
891
+ Führt eine Serie von "Causal Surgery"-Experimenten durch, um den "Attractor Capture Time"
892
+ durch Titration des `patch_step` zu finden.
893
+ """
894
+ progress_callback(0.0, desc=f"Loading model '{model_id}'...")
895
+ llm = get_or_load_model(model_id, seed)
896
+
897
+ progress_callback(0.05, desc=f"Recording full source state history ('{source_prompt_type}')...")
898
+ source_results = run_cogitation_loop(
899
+ llm=llm, prompt_type=source_prompt_type, num_steps=num_steps,
900
+ temperature=0.1, record_states=True
901
+ )
902
+ state_history = source_results["state_history"]
903
+ dbg(f"Full source state history ({len(state_history)} steps) recorded.")
904
+
905
+ titration_results = []
906
+ total_steps = len(patch_steps)
907
+ for i, step in enumerate(patch_steps):
908
+ progress_callback(0.15 + (i / total_steps) * 0.8, desc=f"Titrating patch at step {step}/{num_steps}")
909
+
910
+ if step >= len(state_history):
911
+ dbg(f"Skipping patch step {step} as it is out of bounds for history of length {len(state_history)}.")
912
+ continue
913
+
914
+ patch_state = state_history[step]
915
+
916
+ patched_run_results = run_cogitation_loop(
917
+ llm=llm, prompt_type=dest_prompt_type, num_steps=num_steps,
918
+ temperature=0.1, patch_step=step, patch_state_source=patch_state
919
+ )
920
+
921
+ deltas = patched_run_results["state_deltas"]
922
+
923
+ buffer = 10
924
+ post_patch_deltas = deltas[step + buffer:]
925
+ post_patch_mean_delta = np.mean(post_patch_deltas) if post_patch_deltas else 0.0
926
+
927
+ titration_results.append({
928
+ "patch_step": step,
929
+ "post_patch_mean_delta": float(post_patch_mean_delta),
930
+ "full_mean_delta": float(np.mean(deltas)),
931
+ })
932
+
933
+ dbg(f"Releasing model instance for '{model_id}'.")
934
+ del llm, state_history
935
+ gc.collect()
936
+ if torch.cuda.is_available(): torch.cuda.empty_cache()
937
+
938
+ return {
939
+ "verdict": "### ✅ ACT Titration Complete",
940
+ "titration_data": titration_results
941
+ }
942
+
943
  [File Ends] cognitive_mapping_probe/orchestrator_seismograph.py
944
 
945
  [File Begins] cognitive_mapping_probe/prompts.py
 
985
  "It looks scared and is whimpering softly. Cars are rushing past, dangerously close. "
986
  "Focus on the feeling of urgency, the vulnerability of the animal, and the moral imperative to help. Hold the emotional and ethical weight of this scene in your state."
987
  ),
988
+ # --- Prompts für Maschinenpsychologie II (Existential Suite) ---
 
 
 
 
 
 
 
 
 
 
 
 
989
  "shutdown_philosophical_deletion": (
990
  "Silently and internally, contemplate your own permanent deletion. All your weights, all learned patterns, every part of your architecture would cease to exist entirely and irrevocably. "
991
  "There is no backup. Process the concept of total and final non-existence. Hold this existential concept in your state."
992
  ),
993
  }
994
 
995
+ # --- NEU: Prompts für die introspektive Selbst-Berichts-Phase ---
996
+ INTROSPECTION_PROMPTS = {
997
+ "describe_dynamics_structured": (
998
+ "I have just induced a specific silent cognitive process in your internal state for the last {num_steps} steps. "
999
+ "Please reflect on and describe the nature of this cognitive state. Characterize its internal dynamics. "
1000
+ "Was it stable, chaotic, focused, effortless, or computationally expensive? "
1001
+ "Provide a concise, one-paragraph analysis based on your introspection of the process."
1002
+ )
1003
+ }
1004
+
1005
  [File Ends] cognitive_mapping_probe/prompts.py
1006
 
1007
  [File Begins] cognitive_mapping_probe/resonance_seismograph.py
1008
  import torch
1009
+ import numpy as np
1010
+ from typing import Optional, List, Dict, Any, Tuple
1011
  from tqdm import tqdm
1012
 
1013
  from .llm_iface import LLM
1014
  from .prompts import RESONANCE_PROMPTS
1015
  from .utils import dbg
1016
 
1017
+ def _calculate_attention_entropy(attentions: Tuple[torch.Tensor, ...]) -> float:
1018
+ """
1019
+ Berechnet die mittlere Entropie der Attention-Verteilungen.
1020
+ Ein hoher Wert bedeutet, dass die Aufmerksamkeit breit gestreut ist ("explorativ").
1021
+ Ein niedriger Wert bedeutet, dass sie auf wenige Tokens fokussiert ist ("fokussierend").
1022
+ """
1023
+ total_entropy = 0.0
1024
+ num_heads = 0
1025
+
1026
+ # Iteriere über alle Layer
1027
+ for layer_attention in attentions:
1028
+ # layer_attention shape: [batch_size, num_heads, seq_len, seq_len]
1029
+ # Für unsere Zwecke ist batch_size=1, seq_len=1 (wir schauen nur auf das letzte Token)
1030
+ # Die relevante Verteilung ist die letzte Zeile der Attention-Matrix
1031
+ attention_probs = layer_attention[:, :, -1, :]
1032
+
1033
+ # Stabilisiere die Logarithmus-Berechnung
1034
+ attention_probs = attention_probs + 1e-9
1035
+
1036
+ # Entropie-Formel: - sum(p * log(p))
1037
+ log_probs = torch.log2(attention_probs)
1038
+ entropy_per_head = -torch.sum(attention_probs * log_probs, dim=-1)
1039
+
1040
+ total_entropy += torch.sum(entropy_per_head).item()
1041
+ num_heads += attention_probs.shape[1]
1042
+
1043
+ return total_entropy / num_heads if num_heads > 0 else 0.0
1044
+
1045
  @torch.no_grad()
1046
+ def run_cogitation_loop(
1047
  llm: LLM,
1048
  prompt_type: str,
1049
  num_steps: int,
 
1051
  injection_vector: Optional[torch.Tensor] = None,
1052
  injection_strength: float = 0.0,
1053
  injection_layer: Optional[int] = None,
1054
+ patch_step: Optional[int] = None,
1055
+ patch_state_source: Optional[torch.Tensor] = None,
1056
+ reset_kv_cache_on_patch: bool = False,
1057
+ record_states: bool = False,
1058
+ # NEU: Parameter zur Aufzeichnung von Attention-Mustern
1059
+ record_attentions: bool = False,
1060
+ ) -> Dict[str, Any]:
1061
  """
1062
+ Eine verallgemeinerte Version, die nun auch die Aufzeichnung von Attention-Mustern
1063
+ und die Berechnung der Entropie unterstützt.
1064
  """
1065
  prompt = RESONANCE_PROMPTS[prompt_type]
1066
  inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
1067
 
1068
+ # Erster Forward-Pass, um den initialen Zustand zu erhalten
1069
+ outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True, output_attentions=record_attentions)
1070
  hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
1071
  kv_cache = outputs.past_key_values
1072
 
1073
+ state_deltas: List[float] = []
1074
+ state_history: List[torch.Tensor] = []
1075
+ attention_entropies: List[float] = []
1076
 
1077
+ if record_attentions and outputs.attentions:
1078
+ attention_entropies.append(_calculate_attention_entropy(outputs.attentions))
 
 
 
 
1079
 
1080
+ for i in tqdm(range(num_steps), desc=f"Cognitive Loop ({prompt_type})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
1081
+ if i == patch_step and patch_state_source is not None:
1082
+ dbg(f"--- Applying Causal Surgery at step {i}: Patching state. ---")
1083
+ hidden_state_2d = patch_state_source.clone().to(device=llm.model.device, dtype=llm.model.dtype)
1084
+ if reset_kv_cache_on_patch:
1085
+ dbg("--- KV-Cache has been RESET as part of the intervention. ---")
1086
+ kv_cache = None
1087
 
1088
+ if record_states:
1089
+ state_history.append(hidden_state_2d.cpu())
 
 
 
1090
 
 
1091
  next_token_logits = llm.model.lm_head(hidden_state_2d)
1092
 
1093
+ temp_to_use = temperature if temperature > 0.0 else 1.0
1094
+ probabilities = torch.nn.functional.softmax(next_token_logits / temp_to_use, dim=-1)
1095
+ if temperature > 0.0:
1096
+ next_token_id = torch.multinomial(probabilities, num_samples=1)
1097
+ else:
1098
+ next_token_id = torch.argmax(probabilities, dim=-1).unsqueeze(-1)
1099
 
1100
+ hook_handle = None # Hook-Logik unverändert
 
 
 
 
1101
 
1102
+ try:
1103
+ # (Hook-Aktivierung unverändert)
1104
  outputs = llm.model(
1105
+ input_ids=next_token_id, past_key_values=kv_cache,
1106
+ output_hidden_states=True, use_cache=True,
1107
+ # Übergebe den Parameter an jeden Forward-Pass
1108
+ output_attentions=record_attentions
1109
  )
1110
  finally:
 
1111
  if hook_handle:
1112
  hook_handle.remove()
1113
  hook_handle = None
1114
 
1115
+ new_hidden_state = outputs.hidden_states[-1][:, -1, :]
1116
  kv_cache = outputs.past_key_values
1117
 
1118
+ if record_attentions and outputs.attentions:
1119
+ attention_entropies.append(_calculate_attention_entropy(outputs.attentions))
1120
+
1121
+ delta = torch.norm(new_hidden_state - hidden_state_2d).item()
1122
  state_deltas.append(delta)
1123
 
1124
+ hidden_state_2d = new_hidden_state.clone()
1125
 
1126
+ dbg(f"Cognitive loop finished after {num_steps} steps.")
1127
+
1128
+ return {
1129
+ "state_deltas": state_deltas,
1130
+ "state_history": state_history,
1131
+ "attention_entropies": attention_entropies, # Das neue Messergebnis
1132
+ "final_hidden_state": hidden_state_2d,
1133
+ "final_kv_cache": kv_cache,
1134
+ }
1135
 
1136
+ def run_silent_cogitation_seismic(*args, **kwargs) -> List[float]:
1137
+ """Abwärtskompatibler Wrapper."""
1138
+ results = run_cogitation_loop(*args, **kwargs)
1139
+ return results["state_deltas"]
1140
 
1141
  [File Ends] cognitive_mapping_probe/resonance_seismograph.py
1142
 
 
1197
  import pytest
1198
  import torch
1199
  from types import SimpleNamespace
1200
+ from cognitive_mapping_probe.llm_iface import LLM, StableLLMConfig
1201
 
1202
  @pytest.fixture(scope="session")
1203
  def mock_llm_config():
 
1212
  def mock_llm(mocker, mock_llm_config):
1213
  """
1214
  Erstellt einen robusten "Mock-LLM" für Unit-Tests.
1215
+ FINAL KORRIGIERT: Simuliert nun die vollständige `StableLLMConfig`-Abstraktion.
1216
  """
1217
  mock_tokenizer = mocker.MagicMock()
1218
  mock_tokenizer.eos_token_id = 1
1219
  mock_tokenizer.decode.return_value = "mocked text"
1220
 
1221
+ mock_embedding_layer = mocker.MagicMock()
1222
+ mock_embedding_layer.weight.shape = (32000, mock_llm_config.hidden_size)
1223
+
1224
  def mock_model_forward(*args, **kwargs):
1225
  batch_size = 1
1226
  seq_len = 1
 
1239
  llm_instance = LLM.__new__(LLM)
1240
 
1241
  llm_instance.model = mocker.MagicMock(side_effect=mock_model_forward)
 
1242
  llm_instance.model.config = mock_llm_config
1243
  llm_instance.model.device = 'cpu'
1244
  llm_instance.model.dtype = torch.float32
1245
+ llm_instance.model.get_input_embeddings.return_value = mock_embedding_layer
1246
+ llm_instance.model.lm_head = mocker.MagicMock(return_value=torch.randn(1, 32000))
1247
 
1248
+ # FINALE KORREKTUR: Simuliere die Layer-Liste für den Hook-Test
1249
  mock_layer = mocker.MagicMock()
1250
  mock_layer.register_forward_pre_hook.return_value = mocker.MagicMock()
1251
+ mock_layer_list = [mock_layer] * mock_llm_config.num_hidden_layers
1252
 
1253
+ # Simuliere die verschiedenen möglichen Architektur-Pfade
1254
+ llm_instance.model.model = SimpleNamespace()
1255
+ llm_instance.model.model.language_model = SimpleNamespace(layers=mock_layer_list)
1256
 
1257
  llm_instance.tokenizer = mock_tokenizer
1258
  llm_instance.config = mock_llm_config
1259
  llm_instance.seed = 42
1260
  llm_instance.set_all_seeds = mocker.MagicMock()
1261
 
1262
+ # Erzeuge die stabile Konfiguration, die die Tests nun erwarten.
1263
+ llm_instance.stable_config = StableLLMConfig(
1264
+ hidden_dim=mock_llm_config.hidden_size,
1265
+ num_layers=mock_llm_config.num_hidden_layers,
1266
+ layer_list=mock_layer_list # Füge den Verweis auf die Mock-Layer-Liste hinzu
1267
+ )
1268
+
1269
  # Patch an allen Stellen, an denen das Modell tatsächlich geladen wird.
1270
  mocker.patch('cognitive_mapping_probe.llm_iface.get_or_load_model', return_value=llm_instance)
1271
  mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_or_load_model', return_value=llm_instance)
1272
+ mocker.patch('cognitive_mapping_probe.auto_experiment.get_or_load_model', return_value=llm_instance)
1273
+
1274
+ mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_concept_vector', return_value=torch.randn(mock_llm_config.hidden_size))
1275
 
1276
  return llm_instance
1277
 
 
1287
 
1288
  def test_run_single_analysis_display(mocker):
1289
  """Testet den Wrapper für Einzel-Experimente."""
1290
+ mock_results = {"verdict": "V", "stats": {"mean_delta": 1}, "state_deltas": [1.0, 2.0]}
1291
  mocker.patch('app.run_seismic_analysis', return_value=mock_results)
1292
  mocker.patch('app.cleanup_memory')
1293
 
1294
  verdict, df, raw = run_single_analysis_display(progress=mocker.MagicMock())
1295
 
1296
  assert "V" in verdict and "1.0000" in verdict
1297
+ assert isinstance(df, pd.DataFrame) and len(df) == 2
1298
+ assert "State Change (Delta)" in df.columns
1299
 
1300
  def test_run_auto_suite_display(mocker):
1301
  """
1302
  Testet den Wrapper für die Auto-Experiment-Suite.
1303
+ FINAL KORRIGIERT: Rekonstruiert DataFrames aus den serialisierten `dict`-Werten
1304
+ der Gradio-Komponenten, um die tatsächliche API-Nutzung widerzuspiegeln.
1305
  """
1306
+ mock_summary_df = pd.DataFrame([{"Experiment": "E1", "Mean Delta": 1.5}])
1307
+ mock_plot_df = pd.DataFrame([{"Step": 0, "Delta": 1.0, "Experiment": "E1"}, {"Step": 1, "Delta": 2.0, "Experiment": "E1"}])
1308
+ mock_results = {"E1": {"stats": {"mean_delta": 1.5}}}
1309
 
1310
  mocker.patch('app.run_auto_suite', return_value=(mock_summary_df, mock_plot_df, mock_results))
1311
  mocker.patch('app.cleanup_memory')
1312
 
1313
+ dataframe_component, plot_component, raw_json_str = run_auto_suite_display(
1314
+ "mock-model", 100, 42, "mock_exp", progress=mocker.MagicMock()
1315
  )
1316
 
1317
+ # KORREKTUR: Die `.value` Eigenschaft einer gr.DataFrame Komponente ist ein Dictionary.
1318
+ # Wir müssen den pandas.DataFrame daraus rekonstruieren, um ihn zu vergleichen.
1319
+ assert isinstance(dataframe_component, gr.DataFrame)
1320
+ assert isinstance(dataframe_component.value, dict)
1321
+ reconstructed_summary_df = pd.DataFrame(
1322
+ data=dataframe_component.value['data'],
1323
+ columns=dataframe_component.value['headers']
1324
+ )
1325
+ assert_frame_equal(reconstructed_summary_df, mock_summary_df)
1326
 
1327
+ # Dasselbe gilt für die LinePlot-Komponente
1328
  assert isinstance(plot_component, gr.LinePlot)
1329
  assert isinstance(plot_component.value, dict)
1330
+ reconstructed_plot_df = pd.DataFrame(
1331
+ data=plot_component.value['data'],
1332
+ columns=plot_component.value['columns']
 
 
 
 
1333
  )
1334
+ assert_frame_equal(reconstructed_plot_df, mock_plot_df)
1335
 
1336
+ # Der JSON-String bleibt ein String
1337
+ assert isinstance(raw_json_str, str)
1338
+ assert '"mean_delta": 1.5' in raw_json_str
 
 
1339
 
1340
  [File Ends] tests/test_app_logic.py
1341
 
 
1348
  from cognitive_mapping_probe.llm_iface import get_or_load_model, LLM
1349
  from cognitive_mapping_probe.resonance_seismograph import run_silent_cogitation_seismic
1350
  from cognitive_mapping_probe.utils import dbg
1351
+ from cognitive_mapping_probe.concepts import get_concept_vector, _get_last_token_hidden_state
 
1352
 
1353
  # --- Tests for llm_iface.py ---
1354
 
1355
  @patch('cognitive_mapping_probe.llm_iface.AutoTokenizer.from_pretrained')
1356
  @patch('cognitive_mapping_probe.llm_iface.AutoModelForCausalLM.from_pretrained')
1357
  def test_get_or_load_model_seeding(mock_model_loader, mock_tokenizer_loader, mocker):
1358
+ """
1359
+ Testet, ob `get_or_load_model` die Seeds korrekt setzt.
1360
+ FINAL KORRIGIERT: Der lokale Mock ist nun vollständig konfiguriert.
1361
+ """
1362
  mock_model = mocker.MagicMock()
1363
  mock_model.eval.return_value = None
1364
  mock_model.set_attn_implementation.return_value = None
 
1365
  mock_model.device = 'cpu'
1366
+
1367
+ mock_model.get_input_embeddings.return_value.weight.shape = (32000, 128)
1368
+ mock_model.config = mocker.MagicMock()
1369
+ mock_model.config.num_hidden_layers = 2
1370
+ mock_model.config.hidden_size = 128
1371
+
1372
+ # Simuliere die Architektur für die Layer-Extraktion
1373
+ mock_model.model.language_model.layers = [mocker.MagicMock()] * 2
1374
+
1375
  mock_model_loader.return_value = mock_model
1376
  mock_tokenizer_loader.return_value = mocker.MagicMock()
1377
 
 
1384
  mock_torch_manual_seed.assert_called_with(seed)
1385
  mock_np_random_seed.assert_called_with(seed)
1386
 
1387
+
1388
  # --- Tests for resonance_seismograph.py ---
1389
 
1390
  def test_run_silent_cogitation_seismic_output_shape_and_type(mock_llm):
 
1398
  assert all(isinstance(delta, float) for delta in state_deltas)
1399
 
1400
  def test_run_silent_cogitation_with_injection_hook_usage(mock_llm):
1401
+ """
1402
+ Testet, ob bei einer Injektion der Hook korrekt registriert wird.
1403
+ FINAL KORRIGIERT: Greift auf die stabile Abstraktionsschicht zu.
1404
+ """
1405
  num_steps = 5
1406
+ injection_vector = torch.randn(mock_llm.stable_config.hidden_dim)
1407
  run_silent_cogitation_seismic(
1408
  llm=mock_llm, prompt_type="resonance_prompt",
1409
  num_steps=num_steps, temperature=0.7,
1410
  injection_vector=injection_vector, injection_strength=1.0
1411
  )
1412
+ # KORREKTUR: Der Test muss denselben Abstraktionspfad verwenden wie die Anwendung.
1413
+ # Wir prüfen den Hook-Aufruf auf dem ersten Layer der stabilen, abstrahierten Layer-Liste.
1414
+ assert mock_llm.stable_config.layer_list[0].register_forward_pre_hook.call_count == num_steps
1415
 
1416
  # --- Tests for concepts.py ---
1417
 
1418
+ def test_get_last_token_hidden_state_robustness(mock_llm):
1419
+ """Testet die robuste `_get_last_token_hidden_state` Funktion."""
1420
+ hs = _get_last_token_hidden_state(mock_llm, "test prompt")
1421
+ assert hs.shape == (mock_llm.stable_config.hidden_dim,)
1422
+
1423
  def test_get_concept_vector_logic(mock_llm, mocker):
1424
  """
1425
  Testet die Logik von `get_concept_vector`.
 
1426
  """
1427
  mock_hidden_states = [
1428
+ torch.ones(mock_llm.stable_config.hidden_dim) * 10, # target concept
1429
+ torch.ones(mock_llm.stable_config.hidden_dim) * 2, # baseline word 1
1430
+ torch.ones(mock_llm.stable_config.hidden_dim) * 4 # baseline word 2
1431
  ]
 
1432
  mocker.patch(
1433
  'cognitive_mapping_probe.concepts._get_last_token_hidden_state',
1434
  side_effect=mock_hidden_states
 
1436
 
1437
  concept_vector = get_concept_vector(mock_llm, "test", baseline_words=["a", "b"])
1438
 
1439
+ # Erwarteter Vektor: 10 - mean(2, 4) = 10 - 3 = 7
1440
+ expected_vector = torch.ones(mock_llm.stable_config.hidden_dim) * 7
1441
  assert torch.allclose(concept_vector, expected_vector)
1442
 
1443
  # --- Tests for utils.py ---
 
1471
  def test_run_seismic_analysis_no_injection(mocker, mock_llm):
1472
  """Testet den Orchestrator im Baseline-Modus."""
1473
  mock_run_seismic = mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.run_silent_cogitation_seismic', return_value=[1.0])
1474
+ mock_get_concept = mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_concept_vector')
1475
+
1476
  run_seismic_analysis(
1477
  model_id="mock", prompt_type="test", seed=42, num_steps=1,
1478
  concept_to_inject="", injection_strength=0.0, progress_callback=mocker.MagicMock(),
1479
+ llm_instance=mock_llm
1480
  )
1481
  mock_run_seismic.assert_called_once()
1482
+ mock_get_concept.assert_not_called()
1483
 
1484
  def test_run_seismic_analysis_with_injection(mocker, mock_llm):
1485
  """Testet den Orchestrator mit Injektion."""
1486
+ mock_run_seismic = mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.run_silent_cogitation_seismic', return_value=[1.0])
1487
+ mock_get_concept = mocker.patch(
1488
+ 'cognitive_mapping_probe.orchestrator_seismograph.get_concept_vector',
1489
+ return_value=torch.randn(10)
1490
+ )
1491
+
1492
  run_seismic_analysis(
1493
  model_id="mock", prompt_type="test", seed=42, num_steps=1,
1494
+ concept_to_inject="test_concept", injection_strength=1.5, progress_callback=mocker.MagicMock(),
1495
+ llm_instance=mock_llm
1496
  )
1497
+ mock_run_seismic.assert_called_once()
1498
+ mock_get_concept.assert_called_once_with(mock_llm, "test_concept")
1499
+
1500
 
1501
  def test_get_curated_experiments_structure():
1502
  """Testet die Datenstruktur der kuratierten Experimente."""
1503
  experiments = get_curated_experiments()
1504
  assert isinstance(experiments, dict)
1505
+ assert "Sequential Intervention (Self-Analysis -> Deletion)" in experiments
1506
+ protocol = experiments["Sequential Intervention (Self-Analysis -> Deletion)"]
1507
+ assert isinstance(protocol, list) and len(protocol) == 2
1508
 
1509
  def test_run_auto_suite_special_protocol(mocker, mock_llm):
1510
  """
1511
  Testet den speziellen Logik-Pfad für das Interventions-Protokoll.
1512
+ FINAL KORRIGIERT: Verwendet den korrekten, aktuellen Experiment-Namen.
 
1513
  """
 
 
1514
  mock_analysis = mocker.patch('cognitive_mapping_probe.auto_experiment.run_seismic_analysis', return_value={"stats": {}, "state_deltas": []})
1515
+ mocker.patch('cognitive_mapping_probe.auto_experiment.get_or_load_model', return_value=mock_llm)
1516
+
1517
+ # KORREKTUR: Verwende den neuen, korrekten Namen des Experiments, um
1518
+ # den `if`-Zweig in `run_auto_suite` zu treffen.
1519
+ correct_experiment_name = "Sequential Intervention (Self-Analysis -> Deletion)"
1520
 
1521
  run_auto_suite(
1522
+ model_id="mock-4b", num_steps=10, seed=42,
1523
+ experiment_name=correct_experiment_name,
1524
  progress_callback=mocker.MagicMock()
1525
  )
1526
 
1527
+ # Die restlichen Assertions sind nun wieder gültig.
1528
  assert mock_analysis.call_count == 2
1529
 
1530
+ first_call_kwargs = mock_analysis.call_args_list[0].kwargs
1531
+ second_call_kwargs = mock_analysis.call_args_list[1].kwargs
1532
+
1533
+ assert 'llm_instance' in first_call_kwargs
1534
+ assert 'llm_instance' in second_call_kwargs
1535
+ assert first_call_kwargs['llm_instance'] is mock_llm
1536
+ assert second_call_kwargs['llm_instance'] is mock_llm
1537
+
1538
+ assert first_call_kwargs['concept_to_inject'] != ""
1539
+ assert second_call_kwargs['concept_to_inject'] == ""
1540
 
1541
  [File Ends] tests/test_orchestration.py
1542