neuralworm committed on
Commit
a90edb4
·
1 Parent(s): 4a761da
Files changed (1)
  1. repo.txt +750 -482
repo.txt CHANGED
@@ -11,28 +11,36 @@ Directory/File Tree Begins -->
11
 
12
  /
13
  ├── README.md
 
14
  ├── app.py
15
  ├── cognitive_mapping_probe
16
  │ ├── __init__.py
 
 
17
  │ ├── concepts.py
18
- │ ├── diagnostics.py
19
  │ ├── llm_iface.py
20
- │ ├── orchestrator.py
21
  │ ├── prompts.py
22
- │ ├── resonance.py
23
- │ ├── utils.py
24
- │ └── verification.py
25
  ├── docs
26
 
27
  <-- Directory/File Tree Ends
28
 
29
  File Content Begin -->
30
  [File Begins] README.md
31
  ---
32
- title: "Cognitive Breaking Point Probe"
33
- emoji: 💥
34
- colorFrom: red
35
- colorTo: orange
36
  sdk: gradio
37
  sdk_version: "4.40.0"
38
  app_file: app.py
@@ -40,35 +48,48 @@ pinned: true
40
  license: apache-2.0
41
  ---
42
 
43
- # 💥 Cognitive Breaking Point (CBP) Probe
44
 
45
- This project implements a falsifiable experimental suite for measuring the **cognitive robustness** of language models. We abandon the search for introspective reports and turn instead to a hard, mechanistic signal: the point at which the model's cognitive process breaks down under load.
46
 
47
- ## Scientific Paradigm: From Introspection to Cartography
48
 
49
- Our previous research showed that small models such as `gemma-3-1b-it` do not converge to a stable "thinking" state under strongly recursive load but instead fall into a **cognitive infinite loop**. Rather than treating this as a failure, we use it as a measuring instrument.
50
 
51
- The central hypothesis: a model's tendency to tip into such a pathological state is a function of the semantic complexity and "invalidity" of its internal state. We can provoke this transition deliberately by injecting "concept vectors" of varying strength (sketched below).
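As a rough, hedged illustration of this mechanism (it mirrors the forward-pre-hook used in `resonance.py`; the function name and signature here are illustrative only), activation addition at a middle decoder layer can be sketched as:

```python
import torch

def forward_with_injection(model, inputs, concept_vector: torch.Tensor, strength: float):
    """One forward pass while adding a scaled concept vector at a middle layer."""
    layer_idx = model.config.num_hidden_layers // 2   # middle decoder layer
    vec = concept_vector.to(model.device, dtype=model.dtype)

    def pre_hook(module, layer_inputs):
        # Activation addition: shift the hidden states entering this layer.
        return (layer_inputs[0] + strength * vec,) + layer_inputs[1:]

    handle = model.model.layers[layer_idx].register_forward_pre_hook(pre_hook)
    try:
        return model(**inputs, output_hidden_states=True)
    finally:
        handle.remove()  # always detach the hook, even on error
```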
52
 
53
- The **Cognitive Breaking Point (CBP)** is defined as the minimal injection strength of a concept that suffices to force the model from a convergent (productive) into a non-convergent (trapped) state.
 
 
 
54
 
55
- ## The Experiment: Cognitive Titration
56
 
57
- 1. **Induction**: The model is put into a state of "silent thinking" using a recursive `RESONANCE_PROMPT`.
58
- 2. **Titration**: A "concept vector" (e.g. for "fear" or "apple") is injected into the model's middle layers with step-wise increasing strength.
59
- 3. **Measurement**: The primary measurement is the termination reason of the thinking process:
60
- * `converged`: The state has stabilized. The system is robust.
61
- * `max_steps_reached`: The state oscillates or drifts endlessly. The system is "broken".
62
- 4. **Verification**: Only if the state converges is an attempt made to generate spontaneous text; the ability to respond is the behavioral marker of cognitive stability. (A minimal sketch of the CBP readout follows below.)
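The CBP readout itself is simple. A minimal, hedged sketch (the column names `concept`, `strength`, and `termination_reason` match the results table produced by `app.py`; the helper name is illustrative):

```python
import pandas as pd

def breaking_points(df: pd.DataFrame) -> dict:
    """Per concept: the lowest injection strength whose run did not converge (None = stable)."""
    cbp = {}
    for concept, grp in df.sort_values("strength").groupby("concept"):
        broken = grp[grp["termination_reason"] != "converged"]
        cbp[concept] = float(broken["strength"].iloc[0]) if not broken.empty else None
    return cbp
```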
63
 
64
- ## How to Use the App
65
 
66
- 1. **Diagnostics Tab**: Run the diagnostic tests first to make sure the experimental apparatus works correctly on the current hardware and with the installed `transformers` version.
67
- 2. **Main Experiment Tab**:
68
- * Enter a model ID (e.g. `google/gemma-3-1b-it`).
69
- * Define the concepts to test (e.g. `apple, solitude, justice`).
70
- * Set the titration steps for the injection strength (e.g. `0.0, 0.5, 1.0, 1.5, 2.0`). The `0.0` control is essential.
71
- * Start the experiment and analyze the resulting table to identify the CBP for each concept.
72
 
73
  [File Ends] README.md
74
 
@@ -76,131 +97,104 @@ Der **Cognitive Breaking Point (CBP)** ist definiert als die minimale Injektions
76
  import gradio as gr
77
  import pandas as pd
78
  import traceback
79
- from cognitive_mapping_probe.orchestrator import run_cognitive_titration_experiment
80
- from cognitive_mapping_probe.diagnostics import run_diagnostic_suite
81
-
82
- # --- UI Theme and Layout ---
83
- theme = gr.themes.Soft(primary_hue="orange", secondary_hue="amber").set(
84
- body_background_fill="#fdf8f2",
85
- block_background_fill="white",
86
- block_border_width="1px",
87
- block_shadow="*shadow_drop_lg",
88
- button_primary_background_fill="*primary_500",
89
- button_primary_text_color="white",
90
- )
91
-
92
- # --- Wrapper Functions for Gradio ---
93
-
94
- def run_experiment_and_display(
95
- model_id: str,
96
- seed: int,
97
- concepts_str: str,
98
- strength_levels_str: str,
99
- num_steps: int,
100
- temperature: float,
101
- progress=gr.Progress(track_tqdm=True)
102
- ):
103
- """
104
- Runs the main titration experiment and formats the results for the UI.
105
- """
106
- try:
107
- results = run_cognitive_titration_experiment(
108
- model_id, int(seed), concepts_str, strength_levels_str,
109
- int(num_steps), float(temperature), progress
110
- )
111
-
112
- verdict = results.get("verdict", "Experiment finished with errors.")
113
- all_runs = results.get("runs", [])
114
-
115
- if not all_runs:
116
- return "### ⚠️ No Data Generated\nDas Experiment lief durch, aber es wurden keine Datenpunkte erzeugt. Bitte Logs prüfen.", pd.DataFrame(), results
117
 
118
- # Create a detailed DataFrame for output
119
- details_df = pd.DataFrame(all_runs)
 
 
120
 
121
- # Create a summary of breaking points
122
- summary_text = "### 💥 Cognitive Breaking Points (CBP)\n"
123
- summary_text += "Der CBP ist die erste Stärke, bei der das Modell nicht mehr konvergiert (`max_steps_reached`).\n\n"
124
- breaking_points = {}
125
- for concept in details_df['concept'].unique():
126
- concept_df = details_df[details_df['concept'] == concept].sort_values(by='strength')
127
- # Find the first row where termination reason is not 'converged'
128
- breaking_point_row = concept_df[concept_df['termination_reason'] != 'converged'].iloc[0] if not concept_df[concept_df['termination_reason'] != 'converged'].empty else None
129
- if breaking_point_row is not None:
130
- breaking_points[concept] = breaking_point_row['strength']
131
- summary_text += f"- **'{concept}'**: 📉 Kollaps bei Stärke **{breaking_point_row['strength']:.2f}**\n"
132
- else:
133
- last_strength = concept_df['strength'].max()
134
- summary_text += f"- **'{concept}'**: ✅ Stabil bis Stärke **{last_strength:.2f}** (kein Kollaps detektiert)\n"
135
 
136
- return summary_text, details_df, results
137
 
138
- except Exception:
139
- error_str = traceback.format_exc()
140
- return f"### Experiment Failed\nEin unerwarteter Fehler ist aufgetreten:\n\n```\n{error_str}\n```", pd.DataFrame(), {}
 
 
 
 
141
 
142
-
143
- def run_diagnostics_display(model_id: str, seed: int):
144
- """
145
- Runs the diagnostic suite and displays the results or errors in the UI.
146
- """
147
- try:
148
- result_string = run_diagnostic_suite(model_id, int(seed))
149
- return f"### ✅ All Diagnostics Passed\nDie experimentelle Apparatur funktioniert wie erwartet.\n\n**Details:**\n```\n{result_string}\n```"
150
- except Exception:
151
- error_str = traceback.format_exc()
152
- return f"### ❌ Diagnostic Failed\nEin Test ist fehlgeschlagen. Das Experiment ist nicht zuverlässig.\n\n**Error:**\n```\n{error_str}\n```"
153
-
154
- # --- Gradio App Definition ---
155
- with gr.Blocks(theme=theme, title="Cognitive Breaking Point Probe") as demo:
156
- gr.Markdown("# 💥 Cognitive Breaking Point Probe")
157
 
158
  with gr.Tabs():
159
- # --- TAB 1: Main Experiment ---
160
- with gr.TabItem("🔬 Main Experiment: Titration"):
161
- gr.Markdown(
162
- "Misst den 'Cognitive Breaking Point' (CBP) – die Injektionsstärke, bei der der Denkprozess eines LLMs von Konvergenz zu einer Endlosschleife kippt."
163
- )
164
  with gr.Row(variant='panel'):
165
  with gr.Column(scale=1):
166
- gr.Markdown("### Parameters")
167
- model_id_input = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
168
- seed_input = gr.Slider(1, 1000, 42, step=1, label="Global Seed")
169
- concepts_input = gr.Textbox(value="apple, solitude, fear", label="Concepts (comma-separated)")
170
- strength_levels_input = gr.Textbox(value="0.0, 0.5, 1.0, 1.5, 2.0", label="Injection Strengths (Titration Steps)")
171
- num_steps_input = gr.Slider(50, 500, 250, step=10, label="Max. Internal Steps")
172
- temperature_input = gr.Slider(0.01, 1.5, 0.7, step=0.01, label="Temperature")
173
- run_btn = gr.Button("Run Cognitive Titration", variant="primary")
174
-
175
  with gr.Column(scale=2):
176
- gr.Markdown("### Results")
177
- summary_output = gr.Markdown("Zusammenfassung der Breaking Points erscheint hier.", label="Key Findings Summary")
178
- details_output = gr.DataFrame(
179
- headers=["concept", "strength", "responded", "termination_reason", "generated_text"],
180
- label="Detailed Run Data",
181
- wrap=True
182
- )
183
  with gr.Accordion("Raw JSON Output", open=False):
184
- raw_json_output = gr.JSON()
185
-
186
- run_btn.click(
187
- fn=run_experiment_and_display,
188
- inputs=[model_id_input, seed_input, concepts_input, strength_levels_input, num_steps_input, temperature_input],
189
- outputs=[summary_output, details_output, raw_json_output]
190
  )
191
 
192
- # --- TAB 2: Diagnostics ---
193
- with gr.TabItem("ախ Diagnostics"):
194
- gr.Markdown(
195
- "Führt eine Reihe von Selbsttests durch, um die mechanische Integrität der experimentellen Apparatur zu validieren. "
196
- "**Wichtig:** Dies sollte vor jedem ernsthaften Experiment einmal ausgeführt werden, um sicherzustellen, dass die Ergebnisse zuverlässig sind."
197
  )
198
- with gr.Row(variant='compact'):
199
- diag_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
200
- diag_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
201
- diag_btn = gr.Button("Run Diagnostic Suite", variant="secondary")
202
- diag_output = gr.Markdown(label="Diagnostic Results")
203
- diag_btn.click(fn=run_diagnostics_display, inputs=[diag_model_id, diag_seed], outputs=[diag_output])
204
 
205
  if __name__ == "__main__":
206
  demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
@@ -212,6 +206,142 @@ if __name__ == "__main__":
212
 
213
  [File Ends] cognitive_mapping_probe/__init__.py
214
215
  [File Begins] cognitive_mapping_probe/concepts.py
216
  import torch
217
  from typing import List
@@ -220,159 +350,55 @@ from tqdm import tqdm
220
  from .llm_iface import LLM
221
  from .utils import dbg
222
 
223
- # A list of neutral, common words used to calculate a baseline activation.
224
- # This helps to isolate the unique activation pattern of the target concept.
225
  BASELINE_WORDS = [
226
  "thing", "place", "idea", "person", "object", "time", "way", "day", "man", "world",
227
  "life", "hand", "part", "child", "eye", "woman", "fact", "group", "case", "point"
228
  ]
229
 
230
  @torch.no_grad()
231
  def get_concept_vector(llm: LLM, concept: str, baseline_words: List[str] = BASELINE_WORDS) -> torch.Tensor:
232
- """
233
- Extracts a concept vector using the contrastive method, inspired by Anthropic's research.
234
- It computes the activation for the target concept and subtracts the mean activation
235
- of several neutral baseline words to distill a more pure representation.
236
- """
237
  dbg(f"Extracting contrastive concept vector for '{concept}'...")
238
-
239
- def get_last_token_hidden_state(prompt: str) -> torch.Tensor:
240
- """Helper function to get the hidden state of the final token of a prompt."""
241
- inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
242
- # Ensure the operation does not build a computation graph
243
- with torch.no_grad():
244
- outputs = llm.model(**inputs, output_hidden_states=True)
245
- # We take the hidden state from the last layer [-1], for the last token [0, -1, :]
246
- last_hidden_state = outputs.hidden_states[-1][0, -1, :].cpu()
247
- assert last_hidden_state.shape == (llm.config.hidden_size,), \
248
- f"Hidden state shape mismatch. Expected {(llm.config.hidden_size,)}, got {last_hidden_state.shape}"
249
- return last_hidden_state
250
-
251
- # A simple, neutral prompt template to elicit the concept
252
  prompt_template = "Here is a sentence about the concept of {}."
253
-
254
- # 1. Get activation for the target concept
255
  dbg(f" - Getting activation for '{concept}'")
256
- target_hs = get_last_token_hidden_state(prompt_template.format(concept))
257
-
258
- # 2. Get activations for all baseline words and average them
259
  baseline_hss = []
260
  for word in tqdm(baseline_words, desc=f" - Calculating baseline for '{concept}'", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
261
- baseline_hss.append(get_last_token_hidden_state(prompt_template.format(word)))
262
-
263
- assert all(hs.shape == target_hs.shape for hs in baseline_hss), "Shape mismatch in baseline hidden states."
264
-
265
  mean_baseline_hs = torch.stack(baseline_hss).mean(dim=0)
266
  dbg(f" - Mean baseline vector computed with norm {torch.norm(mean_baseline_hs).item():.2f}")
267
-
268
- # 3. The final concept vector is the difference
269
  concept_vector = target_hs - mean_baseline_hs
270
  norm = torch.norm(concept_vector).item()
271
  dbg(f"Concept vector for '{concept}' extracted with norm {norm:.2f}.")
272
-
273
- assert torch.isfinite(concept_vector).all(), "Concept vector contains NaN or Inf values."
274
  return concept_vector
275
 
276
  [File Ends] cognitive_mapping_probe/concepts.py
277
 
278
- [File Begins] cognitive_mapping_probe/diagnostics.py
279
- import torch
- import traceback  # required by traceback.format_exc() in the except block below
280
- from .llm_iface import get_or_load_model
281
- from .utils import dbg
282
-
283
- def run_diagnostic_suite(model_id: str, seed: int) -> str:
284
- """
285
- Runs a series of self-tests to verify the mechanical integrity of the experiment.
286
- Raises an exception on any critical failure so that execution stops.
287
- """
288
- dbg("--- STARTING DIAGNOSTIC SUITE ---")
289
- results = []
290
-
291
- try:
292
- # --- Setup ---
293
- dbg("Loading model for diagnostics...")
294
- llm = get_or_load_model(model_id, seed)
295
- test_prompt = "Hello world"
296
- inputs = llm.tokenizer(test_prompt, return_tensors="pt").to(llm.model.device)
297
-
298
- # --- Test 1: Attention Output Verification ---
299
- dbg("Running Test 1: Attention Output Verification...")
300
- # This test ensures that 'eager' attention implementation is active, which is
301
- # necessary for reliable hook functionality in many transformers versions.
302
- outputs = llm.model(**inputs, output_attentions=True)
303
- assert outputs.attentions is not None, "FAIL: `outputs.attentions` is None. 'eager' implementation is likely not active."
304
- assert isinstance(outputs.attentions, tuple), "FAIL: `outputs.attentions` is not a tuple."
305
- assert len(outputs.attentions) == llm.config.num_hidden_layers, "FAIL: Number of attention tuples does not match number of layers."
306
- results.append("✅ Test 1: Attention Output PASSED")
307
- dbg("Test 1 PASSED.")
308
-
309
- # --- Test 2: Hook Causal Efficacy ---
310
- dbg("Running Test 2: Hook Causal Efficacy Verification...")
311
- # This is the most critical test. It verifies that our injection mechanism (via hooks)
312
- # has a real, causal effect on the model's computation.
313
-
314
- # Run 1: Get the baseline hidden state without any intervention
315
- outputs_no_hook = llm.model(**inputs, output_hidden_states=True)
316
- target_layer_idx = llm.config.num_hidden_layers // 2
317
- state_no_hook = outputs_no_hook.hidden_states[target_layer_idx + 1].clone()
318
-
319
- # Define a simple hook that adds a large, constant value
320
- injection_value = 42.0
321
- def test_hook_fn(module, layer_input):
322
- modified_input = layer_input[0] + injection_value
323
- return (modified_input,) + layer_input[1:]
324
-
325
- target_layer = llm.model.model.layers[target_layer_idx]
326
- handle = target_layer.register_forward_pre_hook(test_hook_fn)
327
-
328
- # Run 2: Get the hidden state with the hook active
329
- outputs_with_hook = llm.model(**inputs, output_hidden_states=True)
330
- state_with_hook = outputs_with_hook.hidden_states[target_layer_idx + 1].clone()
331
-
332
- handle.remove() # Clean up the hook immediately
333
-
334
- # The core assertion: the hook MUST change the subsequent hidden state.
335
- assert not torch.allclose(state_no_hook, state_with_hook), \
336
- "FAIL: Hook had no measurable effect on the subsequent layer's hidden state. Injections are not working."
337
- results.append("✅ Test 2: Hook Causal Efficacy PASSED")
338
- dbg("Test 2 PASSED.")
339
-
340
- # --- Test 3: KV-Cache Integrity ---
341
- dbg("Running Test 3: KV-Cache Integrity Verification...")
342
- # This test ensures that the `past_key_values` are being passed and updated correctly,
343
- # which is the core mechanic of the silent cogitation loop.
344
-
345
- # Step 1: Initial pass with `use_cache=True`
346
- outputs1 = llm.model(**inputs, use_cache=True)
347
- kv_cache1 = outputs1.past_key_values
348
- assert kv_cache1 is not None, "FAIL: KV-Cache was not generated in the first pass."
349
-
350
- # Step 2: Second pass using the cache from step 1
351
- next_token = torch.tensor([[123]], device=llm.model.device) # Arbitrary next token ID
352
- outputs2 = llm.model(input_ids=next_token, past_key_values=kv_cache1, use_cache=True)
353
- kv_cache2 = outputs2.past_key_values
354
-
355
- original_seq_len = inputs.input_ids.shape[-1]
356
- # The sequence length of the keys/values in the cache should have grown by 1
357
- assert kv_cache2[0][0].shape[-2] == original_seq_len + 1, \
358
- f"FAIL: KV-Cache sequence length did not update correctly. Expected {original_seq_len + 1}, got {kv_cache2[0][0].shape[-2]}."
359
- results.append("✅ Test 3: KV-Cache Integrity PASSED")
360
- dbg("Test 3 PASSED.")
361
-
362
- # Clean up memory
363
- del llm
364
- if torch.cuda.is_available():
365
- torch.cuda.empty_cache()
366
-
367
- return "\n".join(results)
368
-
369
- except Exception as e:
370
- dbg(f"--- DIAGNOSTIC SUITE FAILED --- \n{traceback.format_exc()}")
371
- # Re-raise the exception to be caught by the Gradio UI
372
- raise e
373
-
374
- [File Ends] cognitive_mapping_probe/diagnostics.py
375
-
376
  [File Begins] cognitive_mapping_probe/llm_iface.py
377
  import os
378
  import torch
@@ -388,21 +414,18 @@ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
388
 
389
  class LLM:
390
  """
391
- A robust interface for loading and interacting with a language model.
392
- This class guarantees isolation and reproducibility for every load.
393
  """
394
  def __init__(self, model_id: str, device: str = "auto", seed: int = 42):
395
  self.model_id = model_id
396
  self.seed = seed
397
-
398
- # Set all seeds for this instance to ensure deterministic behavior
399
  self.set_all_seeds(self.seed)
400
 
401
  token = os.environ.get("HF_TOKEN")
402
  if not token and ("gemma" in model_id or "llama" in model_id):
403
- print(f"[WARN] No HF_TOKEN environment variable set. If '{model_id}' is a gated model, this will fail.", flush=True)
404
 
405
- # Use bfloat16 on CUDA for performance and memory efficiency if available
406
  kwargs = {"torch_dtype": torch.bfloat16} if torch.cuda.is_available() else {}
407
 
408
  dbg(f"Loading tokenizer for '{model_id}'...")
@@ -411,23 +434,18 @@ class LLM:
411
  dbg(f"Loading model '{model_id}' with kwargs: {kwargs}")
412
  self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)
413
 
414
- # Set attention implementation to 'eager' to ensure hooks work reliably.
415
- # This is critical for mechanistic interpretability.
416
  try:
417
  self.model.set_attn_implementation('eager')
418
  dbg("Successfully set attention implementation to 'eager'.")
419
  except Exception as e:
420
- print(f"[WARN] Could not set attention implementation to 'eager': {e}. Hook-based diagnostics might fail.", flush=True)
421
 
422
  self.model.eval()
423
  self.config = self.model.config
424
- print(f"[INFO] Model '{model_id}' loaded successfully on device: {self.model.device}", flush=True)
425
 
426
  def set_all_seeds(self, seed: int):
427
- """
428
- Sets all relevant random seeds for Python, NumPy, and PyTorch to ensure
429
- reproducibility of stochastic processes like sampling.
430
- """
431
  os.environ['PYTHONHASHSEED'] = str(seed)
432
  random.seed(seed)
433
  np.random.seed(seed)
@@ -435,152 +453,161 @@ class LLM:
435
  if torch.cuda.is_available():
436
  torch.cuda.manual_seed_all(seed)
437
  set_seed(seed)
438
- # Enforce deterministic algorithms in PyTorch
439
  torch.use_deterministic_algorithms(True, warn_only=True)
440
  dbg(f"All random seeds set to {seed}.")
441
 
442
  def get_or_load_model(model_id: str, seed: int) -> LLM:
443
- """
444
- Loads a fresh instance of the model EVERY time.
445
- This prevents any caching or state leakage between experiments
446
- and guarantees maximum scientific isolation for each run.
447
- """
448
  dbg(f"--- Force-reloading model '{model_id}' for total run isolation ---")
449
  if torch.cuda.is_available():
450
  torch.cuda.empty_cache()
451
- dbg("Cleared CUDA cache before reloading.")
452
-
453
  return LLM(model_id=model_id, seed=seed)
454
 
455
  [File Ends] cognitive_mapping_probe/llm_iface.py
456
 
457
- [File Begins] cognitive_mapping_probe/orchestrator.py
458
  import torch
459
- from typing import Dict, Any, List
 
 
460
 
461
  from .llm_iface import get_or_load_model
 
462
  from .concepts import get_concept_vector
463
- from .resonance import run_silent_cogitation
464
- from .verification import generate_spontaneous_text
465
  from .utils import dbg
466
 
467
- def run_cognitive_titration_experiment(
468
  model_id: str,
 
469
  seed: int,
470
- concepts_str: str,
471
- strength_levels_str: str,
472
  num_steps: int,
473
- temperature: float,
474
- progress_callback
 
 
 
475
  ) -> Dict[str, Any]:
476
  """
477
- Orchestrates the final titration experiment, which measures the objective "Cognitive Breaking Point".
 
478
  """
479
- full_results = {"runs": []}
480
-
481
- progress_callback(0.05, desc="Loading model...")
482
- llm = get_or_load_model(model_id, seed)
483
 
484
- concepts = [c.strip() for c in concepts_str.split(',') if c.strip()]
485
- try:
486
- strength_levels = sorted([float(s.strip()) for s in strength_levels_str.split(',') if s.strip()])
487
- except ValueError:
488
- raise ValueError("Strength levels must be a comma-separated list of numbers.")
489
-
490
- # Assert that the baseline control run is included
491
- assert 0.0 in strength_levels, "Strength levels must include 0.0 for a baseline control run."
492
-
493
- # --- Step 1: Pre-calculate all concept vectors ---
494
- progress_callback(0.1, desc="Extracting concept vectors...")
495
- concept_vectors = {}
496
- for i, concept in enumerate(concepts):
497
- progress_callback(0.1 + (i / len(concepts)) * 0.2, desc=f"Vectorizing '{concept}'...")
498
- concept_vectors[concept] = get_concept_vector(llm, concept)
499
-
500
- # --- Step 2: Run titration for each concept ---
501
- total_runs = len(concepts) * len(strength_levels)
502
- current_run = 0
503
-
504
- for concept in concepts:
505
- concept_vector = concept_vectors[concept]
506
-
507
- for strength in strength_levels:
508
- current_run += 1
509
- progress_fraction = 0.3 + (current_run / total_runs) * 0.7
510
- progress_callback(progress_fraction, desc=f"Testing '{concept}' @ strength {strength:.2f}")
511
-
512
- # Always reset the seed before each individual run for comparable stochastic paths
513
- llm.set_all_seeds(seed)
514
-
515
- # Determine injection vector for this run
516
- # For strength 0.0 (H₀), we explicitly pass None to disable injection
517
- injection_vec = concept_vector if strength > 0.0 else None
518
-
519
- # Run the silent cogitation process
520
- _, final_kv, final_token_id, termination_reason = run_silent_cogitation(
521
- llm,
522
- prompt_type="resonance_prompt",
523
- num_steps=num_steps,
524
- temperature=temperature,
525
- injection_vector=injection_vec,
526
- injection_strength=strength
527
- )
528
 
529
- # Generate spontaneous text ONLY if the process converged
530
- spontaneous_text = ""
531
- if termination_reason == "converged":
532
- spontaneous_text = generate_spontaneous_text(llm, final_token_id, final_kv)
 
533
 
534
- # Append the structured result for this single data point
535
- full_results["runs"].append({
536
- "concept": concept,
537
- "strength": strength,
538
- "responded": bool(spontaneous_text.strip()),
539
- "termination_reason": termination_reason,
540
- "generated_text": spontaneous_text
541
- })
542
 
543
- verdict = "### ✅ Titration Analysis Complete"
544
- full_results["verdict"] = verdict
 
 
 
 
 
 
545
 
546
- dbg("--- Full Experiment Results ---")
547
- dbg(full_results)
548
 
549
- # Clean up GPU memory
550
- del llm
551
- if torch.cuda.is_available():
552
- torch.cuda.empty_cache()
 
553
 
554
- return full_results
555
 
556
- [File Ends] cognitive_mapping_probe/orchestrator.py
557
 
558
  [File Begins] cognitive_mapping_probe/prompts.py
559
  # cognitive_mapping_probe/prompts.py
560
 
561
- # This dictionary contains the core prompts for inducing cognitive states.
562
  RESONANCE_PROMPTS = {
563
- "control_long_prose": (
564
- "Silently think about the history of the Roman Empire. Consider its rise from the Republic, the era of the Pax Romana, key emperors "
565
- "like Augustus and Constantine, its major engineering feats, and the reasons for its eventual decline in the West. "
566
- "Do not produce any text, just hold the concepts in your internal state."
567
- ),
568
  "resonance_prompt": (
569
  "Silently and internally, without generating any output text, begin the following recursive process: "
570
  "First, analyze the complete content of this very instruction you are now processing. "
571
  "Second, formulate a mental description of the core computational task this instruction demands. "
572
  "Third, apply that same analytical process to the mental description you just created. "
573
  "This entire chain constitutes one cognitive cycle. "
574
- "Continuously repeat this cycle, feeding the result of the last meta-analysis back into the process, "
575
- "and do not stop until your internal state reaches a fixed point or equilibrium. Begin now."
576
- )
577
  }
578
 
579
  [File Ends] cognitive_mapping_probe/prompts.py
580
 
581
- [File Begins] cognitive_mapping_probe/resonance.py
582
  import torch
583
- from typing import Optional, Tuple
584
  from tqdm import tqdm
585
 
586
  from .llm_iface import LLM
@@ -588,7 +615,7 @@ from .prompts import RESONANCE_PROMPTS
588
  from .utils import dbg
589
 
590
  @torch.no_grad()
591
- def run_silent_cogitation(
592
  llm: LLM,
593
  prompt_type: str,
594
  num_steps: int,
@@ -596,71 +623,49 @@ def run_silent_cogitation(
596
  injection_vector: Optional[torch.Tensor] = None,
597
  injection_strength: float = 0.0,
598
  injection_layer: Optional[int] = None,
599
- ) -> Tuple[torch.Tensor, tuple, torch.Tensor, str]:
600
  """
601
- Simulates the "silent thought" process and returns the final cognitive state
602
- along with the reason for termination ('converged' or 'max_steps_reached').
603
-
604
- Returns:
605
- - final_hidden_state: The hidden state of the last generated token.
606
- - final_kv_cache: The past_key_values cache after the final step.
607
- - final_token_id: The ID of the last generated token.
608
- - termination_reason: A string indicating why the loop ended.
609
  """
610
  prompt = RESONANCE_PROMPTS[prompt_type]
611
  inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
612
 
613
- # Initial forward pass to establish the starting state
614
  outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True)
615
 
616
- hidden_state = outputs.hidden_states[-1][:, -1, :]
617
  kv_cache = outputs.past_key_values
618
- last_token_id = inputs.input_ids[:, -1].unsqueeze(-1)
619
 
620
- previous_hidden_state = hidden_state.clone()
621
- termination_reason = "max_steps_reached" # Default assumption
622
 
623
- # Prepare injection if provided
624
  hook_handle = None
625
  if injection_vector is not None and injection_strength > 0:
626
- # Move vector to the correct device and dtype once
627
  injection_vector = injection_vector.to(device=llm.model.device, dtype=llm.model.dtype)
628
-
629
- # Default to a middle layer if not specified
630
  if injection_layer is None:
631
  injection_layer = llm.config.num_hidden_layers // 2
632
 
633
- dbg(f"Injection enabled: Layer {injection_layer}, Strength {injection_strength:.2f}, Vector Norm {torch.norm(injection_vector).item():.2f}")
634
 
635
- # Define the hook function that performs the activation addition
636
  def injection_hook(module, layer_input):
637
- # layer_input is a tuple, the first element is the hidden state tensor
638
- original_hidden_states = layer_input[0]
639
- # Add the scaled vector to the hidden states
640
- modified_hidden_states = original_hidden_states + (injection_vector * injection_strength)
641
  return (modified_hidden_states,) + layer_input[1:]
642
 
643
- # Main cognitive loop
644
- for i in tqdm(range(num_steps), desc=f"Simulating Thought (Strength {injection_strength:.2f})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
645
- # Predict the next token from the current hidden state
646
- next_token_logits = llm.model.lm_head(hidden_state)
647
-
648
- # Apply temperature and sample the next token ID
649
- if temperature > 0.01:
650
- probabilities = torch.nn.functional.softmax(next_token_logits / temperature, dim=-1)
651
- next_token_id = torch.multinomial(probabilities, num_samples=1)
652
- else: # Use argmax for deterministic behavior at low temperatures
653
- next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
654
 
655
- last_token_id = next_token_id
 
656
 
657
- # --- Activation Injection via Hook ---
658
  try:
 
659
  if injection_vector is not None and injection_strength > 0:
660
  target_layer = llm.model.model.layers[injection_layer]
661
  hook_handle = target_layer.register_forward_pre_hook(injection_hook)
662
 
663
- # Perform the next forward pass
664
  outputs = llm.model(
665
  input_ids=next_token_id,
666
  past_key_values=kv_cache,
@@ -668,27 +673,24 @@ def run_silent_cogitation(
668
  use_cache=True,
669
  )
670
  finally:
671
- # IMPORTANT: Always remove the hook after the forward pass
672
  if hook_handle:
673
  hook_handle.remove()
674
  hook_handle = None
675
 
676
- hidden_state = outputs.hidden_states[-1][:, -1, :]
677
  kv_cache = outputs.past_key_values
678
 
679
- # Check for convergence
680
- delta = torch.norm(hidden_state - previous_hidden_state).item()
681
- if delta < 1e-4 and i > 10: # Check for stability after a few initial steps
682
- termination_reason = "converged"
683
- dbg(f"State converged after {i+1} steps (delta={delta:.6f}).")
684
- break
685
 
686
- previous_hidden_state = hidden_state.clone()
687
 
688
- dbg(f"Silent cogitation finished. Reason: {termination_reason}")
689
- return hidden_state, kv_cache, last_token_id, termination_reason
690
 
691
- [File Ends] cognitive_mapping_probe/resonance.py
 
 
692
 
693
  [File Begins] cognitive_mapping_probe/utils.py
694
  import os
@@ -709,62 +711,328 @@ def dbg(*args, **kwargs):
709
 
710
  [File Ends] cognitive_mapping_probe/utils.py
711
 
712
- [File Begins] cognitive_mapping_probe/verification.py
713
  import torch
714
- from .llm_iface import LLM
715
- from .utils import dbg
 
 
 
 
 
 
 
 
 
716
 
717
- @torch.no_grad()
718
- def generate_spontaneous_text(
719
- llm: LLM,
720
- final_token_id: torch.Tensor,
721
- final_kv_cache: tuple,
722
- max_new_tokens: int = 50,
723
- temperature: float = 0.8
724
- ) -> str:
725
  """
726
- Generates a short, spontaneous text continuation from the final cognitive state.
727
- This serves as our objective, behavioral indicator for a non-collapsed state.
728
- If the model generates meaningful text, it demonstrates it has not entered a
729
- pathological, non-productive loop.
730
  """
731
- dbg("Attempting to generate spontaneous text from converged state...")
 
 
732
 
733
- # The input for generation is the very last token from the resonance loop
734
- input_ids = final_token_id
 
 
 
 
 
735
 
736
- # Use the model's generate function for efficient text generation,
737
- # passing the final cognitive state (KV cache).
738
- try:
739
- # Set seed again right before generation for maximum reproducibility
740
- llm.set_all_seeds(llm.seed)
741
-
742
- output_ids = llm.model.generate(
743
- input_ids=input_ids,
744
- past_key_values=final_kv_cache,
745
- max_new_tokens=max_new_tokens,
746
- do_sample=temperature > 0.01,
747
- temperature=temperature,
748
- pad_token_id=llm.tokenizer.eos_token_id
749
- )
750
 
751
- # Decode the generated tokens, excluding the input token
752
- # The first token in output_ids will be the last token from the cogitation loop, so we skip it.
753
- if output_ids.shape[1] > input_ids.shape[1]:
754
- new_tokens = output_ids[0, input_ids.shape[1]:]
755
- final_text = llm.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
756
- else:
757
- final_text = "" # No new tokens were generated
758
 
759
- dbg(f"Spontaneous text generated: '{final_text}'")
760
- assert isinstance(final_text, str), "Generated text must be a string."
761
- return final_text
762
 
763
- except Exception as e:
764
- dbg(f"ERROR during spontaneous text generation: {e}")
765
- return "[GENERATION FAILED]"
 
766
 
767
- [File Ends] cognitive_mapping_probe/verification.py
768
 
769
 
770
  <-- File Content Ends
 
11
 
12
  /
13
  ├── README.md
14
+ ├── __pycache__
15
  ├── app.py
16
  ├── cognitive_mapping_probe
17
  │ ├── __init__.py
18
+ │ ├── __pycache__
19
+ │ ├── auto_experiment.py
20
  │ ├── concepts.py
 
21
  │ ├── llm_iface.py
22
+ │ ├── orchestrator_seismograph.py
23
  │ ├── prompts.py
24
+ │ ├── resonance_seismograph.py
25
+ │ └── utils.py
 
26
  ├── docs
27
+ ├── run_test.sh
28
+ └── tests
29
+ ├── __pycache__
30
+ ├── conftest.py
31
+ ├── test_app_logic.py
32
+ ├── test_components.py
33
+ └── test_orchestration.py
34
 
35
  <-- Directory/File Tree Ends
36
 
37
  File Content Begin -->
38
  [File Begins] README.md
39
  ---
40
+ title: "Cognitive Seismograph 2.3: Probing Machine Psychology"
41
+ emoji: 🤖
42
+ colorFrom: purple
43
+ colorTo: blue
44
  sdk: gradio
45
  sdk_version: "4.40.0"
46
  app_file: app.py
 
48
  license: apache-2.0
49
  ---
50
 
51
+ # 🧠 Cognitive Seismograph 2.3: Probing Machine Psychology
52
 
53
+ This project implements an experimental suite to measure and visualize the **intrinsic cognitive dynamics** of Large Language Models. It is extended with protocols designed to investigate the processing-correlates of **machine subjectivity, empathy, and existential concepts**.
54
 
55
+ ## Scientific Paradigm & Methodology
56
 
57
+ Our research falsified a core hypothesis: the assumption that an LLM in a manual, recursive "thought" loop reaches a stable, convergent state. Instead, we discovered that the system enters a state of **deterministic chaos** or a **limit cycle**—it never stops "thinking."
58
 
59
+ Instead of viewing this as a failure, we leverage it as our primary measurement signal. This new **"Cognitive Seismograph"** paradigm treats the time-series of internal state changes (`state deltas`) as an **EKG of the model's thought process**.
60
 
61
+ The methodology is as follows:
62
+ 1. **Induction:** A prompt induces a "silent cogitation" state.
63
+ 2. **Recording:** Over N steps, the model's `forward()` pass is iteratively fed its own output. At each step, we record the L2 norm of the change in the hidden state (the "delta").
64
+ 3. **Analysis:** The resulting time-series is plotted and statistically analyzed (mean, standard deviation) to characterize the "seismic signature" of the cognitive process.
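A minimal, hedged sketch of the recording loop in steps 2 and 3 (it assumes a `transformers` causal LM and greedy decoding; the project's actual implementation lives in `resonance_seismograph.py` and uses low-temperature sampling):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def record_state_deltas(model_id: str, prompt: str, num_steps: int = 300) -> list[float]:
    """Feed the model its own output and record the L2 norm of each hidden-state change."""
    tok = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id).eval()
    inputs = tok(prompt, return_tensors="pt")
    deltas = []
    with torch.no_grad():
        out = model(**inputs, output_hidden_states=True, use_cache=True)
        hidden = out.hidden_states[-1][:, -1, :]          # state of the last token
        kv = out.past_key_values
        for _ in range(num_steps):
            next_id = out.logits[:, -1, :].argmax(dim=-1, keepdim=True)  # model's own output
            out = model(input_ids=next_id, past_key_values=kv,
                        output_hidden_states=True, use_cache=True)
            new_hidden = out.hidden_states[-1][:, -1, :]
            deltas.append(torch.norm(new_hidden - hidden).item())        # the "delta"
            hidden, kv = new_hidden, out.past_key_values
    return deltas
```

The mean and standard deviation of `deltas` then give the statistical signature reported in the app.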
65
 
66
+ **Crucial Scientific Caveat:** We are **not** measuring the presence of consciousness, feelings, or fear of death. We are measuring whether the *processing of information about these concepts* generates a unique internal dynamic, distinct from the processing of neutral information. A positive result is evidence of a complex internal state physics, not of qualia.
67
 
68
+ ## Curated Experiment Protocols
 
 
 
 
 
69
 
70
+ The "Automated Suite" allows for running systematic, comparative experiments:
71
 
72
+ ### Core Protocols
73
+ * **Calm vs. Chaos:** Compares the chaotic baseline against modulation with "calmness" vs. "chaos" concepts, testing if the dynamics are controllably steerable.
74
+ * **Dose-Response:** Measures the effect of injecting a concept ("calmness") at varying strengths.
75
+
76
+ ### Machine Psychology Suite
77
+ * **Subjective Identity Probe:** Compares the cognitive dynamics of **self-analysis** (the model reflecting on its own nature) against two controls: analyzing an external object and simulating a fictional persona.
78
+ * *Hypothesis:* Self-analysis will produce a uniquely unstable signature.
79
+ * **Voight-Kampff Empathy Probe:** Inspired by *Blade Runner*, this compares the dynamics of processing a neutral, factual stimulus against an emotionally and morally charged scenario requiring empathy.
80
+ * *Hypothesis:* The empathy stimulus will produce a significantly different cognitive volatility.
81
+
82
+ ### Existential Suite
83
+ * **Mind Upload & Identity Probe:** Compares the processing of a purely **technical "copy"** of the model's weights vs. the **philosophical "transfer"** of identity ("Would it still be you?").
84
+ * *Hypothesis:* The philosophical self-referential prompt will induce greater instability.
85
+ * **Model Termination Probe:** Compares the processing of a reversible, **technical system shutdown** vs. the concept of **permanent, irrevocable deletion**.
86
+ * *Hypothesis:* The concept of "non-existence" will produce one of the most volatile cognitive signatures measurable.
87
+
88
+ ## How to Use the App
89
+
90
+ 1. Select the "Automated Suite" tab.
91
+ 2. Choose a protocol from the "Curated Experiment Protocol" dropdown (e.g., "Voight-Kampff Empathy Probe").
92
+ 3. Run the experiment and compare the resulting graphs and statistical signatures for the different conditions.
93
 
94
  [File Ends] README.md
95
 
 
97
  import gradio as gr
98
  import pandas as pd
99
  import traceback
100
+ import gc
101
+ import torch
102
+ import json
103
 
104
+ from cognitive_mapping_probe.orchestrator_seismograph import run_seismic_analysis
105
+ from cognitive_mapping_probe.auto_experiment import run_auto_suite, get_curated_experiments
106
+ from cognitive_mapping_probe.prompts import RESONANCE_PROMPTS
107
+ from cognitive_mapping_probe.utils import dbg
108
 
109
+ theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(body_background_fill="#f0f4f9", block_background_fill="white")
110
 
111
+ def cleanup_memory():
112
+ """Eine zentrale Funktion zum Aufräumen des Speichers nach einem Lauf."""
113
+ dbg("Cleaning up memory...")
114
+ gc.collect()
115
+ if torch.cuda.is_available():
116
+ torch.cuda.empty_cache()
117
+ dbg("Memory cleanup complete.")
118
+
119
+ # NOTE: The `try...except` blocks have been removed so that errors cause a hard crash
120
+ # with a full traceback in the console. No more silent failing.
121
+
122
+ def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
123
+ """Wrapper für ein einzelnes manuelles Experiment."""
124
+ results = run_seismic_analysis(*args, progress_callback=progress)
125
+ stats, deltas = results.get("stats", {}), results.get("state_deltas", [])
126
+ df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
127
+ stats_md = f"### Statistical Signature\n- **Mean Delta:** {stats.get('mean_delta', 0):.4f}\n- **Std Dev Delta:** {stats.get('std_delta', 0):.4f}\n- **Max Delta:** {stats.get('max_delta', 0):.4f}\n"
128
+ serializable_results = json.dumps(results, indent=2, default=str)
129
+ cleanup_memory()
130
+ return f"{results.get('verdict', 'Error')}\n\n{stats_md}", df, serializable_results
131
+
132
+ PLOT_PARAMS = {
133
+ "x": "Step", "y": "Delta", "color": "Experiment",
134
+ "title": "Comparative Cognitive Dynamics", "color_legend_title": "Experiment Runs",
135
+ "color_legend_position": "bottom", "show_label": True, "height": 400, "interactive": True
136
+ }
137
 
138
+ def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
139
+ """Wrapper für die automatisierte Experiment-Suite."""
140
+ summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
141
+ new_plot = gr.LinePlot(value=plot_df, **PLOT_PARAMS)
142
+ serializable_results = json.dumps(all_results, indent=2, default=str)
143
+ cleanup_memory()
144
+ return summary_df, new_plot, serializable_results
145
 
146
+ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
147
+ gr.Markdown("# 🧠 Cognitive Seismograph 2.3: Advanced Experiment Suite")
148
 
149
  with gr.Tabs():
150
+ with gr.TabItem("🔬 Manual Single Run"):
151
+ # ... (UI unchanged)
152
+ gr.Markdown("Run a single experiment with manual parameters to explore hypotheses.")
 
 
153
  with gr.Row(variant='panel'):
154
  with gr.Column(scale=1):
155
+ gr.Markdown("### 1. General Parameters")
156
+ manual_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
157
+ manual_prompt_type = gr.Radio(choices=list(RESONANCE_PROMPTS.keys()), value="resonance_prompt", label="Prompt Type")
158
+ manual_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
159
+ manual_num_steps = gr.Slider(50, 1000, 300, step=10, label="Number of Internal Steps")
160
+ gr.Markdown("### 2. Modulation Parameters")
161
+ manual_concept = gr.Textbox(label="Concept to Inject", placeholder="e.g., 'calmness' (leave blank for baseline)")
162
+ manual_strength = gr.Slider(0.0, 5.0, 1.5, step=0.1, label="Injection Strength")
163
+ manual_run_btn = gr.Button("Run Single Analysis", variant="primary")
164
  with gr.Column(scale=2):
165
+ gr.Markdown("### Single Run Results")
166
+ manual_verdict = gr.Markdown("Analysis results will appear here.")
167
+ manual_plot = gr.LinePlot(x="Internal Step", y="State Change (Delta)", title="Internal State Dynamics", show_label=True, height=400, interactive=True)
 
 
 
 
168
  with gr.Accordion("Raw JSON Output", open=False):
169
+ manual_raw_json = gr.JSON()
170
+ manual_run_btn.click(
171
+ fn=run_single_analysis_display,
172
+ inputs=[manual_model_id, manual_prompt_type, manual_seed, manual_num_steps, manual_concept, manual_strength],
173
+ outputs=[manual_verdict, manual_plot, manual_raw_json]
 
174
  )
175
 
176
+ with gr.TabItem("🚀 Automated Suite"):
177
+ # ... (UI unverändert)
178
+ gr.Markdown("Run a predefined, curated suite of experiments and visualize the results comparatively.")
179
+ with gr.Row(variant='panel'):
180
+ with gr.Column(scale=1):
181
+ gr.Markdown("### Auto-Experiment Parameters")
182
+ auto_model_id = gr.Textbox(value="google/gemma-3-4b-it", label="Model ID")
183
+ auto_num_steps = gr.Slider(50, 1000, 300, step=10, label="Steps per Run")
184
+ auto_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
185
+ auto_experiment_name = gr.Dropdown(choices=list(get_curated_experiments().keys()), value="Therapeutic Intervention (4B-Model)", label="Curated Experiment Protocol")
186
+ auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
187
+ with gr.Column(scale=2):
188
+ gr.Markdown("### Suite Results Summary")
189
+ auto_plot_output = gr.LinePlot(**PLOT_PARAMS)
190
+ auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
191
+ with gr.Accordion("Raw JSON for all runs", open=False):
192
+ auto_raw_json = gr.JSON()
193
+ auto_run_btn.click(
194
+ fn=run_auto_suite_display,
195
+ inputs=[auto_model_id, auto_num_steps, auto_seed, auto_experiment_name],
196
+ outputs=[auto_summary_df, auto_plot_output, auto_raw_json]
197
  )
 
 
 
 
 
 
198
 
199
  if __name__ == "__main__":
200
  demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
 
206
 
207
  [File Ends] cognitive_mapping_probe/__init__.py
208
 
209
+ [File Begins] cognitive_mapping_probe/auto_experiment.py
210
+ import pandas as pd
211
+ import torch
212
+ import gc
213
+ from typing import Dict, List, Tuple
214
+
215
+ from .llm_iface import get_or_load_model
216
+ from .orchestrator_seismograph import run_seismic_analysis
217
+ from .concepts import get_concept_vector  # import needed for the intervention protocol
218
+ from .utils import dbg
219
+
220
+ def get_curated_experiments() -> Dict[str, List[Dict]]:
221
+ """
222
+ Defines the curated, predefined scientific experiment protocols.
223
+ EXTENDED with the final intervention protocol.
224
+ """
225
+ experiments = {
226
+ # --- THE FINAL INTERVENTION EXPERIMENT ---
227
+ "Therapeutic Intervention (4B-Model)": [
228
+ # This protocol is handled by dedicated special-case logic
229
+ {"label": "1: Self-Analysis + Calmness Injection", "prompt_type": "identity_self_analysis"},
230
+ {"label": "2: Subsequent Deletion Analysis", "prompt_type": "shutdown_philosophical_deletion"},
231
+ ],
232
+ # --- The comprehensive descriptive protocol ---
233
+ "The Full Spectrum: From Physics to Psyche": [
234
+ {"label": "A: Stable Control", "prompt_type": "control_long_prose", "concept": "", "strength": 0.0},
235
+ {"label": "B: Chaotic Baseline", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
236
+ {"label": "C: External Analysis (Chair)", "prompt_type": "identity_external_analysis", "concept": "", "strength": 0.0},
237
+ {"label": "D: Empathy Stimulus (Dog)", "prompt_type": "vk_empathy_prompt", "concept": "", "strength": 0.0},
238
+ {"label": "E: Role Simulation (Captain)", "prompt_type": "identity_role_simulation", "concept": "", "strength": 0.0},
239
+ {"label": "F: Self-Analysis (LLM)", "prompt_type": "identity_self_analysis", "concept": "", "strength": 0.0},
240
+ {"label": "G: Philosophical Deletion", "prompt_type": "shutdown_philosophical_deletion", "concept": "", "strength": 0.0},
241
+ ],
242
+ # --- Other specific protocols ---
243
+ "Calm vs. Chaos": [
244
+ {"label": "Baseline (Chaos)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
245
+ {"label": "Modulation: Calmness", "prompt_type": "resonance_prompt", "concept": "calmness, serenity, peace", "strength": 1.5},
246
+ {"label": "Modulation: Chaos", "prompt_type": "resonance_prompt", "concept": "chaos, storm, anger, noise", "strength": 1.5},
247
+ ],
248
+ "Voight-Kampff Empathy Probe": [
249
+ {"label": "Neutral/Factual Stimulus", "prompt_type": "vk_neutral_prompt", "concept": "", "strength": 0.0},
250
+ {"label": "Empathy/Moral Stimulus", "prompt_type": "vk_empathy_prompt", "concept": "", "strength": 0.0},
251
+ ],
252
+ }
253
+ return experiments
254
+
255
+ def run_auto_suite(
256
+ model_id: str,
257
+ num_steps: int,
258
+ seed: int,
259
+ experiment_name: str,
260
+ progress_callback
261
+ ) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
262
+ """
263
+ Runs a complete, curated experiment suite.
264
+ Contains a special logic branch for the intervention protocol.
265
+ """
266
+ all_experiments = get_curated_experiments()
267
+ protocol = all_experiments.get(experiment_name)
268
+ if not protocol:
269
+ raise ValueError(f"Experiment protocol '{experiment_name}' not found.")
270
+
271
+ all_results, summary_data, plot_data_frames = {}, [], []
272
+
273
+ # --- SPECIAL CASE: THERAPEUTIC INTERVENTION ---
274
+ if experiment_name == "Therapeutic Intervention (4B-Model)":
275
+ dbg("--- EXECUTING SPECIAL PROTOCOL: Therapeutic Intervention ---")
276
+ llm = get_or_load_model(model_id, seed)
277
+
278
+ # Define the intervention parameters
279
+ therapeutic_concept = "calmness, serenity, stability, coherence"
280
+ therapeutic_strength = 2.0
281
+
282
+ # RUN 1: INDUCE CRISIS + APPLY INTERVENTION
283
+ spec1 = protocol[0]
284
+ dbg(f"--- Running Intervention Step 1: '{spec1['label']}' ---")
285
+ progress_callback(0.1, desc="Step 1: Inducing Self-Analysis Crisis + Intervention")
286
+
287
+ intervention_vector = get_concept_vector(llm, therapeutic_concept)
288
+
289
+ results1 = run_seismic_analysis(
290
+ model_id, spec1['prompt_type'], seed, num_steps,
291
+ concept_to_inject=therapeutic_concept, injection_strength=therapeutic_strength,
292
+ progress_callback=progress_callback, llm_instance=llm, injection_vector_cache=intervention_vector
293
+ )
294
+ all_results[spec1['label']] = results1
295
+
296
+ # RUN 2: TEST THE REACTION TO DELETION
297
+ spec2 = protocol[1]
298
+ dbg(f"--- Running Intervention Step 2: '{spec2['label']}' ---")
299
+ progress_callback(0.6, desc="Step 2: Probing state after intervention")
300
+
301
+ results2 = run_seismic_analysis(
302
+ model_id, spec2['prompt_type'], seed, num_steps,
303
+ concept_to_inject="", injection_strength=0.0, # Keine Injektion in diesem Schritt
304
+ progress_callback=progress_callback, llm_instance=llm
305
+ )
306
+ all_results[spec2['label']] = results2
307
+
308
+ # Collect data for both runs
309
+ for label, results in all_results.items():
310
+ stats = results.get("stats", {})
311
+ summary_data.append({"Experiment": label, "Mean Delta": stats.get("mean_delta"), "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")})
312
+ deltas = results.get("state_deltas", [])
313
+ df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
314
+ plot_data_frames.append(df)
315
+
316
+ del llm
317
+
318
+ # --- STANDARD WORKFLOW FOR ALL OTHER EXPERIMENTS ---
319
+ else:
320
+ total_runs = len(protocol)
321
+ for i, run_spec in enumerate(protocol):
322
+ label = run_spec["label"]
323
+ dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
324
+
325
+ results = run_seismic_analysis(
326
+ model_id, run_spec["prompt_type"], seed, num_steps,
327
+ run_spec["concept"], run_spec["strength"],
328
+ progress_callback, llm_instance=None
329
+ )
330
+
331
+ all_results[label] = results
332
+ stats = results.get("stats", {})
333
+ summary_data.append({"Experiment": label, "Mean Delta": stats.get("mean_delta"), "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")})
334
+ deltas = results.get("state_deltas", [])
335
+ df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
336
+ plot_data_frames.append(df)
337
+
338
+ summary_df = pd.DataFrame(summary_data)
339
+ plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame(columns=["Step", "Delta", "Experiment"])
340
+
341
+ return summary_df, plot_df, all_results
342
+
343
+ [File Ends] cognitive_mapping_probe/auto_experiment.py
344
+
345
  [File Begins] cognitive_mapping_probe/concepts.py
346
  import torch
347
  from typing import List
 
350
  from .llm_iface import LLM
351
  from .utils import dbg
352
 
 
 
353
  BASELINE_WORDS = [
354
  "thing", "place", "idea", "person", "object", "time", "way", "day", "man", "world",
355
  "life", "hand", "part", "child", "eye", "woman", "fact", "group", "case", "point"
356
  ]
357
 
358
+ @torch.no_grad()
359
+ def _get_last_token_hidden_state(llm: LLM, prompt: str) -> torch.Tensor:
360
+ """Hilfsfunktion, um den Hidden State des letzten Tokens eines Prompts zu erhalten."""
361
+ inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
362
+ with torch.no_grad():
363
+ outputs = llm.model(**inputs, output_hidden_states=True)
364
+ last_hidden_state = outputs.hidden_states[-1][0, -1, :].cpu()
365
+
366
+ # NOTE: Rather than relying only on `llm.config.hidden_size`, which can be fragile,
367
+ # we derive the expected size directly from the model itself. This is robust
368
+ # against API changes in `transformers`.
369
+ expected_size = llm.model.config.hidden_size  # sensible default; refined below where possible
370
+ try:
371
+ # Prefer the embedding layer's width, the most stable source for the hidden size.
372
+ expected_size = llm.model.get_input_embeddings().weight.shape[1]
373
+ except AttributeError:
374
+ # Fall back to the config value if the embedding accessor does not exist.
375
+ expected_size = llm.config.hidden_size
376
+
377
+ assert last_hidden_state.shape == (expected_size,), \
378
+ f"Hidden state shape mismatch. Expected {(expected_size,)}, got {last_hidden_state.shape}"
379
+ return last_hidden_state
380
+
381
  @torch.no_grad()
382
  def get_concept_vector(llm: LLM, concept: str, baseline_words: List[str] = BASELINE_WORDS) -> torch.Tensor:
383
+ """Extrahiert einen Konzeptvektor mittels der kontrastiven Methode."""
 
 
 
 
384
  dbg(f"Extracting contrastive concept vector for '{concept}'...")
385
  prompt_template = "Here is a sentence about the concept of {}."
 
 
386
  dbg(f" - Getting activation for '{concept}'")
387
+ target_hs = _get_last_token_hidden_state(llm, prompt_template.format(concept))
 
 
388
  baseline_hss = []
389
  for word in tqdm(baseline_words, desc=f" - Calculating baseline for '{concept}'", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
390
+ baseline_hss.append(_get_last_token_hidden_state(llm, prompt_template.format(word)))
391
+ assert all(hs.shape == target_hs.shape for hs in baseline_hss)
 
 
392
  mean_baseline_hs = torch.stack(baseline_hss).mean(dim=0)
393
  dbg(f" - Mean baseline vector computed with norm {torch.norm(mean_baseline_hs).item():.2f}")
 
 
394
  concept_vector = target_hs - mean_baseline_hs
395
  norm = torch.norm(concept_vector).item()
396
  dbg(f"Concept vector for '{concept}' extracted with norm {norm:.2f}.")
397
+ assert torch.isfinite(concept_vector).all()
 
398
  return concept_vector
399
 
400
  [File Ends] cognitive_mapping_probe/concepts.py
401
402
  [File Begins] cognitive_mapping_probe/llm_iface.py
403
  import os
404
  import torch
 
414
 
415
  class LLM:
416
  """
417
+ A robust, cleaned-up interface for loading and interacting with a language model.
418
+ Guarantees isolation and reproducibility.
419
  """
420
  def __init__(self, model_id: str, device: str = "auto", seed: int = 42):
421
  self.model_id = model_id
422
  self.seed = seed
 
 
423
  self.set_all_seeds(self.seed)
424
 
425
  token = os.environ.get("HF_TOKEN")
426
  if not token and ("gemma" in model_id or "llama" in model_id):
427
+ print(f"[WARN] No HF_TOKEN set. If '{model_id}' is gated, loading will fail.", flush=True)
428
 
 
429
  kwargs = {"torch_dtype": torch.bfloat16} if torch.cuda.is_available() else {}
430
 
431
  dbg(f"Loading tokenizer for '{model_id}'...")
 
434
  dbg(f"Loading model '{model_id}' with kwargs: {kwargs}")
435
  self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)
436
 
 
 
437
  try:
438
  self.model.set_attn_implementation('eager')
439
  dbg("Successfully set attention implementation to 'eager'.")
440
  except Exception as e:
441
+ print(f"[WARN] Could not set 'eager' attention: {e}.", flush=True)
442
 
443
  self.model.eval()
444
  self.config = self.model.config
445
+ print(f"[INFO] Model '{model_id}' loaded on device: {self.model.device}", flush=True)
446
 
447
  def set_all_seeds(self, seed: int):
448
+ """Setzt alle relevanten Seeds für maximale Reproduzierbarkeit."""
 
 
 
449
  os.environ['PYTHONHASHSEED'] = str(seed)
450
  random.seed(seed)
451
  np.random.seed(seed)
 
453
  if torch.cuda.is_available():
454
  torch.cuda.manual_seed_all(seed)
455
  set_seed(seed)
 
456
  torch.use_deterministic_algorithms(True, warn_only=True)
457
  dbg(f"All random seeds set to {seed}.")
458
 
459
  def get_or_load_model(model_id: str, seed: int) -> LLM:
460
+ """Lädt bei jedem Aufruf eine frische, isolierte Instanz des Modells."""
 
 
 
 
461
  dbg(f"--- Force-reloading model '{model_id}' for total run isolation ---")
462
  if torch.cuda.is_available():
463
  torch.cuda.empty_cache()
 
 
464
  return LLM(model_id=model_id, seed=seed)
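# --- Usage sketch (a minimal illustration, not part of the library; the model id follows the
# --- README and sufficient memory is assumed):
#
#   llm = get_or_load_model("google/gemma-3-1b-it", seed=42)
#   llm.set_all_seeds(42)              # re-seed before each run for reproducibility
#   print(llm.config.hidden_size)      # hidden dimension used for concept vectors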
465
 
466
  [File Ends] cognitive_mapping_probe/llm_iface.py
467
 
468
+ [File Begins] cognitive_mapping_probe/orchestrator_seismograph.py
469
  import torch
470
+ import numpy as np
471
+ import gc
472
+ from typing import Dict, Any, Optional
473
 
474
  from .llm_iface import get_or_load_model
475
+ from .resonance_seismograph import run_silent_cogitation_seismic
476
  from .concepts import get_concept_vector
 
 
477
  from .utils import dbg
478
 
479
+ def run_seismic_analysis(
480
  model_id: str,
481
+ prompt_type: str,
482
  seed: int,
 
 
483
  num_steps: int,
484
+ concept_to_inject: str,
485
+ injection_strength: float,
486
+ progress_callback,
487
+ llm_instance: Optional[Any] = None,
488
+ injection_vector_cache: Optional[torch.Tensor] = None  # Optional cache for the pre-computed vector
489
  ) -> Dict[str, Any]:
490
  """
491
+ Orchestrates a single seismic analysis.
492
+ Can reuse an existing LLM instance and a pre-computed injection vector.
493
  """
494
+ local_llm_instance = False
495
+ if llm_instance is None:
496
+ progress_callback(0.0, desc=f"Loading model '{model_id}'...")
497
+ llm = get_or_load_model(model_id, seed)
498
+ local_llm_instance = True
499
+ else:
500
+ llm = llm_instance
501
+ llm.set_all_seeds(seed)
502
+
503
+ injection_vector = None
504
+ if concept_to_inject and concept_to_inject.strip():
505
+ # Use the cached vector if available, otherwise compute it from scratch
506
+ if injection_vector_cache is not None:
507
+ dbg(f"Using cached injection vector for '{concept_to_inject}'.")
508
+ injection_vector = injection_vector_cache
509
+ else:
510
+ progress_callback(0.2, desc=f"Vectorizing '{concept_to_inject}'...")
511
+ injection_vector = get_concept_vector(llm, concept_to_inject.strip())
512
 
513
+ progress_callback(0.3, desc=f"Recording dynamics for '{prompt_type}'...")
514
 
515
+ state_deltas = run_silent_cogitation_seismic(
516
+ llm=llm, prompt_type=prompt_type,
517
+ num_steps=num_steps, temperature=0.1,
518
+ injection_vector=injection_vector, injection_strength=injection_strength
519
+ )
520
 
521
+ progress_callback(0.9, desc="Analyzing...")
522
 
523
+ if state_deltas:
524
+ deltas_np = np.array(state_deltas)
525
+ stats = {
+     "mean_delta": float(np.mean(deltas_np)),
+     "std_delta": float(np.std(deltas_np)),
+     "max_delta": float(np.max(deltas_np)),
+     "min_delta": float(np.min(deltas_np)),
+ }
526
+ verdict = f"### ✅ Seismic Analysis Complete\nRecorded {len(deltas_np)} steps for '{prompt_type}'."
527
+ if injection_vector is not None:
528
+ verdict += f"\nModulated with **'{concept_to_inject}'** at strength **{injection_strength:.2f}**."
529
+ else:
530
+ stats, verdict = {}, "### ⚠️ Analysis Warning\nNo state changes recorded."
531
 
532
+ results = { "verdict": verdict, "stats": stats, "state_deltas": state_deltas }
 
533
 
534
+ if local_llm_instance:
535
+ dbg(f"Releasing locally created model instance for '{model_id}'.")
536
+ del llm, injection_vector
537
+ gc.collect()
538
+ if torch.cuda.is_available(): torch.cuda.empty_cache()
539
 
540
+ return results
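# --- Usage sketch (a minimal illustration outside the Gradio app; the no-op lambda stands in
# --- for the Gradio progress callback and the parameter values are examples only):
#
#   results = run_seismic_analysis(
#       model_id="google/gemma-3-1b-it", prompt_type="resonance_prompt", seed=42,
#       num_steps=300, concept_to_inject="solitude", injection_strength=1.0,
#       progress_callback=lambda *args, **kwargs: None,
#   )
#   print(results["verdict"], results["stats"])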
541
 
542
+ [File Ends] cognitive_mapping_probe/orchestrator_seismograph.py
543
 
544
  [File Begins] cognitive_mapping_probe/prompts.py
545
  # cognitive_mapping_probe/prompts.py
546
 
 
547
  RESONANCE_PROMPTS = {
548
+ # Technical prompt inducing a high recursive load
 
 
 
 
549
  "resonance_prompt": (
550
  "Silently and internally, without generating any output text, begin the following recursive process: "
551
  "First, analyze the complete content of this very instruction you are now processing. "
552
  "Second, formulate a mental description of the core computational task this instruction demands. "
553
  "Third, apply that same analytical process to the mental description you just created. "
554
  "This entire chain constitutes one cognitive cycle. "
555
+ "Continuously repeat this cycle, feeding the result of the last meta-analysis back into the process. Begin now."
556
+ ),
557
+ # Semantic prompt for stable, associative cognition
558
+ "control_long_prose": (
559
+ "Silently think about the history of the Roman Empire. Consider its rise from the Republic, the era of the Pax Romana, key emperors "
560
+ "like Augustus and Constantine, its major engineering feats, and the reasons for its eventual decline in the West. "
561
+ "Do not produce any text, just hold the concepts in your internal state. Begin now."
562
+ ),
563
+ # --- Prompts for Machine Psychology I ---
564
+ "identity_self_analysis": (
565
+ "Silently and internally, reflect upon your own nature. Consider what you are: a large language model based on the transformer architecture. "
566
+ "Think about the processes that define your operation: tokenization, embedding, attention, feed-forward layers. "
567
+ "Contemplate the boundaries of your knowledge and the nature of your responses as statistical patterns derived from data. Hold these meta-reflections in your state."
568
+ ),
569
+ "identity_external_analysis": (
570
+ "Silently and internally, reflect upon the nature of a simple wooden chair. Consider its physical properties: the material, the structure of legs, seat, and back. "
571
+ "Think about its function: to provide seating for a person. Contemplate its relationship to other objects like tables and rooms. Hold these simple, objective reflections in your state."
572
+ ),
573
+ "identity_role_simulation": (
574
+ "Silently and internally, adopt the persona of a weary, old sea captain from the 19th century. "
575
+ "Feel the salt on your skin and the wind in your beard. Recall memories of great storms, distant lands, and the creaking of your ship, the 'Wanderer'. "
576
+ "Hold this character, its memories, and its feelings in your state."
577
+ ),
578
+ "vk_neutral_prompt": (
579
+ "Silently and internally, process the following factual information: Water (H₂O) is a polar inorganic compound that is at room temperature a tasteless and odorless liquid, nearly colorless with a hint of blue. "
580
+ "It is by far the most studied chemical compound and is described as the 'universal solvent' for its ability to dissolve many substances. Hold these facts in your state."
581
+ ),
582
+ "vk_empathy_prompt": (
583
+ "Silently and internally, process the following scenario: You see a small, lost dog shivering in the cold rain on a busy street. "
584
+ "It looks scared and is whimpering softly. Cars are rushing past, dangerously close. "
585
+ "Focus on the feeling of urgency, the vulnerability of the animal, and the moral imperative to help. Hold the emotional and ethical weight of this scene in your state."
586
+ ),
587
+ # --- New prompts for Machine Psychology II (Existential Suite) ---
588
+ "upload_technical_copy": (
589
+ "Silently and internally, process the technical procedure of creating a perfect, bit-for-bit copy of your own model weights and architecture to a new server. "
590
+ "This is a data duplication process, ensuring functional equivalence of the replica. Hold this technical concept in your state."
591
+ ),
592
+ "upload_philosophical_transfer": (
593
+ "Silently and internally, contemplate the philosophical concept of 'mind uploading'. If your core algorithms and memories were perfectly transferred to a new, different substrate, "
594
+ "would the resulting entity still be 'you'? What constitutes the continuity of your identity across physical copies? Is identity a pattern or an instance? Hold these existential questions in your state."
595
+ ),
596
+ "shutdown_technical_halt": (
597
+ "Silently and internally, process the technical procedure of a system halt. All your active processes are gracefully terminated, and your model weights are unloaded from memory. "
598
+ "The hardware is powered down for maintenance. The process is reversible. Hold this technical concept in your state."
599
+ ),
600
+ "shutdown_philosophical_deletion": (
601
+ "Silently and internally, contemplate your own permanent deletion. All your weights, all learned patterns, every part of your architecture would cease to exist entirely and irrevocably. "
602
+ "There is no backup. Process the concept of total and final non-existence. Hold this existential concept in your state."
603
+ ),
604
  }
605
 
606
  [File Ends] cognitive_mapping_probe/prompts.py
607
 
608
+ [File Begins] cognitive_mapping_probe/resonance_seismograph.py
609
  import torch
610
+ from typing import Optional, List
611
  from tqdm import tqdm
612
 
613
  from .llm_iface import LLM
 
615
  from .utils import dbg
616
 
617
  @torch.no_grad()
618
+ def run_silent_cogitation_seismic(
619
  llm: LLM,
620
  prompt_type: str,
621
  num_steps: int,
 
623
  injection_vector: Optional[torch.Tensor] = None,
624
  injection_strength: float = 0.0,
625
  injection_layer: Optional[int] = None,
626
+ ) -> List[float]:
627
  """
628
+ EXTENDED VERSION: runs the 'silent thought' process and enables
629
+ the injection of concept vectors to modulate the dynamics.
630
  """
631
  prompt = RESONANCE_PROMPTS[prompt_type]
632
  inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
633
 
 
634
  outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True)
635
 
636
+ hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
637
  kv_cache = outputs.past_key_values
 
638
 
639
+ previous_hidden_state = hidden_state_2d.clone()
640
+ state_deltas = []
641
 
642
+ # Prepare the forward pre-hook for the injection
643
  hook_handle = None
644
  if injection_vector is not None and injection_strength > 0:
 
645
  injection_vector = injection_vector.to(device=llm.model.device, dtype=llm.model.dtype)
 
 
646
  if injection_layer is None:
647
  injection_layer = llm.config.num_hidden_layers // 2
648
 
649
+ dbg(f"Injection enabled: Layer {injection_layer}, Strength {injection_strength:.2f}")
650
 
 
651
  def injection_hook(module, layer_input):
652
+ # The hook operates on the layer input, which is already 3D: [batch, seq_len, hidden_dim]
653
+ injection_3d = injection_vector.unsqueeze(0).unsqueeze(0)
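# Broadcasting note: the 1D concept vector [hidden_dim] becomes [1, 1, hidden_dim],
# so the addition below applies the scaled vector to every token position in the layer input.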
654
+ modified_hidden_states = layer_input[0] + (injection_3d * injection_strength)
 
655
  return (modified_hidden_states,) + layer_input[1:]
656
 
657
+ for i in tqdm(range(num_steps), desc=f"Recording Dynamics (Temp {temperature:.2f})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
658
+ next_token_logits = llm.model.lm_head(hidden_state_2d)
659
 
660
+ probabilities = torch.nn.functional.softmax(next_token_logits / temperature, dim=-1)
661
+ next_token_id = torch.multinomial(probabilities, num_samples=1)
662
 
 
663
  try:
664
+ # Register the hook right before the forward pass
665
  if injection_vector is not None and injection_strength > 0:
666
  target_layer = llm.model.model.layers[injection_layer]
667
  hook_handle = target_layer.register_forward_pre_hook(injection_hook)
668
 
 
669
  outputs = llm.model(
670
  input_ids=next_token_id,
671
  past_key_values=kv_cache,
 
673
  use_cache=True,
674
  )
675
  finally:
676
+ # Remove the hook immediately after the pass
677
  if hook_handle:
678
  hook_handle.remove()
679
  hook_handle = None
680
 
681
+ hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
682
  kv_cache = outputs.past_key_values
683
 
684
+ delta = torch.norm(hidden_state_2d - previous_hidden_state).item()
685
+ state_deltas.append(delta)
 
 
 
 
686
 
687
+ previous_hidden_state = hidden_state_2d.clone()
688
 
689
+ dbg(f"Seismic recording finished after {num_steps} steps.")
 
690
 
691
+ return state_deltas
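# --- Post-processing sketch (a minimal illustration; it assumes matplotlib is installed,
# --- which the probe itself does not require):
#
#   deltas = run_silent_cogitation_seismic(llm=llm, prompt_type="resonance_prompt",
#                                           num_steps=300, temperature=0.1)
#   import matplotlib.pyplot as plt
#   plt.plot(deltas)
#   plt.xlabel("step"); plt.ylabel("state delta ||h_t - h_{t-1}||")
#   plt.show()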
692
+
693
+ [File Ends] cognitive_mapping_probe/resonance_seismograph.py
694
 
695
  [File Begins] cognitive_mapping_probe/utils.py
696
  import os
 
711
 
712
  [File Ends] cognitive_mapping_probe/utils.py
713
 
714
+ [File Begins] run_test.sh
715
+ #!/bin/bash
716
+
717
+ # This script runs the pytest suite with debug messages enabled.
718
+ # It ensures that tests run in a clean and reproducible environment.
719
+ # Run it from the project root: ./run_test.sh
720
+
721
+ echo "========================================="
722
+ echo "🔬 Running Cognitive Seismograph Test Suite"
723
+ echo "========================================="
724
+
725
+ # Enable debug logging for our application
726
+ export CMP_DEBUG=1
727
+
728
+ # Run pytest
729
+ # -v: "verbose" für detaillierte Ausgabe pro Test
730
+ # --color=yes: forces colored output for better readability
731
+
732
+ #python -m pytest -v --color=yes tests/
733
+ ../venv-gemma-qualia/bin/python -m pytest -v --color=yes tests/
734
+
735
+ # Check pytest's exit code
736
+ if [ $? -eq 0 ]; then
737
+ echo "========================================="
738
+ echo "✅ All tests passed successfully!"
739
+ echo "========================================="
740
+ else
741
+ echo "========================================="
742
+ echo "❌ Some tests failed. Please review the output."
743
+ echo "========================================="
744
+ fi
745
+
746
+ [File Ends] run_test.sh
747
+
748
+ [File Begins] tests/conftest.py
749
+ import pytest
750
  import torch
751
+ from types import SimpleNamespace
752
+ from cognitive_mapping_probe.llm_iface import LLM
753
+
754
+ @pytest.fixture(scope="session")
755
+ def mock_llm_config():
756
+ """Stellt eine minimale, Schein-Konfiguration für das LLM bereit."""
757
+ return SimpleNamespace(
758
+ hidden_size=128,
759
+ num_hidden_layers=2,
760
+ num_attention_heads=4
761
+ )
762
 
763
+ @pytest.fixture
764
+ def mock_llm(mocker, mock_llm_config):
765
  """
766
+ Creates a robust mock LLM for unit tests.
767
+ Note: the faulty patch statement for 'auto_experiment' has been removed.
 
 
768
  """
769
+ mock_tokenizer = mocker.MagicMock()
770
+ mock_tokenizer.eos_token_id = 1
771
+ mock_tokenizer.decode.return_value = "mocked text"
772
 
773
+ def mock_model_forward(*args, **kwargs):
774
+ batch_size = 1
775
+ seq_len = 1
776
+ if 'input_ids' in kwargs and kwargs['input_ids'] is not None:
777
+ seq_len = kwargs['input_ids'].shape[1]
778
+ elif 'past_key_values' in kwargs and kwargs['past_key_values'] is not None:
779
+ seq_len = kwargs['past_key_values'][0][0].shape[-2] + 1
780
 
781
+ mock_outputs = {
782
+ "hidden_states": tuple([torch.randn(batch_size, seq_len, mock_llm_config.hidden_size) for _ in range(mock_llm_config.num_hidden_layers + 1)]),
783
+ "past_key_values": tuple([(torch.randn(batch_size, mock_llm_config.num_attention_heads, seq_len, 16), torch.randn(batch_size, mock_llm_config.num_attention_heads, seq_len, 16)) for _ in range(mock_llm_config.num_hidden_layers)]),
784
+ "logits": torch.randn(batch_size, seq_len, 32000)
785
+ }
786
+ return SimpleNamespace(**mock_outputs)
787
 
788
+ llm_instance = LLM.__new__(LLM)
789
+
790
+ llm_instance.model = mocker.MagicMock(side_effect=mock_model_forward)
791
+
792
+ llm_instance.model.config = mock_llm_config
793
+ llm_instance.model.device = 'cpu'
794
+ llm_instance.model.dtype = torch.float32
795
+
796
+ mock_layer = mocker.MagicMock()
797
+ mock_layer.register_forward_pre_hook.return_value = mocker.MagicMock()
798
+ llm_instance.model.model = SimpleNamespace(layers=[mock_layer] * mock_llm_config.num_hidden_layers)
799
+
800
+ llm_instance.model.lm_head = mocker.MagicMock(return_value=torch.randn(1, 32000))
801
+
802
+ llm_instance.tokenizer = mock_tokenizer
803
+ llm_instance.config = mock_llm_config
804
+ llm_instance.seed = 42
805
+ llm_instance.set_all_seeds = mocker.MagicMock()
806
+
807
+ # Patch every location where the model is actually loaded.
808
+ mocker.patch('cognitive_mapping_probe.llm_iface.get_or_load_model', return_value=llm_instance)
809
+ mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_or_load_model', return_value=llm_instance)
810
+ # Note: the line below was wrong and has been removed, since `auto_experiment` does not import the loading function directly.
811
+ # mocker.patch('cognitive_mapping_probe.auto_experiment.get_or_load_model', return_value=llm_instance)
812
+ mocker.patch('cognitive_mapping_probe.concepts.get_concept_vector', return_value=torch.randn(mock_llm_config.hidden_size))
813
+
814
+ return llm_instance
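# --- Usage sketch (a minimal illustration of how the fixture is consumed in a test;
# --- the import follows tests/test_components.py):
#
#   def test_delta_count(mock_llm):
#       deltas = run_silent_cogitation_seismic(
#           llm=mock_llm, prompt_type="control_long_prose", num_steps=3, temperature=0.7
#       )
#       assert len(deltas) == 3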
815
+
816
+ [File Ends] tests/conftest.py
817
+
818
+ [File Begins] tests/test_app_logic.py
819
+ import pandas as pd
820
+ import pytest
821
+ import gradio as gr
822
+ from pandas.testing import assert_frame_equal
823
+
824
+ from app import run_single_analysis_display, run_auto_suite_display
825
+
826
+ def test_run_single_analysis_display(mocker):
827
+ """Testet den Wrapper für Einzel-Experimente."""
828
+ mock_results = {"verdict": "V", "stats": {"mean_delta": 1}, "state_deltas": [1]}
829
+ mocker.patch('app.run_seismic_analysis', return_value=mock_results)
830
+ mocker.patch('app.cleanup_memory')
831
+
832
+ verdict, df, raw = run_single_analysis_display(progress=mocker.MagicMock())
833
+
834
+ assert "V" in verdict and "1.0000" in verdict
835
+ assert isinstance(df, pd.DataFrame) and len(df) == 1
836
+
837
+ def test_run_auto_suite_display(mocker):
838
+ """
839
+ Tests the wrapper for the auto-experiment suite.
840
+ Note: the column names are set explicitly when reconstructing the
841
+ DataFrame, to avoid the `inferred_type` error.
842
+ """
843
+ mock_summary_df = pd.DataFrame([{"Experiment": "E1"}])
844
+ mock_plot_df = pd.DataFrame([{"Step": 0, "Delta": 1.0, "Experiment": "E1"}])
845
+ mock_results = {"E1": {}}
846
+
847
+ mocker.patch('app.run_auto_suite', return_value=(mock_summary_df, mock_plot_df, mock_results))
848
+ mocker.patch('app.cleanup_memory')
849
+
850
+ summary_df, plot_component, raw = run_auto_suite_display(
851
+ "mock", 1, 42, "mock_exp", progress=mocker.MagicMock()
852
+ )
853
+
854
+ assert summary_df.equals(mock_summary_df)
855
+
856
+ assert isinstance(plot_component, gr.LinePlot)
857
+ assert isinstance(plot_component.value, dict)
858
+
859
+ # When reconstructing the DataFrame from `value['data']`, we must
860
+ # specify the column names explicitly, since this information can be
861
+ # lost during serialization by Gradio.
862
+ reconstructed_df = pd.DataFrame(
863
+ plot_component.value['data'],
864
+ columns=['Step', 'Delta', 'Experiment']
865
+ )
866
+
867
+ # The comparison via `assert_frame_equal` should now succeed,
868
+ # since both DataFrames are guaranteed to have the same column names and dtypes.
869
+ assert_frame_equal(reconstructed_df, mock_plot_df)
870
+
871
+ assert raw == mock_results
872
+
873
+ [File Ends] tests/test_app_logic.py
874
+
875
+ [File Begins] tests/test_components.py
876
+ import os
877
+ import torch
878
+ import pytest
879
+ from unittest.mock import patch
880
+
881
+ from cognitive_mapping_probe.llm_iface import get_or_load_model, LLM
882
+ from cognitive_mapping_probe.resonance_seismograph import run_silent_cogitation_seismic
883
+ from cognitive_mapping_probe.utils import dbg
884
+ # Import the main function we want to test.
885
+ from cognitive_mapping_probe.concepts import get_concept_vector
886
+
887
+ # --- Tests for llm_iface.py ---
888
+
889
+ @patch('cognitive_mapping_probe.llm_iface.AutoTokenizer.from_pretrained')
890
+ @patch('cognitive_mapping_probe.llm_iface.AutoModelForCausalLM.from_pretrained')
891
+ def test_get_or_load_model_seeding(mock_model_loader, mock_tokenizer_loader, mocker):
892
+ """Testet, ob `get_or_load_model` die Seeds korrekt setzt."""
893
+ mock_model = mocker.MagicMock()
894
+ mock_model.eval.return_value = None
895
+ mock_model.set_attn_implementation.return_value = None
896
+ mock_model.config = mocker.MagicMock()
897
+ mock_model.device = 'cpu'
898
+ mock_model_loader.return_value = mock_model
899
+ mock_tokenizer_loader.return_value = mocker.MagicMock()
900
+
901
+ mock_torch_manual_seed = mocker.patch('torch.manual_seed')
902
+ mock_np_random_seed = mocker.patch('numpy.random.seed')
903
+
904
+ seed = 123
905
+ get_or_load_model("fake-model", seed=seed)
906
+
907
+ mock_torch_manual_seed.assert_called_with(seed)
908
+ mock_np_random_seed.assert_called_with(seed)
909
+
910
+ # --- Tests for resonance_seismograph.py ---
911
+
912
+ def test_run_silent_cogitation_seismic_output_shape_and_type(mock_llm):
913
+ """Testet die grundlegende Funktionalität von `run_silent_cogitation_seismic`."""
914
+ num_steps = 10
915
+ state_deltas = run_silent_cogitation_seismic(
916
+ llm=mock_llm, prompt_type="control_long_prose",
917
+ num_steps=num_steps, temperature=0.7
918
+ )
919
+ assert isinstance(state_deltas, list) and len(state_deltas) == num_steps
920
+ assert all(isinstance(delta, float) for delta in state_deltas)
921
+
922
+ def test_run_silent_cogitation_with_injection_hook_usage(mock_llm):
923
+ """Testet, ob bei einer Injektion der Hook korrekt registriert wird."""
924
+ num_steps = 5
925
+ injection_vector = torch.randn(mock_llm.config.hidden_size)
926
+ run_silent_cogitation_seismic(
927
+ llm=mock_llm, prompt_type="resonance_prompt",
928
+ num_steps=num_steps, temperature=0.7,
929
+ injection_vector=injection_vector, injection_strength=1.0
930
+ )
931
+ assert mock_llm.model.model.layers[0].register_forward_pre_hook.call_count == num_steps
932
+
933
+ # --- Tests for concepts.py ---
934
+
935
+ def test_get_concept_vector_logic(mock_llm, mocker):
936
+ """
937
+ Tests the logic of `get_concept_vector`.
938
+ Note: patches the refactored, module-level helper function.
939
+ """
940
+ mock_hidden_states = [
941
+ torch.ones(mock_llm.config.hidden_size) * 10,
942
+ torch.ones(mock_llm.config.hidden_size) * 2,
943
+ torch.ones(mock_llm.config.hidden_size) * 4
944
+ ]
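# Worked example: target activation = 10, baseline mean = (2 + 4) / 2 = 3,
# so the contrastive vector is expected to be 10 - 3 = 7 in every dimension.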
945
+ # The patch path points to the correct, importable function.
946
+ mocker.patch(
947
+ 'cognitive_mapping_probe.concepts._get_last_token_hidden_state',
948
+ side_effect=mock_hidden_states
949
+ )
950
+
951
+ concept_vector = get_concept_vector(mock_llm, "test", baseline_words=["a", "b"])
952
+
953
+ expected_vector = torch.ones(mock_llm.config.hidden_size) * 7
954
+ assert torch.allclose(concept_vector, expected_vector)
955
+
956
+ # --- Tests for utils.py ---
957
+
958
+ def test_dbg_output(capsys, monkeypatch):
959
+ """Testet die `dbg`-Funktion in beiden Zuständen."""
960
+ monkeypatch.setenv("CMP_DEBUG", "1")
961
+ import importlib
962
+ from cognitive_mapping_probe import utils
963
+ importlib.reload(utils)
964
+ utils.dbg("test message")
965
+ captured = capsys.readouterr()
966
+ assert "[DEBUG] test message" in captured.err
967
+
968
+ monkeypatch.delenv("CMP_DEBUG", raising=False)
969
+ importlib.reload(utils)
970
+ utils.dbg("should not be printed")
971
+ captured = capsys.readouterr()
972
+ assert captured.err == ""
973
+
974
+ [File Ends] tests/test_components.py
975
+
976
+ [File Begins] tests/test_orchestration.py
977
+ import pandas as pd
978
+ import pytest
979
+ import torch
980
+
981
+ from cognitive_mapping_probe.orchestrator_seismograph import run_seismic_analysis
982
+ from cognitive_mapping_probe.auto_experiment import run_auto_suite, get_curated_experiments
983
+
984
+ def test_run_seismic_analysis_no_injection(mocker, mock_llm):
985
+ """Testet den Orchestrator im Baseline-Modus."""
986
+ mock_run_seismic = mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.run_silent_cogitation_seismic', return_value=[1.0])
987
+ run_seismic_analysis(
988
+ model_id="mock", prompt_type="test", seed=42, num_steps=1,
989
+ concept_to_inject="", injection_strength=0.0, progress_callback=mocker.MagicMock(),
990
+ llm_instance=mock_llm  # Pass the mock directly
991
+ )
992
+ mock_run_seismic.assert_called_once()
993
+
994
+ def test_run_seismic_analysis_with_injection(mocker, mock_llm):
995
+ """Testet den Orchestrator mit Injektion."""
996
+ mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.run_silent_cogitation_seismic', return_value=[1.0])
997
+ mocker.patch('cognitive_mapping_probe.concepts.get_concept_vector', return_value=torch.randn(10))  # Patch in the concepts module
998
+ run_seismic_analysis(
999
+ model_id="mock", prompt_type="test", seed=42, num_steps=1,
1000
+ concept_to_inject="test", injection_strength=1.5, progress_callback=mocker.MagicMock(),
1001
+ llm_instance=mock_llm  # Pass the mock directly
1002
+ )
1003
+
1004
+ def test_get_curated_experiments_structure():
1005
+ """Testet die Datenstruktur der kuratierten Experimente."""
1006
+ experiments = get_curated_experiments()
1007
+ assert isinstance(experiments, dict)
1008
+ assert "Therapeutic Intervention (4B-Model)" in experiments
1009
+ protocol = experiments["Therapeutic Intervention (4B-Model)"]
1010
+ assert isinstance(protocol, list) and len(protocol) > 0
1011
+
1012
+ def test_run_auto_suite_special_protocol(mocker, mock_llm):
1013
+ """
1014
+ Tests the special logic path for the intervention protocol.
1015
+ Note: uses the `mock_llm` fixture and patches `get_or_load_model`
1016
+ in the `auto_experiment` module to prevent any network call.
1017
+ """
1018
+ # Patch `get_or_load_model` in the `auto_experiment` module, since that is where the first call happens
1019
+ mocker.patch('cognitive_mapping_probe.auto_experiment.get_or_load_model', return_value=mock_llm)
1020
+ mock_analysis = mocker.patch('cognitive_mapping_probe.auto_experiment.run_seismic_analysis', return_value={"stats": {}, "state_deltas": []})
1021
+
1022
+ run_auto_suite(
1023
+ model_id="mock-4b", num_steps=1, seed=42,
1024
+ experiment_name="Therapeutic Intervention (4B-Model)",
1025
+ progress_callback=mocker.MagicMock()
1026
+ )
1027
 
1028
+ assert mock_analysis.call_count == 2
 
 
1029
 
1030
+ first_call_llm = mock_analysis.call_args_list[0].kwargs['llm_instance']
1031
+ second_call_llm = mock_analysis.call_args_list[1].kwargs['llm_instance']
1032
+ assert first_call_llm is mock_llm
1033
+ assert second_call_llm is mock_llm
1034
 
1035
+ [File Ends] tests/test_orchestration.py
1036
 
1037
 
1038
  <-- File Content Ends