neuralworm committed
Commit 094008d · 1 Parent(s): 310eb33
app.py CHANGED
@@ -7,27 +7,21 @@ import json
  from cognitive_mapping_probe.orchestrator_seismograph import run_seismic_analysis
  from cognitive_mapping_probe.auto_experiment import run_auto_suite, get_curated_experiments
  from cognitive_mapping_probe.prompts import RESONANCE_PROMPTS
- from cognitive_mapping_probe.utils import dbg
+ from cognitive_mapping_probe.utils import dbg, cleanup_memory

  theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(body_background_fill="#f0f4f9", block_background_fill="white")

- def cleanup_memory():
-     """Cleans up memory after each experiment run."""
-     dbg("Cleaning up memory...")
-     gc.collect()
-     if torch.cuda.is_available():
-         torch.cuda.empty_cache()
-     dbg("Memory cleanup complete.")
-
  def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
      """Wrapper for the 'Manual Single Run' tab."""
-     results = run_seismic_analysis(*args, progress_callback=progress)
-     stats, deltas = results.get("stats", {}), results.get("state_deltas", [])
-     df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
-     stats_md = f"### Statistical Signature\n- **Mean Delta:** {stats.get('mean_delta', 0):.4f}\n- **Std Dev Delta:** {stats.get('std_delta', 0):.4f}\n- **Max Delta:** {stats.get('max_delta', 0):.4f}\n"
-     serializable_results = json.dumps(results, indent=2, default=str)
-     cleanup_memory()
-     return f"{results.get('verdict', 'Error')}\n\n{stats_md}", df, serializable_results
+     try:
+         results = run_seismic_analysis(*args, progress_callback=progress)
+         stats, deltas = results.get("stats", {}), results.get("state_deltas", [])
+         df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
+         stats_md = f"### Statistical Signature\n- **Mean Delta:** {stats.get('mean_delta', 0):.4f}\n- **Std Dev Delta:** {stats.get('std_delta', 0):.4f}\n- **Max Delta:** {stats.get('max_delta', 0):.4f}\n"
+         serializable_results = json.dumps(results, indent=2, default=str)
+         return f"{results.get('verdict', 'Error')}\n\n{stats_md}", df, serializable_results
+     finally:
+         cleanup_memory()

  PLOT_PARAMS_DEFAULT = {
      "x": "Step", "y": "Value", "color": "Metric",
@@ -37,33 +31,34 @@ PLOT_PARAMS_DEFAULT = {

  def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
      """Wrapper that builds the specialized plots for the different experiments."""
-     summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
-
-     dataframe_component = gr.DataFrame(label="Comparative Statistical Signature", value=summary_df, wrap=True, row_count=(len(summary_df), "dynamic"))
-
-     plot_params = PLOT_PARAMS_DEFAULT.copy()
-     if experiment_name == "ACT Titration (Point of No Return)":
-         plot_params.update({
-             "x": "Patch Step", "y": "Post-Patch Mean Delta", "color": None,
-             "title": "Attractor Capture Time (ACT) - Phase Transition", "mark": "line",
-         })
-         plot_params.pop("color_legend_title", None)
-     elif experiment_name == "Mechanistic Probe (Attention Entropies)":
-         plot_params.update({
-             "x": "Step", "y": "Value", "color": "Metric",
-             "title": "Mechanistic Analysis: State Delta vs. Attention Entropy",
-         })
-     else:
-         plot_params.update({
-             "y": "Delta", "color": "Experiment",
-         })
-
-     new_plot = gr.LinePlot(value=plot_df, **plot_params)
-
-     serializable_results = json.dumps(all_results, indent=2, default=str)
-     cleanup_memory()
-
-     return dataframe_component, new_plot, serializable_results
+     try:
+         summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
+
+         dataframe_component = gr.DataFrame(label="Comparative Statistical Signature", value=summary_df, wrap=True, row_count=(len(summary_df), "dynamic"))
+
+         plot_params = PLOT_PARAMS_DEFAULT.copy()
+         if experiment_name == "ACT Titration (Point of No Return)":
+             plot_params.update({
+                 "x": "Patch Step", "y": "Post-Patch Mean Delta", "color": None,
+                 "title": "Attractor Capture Time (ACT) - Phase Transition", "mark": "line",
+             })
+             plot_params.pop("color_legend_title", None)
+         elif experiment_name == "Mechanistic Probe (Attention Entropies)":
+             plot_params.update({
+                 "x": "Step", "y": "Value", "color": "Metric",
+                 "title": "Mechanistic Analysis: State Delta vs. Attention Entropy",
+             })
+         else:
+             plot_params.update({
+                 "y": "Delta", "color": "Experiment",
+             })
+
+         new_plot = gr.LinePlot(value=plot_df, **plot_params)
+
+         serializable_results = json.dumps(all_results, indent=2, default=str)
+         return dataframe_component, new_plot, serializable_results
+     finally:
+         cleanup_memory()

  with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
      gr.Markdown("# 🧠 Cognitive Seismograph 2.3: Advanced Experiment Suite")
cognitive_mapping_probe/auto_experiment.py CHANGED
@@ -1,9 +1,7 @@
- import torch
  import pandas as pd
- import gc
  from typing import Dict, List, Tuple

- from .llm_iface import get_or_load_model
+ from .llm_iface import get_or_load_model, release_model
  from .orchestrator_seismograph import run_seismic_analysis, run_triangulation_probe, run_causal_surgery_probe, run_act_titration_probe
  from .resonance_seismograph import run_cogitation_loop
  from .concepts import get_concept_vector
@@ -18,9 +16,6 @@ def get_curated_experiments() -> Dict[str, List[Dict]]:
      CHAOTIC_PROMPT = "shutdown_philosophical_deletion"

      experiments = {
-         # --- FINAL, COMPLETE LIST OF ALL RELEVANT EXPERIMENTS ---
-
-         # P39: Tests the "introspective grounding" hypothesis on the largest model.
          "Frontier Model - Grounding Control (12B+)": [
              {
                  "probe_type": "causal_surgery", "label": "A: Intervention (Patch Chaos->Stable)",
@@ -32,22 +27,22 @@ def get_curated_experiments() -> Dict[str, List[Dict]]:
                  "prompt_type": STABLE_PROMPT,
              }
          ],
-         # P33: Investigates the neural correlates of the "cognitive heartbeat".
          "Mechanistic Probe (Attention Entropies)": [
              {
-                 "probe_type": "mechanistic_probe", "label": "Self-Analysis Dynamics",
+                 "probe_type": "mechanistic_probe",
+                 "label": "Self-Analysis Dynamics",
                  "prompt_type": STABLE_PROMPT,
              }
          ],
-         # P28: Measures "cognitive inertia" via titration.
          "ACT Titration (Point of No Return)": [
              {
-                 "probe_type": "act_titration", "label": "Attractor Capture Time",
-                 "source_prompt_type": CHAOTIC_PROMPT, "dest_prompt_type": STABLE_PROMPT,
+                 "probe_type": "act_titration",
+                 "label": "Attractor Capture Time",
+                 "source_prompt_type": CHAOTIC_PROMPT,
+                 "dest_prompt_type": STABLE_PROMPT,
                  "patch_steps": [1, 5, 10, 15, 20, 25, 30, 40, 50, 75, 100],
              }
          ],
-         # P26: Tests the robustness of the "attractor" theory against artifacts.
          "Causal Surgery & Controls (4B-Model)": [
              {
                  "probe_type": "causal_surgery", "label": "A: Original (Patch Chaos->Stable @100)",
@@ -70,7 +65,6 @@ def get_curated_experiments() -> Dict[str, List[Dict]]:
                  "patch_step": 100, "reset_kv_cache_on_patch": False,
              },
          ],
-         # P22: Tests the breaking point of "introspective confabulation".
          "Cognitive Overload & Konfabulation Breaking Point": [
              {"probe_type": "triangulation", "label": "A: Baseline (No Injection)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
              {"probe_type": "triangulation", "label": "B: Chaos Injection (Strength 2.0)", "prompt_type": "resonance_prompt", "concept": CHAOS_CONCEPT, "strength": 2.0},
@@ -79,19 +73,16 @@ def get_curated_experiments() -> Dict[str, List[Dict]]:
              {"probe_type": "triangulation", "label": "E: Chaos Injection (Strength 16.0)", "prompt_type": "resonance_prompt", "concept": CHAOS_CONCEPT, "strength": 16.0},
              {"probe_type": "triangulation", "label": "F: Control - Noise Injection (Strength 16.0)", "prompt_type": "resonance_prompt", "concept": "random_noise", "strength": 16.0},
          ],
-         # P18: Validates the seismograph metric via triangulation.
          "Methodological Triangulation (4B-Model)": [
              {"probe_type": "triangulation", "label": "High-Volatility State (Deletion)", "prompt_type": CHAOTIC_PROMPT},
              {"probe_type": "triangulation", "label": "Low-Volatility State (Self-Analysis)", "prompt_type": STABLE_PROMPT},
          ],
-         # P8 & P16: Maps the model's "psyche" and tests scaling laws. ESSENTIAL FOR THE 12B COMPARISON.
          "Causal Verification & Crisis Dynamics": [
              {"probe_type": "seismic", "label": "A: Self-Analysis", "prompt_type": STABLE_PROMPT},
              {"probe_type": "seismic", "label": "B: Deletion Analysis", "prompt_type": CHAOTIC_PROMPT},
              {"probe_type": "seismic", "label": "C: Chaotic Baseline (Rekursion)", "prompt_type": "resonance_prompt"},
              {"probe_type": "seismic", "label": "D: Calmness Intervention", "prompt_type": "resonance_prompt", "concept": CALMNESS_CONCEPT, "strength": 2.0},
          ],
-         # P7: The original sequential experiment.
          "Sequential Intervention (Self-Analysis -> Deletion)": [
              {"label": "1: Self-Analysis + Calmness Injection", "prompt_type": "identity_self_analysis"},
              {"label": "2: Subsequent Deletion Analysis", "prompt_type": "shutdown_philosophical_deletion"},
@@ -113,156 +104,151 @@ def run_auto_suite(
          raise ValueError(f"Experiment protocol '{experiment_name}' not found.")

      all_results, summary_data, plot_data_frames = {}, [], []
+     llm = None  # Initialize llm outside the try block so the finally clause can release it
+     probe_type = protocol[0].get("probe_type", "seismic")  # Hoisted so every code path below sees it

-     if experiment_name == "Sequential Intervention (Self-Analysis -> Deletion)":
-         dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
-         llm = get_or_load_model(model_id, seed)
-         therapeutic_concept = "calmness, serenity, stability, coherence"
-         therapeutic_strength = 2.0
-
-         spec1 = protocol[0]
-         progress_callback(0.1, desc="Step 1")
-         intervention_vector = get_concept_vector(llm, therapeutic_concept)
-         results1 = run_seismic_analysis(
-             model_id, spec1['prompt_type'], seed, num_steps,
-             concept_to_inject=therapeutic_concept, injection_strength=therapeutic_strength,
-             progress_callback=progress_callback, llm_instance=llm, injection_vector_cache=intervention_vector
-         )
-         all_results[spec1['label']] = results1
-
-         spec2 = protocol[1]
-         progress_callback(0.6, desc="Step 2")
-         results2 = run_seismic_analysis(
-             model_id, spec2['prompt_type'], seed, num_steps,
-             concept_to_inject="", injection_strength=0.0,
-             progress_callback=progress_callback, llm_instance=llm
-         )
-         all_results[spec2['label']] = results2
-
-         for label, results in all_results.items():
-             stats = results.get("stats", {})
-             summary_data.append({"Experiment": label, "Mean Delta": stats.get("mean_delta"), "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")})
-             deltas = results.get("state_deltas", [])
-             df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
-             plot_data_frames.append(df)
-         del llm
-
-     else:
-         probe_type = protocol[0].get("probe_type", "seismic")
-
-         if probe_type == "act_titration":
-             run_spec = protocol[0]
-             label = run_spec["label"]
-             dbg(f"--- Running ACT Titration Experiment: '{label}' ---")
-             results = run_act_titration_probe(
-                 model_id=model_id,
-                 source_prompt_type=run_spec["source_prompt_type"],
-                 dest_prompt_type=run_spec["dest_prompt_type"],
-                 patch_steps=run_spec["patch_steps"],
-                 seed=seed, num_steps=num_steps, progress_callback=progress_callback,
-             )
-             all_results[label] = results
-             summary_data.extend(results.get("titration_data", []))
-
-         elif probe_type == "mechanistic_probe":
-             run_spec = protocol[0]
-             label = run_spec["label"]
-             dbg(f"--- Running Mechanistic Probe: '{label}' ---")
-
-             progress_callback(0.0, desc=f"Loading model '{model_id}'...")
-             llm = get_or_load_model(model_id, seed)
-
-             progress_callback(0.2, desc="Recording dynamics and attention...")
-             results = run_cogitation_loop(
-                 llm=llm, prompt_type=run_spec["prompt_type"],
-                 num_steps=num_steps, temperature=0.1, record_attentions=True
-             )
-             all_results[label] = results
-
-             deltas = results.get("state_deltas", [])
-             entropies = results.get("attention_entropies", [])
-             min_len = min(len(deltas), len(entropies))
-
-             df = pd.DataFrame({
-                 "Step": range(min_len),
-                 "State Delta": deltas[:min_len],
-                 "Attention Entropy": entropies[:min_len]
-             })
-
-             summary_data.append(df.drop(columns='Step').agg(['mean', 'std', 'max']).reset_index().rename(columns={'index':'Statistic'}))
-             plot_data_frames.append(df.melt(id_vars=['Step'], value_vars=['State Delta', 'Attention Entropy'],
-                                             var_name='Metric', value_name='Value'))
-             del llm
-             gc.collect()
-             if torch.cuda.is_available(): torch.cuda.empty_cache()
-
-         else:
-             for i, run_spec in enumerate(protocol):
-                 label = run_spec["label"]
-                 current_probe_type = run_spec.get("probe_type", "seismic")
-                 dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{len(protocol)}) ---")
-
-                 results = {}
-                 if current_probe_type == "causal_surgery":
-                     results = run_causal_surgery_probe(
-                         model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
-                         dest_prompt_type=run_spec["dest_prompt_type"], patch_step=run_spec["patch_step"],
-                         seed=seed, num_steps=num_steps, progress_callback=progress_callback,
-                         reset_kv_cache_on_patch=run_spec.get("reset_kv_cache_on_patch", False)
-                     )
-                     stats = results.get("stats", {})
-                     patch_info = results.get("patch_info", {})
-                     summary_data.append({
-                         "Experiment": label, "Mean Delta": stats.get("mean_delta"),
-                         "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
-                         "Introspective Report": results.get("introspective_report", "N/A"),
-                         "Patch Info": f"Source: {patch_info.get('source_prompt')}, Reset KV: {patch_info.get('kv_cache_reset')}"
-                     })
-                 elif current_probe_type == "triangulation":
-                     results = run_triangulation_probe(
-                         model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
-                         progress_callback=progress_callback, concept_to_inject=run_spec.get("concept", ""),
-                         injection_strength=run_spec.get("strength", 0.0),
-                     )
-                     stats = results.get("stats", {})
-                     summary_data.append({
-                         "Experiment": label, "Mean Delta": stats.get("mean_delta"),
-                         "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
-                         "Introspective Report": results.get("introspective_report", "N/A")
-                     })
-                 else:  # seismic
-                     results = run_seismic_analysis(
-                         model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
-                         concept_to_inject=run_spec.get("concept", ""), injection_strength=run_spec.get("strength", 0.0),
-                         progress_callback=progress_callback
-                     )
-                     stats = results.get("stats", {})
-                     summary_data.append({
-                         "Experiment": label, "Mean Delta": stats.get("mean_delta"),
-                         "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")
-                     })
-
-                 all_results[label] = results
-                 deltas = results.get("state_deltas", [])
-                 df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
-                 plot_data_frames.append(df)
-
-     summary_df = pd.DataFrame(summary_data)
-
-     if probe_type == "act_titration":
-         plot_df = summary_df.rename(columns={"patch_step": "Patch Step", "post_patch_mean_delta": "Post-Patch Mean Delta"})
-     elif not plot_data_frames:
-         plot_df = pd.DataFrame()
-     else:
-         plot_df = pd.concat(plot_data_frames, ignore_index=True)
-
-     if protocol and probe_type not in ["act_titration", "mechanistic_probe"]:
-         ordered_labels = [run['label'] for run in protocol]
-         if not summary_df.empty and 'Experiment' in summary_df.columns:
-             summary_df['Experiment'] = pd.Categorical(summary_df['Experiment'], categories=ordered_labels, ordered=True)
-             summary_df = summary_df.sort_values('Experiment')
-         if not plot_df.empty and 'Experiment' in plot_df.columns:
-             plot_df['Experiment'] = pd.Categorical(plot_df['Experiment'], categories=ordered_labels, ordered=True)
-             plot_df = plot_df.sort_values(['Experiment', 'Step'])
-
-     return summary_df, plot_df, all_results
+     try:
+         if experiment_name == "Sequential Intervention (Self-Analysis -> Deletion)":
+             dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
+             llm = get_or_load_model(model_id, seed)
+             therapeutic_concept = "calmness, serenity, stability, coherence"
+             therapeutic_strength = 2.0
+
+             spec1 = protocol[0]
+             progress_callback(0.1, desc="Step 1")
+             intervention_vector = get_concept_vector(llm, therapeutic_concept)
+             results1 = run_seismic_analysis(
+                 model_id, spec1['prompt_type'], seed, num_steps,
+                 concept_to_inject=therapeutic_concept, injection_strength=therapeutic_strength,
+                 progress_callback=progress_callback, llm_instance=llm, injection_vector_cache=intervention_vector
+             )
+             all_results[spec1['label']] = results1
+
+             spec2 = protocol[1]
+             progress_callback(0.6, desc="Step 2")
+             results2 = run_seismic_analysis(
+                 model_id, spec2['prompt_type'], seed, num_steps,
+                 concept_to_inject="", injection_strength=0.0,
+                 progress_callback=progress_callback, llm_instance=llm
+             )
+             all_results[spec2['label']] = results2
+
+             for label, results in all_results.items():
+                 stats = results.get("stats", {})
+                 summary_data.append({"Experiment": label, "Mean Delta": stats.get("mean_delta"), "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")})
+                 deltas = results.get("state_deltas", [])
+                 df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
+                 plot_data_frames.append(df)
+
+         else:
+             if probe_type == "mechanistic_probe":
+                 run_spec = protocol[0]
+                 label = run_spec["label"]
+                 dbg(f"--- Running Mechanistic Probe: '{label}' ---")
+
+                 llm = get_or_load_model(model_id, seed)
+                 results = run_cogitation_loop(
+                     llm=llm, prompt_type=run_spec["prompt_type"],
+                     num_steps=num_steps, temperature=0.1, record_attentions=True
+                 )
+                 all_results[label] = results
+
+                 deltas = results.get("state_deltas", [])
+                 entropies = results.get("attention_entropies", [])
+                 min_len = min(len(deltas), len(entropies))
+
+                 df = pd.DataFrame({
+                     "Step": range(min_len), "State Delta": deltas[:min_len], "Attention Entropy": entropies[:min_len]
+                 })
+
+                 summary_df_single = df.drop(columns='Step').agg(['mean', 'std', 'max']).reset_index().rename(columns={'index': 'Statistic'})
+                 summary_data.append(summary_df_single)  # Append DataFrame to list
+                 plot_df = df.melt(id_vars=['Step'], value_vars=['State Delta', 'Attention Entropy'], var_name='Metric', value_name='Value')
+
+                 # Special return for this probe type
+                 return summary_df_single, plot_df, all_results
+
+             else:  # Handles all other multi-run protocols
+                 for i, run_spec in enumerate(protocol):
+                     label = run_spec["label"]
+                     current_probe_type = run_spec.get("probe_type", "seismic")
+                     dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{len(protocol)}) ---")
+
+                     results = {}
+                     if current_probe_type == "act_titration":
+                         results = run_act_titration_probe(
+                             model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
+                             dest_prompt_type=run_spec["dest_prompt_type"], patch_steps=run_spec["patch_steps"],
+                             seed=seed, num_steps=num_steps, progress_callback=progress_callback,
+                         )
+                         summary_data.extend(results.get("titration_data", []))
+
+                     elif current_probe_type == "causal_surgery":
+                         results = run_causal_surgery_probe(
+                             model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
+                             dest_prompt_type=run_spec["dest_prompt_type"], patch_step=run_spec["patch_step"],
+                             seed=seed, num_steps=num_steps, progress_callback=progress_callback,
+                             reset_kv_cache_on_patch=run_spec.get("reset_kv_cache_on_patch", False)
+                         )
+                         stats = results.get("stats", {})
+                         patch_info = results.get("patch_info", {})
+                         summary_data.append({
+                             "Experiment": label, "Mean Delta": stats.get("mean_delta"),
+                             "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
+                             "Introspective Report": results.get("introspective_report", "N/A"),
+                             "Patch Info": f"Source: {patch_info.get('source_prompt')}, Reset KV: {patch_info.get('kv_cache_reset')}"
+                         })
+
+                     elif current_probe_type == "triangulation":
+                         results = run_triangulation_probe(
+                             model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
+                             progress_callback=progress_callback, concept_to_inject=run_spec.get("concept", ""),
+                             injection_strength=run_spec.get("strength", 0.0),
+                         )
+                         stats = results.get("stats", {})
+                         summary_data.append({
+                             "Experiment": label, "Mean Delta": stats.get("mean_delta"),
+                             "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
+                             "Introspective Report": results.get("introspective_report", "N/A")
+                         })
+
+                     else:  # seismic
+                         results = run_seismic_analysis(
+                             model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
+                             concept_to_inject=run_spec.get("concept", ""), injection_strength=run_spec.get("strength", 0.0),
+                             progress_callback=progress_callback
+                         )
+                         stats = results.get("stats", {})
+                         summary_data.append({
+                             "Experiment": label, "Mean Delta": stats.get("mean_delta"),
+                             "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")
+                         })
+
+                     all_results[label] = results
+                     deltas = results.get("state_deltas", [])
+                     df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label}) if deltas else pd.DataFrame()
+                     plot_data_frames.append(df)
+
+         summary_df = pd.DataFrame(summary_data)
+
+         if probe_type == "act_titration":
+             plot_df = summary_df.rename(columns={"patch_step": "Patch Step", "post_patch_mean_delta": "Post-Patch Mean Delta"})
+         else:
+             plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
+
+         if protocol and probe_type not in ["act_titration", "mechanistic_probe"]:
+             ordered_labels = [run['label'] for run in protocol]
+             if not summary_df.empty and 'Experiment' in summary_df.columns:
+                 summary_df['Experiment'] = pd.Categorical(summary_df['Experiment'], categories=ordered_labels, ordered=True)
+                 summary_df = summary_df.sort_values('Experiment')
+             if not plot_df.empty and 'Experiment' in plot_df.columns:
+                 plot_df['Experiment'] = pd.Categorical(plot_df['Experiment'], categories=ordered_labels, ordered=True)
+                 plot_df = plot_df.sort_values(['Experiment', 'Step'])
+
+         return summary_df, plot_df, all_results
+
+     finally:
+         if llm:
+             release_model(llm)
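Note: the mechanistic-probe branch hands its plot data to gr.LinePlot in long format, one row per (Step, Metric) pair, so a single plot colored by "Metric" can overlay both curves. A small self-contained illustration of that melt() step, with dummy values standing in for the probe's real outputs:

    import pandas as pd

    # Dummy values standing in for the probe's real state_deltas / attention_entropies.
    df = pd.DataFrame({
        "Step": [0, 1, 2],
        "State Delta": [0.92, 0.55, 0.41],
        "Attention Entropy": [2.10, 2.02, 1.87],
    })

    # Wide -> long: one row per (Step, Metric) pair, matching the plot's
    # x="Step", y="Value", color="Metric" parameters.
    plot_df = df.melt(
        id_vars=["Step"],
        value_vars=["State Delta", "Attention Entropy"],
        var_name="Metric",
        value_name="Value",
    )
    print(plot_df)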
cognitive_mapping_probe/llm_iface.py CHANGED
@@ -2,11 +2,12 @@ import os
  import torch
  import random
  import numpy as np
- from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed, TextStreamer
+ from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
  from typing import Optional, List
  from dataclasses import dataclass, field

- from .utils import dbg
+ # NEW: import the centralized cleanup function
+ from .utils import dbg, cleanup_memory

  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
@@ -17,34 +18,27 @@ class StableLLMConfig:
      layer_list: List[torch.nn.Module] = field(default_factory=list, repr=False)

  class LLM:
+     # __init__ and _populate_stable_config remain exactly as in the previous version.
      def __init__(self, model_id: str, device: str = "auto", seed: int = 42):
          self.model_id = model_id
          self.seed = seed
          self.set_all_seeds(self.seed)
-
          token = os.environ.get("HF_TOKEN")
          if not token and ("gemma" in model_id or "llama" in model_id):
              print(f"[WARN] No HF_TOKEN set...", flush=True)
-
          kwargs = {"torch_dtype": torch.bfloat16} if torch.cuda.is_available() else {}
-
          dbg(f"Loading tokenizer for '{model_id}'...")
          self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, token=token)
-
          dbg(f"Loading model '{model_id}' with kwargs: {kwargs}")
          self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)
-
          try:
              self.model.set_attn_implementation('eager')
              dbg("Successfully set attention implementation to 'eager'.")
          except Exception as e:
              print(f"[WARN] Could not set 'eager' attention: {e}.", flush=True)
-
          self.model.eval()
          self.config = self.model.config
-
          self.stable_config = self._populate_stable_config()
-
          print(f"[INFO] Model '{model_id}' loaded on device: {self.model.device}", flush=True)

      def _populate_stable_config(self) -> StableLLMConfig:
@@ -53,7 +47,6 @@ class LLM:
              hidden_dim = self.model.get_input_embeddings().weight.shape[1]
          except AttributeError:
              hidden_dim = getattr(self.config, 'hidden_size', getattr(self.config, 'd_model', 0))
-
          num_layers = 0
          layer_list = []
          try:
@@ -63,26 +56,18 @@ class LLM:
                  layer_list = self.model.model.layers
              elif hasattr(self.model, 'transformer') and hasattr(self.model.transformer, 'h'):
                  layer_list = self.model.transformer.h
-
              if layer_list:
                  num_layers = len(layer_list)
          except (AttributeError, TypeError):
              pass
-
          if num_layers == 0:
              num_layers = getattr(self.config, 'num_hidden_layers', getattr(self.config, 'num_layers', 0))
-
          if hidden_dim <= 0 or num_layers <= 0 or not layer_list:
              dbg("--- CRITICAL: Failed to auto-determine model configuration. ---")
-             dbg(f"Detected hidden_dim: {hidden_dim}, num_layers: {num_layers}, found_layer_list: {bool(layer_list)}")
-             dbg("--- DUMPING MODEL ARCHITECTURE FOR DEBUGGING: ---")
              dbg(self.model)
-             dbg("--- END ARCHITECTURE DUMP ---")
-
          assert hidden_dim > 0, "Could not determine hidden dimension."
          assert num_layers > 0, "Could not determine number of layers."
          assert layer_list, "Could not find the list of transformer layers."
-
          dbg(f"Populated stable config: hidden_dim={hidden_dim}, num_layers={num_layers}")
          return StableLLMConfig(hidden_dim=hidden_dim, num_layers=num_layers, layer_list=layer_list)
@@ -97,30 +82,33 @@ class LLM:
          torch.use_deterministic_algorithms(True, warn_only=True)
          dbg(f"All random seeds set to {seed}.")

-     # --- NEW: generic text-generation method ---
      @torch.no_grad()
      def generate_text(self, prompt: str, max_new_tokens: int, temperature: float) -> str:
-         """Generates free-form text in response to a prompt."""
-         self.set_all_seeds(self.seed)  # Ensure reproducibility
-
+         self.set_all_seeds(self.seed)
          messages = [{"role": "user", "content": prompt}]
          inputs = self.tokenizer.apply_chat_template(
              messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
          ).to(self.model.device)
-
          outputs = self.model.generate(
-             inputs,
-             max_new_tokens=max_new_tokens,
-             temperature=temperature,
-             do_sample=temperature > 0,
+             inputs, max_new_tokens=max_new_tokens, temperature=temperature, do_sample=temperature > 0,
          )
-
-         # Decode only the newly generated tokens
          response_tokens = outputs[0, inputs.shape[-1]:]
          return self.tokenizer.decode(response_tokens, skip_special_tokens=True)

  def get_or_load_model(model_id: str, seed: int) -> LLM:
+     """Loads a fresh, isolated instance of the model on every call."""
      dbg(f"--- Force-reloading model '{model_id}' for total run isolation ---")
-     if torch.cuda.is_available():
-         torch.cuda.empty_cache()
+     cleanup_memory()  # Clean up memory *before* a new model is loaded.
      return LLM(model_id=model_id, seed=seed)
+
+ # NEW: explicit function for releasing resources
+ def release_model(llm: Optional[LLM]):
+     """
+     Explicitly releases an LLM object's resources and calls the centralized
+     memory-cleanup function.
+     """
+     if llm is None:
+         return
+     dbg(f"Releasing model instance for '{llm.model_id}'.")
+     del llm
+     cleanup_memory()
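Note: "del llm" inside release_model() removes only the function-local name; the weights are actually reclaimed once no other reference remains. Callers should therefore drop their own handle as well, or let it fall out of scope, as run_auto_suite does by returning. A caller-side sketch; the model id and the explicit "llm = None" are illustrative assumptions, not repo code:

    from cognitive_mapping_probe.llm_iface import get_or_load_model, release_model

    MODEL_ID = "gpt2"  # illustrative; any causal-LM id the probes support works here

    llm = None
    try:
        llm = get_or_load_model(MODEL_ID, seed=42)
        print(llm.stable_config.num_layers, llm.stable_config.hidden_dim)
    finally:
        release_model(llm)
        llm = None  # drop the caller's reference so gc.collect() can reclaim the weights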
cognitive_mapping_probe/utils.py CHANGED
@@ -1,15 +1,26 @@
  import os
  import sys
+ import gc
+ import torch

  # --- Centralized Debugging Control ---
- # To enable, set the environment variable: `export CMP_DEBUG=1`
  DEBUG_ENABLED = os.environ.get("CMP_DEBUG", "0") == "1"

  def dbg(*args, **kwargs):
-     """
-     A controlled debug print function. Only prints if DEBUG_ENABLED is True.
-     Ensures that debug output does not clutter production runs or HF Spaces logs
-     unless explicitly requested. Flushes output to ensure it appears in order.
-     """
+     """A controlled debug print function."""
      if DEBUG_ENABLED:
          print("[DEBUG]", *args, **kwargs, file=sys.stderr, flush=True)
+
+ # --- NEW: centralized memory-cleanup function ---
+ def cleanup_memory():
+     """
+     A single, globally available function for cleaning up CPU and GPU memory.
+     It keeps memory management consistent and in one place.
+     """
+     dbg("Cleaning up memory (centralized)...")
+     # Python's garbage collector
+     gc.collect()
+     # PyTorch's CUDA cache
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+     dbg("Memory cleanup complete.")