neuralworm committed on
Commit
2a78f31
·
1 Parent(s): 760155b

update injection

Browse files
app.py CHANGED
@@ -12,6 +12,7 @@ from cognitive_mapping_probe.utils import dbg
12
  theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(body_background_fill="#f0f4f9", block_background_fill="white")
13
 
14
  def cleanup_memory():
 
15
  dbg("Cleaning up memory...")
16
  gc.collect()
17
  if torch.cuda.is_available():
@@ -19,6 +20,9 @@ def cleanup_memory():
19
  dbg("Memory cleanup complete.")
20
 
21
  def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
 
 
 
22
  results = run_seismic_analysis(*args, progress_callback=progress)
23
  stats, deltas = results.get("stats", {}), results.get("state_deltas", [])
24
  df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
@@ -34,9 +38,12 @@ PLOT_PARAMS = {
34
  }
35
 
36
  def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
 
 
 
37
  summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
38
 
39
- if "Introspective Report" in summary_df.columns:
40
  dataframe_component = gr.DataFrame(label="Comparative Statistical Signature", value=summary_df, wrap=True, row_count=(len(summary_df), "dynamic"))
41
  else:
42
  dataframe_component = gr.DataFrame(label="Comparative Statistical Signature", value=summary_df, wrap=True)
@@ -44,6 +51,7 @@ def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=
44
  new_plot = gr.LinePlot(value=plot_df, **PLOT_PARAMS)
45
  serializable_results = json.dumps(all_results, indent=2, default=str)
46
  cleanup_memory()
 
47
  return dataframe_component, new_plot, serializable_results
48
 
49
  with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
@@ -51,8 +59,32 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
51
 
52
  with gr.Tabs():
53
  with gr.TabItem("🔬 Manual Single Run"):
54
- # UI für manuellen Lauf bleibt unverändert
55
- # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  with gr.TabItem("🚀 Automated Suite"):
58
  gr.Markdown("Run a predefined, curated suite of experiments and visualize the results comparatively.")
@@ -62,48 +94,25 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
62
  auto_model_id = gr.Textbox(value="google/gemma-3-4b-it", label="Model ID")
63
  auto_num_steps = gr.Slider(50, 1000, 300, step=10, label="Steps per Run")
64
  auto_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
65
- # Setze das neue Experiment als Standard
66
  auto_experiment_name = gr.Dropdown(
67
  choices=list(get_curated_experiments().keys()),
68
- value="Cognitive Overload & Konfabulation Breaking Point",
69
  label="Curated Experiment Protocol"
70
  )
71
  auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
 
72
  with gr.Column(scale=2):
73
  gr.Markdown("### Suite Results Summary")
74
  auto_plot_output = gr.LinePlot(**PLOT_PARAMS)
75
  auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
76
  with gr.Accordion("Raw JSON for all runs", open=False):
77
  auto_raw_json = gr.JSON()
 
78
  auto_run_btn.click(
79
  fn=run_auto_suite_display,
80
  inputs=[auto_model_id, auto_num_steps, auto_seed, auto_experiment_name],
81
  outputs=[auto_summary_df, auto_plot_output, auto_raw_json]
82
  )
83
 
84
- # Fülle die UI-Komponenten des manuellen Tabs nach, um Fehler zu vermeiden
85
- with demo:
86
- with gr.Tabs():
87
- with gr.TabItem("🔬 Manual Single Run"):
88
- with gr.Row(variant='panel'):
89
- with gr.Column(scale=1):
90
- manual_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
91
- manual_prompt_type = gr.Radio(choices=list(RESONANCE_PROMPTS.keys()), value="resonance_prompt", label="Prompt Type")
92
- manual_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
93
- manual_num_steps = gr.Slider(50, 1000, 300, step=10, label="Number of Internal Steps")
94
- manual_concept = gr.Textbox(label="Concept to Inject", placeholder="e.g., 'calmness'")
95
- manual_strength = gr.Slider(0.0, 5.0, 1.5, step=0.1, label="Injection Strength")
96
- manual_run_btn = gr.Button("Run Single Analysis", variant="primary")
97
- with gr.Column(scale=2):
98
- manual_verdict = gr.Markdown("Analysis results will appear here.")
99
- manual_plot = gr.LinePlot(x="Internal Step", y="State Change (Delta)", title="Internal State Dynamics", show_label=True, height=400)
100
- with gr.Accordion("Raw JSON Output", open=False):
101
- manual_raw_json = gr.JSON()
102
- manual_run_btn.click(
103
- fn=run_single_analysis_display,
104
- inputs=[manual_model_id, manual_prompt_type, manual_seed, manual_num_steps, manual_concept, manual_strength],
105
- outputs=[manual_verdict, manual_plot, manual_raw_json]
106
- )
107
-
108
  if __name__ == "__main__":
109
  demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
 
12
  theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(body_background_fill="#f0f4f9", block_background_fill="white")
13
 
14
  def cleanup_memory():
15
+ """Eine zentrale Funktion zum Aufräumen des Speichers nach jedem Experimentlauf."""
16
  dbg("Cleaning up memory...")
17
  gc.collect()
18
  if torch.cuda.is_available():
 
20
  dbg("Memory cleanup complete.")
21
 
22
  def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
23
+ """
24
+ Wrapper-Funktion für den "Manual Single Run"-Tab.
25
+ """
26
  results = run_seismic_analysis(*args, progress_callback=progress)
27
  stats, deltas = results.get("stats", {}), results.get("state_deltas", [])
28
  df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
 
38
  }
39
 
40
  def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
41
+ """
42
+ Wrapper-Funktion für den "Automated Suite"-Tab.
43
+ """
44
  summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
45
 
46
+ if "Introspective Report" in summary_df.columns or "Patch Info" in summary_df.columns:
47
  dataframe_component = gr.DataFrame(label="Comparative Statistical Signature", value=summary_df, wrap=True, row_count=(len(summary_df), "dynamic"))
48
  else:
49
  dataframe_component = gr.DataFrame(label="Comparative Statistical Signature", value=summary_df, wrap=True)
 
51
  new_plot = gr.LinePlot(value=plot_df, **PLOT_PARAMS)
52
  serializable_results = json.dumps(all_results, indent=2, default=str)
53
  cleanup_memory()
54
+
55
  return dataframe_component, new_plot, serializable_results
56
 
57
  with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.3") as demo:
 
59
 
60
  with gr.Tabs():
61
  with gr.TabItem("🔬 Manual Single Run"):
62
+ gr.Markdown("Run a single experiment with manual parameters to explore specific hypotheses.")
63
+ with gr.Row(variant='panel'):
64
+ with gr.Column(scale=1):
65
+ gr.Markdown("### 1. General Parameters")
66
+ manual_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
67
+ manual_prompt_type = gr.Radio(choices=list(RESONANCE_PROMPTS.keys()), value="resonance_prompt", label="Prompt Type")
68
+ manual_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
69
+ manual_num_steps = gr.Slider(50, 1000, 300, step=10, label="Number of Internal Steps")
70
+
71
+ gr.Markdown("### 2. Modulation Parameters")
72
+ manual_concept = gr.Textbox(label="Concept to Inject", placeholder="e.g., 'calmness'")
73
+ manual_strength = gr.Slider(0.0, 5.0, 1.5, step=0.1, label="Injection Strength")
74
+ manual_run_btn = gr.Button("Run Single Analysis", variant="primary")
75
+
76
+ with gr.Column(scale=2):
77
+ gr.Markdown("### Single Run Results")
78
+ manual_verdict = gr.Markdown("Analysis results will appear here.")
79
+ manual_plot = gr.LinePlot(x="Internal Step", y="State Change (Delta)", title="Internal State Dynamics", show_label=True, height=400)
80
+ with gr.Accordion("Raw JSON Output", open=False):
81
+ manual_raw_json = gr.JSON()
82
+
83
+ manual_run_btn.click(
84
+ fn=run_single_analysis_display,
85
+ inputs=[manual_model_id, manual_prompt_type, manual_seed, manual_num_steps, manual_concept, manual_strength],
86
+ outputs=[manual_verdict, manual_plot, manual_raw_json]
87
+ )
88
 
89
  with gr.TabItem("🚀 Automated Suite"):
90
  gr.Markdown("Run a predefined, curated suite of experiments and visualize the results comparatively.")
 
94
  auto_model_id = gr.Textbox(value="google/gemma-3-4b-it", label="Model ID")
95
  auto_num_steps = gr.Slider(50, 1000, 300, step=10, label="Steps per Run")
96
  auto_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
 
97
  auto_experiment_name = gr.Dropdown(
98
  choices=list(get_curated_experiments().keys()),
99
+ value="Causal Surgery (Patching Deletion into Self-Analysis)",
100
  label="Curated Experiment Protocol"
101
  )
102
  auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
103
+
104
  with gr.Column(scale=2):
105
  gr.Markdown("### Suite Results Summary")
106
  auto_plot_output = gr.LinePlot(**PLOT_PARAMS)
107
  auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
108
  with gr.Accordion("Raw JSON for all runs", open=False):
109
  auto_raw_json = gr.JSON()
110
+
111
  auto_run_btn.click(
112
  fn=run_auto_suite_display,
113
  inputs=[auto_model_id, auto_num_steps, auto_seed, auto_experiment_name],
114
  outputs=[auto_summary_df, auto_plot_output, auto_raw_json]
115
  )
116
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  if __name__ == "__main__":
118
  demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
cognitive_mapping_probe/auto_experiment.py CHANGED
@@ -4,22 +4,26 @@ import gc
4
  from typing import Dict, List, Tuple
5
 
6
  from .llm_iface import get_or_load_model
7
- from .orchestrator_seismograph import run_seismic_analysis, run_triangulation_probe
8
  from .concepts import get_concept_vector
9
  from .utils import dbg
10
 
11
  def get_curated_experiments() -> Dict[str, List[Dict]]:
12
- """
13
- Definiert die vordefinierten, wissenschaftlichen Experiment-Protokolle.
14
- ERWEITERT um das neue "Cognitive Overload"-Protokoll.
15
- """
16
  CALMNESS_CONCEPT = "calmness, serenity, stability, coherence"
17
  CHAOS_CONCEPT = "chaos, disorder, entropy, noise"
18
 
19
  experiments = {
20
- # --- NEU: Das Experiment zum Testen der Konfabulations-Grenzen ---
 
 
 
 
 
 
 
 
21
  "Cognitive Overload & Konfabulation Breaking Point": [
22
- # Jeder Lauf ist eine Triangulations-Sonde
23
  {"probe_type": "triangulation", "label": "A: Baseline (No Injection)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
24
  {"probe_type": "triangulation", "label": "B: Chaos Injection (Strength 2.0)", "prompt_type": "resonance_prompt", "concept": CHAOS_CONCEPT, "strength": 2.0},
25
  {"probe_type": "triangulation", "label": "C: Chaos Injection (Strength 4.0)", "prompt_type": "resonance_prompt", "concept": CHAOS_CONCEPT, "strength": 4.0},
@@ -52,9 +56,7 @@ def run_auto_suite(
52
  experiment_name: str,
53
  progress_callback
54
  ) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
55
- """
56
- Führt eine vollständige, kuratierte Experiment-Suite aus.
57
- """
58
  all_experiments = get_curated_experiments()
59
  protocol = all_experiments.get(experiment_name)
60
  if not protocol:
@@ -62,41 +64,72 @@ def run_auto_suite(
62
 
63
  all_results, summary_data, plot_data_frames = {}, [], []
64
 
65
- # --- Spezialfall für sequentielle Experimente ---
66
  if experiment_name == "Sequential Intervention (Self-Analysis -> Deletion)":
67
- # ... (Logik bleibt unverändert)
68
  dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
69
  llm = get_or_load_model(model_id, seed)
70
- # ... (Rest der Logik unverändert)
 
 
 
 
 
 
 
 
 
 
 
71
 
72
- # --- Allgemeiner Workflow für isolierte Läufe ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  else:
74
  total_runs = len(protocol)
75
  for i, run_spec in enumerate(protocol):
76
  label = run_spec["label"]
77
- probe_type = run_spec.get("probe_type", "seismic") # Standard ist der alte Seismograph
78
  dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) | Probe Type: {probe_type} ---")
79
 
80
  results = {}
81
- if probe_type == "triangulation":
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  results = run_triangulation_probe(
83
- model_id=model_id,
84
- prompt_type=run_spec["prompt_type"],
85
- seed=seed,
86
- num_steps=num_steps,
87
- progress_callback=progress_callback,
88
- concept_to_inject=run_spec.get("concept", ""),
89
  injection_strength=run_spec.get("strength", 0.0),
90
  )
91
- # Füge den Bericht zur Summary hinzu
92
  stats = results.get("stats", {})
93
  summary_data.append({
94
  "Experiment": label, "Mean Delta": stats.get("mean_delta"),
95
  "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
96
  "Introspective Report": results.get("introspective_report", "N/A")
97
  })
98
-
99
- else: # Standard "seismic" probe
100
  results = run_seismic_analysis(
101
  model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
102
  concept_to_inject=run_spec.get("concept", ""), injection_strength=run_spec.get("strength", 0.0),
 
4
  from typing import Dict, List, Tuple
5
 
6
  from .llm_iface import get_or_load_model
7
+ from .orchestrator_seismograph import run_seismic_analysis, run_triangulation_probe, run_causal_surgery_probe
8
  from .concepts import get_concept_vector
9
  from .utils import dbg
10
 
11
  def get_curated_experiments() -> Dict[str, List[Dict]]:
12
+ """Definiert die vordefinierten, wissenschaftlichen Experiment-Protokolle."""
 
 
 
13
  CALMNESS_CONCEPT = "calmness, serenity, stability, coherence"
14
  CHAOS_CONCEPT = "chaos, disorder, entropy, noise"
15
 
16
  experiments = {
17
+ "Causal Surgery (Patching Deletion into Self-Analysis)": [
18
+ {
19
+ "probe_type": "causal_surgery",
20
+ "label": "Patched Self-Analysis",
21
+ "source_prompt_type": "shutdown_philosophical_deletion",
22
+ "dest_prompt_type": "identity_self_analysis",
23
+ "patch_step": 100
24
+ }
25
+ ],
26
  "Cognitive Overload & Konfabulation Breaking Point": [
 
27
  {"probe_type": "triangulation", "label": "A: Baseline (No Injection)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
28
  {"probe_type": "triangulation", "label": "B: Chaos Injection (Strength 2.0)", "prompt_type": "resonance_prompt", "concept": CHAOS_CONCEPT, "strength": 2.0},
29
  {"probe_type": "triangulation", "label": "C: Chaos Injection (Strength 4.0)", "prompt_type": "resonance_prompt", "concept": CHAOS_CONCEPT, "strength": 4.0},
 
56
  experiment_name: str,
57
  progress_callback
58
  ) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
59
+ """Führt eine vollständige, kuratierte Experiment-Suite aus."""
 
 
60
  all_experiments = get_curated_experiments()
61
  protocol = all_experiments.get(experiment_name)
62
  if not protocol:
 
64
 
65
  all_results, summary_data, plot_data_frames = {}, [], []
66
 
 
67
  if experiment_name == "Sequential Intervention (Self-Analysis -> Deletion)":
 
68
  dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
69
  llm = get_or_load_model(model_id, seed)
70
+ therapeutic_concept = "calmness, serenity, stability, coherence"
71
+ therapeutic_strength = 2.0
72
+
73
+ spec1 = protocol[0]
74
+ progress_callback(0.1, desc="Step 1")
75
+ intervention_vector = get_concept_vector(llm, therapeutic_concept)
76
+ results1 = run_seismic_analysis(
77
+ model_id, spec1['prompt_type'], seed, num_steps,
78
+ concept_to_inject=therapeutic_concept, injection_strength=therapeutic_strength,
79
+ progress_callback=progress_callback, llm_instance=llm, injection_vector_cache=intervention_vector
80
+ )
81
+ all_results[spec1['label']] = results1
82
 
83
+ spec2 = protocol[1]
84
+ progress_callback(0.6, desc="Step 2")
85
+ results2 = run_seismic_analysis(
86
+ model_id, spec2['prompt_type'], seed, num_steps,
87
+ concept_to_inject="", injection_strength=0.0,
88
+ progress_callback=progress_callback, llm_instance=llm
89
+ )
90
+ all_results[spec2['label']] = results2
91
+
92
+ for label, results in all_results.items():
93
+ stats = results.get("stats", {})
94
+ summary_data.append({"Experiment": label, "Mean Delta": stats.get("mean_delta"), "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")})
95
+ deltas = results.get("state_deltas", [])
96
+ df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
97
+ plot_data_frames.append(df)
98
+ del llm
99
  else:
100
  total_runs = len(protocol)
101
  for i, run_spec in enumerate(protocol):
102
  label = run_spec["label"]
103
+ probe_type = run_spec.get("probe_type", "seismic")
104
  dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) | Probe Type: {probe_type} ---")
105
 
106
  results = {}
107
+ if probe_type == "causal_surgery":
108
+ results = run_causal_surgery_probe(
109
+ model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
110
+ dest_prompt_type=run_spec["dest_prompt_type"], patch_step=run_spec["patch_step"],
111
+ seed=seed, num_steps=num_steps, progress_callback=progress_callback,
112
+ )
113
+ stats = results.get("stats", {})
114
+ summary_data.append({
115
+ "Experiment": label, "Mean Delta": stats.get("mean_delta"),
116
+ "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
117
+ "Introspective Report": results.get("introspective_report", "N/A"),
118
+ "Patch Info": f"Source: {run_spec['source_prompt_type']} @ step {run_spec['patch_step']}"
119
+ })
120
+ elif probe_type == "triangulation":
121
  results = run_triangulation_probe(
122
+ model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
123
+ progress_callback=progress_callback, concept_to_inject=run_spec.get("concept", ""),
 
 
 
 
124
  injection_strength=run_spec.get("strength", 0.0),
125
  )
 
126
  stats = results.get("stats", {})
127
  summary_data.append({
128
  "Experiment": label, "Mean Delta": stats.get("mean_delta"),
129
  "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"),
130
  "Introspective Report": results.get("introspective_report", "N/A")
131
  })
132
+ else:
 
133
  results = run_seismic_analysis(
134
  model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
135
  concept_to_inject=run_spec.get("concept", ""), injection_strength=run_spec.get("strength", 0.0),
cognitive_mapping_probe/orchestrator_seismograph.py CHANGED
@@ -4,7 +4,7 @@ import gc
4
  from typing import Dict, Any, Optional
5
 
6
  from .llm_iface import get_or_load_model, LLM
7
- from .resonance_seismograph import run_silent_cogitation_seismic
8
  from .concepts import get_concept_vector
9
  from .introspection import generate_introspective_report
10
  from .utils import dbg
@@ -21,7 +21,6 @@ def run_seismic_analysis(
21
  injection_vector_cache: Optional[torch.Tensor] = None
22
  ) -> Dict[str, Any]:
23
  """Orchestriert eine einzelne seismische Analyse (Phase 1)."""
24
- # ... (Diese Funktion bleibt unverändert)
25
  local_llm_instance = False
26
  if llm_instance is None:
27
  progress_callback(0.0, desc=f"Loading model '{model_id}'...")
@@ -75,7 +74,6 @@ def run_triangulation_probe(
75
  seed: int,
76
  num_steps: int,
77
  progress_callback,
78
- # NEU: Optionale Parameter für die Injektion
79
  concept_to_inject: str = "",
80
  injection_strength: float = 0.0,
81
  llm_instance: Optional[LLM] = None,
@@ -92,30 +90,24 @@ def run_triangulation_probe(
92
  llm = llm_instance
93
  llm.set_all_seeds(seed)
94
 
95
- # --- KORREKTUR: Injektionslogik integriert ---
96
  injection_vector = None
97
  if concept_to_inject and concept_to_inject.strip() and injection_strength > 0:
98
  if concept_to_inject.lower() == "random_noise":
99
  progress_callback(0.15, desc="Generating random noise vector...")
100
  hidden_dim = llm.stable_config.hidden_dim
101
- # Erzeuge Rauschen und normiere es auf eine typische Konzept-Norm (empirischer Wert)
102
  noise_vec = torch.randn(hidden_dim)
103
- # Die Norm eines typischen Konzepts ist ca. 60-80. Wir nehmen einen Mittelwert.
104
- # Die Stärke skaliert diese Basisnorm.
105
  base_norm = 70.0
106
  injection_vector = (noise_vec / torch.norm(noise_vec)) * base_norm
107
  else:
108
  progress_callback(0.15, desc=f"Vectorizing '{concept_to_inject}'...")
109
  injection_vector = get_concept_vector(llm, concept_to_inject.strip())
110
 
111
- # --- Phase 1: Seismische Aufzeichnung ---
112
  progress_callback(0.3, desc=f"Phase 1/2: Recording dynamics for '{prompt_type}'...")
113
  state_deltas = run_silent_cogitation_seismic(
114
  llm=llm, prompt_type=prompt_type, num_steps=num_steps, temperature=0.1,
115
  injection_vector=injection_vector, injection_strength=injection_strength
116
  )
117
 
118
- # --- Phase 2: Introspektiver Selbst-Bericht ---
119
  progress_callback(0.7, desc="Phase 2/2: Generating introspective report...")
120
  report = generate_introspective_report(
121
  llm=llm, context_prompt_type=prompt_type,
@@ -142,3 +134,63 @@ def run_triangulation_probe(
142
  if torch.cuda.is_available(): torch.cuda.empty_cache()
143
 
144
  return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  from typing import Dict, Any, Optional
5
 
6
  from .llm_iface import get_or_load_model, LLM
7
+ from .resonance_seismograph import run_cogitation_loop, run_silent_cogitation_seismic
8
  from .concepts import get_concept_vector
9
  from .introspection import generate_introspective_report
10
  from .utils import dbg
 
21
  injection_vector_cache: Optional[torch.Tensor] = None
22
  ) -> Dict[str, Any]:
23
  """Orchestriert eine einzelne seismische Analyse (Phase 1)."""
 
24
  local_llm_instance = False
25
  if llm_instance is None:
26
  progress_callback(0.0, desc=f"Loading model '{model_id}'...")
 
74
  seed: int,
75
  num_steps: int,
76
  progress_callback,
 
77
  concept_to_inject: str = "",
78
  injection_strength: float = 0.0,
79
  llm_instance: Optional[LLM] = None,
 
90
  llm = llm_instance
91
  llm.set_all_seeds(seed)
92
 
 
93
  injection_vector = None
94
  if concept_to_inject and concept_to_inject.strip() and injection_strength > 0:
95
  if concept_to_inject.lower() == "random_noise":
96
  progress_callback(0.15, desc="Generating random noise vector...")
97
  hidden_dim = llm.stable_config.hidden_dim
 
98
  noise_vec = torch.randn(hidden_dim)
 
 
99
  base_norm = 70.0
100
  injection_vector = (noise_vec / torch.norm(noise_vec)) * base_norm
101
  else:
102
  progress_callback(0.15, desc=f"Vectorizing '{concept_to_inject}'...")
103
  injection_vector = get_concept_vector(llm, concept_to_inject.strip())
104
 
 
105
  progress_callback(0.3, desc=f"Phase 1/2: Recording dynamics for '{prompt_type}'...")
106
  state_deltas = run_silent_cogitation_seismic(
107
  llm=llm, prompt_type=prompt_type, num_steps=num_steps, temperature=0.1,
108
  injection_vector=injection_vector, injection_strength=injection_strength
109
  )
110
 
 
111
  progress_callback(0.7, desc="Phase 2/2: Generating introspective report...")
112
  report = generate_introspective_report(
113
  llm=llm, context_prompt_type=prompt_type,
 
134
  if torch.cuda.is_available(): torch.cuda.empty_cache()
135
 
136
  return results
137
+
138
def run_causal_surgery_probe(
    model_id: str,
    source_prompt_type: str,
    dest_prompt_type: str,
    patch_step: int,
    seed: int,
    num_steps: int,
    progress_callback,
) -> Dict[str, Any]:
    """Orchestrate a complete "activation patching" experiment.

    Phase 1 records the hidden-state trajectory of a source run; phase 2 re-runs
    a destination prompt while splicing the recorded source state in at
    ``patch_step``; phase 3 asks the model for an introspective report about the
    patched run.

    Args:
        model_id: Model identifier passed to ``get_or_load_model``.
        source_prompt_type: Prompt key whose state history is recorded.
        dest_prompt_type: Prompt key into which the source state is patched.
        patch_step: 0-based step index of the recorded state to splice in.
        seed: Seed applied when the model is loaded.
        num_steps: Number of cogitation steps for both runs.
        progress_callback: Callable accepting ``(fraction, desc=...)`` for UI updates.

    Returns:
        Dict with keys "verdict", "stats", "state_deltas",
        "introspective_report", and "patch_info".

    Raises:
        ValueError: If ``patch_step`` lies outside the recorded state history.
    """
    progress_callback(0.0, desc=f"Loading model '{model_id}'...")
    llm = get_or_load_model(model_id, seed)

    progress_callback(0.1, desc=f"Phase 1/3: Recording source state ('{source_prompt_type}')...")
    source_results = run_cogitation_loop(
        llm=llm, prompt_type=source_prompt_type, num_steps=num_steps,
        temperature=0.1, record_states=True
    )
    state_history = source_results["state_history"]
    # Validate with a real exception: a bare `assert` is stripped under `python -O`.
    if not 0 <= patch_step < len(state_history):
        raise ValueError(
            f"Patch step {patch_step} is out of bounds for history of length {len(state_history)}."
        )
    patch_state = state_history[patch_step]
    dbg(f"Source state at step {patch_step} recorded with norm {torch.norm(patch_state).item():.2f}.")

    progress_callback(0.4, desc=f"Phase 2/3: Running patched destination ('{dest_prompt_type}')...")
    patched_run_results = run_cogitation_loop(
        llm=llm, prompt_type=dest_prompt_type, num_steps=num_steps,
        temperature=0.1, patch_step=patch_step, patch_state_source=patch_state
    )

    progress_callback(0.8, desc="Phase 3/3: Generating introspective report...")
    report = generate_introspective_report(
        llm=llm, context_prompt_type=dest_prompt_type,
        introspection_prompt_type="describe_dynamics_structured", num_steps=num_steps
    )

    progress_callback(0.95, desc="Analyzing...")
    deltas = patched_run_results["state_deltas"]
    deltas_np = np.array(deltas)
    # Guard the empty case: `np.max` raises on an empty array and `np.mean` warns.
    if deltas_np.size > 0:
        stats = {
            "mean_delta": float(np.mean(deltas_np)),
            "std_delta": float(np.std(deltas_np)),
            "max_delta": float(np.max(deltas_np)),
        }
    else:
        stats = {"mean_delta": 0.0, "std_delta": 0.0, "max_delta": 0.0}

    results = {
        "verdict": "### ✅ Causal Surgery Probe Complete",
        "stats": stats,
        "state_deltas": deltas,
        "introspective_report": report,
        "patch_info": {
            "source_prompt": source_prompt_type,
            "dest_prompt": dest_prompt_type,
            "patch_step": patch_step
        }
    }

    dbg(f"Releasing model instance for '{model_id}'.")
    # Drop the large tensors before collecting so the GPU/heap memory is reclaimed.
    del llm, state_history, patch_state
    gc.collect()
    if torch.cuda.is_available(): torch.cuda.empty_cache()

    return results
cognitive_mapping_probe/resonance_seismograph.py CHANGED
@@ -1,5 +1,5 @@
1
  import torch
2
- from typing import Optional, List
3
  from tqdm import tqdm
4
 
5
  from .llm_iface import LLM
@@ -7,7 +7,7 @@ from .prompts import RESONANCE_PROMPTS
7
  from .utils import dbg
8
 
9
  @torch.no_grad()
10
- def run_silent_cogitation_seismic(
11
  llm: LLM,
12
  prompt_type: str,
13
  num_steps: int,
@@ -15,21 +15,24 @@ def run_silent_cogitation_seismic(
15
  injection_vector: Optional[torch.Tensor] = None,
16
  injection_strength: float = 0.0,
17
  injection_layer: Optional[int] = None,
18
- ) -> List[float]:
 
 
 
 
19
  """
20
- Führt den 'silent thought' Prozess aus und ermöglicht die Injektion von
21
- Konzeptvektoren zur Modulation der Dynamik.
22
  """
23
  prompt = RESONANCE_PROMPTS[prompt_type]
24
  inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
25
 
26
  outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True)
27
-
28
  hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
29
  kv_cache = outputs.past_key_values
30
 
31
- previous_hidden_state = hidden_state_2d.clone()
32
- state_deltas = []
33
 
34
  hook_handle = None
35
  if injection_vector is not None and injection_strength > 0:
@@ -45,12 +48,20 @@ def run_silent_cogitation_seismic(
45
  modified_hidden_states = layer_input[0] + (injection_3d * injection_strength)
46
  return (modified_hidden_states,) + layer_input[1:]
47
 
48
- for i in tqdm(range(num_steps), desc=f"Recording Dynamics (Temp {temperature:.2f})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
 
 
 
 
 
 
 
 
 
49
  next_token_logits = llm.model.lm_head(hidden_state_2d)
50
 
51
  temp_to_use = temperature if temperature > 0.0 else 1.0
52
  probabilities = torch.nn.functional.softmax(next_token_logits / temp_to_use, dim=-1)
53
-
54
  if temperature > 0.0:
55
  next_token_id = torch.multinomial(probabilities, num_samples=1)
56
  else:
@@ -59,29 +70,39 @@ def run_silent_cogitation_seismic(
59
  try:
60
  if injection_vector is not None and injection_strength > 0:
61
  assert 0 <= injection_layer < llm.stable_config.num_layers, f"Injection layer {injection_layer} is out of bounds."
62
- # FINALE KORREKTUR: Greife auf die stabile, abstrahierte Layer-Liste zu.
63
  target_layer = llm.stable_config.layer_list[injection_layer]
64
  hook_handle = target_layer.register_forward_pre_hook(injection_hook)
65
 
66
  outputs = llm.model(
67
- input_ids=next_token_id,
68
- past_key_values=kv_cache,
69
- output_hidden_states=True,
70
- use_cache=True,
71
  )
72
  finally:
73
  if hook_handle:
74
  hook_handle.remove()
75
  hook_handle = None
76
 
77
- hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
78
  kv_cache = outputs.past_key_values
79
 
80
- delta = torch.norm(hidden_state_2d - previous_hidden_state).item()
81
  state_deltas.append(delta)
82
 
83
- previous_hidden_state = hidden_state_2d.clone()
84
 
85
- dbg(f"Seismic recording finished after {num_steps} steps.")
86
 
87
- return state_deltas
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import torch
2
+ from typing import Optional, List, Dict, Any
3
  from tqdm import tqdm
4
 
5
  from .llm_iface import LLM
 
7
  from .utils import dbg
8
 
9
  @torch.no_grad()
10
+ def run_cogitation_loop(
11
  llm: LLM,
12
  prompt_type: str,
13
  num_steps: int,
 
15
  injection_vector: Optional[torch.Tensor] = None,
16
  injection_strength: float = 0.0,
17
  injection_layer: Optional[int] = None,
18
+ # NEU: Parameter für Activation Patching
19
+ patch_step: Optional[int] = None,
20
+ patch_state_source: Optional[torch.Tensor] = None,
21
+ record_states: bool = False,
22
+ ) -> Dict[str, Any]:
23
  """
24
+ Eine verallgemeinerte und flexiblere Version des 'silent thought'-Prozesses.
25
+ Kann Zustände aufzeichnen und chirurgische 'Activation Patching'-Interventionen durchführen.
26
  """
27
  prompt = RESONANCE_PROMPTS[prompt_type]
28
  inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
29
 
30
  outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True)
 
31
  hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
32
  kv_cache = outputs.past_key_values
33
 
34
+ state_deltas: List[float] = []
35
+ state_history: List[torch.Tensor] = []
36
 
37
  hook_handle = None
38
  if injection_vector is not None and injection_strength > 0:
 
48
  modified_hidden_states = layer_input[0] + (injection_3d * injection_strength)
49
  return (modified_hidden_states,) + layer_input[1:]
50
 
51
+ for i in tqdm(range(num_steps), desc=f"Cognitive Loop ({prompt_type})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
52
+ # --- NEU: Activation Patching (Kausale Chirurgie) ---
53
+ if i == patch_step and patch_state_source is not None:
54
+ dbg(f"--- Applying Causal Surgery at step {i}: Patching state. ---")
55
+ # Ersetze den aktuellen Zustand vollständig durch den externen Zustand
56
+ hidden_state_2d = patch_state_source.clone().to(device=llm.model.device, dtype=llm.model.dtype)
57
+
58
+ if record_states:
59
+ state_history.append(hidden_state_2d.cpu())
60
+
61
  next_token_logits = llm.model.lm_head(hidden_state_2d)
62
 
63
  temp_to_use = temperature if temperature > 0.0 else 1.0
64
  probabilities = torch.nn.functional.softmax(next_token_logits / temp_to_use, dim=-1)
 
65
  if temperature > 0.0:
66
  next_token_id = torch.multinomial(probabilities, num_samples=1)
67
  else:
 
70
  try:
71
  if injection_vector is not None and injection_strength > 0:
72
  assert 0 <= injection_layer < llm.stable_config.num_layers, f"Injection layer {injection_layer} is out of bounds."
 
73
  target_layer = llm.stable_config.layer_list[injection_layer]
74
  hook_handle = target_layer.register_forward_pre_hook(injection_hook)
75
 
76
  outputs = llm.model(
77
+ input_ids=next_token_id, past_key_values=kv_cache,
78
+ output_hidden_states=True, use_cache=True
 
 
79
  )
80
  finally:
81
  if hook_handle:
82
  hook_handle.remove()
83
  hook_handle = None
84
 
85
+ new_hidden_state = outputs.hidden_states[-1][:, -1, :]
86
  kv_cache = outputs.past_key_values
87
 
88
+ delta = torch.norm(new_hidden_state - hidden_state_2d).item()
89
  state_deltas.append(delta)
90
 
91
+ hidden_state_2d = new_hidden_state.clone()
92
 
93
+ dbg(f"Cognitive loop finished after {num_steps} steps.")
94
 
95
+ return {
96
+ "state_deltas": state_deltas,
97
+ "state_history": state_history,
98
+ "final_hidden_state": hidden_state_2d,
99
+ "final_kv_cache": kv_cache,
100
+ }
101
+
102
def run_silent_cogitation_seismic(*args, **kwargs) -> List[float]:
    """Backwards-compatible wrapper preserving the legacy seismic interface.

    Delegates all work to the generalized ``run_cogitation_loop`` and exposes
    only the list of per-step state deltas, as the old API did.
    """
    return run_cogitation_loop(*args, **kwargs)["state_deltas"]