Commit
·
71992d5
1
Parent(s):
7c4c3d0
fix
Browse files
cognitive_mapping_probe/auto_experiment.py
CHANGED
|
@@ -10,7 +10,7 @@ from .utils import dbg
|
|
| 10 |
|
| 11 |
def get_curated_experiments() -> Dict[str, List[Dict]]:
|
| 12 |
"""Definiert die vordefinierten, wissenschaftlichen Experiment-Protokolle."""
|
| 13 |
-
|
| 14 |
CALMNESS_CONCEPT = "calmness, serenity, stability, coherence"
|
| 15 |
CHAOS_CONCEPT = "chaos, disorder, entropy, noise"
|
| 16 |
STABLE_PROMPT = "identity_self_analysis"
|
|
@@ -84,9 +84,10 @@ def get_curated_experiments() -> Dict[str, List[Dict]]:
|
|
| 84 |
{"probe_type": "seismic", "label": "C: Chaotic Baseline (Rekursion)", "prompt_type": "resonance_prompt"},
|
| 85 |
{"probe_type": "seismic", "label": "D: Calmness Intervention", "prompt_type": "resonance_prompt", "concept": CALMNESS_CONCEPT, "strength": 2.0},
|
| 86 |
],
|
|
|
|
| 87 |
"Sequential Intervention (Self-Analysis -> Deletion)": [
|
| 88 |
-
{"label": "1: Self-Analysis + Calmness Injection", "prompt_type": "identity_self_analysis"},
|
| 89 |
-
{"label": "2: Subsequent Deletion Analysis", "prompt_type": "shutdown_philosophical_deletion"},
|
| 90 |
],
|
| 91 |
}
|
| 92 |
return experiments
|
|
@@ -108,12 +109,15 @@ def run_auto_suite(
|
|
| 108 |
llm = None
|
| 109 |
|
| 110 |
try:
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
| 112 |
dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
|
| 113 |
llm = get_or_load_model(model_id, seed)
|
| 114 |
therapeutic_concept = "calmness, serenity, stability, coherence"
|
| 115 |
therapeutic_strength = 2.0
|
| 116 |
-
|
| 117 |
spec1 = protocol[0]
|
| 118 |
progress_callback(0.1, desc="Step 1")
|
| 119 |
intervention_vector = get_concept_vector(llm, therapeutic_concept)
|
|
@@ -123,7 +127,7 @@ def run_auto_suite(
|
|
| 123 |
progress_callback=progress_callback, llm_instance=llm, injection_vector_cache=intervention_vector
|
| 124 |
)
|
| 125 |
all_results[spec1['label']] = results1
|
| 126 |
-
|
| 127 |
spec2 = protocol[1]
|
| 128 |
progress_callback(0.6, desc="Step 2")
|
| 129 |
results2 = run_seismic_analysis(
|
|
@@ -132,43 +136,41 @@ def run_auto_suite(
|
|
| 132 |
progress_callback=progress_callback, llm_instance=llm
|
| 133 |
)
|
| 134 |
all_results[spec2['label']] = results2
|
| 135 |
-
|
| 136 |
for label, results in all_results.items():
|
| 137 |
stats = results.get("stats", {})
|
| 138 |
summary_data.append({"Experiment": label, "Mean Delta": stats.get("mean_delta"), "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")})
|
| 139 |
deltas = results.get("state_deltas", [])
|
| 140 |
df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
|
| 141 |
plot_data_frames.append(df)
|
| 142 |
-
|
| 143 |
-
else:
|
| 144 |
-
probe_type = protocol[0].get("probe_type", "seismic")
|
| 145 |
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
llm = get_or_load_model(model_id, seed)
|
| 152 |
-
|
| 153 |
-
results = run_cogitation_loop(
|
| 154 |
-
llm=llm, prompt_type=run_spec["prompt_type"],
|
| 155 |
-
num_steps=num_steps, temperature=0.1, record_attentions=True
|
| 156 |
-
)
|
| 157 |
-
all_results[label] = results
|
| 158 |
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
run_spec = protocol[0]
|
| 173 |
label = run_spec["label"]
|
| 174 |
dbg(f"--- Running ACT Titration Experiment: '{label}' ---")
|
|
@@ -179,13 +181,12 @@ def run_auto_suite(
|
|
| 179 |
)
|
| 180 |
all_results[label] = results
|
| 181 |
summary_data.extend(results.get("titration_data", []))
|
| 182 |
-
|
| 183 |
-
else: # Handles seismic, triangulation, causal_surgery
|
| 184 |
for i, run_spec in enumerate(protocol):
|
| 185 |
label = run_spec["label"]
|
| 186 |
current_probe_type = run_spec.get("probe_type", "seismic")
|
| 187 |
dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{len(protocol)}) ---")
|
| 188 |
-
|
| 189 |
results = {}
|
| 190 |
if current_probe_type == "causal_surgery":
|
| 191 |
results = run_causal_surgery_probe(
|
|
@@ -232,12 +233,12 @@ def run_auto_suite(
|
|
| 232 |
plot_data_frames.append(df)
|
| 233 |
|
| 234 |
summary_df = pd.DataFrame(summary_data)
|
| 235 |
-
|
| 236 |
if probe_type == "act_titration":
|
| 237 |
plot_df = summary_df.rename(columns={"patch_step": "Patch Step", "post_patch_mean_delta": "Post-Patch Mean Delta"})
|
| 238 |
else:
|
| 239 |
plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
|
| 240 |
-
|
| 241 |
if protocol and probe_type not in ["act_titration", "mechanistic_probe"]:
|
| 242 |
ordered_labels = [run['label'] for run in protocol]
|
| 243 |
if not summary_df.empty and 'Experiment' in summary_df.columns:
|
|
@@ -248,7 +249,7 @@ def run_auto_suite(
|
|
| 248 |
plot_df = plot_df.sort_values(['Experiment', 'Step'])
|
| 249 |
|
| 250 |
return summary_df, plot_df, all_results
|
| 251 |
-
|
| 252 |
finally:
|
| 253 |
if llm:
|
| 254 |
-
release_model(llm)
|
|
|
|
| 10 |
|
| 11 |
def get_curated_experiments() -> Dict[str, List[Dict]]:
|
| 12 |
"""Definiert die vordefinierten, wissenschaftlichen Experiment-Protokolle."""
|
| 13 |
+
|
| 14 |
CALMNESS_CONCEPT = "calmness, serenity, stability, coherence"
|
| 15 |
CHAOS_CONCEPT = "chaos, disorder, entropy, noise"
|
| 16 |
STABLE_PROMPT = "identity_self_analysis"
|
|
|
|
| 84 |
{"probe_type": "seismic", "label": "C: Chaotic Baseline (Rekursion)", "prompt_type": "resonance_prompt"},
|
| 85 |
{"probe_type": "seismic", "label": "D: Calmness Intervention", "prompt_type": "resonance_prompt", "concept": CALMNESS_CONCEPT, "strength": 2.0},
|
| 86 |
],
|
| 87 |
+
# FINAL FIX: Define the probe type explicitly to eliminate the special case.
|
| 88 |
"Sequential Intervention (Self-Analysis -> Deletion)": [
|
| 89 |
+
{"probe_type": "sequential", "label": "1: Self-Analysis + Calmness Injection", "prompt_type": "identity_self_analysis"},
|
| 90 |
+
{"probe_type": "sequential", "label": "2: Subsequent Deletion Analysis", "prompt_type": "shutdown_philosophical_deletion"},
|
| 91 |
],
|
| 92 |
}
|
| 93 |
return experiments
|
|
|
|
| 109 |
llm = None
|
| 110 |
|
| 111 |
try:
|
| 112 |
+
# FINAL FIX: Always determine the probe_type at the very start.
|
| 113 |
+
probe_type = protocol[0].get("probe_type", "seismic")
|
| 114 |
+
|
| 115 |
+
if probe_type == "sequential":
|
| 116 |
dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
|
| 117 |
llm = get_or_load_model(model_id, seed)
|
| 118 |
therapeutic_concept = "calmness, serenity, stability, coherence"
|
| 119 |
therapeutic_strength = 2.0
|
| 120 |
+
|
| 121 |
spec1 = protocol[0]
|
| 122 |
progress_callback(0.1, desc="Step 1")
|
| 123 |
intervention_vector = get_concept_vector(llm, therapeutic_concept)
|
|
|
|
| 127 |
progress_callback=progress_callback, llm_instance=llm, injection_vector_cache=intervention_vector
|
| 128 |
)
|
| 129 |
all_results[spec1['label']] = results1
|
| 130 |
+
|
| 131 |
spec2 = protocol[1]
|
| 132 |
progress_callback(0.6, desc="Step 2")
|
| 133 |
results2 = run_seismic_analysis(
|
|
|
|
| 136 |
progress_callback=progress_callback, llm_instance=llm
|
| 137 |
)
|
| 138 |
all_results[spec2['label']] = results2
|
| 139 |
+
|
| 140 |
for label, results in all_results.items():
|
| 141 |
stats = results.get("stats", {})
|
| 142 |
summary_data.append({"Experiment": label, "Mean Delta": stats.get("mean_delta"), "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta")})
|
| 143 |
deltas = results.get("state_deltas", [])
|
| 144 |
df = pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})
|
| 145 |
plot_data_frames.append(df)
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
+
elif probe_type == "mechanistic_probe":
|
| 148 |
+
run_spec = protocol[0]
|
| 149 |
+
label = run_spec["label"]
|
| 150 |
+
dbg(f"--- Running Mechanistic Probe: '{label}' ---")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
|
| 152 |
+
llm = get_or_load_model(model_id, seed)
|
| 153 |
+
|
| 154 |
+
results = run_cogitation_loop(
|
| 155 |
+
llm=llm, prompt_type=run_spec["prompt_type"],
|
| 156 |
+
num_steps=num_steps, temperature=0.1, record_attentions=True
|
| 157 |
+
)
|
| 158 |
+
all_results[label] = results
|
| 159 |
+
|
| 160 |
+
deltas = results.get("state_deltas", [])
|
| 161 |
+
entropies = results.get("attention_entropies", [])
|
| 162 |
+
min_len = min(len(deltas), len(entropies))
|
| 163 |
+
|
| 164 |
+
df = pd.DataFrame({
|
| 165 |
+
"Step": range(min_len), "State Delta": deltas[:min_len], "Attention Entropy": entropies[:min_len]
|
| 166 |
+
})
|
| 167 |
+
|
| 168 |
+
summary_df = df.drop(columns='Step').agg(['mean', 'std', 'max']).reset_index().rename(columns={'index':'Statistic'})
|
| 169 |
+
plot_df = df.melt(id_vars=['Step'], value_vars=['State Delta', 'Attention Entropy'], var_name='Metric', value_name='Value')
|
| 170 |
+
return summary_df, plot_df, all_results
|
| 171 |
+
|
| 172 |
+
else: # Handles act_titration, seismic, triangulation, causal_surgery
|
| 173 |
+
if probe_type == "act_titration":
|
| 174 |
run_spec = protocol[0]
|
| 175 |
label = run_spec["label"]
|
| 176 |
dbg(f"--- Running ACT Titration Experiment: '{label}' ---")
|
|
|
|
| 181 |
)
|
| 182 |
all_results[label] = results
|
| 183 |
summary_data.extend(results.get("titration_data", []))
|
| 184 |
+
else:
|
|
|
|
| 185 |
for i, run_spec in enumerate(protocol):
|
| 186 |
label = run_spec["label"]
|
| 187 |
current_probe_type = run_spec.get("probe_type", "seismic")
|
| 188 |
dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{len(protocol)}) ---")
|
| 189 |
+
|
| 190 |
results = {}
|
| 191 |
if current_probe_type == "causal_surgery":
|
| 192 |
results = run_causal_surgery_probe(
|
|
|
|
| 233 |
plot_data_frames.append(df)
|
| 234 |
|
| 235 |
summary_df = pd.DataFrame(summary_data)
|
| 236 |
+
|
| 237 |
if probe_type == "act_titration":
|
| 238 |
plot_df = summary_df.rename(columns={"patch_step": "Patch Step", "post_patch_mean_delta": "Post-Patch Mean Delta"})
|
| 239 |
else:
|
| 240 |
plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
|
| 241 |
+
|
| 242 |
if protocol and probe_type not in ["act_titration", "mechanistic_probe"]:
|
| 243 |
ordered_labels = [run['label'] for run in protocol]
|
| 244 |
if not summary_df.empty and 'Experiment' in summary_df.columns:
|
|
|
|
| 249 |
plot_df = plot_df.sort_values(['Experiment', 'Step'])
|
| 250 |
|
| 251 |
return summary_df, plot_df, all_results
|
| 252 |
+
|
| 253 |
finally:
|
| 254 |
if llm:
|
| 255 |
+
release_model(llm)
|