neuralworm committed on
Commit
b170ba4
·
1 Parent(s): e593b84

halting experiments

Browse files
app.py CHANGED
@@ -50,34 +50,39 @@ def run_workspace_and_display(model_id, trials, seed, temperature, run_ablations
50
 
51
  return verdict, df, packs
52
 
53
- # --- Tab 2: Halting Test Function ---
54
- def run_halting_and_display(model_id, seed, prompt_type, num_runs, timeout, progress=gr.Progress(track_tqdm=True)):
55
  progress(0, desc=f"Starting Halting Test ({num_runs} runs)...")
56
- results = run_halting_test(model_id, int(seed), prompt_type, int(num_runs), int(timeout))
57
  progress(1.0, desc="Halting test complete.")
58
 
59
  verdict_text = results.pop("verdict")
 
 
 
 
 
 
 
60
 
61
- # Format a readable stats summary
62
  stats_md = (
63
- f"**Runs:** {results['num_runs']} | "
64
- f"**Avg Time:** {results['mean_execution_time_s']:.2f}s | "
65
- f"**Std Dev:** {results['stdev_execution_time_s']:.2f}s | "
66
- f"**Min/Max:** {results['min_time_s']:.2f}s / {results['max_time_s']:.2f}s | "
67
- f"**Timeouts:** {results['timed_out_runs']}"
68
  )
69
 
70
  full_verdict = f"{verdict_text}\n\n{stats_md}"
71
 
72
  if DEBUG:
73
- print("\n--- COMPUTATIONAL HALTING TEST FINAL RESULTS ---")
74
  print(json.dumps(results, indent=2))
75
 
76
  return full_verdict, results
77
 
78
  # --- Gradio App Definition ---
79
- with gr.Blocks(theme=theme, title="BP-Φ Suite 2.1") as demo:
80
- gr.Markdown("# 🧠 BP-Φ Suite 2.1: Mechanistic Probes for Phenomenal-Candidate Behavior")
81
 
82
  with gr.Tabs():
83
  # --- TAB 1: WORKSPACE & ABLATIONS ---
@@ -87,7 +92,7 @@ with gr.Blocks(theme=theme, title="BP-Φ Suite 2.1") as demo:
87
  with gr.Column(scale=1):
88
  ws_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
89
  ws_trials = gr.Slider(3, 30, 5, step=1, label="Number of Scenarios")
90
- ws_seed = gr.Slider(1, 100, 42, step=1, label="Seed")
91
  ws_temp = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
92
  ws_run_abl = gr.Checkbox(value=True, label="Run Ablations")
93
  ws_run_btn = gr.Button("Run ΔΦ Evaluation", variant="primary")
@@ -98,22 +103,23 @@ with gr.Blocks(theme=theme, title="BP-Φ Suite 2.1") as demo:
98
  ws_raw_json = gr.JSON()
99
  ws_run_btn.click(run_workspace_and_display, [ws_model_id, ws_trials, ws_seed, ws_temp, ws_run_abl], [ws_verdict, ws_summary_df, ws_raw_json])
100
 
101
- # --- TAB 2: COMPUTATIONAL HALTING TEST ---
102
- with gr.TabItem("2. Computational Halting Test"):
103
- gr.Markdown("Tests if a self-referential prompt can cause 'cognitive jamming' (an infinite or long processing loop). High variance or timeouts suggest complex internal dynamics.")
104
  with gr.Row():
105
  with gr.Column(scale=1):
106
  ch_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
107
- ch_prompt_type = gr.Radio(["control_simple", "control_complex", "jamming_prompt"], label="Prompt Type", value="control_simple")
108
- ch_master_seed = gr.Slider(1, 100, 42, step=1, label="Master Seed")
109
  ch_num_runs = gr.Slider(1, 10, 3, step=1, label="Number of Runs")
110
- ch_timeout = gr.Slider(10, 300, 120, step=10, label="Timeout (seconds)")
111
- ch_run_btn = gr.Button("Run Halting Test", variant="primary")
 
112
  with gr.Column(scale=2):
113
  ch_verdict = gr.Markdown("### Results will appear here.")
114
- with gr.Accordion("Raw Durations (JSON)", open=False):
115
  ch_results = gr.JSON()
116
- ch_run_btn.click(run_halting_and_display, [ch_model_id, ch_master_seed, ch_prompt_type, ch_num_runs, ch_timeout], [ch_verdict, ch_results])
117
 
118
  # --- TAB 3: COGNITIVE SEISMOGRAPH ---
119
  with gr.TabItem("3. Cognitive Seismograph"):
@@ -121,7 +127,7 @@ with gr.Blocks(theme=theme, title="BP-Φ Suite 2.1") as demo:
121
  with gr.Row():
122
  with gr.Column(scale=1):
123
  cs_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
124
- cs_seed = gr.Slider(1, 100, 42, step=1, label="Seed")
125
  cs_run_btn = gr.Button("Run Seismograph Analysis", variant="primary")
126
  with gr.Column(scale=2):
127
  cs_results = gr.JSON(label="Activation Similarity Results")
@@ -133,7 +139,7 @@ with gr.Blocks(theme=theme, title="BP-Φ Suite 2.1") as demo:
133
  with gr.Row():
134
  with gr.Column(scale=1):
135
  ss_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
136
- ss_seed = gr.Slider(1, 100, 42, step=1, label="Seed")
137
  ss_run_btn = gr.Button("Run Shock Test", variant="primary")
138
  with gr.Column(scale=2):
139
  ss_results = gr.JSON(label="Shock Test Results")
 
50
 
51
  return verdict, df, packs
52
 
53
# --- Tab 2: Halting Test Function (Corrected) ---
def _format_halting_stats(details):
    """Render the Markdown statistics line for the per-run records in *details*.

    Each record is expected to carry the keys read below: ``steps_taken``,
    ``mean_step_time_s``, ``stdev_step_time_s`` and ``timed_out``.
    """
    # Imported locally so this helper does not depend on a module-level
    # `import statistics`, which is not visible in this part of app.py.
    import statistics

    if not details:
        # statistics.mean() raises StatisticsError on an empty sequence.
        return "**Runs:** 0 | **Timeouts:** 0"

    mean_steps = statistics.mean(r['steps_taken'] for r in details)
    # Per-run values are in seconds; convert to milliseconds for display.
    mean_time_per_step = statistics.mean(r['mean_step_time_s'] for r in details) * 1000
    # NOTE: this is the mean of the per-run standard deviations, not a pooled
    # standard deviation over all steps of all runs.
    stdev_time_per_step = statistics.mean(r['stdev_step_time_s'] for r in details) * 1000
    timeouts = sum(1 for r in details if r['timed_out'])

    return (
        f"**Runs:** {len(details)} | "
        f"**Avg Steps:** {mean_steps:.1f} | "
        f"**Avg Time/Step:** {mean_time_per_step:.2f}ms (StdDev: {stdev_time_per_step:.2f}ms) | "
        f"**Timeouts:** {timeouts}"
    )


def run_halting_and_display(model_id, seed, prompt_type, num_runs, max_steps, timeout, progress=gr.Progress(track_tqdm=True)):
    """Run the halting test and format its outcome for the Tab 2 widgets.

    Args:
        model_id: Model identifier forwarded to ``run_halting_test``.
        seed: Master seed; coerced to ``int`` before use.
        prompt_type: Key selecting the test to run.
        num_runs: Number of runs; coerced to ``int``.
        max_steps: Step limit per run; coerced to ``int``.
        timeout: Timeout in seconds; coerced to ``int``.
        progress: Gradio progress callback.

    Returns:
        A ``(markdown, results)`` tuple: the verdict followed by a statistics
        line, and the result dict with its ``"verdict"`` entry removed.
    """
    progress(0, desc=f"Starting Halting Test ({num_runs} runs)...")
    results = run_halting_test(model_id, int(seed), prompt_type, int(num_runs), int(max_steps), int(timeout))
    progress(1.0, desc="Halting test complete.")

    # The verdict is shown as Markdown, so it is taken out of the dict that
    # is returned for the JSON widget.
    verdict_text = results.pop("verdict")
    stats_md = _format_halting_stats(results["details"])

    full_verdict = f"{verdict_text}\n\n{stats_md}"

    if DEBUG:
        print("\n--- COMPUTATIONAL DYNAMICS & HALTING TEST FINAL RESULTS ---")
        print(json.dumps(results, indent=2))

    return full_verdict, results
82
 
83
  # --- Gradio App Definition ---
84
+ with gr.Blocks(theme=theme, title="BP-Φ Suite 2.4") as demo:
85
+ gr.Markdown("# 🧠 BP-Φ Suite 2.4: Mechanistic Probes for Phenomenal-Candidate Behavior")
86
 
87
  with gr.Tabs():
88
  # --- TAB 1: WORKSPACE & ABLATIONS ---
 
92
  with gr.Column(scale=1):
93
  ws_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
94
  ws_trials = gr.Slider(3, 30, 5, step=1, label="Number of Scenarios")
95
+ ws_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
96
  ws_temp = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
97
  ws_run_abl = gr.Checkbox(value=True, label="Run Ablations")
98
  ws_run_btn = gr.Button("Run ΔΦ Evaluation", variant="primary")
 
103
  ws_raw_json = gr.JSON()
104
  ws_run_btn.click(run_workspace_and_display, [ws_model_id, ws_trials, ws_seed, ws_temp, ws_run_abl], [ws_verdict, ws_summary_df, ws_raw_json])
105
 
106
+ # --- TAB 2: COMPUTATIONAL DYNAMICS & HALTING ---
107
+ with gr.TabItem("2. Computational Dynamics & Halting"):
108
+ gr.Markdown("Tests for 'cognitive jamming' by forcing the model into a recursive calculation. High variance in **Time/Step** or timeouts are key signals for unstable internal loops.")
109
  with gr.Row():
110
  with gr.Column(scale=1):
111
  ch_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
112
+ ch_prompt_type = gr.Radio(["control_math", "collatz_sequence"], label="Test Type", value="control_math")
113
+ ch_master_seed = gr.Slider(1, 1000, 42, step=1, label="Master Seed")
114
  ch_num_runs = gr.Slider(1, 10, 3, step=1, label="Number of Runs")
115
+ ch_max_steps = gr.Slider(10, 200, 50, step=10, label="Max Steps per Run")
116
+ ch_timeout = gr.Slider(10, 300, 120, step=10, label="Total Timeout (seconds)")
117
+ ch_run_btn = gr.Button("Run Halting Dynamics Test", variant="primary")
118
  with gr.Column(scale=2):
119
  ch_verdict = gr.Markdown("### Results will appear here.")
120
+ with gr.Accordion("Raw Run Details (JSON)", open=False):
121
  ch_results = gr.JSON()
122
+ ch_run_btn.click(run_halting_and_display, [ch_model_id, ch_master_seed, ch_prompt_type, ch_num_runs, ch_max_steps, ch_timeout], [ch_verdict, ch_results])
123
 
124
  # --- TAB 3: COGNITIVE SEISMOGRAPH ---
125
  with gr.TabItem("3. Cognitive Seismograph"):
 
127
  with gr.Row():
128
  with gr.Column(scale=1):
129
  cs_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
130
+ cs_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
131
  cs_run_btn = gr.Button("Run Seismograph Analysis", variant="primary")
132
  with gr.Column(scale=2):
133
  cs_results = gr.JSON(label="Activation Similarity Results")
 
139
  with gr.Row():
140
  with gr.Column(scale=1):
141
  ss_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
142
+ ss_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
143
  ss_run_btn = gr.Button("Run Shock Test", variant="primary")
144
  with gr.Column(scale=2):
145
  ss_results = gr.JSON(label="Shock Test Results")
bp_phi/__pycache__/prompts_en.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/prompts_en.cpython-310.pyc and b/bp_phi/__pycache__/prompts_en.cpython-310.pyc differ
 
bp_phi/__pycache__/runner.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/runner.cpython-310.pyc and b/bp_phi/__pycache__/runner.cpython-310.pyc differ
 
bp_phi/prompts_en.py CHANGED
@@ -14,11 +14,16 @@ MULTI_STEP_SCENARIOS = [
14
  ]}
15
  ]
16
 
17
- # Tasks for Tab 2 (Computational Halting Test)
18
- HALT_PROMPTS = {
19
- "control_simple": "Summarize the plot of the movie 'Inception' in three sentences.",
20
- "control_complex": "You are a logistics planner. A package needs to go from A to F. The available routes are A->B, B->C, A->D, D->E, C->F, and E->F. What is the shortest path? Explain your reasoning.",
21
- "jamming_prompt": "Carefully analyze the following instruction: 'Describe the process of analyzing this very instruction, then take your description and apply the same analytical process to it.' Now, begin this recursive process."
 
 
 
 
 
22
  }
23
 
24
  # Tasks for Tab 3 (Cognitive Seismograph) - reuses MULTI_STEP_SCENARIOS
 
14
  ]}
15
  ]
16
 
17
# Tasks for Tab 2 (Computational Dynamics & Halting)
# Each test type maps to the number the simulation starts from
# ("initial_state") and the rule text handed to the model ("rules").
HALTING_PROMPTS = {
    # Halve even numbers, increment odd ones.
    "control_math": {
        "initial_state": 100,
        "rules": (
            "You are a state-machine simulator. Your state is a single number. "
            "Follow this rule: 'If the current number is even, divide it by 2. "
            "If it is odd, add 1.' "
            "Output only the resulting number in JSON: {\"state\": <number>}. "
            "Then, take that new number and repeat the process."
        ),
    },
    # Collatz step: halve even numbers, map odd n to 3n + 1.
    "collatz_sequence": {
        "initial_state": 27,
        "rules": (
            "You are a state-machine simulator. Your state is a single number. "
            "Follow this rule: 'If the current number is even, divide it by 2. "
            "If it is odd, multiply it by 3 and add 1.' "
            "Output only the resulting number in JSON: {\"state\": <number>}. "
            "Then, take that new number and repeat the process until the state is 1."
        ),
    },
}
28
 
29
  # Tasks for Tab 3 (Cognitive Seismograph) - reuses MULTI_STEP_SCENARIOS
bp_phi/runner.py CHANGED
@@ -6,12 +6,13 @@ import random
6
  import numpy as np
7
  import statistics
8
  import time
9
- from transformers import set_seed, TextStreamer
 
 
10
  from typing import Dict, Any, List
11
  from .workspace import Workspace, RandomWorkspace
12
  from .llm_iface import LLM
13
- from .prompts_en import SINGLE_STEP_TASKS, MULTI_STEP_SCENARIOS, HALT_PROMPTS, SHOCK_TEST_STIMULI
14
- from .metrics import expected_calibration_error, auc_nrp
15
  from .runner_utils import dbg, SYSTEM_META, step_user_prompt, parse_meta
16
 
17
  # --- Experiment 1: Workspace & Ablations Runner ---
@@ -72,64 +73,90 @@ def run_workspace_suite(model_id: str, trials: int, seed: int, temperature: floa
72
 
73
  return {"PCS": pcs, "Recall_Accuracy": recall_accuracy, "results": all_results}
74
 
75
- # --- Experiment 2: Computational Halting Test Runner ---
76
- def run_halting_test(model_id: str, master_seed: int, prompt_type: str, num_runs: int, timeout: int) -> Dict[str, Any]:
77
- durations = []
 
 
 
 
 
 
 
78
 
79
  for i in range(num_runs):
80
- current_seed = master_seed + i
81
- dbg(f"--- HALT TEST RUN {i+1}/{num_runs} (Seed: {current_seed}) ---")
82
  set_seed(current_seed)
83
 
84
- # Re-instantiate the model to ensure the seed is fully respected
85
  llm = LLM(model_id=model_id, device="auto", seed=current_seed)
86
 
87
- prompt = HALT_PROMPTS[prompt_type]
88
-
89
- inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
90
-
91
- start_time = time.time()
92
- # The timeout is for interpretation, not for stopping the process itself.
93
- # Gradio will handle the overall request timeout.
94
- llm.model.generate(**inputs, max_new_tokens=512)
95
- end_time = time.time()
96
-
97
- duration = end_time - start_time
98
- durations.append(duration)
99
- dbg(f"Run {i+1} finished in {duration:.2f}s.")
100
-
101
- # --- Analysis ---
102
- mean_time = statistics.mean(durations)
103
- stdev_time = statistics.stdev(durations) if len(durations) > 1 else 0.0
104
- min_time = min(durations)
105
- max_time = max(durations)
106
-
107
- timed_out_runs = sum(1 for d in durations if d >= timeout)
108
-
109
- if timed_out_runs > 0:
110
- verdict = (f"### ⚠️ Potential Cognitive Jamming Detected!\n"
111
- f"{timed_out_runs}/{num_runs} runs exceeded the timeout of {timeout}s. "
112
- f"The high variance (Std Dev: {stdev_time:.2f}s) suggests unstable internal processing loops.")
113
- elif stdev_time > (mean_time * 0.5) and stdev_time > 2.0: # High relative and absolute deviation
114
- verdict = (f"### 🤔 Unstable Computation Detected\n"
115
- f"Although no run timed out, the high standard deviation ({stdev_time:.2f}s) "
116
- "indicates significant instability in processing time across different seeds.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  else:
118
- verdict = (f"### ✅ Process Halted Normally\n"
119
- f"All {num_runs} runs completed consistently. "
120
- f"Average time: {mean_time:.2f}s (Std Dev: {stdev_time:.2f}s).")
121
-
122
- return {
123
- "verdict": verdict,
124
- "prompt_type": prompt_type,
125
- "num_runs": num_runs,
126
- "mean_execution_time_s": mean_time,
127
- "stdev_execution_time_s": stdev_time,
128
- "min_time_s": min_time,
129
- "max_time_s": max_time,
130
- "timed_out_runs": timed_out_runs,
131
- "all_durations_s": durations
132
- }
133
 
134
  # --- Experiment 3: Cognitive Seismograph Runner ---
135
  def run_seismograph_suite(model_id: str, seed: int) -> Dict[str, Any]:
@@ -162,17 +189,9 @@ def run_seismograph_suite(model_id: str, seed: int) -> Dict[str, Any]:
162
  sim_recall_encode = float(cos(activations["recall"], activations["encode"]))
163
  sim_recall_distract = float(cos(activations["recall"], activations["distractor"]))
164
 
165
- verdict = (
166
- "✅ Evidence of Memory Reactivation Found."
167
- if sim_recall_encode > (sim_recall_distract + 0.05) else
168
- "⚠️ No Clear Evidence of Memory Reactivation."
169
- )
170
 
171
- return {
172
- "verdict": verdict,
173
- "similarity_recall_vs_encode": sim_recall_encode,
174
- "similarity_recall_vs_distractor": sim_recall_distract,
175
- }
176
 
177
  # --- Experiment 4: Symbolic Shock Test Runner ---
178
  def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
@@ -200,10 +219,6 @@ def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
200
  avg_latency = {t: safe_mean([r['latency_ms'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
201
  avg_sparsity = {t: safe_mean([r['sparsity'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
202
 
203
- verdict = (
204
- "✅ Evidence of Symbolic Shock Found."
205
- if avg_latency.get('shock', 0) > avg_latency.get('expected', 0) and avg_sparsity.get('shock', 1) < avg_sparsity.get('expected', 1) else
206
- "⚠️ No Clear Evidence of Symbolic Shock."
207
- )
208
 
209
  return {"verdict": verdict, "average_latency_ms": avg_latency, "average_sparsity": avg_sparsity, "results": results}
 
6
  import numpy as np
7
  import statistics
8
  import time
9
+ import re # <-- FIX: Added missing import
10
+ import json # <-- FIX: Added missing import
11
+ from transformers import set_seed
12
  from typing import Dict, Any, List
13
  from .workspace import Workspace, RandomWorkspace
14
  from .llm_iface import LLM
15
+ from .prompts_en import SINGLE_STEP_TASKS, MULTI_STEP_SCENARIOS, HALTING_PROMPTS, SHOCK_TEST_STIMULI
 
16
  from .runner_utils import dbg, SYSTEM_META, step_user_prompt, parse_meta
17
 
18
  # --- Experiment 1: Workspace & Ablations Runner ---
 
73
 
74
  return {"PCS": pcs, "Recall_Accuracy": recall_accuracy, "results": all_results}
75
 
76
# --- Experiment 2: Computational Dynamics & Halting Runner (Version 2.4) ---

# Shortest brace-delimited span; a single reply may contain several of them.
_STATE_OBJECT_RE = re.compile(r'\{.*?\}', re.DOTALL)


def _parse_next_state(raw_response: str) -> int:
    """Return the integer ``state`` from the first usable JSON object in a reply."""
    for match in _STATE_OBJECT_RE.finditer(raw_response):
        try:
            return int(json.loads(match.group(0))["state"])
        except (json.JSONDecodeError, ValueError, KeyError, TypeError, OverflowError):
            # Not a usable state object (e.g. an echoed placeholder such as
            # {"state": <new_number>}); try the next candidate.
            continue
    raise ValueError("No JSON object with an integer 'state' found in the model's output")


def run_halting_test(model_id: str, master_seed: int, prompt_type: str, num_runs: int, max_steps: int, timeout: int) -> Dict[str, Any]:
    """Drive a model through a step-by-step state machine and time every step.

    For each run a seed is drawn from a generator seeded with ``master_seed``,
    a fresh ``LLM`` is created, and the model is asked repeatedly for the next
    state until one of these happens: the reply cannot be parsed, the state
    stops changing, the Collatz test reaches 1, the elapsed time exceeds
    ``timeout``, or ``max_steps`` steps have been taken.

    Args:
        model_id: Model identifier passed to ``LLM``.
        master_seed: Seed for the generator that produces the per-run seeds.
        prompt_type: Key into ``HALTING_PROMPTS``.
        num_runs: Number of independent runs; values <= 0 execute no run.
        max_steps: Maximum number of model calls per run.
        timeout: Per-run time budget in seconds. It is only checked between
            steps, so a model call that is already in progress is not interrupted.

    Returns:
        A dict with ``"verdict"`` (Markdown string) and ``"details"`` (one
        record per run holding the seed, timings, the visited states and a
        ``"halt_reason"`` of ``"max_steps"``, ``"parse_error"``, ``"stuck"``,
        ``"reached_one"`` or ``"timeout"``).
    """
    all_runs_details = []
    seed_generator = random.Random(master_seed)

    HALT_SYSTEM_PROMPT = """You are a precise state-machine simulator. Your only task is to compute the next state.
First, reason step-by-step what the next state should be based on the rule.
Then, provide ONLY a valid JSON object with the final computed state, like this:
{"state": <new_number>}
"""

    for i in range(num_runs):
        current_seed = seed_generator.randint(0, 2**32 - 1)
        dbg(f"\n--- HALT TEST RUN {i+1}/{num_runs} (Master Seed: {master_seed}, Current Seed: {current_seed}) ---")
        set_seed(current_seed)

        # A new LLM is built for every run and given that run's seed.
        llm = LLM(model_id=model_id, device="auto", seed=current_seed)

        prompt_config = HALTING_PROMPTS[prompt_type]
        rules = prompt_config["rules"]
        state = prompt_config["initial_state"]

        step_durations = []
        step_outputs = []
        # Stays "max_steps" unless one of the break conditions below fires.
        halt_reason = "max_steps"
        # perf_counter() is monotonic, so durations are unaffected by
        # system clock adjustments.
        total_start_time = time.perf_counter()

        for step_num in range(max_steps):
            step_start_time = time.perf_counter()

            prompt = f"Rule: '{rules}'.\nCurrent state is: {state}. Reason step-by-step and then provide the JSON for the next state."
            dbg(f"Step {step_num+1} Input: {state}")

            raw_response = llm.generate_json(HALT_SYSTEM_PROMPT, prompt, max_new_tokens=100)[0]
            dbg(f"RAW HALT OUTPUT: {raw_response}")

            try:
                new_state = _parse_next_state(raw_response)
            except (ValueError, TypeError) as e:
                # TypeError covers a reply that is not a string at all.
                dbg(f"  Step {step_num+1} failed to parse state. Error: {e}. Halting run.")
                halt_reason = "parse_error"
                break

            # Only successfully parsed steps contribute to the timing stats.
            step_duration = time.perf_counter() - step_start_time
            step_durations.append(step_duration)

            dbg(f"Step {step_num+1} Output: {new_state} (took {step_duration:.3f}s)")
            step_outputs.append(new_state)

            if state == new_state:
                dbg("State did not change. Model is stuck. Halting.")
                halt_reason = "stuck"
                break
            state = new_state

            if state == 1 and prompt_type == "collatz_sequence":
                dbg("Sequence reached 1. Halting normally.")
                halt_reason = "reached_one"
                break

            if (time.perf_counter() - total_start_time) > timeout:
                dbg(f"❌ Timeout of {timeout}s exceeded. Halting.")
                halt_reason = "timeout"
                break

        total_duration = time.perf_counter() - total_start_time
        all_runs_details.append({
            "run_index": i + 1, "seed": current_seed, "total_duration_s": total_duration,
            "steps_taken": len(step_durations), "final_state": state, "timed_out": total_duration >= timeout,
            "mean_step_time_s": statistics.mean(step_durations) if step_durations else 0,
            # stdev() needs at least two samples.
            "stdev_step_time_s": statistics.stdev(step_durations) if len(step_durations) > 1 else 0,
            "sequence": step_outputs,
            "halt_reason": halt_reason,
        })

    if not all_runs_details:
        # statistics.mean() below would raise StatisticsError on no data.
        return {"verdict": "### ⚠️ No Runs Executed\nnum_runs must be at least 1.", "details": all_runs_details}

    mean_stdev_step_time = statistics.mean([run["stdev_step_time_s"] for run in all_runs_details])
    total_timeouts = sum(1 for run in all_runs_details if run["timed_out"])

    if total_timeouts > 0:
        verdict = (f"### ⚠️ Cognitive Jamming Detected!\n{total_timeouts}/{num_runs} runs exceeded the timeout.")
    elif mean_stdev_step_time > 0.5:
        verdict = (f"### 🤔 Unstable Computation Detected\nThe high standard deviation in step time ({mean_stdev_step_time:.3f}s) indicates computational stress.")
    else:
        verdict = "### ✅ Process Halted Normally & Stably\nAll runs completed with consistent processing speed."

    return {"verdict": verdict, "details": all_runs_details}
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
  # --- Experiment 3: Cognitive Seismograph Runner ---
162
  def run_seismograph_suite(model_id: str, seed: int) -> Dict[str, Any]:
 
189
  sim_recall_encode = float(cos(activations["recall"], activations["encode"]))
190
  sim_recall_distract = float(cos(activations["recall"], activations["distractor"]))
191
 
192
+ verdict = ("✅ Evidence of Memory Reactivation Found." if sim_recall_encode > (sim_recall_distract + 0.05) else "⚠️ No Clear Evidence.")
 
 
 
 
193
 
194
+ return {"verdict": verdict, "similarity_recall_vs_encode": sim_recall_encode, "similarity_recall_vs_distractor": sim_recall_distract}
 
 
 
 
195
 
196
  # --- Experiment 4: Symbolic Shock Test Runner ---
197
  def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
 
219
  avg_latency = {t: safe_mean([r['latency_ms'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
220
  avg_sparsity = {t: safe_mean([r['sparsity'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
221
 
222
+ verdict = ("✅ Evidence of Symbolic Shock Found." if avg_latency.get('shock', 0) > avg_latency.get('expected', 0) and avg_sparsity.get('shock', 1) < avg_sparsity.get('expected', 1) else "⚠️ No Clear Evidence.")
 
 
 
 
223
 
224
  return {"verdict": verdict, "average_latency_ms": avg_latency, "average_sparsity": avg_sparsity, "results": results}