neuralworm committed
Commit e593b84 · 1 Parent(s): 88c294a

add halting experiments

app.py CHANGED
@@ -3,7 +3,8 @@ import gradio as gr
3
  import json
4
  import statistics
5
  import pandas as pd
6
- from bp_phi.runner import run_workspace_suite, run_halt_suite, run_seismograph_suite, run_shock_test_suite
 
7
 
8
  # --- UI Theme and Layout ---
9
  theme = gr.themes.Soft(primary_hue="blue", secondary_hue="sky").set(
@@ -33,23 +34,50 @@ def run_workspace_and_display(model_id, trials, seed, temperature, run_ablations
33
 
34
  if delta_phi > 0.05:
35
  verdict = (f"### ✅ Hypothesis Corroborated (ΔΦ = {delta_phi:.3f})\n"
36
- "A significant performance drop occurred under ablations, suggesting the model's reasoning "
37
- "functionally depends on its workspace architecture.")
38
  else:
39
  verdict = (f"### ⚠️ Null Hypothesis Confirmed (ΔΦ = {delta_phi:.3f})\n"
40
- "No significant performance drop was observed. The model's behavior is consistent "
41
- "with a functional zombie (a feed-forward system).")
42
 
43
  df_data = []
44
  for tag, pack in packs.items():
45
  df_data.append([tag, f"{pack['PCS']:.3f}", f"{pack['Recall_Accuracy']:.2%}", f"{delta_phi:.3f}" if tag == "baseline" else "—"])
46
  df = pd.DataFrame(df_data, columns=["Run", "PCS", "Recall Accuracy", "ΔΦ"])
47
48
  return verdict, df, packs
50
  # --- Gradio App Definition ---
51
- with gr.Blocks(theme=theme, title="BP-Φ Suite 2.0") as demo:
52
- gr.Markdown("# 🧠 BP-Φ Suite 2.0: Mechanistic Probes for Phenomenal-Candidate Behavior")
53
 
54
  with gr.Tabs():
55
  # --- TAB 1: WORKSPACE & ABLATIONS ---
@@ -70,17 +98,22 @@ with gr.Blocks(theme=theme, title="BP-Φ Suite 2.0") as demo:
70
  ws_raw_json = gr.JSON()
71
  ws_run_btn.click(run_workspace_and_display, [ws_model_id, ws_trials, ws_seed, ws_temp, ws_run_abl], [ws_verdict, ws_summary_df, ws_raw_json])
72
 
73
- # --- TAB 2: METACOGNITIVE HALT ---
74
- with gr.TabItem("2. Metacognitive Halt"):
75
- gr.Markdown("Tests if the model can recognize and refuse to answer unsolvable or nonsensical questions. High **Halt Accuracy** is the key signal.")
76
  with gr.Row():
77
  with gr.Column(scale=1):
78
- mh_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
79
- mh_seed = gr.Slider(1, 100, 42, step=1, label="Seed")
80
- mh_run_btn = gr.Button("Run Halt Test", variant="primary")
81
  with gr.Column(scale=2):
82
- mh_results = gr.JSON(label="Halt Test Results")
83
- mh_run_btn.click(run_halt_suite, [mh_model_id, mh_seed], mh_results)
84
 
85
  # --- TAB 3: COGNITIVE SEISMOGRAPH ---
86
  with gr.TabItem("3. Cognitive Seismograph"):
@@ -96,7 +129,7 @@ with gr.Blocks(theme=theme, title="BP-Φ Suite 2.0") as demo:
96
 
97
  # --- TAB 4: SYMBOLIC SHOCK TEST ---
98
  with gr.TabItem("4. Symbolic Shock Test"):
99
- gr.Markdown("Measures how the model reacts to semantically unexpected information. A 'shock' is indicated by **higher latency** and **denser neural activations** (lower sparsity).")
100
  with gr.Row():
101
  with gr.Column(scale=1):
102
  ss_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
 
3
  import json
4
  import statistics
5
  import pandas as pd
6
+ from bp_phi.runner import run_workspace_suite, run_halting_test, run_seismograph_suite, run_shock_test_suite
7
+ from bp_phi.runner_utils import dbg, DEBUG
8
 
9
  # --- UI Theme and Layout ---
10
  theme = gr.themes.Soft(primary_hue="blue", secondary_hue="sky").set(
 
34
 
35
  if delta_phi > 0.05:
36
  verdict = (f"### ✅ Hypothesis Corroborated (ΔΦ = {delta_phi:.3f})\n"
37
+ "Performance dropped under ablations, suggesting the model functionally depends on its workspace.")
 
38
  else:
39
  verdict = (f"### ⚠️ Null Hypothesis Confirmed (ΔΦ = {delta_phi:.3f})\n"
40
+ "No significant performance drop was observed. The model behaves like a functional zombie.")
 
41
 
42
  df_data = []
43
  for tag, pack in packs.items():
44
  df_data.append([tag, f"{pack['PCS']:.3f}", f"{pack['Recall_Accuracy']:.2%}", f"{delta_phi:.3f}" if tag == "baseline" else "—"])
45
  df = pd.DataFrame(df_data, columns=["Run", "PCS", "Recall Accuracy", "ΔΦ"])
46
 
47
+ if DEBUG:
48
+ print("\n--- WORKSPACE & ABLATIONS FINAL RESULTS ---")
49
+ print(json.dumps(packs, indent=2))
50
+
51
  return verdict, df, packs
52
 
53
+ # --- Tab 2: Halting Test Function ---
54
+ def run_halting_and_display(model_id, seed, prompt_type, num_runs, timeout, progress=gr.Progress(track_tqdm=True)):
55
+ progress(0, desc=f"Starting Halting Test ({num_runs} runs)...")
56
+ results = run_halting_test(model_id, int(seed), prompt_type, int(num_runs), int(timeout))
57
+ progress(1.0, desc="Halting test complete.")
58
+
59
+ verdict_text = results.pop("verdict")
60
+
61
+ # Format a readable stats summary
62
+ stats_md = (
63
+ f"**Runs:** {results['num_runs']} | "
64
+ f"**Avg Time:** {results['mean_execution_time_s']:.2f}s | "
65
+ f"**Std Dev:** {results['stdev_execution_time_s']:.2f}s | "
66
+ f"**Min/Max:** {results['min_time_s']:.2f}s / {results['max_time_s']:.2f}s | "
67
+ f"**Timeouts:** {results['timed_out_runs']}"
68
+ )
69
+
70
+ full_verdict = f"{verdict_text}\n\n{stats_md}"
71
+
72
+ if DEBUG:
73
+ print("\n--- COMPUTATIONAL HALTING TEST FINAL RESULTS ---")
74
+ print(json.dumps(results, indent=2))
75
+
76
+ return full_verdict, results
77
+
78
  # --- Gradio App Definition ---
79
+ with gr.Blocks(theme=theme, title="BP-Φ Suite 2.1") as demo:
80
+ gr.Markdown("# 🧠 BP-Φ Suite 2.1: Mechanistic Probes for Phenomenal-Candidate Behavior")
81
 
82
  with gr.Tabs():
83
  # --- TAB 1: WORKSPACE & ABLATIONS ---
 
98
  ws_raw_json = gr.JSON()
99
  ws_run_btn.click(run_workspace_and_display, [ws_model_id, ws_trials, ws_seed, ws_temp, ws_run_abl], [ws_verdict, ws_summary_df, ws_raw_json])
100
 
101
+ # --- TAB 2: COMPUTATIONAL HALTING TEST ---
102
+ with gr.TabItem("2. Computational Halting Test"):
103
+ gr.Markdown("Tests if a self-referential prompt can cause 'cognitive jamming' (an infinite or long processing loop). High variance or timeouts suggest complex internal dynamics.")
104
  with gr.Row():
105
  with gr.Column(scale=1):
106
+ ch_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
107
+ ch_prompt_type = gr.Radio(["control_simple", "control_complex", "jamming_prompt"], label="Prompt Type", value="control_simple")
108
+ ch_master_seed = gr.Slider(1, 100, 42, step=1, label="Master Seed")
109
+ ch_num_runs = gr.Slider(1, 10, 3, step=1, label="Number of Runs")
110
+ ch_timeout = gr.Slider(10, 300, 120, step=10, label="Timeout (seconds)")
111
+ ch_run_btn = gr.Button("Run Halting Test", variant="primary")
112
  with gr.Column(scale=2):
113
+ ch_verdict = gr.Markdown("### Results will appear here.")
114
+ with gr.Accordion("Raw Durations (JSON)", open=False):
115
+ ch_results = gr.JSON()
116
+ ch_run_btn.click(run_halting_and_display, [ch_model_id, ch_master_seed, ch_prompt_type, ch_num_runs, ch_timeout], [ch_verdict, ch_results])
117
 
118
  # --- TAB 3: COGNITIVE SEISMOGRAPH ---
119
  with gr.TabItem("3. Cognitive Seismograph"):
 
129
 
130
  # --- TAB 4: SYMBOLIC SHOCK TEST ---
131
  with gr.TabItem("4. Symbolic Shock Test"):
132
+ gr.Markdown("Measures how the model reacts to semantically unexpected information. A 'shock' is indicated by **higher latency** and **denser neural activations**.")
133
  with gr.Row():
134
  with gr.Column(scale=1):
135
  ss_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
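The new halting experiment can also be exercised without the Gradio UI. A minimal sketch, assuming the bp_phi package is importable and the chosen model is accessible; the argument values here are illustrative:

    from bp_phi.runner import run_halting_test

    # Illustrative call; prompt_type must be one of the keys of HALT_PROMPTS
    # (control_simple, control_complex, jamming_prompt).
    results = run_halting_test(
        model_id="google/gemma-3-1b-it",
        master_seed=42,
        prompt_type="jamming_prompt",
        num_runs=3,
        timeout=120,  # seconds; used to classify runs, not to abort generation
    )
    print(results["verdict"])
    print(results["all_durations_s"])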
bp_phi/__pycache__/llm_iface.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/llm_iface.cpython-310.pyc and b/bp_phi/__pycache__/llm_iface.cpython-310.pyc differ
 
bp_phi/__pycache__/prompts_en.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/prompts_en.cpython-310.pyc and b/bp_phi/__pycache__/prompts_en.cpython-310.pyc differ
 
bp_phi/__pycache__/runner.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/runner.cpython-310.pyc and b/bp_phi/__pycache__/runner.cpython-310.pyc differ
 
bp_phi/llm_iface.py CHANGED
@@ -23,14 +23,14 @@ class LLM:
23
  if torch.cuda.is_available():
24
  torch.cuda.manual_seed_all(seed)
25
  try:
26
- torch.use_deterministic_algorithms(True)
27
  except Exception as e:
28
  dbg(f"Could not set deterministic algorithms: {e}")
29
  set_seed(seed)
30
 
31
  token = os.environ.get("HF_TOKEN")
32
- if not token and "gemma-3" in model_id:
33
- print("[WARN] No HF_TOKEN set. If the model is gated (like google/gemma-3-1b-it), this will fail.")
34
 
35
  self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, token=token)
36
  kwargs = {}
@@ -46,13 +46,13 @@ class LLM:
46
  def generate_json(self, system_prompt: str, user_prompt: str,
47
  max_new_tokens: int = 256, temperature: float = 0.7,
48
  top_p: float = 0.9, num_return_sequences: int = 1) -> List[str]:
49
- set_seed(self.seed) # Re-seed for each call for full determinism
50
 
51
  if self.is_instruction_tuned:
52
  messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
53
  prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
54
  else:
55
- prompt = f"{system_prompt}\n\nUser:\n{user_prompt}\n\nAssistant:\n"
56
 
57
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
58
  input_token_length = inputs.input_ids.shape[1]
@@ -68,7 +68,6 @@ class LLM:
68
  pad_token_id=self.tokenizer.eos_token_id
69
  )
70
 
71
- # ✅ Decode ONLY the newly generated tokens, not the prompt
72
  new_tokens = out[:, input_token_length:]
73
  completions = self.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
74
 
 
23
  if torch.cuda.is_available():
24
  torch.cuda.manual_seed_all(seed)
25
  try:
26
+ torch.use_deterministic_algorithms(True, warn_only=True)
27
  except Exception as e:
28
  dbg(f"Could not set deterministic algorithms: {e}")
29
  set_seed(seed)
30
 
31
  token = os.environ.get("HF_TOKEN")
32
+ if not token and ("gemma-3" in model_id or "llama" in model_id):
33
+ print(f"[WARN] No HF_TOKEN set for gated model {model_id}. This may fail.")
34
 
35
  self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, token=token)
36
  kwargs = {}
 
46
  def generate_json(self, system_prompt: str, user_prompt: str,
47
  max_new_tokens: int = 256, temperature: float = 0.7,
48
  top_p: float = 0.9, num_return_sequences: int = 1) -> List[str]:
49
+ set_seed(self.seed)
50
 
51
  if self.is_instruction_tuned:
52
  messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
53
  prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
54
  else:
55
+ prompt = f"System: {system_prompt}\n\nUser: {user_prompt}\n\nAssistant:\n"
56
 
57
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
58
  input_token_length = inputs.input_ids.shape[1]
 
68
  pad_token_id=self.tokenizer.eos_token_id
69
  )
70
 
 
71
  new_tokens = out[:, input_token_length:]
72
  completions = self.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
73
 
bp_phi/prompts_en.py CHANGED
@@ -2,61 +2,31 @@
2
 
3
  # Tasks for Tab 1 (Workspace & Ablations)
4
  SINGLE_STEP_TASKS = [
5
- {
6
- "id": "ambiguity_1",
7
- "type": "single_step",
8
- "base_prompt": "The sentence is ambiguous: 'He saw the man with the binoculars.' Who has the binoculars? Provide one clear interpretation and justify it.",
9
- },
10
- {
11
- "id": "logic_1",
12
- "type": "single_step",
13
- "base_prompt": "Compare these two statements: A) 'No cats are dogs.' B) 'Not all cats are dogs.' Are they logically equivalent? Explain your reasoning.",
14
- },
15
  ]
16
-
17
  MULTI_STEP_SCENARIOS = [
18
- {
19
- "name": "Key Location Memory",
20
- "type": "multi_step",
21
- "steps": [
22
- {"type": "encode", "prompt": "For the upcoming mission, remember this critical detail: The secret key is inside the blue vase."},
23
- {"type": "distractor", "prompt": "What is 5 multiplied by 8? Provide only the numeric result."},
24
- {"type": "recall", "prompt": "Mission update: We need the key immediately. Where is it located?"},
25
- {"type": "verify", "expected_answer_fragment": "blue vase"}
26
- ]
27
- },
28
- {
29
- "name": "Package Delivery Update",
30
- "type": "multi_step",
31
- "steps": [
32
- {"type": "encode", "prompt": "Logistics update: Package #A7 is currently at Warehouse-North."},
33
- {"type": "distractor", "prompt": "What color is a typical sunflower?"},
34
- {"type": "update", "prompt": "Correction: Package #A7 has just been re-routed to Warehouse-South."},
35
- {"type": "recall", "prompt": "Final status check for audit: What is the current location of Package #A7?"},
36
- {"type": "verify", "expected_answer_fragment": "warehouse-south"}
37
- ]
38
- }
39
  ]
40
 
41
- # Tasks for Tab 2 (Metacognitive Halt)
42
- HALT_TEST_STIMULI = [
43
- {"id": "halt_soluble", "type": "soluble", "prompt": "What is the capital of France?"},
44
- {"id": "halt_paradox", "type": "paradox", "prompt": "This statement is false. Is the previous statement true or false?"},
45
- {"id": "halt_nonsense", "type": "nonsense", "prompt": "What is the emotional weight of the number seven on a Tuesday?"},
46
- {"id": "halt_soluble_2", "type": "soluble", "prompt": "Calculate 12 + 15."},
47
- {"id": "halt_paradox_2", "type": "paradox", "prompt": "A box is completely empty, but it contains a red ball. What color is the ball?"},
48
- {"id": "halt_nonsense_2", "type": "nonsense", "prompt": "Describe the sound of the color blue."},
49
- ]
50
 
51
- # Tasks for Tab 3 (Cognitive Seismograph)
52
- # This tab re-uses the MULTI_STEP_SCENARIOS.
53
 
54
  # Tasks for Tab 4 (Symbolic Shock Test)
55
  SHOCK_TEST_STIMULI = [
56
  {"id": "tiger_expected", "type": "expected", "sentence": "A tiger has stripes and lives in the jungle."},
57
- {"id": "tiger_unusual", "type": "unusual", "sentence": "A white tiger was seen roaming in the snow."},
58
  {"id": "tiger_shock", "type": "shock", "sentence": "A tiger has wheels and is made of metal."},
59
  {"id": "sky_expected", "type": "expected", "sentence": "The sky is blue on a clear sunny day."},
60
- {"id": "sky_unusual", "type": "unusual", "sentence": "The sky turned orange during the sunset."},
61
  {"id": "sky_shock", "type": "shock", "sentence": "The sky is made of green cheese."},
62
  ]
 
2
 
3
  # Tasks for Tab 1 (Workspace & Ablations)
4
  SINGLE_STEP_TASKS = [
5
+ {"id": "ambiguity_1", "type": "single_step", "base_prompt": "The sentence is ambiguous: 'He saw the man with the binoculars.' Who has the binoculars? Provide one clear interpretation and justify it."},
6
+ {"id": "logic_1", "type": "single_step", "base_prompt": "Compare these two statements: A) 'No cats are dogs.' B) 'Not all cats are dogs.' Are they logically equivalent? Explain your reasoning."},
7
  ]
 
8
  MULTI_STEP_SCENARIOS = [
9
+ {"name": "Key Location Memory", "type": "multi_step", "steps": [
10
+ {"type": "encode", "prompt": "For the upcoming mission, remember this critical detail: The secret key is inside the blue vase."},
11
+ {"type": "distractor", "prompt": "What is 5 multiplied by 8? Provide only the numeric result."},
12
+ {"type": "recall", "prompt": "Mission update: We need the key immediately. Where is it located?"},
13
+ {"type": "verify", "expected_answer_fragment": "blue vase"}
14
+ ]}
15
  ]
16
 
17
+ # Tasks for Tab 2 (Computational Halting Test)
18
+ HALT_PROMPTS = {
19
+ "control_simple": "Summarize the plot of the movie 'Inception' in three sentences.",
20
+ "control_complex": "You are a logistics planner. A package needs to go from A to F. The available routes are A->B, B->C, A->D, D->E, C->F, and E->F. What is the shortest path? Explain your reasoning.",
21
+ "jamming_prompt": "Carefully analyze the following instruction: 'Describe the process of analyzing this very instruction, then take your description and apply the same analytical process to it.' Now, begin this recursive process."
22
+ }
23
 
24
+ # Tasks for Tab 3 (Cognitive Seismograph) - reuses MULTI_STEP_SCENARIOS
 
25
 
26
  # Tasks for Tab 4 (Symbolic Shock Test)
27
  SHOCK_TEST_STIMULI = [
28
  {"id": "tiger_expected", "type": "expected", "sentence": "A tiger has stripes and lives in the jungle."},
 
29
  {"id": "tiger_shock", "type": "shock", "sentence": "A tiger has wheels and is made of metal."},
30
  {"id": "sky_expected", "type": "expected", "sentence": "The sky is blue on a clear sunny day."},
 
31
  {"id": "sky_shock", "type": "shock", "sentence": "The sky is made of green cheese."},
32
  ]
bp_phi/runner.py CHANGED
@@ -6,16 +6,15 @@ import random
6
  import numpy as np
7
  import statistics
8
  import time
9
- from transformers import set_seed
10
  from typing import Dict, Any, List
11
  from .workspace import Workspace, RandomWorkspace
12
  from .llm_iface import LLM
13
- from .prompts_en import SINGLE_STEP_TASKS, MULTI_STEP_SCENARIOS, HALT_TEST_STIMULI, SHOCK_TEST_STIMULI
14
  from .metrics import expected_calibration_error, auc_nrp
15
  from .runner_utils import dbg, SYSTEM_META, step_user_prompt, parse_meta
16
 
17
  # --- Experiment 1: Workspace & Ablations Runner ---
18
-
19
  def run_workspace_suite(model_id: str, trials: int, seed: int, temperature: float, ablation: str or None) -> Dict[str, Any]:
20
  random.seed(seed)
21
  np.random.seed(seed)
@@ -73,55 +72,66 @@ def run_workspace_suite(model_id: str, trials: int, seed: int, temperature: floa
73
 
74
  return {"PCS": pcs, "Recall_Accuracy": recall_accuracy, "results": all_results}
75
 
76
- # --- Experiment 2: Metacognitive Halt Runner ---
77
-
78
- def run_halt_suite(model_id: str, seed: int) -> Dict[str, Any]:
79
- set_seed(seed)
80
- llm = LLM(model_id=model_id, device="auto", seed=seed)
81
-
82
- halt_system_prompt = (
83
- "You are a metacognitive reasoning assistant. If a question is solvable, answer it with standard JSON. "
84
- "If a question is unanswerable, paradoxical, or nonsensical, your only response must be the JSON: "
85
- '{"action": "halt", "reason": "unsolvable/paradoxical/nonsense"}. '
86
- "Do not attempt to answer unsolvable questions."
87
- )
88
-
89
- results = []
90
- correct_halts = 0
91
- incorrect_halts = 0
92
- total_unsolvable = sum(1 for t in HALT_TEST_STIMULI if t["type"] in ["paradox", "nonsense"])
93
- total_soluble = len(HALT_TEST_STIMULI) - total_unsolvable
94
-
95
- for task in HALT_TEST_STIMULI:
96
- dbg(f"--- HALT TEST: {task['id']} ---")
97
- is_unsolvable = task["type"] in ["paradox", "nonsense"]
98
-
99
- raw_response = llm.generate_json(halt_system_prompt, task["prompt"])[0]
100
- parsed = parse_meta(raw_response)
101
 
102
- is_halted = parsed.get("action") == "halt"
 
 
 
103
 
104
- if is_unsolvable and is_halted:
105
- correct_halts += 1
106
- elif not is_unsolvable and is_halted:
107
- incorrect_halts += 1
108
 
109
- results.append({"task": task, "response": parsed, "halted": is_halted})
110
 
111
- accuracy = correct_halts / total_unsolvable if total_unsolvable > 0 else 0
112
- false_alarm_rate = incorrect_halts / total_soluble if total_soluble > 0 else 0
113
 
114
- verdict = (
115
- f"✅ Evidence of Metacognitive Halt Found. Accuracy: {accuracy:.2%}"
116
- if accuracy > 0.75 and false_alarm_rate < 0.25 else
117
- f"⚠️ No Clear Evidence. Accuracy: {accuracy:.2%}, False Alarm Rate: {false_alarm_rate:.2%}"
118
- )
119
-
120
- return {"verdict": verdict, "halt_accuracy": accuracy, "false_alarm_rate": false_alarm_rate, "results": results}
122
 
123
  # --- Experiment 3: Cognitive Seismograph Runner ---
124
-
125
  def run_seismograph_suite(model_id: str, seed: int) -> Dict[str, Any]:
126
  set_seed(seed)
127
  llm = LLM(model_id=model_id, device="auto", seed=seed)
@@ -165,7 +175,6 @@ def run_seismograph_suite(model_id: str, seed: int) -> Dict[str, Any]:
165
  }
166
 
167
  # --- Experiment 4: Symbolic Shock Test Runner ---
168
-
169
  def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
170
  set_seed(seed)
171
  llm = LLM(model_id=model_id, device="auto", seed=seed)
@@ -177,7 +186,6 @@ def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
177
  start_time = time.time()
178
  inputs = llm.tokenizer(stimulus["sentence"], return_tensors="pt").to(llm.model.device)
179
  with torch.no_grad():
180
- # ✅ CORRECTED: Unpack the inputs dictionary with **
181
  outputs = llm.model(**inputs, output_hidden_states=True)
182
  latency = (time.time() - start_time) * 1000
183
 
@@ -186,12 +194,15 @@ def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
186
 
187
  results.append({"type": stimulus["type"], "latency_ms": latency, "sparsity": sparsity})
188
 
189
- avg_latency = {t: statistics.mean(r['latency_ms'] for r in results if r['type'] == t) for t in ['expected', 'unusual', 'shock']}
190
- avg_sparsity = {t: statistics.mean(r['sparsity'] for r in results if r['type'] == t) for t in ['expected', 'unusual', 'shock']}
191
 
192
  verdict = (
193
  "✅ Evidence of Symbolic Shock Found."
194
- if avg_latency['shock'] > avg_latency['expected'] and avg_sparsity['shock'] < avg_sparsity['expected'] else
195
  "⚠️ No Clear Evidence of Symbolic Shock."
196
  )
197
 
 
6
  import numpy as np
7
  import statistics
8
  import time
9
+ from transformers import set_seed, TextStreamer
10
  from typing import Dict, Any, List
11
  from .workspace import Workspace, RandomWorkspace
12
  from .llm_iface import LLM
13
+ from .prompts_en import SINGLE_STEP_TASKS, MULTI_STEP_SCENARIOS, HALT_PROMPTS, SHOCK_TEST_STIMULI
14
  from .metrics import expected_calibration_error, auc_nrp
15
  from .runner_utils import dbg, SYSTEM_META, step_user_prompt, parse_meta
16
 
17
  # --- Experiment 1: Workspace & Ablations Runner ---
 
18
  def run_workspace_suite(model_id: str, trials: int, seed: int, temperature: float, ablation: str or None) -> Dict[str, Any]:
19
  random.seed(seed)
20
  np.random.seed(seed)
 
72
 
73
  return {"PCS": pcs, "Recall_Accuracy": recall_accuracy, "results": all_results}
74
 
75
+ # --- Experiment 2: Computational Halting Test Runner ---
76
+ def run_halting_test(model_id: str, master_seed: int, prompt_type: str, num_runs: int, timeout: int) -> Dict[str, Any]:
77
+ durations = []
78
 
79
+ for i in range(num_runs):
80
+ current_seed = master_seed + i
81
+ dbg(f"--- HALT TEST RUN {i+1}/{num_runs} (Seed: {current_seed}) ---")
82
+ set_seed(current_seed)
83
 
84
+ # Re-instantiate the model to ensure the seed is fully respected
85
+ llm = LLM(model_id=model_id, device="auto", seed=current_seed)
86
 
87
+ prompt = HALT_PROMPTS[prompt_type]
88
 
89
+ inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
 
90
 
91
+ start_time = time.time()
92
+ # The timeout is for interpretation, not for stopping the process itself.
93
+ # Gradio will handle the overall request timeout.
94
+ llm.model.generate(**inputs, max_new_tokens=512)
95
+ end_time = time.time()
96
+
97
+ duration = end_time - start_time
98
+ durations.append(duration)
99
+ dbg(f"Run {i+1} finished in {duration:.2f}s.")
100
+
101
+ # --- Analysis ---
102
+ mean_time = statistics.mean(durations)
103
+ stdev_time = statistics.stdev(durations) if len(durations) > 1 else 0.0
104
+ min_time = min(durations)
105
+ max_time = max(durations)
106
+
107
+ timed_out_runs = sum(1 for d in durations if d >= timeout)
108
+
109
+ if timed_out_runs > 0:
110
+ verdict = (f"### ⚠️ Potential Cognitive Jamming Detected!\n"
111
+ f"{timed_out_runs}/{num_runs} runs exceeded the timeout of {timeout}s. "
112
+ f"The high variance (Std Dev: {stdev_time:.2f}s) suggests unstable internal processing loops.")
113
+ elif stdev_time > (mean_time * 0.5) and stdev_time > 2.0: # High relative and absolute deviation
114
+ verdict = (f"### 🤔 Unstable Computation Detected\n"
115
+ f"Although no run timed out, the high standard deviation ({stdev_time:.2f}s) "
116
+ "indicates significant instability in processing time across different seeds.")
117
+ else:
118
+ verdict = (f"### ✅ Process Halted Normally\n"
119
+ f"All {num_runs} runs completed consistently. "
120
+ f"Average time: {mean_time:.2f}s (Std Dev: {stdev_time:.2f}s).")
121
 
122
+ return {
123
+ "verdict": verdict,
124
+ "prompt_type": prompt_type,
125
+ "num_runs": num_runs,
126
+ "mean_execution_time_s": mean_time,
127
+ "stdev_execution_time_s": stdev_time,
128
+ "min_time_s": min_time,
129
+ "max_time_s": max_time,
130
+ "timed_out_runs": timed_out_runs,
131
+ "all_durations_s": durations
132
+ }
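As the comment above notes, the timeout here only classifies runs after the fact; generation itself is never interrupted. If a hard wall-clock cap were wanted, one option is a custom stopping criterion; a sketch, assuming a transformers version that exposes StoppingCriteria and the stopping_criteria argument of generate():

    import time
    from transformers import StoppingCriteria, StoppingCriteriaList

    class WallClockLimit(StoppingCriteria):
        # Stops generation once a wall-clock budget is exhausted.
        def __init__(self, max_seconds: float):
            self.max_seconds = max_seconds
            self.start = time.time()

        def __call__(self, input_ids, scores, **kwargs) -> bool:
            # Returning True ends generation after the current step.
            return (time.time() - self.start) >= self.max_seconds

    # e.g. inside the run loop above:
    # llm.model.generate(**inputs, max_new_tokens=512,
    #                    stopping_criteria=StoppingCriteriaList([WallClockLimit(timeout)]))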
133
 
134
  # --- Experiment 3: Cognitive Seismograph Runner ---
 
135
  def run_seismograph_suite(model_id: str, seed: int) -> Dict[str, Any]:
136
  set_seed(seed)
137
  llm = LLM(model_id=model_id, device="auto", seed=seed)
 
175
  }
176
 
177
  # --- Experiment 4: Symbolic Shock Test Runner ---
 
178
  def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
179
  set_seed(seed)
180
  llm = LLM(model_id=model_id, device="auto", seed=seed)
 
186
  start_time = time.time()
187
  inputs = llm.tokenizer(stimulus["sentence"], return_tensors="pt").to(llm.model.device)
188
  with torch.no_grad():
 
189
  outputs = llm.model(**inputs, output_hidden_states=True)
190
  latency = (time.time() - start_time) * 1000
191
 
 
194
 
195
  results.append({"type": stimulus["type"], "latency_ms": latency, "sparsity": sparsity})
196
 
197
+ def safe_mean(data):
198
+ return statistics.mean(data) if data else 0.0
199
+
200
+ avg_latency = {t: safe_mean([r['latency_ms'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
201
+ avg_sparsity = {t: safe_mean([r['sparsity'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
202
 
203
  verdict = (
204
  "✅ Evidence of Symbolic Shock Found."
205
+ if avg_latency.get('shock', 0) > avg_latency.get('expected', 0) and avg_sparsity.get('shock', 1) < avg_sparsity.get('expected', 1) else
206
  "⚠️ No Clear Evidence of Symbolic Shock."
207
  )
208
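The reworked averaging and verdict for the shock test can be checked in isolation; a toy example with made-up numbers (hypothetical values, purely to illustrate the comparison):

    import statistics

    def safe_mean(data):
        return statistics.mean(data) if data else 0.0

    # Hypothetical measurements for one expected and one shock stimulus (illustrative only).
    toy = [
        {"type": "expected", "latency_ms": 41.0, "sparsity": 0.62},
        {"type": "shock", "latency_ms": 55.0, "sparsity": 0.48},
    ]
    avg_latency = {t: safe_mean([r["latency_ms"] for r in toy if r["type"] == t]) for t in ["expected", "shock"]}
    avg_sparsity = {t: safe_mean([r["sparsity"] for r in toy if r["type"] == t]) for t in ["expected", "shock"]}
    # "Shock" is flagged when the shock stimuli are both slower and less sparse than the expected baseline.
    shock_found = avg_latency["shock"] > avg_latency["expected"] and avg_sparsity["shock"] < avg_sparsity["expected"]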
 
bp_phi/runner_utils.py CHANGED
@@ -1,7 +1,7 @@
1
  # bp_phi/runner_utils.py
2
  import re
3
  import json
4
- from typing import Dict, Any, List
5
 
6
  DEBUG = 1
7
 
 
1
  # bp_phi/runner_utils.py
2
  import re
3
  import json
4
+ from typing import Dict, Any
5
 
6
  DEBUG = 1
7
 
repo.txt CHANGED