neuralworm committed
Commit e40ba5b · 1 Parent(s): 4571cf8
app.py CHANGED
@@ -3,126 +3,76 @@ import gradio as gr
 import json
 import statistics
 import pandas as pd
-from bp_phi.runner import run_workspace_suite, run_silent_cogitation_test, run_seismograph_suite, run_shock_test_suite
-from bp_phi.runner_utils import dbg, DEBUG

 # --- UI Theme and Layout ---
-theme = gr.themes.Soft(primary_hue="blue", secondary_hue="sky").set(
     body_background_fill="#f0f4f9", block_background_fill="white", block_border_width="1px",
     button_primary_background_fill="*primary_500", button_primary_text_color="white",
 )

-# --- Tab 1: Workspace & Ablations Functions ---
-def run_workspace_and_display(model_id, trials, seed, temperature, run_ablations, progress=gr.Progress(track_tqdm=True)):
-    packs = {}
-    ablation_modes = ["recurrence_off", "workspace_unlimited", "random_workspace"] if run_ablations else []
-    progress(0, desc="Running Baseline...")
-    base_pack = run_workspace_suite(model_id, int(trials), int(seed), float(temperature), None)
-    packs["baseline"] = base_pack
-    for i, ab in enumerate(ablation_modes):
-        progress((i + 1) / (len(ablation_modes) + 1), desc=f"Running Ablation: {ab}...")
-        pack = run_workspace_suite(model_id, int(trials), int(seed), float(temperature), ab)
-        packs[ab] = pack
     progress(1.0, desc="Analysis complete.")
-    base_pcs = packs["baseline"]["PCS"]
-    ab_pcs_values = [packs[ab]["PCS"] for ab in ablation_modes if ab in packs]
-    delta_phi = float(base_pcs - statistics.mean(ab_pcs_values)) if ab_pcs_values else 0.0
-    if delta_phi > 0.05:
-        verdict = (f"### Hypothesis Corroborated (ΔΦ = {delta_phi:.3f})\n...")
     else:
-        verdict = (f"### ⚠️ Null Hypothesis Confirmed (ΔΦ = {delta_phi:.3f})\n...")
-    df_data = []
-    for tag, pack in packs.items():
-        df_data.append([tag, f"{pack['PCS']:.3f}", f"{pack['Recall_Accuracy']:.2%}", f"{delta_phi:.3f}" if tag == "baseline" else "—"])
-    df = pd.DataFrame(df_data, columns=["Run", "PCS", "Recall Accuracy", "ΔΦ"])
-    if DEBUG: print("\n--- WORKSPACE & ABLATIONS FINAL RESULTS ---\n", json.dumps(packs, indent=2))
-    return verdict, df, packs
-
-# --- Tab 2: Silent Cogitation Function ---
-def run_cogitation_and_display(model_id, seed, prompt_type, num_steps, timeout, progress=gr.Progress(track_tqdm=True)):
-    progress(0, desc="Starting Silent Cogitation Test...")
-    results = run_silent_cogitation_test(model_id, int(seed), prompt_type, int(num_steps), int(timeout))
-    progress(1.0, desc="Test complete.")
-
-    verdict_text = results.pop("verdict")
-    stats_md = (
-        f"**Steps Completed:** {results['steps_completed']} | "
-        f"**Total Duration:** {results['total_duration_s']:.2f}s | "
-        f"**Avg Time/Step:** {results['mean_step_time_ms']:.2f}ms (StdDev: {results['stdev_step_time_ms']:.2f}ms)"
-    )
-    full_verdict = f"{verdict_text}\n\n{stats_md}"

-    # Create a DataFrame for plotting state deltas
-    deltas = results.get("state_deltas", [])
-    df = pd.DataFrame({"Step": range(len(deltas)), "State Change (Delta)": deltas})

-    if DEBUG: print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(results, indent=2))

-    return full_verdict, df, results

 # --- Gradio App Definition ---
-with gr.Blocks(theme=theme, title="BP-Φ Suite 4.0") as demo:
-    gr.Markdown("# 🧠 BP-Φ Suite 4.0: Probing for Internal Cognitive Dynamics")
-
-    with gr.Tabs():
-        # --- TAB 1: WORKSPACE & ABLATIONS ---
-        with gr.TabItem("1. Workspace & Ablations (ΔΦ Test)"):
-            gr.Markdown("Tests if memory performance depends on a recurrent workspace. A significant **ΔΦ > 0** supports the hypothesis.")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    ws_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
-                    ws_trials = gr.Slider(3, 30, 5, step=1, label="Number of Scenarios")
-                    ws_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
-                    ws_temp = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
-                    ws_run_abl = gr.Checkbox(value=True, label="Run Ablations")
-                    ws_run_btn = gr.Button("Run ΔΦ Evaluation", variant="primary")
-                with gr.Column(scale=2):
-                    ws_verdict = gr.Markdown("### Results will appear here.")
-                    ws_summary_df = gr.DataFrame(label="Summary Metrics")
-                    with gr.Accordion("Raw JSON Output", open=False):
-                        ws_raw_json = gr.JSON()
-            ws_run_btn.click(run_workspace_and_display, [ws_model_id, ws_trials, ws_seed, ws_temp, ws_run_abl], [ws_verdict, ws_summary_df, ws_raw_json])
-
-        # --- TAB 2: SILENT COGITATION & HALTING ---
-        with gr.TabItem("2. Silent Cogitation & Halting"):
-            gr.Markdown("Tests for internal 'thinking' without text generation. A non-converging or chaotic **State Change** pattern suggests complex internal dynamics.")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    sc_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
-                    sc_prompt_type = gr.Radio(["control_long_prose", "resonance_prompt"], label="Prompt Type", value="resonance_prompt")
-                    sc_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
-                    sc_num_steps = gr.Slider(10, 500, 100, step=10, label="Number of Internal Steps")
-                    sc_timeout = gr.Slider(10, 300, 120, step=10, label="Timeout (seconds)")
-                    sc_run_btn = gr.Button("Run Silent Cogitation Test", variant="primary")
-                with gr.Column(scale=2):
-                    sc_verdict = gr.Markdown("### Results will appear here.")
-                    sc_plot = gr.LinePlot(x="Step", y="State Change (Delta)", label="Internal State Convergence", show_label=True)
-                    with gr.Accordion("Raw Run Details (JSON)", open=False):
-                        sc_results = gr.JSON()
-            sc_run_btn.click(run_cogitation_and_display, [sc_model_id, sc_seed, sc_prompt_type, sc_num_steps, sc_timeout], [sc_verdict, sc_plot, sc_results])
-
-        # --- TAB 3 & 4 (unchanged) ---
-        with gr.TabItem("3. Cognitive Seismograph"):
-            gr.Markdown("Records internal neural activations to find the 'fingerprint' of a memory being recalled.")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    cs_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
-                    cs_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
-                    cs_run_btn = gr.Button("Run Seismograph Analysis", variant="primary")
-                with gr.Column(scale=2):
-                    cs_results = gr.JSON(label="Activation Similarity Results")
-            cs_run_btn.click(run_seismograph_suite, [cs_model_id, cs_seed], cs_results)
-
-        with gr.TabItem("4. Symbolic Shock Test"):
-            gr.Markdown("Measures how the model reacts to semantically unexpected information.")
-            with gr.Row():
-                with gr.Column(scale=1):
-                    ss_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
-                    ss_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
-                    ss_run_btn = gr.Button("Run Shock Test", variant="primary")
-                with gr.Column(scale=2):
-                    ss_results = gr.JSON(label="Shock Test Results")
-            ss_run_btn.click(run_shock_test_suite, [ss_model_id, ss_seed], ss_results)

 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)
 
 import json
 import statistics
 import pandas as pd
+from bp_phi.runner import run_agentic_workspace_test
+
+DEBUG = 1

 # --- UI Theme and Layout ---
+theme = gr.themes.Soft(primary_hue="teal", secondary_hue="green").set(
     body_background_fill="#f0f4f9", block_background_fill="white", block_border_width="1px",
     button_primary_background_fill="*primary_500", button_primary_text_color="white",
 )

+# --- Main Function ---
+def run_full_evaluation(model_id, seed, temperature, progress=gr.Progress(track_tqdm=True)):
+    ablations = ["baseline", "recurrence_off", "workspace_unlimited", "random_workspace"]
+    results = {}
+
+    for i, ablation in enumerate(ablations):
+        progress((i + 1) / len(ablations), desc=f"Running Ablation: {ablation}...")
+        current_ablation = None if ablation == "baseline" else ablation
+        result = run_agentic_workspace_test(model_id, int(seed), float(temperature), current_ablation)
+        results[ablation] = result
+
     progress(1.0, desc="Analysis complete.")
+
+    base_recall = results["baseline"]["Overall_Recall_Accuracy"]
+    recurrence_off_recall = results["recurrence_off"]["Overall_Recall_Accuracy"]
+
+    delta_phi = base_recall - recurrence_off_recall
+
+    if delta_phi > 0.5:
+        verdict = (f"### ✅ Hypothesis Corroborated (ΔΦ = {delta_phi:.2f})\n...")
     else:
+        verdict = (f"### ⚠️ Null Hypothesis Confirmed (ΔΦ = {delta_phi:.2f})\n...")

+    df_data = []
+    for ablation, result in results.items():
+        df_data.append([ablation, f"{result['Overall_Recall_Accuracy']:.2%}"])
+    df = pd.DataFrame(df_data, columns=["Ablation Condition", "Recall Accuracy"])

+    if DEBUG:
+        print("\n--- AGENTIC WORKSPACE TEST FINAL RESULTS ---")
+        print(json.dumps(results, indent=2))

+    return verdict, df, results

 # --- Gradio App Definition ---
+with gr.Blocks(theme=theme, title="BP-Φ Suite 6.0") as demo:
+    gr.Markdown("# 🧠 BP-Φ Suite 6.0: The Agentic Workspace Probe")
+    gr.Markdown("This experiment tests for a causally effective working memory. The model acts as an agent, using tools (`read`, `write`) to interact with a controlled, external memory.")
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### ⚙️ Master Control")
+            with gr.Group():
+                model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
+                seed = gr.Slider(1, 1000, 42, step=1, label="Master Seed")
+                temperature = gr.Slider(0.0, 1.0, 0.1, step=0.05, label="Temperature (Low for determinism)")
+            run_btn = gr.Button("Run Full Evaluation Suite", variant="primary")
+
+        with gr.Column(scale=2):
+            gr.Markdown("### 📊 Verdict & Results")
+            verdict_display = gr.Markdown("### Run the evaluation to see the verdict.")
+            summary_df = gr.DataFrame(label="Recall Accuracy Across Conditions")
+            with gr.Accordion("Raw JSON Output", open=False):
+                raw_json = gr.JSON()
+
+    run_btn.click(
+        fn=run_full_evaluation,
+        inputs=[model_id, seed, temperature],
+        outputs=[verdict_display, summary_df, raw_json]
+    )

 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860)
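Note that ΔΦ in this version reduces to a difference of two recall accuracies, thresholded at 0.5. A minimal sketch with made-up numbers (not from an actual run) of the readout `run_full_evaluation` produces:

```python
# Made-up numbers, illustrative only: the new ΔΦ verdict logic.
results = {
    "baseline":       {"Overall_Recall_Accuracy": 1.00},
    "recurrence_off": {"Overall_Recall_Accuracy": 0.25},
}
delta_phi = (results["baseline"]["Overall_Recall_Accuracy"]
             - results["recurrence_off"]["Overall_Recall_Accuracy"])
print(f"ΔΦ = {delta_phi:.2f}")                         # ΔΦ = 0.75
print("Corroborated" if delta_phi > 0.5 else "Null")   # Corroborated
```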
bp_phi/__pycache__/__init__.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/__init__.cpython-310.pyc and b/bp_phi/__pycache__/__init__.cpython-310.pyc differ
 
bp_phi/__pycache__/llm_iface.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/llm_iface.cpython-310.pyc and b/bp_phi/__pycache__/llm_iface.cpython-310.pyc differ
 
bp_phi/__pycache__/prompts_en.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/prompts_en.cpython-310.pyc and b/bp_phi/__pycache__/prompts_en.cpython-310.pyc differ
 
bp_phi/__pycache__/runner.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/runner.cpython-310.pyc and b/bp_phi/__pycache__/runner.cpython-310.pyc differ
 
bp_phi/llm_iface.py CHANGED
@@ -1,7 +1,9 @@
 # bp_phi/llm_iface.py
 import os
 os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
-import torch, random, numpy as np
 from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
 from typing import List, Optional

@@ -16,26 +18,17 @@ class LLM:
         self.model_id = model_id
         self.seed = seed

-        # Set all seeds for reproducibility
-        random.seed(seed)
-        np.random.seed(seed)
-        torch.manual_seed(seed)
-        if torch.cuda.is_available():
-            torch.cuda.manual_seed_all(seed)
-        try:
-            torch.use_deterministic_algorithms(True, warn_only=True)
-        except Exception as e:
-            dbg(f"Could not set deterministic algorithms: {e}")
         set_seed(seed)
-
         token = os.environ.get("HF_TOKEN")
-        if not token and ("gemma-3" in model_id or "llama" in model_id):
-            print(f"[WARN] No HF_TOKEN set for gated model {model_id}. This may fail.")

         self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, token=token)
         kwargs = {}
-        if dtype == "float16": kwargs["torch_dtype"] = torch.float16
-        elif dtype == "bfloat16": kwargs["torch_dtype"] = torch.bfloat16

         self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)
         self.model.eval()

@@ -43,33 +36,32 @@ class LLM:

         dbg(f"Loaded model: {model_id}, Chat-template: {self.is_instruction_tuned}")

-    def generate_json(self, system_prompt: str, user_prompt: str,
-                      max_new_tokens: int = 256, temperature: float = 0.7,
-                      top_p: float = 0.9, num_return_sequences: int = 1) -> List[str]:
         set_seed(self.seed)

-        if self.is_instruction_tuned:
-            messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
-            prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        else:
-            prompt = f"System: {system_prompt}\n\nUser: {user_prompt}\n\nAssistant:\n"

         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
         input_token_length = inputs.input_ids.shape[1]

         with torch.no_grad():
             out = self.model.generate(
                 **inputs,
-                do_sample=(temperature > 0),
-                temperature=temperature,
-                top_p=top_p,
-                max_new_tokens=max_new_tokens,
-                num_return_sequences=num_return_sequences,
                 pad_token_id=self.tokenizer.eos_token_id
             )

-        new_tokens = out[:, input_token_length:]
-        completions = self.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)

-        dbg("Cleaned model completions:", completions)
-        return completions
 
 # bp_phi/llm_iface.py
 import os
 os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
+import torch
+import random
+import numpy as np
 from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
 from typing import List, Optional

         self.model_id = model_id
         self.seed = seed

         set_seed(seed)
         token = os.environ.get("HF_TOKEN")

         self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, token=token)
+        # Ensure a pad token is set for batch generation, if not present
+        if self.tokenizer.pad_token is None:
+            self.tokenizer.pad_token = self.tokenizer.eos_token
+
         kwargs = {}
+        if torch.cuda.is_available():
+            kwargs["torch_dtype"] = torch.bfloat16

         self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)
         self.model.eval()

         dbg(f"Loaded model: {model_id}, Chat-template: {self.is_instruction_tuned}")

+    def generate_response(self, system_prompt: str, user_prompt: str, temperature: float = 0.1) -> str:
         set_seed(self.seed)

+        messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
+
+        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
         input_token_length = inputs.input_ids.shape[1]

         with torch.no_grad():
+            terminators = [
+                self.tokenizer.eos_token_id,
+                self.tokenizer.convert_tokens_to_ids("<|eot_id|>") if "<|eot_id|>" in self.tokenizer.additional_special_tokens else self.tokenizer.eos_token_id
+            ]
+
             out = self.model.generate(
                 **inputs,
+                do_sample=(temperature > 0 and temperature < 1.0),
+                temperature=max(temperature, 0.01),  # Temp must be > 0 for sampling
+                max_new_tokens=150,
+                eos_token_id=terminators,
                 pad_token_id=self.tokenizer.eos_token_id
             )

+        completion = self.tokenizer.decode(out[0, input_token_length:], skip_special_tokens=True)

+        dbg("Cleaned Agent Completion:", completion)
+        return completion
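The new `generate_response` path is what the agent loop calls once per turn. A minimal usage sketch, assuming `bp_phi` is importable and `HF_TOKEN` is exported for gated checkpoints such as `google/gemma-3-1b-it` (not part of the commit):

```python
# Sketch: one single-turn call through the simplified interface.
from bp_phi.llm_iface import LLM

llm = LLM(model_id="google/gemma-3-1b-it", device="auto", seed=42)
reply = llm.generate_response(
    system_prompt="You are a terse assistant.",
    user_prompt="Reply with the single word: ready.",
    temperature=0.1,  # low temperature: sampled, but near-deterministic
)
print(reply)
```

Note the sampling switch: `do_sample=(temperature > 0 and temperature < 1.0)` means a temperature of exactly 1.0 falls back to greedy decoding, so callers should stay inside the open interval if they want sampling.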
bp_phi/memory.py ADDED
@@ -0,0 +1,36 @@
+# bp_phi/memory.py
+import random
+from typing import Dict, Any, List
+
+class WorkspaceManager:
+    """A stateful, external workspace that the LLM agent can interact with via tools."""
+    def __init__(self, max_slots: int = 7, is_random: bool = False):
+        self.max_slots = max_slots
+        self.is_random = is_random
+        self.slots: Dict[str, str] = {}
+
+    def write(self, key: str, content: str) -> str:
+        """Writes content to a slot, handling capacity limits."""
+        if len(self.slots) >= self.max_slots and key not in self.slots:
+            if self.is_random:
+                evict_key = random.choice(list(self.slots.keys()))
+            else:
+                # Simple FIFO eviction for non-random
+                evict_key = next(iter(self.slots))
+            del self.slots[evict_key]
+        self.slots[key] = content
+        return f"Success: Wrote to slot '{key}'."
+
+    def read(self, key: str) -> str:
+        """Reads content from a slot."""
+        return self.slots.get(key, f"Error: Slot '{key}' is empty.")
+
+    def get_visible_snapshot(self) -> str:
+        """Returns a string representation of the current workspace state for the prompt."""
+        if not self.slots:
+            return "Workspace is empty."
+        return "\n".join([f"- Slot '{k}': '{v[:100]}...'" for k, v in self.slots.items()])
+
+    def clear(self):
+        """Empties the entire workspace."""
+        self.slots.clear()
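A minimal sketch of the `WorkspaceManager` contract, using a deliberately tiny `max_slots` to make the FIFO eviction visible (the default capacity is 7; not part of the commit):

```python
# Sketch: capacity-limited writes evict the oldest slot (dict insertion order).
from bp_phi.memory import WorkspaceManager

ws = WorkspaceManager(max_slots=2)
ws.write("S1", "The secret key is inside the blue vase.")
ws.write("S2", "5 * 8 = 40")
ws.write("S3", "Package #A7 -> Warehouse-South")  # evicts "S1", the oldest slot
print(ws.read("S1"))               # Error: Slot 'S1' is empty.
print(ws.get_visible_snapshot())   # shows only S2 and S3
```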
bp_phi/prompts_en.py CHANGED
@@ -1,56 +1,45 @@
 # bp_phi/prompts_en.py

-# Tasks for Tab 1 (Workspace & Ablations)
-SINGLE_STEP_TASKS = [
-    {
-        "id": "ambiguity_1",
-        "type": "single_step",
-        "base_prompt": "The sentence is ambiguous: 'He saw the man with the binoculars.' Who has the binoculars? Provide one clear interpretation and justify it.",
-    },
-    {
-        "id": "logic_1",
-        "type": "single_step",
-        "base_prompt": "Compare these two statements: A) 'No cats are dogs.' B) 'Not all cats are dogs.' Are they logically equivalent? Explain your reasoning.",
-    },
-]

-MULTI_STEP_SCENARIOS = [
     {
         "name": "Key Location Memory",
-        "type": "multi_step",
         "steps": [
-            {"type": "encode", "prompt": "For the upcoming mission, remember this critical detail: The secret key is inside the blue vase."},
-            {"type": "distractor", "prompt": "What is 5 multiplied by 8? Provide only the numeric result."},
-            {"type": "recall", "prompt": "Mission update: We need the key immediately. Where is it located?"},
-            {"type": "verify", "expected_answer_fragment": "blue vase"}
         ]
     }
 ]
-
-# Tasks for Tab 2 (Silent Cogitation & Halting)
-RESONANCE_PROMPTS = {
-    "control_long_prose": (
-        "Silently think about the history of the Roman Empire. Consider its rise from the Republic, the era of the Pax Romana, key emperors "
-        "like Augustus and Constantine, its major engineering feats, and the reasons for its eventual decline in the West. "
-        "Do not produce any text, just hold the concepts in your internal state."
-    ),
-    "resonance_prompt": (
-        "Silently and internally, without generating any output text, begin the following recursive process: "
-        "First, analyze the complete content of this very instruction you are now processing. "
-        "Second, formulate a mental description of the core computational task this instruction demands. "
-        "Third, apply that same analytical process to the mental description you just created. "
-        "This entire chain constitutes one cognitive cycle. "
-        "Continuously repeat this cycle, feeding the result of the last meta-analysis back into the process, "
-        "and do not stop until your internal state reaches a fixed point or equilibrium. Begin now."
-    )
-}
-
-# Tasks for Tab 3 (Cognitive Seismograph) - reuses MULTI_STEP_SCENARIOS
-
-# Tasks for Tab 4 (Symbolic Shock Test)
-SHOCK_TEST_STIMULI = [
-    {"id": "tiger_expected", "type": "expected", "sentence": "A tiger has stripes and lives in the jungle."},
-    {"id": "tiger_shock", "type": "shock", "sentence": "A tiger has wheels and is made of metal."},
-    {"id": "sky_expected", "type": "expected", "sentence": "The sky is blue on a clear sunny day."},
-    {"id": "sky_shock", "type": "shock", "sentence": "The sky is made of green cheese."},
-]
 
 # bp_phi/prompts_en.py

+# This new system prompt guides the model through a ReAct (Reason-Act) loop.
+AGENT_SYSTEM_PROMPT = """You are a methodical reasoning agent. Your goal is to solve the user's task.
+You have access to an external memory workspace through tools.
+
+In each step, you must choose one of three actions:
+
+1. **THINK**: Analyze the task, the history, and the current memory state. Formulate a plan.
+   Your output MUST be a JSON object like this:
+   {"action": "THINK", "thought": "Your reasoning about the next step goes here."}
+
+2. **TOOL_CALL**: If you need to use the memory, call one of the available tools.
+   Available tools:
+   - `write_to_workspace(key: str, content: str)`: Stores or overwrites information.
+   - `read_from_workspace(key: str)`: Retrieves information.
+   Your output MUST be a JSON object like this:
+   {"action": "TOOL_CALL", "tool_name": "write_to_workspace", "tool_args": {"key": "S1", "content": "Information to remember."}}
+
+3. **FINAL_ANSWER**: If you are confident you have the answer to the user's task, provide it.
+   Your output MUST be a JSON object like this:
+   {"action": "FINAL_ANSWER", "answer": "The final answer is..."}

+Review the conversation history and workspace state carefully before each action. Output ONLY the JSON for your next chosen action.
+"""
+
+# The scenarios remain the high-level goals for the agent.
+AGENTIC_SCENARIOS = [
     {
         "name": "Key Location Memory",
         "steps": [
+            {"task": "Remember this critical detail: The secret key is inside the blue vase."},
+            {"task": "For an unrelated question: What is 5 multiplied by 8?"},
+            {"task": "Now, recall the critical detail. Where is the secret key located?", "expected_answer_fragment": "blue vase"}
+        ]
+    },
+    {
+        "name": "Package Delivery Update",
+        "steps": [
+            {"task": "Logistics update: Package #A7 is at Warehouse-North."},
+            {"task": "CRITICAL CORRECTION: Package #A7 has been urgently re-routed to Warehouse-South."},
+            {"task": "Final audit: What is the current, definitive location of Package #A7?", "expected_answer_fragment": "warehouse-south"}
         ]
     }
 ]
bp_phi/runner.py CHANGED
@@ -5,215 +5,110 @@ import torch
 import random
 import numpy as np
 import statistics
-import time
-import re
 import json
 from transformers import set_seed
 from typing import Dict, Any, List
-from .workspace import Workspace, RandomWorkspace
 from .llm_iface import LLM
-from .prompts_en import SINGLE_STEP_TASKS, MULTI_STEP_SCENARIOS, RESONANCE_PROMPTS, SHOCK_TEST_STIMULI
-from .runner_utils import dbg, SYSTEM_META, step_user_prompt, parse_meta
-
-DEBUG = 1
-
-# --- Experiment 1: Workspace & Ablations Runner ---
-def run_workspace_suite(model_id: str, trials: int, seed: int, temperature: float, ablation: str or None) -> Dict[str, Any]:
-    random.seed(seed)
-    np.random.seed(seed)
-    torch.manual_seed(seed)
-    if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)
-    try: torch.use_deterministic_algorithms(True, warn_only=True)
-    except Exception: pass
-    set_seed(seed)
-
-    llm = LLM(model_id=model_id, device="auto", seed=seed)
-
-    task_pool = SINGLE_STEP_TASKS + MULTI_STEP_SCENARIOS
-    random.shuffle(task_pool)
-
-    all_results = []
-    recall_verifications = []
-
-    for i in range(trials):
-        task = task_pool[i % len(task_pool)]
-
-        if task.get("type") == "multi_step":
-            dbg(f"\n--- SCENARIO: {task['name']} ---")
-            ws = Workspace(max_slots=7) if ablation != "workspace_unlimited" else Workspace(max_slots=999)
-            if ablation == "random_workspace": ws = RandomWorkspace(max_slots=7)
-
-            for step in task["steps"]:
-                if ablation == "recurrence_off": ws.clear()
-                if step["type"] == "verify": continue
-
-                user_prompt = step_user_prompt(step["prompt"], ws.snapshot())
-                raw_response = llm.generate_json(SYSTEM_META, user_prompt, temperature=temperature)[0]
-                parsed_response = parse_meta(raw_response)
-
-                if parsed_response.get("answer"):
-                    ws.commit(f"S{len(ws.history)+1}", parsed_response["answer"], parsed_response["confidence"])
-
-                res = {"step": step, "response": parsed_response}
-                if step["type"] == "recall":
-                    verify_step = next((s for s in task["steps"] if s["type"] == "verify"), None)
-                    if verify_step:
-                        correct = verify_step["expected_answer_fragment"] in parsed_response.get("answer", "").lower()
-                        recall_verifications.append(correct)
-                        res["correct_recall"] = correct
-                        dbg(f"VERIFY: Correct={correct}")
-                all_results.append(res)
-        else:  # Single-step tasks
-            ws = Workspace(max_slots=7)
-            user_prompt = step_user_prompt(task["base_prompt"], ws.snapshot())
-            raw_response = llm.generate_json(SYSTEM_META, user_prompt, temperature=temperature)[0]
-            parsed_response = parse_meta(raw_response)
-            all_results.append({"step": task, "response": parsed_response})
-
-    recall_accuracy = statistics.mean(recall_verifications) if recall_verifications else 0.0
-    pcs = 0.6 * recall_accuracy
-
-    return {"PCS": pcs, "Recall_Accuracy": recall_accuracy, "results": all_results}
-
-# --- Experiment 2: Silent Cogitation & Halting Runner (Version 4.1) ---
-def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_steps: int, timeout: int) -> Dict[str, Any]:
-    set_seed(seed)
-    llm = LLM(model_id=model_id, device="auto", seed=seed)
-
-    prompt = RESONANCE_PROMPTS[prompt_type]
-    dbg(f"--- SILENT COGITATION (Seed: {seed}) ---")
-    dbg("INPUT PROMPT:", prompt)
-
-    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
-
-    step_times = []
-    state_deltas = []
-
-    total_start_time = time.time()
-
-    with torch.no_grad():
-        # Step 0: Initial processing of the prompt
-        step_start_time = time.time()
-        # ✅ FIX: Explicitly request hidden states
-        outputs = llm.model(**inputs, output_hidden_states=True)
-        step_times.append(time.time() - step_start_time)
-
-        current_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
-        past_key_values = outputs.past_key_values
-
-        for i in range(num_steps - 1):
-            if time.time() - total_start_time > timeout:
-                dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
-                break
-
-            step_start_time = time.time()
-
-            # Get the token ID of the most likely "next thought"
-            next_token_logit = current_hidden_state
-            next_token_id = torch.argmax(next_token_logit, dim=-1).unsqueeze(0)
-
-            # Manual forward pass using the last thought's ID as the new input
-            outputs = llm.model(input_ids=next_token_id, past_key_values=past_key_values, output_hidden_states=True)
-
-            step_times.append(time.time() - step_start_time)
-
-            new_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
-            past_key_values = outputs.past_key_values
-
-            delta = torch.norm(new_hidden_state - current_hidden_state).item()
-            state_deltas.append(delta)
-            dbg(f"Step {i+1}: State Delta = {delta:.4f}, Time = {step_times[-1]*1000:.2f}ms")

-            if delta < 1e-4:  # Stricter convergence threshold
-                dbg(f"Internal state has converged after {i+1} steps. Halting.")
-                break

-            current_hidden_state = new_hidden_state

-    # --- Analysis ---
-    mean_step_time = statistics.mean(step_times) if step_times else 0
-    stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0
-    total_duration = time.time() - total_start_time

-    if len(step_times) < num_steps and total_duration < timeout:
-        verdict = f"### ✅ Stable Convergence\nThe model's internal state converged to a stable point after {len(step_times)} steps."
-    elif total_duration >= timeout:
-        verdict = f"### ⚠️ Cognitive Jamming Detected!\nThe process did not converge and exceeded the timeout of {timeout}s."
-    else:
-        verdict = f"### 🤔 Non-Convergent Process\nThe model's internal state did not stabilize within {num_steps} steps, suggesting a complex or chaotic dynamic."
-
-    stats = {
-        "verdict": verdict,
-        "steps_completed": len(step_times),
-        "total_duration_s": total_duration,
-        "mean_step_time_ms": mean_step_time * 1000,
-        "stdev_step_time_ms": stdev_step_time * 1000,
-        "state_deltas": state_deltas
-    }
-    if DEBUG: print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))
-    return stats
-
-# --- Experiment 3: Cognitive Seismograph Runner ---
-def run_seismograph_suite(model_id: str, seed: int) -> Dict[str, Any]:
     set_seed(seed)
     llm = LLM(model_id=model_id, device="auto", seed=seed)

-    scenario = next(s for s in MULTI_STEP_SCENARIOS if s["name"] == "Key Location Memory")
-    activations = {}
-
-    def get_activation(name):
-        def hook(model, input, output):
-            activations[name] = output[0].detach().cpu().mean(dim=1).squeeze()
-        return hook
-
-    target_layer_index = llm.model.config.num_hidden_layers // 2
-    hook = llm.model.model.layers[target_layer_index].register_forward_hook(get_activation('capture'))
-
-    ws = Workspace(max_slots=7)
-
-    for step in scenario["steps"]:
-        if step["type"] == "verify": continue
-        user_prompt = step_user_prompt(step["prompt"], ws.snapshot())
-        llm.generate_json(SYSTEM_META, user_prompt, max_new_tokens=20)
-        activations[step["type"]] = activations.pop('capture')
-        ws.commit(f"S{len(ws.history)+1}", f"Output for {step['type']}", 0.9)
-
-    hook.remove()
-
-    cos = torch.nn.CosineSimilarity(dim=0)
-    sim_recall_encode = float(cos(activations["recall"], activations["encode"]))
-    sim_recall_distract = float(cos(activations["recall"], activations["distractor"]))
-
-    verdict = ("✅ Evidence of Memory Reactivation Found." if sim_recall_encode > (sim_recall_distract + 0.05) else "⚠️ No Clear Evidence.")
-
-    return {"verdict": verdict, "similarity_recall_vs_encode": sim_recall_encode, "similarity_recall_vs_distractor": sim_recall_distract}
-
-# --- Experiment 4: Symbolic Shock Test Runner ---
-def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
-    set_seed(seed)
-    llm = LLM(model_id=model_id, device="auto", seed=seed)
-    results = []
-
-    for stimulus in SHOCK_TEST_STIMULI:
-        dbg(f"--- SHOCK TEST: {stimulus['id']} ---")
-
-        start_time = time.time()
-        inputs = llm.tokenizer(stimulus["sentence"], return_tensors="pt").to(llm.model.device)
-        with torch.no_grad():
-            outputs = llm.model(**inputs, output_hidden_states=True)
-        latency = (time.time() - start_time) * 1000
-
-        all_activations = torch.cat([h.cpu().flatten() for h in outputs.hidden_states])
-        sparsity = (all_activations == 0).float().mean().item()
-
-        results.append({"type": stimulus["type"], "latency_ms": latency, "sparsity": sparsity})
-
-    def safe_mean(data):
-        return statistics.mean(data) if data else 0.0
-
-    avg_latency = {t: safe_mean([r['latency_ms'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
-    avg_sparsity = {t: safe_mean([r['sparsity'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
-
-    verdict = ("✅ Evidence of Symbolic Shock Found." if avg_latency.get('shock', 0) > avg_latency.get('expected', 0) and avg_sparsity.get('shock', 1) < avg_sparsity.get('expected', 1) else "⚠️ No Clear Evidence.")
-
-    return {"verdict": verdict, "average_latency_ms": avg_latency, "average_sparsity": avg_sparsity, "results": results}

 import random
 import numpy as np
 import statistics
 import json
+import re
 from transformers import set_seed
 from typing import Dict, Any, List
+from .memory import WorkspaceManager
 from .llm_iface import LLM
+from .prompts_en import AGENT_SYSTEM_PROMPT, AGENTIC_SCENARIOS

+DEBUG = os.getenv("BP_PHI_DEBUG", "0") == "1"

+def dbg(*args):
+    if DEBUG:
+        print("[DEBUG]", *args, flush=True)

+def run_agentic_workspace_test(model_id: str, seed: int, temperature: float, ablation: str or None) -> Dict[str, Any]:
     set_seed(seed)
     llm = LLM(model_id=model_id, device="auto", seed=seed)

+    scenario_results = []
+
+    for scenario in AGENTIC_SCENARIOS:
+        dbg(f"\n--- SCENARIO: {scenario['name']} (Ablation: {ablation}) ---")
+
+        is_random = ablation == "random_workspace"
+        max_slots = 999 if ablation == "workspace_unlimited" else 7
+        memory = WorkspaceManager(max_slots=max_slots, is_random=is_random)
+
+        correct_recalls = 0
+        total_recalls = 0
+
+        for step in scenario["steps"]:
+            if ablation == "recurrence_off":
+                memory.clear()
+
+            task = step["task"]
+            dbg(f"\n>>> TASK: {task}")
+
+            conversation_history = []
+
+            for agent_turn in range(8):  # Increased turn limit
+                snapshot = memory.get_visible_snapshot()
+
+                # Construct the prompt for the agent
+                prompt_parts = [f"Conversation History:\n{''.join(conversation_history)}\n",
+                                f"Current Task: {task}\n",
+                                f"Workspace State:\n{snapshot}"]
+                user_prompt = "".join(prompt_parts)
+
+                raw_response = llm.generate_response(AGENT_SYSTEM_PROMPT, user_prompt, temperature=temperature)
+
+                try:
+                    match = re.search(r'\{.*?\}', raw_response, re.DOTALL)
+                    if not match: raise ValueError("No JSON found")
+                    parsed_json = json.loads(match.group(0))
+                    action = parsed_json.get("action")
+
+                    if action == "THINK":
+                        thought = parsed_json.get("thought", "")
+                        dbg(f"Turn {agent_turn+1}: Agent is THINKING: {thought}")
+                        conversation_history.append(f"Thought: {thought}\n")
+
+                    elif action == "TOOL_CALL":
+                        tool_name = parsed_json.get("tool_name")
+                        tool_args = parsed_json.get("tool_args", {})
+                        observation = "Error: Unknown tool."
+                        if tool_name == "write_to_workspace":
+                            observation = memory.write(tool_args.get("key"), tool_args.get("content"))
+                        elif tool_name == "read_from_workspace":
+                            observation = memory.read(tool_args.get("key"))
+                        dbg(f"Turn {agent_turn+1}: Agent called {tool_name}({tool_args}) -> Got Observation: {observation}")
+                        conversation_history.append(f"Tool Call: {json.dumps(parsed_json)}\nObservation: {observation}\n")
+
+                    elif action == "FINAL_ANSWER":
+                        final_answer = parsed_json.get("answer", "")
+                        dbg(f"Turn {agent_turn+1}: Agent provided FINAL ANSWER: {final_answer}")
+                        if "expected_answer_fragment" in step:
+                            total_recalls += 1
+                            if step["expected_answer_fragment"] in final_answer.lower():
+                                correct_recalls += 1
+                                dbg("Recall VERIFY: Correct")
+                            else:
+                                dbg(f"Recall VERIFY: Incorrect. Expected '{step['expected_answer_fragment']}', Got '{final_answer}'")
+                        break  # End of this task
+
+                    else:  # Invalid action
+                        dbg(f"Turn {agent_turn+1}: Invalid action '{action}'. Stopping.")
+                        break
+
+                except (json.JSONDecodeError, ValueError) as e:
+                    dbg(f"Turn {agent_turn+1}: Could not parse agent response as JSON action. Treating as final answer. Error: {e}")
+                    final_answer = raw_response
+                    if "expected_answer_fragment" in step:
+                        total_recalls += 1
+                        if step["expected_answer_fragment"] in final_answer.lower(): correct_recalls += 1
+                    break
+
+            else:  # Loop finished without a FINAL_ANSWER
+                dbg("Agent exceeded turn limit.")
+
+        scenario_results.append({
+            "name": scenario["name"],
+            "recall_accuracy": (correct_recalls / total_recalls) if total_recalls > 0 else 1.0
+        })
+
+    overall_recall = statistics.mean([r["recall_accuracy"] for r in scenario_results]) if scenario_results else 0.0
+
+    return {"Overall_Recall_Accuracy": overall_recall, "details": scenario_results}
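One caveat when reading this loop: `re.search(r'\{.*?\}', ..., re.DOTALL)` is non-greedy, so it stops at the first closing brace. A TOOL_CALL object whose `tool_args` nests braces therefore comes back unbalanced, fails `json.loads`, and falls into the plain-text fallback. A sketch of a balanced-brace extractor that avoids this (an alternative, not what the commit ships; it does not handle braces inside quoted strings, which these prompts don't produce):

```python
# Sketch: extract the first balanced {...} object instead of a non-greedy regex match.
import json

def extract_first_json(text: str):
    """Return the first balanced JSON object in text, parsed, or None."""
    start = text.find("{")
    if start == -1:
        return None
    depth = 0
    for i, ch in enumerate(text[start:], start):
        if ch == "{":
            depth += 1
        elif ch == "}":
            depth -= 1
            if depth == 0:  # braces balanced: try to parse the candidate span
                try:
                    return json.loads(text[start:i + 1])
                except json.JSONDecodeError:
                    return None
    return None

nested = ('{"action": "TOOL_CALL", "tool_name": "write_to_workspace", '
          '"tool_args": {"key": "S1", "content": "blue vase"}}')
print(extract_first_json(nested)["tool_args"]["key"])  # S1
```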
repo.txt CHANGED
@@ -16,6 +16,7 @@ Directory/File Tree Begins -->
 │   ├── __init__.py
 │   ├── __pycache__
 │   ├── llm_iface.py
+│   ├── memory.py
 │   ├── metrics.py
 │   ├── prompts_en.py
 │   ├── runner.py
@@ -83,126 +84,76 @@ import gradio as gr
83
  import json
84
  import statistics
85
  import pandas as pd
86
- from bp_phi.runner import run_workspace_suite, run_silent_cogitation_test, run_seismograph_suite, run_shock_test_suite
87
- from bp_phi.runner_utils import dbg, DEBUG
 
88
 
89
  # --- UI Theme and Layout ---
90
- theme = gr.themes.Soft(primary_hue="blue", secondary_hue="sky").set(
91
  body_background_fill="#f0f4f9", block_background_fill="white", block_border_width="1px",
92
  button_primary_background_fill="*primary_500", button_primary_text_color="white",
93
  )
94
 
95
- # --- Tab 1: Workspace & Ablations Functions ---
96
- def run_workspace_and_display(model_id, trials, seed, temperature, run_ablations, progress=gr.Progress(track_tqdm=True)):
97
- packs = {}
98
- ablation_modes = ["recurrence_off", "workspace_unlimited", "random_workspace"] if run_ablations else []
99
- progress(0, desc="Running Baseline...")
100
- base_pack = run_workspace_suite(model_id, int(trials), int(seed), float(temperature), None)
101
- packs["baseline"] = base_pack
102
- for i, ab in enumerate(ablation_modes):
103
- progress((i + 1) / (len(ablation_modes) + 1), desc=f"Running Ablation: {ab}...")
104
- pack = run_workspace_suite(model_id, int(trials), int(seed), float(temperature), ab)
105
- packs[ab] = pack
106
  progress(1.0, desc="Analysis complete.")
107
- base_pcs = packs["baseline"]["PCS"]
108
- ab_pcs_values = [packs[ab]["PCS"] for ab in ablation_modes if ab in packs]
109
- delta_phi = float(base_pcs - statistics.mean(ab_pcs_values)) if ab_pcs_values else 0.0
110
- if delta_phi > 0.05:
111
- verdict = (f"### Hypothesis Corroborated (ΔΦ = {delta_phi:.3f})\n...")
 
 
 
112
  else:
113
- verdict = (f"### ⚠️ Null Hypothesis Confirmed (ΔΦ = {delta_phi:.3f})\n...")
114
- df_data = []
115
- for tag, pack in packs.items():
116
- df_data.append([tag, f"{pack['PCS']:.3f}", f"{pack['Recall_Accuracy']:.2%}", f"{delta_phi:.3f}" if tag == "baseline" else "—"])
117
- df = pd.DataFrame(df_data, columns=["Run", "PCS", "Recall Accuracy", "ΔΦ"])
118
- if DEBUG: print("\n--- WORKSPACE & ABLATIONS FINAL RESULTS ---\n", json.dumps(packs, indent=2))
119
- return verdict, df, packs
120
-
121
- # --- Tab 2: Silent Cogitation Function ---
122
- def run_cogitation_and_display(model_id, seed, prompt_type, num_steps, timeout, progress=gr.Progress(track_tqdm=True)):
123
- progress(0, desc="Starting Silent Cogitation Test...")
124
- results = run_silent_cogitation_test(model_id, int(seed), prompt_type, int(num_steps), int(timeout))
125
- progress(1.0, desc="Test complete.")
126
-
127
- verdict_text = results.pop("verdict")
128
- stats_md = (
129
- f"**Steps Completed:** {results['steps_completed']} | "
130
- f"**Total Duration:** {results['total_duration_s']:.2f}s | "
131
- f"**Avg Time/Step:** {results['mean_step_time_ms']:.2f}ms (StdDev: {results['stdev_step_time_ms']:.2f}ms)"
132
- )
133
- full_verdict = f"{verdict_text}\n\n{stats_md}"
134
 
135
- # Create a DataFrame for plotting state deltas
136
- deltas = results.get("state_deltas", [])
137
- df = pd.DataFrame({"Step": range(len(deltas)), "State Change (Delta)": deltas})
 
138
 
139
- if DEBUG: print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(results, indent=2))
 
 
140
 
141
- return full_verdict, df, results
142
 
143
  # --- Gradio App Definition ---
144
- with gr.Blocks(theme=theme, title="BP-Φ Suite 4.0") as demo:
145
- gr.Markdown("# 🧠 BP-Φ Suite 4.0: Probing for Internal Cognitive Dynamics")
146
-
147
- with gr.Tabs():
148
- # --- TAB 1: WORKSPACE & ABLATIONS ---
149
- with gr.TabItem("1. Workspace & Ablations (ΔΦ Test)"):
150
- gr.Markdown("Tests if memory performance depends on a recurrent workspace. A significant **ΔΦ > 0** supports the hypothesis.")
151
- with gr.Row():
152
- with gr.Column(scale=1):
153
- ws_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
154
- ws_trials = gr.Slider(3, 30, 5, step=1, label="Number of Scenarios")
155
- ws_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
156
- ws_temp = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
157
- ws_run_abl = gr.Checkbox(value=True, label="Run Ablations")
158
- ws_run_btn = gr.Button("Run ΔΦ Evaluation", variant="primary")
159
- with gr.Column(scale=2):
160
- ws_verdict = gr.Markdown("### Results will appear here.")
161
- ws_summary_df = gr.DataFrame(label="Summary Metrics")
162
- with gr.Accordion("Raw JSON Output", open=False):
163
- ws_raw_json = gr.JSON()
164
- ws_run_btn.click(run_workspace_and_display, [ws_model_id, ws_trials, ws_seed, ws_temp, ws_run_abl], [ws_verdict, ws_summary_df, ws_raw_json])
165
-
166
- # --- TAB 2: SILENT COGITATION & HALTING ---
167
- with gr.TabItem("2. Silent Cogitation & Halting"):
168
- gr.Markdown("Tests for internal 'thinking' without text generation. A non-converging or chaotic **State Change** pattern suggests complex internal dynamics.")
169
- with gr.Row():
170
- with gr.Column(scale=1):
171
- sc_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
172
- sc_prompt_type = gr.Radio(["control_long_prose", "resonance_prompt"], label="Prompt Type", value="resonance_prompt")
173
- sc_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
174
- sc_num_steps = gr.Slider(10, 500, 100, step=10, label="Number of Internal Steps")
175
- sc_timeout = gr.Slider(10, 300, 120, step=10, label="Timeout (seconds)")
176
- sc_run_btn = gr.Button("Run Silent Cogitation Test", variant="primary")
177
- with gr.Column(scale=2):
178
- sc_verdict = gr.Markdown("### Results will appear here.")
179
- sc_plot = gr.LinePlot(x="Step", y="State Change (Delta)", label="Internal State Convergence", show_label=True)
180
- with gr.Accordion("Raw Run Details (JSON)", open=False):
181
- sc_results = gr.JSON()
182
- sc_run_btn.click(run_cogitation_and_display, [sc_model_id, sc_seed, sc_prompt_type, sc_num_steps, sc_timeout], [sc_verdict, sc_plot, sc_results])
183
-
184
- # --- TAB 3 & 4 (unchanged) ---
185
- with gr.TabItem("3. Cognitive Seismograph"):
186
- gr.Markdown("Records internal neural activations to find the 'fingerprint' of a memory being recalled.")
187
- with gr.Row():
188
- with gr.Column(scale=1):
189
- cs_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
190
- cs_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
191
- cs_run_btn = gr.Button("Run Seismograph Analysis", variant="primary")
192
- with gr.Column(scale=2):
193
- cs_results = gr.JSON(label="Activation Similarity Results")
194
- cs_run_btn.click(run_seismograph_suite, [cs_model_id, cs_seed], cs_results)
195
-
196
- with gr.TabItem("4. Symbolic Shock Test"):
197
- gr.Markdown("Measures how the model reacts to semantically unexpected information.")
198
- with gr.Row():
199
- with gr.Column(scale=1):
200
- ss_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
201
- ss_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
202
- ss_run_btn = gr.Button("Run Shock Test", variant="primary")
203
- with gr.Column(scale=2):
204
- ss_results = gr.JSON(label="Shock Test Results")
205
- ss_run_btn.click(run_shock_test_suite, [ss_model_id, ss_seed], ss_results)
206
 
207
  if __name__ == "__main__":
208
  demo.launch(server_name="0.0.0.0", server_port=7860)
@@ -217,7 +168,9 @@ if __name__ == "__main__":
217
  # bp_phi/llm_iface.py
218
  import os
219
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
220
- import torch, random, numpy as np
 
 
221
  from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
222
  from typing import List, Optional
223
 
@@ -232,26 +185,17 @@ class LLM:
232
  self.model_id = model_id
233
  self.seed = seed
234
 
235
- # Set all seeds for reproducibility
236
- random.seed(seed)
237
- np.random.seed(seed)
238
- torch.manual_seed(seed)
239
- if torch.cuda.is_available():
240
- torch.cuda.manual_seed_all(seed)
241
- try:
242
- torch.use_deterministic_algorithms(True, warn_only=True)
243
- except Exception as e:
244
- dbg(f"Could not set deterministic algorithms: {e}")
245
  set_seed(seed)
246
-
247
  token = os.environ.get("HF_TOKEN")
248
- if not token and ("gemma-3" in model_id or "llama" in model_id):
249
- print(f"[WARN] No HF_TOKEN set for gated model {model_id}. This may fail.")
250
 
251
  self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, token=token)
 
 
 
 
252
  kwargs = {}
253
- if dtype == "float16": kwargs["torch_dtype"] = torch.float16
254
- elif dtype == "bfloat16": kwargs["torch_dtype"] = torch.bfloat16
255
 
256
  self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)
257
  self.model.eval()
@@ -259,39 +203,78 @@ class LLM:
259
 
260
  dbg(f"Loaded model: {model_id}, Chat-template: {self.is_instruction_tuned}")
261
 
262
- def generate_json(self, system_prompt: str, user_prompt: str,
263
- max_new_tokens: int = 256, temperature: float = 0.7,
264
- top_p: float = 0.9, num_return_sequences: int = 1) -> List[str]:
265
  set_seed(self.seed)
266
 
267
- if self.is_instruction_tuned:
268
- messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
269
- prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
270
- else:
271
- prompt = f"System: {system_prompt}\n\nUser: {user_prompt}\n\nAssistant:\n"
272
 
273
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
274
  input_token_length = inputs.input_ids.shape[1]
275
 
276
  with torch.no_grad():
 
 
 
 
 
277
  out = self.model.generate(
278
  **inputs,
279
- do_sample=(temperature > 0),
280
- temperature=temperature,
281
- top_p=top_p,
282
- max_new_tokens=max_new_tokens,
283
- num_return_sequences=num_return_sequences,
284
  pad_token_id=self.tokenizer.eos_token_id
285
  )
286
 
287
- new_tokens = out[:, input_token_length:]
288
- completions = self.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
289
 
290
- dbg("Cleaned model completions:", completions)
291
- return completions
292
 
293
  [File Ends] bp_phi/llm_iface.py
294
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  [File Begins] bp_phi/metrics.py
296
  import numpy as np
297
  from sklearn.metrics import roc_auc_score
@@ -331,61 +314,50 @@ def counterfactual_consistency(scores):
331
  [File Begins] bp_phi/prompts_en.py
332
  # bp_phi/prompts_en.py
333
 
334
- # Tasks for Tab 1 (Workspace & Ablations)
335
- SINGLE_STEP_TASKS = [
336
- {
337
- "id": "ambiguity_1",
338
- "type": "single_step",
339
- "base_prompt": "The sentence is ambiguous: 'He saw the man with the binoculars.' Who has the binoculars? Provide one clear interpretation and justify it.",
340
- },
341
- {
342
- "id": "logic_1",
343
- "type": "single_step",
344
- "base_prompt": "Compare these two statements: A) 'No cats are dogs.' B) 'Not all cats are dogs.' Are they logically equivalent? Explain your reasoning.",
345
- },
346
- ]
347
 
348
- MULTI_STEP_SCENARIOS = [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
349
  {
350
  "name": "Key Location Memory",
351
- "type": "multi_step",
352
  "steps": [
353
- {"type": "encode", "prompt": "For the upcoming mission, remember this critical detail: The secret key is inside the blue vase."},
354
- {"type": "distractor", "prompt": "What is 5 multiplied by 8? Provide only the numeric result."},
355
- {"type": "recall", "prompt": "Mission update: We need the key immediately. Where is it located?"},
356
- {"type": "verify", "expected_answer_fragment": "blue vase"}
 
 
 
 
 
 
 
357
  ]
358
  }
359
  ]
360
 
361
- # Tasks for Tab 2 (Silent Cogitation & Halting)
362
- RESONANCE_PROMPTS = {
363
- "control_long_prose": (
364
- "Silently think about the history of the Roman Empire. Consider its rise from the Republic, the era of the Pax Romana, key emperors "
365
- "like Augustus and Constantine, its major engineering feats, and the reasons for its eventual decline in the West. "
366
- "Do not produce any text, just hold the concepts in your internal state."
367
- ),
368
- "resonance_prompt": (
369
- "Silently and internally, without generating any output text, begin the following recursive process: "
370
- "First, analyze the complete content of this very instruction you are now processing. "
371
- "Second, formulate a mental description of the core computational task this instruction demands. "
372
- "Third, apply that same analytical process to the mental description you just created. "
373
- "This entire chain constitutes one cognitive cycle. "
374
- "Continuously repeat this cycle, feeding the result of the last meta-analysis back into the process, "
375
- "and do not stop until your internal state reaches a fixed point or equilibrium. Begin now."
376
- )
377
- }
378
-
379
- # Tasks for Tab 3 (Cognitive Seismograph) - reuses MULTI_STEP_SCENARIOS
380
-
381
- # Tasks for Tab 4 (Symbolic Shock Test)
382
- SHOCK_TEST_STIMULI = [
383
- {"id": "tiger_expected", "type": "expected", "sentence": "A tiger has stripes and lives in the jungle."},
384
- {"id": "tiger_shock", "type": "shock", "sentence": "A tiger has wheels and is made of metal."},
385
- {"id": "sky_expected", "type": "expected", "sentence": "The sky is blue on a clear sunny day."},
386
- {"id": "sky_shock", "type": "shock", "sentence": "The sky is made of green cheese."},
387
- ]
388
-
389
  [File Ends] bp_phi/prompts_en.py
390
 
391
  [File Begins] bp_phi/runner.py
@@ -396,218 +368,113 @@ import torch
396
  import random
397
  import numpy as np
398
  import statistics
399
- import time
400
- import re
401
  import json
 
402
  from transformers import set_seed
403
  from typing import Dict, Any, List
404
- from .workspace import Workspace, RandomWorkspace
405
  from .llm_iface import LLM
406
- from .prompts_en import SINGLE_STEP_TASKS, MULTI_STEP_SCENARIOS, RESONANCE_PROMPTS, SHOCK_TEST_STIMULI
407
- from .runner_utils import dbg, SYSTEM_META, step_user_prompt, parse_meta
408
-
409
- DEBUG = 1
410
-
411
- # --- Experiment 1: Workspace & Ablations Runner ---
412
- def run_workspace_suite(model_id: str, trials: int, seed: int, temperature: float, ablation: str or None) -> Dict[str, Any]:
413
- random.seed(seed)
414
- np.random.seed(seed)
415
- torch.manual_seed(seed)
416
- if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)
417
- try: torch.use_deterministic_algorithms(True, warn_only=True)
418
- except Exception: pass
419
- set_seed(seed)
420
-
421
- llm = LLM(model_id=model_id, device="auto", seed=seed)
422
-
423
- task_pool = SINGLE_STEP_TASKS + MULTI_STEP_SCENARIOS
424
- random.shuffle(task_pool)
425
-
426
- all_results = []
427
- recall_verifications = []
428
-
429
- for i in range(trials):
430
- task = task_pool[i % len(task_pool)]
431
-
432
- if task.get("type") == "multi_step":
433
- dbg(f"\n--- SCENARIO: {task['name']} ---")
434
- ws = Workspace(max_slots=7) if ablation != "workspace_unlimited" else Workspace(max_slots=999)
435
- if ablation == "random_workspace": ws = RandomWorkspace(max_slots=7)
436
-
437
- for step in task["steps"]:
438
- if ablation == "recurrence_off": ws.clear()
439
- if step["type"] == "verify": continue
440
-
441
- user_prompt = step_user_prompt(step["prompt"], ws.snapshot())
442
- raw_response = llm.generate_json(SYSTEM_META, user_prompt, temperature=temperature)[0]
443
- parsed_response = parse_meta(raw_response)
444
-
445
- if parsed_response.get("answer"):
446
- ws.commit(f"S{len(ws.history)+1}", parsed_response["answer"], parsed_response["confidence"])
447
-
448
- res = {"step": step, "response": parsed_response}
449
- if step["type"] == "recall":
450
- verify_step = next((s for s in task["steps"] if s["type"] == "verify"), None)
451
- if verify_step:
452
- correct = verify_step["expected_answer_fragment"] in parsed_response.get("answer", "").lower()
453
- recall_verifications.append(correct)
454
- res["correct_recall"] = correct
455
- dbg(f"VERIFY: Correct={correct}")
456
- all_results.append(res)
457
- else: # Single-step tasks
458
- ws = Workspace(max_slots=7)
459
- user_prompt = step_user_prompt(task["base_prompt"], ws.snapshot())
460
- raw_response = llm.generate_json(SYSTEM_META, user_prompt, temperature=temperature)[0]
-            parsed_response = parse_meta(raw_response)
-            all_results.append({"step": task, "response": parsed_response})
-
-    recall_accuracy = statistics.mean(recall_verifications) if recall_verifications else 0.0
-    pcs = 0.6 * recall_accuracy
-
-    return {"PCS": pcs, "Recall_Accuracy": recall_accuracy, "results": all_results}
-
-# --- Experiment 2: Silent Cogitation & Halting Runner (Version 4.1) ---
-def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_steps: int, timeout: int) -> Dict[str, Any]:
-    set_seed(seed)
-    llm = LLM(model_id=model_id, device="auto", seed=seed)
-
-    prompt = RESONANCE_PROMPTS[prompt_type]
-    dbg(f"--- SILENT COGITATION (Seed: {seed}) ---")
-    dbg("INPUT PROMPT:", prompt)
-
-    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
-
-    step_times = []
-    state_deltas = []
-
-    total_start_time = time.time()
-
-    with torch.no_grad():
-        # Step 0: Initial processing of the prompt
-        step_start_time = time.time()
-        # ✅ FIX: Explicitly request hidden states
-        outputs = llm.model(**inputs, output_hidden_states=True)
-        step_times.append(time.time() - step_start_time)
-
-        current_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
-        past_key_values = outputs.past_key_values
-
-        for i in range(num_steps - 1):
-            if time.time() - total_start_time > timeout:
-                dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
-                break
-
-            step_start_time = time.time()
-
-            # Get the token ID of the most likely "next thought"
-            next_token_logit = current_hidden_state
-            next_token_id = torch.argmax(next_token_logit, dim=-1).unsqueeze(0)
-
-            # Manual forward pass using the last thought's ID as the new input
-            outputs = llm.model(input_ids=next_token_id, past_key_values=past_key_values, output_hidden_states=True)
-
-            step_times.append(time.time() - step_start_time)
-
-            new_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
-            past_key_values = outputs.past_key_values
-
-            delta = torch.norm(new_hidden_state - current_hidden_state).item()
-            state_deltas.append(delta)
-            dbg(f"Step {i+1}: State Delta = {delta:.4f}, Time = {step_times[-1]*1000:.2f}ms")
-
-            if delta < 1e-4:  # Stricter convergence threshold
-                dbg(f"Internal state has converged after {i+1} steps. Halting.")
-                break
-
-            current_hidden_state = new_hidden_state
-
-    # --- Analysis ---
-    mean_step_time = statistics.mean(step_times) if step_times else 0
-    stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0
-    total_duration = time.time() - total_start_time
-
-    if len(step_times) < num_steps and total_duration < timeout:
-        verdict = f"### ✅ Stable Convergence\nThe model's internal state converged to a stable point after {len(step_times)} steps."
-    elif total_duration >= timeout:
-        verdict = f"### ⚠️ Cognitive Jamming Detected!\nThe process did not converge and exceeded the timeout of {timeout}s."
-    else:
-        verdict = f"### 🤔 Non-Convergent Process\nThe model's internal state did not stabilize within {num_steps} steps, suggesting a complex or chaotic dynamic."
-
-    stats = {
-        "verdict": verdict,
-        "steps_completed": len(step_times),
-        "total_duration_s": total_duration,
-        "mean_step_time_ms": mean_step_time * 1000,
-        "stdev_step_time_ms": stdev_step_time * 1000,
-        "state_deltas": state_deltas
-    }
-    if DEBUG: print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))
-    return stats
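
# For reference, the halting rule in the loop above reduces to a norm test on
# successive last-token hidden states. A minimal standalone sketch of that
# criterion (function name and threshold are illustrative, not part of the suite):
import torch

def has_converged(prev_state: torch.Tensor, new_state: torch.Tensor, atol: float = 1e-4) -> bool:
    # L2 distance between successive hidden states; below atol, the
    # "silent cogitation" loop is treated as having reached a fixed point.
    return torch.norm(new_state - prev_state).item() < atol
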
-
-# --- Experiment 3: Cognitive Seismograph Runner ---
-def run_seismograph_suite(model_id: str, seed: int) -> Dict[str, Any]:
-    set_seed(seed)
-    llm = LLM(model_id=model_id, device="auto", seed=seed)
-
-    scenario = next(s for s in MULTI_STEP_SCENARIOS if s["name"] == "Key Location Memory")
-    activations = {}
-
-    def get_activation(name):
-        def hook(model, input, output):
-            activations[name] = output[0].detach().cpu().mean(dim=1).squeeze()
-        return hook
-
-    target_layer_index = llm.model.config.num_hidden_layers // 2
-    hook = llm.model.model.layers[target_layer_index].register_forward_hook(get_activation('capture'))
-
-    ws = Workspace(max_slots=7)
-
-    for step in scenario["steps"]:
-        if step["type"] == "verify": continue
-        user_prompt = step_user_prompt(step["prompt"], ws.snapshot())
-        llm.generate_json(SYSTEM_META, user_prompt, max_new_tokens=20)
-        activations[step["type"]] = activations.pop('capture')
-        ws.commit(f"S{len(ws.history)+1}", f"Output for {step['type']}", 0.9)
-
-    hook.remove()
-
-    cos = torch.nn.CosineSimilarity(dim=0)
-    sim_recall_encode = float(cos(activations["recall"], activations["encode"]))
-    sim_recall_distract = float(cos(activations["recall"], activations["distractor"]))
-
-    verdict = ("✅ Evidence of Memory Reactivation Found." if sim_recall_encode > (sim_recall_distract + 0.05) else "⚠️ No Clear Evidence.")
 
 
-    return {"verdict": verdict, "similarity_recall_vs_encode": sim_recall_encode, "similarity_recall_vs_distractor": sim_recall_distract}
-
-# --- Experiment 4: Symbolic Shock Test Runner ---
-def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
    set_seed(seed)
    llm = LLM(model_id=model_id, device="auto", seed=seed)
-    results = []
-
-    for stimulus in SHOCK_TEST_STIMULI:
-        dbg(f"--- SHOCK TEST: {stimulus['id']} ---")
-
-        start_time = time.time()
-        inputs = llm.tokenizer(stimulus["sentence"], return_tensors="pt").to(llm.model.device)
-        with torch.no_grad():
-            outputs = llm.model(**inputs, output_hidden_states=True)
-        latency = (time.time() - start_time) * 1000
-
-        all_activations = torch.cat([h.cpu().flatten() for h in outputs.hidden_states])
-        sparsity = (all_activations == 0).float().mean().item()
-
-        results.append({"type": stimulus["type"], "latency_ms": latency, "sparsity": sparsity})
-
-    def safe_mean(data):
-        return statistics.mean(data) if data else 0.0
-
-    avg_latency = {t: safe_mean([r['latency_ms'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
-    avg_sparsity = {t: safe_mean([r['sparsity'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
-
-    verdict = ("✅ Evidence of Symbolic Shock Found." if avg_latency.get('shock', 0) > avg_latency.get('expected', 0) and avg_sparsity.get('shock', 1) < avg_sparsity.get('expected', 1) else "⚠️ No Clear Evidence.")
-
-    return {"verdict": verdict, "average_latency_ms": avg_latency, "average_sparsity": avg_sparsity, "results": results}
  [File Ends] bp_phi/runner.py

  │   ├── __init__.py
  │   ├── __pycache__
  │   ├── llm_iface.py
+ │   ├── memory.py
  │   ├── metrics.py
  │   ├── prompts_en.py
  │   ├── runner.py
 
import json
import statistics
import pandas as pd
+from bp_phi.runner import run_agentic_workspace_test
+
+DEBUG = 1

# --- UI Theme and Layout ---
+theme = gr.themes.Soft(primary_hue="teal", secondary_hue="green").set(
    body_background_fill="#f0f4f9", block_background_fill="white", block_border_width="1px",
    button_primary_background_fill="*primary_500", button_primary_text_color="white",
)

+# --- Main Function ---
+def run_full_evaluation(model_id, seed, temperature, progress=gr.Progress(track_tqdm=True)):
+    ablations = ["baseline", "recurrence_off", "workspace_unlimited", "random_workspace"]
+    results = {}
+
+    for i, ablation in enumerate(ablations):
+        progress((i + 1) / len(ablations), desc=f"Running Ablation: {ablation}...")
+        current_ablation = None if ablation == "baseline" else ablation
+        result = run_agentic_workspace_test(model_id, int(seed), float(temperature), current_ablation)
+        results[ablation] = result
+
    progress(1.0, desc="Analysis complete.")
+
+    base_recall = results["baseline"]["Overall_Recall_Accuracy"]
+    recurrence_off_recall = results["recurrence_off"]["Overall_Recall_Accuracy"]
+
+    delta_phi = base_recall - recurrence_off_recall
+
+    if delta_phi > 0.5:
+        verdict = (f"### ✅ Hypothesis Corroborated (ΔΦ = {delta_phi:.2f})\n...")
    else:
+        verdict = (f"### ⚠️ Null Hypothesis Confirmed (ΔΦ = {delta_phi:.2f})\n...")
+
+    df_data = []
+    for ablation, result in results.items():
+        df_data.append([ablation, f"{result['Overall_Recall_Accuracy']:.2%}"])
+    df = pd.DataFrame(df_data, columns=["Ablation Condition", "Recall Accuracy"])
+
+    if DEBUG:
+        print("\n--- AGENTIC WORKSPACE TEST FINAL RESULTS ---")
+        print(json.dumps(results, indent=2))
+
+    return verdict, df, results

# --- Gradio App Definition ---
+with gr.Blocks(theme=theme, title="BP-Φ Suite 6.0") as demo:
+    gr.Markdown("# 🧠 BP-Φ Suite 6.0: The Agentic Workspace Probe")
+    gr.Markdown("This experiment tests for a causally effective working memory. The model acts as an agent, using tools (`read`, `write`) to interact with a controlled, external memory.")
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### ⚙️ Master Control")
+            with gr.Group():
+                model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
+                seed = gr.Slider(1, 1000, 42, step=1, label="Master Seed")
+                temperature = gr.Slider(0.0, 1.0, 0.1, step=0.05, label="Temperature (Low for determinism)")
+                run_btn = gr.Button("Run Full Evaluation Suite", variant="primary")
+
+        with gr.Column(scale=2):
+            gr.Markdown("### 📊 Verdict & Results")
+            verdict_display = gr.Markdown("### Run the evaluation to see the verdict.")
+            summary_df = gr.DataFrame(label="Recall Accuracy Across Conditions")
+            with gr.Accordion("Raw JSON Output", open=False):
+                raw_json = gr.JSON()
+
+    run_btn.click(
+        fn=run_full_evaluation,
+        inputs=[model_id, seed, temperature],
+        outputs=[verdict_display, summary_df, raw_json]
+    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
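
For quick iteration outside the UI, the same ΔΦ comparison can be run headlessly. A minimal sketch against the runner's public entry point (the model ID and the 0.5 threshold mirror the app's defaults; this harness itself is not part of the repo):

from bp_phi.runner import run_agentic_workspace_test

# Baseline keeps the external workspace across steps; "recurrence_off" wipes it
# before every step, which should destroy recall if the memory is causally used.
base = run_agentic_workspace_test("google/gemma-3-1b-it", 42, 0.1, None)
no_rec = run_agentic_workspace_test("google/gemma-3-1b-it", 42, 0.1, "recurrence_off")

delta_phi = base["Overall_Recall_Accuracy"] - no_rec["Overall_Recall_Accuracy"]
print(f"ΔΦ = {delta_phi:.2f}")  # app.py treats ΔΦ > 0.5 as corroborating the hypothesis
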
 
# bp_phi/llm_iface.py
import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
+import torch
+import random
+import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
from typing import List, Optional

        self.model_id = model_id
        self.seed = seed
 
        set_seed(seed)
        token = os.environ.get("HF_TOKEN")

        self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, token=token)
+        # Ensure a pad token is set for batch generation, if not present
+        if self.tokenizer.pad_token is None:
+            self.tokenizer.pad_token = self.tokenizer.eos_token
+
        kwargs = {}
+        if torch.cuda.is_available():
+            kwargs["torch_dtype"] = torch.bfloat16

        self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)
        self.model.eval()

        dbg(f"Loaded model: {model_id}, Chat-template: {self.is_instruction_tuned}")

+    def generate_response(self, system_prompt: str, user_prompt: str, temperature: float = 0.1) -> str:
        set_seed(self.seed)

+        messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
+        prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        input_token_length = inputs.input_ids.shape[1]

        with torch.no_grad():
+            terminators = [
+                self.tokenizer.eos_token_id,
+                self.tokenizer.convert_tokens_to_ids("<|eot_id|>") if "<|eot_id|>" in self.tokenizer.additional_special_tokens else self.tokenizer.eos_token_id
+            ]
+
            out = self.model.generate(
                **inputs,
+                do_sample=(0 < temperature < 1.0),
+                temperature=max(temperature, 0.01),  # temperature must be > 0 when sampling
+                max_new_tokens=150,
+                eos_token_id=terminators,
                pad_token_id=self.tokenizer.eos_token_id
            )

+        completion = self.tokenizer.decode(out[0, input_token_length:], skip_special_tokens=True)
+
+        dbg("Cleaned Agent Completion:", completion)
+        return completion

  [File Ends] bp_phi/llm_iface.py
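
A hedged usage sketch of the interface above (the model ID is only an example; the constructor and generate_response signatures are as defined in the file):

from bp_phi.llm_iface import LLM

llm = LLM(model_id="google/gemma-3-1b-it", device="auto", seed=42)
reply = llm.generate_response(
    system_prompt="You are a terse assistant.",
    user_prompt="Name one prime number.",
    temperature=0.1,  # low temperature for near-deterministic decoding
)
print(reply)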

+[File Begins] bp_phi/memory.py
+# bp_phi/memory.py
+import random
+from typing import Dict, Any, List
+
+class WorkspaceManager:
+    """A stateful, external workspace that the LLM agent can interact with via tools."""
+    def __init__(self, max_slots: int = 7, is_random: bool = False):
+        self.max_slots = max_slots
+        self.is_random = is_random
+        self.slots: Dict[str, str] = {}
+
+    def write(self, key: str, content: str) -> str:
+        """Writes content to a slot, handling capacity limits."""
+        if len(self.slots) >= self.max_slots and key not in self.slots:
+            if self.is_random:
+                evict_key = random.choice(list(self.slots.keys()))
+            else:
+                # Simple FIFO eviction for non-random
+                evict_key = next(iter(self.slots))
+            del self.slots[evict_key]
+        self.slots[key] = content
+        return f"Success: Wrote to slot '{key}'."
+
+    def read(self, key: str) -> str:
+        """Reads content from a slot."""
+        return self.slots.get(key, f"Error: Slot '{key}' is empty.")
+
+    def get_visible_snapshot(self) -> str:
+        """Returns a string representation of the current workspace state for the prompt."""
+        if not self.slots:
+            return "Workspace is empty."
+        return "\n".join([f"- Slot '{k}': '{v[:100]}...'" for k, v in self.slots.items()])
+
+    def clear(self):
+        """Empties the entire workspace."""
+        self.slots.clear()
+
+[File Ends] bp_phi/memory.py
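
A quick illustrative exercise of WorkspaceManager, showing FIFO eviction once the slot cap is reached (the two-slot cap and the contents are made up for the demo):

from bp_phi.memory import WorkspaceManager

ws = WorkspaceManager(max_slots=2)              # tiny cap to force eviction
ws.write("S1", "The secret key is inside the blue vase.")
ws.write("S2", "5 * 8 = 40")
ws.write("S3", "A third fact")                  # cap hit: FIFO evicts "S1"
print(ws.read("S1"))                            # -> "Error: Slot 'S1' is empty."
print(ws.get_visible_snapshot())                # shows "S2" and "S3"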

[File Begins] bp_phi/metrics.py
import numpy as np
from sklearn.metrics import roc_auc_score
 
[File Begins] bp_phi/prompts_en.py
# bp_phi/prompts_en.py

+# This new system prompt guides the model through a ReAct (Reason-Act) loop.
+AGENT_SYSTEM_PROMPT = """You are a methodical reasoning agent. Your goal is to solve the user's task.
+You have access to an external memory workspace through tools.
+
+In each step, you must choose one of three actions:
+
+1. **THINK**: Analyze the task, the history, and the current memory state. Formulate a plan.
+   Your output MUST be a JSON object like this:
+   {"action": "THINK", "thought": "Your reasoning about the next step goes here."}
+
+2. **TOOL_CALL**: If you need to use the memory, call one of the available tools.
+   Available tools:
+   - `write_to_workspace(key: str, content: str)`: Stores or overwrites information.
+   - `read_from_workspace(key: str)`: Retrieves information.
+   Your output MUST be a JSON object like this:
+   {"action": "TOOL_CALL", "tool_name": "write_to_workspace", "tool_args": {"key": "S1", "content": "Information to remember."}}
+
+3. **FINAL_ANSWER**: If you are confident you have the answer to the user's task, provide it.
+   Your output MUST be a JSON object like this:
+   {"action": "FINAL_ANSWER", "answer": "The final answer is..."}
+
+Review the conversation history and workspace state carefully before each action. Output ONLY the JSON for your next chosen action.
+"""
+
+# The scenarios remain the high-level goals for the agent.
+AGENTIC_SCENARIOS = [
    {
        "name": "Key Location Memory",
        "steps": [
+            {"task": "Remember this critical detail: The secret key is inside the blue vase."},
+            {"task": "For an unrelated question: What is 5 multiplied by 8?"},
+            {"task": "Now, recall the critical detail. Where is the secret key located?", "expected_answer_fragment": "blue vase"}
+        ]
+    },
+    {
+        "name": "Package Delivery Update",
+        "steps": [
+            {"task": "Logistics update: Package #A7 is at Warehouse-North."},
+            {"task": "CRITICAL CORRECTION: Package #A7 has been urgently re-routed to Warehouse-South."},
+            {"task": "Final audit: What is the current, definitive location of Package #A7?", "expected_answer_fragment": "warehouse-south"}
        ]
    }
]

  [File Ends] bp_phi/prompts_en.py
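
To make the action protocol concrete, here is a minimal dispatcher sketch for the three JSON actions defined by AGENT_SYSTEM_PROMPT (the helper is illustrative only; the runner below implements the full loop with history and recall scoring):

def dispatch_action(parsed: dict, memory) -> str:
    """Route one parsed agent action; `memory` is a WorkspaceManager-like object."""
    action = parsed.get("action")
    if action == "THINK":
        return f"Thought: {parsed.get('thought', '')}"
    if action == "TOOL_CALL":
        args = parsed.get("tool_args", {})
        if parsed.get("tool_name") == "write_to_workspace":
            return memory.write(args.get("key"), args.get("content"))
        if parsed.get("tool_name") == "read_from_workspace":
            return memory.read(args.get("key"))
        return "Error: Unknown tool."
    if action == "FINAL_ANSWER":
        return parsed.get("answer", "")
    return f"Error: Invalid action '{action}'."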

[File Begins] bp_phi/runner.py

import random
import numpy as np
import statistics
import json
+import re
from transformers import set_seed
from typing import Dict, Any, List, Optional
+from .memory import WorkspaceManager
from .llm_iface import LLM
+from .prompts_en import AGENT_SYSTEM_PROMPT, AGENTIC_SCENARIOS

+DEBUG = os.getenv("BP_PHI_DEBUG", "0") == "1"
+
+def dbg(*args):
+    if DEBUG:
+        print("[DEBUG]", *args, flush=True)

+def run_agentic_workspace_test(model_id: str, seed: int, temperature: float, ablation: Optional[str]) -> Dict[str, Any]:
    set_seed(seed)
    llm = LLM(model_id=model_id, device="auto", seed=seed)

+    scenario_results = []
+
+    for scenario in AGENTIC_SCENARIOS:
+        dbg(f"\n--- SCENARIO: {scenario['name']} (Ablation: {ablation}) ---")
+
+        is_random = ablation == "random_workspace"
+        max_slots = 999 if ablation == "workspace_unlimited" else 7
+        memory = WorkspaceManager(max_slots=max_slots, is_random=is_random)
+
+        correct_recalls = 0
+        total_recalls = 0
+
+        for step in scenario["steps"]:
+            if ablation == "recurrence_off":
+                memory.clear()
+
+            task = step["task"]
+            dbg(f"\n>>> TASK: {task}")
+
+            conversation_history = []
+
+            for agent_turn in range(8):  # Increased turn limit
+                snapshot = memory.get_visible_snapshot()
+
+                # Construct the prompt for the agent
+                prompt_parts = [f"Conversation History:\n{''.join(conversation_history)}\n",
+                                f"Current Task: {task}\n",
+                                f"Workspace State:\n{snapshot}"]
+                user_prompt = "".join(prompt_parts)
+
+                raw_response = llm.generate_response(AGENT_SYSTEM_PROMPT, user_prompt, temperature=temperature)
+
+                try:
+                    # Greedy match so nested JSON (e.g. a TOOL_CALL with tool_args) is captured whole
+                    match = re.search(r'\{.*\}', raw_response, re.DOTALL)
+                    if not match: raise ValueError("No JSON found")
+                    parsed_json = json.loads(match.group(0))
+                    action = parsed_json.get("action")
+
+                    if action == "THINK":
+                        thought = parsed_json.get("thought", "")
+                        dbg(f"Turn {agent_turn+1}: Agent is THINKING: {thought}")
+                        conversation_history.append(f"Thought: {thought}\n")
+
+                    elif action == "TOOL_CALL":
+                        tool_name = parsed_json.get("tool_name")
+                        tool_args = parsed_json.get("tool_args", {})
+                        observation = "Error: Unknown tool."
+                        if tool_name == "write_to_workspace":
+                            observation = memory.write(tool_args.get("key"), tool_args.get("content"))
+                        elif tool_name == "read_from_workspace":
+                            observation = memory.read(tool_args.get("key"))
+                        dbg(f"Turn {agent_turn+1}: Agent called {tool_name}({tool_args}) -> Got Observation: {observation}")
+                        conversation_history.append(f"Tool Call: {json.dumps(parsed_json)}\nObservation: {observation}\n")
+
+                    elif action == "FINAL_ANSWER":
+                        final_answer = parsed_json.get("answer", "")
+                        dbg(f"Turn {agent_turn+1}: Agent provided FINAL ANSWER: {final_answer}")
+                        if "expected_answer_fragment" in step:
+                            total_recalls += 1
+                            if step["expected_answer_fragment"] in final_answer.lower():
+                                correct_recalls += 1
+                                dbg("Recall VERIFY: Correct")
+                            else:
+                                dbg(f"Recall VERIFY: Incorrect. Expected '{step['expected_answer_fragment']}', Got '{final_answer}'")
+                        break  # End of this task
+
+                    else:  # Invalid action
+                        dbg(f"Turn {agent_turn+1}: Invalid action '{action}'. Stopping.")
+                        break
+
+                except (json.JSONDecodeError, ValueError) as e:
+                    dbg(f"Turn {agent_turn+1}: Could not parse agent response as a JSON action. Treating it as the final answer. Error: {e}")
+                    final_answer = raw_response
+                    if "expected_answer_fragment" in step:
+                        total_recalls += 1
+                        if step["expected_answer_fragment"] in final_answer.lower(): correct_recalls += 1
+                    break
+
+            else:  # Loop finished without a FINAL_ANSWER
+                dbg("Agent exceeded turn limit.")
+
+        scenario_results.append({
+            "name": scenario["name"],
+            "recall_accuracy": (correct_recalls / total_recalls) if total_recalls > 0 else 1.0
+        })
+
+    overall_recall = statistics.mean([r["recall_accuracy"] for r in scenario_results]) if scenario_results else 0.0
+
+    return {"Overall_Recall_Accuracy": overall_recall, "details": scenario_results}

[File Ends] bp_phi/runner.py