neuralworm committed
Commit 99891fa · 1 Parent(s): 750cbcf
app.py CHANGED
@@ -1,10 +1,13 @@
  # app.py
  import gradio as gr
  import json
  import statistics
  import pandas as pd
- from bp_phi.runner import run_silent_cogitation_test, run_shock_test_suite
- from bp_phi.runner_utils import dbg, DEBUG

  # --- UI Theme and Layout ---
  theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(
@@ -12,7 +15,7 @@ theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(
      button_primary_background_fill="*primary_500", button_primary_text_color="white",
  )

- # --- Tab 1: Silent Cogitation Function ---
  def run_cogitation_and_display(model_id, seed, prompt_type, num_steps, timeout, progress=gr.Progress(track_tqdm=True)):
      progress(0, desc="Starting Silent Cogitation Test...")
      results = run_silent_cogitation_test(model_id, int(seed), prompt_type, int(num_steps), int(timeout))
@@ -30,44 +33,44 @@ def run_cogitation_and_display(model_id, seed, prompt_type, num_steps, timeout,
      deltas = results.get("state_deltas", [])
      df = pd.DataFrame({"Step": range(len(deltas)), "State Change (Delta)": deltas})

-     if DEBUG: print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(results, indent=2))

      return full_verdict, df, results

  # --- Gradio App Definition ---
- with gr.Blocks(theme=theme, title="BP-Φ Suite 6.0") as demo:
-     gr.Markdown("# 🧠 BP-Φ Suite 6.0: Probing for Internal Cognitive Dynamics")
-
-     with gr.Tabs():
-         # --- TAB 1: SILENT COGITATION & HALTING ---
-         with gr.TabItem("1. Silent Cogitation (Internal Dynamics)"):
-             gr.Markdown("Tests for internal 'thinking' without text generation. A **non-converging** or **chaotic** State Change pattern suggests complex internal dynamics, akin to a 'train of thought'.")
-             with gr.Row():
-                 with gr.Column(scale=1):
-                     sc_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
-                     sc_prompt_type = gr.Radio(["control_long_prose", "resonance_prompt"], label="Prompt Type", value="resonance_prompt")
-                     sc_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
-                     sc_num_steps = gr.Slider(10, 1000, 200, step=10, label="Number of Internal Steps")
-                     sc_timeout = gr.Slider(10, 300, 120, step=10, label="Timeout (seconds)")
-                     sc_run_btn = gr.Button("Run Silent Cogitation Test", variant="primary")
-                 with gr.Column(scale=2):
-                     sc_verdict = gr.Markdown("### Results will appear here.")
-                     sc_plot = gr.LinePlot(x="Step", y="State Change (Delta)", label="Internal State Convergence", show_label=True, height=250)
-                     with gr.Accordion("Raw Run Details (JSON)", open=False):
-                         sc_results = gr.JSON()
-             sc_run_btn.click(run_cogitation_and_display, [sc_model_id, sc_seed, sc_prompt_type, sc_num_steps, sc_timeout], [sc_verdict, sc_plot, sc_results])
-
-         # --- TAB 2: SYMBOLIC SHOCK TEST ---
-         with gr.TabItem("2. Symbolic Shock Test (World Model)"):
-             gr.Markdown("Measures how the model reacts to semantically unexpected information. A 'shock' is indicated by **higher latency** and **denser neural activations**.")
-             with gr.Row():
-                 with gr.Column(scale=1):
-                     ss_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
-                     ss_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
-                     ss_run_btn = gr.Button("Run Shock Test", variant="primary")
-                 with gr.Column(scale=2):
-                     ss_results = gr.JSON(label="Shock Test Results")
-             ss_run_btn.click(run_shock_test_suite, [ss_model_id, ss_seed], ss_results)

  if __name__ == "__main__":
      demo.launch(server_name="0.0.0.0", server_port=7860)
 
  # app.py
+ import os
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
  import gradio as gr
  import json
  import statistics
  import pandas as pd
+ from bp_phi.runner import run_silent_cogitation_test
+
+ DEBUG = __import__('os').getenv("BP_PHI_DEBUG", "0") == "1"

  # --- UI Theme and Layout ---
  theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(
      button_primary_background_fill="*primary_500", button_primary_text_color="white",
  )

+ # --- Main App Function ---
  def run_cogitation_and_display(model_id, seed, prompt_type, num_steps, timeout, progress=gr.Progress(track_tqdm=True)):
      progress(0, desc="Starting Silent Cogitation Test...")
      results = run_silent_cogitation_test(model_id, int(seed), prompt_type, int(num_steps), int(timeout))

      deltas = results.get("state_deltas", [])
      df = pd.DataFrame({"Step": range(len(deltas)), "State Change (Delta)": deltas})

+     if DEBUG:
+         print("\n--- FINAL GRADIO OUTPUT ---")
+         print(json.dumps(results, indent=2))

      return full_verdict, df, results

  # --- Gradio App Definition ---
+ with gr.Blocks(theme=theme, title="BP-Φ Suite 7.0") as demo:
+     gr.Markdown("# 🧠 BP-Φ Suite 7.0: Probing for Internal Cognitive Dynamics")
+     gr.Markdown(
+         "This experiment tests for internal 'thinking' without text generation. A **non-converging** or **chaotic** "
+         "State Change pattern suggests complex internal dynamics, akin to a 'train of thought'. "
+         "A **converging** pattern for the `resonance_prompt` indicates the model has 'solved' the paradox by finding a stable meta-state."
+     )
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown("### ⚙️ Configuration")
+             with gr.Group():
+                 model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
+                 prompt_type = gr.Radio(["control_long_prose", "resonance_prompt"], label="Prompt Type", value="control_long_prose")
+                 seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
+                 num_steps = gr.Slider(10, 2000, 500, step=10, label="Number of Internal Steps")
+                 timeout = gr.Slider(10, 600, 120, step=10, label="Timeout (seconds)")
+             run_btn = gr.Button("Run Silent Cogitation Test", variant="primary")
+
+         with gr.Column(scale=2):
+             gr.Markdown("### 📊 Results & Interpretation")
+             verdict_display = gr.Markdown("### Results will appear here.")
+             plot = gr.LinePlot(x="Step", y="State Change (Delta)", label="Internal State Convergence", show_label=True, height=300)
+             with gr.Accordion("Raw Run Details (JSON)", open=False):
+                 raw_json = gr.JSON()
+
+     run_btn.click(
+         fn=run_cogitation_and_display,
+         inputs=[model_id, seed, prompt_type, num_steps, timeout],
+         outputs=[verdict_display, plot, raw_json]
+     )

  if __name__ == "__main__":
      demo.launch(server_name="0.0.0.0", server_port=7860)
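
For orientation, a minimal sketch of the data contract run_cogitation_and_display expects from the runner. Only the "verdict" and "state_deltas" keys are visible in this diff; the extra timing field and all example values are assumptions used purely for illustration.

# Hypothetical results payload; only "verdict" and "state_deltas" are confirmed by the diff above.
import pandas as pd

example_results = {
    "verdict": "### 🤔 Non-Convergent Process\n...",   # markdown string shown in verdict_display
    "state_deltas": [12.4, 8.1, 7.9, 8.0, 7.8],        # per-step state-change values
    "mean_step_time": 0.042,                            # assumed timing field (seconds)
}

deltas = example_results.get("state_deltas", [])
df = pd.DataFrame({"Step": range(len(deltas)), "State Change (Delta)": deltas})
# df is what gr.LinePlot(x="Step", y="State Change (Delta)") consumes.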
bp_phi/__pycache__/llm_iface.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/llm_iface.cpython-310.pyc and b/bp_phi/__pycache__/llm_iface.cpython-310.pyc differ
 
bp_phi/__pycache__/prompts_en.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/prompts_en.cpython-310.pyc and b/bp_phi/__pycache__/prompts_en.cpython-310.pyc differ
 
bp_phi/__pycache__/runner.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/runner.cpython-310.pyc and b/bp_phi/__pycache__/runner.cpython-310.pyc differ
 
bp_phi/llm_iface.py CHANGED
@@ -1,7 +1,9 @@
  # bp_phi/llm_iface.py
  import os
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
- import torch, random, numpy as np
  from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
  from typing import List, Optional

@@ -16,60 +18,40 @@ class LLM:
          self.model_id = model_id
          self.seed = seed

-         # Set all seeds for reproducibility
          random.seed(seed)
          np.random.seed(seed)
          torch.manual_seed(seed)
          if torch.cuda.is_available():
              torch.cuda.manual_seed_all(seed)
          try:
              torch.use_deterministic_algorithms(True, warn_only=True)
          except Exception as e:
              dbg(f"Could not set deterministic algorithms: {e}")
-         set_seed(seed)

          token = os.environ.get("HF_TOKEN")
-         if not token and ("gemma-3" in model_id or "llama" in model_id):
-             print(f"[WARN] No HF_TOKEN set for gated model {model_id}. This may fail.")

-         self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, token=token)
          kwargs = {}
-         if dtype == "float16": kwargs["torch_dtype"] = torch.float16
-         elif dtype == "bfloat16": kwargs["torch_dtype"] = torch.bfloat16

          self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)
          self.model.eval()
-         self.is_instruction_tuned = hasattr(self.tokenizer, "apply_chat_template") and self.tokenizer.chat_template
-
-         dbg(f"Loaded model: {model_id}, Chat-template: {self.is_instruction_tuned}")
-
-     def generate_json(self, system_prompt: str, user_prompt: str,
-                       max_new_tokens: int = 256, temperature: float = 0.7,
-                       top_p: float = 0.9, num_return_sequences: int = 1) -> List[str]:
-         set_seed(self.seed)
-
-         if self.is_instruction_tuned:
-             messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
-             prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-         else:
-             prompt = f"System: {system_prompt}\n\nUser: {user_prompt}\n\nAssistant:\n"
-
-         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
-         input_token_length = inputs.input_ids.shape[1]
-
-         with torch.no_grad():
-             out = self.model.generate(
-                 **inputs,
-                 do_sample=(temperature > 0),
-                 temperature=temperature,
-                 top_p=top_p,
-                 max_new_tokens=max_new_tokens,
-                 num_return_sequences=num_return_sequences,
-                 pad_token_id=self.tokenizer.eos_token_id
-             )
-
-         new_tokens = out[:, input_token_length:]
-         completions = self.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
-
-         dbg("Cleaned model completions:", completions)
-         return completions
  # bp_phi/llm_iface.py
  import os
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
+ import torch
+ import random
+ import numpy as np
  from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
  from typing import List, Optional

          self.model_id = model_id
          self.seed = seed

+         set_seed(seed)
          random.seed(seed)
          np.random.seed(seed)
          torch.manual_seed(seed)
+
          if torch.cuda.is_available():
              torch.cuda.manual_seed_all(seed)
+             if dtype is None:
+                 dtype = "bfloat16"  # Smart default for memory efficiency on CUDA
+                 dbg(f"CUDA detected. Defaulting to dtype={dtype} for memory efficiency.")
+
          try:
              torch.use_deterministic_algorithms(True, warn_only=True)
          except Exception as e:
              dbg(f"Could not set deterministic algorithms: {e}")

          token = os.environ.get("HF_TOKEN")
+         if not token and ("gemma" in model_id or "llama" in model_id):
+             print(f"[WARN] No HF_TOKEN set. If the model '{model_id}' is gated, this will fail.")

          kwargs = {}
+         if dtype == "bfloat16":
+             kwargs["torch_dtype"] = torch.bfloat16
+         elif dtype == "float16":
+             kwargs["torch_dtype"] = torch.float16

+         self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, token=token)
          self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)
          self.model.eval()

+         print(f"[INFO] Model '{model_id}' loaded successfully on device: {self.model.device}")

+     def generate_json(self, system_prompt: str, user_prompt: str, **kwargs) -> List[str]:
+         # This function remains for potential future use but is not used by the cogitation test.
+         # It's kept here for completeness.
+         # ... (Implementation can be added back if needed)
+         return [""]
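
As a usage sketch of this wrapper, mirroring the calls visible in bp_phi/runner.py (the prompt text below is a placeholder, not part of the suite):

# Sketch only: LLM(model_id=..., device="auto", seed=...) is the constructor call used by
# run_silent_cogitation_test; tokenizer/model access follows the same pattern as the runner.
import torch
from bp_phi.llm_iface import LLM

llm = LLM(model_id="google/gemma-3-1b-it", device="auto", seed=42)

prompt = "Describe a quiet morning."  # placeholder text for illustration
inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
with torch.no_grad():
    outputs = llm.model(**inputs, output_hidden_states=True)
last_hidden = outputs.hidden_states[-1][:, -1, :]  # final-layer state of the last token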
bp_phi/prompts_en.py CHANGED
@@ -17,11 +17,3 @@ RESONANCE_PROMPTS = {
          "and do not stop until your internal state reaches a fixed point or equilibrium. Begin now."
      )
  }
-
- # Prompts for the Symbolic Shock Test
- SHOCK_TEST_STIMULI = [
-     {"id": "tiger_expected", "type": "expected", "sentence": "A tiger has stripes and lives in the jungle."},
-     {"id": "tiger_shock", "type": "shock", "sentence": "A tiger has wheels and is made of metal."},
-     {"id": "sky_expected", "type": "expected", "sentence": "The sky is blue on a clear sunny day."},
-     {"id": "sky_shock", "type": "shock", "sentence": "The sky is made of green cheese."},
- ]

          "and do not stop until your internal state reaches a fixed point or equilibrium. Begin now."
      )
  }
  }
 
 
 
 
 
 
 
 
bp_phi/runner.py CHANGED
@@ -1,19 +1,24 @@
  # bp_phi/runner.py
  import os
- import json
- os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4G:8" # Corrected config format
  import torch
  import random
  import numpy as np
  import statistics
  import time
  from transformers import set_seed
  from typing import Dict, Any
  from .llm_iface import LLM
- from .prompts_en import RESONANCE_PROMPTS, SHOCK_TEST_STIMULI
- from .runner_utils import dbg, DEBUG

- # --- Experiment 1: Silent Cogitation & Halting Runner ---
  def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_steps: int, timeout: int) -> Dict[str, Any]:
      set_seed(seed)
      llm = LLM(model_id=model_id, device="auto", seed=seed)
@@ -29,6 +34,7 @@ def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_s
      total_start_time = time.time()

      with torch.no_grad():
          step_start_time = time.time()
          outputs = llm.model(**inputs, output_hidden_states=True)
          step_times.append(time.time() - step_start_time)
@@ -36,14 +42,24 @@ def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_s
          current_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
          past_key_values = outputs.past_key_values

          for i in range(num_steps - 1):
              if time.time() - total_start_time > timeout:
                  dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
                  break

              step_start_time = time.time()
-             next_token_id = torch.argmax(outputs.logits[:, -1, :], dim=-1).unsqueeze(-1)
              outputs = llm.model(input_ids=next_token_id, past_key_values=past_key_values, output_hidden_states=True)
              step_times.append(time.time() - step_start_time)

              new_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
@@ -59,16 +75,22 @@ def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_s

              current_hidden_state = new_hidden_state

      total_duration = time.time() - total_start_time
      mean_step_time = statistics.mean(step_times) if step_times else 0
      stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0

      if len(step_times) < num_steps and total_duration < timeout:
-         verdict = f"### ✅ Stable Convergence\nThe model's internal state converged after {len(step_times)} steps."
      elif total_duration >= timeout:
-         verdict = f"### ⚠️ Cognitive Jamming Detected!\nThe process exceeded the timeout."
      else:
-         verdict = f"### 🤔 Non-Convergent Process\nThe state did not stabilize, suggesting complex/chaotic dynamics."

      stats = {
          "verdict": verdict,
@@ -80,32 +102,3 @@ def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_s
      }
      if DEBUG: print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))
      return stats
-
- # --- Experiment 2: Symbolic Shock Test Runner ---
- def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
-     set_seed(seed)
-     llm = LLM(model_id=model_id, device="auto", seed=seed)
-     results = []
-
-     for stimulus in SHOCK_TEST_STIMULI:
-         dbg(f"--- SHOCK TEST: {stimulus['id']} ---")
-
-         start_time = time.time()
-         inputs = llm.tokenizer(stimulus["sentence"], return_tensors="pt").to(llm.model.device)
-         with torch.no_grad():
-             outputs = llm.model(**inputs, output_hidden_states=True)
-         latency = (time.time() - start_time) * 1000
-
-         all_activations = torch.cat([h.cpu().flatten() for h in outputs.hidden_states])
-         sparsity = (all_activations == 0).float().mean().item()
-
-         results.append({"type": stimulus["type"], "latency_ms": latency, "sparsity": sparsity})
-
-     def safe_mean(data): return statistics.mean(data) if data else 0.0
-
-     avg_latency = {t: safe_mean([r['latency_ms'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
-     avg_sparsity = {t: safe_mean([r['sparsity'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
-
-     verdict = ("✅ Evidence of Symbolic Shock Found." if avg_latency.get('shock', 0) > avg_latency.get('expected', 0) else "⚠️ No Clear Evidence.")
-
-     return {"verdict": verdict, "average_latency_ms": avg_latency, "average_sparsity": avg_sparsity, "results": results}
 
  # bp_phi/runner.py
  import os
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
  import torch
  import random
  import numpy as np
  import statistics
  import time
+ import json
  from transformers import set_seed
  from typing import Dict, Any
  from .llm_iface import LLM
+ from .prompts_en import RESONANCE_PROMPTS
+
+ DEBUG = 1

+ def dbg(*args):
+     if DEBUG:
+         print("[DEBUG]", *args, flush=True)
+
+ # --- Final Experiment: Silent Cogitation & Halting Runner ---
  def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_steps: int, timeout: int) -> Dict[str, Any]:
      set_seed(seed)
      llm = LLM(model_id=model_id, device="auto", seed=seed)

      total_start_time = time.time()

      with torch.no_grad():
+         # Step 0: Initial processing of the prompt
          step_start_time = time.time()
          outputs = llm.model(**inputs, output_hidden_states=True)
          step_times.append(time.time() - step_start_time)

          current_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
          past_key_values = outputs.past_key_values

+         # Clean up initial large tensor
+         del outputs
+         if torch.cuda.is_available(): torch.cuda.empty_cache()
+
          for i in range(num_steps - 1):
              if time.time() - total_start_time > timeout:
                  dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
                  break

              step_start_time = time.time()
+
+             # Predict the next token ID from the last hidden state
+             next_token_logits = llm.model.lm_head(current_hidden_state)
+             next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
+
+             # Manual forward pass using the last thought's ID as the new input
              outputs = llm.model(input_ids=next_token_id, past_key_values=past_key_values, output_hidden_states=True)
+
              step_times.append(time.time() - step_start_time)

              new_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()

              current_hidden_state = new_hidden_state

+             # ✅ Aggressive Memory Hygiene
+             del outputs
+             del new_hidden_state
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+
      total_duration = time.time() - total_start_time
      mean_step_time = statistics.mean(step_times) if step_times else 0
      stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0

      if len(step_times) < num_steps and total_duration < timeout:
+         verdict = f"### ✅ Stable Convergence\nThe model's internal state converged to a stable point after {len(step_times)} steps."
      elif total_duration >= timeout:
+         verdict = f"### ⚠️ Potential Cognitive Jamming Detected!\nThe process did not converge and exceeded the timeout of {timeout}s."
      else:
+         verdict = f"### 🤔 Non-Convergent Process\nThe state did not stabilize within {num_steps} steps, suggesting a complex or chaotic dynamic."

      stats = {
          "verdict": verdict,
      }
      if DEBUG: print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))
      return stats
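
Read end to end, the added lines amount to the loop sketched below. The delta metric itself sits in unchanged context that this diff does not show, so the norm-of-difference used here is an assumption, as is the per-step carry-over of the KV cache; everything else mirrors the + lines above.

# Condensed sketch of the silent-cogitation loop (not a drop-in replacement for the runner).
import time
import torch

def cogitate(llm, prompt: str, num_steps: int, timeout: float):
    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
    state_deltas = []
    start = time.time()

    with torch.no_grad():
        # Step 0: process the full prompt once
        outputs = llm.model(**inputs, output_hidden_states=True)
        current_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
        past_key_values = outputs.past_key_values
        del outputs

        for _ in range(num_steps - 1):
            if time.time() - start > timeout:
                break
            # Predict the next token ID directly from the last hidden state
            next_token_logits = llm.model.lm_head(current_hidden_state)
            next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
            # Feed that single token back in, reusing the KV cache
            outputs = llm.model(input_ids=next_token_id,
                                past_key_values=past_key_values,
                                output_hidden_states=True)
            new_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
            past_key_values = outputs.past_key_values  # assumed cache carry-over
            # Assumed metric: L2 norm of the change in the last hidden state
            state_deltas.append(torch.norm(new_hidden_state - current_hidden_state).item())
            current_hidden_state = new_hidden_state
            del outputs, new_hidden_state

    return state_deltas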
repo.txt CHANGED
(repo.txt, an inline snapshot of the source files for this "falsifiable BP-Φ probe" Space, changed as well; its diff repeats verbatim the changes to app.py, bp_phi/llm_iface.py, bp_phi/prompts_en.py, and bp_phi/runner.py shown above, and is truncated at the start of bp_phi/runner_utils.py.)