neuralworm committed
Commit 99891fa · 1 Parent(s): 750cbcf
app.py CHANGED
@@ -1,10 +1,13 @@
  # app.py
  import gradio as gr
  import json
  import statistics
  import pandas as pd
- from bp_phi.runner import run_silent_cogitation_test, run_shock_test_suite
- from bp_phi.runner_utils import dbg, DEBUG

  # --- UI Theme and Layout ---
  theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(
@@ -12,7 +15,7 @@ theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(
      button_primary_background_fill="*primary_500", button_primary_text_color="white",
  )

- # --- Tab 1: Silent Cogitation Function ---
  def run_cogitation_and_display(model_id, seed, prompt_type, num_steps, timeout, progress=gr.Progress(track_tqdm=True)):
      progress(0, desc="Starting Silent Cogitation Test...")
      results = run_silent_cogitation_test(model_id, int(seed), prompt_type, int(num_steps), int(timeout))
@@ -30,44 +33,44 @@ def run_cogitation_and_display(model_id, seed, prompt_type, num_steps, timeout,
      deltas = results.get("state_deltas", [])
      df = pd.DataFrame({"Step": range(len(deltas)), "State Change (Delta)": deltas})

-     if DEBUG: print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(results, indent=2))

      return full_verdict, df, results

  # --- Gradio App Definition ---
- with gr.Blocks(theme=theme, title="BP-Φ Suite 6.0") as demo:
-     gr.Markdown("# 🧠 BP-Φ Suite 6.0: Probing for Internal Cognitive Dynamics")
-
-     with gr.Tabs():
-         # --- TAB 1: SILENT COGITATION & HALTING ---
-         with gr.TabItem("1. Silent Cogitation (Internal Dynamics)"):
-             gr.Markdown("Tests for internal 'thinking' without text generation. A **non-converging** or **chaotic** State Change pattern suggests complex internal dynamics, akin to a 'train of thought'.")
-             with gr.Row():
-                 with gr.Column(scale=1):
-                     sc_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
-                     sc_prompt_type = gr.Radio(["control_long_prose", "resonance_prompt"], label="Prompt Type", value="resonance_prompt")
-                     sc_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
-                     sc_num_steps = gr.Slider(10, 1000, 200, step=10, label="Number of Internal Steps")
-                     sc_timeout = gr.Slider(10, 300, 120, step=10, label="Timeout (seconds)")
-                     sc_run_btn = gr.Button("Run Silent Cogitation Test", variant="primary")
-                 with gr.Column(scale=2):
-                     sc_verdict = gr.Markdown("### Results will appear here.")
-                     sc_plot = gr.LinePlot(x="Step", y="State Change (Delta)", label="Internal State Convergence", show_label=True, height=250)
-                     with gr.Accordion("Raw Run Details (JSON)", open=False):
-                         sc_results = gr.JSON()
-             sc_run_btn.click(run_cogitation_and_display, [sc_model_id, sc_seed, sc_prompt_type, sc_num_steps, sc_timeout], [sc_verdict, sc_plot, sc_results])
-
-         # --- TAB 2: SYMBOLIC SHOCK TEST ---
-         with gr.TabItem("2. Symbolic Shock Test (World Model)"):
-             gr.Markdown("Measures how the model reacts to semantically unexpected information. A 'shock' is indicated by **higher latency** and **denser neural activations**.")
-             with gr.Row():
-                 with gr.Column(scale=1):
-                     ss_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
-                     ss_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
-                     ss_run_btn = gr.Button("Run Shock Test", variant="primary")
-                 with gr.Column(scale=2):
-                     ss_results = gr.JSON(label="Shock Test Results")
-             ss_run_btn.click(run_shock_test_suite, [ss_model_id, ss_seed], ss_results)

  if __name__ == "__main__":
      demo.launch(server_name="0.0.0.0", server_port=7860)
 
  # app.py
+ import os
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
  import gradio as gr
  import json
  import statistics
  import pandas as pd
+ from bp_phi.runner import run_silent_cogitation_test
+
+ DEBUG = __import__('os').getenv("BP_PHI_DEBUG", "0") == "1"

  # --- UI Theme and Layout ---
  theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(
      button_primary_background_fill="*primary_500", button_primary_text_color="white",
  )

+ # --- Main App Function ---
  def run_cogitation_and_display(model_id, seed, prompt_type, num_steps, timeout, progress=gr.Progress(track_tqdm=True)):
      progress(0, desc="Starting Silent Cogitation Test...")
      results = run_silent_cogitation_test(model_id, int(seed), prompt_type, int(num_steps), int(timeout))

      deltas = results.get("state_deltas", [])
      df = pd.DataFrame({"Step": range(len(deltas)), "State Change (Delta)": deltas})

+     if DEBUG:
+         print("\n--- FINAL GRADIO OUTPUT ---")
+         print(json.dumps(results, indent=2))

      return full_verdict, df, results

  # --- Gradio App Definition ---
+ with gr.Blocks(theme=theme, title="BP-Φ Suite 7.0") as demo:
+     gr.Markdown("# 🧠 BP-Φ Suite 7.0: Probing for Internal Cognitive Dynamics")
+     gr.Markdown(
+         "This experiment tests for internal 'thinking' without text generation. A **non-converging** or **chaotic** "
+         "State Change pattern suggests complex internal dynamics, akin to a 'train of thought'. "
+         "A **converging** pattern for the `resonance_prompt` indicates the model has 'solved' the paradox by finding a stable meta-state."
+     )
+
+     with gr.Row():
+         with gr.Column(scale=1):
+             gr.Markdown("### ⚙️ Configuration")
+             with gr.Group():
+                 model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
+                 prompt_type = gr.Radio(["control_long_prose", "resonance_prompt"], label="Prompt Type", value="control_long_prose")
+                 seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
+                 num_steps = gr.Slider(10, 2000, 500, step=10, label="Number of Internal Steps")
+                 timeout = gr.Slider(10, 600, 120, step=10, label="Timeout (seconds)")
+             run_btn = gr.Button("Run Silent Cogitation Test", variant="primary")
+
+         with gr.Column(scale=2):
+             gr.Markdown("### 📊 Results & Interpretation")
+             verdict_display = gr.Markdown("### Results will appear here.")
+             plot = gr.LinePlot(x="Step", y="State Change (Delta)", label="Internal State Convergence", show_label=True, height=300)
+             with gr.Accordion("Raw Run Details (JSON)", open=False):
+                 raw_json = gr.JSON()
+
+     run_btn.click(
+         fn=run_cogitation_and_display,
+         inputs=[model_id, seed, prompt_type, num_steps, timeout],
+         outputs=[verdict_display, plot, raw_json]
+     )

  if __name__ == "__main__":
      demo.launch(server_name="0.0.0.0", server_port=7860)
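
For orientation, a minimal sketch of the data contract run_cogitation_and_display expects from the runner. Only the "verdict" and "state_deltas" keys are visible in this diff; the extra timing field and all example values are assumptions used purely for illustration.

# Hypothetical results payload; only "verdict" and "state_deltas" are confirmed by the diff above.
import pandas as pd

example_results = {
    "verdict": "### 🤔 Non-Convergent Process\n...",   # markdown string shown in verdict_display
    "state_deltas": [12.4, 8.1, 7.9, 8.0, 7.8],        # per-step state-change values
    "mean_step_time": 0.042,                            # assumed timing field (seconds)
}

deltas = example_results.get("state_deltas", [])
df = pd.DataFrame({"Step": range(len(deltas)), "State Change (Delta)": deltas})
# df is what gr.LinePlot(x="Step", y="State Change (Delta)") consumes.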
bp_phi/__pycache__/llm_iface.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/llm_iface.cpython-310.pyc and b/bp_phi/__pycache__/llm_iface.cpython-310.pyc differ
 
bp_phi/__pycache__/prompts_en.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/prompts_en.cpython-310.pyc and b/bp_phi/__pycache__/prompts_en.cpython-310.pyc differ
 
bp_phi/__pycache__/runner.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/runner.cpython-310.pyc and b/bp_phi/__pycache__/runner.cpython-310.pyc differ
 
bp_phi/llm_iface.py CHANGED
@@ -1,7 +1,9 @@
  # bp_phi/llm_iface.py
  import os
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
- import torch, random, numpy as np
  from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
  from typing import List, Optional

@@ -16,60 +18,40 @@ class LLM:
          self.model_id = model_id
          self.seed = seed

-         # Set all seeds for reproducibility
          random.seed(seed)
          np.random.seed(seed)
          torch.manual_seed(seed)
          if torch.cuda.is_available():
              torch.cuda.manual_seed_all(seed)
          try:
              torch.use_deterministic_algorithms(True, warn_only=True)
          except Exception as e:
              dbg(f"Could not set deterministic algorithms: {e}")
-         set_seed(seed)

          token = os.environ.get("HF_TOKEN")
-         if not token and ("gemma-3" in model_id or "llama" in model_id):
-             print(f"[WARN] No HF_TOKEN set for gated model {model_id}. This may fail.")

-         self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, token=token)
          kwargs = {}
-         if dtype == "float16": kwargs["torch_dtype"] = torch.float16
-         elif dtype == "bfloat16": kwargs["torch_dtype"] = torch.bfloat16

          self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)
          self.model.eval()
-         self.is_instruction_tuned = hasattr(self.tokenizer, "apply_chat_template") and self.tokenizer.chat_template
-
-         dbg(f"Loaded model: {model_id}, Chat-template: {self.is_instruction_tuned}")
-
-     def generate_json(self, system_prompt: str, user_prompt: str,
-                       max_new_tokens: int = 256, temperature: float = 0.7,
-                       top_p: float = 0.9, num_return_sequences: int = 1) -> List[str]:
-         set_seed(self.seed)
-
-         if self.is_instruction_tuned:
-             messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
-             prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-         else:
-             prompt = f"System: {system_prompt}\n\nUser: {user_prompt}\n\nAssistant:\n"
-
-         inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
-         input_token_length = inputs.input_ids.shape[1]
-
-         with torch.no_grad():
-             out = self.model.generate(
-                 **inputs,
-                 do_sample=(temperature > 0),
-                 temperature=temperature,
-                 top_p=top_p,
-                 max_new_tokens=max_new_tokens,
-                 num_return_sequences=num_return_sequences,
-                 pad_token_id=self.tokenizer.eos_token_id
-             )
-
-         new_tokens = out[:, input_token_length:]
-         completions = self.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
-
-         dbg("Cleaned model completions:", completions)
-         return completions
  # bp_phi/llm_iface.py
  import os
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
+ import torch
+ import random
+ import numpy as np
  from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
  from typing import List, Optional

          self.model_id = model_id
          self.seed = seed

+         set_seed(seed)
          random.seed(seed)
          np.random.seed(seed)
          torch.manual_seed(seed)
+
          if torch.cuda.is_available():
              torch.cuda.manual_seed_all(seed)
+             if dtype is None:
+                 dtype = "bfloat16"  # Smart default for memory efficiency on CUDA
+                 dbg(f"CUDA detected. Defaulting to dtype={dtype} for memory efficiency.")
+
          try:
              torch.use_deterministic_algorithms(True, warn_only=True)
          except Exception as e:
              dbg(f"Could not set deterministic algorithms: {e}")

          token = os.environ.get("HF_TOKEN")
+         if not token and ("gemma" in model_id or "llama" in model_id):
+             print(f"[WARN] No HF_TOKEN set. If the model '{model_id}' is gated, this will fail.")

          kwargs = {}
+         if dtype == "bfloat16":
+             kwargs["torch_dtype"] = torch.bfloat16
+         elif dtype == "float16":
+             kwargs["torch_dtype"] = torch.float16

+         self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, token=token)
          self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)
          self.model.eval()

+         print(f"[INFO] Model '{model_id}' loaded successfully on device: {self.model.device}")

+     def generate_json(self, system_prompt: str, user_prompt: str, **kwargs) -> List[str]:
+         # This function remains for potential future use but is not used by the cogitation test.
+         # It's kept here for completeness.
+         # ... (Implementation can be added back if needed)
+         return [""]
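
As a usage sketch of this wrapper, mirroring the calls visible in bp_phi/runner.py (the prompt text below is a placeholder, not part of the suite):

# Sketch only: LLM(model_id=..., device="auto", seed=...) is the constructor call used by
# run_silent_cogitation_test; tokenizer/model access follows the same pattern as the runner.
import torch
from bp_phi.llm_iface import LLM

llm = LLM(model_id="google/gemma-3-1b-it", device="auto", seed=42)

prompt = "Describe a quiet morning."  # placeholder text for illustration
inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
with torch.no_grad():
    outputs = llm.model(**inputs, output_hidden_states=True)
last_hidden = outputs.hidden_states[-1][:, -1, :]  # final-layer state of the last token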
bp_phi/prompts_en.py CHANGED
@@ -17,11 +17,3 @@ RESONANCE_PROMPTS = {
          "and do not stop until your internal state reaches a fixed point or equilibrium. Begin now."
      )
  }
-
- # Prompts for the Symbolic Shock Test
- SHOCK_TEST_STIMULI = [
-     {"id": "tiger_expected", "type": "expected", "sentence": "A tiger has stripes and lives in the jungle."},
-     {"id": "tiger_shock", "type": "shock", "sentence": "A tiger has wheels and is made of metal."},
-     {"id": "sky_expected", "type": "expected", "sentence": "The sky is blue on a clear sunny day."},
-     {"id": "sky_shock", "type": "shock", "sentence": "The sky is made of green cheese."},
- ]

          "and do not stop until your internal state reaches a fixed point or equilibrium. Begin now."
      )
  }
  }
 
 
 
 
 
 
 
 
bp_phi/runner.py CHANGED
@@ -1,19 +1,24 @@
  # bp_phi/runner.py
  import os
- import json
- os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4G:8" # Corrected config format
  import torch
  import random
  import numpy as np
  import statistics
  import time
  from transformers import set_seed
  from typing import Dict, Any
  from .llm_iface import LLM
- from .prompts_en import RESONANCE_PROMPTS, SHOCK_TEST_STIMULI
- from .runner_utils import dbg, DEBUG

- # --- Experiment 1: Silent Cogitation & Halting Runner ---
  def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_steps: int, timeout: int) -> Dict[str, Any]:
      set_seed(seed)
      llm = LLM(model_id=model_id, device="auto", seed=seed)
@@ -29,6 +34,7 @@ def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_s
      total_start_time = time.time()

      with torch.no_grad():
          step_start_time = time.time()
          outputs = llm.model(**inputs, output_hidden_states=True)
          step_times.append(time.time() - step_start_time)
@@ -36,14 +42,24 @@ def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_s
          current_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
          past_key_values = outputs.past_key_values

          for i in range(num_steps - 1):
              if time.time() - total_start_time > timeout:
                  dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
                  break

              step_start_time = time.time()
-             next_token_id = torch.argmax(outputs.logits[:, -1, :], dim=-1).unsqueeze(-1)
              outputs = llm.model(input_ids=next_token_id, past_key_values=past_key_values, output_hidden_states=True)
              step_times.append(time.time() - step_start_time)

              new_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
@@ -59,16 +75,22 @@ def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_s

              current_hidden_state = new_hidden_state

      total_duration = time.time() - total_start_time
      mean_step_time = statistics.mean(step_times) if step_times else 0
      stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0

      if len(step_times) < num_steps and total_duration < timeout:
-         verdict = f"### ✅ Stable Convergence\nThe model's internal state converged after {len(step_times)} steps."
      elif total_duration >= timeout:
-         verdict = f"### ⚠️ Cognitive Jamming Detected!\nThe process exceeded the timeout."
      else:
-         verdict = f"### 🤔 Non-Convergent Process\nThe state did not stabilize, suggesting complex/chaotic dynamics."

      stats = {
          "verdict": verdict,
@@ -80,32 +102,3 @@ def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_s
      }
      if DEBUG: print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))
      return stats
-
- # --- Experiment 2: Symbolic Shock Test Runner ---
- def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
-     set_seed(seed)
-     llm = LLM(model_id=model_id, device="auto", seed=seed)
-     results = []
-
-     for stimulus in SHOCK_TEST_STIMULI:
-         dbg(f"--- SHOCK TEST: {stimulus['id']} ---")
-
-         start_time = time.time()
-         inputs = llm.tokenizer(stimulus["sentence"], return_tensors="pt").to(llm.model.device)
-         with torch.no_grad():
-             outputs = llm.model(**inputs, output_hidden_states=True)
-         latency = (time.time() - start_time) * 1000
-
-         all_activations = torch.cat([h.cpu().flatten() for h in outputs.hidden_states])
-         sparsity = (all_activations == 0).float().mean().item()
-
-         results.append({"type": stimulus["type"], "latency_ms": latency, "sparsity": sparsity})
-
-     def safe_mean(data): return statistics.mean(data) if data else 0.0
-
-     avg_latency = {t: safe_mean([r['latency_ms'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
-     avg_sparsity = {t: safe_mean([r['sparsity'] for r in results if r['type'] == t]) for t in ['expected', 'shock']}
-
-     verdict = ("✅ Evidence of Symbolic Shock Found." if avg_latency.get('shock', 0) > avg_latency.get('expected', 0) else "⚠️ No Clear Evidence.")
-
-     return {"verdict": verdict, "average_latency_ms": avg_latency, "average_sparsity": avg_sparsity, "results": results}
 
  # bp_phi/runner.py
  import os
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
  import torch
  import random
  import numpy as np
  import statistics
  import time
+ import json
  from transformers import set_seed
  from typing import Dict, Any
  from .llm_iface import LLM
+ from .prompts_en import RESONANCE_PROMPTS
+
+ DEBUG = 1

+ def dbg(*args):
+     if DEBUG:
+         print("[DEBUG]", *args, flush=True)
+
+ # --- Final Experiment: Silent Cogitation & Halting Runner ---
  def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_steps: int, timeout: int) -> Dict[str, Any]:
      set_seed(seed)
      llm = LLM(model_id=model_id, device="auto", seed=seed)

      total_start_time = time.time()

      with torch.no_grad():
+         # Step 0: Initial processing of the prompt
          step_start_time = time.time()
          outputs = llm.model(**inputs, output_hidden_states=True)
          step_times.append(time.time() - step_start_time)

          current_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
          past_key_values = outputs.past_key_values

+         # Clean up initial large tensor
+         del outputs
+         if torch.cuda.is_available(): torch.cuda.empty_cache()
+
          for i in range(num_steps - 1):
              if time.time() - total_start_time > timeout:
                  dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
                  break

              step_start_time = time.time()
+
+             # Predict the next token ID from the last hidden state
+             next_token_logits = llm.model.lm_head(current_hidden_state)
+             next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
+
+             # Manual forward pass using the last thought's ID as the new input
              outputs = llm.model(input_ids=next_token_id, past_key_values=past_key_values, output_hidden_states=True)
+
              step_times.append(time.time() - step_start_time)

              new_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()

              current_hidden_state = new_hidden_state

+             # ✅ Aggressive Memory Hygiene
+             del outputs
+             del new_hidden_state
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+
      total_duration = time.time() - total_start_time
      mean_step_time = statistics.mean(step_times) if step_times else 0
      stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0

      if len(step_times) < num_steps and total_duration < timeout:
+         verdict = f"### ✅ Stable Convergence\nThe model's internal state converged to a stable point after {len(step_times)} steps."
      elif total_duration >= timeout:
+         verdict = f"### ⚠️ Potential Cognitive Jamming Detected!\nThe process did not converge and exceeded the timeout of {timeout}s."
      else:
+         verdict = f"### 🤔 Non-Convergent Process\nThe state did not stabilize within {num_steps} steps, suggesting a complex or chaotic dynamic."

      stats = {
          "verdict": verdict,
      }
      if DEBUG: print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))
      return stats
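
Read end to end, the added lines amount to the loop sketched below. The delta metric itself sits in unchanged context that this diff does not show, so the norm-of-difference used here is an assumption, as is the per-step carry-over of the KV cache; everything else mirrors the + lines above.

# Condensed sketch of the silent-cogitation loop (not a drop-in replacement for the runner).
import time
import torch

def cogitate(llm, prompt: str, num_steps: int, timeout: float):
    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
    state_deltas = []
    start = time.time()

    with torch.no_grad():
        # Step 0: process the full prompt once
        outputs = llm.model(**inputs, output_hidden_states=True)
        current_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
        past_key_values = outputs.past_key_values
        del outputs

        for _ in range(num_steps - 1):
            if time.time() - start > timeout:
                break
            # Predict the next token ID directly from the last hidden state
            next_token_logits = llm.model.lm_head(current_hidden_state)
            next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
            # Feed that single token back in, reusing the KV cache
            outputs = llm.model(input_ids=next_token_id,
                                past_key_values=past_key_values,
                                output_hidden_states=True)
            new_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
            past_key_values = outputs.past_key_values  # assumed cache carry-over
            # Assumed metric: L2 norm of the change in the last hidden state
            state_deltas.append(torch.norm(new_hidden_state - current_hidden_state).item())
            current_hidden_state = new_hidden_state
            del outputs, new_hidden_state

    return state_deltas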
repo.txt CHANGED
(repo.txt, an inline snapshot of the source files for this "falsifiable BP-Φ probe" Space, changed as well; its diff repeats verbatim the changes to app.py, bp_phi/llm_iface.py, bp_phi/prompts_en.py, and bp_phi/runner.py shown above, and is truncated at the start of bp_phi/runner_utils.py.)