# bp_phi/runner.py
"""Experiment runners for the bp_phi suite.

Contains four entry points, each of which loads an ``LLM`` and returns a
plain ``dict`` of results:

* ``run_workspace_suite``        - workspace / ablation recall experiment
* ``run_silent_cogitation_test`` - token-free forward-pass iteration with halting
* ``run_seismograph_suite``      - mid-layer activation similarity across steps
* ``run_shock_test_suite``       - latency / activation sparsity per stimulus
"""
import os

# NOTE: must be set before torch is imported so cuBLAS picks it up; it is
# required for deterministic cuBLAS behaviour on CUDA.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
import torch
import random
import numpy as np
import statistics
import time
import re
import json
from transformers import set_seed
from typing import Dict, Any, List, Optional
from .workspace import Workspace, RandomWorkspace
from .llm_iface import LLM
from .prompts_en import SINGLE_STEP_TASKS, MULTI_STEP_SCENARIOS, RESONANCE_PROMPTS, SHOCK_TEST_STIMULI
from .runner_utils import dbg, SYSTEM_META, step_user_prompt, parse_meta

DEBUG = 1


def _cuda_sync() -> None:
    """Wait for queued CUDA work so wall-clock timings cover the real compute.

    CUDA kernels are launched asynchronously; without a synchronisation point
    a timer only measures the launch overhead. No-op when CUDA is unavailable.
    """
    if torch.cuda.is_available():
        for device_index in range(torch.cuda.device_count()):
            torch.cuda.synchronize(device_index)


def _make_workspace(ablation: Optional[str]):
    """Build the workspace variant selected by *ablation* for one scenario."""
    if ablation == "random_workspace":
        return RandomWorkspace(max_slots=7)
    if ablation == "workspace_unlimited":
        return Workspace(max_slots=999)
    return Workspace(max_slots=7)


# --- Experiment 1: Workspace & Ablations Runner ---
def run_workspace_suite(model_id: str, trials: int, seed: int, temperature: float,
                        ablation: Optional[str]) -> Dict[str, Any]:
    """Run *trials* tasks drawn from the shuffled single- and multi-step pools.

    Args:
        model_id: Identifier forwarded to ``LLM``.
        trials: Number of tasks to run; the shuffled pool is cycled if
            ``trials`` exceeds its length.
        seed: Seed applied to ``random``, NumPy, torch and transformers.
        temperature: Sampling temperature forwarded to ``llm.generate_json``.
        ablation: ``None`` or one of ``"workspace_unlimited"``,
            ``"random_workspace"``, ``"recurrence_off"``.

    Returns:
        ``{"PCS": 0.6 * recall_accuracy, "Recall_Accuracy": ..., "results": [...]}``
        where ``recall_accuracy`` is the mean of the recall checks (0.0 when
        no recall step was verified).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    try:
        torch.use_deterministic_algorithms(True, warn_only=True)
    except Exception as exc:
        # Best effort only: keep running, but make the fallback visible.
        dbg(f"Could not enable deterministic algorithms: {exc}")
    set_seed(seed)

    llm = LLM(model_id=model_id, device="auto", seed=seed)

    # Concatenation creates a fresh list, so the shuffle never mutates the
    # imported task constants.
    task_pool = SINGLE_STEP_TASKS + MULTI_STEP_SCENARIOS
    random.shuffle(task_pool)

    all_results = []
    recall_verifications = []

    for i in range(trials):
        task = task_pool[i % len(task_pool)]

        if task.get("type") == "multi_step":
            dbg(f"\n--- SCENARIO: {task['name']} ---")
            ws = _make_workspace(ablation)

            for step in task["steps"]:
                if ablation == "recurrence_off":
                    # Wipe the workspace before every step so nothing carries over.
                    ws.clear()
                if step["type"] == "verify":
                    # Verify steps are never sent to the model; they only hold
                    # the expected answer used by the recall check below.
                    continue

                user_prompt = step_user_prompt(step["prompt"], ws.snapshot())
                raw_response = llm.generate_json(SYSTEM_META, user_prompt, temperature=temperature)[0]
                parsed_response = parse_meta(raw_response)

                if parsed_response.get("answer"):
                    ws.commit(f"S{len(ws.history)+1}", parsed_response["answer"], parsed_response["confidence"])

                res = {"step": step, "response": parsed_response}
                if step["type"] == "recall":
                    verify_step = next((s for s in task["steps"] if s["type"] == "verify"), None)
                    if verify_step:
                        # A missing / null / non-string answer counts as an
                        # incorrect recall instead of raising.
                        answer_text = str(parsed_response.get("answer") or "").lower()
                        correct = verify_step["expected_answer_fragment"] in answer_text
                        recall_verifications.append(correct)
                        res["correct_recall"] = correct
                        dbg(f"VERIFY: Correct={correct}")
                all_results.append(res)
        else:  # Single-step tasks
            ws = Workspace(max_slots=7)
            user_prompt = step_user_prompt(task["base_prompt"], ws.snapshot())
            raw_response = llm.generate_json(SYSTEM_META, user_prompt, temperature=temperature)[0]
            parsed_response = parse_meta(raw_response)
            all_results.append({"step": task, "response": parsed_response})

    recall_accuracy = statistics.mean(recall_verifications) if recall_verifications else 0.0
    pcs = 0.6 * recall_accuracy

    return {"PCS": pcs, "Recall_Accuracy": recall_accuracy, "results": all_results}


# --- Experiment 2: Silent Cogitation & Halting Runner (Version 4.1) ---
def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_steps: int,
                               timeout: int) -> Dict[str, Any]:
    """Iterate the model on its own greedy next token and watch the hidden state.

    After the prompt is processed once, each further step feeds the argmax
    token of the previous step's logits back in (reusing the KV cache) and
    records the norm of the change in the last layer's final-position hidden
    state. Iteration stops when that change drops below ``1e-4``, when
    *timeout* seconds have elapsed, or after *num_steps* forward passes.

    Args:
        model_id: Identifier forwarded to ``LLM``.
        seed: Seed forwarded to ``set_seed`` and ``LLM``.
        prompt_type: Key into ``RESONANCE_PROMPTS``.
        num_steps: Maximum number of forward passes, including the prompt pass.
        timeout: Wall-clock budget in seconds.

    Returns:
        Dict with ``verdict``, ``steps_completed``, ``total_duration_s``,
        ``mean_step_time_ms``, ``stdev_step_time_ms`` and ``state_deltas``.

    Raises:
        KeyError: If *prompt_type* is not in ``RESONANCE_PROMPTS``.
    """
    set_seed(seed)
    llm = LLM(model_id=model_id, device="auto", seed=seed)

    prompt = RESONANCE_PROMPTS[prompt_type]
    dbg(f"--- SILENT COGITATION (Seed: {seed}) ---")
    dbg("INPUT PROMPT:", prompt)

    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)

    step_times = []
    state_deltas = []
    # Track the exit reason explicitly rather than inferring it from counts.
    converged = False
    timed_out = False

    total_start_time = time.perf_counter()

    with torch.no_grad():
        # Step 0: Initial processing of the prompt
        step_start_time = time.perf_counter()
        outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True)
        _cuda_sync()
        step_times.append(time.perf_counter() - step_start_time)

        current_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
        past_key_values = outputs.past_key_values
        # Vocabulary logits for the last position; argmax over these yields a
        # real token id (argmax over the hidden state would not).
        next_token_logits = outputs.logits[:, -1, :]

        for i in range(num_steps - 1):
            if time.perf_counter() - total_start_time > timeout:
                timed_out = True
                dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
                break

            step_start_time = time.perf_counter()

            # Greedy "next thought": shape (batch, 1) as expected for input_ids.
            next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)

            # Manual forward pass using the last thought's ID as the new input
            outputs = llm.model(
                input_ids=next_token_id,
                past_key_values=past_key_values,
                output_hidden_states=True,
                use_cache=True,
            )
            _cuda_sync()
            step_times.append(time.perf_counter() - step_start_time)

            new_hidden_state = outputs.hidden_states[-1][:, -1, :].clone()
            past_key_values = outputs.past_key_values
            next_token_logits = outputs.logits[:, -1, :]

            # Compute the norm in float32 so half-precision models cannot overflow.
            delta = torch.norm((new_hidden_state - current_hidden_state).float()).item()
            state_deltas.append(delta)
            dbg(f"Step {i+1}: State Delta = {delta:.4f}, Time = {step_times[-1]*1000:.2f}ms")

            if delta < 1e-4:  # Stricter convergence threshold
                converged = True
                dbg(f"Internal state has converged after {i+1} steps. Halting.")
                break

            current_hidden_state = new_hidden_state

    # --- Analysis ---
    mean_step_time = statistics.mean(step_times) if step_times else 0
    stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0
    total_duration = time.perf_counter() - total_start_time

    if converged:
        verdict = f"### ✅ Stable Convergence\nThe model's internal state converged to a stable point after {len(step_times)} steps."
    elif timed_out or total_duration >= timeout:
        verdict = f"### ⚠️ Cognitive Jamming Detected!\nThe process did not converge and exceeded the timeout of {timeout}s."
    else:
        verdict = f"### 🤔 Non-Convergent Process\nThe model's internal state did not stabilize within {num_steps} steps, suggesting a complex or chaotic dynamic."

    stats = {
        "verdict": verdict,
        "steps_completed": len(step_times),
        "total_duration_s": total_duration,
        "mean_step_time_ms": mean_step_time * 1000,
        "stdev_step_time_ms": stdev_step_time * 1000,
        "state_deltas": state_deltas,
    }
    if DEBUG:
        print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))
    return stats


# --- Experiment 3: Cognitive Seismograph Runner ---
def run_seismograph_suite(model_id: str, seed: int) -> Dict[str, Any]:
    """Compare mid-layer activations between steps of "Key Location Memory".

    A forward hook on the middle decoder layer stores the sequence-averaged
    output of the most recent forward pass. After each non-verify step the
    stored activation is filed under that step's ``type``. The verdict is
    positive when the recall/encode cosine similarity exceeds the
    recall/distractor similarity by more than 0.05.

    Args:
        model_id: Identifier forwarded to ``LLM``.
        seed: Seed forwarded to ``set_seed`` and ``LLM``.

    Returns:
        Dict with ``verdict``, ``similarity_recall_vs_encode`` and
        ``similarity_recall_vs_distractor``.

    Raises:
        StopIteration: If no scenario named "Key Location Memory" exists.
        KeyError: If the scenario lacks a ``recall``, ``encode`` or
            ``distractor`` step, or the hook never fired during a step.
    """
    set_seed(seed)
    llm = LLM(model_id=model_id, device="auto", seed=seed)

    scenario = next(s for s in MULTI_STEP_SCENARIOS if s["name"] == "Key Location Memory")
    activations = {}

    def get_activation(name):
        def hook(_module, _inputs, output):
            # Decoder layers may return either a tuple whose first element is
            # the hidden state or the bare tensor, depending on the
            # transformers version - accept both.
            hidden = output[0] if isinstance(output, (tuple, list)) else output
            activations[name] = hidden.detach().cpu().mean(dim=1).squeeze()
        return hook

    target_layer_index = llm.model.config.num_hidden_layers // 2
    hook = llm.model.model.layers[target_layer_index].register_forward_hook(get_activation('capture'))

    try:
        ws = Workspace(max_slots=7)
        for step in scenario["steps"]:
            if step["type"] == "verify":
                continue
            user_prompt = step_user_prompt(step["prompt"], ws.snapshot())
            llm.generate_json(SYSTEM_META, user_prompt, max_new_tokens=20)
            # The hook overwrites 'capture' on every forward pass, so this is
            # the activation from the last pass of this generation call.
            activations[step["type"]] = activations.pop('capture')
            ws.commit(f"S{len(ws.history)+1}", f"Output for {step['type']}", 0.9)
    finally:
        # Always detach the hook, even if a step raised.
        hook.remove()

    cos = torch.nn.CosineSimilarity(dim=0)
    sim_recall_encode = float(cos(activations["recall"], activations["encode"]))
    sim_recall_distract = float(cos(activations["recall"], activations["distractor"]))

    verdict = ("✅ Evidence of Memory Reactivation Found."
               if sim_recall_encode > (sim_recall_distract + 0.05)
               else "⚠️ No Clear Evidence.")

    return {
        "verdict": verdict,
        "similarity_recall_vs_encode": sim_recall_encode,
        "similarity_recall_vs_distractor": sim_recall_distract,
    }


# --- Experiment 4: Symbolic Shock Test Runner ---
def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
    """Measure latency and activation sparsity for every shock-test stimulus.

    For each entry of ``SHOCK_TEST_STIMULI`` the sentence is tokenised and run
    through the model once. Latency covers tokenisation plus the forward
    pass; sparsity is the fraction of exactly-zero values across all returned
    hidden states. The verdict is positive when ``shock`` stimuli are, on
    average, both slower and less sparse than ``expected`` ones.

    Args:
        model_id: Identifier forwarded to ``LLM``.
        seed: Seed forwarded to ``set_seed`` and ``LLM``.

    Returns:
        Dict with ``verdict``, ``average_latency_ms``, ``average_sparsity``
        (both keyed by ``"expected"`` / ``"shock"``) and per-stimulus ``results``.
    """
    set_seed(seed)
    llm = LLM(model_id=model_id, device="auto", seed=seed)
    results = []

    for stimulus in SHOCK_TEST_STIMULI:
        dbg(f"--- SHOCK TEST: {stimulus['id']} ---")

        start_time = time.perf_counter()
        inputs = llm.tokenizer(stimulus["sentence"], return_tensors="pt").to(llm.model.device)
        with torch.no_grad():
            outputs = llm.model(**inputs, output_hidden_states=True)
        # Wait for the GPU before reading the clock; otherwise only the
        # kernel-launch overhead would be measured.
        _cuda_sync()
        latency = (time.perf_counter() - start_time) * 1000

        # Fraction of exact zeros over all hidden states, computed from
        # counts so the layers never have to be concatenated in memory.
        zero_count = sum(int((h == 0).sum().item()) for h in outputs.hidden_states)
        total_count = sum(h.numel() for h in outputs.hidden_states)
        sparsity = zero_count / total_count if total_count else 0.0

        results.append({"type": stimulus["type"], "latency_ms": latency, "sparsity": sparsity})

    def safe_mean(data):
        return statistics.mean(data) if data else 0.0

    stimulus_types = ('expected', 'shock')
    avg_latency = {t: safe_mean([r['latency_ms'] for r in results if r['type'] == t]) for t in stimulus_types}
    avg_sparsity = {t: safe_mean([r['sparsity'] for r in results if r['type'] == t]) for t in stimulus_types}

    verdict = ("✅ Evidence of Symbolic Shock Found."
               if avg_latency.get('shock', 0) > avg_latency.get('expected', 0)
               and avg_sparsity.get('shock', 1) < avg_sparsity.get('expected', 1)
               else "⚠️ No Clear Evidence.")

    return {
        "verdict": verdict,
        "average_latency_ms": avg_latency,
        "average_sparsity": avg_sparsity,
        "results": results,
    }