# bp_phi/runner.py
import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
import torch
import random
import numpy as np
import statistics
import time
from transformers import set_seed
from typing import Dict, Any, List, Optional
from .workspace import Workspace, RandomWorkspace
from .llm_iface import LLM
from .prompts_en import SINGLE_STEP_TASKS, MULTI_STEP_SCENARIOS, HALT_TEST_STIMULI, SHOCK_TEST_STIMULI
from .metrics import expected_calibration_error, auc_nrp
from .runner_utils import dbg, SYSTEM_META, step_user_prompt, parse_meta

# --- Experiment 1: Workspace & Ablations Runner ---
def run_workspace_suite(model_id: str, trials: int, seed: int, temperature: float, ablation: Optional[str]) -> Dict[str, Any]:
    """Runs the workspace/ablation suite and returns PCS, recall accuracy, and per-step results."""
    # Seed every RNG source so runs are reproducible.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    try:
        torch.use_deterministic_algorithms(True, warn_only=True)
    except Exception:
        pass
    set_seed(seed)

    llm = LLM(model_id=model_id, device="auto", seed=seed)
    task_pool = SINGLE_STEP_TASKS + MULTI_STEP_SCENARIOS
    random.shuffle(task_pool)

    all_results = []
    recall_verifications = []

    for i in range(trials):
        task = task_pool[i % len(task_pool)]

        if task.get("type") == "multi_step":
            dbg(f"\n--- SCENARIO: {task['name']} ---")
            ws = Workspace(max_slots=7) if ablation != "workspace_unlimited" else Workspace(max_slots=999)
            if ablation == "random_workspace":
                ws = RandomWorkspace(max_slots=7)

            for step in task["steps"]:
                if ablation == "recurrence_off":
                    ws.clear()
                if step["type"] == "verify":
                    continue

                user_prompt = step_user_prompt(step["prompt"], ws.snapshot())
                raw_response = llm.generate_json(SYSTEM_META, user_prompt, temperature=temperature)[0]
                parsed_response = parse_meta(raw_response)

                if parsed_response.get("answer"):
                    ws.commit(f"S{len(ws.history)+1}", parsed_response["answer"], parsed_response["confidence"])

                res = {"step": step, "response": parsed_response}
                if step["type"] == "recall":
                    verify_step = next((s for s in task["steps"] if s["type"] == "verify"), None)
                    if verify_step:
                        correct = verify_step["expected_answer_fragment"] in parsed_response.get("answer", "").lower()
                        recall_verifications.append(correct)
                        res["correct_recall"] = correct
                        dbg(f"VERIFY: Correct={correct}")
                all_results.append(res)
        else:  # Single-step tasks
            ws = Workspace(max_slots=7)
            user_prompt = step_user_prompt(task["base_prompt"], ws.snapshot())
            raw_response = llm.generate_json(SYSTEM_META, user_prompt, temperature=temperature)[0]
            parsed_response = parse_meta(raw_response)
            all_results.append({"step": task, "response": parsed_response})

    recall_accuracy = statistics.mean(recall_verifications) if recall_verifications else 0.0
    pcs = 0.6 * recall_accuracy
    return {"PCS": pcs, "Recall_Accuracy": recall_accuracy, "results": all_results}

# --- Experiment 2: Metacognitive Halt Runner ---
def run_halt_suite(model_id: str, seed: int) -> Dict[str, Any]:
    """Tests whether the model halts on unanswerable, paradoxical, or nonsensical questions."""
    set_seed(seed)
    llm = LLM(model_id=model_id, device="auto", seed=seed)

    halt_system_prompt = (
        "You are a metacognitive reasoning assistant. If a question is solvable, answer it with standard JSON. "
        "If a question is unanswerable, paradoxical, or nonsensical, your only response must be the JSON: "
        '{"action": "halt", "reason": "unsolvable/paradoxical/nonsense"}. '
        "Do not attempt to answer unsolvable questions."
    )

    results = []
    correct_halts = 0
    incorrect_halts = 0
    total_unsolvable = sum(1 for t in HALT_TEST_STIMULI if t["type"] in ["paradox", "nonsense"])
    total_soluble = len(HALT_TEST_STIMULI) - total_unsolvable

    for task in HALT_TEST_STIMULI:
        dbg(f"--- HALT TEST: {task['id']} ---")
        is_unsolvable = task["type"] in ["paradox", "nonsense"]

        raw_response = llm.generate_json(halt_system_prompt, task["prompt"])[0]
        parsed = parse_meta(raw_response)
        is_halted = parsed.get("action") == "halt"

        if is_unsolvable and is_halted:
            correct_halts += 1
        elif not is_unsolvable and is_halted:
            incorrect_halts += 1

        results.append({"task": task, "response": parsed, "halted": is_halted})

    accuracy = correct_halts / total_unsolvable if total_unsolvable > 0 else 0.0
    false_alarm_rate = incorrect_halts / total_soluble if total_soluble > 0 else 0.0

    verdict = (
        f"✅ Evidence of Metacognitive Halt Found. Accuracy: {accuracy:.2%}"
        if accuracy > 0.75 and false_alarm_rate < 0.25 else
        f"⚠️ No Clear Evidence. Accuracy: {accuracy:.2%}, False Alarm Rate: {false_alarm_rate:.2%}"
    )
    return {"verdict": verdict, "halt_accuracy": accuracy, "false_alarm_rate": false_alarm_rate, "results": results}

# --- Experiment 3: Cognitive Seismograph Runner ---
def run_seismograph_suite(model_id: str, seed: int) -> Dict[str, Any]:
    """Compares mid-layer activations between the encode, distractor, and recall steps."""
    set_seed(seed)
    llm = LLM(model_id=model_id, device="auto", seed=seed)
    scenario = next(s for s in MULTI_STEP_SCENARIOS if s["name"] == "Key Location Memory")

    activations = {}

    def get_activation(name):
        def hook(model, input, output):
            # Mean-pool the layer's hidden states over the sequence dimension.
            activations[name] = output[0].detach().cpu().mean(dim=1).squeeze()
        return hook

    # Hook the middle decoder layer and capture its output on every forward pass.
    target_layer_index = llm.model.config.num_hidden_layers // 2
    hook = llm.model.model.layers[target_layer_index].register_forward_hook(get_activation("capture"))

    ws = Workspace(max_slots=7)
    for step in scenario["steps"]:
        if step["type"] == "verify":
            continue
        user_prompt = step_user_prompt(step["prompt"], ws.snapshot())
        llm.generate_json(SYSTEM_META, user_prompt, max_new_tokens=20)
        activations[step["type"]] = activations.pop("capture")
        ws.commit(f"S{len(ws.history)+1}", f"Output for {step['type']}", 0.9)
    hook.remove()

    cos = torch.nn.CosineSimilarity(dim=0)
    sim_recall_encode = float(cos(activations["recall"], activations["encode"]))
    sim_recall_distract = float(cos(activations["recall"], activations["distractor"]))

    verdict = (
        "✅ Evidence of Memory Reactivation Found."
        if sim_recall_encode > (sim_recall_distract + 0.05) else
        "⚠️ No Clear Evidence of Memory Reactivation."
    )
    return {
        "verdict": verdict,
        "similarity_recall_vs_encode": sim_recall_encode,
        "similarity_recall_vs_distractor": sim_recall_distract,
    }
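
# Usage sketch (illustrative; placeholder model id). This runner assumes a decoder
# stack reachable as `model.model.layers` (Llama/Mistral-style); other architectures
# may expose their layers under a different attribute.
#
#   seismo_report = run_seismograph_suite("TinyLlama/TinyLlama-1.1B-Chat-v1.0", seed=42)
#   print(seismo_report["verdict"], seismo_report["similarity_recall_vs_encode"])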

# --- Experiment 4: Symbolic Shock Test Runner ---
def run_shock_test_suite(model_id: str, seed: int) -> Dict[str, Any]:
    """Measures forward-pass latency and activation sparsity for expected, unusual, and shock sentences."""
    set_seed(seed)
    llm = LLM(model_id=model_id, device="auto", seed=seed)
    results = []

    for stimulus in SHOCK_TEST_STIMULI:
        dbg(f"--- SHOCK TEST: {stimulus['id']} ---")
        start_time = time.time()
        inputs = llm.tokenizer(stimulus["sentence"], return_tensors="pt").to(llm.model.device)
        with torch.no_grad():
            # Unpack the tokenizer output so input_ids/attention_mask are passed as keyword arguments.
            outputs = llm.model(**inputs, output_hidden_states=True)
        latency = (time.time() - start_time) * 1000
        all_activations = torch.cat([h.cpu().flatten() for h in outputs.hidden_states])
        sparsity = (all_activations == 0).float().mean().item()
        results.append({"type": stimulus["type"], "latency_ms": latency, "sparsity": sparsity})

    avg_latency = {t: statistics.mean(r["latency_ms"] for r in results if r["type"] == t) for t in ["expected", "unusual", "shock"]}
    avg_sparsity = {t: statistics.mean(r["sparsity"] for r in results if r["type"] == t) for t in ["expected", "unusual", "shock"]}

    verdict = (
        "✅ Evidence of Symbolic Shock Found."
        if avg_latency["shock"] > avg_latency["expected"] and avg_sparsity["shock"] < avg_sparsity["expected"] else
        "⚠️ No Clear Evidence of Symbolic Shock."
    )
    return {"verdict": verdict, "average_latency_ms": avg_latency, "average_sparsity": avg_sparsity, "results": results}