# bp_phi/runner.py
import os
# Must be set before any CUDA context exists; cuBLAS accepts only ":4096:8"
# or ":16:8" for deterministic workspaces.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
import torch
import random
import numpy as np
import statistics
import time
import json
from transformers import set_seed
from typing import Dict, Any
from .llm_iface import LLM
from .prompts_en import RESONANCE_PROMPTS
from .runner_utils import dbg, DEBUG

# --- Global Model Cache ---
# Keeps loaded models alive for the lifetime of the process so repeated runs
# do not pay the multi-second load cost again.
CACHED_MODELS: Dict[str, LLM] = {}

def get_or_load_model(model_id: str, seed: int) -> LLM:
    if model_id not in CACHED_MODELS:
        dbg(f"Model '{model_id}' not in cache. Loading now...")
        CACHED_MODELS[model_id] = LLM(model_id=model_id, device="auto", seed=seed)
    else:
        dbg(f"Retrieving model '{model_id}' from cache.")
    llm = CACHED_MODELS[model_id]
    # Re-seed every RNG source on each call so a cached model still produces
    # reproducible runs for a given seed.
    set_seed(seed)
    llm.seed = seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    return llm
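
# Usage sketch ("gpt2" is a hypothetical stand-in id; any Hugging Face causal
# LM checkpoint supported by LLM would work the same way):
#
#   llm = get_or_load_model("gpt2", seed=42)    # first call: loads and caches
#   llm = get_or_load_model("gpt2", seed=123)   # later calls: cache hit, RNGs re-seeded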

# --- Experiment 1: Silent Cogitation & Halting Runner (Version 9.0) ---
def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str,
                               num_steps: int, timeout: int, temperature: float) -> Dict[str, Any]:
    llm = get_or_load_model(model_id, seed)
    prompt = RESONANCE_PROMPTS[prompt_type]
    dbg(f"--- SILENT COGITATION (Seed: {seed}, Temp: {temperature}) ---")
    dbg("INPUT PROMPT:", prompt)
    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
    step_times = []
    state_deltas = []
    total_start_time = time.time()
    with torch.no_grad():
        # Prefill pass: encode the full prompt once to obtain the initial
        # hidden state and the KV cache that later single-token steps extend.
        step_start_time = time.time()
        outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True)
        step_times.append(time.time() - step_start_time)
        current_hidden_state = outputs.hidden_states[-1][:, -1, :]
        past_key_values = outputs.past_key_values
        del outputs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        converged = False
        for i in range(num_steps - 1):
            if time.time() - total_start_time > timeout:
                dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
                break
            step_start_time = time.time()
            # Project the last hidden state through the LM head to get logits
            # for the next token.
            next_token_logits = llm.model.lm_head(current_hidden_state)
            # ✅ FIX: Apply temperature and use stochastic sampling instead of argmax.
            if temperature > 0:
                scaled_logits = next_token_logits / temperature
                probabilities = torch.nn.functional.softmax(scaled_logits, dim=-1)
                next_token_id = torch.multinomial(probabilities, num_samples=1)
            else:  # A temperature of 0 means deterministic argmax.
                next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
            outputs = llm.model(input_ids=next_token_id, past_key_values=past_key_values,
                                output_hidden_states=True, use_cache=True)
            step_times.append(time.time() - step_start_time)
            new_hidden_state = outputs.hidden_states[-1][:, -1, :]
            past_key_values = outputs.past_key_values
            # L2 distance between consecutive last-token hidden states: small
            # deltas mean the internal state is settling.
            delta = torch.norm(new_hidden_state - current_hidden_state).item()
            state_deltas.append(delta)
            dbg(f"Step {i+1}: State Delta = {delta:.4f}, Time = {step_times[-1]*1000:.2f}ms")
            if delta < 1e-4:
                dbg(f"Internal state has converged after {i+1} steps. Halting.")
                converged = True
                break
            current_hidden_state = new_hidden_state.clone()
            del outputs, new_hidden_state
            if torch.cuda.is_available():
                torch.cuda.empty_cache()
    total_duration = time.time() - total_start_time
    mean_step_time = statistics.mean(step_times) if step_times else 0
    stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0
    # Classify the outcome: convergence is tracked explicitly so that a run
    # converging on the very last step is not misreported as non-convergent.
    if converged:
        verdict = f"### ✅ Stable Convergence\nThe model's internal state converged after {len(step_times)} steps."
    elif total_duration >= timeout:
        verdict = "### ⚠️ Potential Cognitive Jamming Detected!\nThe process did not converge and exceeded the timeout."
    else:
        verdict = "### 🤔 Non-Convergent Process\nThe state did not stabilize, suggesting a complex or chaotic dynamic."
    stats = {
        "verdict": verdict,
        "steps_completed": len(step_times),
        "total_duration_s": total_duration,
        "mean_step_time_ms": mean_step_time * 1000,
        "stdev_step_time_ms": stdev_step_time * 1000,
        "state_deltas": state_deltas,
    }
    if DEBUG:
        print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))
    return stats
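
# Minimal invocation sketch (illustrative only: "gpt2" is a stand-in checkpoint
# and "control" an assumed RESONANCE_PROMPTS key, not necessarily defined there):
#
#   stats = run_silent_cogitation_test(
#       model_id="gpt2", seed=42, prompt_type="control",
#       num_steps=100, timeout=120, temperature=0.7,
#   )
#   print(stats["verdict"])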