# bp_phi/runner.py
import os
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"  # valid value required for deterministic cuBLAS kernels
import torch
import random
import numpy as np
import statistics
import time
import json
from transformers import set_seed
from typing import Dict, Any
from .llm_iface import LLM
from .prompts_en import RESONANCE_PROMPTS
from .runner_utils import dbg, DEBUG
# --- Global Model Cache ---
CACHED_MODELS: Dict[str, LLM] = {}
def get_or_load_model(model_id: str, seed: int) -> LLM:
    if model_id not in CACHED_MODELS:
        dbg(f"Model '{model_id}' not in cache. Loading now...")
        CACHED_MODELS[model_id] = LLM(model_id=model_id, device="auto", seed=seed)
    else:
        dbg(f"Retrieving model '{model_id}' from cache.")
    llm = CACHED_MODELS[model_id]
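
    # Re-seed every RNG source (transformers, random, numpy, torch) so a cached model
    # still yields reproducible runs for the requested seed.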
    set_seed(seed)
    llm.seed = seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    return llm
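
# Usage sketch (the model id below is a hypothetical placeholder): repeated calls with
# the same id reuse the cached instance and only re-seed the RNGs, e.g.
#   llm_a = get_or_load_model("some-org/some-model", seed=123)  # loads weights
#   llm_b = get_or_load_model("some-org/some-model", seed=123)  # cache hit
#   assert llm_a is llm_b
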
# --- Experiment 1: Silent Cogitation & Halting Runner (Version 9.0) ---
def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_steps: int, timeout: int, temperature: float) -> Dict[str, Any]:
    llm = get_or_load_model(model_id, seed)
    prompt = RESONANCE_PROMPTS[prompt_type]
    dbg(f"--- SILENT COGITATION (Seed: {seed}, Temp: {temperature}) ---")
    dbg("INPUT PROMPT:", prompt)
    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)

    step_times = []
    state_deltas = []
    total_start_time = time.time()

    with torch.no_grad():
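        # Prime the model with the full prompt once; this fills the KV cache and
        # yields the starting hidden state for the silent (no-text) reasoning loop.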
        step_start_time = time.time()
        outputs = llm.model(**inputs, output_hidden_states=True)
        step_times.append(time.time() - step_start_time)

        current_hidden_state = outputs.hidden_states[-1][:, -1, :]
        past_key_values = outputs.past_key_values

        del outputs
        if torch.cuda.is_available(): torch.cuda.empty_cache()
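
        # Each iteration samples one token internally (nothing is decoded or printed),
        # feeds it back through the model, and records how far the last-layer hidden
        # state moved (the "state delta").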
        for i in range(num_steps - 1):
            if time.time() - total_start_time > timeout:
                dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
                break

            step_start_time = time.time()

            # Get logits from the last hidden state
            next_token_logits = llm.model.lm_head(current_hidden_state)

            # ✅ FIX: Apply temperature and use stochastic sampling instead of argmax
            if temperature > 0:
                scaled_logits = next_token_logits / temperature
                probabilities = torch.nn.functional.softmax(scaled_logits, dim=-1)
                next_token_id = torch.multinomial(probabilities, num_samples=1)
            else:  # Temperature of 0 means deterministic argmax
                next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
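
            # Feed the sampled token back in, reusing the cached attention states
            # so only one new position is computed per step.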
            outputs = llm.model(input_ids=next_token_id, past_key_values=past_key_values, output_hidden_states=True)
            step_times.append(time.time() - step_start_time)

            new_hidden_state = outputs.hidden_states[-1][:, -1, :]
            past_key_values = outputs.past_key_values

            delta = torch.norm(new_hidden_state - current_hidden_state).item()
            state_deltas.append(delta)
            dbg(f"Step {i+1}: State Delta = {delta:.4f}, Time = {step_times[-1]*1000:.2f}ms")
            if delta < 1e-4:
                dbg(f"Internal state has converged after {i+1} steps. Halting.")
                break

            current_hidden_state = new_hidden_state.clone()
            del outputs, new_hidden_state
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    total_duration = time.time() - total_start_time
    mean_step_time = statistics.mean(step_times) if step_times else 0
    stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0
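
    # Classify the run: an early halt without hitting the timeout counts as convergence,
    # hitting the timeout as potential jamming, anything else as non-convergent.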
    if len(step_times) < num_steps and total_duration < timeout:
        verdict = f"### ✅ Stable Convergence\nThe model's internal state converged after {len(step_times)} steps."
    elif total_duration >= timeout:
        verdict = f"### ⚠️ Potential Cognitive Jamming Detected!\nThe process did not converge and exceeded the timeout."
    else:
        verdict = f"### 🤔 Non-Convergent Process\nThe state did not stabilize, suggesting a complex or chaotic dynamic."

    stats = {
        "verdict": verdict, "steps_completed": len(step_times), "total_duration_s": total_duration,
        "mean_step_time_ms": mean_step_time * 1000, "stdev_step_time_ms": stdev_step_time * 1000,
        "state_deltas": state_deltas
    }
    if DEBUG: print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))
    return stats
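
# Minimal manual-run sketch. Assumptions: the model id is a placeholder and
# "resonance" must be replaced by an actual key in RESONANCE_PROMPTS; run as a
# module (python -m bp_phi.runner) so the relative imports resolve.
if __name__ == "__main__":
    results = run_silent_cogitation_test(
        model_id="gpt2",            # hypothetical placeholder model id
        seed=42,
        prompt_type="resonance",    # assumed key; check prompts_en.RESONANCE_PROMPTS
        num_steps=100,
        timeout=180,
        temperature=0.7,
    )
    print(results["verdict"])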