# bp_phi/runner.py
import os
# Must be set before torch initializes cuBLAS; ":4096:8" (or ":16:8") are the
# values cuBLAS accepts for a deterministic workspace.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

import torch
import random
import numpy as np
import statistics
import time
import json
from transformers import set_seed
from typing import Dict, Any
from .llm_iface import LLM
from .prompts_en import RESONANCE_PROMPTS
from .runner_utils import dbg, DEBUG

# --- Global Model Cache ---
CACHED_MODELS: Dict[str, LLM] = {}
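
# get_or_load_model() returns the cached instance when possible, so repeated
# calls (e.g. from a UI loop) skip the expensive weight initialization.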
def get_or_load_model(model_id: str, seed: int) -> LLM:
    if model_id not in CACHED_MODELS:
        dbg(f"Model '{model_id}' not in cache. Loading now...")
        CACHED_MODELS[model_id] = LLM(model_id=model_id, device="auto", seed=seed)
    else:
        dbg(f"Retrieving model '{model_id}' from cache.")
    llm = CACHED_MODELS[model_id]
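    # Re-seed every RNG source on each call, not just on first load, so that
    # runs with the same seed stay reproducible even on cache hits.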
    set_seed(seed)
    llm.seed = seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
    return llm

# --- Experiment 1: Silent Cogitation & Halting Runner (Version 9.0) ---
def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_steps: int, timeout: int, temperature: float) -> Dict[str, Any]:
    llm = get_or_load_model(model_id, seed)
    prompt = RESONANCE_PROMPTS[prompt_type]
    dbg(f"--- SILENT COGITATION (Seed: {seed}, Temp: {temperature}) ---")
    dbg("INPUT PROMPT:", prompt)

    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
    step_times = []
    state_deltas = []
    converged = False  # tracked explicitly so the verdict below cannot misread an early halt
    total_start_time = time.time()
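
    # Two phases: one full forward pass over the prompt primes the KV cache, then
    # the loop below feeds each newly selected token back in one at a time, so the
    # "cogitation" proceeds without any text being emitted.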
    with torch.no_grad():
        step_start_time = time.time()
        outputs = llm.model(**inputs, output_hidden_states=True)
        step_times.append(time.time() - step_start_time)

        current_hidden_state = outputs.hidden_states[-1][:, -1, :]
        past_key_values = outputs.past_key_values
        del outputs
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
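
        # Recurrent loop: each iteration selects one token from the current hidden
        # state and runs a single-token forward pass against the cached context.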
        for i in range(num_steps - 1):
            if time.time() - total_start_time > timeout:
                dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
                break

            step_start_time = time.time()

            # Get logits from the last hidden state
            next_token_logits = llm.model.lm_head(current_hidden_state)
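            # (This assumes the wrapped model exposes an `lm_head` attribute, as the
            # common transformers causal-LM classes do; a more portable spelling
            # would be llm.model.get_output_embeddings().)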

            # ✅ FIX: Apply temperature and use stochastic sampling instead of argmax
            if temperature > 0:
                scaled_logits = next_token_logits / temperature
                probabilities = torch.nn.functional.softmax(scaled_logits, dim=-1)
                next_token_id = torch.multinomial(probabilities, num_samples=1)
            else:  # A temperature of 0 means deterministic argmax
                next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)

            outputs = llm.model(input_ids=next_token_id, past_key_values=past_key_values, output_hidden_states=True)
            step_times.append(time.time() - step_start_time)

            new_hidden_state = outputs.hidden_states[-1][:, -1, :]
            past_key_values = outputs.past_key_values

            delta = torch.norm(new_hidden_state - current_hidden_state).item()
            state_deltas.append(delta)
            dbg(f"Step {i+1}: State Delta = {delta:.4f}, Time = {step_times[-1]*1000:.2f}ms")
            if delta < 1e-4:
                dbg(f"Internal state has converged after {i+1} steps. Halting.")
                converged = True
                break

            current_hidden_state = new_hidden_state.clone()
            del outputs, new_hidden_state
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    total_duration = time.time() - total_start_time
    mean_step_time = statistics.mean(step_times) if step_times else 0
    stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0

    # The verdict keys off the explicit converged flag; inferring convergence from
    # step counts would misclassify a run that converges on its final step.
    if converged:
        verdict = f"### ✅ Stable Convergence\nThe model's internal state converged after {len(step_times)} steps."
    elif total_duration >= timeout:
        verdict = "### ⚠️ Potential Cognitive Jamming Detected!\nThe process did not converge and exceeded the timeout."
    else:
        verdict = "### 🤔 Non-Convergent Process\nThe state did not stabilize, suggesting a complex or chaotic dynamic."

    stats = {
        "verdict": verdict, "steps_completed": len(step_times), "total_duration_s": total_duration,
        "mean_step_time_ms": mean_step_time * 1000, "stdev_step_time_ms": stdev_step_time * 1000,
        "state_deltas": state_deltas
    }
    if DEBUG:
        print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))
    return stats
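
# Example invocation (a sketch: "resonance" is a hypothetical prompt key, so
# substitute one that actually exists in RESONANCE_PROMPTS):
#
#   from bp_phi.runner import run_silent_cogitation_test
#
#   stats = run_silent_cogitation_test(
#       model_id="gpt2", seed=42, prompt_type="resonance",
#       num_steps=300, timeout=120, temperature=0.7,
#   )
#   print(stats["verdict"])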