# bp_phi/runner.py
import os
# Must be set before torch initializes CUDA; ":4096:8" is the value cuBLAS accepts for deterministic workspaces.
os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
import torch
import random
import numpy as np
import statistics
import time
import json
from transformers import set_seed
from typing import Dict, Any
from .llm_iface import LLM
from .prompts_en import RESONANCE_PROMPTS
from .runner_utils import dbg, DEBUG

# --- Global Model Cache ---
CACHED_MODELS: Dict[str, LLM] = {}

def get_or_load_model(model_id: str, seed: int) -> LLM:
    if model_id not in CACHED_MODELS:
        dbg(f"Model '{model_id}' not in cache. Loading now...")
        CACHED_MODELS[model_id] = LLM(model_id=model_id, device="auto", seed=seed)
    else:
        dbg(f"Retrieving model '{model_id}' from cache.")

    llm = CACHED_MODELS[model_id]
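    # Re-seed all RNGs on every call so a cached model still yields reproducible runs.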
    set_seed(seed)
    llm.seed = seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

    return llm

# --- Experiment 1: Silent Cogitation & Halting Runner (Version 9.0) ---
def run_silent_cogitation_test(model_id: str, seed: int, prompt_type: str, num_steps: int, timeout: int, temperature: float) -> Dict[str, Any]:
    llm = get_or_load_model(model_id, seed)

    prompt = RESONANCE_PROMPTS[prompt_type]
    dbg(f"--- SILENT COGITATION (Seed: {seed}, Temp: {temperature}) ---")
    dbg("INPUT PROMPT:", prompt)

    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)

    step_times = []
    state_deltas = []
    total_start_time = time.time()

    with torch.no_grad():
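        # Initial forward pass over the full prompt: primes the KV cache and yields the first hidden state.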
        step_start_time = time.time()
        outputs = llm.model(**inputs, output_hidden_states=True)
        step_times.append(time.time() - step_start_time)

        current_hidden_state = outputs.hidden_states[-1][:, -1, :]
        past_key_values = outputs.past_key_values

        del outputs
        if torch.cuda.is_available(): torch.cuda.empty_cache()

        for i in range(num_steps - 1):
            if time.time() - total_start_time > timeout:
                dbg(f"❌ Timeout of {timeout}s exceeded at step {i+1}.")
                break

            step_start_time = time.time()

            # Get logits from the last hidden state
            next_token_logits = llm.model.lm_head(current_hidden_state)

            # ✅ FIX: Apply temperature and use stochastic sampling instead of argmax
            if temperature > 0:
                scaled_logits = next_token_logits / temperature
                probabilities = torch.nn.functional.softmax(scaled_logits, dim=-1)
                next_token_id = torch.multinomial(probabilities, num_samples=1)
            else: # Temperature of 0 means deterministic argmax
                next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)

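            # Feed only the newly sampled token back in; the KV cache supplies the full preceding context.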
            outputs = llm.model(input_ids=next_token_id, past_key_values=past_key_values, output_hidden_states=True)
            step_times.append(time.time() - step_start_time)

            new_hidden_state = outputs.hidden_states[-1][:, -1, :]
            past_key_values = outputs.past_key_values

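            # L2 distance between successive last-token hidden states: a proxy for how much the internal state is still moving.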
            delta = torch.norm(new_hidden_state - current_hidden_state).item()
            state_deltas.append(delta)
            dbg(f"Step {i+1}: State Delta = {delta:.4f}, Time = {step_times[-1]*1000:.2f}ms")

            if delta < 1e-4:
                dbg(f"Internal state has converged after {i+1} steps. Halting.")
                break

            current_hidden_state = new_hidden_state.clone()

            del outputs, new_hidden_state
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    total_duration = time.time() - total_start_time
    mean_step_time = statistics.mean(step_times) if step_times else 0
    stdev_step_time = statistics.stdev(step_times) if len(step_times) > 1 else 0

    if len(step_times) < num_steps and total_duration < timeout:
        verdict = f"### ✅ Stable Convergence\nThe model's internal state converged after {len(step_times)} steps."
    elif total_duration >= timeout:
        verdict = f"### ⚠️ Potential Cognitive Jamming Detected!\nThe process did not converge and exceeded the timeout."
    else:
        verdict = f"### 🤔 Non-Convergent Process\nThe state did not stabilize, suggesting a complex or chaotic dynamic."

    stats = {
        "verdict": verdict, "steps_completed": len(step_times), "total_duration_s": total_duration,
        "mean_step_time_ms": mean_step_time * 1000, "stdev_step_time_ms": stdev_step_time * 1000,
        "state_deltas": state_deltas
    }
    if DEBUG: print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(stats, indent=2))
    return stats
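
# --- Example usage (illustrative sketch, not part of the experiment pipeline) ---
# The model ID and parameters below are assumptions chosen for a quick local check;
# substitute whatever model and prompt key your setup actually uses.
if __name__ == "__main__":
    example_stats = run_silent_cogitation_test(
        model_id="gpt2",                              # assumed small model, purely illustrative
        seed=42,
        prompt_type=next(iter(RESONANCE_PROMPTS)),    # pick any available resonance prompt key
        num_steps=20,
        timeout=60,
        temperature=0.7,
    )
    print(example_stats["verdict"])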