Spaces:

neuralworm
/

llm_qualia

Sleeping

File size: 7,626 Bytes

0916370
2f0addb
0916370
 
 
25c13d7
e593b84
2f0addb
88c294a
 
 
 
0916370
2f0addb
88c294a
 
2f0addb
0916370
 
88c294a
2f0addb
0916370
 
88c294a
0916370
88c294a
 
 
 
 
25c13d7
88c294a
25c13d7
88c294a
2f0addb
88c294a
 
25c13d7
88c294a
2f0addb
25c13d7
 
 
 
 
e593b84
 
 
25c13d7
 
 
e593b84
 
 
25c13d7
 
 
 
 
e593b84
25c13d7
e593b84
0916370
25c13d7
 
88c294a
 
 
 
 
 
 
 
 
b170ba4
88c294a
 
 
 
 
 
 
 
 
 
25c13d7
 
 
88c294a
 
25c13d7
 
 
 
 
 
88c294a
25c13d7
 
b170ba4
25c13d7
 
88c294a
25c13d7
88c294a
25c13d7
88c294a
 
 
b170ba4
88c294a
 
 
 
 
 
25c13d7
88c294a
 
 
b170ba4
88c294a
 
 
 
2f0addb
0916370

# app.py
import gradio as gr
import json
import statistics
import pandas as pd
from bp_phi.runner import run_workspace_suite, run_silent_cogitation_test, run_seismograph_suite, run_shock_test_suite
from bp_phi.runner_utils import dbg, DEBUG

# --- UI Theme and Layout ---
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="sky",
).set(
    # Page and block surfaces.
    body_background_fill="#f0f4f9",
    block_background_fill="white",
    block_border_width="1px",
    # Primary button colours.
    button_primary_background_fill="*primary_500",
    button_primary_text_color="white",
)

# --- Tab 1: Workspace & Ablations Functions ---
def run_workspace_and_display(model_id, trials, seed, temperature, run_ablations, progress=gr.Progress(track_tqdm=True)):
    """Run the workspace suite (baseline plus optional ablations) and format the results.

    Args:
        model_id: Model identifier forwarded to ``run_workspace_suite``.
        trials: Number of trials; cast to ``int`` before use.
        seed: Seed value; cast to ``int`` before use.
        temperature: Temperature value; cast to ``float`` before use.
        run_ablations: When truthy, the three ablation modes are run after
            the baseline.
        progress: Gradio progress tracker used to report run status.

    Returns:
        A ``(verdict, df, packs)`` tuple: a Markdown verdict string, a summary
        ``DataFrame`` with one row per run, and the raw result packs keyed by
        run tag (``"baseline"`` or the ablation name).
    """
    if run_ablations:
        modes = ("recurrence_off", "workspace_unlimited", "random_workspace")
    else:
        modes = ()
    total_runs = len(modes) + 1  # baseline + every ablation

    progress(0, desc="Running Baseline...")
    n_trials, seed_value, temp_value = int(trials), int(seed), float(temperature)
    packs = {"baseline": run_workspace_suite(model_id, n_trials, seed_value, temp_value, None)}

    for run_no, mode in enumerate(modes, start=1):
        progress(run_no / total_runs, desc=f"Running Ablation: {mode}...")
        packs[mode] = run_workspace_suite(model_id, n_trials, seed_value, temp_value, mode)
    progress(1.0, desc="Analysis complete.")

    # ΔΦ = baseline PCS minus the mean PCS over all ablation runs (0.0 when
    # no ablations were executed).
    baseline_pcs = packs["baseline"]["PCS"]
    ablated_pcs = [packs[mode]["PCS"] for mode in modes]
    if ablated_pcs:
        delta_phi = float(baseline_pcs - statistics.mean(ablated_pcs))
    else:
        delta_phi = 0.0

    verdict = (
        f"### ✅ Hypothesis Corroborated (ΔΦ = {delta_phi:.3f})\n..."
        if delta_phi > 0.05
        else f"### ⚠️ Null Hypothesis Confirmed (ΔΦ = {delta_phi:.3f})\n..."
    )

    # One table row per run; ΔΦ is only shown on the baseline row.
    rows = [
        [
            tag,
            f"{pack['PCS']:.3f}",
            f"{pack['Recall_Accuracy']:.2%}",
            f"{delta_phi:.3f}" if tag == "baseline" else "—",
        ]
        for tag, pack in packs.items()
    ]
    df = pd.DataFrame(rows, columns=["Run", "PCS", "Recall Accuracy", "ΔΦ"])

    if DEBUG:
        print("\n--- WORKSPACE & ABLATIONS FINAL RESULTS ---\n", json.dumps(packs, indent=2))
    return verdict, df, packs

# --- Tab 2: Silent Cogitation Function ---
def run_cogitation_and_display(model_id, seed, prompt_type, num_steps, timeout, progress=gr.Progress(track_tqdm=True)):
    """Run the silent cogitation test and prepare its verdict, plot data and raw results.

    Args:
        model_id: Model identifier forwarded to ``run_silent_cogitation_test``.
        seed: Seed value; cast to ``int`` before use.
        prompt_type: Prompt type forwarded unchanged to the test runner.
        num_steps: Number of steps; cast to ``int`` before use.
        timeout: Timeout value; cast to ``int`` before use.
        progress: Gradio progress tracker used to report run status.

    Returns:
        A ``(full_verdict, df, results)`` tuple: the verdict text followed by a
        Markdown statistics line, a ``DataFrame`` with ``Step`` and
        ``State Change (Delta)`` columns, and the results dict with its
        ``"verdict"`` entry removed.
    """
    progress(0, desc="Starting Silent Cogitation Test...")
    results = run_silent_cogitation_test(
        model_id,
        int(seed),
        prompt_type,
        int(num_steps),
        int(timeout),
    )
    progress(1.0, desc="Test complete.")

    # The verdict is taken out of the dict, so the JSON returned below no
    # longer contains it.
    verdict_text = results.pop("verdict")

    steps_part = f"**Steps Completed:** {results['steps_completed']} | "
    duration_part = f"**Total Duration:** {results['total_duration_s']:.2f}s | "
    timing_part = f"**Avg Time/Step:** {results['mean_step_time_ms']:.2f}ms (StdDev: {results['stdev_step_time_ms']:.2f}ms)"
    stats_md = steps_part + duration_part + timing_part
    full_verdict = f"{verdict_text}\n\n{stats_md}"

    # Plot data: one row per recorded state delta, indexed from step 0.
    state_deltas = results.get("state_deltas", [])
    step_index = range(len(state_deltas))
    df = pd.DataFrame({"Step": step_index, "State Change (Delta)": state_deltas})

    if DEBUG:
        print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(results, indent=2))

    return full_verdict, df, results

# --- Gradio App Definition ---
# Build the four-tab UI. Each tab has an input column (scale=1) on the left and
# an output column (scale=2) on the right, plus a button wired to its handler.
# Components are laid out in the order they are created, so statement order
# inside each `with` context matters.
with gr.Blocks(theme=theme, title="BP-Φ Suite 4.0") as demo:
    gr.Markdown("# 🧠 BP-Φ Suite 4.0: Probing for Internal Cognitive Dynamics")

    with gr.Tabs():
        # --- TAB 1: WORKSPACE & ABLATIONS ---
        with gr.TabItem("1. Workspace & Ablations (ΔΦ Test)"):
            gr.Markdown("Tests if memory performance depends on a recurrent workspace. A significant **ΔΦ > 0** supports the hypothesis.")
            with gr.Row():
                # Inputs. Slider positional arguments are (minimum, maximum, initial value).
                with gr.Column(scale=1):
                    ws_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    ws_trials = gr.Slider(3, 30, 5, step=1, label="Number of Scenarios")
                    ws_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
                    ws_temp = gr.Slider(0.1, 1.0, 0.7, step=0.05, label="Temperature")
                    ws_run_abl = gr.Checkbox(value=True, label="Run Ablations")
                    ws_run_btn = gr.Button("Run ΔΦ Evaluation", variant="primary")
                # Outputs: verdict text, summary table, and collapsible raw JSON.
                with gr.Column(scale=2):
                    ws_verdict = gr.Markdown("### Results will appear here.")
                    ws_summary_df = gr.DataFrame(label="Summary Metrics")
                    with gr.Accordion("Raw JSON Output", open=False):
                        ws_raw_json = gr.JSON()
            # Input order must match the handler's positional parameters; the
            # three outputs receive its (verdict, df, packs) return tuple.
            ws_run_btn.click(run_workspace_and_display, [ws_model_id, ws_trials, ws_seed, ws_temp, ws_run_abl], [ws_verdict, ws_summary_df, ws_raw_json])

        # --- TAB 2: SILENT COGITATION & HALTING ---
        with gr.TabItem("2. Silent Cogitation & Halting"):
            gr.Markdown("Tests for internal 'thinking' without text generation. A non-converging or chaotic **State Change** pattern suggests complex internal dynamics.")
            with gr.Row():
                # Inputs.
                with gr.Column(scale=1):
                    sc_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    sc_prompt_type = gr.Radio(["control_long_prose", "resonance_prompt"], label="Prompt Type", value="resonance_prompt")
                    sc_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
                    sc_num_steps = gr.Slider(10, 500, 100, step=10, label="Number of Internal Steps")
                    sc_timeout = gr.Slider(10, 300, 120, step=10, label="Timeout (seconds)")
                    sc_run_btn = gr.Button("Run Silent Cogitation Test", variant="primary")
                # Outputs: verdict text, line plot, and collapsible raw JSON.
                with gr.Column(scale=2):
                    sc_verdict = gr.Markdown("### Results will appear here.")
                    # x/y must match the column names of the DataFrame returned by the handler.
                    sc_plot = gr.LinePlot(x="Step", y="State Change (Delta)", label="Internal State Convergence", show_label=True)
                    with gr.Accordion("Raw Run Details (JSON)", open=False):
                        sc_results = gr.JSON()
            # Note the input order: seed comes before prompt type, matching the
            # handler signature (model_id, seed, prompt_type, num_steps, timeout).
            sc_run_btn.click(run_cogitation_and_display, [sc_model_id, sc_seed, sc_prompt_type, sc_num_steps, sc_timeout], [sc_verdict, sc_plot, sc_results])

        # --- TAB 3: COGNITIVE SEISMOGRAPH ---
        with gr.TabItem("3. Cognitive Seismograph"):
            gr.Markdown("Records internal neural activations to find the 'fingerprint' of a memory being recalled.")
            with gr.Row():
                with gr.Column(scale=1):
                    cs_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    cs_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
                    cs_run_btn = gr.Button("Run Seismograph Analysis", variant="primary")
                with gr.Column(scale=2):
                    cs_results = gr.JSON(label="Activation Similarity Results")
            # The runner is wired directly as the handler; its return value is
            # shown in the JSON component.
            # NOTE(review): unlike tabs 1-2, the seed is passed without an int()
            # cast - confirm run_seismograph_suite accepts the raw slider value.
            cs_run_btn.click(run_seismograph_suite, [cs_model_id, cs_seed], cs_results)

        # --- TAB 4: SYMBOLIC SHOCK TEST ---
        with gr.TabItem("4. Symbolic Shock Test"):
            gr.Markdown("Measures how the model reacts to semantically unexpected information.")
            with gr.Row():
                with gr.Column(scale=1):
                    ss_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    ss_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
                    ss_run_btn = gr.Button("Run Shock Test", variant="primary")
                with gr.Column(scale=2):
                    ss_results = gr.JSON(label="Shock Test Results")
            # The runner is wired directly as the handler; its return value is
            # shown in the JSON component.
            # NOTE(review): the seed is passed without an int() cast here as
            # well - confirm run_shock_test_suite accepts the raw slider value.
            ss_run_btn.click(run_shock_test_suite, [ss_model_id, ss_seed], ss_results)

if __name__ == "__main__":
    # Serve the app on all network interfaces, port 7860, when run as a script.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )