File size: 2,962 Bytes
2f0addb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import gradio as gr
import json, statistics
from bp_phi.runner import run_suite

# Names of the ablation conditions known to this app.
# NOTE(review): nothing in the visible part of this file reads this list;
# run_all() iterates its own literal subset. Confirm whether the two are
# meant to stay in sync.
ABLATIONS = [
    "none",
    "recurrence_off",
    "workspace_unlimited",
    "sham_meta",
    "random_workspace",
]

def _fmt_metric(value, spec=""):
    """Render a metric for the summary table; ``None`` becomes an em dash."""
    return "—" if value is None else format(value, spec)


def run_all(model_id, trials, temperature, run_ablations):
    """Run the baseline suite, optionally the ablations, and build the UI outputs.

    Args:
        model_id: Model identifier, forwarded unchanged to ``run_suite``.
        trials: Trial count; coerced with ``int`` before being forwarded.
        temperature: Sampling temperature; coerced with ``float`` before
            being forwarded.
        run_ablations: When truthy, additionally run the ``recurrence_off``,
            ``workspace_unlimited`` and ``random_workspace`` ablations.

    Returns:
        A 3-tuple of strings ``(status, table, raw_json)``: one status line
        per completed run, a comma-separated summary table (header plus one
        row per run), and the JSON dump of every pack keyed by run tag.

    Raises:
        KeyError: If a pack returned by ``run_suite`` lacks the ``summary``
            fields read here (``PCS``, ``trials``, ``ablation``, ``metrics``
            and its ``AUC_nrp``/``ECE``/``CK``/``DS`` entries).
    """
    # Convert once up front instead of on every run_suite call.
    n_trials = int(trials)
    temp = float(temperature)

    out_texts = []
    packs = {}

    # Baseline
    packs["baseline"] = run_suite(
        model_id=model_id, trials=n_trials, temperature=temp, ablation=None
    )
    out_texts.append("✅ Baseline done")

    if run_ablations:
        # NOTE(review): the module-level ABLATIONS list also names "sham_meta",
        # which is not run here — confirm whether that omission is intended.
        for ab in ("recurrence_off", "workspace_unlimited", "random_workspace"):
            packs[ab] = run_suite(
                model_id=model_id, trials=n_trials, temperature=temp, ablation=ab
            )
            out_texts.append(f"✅ Ablation {ab} done")

    # DeltaPhi = baseline PCS minus the mean PCS over ablations that produced
    # one. It is stored on the baseline pack only, and only when computable.
    base_pcs = packs["baseline"]["summary"]["PCS"]
    ab_pcs_values = [
        pack["summary"]["PCS"]
        for tag, pack in packs.items()
        if tag != "baseline" and pack["summary"]["PCS"] is not None
    ]
    if base_pcs is not None and ab_pcs_values:
        packs["baseline"]["summary"]["metrics"]["DeltaPhi"] = float(
            base_pcs - statistics.mean(ab_pcs_values)
        )

    # Summary view
    rows = []
    for tag, pack in packs.items():
        s = pack["summary"]
        m = s["metrics"]
        rows.append([
            tag,
            s["trials"],
            f"{s['ablation']}",
            _fmt_metric(m["AUC_nrp"]),
            _fmt_metric(m["ECE"]),
            _fmt_metric(m["CK"], ".3f"),
            _fmt_metric(m["DS"], ".2f"),
            _fmt_metric(s["PCS"], ".3f"),
            # .get(): this function only sets DeltaPhi on the baseline pack,
            # so other packs may not carry the key at all.
            _fmt_metric(m.get("DeltaPhi"), ".3f"),
        ])

    header = ["run", "trials", "ablation", "AUC_nrp", "ECE", "CK", "DS", "PCS", "DeltaPhi"]
    table = "\n".join(", ".join(map(str, r)) for r in [header, *rows])

    return "\n".join(out_texts), table, json.dumps(packs, indent=2)

# Build the single-page UI. Components are created in the order they should
# appear, so the statement order below is significant.
with gr.Blocks() as demo:
    gr.Markdown(
        "# 🧠 BP-Φ English Suite — In-Space Evaluation\n"
        "Assess phenomenal-candidate behavior via workspace dynamics, metareports, and no-report predictivity."
    )

    # All run parameters share one row.
    with gr.Row():
        model_id = gr.Textbox(label="Model ID (HF)", value="google/gemma-3-1b-it", scale=2)
        trials = gr.Slider(minimum=10, maximum=200, value=40, step=10, label="Trials")
        temperature = gr.Slider(minimum=0.3, maximum=1.0, value=0.7, step=0.05, label="Temperature")
        run_abl = gr.Checkbox(label="Run ablations", value=True)

    run_btn = gr.Button("Run BP-Φ (baseline + optional ablations)", variant="primary")

    # Output panes, in the same order as run_all's return tuple.
    status = gr.Textbox(label="Status", lines=4)
    summary_table = gr.Textbox(label="Summary Table", lines=12)
    raw = gr.Textbox(label="Raw JSON (all runs)", lines=20)

    run_btn.click(
        fn=run_all,
        inputs=[model_id, trials, temperature, run_abl],
        outputs=[status, summary_table, raw],
    )

# Listen on all interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)