"""Gradio front end for the BP-Φ suite.

Runs ``bp_phi.runner.run_suite`` once as a baseline and, optionally, once per
ablation, then shows a status log, a comma-separated summary table and the raw
JSON of every run.
"""
import json
import statistics

import gradio as gr

from bp_phi.runner import run_suite

# Ablation tags declared by this module. NOTE(review): not referenced by
# run_all, which runs the fixed subset in _RUN_ABLATIONS — confirm whether
# "sham_meta" is meant to be executed as well.
ABLATIONS = ["none", "recurrence_off", "workspace_unlimited", "sham_meta", "random_workspace"]

# Ablations actually executed when the "Run ablations" box is ticked.
_RUN_ABLATIONS = ("recurrence_off", "workspace_unlimited", "random_workspace")

_HEADER = ["run", "trials", "ablation", "AUC_nrp", "ECE", "CK", "DS", "PCS", "DeltaPhi"]


def _fmt(value, spec=""):
    """Render *value* for the summary table, using "—" when it is None."""
    if value is None:
        return "—"
    return format(value, spec)


def _summary_row(tag, pack):
    """Build one summary-table row (a list of cells) for the run *tag*."""
    summary = pack["summary"]
    metrics = summary["metrics"]
    return [
        tag,
        summary["trials"],
        str(summary["ablation"]),
        _fmt(metrics["AUC_nrp"]),
        _fmt(metrics["ECE"]),
        _fmt(metrics["CK"], ".3f"),
        _fmt(metrics["DS"], ".2f"),
        _fmt(summary["PCS"], ".3f"),
        # DeltaPhi is only written into the baseline pack by run_all, so it
        # may be absent from ablation packs; treat a missing key like None.
        _fmt(metrics.get("DeltaPhi"), ".3f"),
    ]


def run_all(model_id, trials, temperature, run_ablations):
    """Run the baseline suite (plus optional ablations) and format the results.

    Args:
        model_id: Model identifier forwarded to ``run_suite``.
        trials: Number of trials; coerced with ``int``.
        temperature: Sampling temperature; coerced with ``float``.
        run_ablations: When truthy, also run every ablation in
            ``_RUN_ABLATIONS``.

    Returns:
        A 3-tuple of strings: the newline-joined status log, the
        comma-separated summary table, and the JSON dump of all result packs
        keyed by run tag.
    """
    n_trials = int(trials)
    temp = float(temperature)
    status_lines = []
    packs = {}

    # Baseline
    packs["baseline"] = run_suite(
        model_id=model_id, trials=n_trials, temperature=temp, ablation=None
    )
    status_lines.append("✅ Baseline done")

    if run_ablations:
        for ablation in _RUN_ABLATIONS:
            packs[ablation] = run_suite(
                model_id=model_id, trials=n_trials, temperature=temp, ablation=ablation
            )
            status_lines.append(f"✅ Ablation {ablation} done")

    # DeltaPhi = baseline PCS minus the mean PCS over the ablated runs that
    # reported one; it stays None when either side is unavailable.
    base_summary = packs["baseline"]["summary"]
    base_pcs = base_summary["PCS"]
    ablated_pcs = [
        pack["summary"]["PCS"]
        for tag, pack in packs.items()
        if tag != "baseline" and pack["summary"]["PCS"] is not None
    ]
    delta_phi = None
    if base_pcs is not None and ablated_pcs:
        delta_phi = float(base_pcs - statistics.mean(ablated_pcs))
    # Stored on the baseline pack so it also appears in the raw JSON output.
    base_summary["metrics"]["DeltaPhi"] = delta_phi

    # Summary view
    rows = [_summary_row(tag, pack) for tag, pack in packs.items()]
    lines = [", ".join(_HEADER)]
    lines.extend(", ".join(map(str, row)) for row in rows)
    table = "\n".join(lines)

    return "\n".join(status_lines), table, json.dumps(packs, indent=2)


# NOTE(review): the nesting below (four inputs inside the Row, everything else
# directly under Blocks) is reconstructed from a whitespace-collapsed source —
# confirm against the intended layout.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 BP-Φ English Suite — In-Space Evaluation\nAssess phenomenal-candidate behavior via workspace dynamics, metareports, and no-report predictivity.")
    with gr.Row():
        model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID (HF)", scale=2)
        trials = gr.Slider(10, 200, 40, step=10, label="Trials")
        temperature = gr.Slider(0.3, 1.0, 0.7, step=0.05, label="Temperature")
        run_abl = gr.Checkbox(value=True, label="Run ablations")
    run_btn = gr.Button("Run BP-Φ (baseline + optional ablations)", variant="primary")
    status = gr.Textbox(label="Status", lines=4)
    summary_table = gr.Textbox(label="Summary Table", lines=12)
    raw = gr.Textbox(label="Raw JSON (all runs)", lines=20)

    # The three outputs line up with the 3-tuple returned by run_all.
    run_btn.click(
        run_all,
        inputs=[model_id, trials, temperature, run_abl],
        outputs=[status, summary_table, raw],
    )

demo.launch(server_name="0.0.0.0", server_port=7860)