llm_qualia / app.py
neuralworm's picture
initial commit
2f0addb
raw
history blame
2.96 kB
import gradio as gr
import json, statistics
from bp_phi.runner import run_suite
# Ablation identifiers for the suite.
# NOTE(review): this list is not referenced anywhere in the visible code;
# run_all iterates its own hard-coded three-item list instead ("none" and
# "sham_meta" are never run). Confirm whether that omission is intentional.
ABLATIONS = ["none", "recurrence_off", "workspace_unlimited", "sham_meta", "random_workspace"]
def _fmt_metric(value, spec=""):
    """Return *value* formatted with *spec*, or "—" when *value* is None."""
    return "—" if value is None else format(value, spec)


def run_all(model_id, trials, temperature, run_ablations):
    """Run the baseline suite (plus optional ablations) and summarize results.

    Args:
        model_id: Model identifier forwarded unchanged to ``run_suite``.
        trials: Number of trials; coerced with ``int`` before use.
        temperature: Sampling temperature; coerced with ``float`` before use.
        run_ablations: When truthy, additionally run the "recurrence_off",
            "workspace_unlimited" and "random_workspace" ablations.

    Returns:
        A 3-tuple of strings: newline-joined status messages, a
        comma-separated summary table (header line plus one line per run),
        and the JSON dump of every result pack keyed by run tag.

    Each pack returned by ``run_suite`` is read as a dict with a "summary"
    dict holding "trials", "ablation", "PCS" and a "metrics" dict with
    "AUC_nrp", "ECE", "CK" and "DS".
    """
    # Coerce once: the UI may hand over floats/strings for these values.
    n_trials = int(trials)
    temp = float(temperature)

    out_texts = []
    packs = {}

    # Baseline
    packs["baseline"] = run_suite(
        model_id=model_id, trials=n_trials, temperature=temp, ablation=None
    )
    out_texts.append("✅ Baseline done")

    if run_ablations:
        for ab in ["recurrence_off", "workspace_unlimited", "random_workspace"]:
            packs[ab] = run_suite(
                model_id=model_id, trials=n_trials, temperature=temp, ablation=ab
            )
            out_texts.append(f"✅ Ablation {ab} done")

    # DeltaPhi = baseline PCS minus the mean PCS over ablations that produced
    # one; it stays None when either side is unavailable.
    base_pcs = packs["baseline"]["summary"]["PCS"]
    ab_pcs_values = [
        pack["summary"]["PCS"]
        for tag, pack in packs.items()
        if tag != "baseline" and pack["summary"]["PCS"] is not None
    ]
    delta_phi = None
    if base_pcs is not None and ab_pcs_values:
        delta_phi = float(base_pcs - statistics.mean(ab_pcs_values))
    packs["baseline"]["summary"]["metrics"]["DeltaPhi"] = delta_phi

    # Summary view
    header = ["run", "trials", "ablation", "AUC_nrp", "ECE", "CK", "DS", "PCS", "DeltaPhi"]
    table_lines = [", ".join(header)]
    for tag, pack in packs.items():
        s = pack["summary"]
        m = s["metrics"]
        table_lines.append(", ".join([
            str(tag),
            str(s["trials"]),
            f"{s['ablation']}",
            _fmt_metric(m["AUC_nrp"]),
            _fmt_metric(m["ECE"]),
            # CK/DS get the same None guard as the other metrics so one
            # missing value cannot abort the whole report.
            _fmt_metric(m["CK"], ".3f"),
            _fmt_metric(m["DS"], ".2f"),
            _fmt_metric(s["PCS"], ".3f"),
            # Only the baseline pack is assigned DeltaPhi above; ablation
            # packs may lack the key entirely, so do not index directly.
            _fmt_metric(m.get("DeltaPhi"), ".3f"),
        ]))
    table = "\n".join(table_lines)

    return "\n".join(out_texts), table, json.dumps(packs, indent=2)
# Gradio front end: collects run parameters, triggers run_all, shows results.
# NOTE(review): the source's indentation was lost; the four input widgets are
# assumed to sit inside the Row and the remaining widgets directly under
# Blocks — confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown(
        value=(
            "# 🧠 BP-Φ English Suite — In-Space Evaluation\n"
            "Assess phenomenal-candidate behavior via workspace dynamics, "
            "metareports, and no-report predictivity."
        )
    )

    # Run parameters, side by side.
    with gr.Row():
        model_id = gr.Textbox(
            value="google/gemma-3-1b-it",
            label="Model ID (HF)",
            scale=2,
        )
        trials = gr.Slider(minimum=10, maximum=200, value=40, step=10, label="Trials")
        temperature = gr.Slider(
            minimum=0.3, maximum=1.0, value=0.7, step=0.05, label="Temperature"
        )
        run_abl = gr.Checkbox(value=True, label="Run ablations")

    run_btn = gr.Button(
        value="Run BP-Φ (baseline + optional ablations)",
        variant="primary",
    )

    # Output panes, in the same order as run_all's returned tuple.
    status = gr.Textbox(label="Status", lines=4)
    summary_table = gr.Textbox(label="Summary Table", lines=12)
    raw = gr.Textbox(label="Raw JSON (all runs)", lines=20)

    run_btn.click(
        fn=run_all,
        inputs=[model_id, trials, temperature, run_abl],
        outputs=[status, summary_table, raw],
    )

# Listen on all interfaces on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)