Spaces:
Sleeping
Sleeping
File size: 7,626 Bytes
0916370 2f0addb 0916370 25c13d7 e593b84 2f0addb 88c294a 0916370 2f0addb 88c294a 2f0addb 0916370 88c294a 2f0addb 0916370 88c294a 0916370 88c294a 25c13d7 88c294a 25c13d7 88c294a 2f0addb 88c294a 25c13d7 88c294a 2f0addb 25c13d7 e593b84 25c13d7 e593b84 25c13d7 e593b84 25c13d7 e593b84 0916370 25c13d7 88c294a b170ba4 88c294a 25c13d7 88c294a 25c13d7 88c294a 25c13d7 b170ba4 25c13d7 88c294a 25c13d7 88c294a 25c13d7 88c294a b170ba4 88c294a 25c13d7 88c294a b170ba4 88c294a 2f0addb 0916370 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
# app.py
import gradio as gr
import json
import statistics
import pandas as pd
from bp_phi.runner import run_workspace_suite, run_silent_cogitation_test, run_seismograph_suite, run_shock_test_suite
from bp_phi.runner_utils import dbg, DEBUG
# --- UI Theme and Layout ---
# Start from Gradio's "Soft" theme with a blue/sky palette, then override the
# page background, block surfaces/borders, and primary-button colours.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="sky",
).set(
    body_background_fill="#f0f4f9",
    block_background_fill="white",
    block_border_width="1px",
    button_primary_background_fill="*primary_500",
    button_primary_text_color="white",
)
# --- Tab 1: Workspace & Ablations Functions ---
def run_workspace_and_display(model_id, trials, seed, temperature, run_ablations, progress=gr.Progress(track_tqdm=True)):
    """Run the workspace suite (baseline plus optional ablations) and summarize it.

    The baseline run always executes. When ``run_ablations`` is truthy the
    suite is repeated once per ablation mode, and ΔΦ is computed as the
    baseline PCS minus the mean PCS of the ablated runs. When no ablations are
    run, ΔΦ is not computed and the verdict says so instead of reporting a
    hypothesis outcome.

    Args:
        model_id: Model identifier forwarded to ``run_workspace_suite``.
        trials: Forwarded to ``run_workspace_suite`` after ``int()`` coercion.
        seed: Forwarded to ``run_workspace_suite`` after ``int()`` coercion.
        temperature: Forwarded to ``run_workspace_suite`` after ``float()`` coercion.
        run_ablations: Whether to run the ablation modes in addition to the baseline.
        progress: Gradio progress tracker used to report status to the UI.

    Returns:
        A ``(verdict, df, packs)`` tuple: the Markdown verdict string, a
        summary ``DataFrame`` with one row per run, and the raw result packs
        keyed by run name (``"baseline"`` plus each ablation mode).
    """
    ablation_modes = ["recurrence_off", "workspace_unlimited", "random_workspace"] if run_ablations else []
    total_runs = len(ablation_modes) + 1

    progress(0, desc="Running Baseline...")
    packs = {"baseline": run_workspace_suite(model_id, int(trials), int(seed), float(temperature), None)}
    for i, ab in enumerate(ablation_modes, start=1):
        progress(i / total_runs, desc=f"Running Ablation: {ab}...")
        packs[ab] = run_workspace_suite(model_id, int(trials), int(seed), float(temperature), ab)
    progress(1.0, desc="Analysis complete.")

    base_pcs = packs["baseline"]["PCS"]
    ab_pcs_values = [packs[ab]["PCS"] for ab in ablation_modes]

    if ab_pcs_values:
        delta_phi = float(base_pcs - statistics.mean(ab_pcs_values))
        delta_text = f"{delta_phi:.3f}"
        if delta_phi > 0.05:
            verdict = f"### ✅ Hypothesis Corroborated (ΔΦ = {delta_phi:.3f})\n..."
        else:
            verdict = f"### ⚠️ Null Hypothesis Confirmed (ΔΦ = {delta_phi:.3f})\n..."
    else:
        # Without ablated runs there is nothing to compare the baseline to, so
        # reporting ΔΦ = 0 (and a "null hypothesis confirmed" verdict) would be
        # a claim the data does not support.
        delta_text = "—"
        verdict = "### ℹ️ Baseline Only (ΔΦ not computed)\nEnable **Run Ablations** to evaluate the hypothesis."

    # ΔΦ is a property of the whole comparison, so it is shown on the baseline
    # row only; the ablation rows carry a dash.
    df_data = [
        [
            tag,
            f"{pack['PCS']:.3f}",
            f"{pack['Recall_Accuracy']:.2%}",
            delta_text if tag == "baseline" else "—",
        ]
        for tag, pack in packs.items()
    ]
    df = pd.DataFrame(df_data, columns=["Run", "PCS", "Recall Accuracy", "ΔΦ"])

    if DEBUG:
        print("\n--- WORKSPACE & ABLATIONS FINAL RESULTS ---\n", json.dumps(packs, indent=2))
    return verdict, df, packs
# --- Tab 2: Silent Cogitation Function ---
def run_cogitation_and_display(model_id, seed, prompt_type, num_steps, timeout, progress=gr.Progress(track_tqdm=True)):
    """Run the silent cogitation test and shape its output for the UI.

    Args:
        model_id: Model identifier forwarded to ``run_silent_cogitation_test``.
        seed: Forwarded after ``int()`` coercion.
        prompt_type: Prompt variant name, forwarded unchanged.
        num_steps: Forwarded after ``int()`` coercion.
        timeout: Forwarded after ``int()`` coercion.
        progress: Gradio progress tracker used to report status to the UI.

    Returns:
        A ``(full_verdict, df, results)`` tuple: the Markdown verdict followed
        by a timing summary line, a ``DataFrame`` of per-step state deltas,
        and the raw results dict with its ``"verdict"`` entry removed.
    """
    progress(0, desc="Starting Silent Cogitation Test...")
    results = run_silent_cogitation_test(
        model_id,
        int(seed),
        prompt_type,
        int(num_steps),
        int(timeout),
    )
    progress(1.0, desc="Test complete.")

    # ``pop`` takes the verdict out of the dict, so the raw results returned
    # below no longer contain it.
    verdict_text = results.pop("verdict")

    summary_parts = [
        f"**Steps Completed:** {results['steps_completed']}",
        f"**Total Duration:** {results['total_duration_s']:.2f}s",
        f"**Avg Time/Step:** {results['mean_step_time_ms']:.2f}ms (StdDev: {results['stdev_step_time_ms']:.2f}ms)",
    ]
    stats_md = " | ".join(summary_parts)
    full_verdict = f"{verdict_text}\n\n{stats_md}"

    # One row per recorded delta, indexed from 0; a missing "state_deltas"
    # entry yields an empty frame.
    state_deltas = results.get("state_deltas", [])
    df = pd.DataFrame(
        {
            "Step": range(len(state_deltas)),
            "State Change (Delta)": state_deltas,
        }
    )

    if DEBUG:
        print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(results, indent=2))
    return full_verdict, df, results
# --- Gradio App Definition ---
with gr.Blocks(theme=theme, title="BP-Φ Suite 4.0") as demo:
    gr.Markdown("# 🧠 BP-Φ Suite 4.0: Probing for Internal Cognitive Dynamics")

    with gr.Tabs():
        # Tab 1: baseline vs. ablation runs of the workspace suite.
        with gr.TabItem("1. Workspace & Ablations (ΔΦ Test)"):
            gr.Markdown("Tests if memory performance depends on a recurrent workspace. A significant **ΔΦ > 0** supports the hypothesis.")
            with gr.Row():
                # Left column: run parameters.
                with gr.Column(scale=1):
                    ws_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    ws_trials = gr.Slider(minimum=3, maximum=30, value=5, step=1, label="Number of Scenarios")
                    ws_seed = gr.Slider(minimum=1, maximum=1000, value=42, step=1, label="Seed")
                    ws_temp = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature")
                    ws_run_abl = gr.Checkbox(value=True, label="Run Ablations")
                    ws_run_btn = gr.Button("Run ΔΦ Evaluation", variant="primary")
                # Right column: verdict, summary table, and collapsible raw output.
                with gr.Column(scale=2):
                    ws_verdict = gr.Markdown("### Results will appear here.")
                    ws_summary_df = gr.DataFrame(label="Summary Metrics")
                    with gr.Accordion("Raw JSON Output", open=False):
                        ws_raw_json = gr.JSON()
            # Input order must match run_workspace_and_display's parameters.
            ws_run_btn.click(
                fn=run_workspace_and_display,
                inputs=[ws_model_id, ws_trials, ws_seed, ws_temp, ws_run_abl],
                outputs=[ws_verdict, ws_summary_df, ws_raw_json],
            )

        # Tab 2: silent cogitation test with a plot of per-step state change.
        with gr.TabItem("2. Silent Cogitation & Halting"):
            gr.Markdown("Tests for internal 'thinking' without text generation. A non-converging or chaotic **State Change** pattern suggests complex internal dynamics.")
            with gr.Row():
                with gr.Column(scale=1):
                    sc_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    sc_prompt_type = gr.Radio(["control_long_prose", "resonance_prompt"], label="Prompt Type", value="resonance_prompt")
                    sc_seed = gr.Slider(minimum=1, maximum=1000, value=42, step=1, label="Seed")
                    sc_num_steps = gr.Slider(minimum=10, maximum=500, value=100, step=10, label="Number of Internal Steps")
                    sc_timeout = gr.Slider(minimum=10, maximum=300, value=120, step=10, label="Timeout (seconds)")
                    sc_run_btn = gr.Button("Run Silent Cogitation Test", variant="primary")
                with gr.Column(scale=2):
                    sc_verdict = gr.Markdown("### Results will appear here.")
                    # x/y name the DataFrame columns built by run_cogitation_and_display.
                    sc_plot = gr.LinePlot(x="Step", y="State Change (Delta)", label="Internal State Convergence", show_label=True)
                    with gr.Accordion("Raw Run Details (JSON)", open=False):
                        sc_results = gr.JSON()
            # Input order must match run_cogitation_and_display's parameters.
            sc_run_btn.click(
                fn=run_cogitation_and_display,
                inputs=[sc_model_id, sc_seed, sc_prompt_type, sc_num_steps, sc_timeout],
                outputs=[sc_verdict, sc_plot, sc_results],
            )

        # Tab 3: the runner function is wired directly; its return value is shown as JSON.
        with gr.TabItem("3. Cognitive Seismograph"):
            gr.Markdown("Records internal neural activations to find the 'fingerprint' of a memory being recalled.")
            with gr.Row():
                with gr.Column(scale=1):
                    cs_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    cs_seed = gr.Slider(minimum=1, maximum=1000, value=42, step=1, label="Seed")
                    cs_run_btn = gr.Button("Run Seismograph Analysis", variant="primary")
                with gr.Column(scale=2):
                    cs_results = gr.JSON(label="Activation Similarity Results")
            cs_run_btn.click(
                fn=run_seismograph_suite,
                inputs=[cs_model_id, cs_seed],
                outputs=cs_results,
            )

        # Tab 4: same direct wiring as tab 3, for the shock test runner.
        with gr.TabItem("4. Symbolic Shock Test"):
            gr.Markdown("Measures how the model reacts to semantically unexpected information.")
            with gr.Row():
                with gr.Column(scale=1):
                    ss_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    ss_seed = gr.Slider(minimum=1, maximum=1000, value=42, step=1, label="Seed")
                    ss_run_btn = gr.Button("Run Shock Test", variant="primary")
                with gr.Column(scale=2):
                    ss_results = gr.JSON(label="Shock Test Results")
            ss_run_btn.click(
                fn=run_shock_test_suite,
                inputs=[ss_model_id, ss_seed],
                outputs=ss_results,
            )
if __name__ == "__main__":
    # Only start the server when run as a script: listen on all network
    # interfaces (0.0.0.0) on port 7860.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )
|