Spaces:
Sleeping
Sleeping
| # app.py | |
| import gradio as gr | |
| import json | |
| import statistics | |
| import pandas as pd | |
| from bp_phi.runner import run_workspace_suite, run_silent_cogitation_test, run_seismograph_suite, run_shock_test_suite | |
| from bp_phi.runner_utils import dbg, DEBUG | |
# --- UI Theme and Layout ---
# Soft base theme with blue/sky hues; the .set(...) call overrides the page
# background, block styling, and primary-button colours.
theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="sky",
).set(
    body_background_fill="#f0f4f9",
    block_background_fill="white",
    block_border_width="1px",
    button_primary_background_fill="*primary_500",
    button_primary_text_color="white",
)
# --- Tab 1: Workspace & Ablations Functions ---
def run_workspace_and_display(model_id, trials, seed, temperature, run_ablations, progress=gr.Progress(track_tqdm=True)):
    """Run the baseline workspace suite (plus optional ablations) and summarise ΔΦ.

    The baseline is always run. When ``run_ablations`` is truthy, the suite is
    re-run once per ablation mode with the same model, trial count, seed and
    temperature. ΔΦ is the baseline "PCS" minus the mean "PCS" of the ablation
    runs.

    Args:
        model_id: Model identifier forwarded to ``run_workspace_suite``.
        trials: Number of scenarios; cast to ``int`` before use.
        seed: Random seed; cast to ``int`` before use.
        temperature: Sampling temperature; cast to ``float`` before use.
        run_ablations: Whether to run the ablation modes in addition to the
            baseline.
        progress: Gradio progress tracker used to report run status.

    Returns:
        A ``(verdict, df, packs)`` tuple: a Markdown verdict string, a summary
        ``DataFrame`` with one row per run, and the dict of raw result packs
        keyed by run name (``"baseline"`` plus each ablation mode).
    """
    # Convert the UI values once instead of on every suite call.
    n_trials = int(trials)
    rng_seed = int(seed)
    temp = float(temperature)
    ablation_modes = ["recurrence_off", "workspace_unlimited", "random_workspace"] if run_ablations else []

    progress(0, desc="Running Baseline...")
    packs = {"baseline": run_workspace_suite(model_id, n_trials, rng_seed, temp, None)}

    # The baseline counts as one completed run, hence the +1 in the denominator.
    total_runs = len(ablation_modes) + 1
    for done, ab in enumerate(ablation_modes, start=1):
        progress(done / total_runs, desc=f"Running Ablation: {ab}...")
        packs[ab] = run_workspace_suite(model_id, n_trials, rng_seed, temp, ab)
    progress(1.0, desc="Analysis complete.")

    base_pcs = packs["baseline"]["PCS"]
    ab_pcs_values = [packs[ab]["PCS"] for ab in ablation_modes]

    if ab_pcs_values:
        delta_phi = float(base_pcs - statistics.mean(ab_pcs_values))
        delta_cell = f"{delta_phi:.3f}"
        if delta_phi > 0.05:
            verdict = (f"### ✅ Hypothesis Corroborated (ΔΦ = {delta_phi:.3f})\n...")
        else:
            verdict = (f"### ⚠️ Null Hypothesis Confirmed (ΔΦ = {delta_phi:.3f})\n...")
    else:
        # Without ablation runs there is nothing to compare against. Reporting
        # ΔΦ = 0.000 and a "null hypothesis" verdict would present a comparison
        # that never happened, so say explicitly that ΔΦ was not computed.
        delta_cell = "—"
        verdict = (
            "### ℹ️ ΔΦ Not Computed\n"
            "Ablations were disabled, so the baseline could not be compared against any ablated run."
        )

    # ΔΦ is a property of the whole comparison, so it is shown on the baseline row only.
    df_data = [
        [
            tag,
            f"{pack['PCS']:.3f}",
            f"{pack['Recall_Accuracy']:.2%}",
            delta_cell if tag == "baseline" else "—",
        ]
        for tag, pack in packs.items()
    ]
    df = pd.DataFrame(df_data, columns=["Run", "PCS", "Recall Accuracy", "ΔΦ"])

    if DEBUG:
        print("\n--- WORKSPACE & ABLATIONS FINAL RESULTS ---\n", json.dumps(packs, indent=2))
    return verdict, df, packs
# --- Tab 2: Silent Cogitation Function ---
def run_cogitation_and_display(model_id, seed, prompt_type, num_steps, timeout, progress=gr.Progress(track_tqdm=True)):
    """Run the silent cogitation test and format its results for the UI.

    Args:
        model_id: Model identifier forwarded to ``run_silent_cogitation_test``.
        seed: Random seed; cast to ``int`` before use.
        prompt_type: Prompt variant name, forwarded unchanged.
        num_steps: Number of internal steps; cast to ``int`` before use.
        timeout: Timeout value; cast to ``int`` before use.
        progress: Gradio progress tracker used to report run status.

    Returns:
        A ``(full_verdict, df, results)`` tuple: the verdict text followed by a
        one-line timing summary, a ``DataFrame`` of per-step state deltas for
        plotting, and the raw results dict with its ``"verdict"`` key removed.
    """
    progress(0, desc="Starting Silent Cogitation Test...")
    results = run_silent_cogitation_test(
        model_id,
        int(seed),
        prompt_type,
        int(num_steps),
        int(timeout),
    )
    progress(1.0, desc="Test complete.")

    # pop() removes the verdict from the dict, so the raw results returned
    # below (and shown in the JSON panel) no longer contain it.
    verdict_text = results.pop("verdict")
    stats_md = (
        f"**Steps Completed:** {results['steps_completed']} | "
        f"**Total Duration:** {results['total_duration_s']:.2f}s | "
        f"**Avg Time/Step:** {results['mean_step_time_ms']:.2f}ms (StdDev: {results['stdev_step_time_ms']:.2f}ms)"
    )
    full_verdict = f"{verdict_text}\n\n{stats_md}"

    # One row per recorded delta; "Step" is simply the delta's position.
    deltas = results.get("state_deltas", [])
    plot_columns = {
        "Step": range(len(deltas)),
        "State Change (Delta)": deltas,
    }
    df = pd.DataFrame(plot_columns)

    if DEBUG:
        print("\n--- SILENT COGITATION FINAL RESULTS ---\n", json.dumps(results, indent=2))
    return full_verdict, df, results
# --- Gradio App Definition ---
# Four tabs, each with an input column (scale=1), an output column (scale=2),
# and one button wired to its runner function.
with gr.Blocks(theme=theme, title="BP-Φ Suite 4.0") as demo:
    gr.Markdown("# 🧠 BP-Φ Suite 4.0: Probing for Internal Cognitive Dynamics")

    with gr.Tabs():
        # --- TAB 1: WORKSPACE & ABLATIONS ---
        with gr.TabItem("1. Workspace & Ablations (ΔΦ Test)"):
            gr.Markdown("Tests if memory performance depends on a recurrent workspace. A significant **ΔΦ > 0** supports the hypothesis.")
            with gr.Row():
                with gr.Column(scale=1):
                    ws_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    ws_trials = gr.Slider(minimum=3, maximum=30, value=5, step=1, label="Number of Scenarios")
                    ws_seed = gr.Slider(minimum=1, maximum=1000, value=42, step=1, label="Seed")
                    ws_temp = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature")
                    ws_run_abl = gr.Checkbox(value=True, label="Run Ablations")
                    ws_run_btn = gr.Button("Run ΔΦ Evaluation", variant="primary")
                with gr.Column(scale=2):
                    ws_verdict = gr.Markdown("### Results will appear here.")
                    ws_summary_df = gr.DataFrame(label="Summary Metrics")
                    with gr.Accordion("Raw JSON Output", open=False):
                        ws_raw_json = gr.JSON()
            ws_run_btn.click(
                fn=run_workspace_and_display,
                inputs=[ws_model_id, ws_trials, ws_seed, ws_temp, ws_run_abl],
                outputs=[ws_verdict, ws_summary_df, ws_raw_json],
            )

        # --- TAB 2: SILENT COGITATION & HALTING ---
        with gr.TabItem("2. Silent Cogitation & Halting"):
            gr.Markdown("Tests for internal 'thinking' without text generation. A non-converging or chaotic **State Change** pattern suggests complex internal dynamics.")
            with gr.Row():
                with gr.Column(scale=1):
                    sc_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    sc_prompt_type = gr.Radio(
                        choices=["control_long_prose", "resonance_prompt"],
                        label="Prompt Type",
                        value="resonance_prompt",
                    )
                    sc_seed = gr.Slider(minimum=1, maximum=1000, value=42, step=1, label="Seed")
                    sc_num_steps = gr.Slider(minimum=10, maximum=500, value=100, step=10, label="Number of Internal Steps")
                    sc_timeout = gr.Slider(minimum=10, maximum=300, value=120, step=10, label="Timeout (seconds)")
                    sc_run_btn = gr.Button("Run Silent Cogitation Test", variant="primary")
                with gr.Column(scale=2):
                    sc_verdict = gr.Markdown("### Results will appear here.")
                    # The x/y names must match the DataFrame columns produced by
                    # run_cogitation_and_display.
                    sc_plot = gr.LinePlot(
                        x="Step",
                        y="State Change (Delta)",
                        label="Internal State Convergence",
                        show_label=True,
                    )
                    with gr.Accordion("Raw Run Details (JSON)", open=False):
                        sc_results = gr.JSON()
            sc_run_btn.click(
                fn=run_cogitation_and_display,
                inputs=[sc_model_id, sc_seed, sc_prompt_type, sc_num_steps, sc_timeout],
                outputs=[sc_verdict, sc_plot, sc_results],
            )

        # --- TAB 3 & 4 (unchanged) ---
        # NOTE(review): these two tabs hand the slider values straight to the
        # runner functions, whereas the tab 1/2 handlers cast the seed with
        # int() first. Confirm the runners accept whatever numeric type the
        # slider delivers.
        with gr.TabItem("3. Cognitive Seismograph"):
            gr.Markdown("Records internal neural activations to find the 'fingerprint' of a memory being recalled.")
            with gr.Row():
                with gr.Column(scale=1):
                    cs_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    cs_seed = gr.Slider(minimum=1, maximum=1000, value=42, step=1, label="Seed")
                    cs_run_btn = gr.Button("Run Seismograph Analysis", variant="primary")
                with gr.Column(scale=2):
                    cs_results = gr.JSON(label="Activation Similarity Results")
            cs_run_btn.click(
                fn=run_seismograph_suite,
                inputs=[cs_model_id, cs_seed],
                outputs=cs_results,
            )

        with gr.TabItem("4. Symbolic Shock Test"):
            gr.Markdown("Measures how the model reacts to semantically unexpected information.")
            with gr.Row():
                with gr.Column(scale=1):
                    ss_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    ss_seed = gr.Slider(minimum=1, maximum=1000, value=42, step=1, label="Seed")
                    ss_run_btn = gr.Button("Run Shock Test", variant="primary")
                with gr.Column(scale=2):
                    ss_results = gr.JSON(label="Shock Test Results")
            ss_run_btn.click(
                fn=run_shock_test_suite,
                inputs=[ss_model_id, ss_seed],
                outputs=ss_results,
            )
# Script entry point: serve the app on all network interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
    )