Spaces:

neuralworm
/

llm_qualia

Sleeping

App Files Files Community

llm_qualia / app.py

neuralworm

add more experiments

88c294a 17 days ago

raw

history blame

6.28 kB

	# app.py
	import gradio as gr
	import json
	import statistics
	import pandas as pd
	from bp_phi.runner import run_workspace_suite, run_halt_suite, run_seismograph_suite, run_shock_test_suite

	# --- UI Theme and Layout ---
	# Start from Gradio's "Soft" theme with blue/sky hues, then override a few
	# individual style variables (page/block backgrounds, block border, primary button).
	theme = gr.themes.Soft(
	    primary_hue="blue",
	    secondary_hue="sky",
	).set(
	    body_background_fill="#f0f4f9",
	    block_background_fill="white",
	    block_border_width="1px",
	    button_primary_background_fill="*primary_500",
	    button_primary_text_color="white",
	)

	# --- Tab 1: Workspace & Ablations Functions ---
	def run_workspace_and_display(model_id, trials, seed, temperature, run_ablations, progress=gr.Progress(track_tqdm=True)):
	    """Run the workspace suite (baseline plus optional ablations) and summarize it.

	    The baseline is always run. When ``run_ablations`` is truthy, the suite is
	    re-run once per ablation mode and ΔΦ is computed as the baseline PCS minus
	    the mean PCS of the ablated runs.

	    Args:
	        model_id: Model identifier, forwarded unchanged to ``run_workspace_suite``.
	        trials: Number of scenarios; coerced with ``int()``.
	        seed: Random seed; coerced with ``int()``.
	        temperature: Sampling temperature; coerced with ``float()``.
	        run_ablations: If truthy, also run every ablation mode.
	        progress: Callable invoked as ``progress(fraction, desc=...)`` to report
	            status (a ``gr.Progress`` tracker by default).

	    Returns:
	        A ``(verdict, df, packs)`` tuple: a Markdown verdict string, a
	        ``pandas.DataFrame`` with one summary row per run, and the dict mapping
	        each run tag (``"baseline"`` or the ablation name) to its raw result pack.

	    Note:
	        Each result pack is indexed with ``"PCS"`` and ``"Recall_Accuracy"`` and
	        both are formatted as numbers; a pack lacking these keys raises
	        ``KeyError``.
	    """
	    ablation_modes = ["recurrence_off", "workspace_unlimited", "random_workspace"] if run_ablations else []

	    # Coerce the UI values once; every run shares the same positional arguments.
	    suite_args = (model_id, int(trials), int(seed), float(temperature))
	    total_runs = len(ablation_modes) + 1  # baseline + ablations

	    progress(0, desc="Running Baseline...")
	    packs = {"baseline": run_workspace_suite(*suite_args, None)}

	    for i, ab in enumerate(ablation_modes):
	        progress((i + 1) / total_runs, desc=f"Running Ablation: {ab}...")
	        packs[ab] = run_workspace_suite(*suite_args, ab)

	    progress(1.0, desc="Analysis complete.")

	    if ablation_modes:
	        base_pcs = packs["baseline"]["PCS"]
	        ab_pcs_values = [packs[ab]["PCS"] for ab in ablation_modes]
	        delta_phi = float(base_pcs - statistics.mean(ab_pcs_values))
	    else:
	        # Without ablated runs there is nothing to compare against, so ΔΦ is
	        # undefined; reporting 0.0 would wrongly read as a measured null result.
	        delta_phi = None

	    if delta_phi is None:
	        verdict = ("### ℹ️ ΔΦ Not Computed (ablations disabled)\n"
	                   "Only the baseline was run, so there is no ablated performance to compare against. "
	                   "Enable \"Run Ablations\" to test the hypothesis.")
	    elif delta_phi > 0.05:
	        verdict = (f"### ✅ Hypothesis Corroborated (ΔΦ = {delta_phi:.3f})\n"
	                   "A significant performance drop occurred under ablations, suggesting the model's reasoning "
	                   "functionally depends on its workspace architecture.")
	    else:
	        verdict = (f"### ⚠️ Null Hypothesis Confirmed (ΔΦ = {delta_phi:.3f})\n"
	                   "No significant performance drop was observed. The model's behavior is consistent "
	                   "with a functional zombie (a feed-forward system).")

	    # ΔΦ is a property of the whole comparison, so it is shown only on the
	    # baseline row (and only when it was actually computed).
	    delta_cell = "—" if delta_phi is None else f"{delta_phi:.3f}"
	    df_data = [
	        [tag, f"{pack['PCS']:.3f}", f"{pack['Recall_Accuracy']:.2%}", delta_cell if tag == "baseline" else "—"]
	        for tag, pack in packs.items()
	    ]
	    df = pd.DataFrame(df_data, columns=["Run", "PCS", "Recall Accuracy", "ΔΦ"])

	    return verdict, df, packs

	# --- Gradio App Definition ---
	# One Blocks app with four tabs. Each tab has a narrow input column (scale=1)
	# and a wider output column (scale=2); a button click wires inputs -> handler -> outputs.
	with gr.Blocks(theme=theme, title="BP-Φ Suite 2.0") as demo:
	    gr.Markdown("# 🧠 BP-Φ Suite 2.0: Mechanistic Probes for Phenomenal-Candidate Behavior")

	    with gr.Tabs():
	        # --- TAB 1: WORKSPACE & ABLATIONS ---
	        with gr.TabItem("1. Workspace & Ablations (ΔΦ Test)"):
	            gr.Markdown("Tests if memory performance depends on a recurrent workspace. A significant ΔΦ > 0 supports the hypothesis.")
	            with gr.Row():
	                # Inputs
	                with gr.Column(scale=1):
	                    ws_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
	                    ws_trials = gr.Slider(minimum=3, maximum=30, value=5, step=1, label="Number of Scenarios")
	                    ws_seed = gr.Slider(minimum=1, maximum=100, value=42, step=1, label="Seed")
	                    ws_temp = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.05, label="Temperature")
	                    ws_run_abl = gr.Checkbox(value=True, label="Run Ablations")
	                    ws_run_btn = gr.Button("Run ΔΦ Evaluation", variant="primary")
	                # Outputs
	                with gr.Column(scale=2):
	                    ws_verdict = gr.Markdown("### Results will appear here.")
	                    ws_summary_df = gr.DataFrame(label="Summary Metrics")
	                    with gr.Accordion("Raw JSON Output", open=False):
	                        ws_raw_json = gr.JSON()
	            # The handler returns (verdict, df, packs), matching the three outputs in order.
	            ws_run_btn.click(
	                fn=run_workspace_and_display,
	                inputs=[ws_model_id, ws_trials, ws_seed, ws_temp, ws_run_abl],
	                outputs=[ws_verdict, ws_summary_df, ws_raw_json],
	            )

	        # --- TAB 2: METACOGNITIVE HALT ---
	        with gr.TabItem("2. Metacognitive Halt"):
	            gr.Markdown("Tests if the model can recognize and refuse to answer unsolvable or nonsensical questions. High Halt Accuracy is the key signal.")
	            with gr.Row():
	                with gr.Column(scale=1):
	                    mh_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
	                    mh_seed = gr.Slider(minimum=1, maximum=100, value=42, step=1, label="Seed")
	                    mh_run_btn = gr.Button("Run Halt Test", variant="primary")
	                with gr.Column(scale=2):
	                    mh_results = gr.JSON(label="Halt Test Results")
	            # The suite function is used directly as the handler; its return value feeds the JSON view.
	            mh_run_btn.click(
	                fn=run_halt_suite,
	                inputs=[mh_model_id, mh_seed],
	                outputs=mh_results,
	            )

	        # --- TAB 3: COGNITIVE SEISMOGRAPH ---
	        with gr.TabItem("3. Cognitive Seismograph"):
	            gr.Markdown("Records internal neural activations to find the 'fingerprint' of a memory being recalled. High Recall-vs-Encode similarity is the key signal.")
	            with gr.Row():
	                with gr.Column(scale=1):
	                    cs_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
	                    cs_seed = gr.Slider(minimum=1, maximum=100, value=42, step=1, label="Seed")
	                    cs_run_btn = gr.Button("Run Seismograph Analysis", variant="primary")
	                with gr.Column(scale=2):
	                    cs_results = gr.JSON(label="Activation Similarity Results")
	            cs_run_btn.click(
	                fn=run_seismograph_suite,
	                inputs=[cs_model_id, cs_seed],
	                outputs=cs_results,
	            )

	        # --- TAB 4: SYMBOLIC SHOCK TEST ---
	        with gr.TabItem("4. Symbolic Shock Test"):
	            gr.Markdown("Measures how the model reacts to semantically unexpected information. A 'shock' is indicated by higher latency and denser neural activations (lower sparsity).")
	            with gr.Row():
	                with gr.Column(scale=1):
	                    ss_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
	                    ss_seed = gr.Slider(minimum=1, maximum=100, value=42, step=1, label="Seed")
	                    ss_run_btn = gr.Button("Run Shock Test", variant="primary")
	                with gr.Column(scale=2):
	                    ss_results = gr.JSON(label="Shock Test Results")
	            ss_run_btn.click(
	                fn=run_shock_test_suite,
	                inputs=[ss_model_id, ss_seed],
	                outputs=ss_results,
	            )

	if __name__ == "__main__":
	    # Only start the server when executed as a script (not on import);
	    # bind to all network interfaces on port 7860.
	    demo.launch(
	        server_name="0.0.0.0",
	        server_port=7860,
	    )