Spaces:
Sleeping
Sleeping
File size: 7,615 Bytes
c8fa89c eef89e3 c8fa89c eef89e3 c8fa89c eef89e3 c8fa89c eef89e3 c8fa89c eef89e3 c8fa89c eef89e3 c8fa89c eef89e3 c8fa89c eef89e3 c8fa89c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
import gradio as gr
import pandas as pd
import traceback
from cognitive_mapping_probe.orchestrator import run_cognitive_titration_experiment
from cognitive_mapping_probe.diagnostics import run_diagnostic_suite
from cognitive_mapping_probe.prompts import RESONANCE_PROMPTS
# --- UI Theme and Layout ---
# Base theme: Gradio's "Soft" preset with an orange/amber palette.
_base_theme = gr.themes.Soft(primary_hue="orange", secondary_hue="amber")

# Per-variable overrides applied on top of the preset. Values starting with
# "*" are passed through as-is (presumably references to other theme
# variables, per Gradio's theming convention -- confirm against the docs).
_theme_overrides = {
    "body_background_fill": "#fdf8f2",
    "block_background_fill": "white",
    "block_border_width": "1px",
    "block_shadow": "*shadow_drop_lg",
    "button_primary_background_fill": "*primary_500",
    "button_primary_text_color": "white",
}

theme = _base_theme.set(**_theme_overrides)
# --- Wrapper Functions for Gradio ---
def _summarize_breaking_points(details_df: pd.DataFrame, prompt_type: str) -> str:
    """Build the Markdown summary of cognitive breaking points.

    Args:
        details_df: One row per run. The columns ``concept``, ``strength`` and
            ``termination_reason`` are read here.
        prompt_type: Name of the selected prompt; only used in the baseline
            warning text.

    Returns:
        Markdown text: a fixed header, an optional baseline note, then one
        bullet per concept (in order of first appearance).
    """
    parts = [
        "### 💥 Cognitive Breaking Points (CBP)\n",
        "Der CBP ist die erste Stärke, bei der das Modell nicht mehr konvergiert (`max_steps_reached`).\n\n",
    ]

    # The baseline is the first run with strength 0.0. The runs are not
    # guaranteed to contain such a row, so never index into an empty
    # selection (that used to raise IndexError and discard all results).
    baseline_rows = details_df[details_df["strength"] == 0.0]
    if baseline_rows.empty:
        parts.append(
            "**⚠️ Hinweis: Keine Baseline (Stärke 0.0) in den Daten vorhanden.**\n"
            "Ohne Baseline kann nicht geprüft werden, ob der Prompt ohne Injektion konvergiert.\n\n"
        )
    elif baseline_rows.iloc[0]["termination_reason"] != "converged":
        parts.append("**‼️ ACHTUNG: Baseline (Stärke 0.0) ist nicht konvergiert!**\n")
        parts.append(
            f"Der gewählte Prompt (`{prompt_type}`) ist für dieses Modell zu anspruchsvoll. Die Ergebnisse der Titration sind nicht aussagekräftig.\n\n"
        )

    for concept in details_df["concept"].unique():
        concept_df = details_df[details_df["concept"] == concept].sort_values(by="strength")
        # Rows that did not converge, ordered by ascending strength; the first
        # one (if any) marks the breaking point for this concept.
        collapsed = concept_df[concept_df["termination_reason"] != "converged"]
        if not collapsed.empty:
            breaking_point = collapsed.iloc[0]["strength"]
            parts.append(f"- **'{concept}'**: 📉 Kollaps bei Stärke **{breaking_point:.2f}**\n")
        else:
            last_strength = concept_df["strength"].max()
            parts.append(
                f"- **'{concept}'**: ✅ Stabil bis Stärke **{last_strength:.2f}** (kein Kollaps detektiert)\n"
            )

    return "".join(parts)


def run_experiment_and_display(
    model_id: str,
    prompt_type: str,
    seed: int,
    concepts_str: str,
    strength_levels_str: str,
    num_steps: int,
    temperature: float,
    progress=gr.Progress(track_tqdm=True),
):
    """Run the main titration experiment and format its results for the UI.

    The arguments are forwarded to ``run_cognitive_titration_experiment``
    after coercing ``seed`` and ``num_steps`` to ``int`` and ``temperature``
    to ``float``.

    Args:
        model_id: Model identifier passed through to the experiment.
        prompt_type: Prompt name passed through to the experiment and quoted
            in the baseline warning.
        seed: Seed value; coerced with ``int()``.
        concepts_str: Concepts string, passed through unchanged.
        strength_levels_str: Strength levels string, passed through unchanged.
        num_steps: Step limit; coerced with ``int()``.
        temperature: Sampling temperature; coerced with ``float()``.
        progress: Gradio progress tracker handed to the experiment.

    Returns:
        A 3-tuple ``(summary_markdown, details_dataframe, raw_results)``.
        If the experiment yields no runs, the DataFrame is empty and the
        summary says so. If anything raises, the summary contains the
        traceback, the DataFrame is empty and the raw results are ``{}``.
    """
    try:
        results = run_cognitive_titration_experiment(
            model_id, prompt_type, int(seed), concepts_str, strength_levels_str,
            int(num_steps), float(temperature), progress,
        )

        all_runs = results.get("runs", [])
        if not all_runs:
            return (
                "### ⚠️ No Data Generated\nDas Experiment lief durch, aber es wurden keine Datenpunkte erzeugt. Bitte Logs prüfen.",
                pd.DataFrame(),
                results,
            )

        # One row per run; shown verbatim in the details table.
        details_df = pd.DataFrame(all_runs)
        summary_text = _summarize_breaking_points(details_df, prompt_type)
        return summary_text, details_df, results
    except Exception:
        # UI boundary: surface the full traceback in the Markdown panel
        # instead of letting the callback fail without explanation.
        error_str = traceback.format_exc()
        return (
            f"### ❌ Experiment Failed\nEin unerwarteter Fehler ist aufgetreten:\n\n```\n{error_str}\n```",
            pd.DataFrame(),
            {},
        )
def run_diagnostics_display(model_id: str, seed: int):
    """Run the diagnostic suite and render its outcome as Markdown.

    Calls ``run_diagnostic_suite`` with ``model_id`` and the seed coerced to
    ``int``. On success the suite's result is embedded in a success message;
    if anything raises, the traceback is embedded in a failure message.

    Returns:
        A Markdown string for display in the UI.
    """
    try:
        suite_report = run_diagnostic_suite(model_id, int(seed))
        success_header = (
            "### ✅ All Diagnostics Passed\n"
            "Die experimentelle Apparatur funktioniert wie erwartet.\n\n"
        )
        return success_header + f"**Details:**\n```\n{suite_report}\n```"
    except Exception:
        # Any failure is reported in the UI together with its traceback.
        failure_header = (
            "### ❌ Diagnostic Failed\n"
            "Ein Test ist fehlgeschlagen. Das Experiment ist nicht zuverlässig.\n\n"
        )
        trace_text = traceback.format_exc()
        return failure_header + f"**Error:**\n```\n{trace_text}\n```"
# --- Gradio App Definition ---
with gr.Blocks(theme=theme, title="Cognitive Breaking Point Probe") as demo:
    gr.Markdown("# 💥 Cognitive Breaking Point Probe")

    with gr.Tabs():
        # --- TAB 1: Main Experiment ---
        with gr.TabItem("🔬 Main Experiment: Titration"):
            gr.Markdown(
                "Misst den 'Cognitive Breaking Point' (CBP) – die Injektionsstärke, bei der der Denkprozess eines LLMs von Konvergenz zu einer Endlosschleife kippt."
            )

            with gr.Row(variant="panel"):
                # Left column: experiment parameters and the run button.
                with gr.Column(scale=1):
                    gr.Markdown("### Parameters")
                    model_id_input = gr.Textbox(
                        value="google/gemma-3-1b-it",
                        label="Model ID",
                    )
                    prompt_type_input = gr.Radio(
                        choices=list(RESONANCE_PROMPTS.keys()),
                        value="control_long_prose",
                        label="Prompt Type (Cognitive Load)",
                        info="Beginne mit 'control_long_prose' für eine stabile Baseline!",
                    )
                    seed_input = gr.Slider(
                        minimum=1, maximum=1000, value=42, step=1,
                        label="Global Seed",
                    )
                    concepts_input = gr.Textbox(
                        value="apple, solitude, fear",
                        label="Concepts (comma-separated)",
                    )
                    strength_levels_input = gr.Textbox(
                        value="0.0, 0.5, 1.0, 1.5, 2.0",
                        label="Injection Strengths (Titration Steps)",
                    )
                    num_steps_input = gr.Slider(
                        minimum=50, maximum=500, value=250, step=10,
                        label="Max. Internal Steps",
                    )
                    temperature_input = gr.Slider(
                        minimum=0.01, maximum=1.5, value=0.7, step=0.01,
                        label="Temperature",
                    )
                    run_btn = gr.Button("Run Cognitive Titration", variant="primary")

                # Right column: summary, per-run table and raw JSON.
                with gr.Column(scale=2):
                    gr.Markdown("### Results")
                    summary_output = gr.Markdown(
                        "Zusammenfassung der Breaking Points erscheint hier.",
                        label="Key Findings Summary",
                    )
                    details_output = gr.DataFrame(
                        headers=["concept", "strength", "responded", "termination_reason", "generated_text"],
                        label="Detailed Run Data",
                        wrap=True,
                        height=400,
                    )
                    with gr.Accordion("Raw JSON Output", open=False):
                        raw_json_output = gr.JSON()

            # The input order must match the positional parameters of
            # run_experiment_and_display; the outputs match its 3-tuple.
            run_btn.click(
                fn=run_experiment_and_display,
                inputs=[
                    model_id_input,
                    prompt_type_input,
                    seed_input,
                    concepts_input,
                    strength_levels_input,
                    num_steps_input,
                    temperature_input,
                ],
                outputs=[summary_output, details_output, raw_json_output],
            )

        # --- TAB 2: Diagnostics ---
        with gr.TabItem("ախ Diagnostics"):
            gr.Markdown(
                "Führt eine Reihe von Selbsttests durch, um die mechanische Integrität der experimentellen Apparatur zu validieren. "
                "**Wichtig:** Dies sollte vor jedem ernsthaften Experiment einmal ausgeführt werden, um sicherzustellen, dass die Ergebnisse zuverlässig sind."
            )

            # NOTE(review): the original indentation was lost; the button is
            # assumed to share the compact row with the two inputs -- confirm.
            with gr.Row(variant="compact"):
                diag_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                diag_seed = gr.Slider(minimum=1, maximum=1000, value=42, step=1, label="Seed")
                diag_btn = gr.Button("Run Diagnostic Suite", variant="secondary")

            diag_output = gr.Markdown(label="Diagnostic Results")

            diag_btn.click(
                fn=run_diagnostics_display,
                inputs=[diag_model_id, diag_seed],
                outputs=[diag_output],
            )
if __name__ == "__main__":
    # Script entry point: serve the app on all interfaces, port 7860,
    # with Gradio's debug flag enabled.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "debug": True,
    }
    demo.launch(**launch_options)
|