File size: 6,665 Bytes
c8fa89c b350371 c8fa89c eef89e3 b350371 c8fa89c b350371 c8fa89c eef89e3 c8fa89c eef89e3 c8fa89c eef89e3 b350371 eef89e3 b350371 eef89e3 c8fa89c b350371 c8fa89c b350371 c8fa89c b350371 c8fa89c b350371 c8fa89c b350371 c8fa89c b350371 c8fa89c b350371 c8fa89c b350371 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 |
import gradio as gr
import pandas as pd
import traceback
import sys
# Imports required for the pre-flight checks and the main experiment
from cognitive_mapping_probe.pre_flight_checks import run_pre_flight_checks
from cognitive_mapping_probe.orchestrator import run_cognitive_titration_experiment
from cognitive_mapping_probe.prompts import RESONANCE_PROMPTS
from cognitive_mapping_probe.utils import dbg
# --- UI Theme and Layout ---
# Start from Gradio's built-in Soft theme with a warm orange/amber palette ...
theme = gr.themes.Soft(primary_hue="orange", secondary_hue="amber")
# ... then override individual style variables. Values starting with "*"
# reference other theme variables instead of literal CSS values.
theme = theme.set(
    body_background_fill="#fdf8f2",
    block_background_fill="white",
    block_border_width="1px",
    block_shadow="*shadow_drop_lg",
    button_primary_background_fill="*primary_500",
    button_primary_text_color="white",
)
# --- Default model ID for tests and UI ---
# Used as the initial value of the "Model ID" textbox and as the model the
# pre-flight checks are run against at startup.
DEFAULT_MODEL_ID = "google/gemma-3-1b-it"
# --- Wrapper Functions for Gradio ---
def run_experiment_and_display(
    model_id: str,
    prompt_type: str,
    seed: int,
    concepts_str: str,
    strength_levels_str: str,
    num_steps: int,
    temperature: float,
    progress=gr.Progress(track_tqdm=True)
):
    """Run the main titration experiment and format its results for the UI.

    The UI values are forwarded to ``run_cognitive_titration_experiment``
    (``seed`` and ``num_steps`` cast to ``int``, ``temperature`` to ``float``).
    The ``"runs"`` entry of the returned mapping is turned into a DataFrame
    and summarised per concept: the lowest strength whose
    ``termination_reason`` is not ``'converged'`` is reported as the
    breaking point, otherwise the highest tested strength is reported as
    stable.

    Args:
        model_id: Model identifier passed through to the experiment.
        prompt_type: Prompt key passed through and echoed in the baseline
            warning.
        seed: Global seed.
        concepts_str: Concepts string, passed through unparsed.
        strength_levels_str: Injection strengths string, passed through
            unparsed.
        num_steps: Maximum number of internal steps.
        temperature: Sampling temperature.
        progress: Gradio progress tracker handed to the experiment.

    Returns:
        A ``(summary_markdown, details_dataframe, raw_results)`` tuple.
        If no runs were produced, the DataFrame is empty. If any exception
        occurs, the markdown contains the traceback, the DataFrame is empty
        and the raw results are ``{}``; exceptions are never propagated to
        Gradio.
    """
    try:
        results = run_cognitive_titration_experiment(
            model_id, prompt_type, int(seed), concepts_str, strength_levels_str,
            int(num_steps), float(temperature), progress
        )

        all_runs = results.get("runs", [])
        if not all_runs:
            return "### ⚠️ No Data Generated\nDas Experiment lief durch, aber es wurden keine Datenpunkte erzeugt. Bitte Logs prüfen.", pd.DataFrame(), results

        details_df = pd.DataFrame(all_runs)

        summary_parts = [
            "### 💥 Cognitive Breaking Points (CBP)\n",
            "Der CBP ist die erste Stärke, bei der das Modell nicht mehr konvergiert (`max_steps_reached`).\n\n",
        ]

        # The baseline is the un-injected run (strength 0.0). The strengths
        # are user-supplied, so a baseline may legitimately be absent; that
        # must not abort the whole summary.
        baseline_runs = details_df[details_df['strength'] == 0.0]
        if baseline_runs.empty:
            summary_parts.append(
                "**ℹ️ Hinweis: Keine Baseline (Stärke 0.0) in den Daten – die Baseline-Konvergenz konnte nicht geprüft werden.**\n\n"
            )
        elif baseline_runs.iloc[0]['termination_reason'] != 'converged':
            summary_parts.append("**‼️ ACHTUNG: Baseline (Stärke 0.0) ist nicht konvergiert!**\n")
            summary_parts.append(
                f"Der gewählte Prompt (`{prompt_type}`) ist für dieses Modell zu anspruchsvoll. Die Ergebnisse sind nicht aussagekräftig.\n\n"
            )

        for concept in details_df['concept'].unique():
            # Sort ascending so the first non-converged row is the lowest
            # strength at which the run failed to converge.
            concept_df = details_df[details_df['concept'] == concept].sort_values(by='strength')
            non_converged = concept_df[concept_df['termination_reason'] != 'converged']
            if not non_converged.empty:
                breaking_strength = non_converged.iloc[0]['strength']
                summary_parts.append(
                    f"- **'{concept}'**: 📉 Kollaps bei Stärke **{breaking_strength:.2f}**\n"
                )
            else:
                summary_parts.append(
                    f"- **'{concept}'**: ✅ Stabil bis Stärke **{concept_df['strength'].max():.2f}**\n"
                )

        return "".join(summary_parts), details_df, results

    except Exception:
        # UI boundary: surface the full traceback in the summary panel
        # instead of letting the Gradio callback raise.
        error_str = traceback.format_exc()
        return f"### ❌ Experiment Failed\nEin unerwarteter Fehler ist aufgetreten:\n\n```\n{error_str}\n```", pd.DataFrame(), {}
# --- Gradio App Definition ---
# Declarative UI layout. Components are created inside nested context
# managers (Blocks > Row > Column), so the statement order below is the
# on-screen order and must be preserved.
with gr.Blocks(theme=theme, title="Cognitive Breaking Point Probe") as demo:
    gr.Markdown("# 💥 Cognitive Breaking Point Probe")

    # The Diagnostics tab has been removed. The UI now consists solely of the main experiment.
    gr.Markdown(
        "Misst den 'Cognitive Breaking Point' (CBP) – die Injektionsstärke, bei der der Denkprozess eines LLMs von Konvergenz zu einer Endlosschleife kippt."
    )
    with gr.Row(variant='panel'):
        # Left column: experiment parameters and the run button.
        with gr.Column(scale=1):
            gr.Markdown("### Parameters")
            model_id_input = gr.Textbox(value=DEFAULT_MODEL_ID, label="Model ID")
            # Choices come from the keys of RESONANCE_PROMPTS; the preselected
            # value is hard-coded here.
            # NOTE(review): presumably "control_long_prose" is one of those keys — confirm.
            prompt_type_input = gr.Radio(
                choices=list(RESONANCE_PROMPTS.keys()),
                value="control_long_prose",
                label="Prompt Type (Cognitive Load)",
                info="Beginne mit 'control_long_prose' für eine stabile Baseline!"
            )
            seed_input = gr.Slider(1, 1000, 42, step=1, label="Global Seed")
            # Concepts and strengths are entered as free text and passed to
            # the callback as strings.
            concepts_input = gr.Textbox(value="apple, solitude, fear", label="Concepts (comma-separated)")
            strength_levels_input = gr.Textbox(value="0.0, 0.5, 1.0, 1.5, 2.0", label="Injection Strengths")
            num_steps_input = gr.Slider(50, 500, 250, step=10, label="Max. Internal Steps")
            temperature_input = gr.Slider(0.01, 1.5, 0.7, step=0.01, label="Temperature")
            run_btn = gr.Button("Run Cognitive Titration", variant="primary")

        # Right column (twice as wide): summary, per-run table, raw JSON.
        with gr.Column(scale=2):
            gr.Markdown("### Results")
            summary_output = gr.Markdown("Zusammenfassung der Breaking Points erscheint hier.", label="Key Findings Summary")
            details_output = gr.DataFrame(
                headers=["concept", "strength", "responded", "termination_reason", "generated_text"],
                label="Detailed Run Data",
                wrap=True,
            )
            with gr.Accordion("Raw JSON Output", open=False):
                raw_json_output = gr.JSON()

    # Wire the button to the callback. The order of `inputs` matches the
    # positional parameters of run_experiment_and_display, and the order of
    # `outputs` matches its three return values.
    run_btn.click(
        fn=run_experiment_and_display,
        inputs=[model_id_input, prompt_type_input, seed_input, concepts_input, strength_levels_input, num_steps_input, temperature_input],
        outputs=[summary_output, details_output, raw_json_output]
    )
# --- Main Execution Block ---
if __name__ == "__main__":
    # Script entry point: run the mandatory pre-flight checks against the
    # default model and launch the Gradio app only if they pass.
    print("="*80)
    print("🔬 RUNNING PRE-FLIGHT DIAGNOSTICS FOR EXPERIMENTAL APPARATUS")
    print("="*80)

    try:
        # Only the diagnostics belong in the try body, so that a failure to
        # launch the server is not misreported as a failed diagnostic.
        # If an error occurs here, the experiment is not valid.
        run_pre_flight_checks(model_id=DEFAULT_MODEL_ID, seed=42)
    except Exception as e:
        # AssertionError is a subclass of Exception, so this single clause
        # also covers failed assertions raised by the checks.
        print("\n" + "="*80)
        print("❌ PRE-FLIGHT DIAGNOSTIC FAILED")
        print("="*80)
        print(f"Error Type: {type(e).__name__}")
        print(f"Error Details: {e}")
        print("\nDie experimentelle Apparatur funktioniert nicht wie erwartet.")
        print("Die Gradio-App wird nicht gestartet, um fehlerhafte Messungen zu verhindern.")
        traceback.print_exc()
        sys.exit(1)  # Terminate with a non-zero exit code.
    else:
        print("\n" + "="*80)
        print("✅ ALL DIAGNOSTICS PASSED. LAUNCHING GRADIO APP...")
        print("="*80)

        # Start the Gradio app only after the diagnostics succeeded.
        demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
|