Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import traceback | |
| import sys | |
| # Wichtige Imports für die neuen Pre-Flight Checks | |
| from cognitive_mapping_probe.pre_flight_checks import run_pre_flight_checks | |
| from cognitive_mapping_probe.orchestrator import run_cognitive_titration_experiment | |
| from cognitive_mapping_probe.prompts import RESONANCE_PROMPTS | |
| from cognitive_mapping_probe.utils import dbg | |
# --- UI theme and layout ---
# Overrides applied on top of the stock "Soft" theme: page background, block
# styling and the primary button colours.
_THEME_OVERRIDES = {
    "body_background_fill": "#fdf8f2",
    "block_background_fill": "white",
    "block_border_width": "1px",
    "block_shadow": "*shadow_drop_lg",
    "button_primary_background_fill": "*primary_500",
    "button_primary_text_color": "white",
}

theme = gr.themes.Soft(
    primary_hue="orange",
    secondary_hue="amber",
).set(**_THEME_OVERRIDES)

# --- Default model ID ---
# Used as the initial value of the "Model ID" textbox and as the model the
# pre-flight checks are run against.
DEFAULT_MODEL_ID = "google/gemma-3-1b-it"
# --- Wrapper functions for Gradio ---
def _summarize_breaking_points(details_df: pd.DataFrame, prompt_type: str) -> str:
    """Build the Markdown summary of the cognitive breaking points.

    Args:
        details_df: One row per run; must provide the columns ``concept``,
            ``strength`` and ``termination_reason``.
        prompt_type: Name of the selected prompt, echoed in the baseline warning.

    Returns:
        Markdown text with a header, an optional baseline notice and one bullet
        per concept stating either the first non-converged strength or the
        highest strength that was still stable.
    """
    parts = [
        "### 💥 Cognitive Breaking Points (CBP)\n",
        "Der CBP ist die erste Stärke, bei der das Modell nicht mehr konvergiert (`max_steps_reached`).\n\n",
    ]

    # The baseline is the run with injection strength 0.0. The strength list is
    # free-form user input, so a baseline is not guaranteed to exist; indexing
    # `.iloc[0]` unconditionally would raise and discard all valid results.
    baseline_rows = details_df[details_df['strength'] == 0.0]
    if baseline_rows.empty:
        parts.append("**⚠️ Hinweis: Es wurde keine Baseline (Stärke 0.0) gemessen.**\n")
        parts.append("Ohne Baseline lässt sich nicht prüfen, ob der Prompt ohne Injektion konvergiert.\n\n")
    elif baseline_rows.iloc[0]['termination_reason'] != 'converged':
        parts.append("**‼️ ACHTUNG: Baseline (Stärke 0.0) ist nicht konvergiert!**\n")
        parts.append(
            f"Der gewählte Prompt (`{prompt_type}`) ist für dieses Modell zu anspruchsvoll. Die Ergebnisse sind nicht aussagekräftig.\n\n"
        )

    for concept in details_df['concept'].unique():
        # Sort by strength so the first non-converged row is the breaking point.
        concept_df = details_df[details_df['concept'] == concept].sort_values(by='strength')
        failed_runs = concept_df[concept_df['termination_reason'] != 'converged']
        if failed_runs.empty:
            parts.append(f"- **'{concept}'**: ✅ Stabil bis Stärke **{concept_df['strength'].max():.2f}**\n")
        else:
            breaking_point_row = failed_runs.iloc[0]
            parts.append(f"- **'{concept}'**: 📉 Kollaps bei Stärke **{breaking_point_row['strength']:.2f}**\n")

    return "".join(parts)


def run_experiment_and_display(
    model_id: str,
    prompt_type: str,
    seed: int,
    concepts_str: str,
    strength_levels_str: str,
    num_steps: int,
    temperature: float,
    progress=gr.Progress(track_tqdm=True)
):
    """Run the main titration experiment and format its results for the UI.

    Args:
        model_id: Model identifier forwarded to the experiment.
        prompt_type: Name of the prompt to use.
        seed: Global seed; coerced to ``int`` because sliders may deliver floats.
        concepts_str: Comma-separated concepts, forwarded unparsed.
        strength_levels_str: Comma-separated injection strengths, forwarded unparsed.
        num_steps: Maximum number of internal steps; coerced to ``int``.
        temperature: Sampling temperature; coerced to ``float``.
        progress: Gradio progress tracker. It is created in the signature
            default on purpose, following Gradio's convention for progress
            tracking in event handlers.

    Returns:
        A 3-tuple ``(summary_markdown, details_dataframe, raw_results)``. If the
        experiment yields no runs, the summary is a "no data" notice and the
        DataFrame is empty. If any exception occurs, the summary contains the
        formatted traceback, the DataFrame is empty and the raw results are
        ``{}``; the exception is never propagated to Gradio.
    """
    try:
        results = run_cognitive_titration_experiment(
            model_id, prompt_type, int(seed), concepts_str, strength_levels_str,
            int(num_steps), float(temperature), progress
        )
        all_runs = results.get("runs", [])
        if not all_runs:
            return "### ⚠️ No Data Generated\nDas Experiment lief durch, aber es wurden keine Datenpunkte erzeugt. Bitte Logs prüfen.", pd.DataFrame(), results

        details_df = pd.DataFrame(all_runs)
        summary_text = _summarize_breaking_points(details_df, prompt_type)
        return summary_text, details_df, results
    except Exception:
        # UI boundary: show the full traceback to the user instead of letting
        # the handler crash.
        error_str = traceback.format_exc()
        return f"### ❌ Experiment Failed\nEin unerwarteter Fehler ist aufgetreten:\n\n```\n{error_str}\n```", pd.DataFrame(), {}
# --- Gradio app definition ---
with gr.Blocks(theme=theme, title="Cognitive Breaking Point Probe") as demo:
    gr.Markdown(value="# 💥 Cognitive Breaking Point Probe")
    # The diagnostics tab has been removed; the UI now consists of the main
    # experiment only.
    gr.Markdown(
        value="Misst den 'Cognitive Breaking Point' (CBP) – die Injektionsstärke, bei der der Denkprozess eines LLMs von Konvergenz zu einer Endlosschleife kippt."
    )

    with gr.Row(variant='panel'):
        # Left column: experiment parameters and the run button.
        with gr.Column(scale=1):
            gr.Markdown(value="### Parameters")
            model_id_input = gr.Textbox(label="Model ID", value=DEFAULT_MODEL_ID)
            prompt_type_input = gr.Radio(
                label="Prompt Type (Cognitive Load)",
                choices=list(RESONANCE_PROMPTS.keys()),
                value="control_long_prose",
                info="Beginne mit 'control_long_prose' für eine stabile Baseline!",
            )
            seed_input = gr.Slider(
                minimum=1, maximum=1000, value=42, step=1, label="Global Seed"
            )
            concepts_input = gr.Textbox(
                label="Concepts (comma-separated)",
                value="apple, solitude, fear",
            )
            strength_levels_input = gr.Textbox(
                label="Injection Strengths",
                value="0.0, 0.5, 1.0, 1.5, 2.0",
            )
            num_steps_input = gr.Slider(
                minimum=50, maximum=500, value=250, step=10, label="Max. Internal Steps"
            )
            temperature_input = gr.Slider(
                minimum=0.01, maximum=1.5, value=0.7, step=0.01, label="Temperature"
            )
            run_btn = gr.Button(value="Run Cognitive Titration", variant="primary")

        # Right column: summary, per-run table and the raw result payload.
        with gr.Column(scale=2):
            gr.Markdown(value="### Results")
            summary_output = gr.Markdown(
                value="Zusammenfassung der Breaking Points erscheint hier.",
                label="Key Findings Summary",
            )
            details_output = gr.DataFrame(
                headers=["concept", "strength", "responded", "termination_reason", "generated_text"],
                label="Detailed Run Data",
                wrap=True,
            )
            with gr.Accordion(label="Raw JSON Output", open=False):
                raw_json_output = gr.JSON()

    # Input order must match the positional parameters of
    # run_experiment_and_display; output order must match its returned tuple.
    experiment_inputs = [
        model_id_input,
        prompt_type_input,
        seed_input,
        concepts_input,
        strength_levels_input,
        num_steps_input,
        temperature_input,
    ]
    experiment_outputs = [summary_output, details_output, raw_json_output]
    run_btn.click(
        fn=run_experiment_and_display,
        inputs=experiment_inputs,
        outputs=experiment_outputs,
    )
# --- Main execution block ---
if __name__ == "__main__":
    print("="*80)
    print("🔬 RUNNING PRE-FLIGHT DIAGNOSTICS FOR EXPERIMENTAL APPARATUS")
    print("="*80)

    try:
        # Run the mandatory system tests against a real model. If anything
        # fails here, the experiment is not considered valid.
        # Only the checks are guarded, so a later failure while launching or
        # serving the app is not misreported as a failed diagnostic.
        run_pre_flight_checks(model_id=DEFAULT_MODEL_ID, seed=42)
    except Exception as e:
        # AssertionError is a subclass of Exception, so failed assertions
        # inside the checks are covered by this handler as well.
        print("\n" + "="*80)
        print("❌ PRE-FLIGHT DIAGNOSTIC FAILED")
        print("="*80)
        print(f"Error Type: {type(e).__name__}")
        print(f"Error Details: {e}")
        print("\nDie experimentelle Apparatur funktioniert nicht wie erwartet.")
        print("Die Gradio-App wird nicht gestartet, um fehlerhafte Messungen zu verhindern.")
        traceback.print_exc()
        sys.exit(1)  # Terminate with a non-zero exit code.
    else:
        print("\n" + "="*80)
        print("✅ ALL DIAGNOSTICS PASSED. LAUNCHING GRADIO APP...")
        print("="*80)
        # Launch the Gradio app only after the checks succeeded.
        demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)