File size: 7,615 Bytes
c8fa89c
 
 
 
 
eef89e3
c8fa89c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eef89e3
c8fa89c
 
 
 
 
 
 
 
 
 
 
 
eef89e3
c8fa89c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eef89e3
 
 
 
 
 
 
c8fa89c
 
 
 
 
eef89e3
 
c8fa89c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eef89e3
 
 
 
 
 
c8fa89c
 
 
 
 
 
 
 
 
 
 
 
 
eef89e3
 
c8fa89c
 
 
 
 
 
eef89e3
c8fa89c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import gradio as gr
import pandas as pd
import traceback
from cognitive_mapping_probe.orchestrator import run_cognitive_titration_experiment
from cognitive_mapping_probe.diagnostics import run_diagnostic_suite
from cognitive_mapping_probe.prompts import RESONANCE_PROMPTS

# --- UI Theme and Layout ---
# Warm orange/amber palette on the Soft base theme. `.set()` overrides
# individual theme variables; `*`-prefixed values (e.g. *shadow_drop_lg)
# reference Gradio's built-in theme tokens rather than raw CSS.
theme = gr.themes.Soft(primary_hue="orange", secondary_hue="amber").set(
    body_background_fill="#fdf8f2",
    block_background_fill="white",
    block_border_width="1px",
    block_shadow="*shadow_drop_lg",
    button_primary_background_fill="*primary_500",
    button_primary_text_color="white",
)

# --- Wrapper Functions for Gradio ---

def run_experiment_and_display(
    model_id: str,
    prompt_type: str,
    seed: int,
    concepts_str: str,
    strength_levels_str: str,
    num_steps: int,
    temperature: float,
    progress=gr.Progress(track_tqdm=True)
):
    """
    Run the main titration experiment and format the results for the UI.

    Returns a 3-tuple matching the Gradio outputs wired to the run button:
    (markdown summary of breaking points, per-run details DataFrame,
    raw results dict for the JSON viewer). All exceptions are caught and
    rendered as Markdown so the UI never shows an unhandled traceback.
    """
    try:
        results = run_cognitive_titration_experiment(
            model_id, prompt_type, int(seed), concepts_str, strength_levels_str,
            int(num_steps), float(temperature), progress
        )

        all_runs = results.get("runs", [])

        if not all_runs:
            return "### ⚠️ No Data Generated\nDas Experiment lief durch, aber es wurden keine Datenpunkte erzeugt. Bitte Logs prüfen.", pd.DataFrame(), results

        # One row per (concept, strength) run.
        details_df = pd.DataFrame(all_runs)

        # Summary of breaking points: the CBP is the first strength at which
        # the model no longer converges ('max_steps_reached').
        summary_text = "### 💥 Cognitive Breaking Points (CBP)\n"
        summary_text += "Der CBP ist die erste Stärke, bei der das Modell nicht mehr konvergiert (`max_steps_reached`).\n\n"

        # BUG FIX: the original called `.iloc[0]` on the strength==0.0 selection
        # unconditionally; since the strength list is user-editable, removing
        # 0.0 made this raise IndexError. Guard against a missing baseline.
        baseline_rows = details_df[details_df['strength'] == 0.0]
        if baseline_rows.empty:
            summary_text += "**ℹ️ Hinweis:** Keine Baseline (Stärke 0.0) in den Titrationsstufen enthalten; Konvergenz-Referenz fehlt.\n\n"
        elif baseline_rows.iloc[0]['termination_reason'] != 'converged':
            summary_text += f"**‼️ ACHTUNG: Baseline (Stärke 0.0) ist nicht konvergiert!**\n"
            summary_text += f"Der gewählte Prompt (`{prompt_type}`) ist für dieses Modell zu anspruchsvoll. Die Ergebnisse der Titration sind nicht aussagekräftig.\n\n"

        for concept in details_df['concept'].unique():
            concept_df = details_df[details_df['concept'] == concept].sort_values(by='strength')
            # Filter once (the original evaluated this mask three times);
            # the first non-converged row, by ascending strength, is the CBP.
            non_converged = concept_df[concept_df['termination_reason'] != 'converged']
            if not non_converged.empty:
                breaking_point = non_converged.iloc[0]['strength']
                summary_text += f"- **'{concept}'**: 📉 Kollaps bei Stärke **{breaking_point:.2f}**\n"
            else:
                last_strength = concept_df['strength'].max()
                summary_text += f"- **'{concept}'**: ✅ Stabil bis Stärke **{last_strength:.2f}** (kein Kollaps detektiert)\n"

        return summary_text, details_df, results

    except Exception:
        # Top-level UI boundary: render the traceback instead of crashing Gradio.
        error_str = traceback.format_exc()
        return f"### ❌ Experiment Failed\nEin unerwarteter Fehler ist aufgetreten:\n\n```\n{error_str}\n```", pd.DataFrame(), {}


def run_diagnostics_display(model_id: str, seed: int):
    """
    Run the diagnostic suite and render its outcome as a Markdown string.

    On success, returns a pass message including the suite's details;
    on any exception, returns a failure message with the full traceback.
    """
    try:
        details = run_diagnostic_suite(model_id, int(seed))
    except Exception:
        failure_trace = traceback.format_exc()
        return (
            "### ❌ Diagnostic Failed\n"
            "Ein Test ist fehlgeschlagen. Das Experiment ist nicht zuverlässig.\n\n"
            f"**Error:**\n```\n{failure_trace}\n```"
        )
    return (
        "### ✅ All Diagnostics Passed\n"
        "Die experimentelle Apparatur funktioniert wie erwartet.\n\n"
        f"**Details:**\n```\n{details}\n```"
    )

# --- Gradio App Definition ---
# Two-tab layout: the titration experiment itself, and a self-test suite
# that validates the apparatus before running real experiments.
with gr.Blocks(theme=theme, title="Cognitive Breaking Point Probe") as demo:
    gr.Markdown("# 💥 Cognitive Breaking Point Probe")

    with gr.Tabs():
        # --- TAB 1: Main Experiment ---
        with gr.TabItem("🔬 Main Experiment: Titration"):
            gr.Markdown(
                "Misst den 'Cognitive Breaking Point' (CBP) – die Injektionsstärke, bei der der Denkprozess eines LLMs von Konvergenz zu einer Endlosschleife kippt."
            )
            with gr.Row(variant='panel'):
                with gr.Column(scale=1):
                    gr.Markdown("### Parameters")
                    model_id_input = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                    prompt_type_input = gr.Radio(
                        choices=list(RESONANCE_PROMPTS.keys()),
                        value="control_long_prose",
                        label="Prompt Type (Cognitive Load)",
                        info="Beginne mit 'control_long_prose' für eine stabile Baseline!"
                    )
                    seed_input = gr.Slider(1, 1000, 42, step=1, label="Global Seed")
                    concepts_input = gr.Textbox(value="apple, solitude, fear", label="Concepts (comma-separated)")
                    strength_levels_input = gr.Textbox(value="0.0, 0.5, 1.0, 1.5, 2.0", label="Injection Strengths (Titration Steps)")
                    num_steps_input = gr.Slider(50, 500, 250, step=10, label="Max. Internal Steps")
                    temperature_input = gr.Slider(0.01, 1.5, 0.7, step=0.01, label="Temperature")
                    run_btn = gr.Button("Run Cognitive Titration", variant="primary")

                with gr.Column(scale=2):
                    gr.Markdown("### Results")
                    summary_output = gr.Markdown("Zusammenfassung der Breaking Points erscheint hier.", label="Key Findings Summary")
                    details_output = gr.DataFrame(
                        headers=["concept", "strength", "responded", "termination_reason", "generated_text"],
                        label="Detailed Run Data",
                        wrap=True,
                        height=400
                    )
                    with gr.Accordion("Raw JSON Output", open=False):
                        raw_json_output = gr.JSON()

            # Outputs must match run_experiment_and_display's 3-tuple return.
            run_btn.click(
                fn=run_experiment_and_display,
                inputs=[model_id_input, prompt_type_input, seed_input, concepts_input, strength_levels_input, num_steps_input, temperature_input],
                outputs=[summary_output, details_output, raw_json_output]
            )

        # --- TAB 2: Diagnostics ---
        # BUG FIX: the tab label contained mojibake ("ախ Diagnostics" — stray
        # Armenian letters where an icon belonged, cf. the 🔬 label on Tab 1).
        with gr.TabItem("🩺 Diagnostics"):
            gr.Markdown(
                "Führt eine Reihe von Selbsttests durch, um die mechanische Integrität der experimentellen Apparatur zu validieren. "
                "**Wichtig:** Dies sollte vor jedem ernsthaften Experiment einmal ausgeführt werden, um sicherzustellen, dass die Ergebnisse zuverlässig sind."
            )
            with gr.Row(variant='compact'):
                diag_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
                diag_seed = gr.Slider(1, 1000, 42, step=1, label="Seed")
                diag_btn = gr.Button("Run Diagnostic Suite", variant="secondary")
            diag_output = gr.Markdown(label="Diagnostic Results")
            diag_btn.click(fn=run_diagnostics_display, inputs=[diag_model_id, diag_seed], outputs=[diag_output])

if __name__ == "__main__":
    # Bind to all interfaces on port 7860 (the conventional Gradio/HF Spaces
    # port); debug=True enables verbose server-side error output.
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)