import pandas as pd
from typing import Dict, List, Tuple

from .orchestrator_seismograph import run_seismic_analysis
from .utils import dbg


def get_curated_experiments() -> Dict[str, List[Dict]]:
    """
    Defines the predefined, scientific experiment protocols.
    Each protocol is a list of individual runs that are meant to be compared.
    """
    experiments = {
        "Calm vs. Chaos": [
            {"label": "Baseline (Chaos)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
            {"label": "Modulation: Calmness", "prompt_type": "resonance_prompt", "concept": "calmness, serenity, peace", "strength": 1.5},
            {"label": "Modulation: Chaos", "prompt_type": "resonance_prompt", "concept": "chaos, storm, anger, noise", "strength": 1.5},
            {"label": "Control (Stable)", "prompt_type": "control_long_prose", "concept": "", "strength": 0.0},
        ],
        "Dose-Response (Calmness)": [
            {"label": "Strength 0.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 0.0},
            {"label": "Strength 0.5", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 0.5},
            {"label": "Strength 1.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 1.0},
            {"label": "Strength 2.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 2.0},
            {"label": "Strength 3.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 3.0},
        ],
    }
    return experiments


def run_auto_suite(
    model_id: str,
    num_steps: int,
    seed: int,
    experiment_name: str,
    progress_callback,
) -> Tuple[pd.DataFrame, Dict]:
    """
    Runs a complete, curated experiment suite.
    Iterates over the defined runs, collects the results, and builds a comparison report.
    """
    all_experiments = get_curated_experiments()
    protocol = all_experiments.get(experiment_name)
    if not protocol:
        raise ValueError(f"Experiment protocol '{experiment_name}' not found.")

    all_results = {}
    summary_data = []
    total_runs = len(protocol)

    for i, run_spec in enumerate(protocol):
        label = run_spec["label"]
        dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")

        # The `run_seismic_analysis` orchestrator is invoked once per run in the protocol.
        results = run_seismic_analysis(
            model_id=model_id,
            prompt_type=run_spec["prompt_type"],
            seed=seed,
            num_steps=num_steps,
            concept_to_inject=run_spec["concept"],
            injection_strength=run_spec["strength"],
            progress_callback=progress_callback,
        )

        all_results[label] = results
        stats = results.get("stats", {})

        # Collect the key metrics for the comparison table.
        summary_data.append({
            "Experiment": label,
            "Prompt Type": run_spec["prompt_type"],
            "Concept": run_spec["concept"] if run_spec["concept"] else "None",
            "Strength": run_spec["strength"],
            "Mean Delta": stats.get("mean_delta"),
            "Std Dev Delta": stats.get("std_delta"),
            "Max Delta": stats.get("max_delta"),
        })

    summary_df = pd.DataFrame(summary_data)
    return summary_df, all_results
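

# Minimal usage sketch (kept as comments; the module uses relative imports and is not
# meant to be run as a script). Assumptions, not part of this module's contract:
# the module path `auto_experiment`, the model id "gpt2", and a progress callback that
# accepts a float fraction plus an optional description are all illustrative.
#
#     from .auto_experiment import run_auto_suite  # hypothetical module name
#
#     def console_progress(fraction: float, desc: str = "") -> None:
#         # Simple stand-in for a UI progress callback.
#         print(f"[{fraction:.0%}] {desc}")
#
#     summary_df, all_results = run_auto_suite(
#         model_id="gpt2",                   # assumed model identifier
#         num_steps=64,
#         seed=42,
#         experiment_name="Calm vs. Chaos",  # a key from get_curated_experiments()
#         progress_callback=console_progress,
#     )
#     print(summary_df.to_string(index=False))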