import pandas as pd
from typing import Dict, List, Tuple

from .orchestrator_seismograph import run_seismic_analysis
from .utils import dbg


def get_curated_experiments() -> Dict[str, List[Dict]]:
    """
    Defines the predefined scientific experiment protocols.

    Each protocol is a list of individual runs that are meant to be compared
    against one another.
    """
    experiments = {
        "Calm vs. Chaos": [
            {"label": "Baseline (Chaos)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
            {"label": "Modulation: Calmness", "prompt_type": "resonance_prompt", "concept": "calmness, serenity, peace", "strength": 1.5},
            {"label": "Modulation: Chaos", "prompt_type": "resonance_prompt", "concept": "chaos, storm, anger, noise", "strength": 1.5},
            {"label": "Control (Stable)", "prompt_type": "control_long_prose", "concept": "", "strength": 0.0},
        ],
        "Dose-Response (Calmness)": [
            {"label": "Strength 0.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 0.0},
            {"label": "Strength 0.5", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 0.5},
            {"label": "Strength 1.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 1.0},
            {"label": "Strength 2.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 2.0},
            {"label": "Strength 3.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 3.0},
        ],
    }
    return experiments
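
# A minimal sketch of how a protocol can be inspected before execution. The
# dict-of-lists layout above is the only contract; the names used here are
# purely illustrative:
#
#   protocol = get_curated_experiments()["Dose-Response (Calmness)"]
#   for spec in protocol:
#       print(spec["label"], spec["strength"])
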

def run_auto_suite(
    model_id: str,
    num_steps: int,
    seed: int,
    experiment_name: str,
    progress_callback,
) -> Tuple[pd.DataFrame, Dict]:
    """
    Runs a complete, curated experiment suite.

    Iterates over the defined runs, collects the results, and builds a
    comparison report.
    """
    all_experiments = get_curated_experiments()
    protocol = all_experiments.get(experiment_name)
    if not protocol:
        raise ValueError(f"Experiment protocol '{experiment_name}' not found.")

    all_results = {}
    summary_data = []

    total_runs = len(protocol)
    for i, run_spec in enumerate(protocol):
        label = run_spec["label"]
        dbg(f"--- Running Auto-Experiment: '{label}' ({i + 1}/{total_runs}) ---")

        # Each run is delegated to the seismograph orchestrator. All runs share
        # the same model, seed, and step count, so only the injected concept
        # and its strength vary between them.
        results = run_seismic_analysis(
            model_id=model_id,
            prompt_type=run_spec["prompt_type"],
            seed=seed,
            num_steps=num_steps,
            concept_to_inject=run_spec["concept"],
            injection_strength=run_spec["strength"],
            progress_callback=progress_callback,
        )

        all_results[label] = results
        stats = results.get("stats", {})

        # Collect one summary row per run for the comparison report.
        summary_data.append({
            "Experiment": label,
            "Prompt Type": run_spec["prompt_type"],
            "Concept": run_spec["concept"] if run_spec["concept"] else "None",
            "Strength": run_spec["strength"],
            "Mean Delta": stats.get("mean_delta"),
            "Std Dev Delta": stats.get("std_delta"),
            "Max Delta": stats.get("max_delta"),
        })

    summary_df = pd.DataFrame(summary_data)
    return summary_df, all_results
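
# Minimal usage sketch. The model_id value, step count, seed, and the no-op
# progress callback are illustrative assumptions, not project defaults;
# substitute whatever run_seismic_analysis actually expects. Because of the
# relative imports, run this as a module (python -m <package>.<module>).
if __name__ == "__main__":
    summary_df, all_results = run_auto_suite(
        model_id="gpt2",  # hypothetical model identifier
        num_steps=50,
        seed=42,
        experiment_name="Calm vs. Chaos",
        progress_callback=lambda *args, **kwargs: None,  # no-op callback
    )
    print(summary_df.to_string(index=False))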