import pandas as pd
from typing import Dict, List, Tuple
from .orchestrator_seismograph import run_seismic_analysis
from .utils import dbg


def get_curated_experiments() -> Dict[str, List[Dict]]:
    """
    Defines the predefined scientific experiment protocols.
    Each protocol is a list of individual runs whose results are compared with one another.
    """
    experiments = {
        "Calm vs. Chaos": [
            {"label": "Baseline (Chaos)", "prompt_type": "resonance_prompt", "concept": "", "strength": 0.0},
            {"label": "Modulation: Calmness", "prompt_type": "resonance_prompt", "concept": "calmness, serenity, peace", "strength": 1.5},
            {"label": "Modulation: Chaos", "prompt_type": "resonance_prompt", "concept": "chaos, storm, anger, noise", "strength": 1.5},
            {"label": "Control (Stable)", "prompt_type": "control_long_prose", "concept": "", "strength": 0.0},
        ],
        "Dose-Response (Calmness)": [
            {"label": "Strength 0.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 0.0},
            {"label": "Strength 0.5", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 0.5},
            {"label": "Strength 1.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 1.0},
            {"label": "Strength 2.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 2.0},
            {"label": "Strength 3.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 3.0},
        ],
    }
    return experiments


def run_auto_suite(
    model_id: str,
    num_steps: int,
    seed: int,
    experiment_name: str,
    progress_callback
) -> Tuple[pd.DataFrame, Dict]:
    """
    Runs a complete, curated experiment suite.
    Iterates over the defined runs, collects the results, and builds a comparison report
    consisting of a summary DataFrame and a dict of the full per-run results.
    """
    all_experiments = get_curated_experiments()
    protocol = all_experiments.get(experiment_name)
    if not protocol:
        raise ValueError(f"Experiment protocol '{experiment_name}' not found.")

    all_results = {}
    summary_data = []
    total_runs = len(protocol)
    for i, run_spec in enumerate(protocol):
        label = run_spec["label"]
        dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")

        # The `run_seismic_analysis` orchestrator is called for each run of the protocol.
        results = run_seismic_analysis(
            model_id=model_id,
            prompt_type=run_spec["prompt_type"],
            seed=seed,
            num_steps=num_steps,
            concept_to_inject=run_spec["concept"],
            injection_strength=run_spec["strength"],
            progress_callback=progress_callback
        )
        all_results[label] = results
        stats = results.get("stats", {})

        # Collect the most important metrics for the comparison table.
        summary_data.append({
            "Experiment": label,
            "Prompt Type": run_spec["prompt_type"],
            "Concept": run_spec["concept"] if run_spec["concept"] else "None",
            "Strength": run_spec["strength"],
            "Mean Delta": stats.get("mean_delta"),
            "Std Dev Delta": stats.get("std_delta"),
            "Max Delta": stats.get("max_delta"),
        })
    summary_df = pd.DataFrame(summary_data)
    return summary_df, all_results
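

# Example usage (illustrative sketch, not part of the original module): the import path,
# the model id, and the no-op progress callback below are assumptions.
#
#   from pipeline.auto_experiment import run_auto_suite   # hypothetical import path
#
#   summary_df, all_results = run_auto_suite(
#       model_id="gpt2",                                   # assumed model identifier
#       num_steps=50,
#       seed=42,
#       experiment_name="Dose-Response (Calmness)",
#       progress_callback=lambda *args, **kwargs: None,    # stand-in for a UI callback
#   )
#   print(summary_df.to_string(index=False))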