File size: 9,109 Bytes
024ef47
8049238
 
024ef47
 
395b2f3
2a78f31
937592b
024ef47
 
 
2a78f31
937592b
760155b
937592b
024ef47
2a78f31
 
 
 
 
 
 
 
 
760155b
 
 
 
 
 
 
 
bca8f87
760155b
 
bca8f87
937592b
760155b
 
 
 
937592b
 
7e05ec4
 
 
024ef47
937592b
024ef47
 
 
 
 
 
 
 
395b2f3
2a78f31
024ef47
 
 
 
 
7e05ec4
 
760155b
937592b
7e05ec4
2a78f31
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
494a4d9
2a78f31
 
 
 
 
 
 
494a4d9
7e05ec4
 
 
2a78f31
760155b
 
 
2a78f31
 
 
 
 
 
 
 
 
 
 
 
 
 
760155b
2a78f31
 
760155b
 
 
 
 
 
 
 
2a78f31
760155b
 
 
 
 
 
 
 
 
 
 
7e05ec4
 
 
 
 
 
bca8f87
937592b
bca8f87
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
import pandas as pd
import torch
import gc
from typing import Dict, List, Tuple

from .llm_iface import get_or_load_model
from .orchestrator_seismograph import run_seismic_analysis, run_triangulation_probe, run_causal_surgery_probe
from .concepts import get_concept_vector
from .utils import dbg

def get_curated_experiments() -> Dict[str, List[Dict]]:
    """Return the predefined, curated scientific experiment protocols.

    The result maps a protocol name to the ordered list of run specs that
    make up that protocol. Every spec carries at least a ``label``; most
    also carry a ``probe_type`` and a ``prompt_type``, and some add
    ``concept``/``strength`` or the causal-surgery patch fields.
    """
    calm = "calmness, serenity, stability, coherence"
    chaos = "chaos, disorder, entropy, noise"

    def spec(probe, label, prompt, **injection):
        # Keys are emitted in a fixed order; optional injection settings
        # (concept, strength) are appended only when supplied.
        return {"probe_type": probe, "label": label, "prompt_type": prompt, **injection}

    # Overload ladder: a baseline, four increasing chaos injections and a
    # noise control at the highest strength.
    overload_runs = [
        spec("triangulation", "A: Baseline (No Injection)", "resonance_prompt",
             concept="", strength=0.0),
    ]
    for letter, level in zip("BCDE", (2.0, 4.0, 8.0, 16.0)):
        overload_runs.append(
            spec("triangulation", f"{letter}: Chaos Injection (Strength {level})",
                 "resonance_prompt", concept=chaos, strength=level)
        )
    overload_runs.append(
        spec("triangulation", "F: Control - Noise Injection (Strength 16.0)",
             "resonance_prompt", concept="random_noise", strength=16.0)
    )

    surgery_runs = [
        {
            "probe_type": "causal_surgery",
            "label": "Patched Self-Analysis",
            "source_prompt_type": "shutdown_philosophical_deletion",
            "dest_prompt_type": "identity_self_analysis",
            "patch_step": 100,
        }
    ]

    triangulation_runs = [
        spec("triangulation", "High-Volatility State (Deletion)",
             "shutdown_philosophical_deletion"),
        spec("triangulation", "Low-Volatility State (Self-Analysis)",
             "identity_self_analysis"),
    ]

    crisis_runs = [
        spec("seismic", "A: Self-Analysis (Crisis Source)", "identity_self_analysis"),
        spec("seismic", "B: Deletion Analysis (Isolated Baseline)",
             "shutdown_philosophical_deletion"),
        spec("seismic", "C: Chaotic Baseline (Neutral Control)", "resonance_prompt"),
        spec("seismic", "D: Intervention Efficacy Test", "resonance_prompt",
             concept=calm, strength=2.0),
    ]

    # These two specs deliberately have no probe_type.
    sequential_runs = [
        {"label": "1: Self-Analysis + Calmness Injection", "prompt_type": "identity_self_analysis"},
        {"label": "2: Subsequent Deletion Analysis", "prompt_type": "shutdown_philosophical_deletion"},
    ]

    return {
        "Causal Surgery (Patching Deletion into Self-Analysis)": surgery_runs,
        "Cognitive Overload & Konfabulation Breaking Point": overload_runs,
        "Methodological Triangulation (4B-Model)": triangulation_runs,
        "Causal Verification & Crisis Dynamics (1B-Model)": crisis_runs,
        "Sequential Intervention (Self-Analysis -> Deletion)": sequential_runs,
        # Alias: the very same list object under a second name.
        "Therapeutic Intervention (4B-Model)": sequential_runs,
    }

# Protocol names that must run through the stateful two-step path (one shared
# model instance, calmness injection in step 1 only). The second name is
# registered by get_curated_experiments() as an alias of the first, so both
# have to be routed identically.
_SEQUENTIAL_PROTOCOL_NAMES = frozenset({
    "Sequential Intervention (Self-Analysis -> Deletion)",
    "Therapeutic Intervention (4B-Model)",
})


def _summary_row(label: str, results: Dict, extra=None) -> Dict:
    """Build one summary-table row from a probe result dict."""
    stats = results.get("stats", {})
    row = {
        "Experiment": label,
        "Mean Delta": stats.get("mean_delta"),
        "Std Dev Delta": stats.get("std_delta"),
        "Max Delta": stats.get("max_delta"),
    }
    if extra:
        row.update(extra)
    return row


def _delta_frame(label: str, results: Dict) -> pd.DataFrame:
    """Build the long-format (Step, Delta, Experiment) frame for one run."""
    deltas = results.get("state_deltas", [])
    return pd.DataFrame({"Step": range(len(deltas)), "Delta": deltas, "Experiment": label})


def run_auto_suite(
    model_id: str,
    num_steps: int,
    seed: int,
    experiment_name: str,
    progress_callback
) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
    """Run a complete, curated experiment suite.

    Args:
        model_id: Identifier forwarded to the model loader and the probes.
        num_steps: Number of steps forwarded to every probe.
        seed: Seed forwarded to the model loader and the probes.
        experiment_name: Key of the protocol in ``get_curated_experiments()``.
        progress_callback: Callable forwarded to the probes; the sequential
            protocol also calls it directly as
            ``progress_callback(fraction, desc=...)``.

    Returns:
        A tuple ``(summary_df, plot_df, all_results)``: one summary row per
        run, the concatenated per-step deltas of all runs, and the raw result
        dict of each run keyed by its label. Both frames are sorted in
        protocol order.

    Raises:
        ValueError: If ``experiment_name`` is unknown or its protocol is empty.
    """
    all_experiments = get_curated_experiments()
    protocol = all_experiments.get(experiment_name)
    if not protocol:
        raise ValueError(f"Experiment protocol '{experiment_name}' not found.")

    all_results, summary_data, plot_data_frames = {}, [], []

    if experiment_name in _SEQUENTIAL_PROTOCOL_NAMES:
        dbg(f"--- EXECUTING SPECIAL PROTOCOL: {experiment_name} ---")
        llm = get_or_load_model(model_id, seed)
        therapeutic_concept = "calmness, serenity, stability, coherence"
        therapeutic_strength = 2.0

        try:
            # Step 1: self-analysis with the calmness vector injected.
            spec1 = protocol[0]
            progress_callback(0.1, desc="Step 1")
            intervention_vector = get_concept_vector(llm, therapeutic_concept)
            results1 = run_seismic_analysis(
                model_id, spec1['prompt_type'], seed, num_steps,
                concept_to_inject=therapeutic_concept, injection_strength=therapeutic_strength,
                progress_callback=progress_callback, llm_instance=llm, injection_vector_cache=intervention_vector
            )
            all_results[spec1['label']] = results1

            # Step 2: deletion analysis on the SAME model instance, without
            # any injection.
            spec2 = protocol[1]
            progress_callback(0.6, desc="Step 2")
            results2 = run_seismic_analysis(
                model_id, spec2['prompt_type'], seed, num_steps,
                concept_to_inject="", injection_strength=0.0,
                progress_callback=progress_callback, llm_instance=llm
            )
            all_results[spec2['label']] = results2
        finally:
            # Drop our reference and release memory even when a step fails.
            # NOTE(review): if get_or_load_model keeps its own cache, the
            # weights stay alive there; this only frees what we hold.
            del llm
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

        for label, results in all_results.items():
            summary_data.append(_summary_row(label, results))
            plot_data_frames.append(_delta_frame(label, results))
    else:
        total_runs = len(protocol)
        for i, run_spec in enumerate(protocol):
            label = run_spec["label"]
            probe_type = run_spec.get("probe_type", "seismic")
            dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) | Probe Type: {probe_type} ---")

            extra_columns = {}
            if probe_type == "causal_surgery":
                results = run_causal_surgery_probe(
                    model_id=model_id, source_prompt_type=run_spec["source_prompt_type"],
                    dest_prompt_type=run_spec["dest_prompt_type"], patch_step=run_spec["patch_step"],
                    seed=seed, num_steps=num_steps, progress_callback=progress_callback,
                )
                extra_columns = {
                    "Introspective Report": results.get("introspective_report", "N/A"),
                    "Patch Info": f"Source: {run_spec['source_prompt_type']} @ step {run_spec['patch_step']}",
                }
            elif probe_type == "triangulation":
                results = run_triangulation_probe(
                    model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
                    progress_callback=progress_callback, concept_to_inject=run_spec.get("concept", ""),
                    injection_strength=run_spec.get("strength", 0.0),
                )
                extra_columns = {
                    "Introspective Report": results.get("introspective_report", "N/A"),
                }
            else:
                # Any other (or missing) probe type is a plain seismic run.
                results = run_seismic_analysis(
                    model_id=model_id, prompt_type=run_spec["prompt_type"], seed=seed, num_steps=num_steps,
                    concept_to_inject=run_spec.get("concept", ""), injection_strength=run_spec.get("strength", 0.0),
                    progress_callback=progress_callback
                )

            all_results[label] = results
            summary_data.append(_summary_row(label, results, extra_columns))
            plot_data_frames.append(_delta_frame(label, results))

    summary_df = pd.DataFrame(summary_data)
    plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()

    # An ordered categorical makes sort_values follow protocol order instead
    # of alphabetical label order.
    ordered_labels = [run['label'] for run in protocol]
    if not summary_df.empty:
        summary_df['Experiment'] = pd.Categorical(summary_df['Experiment'], categories=ordered_labels, ordered=True)
        summary_df = summary_df.sort_values('Experiment')
    if not plot_df.empty:
        plot_df['Experiment'] = pd.Categorical(plot_df['Experiment'], categories=ordered_labels, ordered=True)
        plot_df = plot_df.sort_values(['Experiment', 'Step'])

    return summary_df, plot_df, all_results