Commit 395b2f3 ("cs 2.1") · Parent(s): 024ef47

Files changed:
- app.py (+19, -9)
- cognitive_mapping_probe/auto_experiment.py (+28, -10)

app.py
CHANGED
@@ -22,18 +22,20 @@ def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
         return f"### ❌ Analysis Failed\n```\n{traceback.format_exc()}\n```", pd.DataFrame(), {}
 
 def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
-    """Wrapper for the automated experiment suite."""
+    """Wrapper for the automated experiment suite, with visualization."""
     try:
-        …
-        …
+        # The function now returns three values: summary_df, plot_df, all_results
+        summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
+        return summary_df, plot_df, all_results
     except Exception:
-        return pd.DataFrame(), f"### ❌ Auto-Experiment Failed\n```\n{traceback.format_exc()}\n```"
+        return pd.DataFrame(), pd.DataFrame(), f"### ❌ Auto-Experiment Failed\n```\n{traceback.format_exc()}\n```"
 
 with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.1") as demo:
     gr.Markdown("# 🧠 Cognitive Seismograph 2.1: Automated Experiment Suite")
 
     with gr.Tabs():
         with gr.TabItem("🔬 Manual Single Run"):
+            # ... (this tab is unchanged) ...
             gr.Markdown("Run a single experiment with manual parameters to explore hypotheses.")
             with gr.Row(variant='panel'):
                 with gr.Column(scale=1):

@@ -60,7 +62,7 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.1") as demo:
             )
 
         with gr.TabItem("🚀 Automated Suite"):
-            gr.Markdown("Run a predefined, curated series of experiments
+            gr.Markdown("Run a predefined, curated series of experiments and visualize the results comparatively.")
             with gr.Row(variant='panel'):
                 with gr.Column(scale=1):
                     gr.Markdown("### Auto-Experiment Parameters")

@@ -71,16 +73,24 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.1") as demo:
                     auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
                 with gr.Column(scale=2):
                     gr.Markdown("### Suite Results Summary")
-                    # …
-                    …
-                    …
+                    # NEW: a LinePlot for comparing the dynamics
+                    auto_plot_output = gr.LinePlot(
+                        x="Step",
+                        y="Delta",
+                        color="Experiment",
+                        title="Comparative Cognitive Dynamics",
+                        show_label=True,
+                        height=400,
+                    )
+                    auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
                     with gr.Accordion("Raw JSON for all runs", open=False):
                         auto_raw_json = gr.JSON()
 
             auto_run_btn.click(
                 fn=run_auto_suite_display,
                 inputs=[auto_model_id, auto_num_steps, auto_seed, auto_experiment_name],
-                …
+                # The outputs are bound to the new components
+                outputs=[auto_summary_df, auto_plot_output, auto_raw_json]
             )
 
 if __name__ == "__main__":
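For reference, the new gr.LinePlot(x="Step", y="Delta", color="Experiment") reads its data in long format: one row per step and per run, with the "Experiment" column selecting the line color. A minimal sketch of that shape follows; the run labels and delta values are invented for illustration and not taken from the repository:

import pandas as pd

# Long-format frame in the shape the LinePlot consumes; values are illustrative only.
plot_df = pd.DataFrame({
    "Step":       [0, 1, 2, 0, 1, 2],
    "Delta":      [0.12, 0.08, 0.05, 0.91, 0.87, 0.95],
    "Experiment": ["0: Control (Stable)"] * 3 + ["1: Strength 1.0"] * 3,
})
print(plot_df)
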
cognitive_mapping_probe/auto_experiment.py
CHANGED
@@ -1,13 +1,13 @@
 import pandas as pd
 from typing import Dict, List, Tuple
 
+from .llm_iface import get_or_load_model
 from .orchestrator_seismograph import run_seismic_analysis
 from .utils import dbg
 
 def get_curated_experiments() -> Dict[str, List[Dict]]:
     """
     Defines the predefined, scientific experiment protocols.
-    Each protocol is a list of individual runs that are meant to be compared.
     """
     experiments = {
         "Calm vs. Chaos": [

@@ -17,6 +17,7 @@ def get_curated_experiments() -> Dict[str, List[Dict]]:
            {"label": "Control (Stable)", "prompt_type": "control_long_prose", "concept": "", "strength": 0.0},
        ],
        "Dose-Response (Calmness)": [
+           # The labels here are strings that look like numbers; that could confuse Gradio.
            {"label": "Strength 0.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 0.0},
            {"label": "Strength 0.5", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 0.5},
            {"label": "Strength 1.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 1.0},

@@ -32,10 +33,10 @@ def run_auto_suite(
     seed: int,
     experiment_name: str,
     progress_callback
-) -> Tuple[pd.DataFrame, Dict]:
+) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
     """
     Runs a complete, curated experiment suite.
-    …
+    Now additionally returns a DataFrame for the comparative visualization.
     """
     all_experiments = get_curated_experiments()
     protocol = all_experiments.get(experiment_name)

@@ -44,13 +45,15 @@ def run_auto_suite(
 
     all_results = {}
     summary_data = []
+    plot_data_frames = []
+
+    llm = get_or_load_model(model_id, seed)
 
     total_runs = len(protocol)
     for i, run_spec in enumerate(protocol):
         label = run_spec["label"]
         dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
 
-        # The `run_seismic_analysis` orchestrator is called for each step
         results = run_seismic_analysis(
             model_id=model_id,
             prompt_type=run_spec["prompt_type"],

@@ -58,22 +61,37 @@ def run_auto_suite(
             num_steps=num_steps,
             concept_to_inject=run_spec["concept"],
             injection_strength=run_spec["strength"],
-            progress_callback=progress_callback
+            progress_callback=progress_callback,
+            llm_instance=llm
         )
 
         all_results[label] = results
         stats = results.get("stats", {})
 
-        # Collect the most important metrics for the comparison table
         summary_data.append({
             "Experiment": label,
-            "Prompt Type": run_spec["prompt_type"],
-            "Concept": run_spec["concept"] if run_spec["concept"] else "None",
-            "Strength": run_spec["strength"],
             "Mean Delta": stats.get("mean_delta"),
             "Std Dev Delta": stats.get("std_delta"),
             "Max Delta": stats.get("max_delta"),
         })
 
+        deltas = results.get("state_deltas", [])
+
+        # FIX: Build the "Experiment" column so that it is guaranteed to be a
+        # unique string, to avoid problems with the Gradio visualization.
+        # This is more robust in case labels are ever numbers or duplicates.
+        plot_label = f"{i}: {label}"
+
+        df = pd.DataFrame({
+            "Step": range(len(deltas)),
+            "Delta": deltas,
+            "Experiment": plot_label
+        })
+        plot_data_frames.append(df)
+
     summary_df = pd.DataFrame(summary_data)
-    …
+    plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
+
+    del llm
+
+    return summary_df, plot_df, all_results
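With the new signature, run_auto_suite returns (summary_df, plot_df, all_results). A hedged smoke-test sketch of how a caller outside Gradio might exercise it; the model id and the no-op progress callback are placeholders, and running it assumes the Space's dependencies are installed and the model can be loaded:

from cognitive_mapping_probe.auto_experiment import run_auto_suite

# Placeholder arguments; any model id accepted by get_or_load_model would do.
summary_df, plot_df, all_results = run_auto_suite(
    "google/gemma-2b-it",             # model_id (placeholder)
    10,                               # num_steps
    42,                               # seed
    "Calm vs. Chaos",                 # experiment_name
    lambda *args, **kwargs: None,     # stand-in for gr.Progress
)

# The summary table carries one row per run; the plot frame is long-format for the LinePlot.
assert {"Experiment", "Mean Delta", "Std Dev Delta", "Max Delta"} <= set(summary_df.columns)
assert {"Step", "Delta", "Experiment"} <= set(plot_df.columns)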