Spaces:
Sleeping
Sleeping
Commit
·
494a4d9
1
Parent(s):
5028f2b
fix graphs?
Browse files
- app.py +13 -3
- cognitive_mapping_probe/auto_experiment.py +11 -7
app.py
CHANGED
|
@@ -38,6 +38,11 @@ def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=
|
|
| 38 |
"""Wrapper für die automatisierte Experiment-Suite mit Visualisierung."""
|
| 39 |
try:
|
| 40 |
summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
cleanup_memory()
|
| 42 |
return summary_df, plot_df, all_results
|
| 43 |
except Exception:
|
|
@@ -89,13 +94,18 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.2") as demo:
|
|
| 89 |
auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
|
| 90 |
with gr.Column(scale=2):
|
| 91 |
gr.Markdown("### Suite Results Summary")
|
| 92 |
-
# KORREKTUR:
|
|
|
|
| 93 |
auto_plot_output = gr.LinePlot(
|
| 94 |
-
x="Step",
|
|
|
|
|
|
|
| 95 |
title="Comparative Cognitive Dynamics",
|
| 96 |
color_legend_title="Experiment Runs",
|
| 97 |
color_legend_position="bottom",
|
| 98 |
-
show_label=True,
|
|
|
|
|
|
|
| 99 |
)
|
| 100 |
auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
|
| 101 |
with gr.Accordion("Raw JSON for all runs", open=False):
|
|
|
|
| 38 |
"""Wrapper für die automatisierte Experiment-Suite mit Visualisierung."""
|
| 39 |
try:
|
| 40 |
summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
|
| 41 |
+
|
| 42 |
+
# DEBUG-Ausgabe zur Überprüfung der DataFrame-Struktur
|
| 43 |
+
dbg("Plot DataFrame Head:\n", plot_df.head())
|
| 44 |
+
dbg("Plot DataFrame Dtypes:\n", plot_df.dtypes)
|
| 45 |
+
|
| 46 |
cleanup_memory()
|
| 47 |
return summary_df, plot_df, all_results
|
| 48 |
except Exception:
|
|
|
|
| 94 |
auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
|
| 95 |
with gr.Column(scale=2):
|
| 96 |
gr.Markdown("### Suite Results Summary")
|
| 97 |
+
# FINALE KORREKTUR: Wir definieren die Spaltennamen explizit,
|
| 98 |
+
# um jegliche Ambiguität für Gradio zu beseitigen.
|
| 99 |
auto_plot_output = gr.LinePlot(
|
| 100 |
+
x="Step",
|
| 101 |
+
y="Delta",
|
| 102 |
+
color="Experiment",
|
| 103 |
title="Comparative Cognitive Dynamics",
|
| 104 |
color_legend_title="Experiment Runs",
|
| 105 |
color_legend_position="bottom",
|
| 106 |
+
show_label=True,
|
| 107 |
+
height=400,
|
| 108 |
+
interactive=True
|
| 109 |
)
|
| 110 |
auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
|
| 111 |
with gr.Accordion("Raw JSON for all runs", open=False):
|
cognitive_mapping_probe/auto_experiment.py
CHANGED
|
@@ -52,7 +52,7 @@ def run_auto_suite(
|
|
| 52 |
) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
|
| 53 |
"""
|
| 54 |
Führt eine vollständige, kuratierte Experiment-Suite aus.
|
| 55 |
-
|
| 56 |
"""
|
| 57 |
all_experiments = get_curated_experiments()
|
| 58 |
protocol = all_experiments.get(experiment_name)
|
|
@@ -68,13 +68,10 @@ def run_auto_suite(
|
|
| 68 |
label = run_spec["label"]
|
| 69 |
dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
|
| 70 |
|
| 71 |
-
# WISSENSCHAFTLICHE KORREKTUR: Rufe den Orchestrator so auf, dass er das Modell
|
| 72 |
-
# für jeden Lauf frisch lädt. `llm_instance=None` ist der Default.
|
| 73 |
-
# Dies ist der einzige Weg, um garantierte statistische Unabhängigkeit zu gewährleisten.
|
| 74 |
results = run_seismic_analysis(
|
| 75 |
model_id=model_id,
|
| 76 |
prompt_type=run_spec["prompt_type"],
|
| 77 |
-
seed=seed,
|
| 78 |
num_steps=num_steps,
|
| 79 |
concept_to_inject=run_spec["concept"],
|
| 80 |
injection_strength=run_spec["strength"],
|
|
@@ -97,11 +94,18 @@ def run_auto_suite(
|
|
| 97 |
df = pd.DataFrame({
|
| 98 |
"Step": range(len(deltas)),
|
| 99 |
"Delta": deltas,
|
| 100 |
-
"Experiment": label
|
| 101 |
})
|
| 102 |
plot_data_frames.append(df)
|
| 103 |
|
| 104 |
summary_df = pd.DataFrame(summary_data)
|
| 105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
return summary_df, plot_df, all_results
|
|
|
|
| 52 |
) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
|
| 53 |
"""
|
| 54 |
Führt eine vollständige, kuratierte Experiment-Suite aus.
|
| 55 |
+
Stellt sicher, dass das zurückgegebene DataFrame für den Plot immer die korrekten Spaltennamen hat.
|
| 56 |
"""
|
| 57 |
all_experiments = get_curated_experiments()
|
| 58 |
protocol = all_experiments.get(experiment_name)
|
|
|
|
| 68 |
label = run_spec["label"]
|
| 69 |
dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
|
| 70 |
|
|
|
|
|
|
|
|
|
|
| 71 |
results = run_seismic_analysis(
|
| 72 |
model_id=model_id,
|
| 73 |
prompt_type=run_spec["prompt_type"],
|
| 74 |
+
seed=seed,
|
| 75 |
num_steps=num_steps,
|
| 76 |
concept_to_inject=run_spec["concept"],
|
| 77 |
injection_strength=run_spec["strength"],
|
|
|
|
| 94 |
df = pd.DataFrame({
|
| 95 |
"Step": range(len(deltas)),
|
| 96 |
"Delta": deltas,
|
| 97 |
+
"Experiment": label
|
| 98 |
})
|
| 99 |
plot_data_frames.append(df)
|
| 100 |
|
| 101 |
summary_df = pd.DataFrame(summary_data)
|
| 102 |
+
|
| 103 |
+
# FINALE ROBUSTHEITS-KORREKTUR:
|
| 104 |
+
# Erstelle ein leeres DataFrame mit den korrekten Spalten, falls keine Daten vorhanden sind.
|
| 105 |
+
# Dies verhindert, dass ein leeres DataFrame ohne Spalten an den Plot übergeben wird.
|
| 106 |
+
if not plot_data_frames:
|
| 107 |
+
plot_df = pd.DataFrame(columns=["Step", "Delta", "Experiment"])
|
| 108 |
+
else:
|
| 109 |
+
plot_df = pd.concat(plot_data_frames, ignore_index=True)
|
| 110 |
|
| 111 |
return summary_df, plot_df, all_results
|