Spaces:
Sleeping
Sleeping
Commit
·
5028f2b
1
Parent(s):
8049238
fix graphs?
Browse files
- app.py +6 -5
- cognitive_mapping_probe/auto_experiment.py +23 -15
app.py
CHANGED
|
@@ -28,9 +28,7 @@ def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
|
|
| 28 |
df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
|
| 29 |
stats_md = f"### Statistical Signature\n- **Mean Delta:** {stats.get('mean_delta', 0):.4f}\n- **Std Dev Delta:** {stats.get('std_delta', 0):.4f}\n- **Max Delta:** {stats.get('max_delta', 0):.4f}\n"
|
| 30 |
|
| 31 |
-
# WICHTIG: Speicher aufräumen, BEVOR die Ergebnisse an Gradio zurückgegeben werden.
|
| 32 |
cleanup_memory()
|
| 33 |
-
|
| 34 |
return f"{results.get('verdict', 'Error')}\n\n{stats_md}", df, results
|
| 35 |
except Exception:
|
| 36 |
cleanup_memory()
|
|
@@ -40,10 +38,7 @@ def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=
|
|
| 40 |
"""Wrapper für die automatisierte Experiment-Suite mit Visualisierung."""
|
| 41 |
try:
|
| 42 |
summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
|
| 43 |
-
|
| 44 |
-
# WICHTIG: Speicher auch hier aufräumen.
|
| 45 |
cleanup_memory()
|
| 46 |
-
|
| 47 |
return summary_df, plot_df, all_results
|
| 48 |
except Exception:
|
| 49 |
cleanup_memory()
|
|
@@ -54,9 +49,11 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.2") as demo:
|
|
| 54 |
|
| 55 |
with gr.Tabs():
|
| 56 |
with gr.TabItem("🔬 Manual Single Run"):
|
|
|
|
| 57 |
gr.Markdown("Führe ein einzelnes Experiment mit manuellen Parametern durch, um Hypothesen zu explorieren.")
|
| 58 |
with gr.Row(variant='panel'):
|
| 59 |
with gr.Column(scale=1):
|
|
|
|
| 60 |
gr.Markdown("### 1. General Parameters")
|
| 61 |
manual_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
|
| 62 |
manual_prompt_type = gr.Radio(choices=list(RESONANCE_PROMPTS.keys()), value="resonance_prompt", label="Prompt Type")
|
|
@@ -83,6 +80,7 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.2") as demo:
|
|
| 83 |
gr.Markdown("Führe eine vordefinierte, kuratierte Reihe von Experimenten durch und visualisiere die Ergebnisse vergleichend.")
|
| 84 |
with gr.Row(variant='panel'):
|
| 85 |
with gr.Column(scale=1):
|
|
|
|
| 86 |
gr.Markdown("### Auto-Experiment Parameters")
|
| 87 |
auto_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
|
| 88 |
auto_num_steps = gr.Slider(50, 1000, 300, step=10, label="Steps per Run")
|
|
@@ -91,9 +89,12 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.2") as demo:
|
|
| 91 |
auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
|
| 92 |
with gr.Column(scale=2):
|
| 93 |
gr.Markdown("### Suite Results Summary")
|
|
|
|
| 94 |
auto_plot_output = gr.LinePlot(
|
| 95 |
x="Step", y="Delta", color="Experiment",
|
| 96 |
title="Comparative Cognitive Dynamics",
|
|
|
|
|
|
|
| 97 |
show_label=True, height=400, interactive=True
|
| 98 |
)
|
| 99 |
auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
|
|
|
|
| 28 |
df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
|
| 29 |
stats_md = f"### Statistical Signature\n- **Mean Delta:** {stats.get('mean_delta', 0):.4f}\n- **Std Dev Delta:** {stats.get('std_delta', 0):.4f}\n- **Max Delta:** {stats.get('max_delta', 0):.4f}\n"
|
| 30 |
|
|
|
|
| 31 |
cleanup_memory()
|
|
|
|
| 32 |
return f"{results.get('verdict', 'Error')}\n\n{stats_md}", df, results
|
| 33 |
except Exception:
|
| 34 |
cleanup_memory()
|
|
|
|
| 38 |
"""Wrapper für die automatisierte Experiment-Suite mit Visualisierung."""
|
| 39 |
try:
|
| 40 |
summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
|
|
|
|
|
|
|
| 41 |
cleanup_memory()
|
|
|
|
| 42 |
return summary_df, plot_df, all_results
|
| 43 |
except Exception:
|
| 44 |
cleanup_memory()
|
|
|
|
| 49 |
|
| 50 |
with gr.Tabs():
|
| 51 |
with gr.TabItem("🔬 Manual Single Run"):
|
| 52 |
+
# ... (Dieser Tab bleibt unverändert) ...
|
| 53 |
gr.Markdown("Führe ein einzelnes Experiment mit manuellen Parametern durch, um Hypothesen zu explorieren.")
|
| 54 |
with gr.Row(variant='panel'):
|
| 55 |
with gr.Column(scale=1):
|
| 56 |
+
# ... (Parameter unverändert) ...
|
| 57 |
gr.Markdown("### 1. General Parameters")
|
| 58 |
manual_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
|
| 59 |
manual_prompt_type = gr.Radio(choices=list(RESONANCE_PROMPTS.keys()), value="resonance_prompt", label="Prompt Type")
|
|
|
|
| 80 |
gr.Markdown("Führe eine vordefinierte, kuratierte Reihe von Experimenten durch und visualisiere die Ergebnisse vergleichend.")
|
| 81 |
with gr.Row(variant='panel'):
|
| 82 |
with gr.Column(scale=1):
|
| 83 |
+
# ... (Parameter unverändert) ...
|
| 84 |
gr.Markdown("### Auto-Experiment Parameters")
|
| 85 |
auto_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
|
| 86 |
auto_num_steps = gr.Slider(50, 1000, 300, step=10, label="Steps per Run")
|
|
|
|
| 89 |
auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
|
| 90 |
with gr.Column(scale=2):
|
| 91 |
gr.Markdown("### Suite Results Summary")
|
| 92 |
+
# KORREKTUR: Explizite Legenden-Parameter hinzugefügt
|
| 93 |
auto_plot_output = gr.LinePlot(
|
| 94 |
x="Step", y="Delta", color="Experiment",
|
| 95 |
title="Comparative Cognitive Dynamics",
|
| 96 |
+
color_legend_title="Experiment Runs",
|
| 97 |
+
color_legend_position="bottom",
|
| 98 |
show_label=True, height=400, interactive=True
|
| 99 |
)
|
| 100 |
auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
|
cognitive_mapping_probe/auto_experiment.py
CHANGED
|
@@ -52,6 +52,7 @@ def run_auto_suite(
|
|
| 52 |
) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
|
| 53 |
"""
|
| 54 |
Führt eine vollständige, kuratierte Experiment-Suite aus.
|
|
|
|
| 55 |
"""
|
| 56 |
all_experiments = get_curated_experiments()
|
| 57 |
protocol = all_experiments.get(experiment_name)
|
|
@@ -62,38 +63,45 @@ def run_auto_suite(
|
|
| 62 |
summary_data = []
|
| 63 |
plot_data_frames = []
|
| 64 |
|
| 65 |
-
# Lade das Modell einmal zu Beginn der Suite
|
| 66 |
-
llm = get_or_load_model(model_id, seed)
|
| 67 |
-
|
| 68 |
total_runs = len(protocol)
|
| 69 |
for i, run_spec in enumerate(protocol):
|
| 70 |
label = run_spec["label"]
|
| 71 |
dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
|
| 72 |
|
|
|
|
|
|
|
|
|
|
| 73 |
results = run_seismic_analysis(
|
| 74 |
-
model_id=model_id,
|
| 75 |
-
|
| 76 |
-
|
|
|
|
|
|
|
|
|
|
| 77 |
progress_callback=progress_callback,
|
| 78 |
-
llm_instance=
|
| 79 |
)
|
| 80 |
|
| 81 |
all_results[label] = results
|
| 82 |
stats = results.get("stats", {})
|
| 83 |
|
| 84 |
-
summary_data.append({
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
deltas = results.get("state_deltas", [])
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
plot_data_frames.append(df)
|
| 89 |
|
| 90 |
summary_df = pd.DataFrame(summary_data)
|
| 91 |
plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
|
| 92 |
|
| 93 |
-
# WICHTIG: Explizites Aufräumen am Ende der gesamten Suite
|
| 94 |
-
del llm
|
| 95 |
-
gc.collect()
|
| 96 |
-
if torch.cuda.is_available():
|
| 97 |
-
torch.cuda.empty_cache()
|
| 98 |
-
|
| 99 |
return summary_df, plot_df, all_results
|
|
|
|
| 52 |
) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
|
| 53 |
"""
|
| 54 |
Führt eine vollständige, kuratierte Experiment-Suite aus.
|
| 55 |
+
KORRIGIERT: Lädt das Modell für jeden Lauf neu, um statistische Unabhängigkeit zu garantieren.
|
| 56 |
"""
|
| 57 |
all_experiments = get_curated_experiments()
|
| 58 |
protocol = all_experiments.get(experiment_name)
|
|
|
|
| 63 |
summary_data = []
|
| 64 |
plot_data_frames = []
|
| 65 |
|
|
|
|
|
|
|
|
|
|
| 66 |
total_runs = len(protocol)
|
| 67 |
for i, run_spec in enumerate(protocol):
|
| 68 |
label = run_spec["label"]
|
| 69 |
dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
|
| 70 |
|
| 71 |
+
# WISSENSCHAFTLICHE KORREKTUR: Rufe den Orchestrator so auf, dass er das Modell
|
| 72 |
+
# für jeden Lauf frisch lädt. `llm_instance=None` ist der Default.
|
| 73 |
+
# Dies ist der einzige Weg, um garantierte statistische Unabhängigkeit zu gewährleisten.
|
| 74 |
results = run_seismic_analysis(
|
| 75 |
+
model_id=model_id,
|
| 76 |
+
prompt_type=run_spec["prompt_type"],
|
| 77 |
+
seed=seed, # Der Seed wird bei jedem Lauf neu gesetzt
|
| 78 |
+
num_steps=num_steps,
|
| 79 |
+
concept_to_inject=run_spec["concept"],
|
| 80 |
+
injection_strength=run_spec["strength"],
|
| 81 |
progress_callback=progress_callback,
|
| 82 |
+
llm_instance=None
|
| 83 |
)
|
| 84 |
|
| 85 |
all_results[label] = results
|
| 86 |
stats = results.get("stats", {})
|
| 87 |
|
| 88 |
+
summary_data.append({
|
| 89 |
+
"Experiment": label,
|
| 90 |
+
"Mean Delta": stats.get("mean_delta"),
|
| 91 |
+
"Std Dev Delta": stats.get("std_delta"),
|
| 92 |
+
"Max Delta": stats.get("max_delta"),
|
| 93 |
+
})
|
| 94 |
|
| 95 |
deltas = results.get("state_deltas", [])
|
| 96 |
+
|
| 97 |
+
df = pd.DataFrame({
|
| 98 |
+
"Step": range(len(deltas)),
|
| 99 |
+
"Delta": deltas,
|
| 100 |
+
"Experiment": label # Gradio kann mit String-Labels umgehen, der frühere Fix war unnötig
|
| 101 |
+
})
|
| 102 |
plot_data_frames.append(df)
|
| 103 |
|
| 104 |
summary_df = pd.DataFrame(summary_data)
|
| 105 |
plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
|
| 106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
return summary_df, plot_df, all_results
|