Commit 395b2f3 ("cs 2.1") · Parent(s): 024ef47

Files changed:
- app.py (+19, -9)
- cognitive_mapping_probe/auto_experiment.py (+28, -10)

app.py
CHANGED
@@ -22,18 +22,20 @@ def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
         return f"### ❌ Analysis Failed\n```\n{traceback.format_exc()}\n```", pd.DataFrame(), {}
 
 def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
-    """Wrapper for the automated experiment suite."""
+    """Wrapper for the automated experiment suite, with visualization."""
     try:
-        …
-        …
+        # The function now returns three values: summary_df, plot_df, all_results
+        summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
+        return summary_df, plot_df, all_results
     except Exception:
-        return pd.DataFrame(), f"### ❌ Auto-Experiment Failed\n```\n{traceback.format_exc()}\n```"
+        return pd.DataFrame(), pd.DataFrame(), f"### ❌ Auto-Experiment Failed\n```\n{traceback.format_exc()}\n```"
 
 with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.1") as demo:
     gr.Markdown("# 🧠 Cognitive Seismograph 2.1: Automated Experiment Suite")
 
     with gr.Tabs():
         with gr.TabItem("🔬 Manual Single Run"):
+            # ... (this tab is unchanged) ...
             gr.Markdown("Run a single experiment with manual parameters to explore hypotheses.")
             with gr.Row(variant='panel'):
                 with gr.Column(scale=1):

@@ -60,7 +62,7 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.1") as demo:
             )
 
         with gr.TabItem("🚀 Automated Suite"):
-            gr.Markdown("Run a predefined, curated series of experiments
+            gr.Markdown("Run a predefined, curated series of experiments and visualize the results comparatively.")
             with gr.Row(variant='panel'):
                 with gr.Column(scale=1):
                     gr.Markdown("### Auto-Experiment Parameters")

@@ -71,16 +73,24 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.1") as demo:
                     auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
                 with gr.Column(scale=2):
                     gr.Markdown("### Suite Results Summary")
-                    # …
-                    …
-                    …
+                    # NEW: a LinePlot for comparing the dynamics
+                    auto_plot_output = gr.LinePlot(
+                        x="Step",
+                        y="Delta",
+                        color="Experiment",
+                        title="Comparative Cognitive Dynamics",
+                        show_label=True,
+                        height=400,
+                    )
+                    auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
                     with gr.Accordion("Raw JSON for all runs", open=False):
                         auto_raw_json = gr.JSON()
 
             auto_run_btn.click(
                 fn=run_auto_suite_display,
                 inputs=[auto_model_id, auto_num_steps, auto_seed, auto_experiment_name],
-                …
+                # The outputs are bound to the new components
+                outputs=[auto_summary_df, auto_plot_output, auto_raw_json]
             )
 
 if __name__ == "__main__":
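For reference, the new gr.LinePlot(x="Step", y="Delta", color="Experiment") reads its data in long format: one row per step and per run, with the "Experiment" column selecting the line color. A minimal sketch of that shape follows; the run labels and delta values are invented for illustration and not taken from the repository:

import pandas as pd

# Long-format frame in the shape the LinePlot consumes; values are illustrative only.
plot_df = pd.DataFrame({
    "Step":       [0, 1, 2, 0, 1, 2],
    "Delta":      [0.12, 0.08, 0.05, 0.91, 0.87, 0.95],
    "Experiment": ["0: Control (Stable)"] * 3 + ["1: Strength 1.0"] * 3,
})
print(plot_df)
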
cognitive_mapping_probe/auto_experiment.py
CHANGED
@@ -1,13 +1,13 @@
 import pandas as pd
 from typing import Dict, List, Tuple
 
+from .llm_iface import get_or_load_model
 from .orchestrator_seismograph import run_seismic_analysis
 from .utils import dbg
 
 def get_curated_experiments() -> Dict[str, List[Dict]]:
     """
     Defines the predefined, scientific experiment protocols.
-    Each protocol is a list of individual runs that are meant to be compared.
     """
     experiments = {
         "Calm vs. Chaos": [

@@ -17,6 +17,7 @@ def get_curated_experiments() -> Dict[str, List[Dict]]:
            {"label": "Control (Stable)", "prompt_type": "control_long_prose", "concept": "", "strength": 0.0},
        ],
        "Dose-Response (Calmness)": [
+           # The labels here are strings that look like numbers; that could confuse Gradio.
            {"label": "Strength 0.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 0.0},
            {"label": "Strength 0.5", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 0.5},
            {"label": "Strength 1.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 1.0},

@@ -32,10 +33,10 @@ def run_auto_suite(
     seed: int,
     experiment_name: str,
     progress_callback
-) -> Tuple[pd.DataFrame, Dict]:
+) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
     """
     Runs a complete, curated experiment suite.
-    …
+    Now additionally returns a DataFrame for the comparative visualization.
     """
     all_experiments = get_curated_experiments()
     protocol = all_experiments.get(experiment_name)

@@ -44,13 +45,15 @@ def run_auto_suite(
 
     all_results = {}
     summary_data = []
+    plot_data_frames = []
+
+    llm = get_or_load_model(model_id, seed)
 
     total_runs = len(protocol)
     for i, run_spec in enumerate(protocol):
         label = run_spec["label"]
         dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
 
-        # The `run_seismic_analysis` orchestrator is called for each step
         results = run_seismic_analysis(
             model_id=model_id,
             prompt_type=run_spec["prompt_type"],

@@ -58,22 +61,37 @@ def run_auto_suite(
             num_steps=num_steps,
             concept_to_inject=run_spec["concept"],
             injection_strength=run_spec["strength"],
-            progress_callback=progress_callback
+            progress_callback=progress_callback,
+            llm_instance=llm
         )
 
         all_results[label] = results
         stats = results.get("stats", {})
 
-        # Collect the most important metrics for the comparison table
         summary_data.append({
             "Experiment": label,
-            "Prompt Type": run_spec["prompt_type"],
-            "Concept": run_spec["concept"] if run_spec["concept"] else "None",
-            "Strength": run_spec["strength"],
             "Mean Delta": stats.get("mean_delta"),
             "Std Dev Delta": stats.get("std_delta"),
             "Max Delta": stats.get("max_delta"),
         })
 
+        deltas = results.get("state_deltas", [])
+
+        # FIX: Build the "Experiment" column so that it is guaranteed to be a
+        # unique string, to avoid problems with the Gradio visualization.
+        # This is more robust in case labels are ever numbers or duplicates.
+        plot_label = f"{i}: {label}"
+
+        df = pd.DataFrame({
+            "Step": range(len(deltas)),
+            "Delta": deltas,
+            "Experiment": plot_label
+        })
+        plot_data_frames.append(df)
+
     summary_df = pd.DataFrame(summary_data)
-    …
+    plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
+
+    del llm
+
+    return summary_df, plot_df, all_results
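With the new signature, run_auto_suite returns (summary_df, plot_df, all_results). A hedged smoke-test sketch of how a caller outside Gradio might exercise it; the model id and the no-op progress callback are placeholders, and running it assumes the Space's dependencies are installed and the model can be loaded:

from cognitive_mapping_probe.auto_experiment import run_auto_suite

# Placeholder arguments; any model id accepted by get_or_load_model would do.
summary_df, plot_df, all_results = run_auto_suite(
    "google/gemma-2b-it",             # model_id (placeholder)
    10,                               # num_steps
    42,                               # seed
    "Calm vs. Chaos",                 # experiment_name
    lambda *args, **kwargs: None,     # stand-in for gr.Progress
)

# The summary table carries one row per run; the plot frame is long-format for the LinePlot.
assert {"Experiment", "Mean Delta", "Std Dev Delta", "Max Delta"} <= set(summary_df.columns)
assert {"Step", "Delta", "Experiment"} <= set(plot_df.columns)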