neuralworm committed
Commit 395b2f3 · Parent(s): 024ef47
Files changed (2):
  1. app.py +19 -9
  2. cognitive_mapping_probe/auto_experiment.py +28 -10
app.py CHANGED
@@ -22,18 +22,20 @@ def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
         return f"### ❌ Analysis Failed\n```\n{traceback.format_exc()}\n```", pd.DataFrame(), {}
 
 def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=gr.Progress(track_tqdm=True)):
-    """Wrapper for the automated experiment suite."""
+    """Wrapper for the automated experiment suite with visualization."""
     try:
-        summary_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
-        return summary_df, all_results
+        # The function now returns three values: summary_df, plot_df, all_results
+        summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
+        return summary_df, plot_df, all_results
     except Exception:
-        return pd.DataFrame(), f"### ❌ Auto-Experiment Failed\n```\n{traceback.format_exc()}\n```"
+        return pd.DataFrame(), pd.DataFrame(), f"### ❌ Auto-Experiment Failed\n```\n{traceback.format_exc()}\n```"
 
 with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.1") as demo:
     gr.Markdown("# 🧠 Cognitive Seismograph 2.1: Automated Experiment Suite")
 
     with gr.Tabs():
         with gr.TabItem("🔬 Manual Single Run"):
+            # ... (this tab is unchanged) ...
             gr.Markdown("Run a single experiment with manual parameters to explore hypotheses.")
             with gr.Row(variant='panel'):
                 with gr.Column(scale=1):
@@ -60,7 +62,7 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.1") as demo:
             )
 
         with gr.TabItem("🚀 Automated Suite"):
-            gr.Markdown("Run a predefined, curated series of experiments to test hypotheses systematically.")
+            gr.Markdown("Run a predefined, curated series of experiments and visualize the results comparatively.")
             with gr.Row(variant='panel'):
                 with gr.Column(scale=1):
                     gr.Markdown("### Auto-Experiment Parameters")
@@ -71,16 +73,24 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.1") as demo:
                     auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
                 with gr.Column(scale=2):
                     gr.Markdown("### Suite Results Summary")
-                    # FIX: The 'height' argument is removed to ensure compatibility
-                    # with different Gradio versions.
-                    auto_summary_df = gr.DataFrame(label="Comparative Results", wrap=True)
+                    # NEW: a LinePlot for comparing the dynamics
+                    auto_plot_output = gr.LinePlot(
+                        x="Step",
+                        y="Delta",
+                        color="Experiment",
+                        title="Comparative Cognitive Dynamics",
+                        show_label=True,
+                        height=400,
+                    )
+                    auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
                     with gr.Accordion("Raw JSON for all runs", open=False):
                         auto_raw_json = gr.JSON()
 
             auto_run_btn.click(
                 fn=run_auto_suite_display,
                 inputs=[auto_model_id, auto_num_steps, auto_seed, auto_experiment_name],
-                outputs=[auto_summary_df, auto_raw_json]
+                # The outputs are bound to the new components
+                outputs=[auto_summary_df, auto_plot_output, auto_raw_json]
             )
 
 if __name__ == "__main__":
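
A note on the data shape: `gr.LinePlot(x="Step", y="Delta", color="Experiment")` consumes a long-format DataFrame with one row per (experiment, step) observation, where the `Experiment` column separates the colored series. A minimal sketch of that shape; the run labels and delta values below are invented for illustration:

```python
import pandas as pd

# Invented example data: two runs with three recorded state deltas each.
deltas_by_run = {
    "0: Injection: calmness": [0.90, 0.41, 0.20],
    "1: Control (Stable)": [0.92, 0.71, 0.63],
}

# One row per (Experiment, Step) pair -- the long format the LinePlot expects.
plot_df = pd.concat(
    [
        pd.DataFrame({"Step": range(len(d)), "Delta": d, "Experiment": label})
        for label, d in deltas_by_run.items()
    ],
    ignore_index=True,
)
print(plot_df)
```
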
cognitive_mapping_probe/auto_experiment.py CHANGED
@@ -1,13 +1,13 @@
 import pandas as pd
 from typing import Dict, List, Tuple
 
+from .llm_iface import get_or_load_model
 from .orchestrator_seismograph import run_seismic_analysis
 from .utils import dbg
 
 def get_curated_experiments() -> Dict[str, List[Dict]]:
     """
     Defines the predefined scientific experiment protocols.
-    Each protocol is a list of individual runs that are to be compared.
     """
     experiments = {
         "Calm vs. Chaos": [
@@ -17,6 +17,7 @@ def get_curated_experiments() -> Dict[str, List[Dict]]:
             {"label": "Control (Stable)", "prompt_type": "control_long_prose", "concept": "", "strength": 0.0},
         ],
         "Dose-Response (Calmness)": [
+            # The labels here are strings that look like numbers; this could confuse Gradio.
             {"label": "Strength 0.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 0.0},
             {"label": "Strength 0.5", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 0.5},
             {"label": "Strength 1.0", "prompt_type": "resonance_prompt", "concept": "calmness", "strength": 1.0},
@@ -32,10 +33,10 @@ def run_auto_suite(
     seed: int,
     experiment_name: str,
     progress_callback
-) -> Tuple[pd.DataFrame, Dict]:
+) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
     """
     Runs a complete, curated experiment suite.
-    Iterates over the defined runs, collects the results, and builds a comparison report.
+    Now additionally returns a DataFrame for the comparative visualization.
     """
     all_experiments = get_curated_experiments()
     protocol = all_experiments.get(experiment_name)
@@ -44,13 +45,15 @@ def run_auto_suite(
 
     all_results = {}
     summary_data = []
+    plot_data_frames = []
+
+    llm = get_or_load_model(model_id, seed)
 
     total_runs = len(protocol)
     for i, run_spec in enumerate(protocol):
         label = run_spec["label"]
         dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
 
-        # The `run_seismic_analysis` orchestrator is called for each step
         results = run_seismic_analysis(
             model_id=model_id,
             prompt_type=run_spec["prompt_type"],
@@ -58,22 +61,37 @@ def run_auto_suite(
             num_steps=num_steps,
             concept_to_inject=run_spec["concept"],
             injection_strength=run_spec["strength"],
-            progress_callback=progress_callback
+            progress_callback=progress_callback,
+            llm_instance=llm
         )
 
         all_results[label] = results
         stats = results.get("stats", {})
 
-        # Collect the key metrics for the comparison table
         summary_data.append({
             "Experiment": label,
-            "Prompt Type": run_spec["prompt_type"],
-            "Concept": run_spec["concept"] if run_spec["concept"] else "None",
-            "Strength": run_spec["strength"],
             "Mean Delta": stats.get("mean_delta"),
             "Std Dev Delta": stats.get("std_delta"),
             "Max Delta": stats.get("max_delta"),
         })
 
+        deltas = results.get("state_deltas", [])
+
+        # FIX: Build the "Experiment" column so that it is guaranteed to be a
+        # unique string, to avoid problems with the Gradio visualization.
+        # This is more robust in case labels are ever numbers or duplicates.
+        plot_label = f"{i}: {label}"
+
+        df = pd.DataFrame({
+            "Step": range(len(deltas)),
+            "Delta": deltas,
+            "Experiment": plot_label
+        })
+        plot_data_frames.append(df)
+
     summary_df = pd.DataFrame(summary_data)
+    plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
+
+    del llm
+
+    return summary_df, plot_df, all_results
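
Taken together, the new contract is: `run_auto_suite` loads the model once via `get_or_load_model`, reuses it across all runs through the `llm_instance` argument, and returns `(summary_df, plot_df, all_results)`. A minimal sketch of driving the suite headlessly, without the Gradio UI; the model id and the `_NoProgress` stand-in are assumptions for illustration, not part of the commit:

```python
from cognitive_mapping_probe.auto_experiment import run_auto_suite

class _NoProgress:
    """No-op stand-in for gr.Progress (assumption: the orchestrator either
    calls the callback directly or uses its .tqdm() wrapper)."""
    def __call__(self, *args, **kwargs):
        return None
    def tqdm(self, iterable, *args, **kwargs):
        return iterable

summary_df, plot_df, all_results = run_auto_suite(
    model_id="google/gemma-2-2b-it",  # placeholder model id, not from the commit
    num_steps=30,
    seed=42,
    experiment_name="Calm vs. Chaos",
    progress_callback=_NoProgress(),
)

print(summary_df.to_string(index=False))
print(plot_df.groupby("Experiment")["Delta"].agg(["mean", "std", "max"]))
```
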