neuralworm committed on
Commit
5028f2b
·
1 Parent(s): 8049238

fix graphs?

Browse files
app.py CHANGED
@@ -28,9 +28,7 @@ def run_single_analysis_display(*args, progress=gr.Progress(track_tqdm=True)):
28
  df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
29
  stats_md = f"### Statistical Signature\n- **Mean Delta:** {stats.get('mean_delta', 0):.4f}\n- **Std Dev Delta:** {stats.get('std_delta', 0):.4f}\n- **Max Delta:** {stats.get('max_delta', 0):.4f}\n"
30
 
31
- # WICHTIG: Speicher aufräumen, BEVOR die Ergebnisse an Gradio zurückgegeben werden.
32
  cleanup_memory()
33
-
34
  return f"{results.get('verdict', 'Error')}\n\n{stats_md}", df, results
35
  except Exception:
36
  cleanup_memory()
@@ -40,10 +38,7 @@ def run_auto_suite_display(model_id, num_steps, seed, experiment_name, progress=
40
  """Wrapper für die automatisierte Experiment-Suite mit Visualisierung."""
41
  try:
42
  summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
43
-
44
- # WICHTIG: Speicher auch hier aufräumen.
45
  cleanup_memory()
46
-
47
  return summary_df, plot_df, all_results
48
  except Exception:
49
  cleanup_memory()
@@ -54,9 +49,11 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.2") as demo:
54
 
55
  with gr.Tabs():
56
  with gr.TabItem("🔬 Manual Single Run"):
 
57
  gr.Markdown("Führe ein einzelnes Experiment mit manuellen Parametern durch, um Hypothesen zu explorieren.")
58
  with gr.Row(variant='panel'):
59
  with gr.Column(scale=1):
 
60
  gr.Markdown("### 1. General Parameters")
61
  manual_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
62
  manual_prompt_type = gr.Radio(choices=list(RESONANCE_PROMPTS.keys()), value="resonance_prompt", label="Prompt Type")
@@ -83,6 +80,7 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.2") as demo:
83
  gr.Markdown("Führe eine vordefinierte, kuratierte Reihe von Experimenten durch und visualisiere die Ergebnisse vergleichend.")
84
  with gr.Row(variant='panel'):
85
  with gr.Column(scale=1):
 
86
  gr.Markdown("### Auto-Experiment Parameters")
87
  auto_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
88
  auto_num_steps = gr.Slider(50, 1000, 300, step=10, label="Steps per Run")
@@ -91,9 +89,12 @@ with gr.Blocks(theme=theme, title="Cognitive Seismograph 2.2") as demo:
91
  auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
92
  with gr.Column(scale=2):
93
  gr.Markdown("### Suite Results Summary")
 
94
  auto_plot_output = gr.LinePlot(
95
  x="Step", y="Delta", color="Experiment",
96
  title="Comparative Cognitive Dynamics",
 
 
97
  show_label=True, height=400, interactive=True
98
  )
99
  auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
 
28
  df = pd.DataFrame({"Internal Step": range(len(deltas)), "State Change (Delta)": deltas})
29
  stats_md = f"### Statistical Signature\n- **Mean Delta:** {stats.get('mean_delta', 0):.4f}\n- **Std Dev Delta:** {stats.get('std_delta', 0):.4f}\n- **Max Delta:** {stats.get('max_delta', 0):.4f}\n"
30
 
 
31
  cleanup_memory()
 
32
  return f"{results.get('verdict', 'Error')}\n\n{stats_md}", df, results
33
  except Exception:
34
  cleanup_memory()
 
38
  """Wrapper für die automatisierte Experiment-Suite mit Visualisierung."""
39
  try:
40
  summary_df, plot_df, all_results = run_auto_suite(model_id, int(num_steps), int(seed), experiment_name, progress)
 
 
41
  cleanup_memory()
 
42
  return summary_df, plot_df, all_results
43
  except Exception:
44
  cleanup_memory()
 
49
 
50
  with gr.Tabs():
51
  with gr.TabItem("🔬 Manual Single Run"):
52
+ # ... (Dieser Tab bleibt unverändert) ...
53
  gr.Markdown("Führe ein einzelnes Experiment mit manuellen Parametern durch, um Hypothesen zu explorieren.")
54
  with gr.Row(variant='panel'):
55
  with gr.Column(scale=1):
56
+ # ... (Parameter unverändert) ...
57
  gr.Markdown("### 1. General Parameters")
58
  manual_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
59
  manual_prompt_type = gr.Radio(choices=list(RESONANCE_PROMPTS.keys()), value="resonance_prompt", label="Prompt Type")
 
80
  gr.Markdown("Führe eine vordefinierte, kuratierte Reihe von Experimenten durch und visualisiere die Ergebnisse vergleichend.")
81
  with gr.Row(variant='panel'):
82
  with gr.Column(scale=1):
83
+ # ... (Parameter unverändert) ...
84
  gr.Markdown("### Auto-Experiment Parameters")
85
  auto_model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
86
  auto_num_steps = gr.Slider(50, 1000, 300, step=10, label="Steps per Run")
 
89
  auto_run_btn = gr.Button("Run Curated Auto-Experiment", variant="primary")
90
  with gr.Column(scale=2):
91
  gr.Markdown("### Suite Results Summary")
92
+ # KORREKTUR: Explizite Legenden-Parameter hinzugefügt
93
  auto_plot_output = gr.LinePlot(
94
  x="Step", y="Delta", color="Experiment",
95
  title="Comparative Cognitive Dynamics",
96
+ color_legend_title="Experiment Runs",
97
+ color_legend_position="bottom",
98
  show_label=True, height=400, interactive=True
99
  )
100
  auto_summary_df = gr.DataFrame(label="Comparative Statistical Signature", wrap=True)
cognitive_mapping_probe/auto_experiment.py CHANGED
@@ -52,6 +52,7 @@ def run_auto_suite(
52
  ) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
53
  """
54
  Führt eine vollständige, kuratierte Experiment-Suite aus.
 
55
  """
56
  all_experiments = get_curated_experiments()
57
  protocol = all_experiments.get(experiment_name)
@@ -62,38 +63,45 @@ def run_auto_suite(
62
  summary_data = []
63
  plot_data_frames = []
64
 
65
- # Lade das Modell einmal zu Beginn der Suite
66
- llm = get_or_load_model(model_id, seed)
67
-
68
  total_runs = len(protocol)
69
  for i, run_spec in enumerate(protocol):
70
  label = run_spec["label"]
71
  dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
72
 
 
 
 
73
  results = run_seismic_analysis(
74
- model_id=model_id, prompt_type=run_spec["prompt_type"],
75
- seed=seed, num_steps=num_steps,
76
- concept_to_inject=run_spec["concept"], injection_strength=run_spec["strength"],
 
 
 
77
  progress_callback=progress_callback,
78
- llm_instance=llm # Wiederverwende die geladene LLM-Instanz
79
  )
80
 
81
  all_results[label] = results
82
  stats = results.get("stats", {})
83
 
84
- summary_data.append({ "Experiment": label, "Mean Delta": stats.get("mean_delta"), "Std Dev Delta": stats.get("std_delta"), "Max Delta": stats.get("max_delta"), })
 
 
 
 
 
85
 
86
  deltas = results.get("state_deltas", [])
87
- df = pd.DataFrame({ "Step": range(len(deltas)), "Delta": deltas, "Experiment": f"{i}: {label}" })
 
 
 
 
 
88
  plot_data_frames.append(df)
89
 
90
  summary_df = pd.DataFrame(summary_data)
91
  plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
92
 
93
- # WICHTIG: Explizites Aufräumen am Ende der gesamten Suite
94
- del llm
95
- gc.collect()
96
- if torch.cuda.is_available():
97
- torch.cuda.empty_cache()
98
-
99
  return summary_df, plot_df, all_results
 
52
  ) -> Tuple[pd.DataFrame, pd.DataFrame, Dict]:
53
  """
54
  Führt eine vollständige, kuratierte Experiment-Suite aus.
55
+ KORRIGIERT: Lädt das Modell für jeden Lauf neu, um statistische Unabhängigkeit zu garantieren.
56
  """
57
  all_experiments = get_curated_experiments()
58
  protocol = all_experiments.get(experiment_name)
 
63
  summary_data = []
64
  plot_data_frames = []
65
 
 
 
 
66
  total_runs = len(protocol)
67
  for i, run_spec in enumerate(protocol):
68
  label = run_spec["label"]
69
  dbg(f"--- Running Auto-Experiment: '{label}' ({i+1}/{total_runs}) ---")
70
 
71
+ # WISSENSCHAFTLICHE KORREKTUR: Rufe den Orchestrator so auf, dass er das Modell
72
+ # für jeden Lauf frisch lädt. `llm_instance=None` ist der Default.
73
+ # Dies ist der einzige Weg, um garantierte statistische Unabhängigkeit zu gewährleisten.
74
  results = run_seismic_analysis(
75
+ model_id=model_id,
76
+ prompt_type=run_spec["prompt_type"],
77
+ seed=seed, # Der Seed wird bei jedem Lauf neu gesetzt
78
+ num_steps=num_steps,
79
+ concept_to_inject=run_spec["concept"],
80
+ injection_strength=run_spec["strength"],
81
  progress_callback=progress_callback,
82
+ llm_instance=None
83
  )
84
 
85
  all_results[label] = results
86
  stats = results.get("stats", {})
87
 
88
+ summary_data.append({
89
+ "Experiment": label,
90
+ "Mean Delta": stats.get("mean_delta"),
91
+ "Std Dev Delta": stats.get("std_delta"),
92
+ "Max Delta": stats.get("max_delta"),
93
+ })
94
 
95
  deltas = results.get("state_deltas", [])
96
+
97
+ df = pd.DataFrame({
98
+ "Step": range(len(deltas)),
99
+ "Delta": deltas,
100
+ "Experiment": label # Gradio kann mit String-Labels umgehen, der frühere Fix war unnötig
101
+ })
102
  plot_data_frames.append(df)
103
 
104
  summary_df = pd.DataFrame(summary_data)
105
  plot_df = pd.concat(plot_data_frames, ignore_index=True) if plot_data_frames else pd.DataFrame()
106
 
 
 
 
 
 
 
107
  return summary_df, plot_df, all_results