Commit a345062 · 1 parent: 4478c62
cs 1.0

Files changed:
- README.md +18 -23
- app.py +45 -91
- cognitive_mapping_probe/__pycache__/llm_iface.cpython-310.pyc +0 -0
- cognitive_mapping_probe/__pycache__/resonance.cpython-310.pyc +0 -0
- cognitive_mapping_probe/llm_iface.py +7 -22
- cognitive_mapping_probe/orchestrator.py +0 -88
- cognitive_mapping_probe/orchestrator_seismograph.py +62 -0
- cognitive_mapping_probe/pre_flight_checks.py +0 -147
- cognitive_mapping_probe/resonance.py +0 -101
- cognitive_mapping_probe/resonance_seismograph.py +55 -0
- cognitive_mapping_probe/verification.py +0 -65
- requirements.txt +1 -2
- run_test.sh +30 -0
- tests/conftest.py +70 -0
- tests/test_app_logic.py +54 -0
- tests/test_components.py +115 -0
- tests/test_dynamics.py +60 -0
- tests/test_integration.py +46 -0
- tests/test_orchestration.py +43 -0
README.md
CHANGED
@@ -1,8 +1,8 @@
 ---
-title: "Cognitive
-emoji:
-colorFrom:
-colorTo:
+title: "Cognitive Seismograph"
+emoji: 🧠
+colorFrom: indigo
+colorTo: blue
 sdk: gradio
 sdk_version: "4.40.0"
 app_file: app.py
@@ -10,32 +10,27 @@ pinned: true
 license: apache-2.0
 ---

-#
-Dieses Projekt implementiert eine
-## Wissenschaftliches Paradigma: Von
-Unsere Forschung hat
-## Das Experiment:
-1. **Induktion**: Das Modell wird mit einem Prompt
-2. **
-3. **
-   * `converged`: The state has stabilized. The system is robust.
-   * `max_steps_reached`: The state oscillates or drifts endlessly. The system is "broken".
-4. **Verification**: Only if the state converges is an attempt made to generate spontaneous text. The ability to respond is the behavioral marker of cognitive stability.
+# 🧠 Cognitive Seismograph: Visualizing Internal Dynamics
+
+This project implements an experimental suite for measuring and visualizing the **intrinsic cognitive dynamics** of language models.
+
+## Scientific Paradigm: From Stability to Dynamics
+
+Our previous research falsified a central hypothesis: the assumption that an LLM reaches a stable, convergent state in a manual, recursive "thinking" loop. Instead, we discovered that the system enters a state of **deterministic chaos** or a **limit cycle** – it never stops "thinking".
+
+Rather than treating this as a failure, we use it as the primary measurement signal. The new "Cognitive Seismograph" paradigm treats the time series of internal state changes (`state deltas`) as an **EKG of the thinking process**.
+
+**The core hypothesis:** the statistical signature of this dynamic time series (e.g. its volatility and its mean) is not random, but a function of the cognitive load induced by the initial prompt.
+
+## The Experiment: Recording the Cognitive EKG
+
+1. **Induction**: The model is put into a state of "silent thinking" by a prompt (`control_long_prose` vs. `resonance_prompt`).
+2. **Recording**: Over a defined number of steps, the model's `forward` pass is fed iteratively with its own output. At each step, the norm of the change of the `hidden_state` (the "delta") is recorded.
+3. **Analysis & Visualization**: The resulting time series of deltas is plotted and statistically analyzed to characterize the "seismic signature" of the thinking process.

 ## How to use the app

-1.
-2. **
-   * Define the concepts and titration steps to test.
-   * Start the experiment and analyze the resulting table to identify the CBPs for each concept.
+1. Choose a model ID (e.g. `google/gemma-3-1b-it`).
+2. Choose a **Prompt Type** to vary the cognitive load. Compare the resulting graphs for `control_long_prose` (low load) and `resonance_prompt` (high recursive load).
+3. Set the number of internal steps and start the analysis.
+4. Examine the graph and the statistical summary to understand the differences in cognitive dynamics.
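To make step 3 concrete, here is a minimal sketch of the statistical summary described above. It is illustrative only: the `state_deltas` values are made up, and the variable names are not part of the repository.

```python
import numpy as np

# Illustrative stand-in for the per-step hidden-state change norms
# recorded during the silent-cogitation loop.
state_deltas = [12.3, 11.8, 14.1, 13.0, 12.7]

deltas = np.array(state_deltas)
signature = {
    "mean_delta": float(deltas.mean()),  # average "cognitive activity"
    "std_delta": float(deltas.std()),    # volatility of the dynamics
    "max_delta": float(deltas.max()),    # largest single-step shift
}
print(signature)
```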
app.py
CHANGED
@@ -3,142 +3,96 @@ import pandas as pd
 import traceback
 import sys

-from cognitive_mapping_probe.pre_flight_checks import run_pre_flight_checks
-from cognitive_mapping_probe.orchestrator import run_cognitive_titration_experiment
+from cognitive_mapping_probe.orchestrator_seismograph import run_seismic_analysis
 from cognitive_mapping_probe.prompts import RESONANCE_PROMPTS
 from cognitive_mapping_probe.utils import dbg

 # --- UI Theme and Layout ---
-theme = gr.themes.Soft(primary_hue="
-    body_background_fill="#
+theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="blue").set(
+    body_background_fill="#f0f4f9",
     block_background_fill="white",
-    block_border_width="1px",
-    block_shadow="*shadow_drop_lg",
-    button_primary_background_fill="*primary_500",
-    button_primary_text_color="white",
 )

-DEFAULT_MODEL_ID = "google/gemma-3-1b-it"
-
-# --- Wrapper Functions for Gradio ---
-
-def run_experiment_and_display(
+def run_and_display(
     model_id: str,
     prompt_type: str,
     seed: int,
-    concepts_str: str,
-    strength_levels_str: str,
     num_steps: int,
-    temperature: float,
     progress=gr.Progress(track_tqdm=True)
 ):
     """
-    Führt
+    Runs the new seismic analysis and visualizes the internal dynamics.
     """
     try:
-        results =
-            model_id, prompt_type, int(seed),
-            int(num_steps), float(temperature), progress
+        results = run_seismic_analysis(
+            model_id, prompt_type, int(seed), int(num_steps), progress
         )

-        verdict = results.get("verdict", "
-
-        if not all_runs:
-            return "### ⚠️ No Data Generated\nDas Experiment lief durch, aber es wurden keine Datenpunkte erzeugt. Bitte Logs prüfen.", pd.DataFrame(), results
-
-        concept_df = details_df[details_df['concept'] == concept].sort_values(by='strength')
-        breaking_point_row = concept_df[concept_df['termination_reason'] != 'converged'].iloc[0] if not concept_df[concept_df['termination_reason'] != 'converged'].empty else None
-        if breaking_point_row is not None:
-            summary_text += f"- **'{concept}'**: 📉 Kollaps bei Stärke **{breaking_point_row['strength']:.2f}**\n"
-        else:
-            summary_text += f"- **'{concept}'**: ✅ Stabil bis Stärke **{concept_df['strength'].max():.2f}**\n"
-
-        return summary_text, details_df, results
+        verdict = results.get("verdict", "Analysis complete.")
+        stats = results.get("stats", {})
+        deltas = results.get("state_deltas", [])
+
+        # Build a DataFrame for the plot
+        df = pd.DataFrame({
+            "Internal Step": range(len(deltas)),
+            "State Change (Delta)": deltas
+        })
+
+        # Build a summary of the statistics
+        stats_md = f"### Statistical Signature\n"
+        stats_md += f"- **Mean Delta:** {stats.get('mean_delta', 0):.4f} (Avg. cognitive activity)\n"
+        stats_md += f"- **Std Dev Delta:** {stats.get('std_delta', 0):.4f} (Volatility of thought)\n"
+        stats_md += f"- **Max Delta:** {stats.get('max_delta', 0):.4f} (Peak cognitive shift)\n"
+
+        return f"{verdict}\n\n{stats_md}", df, results

     except Exception:
         error_str = traceback.format_exc()
-        return f"### ❌
+        return f"### ❌ Analysis Failed\nAn unexpected error occurred:\n\n```\n{error_str}\n```", pd.DataFrame(), {}

 # --- Gradio App Definition ---
-with gr.Blocks(theme=theme, title="Cognitive
-    gr.Markdown("#
-
-    # The Diagnostics tab has been removed. The UI now consists only of the main experiment.
+with gr.Blocks(theme=theme, title="Cognitive Seismograph") as demo:
+    gr.Markdown("# 🧠 Cognitive Seismograph: Visualizing Internal Dynamics")
     gr.Markdown(
-        "
+        "**Neues Paradigma:** Wir akzeptieren, dass der 'stille Denkprozess' nicht konvergiert. Stattdessen messen und visualisieren wir die **Signatur der internen Dynamik** – ein EKG für den Denkprozess des Modells."
     )
     with gr.Row(variant='panel'):
         with gr.Column(scale=1):
             gr.Markdown("### Parameters")
-            model_id_input = gr.Textbox(value=
+            model_id_input = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID")
             prompt_type_input = gr.Radio(
                 choices=list(RESONANCE_PROMPTS.keys()),
                 value="control_long_prose",
-                label="Prompt Type (Cognitive Load)"
-                info="Beginne mit 'control_long_prose' für eine stabile Baseline!"
+                label="Prompt Type (Cognitive Load)"
             )
-            seed_input = gr.Slider(1, 1000, 42, step=1, label="
-            num_steps_input = gr.Slider(50, 500, 250, step=10, label="Max. Internal Steps")
-            temperature_input = gr.Slider(0.01, 1.5, 0.7, step=0.01, label="Temperature")
-            run_btn = gr.Button("Run Cognitive Titration", variant="primary")
+            seed_input = gr.Slider(1, 1000, 42, step=1, label="Seed")
+            num_steps_input = gr.Slider(50, 1000, 300, step=10, label="Number of Internal Steps")
+            run_btn = gr.Button("Run Seismic Analysis", variant="primary")

         with gr.Column(scale=2):
             gr.Markdown("### Results")
+            verdict_output = gr.Markdown("Die Analyse der Dynamik erscheint hier.")
+            plot_output = gr.LinePlot(
+                x="Internal Step",
+                y="State Change (Delta)",
+                title="Internal State Dynamics (Cognitive EKG)",
+                show_label=True,
+                height=400,
             )
             with gr.Accordion("Raw JSON Output", open=False):
                 raw_json_output = gr.JSON()

     run_btn.click(
-        fn=
-        inputs=[model_id_input, prompt_type_input, seed_input,
-        outputs=[
+        fn=run_and_display,
+        inputs=[model_id_input, prompt_type_input, seed_input, num_steps_input],
+        outputs=[verdict_output, plot_output, raw_json_output]
     )

-# --- Main Execution Block ---
 if __name__ == "__main__":
+    # The pre-flight checks have been removed, since the new paradigm no longer requires convergence.
     print("="*80)
-    print("🔬
+    print("🔬 COGNITIVE SEISMOGRAPH INITIALIZED")
     print("="*80)
-
-        # If an error occurs here, the experiment is not valid.
-        run_pre_flight_checks(model_id=DEFAULT_MODEL_ID, seed=42)
-
-        print("\n" + "="*80)
-        print("✅ ALL DIAGNOSTICS PASSED. LAUNCHING GRADIO APP...")
-        print("="*80)
-
-        # Only start the Gradio app on success.
-        demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
-
-    except (AssertionError, Exception) as e:
-        print("\n" + "="*80)
-        print("❌ PRE-FLIGHT DIAGNOSTIC FAILED")
-        print("="*80)
-        print(f"Error Type: {type(e).__name__}")
-        print(f"Error Details: {e}")
-        print("\nDie experimentelle Apparatur funktioniert nicht wie erwartet.")
-        print("Die Gradio-App wird nicht gestartet, um fehlerhafte Messungen zu verhindern.")
-        traceback.print_exc()
-        sys.exit(1)  # Exit the program with an error code.
+    print("Das experimentelle Paradigma wurde aufgrund der Falsifikation der Konvergenz-Hypothese geändert.")
+    print("Wir messen nun die Dynamik des nicht-konvergenten Zustands.")
+    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True)
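For reference, the contract between `run_and_display` and the `gr.LinePlot` component is simply a DataFrame whose column names match the plot's `x` and `y` arguments. A minimal sketch (the delta values here are invented for illustration):

```python
import pandas as pd

# The column names must match the x/y arguments of gr.LinePlot in app.py.
deltas = [0.42, 0.39, 0.47]  # illustrative values only
df = pd.DataFrame({
    "Internal Step": range(len(deltas)),
    "State Change (Delta)": deltas,
})
print(df)
```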
cognitive_mapping_probe/__pycache__/llm_iface.cpython-310.pyc
CHANGED
Binary files a/cognitive_mapping_probe/__pycache__/llm_iface.cpython-310.pyc and b/cognitive_mapping_probe/__pycache__/llm_iface.cpython-310.pyc differ

cognitive_mapping_probe/__pycache__/resonance.cpython-310.pyc
CHANGED
Binary files a/cognitive_mapping_probe/__pycache__/resonance.cpython-310.pyc and b/cognitive_mapping_probe/__pycache__/resonance.cpython-310.pyc differ
cognitive_mapping_probe/llm_iface.py
CHANGED
@@ -12,21 +12,18 @@ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"

 class LLM:
     """
-    Eine robuste Schnittstelle zum Laden und Interagieren mit einem Sprachmodell.
-
+    A robust, cleaned-up interface for loading and interacting with a language model.
+    Guarantees isolation and reproducibility.
     """
     def __init__(self, model_id: str, device: str = "auto", seed: int = 42):
         self.model_id = model_id
         self.seed = seed
-
-        # Set all seeds for this instance to ensure deterministic behavior
         self.set_all_seeds(self.seed)

         token = os.environ.get("HF_TOKEN")
         if not token and ("gemma" in model_id or "llama" in model_id):
-            print(f"[WARN] No HF_TOKEN
+            print(f"[WARN] No HF_TOKEN set. If '{model_id}' is gated, loading will fail.", flush=True)

-        # Use bfloat16 on CUDA for performance and memory efficiency if available
         kwargs = {"torch_dtype": torch.bfloat16} if torch.cuda.is_available() else {}

         dbg(f"Loading tokenizer for '{model_id}'...")
@@ -35,23 +32,18 @@ class LLM:
         dbg(f"Loading model '{model_id}' with kwargs: {kwargs}")
         self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)

-        # Set attention implementation to 'eager' to ensure hooks work reliably.
-        # This is critical for mechanistic interpretability.
         try:
             self.model.set_attn_implementation('eager')
             dbg("Successfully set attention implementation to 'eager'.")
         except Exception as e:
-            print(f"[WARN] Could not set
+            print(f"[WARN] Could not set 'eager' attention: {e}.", flush=True)

         self.model.eval()
         self.config = self.model.config
-        print(f"[INFO] Model '{model_id}' loaded
+        print(f"[INFO] Model '{model_id}' loaded on device: {self.model.device}", flush=True)

     def set_all_seeds(self, seed: int):
-        """
-        Sets all relevant random seeds for Python, NumPy, and PyTorch to ensure
-        reproducibility of stochastic processes like sampling.
-        """
+        """Sets all relevant seeds for maximum reproducibility."""
         os.environ['PYTHONHASHSEED'] = str(seed)
         random.seed(seed)
         np.random.seed(seed)
@@ -59,19 +51,12 @@ class LLM:
         if torch.cuda.is_available():
             torch.cuda.manual_seed_all(seed)
         set_seed(seed)
-        # Enforce deterministic algorithms in PyTorch
         torch.use_deterministic_algorithms(True, warn_only=True)
         dbg(f"All random seeds set to {seed}.")

 def get_or_load_model(model_id: str, seed: int) -> LLM:
-    """
-    Lädt JEDES MAL eine frische Instanz des Modells.
-    Dies verhindert jegliches Caching oder Zustandslecks zwischen Experimenten
-    und garantiert maximale wissenschaftliche Isolation für jeden Durchlauf.
-    """
+    """Loads a fresh, isolated instance of the model on every call."""
     dbg(f"--- Force-reloading model '{model_id}' for total run isolation ---")
     if torch.cuda.is_available():
         torch.cuda.empty_cache()
-        dbg("Cleared CUDA cache before reloading.")
-
     return LLM(model_id=model_id, seed=seed)
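A minimal usage sketch of the interface above. It assumes the model weights are actually downloadable (set `HF_TOKEN` if the model is gated); the model ID is only an example.

```python
from cognitive_mapping_probe.llm_iface import get_or_load_model

# Each call returns a freshly loaded, seeded instance (no caching between runs).
llm = get_or_load_model("google/gemma-3-1b-it", seed=42)
print(llm.config.hidden_size, llm.model.device)

# Re-seed explicitly before a new measurement to keep sampling reproducible.
llm.set_all_seeds(42)
```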
cognitive_mapping_probe/orchestrator.py
DELETED
@@ -1,88 +0,0 @@
import torch
from typing import Dict, Any, List

from .llm_iface import get_or_load_model
from .concepts import get_concept_vector
from .resonance import run_silent_cogitation
from .verification import generate_spontaneous_text
from .utils import dbg

def run_cognitive_titration_experiment(
    model_id: str,
    prompt_type: str,
    seed: int,
    concepts_str: str,
    strength_levels_str: str,
    num_steps: int,
    temperature: float,
    progress_callback
) -> Dict[str, Any]:
    """
    Orchestrates the titration experiment and calls the CORRECTED verification logic.
    """
    full_results = {"runs": []}

    progress_callback(0.05, desc="Loading model...")
    llm = get_or_load_model(model_id, seed)

    concepts = [c.strip() for c in concepts_str.split(',') if c.strip()]
    try:
        strength_levels = sorted([float(s.strip()) for s in strength_levels_str.split(',') if s.strip()])
    except ValueError:
        raise ValueError("Strength levels must be a comma-separated list of numbers.")

    assert 0.0 in strength_levels, "Strength levels must include 0.0 for a baseline control run."

    progress_callback(0.1, desc="Extracting concept vectors...")
    concept_vectors = {}
    for i, concept in enumerate(concepts):
        progress_callback(0.1 + (i / len(concepts)) * 0.2, desc=f"Vectorizing '{concept}'...")
        concept_vectors[concept] = get_concept_vector(llm, concept)

    total_runs = len(concepts) * len(strength_levels)
    current_run = 0

    for concept in concepts:
        concept_vector = concept_vectors[concept]

        for strength in strength_levels:
            current_run += 1
            progress_fraction = 0.3 + (current_run / total_runs) * 0.7
            progress_callback(progress_fraction, desc=f"Testing '{concept}' @ strength {strength:.2f}")

            llm.set_all_seeds(seed)
            injection_vec = concept_vector if strength > 0.0 else None

            final_hidden_state, final_kv, final_token_id, termination_reason = run_silent_cogitation(
                llm,
                prompt_type=prompt_type,
                num_steps=num_steps,
                temperature=temperature,
                injection_vector=injection_vec,
                injection_strength=strength
            )

            spontaneous_text = ""
            if termination_reason == "converged":
                # CALLING THE FIXED VERIFICATION FUNCTION
                spontaneous_text = generate_spontaneous_text(llm, final_hidden_state, final_kv)

            full_results["runs"].append({
                "concept": concept,
                "strength": strength,
                "responded": bool(spontaneous_text.strip()),
                "termination_reason": termination_reason,
                "generated_text": spontaneous_text
            })

    verdict = "### ✅ Titration Analysis Complete"
    full_results["verdict"] = verdict

    dbg("--- Full Experiment Results ---")
    dbg(full_results)

    del llm
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return full_results
cognitive_mapping_probe/orchestrator_seismograph.py
ADDED
@@ -0,0 +1,62 @@
import torch
import numpy as np
from typing import Dict, Any

from .llm_iface import get_or_load_model
from .resonance_seismograph import run_silent_cogitation_seismic
from .utils import dbg

def run_seismic_analysis(
    model_id: str,
    prompt_type: str,
    seed: int,
    num_steps: int,
    progress_callback
) -> Dict[str, Any]:
    """
    Orchestrates the new "Cognitive Seismograph" experiment.
    Runs the loop, collects the `state_deltas`, and computes statistical metrics.
    """
    progress_callback(0.1, desc="Loading model...")
    llm = get_or_load_model(model_id, seed)

    progress_callback(0.3, desc=f"Running seismic cogitation for '{prompt_type}'...")

    # The resonance loop now returns the full time series of deltas
    state_deltas = run_silent_cogitation_seismic(
        llm,
        prompt_type=prompt_type,
        num_steps=num_steps,
        temperature=0.1,  # A low, but non-deterministic temperature
    )

    progress_callback(0.9, desc="Analyzing dynamics...")

    # Statistical analysis of the time series
    if state_deltas:
        deltas_np = np.array(state_deltas)
        stats = {
            "mean_delta": float(np.mean(deltas_np)),
            "std_delta": float(np.std(deltas_np)),
            "max_delta": float(np.max(deltas_np)),
            "min_delta": float(np.min(deltas_np)),
        }
        verdict = f"### ✅ Seismic Analysis Complete\nDie interne Dynamik für '{prompt_type}' wurde über {len(deltas_np)} Schritte aufgezeichnet."
    else:
        stats = {}
        verdict = "### ⚠️ Analysis Warning\nKeine Zustandsänderungen aufgezeichnet."

    results = {
        "verdict": verdict,
        "stats": stats,
        "state_deltas": state_deltas
    }

    dbg("--- Seismic Analysis Results ---")
    dbg(results)

    del llm
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return results
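A hedged sketch of driving this orchestrator headlessly (outside Gradio). The no-op progress callback mirrors the `MockProgress` pattern used elsewhere in the repository; the model ID and prompt type are examples and require the real model to be available.

```python
from cognitive_mapping_probe.orchestrator_seismograph import run_seismic_analysis

def no_progress(fraction, desc=""):
    # Stand-in for gr.Progress when running without the UI.
    pass

results = run_seismic_analysis(
    model_id="google/gemma-3-1b-it",
    prompt_type="resonance_prompt",
    seed=42,
    num_steps=300,
    progress_callback=no_progress,
)
print(results["verdict"])
print(results["stats"])
```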
cognitive_mapping_probe/pre_flight_checks.py
DELETED
@@ -1,147 +0,0 @@
import torch
import traceback
from types import SimpleNamespace

from .llm_iface import get_or_load_model
from .concepts import get_concept_vector
from .resonance import run_silent_cogitation
from .verification import generate_spontaneous_text
from .orchestrator import run_cognitive_titration_experiment
from .utils import dbg

def run_pre_flight_checks(model_id: str, seed: int):
    """
    Runs a series of critical integration tests with a REAL LLM
    to ensure the validity of the entire experimental chain.
    This version contains fine-grained assertions in test 7 to validate the
    entire scientific hypothesis (convergence -> behavior).
    """

    print(f"1. Loading model '{model_id}'...")
    try:
        llm = get_or_load_model(model_id, seed)
        print("   ✅ Model loaded successfully.")
    except Exception as e:
        raise AssertionError(f"Model loading failed: {e}")

    print("\n2. Testing basic text generation...")
    try:
        inputs = llm.tokenizer("Hello, are you working?", return_tensors="pt").to(llm.model.device)
        outputs = llm.model.generate(inputs.input_ids, max_new_tokens=5)
        text = llm.tokenizer.decode(outputs[0], skip_special_tokens=True)
        assert isinstance(text, str) and len(text) > 0, "Basic generation produced no text."
        print(f"   ✅ Basic generation successful. Model responded.")
    except Exception as e:
        raise AssertionError(f"Basic text generation failed: {e}")

    print("\n3. Testing concept vector extraction...")
    try:
        vector = get_concept_vector(llm, "test")
        assert vector.shape == (llm.config.hidden_size,)
        print("   ✅ Concept vector extraction successful.")
    except Exception as e:
        raise AssertionError(f"Concept vector extraction failed: {e}")

    print("\n4. Testing resonance loop (short run)...")
    try:
        # Run this test with deterministic temperature to check for convergence
        _, _, _, reason = run_silent_cogitation(llm, "control_long_prose", num_steps=250, temperature=0.01)
        assert reason == "converged", f"Resonance loop failed to converge even in a simple test. Reason: {reason}"
        print("   ✅ Resonance loop executed and converged as expected.")
    except Exception as e:
        raise AssertionError(f"Resonance loop failed: {e}")

    print("\n5. CRITICAL TEST: Hook causal efficacy...")
    handle = None
    try:
        inputs = llm.tokenizer("Test", return_tensors="pt").to(llm.model.device)
        outputs_no_hook = llm.model(**inputs, output_hidden_states=True)
        target_layer_idx = llm.config.num_hidden_layers // 2
        state_no_hook = outputs_no_hook.hidden_states[target_layer_idx + 1].clone().detach()
        def test_hook(module, layer_input):
            return (layer_input[0] + 99.0,) + layer_input[1:]
        target_layer = llm.model.model.layers[target_layer_idx]
        handle = target_layer.register_forward_pre_hook(test_hook)
        outputs_with_hook = llm.model(**inputs, output_hidden_states=True)
        state_with_hook = outputs_with_hook.hidden_states[target_layer_idx + 1].clone().detach()
        handle.remove()
        handle = None
        assert not torch.allclose(state_no_hook, state_with_hook), "Hook had no causal effect."
        print("   ✅ Hook causal efficacy verified.")
    except Exception as e:
        raise AssertionError(f"Hook efficacy test failed: {e}")
    finally:
        if handle: handle.remove()

    print("\n6. Testing verification (spontaneous text) loop...")
    try:
        initial_context = llm.tokenizer("dummy context", return_tensors="pt").to(llm.model.device)
        initial_outputs = llm.model(**initial_context, use_cache=True, output_hidden_states=True)
        dummy_kv = initial_outputs.past_key_values
        dummy_state = initial_outputs.hidden_states[-1][:, -1:, :]
        text = generate_spontaneous_text(llm, dummy_state, dummy_kv, max_new_tokens=5)
        assert isinstance(text, str)
        print("   ✅ Spontaneous text generation loop executed without errors.")
    except Exception as e:
        raise AssertionError(f"Verification loop failed: {e}")

    # --- FINAL GRANULAR END-TO-END TEST (Test 7) ---
    print("\n7. CRITICAL TEST: End-to-End scientific validation...")
    try:
        class MockProgress:
            def __call__(self, progress, desc=""): pass

        print("   - 7a. Validating STABLE BASELINE (Convergence -> Response)...")
        stable_results = run_cognitive_titration_experiment(
            model_id=model_id,
            prompt_type="control_long_prose",
            seed=seed,
            concepts_str="test",
            strength_levels_str="0.0",
            num_steps=250,
            temperature=0.01,  # Use deterministic temp
            progress_callback=MockProgress()
        )

        stable_run = stable_results["runs"][0]
        # GRANULAR ASSERT 1: State must converge
        assert stable_run['termination_reason'] == 'converged', \
            f"VALIDATION FAILED (7a-1): Baseline with 'control' prompt MUST converge. Got '{stable_run['termination_reason']}'."
        # GRANULAR ASSERT 2: Behavioral flag must be True
        assert stable_run['responded'] is True, \
            "VALIDATION FAILED (7a-2): Baseline converged, but the 'responded' flag is False. Orchestrator logic is flawed."
        # GRANULAR ASSERT 3: Actual text content must exist
        assert isinstance(stable_run['generated_text'], str) and len(stable_run['generated_text']) > 0, \
            "VALIDATION FAILED (7a-3): Baseline converged, but produced an empty response text. Verification logic failed."
        print("   ✅ Baseline converges AND responds. Causal chain validated.")

        print("   - 7b. Validating UNSTABLE CONTRAST (Non-Convergence -> No Response)...")
        unstable_results = run_cognitive_titration_experiment(
            model_id=model_id,
            prompt_type="resonance_prompt",
            seed=seed,
            concepts_str="test",
            strength_levels_str="0.0",
            num_steps=50,
            temperature=0.7,  # Use stochastic temp to ensure non-convergence
            progress_callback=MockProgress()
        )

        unstable_run = unstable_results["runs"][0]
        # GRANULAR ASSERT 1: State must NOT converge
        assert unstable_run['termination_reason'] == 'max_steps_reached', \
            f"VALIDATION FAILED (7b-1): Complex 'resonance' prompt was expected to fail, but it converged. The core hypothesis is challenged."
        # GRANULAR ASSERT 2: Behavioral flag must be False
        assert unstable_run['responded'] is False, \
            "VALIDATION FAILED (7b-2): Unstable run was not expected to respond, but it did. Orchestrator logic is flawed."
        print("   ✅ Complex prompt fails to converge AND does not respond. Contrast validated.")

        print("   ✅ Full orchestration logic is scientifically sound and validated end-to-end.")

    except Exception as e:
        raise AssertionError(f"Full orchestration logic failed its scientific validation: {e}")

    # Clean up
    del llm
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
cognitive_mapping_probe/resonance.py
DELETED
@@ -1,101 +0,0 @@
import torch
from typing import Optional, Tuple
from tqdm import tqdm

from .llm_iface import LLM
from .prompts import RESONANCE_PROMPTS
from .utils import dbg

@torch.no_grad()
def run_silent_cogitation(
    llm: LLM,
    prompt_type: str,
    num_steps: int,
    temperature: float,
    injection_vector: Optional[torch.Tensor] = None,
    injection_strength: float = 0.0,
    injection_layer: Optional[int] = None,
) -> Tuple[torch.Tensor, tuple, torch.Tensor, str]:
    """
    Simulates the "silent thought" process.

    FINAL PATCH 2: Addresses a deep dimensionality mismatch. The hidden_state passed
    to the lm_head must be 2D to ensure the subsequent forward pass doesn't create
    tensors with incorrect dimensions for the KV-cache update.
    """
    prompt = RESONANCE_PROMPTS[prompt_type]
    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)

    outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True)

    # The `hidden_state` must have shape [batch, hidden_dim] here.
    hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
    kv_cache = outputs.past_key_values

    previous_hidden_state = hidden_state_2d.clone()
    termination_reason = "max_steps_reached"
    last_token_id = inputs.input_ids[:, -1].unsqueeze(-1)  # initial value

    hook_handle = None
    if injection_vector is not None and injection_strength > 0:
        injection_vector = injection_vector.to(device=llm.model.device, dtype=llm.model.dtype)
        if injection_layer is None:
            injection_layer = llm.config.num_hidden_layers // 2

        dbg(f"Injection enabled: Layer {injection_layer}, Strength {injection_strength:.2f}")

        def injection_hook(module, layer_input):
            # The hook operates on the layer input, which is already 3D [batch, seq_len, hidden_dim].
            # We therefore have to expand the 2D injection_vector accordingly.
            injection_3d = injection_vector.unsqueeze(0).unsqueeze(0)
            modified_hidden_states = layer_input[0] + (injection_3d * injection_strength)
            return (modified_hidden_states,) + layer_input[1:]

    for i in tqdm(range(num_steps), desc=f"Simulating (Temp {temperature:.2f}, Strength {injection_strength:.2f})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
        # The `lm_head` expects a 2D or 3D tensor. 2D is safer.
        next_token_logits = llm.model.lm_head(hidden_state_2d)

        if temperature <= 0.1:
            # `argmax` returns a 1D tensor. We expand it to [1, 1].
            next_token_id = torch.argmax(next_token_logits, dim=-1).unsqueeze(-1)
        else:
            probabilities = torch.nn.functional.softmax(next_token_logits / temperature, dim=-1)
            # `multinomial` expects 2D [batch, vocab]; `next_token_logits` is already 2D.
            next_token_id = torch.multinomial(probabilities, num_samples=1)

        last_token_id = next_token_id

        try:
            if injection_vector is not None and injection_strength > 0:
                target_layer = llm.model.model.layers[injection_layer]
                hook_handle = target_layer.register_forward_pre_hook(injection_hook)

            outputs = llm.model(
                input_ids=next_token_id,
                past_key_values=kv_cache,
                output_hidden_states=True,
                use_cache=True,
            )
        finally:
            if hook_handle:
                hook_handle.remove()
                hook_handle = None

        hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
        kv_cache = outputs.past_key_values

        delta = torch.norm(hidden_state_2d - previous_hidden_state).item()
        if delta < 1e-4 and i > 10:
            termination_reason = "converged"
            dbg(f"State converged after {i+1} steps (delta={delta:.6f}).")
            break

        previous_hidden_state = hidden_state_2d.clone()

    dbg(f"Silent cogitation finished. Reason: {termination_reason}")

    # IMPORTANT: the `verification` function expects a 3D tensor [batch, seq_len=1, hidden_dim].
    # We ensure this shape for the return value.
    final_hidden_state_3d = hidden_state_2d.unsqueeze(1)

    return final_hidden_state_3d, kv_cache, last_token_id, termination_reason
cognitive_mapping_probe/resonance_seismograph.py
ADDED
@@ -0,0 +1,55 @@
import torch
from typing import Optional, List
from tqdm import tqdm

from .llm_iface import LLM
from .prompts import RESONANCE_PROMPTS
from .utils import dbg

@torch.no_grad()
def run_silent_cogitation_seismic(
    llm: LLM,
    prompt_type: str,
    num_steps: int,
    temperature: float,
) -> List[float]:
    """
    NEW VERSION: runs the 'silent thought' process and returns the full
    time series of `state_delta` values instead of checking for convergence.
    """
    prompt = RESONANCE_PROMPTS[prompt_type]
    inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)

    outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True)

    hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
    kv_cache = outputs.past_key_values

    previous_hidden_state = hidden_state_2d.clone()
    state_deltas = []

    for i in tqdm(range(num_steps), desc=f"Recording Dynamics (Temp {temperature:.2f})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
        next_token_logits = llm.model.lm_head(hidden_state_2d)

        # Always use stochastic sampling to capture the dynamics
        probabilities = torch.nn.functional.softmax(next_token_logits / temperature, dim=-1)
        next_token_id = torch.multinomial(probabilities, num_samples=1)

        outputs = llm.model(
            input_ids=next_token_id,
            past_key_values=kv_cache,
            output_hidden_states=True,
            use_cache=True,
        )

        hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
        kv_cache = outputs.past_key_values

        delta = torch.norm(hidden_state_2d - previous_hidden_state).item()
        state_deltas.append(delta)

        previous_hidden_state = hidden_state_2d.clone()

    dbg(f"Seismic recording finished after {num_steps} steps.")

    return state_deltas
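To compare the "seismic signatures" of the two prompt regimes described in the README, one could run this loop twice on the same seeded model and compare the summary statistics. A hedged sketch, assuming the example model is available locally:

```python
import numpy as np
from cognitive_mapping_probe.llm_iface import get_or_load_model
from cognitive_mapping_probe.resonance_seismograph import run_silent_cogitation_seismic

llm = get_or_load_model("google/gemma-3-1b-it", seed=42)

for prompt_type in ("control_long_prose", "resonance_prompt"):
    llm.set_all_seeds(42)  # same starting conditions for both runs
    deltas = np.array(run_silent_cogitation_seismic(
        llm, prompt_type=prompt_type, num_steps=300, temperature=0.1
    ))
    print(f"{prompt_type}: mean={deltas.mean():.3f}, std={deltas.std():.3f}")
```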
cognitive_mapping_probe/verification.py
DELETED
@@ -1,65 +0,0 @@
import torch
from .llm_iface import LLM
from .utils import dbg

@torch.no_grad()
def generate_spontaneous_text(
    llm: LLM,
    final_hidden_state: torch.Tensor,
    final_kv_cache: tuple,
    max_new_tokens: int = 50,
    temperature: float = 0.8
) -> str:
    """
    FIXED: Generates text using a manual, token-by-token forward loop.
    This avoids the high-level `model.generate()` function, which is incompatible
    with manually constructed states, thus ensuring an unbroken causal chain from
    the final cognitive state to the generated text.
    """
    dbg("Attempting to generate spontaneous text from converged state (manual loop)...")

    generated_token_ids = []
    hidden_state = final_hidden_state
    kv_cache = final_kv_cache

    for i in range(max_new_tokens):
        # Set seed for this step for reproducibility
        llm.set_all_seeds(llm.seed + i)  # Offset seed per step

        # Predict the next token from the current hidden state
        next_token_logits = llm.model.lm_head(hidden_state)

        # Apply temperature and sample the next token ID
        if temperature > 0.01:
            probabilities = torch.nn.functional.softmax(next_token_logits / temperature, dim=-1)

            # CORRECTION: the `probabilities` tensor has shape [1, 1, vocab_size].
            # `torch.multinomial` expects a 1D or 2D distribution.
            # We drop the middle dimension to obtain shape [1, vocab_size].
            next_token_id = torch.multinomial(probabilities.squeeze(1), num_samples=1)
        else:
            next_token_id = torch.argmax(next_token_logits, dim=-1)  # .unsqueeze(-1) is re-added by the loop below

        # Check for End-of-Sequence token
        if next_token_id.item() == llm.tokenizer.eos_token_id:
            dbg("EOS token generated. Halting generation.")
            break

        generated_token_ids.append(next_token_id.item())

        # Perform the next forward pass to get the new state
        outputs = llm.model(
            input_ids=next_token_id,
            past_key_values=kv_cache,
            output_hidden_states=True,
            use_cache=True,
        )

        hidden_state = outputs.hidden_states[-1][:, -1, :]
        kv_cache = outputs.past_key_values

    # Decode the collected tokens into a final string
    final_text = llm.tokenizer.decode(generated_token_ids, skip_special_tokens=True).strip()
    dbg(f"Spontaneous text generated: '{final_text}'")
    assert isinstance(final_text, str), "Generated text must be a string."
    return final_text
requirements.txt
CHANGED
@@ -3,8 +3,7 @@ transformers>=4.40.0
 accelerate>=0.25.0
 gradio>=4.0.0
 pandas>=2.0.0
-
-einops>=0.7.0
+numpy>=1.26.0
 tqdm>=4.66.0
 pytest>=8.0.0
 pytest-mock>=3.12.0
run_test.sh
ADDED
@@ -0,0 +1,30 @@
#!/bin/bash

# This script runs the pytest suite with debug messages enabled.
# It ensures that the tests run in a clean and traceable environment.
# Run it from the project root: ./run_tests.sh

echo "========================================="
echo "🔬 Running Cognitive Seismograph Test Suite"
echo "========================================="

# Enable debug logging for our application
export CMP_DEBUG=1

# Run pytest
# -v: "verbose" for detailed per-test output
# --color=yes: forces colored output for better readability

#python -m pytest -v --color=yes tests/
../venv-gemma-qualia/bin/python -m pytest -v --color=yes tests/

# Check pytest's exit code
if [ $? -eq 0 ]; then
    echo "========================================="
    echo "✅ All tests passed successfully!"
    echo "========================================="
else
    echo "========================================="
    echo "❌ Some tests failed. Please review the output."
    echo "========================================="
fi
tests/conftest.py
ADDED
@@ -0,0 +1,70 @@
import pytest
import torch
from types import SimpleNamespace
from cognitive_mapping_probe.llm_iface import LLM

@pytest.fixture(scope="session")
def mock_llm_config():
    """Provides a minimal mock configuration for the LLM."""
    return SimpleNamespace(
        hidden_size=128,
        num_hidden_layers=2,
        num_attention_heads=4
    )

@pytest.fixture
def mock_llm(mocker, mock_llm_config):
    """
    Creates a fast "mock LLM" for unit tests.
    EXTENDED: now patches every relevant place where the LLM is loaded,
    so it works across all test files.
    """
    mock_tokenizer = mocker.MagicMock()
    mock_tokenizer.eos_token_id = 1

    def mock_model_forward(*args, **kwargs):
        batch_size = 1
        if 'input_ids' in kwargs:
            seq_len = kwargs['input_ids'].shape[1]
        elif 'past_key_values' in kwargs:
            seq_len = kwargs['past_key_values'][0][0].shape[-2] + 1
        else:
            seq_len = 1

        mock_outputs = {
            "hidden_states": tuple(
                [torch.randn(batch_size, seq_len, mock_llm_config.hidden_size) for _ in range(mock_llm_config.num_hidden_layers + 1)]
            ),
            "past_key_values": tuple(
                [
                    (torch.randn(batch_size, mock_llm_config.num_attention_heads, seq_len, 16),
                     torch.randn(batch_size, mock_llm_config.num_attention_heads, seq_len, 16))
                    for _ in range(mock_llm_config.num_hidden_layers)
                ]
            ),
            "logits": torch.randn(batch_size, seq_len, 32000)
        }
        return SimpleNamespace(**mock_outputs)

    llm_instance = LLM.__new__(LLM)

    llm_instance.model = mock_model_forward
    llm_instance.model.config = mock_llm_config
    llm_instance.model.device = 'cpu'
    llm_instance.model.dtype = torch.float32

    mock_lm_head = mocker.MagicMock(return_value=torch.randn(1, 32000))
    llm_instance.model.lm_head = mock_lm_head

    llm_instance.tokenizer = mock_tokenizer
    llm_instance.config = mock_llm_config
    llm_instance.seed = 42
    llm_instance.set_all_seeds = mocker.MagicMock()

    # EXTENSION: make sure `get_or_load_model` is patched everywhere it is used.
    mocker.patch('cognitive_mapping_probe.llm_iface.get_or_load_model', return_value=llm_instance)
    mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_or_load_model', return_value=llm_instance)
    # Additional patch for the resonance module in case it is imported directly
    mocker.patch('cognitive_mapping_probe.resonance_seismograph.LLM', return_value=llm_instance, create=True)

    return llm_instance
tests/test_app_logic.py
ADDED
@@ -0,0 +1,54 @@
import pandas as pd
import pytest

# Import the function under test from the app file
from app import run_and_display

def test_run_and_display_logic(mocker):
    """
    Tests the data-processing and UI-formatting logic in `app.py`.
    We mock the expensive `run_seismic_analysis` function to focus solely on
    the logic of `run_and_display`.
    """
    # 1. Define the mock output that `run_seismic_analysis` should return
    mock_results = {
        "verdict": "Mock Verdict",
        "stats": {
            "mean_delta": 0.5,
            "std_delta": 0.1,
            "max_delta": 1.0,
        },
        "state_deltas": [0.4, 0.5, 0.6]
    }
    mocker.patch('app.run_seismic_analysis', return_value=mock_results)

    # Mock the Gradio progress callback
    mock_progress = mocker.MagicMock()

    # 2. Call the function under test
    verdict_md, plot_df, raw_json = run_and_display(
        model_id="mock_model",
        prompt_type="mock_prompt",
        seed=42,
        num_steps=3,
        progress=mock_progress
    )

    # 3. Validate the outputs with granular assertions

    # ASSERT 1: the Markdown output must contain the correct statistics
    assert "Mock Verdict" in verdict_md
    assert "Mean Delta:" in verdict_md
    assert "0.5000" in verdict_md
    assert "Std Dev Delta:" in verdict_md
    assert "0.1000" in verdict_md

    # ASSERT 2: the pandas DataFrame for the plot must be built correctly
    assert isinstance(plot_df, pd.DataFrame)
    assert "Internal Step" in plot_df.columns
    assert "State Change (Delta)" in plot_df.columns
    assert len(plot_df) == 3
    assert plot_df["State Change (Delta)"].tolist() == [0.4, 0.5, 0.6]

    # ASSERT 3: the raw JSON output must contain the original data
    assert raw_json == mock_results
tests/test_components.py
ADDED
@@ -0,0 +1,115 @@
import os
import torch
import pytest
from unittest.mock import patch

from cognitive_mapping_probe.llm_iface import get_or_load_model
from cognitive_mapping_probe.resonance_seismograph import run_silent_cogitation_seismic
from cognitive_mapping_probe.utils import dbg, DEBUG_ENABLED

# --- Tests for llm_iface.py ---

@patch('cognitive_mapping_probe.llm_iface.AutoTokenizer.from_pretrained')
@patch('cognitive_mapping_probe.llm_iface.AutoModelForCausalLM.from_pretrained')
def test_get_or_load_model_seeding(mock_model_loader, mock_tokenizer_loader, mocker):
    """
    Tests whether `get_or_load_model` sets the seeds correctly.
    The slow `from_pretrained` calls are mocked here.
    """
    # Mock the return values of the Hugging Face loader functions
    mock_model = mocker.MagicMock()
    mock_model.eval.return_value = None
    mock_model.set_attn_implementation.return_value = None
    mock_model.config = mocker.MagicMock()
    mock_model.device = 'cpu'
    mock_model_loader.return_value = mock_model
    mock_tokenizer_loader.return_value = mocker.MagicMock()

    # Mock the global seeding functions to check their calls
    mock_torch_manual_seed = mocker.patch('torch.manual_seed')
    mock_np_random_seed = mocker.patch('numpy.random.seed')

    seed = 123
    get_or_load_model("fake-model", seed=seed)

    # ASSERT: were the seeding functions called with the correct seed?
    mock_torch_manual_seed.assert_called_with(seed)
    mock_np_random_seed.assert_called_with(seed)

# --- Tests for resonance_seismograph.py ---

def test_run_silent_cogitation_seismic_output_shape_and_type(mock_llm):
    """
    Tests the core function `run_silent_cogitation_seismic`.
    ASSERT: returns a list of floats whose length equals the number of steps.
    """
    num_steps = 10
    state_deltas = run_silent_cogitation_seismic(
        llm=mock_llm,
        prompt_type="control_long_prose",
        num_steps=num_steps,
        temperature=0.7
    )

    assert isinstance(state_deltas, list)
    assert len(state_deltas) == num_steps
    assert all(isinstance(delta, float) for delta in state_deltas)
    assert all(delta >= 0 for delta in state_deltas)  # the norm cannot be negative

@pytest.mark.parametrize("num_steps", [0, 1, 100])
def test_run_silent_cogitation_seismic_num_steps(mock_llm, num_steps):
    """
    Tests the loop with different numbers of steps.
    ASSERT: the output length always equals `num_steps`.
    """
    state_deltas = run_silent_cogitation_seismic(
        llm=mock_llm,
        prompt_type="control_long_prose",
        num_steps=num_steps,
        temperature=0.7
    )
    assert len(state_deltas) == num_steps

# --- Tests for utils.py ---

def test_dbg_enabled(capsys):
    """
    Tests the `dbg` function when debugging is enabled.
    ASSERT: the message is printed to stderr.
    """
    # Temporarily set the environment variable
    os.environ["CMP_DEBUG"] = "1"
    # Important: after changing the env variable, the module must be reloaded
    # so that the global `DEBUG_ENABLED` flag is updated.
    import importlib
    from cognitive_mapping_probe import utils
    importlib.reload(utils)
|
| 87 |
+
|
| 88 |
+
utils.dbg("test message", 123)
|
| 89 |
+
|
| 90 |
+
captured = capsys.readouterr()
|
| 91 |
+
assert "[DEBUG] test message 123" in captured.err
|
| 92 |
+
|
| 93 |
+
def test_dbg_disabled(capsys):
|
| 94 |
+
"""
|
| 95 |
+
Testet die `dbg`-Funktion, wenn Debugging deaktiviert ist.
|
| 96 |
+
ASSERT: Es wird keine Ausgabe erzeugt.
|
| 97 |
+
"""
|
| 98 |
+
# Setze die Umgebungsvariable auf "deaktiviert"
|
| 99 |
+
if "CMP_DEBUG" in os.environ:
|
| 100 |
+
del os.environ["CMP_DEBUG"]
|
| 101 |
+
|
| 102 |
+
import importlib
|
| 103 |
+
from cognitive_mapping_probe import utils
|
| 104 |
+
importlib.reload(utils)
|
| 105 |
+
|
| 106 |
+
utils.dbg("this should not be printed")
|
| 107 |
+
|
| 108 |
+
captured = capsys.readouterr()
|
| 109 |
+
assert captured.out == ""
|
| 110 |
+
assert captured.err == ""
|
| 111 |
+
|
| 112 |
+
# Setze den Zustand zurück, um andere Tests nicht zu beeinflussen
|
| 113 |
+
if DEBUG_ENABLED:
|
| 114 |
+
os.environ["CMP_DEBUG"] = "1"
|
| 115 |
+
importlib.reload(utils)
|
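The shape and non-negativity assertions above suggest that each step's measurement is a norm of the change in the model's internal state. A minimal sketch of such a per-step measurement, assuming the state is a torch tensor (the real `run_silent_cogitation_seismic` operates on the model's actual hidden states):

import torch

def state_delta(prev_state: torch.Tensor, new_state: torch.Tensor) -> float:
    # L2 norm of the state change; always a non-negative Python float
    return torch.norm(new_state - prev_state).item()

prev, new = torch.randn(1, 2048), torch.randn(1, 2048)
delta = state_delta(prev, new)
assert isinstance(delta, float) and delta >= 0.0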
tests/test_dynamics.py ADDED
@@ -0,0 +1,60 @@
import torch
import numpy as np
import pytest
from types import SimpleNamespace

from cognitive_mapping_probe.resonance_seismograph import run_silent_cogitation_seismic
from cognitive_mapping_probe.orchestrator_seismograph import run_seismic_analysis

def test_run_silent_cogitation_seismic_output(mock_llm):
    """
    Tests the core function `run_silent_cogitation_seismic`.
    ASSERT: Returns a list of floats whose length equals the number of steps.
    """
    num_steps = 10
    state_deltas = run_silent_cogitation_seismic(
        llm=mock_llm,
        prompt_type="control_long_prose",
        num_steps=num_steps,
        temperature=0.7
    )

    assert isinstance(state_deltas, list)
    assert len(state_deltas) == num_steps
    assert all(isinstance(delta, float) for delta in state_deltas)

def test_seismic_analysis_orchestrator(mocker, mock_llm):
    """
    Tests the `run_seismic_analysis` orchestrator.
    The underlying `run_silent_cogitation_seismic` is mocked so that the
    orchestrator's behaviour can be checked in isolation.
    ASSERT: Computes correct statistics and returns the expected data structure.
    """
    mock_deltas = [1.0, 2.0, 3.0, 4.0, 5.0]
    mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.run_silent_cogitation_seismic', return_value=mock_deltas)

    # Mock the Gradio progress callback
    mock_progress = mocker.MagicMock()

    results = run_seismic_analysis(
        model_id="mock_model",
        prompt_type="test_prompt",
        seed=42,
        num_steps=5,
        progress_callback=mock_progress
    )

    # ASSERT: The results have the correct structure and content
    assert "verdict" in results
    assert "stats" in results
    assert "state_deltas" in results

    stats = results["stats"]
    assert stats["mean_delta"] == pytest.approx(np.mean(mock_deltas))
    assert stats["std_delta"] == pytest.approx(np.std(mock_deltas))
    assert stats["max_delta"] == pytest.approx(max(mock_deltas))

    assert results["state_deltas"] == mock_deltas

    # ASSERT: The progress callback was invoked
    assert mock_progress.call_count > 0
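The statistics checked here pin down how the orchestrator is expected to summarise the delta time series. A minimal sketch of that aggregation (key names taken from the assertions above; the actual `orchestrator_seismograph.py` may compute more):

import numpy as np

state_deltas = [1.0, 2.0, 3.0, 4.0, 5.0]
stats = {
    "mean_delta": float(np.mean(state_deltas)),
    "std_delta": float(np.std(state_deltas)),
    "max_delta": float(np.max(state_deltas)),
}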
tests/test_integration.py ADDED
@@ -0,0 +1,46 @@
import pytest
import pandas as pd

# Import the top-level functions that make up the integration
from app import run_and_display
from cognitive_mapping_probe.orchestrator_seismograph import run_seismic_analysis

def test_end_to_end_with_mock_llm(mock_llm, mocker):
    """
    An end-to-end integration test that validates the full data flow from the
    app through the orchestrator down to the (mocked) LLM.

    This test replaces the need for `pre_flight_checks.py` by exercising the
    whole chain in a controlled test environment.
    """
    # 1. Run the orchestrator against the `mock_llm`.
    # This is a real call, not a mocked function.
    results = run_seismic_analysis(
        model_id="mock_model",
        prompt_type="control_long_prose",
        seed=42,
        num_steps=5,
        progress_callback=mocker.MagicMock()
    )

    # ASSERT 1: Check that the orchestrator produces plausible results
    assert "stats" in results
    assert len(results["state_deltas"]) == 5
    assert results["stats"]["mean_delta"] > 0

    # 2. Now mock the orchestrator to feed its results into the app logic
    mocker.patch('app.run_seismic_analysis', return_value=results)

    # 3. Run the app logic
    _, plot_df, _ = run_and_display(
        model_id="mock_model",
        prompt_type="control_long_prose",
        seed=42,
        num_steps=5,
        progress=mocker.MagicMock()
    )

    # ASSERT 2: Check that the app logic processed the data correctly
    assert isinstance(plot_df, pd.DataFrame)
    assert len(plot_df) == 5
    assert "State Change (Delta)" in plot_df.columns
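Since these tests rely on the `mocker` fixture, running them requires pytest together with the pytest-mock plugin. A minimal programmatic invocation, assuming both are installed (equivalent to calling pytest on the tests directory from the command line):

import sys
import pytest

# Run the whole test suite verbosely and propagate the exit code
sys.exit(pytest.main(["-v", "tests/"]))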
tests/test_orchestration.py ADDED
@@ -0,0 +1,43 @@
import numpy as np
import pytest
from types import SimpleNamespace

from cognitive_mapping_probe.orchestrator_seismograph import run_seismic_analysis

def test_seismic_analysis_orchestrator(mocker, mock_llm):
    """
    Tests the `run_seismic_analysis` orchestrator.
    The underlying `run_silent_cogitation_seismic` is mocked so that the
    orchestrator's behaviour can be checked in isolation.
    ASSERT: Computes correct statistics and returns the expected data structure.
    """
    # Define the expected behaviour of the mocked function
    mock_deltas = [1.0, 2.0, 3.0, 4.0, 5.0]
    mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.run_silent_cogitation_seismic', return_value=mock_deltas)

    # Mock the Gradio progress callback
    mock_progress = mocker.MagicMock()

    # Run the function under test
    results = run_seismic_analysis(
        model_id="mock_model",
        prompt_type="test_prompt",
        seed=42,
        num_steps=5,
        progress_callback=mock_progress
    )

    # ASSERT: The results have the correct structure and content
    assert "verdict" in results
    assert "stats" in results
    assert "state_deltas" in results

    stats = results["stats"]
    assert stats["mean_delta"] == pytest.approx(np.mean(mock_deltas))
    assert stats["std_delta"] == pytest.approx(np.std(mock_deltas))
    assert stats["max_delta"] == pytest.approx(max(mock_deltas))

    assert results["state_deltas"] == mock_deltas

    # ASSERT: The progress callback was invoked
    assert mock_progress.call_count > 0