neuralworm committed
Commit 1cf9e80 · 1 Parent(s): 2169e97

get layers from model

cognitive_mapping_probe/llm_iface.py CHANGED
@@ -3,8 +3,8 @@ import torch
 import random
 import numpy as np
 from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
-from typing import Optional
-from dataclasses import dataclass
+from typing import Optional, List
+from dataclasses import dataclass, field
 
 from .utils import dbg
 
@@ -15,10 +15,12 @@ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
 class StableLLMConfig:
     """
     A stable, internal abstraction layer for model configurations.
-    Keeps our code independent of the changing attribute names in `transformers`.
+    This is the "single source of truth" for the model's architecture.
     """
     hidden_dim: int
     num_layers: int
+    # FINAL FIX: Store a direct reference to the layer list
+    layer_list: List[torch.nn.Module] = field(default_factory=list, repr=False)
 
 class LLM:
     """
@@ -59,7 +61,7 @@ class LLM:
     def _populate_stable_config(self) -> StableLLMConfig:
         """
         Reads the volatile `transformers` configuration and populates our stable dataclass.
-        Implements a robust, multi-stage fallback strategy and auto-diagnostic error handling.
+        Determines the "ground truth" of the architecture via direct inspection.
         """
         # --- Robust method for hidden_dim ---
         hidden_dim = 0
@@ -68,16 +70,21 @@ class LLM:
         except AttributeError:
             hidden_dim = getattr(self.config, 'hidden_size', getattr(self.config, 'd_model', 0))
 
-        # --- Robust method for num_layers ---
+        # --- FINAL FIX: Robust method for num_layers and layer_list ---
         num_layers = 0
+        layer_list = []
         try:
-            # METHOD 1 (BEST): Direct inspection of the architecture, based on empirical evidence.
+            # METHOD 1 (BEST): Direct inspection based on empirical evidence.
             if hasattr(self.model, 'model') and hasattr(self.model.model, 'language_model') and hasattr(self.model.model.language_model, 'layers'):
-                num_layers = len(self.model.model.language_model.layers)
+                layer_list = self.model.model.language_model.layers
             elif hasattr(self.model, 'model') and hasattr(self.model.model, 'layers'):
-                num_layers = len(self.model.model.layers)
+                layer_list = self.model.model.layers
             elif hasattr(self.model, 'transformer') and hasattr(self.model.transformer, 'h'):
-                num_layers = len(self.model.transformer.h)
+                layer_list = self.model.transformer.h
+
+            if layer_list:
+                num_layers = len(layer_list)
+
         except (AttributeError, TypeError):
             pass
 
@@ -85,21 +92,20 @@
             # METHOD 2 (FALLBACK): Inspect the declarative config file.
             num_layers = getattr(self.config, 'num_hidden_layers', getattr(self.config, 'num_layers', 0))
 
-        # --- NEW: Automated diagnostic output on failure ---
-        if hidden_dim <= 0 or num_layers <= 0:
+        # --- Auto-diagnostic error handling ---
+        if hidden_dim <= 0 or num_layers <= 0 or not layer_list:
             dbg("--- CRITICAL: Failed to auto-determine model configuration. ---")
-            dbg(f"Detected hidden_dim: {hidden_dim}, num_layers: {num_layers}")
+            dbg(f"Detected hidden_dim: {hidden_dim}, num_layers: {num_layers}, found_layer_list: {bool(layer_list)}")
             dbg("--- DUMPING MODEL ARCHITECTURE FOR DEBUGGING: ---")
             dbg(self.model)
             dbg("--- END ARCHITECTURE DUMP ---")
 
-        # Final assertions to ensure scientific validity.
-        # These now produce an informative debug dump before crashing.
-        assert hidden_dim > 0, "Could not determine hidden dimension from model config. Check debug dump above."
-        assert num_layers > 0, "Could not determine number of layers from model config. Check debug dump above."
+        assert hidden_dim > 0, "Could not determine hidden dimension. Check debug dump."
+        assert num_layers > 0, "Could not determine number of layers. Check debug dump."
+        assert layer_list, "Could not find the list of transformer layers. Check debug dump."
 
         dbg(f"Populated stable config: hidden_dim={hidden_dim}, num_layers={num_layers}")
-        return StableLLMConfig(hidden_dim=hidden_dim, num_layers=num_layers)
+        return StableLLMConfig(hidden_dim=hidden_dim, num_layers=num_layers, layer_list=layer_list)
 
     def set_all_seeds(self, seed: int):
         """Sets all relevant seeds for maximum reproducibility."""
cognitive_mapping_probe/resonance_seismograph.py CHANGED
@@ -17,8 +17,8 @@ def run_silent_cogitation_seismic(
     injection_layer: Optional[int] = None,
 ) -> List[float]:
     """
-    EXTENDED VERSION: Runs the 'silent thought' process and allows
-    the injection of concept vectors to modulate the dynamics.
+    Runs the 'silent thought' process and allows the injection of
+    concept vectors to modulate the dynamics.
     """
     prompt = RESONANCE_PROMPTS[prompt_type]
     inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
@@ -31,20 +31,15 @@ def run_silent_cogitation_seismic(
     previous_hidden_state = hidden_state_2d.clone()
     state_deltas = []
 
-    # Prepare the hook for the injection
     hook_handle = None
     if injection_vector is not None and injection_strength > 0:
         injection_vector = injection_vector.to(device=llm.model.device, dtype=llm.model.dtype)
         if injection_layer is None:
-            # FIX: Access the stable, abstracted configuration.
             injection_layer = llm.stable_config.num_layers // 2
 
         dbg(f"Injection enabled: Layer {injection_layer}, Strength {injection_strength:.2f}")
 
         def injection_hook(module, layer_input):
-            # The hook operates on the input, which is already 3D [batch, seq_len, hidden_dim].
-            # Make sure the vector is broadcast correctly to the input's sequence length.
-            # Normally seq_len is 1 here.
             seq_len = layer_input[0].shape[1]
             injection_3d = injection_vector.unsqueeze(0).expand(1, seq_len, -1)
             modified_hidden_states = layer_input[0] + (injection_3d * injection_strength)
@@ -53,7 +48,6 @@
     for i in tqdm(range(num_steps), desc=f"Recording Dynamics (Temp {temperature:.2f})", leave=False, bar_format="{l_bar}{bar:10}{r_bar}"):
         next_token_logits = llm.model.lm_head(hidden_state_2d)
 
-        # Use a minimal temperature to guarantee deterministic behavior at temp=0
         temp_to_use = temperature if temperature > 0.0 else 1.0
         probabilities = torch.nn.functional.softmax(next_token_logits / temp_to_use, dim=-1)
 
@@ -62,13 +56,11 @@
         else:
             next_token_id = torch.argmax(probabilities, dim=-1).unsqueeze(-1)
 
-
         try:
-            # Activate the hook before the forward pass
             if injection_vector is not None and injection_strength > 0:
-                # Make sure the layer index is valid.
                 assert 0 <= injection_layer < llm.stable_config.num_layers, f"Injection layer {injection_layer} is out of bounds."
-                target_layer = llm.model.model.layers[injection_layer]
+                # FINAL FIX: Access the stable, abstracted layer list.
+                target_layer = llm.stable_config.layer_list[injection_layer]
                 hook_handle = target_layer.register_forward_pre_hook(injection_hook)
 
             outputs = llm.model(
@@ -78,7 +70,6 @@
                 use_cache=True,
             )
         finally:
-            # Deactivate the hook immediately after the pass
            if hook_handle:
                 hook_handle.remove()
                 hook_handle = None
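
For context on the hook mechanics used above: a forward pre-hook that returns a tuple replaces the positional inputs of the hooked module for that forward pass, and the handle has to be removed afterwards, which is what the `try/finally` guarantees. A toy sketch with plain PyTorch (stand-in module, not repo code):

import torch

hidden_dim = 8
layer = torch.nn.Linear(hidden_dim, hidden_dim)   # stand-in for one decoder block
injection_vector = torch.randn(hidden_dim)
injection_strength = 1.0

def injection_hook(module, layer_input):
    # layer_input is the tuple of positional args; element 0 is [batch, seq_len, hidden_dim].
    seq_len = layer_input[0].shape[1]
    injection_3d = injection_vector.unsqueeze(0).expand(1, seq_len, -1)
    modified = layer_input[0] + injection_3d * injection_strength
    # Returning a tuple substitutes the module's inputs for this forward pass.
    return (modified,) + layer_input[1:]

handle = layer.register_forward_pre_hook(injection_hook)
try:
    out = layer(torch.zeros(1, 3, hidden_dim))    # the vector is added at every position
finally:
    handle.remove()                               # detach immediately, as in the try/finally above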
tests/conftest.py CHANGED
@@ -6,7 +6,6 @@ from cognitive_mapping_probe.llm_iface import LLM, StableLLMConfig
 @pytest.fixture(scope="session")
 def mock_llm_config():
     """Provides a minimal mock configuration for the LLM."""
-    # This fixture represents the *volatile* transformers configuration
     return SimpleNamespace(
         hidden_size=128,
         num_hidden_layers=2,
@@ -17,7 +16,7 @@
 def mock_llm(mocker, mock_llm_config):
     """
     Creates a robust "mock LLM" for unit tests.
-    FINALLY CORRECTED: Now also simulates the `stable_config` abstraction layer.
+    FINALLY CORRECTED: Now simulates the complete `StableLLMConfig` abstraction.
     """
     mock_tokenizer = mocker.MagicMock()
     mock_tokenizer.eos_token_id = 1
@@ -48,21 +47,27 @@
     llm_instance.model.device = 'cpu'
     llm_instance.model.dtype = torch.float32
     llm_instance.model.get_input_embeddings.return_value = mock_embedding_layer
+    llm_instance.model.lm_head = mocker.MagicMock(return_value=torch.randn(1, 32000))
 
+    # FINAL FIX: Simulate the layer list for the hook test
     mock_layer = mocker.MagicMock()
     mock_layer.register_forward_pre_hook.return_value = mocker.MagicMock()
-    llm_instance.model.model = SimpleNamespace(layers=[mock_layer] * mock_llm_config.num_hidden_layers)
-    llm_instance.model.lm_head = mocker.MagicMock(return_value=torch.randn(1, 32000))
+    mock_layer_list = [mock_layer] * mock_llm_config.num_hidden_layers
+
+    # Simulate the different possible architecture paths
+    llm_instance.model.model = SimpleNamespace()
+    llm_instance.model.model.language_model = SimpleNamespace(layers=mock_layer_list)
 
     llm_instance.tokenizer = mock_tokenizer
-    llm_instance.config = mock_llm_config  # the original, volatile config
+    llm_instance.config = mock_llm_config
     llm_instance.seed = 42
     llm_instance.set_all_seeds = mocker.MagicMock()
 
-    # FIX: Create the stable configuration that the tests now expect.
+    # Create the stable configuration that the tests now expect.
     llm_instance.stable_config = StableLLMConfig(
         hidden_dim=mock_llm_config.hidden_size,
-        num_layers=mock_llm_config.num_hidden_layers
+        num_layers=mock_llm_config.num_hidden_layers,
+        layer_list=mock_layer_list  # add the reference to the mock layer list
     )
 
     # Patch every place where the model is actually loaded.
@@ -70,7 +75,6 @@
     mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_or_load_model', return_value=llm_instance)
     mocker.patch('cognitive_mapping_probe.auto_experiment.get_or_load_model', return_value=llm_instance)
 
-    # Mock `get_concept_vector` so that it is not executed in the orchestrator test.
     mocker.patch('cognitive_mapping_probe.orchestrator_seismograph.get_concept_vector', return_value=torch.randn(mock_llm_config.hidden_size))
 
     return llm_instance
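
The fixture now mirrors the first detection path of `_populate_stable_config` (`model.model.language_model.layers`), so the same attribute chain the production code walks also resolves the mocked layer list. A small standalone sketch of that correspondence (standard-library mocks only; values are illustrative):

from types import SimpleNamespace
from unittest.mock import MagicMock

mock_layer_list = [MagicMock()] * 2
model = MagicMock()
model.model = SimpleNamespace()
model.model.language_model = SimpleNamespace(layers=mock_layer_list)

# The same hasattr-chain as METHOD 1 in _populate_stable_config:
found = []
if hasattr(model, "model") and hasattr(model.model, "language_model") and hasattr(model.model.language_model, "layers"):
    found = model.model.language_model.layers

assert found is mock_layer_list and len(found) == 2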
tests/test_components.py CHANGED
@@ -21,17 +21,15 @@
     mock_model.eval.return_value = None
     mock_model.set_attn_implementation.return_value = None
     mock_model.device = 'cpu'
-
-    # FIX: Configure the attributes expected by `_populate_stable_config`.
-    # 1. The primary path via `get_input_embeddings`
-    mock_model.get_input_embeddings.return_value.weight.shape = (32000, 128)  # (vocab_size, hidden_dim)
-    # 2. The fallback attributes on the `config` object
+
+    mock_model.get_input_embeddings.return_value.weight.shape = (32000, 128)
     mock_model.config = mocker.MagicMock()
     mock_model.config.num_hidden_layers = 2
-    # We set `hidden_size` on the config object even though the primary path does not need it,
-    # to make the mock complete.
     mock_model.config.hidden_size = 128
 
+    # Simulate the architecture for the layer extraction
+    mock_model.model.language_model.layers = [mocker.MagicMock()] * 2
+
     mock_model_loader.return_value = mock_model
     mock_tokenizer_loader.return_value = mocker.MagicMock()
 
@@ -39,10 +37,8 @@
     mock_np_random_seed = mocker.patch('numpy.random.seed')
 
     seed = 123
-    # This call should now run through without a `TypeError`.
     get_or_load_model("fake-model", seed=seed)
 
-    # The original assertions remain valid.
     mock_torch_manual_seed.assert_called_with(seed)
     mock_np_random_seed.assert_called_with(seed)
 
@@ -60,7 +56,10 @@
     assert all(isinstance(delta, float) for delta in state_deltas)
 
 def test_run_silent_cogitation_with_injection_hook_usage(mock_llm):
-    """Tests that the hook is registered correctly during an injection."""
+    """
+    Tests that the hook is registered correctly during an injection.
+    FINALLY CORRECTED: Accesses the stable abstraction layer.
+    """
     num_steps = 5
     injection_vector = torch.randn(mock_llm.stable_config.hidden_dim)
     run_silent_cogitation_seismic(
@@ -68,7 +67,9 @@
         num_steps=num_steps, temperature=0.7,
         injection_vector=injection_vector, injection_strength=1.0
     )
-    assert mock_llm.model.model.layers[0].register_forward_pre_hook.call_count == num_steps
+    # FIX: The test must use the same abstraction path as the application.
+    # We check the hook call on the first layer of the stable, abstracted layer list.
+    assert mock_llm.stable_config.layer_list[0].register_forward_pre_hook.call_count == num_steps
 
 # --- Tests for concepts.py ---
 
@@ -113,4 +114,4 @@
     importlib.reload(utils)
     utils.dbg("should not be printed")
     captured = capsys.readouterr()
-    assert captured.err == ""
+    assert captured.err == ""
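
One subtlety behind the updated assertion: the fixture builds the list as `[mock_layer] * num_hidden_layers`, so every index refers to the same `MagicMock`, and `call_count` therefore aggregates hook registrations regardless of which layer index the code under test selects. A minimal illustration (standard library only):

from unittest.mock import MagicMock

mock_layer = MagicMock()
layer_list = [mock_layer] * 2            # both entries are the same mock object

layer_list[1].register_forward_pre_hook(lambda module, args: None)
layer_list[0].register_forward_pre_hook(lambda module, args: None)

# Checking index 0, as the test does, still counts registrations made via index 1.
assert layer_list[0].register_forward_pre_hook.call_count == 2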