neuralworm committed on
Commit d15bd24 · verified · 1 Parent(s): a4f24f3

Update cognitive_mapping_probe/resonance_seismograph.py

cognitive_mapping_probe/resonance_seismograph.py CHANGED
@@ -15,24 +15,24 @@ def _calculate_attention_entropy(attentions: Tuple[torch.Tensor, ...]) -> float:
     """
     total_entropy = 0.0
     num_heads = 0
-
+
     # Iterate over all layers
     for layer_attention in attentions:
         # layer_attention shape: [batch_size, num_heads, seq_len, seq_len]
         # For our purposes batch_size=1, seq_len=1 (we only look at the last token)
         # The relevant distribution is the last row of the attention matrix
         attention_probs = layer_attention[:, :, -1, :]
-
+
         # Stabilize the logarithm computation
         attention_probs = attention_probs + 1e-9
-
-        # Entropy formula: - sum(p * log(p))
+
+        # Entropy formula: - sum(p * log2(p))
         log_probs = torch.log2(attention_probs)
         entropy_per_head = -torch.sum(attention_probs * log_probs, dim=-1)
-
+
         total_entropy += torch.sum(entropy_per_head).item()
         num_heads += attention_probs.shape[1]
-
+
     return total_entropy / num_heads if num_heads > 0 else 0.0
 
 @torch.no_grad()
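
For orientation, a minimal sketch (not part of this commit) of how the averaged per-head attention entropy behaves on dummy tensors; it assumes the module is importable as a package, and all shapes and values are illustrative:

import torch
from cognitive_mapping_probe.resonance_seismograph import _calculate_attention_entropy

# Two dummy layers: batch_size=1, 4 heads, query length 1, attending over 10 key positions.
torch.manual_seed(0)
dummy_attentions = tuple(
    torch.softmax(torch.randn(1, 4, 1, 10), dim=-1) for _ in range(2)
)

# Uniform attention over 10 positions would give log2(10) ≈ 3.32 bits per head;
# sharply peaked attention drives the average toward 0.
print(_calculate_attention_entropy(dummy_attentions))
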
@@ -48,7 +48,6 @@ def run_cogitation_loop(
     patch_state_source: Optional[torch.Tensor] = None,
     reset_kv_cache_on_patch: bool = False,
     record_states: bool = False,
-    # NEW: parameter for recording attention patterns
     record_attentions: bool = False,
 ) -> Dict[str, Any]:
     """
@@ -58,7 +57,6 @@ def run_cogitation_loop(
     prompt = RESONANCE_PROMPTS[prompt_type]
     inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
 
-    # First forward pass to obtain the initial state
     outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True, output_attentions=record_attentions)
     hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
     kv_cache = outputs.past_key_values
@@ -77,31 +75,44 @@ def run_cogitation_loop(
         if reset_kv_cache_on_patch:
             dbg("--- KV-Cache has been RESET as part of the intervention. ---")
             kv_cache = None
-
+
         if record_states:
             state_history.append(hidden_state_2d.cpu())
 
         next_token_logits = llm.model.lm_head(hidden_state_2d)
-
-        temp_to_use = temperature if temperature > 0.0 else 1.0
+
+        temp_to_use = temperature if temperature > 0.0 else 1.0
         probabilities = torch.nn.functional.softmax(next_token_logits / temp_to_use, dim=-1)
         if temperature > 0.0:
             next_token_id = torch.multinomial(probabilities, num_samples=1)
         else:
             next_token_id = torch.argmax(probabilities, dim=-1).unsqueeze(-1)
 
-        hook_handle = None  # hook logic unchanged
+        hook_handle = None
+        if injection_vector is not None and injection_strength > 0:
+            injection_vector = injection_vector.to(device=llm.model.device, dtype=llm.model.dtype)
+            if injection_layer is None:
+                injection_layer = llm.stable_config.num_layers // 2
+
+            def injection_hook(module: Any, layer_input: Any) -> Any:
+                seq_len = layer_input[0].shape[1]
+                injection_3d = injection_vector.unsqueeze(0).expand(1, seq_len, -1)
+                modified_hidden_states = layer_input[0] + (injection_3d * injection_strength)
+                return (modified_hidden_states,) + layer_input[1:]
 
         try:
-            # (hook activation unchanged)
+            if injection_vector is not None and injection_strength > 0 and injection_layer is not None:
+                assert 0 <= injection_layer < llm.stable_config.num_layers, f"Injection layer {injection_layer} is out of bounds."
+                target_layer = llm.stable_config.layer_list[injection_layer]
+                hook_handle = target_layer.register_forward_pre_hook(injection_hook)
+
             outputs = llm.model(
                 input_ids=next_token_id, past_key_values=kv_cache,
                 output_hidden_states=True, use_cache=True,
-                # Pass the parameter to every forward pass
                 output_attentions=record_attentions
             )
         finally:
-            if hook_handle:
+            if hook_handle:
                 hook_handle.remove()
                 hook_handle = None
 
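
The injection code added above uses PyTorch's standard forward-pre-hook mechanism. The following standalone sketch shows the same pattern on a toy nn.Linear instead of a transformer layer; the names and sizes are illustrative, not taken from this repository:

import torch
import torch.nn as nn

layer = nn.Linear(8, 8)
injection_vector = torch.randn(8)
injection_strength = 2.0

def injection_hook(module, layer_input):
    hidden = layer_input[0]  # [batch, seq_len, hidden]
    injection_3d = injection_vector.unsqueeze(0).expand(1, hidden.shape[1], -1)
    # Shift the layer's input by the scaled vector before the forward pass runs.
    return (hidden + injection_3d * injection_strength,) + layer_input[1:]

handle = layer.register_forward_pre_hook(injection_hook)
out = layer(torch.zeros(1, 3, 8))  # the forward pass sees the shifted input
handle.remove()
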
@@ -117,16 +128,31 @@ def run_cogitation_loop(
         hidden_state_2d = new_hidden_state.clone()
 
     dbg(f"Cognitive loop finished after {num_steps} steps.")
-
+
     return {
         "state_deltas": state_deltas,
         "state_history": state_history,
-        "attention_entropies": attention_entropies, # the new measurement result
+        "attention_entropies": attention_entropies,
        "final_hidden_state": hidden_state_2d,
        "final_kv_cache": kv_cache,
    }
 
-def run_silent_cogitation_seismic(*args, **kwargs) -> List[float]:
-    """Backwards-compatible wrapper."""
-    results = run_cogitation_loop(*args, **kwargs)
-    return results["state_deltas"]
+def run_silent_cogitation_seismic(
+    llm: LLM,
+    prompt_type: str,
+    num_steps: int,
+    temperature: float,
+    injection_vector: Optional[torch.Tensor] = None,
+    injection_strength: float = 0.0,
+    injection_layer: Optional[int] = None
+) -> List[float]:
+    """
+    A backwards-compatible wrapper that keeps the old, simpler interface.
+    Calls the new, generalized loop and returns only the deltas.
+    """
+    results = run_cogitation_loop(
+        llm=llm, prompt_type=prompt_type, num_steps=num_steps, temperature=temperature,
+        injection_vector=injection_vector, injection_strength=injection_strength,
+        injection_layer=injection_layer
+    )
+    return results["state_deltas"]
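
A hypothetical call site for the retained wrapper; the llm object and the prompt key are assumptions for illustration, not names confirmed by this commit:

# `llm` is assumed to be an already-initialized LLM wrapper from this package,
# and "default" is a placeholder key into RESONANCE_PROMPTS.
deltas = run_silent_cogitation_seismic(
    llm=llm,
    prompt_type="default",
    num_steps=100,
    temperature=0.7,
)
print(f"Mean state delta over {len(deltas)} steps: {sum(deltas) / len(deltas):.4f}")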