Update cognitive_mapping_probe/resonance_seismograph.py
cognitive_mapping_probe/resonance_seismograph.py (CHANGED)
@@ -15,24 +15,24 @@ def _calculate_attention_entropy(attentions: Tuple[torch.Tensor, ...]) -> float:
     """
     total_entropy = 0.0
     num_heads = 0
-
+
     # Iterate over all layers
     for layer_attention in attentions:
         # layer_attention shape: [batch_size, num_heads, seq_len, seq_len]
         # For our purposes, batch_size=1 and seq_len=1 (we only look at the last token)
         # The relevant distribution is the last row of the attention matrix
         attention_probs = layer_attention[:, :, -1, :]
-
+
         # Stabilize the logarithm computation
         attention_probs = attention_probs + 1e-9
-
-        # Entropy formula: - sum(p * log2(p))
+
+        # Entropy formula: - sum(p * log2(p))
         log_probs = torch.log2(attention_probs)
         entropy_per_head = -torch.sum(attention_probs * log_probs, dim=-1)
-
+
         total_entropy += torch.sum(entropy_per_head).item()
         num_heads += attention_probs.shape[1]
-
+
     return total_entropy / num_heads if num_heads > 0 else 0.0
 
 @torch.no_grad()
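Note: as a quick sanity check on the entropy computation in this hunk, the following standalone sketch applies the same formula to a synthetic uniform attention tensor, where the per-head value should equal log2(seq_len) (about 3.32 bits for seq_len=10), while sharply peaked attention would approach 0. The tensor shapes just follow the comment above; nothing here comes from the repository itself.

import torch

# Synthetic attention tensor shaped like a single layer's output:
# [batch_size=1, num_heads, seq_len, seq_len], uniform over the key positions.
num_heads, seq_len = 4, 10
uniform_attn = torch.full((1, num_heads, seq_len, seq_len), 1.0 / seq_len)

# Same steps as in _calculate_attention_entropy for one layer:
probs = uniform_attn[:, :, -1, :] + 1e-9
entropy_per_head = -torch.sum(probs * torch.log2(probs), dim=-1)

# Expect roughly log2(10) ≈ 3.32 bits per head for the uniform case.
print(entropy_per_head.mean().item())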
@@ -48,7 +48,6 @@ def run_cogitation_loop(
     patch_state_source: Optional[torch.Tensor] = None,
     reset_kv_cache_on_patch: bool = False,
     record_states: bool = False,
-    # NEW: parameter for recording attention patterns
     record_attentions: bool = False,
 ) -> Dict[str, Any]:
     """
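Note: a hedged usage sketch of the extended signature. It assumes an already initialized `llm` wrapper and a valid RESONANCE_PROMPTS key; the prompt key and the numeric values below are placeholders, not taken from the repository. With record_attentions=True, the returned dict's attention_entropies list is presumably filled with one value per step.

# Usage sketch; "some_prompt_key" is a placeholder, adjust to the project's prompt set.
results = run_cogitation_loop(
    llm=llm,
    prompt_type="some_prompt_key",
    num_steps=100,
    temperature=0.7,
    record_states=True,
    record_attentions=True,  # the flag kept by this commit
)
# Keys per the return statement below: state_deltas, state_history,
# attention_entropies, final_hidden_state, final_kv_cache.
print(len(results["attention_entropies"]))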
@@ -58,7 +57,6 @@ def run_cogitation_loop(
     prompt = RESONANCE_PROMPTS[prompt_type]
     inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
 
-    # First forward pass to obtain the initial state
     outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True, output_attentions=record_attentions)
     hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
     kv_cache = outputs.past_key_values
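Note: for context on what output_attentions=record_attentions yields here, a Hugging Face causal LM returns one attention tensor per layer, each shaped [batch_size, num_heads, seq_len, seq_len], which is exactly the tuple _calculate_attention_entropy iterates over. The sketch below uses gpt2 purely as a stand-in; the project's model is loaded through its own LLM wrapper.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in model for illustration only.
tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tok("A short resonance prompt.", return_tensors="pt")
with torch.no_grad():
    out = model(**inputs, output_hidden_states=True, use_cache=True, output_attentions=True)

# One tensor per layer, each [batch_size, num_heads, seq_len, seq_len].
print(len(out.attentions), out.attentions[0].shape)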
@@ -77,31 +75,44 @@ def run_cogitation_loop(
             if reset_kv_cache_on_patch:
                 dbg("--- KV-Cache has been RESET as part of the intervention. ---")
                 kv_cache = None
-
+
         if record_states:
             state_history.append(hidden_state_2d.cpu())
 
         next_token_logits = llm.model.lm_head(hidden_state_2d)
-
-        temp_to_use = temperature if temperature > 0.0 else 1.0
+
+        temp_to_use = temperature if temperature > 0.0 else 1.0
         probabilities = torch.nn.functional.softmax(next_token_logits / temp_to_use, dim=-1)
         if temperature > 0.0:
             next_token_id = torch.multinomial(probabilities, num_samples=1)
         else:
             next_token_id = torch.argmax(probabilities, dim=-1).unsqueeze(-1)
 
-        hook_handle = None
+        hook_handle = None
+        if injection_vector is not None and injection_strength > 0:
+            injection_vector = injection_vector.to(device=llm.model.device, dtype=llm.model.dtype)
+            if injection_layer is None:
+                injection_layer = llm.stable_config.num_layers // 2
+
+            def injection_hook(module: Any, layer_input: Any) -> Any:
+                seq_len = layer_input[0].shape[1]
+                injection_3d = injection_vector.unsqueeze(0).expand(1, seq_len, -1)
+                modified_hidden_states = layer_input[0] + (injection_3d * injection_strength)
+                return (modified_hidden_states,) + layer_input[1:]
 
         try:
-
+            if injection_vector is not None and injection_strength > 0 and injection_layer is not None:
+                assert 0 <= injection_layer < llm.stable_config.num_layers, f"Injection layer {injection_layer} is out of bounds."
+                target_layer = llm.stable_config.layer_list[injection_layer]
+                hook_handle = target_layer.register_forward_pre_hook(injection_hook)
+
             outputs = llm.model(
                 input_ids=next_token_id, past_key_values=kv_cache,
                 output_hidden_states=True, use_cache=True,
-                # Pass the parameter to every forward pass
                 output_attentions=record_attentions
            )
         finally:
-            if hook_handle:
+            if hook_handle:
                 hook_handle.remove()
                 hook_handle = None
 
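Note: the injection in this hunk is a standard PyTorch forward pre-hook that adds a scaled vector to a layer's input hidden states. The self-contained sketch below mirrors that pattern on a toy stack of linear layers so it can run without the project's LLM wrapper; layer sizes, the hooked index, and the strength value are arbitrary choices for illustration.

import torch
import torch.nn as nn

hidden_dim, seq_len = 16, 5
layers = nn.ModuleList([nn.Linear(hidden_dim, hidden_dim) for _ in range(4)])

injection_vector = torch.randn(hidden_dim)
injection_strength = 2.0

def injection_hook(module, layer_input):
    # layer_input is the tuple of positional inputs; element 0 is [batch, seq, hidden].
    injection_3d = injection_vector.unsqueeze(0).expand(1, layer_input[0].shape[1], -1)
    return (layer_input[0] + injection_3d * injection_strength,) + layer_input[1:]

# Hook an arbitrary middle layer, run a forward pass, and always detach the hook,
# matching the try/finally discipline used above.
handle = layers[2].register_forward_pre_hook(injection_hook)
try:
    x = torch.randn(1, seq_len, hidden_dim)
    for layer in layers:
        x = layer(x)
finally:
    handle.remove()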
@@ -117,16 +128,31 @@ def run_cogitation_loop(
         hidden_state_2d = new_hidden_state.clone()
 
     dbg(f"Cognitive loop finished after {num_steps} steps.")
-
+
     return {
         "state_deltas": state_deltas,
         "state_history": state_history,
-        "attention_entropies": attention_entropies,
+        "attention_entropies": attention_entropies,
         "final_hidden_state": hidden_state_2d,
         "final_kv_cache": kv_cache,
     }
 
-def run_silent_cogitation_seismic(
-
-
-
+def run_silent_cogitation_seismic(
+    llm: LLM,
+    prompt_type: str,
+    num_steps: int,
+    temperature: float,
+    injection_vector: Optional[torch.Tensor] = None,
+    injection_strength: float = 0.0,
+    injection_layer: Optional[int] = None
+) -> List[float]:
+    """
+    A backwards-compatible wrapper that keeps the old, simpler interface.
+    It calls the new, generalized loop and returns only the state deltas.
+    """
+    results = run_cogitation_loop(
+        llm=llm, prompt_type=prompt_type, num_steps=num_steps, temperature=temperature,
+        injection_vector=injection_vector, injection_strength=injection_strength,
+        injection_layer=injection_layer
+    )
+    return results["state_deltas"]
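Note: a hedged usage sketch of the backwards-compatible wrapper. Existing call sites that only need the per-step state deltas keep working unchanged, and the injection arguments stay optional; `llm` and the prompt key are assumed to come from the surrounding project, and the numbers are placeholders.

deltas = run_silent_cogitation_seismic(
    llm=llm,
    prompt_type="some_prompt_key",  # placeholder key from RESONANCE_PROMPTS
    num_steps=50,
    temperature=0.7,
)
# Only List[float] is returned; the richer dict (state_history, attention_entropies,
# final_hidden_state, final_kv_cache) is available from run_cogitation_loop directly.
print(len(deltas), sum(deltas) / max(len(deltas), 1))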