Update cognitive_mapping_probe/resonance_seismograph.py
cognitive_mapping_probe/resonance_seismograph.py (CHANGED)
@@ -15,24 +15,24 @@ def _calculate_attention_entropy(attentions: Tuple[torch.Tensor, ...]) -> float:
     """
     total_entropy = 0.0
     num_heads = 0
-
+
     # Iterate over all layers
     for layer_attention in attentions:
         # layer_attention shape: [batch_size, num_heads, seq_len, seq_len]
         # For our purposes, batch_size=1 and seq_len=1 (we only look at the last token)
         # The relevant distribution is the last row of the attention matrix
         attention_probs = layer_attention[:, :, -1, :]
-
+
         # Stabilize the logarithm computation
         attention_probs = attention_probs + 1e-9
-
-        # Entropy formula: - sum(p * log2(p))
+
+        # Entropy formula: - sum(p * log2(p))
         log_probs = torch.log2(attention_probs)
         entropy_per_head = -torch.sum(attention_probs * log_probs, dim=-1)
-
+
         total_entropy += torch.sum(entropy_per_head).item()
         num_heads += attention_probs.shape[1]
-
+
     return total_entropy / num_heads if num_heads > 0 else 0.0
 
 @torch.no_grad()
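Note: as a quick sanity check on the entropy computation in this hunk, the following standalone sketch applies the same formula to a synthetic uniform attention tensor, where the per-head value should equal log2(seq_len) (about 3.32 bits for seq_len=10), while sharply peaked attention would approach 0. The tensor shapes just follow the comment above; nothing here comes from the repository itself.

import torch

# Synthetic attention tensor shaped like a single layer's output:
# [batch_size=1, num_heads, seq_len, seq_len], uniform over the key positions.
num_heads, seq_len = 4, 10
uniform_attn = torch.full((1, num_heads, seq_len, seq_len), 1.0 / seq_len)

# Same steps as in _calculate_attention_entropy for one layer:
probs = uniform_attn[:, :, -1, :] + 1e-9
entropy_per_head = -torch.sum(probs * torch.log2(probs), dim=-1)

# Expect roughly log2(10) ≈ 3.32 bits per head for the uniform case.
print(entropy_per_head.mean().item())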
@@ -48,7 +48,6 @@ def run_cogitation_loop(
     patch_state_source: Optional[torch.Tensor] = None,
     reset_kv_cache_on_patch: bool = False,
     record_states: bool = False,
-    # NEW: parameter for recording attention patterns
     record_attentions: bool = False,
 ) -> Dict[str, Any]:
     """
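Note: a hedged usage sketch of the extended signature. It assumes an already initialized `llm` wrapper and a valid RESONANCE_PROMPTS key; the prompt key and the numeric values below are placeholders, not taken from the repository. With record_attentions=True, the returned dict's attention_entropies list is presumably filled with one value per step.

# Usage sketch; "some_prompt_key" is a placeholder, adjust to the project's prompt set.
results = run_cogitation_loop(
    llm=llm,
    prompt_type="some_prompt_key",
    num_steps=100,
    temperature=0.7,
    record_states=True,
    record_attentions=True,  # the flag kept by this commit
)
# Keys per the return statement below: state_deltas, state_history,
# attention_entropies, final_hidden_state, final_kv_cache.
print(len(results["attention_entropies"]))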
@@ -58,7 +57,6 @@ def run_cogitation_loop(
     prompt = RESONANCE_PROMPTS[prompt_type]
     inputs = llm.tokenizer(prompt, return_tensors="pt").to(llm.model.device)
 
-    # First forward pass to obtain the initial state
     outputs = llm.model(**inputs, output_hidden_states=True, use_cache=True, output_attentions=record_attentions)
     hidden_state_2d = outputs.hidden_states[-1][:, -1, :]
     kv_cache = outputs.past_key_values
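Note: for context on what output_attentions=record_attentions yields here, a Hugging Face causal LM returns one attention tensor per layer, each shaped [batch_size, num_heads, seq_len, seq_len], which is exactly the tuple _calculate_attention_entropy iterates over. The sketch below uses gpt2 purely as a stand-in; the project's model is loaded through its own LLM wrapper.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in model for illustration only.
tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tok("A short resonance prompt.", return_tensors="pt")
with torch.no_grad():
    out = model(**inputs, output_hidden_states=True, use_cache=True, output_attentions=True)

# One tensor per layer, each [batch_size, num_heads, seq_len, seq_len].
print(len(out.attentions), out.attentions[0].shape)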
@@ -77,31 +75,44 @@ def run_cogitation_loop(
             if reset_kv_cache_on_patch:
                 dbg("--- KV-Cache has been RESET as part of the intervention. ---")
                 kv_cache = None
-
+
         if record_states:
             state_history.append(hidden_state_2d.cpu())
 
         next_token_logits = llm.model.lm_head(hidden_state_2d)
-
-        temp_to_use = temperature if temperature > 0.0 else 1.0
+
+        temp_to_use = temperature if temperature > 0.0 else 1.0
         probabilities = torch.nn.functional.softmax(next_token_logits / temp_to_use, dim=-1)
         if temperature > 0.0:
             next_token_id = torch.multinomial(probabilities, num_samples=1)
         else:
             next_token_id = torch.argmax(probabilities, dim=-1).unsqueeze(-1)
 
-        hook_handle = None
+        hook_handle = None
+        if injection_vector is not None and injection_strength > 0:
+            injection_vector = injection_vector.to(device=llm.model.device, dtype=llm.model.dtype)
+            if injection_layer is None:
+                injection_layer = llm.stable_config.num_layers // 2
+
+            def injection_hook(module: Any, layer_input: Any) -> Any:
+                seq_len = layer_input[0].shape[1]
+                injection_3d = injection_vector.unsqueeze(0).expand(1, seq_len, -1)
+                modified_hidden_states = layer_input[0] + (injection_3d * injection_strength)
+                return (modified_hidden_states,) + layer_input[1:]
 
         try:
-
+            if injection_vector is not None and injection_strength > 0 and injection_layer is not None:
+                assert 0 <= injection_layer < llm.stable_config.num_layers, f"Injection layer {injection_layer} is out of bounds."
+                target_layer = llm.stable_config.layer_list[injection_layer]
+                hook_handle = target_layer.register_forward_pre_hook(injection_hook)
+
             outputs = llm.model(
                 input_ids=next_token_id, past_key_values=kv_cache,
                 output_hidden_states=True, use_cache=True,
-                # Pass the parameter to every forward pass
                 output_attentions=record_attentions
            )
         finally:
-            if hook_handle:
+            if hook_handle:
                 hook_handle.remove()
                 hook_handle = None
 
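Note: the injection in this hunk is a standard PyTorch forward pre-hook that adds a scaled vector to a layer's input hidden states. The self-contained sketch below mirrors that pattern on a toy stack of linear layers so it can run without the project's LLM wrapper; layer sizes, the hooked index, and the strength value are arbitrary choices for illustration.

import torch
import torch.nn as nn

hidden_dim, seq_len = 16, 5
layers = nn.ModuleList([nn.Linear(hidden_dim, hidden_dim) for _ in range(4)])

injection_vector = torch.randn(hidden_dim)
injection_strength = 2.0

def injection_hook(module, layer_input):
    # layer_input is the tuple of positional inputs; element 0 is [batch, seq, hidden].
    injection_3d = injection_vector.unsqueeze(0).expand(1, layer_input[0].shape[1], -1)
    return (layer_input[0] + injection_3d * injection_strength,) + layer_input[1:]

# Hook an arbitrary middle layer, run a forward pass, and always detach the hook,
# matching the try/finally discipline used above.
handle = layers[2].register_forward_pre_hook(injection_hook)
try:
    x = torch.randn(1, seq_len, hidden_dim)
    for layer in layers:
        x = layer(x)
finally:
    handle.remove()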
@@ -117,16 +128,31 @@ def run_cogitation_loop(
         hidden_state_2d = new_hidden_state.clone()
 
     dbg(f"Cognitive loop finished after {num_steps} steps.")
-
+
     return {
         "state_deltas": state_deltas,
         "state_history": state_history,
-        "attention_entropies": attention_entropies,
+        "attention_entropies": attention_entropies,
         "final_hidden_state": hidden_state_2d,
         "final_kv_cache": kv_cache,
     }
 
-def run_silent_cogitation_seismic(
-
-
-
+def run_silent_cogitation_seismic(
+    llm: LLM,
+    prompt_type: str,
+    num_steps: int,
+    temperature: float,
+    injection_vector: Optional[torch.Tensor] = None,
+    injection_strength: float = 0.0,
+    injection_layer: Optional[int] = None
+) -> List[float]:
+    """
+    A backwards-compatible wrapper that keeps the old, simpler interface.
+    It calls the new, generalized loop and returns only the state deltas.
+    """
+    results = run_cogitation_loop(
+        llm=llm, prompt_type=prompt_type, num_steps=num_steps, temperature=temperature,
+        injection_vector=injection_vector, injection_strength=injection_strength,
+        injection_layer=injection_layer
+    )
+    return results["state_deltas"]
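Note: a hedged usage sketch of the backwards-compatible wrapper. Existing call sites that only need the per-step state deltas keep working unchanged, and the injection arguments stay optional; `llm` and the prompt key are assumed to come from the surrounding project, and the numbers are placeholders.

deltas = run_silent_cogitation_seismic(
    llm=llm,
    prompt_type="some_prompt_key",  # placeholder key from RESONANCE_PROMPTS
    num_steps=50,
    temperature=0.7,
)
# Only List[float] is returned; the richer dict (state_history, attention_entropies,
# final_hidden_state, final_kv_cache) is available from run_cogitation_loop directly.
print(len(deltas), sum(deltas) / max(len(deltas), 1))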