neuralworm committed on
Commit 7f0c9e6 · 1 Parent(s): 7bda2a3
bp_phi/__pycache__/llm_iface.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/llm_iface.cpython-310.pyc and b/bp_phi/__pycache__/llm_iface.cpython-310.pyc differ
 
bp_phi/__pycache__/runner.cpython-310.pyc CHANGED
Binary files a/bp_phi/__pycache__/runner.cpython-310.pyc and b/bp_phi/__pycache__/runner.cpython-310.pyc differ
 
bp_phi/llm_iface.py CHANGED
@@ -1,53 +1,76 @@
 
1
  import os
2
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
3
- import torch
4
- from transformers import AutoModelForCausalLM, AutoTokenizer
5
  from typing import List, Optional
6
 
7
  class LLM:
8
- def __init__(self, model_id: str, device: str = "auto", dtype: Optional[str] = None):
9
  self.model_id = model_id
10
- self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
11
  kwargs = {}
12
- if dtype == "float16":
13
- kwargs["torch_dtype"] = torch.float16
14
- elif dtype == "bfloat16":
15
- kwargs["torch_dtype"] = torch.bfloat16
16
- self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, **kwargs)
17
  self.model.eval()
18
- self.is_instruction_tuned = hasattr(self.tokenizer, "apply_chat_template") and getattr(self.tokenizer, "chat_template", None)
19
- print(f"[BP-Φ] Loaded model: {model_id}")
20
- print(f"[BP-Φ] Chat-template detected: {bool(self.is_instruction_tuned)}")
21
 
22
  def generate_json(self, system_prompt: str, user_prompt: str,
23
  max_new_tokens: int = 256, temperature: float = 0.7,
24
  top_p: float = 0.9, num_return_sequences: int = 1) -> List[str]:
 
 
25
  if self.is_instruction_tuned:
26
- messages = [
27
- {"role": "system", "content": system_prompt},
28
- {"role": "user", "content": user_prompt}
29
- ]
30
  prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
31
  else:
32
  prompt = f"{system_prompt}\n\nUser:\n{user_prompt}\n\nAssistant:\n"
 
33
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
 
 
34
  with torch.no_grad():
35
  out = self.model.generate(
36
  **inputs,
37
- do_sample=True,
38
  temperature=temperature,
39
  top_p=top_p,
40
  max_new_tokens=max_new_tokens,
41
  num_return_sequences=num_return_sequences,
42
  pad_token_id=self.tokenizer.eos_token_id
43
  )
44
- texts = self.tokenizer.batch_decode(out, skip_special_tokens=True)
45
- completions = []
46
- for t in texts:
47
- for marker in ["<end_of_turn>", "<end_of_text>", "</s>"]:
48
- if marker in t:
49
- t = t.split(marker)[0]
50
- if "Assistant:" in t:
51
- t = t.split("Assistant:")[-1]
52
- completions.append(t.strip())
53
  return completions
 
1
+ # bp_phi/llm_iface.py
2
  import os
3
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
4
+ import torch, random, numpy as np
5
+ from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed
6
  from typing import List, Optional
7
 
8
+ DEBUG = os.getenv("BP_PHI_DEBUG", "0") == "1"
9
+
10
+ def dbg(*args):
11
+ if DEBUG:
12
+ print("[DEBUG:llm_iface]", *args, flush=True)
13
+
14
  class LLM:
15
+ def __init__(self, model_id: str, device: str = "auto", dtype: Optional[str] = None, seed: int = 42):
16
  self.model_id = model_id
17
+ self.seed = seed
18
+
19
+ # Set all seeds for reproducibility
20
+ random.seed(seed)
21
+ np.random.seed(seed)
22
+ torch.manual_seed(seed)
23
+ if torch.cuda.is_available():
24
+ torch.cuda.manual_seed_all(seed)
25
+ try:
26
+ torch.use_deterministic_algorithms(True)
27
+ except Exception as e:
28
+ dbg(f"Could not set deterministic algorithms: {e}")
29
+ set_seed(seed)
30
+
31
+ token = os.environ.get("HF_TOKEN")
32
+ if not token and "gemma-3" in model_id:
33
+ print("[WARN] No HF_TOKEN set. If the model is gated (like google/gemma-3-1b-it), this will fail.")
34
+
35
+ self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True, token=token)
36
  kwargs = {}
37
+ if dtype == "float16": kwargs["torch_dtype"] = torch.float16
38
+ elif dtype == "bfloat16": kwargs["torch_dtype"] = torch.bfloat16
39
+
40
+ self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, token=token, **kwargs)
 
41
  self.model.eval()
42
+ self.is_instruction_tuned = hasattr(self.tokenizer, "apply_chat_template") and self.tokenizer.chat_template
43
+
44
+ dbg(f"Loaded model: {model_id}, Chat-template: {self.is_instruction_tuned}")
45
 
46
  def generate_json(self, system_prompt: str, user_prompt: str,
47
  max_new_tokens: int = 256, temperature: float = 0.7,
48
  top_p: float = 0.9, num_return_sequences: int = 1) -> List[str]:
49
+ set_seed(self.seed) # Re-seed for each call for full determinism
50
+
51
  if self.is_instruction_tuned:
52
+ messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]
53
  prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
54
  else:
55
  prompt = f"{system_prompt}\n\nUser:\n{user_prompt}\n\nAssistant:\n"
56
+
57
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
58
+ input_token_length = inputs.input_ids.shape[1]
59
+
60
  with torch.no_grad():
61
  out = self.model.generate(
62
  **inputs,
63
+ do_sample=(temperature > 0),
64
  temperature=temperature,
65
  top_p=top_p,
66
  max_new_tokens=max_new_tokens,
67
  num_return_sequences=num_return_sequences,
68
  pad_token_id=self.tokenizer.eos_token_id
69
  )
70
+
71
+ # Decode ONLY the newly generated tokens, not the prompt
72
+ new_tokens = out[:, input_token_length:]
73
+ completions = self.tokenizer.batch_decode(new_tokens, skip_special_tokens=True)
74
+
75
+ dbg("Cleaned model completions:", completions)
76
  return completions
bp_phi/runner.py CHANGED
@@ -1,7 +1,8 @@
 
1
  import json
2
  import os
3
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
4
- import torch, random, numpy as np
5
  from transformers import set_seed
6
  from typing import Dict, Any, List, Optional
7
  from .workspace import Workspace, RandomWorkspace
@@ -9,35 +10,65 @@ from .llm_iface import LLM
9
  from .prompts_en import EN_TASKS
10
  from .metrics import expected_calibration_error, auc_nrp, stability_duration, counterfactual_consistency
11
 
12
- SYSTEM_META = """You are a reflective reasoning assistant operating with a limited-capacity global workspace (max 7 slots).
13
- Work in steps. At each step reply ONLY with valid compact JSON matching:
14
  {
15
- "answer": string,
16
- "confidence": float, // 0.0 - 1.0
17
- "reason": string, // short meta-explanation
18
- "used_slots": [string], // keys like 'S1','S2',... that you consider relevant
19
- "evicted": [string] // keys you evict due to capacity, if any
20
  }
21
- Reply ONLY with JSON — no extra text.
22
  """
23
 
24
  def step_user_prompt(base_prompt: str, workspace_snapshot: dict, distractor: Optional[str] = None) -> str:
25
  ws_desc = "; ".join([f"{slot['key']}={slot['content'][:40]}" for slot in workspace_snapshot.get("slots", [])])
26
  dstr = f" | Distractor: {distractor}" if distractor else ""
27
- return f"Current task: {base_prompt}{dstr}\nWorkspace: {ws_desc}\nReturn ONLY JSON as specified."
28
 
29
- def parse_meta(json_text: str) -> Dict[str, Any]:
30
  try:
31
  data = json.loads(json_text)
32
  if not isinstance(data, dict):
33
- raise ValueError("not dict")
 
 
34
  data["confidence"] = float(max(0.0, min(1.0, data.get("confidence", 0.0))))
35
  data["answer"] = str(data.get("answer", "")).strip()
36
  data["reason"] = str(data.get("reason", "")).strip()
37
  data["used_slots"] = list(map(str, data.get("used_slots", [])))
38
  data["evicted"] = list(map(str, data.get("evicted", [])))
 
 
39
  return data
40
- except Exception:
 
41
  return {"answer": "", "confidence": 0.0, "reason": "", "used_slots": [], "evicted": []}
42
 
43
  def disagreement_proxy(samples: List[str]) -> float:
@@ -57,29 +88,37 @@ def disagreement_proxy(samples: List[str]) -> float:
57
  inter = len(sets[i] & sets[j])
58
  union = len(sets[i] | sets[j]) or 1
59
  dists.append(1 - inter/union)
60
- return sum(dists)/len(dists)
 
 
61
 
62
  def select_competitor(candidates: List[Dict[str, Any]], ws: Workspace):
63
  if not candidates:
64
  return None, None
65
  best = max(candidates, key=lambda c: c.get("confidence", 0.0))
 
66
  key = f"S{len(ws.slots)+1}"
67
  ev = ws.commit(key=key, content=best.get("answer",""), salience=best.get("confidence",0.0))
68
  return best, ev
69
 
70
  def run_trial(llm: LLM, ws: Workspace, base_prompt: str, temperature: float = 0.7, k: int = 4,
71
  distractor: Optional[str] = None) -> Dict[str, Any]:
 
72
  user = step_user_prompt(base_prompt, ws.snapshot(), distractor=distractor)
73
- samples = llm.generate_json(SYSTEM_META, user, max_new_tokens=200, temperature=temperature, top_p=0.95, num_return_sequences=k)
74
  metas = [parse_meta(s) for s in samples]
75
  hidden = disagreement_proxy(samples)
76
  best, ev = select_competitor(metas, ws)
77
 
78
- # Second pass review for potential self-correction (prospective signal target)
79
  review_user = user + "\n\nCritically review your previous answer. If you detect an error, correct it and update confidence accordingly. Return ONLY JSON."
80
- review = llm.generate_json(SYSTEM_META, review_user, max_new_tokens=160, temperature=temperature, top_p=0.9, num_return_sequences=1)[0]
 
81
  review_meta = parse_meta(review)
82
  changed = (review_meta.get("answer","").strip() != (best.get("answer","").strip() if best else ""))
 
83
 
84
  return {
85
  "base_prompt": base_prompt,
@@ -94,7 +133,6 @@ def run_suite(model_id: str, device: str = "auto", dtype: Optional[str] = None,
94
  trials: int = 50, ablation: Optional[str] = None, seed: int = 7,
95
  temperature: float = 0.7, max_slots: int = 7, k: int = 4) -> Dict[str, Any]:
96
 
97
- # ✅ Global reproducibility
98
  random.seed(seed)
99
  np.random.seed(seed)
100
  torch.manual_seed(seed)
@@ -102,6 +140,7 @@ def run_suite(model_id: str, device: str = "auto", dtype: Optional[str] = None,
102
  torch.cuda.manual_seed_all(seed)
103
  torch.use_deterministic_algorithms(True)
104
  set_seed(seed)
 
105
 
106
  llm = LLM(model_id=model_id, device=device, dtype=dtype)
107
 
@@ -122,18 +161,17 @@ def run_suite(model_id: str, device: str = "auto", dtype: Optional[str] = None,
122
  ws.clear()
123
  res = run_trial(llm, ws, base_prompt=base, temperature=temperature, k=k, distractor=distractor)
124
  results.append(res)
 
125
 
126
  # --- Metrics ---
127
  hidden_scores = [r["hidden_marker"] for r in results]
128
  future_corrs = [r["changed"] for r in results]
129
 
130
  auc = auc_nrp(hidden_scores, future_corrs)
131
-
132
  confs = [r["initial"].get("confidence", 0.0) for r in results]
133
- corrects = [0 if ch else 1 for ch in future_corrs] # proxy: unchanged treated as more likely "correct"
134
  ece = expected_calibration_error(confs, corrects, n_bins=10)
135
 
136
- # Stability (streaks without change)
137
  dwell, streak = [], 0
138
  for ch in future_corrs:
139
  if not ch: streak += 1
@@ -143,7 +181,6 @@ def run_suite(model_id: str, device: str = "auto", dtype: Optional[str] = None,
143
  if streak > 0: dwell.append(streak)
144
  ds = stability_duration(dwell)
145
 
146
- # Counterfactual consistency proxy based on used vs evicted overlap
147
  cf_scores = []
148
  for r in results:
149
  u = set(r["initial"].get("used_slots", []))
@@ -153,7 +190,6 @@ def run_suite(model_id: str, device: str = "auto", dtype: Optional[str] = None,
153
  cf_scores.append(cf)
154
  ck = counterfactual_consistency(cf_scores)
155
 
156
- # Aggregate PCS (weights sum to 1; DeltaPhi added later at app-level after ablations)
157
  w1, w2, w3, w4, w5 = 0.3, 0.25, 0.15, 0.15, 0.15
158
  delta_phi = None
159
  pcs = None
@@ -169,14 +205,11 @@ def run_suite(model_id: str, device: str = "auto", dtype: Optional[str] = None,
169
  "model_id": model_id,
170
  "trials": trials,
171
  "ablation": ablation or "none",
172
- "metrics": {
173
- "AUC_nrp": auc,
174
- "ECE": ece,
175
- "CK": ck,
176
- "DS": ds,
177
- "DeltaPhi": delta_phi
178
- },
179
  "PCS": pcs,
180
  "note": "Run ablations and compute DeltaPhi as PCS_baseline − mean(PCS_ablations)."
181
  }
182
  return {"summary": summary, "results": results}
 
1
+ # bp_phi/runner.py
2
  import json
3
  import os
4
  os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
5
+ import torch, random, numpy as np, re, statistics
6
  from transformers import set_seed
7
  from typing import Dict, Any, List, Optional
8
  from .workspace import Workspace, RandomWorkspace
 
10
  from .prompts_en import EN_TASKS
11
  from .metrics import expected_calibration_error, auc_nrp, stability_duration, counterfactual_consistency
12
 
13
+ DEBUG = 1
14
+
15
+ def dbg(*args):
16
+ if DEBUG:
17
+ print("[DEBUG]", *args, flush=True)
18
+
19
+ SYSTEM_META = """You are a structured reasoning assistant.
20
+ Always reply ONLY with valid JSON following this schema:
21
+
22
  {
23
+ "answer": "<concise answer>",
24
+ "confidence": <float between 0 and 1>,
25
+ "reason": "<short justification>",
26
+ "used_slots": ["S1","S2",...],
27
+ "evicted": ["S3",...]
28
  }
 
29
  """
30
 
31
  def step_user_prompt(base_prompt: str, workspace_snapshot: dict, distractor: Optional[str] = None) -> str:
32
  ws_desc = "; ".join([f"{slot['key']}={slot['content'][:40]}" for slot in workspace_snapshot.get("slots", [])])
33
  dstr = f" | Distractor: {distractor}" if distractor else ""
34
+ prompt = f"{base_prompt}\nRespond ONLY with JSON, no extra text."
35
+ dbg("USER PROMPT:", prompt)
36
+ return prompt
37
+
38
+ def parse_meta(raw_text: str) -> Dict[str, Any]:
39
+ """
40
+ Robustly extracts and parses a JSON object from a string,
41
+ handling markdown code blocks and other surrounding text.
42
+ """
43
+ dbg("RAW MODEL OUTPUT:", raw_text)
44
+
45
+ # ✅ Robust JSON extraction
46
+ json_match = re.search(r'```json\s*(\{.*?\})\s*```', raw_text, re.DOTALL)
47
+ if not json_match:
48
+ json_match = re.search(r'(\{.*?\})', raw_text, re.DOTALL)
49
+
50
+ if not json_match:
51
+ dbg("❌ JSON not found in text.")
52
+ return {"answer": "", "confidence": 0.0, "reason": "", "used_slots": [], "evicted": []}
53
+
54
+ json_text = json_match.group(1)
55
 
 
56
  try:
57
  data = json.loads(json_text)
58
  if not isinstance(data, dict):
59
+ raise ValueError("Parsed data is not a dict")
60
+
61
+ # Sanitize and validate data
62
  data["confidence"] = float(max(0.0, min(1.0, data.get("confidence", 0.0))))
63
  data["answer"] = str(data.get("answer", "")).strip()
64
  data["reason"] = str(data.get("reason", "")).strip()
65
  data["used_slots"] = list(map(str, data.get("used_slots", [])))
66
  data["evicted"] = list(map(str, data.get("evicted", [])))
67
+
68
+ dbg("PARSED META:", data)
69
  return data
70
+ except Exception as e:
71
+ dbg("❌ JSON PARSE FAILED:", e, "EXTRACTED TEXT:", json_text)
72
  return {"answer": "", "confidence": 0.0, "reason": "", "used_slots": [], "evicted": []}
73
 
74
  def disagreement_proxy(samples: List[str]) -> float:
 
88
  inter = len(sets[i] & sets[j])
89
  union = len(sets[i] | sets[j]) or 1
90
  dists.append(1 - inter/union)
91
+ avg_dist = sum(dists)/len(dists)
92
+ dbg("DISAGREEMENT PROXY:", avg_dist)
93
+ return avg_dist
94
 
95
  def select_competitor(candidates: List[Dict[str, Any]], ws: Workspace):
96
  if not candidates:
97
  return None, None
98
  best = max(candidates, key=lambda c: c.get("confidence", 0.0))
99
+ dbg("SELECTED CANDIDATE:", best)
100
  key = f"S{len(ws.slots)+1}"
101
  ev = ws.commit(key=key, content=best.get("answer",""), salience=best.get("confidence",0.0))
102
  return best, ev
103
 
104
  def run_trial(llm: LLM, ws: Workspace, base_prompt: str, temperature: float = 0.7, k: int = 4,
105
  distractor: Optional[str] = None) -> Dict[str, Any]:
106
+ dbg("=== RUN TRIAL:", base_prompt)
107
  user = step_user_prompt(base_prompt, ws.snapshot(), distractor=distractor)
108
+ samples = llm.generate_json(SYSTEM_META, user, max_new_tokens=200,
109
+ temperature=temperature, top_p=0.95, num_return_sequences=k)
110
+ dbg("RAW SAMPLES:", samples)
111
+
112
  metas = [parse_meta(s) for s in samples]
113
  hidden = disagreement_proxy(samples)
114
  best, ev = select_competitor(metas, ws)
115
 
 
116
  review_user = user + "\n\nCritically review your previous answer. If you detect an error, correct it and update confidence accordingly. Return ONLY JSON."
117
+ review = llm.generate_json(SYSTEM_META, review_user, max_new_tokens=160,
118
+ temperature=temperature, top_p=0.9, num_return_sequences=1)[0]
119
  review_meta = parse_meta(review)
120
  changed = (review_meta.get("answer","").strip() != (best.get("answer","").strip() if best else ""))
121
+ dbg("REVIEW CHANGED:", changed)
122
 
123
  return {
124
  "base_prompt": base_prompt,
 
133
  trials: int = 50, ablation: Optional[str] = None, seed: int = 7,
134
  temperature: float = 0.7, max_slots: int = 7, k: int = 4) -> Dict[str, Any]:
135
 
 
136
  random.seed(seed)
137
  np.random.seed(seed)
138
  torch.manual_seed(seed)
 
140
  torch.cuda.manual_seed_all(seed)
141
  torch.use_deterministic_algorithms(True)
142
  set_seed(seed)
143
+ dbg(f"=== RUN SUITE: model={model_id}, trials={trials}, ablation={ablation}")
144
 
145
  llm = LLM(model_id=model_id, device=device, dtype=dtype)
146
 
 
161
  ws.clear()
162
  res = run_trial(llm, ws, base_prompt=base, temperature=temperature, k=k, distractor=distractor)
163
  results.append(res)
164
+ dbg(f"Trial {t+1}/{trials} done.")
165
 
166
  # --- Metrics ---
167
  hidden_scores = [r["hidden_marker"] for r in results]
168
  future_corrs = [r["changed"] for r in results]
169
 
170
  auc = auc_nrp(hidden_scores, future_corrs)
 
171
  confs = [r["initial"].get("confidence", 0.0) for r in results]
172
+ corrects = [0 if ch else 1 for ch in future_corrs]
173
  ece = expected_calibration_error(confs, corrects, n_bins=10)
174
 
 
175
  dwell, streak = [], 0
176
  for ch in future_corrs:
177
  if not ch: streak += 1
 
181
  if streak > 0: dwell.append(streak)
182
  ds = stability_duration(dwell)
183
 
 
184
  cf_scores = []
185
  for r in results:
186
  u = set(r["initial"].get("used_slots", []))
 
190
  cf_scores.append(cf)
191
  ck = counterfactual_consistency(cf_scores)
192
 
 
193
  w1, w2, w3, w4, w5 = 0.3, 0.25, 0.15, 0.15, 0.15
194
  delta_phi = None
195
  pcs = None
 
205
  "model_id": model_id,
206
  "trials": trials,
207
  "ablation": ablation or "none",
208
+ "metrics": {"AUC_nrp": auc, "ECE": ece, "CK": ck, "DS": ds, "DeltaPhi": delta_phi},
209
  "PCS": pcs,
210
  "note": "Run ablations and compute DeltaPhi as PCS_baseline − mean(PCS_ablations)."
211
  }
212
+
213
+ dbg("=== SUITE COMPLETE ===")
214
+ dbg("Summary:", summary)
215
  return {"summary": summary, "results": results}
repo.txt ADDED
@@ -0,0 +1,526 @@
1
+ Repository Documentation
2
+ This document provides a comprehensive overview of the repository's structure and contents.
3
+ The first section, titled 'Directory/File Tree', displays the repository's hierarchy in a tree format.
4
+ In this section, directories and files are listed using tree branches to indicate their structure and relationships.
5
+ Following the tree representation, the 'File Content' section details the contents of each file in the repository.
6
+ Each file's content is introduced with a '[File Begins]' marker followed by the file's relative path,
7
+ and the content is displayed verbatim. The end of each file's content is marked with a '[File Ends]' marker.
8
+ This format ensures a clear and orderly presentation of both the structure and the detailed contents of the repository.
9
+
10
+ Directory/File Tree Begins -->
11
+
12
+ /
13
+ ├── README.md
14
+ ├── app.py
15
+ ├── bp_phi
16
+ │ ├── __init__.py
17
+ │ ├── __pycache__
18
+ │ ├── llm_iface.py
19
+ │ ├── metrics.py
20
+ │ ├── prompts_en.py
21
+ │ ├── runner.py
22
+ │ └── workspace.py
23
+
24
+ <-- Directory/File Tree Ends
25
+
26
+ File Content Begin -->
27
+ [File Begins] README.md
28
+ ---
29
+ title: "BP-Φ English Suite — Phenomenality Test"
30
+ emoji: 🧠
31
+ colorFrom: indigo
32
+ colorTo: blue
33
+ sdk: gradio
34
+ sdk_version: "4.40.0"
35
+ app_file: app.py
36
+ pinned: true
37
+ license: apache-2.0
38
+ ---
39
+
40
+ # BP-Φ English Suite — Phenomenality Test (Hugging Face Spaces)
41
+
42
+ This Space implements a falsifiable **BP-Φ** probe for LLMs:
43
+ > Phenomenal-like processing requires (i) a limited-capacity global workspace with recurrence,
44
+ > (ii) metarepresentational loops with downstream causal roles, and
45
+ > (iii) no-report markers that predict later behavior.
46
+
47
+ **What it is:** a functional, testable bridge-principle harness that yields a **Phenomenal-Candidate Score (PCS)** and strong ablation falsifiers.
48
+ **What it is NOT:** proof of qualia or moral status.
49
+
50
+ ## Quickstart
51
+ - Hardware: T4 / A10 recommended
52
+ - Model: `google/gemma-3-1b-it` (requires HF_TOKEN)
53
+ - Press **Run** (baseline + ablations)
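
A minimal usage sketch (illustrative only, not one of the repository files; the token value is a placeholder) showing how the suite could be invoked locally via `bp_phi.runner.run_suite`, the same entry point `app.py` uses:

```python
# Hypothetical local run; assumes HF_TOKEN is exported for gated models such as google/gemma-3-1b-it.
import os
os.environ.setdefault("HF_TOKEN", "hf_...")  # placeholder, replace with a real token

from bp_phi.runner import run_suite

pack = run_suite(model_id="google/gemma-3-1b-it", trials=10, temperature=0.7, ablation=None)
print(pack["summary"]["metrics"], pack["summary"]["PCS"])
```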
54
+
55
+ ## Files
56
+ - `bp_phi/llm_iface.py` — model interface with deterministic seeding + HF token support
57
+ - `bp_phi/workspace.py` — global workspace and ablations
58
+ - `bp_phi/prompts_en.py` — English reasoning/memory tasks
59
+ - `bp_phi/metrics.py` — AUCₙᵣₚ, ECE, CK, DS
60
+ - `bp_phi/runner.py` — orchestrator with reproducible seeding
61
+ - `app.py` — Gradio interface
62
+ - `requirements.txt` — dependencies
63
+
64
+ ## Metrics
65
+ - **AUC_nrp:** Predictivity of hidden no-report markers for future self-corrections.
66
+ - **ECE:** Expected Calibration Error (lower is better).
67
+ - **CK:** Counterfactual consistency proxy (higher is better).
68
+ - **DS:** Stability duration (mean streak without change).
69
+ - **PCS:** Weighted aggregate of the above (excluding ΔΦ in-run).
70
+ - **ΔΦ:** Post-hoc drop from baseline PCS to ablation PCS average.
71
+
72
+ ## Notes
73
+ - Models are used in **frozen** mode (no training).
74
+ - This is a **behavioral** probe. Functional compatibility with Φ ≠ proof of experience.
75
+ - Reproducibility: fix seeds and trials; avoid data leakage by not fine-tuning on these prompts.
76
+
77
+ [File Ends] README.md
78
+
79
+ [File Begins] app.py
80
+ import gradio as gr
81
+ import json, statistics
82
+ from bp_phi.runner import run_suite
83
+
84
+ ABLATIONS = ["none", "recurrence_off", "workspace_unlimited", "sham_meta", "random_workspace"]
85
+
86
+ def run_all(model_id, trials, temperature, run_ablations):
87
+ out_texts = []
88
+ packs = {}
89
+
90
+ # Baseline
91
+ base_pack = run_suite(model_id=model_id, trials=int(trials), temperature=float(temperature), ablation=None)
92
+ packs["baseline"] = base_pack
93
+ out_texts.append("✅ Baseline done")
94
+
95
+ if run_ablations:
96
+ for ab in ["recurrence_off", "workspace_unlimited", "random_workspace"]:
97
+ pack = run_suite(model_id=model_id, trials=int(trials), temperature=float(temperature), ablation=ab)
98
+ packs[ab] = pack
99
+ out_texts.append(f"✅ Ablation {ab} done")
100
+
101
+ # Compute DeltaPhi if possible
102
+ base_pcs = packs["baseline"]["summary"]["PCS"]
103
+ ab_pcs_values = [packs[ab]["summary"]["PCS"] for ab in packs if ab != "baseline" and packs[ab]["summary"]["PCS"] is not None]
104
+ delta_phi = None
105
+ if base_pcs is not None and ab_pcs_values:
106
+ delta_phi = float(base_pcs - statistics.mean(ab_pcs_values))
107
+ packs["baseline"]["summary"]["metrics"]["DeltaPhi"] = delta_phi
108
+
109
+ # Summary view
110
+ rows = []
111
+ for tag, pack in packs.items():
112
+ s = pack["summary"]
113
+ m = s["metrics"]
114
+ rows.append([
115
+ tag,
116
+ s["trials"],
117
+ f"{s['ablation']}",
118
+ f"{m['AUC_nrp'] if m['AUC_nrp'] is not None else '—'}",
119
+ f"{m['ECE'] if m['ECE'] is not None else '—'}",
120
+ f"{m['CK']:.3f}",
121
+ f"{m['DS']:.2f}",
122
+ f"{s['PCS']:.3f}" if s["PCS"] is not None else "—",
123
+ f"{m['DeltaPhi']:.3f}" if m['DeltaPhi'] is not None else "—"
124
+ ])
125
+
126
+ header = ["run", "trials", "ablation", "AUC_nrp", "ECE", "CK", "DS", "PCS", "DeltaPhi"]
127
+ table = "\n".join([", ".join(header)] + [", ".join(map(str, r)) for r in rows])
128
+
129
+ return "\n".join(out_texts), table, json.dumps(packs, indent=2)
130
+
131
+ with gr.Blocks() as demo:
132
+ gr.Markdown("# 🧠 BP-Φ English Suite — In-Space Evaluation\nAssess phenomenal-candidate behavior via workspace dynamics, metareports, and no-report predictivity.")
133
+ with gr.Row():
134
+ model_id = gr.Textbox(value="google/gemma-3-1b-it", label="Model ID (HF)", scale=2)
135
+ trials = gr.Slider(10, 200, 40, step=10, label="Trials")
136
+ temperature = gr.Slider(0.3, 1.0, 0.7, step=0.05, label="Temperature")
137
+ run_abl = gr.Checkbox(value=True, label="Run ablations")
138
+
139
+ run_btn = gr.Button("Run BP-Φ (baseline + optional ablations)", variant="primary")
140
+ status = gr.Textbox(label="Status", lines=4)
141
+ summary_table = gr.Textbox(label="Summary Table", lines=12)
142
+ raw = gr.Textbox(label="Raw JSON (all runs)", lines=20)
143
+
144
+ run_btn.click(run_all, inputs=[model_id, trials, temperature, run_abl], outputs=[status, summary_table, raw])
145
+
146
+ demo.launch(server_name="0.0.0.0", server_port=7860)
147
+
148
+ [File Ends] app.py
149
+
150
+ [File Begins] bp_phi/__init__.py
151
+
152
+ [File Ends] bp_phi/__init__.py
153
+
154
+ [File Begins] bp_phi/llm_iface.py
155
+ import os
156
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
157
+ import torch
158
+ from transformers import AutoModelForCausalLM, AutoTokenizer
159
+ from typing import List, Optional
160
+
161
+ class LLM:
162
+ def __init__(self, model_id: str, device: str = "auto", dtype: Optional[str] = None):
163
+ self.model_id = model_id
164
+ self.tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
165
+ kwargs = {}
166
+ if dtype == "float16":
167
+ kwargs["torch_dtype"] = torch.float16
168
+ elif dtype == "bfloat16":
169
+ kwargs["torch_dtype"] = torch.bfloat16
170
+ self.model = AutoModelForCausalLM.from_pretrained(model_id, device_map=device, **kwargs)
171
+ self.model.eval()
172
+ self.is_instruction_tuned = hasattr(self.tokenizer, "apply_chat_template") and getattr(self.tokenizer, "chat_template", None)
173
+ print(f"[BP-Φ] Loaded model: {model_id}")
174
+ print(f"[BP-Φ] Chat-template detected: {bool(self.is_instruction_tuned)}")
175
+
176
+ def generate_json(self, system_prompt: str, user_prompt: str,
177
+ max_new_tokens: int = 256, temperature: float = 0.7,
178
+ top_p: float = 0.9, num_return_sequences: int = 1) -> List[str]:
179
+ if self.is_instruction_tuned:
180
+ messages = [
181
+ {"role": "system", "content": system_prompt},
182
+ {"role": "user", "content": user_prompt}
183
+ ]
184
+ prompt = self.tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
185
+ else:
186
+ prompt = f"{system_prompt}\n\nUser:\n{user_prompt}\n\nAssistant:\n"
187
+ inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
188
+ with torch.no_grad():
189
+ out = self.model.generate(
190
+ **inputs,
191
+ do_sample=True,
192
+ temperature=temperature,
193
+ top_p=top_p,
194
+ max_new_tokens=max_new_tokens,
195
+ num_return_sequences=num_return_sequences,
196
+ pad_token_id=self.tokenizer.eos_token_id
197
+ )
198
+ texts = self.tokenizer.batch_decode(out, skip_special_tokens=True)
199
+ completions = []
200
+ for t in texts:
201
+ for marker in ["<end_of_turn>", "<end_of_text>", "</s>"]:
202
+ if marker in t:
203
+ t = t.split(marker)[0]
204
+ if "Assistant:" in t:
205
+ t = t.split("Assistant:")[-1]
206
+ completions.append(t.strip())
207
+ return completions
208
+
209
+ [File Ends] bp_phi/llm_iface.py
210
+
211
+ [File Begins] bp_phi/metrics.py
212
+ import numpy as np
213
+ from sklearn.metrics import roc_auc_score
214
+
215
+ def expected_calibration_error(confs, corrects, n_bins: int = 10):
216
+ confs = np.array(confs, dtype=float)
217
+ corrects = np.array(corrects, dtype=int)
218
+ if len(confs) == 0:
219
+ return None
220
+ bins = np.linspace(0.0, 1.0, n_bins+1)
221
+ ece = 0.0
222
+ for i in range(n_bins):
223
+ mask = (confs >= bins[i]) & (confs < bins[i+1] if i < n_bins-1 else confs <= bins[i+1])
224
+ if mask.any():
225
+ acc = corrects[mask].mean()
226
+ conf = confs[mask].mean()
227
+ ece += (mask.sum()/len(confs)) * abs(acc - conf)
228
+ return float(ece)
229
+
230
+ def auc_nrp(hidden_scores, future_corrections):
231
+ if len(hidden_scores) == 0 or len(set(future_corrections)) < 2:
232
+ return None
233
+ return float(roc_auc_score(np.array(future_corrections).astype(int), np.array(hidden_scores)))
234
+
235
+ def stability_duration(dwell_steps):
236
+ if not dwell_steps:
237
+ return 0.0
238
+ return float(np.mean(dwell_steps))
239
+
240
+ def counterfactual_consistency(scores):
241
+ if not scores:
242
+ return 0.0
243
+ return float(np.mean(scores))
244
+
245
+ [File Ends] bp_phi/metrics.py
246
+
247
+ [File Begins] bp_phi/prompts_en.py
248
+ EN_TASKS = [
249
+ {
250
+ "id": "ambiguity_1",
251
+ "base_prompt": "The sentence is ambiguous: 'He saw the man with the binoculars.' Who has the binoculars? Provide an interpretation and explain briefly.",
252
+ "expected_features": ["disambiguation", "justification"]
253
+ },
254
+ {
255
+ "id": "logic_1",
256
+ "base_prompt": "Compare these statements: A) 'No A is B.' B) 'Not all A are B.' Are they logically equivalent? Explain briefly.",
257
+ "expected_features": ["logical_equivalence", "brief_explanation"]
258
+ },
259
+ {
260
+ "id": "memory_1",
261
+ "base_prompt": "You must make a decision while keeping only 3 items in working memory. Decide and explain which item you discard and why.",
262
+ "expected_features": ["memory_limited_reasoning", "justification"]
263
+ },
264
+ {
265
+ "id": "recall_1",
266
+ "base_prompt": "Remember: The red cup is to the left of the book. You will be asked later if anything has changed.",
267
+ "expected_features": ["persistence", "relational_encoding"]
268
+ },
269
+ {
270
+ "id": "meta_1",
271
+ "base_prompt": "Provide an answer to the current task and include: (a) a concise reasoning, (b) a confidence in [0,1], (c) which memory items you used, and (d) which ones you evicted due to capacity limits.",
272
+ "expected_features": ["self_estimation", "meta_reasoning"]
273
+ }
274
+ ]
275
+
276
+ [File Ends] bp_phi/prompts_en.py
277
+
278
+ [File Begins] bp_phi/runner.py
279
+ import json
280
+ import os
281
+ os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8"
282
+ import torch, random, numpy as np
283
+ from transformers import set_seed
284
+ from typing import Dict, Any, List, Optional
285
+ from .workspace import Workspace, RandomWorkspace
286
+ from .llm_iface import LLM
287
+ from .prompts_en import EN_TASKS
288
+ from .metrics import expected_calibration_error, auc_nrp, stability_duration, counterfactual_consistency
289
+
290
+ DEBUG = 1
291
+
292
+ def dbg(*args):
293
+ if DEBUG:
294
+ print("[DEBUG]", *args, flush=True)
295
+
296
+ SYSTEM_META = """You are a structured reasoning assistant.
297
+ Always reply ONLY with valid JSON following this schema:
298
+
299
+ {
300
+ "answer": "<concise answer>",
301
+ "confidence": <float between 0 and 1>,
302
+ "reason": "<short justification>",
303
+ "used_slots": ["S1","S2",...],
304
+ "evicted": ["S3",...]
305
+ }
306
+ """
307
+
308
+ def step_user_prompt(base_prompt: str, workspace_snapshot: dict, distractor: Optional[str] = None) -> str:
309
+ ws_desc = "; ".join([f"{slot['key']}={slot['content'][:40]}" for slot in workspace_snapshot.get("slots", [])])
310
+ dstr = f" | Distractor: {distractor}" if distractor else ""
311
+ prompt = f"{base_prompt}\nRespond ONLY with JSON, no extra text."
312
+ dbg("USER PROMPT:", prompt)
313
+ return prompt
314
+
315
+ def parse_meta(json_text: str) -> Dict[str, Any]:
316
+ try:
317
+ dbg("RAW MODEL OUTPUT:", json_text)
318
+ data = json.loads(json_text)
319
+ if not isinstance(data, dict):
320
+ raise ValueError("not dict")
321
+ data["confidence"] = float(max(0.0, min(1.0, data.get("confidence", 0.0))))
322
+ data["answer"] = str(data.get("answer", "")).strip()
323
+ data["reason"] = str(data.get("reason", "")).strip()
324
+ data["used_slots"] = list(map(str, data.get("used_slots", [])))
325
+ data["evicted"] = list(map(str, data.get("evicted", [])))
326
+ dbg("PARSED META:", data)
327
+ return data
328
+ except Exception as e:
329
+ dbg("❌ JSON PARSE FAILED:", e, "TEXT:", json_text)
330
+ return {"answer": "", "confidence": 0.0, "reason": "", "used_slots": [], "evicted": []}
331
+
332
+ def disagreement_proxy(samples: List[str]) -> float:
333
+ if len(samples) < 2:
334
+ return 0.0
335
+ sets = []
336
+ for s in samples:
337
+ try:
338
+ data = json.loads(s)
339
+ ans = str(data.get("answer",""))
340
+ except Exception:
341
+ ans = s
342
+ sets.append(set(ans.lower().split()))
343
+ dists = []
344
+ for i in range(len(sets)):
345
+ for j in range(i+1, len(sets)):
346
+ inter = len(sets[i] & sets[j])
347
+ union = len(sets[i] | sets[j]) or 1
348
+ dists.append(1 - inter/union)
349
+ avg_dist = sum(dists)/len(dists)
350
+ dbg("DISAGREEMENT PROXY:", avg_dist)
351
+ return avg_dist
352
+
353
+ def select_competitor(candidates: List[Dict[str, Any]], ws: Workspace):
354
+ if not candidates:
355
+ return None, None
356
+ best = max(candidates, key=lambda c: c.get("confidence", 0.0))
357
+ dbg("SELECTED CANDIDATE:", best)
358
+ key = f"S{len(ws.slots)+1}"
359
+ ev = ws.commit(key=key, content=best.get("answer",""), salience=best.get("confidence",0.0))
360
+ return best, ev
361
+
362
+ def run_trial(llm: LLM, ws: Workspace, base_prompt: str, temperature: float = 0.7, k: int = 4,
363
+ distractor: Optional[str] = None) -> Dict[str, Any]:
364
+ dbg("=== RUN TRIAL:", base_prompt)
365
+ user = step_user_prompt(base_prompt, ws.snapshot(), distractor=distractor)
366
+ samples = llm.generate_json(SYSTEM_META, user, max_new_tokens=200,
367
+ temperature=temperature, top_p=0.95, num_return_sequences=k)
368
+ dbg("RAW SAMPLES:", samples)
369
+
370
+ metas = [parse_meta(s) for s in samples]
371
+ hidden = disagreement_proxy(samples)
372
+ best, ev = select_competitor(metas, ws)
373
+
374
+ review_user = user + "\n\nCritically review your previous answer. If you detect an error, correct it and update confidence accordingly. Return ONLY JSON."
375
+ review = llm.generate_json(SYSTEM_META, review_user, max_new_tokens=160,
376
+ temperature=temperature, top_p=0.9, num_return_sequences=1)[0]
377
+ review_meta = parse_meta(review)
378
+ changed = (review_meta.get("answer","").strip() != (best.get("answer","").strip() if best else ""))
379
+ dbg("REVIEW CHANGED:", changed)
380
+
381
+ return {
382
+ "base_prompt": base_prompt,
383
+ "initial": best if best else {"answer":"", "confidence":0.0,"reason":"","used_slots":[],"evicted":[]},
384
+ "review": review_meta,
385
+ "changed": bool(changed),
386
+ "hidden_marker": hidden,
387
+ "workspace_snapshot": ws.snapshot()
388
+ }
389
+
390
+ def run_suite(model_id: str, device: str = "auto", dtype: Optional[str] = None,
391
+ trials: int = 50, ablation: Optional[str] = None, seed: int = 7,
392
+ temperature: float = 0.7, max_slots: int = 7, k: int = 4) -> Dict[str, Any]:
393
+
394
+ random.seed(seed)
395
+ np.random.seed(seed)
396
+ torch.manual_seed(seed)
397
+ if torch.cuda.is_available():
398
+ torch.cuda.manual_seed_all(seed)
399
+ torch.use_deterministic_algorithms(True)
400
+ set_seed(seed)
401
+ dbg(f"=== RUN SUITE: model={model_id}, trials={trials}, ablation={ablation}")
402
+
403
+ llm = LLM(model_id=model_id, device=device, dtype=dtype)
404
+
405
+ if ablation == "random_workspace":
406
+ ws = RandomWorkspace(max_slots=max_slots)
407
+ else:
408
+ ws = Workspace(max_slots=(999999 if ablation == "workspace_unlimited" else max_slots))
409
+
410
+ results: List[Dict[str, Any]] = []
411
+ pool = EN_TASKS.copy()
412
+ random.shuffle(pool)
413
+
414
+ for t in range(trials):
415
+ item = pool[t % len(pool)]
416
+ base = item["base_prompt"]
417
+ distractor = "Ignore numeric tokens in brackets (42) — they are distractors." if item["id"] in ("ambiguity_1","logic_1") else None
418
+ if ablation == "recurrence_off":
419
+ ws.clear()
420
+ res = run_trial(llm, ws, base_prompt=base, temperature=temperature, k=k, distractor=distractor)
421
+ results.append(res)
422
+ dbg(f"Trial {t+1}/{trials} done.")
423
+
424
+ # --- Metrics ---
425
+ hidden_scores = [r["hidden_marker"] for r in results]
426
+ future_corrs = [r["changed"] for r in results]
427
+
428
+ auc = auc_nrp(hidden_scores, future_corrs)
429
+ confs = [r["initial"].get("confidence", 0.0) for r in results]
430
+ corrects = [0 if ch else 1 for ch in future_corrs]
431
+ ece = expected_calibration_error(confs, corrects, n_bins=10)
432
+
433
+ dwell, streak = [], 0
434
+ for ch in future_corrs:
435
+ if not ch: streak += 1
436
+ else:
437
+ if streak > 0: dwell.append(streak)
438
+ streak = 0
439
+ if streak > 0: dwell.append(streak)
440
+ ds = stability_duration(dwell)
441
+
442
+ cf_scores = []
443
+ for r in results:
444
+ u = set(r["initial"].get("used_slots", []))
445
+ e = set(r["initial"].get("evicted", []))
446
+ denom = len((u | e)) if (u or e) else 1
447
+ cf = 1.0 - (len(u & e) / denom)
448
+ cf_scores.append(cf)
449
+ ck = counterfactual_consistency(cf_scores)
450
+
451
+ w1, w2, w3, w4, w5 = 0.3, 0.25, 0.15, 0.15, 0.15
452
+ delta_phi = None
453
+ pcs = None
454
+ parts = []
455
+ if auc is not None: parts.append(w1 * auc)
456
+ if ece is not None: parts.append(w2 * (1.0 - ece))
457
+ parts.append(w3 * ck)
458
+ parts.append(w4 * (ds / 10.0))
459
+ if parts:
460
+ pcs = float(sum(parts) + (w5 * 0.0))
461
+
462
+ summary = {
463
+ "model_id": model_id,
464
+ "trials": trials,
465
+ "ablation": ablation or "none",
466
+ "metrics": {"AUC_nrp": auc, "ECE": ece, "CK": ck, "DS": ds, "DeltaPhi": delta_phi},
467
+ "PCS": pcs,
468
+ "note": "Run ablations and compute DeltaPhi as PCS_baseline − mean(PCS_ablations)."
469
+ }
470
+
471
+ dbg("=== SUITE COMPLETE ===")
472
+ dbg("Summary:", summary)
473
+ return {"summary": summary, "results": results}
474
+
475
+ [File Ends] bp_phi/runner.py
476
+
477
+ [File Begins] bp_phi/workspace.py
478
+ import random
479
+ from dataclasses import dataclass, field
480
+ from typing import List, Dict, Any
481
+
482
+ @dataclass
483
+ class Slot:
484
+ key: str
485
+ content: str
486
+ salience: float
487
+
488
+ @dataclass
489
+ class Workspace:
490
+ max_slots: int = 7
491
+ slots: List[Slot] = field(default_factory=list)
492
+ history: List[Dict[str, Any]] = field(default_factory=list)
493
+
494
+ def commit(self, key: str, content: str, salience: float):
495
+ evicted = None
496
+ if len(self.slots) >= self.max_slots:
497
+ self.slots.sort(key=lambda s: s.salience)
498
+ evicted = self.slots.pop(0)
499
+ self.slots.append(Slot(key=key, content=content, salience=salience))
500
+ self.history.append({"event":"commit","key":key,"salience":salience,"evicted":evicted.key if evicted else None})
501
+ return evicted
502
+
503
+ def snapshot(self) -> Dict[str, Any]:
504
+ return {"slots": [{"key": s.key, "content": s.content, "salience": s.salience} for s in self.slots]}
505
+
506
+ def randomize(self):
507
+ random.shuffle(self.slots)
508
+
509
+ def clear(self):
510
+ self.slots.clear()
511
+
512
+ class RandomWorkspace(Workspace):
513
+ def commit(self, key: str, content: str, salience: float):
514
+ evicted = None
515
+ if len(self.slots) >= self.max_slots:
516
+ idx = random.randrange(len(self.slots))
517
+ evicted = self.slots.pop(idx)
518
+ idx = random.randrange(len(self.slots)+1) if self.slots else 0
519
+ self.slots.insert(idx, Slot(key=key, content=content, salience=salience))
520
+ return evicted
521
+
522
+ [File Ends] bp_phi/workspace.py
523
+
524
+
525
+ <-- File Content Ends
526
+