Update app.py
app.py CHANGED
@@ -308,25 +308,93 @@ def detect_interpretation_pivots(logprobs, tokens):
         return "No interpretation pivots detected.", None
     return "Interpretation pivots detected:", pivots
 
-def calculate_decision_entropy(logprobs):
+def calculate_decision_entropy(logprobs, tokens=None):
     if not logprobs:
         return "No data for entropy spike detection.", None
+
+    # Calculate entropy at each position
     entropies = []
     for lps in logprobs:
-        if not lps:
+        if not lps or len(lps) < 2:  # Need at least two tokens for meaningful entropy
             entropies.append(0.0)
             continue
-
+
+        # Only use top-5 tokens for entropy calculation to reduce noise
+        top_k = min(5, len(lps))
+        probs = [math.exp(p) for _, p in lps[:top_k] if p is not None]
+
+        # Normalize probabilities to sum to 1
         if not probs or sum(probs) == 0:
             entropies.append(0.0)
             continue
-
+
+        prob_sum = sum(probs)
+        normalized_probs = [p/prob_sum for p in probs]
+
+        entropy = -sum(p * math.log(p) for p in normalized_probs if p > 0)
         entropies.append(entropy)
-
-
+
+    # Smooth entropy values with moving average
+    window_size = 15
+    if len(entropies) >= window_size:
+        smoothed_entropies = np.convolve(entropies, np.ones(window_size)/window_size, mode='valid')
+    else:
+        smoothed_entropies = np.array(entropies)  # keep as ndarray so the .size check and threshold comparison below work here too
+
+    # More selective threshold - 90th percentile and 2x multiplier
+    baseline = np.percentile(smoothed_entropies, 90) if smoothed_entropies.size > 0 else 0.0
+
+    # Find significant spikes (much more selective)
+    spikes = []
+    if baseline > 0:
+        raw_spikes = np.where(smoothed_entropies > baseline * 2.0)[0]
+
+        # Cluster nearby spikes (within 20 tokens)
+        if raw_spikes.size > 0:
+            spikes = [raw_spikes[0]]
+            for spike in raw_spikes[1:]:
+                if spike - spikes[-1] > 20:
+                    spikes.append(spike)
+
+    # If we have token information, check context around spikes
+    if tokens and spikes:
+        context_spikes = []
+        decision_markers = ["therefore", "thus", "so", "hence", "because",
+                            "wait", "but", "however", "actually", "instead"]
+
+        for spike in spikes:
+            # Adjust index for convolution window if using smoothed values
+            spike_idx = spike + window_size//2 if len(entropies) >= window_size else spike
+
+            if spike_idx >= len(tokens):
+                continue
+
+            # Check surrounding context (15 tokens before and after)
+            start_idx = max(0, spike_idx - 15)
+            end_idx = min(len(tokens), spike_idx + 15)
+
+            if end_idx <= start_idx:
+                continue
+
+            context = " ".join(tokens[start_idx:end_idx])
+
+            # Only keep spikes near reasoning transitions
+            if any(marker in context.lower() for marker in decision_markers):
+                entropy_value = smoothed_entropies[spike] if len(entropies) >= window_size else entropies[spike]
+                context_spikes.append((spike_idx, entropy_value, tokens[spike_idx] if spike_idx < len(tokens) else "End"))
+
+        spikes = context_spikes
+
+    # Return at most 3 most significant spikes
     if not spikes:
-        return "No entropy spikes detected at decision points.", None
-
+        return "No significant entropy spikes detected at decision points.", None
+
+    # Sort by entropy value (highest first) if we have context information
+    if tokens and spikes:
+        spikes.sort(key=lambda x: x[1], reverse=True)
+        return "Significant entropy spikes detected at positions:", spikes[:3]
+
+    return "Entropy spikes detected at positions:", spikes[:3]
 
 def analyze_conclusion_competition(logprobs, tokens):
     if not logprobs or not tokens:
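For reference, a minimal usage sketch of the reworked function (not part of the commit). It assumes calculate_decision_entropy can be imported from app.py and that each entry of logprobs is a list of (token, logprob) pairs, which is what the new top-k comprehension implies; the toy input below is far too short to fill the smoothing window or trigger a spike, so it only demonstrates the calling convention and the (message, details) return shape.

# Illustrative only; the import path and toy data are assumptions, not part of the commit.
import math
from app import calculate_decision_entropy

tokens = ["The", "answer", "is", "therefore", "42", "."]

# One candidate list per position: (token, logprob) pairs, most likely first.
logprobs = [
    [("The", math.log(0.9)), ("A", math.log(0.1))],
    [("answer", math.log(0.8)), ("result", math.log(0.2))],
    [("is", math.log(0.95)), ("was", math.log(0.05))],
    [("therefore", math.log(0.25)), ("thus", math.log(0.25)),
     ("so", math.log(0.25)), ("hence", math.log(0.25))],
    [("42", math.log(0.7)), ("43", math.log(0.3))],
    [(".", math.log(0.99)), ("!", math.log(0.01))],
]

# message is a summary string; details is None or, when tokens are supplied,
# up to three (position, entropy, token) tuples.
message, details = calculate_decision_entropy(logprobs, tokens)
print(message)
for item in details or []:
    print(item)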