Update app.py
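Rework analyze_confidence_signature: extract top-token log probabilities up front, smooth them with a wider 30-token moving average (up from 20), flag drops below the 5th percentile of all changes, cluster drops within 10 tokens of one another, and report at most the three largest drops that occur near reasoning-transition markers such as "wait" or "however".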
app.py CHANGED
@@ -238,15 +238,58 @@ def visualize_logprobs(json_input, chunk=0, chunk_size=100):
 def analyze_confidence_signature(logprobs, tokens):
     if not logprobs or not tokens:
         return "No data for confidence signature analysis.", None
-
+
+    # Extract top probabilities
+    top_probs = [lps[0][1] if lps and lps[0][1] is not None else -float('inf') for lps in logprobs]
     if not any(p != -float('inf') for p in top_probs):
         return "No valid log probabilities for confidence analysis.", None
-
-
-
-
-
-
+
+    # Use a larger window for smoother trends
+    window_size = 30  # Increased from 20
+    moving_avg = np.convolve(top_probs, np.ones(window_size) / window_size, mode='valid')
+
+    # Calculate drop magnitudes
+    drops = np.diff(moving_avg)
+
+    # Use adaptive thresholding - only flag drops in the bottom 5% of all changes
+    drop_threshold = np.percentile(drops, 5)  # More selective
+    significant_drops = np.where(drops < drop_threshold)[0]
+
+    # Cluster nearby drops (within 10 tokens) to avoid reporting multiple points in the same reasoning shift
+    if len(significant_drops) > 0:
+        clustered_drops = [significant_drops[0]]
+        for drop in significant_drops[1:]:
+            if drop - clustered_drops[-1] > 10:  # At least 10 tokens apart
+                clustered_drops.append(drop)
+    else:
+        clustered_drops = []
+
+    # Look for context markers near drops
+    filtered_drops = []
+    reasoning_markers = ["therefore", "thus", "so", "hence", "wait", "but", "however", "actually"]
+
+    for drop in clustered_drops:
+        # Adjust index for convolution window
+        token_idx = drop + window_size - 1
+
+        # Check surrounding context (10 tokens before and after)
+        start_idx = max(0, token_idx - 10)
+        end_idx = min(len(tokens), token_idx + 10)
+        context = " ".join(tokens[start_idx:end_idx])
+
+        # Only keep drops near reasoning transition markers
+        if any(marker in context.lower() for marker in reasoning_markers):
+            drop_magnitude = drops[drop]
+            filtered_drops.append((token_idx, drop_magnitude, tokens[token_idx] if token_idx < len(tokens) else "End of trace"))
+
+    # Sort by drop magnitude (largest drops first)
+    filtered_drops.sort(key=lambda x: x[1])
+
+    if not filtered_drops:
+        return "No significant confidence shifts at reasoning transitions detected.", None
+
+    # Return at most 3 most significant drops
+    return "Significant confidence shifts detected at reasoning transitions:", filtered_drops[:3]
 
 def detect_interpretation_pivots(logprobs, tokens):
     if not logprobs or not tokens: