Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,8 +8,6 @@ import math
|
|
| 8 |
import logging
|
| 9 |
import numpy as np
|
| 10 |
import plotly.graph_objects as go
|
| 11 |
-
import asyncio
|
| 12 |
-
import threading
|
| 13 |
|
| 14 |
# Set up logging
|
| 15 |
logging.basicConfig(level=logging.DEBUG)
|
|
@@ -30,7 +28,7 @@ def parse_input(json_input):
|
|
| 30 |
def ensure_float(value):
|
| 31 |
if value is None:
|
| 32 |
logger.debug("Replacing None logprob with 0.0")
|
| 33 |
-
return 0.0 # Default to 0.0 for None
|
| 34 |
if isinstance(value, str):
|
| 35 |
try:
|
| 36 |
return float(value)
|
|
@@ -52,8 +50,8 @@ def get_token(entry):
|
|
| 52 |
def create_empty_figure(title):
|
| 53 |
return go.Figure().update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
|
| 54 |
|
| 55 |
-
# Precompute the next chunk
|
| 56 |
-
|
| 57 |
try:
|
| 58 |
data = parse_input(json_input)
|
| 59 |
content = data.get("content", []) if isinstance(data, dict) else data
|
|
@@ -71,10 +69,7 @@ async def precompute_chunk(json_input, chunk_size, current_chunk):
|
|
| 71 |
if logprob >= -100000: # Include all entries with default 0.0
|
| 72 |
tokens.append(get_token(entry))
|
| 73 |
logprobs.append(logprob)
|
| 74 |
-
top_probs = entry.get("top_logprobs", {})
|
| 75 |
-
if top_probs is None:
|
| 76 |
-
logger.debug("top_logprobs is None for token: %s, using empty dict", get_token(entry))
|
| 77 |
-
top_probs = {}
|
| 78 |
finite_top_probs = []
|
| 79 |
for key, value in top_probs.items():
|
| 80 |
float_value = ensure_float(value)
|
|
@@ -92,28 +87,11 @@ async def precompute_chunk(json_input, chunk_size, current_chunk):
|
|
| 92 |
if start_idx >= len(tokens):
|
| 93 |
return None, None, None
|
| 94 |
|
| 95 |
-
|
| 96 |
-
paginated_logprobs = logprobs[start_idx:end_idx]
|
| 97 |
-
paginated_alternatives = top_alternatives[start_idx:end_idx]
|
| 98 |
-
|
| 99 |
-
return paginated_tokens, paginated_logprobs, paginated_alternatives
|
| 100 |
except Exception as e:
|
| 101 |
logger.error("Precomputation failed for chunk %d: %s", current_chunk + 1, str(e))
|
| 102 |
return None, None, None
|
| 103 |
|
| 104 |
-
# Synchronous wrapper for precomputation using threading
|
| 105 |
-
def precompute_next_chunk_sync(json_input, current_chunk):
|
| 106 |
-
loop = asyncio.new_event_loop()
|
| 107 |
-
asyncio.set_event_loop(loop)
|
| 108 |
-
try:
|
| 109 |
-
result = loop.run_until_complete(precompute_chunk(json_input, 100, current_chunk))
|
| 110 |
-
except Exception as e:
|
| 111 |
-
logger.error("Precomputation error: %s", str(e))
|
| 112 |
-
result = None, None, None
|
| 113 |
-
finally:
|
| 114 |
-
loop.close()
|
| 115 |
-
return result
|
| 116 |
-
|
| 117 |
# Function to process and visualize a chunk of log probs with dynamic top_logprobs
|
| 118 |
def visualize_logprobs(json_input, chunk=0, chunk_size=100):
|
| 119 |
try:
|
|
@@ -260,14 +238,14 @@ def visualize_logprobs(json_input, chunk=0, chunk_size=100):
|
|
| 260 |
def analyze_confidence_signature(logprobs, tokens):
|
| 261 |
if not logprobs or not tokens:
|
| 262 |
return "No data for confidence signature analysis.", None
|
| 263 |
-
top_probs = [lps[0][1] if lps and lps[0][1] is not None else -float('inf') for lps in logprobs] #
|
| 264 |
if not any(p != -float('inf') for p in top_probs):
|
| 265 |
return "No valid log probabilities for confidence analysis.", None
|
| 266 |
moving_avg = np.convolve(top_probs, np.ones(20) / 20, mode='valid') # 20-token window
|
| 267 |
drops = np.where(np.diff(moving_avg) < -0.15)[0] # Significant drops
|
| 268 |
if not drops.size:
|
| 269 |
return "No significant confidence drops detected.", None
|
| 270 |
-
drop_positions = [(i, tokens[i + 19] if i + 19 < len(tokens) else "End of trace") for i in drops]
|
| 271 |
return "Significant confidence drops detected at positions:", drop_positions
|
| 272 |
|
| 273 |
def detect_interpretation_pivots(logprobs, tokens):
|
|
@@ -420,7 +398,7 @@ def analyze_full_trace(json_input):
|
|
| 420 |
try:
|
| 421 |
with gr.Blocks(title="Log Probability Visualizer") as app:
|
| 422 |
gr.Markdown("# Log Probability Visualizer")
|
| 423 |
-
gr.Markdown("Paste your JSON log prob data below to analyze reasoning traces or visualize tokens in chunks of 100. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing or null fields.
|
| 424 |
|
| 425 |
with gr.Tabs():
|
| 426 |
with gr.Tab("Trace Analysis"):
|
|
@@ -475,6 +453,9 @@ try:
|
|
| 475 |
outputs=[plot_output, table_output, text_output, alt_viz_output, drops_output, total_chunks_output, chunk],
|
| 476 |
)
|
| 477 |
|
|
|
|
|
|
|
|
|
|
| 478 |
def update_chunk(json_input, current_chunk, action, precomputed_next=None):
|
| 479 |
total_chunks = visualize_logprobs(json_input, 0)[5] # Get total chunks
|
| 480 |
if action == "prev" and current_chunk > 0:
|
|
@@ -500,7 +481,8 @@ try:
|
|
| 500 |
|
| 501 |
def trigger_precomputation(json_input, current_chunk):
|
| 502 |
try:
|
| 503 |
-
|
|
|
|
| 504 |
except Exception as e:
|
| 505 |
logger.error("Precomputation trigger failed: %s", str(e))
|
| 506 |
return gr.update(value=current_chunk)
|
|
|
|
| 8 |
import logging
|
| 9 |
import numpy as np
|
| 10 |
import plotly.graph_objects as go
|
|
|
|
|
|
|
| 11 |
|
| 12 |
# Set up logging
|
| 13 |
logging.basicConfig(level=logging.DEBUG)
|
|
|
|
| 28 |
def ensure_float(value):
|
| 29 |
if value is None:
|
| 30 |
logger.debug("Replacing None logprob with 0.0")
|
| 31 |
+
return 0.0 # Default to 0.0 for None
|
| 32 |
if isinstance(value, str):
|
| 33 |
try:
|
| 34 |
return float(value)
|
|
|
|
| 50 |
def create_empty_figure(title):
    """Build a blank Plotly figure that carries only *title*: no axis labels, no legend."""
    fig = go.Figure()
    fig.update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
    return fig
|
| 52 |
|
| 53 |
+
# Precompute the next chunk (synchronous for Hugging Face Spaces)
|
| 54 |
+
def precompute_chunk(json_input, chunk_size, current_chunk):
|
| 55 |
try:
|
| 56 |
data = parse_input(json_input)
|
| 57 |
content = data.get("content", []) if isinstance(data, dict) else data
|
|
|
|
| 69 |
if logprob >= -100000: # Include all entries with default 0.0
|
| 70 |
tokens.append(get_token(entry))
|
| 71 |
logprobs.append(logprob)
|
| 72 |
+
top_probs = entry.get("top_logprobs", {}) or {}
|
|
|
|
|
|
|
|
|
|
| 73 |
finite_top_probs = []
|
| 74 |
for key, value in top_probs.items():
|
| 75 |
float_value = ensure_float(value)
|
|
|
|
| 87 |
if start_idx >= len(tokens):
|
| 88 |
return None, None, None
|
| 89 |
|
| 90 |
+
return tokens[start_idx:end_idx], logprobs[start_idx:end_idx], top_alternatives[start_idx:end_idx]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
except Exception as e:
|
| 92 |
logger.error("Precomputation failed for chunk %d: %s", current_chunk + 1, str(e))
|
| 93 |
return None, None, None
|
| 94 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
# Function to process and visualize a chunk of log probs with dynamic top_logprobs
|
| 96 |
def visualize_logprobs(json_input, chunk=0, chunk_size=100):
|
| 97 |
try:
|
|
|
|
| 238 |
def analyze_confidence_signature(logprobs, tokens):
    """Scan the per-token top log probabilities for sharp confidence drops.

    ``logprobs`` is a per-position list of alternatives ordered best-first
    (each alternative a ``(token, logprob)`` pair); ``tokens`` is the parallel
    list of decoded tokens.  Returns a ``(message, positions)`` pair where
    ``positions`` is ``None`` unless significant drops were found.
    """
    if not logprobs or not tokens:
        return "No data for confidence signature analysis.", None
    # Best alternative's logprob per position; -inf marks empty/None entries.
    top_probs = []
    for alternatives in logprobs:
        if alternatives and alternatives[0][1] is not None:
            top_probs.append(alternatives[0][1])
        else:
            top_probs.append(-float('inf'))
    if all(p == -float('inf') for p in top_probs):
        return "No valid log probabilities for confidence analysis.", None
    # Smooth with a 20-token moving average, then flag steep downward steps.
    window = np.ones(20) / 20
    moving_avg = np.convolve(top_probs, window, mode='valid')
    drops = np.where(np.diff(moving_avg) < -0.15)[0]
    if drops.size == 0:
        return "No significant confidence drops detected.", None
    # Diff index i corresponds to token i+19 — the end of the averaging window.
    drop_positions = []
    for i in drops:
        label = tokens[i + 19] if i + 19 < len(tokens) else "End of trace"
        drop_positions.append((i, label))
    return "Significant confidence drops detected at positions:", drop_positions
|
| 250 |
|
| 251 |
def detect_interpretation_pivots(logprobs, tokens):
|
|
|
|
| 398 |
try:
|
| 399 |
with gr.Blocks(title="Log Probability Visualizer") as app:
|
| 400 |
gr.Markdown("# Log Probability Visualizer")
|
| 401 |
+
gr.Markdown("Paste your JSON log prob data below to analyze reasoning traces or visualize tokens in chunks of 100. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing or null fields.")
|
| 402 |
|
| 403 |
with gr.Tabs():
|
| 404 |
with gr.Tab("Trace Analysis"):
|
|
|
|
| 453 |
outputs=[plot_output, table_output, text_output, alt_viz_output, drops_output, total_chunks_output, chunk],
|
| 454 |
)
|
| 455 |
|
| 456 |
def precompute_next_chunk(json_input, current_chunk):
    """Precompute the chunk after *current_chunk* with the app's fixed chunk size of 100."""
    next_chunk_data = precompute_chunk(json_input, 100, current_chunk)
    return next_chunk_data
|
| 458 |
+
|
| 459 |
def update_chunk(json_input, current_chunk, action, precomputed_next=None):
|
| 460 |
total_chunks = visualize_logprobs(json_input, 0)[5] # Get total chunks
|
| 461 |
if action == "prev" and current_chunk > 0:
|
|
|
|
| 481 |
|
| 482 |
def trigger_precomputation(json_input, current_chunk):
    # Eagerly compute the chunk after current_chunk so paging forward is instant.
    # Best-effort: any failure is logged and swallowed, never surfaced to the UI.
    try:
        precomputed = precompute_next_chunk(json_input, current_chunk)
        precomputed_next.value = precomputed  # Update state directly
        # NOTE(review): assigning gr.State.value mutates the component's default,
        # not the per-session state passed through event handlers — confirm this
        # value actually reaches update_chunk's precomputed_next argument.
    except Exception as e:
        logger.error("Precomputation trigger failed: %s", str(e))
    # Always leave the visible chunk number unchanged.
    return gr.update(value=current_chunk)
|