Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -23,9 +23,9 @@ def parse_input(json_input):
|
|
| 23 |
logger.debug("Successfully parsed as JSON")
|
| 24 |
return data
|
| 25 |
except json.JSONDecodeError as e:
|
| 26 |
-
logger.error("JSON parsing failed: %s", str(e))
|
| 27 |
try:
|
| 28 |
-
# If JSON fails, try to parse as Python literal (e.g., with single quotes)
|
| 29 |
data = ast.literal_eval(json_input)
|
| 30 |
logger.debug("Successfully parsed as Python literal")
|
| 31 |
# Convert Python dictionary to JSON-compatible format (replace single quotes with double quotes)
|
|
@@ -40,8 +40,8 @@ def parse_input(json_input):
|
|
| 40 |
logger.debug("Converted to JSON-compatible format")
|
| 41 |
return converted_data
|
| 42 |
except (SyntaxError, ValueError) as e:
|
| 43 |
-
logger.error("Python literal parsing failed: %s", str(e))
|
| 44 |
-
raise ValueError(f"Malformed input: {str(e)}. Ensure property names are in double quotes (e.g., \"content\")
|
| 45 |
|
| 46 |
# Function to ensure a value is a float, converting from string if necessary
|
| 47 |
def ensure_float(value):
|
|
@@ -69,27 +69,32 @@ def get_token(entry):
|
|
| 69 |
def create_empty_figure(title):
|
| 70 |
return go.Figure().update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
|
| 71 |
|
| 72 |
-
# Function to process and visualize the full log probs with dynamic top_logprobs, handling missing tokens
|
| 73 |
def visualize_logprobs(json_input):
|
| 74 |
try:
|
| 75 |
-
# Parse the input (handles
|
| 76 |
data = parse_input(json_input)
|
| 77 |
|
| 78 |
-
# Ensure data is a
|
| 79 |
if isinstance(data, dict) and "content" in data:
|
| 80 |
content = data["content"]
|
|
|
|
|
|
|
| 81 |
elif isinstance(data, list):
|
| 82 |
-
content = data
|
| 83 |
else:
|
| 84 |
-
raise ValueError("Input must be a
|
| 85 |
|
| 86 |
# Extract tokens, log probs, and top alternatives, skipping non-finite values with fixed filter of -100000
|
| 87 |
tokens = []
|
| 88 |
logprobs = []
|
| 89 |
top_alternatives = [] # List to store all top_logprobs (dynamic length)
|
| 90 |
for entry in content:
|
|
|
|
|
|
|
|
|
|
| 91 |
logprob = ensure_float(entry.get("logprob", None))
|
| 92 |
-
if
|
| 93 |
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
|
| 94 |
tokens.append(token)
|
| 95 |
logprobs.append(logprob)
|
|
@@ -110,9 +115,9 @@ def visualize_logprobs(json_input):
|
|
| 110 |
else:
|
| 111 |
logger.debug("Skipping entry with logprob: %s (type: %s)", entry.get("logprob"), type(entry.get("logprob", None)))
|
| 112 |
|
| 113 |
-
# Check if there's valid data after filtering
|
| 114 |
if not logprobs or not tokens:
|
| 115 |
-
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No
|
| 116 |
|
| 117 |
# 1. Main Log Probability Plot (Interactive Plotly)
|
| 118 |
main_fig = go.Figure()
|
|
@@ -152,8 +157,10 @@ def visualize_logprobs(json_input):
|
|
| 152 |
table_data = []
|
| 153 |
max_alternatives = max(len(alts) for alts in top_alternatives) if top_alternatives else 0
|
| 154 |
for i, entry in enumerate(content):
|
|
|
|
|
|
|
| 155 |
logprob = ensure_float(entry.get("logprob", None))
|
| 156 |
-
if
|
| 157 |
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
|
| 158 |
top_logprobs = entry.get("top_logprobs", {})
|
| 159 |
if top_logprobs is None:
|
|
@@ -206,7 +213,7 @@ def visualize_logprobs(json_input):
|
|
| 206 |
colored_text += " "
|
| 207 |
colored_text_html = f"<p>{colored_text}</p>"
|
| 208 |
else:
|
| 209 |
-
colored_text_html = "No
|
| 210 |
|
| 211 |
# Top Token Log Probabilities (Interactive Plotly, dynamic length)
|
| 212 |
alt_viz_fig = create_empty_figure("Top Token Log Probabilities") if not logprobs or not top_alternatives else go.Figure()
|
|
@@ -230,21 +237,21 @@ def visualize_logprobs(json_input):
|
|
| 230 |
return (main_fig, df, colored_text_html, alt_viz_fig, drops_fig)
|
| 231 |
|
| 232 |
except Exception as e:
|
| 233 |
-
logger.error("Visualization failed: %s", str(e))
|
| 234 |
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
|
| 235 |
|
| 236 |
-
# Gradio interface with full dataset visualization, dynamic top_logprobs, and
|
| 237 |
with gr.Blocks(title="Log Probability Visualizer") as app:
|
| 238 |
gr.Markdown("# Log Probability Visualizer")
|
| 239 |
gr.Markdown(
|
| 240 |
-
"Paste your JSON
|
| 241 |
)
|
| 242 |
|
| 243 |
with gr.Row():
|
| 244 |
json_input = gr.Textbox(
|
| 245 |
label="JSON Input",
|
| 246 |
lines=10,
|
| 247 |
-
placeholder="Paste your JSON (e.g., {\"content\": [
|
| 248 |
)
|
| 249 |
|
| 250 |
with gr.Row():
|
|
|
|
| 23 |
logger.debug("Successfully parsed as JSON")
|
| 24 |
return data
|
| 25 |
except json.JSONDecodeError as e:
|
| 26 |
+
logger.error("JSON parsing failed: %s (Input: %s)", str(e), json_input[:100] + "..." if len(json_input) > 100 else json_input)
|
| 27 |
try:
|
| 28 |
+
# If JSON fails, try to parse as Python literal (e.g., with single quotes), but only for JSON-like strings
|
| 29 |
data = ast.literal_eval(json_input)
|
| 30 |
logger.debug("Successfully parsed as Python literal")
|
| 31 |
# Convert Python dictionary to JSON-compatible format (replace single quotes with double quotes)
|
|
|
|
| 40 |
logger.debug("Converted to JSON-compatible format")
|
| 41 |
return converted_data
|
| 42 |
except (SyntaxError, ValueError) as e:
|
| 43 |
+
logger.error("Python literal parsing failed: %s (Input: %s)", str(e), json_input[:100] + "..." if len(json_input) > 100 else json_input)
|
| 44 |
+
raise ValueError(f"Malformed input: {str(e)}. Ensure property names are in double quotes (e.g., \"content\") and the format matches JSON (e.g., {{\"content\": [...]}}).")
|
| 45 |
|
| 46 |
# Function to ensure a value is a float, converting from string if necessary
|
| 47 |
def ensure_float(value):
|
|
|
|
| 69 |
def create_empty_figure(title):
|
| 70 |
return go.Figure().update_layout(title=title, xaxis_title="", yaxis_title="", showlegend=False)
|
| 71 |
|
| 72 |
+
# Function to process and visualize the full log probs with dynamic top_logprobs, handling missing tokens and JSON structure
|
| 73 |
def visualize_logprobs(json_input):
|
| 74 |
try:
|
| 75 |
+
# Parse the input (handles JSON only, as specified)
|
| 76 |
data = parse_input(json_input)
|
| 77 |
|
| 78 |
+
# Ensure data is a dictionary with 'content' key containing a list
|
| 79 |
if isinstance(data, dict) and "content" in data:
|
| 80 |
content = data["content"]
|
| 81 |
+
if not isinstance(content, list):
|
| 82 |
+
raise ValueError("Content must be a list of entries")
|
| 83 |
elif isinstance(data, list):
|
| 84 |
+
content = data # Handle direct list input (though only JSON is expected)
|
| 85 |
else:
|
| 86 |
+
raise ValueError("Input must be a dictionary with 'content' key or a list of entries")
|
| 87 |
|
| 88 |
# Extract tokens, log probs, and top alternatives, skipping non-finite values with fixed filter of -100000
|
| 89 |
tokens = []
|
| 90 |
logprobs = []
|
| 91 |
top_alternatives = [] # List to store all top_logprobs (dynamic length)
|
| 92 |
for entry in content:
|
| 93 |
+
if not isinstance(entry, dict):
|
| 94 |
+
logger.warning("Skipping non-dictionary entry: %s", entry)
|
| 95 |
+
continue
|
| 96 |
logprob = ensure_float(entry.get("logprob", None))
|
| 97 |
+
if logprob >= -100000: # Include all entries with default 0.0, removing math.isfinite check
|
| 98 |
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
|
| 99 |
tokens.append(token)
|
| 100 |
logprobs.append(logprob)
|
|
|
|
| 115 |
else:
|
| 116 |
logger.debug("Skipping entry with logprob: %s (type: %s)", entry.get("logprob"), type(entry.get("logprob", None)))
|
| 117 |
|
| 118 |
+
# Check if there's valid data after filtering (including default 0.0)
|
| 119 |
if not logprobs or not tokens:
|
| 120 |
+
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No tokens to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
|
| 121 |
|
| 122 |
# 1. Main Log Probability Plot (Interactive Plotly)
|
| 123 |
main_fig = go.Figure()
|
|
|
|
| 157 |
table_data = []
|
| 158 |
max_alternatives = max(len(alts) for alts in top_alternatives) if top_alternatives else 0
|
| 159 |
for i, entry in enumerate(content):
|
| 160 |
+
if not isinstance(entry, dict):
|
| 161 |
+
continue
|
| 162 |
logprob = ensure_float(entry.get("logprob", None))
|
| 163 |
+
if logprob >= -100000 and "top_logprobs" in entry: # Include all entries with default 0.0
|
| 164 |
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
|
| 165 |
top_logprobs = entry.get("top_logprobs", {})
|
| 166 |
if top_logprobs is None:
|
|
|
|
| 213 |
colored_text += " "
|
| 214 |
colored_text_html = f"<p>{colored_text}</p>"
|
| 215 |
else:
|
| 216 |
+
colored_text_html = "No tokens to display."
|
| 217 |
|
| 218 |
# Top Token Log Probabilities (Interactive Plotly, dynamic length)
|
| 219 |
alt_viz_fig = create_empty_figure("Top Token Log Probabilities") if not logprobs or not top_alternatives else go.Figure()
|
|
|
|
| 237 |
return (main_fig, df, colored_text_html, alt_viz_fig, drops_fig)
|
| 238 |
|
| 239 |
except Exception as e:
|
| 240 |
+
logger.error("Visualization failed: %s (Input: %s)", str(e), json_input[:100] + "..." if len(json_input) > 100 else json_input)
|
| 241 |
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
|
| 242 |
|
| 243 |
+
# Gradio interface with full dataset visualization, dynamic top_logprobs, and robust JSON handling
|
| 244 |
with gr.Blocks(title="Log Probability Visualizer") as app:
|
| 245 |
gr.Markdown("# Log Probability Visualizer")
|
| 246 |
gr.Markdown(
|
| 247 |
+
"Paste your JSON log prob data below to visualize all tokens at once. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing or null fields."
|
| 248 |
)
|
| 249 |
|
| 250 |
with gr.Row():
|
| 251 |
json_input = gr.Textbox(
|
| 252 |
label="JSON Input",
|
| 253 |
lines=10,
|
| 254 |
+
placeholder="Paste your JSON (e.g., {\"content\": [{\"bytes\": [44], \"logprob\": 0.0, \"token\": \",\", \"top_logprobs\": {\" so\": -13.8046875, \".\": -13.8046875, \",\": -13.640625}}]}).",
|
| 255 |
)
|
| 256 |
|
| 257 |
with gr.Row():
|