Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -58,11 +58,18 @@ def ensure_float(value):
|
|
| 58 |
return float(value)
|
| 59 |
return 0.0 # Default for any other type
|
| 60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
# Function to create an empty Plotly figure
|
| 62 |
def create_empty_figure(title):
    """Build a Plotly figure with no traces that carries only the given title.

    Axis titles are set to empty strings and the legend is hidden.
    """
    layout_options = {
        "title": title,
        "xaxis_title": "",
        "yaxis_title": "",
        "showlegend": False,
    }
    return go.Figure().update_layout(**layout_options)
|
| 64 |
|
| 65 |
-
# Function to process and visualize the full log probs with dynamic top_logprobs, handling
|
| 66 |
def visualize_logprobs(json_input):
|
| 67 |
try:
|
| 68 |
# Parse the input (handles both JSON and Python dictionaries)
|
|
@@ -83,12 +90,13 @@ def visualize_logprobs(json_input):
|
|
| 83 |
for entry in content:
|
| 84 |
logprob = ensure_float(entry.get("logprob", None))
|
| 85 |
if math.isfinite(logprob) and logprob >= -100000:
|
| 86 |
-
|
|
|
|
| 87 |
logprobs.append(logprob)
|
| 88 |
# Get top_logprobs, default to empty dict if None
|
| 89 |
top_probs = entry.get("top_logprobs", {})
|
| 90 |
if top_probs is None:
|
| 91 |
-
logger.debug("top_logprobs is None for token: %s, using empty dict",
|
| 92 |
top_probs = {} # Default to empty dict for None
|
| 93 |
# Ensure all values in top_logprobs are floats and create a list of tuples
|
| 94 |
finite_top_probs = []
|
|
@@ -146,7 +154,7 @@ def visualize_logprobs(json_input):
|
|
| 146 |
for i, entry in enumerate(content):
|
| 147 |
logprob = ensure_float(entry.get("logprob", None))
|
| 148 |
if math.isfinite(logprob) and logprob >= -100000 and "top_logprobs" in entry:
|
| 149 |
-
token = entry
|
| 150 |
top_logprobs = entry.get("top_logprobs", {})
|
| 151 |
if top_logprobs is None:
|
| 152 |
logger.debug("top_logprobs is None for token: %s, using empty dict", token)
|
|
@@ -225,11 +233,11 @@ def visualize_logprobs(json_input):
|
|
| 225 |
logger.error("Visualization failed: %s", str(e))
|
| 226 |
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
|
| 227 |
|
| 228 |
-
# Gradio interface with full dataset visualization and
|
| 229 |
with gr.Blocks(title="Log Probability Visualizer") as app:
|
| 230 |
gr.Markdown("# Log Probability Visualizer")
|
| 231 |
gr.Markdown(
|
| 232 |
-
"Paste your JSON or Python dictionary log prob data below to visualize all tokens at once. Fixed filter ≥ -100000, dynamic number of top_logprobs."
|
| 233 |
)
|
| 234 |
|
| 235 |
with gr.Row():
|
|
|
|
| 58 |
return float(value)
|
| 59 |
return 0.0 # Default for any other type
|
| 60 |
|
| 61 |
+
# Function to get or generate a token value (default to "Unknown" if missing)
|
| 62 |
+
def get_token(entry):
    """Return the token text for a log-prob entry, or "Unknown" if absent.

    Args:
        entry: Mapping describing one generated token; read via
            ``entry.get("token")``.

    Returns:
        The value stored under ``"token"``, or the string ``"Unknown"`` when
        the key is missing or its value is ``None``.
    """
    # Look the key up without a string default so that a genuine token whose
    # text happens to be "Unknown" is not mistaken for a missing one (which
    # previously produced a misleading warning).
    token = entry.get("token")
    if token is None:
        logger.warning("Missing 'token' key for entry: %s, using 'Unknown'", entry)
        return "Unknown"
    return token
|
| 67 |
+
|
| 68 |
# Function to create an empty Plotly figure
|
| 69 |
def create_empty_figure(title):
    """Build a Plotly figure with no traces that carries only the given title.

    Axis titles are set to empty strings and the legend is hidden.
    """
    layout_options = {
        "title": title,
        "xaxis_title": "",
        "yaxis_title": "",
        "showlegend": False,
    }
    return go.Figure().update_layout(**layout_options)
|
| 71 |
|
| 72 |
+
# Function to process and visualize the full log probs with dynamic top_logprobs, handling missing tokens
|
| 73 |
def visualize_logprobs(json_input):
|
| 74 |
try:
|
| 75 |
# Parse the input (handles both JSON and Python dictionaries)
|
|
|
|
| 90 |
for entry in content:
|
| 91 |
logprob = ensure_float(entry.get("logprob", None))
|
| 92 |
if math.isfinite(logprob) and logprob >= -100000:
|
| 93 |
+
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
|
| 94 |
+
tokens.append(token)
|
| 95 |
logprobs.append(logprob)
|
| 96 |
# Get top_logprobs, default to empty dict if None
|
| 97 |
top_probs = entry.get("top_logprobs", {})
|
| 98 |
if top_probs is None:
|
| 99 |
+
logger.debug("top_logprobs is None for token: %s, using empty dict", token)
|
| 100 |
top_probs = {} # Default to empty dict for None
|
| 101 |
# Ensure all values in top_logprobs are floats and create a list of tuples
|
| 102 |
finite_top_probs = []
|
|
|
|
| 154 |
for i, entry in enumerate(content):
|
| 155 |
logprob = ensure_float(entry.get("logprob", None))
|
| 156 |
if math.isfinite(logprob) and logprob >= -100000 and "top_logprobs" in entry:
|
| 157 |
+
token = get_token(entry) # Safely get token, defaulting to "Unknown" if missing
|
| 158 |
top_logprobs = entry.get("top_logprobs", {})
|
| 159 |
if top_logprobs is None:
|
| 160 |
logger.debug("top_logprobs is None for token: %s, using empty dict", token)
|
|
|
|
| 233 |
logger.error("Visualization failed: %s", str(e))
|
| 234 |
return (create_empty_figure("Log Probabilities of Generated Tokens"), None, "No finite log probabilities to display.", create_empty_figure("Top Token Log Probabilities"), create_empty_figure("Significant Probability Drops"))
|
| 235 |
|
| 236 |
+
# Gradio interface with full dataset visualization, dynamic top_logprobs, and handling missing tokens
|
| 237 |
with gr.Blocks(title="Log Probability Visualizer") as app:
|
| 238 |
gr.Markdown("# Log Probability Visualizer")
|
| 239 |
gr.Markdown(
|
| 240 |
+
"Paste your JSON or Python dictionary log prob data below to visualize all tokens at once. Fixed filter ≥ -100000, dynamic number of top_logprobs, handles missing 'token'."
|
| 241 |
)
|
| 242 |
|
| 243 |
with gr.Row():
|