Spaces:

codelion
/

LogProbsVisualizer

Running

App Files Community

codelion committed on Feb 26

Commit

b766b6b

verified ·

1 Parent(s): cf7578d

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -43

app.py CHANGED Viewed

@@ -10,7 +10,6 @@ import logging
 import numpy as np
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
-from scipy import stats
 # Set up logging
 logging.basicConfig(level=logging.DEBUG)
@@ -60,7 +59,7 @@ def ensure_float(value):
     return None
 # Function to process and visualize log probs with interactive Plotly plots
-def visualize_logprobs(json_input, prob_filter=-1e9, page_size=50, page=0):
     try:
         # Parse the input (handles both JSON and Python dictionaries)
         data = parse_input(json_input)
@@ -73,13 +72,13 @@ def visualize_logprobs(json_input, prob_filter=-1e9, page_size=50, page=0):
         else:
             raise ValueError("Input must be a list or dictionary with 'content' key")
-        # Extract tokens, log probs, and top alternatives, skipping None or non-finite values
         tokens = []
         logprobs = []
         top_alternatives = []  # List to store top 3 log probs (selected token + 2 alternatives)
         for entry in content:
             logprob = ensure_float(entry.get("logprob", None))
-            if logprob is not None and math.isfinite(logprob) and logprob >= prob_filter:
                 tokens.append(entry["token"])
                 logprobs.append(logprob)
                 # Get top_logprobs, default to empty dict if None
@@ -103,7 +102,8 @@ def visualize_logprobs(json_input, prob_filter=-1e9, page_size=50, page=0):
         if not logprobs or not tokens:
             return (gr.update(value="No finite log probabilities or tokens to visualize after filtering"), None, None, None, 1, 0)
-        # Paginate data for large inputs
         total_pages = max(1, (len(logprobs) + page_size - 1) // page_size)
         start_idx = page * page_size
         end_idx = min((page + 1) * page_size, len(logprobs))
@@ -146,33 +146,11 @@ def visualize_logprobs(json_input, prob_filter=-1e9, page_size=50, page=0):
                 hovertemplate='<b>%{customdata}</b><extra></extra>'
             )
-        # 3. Anomaly Detection (Interactive Plotly)
-        if not paginated_logprobs:
-            anomaly_fig = go.Figure()
-            anomaly_fig.add_trace(go.Scatter(x=[], y=[], mode='markers+lines', name='Log Prob', marker_color='blue'))
-        else:
-            z_scores = np.abs(stats.zscore(paginated_logprobs))
-            outliers = z_scores > 2  # Threshold for outliers
-            anomaly_fig = go.Figure()
-            anomaly_fig.add_trace(go.Scatter(x=list(range(len(paginated_logprobs))), y=paginated_logprobs, mode='markers+lines', name='Log Prob', marker_color='blue'))
-            anomaly_fig.add_trace(go.Scatter(x=np.where(outliers)[0], y=[paginated_logprobs[i] for i in np.where(outliers)[0]], mode='markers', name='Outliers', marker_color='red'))
-            anomaly_fig.update_layout(
-                title="Log Probabilities with Outliers",
-                xaxis_title="Token Position",
-                yaxis_title="Log Probability",
-                hovermode="closest",
-                clickmode='event+select'
-            )
-            anomaly_fig.update_traces(
-                customdata=[f"Token: {tok}, Log Prob: {prob:.4f}, Position: {i+start_idx}, Outlier: {out}" for i, (tok, prob, out) in enumerate(zip(paginated_tokens, paginated_logprobs, outliers))],
-                hovertemplate='<b>%{customdata}</b><extra></extra>'
-            )
         # Create DataFrame for the table (paginated)
         table_data = []
         for i, entry in enumerate(content[start_idx:end_idx]):
             logprob = ensure_float(entry.get("logprob", None))
-            if logprob is not None and math.isfinite(logprob) and logprob >= prob_filter and "top_logprobs" in entry and entry["top_logprobs"] is not None:
                 token = entry["token"]
                 top_logprobs = entry["top_logprobs"]
                 # Ensure all values in top_logprobs are floats
@@ -230,9 +208,8 @@ def visualize_logprobs(json_input, prob_filter=-1e9, page_size=50, page=0):
             colored_text_html = "No finite log probabilities to display."
         # Top 3 Token Log Probabilities (paginated)
-        alt_viz_html = ""
         if paginated_logprobs and paginated_alternatives:
-            alt_viz_fig = go.Figure()
             for i, (token, probs) in enumerate(zip(paginated_tokens, paginated_alternatives)):
                 for j, (alt_tok, prob) in enumerate(probs):
                     alt_viz_fig.add_trace(go.Bar(x=[f"{token} (Pos {i+start_idx})"], y=[prob], name=f"{alt_tok}", marker_color=['blue', 'green', 'red'][j]))
@@ -252,17 +229,17 @@ def visualize_logprobs(json_input, prob_filter=-1e9, page_size=50, page=0):
         else:
             alt_viz_html = "No finite log probabilities to display."
-        return (main_fig, df, colored_text_html, alt_viz_html, drops_fig, anomaly_fig, total_pages, page)
     except Exception as e:
         logger.error("Visualization failed: %s", str(e))
-        return (gr.update(value=f"Error: {str(e)}"), None, "No finite log probabilities to display.", None, gr.update(value="No data for probability drops."), gr.update(value="No data for anomalies."), 1, 0)
 # Gradio interface with interactive layout and pagination
 with gr.Blocks(title="Log Probability Visualizer") as app:
     gr.Markdown("# Log Probability Visualizer")
     gr.Markdown(
-        "Paste your JSON or Python dictionary log prob data below to visualize the tokens and their probabilities. Use the filter and pagination to navigate large inputs."
     )
     with gr.Row():
@@ -273,8 +250,6 @@ with gr.Blocks(title="Log Probability Visualizer") as app:
                 placeholder="Paste your JSON (e.g., {\"content\": [...]}) or Python dict (e.g., {'content': [...]}) here...",
             )
         with gr.Column(scale=1):
-            prob_filter = gr.Slider(minimum=-1e9, maximum=0, value=-1e9, label="Log Probability Filter (≥)")
-            page_size = gr.Number(value=50, label="Page Size", precision=0, minimum=10, maximum=1000)
             page = gr.Number(value=0, label="Page Number", precision=0, minimum=0)
     with gr.Row():
@@ -282,18 +257,17 @@ with gr.Blocks(title="Log Probability Visualizer") as app:
         drops_output = gr.Plot(label="Probability Drops (Click for Details)")
     with gr.Row():
-        anomaly_output = gr.Plot(label="Anomaly Detection (Click for Details)")
         table_output = gr.Dataframe(label="Token Log Probabilities and Top Alternatives")
     with gr.Row():
         text_output = gr.HTML(label="Colored Text (Confidence Visualization)")
-        alt_viz_output = gr.HTML(label="Top 3 Token Log Probabilities")
     btn = gr.Button("Visualize")
     btn.click(
         fn=visualize_logprobs,
-        inputs=[json_input, prob_filter, page_size, page],
-        outputs=[plot_output, table_output, text_output, alt_viz_output, drops_output, anomaly_output, gr.State(), gr.State()],
     )
     # Pagination controls
@@ -303,24 +277,24 @@ with gr.Blocks(title="Log Probability Visualizer") as app:
         total_pages_output = gr.Number(label="Total Pages", interactive=False)
         current_page_output = gr.Number(label="Current Page", interactive=False)
-    def update_page(json_input, prob_filter, page_size, current_page, action):
         if action == "prev" and current_page > 0:
             current_page -= 1
         elif action == "next":
-            total_pages = visualize_logprobs(json_input, prob_filter, page_size, 0)[6]  # Get total pages
             if current_page < total_pages - 1:
                 current_page += 1
         return gr.update(value=current_page), gr.update(value=total_pages)
     prev_btn.click(
         fn=update_page,
-        inputs=[json_input, prob_filter, page_size, page, gr.State()],
         outputs=[page, total_pages_output]
     )
     next_btn.click(
         fn=update_page,
-        inputs=[json_input, prob_filter, page_size, page, gr.State()],
         outputs=[page, total_pages_output]
     )

 import numpy as np
 import plotly.graph_objects as go
 from plotly.subplots import make_subplots
 # Set up logging
 logging.basicConfig(level=logging.DEBUG)
     return None
 # Function to process and visualize log probs with interactive Plotly plots
+def visualize_logprobs(json_input, page=0):
     try:
         # Parse the input (handles both JSON and Python dictionaries)
         data = parse_input(json_input)
         else:
             raise ValueError("Input must be a list or dictionary with 'content' key")
+        # Extract tokens, log probs, and top alternatives, skipping None or non-finite values with fixed filter of -100000
         tokens = []
         logprobs = []
         top_alternatives = []  # List to store top 3 log probs (selected token + 2 alternatives)
         for entry in content:
             logprob = ensure_float(entry.get("logprob", None))
+            if logprob is not None and math.isfinite(logprob) and logprob >= -100000:
                 tokens.append(entry["token"])
                 logprobs.append(logprob)
                 # Get top_logprobs, default to empty dict if None
         if not logprobs or not tokens:
             return (gr.update(value="No finite log probabilities or tokens to visualize after filtering"), None, None, None, 1, 0)
+        # Paginate data for large inputs (fixed page size of 1000)
+        page_size = 1000
         total_pages = max(1, (len(logprobs) + page_size - 1) // page_size)
         start_idx = page * page_size
         end_idx = min((page + 1) * page_size, len(logprobs))
                 hovertemplate='<b>%{customdata}</b><extra></extra>'
             )
         # Create DataFrame for the table (paginated)
         table_data = []
         for i, entry in enumerate(content[start_idx:end_idx]):
             logprob = ensure_float(entry.get("logprob", None))
+            if logprob is not None and math.isfinite(logprob) and logprob >= -100000 and "top_logprobs" in entry and entry["top_logprobs"] is not None:
                 token = entry["token"]
                 top_logprobs = entry["top_logprobs"]
                 # Ensure all values in top_logprobs are floats
             colored_text_html = "No finite log probabilities to display."
         # Top 3 Token Log Probabilities (paginated)
+        alt_viz_fig = go.Figure()
         if paginated_logprobs and paginated_alternatives:
             for i, (token, probs) in enumerate(zip(paginated_tokens, paginated_alternatives)):
                 for j, (alt_tok, prob) in enumerate(probs):
                     alt_viz_fig.add_trace(go.Bar(x=[f"{token} (Pos {i+start_idx})"], y=[prob], name=f"{alt_tok}", marker_color=['blue', 'green', 'red'][j]))
         else:
             alt_viz_html = "No finite log probabilities to display."
+        return (main_fig, df, colored_text_html, alt_viz_html, drops_fig, total_pages, page)
     except Exception as e:
         logger.error("Visualization failed: %s", str(e))
+        return (gr.update(value=f"Error: {str(e)}"), None, "No finite log probabilities to display.", None, gr.update(value="No data for probability drops."), 1, 0)
 # Gradio interface with interactive layout and pagination
 with gr.Blocks(title="Log Probability Visualizer") as app:
     gr.Markdown("# Log Probability Visualizer")
     gr.Markdown(
+        "Paste your JSON or Python dictionary log prob data below to visualize the tokens and their probabilities. Use pagination to navigate large inputs (fixed filter ≥ -100000, 1000 tokens per page)."
     )
     with gr.Row():
                 placeholder="Paste your JSON (e.g., {\"content\": [...]}) or Python dict (e.g., {'content': [...]}) here...",
             )
         with gr.Column(scale=1):
             page = gr.Number(value=0, label="Page Number", precision=0, minimum=0)
     with gr.Row():
         drops_output = gr.Plot(label="Probability Drops (Click for Details)")
     with gr.Row():
         table_output = gr.Dataframe(label="Token Log Probabilities and Top Alternatives")
+        alt_viz_output = gr.Plot(label="Top 3 Token Log Probabilities (Click for Details)")
     with gr.Row():
         text_output = gr.HTML(label="Colored Text (Confidence Visualization)")
     btn = gr.Button("Visualize")
     btn.click(
         fn=visualize_logprobs,
+        inputs=[json_input, page],
+        outputs=[plot_output, table_output, text_output, alt_viz_output, drops_output, gr.State(), gr.State()],
     )
     # Pagination controls
         total_pages_output = gr.Number(label="Total Pages", interactive=False)
         current_page_output = gr.Number(label="Current Page", interactive=False)
+    def update_page(json_input, current_page, action):
+        """Step the page number backward or forward and report the total page count.
+
+        Returns a pair of ``gr.update`` objects: the (possibly changed) page
+        number and the total number of pages.
+        """
+        # Look up the page count before branching so it is always bound. It was
+        # previously assigned only under "next", so any other action raised
+        # UnboundLocalError at the return statement.
+        total_pages = visualize_logprobs(json_input, 0)[5]  # Get total pages
+        if action == "prev" and current_page > 0:
+            current_page -= 1
+        elif action == "next" and current_page < total_pages - 1:
+            current_page += 1
+        return gr.update(value=current_page), gr.update(value=total_pages)
     prev_btn.click(
         fn=update_page,
+        inputs=[json_input, page, gr.State()],
         outputs=[page, total_pages_output]
     )
     next_btn.click(
         fn=update_page,
+        inputs=[json_input, page, gr.State()],
         outputs=[page, total_pages_output]
     )