# --- Hugging Face file-viewer residue (page header, not part of the program) ---
# badaoui's picture
# badaoui HF Staff
# improve encore
# f3f4c77
# raw
# history blame
# 29.2 kB
import matplotlib.pyplot as plt
import matplotlib
import pandas as pd
import gradio as gr
from gradio_toggle import Toggle
from data import CIResults, find_new_regressions
from utils import logger
from summary_page import create_summary_page
from model_page import plot_model_stats
from time_series_gradio import (
create_time_series_summary_gradio,
create_model_time_series_gradio,
)
# Matplotlib setup: black backgrounds for figures, axes and saved images, and
# interactive mode switched off (per the original note, to avoid figure
# accumulation / memory warnings).
matplotlib.rcParams.update({
    'figure.facecolor': '#000000',
    'axes.facecolor': '#000000',
    'savefig.facecolor': '#000000',
})
plt.ioff()

# One shared results object for the whole app, populated once at import time.
Ci_results = CIResults()
Ci_results.load_data()

# Preload the historical data over the full span of available dates.
# NOTE(review): assumes `available_dates` is ordered newest-first (index 0 is
# the newest date, index -1 the oldest) — confirm against CIResults.
if Ci_results.available_dates:
    start_date_val, end_date_val = (
        Ci_results.available_dates[-1],
        Ci_results.available_dates[0],
    )
    Ci_results.load_historical_data(start_date_val, end_date_val)
    logger.info(f"Preloaded historical data: {len(Ci_results.historical_df)} records")

# Kick off the auto-reload scheduler.
Ci_results.schedule_data_reload()
# Function to check if a model has failures
def model_has_failures_by_device(model_name, device='both'):
    """Tell whether a model has failing tests on the requested device(s).

    Args:
        model_name: Model name; lower-cased before it is looked up in the
            index of ``Ci_results.df``.
        device: ``'amd'``, ``'nvidia'`` or ``'both'`` (default).

    Returns:
        ``True`` if any of the selected ``failed_*`` counters is above zero.
        ``False`` when no data is loaded, the model is not in the index, no
        counter is positive, or ``device`` is none of the three values.
    """
    results = Ci_results.df
    if results is None or results.empty:
        return False

    key = model_name.lower()
    if key not in results.index:
        return False

    row = results.loc[key]
    # Counters checked per vendor, in the order multi-GPU then single-GPU.
    failure_columns = {
        'amd': ('failed_multi_no_amd', 'failed_single_no_amd'),
        'nvidia': ('failed_multi_no_nvidia', 'failed_single_no_nvidia'),
    }
    for vendor, columns in failure_columns.items():
        if device not in (vendor, 'both'):
            continue
        if any(row.get(column, 0) > 0 for column in columns):
            return True
    return False
# Function to get current description text
def get_description_text():
    """Build the sidebar description, including the last-update message.

    Returns:
        A single string of ``<br>``-joined fragments: four bold headline
        entries, an empty spacer, and an italic footer that embeds
        ``Ci_results.latest_update_msg`` (or ``loading...`` while it is empty).
    """
    headline = (
        "Transformer CI Dashboard",
        "-",
        "AMD runs on MI325",
        "NVIDIA runs on A10",
    )
    fragments = [f"**{entry}**" for entry in headline]
    fragments.append("")  # spacer between headline and footer

    update_msg = Ci_results.latest_update_msg
    if update_msg:
        footer = f"*This dashboard only tracks important models*<br>*({update_msg})*"
    else:
        footer = "*This dashboard only tracks important models*<br>*(loading...)*"
    fragments.append(footer)
    return "<br>".join(fragments)
# Function to format new regressions for display
def get_regressions_text():
    """Return Markdown for the "New Regressions" panel.

    Calls ``find_new_regressions`` on the current and historical data and
    groups the resulting records by ``"<model> (<DEVICE> <gpu_type>)"``.
    Groups are listed in sorted order; each shows at most
    ``max_tests_per_group`` test names followed by an "... and N more" line.

    Returns:
        A Markdown string. If no regressions are found, a "No New
        Regressions" message; if anything raises, the error is logged and a
        fallback "Unable to load regression data" message is returned.
    """
    max_tests_per_group = 5  # keep the panel compact

    try:
        regressions = find_new_regressions(Ci_results.df, Ci_results.all_historical_data)
        if not regressions:
            return "### 🎉 No New Regressions\nAll failures were present in the previous run."

        # Group test names by model and device
        grouped = {}
        for reg in regressions:
            key = f"{reg['model']} ({reg['device'].upper()} {reg['gpu_type']})"
            grouped.setdefault(key, []).append(reg['test'])

        # Format output
        lines = [f"### ⚠️ New Regressions Detected: {len(regressions)} failure(s)", ""]
        for key in sorted(grouped):
            tests = grouped[key]
            lines.append(f"**{key}:**")
            lines.extend(f" • {test}" for test in tests[:max_tests_per_group])
            hidden_count = len(tests) - max_tests_per_group
            if hidden_count > 0:
                lines.append(f" • ... and {hidden_count} more")
            lines.append("")
        return "\n".join(lines)
    except Exception as e:
        # UI boundary: never let a data problem break the page, but log it.
        logger.error(f"Error getting regressions: {e}")
        return "### ⚠️ New Regressions\n*Unable to load regression data*"
# Load CSS from external file
def load_css():
    """Return the dashboard stylesheet.

    Reads ``styles.css`` (resolved against the current working directory) as
    UTF-8, so the result does not depend on the platform's default encoding.

    Returns:
        The file content, or a minimal dark default style if the file does
        not exist (a warning is logged in that case).
    """
    try:
        with open("styles.css", "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        logger.warning("styles.css not found, using minimal default styles")
        return "body { background: #000; color: #fff; }"
# JavaScript handed to `gr.Blocks(js=...)`: if the page URL's `__theme` query
# parameter is not 'dark', set it to 'dark' and navigate to the updated URL.
js_func = """
function refresh() {
const url = new URL(window.location);
if (url.searchParams.get('__theme') !== 'dark') {
url.searchParams.set('__theme', 'dark');
window.location.href = url.href;
}
}
"""
# Create the Gradio interface with sidebar and dark theme.
# Layout: one row holding the sidebar column (scale=1) and the main content
# column (scale=4). The main column contains the current view (summary +
# per-model detail) and the historical view (time-series summary + per-model
# time series).
# NOTE(review): the source lost its indentation; the nesting below was
# reconstructed from how the handlers toggle visibility (detail view inside
# the current view, time-series detail inside the historical view) — confirm.
with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func) as demo:
    with gr.Row():
        # Sidebar for model selection
        with gr.Column(scale=1, elem_classes=["sidebar"]):
            gr.Markdown("# 🤖 TCID", elem_classes=["sidebar-title"])

            # Description with integrated last update time
            description_text = get_description_text()
            description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])

            # Summary button (for current view)
            summary_button = gr.Button(
                "summary\n📊",
                variant="primary",
                size="lg",
                elem_classes=["summary-button"],
            )
            history_view_button = Toggle(
                label="History view",
                value=False,
                interactive=True,
                elem_classes=["history-view-button"],
            )

            # Model selection header (clickable toggle)
            model_toggle_button = gr.Button(
                f"► Select model ({len(Ci_results.available_models)})",
                variant="secondary",
                elem_classes=["model-header"],
            )

            # Model buttons container (collapsible) - start folded.
            # Folding is driven by the "model-list-hidden" CSS class, not by
            # Gradio's `visible` flag.
            with gr.Column(elem_classes=["model-list", "model-list-hidden"]) as model_list_container:
                # Toggles for filtering failing models by device
                with gr.Row(elem_classes=["failing-models-filter-row"]):
                    show_amd_failures = gr.Checkbox(
                        label="Failing on AMD",
                        value=False,
                        interactive=True,
                        elem_classes=["failing-models-toggle", "amd-toggle"],
                    )
                    show_nvidia_failures = gr.Checkbox(
                        label="Failing on NVIDIA",
                        value=False,
                        interactive=True,
                        elem_classes=["failing-models-toggle", "nvidia-toggle"],
                    )

                # Create individual buttons for each model
                model_buttons = []
                model_choices = (
                    [model.lower() for model in Ci_results.available_models]
                    if Ci_results.available_models
                    else ["auto", "bert", "clip", "llama"]
                )

                # Categorize models by failure type
                amd_failing_models = []
                nvidia_failing_models = []
                both_failing_models = []
                passing_models = []
                logger.info(f"Creating {len(model_choices)} model buttons: {model_choices}")
                for model_name in model_choices:
                    has_amd = model_has_failures_by_device(model_name, 'amd')
                    has_nvidia = model_has_failures_by_device(model_name, 'nvidia')
                    if has_amd and has_nvidia:
                        both_failing_models.append(model_name)
                    elif has_amd:
                        amd_failing_models.append(model_name)
                    elif has_nvidia:
                        nvidia_failing_models.append(model_name)
                    else:
                        passing_models.append(model_name)

                # Container for all models (visible by default)
                with gr.Column(visible=True, elem_classes=["all-models-container"]) as all_models_container:
                    for model_name in model_choices:
                        # A model fails on "both" exactly when it was not
                        # classified as passing above, so reuse that result.
                        has_failures = model_name not in passing_models
                        button_classes = ["model-button"]
                        if has_failures:
                            button_classes.append("model-button-failed")
                        btn = gr.Button(
                            model_name,
                            variant="secondary",
                            size="sm",
                            elem_classes=button_classes,
                        )
                        model_buttons.append(btn)

                # Container for AMD failures (hidden by default)
                amd_buttons = []
                with gr.Column(visible=False, elem_classes=["amd-failures-container"]) as amd_failures_container:
                    amd_models_to_show = amd_failing_models + both_failing_models
                    for model_name in sorted(amd_models_to_show):
                        btn = gr.Button(
                            model_name,
                            variant="secondary",
                            size="sm",
                            elem_classes=["model-button", "model-button-failed"],
                        )
                        amd_buttons.append(btn)

                # Container for NVIDIA failures (hidden by default)
                nvidia_buttons = []
                with gr.Column(visible=False, elem_classes=["nvidia-failures-container"]) as nvidia_failures_container:
                    nvidia_models_to_show = nvidia_failing_models + both_failing_models
                    for model_name in sorted(nvidia_models_to_show):
                        btn = gr.Button(
                            model_name,
                            variant="secondary",
                            size="sm",
                            elem_classes=["model-button", "model-button-failed"],
                        )
                        nvidia_buttons.append(btn)

                # Container for models failing on AMD and/or NVIDIA (hidden by default)
                both_buttons = []
                with gr.Column(visible=False, elem_classes=["both-failures-container"]) as both_failures_container:
                    all_failing = list(set(amd_failing_models + nvidia_failing_models + both_failing_models))
                    for model_name in sorted(all_failing):
                        btn = gr.Button(
                            model_name,
                            variant="secondary",
                            size="sm",
                            elem_classes=["model-button", "model-button-failed"],
                        )
                        both_buttons.append(btn)

            # CI job links at bottom of sidebar
            ci_links_display = gr.Markdown("🔗 **CI Jobs:** *Loading...*", elem_classes=["sidebar-links"])

        # Main content area
        with gr.Column(scale=4, elem_classes=["main-content"]):
            # Current view components
            with gr.Column(visible=True, elem_classes=["current-view"]) as current_view:
                # Summary view (contains summary plot and regressions panel)
                with gr.Column(visible=True, elem_classes=["summary-view"]) as summary_view:
                    # Summary display (default view)
                    summary_display = gr.Plot(
                        value=create_summary_page(Ci_results.df, Ci_results.available_models),
                        label="",
                        format="png",
                        elem_classes=["plot-container"],
                        visible=True,
                    )

                    # New Regressions section (at the bottom, collapsible)
                    regressions_toggle_button = gr.Button(
                        "► New Regressions",
                        variant="secondary",
                        elem_classes=["regressions-header"],
                    )
                    with gr.Column(elem_classes=["regressions-content", "regressions-content-hidden"]) as regressions_content:
                        regressions_panel = gr.Markdown(
                            value=get_regressions_text(),
                            elem_classes=["regressions-panel"],
                        )

                # Detailed view components (hidden by default)
                with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
                    # Create the plot output
                    plot_output = gr.Plot(
                        label="",
                        format="png",
                        elem_classes=["plot-container"],
                    )

                    # Create two separate failed tests displays in a row layout
                    with gr.Row():
                        with gr.Column(scale=1):
                            amd_failed_tests_output = gr.Textbox(
                                value="",
                                lines=8,
                                max_lines=8,
                                interactive=False,
                                container=False,
                                elem_classes=["failed-tests"],
                            )
                        with gr.Column(scale=1):
                            nvidia_failed_tests_output = gr.Textbox(
                                value="",
                                lines=8,
                                max_lines=8,
                                interactive=False,
                                container=False,
                                elem_classes=["failed-tests"],
                            )

            # Historical view components (hidden by default)
            with gr.Column(visible=False, elem_classes=["historical-view"]) as historical_view:
                # Time-series summary displays (multiple Gradio plots)
                time_series_failure_rates = gr.Plot(
                    label="",
                    elem_classes=["plot-container"],
                )
                time_series_amd_tests = gr.Plot(
                    label="",
                    elem_classes=["plot-container"],
                )
                time_series_nvidia_tests = gr.Plot(
                    label="",
                    elem_classes=["plot-container"],
                )

                # Time-series model view (hidden by default)
                with gr.Column(visible=False, elem_classes=["time-series-detail-view"]) as time_series_detail_view:
                    # Time-series plots for specific model (with spacing)
                    time_series_amd_model_plot = gr.Plot(
                        label="",
                        elem_classes=["plot-container"],
                    )
                    time_series_nvidia_model_plot = gr.Plot(
                        label="",
                        elem_classes=["plot-container"],
                    )
# Failing models filter functionality
def filter_failing_models(show_amd, show_nvidia):
    """Pick which model-button container is visible for the two checkboxes.

    Args:
        show_amd: State of the "Failing on AMD" checkbox.
        show_nvidia: State of the "Failing on NVIDIA" checkbox.

    Returns:
        Four visibility updates, in this order: all models (neither box
        checked), AMD failures (AMD only), NVIDIA failures (NVIDIA only),
        any failures (both checked).
    """
    visibility = (
        not show_amd and not show_nvidia,  # all_models_container
        show_amd and not show_nvidia,      # amd_failures_container
        not show_amd and show_nvidia,      # nvidia_failures_container
        show_amd and show_nvidia,          # both_failures_container
    )
    return tuple(gr.update(visible=flag) for flag in visibility)


# Either checkbox re-evaluates the filter from the state of both.
# NOTE(review): Gradio event listeners have to be registered inside the
# `gr.Blocks` context — confirm this section sits inside `with ... as demo`.
for checkbox in (show_amd_failures, show_nvidia_failures):
    checkbox.change(
        fn=filter_failing_models,
        inputs=[show_amd_failures, show_nvidia_failures],
        outputs=[
            all_models_container,
            amd_failures_container,
            nvidia_failures_container,
            both_failures_container,
        ],
    )
# Regressions panel toggle functionality
def toggle_regressions_panel(current_visible):
    """Flip the collapsed/expanded state of the regressions panel.

    Args:
        current_visible: Whether the panel is currently expanded.

    Returns:
        A 3-tuple: an update with the new button label (▼ when expanded,
        ► when collapsed), an update with the panel's CSS classes, and the
        new visibility flag to store back in the state.
    """
    new_visible = not current_visible
    arrow = "▼" if new_visible else "►"
    # Use CSS classes instead of Gradio visibility
    state_class = "regressions-content-visible" if new_visible else "regressions-content-hidden"
    return (
        gr.update(value=f"{arrow} New Regressions"),
        gr.update(elem_classes=["regressions-content", state_class]),
        new_visible,
    )


# Track regressions panel visibility state (starts collapsed)
regressions_visible = gr.State(False)
regressions_toggle_button.click(
    fn=toggle_regressions_panel,
    inputs=[regressions_visible],
    outputs=[regressions_toggle_button, regressions_content, regressions_visible],
)
# Model toggle functionality
def toggle_model_list(current_visible):
    """Flip the collapsed/expanded state of the sidebar model list.

    Args:
        current_visible: Whether the list is currently expanded.

    Returns:
        A 3-tuple: an update with the new header label (arrow plus the number
        of available models), an update with the list's CSS classes, and the
        new visibility flag to store back in the state.
    """
    new_visible = not current_visible
    arrow = "▼" if new_visible else "►"
    # Use CSS classes instead of Gradio visibility
    state_class = "model-list-visible" if new_visible else "model-list-hidden"
    return (
        gr.update(value=f"{arrow} Select model ({len(Ci_results.available_models)})"),
        gr.update(elem_classes=["model-list", state_class]),
        new_visible,
    )


# Track model list visibility state (starts collapsed)
model_list_visible = gr.State(False)
# Track last selected model for mode switches
selected_model_state = gr.State(None)
# Track whether current view is model detail (True) or summary (False)
in_model_view_state = gr.State(False)

model_toggle_button.click(
    fn=toggle_model_list,
    inputs=[model_list_visible],
    outputs=[model_toggle_button, model_list_container, model_list_visible],
)
# Unified summary handler: respects History toggle
def handle_summary_click(history_mode: bool):
    """Show the summary page for the current or the historical mode.

    Args:
        history_mode: Value of the "History view" toggle.

    Returns:
        A 13-tuple, one entry per output of ``summary_button.click`` and in
        that order. It always refreshes the description and CI links, and
        resets the model-view flag to ``False`` and the selected model to
        ``""``.
    """
    description = get_description_text()
    links = get_ci_links()

    if not history_mode:
        summary_fig = create_summary_page(Ci_results.df, Ci_results.available_models)
        return (
            description,                                  # description_display
            links,                                        # ci_links_display
            gr.update(visible=True),                      # current_view
            gr.update(visible=False),                     # historical_view
            gr.update(visible=True),                      # summary_view
            gr.update(value=summary_fig, visible=True),   # summary_display
            gr.update(visible=False),                     # detail_view
            gr.update(visible=False),                     # time_series_failure_rates
            gr.update(visible=False),                     # time_series_amd_tests
            gr.update(visible=False),                     # time_series_nvidia_tests
            gr.update(visible=False),                     # time_series_detail_view
            False,                                        # in_model_view_state
            "",                                           # selected_model_state
        )

    failure_rates, amd_tests, nvidia_tests = get_historical_summary_plots()
    return (
        description,                # description_display
        links,                      # ci_links_display
        gr.update(visible=False),   # current_view
        gr.update(visible=True),    # historical_view
        gr.update(visible=False),   # summary_view
        gr.update(visible=False),   # summary_display
        gr.update(visible=False),   # detail_view
        failure_rates,              # time_series_failure_rates
        amd_tests,                  # time_series_amd_tests
        nvidia_tests,               # time_series_nvidia_tests
        gr.update(visible=False),   # time_series_detail_view
        False,                      # in_model_view_state
        "",                         # selected_model_state
    )


summary_click_outputs = [
    description_display,
    ci_links_display,
    current_view,
    historical_view,
    summary_view,
    summary_display,
    detail_view,
    time_series_failure_rates,
    time_series_amd_tests,
    time_series_nvidia_tests,
    time_series_detail_view,
    in_model_view_state,
    selected_model_state,
]
summary_button.click(
    fn=handle_summary_click,
    inputs=[history_view_button],
    outputs=summary_click_outputs,
)
# Function to get CI job links
def _collect_job_links(df):
    """Find the first usable multi/single GPU job link per vendor in *df*.

    Only dict-valued ``job_link_amd`` / ``job_link_nvidia`` cells are read;
    anything else (missing, NaN, other types) is skipped. Scanning stops as
    soon as all four links are known.
    """
    links = {
        'amd': {'multi': None, 'single': None},
        'nvidia': {'multi': None, 'single': None},
    }
    for _, row in df.iterrows():
        for vendor, found in links.items():
            raw = row.get(f'job_link_{vendor}')
            if not isinstance(raw, dict):
                continue
            for kind in ('multi', 'single'):
                if kind in raw and not found[kind]:
                    found[kind] = raw[kind]
        # Break if we have all links
        if all(link for found in links.values() for link in found.values()):
            break
    return links


def _vendor_section(title, found):
    """Return the Markdown fragments for one vendor, or ``[]`` if it has no links."""
    bullets = []
    if found['multi']:
        bullets.append(f"• [Multi GPU]({found['multi']})\n")
    if found['single']:
        bullets.append(f"• [Single GPU]({found['single']})\n")
    return [f"**{title}:**\n", *bullets] if bullets else []


def get_ci_links():
    """Get CI job links from the most recent data as sidebar Markdown.

    Returns:
        A Markdown string: a "Loading..." placeholder while no data is
        available; otherwise the FAQ link followed by the AMD and NVIDIA job
        links (or "No links available"). On any error the problem is logged
        and an "Error loading links" message with the FAQ link is returned.
    """
    faq_url = "https://huggingface.co/spaces/transformers-community/transformers-ci-dashboard/blob/main/README.md"
    try:
        # Read the frame once so a concurrent reload cannot swap it mid-way.
        df = Ci_results.df
        if df is None or df.empty:
            return "🔗 **CI Jobs:** *Loading...*"

        # Links should be the same for all models in a run, so the first
        # ones found are used.
        links = _collect_job_links(df)

        # FAQ link goes first, then the job links.
        parts = [f"❓ [**FAQ**]({faq_url})\n\n", "🔗 **CI Jobs:**\n\n"]

        amd_section = _vendor_section("AMD", links['amd'])
        if amd_section:
            parts.extend(amd_section)
            parts.append("\n")  # blank line between the AMD and NVIDIA sections

        nvidia_section = _vendor_section("NVIDIA", links['nvidia'])
        parts.extend(nvidia_section)

        if not (amd_section or nvidia_section):
            parts.append("*No links available*")
        return "".join(parts)
    except Exception as e:
        # UI boundary: keep the sidebar rendering even if the data is malformed.
        logger.error(f"getting CI links: {e}")
        return f"🔗 **CI Jobs:** *Error loading links*\n\n❓ **[FAQ]({faq_url})**"
# Constants for Gradio updates
# Shared update payloads reused by the view-switching handlers below.
HIDDEN = gr.update(visible=False)  # hide the target component
SHOWN = gr.update(visible=True)  # show the target component
NOOP = gr.update()  # update with no properties set (used where an output must stay as it is)
def get_historical_summary_plots():
    """Build the three historical summary plots from the preloaded data.

    Returns:
        Three visible plot updates taken from the ``'failure_rates'``,
        ``'amd_tests'`` and ``'nvidia_tests'`` entries returned by
        ``create_time_series_summary_gradio``, in that order.
    """
    figures = create_time_series_summary_gradio(Ci_results.historical_df)
    plot_keys = ('failure_rates', 'amd_tests', 'nvidia_tests')
    return tuple(gr.update(value=figures[key], visible=True) for key in plot_keys)
def show_time_series_model(selected_model):
    """Build the time-series plots for a single model.

    Args:
        selected_model: Model name passed on to
            ``create_model_time_series_gradio``.

    Returns:
        Two visible plot updates taken from the ``'amd_plot'`` and
        ``'nvidia_plot'`` entries of the result, in that order.
    """
    figures = create_model_time_series_gradio(Ci_results.historical_df, selected_model)
    amd_update = gr.update(value=figures['amd_plot'], visible=True)
    nvidia_update = gr.update(value=figures['nvidia_plot'], visible=True)
    return amd_update, nvidia_update
def handle_history_toggle(history_mode, last_selected_model, in_model_view):
    """Switch between the current and the historical view.

    Args:
        history_mode: New value of the "History view" toggle.
        last_selected_model: Last model picked by the user (falsy if none).
        in_model_view: Whether a model detail page was showing.

    Returns:
        A 15-tuple, one entry per output of ``history_view_button.change``:
        current_view, historical_view, summary_view, summary_display,
        detail_view, the three time-series summary plots, the two per-model
        time-series plots, time_series_detail_view, plot_output, the two
        failed-tests boxes, and the new in-model-view flag.
    """
    if not history_mode:
        # Current mode: show model detail if available, otherwise summary
        frame = Ci_results.df
        model_available = (
            last_selected_model
            and frame is not None
            and not frame.empty
            and last_selected_model in frame.index
        )
        if not model_available:
            summary_fig = create_summary_page(Ci_results.df, Ci_results.available_models)
            return (SHOWN, HIDDEN, SHOWN, gr.update(value=summary_fig, visible=True), HIDDEN,
                    HIDDEN, HIDDEN, HIDDEN,
                    NOOP, NOOP, HIDDEN,
                    NOOP, NOOP, NOOP, False)

        model_fig, amd_text, nvidia_text = plot_model_stats(
            Ci_results.df, last_selected_model, Ci_results.all_historical_data
        )
        return (SHOWN, HIDDEN, HIDDEN, HIDDEN, SHOWN,
                HIDDEN, HIDDEN, HIDDEN,
                NOOP, NOOP, HIDDEN,
                model_fig, amd_text, nvidia_text, True)

    # Historical mode: show model detail if in model view, otherwise summary
    if in_model_view and last_selected_model:
        amd_series, nvidia_series = show_time_series_model(last_selected_model)
        return (HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN,
                HIDDEN, HIDDEN, HIDDEN,
                amd_series, nvidia_series, SHOWN,
                NOOP, NOOP, NOOP, True)

    failure_rates, amd_tests, nvidia_tests = get_historical_summary_plots()
    return (HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN,
            failure_rates, amd_tests, nvidia_tests,
            NOOP, NOOP, HIDDEN,
            NOOP, NOOP, NOOP, False)
def handle_model_click(selected_model: str, history_mode: bool):
    """Open the detail page of a model in the active mode.

    Args:
        selected_model: Name of the model whose button was clicked.
        history_mode: Value of the "History view" toggle.

    Returns:
        A 16-tuple, one entry per element of ``model_click_outputs``:
        plot_output, the two failed-tests boxes, current_view,
        historical_view, summary_view, summary_display, detail_view, the three
        time-series summary plots, the two per-model time-series plots,
        time_series_detail_view, the selected model, and ``True`` for the
        in-model-view flag.
    """
    if not history_mode:
        model_fig, amd_text, nvidia_text = plot_model_stats(
            Ci_results.df, selected_model, Ci_results.all_historical_data
        )
        return (model_fig, amd_text, nvidia_text,
                SHOWN, HIDDEN, HIDDEN, HIDDEN, SHOWN,
                NOOP, NOOP, NOOP,
                NOOP, NOOP, HIDDEN,
                selected_model, True)

    amd_series, nvidia_series = show_time_series_model(selected_model)
    return (NOOP, NOOP, NOOP,
            HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN,
            HIDDEN, HIDDEN, HIDDEN,
            amd_series, nvidia_series, SHOWN,
            selected_model, True)
# Wire up history toggle. The output order must match the tuples returned by
# handle_history_toggle.
history_toggle_outputs = [
    current_view,
    historical_view,
    summary_view,
    summary_display,
    detail_view,
    time_series_failure_rates,
    time_series_amd_tests,
    time_series_nvidia_tests,
    time_series_amd_model_plot,
    time_series_nvidia_model_plot,
    time_series_detail_view,
    plot_output,
    amd_failed_tests_output,
    nvidia_failed_tests_output,
    in_model_view_state,
]
history_view_button.change(
    fn=handle_history_toggle,
    inputs=[history_view_button, selected_model_state, in_model_view_state],
    outputs=history_toggle_outputs,
)

# Common outputs for model click handlers. The order must match the tuples
# returned by handle_model_click.
model_click_outputs = [
    plot_output,
    amd_failed_tests_output,
    nvidia_failed_tests_output,
    current_view,
    historical_view,
    summary_view,
    summary_display,
    detail_view,
    time_series_failure_rates,
    time_series_amd_tests,
    time_series_nvidia_tests,
    time_series_amd_model_plot,
    time_series_nvidia_model_plot,
    time_series_detail_view,
    selected_model_state,
    in_model_view_state,
]
# Helper function to connect button clicks
def connect_model_buttons(buttons, models):
    """Attach the model click handler to each button.

    Args:
        buttons: Buttons to wire up.
        models: Model names, paired with ``buttons`` by position.
    """
    for button, model_name in zip(buttons, models):
        button.click(
            # `m=model_name` binds the current name now; a plain closure would
            # see only the last value of the loop variable.
            fn=lambda history_mode, m=model_name: handle_model_click(m, history_mode),
            inputs=[history_view_button],
            outputs=model_click_outputs,
        )


# Wire up all button groups. Each model list is built the same way (and in the
# same sorted order) as the list used when the group's buttons were created.
button_groups = (
    (model_buttons, model_choices),
    (amd_buttons, sorted(amd_failing_models + both_failing_models)),
    (nvidia_buttons, sorted(nvidia_failing_models + both_failing_models)),
    (both_buttons, sorted(set(amd_failing_models + nvidia_failing_models + both_failing_models))),
)
for group_buttons, group_models in button_groups:
    connect_model_buttons(group_buttons, group_models)
# Page-load handler (registered with demo.load below)
def show_summary_and_update_links():
    """Produce the values shown when the interface loads.

    Returns:
        A 3-tuple: the summary figure, the description text and the CI links
        Markdown.
    """
    summary_fig = create_summary_page(Ci_results.df, Ci_results.available_models)
    description = get_description_text()
    ci_links = get_ci_links()
    return summary_fig, description, ci_links


# Auto-update summary, description, CI links, and regressions when the interface loads.
# The regressions panel is refreshed in a follow-up step chained with `.then`.
initial_load = demo.load(
    fn=show_summary_and_update_links,
    outputs=[summary_display, description_display, ci_links_display],
)
initial_load.then(
    fn=get_regressions_text,
    outputs=[regressions_panel],
)
# Gradio entrypoint: launch the app only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    demo.launch()