"""Gradio dashboard for CI test results.

Builds a sidebar (description, summary button, history toggle, model list with
AMD/NVIDIA failure filters, CI job links) and a main area that shows either the
current summary, a per-model detail page, or historical time-series plots.
"""

import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from gradio_toggle import Toggle

from data import CIResults, find_new_regressions
from model_page import plot_model_stats
from summary_page import create_summary_page
from time_series_gradio import (
    create_model_time_series_gradio,
    create_time_series_summary_gradio,
)
from utils import logger

# Configure matplotlib to prevent memory warnings and set dark background
matplotlib.rcParams['figure.facecolor'] = '#000000'
matplotlib.rcParams['axes.facecolor'] = '#000000'
matplotlib.rcParams['savefig.facecolor'] = '#000000'
plt.ioff()  # Turn off interactive mode to prevent figure accumulation

# Load data once at startup
Ci_results = CIResults()
Ci_results.load_data()

# Preload historical data at startup
if Ci_results.available_dates:
    start_date_val = Ci_results.available_dates[-1]  # Last date (oldest)
    end_date_val = Ci_results.available_dates[0]  # First date (newest)
    Ci_results.load_historical_data(start_date_val, end_date_val)
    logger.info(f"Preloaded historical data: {len(Ci_results.historical_df)} records")

# Start the auto-reload scheduler
Ci_results.schedule_data_reload()


def model_has_failures_by_device(model_name, device='both'):
    """Return True when the current results record failures for a model.

    Args:
        model_name: Model name; it is lower-cased before the lookup in
            ``Ci_results.df``.
        device: ``'amd'``, ``'nvidia'`` or ``'both'`` (check either device).

    Returns:
        True if any of the selected device's ``failed_multi_no_*`` /
        ``failed_single_no_*`` counters is positive; False otherwise, including
        when no data is loaded or the model is not in the index.
    """
    if Ci_results.df is None or Ci_results.df.empty:
        return False
    model_name_lower = model_name.lower()
    if model_name_lower not in Ci_results.df.index:
        return False
    row = Ci_results.df.loc[model_name_lower]
    if device in ('amd', 'both'):
        if row.get('failed_multi_no_amd', 0) > 0 or row.get('failed_single_no_amd', 0) > 0:
            return True
    if device in ('nvidia', 'both'):
        if row.get('failed_multi_no_nvidia', 0) > 0 or row.get('failed_single_no_nvidia', 0) > 0:
            return True
    return False


def get_description_text():
    """Get description text with integrated last update time."""
    # The text is rendered by gr.Markdown, where a single "\n" is only a soft
    # break; an explicit HTML line break keeps every entry on its own line.
    line_break = "<br>"
    msg = [
        "Transformer CI Dashboard",
        "-",
        "AMD runs on MI325",
        "NVIDIA runs on A10",
    ]
    msg = ["**" + x + "**" for x in msg] + [""]
    if Ci_results.latest_update_msg:
        msg.append(
            "*This dashboard only tracks important models*"
            f"{line_break}*({Ci_results.latest_update_msg})*"
        )
    else:
        msg.append(f"*This dashboard only tracks important models*{line_break}*(loading...)*")
    return line_break.join(msg)


def get_regressions_text():
    """Get formatted Markdown text for the new regressions panel.

    Regressions are grouped per "model (DEVICE gpu_type)" key and at most five
    tests are listed per group. Any error is logged and replaced by a fallback
    message so the UI keeps rendering.
    """
    try:
        regressions = find_new_regressions(Ci_results.df, Ci_results.all_historical_data)
        if not regressions:
            return "### 🎉 No New Regressions\nAll failures were present in the previous run."

        # Group by model and device
        grouped = {}
        for reg in regressions:
            model = reg['model']
            device = reg['device'].upper()
            gpu_type = reg['gpu_type']
            test = reg['test']
            key = f"{model} ({device} {gpu_type})"
            grouped.setdefault(key, []).append(test)

        # Format output
        lines = [f"### ⚠️ New Regressions Detected: {len(regressions)} failure(s)"]
        lines.append("")
        for key in sorted(grouped):
            tests = grouped[key]
            lines.append(f"**{key}:**")
            for test in tests[:5]:  # Limit to 5 tests per model
                lines.append(f" • {test}")
            if len(tests) > 5:
                lines.append(f" • ... and {len(tests) - 5} more")
            lines.append("")
        return "\n".join(lines)
    except Exception as e:
        # UI boundary: never let a data problem break page rendering.
        logger.error(f"Error getting regressions: {e}")
        return "### ⚠️ New Regressions\n*Unable to load regression data*"


def load_css():
    """Load CSS from ``styles.css``; fall back to minimal styles if missing."""
    try:
        with open("styles.css", "r", encoding="utf-8") as f:
            css_content = f.read()
        return css_content
    except FileNotFoundError:
        logger.warning("styles.css not found, using minimal default styles")
        return "body { background: #000; color: #fff; }"


# Reloads the page with `__theme=dark` in the query string when it is not set.
js_func = """
function refresh() {
    const url = new URL(window.location);
    if (url.searchParams.get('__theme') !== 'dark') {
        url.searchParams.set('__theme', 'dark');
        window.location.href = url.href;
    }
}
"""

# Create the Gradio interface with sidebar and dark theme
with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func) as demo:
    with gr.Row():
        # Sidebar for model selection
        with gr.Column(scale=1, elem_classes=["sidebar"]):
            gr.Markdown("# 🤖 TCID", elem_classes=["sidebar-title"])

            # Description with integrated last update time
            description_text = get_description_text()
            description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])

            # Summary button (for current view)
            summary_button = gr.Button(
                "summary\n📊",
                variant="primary",
                size="lg",
                elem_classes=["summary-button"]
            )

            history_view_button = Toggle(
                label="History view",
                value=False,
                interactive=True,
                elem_classes=["history-view-button"]
            )

            # Model selection header (clickable toggle)
            model_toggle_button = gr.Button(
                f"► Select model ({len(Ci_results.available_models)})",
                variant="secondary",
                elem_classes=["model-header"]
            )

            # Model buttons container (collapsible) - start folded
            with gr.Column(elem_classes=["model-list", "model-list-hidden"]) as model_list_container:
                # Toggles for filtering failing models by device
                with gr.Row(elem_classes=["failing-models-filter-row"]):
                    show_amd_failures = gr.Checkbox(
                        label="Failing on AMD",
                        value=False,
                        interactive=True,
                        elem_classes=["failing-models-toggle", "amd-toggle"]
                    )
                    show_nvidia_failures = gr.Checkbox(
                        label="Failing on NVIDIA",
                        value=False,
                        interactive=True,
                        elem_classes=["failing-models-toggle", "nvidia-toggle"]
                    )

                # Create individual buttons for each model
                model_buttons = []
                model_choices = (
                    [model.lower() for model in Ci_results.available_models]
                    if Ci_results.available_models
                    else ["auto", "bert", "clip", "llama"]
                )

                # Categorize models by failure type
                amd_failing_models = []
                nvidia_failing_models = []
                both_failing_models = []
                passing_models = []

                logger.info(f"Creating {len(model_choices)} model buttons: {model_choices}")

                for model_name in model_choices:
                    has_amd = model_has_failures_by_device(model_name, 'amd')
                    has_nvidia = model_has_failures_by_device(model_name, 'nvidia')
                    if has_amd and has_nvidia:
                        both_failing_models.append(model_name)
                    elif has_amd:
                        amd_failing_models.append(model_name)
                    elif has_nvidia:
                        nvidia_failing_models.append(model_name)
                    else:
                        passing_models.append(model_name)

                # Ordered model lists for the filtered containers. They are
                # computed once and reused when wiring the click handlers, so
                # each button is always paired with the model it displays.
                amd_models_to_show = sorted(amd_failing_models + both_failing_models)
                nvidia_models_to_show = sorted(nvidia_failing_models + both_failing_models)
                all_failing = sorted(
                    set(amd_failing_models + nvidia_failing_models + both_failing_models)
                )

                # Container for all models (visible by default)
                with gr.Column(visible=True, elem_classes=["all-models-container"]) as all_models_container:
                    for model_name in model_choices:
                        has_failures = model_has_failures_by_device(model_name, 'both')
                        button_classes = ["model-button"]
                        if has_failures:
                            button_classes.append("model-button-failed")
                        btn = gr.Button(
                            model_name,
                            variant="secondary",
                            size="sm",
                            elem_classes=button_classes
                        )
                        model_buttons.append(btn)

                # Container for AMD failures (hidden by default)
                amd_buttons = []
                with gr.Column(visible=False, elem_classes=["amd-failures-container"]) as amd_failures_container:
                    for model_name in amd_models_to_show:
                        btn = gr.Button(
                            model_name,
                            variant="secondary",
                            size="sm",
                            elem_classes=["model-button", "model-button-failed"]
                        )
                        amd_buttons.append(btn)

                # Container for NVIDIA failures (hidden by default)
                nvidia_buttons = []
                with gr.Column(visible=False, elem_classes=["nvidia-failures-container"]) as nvidia_failures_container:
                    for model_name in nvidia_models_to_show:
                        btn = gr.Button(
                            model_name,
                            variant="secondary",
                            size="sm",
                            elem_classes=["model-button", "model-button-failed"]
                        )
                        nvidia_buttons.append(btn)

                # Container for both AMD and NVIDIA failures (hidden by default)
                both_buttons = []
                with gr.Column(visible=False, elem_classes=["both-failures-container"]) as both_failures_container:
                    for model_name in all_failing:
                        btn = gr.Button(
                            model_name,
                            variant="secondary",
                            size="sm",
                            elem_classes=["model-button", "model-button-failed"]
                        )
                        both_buttons.append(btn)

            # CI job links at bottom of sidebar
            ci_links_display = gr.Markdown("🔗 **CI Jobs:** *Loading...*", elem_classes=["sidebar-links"])

        # Main content area
        with gr.Column(scale=4, elem_classes=["main-content"]):
            # Current view components
            with gr.Column(visible=True, elem_classes=["current-view"]) as current_view:
                # Summary view (contains summary plot and regressions panel)
                with gr.Column(visible=True, elem_classes=["summary-view"]) as summary_view:
                    # Summary display (default view)
                    summary_display = gr.Plot(
                        value=create_summary_page(Ci_results.df, Ci_results.available_models),
                        label="",
                        format="png",
                        elem_classes=["plot-container"],
                        visible=True
                    )

                    # New Regressions section (at the bottom, collapsible)
                    regressions_toggle_button = gr.Button(
                        "► New Regressions",
                        variant="secondary",
                        elem_classes=["regressions-header"]
                    )
                    with gr.Column(elem_classes=["regressions-content", "regressions-content-hidden"]) as regressions_content:
                        regressions_panel = gr.Markdown(
                            value=get_regressions_text(),
                            elem_classes=["regressions-panel"]
                        )

                # Detailed view components (hidden by default)
                with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
                    # Create the plot output
                    plot_output = gr.Plot(
                        label="",
                        format="png",
                        elem_classes=["plot-container"]
                    )

                    # Create two separate failed tests displays in a row layout
                    with gr.Row():
                        with gr.Column(scale=1):
                            amd_failed_tests_output = gr.Textbox(
                                value="",
                                lines=8,
                                max_lines=8,
                                interactive=False,
                                container=False,
                                elem_classes=["failed-tests"]
                            )
                        with gr.Column(scale=1):
                            nvidia_failed_tests_output = gr.Textbox(
                                value="",
                                lines=8,
                                max_lines=8,
                                interactive=False,
                                container=False,
                                elem_classes=["failed-tests"]
                            )

            # Historical view components (hidden by default)
            with gr.Column(visible=False, elem_classes=["historical-view"]) as historical_view:
                # Time-series summary displays (multiple Gradio plots)
                time_series_failure_rates = gr.Plot(
                    label="",
                    elem_classes=["plot-container"]
                )
                time_series_amd_tests = gr.Plot(
                    label="",
                    elem_classes=["plot-container"]
                )
                time_series_nvidia_tests = gr.Plot(
                    label="",
                    elem_classes=["plot-container"]
                )

                # Time-series model view (hidden by default)
                with gr.Column(visible=False, elem_classes=["time-series-detail-view"]) as time_series_detail_view:
                    # Time-series plots for specific model (with spacing)
                    time_series_amd_model_plot = gr.Plot(
                        label="",
                        elem_classes=["plot-container"]
                    )
                    time_series_nvidia_model_plot = gr.Plot(
                        label="",
                        elem_classes=["plot-container"]
                    )

    # Failing models filter functionality
    def filter_failing_models(show_amd, show_nvidia):
        """Filter models based on AMD and/or NVIDIA failures.

        Logic:
        - Neither checked: show all models
        - AMD only: show models with AMD failures (including those with both)
        - NVIDIA only: show models with NVIDIA failures (including those with both)
        - Both checked: show all models with any failures
        """
        show_all = not show_amd and not show_nvidia
        show_amd_only = show_amd and not show_nvidia
        show_nvidia_only = not show_amd and show_nvidia
        show_all_failures = show_amd and show_nvidia

        return (
            gr.update(visible=show_all),  # all_models_container
            gr.update(visible=show_amd_only),  # amd_failures_container
            gr.update(visible=show_nvidia_only),  # nvidia_failures_container
            gr.update(visible=show_all_failures),  # both_failures_container
        )

    for checkbox in (show_amd_failures, show_nvidia_failures):
        checkbox.change(
            fn=filter_failing_models,
            inputs=[show_amd_failures, show_nvidia_failures],
            outputs=[
                all_models_container,
                amd_failures_container,
                nvidia_failures_container,
                both_failures_container,
            ]
        )

    # Regressions panel toggle functionality
    def toggle_regressions_panel(current_visible):
        """Toggle the visibility of the regressions panel.

        Returns updates for the header button label, the panel's CSS classes
        and the new value of the visibility state.
        """
        new_visible = not current_visible
        arrow = "▼" if new_visible else "►"
        button_text = f"{arrow} New Regressions"

        # Use CSS classes instead of Gradio visibility
        css_classes = ["regressions-content"]
        if new_visible:
            css_classes.append("regressions-content-visible")
        else:
            css_classes.append("regressions-content-hidden")

        return gr.update(value=button_text), gr.update(elem_classes=css_classes), new_visible

    # Track regressions panel visibility state
    regressions_visible = gr.State(False)

    regressions_toggle_button.click(
        fn=toggle_regressions_panel,
        inputs=[regressions_visible],
        outputs=[regressions_toggle_button, regressions_content, regressions_visible]
    )

    # Model toggle functionality
    def toggle_model_list(current_visible):
        """Toggle the visibility of the model list.

        Returns updates for the header button label, the list container's CSS
        classes and the new value of the visibility state.
        """
        new_visible = not current_visible
        arrow = "▼" if new_visible else "►"
        button_text = f"{arrow} Select model ({len(Ci_results.available_models)})"

        # Use CSS classes instead of Gradio visibility
        css_classes = ["model-list"]
        if new_visible:
            css_classes.append("model-list-visible")
        else:
            css_classes.append("model-list-hidden")

        return gr.update(value=button_text), gr.update(elem_classes=css_classes), new_visible

    # Track model list visibility state
    model_list_visible = gr.State(False)
    # Track last selected model for mode switches
    selected_model_state = gr.State(None)
    # Track whether current view is model detail (True) or summary (False)
    in_model_view_state = gr.State(False)

    model_toggle_button.click(
        fn=toggle_model_list,
        inputs=[model_list_visible],
        outputs=[model_toggle_button, model_list_container, model_list_visible]
    )

    # Unified summary handler: respects History toggle
    def handle_summary_click(history_mode: bool):
        """Show the summary page (historical or current) and refresh sidebar text.

        The returned tuple follows the order of the ``outputs`` list passed to
        ``summary_button.click`` below; it also resets the "in model view" flag
        and clears the selected model.
        """
        description = get_description_text()
        links = get_ci_links()
        if history_mode:
            fr_plot, amd_plot, nvidia_plot = get_historical_summary_plots()
            return (
                description,
                links,
                gr.update(visible=False),  # current_view
                gr.update(visible=True),  # historical_view
                gr.update(visible=False),  # summary_view
                gr.update(visible=False),  # summary_display
                gr.update(visible=False),  # detail_view
                fr_plot,  # time_series_failure_rates
                amd_plot,  # time_series_amd_tests
                nvidia_plot,  # time_series_nvidia_tests
                gr.update(visible=False),  # time_series_detail_view
                False,  # in_model_view_state
                "",  # selected_model_state
            )
        else:
            fig = create_summary_page(Ci_results.df, Ci_results.available_models)
            return (
                description,
                links,
                gr.update(visible=True),  # current_view
                gr.update(visible=False),  # historical_view
                gr.update(visible=True),  # summary_view
                gr.update(value=fig, visible=True),  # summary_display
                gr.update(visible=False),  # detail_view
                gr.update(visible=False),  # time_series_failure_rates
                gr.update(visible=False),  # time_series_amd_tests
                gr.update(visible=False),  # time_series_nvidia_tests
                gr.update(visible=False),  # time_series_detail_view
                False,  # in_model_view_state
                "",  # selected_model_state
            )

    summary_button.click(
        fn=handle_summary_click,
        inputs=[history_view_button],
        outputs=[
            description_display,
            ci_links_display,
            current_view,
            historical_view,
            summary_view,
            summary_display,
            detail_view,
            time_series_failure_rates,
            time_series_amd_tests,
            time_series_nvidia_tests,
            time_series_detail_view,
            in_model_view_state,
            selected_model_state,
        ],
    )

    # Function to get CI job links
    def get_ci_links():
        """Get CI job links (Markdown) from the most recent data.

        Scans the rows of ``Ci_results.df`` until a multi-GPU and a single-GPU
        link have been found for both AMD and NVIDIA. Any error is logged and
        replaced by a fallback message.
        """
        try:
            # Check if df exists and is not empty
            if Ci_results.df is None or Ci_results.df.empty:
                return "🔗 **CI Jobs:** *Loading...*"

            # Get links from any available model (they should be the same for all models in a run)
            amd_multi_link = None
            amd_single_link = None
            nvidia_multi_link = None
            nvidia_single_link = None

            for model_name in Ci_results.df.index:
                row = Ci_results.df.loc[model_name]

                # Extract AMD links
                if pd.notna(row.get('job_link_amd')) and (not amd_multi_link or not amd_single_link):
                    amd_link_raw = row.get('job_link_amd')
                    if isinstance(amd_link_raw, dict):
                        if 'multi' in amd_link_raw and not amd_multi_link:
                            amd_multi_link = amd_link_raw['multi']
                        if 'single' in amd_link_raw and not amd_single_link:
                            amd_single_link = amd_link_raw['single']

                # Extract NVIDIA links
                if pd.notna(row.get('job_link_nvidia')) and (not nvidia_multi_link or not nvidia_single_link):
                    nvidia_link_raw = row.get('job_link_nvidia')
                    if isinstance(nvidia_link_raw, dict):
                        if 'multi' in nvidia_link_raw and not nvidia_multi_link:
                            nvidia_multi_link = nvidia_link_raw['multi']
                        if 'single' in nvidia_link_raw and not nvidia_single_link:
                            nvidia_single_link = nvidia_link_raw['single']

                # Break if we have all links
                if amd_multi_link and amd_single_link and nvidia_multi_link and nvidia_single_link:
                    break

            # FAQ link first, followed by the CI job links
            links_md = "❓ [**FAQ**](https://huggingface.co/spaces/transformers-community/transformers-ci-dashboard/blob/main/README.md)\n\n"
            links_md += "🔗 **CI Jobs:**\n\n"

            # AMD links
            if amd_multi_link or amd_single_link:
                links_md += "**AMD:**\n"
                if amd_multi_link:
                    links_md += f"• [Multi GPU]({amd_multi_link})\n"
                if amd_single_link:
                    links_md += f"• [Single GPU]({amd_single_link})\n"
                links_md += "\n"

            # NVIDIA links
            if nvidia_multi_link or nvidia_single_link:
                links_md += "**NVIDIA:**\n"
                if nvidia_multi_link:
                    links_md += f"• [Multi GPU]({nvidia_multi_link})\n"
                if nvidia_single_link:
                    links_md += f"• [Single GPU]({nvidia_single_link})\n"

            if not (amd_multi_link or amd_single_link or nvidia_multi_link or nvidia_single_link):
                links_md += "*No links available*"

            return links_md
        except Exception as e:
            # UI boundary: never let a data problem break page rendering.
            logger.error(f"getting CI links: {e}")
            return "🔗 **CI Jobs:** *Error loading links*\n\n❓ **[FAQ](README.md)**"

    # Constants for Gradio updates
    HIDDEN = gr.update(visible=False)
    SHOWN = gr.update(visible=True)
    NOOP = gr.update()

    def get_historical_summary_plots():
        """Get historical summary plots from preloaded data.

        Returns three updates (failure rates, AMD tests, NVIDIA tests), each
        setting the plot value and making the component visible.
        """
        plots = create_time_series_summary_gradio(Ci_results.historical_df)
        return (
            gr.update(value=plots['failure_rates'], visible=True),
            gr.update(value=plots['amd_tests'], visible=True),
            gr.update(value=plots['nvidia_tests'], visible=True),
        )

    def show_time_series_model(selected_model):
        """Show time-series view for a specific model.

        Returns two updates (AMD plot, NVIDIA plot), each setting the plot
        value and making the component visible.
        """
        plots = create_model_time_series_gradio(Ci_results.historical_df, selected_model)
        return (
            gr.update(value=plots['amd_plot'], visible=True),
            gr.update(value=plots['nvidia_plot'], visible=True),
        )

    def handle_history_toggle(history_mode, last_selected_model, in_model_view):
        """Handle toggling between current and historical view.

        The returned tuple follows the order of the ``outputs`` list passed to
        ``history_view_button.change`` below; its last element is the new
        value of the "in model view" flag.
        """
        if history_mode:
            # Historical mode: show model detail if in model view, otherwise summary
            if in_model_view and last_selected_model:
                amd_ts, nvidia_ts = show_time_series_model(last_selected_model)
                return (
                    HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN,
                    HIDDEN, HIDDEN, HIDDEN,
                    amd_ts, nvidia_ts, SHOWN,
                    NOOP, NOOP, NOOP,
                    True,
                )

            fr_plot, amd_plot, nvidia_plot = get_historical_summary_plots()
            return (
                HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN,
                fr_plot, amd_plot, nvidia_plot,
                NOOP, NOOP, HIDDEN,
                NOOP, NOOP, NOOP,
                False,
            )
        else:
            # Current mode: show model detail if available, otherwise summary
            if (
                last_selected_model
                and Ci_results.df is not None
                and not Ci_results.df.empty
                and last_selected_model in Ci_results.df.index
            ):
                fig, amd_txt, nvidia_txt = plot_model_stats(
                    Ci_results.df, last_selected_model, Ci_results.all_historical_data
                )
                return (
                    SHOWN, HIDDEN, HIDDEN, HIDDEN, SHOWN,
                    HIDDEN, HIDDEN, HIDDEN,
                    NOOP, NOOP, HIDDEN,
                    fig, amd_txt, nvidia_txt,
                    True,
                )

            fig = create_summary_page(Ci_results.df, Ci_results.available_models)
            return (
                SHOWN, HIDDEN, SHOWN, gr.update(value=fig, visible=True), HIDDEN,
                HIDDEN, HIDDEN, HIDDEN,
                NOOP, NOOP, HIDDEN,
                NOOP, NOOP, NOOP,
                False,
            )

    def handle_model_click(selected_model: str, history_mode: bool):
        """Handle clicking on a model button.

        The returned tuple follows the order of ``model_click_outputs``; its
        last two elements store the selected model and set the "in model view"
        flag to True.
        """
        if history_mode:
            amd_ts, nvidia_ts = show_time_series_model(selected_model)
            return (
                NOOP, NOOP, NOOP,
                HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN,
                HIDDEN, HIDDEN, HIDDEN,
                amd_ts, nvidia_ts, SHOWN,
                selected_model, True,
            )

        fig, amd_txt, nvidia_txt = plot_model_stats(
            Ci_results.df, selected_model, Ci_results.all_historical_data
        )
        return (
            fig, amd_txt, nvidia_txt,
            SHOWN, HIDDEN, HIDDEN, HIDDEN, SHOWN,
            NOOP, NOOP, NOOP,
            NOOP, NOOP, HIDDEN,
            selected_model, True,
        )

    # Wire up history toggle
    history_view_button.change(
        fn=handle_history_toggle,
        inputs=[history_view_button, selected_model_state, in_model_view_state],
        outputs=[
            current_view,
            historical_view,
            summary_view,
            summary_display,
            detail_view,
            time_series_failure_rates,
            time_series_amd_tests,
            time_series_nvidia_tests,
            time_series_amd_model_plot,
            time_series_nvidia_model_plot,
            time_series_detail_view,
            plot_output,
            amd_failed_tests_output,
            nvidia_failed_tests_output,
            in_model_view_state,
        ],
    )

    # Define common outputs for model click handlers
    model_click_outputs = [
        plot_output,
        amd_failed_tests_output,
        nvidia_failed_tests_output,
        current_view,
        historical_view,
        summary_view,
        summary_display,
        detail_view,
        time_series_failure_rates,
        time_series_amd_tests,
        time_series_nvidia_tests,
        time_series_amd_model_plot,
        time_series_nvidia_model_plot,
        time_series_detail_view,
        selected_model_state,
        in_model_view_state,
    ]

    # Helper function to connect button clicks
    def connect_model_buttons(buttons, models):
        """Connect a list of buttons to their corresponding models."""
        for btn, model_name in zip(buttons, models):
            btn.click(
                # `m=model_name` binds the current name at definition time;
                # a plain closure would see only the loop's last value.
                fn=lambda history_mode, m=model_name: handle_model_click(m, history_mode),
                inputs=[history_view_button],
                outputs=model_click_outputs,
            )

    # Wire up all button groups (same model lists the buttons were built from)
    connect_model_buttons(model_buttons, model_choices)
    connect_model_buttons(amd_buttons, amd_models_to_show)
    connect_model_buttons(nvidia_buttons, nvidia_models_to_show)
    connect_model_buttons(both_buttons, all_failing)

    # Page-load handler
    def show_summary_and_update_links():
        """Show summary page and update the description and CI links."""
        return (
            create_summary_page(Ci_results.df, Ci_results.available_models),
            get_description_text(),
            get_ci_links(),
        )

    # Auto-update summary, description, CI links, and regressions when the interface loads
    demo.load(
        fn=show_summary_and_update_links,
        outputs=[summary_display, description_display, ci_links_display]
    ).then(
        fn=get_regressions_text,
        outputs=[regressions_panel]
    )


# Gradio entrypoint
if __name__ == "__main__":
    demo.launch()