Spaces:

transformers-community
/

transformers-ci-dashboard

Running

App Files Files Community

transformers-ci-dashboard / app.py

badaoui HF Staff

improve encore

f3f4c77 about 15 hours ago

raw

history blame

29.2 kB

	import matplotlib.pyplot as plt
	import matplotlib
	import pandas as pd
	import gradio as gr
	from gradio_toggle import Toggle

	from data import CIResults, find_new_regressions
	from utils import logger
	from summary_page import create_summary_page
	from model_page import plot_model_stats
	from time_series_gradio import (
	create_time_series_summary_gradio,
	create_model_time_series_gradio,
	)


	# Configure matplotlib: paint figure, axes and saved-image backgrounds black
	# so every plot matches the dark dashboard theme.
	matplotlib.rcParams.update({
	    'figure.facecolor': '#000000',
	    'axes.facecolor': '#000000',
	    'savefig.facecolor': '#000000',
	})
	# Interactive mode off; per the original author's note this is meant to
	# prevent figure accumulation.
	plt.ioff()


	# Load data once at startup
	# A single module-level CIResults instance is read by the helpers and
	# Gradio callbacks defined further down in this file.
	Ci_results = CIResults()
	Ci_results.load_data()
	# Preload historical data at startup
	# NOTE(review): indentation was lost in this copy of the file; the four
	# statements after the `if` presumably form its body - confirm against the repo.
	if Ci_results.available_dates:
	start_date_val = Ci_results.available_dates[-1] # Last date (oldest)
	end_date_val = Ci_results.available_dates[0] # First date (newest)
	Ci_results.load_historical_data(start_date_val, end_date_val)
	logger.info(f"Preloaded historical data: {len(Ci_results.historical_df)} records")
	# Start the auto-reload scheduler
	Ci_results.schedule_data_reload()


	# Function to check if a model has failures
	def model_has_failures_by_device(model_name, device='both'):
	    """Tell whether the current results record failures for a model.

	    Args:
	        model_name: Model name; it is lower-cased before the index lookup.
	        device: 'amd', 'nvidia' or 'both' (default). Any other value
	            selects no vendor and therefore yields False.

	    Returns:
	        True when one of the multi/single failure counters of a selected
	        vendor is greater than zero; False otherwise, including when no
	        results are loaded or the model is not in the results index.
	    """
	    if Ci_results.df is None or Ci_results.df.empty:
	        return False

	    lookup_key = model_name.lower()
	    if lookup_key not in Ci_results.df.index:
	        return False

	    model_row = Ci_results.df.loc[lookup_key]

	    # AMD counters are inspected before NVIDIA ones, multi before single.
	    failure_columns = (
	        ('amd', ('failed_multi_no_amd', 'failed_single_no_amd')),
	        ('nvidia', ('failed_multi_no_nvidia', 'failed_single_no_nvidia')),
	    )
	    for vendor, columns in failure_columns:
	        if device not in (vendor, 'both'):
	            continue
	        if any(model_row.get(column, 0) > 0 for column in columns):
	            return True

	    return False


	# Function to get current description text
	def get_description_text():
	    """Build the sidebar description with the last-update note appended.

	    Returns:
	        The headline entries, an empty entry and a status line joined with
	        ``<br>``. The status line shows ``Ci_results.latest_update_msg`` when
	        it is set and ``loading...`` otherwise.
	    """
	    headline = [
	        "Transformer CI Dashboard",
	        "-",
	        "AMD runs on MI325",
	        "NVIDIA runs on A10",
	    ]
	    # The empty entry produces two consecutive <br> before the status line.
	    pieces = [*headline, ""]
	    status = Ci_results.latest_update_msg or "loading..."
	    pieces.append(f"This dashboard only tracks important models<br>({status})")
	    return "<br>".join(pieces)

	# Function to format new regressions for display
	def get_regressions_text():
	    """Render the markdown shown in the "New Regressions" panel.

	    Regressions returned by ``find_new_regressions`` are grouped under a
	    ``"<model> (<DEVICE> <gpu_type>)"`` heading, headings are sorted, and at
	    most five tests are listed per heading followed by an "... and N more"
	    line when there are extra ones.

	    Returns:
	        Markdown text; a "no new regressions" message when nothing was
	        found, or an "unable to load" message if any exception is raised.
	    """
	    try:
	        found = find_new_regressions(Ci_results.df, Ci_results.all_historical_data)

	        if not found:
	            return "### 🎉 No New Regressions\nAll failures were present in the previous run."

	        # Bucket failing tests per model/device/GPU heading.
	        tests_by_heading = {}
	        for entry in found:
	            model = entry['model']
	            device = entry['device'].upper()
	            gpu_type = entry['gpu_type']
	            test = entry['test']
	            tests_by_heading.setdefault(f"{model} ({device} {gpu_type})", []).append(test)

	        output = [f"### ⚠️ New Regressions Detected: {len(found)} failure(s)", ""]

	        for heading in sorted(tests_by_heading):
	            failing = tests_by_heading[heading]
	            output.append(f"{heading}:")
	            # Only the first five tests are spelled out per heading.
	            output.extend(f" • {name}" for name in failing[:5])
	            hidden_count = len(failing) - 5
	            if hidden_count > 0:
	                output.append(f" • ... and {hidden_count} more")
	            output.append("")

	        return "\n".join(output)
	    except Exception as e:
	        logger.error(f"Error getting regressions: {e}")
	        return "### ⚠️ New Regressions\nUnable to load regression data"

	# Load CSS from external file
	def load_css():
	    """Return the dashboard stylesheet, or a minimal dark fallback.

	    ``styles.css`` is opened by relative path, so it is resolved against the
	    current working directory.

	    Returns:
	        The full text of ``styles.css``; when the file does not exist a
	        warning is logged and a one-rule black/white stylesheet is returned.
	    """
	    try:
	        # Decode explicitly as UTF-8 so the result does not depend on the
	        # locale's default encoding of the machine running the app.
	        with open("styles.css", "r", encoding="utf-8") as f:
	            return f.read()
	    except FileNotFoundError:
	        logger.warning("styles.css not found, using minimal default styles")
	        return "body { background: #000; color: #fff; }"

	# JavaScript snippet handed to gr.Blocks(js=...) below. The function it
	# defines checks the page URL and, when the `__theme` query parameter is
	# not 'dark', sets it to 'dark' and navigates to the updated URL.
	js_func = """
	function refresh() {
	const url = new URL(window.location);

	if (url.searchParams.get('__theme') !== 'dark') {
	url.searchParams.set('__theme', 'dark');
	window.location.href = url.href;
	}
	}
	"""

	# Create the Gradio interface with sidebar and dark theme
	# NOTE(review): indentation was lost in this copy; the nesting of the `with`
	# contexts below must be taken from the repository version of the file.
	# The stylesheet comes from load_css() and the theme hook from js_func.
	with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func) as demo:


	with gr.Row():
	# Sidebar for model selection
	with gr.Column(scale=1, elem_classes=["sidebar"]):
	gr.Markdown("# 🤖 TCID", elem_classes=["sidebar-title"])

	# Description with integrated last update time
	description_text = get_description_text()
	description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])

	# Summary button (for current view)
	summary_button = gr.Button(
	"summary\n📊",
	variant="primary",
	size="lg",
	elem_classes=["summary-button"]
	)

	# Toggle between the current-results view (False) and the history view (True);
	# its value is passed as `history_mode` to the handlers wired further down.
	history_view_button = Toggle(
	label="History view",
	value=False,
	interactive=True,
	elem_classes=["history-view-button"]
	)


	# Model selection header (clickable toggle)
	model_toggle_button = gr.Button(
	f"► Select model ({len(Ci_results.available_models)})",
	variant="secondary",
	elem_classes=["model-header"]
	)

	# Model buttons container (collapsible) - start folded
	# Folding is driven by the "model-list-hidden"/"model-list-visible" CSS classes
	# swapped in toggle_model_list, not by Gradio's `visible` flag.
	with gr.Column(elem_classes=["model-list", "model-list-hidden"]) as model_list_container:
	# Toggles for filtering failing models by device
	with gr.Row(elem_classes=["failing-models-filter-row"]):
	show_amd_failures = gr.Checkbox(
	label="Failing on AMD",
	value=False,
	interactive=True,
	elem_classes=["failing-models-toggle", "amd-toggle"]
	)
	show_nvidia_failures = gr.Checkbox(
	label="Failing on NVIDIA",
	value=False,
	interactive=True,
	elem_classes=["failing-models-toggle", "nvidia-toggle"]
	)
	# Create individual buttons for each model
	model_buttons = []
	# Lower-cased model names; a short hard-coded list is used when no models were loaded.
	model_choices = [model.lower() for model in Ci_results.available_models] if Ci_results.available_models else ["auto", "bert", "clip", "llama"]

	# Categorize models by failure type
	amd_failing_models = []
	nvidia_failing_models = []
	both_failing_models = []
	passing_models = []

	# Report through the shared logger, like the rest of this module, instead of print().
	logger.info(f"Creating {len(model_choices)} model buttons: {model_choices}")

	# Each model lands in exactly one of the four lists.
	for model_name in model_choices:
	    has_amd = model_has_failures_by_device(model_name, 'amd')
	    has_nvidia = model_has_failures_by_device(model_name, 'nvidia')

	    if has_amd and has_nvidia:
	        both_failing_models.append(model_name)
	    elif has_amd:
	        amd_failing_models.append(model_name)
	    elif has_nvidia:
	        nvidia_failing_models.append(model_name)
	    else:
	        passing_models.append(model_name)

	# Container for all models (visible by default)
	# Four parallel button containers are built; filter_failing_models later
	# makes exactly one of them visible depending on the two checkboxes.
	with gr.Column(visible=True, elem_classes=["all-models-container"]) as all_models_container:
	for model_name in model_choices:
	has_failures = model_has_failures_by_device(model_name, 'both')
	button_classes = ["model-button"]
	# Models failing on either vendor get an extra CSS class.
	if has_failures:
	button_classes.append("model-button-failed")

	btn = gr.Button(
	model_name,
	variant="secondary",
	size="sm",
	elem_classes=button_classes
	)
	model_buttons.append(btn)

	# Container for AMD failures (hidden by default)
	amd_buttons = []
	with gr.Column(visible=False, elem_classes=["amd-failures-container"]) as amd_failures_container:
	# AMD-only failures plus models failing on both vendors.
	amd_models_to_show = amd_failing_models + both_failing_models
	# Buttons are created in sorted order; the click wiring later in the file
	# rebuilds the same sorted list to pair buttons with model names.
	for model_name in sorted(amd_models_to_show):
	btn = gr.Button(
	model_name,
	variant="secondary",
	size="sm",
	elem_classes=["model-button", "model-button-failed"]
	)
	amd_buttons.append(btn)

	# Container for NVIDIA failures (hidden by default)
	nvidia_buttons = []
	with gr.Column(visible=False, elem_classes=["nvidia-failures-container"]) as nvidia_failures_container:
	# NVIDIA-only failures plus models failing on both vendors.
	nvidia_models_to_show = nvidia_failing_models + both_failing_models
	for model_name in sorted(nvidia_models_to_show):
	btn = gr.Button(
	model_name,
	variant="secondary",
	size="sm",
	elem_classes=["model-button", "model-button-failed"]
	)
	nvidia_buttons.append(btn)

	# Container for both AMD and NVIDIA failures (hidden by default)
	both_buttons = []
	with gr.Column(visible=False, elem_classes=["both-failures-container"]) as both_failures_container:
	# Union of the three failing lists, i.e. every model failing on at least one vendor.
	all_failing = list(set(amd_failing_models + nvidia_failing_models + both_failing_models))
	for model_name in sorted(all_failing):
	btn = gr.Button(
	model_name,
	variant="secondary",
	size="sm",
	elem_classes=["model-button", "model-button-failed"]
	)
	both_buttons.append(btn)

	# CI job links at bottom of sidebar
	# Starts with a placeholder; the text is replaced by get_ci_links() output
	# from the load and summary handlers.
	ci_links_display = gr.Markdown("🔗 CI Jobs: Loading...", elem_classes=["sidebar-links"])

	# Main content area
	with gr.Column(scale=4, elem_classes=["main-content"]):
	# Current view components
	with gr.Column(visible=True, elem_classes=["current-view"]) as current_view:
	# Summary view (contains summary plot and regressions panel)
	with gr.Column(visible=True, elem_classes=["summary-view"]) as summary_view:
	# Summary display (default view)
	summary_display = gr.Plot(
	value=create_summary_page(Ci_results.df, Ci_results.available_models),
	label="",
	format="png",
	elem_classes=["plot-container"],
	visible=True
	)

	# New Regressions section (at the bottom, collapsible)
	regressions_toggle_button = gr.Button(
	"► New Regressions",
	variant="secondary",
	elem_classes=["regressions-header"]
	)

	# Collapsed through the "regressions-content-hidden" CSS class, which
	# toggle_regressions_panel swaps for "regressions-content-visible".
	with gr.Column(elem_classes=["regressions-content", "regressions-content-hidden"]) as regressions_content:
	regressions_panel = gr.Markdown(
	value=get_regressions_text(),
	elem_classes=["regressions-panel"]
	)

	# Detailed view components (hidden by default)
	with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
	# Create the plot output
	plot_output = gr.Plot(
	label="",
	format="png",
	elem_classes=["plot-container"]
	)

	# Create two separate failed tests displays in a row layout
	with gr.Row():
	with gr.Column(scale=1):
	amd_failed_tests_output = gr.Textbox(
	value="",
	lines=8,
	max_lines=8,
	interactive=False,
	container=False,
	elem_classes=["failed-tests"]
	)
	with gr.Column(scale=1):
	nvidia_failed_tests_output = gr.Textbox(
	value="",
	lines=8,
	max_lines=8,
	interactive=False,
	container=False,
	elem_classes=["failed-tests"]
	)

	# Historical view components (hidden by default)
	with gr.Column(visible=False, elem_classes=["historical-view"]) as historical_view:


	# Time-series summary displays (multiple Gradio plots)
	# These three plots start empty; get_historical_summary_plots() supplies their values.
	time_series_failure_rates = gr.Plot(
	label="",
	elem_classes=["plot-container"]
	)

	time_series_amd_tests = gr.Plot(
	label="",
	elem_classes=["plot-container"]
	)

	time_series_nvidia_tests = gr.Plot(
	label="",
	elem_classes=["plot-container"]
	)

	# Time-series model view (hidden by default)
	with gr.Column(visible=False, elem_classes=["time-series-detail-view"]) as time_series_detail_view:
	# Time-series plots for specific model (with spacing)
	# Filled by show_time_series_model() for the selected model.
	time_series_amd_model_plot = gr.Plot(
	label="",
	elem_classes=["plot-container"]
	)

	time_series_nvidia_model_plot = gr.Plot(
	label="",
	elem_classes=["plot-container"]
	)

	# Failing models filter functionality
	def filter_failing_models(show_amd, show_nvidia):
	    """Choose which model-button container is visible.

	    Checkbox combinations map to containers as follows:
	    - none checked: the container with all models
	    - AMD only: models failing on AMD (including those failing on both)
	    - NVIDIA only: models failing on NVIDIA (including those failing on both)
	    - both checked: models with any failure

	    Returns:
	        Four visibility updates ordered as all-models, AMD, NVIDIA and
	        any-failure containers.
	    """
	    visibility_flags = (
	        not (show_amd or show_nvidia),   # all_models_container
	        show_amd and not show_nvidia,    # amd_failures_container
	        not show_amd and show_nvidia,    # nvidia_failures_container
	        show_amd and show_nvidia,        # both_failures_container
	    )
	    return tuple(gr.update(visible=flag) for flag in visibility_flags)

	# Either checkbox changing re-evaluates the filter with both current values.
	for checkbox in (show_amd_failures, show_nvidia_failures):
	    checkbox.change(
	        fn=filter_failing_models,
	        inputs=[show_amd_failures, show_nvidia_failures],
	        outputs=[
	            all_models_container,
	            amd_failures_container,
	            nvidia_failures_container,
	            both_failures_container,
	        ],
	    )

	# Regressions panel toggle functionality
	def toggle_regressions_panel(current_visible):
	    """Flip the regressions panel between folded and unfolded.

	    Args:
	        current_visible: Visibility flag held in the ``regressions_visible`` state.

	    Returns:
	        A tuple of (button label update, CSS class update for the panel
	        container, new visibility flag).
	    """
	    expanded = not current_visible
	    # Folding is done with CSS classes rather than Gradio's `visible` flag.
	    if expanded:
	        arrow, state_class = "▼", "regressions-content-visible"
	    else:
	        arrow, state_class = "►", "regressions-content-hidden"

	    return (
	        gr.update(value=f"{arrow} New Regressions"),
	        gr.update(elem_classes=["regressions-content", state_class]),
	        expanded,
	    )

	# Track regressions panel visibility state (starts folded)
	regressions_visible = gr.State(False)

	regressions_toggle_button.click(
	    fn=toggle_regressions_panel,
	    inputs=[regressions_visible],
	    outputs=[regressions_toggle_button, regressions_content, regressions_visible],
	)

	# Model toggle functionality
	def toggle_model_list(current_visible):
	    """Flip the sidebar model list between folded and unfolded.

	    Args:
	        current_visible: Visibility flag held in the ``model_list_visible`` state.

	    Returns:
	        A tuple of (header button label update, CSS class update for the
	        list container, new visibility flag).
	    """
	    expanded = not current_visible
	    # Folding is done with CSS classes rather than Gradio's `visible` flag.
	    if expanded:
	        arrow, state_class = "▼", "model-list-visible"
	    else:
	        arrow, state_class = "►", "model-list-hidden"

	    return (
	        gr.update(value=f"{arrow} Select model ({len(Ci_results.available_models)})"),
	        gr.update(elem_classes=["model-list", state_class]),
	        expanded,
	    )

	# Track model list visibility state (starts folded)
	model_list_visible = gr.State(False)
	# Track last selected model for mode switches
	selected_model_state = gr.State(None)
	# Track whether current view is model detail (True) or summary (False)
	in_model_view_state = gr.State(False)

	model_toggle_button.click(
	    fn=toggle_model_list,
	    inputs=[model_list_visible],
	    outputs=[model_toggle_button, model_list_container, model_list_visible],
	)


	# Unified summary handler: respects History toggle
	def handle_summary_click(history_mode: bool):
	    """Switch the main area to the summary for the active mode.

	    Args:
	        history_mode: Current value of the "History view" toggle.

	    Returns:
	        A 13-item tuple matching the ``outputs`` list of
	        ``summary_button.click`` below. The last two items reset the
	        model-view flag to False and the selected model to "".
	    """
	    def _vis(flag):
	        return gr.update(visible=flag)

	    description = get_description_text()
	    links = get_ci_links()

	    if not history_mode:
	        fig = create_summary_page(Ci_results.df, Ci_results.available_models)
	        return (
	            description,                          # description_display
	            links,                                # ci_links_display
	            _vis(True),                           # current_view
	            _vis(False),                          # historical_view
	            _vis(True),                           # summary_view
	            gr.update(value=fig, visible=True),   # summary_display
	            _vis(False),                          # detail_view
	            _vis(False),                          # time_series_failure_rates
	            _vis(False),                          # time_series_amd_tests
	            _vis(False),                          # time_series_nvidia_tests
	            _vis(False),                          # time_series_detail_view
	            False,                                # in_model_view_state
	            "",                                   # selected_model_state
	        )

	    fr_plot, amd_plot, nvidia_plot = get_historical_summary_plots()
	    return (
	        description,     # description_display
	        links,           # ci_links_display
	        _vis(False),     # current_view
	        _vis(True),      # historical_view
	        _vis(False),     # summary_view
	        _vis(False),     # summary_display
	        _vis(False),     # detail_view
	        fr_plot,         # time_series_failure_rates
	        amd_plot,        # time_series_amd_tests
	        nvidia_plot,     # time_series_nvidia_tests
	        _vis(False),     # time_series_detail_view
	        False,           # in_model_view_state
	        "",              # selected_model_state
	    )

	summary_button.click(
	    fn=handle_summary_click,
	    inputs=[history_view_button],
	    outputs=[
	        description_display,
	        ci_links_display,
	        current_view,
	        historical_view,
	        summary_view,
	        summary_display,
	        detail_view,
	        time_series_failure_rates,
	        time_series_amd_tests,
	        time_series_nvidia_tests,
	        time_series_detail_view,
	        in_model_view_state,
	        selected_model_state,
	    ],
	)

	# Function to get CI job links
	def get_ci_links():
	    """Build the sidebar markdown with the FAQ link and the CI job links.

	    Rows of ``Ci_results.df`` are scanned for ``job_link_amd`` and
	    ``job_link_nvidia`` cells holding a dict. For each vendor the first
	    truthy ``'multi'`` and ``'single'`` entries are kept, and scanning stops
	    once all four links are known.

	    Returns:
	        A markdown string. It is a "Loading..." placeholder when no results
	        are loaded; otherwise the FAQ link followed by the per-vendor links
	        (or "No links available"). If anything raises, the error is logged
	        and an error notice with the FAQ link is returned.
	    """
	    # One absolute FAQ URL for both the normal and the error output; a
	    # relative "README.md" link would not resolve from the running app.
	    faq_url = "https://huggingface.co/spaces/transformers-community/transformers-ci-dashboard/blob/main/README.md"
	    try:
	        # Check if df exists and is not empty
	        df = Ci_results.df
	        if df is None or df.empty:
	            return "🔗 CI Jobs: Loading..."

	        # Links should be the same for all models in a run, so any row may supply them.
	        found = {
	            "amd": {"multi": None, "single": None},
	            "nvidia": {"multi": None, "single": None},
	        }

	        for _, row in df.iterrows():
	            for vendor, links in found.items():
	                raw = row.get(f"job_link_{vendor}")
	                # Missing cells (None/NaN) are not dicts, so this one check
	                # skips them as well as any other unexpected value.
	                if not isinstance(raw, dict):
	                    continue
	                for gpu_kind in ("multi", "single"):
	                    if gpu_kind in raw and not links[gpu_kind]:
	                        links[gpu_kind] = raw[gpu_kind]

	            # Break if we have all links
	            if all(link for links in found.values() for link in links.values()):
	                break

	        # FAQ link first, then the CI job sections
	        parts = [f"❓ [FAQ]({faq_url})\n\n", "🔗 CI Jobs:\n\n"]

	        # The AMD section is followed by a blank line; the NVIDIA one is not.
	        for vendor, heading, trailer in (("amd", "AMD", "\n"), ("nvidia", "NVIDIA", "")):
	            multi_link = found[vendor]["multi"]
	            single_link = found[vendor]["single"]
	            if not (multi_link or single_link):
	                continue
	            parts.append(f"{heading}:\n")
	            if multi_link:
	                parts.append(f"• [Multi GPU]({multi_link})\n")
	            if single_link:
	                parts.append(f"• [Single GPU]({single_link})\n")
	            parts.append(trailer)

	        if not any(link for links in found.values() for link in links.values()):
	            parts.append("No links available")

	        return "".join(parts)
	    except Exception as e:
	        logger.error(f"getting CI links: {e}")
	        return f"🔗 CI Jobs: Error loading links\n\n❓ [FAQ]({faq_url})"



	# Constants for Gradio updates: reusable payloads for the view-switching
	# handlers (hide a component, show it, or leave it untouched).
	HIDDEN, SHOWN = (gr.update(visible=flag) for flag in (False, True))
	NOOP = gr.update()

	def get_historical_summary_plots():
	    """Create the three history summary plots from ``Ci_results.historical_df``.

	    Returns:
	        A 3-tuple of updates that set and show, in order, the
	        'failure_rates', 'amd_tests' and 'nvidia_tests' plots returned by
	        ``create_time_series_summary_gradio``.
	    """
	    summary_plots = create_time_series_summary_gradio(Ci_results.historical_df)
	    plot_keys = ('failure_rates', 'amd_tests', 'nvidia_tests')
	    return tuple(gr.update(value=summary_plots[key], visible=True) for key in plot_keys)

	def show_time_series_model(selected_model):
	    """Create the per-model history plots for ``selected_model``.

	    Returns:
	        A 2-tuple of updates that set and show, in order, the 'amd_plot'
	        and 'nvidia_plot' entries returned by
	        ``create_model_time_series_gradio``.
	    """
	    model_plots = create_model_time_series_gradio(Ci_results.historical_df, selected_model)
	    amd_update = gr.update(value=model_plots['amd_plot'], visible=True)
	    nvidia_update = gr.update(value=model_plots['nvidia_plot'], visible=True)
	    return amd_update, nvidia_update

	def handle_history_toggle(history_mode, last_selected_model, in_model_view):
	    """Switch between the current and the historical view.

	    Args:
	        history_mode: New value of the "History view" toggle.
	        last_selected_model: Model name held in ``selected_model_state``.
	        in_model_view: Whether a model detail page is currently shown.

	    Returns:
	        A 15-item tuple laid out as: current_view, historical_view,
	        summary_view, summary_display, detail_view, the three time-series
	        summary plots, the two per-model time-series plots,
	        time_series_detail_view, plot_output, the AMD and NVIDIA
	        failed-tests boxes, and the new model-view flag.
	    """
	    if not history_mode:
	        # Current mode: show model detail if available, otherwise summary
	        model_available = (
	            last_selected_model
	            and Ci_results.df is not None
	            and not Ci_results.df.empty
	            and last_selected_model in Ci_results.df.index
	        )
	        if model_available:
	            fig, amd_txt, nvidia_txt = plot_model_stats(
	                Ci_results.df, last_selected_model, Ci_results.all_historical_data
	            )
	            return (
	                SHOWN, HIDDEN, HIDDEN, HIDDEN, SHOWN,   # views: detail page on
	                HIDDEN, HIDDEN, HIDDEN,                 # time-series summary plots
	                NOOP, NOOP, HIDDEN,                     # per-model plots, their container
	                fig, amd_txt, nvidia_txt,               # detail plot and failed-test texts
	                True,
	            )

	        fig = create_summary_page(Ci_results.df, Ci_results.available_models)
	        return (
	            SHOWN, HIDDEN, SHOWN, gr.update(value=fig, visible=True), HIDDEN,
	            HIDDEN, HIDDEN, HIDDEN,
	            NOOP, NOOP, HIDDEN,
	            NOOP, NOOP, NOOP,
	            False,
	        )

	    # Historical mode: show model detail if in model view, otherwise summary
	    if in_model_view and last_selected_model:
	        amd_ts, nvidia_ts = show_time_series_model(last_selected_model)
	        return (
	            HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN,
	            HIDDEN, HIDDEN, HIDDEN,
	            amd_ts, nvidia_ts, SHOWN,
	            NOOP, NOOP, NOOP,
	            True,
	        )

	    fr_plot, amd_plot, nvidia_plot = get_historical_summary_plots()
	    return (
	        HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN,
	        fr_plot, amd_plot, nvidia_plot,
	        NOOP, NOOP, HIDDEN,
	        NOOP, NOOP, NOOP,
	        False,
	    )

	def handle_model_click(selected_model: str, history_mode: bool):
	    """Show the page for a clicked model in the active mode.

	    Args:
	        selected_model: Name of the model whose button was clicked.
	        history_mode: Current value of the "History view" toggle.

	    Returns:
	        A 16-item tuple laid out as: plot_output, the AMD and NVIDIA
	        failed-tests boxes, current_view, historical_view, summary_view,
	        summary_display, detail_view, the three time-series summary plots,
	        the two per-model time-series plots, time_series_detail_view, the
	        selected model and the model-view flag (always True).
	    """
	    if not history_mode:
	        fig, amd_txt, nvidia_txt = plot_model_stats(
	            Ci_results.df, selected_model, Ci_results.all_historical_data
	        )
	        return (
	            fig, amd_txt, nvidia_txt,               # detail plot and failed-test texts
	            SHOWN, HIDDEN, HIDDEN, HIDDEN, SHOWN,   # views: detail page on
	            NOOP, NOOP, NOOP,                       # time-series summary plots
	            NOOP, NOOP, HIDDEN,                     # per-model plots, their container
	            selected_model, True,
	        )

	    amd_ts, nvidia_ts = show_time_series_model(selected_model)
	    return (
	        NOOP, NOOP, NOOP,
	        HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN,
	        HIDDEN, HIDDEN, HIDDEN,
	        amd_ts, nvidia_ts, SHOWN,
	        selected_model, True,
	    )

	# Wire up history toggle
	# The 15 outputs below must stay in the same order as the tuples returned
	# by handle_history_toggle.
	history_view_button.change(
	fn=handle_history_toggle,
	inputs=[history_view_button, selected_model_state, in_model_view_state],
	outputs=[
	current_view, historical_view, summary_view, summary_display, detail_view,
	time_series_failure_rates, time_series_amd_tests, time_series_nvidia_tests,
	time_series_amd_model_plot, time_series_nvidia_model_plot, time_series_detail_view,
	plot_output, amd_failed_tests_output, nvidia_failed_tests_output, in_model_view_state,
	],
	)

	# Define common outputs for model click handlers
	# The 16 entries must stay in the same order as the tuples returned by
	# handle_model_click.
	model_click_outputs = [
	plot_output, amd_failed_tests_output, nvidia_failed_tests_output,
	current_view, historical_view, summary_view, summary_display, detail_view,
	time_series_failure_rates, time_series_amd_tests, time_series_nvidia_tests,
	time_series_amd_model_plot, time_series_nvidia_model_plot, time_series_detail_view,
	selected_model_state, in_model_view_state,
	]

	# Helper function to connect button clicks
	def connect_model_buttons(buttons, models):
	    """Attach a click handler to each button for its paired model name.

	    Args:
	        buttons: Buttons to wire up.
	        models: Model names, paired with ``buttons`` position by position
	            (``zip`` stops at the shorter of the two).
	    """
	    for button, name in zip(buttons, models):
	        # The default argument pins this iteration's model name; without it
	        # every handler would see the last name of the loop.
	        on_click = lambda history_mode, m=name: handle_model_click(m, history_mode)
	        button.click(
	            fn=on_click,
	            inputs=[history_view_button],
	            outputs=model_click_outputs,
	        )

	# Wire up all button groups; each sorted list reproduces the order in which
	# the corresponding buttons were created.
	connect_model_buttons(model_buttons, model_choices)
	connect_model_buttons(amd_buttons, sorted(amd_failing_models + both_failing_models))
	connect_model_buttons(nvidia_buttons, sorted(nvidia_failing_models + both_failing_models))
	connect_model_buttons(
	    both_buttons,
	    sorted(set(amd_failing_models + nvidia_failing_models + both_failing_models)),
	)

	# Page-load handler (wired to demo.load below)
	def show_summary_and_update_links():
	    """Return the summary figure, the description text and the CI links, in that order."""
	    summary_fig = create_summary_page(Ci_results.df, Ci_results.available_models)
	    description = get_description_text()
	    ci_links = get_ci_links()
	    return summary_fig, description, ci_links

	# Auto-update summary, description, CI links, and regressions when the interface loads
	demo.load(
	    fn=show_summary_and_update_links,
	    outputs=[summary_display, description_display, ci_links_display],
	).then(fn=get_regressions_text, outputs=[regressions_panel])


	# Gradio entrypoint
	# Launch the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	demo.launch()