Spaces:

transformers-community
/

transformers-ci-dashboard

Running

App Files Files Community

Add historical data visualization features

by badaoui HF Staff - opened 1 day ago

base: refs/heads/main

←

from: refs/pr/7

Discussion Files changed

+2637

-224

Files changed (10) hide show

app.py +692 -84
data.py +466 -13
logos/amd_logo.png +0 -0
logos/nvidia_logo.png +0 -0
model_page.py +44 -22
requirements.txt +2 -0
styles.css +416 -41
summary_page.py +145 -64
time_series.py +316 -0
time_series_gradio.py +556 -0

app.py CHANGED Viewed

@@ -2,11 +2,16 @@ import matplotlib.pyplot as plt
 import matplotlib
 import pandas as pd
 import gradio as gr
-from data import CIResults
 from utils import logger
 from summary_page import create_summary_page
 from model_page import plot_model_stats
 # Configure matplotlib to prevent memory warnings and set dark background
@@ -19,6 +24,12 @@ plt.ioff()  # Turn off interactive mode to prevent figure accumulation
 # Load data once at startup
 Ci_results = CIResults()
 Ci_results.load_data()
 # Start the auto-reload scheduler
 Ci_results.schedule_data_reload()
@@ -49,6 +60,34 @@ def model_has_failures(model_name):
         nvidia_single_failures > 0,
     ])
 # Function to get current description text
 def get_description_text():
@@ -66,6 +105,46 @@ def get_description_text():
         msg.append("*This dashboard only tracks important models*<br>*(loading...)*")
     return "<br>".join(msg)
 # Load CSS from external file
 def load_css():
     try:
@@ -77,9 +156,19 @@ def load_css():
         logger.warning("styles.css not found, using minimal default styles")
         return "body { background: #000; color: #fff; }"
 # Create the Gradio interface with sidebar and dark theme
-with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cache=(3600, 3600)) as demo:
     with gr.Row():
@@ -91,7 +180,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
             description_text = get_description_text()
             description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])
-            # Summary button at the top
             summary_button = gr.Button(
                 "summary\n📊",
                 variant="primary",
@@ -99,6 +188,14 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
                 elem_classes=["summary-button"]
             )
             # Model selection header (clickable toggle)
             model_toggle_button = gr.Button(
                 f"► Select model ({len(Ci_results.available_models)})",
@@ -108,82 +205,273 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
             # Model buttons container (collapsible) - start folded
             with gr.Column(elem_classes=["model-list", "model-list-hidden"]) as model_list_container:
                 # Create individual buttons for each model
                 model_buttons = []
                 model_choices = [model.lower() for model in Ci_results.available_models] if Ci_results.available_models else ["auto", "bert", "clip", "llama"]
                 print(f"Creating {len(model_choices)} model buttons: {model_choices}")
                 for model_name in model_choices:
-                    # Check if model has failures to determine styling
-                    has_failures = model_has_failures(model_name)
-                    button_classes = ["model-button"]
-                    if has_failures:
-                        button_classes.append("model-button-failed")
-                    btn = gr.Button(
-                        model_name,
-                        variant="secondary",
-                        size="sm",
-                        elem_classes=button_classes
-                    )
-                    model_buttons.append(btn)
             # CI job links at bottom of sidebar
             ci_links_display = gr.Markdown("🔗 **CI Jobs:** *Loading...*", elem_classes=["sidebar-links"])
         # Main content area
         with gr.Column(scale=4, elem_classes=["main-content"]):
-            # Summary display (default view)
-            summary_display = gr.Plot(
-                value=create_summary_page(Ci_results.df, Ci_results.available_models),
-                label="",
-                format="png",
-                elem_classes=["plot-container"],
-                visible=True
-            )
-            # Detailed view components (hidden by default)
-            with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
-                # Create the plot output
-                plot_output = gr.Plot(
                     label="",
-                    format="png",
                     elem_classes=["plot-container"]
                 )
-                # Create two separate failed tests displays in a row layout
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        amd_failed_tests_output = gr.Textbox(
-                            value="",
-                            lines=8,
-                            max_lines=8,
-                            interactive=False,
-                            container=False,
-                            elem_classes=["failed-tests"]
-                        )
-                    with gr.Column(scale=1):
-                        nvidia_failed_tests_output = gr.Textbox(
-                            value="",
-                            lines=8,
-                            max_lines=8,
-                            interactive=False,
-                            container=False,
-                            elem_classes=["failed-tests"]
-                        )
-    # Set up click handlers for model buttons
-    for i, btn in enumerate(model_buttons):
-        model_name = model_choices[i]
-        btn.click(
-            fn=lambda selected_model=model_name: plot_model_stats(Ci_results.df, selected_model),
-            outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output]
-        ).then(
-            fn=lambda: [gr.update(visible=False), gr.update(visible=True)],
-            outputs=[summary_display, detail_view]
-        )
     # Model toggle functionality
     def toggle_model_list(current_visible):
@@ -203,6 +491,10 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
     # Track model list visibility state
     model_list_visible = gr.State(False)
     model_toggle_button.click(
         fn=toggle_model_list,
@@ -210,17 +502,64 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
         outputs=[model_toggle_button, model_list_container, model_list_visible]
     )
-    # Summary button click handler
-    def show_summary_and_update_links():
-        """Show summary page and update CI links."""
-        return create_summary_page(Ci_results.df, Ci_results.available_models), get_description_text(), get_ci_links()
     summary_button.click(
-        fn=show_summary_and_update_links,
-        outputs=[summary_display, description_display, ci_links_display]
-    ).then(
-        fn=lambda: [gr.update(visible=True), gr.update(visible=False)],
-        outputs=[summary_display, detail_view]
     )
     # Function to get CI job links
@@ -270,25 +609,19 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
             # AMD links
             if amd_multi_link or amd_single_link:
                 links_md += "**AMD:**\n"
-                if amd_multi_link == amd_single_link:
-                    links_md += f"• [Single and Multi GPU]({amd_multi_link})\n"
-                else:
-                    if amd_multi_link:
-                        links_md += f"• [Multi GPU]({amd_multi_link})\n"
-                    if amd_single_link:
-                        links_md += f"• [Single GPU]({amd_single_link})\n"
                 links_md += "\n"
             # NVIDIA links
             if nvidia_multi_link or nvidia_single_link:
                 links_md += "**NVIDIA:**\n"
-                if nvidia_single_link == nvidia_multi_link:
-                    links_md += f"• [Single and Multi GPU]({nvidia_multi_link})\n"
-                else:
-                    if nvidia_multi_link:
-                        links_md += f"• [Multi GPU]({nvidia_multi_link})\n"
-                    if nvidia_single_link:
-                        links_md += f"• [Single GPU]({nvidia_single_link})\n"
             if not (amd_multi_link or amd_single_link or nvidia_multi_link or nvidia_single_link):
                 links_md += "*No links available*"
@@ -299,10 +632,285 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
             return "🔗 **CI Jobs:** *Error loading links*\n\n❓ **[FAQ](README.md)**"
-    # Auto-update CI links when the interface loads
     demo.load(
-        fn=get_ci_links,
-        outputs=[ci_links_display]
     )

 import matplotlib
 import pandas as pd
 import gradio as gr
+from gradio_toggle import Toggle
+from data import CIResults, find_new_regressions
 from utils import logger
 from summary_page import create_summary_page
 from model_page import plot_model_stats
+from time_series_gradio import (
+    create_time_series_summary_gradio,
+    create_model_time_series_gradio,
+)
 # Configure matplotlib to prevent memory warnings and set dark background
 # Load data once at startup
 Ci_results = CIResults()
 Ci_results.load_data()
+# Preload historical data once at startup so the history views can be built from Ci_results.historical_df
+if Ci_results.available_dates:  # nothing is preloaded when no dates are available
+    start_date_val = Ci_results.available_dates[-1]  # Last date (oldest)
+    end_date_val = Ci_results.available_dates[0]     # First date (newest)
+    Ci_results.load_historical_data(start_date_val, end_date_val)  # load the full oldest-to-newest range
+    logger.info(f"Preloaded historical data: {len(Ci_results.historical_df)} records")
 # Start the auto-reload scheduler
 Ci_results.schedule_data_reload()
         nvidia_single_failures > 0,
     ])
+def model_has_amd_failures(model_name):
+    """Check if a model has AMD failures."""
+    if Ci_results.df is None or Ci_results.df.empty:
+        return False
+    model_name_lower = model_name.lower()
+    if model_name_lower not in Ci_results.df.index:
+        return False
+    row = Ci_results.df.loc[model_name_lower]
+    amd_multi_failures = row.get('failed_multi_no_amd', 0)
+    amd_single_failures = row.get('failed_single_no_amd', 0)
+    return amd_multi_failures > 0 or amd_single_failures > 0
+def model_has_nvidia_failures(model_name):
+    """Check if a model has NVIDIA failures."""
+    if Ci_results.df is None or Ci_results.df.empty:
+        return False
+    model_name_lower = model_name.lower()
+    if model_name_lower not in Ci_results.df.index:
+        return False
+    row = Ci_results.df.loc[model_name_lower]
+    nvidia_multi_failures = row.get('failed_multi_no_nvidia', 0)
+    nvidia_single_failures = row.get('failed_single_no_nvidia', 0)
+    return nvidia_multi_failures > 0 or nvidia_single_failures > 0
 # Function to get current description text
 def get_description_text():
         msg.append("*This dashboard only tracks important models*<br>*(loading...)*")
     return "<br>".join(msg)
+# Function to format new regressions for display
+def get_regressions_text():
+    """Get formatted text for new regressions panel."""
+    try:
+        regressions = find_new_regressions(Ci_results.df, Ci_results.all_historical_data)
+        if not regressions:
+            return "### 🎉 No New Regressions\nAll failures were present in the previous run."
+        # Group by model and device
+        grouped = {}
+        for reg in regressions:
+            model = reg['model']
+            device = reg['device'].upper()
+            gpu_type = reg['gpu_type']
+            test = reg['test']
+            key = f"{model} ({device} {gpu_type})"
+            if key not in grouped:
+                grouped[key] = []
+            grouped[key].append(test)
+        # Format output
+        lines = [f"### ⚠️ New Regressions Detected: {len(regressions)} failure(s)"]
+        lines.append("")
+        for key in sorted(grouped.keys()):
+            tests = grouped[key]
+            lines.append(f"**{key}:**")
+            for test in tests[:5]:  # Limit to 5 tests per model
+                lines.append(f"  • {test}")
+            if len(tests) > 5:
+                lines.append(f"  • ... and {len(tests) - 5} more")
+            lines.append("")
+        return "\n".join(lines)
+    except Exception as e:
+        logger.error(f"Error getting regressions: {e}")
+        return "### ⚠️ New Regressions\n*Unable to load regression data*"
 # Load CSS from external file
 def load_css():
     try:
         logger.warning("styles.css not found, using minimal default styles")
         return "body { background: #000; color: #fff; }"
+js_func = """
+function refresh() {
+    const url = new URL(window.location);
+    if (url.searchParams.get('__theme') !== 'dark') {
+        url.searchParams.set('__theme', 'dark');
+        window.location.href = url.href;
+    }
+}
+"""  # handed to gr.Blocks(js=...); navigates to the same URL with __theme=dark unless it is already set
 # Create the Gradio interface with sidebar and dark theme
+with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func) as demo:
     with gr.Row():
             description_text = get_description_text()
             description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])
+            # Summary button (for current view)
             summary_button = gr.Button(
                 "summary\n📊",
                 variant="primary",
                 elem_classes=["summary-button"]
             )
+            history_view_button = Toggle(
+                label="History view",
+                value=False,
+                interactive=True,
+                elem_classes=["history-view-button"]
+            )
             # Model selection header (clickable toggle)
             model_toggle_button = gr.Button(
                 f"► Select model ({len(Ci_results.available_models)})",
             # Model buttons container (collapsible) - start folded
             with gr.Column(elem_classes=["model-list", "model-list-hidden"]) as model_list_container:
+                # Toggles for filtering failing models by device
+                with gr.Row(elem_classes=["failing-models-filter-row"]):
+                    show_amd_failures = gr.Checkbox(
+                        label="AMD failures",
+                        value=False,
+                        interactive=True,
+                        elem_classes=["failing-models-toggle", "amd-toggle"]
+                    )
+                    show_nvidia_failures = gr.Checkbox(
+                        label="NVIDIA failures",
+                        value=False,
+                        interactive=True,
+                        elem_classes=["failing-models-toggle", "nvidia-toggle"]
+                    )
                 # Create individual buttons for each model
                 model_buttons = []
                 model_choices = [model.lower() for model in Ci_results.available_models] if Ci_results.available_models else ["auto", "bert", "clip", "llama"]
+                # Categorize models by failure type
+                amd_failing_models = []
+                nvidia_failing_models = []
+                both_failing_models = []
+                passing_models = []
                 print(f"Creating {len(model_choices)} model buttons: {model_choices}")
                 for model_name in model_choices:
+                    has_amd = model_has_amd_failures(model_name)
+                    has_nvidia = model_has_nvidia_failures(model_name)
+                    if has_amd and has_nvidia:
+                        both_failing_models.append(model_name)
+                    elif has_amd:
+                        amd_failing_models.append(model_name)
+                    elif has_nvidia:
+                        nvidia_failing_models.append(model_name)
+                    else:
+                        passing_models.append(model_name)
+                # Container for all models (visible by default)
+                with gr.Column(visible=True, elem_classes=["all-models-container"]) as all_models_container:
+                    for model_name in model_choices:
+                        has_failures = model_has_failures(model_name)
+                        button_classes = ["model-button"]
+                        if has_failures:
+                            button_classes.append("model-button-failed")
+                        btn = gr.Button(
+                            model_name,
+                            variant="secondary",
+                            size="sm",
+                            elem_classes=button_classes
+                        )
+                        model_buttons.append(btn)
+                # Container for AMD failures (hidden by default)
+                amd_buttons = []
+                with gr.Column(visible=False, elem_classes=["amd-failures-container"]) as amd_failures_container:
+                    amd_models_to_show = amd_failing_models + both_failing_models
+                    for model_name in sorted(amd_models_to_show):
+                        btn = gr.Button(
+                            model_name,
+                            variant="secondary",
+                            size="sm",
+                            elem_classes=["model-button", "model-button-failed"]
+                        )
+                        amd_buttons.append(btn)
+                # Container for NVIDIA failures (hidden by default)
+                nvidia_buttons = []
+                with gr.Column(visible=False, elem_classes=["nvidia-failures-container"]) as nvidia_failures_container:
+                    nvidia_models_to_show = nvidia_failing_models + both_failing_models
+                    for model_name in sorted(nvidia_models_to_show):
+                        btn = gr.Button(
+                            model_name,
+                            variant="secondary",
+                            size="sm",
+                            elem_classes=["model-button", "model-button-failed"]
+                        )
+                        nvidia_buttons.append(btn)
+                # Container for both AMD and NVIDIA failures (hidden by default)
+                both_buttons = []
+                with gr.Column(visible=False, elem_classes=["both-failures-container"]) as both_failures_container:
+                    all_failing = list(set(amd_failing_models + nvidia_failing_models + both_failing_models))
+                    for model_name in sorted(all_failing):
+                        btn = gr.Button(
+                            model_name,
+                            variant="secondary",
+                            size="sm",
+                            elem_classes=["model-button", "model-button-failed"]
+                        )
+                        both_buttons.append(btn)
             # CI job links at bottom of sidebar
             ci_links_display = gr.Markdown("🔗 **CI Jobs:** *Loading...*", elem_classes=["sidebar-links"])
         # Main content area
         with gr.Column(scale=4, elem_classes=["main-content"]):
+            # Current view components
+            with gr.Column(visible=True, elem_classes=["current-view"]) as current_view:
+                # Summary view (contains summary plot and regressions panel)
+                with gr.Column(visible=True, elem_classes=["summary-view"]) as summary_view:
+                    # Summary display (default view)
+                    summary_display = gr.Plot(
+                        value=create_summary_page(Ci_results.df, Ci_results.available_models),
+                        label="",
+                        format="png",
+                        elem_classes=["plot-container"],
+                        visible=True
+                    )
+                    # New Regressions section (at the bottom, collapsible)
+                    regressions_toggle_button = gr.Button(
+                        "► New Regressions",
+                        variant="secondary",
+                        elem_classes=["regressions-header"]
+                    )
+                    with gr.Column(elem_classes=["regressions-content", "regressions-content-hidden"]) as regressions_content:
+                        regressions_panel = gr.Markdown(
+                            value=get_regressions_text(),
+                            elem_classes=["regressions-panel"]
+                        )
+                # Detailed view components (hidden by default)
+                with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
+                    # Create the plot output
+                    plot_output = gr.Plot(
+                        label="",
+                        format="png",
+                        elem_classes=["plot-container"]
+                    )
+                    # Create two separate failed tests displays in a row layout
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            amd_failed_tests_output = gr.Textbox(
+                                value="",
+                                lines=8,
+                                max_lines=8,
+                                interactive=False,
+                                container=False,
+                                elem_classes=["failed-tests"]
+                            )
+                        with gr.Column(scale=1):
+                            nvidia_failed_tests_output = gr.Textbox(
+                                value="",
+                                lines=8,
+                                max_lines=8,
+                                interactive=False,
+                                container=False,
+                                elem_classes=["failed-tests"]
+                            )
+            # Historical view components (hidden by default)
+            with gr.Column(visible=False, elem_classes=["historical-view"]) as historical_view:
+                # Time-series summary displays (multiple Gradio plots)
+                time_series_failure_rates = gr.Plot(
+                    label="",
+                    elem_classes=["plot-container"]
+                )
+                time_series_amd_tests = gr.Plot(
+                    label="",
+                    elem_classes=["plot-container"]
+                )
+                time_series_nvidia_tests = gr.Plot(
                     label="",
                     elem_classes=["plot-container"]
                 )
+                # Time-series model view (hidden by default)
+                with gr.Column(visible=False, elem_classes=["time-series-detail-view"]) as time_series_detail_view:
+                    # Time-series plots for specific model (with spacing)
+                    time_series_amd_model_plot = gr.Plot(
+                        label="",
+                        elem_classes=["plot-container"]
+                    )
+                    time_series_nvidia_model_plot = gr.Plot(
+                        label="",
+                        elem_classes=["plot-container"]
+                    )
+    # Failing models filter functionality
+    def filter_failing_models(show_amd, show_nvidia):
+        """Filter models based on AMD and/or NVIDIA failures.
+        Logic:
+        - Neither checked: show all models
+        - AMD only: show models with AMD failures (including those with both)
+        - NVIDIA only: show models with NVIDIA failures (including those with both)
+        - Both checked: show all models with any failures
+        """
+        if not show_amd and not show_nvidia:
+            # Show all models container
+            return (
+                gr.update(visible=True),   # all_models_container
+                gr.update(visible=False),  # amd_failures_container
+                gr.update(visible=False),  # nvidia_failures_container
+                gr.update(visible=False),  # both_failures_container
+            )
+        elif show_amd and not show_nvidia:
+            # Show AMD failures only
+            return (
+                gr.update(visible=False),  # all_models_container
+                gr.update(visible=True),   # amd_failures_container
+                gr.update(visible=False),  # nvidia_failures_container
+                gr.update(visible=False),  # both_failures_container
+            )
+        elif not show_amd and show_nvidia:
+            # Show NVIDIA failures only
+            return (
+                gr.update(visible=False),  # all_models_container
+                gr.update(visible=False),  # amd_failures_container
+                gr.update(visible=True),   # nvidia_failures_container
+                gr.update(visible=False),  # both_failures_container
+            )
+        else:
+            # Show all failures
+            return (
+                gr.update(visible=False),  # all_models_container
+                gr.update(visible=False),  # amd_failures_container
+                gr.update(visible=False),  # nvidia_failures_container
+                gr.update(visible=True),   # both_failures_container
+            )
+    # Connect both checkboxes to the filter function
+    show_amd_failures.change(
+        fn=filter_failing_models,
+        inputs=[show_amd_failures, show_nvidia_failures],
+        outputs=[all_models_container, amd_failures_container, nvidia_failures_container, both_failures_container]
+    )
+    show_nvidia_failures.change(
+        fn=filter_failing_models,
+        inputs=[show_amd_failures, show_nvidia_failures],
+        outputs=[all_models_container, amd_failures_container, nvidia_failures_container, both_failures_container]
+    )
+    # Regressions panel toggle functionality
+    def toggle_regressions_panel(current_visible):
+        """Toggle the visibility of the regressions panel."""
+        new_visible = not current_visible
+        arrow = "▼" if new_visible else "►"
+        button_text = f"{arrow} New Regressions"
+        # Use CSS classes instead of Gradio visibility
+        css_classes = ["regressions-content"]
+        if new_visible:
+            css_classes.append("regressions-content-visible")
+        else:
+            css_classes.append("regressions-content-hidden")
+        return gr.update(value=button_text), gr.update(elem_classes=css_classes), new_visible
+    # Track regressions panel visibility state
+    regressions_visible = gr.State(False)
+    regressions_toggle_button.click(
+        fn=toggle_regressions_panel,
+        inputs=[regressions_visible],
+        outputs=[regressions_toggle_button, regressions_content, regressions_visible]
+    )
     # Model toggle functionality
     def toggle_model_list(current_visible):
     # Track model list visibility state
     model_list_visible = gr.State(False)
+    # Track last selected model for mode switches
+    selected_model_state = gr.State(None)
+    # Track whether current view is model detail (True) or summary (False)
+    in_model_view_state = gr.State(False)
     model_toggle_button.click(
         fn=toggle_model_list,
         outputs=[model_toggle_button, model_list_container, model_list_visible]
     )
+    # Unified summary handler: respects History toggle
+    def handle_summary_click(history_mode: bool):
+        description = get_description_text()
+        links = get_ci_links()
+        fr_plot, amd_plot, nvidia_plot = get_historical_summary_plots()
+        if history_mode:
+            return (
+                description,
+                links,
+                gr.update(visible=False),  # current_view
+                gr.update(visible=True),   # historical_view
+                gr.update(visible=False),  # summary_view
+                gr.update(visible=False),  # summary_display
+                gr.update(visible=False),  # detail_view
+                fr_plot,
+                amd_plot,
+                nvidia_plot,
+                gr.update(visible=False),  # time_series_detail_view
+                False,                     # in_model_view_state
+                "",                        # selected_model_state (clear it)
+            )
+        else:
+            fig = create_summary_page(Ci_results.df, Ci_results.available_models)
+            return (
+                description,
+                links,
+                gr.update(visible=True),   # current_view
+                gr.update(visible=False),  # historical_view
+                gr.update(visible=True),   # summary_view
+                gr.update(value=fig, visible=True), # summary_display
+                gr.update(visible=False),  # detail_view
+                gr.update(visible=False),  # time_series_failure_rates
+                gr.update(visible=False),  # time_series_amd_tests
+                gr.update(visible=False),  # time_series_nvidia_tests
+                gr.update(visible=False),  # time_series_detail_view
+                False,                     # in_model_view_state
+                "",                        # selected_model_state (clear it)
+            )
     summary_button.click(
+        fn=handle_summary_click,
+        inputs=[history_view_button],
+        outputs=[
+            description_display,
+            ci_links_display,
+            current_view,
+            historical_view,
+            summary_view,
+            summary_display,
+            detail_view,
+            time_series_failure_rates,
+            time_series_amd_tests,
+            time_series_nvidia_tests,
+            time_series_detail_view,
+            in_model_view_state,
+            selected_model_state,
+        ],
     )
     # Function to get CI job links
             # AMD links
             if amd_multi_link or amd_single_link:
                 links_md += "**AMD:**\n"
+                if amd_multi_link:
+                    links_md += f"• [Multi GPU]({amd_multi_link})\n"
+                if amd_single_link:
+                    links_md += f"• [Single GPU]({amd_single_link})\n"
                 links_md += "\n"
             # NVIDIA links
             if nvidia_multi_link or nvidia_single_link:
                 links_md += "**NVIDIA:**\n"
+                if nvidia_multi_link:
+                    links_md += f"• [Multi GPU]({nvidia_multi_link})\n"
+                if nvidia_single_link:
+                    links_md += f"• [Single GPU]({nvidia_single_link})\n"
             if not (amd_multi_link or amd_single_link or nvidia_multi_link or nvidia_single_link):
                 links_md += "*No links available*"
             return "🔗 **CI Jobs:** *Error loading links*\n\n❓ **[FAQ](README.md)**"
+    def get_historical_summary_plots():
+        """Get historical summary plots from preloaded data."""
+        plots = create_time_series_summary_gradio(Ci_results.historical_df)
+        return (
+            gr.update(value=plots['failure_rates'], visible=True),
+            gr.update(value=plots['amd_tests'], visible=True),
+            gr.update(value=plots['nvidia_tests'], visible=True),
+        )
+    def handle_history_toggle(history_mode, last_selected_model, in_model_view):
+        """Re-arrange the page when the History toggle changes.
+
+        Args:
+            history_mode: current value of the History toggle.
+            last_selected_model: value of ``selected_model_state`` (may be empty).
+            in_model_view: value of ``in_model_view_state``.
+
+        Returns:
+            A 15-tuple whose positions match the ``outputs`` list passed to
+            ``history_view_button.change``; the trailing comment on each entry names
+            the component it targets. ``gr.update()`` with no arguments leaves the
+            component untouched. The last entry is the new ``in_model_view_state``.
+
+        Four cases are handled:
+            * history on, a model view is open: per-model time series;
+            * history on otherwise: historical summary plots;
+            * history off, selected model present in ``Ci_results.df``: current model detail;
+            * history off otherwise: current summary page.
+        """
+        if history_mode:
+            # If currently in model view and valid model, show historical model detail
+            if in_model_view and last_selected_model:
+                amd_ts, nvidia_ts = show_time_series_model(last_selected_model)
+                return (
+                    gr.update(visible=False),   # current_view
+                    gr.update(visible=True),    # historical_view
+                    gr.update(visible=False),   # summary_view
+                    gr.update(visible=False),   # summary_display
+                    gr.update(visible=False),   # detail_view
+                    gr.update(visible=False),   # time_series_failure_rates
+                    gr.update(visible=False),   # time_series_amd_tests
+                    gr.update(visible=False),   # time_series_nvidia_tests
+                    amd_ts,                     # time_series_amd_model_plot
+                    nvidia_ts,                  # time_series_nvidia_model_plot
+                    gr.update(visible=True),    # time_series_detail_view
+                    gr.update(),                # plot_output
+                    gr.update(),                # amd_failed_tests_output
+                    gr.update(),                # nvidia_failed_tests_output
+                    True,                       # in_model_view_state (still in model view)
+                )
+            # Otherwise show historical summary
+            fr_plot, amd_plot, nvidia_plot = get_historical_summary_plots()
+            return (
+                gr.update(visible=False),   # current_view
+                gr.update(visible=True),    # historical_view
+                gr.update(visible=False),   # summary_view
+                gr.update(visible=False),   # summary_display
+                gr.update(visible=False),   # detail_view
+                fr_plot,                    # time_series_failure_rates (value + keep visibility)
+                amd_plot,                   # time_series_amd_tests
+                nvidia_plot,                # time_series_nvidia_tests
+                gr.update(),                # time_series_amd_model_plot
+                gr.update(),                # time_series_nvidia_model_plot
+                gr.update(visible=False),   # time_series_detail_view
+                gr.update(),                # plot_output
+                gr.update(),                # amd_failed_tests_output
+                gr.update(),                # nvidia_failed_tests_output
+                False,                      # in_model_view_state
+            )
+        else:
+            # Switch to current mode: show model if selected; otherwise summary
+            # The membership check guards against a remembered model that is absent from the current data
+            if last_selected_model and Ci_results.df is not None and not Ci_results.df.empty and last_selected_model in Ci_results.df.index:
+                fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, last_selected_model, Ci_results.all_historical_data)
+                return (
+                    gr.update(visible=True),    # current_view
+                    gr.update(visible=False),   # historical_view
+                    gr.update(visible=False),   # summary_view
+                    gr.update(visible=False),   # summary_display
+                    gr.update(visible=True),    # detail_view
+                    gr.update(visible=False),   # time_series_failure_rates
+                    gr.update(visible=False),   # time_series_amd_tests
+                    gr.update(visible=False),   # time_series_nvidia_tests
+                    gr.update(),                # time_series_amd_model_plot
+                    gr.update(),                # time_series_nvidia_model_plot
+                    gr.update(visible=False),   # time_series_detail_view
+                    fig,                        # plot_output
+                    amd_txt,                    # amd_failed_tests_output
+                    nvidia_txt,                 # nvidia_failed_tests_output
+                    True,                       # in_model_view_state
+                )
+            else:
+                fig = create_summary_page(Ci_results.df, Ci_results.available_models)
+                return (
+                    gr.update(visible=True),    # current_view
+                    gr.update(visible=False),   # historical_view
+                    gr.update(visible=True),    # summary_view
+                    gr.update(value=fig, visible=True),  # summary_display
+                    gr.update(visible=False),   # detail_view
+                    gr.update(visible=False),   # time_series_failure_rates
+                    gr.update(visible=False),   # time_series_amd_tests
+                    gr.update(visible=False),   # time_series_nvidia_tests
+                    gr.update(),                # time_series_amd_model_plot
+                    gr.update(),                # time_series_nvidia_model_plot
+                    gr.update(visible=False),   # time_series_detail_view
+                    gr.update(),                # plot_output
+                    gr.update(),                # amd_failed_tests_output
+                    gr.update(),                # nvidia_failed_tests_output
+                    False,                      # in_model_view_state
+                )
+    # Re-render the layout whenever the History toggle changes.
+    # The order of `outputs` must stay in sync with the 15-tuples returned by handle_history_toggle.
+    history_view_button.change(
+        fn=handle_history_toggle,
+        inputs=[history_view_button, selected_model_state, in_model_view_state],
+        outputs=[
+            current_view,
+            historical_view,
+            summary_view,
+            summary_display,
+            detail_view,
+            time_series_failure_rates,
+            time_series_amd_tests,
+            time_series_nvidia_tests,
+            time_series_amd_model_plot,
+            time_series_nvidia_model_plot,
+            time_series_detail_view,
+            plot_output,
+            amd_failed_tests_output,
+            nvidia_failed_tests_output,
+            in_model_view_state,
+        ],
+    )
+    # Time-series model selection functionality
+    def show_time_series_model(selected_model):
+        """Return visible plot updates holding one model's AMD and NVIDIA history.
+
+        The figures come from ``create_model_time_series_gradio`` applied to the
+        preloaded ``Ci_results.historical_df``; the result is an (AMD, NVIDIA) pair
+        of ``gr.update`` payloads.
+        """
+        model_figures = create_model_time_series_gradio(Ci_results.historical_df, selected_model)
+        amd_update = gr.update(value=model_figures['amd_plot'], visible=True)
+        nvidia_update = gr.update(value=model_figures['nvidia_plot'], visible=True)
+        return amd_update, nvidia_update
+    # Unified model click handler: respects History toggle
+    def handle_model_click(selected_model: str, history_mode: bool):
+        """Open the detail view of a model, in historical or current mode.
+
+        Args:
+            selected_model: name of the model whose button was clicked.
+            history_mode: current value of the History toggle.
+
+        Returns:
+            A 16-tuple matching the ``outputs`` list used when wiring the model
+            buttons; the trailing comment on each entry names the component it
+            targets. The last two entries store ``selected_model`` and ``True``
+            into ``selected_model_state`` and ``in_model_view_state``.
+        """
+        if history_mode:
+            amd_ts, nvidia_ts = show_time_series_model(selected_model)
+            return (
+                gr.update(),                 # plot_output
+                gr.update(),                 # amd_failed_tests_output
+                gr.update(),                 # nvidia_failed_tests_output
+                gr.update(visible=False),    # current_view
+                gr.update(visible=True),     # historical_view
+                gr.update(visible=False),    # summary_view
+                gr.update(visible=False),    # summary_display
+                gr.update(visible=False),    # detail_view
+                gr.update(visible=False),    # time_series_failure_rates
+                gr.update(visible=False),    # time_series_amd_tests
+                gr.update(visible=False),    # time_series_nvidia_tests
+                amd_ts,                      # time_series_amd_model_plot
+                nvidia_ts,                   # time_series_nvidia_model_plot
+                gr.update(visible=True),     # time_series_detail_view
+            selected_model, True)            # selected_model_state, in_model_view_state
+        else:
+            # NOTE(review): unlike handle_history_toggle, this branch does not check that
+            # selected_model is present in Ci_results.df before plotting - confirm that is intended.
+            fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, selected_model, Ci_results.all_historical_data)
+            return (
+                fig,
+                amd_txt,
+                nvidia_txt,
+                gr.update(visible=True),     # current_view
+                gr.update(visible=False),    # historical_view
+                gr.update(visible=False),    # summary_view
+                gr.update(visible=False),    # summary_display
+                gr.update(visible=True),     # detail_view
+                gr.update(),                 # time_series_failure_rates
+                gr.update(),                 # time_series_amd_tests
+                gr.update(),                 # time_series_nvidia_tests
+                gr.update(),                 # time_series_amd_model_plot
+                gr.update(),                 # time_series_nvidia_model_plot
+                gr.update(visible=False),    # time_series_detail_view
+            selected_model, True)            # selected_model_state, in_model_view_state
+    # Wire up all model buttons.
+    # Every button group drives the same components, so the outputs list is declared once.
+    # Its order must stay in sync with the 16-tuples returned by handle_model_click.
+    model_click_outputs = [
+        plot_output,
+        amd_failed_tests_output,
+        nvidia_failed_tests_output,
+        current_view,
+        historical_view,
+        summary_view,
+        summary_display,
+        detail_view,
+        time_series_failure_rates,
+        time_series_amd_tests,
+        time_series_nvidia_tests,
+        time_series_amd_model_plot,
+        time_series_nvidia_model_plot,
+        time_series_detail_view,
+        selected_model_state,
+        in_model_view_state,
+    ]
+    def _wire_model_buttons(buttons, names):
+        """Attach handle_model_click to each button, pairing buttons[i] with names[i].
+
+        Raises IndexError if there are more buttons than names, so a mismatch between
+        the rendered buttons and the model lists is caught at startup.
+        """
+        for i, btn in enumerate(buttons):
+            btn.click(
+                # The name is bound as a default argument: a plain closure would see
+                # only the value from the last loop iteration.
+                fn=lambda history_mode, m=names[i]: handle_model_click(m, history_mode),
+                inputs=[history_view_button],
+                outputs=model_click_outputs,
+            )
+    # One button per model choice
+    _wire_model_buttons(model_buttons, model_choices)
+    # AMD failing model buttons (name list sorted once, not once per button)
+    amd_models_to_show = amd_failing_models + both_failing_models
+    _wire_model_buttons(amd_buttons, sorted(amd_models_to_show))
+    # NVIDIA failing model buttons
+    nvidia_models_to_show = nvidia_failing_models + both_failing_models
+    _wire_model_buttons(nvidia_buttons, sorted(nvidia_models_to_show))
+    # Both failures model buttons
+    all_failing = list(set(amd_failing_models + nvidia_failing_models + both_failing_models))
+    _wire_model_buttons(both_buttons, sorted(all_failing))
+    # Auto-update CI links and regressions when the interface loads
+    def load_dashboard_data():
+        """Return the CI links content and the regressions content as a pair."""
+        ci_links_md = get_ci_links()
+        regressions_md = get_regressions_text()
+        return ci_links_md, regressions_md
     demo.load(
+        fn=load_dashboard_data,
+        outputs=[ci_links_display, regressions_panel]
     )

data.py CHANGED Viewed

@@ -1,10 +1,12 @@
 from huggingface_hub import HfFileSystem
 import pandas as pd
 from utils import logger
 import threading
 import traceback
 import json
 import re
 # NOTE: if caching is an issue, try adding `use_listings_cache=False`
 fs = HfFileSystem()
@@ -60,6 +62,8 @@ def log_dataframe_link(link: str) -> str:
     Adds the link to the dataset in the logs, modifies it to get a clockable link and then returns the date of the
     report.
     """
     logger.info(f"Reading df located at {link}")
     # Make sure the links starts with an http adress
     if link.startswith("hf://"):
@@ -102,26 +106,181 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
     df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
     return df, df_upload_date
-def get_first_working_df(file_list: list[str]) -> str:
-    for file in file_list:
-        job_links = file.rsplit('/', 1)[0] + "/job_links.json"
         try:
-            links = pd.read_json(f"hf://{job_links}", typ="series")
-            has_one_working_link = any(links.values)
         except Exception as e:
-            logger.error(f"Could not read job links from {job_links}: {e}")
-            has_one_working_link = False
-        if has_one_working_link:
-            return file
-        logger.warning(f"Skipping {file} as it has no working job links.")
-    raise RuntimeError("Could not find any working dataframe in the provided list.")
 def get_distant_data() -> tuple[pd.DataFrame, str]:
     # Retrieve AMD dataframe
     amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
     files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
-    file_amd = get_first_working_df(files_amd)
-    df_amd, date_df_amd = read_one_dataframe(f"hf://{file_amd}", "amd")
     # Retrieve NVIDIA dataframe, which pattern should be:
     # hf://datasets/hf-internal-testing`/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
     nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
@@ -161,9 +320,229 @@ def get_sample_data() -> tuple[pd.DataFrame, str]:
     filtered_joined.index = "sample_" + filtered_joined.index
     return filtered_joined, "sample data was loaded"
 def safe_extract(row: pd.DataFrame, key: str) -> int:
     """Return ``row[key]`` as an int, or 0 when the key is absent or its value is NA."""
     value = row.get(key, 0)
     if pd.notna(value):
         return int(value)
     return 0
 def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
     """Extract and process model data from DataFrame row."""
     # Handle missing values and get counts directly from dataframe
@@ -203,6 +582,10 @@ class CIResults:
         self.df = pd.DataFrame()
         self.available_models = []
         self.latest_update_msg = ""
     def load_data(self) -> None:
         """Load data from the data source."""
@@ -211,6 +594,13 @@ class CIResults:
             logger.info("Loading distant data...")
             new_df, latest_update_msg = get_distant_data()
             self.latest_update_msg = latest_update_msg
         except Exception as e:
             error_msg = [
                 "Loading data failed:",
@@ -220,11 +610,18 @@ class CIResults:
                 "Falling back on sample data."
             ]
             logger.error("\n".join(error_msg))
             new_df, latest_update_msg = get_sample_data()
             self.latest_update_msg = latest_update_msg
         # Update attributes
         self.df = new_df
         self.available_models = new_df.index.tolist()
         # Log and return distant load status
         logger.info(f"Data loaded successfully: {len(self.available_models)} models")
         logger.info(f"Models: {self.available_models[:5]}{'...' if len(self.available_models) > 5 else ''}")
@@ -242,6 +639,62 @@ class CIResults:
                 msg[model][col] = value
         logger.info(json.dumps(msg, indent=4))
     def schedule_data_reload(self):
         """Schedule the next data reload."""
         def reload_data():

 from huggingface_hub import HfFileSystem
 import pandas as pd
 from utils import logger
+from datetime import datetime, timedelta
 import threading
 import traceback
 import json
 import re
+from typing import List, Tuple, Optional
 # NOTE: if caching is an issue, try adding `use_listings_cache=False`
 fs = HfFileSystem()
     Adds the link to the dataset in the logs, modifies it to get a clockable link and then returns the date of the
     report.
     """
+    if link.startswith("sample_"):
+        return "9999-99-99"
     logger.info(f"Reading df located at {link}")
     # Make sure the links starts with an http adress
     if link.startswith("hf://"):
     df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
     return df, df_upload_date
+def _fake_recent_dates(days: int = 7) -> List[str]:
+    """Return the last ``days`` calendar dates (today first) as YYYY-MM-DD strings."""
+    today = datetime.now()
+    return [(today - timedelta(days=offset)).strftime("%Y-%m-%d") for offset in range(days)]
+def get_available_dates() -> List[str]:
+    """Get list of available dates from both AMD and NVIDIA datasets.
+
+    Returns:
+        At most 30 dates (YYYY-MM-DD, newest first) for which both datasets contain a
+        report. When no common date exists, or when listing the datasets fails, the
+        last 7 calendar dates are returned as placeholders instead, so the result is
+        never empty.
+    """
+    # Patterns are built once, outside the per-file loops.
+    # AMD path: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
+    amd_pattern = re.compile(r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json')
+    # NVIDIA path: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
+    nvidia_pattern = re.compile(r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json')
+    try:
+        amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
+        files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
+        logger.info(f"Found {len(files_amd)} AMD files")
+        nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
+        files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
+        logger.info(f"Found {len(files_nvidia)} NVIDIA files")
+        # Extract dates from file paths
+        amd_dates = set()
+        for file_path in files_amd:
+            match = amd_pattern.search(file_path)
+            if match:
+                amd_dates.add(match.group(1))
+            else:
+                # Log unmatched paths for debugging
+                logger.debug(f"AMD file path didn't match pattern: {file_path}")
+        # Log a few example AMD file paths for debugging
+        if files_amd:
+            logger.info(f"Example AMD file paths: {files_amd[:3]}")
+        nvidia_dates = set()
+        for file_path in files_nvidia:
+            match = nvidia_pattern.search(file_path)
+            if match:
+                nvidia_dates.add(match.group(1))
+        logger.info(f"AMD dates: {sorted(amd_dates, reverse=True)[:5]}...")  # Show first 5
+        logger.info(f"NVIDIA dates: {sorted(nvidia_dates, reverse=True)[:5]}...")  # Show first 5
+        # Keep only the dates where both datasets have data
+        common_dates = sorted(amd_dates & nvidia_dates, reverse=True)
+        logger.info(f"Common dates: {len(common_dates)} dates where both AMD and NVIDIA have data")
+        if common_dates:
+            return common_dates[:30]  # Limit to last 30 days for performance
+        # If no real dates available, generate fake dates for the last 7 days
+        logger.warning("No real dates available, generating fake dates for demo purposes")
+        return _fake_recent_dates()
+    except Exception as e:
+        logger.error(f"Error getting available dates: {e}")
+        # Generate fake dates when there's an error
+        logger.info("Generating fake dates due to error")
+        return _fake_recent_dates()
+def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
+    """Get data for a specific date.
+
+    Reads the AMD and NVIDIA reports of ``target_date`` (YYYY-MM-DD), joins them, keeps
+    the ``KEYS_TO_KEEP`` columns and filters the rows down to ``IMPORTANT_MODELS``.
+
+    Returns:
+        ``(dataframe, target_date)``. Every failure (no AMD run listed for the date, both
+        reads failing, any other exception) is logged and yields an empty DataFrame;
+        nothing is raised to the caller.
+    """
+    try:
+        # For AMD, we need to find the specific run file for the date
+        # AMD structure: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
+        amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/{target_date}/runs/*/ci_results_run_models_gpu/model_results.json"
+        amd_files = fs.glob(amd_src, refresh=True)
+        if not amd_files:
+            raise FileNotFoundError(f"No AMD data found for date {target_date}")
+        # Several runs can exist for one date and the glob order is not something to rely on:
+        # take the highest path, which is what get_distant_data does (reverse sort, first item).
+        # NOTE(review): assumes run ids sort chronologically as strings - confirm.
+        amd_file = max(amd_files)
+        # Ensure the AMD file path has the hf:// prefix
+        if not amd_file.startswith("hf://"):
+            amd_file = f"hf://{amd_file}"
+        # NVIDIA structure: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
+        nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
+        # Read dataframes - try each platform independently
+        df_amd = pd.DataFrame()
+        df_nvidia = pd.DataFrame()
         try:
+            df_amd, _ = read_one_dataframe(amd_file, "amd")
+            logger.info(f"Successfully loaded AMD data for {target_date}")
         except Exception as e:
+            logger.warning(f"Failed to load AMD data for {target_date}: {e}")
+        try:
+            df_nvidia, _ = read_one_dataframe(nvidia_src, "nvidia")
+            logger.info(f"Successfully loaded NVIDIA data for {target_date}")
+        except Exception as e:
+            logger.warning(f"Failed to load NVIDIA data for {target_date}: {e}")
+        # If both failed, return empty dataframe
+        if df_amd.empty and df_nvidia.empty:
+            logger.warning(f"No data available for either platform on {target_date}")
+            return pd.DataFrame(), target_date
+        # Join both dataframes (outer join to include data from either platform)
+        if not df_amd.empty and not df_nvidia.empty:
+            joined = df_amd.join(df_nvidia, rsuffix="_nvidia", lsuffix="_amd", how="outer")
+        elif not df_amd.empty:
+            joined = df_amd.copy()
+        else:
+            joined = df_nvidia.copy()
+        # NOTE(review): when only one platform loaded, the join (and its _amd/_nvidia suffixes)
+        # is skipped, so this selection presumably raises KeyError and the date ends up empty
+        # through the outer handler - confirm against KEYS_TO_KEEP.
+        joined = joined[KEYS_TO_KEEP]
+        joined.index = joined.index.str.replace("^models_", "", regex=True)
+        # Filter out all but important models
+        important_models_lower = [model.lower() for model in IMPORTANT_MODELS]
+        filtered_joined = joined[joined.index.str.lower().isin(important_models_lower)]
+        return filtered_joined, target_date
+    except Exception as e:
+        logger.error(f"Error getting data for date {target_date}: {e}")
+        # Return empty dataframe instead of sample data for historical functionality
+        return pd.DataFrame(), target_date
+def get_historical_data(start_date: str, end_date: str, sample_data: bool = False) -> pd.DataFrame:
+    """Get historical data for a date range.
+
+    Loads one dataframe per day from ``start_date`` to ``end_date`` (both inclusive,
+    YYYY-MM-DD), tags each with a ``date`` column and concatenates them. Days for which
+    nothing could be loaded are skipped.
+
+    Falls back to ``get_fake_historical_data`` when ``sample_data`` is true, when no day
+    in the range yields data, or when any unexpected error occurs.
+    """
+    if sample_data:
+        return get_fake_historical_data(start_date, end_date)
+    try:
+        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
+        end_dt = datetime.strptime(end_date, "%Y-%m-%d")
+        historical_data = []
+        current_dt = start_dt
+        while current_dt <= end_dt:
+            date_str = current_dt.strftime("%Y-%m-%d")
+            try:
+                df, _ = get_data_for_date(date_str)
+                # Only add non-empty dataframes
+                if not df.empty:
+                    # assign() returns a new frame, so the fetched one is left untouched
+                    historical_data.append(df.assign(date=date_str))
+                    logger.info(f"Loaded data for {date_str}")
+                else:
+                    logger.warning(f"No data available for {date_str}")
+            except Exception as e:
+                logger.warning(f"Could not load data for {date_str}: {e}")
+            current_dt += timedelta(days=1)
+        if not historical_data:
+            # pd.concat raises ValueError on an empty list; handle the no-data case explicitly
+            # instead of reaching the fallback through that exception.
+            logger.warning(f"No historical data found between {start_date} and {end_date}")
+            logger.info("Falling back to fake historical data because no real data was loaded")
+            return get_fake_historical_data(start_date, end_date)
+        # Combine all dataframes
+        return pd.concat(historical_data, ignore_index=False)
+    except Exception as e:
+        logger.error(f"Error getting historical data: {e}")
+        # Fall back to fake data when there's an error
+        logger.info("Falling back to fake historical data due to error")
+        return get_fake_historical_data(start_date, end_date)
 def get_distant_data() -> tuple[pd.DataFrame, str]:
     # Retrieve AMD dataframe
     amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
     files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
+    df_amd, date_df_amd = read_one_dataframe(f"hf://{files_amd[0]}", "amd")
     # Retrieve NVIDIA dataframe, which pattern should be:
     # hf://datasets/hf-internal-testing`/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
     nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
     filtered_joined.index = "sample_" + filtered_joined.index
     return filtered_joined, "sample data was loaded"
+def get_fake_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
+    """Build synthetic per-day history by randomly scaling the sample data.
+
+    For every day from ``start_date`` to ``end_date`` (inclusive, YYYY-MM-DD) the sample
+    dataframe is copied, tagged with a ``date`` column and its count columns are scaled by
+    a random factor. Returns the concatenation of all days, or an empty DataFrame when the
+    range is empty or anything goes wrong.
+    """
+    try:
+        import random
+        first_day = datetime.strptime(start_date, "%Y-%m-%d")
+        last_day = datetime.strptime(end_date, "%Y-%m-%d")
+        # Sample data is the template every day is derived from
+        template_df, _ = get_sample_data()
+        # (columns, lowest factor, highest factor, scale strictly positive values only)
+        jitter_specs = (
+            # Success/skip counts move slightly (±20%)
+            (('success_amd', 'success_nvidia', 'skipped_amd', 'skipped_nvidia'), 0.8, 1.2, True),
+            # Failure counts move more dramatically to show trends
+            (('failed_multi_no_amd', 'failed_multi_no_nvidia', 'failed_single_no_amd', 'failed_single_no_nvidia'), 0.5, 2.0, False),
+        )
+        daily_frames = []
+        day = first_day
+        while day <= last_day:
+            day_df = template_df.copy()
+            day_df['date'] = day.strftime("%Y-%m-%d")
+            for model in day_df.index:
+                for columns, low, high, positive_only in jitter_specs:
+                    for col in columns:
+                        if col not in day_df.columns:
+                            continue
+                        base = day_df.loc[model, col]
+                        if pd.isna(base) or (positive_only and not base > 0):
+                            continue
+                        day_df.loc[model, col] = max(0, int(base * random.uniform(low, high)))
+            daily_frames.append(day_df)
+            day += timedelta(days=1)
+        if not daily_frames:
+            logger.warning("No fake historical data generated")
+            return pd.DataFrame()
+        # Stack the per-day frames, keeping the model names as index
+        combined_df = pd.concat(daily_frames, ignore_index=False)
+        logger.info(f"Generated fake historical data: {len(combined_df)} records from {start_date} to {end_date}")
+        return combined_df
+    except Exception as e:
+        logger.error(f"Error generating fake historical data: {e}")
+        return pd.DataFrame()
 def safe_extract(row: pd.DataFrame, key: str) -> int:
     """Return ``row[key]`` as an int, or 0 when the key is absent or its value is NA."""
     value = row.get(key, 0)
     if pd.notna(value):
         return int(value)
     return 0
+def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_name: str, device: str, gpu_type: str) -> Optional[str]:
+    """
+    Find the first date when a specific test failure appeared in historical data.
+
+    Args:
+        historical_df: history indexed by model name, with a ``date`` column and a
+            ``failures_<device>`` column holding a dict (or its JSON string) that maps a
+            gpu type to a list of ``{'line': <test name>, ...}`` entries.
+        model_name: model to look up; lower-cased before matching the index.
+        test_name: value compared against each entry's ``'line'`` field.
+        device: suffix selecting the failures column (e.g. ``failures_amd``).
+        gpu_type: key inside the failures dict (e.g. ``'single'``).
+
+    Returns:
+        The ``date`` of the oldest row listing the test, or None when it never appears,
+        the model is unknown, the history is empty, or an unexpected error occurs.
+    """
+    if historical_df.empty:
+        return None
+    try:
+        # Normalize model name to match DataFrame index
+        model_name_lower = model_name.lower()
+        model_data = historical_df[historical_df.index == model_name_lower]
+        if model_data.empty:
+            return None
+        failures_col = f'failures_{device}'
+        # Walk the rows oldest first so the first hit is the first occurrence
+        for _, row in model_data.sort_values('date').iterrows():
+            failures = row.get(failures_col, None)
+            # Failures may be stored as a JSON string
+            if isinstance(failures, str):
+                try:
+                    failures = json.loads(failures)
+                except ValueError:  # json.JSONDecodeError is a ValueError
+                    continue
+            # Missing values (None/NaN) and malformed payloads skip this row only,
+            # rather than aborting the whole search
+            if not isinstance(failures, dict):
+                continue
+            for test in failures.get(gpu_type) or []:
+                if test.get('line', '') == test_name:
+                    # Found the first occurrence
+                    return row.get('date', None)
+        return None
+    except Exception as e:
+        logger.error(f"Error finding first seen date for {test_name}: {e}")
+        return None
+def _coerce_failures(raw) -> dict:
+    """Normalize one ``failures_*`` cell into a dict keyed by gpu type.
+    Accepts a dict or a JSON-encoded string. Anything else (missing value, NaN,
+    malformed JSON, JSON that does not decode to an object) yields an empty dict.
+    """
+    if isinstance(raw, str):
+        try:
+            raw = json.loads(raw)
+        except ValueError:  # json.JSONDecodeError is a ValueError subclass
+            return {}
+    return raw if isinstance(raw, dict) else {}
+def _failed_test_names(failures: dict, gpu_type: str) -> set:
+    """Collect the non-empty ``line`` identifiers of the failures listed under ``gpu_type``."""
+    names = set()
+    for test in failures.get(gpu_type) or []:
+        name = test.get('line') if isinstance(test, dict) else None
+        if isinstance(name, str) and name:  # Skip empty / missing names
+            names.add(name)
+    return names
+def find_new_regressions(current_df: pd.DataFrame, historical_df: pd.DataFrame) -> list[dict]:
+    """
+    Compare CURRENT failures against PREVIOUS day's failures to find NEW regressions.
+    A regression is a test that:
+    - Is failing in the CURRENT/LATEST run (current_df)
+    - Was NOT failing in the PREVIOUS run (most recent date in historical_df)
+    Args:
+        current_df: latest results, indexed by model name, with 'failures_amd' and
+            'failures_nvidia' cells holding a dict (or its JSON string) that maps
+            'single' / 'multi' to a list of {'line': <test id>} entries.
+        historical_df: past results with a 'date' column and the same failure
+            columns, indexed by lower-cased model name.
+    Returns:
+        One dict per new failure with keys 'model', 'test' (short name),
+        'test_full', 'device' ('amd' / 'nvidia') and 'gpu_type' ('single' / 'multi').
+        Entries are grouped by model, then device, then gpu type, and sorted by
+        full test name inside each group so the output is deterministic.
+        A model with no row on the reference date has every current failure reported.
+    """
+    if current_df.empty or historical_df.empty:
+        return []
+    # The most recent date present in the history is treated as "yesterday"
+    yesterday_date = max(historical_df['date'].unique())
+    yesterday_data = historical_df[historical_df['date'] == yesterday_date]
+    new_regressions = []
+    for model_name in current_df.index:
+        current_row = current_df.loc[model_name]
+        # Historical rows are looked up by lower-cased model name
+        yesterday_rows = yesterday_data[yesterday_data.index == str(model_name).lower()]
+        yesterday_row = yesterday_rows.iloc[0] if not yesterday_rows.empty else {}
+        for device in ('amd', 'nvidia'):
+            column = f'failures_{device}'
+            current_failures = _coerce_failures(current_row.get(column))
+            yesterday_failures = _coerce_failures(yesterday_row.get(column))
+            for gpu_type in ('single', 'multi'):
+                # NEW failures: failing NOW but NOT yesterday
+                new_tests = (
+                    _failed_test_names(current_failures, gpu_type)
+                    - _failed_test_names(yesterday_failures, gpu_type)
+                )
+                for test_name in sorted(new_tests):
+                    new_regressions.append({
+                        'model': model_name,
+                        'test': test_name.split('::')[-1],  # Short name
+                        'test_full': test_name,  # Full name
+                        'device': device,
+                        'gpu_type': gpu_type
+                    })
+    return new_regressions
 def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
     """Extract and process model data from DataFrame row."""
     # Handle missing values and get counts directly from dataframe
         self.df = pd.DataFrame()
         self.available_models = []
         self.latest_update_msg = ""
+        self.available_dates = []
+        self.historical_df = pd.DataFrame()
+        self.all_historical_data = pd.DataFrame()  # Store all historical data at startup
+        self.sample_data = False
     def load_data(self) -> None:
         """Load data from the data source."""
             logger.info("Loading distant data...")
             new_df, latest_update_msg = get_distant_data()
             self.latest_update_msg = latest_update_msg
+            self.available_dates = get_available_dates()
+            logger.info(f"Available dates: {len(self.available_dates)} dates")
+            if self.available_dates:
+                logger.info(f"Date range: {self.available_dates[-1]} to {self.available_dates[0]}")
+            else:
+                logger.warning("No available dates found")
+                self.available_dates = []
         except Exception as e:
             error_msg = [
                 "Loading data failed:",
                 "Falling back on sample data."
             ]
             logger.error("\n".join(error_msg))
+            self.sample_data = True
             new_df, latest_update_msg = get_sample_data()
             self.latest_update_msg = latest_update_msg
+            self.available_dates = None
         # Update attributes
         self.df = new_df
         self.available_models = new_df.index.tolist()
+        # Load all historical data at startup
+        self.load_all_historical_data()
         # Log and return distant load status
         logger.info(f"Data loaded successfully: {len(self.available_models)} models")
         logger.info(f"Models: {self.available_models[:5]}{'...' if len(self.available_models) > 5 else ''}")
                 msg[model][col] = value
         logger.info(json.dumps(msg, indent=4))
+    def load_all_historical_data(self) -> None:
+        """Fetch historical data spanning every known date into ``all_historical_data``.
+        When ``available_dates`` is empty or None, it is first replaced by the
+        last 7 calendar days (today included) formatted as YYYY-MM-DD. Any
+        failure is logged and leaves ``all_historical_data`` as an empty frame.
+        """
+        try:
+            if not self.available_dates:
+                # No real dates known: substitute a one-week window ending today
+                now = datetime.now()
+                self.available_dates = [
+                    (now - timedelta(days=offset)).strftime("%Y-%m-%d")
+                    for offset in range(7)
+                ]
+                logger.info(f"No available dates found, generated {len(self.available_dates)} sample dates.")
+            logger.info(f"Loading all historical data for {len(self.available_dates)} dates...")
+            # The list is treated as newest-first: last entry = oldest, first entry = newest
+            oldest, newest = self.available_dates[-1], self.available_dates[0]
+            self.all_historical_data = get_historical_data(oldest, newest, self.sample_data)
+            logger.info(f"All historical data loaded: {len(self.all_historical_data)} records")
+        except Exception as e:
+            logger.error(f"Error loading all historical data: {e}")
+            self.all_historical_data = pd.DataFrame()
+    def load_historical_data(self, start_date: str, end_date: str) -> None:
+        """Set ``historical_df`` to the pre-loaded rows dated within [start_date, end_date].
+        Both bounds are inclusive YYYY-MM-DD strings. Nothing is fetched here:
+        rows are selected from ``all_historical_data``. On any error, or when no
+        date falls in the range, ``historical_df`` becomes an empty frame.
+        """
+        try:
+            logger.info(f"Filtering historical data from {start_date} to {end_date}")
+            if self.all_historical_data.empty:
+                logger.warning("No pre-loaded historical data available")
+                self.historical_df = pd.DataFrame()
+                return
+            date_format = "%Y-%m-%d"
+            lower_bound = datetime.strptime(start_date, date_format)
+            upper_bound = datetime.strptime(end_date, date_format)
+            # One sub-frame per distinct in-range date, in order of first appearance
+            per_date_frames = [
+                self.all_historical_data[self.all_historical_data['date'] == day]
+                for day in self.all_historical_data['date'].unique()
+                if lower_bound <= datetime.strptime(day, date_format) <= upper_bound
+            ]
+            if not per_date_frames:
+                self.historical_df = pd.DataFrame()
+                logger.warning(f"No historical data found for date range {start_date} to {end_date}")
+                return
+            # Keep the original (model-name) index on the concatenated result
+            self.historical_df = pd.concat(per_date_frames, ignore_index=False)
+            logger.info(f"Historical data filtered: {len(self.historical_df)} records for {start_date} to {end_date}")
+        except Exception as e:
+            logger.error(f"Error filtering historical data: {e}")
+            self.historical_df = pd.DataFrame()
     def schedule_data_reload(self):
         """Schedule the next data reload."""
         def reload_data():

logos/amd_logo.png ADDED Viewed

logos/nvidia_logo.png ADDED Viewed

model_page.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import matplotlib.pyplot as plt
 import pandas as pd
 from utils import generate_underlined_line
-from data import extract_model_data
 # Figure dimensions
 FIGURE_WIDTH_DUAL = 18
@@ -42,11 +42,11 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
     """Create a pie chart for device statistics."""
     if not filtered_stats:
         ax.text(0.5, 0.5, 'No test results',
-               horizontalalignment='center', verticalalignment='center',
-               transform=ax.transAxes, fontsize=14, color='#888888',
-               fontfamily='monospace', weight='normal')
         ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='bold',
-                    pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
         ax.axis('off')
         return
@@ -63,7 +63,7 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
         shadow=False,
         wedgeprops=dict(edgecolor='#1a1a1a', linewidth=BORDER_LINE_WIDTH),  # Minimal borders
         textprops={'fontsize': 12, 'weight': 'normal',
-                  'color': LABEL_COLOR, 'fontfamily': 'monospace'}
     )
     # Enhanced percentage text styling for better readability
@@ -82,10 +82,10 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
     # Device label closer to chart and bigger
     ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='normal',
-                pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
-def plot_model_stats(df: pd.DataFrame, model_name: str) -> tuple[plt.Figure, str, str]:
     """Draws pie charts of model's passed, failed, skipped, and error stats for AMD and NVIDIA."""
     # Handle case where the dataframe is empty or the model name could not be found in it
     if df.empty or model_name not in df.index:
@@ -124,25 +124,25 @@ def plot_model_stats(df: pd.DataFrame, model_name: str) -> tuple[plt.Figure, str
     # Add subtle separation line between charts - stops at device labels level
     line_x = 0.5
     fig.add_artist(plt.Line2D([line_x, line_x], [0.0, SEPARATOR_LINE_Y_END],
-                              color='#333333', linewidth=SEPARATOR_LINE_WIDTH,
-                              alpha=SEPARATOR_ALPHA, transform=fig.transFigure))
     # Add central shared title for model name
     fig.suptitle(f'{model_name.lower()}', fontsize=32, weight='bold',
-                color='#CCCCCC', fontfamily='monospace', y=MODEL_TITLE_Y)
     # Clean layout with padding and space for central title
     plt.tight_layout()
     plt.subplots_adjust(top=SUBPLOT_TOP, wspace=SUBPLOT_WSPACE)
-    amd_failed_info = prepare_textbox_content(failures_amd, 'AMD', bool(amd_filtered))
-    nvidia_failed_info = prepare_textbox_content(failures_nvidia, 'NVIDIA', bool(nvidia_filtered))
     return fig, amd_failed_info, nvidia_failed_info
-def prepare_textbox_content(failures: dict[str, list], device: str, data_available: bool) -> str:
-    """Extract failure information from failures object."""
     # Catch the case where there is no data
     if not data_available:
         return generate_underlined_line(f"No data for {device}")
@@ -160,21 +160,43 @@ def prepare_textbox_content(failures: dict[str, list], device: str, data_availab
         ""
     ]
     # Add single-gpu failures
     if single_failures:
         info_lines.append(generate_underlined_line("Single GPU failures:"))
         for test in single_failures:
-            name = test.get("line", "::*could not find name*")
-            name = name.split("::")[-1]
-            info_lines.append(name)
         info_lines.append("\n")
     # Add multi-gpu failures
     if multi_failures:
         info_lines.append(generate_underlined_line("Multi GPU failures:"))
         for test in multi_failures:
-            name = test.get("line", "::*could not find name*")
-            name = name.split("::")[-1]
-            info_lines.append(name)
-    return "\n".join(info_lines)

 import matplotlib.pyplot as plt
 import pandas as pd
 from utils import generate_underlined_line
+from data import extract_model_data, find_failure_first_seen
 # Figure dimensions
 FIGURE_WIDTH_DUAL = 18
     """Create a pie chart for device statistics."""
     if not filtered_stats:
         ax.text(0.5, 0.5, 'No test results',
+                horizontalalignment='center', verticalalignment='center',
+                transform=ax.transAxes, fontsize=14, color='#888888',
+                fontfamily='monospace', weight='normal')
         ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='bold',
+                     pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
         ax.axis('off')
         return
         shadow=False,
         wedgeprops=dict(edgecolor='#1a1a1a', linewidth=BORDER_LINE_WIDTH),  # Minimal borders
         textprops={'fontsize': 12, 'weight': 'normal',
+                   'color': LABEL_COLOR, 'fontfamily': 'monospace'}
     )
     # Enhanced percentage text styling for better readability
     # Device label closer to chart and bigger
     ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='normal',
+                 pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
+def plot_model_stats(df: pd.DataFrame, model_name: str, historical_df: pd.DataFrame = None) -> tuple[plt.Figure, str, str]:
     """Draws pie charts of model's passed, failed, skipped, and error stats for AMD and NVIDIA."""
     # Handle case where the dataframe is empty or the model name could not be found in it
     if df.empty or model_name not in df.index:
     # Add subtle separation line between charts - stops at device labels level
     line_x = 0.5
     fig.add_artist(plt.Line2D([line_x, line_x], [0.0, SEPARATOR_LINE_Y_END],
+                             color='#333333', linewidth=SEPARATOR_LINE_WIDTH,
+                             alpha=SEPARATOR_ALPHA, transform=fig.transFigure))
     # Add central shared title for model name
     fig.suptitle(f'{model_name.lower()}', fontsize=32, weight='bold',
+                 color='#CCCCCC', fontfamily='monospace', y=MODEL_TITLE_Y)
     # Clean layout with padding and space for central title
     plt.tight_layout()
     plt.subplots_adjust(top=SUBPLOT_TOP, wspace=SUBPLOT_WSPACE)
+    amd_failed_info = prepare_textbox_content(failures_amd, 'AMD', bool(amd_filtered), model_name, historical_df)
+    nvidia_failed_info = prepare_textbox_content(failures_nvidia, 'NVIDIA', bool(nvidia_filtered), model_name, historical_df)
     return fig, amd_failed_info, nvidia_failed_info
+def prepare_textbox_content(failures: dict[str, list], device: str, data_available: bool, model_name: str = None, historical_df: pd.DataFrame = None) -> str:
+    """Extract failure information from failures object with first seen dates."""
     # Catch the case where there is no data
     if not data_available:
         return generate_underlined_line(f"No data for {device}")
         ""
     ]
+    # Helper function to format failure line with first seen date
+    def format_failure_line(test: dict, gpu_type: str) -> str:
+        """Return the short test name, suffixed with its first-seen date when known.
+        The short name is the part of the test's ``line`` after the last ``::``.
+        The date is looked up only when a model name and non-empty historical
+        data were supplied to the enclosing function. It is shown as MM-DD-YYYY,
+        or verbatim when it cannot be parsed as YYYY-MM-DD.
+        """
+        full_name = test.get("line", "::*could not find name*")
+        short_name = full_name.split("::")[-1]
+        # Without history (or a model to look up) there is no date to show
+        if historical_df is None or model_name is None or historical_df.empty:
+            return short_name
+        first_seen = find_failure_first_seen(
+            historical_df,
+            model_name,
+            full_name,
+            device.lower(),
+            gpu_type
+        )
+        if not first_seen:
+            return short_name
+        # Format date as MM-DD-YYYY
+        try:
+            from datetime import datetime
+            formatted_date = datetime.strptime(first_seen, "%Y-%m-%d").strftime("%m-%d-%Y")
+        except (ValueError, TypeError):
+            # Unparseable or non-string date: show it as-is rather than dropping it
+            return f"{short_name} (First seen: {first_seen})"
+        return f"{short_name} (First seen: {formatted_date})"
     # Add single-gpu failures
     if single_failures:
         info_lines.append(generate_underlined_line("Single GPU failures:"))
         for test in single_failures:
+            info_lines.append(format_failure_line(test, "single"))
         info_lines.append("\n")
     # Add multi-gpu failures
     if multi_failures:
         info_lines.append(generate_underlined_line("Multi GPU failures:"))
         for test in multi_failures:
+            info_lines.append(format_failure_line(test, "multi"))
+    return "\n".join(info_lines)

requirements.txt CHANGED Viewed

	@@ -1 +1,3 @@
1	matplotlib>=3.8

 matplotlib>=3.8
+gradio_toggle
+plotly>=5.0

styles.css CHANGED Viewed

@@ -3,6 +3,8 @@
     --main-content-bottom-margin: 10px; /* Configurable bottom margin for main content */
 }
 .gradio-container {
     background-color: #000000 !important;
     color: white !important;
@@ -173,6 +175,96 @@ div[data-testid="column"]:has(.sidebar) {
     transition: max-height 0.3s ease !important;
 }
 /* Model button styling */
 .model-button {
@@ -371,52 +463,28 @@ div[data-testid="column"]:has(.sidebar) {
 /* Plot container with smooth transitions and controlled scrolling */
 .plot-container {
-    background-color: #000000 !important;
     border: none !important;
     transition: opacity 0.6s ease-in-out !important;
     flex: 1 1 auto !important;
     min-height: 0 !important;
     overflow-y: auto !important;
     scrollbar-width: thin !important;
-    scrollbar-color: #333333 #000000 !important;
 }
 /* Custom scrollbar for plot container */
 .plot-container::-webkit-scrollbar {
     width: 8px !important;
-    background: #000000 !important;
-}
-.plot-container::-webkit-scrollbar-track {
-    background: #000000 !important;
-}
-.plot-container::-webkit-scrollbar-thumb {
-    background-color: #333333 !important;
-    border-radius: 4px !important;
-}
-.plot-container::-webkit-scrollbar-thumb:hover {
-    background-color: #555555 !important;
 }
-/* Gradio plot component styling */
-.gr-plot {
-    background-color: #000000 !important;
-    transition: opacity 0.6s ease-in-out !important;
-}
-.gr-plot .gradio-plot {
-    background-color: #000000 !important;
-    transition: opacity 0.6s ease-in-out !important;
-}
 .gr-plot img {
     transition: opacity 0.6s ease-in-out !important;
 }
 /* Target the plot wrapper */
-div[data-testid="plot"] {
     background-color: #000000 !important;
 }
@@ -427,11 +495,6 @@ div[data-testid="plot"] {
     background-color: #000000 !important;
 }
-/* Ensure plot area background */
-.gr-plot > div,
-.plot-container > div {
-    background-color: #000000 !important;
-}
 /* Prevent white flash during plot updates */
 .plot-container::before {
@@ -445,24 +508,26 @@ div[data-testid="plot"] {
     z-index: -1;
 }
-/* Force all plot elements to have black background */
-.plot-container *,
-.gr-plot *,
-div[data-testid="plot"] * {
-    background-color: #000000 !important;
 }
-/* Override any white backgrounds in matplotlib */
-.plot-container canvas,
-.gr-plot canvas {
-    background-color: #000000 !important;
-}
 /* Text elements */
 h1, h2, h3, p, .markdown {
     color: white !important;
 }
 /* Sidebar header enhancement */
 .sidebar h1 {
     background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
@@ -529,6 +594,116 @@ h1, h2, h3, p, .markdown {
     flex-direction: column !important;
 }
 /* Custom scrollbar for main content */
 .main-content {
     scrollbar-width: thin !important;
@@ -667,3 +842,203 @@ h1, h2, h3, p, .markdown {
     100% { scroll-behavior: auto; }
 }

     --main-content-bottom-margin: 10px; /* Configurable bottom margin for main content */
 }
 .gradio-container {
     background-color: #000000 !important;
     color: white !important;
     transition: max-height 0.3s ease !important;
 }
+/* Full-width sidebar button; an over-long label is cut with an ellipsis */
+.history-view-button {
+    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
+    color: white !important;
+    margin: 0px 0px !important;
+    padding: 8px 12px !important;
+    font-weight: 600 !important;
+    font-size: 14px !important;
+    text-transform: uppercase !important;
+    letter-spacing: 0.3px !important;
+    font-family: monospace !important;
+    width: 100% !important;
+    max-width: 100% !important;
+    white-space: nowrap !important;
+    /* text-overflow only takes effect when overflow is not visible */
+    overflow: hidden !important;
+    text-overflow: ellipsis !important;
+    display: block !important;
+    cursor: pointer !important;
+    transition: all 0.3s ease !important;
+}
+/* Failing models filter row */
+.failing-models-filter-row {
+    background: linear-gradient(145deg, #1a1a1a, #0f0f0f) !important;
+    border: 1px solid #333 !important;
+    border-radius: 6px !important;
+    padding: 8px 8px !important;
+    margin: 0px 0px 12px 0px !important;
+    gap: 8px !important;
+}
+/* Failing models toggle styling */
+.failing-models-toggle {
+    background: transparent !important;
+    border: none !important;
+    padding: 4px 6px !important;
+    margin: 0 !important;
+    flex: 1 !important;
+}
+.failing-models-toggle:hover {
+    background: rgba(255, 255, 255, 0.05) !important;
+    border-radius: 4px !important;
+}
+.failing-models-toggle label {
+    color: #FFFFFF !important;
+    font-family: monospace !important;
+    font-size: 11px !important;
+    font-weight: 600 !important;
+    text-transform: uppercase !important;
+    letter-spacing: 0.5px !important;
+    cursor: pointer !important;
+    display: flex !important;
+    align-items: center !important;
+    white-space: nowrap !important;
+}
+/* Override specific colors for AMD and NVIDIA to white */
+.amd-toggle label,
+.amd-toggle label span {
+    color: #FFFFFF !important;
+}
+.nvidia-toggle label,
+.nvidia-toggle label span {
+    color: #FFFFFF !important;
+}
+.failing-models-toggle input[type="checkbox"] {
+    cursor: pointer !important;
+    width: 16px !important;
+    height: 16px !important;
+    margin-right: 6px !important;
+}
+.amd-toggle input[type="checkbox"] {
+    accent-color: #FF6B6B !important;
+}
+.nvidia-toggle input[type="checkbox"] {
+    accent-color: #76B900 !important;
+}
+.amd-toggle input[type="checkbox"]:checked {
+    accent-color: #FF8888 !important;
+}
+.nvidia-toggle input[type="checkbox"]:checked {
+    accent-color: #8BD918 !important;
+}
 /* Model button styling */
 .model-button {
 /* Plot container with smooth transitions and controlled scrolling */
 .plot-container {
     border: none !important;
     transition: opacity 0.6s ease-in-out !important;
     flex: 1 1 auto !important;
     min-height: 0 !important;
     overflow-y: auto !important;
     scrollbar-width: thin !important;
+    padding: 0 !important;
 }
 /* Custom scrollbar for plot container */
 .plot-container::-webkit-scrollbar {
     width: 8px !important;
 }
 .gr-plot img {
     transition: opacity 0.6s ease-in-out !important;
 }
 /* Target the plot wrapper */
+div[data-testid="matplotlib"] {
     background-color: #000000 !important;
 }
     background-color: #000000 !important;
 }
 /* Prevent white flash during plot updates */
 .plot-container::before {
     z-index: -1;
 }
+.vega-embed {
+    position: absolute !important;
 }
 /* Text elements */
 h1, h2, h3, p, .markdown {
     color: white !important;
 }
+.toggle {
+    margin: 0 auto !important;
+}
+.toggle-label {
+    color: white !important;
+    font-family: monospace !important;
+    font-size: 14px !important;
+}
 /* Sidebar header enhancement */
 .sidebar h1 {
     background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
     flex-direction: column !important;
 }
+/* Summary view - position content slightly higher (not fully centered) */
+.summary-view {
+    display: flex !important;
+    flex-direction: column !important;
+    align-items: center !important;
+    justify-content: flex-start !important;
+    gap: 10px !important;
+    padding-top: 20px !important;
+}
+/* Keep the summary display centered */
+.summary-view .plot-container {
+    width: 100% !important;
+}
+/* Regressions components stay with the summary as a group */
+.regressions-header {
+    margin: 0px 0px 10px 0px !important;
+    width: 100% !important;
+    max-width: 100% !important;
+    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
+    color: white !important;
+    border: 1px solid #8B4513 !important;
+    border-radius: 5px !important;
+    font-weight: 600 !important;
+    font-size: 14px !important;
+    font-family: monospace !important;
+    text-align: left !important;
+    transition: all 0.3s ease !important;
+}
+.regressions-header:hover {
+    background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
+    border-color: #B8621B !important;
+}
+/* Collapsible regressions content */
+.regressions-content-visible {
+    max-height: 800px !important;
+    overflow-y: auto !important;
+    transition: max-height 0.3s ease !important;
+    scrollbar-width: thin !important;
+    -ms-overflow-style: none !important;
+}
+.regressions-content-visible::-webkit-scrollbar {
+    width: 8px !important;
+    background: transparent !important;
+}
+.regressions-content-visible::-webkit-scrollbar-thumb {
+    background-color: #333333 !important;
+    border-radius: 4px !important;
+}
+.regressions-content-hidden {
+    max-height: 0 !important;
+    overflow: hidden !important;
+    transition: max-height 0.3s ease !important;
+}
+/* New Regressions Panel */
+.regressions-panel {
+    background: linear-gradient(145deg, #2a1a1a, #1a0f0f) !important;
+    border: 2px solid #8B4513 !important;
+    border-radius: 8px !important;
+    padding: 15px 20px !important;
+    margin: 0px 0px 15px 0px !important;
+    box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2) !important;
+    animation: pulse-border 2s ease-in-out infinite !important;
+}
+.regressions-panel h3 {
+    color: #FFB86C !important;
+    font-family: monospace !important;
+    font-size: 16px !important;
+    font-weight: bold !important;
+    margin: 0 0 10px 0 !important;
+    display: flex !important;
+    align-items: center !important;
+}
+.regressions-panel p,
+.regressions-panel ul,
+.regressions-panel li {
+    color: #FFFFFF !important;
+    font-family: monospace !important;
+    font-size: 13px !important;
+    line-height: 1.6 !important;
+    margin: 4px 0 !important;
+}
+.regressions-panel strong {
+    color: #FF6B6B !important;
+    font-weight: 600 !important;
+}
+/* Pulse animation for new regressions */
+@keyframes pulse-border {
+    0%, 100% {
+        border-color: #8B4513;
+        box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2);
+    }
+    50% {
+        border-color: #B8621B;
+        box-shadow: 0 4px 16px rgba(255, 107, 107, 0.4);
+    }
+}
 /* Custom scrollbar for main content */
 .main-content {
     scrollbar-width: thin !important;
     100% { scroll-behavior: auto; }
 }
+/* View toggle buttons */
+.view-toggle-row {
+    display: flex !important;
+    gap: 5px !important;
+    margin-bottom: 15px !important;
+}
+.view-toggle-button {
+    flex: 1 !important;
+    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
+    color: white !important;
+    border: 1px solid #333 !important;
+    border-radius: 5px !important;
+    padding: 8px 6px !important;
+    transition: all 0.3s ease !important;
+    font-weight: 600 !important;
+    font-size: 12px !important;
+    text-transform: uppercase !important;
+    letter-spacing: 0.3px !important;
+    font-family: monospace !important;
+    height: 50px !important;
+    display: flex !important;
+    flex-direction: column !important;
+    justify-content: center !important;
+    align-items: center !important;
+    line-height: 1.2 !important;
+    cursor: pointer !important;
+}
+.view-toggle-button:hover {
+    background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
+    border-color: #555 !important;
+}
+.view-toggle-active {
+    background: linear-gradient(135deg, #4a4a4a, #3e3e3e) !important;
+    border: 2px solid #555555 !important;
+    box-shadow:
+        0 4px 15px rgba(0, 0, 0, 0.3),
+        inset 0 1px 0 rgba(255, 255, 255, 0.2) !important;
+}
+/* Date selection styling */
+.date-selection {
+    flex-grow: 0 !important;
+    background: linear-gradient(145deg, #0f0f0f, #1a1a1a) !important;
+    border: 1px solid #333 !important;
+    border-radius: 8px !important;
+    padding: 15px !important;
+    margin-bottom: 15px !important;
+    transition: all 0.3s ease !important;
+    overflow: hidden !important;
+}
+.date-selection-hidden {
+    max-height: 0 !important;
+    padding: 0 15px !important;
+    margin-bottom: 0 !important;
+    border: none !important;
+}
+.date-selection-visible {
+    max-height: 500px !important;
+}
+.date-header {
+    margin-bottom: 10px !important;
+    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
+    color: white !important;
+    border: 1px solid #333 !important;
+    border-radius: 5px !important;
+    padding: 8px 12px !important;
+    transition: all 0.3s ease !important;
+    font-family: monospace !important;
+    font-size: 12px !important;
+    text-align: left !important;
+    cursor: pointer !important;
+    width: 100% !important;
+    box-sizing: border-box !important;
+}
+.date-header:hover {
+    background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
+    border-color: #444 !important;
+    transform: translateY(-1px) !important;
+    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3) !important;
+}
+.date-dropdown {
+    background-color: #222222 !important;
+    color: white !important;
+    border: 1px solid #444444 !important;
+    border-radius: 5px !important;
+    font-family: monospace !important;
+    font-size: 12px !important;
+}
+.date-dropdown .gr-dropdown {
+    background-color: #222222 !important;
+    color: white !important;
+    border: 1px solid #444444 !important;
+}
+.load-historical-button {
+    background: linear-gradient(135deg, #2d5aa0, #1e3f73) !important;
+    color: white !important;
+    border: 1px solid #3a6bc7 !important;
+    border-radius: 5px !important;
+    padding: 8px 12px !important;
+    transition: all 0.3s ease !important;
+    font-weight: 500 !important;
+    font-size: 12px !important;
+    text-transform: uppercase !important;
+    letter-spacing: 0.1px !important;
+    font-family: monospace !important;
+    width: 100% !important;
+    margin-top: 10px !important;
+}
+.load-historical-button:hover {
+    background: linear-gradient(135deg, #3a6bc7, #2d5aa0) !important;
+    border-color: #4a7bd9 !important;
+}
+/* Historical view styling */
+.historical-view {
+    background-color: #000000 !important;
+    padding: 30px 20px !important;
+}
+.time-series-detail-view {
+    background-color: #000000 !important;
+    padding: 30px 20px !important;
+}
+/* Plotly chart styling for historical view */
+.historical-view .plot-container,
+.time-series-detail-view .plot-container {
+    background-color: #000000 !important;
+}
+/* Plotly specific text styling */
+.historical-view .js-plotly-plot .plotly,
+.time-series-detail-view .js-plotly-plot .plotly {
+    background-color: #000000 !important;
+}
+/* Plotly legend text */
+.historical-view .js-plotly-plot .legend text,
+.time-series-detail-view .js-plotly-plot .legend text {
+    font-size: 16px !important;
+    fill: #CCCCCC !important;
+}
+/* Plotly axis titles */
+.historical-view .js-plotly-plot .g-xtitle text,
+.historical-view .js-plotly-plot .g-ytitle text,
+.time-series-detail-view .js-plotly-plot .g-xtitle text,
+.time-series-detail-view .js-plotly-plot .g-ytitle text {
+    font-size: 16px !important;
+    fill: #CCCCCC !important;
+}
+/* Plotly axis tick labels */
+.historical-view .js-plotly-plot .xtick text,
+.historical-view .js-plotly-plot .ytick text,
+.time-series-detail-view .js-plotly-plot .xtick text,
+.time-series-detail-view .js-plotly-plot .ytick text {
+    font-size: 14px !important;
+    fill: #CCCCCC !important;
+}
+/* Plotly title */
+.historical-view .js-plotly-plot .g-gtitle text,
+.time-series-detail-view .js-plotly-plot .g-gtitle text {
+    font-size: 20px !important;
+    fill: #FFFFFF !important;
+    font-weight: 600 !important;
+}
+/* Back button styling */
+.back-button {
+    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
+    color: white !important;
+    border: 1px solid #333 !important;
+    border-radius: 5px !important;
+    padding: 8px 12px !important;
+    transition: all 0.3s ease !important;
+    font-weight: 500 !important;
+    font-size: 12px !important;
+    font-family: monospace !important;
+    margin-bottom: 15px !important;
+    width: 100% !important;
+}
+.back-button:hover {
+    background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
+    border-color: #555 !important;
+    color: #74b9ff !important;
+}

summary_page.py CHANGED Viewed

@@ -1,26 +1,30 @@
 import matplotlib.pyplot as plt
 import pandas as pd
 from data import extract_model_data
 # Layout parameters
 COLUMNS = 3
 # Derived constants
-COLUMN_WIDTH = 100 / COLUMNS  # Each column takes 25% of width
-BAR_WIDTH = COLUMN_WIDTH * 0.8  # 80% of column width for bars
-BAR_MARGIN = COLUMN_WIDTH * 0.1  # 10% margin on each side
 # Figure dimensions
-FIGURE_WIDTH = 22  # Wider to accommodate columns and legend
-MAX_HEIGHT = 14  # Maximum height in inches
 MIN_HEIGHT_PER_ROW = 2.8
 FIGURE_PADDING = 1
 # Bar styling
-BAR_HEIGHT_RATIO = 0.22  # Bar height as ratio of vertical spacing
-VERTICAL_SPACING_RATIO = 0.2  # Base vertical position ratio
-AMD_BAR_OFFSET = 0.25  # AMD bar offset ratio
-NVIDIA_BAR_OFFSET = 0.54  # NVIDIA bar offset ratio
 # Colors
 COLORS = {
@@ -34,21 +38,35 @@ COLORS = {
 # Font styling
 MODEL_NAME_FONT_SIZE = 16
 LABEL_FONT_SIZE = 14
-LABEL_OFFSET = 1  # Distance of label from bar
 FAILURE_RATE_FONT_SIZE = 28
-def get_overall_stats(df: pd.DataFrame, available_models: list[str]) -> tuple[list[int], list[int]]:
     """Calculate overall failure rates for AMD and NVIDIA across all models."""
     if df.empty or not available_models:
         return 0.0, 0.0
-    total_amd_passed = 0
-    total_amd_failed = 0
-    total_amd_skipped = 0
-    total_nvidia_passed = 0
-    total_nvidia_failed = 0
-    total_nvidia_skipped = 0
     for model_name in available_models:
         if model_name not in df.index:
@@ -58,19 +76,24 @@ def get_overall_stats(df: pd.DataFrame, available_models: list[str]) -> tuple[li
         amd_stats, nvidia_stats = extract_model_data(row)[:2]
         # AMD totals
-        total_amd_passed += amd_stats['passed']
-        total_amd_failed += amd_stats['failed'] + amd_stats['error']
-        total_amd_skipped += amd_stats['skipped']
         # NVIDIA totals
-        total_nvidia_passed += nvidia_stats['passed']
-        total_nvidia_failed += nvidia_stats['failed'] + nvidia_stats['error']
-        total_nvidia_skipped += nvidia_stats['skipped']
-    return [total_amd_passed, total_amd_failed, total_amd_skipped], [total_nvidia_passed, total_nvidia_failed, total_nvidia_skipped]
-def draw_text_and_bar(
     label: str,
     stats: dict[str, int],
     y_bar: float,
@@ -78,19 +101,72 @@ def draw_text_and_bar(
     bar_height: float,
     ax: plt.Axes,
 ) -> None:
-    """Draw a horizontal bar chart for given stats and its label on the left."""
-    # Text
-    label_x = column_left_position - LABEL_OFFSET
     failures_present = any(stats[category] > 0 for category in ['failed', 'error'])
     if failures_present:
-        props = dict(boxstyle='round', facecolor=COLORS['failed'], alpha=0.35)
     else:
-        props = dict(alpha=0)
-    ax.text(
-        label_x, y_bar, label, ha='right', va='center', color='#CCCCCC', fontsize=LABEL_FONT_SIZE,
-        fontfamily='monospace', fontweight='normal', bbox=props
     )
-    # Bar
     total = sum(stats.values())
     if total > 0:
         left = column_left_position
@@ -115,14 +191,7 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
         return fig
     # Calculate overall failure rates
-    amd_counts, nvidia_counts = get_overall_stats(df, available_models)
-    amd_non_skipped = amd_counts[0] + amd_counts[1]
-    amd_failure_rate = (amd_counts[1] / amd_non_skipped) if amd_non_skipped > 0 else 0.0
-    amd_failure_rate *= 100
-    nvidia_non_skipped = nvidia_counts[0] + nvidia_counts[1]
-    nvidia_failure_rate = (nvidia_counts[1] / nvidia_non_skipped) if nvidia_non_skipped > 0 else 0.0
-    nvidia_failure_rate *= 100
     # Calculate dimensions for N-column layout
     model_count = len(available_models)
@@ -143,6 +212,10 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
     visible_model_count = 0
     max_y = 0
     for i, model_name in enumerate(available_models):
         if model_name not in df.index:
@@ -152,6 +225,15 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
         # Extract and process model data
         amd_stats, nvidia_stats = extract_model_data(row)[:2]
         # Calculate position in the COLUMNS-column grid
         col = visible_model_count % COLUMNS
@@ -176,44 +258,43 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
         # AMD label and bar in this column
         bar_height = min(0.4, vertical_spacing * BAR_HEIGHT_RATIO)
-        # Draw AMD bar
-        draw_text_and_bar("amd", amd_stats, y_amd_bar, col_left, bar_height, ax)
-        # Draw NVIDIA bar
-        draw_text_and_bar("nvidia", nvidia_stats, y_nvidia_bar, col_left, bar_height, ax)
         # Increment counter for next visible model
         visible_model_count += 1
     # Add AMD and NVIDIA test totals in the bottom left
     # Calculate line spacing to align middle with legend
     line_height = 0.4  # Height between lines
-    legend_y = max_y + 1
     # Position the two lines so their middle aligns with legend_y
     amd_y = legend_y - line_height / 2
     nvidia_y = legend_y + line_height / 2
-    amd_totals_text =    f"AMD Tests    - Passed: {amd_counts[0]}, Failed: {amd_counts[1]}, Skipped: {amd_counts[2]}"
-    nvidia_totals_text = f"NVIDIA Tests - Passed: {nvidia_counts[0]}, Failed: {nvidia_counts[1]}, Skipped: {nvidia_counts[2]}"
     ax.text(0, amd_y, amd_totals_text,
            ha='left', va='bottom', color='#CCCCCC',
            fontsize=14, fontfamily='monospace')
     ax.text(0, nvidia_y, nvidia_totals_text,
            ha='left', va='bottom', color='#CCCCCC',
            fontsize=14, fontfamily='monospace')
-    # Add legend horizontally in bottom right corner
-    patch_height = 0.3
-    patch_width = 3
-    legend_start_x = 68.7
-    legend_y = max_y + 1
-    legend_spacing = 10
-    legend_font_size = 15
     # Legend entries
     legend_items = [
         ('passed', 'Passed'),

 import matplotlib.pyplot as plt
 import pandas as pd
+from matplotlib.offsetbox import OffsetImage, AnnotationBbox
+from matplotlib.patches import FancyBboxPatch
+import matplotlib.image as mpimg
+import os
 from data import extract_model_data
 # Layout parameters
 COLUMNS = 3
 # Derived constants
+COLUMN_WIDTH = 100 / COLUMNS
+BAR_WIDTH = COLUMN_WIDTH * 0.8
+BAR_MARGIN = COLUMN_WIDTH * 0.1
 # Figure dimensions
+FIGURE_WIDTH = 22
+MAX_HEIGHT = 14
 MIN_HEIGHT_PER_ROW = 2.8
 FIGURE_PADDING = 1
 # Bar styling
+BAR_HEIGHT_RATIO = 0.22
+VERTICAL_SPACING_RATIO = 0.2
+AMD_BAR_OFFSET = 0.25
+NVIDIA_BAR_OFFSET = 0.54
 # Colors
 COLORS = {
 # Font styling
 MODEL_NAME_FONT_SIZE = 16
 LABEL_FONT_SIZE = 14
+LABEL_OFFSET = 1
 FAILURE_RATE_FONT_SIZE = 28
+# Logo settings
+LOGO_BOX_WIDTH = 4.5  # Width of the box drawn behind each logo
+LOGO_BOX_HEIGHT = 0.43  # Height of that box
+LOGO_ZOOM = 0.09  # Zoom factor passed to OffsetImage
+# Load logos once at module level
+SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+def _load_logo(filename):
+    """Return the image read from logos/<filename>, or None if it cannot be read.
+    A missing or unreadable logo must not stop this module from importing;
+    draw_logo_and_bar() draws a text label instead when the logo is None.
+    """
+    try:
+        return mpimg.imread(os.path.join(SCRIPT_DIR, 'logos', filename))
+    except Exception:
+        # `Exception` rather than a bare `except:` so that KeyboardInterrupt
+        # and SystemExit still propagate during import.
+        return None
+AMD_LOGO = _load_logo('amd_logo.png')
+NVIDIA_LOGO = _load_logo('nvidia_logo.png')
+def calculate_overall_failure_rates(df: pd.DataFrame, available_models: list[str]) -> tuple[float, float]:
     """Calculate overall failure rates for AMD and NVIDIA across all models."""
     if df.empty or not available_models:
         return 0.0, 0.0
+    total_amd_tests = 0
+    total_amd_failures = 0
+    total_nvidia_tests = 0
+    total_nvidia_failures = 0
     for model_name in available_models:
         if model_name not in df.index:
         amd_stats, nvidia_stats = extract_model_data(row)[:2]
         # AMD totals
+        amd_total = amd_stats['passed'] + amd_stats['failed'] + amd_stats['error']
+        if amd_total > 0:
+            total_amd_tests += amd_total
+            total_amd_failures += amd_stats['failed'] + amd_stats['error']
         # NVIDIA totals
+        nvidia_total = nvidia_stats['passed'] + nvidia_stats['failed'] + nvidia_stats['error']
+        if nvidia_total > 0:
+            total_nvidia_tests += nvidia_total
+            total_nvidia_failures += nvidia_stats['failed'] + nvidia_stats['error']
+    amd_failure_rate = (total_amd_failures / total_amd_tests * 100) if total_amd_tests > 0 else 0.0
+    nvidia_failure_rate = (total_nvidia_failures / total_nvidia_tests * 100) if total_nvidia_tests > 0 else 0.0
+    return amd_failure_rate, nvidia_failure_rate
+def draw_logo_and_bar(
     label: str,
     stats: dict[str, int],
     y_bar: float,
     bar_height: float,
     ax: plt.Axes,
 ) -> None:
+    """Draw a horizontal bar chart for given stats with a logo box on the left."""
+    # Determine if there are failures
     failures_present = any(stats[category] > 0 for category in ['failed', 'error'])
+    # Select the appropriate logo
+    logo = AMD_LOGO if label.lower() == "amd" else NVIDIA_LOGO
+    # Calculate box position (centered on the bar vertically)
+    box_x = column_left_position - LABEL_OFFSET - LOGO_BOX_WIDTH
+    box_y = y_bar - LOGO_BOX_HEIGHT / 2
+    # Draw the colored box
     if failures_present:
+        box_color = COLORS['failed']  # Red for failures
+        box_alpha = 0.6
     else:
+        box_color = '#2a2a2a'  # Dark gray for no failures
+        box_alpha = 0.5
+    box = FancyBboxPatch(
+        (box_x, box_y),
+        LOGO_BOX_WIDTH,
+        LOGO_BOX_HEIGHT,
+        boxstyle="round,pad=0.05",
+        facecolor=box_color,
+        edgecolor='#444444',
+        linewidth=1,
+        alpha=box_alpha
     )
+    ax.add_patch(box)
+    # Add logo image inside the box if available
+    if logo is not None:
+        try:
+            imagebox = OffsetImage(logo, zoom=LOGO_ZOOM)
+            ab = AnnotationBbox(
+                imagebox,
+                (box_x + LOGO_BOX_WIDTH / 2, y_bar),
+                frameon=False,
+                box_alignment=(0.5, 0.5)
+            )
+            ax.add_artist(ab)
+        except:
+            # Fallback to text if logo doesn't work
+            ax.text(
+                box_x + LOGO_BOX_WIDTH / 2, y_bar,
+                label.upper(),
+                ha='center', va='center',
+                color='#FFFFFF',
+                fontsize=10,
+                fontfamily='monospace',
+                fontweight='bold'
+            )
+    else:
+        # Fallback to text if logo not loaded
+        ax.text(
+            box_x + LOGO_BOX_WIDTH / 2, y_bar,
+            label.upper(),
+            ha='center', va='center',
+            color='#FFFFFF',
+            fontsize=10,
+            fontfamily='monospace',
+            fontweight='bold'
+        )
+    # Draw the bar
     total = sum(stats.values())
     if total > 0:
         left = column_left_position
         return fig
     # Calculate overall failure rates
+    amd_failure_rate, nvidia_failure_rate = calculate_overall_failure_rates(df, available_models)
     # Calculate dimensions for N-column layout
     model_count = len(available_models)
     visible_model_count = 0
     max_y = 0
+    # Initialize counters for total tests
+    amd_totals = {'passed': 0, 'failed': 0, 'skipped': 0}
+    nvidia_totals = {'passed': 0, 'failed': 0, 'skipped': 0}
     for i, model_name in enumerate(available_models):
         if model_name not in df.index:
         # Extract and process model data
         amd_stats, nvidia_stats = extract_model_data(row)[:2]
+        # Accumulate totals
+        amd_totals['passed'] += amd_stats['passed']
+        amd_totals['failed'] += amd_stats['failed'] + amd_stats['error']
+        amd_totals['skipped'] += amd_stats['skipped']
+        nvidia_totals['passed'] += nvidia_stats['passed']
+        nvidia_totals['failed'] += nvidia_stats['failed'] + nvidia_stats['error']
+        nvidia_totals['skipped'] += nvidia_stats['skipped']
         # Calculate position in the COLUMNS-column grid
         col = visible_model_count % COLUMNS
         # AMD label and bar in this column
         bar_height = min(0.4, vertical_spacing * BAR_HEIGHT_RATIO)
+        # Draw AMD bar with logo
+        draw_logo_and_bar("amd", amd_stats, y_amd_bar, col_left, bar_height, ax)
+        # Draw NVIDIA bar with logo
+        draw_logo_and_bar("nvidia", nvidia_stats, y_nvidia_bar, col_left, bar_height, ax)
         # Increment counter for next visible model
         visible_model_count += 1
+    # Add legend horizontally in bottom right corner
+    patch_height = 0.3
+    patch_width = 3
+    legend_start_x = 68.7
+    legend_y = max_y + 1
+    legend_spacing = 10
+    legend_font_size = 15
     # Add AMD and NVIDIA test totals in the bottom left
     # Calculate line spacing to align middle with legend
     line_height = 0.4  # Height between lines
     # Position the two lines so their middle aligns with legend_y
     amd_y = legend_y - line_height / 2
     nvidia_y = legend_y + line_height / 2
+    amd_totals_text = f"AMD Tests    - Passed: {amd_totals['passed']}, Failed: {amd_totals['failed']}, Skipped: {amd_totals['skipped']}"
+    nvidia_totals_text = f"NVIDIA Tests - Passed: {nvidia_totals['passed']}, Failed: {nvidia_totals['failed']}, Skipped: {nvidia_totals['skipped']}"
     ax.text(0, amd_y, amd_totals_text,
            ha='left', va='bottom', color='#CCCCCC',
            fontsize=14, fontfamily='monospace')
     ax.text(0, nvidia_y, nvidia_totals_text,
            ha='left', va='bottom', color='#CCCCCC',
            fontsize=14, fontfamily='monospace')
     # Legend entries
     legend_items = [
         ('passed', 'Passed'),

time_series.py ADDED Viewed

	@@ -0,0 +1,316 @@

+import matplotlib.pyplot as plt
+import pandas as pd
+import numpy as np
+from datetime import datetime
+from data import extract_model_data
+# Color palette (hex RGB strings) keyed by test outcome and by GPU platform
+COLORS = {
+    'passed': '#4CAF50',
+    'failed': '#E53E3E',
+    'skipped': '#FFD54F',
+    'error': '#8B0000',
+    'amd': '#ED1C24',
+    'nvidia': '#76B900'
+}
+# Base figure size passed to matplotlib's `figsize`
+FIGURE_WIDTH = 20
+FIGURE_HEIGHT = 12
+# Theme colors: figure/axes background, axis labels and ticks, titles, grid lines
+BLACK = '#000000'
+LABEL_COLOR = '#CCCCCC'
+TITLE_COLOR = '#FFFFFF'
+GRID_COLOR = '#333333'
+# Font sizes; the charts derive smaller or larger variants by adding offsets to these
+TITLE_FONT_SIZE = 24
+LABEL_FONT_SIZE = 14
+LEGEND_FONT_SIZE = 12
+def _empty_summary_figure(message: str) -> plt.Figure:
+    """Return a black placeholder figure whose only content is `message`."""
+    fig, ax = plt.subplots(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT), facecolor=BLACK)
+    ax.set_facecolor(BLACK)
+    ax.text(0.5, 0.5, message,
+            horizontalalignment='center', verticalalignment='center',
+            transform=ax.transAxes, fontsize=20, color='#888888',
+            fontfamily='monospace', weight='normal')
+    ax.axis('off')
+    # Deregister from pyplot so placeholders do not accumulate; the Figure object stays usable
+    plt.close(fig)
+    return fig
+def _aggregate_daily_stats(dated_df: pd.DataFrame) -> tuple[list, list[dict]]:
+    """Sum the per-model results into one stats dict per distinct `date_dt` value.
+    Returns the dates in ascending order and, aligned with them, dicts holding
+    `<platform>_passed/_failed/_skipped/_failure_rate` for 'amd' and 'nvidia'.
+    """
+    platforms = ('amd', 'nvidia')
+    dates = []
+    daily_stats = []
+    # groupby yields the groups sorted by date and leaves out rows whose date is NaT
+    for date, date_data in dated_df.groupby('date_dt'):
+        totals = {f'{platform}_{key}': 0
+                  for platform in platforms
+                  for key in ('passed', 'failed', 'skipped')}
+        for _, row in date_data.iterrows():
+            amd_stats, nvidia_stats = extract_model_data(row)[:2]
+            for platform, stats in zip(platforms, (amd_stats, nvidia_stats)):
+                totals[f'{platform}_passed'] += stats['passed']
+                # Errors count as failures, as summary_page.py does for its totals
+                totals[f'{platform}_failed'] += stats['failed'] + stats.get('error', 0)
+                totals[f'{platform}_skipped'] += stats['skipped']
+        for platform in platforms:
+            failed = totals[f'{platform}_failed']
+            # Skipped tests are excluded from the failure-rate denominator
+            executed = totals[f'{platform}_passed'] + failed
+            totals[f'{platform}_failure_rate'] = (failed / executed * 100) if executed > 0 else 0
+        dates.append(date)
+        daily_stats.append(totals)
+    return dates, daily_stats
+def create_time_series_summary(historical_df: pd.DataFrame) -> plt.Figure:
+    """Build the matplotlib overview of historical CI results across all models.
+    The figure has four panels: AMD/NVIDIA failure rates over time (with a linear
+    trend line when there are more than two dates), one stacked passed/failed/skipped
+    chart per platform, and a text summary of the most recent date.
+    Args:
+        historical_df: One row per model and date. It must have a `date` column that
+            `pd.to_datetime` can parse; each row is handed to `extract_model_data`.
+            The frame is not modified.
+    Returns:
+        The figure, already closed in pyplot (it can still be rendered or saved).
+        A placeholder figure is returned when there is no data to plot.
+    """
+    if historical_df.empty or 'date' not in historical_df.columns:
+        return _empty_summary_figure('No historical data available')
+    # assign() builds a new frame, so the caller's DataFrame does not gain a `date_dt` column
+    dated_df = historical_df.assign(date_dt=pd.to_datetime(historical_df['date']))
+    dates, daily_stats = _aggregate_daily_stats(dated_df)
+    if not daily_stats:
+        # Every row had a missing date, so there is nothing to plot
+        return _empty_summary_figure('No historical data available')
+    fig = plt.figure(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT + 4), facecolor=BLACK)
+    gs = fig.add_gridspec(3, 2, height_ratios=[1.2, 1, 1], width_ratios=[2, 1],
+                          hspace=0.3, wspace=0.25)
+    ax1 = fig.add_subplot(gs[0, :])
+    ax2 = fig.add_subplot(gs[1, 0])
+    ax3 = fig.add_subplot(gs[2, 0])
+    ax4 = fig.add_subplot(gs[1:, 1])
+    for ax in [ax1, ax2, ax3, ax4]:
+        ax.set_facecolor(BLACK)
+    dates_array = pd.DatetimeIndex(dates).to_numpy()
+    # Panel 1: failure rates over time
+    amd_rates = [stat['amd_failure_rate'] for stat in daily_stats]
+    nvidia_rates = [stat['nvidia_failure_rate'] for stat in daily_stats]
+    ax1.fill_between(dates_array, 0, amd_rates, color=COLORS['amd'], alpha=0.15)
+    ax1.fill_between(dates_array, 0, nvidia_rates, color=COLORS['nvidia'], alpha=0.15)
+    ax1.plot(dates_array, amd_rates, color=COLORS['amd'], linewidth=3,
+             label='AMD', marker='o', markersize=7, markeredgewidth=2, markeredgecolor=BLACK)
+    ax1.plot(dates_array, nvidia_rates, color=COLORS['nvidia'], linewidth=3,
+             label='NVIDIA', marker='s', markersize=7, markeredgewidth=2, markeredgecolor=BLACK)
+    if len(amd_rates) > 2:
+        # Linear trend fitted against the day index (0, 1, 2, ...), not the calendar date
+        day_index = np.arange(len(amd_rates))
+        for rates, platform in ((amd_rates, 'amd'), (nvidia_rates, 'nvidia')):
+            trend = np.poly1d(np.polyfit(day_index, rates, 1))
+            ax1.plot(dates_array, trend(day_index),
+                    color=COLORS[platform], linestyle='--', alpha=0.5, linewidth=2)
+    ax1.set_title('Overall Failure Rates Over Time', fontsize=TITLE_FONT_SIZE,
+                  color=TITLE_COLOR, fontfamily='monospace', fontweight='bold', pad=20)
+    ax1.set_ylabel('Failure Rate (%)', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
+    ax1.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
+    ax1.legend(fontsize=LEGEND_FONT_SIZE, loc='upper right', frameon=False,
+               labelcolor=LABEL_COLOR, prop={'family': 'monospace'})
+    ax1.tick_params(colors=LABEL_COLOR, labelsize=LABEL_FONT_SIZE, axis='x', rotation=45)
+    # Panel 2: AMD stacked test counts
+    amd_passed = [stat['amd_passed'] for stat in daily_stats]
+    amd_failed = [stat['amd_failed'] for stat in daily_stats]
+    amd_skipped = [stat['amd_skipped'] for stat in daily_stats]
+    ax2.stackplot(dates_array, amd_passed, amd_failed, amd_skipped,
+                  colors=[COLORS['passed'], COLORS['failed'], COLORS['skipped']],
+                  alpha=0.8, labels=['Passed', 'Failed', 'Skipped'])
+    ax2.set_title('AMD Test Results', fontsize=TITLE_FONT_SIZE - 2,
+                  color=TITLE_COLOR, fontfamily='monospace', fontweight='bold', pad=15)
+    ax2.set_ylabel('Tests', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
+    ax2.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
+    ax2.tick_params(colors=LABEL_COLOR, labelsize=LABEL_FONT_SIZE - 1, axis='x', rotation=45)
+    # Panel 3: NVIDIA stacked test counts
+    nvidia_passed = [stat['nvidia_passed'] for stat in daily_stats]
+    nvidia_failed = [stat['nvidia_failed'] for stat in daily_stats]
+    nvidia_skipped = [stat['nvidia_skipped'] for stat in daily_stats]
+    ax3.stackplot(dates_array, nvidia_passed, nvidia_failed, nvidia_skipped,
+                  colors=[COLORS['passed'], COLORS['failed'], COLORS['skipped']],
+                  alpha=0.8, labels=['Passed', 'Failed', 'Skipped'])
+    ax3.set_title('NVIDIA Test Results', fontsize=TITLE_FONT_SIZE - 2,
+                  color=TITLE_COLOR, fontfamily='monospace', fontweight='bold', pad=15)
+    ax3.set_ylabel('Tests', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
+    ax3.set_xlabel('Date', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
+    ax3.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
+    ax3.tick_params(colors=LABEL_COLOR, labelsize=LABEL_FONT_SIZE - 1, axis='x', rotation=45)
+    # Panel 4: text summary of the most recent date; the empty entry leaves a gap
+    latest = daily_stats[-1]
+    metrics = [
+        ('Latest AMD Failure Rate', f"{latest['amd_failure_rate']:.1f}%", COLORS['amd']),
+        ('Latest NVIDIA Failure Rate', f"{latest['nvidia_failure_rate']:.1f}%", COLORS['nvidia']),
+        ('', '', None),
+        ('Total AMD Tests', str(latest['amd_passed'] + latest['amd_failed'] + latest['amd_skipped']), '#888888'),
+        ('Total NVIDIA Tests', str(latest['nvidia_passed'] + latest['nvidia_failed'] + latest['nvidia_skipped']), '#888888'),
+    ]
+    ax4.axis('off')
+    y_pos = 0.9
+    ax4.text(0.5, 0.95, 'SUMMARY', ha='center', va='top', fontsize=TITLE_FONT_SIZE - 2,
+             color=TITLE_COLOR, fontfamily='monospace', fontweight='bold',
+             transform=ax4.transAxes)
+    for label, value, color in metrics:
+        if label:
+            ax4.text(0.1, y_pos, label, ha='left', va='center', fontsize=LABEL_FONT_SIZE,
+                    color=LABEL_COLOR, fontfamily='monospace', transform=ax4.transAxes)
+            ax4.text(0.9, y_pos, value, ha='right', va='center', fontsize=LABEL_FONT_SIZE + 2,
+                    color=color or LABEL_COLOR, fontfamily='monospace', fontweight='bold',
+                    transform=ax4.transAxes)
+        y_pos -= 0.15
+    # Shared legend for the two stacked charts
+    handles = [plt.Rectangle((0, 0), 1, 1, fc=COLORS[key], alpha=0.8)
+               for key in ('passed', 'failed', 'skipped')]
+    ax4.legend(handles, ['Passed', 'Failed', 'Skipped'],
+              loc='lower center', fontsize=LEGEND_FONT_SIZE,
+              frameon=False, labelcolor=LABEL_COLOR, prop={'family': 'monospace'})
+    # Close only this figure; plt.close('all') would also close figures owned by other code
+    plt.close(fig)
+    return fig
+def _empty_model_figure(message: str) -> plt.Figure:
+    """Return a black placeholder figure whose only content is `message`."""
+    fig, ax = plt.subplots(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT), facecolor=BLACK)
+    ax.set_facecolor(BLACK)
+    ax.text(0.5, 0.5, message,
+            horizontalalignment='center', verticalalignment='center',
+            transform=ax.transAxes, fontsize=20, color='#888888',
+            fontfamily='monospace', weight='normal')
+    ax.axis('off')
+    # Deregister from pyplot so placeholders do not accumulate; the Figure object stays usable
+    plt.close(fig)
+    return fig
+def create_model_time_series(historical_df: pd.DataFrame, model_name: str) -> plt.Figure:
+    """Build the matplotlib history chart for a single model.
+    The figure has one stacked passed/failed/skipped chart per platform (AMD on top,
+    NVIDIA below), each overlaid with a line tracing the failure count, plus a side
+    panel with the pass rate, fail rate and total of the most recent date.
+    Args:
+        historical_df: Frame indexed by model name with a `date` column that
+            `pd.to_datetime` can parse; each row is handed to `extract_model_data`.
+            The frame is not modified.
+        model_name: Model to plot, matched case-insensitively against the index.
+    Returns:
+        The figure, already closed in pyplot (it can still be rendered or saved).
+        A placeholder figure is returned when there is no data for the model.
+    """
+    if historical_df.empty or 'date' not in historical_df.columns:
+        return _empty_model_figure(f'No historical data available for {model_name}')
+    model_data = historical_df[historical_df.index.str.lower() == model_name.lower()]
+    if model_data.empty:
+        return _empty_model_figure(f'No data found for model: {model_name}')
+    # Work on a copy so the caller's DataFrame does not gain a `date_dt` column
+    model_data = model_data.copy()
+    model_data['date_dt'] = pd.to_datetime(model_data['date'])
+    model_data = model_data.sort_values('date_dt')
+    dates = model_data['date_dt'].values
+    amd_stats_list = []
+    nvidia_stats_list = []
+    for _, row in model_data.iterrows():
+        amd_stats, nvidia_stats = extract_model_data(row)[:2]
+        amd_stats_list.append(amd_stats)
+        nvidia_stats_list.append(nvidia_stats)
+    fig = plt.figure(figsize=(FIGURE_WIDTH, FIGURE_HEIGHT), facecolor=BLACK)
+    gs = fig.add_gridspec(2, 2, height_ratios=[1, 1], width_ratios=[3, 1],
+                          hspace=0.3, wspace=0.2)
+    ax1 = fig.add_subplot(gs[0, 0])
+    ax2 = fig.add_subplot(gs[1, 0])
+    ax3 = fig.add_subplot(gs[:, 1])
+    for ax in [ax1, ax2, ax3]:
+        ax.set_facecolor(BLACK)
+    # (axes, platform label, per-date stats, line marker, is the top chart)
+    panels = [
+        (ax1, 'AMD', amd_stats_list, 'o', True),
+        (ax2, 'NVIDIA', nvidia_stats_list, 's', False),
+    ]
+    sections = []
+    for ax, platform, stats_list, marker, is_top in panels:
+        passed = [stats['passed'] for stats in stats_list]
+        # Errors count as failures, as summary_page.py does for its totals
+        failed = [stats['failed'] + stats.get('error', 0) for stats in stats_list]
+        skipped = [stats['skipped'] for stats in stats_list]
+        ax.stackplot(dates, passed, failed, skipped,
+                     colors=[COLORS['passed'], COLORS['failed'], COLORS['skipped']],
+                     alpha=0.7, labels=['Passed', 'Failed', 'Skipped'])
+        # Failure count on its own line so it stays readable on top of the stack
+        ax.plot(dates, failed, color=COLORS['failed'], linewidth=2.5,
+                marker=marker, markersize=7, markeredgewidth=2, markeredgecolor=BLACK,
+                linestyle='-', label='_nolegend_')
+        ax.set_title(f'{model_name.upper()} - {platform} Results', fontsize=TITLE_FONT_SIZE,
+                     color=TITLE_COLOR, fontfamily='monospace', fontweight='bold', pad=20)
+        ax.set_ylabel('Number of Tests', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
+        ax.grid(True, color=GRID_COLOR, alpha=0.3, linestyle='-', linewidth=0.5)
+        if is_top:
+            # Only the top chart carries the legend
+            ax.legend(fontsize=LEGEND_FONT_SIZE, loc='upper left', frameon=False,
+                      labelcolor=LABEL_COLOR, prop={'family': 'monospace'})
+        else:
+            # Only the bottom chart labels the date axis
+            ax.set_xlabel('Date', fontsize=LABEL_FONT_SIZE, color=LABEL_COLOR, fontfamily='monospace')
+        ax.tick_params(colors=LABEL_COLOR, labelsize=LABEL_FONT_SIZE, axis='x', rotation=45)
+        # Metrics for the most recent date; skipped tests are left out of both rates
+        executed = passed[-1] + failed[-1]
+        pass_rate = (passed[-1] / executed * 100) if executed > 0 else 0
+        fail_rate = (failed[-1] / executed * 100) if executed > 0 else 0
+        sections.append((platform, [
+            ('Pass Rate', f"{pass_rate:.1f}%", COLORS['passed']),
+            ('Fail Rate', f"{fail_rate:.1f}%", COLORS['failed']),
+            ('Total', str(passed[-1] + failed[-1] + skipped[-1]), '#888888'),
+        ]))
+    ax3.axis('off')
+    ax3.text(0.5, 0.95, 'LATEST RESULTS', ha='center', va='top',
+             fontsize=TITLE_FONT_SIZE - 4, color=TITLE_COLOR, fontfamily='monospace',
+             fontweight='bold', transform=ax3.transAxes)
+    y = 0.80
+    for section_name, metrics in sections:
+        ax3.text(0.5, y, section_name, ha='center', va='center',
+                fontsize=LABEL_FONT_SIZE + 2, color=TITLE_COLOR,
+                fontfamily='monospace', fontweight='bold', transform=ax3.transAxes)
+        y -= 0.08
+        for label, value, color in metrics:
+            ax3.text(0.15, y, label, ha='left', va='center',
+                    fontsize=LABEL_FONT_SIZE - 1, color=LABEL_COLOR,
+                    fontfamily='monospace', transform=ax3.transAxes)
+            ax3.text(0.85, y, value, ha='right', va='center',
+                    fontsize=LABEL_FONT_SIZE, color=color,
+                    fontfamily='monospace', fontweight='bold', transform=ax3.transAxes)
+            y -= 0.07
+        y -= 0.05
+    # Close only this figure; plt.close('all') would also close figures owned by other code
+    plt.close(fig)
+    return fig

time_series_gradio.py ADDED Viewed

	@@ -0,0 +1,556 @@

+import pandas as pd
+import numpy as np
+from datetime import datetime
+from data import extract_model_data
+import gradio as gr
+import plotly.express as px
+import plotly.graph_objects as go
+# Color palette (hex RGB strings) keyed by test outcome and by GPU platform
+COLORS = {
+    'passed': '#4CAF50',
+    'failed': '#E53E3E',
+    'skipped': '#FFD54F',
+    'error': '#8B0000',
+    'amd': '#ED1C24',
+    'nvidia': '#76B900'
+}
+def _sum_columns(frame: pd.DataFrame, *columns: str):
+    """Return the combined sum of `columns`, counting a column that is absent as 0."""
+    return sum(frame[column].sum() for column in columns if column in frame.columns)
+def _long_format(daily_stats: list[dict], series: list[tuple[str, str]],
+                 value_field: str, label_field: str) -> pd.DataFrame:
+    """Turn the per-day dicts into one row per (day, series) with a day-over-day change.
+    `series` pairs a key of the per-day dict with the label stored in `label_field`.
+    `change` is the difference from the previous day and 0 on the first day.
+    """
+    rows = []
+    previous = None
+    for stat in daily_stats:
+        for key, label in series:
+            change = stat[key] - previous[key] if previous is not None else 0
+            rows.append({'date': stat['date'], value_field: stat[key],
+                         label_field: label, 'change': change})
+        previous = stat
+    # Explicit columns keep the schema stable even when there are no rows
+    return pd.DataFrame(rows, columns=['date', value_field, label_field, 'change'])
+def get_time_series_summary_dfs(historical_df: pd.DataFrame) -> dict:
+    """Aggregate historical results per date into long-format frames for plotting.
+    Args:
+        historical_df: One row per model and date with a `date` column and the count
+            columns `success_<p>`, `failed_multi_no_<p>`, `failed_single_no_<p>` and
+            `skipped_<p>` for p in {amd, nvidia}. A missing count column counts as 0.
+    Returns:
+        A dict with three DataFrames, each ordered by date:
+        `failure_rates_df` (columns date, failure_rate, platform, change) and
+        `amd_tests_df` / `nvidia_tests_df` (columns date, count, test_type, change).
+        All three are empty, with those columns, when there is no dated data.
+    """
+    daily_stats = []
+    if not historical_df.empty and 'date' in historical_df.columns:
+        for date in sorted(historical_df['date'].dropna().unique()):
+            date_data = historical_df[historical_df['date'] == date]
+            stat = {'date': date}
+            for platform in ('amd', 'nvidia'):
+                passed = _sum_columns(date_data, f'success_{platform}')
+                failed = _sum_columns(date_data, f'failed_multi_no_{platform}',
+                                      f'failed_single_no_{platform}')
+                skipped = _sum_columns(date_data, f'skipped_{platform}')
+                # Skipped tests are excluded from the denominator, matching the rates
+                # computed in time_series.py and summary_page.py
+                executed = passed + failed
+                stat[f'{platform}_failure_rate'] = (failed / executed * 100) if executed > 0 else 0
+                stat[f'{platform}_passed'] = passed
+                stat[f'{platform}_failed'] = failed
+                stat[f'{platform}_skipped'] = skipped
+            daily_stats.append(stat)
+    failure_rate_df = _long_format(
+        daily_stats,
+        [('amd_failure_rate', 'AMD'), ('nvidia_failure_rate', 'NVIDIA')],
+        'failure_rate', 'platform')
+    amd_df = _long_format(
+        daily_stats,
+        [('amd_passed', 'Passed'), ('amd_failed', 'Failed'), ('amd_skipped', 'Skipped')],
+        'count', 'test_type')
+    nvidia_df = _long_format(
+        daily_stats,
+        [('nvidia_passed', 'Passed'), ('nvidia_failed', 'Failed'), ('nvidia_skipped', 'Skipped')],
+        'count', 'test_type')
+    return {
+        'failure_rates_df': failure_rate_df,
+        'amd_tests_df': amd_df,
+        'nvidia_tests_df': nvidia_df,
+    }
+def get_model_time_series_dfs(historical_df: pd.DataFrame, model_name: str) -> dict:
+    model_data = historical_df[historical_df.index.str.lower() == model_name.lower()]
+    if model_data.empty:
+        empty_df = pd.DataFrame({'date': [], 'count': [], 'test_type': [], 'change': []})
+        return {'amd_df': empty_df.copy(), 'nvidia_df': empty_df.copy()}
+    dates = sorted(model_data['date'].unique())
+    amd_data = []
+    nvidia_data = []
+    for i, date in enumerate(dates):
+        date_data = model_data[model_data['date'] == date]
+        row = date_data.iloc[0]
+        amd_passed = row.get('success_amd', 0)
+        amd_failed = row.get('failed_multi_no_amd', 0) + row.get('failed_single_no_amd', 0)
+        amd_skipped = row.get('skipped_amd', 0)
+        prev_row = model_data[model_data['date'] == dates[i-1]].iloc[0] if i > 0 and not model_data[model_data['date'] == dates[i-1]].empty else None
+        amd_passed_change = amd_passed - (prev_row.get('success_amd', 0) if prev_row is not None else 0)
+        amd_failed_change = amd_failed - (prev_row.get('failed_multi_no_amd', 0) + prev_row.get('failed_single_no_amd', 0) if prev_row is not None else 0)
+        amd_skipped_change = amd_skipped - (prev_row.get('skipped_amd', 0) if prev_row is not None else 0)
+        amd_data.extend([
+            {'date': date, 'count': amd_passed, 'test_type': 'Passed', 'change': amd_passed_change},
+            {'date': date, 'count': amd_failed, 'test_type': 'Failed', 'change': amd_failed_change},
+            {'date': date, 'count': amd_skipped, 'test_type': 'Skipped', 'change': amd_skipped_change}
+        ])
+        nvidia_passed = row.get('success_nvidia', 0)
+        nvidia_failed = row.get('failed_multi_no_nvidia', 0) + row.get('failed_single_no_nvidia', 0)
+        nvidia_skipped = row.get('skipped_nvidia', 0)
+        if prev_row is not None:
+            prev_nvidia_passed = prev_row.get('success_nvidia', 0)
+            prev_nvidia_failed = prev_row.get('failed_multi_no_nvidia', 0) + prev_row.get('failed_single_no_nvidia', 0)
+            prev_nvidia_skipped = prev_row.get('skipped_nvidia', 0)
+        else:
+            prev_nvidia_passed = prev_nvidia_failed = prev_nvidia_skipped = 0
+        nvidia_data.extend([
+            {'date': date, 'count': nvidia_passed, 'test_type': 'Passed', 'change': nvidia_passed - prev_nvidia_passed},
+            {'date': date, 'count': nvidia_failed, 'test_type': 'Failed', 'change': nvidia_failed - prev_nvidia_failed},
+            {'date': date, 'count': nvidia_skipped, 'test_type': 'Skipped', 'change': nvidia_skipped - prev_nvidia_skipped}
+        ])
+    return {'amd_df': pd.DataFrame(amd_data), 'nvidia_df': pd.DataFrame(nvidia_data)}
+def create_time_series_summary_gradio(historical_df: pd.DataFrame) -> dict:
+    if historical_df.empty or 'date' not in historical_df.columns:
+        # Create empty Plotly figure
+        empty_fig = go.Figure()
+        empty_fig.update_layout(
+            title="No historical data available",
+            height=500,
+            font=dict(size=16, color='#CCCCCC'),
+            paper_bgcolor='#000000',
+            plot_bgcolor='#1a1a1a',
+            margin=dict(b=130)
+        )
+        return {
+            'failure_rates': empty_fig,
+            'amd_tests': empty_fig,
+            'nvidia_tests': empty_fig
+        }
+    daily_stats = []
+    dates = sorted(historical_df['date'].unique())
+    for date in dates:
+        date_data = historical_df[historical_df['date'] == date]
+        # Calculate failure rates using the same logic as summary_page.py
+        # This includes ERROR tests in failures and excludes SKIPPED from total
+        total_amd_tests = 0
+        total_amd_failures = 0
+        total_nvidia_tests = 0
+        total_nvidia_failures = 0
+        amd_passed = 0
+        amd_failed = 0
+        amd_skipped = 0
+        nvidia_passed = 0
+        nvidia_failed = 0
+        nvidia_skipped = 0
+        for _, row in date_data.iterrows():
+            amd_stats, nvidia_stats = extract_model_data(row)[:2]
+            # AMD (matching summary_page.py logic: failed + error, excluding skipped)
+            amd_total = amd_stats['passed'] + amd_stats['failed'] + amd_stats['error']
+            if amd_total > 0:
+                total_amd_tests += amd_total
+                total_amd_failures += amd_stats['failed'] + amd_stats['error']
+            # For test counts graphs (these still use the old logic with skipped)
+            amd_passed += amd_stats['passed']
+            amd_failed += amd_stats['failed'] + amd_stats['error']
+            amd_skipped += amd_stats['skipped']
+            # NVIDIA (matching summary_page.py logic: failed + error, excluding skipped)
+            nvidia_total = nvidia_stats['passed'] + nvidia_stats['failed'] + nvidia_stats['error']
+            if nvidia_total > 0:
+                total_nvidia_tests += nvidia_total
+                total_nvidia_failures += nvidia_stats['failed'] + nvidia_stats['error']
+            # For test counts graphs (these still use the old logic with skipped)
+            nvidia_passed += nvidia_stats['passed']
+            nvidia_failed += nvidia_stats['failed'] + nvidia_stats['error']
+            nvidia_skipped += nvidia_stats['skipped']
+        amd_failure_rate = (total_amd_failures / total_amd_tests * 100) if total_amd_tests > 0 else 0
+        nvidia_failure_rate = (total_nvidia_failures / total_nvidia_tests * 100) if total_nvidia_tests > 0 else 0
+        daily_stats.append({
+            'date': date,
+            'amd_failure_rate': amd_failure_rate,
+            'nvidia_failure_rate': nvidia_failure_rate,
+            'amd_passed': amd_passed,
+            'amd_failed': amd_failed,
+            'amd_skipped': amd_skipped,
+            'nvidia_passed': nvidia_passed,
+            'nvidia_failed': nvidia_failed,
+            'nvidia_skipped': nvidia_skipped
+        })
+    failure_rate_data = []
+    for i, stat in enumerate(daily_stats):
+        amd_change = nvidia_change = 0
+        if i > 0:
+            amd_change = stat['amd_failure_rate'] - daily_stats[i-1]['amd_failure_rate']
+            nvidia_change = stat['nvidia_failure_rate'] - daily_stats[i-1]['nvidia_failure_rate']
+        failure_rate_data.extend([
+            {'date': stat['date'], 'failure_rate': stat['amd_failure_rate'], 'platform': 'AMD', 'change': amd_change},
+            {'date': stat['date'], 'failure_rate': stat['nvidia_failure_rate'], 'platform': 'NVIDIA', 'change': nvidia_change}
+        ])
+    failure_rate_df = pd.DataFrame(failure_rate_data)
+    amd_data = []
+    for i, stat in enumerate(daily_stats):
+        passed_change = failed_change = skipped_change = 0
+        if i > 0:
+            passed_change = stat['amd_passed'] - daily_stats[i-1]['amd_passed']
+            failed_change = stat['amd_failed'] - daily_stats[i-1]['amd_failed']
+            skipped_change = stat['amd_skipped'] - daily_stats[i-1]['amd_skipped']
+        amd_data.extend([
+            {'date': stat['date'], 'count': stat['amd_passed'], 'test_type': 'Passed', 'change': passed_change},
+            {'date': stat['date'], 'count': stat['amd_failed'], 'test_type': 'Failed', 'change': failed_change},
+            {'date': stat['date'], 'count': stat['amd_skipped'], 'test_type': 'Skipped', 'change': skipped_change}
+        ])
+    amd_df = pd.DataFrame(amd_data)
+    nvidia_data = []
+    for i, stat in enumerate(daily_stats):
+        passed_change = failed_change = skipped_change = 0
+        if i > 0:
+            passed_change = stat['nvidia_passed'] - daily_stats[i-1]['nvidia_passed']
+            failed_change = stat['nvidia_failed'] - daily_stats[i-1]['nvidia_failed']
+            skipped_change = stat['nvidia_skipped'] - daily_stats[i-1]['nvidia_skipped']
+        nvidia_data.extend([
+            {'date': stat['date'], 'count': stat['nvidia_passed'], 'test_type': 'Passed', 'change': passed_change},
+            {'date': stat['date'], 'count': stat['nvidia_failed'], 'test_type': 'Failed', 'change': failed_change},
+            {'date': stat['date'], 'count': stat['nvidia_skipped'], 'test_type': 'Skipped', 'change': skipped_change}
+        ])
+    nvidia_df = pd.DataFrame(nvidia_data)
+    # Create Plotly figure for failure rates with alternating colors
+    fig_failure_rates = go.Figure()
+    # Add NVIDIA line (green line with white markers - Barcelona style)
+    nvidia_data = failure_rate_df[failure_rate_df['platform'] == 'NVIDIA']
+    if not nvidia_data.empty:
+        fig_failure_rates.add_trace(go.Scatter(
+            x=nvidia_data['date'],
+            y=nvidia_data['failure_rate'],
+            mode='lines+markers',
+            name='NVIDIA',
+            line=dict(color='#76B900', width=3),  # Green line
+            marker=dict(size=12, color='#FFFFFF', line=dict(color='#76B900', width=2)),  # White markers with green border
+            hovertemplate='<b>NVIDIA</b><br>Date: %{x}<br>Failure Rate: %{y:.2f}%<extra></extra>'
+        ))
+    # Add AMD line (red line with dark gray markers - Barcelona style)
+    amd_data = failure_rate_df[failure_rate_df['platform'] == 'AMD']
+    if not amd_data.empty:
+        fig_failure_rates.add_trace(go.Scatter(
+            x=amd_data['date'],
+            y=amd_data['failure_rate'],
+            mode='lines+markers',
+            name='AMD',
+            line=dict(color='#ED1C24', width=3),  # Red line
+            marker=dict(size=12, color='#404040', line=dict(color='#ED1C24', width=2)),  # Dark gray markers with red border
+            hovertemplate='<b>AMD</b><br>Date: %{x}<br>Failure Rate: %{y:.2f}%<extra></extra>'
+        ))
+    fig_failure_rates.update_layout(
+        title="Overall Failure Rates Over Time",
+        height=500,
+        font=dict(size=16, color='#CCCCCC'),
+        paper_bgcolor='#000000',
+        plot_bgcolor='#1a1a1a',
+        title_font_size=20,
+        legend=dict(
+            font=dict(size=16),
+            bgcolor='rgba(0,0,0,0.5)',
+            orientation="h",
+            yanchor="bottom",
+            y=-0.4,
+            xanchor="center",
+            x=0.5
+        ),
+        xaxis=dict(title='Date', title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+        yaxis=dict(title='Failure Rate (%)', title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+        hovermode='x unified',
+        margin=dict(b=130)
+    )
+    # Create Plotly figure for AMD tests
+    fig_amd = px.line(
+        amd_df,
+        x='date',
+        y='count',
+        color='test_type',
+        color_discrete_map={"Passed": COLORS['passed'], "Failed": COLORS['failed'], "Skipped": COLORS['skipped']},
+        title="AMD Test Results Over Time",
+        labels={'count': 'Number of Tests', 'date': 'Date', 'test_type': 'Test Type'}
+    )
+    fig_amd.update_traces(mode='lines+markers', marker=dict(size=8), line=dict(width=3))
+    fig_amd.update_layout(
+        height=500,
+        font=dict(size=16, color='#CCCCCC'),
+        paper_bgcolor='#000000',
+        plot_bgcolor='#1a1a1a',
+        title_font_size=20,
+        legend=dict(
+            font=dict(size=16),
+            bgcolor='rgba(0,0,0,0.5)',
+            orientation="h",
+            yanchor="bottom",
+            y=-0.4,
+            xanchor="center",
+            x=0.5
+        ),
+        xaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+        yaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+        hovermode='x unified',
+        margin=dict(b=130)
+    )
+    # Create Plotly figure for NVIDIA tests
+    fig_nvidia = px.line(
+        nvidia_df,
+        x='date',
+        y='count',
+        color='test_type',
+        color_discrete_map={"Passed": COLORS['passed'], "Failed": COLORS['failed'], "Skipped": COLORS['skipped']},
+        title="NVIDIA Test Results Over Time",
+        labels={'count': 'Number of Tests', 'date': 'Date', 'test_type': 'Test Type'}
+    )
+    fig_nvidia.update_traces(mode='lines+markers', marker=dict(size=8), line=dict(width=3))
+    fig_nvidia.update_layout(
+        height=500,
+        font=dict(size=16, color='#CCCCCC'),
+        paper_bgcolor='#000000',
+        plot_bgcolor='#1a1a1a',
+        title_font_size=20,
+        legend=dict(
+            font=dict(size=16),
+            bgcolor='rgba(0,0,0,0.5)',
+            orientation="h",
+            yanchor="bottom",
+            y=-0.4,
+            xanchor="center",
+            x=0.5
+        ),
+        xaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+        yaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+        hovermode='x unified',
+        margin=dict(b=130)
+    )
+    return {
+        'failure_rates': fig_failure_rates,
+        'amd_tests': fig_amd,
+        'nvidia_tests': fig_nvidia
+    }
+def create_model_time_series_gradio(historical_df: pd.DataFrame, model_name: str) -> dict:
+    if historical_df.empty or 'date' not in historical_df.columns:
+        # Create empty Plotly figures
+        empty_fig_amd = go.Figure()
+        empty_fig_amd.update_layout(
+            title=f"{model_name.upper()} - AMD Results Over Time",
+            height=500,
+            font=dict(size=16, color='#CCCCCC'),
+            paper_bgcolor='#000000',
+            plot_bgcolor='#1a1a1a',
+            margin=dict(b=130)
+        )
+        empty_fig_nvidia = go.Figure()
+        empty_fig_nvidia.update_layout(
+            title=f"{model_name.upper()} - NVIDIA Results Over Time",
+            height=500,
+            font=dict(size=16, color='#CCCCCC'),
+            paper_bgcolor='#000000',
+            plot_bgcolor='#1a1a1a',
+            margin=dict(b=130)
+        )
+        return {
+            'amd_plot': empty_fig_amd,
+            'nvidia_plot': empty_fig_nvidia
+        }
+    model_data = historical_df[historical_df.index.str.lower() == model_name.lower()]
+    if model_data.empty:
+        # Create empty Plotly figures
+        empty_fig_amd = go.Figure()
+        empty_fig_amd.update_layout(
+            title=f"{model_name.upper()} - AMD Results Over Time",
+            height=500,
+            font=dict(size=16, color='#CCCCCC'),
+            paper_bgcolor='#000000',
+            plot_bgcolor='#1a1a1a',
+            margin=dict(b=130)
+        )
+        empty_fig_nvidia = go.Figure()
+        empty_fig_nvidia.update_layout(
+            title=f"{model_name.upper()} - NVIDIA Results Over Time",
+            height=500,
+            font=dict(size=16, color='#CCCCCC'),
+            paper_bgcolor='#000000',
+            plot_bgcolor='#1a1a1a',
+            margin=dict(b=130)
+        )
+        return {
+            'amd_plot': empty_fig_amd,
+            'nvidia_plot': empty_fig_nvidia
+        }
+    dates = sorted(model_data['date'].unique())
+    amd_data = []
+    nvidia_data = []
+    for i, date in enumerate(dates):
+        date_data = model_data[model_data['date'] == date]
+        if not date_data.empty:
+            row = date_data.iloc[0]
+            amd_passed = row.get('success_amd', 0)
+            amd_failed = row.get('failed_multi_no_amd', 0) + row.get('failed_single_no_amd', 0)
+            amd_skipped = row.get('skipped_amd', 0)
+            passed_change = failed_change = skipped_change = 0
+            if i > 0:
+                prev_date_data = model_data[model_data['date'] == dates[i-1]]
+                if not prev_date_data.empty:
+                    prev_row = prev_date_data.iloc[0]
+                    prev_amd_passed = prev_row.get('success_amd', 0)
+                    prev_amd_failed = prev_row.get('failed_multi_no_amd', 0) + prev_row.get('failed_single_no_amd', 0)
+                    prev_amd_skipped = prev_row.get('skipped_amd', 0)
+                    passed_change = amd_passed - prev_amd_passed
+                    failed_change = amd_failed - prev_amd_failed
+                    skipped_change = amd_skipped - prev_amd_skipped
+            amd_data.extend([
+                {'date': date, 'count': amd_passed, 'test_type': 'Passed', 'change': passed_change},
+                {'date': date, 'count': amd_failed, 'test_type': 'Failed', 'change': failed_change},
+                {'date': date, 'count': amd_skipped, 'test_type': 'Skipped', 'change': skipped_change}
+            ])
+            nvidia_passed = row.get('success_nvidia', 0)
+            nvidia_failed = row.get('failed_multi_no_nvidia', 0) + row.get('failed_single_no_nvidia', 0)
+            nvidia_skipped = row.get('skipped_nvidia', 0)
+            nvidia_passed_change = nvidia_failed_change = nvidia_skipped_change = 0
+            if i > 0:
+                prev_date_data = model_data[model_data['date'] == dates[i-1]]
+                if not prev_date_data.empty:
+                    prev_row = prev_date_data.iloc[0]
+                    prev_nvidia_passed = prev_row.get('success_nvidia', 0)
+                    prev_nvidia_failed = prev_row.get('failed_multi_no_nvidia', 0) + prev_row.get('failed_single_no_nvidia', 0)
+                    prev_nvidia_skipped = prev_row.get('skipped_nvidia', 0)
+                    nvidia_passed_change = nvidia_passed - prev_nvidia_passed
+                    nvidia_failed_change = nvidia_failed - prev_nvidia_failed
+                    nvidia_skipped_change = nvidia_skipped - prev_nvidia_skipped
+            nvidia_data.extend([
+                {'date': date, 'count': nvidia_passed, 'test_type': 'Passed', 'change': nvidia_passed_change},
+                {'date': date, 'count': nvidia_failed, 'test_type': 'Failed', 'change': nvidia_failed_change},
+                {'date': date, 'count': nvidia_skipped, 'test_type': 'Skipped', 'change': nvidia_skipped_change}
+            ])
+    amd_df = pd.DataFrame(amd_data)
+    nvidia_df = pd.DataFrame(nvidia_data)
+    # Create Plotly figure for AMD
+    fig_amd = px.line(
+        amd_df,
+        x='date',
+        y='count',
+        color='test_type',
+        color_discrete_map={"Passed": COLORS['passed'], "Failed": COLORS['failed'], "Skipped": COLORS['skipped']},
+        title=f"{model_name.upper()} - AMD Results Over Time",
+        labels={'count': 'Number of Tests', 'date': 'Date', 'test_type': 'Test Type'}
+    )
+    fig_amd.update_traces(mode='lines+markers', marker=dict(size=8), line=dict(width=3))
+    fig_amd.update_layout(
+        height=500,
+        font=dict(size=16, color='#CCCCCC'),
+        paper_bgcolor='#000000',
+        plot_bgcolor='#1a1a1a',
+        title_font_size=20,
+        legend=dict(
+            font=dict(size=16),
+            bgcolor='rgba(0,0,0,0.5)',
+            orientation="h",
+            yanchor="bottom",
+            y=-0.4,
+            xanchor="center",
+            x=0.5
+        ),
+        xaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+        yaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+        hovermode='x unified',
+        margin=dict(b=130)
+    )
+    # Create Plotly figure for NVIDIA
+    fig_nvidia = px.line(
+        nvidia_df,
+        x='date',
+        y='count',
+        color='test_type',
+        color_discrete_map={"Passed": COLORS['passed'], "Failed": COLORS['failed'], "Skipped": COLORS['skipped']},
+        title=f"{model_name.upper()} - NVIDIA Results Over Time",
+        labels={'count': 'Number of Tests', 'date': 'Date', 'test_type': 'Test Type'}
+    )
+    fig_nvidia.update_traces(mode='lines+markers', marker=dict(size=8), line=dict(width=3))
+    fig_nvidia.update_layout(
+        height=500,
+        font=dict(size=16, color='#CCCCCC'),
+        paper_bgcolor='#000000',
+        plot_bgcolor='#1a1a1a',
+        title_font_size=20,
+        legend=dict(
+            font=dict(size=16),
+            bgcolor='rgba(0,0,0,0.5)',
+            orientation="h",
+            yanchor="bottom",
+            y=-0.4,
+            xanchor="center",
+            x=0.5
+        ),
+        xaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+        yaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+        hovermode='x unified',
+        margin=dict(b=130)
+    )
+    return {
+        'amd_plot': fig_amd,
+        'nvidia_plot': fig_nvidia
+    }