Spaces:

transformers-community
/

transformers-ci-dashboard

Running

App Files Files Community

ror HF Staff

badaoui HF Staff committed on 5 days ago

Commit

721e588

verified ·

1 Parent(s): 78473e2

Add historical data visualization features (#7)

Browse files

- Add historical data visualization features (5309153902e48908d06ecd8f760b8d7faa23b08f)
- Remove logo images and use simple text labels (871d3046dfced20ab97c6f9a3c22fb9fcc5346c9)
- Merge main branch - resolve conflicts in app.py (76e62763f6cc45ecfb4a17b2b3d4cfa50ee71359)
- Remove unused files to reduce code size (a9eacdca256f51b6083daa882686822c64e6ef7d)
- small fix (6fc74fec4d3efa8b4771e2728e116159129b9f71)
- some code optimization (941f5e07ec8c7de6a868307f2f173fde66bebd4f)
- some more code factorization :) (fe596850370a2a4ce597340384555fa17ffb018b)
- remove some unused functions (2122146467c7c5fd5316bd7eee391b9abde7e26e)
- one function for has_failures (63c305fd30cf61b2aee31616a20e8adf8876dddd)
- improve filter failing models (69d6e2e5856e33d75100ac0ee9c338b3606fa6cc)
- improve encore (f3f4c775fc02f6d67a42069e65ce9cd8f5b374a8)
- more factorization (0f8d3a81fe5811984035c2a37251366206a1ae08)
- fix data loading (646bbcb774acbb8fc658b9e43ef7dae1ea1877fa)
- fix first seen date feat (c3ed9cd51a4ed2d4b961e051e5c3b1331fa83467)

Co-authored-by: ABDENNACER BADAOUI <badaoui@users.noreply.huggingface.co>

Files changed (8) hide show

app.py +450 -97
data.py +408 -38
model_page.py +46 -30
requirements.txt +2 -0
styles.css +416 -41
summary_page.py +63 -61
time_series_gradio.py +150 -0
utils.py +12 -0

app.py CHANGED Viewed

@@ -2,11 +2,16 @@ import matplotlib.pyplot as plt
 import matplotlib
 import pandas as pd
 import gradio as gr
-from data import CIResults
 from utils import logger
 from summary_page import create_summary_page
 from model_page import plot_model_stats
 # Configure matplotlib to prevent memory warnings and set dark background
@@ -19,35 +24,36 @@ plt.ioff()  # Turn off interactive mode to prevent figure accumulation
 # Load data once at startup
 Ci_results = CIResults()
 Ci_results.load_data()
 # Start the auto-reload scheduler
 Ci_results.schedule_data_reload()
 # Function to check if a model has failures
-def model_has_failures(model_name):
-    """Check if a model has any failures (AMD or NVIDIA)."""
     if Ci_results.df is None or Ci_results.df.empty:
         return False
-    # Normalize model name to match DataFrame index
     model_name_lower = model_name.lower()
-    # Check if model exists in DataFrame
     if model_name_lower not in Ci_results.df.index:
         return False
     row = Ci_results.df.loc[model_name_lower]
-    # Check for failures in both AMD and NVIDIA
-    amd_multi_failures = row.get('failed_multi_no_amd', 0)
-    amd_single_failures = row.get('failed_single_no_amd', 0)
-    nvidia_multi_failures = row.get('failed_multi_no_nvidia', 0)
-    nvidia_single_failures = row.get('failed_single_no_nvidia', 0)
-    return any([
-        amd_multi_failures > 0,
-        amd_single_failures > 0,
-        nvidia_multi_failures > 0,
-        nvidia_single_failures > 0,
-    ])
 # Function to get current description text
@@ -66,6 +72,46 @@ def get_description_text():
         msg.append("*This dashboard only tracks important models*<br>*(loading...)*")
     return "<br>".join(msg)
 # Load CSS from external file
 def load_css():
     try:
@@ -77,9 +123,19 @@ def load_css():
         logger.warning("styles.css not found, using minimal default styles")
         return "body { background: #000; color: #fff; }"
 # Create the Gradio interface with sidebar and dark theme
-with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cache=(3600, 3600)) as demo:
     with gr.Row():
@@ -91,7 +147,7 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
             description_text = get_description_text()
             description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])
-            # Summary button at the top
             summary_button = gr.Button(
                 "summary\n📊",
                 variant="primary",
@@ -99,6 +155,14 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
                 elem_classes=["summary-button"]
             )
             # Model selection header (clickable toggle)
             model_toggle_button = gr.Button(
                 f"► Select model ({len(Ci_results.available_models)})",
@@ -108,83 +172,248 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
             # Model buttons container (collapsible) - start folded
             with gr.Column(elem_classes=["model-list", "model-list-hidden"]) as model_list_container:
                 # Create individual buttons for each model
                 model_buttons = []
                 model_choices = [model.lower() for model in Ci_results.available_models] if Ci_results.available_models else ["auto", "bert", "clip", "llama"]
                 print(f"Creating {len(model_choices)} model buttons: {model_choices}")
                 for model_name in model_choices:
-                    # Check if model has failures to determine styling
-                    has_failures = model_has_failures(model_name)
-                    button_classes = ["model-button"]
-                    if has_failures:
-                        button_classes.append("model-button-failed")
-                    btn = gr.Button(
-                        model_name,
-                        variant="secondary",
-                        size="sm",
-                        elem_classes=button_classes
-                    )
-                    model_buttons.append(btn)
             # CI job links at bottom of sidebar
             ci_links_display = gr.Markdown("🔗 **CI Jobs:** *Loading...*", elem_classes=["sidebar-links"])
         # Main content area
         with gr.Column(scale=4, elem_classes=["main-content"]):
-            # Summary display (default view)
-            summary_display = gr.Plot(
-                value=create_summary_page(Ci_results.df, Ci_results.available_models),
-                label="",
-                format="png",
-                elem_classes=["plot-container"],
-                visible=True
-            )
-            # Detailed view components (hidden by default)
-            with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
-                # Create the plot output
-                plot_output = gr.Plot(
                     label="",
-                    format="png",
                     elem_classes=["plot-container"]
                 )
-                # Create two separate failed tests displays in a row layout
-                with gr.Row():
-                    with gr.Column(scale=1):
-                        amd_failed_tests_output = gr.Textbox(
-                            value="",
-                            lines=8,
-                            max_lines=8,
-                            interactive=False,
-                            container=False,
-                            elem_classes=["failed-tests"]
-                        )
-                    with gr.Column(scale=1):
-                        nvidia_failed_tests_output = gr.Textbox(
-                            value="",
-                            lines=8,
-                            max_lines=8,
-                            interactive=False,
-                            container=False,
-                            elem_classes=["failed-tests"]
-                        )
-    # Set up click handlers for model buttons
-    for i, btn in enumerate(model_buttons):
-        model_name = model_choices[i]
-        btn.click(
-            fn=lambda selected_model=model_name: plot_model_stats(Ci_results.df, selected_model),
-            outputs=[plot_output, amd_failed_tests_output, nvidia_failed_tests_output]
-        ).then(
-            fn=lambda: [gr.update(visible=False), gr.update(visible=True)],
-            outputs=[summary_display, detail_view]
         )
     # Model toggle functionality
     def toggle_model_list(current_visible):
         """Toggle the visibility of the model list."""
@@ -203,6 +432,10 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
     # Track model list visibility state
     model_list_visible = gr.State(False)
     model_toggle_button.click(
         fn=toggle_model_list,
@@ -210,17 +443,41 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
         outputs=[model_toggle_button, model_list_container, model_list_visible]
     )
-    # Summary button click handler
-    def show_summary_and_update_links():
-        """Show summary page and update CI links."""
-        return create_summary_page(Ci_results.df, Ci_results.available_models), get_description_text(), get_ci_links()
     summary_button.click(
-        fn=show_summary_and_update_links,
-        outputs=[summary_display, description_display, ci_links_display]
-    ).then(
-        fn=lambda: [gr.update(visible=True), gr.update(visible=False)],
-        outputs=[summary_display, detail_view]
     )
     # Function to get CI job links
@@ -270,25 +527,19 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
             # AMD links
             if amd_multi_link or amd_single_link:
                 links_md += "**AMD:**\n"
-                if amd_multi_link == amd_single_link:
-                    links_md += f"• [Single and Multi GPU]({amd_multi_link})\n"
-                else:
-                    if amd_multi_link:
-                        links_md += f"• [Multi GPU]({amd_multi_link})\n"
-                    if amd_single_link:
-                        links_md += f"• [Single GPU]({amd_single_link})\n"
                 links_md += "\n"
             # NVIDIA links
             if nvidia_multi_link or nvidia_single_link:
                 links_md += "**NVIDIA:**\n"
-                if nvidia_single_link == nvidia_multi_link:
-                    links_md += f"• [Single and Multi GPU]({nvidia_multi_link})\n"
-                else:
-                    if nvidia_multi_link:
-                        links_md += f"• [Multi GPU]({nvidia_multi_link})\n"
-                    if nvidia_single_link:
-                        links_md += f"• [Single GPU]({nvidia_single_link})\n"
             if not (amd_multi_link or amd_single_link or nvidia_multi_link or nvidia_single_link):
                 links_md += "*No links available*"
@@ -299,10 +550,112 @@ with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), delete_cach
             return "🔗 **CI Jobs:** *Error loading links*\n\n❓ **[FAQ](README.md)**"
-    # Auto-update summary, description, and CI links when the interface loads
     demo.load(
         fn=show_summary_and_update_links,
         outputs=[summary_display, description_display, ci_links_display]
     )

 import matplotlib
 import pandas as pd
 import gradio as gr
+from gradio_toggle import Toggle
+from data import CIResults, find_new_regressions
 from utils import logger
 from summary_page import create_summary_page
 from model_page import plot_model_stats
+from time_series_gradio import (
+    create_time_series_summary_gradio,
+    create_model_time_series_gradio,
+)
 # Configure matplotlib to prevent memory warnings and set dark background
 # Load data once at startup
 Ci_results = CIResults()
 Ci_results.load_data()
+# Preload historical data at startup
+if Ci_results.available_dates:
+    start_date_val = Ci_results.available_dates[-1]  # Last date (oldest)
+    end_date_val = Ci_results.available_dates[0]     # First date (newest)
+    Ci_results.load_historical_data(start_date_val, end_date_val)
+    logger.info(f"Preloaded historical data: {len(Ci_results.historical_df)} records")
 # Start the auto-reload scheduler
 Ci_results.schedule_data_reload()
 # Function to check if a model has failures
+def model_has_failures_by_device(model_name, device='both'):
     # Report whether `model_name` has a non-zero failure count in
     # Ci_results.df for the requested device.
     #
     # model_name: lowercased before being looked up in the DataFrame index.
     # device: 'amd', 'nvidia' or 'both' (default). Any other value matches
     #         neither check below, so the function returns False.
     # Returns False when no data is loaded, the DataFrame is empty, or the
     # model is not present in the index.
     if Ci_results.df is None or Ci_results.df.empty:
         return False
     model_name_lower = model_name.lower()
     if model_name_lower not in Ci_results.df.index:
         return False
     row = Ci_results.df.loc[model_name_lower]
     # A column missing from the row counts as 0 failures (the .get default).
+    if device in ('amd', 'both'):
+        if row.get('failed_multi_no_amd', 0) > 0 or row.get('failed_single_no_amd', 0) > 0:
+            return True
+    if device in ('nvidia', 'both'):
+        if row.get('failed_multi_no_nvidia', 0) > 0 or row.get('failed_single_no_nvidia', 0) > 0:
+            return True
+    return False
 # Function to get current description text
         msg.append("*This dashboard only tracks important models*<br>*(loading...)*")
     return "<br>".join(msg)
+# Function to format new regressions for display
+def get_regressions_text():
+    """Get formatted text for new regressions panel."""
     # Returns a Markdown string in one of three shapes:
     #   - a "No New Regressions" message when find_new_regressions() yields
     #     nothing,
     #   - a headline with the total count followed by one bullet list per
     #     "model (DEVICE gpu_type)" group, groups in sorted key order,
     #   - a placeholder message when anything below raises.
     # Each regression is read as a mapping with 'model', 'device',
     # 'gpu_type' and 'test' keys.
+    try:
+        regressions = find_new_regressions(Ci_results.df, Ci_results.all_historical_data)
+        if not regressions:
+            return "### 🎉 No New Regressions\nAll failures were present in the previous run."
+        # Group by model, device and GPU type
+        grouped = {}
+        for reg in regressions:
+            model = reg['model']
+            device = reg['device'].upper()
+            gpu_type = reg['gpu_type']
+            test = reg['test']
+            key = f"{model} ({device} {gpu_type})"
+            if key not in grouped:
+                grouped[key] = []
+            grouped[key].append(test)
+        # Format output
+        lines = [f"### ⚠️ New Regressions Detected: {len(regressions)} failure(s)"]
+        lines.append("")
+        for key in sorted(grouped.keys()):
+            tests = grouped[key]
+            lines.append(f"**{key}:**")
+            for test in tests[:5]:  # Limit to 5 tests per group; the rest are summarized below
+                lines.append(f"  • {test}")
+            if len(tests) > 5:
+                lines.append(f"  • ... and {len(tests) - 5} more")
+            lines.append("")
+        return "\n".join(lines)
     # Catch-all: the error is logged and replaced by a placeholder string
     # rather than propagated to the caller.
+    except Exception as e:
+        logger.error(f"Error getting regressions: {e}")
+        return "### ⚠️ New Regressions\n*Unable to load regression data*"
 # Load CSS from external file
 def load_css():
     try:
         logger.warning("styles.css not found, using minimal default styles")
         return "body { background: #000; color: #fff; }"
+js_func = """
+function refresh() {
+    const url = new URL(window.location);
+    if (url.searchParams.get('__theme') !== 'dark') {
+        url.searchParams.set('__theme', 'dark');
+        window.location.href = url.href;
+    }
+}
+"""
 # Create the Gradio interface with sidebar and dark theme
+with gr.Blocks(title="Model Test Results Dashboard", css=load_css(), js=js_func) as demo:
     with gr.Row():
             description_text = get_description_text()
             description_display = gr.Markdown(description_text, elem_classes=["sidebar-description"])
+            # Summary button (for current view)
             summary_button = gr.Button(
                 "summary\n📊",
                 variant="primary",
                 elem_classes=["summary-button"]
             )
+            history_view_button = Toggle(
+                label="History view",
+                value=False,
+                interactive=True,
+                elem_classes=["history-view-button"]
+            )
             # Model selection header (clickable toggle)
             model_toggle_button = gr.Button(
                 f"► Select model ({len(Ci_results.available_models)})",
             # Model buttons container (collapsible) - start folded
             with gr.Column(elem_classes=["model-list", "model-list-hidden"]) as model_list_container:
+                # Toggles for filtering failing models by device
+                with gr.Row(elem_classes=["failing-models-filter-row"]):
+                    show_amd_failures = gr.Checkbox(
+                        label="Failing on AMD",
+                        value=False,
+                        interactive=True,
+                        elem_classes=["failing-models-toggle", "amd-toggle"]
+                    )
+                    show_nvidia_failures = gr.Checkbox(
+                        label="Failing on NVIDIA",
+                        value=False,
+                        interactive=True,
+                        elem_classes=["failing-models-toggle", "nvidia-toggle"]
+                    )
                 # Create individual buttons for each model
                 model_buttons = []
                 model_choices = [model.lower() for model in Ci_results.available_models] if Ci_results.available_models else ["auto", "bert", "clip", "llama"]
+                # Categorize models by failure type
+                amd_failing_models = []
+                nvidia_failing_models = []
+                both_failing_models = []
+                passing_models = []
                 print(f"Creating {len(model_choices)} model buttons: {model_choices}")
                 for model_name in model_choices:
+                    has_amd = model_has_failures_by_device(model_name, 'amd')
+                    has_nvidia = model_has_failures_by_device(model_name, 'nvidia')
+                    if has_amd and has_nvidia:
+                        both_failing_models.append(model_name)
+                    elif has_amd:
+                        amd_failing_models.append(model_name)
+                    elif has_nvidia:
+                        nvidia_failing_models.append(model_name)
+                    else:
+                        passing_models.append(model_name)
+                # Container for all models (visible by default)
+                with gr.Column(visible=True, elem_classes=["all-models-container"]) as all_models_container:
+                    for model_name in model_choices:
+                        has_failures = model_has_failures_by_device(model_name, 'both')
+                        button_classes = ["model-button"]
+                        if has_failures:
+                            button_classes.append("model-button-failed")
+                        btn = gr.Button(
+                            model_name,
+                            variant="secondary",
+                            size="sm",
+                            elem_classes=button_classes
+                        )
+                        model_buttons.append(btn)
+                # Container for AMD failures (hidden by default)
+                amd_buttons = []
+                with gr.Column(visible=False, elem_classes=["amd-failures-container"]) as amd_failures_container:
+                    amd_models_to_show = amd_failing_models + both_failing_models
+                    for model_name in sorted(amd_models_to_show):
+                        btn = gr.Button(
+                            model_name,
+                            variant="secondary",
+                            size="sm",
+                            elem_classes=["model-button", "model-button-failed"]
+                        )
+                        amd_buttons.append(btn)
+                # Container for NVIDIA failures (hidden by default)
+                nvidia_buttons = []
+                with gr.Column(visible=False, elem_classes=["nvidia-failures-container"]) as nvidia_failures_container:
+                    nvidia_models_to_show = nvidia_failing_models + both_failing_models
+                    for model_name in sorted(nvidia_models_to_show):
+                        btn = gr.Button(
+                            model_name,
+                            variant="secondary",
+                            size="sm",
+                            elem_classes=["model-button", "model-button-failed"]
+                        )
+                        nvidia_buttons.append(btn)
+                # Container for both AMD and NVIDIA failures (hidden by default)
+                both_buttons = []
+                with gr.Column(visible=False, elem_classes=["both-failures-container"]) as both_failures_container:
+                    all_failing = list(set(amd_failing_models + nvidia_failing_models + both_failing_models))
+                    for model_name in sorted(all_failing):
+                        btn = gr.Button(
+                            model_name,
+                            variant="secondary",
+                            size="sm",
+                            elem_classes=["model-button", "model-button-failed"]
+                        )
+                        both_buttons.append(btn)
             # CI job links at bottom of sidebar
             ci_links_display = gr.Markdown("🔗 **CI Jobs:** *Loading...*", elem_classes=["sidebar-links"])
         # Main content area
         with gr.Column(scale=4, elem_classes=["main-content"]):
+            # Current view components
+            with gr.Column(visible=True, elem_classes=["current-view"]) as current_view:
+                # Summary view (contains summary plot and regressions panel)
+                with gr.Column(visible=True, elem_classes=["summary-view"]) as summary_view:
+                    # Summary display (default view)
+                    summary_display = gr.Plot(
+                        value=create_summary_page(Ci_results.df, Ci_results.available_models),
+                        label="",
+                        format="png",
+                        elem_classes=["plot-container"],
+                        visible=True
+                    )
+                    # New Regressions section (at the bottom, collapsible)
+                    regressions_toggle_button = gr.Button(
+                        "► New Regressions",
+                        variant="secondary",
+                        elem_classes=["regressions-header"]
+                    )
+                    with gr.Column(elem_classes=["regressions-content", "regressions-content-hidden"]) as regressions_content:
+                        regressions_panel = gr.Markdown(
+                            value=get_regressions_text(),
+                            elem_classes=["regressions-panel"]
+                        )
+                # Detailed view components (hidden by default)
+                with gr.Column(visible=False, elem_classes=["detail-view"]) as detail_view:
+                    # Create the plot output
+                    plot_output = gr.Plot(
+                        label="",
+                        format="png",
+                        elem_classes=["plot-container"]
+                    )
+                    # Create two separate failed tests displays in a row layout
+                    with gr.Row():
+                        with gr.Column(scale=1):
+                            amd_failed_tests_output = gr.Textbox(
+                                value="",
+                                lines=8,
+                                max_lines=8,
+                                interactive=False,
+                                container=False,
+                                elem_classes=["failed-tests"]
+                            )
+                        with gr.Column(scale=1):
+                            nvidia_failed_tests_output = gr.Textbox(
+                                value="",
+                                lines=8,
+                                max_lines=8,
+                                interactive=False,
+                                container=False,
+                                elem_classes=["failed-tests"]
+                            )
+            # Historical view components (hidden by default)
+            with gr.Column(visible=False, elem_classes=["historical-view"]) as historical_view:
+                # Time-series summary displays (multiple Gradio plots)
+                time_series_failure_rates = gr.Plot(
+                    label="",
+                    elem_classes=["plot-container"]
+                )
+                time_series_amd_tests = gr.Plot(
+                    label="",
+                    elem_classes=["plot-container"]
+                )
+                time_series_nvidia_tests = gr.Plot(
                     label="",
                     elem_classes=["plot-container"]
                 )
+                # Time-series model view (hidden by default)
+                with gr.Column(visible=False, elem_classes=["time-series-detail-view"]) as time_series_detail_view:
+                    # Time-series plots for specific model (with spacing)
+                    time_series_amd_model_plot = gr.Plot(
+                        label="",
+                        elem_classes=["plot-container"]
+                    )
+                    time_series_nvidia_model_plot = gr.Plot(
+                        label="",
+                        elem_classes=["plot-container"]
+                    )
+    # Failing models filter functionality
+    def filter_failing_models(show_amd, show_nvidia):
+        """Filter models based on AMD and/or NVIDIA failures.
+        Logic:
+        - Neither checked: show all models
+        - AMD only: show models with AMD failures (including those with both)
+        - NVIDIA only: show models with NVIDIA failures (including those with both)
+        - Both checked: show all models with any failures
+        """
         # For boolean inputs, exactly one of the four flags below is True,
         # so exactly one of the four button containers ends up visible.
+        show_all = not show_amd and not show_nvidia
+        show_amd_only = show_amd and not show_nvidia
+        show_nvidia_only = not show_amd and show_nvidia
+        show_all_failures = show_amd and show_nvidia
         # The tuple order must match the `outputs` list of the checkbox
         # .change() wiring that uses this function.
+        return (
+            gr.update(visible=show_all),           # all_models_container
+            gr.update(visible=show_amd_only),      # amd_failures_container
+            gr.update(visible=show_nvidia_only),   # nvidia_failures_container
+            gr.update(visible=show_all_failures),  # both_failures_container
         )
+    for checkbox in [show_amd_failures, show_nvidia_failures]:
+        checkbox.change(
+            fn=filter_failing_models,
+            inputs=[show_amd_failures, show_nvidia_failures],
+            outputs=[all_models_container, amd_failures_container, nvidia_failures_container, both_failures_container]
+        )
+    # Regressions panel toggle functionality
+    def toggle_regressions_panel(current_visible):
+        """Toggle the visibility of the regressions panel."""
         # current_visible: the panel's visibility flag before the click.
         # Returns three values: an update with the new button label, an
         # update with the container's new elem_classes, and the flipped flag.
+        new_visible = not current_visible
+        arrow = "▼" if new_visible else "►"
+        button_text = f"{arrow} New Regressions"
+        # Use CSS classes instead of Gradio visibility
+        css_classes = ["regressions-content"]
+        if new_visible:
+            css_classes.append("regressions-content-visible")
+        else:
+            css_classes.append("regressions-content-hidden")
+        return gr.update(value=button_text), gr.update(elem_classes=css_classes), new_visible
+    # Track regressions panel visibility state
+    regressions_visible = gr.State(False)
+    regressions_toggle_button.click(
+        fn=toggle_regressions_panel,
+        inputs=[regressions_visible],
+        outputs=[regressions_toggle_button, regressions_content, regressions_visible]
+    )
     # Model toggle functionality
     def toggle_model_list(current_visible):
         """Toggle the visibility of the model list."""
     # Track model list visibility state
     model_list_visible = gr.State(False)
+    # Track last selected model for mode switches
+    selected_model_state = gr.State(None)
+    # Track whether current view is model detail (True) or summary (False)
+    in_model_view_state = gr.State(False)
     model_toggle_button.click(
         fn=toggle_model_list,
         outputs=[model_toggle_button, model_list_container, model_list_visible]
     )
+    # Unified summary handler: respects History toggle
+    def handle_summary_click(history_mode: bool):
         # Build the updates for a click on the summary button.
         #
         # history_mode: value of the History view toggle.
         # Returns a 13-tuple, one value per entry of the `outputs` list passed
         # to summary_button.click, in this order: description, CI links,
         # current_view, historical_view, summary_view, summary_display,
         # detail_view, the three time-series summary plots,
         # time_series_detail_view, in_model_view_state, selected_model_state.
         # Both branches end with (False, ""), i.e. they clear the model-view
         # flag and the remembered model.
         # get_historical_summary_plots is defined further down in the file;
         # the name is only looked up when this handler runs.
+        description = get_description_text()
+        links = get_ci_links()
+        if history_mode:
             # History mode: hide the current view, show the historical view
             # with its three summary plots, hide the per-model time series.
+            fr_plot, amd_plot, nvidia_plot = get_historical_summary_plots()
+            return (description, links, gr.update(visible=False), gr.update(visible=True),
+                    gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
+                    fr_plot, amd_plot, nvidia_plot, gr.update(visible=False), False, "")
+        else:
             # Current mode: regenerate the summary figure, show the summary
             # view, and hide the detail view and every historical component.
+            fig = create_summary_page(Ci_results.df, Ci_results.available_models)
+            return (description, links, gr.update(visible=True), gr.update(visible=False),
+                    gr.update(visible=True), gr.update(value=fig, visible=True), gr.update(visible=False),
+                    gr.update(visible=False), gr.update(visible=False), gr.update(visible=False),
+                    gr.update(visible=False), False, "")
     summary_button.click(
+        fn=handle_summary_click,
+        inputs=[history_view_button],
+        outputs=[
+            description_display,
+            ci_links_display,
+            current_view,
+            historical_view,
+            summary_view,
+            summary_display,
+            detail_view,
+            time_series_failure_rates,
+            time_series_amd_tests,
+            time_series_nvidia_tests,
+            time_series_detail_view,
+            in_model_view_state,
+            selected_model_state,
+        ],
     )
     # Function to get CI job links
             # AMD links
             if amd_multi_link or amd_single_link:
                 links_md += "**AMD:**\n"
+                if amd_multi_link:
+                    links_md += f"• [Multi GPU]({amd_multi_link})\n"
+                if amd_single_link:
+                    links_md += f"• [Single GPU]({amd_single_link})\n"
                 links_md += "\n"
             # NVIDIA links
             if nvidia_multi_link or nvidia_single_link:
                 links_md += "**NVIDIA:**\n"
+                if nvidia_multi_link:
+                    links_md += f"• [Multi GPU]({nvidia_multi_link})\n"
+                if nvidia_single_link:
+                    links_md += f"• [Single GPU]({nvidia_single_link})\n"
             if not (amd_multi_link or amd_single_link or nvidia_multi_link or nvidia_single_link):
                 links_md += "*No links available*"
             return "🔗 **CI Jobs:** *Error loading links*\n\n❓ **[FAQ](README.md)**"
+    # Constants for Gradio updates
+    HIDDEN = gr.update(visible=False)
+    SHOWN = gr.update(visible=True)
+    NOOP = gr.update()
+    def get_historical_summary_plots():
+        """Get historical summary plots from preloaded data."""
         # Reads Ci_results.historical_df and returns three gr.update values
         # (failure rates, AMD tests, NVIDIA tests), each setting the plot
         # value and making the component visible.
         # The helper's result is indexed with the keys 'failure_rates',
         # 'amd_tests' and 'nvidia_tests'.
+        plots = create_time_series_summary_gradio(Ci_results.historical_df)
+        return (
+            gr.update(value=plots['failure_rates'], visible=True),
+            gr.update(value=plots['amd_tests'], visible=True),
+            gr.update(value=plots['nvidia_tests'], visible=True),
+        )
+    def show_time_series_model(selected_model):
+        """Show time-series view for a specific model."""
         # selected_model: passed through unchanged to
         # create_model_time_series_gradio together with
         # Ci_results.historical_df.
         # Returns two gr.update values (AMD plot, NVIDIA plot), each setting
         # the plot value and making the component visible; the helper's
         # result is indexed with the keys 'amd_plot' and 'nvidia_plot'.
+        plots = create_model_time_series_gradio(Ci_results.historical_df, selected_model)
+        return (
+            gr.update(value=plots['amd_plot'], visible=True),
+            gr.update(value=plots['nvidia_plot'], visible=True),
+        )
+    def handle_history_toggle(history_mode, last_selected_model, in_model_view):
+        """Handle toggling between current and historical view."""
         # Every branch returns a 15-tuple. The positions follow the `outputs`
         # list of history_view_button.change as far as it is visible in this
         # excerpt: current_view, historical_view, summary_view,
         # summary_display, detail_view, the three time-series summary plots,
         # the two per-model time-series plots, time_series_detail_view,
         # plot_output, the AMD and NVIDIA failed-tests boxes, and
         # in_model_view_state.
         # NOOP entries leave the corresponding component unchanged.
+        if history_mode:
+            # Historical mode: show model detail if in model view, otherwise summary
+            if in_model_view and last_selected_model:
+                amd_ts, nvidia_ts = show_time_series_model(last_selected_model)
+                return (HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN, HIDDEN, HIDDEN, HIDDEN,
+                        amd_ts, nvidia_ts, SHOWN, NOOP, NOOP, NOOP, True)
+            fr_plot, amd_plot, nvidia_plot = get_historical_summary_plots()
+            return (HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN, fr_plot, amd_plot, nvidia_plot,
+                    NOOP, NOOP, HIDDEN, NOOP, NOOP, NOOP, False)
+        else:
+            # Current mode: show model detail if available, otherwise summary
             # in_model_view is not consulted here: a truthy
             # last_selected_model that is present in the DataFrame index is
             # enough to show the model detail.
+            if last_selected_model and Ci_results.df is not None and not Ci_results.df.empty and last_selected_model in Ci_results.df.index:
+                fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, last_selected_model, Ci_results.all_historical_data)
+                return (SHOWN, HIDDEN, HIDDEN, HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN,
+                        NOOP, NOOP, HIDDEN, fig, amd_txt, nvidia_txt, True)
+            fig = create_summary_page(Ci_results.df, Ci_results.available_models)
+            return (SHOWN, HIDDEN, SHOWN, gr.update(value=fig, visible=True), HIDDEN,
+                    HIDDEN, HIDDEN, HIDDEN, NOOP, NOOP, HIDDEN, NOOP, NOOP, NOOP, False)
+    def handle_model_click(selected_model: str, history_mode: bool):
+        """Handle clicking on a model button.
+        Args:
+            selected_model: Name of the model whose button was clicked.
+            history_mode: Truthy when the historical view is active.
+        Returns:
+            A 16-tuple positionally matched to ``model_click_outputs``; the last two items
+            are the new selected-model state and the in-model-view flag (always True).
+        """
+        if history_mode:
+            # Historical mode: leave the current-view outputs untouched and show the
+            # per-model time-series plots instead of the summary time-series plots.
+            amd_ts, nvidia_ts = show_time_series_model(selected_model)
+            return (NOOP, NOOP, NOOP, HIDDEN, SHOWN, HIDDEN, HIDDEN, HIDDEN, HIDDEN, HIDDEN,
+                    HIDDEN, amd_ts, nvidia_ts, SHOWN, selected_model, True)
+        # Current mode: render the model's figure and failed-test texts in the detail view.
+        fig, amd_txt, nvidia_txt = plot_model_stats(Ci_results.df, selected_model, Ci_results.all_historical_data)
+        return (fig, amd_txt, nvidia_txt, SHOWN, HIDDEN, HIDDEN, HIDDEN, SHOWN, NOOP, NOOP,
+                NOOP, NOOP, NOOP, HIDDEN, selected_model, True)
+    # Wire up history toggle
+    # The outputs list has 15 entries; handle_history_toggle returns its tuple in this exact order.
+    history_view_button.change(
+        fn=handle_history_toggle,
+        inputs=[history_view_button, selected_model_state, in_model_view_state],
+        outputs=[
+            current_view, historical_view, summary_view, summary_display, detail_view,
+            time_series_failure_rates, time_series_amd_tests, time_series_nvidia_tests,
+            time_series_amd_model_plot, time_series_nvidia_model_plot, time_series_detail_view,
+            plot_output, amd_failed_tests_output, nvidia_failed_tests_output, in_model_view_state,
+        ],
+    )
+    # Define common outputs for model click handlers
+    # 16 entries; handle_model_click returns its tuple in this exact order.
+    model_click_outputs = [
+        plot_output, amd_failed_tests_output, nvidia_failed_tests_output,
+        current_view, historical_view, summary_view, summary_display, detail_view,
+        time_series_failure_rates, time_series_amd_tests, time_series_nvidia_tests,
+        time_series_amd_model_plot, time_series_nvidia_model_plot, time_series_detail_view,
+        selected_model_state, in_model_view_state,
+    ]
+    # Helper function to connect button clicks
+    def connect_model_buttons(buttons, models):
+        """Connect a list of buttons to their corresponding models.
+        Buttons and models are paired positionally with ``zip``, so any extra items in the
+        longer sequence are ignored.
+        """
+        for btn, model_name in zip(buttons, models):
+            btn.click(
+                # m=model_name binds the current model at definition time; without the
+                # default, every lambda would see the loop's final model_name.
+                fn=lambda history_mode, m=model_name: handle_model_click(m, history_mode),
+                inputs=[history_view_button],
+                outputs=model_click_outputs,
+            )
+    # Wire up all button groups
+    # NOTE(review): pairing is positional, so each sorted model list below presumably has to
+    # match the order in which the corresponding buttons were created - confirm at creation site.
+    connect_model_buttons(model_buttons, model_choices)
+    connect_model_buttons(amd_buttons, sorted(amd_failing_models + both_failing_models))
+    connect_model_buttons(nvidia_buttons, sorted(nvidia_failing_models + both_failing_models))
+    connect_model_buttons(both_buttons, sorted(list(set(amd_failing_models + nvidia_failing_models + both_failing_models))))
+    # Summary button click handler
+    def show_summary_and_update_links():
+        """Show summary page and update CI links."""
+        return create_summary_page(Ci_results.df, Ci_results.available_models), get_description_text(), get_ci_links()
+    # Auto-update summary, description, CI links, and regressions when the interface loads
+    # The regressions text is computed in a chained .then() step, after the first callback.
     demo.load(
         fn=show_summary_and_update_links,
         outputs=[summary_display, description_display, ci_links_display]
+    ).then(
+        fn=get_regressions_text,
+        outputs=[regressions_panel]
     )

data.py CHANGED Viewed

@@ -1,10 +1,13 @@
 from huggingface_hub import HfFileSystem
 import pandas as pd
 from utils import logger
 import threading
 import traceback
 import json
 import re
 # NOTE: if caching is an issue, try adding `use_listings_cache=False`
 fs = HfFileSystem()
@@ -54,12 +57,57 @@ KEYS_TO_KEEP = [
     "job_link_nvidia",
 ]
 def log_dataframe_link(link: str) -> str:
     """
     Adds the link to the dataset in the logs, modifies it to get a clickable link and then returns the date of the
     report.
     """
     logger.info(f"Reading df located at {link}")
     # Make sure the link starts with an http address
     if link.startswith("hf://"):
@@ -102,26 +150,148 @@ def read_one_dataframe(json_path: str, device_label: str) -> tuple[pd.DataFrame,
     df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
     return df, df_upload_date
-def get_first_working_df(file_list: list[str]) -> str:
-    for file in file_list:
-        job_links = file.rsplit('/', 1)[0] + "/job_links.json"
         try:
-            links = pd.read_json(f"hf://{job_links}", typ="series")
-            has_one_working_link = any(links.values)
         except Exception as e:
-            logger.error(f"Could not read job links from {job_links}: {e}")
-            has_one_working_link = False
-        if has_one_working_link:
-            return file
-        logger.warning(f"Skipping {file} as it has no working job links.")
-    raise RuntimeError("Could not find any working dataframe in the provided list.")
 def get_distant_data() -> tuple[pd.DataFrame, str]:
     # Retrieve AMD dataframe
     amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
     files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
-    file_amd = get_first_working_df(files_amd)
-    df_amd, date_df_amd = read_one_dataframe(f"hf://{file_amd}", "amd")
     # Retrieve NVIDIA dataframe, which pattern should be:
     # hf://datasets/hf-internal-testing`/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
     nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
@@ -161,39 +331,173 @@ def get_sample_data() -> tuple[pd.DataFrame, str]:
     filtered_joined.index = "sample_" + filtered_joined.index
     return filtered_joined, "sample data was loaded"
-def safe_extract(row: pd.DataFrame, key: str) -> int:
-    return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
 def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
     """Extract and process model data from DataFrame row."""
-    # Handle missing values and get counts directly from dataframe
-    success_nvidia = safe_extract(row, "success_nvidia")
-    success_amd = safe_extract(row, "success_amd")
-    skipped_nvidia = safe_extract(row, "skipped_nvidia")
-    skipped_amd = safe_extract(row, "skipped_amd")
-    failed_multi_amd = safe_extract(row, 'failed_multi_no_amd')
-    failed_multi_nvidia = safe_extract(row, 'failed_multi_no_nvidia')
-    failed_single_amd = safe_extract(row, 'failed_single_no_amd')
-    failed_single_nvidia = safe_extract(row, 'failed_single_no_nvidia')
-    # Calculate total failures
-    total_failed_amd = failed_multi_amd + failed_single_amd
-    total_failed_nvidia = failed_multi_nvidia + failed_single_nvidia
-    # Create stats dictionaries directly from dataframe values
     amd_stats = {
-        'passed': success_amd,
-        'failed': total_failed_amd,
-        'skipped': skipped_amd,
-        'error': 0     # Not available in this dataset
     }
     nvidia_stats = {
-        'passed': success_nvidia,
-        'failed': total_failed_nvidia,
-        'skipped': skipped_nvidia,
-        'error': 0     # Not available in this dataset
     }
-    return amd_stats, nvidia_stats, failed_multi_amd, failed_single_amd, failed_multi_nvidia, failed_single_nvidia
@@ -203,6 +507,10 @@ class CIResults:
         self.df = pd.DataFrame()
         self.available_models = []
         self.latest_update_msg = ""
     def load_data(self) -> None:
         """Load data from the data source."""
@@ -211,6 +519,13 @@ class CIResults:
             logger.info("Loading distant data...")
             new_df, latest_update_msg = get_distant_data()
             self.latest_update_msg = latest_update_msg
         except Exception as e:
             error_msg = [
                 "Loading data failed:",
@@ -220,11 +535,19 @@ class CIResults:
                 "Falling back on sample data."
             ]
             logger.error("\n".join(error_msg))
             new_df, latest_update_msg = get_sample_data()
             self.latest_update_msg = latest_update_msg
         # Update attributes
         self.df = new_df
         self.available_models = new_df.index.tolist()
         # Log and return distant load status
         logger.info(f"Data loaded successfully: {len(self.available_models)} models")
         logger.info(f"Models: {self.available_models[:5]}{'...' if len(self.available_models) > 5 else ''}")
@@ -242,6 +565,53 @@ class CIResults:
                 msg[model][col] = value
         logger.info(json.dumps(msg, indent=4))
     def schedule_data_reload(self):
         """Schedule the next data reload."""
         def reload_data():

 from huggingface_hub import HfFileSystem
 import pandas as pd
 from utils import logger
+from datetime import datetime, timedelta
 import threading
 import traceback
 import json
 import re
+import random
+from typing import List, Tuple, Optional, Dict
 # NOTE: if caching is an issue, try adding `use_listings_cache=False`
 fs = HfFileSystem()
     "job_link_nvidia",
 ]
+# ============================================================================
+# HELPER FUNCTIONS
+# ============================================================================
+def generate_fake_dates(num_days: int = 7) -> List[str]:
+    """Generate fake dates for the last N days."""
+    today = datetime.now()
+    return [(today - timedelta(days=i)).strftime("%Y-%m-%d") for i in range(num_days)]
+def parse_json_field(value) -> dict:
+    """Safely parse a JSON field that might be a string or dict."""
+    if value is None or pd.isna(value):
+        return {}
+    if isinstance(value, str):
+        try:
+            return json.loads(value)
+        except:
+            return {}
+    # Handle dict-like objects (including pandas Series/dict)
+    if isinstance(value, dict):
+        return value
+    # Try to convert to dict if possible
+    try:
+        return dict(value) if hasattr(value, '__iter__') else {}
+    except:
+        return {}
+def extract_date_from_path(path: str, pattern: str) -> Optional[str]:
+    """Extract date from file path using regex pattern."""
+    match = re.search(pattern, path)
+    return match.group(1) if match else None
+def get_test_names(tests: list) -> set:
+    """Extract test names from a list of test dictionaries."""
+    return {test.get('line', '') for test in tests}
+def safe_extract(row: pd.Series, key: str) -> int:
+    """Safely extract an integer value from a DataFrame row."""
+    return int(row.get(key, 0)) if pd.notna(row.get(key, 0)) else 0
+# ============================================================================
+# DATA LOADING FUNCTIONS
+# ============================================================================
 def log_dataframe_link(link: str) -> str:
     """
     Adds the link to the dataset in the logs, modifies it to get a clickable link and then returns the date of the
     report.
     """
+    if link.startswith("sample_"):
+        return "9999-99-99"
     logger.info(f"Reading df located at {link}")
     # Make sure the link starts with an http address
     if link.startswith("hf://"):
     df[f"failed_single_no_{device_label}"] = df["failures"].apply(lambda x: len(x["single"]) if "single" in x else 0)
     return df, df_upload_date
+def get_available_dates() -> List[str]:
+    """Get list of available dates from both AMD and NVIDIA datasets.
+    Returns:
+        Up to 30 date strings (YYYY-MM-DD), newest first, for which both datasets have a
+        results file; an empty list when there is no common date or on any error.
+    """
+    try:
+        # Get file lists
+        amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
+        nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
+        files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
+        files_nvidia = sorted(fs.glob(nvidia_src, refresh=True), reverse=True)
+        logger.info(f"Found {len(files_amd)} AMD files, {len(files_nvidia)} NVIDIA files")
+        # Extract dates using patterns
+        amd_pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/runs/[^/]+/ci_results_run_models_gpu/model_results\.json'
+        nvidia_pattern = r'transformers_daily_ci/(\d{4}-\d{2}-\d{2})/ci_results_run_models_gpu/model_results\.json'
+        # Sets deduplicate dates that have several run files.
+        amd_dates = {extract_date_from_path(f, amd_pattern) for f in files_amd}
+        amd_dates.discard(None)  # Remove None values
+        nvidia_dates = {extract_date_from_path(f, nvidia_pattern) for f in files_nvidia}
+        nvidia_dates.discard(None)
+        logger.info(f"AMD dates: {sorted(amd_dates, reverse=True)[:5]}...")
+        logger.info(f"NVIDIA dates: {sorted(nvidia_dates, reverse=True)[:5]}...")
+        # Return intersection of both datasets
+        common_dates = sorted(amd_dates.intersection(nvidia_dates), reverse=True)
+        logger.info(f"Common dates: {len(common_dates)} dates where both AMD and NVIDIA have data")
+        if common_dates:
+            return common_dates[:30]  # Limit to the 30 most recent common dates
+        # No real dates available - log warning and return empty list
+        # This will allow the system to fall back to sample data properly
+        logger.warning("No common dates found between AMD and NVIDIA datasets")
+        return []
+    except Exception as e:
+        logger.error(f"Error getting available dates: {e}")
+        return []
+def get_data_for_date(target_date: str) -> tuple[pd.DataFrame, str]:
+    """Get data for a specific date."""
+    try:
+        # For AMD, we need to find the specific run file for the date
+        # AMD structure: YYYY-MM-DD/runs/{run_id}/ci_results_run_models_gpu/model_results.json
+        amd_src = f"hf://datasets/optimum-amd/transformers_daily_ci/{target_date}/runs/*/ci_results_run_models_gpu/model_results.json"
+        amd_files = fs.glob(amd_src, refresh=True)
+        if not amd_files:
+            raise FileNotFoundError(f"No AMD data found for date {target_date}")
+        # Use the first (most recent) run for the date
+        amd_file = amd_files[0]
+        # Ensure the AMD file path has the hf:// prefix
+        if not amd_file.startswith("hf://"):
+            amd_file = f"hf://{amd_file}"
+        # NVIDIA structure: YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
+        nvidia_src = f"hf://datasets/hf-internal-testing/transformers_daily_ci/{target_date}/ci_results_run_models_gpu/model_results.json"
+        # Read dataframes - try each platform independently
+        df_amd = pd.DataFrame()
+        df_nvidia = pd.DataFrame()
+        try:
+            df_amd, _ = read_one_dataframe(amd_file, "amd")
+            logger.info(f"Successfully loaded AMD data for {target_date}")
+        except Exception as e:
+            logger.warning(f"Failed to load AMD data for {target_date}: {e}")
         try:
+            df_nvidia, _ = read_one_dataframe(nvidia_src, "nvidia")
+            logger.info(f"Successfully loaded NVIDIA data for {target_date}")
         except Exception as e:
+            logger.warning(f"Failed to load NVIDIA data for {target_date}: {e}")
+        # If both failed, return empty dataframe
+        if df_amd.empty and df_nvidia.empty:
+            logger.warning(f"No data available for either platform on {target_date}")
+            return pd.DataFrame(), target_date
+        # Join both dataframes (outer join to include data from either platform)
+        if not df_amd.empty and not df_nvidia.empty:
+            joined = df_amd.join(df_nvidia, rsuffix="_nvidia", lsuffix="_amd", how="outer")
+        elif not df_amd.empty:
+            joined = df_amd.copy()
+        else:
+            joined = df_nvidia.copy()
+        joined = joined[KEYS_TO_KEEP]
+        joined.index = joined.index.str.replace("^models_", "", regex=True)
+        # Filter out all but important models
+        important_models_lower = [model.lower() for model in IMPORTANT_MODELS]
+        filtered_joined = joined[joined.index.str.lower().isin(important_models_lower)]
+        return filtered_joined, target_date
+    except Exception as e:
+        logger.error(f"Error getting data for date {target_date}: {e}")
+        # Return empty dataframe instead of sample data for historical functionality
+        return pd.DataFrame(), target_date
+def get_historical_data(start_date: str, end_date: str, sample_data = False) -> pd.DataFrame:
+    """Get historical data for a date range."""
+    if sample_data:
+        return get_fake_historical_data(start_date, end_date)
+    try:
+        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
+        end_dt = datetime.strptime(end_date, "%Y-%m-%d")
+        historical_data = []
+        # Load data for each day in range
+        current_dt = start_dt
+        while current_dt <= end_dt:
+            date_str = current_dt.strftime("%Y-%m-%d")
+            try:
+                df, _ = get_data_for_date(date_str)
+                if not df.empty:
+                    df['date'] = date_str
+                    historical_data.append(df)
+                    logger.info(f"Loaded data for {date_str}")
+            except Exception as e:
+                logger.warning(f"Could not load data for {date_str}: {e}")
+            current_dt += timedelta(days=1)
+        return pd.concat(historical_data, ignore_index=False) if historical_data else pd.DataFrame()
+    except Exception as e:
+        logger.error(f"Error getting historical data: {e}")
+        return get_fake_historical_data(start_date, end_date)
 def get_distant_data() -> tuple[pd.DataFrame, str]:
     # Retrieve AMD dataframe
     amd_src = "hf://datasets/optimum-amd/transformers_daily_ci/**/runs/**/ci_results_run_models_gpu/model_results.json"
     files_amd = sorted(fs.glob(amd_src, refresh=True), reverse=True)
+    df_amd, date_df_amd = read_one_dataframe(f"hf://{files_amd[0]}", "amd")
     # Retrieve NVIDIA dataframe, which pattern should be:
     # hf://datasets/hf-internal-testing`/transformers_daily_ci/raw/main/YYYY-MM-DD/ci_results_run_models_gpu/model_results.json
     nvidia_src = "hf://datasets/hf-internal-testing/transformers_daily_ci/*/ci_results_run_models_gpu/model_results.json"
     filtered_joined.index = "sample_" + filtered_joined.index
     return filtered_joined, "sample data was loaded"
+def get_fake_historical_data(start_date: str, end_date: str) -> pd.DataFrame:
+    """Generate fake historical data for a date range when real data loading fails.
+    Args:
+        start_date: First day of the range, YYYY-MM-DD.
+        end_date: Last day of the range (inclusive), YYYY-MM-DD.
+    Returns:
+        One randomly perturbed copy of the sample dataframe per day, concatenated and
+        tagged with a 'date' column; an empty dataframe for an empty range or on error.
+    """
+    try:
+        start_dt = datetime.strptime(start_date, "%Y-%m-%d")
+        end_dt = datetime.strptime(end_date, "%Y-%m-%d")
+        sample_df, _ = get_sample_data()
+        historical_data = []
+        # Generate data for each date
+        current_dt = start_dt
+        while current_dt <= end_dt:
+            date_df = sample_df.copy()
+            date_df['date'] = current_dt.strftime("%Y-%m-%d")
+            # Add random variations to make it realistic
+            for idx in date_df.index:
+                # Vary success/skipped counts (±20%)
+                for col in ['success_amd', 'success_nvidia', 'skipped_amd', 'skipped_nvidia']:
+                    if col in date_df.columns and pd.notna(date_df.loc[idx, col]):
+                        val = date_df.loc[idx, col]
+                        if val > 0:
+                            date_df.loc[idx, col] = max(0, int(val * random.uniform(0.8, 1.2)))
+                # Vary failure counts more dramatically (scale factor between 0.5x and 2.0x)
+                for col in ['failed_multi_no_amd', 'failed_multi_no_nvidia', 'failed_single_no_amd', 'failed_single_no_nvidia']:
+                    if col in date_df.columns and pd.notna(date_df.loc[idx, col]):
+                        val = date_df.loc[idx, col]
+                        date_df.loc[idx, col] = max(0, int(val * random.uniform(0.5, 2.0)))
+            historical_data.append(date_df)
+            current_dt += timedelta(days=1)
+        if not historical_data:
+            return pd.DataFrame()
+        combined_df = pd.concat(historical_data, ignore_index=False)
+        logger.info(f"Generated fake historical data: {len(combined_df)} records from {start_date} to {end_date}")
+        return combined_df
+    except Exception as e:
+        logger.error(f"Error generating fake historical data: {e}")
+        return pd.DataFrame()
+def find_failure_first_seen(historical_df: pd.DataFrame, model_name: str, test_name: str, device: str, gpu_type: str) -> Optional[str]:
+    """Find the first date when a specific test failure appeared in historical data."""
+    if historical_df is None or historical_df.empty:
+        return None
+    try:
+        model_name_lower = model_name.lower()
+        # Filter by model name (case-insensitive)
+        model_data = historical_df[historical_df.index.str.lower() == model_name_lower].copy()
+        if model_data.empty:
+            return None
+        # Ensure we have a 'date' column
+        if 'date' not in model_data.columns:
+            return None
+        # Check each date (oldest first) for this failure
+        for _, row in model_data.sort_values('date').iterrows():
+            failures_raw = row.get(f'failures_{device}')
+            if failures_raw is None or pd.isna(failures_raw):
+                continue
+            # Parse failures (could be dict, string, or already parsed)
+            failures = parse_json_field(failures_raw)
+            if not isinstance(failures, dict) or gpu_type not in failures:
+                continue
+            # Check each test in this gpu_type
+            for test in failures.get(gpu_type, []):
+                if isinstance(test, dict) and test.get('line', '') == test_name:
+                    date_value = row.get('date')
+                    return date_value if date_value else None
+        return None
+    except Exception as e:
+        logger.error(f"Error finding first seen date for {test_name}: {e}")
+        return None
+def _find_device_regressions(model_name: str, current_failures: dict, yesterday_failures: dict, device: str) -> list[dict]:
+    """Helper to find regressions for a specific device."""
+    regressions = []
+    for gpu_type in ['single', 'multi']:
+        current_tests = get_test_names(current_failures.get(gpu_type, []))
+        yesterday_tests = get_test_names(yesterday_failures.get(gpu_type, []))
+        # Find NEW failures: failing NOW but NOT yesterday
+        new_tests = current_tests - yesterday_tests
+        for test_name in new_tests:
+            if test_name:  # Skip empty names
+                regressions.append({
+                    'model': model_name,
+                    'test': test_name.split('::')[-1],  # Short name
+                    'test_full': test_name,  # Full name
+                    'device': device,
+                    'gpu_type': gpu_type
+                })
+    return regressions
+def find_new_regressions(current_df: pd.DataFrame, historical_df: pd.DataFrame) -> list[dict]:
+    """Compare current failures against previous day's failures to find new regressions."""
+    if current_df.empty or historical_df.empty:
+        return []
+    # Get yesterday's data
+    available_dates = sorted(historical_df['date'].unique(), reverse=True)
+    if not available_dates:
+        return []
+    yesterday_data = historical_df[historical_df['date'] == available_dates[0]]
+    new_regressions = []
+    # For each model, compare current vs yesterday
+    for model_name in current_df.index:
+        current_row = current_df.loc[model_name]
+        yesterday_row = yesterday_data[yesterday_data.index == model_name.lower()]
+        # Parse current failures
+        current_amd = parse_json_field(current_row.get('failures_amd', {}))
+        current_nvidia = parse_json_field(current_row.get('failures_nvidia', {}))
+        # Parse yesterday failures
+        yesterday_amd = {}
+        yesterday_nvidia = {}
+        if not yesterday_row.empty:
+            yesterday_row = yesterday_row.iloc[0]
+            yesterday_amd = parse_json_field(yesterday_row.get('failures_amd', {}))
+            yesterday_nvidia = parse_json_field(yesterday_row.get('failures_nvidia', {}))
+        # Find regressions for both devices
+        new_regressions.extend(_find_device_regressions(model_name, current_amd, yesterday_amd, 'amd'))
+        new_regressions.extend(_find_device_regressions(model_name, current_nvidia, yesterday_nvidia, 'nvidia'))
+    return new_regressions
 def extract_model_data(row: pd.Series) -> tuple[dict[str, int], dict[str, int], int, int, int, int]:
     """Extract and process model data from DataFrame row."""
+    # Returns (amd_stats, nvidia_stats, failed_multi_amd, failed_single_amd,
+    # failed_multi_nvidia, failed_single_nvidia). Each stats dict has the keys
+    # 'passed', 'failed', 'skipped' and 'error'.
+    # Extract all counts
+    # safe_extract turns missing or NA cells into 0, so every count is an int.
+    counts = {key: safe_extract(row, key) for key in [
+        'success_amd', 'success_nvidia', 'skipped_amd', 'skipped_nvidia',
+        'failed_multi_no_amd', 'failed_multi_no_nvidia',
+        'failed_single_no_amd', 'failed_single_no_nvidia'
+    ]}
+    # Create stats dictionaries
+    # 'failed' sums multi-GPU and single-GPU failures; 'error' is always 0 here.
     amd_stats = {
+        'passed': counts['success_amd'],
+        'failed': counts['failed_multi_no_amd'] + counts['failed_single_no_amd'],
+        'skipped': counts['skipped_amd'],
+        'error': 0
     }
     nvidia_stats = {
+        'passed': counts['success_nvidia'],
+        'failed': counts['failed_multi_no_nvidia'] + counts['failed_single_no_nvidia'],
+        'skipped': counts['skipped_nvidia'],
+        'error': 0
     }
+    return (amd_stats, nvidia_stats, counts['failed_multi_no_amd'],
+            counts['failed_single_no_amd'], counts['failed_multi_no_nvidia'],
+            counts['failed_single_no_nvidia'])
         self.df = pd.DataFrame()
         self.available_models = []
         self.latest_update_msg = ""
+        self.available_dates = []
+        self.historical_df = pd.DataFrame()
+        self.all_historical_data = pd.DataFrame()  # Store all historical data at startup
+        self.sample_data = False
     def load_data(self) -> None:
         """Load data from the data source."""
             logger.info("Loading distant data...")
             new_df, latest_update_msg = get_distant_data()
             self.latest_update_msg = latest_update_msg
+            self.available_dates = get_available_dates()
+            logger.info(f"Available dates: {len(self.available_dates)} dates")
+            if self.available_dates:
+                logger.info(f"Date range: {self.available_dates[-1]} to {self.available_dates[0]}")
+            else:
+                logger.warning("No available dates found")
+                self.available_dates = []
         except Exception as e:
             error_msg = [
                 "Loading data failed:",
                 "Falling back on sample data."
             ]
             logger.error("\n".join(error_msg))
+            self.sample_data = True
             new_df, latest_update_msg = get_sample_data()
             self.latest_update_msg = latest_update_msg
+            # Generate fake dates for sample data historical functionality
+            self.available_dates = generate_fake_dates()
         # Update attributes
         self.df = new_df
         self.available_models = new_df.index.tolist()
+        # Load all historical data at startup
+        self.load_all_historical_data()
         # Log and return distant load status
         logger.info(f"Data loaded successfully: {len(self.available_models)} models")
         logger.info(f"Models: {self.available_models[:5]}{'...' if len(self.available_models) > 5 else ''}")
                 msg[model][col] = value
         logger.info(json.dumps(msg, indent=4))
+    def load_all_historical_data(self) -> None:
+        """Load all available historical data at startup."""
+        try:
+            if not self.available_dates:
+                logger.warning("No available dates found, skipping historical data load")
+                self.all_historical_data = pd.DataFrame()
+                return
+            logger.info(f"Loading all historical data for {len(self.available_dates)} dates...")
+            start_date, end_date = self.available_dates[-1], self.available_dates[0]
+            self.all_historical_data = get_historical_data(start_date, end_date, self.sample_data)
+            logger.info(f"All historical data loaded: {len(self.all_historical_data)} records")
+        except Exception as e:
+            logger.error(f"Error loading all historical data: {e}")
+            self.all_historical_data = pd.DataFrame()
+    def load_historical_data(self, start_date: str, end_date: str) -> None:
+        """Load historical data for a date range from pre-loaded data."""
+        try:
+            logger.info(f"Filtering historical data from {start_date} to {end_date}")
+            if self.all_historical_data.empty:
+                logger.warning("No pre-loaded historical data available")
+                self.historical_df = pd.DataFrame()
+                return
+            # Filter by date range
+            start_dt = datetime.strptime(start_date, "%Y-%m-%d")
+            end_dt = datetime.strptime(end_date, "%Y-%m-%d")
+            filtered_data = [
+                self.all_historical_data[self.all_historical_data['date'] == date_str]
+                for date_str in self.all_historical_data['date'].unique()
+                if start_dt <= datetime.strptime(date_str, "%Y-%m-%d") <= end_dt
+            ]
+            if filtered_data:
+                self.historical_df = pd.concat(filtered_data, ignore_index=False)
+                logger.info(f"Historical data filtered: {len(self.historical_df)} records for {start_date} to {end_date}")
+            else:
+                self.historical_df = pd.DataFrame()
+                logger.warning(f"No historical data found for date range {start_date} to {end_date}")
+        except Exception as e:
+            logger.error(f"Error filtering historical data: {e}")
+            self.historical_df = pd.DataFrame()
     def schedule_data_reload(self):
         """Schedule the next data reload."""
         def reload_data():

model_page.py CHANGED Viewed

@@ -1,19 +1,13 @@
 import matplotlib.pyplot as plt
 import pandas as pd
-from utils import generate_underlined_line
-from data import extract_model_data
 # Figure dimensions
 FIGURE_WIDTH_DUAL = 18
 FIGURE_HEIGHT_DUAL = 9
-# Colors
-COLORS = {
-    'passed': '#4CAF50',    # Medium green
-    'failed': '#E53E3E',    # More red
-    'skipped': '#FFD54F',   # Medium yellow
-    'error': '#8B0000'      # Dark red
-}
 # Styling constants
 BLACK = '#000000'
@@ -42,11 +36,11 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
     """Create a pie chart for device statistics."""
     if not filtered_stats:
         ax.text(0.5, 0.5, 'No test results',
-               horizontalalignment='center', verticalalignment='center',
-               transform=ax.transAxes, fontsize=14, color='#888888',
-               fontfamily='monospace', weight='normal')
         ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='bold',
-                    pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
         ax.axis('off')
         return
@@ -63,7 +57,7 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
         shadow=False,
         wedgeprops=dict(edgecolor='#1a1a1a', linewidth=BORDER_LINE_WIDTH),  # Minimal borders
         textprops={'fontsize': 12, 'weight': 'normal',
-                  'color': LABEL_COLOR, 'fontfamily': 'monospace'}
     )
     # Enhanced percentage text styling for better readability
@@ -82,10 +76,10 @@ def _create_pie_chart(ax: plt.Axes, device_label: str, filtered_stats: dict) ->
     # Device label closer to chart and bigger
     ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='normal',
-                pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
-def plot_model_stats(df: pd.DataFrame, model_name: str) -> tuple[plt.Figure, str, str]:
     """Draws pie charts of model's passed, failed, skipped, and error stats for AMD and NVIDIA."""
     # Handle case where the dataframe is empty or the model name could not be found in it
     if df.empty or model_name not in df.index:
@@ -124,25 +118,25 @@ def plot_model_stats(df: pd.DataFrame, model_name: str) -> tuple[plt.Figure, str
     # Add subtle separation line between charts - stops at device labels level
     line_x = 0.5
     fig.add_artist(plt.Line2D([line_x, line_x], [0.0, SEPARATOR_LINE_Y_END],
-                              color='#333333', linewidth=SEPARATOR_LINE_WIDTH,
-                              alpha=SEPARATOR_ALPHA, transform=fig.transFigure))
     # Add central shared title for model name
     fig.suptitle(f'{model_name.lower()}', fontsize=32, weight='bold',
-                color='#CCCCCC', fontfamily='monospace', y=MODEL_TITLE_Y)
     # Clean layout with padding and space for central title
     plt.tight_layout()
     plt.subplots_adjust(top=SUBPLOT_TOP, wspace=SUBPLOT_WSPACE)
-    amd_failed_info = prepare_textbox_content(failures_amd, 'AMD', bool(amd_filtered))
-    nvidia_failed_info = prepare_textbox_content(failures_nvidia, 'NVIDIA', bool(nvidia_filtered))
     return fig, amd_failed_info, nvidia_failed_info
-def prepare_textbox_content(failures: dict[str, list], device: str, data_available: bool) -> str:
-    """Extract failure information from failures object."""
     # Catch the case where there is no data
     if not data_available:
         return generate_underlined_line(f"No data for {device}")
@@ -160,21 +154,43 @@ def prepare_textbox_content(failures: dict[str, list], device: str, data_availab
         ""
     ]
     # Add single-gpu failures
     if single_failures:
         info_lines.append(generate_underlined_line("Single GPU failures:"))
         for test in single_failures:
-            name = test.get("line", "::*could not find name*")
-            name = name.split("::")[-1]
-            info_lines.append(name)
         info_lines.append("\n")
     # Add multi-gpu failures
     if multi_failures:
         info_lines.append(generate_underlined_line("Multi GPU failures:"))
         for test in multi_failures:
-            name = test.get("line", "::*could not find name*")
-            name = name.split("::")[-1]
-            info_lines.append(name)
-    return "\n".join(info_lines)

 import matplotlib.pyplot as plt
 import pandas as pd
+from utils import generate_underlined_line, COLORS
+from data import extract_model_data, find_failure_first_seen
 # Figure dimensions
 FIGURE_WIDTH_DUAL = 18
 FIGURE_HEIGHT_DUAL = 9
+# Colors imported from utils
 # Styling constants
 BLACK = '#000000'
     """Create a pie chart for device statistics."""
     if not filtered_stats:
         ax.text(0.5, 0.5, 'No test results',
+                horizontalalignment='center', verticalalignment='center',
+                transform=ax.transAxes, fontsize=14, color='#888888',
+                fontfamily='monospace', weight='normal')
         ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='bold',
+                     pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
         ax.axis('off')
         return
         shadow=False,
         wedgeprops=dict(edgecolor='#1a1a1a', linewidth=BORDER_LINE_WIDTH),  # Minimal borders
         textprops={'fontsize': 12, 'weight': 'normal',
+                   'color': LABEL_COLOR, 'fontfamily': 'monospace'}
     )
     # Enhanced percentage text styling for better readability
     # Device label closer to chart and bigger
     ax.set_title(device_label, fontsize=DEVICE_TITLE_FONT_SIZE, weight='normal',
+                 pad=DEVICE_TITLE_PAD, color=TITLE_COLOR, fontfamily='monospace')
+def plot_model_stats(df: pd.DataFrame, model_name: str, historical_df: pd.DataFrame = None) -> tuple[plt.Figure, str, str]:
     """Draws pie charts of model's passed, failed, skipped, and error stats for AMD and NVIDIA."""
     # Handle case where the dataframe is empty or the model name could not be found in it
     if df.empty or model_name not in df.index:
     # Add subtle separation line between charts - stops at device labels level
     line_x = 0.5
     fig.add_artist(plt.Line2D([line_x, line_x], [0.0, SEPARATOR_LINE_Y_END],
+                             color='#333333', linewidth=SEPARATOR_LINE_WIDTH,
+                             alpha=SEPARATOR_ALPHA, transform=fig.transFigure))
     # Add central shared title for model name
     fig.suptitle(f'{model_name.lower()}', fontsize=32, weight='bold',
+                 color='#CCCCCC', fontfamily='monospace', y=MODEL_TITLE_Y)
     # Clean layout with padding and space for central title
     plt.tight_layout()
     plt.subplots_adjust(top=SUBPLOT_TOP, wspace=SUBPLOT_WSPACE)
+    amd_failed_info = prepare_textbox_content(failures_amd, 'AMD', bool(amd_filtered), model_name, historical_df)
+    nvidia_failed_info = prepare_textbox_content(failures_nvidia, 'NVIDIA', bool(nvidia_filtered), model_name, historical_df)
     return fig, amd_failed_info, nvidia_failed_info
+def prepare_textbox_content(failures: dict[str, list], device: str, data_available: bool, model_name: str = None, historical_df: pd.DataFrame = None) -> str:
+    """Extract failure information from failures object with first seen dates."""
     # Catch the case where there is no data
     if not data_available:
         return generate_underlined_line(f"No data for {device}")
         ""
     ]
+    # Helper function to format failure line with first seen date
+    def format_failure_line(test: dict, gpu_type: str) -> str:
+        full_name = test.get("line", "::*could not find name*")
+        short_name = full_name.split("::")[-1]
+        # Try to find first seen date if historical data is available
+        if historical_df is not None and model_name is not None and not historical_df.empty:
+            first_seen = find_failure_first_seen(
+                historical_df,
+                model_name,
+                full_name,
+                device.lower(),
+                gpu_type
+            )
+            if first_seen:
+                # Format date as MM-DD-YYYY
+                try:
+                    from datetime import datetime
+                    date_obj = datetime.strptime(first_seen, "%Y-%m-%d")
+                    formatted_date = date_obj.strftime("%m-%d-%Y")
+                    return f"{short_name} (First seen: {formatted_date})"
+                except:
+                    return f"{short_name} (First seen: {first_seen})"
+        return short_name
     # Add single-gpu failures
     if single_failures:
         info_lines.append(generate_underlined_line("Single GPU failures:"))
         for test in single_failures:
+            info_lines.append(format_failure_line(test, "single"))
         info_lines.append("\n")
     # Add multi-gpu failures
     if multi_failures:
         info_lines.append(generate_underlined_line("Multi GPU failures:"))
         for test in multi_failures:
+            info_lines.append(format_failure_line(test, "multi"))
+    return "\n".join(info_lines)

requirements.txt CHANGED Viewed

	@@ -1 +1,3 @@
1	matplotlib>=3.8

 matplotlib>=3.8
+gradio_toggle
+plotly>=5.0

styles.css CHANGED Viewed

@@ -3,6 +3,8 @@
     --main-content-bottom-margin: 10px; /* Configurable bottom margin for main content */
 }
 .gradio-container {
     background-color: #000000 !important;
     color: white !important;
@@ -173,6 +175,96 @@ div[data-testid="column"]:has(.sidebar) {
     transition: max-height 0.3s ease !important;
 }
 /* Model button styling */
 .model-button {
@@ -371,52 +463,28 @@ div[data-testid="column"]:has(.sidebar) {
 /* Plot container with smooth transitions and controlled scrolling */
 .plot-container {
-    background-color: #000000 !important;
     border: none !important;
     transition: opacity 0.6s ease-in-out !important;
     flex: 1 1 auto !important;
     min-height: 0 !important;
     overflow-y: auto !important;
     scrollbar-width: thin !important;
-    scrollbar-color: #333333 #000000 !important;
 }
 /* Custom scrollbar for plot container */
 .plot-container::-webkit-scrollbar {
     width: 8px !important;
-    background: #000000 !important;
-}
-.plot-container::-webkit-scrollbar-track {
-    background: #000000 !important;
-}
-.plot-container::-webkit-scrollbar-thumb {
-    background-color: #333333 !important;
-    border-radius: 4px !important;
-}
-.plot-container::-webkit-scrollbar-thumb:hover {
-    background-color: #555555 !important;
 }
-/* Gradio plot component styling */
-.gr-plot {
-    background-color: #000000 !important;
-    transition: opacity 0.6s ease-in-out !important;
-}
-.gr-plot .gradio-plot {
-    background-color: #000000 !important;
-    transition: opacity 0.6s ease-in-out !important;
-}
 .gr-plot img {
     transition: opacity 0.6s ease-in-out !important;
 }
 /* Target the plot wrapper */
-div[data-testid="plot"] {
     background-color: #000000 !important;
 }
@@ -427,11 +495,6 @@ div[data-testid="plot"] {
     background-color: #000000 !important;
 }
-/* Ensure plot area background */
-.gr-plot > div,
-.plot-container > div {
-    background-color: #000000 !important;
-}
 /* Prevent white flash during plot updates */
 .plot-container::before {
@@ -445,24 +508,26 @@ div[data-testid="plot"] {
     z-index: -1;
 }
-/* Force all plot elements to have black background */
-.plot-container *,
-.gr-plot *,
-div[data-testid="plot"] * {
-    background-color: #000000 !important;
 }
-/* Override any white backgrounds in matplotlib */
-.plot-container canvas,
-.gr-plot canvas {
-    background-color: #000000 !important;
-}
 /* Text elements */
 h1, h2, h3, p, .markdown {
     color: white !important;
 }
 /* Sidebar header enhancement */
 .sidebar h1 {
     background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
@@ -529,6 +594,116 @@ h1, h2, h3, p, .markdown {
     flex-direction: column !important;
 }
 /* Custom scrollbar for main content */
 .main-content {
     scrollbar-width: thin !important;
@@ -667,3 +842,203 @@ h1, h2, h3, p, .markdown {
     100% { scroll-behavior: auto; }
 }

     --main-content-bottom-margin: 10px; /* Configurable bottom margin for main content */
 }
 .gradio-container {
     background-color: #000000 !important;
     color: white !important;
     transition: max-height 0.3s ease !important;
 }
+/* Full-width sidebar button that opens the history view */
+.history-view-button {
+    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
+    color: white !important;
+    margin: 0px 0px !important;
+    padding: 8px 12px !important;
+    font-weight: 600 !important;
+    font-size: 14px !important;
+    text-transform: uppercase !important;
+    letter-spacing: 0.3px !important;
+    font-family: monospace !important;
+    width: 100% !important;
+    max-width: 100% !important;
+    white-space: nowrap !important;
+    /* text-overflow only applies when overflow is not visible */
+    overflow: hidden !important;
+    text-overflow: ellipsis !important;
+    display: block !important;
+    cursor: pointer !important;
+    transition: all 0.3s ease !important;
+}
+/* Failing models filter row */
+.failing-models-filter-row {
+    background: linear-gradient(145deg, #1a1a1a, #0f0f0f) !important;
+    border: 1px solid #333 !important;
+    border-radius: 6px !important;
+    padding: 8px 8px !important;
+    margin: 0px 0px 12px 0px !important;
+    gap: 8px !important;
+}
+/* Failing models toggle styling */
+.failing-models-toggle {
+    background: transparent !important;
+    border: none !important;
+    padding: 4px 6px !important;
+    margin: 0 !important;
+    flex: 1 !important;
+}
+.failing-models-toggle:hover {
+    background: rgba(255, 255, 255, 0.05) !important;
+    border-radius: 4px !important;
+}
+.failing-models-toggle label {
+    color: #FFFFFF !important;
+    font-family: monospace !important;
+    font-size: 11px !important;
+    font-weight: 600 !important;
+    text-transform: uppercase !important;
+    letter-spacing: 0.5px !important;
+    cursor: pointer !important;
+    display: flex !important;
+    align-items: center !important;
+    white-space: nowrap !important;
+}
+/* Override specific colors for AMD and NVIDIA to white */
+.amd-toggle label,
+.amd-toggle label span {
+    color: #FFFFFF !important;
+}
+.nvidia-toggle label,
+.nvidia-toggle label span {
+    color: #FFFFFF !important;
+}
+.failing-models-toggle input[type="checkbox"] {
+    cursor: pointer !important;
+    width: 16px !important;
+    height: 16px !important;
+    margin-right: 6px !important;
+}
+.amd-toggle input[type="checkbox"] {
+    accent-color: #FF6B6B !important;
+}
+.nvidia-toggle input[type="checkbox"] {
+    accent-color: #76B900 !important;
+}
+.amd-toggle input[type="checkbox"]:checked {
+    accent-color: #FF8888 !important;
+}
+.nvidia-toggle input[type="checkbox"]:checked {
+    accent-color: #8BD918 !important;
+}
 /* Model button styling */
 .model-button {
 /* Plot container with smooth transitions and controlled scrolling */
 .plot-container {
     border: none !important;
     transition: opacity 0.6s ease-in-out !important;
     flex: 1 1 auto !important;
     min-height: 0 !important;
     overflow-y: auto !important;
     scrollbar-width: thin !important;
+    padding: 0 !important;
 }
 /* Custom scrollbar for plot container */
 .plot-container::-webkit-scrollbar {
     width: 8px !important;
 }
 .gr-plot img {
     transition: opacity 0.6s ease-in-out !important;
 }
 /* Target the plot wrapper */
+div[data-testid="matplotlib"] {
     background-color: #000000 !important;
 }
     background-color: #000000 !important;
 }
 /* Prevent white flash during plot updates */
 .plot-container::before {
     z-index: -1;
 }
+.vega-embed {
+    position: absolute !important;
 }
 /* Text elements */
 h1, h2, h3, p, .markdown {
     color: white !important;
 }
+.toggle {
+    margin: 0 auto !important;
+}
+.toggle-label {
+    color: white !important;
+    font-family: monospace !important;
+    font-size: 14px !important;
+}
 /* Sidebar header enhancement */
 .sidebar h1 {
     background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
     flex-direction: column !important;
 }
+/* Summary view - position content slightly higher (not fully centered) */
+.summary-view {
+    display: flex !important;
+    flex-direction: column !important;
+    align-items: center !important;
+    justify-content: flex-start !important;
+    gap: 10px !important;
+    padding-top: 20px !important;
+}
+/* Let the summary plot span the full width of the summary view */
+.summary-view .plot-container {
+    width: 100% !important;
+}
+/* Regressions components stay with the summary as a group */
+.regressions-header {
+    margin: 0px 0px 10px 0px !important;
+    width: 100% !important;
+    max-width: 100% !important;
+    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
+    color: white !important;
+    border: 1px solid #8B4513 !important;
+    border-radius: 5px !important;
+    font-weight: 600 !important;
+    font-size: 14px !important;
+    font-family: monospace !important;
+    text-align: left !important;
+    transition: all 0.3s ease !important;
+}
+.regressions-header:hover {
+    background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
+    border-color: #B8621B !important;
+}
+/* Collapsible regressions content */
+.regressions-content-visible {
+    max-height: 800px !important;
+    overflow-y: auto !important;
+    transition: max-height 0.3s ease !important;
+    scrollbar-width: thin !important;
+    -ms-overflow-style: none !important;
+}
+.regressions-content-visible::-webkit-scrollbar {
+    width: 8px !important;
+    background: transparent !important;
+}
+.regressions-content-visible::-webkit-scrollbar-thumb {
+    background-color: #333333 !important;
+    border-radius: 4px !important;
+}
+.regressions-content-hidden {
+    max-height: 0 !important;
+    overflow: hidden !important;
+    transition: max-height 0.3s ease !important;
+}
+/* New Regressions Panel */
+.regressions-panel {
+    background: linear-gradient(145deg, #2a1a1a, #1a0f0f) !important;
+    border: 2px solid #8B4513 !important;
+    border-radius: 8px !important;
+    padding: 15px 20px !important;
+    margin: 0px 0px 15px 0px !important;
+    box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2) !important;
+    animation: pulse-border 2s ease-in-out infinite !important;
+}
+.regressions-panel h3 {
+    color: #FFB86C !important;
+    font-family: monospace !important;
+    font-size: 16px !important;
+    font-weight: bold !important;
+    margin: 0 0 10px 0 !important;
+    display: flex !important;
+    align-items: center !important;
+}
+.regressions-panel p,
+.regressions-panel ul,
+.regressions-panel li {
+    color: #FFFFFF !important;
+    font-family: monospace !important;
+    font-size: 13px !important;
+    line-height: 1.6 !important;
+    margin: 4px 0 !important;
+}
+.regressions-panel strong {
+    color: #FF6B6B !important;
+    font-weight: 600 !important;
+}
+/* Pulse animation for new regressions */
+@keyframes pulse-border {
+    0%, 100% {
+        border-color: #8B4513;
+        box-shadow: 0 4px 12px rgba(255, 107, 107, 0.2);
+    }
+    50% {
+        border-color: #B8621B;
+        box-shadow: 0 4px 16px rgba(255, 107, 107, 0.4);
+    }
+}
 /* Custom scrollbar for main content */
 .main-content {
     scrollbar-width: thin !important;
     100% { scroll-behavior: auto; }
 }
+/* View toggle buttons */
+.view-toggle-row {
+    display: flex !important;
+    gap: 5px !important;
+    margin-bottom: 15px !important;
+}
+.view-toggle-button {
+    flex: 1 !important;
+    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
+    color: white !important;
+    border: 1px solid #333 !important;
+    border-radius: 5px !important;
+    padding: 8px 6px !important;
+    transition: all 0.3s ease !important;
+    font-weight: 600 !important;
+    font-size: 12px !important;
+    text-transform: uppercase !important;
+    letter-spacing: 0.3px !important;
+    font-family: monospace !important;
+    height: 50px !important;
+    display: flex !important;
+    flex-direction: column !important;
+    justify-content: center !important;
+    align-items: center !important;
+    line-height: 1.2 !important;
+    cursor: pointer !important;
+}
+.view-toggle-button:hover {
+    background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
+    border-color: #555 !important;
+}
+.view-toggle-active {
+    background: linear-gradient(135deg, #4a4a4a, #3e3e3e) !important;
+    border: 2px solid #555555 !important;
+    box-shadow:
+        0 4px 15px rgba(0, 0, 0, 0.3),
+        inset 0 1px 0 rgba(255, 255, 255, 0.2) !important;
+}
+/* Date selection styling */
+.date-selection {
+    flex-grow: 0 !important;
+    background: linear-gradient(145deg, #0f0f0f, #1a1a1a) !important;
+    border: 1px solid #333 !important;
+    border-radius: 8px !important;
+    padding: 15px !important;
+    margin-bottom: 15px !important;
+    transition: all 0.3s ease !important;
+    overflow: hidden !important;
+}
+.date-selection-hidden {
+    max-height: 0 !important;
+    padding: 0 15px !important;
+    margin-bottom: 0 !important;
+    border: none !important;
+}
+.date-selection-visible {
+    max-height: 500px !important;
+}
+.date-header {
+    margin-bottom: 10px !important;
+    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
+    color: white !important;
+    border: 1px solid #333 !important;
+    border-radius: 5px !important;
+    padding: 8px 12px !important;
+    transition: all 0.3s ease !important;
+    font-family: monospace !important;
+    font-size: 12px !important;
+    text-align: left !important;
+    cursor: pointer !important;
+    width: 100% !important;
+    box-sizing: border-box !important;
+}
+.date-header:hover {
+    background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
+    border-color: #444 !important;
+    transform: translateY(-1px) !important;
+    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.3) !important;
+}
+.date-dropdown {
+    background-color: #222222 !important;
+    color: white !important;
+    border: 1px solid #444444 !important;
+    border-radius: 5px !important;
+    font-family: monospace !important;
+    font-size: 12px !important;
+}
+.date-dropdown .gr-dropdown {
+    background-color: #222222 !important;
+    color: white !important;
+    border: 1px solid #444444 !important;
+}
+.load-historical-button {
+    background: linear-gradient(135deg, #2d5aa0, #1e3f73) !important;
+    color: white !important;
+    border: 1px solid #3a6bc7 !important;
+    border-radius: 5px !important;
+    padding: 8px 12px !important;
+    transition: all 0.3s ease !important;
+    font-weight: 500 !important;
+    font-size: 12px !important;
+    text-transform: uppercase !important;
+    letter-spacing: 0.1px !important;
+    font-family: monospace !important;
+    width: 100% !important;
+    margin-top: 10px !important;
+}
+.load-historical-button:hover {
+    background: linear-gradient(135deg, #3a6bc7, #2d5aa0) !important;
+    border-color: #4a7bd9 !important;
+}
+/* Historical view styling */
+.historical-view {
+    background-color: #000000 !important;
+    padding: 30px 20px !important;
+}
+.time-series-detail-view {
+    background-color: #000000 !important;
+    padding: 30px 20px !important;
+}
+/* Plotly chart styling for historical view */
+.historical-view .plot-container,
+.time-series-detail-view .plot-container {
+    background-color: #000000 !important;
+}
+/* Plotly specific text styling */
+.historical-view .js-plotly-plot .plotly,
+.time-series-detail-view .js-plotly-plot .plotly {
+    background-color: #000000 !important;
+}
+/* Plotly legend text */
+.historical-view .js-plotly-plot .legend text,
+.time-series-detail-view .js-plotly-plot .legend text {
+    font-size: 16px !important;
+    fill: #CCCCCC !important;
+}
+/* Plotly axis titles */
+.historical-view .js-plotly-plot .g-xtitle text,
+.historical-view .js-plotly-plot .g-ytitle text,
+.time-series-detail-view .js-plotly-plot .g-xtitle text,
+.time-series-detail-view .js-plotly-plot .g-ytitle text {
+    font-size: 16px !important;
+    fill: #CCCCCC !important;
+}
+/* Plotly axis tick labels */
+.historical-view .js-plotly-plot .xtick text,
+.historical-view .js-plotly-plot .ytick text,
+.time-series-detail-view .js-plotly-plot .xtick text,
+.time-series-detail-view .js-plotly-plot .ytick text {
+    font-size: 14px !important;
+    fill: #CCCCCC !important;
+}
+/* Plotly title */
+.historical-view .js-plotly-plot .g-gtitle text,
+.time-series-detail-view .js-plotly-plot .g-gtitle text {
+    font-size: 20px !important;
+    fill: #FFFFFF !important;
+    font-weight: 600 !important;
+}
+/* Back button styling */
+.back-button {
+    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
+    color: white !important;
+    border: 1px solid #333 !important;
+    border-radius: 5px !important;
+    padding: 8px 12px !important;
+    transition: all 0.3s ease !important;
+    font-weight: 500 !important;
+    font-size: 12px !important;
+    font-family: monospace !important;
+    margin-bottom: 15px !important;
+    width: 100% !important;
+}
+.back-button:hover {
+    background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
+    border-color: #555 !important;
+    color: #74b9ff !important;
+}

summary_page.py CHANGED Viewed

@@ -1,54 +1,47 @@
 import matplotlib.pyplot as plt
 import pandas as pd
 from data import extract_model_data
 # Layout parameters
 COLUMNS = 3
 # Derived constants
-COLUMN_WIDTH = 100 / COLUMNS  # Each column takes 25% of width
-BAR_WIDTH = COLUMN_WIDTH * 0.8  # 80% of column width for bars
-BAR_MARGIN = COLUMN_WIDTH * 0.1  # 10% margin on each side
 # Figure dimensions
-FIGURE_WIDTH = 22  # Wider to accommodate columns and legend
-MAX_HEIGHT = 14  # Maximum height in inches
 MIN_HEIGHT_PER_ROW = 2.8
 FIGURE_PADDING = 1
 # Bar styling
-BAR_HEIGHT_RATIO = 0.22  # Bar height as ratio of vertical spacing
-VERTICAL_SPACING_RATIO = 0.2  # Base vertical position ratio
-AMD_BAR_OFFSET = 0.25  # AMD bar offset ratio
-NVIDIA_BAR_OFFSET = 0.54  # NVIDIA bar offset ratio
-# Colors
-COLORS = {
-    'passed': '#4CAF50',
-    'failed': '#E53E3E',
-    'skipped': '#FFD54F',
-    'error': '#8B0000',
-    'empty': "#5B5B5B"
-}
 # Font styling
 MODEL_NAME_FONT_SIZE = 16
 LABEL_FONT_SIZE = 14
-LABEL_OFFSET = 1  # Distance of label from bar
 FAILURE_RATE_FONT_SIZE = 28
-def get_overall_stats(df: pd.DataFrame, available_models: list[str]) -> tuple[list[int], list[int]]:
     """Calculate overall failure rates for AMD and NVIDIA across all models."""
     if df.empty or not available_models:
         return 0.0, 0.0
-    total_amd_passed = 0
-    total_amd_failed = 0
-    total_amd_skipped = 0
-    total_nvidia_passed = 0
-    total_nvidia_failed = 0
-    total_nvidia_skipped = 0
     for model_name in available_models:
         if model_name not in df.index:
@@ -58,16 +51,21 @@ def get_overall_stats(df: pd.DataFrame, available_models: list[str]) -> tuple[li
         amd_stats, nvidia_stats = extract_model_data(row)[:2]
         # AMD totals
-        total_amd_passed += amd_stats['passed']
-        total_amd_failed += amd_stats['failed'] + amd_stats['error']
-        total_amd_skipped += amd_stats['skipped']
         # NVIDIA totals
-        total_nvidia_passed += nvidia_stats['passed']
-        total_nvidia_failed += nvidia_stats['failed'] + nvidia_stats['error']
-        total_nvidia_skipped += nvidia_stats['skipped']
-    return [total_amd_passed, total_amd_failed, total_amd_skipped], [total_nvidia_passed, total_nvidia_failed, total_nvidia_skipped]
 def draw_text_and_bar(
@@ -115,14 +113,7 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
         return fig
     # Calculate overall failure rates
-    amd_counts, nvidia_counts = get_overall_stats(df, available_models)
-    amd_non_skipped = amd_counts[0] + amd_counts[1]
-    amd_failure_rate = (amd_counts[1] / amd_non_skipped) if amd_non_skipped > 0 else 0.0
-    amd_failure_rate *= 100
-    nvidia_non_skipped = nvidia_counts[0] + nvidia_counts[1]
-    nvidia_failure_rate = (nvidia_counts[1] / nvidia_non_skipped) if nvidia_non_skipped > 0 else 0.0
-    nvidia_failure_rate *= 100
     # Calculate dimensions for N-column layout
     model_count = len(available_models)
@@ -143,6 +134,10 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
     visible_model_count = 0
     max_y = 0
     for i, model_name in enumerate(available_models):
         if model_name not in df.index:
@@ -152,6 +147,15 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
         # Extract and process model data
         amd_stats, nvidia_stats = extract_model_data(row)[:2]
         # Calculate position in the COLUMNS-wide grid
         col = visible_model_count % COLUMNS
@@ -176,44 +180,42 @@ def create_summary_page(df: pd.DataFrame, available_models: list[str]) -> plt.Fi
         # AMD label and bar in this column
         bar_height = min(0.4, vertical_spacing * BAR_HEIGHT_RATIO)
-        # Draw AMD bar
         draw_text_and_bar("amd", amd_stats, y_amd_bar, col_left, bar_height, ax)
-        # Draw NVIDIA bar
         draw_text_and_bar("nvidia", nvidia_stats, y_nvidia_bar, col_left, bar_height, ax)
         # Increment counter for next visible model
         visible_model_count += 1
     # Add AMD and NVIDIA test totals in the bottom left
     # Calculate line spacing to align middle with legend
     line_height = 0.4  # Height between lines
-    legend_y = max_y + 1
     # Position the two lines so their middle aligns with legend_y
     amd_y = legend_y - line_height / 2
     nvidia_y = legend_y + line_height / 2
-    amd_totals_text =    f"AMD Tests    - Passed: {amd_counts[0]}, Failed: {amd_counts[1]}, Skipped: {amd_counts[2]}"
-    nvidia_totals_text = f"NVIDIA Tests - Passed: {nvidia_counts[0]}, Failed: {nvidia_counts[1]}, Skipped: {nvidia_counts[2]}"
     ax.text(0, amd_y, amd_totals_text,
            ha='left', va='bottom', color='#CCCCCC',
            fontsize=14, fontfamily='monospace')
     ax.text(0, nvidia_y, nvidia_totals_text,
            ha='left', va='bottom', color='#CCCCCC',
            fontsize=14, fontfamily='monospace')
-    # Add legend horizontally in bottom right corner
-    patch_height = 0.3
-    patch_width = 3
-    legend_start_x = 68.7
-    legend_y = max_y + 1
-    legend_spacing = 10
-    legend_font_size = 15
     # Legend entries
     legend_items = [
         ('passed', 'Passed'),

 import matplotlib.pyplot as plt
 import pandas as pd
 from data import extract_model_data
+from utils import COLORS
 # Layout parameters
 COLUMNS = 3
 # Derived constants
+COLUMN_WIDTH = 100 / COLUMNS
+BAR_WIDTH = COLUMN_WIDTH * 0.8
+BAR_MARGIN = COLUMN_WIDTH * 0.1
 # Figure dimensions
+FIGURE_WIDTH = 22
+MAX_HEIGHT = 14
 MIN_HEIGHT_PER_ROW = 2.8
 FIGURE_PADDING = 1
 # Bar styling
+BAR_HEIGHT_RATIO = 0.22
+VERTICAL_SPACING_RATIO = 0.2
+AMD_BAR_OFFSET = 0.25
+NVIDIA_BAR_OFFSET = 0.54
+# Colors imported from utils
 # Font styling
 MODEL_NAME_FONT_SIZE = 16
 LABEL_FONT_SIZE = 14
+LABEL_OFFSET = 1
 FAILURE_RATE_FONT_SIZE = 28
+def calculate_overall_failure_rates(df: pd.DataFrame, available_models: list[str]) -> tuple[float, float]:
     """Calculate overall failure rates for AMD and NVIDIA across all models."""
     if df.empty or not available_models:
         return 0.0, 0.0
+    total_amd_tests = 0
+    total_amd_failures = 0
+    total_nvidia_tests = 0
+    total_nvidia_failures = 0
     for model_name in available_models:
         if model_name not in df.index:
         amd_stats, nvidia_stats = extract_model_data(row)[:2]
         # AMD totals
+        amd_total = amd_stats['passed'] + amd_stats['failed'] + amd_stats['error']
+        if amd_total > 0:
+            total_amd_tests += amd_total
+            total_amd_failures += amd_stats['failed'] + amd_stats['error']
         # NVIDIA totals
+        nvidia_total = nvidia_stats['passed'] + nvidia_stats['failed'] + nvidia_stats['error']
+        if nvidia_total > 0:
+            total_nvidia_tests += nvidia_total
+            total_nvidia_failures += nvidia_stats['failed'] + nvidia_stats['error']
+    amd_failure_rate = (total_amd_failures / total_amd_tests * 100) if total_amd_tests > 0 else 0.0
+    nvidia_failure_rate = (total_nvidia_failures / total_nvidia_tests * 100) if total_nvidia_tests > 0 else 0.0
+    return amd_failure_rate, nvidia_failure_rate
 def draw_text_and_bar(
         return fig
     # Calculate overall failure rates
+    amd_failure_rate, nvidia_failure_rate = calculate_overall_failure_rates(df, available_models)
     # Calculate dimensions for N-column layout
     model_count = len(available_models)
     visible_model_count = 0
     max_y = 0
+    # Initialize counters for total tests
+    amd_totals = {'passed': 0, 'failed': 0, 'skipped': 0}
+    nvidia_totals = {'passed': 0, 'failed': 0, 'skipped': 0}
     for i, model_name in enumerate(available_models):
         if model_name not in df.index:
         # Extract and process model data
         amd_stats, nvidia_stats = extract_model_data(row)[:2]
+        # Accumulate totals
+        amd_totals['passed'] += amd_stats['passed']
+        amd_totals['failed'] += amd_stats['failed'] + amd_stats['error']
+        amd_totals['skipped'] += amd_stats['skipped']
+        nvidia_totals['passed'] += nvidia_stats['passed']
+        nvidia_totals['failed'] += nvidia_stats['failed'] + nvidia_stats['error']
+        nvidia_totals['skipped'] += nvidia_stats['skipped']
         # Calculate position in 4-column grid
         col = visible_model_count % COLUMNS
         # AMD label and bar in this column
         bar_height = min(0.4, vertical_spacing * BAR_HEIGHT_RATIO)
         draw_text_and_bar("amd", amd_stats, y_amd_bar, col_left, bar_height, ax)
         draw_text_and_bar("nvidia", nvidia_stats, y_nvidia_bar, col_left, bar_height, ax)
         # Increment counter for next visible model
         visible_model_count += 1
+    # Add legend horizontally in bottom right corner
+    patch_height = 0.3
+    patch_width = 3
+    legend_start_x = 68.7
+    legend_y = max_y + 1
+    legend_spacing = 10
+    legend_font_size = 15
     # Add AMD and NVIDIA test totals in the bottom left
     # Calculate line spacing to align middle with legend
     line_height = 0.4  # Height between lines
     # Position the two lines so their middle aligns with legend_y
     amd_y = legend_y - line_height / 2
     nvidia_y = legend_y + line_height / 2
+    amd_totals_text = f"AMD Tests    - Passed: {amd_totals['passed']}, Failed: {amd_totals['failed']}, Skipped: {amd_totals['skipped']}"
+    nvidia_totals_text = f"NVIDIA Tests - Passed: {nvidia_totals['passed']}, Failed: {nvidia_totals['failed']}, Skipped: {nvidia_totals['skipped']}"
     ax.text(0, amd_y, amd_totals_text,
            ha='left', va='bottom', color='#CCCCCC',
            fontsize=14, fontfamily='monospace')
     ax.text(0, nvidia_y, nvidia_totals_text,
            ha='left', va='bottom', color='#CCCCCC',
            fontsize=14, fontfamily='monospace')
     # Legend entries
     legend_items = [
         ('passed', 'Passed'),

time_series_gradio.py ADDED Viewed

	@@ -0,0 +1,150 @@

+import pandas as pd
+import numpy as np
+from datetime import datetime
+from data import extract_model_data
+from utils import COLORS
+import gradio as gr
+import plotly.express as px
+import plotly.graph_objects as go
+def create_time_series_summary_gradio(historical_df: pd.DataFrame) -> dict:
+    empty_fig = lambda title: go.Figure().update_layout(title=title, height=500,
+        font=dict(size=16, color='#CCCCCC'), paper_bgcolor='#000000',
+        plot_bgcolor='#1a1a1a', margin=dict(b=130)) or go.Figure()
+    if historical_df.empty or 'date' not in historical_df.columns:
+        ef = empty_fig("No historical data available")
+        return {'failure_rates': ef, 'amd_tests': ef, 'nvidia_tests': ef}
+    daily_stats = []
+    for date in sorted(historical_df['date'].unique()):
+        dd = historical_df[historical_df['date'] == date]
+        counts = {'date': date}
+        for platform in ['amd', 'nvidia']:
+            tot_tests = tot_fails = p = f = s = 0
+            for _, row in dd.iterrows():
+                stats = extract_model_data(row)[0 if platform == 'amd' else 1]
+                tot = stats['passed'] + stats['failed'] + stats['error']
+                if tot > 0:
+                    tot_tests += tot
+                    tot_fails += stats['failed'] + stats['error']
+                p += stats['passed']
+                f += stats['failed'] + stats['error']
+                s += stats['skipped']
+            counts.update({f'{platform}_failure_rate': (tot_fails / tot_tests * 100) if tot_tests > 0 else 0,
+                          f'{platform}_passed': p, f'{platform}_failed': f, f'{platform}_skipped': s})
+        daily_stats.append(counts)
+    fr_data = []
+    for i, s in enumerate(daily_stats):
+        for p in ['amd', 'nvidia']:
+            chg = s[f'{p}_failure_rate'] - daily_stats[i-1][f'{p}_failure_rate'] if i > 0 else 0
+            fr_data.append({'date': s['date'], 'failure_rate': s[f'{p}_failure_rate'],
+                           'platform': p.upper(), 'change': chg})
+    def build_test_data(platform):
+        data = []
+        for i, s in enumerate(daily_stats):
+            for tt in ['passed', 'failed', 'skipped']:
+                chg = s[f'{platform}_{tt}'] - daily_stats[i-1][f'{platform}_{tt}'] if i > 0 else 0
+                data.append({'date': s['date'], 'count': s[f'{platform}_{tt}'],
+                           'test_type': tt.capitalize(), 'change': chg})
+        return pd.DataFrame(data)
+    fr_df = pd.DataFrame(fr_data)
+    fig_fr = go.Figure()
+    for p, lc, mc in [('NVIDIA', '#76B900', '#FFFFFF'), ('AMD', '#ED1C24', '#404040')]:
+        d = fr_df[fr_df['platform'] == p]
+        if not d.empty:
+            fig_fr.add_trace(go.Scatter(x=d['date'], y=d['failure_rate'], mode='lines+markers',
+                name=p, line=dict(color=lc, width=3),
+                marker=dict(size=12, color=mc, line=dict(color=lc, width=2)),
+                hovertemplate=f'<b>{p}</b><br>Date: %{{x}}<br>Failure Rate: %{{y:.2f}}%<extra></extra>'))
+    fig_fr.update_layout(title="Overall Failure Rates Over Time", height=500,
+        font=dict(size=16, color='#CCCCCC'), paper_bgcolor='#000000', plot_bgcolor='#1a1a1a',
+        title_font_size=20, legend=dict(font=dict(size=16), bgcolor='rgba(0,0,0,0.5)',
+        orientation="h", yanchor="bottom", y=-0.4, xanchor="center", x=0.5),
+        xaxis=dict(title='Date', title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+        yaxis=dict(title='Failure Rate (%)', title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+        hovermode='x unified', margin=dict(b=130))
+    def create_line_fig(df, title):
+        fig = px.line(df, x='date', y='count', color='test_type',
+            color_discrete_map={"Passed": COLORS['passed'], "Failed": COLORS['failed'], "Skipped": COLORS['skipped']},
+            title=title, labels={'count': 'Number of Tests', 'date': 'Date', 'test_type': 'Test Type'})
+        fig.update_traces(mode='lines+markers', marker=dict(size=8), line=dict(width=3))
+        fig.update_layout(height=500, font=dict(size=16, color='#CCCCCC'), paper_bgcolor='#000000',
+            plot_bgcolor='#1a1a1a', title_font_size=20, legend=dict(font=dict(size=16),
+            bgcolor='rgba(0,0,0,0.5)', orientation="h", yanchor="bottom", y=-0.4, xanchor="center", x=0.5),
+            xaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+            yaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+            hovermode='x unified', margin=dict(b=130))
+        return fig
+    return {'failure_rates': fig_fr,
+            'amd_tests': create_line_fig(build_test_data('amd'), "AMD Test Results Over Time"),
+            'nvidia_tests': create_line_fig(build_test_data('nvidia'), "NVIDIA Test Results Over Time")}
+def create_model_time_series_gradio(historical_df: pd.DataFrame, model_name: str) -> dict:
+    def empty_figs():
+        ef = lambda plat: go.Figure().update_layout(title=f"{model_name.upper()} - {plat} Results Over Time",
+            height=500, font=dict(size=16, color='#CCCCCC'), paper_bgcolor='#000000',
+            plot_bgcolor='#1a1a1a', margin=dict(b=130)) or go.Figure()
+        return {'amd_plot': ef('AMD'), 'nvidia_plot': ef('NVIDIA')}
+    if historical_df.empty or 'date' not in historical_df.columns:
+        return empty_figs()
+    md = historical_df[historical_df.index.str.lower() == model_name.lower()]
+    if md.empty:
+        return empty_figs()
+    dates = sorted(md['date'].unique())
+    def build_data(platform):
+        data = []
+        for i, date in enumerate(dates):
+            dd = md[md['date'] == date]
+            if dd.empty:
+                continue
+            r = dd.iloc[0]
+            passed = r.get(f'success_{platform}', 0)
+            failed = r.get(f'failed_multi_no_{platform}', 0) + r.get(f'failed_single_no_{platform}', 0)
+            skipped = r.get(f'skipped_{platform}', 0)
+            pc = fc = sc = 0
+            if i > 0:
+                prev_dd = md[md['date'] == dates[i-1]]
+                if not prev_dd.empty:
+                    pr = prev_dd.iloc[0]
+                    pc = pr.get(f'success_{platform}', 0)
+                    fc = pr.get(f'failed_multi_no_{platform}', 0) + pr.get(f'failed_single_no_{platform}', 0)
+                    sc = pr.get(f'skipped_{platform}', 0)
+            data.extend([
+                {'date': date, 'count': passed, 'test_type': 'Passed', 'change': passed - pc},
+                {'date': date, 'count': failed, 'test_type': 'Failed', 'change': failed - fc},
+                {'date': date, 'count': skipped, 'test_type': 'Skipped', 'change': skipped - sc}
+            ])
+        return pd.DataFrame(data)
+    def create_fig(df, platform):
+        fig = px.line(df, x='date', y='count', color='test_type',
+            color_discrete_map={"Passed": COLORS['passed'], "Failed": COLORS['failed'], "Skipped": COLORS['skipped']},
+            title=f"{model_name.upper()} - {platform} Results Over Time",
+            labels={'count': 'Number of Tests', 'date': 'Date', 'test_type': 'Test Type'})
+        fig.update_traces(mode='lines+markers', marker=dict(size=8), line=dict(width=3))
+        fig.update_layout(height=500, font=dict(size=16, color='#CCCCCC'), paper_bgcolor='#000000',
+            plot_bgcolor='#1a1a1a', title_font_size=20, legend=dict(font=dict(size=16),
+            bgcolor='rgba(0,0,0,0.5)', orientation="h", yanchor="bottom", y=-0.4, xanchor="center", x=0.5),
+            xaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+            yaxis=dict(title_font_size=16, tickfont_size=14, gridcolor='#333333', showgrid=True),
+            hovermode='x unified', margin=dict(b=130))
+        return fig
+    return {'amd_plot': create_fig(build_data('amd'), 'AMD'),
+            'nvidia_plot': create_fig(build_data('nvidia'), 'NVIDIA')}

utils.py CHANGED Viewed

@@ -49,3 +49,15 @@ logger = setup_logger()
 def generate_underlined_line(text: str) -> str:
     """Return *text* followed by a newline and a '─' rule of the same length."""
     underline = "─" * len(text)
     return "\n".join((text, underline))

 def generate_underlined_line(text: str) -> str:
     """Return *text* followed by a newline and a '─' rule of the same length."""
     underline = "─" * len(text)
     return "\n".join((text, underline))
+# Shared color scheme across all visualization modules
+COLORS = {
+    'passed': '#4CAF50',  # green
+    'failed': '#E53E3E',  # red
+    'skipped': '#FFD54F',  # amber
+    'error': '#8B0000',  # dark red
+    'empty': '#5B5B5B',  # gray
+    'amd': '#ED1C24',  # red; same value as the AMD line color in the failure-rate chart
+    'nvidia': '#76B900'  # green; same value as the NVIDIA line color in the failure-rate chart
+}