# Source: Hugging Face Space transformers-community/transformers-ci-dashboard
# (Space status: Running; file size: 17,005 bytes)

import matplotlib.pyplot as plt
import numpy as np

import gradio as gr


# Sample test results, keyed as model -> device -> outcome -> list of test names.
# The shared name groups below only exist to avoid repeating the long lists;
# every outcome list in MODELS is its own fresh list object.
_CORE_TESTS = (
    "auth_login", "data_validation", "api_response", "file_upload",
    "cache_hit", "user_permissions", "db_query", "session_mgmt",
    "input_sanitize", "rate_limit", "error_handling", "memory_alloc",
    "thread_safety", "backup_restore",
)
_SERVICE_TESTS = (
    "config_load", "log_rotation", "health_check", "metrics", "alerts",
    "monitoring", "security_scan", "password_hash", "jwt_token",
    "oauth_flow", "csrf_protect", "xss_filter", "sql_injection",
    "rate_limiter", "load_balance", "circuit_break", "retry_logic",
    "timeout_handle", "graceful_shutdown", "hot_reload", "config_watch",
    "env_vars", "secrets_mgmt", "tls_cert", "encryption", "compression",
    "serialization", "deserialization", "validation",
)
_GPU_TESTS = ("gpu_accel", "cuda_ops", "ml_inference", "tensor_ops")
_MULTI_DEVICE_TESTS = ("distributed", "multi_gpu")
_PERF_TESTS = (
    "perf_test", "stress_test", "load_test", "endurance", "benchmark",
    "profiling", "memory_leak", "cpu_usage", "disk_io", "network_bw",
)


def _device_results(passed=(), failed=(), skipped=(), error=()):
    """Build one device entry; keys are always passed/failed/skipped/error, in that order."""
    return {
        "passed": list(passed),
        "failed": list(failed),
        "skipped": list(skipped),
        "error": list(error),
    }


MODELS = {
    "llama": {
        "amd": _device_results(
            passed=_CORE_TESTS,
            failed=("network_timeout",),
            skipped=_GPU_TESTS + _MULTI_DEVICE_TESTS,
        ),
        "nvidia": _device_results(
            passed=_CORE_TESTS + _GPU_TESTS,
            failed=("network_timeout", "distributed"),
            skipped=("multi_gpu",),
        ),
    },
    "gemma3": {
        "amd": _device_results(
            passed=_CORE_TESTS + _SERVICE_TESTS,
            failed=_GPU_TESTS + _MULTI_DEVICE_TESTS,
            skipped=_PERF_TESTS + ("latency", "throughput"),
        ),
        "nvidia": _device_results(
            passed=_CORE_TESTS + _SERVICE_TESTS + _GPU_TESTS,
            failed=_MULTI_DEVICE_TESTS,
            skipped=_PERF_TESTS,
        ),
    },
    "csm": {
        "amd": _device_results(error=("system_crash",)),
        "nvidia": _device_results(error=("system_crash",)),
    },
}

def plot_model_stats(model_name: str) -> tuple[plt.Figure, str, str]:
    """Draw side-by-side AMD / NVIDIA pie charts of a model's test outcomes.

    Args:
        model_name: Key into the module-level ``MODELS`` mapping.

    Returns:
        A ``(figure, amd_failed_info, nvidia_failed_info)`` tuple. The two
        strings list the failed test names for each device ("None" when there
        are no failures). When neither device has any recorded result, the
        figure is a "No test results available" placeholder and both strings
        are empty.

    Raises:
        KeyError: If ``model_name`` is not present in ``MODELS``.
    """
    model_stats = MODELS[model_name]

    # Softer color palette - less pastel, more vibrant
    colors = {
        'passed': '#4CAF50',    # Medium green
        'failed': '#E53E3E',    # More red
        'skipped': '#FFD54F',   # Medium yellow
        'error': '#8B0000'      # Dark red
    }

    # Device label styling, shared by the populated and the empty chart
    title_style = dict(fontsize=28, weight='bold', pad=2, color='#FFFFFF',
                       fontfamily='monospace')

    # Convert test lists to counts, dropping empty categories so the pie
    # has no zero-width wedges
    amd_filtered = {k: len(v) for k, v in model_stats['amd'].items() if v}
    nvidia_filtered = {k: len(v) for k, v in model_stats['nvidia'].items() if v}

    if not amd_filtered and not nvidia_filtered:
        # Handle case where all values are 0 - minimal empty state
        fig, ax = plt.subplots(figsize=(10, 8), facecolor='#000000')
        ax.set_facecolor('#000000')
        ax.text(0.5, 0.5, 'No test results available',
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, fontsize=16, color='#888888',
                fontfamily='monospace', weight='normal')
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.axis('off')
        # Deregister from pyplot so repeated calls do not accumulate figures;
        # the Figure object itself stays usable by the caller.
        plt.close(fig)
        return fig, "", ""

    # Create figure with two subplots side by side with padding
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(18, 9), facecolor='#000000')
    ax1.set_facecolor('#000000')
    ax2.set_facecolor('#000000')

    def create_pie_chart(ax, device_label, filtered_stats):
        """Render one device's pie chart (or an empty-state note) on ``ax``."""
        # Device label closer to chart and bigger
        ax.set_title(device_label, **title_style)

        if not filtered_stats:
            ax.text(0.5, 0.5, 'No test results',
                    horizontalalignment='center', verticalalignment='center',
                    transform=ax.transAxes, fontsize=14, color='#888888',
                    fontfamily='monospace', weight='normal')
            ax.axis('off')
            return

        categories = list(filtered_stats)
        counts = [filtered_stats[category] for category in categories]
        total = sum(counts)

        def absolute_count(pct):
            # matplotlib hands back a percentage derived from float32
            # fractions; round (rather than truncate) when converting it back
            # to a count, otherwise e.g. 0.99999... would be shown as 0.
            return f'{int(round(float(pct) * total / 100))}'

        # Create minimal pie chart - full pie, no donut effect
        _, texts, autotexts = ax.pie(
            counts,
            labels=[category.lower() for category in categories],  # Lowercase for minimal look
            colors=[colors[category] for category in categories],
            autopct=absolute_count,
            startangle=90,
            explode=None,  # No separation
            shadow=False,
            wedgeprops=dict(edgecolor='#1a1a1a', linewidth=0.5),  # Minimal borders
            textprops={'fontsize': 12, 'weight': 'normal', 'color': '#CCCCCC', 'fontfamily': 'monospace'}
        )

        # Count labels inside the wedges: black and bold for contrast
        for autotext in autotexts:
            autotext.set_color('#000000')
            autotext.set_weight('bold')
            autotext.set_fontsize(14)
            autotext.set_fontfamily('monospace')

        # Minimal category labels
        for text in texts:
            text.set_color('#AAAAAA')
            text.set_weight('normal')
            text.set_fontsize(13)
            text.set_fontfamily('monospace')

    # Create both pie charts with device labels
    create_pie_chart(ax1, "amd", amd_filtered)
    create_pie_chart(ax2, "nvidia", nvidia_filtered)

    # Add subtle separation line between charts - stops at device labels level
    line_x = 0.5
    fig.add_artist(plt.Line2D([line_x, line_x], [0.0, 0.85],
                              color='#333333', linewidth=1, alpha=0.5,
                              transform=fig.transFigure))

    # Add central shared title for model name
    fig.suptitle(model_name.lower(),
                 fontsize=18, weight='normal', color='#CCCCCC',
                 fontfamily='monospace', y=0.95)

    # Clean layout with padding and space for central title. Use the figure's
    # own methods rather than pyplot's "current figure" so that a concurrent
    # call cannot end up adjusting the wrong figure.
    fig.tight_layout()
    fig.subplots_adjust(top=0.85, wspace=0.4)  # wspace pads between the charts

    # Generate separate failed tests info for AMD and NVIDIA
    amd_failed = model_stats['amd']['failed']
    nvidia_failed = model_stats['nvidia']['failed']

    amd_failed_info = "Fails on AMD:\n────────────\n" + ("\n".join(amd_failed) if amd_failed else "None")
    nvidia_failed_info = "Fails on NVIDIA:\n────────────────\n" + ("\n".join(nvidia_failed) if nvidia_failed else "None")

    # Deregister from pyplot so repeated clicks do not accumulate figures;
    # the returned Figure can still be rendered by the caller.
    plt.close(fig)
    return fig, amd_failed_info, nvidia_failed_info

def get_model_stats_summary(model_name: str) -> tuple:
    """Summarise a model's combined AMD + NVIDIA results.

    Returns a ``(total, success_rate, status_class)`` tuple where ``total`` is
    the number of recorded tests across both devices, ``success_rate`` is the
    percentage of them that passed (0 when there are no tests), and
    ``status_class`` is one of "success-high" (>= 80), "success-medium"
    (>= 50) or "success-low".
    """
    per_device = MODELS[model_name]
    amd, nvidia = per_device['amd'], per_device['nvidia']

    def tally(outcome):
        # Number of tests with this outcome, AMD and NVIDIA added together
        return len(amd[outcome]) + len(nvidia[outcome])

    total_passed = tally('passed')
    total = total_passed + tally('failed') + tally('skipped') + tally('error')

    if total > 0:
        success_rate = total_passed / total * 100
    else:
        success_rate = 0

    # Pick the status indicator class from the pass percentage
    if success_rate < 50:
        status_class = "success-low"
    elif success_rate < 80:
        status_class = "success-medium"
    else:
        status_class = "success-high"

    return total, success_rate, status_class

# Custom CSS for the dark theme; the string is handed to gr.Blocks(css=...).
# The .sidebar, .model-button, .main-content, .plot-container and
# .failed-tests selectors match the elem_classes given to components in the
# Blocks layout. The sidebar is position: fixed with width 300px, and
# .main-content uses margin-left: 300px, so those two values go together.
# NOTE(review): .model-stats, .stats-badge and .success-indicator are not
# attached to any component in the code visible here; the .success-high /
# .success-medium / .success-low names match the status classes returned by
# get_model_stats_summary — confirm where (or whether) they are rendered.
dark_theme_css = """
/* Global dark theme */
.gradio-container {
    background-color: #000000 !important;
    color: white !important;
}

/* Remove borders from all components */
.gr-box, .gr-form, .gr-panel {
    border: none !important;
    background-color: #000000 !important;
}

/* Sidebar styling */
.sidebar {
    background: linear-gradient(145deg, #111111, #1a1a1a) !important;
    border: none !important;
    padding: 25px !important;
    box-shadow: inset 2px 2px 5px rgba(0, 0, 0, 0.3) !important;
    margin: 0 !important;
    height: 100vh !important;
    position: fixed !important;
    left: 0 !important;
    top: 0 !important;
    width: 300px !important;
}

/* Enhanced model button styling */
.model-button {
    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
    color: white !important;
    border: 2px solid transparent !important;
    margin: 2px 0 !important;
    border-radius: 5px !important;
    padding: 8px 12px !important;
    transition: all 0.4s cubic-bezier(0.4, 0, 0.2, 1) !important;
    position: relative !important;
    overflow: hidden !important;
    box-shadow: 
        0 4px 15px rgba(0, 0, 0, 0.2),
        inset 0 1px 0 rgba(255, 255, 255, 0.1) !important;
    font-weight: 600 !important;
    font-size: 16px !important;
    text-transform: uppercase !important;
    letter-spacing: 0.5px !important;
    font-family: monospace !important;
}

.model-button:hover {
    background: linear-gradient(135deg, #3a3a3a, #2e2e2e) !important;
    color: #74b9ff !important;
}

.model-button:active {
    background: linear-gradient(135deg, #2a2a2a, #1e1e1e) !important;
    color: #5a9bd4 !important;
}

/* Model stats badge */
.model-stats {
    display: flex !important;
    justify-content: space-between !important;
    align-items: center !important;
    margin-top: 8px !important;
    font-size: 12px !important;
    opacity: 0.8 !important;
}

.stats-badge {
    background: rgba(116, 185, 255, 0.2) !important;
    padding: 4px 8px !important;
    border-radius: 10px !important;
    font-weight: 500 !important;
    font-size: 11px !important;
    color: #74b9ff !important;
}

.success-indicator {
    width: 8px !important;
    height: 8px !important;
    border-radius: 50% !important;
    display: inline-block !important;
    margin-right: 6px !important;
}

.success-high { background-color: #4CAF50 !important; }
.success-medium { background-color: #FF9800 !important; }
.success-low { background-color: #F44336 !important; }

/* Regular button styling for non-model buttons */
.gr-button:not(.model-button) {
    background-color: #222222 !important;
    color: white !important;
    border: 1px solid #444444 !important;
    margin: 5px 0 !important;
    border-radius: 8px !important;
    transition: all 0.3s ease !important;
}

.gr-button:not(.model-button):hover {
    background-color: #333333 !important;
    border-color: #666666 !important;
}

/* Plot container */
.plot-container {
    background-color: #000000 !important;
    border: none !important;
}

/* Text elements */
h1, h2, h3, p, .markdown {
    color: white !important;
}

/* Sidebar header enhancement */
.sidebar h1 {
    background: linear-gradient(45deg, #74b9ff, #a29bfe) !important;
    -webkit-background-clip: text !important;
    -webkit-text-fill-color: transparent !important;
    background-clip: text !important;
    text-align: center !important;
    margin-bottom: 15px !important;
    font-size: 28px !important;
    font-weight: 700 !important;
    font-family: monospace !important;
}

/* Sidebar description text */
.sidebar p {
    text-align: center !important;
    margin-bottom: 20px !important;
    line-height: 1.5 !important;
    font-size: 14px !important;
    font-family: monospace !important;
}

.sidebar strong {
    color: #74b9ff !important;
    font-weight: 600 !important;
    font-family: monospace !important;
}

.sidebar em {
    color: #a29bfe !important;
    font-style: normal !important;
    opacity: 0.9 !important;
    font-family: monospace !important;
}

/* Remove all borders globally */
* {
    border-color: transparent !important;
}

/* Main content area */
.main-content {
    background-color: #000000 !important;
    padding: 20px !important;
    margin-left: 300px !important;
}

/* Failed tests display - seamless appearance */
.failed-tests textarea {
    background-color: #000000 !important;
    color: #FFFFFF !important;
    font-family: monospace !important;
    font-size: 14px !important;
    border: none !important;
    padding: 10px !important;
    outline: none !important;
    line-height: 1.4 !important;
}

.failed-tests {
    background-color: #000000 !important;
}

.failed-tests .gr-textbox {
    background-color: #000000 !important;
    border: none !important;
}

"""

# Assemble the Gradio UI: a sidebar with one button per model, and a main
# panel holding the chart plus the per-device lists of failed tests.
with gr.Blocks(title="Model Test Results Dashboard", css=dark_theme_css) as demo:

    with gr.Row():
        # Left column: heading, description and the model selection buttons
        with gr.Column(scale=1, elem_classes=["sidebar"]):
            gr.Markdown("# 🤖 AI Models")
            gr.Markdown("**Select a model to analyze test results**\n\n*Interactive dashboard with detailed metrics*")

            # Buttons are created in MODELS order so they can be paired with
            # their model names again when the click handlers are wired up.
            model_buttons = [
                gr.Button(
                    model_name.lower(),
                    variant="secondary",
                    size="lg",
                    elem_classes=["model-button"],
                )
                for model_name in MODELS
            ]

        # Right column: chart and failure lists
        with gr.Column(scale=4, elem_classes=["main-content"]):
            gr.Markdown("# 📈 Test Results Dashboard")

            plot_output = gr.Plot(
                label="",
                format="png",
                elem_classes=["plot-container"],
            )

            # AMD and NVIDIA failure lists sit next to each other. The initial
            # values are placeholders that the page-load event overwrites.
            with gr.Row():
                with gr.Column(scale=1):
                    amd_failed_tests_output = gr.Textbox(
                        value="Fails on AMD:\n────────────\nnetwork_timeout\ngpu_initialization\nmemory_overflow",
                        lines=6,
                        interactive=False,
                        container=False,
                        elem_classes=["failed-tests"],
                    )
                with gr.Column(scale=1):
                    nvidia_failed_tests_output = gr.Textbox(
                        value="Fails on NVIDIA:\n────────────────\ndistributed\nmulti_gpu\ndriver_conflict",
                        lines=6,
                        interactive=False,
                        container=False,
                        elem_classes=["failed-tests"],
                    )

    # Every event refreshes the same three components
    dashboard_outputs = [plot_output, amd_failed_tests_output, nvidia_failed_tests_output]

    # Bind each button to its model; the default argument freezes the name at
    # definition time so every lambda keeps its own model.
    for model_name, button in zip(MODELS, model_buttons):
        button.click(
            fn=lambda name=model_name: plot_model_stats(name),
            outputs=dashboard_outputs,
        )

    # On page load, show the first model in MODELS
    demo.load(
        fn=lambda: plot_model_stats(list(MODELS)[0]),
        outputs=dashboard_outputs,
    )

if __name__ == "__main__":
    demo.launch()