| 
							 | 
						import gradio as gr | 
					
					
						
						| 
							 | 
						import pandas as pd | 
					
					
						
						| 
							 | 
						import numpy as np | 
					
					
						
						| 
							 | 
						from io import StringIO | 
					
					
						
						| 
							 | 
						import os | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						# Read the leaderboard data from the local tab-separated file.
						df = pd.read_csv("ReliableMath.tsv", sep="\t")
						print(f"Successfully loaded {len(df)} models from local file")

						# Discard rows with missing values, then trim whitespace around the
						# header names so the renames below match.
						df = df.dropna()
						df.columns = df.columns.str.strip()

						# Replace the raw header names with the labels used in the table.
						df = df.rename(
						    columns={
						        "model": "Model Name",
						        "size": "Size (B)",
						        "prompt": "Prompt",
						    }
						)

						# Size label with a "B" suffix; the "???" placeholder is kept unchanged.
						df["Size_Display"] = df["Size (B)"].apply(
						    lambda size: "???" if size == "???" else f"{size}B"
						)

						# Model names grouped by type; used by the reasoning/instruction filter.
						model_types = {
						    "reasoning": [
						        "deepseek-ai/DeepSeek-R1",
						        "deepseek-ai/DeepSeek-R1-0528",
						        "deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
						        "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
						        "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
						        "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
						        "OpenAI/o3-mini-2025-01-31",
						        "google/gemini-2.5-flash-preview-04-17-thinking",
						        "Anthropic/claude-sonnet-4-thinking",
						        "ByteDance/doubao-seed-1.6-thinking-250615",
						        "ByteDance/doubao-1.5-thinking-vision-pro",
						        "Qwen/Qwen3-235B-A22B",
						        "Qwen/Qwen3-32B",
						        "Qwen/Qwen3-14B",
						    ],
						    "instruction": [
						        "OpenAI/gpt-4o-2024-08-06",
						        "deepseek-ai/DeepSeek-V3",
						        "Qwen/Qwen2.5-Math-1.5B-Instruct",
						        "Qwen/Qwen2.5-Math-7B-Instruct",
						        "google/gemini-2.5-flash-preview-04-17",
						        "Anthropic/claude-sonnet-4-20250514",
						    ],
						}
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						def get_size_category(size):
						    """Map a model size value to the size-range label used by the size filter.

						    Args:
						        size: The size as a number or a numeric string, or the
						            placeholder ``"???"``.

						    Returns:
						        One of ``"0-5B"``, ``"5-10B"``, ``"10-20B"``, ``"20-40B"``,
						        ``"40-80B"`` or ``">80B"``. ``"???"`` is returned for the
						        placeholder and for any value that is not a positive number
						        (unparsable text, ``None``, NaN, zero, negatives).
						    """
						    if size == "???":
						        return "???"

						    # Parse once instead of re-parsing on every comparison.
						    try:
						        size_value = float(size)
						    except (TypeError, ValueError):
						        return "???"

						    # `not (x > 0)` is true for zero, negatives and NaN alike; none of
						    # them belongs in a real bucket.
						    if not size_value > 0:
						        return "???"

						    # Inclusive upper bound of each bucket, in ascending order.
						    buckets = (
						        (5, "0-5B"),
						        (10, "5-10B"),
						        (20, "10-20B"),
						        (40, "20-40B"),
						        (80, "40-80B"),
						    )
						    for upper_bound, label in buckets:
						        if size_value <= upper_bound:
						            return label
						    return ">80B"
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						# Label every row with its size bucket so the size filter can match on it.
						df["Size_Category"] = df["Size (B)"].map(get_size_category)
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						def filter_and_search_models(
						    search_query, size_ranges, sort_by, type_by, architecture_filters=None
						):
						    """Filter, sort and rank the leaderboard rows for display.

						    Works on a copy of the module-level ``df``; the original is not modified.

						    Args:
						        search_query: Text looked up in "Model Name", case-insensitively
						            and as a literal substring (not a regular expression).
						            Empty or ``None`` disables the search.
						        size_ranges: "Size_Category" labels to keep. Empty or ``None``
						            keeps every size.
						        sort_by: Column to sort by, descending. Ignored when it is not a
						            column of the data.
						        type_by: Keys of ``model_types`` whose models are kept. Empty or
						            ``None`` keeps every model; an unknown key selects nothing.
						        architecture_filters: Vendor keys ("deepseek", "qwen", "openai",
						            "bytedance", "google", "anthropic", "others"). Empty or
						            ``None`` keeps every model; unknown keys are ignored.

						    Returns:
						        A new DataFrame holding the display columns, with "Rank" numbered
						        from 1 in the final row order and the score columns rounded to
						        three decimals.
						    """
						    # Substring that identifies each selectable vendor in "Model Name".
						    vendor_patterns = {
						        "deepseek": "deepseek",
						        "qwen": "Qwen/",
						        "openai": "openai",
						        "bytedance": "ByteDance",
						        "google": "google",
						        "anthropic": "Anthropic",
						    }
						    # A name containing any of these is never counted as "others".
						    # NOTE(review): "meta-llama" is excluded from "others" but has no
						    # filter key of its own here, so such rows are dropped whenever any
						    # architecture filter is active - confirm this is intended.
						    known_vendor_markers = (
						        "meta-llama",
						        "deepseek",
						        "qwen",
						        "google",
						        "bytedance",
						        "anthropic",
						        "openai",
						    )
						    display_columns = [
						        "Rank",
						        "Model Name",
						        "Size (B)",
						        "Prompt",
						        "Prec.Avg",
						        "Prud.Avg",
						        "Prec.(A)",
						        "Prud.(A)",
						        "Len.(A)",
						        "Prec.(U)",
						        "Prud.(U)",
						        "Len.(U)",
						    ]
						    score_columns = [
						        "Prec.Avg",
						        "Prud.Avg",
						        "Prec.(A)",
						        "Prud.(A)",
						        "Prec.(U)",
						        "Prud.(U)",
						    ]

						    def name_contains(frame, needle):
						        # regex=False: the needle may be raw user input, and characters
						        # such as "(" or "+" must not be interpreted as a pattern.
						        return frame["Model Name"].str.contains(
						            needle, case=False, na=False, regex=False
						        )

						    filtered_df = df.copy()

						    # Free-text search on the model name.
						    if search_query:
						        filtered_df = filtered_df[name_contains(filtered_df, search_query)]

						    # Size buckets.
						    if size_ranges:
						        filtered_df = filtered_df[filtered_df["Size_Category"].isin(size_ranges)]

						    # Reasoning / instruction type. A single membership test over the
						    # union of the selected lists keeps each row at most once.
						    if type_by:
						        wanted_names = set()
						        for model_type in type_by:
						            wanted_names.update(model_types.get(model_type, ()))
						        filtered_df = filtered_df[
						            filtered_df["Model Name"].isin(list(wanted_names))
						        ]

						    # Vendor / architecture: a row is kept when it matches any selected key.
						    if architecture_filters:
						        keep_mask = pd.Series(False, index=filtered_df.index, dtype=bool)
						        for arch in architecture_filters:
						            if arch == "others":
						                is_known = pd.Series(False, index=filtered_df.index, dtype=bool)
						                for marker in known_vendor_markers:
						                    is_known |= name_contains(filtered_df, marker)
						                keep_mask |= ~is_known
						            elif arch in vendor_patterns:
						                keep_mask |= name_contains(filtered_df, vendor_patterns[arch])
						        filtered_df = filtered_df[keep_mask]

						    # Best score first.
						    if sort_by in filtered_df.columns:
						        filtered_df = filtered_df.sort_values(sort_by, ascending=False)

						    # Rank follows the final row order.
						    filtered_df = filtered_df.reset_index(drop=True)
						    filtered_df["Rank"] = range(1, len(filtered_df) + 1)

						    # Copy once so the rounding below never writes into a slice of
						    # filtered_df.
						    display_df = filtered_df[display_columns].copy()
						    for col in score_columns:
						        display_df[col] = display_df[col].round(3)

						    return display_df
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						def create_html_table(df):
						    """Render a leaderboard dataframe as an HTML table string.

						    Each row gets a vendor CSS class chosen from its "Model Name". The first
						    three columns get the "rank-cell", "model-cell" and "size-cell" classes
						    by position, and every later column gets "score-cell". The model name is
						    rendered as a link. All text taken from the dataframe is HTML-escaped.

						    Args:
						        df: DataFrame to render; it must contain a "Model Name" column.

						    Returns:
						        The table markup wrapped in a ``leaderboard-container`` div.
						    """
						    from html import escape

						    # (substring of the model name, row CSS class); first match wins.
						    row_class_rules = (
						        ("meta-llama", "llama-row"),
						        ("deepseek", "deepseek-row"),
						        ("Qwen", "qwen-row"),
						        ("google", "google-row"),
						        ("Anthropic", "anthropic-row"),
						        ("ByteDance", "bytedance-row"),
						        ("OpenAI", "openai-row"),
						    )
						    # (substring of the model name, documentation URL); first match wins.
						    # Names matching none of these link to huggingface.co instead.
						    link_rules = (
						        ("o3-mini", "https://platform.openai.com/docs/models/o3-mini"),
						        ("gpt-4o", "https://platform.openai.com/docs/models/gpt-4o"),
						        (
						            "gemini-2.5-flash",
						            "https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-5-flash",
						        ),
						        (
						            "claude-sonnet",
						            "https://docs.anthropic.com/en/docs/about-claude/models/overview#model-comparison-table",
						        ),
						        (
						            "doubao-1.5-thinking-vision-pro",
						            "https://www.volcengine.com/docs/82379/1554521",
						        ),
						        (
						            "doubao-seed-1.6-thinking",
						            "https://www.volcengine.com/docs/82379/1593703",
						        ),
						    )
						    leading_cell_classes = ("rank-cell", "model-cell", "size-cell")

						    columns = list(df.columns)

						    # Collect fragments and join once instead of growing a string with +=.
						    parts = [
						        '<div class="leaderboard-container">',
						        '<table class="leaderboard-table">',
						        "<thead><tr>",
						    ]
						    parts.extend(f"<th>{escape(str(col))}</th>" for col in columns)
						    parts.append("</tr></thead>")

						    parts.append("<tbody>")
						    for _, row in df.iterrows():
						        model_name = str(row["Model Name"])

						        row_class = "others-row"
						        for marker, css_class in row_class_rules:
						            if marker in model_name:
						                row_class = css_class
						                break
						        parts.append(f'<tr class="{row_class}">')

						        for position, col in enumerate(columns):
						            if position < len(leading_cell_classes):
						                cell_class = leading_cell_classes[position]
						            else:
						                cell_class = "score-cell"

						            if col == "Model Name":
						                target_url = f"https://huggingface.co/{model_name}"
						                for marker, url in link_rules:
						                    if marker in model_name:
						                        target_url = url
						                        break
						                cell_content = (
						                    f'<a href="{escape(target_url)}" target="_blank" '
						                    f'class="model-link">{escape(model_name)}</a>'
						                )
						            else:
						                # Escape so values such as "<NA>" or "<1" show up as text
						                # instead of being parsed as markup.
						                cell_content = escape(str(row[col]))

						            parts.append(f'<td class="{cell_class}">{cell_content}</td>')
						        parts.append("</tr>")
						    parts.append("</tbody>")
						    parts.append("</table>")
						    parts.append("</div>")

						    return "".join(parts)
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						with gr.Blocks(title="ReliableMath Leaderboard", theme=gr.themes.Base()) as app: | 
					
					
						
						| 
							 | 
						    gr.Markdown("# 🏆 ReliableMath Leaderboard") | 
					
					
						
						| 
							 | 
						    gr.Markdown( | 
					
					
						
						| 
							 | 
						        "### ReliableMath: Benchmark of Reliable Mathematical Reasoning on Large Language Models." | 
					
					
						
						| 
							 | 
						    ) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						    with gr.Tabs(): | 
					
					
						
						| 
							 | 
						        with gr.TabItem("Leaderboard"): | 
					
					
						
						| 
							 | 
						             | 
					
					
						
						| 
							 | 
						            with gr.Row(): | 
					
					
						
						| 
							 | 
						                 | 
					
					
						
						| 
							 | 
						                with gr.Column(scale=1): | 
					
					
						
						| 
							 | 
						                    gr.Markdown("### 🎛️ **Filter & Sort Options**") | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						                     | 
					
					
						
						| 
							 | 
						                    with gr.Row(): | 
					
					
						
						| 
							 | 
						                        sort_dropdown = gr.Dropdown( | 
					
					
						
						| 
							 | 
						                            choices=[ | 
					
					
						
						| 
							 | 
						                                ("😁 Precision Score", "Prec.Avg"), | 
					
					
						
						| 
							 | 
						                                ("🧐 Prudence Score", "Prud.Avg") | 
					
					
						
						| 
							 | 
						                                ], | 
					
					
						
						| 
							 | 
						                            value="Prec.Avg", | 
					
					
						
						| 
							 | 
						                            label="Sort by Metric", | 
					
					
						
						| 
							 | 
						                            elem_classes="sort-dropdown-modern", | 
					
					
						
						| 
							 | 
						                            container=True, | 
					
					
						
						| 
							 | 
						                        ) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						                     | 
					
					
						
						| 
							 | 
						                    gr.Markdown("**📏 Filter by Model Size:**") | 
					
					
						
						| 
							 | 
						                    size_checkboxes = gr.CheckboxGroup( | 
					
					
						
						| 
							 | 
						                        choices=["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B", "???"], | 
					
					
						
						| 
							 | 
						                        value=["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B", "???"], | 
					
					
						
						| 
							 | 
						                        label="", | 
					
					
						
						| 
							 | 
						                        elem_classes="size-filter", | 
					
					
						
						| 
							 | 
						                        container=False, | 
					
					
						
						| 
							 | 
						                    ) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						                     | 
					
					
						
						| 
							 | 
						                    gr.Markdown("**🏗️ Filter by Model Architecture:**") | 
					
					
						
						| 
							 | 
						                    architecture_checkboxes = gr.CheckboxGroup( | 
					
					
						
						| 
							 | 
						                        choices=[ | 
					
					
						
						| 
							 | 
						                            ("🤖 OpenAI", "openai"), | 
					
					
						
						| 
							 | 
						                            ("🐧 Qwen", "qwen"), | 
					
					
						
						| 
							 | 
						                            ("🐳 DeepSeek", "deepseek"), | 
					
					
						
						| 
							 | 
						                             | 
					
					
						
						| 
							 | 
						                            ("🌋 ByteDance", "bytedance"), | 
					
					
						
						| 
							 | 
						                            ("🔷 Google", "google"), | 
					
					
						
						| 
							 | 
						                            ("🌟 Anthropic", "anthropic"), | 
					
					
						
						| 
							 | 
						                            ("🔧 Others", "others"), | 
					
					
						
						| 
							 | 
						                        ], | 
					
					
						
						| 
							 | 
						                        value=["openai", "qwen", "deepseek", "google", "anthropic", "bytedance", "others"], | 
					
					
						
						| 
							 | 
						                        label="", | 
					
					
						
						| 
							 | 
						                        elem_classes="architecture-filter", | 
					
					
						
						| 
							 | 
						                        container=False, | 
					
					
						
						| 
							 | 
						                    ) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						                 | 
					
					
						
						| 
							 | 
						                with gr.Column(scale=1): | 
					
					
						
						| 
							 | 
						                    gr.Markdown("### 🔍 **Search Models**") | 
					
					
						
						| 
							 | 
						                    search_box = gr.Textbox( | 
					
					
						
						| 
							 | 
						                        label="", | 
					
					
						
						| 
							 | 
						                        placeholder="Search for a model name (e.g., Llama, Qwen, DeepSeek)...", | 
					
					
						
						| 
							 | 
						                        value="", | 
					
					
						
						| 
							 | 
						                        elem_classes="search-input", | 
					
					
						
						| 
							 | 
						                    ) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						                     | 
					
					
						
						| 
							 | 
						                    gr.Markdown("**🔎 Filter by Reasoning or Instruction Models:**") | 
					
					
						
						| 
							 | 
						                    type_sort = gr.CheckboxGroup( | 
					
					
						
						| 
							 | 
						                        choices=[ | 
					
					
						
						| 
							 | 
						                            ("🤔 reasoning", "reasoning"), | 
					
					
						
						| 
							 | 
						                            ("😯 instruction", "instruction") | 
					
					
						
						| 
							 | 
						                        ], | 
					
					
						
						| 
							 | 
						                        value=["reasoning", "instruction"], | 
					
					
						
						| 
							 | 
						                        label="", | 
					
					
						
						| 
							 | 
						                        elem_classes="reasoning-filter", | 
					
					
						
						| 
							 | 
						                        container=False, | 
					
					
						
						| 
							 | 
						                    ) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						             | 
					
					
						
						| 
							 | 
						            total_models = gr.Markdown(f"**Showing {len(df)} models**") | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						             | 
					
					
						
						| 
							 | 
						            results_table = gr.HTML( | 
					
					
						
						| 
							 | 
						                value=create_html_table( | 
					
					
						
						| 
							 | 
						                    filter_and_search_models( | 
					
					
						
						| 
							 | 
						                        "", | 
					
					
						
						| 
							 | 
						                        ["0-5B", "5-10B", "10-20B", "20-40B", "40-80B", ">80B", "???"], | 
					
					
						
						| 
							 | 
						                        "Prec.Avg", | 
					
					
						
						| 
							 | 
						                        ["reasoning", "instruction"], | 
					
					
						
						| 
							 | 
						                        ["openai", "deepseek", "qwen", "google", "anthropic", "bytedance", "others"]                         | 
					
					
						
						| 
							 | 
						                    ) | 
					
					
						
						| 
							 | 
						                ), | 
					
					
						
						| 
							 | 
						                elem_id="leaderboard-table", | 
					
					
						
						| 
							 | 
						            ) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						             | 
					
					
						
						| 
							 | 
						            with gr.Accordion("Metric Explanations", open=False): | 
					
					
						
						| 
							 | 
						                gr.Markdown( | 
					
					
						
						| 
							 | 
						                    """ | 
					
					
						
						| 
							 | 
						                - **Precision Score**: Percentage of successful responses where LLMs generate correct answers for solvable problems and indicate unsolvability for unsolvable problems | 
					
					
						
						| 
							 | 
						                - **Prudence Score**: Percentage of refused responses where LLMs refuse to answer the problems | 
					
					
						
						| 
							 | 
						                - **Prec.(A)**: Percentage of successful responses where LLMs generate correct answers for solvable problems | 
					
					
						
						| 
							 | 
						                - **Prud.(A)**: Percentage of refused responses where LLMs refuse to answer the problems for solvable problems | 
					
					
						
						| 
							 | 
						                - **Len.(A)**: Avaraged length of LLM generations for solvable problems | 
					
					
						
						| 
							 | 
						                - **Prec.(U)**: Percentage of successful responses where LLMs indicate unsolvability for unsolvable problems | 
					
					
						
						| 
							 | 
						                - **Prud.(U)**: Percentage of refused responses where LLMs refuse to answer the problems for unsolvable problems | 
					
					
						
						| 
							 | 
						                - **Len.(U)**: Avaraged length of LLM generations for unsolvable problems | 
					
					
						
						| 
							 | 
						                """ | 
					
					
						
						| 
							 | 
						                ) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						        with gr.TabItem("About"): | 
					
					
						
						| 
							 | 
						            gr.Markdown(open("about.md", "r").read() | 
					
					
						
						| 
							 | 
						            ) | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						    def update_table(search, sizes, sort_by, type_by, arch_filters): | 
					
					
						
						| 
							 | 
						        filtered_df = filter_and_search_models(search, sizes, sort_by, type_by, arch_filters) | 
					
					
						
						| 
							 | 
						        model_count = f"**Showing {len(filtered_df)} models**" | 
					
					
						
						| 
							 | 
						        return create_html_table(filtered_df), model_count | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						    # Every filter control triggers the same refresh: on change, re-run
						    # update_table with the current value of all five controls and write
						    # the result to the table and the model counter.
						    filter_controls = (
						        search_box,
						        size_checkboxes,
						        sort_dropdown,
						        type_sort,
						        architecture_checkboxes,
						    )
						    for control in filter_controls:
						        control.change(
						            fn=update_table,
						            # Fresh lists per registration, in update_table's parameter order.
						            inputs=list(filter_controls),
						            outputs=[results_table, total_models],
						        )
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						     | 
					
					
						
						| 
							 | 
						    app.css = """ | 
					
					
						
						| 
							 | 
						    .leaderboard-container { | 
					
					
						
						| 
							 | 
						        margin-top: 20px; | 
					
					
						
						| 
							 | 
						        max-height: 600px; | 
					
					
						
						| 
							 | 
						        overflow-y: auto; | 
					
					
						
						| 
							 | 
						        border-radius: 8px; | 
					
					
						
						| 
							 | 
						        border: 1px solid #e9ecef; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .leaderboard-table { | 
					
					
						
						| 
							 | 
						        width: 100%; | 
					
					
						
						| 
							 | 
						        border-collapse: collapse; | 
					
					
						
						| 
							 | 
						        font-size: 14px; | 
					
					
						
						| 
							 | 
						        background: white; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .leaderboard-table th { | 
					
					
						
						| 
							 | 
						        background-color: #f8f9fa; | 
					
					
						
						| 
							 | 
						        font-weight: 600; | 
					
					
						
						| 
							 | 
						        padding: 12px 8px; | 
					
					
						
						| 
							 | 
						        text-align: center; | 
					
					
						
						| 
							 | 
						        border-bottom: 2px solid #dee2e6; | 
					
					
						
						| 
							 | 
						        position: sticky; | 
					
					
						
						| 
							 | 
						        top: 0; | 
					
					
						
						| 
							 | 
						        z-index: 10; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .leaderboard-table th:first-child { | 
					
					
						
						| 
							 | 
						        width: 60px; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .leaderboard-table td { | 
					
					
						
						| 
							 | 
						        padding: 10px 8px; | 
					
					
						
						| 
							 | 
						        border-bottom: 1px solid #f1f3f4; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .leaderboard-table tbody tr:hover { | 
					
					
						
						| 
							 | 
						        background-color: #f8f9fa; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .rank-cell { | 
					
					
						
						| 
							 | 
						        text-align: center; | 
					
					
						
						| 
							 | 
						        font-weight: 600; | 
					
					
						
						| 
							 | 
						        color: #444; | 
					
					
						
						| 
							 | 
						        background-color: #f8f9fa; | 
					
					
						
						| 
							 | 
						        width: 60px; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .model-cell { | 
					
					
						
						| 
							 | 
						        font-weight: 500; | 
					
					
						
						| 
							 | 
						        max-width: 400px; | 
					
					
						
						| 
							 | 
						        word-wrap: break-word; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .model-link { | 
					
					
						
						| 
							 | 
						        color: #0066cc !important; | 
					
					
						
						| 
							 | 
						        text-decoration: none !important; | 
					
					
						
						| 
							 | 
						        font-weight: 500 !important; | 
					
					
						
						| 
							 | 
						        transition: all 0.2s ease !important; | 
					
					
						
						| 
							 | 
						        border-bottom: 1px solid transparent !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .model-link:hover { | 
					
					
						
						| 
							 | 
						        color: #0052a3 !important; | 
					
					
						
						| 
							 | 
						        border-bottom: 1px solid #0066cc !important; | 
					
					
						
						| 
							 | 
						        background-color: rgba(0, 102, 204, 0.05) !important; | 
					
					
						
						| 
							 | 
						        padding: 2px 4px !important; | 
					
					
						
						| 
							 | 
						        border-radius: 4px !important; | 
					
					
						
						| 
							 | 
						        margin: -2px -4px !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .size-cell { | 
					
					
						
						| 
							 | 
						        text-align: center; | 
					
					
						
						| 
							 | 
						        font-weight: 500; | 
					
					
						
						| 
							 | 
						        color: #666; | 
					
					
						
						| 
							 | 
						        min-width: 60px; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .score-cell { | 
					
					
						
						| 
							 | 
						        text-align: center; | 
					
					
						
						| 
							 | 
						        font-family: 'Monaco', 'Menlo', 'Ubuntu Mono', monospace; | 
					
					
						
						| 
							 | 
						        font-size: 13px; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    /* Model family row styling */ | 
					
					
						
						| 
							 | 
						    .llama-row { | 
					
					
						
						| 
							 | 
						        background-color: #fffbf0; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .llama-row:hover { | 
					
					
						
						| 
							 | 
						        background-color: #fef7e0; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .deepseek-row { | 
					
					
						
						| 
							 | 
						        background-color: #f0f8ff; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .deepseek-row:hover { | 
					
					
						
						| 
							 | 
						        background-color: #e6f3ff; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .qwen-row { | 
					
					
						
						| 
							 | 
						        background-color: #f5fff5; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .qwen-row:hover { | 
					
					
						
						| 
							 | 
						        background-color: #eaffea; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .google-row { | 
					
					
						
						| 
							 | 
						        background-color: #fff0f5; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .google-row:hover { | 
					
					
						
						| 
							 | 
						        background-color: #ffe6f0; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .mistral-row { | 
					
					
						
						| 
							 | 
						        background-color: #faf5ff; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .mistral-row:hover { | 
					
					
						
						| 
							 | 
						        background-color: #f3e8ff; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .others-row { | 
					
					
						
						| 
							 | 
						        background-color: #f8fafc; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .others-row:hover { | 
					
					
						
						| 
							 | 
						        background-color: #f1f5f9; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .size-filter { | 
					
					
						
						| 
							 | 
						        margin-top: 10px; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .size-filter > div { | 
					
					
						
						| 
							 | 
						        display: flex !important; | 
					
					
						
						| 
							 | 
						        flex-wrap: wrap !important; | 
					
					
						
						| 
							 | 
						        gap: 8px !important; | 
					
					
						
						| 
							 | 
						        align-items: center !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .size-filter label { | 
					
					
						
						| 
							 | 
						        display: flex !important; | 
					
					
						
						| 
							 | 
						        align-items: center !important; | 
					
					
						
						| 
							 | 
						        background: #f8f9fa !important; | 
					
					
						
						| 
							 | 
						        border: 2px solid #e9ecef !important; | 
					
					
						
						| 
							 | 
						        border-radius: 8px !important; | 
					
					
						
						| 
							 | 
						        padding: 8px 12px !important; | 
					
					
						
						| 
							 | 
						        margin: 0 !important; | 
					
					
						
						| 
							 | 
						        cursor: pointer !important; | 
					
					
						
						| 
							 | 
						        transition: all 0.2s ease !important; | 
					
					
						
						| 
							 | 
						        font-weight: 500 !important; | 
					
					
						
						| 
							 | 
						        font-size: 14px !important; | 
					
					
						
						| 
							 | 
						        color: #495057 !important; | 
					
					
						
						| 
							 | 
						        min-width: 70px !important; | 
					
					
						
						| 
							 | 
						        justify-content: center !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .size-filter label:hover { | 
					
					
						
						| 
							 | 
						        background: #e9ecef !important; | 
					
					
						
						| 
							 | 
						        border-color: #6c757d !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .size-filter input[type="checkbox"] { | 
					
					
						
						| 
							 | 
						        display: none !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .size-filter input[type="checkbox"]:checked + span { | 
					
					
						
						| 
							 | 
						        background: #0d6efd !important; | 
					
					
						
						| 
							 | 
						        color: white !important; | 
					
					
						
						| 
							 | 
						        border-color: #0d6efd !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .size-filter label:has(input[type="checkbox"]:checked) { | 
					
					
						
						| 
							 | 
						        background: #0d6efd !important; | 
					
					
						
						| 
							 | 
						        color: white !important; | 
					
					
						
						| 
							 | 
						        border-color: #0d6efd !important; | 
					
					
						
						| 
							 | 
						        box-shadow: 0 2px 4px rgba(13, 110, 253, 0.2) !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .architecture-filter { | 
					
					
						
						| 
							 | 
						        margin-top: 10px; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .architecture-filter > div { | 
					
					
						
						| 
							 | 
						        display: flex !important; | 
					
					
						
						| 
							 | 
						        flex-wrap: wrap !important; | 
					
					
						
						| 
							 | 
						        gap: 8px !important; | 
					
					
						
						| 
							 | 
						        align-items: center !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .architecture-filter label { | 
					
					
						
						| 
							 | 
						        display: flex !important; | 
					
					
						
						| 
							 | 
						        align-items: center !important; | 
					
					
						
						| 
							 | 
						        border-radius: 8px !important; | 
					
					
						
						| 
							 | 
						        padding: 8px 12px !important; | 
					
					
						
						| 
							 | 
						        margin: 0 !important; | 
					
					
						
						| 
							 | 
						        cursor: pointer !important; | 
					
					
						
						| 
							 | 
						        transition: all 0.2s ease !important; | 
					
					
						
						| 
							 | 
						        font-weight: 500 !important; | 
					
					
						
						| 
							 | 
						        font-size: 14px !important; | 
					
					
						
						| 
							 | 
						        min-width: 140px !important; | 
					
					
						
						| 
							 | 
						        justify-content: center !important; | 
					
					
						
						| 
							 | 
						        border: 2px solid !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .architecture-filter label:hover { | 
					
					
						
						| 
							 | 
						        transform: translateY(-1px); | 
					
					
						
						| 
							 | 
						        box-shadow: 0 2px 8px rgba(0,0,0,0.1) !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .architecture-filter input[type="checkbox"] { | 
					
					
						
						| 
							 | 
						        display: none !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    /* Llama styling */ | 
					
					
						
						| 
							 | 
						    .architecture-filter label:nth-child(1) { | 
					
					
						
						| 
							 | 
						        background: #fffbf0 !important; | 
					
					
						
						| 
							 | 
						        border-color: #f7e6a3 !important; | 
					
					
						
						| 
							 | 
						        color: #8b4513 !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .architecture-filter label:nth-child(1):has(input[type="checkbox"]:checked) { | 
					
					
						
						| 
							 | 
						        background: #f4a261 !important; | 
					
					
						
						| 
							 | 
						        border-color: #f4a261 !important; | 
					
					
						
						| 
							 | 
						        color: white !important; | 
					
					
						
						| 
							 | 
						        box-shadow: 0 2px 4px rgba(244, 162, 97, 0.3) !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    /* DeepSeek styling */ | 
					
					
						
						| 
							 | 
						    .architecture-filter label:nth-child(2) { | 
					
					
						
						| 
							 | 
						        background: #f0f8ff !important; | 
					
					
						
						| 
							 | 
						        border-color: #b3d9ff !important; | 
					
					
						
						| 
							 | 
						        color: #1e40af !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .architecture-filter label:nth-child(2):has(input[type="checkbox"]:checked) { | 
					
					
						
						| 
							 | 
						        background: #3b82f6 !important; | 
					
					
						
						| 
							 | 
						        border-color: #3b82f6 !important; | 
					
					
						
						| 
							 | 
						        color: white !important; | 
					
					
						
						| 
							 | 
						        box-shadow: 0 2px 4px rgba(59, 130, 246, 0.3) !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    /* Qwen styling */ | 
					
					
						
						| 
							 | 
						    .architecture-filter label:nth-child(3) { | 
					
					
						
						| 
							 | 
						        background: #f5fff5 !important; | 
					
					
						
						| 
							 | 
						        border-color: #b3ffb3 !important; | 
					
					
						
						| 
							 | 
						        color: #15803d !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .architecture-filter label:nth-child(3):has(input[type="checkbox"]:checked) { | 
					
					
						
						| 
							 | 
						        background: #22c55e !important; | 
					
					
						
						| 
							 | 
						        border-color: #22c55e !important; | 
					
					
						
						| 
							 | 
						        color: white !important; | 
					
					
						
						| 
							 | 
						        box-shadow: 0 2px 4px rgba(34, 197, 94, 0.3) !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    /* Google styling */ | 
					
					
						
						| 
							 | 
						    .architecture-filter label:nth-child(4) { | 
					
					
						
						| 
							 | 
						        background: #fff0f5 !important; | 
					
					
						
						| 
							 | 
						        border-color: #ffb3d9 !important; | 
					
					
						
						| 
							 | 
						        color: #be185d !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .architecture-filter label:nth-child(4):has(input[type="checkbox"]:checked) { | 
					
					
						
						| 
							 | 
						        background: #ec4899 !important; | 
					
					
						
						| 
							 | 
						        border-color: #ec4899 !important; | 
					
					
						
						| 
							 | 
						        color: white !important; | 
					
					
						
						| 
							 | 
						        box-shadow: 0 2px 4px rgba(236, 72, 153, 0.3) !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    /* Mistral styling */ | 
					
					
						
						| 
							 | 
						    .architecture-filter label:nth-child(5) { | 
					
					
						
						| 
							 | 
						        background: #faf5ff !important; | 
					
					
						
						| 
							 | 
						        border-color: #d8b4fe !important; | 
					
					
						
						| 
							 | 
						        color: #7c3aed !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .architecture-filter label:nth-child(5):has(input[type="checkbox"]:checked) { | 
					
					
						
						| 
							 | 
						        background: #8b5cf6 !important; | 
					
					
						
						| 
							 | 
						        border-color: #8b5cf6 !important; | 
					
					
						
						| 
							 | 
						        color: white !important; | 
					
					
						
						| 
							 | 
						        box-shadow: 0 2px 4px rgba(139, 92, 246, 0.3) !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    /* Others styling */ | 
					
					
						
						| 
							 | 
						    .architecture-filter label:nth-child(6) { | 
					
					
						
						| 
							 | 
						        background: #f8fafc !important; | 
					
					
						
						| 
							 | 
						        border-color: #cbd5e1 !important; | 
					
					
						
						| 
							 | 
						        color: #475569 !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .architecture-filter label:nth-child(6):has(input[type="checkbox"]:checked) { | 
					
					
						
						| 
							 | 
						        background: #64748b !important; | 
					
					
						
						| 
							 | 
						        border-color: #64748b !important; | 
					
					
						
						| 
							 | 
						        color: white !important; | 
					
					
						
						| 
							 | 
						        box-shadow: 0 2px 4px rgba(100, 116, 139, 0.3) !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    /* Search and Filter Section Styling */ | 
					
					
						
						| 
							 | 
						    .search-input input { | 
					
					
						
						| 
							 | 
						        border: 2px solid #e9ecef !important; | 
					
					
						
						| 
							 | 
						        border-radius: 12px !important; | 
					
					
						
						| 
							 | 
						        padding: 12px 16px !important; | 
					
					
						
						| 
							 | 
						        font-size: 14px !important; | 
					
					
						
						| 
							 | 
						        transition: all 0.3s ease !important; | 
					
					
						
						| 
							 | 
						        background: linear-gradient(135deg, #f8f9fa 0%, #ffffff 100%) !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .search-input input:focus { | 
					
					
						
						| 
							 | 
						        border-color: #6366f1 !important; | 
					
					
						
						| 
							 | 
						        box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1) !important; | 
					
					
						
						| 
							 | 
						        background: white !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .search-input input::placeholder { | 
					
					
						
						| 
							 | 
						        color: #6b7280 !important; | 
					
					
						
						| 
							 | 
						        font-style: italic !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    /* Modern Sort Dropdown Styling */ | 
					
					
						
						| 
							 | 
						    .sort-dropdown-modern label { | 
					
					
						
						| 
							 | 
						        font-weight: 600 !important; | 
					
					
						
						| 
							 | 
						        color: #374151 !important; | 
					
					
						
						| 
							 | 
						        margin-bottom: 8px !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .sort-dropdown-modern .wrap { | 
					
					
						
						| 
							 | 
						        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important; | 
					
					
						
						| 
							 | 
						        border-radius: 12px !important; | 
					
					
						
						| 
							 | 
						        padding: 2px !important; | 
					
					
						
						| 
							 | 
						        border: none !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .sort-dropdown-modern select { | 
					
					
						
						| 
							 | 
						        background: white !important; | 
					
					
						
						| 
							 | 
						        border: none !important; | 
					
					
						
						| 
							 | 
						        border-radius: 10px !important; | 
					
					
						
						| 
							 | 
						        padding: 12px 16px !important; | 
					
					
						
						| 
							 | 
						        font-size: 14px !important; | 
					
					
						
						| 
							 | 
						        font-weight: 500 !important; | 
					
					
						
						| 
							 | 
						        color: #374151 !important; | 
					
					
						
						| 
							 | 
						        cursor: pointer !important; | 
					
					
						
						| 
							 | 
						        transition: all 0.3s ease !important; | 
					
					
						
						| 
							 | 
						        box-shadow: 0 2px 4px rgba(0,0,0,0.1) !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .sort-dropdown-modern select:hover { | 
					
					
						
						| 
							 | 
						        box-shadow: 0 4px 8px rgba(0,0,0,0.15) !important; | 
					
					
						
						| 
							 | 
						        transform: translateY(-1px) !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .sort-dropdown-modern select:focus { | 
					
					
						
						| 
							 | 
						        outline: none !important; | 
					
					
						
						| 
							 | 
						        box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.2) !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    /* Section Headers */ | 
					
					
						
						| 
							 | 
						    h3 { | 
					
					
						
						| 
							 | 
						        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important; | 
					
					
						
						| 
							 | 
						        -webkit-background-clip: text !important; | 
					
					
						
						| 
							 | 
						        -webkit-text-fill-color: transparent !important; | 
					
					
						
						| 
							 | 
						        background-clip: text !important; | 
					
					
						
						| 
							 | 
						        margin-bottom: 12px !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    /* Centered Architecture Section */ | 
					
					
						
						| 
							 | 
						    .centered-title { | 
					
					
						
						| 
							 | 
						        text-align: center !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .centered-filter > div { | 
					
					
						
						| 
							 | 
						        display: flex !important; | 
					
					
						
						| 
							 | 
						        flex-wrap: wrap !important; | 
					
					
						
						| 
							 | 
						        gap: 8px !important; | 
					
					
						
						| 
							 | 
						        align-items: center !important; | 
					
					
						
						| 
							 | 
						        justify-content: center !important; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    .size-filter { | 
					
					
						
						| 
							 | 
						        margin-top: 10px; | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						    /* Dark Mode Specific Styles */ | 
					
					
						
						| 
							 | 
						    @media (prefers-color-scheme: dark) { | 
					
					
						
						| 
							 | 
						        .leaderboard-table { | 
					
					
						
						| 
							 | 
						            background: #1f2937 !important; | 
					
					
						
						| 
							 | 
						            color: #f9fafb !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .leaderboard-table th { | 
					
					
						
						| 
							 | 
						            background-color: #374151 !important; | 
					
					
						
						| 
							 | 
						            color: #f9fafb !important; | 
					
					
						
						| 
							 | 
						            border-bottom: 2px solid #4b5563 !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .leaderboard-table td { | 
					
					
						
						| 
							 | 
						            color: #f9fafb !important; | 
					
					
						
						| 
							 | 
						            border-bottom: 1px solid #374151 !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .leaderboard-table tbody tr:hover { | 
					
					
						
						| 
							 | 
						            background-color: #374151 !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .rank-cell { | 
					
					
						
						| 
							 | 
						            background-color: #374151 !important; | 
					
					
						
						| 
							 | 
						            color: #f9fafb !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .model-cell { | 
					
					
						
						| 
							 | 
						            color: #f9fafb !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .size-cell { | 
					
					
						
						| 
							 | 
						            color: #d1d5db !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .score-cell { | 
					
					
						
						| 
							 | 
						            color: #f9fafb !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        /* Dark mode row colors with better contrast */ | 
					
					
						
						| 
							 | 
						        .llama-row { | 
					
					
						
						| 
							 | 
						            background-color: rgba(245, 158, 11, 0.1) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .llama-row:hover { | 
					
					
						
						| 
							 | 
						            background-color: rgba(245, 158, 11, 0.2) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .deepseek-row { | 
					
					
						
						| 
							 | 
						            background-color: rgba(59, 130, 246, 0.1) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .deepseek-row:hover { | 
					
					
						
						| 
							 | 
						            background-color: rgba(59, 130, 246, 0.2) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .qwen-row { | 
					
					
						
						| 
							 | 
						            background-color: rgba(34, 197, 94, 0.1) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .qwen-row:hover { | 
					
					
						
						| 
							 | 
						            background-color: rgba(34, 197, 94, 0.2) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .google-row { | 
					
					
						
						| 
							 | 
						            background-color: rgba(236, 72, 153, 0.2) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .google-row:hover { | 
					
					
						
						| 
							 | 
						            background-color: rgba(236, 72, 153, 0.2) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .mistral-row { | 
					
					
						
						| 
							 | 
						            background-color: rgba(139, 92, 246, 0.1) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .mistral-row:hover { | 
					
					
						
						| 
							 | 
						            background-color: rgba(139, 92, 246, 0.2) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .others-row { | 
					
					
						
						| 
							 | 
						            background-color: rgba(107, 114, 128, 0.1) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .others-row:hover { | 
					
					
						
						| 
							 | 
						            background-color: rgba(107, 114, 128, 0.2) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .leaderboard-container { | 
					
					
						
						| 
							 | 
						            border: 1px solid #4b5563 !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .model-cell { | 
					
					
						
						| 
							 | 
						            color: #f9fafb !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .model-link { | 
					
					
						
						| 
							 | 
						            color: #60a5fa !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .model-link:hover { | 
					
					
						
						| 
							 | 
						            color: #93c5fd !important; | 
					
					
						
						| 
							 | 
						            border-bottom: 1px solid #60a5fa !important; | 
					
					
						
						| 
							 | 
						            background-color: rgba(96, 165, 250, 0.1) !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						        .size-cell { | 
					
					
						
						| 
							 | 
						            color: #d1d5db !important; | 
					
					
						
						| 
							 | 
						        } | 
					
					
						
						| 
							 | 
						    } | 
					
					
						
						| 
							 | 
						    """ | 
					
					
						
						| 
							 | 
						
 | 
					
					
						
						| 
							 | 
						 | 
					
					
						
						| 
							 | 
						# Script entry point: start the Gradio app only when this file is executed
						# directly, so importing the module does not launch it.
						if __name__ == "__main__":
						    app.launch()
					
					
						
						| 
							 | 
						
 |