# Hugging Face Space: MEGA-Bench style leaderboard app (Gradio).
# (Removed scraped page-status banner residue: "Spaces: Running Running".)
import csv
import json
import os
import shutil

import gradio as gr
import numpy as np
import pandas as pd
from huggingface_hub import Repository
# Load the evaluation-result JSON files produced by the benchmark pipeline.
def _read_json(path):
    """Open *path* and return its parsed JSON content."""
    with open(path, "r") as fp:
        return json.load(fp)

# Per-model, per-dimension keyword statistics.
MODEL_DATA = _read_json("./static/eval_results/all_model_keywords_stats.json")
# Per-model overall/core/open-ended summary scores.
SUMMARY_DATA = _read_json("./static/eval_results/all_summary.json")
# Define model name mapping
# Maps the internal model identifiers (the keys used in the eval-result JSON
# files and in MODEL_GROUPS below) to the human-readable labels shown in the
# leaderboard UI. Models missing from this map fall back to their raw id
# (see get_display_model_name).
MODEL_NAME_MAP = {
    "GPT_4o": "GPT-4o (0513)",
    "Claude_3.5": "Claude-3.5-Sonnet",
    "Gemini_1.5_pro_002": "Gemini-1.5-Pro-002",
    "InternVL2_76B": "InternVL2-Llama3-76B",
    "Qwen2_VL_72B": "Qwen2-VL-72B",
    "llava_onevision_72B": "Llava-OneVision-72B",
    "GPT_4o_mini": "GPT-4o mini",
    "Gemini_1.5_flash_002": "Gemini-1.5-Flash-002",
    "Pixtral_12B": "Pixtral 12B",
    "Qwen2_VL_7B": "Qwen2-VL-7B",
    "InternVL2_8B": "InternVL2-8B",
    "llava_onevision_7B": "Llava-OneVision-7B",
    "Llama_3_2_11B": "Llama-3.2-11B",
    "Phi-3.5-vision": "Phi-3.5-Vision",
    "MiniCPM_v2.6": "MiniCPM-V2.6",
    "Idefics3": "Idefics3-8B-Llama3",
}
# Custom name mapping for dimensions and keywords
# Maps the raw dimension keys found in the per-model stats JSON to the
# display titles used for the leaderboard tabs. get_original_dimension()
# performs the reverse lookup, so display values must stay unique.
DIMENSION_NAME_MAP = {
    "skills": "Skills",
    "input_format": "Input Format",
    "output_format": "Output Format",
    "input_num": "Visual Input Number",
    "app": "Application",
}
# Maps raw keyword keys (from the stats JSON) to short display labels used
# as leaderboard column headers. Keywords absent from this map are shown
# as-is (see get_original_keyword / SUPER_GROUPS). Display values must stay
# unique per dimension because get_original_keyword reverse-looks-them-up.
KEYWORD_NAME_MAP = {
    # Skills
    "Object Recognition and Classification": "Object Recognition",
    "Text Recognition (OCR)": "OCR",
    "Language Understanding and Generation": "Language",
    "Scene and Event Understanding": "Scene/Event",
    "Mathematical and Logical Reasoning": "Math/Logic",
    "Commonsense and Social Reasoning": "Commonsense",
    "Ethical and Safety Reasoning": "Ethics/Safety",
    "Domain-Specific Knowledge and Skills": "Domain-Specific",
    "Spatial and Temporal Reasoning": "Spatial/Temporal",
    "Planning and Decision Making": "Planning/Decision",
    # Input Format
    "User Interface Screenshots": "UI related",
    "Text-Based Images and Documents": "Documents",
    "Diagrams and Data Visualizations": "Infographics",
    "Videos": "Videos",
    "Artistic and Creative Content": "Arts/Creative",
    "Photographs": "Photographs",
    "3D Models and Aerial Imagery": "3D related",
    # Application
    "Information_Extraction": "Info Extraction",
    "Planning": "Planning",
    "Coding": "Coding",
    "Perception": "Perception",
    "Metrics": "Metrics",
    "Science": "Science",
    "Knowledge": "Knowledge",
    "Mathematics": "Math",
    # Output format
    # Fixed typo in the display label: "Contexual" -> "Contextual".
    "contextual_formatted_text": "Contextual",
    "structured_output": "Structured",
    "exact_text": "Exact",
    "numerical_data": "Numerical",
    "open_ended_output": "Open-ended",
    "multiple_choice": "MC",
    # Visual input number
    "6-8 images": "6-8 imgs",
    "1-image": "1 img",
    "2-3 images": "2-3 imgs",
    "4-5 images": "4-5 imgs",
    "9-image or more": "9+ imgs",
    "video": "Video",
}
# Extract super groups (dimensions) and their keywords from the first model's
# stats entry; all models are assumed to share the same dimension/keyword
# structure — TODO confirm against the JSON generator.
_first_model_stats = MODEL_DATA[next(iter(MODEL_DATA))]
SUPER_GROUPS = {
    DIMENSION_NAME_MAP[dim]: [KEYWORD_NAME_MAP.get(kw, kw) for kw in keyword_stats]
    for dim, keyword_stats in _first_model_stats.items()
}
SUBMISSION_NAME = "test_leaderboard_submission"
# Build the URL with plain string formatting: os.path.join is for filesystem
# paths and would insert "\" on Windows, corrupting the URL.
SUBMISSION_URL = f"https://huggingface.co/datasets/cccjc/{SUBMISSION_NAME}"
# Path of the local CSV file inside the cloned submission repo.
CSV_DIR = "./test_leaderboard_submission/results.csv"
def get_original_dimension(mapped_dimension):
    """Reverse-lookup the raw dimension key for a display title.

    Raises StopIteration when the display title is unknown (same behaviour
    as ``next`` on an exhausted generator).
    """
    candidates = (
        raw for raw, shown in DIMENSION_NAME_MAP.items() if shown == mapped_dimension
    )
    return next(candidates)
def get_original_keyword(mapped_keyword):
    """Reverse-lookup the raw keyword for a display label.

    Falls back to returning *mapped_keyword* unchanged when no entry of
    KEYWORD_NAME_MAP maps to it (i.e. the keyword was never renamed).
    """
    for raw, shown in KEYWORD_NAME_MAP.items():
        if shown == mapped_keyword:
            return raw
    return mapped_keyword
# Define model groups
# Each entry is a leaderboard filter: group label -> list of internal model
# ids. NOTE(review): the "Efficienty" misspelling is kept deliberately —
# these keys are user-visible filter labels and may be referenced elsewhere;
# renaming them would break those references.
MODEL_GROUPS = {
    "All": list(MODEL_DATA.keys()),
    "Flagship Models": ['GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002', 'Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B'],
    "Efficienty Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini', 'Qwen2_VL_7B', 'Pixtral_12B', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3'],
    "Proprietary Flagship models": ['GPT_4o', 'Claude_3.5', 'Gemini_1.5_pro_002'],
    "Open-source Efficienty Models": ['Qwen2_VL_7B', 'Pixtral_12B', 'InternVL2_8B', 'Phi-3.5-vision', 'MiniCPM_v2.6', 'llava_onevision_7B', 'Llama_3_2_11B', 'Idefics3'],
    "Open-source Flagship Models": ['Qwen2_VL_72B', 'InternVL2_76B', 'llava_onevision_72B'],
    # Fixed: this previously duplicated the full "Efficienty Models" list,
    # which included every model classified as open-source just above; only
    # these two efficiency-tier models are proprietary.
    "Proprietary Efficienty Models": ['Gemini_1.5_flash_002', 'GPT_4o_mini'],
}
def get_display_model_name(model_name):
    """Return the display label for an internal model id (identity fallback)."""
    try:
        return MODEL_NAME_MAP[model_name]
    except KeyError:
        return model_name
def get_df(selected_super_group, selected_model_group):
    """Build the score table for one dimension tab and one model group.

    Returns a DataFrame with Models / Overall / Core / Open-ended columns
    plus one column per keyword of the selected dimension (scores in %,
    rounded to 2 decimals; None when a model lacks data for a keyword),
    sorted by Overall descending.
    """
    raw_dimension = get_original_dimension(selected_super_group)
    records = []
    for model_id in MODEL_GROUPS[selected_model_group]:
        keyword_stats = MODEL_DATA[model_id]
        summary = SUMMARY_DATA[model_id]
        # Core score = best of the CoT and non-CoT macro means.
        best_core = max(
            summary["core_noncot"]["macro_mean_score"],
            summary["core_cot"]["macro_mean_score"],
        )
        record = {
            "Models": get_display_model_name(model_id),  # mapped display name
            "Overall": round(summary["overall_score"] * 100, 2),
            "Core": round(best_core * 100, 2),
            "Open-ended": round(summary["open"]["macro_mean_score"] * 100, 2),
        }
        for display_kw in SUPER_GROUPS[selected_super_group]:
            raw_kw = get_original_keyword(display_kw)
            score = None
            if raw_dimension in keyword_stats and raw_kw in keyword_stats[raw_dimension]:
                score = round(keyword_stats[raw_dimension][raw_kw]["average_score"] * 100, 2)
            record[display_kw] = score
        records.append(record)
    return pd.DataFrame(records).sort_values(by="Overall", ascending=False)
def get_leaderboard_data(selected_super_group, selected_model_group):
    """Return (headers, rows) ready for a Gradio table component."""
    frame = get_df(selected_super_group, selected_model_group)
    headers = ["Models", "Overall", "Core", "Open-ended"]
    headers += SUPER_GROUPS[selected_super_group]
    rows = frame[headers].values.tolist()
    return headers, rows