# Hugging Face Spaces page banner captured with this file (Space status: Sleeping).
import glob
import html
import json
import os
import re

import gradio as gr
import pandas as pd
# --- Constants and Configuration ---
# Path to the data directory. Despite the name, the value is relative, so it
# is resolved against the working directory the script is started from.
ABS_DATA_PATH = "data"

# When the data directory is present, make it the working directory so that
# AITW_DATA_ROOT can simply be ".". isdir() is used instead of exists() so a
# stray regular file named "data" is ignored rather than making os.chdir()
# raise NotADirectoryError at import time.
if os.path.isdir(ABS_DATA_PATH):
    os.chdir(ABS_DATA_PATH)
AITW_DATA_ROOT = "."

# Model directory key -> display name shown above each image column.
MODEL_DISPLAY_MAPPING = {
    "gpt": "OpenAI o1",
    "gemini": "Gemini 2.5 Pro",
    "qwen": "Qwen 2.5 VL 72B",
}

# Column order in the UI. The first entry is also the "primary" model whose
# directory load_and_prepare_data scans for JSON files.
MODELS_IN_ORDER = ["gpt", "gemini", "qwen"]

# Number of hidden placeholder cards created up front; at most this many
# steps can be displayed.
MAX_CARDS_TO_DISPLAY = 50
# --- Data Loading Logic ---
def _resolve_correct_option(options, answer_index):
    """Return the option text at *answer_index*, or "N/A" if it cannot be resolved."""
    try:
        index = int(answer_index)
    except (TypeError, ValueError):
        # Missing (None) or non-numeric index: there is no answer to show.
        return "N/A"
    if 0 <= index < len(options):
        return options[index]
    return "N/A"


def load_and_prepare_data(data_root_path, model_keys=None):
    """Load step data from the primary model's JSON files.

    Every ``*.json`` file directly inside ``<data_root_path>/<model_keys[0]>``
    is read. Each file is expected to hold a mapping of episode id to an
    episode dict with an optional ``episode_goal`` and a list of ``steps``;
    each step may carry a ``questions`` dict (``question``, ``options``,
    ``correct_answer_index``) and a ``screenshot_path``.

    Files are processed in sorted path order so the result is stable across
    runs and filesystems. Files that cannot be read or parsed, and entries
    that do not have the expected dict shape, are skipped with a console
    message instead of aborting the whole load.

    Args:
        data_root_path: Directory containing one sub-directory per model key.
        model_keys: Ordered model directory names. The first one is the
            primary directory scanned for JSON files. Defaults to the
            module-level ``MODELS_IN_ORDER``.

    Returns:
        A list of dicts, one per step, with the keys ``episode_goal``,
        ``question``, ``options``, ``correct_option`` and ``image_paths``
        (model key -> screenshot path under that model's directory; the path
        is not checked for existence here). Returns an empty list when the
        primary model directory does not exist.
    """
    if model_keys is None:
        model_keys = MODELS_IN_ORDER
    model_keys = list(model_keys)
    if not model_keys:
        print("Error: No model keys configured.")
        return []

    primary_model_dir = os.path.join(data_root_path, model_keys[0])
    if not os.path.isdir(primary_model_dir):
        print(f"Error: Primary model directory not found at '{primary_model_dir}'")
        return []

    all_steps = []
    # glob() returns entries in arbitrary filesystem order; sort for stability.
    for json_path in sorted(glob.glob(os.path.join(primary_model_dir, "*.json"))):
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError) as exc:
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            print(f"Warning: Skipping unreadable JSON file '{json_path}': {exc}")
            continue
        if not isinstance(data, dict):
            print(f"Warning: Skipping '{json_path}': top-level JSON value is not an object.")
            continue

        for episode_data in data.values():
            if not isinstance(episode_data, dict):
                continue
            episode_goal = episode_data.get("episode_goal", "N/A")

            # "or" fallbacks also cover keys that are present but null.
            for step in episode_data.get("steps") or []:
                if not isinstance(step, dict):
                    continue

                question_block = step.get("questions") or {}
                if not isinstance(question_block, dict):
                    question_block = {}
                options = question_block.get("options") or []

                # Strip leading slashes so os.path.join does not treat the
                # screenshot path as absolute and discard the model directory.
                base_screenshot_path = (step.get("screenshot_path") or "").lstrip("/")
                image_paths = {
                    model_key: os.path.join(data_root_path, model_key, base_screenshot_path)
                    for model_key in model_keys
                }

                all_steps.append({
                    "episode_goal": episode_goal,
                    "question": question_block.get("question", "N/A"),
                    "options": options,
                    "correct_option": _resolve_correct_option(
                        options, question_block.get("correct_answer_index")
                    ),
                    "image_paths": image_paths,
                })
    return all_steps
# --- Custom stylesheet passed to gr.Blocks(css=...) ---
# Widens the app container and defines the card, column and model-title
# classes referenced through `elem_classes` / inline HTML in the interface.
app_css = """
.gradio-container { max-width: 95% !important; }
.comparison-card {
border: 1px solid #E5E7EB; border-radius: 8px; padding: 1rem;
margin-bottom: 1.5rem; box-shadow: 0 1px 3px 0 rgba(0,0,0,0.1), 0 1px 2px 0 rgba(0,0,0,0.06);
}
.card-title {
font-size: 1.1rem; font-weight: 600; color: #1F2937;
border-bottom: 1px solid #F3F4F6; padding-bottom: 0.5rem; margin-bottom: 1rem;
}
.info-column { min-width: 300px; }
.image-column .label-wrapper { display: none !important; }
.model-title { text-align: center; font-weight: 500; color: #4B5563; }
"""
# --- Gradio Interface ---
def _escape_text(value):
    """Return *value* as text that is safe to embed in HTML element content."""
    # quote=False: the text is placed between tags, never inside an attribute.
    return html.escape(str(value), quote=False)


def _format_step_info(step_data):
    """Build the question / options / correct-answer markup for one card.

    All data-derived text is HTML-escaped so that characters such as ``<``,
    ``>`` and ``&`` in the benchmark text are displayed literally instead of
    being interpreted as markup.
    """
    question = _escape_text(step_data['question'])
    option_items = ''.join(f'<li>{_escape_text(opt)}</li>' for opt in step_data['options'])
    correct_option = _escape_text(step_data['correct_option'])
    return f"""
**Question:**
<p>{question}</p>
**Options:**
<ol style="margin-top: 5px; padding-left: 20px;">
{option_items}
</ol>
**Correct Answer:**
<p style="color:green; font-weight:bold;">{correct_option}</p>
"""


def _existing_image(img_path):
    """Return *img_path* if it names an existing regular file, else None.

    isfile() is used rather than exists() because a step without a
    screenshot path yields the model directory itself, which exists but is
    not an image. A missing (None) path is also mapped to None.
    """
    if img_path and os.path.isfile(img_path):
        return img_path
    return None


with gr.Blocks(theme=gr.themes.Default(spacing_size=gr.themes.sizes.spacing_sm), css=app_css) as demo:
    gr.Markdown("# AITW Benchmark Visualizer")
    gr.Markdown("Visual comparison of model outputs for the Android in the Wild (AITW) benchmark.")

    # --- Create Static Placeholders ---
    # A fixed number of hidden cards is created up front; the load handler
    # below fills them with data and makes the used ones visible.
    placeholder_components = []
    for _ in range(MAX_CARDS_TO_DISPLAY):
        # "comparison-card" applies the card border/shadow defined in app_css.
        with gr.Group(visible=False, elem_classes=["comparison-card"]) as card_group:
            card_title = gr.Markdown(elem_classes=["card-title"])
            with gr.Row():
                with gr.Column(scale=1, elem_classes=["info-column"]):
                    info_md = gr.Markdown()
                with gr.Column(scale=3):
                    with gr.Row():
                        image_outputs = {}
                        for model_key in MODELS_IN_ORDER:
                            with gr.Column(elem_classes=["image-column"]):
                                gr.Markdown(f"<h4 class='model-title'>{MODEL_DISPLAY_MAPPING[model_key]}</h4>")
                                image_outputs[model_key] = gr.Image(
                                    show_label=False, show_download_button=True, interactive=False,
                                    height=350, show_fullscreen_button=True
                                )
        placeholder_components.append({
            "card": card_group,
            "title": card_title,
            "info": info_md,
            "images": image_outputs
        })

    # --- Function to update the placeholders ---
    def load_and_update_ui():
        """Load the benchmark data and return one update per output component.

        The returned list is flat and ordered exactly like
        ``output_components_flat``: for every placeholder card, the card
        group, the title, the info markdown, then one image per model in
        ``MODELS_IN_ORDER``. Cards beyond the number of loaded steps are
        hidden and cleared.
        """
        print("Loading and preparing AITW data...")
        all_steps = load_and_prepare_data(AITW_DATA_ROOT)
        if not all_steps:
            gr.Warning(f"No data loaded. Please check that the '{AITW_DATA_ROOT}' directory is structured correctly.")
        else:
            print(f"Successfully loaded {len(all_steps)} steps. Updating UI...")
            if len(all_steps) > MAX_CARDS_TO_DISPLAY:
                # Make the truncation visible instead of silently dropping steps.
                print(f"Only the first {MAX_CARDS_TO_DISPLAY} of {len(all_steps)} steps will be displayed.")

        # Create a flat list of updates for all components
        updates = []
        num_steps_to_show = min(len(all_steps), MAX_CARDS_TO_DISPLAY)
        for i in range(MAX_CARDS_TO_DISPLAY):
            if i < num_steps_to_show:
                step_data = all_steps[i]
                # Update card visibility and title
                updates.append(gr.update(visible=True))
                updates.append(gr.update(value=f"### Main Goal: {_escape_text(step_data['episode_goal'])}"))
                # Update text info
                updates.append(gr.update(value=_format_step_info(step_data)))
                # Update images; a missing screenshot leaves the slot empty
                for model_key in MODELS_IN_ORDER:
                    img_path = step_data['image_paths'].get(model_key)
                    updates.append(gr.update(value=_existing_image(img_path)))
            else:
                # Hide unused placeholder cards
                updates.append(gr.update(visible=False))  # Card group
                updates.append(gr.update(value=""))  # Title
                updates.append(gr.update(value=""))  # Info MD
                for model_key in MODELS_IN_ORDER:
                    updates.append(gr.update(value=None))  # Images
        return updates

    # --- Flatten the list of placeholder components for the 'outputs' argument ---
    # The order here must match the order in which load_and_update_ui appends
    # its updates.
    output_components_flat = []
    for comp_dict in placeholder_components:
        output_components_flat.append(comp_dict['card'])
        output_components_flat.append(comp_dict['title'])
        output_components_flat.append(comp_dict['info'])
        for model_key in MODELS_IN_ORDER:
            output_components_flat.append(comp_dict['images'][model_key])

    # --- Event Wiring ---
    # Populate the cards when the page is loaded.
    demo.load(fn=load_and_update_ui, inputs=None, outputs=output_components_flat)
# Script entry point: launch the app only when this file is run directly.
if __name__ == "__main__":
    launch_options = {"share": True, "debug": True}
    demo.launch(**launch_options)