Spaces:

advaitgupta
/

MCQ_Comparison

Sleeping

App Files Files Community

MCQ_Comparison / visualizer.py

advaitgupta

Update visualizer.py

3718d91 verified 4 months ago

raw

history blame contribute delete

7.47 kB


	import os
	import glob
	import json
	import pandas as pd
	import gradio as gr
	import re

	# --- Constants and Configuration ---
	# Location of the data directory. Despite the name, the value is a relative
	# path, so it is resolved against the working directory at import time.
	ABS_DATA_PATH = "data"
	# Switch into the data directory when it is present so that the relative
	# AITW_DATA_ROOT below resolves inside it. isdir() is used instead of
	# exists() because chdir() raises if the path is a regular file.
	if os.path.isdir(ABS_DATA_PATH):
	    os.chdir(ABS_DATA_PATH)

	# Root folder that contains one sub-directory per model key.
	AITW_DATA_ROOT = "."

	# Model key (also used as the directory name) -> label shown above each image.
	MODEL_DISPLAY_MAPPING = {
	    "gpt": "OpenAI o1",
	    "gemini": "Gemini 2.5 Pro",
	    "qwen": "Qwen 2.5 VL 72B",
	}
	# Left-to-right column order; the first entry is the directory whose JSON
	# files are read by load_and_prepare_data.
	MODELS_IN_ORDER = ["gpt", "gemini", "qwen"]
	# Number of hidden placeholder cards created at build time; at most this
	# many steps are displayed.
	MAX_CARDS_TO_DISPLAY = 50

	# --- Data Loading Logic ---
	def _pick_correct_option(options, answer_index):
	    """Return the option at *answer_index*, or "N/A" if it cannot be resolved."""
	    try:
	        position = int(answer_index)
	    except (TypeError, ValueError, OverflowError):
	        # Missing (None) or non-numeric index: nothing sensible to show.
	        return "N/A"
	    if 0 <= position < len(options):
	        return options[position]
	    return "N/A"


	def load_and_prepare_data(data_root_path, model_keys=None):
	    """
	    Loads step data from JSON files and prepares it as a list of dictionaries.

	    Every ``*.json`` file in ``<data_root_path>/<first model key>`` is read.
	    Each file is expected to hold an object mapping an episode id to an
	    episode dict with an optional "episode_goal" and a "steps" list. Each
	    step may carry a "screenshot_path" and a "questions" dict with the keys
	    "question", "options" and "correct_answer_index".

	    Args:
	        data_root_path: Directory containing one sub-directory per model key.
	        model_keys: Model keys to build image paths for; the first one names
	            the directory the JSON files are read from. Defaults to
	            MODELS_IN_ORDER.

	    Returns:
	        A list with one dict per step, holding "episode_goal", "question",
	        "options", "correct_option" and "image_paths" (model key -> path).
	        The list is empty when the primary model directory does not exist.
	        Files are processed in sorted name order so the result is stable
	        across runs; files that cannot be read or parsed are skipped with a
	        printed warning.
	    """
	    if model_keys is None:
	        model_keys = MODELS_IN_ORDER
	    model_keys = list(model_keys)
	    if not model_keys:
	        return []

	    primary_model_dir = os.path.join(data_root_path, model_keys[0])
	    if not os.path.isdir(primary_model_dir):
	        print(f"Error: Primary model directory not found at '{primary_model_dir}'")
	        return []

	    all_steps = []
	    # glob() returns files in filesystem order; sort for a deterministic UI.
	    for json_path in sorted(glob.glob(os.path.join(primary_model_dir, "*.json"))):
	        try:
	            with open(json_path, "r", encoding="utf-8") as f:
	                data = json.load(f)
	        except (OSError, ValueError) as exc:
	            # ValueError covers both JSON syntax errors and bad UTF-8.
	            print(f"Warning: skipping unreadable file '{json_path}': {exc}")
	            continue
	        if not isinstance(data, dict):
	            print(f"Warning: skipping '{json_path}': top-level JSON value is not an object")
	            continue

	        for episode_data in data.values():
	            if not isinstance(episode_data, dict):
	                continue
	            episode_goal = episode_data.get("episode_goal", "N/A")

	            # `or` also covers keys that are present but set to null.
	            for step in episode_data.get("steps") or []:
	                if not isinstance(step, dict):
	                    continue
	                question_block = step.get("questions") or {}
	                if not isinstance(question_block, dict):
	                    question_block = {}
	                options = question_block.get("options") or []

	                # Strip leading slashes so os.path.join does not treat the
	                # screenshot path as absolute and discard the data root.
	                base_screenshot_path = str(step.get("screenshot_path") or "").lstrip("/")
	                image_paths = {
	                    model_key: os.path.join(data_root_path, model_key, base_screenshot_path)
	                    for model_key in model_keys
	                }

	                all_steps.append({
	                    "episode_goal": episode_goal,
	                    "question": question_block.get("question", "N/A"),
	                    "options": options,
	                    "correct_option": _pick_correct_option(
	                        options, question_block.get("correct_answer_index")
	                    ),
	                    "image_paths": image_paths,
	                })
	    return all_steps

	# --- CSS for a full-width layout ---
	# Stylesheet handed to gr.Blocks(css=...) below.
	#   .gradio-container           widens the app to 95% of the viewport.
	#   .card-title                 set on each card's title Markdown.
	#   .info-column                set on the question/options column.
	#   .image-column .label-wrapper  hides label wrappers inside image columns.
	#   .model-title                used by the <h4> heading above each image.
	# NOTE(review): no component in the code visible here sets the
	# "comparison-card" class, so that rule currently appears unused - confirm
	# whether the card group was meant to carry it.
	app_css = """
	.gradio-container { max-width: 95% !important; }
	.comparison-card {
	border: 1px solid #E5E7EB; border-radius: 8px; padding: 1rem;
	margin-bottom: 1.5rem; box-shadow: 0 1px 3px 0 rgba(0,0,0,0.1), 0 1px 2px 0 rgba(0,0,0,0.06);
	}
	.card-title {
	font-size: 1.1rem; font-weight: 600; color: #1F2937;
	border-bottom: 1px solid #F3F4F6; padding-bottom: 0.5rem; margin-bottom: 1rem;
	}
	.info-column { min-width: 300px; }
	.image-column .label-wrapper { display: none !important; }
	.model-title { text-align: center; font-weight: 500; color: #4B5563; }
	"""

	# --- Gradio Interface ---
	def _format_step_info(step_data):
	    """Return the HTML for a card's info column (question, options, answer).

	    The dataset text is HTML-escaped so that characters such as "<" or "&"
	    in a question or option are displayed literally instead of being parsed
	    as markup. Lines are joined without leading whitespace so the markup
	    does not depend on how this source file is indented.
	    """
	    import html  # function-scope import keeps this block self-contained

	    def as_text(value):
	        return html.escape(str(value), quote=False)

	    option_items = "".join(f"<li>{as_text(opt)}</li>" for opt in step_data["options"])
	    return "\n".join([
	        "Question:",
	        f"<p>{as_text(step_data['question'])}</p>",
	        "Options:",
	        '<ol style="margin-top: 5px; padding-left: 20px;">',
	        option_items,
	        "</ol>",
	        "Correct Answer:",
	        f'<p style="color:green; font-weight:bold;">{as_text(step_data["correct_option"])}</p>',
	    ])


	with gr.Blocks(theme=gr.themes.Default(spacing_size=gr.themes.sizes.spacing_sm), css=app_css) as demo:
	    gr.Markdown("# AITW Benchmark Visualizer")
	    gr.Markdown("Visual comparison of model outputs for the Android in the Wild (AITW) benchmark.")

	    # --- Create Static Placeholders ---
	    # A fixed number of hidden cards is created up front; the load handler
	    # below fills them with data and makes them visible.
	    # NOTE(review): the stylesheet defines a "comparison-card" class that is
	    # not attached to the card group here - confirm whether it should be.
	    placeholder_components = []
	    for _ in range(MAX_CARDS_TO_DISPLAY):
	        with gr.Group(visible=False) as card_group:
	            card_title = gr.Markdown(elem_classes=["card-title"])
	            with gr.Row():
	                with gr.Column(scale=1, elem_classes=["info-column"]):
	                    info_md = gr.Markdown()
	                with gr.Column(scale=3):
	                    with gr.Row():
	                        image_outputs = {}
	                        for model_key in MODELS_IN_ORDER:
	                            with gr.Column(elem_classes=["image-column"]):
	                                gr.Markdown(f"<h4 class='model-title'>{MODEL_DISPLAY_MAPPING[model_key]}</h4>")
	                                image_outputs[model_key] = gr.Image(
	                                    show_label=False, show_download_button=True, interactive=False,
	                                    height=350, show_fullscreen_button=True
	                                )
	        placeholder_components.append({
	            "card": card_group,
	            "title": card_title,
	            "info": info_md,
	            "images": image_outputs
	        })

	    # --- Function to update the placeholders ---
	    def load_and_update_ui():
	        """Load the step data and return one update per placeholder component.

	        The returned list follows the order of `output_components_flat`:
	        for every card, the group, the title, the info Markdown and then one
	        image per entry of MODELS_IN_ORDER. Cards beyond the number of
	        loaded steps are hidden and cleared.
	        """
	        print("Loading and preparing AITW data...")
	        all_steps = load_and_prepare_data(AITW_DATA_ROOT)

	        if not all_steps:
	            gr.Warning(f"No data loaded. Please check that the '{AITW_DATA_ROOT}' directory is structured correctly.")
	        else:
	            print(f"Successfully loaded {len(all_steps)} steps. Updating UI...")

	        steps_to_show = all_steps[:MAX_CARDS_TO_DISPLAY]
	        if len(all_steps) > len(steps_to_show):
	            print(f"Showing the first {len(steps_to_show)} of {len(all_steps)} steps.")

	        # Create a flat list of updates for all components
	        updates = []
	        for step_data in steps_to_show:
	            # Card visibility and title
	            updates.append(gr.update(visible=True))
	            updates.append(gr.update(value=f"### Main Goal: {step_data['episode_goal']}"))

	            # Text info
	            updates.append(gr.update(value=_format_step_info(step_data)))

	            # Images. isfile() rather than exists(): a step without a
	            # screenshot path resolves to the model directory itself, which
	            # exists but is not an image.
	            for model_key in MODELS_IN_ORDER:
	                img_path = step_data['image_paths'].get(model_key)
	                has_image = bool(img_path) and os.path.isfile(img_path)
	                updates.append(gr.update(value=img_path if has_image else None))

	        # Hide unused placeholder cards
	        for _ in range(MAX_CARDS_TO_DISPLAY - len(steps_to_show)):
	            updates.append(gr.update(visible=False))  # Card group
	            updates.append(gr.update(value=""))  # Title
	            updates.append(gr.update(value=""))  # Info MD
	            updates.extend(gr.update(value=None) for _ in MODELS_IN_ORDER)  # Images

	        return updates

	    # --- Flatten the list of placeholder components for the 'outputs' argument ---
	    output_components_flat = []
	    for comp_dict in placeholder_components:
	        output_components_flat.extend([comp_dict['card'], comp_dict['title'], comp_dict['info']])
	        output_components_flat.extend(comp_dict['images'][model_key] for model_key in MODELS_IN_ORDER)

	    # --- Event Wiring ---
	    # Populate the cards when the page is loaded.
	    demo.load(fn=load_and_update_ui, inputs=None, outputs=output_components_flat)


	if __name__ == "__main__":
	    # Start the app only when the file is executed as a script.
	    # NOTE(review): share=True requests a public share link; confirm it is
	    # still wanted if the hosting platform already exposes the app.
	    launch_options = {"share": True, "debug": True}
	    demo.launch(**launch_options)