VLM_Comparison / aitw_4o_dataset.json
advaitgupta's picture
Update aitw_4o_dataset.json
8e688fe verified
{
"16849016505201470177": {
"episode_goal": "Check the settings for the Instagram app",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_4o_images/episode_16849016505201470177_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Swipe Up",
"Button: Press Home",
"Tap: [600, 1400]",
"Swipe: Left"
],
"correct_answer_index": 0
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_4o_images/episode_16849016505201470177_step_1.png",
"action_history": "Step 0: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [491, 753]",
"Tap: [500, 600]",
"Tap: [650, 750]",
"Swipe: Left"
],
"correct_answer_index": 0
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_4o_images/episode_16849016505201470177_step_2.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'Check the settings for the Instagram app'",
"Button: Press Home",
"Swipe: Left",
"Tap: [100, 1400]"
],
"correct_answer_index": 0
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_4o_images/episode_16849016505201470177_step_3.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]\nStep 2: Typed: 'Check the settings for the Instagram app'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [150, 320]",
"Tap: [128, 313]",
"Swipe: Down",
"Button: Press Home"
],
"correct_answer_index": 1
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_4o_images/episode_16849016505201470177_step_4.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]\nStep 2: Typed: 'Check the settings for the Instagram app'\nStep 3: Tapped at pixel coordinates (x,y): [128, 313]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Swipe: Left",
"Type: 'Check the settings for the Instagram app'",
"Tap: [540, 120]"
],
"correct_answer_index": 2
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_4o_images/episode_16849016505201470177_step_5.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]\nStep 2: Typed: 'Check the settings for the Instagram app'\nStep 3: Tapped at pixel coordinates (x,y): [128, 313]\nStep 4: Typed: 'Check the settings for the Instagram app'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [194, 108]",
"Tap: [200, 150]",
"Button: Press Home",
"Swipe: Down"
],
"correct_answer_index": 0
}
},
{
"step_id": 6,
"screenshot_path": "./aitw_4o_images/episode_16849016505201470177_step_6.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]\nStep 2: Typed: 'Check the settings for the Instagram app'\nStep 3: Tapped at pixel coordinates (x,y): [128, 313]\nStep 4: Typed: 'Check the settings for the Instagram app'\nStep 5: Tapped at pixel coordinates (x,y): [194, 108]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Tap: [500, 400]",
"Swipe: Down",
"Type: 'Check the settings for the Instagram app'"
],
"correct_answer_index": 3
}
},
{
"step_id": 7,
"screenshot_path": "./aitw_4o_images/episode_16849016505201470177_step_7.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]\nStep 2: Typed: 'Check the settings for the Instagram app'\nStep 3: Tapped at pixel coordinates (x,y): [128, 313]\nStep 4: Typed: 'Check the settings for the Instagram app'\nStep 5: Tapped at pixel coordinates (x,y): [194, 108]\nStep 6: Typed: 'Check the settings for the Instagram app'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [221, 296]",
"Button: Press Back",
"Swipe: Down",
"Tap: [221, 350]"
],
"correct_answer_index": 0
}
},
{
"step_id": 8,
"screenshot_path": "./aitw_4o_images/episode_16849016505201470177_step_8.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [491, 753]\nStep 2: Typed: 'Check the settings for the Instagram app'\nStep 3: Tapped at pixel coordinates (x,y): [128, 313]\nStep 4: Typed: 'Check the settings for the Instagram app'\nStep 5: Tapped at pixel coordinates (x,y): [194, 108]\nStep 6: Typed: 'Check the settings for the Instagram app'\nStep 7: Tapped at pixel coordinates (x,y): [221, 296]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [700, 300]",
"Swipe: Down",
"Button: Press Back",
"Button: Unknown"
],
"correct_answer_index": 3
}
}
]
},
"9836837435576948503": {
"episode_goal": "What's on the menu at Five Guys?",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_4o_images/episode_9836837435576948503_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Swipe Up",
"Tap: [360, 1450]",
"Button: Press Home",
"Swipe: Down"
],
"correct_answer_index": 0
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_4o_images/episode_9836837435576948503_step_1.png",
"action_history": "Step 0: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Swipe: Down",
"Tap: [495, 743]",
"Tap: [495, 950]"
],
"correct_answer_index": 2
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_4o_images/episode_9836837435576948503_step_2.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [530, 763]",
"Button: Press Home",
"Swipe: Down",
"Tap: [500, 780]"
],
"correct_answer_index": 0
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_4o_images/episode_9836837435576948503_step_3.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [494, 754]",
"Button: Press Home",
"Swipe: Right",
"Tap: [490, 740]"
],
"correct_answer_index": 0
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_4o_images/episode_9836837435576948503_step_4.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]\nStep 3: Tapped at pixel coordinates (x,y): [494, 754]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [320, 282]",
"Button: Press Home",
"Tap: [310, 280]",
"Swipe: Right"
],
"correct_answer_index": 0
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_4o_images/episode_9836837435576948503_step_5.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]\nStep 3: Tapped at pixel coordinates (x,y): [494, 754]\nStep 4: Tapped at pixel coordinates (x,y): [320, 282]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Tap: [662, 103]",
"Tap: [660, 150]",
"Swipe: Down"
],
"correct_answer_index": 1
}
},
{
"step_id": 6,
"screenshot_path": "./aitw_4o_images/episode_9836837435576948503_step_6.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]\nStep 3: Tapped at pixel coordinates (x,y): [494, 754]\nStep 4: Tapped at pixel coordinates (x,y): [320, 282]\nStep 5: Tapped at pixel coordinates (x,y): [662, 103]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Down",
"Type: 'What's on the menu at Five Guys?'",
"Tap: [490, 850]",
"Type: 'Where is the nearest Five Guys?'"
],
"correct_answer_index": 1
}
},
{
"step_id": 7,
"screenshot_path": "./aitw_4o_images/episode_9836837435576948503_step_7.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]\nStep 3: Tapped at pixel coordinates (x,y): [494, 754]\nStep 4: Tapped at pixel coordinates (x,y): [320, 282]\nStep 5: Tapped at pixel coordinates (x,y): [662, 103]\nStep 6: Typed: 'What's on the menu at Five Guys?'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Down",
"Tap: [280, 240]",
"Tap: [282, 180]",
"Button: Press Home"
],
"correct_answer_index": 2
}
},
{
"step_id": 8,
"screenshot_path": "./aitw_4o_images/episode_9836837435576948503_step_8.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]\nStep 3: Tapped at pixel coordinates (x,y): [494, 754]\nStep 4: Tapped at pixel coordinates (x,y): [320, 282]\nStep 5: Tapped at pixel coordinates (x,y): [662, 103]\nStep 6: Typed: 'What's on the menu at Five Guys?'\nStep 7: Tapped at pixel coordinates (x,y): [282, 180]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Down",
"Button: Press Home",
"Tap: [210, 940]",
"Tap: [194, 1092]"
],
"correct_answer_index": 3
}
},
{
"step_id": 9,
"screenshot_path": "./aitw_4o_images/episode_9836837435576948503_step_9.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [495, 743]\nStep 2: Tapped at pixel coordinates (x,y): [530, 763]\nStep 3: Tapped at pixel coordinates (x,y): [494, 754]\nStep 4: Tapped at pixel coordinates (x,y): [320, 282]\nStep 5: Tapped at pixel coordinates (x,y): [662, 103]\nStep 6: Typed: 'What's on the menu at Five Guys?'\nStep 7: Tapped at pixel coordinates (x,y): [282, 180]\nStep 8: Tapped at pixel coordinates (x,y): [194, 1092]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Tap: [700, 100]",
"Button: Unknown",
"Swipe: Left"
],
"correct_answer_index": 2
}
}
]
}
}