VLM_Comparison / aitw_qwen_dataset.json
advaitgupta's picture
Update aitw_qwen_dataset.json
6c7e1ce verified
{
"12172380859428428757": {
"episode_goal": "Open a new Chrome private window",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_qwen_images/episode_12172380859428428757_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [382, 838]",
"Tap: [265, 845]",
"Button: Press Back",
"Swipe: Up"
],
"correct_answer_index": 0
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_qwen_images/episode_12172380859428428757_step_1.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Tap: [224, 776]",
"Swipe: Up",
"Tap: [450, 900]"
],
"correct_answer_index": 1
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_qwen_images/episode_12172380859428428757_step_2.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [90, 780]",
"Tap: [420, 775]",
"Button: Press Back",
"Swipe: Up"
],
"correct_answer_index": 1
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_qwen_images/episode_12172380859428428757_step_3.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Down",
"Tap: [468, 64]",
"Button: Press Home",
"Tap: [270, 295]"
],
"correct_answer_index": 1
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_qwen_images/episode_12172380859428428757_step_4.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [44, 78]",
"Swipe: Down",
"Button: Press Home",
"Tap: [230, 120]"
],
"correct_answer_index": 0
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_qwen_images/episode_12172380859428428757_step_5.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]\nStep 4: Tapped at pixel coordinates (x,y): [44, 78]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Down",
"Tap: [467, 62]",
"Button: Press Home",
"Tap: [270, 290]"
],
"correct_answer_index": 1
}
},
{
"step_id": 6,
"screenshot_path": "./aitw_qwen_images/episode_12172380859428428757_step_6.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]\nStep 4: Tapped at pixel coordinates (x,y): [44, 78]\nStep 5: Tapped at pixel coordinates (x,y): [467, 62]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Swipe: Down",
"Tap: [514, 64]",
"Tap: [230, 120]",
"Button: Press Home"
],
"correct_answer_index": 1
}
},
{
"step_id": 7,
"screenshot_path": "./aitw_qwen_images/episode_12172380859428428757_step_7.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]\nStep 4: Tapped at pixel coordinates (x,y): [44, 78]\nStep 5: Tapped at pixel coordinates (x,y): [467, 62]\nStep 6: Tapped at pixel coordinates (x,y): [514, 64]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [370, 127]",
"Swipe: Down",
"Tap: [350, 65]",
"Button: Press Home"
],
"correct_answer_index": 0
}
},
{
"step_id": 8,
"screenshot_path": "./aitw_qwen_images/episode_12172380859428428757_step_8.png",
"action_history": "Step 0: Tapped at pixel coordinates (x,y): [382, 838]\nStep 1: Tapped at pixel coordinates (x,y): [224, 776]\nStep 2: Tapped at pixel coordinates (x,y): [420, 775]\nStep 3: Tapped at pixel coordinates (x,y): [468, 64]\nStep 4: Tapped at pixel coordinates (x,y): [44, 78]\nStep 5: Tapped at pixel coordinates (x,y): [467, 62]\nStep 6: Tapped at pixel coordinates (x,y): [514, 64]\nStep 7: Tapped at pixel coordinates (x,y): [370, 127]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [200, 900]",
"Swipe: Down",
"Type: 'new tab'",
"Button: Unknown"
],
"correct_answer_index": 3
}
}
]
},
"9027843537045096280": {
"episode_goal": "Open a new Chrome window",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_qwen_images/episode_9027843537045096280_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Button: Press Back",
"Type: 'new window'",
"Tap: [270, 900]"
],
"correct_answer_index": 0
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_qwen_images/episode_9027843537045096280_step_1.png",
"action_history": "Step 0: Pressed Button: Press Home",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [365, 826]",
"Tap: [265, 840]",
"Swipe: Up",
"Button: Press Back"
],
"correct_answer_index": 0
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_qwen_images/episode_9027843537045096280_step_2.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Tapped at pixel coordinates (x,y): [365, 826]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [270, 900]",
"Swipe: Down",
"Button: Unknown",
"Type: 'new window'"
],
"correct_answer_index": 2
}
}
]
},
"2247237893045084693": {
"episode_goal": "Open the calendar and show me this week's events?",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_qwen_images/episode_2247237893045084693_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [270, 960]",
"Button: Press Back",
"Swipe: Down",
"Swipe: Swipe Up"
],
"correct_answer_index": 3
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_qwen_images/episode_2247237893045084693_step_1.png",
"action_history": "Step 0: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'this week's events'",
"Swipe: Down",
"Tap: [465, 183]",
"Tap: [379, 183]"
],
"correct_answer_index": 3
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_qwen_images/episode_2247237893045084693_step_2.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [379, 183]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Tap: [265, 1050]",
"Swipe: Left",
"Tap: [499, 976]"
],
"correct_answer_index": 3
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_qwen_images/episode_2247237893045084693_step_3.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [379, 183]\nStep 2: Tapped at pixel coordinates (x,y): [499, 976]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [302, 950]",
"Tap: [450, 700]",
"Button: Press Back",
"Swipe: Down"
],
"correct_answer_index": 0
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_qwen_images/episode_2247237893045084693_step_4.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Tapped at pixel coordinates (x,y): [379, 183]\nStep 2: Tapped at pixel coordinates (x,y): [499, 976]\nStep 3: Tapped at pixel coordinates (x,y): [302, 950]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Swipe: Down",
"Button: Unknown",
"Tap: [400, 60]"
],
"correct_answer_index": 2
}
}
]
},
"12224608131504749719": {
"episode_goal": "How much does a 2x4x8 board cost at Lowes?",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_qwen_images/episode_12224608131504749719_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [270, 960]",
"Swipe: Swipe Up",
"Button: Press Back",
"Swipe: Down"
],
"correct_answer_index": 1
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_qwen_images/episode_12224608131504749719_step_1.png",
"action_history": "Step 0: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Button: Press Back",
"Tap: [270, 850]",
"Swipe: Down"
],
"correct_answer_index": 0
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_qwen_images/episode_12224608131504749719_step_2.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Pressed Button: Press Home",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Swipe: Up",
"Tap: [289, 950]",
"Tap: [360, 850]"
],
"correct_answer_index": 2
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_qwen_images/episode_12224608131504749719_step_3.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Pressed Button: Press Home\nStep 2: Tapped at pixel coordinates (x,y): [289, 950]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [450, 600]",
"Type: 'How much does a 2x4x8 board cost at Lowes?'",
"Type: 'Find the nearest electronics store'",
"Button: Press Back"
],
"correct_answer_index": 1
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_qwen_images/episode_12224608131504749719_step_4.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Pressed Button: Press Home\nStep 2: Tapped at pixel coordinates (x,y): [289, 950]\nStep 3: Typed: 'How much does a 2x4x8 board cost at Lowes?'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Type: 'How much does a 2x4x8 board cost at Home Depot?'",
"Tap: [286, 129]",
"Tap: [400, 170]",
"Swipe: Down"
],
"correct_answer_index": 1
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_qwen_images/episode_12224608131504749719_step_5.png",
"action_history": "Step 0: Swiped: Swipe Up\nStep 1: Pressed Button: Press Home\nStep 2: Tapped at pixel coordinates (x,y): [289, 950]\nStep 3: Typed: 'How much does a 2x4x8 board cost at Lowes?'\nStep 4: Tapped at pixel coordinates (x,y): [286, 129]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [450, 750]",
"Type: 'Find the nearest electronics store'",
"Button: Unknown",
"Swipe: Down"
],
"correct_answer_index": 2
}
}
]
},
"1307957808436696980": {
"episode_goal": "Google the capital of Mexico",
"steps": [
{
"step_id": 0,
"screenshot_path": "./aitw_qwen_images/episode_1307957808436696980_step_0.png",
"action_history": "",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Tap: [270, 60]",
"Swipe: Up",
"Button: Press Home"
],
"correct_answer_index": 3
}
},
{
"step_id": 1,
"screenshot_path": "./aitw_qwen_images/episode_1307957808436696980_step_1.png",
"action_history": "Step 0: Pressed Button: Press Home",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Tap: [270, 960]",
"Swipe: Down",
"Swipe: Swipe Up"
],
"correct_answer_index": 3
}
},
{
"step_id": 2,
"screenshot_path": "./aitw_qwen_images/episode_1307957808436696980_step_2.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Home",
"Tap: [260, 840]",
"Type: 'capital of Spain'",
"Button: Press Back"
],
"correct_answer_index": 0
}
},
{
"step_id": 3,
"screenshot_path": "./aitw_qwen_images/episode_1307957808436696980_step_3.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up\nStep 2: Pressed Button: Press Home",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Button: Press Back",
"Swipe: Up",
"Tap: [400, 850]",
"Tap: [320, 974]"
],
"correct_answer_index": 3
}
},
{
"step_id": 4,
"screenshot_path": "./aitw_qwen_images/episode_1307957808436696980_step_4.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up\nStep 2: Pressed Button: Press Home\nStep 3: Tapped at pixel coordinates (x,y): [320, 974]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [460, 400]",
"Type: 'Find the nearest electronics store'",
"Button: Press Back",
"Type: 'Google the capital of Mexico'"
],
"correct_answer_index": 3
}
},
{
"step_id": 5,
"screenshot_path": "./aitw_qwen_images/episode_1307957808436696980_step_5.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up\nStep 2: Pressed Button: Press Home\nStep 3: Tapped at pixel coordinates (x,y): [320, 974]\nStep 4: Typed: 'Google the capital of Mexico'",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [400, 245]",
"Swipe: Down",
"Tap: [185, 114]",
"Type: 'capital of Spain'"
],
"correct_answer_index": 2
}
},
{
"step_id": 6,
"screenshot_path": "./aitw_qwen_images/episode_1307957808436696980_step_6.png",
"action_history": "Step 0: Pressed Button: Press Home\nStep 1: Swiped: Swipe Up\nStep 2: Pressed Button: Press Home\nStep 3: Tapped at pixel coordinates (x,y): [320, 974]\nStep 4: Typed: 'Google the capital of Mexico'\nStep 5: Tapped at pixel coordinates (x,y): [185, 114]",
"ground_truth_format": "mcq",
"questions": {
"question": "Based on the goal and screen, what is the precise next action to perform?",
"options": [
"Tap: [450, 600]",
"Button: Unknown",
"Type: 'capital of Canada'",
"Swipe: Down"
],
"correct_answer_index": 1
}
}
]
}
}