Spaces:
Sleeping
Sleeping
| [ | |
| { | |
| "name": "Qwen2.5-0.5B", | |
| "avg_score": 3.0, | |
| "avg_time": 3.1588314941951205, | |
| "results": [ | |
| { | |
| "command": "show game state", | |
| "score": 4, | |
| "time": 4.286122560501099, | |
| "response": "Here is the response in JSON format:\n\n```json\n{\n " | |
| }, | |
| { | |
| "command": "move infantry to 100,200", | |
| "score": 4, | |
| "time": 4.499698638916016, | |
| "response": "In this case, the tool is \"move_units\" and the arg" | |
| }, | |
| { | |
| "command": "attack enemy tank at 300,150", | |
| "score": 5, | |
| "time": 4.139246940612793, | |
| "response": "In this case, the tool is \"attack_unit\" and the ar" | |
| }, | |
| { | |
| "command": "build barracks near 250,200", | |
| "score": 3, | |
| "time": 2.345900774002075, | |
| "response": ".\n```json\n{\n \"tool\": \"build_building\",\n \"args\": " | |
| }, | |
| { | |
| "command": "defend base with all units", | |
| "score": 1, | |
| "time": 1.6794795989990234, | |
| "response": ".\n```json\n{\n \"tool\": \"move_units\",\n \"args\": {\n " | |
| }, | |
| { | |
| "command": "harvest resources with harvester", | |
| "score": 2, | |
| "time": 1.0160574913024902, | |
| "response": ".\n```json\n{\n \"tool\": \"get_game_state\",\n \"args\": " | |
| }, | |
| { | |
| "command": "scout enemy positions", | |
| "score": 2, | |
| "time": 4.145314455032349, | |
| "response": "In this scenario, the user command is \"scout enemy" | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Qwen3-0.6B", | |
| "avg_score": 2.142857142857143, | |
| "avg_time": 7.839082138878958, | |
| "results": [ | |
| { | |
| "command": "show game state", | |
| "score": 4, | |
| "time": 8.01675820350647, | |
| "response": "If the user command is not available, respond with" | |
| }, | |
| { | |
| "command": "move infantry to 100,200", | |
| "score": 3, | |
| "time": 7.884659290313721, | |
| "response": "{\n \"tool\": \"move_units\",\n \"args\": {\n \"unit_id" | |
| }, | |
| { | |
| "command": "attack enemy tank at 300,150", | |
| "score": 3, | |
| "time": 7.239688873291016, | |
| "response": "{\n \"tool\": \"attack_unit\",\n \"args\": {\n \"attack" | |
| }, | |
| { | |
| "command": "build barracks near 250,200", | |
| "score": 1, | |
| "time": 7.6789350509643555, | |
| "response": "The correct answer is {\"tool\": \"build\", \"args\": {\"" | |
| }, | |
| { | |
| "command": "defend base with all units", | |
| "score": 1, | |
| "time": 8.047882318496704, | |
| "response": "Possible tool names: build, get_game_state, move, " | |
| }, | |
| { | |
| "command": "harvest resources with harvester", | |
| "score": 1, | |
| "time": 8.035732746124268, | |
| "response": "The correct answer is {\"tool\": \"harvest\", \"args\": " | |
| }, | |
| { | |
| "command": "scout enemy positions", | |
| "score": 2, | |
| "time": 7.969918489456177, | |
| "response": "The user's command is \"scout enemy positions\". Wha" | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Gemma-3-270M", | |
| "avg_score": 1.4285714285714286, | |
| "avg_time": 4.817336320877075, | |
| "results": [ | |
| { | |
| "command": "show game state", | |
| "score": 1, | |
| "time": 4.833978652954102, | |
| "response": "- show game state\n- move_units(unit_ids, target_x," | |
| }, | |
| { | |
| "command": "move infantry to 100,200", | |
| "score": 2, | |
| "time": 4.697685718536377, | |
| "response": "```\n`move infantry to 100,200`\n\n**Explanation:**\n\n" | |
| }, | |
| { | |
| "command": "attack enemy tank at 300,150", | |
| "score": 3, | |
| "time": 4.781869649887085, | |
| "response": "```\n**Explanation:**\n\n* `get_game_state()`: Retrie" | |
| }, | |
| { | |
| "command": "build barracks near 250,200", | |
| "score": 3, | |
| "time": 4.599961996078491, | |
| "response": "```\n`build barracks near 250,200`\n```\nJSON:\n```\n{\"" | |
| }, | |
| { | |
| "command": "defend base with all units", | |
| "score": 0, | |
| "time": 4.869190216064453, | |
| "response": "- `tool_name` is a string\n- `args` is a dictionary" | |
| }, | |
| { | |
| "command": "harvest resources with harvester", | |
| "score": 0, | |
| "time": 4.996307849884033, | |
| "response": "```\n- `harvest resources with harvester`\n- `harves" | |
| }, | |
| { | |
| "command": "scout enemy positions", | |
| "score": 1, | |
| "time": 4.942360162734985, | |
| "response": "```\n`scout enemy positions`\n- `tool`: \"scout\"\n- `a" | |
| } | |
| ] | |
| } | |
| ] |