Spaces:
Sleeping
Sleeping
| { | |
| "all_results": [ | |
| { | |
| "name": "Qwen2.5-0.5B", | |
| "avg_score": 4.0, | |
| "avg_time": 3.52385942141215, | |
| "efficiency": 1.1351190617011169, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 3.418940305709839, | |
| "response": ".\n\nOutils: get_game_state(), move_units(unit_ids, ..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 3.8486745357513428, | |
| "response": "La commande \"move_units\" est utilisée pour déplace..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 0, | |
| "time": 3.3039634227752686, | |
| "response": ", je vais faire une tâche de base. Je vais essayer..." | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Qwen3-0.6B", | |
| "avg_score": 6.0, | |
| "avg_time": 6.404076337814331, | |
| "efficiency": 0.936903260283084, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 6.516923427581787, | |
| "response": "Exemple: {\"tool\": \"get_game_state\", \"args\": {\"unit..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 7, | |
| "time": 6.65591287612915, | |
| "response": "Réponse: {\"tool\": \"move_units\", \"args\": {\"unit_ids..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 4, | |
| "time": 6.039392709732056, | |
| "response": ", but not too much. The user is asking for a respo..." | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Gemma-3-1B", | |
| "avg_score": 4.0, | |
| "avg_time": 6.960511525472005, | |
| "efficiency": 0.5746704082540475, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 7.20223069190979, | |
| "response": "```json\n{{\"tool\": \"get_game_state\", \"args\": {\"map\"..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 6.998988628387451, | |
| "response": "```python\nimport json\n\ndef move_units(unit_ids, ta..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 0, | |
| "time": 6.680315256118774, | |
| "response": ".\n\nA. 100\nB. 200\nC. 300\nD. 400\nE. 500\nF. 600\nG. 70..." | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Gemma-3-270M", | |
| "avg_score": 4.666666666666667, | |
| "avg_time": 3.6990818977355957, | |
| "efficiency": 1.2615743029434903, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 5, | |
| "time": 3.697866201400757, | |
| "response": "```\n**Explication:**\n\n* `get_game_state()` : Récup..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 3.690243721008301, | |
| "response": "```\n\n**Explication:**\n\n* `move_units` est un outil..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 4, | |
| "time": 3.7091357707977295, | |
| "response": ".\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\n..." | |
| } | |
| ] | |
| } | |
| ], | |
| "successful_models": [ | |
| { | |
| "name": "Qwen2.5-0.5B", | |
| "avg_score": 4.0, | |
| "avg_time": 3.52385942141215, | |
| "efficiency": 1.1351190617011169, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 3.418940305709839, | |
| "response": ".\n\nOutils: get_game_state(), move_units(unit_ids, ..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 3.8486745357513428, | |
| "response": "La commande \"move_units\" est utilisée pour déplace..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 0, | |
| "time": 3.3039634227752686, | |
| "response": ", je vais faire une tâche de base. Je vais essayer..." | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Qwen3-0.6B", | |
| "avg_score": 6.0, | |
| "avg_time": 6.404076337814331, | |
| "efficiency": 0.936903260283084, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 6.516923427581787, | |
| "response": "Exemple: {\"tool\": \"get_game_state\", \"args\": {\"unit..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 7, | |
| "time": 6.65591287612915, | |
| "response": "Réponse: {\"tool\": \"move_units\", \"args\": {\"unit_ids..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 4, | |
| "time": 6.039392709732056, | |
| "response": ", but not too much. The user is asking for a respo..." | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Gemma-3-1B", | |
| "avg_score": 4.0, | |
| "avg_time": 6.960511525472005, | |
| "efficiency": 0.5746704082540475, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 7.20223069190979, | |
| "response": "```json\n{{\"tool\": \"get_game_state\", \"args\": {\"map\"..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 6.998988628387451, | |
| "response": "```python\nimport json\n\ndef move_units(unit_ids, ta..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 0, | |
| "time": 6.680315256118774, | |
| "response": ".\n\nA. 100\nB. 200\nC. 300\nD. 400\nE. 500\nF. 600\nG. 70..." | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Gemma-3-270M", | |
| "avg_score": 4.666666666666667, | |
| "avg_time": 3.6990818977355957, | |
| "efficiency": 1.2615743029434903, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 5, | |
| "time": 3.697866201400757, | |
| "response": "```\n**Explication:**\n\n* `get_game_state()` : Récup..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 3.690243721008301, | |
| "response": "```\n\n**Explication:**\n\n* `move_units` est un outil..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 4, | |
| "time": 3.7091357707977295, | |
| "response": ".\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\n..." | |
| } | |
| ] | |
| } | |
| ], | |
| "ranking_by_score": [ | |
| { | |
| "name": "Qwen3-0.6B", | |
| "avg_score": 6.0, | |
| "avg_time": 6.404076337814331, | |
| "efficiency": 0.936903260283084, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 6.516923427581787, | |
| "response": "Exemple: {\"tool\": \"get_game_state\", \"args\": {\"unit..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 7, | |
| "time": 6.65591287612915, | |
| "response": "Réponse: {\"tool\": \"move_units\", \"args\": {\"unit_ids..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 4, | |
| "time": 6.039392709732056, | |
| "response": ", but not too much. The user is asking for a respo..." | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Gemma-3-270M", | |
| "avg_score": 4.666666666666667, | |
| "avg_time": 3.6990818977355957, | |
| "efficiency": 1.2615743029434903, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 5, | |
| "time": 3.697866201400757, | |
| "response": "```\n**Explication:**\n\n* `get_game_state()` : Récup..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 3.690243721008301, | |
| "response": "```\n\n**Explication:**\n\n* `move_units` est un outil..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 4, | |
| "time": 3.7091357707977295, | |
| "response": ".\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\n..." | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Qwen2.5-0.5B", | |
| "avg_score": 4.0, | |
| "avg_time": 3.52385942141215, | |
| "efficiency": 1.1351190617011169, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 3.418940305709839, | |
| "response": ".\n\nOutils: get_game_state(), move_units(unit_ids, ..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 3.8486745357513428, | |
| "response": "La commande \"move_units\" est utilisée pour déplace..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 0, | |
| "time": 3.3039634227752686, | |
| "response": ", je vais faire une tâche de base. Je vais essayer..." | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Gemma-3-1B", | |
| "avg_score": 4.0, | |
| "avg_time": 6.960511525472005, | |
| "efficiency": 0.5746704082540475, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 7.20223069190979, | |
| "response": "```json\n{{\"tool\": \"get_game_state\", \"args\": {\"map\"..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 6.998988628387451, | |
| "response": "```python\nimport json\n\ndef move_units(unit_ids, ta..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 0, | |
| "time": 6.680315256118774, | |
| "response": ".\n\nA. 100\nB. 200\nC. 300\nD. 400\nE. 500\nF. 600\nG. 70..." | |
| } | |
| ] | |
| } | |
| ], | |
| "ranking_by_efficiency": [ | |
| { | |
| "name": "Gemma-3-270M", | |
| "avg_score": 4.666666666666667, | |
| "avg_time": 3.6990818977355957, | |
| "efficiency": 1.2615743029434903, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 5, | |
| "time": 3.697866201400757, | |
| "response": "```\n**Explication:**\n\n* `get_game_state()` : Récup..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 3.690243721008301, | |
| "response": "```\n\n**Explication:**\n\n* `move_units` est un outil..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 4, | |
| "time": 3.7091357707977295, | |
| "response": ".\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\n..." | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Qwen2.5-0.5B", | |
| "avg_score": 4.0, | |
| "avg_time": 3.52385942141215, | |
| "efficiency": 1.1351190617011169, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 3.418940305709839, | |
| "response": ".\n\nOutils: get_game_state(), move_units(unit_ids, ..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 3.8486745357513428, | |
| "response": "La commande \"move_units\" est utilisée pour déplace..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 0, | |
| "time": 3.3039634227752686, | |
| "response": ", je vais faire une tâche de base. Je vais essayer..." | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Qwen3-0.6B", | |
| "avg_score": 6.0, | |
| "avg_time": 6.404076337814331, | |
| "efficiency": 0.936903260283084, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 6.516923427581787, | |
| "response": "Exemple: {\"tool\": \"get_game_state\", \"args\": {\"unit..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 7, | |
| "time": 6.65591287612915, | |
| "response": "Réponse: {\"tool\": \"move_units\", \"args\": {\"unit_ids..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 4, | |
| "time": 6.039392709732056, | |
| "response": ", but not too much. The user is asking for a respo..." | |
| } | |
| ] | |
| }, | |
| { | |
| "name": "Gemma-3-1B", | |
| "avg_score": 4.0, | |
| "avg_time": 6.960511525472005, | |
| "efficiency": 0.5746704082540475, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 7.20223069190979, | |
| "response": "```json\n{{\"tool\": \"get_game_state\", \"args\": {\"map\"..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 6.998988628387451, | |
| "response": "```python\nimport json\n\ndef move_units(unit_ids, ta..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 0, | |
| "time": 6.680315256118774, | |
| "response": ".\n\nA. 100\nB. 200\nC. 300\nD. 400\nE. 500\nF. 600\nG. 70..." | |
| } | |
| ] | |
| } | |
| ], | |
| "best_overall": { | |
| "name": "Qwen3-0.6B", | |
| "avg_score": 6.0, | |
| "avg_time": 6.404076337814331, | |
| "efficiency": 0.936903260283084, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 7, | |
| "time": 6.516923427581787, | |
| "response": "Exemple: {\"tool\": \"get_game_state\", \"args\": {\"unit..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 7, | |
| "time": 6.65591287612915, | |
| "response": "Réponse: {\"tool\": \"move_units\", \"args\": {\"unit_ids..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 4, | |
| "time": 6.039392709732056, | |
| "response": ", but not too much. The user is asking for a respo..." | |
| } | |
| ] | |
| }, | |
| "most_efficient": { | |
| "name": "Gemma-3-270M", | |
| "avg_score": 4.666666666666667, | |
| "avg_time": 3.6990818977355957, | |
| "efficiency": 1.2615743029434903, | |
| "tests": [ | |
| { | |
| "test": "Commande simple", | |
| "score": 5, | |
| "time": 3.697866201400757, | |
| "response": "```\n**Explication:**\n\n* `get_game_state()` : Récup..." | |
| }, | |
| { | |
| "test": "Action avec paramètres", | |
| "score": 5, | |
| "time": 3.690243721008301, | |
| "response": "```\n\n**Explication:**\n\n* `move_units` est un outil..." | |
| }, | |
| { | |
| "test": "Vitesse de réponse", | |
| "score": 4, | |
| "time": 3.7091357707977295, | |
| "response": ".\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\nOK.\n..." | |
| } | |
| ] | |
| } | |
| } |