"""
Test rapide de comparaison des modèles pour les tâches MCP
Focus sur les tests les plus importants
"""

import sys
import os
import json
import time

# Add the script directory to the import path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))


def test_model(model_path, model_name):
    """Quick test of a single model."""
    try:
        from llama_cpp import Llama

        print(f"🔄 Testing {model_name}...")

        # Initialize with faster, lightweight parameters
        llm = Llama(
            model_path=model_path,
            n_ctx=1024,
            n_threads=1,
            verbose=False
        )

        # Simple MCP comprehension test
        prompt = """You are an AI assistant for an RTS game controlled via MCP.
Tools: get_game_state(), move_units(unit_ids, target_x, target_y)
Command: "Show me the game state"
Reply with JSON: {"tool": "tool_name", "args": {}}"""
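        # Expected reply for this command: {"tool": "get_game_state", "args": {}}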

        start_time = time.time()
        response = llm(
            prompt,
            max_tokens=100,
            temperature=0.1,
            # Stop on the ChatML end-of-turn marker (Qwen) or a plain </s> EOS
            stop=["</s>", "<|im_end|>"]
        )
        response_time = time.time() - start_time

        response_text = response['choices'][0]['text'].strip()
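
        # Scoring rubric (max 10): valid JSON = 3, correct tool = 4,
        # "tool" key present = 2, mentions the game = 1.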
        score = 0

        # Valid JSON?
        try:
            json.loads(response_text)
            score += 3
        except (json.JSONDecodeError, ValueError):
            pass

        # Correct tool?
        if "get_game_state" in response_text:
            score += 4

        # Expected structure?
        if "tool" in response_text:
            score += 2

        # Coherent with the request?
        if "game" in response_text.lower():
            score += 1

        score = min(score, 10)

        print(f"✅ {model_name}: {score}/10 | Time: {response_time:.2f}s")
        print(f"   Response: {response_text[:100]}...")

        return {
            'name': model_name,
            'score': score,
            'time': response_time,
            'response': response_text
        }

    except Exception as e:
        print(f"❌ {model_name}: Error - {e}")
        return {
            'name': model_name,
            'score': 0,
            'time': 0,
            'error': str(e)
        }
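

# Possible refinement (sketch only, not used by test_model() above): parse the
# JSON and compare the "tool" field directly instead of substring matching.
# The function name and default value are illustrative, not part of the test.
def strict_tool_check(response_text, expected_tool="get_game_state"):
    """Return True if response_text is valid JSON that calls expected_tool."""
    try:
        payload = json.loads(response_text)
    except (json.JSONDecodeError, ValueError):
        return False
    return isinstance(payload, dict) and payload.get("tool") == expected_tool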


def main():
    """Quick comparative run."""
    print("🚀 QUICK MCP COMPARISON TEST")
    print("=" * 50)

    # Models to test (paths are relative to the working directory)
    models = [
        {
            'name': 'Qwen2.5-0.5B',
            'path': 'qwen2.5-0.5b-instruct-q4_0.gguf'
        },
        {
            'name': 'Qwen3-0.6B',
            'path': 'Qwen3-0.6B-Q8_0.gguf'
        },
        {
            'name': 'Gemma-3-1B',
            'path': 'google_gemma-3-1b-it-qat-Q4_0.gguf'
        }
    ]

    results = []
    for model in models:
        if os.path.exists(model['path']):
            result = test_model(model['path'], model['name'])
            results.append(result)
        else:
            print(f"❌ File not found: {model['path']}")

    # Results
    print("\n" + "=" * 50)
    print("📊 COMPARISON RESULTS")
    print("=" * 50)

    # Ranking
    sorted_results = sorted(results, key=lambda x: x['score'], reverse=True)

    print("\n🏆 RANKING:")
    for i, result in enumerate(sorted_results, 1):
        if 'error' not in result:
            print(f"   {i}. {result['name']}: {result['score']}/10 ({result['time']:.2f}s)")
        else:
            print(f"   {i}. {result['name']}: FAILED")

    # Analysis: take the best model from the ranking, not from insertion order
    successful_results = [r for r in sorted_results if 'error' not in r and r['score'] > 0]
    if successful_results:
        best_model = successful_results[0]
        print(f"\n🎯 BEST MODEL: {best_model['name']}")
        print(f"   Score: {best_model['score']}/10")
        print(f"   Time: {best_model['time']:.2f}s")

        # Recommendation
        if best_model['score'] >= 7:
            print(f"\n✅ RECOMMENDATION: {best_model['name']} is EXCELLENT for MCP")
        elif best_model['score'] >= 5:
            print(f"\n👍 RECOMMENDATION: {best_model['name']} is GOOD for MCP")
        else:
            print(f"\n⚠️ RECOMMENDATION: {best_model['name']} is LIMITED for MCP")

        # Performance vs. file size
        print("\n⚖️ PERFORMANCE:")
        paths_by_name = {m['name']: m['path'] for m in models}
        for result in successful_results:
            efficiency = result['score'] / result['time'] if result['time'] > 0 else 0
            file_size = os.path.getsize(paths_by_name[result['name']]) / (1024 * 1024)
            print(f"   {result['name']}: {efficiency:.2f} score/s | {file_size:.0f} MB")

    # Save the raw results, the ranking, and the best model name
    with open("quick_model_comparison.json", "w", encoding="utf-8") as f:
        json.dump({
            'results': results,
            'ranking': sorted_results,
            'best_model': successful_results[0]['name'] if successful_results else None
        }, f, indent=2, ensure_ascii=False)

    print("\n📄 Results saved to: quick_model_comparison.json")


if __name__ == "__main__":
    main()