Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +5 -6
- game_engine.py +151 -59
app.py
CHANGED
|
@@ -123,8 +123,7 @@ with gr.Blocks(
|
|
| 123 |
4. Cliquez sur **➡️ NEXT !** pour la question suivante
|
| 124 |
|
| 125 |
À la fin, vous pourrez consulter vos résultats et contribuer au dataset ouvert !
|
| 126 |
-
|
| 127 |
-
🚀 **Version ultra-optimisée** : OCR en fin de session pour une fluidité maximale !
|
| 128 |
"""
|
| 129 |
)
|
| 130 |
|
|
@@ -132,26 +131,26 @@ with gr.Blocks(
|
|
| 132 |
duration_choice = gr.Radio(
|
| 133 |
choices=["30 secondes", "60 secondes"],
|
| 134 |
value="30 secondes",
|
| 135 |
-
label="
|
| 136 |
)
|
| 137 |
|
| 138 |
operation_choice = gr.Radio(
|
| 139 |
choices=["×", "+", "-", "÷", "Aléatoire"],
|
| 140 |
value="×",
|
| 141 |
-
label="
|
| 142 |
)
|
| 143 |
|
| 144 |
difficulty_choice = gr.Radio(
|
| 145 |
choices=["Facile", "Difficile"],
|
| 146 |
value="Facile",
|
| 147 |
-
label="
|
| 148 |
)
|
| 149 |
|
| 150 |
with gr.Row():
|
| 151 |
with gr.Column(scale=1):
|
| 152 |
# Question
|
| 153 |
question_display = gr.HTML(
|
| 154 |
-
value='<div style="font-size: 2.5em; font-weight: bold; text-align: center; padding: 20px; background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;">Prêt
|
| 155 |
)
|
| 156 |
|
| 157 |
# Contrôles
|
|
|
|
| 123 |
4. Cliquez sur **➡️ NEXT !** pour la question suivante
|
| 124 |
|
| 125 |
À la fin, vous pourrez consulter vos résultats et contribuer au dataset ouvert !
|
| 126 |
+
|
|
|
|
| 127 |
"""
|
| 128 |
)
|
| 129 |
|
|
|
|
| 131 |
duration_choice = gr.Radio(
|
| 132 |
choices=["30 secondes", "60 secondes"],
|
| 133 |
value="30 secondes",
|
| 134 |
+
label="⏱️ Durée"
|
| 135 |
)
|
| 136 |
|
| 137 |
operation_choice = gr.Radio(
|
| 138 |
choices=["×", "+", "-", "÷", "Aléatoire"],
|
| 139 |
value="×",
|
| 140 |
+
label="🔢 Opération"
|
| 141 |
)
|
| 142 |
|
| 143 |
difficulty_choice = gr.Radio(
|
| 144 |
choices=["Facile", "Difficile"],
|
| 145 |
value="Facile",
|
| 146 |
+
label="🎯 Difficulté"
|
| 147 |
)
|
| 148 |
|
| 149 |
with gr.Row():
|
| 150 |
with gr.Column(scale=1):
|
| 151 |
# Question
|
| 152 |
question_display = gr.HTML(
|
| 153 |
+
value='<div style="font-size: 2.5em; font-weight: bold; text-align: center; padding: 20px; background: linear-gradient(45deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;">Prêt(e) ?</div>'
|
| 154 |
)
|
| 155 |
|
| 156 |
# Contrôles
|
game_engine.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
| 1 |
# ==========================================
|
| 2 |
-
# game_engine.py -
|
| 3 |
# ==========================================
|
| 4 |
|
| 5 |
"""
|
| 6 |
-
Moteur de jeu
|
| 7 |
-
OCR en fin de session uniquement - Performance optimale
|
| 8 |
"""
|
| 9 |
|
| 10 |
import random
|
|
@@ -26,7 +25,7 @@ from image_processing_gpu import (
|
|
| 26 |
get_ocr_model_info
|
| 27 |
)
|
| 28 |
|
| 29 |
-
print("✅ Game Engine: Mode GPU
|
| 30 |
|
| 31 |
# Imports dataset
|
| 32 |
try:
|
|
@@ -37,8 +36,8 @@ except ImportError as e:
|
|
| 37 |
DATASET_AVAILABLE = False
|
| 38 |
print(f"⚠️ Modules dataset non disponibles: {e}")
|
| 39 |
|
| 40 |
-
# Dataset name
|
| 41 |
-
DATASET_NAME = "hoololi/
|
| 42 |
|
| 43 |
# Configuration des difficultés par opération
|
| 44 |
DIFFICULTY_RANGES = {
|
|
@@ -48,13 +47,17 @@ DIFFICULTY_RANGES = {
|
|
| 48 |
"÷": {"Facile": (1, 10), "Difficile": (2, 12)}
|
| 49 |
}
|
| 50 |
|
| 51 |
-
def
|
| 52 |
-
"""Traite une image avec OCR et
|
| 53 |
|
| 54 |
-
print(f"🔍 Traitement OCR image #{i+1}")
|
| 55 |
|
| 56 |
-
# OCR
|
| 57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
|
| 59 |
try:
|
| 60 |
recognized_num = int(recognized) if recognized.isdigit() else 0
|
|
@@ -71,7 +74,7 @@ def create_result_row_with_images(i: int, image: dict | np.ndarray | Image.Image
|
|
| 71 |
# Miniature pour affichage
|
| 72 |
image_thumbnail = create_thumbnail_fast(optimized_image, size=(50, 50))
|
| 73 |
|
| 74 |
-
# Libérer mémoire
|
| 75 |
if optimized_image and hasattr(optimized_image, 'close'):
|
| 76 |
try:
|
| 77 |
optimized_image.close()
|
|
@@ -89,17 +92,19 @@ def create_result_row_with_images(i: int, image: dict | np.ndarray | Image.Image
|
|
| 89 |
<td style="text-align: center; padding: 8px; border: 1px solid #ddd;">{image_thumbnail}</td>
|
| 90 |
<td style="text-align: center; padding: 8px; border: 1px solid #ddd; font-weight: bold; color: #333;">{recognized_num}</td>
|
| 91 |
<td style="text-align: center; padding: 8px; border: 1px solid #ddd; color: #333;">{status_icon} {status_text}</td>
|
|
|
|
| 92 |
</tr>
|
| 93 |
""",
|
| 94 |
'is_correct': is_correct,
|
| 95 |
'recognized': recognized,
|
| 96 |
'recognized_num': recognized_num,
|
| 97 |
-
'dataset_image_data': dataset_image_data
|
|
|
|
| 98 |
}
|
| 99 |
|
| 100 |
|
| 101 |
class MathGame:
|
| 102 |
-
"""Moteur de jeu
|
| 103 |
|
| 104 |
def __init__(self):
|
| 105 |
self.is_running = False
|
|
@@ -305,11 +310,11 @@ class MathGame:
|
|
| 305 |
)
|
| 306 |
|
| 307 |
def end_game(self, final_image: dict | np.ndarray | Image.Image | None) -> tuple[str, Image.Image, str, str, gr.update, gr.update, str]:
|
| 308 |
-
"""Fin de jeu - OCR
|
| 309 |
|
| 310 |
self.is_running = False
|
| 311 |
|
| 312 |
-
print("🏁 Fin de jeu - Début OCR
|
| 313 |
|
| 314 |
# Ajouter la dernière image si présente
|
| 315 |
if final_image is not None:
|
|
@@ -323,7 +328,7 @@ class MathGame:
|
|
| 323 |
a, op, b = int(parts[0]), parts[1], int(parts[2])
|
| 324 |
self.operations_history.append((a, b, op, self.correct_answer))
|
| 325 |
|
| 326 |
-
# OCR SÉQUENTIEL
|
| 327 |
total_questions = len(self.user_images)
|
| 328 |
correct_answers = 0
|
| 329 |
table_rows_html = ""
|
|
@@ -331,17 +336,31 @@ class MathGame:
|
|
| 331 |
session_timestamp = datetime.datetime.now().isoformat()
|
| 332 |
session_id = f"session_{int(datetime.datetime.now().timestamp())}_{str(uuid.uuid4())[:8]}"
|
| 333 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
self.session_data = []
|
| 335 |
images_saved = 0
|
| 336 |
|
| 337 |
-
print(f"🔄 Traitement OCR
|
| 338 |
|
| 339 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
for i in range(total_questions):
|
| 341 |
print(f"📷 OCR image {i+1}/{total_questions}...")
|
| 342 |
|
| 343 |
-
# OCR
|
| 344 |
-
row_data =
|
| 345 |
i,
|
| 346 |
self.user_images[i],
|
| 347 |
self.expected_answers[i],
|
|
@@ -349,56 +368,92 @@ class MathGame:
|
|
| 349 |
)
|
| 350 |
|
| 351 |
table_rows_html += row_data['html_row']
|
|
|
|
| 352 |
|
| 353 |
if row_data['is_correct']:
|
| 354 |
correct_answers += 1
|
| 355 |
|
| 356 |
-
# Structure
|
| 357 |
a, b, operation, correct_result = self.operations_history[i] if i < len(self.operations_history) else (0, 0, "×", 0)
|
| 358 |
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
except Exception as e:
|
| 362 |
-
print(f"❌ Erreur get_ocr_model_info: {e}")
|
| 363 |
-
ocr_info_data = {"model_name": "TrOCR", "device": "ZeroGPU"}
|
| 364 |
|
| 365 |
entry = {
|
|
|
|
| 366 |
"session_id": session_id,
|
|
|
|
| 367 |
"timestamp": session_timestamp,
|
| 368 |
-
|
| 369 |
-
|
| 370 |
-
"operation_type": self.operation_type,
|
| 371 |
-
"difficulty_level": self.difficulty,
|
| 372 |
"operand_a": a,
|
| 373 |
"operand_b": b,
|
| 374 |
"operation": operation,
|
| 375 |
"correct_answer": self.expected_answers[i],
|
| 376 |
-
"
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
"
|
|
|
|
| 380 |
"is_correct": row_data['is_correct'],
|
| 381 |
-
|
| 382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
}
|
| 384 |
|
| 385 |
# Image PIL native pour dataset
|
| 386 |
if row_data['dataset_image_data']:
|
| 387 |
entry["handwriting_image"] = row_data['dataset_image_data']["handwriting_image"]
|
| 388 |
-
entry["image_width"] = int(row_data['dataset_image_data']["width"])
|
| 389 |
-
entry["image_height"] = int(row_data['dataset_image_data']["height"])
|
| 390 |
-
entry["has_image"] = True
|
| 391 |
images_saved += 1
|
| 392 |
-
else:
|
| 393 |
-
entry["has_image"] = False
|
| 394 |
|
| 395 |
self.session_data.append(entry)
|
| 396 |
|
|
|
|
|
|
|
|
|
|
| 397 |
accuracy = (correct_answers / total_questions * 100) if total_questions > 0 else 0
|
| 398 |
|
| 399 |
-
# Ajouter
|
| 400 |
for entry in self.session_data:
|
| 401 |
entry["session_accuracy"] = accuracy
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
|
| 403 |
# Nettoyage mémoire
|
| 404 |
for img in self.user_images:
|
|
@@ -410,9 +465,7 @@ class MathGame:
|
|
| 410 |
|
| 411 |
cleanup_memory()
|
| 412 |
|
| 413 |
-
|
| 414 |
-
|
| 415 |
-
# HTML résultats
|
| 416 |
table_html = f"""
|
| 417 |
<div style="overflow-x: auto; margin: 20px 0;">
|
| 418 |
<table style="width: 100%; border-collapse: collapse; border: 2px solid #4a90e2;">
|
|
@@ -426,6 +479,7 @@ class MathGame:
|
|
| 426 |
<th style="padding: 8px;">Votre dessin</th>
|
| 427 |
<th style="padding: 8px;">OCR</th>
|
| 428 |
<th style="padding: 8px;">Statut</th>
|
|
|
|
| 429 |
</tr>
|
| 430 |
</thead>
|
| 431 |
<tbody>
|
|
@@ -442,11 +496,13 @@ class MathGame:
|
|
| 442 |
if export_info["can_export"]:
|
| 443 |
export_section = f"""
|
| 444 |
<div style="margin-top: 20px; padding: 15px; background-color: #e8f5e8; border-radius: 8px;">
|
| 445 |
-
<h3 style="color: #2e7d32;">📊
|
| 446 |
<p style="color: #2e7d32;">
|
| 447 |
✅ {total_questions} réponses • 📊 {accuracy:.1f}% de précision<br>
|
| 448 |
🖼️ {images_saved} images sauvegardées<br>
|
| 449 |
-
|
|
|
|
|
|
|
| 450 |
⚙️ Configuration: {config_display}
|
| 451 |
</p>
|
| 452 |
</div>
|
|
@@ -475,9 +531,13 @@ class MathGame:
|
|
| 475 |
<div style="font-size: 2em; font-weight: bold;">{accuracy:.1f}%</div>
|
| 476 |
<div>Précision</div>
|
| 477 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
</div>
|
| 479 |
</div>
|
| 480 |
-
<h2 style="color: #4a90e2;">📊 Détail des Réponses
|
| 481 |
{table_html}
|
| 482 |
{export_section}
|
| 483 |
</div>
|
|
@@ -494,8 +554,8 @@ class MathGame:
|
|
| 494 |
)
|
| 495 |
|
| 496 |
|
| 497 |
-
def
|
| 498 |
-
"""Export vers le dataset
|
| 499 |
if dataset_name is None:
|
| 500 |
dataset_name = DATASET_NAME
|
| 501 |
|
|
@@ -507,15 +567,28 @@ def export_to_clean_dataset(session_data: list[dict], dataset_name: str = None)
|
|
| 507 |
return "❌ Token HuggingFace manquant"
|
| 508 |
|
| 509 |
try:
|
| 510 |
-
print(f"\n🚀 === EXPORT DATASET
|
| 511 |
print(f"📊 Dataset: {dataset_name}")
|
| 512 |
|
| 513 |
# Filtrer les entrées avec images
|
| 514 |
-
clean_entries = [entry for entry in session_data if entry.get('
|
| 515 |
|
| 516 |
if len(clean_entries) == 0:
|
| 517 |
return "❌ Aucune entrée avec image à exporter"
|
| 518 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 519 |
# Charger dataset existant et combiner
|
| 520 |
try:
|
| 521 |
existing_dataset = load_dataset(dataset_name, split="train")
|
|
@@ -537,35 +610,54 @@ def export_to_clean_dataset(session_data: list[dict], dataset_name: str = None)
|
|
| 537 |
except Exception as e:
|
| 538 |
print(f"⚠️ Conversion image: {e}")
|
| 539 |
|
| 540 |
-
# Statistiques
|
| 541 |
operations_count = {}
|
| 542 |
for entry in clean_entries:
|
| 543 |
-
op = entry.get('
|
| 544 |
operations_count[op] = operations_count.get(op, 0) + 1
|
| 545 |
|
| 546 |
operations_summary = ", ".join([f"{op}: {count}" for op, count in operations_count.items()])
|
| 547 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 548 |
# Push vers HuggingFace
|
| 549 |
print(f"📤 Push vers {dataset_name}...")
|
| 550 |
clean_dataset.push_to_hub(
|
| 551 |
dataset_name,
|
| 552 |
private=False,
|
| 553 |
token=hf_token,
|
| 554 |
-
commit_message=
|
| 555 |
)
|
| 556 |
|
| 557 |
cleanup_memory()
|
| 558 |
|
| 559 |
-
return f"""### ✅ Session ajoutée
|
| 560 |
|
| 561 |
📊 **Dataset:** {dataset_name}
|
| 562 |
🖼️ **Images:** {len(clean_entries)}
|
|
|
|
|
|
|
|
|
|
| 563 |
🔢 **Opérations:** {operations_summary}
|
| 564 |
-
📈 **Total:** {len(clean_dataset)}
|
| 565 |
|
| 566 |
🔗 <a href="https://huggingface.co/datasets/{dataset_name}" target="_blank">{dataset_name}</a>
|
|
|
|
| 567 |
"""
|
| 568 |
|
| 569 |
except Exception as e:
|
| 570 |
print(f"❌ ERREUR: {e}")
|
| 571 |
-
return f"❌ Erreur: {str(e)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# ==========================================
|
| 2 |
+
# game_engine.py - Avec métriques OCR et dataset optimisé
|
| 3 |
# ==========================================
|
| 4 |
|
| 5 |
"""
|
| 6 |
+
Moteur de jeu avec tracking complet des performances OCR
|
|
|
|
| 7 |
"""
|
| 8 |
|
| 9 |
import random
|
|
|
|
| 25 |
get_ocr_model_info
|
| 26 |
)
|
| 27 |
|
| 28 |
+
print("✅ Game Engine: Mode GPU avec métriques OCR")
|
| 29 |
|
| 30 |
# Imports dataset
|
| 31 |
try:
|
|
|
|
| 36 |
DATASET_AVAILABLE = False
|
| 37 |
print(f"⚠️ Modules dataset non disponibles: {e}")
|
| 38 |
|
| 39 |
+
# Dataset name avec nouvelle structure cohérente
|
| 40 |
+
DATASET_NAME = "hoololi/CalcTrainer_dataset"
|
| 41 |
|
| 42 |
# Configuration des difficultés par opération
|
| 43 |
DIFFICULTY_RANGES = {
|
|
|
|
| 47 |
"÷": {"Facile": (1, 10), "Difficile": (2, 12)}
|
| 48 |
}
|
| 49 |
|
| 50 |
+
def create_result_row_with_metrics(i: int, image: dict | np.ndarray | Image.Image, expected: int, operation_data: tuple[int, int, str, int]) -> dict:
|
| 51 |
+
"""Traite une image avec OCR et mesure les métriques"""
|
| 52 |
|
| 53 |
+
print(f"🔍 Traitement OCR image #{i+1}...")
|
| 54 |
|
| 55 |
+
# Mesurer temps OCR précisément
|
| 56 |
+
ocr_start_time = time.time()
|
| 57 |
+
recognized, optimized_image, dataset_image_data = recognize_number_fast_with_image(image, debug=False)
|
| 58 |
+
ocr_processing_time = time.time() - ocr_start_time
|
| 59 |
+
|
| 60 |
+
print(f" ⏱️ OCR temps: {ocr_processing_time:.3f}s → '{recognized}'")
|
| 61 |
|
| 62 |
try:
|
| 63 |
recognized_num = int(recognized) if recognized.isdigit() else 0
|
|
|
|
| 74 |
# Miniature pour affichage
|
| 75 |
image_thumbnail = create_thumbnail_fast(optimized_image, size=(50, 50))
|
| 76 |
|
| 77 |
+
# Libérer mémoire
|
| 78 |
if optimized_image and hasattr(optimized_image, 'close'):
|
| 79 |
try:
|
| 80 |
optimized_image.close()
|
|
|
|
| 92 |
<td style="text-align: center; padding: 8px; border: 1px solid #ddd;">{image_thumbnail}</td>
|
| 93 |
<td style="text-align: center; padding: 8px; border: 1px solid #ddd; font-weight: bold; color: #333;">{recognized_num}</td>
|
| 94 |
<td style="text-align: center; padding: 8px; border: 1px solid #ddd; color: #333;">{status_icon} {status_text}</td>
|
| 95 |
+
<td style="text-align: center; padding: 8px; border: 1px solid #ddd; color: #666; font-size: 0.9em;">{ocr_processing_time:.3f}s</td>
|
| 96 |
</tr>
|
| 97 |
""",
|
| 98 |
'is_correct': is_correct,
|
| 99 |
'recognized': recognized,
|
| 100 |
'recognized_num': recognized_num,
|
| 101 |
+
'dataset_image_data': dataset_image_data,
|
| 102 |
+
'ocr_processing_time': ocr_processing_time
|
| 103 |
}
|
| 104 |
|
| 105 |
|
| 106 |
class MathGame:
|
| 107 |
+
"""Moteur de jeu avec métriques OCR complètes"""
|
| 108 |
|
| 109 |
def __init__(self):
|
| 110 |
self.is_running = False
|
|
|
|
| 310 |
)
|
| 311 |
|
| 312 |
def end_game(self, final_image: dict | np.ndarray | Image.Image | None) -> tuple[str, Image.Image, str, str, gr.update, gr.update, str]:
|
| 313 |
+
"""Fin de jeu - OCR AVEC MÉTRIQUES COMPLÈTES"""
|
| 314 |
|
| 315 |
self.is_running = False
|
| 316 |
|
| 317 |
+
print("🏁 Fin de jeu - Début OCR avec métriques détaillées...")
|
| 318 |
|
| 319 |
# Ajouter la dernière image si présente
|
| 320 |
if final_image is not None:
|
|
|
|
| 328 |
a, op, b = int(parts[0]), parts[1], int(parts[2])
|
| 329 |
self.operations_history.append((a, b, op, self.correct_answer))
|
| 330 |
|
| 331 |
+
# OCR SÉQUENTIEL AVEC MÉTRIQUES
|
| 332 |
total_questions = len(self.user_images)
|
| 333 |
correct_answers = 0
|
| 334 |
table_rows_html = ""
|
|
|
|
| 336 |
session_timestamp = datetime.datetime.now().isoformat()
|
| 337 |
session_id = f"session_{int(datetime.datetime.now().timestamp())}_{str(uuid.uuid4())[:8]}"
|
| 338 |
|
| 339 |
+
# Métriques OCR globales
|
| 340 |
+
total_ocr_start_time = time.time()
|
| 341 |
+
ocr_times = []
|
| 342 |
+
|
| 343 |
self.session_data = []
|
| 344 |
images_saved = 0
|
| 345 |
|
| 346 |
+
print(f"🔄 Traitement OCR avec métriques de {total_questions} images...")
|
| 347 |
|
| 348 |
+
# Récupérer infos modèle OCR une seule fois
|
| 349 |
+
try:
|
| 350 |
+
ocr_model_info = get_ocr_model_info()
|
| 351 |
+
model_name = ocr_model_info.get("model_name", "microsoft/trocr-base-handwritten")
|
| 352 |
+
hardware = f"{ocr_model_info.get('device', 'Unknown')}-{ocr_model_info.get('gpu_name', 'Unknown')}"
|
| 353 |
+
except Exception as e:
|
| 354 |
+
print(f"❌ Erreur get_ocr_model_info: {e}")
|
| 355 |
+
model_name = "microsoft/trocr-base-handwritten"
|
| 356 |
+
hardware = "ZeroGPU-Unknown"
|
| 357 |
+
|
| 358 |
+
# Boucle OCR avec métriques
|
| 359 |
for i in range(total_questions):
|
| 360 |
print(f"📷 OCR image {i+1}/{total_questions}...")
|
| 361 |
|
| 362 |
+
# OCR avec métriques
|
| 363 |
+
row_data = create_result_row_with_metrics(
|
| 364 |
i,
|
| 365 |
self.user_images[i],
|
| 366 |
self.expected_answers[i],
|
|
|
|
| 368 |
)
|
| 369 |
|
| 370 |
table_rows_html += row_data['html_row']
|
| 371 |
+
ocr_times.append(row_data['ocr_processing_time'])
|
| 372 |
|
| 373 |
if row_data['is_correct']:
|
| 374 |
correct_answers += 1
|
| 375 |
|
| 376 |
+
# Structure dataset optimisée
|
| 377 |
a, b, operation, correct_result = self.operations_history[i] if i < len(self.operations_history) else (0, 0, "×", 0)
|
| 378 |
|
| 379 |
+
# ID unique pour cette question
|
| 380 |
+
question_id = f"{session_id}_q{i+1:02d}"
|
|
|
|
|
|
|
|
|
|
| 381 |
|
| 382 |
entry = {
|
| 383 |
+
# Identification
|
| 384 |
"session_id": session_id,
|
| 385 |
+
"question_id": question_id,
|
| 386 |
"timestamp": session_timestamp,
|
| 387 |
+
|
| 388 |
+
# Données mathématiques
|
|
|
|
|
|
|
| 389 |
"operand_a": a,
|
| 390 |
"operand_b": b,
|
| 391 |
"operation": operation,
|
| 392 |
"correct_answer": self.expected_answers[i],
|
| 393 |
+
"difficulty": self.difficulty,
|
| 394 |
+
|
| 395 |
+
# Données OCR
|
| 396 |
+
"ocr_prediction": row_data['recognized'],
|
| 397 |
+
"ocr_parsed_number": row_data['recognized_num'],
|
| 398 |
"is_correct": row_data['is_correct'],
|
| 399 |
+
|
| 400 |
+
# Métriques modèle OCR
|
| 401 |
+
"ocr_model_name": model_name,
|
| 402 |
+
"ocr_processing_time": row_data['ocr_processing_time'],
|
| 403 |
+
"ocr_confidence": 0.0, # Non disponible avec TrOCR actuel
|
| 404 |
+
|
| 405 |
+
# Métriques session (calculées à la fin)
|
| 406 |
+
"session_duration": self.duration,
|
| 407 |
+
"session_total_questions": total_questions,
|
| 408 |
+
|
| 409 |
+
# Métadonnées techniques
|
| 410 |
+
"app_version": "3.1_with_ocr_metrics",
|
| 411 |
+
"hardware": hardware
|
| 412 |
}
|
| 413 |
|
| 414 |
# Image PIL native pour dataset
|
| 415 |
if row_data['dataset_image_data']:
|
| 416 |
entry["handwriting_image"] = row_data['dataset_image_data']["handwriting_image"]
|
|
|
|
|
|
|
|
|
|
| 417 |
images_saved += 1
|
|
|
|
|
|
|
| 418 |
|
| 419 |
self.session_data.append(entry)
|
| 420 |
|
| 421 |
+
# Calculs finaux métriques
|
| 422 |
+
total_ocr_time = time.time() - total_ocr_start_time
|
| 423 |
+
avg_ocr_time = sum(ocr_times) / len(ocr_times) if ocr_times else 0.0
|
| 424 |
accuracy = (correct_answers / total_questions * 100) if total_questions > 0 else 0
|
| 425 |
|
| 426 |
+
# Ajouter métriques session à toutes les entrées
|
| 427 |
for entry in self.session_data:
|
| 428 |
entry["session_accuracy"] = accuracy
|
| 429 |
+
entry["session_total_ocr_time"] = total_ocr_time
|
| 430 |
+
entry["session_avg_ocr_time"] = avg_ocr_time
|
| 431 |
+
|
| 432 |
+
# Statistiques détaillées
|
| 433 |
+
print(f"📊 === MÉTRIQUES OCR COMPLÈTES ===")
|
| 434 |
+
print(f"📷 Images traitées: {total_questions}")
|
| 435 |
+
print(f"⏱️ Temps total OCR: {total_ocr_time:.2f}s")
|
| 436 |
+
print(f"⚡ Temps moyen/image: {avg_ocr_time:.3f}s")
|
| 437 |
+
print(f"🎯 Précision: {accuracy:.1f}%")
|
| 438 |
+
print(f"🤖 Modèle: {model_name}")
|
| 439 |
+
print(f"💻 Hardware: {hardware}")
|
| 440 |
+
|
| 441 |
+
# Statistiques par opération
|
| 442 |
+
operations_stats = {}
|
| 443 |
+
for entry in self.session_data:
|
| 444 |
+
op = entry['operation']
|
| 445 |
+
if op not in operations_stats:
|
| 446 |
+
operations_stats[op] = {'correct': 0, 'total': 0, 'times': []}
|
| 447 |
+
operations_stats[op]['total'] += 1
|
| 448 |
+
operations_stats[op]['times'].append(entry['ocr_processing_time'])
|
| 449 |
+
if entry['is_correct']:
|
| 450 |
+
operations_stats[op]['correct'] += 1
|
| 451 |
+
|
| 452 |
+
print(f"📈 Détail par opération:")
|
| 453 |
+
for op, stats in operations_stats.items():
|
| 454 |
+
op_accuracy = (stats['correct'] / stats['total'] * 100) if stats['total'] > 0 else 0
|
| 455 |
+
op_avg_time = sum(stats['times']) / len(stats['times']) if stats['times'] else 0
|
| 456 |
+
print(f" {op}: {op_accuracy:.1f}% précision, {op_avg_time:.3f}s/image ({stats['total']} images)")
|
| 457 |
|
| 458 |
# Nettoyage mémoire
|
| 459 |
for img in self.user_images:
|
|
|
|
| 465 |
|
| 466 |
cleanup_memory()
|
| 467 |
|
| 468 |
+
# HTML résultats avec colonne temps
|
|
|
|
|
|
|
| 469 |
table_html = f"""
|
| 470 |
<div style="overflow-x: auto; margin: 20px 0;">
|
| 471 |
<table style="width: 100%; border-collapse: collapse; border: 2px solid #4a90e2;">
|
|
|
|
| 479 |
<th style="padding: 8px;">Votre dessin</th>
|
| 480 |
<th style="padding: 8px;">OCR</th>
|
| 481 |
<th style="padding: 8px;">Statut</th>
|
| 482 |
+
<th style="padding: 8px;">Temps OCR</th>
|
| 483 |
</tr>
|
| 484 |
</thead>
|
| 485 |
<tbody>
|
|
|
|
| 496 |
if export_info["can_export"]:
|
| 497 |
export_section = f"""
|
| 498 |
<div style="margin-top: 20px; padding: 15px; background-color: #e8f5e8; border-radius: 8px;">
|
| 499 |
+
<h3 style="color: #2e7d32;">📊 Métriques de la série</h3>
|
| 500 |
<p style="color: #2e7d32;">
|
| 501 |
✅ {total_questions} réponses • 📊 {accuracy:.1f}% de précision<br>
|
| 502 |
🖼️ {images_saved} images sauvegardées<br>
|
| 503 |
+
⏱️ OCR: {total_ocr_time:.2f}s total, {avg_ocr_time:.3f}s/image<br>
|
| 504 |
+
🤖 Modèle: {model_name}<br>
|
| 505 |
+
💻 Hardware: {hardware}<br>
|
| 506 |
⚙️ Configuration: {config_display}
|
| 507 |
</p>
|
| 508 |
</div>
|
|
|
|
| 531 |
<div style="font-size: 2em; font-weight: bold;">{accuracy:.1f}%</div>
|
| 532 |
<div>Précision</div>
|
| 533 |
</div>
|
| 534 |
+
<div style="text-align: center; margin: 10px;">
|
| 535 |
+
<div style="font-size: 1.5em; font-weight: bold; color: #87CEEB;">{avg_ocr_time:.3f}s</div>
|
| 536 |
+
<div>Temps/image</div>
|
| 537 |
+
</div>
|
| 538 |
</div>
|
| 539 |
</div>
|
| 540 |
+
<h2 style="color: #4a90e2;">📊 Détail des Réponses avec Métriques OCR</h2>
|
| 541 |
{table_html}
|
| 542 |
{export_section}
|
| 543 |
</div>
|
|
|
|
| 554 |
)
|
| 555 |
|
| 556 |
|
| 557 |
+
def export_to_optimized_dataset(session_data: list[dict], dataset_name: str = None) -> str:
|
| 558 |
+
"""Export vers le dataset optimisé avec métriques OCR"""
|
| 559 |
if dataset_name is None:
|
| 560 |
dataset_name = DATASET_NAME
|
| 561 |
|
|
|
|
| 567 |
return "❌ Token HuggingFace manquant"
|
| 568 |
|
| 569 |
try:
|
| 570 |
+
print(f"\n🚀 === EXPORT DATASET OPTIMISÉ AVEC MÉTRIQUES ===")
|
| 571 |
print(f"📊 Dataset: {dataset_name}")
|
| 572 |
|
| 573 |
# Filtrer les entrées avec images
|
| 574 |
+
clean_entries = [entry for entry in session_data if entry.get('handwriting_image') is not None]
|
| 575 |
|
| 576 |
if len(clean_entries) == 0:
|
| 577 |
return "❌ Aucune entrée avec image à exporter"
|
| 578 |
|
| 579 |
+
# Statistiques pré-export
|
| 580 |
+
total_ocr_time = clean_entries[0].get('session_total_ocr_time', 0)
|
| 581 |
+
avg_ocr_time = clean_entries[0].get('session_avg_ocr_time', 0)
|
| 582 |
+
model_name = clean_entries[0].get('ocr_model_name', 'Unknown')
|
| 583 |
+
session_accuracy = clean_entries[0].get('session_accuracy', 0)
|
| 584 |
+
|
| 585 |
+
print(f"📈 Métriques session:")
|
| 586 |
+
print(f" - {len(clean_entries)} images")
|
| 587 |
+
print(f" - {session_accuracy:.1f}% précision")
|
| 588 |
+
print(f" - {total_ocr_time:.2f}s total OCR")
|
| 589 |
+
print(f" - {avg_ocr_time:.3f}s/image")
|
| 590 |
+
print(f" - Modèle: {model_name}")
|
| 591 |
+
|
| 592 |
# Charger dataset existant et combiner
|
| 593 |
try:
|
| 594 |
existing_dataset = load_dataset(dataset_name, split="train")
|
|
|
|
| 610 |
except Exception as e:
|
| 611 |
print(f"⚠️ Conversion image: {e}")
|
| 612 |
|
| 613 |
+
# Statistiques par opération pour commit message
|
| 614 |
operations_count = {}
|
| 615 |
for entry in clean_entries:
|
| 616 |
+
op = entry.get('operation', 'unknown')
|
| 617 |
operations_count[op] = operations_count.get(op, 0) + 1
|
| 618 |
|
| 619 |
operations_summary = ", ".join([f"{op}: {count}" for op, count in operations_count.items()])
|
| 620 |
|
| 621 |
+
# Message de commit enrichi avec métriques
|
| 622 |
+
commit_message = f"""Add {len(clean_entries)} samples with OCR metrics
|
| 623 |
+
|
| 624 |
+
Model: {model_name}
|
| 625 |
+
Accuracy: {session_accuracy:.1f}%
|
| 626 |
+
Avg OCR time: {avg_ocr_time:.3f}s/image
|
| 627 |
+
Operations: {operations_summary}
|
| 628 |
+
Hardware: {clean_entries[0].get('hardware', 'Unknown')}
|
| 629 |
+
"""
|
| 630 |
+
|
| 631 |
# Push vers HuggingFace
|
| 632 |
print(f"📤 Push vers {dataset_name}...")
|
| 633 |
clean_dataset.push_to_hub(
|
| 634 |
dataset_name,
|
| 635 |
private=False,
|
| 636 |
token=hf_token,
|
| 637 |
+
commit_message=commit_message
|
| 638 |
)
|
| 639 |
|
| 640 |
cleanup_memory()
|
| 641 |
|
| 642 |
+
return f"""### ✅ Session ajoutée au dataset optimisé !
|
| 643 |
|
| 644 |
📊 **Dataset:** {dataset_name}
|
| 645 |
🖼️ **Images:** {len(clean_entries)}
|
| 646 |
+
🎯 **Précision:** {session_accuracy:.1f}%
|
| 647 |
+
⏱️ **Performance:** {avg_ocr_time:.3f}s/image (total: {total_ocr_time:.1f}s)
|
| 648 |
+
🤖 **Modèle:** {model_name}
|
| 649 |
🔢 **Opérations:** {operations_summary}
|
| 650 |
+
📈 **Total dataset:** {len(clean_dataset)}
|
| 651 |
|
| 652 |
🔗 <a href="https://huggingface.co/datasets/{dataset_name}" target="_blank">{dataset_name}</a>
|
| 653 |
+
|
| 654 |
"""
|
| 655 |
|
| 656 |
except Exception as e:
|
| 657 |
print(f"❌ ERREUR: {e}")
|
| 658 |
+
return f"❌ Erreur: {str(e)}"
|
| 659 |
+
|
| 660 |
+
# Fonction de compatibilité pour ne pas casser l'interface
|
| 661 |
+
def export_to_clean_dataset(session_data: list[dict], dataset_name: str = None) -> str:
|
| 662 |
+
"""Wrapper pour compatibilité avec l'ancienne interface"""
|
| 663 |
+
return export_to_optimized_dataset(session_data, dataset_name)
|