Spaces:
Sleeping
Sleeping
Commit
·
8ed167c
1
Parent(s):
a8989f9
timeout warning
Browse files
- app.py +51 -25
- validation.py +1 -0
app.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import requests
|
| 2 |
import os
|
| 3 |
|
|
@@ -16,41 +17,69 @@ import json
|
|
| 16 |
import time
|
| 17 |
import traceback
|
| 18 |
from validation import validate_json, validate_croissant, validate_records, generate_validation_report
|
|
|
|
| 19 |
|
| 20 |
def process_file(file):
    """Run the three validation stages on an uploaded file.

    The stages run in order: JSON format, Croissant schema, records
    generation. A failure in either of the first two stages stops the
    pipeline early.

    Args:
        file: Uploaded-file object; only its ``name`` attribute (a path)
            is read here.

    Returns:
        A ``(results, report)`` pair. ``results`` is a list of
        ``(title, passed, message, status)`` tuples, one per stage that ran.
        ``report`` is the value returned by ``generate_validation_report``,
        or ``None`` when the JSON or schema stage failed.
    """
    checks = []
    # Last "/"-separated component of the path is used as the display name.
    base_name = file.name.rsplit("/", 1)[-1]

    # Stage 1: JSON format.
    json_ok, json_text, json_data = validate_json(file.name)
    json_text = json_text.replace("\n✓\n", "\n")
    json_status = "pass" if json_ok else "error"
    checks.append(("JSON Format Validation", json_ok, json_text, json_status))
    if not json_ok:
        return checks, None

    # Stage 2: Croissant schema.
    schema_ok, schema_text = validate_croissant(json_data)
    schema_text = schema_text.replace("\n✓\n", "\n")
    schema_status = "pass" if schema_ok else "error"
    checks.append(("Croissant Schema Validation", schema_ok, schema_text, schema_status))
    if not schema_ok:
        return checks, None

    # Stage 3: records generation; the validator supplies its own status.
    rec_ok, rec_text, rec_status = validate_records(json_data)
    rec_text = rec_text.replace("\n✓\n", "\n")
    checks.append(("Records Generation Test", rec_ok, rec_text, rec_status))

    # Build the final report from every stage result collected above.
    return checks, generate_validation_report(base_name, json_data, checks)
|
| 51 |
|
| 52 |
def create_ui():
|
| 53 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
|
|
|
| 54 |
gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
|
| 55 |
gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
|
| 56 |
gr.Markdown("""
|
|
@@ -481,14 +510,12 @@ def create_ui():
|
|
| 481 |
gr.update(visible=False), # validation_results
|
| 482 |
gr.update(visible=False), # validation_progress
|
| 483 |
gr.update(visible=False), # report_group
|
| 484 |
-
None,
|
| 485 |
-
|
| 486 |
]
|
| 487 |
|
| 488 |
-
|
| 489 |
-
results, report = process_file(file)
|
| 490 |
|
| 491 |
-
# Extract dataset name from the JSON for the report filename
|
| 492 |
try:
|
| 493 |
with open(file.name, 'r') as f:
|
| 494 |
json_data = json.load(f)
|
|
@@ -496,19 +523,18 @@ def create_ui():
|
|
| 496 |
except:
|
| 497 |
dataset_name = 'unnamed'
|
| 498 |
|
| 499 |
-
# Save report to file with new naming convention
|
| 500 |
report_filename = f"report_croissant-validation_{dataset_name}.md"
|
| 501 |
if report:
|
| 502 |
with open(report_filename, "w") as f:
|
| 503 |
f.write(report)
|
| 504 |
|
| 505 |
-
# Return final state
|
| 506 |
return [
|
| 507 |
-
build_results_html(results),
|
| 508 |
-
gr.update(visible=False),
|
| 509 |
-
gr.update(visible=True) if report else gr.update(visible=False),
|
| 510 |
-
report if report else None,
|
| 511 |
-
report_filename if report else None
|
|
|
|
| 512 |
]
|
| 513 |
|
| 514 |
# Connect UI events to functions with updated outputs
|
|
@@ -538,7 +564,7 @@ def create_ui():
|
|
| 538 |
None, # report_text
|
| 539 |
None # report_md
|
| 540 |
]
|
| 541 |
-
|
| 542 |
validate_btn.click(
|
| 543 |
fn=show_progress,
|
| 544 |
inputs=None,
|
|
@@ -547,7 +573,7 @@ def create_ui():
|
|
| 547 |
).then(
|
| 548 |
fn=on_validate,
|
| 549 |
inputs=file_input,
|
| 550 |
-
outputs=[validation_results, validation_progress, report_group, report_text, report_md]
|
| 551 |
)
|
| 552 |
|
| 553 |
fetch_btn.click(
|
|
|
|
| 1 |
+
import mlcroissant._src.operation_graph.operations.download as dl_mod
|
| 2 |
import requests
|
| 3 |
import os
|
| 4 |
|
|
|
|
| 17 |
import time
|
| 18 |
import traceback
|
| 19 |
from validation import validate_json, validate_croissant, validate_records, generate_validation_report
|
| 20 |
+
import threading
|
| 21 |
|
| 22 |
# HTML shown to the user when records validation outlasts the warning delay.
_SLOW_VALIDATION_WARNING_HTML = """
⚠️ <b>This is taking longer than usual</b>. It is possible that this checker is currently being used by a lot of people
at the same time, which may trigger rate limiting by the platform hosting your data. The app will then try again and may get into a very long loop.<br><br>
In that case, we recommend using any of the following options:
<ul style="text-align:left; margin: 0 auto; display:inline-block;">
<li>🔁 Duplicate this Space on Hugging Face</li>
<li>💻 Run it locally (GitHub or Docker)</li>
<li>🥐 Use <code>mlcroissant</code> from <a href="https://github.com/mlcommons/croissant" target="_blank">GitHub</a></li>
</ul>
"""


def process_file(file, *, warning_delay=0.1):
    """Run the three validation stages on an uploaded file.

    The stages run in order: JSON format, Croissant schema, records
    generation. A failure in either of the first two stages stops the
    pipeline early. While the records stage runs, a timer is armed; if it
    fires before the stage finishes, a "taking longer than usual" warning
    update is returned alongside the results.

    Args:
        file: Uploaded-file object; only its ``name`` attribute (a path)
            is read here.
        warning_delay: Seconds the records stage may run before the slow
            warning is flagged.
            NOTE(review): the default of 0.1 s is kept from the original
            code but looks like a debugging value for a "longer than
            usual" message - confirm the intended threshold.

    Returns:
        A ``(results, report, warning_update)`` triple. ``results`` is a
        list of ``(title, passed, message, status)`` tuples, one per stage
        that ran. ``report`` is the value returned by
        ``generate_validation_report``. ``warning_update`` is a
        ``gr.update(...)`` that shows or hides the warning. ``report`` and
        ``warning_update`` are both ``None`` when the JSON or schema stage
        failed.
    """
    results = []
    # basename handles whichever separator the platform uses, unlike
    # splitting on a hard-coded "/".
    filename = os.path.basename(file.name)

    # JSON validation
    json_valid, json_message, json_data = validate_json(file.name)
    json_message = json_message.replace("\n✓\n", "\n")
    results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
    if not json_valid:
        return results, None, None

    # Schema validation
    croissant_valid, croissant_message = validate_croissant(json_data)
    croissant_message = croissant_message.replace("\n✓\n", "\n")
    results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
    if not croissant_valid:
        return results, None, None

    # Arm the timer just before records validation. The Event is the only
    # state shared with the timer thread; the warning text is a constant.
    fired = threading.Event()
    timer = threading.Timer(warning_delay, fired.set)
    timer.start()
    try:
        records_valid, records_message, records_status = validate_records(json_data)
    finally:
        # Always disarm the timer, even if validation raised.
        timer.cancel()

    if fired.is_set():
        warning_html_update = gr.update(value=_SLOW_VALIDATION_WARNING_HTML, visible=True)
    else:
        warning_html_update = gr.update(visible=False)

    records_message = records_message.replace("\n✓\n", "\n")
    results.append(("Records Generation Test", records_valid, records_message, records_status))

    report = generate_validation_report(filename, json_data, results)
    return results, report, warning_html_update
|
|
|
|
| 79 |
|
| 80 |
def create_ui():
|
| 81 |
with gr.Blocks(theme=gr.themes.Soft()) as app:
|
| 82 |
+
delayed_warning_html = gr.HTML("", visible=False)
|
| 83 |
gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
|
| 84 |
gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
|
| 85 |
gr.Markdown("""
|
|
|
|
| 510 |
gr.update(visible=False), # validation_results
|
| 511 |
gr.update(visible=False), # validation_progress
|
| 512 |
gr.update(visible=False), # report_group
|
| 513 |
+
None, None, # report_text, report_md
|
| 514 |
+
gr.update(visible=False) # delayed_warning_html
|
| 515 |
]
|
| 516 |
|
| 517 |
+
results, report, warning_html_update = process_file(file)
|
|
|
|
| 518 |
|
|
|
|
| 519 |
try:
|
| 520 |
with open(file.name, 'r') as f:
|
| 521 |
json_data = json.load(f)
|
|
|
|
| 523 |
except:
|
| 524 |
dataset_name = 'unnamed'
|
| 525 |
|
|
|
|
| 526 |
report_filename = f"report_croissant-validation_{dataset_name}.md"
|
| 527 |
if report:
|
| 528 |
with open(report_filename, "w") as f:
|
| 529 |
f.write(report)
|
| 530 |
|
|
|
|
| 531 |
return [
|
| 532 |
+
build_results_html(results),
|
| 533 |
+
gr.update(visible=False),
|
| 534 |
+
gr.update(visible=True) if report else gr.update(visible=False),
|
| 535 |
+
report if report else None,
|
| 536 |
+
report_filename if report else None,
|
| 537 |
+
warning_html_update or gr.update(visible=False)
|
| 538 |
]
|
| 539 |
|
| 540 |
# Connect UI events to functions with updated outputs
|
|
|
|
| 564 |
None, # report_text
|
| 565 |
None # report_md
|
| 566 |
]
|
| 567 |
+
|
| 568 |
validate_btn.click(
|
| 569 |
fn=show_progress,
|
| 570 |
inputs=None,
|
|
|
|
| 573 |
).then(
|
| 574 |
fn=on_validate,
|
| 575 |
inputs=file_input,
|
| 576 |
+
outputs=[validation_results, validation_progress, report_group, report_text, report_md, delayed_warning_html]
|
| 577 |
)
|
| 578 |
|
| 579 |
fetch_btn.click(
|
validation.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import requests
|
| 2 |
import os
|
| 3 |
|
|
|
|
| 1 |
+
import mlcroissant._src.operation_graph.operations.download as dl_mod
|
| 2 |
import requests
|
| 3 |
import os
|
| 4 |
|