Commit · 8ed167c
1 Parent(s): a8989f9
timeout warning
Browse files
- app.py +51 -25
- validation.py +1 -0
    	
        app.py
    CHANGED
    
    | @@ -1,3 +1,4 @@ | |
|  | |
| 1 | 
             
            import requests
         | 
| 2 | 
             
            import os
         | 
| 3 |  | 
| @@ -16,41 +17,69 @@ import json | |
| 16 | 
             
            import time
         | 
| 17 | 
             
            import traceback
         | 
| 18 | 
             
            from validation import validate_json, validate_croissant, validate_records, generate_validation_report
         | 
|  | |
| 19 |  | 
| 20 | 
             
            def process_file(file):
         | 
| 21 | 
             
                results = []
         | 
| 22 | 
             
                json_data = None
         | 
|  | |
|  | |
| 23 |  | 
| 24 | 
             
                filename = file.name.split("/")[-1]
         | 
| 25 |  | 
| 26 | 
            -
                #  | 
| 27 | 
             
                json_valid, json_message, json_data = validate_json(file.name)
         | 
| 28 | 
             
                json_message = json_message.replace("\n✓\n", "\n")
         | 
| 29 | 
             
                results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
         | 
| 30 | 
            -
             | 
| 31 | 
             
                if not json_valid:
         | 
| 32 | 
            -
                    return results, None
         | 
| 33 |  | 
| 34 | 
            -
                #  | 
| 35 | 
             
                croissant_valid, croissant_message = validate_croissant(json_data)
         | 
| 36 | 
             
                croissant_message = croissant_message.replace("\n✓\n", "\n")
         | 
| 37 | 
             
                results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
         | 
| 38 | 
            -
             | 
| 39 | 
             
                if not croissant_valid:
         | 
| 40 | 
            -
                    return results, None
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 41 |  | 
| 42 | 
            -
                # Check 3: Records validation (with timeout-safe and error-specific logic)
         | 
| 43 | 
            -
                records_valid, records_message, records_status = validate_records(json_data)
         | 
| 44 | 
             
                records_message = records_message.replace("\n✓\n", "\n")
         | 
| 45 | 
             
                results.append(("Records Generation Test", records_valid, records_message, records_status))
         | 
| 46 |  | 
| 47 | 
            -
                # Generate final report
         | 
| 48 | 
             
                report = generate_validation_report(filename, json_data, results)
         | 
| 49 | 
            -
             | 
| 50 | 
            -
                return results, report
         | 
| 51 |  | 
| 52 | 
             
            def create_ui():
         | 
| 53 | 
             
                with gr.Blocks(theme=gr.themes.Soft()) as app:
         | 
|  | |
| 54 | 
             
                    gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
         | 
| 55 | 
             
                    gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
         | 
| 56 | 
             
                    gr.Markdown("""
         | 
| @@ -481,14 +510,12 @@ def create_ui(): | |
| 481 | 
             
                                gr.update(visible=False),  # validation_results
         | 
| 482 | 
             
                                gr.update(visible=False),  # validation_progress
         | 
| 483 | 
             
                                gr.update(visible=False),  # report_group
         | 
| 484 | 
            -
                                None, | 
| 485 | 
            -
                                 | 
| 486 | 
             
                            ]
         | 
| 487 |  | 
| 488 | 
            -
                         | 
| 489 | 
            -
                        results, report = process_file(file)
         | 
| 490 |  | 
| 491 | 
            -
                        # Extract dataset name from the JSON for the report filename
         | 
| 492 | 
             
                        try:
         | 
| 493 | 
             
                            with open(file.name, 'r') as f:
         | 
| 494 | 
             
                                json_data = json.load(f)
         | 
| @@ -496,19 +523,18 @@ def create_ui(): | |
| 496 | 
             
                        except:
         | 
| 497 | 
             
                            dataset_name = 'unnamed'
         | 
| 498 |  | 
| 499 | 
            -
                        # Save report to file with new naming convention
         | 
| 500 | 
             
                        report_filename = f"report_croissant-validation_{dataset_name}.md"
         | 
| 501 | 
             
                        if report:
         | 
| 502 | 
             
                            with open(report_filename, "w") as f:
         | 
| 503 | 
             
                                f.write(report)
         | 
| 504 |  | 
| 505 | 
            -
                        # Return final state
         | 
| 506 | 
             
                        return [
         | 
| 507 | 
            -
                            build_results_html(results), | 
| 508 | 
            -
                            gr.update(visible=False), | 
| 509 | 
            -
                            gr.update(visible=True) if report else gr.update(visible=False), | 
| 510 | 
            -
                            report if report else None, | 
| 511 | 
            -
                            report_filename if report else None | 
|  | |
| 512 | 
             
                        ]
         | 
| 513 |  | 
| 514 | 
             
                    # Connect UI events to functions with updated outputs
         | 
| @@ -538,7 +564,7 @@ def create_ui(): | |
| 538 | 
             
                            None,  # report_text
         | 
| 539 | 
             
                            None   # report_md
         | 
| 540 | 
             
                        ]
         | 
| 541 | 
            -
             | 
| 542 | 
             
                    validate_btn.click(
         | 
| 543 | 
             
                        fn=show_progress,
         | 
| 544 | 
             
                        inputs=None,
         | 
| @@ -547,7 +573,7 @@ def create_ui(): | |
| 547 | 
             
                    ).then(
         | 
| 548 | 
             
                        fn=on_validate,
         | 
| 549 | 
             
                        inputs=file_input,
         | 
| 550 | 
            -
                        outputs=[validation_results, validation_progress, report_group, report_text, report_md]
         | 
| 551 | 
             
                    )
         | 
| 552 |  | 
| 553 | 
             
                    fetch_btn.click(
         | 
|  | |
| 1 | 
            +
            import mlcroissant._src.operation_graph.operations.download as dl_mod
         | 
| 2 | 
             
            import requests
         | 
| 3 | 
             
            import os
         | 
| 4 |  | 
|  | |
| 17 | 
             
            import time
         | 
| 18 | 
             
            import traceback
         | 
| 19 | 
             
            from validation import validate_json, validate_croissant, validate_records, generate_validation_report
         | 
| 20 | 
            +
            import threading
         | 
| 21 |  | 
| 22 | 
             
            def process_file(file):
         | 
| 23 | 
             
                results = []
         | 
| 24 | 
             
                json_data = None
         | 
| 25 | 
            +
                timer = None
         | 
| 26 | 
            +
                warning_text = None  # to be set if timer fires
         | 
| 27 |  | 
| 28 | 
             
                filename = file.name.split("/")[-1]
         | 
| 29 |  | 
| 30 | 
            +
                # JSON validation
         | 
| 31 | 
             
                json_valid, json_message, json_data = validate_json(file.name)
         | 
| 32 | 
             
                json_message = json_message.replace("\n✓\n", "\n")
         | 
| 33 | 
             
                results.append(("JSON Format Validation", json_valid, json_message, "pass" if json_valid else "error"))
         | 
|  | |
| 34 | 
             
                if not json_valid:
         | 
| 35 | 
            +
                    return results, None, None
         | 
| 36 |  | 
| 37 | 
            +
                # Schema validation
         | 
| 38 | 
             
                croissant_valid, croissant_message = validate_croissant(json_data)
         | 
| 39 | 
             
                croissant_message = croissant_message.replace("\n✓\n", "\n")
         | 
| 40 | 
             
                results.append(("Croissant Schema Validation", croissant_valid, croissant_message, "pass" if croissant_valid else "error"))
         | 
|  | |
| 41 | 
             
                if not croissant_valid:
         | 
| 42 | 
            +
                    return results, None, None
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                # Start timer before records validation
         | 
| 45 | 
            +
                fired = threading.Event()
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                def trigger_warning():
         | 
| 48 | 
            +
                    nonlocal warning_text
         | 
| 49 | 
            +
                    warning_text = """
         | 
| 50 | 
            +
                    ⚠️ <b>This is taking longer than usual</b>. It is possible that this checker is currently being used by a lot of people
         | 
| 51 | 
            +
                    at the same time, which may trigger rate limiting by the platform hosting your data. The app will then try again and may get into a very long loop.<br><br>
         | 
| 52 | 
            +
                    In that case, we recommend using any of the following options:
         | 
| 53 | 
            +
                    <ul style="text-align:left; margin: 0 auto; display:inline-block;">
         | 
| 54 | 
            +
                        <li>🔁 Duplicate this Space on Hugging Face</li>
         | 
| 55 | 
            +
                        <li>💻 Run it locally (GitHub or Docker)</li>
         | 
| 56 | 
            +
                        <li>🥐 Use <code>mlcroissant</code> from <a href="https://github.com/mlcommons/croissant" target="_blank">GitHub</a></li>
         | 
| 57 | 
            +
                    </ul>
         | 
| 58 | 
            +
                    """
         | 
| 59 | 
            +
                    fired.set()
         | 
| 60 | 
            +
             | 
| 61 | 
            +
                timer = threading.Timer(0.1, trigger_warning)
         | 
| 62 | 
            +
                timer.start()
         | 
| 63 | 
            +
             | 
| 64 | 
            +
                try:
         | 
| 65 | 
            +
                    records_valid, records_message, records_status = validate_records(json_data)
         | 
| 66 | 
            +
                finally:
         | 
| 67 | 
            +
                    timer.cancel()
         | 
| 68 | 
            +
             | 
| 69 | 
            +
                if fired.is_set():
         | 
| 70 | 
            +
                    warning_html_update = gr.update(value=warning_text, visible=True)
         | 
| 71 | 
            +
                else:
         | 
| 72 | 
            +
                    warning_html_update = gr.update(visible=False)
         | 
| 73 |  | 
|  | |
|  | |
| 74 | 
             
                records_message = records_message.replace("\n✓\n", "\n")
         | 
| 75 | 
             
                results.append(("Records Generation Test", records_valid, records_message, records_status))
         | 
| 76 |  | 
|  | |
| 77 | 
             
                report = generate_validation_report(filename, json_data, results)
         | 
| 78 | 
            +
                return results, report, warning_html_update
         | 
|  | |
| 79 |  | 
| 80 | 
             
            def create_ui():
         | 
| 81 | 
             
                with gr.Blocks(theme=gr.themes.Soft()) as app:
         | 
| 82 | 
            +
                    delayed_warning_html = gr.HTML("", visible=False)
         | 
| 83 | 
             
                    gr.HTML("<p align='center'><img src='https://upload.wikimedia.org/wikipedia/en/0/08/Logo_for_Conference_on_Neural_Information_Processing_Systems.svg' alt='NeurIPS Logo' width='400'/></p>")
         | 
| 84 | 
             
                    gr.Markdown("# 🥐 Croissant Validator for NeurIPS")
         | 
| 85 | 
             
                    gr.Markdown("""
         | 
|  | |
| 510 | 
             
                                gr.update(visible=False),  # validation_results
         | 
| 511 | 
             
                                gr.update(visible=False),  # validation_progress
         | 
| 512 | 
             
                                gr.update(visible=False),  # report_group
         | 
| 513 | 
            +
                                None, None,                # report_text, report_md
         | 
| 514 | 
            +
                                gr.update(visible=False)   # delayed_warning_html
         | 
| 515 | 
             
                            ]
         | 
| 516 |  | 
| 517 | 
            +
                        results, report, warning_html_update = process_file(file)
         | 
|  | |
| 518 |  | 
|  | |
| 519 | 
             
                        try:
         | 
| 520 | 
             
                            with open(file.name, 'r') as f:
         | 
| 521 | 
             
                                json_data = json.load(f)
         | 
|  | |
| 523 | 
             
                        except:
         | 
| 524 | 
             
                            dataset_name = 'unnamed'
         | 
| 525 |  | 
|  | |
| 526 | 
             
                        report_filename = f"report_croissant-validation_{dataset_name}.md"
         | 
| 527 | 
             
                        if report:
         | 
| 528 | 
             
                            with open(report_filename, "w") as f:
         | 
| 529 | 
             
                                f.write(report)
         | 
| 530 |  | 
|  | |
| 531 | 
             
                        return [
         | 
| 532 | 
            +
                            build_results_html(results),
         | 
| 533 | 
            +
                            gr.update(visible=False),
         | 
| 534 | 
            +
                            gr.update(visible=True) if report else gr.update(visible=False),
         | 
| 535 | 
            +
                            report if report else None,
         | 
| 536 | 
            +
                            report_filename if report else None,
         | 
| 537 | 
            +
                            warning_html_update or gr.update(visible=False)
         | 
| 538 | 
             
                        ]
         | 
| 539 |  | 
| 540 | 
             
                    # Connect UI events to functions with updated outputs
         | 
|  | |
| 564 | 
             
                            None,  # report_text
         | 
| 565 | 
             
                            None   # report_md
         | 
| 566 | 
             
                        ]
         | 
| 567 | 
            +
                    
         | 
| 568 | 
             
                    validate_btn.click(
         | 
| 569 | 
             
                        fn=show_progress,
         | 
| 570 | 
             
                        inputs=None,
         | 
|  | |
| 573 | 
             
                    ).then(
         | 
| 574 | 
             
                        fn=on_validate,
         | 
| 575 | 
             
                        inputs=file_input,
         | 
| 576 | 
            +
                        outputs=[validation_results, validation_progress, report_group, report_text, report_md, delayed_warning_html]
         | 
| 577 | 
             
                    )
         | 
| 578 |  | 
| 579 | 
             
                    fetch_btn.click(
         | 
    	
        validation.py
    CHANGED
    
    | @@ -1,3 +1,4 @@ | |
|  | |
| 1 | 
             
            import requests
         | 
| 2 | 
             
            import os
         | 
| 3 |  | 
|  | |
| 1 | 
            +
            import mlcroissant._src.operation_graph.operations.download as dl_mod
         | 
| 2 | 
             
            import requests
         | 
| 3 | 
             
            import os
         | 
| 4 |  | 
