Spaces:
Running
Running
Update updated_word.py
Browse files- updated_word.py +25 -9
updated_word.py
CHANGED
|
@@ -774,7 +774,7 @@ def handle_management_summary_fix(cell, flat_json):
|
|
| 774 |
return replacements_made
|
| 775 |
|
| 776 |
def handle_operator_declaration_fix(table, flat_json):
|
| 777 |
-
"""Handle small Operator/Auditor Declaration tables"""
|
| 778 |
replacements_made = 0
|
| 779 |
|
| 780 |
if len(table.rows) > 4: # Only process small tables
|
|
@@ -786,21 +786,27 @@ def handle_operator_declaration_fix(table, flat_json):
|
|
| 786 |
for cell in row.cells:
|
| 787 |
table_text += get_clean_text(cell).lower() + " "
|
| 788 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 789 |
# Check if this is a declaration table
|
| 790 |
if not ("print name" in table_text or "signature" in table_text or "date" in table_text):
|
| 791 |
return 0
|
| 792 |
|
| 793 |
-
print(f" 🎯 Processing declaration table")
|
| 794 |
|
| 795 |
-
# Process each cell with red text
|
| 796 |
for row_idx, row in enumerate(table.rows):
|
| 797 |
for cell_idx, cell in enumerate(row.cells):
|
| 798 |
if has_red_text(cell):
|
| 799 |
-
# Try
|
| 800 |
declaration_fields = [
|
| 801 |
-
"
|
| 802 |
-
"
|
| 803 |
-
"
|
|
|
|
| 804 |
]
|
| 805 |
|
| 806 |
replaced = False
|
|
@@ -834,9 +840,20 @@ def handle_operator_declaration_fix(table, flat_json):
|
|
| 834 |
return replacements_made
|
| 835 |
|
| 836 |
def handle_print_accreditation_section(table, flat_json):
|
| 837 |
-
"""Handle Print Accreditation section"""
|
| 838 |
replacements_made = 0
|
| 839 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 840 |
print(f" 📋 Processing Print Accreditation section")
|
| 841 |
|
| 842 |
for row_idx, row in enumerate(table.rows):
|
|
@@ -845,7 +862,6 @@ def handle_print_accreditation_section(table, flat_json):
|
|
| 845 |
# Try print accreditation fields
|
| 846 |
accreditation_fields = [
|
| 847 |
"(print accreditation name)",
|
| 848 |
-
"Print Name",
|
| 849 |
"Operator name (Legal entity)"
|
| 850 |
]
|
| 851 |
|
|
|
|
| 774 |
return replacements_made
|
| 775 |
|
| 776 |
def handle_operator_declaration_fix(table, flat_json):
|
| 777 |
+
"""Handle small Operator/Auditor Declaration tables - SKIP if already processed"""
|
| 778 |
replacements_made = 0
|
| 779 |
|
| 780 |
if len(table.rows) > 4: # Only process small tables
|
|
|
|
| 786 |
for cell in row.cells:
|
| 787 |
table_text += get_clean_text(cell).lower() + " "
|
| 788 |
|
| 789 |
+
# SKIP if this is an Operator Declaration table (already handled by fix_operator_declaration_empty_values)
|
| 790 |
+
if "print name" in table_text and "position title" in table_text:
|
| 791 |
+
print(f" ⏭️ Skipping - Operator Declaration table already processed")
|
| 792 |
+
return 0
|
| 793 |
+
|
| 794 |
# Check if this is a declaration table
|
| 795 |
if not ("print name" in table_text or "signature" in table_text or "date" in table_text):
|
| 796 |
return 0
|
| 797 |
|
| 798 |
+
print(f" 🎯 Processing other declaration table")
|
| 799 |
|
| 800 |
+
# Process each cell with red text (for auditor declarations, etc.)
|
| 801 |
for row_idx, row in enumerate(table.rows):
|
| 802 |
for cell_idx, cell in enumerate(row.cells):
|
| 803 |
if has_red_text(cell):
|
| 804 |
+
# Try auditor-specific fields first
|
| 805 |
declaration_fields = [
|
| 806 |
+
"NHVAS Approved Auditor Declaration.Print Name",
|
| 807 |
+
"Auditor name",
|
| 808 |
+
"Signature",
|
| 809 |
+
"Date"
|
| 810 |
]
|
| 811 |
|
| 812 |
replaced = False
|
|
|
|
| 840 |
return replacements_made
|
| 841 |
|
| 842 |
def handle_print_accreditation_section(table, flat_json):
|
| 843 |
+
"""Handle Print Accreditation section - SKIP Operator Declaration tables"""
|
| 844 |
replacements_made = 0
|
| 845 |
|
| 846 |
+
# Get table context to check what type of table this is
|
| 847 |
+
table_context = ""
|
| 848 |
+
for row in table.rows:
|
| 849 |
+
for cell in row.cells:
|
| 850 |
+
table_context += get_clean_text(cell).lower() + " "
|
| 851 |
+
|
| 852 |
+
# SKIP if this is an Operator Declaration table
|
| 853 |
+
if "operator declaration" in table_context or ("print name" in table_context and "position title" in table_context):
|
| 854 |
+
print(f" ⏭️ Skipping Print Accreditation - this is an Operator Declaration table")
|
| 855 |
+
return 0
|
| 856 |
+
|
| 857 |
print(f" 📋 Processing Print Accreditation section")
|
| 858 |
|
| 859 |
for row_idx, row in enumerate(table.rows):
|
|
|
|
| 862 |
# Try print accreditation fields
|
| 863 |
accreditation_fields = [
|
| 864 |
"(print accreditation name)",
|
|
|
|
| 865 |
"Operator name (Legal entity)"
|
| 866 |
]
|
| 867 |
|