Spaces:
Running
Running
Update updated_word.py
Browse files- updated_word.py +156 -2
updated_word.py
CHANGED
|
@@ -673,8 +673,150 @@ def process_single_column_sections(cell, field_name, flat_json):
|
|
| 673 |
return cell_replacements
|
| 674 |
return 0
|
| 675 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 676 |
def process_tables(document, flat_json):
|
| 677 |
-
"""Your original function with
|
| 678 |
replacements_made = 0
|
| 679 |
|
| 680 |
for table_idx, table in enumerate(document.tables):
|
|
@@ -695,6 +837,13 @@ def process_tables(document, flat_json):
|
|
| 695 |
replacements_made += vehicle_replacements
|
| 696 |
continue
|
| 697 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 698 |
# Enhanced print accreditation detection
|
| 699 |
print_accreditation_indicators = ["print name", "position title"]
|
| 700 |
indicator_count = sum(1 for indicator in print_accreditation_indicators if indicator in table_text)
|
|
@@ -779,8 +928,13 @@ def process_tables(document, flat_json):
|
|
| 779 |
if cell_replacements == 0:
|
| 780 |
surgical_fix = handle_nature_business_multiline_fix(cell, flat_json)
|
| 781 |
replacements_made += surgical_fix
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 782 |
|
| 783 |
-
# π― SURGICAL FIX
|
| 784 |
print(f"\nπ― SURGICAL FIX: Checking for Operator/Auditor Declaration tables...")
|
| 785 |
for table in document.tables[-3:]: # Only check last 3 tables
|
| 786 |
if len(table.rows) <= 4: # Only small tables
|
|
|
|
| 673 |
return cell_replacements
|
| 674 |
return 0
|
| 675 |
|
| 676 |
+
# 🎯 FINAL FIX 1: Add this function to handle Attendance List (unchanged)
|
| 677 |
+
def handle_attendance_list_fix(table, flat_json):
    """Replace the red placeholder text of an Attendance List table.

    Walks the rows of ``table`` until it finds the first row whose first cell
    mentions both "attendance list" and "names and position titles"
    (compared in lower case). If that cell contains red text, the attendance
    value is looked up in ``flat_json`` under a few candidate field names and
    written over the red text. Scanning stops at that header row whether or
    not a replacement happened.

    Args:
        table: Object exposing ``rows``; each row exposes ``cells``.
            Presumably a python-docx table - confirm against the caller.
        flat_json: Data handed unchanged to ``find_matching_json_value``.

    Returns:
        The replacement count reported by ``replace_red_text_in_cell``, or 0
        when no header row, no red text, or no JSON value was found.
    """
    candidate_fields = (
        "Attendance List (Names and Position Titles)",
        "attendance list",
        "Attendance List",
    )

    for row_idx, row in enumerate(table.rows):
        cells = row.cells  # fetch once per row and reuse below
        if len(cells) < 1:
            continue

        header_cell = cells[0]
        cell_text = get_clean_text(header_cell).lower()
        is_attendance_header = (
            "attendance list" in cell_text
            and "names and position titles" in cell_text
        )
        if not is_attendance_header:
            continue

        print(f" 🎯 FINAL FIX: Attendance List table detected at row {row_idx + 1}")

        # The value to replace sits in the header cell itself; only red text
        # is ever overwritten, so without it there is nothing to do.
        if not has_red_text(header_cell):
            return 0

        attendance_value = None
        for field_attempt in candidate_fields:
            attendance_value = find_matching_json_value(field_attempt, flat_json)
            if attendance_value is not None:
                break
        if attendance_value is None:
            return 0

        if isinstance(attendance_value, list):
            # One attendee per line.
            attendance_text = "\n".join(str(item) for item in attendance_value)
        else:
            attendance_text = get_value_as_string(attendance_value)

        cell_replacements = replace_red_text_in_cell(header_cell, attendance_text)
        if cell_replacements:
            print(f" ✅ Fixed Attendance List: '{attendance_text[:50]}...'")

        # Found the table, stop looking.
        return cell_replacements

    return 0
|
| 712 |
+
|
| 713 |
+
# 🎯 FINAL FIX 2: Generic Management Summary fix for ALL types (Mass, Fatigue, Maintenance)
|
| 714 |
+
# Checked in this order; the first one found in the cell text wins.
_MANAGEMENT_TYPES = ("Mass Management", "Fatigue Management", "Maintenance Management")


def _summary_field_attempts(management_type, red_text, include_loose_names):
    """Return the JSON field names to try for a summary, in lookup order.

    ``red_text`` is tried after the names built from the full management type
    and before the names built from the short type ("Mass", "Fatigue", ...).
    ``include_loose_names`` adds the bare lower-case type and the lower-case
    short-type summary, which only the per-segment pass uses.
    """
    base_type = management_type.replace(" Management", "")
    attempts = [
        f"{management_type} Summary of Audit findings",
        f"{management_type} Summary",
        f"{management_type.lower()} summary",
    ]
    if include_loose_names:
        attempts.append(management_type.lower())
    attempts.append(red_text)
    # f"{base_type} Management Summary of Audit findings" is deliberately not
    # listed: it is the same string as the first entry above.
    attempts.append(f"{base_type} Summary of Audit findings")
    attempts.append(f"{base_type} Summary")
    if include_loose_names:
        attempts.append(f"{base_type.lower()} summary")
    return attempts


def _find_summary_value(field_attempts, flat_json):
    """Return ``(field_name, value)`` for the first field that matches, else ``(None, None)``."""
    for field_attempt in field_attempts:
        summary_value = find_matching_json_value(field_attempt, flat_json)
        if summary_value is not None:
            return field_attempt, summary_value
    return None, None


def _summary_replacement_text(summary_value, fallback_text):
    """Render a matched JSON value as text; lists become one non-blank item per line."""
    if isinstance(summary_value, list):
        return "\n".join(str(item) for item in summary_value if str(item).strip())
    return get_value_as_string(summary_value, fallback_text)


def handle_management_summary_fix(cell, flat_json):
    """Replace red text inside a Mass/Fatigue/Maintenance Management Summary cell.

    Only red text is touched. The cell must contain red text and its
    lower-cased text must mention "summary" together with one of the known
    management types. Each non-empty red segment is then matched against a
    list of candidate JSON field names and replaced individually. If no
    segment was replaced that way, the red segments are joined and matched
    once more as a whole, and all segments are replaced together.

    Args:
        cell: Table cell passed to ``has_red_text``, ``get_clean_text`` and
            ``extract_red_text_segments``.
        flat_json: Data handed unchanged to ``find_matching_json_value``.

    Returns:
        Number of replacements made: a count of replaced segments from the
        per-segment pass, or whatever ``replace_all_red_segments`` reports
        for the combined pass. 0 when the cell does not qualify or nothing
        matched.
    """
    if not has_red_text(cell):
        return 0

    cell_text = get_clean_text(cell).lower()

    # Detect which type of management summary this is.
    management_type = None
    if "summary" in cell_text:
        for candidate in _MANAGEMENT_TYPES:
            if candidate.lower() in cell_text:
                management_type = candidate
                break
    if not management_type:
        return 0

    print(f" 🎯 FINAL FIX: {management_type} Summary processing - RED TEXT ONLY")

    # ONLY process red text segments, not the entire cell text.
    red_segments = extract_red_text_segments(cell)
    replacements_made = 0

    # Pass 1: try to replace each red segment on its own.
    for segment in red_segments:
        segment_text = segment['text'].strip()
        if not segment_text:
            continue

        print(f" 🔍 Processing red text segment: '{segment_text[:50]}...'")

        field_attempts = _summary_field_attempts(
            management_type, segment_text, include_loose_names=True
        )
        matched_field, summary_value = _find_summary_value(field_attempts, flat_json)
        if summary_value is None:
            print(f" ❌ No match found for red text: '{segment_text[:30]}...'")
            continue

        print(f" ✅ Found match with field: '{matched_field}'")
        replacement_text = _summary_replacement_text(summary_value, segment_text)
        if replace_single_segment(segment, replacement_text):
            replacements_made += 1
            print(f" ✅ Fixed {management_type} Summary segment: '{segment_text[:30]}...' -> '{replacement_text[:30]}...'")

    # Pass 2: nothing was replaced segment by segment, so treat all the red
    # text as one value and replace every red segment together.
    if replacements_made == 0 and red_segments:
        combined_red_text = " ".join(seg['text'] for seg in red_segments).strip()
        print(f" 🔄 Trying combined red text match: '{combined_red_text[:50]}...'")

        field_attempts = _summary_field_attempts(
            management_type, combined_red_text, include_loose_names=False
        )
        matched_field, summary_value = _find_summary_value(field_attempts, flat_json)
        if summary_value is not None:
            replacement_text = _summary_replacement_text(summary_value, combined_red_text)
            replacements_made = replace_all_red_segments(red_segments, replacement_text)
            if replacements_made:
                print(f" ✅ Fixed {management_type} Summary combined red text with field: '{matched_field}'")

    return replacements_made
|
| 817 |
+
|
| 818 |
def process_tables(document, flat_json):
|
| 819 |
+
"""Your original function with ALL surgical fixes added"""
|
| 820 |
replacements_made = 0
|
| 821 |
|
| 822 |
for table_idx, table in enumerate(document.tables):
|
|
|
|
| 837 |
replacements_made += vehicle_replacements
|
| 838 |
continue
|
| 839 |
|
| 840 |
+
# π― FINAL FIX 1: Enhanced attendance list detection
|
| 841 |
+
if "attendance list" in table_text and "names and position titles" in table_text:
|
| 842 |
+
print(f" π₯ Detected Attendance List table")
|
| 843 |
+
attendance_replacements = handle_attendance_list_fix(table, flat_json)
|
| 844 |
+
replacements_made += attendance_replacements
|
| 845 |
+
continue
|
| 846 |
+
|
| 847 |
# Enhanced print accreditation detection
|
| 848 |
print_accreditation_indicators = ["print name", "position title"]
|
| 849 |
indicator_count = sum(1 for indicator in print_accreditation_indicators if indicator in table_text)
|
|
|
|
| 928 |
if cell_replacements == 0:
|
| 929 |
surgical_fix = handle_nature_business_multiline_fix(cell, flat_json)
|
| 930 |
replacements_made += surgical_fix
|
| 931 |
+
|
| 932 |
+
# π― FINAL FIX 2: Only if still no replacements were made, try ANY Management Summary fix
|
| 933 |
+
if cell_replacements == 0 and surgical_fix == 0:
|
| 934 |
+
management_summary_fix = handle_management_summary_fix(cell, flat_json)
|
| 935 |
+
replacements_made += management_summary_fix
|
| 936 |
|
| 937 |
+
# π― SURGICAL FIX 3: Handle Operator Declaration tables (only check last few tables)
|
| 938 |
print(f"\nπ― SURGICAL FIX: Checking for Operator/Auditor Declaration tables...")
|
| 939 |
for table in document.tables[-3:]: # Only check last 3 tables
|
| 940 |
if len(table.rows) <= 4: # Only small tables
|