Shami96 committed on
Commit
8df4ecc
·
verified ·
1 Parent(s): c38c9d4

Update updated_word.py

Browse files
Files changed (1) hide show
  1. updated_word.py +156 -2
updated_word.py CHANGED
@@ -673,8 +673,150 @@ def process_single_column_sections(cell, field_name, flat_json):
673
  return cell_replacements
674
  return 0
675
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
676
  def process_tables(document, flat_json):
677
- """Your original function with minimal surgical fixes added"""
678
  replacements_made = 0
679
 
680
  for table_idx, table in enumerate(document.tables):
@@ -695,6 +837,13 @@ def process_tables(document, flat_json):
695
  replacements_made += vehicle_replacements
696
  continue
697
 
 
 
 
 
 
 
 
698
  # Enhanced print accreditation detection
699
  print_accreditation_indicators = ["print name", "position title"]
700
  indicator_count = sum(1 for indicator in print_accreditation_indicators if indicator in table_text)
@@ -779,8 +928,13 @@ def process_tables(document, flat_json):
779
  if cell_replacements == 0:
780
  surgical_fix = handle_nature_business_multiline_fix(cell, flat_json)
781
  replacements_made += surgical_fix
 
 
 
 
 
782
 
783
- # 🎯 SURGICAL FIX 2: Handle Operator Declaration tables (only check last few tables)
784
  print(f"\n🎯 SURGICAL FIX: Checking for Operator/Auditor Declaration tables...")
785
  for table in document.tables[-3:]: # Only check last 3 tables
786
  if len(table.rows) <= 4: # Only small tables
 
673
  return cell_replacements
674
  return 0
675
 
676
+ # 🎯 FINAL FIX 1: Add this function to handle Attendance List (unchanged)
677
def handle_attendance_list_fix(table, flat_json):
    """Replace the red placeholder text in an Attendance List table.

    Walks the rows of ``table`` and stops at the first row whose leading cell
    mentions both "attendance list" and "names and position titles"
    (compared in lower case). If that cell carries red text, the JSON value is
    looked up under several spellings of the field name and written over the
    red text; list values are rendered one item per line.

    Args:
        table: Table object exposing ``rows``, each row exposing ``cells``.
        flat_json: Flattened JSON data passed through to
            ``find_matching_json_value``.

    Returns:
        The replacement count reported by ``replace_red_text_in_cell``, or 0
        when no header row, no red text, or no JSON value was found.
    """
    # Field-name spellings tried in order; the first non-None lookup wins.
    candidate_keys = (
        "Attendance List (Names and Position Titles)",
        "attendance list",
        "Attendance List",
    )
    total = 0

    for position, row in enumerate(table.rows, start=1):
        cells = row.cells
        if len(cells) < 1:
            continue

        header_cell = cells[0]
        lowered = get_clean_text(header_cell).lower()
        is_header = "attendance list" in lowered and "names and position titles" in lowered
        if not is_header:
            continue

        print(f" 🎯 FINAL FIX: Attendance List table detected at row {position}")

        # The content is expected in the header cell itself, marked in red.
        if has_red_text(header_cell):
            # Lazy generator: lookups stop as soon as one returns a value.
            lookups = (find_matching_json_value(key, flat_json) for key in candidate_keys)
            attendance_value = next((hit for hit in lookups if hit is not None), None)

            if attendance_value is not None:
                attendance_text = get_value_as_string(attendance_value)
                if isinstance(attendance_value, list):
                    attendance_text = "\n".join(map(str, attendance_value))

                total += replace_red_text_in_cell(header_cell, attendance_text)
                print(f" βœ… Fixed Attendance List: '{attendance_text[:50]}...'")

        # Only the first matching header row is handled.
        break

    return total
712
+
713
+ # 🎯 FINAL FIX 2: Generic Management Summary fix for ALL types (Mass, Fatigue, Maintenance)
714
def _first_json_match(field_candidates, flat_json):
    """Return ``(field, value)`` for the first candidate whose lookup is not None.

    Candidates are de-duplicated (order preserved) so the same field name is
    never looked up twice. Returns ``(None, None)`` when nothing matches.
    """
    for field in dict.fromkeys(field_candidates):
        value = find_matching_json_value(field, flat_json)
        if value is not None:
            return field, value
    return None, None


def _summary_replacement_text(summary_value, red_text):
    """Render a matched JSON value as replacement text.

    Lists become one non-blank item per line; anything else is delegated to
    ``get_value_as_string`` with the red text being replaced as second argument.
    """
    if isinstance(summary_value, list):
        return "\n".join(str(item) for item in summary_value if str(item).strip())
    return get_value_as_string(summary_value, red_text)


def handle_management_summary_fix(cell, flat_json):
    """Replace red text in a Mass/Fatigue/Maintenance Management Summary cell.

    The cell is processed only if it contains red text and its lower-cased
    text mentions "summary" together with one of "mass management",
    "fatigue management" or "maintenance management" (checked in that order).

    Each non-empty red segment is first matched on its own against a list of
    field-name spellings (including the segment text itself). If no segment
    was replaced, all red segments are joined with spaces and matched once
    more, and a hit replaces all red segments at once.

    Args:
        cell: Table cell to inspect.
        flat_json: Flattened JSON data passed through to
            ``find_matching_json_value``.

    Returns:
        Number of segments replaced individually, or the value returned by
        ``replace_all_red_segments`` when the combined fallback matched;
        0 when the cell is not a management summary or nothing matched.
    """
    if not has_red_text(cell):
        return 0

    cell_text = get_clean_text(cell).lower()
    if "summary" not in cell_text:
        return 0

    # First type mentioned wins, in the same priority order as before.
    management_type = next(
        (
            name
            for name in ("Mass Management", "Fatigue Management", "Maintenance Management")
            if name.lower() in cell_text
        ),
        None,
    )
    if management_type is None:
        return 0

    print(f" 🎯 FINAL FIX: {management_type} Summary processing - RED TEXT ONLY")

    # Field-name spellings that do not depend on the red text are built once
    # instead of on every segment iteration.
    base_type = management_type.replace(" Management", "")
    leading_fields = [
        f"{management_type} Summary of Audit findings",
        f"{management_type} Summary",
        f"{management_type.lower()} summary",
    ]
    trailing_fields = [
        f"{base_type} Management Summary of Audit findings",
        f"{base_type} Summary of Audit findings",
        f"{base_type} Summary",
    ]

    # Only the red segments are touched, never the whole cell text.
    red_segments = extract_red_text_segments(cell)
    replacements_made = 0

    for segment in red_segments:
        segment_text = segment['text'].strip()
        if not segment_text:
            continue

        print(f" πŸ” Processing red text segment: '{segment_text[:50]}...'")

        matched_field, summary_value = _first_json_match(
            [
                *leading_fields,
                management_type.lower(),
                segment_text,  # also try the exact red text
                *trailing_fields,
                f"{base_type.lower()} summary",
            ],
            flat_json,
        )

        if summary_value is None:
            print(f" ❌ No match found for red text: '{segment_text[:30]}...'")
            continue

        print(f" βœ… Found match with field: '{matched_field}'")
        replacement_text = _summary_replacement_text(summary_value, segment_text)

        if replace_single_segment(segment, replacement_text):
            replacements_made += 1
            print(f" βœ… Fixed {management_type} Summary segment: '{segment_text[:30]}...' -> '{replacement_text[:30]}...'")

    # Fallback: nothing was replaced segment-by-segment, so match the red text
    # as a whole and overwrite every red segment with the result.
    if replacements_made == 0 and red_segments:
        combined_red_text = " ".join(seg['text'] for seg in red_segments).strip()
        print(f" πŸ”„ Trying combined red text match: '{combined_red_text[:50]}...'")

        matched_field, summary_value = _first_json_match(
            [*leading_fields, combined_red_text, *trailing_fields],
            flat_json,
        )
        if summary_value is not None:
            replacement_text = _summary_replacement_text(summary_value, combined_red_text)
            replacements_made = replace_all_red_segments(red_segments, replacement_text)
            print(f" βœ… Fixed {management_type} Summary combined red text with field: '{matched_field}'")

    return replacements_made
817
+
818
  def process_tables(document, flat_json):
819
+ """Your original function with ALL surgical fixes added"""
820
  replacements_made = 0
821
 
822
  for table_idx, table in enumerate(document.tables):
 
837
  replacements_made += vehicle_replacements
838
  continue
839
 
840
+ # 🎯 FINAL FIX 1: Enhanced attendance list detection
841
+ if "attendance list" in table_text and "names and position titles" in table_text:
842
+ print(f" πŸ‘₯ Detected Attendance List table")
843
+ attendance_replacements = handle_attendance_list_fix(table, flat_json)
844
+ replacements_made += attendance_replacements
845
+ continue
846
+
847
  # Enhanced print accreditation detection
848
  print_accreditation_indicators = ["print name", "position title"]
849
  indicator_count = sum(1 for indicator in print_accreditation_indicators if indicator in table_text)
 
928
  if cell_replacements == 0:
929
  surgical_fix = handle_nature_business_multiline_fix(cell, flat_json)
930
  replacements_made += surgical_fix
931
+
932
+ # 🎯 FINAL FIX 2: Only if still no replacements were made, try ANY Management Summary fix
933
+ if cell_replacements == 0 and surgical_fix == 0:
934
+ management_summary_fix = handle_management_summary_fix(cell, flat_json)
935
+ replacements_made += management_summary_fix
936
 
937
+ # 🎯 SURGICAL FIX 3: Handle Operator Declaration tables (only check last few tables)
938
  print(f"\n🎯 SURGICAL FIX: Checking for Operator/Auditor Declaration tables...")
939
  for table in document.tables[-3:]: # Only check last 3 tables
940
  if len(table.rows) <= 4: # Only small tables