Shami96 committed on
Commit
076f0d9
·
verified ·
1 Parent(s): f1c869a

Update updated_word.py

Browse files
Files changed (1) hide show
  1. updated_word.py +72 -31
updated_word.py CHANGED
@@ -674,39 +674,80 @@ def process_single_column_sections(cell, field_name, flat_json):
674
  return 0
675
 
676
  # 🎯 FINAL FIX 1: Add this function to handle Attendance List (unchanged)
677
- def handle_attendance_list_fix(table, flat_json):
678
- """FINAL FIX: Handle Attendance List table specifically"""
679
  replacements_made = 0
680
 
681
- # Look for attendance list table
682
- for row_idx, row in enumerate(table.rows):
683
- if len(row.cells) >= 1:
684
- cell_text = get_clean_text(row.cells[0]).lower()
 
 
 
 
 
 
 
 
 
 
685
 
686
- # Check if this is the attendance list header
687
- if "attendance list" in cell_text and "names and position titles" in cell_text:
688
- print(f" 🎯 FINAL FIX: Attendance List table detected at row {row_idx + 1}")
689
-
690
- # The content should be in the same cell, look for red text
691
- if has_red_text(row.cells[0]):
692
- # Try to find attendance list data
693
- attendance_value = None
694
- for field_attempt in ["Attendance List (Names and Position Titles)", "attendance list", "Attendance List"]:
695
- attendance_value = find_matching_json_value(field_attempt, flat_json)
696
- if attendance_value is not None:
697
- break
698
-
699
- if attendance_value is not None:
700
- attendance_text = get_value_as_string(attendance_value)
701
- # Handle list format for attendance
702
- if isinstance(attendance_value, list):
703
- attendance_text = "\n".join(str(item) for item in attendance_value)
704
-
705
- cell_replacements = replace_red_text_in_cell(row.cells[0], attendance_text)
706
- replacements_made += cell_replacements
707
- print(f" ✅ Fixed Attendance List: '{attendance_text[:50]}...'")
708
-
709
- break # Found the table, stop looking
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
710
 
711
  return replacements_made
712
 
@@ -840,7 +881,7 @@ def process_tables(document, flat_json):
840
  # 🎯 FINAL FIX 1: Enhanced attendance list detection
841
  if "attendance list" in table_text and "names and position titles" in table_text:
842
  print(f" 👥 Detected Attendance List table")
843
- attendance_replacements = handle_attendance_list_fix(table, flat_json)
844
  replacements_made += attendance_replacements
845
  continue
846
 
 
674
  return 0
675
 
676
  # 🎯 FINAL FIX 1: Add this function to handle Attendance List (unchanged)
677
+ def handle_attendance_list_table_enhanced(table, flat_json):
678
+ """Enhanced Attendance List processing with better detection"""
679
  replacements_made = 0
680
 
681
+ # Check multiple patterns for attendance list
682
+ attendance_patterns = [
683
+ "attendance list",
684
+ "names and position titles",
685
+ "attendees"
686
+ ]
687
+
688
+ # Scan all cells in the first few rows for attendance list indicators
689
+ found_attendance_row = None
690
+ found_attendance_cell = None
691
+
692
+ for row_idx, row in enumerate(table.rows[:3]): # Check first 3 rows
693
+ for cell_idx, cell in enumerate(row.cells):
694
+ cell_text = get_clean_text(cell).lower()
695
 
696
+ # Check if this cell contains attendance list header
697
+ if any(pattern in cell_text for pattern in attendance_patterns):
698
+ found_attendance_row = row_idx
699
+ found_attendance_cell = cell_idx
700
+ print(f" 🎯 ENHANCED: Found Attendance List in row {row_idx + 1}, cell {cell_idx + 1}")
701
+ break
702
+
703
+ if found_attendance_row is not None:
704
+ break
705
+
706
+ if found_attendance_row is None:
707
+ return 0
708
+
709
+ # Look for attendance data in JSON
710
+ attendance_value = None
711
+ attendance_search_keys = [
712
+ "Attendance List (Names and Position Titles)",
713
+ "attendance list",
714
+ "attendees",
715
+ "names and position titles"
716
+ ]
717
+
718
+ for search_key in attendance_search_keys:
719
+ # Try exact match first
720
+ attendance_value = find_matching_json_value(search_key, flat_json)
721
+ if attendance_value is not None:
722
+ print(f" ✅ Found attendance data with key: '{search_key}'")
723
+ break
724
+
725
+ if attendance_value is None:
726
+ print(f" ❌ No attendance data found in JSON")
727
+ return 0
728
+
729
+ # Process the attendance cell
730
+ target_row = table.rows[found_attendance_row]
731
+ target_cell = target_row.cells[found_attendance_cell]
732
+
733
+ # Format attendance data
734
+ if isinstance(attendance_value, list):
735
+ formatted_attendance = "\n".join(str(item) for item in attendance_value if str(item).strip())
736
+ else:
737
+ formatted_attendance = str(attendance_value)
738
+
739
+ # Replace red text or entire cell content if needed
740
+ if has_red_text(target_cell):
741
+ cell_replacements = replace_red_text_in_cell(target_cell, formatted_attendance)
742
+ replacements_made += cell_replacements
743
+ print(f" ✅ Replaced red text in attendance list with: '{formatted_attendance[:50]}...'")
744
+ else:
745
+ # If no red text, check if cell is mostly empty and fill it
746
+ current_text = get_clean_text(target_cell).strip()
747
+ if not current_text or len(current_text) < 20: # Likely placeholder
748
+ target_cell.text = formatted_attendance
749
+ replacements_made += 1
750
+ print(f" ✅ Filled empty attendance cell with: '{formatted_attendance[:50]}...'")
751
 
752
  return replacements_made
753
 
 
881
  # 🎯 FINAL FIX 1: Enhanced attendance list detection
882
  if "attendance list" in table_text and "names and position titles" in table_text:
883
  print(f" 👥 Detected Attendance List table")
884
+ attendance_replacements = handle_attendance_list_table_enhanced(table, flat_json)
885
  replacements_made += attendance_replacements
886
  continue
887