Spaces:
Running
Running
Update updated_word.py
Browse files- updated_word.py +72 -31
updated_word.py
CHANGED
|
@@ -674,39 +674,80 @@ def process_single_column_sections(cell, field_name, flat_json):
|
|
| 674 |
return 0
|
| 675 |
|
| 676 |
# 🎯 FINAL FIX 1: Add this function to handle Attendance List (unchanged)
|
| 677 |
-
def
|
| 678 |
-
"""
|
| 679 |
replacements_made = 0
|
| 680 |
|
| 681 |
-
#
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 685 |
|
| 686 |
-
# Check if this
|
| 687 |
-
if
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
| 694 |
-
|
| 695 |
-
|
| 696 |
-
|
| 697 |
-
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
| 706 |
-
|
| 707 |
-
|
| 708 |
-
|
| 709 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 710 |
|
| 711 |
return replacements_made
|
| 712 |
|
|
@@ -840,7 +881,7 @@ def process_tables(document, flat_json):
|
|
| 840 |
# 🎯 FINAL FIX 1: Enhanced attendance list detection
|
| 841 |
if "attendance list" in table_text and "names and position titles" in table_text:
|
| 842 |
print(f" π₯ Detected Attendance List table")
|
| 843 |
-
attendance_replacements =
|
| 844 |
replacements_made += attendance_replacements
|
| 845 |
continue
|
| 846 |
|
|
|
|
| 674 |
return 0
|
| 675 |
|
| 676 |
# 🎯 FINAL FIX 1: Add this function to handle Attendance List (unchanged)
|
| 677 |
+
def handle_attendance_list_table_enhanced(table, flat_json):
    """Fill the Attendance List cell of a table from the flattened JSON data.

    The first three rows of ``table`` are scanned for a cell whose lowercased
    text contains one of the attendance header phrases. When one is found, the
    attendance value is looked up in ``flat_json`` under several candidate
    keys and written into that same cell.

    Args:
        table: Table object exposing ``rows`` and, per row, ``cells``
            (presumably a python-docx ``Table`` -- confirm against caller).
        flat_json: Flattened JSON mapping that is passed through to
            ``find_matching_json_value``.

    Returns:
        The number of replacements made; 0 when no attendance cell is found,
        no attendance data is available, or the cell is left untouched.
    """
    replacements_made = 0

    # Header phrases that identify the attendance list cell.
    attendance_patterns = [
        "attendance list",
        "names and position titles",
        "attendees",
    ]

    # Scan all cells in the first few rows for attendance list indicators.
    found_attendance_row = None
    found_attendance_cell = None

    for row_idx, row in enumerate(table.rows[:3]):  # Check first 3 rows
        for cell_idx, cell in enumerate(row.cells):
            cell_text = get_clean_text(cell).lower()

            # Check if this cell contains the attendance list header.
            if any(pattern in cell_text for pattern in attendance_patterns):
                found_attendance_row = row_idx
                found_attendance_cell = cell_idx
                print(f" 🎯 ENHANCED: Found Attendance List in row {row_idx + 1}, cell {cell_idx + 1}")
                break

        # Stop scanning further rows once the inner loop has found a match.
        if found_attendance_row is not None:
            break

    if found_attendance_row is None:
        return 0

    # Look for attendance data in JSON, trying the most specific key first.
    attendance_value = None
    attendance_search_keys = [
        "Attendance List (Names and Position Titles)",
        "attendance list",
        "attendees",
        "names and position titles",
    ]

    for search_key in attendance_search_keys:
        # The first key that yields a value wins; later keys are fallbacks.
        attendance_value = find_matching_json_value(search_key, flat_json)
        if attendance_value is not None:
            print(f" ✅ Found attendance data with key: '{search_key}'")
            break

    if attendance_value is None:
        print(f" ❌ No attendance data found in JSON")
        return 0

    # Process the attendance cell.
    target_row = table.rows[found_attendance_row]
    target_cell = target_row.cells[found_attendance_cell]

    # Format attendance data: lists become one entry per line, skipping
    # entries that are blank once converted to a string.
    if isinstance(attendance_value, list):
        formatted_attendance = "\n".join(str(item) for item in attendance_value if str(item).strip())
    else:
        formatted_attendance = str(attendance_value)

    # Replace red text, or the entire cell content if it looks like a placeholder.
    if has_red_text(target_cell):
        cell_replacements = replace_red_text_in_cell(target_cell, formatted_attendance)
        replacements_made += cell_replacements
        print(f" ✅ Replaced red text in attendance list with: '{formatted_attendance[:50]}...'")
    else:
        # If no red text, check if the cell is mostly empty and fill it.
        current_text = get_clean_text(target_cell).strip()
        if not current_text or len(current_text) < 20:  # Likely placeholder
            target_cell.text = formatted_attendance
            replacements_made += 1
            print(f" ✅ Filled empty attendance cell with: '{formatted_attendance[:50]}...'")

    return replacements_made
|
| 753 |
|
|
|
|
| 881 |
# 🎯 FINAL FIX 1: Enhanced attendance list detection
|
| 882 |
if "attendance list" in table_text and "names and position titles" in table_text:
|
| 883 |
print(f" π₯ Detected Attendance List table")
|
| 884 |
+
attendance_replacements = handle_attendance_list_table_enhanced(table, flat_json)
|
| 885 |
replacements_made += attendance_replacements
|
| 886 |
continue
|
| 887 |
|