Spaces:
Running
Running
Update updated_word.py
Browse files- updated_word.py +175 -100
updated_word.py
CHANGED
|
@@ -647,166 +647,241 @@ def fix_management_summary_details_column(table, flat_json):
|
|
| 647 |
# Canonical operator declaration fixer — SAFER
|
| 648 |
# ============================================================================
|
| 649 |
def fix_operator_declaration_empty_values(table, flat_json):
|
|
|
|
|
|
|
|
|
|
| 650 |
replacements_made = 0
|
| 651 |
print(f" π― FIX: Operator Declaration empty values processing")
|
|
|
|
|
|
|
| 652 |
table_context = ""
|
| 653 |
for row in table.rows:
|
| 654 |
for cell in row.cells:
|
| 655 |
table_context += get_clean_text(cell).lower() + " "
|
|
|
|
| 656 |
if not ("print name" in table_context and "position title" in table_context):
|
| 657 |
return 0
|
|
|
|
| 658 |
print(f" β
Confirmed Operator Declaration table")
|
| 659 |
|
| 660 |
def parse_name_and_position(value):
|
|
|
|
| 661 |
if value is None:
|
| 662 |
return None, None
|
|
|
|
| 663 |
if isinstance(value, list):
|
| 664 |
if len(value) == 0:
|
| 665 |
return None, None
|
| 666 |
if len(value) == 1:
|
| 667 |
return str(value[0]).strip(), None
|
| 668 |
-
|
|
|
|
| 669 |
first = str(value[0]).strip()
|
| 670 |
second = str(value[1]).strip()
|
| 671 |
if first and second:
|
| 672 |
return first, second
|
|
|
|
|
|
|
| 673 |
value = " ".join(str(v).strip() for v in value if str(v).strip())
|
|
|
|
| 674 |
s = str(value).strip()
|
| 675 |
if not s:
|
| 676 |
return None, None
|
| 677 |
-
|
| 678 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 679 |
left = parts[0].strip()
|
| 680 |
right = parts[1].strip()
|
|
|
|
|
|
|
| 681 |
role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
|
| 682 |
-
'coordinator', 'driver', 'operator', 'representative', 'chief'
|
| 683 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 684 |
return left, right
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
return left, right
|
| 688 |
tokens = s.split()
|
| 689 |
if len(tokens) >= 2:
|
| 690 |
-
|
| 691 |
role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
|
| 692 |
'coordinator', 'driver', 'operator', 'representative', 'chief']
|
| 693 |
-
if any(ind ==
|
| 694 |
-
return " ".join(tokens[:-1]),
|
|
|
|
| 695 |
return s, None
|
| 696 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 697 |
for row_idx, row in enumerate(table.rows):
|
| 698 |
if len(row.cells) >= 2:
|
| 699 |
cell1_text = get_clean_text(row.cells[0]).strip().lower()
|
| 700 |
cell2_text = get_clean_text(row.cells[1]).strip().lower()
|
| 701 |
-
|
|
|
|
| 702 |
if "print name" in cell1_text and "position" in cell2_text:
|
| 703 |
print(f" π Found header row at {row_idx + 1}")
|
|
|
|
|
|
|
| 704 |
if row_idx + 1 < len(table.rows):
|
| 705 |
data_row = table.rows[row_idx + 1]
|
| 706 |
if len(data_row.cells) >= 2:
|
| 707 |
name_cell = data_row.cells[0]
|
| 708 |
position_cell = data_row.cells[1]
|
| 709 |
-
|
| 710 |
-
|
| 711 |
-
|
| 712 |
-
|
| 713 |
-
|
| 714 |
-
|
| 715 |
-
|
| 716 |
-
|
| 717 |
-
name_value = name_kv[1] if name_kv else None
|
| 718 |
-
name_key = name_kv[0] if name_kv else None
|
| 719 |
-
|
| 720 |
-
position_value = position_kv[1] if position_kv else None
|
| 721 |
-
position_key = position_kv[0] if position_kv else None
|
| 722 |
-
|
| 723 |
-
# parse combined cases
|
| 724 |
-
parsed_name_from_nameval, parsed_pos_from_nameval = parse_name_and_position(name_value) if name_value is not None else (None, None)
|
| 725 |
-
parsed_name_from_posval, parsed_pos_from_posval = parse_name_and_position(position_value) if position_value is not None else (None, None)
|
| 726 |
-
|
| 727 |
final_name = None
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
|
| 732 |
-
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
#
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
if
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
| 761 |
-
|
| 762 |
-
|
| 763 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 764 |
else:
|
| 765 |
-
|
| 766 |
-
|
| 767 |
-
|
| 768 |
-
|
| 769 |
-
|
| 770 |
-
|
| 771 |
-
|
| 772 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 773 |
final_name = " ".join(str(x) for x in final_name).strip()
|
| 774 |
-
if isinstance(
|
| 775 |
-
|
| 776 |
-
|
| 777 |
-
|
| 778 |
-
|
| 779 |
-
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
|
| 783 |
-
|
| 784 |
-
bad_phrases = ["pty ltd", "company", "farming", "p/l", "plc"]
|
| 785 |
-
low = name_str.lower()
|
| 786 |
-
if any(bp in low for bp in bad_phrases):
|
| 787 |
-
return False
|
| 788 |
-
return len(name_str) > 1 and any(c.isalpha() for c in name_str)
|
| 789 |
-
|
| 790 |
-
# Write name if empty or red
|
| 791 |
-
if (not name_text or has_red_text(name_cell)) and final_name and looks_like_person(final_name):
|
| 792 |
if has_red_text(name_cell):
|
| 793 |
replace_red_text_in_cell(name_cell, final_name)
|
| 794 |
else:
|
| 795 |
name_cell.text = final_name
|
| 796 |
replacements_made += 1
|
| 797 |
print(f" β
Updated Print Name -> '{final_name}'")
|
| 798 |
-
|
| 799 |
-
#
|
| 800 |
-
if (not
|
| 801 |
if has_red_text(position_cell):
|
| 802 |
-
replace_red_text_in_cell(position_cell,
|
| 803 |
else:
|
| 804 |
-
position_cell.text =
|
| 805 |
replacements_made += 1
|
| 806 |
-
print(f" β
Updated Position Title -> '{
|
| 807 |
-
|
| 808 |
-
break
|
| 809 |
|
|
|
|
| 810 |
if replacements_made > 0:
|
| 811 |
try:
|
| 812 |
setattr(table, "_processed_operator_declaration", True)
|
|
|
|
| 647 |
# Canonical operator declaration fixer — SAFER
|
| 648 |
# ============================================================================
|
| 649 |
def fix_operator_declaration_empty_values(table, flat_json):
|
| 650 |
+
"""
|
| 651 |
+
IMPROVED: Better operator declaration handling with more reliable position detection
|
| 652 |
+
"""
|
| 653 |
replacements_made = 0
|
| 654 |
print(f" π― FIX: Operator Declaration empty values processing")
|
| 655 |
+
|
| 656 |
+
# Verify this is actually an operator declaration table
|
| 657 |
table_context = ""
|
| 658 |
for row in table.rows:
|
| 659 |
for cell in row.cells:
|
| 660 |
table_context += get_clean_text(cell).lower() + " "
|
| 661 |
+
|
| 662 |
if not ("print name" in table_context and "position title" in table_context):
|
| 663 |
return 0
|
| 664 |
+
|
| 665 |
print(f" β
Confirmed Operator Declaration table")
|
| 666 |
|
| 667 |
def parse_name_and_position(value):
    """Split a raw value into a ``(name, position)`` pair.

    Args:
        value: ``None``, a list/tuple of parts, or anything convertible to a
            string such as ``"Jane Doe - Manager"``.

    Returns:
        A 2-tuple ``(name, position)``. Either element is ``None`` when it
        could not be determined.

    Parsing order:
        1. A list/tuple whose first two items are both non-empty is taken as
           ``[name, position]``; otherwise the non-empty items are joined.
        2. The text is split once on the first separator that matches
           (spaced dash, comma, pipe, semicolon). If only the left part
           contains a role word, the pair is treated as ``position, name``
           and swapped.
        3. Failing that, a trailing role word is peeled off as the position.
    """
    # One shared role list so the separator branch and the trailing-token
    # branch cannot drift apart.
    role_indicators = (
        'manager', 'auditor', 'owner', 'director', 'supervisor',
        'coordinator', 'driver', 'operator', 'representative', 'chief',
        'president', 'ceo', 'cfo', 'secretary', 'treasurer',
    )
    # Anchor at a word start so a surname such as "Downer" is not mistaken
    # for the role "owner".
    role_pattern = re.compile(
        r'\b(?:' + '|'.join(role_indicators) + r')', re.IGNORECASE
    )

    def as_text(item):
        # None must become empty text, not the literal string "None".
        return "" if item is None else str(item).strip()

    if value is None:
        return None, None

    if isinstance(value, (list, tuple)):
        items = [as_text(item) for item in value]
        if not items:
            return None, None
        if len(items) == 1:
            return items[0] or None, None
        if items[0] and items[1]:
            return items[0], items[1]
        value = " ".join(item for item in items if item)

    s = as_text(value)
    if not s:
        return None, None

    # Dash separators need surrounding whitespace so hyphenated names
    # ("Mary-Jane") stay intact; \u2013 / \u2014 are the en and em dash.
    separators = (
        r'\s+[-\u2013\u2014]\s+',
        r'\s*,\s*',
        r'\s*\|\s*',
        r'\s*;\s*',
    )
    for sep_pattern in separators:
        # maxsplit=1 keeps everything after the first separator instead of
        # silently discarding a third segment.
        parts = re.split(sep_pattern, s, maxsplit=1)
        if len(parts) != 2:
            continue
        left = parts[0].strip()
        right = parts[1].strip()
        if left and right:
            left_has_role = bool(role_pattern.search(left))
            right_has_role = bool(role_pattern.search(right))
            if left_has_role and not right_has_role:
                return right, left  # Reversed: position, name
            # Standard order, or ambiguous: default to name, position
            return left, right
        # Dangling separator ("Jane Doe,"): keep the non-empty side and
        # fall through to the trailing-token check.
        s = left or right
        break

    if not s:
        return None, None

    # Look for a single role word at the end ("Jane Doe Manager").
    tokens = s.split()
    if len(tokens) >= 2 and tokens[-1].lower() in role_indicators:
        return " ".join(tokens[:-1]), tokens[-1]

    return s, None
|
| 730 |
|
| 731 |
+
def looks_like_role(s: str) -> bool:
    """Heuristically decide whether *s* reads like a job role or position.

    Returns ``True`` when the lower-cased text contains one of the known
    role words, or when it is a short phrase (at most three
    whitespace-separated words, more than one character, at least one
    letter). Returns ``False`` for empty or ``None`` input.

    NOTE(review): the short-phrase fallback also accepts short non-role
    text such as a two-word personal name; callers should not treat a
    ``True`` result as proof that the text is a role.
    """
    if not s:
        return False

    text = s.lower().strip()

    known_roles = (
        'manager', 'auditor', 'owner', 'director', 'supervisor',
        'coordinator', 'driver', 'operator', 'representative', 'chief',
        'president', 'ceo', 'cfo', 'secretary', 'treasurer', 'officer',
    )

    # Any known role word appearing anywhere in the text is enough.
    for word in known_roles:
        if word in text:
            return True

    # Otherwise accept short alphabetic phrases as probable roles.
    word_count = len(text.split())
    has_letter = any(ch.isalpha() for ch in text)
    return word_count <= 3 and has_letter and len(text) > 1
|
| 752 |
+
|
| 753 |
+
def looks_like_person_name(s: str) -> bool:
    """Heuristically decide whether *s* reads like a person's name.

    Returns ``False`` for empty or ``None`` input, for text containing a
    company designator (``Pty Ltd``, ``Ltd``, ``Inc``, ``Corp``,
    ``Corporation``, ``Incorporated``, ``Company``, ``LLC``, ``PLC``) as a
    whole word, and for text that is a single character or has no letters.
    Returns ``True`` otherwise.
    """
    # Local import keeps this helper self-contained.
    import re

    if not s:
        return False

    text = s.strip()

    # Match company designators as whole words only. Plain substring
    # matching rejected real names such as "Vincent" or "Lincoln" because
    # they contain "inc".
    company_pattern = re.compile(
        r'\b(?:pty\s+ltd|ltd|inc|incorporated|corp|corporation|company|llc|plc)\b',
        re.IGNORECASE,
    )
    if company_pattern.search(text):
        return False

    # Should have letters and a reasonable length.
    return len(text) > 1 and any(ch.isalpha() for ch in text)
|
| 771 |
+
|
| 772 |
+
# Process the table
|
| 773 |
for row_idx, row in enumerate(table.rows):
|
| 774 |
if len(row.cells) >= 2:
|
| 775 |
cell1_text = get_clean_text(row.cells[0]).strip().lower()
|
| 776 |
cell2_text = get_clean_text(row.cells[1]).strip().lower()
|
| 777 |
+
|
| 778 |
+
# Detect header row
|
| 779 |
if "print name" in cell1_text and "position" in cell2_text:
|
| 780 |
print(f" π Found header row at {row_idx + 1}")
|
| 781 |
+
|
| 782 |
+
# Process data row (next row after header)
|
| 783 |
if row_idx + 1 < len(table.rows):
|
| 784 |
data_row = table.rows[row_idx + 1]
|
| 785 |
if len(data_row.cells) >= 2:
|
| 786 |
name_cell = data_row.cells[0]
|
| 787 |
position_cell = data_row.cells[1]
|
| 788 |
+
|
| 789 |
+
current_name = get_clean_text(name_cell).strip()
|
| 790 |
+
current_position = get_clean_text(position_cell).strip()
|
| 791 |
+
|
| 792 |
+
print(f" π Current values: Name='{current_name}', Position='{current_position}'")
|
| 793 |
+
|
| 794 |
+
# IMPROVED: More comprehensive search for operator declaration data
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 795 |
final_name = None
|
| 796 |
+
final_position = None
|
| 797 |
+
|
| 798 |
+
# Search strategies in order of preference
|
| 799 |
+
search_strategies = [
|
| 800 |
+
# Strategy 1: Direct operator declaration keys
|
| 801 |
+
("Operator Declaration.Print Name", "Operator Declaration.Position Title"),
|
| 802 |
+
|
| 803 |
+
# Strategy 2: Generic print name/position keys
|
| 804 |
+
("Print Name", "Position Title"),
|
| 805 |
+
|
| 806 |
+
# Strategy 3: Look in operator information section
|
| 807 |
+
("Operator Information.Print Name", "Operator Information.Position Title"),
|
| 808 |
+
|
| 809 |
+
# Strategy 4: Any key containing "print name" or "position"
|
| 810 |
+
(None, None) # Special case - will search all keys
|
| 811 |
+
]
|
| 812 |
+
|
| 813 |
+
for name_key_pattern, pos_key_pattern in search_strategies:
|
| 814 |
+
if final_name and final_position:
|
| 815 |
+
break
|
| 816 |
+
|
| 817 |
+
if name_key_pattern is None:
|
| 818 |
+
# Search all keys for relevant data
|
| 819 |
+
for key, value in flat_json.items():
|
| 820 |
+
key_lower = key.lower()
|
| 821 |
+
|
| 822 |
+
# Look for name-like keys
|
| 823 |
+
if not final_name and ("print name" in key_lower or
|
| 824 |
+
("name" in key_lower and "operator" in key_lower)):
|
| 825 |
+
if value and looks_like_person_name(str(value)):
|
| 826 |
+
name_from_val, pos_from_val = parse_name_and_position(value)
|
| 827 |
+
if name_from_val and looks_like_person_name(name_from_val):
|
| 828 |
+
final_name = name_from_val
|
| 829 |
+
if pos_from_val and looks_like_role(pos_from_val):
|
| 830 |
+
final_position = pos_from_val
|
| 831 |
+
|
| 832 |
+
# Look for position-like keys
|
| 833 |
+
if not final_position and ("position" in key_lower or "title" in key_lower):
|
| 834 |
+
if value and looks_like_role(str(value)):
|
| 835 |
+
final_position = str(value).strip()
|
| 836 |
else:
|
| 837 |
+
# Search for specific key patterns
|
| 838 |
+
name_kv = find_matching_json_key_and_value(name_key_pattern, flat_json)
|
| 839 |
+
pos_kv = find_matching_json_key_and_value(pos_key_pattern, flat_json)
|
| 840 |
+
|
| 841 |
+
if name_kv and name_kv[1]:
|
| 842 |
+
name_from_val, pos_from_val = parse_name_and_position(name_kv[1])
|
| 843 |
+
if name_from_val and looks_like_person_name(name_from_val):
|
| 844 |
+
final_name = name_from_val
|
| 845 |
+
if pos_from_val and looks_like_role(pos_from_val) and not final_position:
|
| 846 |
+
final_position = pos_from_val
|
| 847 |
+
|
| 848 |
+
if pos_kv and pos_kv[1] and not final_position:
|
| 849 |
+
pos_val = str(pos_kv[1]).strip()
|
| 850 |
+
if looks_like_role(pos_val):
|
| 851 |
+
final_position = pos_val
|
| 852 |
+
|
| 853 |
+
# Clean up final values
|
| 854 |
+
if isinstance(final_name, (list, tuple)):
|
| 855 |
final_name = " ".join(str(x) for x in final_name).strip()
|
| 856 |
+
if isinstance(final_position, (list, tuple)):
|
| 857 |
+
final_position = " ".join(str(x) for x in final_position).strip()
|
| 858 |
+
|
| 859 |
+
final_name = str(final_name).strip() if final_name else None
|
| 860 |
+
final_position = str(final_position).strip() if final_position else None
|
| 861 |
+
|
| 862 |
+
print(f" π― Final extracted values: Name='{final_name}', Position='{final_position}'")
|
| 863 |
+
|
| 864 |
+
# Update name cell if needed
|
| 865 |
+
if (not current_name or has_red_text(name_cell)) and final_name and looks_like_person_name(final_name):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 866 |
if has_red_text(name_cell):
|
| 867 |
replace_red_text_in_cell(name_cell, final_name)
|
| 868 |
else:
|
| 869 |
name_cell.text = final_name
|
| 870 |
replacements_made += 1
|
| 871 |
print(f" β
Updated Print Name -> '{final_name}'")
|
| 872 |
+
|
| 873 |
+
# Update position cell if needed
|
| 874 |
+
if (not current_position or has_red_text(position_cell)) and final_position and looks_like_role(final_position):
|
| 875 |
if has_red_text(position_cell):
|
| 876 |
+
replace_red_text_in_cell(position_cell, final_position)
|
| 877 |
else:
|
| 878 |
+
position_cell.text = final_position
|
| 879 |
replacements_made += 1
|
| 880 |
+
print(f" β
Updated Position Title -> '{final_position}'")
|
| 881 |
+
|
| 882 |
+
break # Found and processed the header row
|
| 883 |
|
| 884 |
+
# Mark table as processed
|
| 885 |
if replacements_made > 0:
|
| 886 |
try:
|
| 887 |
setattr(table, "_processed_operator_declaration", True)
|