Spaces:
Running
Running
Update updated_word.py
Browse files- updated_word.py +123 -34
updated_word.py
CHANGED
|
@@ -619,7 +619,10 @@ def fix_management_summary_details_column(table, flat_json):
|
|
| 619 |
# ========================================================================
|
| 620 |
|
| 621 |
def fix_operator_declaration_empty_values(table, flat_json):
|
| 622 |
-
"""Fix Operator Declaration table when values are empty or need updating
|
|
|
|
|
|
|
|
|
|
| 623 |
replacements_made = 0
|
| 624 |
|
| 625 |
print(f" π― FIX: Operator Declaration empty values processing")
|
|
@@ -635,72 +638,158 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 635 |
|
| 636 |
print(f" β
Confirmed Operator Declaration table")
|
| 637 |
|
| 638 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
for row_idx, row in enumerate(table.rows):
|
| 640 |
if len(row.cells) >= 2:
|
| 641 |
cell1_text = get_clean_text(row.cells[0]).strip().lower()
|
| 642 |
cell2_text = get_clean_text(row.cells[1]).strip().lower()
|
| 643 |
|
| 644 |
-
# Check if this is the header row
|
| 645 |
if "print name" in cell1_text and "position" in cell2_text:
|
| 646 |
print(f" π Found header row at {row_idx + 1}")
|
| 647 |
|
| 648 |
-
#
|
| 649 |
if row_idx + 1 < len(table.rows):
|
| 650 |
data_row = table.rows[row_idx + 1]
|
| 651 |
if len(data_row.cells) >= 2:
|
| 652 |
name_cell = data_row.cells[0]
|
| 653 |
position_cell = data_row.cells[1]
|
| 654 |
|
| 655 |
-
# Check if cells are empty or have red text
|
| 656 |
name_text = get_clean_text(name_cell).strip()
|
| 657 |
position_text = get_clean_text(position_cell).strip()
|
| 658 |
-
|
| 659 |
print(f" π Current values: Name='{name_text}', Position='{position_text}'")
|
| 660 |
|
| 661 |
-
#
|
| 662 |
-
print(f" π§ FORCE updating Print Name (exact-key first)")
|
| 663 |
name_value = find_matching_json_value("Operator Declaration.Print Name", flat_json)
|
| 664 |
if name_value is None:
|
| 665 |
-
# fallback to common alternatives
|
| 666 |
name_value = find_matching_json_value("Print Name", flat_json)
|
| 667 |
-
|
| 668 |
-
new_name = get_value_as_string(name_value).strip()
|
| 669 |
-
if new_name and "Pty Ltd" not in new_name and "Company" not in new_name and "Farming" not in new_name:
|
| 670 |
-
# attempt targeted replacement: if red exists, replace red, else set text
|
| 671 |
-
if has_red_text(name_cell):
|
| 672 |
-
replace_red_text_in_cell(name_cell, new_name)
|
| 673 |
-
else:
|
| 674 |
-
name_cell.text = new_name
|
| 675 |
-
replacements_made += 1
|
| 676 |
-
print(f" β
FORCE Updated Print Name: '{name_text}' -> '{new_name}'")
|
| 677 |
-
|
| 678 |
-
print(f" π§ FORCE updating Position Title (exact-key first)")
|
| 679 |
position_value = find_matching_json_value("Operator Declaration.Position Title", flat_json)
|
| 680 |
if position_value is None:
|
| 681 |
position_value = find_matching_json_value("Position Title", flat_json)
|
| 682 |
-
|
| 683 |
-
|
| 684 |
-
|
| 685 |
-
|
| 686 |
-
|
| 687 |
-
|
| 688 |
-
|
| 689 |
-
|
| 690 |
-
|
| 691 |
-
|
| 692 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 693 |
|
| 694 |
break
|
| 695 |
|
| 696 |
-
#
|
| 697 |
if replacements_made > 0:
|
| 698 |
try:
|
| 699 |
setattr(table, "_processed_operator_declaration", True)
|
| 700 |
print(" π Marked table as processed by Operator Declaration handler")
|
| 701 |
except Exception:
|
| 702 |
pass
|
| 703 |
-
# <<< END PATCH
|
| 704 |
|
| 705 |
return replacements_made
|
| 706 |
|
|
|
|
| 619 |
# ========================================================================
|
| 620 |
|
| 621 |
def fix_operator_declaration_empty_values(table, flat_json):
|
| 622 |
+
"""Fix Operator Declaration table when values are empty or need updating.
|
| 623 |
+
- Only update name/position cells if they're empty or contain red text.
|
| 624 |
+
- If JSON gives a combined 'Name - Position' value, split it.
|
| 625 |
+
"""
|
| 626 |
replacements_made = 0
|
| 627 |
|
| 628 |
print(f" π― FIX: Operator Declaration empty values processing")
|
|
|
|
| 638 |
|
| 639 |
print(f" β
Confirmed Operator Declaration table")
|
| 640 |
|
| 641 |
+
def parse_name_and_position(value):
    """Split a combined name/position value into a ``(name, position)`` pair.

    Args:
        value: ``None``, a list, or any object convertible with ``str()``.
            For a list, the first two items are taken as name and position
            when both are non-empty; otherwise the non-empty items are
            joined with spaces and parsed as a single string.

    Returns:
        A 2-tuple ``(name, position)``. Either element may be ``None``:
        ``(None, None)`` for ``None``, an empty list or a blank string, and
        ``(text, None)`` when no position can be separated from the text.
    """
    # Defined up front because BOTH the separator branch and the
    # no-separator fallback below consult it; binding it only in the
    # separator branch made the fallback fail with UnboundLocalError.
    role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
                       'coordinator', 'driver', 'operator', 'representative', 'chief']

    def _has_role_word(text):
        """Return True if *text* contains any role indicator (substring match)."""
        lowered = text.lower()
        return any(ind in lowered for ind in role_indicators)

    if value is None:
        return None, None

    # A list is treated as [name, position, ...].
    if isinstance(value, list):
        if len(value) == 0:
            return None, None
        if len(value) == 1:
            return str(value[0]).strip(), None
        first = str(value[0]).strip()
        second = str(value[1]).strip()
        if first and second:
            return first, second
        # One of the first two entries is blank: fall through to string parsing.
        value = " ".join(str(v).strip() for v in value if str(v).strip())

    s = str(value).strip()
    if not s:
        return None, None

    # Separators: hyphen, en dash (U+2013) or em dash (U+2014) surrounded by
    # whitespace, or a comma / pipe with optional whitespace. The dashes are
    # written as escapes so the pattern survives encoding round-trips.
    parts = re.split(r'\s+[-\u2013\u2014]\s+|\s*,\s*|\s*\|\s*', s)
    if len(parts) >= 2:
        left = parts[0].strip()
        right = parts[1].strip()
        # Right side is accepted as the position when it contains a role word
        # or is short (four words or fewer).
        # NOTE(review): because of the "four words or fewer" clause, the
        # inversion test below is only reached for long right-hand sides;
        # confirm whether inversion was meant to take precedence.
        if _has_role_word(right) or len(right.split()) <= 4:
            return left, right
        # Left looks like a role and right does not: swap them.
        if _has_role_word(left) and not _has_role_word(right):
            return right, left
        # Otherwise assume "name <sep> position".
        return left, right

    # No separator: treat a trailing title-cased role word as the position,
    # e.g. "Firstname Lastname Manager" (less reliable).
    tokens = s.split()
    if len(tokens) >= 3 and tokens[-1].istitle() and _has_role_word(tokens[-1]):
        return " ".join(tokens[:-1]), tokens[-1]

    # Fallback: the whole string is the name, with no position.
    return s, None
|
| 688 |
+
|
| 689 |
+
# Find the header row and the data row
|
| 690 |
for row_idx, row in enumerate(table.rows):
|
| 691 |
if len(row.cells) >= 2:
|
| 692 |
cell1_text = get_clean_text(row.cells[0]).strip().lower()
|
| 693 |
cell2_text = get_clean_text(row.cells[1]).strip().lower()
|
| 694 |
|
|
|
|
| 695 |
if "print name" in cell1_text and "position" in cell2_text:
|
| 696 |
print(f" π Found header row at {row_idx + 1}")
|
| 697 |
|
| 698 |
+
# data row is next row if present
|
| 699 |
if row_idx + 1 < len(table.rows):
|
| 700 |
data_row = table.rows[row_idx + 1]
|
| 701 |
if len(data_row.cells) >= 2:
|
| 702 |
name_cell = data_row.cells[0]
|
| 703 |
position_cell = data_row.cells[1]
|
| 704 |
|
|
|
|
| 705 |
name_text = get_clean_text(name_cell).strip()
|
| 706 |
position_text = get_clean_text(position_cell).strip()
|
|
|
|
| 707 |
print(f" π Current values: Name='{name_text}', Position='{position_text}'")
|
| 708 |
|
| 709 |
+
# Retrieve JSON candidates (prefer exact qualified keys)
|
|
|
|
| 710 |
name_value = find_matching_json_value("Operator Declaration.Print Name", flat_json)
|
| 711 |
if name_value is None:
|
|
|
|
| 712 |
name_value = find_matching_json_value("Print Name", flat_json)
|
| 713 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 714 |
position_value = find_matching_json_value("Operator Declaration.Position Title", flat_json)
|
| 715 |
if position_value is None:
|
| 716 |
position_value = find_matching_json_value("Position Title", flat_json)
|
| 717 |
+
|
| 718 |
+
# If name_value contains both name+position, split it
|
| 719 |
+
parsed_name_from_nameval, parsed_pos_from_nameval = parse_name_and_position(name_value) if name_value is not None else (None, None)
|
| 720 |
+
|
| 721 |
+
# If position_value also combined, parse it
|
| 722 |
+
parsed_name_from_posval, parsed_pos_from_posval = parse_name_and_position(position_value) if position_value is not None else (None, None)
|
| 723 |
+
|
| 724 |
+
# Decide final name and position candidates
|
| 725 |
+
final_name = None
|
| 726 |
+
final_pos = None
|
| 727 |
+
|
| 728 |
+
# Priority:
|
| 729 |
+
# - If parsed_name_from_nameval exists, use its name part as final_name and pos part as candidate for position
|
| 730 |
+
if parsed_name_from_nameval:
|
| 731 |
+
final_name = parsed_name_from_nameval
|
| 732 |
+
elif name_value is not None:
|
| 733 |
+
final_name = get_value_as_string(name_value)
|
| 734 |
+
|
| 735 |
+
# For position: prefer explicit position_value's parsed position,
|
| 736 |
+
# else use parsed_pos_from_nameval if present
|
| 737 |
+
if parsed_pos_from_posval:
|
| 738 |
+
final_pos = parsed_pos_from_posval
|
| 739 |
+
elif position_value is not None:
|
| 740 |
+
final_pos = get_value_as_string(position_value)
|
| 741 |
+
elif parsed_pos_from_nameval:
|
| 742 |
+
final_pos = parsed_pos_from_nameval
|
| 743 |
+
|
| 744 |
+
# Normalize to strings (strip)
|
| 745 |
+
if isinstance(final_name, list):
|
| 746 |
+
final_name = " ".join(str(x) for x in final_name).strip()
|
| 747 |
+
if isinstance(final_pos, list):
|
| 748 |
+
final_pos = " ".join(str(x) for x in final_pos).strip()
|
| 749 |
+
if isinstance(final_name, str):
|
| 750 |
+
final_name = final_name.strip()
|
| 751 |
+
if isinstance(final_pos, str):
|
| 752 |
+
final_pos = final_pos.strip()
|
| 753 |
+
|
| 754 |
+
# Filters to avoid writing company names into name slot
|
| 755 |
+
def looks_like_person(name_str):
    """Return True when *name_str* is acceptable for the Print Name cell.

    The value is rejected when it is empty/falsy, when its lower-cased form
    contains one of the company markers listed below, or when it is only a
    single character long.
    """
    company_markers = ("pty ltd", "company", "farming", "p/l", "plc")
    if name_str:
        lowered = name_str.lower()
        for marker in company_markers:
            if marker in lowered:
                return False
        # Length is the only remaining check; a space between first and
        # last name is not required.
        return len(name_str) > 1
    return False
|
| 764 |
+
|
| 765 |
+
# Now perform replacements only if cell is empty or has red text
|
| 766 |
+
# Update name cell
|
| 767 |
+
if (not name_text or has_red_text(name_cell)) and final_name and looks_like_person(final_name):
|
| 768 |
+
if has_red_text(name_cell):
|
| 769 |
+
replace_red_text_in_cell(name_cell, final_name)
|
| 770 |
+
else:
|
| 771 |
+
name_cell.text = final_name
|
| 772 |
+
replacements_made += 1
|
| 773 |
+
print(f" β
Updated Print Name -> '{final_name}'")
|
| 774 |
+
|
| 775 |
+
# Update position cell
|
| 776 |
+
if (not position_text or has_red_text(position_cell)) and final_pos:
|
| 777 |
+
if has_red_text(position_cell):
|
| 778 |
+
replace_red_text_in_cell(position_cell, final_pos)
|
| 779 |
+
else:
|
| 780 |
+
position_cell.text = final_pos
|
| 781 |
+
replacements_made += 1
|
| 782 |
+
print(f" β
Updated Position Title -> '{final_pos}'")
|
| 783 |
|
| 784 |
break
|
| 785 |
|
| 786 |
+
# Mark table processed so other handlers skip it
|
| 787 |
if replacements_made > 0:
|
| 788 |
try:
|
| 789 |
setattr(table, "_processed_operator_declaration", True)
|
| 790 |
print(" π Marked table as processed by Operator Declaration handler")
|
| 791 |
except Exception:
|
| 792 |
pass
|
|
|
|
| 793 |
|
| 794 |
return replacements_made
|
| 795 |
|