Spaces:
Running
Running
Update updated_word.py
Browse files- updated_word.py +48 -55
updated_word.py
CHANGED
|
@@ -620,8 +620,10 @@ def fix_management_summary_details_column(table, flat_json):
|
|
| 620 |
|
| 621 |
def fix_operator_declaration_empty_values(table, flat_json):
|
| 622 |
"""Fix Operator Declaration table when values are empty or need updating.
|
| 623 |
-
-
|
| 624 |
-
- If JSON
|
|
|
|
|
|
|
| 625 |
"""
|
| 626 |
replacements_made = 0
|
| 627 |
|
|
@@ -643,18 +645,17 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 643 |
if value is None:
|
| 644 |
return None, None
|
| 645 |
|
| 646 |
-
# If it's a list
|
| 647 |
if isinstance(value, list):
|
| 648 |
if len(value) == 0:
|
| 649 |
return None, None
|
| 650 |
if len(value) == 1:
|
| 651 |
return str(value[0]).strip(), None
|
| 652 |
-
#
|
| 653 |
first = str(value[0]).strip()
|
| 654 |
second = str(value[1]).strip()
|
| 655 |
if first and second:
|
| 656 |
return first, second
|
| 657 |
-
# fallthrough to string join
|
| 658 |
value = " ".join(str(v).strip() for v in value if str(v).strip())
|
| 659 |
|
| 660 |
s = str(value).strip()
|
|
@@ -666,27 +667,27 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 666 |
if len(parts) >= 2:
|
| 667 |
left = parts[0].strip()
|
| 668 |
right = parts[1].strip()
|
| 669 |
-
# Heuristic: if right looks like a role (contains common role words) treat as position
|
| 670 |
role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
|
| 671 |
'coordinator', 'driver', 'operator', 'representative', 'chief']
|
| 672 |
if any(ind in right.lower() for ind in role_indicators) or len(right.split()) <= 4:
|
| 673 |
return left, right
|
| 674 |
-
# if left looks like a role and right looks like a name, invert
|
| 675 |
if any(ind in left.lower() for ind in role_indicators) and not any(ind in right.lower() for ind in role_indicators):
|
| 676 |
return right, left
|
| 677 |
-
# else assume left=name, right=position
|
| 678 |
return left, right
|
| 679 |
|
| 680 |
-
#
|
| 681 |
-
# If contains two capitalised tokens + a short token like 'Manager', split last token as position
|
| 682 |
tokens = s.split()
|
| 683 |
-
if len(tokens) >=
|
| 684 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 685 |
|
| 686 |
-
# fallback: treat entire string as name
|
| 687 |
return s, None
|
| 688 |
|
| 689 |
-
#
|
| 690 |
for row_idx, row in enumerate(table.rows):
|
| 691 |
if len(row.cells) >= 2:
|
| 692 |
cell1_text = get_clean_text(row.cells[0]).strip().lower()
|
|
@@ -695,7 +696,6 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 695 |
if "print name" in cell1_text and "position" in cell2_text:
|
| 696 |
print(f" π Found header row at {row_idx + 1}")
|
| 697 |
|
| 698 |
-
# data row is next row if present
|
| 699 |
if row_idx + 1 < len(table.rows):
|
| 700 |
data_row = table.rows[row_idx + 1]
|
| 701 |
if len(data_row.cells) >= 2:
|
|
@@ -706,7 +706,7 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 706 |
position_text = get_clean_text(position_cell).strip()
|
| 707 |
print(f" π Current values: Name='{name_text}', Position='{position_text}'")
|
| 708 |
|
| 709 |
-
#
|
| 710 |
name_value = find_matching_json_value("Operator Declaration.Print Name", flat_json)
|
| 711 |
if name_value is None:
|
| 712 |
name_value = find_matching_json_value("Print Name", flat_json)
|
|
@@ -715,25 +715,20 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 715 |
if position_value is None:
|
| 716 |
position_value = find_matching_json_value("Position Title", flat_json)
|
| 717 |
|
| 718 |
-
#
|
| 719 |
parsed_name_from_nameval, parsed_pos_from_nameval = parse_name_and_position(name_value) if name_value is not None else (None, None)
|
| 720 |
-
|
| 721 |
-
# If position_value also combined, parse it
|
| 722 |
parsed_name_from_posval, parsed_pos_from_posval = parse_name_and_position(position_value) if position_value is not None else (None, None)
|
| 723 |
|
| 724 |
-
#
|
| 725 |
final_name = None
|
| 726 |
final_pos = None
|
| 727 |
|
| 728 |
-
# Priority:
|
| 729 |
-
# - If parsed_name_from_nameval exists, use its name part as final_name and pos part as candidate for position
|
| 730 |
if parsed_name_from_nameval:
|
| 731 |
final_name = parsed_name_from_nameval
|
| 732 |
elif name_value is not None:
|
| 733 |
final_name = get_value_as_string(name_value)
|
| 734 |
|
| 735 |
-
#
|
| 736 |
-
# else use parsed_pos_from_nameval if present
|
| 737 |
if parsed_pos_from_posval:
|
| 738 |
final_pos = parsed_pos_from_posval
|
| 739 |
elif position_value is not None:
|
|
@@ -741,7 +736,7 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 741 |
elif parsed_pos_from_nameval:
|
| 742 |
final_pos = parsed_pos_from_nameval
|
| 743 |
|
| 744 |
-
#
|
| 745 |
if isinstance(final_name, list):
|
| 746 |
final_name = " ".join(str(x) for x in final_name).strip()
|
| 747 |
if isinstance(final_pos, list):
|
|
@@ -751,19 +746,16 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 751 |
if isinstance(final_pos, str):
|
| 752 |
final_pos = final_pos.strip()
|
| 753 |
|
| 754 |
-
# Filters to avoid writing company names into name slot
|
| 755 |
def looks_like_person(name_str):
|
| 756 |
-
if not name_str:
|
| 757 |
return False
|
| 758 |
bad_phrases = ["pty ltd", "company", "farming", "p/l", "plc"]
|
| 759 |
low = name_str.lower()
|
| 760 |
if any(bp in low for bp in bad_phrases):
|
| 761 |
return False
|
| 762 |
-
# also ensure there is at least one space (first + last) or common pattern
|
| 763 |
return len(name_str) > 1
|
| 764 |
|
| 765 |
-
#
|
| 766 |
-
# Update name cell
|
| 767 |
if (not name_text or has_red_text(name_cell)) and final_name and looks_like_person(final_name):
|
| 768 |
if has_red_text(name_cell):
|
| 769 |
replace_red_text_in_cell(name_cell, final_name)
|
|
@@ -772,7 +764,7 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 772 |
replacements_made += 1
|
| 773 |
print(f" ✅ Updated Print Name -> '{final_name}'")
|
| 774 |
|
| 775 |
-
#
|
| 776 |
if (not position_text or has_red_text(position_cell)) and final_pos:
|
| 777 |
if has_red_text(position_cell):
|
| 778 |
replace_red_text_in_cell(position_cell, final_pos)
|
|
@@ -783,7 +775,7 @@ def fix_operator_declaration_empty_values(table, flat_json):
|
|
| 783 |
|
| 784 |
break
|
| 785 |
|
| 786 |
-
#
|
| 787 |
if replacements_made > 0:
|
| 788 |
try:
|
| 789 |
setattr(table, "_processed_operator_declaration", True)
|
|
@@ -884,40 +876,34 @@ def handle_management_summary_fix(cell, flat_json):
|
|
| 884 |
# ========================================================================
|
| 885 |
|
| 886 |
def handle_operator_declaration_fix(table, flat_json):
|
| 887 |
-
"""
|
|
|
|
|
|
|
| 888 |
replacements_made = 0
|
| 889 |
|
| 890 |
-
#
|
| 891 |
if getattr(table, "_processed_operator_declaration", False):
|
| 892 |
print(f" ⏭️ Skipping - Operator Declaration table already processed")
|
| 893 |
return 0
|
| 894 |
-
# <<< END PATCH
|
| 895 |
-
|
| 896 |
-
if len(table.rows) > 4: # Only process small tables
|
| 897 |
-
return 0
|
| 898 |
-
|
| 899 |
-
# Get table context
|
| 900 |
-
table_text = ""
|
| 901 |
-
for row in table.rows:
|
| 902 |
-
for cell in row.cells:
|
| 903 |
-
table_text += get_clean_text(cell).lower() + " "
|
| 904 |
|
| 905 |
-
#
|
| 906 |
-
if
|
| 907 |
-
print(f" ⏭️ Skipping - Operator Declaration table already processed")
|
| 908 |
return 0
|
| 909 |
|
| 910 |
-
#
|
| 911 |
-
|
| 912 |
-
|
|
|
|
|
|
|
|
|
|
| 913 |
|
| 914 |
-
|
|
|
|
|
|
|
| 915 |
|
| 916 |
-
# Process each cell with red text (for auditor declarations, etc.)
|
| 917 |
for row_idx, row in enumerate(table.rows):
|
| 918 |
for cell_idx, cell in enumerate(row.cells):
|
| 919 |
if has_red_text(cell):
|
| 920 |
-
# Try auditor-specific fields first
|
| 921 |
declaration_fields = [
|
| 922 |
"NHVAS Approved Auditor Declaration.Print Name",
|
| 923 |
"Auditor name",
|
|
@@ -938,7 +924,6 @@ def handle_operator_declaration_fix(table, flat_json):
|
|
| 938 |
replaced = True
|
| 939 |
break
|
| 940 |
|
| 941 |
-
# If no specific field match, try generic signature/date
|
| 942 |
if not replaced:
|
| 943 |
red_text = ""
|
| 944 |
for paragraph in cell.paragraphs:
|
|
@@ -953,6 +938,14 @@ def handle_operator_declaration_fix(table, flat_json):
|
|
| 953 |
cell_replacements = replace_red_text_in_cell(cell, "[Date]")
|
| 954 |
replacements_made += cell_replacements
|
| 955 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 956 |
return replacements_made
|
| 957 |
|
| 958 |
def handle_print_accreditation_section(table, flat_json):
|
|
@@ -1438,7 +1431,7 @@ def process_hf(json_file, docx_file, output_file):
|
|
| 1438 |
print(f" π Tables: {table_replacements}")
|
| 1439 |
print(f" π Paragraphs: {paragraph_replacements}")
|
| 1440 |
print(f" π Headings: {heading_replacements}")
|
| 1441 |
-
print(f" 🎯 Force fixes: {force_replacements}")
|
| 1442 |
print(f"π Processing complete!")
|
| 1443 |
|
| 1444 |
except FileNotFoundError as e:
|
|
|
|
| 620 |
|
| 621 |
def fix_operator_declaration_empty_values(table, flat_json):
|
| 622 |
"""Fix Operator Declaration table when values are empty or need updating.
|
| 623 |
+
- Prefer exact qualified keys.
|
| 624 |
+
- If JSON has combined 'Name - Position', split it safely.
|
| 625 |
+
- Only write into cells that are empty or contain red text.
|
| 626 |
+
- Mark table as processed on success.
|
| 627 |
"""
|
| 628 |
replacements_made = 0
|
| 629 |
|
|
|
|
| 645 |
if value is None:
|
| 646 |
return None, None
|
| 647 |
|
| 648 |
+
# If it's a list: common pattern is [name, position]
|
| 649 |
if isinstance(value, list):
|
| 650 |
if len(value) == 0:
|
| 651 |
return None, None
|
| 652 |
if len(value) == 1:
|
| 653 |
return str(value[0]).strip(), None
|
| 654 |
+
# use first two sensible entries
|
| 655 |
first = str(value[0]).strip()
|
| 656 |
second = str(value[1]).strip()
|
| 657 |
if first and second:
|
| 658 |
return first, second
|
|
|
|
| 659 |
value = " ".join(str(v).strip() for v in value if str(v).strip())
|
| 660 |
|
| 661 |
s = str(value).strip()
|
|
|
|
| 667 |
if len(parts) >= 2:
|
| 668 |
left = parts[0].strip()
|
| 669 |
right = parts[1].strip()
|
|
|
|
| 670 |
role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
|
| 671 |
'coordinator', 'driver', 'operator', 'representative', 'chief']
|
| 672 |
if any(ind in right.lower() for ind in role_indicators) or len(right.split()) <= 4:
|
| 673 |
return left, right
|
|
|
|
| 674 |
if any(ind in left.lower() for ind in role_indicators) and not any(ind in right.lower() for ind in role_indicators):
|
| 675 |
return right, left
|
|
|
|
| 676 |
return left, right
|
| 677 |
|
| 678 |
+
# If no separator, check trailing role token
|
|
|
|
| 679 |
tokens = s.split()
|
| 680 |
+
if len(tokens) >= 2:
|
| 681 |
+
last = tokens[-1]
|
| 682 |
+
role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
|
| 683 |
+
'coordinator', 'driver', 'operator', 'representative', 'chief']
|
| 684 |
+
if any(ind == last.lower() for ind in role_indicators):
|
| 685 |
+
return " ".join(tokens[:-1]), last
|
| 686 |
|
| 687 |
+
# fallback: treat entire string as name
|
| 688 |
return s, None
|
| 689 |
|
| 690 |
+
# Locate header row + data row
|
| 691 |
for row_idx, row in enumerate(table.rows):
|
| 692 |
if len(row.cells) >= 2:
|
| 693 |
cell1_text = get_clean_text(row.cells[0]).strip().lower()
|
|
|
|
| 696 |
if "print name" in cell1_text and "position" in cell2_text:
|
| 697 |
print(f" π Found header row at {row_idx + 1}")
|
| 698 |
|
|
|
|
| 699 |
if row_idx + 1 < len(table.rows):
|
| 700 |
data_row = table.rows[row_idx + 1]
|
| 701 |
if len(data_row.cells) >= 2:
|
|
|
|
| 706 |
position_text = get_clean_text(position_cell).strip()
|
| 707 |
print(f" π Current values: Name='{name_text}', Position='{position_text}'")
|
| 708 |
|
| 709 |
+
# Prefer exact qualified keys first
|
| 710 |
name_value = find_matching_json_value("Operator Declaration.Print Name", flat_json)
|
| 711 |
if name_value is None:
|
| 712 |
name_value = find_matching_json_value("Print Name", flat_json)
|
|
|
|
| 715 |
if position_value is None:
|
| 716 |
position_value = find_matching_json_value("Position Title", flat_json)
|
| 717 |
|
| 718 |
+
# parse combined cases
|
| 719 |
parsed_name_from_nameval, parsed_pos_from_nameval = parse_name_and_position(name_value) if name_value is not None else (None, None)
|
|
|
|
|
|
|
| 720 |
parsed_name_from_posval, parsed_pos_from_posval = parse_name_and_position(position_value) if position_value is not None else (None, None)
|
| 721 |
|
| 722 |
+
# decide final candidates
|
| 723 |
final_name = None
|
| 724 |
final_pos = None
|
| 725 |
|
|
|
|
|
|
|
| 726 |
if parsed_name_from_nameval:
|
| 727 |
final_name = parsed_name_from_nameval
|
| 728 |
elif name_value is not None:
|
| 729 |
final_name = get_value_as_string(name_value)
|
| 730 |
|
| 731 |
+
# position preference: parsed_pos_from_posval > explicit position_value > parsed_pos_from_nameval
|
|
|
|
| 732 |
if parsed_pos_from_posval:
|
| 733 |
final_pos = parsed_pos_from_posval
|
| 734 |
elif position_value is not None:
|
|
|
|
| 736 |
elif parsed_pos_from_nameval:
|
| 737 |
final_pos = parsed_pos_from_nameval
|
| 738 |
|
| 739 |
+
# normalize
|
| 740 |
if isinstance(final_name, list):
|
| 741 |
final_name = " ".join(str(x) for x in final_name).strip()
|
| 742 |
if isinstance(final_pos, list):
|
|
|
|
| 746 |
if isinstance(final_pos, str):
|
| 747 |
final_pos = final_pos.strip()
|
| 748 |
|
|
|
|
| 749 |
def looks_like_person(name_str):
|
| 750 |
+
if not name_str:
|
| 751 |
return False
|
| 752 |
bad_phrases = ["pty ltd", "company", "farming", "p/l", "plc"]
|
| 753 |
low = name_str.lower()
|
| 754 |
if any(bp in low for bp in bad_phrases):
|
| 755 |
return False
|
|
|
|
| 756 |
return len(name_str) > 1
|
| 757 |
|
| 758 |
+
# Write name if empty or red
|
|
|
|
| 759 |
if (not name_text or has_red_text(name_cell)) and final_name and looks_like_person(final_name):
|
| 760 |
if has_red_text(name_cell):
|
| 761 |
replace_red_text_in_cell(name_cell, final_name)
|
|
|
|
| 764 |
replacements_made += 1
|
| 765 |
print(f" ✅ Updated Print Name -> '{final_name}'")
|
| 766 |
|
| 767 |
+
# Write position if empty or red
|
| 768 |
if (not position_text or has_red_text(position_cell)) and final_pos:
|
| 769 |
if has_red_text(position_cell):
|
| 770 |
replace_red_text_in_cell(position_cell, final_pos)
|
|
|
|
| 775 |
|
| 776 |
break
|
| 777 |
|
| 778 |
+
# mark processed
|
| 779 |
if replacements_made > 0:
|
| 780 |
try:
|
| 781 |
setattr(table, "_processed_operator_declaration", True)
|
|
|
|
| 876 |
# ========================================================================
|
| 877 |
|
| 878 |
def handle_operator_declaration_fix(table, flat_json):
|
| 879 |
+
"""Wrapper for small declaration tables. Delegate to canonical fix first.
|
| 880 |
+
If canonical did not change anything, fall back to the small-table auditor handling.
|
| 881 |
+
"""
|
| 882 |
replacements_made = 0
|
| 883 |
|
| 884 |
+
# skip if already processed
|
| 885 |
if getattr(table, "_processed_operator_declaration", False):
|
| 886 |
print(f" ⏭️ Skipping - Operator Declaration table already processed")
|
| 887 |
return 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 888 |
|
| 889 |
+
# only intended for small tables; if large, skip (your original condition)
|
| 890 |
+
if len(table.rows) > 4:
|
|
|
|
| 891 |
return 0
|
| 892 |
|
| 893 |
+
# First: try canonical operator declaration handler (covers primary case)
|
| 894 |
+
replaced = fix_operator_declaration_empty_values(table, flat_json)
|
| 895 |
+
replacements_made += replaced
|
| 896 |
+
if replaced:
|
| 897 |
+
# canonical handled it and set the processed flag
|
| 898 |
+
return replacements_made
|
| 899 |
|
| 900 |
+
# fallback: original small-table behaviour (auditor declaration etc.)
|
| 901 |
+
# (This mirrors your earlier auditor-specific logic but will not run if canonical updated table)
|
| 902 |
+
print(f" 🎯 Processing other declaration table (fallback small-table behavior)")
|
| 903 |
|
|
|
|
| 904 |
for row_idx, row in enumerate(table.rows):
|
| 905 |
for cell_idx, cell in enumerate(row.cells):
|
| 906 |
if has_red_text(cell):
|
|
|
|
| 907 |
declaration_fields = [
|
| 908 |
"NHVAS Approved Auditor Declaration.Print Name",
|
| 909 |
"Auditor name",
|
|
|
|
| 924 |
replaced = True
|
| 925 |
break
|
| 926 |
|
|
|
|
| 927 |
if not replaced:
|
| 928 |
red_text = ""
|
| 929 |
for paragraph in cell.paragraphs:
|
|
|
|
| 938 |
cell_replacements = replace_red_text_in_cell(cell, "[Date]")
|
| 939 |
replacements_made += cell_replacements
|
| 940 |
|
| 941 |
+
# if any replacements made here, mark processed
|
| 942 |
+
if replacements_made > 0:
|
| 943 |
+
try:
|
| 944 |
+
setattr(table, "_processed_operator_declaration", True)
|
| 945 |
+
print(" π Marked table as processed by operator declaration fallback")
|
| 946 |
+
except Exception:
|
| 947 |
+
pass
|
| 948 |
+
|
| 949 |
return replacements_made
|
| 950 |
|
| 951 |
def handle_print_accreditation_section(table, flat_json):
|
|
|
|
| 1431 |
print(f" π Tables: {table_replacements}")
|
| 1432 |
print(f" π Paragraphs: {paragraph_replacements}")
|
| 1433 |
print(f" π Headings: {heading_replacements}")
|
| 1434 |
+
#print(f" 🎯 Force fixes: {force_replacements}")
|
| 1435 |
print(f"π Processing complete!")
|
| 1436 |
|
| 1437 |
except FileNotFoundError as e:
|