Shami96 committed on
Commit
1f4d3cf
·
verified ·
1 Parent(s): 61e7c5d

Update updated_word.py

Browse files
Files changed (1) hide show
  1. updated_word.py +48 -55
updated_word.py CHANGED
@@ -620,8 +620,10 @@ def fix_management_summary_details_column(table, flat_json):
620
 
621
  def fix_operator_declaration_empty_values(table, flat_json):
622
  """Fix Operator Declaration table when values are empty or need updating.
623
- - Only update name/position cells if they're empty or contain red text.
624
- - If JSON gives a combined 'Name - Position' value, split it.
 
 
625
  """
626
  replacements_made = 0
627
 
@@ -643,18 +645,17 @@ def fix_operator_declaration_empty_values(table, flat_json):
643
  if value is None:
644
  return None, None
645
 
646
- # If it's a list of two items - treat as [name, position]
647
  if isinstance(value, list):
648
  if len(value) == 0:
649
  return None, None
650
  if len(value) == 1:
651
  return str(value[0]).strip(), None
652
- # if list has more than 1, try to use first two sensible entries
653
  first = str(value[0]).strip()
654
  second = str(value[1]).strip()
655
  if first and second:
656
  return first, second
657
- # fallthrough to string join
658
  value = " ".join(str(v).strip() for v in value if str(v).strip())
659
 
660
  s = str(value).strip()
@@ -666,27 +667,27 @@ def fix_operator_declaration_empty_values(table, flat_json):
666
  if len(parts) >= 2:
667
  left = parts[0].strip()
668
  right = parts[1].strip()
669
- # Heuristic: if right looks like a role (contains common role words) treat as position
670
  role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
671
  'coordinator', 'driver', 'operator', 'representative', 'chief']
672
  if any(ind in right.lower() for ind in role_indicators) or len(right.split()) <= 4:
673
  return left, right
674
- # if left looks like a role and right looks like a name, invert
675
  if any(ind in left.lower() for ind in role_indicators) and not any(ind in right.lower() for ind in role_indicators):
676
  return right, left
677
- # else assume left=name, right=position
678
  return left, right
679
 
680
- # No separator - try to detect "Firstname Lastname Title" (less reliable)
681
- # If contains two capitalised tokens + a short token like 'Manager', split last token as position
682
  tokens = s.split()
683
- if len(tokens) >= 3 and tokens[-1].istitle() and any(ind in tokens[-1].lower() for ind in role_indicators):
684
- return " ".join(tokens[:-1]), tokens[-1]
 
 
 
 
685
 
686
- # fallback: treat entire string as name (no position)
687
  return s, None
688
 
689
- # Find the header row and the data row
690
  for row_idx, row in enumerate(table.rows):
691
  if len(row.cells) >= 2:
692
  cell1_text = get_clean_text(row.cells[0]).strip().lower()
@@ -695,7 +696,6 @@ def fix_operator_declaration_empty_values(table, flat_json):
695
  if "print name" in cell1_text and "position" in cell2_text:
696
  print(f" πŸ“Œ Found header row at {row_idx + 1}")
697
 
698
- # data row is next row if present
699
  if row_idx + 1 < len(table.rows):
700
  data_row = table.rows[row_idx + 1]
701
  if len(data_row.cells) >= 2:
@@ -706,7 +706,7 @@ def fix_operator_declaration_empty_values(table, flat_json):
706
  position_text = get_clean_text(position_cell).strip()
707
  print(f" πŸ“‹ Current values: Name='{name_text}', Position='{position_text}'")
708
 
709
- # Retrieve JSON candidates (prefer exact qualified keys)
710
  name_value = find_matching_json_value("Operator Declaration.Print Name", flat_json)
711
  if name_value is None:
712
  name_value = find_matching_json_value("Print Name", flat_json)
@@ -715,25 +715,20 @@ def fix_operator_declaration_empty_values(table, flat_json):
715
  if position_value is None:
716
  position_value = find_matching_json_value("Position Title", flat_json)
717
 
718
- # If name_value contains both name+position, split it
719
  parsed_name_from_nameval, parsed_pos_from_nameval = parse_name_and_position(name_value) if name_value is not None else (None, None)
720
-
721
- # If position_value also combined, parse it
722
  parsed_name_from_posval, parsed_pos_from_posval = parse_name_and_position(position_value) if position_value is not None else (None, None)
723
 
724
- # Decide final name and position candidates
725
  final_name = None
726
  final_pos = None
727
 
728
- # Priority:
729
- # - If parsed_name_from_nameval exists, use its name part as final_name and pos part as candidate for position
730
  if parsed_name_from_nameval:
731
  final_name = parsed_name_from_nameval
732
  elif name_value is not None:
733
  final_name = get_value_as_string(name_value)
734
 
735
- # For position: prefer explicit position_value's parsed position,
736
- # else use parsed_pos_from_nameval if present
737
  if parsed_pos_from_posval:
738
  final_pos = parsed_pos_from_posval
739
  elif position_value is not None:
@@ -741,7 +736,7 @@ def fix_operator_declaration_empty_values(table, flat_json):
741
  elif parsed_pos_from_nameval:
742
  final_pos = parsed_pos_from_nameval
743
 
744
- # Normalize to strings (strip)
745
  if isinstance(final_name, list):
746
  final_name = " ".join(str(x) for x in final_name).strip()
747
  if isinstance(final_pos, list):
@@ -751,19 +746,16 @@ def fix_operator_declaration_empty_values(table, flat_json):
751
  if isinstance(final_pos, str):
752
  final_pos = final_pos.strip()
753
 
754
- # Filters to avoid writing company names into name slot
755
  def looks_like_person(name_str):
756
- if not name_str:
757
  return False
758
  bad_phrases = ["pty ltd", "company", "farming", "p/l", "plc"]
759
  low = name_str.lower()
760
  if any(bp in low for bp in bad_phrases):
761
  return False
762
- # also ensure there is at least one space (first + last) or common pattern
763
  return len(name_str) > 1
764
 
765
- # Now perform replacements only if cell is empty or has red text
766
- # Update name cell
767
  if (not name_text or has_red_text(name_cell)) and final_name and looks_like_person(final_name):
768
  if has_red_text(name_cell):
769
  replace_red_text_in_cell(name_cell, final_name)
@@ -772,7 +764,7 @@ def fix_operator_declaration_empty_values(table, flat_json):
772
  replacements_made += 1
773
  print(f" βœ… Updated Print Name -> '{final_name}'")
774
 
775
- # Update position cell
776
  if (not position_text or has_red_text(position_cell)) and final_pos:
777
  if has_red_text(position_cell):
778
  replace_red_text_in_cell(position_cell, final_pos)
@@ -783,7 +775,7 @@ def fix_operator_declaration_empty_values(table, flat_json):
783
 
784
  break
785
 
786
- # Mark table processed so other handlers skip it
787
  if replacements_made > 0:
788
  try:
789
  setattr(table, "_processed_operator_declaration", True)
@@ -884,40 +876,34 @@ def handle_management_summary_fix(cell, flat_json):
884
  # ========================================================================
885
 
886
  def handle_operator_declaration_fix(table, flat_json):
887
- """Handle small Operator/Auditor Declaration tables - SKIP if already processed"""
 
 
888
  replacements_made = 0
889
 
890
- # <<< PATCH: skip if marked processed
891
  if getattr(table, "_processed_operator_declaration", False):
892
  print(f" ⏭️ Skipping - Operator Declaration table already processed")
893
  return 0
894
- # <<< END PATCH
895
-
896
- if len(table.rows) > 4: # Only process small tables
897
- return 0
898
-
899
- # Get table context
900
- table_text = ""
901
- for row in table.rows:
902
- for cell in row.cells:
903
- table_text += get_clean_text(cell).lower() + " "
904
 
905
- # SKIP if this is an Operator Declaration table (already handled by fix_operator_declaration_empty_values)
906
- if "print name" in table_text and "position title" in table_text:
907
- print(f" ⏭️ Skipping - Operator Declaration table already processed")
908
  return 0
909
 
910
- # Check if this is a declaration table
911
- if not ("print name" in table_text or "signature" in table_text or "date" in table_text):
912
- return 0
 
 
 
913
 
914
- print(f" 🎯 Processing other declaration table")
 
 
915
 
916
- # Process each cell with red text (for auditor declarations, etc.)
917
  for row_idx, row in enumerate(table.rows):
918
  for cell_idx, cell in enumerate(row.cells):
919
  if has_red_text(cell):
920
- # Try auditor-specific fields first
921
  declaration_fields = [
922
  "NHVAS Approved Auditor Declaration.Print Name",
923
  "Auditor name",
@@ -938,7 +924,6 @@ def handle_operator_declaration_fix(table, flat_json):
938
  replaced = True
939
  break
940
 
941
- # If no specific field match, try generic signature/date
942
  if not replaced:
943
  red_text = ""
944
  for paragraph in cell.paragraphs:
@@ -953,6 +938,14 @@ def handle_operator_declaration_fix(table, flat_json):
953
  cell_replacements = replace_red_text_in_cell(cell, "[Date]")
954
  replacements_made += cell_replacements
955
 
 
 
 
 
 
 
 
 
956
  return replacements_made
957
 
958
  def handle_print_accreditation_section(table, flat_json):
@@ -1438,7 +1431,7 @@ def process_hf(json_file, docx_file, output_file):
1438
  print(f" πŸ“Š Tables: {table_replacements}")
1439
  print(f" πŸ“ Paragraphs: {paragraph_replacements}")
1440
  print(f" πŸ“‹ Headings: {heading_replacements}")
1441
- print(f" 🎯 Force fixes: {force_replacements}")
1442
  print(f"πŸŽ‰ Processing complete!")
1443
 
1444
  except FileNotFoundError as e:
 
620
 
621
  def fix_operator_declaration_empty_values(table, flat_json):
622
  """Fix Operator Declaration table when values are empty or need updating.
623
+ - Prefer exact qualified keys.
624
+ - If JSON has combined 'Name - Position', split it safely.
625
+ - Only write into cells that are empty or contain red text.
626
+ - Mark table as processed on success.
627
  """
628
  replacements_made = 0
629
 
 
645
  if value is None:
646
  return None, None
647
 
648
+ # If it's a list: common pattern is [name, position]
649
  if isinstance(value, list):
650
  if len(value) == 0:
651
  return None, None
652
  if len(value) == 1:
653
  return str(value[0]).strip(), None
654
+ # use first two sensible entries
655
  first = str(value[0]).strip()
656
  second = str(value[1]).strip()
657
  if first and second:
658
  return first, second
 
659
  value = " ".join(str(v).strip() for v in value if str(v).strip())
660
 
661
  s = str(value).strip()
 
667
  if len(parts) >= 2:
668
  left = parts[0].strip()
669
  right = parts[1].strip()
 
670
  role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
671
  'coordinator', 'driver', 'operator', 'representative', 'chief']
672
  if any(ind in right.lower() for ind in role_indicators) or len(right.split()) <= 4:
673
  return left, right
 
674
  if any(ind in left.lower() for ind in role_indicators) and not any(ind in right.lower() for ind in role_indicators):
675
  return right, left
 
676
  return left, right
677
 
678
+ # If no separator, check trailing role token
 
679
  tokens = s.split()
680
+ if len(tokens) >= 2:
681
+ last = tokens[-1]
682
+ role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
683
+ 'coordinator', 'driver', 'operator', 'representative', 'chief']
684
+ if any(ind == last.lower() for ind in role_indicators):
685
+ return " ".join(tokens[:-1]), last
686
 
687
+ # fallback: treat entire string as name
688
  return s, None
689
 
690
+ # Locate header row + data row
691
  for row_idx, row in enumerate(table.rows):
692
  if len(row.cells) >= 2:
693
  cell1_text = get_clean_text(row.cells[0]).strip().lower()
 
696
  if "print name" in cell1_text and "position" in cell2_text:
697
  print(f" πŸ“Œ Found header row at {row_idx + 1}")
698
 
 
699
  if row_idx + 1 < len(table.rows):
700
  data_row = table.rows[row_idx + 1]
701
  if len(data_row.cells) >= 2:
 
706
  position_text = get_clean_text(position_cell).strip()
707
  print(f" πŸ“‹ Current values: Name='{name_text}', Position='{position_text}'")
708
 
709
+ # Prefer exact qualified keys first
710
  name_value = find_matching_json_value("Operator Declaration.Print Name", flat_json)
711
  if name_value is None:
712
  name_value = find_matching_json_value("Print Name", flat_json)
 
715
  if position_value is None:
716
  position_value = find_matching_json_value("Position Title", flat_json)
717
 
718
+ # parse combined cases
719
  parsed_name_from_nameval, parsed_pos_from_nameval = parse_name_and_position(name_value) if name_value is not None else (None, None)
 
 
720
  parsed_name_from_posval, parsed_pos_from_posval = parse_name_and_position(position_value) if position_value is not None else (None, None)
721
 
722
+ # decide final candidates
723
  final_name = None
724
  final_pos = None
725
 
 
 
726
  if parsed_name_from_nameval:
727
  final_name = parsed_name_from_nameval
728
  elif name_value is not None:
729
  final_name = get_value_as_string(name_value)
730
 
731
+ # position preference: parsed_pos_from_posval > explicit position_value > parsed_pos_from_nameval
 
732
  if parsed_pos_from_posval:
733
  final_pos = parsed_pos_from_posval
734
  elif position_value is not None:
 
736
  elif parsed_pos_from_nameval:
737
  final_pos = parsed_pos_from_nameval
738
 
739
+ # normalize
740
  if isinstance(final_name, list):
741
  final_name = " ".join(str(x) for x in final_name).strip()
742
  if isinstance(final_pos, list):
 
746
  if isinstance(final_pos, str):
747
  final_pos = final_pos.strip()
748
 
 
749
  def looks_like_person(name_str):
750
+ if not name_str:
751
  return False
752
  bad_phrases = ["pty ltd", "company", "farming", "p/l", "plc"]
753
  low = name_str.lower()
754
  if any(bp in low for bp in bad_phrases):
755
  return False
 
756
  return len(name_str) > 1
757
 
758
+ # Write name if empty or red
 
759
  if (not name_text or has_red_text(name_cell)) and final_name and looks_like_person(final_name):
760
  if has_red_text(name_cell):
761
  replace_red_text_in_cell(name_cell, final_name)
 
764
  replacements_made += 1
765
  print(f" βœ… Updated Print Name -> '{final_name}'")
766
 
767
+ # Write position if empty or red
768
  if (not position_text or has_red_text(position_cell)) and final_pos:
769
  if has_red_text(position_cell):
770
  replace_red_text_in_cell(position_cell, final_pos)
 
775
 
776
  break
777
 
778
+ # mark processed
779
  if replacements_made > 0:
780
  try:
781
  setattr(table, "_processed_operator_declaration", True)
 
876
  # ========================================================================
877
 
878
  def handle_operator_declaration_fix(table, flat_json):
879
+ """Wrapper for small declaration tables. Delegate to canonical fix first.
880
+ If canonical did not change anything, fall back to the small-table auditor handling.
881
+ """
882
  replacements_made = 0
883
 
884
+ # skip if already processed
885
  if getattr(table, "_processed_operator_declaration", False):
886
  print(f" ⏭️ Skipping - Operator Declaration table already processed")
887
  return 0
 
 
 
 
 
 
 
 
 
 
888
 
889
+ # only intended for small tables; if large, skip (your original condition)
890
+ if len(table.rows) > 4:
 
891
  return 0
892
 
893
+ # First: try canonical operator declaration handler (covers primary case)
894
+ replaced = fix_operator_declaration_empty_values(table, flat_json)
895
+ replacements_made += replaced
896
+ if replaced:
897
+ # canonical handled it and set the processed flag
898
+ return replacements_made
899
 
900
+ # fallback: original small-table behaviour (auditor declaration etc.)
901
+ # (This mirrors your earlier auditor-specific logic but will not run if canonical updated table)
902
+ print(f" 🎯 Processing other declaration table (fallback small-table behavior)")
903
 
 
904
  for row_idx, row in enumerate(table.rows):
905
  for cell_idx, cell in enumerate(row.cells):
906
  if has_red_text(cell):
 
907
  declaration_fields = [
908
  "NHVAS Approved Auditor Declaration.Print Name",
909
  "Auditor name",
 
924
  replaced = True
925
  break
926
 
 
927
  if not replaced:
928
  red_text = ""
929
  for paragraph in cell.paragraphs:
 
938
  cell_replacements = replace_red_text_in_cell(cell, "[Date]")
939
  replacements_made += cell_replacements
940
 
941
+ # if any replacements made here, mark processed
942
+ if replacements_made > 0:
943
+ try:
944
+ setattr(table, "_processed_operator_declaration", True)
945
+ print(" πŸ”– Marked table as processed by operator declaration fallback")
946
+ except Exception:
947
+ pass
948
+
949
  return replacements_made
950
 
951
  def handle_print_accreditation_section(table, flat_json):
 
1431
  print(f" πŸ“Š Tables: {table_replacements}")
1432
  print(f" πŸ“ Paragraphs: {paragraph_replacements}")
1433
  print(f" πŸ“‹ Headings: {heading_replacements}")
1434
+ #print(f" 🎯 Force fixes: {force_replacements}")
1435
  print(f"πŸŽ‰ Processing complete!")
1436
 
1437
  except FileNotFoundError as e: