Shami96 committed on
Commit
47ac43f
·
verified ·
1 Parent(s): 7ec9f58

Update updated_word.py

Browse files
Files changed (1) hide show
  1. updated_word.py +175 -100
updated_word.py CHANGED
@@ -647,166 +647,241 @@ def fix_management_summary_details_column(table, flat_json):
647
  # Canonical operator declaration fixer — SAFER
648
  # ============================================================================
649
  def fix_operator_declaration_empty_values(table, flat_json):
 
 
 
650
  replacements_made = 0
651
  print(f" 🎯 FIX: Operator Declaration empty values processing")
 
 
652
  table_context = ""
653
  for row in table.rows:
654
  for cell in row.cells:
655
  table_context += get_clean_text(cell).lower() + " "
 
656
  if not ("print name" in table_context and "position title" in table_context):
657
  return 0
 
658
  print(f" βœ… Confirmed Operator Declaration table")
659
 
660
  def parse_name_and_position(value):
 
661
  if value is None:
662
  return None, None
 
663
  if isinstance(value, list):
664
  if len(value) == 0:
665
  return None, None
666
  if len(value) == 1:
667
  return str(value[0]).strip(), None
668
- # common [name, position] pattern
 
669
  first = str(value[0]).strip()
670
  second = str(value[1]).strip()
671
  if first and second:
672
  return first, second
 
 
673
  value = " ".join(str(v).strip() for v in value if str(v).strip())
 
674
  s = str(value).strip()
675
  if not s:
676
  return None, None
677
- parts = re.split(r'\s+[-–—]\s+|\s*,\s*|\s*\|\s*', s)
678
- if len(parts) >= 2:
 
 
 
 
 
 
 
 
 
679
  left = parts[0].strip()
680
  right = parts[1].strip()
 
 
681
  role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
682
- 'coordinator', 'driver', 'operator', 'representative', 'chief']
683
- if any(ind in right.lower() for ind in role_indicators) or len(right.split()) <= 4:
 
 
 
 
 
 
 
 
 
 
684
  return left, right
685
- if any(ind in left.lower() for ind in role_indicators) and not any(ind in right.lower() for ind in role_indicators):
686
- return right, left
687
- return left, right
688
  tokens = s.split()
689
  if len(tokens) >= 2:
690
- last = tokens[-1]
691
  role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
692
  'coordinator', 'driver', 'operator', 'representative', 'chief']
693
- if any(ind == last.lower() for ind in role_indicators):
694
- return " ".join(tokens[:-1]), last
 
695
  return s, None
696
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
697
  for row_idx, row in enumerate(table.rows):
698
  if len(row.cells) >= 2:
699
  cell1_text = get_clean_text(row.cells[0]).strip().lower()
700
  cell2_text = get_clean_text(row.cells[1]).strip().lower()
701
- # header detection
 
702
  if "print name" in cell1_text and "position" in cell2_text:
703
  print(f" πŸ“Œ Found header row at {row_idx + 1}")
 
 
704
  if row_idx + 1 < len(table.rows):
705
  data_row = table.rows[row_idx + 1]
706
  if len(data_row.cells) >= 2:
707
  name_cell = data_row.cells[0]
708
  position_cell = data_row.cells[1]
709
- name_text = get_clean_text(name_cell).strip()
710
- position_text = get_clean_text(position_cell).strip()
711
- print(f" πŸ“‹ Current values: Name='{name_text}', Position='{position_text}'")
712
-
713
- # Prefer exact qualified keys first (use key-aware lookup)
714
- name_kv = find_matching_json_key_and_value("Operator Declaration.Print Name", flat_json) or find_matching_json_key_and_value("Print Name", flat_json)
715
- position_kv = find_matching_json_key_and_value("Operator Declaration.Position Title", flat_json) or find_matching_json_key_and_value("Position Title", flat_json)
716
-
717
- name_value = name_kv[1] if name_kv else None
718
- name_key = name_kv[0] if name_kv else None
719
-
720
- position_value = position_kv[1] if position_kv else None
721
- position_key = position_kv[0] if position_kv else None
722
-
723
- # parse combined cases
724
- parsed_name_from_nameval, parsed_pos_from_nameval = parse_name_and_position(name_value) if name_value is not None else (None, None)
725
- parsed_name_from_posval, parsed_pos_from_posval = parse_name_and_position(position_value) if position_value is not None else (None, None)
726
-
727
  final_name = None
728
- final_pos = None
729
-
730
- if parsed_name_from_nameval:
731
- final_name = parsed_name_from_nameval
732
- elif name_value is not None:
733
- final_name = get_value_as_string(name_value)
734
-
735
- # Position acceptance policy:
736
- # - Accept position_value ONLY if matched key indicates position/title OR parsed value looks like a role
737
- def looks_like_role(s: str) -> bool:
738
- if not s:
739
- return False
740
- s = s.lower()
741
- roles = ['manager', 'auditor', 'owner', 'director', 'supervisor', 'coordinator', 'driver', 'operator', 'representative', 'chief']
742
- # short role descriptions or containing role token
743
- if any(r in s for r in roles):
744
- return True
745
- # single/short token likely role (<=4 tokens)
746
- if len(s.split()) <= 4 and any(c.isalpha() for c in s):
747
- return True
748
- return False
749
-
750
- # Only use position_value if the matched key strongly indicates position/title
751
- use_position = False
752
- if position_kv:
753
- k_lower = (position_key or "").lower()
754
- if ("position" in k_lower or "title" in k_lower or "role" in k_lower):
755
- use_position = True
756
- # Avoid using attendance keys or attendance text as position source
757
- if position_kv and ("attendance" in position_key.lower() or "attendance list" in position_key.lower() or "attendees" in position_key.lower()):
758
- use_position = False
759
-
760
- if use_position:
761
- # choose parsed pos if available
762
- if parsed_pos_from_posval:
763
- final_pos = parsed_pos_from_posval
 
 
 
 
764
  else:
765
- final_pos = get_value_as_string(position_value) if position_value is not None else None
766
- else:
767
- # allow parsed position gleaned from name_value (if it looks like a role)
768
- if parsed_pos_from_nameval and looks_like_role(parsed_pos_from_nameval):
769
- final_pos = parsed_pos_from_nameval
770
-
771
- # final normalization
772
- if isinstance(final_name, list):
 
 
 
 
 
 
 
 
 
 
773
  final_name = " ".join(str(x) for x in final_name).strip()
774
- if isinstance(final_pos, list):
775
- final_pos = " ".join(str(x) for x in final_pos).strip()
776
- if isinstance(final_name, str):
777
- final_name = final_name.strip()
778
- if isinstance(final_pos, str):
779
- final_pos = final_pos.strip()
780
-
781
- def looks_like_person(name_str):
782
- if not name_str:
783
- return False
784
- bad_phrases = ["pty ltd", "company", "farming", "p/l", "plc"]
785
- low = name_str.lower()
786
- if any(bp in low for bp in bad_phrases):
787
- return False
788
- return len(name_str) > 1 and any(c.isalpha() for c in name_str)
789
-
790
- # Write name if empty or red
791
- if (not name_text or has_red_text(name_cell)) and final_name and looks_like_person(final_name):
792
  if has_red_text(name_cell):
793
  replace_red_text_in_cell(name_cell, final_name)
794
  else:
795
  name_cell.text = final_name
796
  replacements_made += 1
797
  print(f" βœ… Updated Print Name -> '{final_name}'")
798
-
799
- # Write position if empty or red and final_pos appears role-like
800
- if (not position_text or has_red_text(position_cell)) and final_pos and looks_like_role(final_pos):
801
  if has_red_text(position_cell):
802
- replace_red_text_in_cell(position_cell, final_pos)
803
  else:
804
- position_cell.text = final_pos
805
  replacements_made += 1
806
- print(f" βœ… Updated Position Title -> '{final_pos}'")
807
-
808
- break
809
 
 
810
  if replacements_made > 0:
811
  try:
812
  setattr(table, "_processed_operator_declaration", True)
 
647
  # Canonical operator declaration fixer — SAFER
648
  # ============================================================================
649
  def fix_operator_declaration_empty_values(table, flat_json):
650
+ """
651
+ IMPROVED: Better operator declaration handling with more reliable position detection
652
+ """
653
  replacements_made = 0
654
  print(f" 🎯 FIX: Operator Declaration empty values processing")
655
+
656
+ # Verify this is actually an operator declaration table
657
  table_context = ""
658
  for row in table.rows:
659
  for cell in row.cells:
660
  table_context += get_clean_text(cell).lower() + " "
661
+
662
  if not ("print name" in table_context and "position title" in table_context):
663
  return 0
664
+
665
  print(f" βœ… Confirmed Operator Declaration table")
666
 
667
  def parse_name_and_position(value):
668
+ """Enhanced parsing for name/position combinations"""
669
  if value is None:
670
  return None, None
671
+
672
  if isinstance(value, list):
673
  if len(value) == 0:
674
  return None, None
675
  if len(value) == 1:
676
  return str(value[0]).strip(), None
677
+
678
+ # Handle [name, position] pattern
679
  first = str(value[0]).strip()
680
  second = str(value[1]).strip()
681
  if first and second:
682
  return first, second
683
+
684
+ # Join list elements
685
  value = " ".join(str(v).strip() for v in value if str(v).strip())
686
+
687
  s = str(value).strip()
688
  if not s:
689
  return None, None
690
+
691
+ # Split on common separators
692
+ separators = [r'\s+[-–—]\s+', r'\s*,\s*', r'\s*\|\s*', r'\s*;\s*']
693
+ parts = None
694
+
695
+ for sep_pattern in separators:
696
+ parts = re.split(sep_pattern, s)
697
+ if len(parts) >= 2:
698
+ break
699
+
700
+ if parts and len(parts) >= 2:
701
  left = parts[0].strip()
702
  right = parts[1].strip()
703
+
704
+ # Check which part is more likely to be a position
705
  role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
706
+ 'coordinator', 'driver', 'operator', 'representative', 'chief',
707
+ 'president', 'ceo', 'cfo', 'secretary', 'treasurer']
708
+
709
+ right_has_role = any(ind in right.lower() for ind in role_indicators)
710
+ left_has_role = any(ind in left.lower() for ind in role_indicators)
711
+
712
+ if right_has_role and not left_has_role:
713
+ return left, right # Standard: name, position
714
+ elif left_has_role and not right_has_role:
715
+ return right, left # Reversed: position, name
716
+ else:
717
+ # Default to left=name, right=position
718
  return left, right
719
+
720
+ # Look for single word position at end
 
721
  tokens = s.split()
722
  if len(tokens) >= 2:
723
+ last_token = tokens[-1].lower()
724
  role_indicators = ['manager', 'auditor', 'owner', 'director', 'supervisor',
725
  'coordinator', 'driver', 'operator', 'representative', 'chief']
726
+ if any(ind == last_token for ind in role_indicators):
727
+ return " ".join(tokens[:-1]), tokens[-1]
728
+
729
  return s, None
730
 
731
+ def looks_like_role(s: str) -> bool:
732
+ """Check if string looks like a job role/position"""
733
+ if not s:
734
+ return False
735
+
736
+ s = s.lower().strip()
737
+
738
+ # Common role words
739
+ roles = ['manager', 'auditor', 'owner', 'director', 'supervisor',
740
+ 'coordinator', 'driver', 'operator', 'representative', 'chief',
741
+ 'president', 'ceo', 'cfo', 'secretary', 'treasurer', 'officer']
742
+
743
+ # Direct role match
744
+ if any(role in s for role in roles):
745
+ return True
746
+
747
+ # Short descriptive terms (likely roles)
748
+ if len(s.split()) <= 3 and any(c.isalpha() for c in s) and len(s) > 1:
749
+ return True
750
+
751
+ return False
752
+
753
+ def looks_like_person_name(s: str) -> bool:
754
+ """Check if string looks like a person's name"""
755
+ if not s:
756
+ return False
757
+
758
+ s = s.strip()
759
+
760
+ # Exclude company-like terms
761
+ company_terms = ['pty ltd', 'ltd', 'inc', 'corp', 'company', 'llc', 'plc']
762
+ s_lower = s.lower()
763
+ if any(term in s_lower for term in company_terms):
764
+ return False
765
+
766
+ # Should have letters and reasonable length
767
+ if len(s) > 1 and any(c.isalpha() for c in s):
768
+ return True
769
+
770
+ return False
771
+
772
+ # Process the table
773
  for row_idx, row in enumerate(table.rows):
774
  if len(row.cells) >= 2:
775
  cell1_text = get_clean_text(row.cells[0]).strip().lower()
776
  cell2_text = get_clean_text(row.cells[1]).strip().lower()
777
+
778
+ # Detect header row
779
  if "print name" in cell1_text and "position" in cell2_text:
780
  print(f" πŸ“Œ Found header row at {row_idx + 1}")
781
+
782
+ # Process data row (next row after header)
783
  if row_idx + 1 < len(table.rows):
784
  data_row = table.rows[row_idx + 1]
785
  if len(data_row.cells) >= 2:
786
  name_cell = data_row.cells[0]
787
  position_cell = data_row.cells[1]
788
+
789
+ current_name = get_clean_text(name_cell).strip()
790
+ current_position = get_clean_text(position_cell).strip()
791
+
792
+ print(f" πŸ“‹ Current values: Name='{current_name}', Position='{current_position}'")
793
+
794
+ # IMPROVED: More comprehensive search for operator declaration data
 
 
 
 
 
 
 
 
 
 
 
795
  final_name = None
796
+ final_position = None
797
+
798
+ # Search strategies in order of preference
799
+ search_strategies = [
800
+ # Strategy 1: Direct operator declaration keys
801
+ ("Operator Declaration.Print Name", "Operator Declaration.Position Title"),
802
+
803
+ # Strategy 2: Generic print name/position keys
804
+ ("Print Name", "Position Title"),
805
+
806
+ # Strategy 3: Look in operator information section
807
+ ("Operator Information.Print Name", "Operator Information.Position Title"),
808
+
809
+ # Strategy 4: Any key containing "print name" or "position"
810
+ (None, None) # Special case - will search all keys
811
+ ]
812
+
813
+ for name_key_pattern, pos_key_pattern in search_strategies:
814
+ if final_name and final_position:
815
+ break
816
+
817
+ if name_key_pattern is None:
818
+ # Search all keys for relevant data
819
+ for key, value in flat_json.items():
820
+ key_lower = key.lower()
821
+
822
+ # Look for name-like keys
823
+ if not final_name and ("print name" in key_lower or
824
+ ("name" in key_lower and "operator" in key_lower)):
825
+ if value and looks_like_person_name(str(value)):
826
+ name_from_val, pos_from_val = parse_name_and_position(value)
827
+ if name_from_val and looks_like_person_name(name_from_val):
828
+ final_name = name_from_val
829
+ if pos_from_val and looks_like_role(pos_from_val):
830
+ final_position = pos_from_val
831
+
832
+ # Look for position-like keys
833
+ if not final_position and ("position" in key_lower or "title" in key_lower):
834
+ if value and looks_like_role(str(value)):
835
+ final_position = str(value).strip()
836
  else:
837
+ # Search for specific key patterns
838
+ name_kv = find_matching_json_key_and_value(name_key_pattern, flat_json)
839
+ pos_kv = find_matching_json_key_and_value(pos_key_pattern, flat_json)
840
+
841
+ if name_kv and name_kv[1]:
842
+ name_from_val, pos_from_val = parse_name_and_position(name_kv[1])
843
+ if name_from_val and looks_like_person_name(name_from_val):
844
+ final_name = name_from_val
845
+ if pos_from_val and looks_like_role(pos_from_val) and not final_position:
846
+ final_position = pos_from_val
847
+
848
+ if pos_kv and pos_kv[1] and not final_position:
849
+ pos_val = str(pos_kv[1]).strip()
850
+ if looks_like_role(pos_val):
851
+ final_position = pos_val
852
+
853
+ # Clean up final values
854
+ if isinstance(final_name, (list, tuple)):
855
  final_name = " ".join(str(x) for x in final_name).strip()
856
+ if isinstance(final_position, (list, tuple)):
857
+ final_position = " ".join(str(x) for x in final_position).strip()
858
+
859
+ final_name = str(final_name).strip() if final_name else None
860
+ final_position = str(final_position).strip() if final_position else None
861
+
862
+ print(f" 🎯 Final extracted values: Name='{final_name}', Position='{final_position}'")
863
+
864
+ # Update name cell if needed
865
+ if (not current_name or has_red_text(name_cell)) and final_name and looks_like_person_name(final_name):
 
 
 
 
 
 
 
 
866
  if has_red_text(name_cell):
867
  replace_red_text_in_cell(name_cell, final_name)
868
  else:
869
  name_cell.text = final_name
870
  replacements_made += 1
871
  print(f" βœ… Updated Print Name -> '{final_name}'")
872
+
873
+ # Update position cell if needed
874
+ if (not current_position or has_red_text(position_cell)) and final_position and looks_like_role(final_position):
875
  if has_red_text(position_cell):
876
+ replace_red_text_in_cell(position_cell, final_position)
877
  else:
878
+ position_cell.text = final_position
879
  replacements_made += 1
880
+ print(f" βœ… Updated Position Title -> '{final_position}'")
881
+
882
+ break # Found and processed the header row
883
 
884
+ # Mark table as processed
885
  if replacements_made > 0:
886
  try:
887
  setattr(table, "_processed_operator_declaration", True)